/*************************************************************************
*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org. If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
*************************************************************************/

#import "OOoMetaDataParser.h"

// Names of meta.xml elements whose text is stored as a single value.
static NSSet *singleValueXMLElements;
// Names of meta.xml elements whose text is collected into an array of values.
static NSSet *multiValueXMLElements;
// Maps element names (and the "meta:name" of user-defined elements) to the
// metadata attribute key under which the value is stored.
static NSDictionary *metaXML2MDIKeys;

@implementation OOoMetaDataParser

/**
 * Builds the file-level lookup tables exactly once.
 *
 * The tables are created with alloc/init and intentionally never released;
 * they live for the lifetime of the process.
 */
+ (void)initialize
{
    static BOOL isInitialized = NO;

    if (isInitialized) {
        return;
    }

    // meta elements with only one value
    singleValueXMLElements = [[NSSet alloc] initWithObjects:
        @"dc:title",
        @"dc:description",
        @"meta:user-defined",
        nil];

    // meta elements that can have more than one value
    multiValueXMLElements = [[NSSet alloc] initWithObjects:
        @"dc:subject",
        @"meta:keyword",
        @"meta:initial-creator",
        @"dc:creator",
        nil];

    // map used to store the values under the correct MDI keys
    // (initWithObjectsAndKeys: takes value first, then key)
    metaXML2MDIKeys = [[NSDictionary alloc] initWithObjectsAndKeys:
        (NSString*)kMDItemTitle,                    @"dc:title",
        (NSString*)kMDItemDescription,              @"dc:description",
        (NSString*)kMDItemKeywords,                 @"dc:subject",
        (NSString*)kMDItemAuthors,                  @"meta:initial-creator",
        (NSString*)kMDItemAuthors,                  @"dc:creator",
        (NSString*)kMDItemKeywords,                 @"meta:keyword",
        @"org_openoffice_opendocument_custominfo1", @"Info 1",
        @"org_openoffice_opendocument_custominfo2", @"Info 2",
        @"org_openoffice_opendocument_custominfo3", @"Info 3",
        @"org_openoffice_opendocument_custominfo4", @"Info 4",
        nil];

    isInitialized = YES;
}

/**
 * Releases everything owned for the element currently being read and puts
 * the parser state back to "not inside an interesting element".
 *
 * Both object ivars are set to nil after being released so that a later call
 * can never release the same object twice. Safe to call at any time, also
 * when nothing is pending (messaging nil is a no-op).
 */
- (void)resetElementState
{
    [textCurrentElement release];
    textCurrentElement = nil;

    [customAttribute release];
    customAttribute = nil;

    shouldReadCharacters = NO;
    isCustom = NO;
}

/**
 * Creates a parser with no element pending.
 *
 * @return the initialized object, or nil if the superclass initializer failed.
 */
- (id)init
{
    self = [super init];
    if (self != nil) {
        shouldReadCharacters = NO;
        isCustom = NO;
        textCurrentElement = nil;
        customAttribute = nil;
    }

    return self;
}

/**
 * Parses the given XML data and stores the recognized metadata in dict.
 *
 * @param data raw XML bytes handed to NSXMLParser.
 * @param dict receives the values; single-value elements are stored as
 *             strings, multi-value elements as arrays of unique strings.
 *             The dictionary is not retained, it is only used while this
 *             method runs.
 */
- (void)parseXML:(NSData*)data intoDictionary:(NSMutableDictionary*)dict
{
    metaValues = dict;

    // start from a clean state, whatever a previous run left behind
    [self resetElementState];

    NSXMLParser *parser = [[NSXMLParser alloc] initWithData:data];

    [parser setDelegate:self];
    [parser setShouldResolveExternalEntities:NO];
    [parser parse];

    [parser release];

    // a parse that stops inside an element must not leak its buffer
    [self resetElementState];
}

/**
 * NSXMLParser callback: an element was opened.
 *
 * Starts collecting text when the element is one of the known single- or
 * multi-value meta elements. For "meta:user-defined" the "meta:name"
 * attribute is remembered, because it (not the element name) selects the
 * storage key.
 */
- (void)parser:(NSXMLParser *)parser didStartElement:(NSString *)elementName namespaceURI:(NSString *)namespaceURI qualifiedName:(NSString *)qualifiedName attributes:(NSDictionary *)attributeDict
{
    // drop anything still pending so the previous buffer cannot leak
    [self resetElementState];

    BOOL isInteresting = [singleValueXMLElements containsObject:elementName]
                      || [multiValueXMLElements containsObject:elementName];
    if (!isInteresting) {
        // we are not interested in this element
        return;
    }

    shouldReadCharacters = YES;
    textCurrentElement = [[NSMutableString alloc] init];

    isCustom = [elementName isEqualToString:@"meta:user-defined"];
    if (isCustom) {
        customAttribute = [[attributeDict objectForKey:@"meta:name"] retain];
    }
}

/**
 * NSXMLParser callback: an element was closed.
 *
 * Stores the collected text under the mapped key, if there is one, and then
 * always resets the per-element state. Elements without a mapping (for
 * example user-defined entries with an unknown name) are dropped.
 */
- (void)parser:(NSXMLParser *)parser didEndElement:(NSString *)elementName namespaceURI:(NSString *)namespaceURI qualifiedName:(NSString *)qName
{
    if (shouldReadCharacters) {
        // user-defined elements are looked up by their meta:name attribute
        NSString *lookupKey = isCustom ? customAttribute : elementName;
        NSString *mdiName = (NSString*)[metaXML2MDIKeys objectForKey:lookupKey];

        if (mdiName != nil && textCurrentElement != nil) {
            if ([singleValueXMLElements containsObject:elementName]) {
                [metaValues setObject:textCurrentElement forKey:mdiName];
            } else {
                // must be multi-value
                NSMutableArray *values = [metaValues objectForKey:mdiName];
                if (values == nil) {
                    // we have no array yet, create and store it
                    values = [NSMutableArray array];
                    [metaValues setObject:values forKey:mdiName];
                }
                // only store an element once, no need for duplicates
                if (![values containsObject:textCurrentElement]) {
                    [values addObject:textCurrentElement];
                }
            }
        }
    }

    // cleanup on every path: releases the buffer and attribute and nils them
    [self resetElementState];
}

/**
 * NSXMLParser callback: character data was found.
 *
 * This may be called several times for a single element, so the pieces are
 * appended to the buffer of the current element.
 */
- (void)parser:(NSXMLParser *)parser foundCharacters:(NSString *)string
{
    if (!shouldReadCharacters) {
        return;
    }

    [textCurrentElement appendString:string];
}

/**
 * NSXMLParser callback: parsing failed. Logs code, description and position.
 */
- (void)parser:(NSXMLParser *)parser parseErrorOccurred:(NSError *)parseError
{
    // The format must be a literal: the description may itself contain '%'.
    // NSInteger values are cast to long to match %ld on 32- and 64-bit.
    NSLog(@"Error %ld, Description: %@, Line: %ld, Column: %ld",
          (long)[parseError code],
          [parseError localizedDescription],
          (long)[parser lineNumber],
          (long)[parser columnNumber]);
}

@end