1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 package org.apache.openoffice.ooxml.schema.generator; 23 24 import java.io.File; 25 import java.io.FileNotFoundException; 26 import java.io.FileOutputStream; 27 import java.io.PrintStream; 28 import java.util.HashMap; 29 import java.util.Map; 30 import java.util.Map.Entry; 31 import java.util.Set; 32 import java.util.TreeMap; 33 import java.util.TreeSet; 34 35 import org.apache.openoffice.ooxml.schema.automaton.FiniteAutomaton; 36 import org.apache.openoffice.ooxml.schema.automaton.FiniteAutomatonContainer; 37 import org.apache.openoffice.ooxml.schema.automaton.SkipData; 38 import org.apache.openoffice.ooxml.schema.automaton.State; 39 import org.apache.openoffice.ooxml.schema.automaton.Transition; 40 import org.apache.openoffice.ooxml.schema.model.attribute.Attribute; 41 import org.apache.openoffice.ooxml.schema.model.attribute.AttributeBase.Use; 42 import org.apache.openoffice.ooxml.schema.model.base.INode; 43 import org.apache.openoffice.ooxml.schema.model.base.QualifiedName; 44 import org.apache.openoffice.ooxml.schema.model.schema.NamespaceMap; 45 import org.apache.openoffice.ooxml.schema.parser.FormDefault; 46 import org.apache.openoffice.ooxml.schema.simple.BlobNode; 47 import org.apache.openoffice.ooxml.schema.simple.DateTimeNode; 48 import org.apache.openoffice.ooxml.schema.simple.ISimpleTypeNode; 49 import org.apache.openoffice.ooxml.schema.simple.ISimpleTypeNodeVisitor; 50 import org.apache.openoffice.ooxml.schema.simple.NumberNode; 51 import org.apache.openoffice.ooxml.schema.simple.SimpleTypeContainer; 52 import org.apache.openoffice.ooxml.schema.simple.SimpleTypeDescriptor; 53 import org.apache.openoffice.ooxml.schema.simple.StringNode; 54 import org.apache.openoffice.ooxml.schema.simple.UnionNode; 55 56 public class ParserTablesGenerator 57 { ParserTablesGenerator( final FiniteAutomatonContainer aAutomatons, final NamespaceMap aNamespaces, final SimpleTypeContainer aSimpleTypes, final Map<String,Integer> aAttributeValueToIdMap)58 public ParserTablesGenerator ( 59 final FiniteAutomatonContainer aAutomatons, 60 final NamespaceMap aNamespaces, 61 final SimpleTypeContainer aSimpleTypes, 62 final Map<String,Integer> aAttributeValueToIdMap) 63 { 64 maAutomatons = aAutomatons; 65 maSimpleTypes = aSimpleTypes; 66 maNamespaces = aNamespaces; 67 maNameToIdMap = new TreeMap<>(); 68 maPrefixToIdMap = new HashMap<>(); 69 maTypeNameToIdMap = new TreeMap<>(); 70 maAttributeValueToIdMap = aAttributeValueToIdMap; 71 } 72 73 74 75 Generate( final File aParseTableFile)76 public void Generate ( 77 final File aParseTableFile) 78 { 79 final long nStartTime = System.currentTimeMillis(); 80 81 SetupNameList(); 82 AssignNameIds(); 83 84 try 85 { 86 final PrintStream aOut = new PrintStream(new FileOutputStream(aParseTableFile)); 87 88 WriteNamespaceList(aOut); 89 WriteNameList(aOut); 90 WriteGlobalStartEndStates(aOut); 91 WriteAutomatonList(aOut); 92 WriteSimpleTypes(aOut); 93 WriteAttributeValues(aOut); 94 aOut.close(); 95 } 96 catch (final FileNotFoundException aException) 97 { 98 aException.printStackTrace(); 99 } 100 101 final long nEndTime = System.currentTimeMillis(); 102 System.out.printf("wrote parse tables to %s in %fs\n", 103 aParseTableFile.toString(), 104 (nEndTime-nStartTime)/1000.0); 105 } 106 107 108 109 SetupNameList()110 private void SetupNameList () 111 { 112 final Set<String> aNames = new TreeSet<>(); 113 114 // Add the element names. 115 for (final FiniteAutomaton aAutomaton : maAutomatons.GetAutomatons()) 116 for (final Transition aTransition : aAutomaton.GetTransitions()) 117 { 118 if (aTransition.GetElementName() == null) 119 throw new RuntimeException(); 120 aNames.add(aTransition.GetElementName().GetLocalPart()); 121 } 122 123 // Add the attribute names. 124 for (final FiniteAutomaton aAutomaton : maAutomatons.GetAutomatons()) 125 for (final Attribute aAttribute : aAutomaton.GetAttributes()) 126 aNames.add(aAttribute.GetName().GetLocalPart()); 127 128 // Create unique ids for the names. 129 int nIndex = 1; 130 maNameToIdMap.clear(); 131 for (final String sName : aNames) 132 maNameToIdMap.put(sName, nIndex++); 133 134 // Create unique ids for namespace prefixes. 135 nIndex = 1; 136 maPrefixToIdMap.clear(); 137 for (final Entry<String, String> aEntry : maNamespaces) 138 { 139 maPrefixToIdMap.put(aEntry.getValue(), nIndex++); 140 } 141 } 142 143 144 145 146 /** During the largest part of the parsing process, states and elements are 147 * identified not via their name but via a unique id. 148 * That allows a fast lookup. 149 */ AssignNameIds()150 private void AssignNameIds () 151 { 152 maTypeNameToIdMap.clear(); 153 int nIndex = 0; 154 155 // Process state names. 156 final Set<QualifiedName> aSortedTypeNames = new TreeSet<>(); 157 for (final State aState : maAutomatons.GetStates()) 158 aSortedTypeNames.add(aState.GetQualifiedName()); 159 for (final Entry<String, SimpleTypeDescriptor> aSimpleType : maSimpleTypes.GetSimpleTypes()) 160 aSortedTypeNames.add(aSimpleType.getValue().GetName()); 161 162 for (final QualifiedName aName : aSortedTypeNames) 163 maTypeNameToIdMap.put(aName.GetStateName(), nIndex++); 164 } 165 166 167 168 WriteNamespaceList(final PrintStream aOut)169 private void WriteNamespaceList (final PrintStream aOut) 170 { 171 aOut.printf("# namespaces\n"); 172 for (final Entry<String, String> aEntry : maNamespaces) 173 { 174 aOut.printf("namespace %-8s %2d %s\n", 175 aEntry.getValue(), 176 maPrefixToIdMap.get(aEntry.getValue()), 177 aEntry.getKey()); 178 } 179 } 180 181 182 183 WriteGlobalStartEndStates(final PrintStream aOut)184 private void WriteGlobalStartEndStates (final PrintStream aOut) 185 { 186 aOut.printf("\n# start and end states\n"); 187 188 final FiniteAutomaton aAutomaton = maAutomatons.GetTopLevelAutomaton(); 189 final State aStartState = aAutomaton.GetStartState(); 190 aOut.printf("start-state %4d %s\n", 191 maTypeNameToIdMap.get(aStartState.GetFullname()), 192 aStartState.GetFullname()); 193 for (final State aAcceptingState : aAutomaton.GetAcceptingStates()) 194 aOut.printf("end-state %4d %s\n", 195 maTypeNameToIdMap.get(aAcceptingState.GetFullname()), 196 aAcceptingState.GetFullname()); 197 } 198 199 200 201 WriteNameList(final PrintStream aOut)202 private void WriteNameList (final PrintStream aOut) 203 { 204 aOut.printf("\n# %d names\n", maNameToIdMap.size()); 205 for (final Entry<String, Integer> aEntry : maNameToIdMap.entrySet()) 206 { 207 aOut.printf("name %4d %s\n", 208 aEntry.getValue(), 209 aEntry.getKey()); 210 } 211 212 aOut.printf("\n# %s states\n", maTypeNameToIdMap.size()); 213 for (final Entry<String, Integer> aEntry : maTypeNameToIdMap.entrySet()) 214 { 215 aOut.printf("state-name %4d %s\n", 216 aEntry.getValue(), 217 aEntry.getKey()); 218 } 219 } 220 221 222 223 WriteAutomatonList(final PrintStream aOut)224 private void WriteAutomatonList (final PrintStream aOut) 225 { 226 for (final FiniteAutomaton aAutomaton : maAutomatons.GetAutomatons()) 227 { 228 aOut.printf("# %s at %s\n", aAutomaton.GetTypeName(), aAutomaton.GetLocation()); 229 230 final State aStartState = aAutomaton.GetStartState(); 231 final int nStartStateId = maTypeNameToIdMap.get(aStartState.GetFullname()); 232 233 // Write start state. 234 aOut.printf("start-state %d %s\n", 235 nStartStateId, 236 aStartState); 237 238 // Write accepting states. 239 for (final State aState : aAutomaton.GetAcceptingStates()) 240 { 241 aOut.printf("accepting-state %d %s\n", 242 maTypeNameToIdMap.get(aState.GetFullname()), 243 aState.GetFullname()); 244 } 245 246 // Write text type. 247 final INode aTextType = aStartState.GetTextType(); 248 if (aTextType != null) 249 { 250 switch(aTextType.GetNodeType()) 251 { 252 case BuiltIn: 253 aOut.printf("text-type %d %d %s\n", 254 nStartStateId, 255 maTypeNameToIdMap.get(aTextType.GetName().GetStateName()), 256 aTextType.GetName().GetStateName()); 257 break; 258 case SimpleType: 259 aOut.printf("text-type %d %d %s\n", 260 nStartStateId, 261 maTypeNameToIdMap.get(aTextType.GetName().GetStateName()), 262 aTextType.GetName().GetStateName()); 263 break; 264 default: 265 throw new RuntimeException(); 266 } 267 } 268 269 WriteAttributes( 270 aOut, 271 aStartState, 272 aAutomaton.GetAttributes()); 273 274 // Write transitions. 275 for (final Transition aTransition : aAutomaton.GetTransitions()) 276 { 277 final Integer nId = maTypeNameToIdMap.get(aTransition.GetElementTypeName()); 278 aOut.printf("transition %4d %4d %2d %4d %4d %s %s %s %s\n", 279 maTypeNameToIdMap.get(aTransition.GetStartState().GetFullname()), 280 maTypeNameToIdMap.get(aTransition.GetEndState().GetFullname()), 281 maPrefixToIdMap.get(aTransition.GetElementName().GetNamespacePrefix()), 282 maNameToIdMap.get(aTransition.GetElementName().GetLocalPart()), 283 nId!=null ? nId : -1, 284 aTransition.GetStartState().GetFullname(), 285 aTransition.GetEndState().GetFullname(), 286 aTransition.GetElementName().GetStateName(), 287 aTransition.GetElementTypeName()); 288 } 289 // Write skip data. 290 for (final State aState : aAutomaton.GetStates()) 291 { 292 for (@SuppressWarnings("unused") final SkipData aSkipData : aState.GetSkipData()) 293 aOut.printf("skip %4d %s\n", 294 maTypeNameToIdMap.get(aState.GetFullname()), 295 aState.GetFullname()); 296 } 297 } 298 } 299 300 301 302 WriteAttributes( final PrintStream aOut, final State aState, final Iterable<Attribute> aAttributes)303 private void WriteAttributes ( 304 final PrintStream aOut, 305 final State aState, 306 final Iterable<Attribute> aAttributes) 307 { 308 // Write attributes. 309 for (final Attribute aAttribute : aAttributes) 310 { 311 aOut.printf("attribute %4d %2d %c %4d %4d %s %s %s %s %s\n", 312 maTypeNameToIdMap.get(aState.GetFullname()), 313 maPrefixToIdMap.get(aAttribute.GetName().GetNamespacePrefix()), 314 aAttribute.GetFormDefault()==FormDefault.qualified ? 'q' : 'u', 315 maNameToIdMap.get(aAttribute.GetName().GetLocalPart()), 316 maTypeNameToIdMap.get(aAttribute.GetTypeName().GetStateName()), 317 aAttribute.GetUse()==Use.Optional ? 'o' : 'u', 318 aAttribute.GetDefault()==null ? "null" : '"'+aAttribute.GetDefault()+'"', 319 aState.GetFullname(), 320 aAttribute.GetName().GetStateName(), 321 aAttribute.GetTypeName().GetStateName()); 322 } 323 } 324 325 326 327 WriteSimpleTypes( final PrintStream aOut)328 private void WriteSimpleTypes ( 329 final PrintStream aOut) 330 { 331 if (maSimpleTypes == null) 332 { 333 aOut.printf("\n// There is no simple type information.\n"); 334 } 335 else 336 { 337 aOut.printf("\n// %d simple types.\n", maSimpleTypes.GetSimpleTypeCount()); 338 for (final Entry<String,SimpleTypeDescriptor> aEntry : maSimpleTypes.GetSimpleTypesSorted()) 339 { 340 int nIndex = 0; 341 for (final ISimpleTypeNode aSubType : aEntry.getValue().GetSubType()) 342 { 343 final int nCurrentIndex = nIndex++; 344 345 final StringBuffer aLine = new StringBuffer(); 346 aLine.append(String.format( 347 "simple-type %5d %1d %c ", 348 maTypeNameToIdMap.get(aEntry.getKey()), 349 nCurrentIndex, 350 aSubType.IsList() ? 'L' : 'T')); 351 352 aSubType.AcceptVisitor(new ISimpleTypeNodeVisitor() 353 { 354 @Override public void Visit(UnionNode aType) 355 { 356 throw new RuntimeException("unexpected"); 357 } 358 @Override public void Visit(StringNode aType) 359 { 360 AppendStringDescription(aLine, aType); 361 } 362 @Override public void Visit(NumberNode<?> aType) 363 { 364 AppendNumberDescription(aLine, aType); 365 } 366 @Override public void Visit(DateTimeNode aType) 367 { 368 AppendDateTimeDescription(aLine, aType); 369 } 370 @Override public void Visit(BlobNode aType) 371 { 372 AppendBlobDescription(aLine, aType); 373 } 374 }); 375 aOut.printf("%s\n", aLine.toString()); 376 } 377 } 378 } 379 } 380 381 382 383 WriteAttributeValues( final PrintStream aOut)384 private void WriteAttributeValues ( 385 final PrintStream aOut) 386 { 387 final Map<String,Integer> aSortedMap = new TreeMap<>(); 388 aSortedMap.putAll(maAttributeValueToIdMap); 389 aOut.printf("// %d attribute values from enumerations.\n", maAttributeValueToIdMap.size()); 390 for (final Entry<String,Integer> aEntry : aSortedMap.entrySet()) 391 aOut.printf("attribute-value %5d %s\n", aEntry.getValue(), QuoteString(aEntry.getKey())); 392 } 393 394 395 396 AppendStringDescription( final StringBuffer aLine, final StringNode aType)397 private static void AppendStringDescription ( 398 final StringBuffer aLine, 399 final StringNode aType) 400 { 401 aLine.append("S "); 402 switch(aType.GetRestrictionType()) 403 { 404 case Enumeration: 405 aLine.append('E'); 406 for (final int nValueId : aType.GetEnumerationRestriction()) 407 { 408 aLine.append(' '); 409 aLine.append(nValueId); 410 } 411 break; 412 case Pattern: 413 aLine.append("P "); 414 aLine.append(QuoteString(aType.GetPatternRestriction())); 415 break; 416 case Length: 417 aLine.append("L "); 418 final int[] aLengthRestriction = aType.GetLengthRestriction(); 419 aLine.append(aLengthRestriction[0]); 420 aLine.append(' '); 421 aLine.append(aLengthRestriction[1]); 422 break; 423 case None: 424 aLine.append('N'); 425 break; 426 default: 427 throw new RuntimeException(); 428 } 429 } 430 431 432 433 AppendNumberDescription( final StringBuffer aLine, final NumberNode<?> aType)434 private static void AppendNumberDescription ( 435 final StringBuffer aLine, 436 final NumberNode<?> aType) 437 { 438 aLine.append("N "); 439 switch(aType.GetNumberType()) 440 { 441 case Boolean: aLine.append("u1"); break; 442 case Byte: aLine.append("s8"); break; 443 case UnsignedByte: aLine.append("u8"); break; 444 case Short: aLine.append("s16"); break; 445 case UnsignedShort: aLine.append("u16"); break; 446 case Int: aLine.append("s32"); break; 447 case UnsignedInt: aLine.append("u32"); break; 448 case Long: aLine.append("s64"); break; 449 case UnsignedLong: aLine.append("u64"); break; 450 case Integer: aLine.append("s*"); break; 451 case Float: aLine.append("f"); break; 452 case Double: aLine.append("d"); break; 453 default: 454 throw new RuntimeException("unsupported numerical type "+aType.GetNumberType()); 455 } 456 aLine.append(' '); 457 switch(aType.GetRestrictionType()) 458 { 459 case Enumeration: 460 aLine.append("E "); 461 for (final Object nValue : aType.GetEnumerationRestriction()) 462 { 463 aLine.append(" "); 464 aLine.append(nValue); 465 } 466 break; 467 case Size: 468 aLine.append("S"); 469 if (aType.GetMinimum() != null) 470 { 471 if (aType.IsMinimumInclusive()) 472 aLine.append(" >= "); 473 else 474 aLine.append(" > "); 475 aLine.append(aType.GetMinimum()); 476 } 477 if (aType.GetMaximum() != null) 478 { 479 if (aType.IsMaximumInclusive()) 480 aLine.append(" <= "); 481 else 482 aLine.append(" < "); 483 aLine.append(aType.GetMaximum()); 484 } 485 break; 486 case None: 487 aLine.append("N"); 488 break; 489 default: 490 throw new RuntimeException("unsupported numerical restriction "+aType.GetRestrictionType()); 491 } 492 } 493 494 495 496 AppendDateTimeDescription( final StringBuffer aLine, final DateTimeNode aType)497 private static void AppendDateTimeDescription ( 498 final StringBuffer aLine, 499 final DateTimeNode aType) 500 { 501 aLine.append("D"); 502 } 503 504 505 506 AppendBlobDescription( final StringBuffer aLine, final BlobNode aType)507 private static void AppendBlobDescription ( 508 final StringBuffer aLine, 509 final BlobNode aType) 510 { 511 aLine.append("B "); 512 switch(aType.GetBlobType()) 513 { 514 case Base64Binary: 515 aLine.append("B "); 516 break; 517 case HexBinary: 518 aLine.append ("H "); 519 break; 520 default: 521 throw new RuntimeException("unsupported blob type"); 522 } 523 switch(aType.GetRestrictionType()) 524 { 525 case Length: 526 aLine.append("L "); 527 aLine.append(aType.GetLengthRestriction()); 528 break; 529 case None: 530 aLine.append("N"); 531 break; 532 default: 533 throw new RuntimeException(); 534 } 535 } 536 537 538 539 QuoteString(final String sText)540 private static String QuoteString(final String sText) 541 { 542 return "\"" + sText.replace("\"", """).replace(" ", "%20") + "\""; 543 } 544 545 546 547 548 private final FiniteAutomatonContainer maAutomatons; 549 private final SimpleTypeContainer maSimpleTypes; 550 private final NamespaceMap maNamespaces; 551 private final Map<String,Integer> maNameToIdMap; 552 private final Map<String,Integer> maPrefixToIdMap; 553 private final Map<String,Integer> maTypeNameToIdMap; 554 private final Map<String,Integer> maAttributeValueToIdMap; 555 } 556