1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements.  See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership.  The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License.  You may obtain a copy of the License at
10 *
11 *   http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied.  See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21 
22 package org.apache.openoffice.ooxml.schema.generator;
23 
24 import java.io.File;
25 import java.io.FileNotFoundException;
26 import java.io.FileOutputStream;
27 import java.io.PrintStream;
28 import java.util.HashMap;
29 import java.util.Map;
30 import java.util.Map.Entry;
31 import java.util.Set;
32 import java.util.TreeMap;
33 import java.util.TreeSet;
34 
35 import org.apache.openoffice.ooxml.schema.automaton.FiniteAutomaton;
36 import org.apache.openoffice.ooxml.schema.automaton.FiniteAutomatonContainer;
37 import org.apache.openoffice.ooxml.schema.automaton.SkipData;
38 import org.apache.openoffice.ooxml.schema.automaton.State;
39 import org.apache.openoffice.ooxml.schema.automaton.Transition;
40 import org.apache.openoffice.ooxml.schema.model.attribute.Attribute;
41 import org.apache.openoffice.ooxml.schema.model.attribute.AttributeBase.Use;
42 import org.apache.openoffice.ooxml.schema.model.base.INode;
43 import org.apache.openoffice.ooxml.schema.model.base.QualifiedName;
44 import org.apache.openoffice.ooxml.schema.model.schema.NamespaceMap;
45 import org.apache.openoffice.ooxml.schema.parser.FormDefault;
46 import org.apache.openoffice.ooxml.schema.simple.BlobNode;
47 import org.apache.openoffice.ooxml.schema.simple.DateTimeNode;
48 import org.apache.openoffice.ooxml.schema.simple.ISimpleTypeNode;
49 import org.apache.openoffice.ooxml.schema.simple.ISimpleTypeNodeVisitor;
50 import org.apache.openoffice.ooxml.schema.simple.NumberNode;
51 import org.apache.openoffice.ooxml.schema.simple.SimpleTypeContainer;
52 import org.apache.openoffice.ooxml.schema.simple.SimpleTypeDescriptor;
53 import org.apache.openoffice.ooxml.schema.simple.StringNode;
54 import org.apache.openoffice.ooxml.schema.simple.UnionNode;
55 
56 public class ParserTablesGenerator
57 {
ParserTablesGenerator( final FiniteAutomatonContainer aAutomatons, final NamespaceMap aNamespaces, final SimpleTypeContainer aSimpleTypes, final Map<String,Integer> aAttributeValueToIdMap)58     public ParserTablesGenerator (
59         final FiniteAutomatonContainer aAutomatons,
60         final NamespaceMap aNamespaces,
61         final SimpleTypeContainer aSimpleTypes,
62         final Map<String,Integer> aAttributeValueToIdMap)
63     {
64         maAutomatons = aAutomatons;
65         maSimpleTypes = aSimpleTypes;
66         maNamespaces = aNamespaces;
67         maNameToIdMap = new TreeMap<>();
68         maPrefixToIdMap = new HashMap<>();
69         maTypeNameToIdMap = new TreeMap<>();
70         maAttributeValueToIdMap = aAttributeValueToIdMap;
71     }
72 
73 
74 
75 
Generate( final File aParseTableFile)76     public void Generate (
77         final File aParseTableFile)
78     {
79         final long nStartTime = System.currentTimeMillis();
80 
81         SetupNameList();
82         AssignNameIds();
83 
84         try
85         {
86             final PrintStream aOut = new PrintStream(new FileOutputStream(aParseTableFile));
87 
88             WriteNamespaceList(aOut);
89             WriteNameList(aOut);
90             WriteGlobalStartEndStates(aOut);
91             WriteAutomatonList(aOut);
92             WriteSimpleTypes(aOut);
93             WriteAttributeValues(aOut);
94             aOut.close();
95         }
96         catch (final FileNotFoundException aException)
97         {
98             aException.printStackTrace();
99         }
100 
101         final long nEndTime = System.currentTimeMillis();
102         System.out.printf("wrote parse tables to %s in %fs\n",
103             aParseTableFile.toString(),
104             (nEndTime-nStartTime)/1000.0);
105     }
106 
107 
108 
109 
SetupNameList()110     private void SetupNameList ()
111     {
112         final Set<String> aNames = new TreeSet<>();
113 
114         // Add the element names.
115         for (final FiniteAutomaton aAutomaton : maAutomatons.GetAutomatons())
116             for (final Transition aTransition : aAutomaton.GetTransitions())
117             {
118                 if (aTransition.GetElementName() == null)
119                     throw new RuntimeException();
120                 aNames.add(aTransition.GetElementName().GetLocalPart());
121             }
122 
123         // Add the attribute names.
124         for (final FiniteAutomaton aAutomaton : maAutomatons.GetAutomatons())
125             for (final Attribute aAttribute : aAutomaton.GetAttributes())
126                 aNames.add(aAttribute.GetName().GetLocalPart());
127 
128         // Create unique ids for the names.
129         int nIndex = 1;
130         maNameToIdMap.clear();
131         for (final String sName : aNames)
132             maNameToIdMap.put(sName, nIndex++);
133 
134         // Create unique ids for namespace prefixes.
135         nIndex = 1;
136         maPrefixToIdMap.clear();
137         for (final Entry<String, String> aEntry : maNamespaces)
138         {
139             maPrefixToIdMap.put(aEntry.getValue(), nIndex++);
140         }
141     }
142 
143 
144 
145 
146     /** During the largest part of the parsing process, states and elements are
147      *  identified not via their name but via a unique id.
148      *  That allows a fast lookup.
149      */
AssignNameIds()150     private void AssignNameIds ()
151     {
152         maTypeNameToIdMap.clear();
153         int nIndex = 0;
154 
155         // Process state names.
156         final Set<QualifiedName> aSortedTypeNames = new TreeSet<>();
157         for (final State aState : maAutomatons.GetStates())
158             aSortedTypeNames.add(aState.GetQualifiedName());
159         for (final Entry<String, SimpleTypeDescriptor> aSimpleType : maSimpleTypes.GetSimpleTypes())
160             aSortedTypeNames.add(aSimpleType.getValue().GetName());
161 
162         for (final QualifiedName aName : aSortedTypeNames)
163             maTypeNameToIdMap.put(aName.GetStateName(), nIndex++);
164     }
165 
166 
167 
168 
WriteNamespaceList(final PrintStream aOut)169     private void WriteNamespaceList (final PrintStream aOut)
170     {
171         aOut.printf("# namespaces\n");
172         for (final Entry<String, String> aEntry : maNamespaces)
173         {
174             aOut.printf("namespace %-8s %2d %s\n",
175                 aEntry.getValue(),
176                 maPrefixToIdMap.get(aEntry.getValue()),
177                 aEntry.getKey());
178         }
179     }
180 
181 
182 
183 
WriteGlobalStartEndStates(final PrintStream aOut)184     private void WriteGlobalStartEndStates (final PrintStream aOut)
185     {
186         aOut.printf("\n# start and end states\n");
187 
188         final FiniteAutomaton aAutomaton = maAutomatons.GetTopLevelAutomaton();
189         final State aStartState = aAutomaton.GetStartState();
190         aOut.printf("start-state %4d %s\n",
191             maTypeNameToIdMap.get(aStartState.GetFullname()),
192             aStartState.GetFullname());
193         for (final State aAcceptingState : aAutomaton.GetAcceptingStates())
194             aOut.printf("end-state %4d %s\n",
195                 maTypeNameToIdMap.get(aAcceptingState.GetFullname()),
196                 aAcceptingState.GetFullname());
197     }
198 
199 
200 
201 
WriteNameList(final PrintStream aOut)202     private void WriteNameList (final PrintStream aOut)
203     {
204         aOut.printf("\n# %d names\n", maNameToIdMap.size());
205         for (final Entry<String, Integer> aEntry : maNameToIdMap.entrySet())
206         {
207             aOut.printf("name %4d %s\n",
208                 aEntry.getValue(),
209                 aEntry.getKey());
210         }
211 
212         aOut.printf("\n# %s states\n",  maTypeNameToIdMap.size());
213         for (final Entry<String, Integer> aEntry : maTypeNameToIdMap.entrySet())
214         {
215             aOut.printf("state-name %4d %s\n",
216                 aEntry.getValue(),
217                 aEntry.getKey());
218         }
219     }
220 
221 
222 
223 
WriteAutomatonList(final PrintStream aOut)224     private void WriteAutomatonList (final PrintStream aOut)
225     {
226         for (final FiniteAutomaton aAutomaton : maAutomatons.GetAutomatons())
227         {
228             aOut.printf("# %s at %s\n", aAutomaton.GetTypeName(), aAutomaton.GetLocation());
229 
230             final State aStartState = aAutomaton.GetStartState();
231             final int nStartStateId = maTypeNameToIdMap.get(aStartState.GetFullname());
232 
233             // Write start state.
234             aOut.printf("start-state %d %s\n",
235                 nStartStateId,
236                 aStartState);
237 
238             // Write accepting states.
239             for (final State aState : aAutomaton.GetAcceptingStates())
240             {
241                 aOut.printf("accepting-state %d %s\n",
242                     maTypeNameToIdMap.get(aState.GetFullname()),
243                     aState.GetFullname());
244             }
245 
246             // Write text type.
247             final INode aTextType = aStartState.GetTextType();
248             if (aTextType != null)
249             {
250                 switch(aTextType.GetNodeType())
251                 {
252                     case BuiltIn:
253                         aOut.printf("text-type %d %d   %s\n",
254                             nStartStateId,
255                             maTypeNameToIdMap.get(aTextType.GetName().GetStateName()),
256                             aTextType.GetName().GetStateName());
257                         break;
258                     case SimpleType:
259                         aOut.printf("text-type %d %d   %s\n",
260                             nStartStateId,
261                             maTypeNameToIdMap.get(aTextType.GetName().GetStateName()),
262                             aTextType.GetName().GetStateName());
263                         break;
264                     default:
265                         throw new RuntimeException();
266                 }
267             }
268 
269             WriteAttributes(
270                 aOut,
271                 aStartState,
272                 aAutomaton.GetAttributes());
273 
274             // Write transitions.
275             for (final Transition aTransition : aAutomaton.GetTransitions())
276             {
277                 final Integer nId = maTypeNameToIdMap.get(aTransition.GetElementTypeName());
278                 aOut.printf("transition %4d %4d %2d %4d %4d  %s %s %s %s\n",
279                     maTypeNameToIdMap.get(aTransition.GetStartState().GetFullname()),
280                     maTypeNameToIdMap.get(aTransition.GetEndState().GetFullname()),
281                     maPrefixToIdMap.get(aTransition.GetElementName().GetNamespacePrefix()),
282                     maNameToIdMap.get(aTransition.GetElementName().GetLocalPart()),
283                     nId!=null ? nId : -1,
284                     aTransition.GetStartState().GetFullname(),
285                     aTransition.GetEndState().GetFullname(),
286                     aTransition.GetElementName().GetStateName(),
287                     aTransition.GetElementTypeName());
288             }
289             // Write skip data.
290             for (final State aState : aAutomaton.GetStates())
291             {
292                 for (@SuppressWarnings("unused") final SkipData aSkipData : aState.GetSkipData())
293                     aOut.printf("skip %4d   %s\n",
294                         maTypeNameToIdMap.get(aState.GetFullname()),
295                         aState.GetFullname());
296             }
297         }
298     }
299 
300 
301 
302 
WriteAttributes( final PrintStream aOut, final State aState, final Iterable<Attribute> aAttributes)303     private void WriteAttributes (
304         final PrintStream aOut,
305         final State aState,
306         final Iterable<Attribute> aAttributes)
307     {
308         // Write attributes.
309         for (final Attribute aAttribute : aAttributes)
310         {
311             aOut.printf("attribute %4d %2d %c %4d %4d %s %s  %s %s %s\n",
312                 maTypeNameToIdMap.get(aState.GetFullname()),
313                 maPrefixToIdMap.get(aAttribute.GetName().GetNamespacePrefix()),
314                 aAttribute.GetFormDefault()==FormDefault.qualified ? 'q' : 'u',
315                 maNameToIdMap.get(aAttribute.GetName().GetLocalPart()),
316                 maTypeNameToIdMap.get(aAttribute.GetTypeName().GetStateName()),
317                 aAttribute.GetUse()==Use.Optional ? 'o' : 'u',
318                 aAttribute.GetDefault()==null ? "null" : '"'+aAttribute.GetDefault()+'"',
319                 aState.GetFullname(),
320                 aAttribute.GetName().GetStateName(),
321                 aAttribute.GetTypeName().GetStateName());
322         }
323     }
324 
325 
326 
327 
WriteSimpleTypes( final PrintStream aOut)328     private void WriteSimpleTypes (
329         final PrintStream aOut)
330     {
331         if (maSimpleTypes == null)
332         {
333             aOut.printf("\n// There is no simple type information.\n");
334         }
335         else
336         {
337             aOut.printf("\n// %d simple types.\n", maSimpleTypes.GetSimpleTypeCount());
338             for (final Entry<String,SimpleTypeDescriptor> aEntry : maSimpleTypes.GetSimpleTypesSorted())
339             {
340                 int nIndex = 0;
341                 for (final ISimpleTypeNode aSubType : aEntry.getValue().GetSubType())
342                 {
343                     final int nCurrentIndex = nIndex++;
344 
345                     final StringBuffer aLine = new StringBuffer();
346                     aLine.append(String.format(
347                         "simple-type %5d %1d %c ",
348                         maTypeNameToIdMap.get(aEntry.getKey()),
349                         nCurrentIndex,
350                         aSubType.IsList() ? 'L' : 'T'));
351 
352                     aSubType.AcceptVisitor(new ISimpleTypeNodeVisitor()
353                     {
354                         @Override public void Visit(UnionNode aType)
355                         {
356                             throw new RuntimeException("unexpected");
357                         }
358                         @Override public void Visit(StringNode aType)
359                         {
360                             AppendStringDescription(aLine, aType);
361                         }
362                         @Override public void Visit(NumberNode<?> aType)
363                         {
364                             AppendNumberDescription(aLine, aType);
365                         }
366                         @Override public void Visit(DateTimeNode aType)
367                         {
368                             AppendDateTimeDescription(aLine, aType);
369                         }
370                         @Override public void Visit(BlobNode aType)
371                         {
372                             AppendBlobDescription(aLine, aType);
373                         }
374                     });
375                     aOut.printf("%s\n", aLine.toString());
376                 }
377             }
378         }
379     }
380 
381 
382 
383 
WriteAttributeValues( final PrintStream aOut)384     private void WriteAttributeValues (
385         final PrintStream aOut)
386     {
387         final Map<String,Integer> aSortedMap = new TreeMap<>();
388         aSortedMap.putAll(maAttributeValueToIdMap);
389         aOut.printf("//  %d attribute values from enumerations.\n", maAttributeValueToIdMap.size());
390         for (final Entry<String,Integer> aEntry : aSortedMap.entrySet())
391             aOut.printf("attribute-value %5d %s\n", aEntry.getValue(), QuoteString(aEntry.getKey()));
392     }
393 
394 
395 
396 
AppendStringDescription( final StringBuffer aLine, final StringNode aType)397     private static void AppendStringDescription (
398         final StringBuffer aLine,
399         final StringNode aType)
400     {
401         aLine.append("S ");
402         switch(aType.GetRestrictionType())
403         {
404             case Enumeration:
405                 aLine.append('E');
406                 for (final int nValueId : aType.GetEnumerationRestriction())
407                 {
408                     aLine.append(' ');
409                     aLine.append(nValueId);
410                 }
411                 break;
412             case Pattern:
413                 aLine.append("P ");
414                 aLine.append(QuoteString(aType.GetPatternRestriction()));
415                 break;
416             case Length:
417                 aLine.append("L ");
418                 final int[] aLengthRestriction = aType.GetLengthRestriction();
419                 aLine.append(aLengthRestriction[0]);
420                 aLine.append(' ');
421                 aLine.append(aLengthRestriction[1]);
422                 break;
423             case None:
424                 aLine.append('N');
425                 break;
426             default:
427                 throw new RuntimeException();
428         }
429     }
430 
431 
432 
433 
AppendNumberDescription( final StringBuffer aLine, final NumberNode<?> aType)434     private static void AppendNumberDescription (
435         final StringBuffer aLine,
436         final NumberNode<?> aType)
437     {
438         aLine.append("N ");
439         switch(aType.GetNumberType())
440         {
441             case Boolean: aLine.append("u1"); break;
442             case Byte: aLine.append("s8"); break;
443             case UnsignedByte: aLine.append("u8"); break;
444             case Short: aLine.append("s16"); break;
445             case UnsignedShort: aLine.append("u16"); break;
446             case Int: aLine.append("s32"); break;
447             case UnsignedInt: aLine.append("u32"); break;
448             case Long: aLine.append("s64"); break;
449             case UnsignedLong: aLine.append("u64"); break;
450             case Integer: aLine.append("s*"); break;
451             case Float: aLine.append("f"); break;
452             case Double: aLine.append("d"); break;
453             default:
454                 throw new RuntimeException("unsupported numerical type "+aType.GetNumberType());
455         }
456         aLine.append(' ');
457         switch(aType.GetRestrictionType())
458         {
459             case Enumeration:
460                 aLine.append("E ");
461                 for (final Object nValue : aType.GetEnumerationRestriction())
462                 {
463                     aLine.append(" ");
464                     aLine.append(nValue);
465                 }
466                 break;
467             case Size:
468                 aLine.append("S");
469                 if (aType.GetMinimum() != null)
470                 {
471                     if (aType.IsMinimumInclusive())
472                         aLine.append(" >= ");
473                     else
474                         aLine.append(" > ");
475                     aLine.append(aType.GetMinimum());
476                 }
477                 if (aType.GetMaximum() != null)
478                 {
479                     if (aType.IsMaximumInclusive())
480                         aLine.append(" <= ");
481                     else
482                         aLine.append(" < ");
483                     aLine.append(aType.GetMaximum());
484                 }
485                 break;
486             case None:
487                 aLine.append("N");
488                 break;
489             default:
490                 throw new RuntimeException("unsupported numerical restriction "+aType.GetRestrictionType());
491         }
492     }
493 
494 
495 
496 
AppendDateTimeDescription( final StringBuffer aLine, final DateTimeNode aType)497     private static void AppendDateTimeDescription (
498         final StringBuffer aLine,
499         final DateTimeNode aType)
500     {
501         aLine.append("D");
502     }
503 
504 
505 
506 
AppendBlobDescription( final StringBuffer aLine, final BlobNode aType)507     private static void AppendBlobDescription (
508         final StringBuffer aLine,
509         final BlobNode aType)
510     {
511         aLine.append("B ");
512         switch(aType.GetBlobType())
513         {
514             case Base64Binary:
515                 aLine.append("B ");
516                 break;
517             case HexBinary:
518                 aLine.append ("H ");
519                 break;
520             default:
521                 throw new RuntimeException("unsupported blob type");
522         }
523         switch(aType.GetRestrictionType())
524         {
525             case Length:
526                 aLine.append("L ");
527                 aLine.append(aType.GetLengthRestriction());
528                 break;
529             case None:
530                 aLine.append("N");
531                 break;
532             default:
533                 throw new RuntimeException();
534         }
535     }
536 
537 
538 
539 
QuoteString(final String sText)540     private static String QuoteString(final String sText)
541     {
542         return "\"" + sText.replace("\"", "&quot;").replace(" ", "%20") + "\"";
543     }
544 
545 
546 
547 
548     private final FiniteAutomatonContainer maAutomatons;
549     private final SimpleTypeContainer maSimpleTypes;
550     private final NamespaceMap maNamespaces;
551     private final Map<String,Integer> maNameToIdMap;
552     private final Map<String,Integer> maPrefixToIdMap;
553     private final Map<String,Integer> maTypeNameToIdMap;
554     private final Map<String,Integer> maAttributeValueToIdMap;
555 }
556