xref: /aoo4110/main/sc/source/filter/inc/htmlpars.hxx (revision b1cdbd2c)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #ifndef SC_HTMLPARS_HXX
25 #define SC_HTMLPARS_HXX
26 
27 #include <tools/stack.hxx>
28 
29 #include <memory>
30 #include <vector>
31 #include <list>
32 #include <map>
33 
34 #include "rangelst.hxx"
35 #include "eeparser.hxx"
36 
const sal_uInt32 SC_HTML_FONTSIZES = 7;        // same as in the export, HTML options

// Pixel tolerance for SeekOffset and related.
const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL = 1;    // single table
const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE = 10;   // nested
42 
43 // ============================================================================
44 // BASE class for HTML parser classes
45 // ============================================================================
46 
class ScHTMLTable;

/** Base class for HTML parser classes.

    Declares the interface that the derived HTML parsers have to implement:
    reading from a stream and returning the "global table". */
class ScHTMLParser : public ScEEParser
{
protected:
    sal_uInt32                  maFontHeights[ SC_HTML_FONTSIZES ];     /// One entry per HTML font size (presumably the font heights; filled in the implementation - confirm).
    ScDocument*                 mpDoc;          /// The destination document.

public:
    explicit                    ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
    virtual                     ~ScHTMLParser();

    /** Reads from the passed stream; must be implemented by derived classes.
        NOTE(review): the meaning of the returned sal_uLong (presumably an
        error code) is not visible in this header - confirm. */
    virtual sal_uLong		        Read( SvStream& rStrm, const String& rBaseURL  ) = 0;

    /** Returns the "global table" which contains the entire HTML document. */
    virtual const ScHTMLTable*  GetGlobalTable() const = 0;
};
65 
66 
67 // ============================================================================
68 
// Declares the container type ScHTMLColOffset with sal_uLong elements.
// NOTE(review): judging by the macro name this is a sorted variable-size array,
// and 16 / 4 are presumably initial size and growth step - confirm against the macro.
SV_DECL_VARARR_SORT( ScHTMLColOffset, sal_uLong, 16, 4)
70 
71 struct ScHTMLTableStackEntry
72 {
73 	ScRangeListRef		xLockedList;
74 	ScEEParseEntry*		pCellEntry;
75 	ScHTMLColOffset*	pLocalColOffset;
76 	sal_uLong				nFirstTableCell;
77 	SCCOL				nColCnt;
78 	SCROW				nRowCnt;
79 	SCCOL				nColCntStart;
80 	SCCOL				nMaxCol;
81 	sal_uInt16				nTable;
82 	sal_uInt16				nTableWidth;
83 	sal_uInt16				nColOffset;
84 	sal_uInt16				nColOffsetStart;
85 	sal_Bool				bFirstRow;
ScHTMLTableStackEntryScHTMLTableStackEntry86 						ScHTMLTableStackEntry( ScEEParseEntry* pE,
87 								const ScRangeListRef& rL, ScHTMLColOffset* pTO,
88 								sal_uLong nFTC,
89 								SCCOL nCol, SCROW nRow,
90 								SCCOL nStart, SCCOL nMax, sal_uInt16 nTab,
91 								sal_uInt16 nTW, sal_uInt16 nCO, sal_uInt16 nCOS,
92 								sal_Bool bFR )
93 							: xLockedList( rL ), pCellEntry( pE ),
94 							pLocalColOffset( pTO ),
95 							nFirstTableCell( nFTC ),
96 							nColCnt( nCol ), nRowCnt( nRow ),
97 							nColCntStart( nStart ), nMaxCol( nMax ),
98 							nTable( nTab ), nTableWidth( nTW ),
99 							nColOffset( nCO ), nColOffsetStart( nCOS ),
100 							bFirstRow( bFR )
101 							{}
~ScHTMLTableStackEntryScHTMLTableStackEntry102 						~ScHTMLTableStackEntry() {}
103 };
104 DECLARE_STACK( ScHTMLTableStack, ScHTMLTableStackEntry* )
105 
106 struct ScHTMLAdjustStackEntry
107 {
108 	SCCOL				nLastCol;
109 	SCROW				nNextRow;
110 	SCROW				nCurRow;
ScHTMLAdjustStackEntryScHTMLAdjustStackEntry111 						ScHTMLAdjustStackEntry( SCCOL nLCol, SCROW nNRow,
112 								SCROW nCRow )
113 							: nLastCol( nLCol ), nNextRow( nNRow ),
114 							nCurRow( nCRow )
115 							{}
116 };
117 DECLARE_STACK( ScHTMLAdjustStack, ScHTMLAdjustStackEntry* )
118 
119 
120 // ============================================================================
121 
class EditEngine;
class ScDocument;
class HTMLOption;

/** HTML parser derived from ScHTMLParser that keeps track of column offsets
    and table widths in pixels (see the members below).
    NOTE(review): presumably the layout-oriented counterpart of
    ScHTMLQueryParser - confirm in the implementation. */
class ScHTMLLayoutParser : public ScHTMLParser
{
private:
    Size                aPageSize;
    String              aBaseURL;
	ScHTMLTableStack	aTableStack;
	String				aString;
	ScRangeListRef		xLockedList;		// per table
	Table*				pTables;
	ScHTMLColOffset*	pColOffset;
	ScHTMLColOffset*	pLocalColOffset;	// per table
	sal_uLong				nFirstTableCell;	// per table
	short				nTableLevel;
	sal_uInt16				nTable;
	sal_uInt16				nMaxTable;
	SCCOL				nColCntStart;		// first column per table
	SCCOL				nMaxCol;			// per table
	sal_uInt16				nTableWidth;		// per table
	sal_uInt16				nColOffset;			// current, in pixels
	sal_uInt16				nColOffsetStart;	// start value per table, in pixels
	sal_uInt16				nMetaCnt;			// for ParseMetaOptions
    sal_uInt16              nOffsetTolerance;   // for use with SeekOffset and related
	sal_Bool				bCalcWidthHeight;	// TRUE: calculate real column width
											// FALSE: 1 html-col = 1 sc-col
	sal_Bool				bTabInTabCell;
	sal_Bool				bFirstRow;			// per table, whether in the first row
	sal_Bool				bInCell;
	sal_Bool				bInTitle;

	DECL_LINK( HTMLImportHdl, ImportInfo* );
	void				NewActEntry( ScEEParseEntry* );
	void				EntryEnd( ScEEParseEntry*, const ESelection& );
	void 				ProcToken( ImportInfo* );
	void 				CloseEntry( ImportInfo* );
	void				NextRow(  ImportInfo*  );
	void				SkipLocked( ScEEParseEntry*, sal_Bool bJoin = sal_True );
	// The following static helpers work on a passed column offset array,
	// using the passed pixel tolerances.
	static sal_Bool 		SeekOffset( ScHTMLColOffset*, sal_uInt16 nOffset,
									SCCOL* pCol, sal_uInt16 nOffsetTol );
	static void			MakeCol( ScHTMLColOffset*, sal_uInt16& nOffset,
								sal_uInt16& nWidth, sal_uInt16 nOffsetTol,
                                sal_uInt16 nWidthTol );
	static void			MakeColNoRef( ScHTMLColOffset*, sal_uInt16 nOffset,
								sal_uInt16 nWidth, sal_uInt16 nOffsetTol,
                                sal_uInt16 nWidthTol );
	static void 		ModifyOffset( ScHTMLColOffset*, sal_uInt16& nOldOffset,
									sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol );
	void				Colonize( ScEEParseEntry* );
	sal_uInt16				GetWidth( ScEEParseEntry* );
	void				SetWidths();
	void				Adjust();

	sal_uInt16				GetWidthPixel( const HTMLOption* );
	sal_Bool				IsAtBeginningOfText( ImportInfo* );

	// Handlers named after HTML elements (presumably called from ProcToken - confirm).
	void				TableOn( ImportInfo* );
    void                ColOn( ImportInfo* );
    void                TableRowOn( ImportInfo* );
    void                TableRowOff( ImportInfo* );
	void				TableDataOn( ImportInfo* );
    void                TableDataOff( ImportInfo* );
	void				TableOff( ImportInfo* );
    void                Image( ImportInfo* );
	void				AnchorOn( ImportInfo* );
	void				FontOn( ImportInfo* );

public:
                        ScHTMLLayoutParser( EditEngine*, const String& rBaseURL, const Size& aPageSize, ScDocument* );
    virtual             ~ScHTMLLayoutParser();
    /** Implements ScHTMLParser::Read(). */
    virtual sal_uLong       Read( SvStream&, const String& rBaseURL  );
    /** Implements ScHTMLParser::GetGlobalTable(). */
    virtual const ScHTMLTable*  GetGlobalTable() const;
};
197 
198 
199 
200 // ============================================================================
201 // HTML DATA QUERY PARSER
202 // ============================================================================
203 
/** Declares the orientation in or for a table: column or row. */
enum ScHTMLOrient { tdCol = 0 , tdRow = 1 };

/** Type for a unique identifier for each table. */
typedef sal_uInt16 ScHTMLTableId;
/** Identifier of the "global table" (the entire HTML document). */
const ScHTMLTableId SC_HTML_GLOBAL_TABLE = 0;
/** Used as table index for normal (non-table) entries in ScHTMLEntry structs.
    Note that this has the same value as SC_HTML_GLOBAL_TABLE. */
const ScHTMLTableId SC_HTML_NO_TABLE = 0;
213 
214 // ============================================================================
215 
216 /** A 2D cell position in an HTML table. */
217 struct ScHTMLPos
218 {
219     SCCOL               mnCol;
220     SCROW               mnRow;
221 
ScHTMLPosScHTMLPos222     inline explicit     ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
ScHTMLPosScHTMLPos223     inline explicit     ScHTMLPos( SCCOL nCol, SCROW nRow ) :
224                             mnCol( nCol ), mnRow( nRow ) {}
ScHTMLPosScHTMLPos225     inline explicit     ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); }
226 
GetScHTMLPos227     inline SCCOLROW     Get( ScHTMLOrient eOrient ) const
228                             { return (eOrient == tdCol) ? mnCol : mnRow; }
SetScHTMLPos229     inline void         Set( SCCOL nCol, SCROW nRow )
230                             { mnCol = nCol; mnRow = nRow; }
SetScHTMLPos231     inline void         Set( const ScAddress& rAddr )
232                             { Set( rAddr.Col(), rAddr.Row() ); }
MoveScHTMLPos233     inline void         Move( SCsCOL nColDiff, SCsROW nRowDiff )
234                             { mnCol = mnCol + nColDiff; mnRow = mnRow + nRowDiff; }
MakeAddrScHTMLPos235     inline ScAddress    MakeAddr() const
236                             { return ScAddress( mnCol, mnRow, 0 ); }
237 };
238 
operator ==(const ScHTMLPos & rPos1,const ScHTMLPos & rPos2)239 inline bool operator==( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
240 {
241     return (rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol == rPos2.mnCol);
242 }
243 
operator <(const ScHTMLPos & rPos1,const ScHTMLPos & rPos2)244 inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
245 {
246     return (rPos1.mnRow < rPos2.mnRow) || ((rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol < rPos2.mnCol));
247 }
248 
249 // ----------------------------------------------------------------------------
250 
251 /** A 2D cell size in an HTML table. */
252 struct ScHTMLSize
253 {
254     SCCOL               mnCols;
255     SCROW               mnRows;
256 
ScHTMLSizeScHTMLSize257     inline explicit     ScHTMLSize() : mnCols( 0 ), mnRows( 0 ) {}
ScHTMLSizeScHTMLSize258     inline explicit     ScHTMLSize( SCCOL nCols, SCROW nRows ) :
259                             mnCols( nCols ), mnRows( nRows ) {}
260 
GetScHTMLSize261     inline SCCOLROW     Get( ScHTMLOrient eOrient ) const
262                             { return (eOrient == tdCol) ? mnCols : mnRows; }
SetScHTMLSize263     inline void         Set( SCCOL nCols, SCROW nRows )
264                             { mnCols = nCols; mnRows = nRows; }
ExpandScHTMLSize265     inline void         Expand( SCsCOL nColDiff, SCsROW nRowDiff )
266                             { mnCols = mnCols + nColDiff; mnRows = mnRows + nRowDiff; }
267 };
268 
operator ==(const ScHTMLSize & rSize1,const ScHTMLSize & rSize2)269 inline bool operator==( const ScHTMLSize& rSize1, const ScHTMLSize& rSize2 )
270 {
271     return (rSize1.mnRows == rSize2.mnRows) && (rSize1.mnCols == rSize2.mnCols);
272 }
273 
274 // ============================================================================
275 
/** A single entry containing a line of text or representing a table. */
struct ScHTMLEntry : public ScEEParseEntry
{
public:
    /** Creates an entry.
        @param rItemSet  Item set passed to the entry (presumably used to initialize its own item set - confirm).
        @param nTableId  Identifier of the represented table; defaults to SC_HTML_NO_TABLE. */
    explicit            ScHTMLEntry(
                            const SfxItemSet& rItemSet,
                            ScHTMLTableId nTableId = SC_HTML_NO_TABLE );

    /** Returns true, if the selection of the entry is empty. */
    inline bool         IsEmpty() const { return !aSel.HasRange(); }
    /** Returns true, if the entry has any content to be imported. */
    bool                HasContents() const;
    /** Returns true, if the entry represents a table. */
    inline bool         IsTable() const { return nTab != SC_HTML_NO_TABLE; }
    /** Returns the table identifier stored in this entry (SC_HTML_NO_TABLE for non-table entries). */
    inline ScHTMLTableId GetTableId() const { return nTab; }

    /** Sets or clears the import always state. */
    inline void         SetImportAlways( bool bSet = true ) { mbImportAlways = bSet; }
    /** Sets start point of the entry selection to the start of the import info object. */
    void                AdjustStart( const ImportInfo& rInfo );
    /** Sets end point of the entry selection to the end of the import info object. */
    void                AdjustEnd( const ImportInfo& rInfo );
    /** Deletes leading and trailing empty paragraphs from the entry. */
    void                Strip( const EditEngine& rEditEngine );

    /** Returns read/write access to the item set of this entry. */
    inline SfxItemSet&  GetItemSet() { return aItemSet; }
    /** Returns read-only access to the item set of this entry. */
    inline const SfxItemSet& GetItemSet() const { return aItemSet; }

private:
    bool                mbImportAlways;     /// true = Always import this entry.
};
310 
311 // ============================================================================
312 
/** This struct handles creation of unique table identifiers.
    @descr  Holds a const member and a reference member, therefore objects
        of this struct cannot be assigned. */
struct ScHTMLTableAutoId
{
    const ScHTMLTableId mnTableId;          /// The created unique table identifier.
    ScHTMLTableId&      mrnUnusedId;        /// Reference to global unused identifier variable.

    /** The constructor assigns an unused identifier to member mnTableId.
        @param rnUnusedId  The unused identifier variable that mrnUnusedId will refer to. */
    explicit            ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
};
322 
323 // ----------------------------------------------------------------------------
324 
class ScHTMLTableMap;

/** Stores data for one table in an HTML document.

    This class does the main work for importing an HTML document. It manages
    the correct insertion of parse entries into the correct cells and the
    creation of nested tables. Recalculation of resulting document size and
    position is done recursively in all nested tables.
 */
class ScHTMLTable
{
public:
    /** Creates a new HTML table without content.
        @descr  Internally handles a current cell position. This position is
            invalid until first calls of RowOn() and DataOn().
        @param rParentTable  Reference to the parent table that owns this table.
        @param rInfo  The import info object of the opening tag.
        @param bPreFormText  true = Table is based on preformatted text (<pre> tag). */
    explicit            ScHTMLTable(
                            ScHTMLTable& rParentTable,
                            const ImportInfo& rInfo,
                            bool bPreFormText );

    virtual             ~ScHTMLTable();

    /** Returns the name of the table, specified in the TABLE tag. */
    inline const String& GetTableName() const { return maTableName; }
    /** Returns the unique identifier of the table. */
    inline ScHTMLTableId GetTableId() const { return maTableId.mnTableId; }
    /** Returns the table size. */
    inline const ScHTMLSize& GetSize() const { return maSize; }
    /** Returns the cell spanning of the specified cell. */
    ScHTMLSize          GetSpan( const ScHTMLPos& rCellPos ) const;

    /** Searches in all nested tables for the specified table.
        @param nTableId  Unique identifier of the table. */
    ScHTMLTable*        FindNestedTable( ScHTMLTableId nTableId ) const;

    /** Puts the item into the item set of the current entry. */
    void                PutItem( const SfxPoolItem& rItem );
    /** Inserts a text portion into current entry. */
    void                PutText( const ImportInfo& rInfo );
    /** Inserts a new line, if in preformatted text, else does nothing. */
    void                InsertPara( const ImportInfo& rInfo );

    /** Inserts a line break (<br> tag).
        @descr  Inserts the current entry regardless if it is empty. */
    void                BreakOn();
    /** Inserts a heading line (<p> and <h*> tags). */
    void                HeadingOn();
    /** Processes a hyperlink (<a> tag). */
    void                AnchorOn();

    /** Starts a *new* table nested in this table (<table> tag).
        @return  Pointer to the new table. */
    ScHTMLTable*        TableOn( const ImportInfo& rInfo );
    /** Closes *this* table (</table> tag).
        @return  Pointer to the parent table. */
    ScHTMLTable*        TableOff( const ImportInfo& rInfo );
    /** Starts a *new* table based on preformatted text (<pre> tag).
        @return  Pointer to the new table. */
    ScHTMLTable*        PreOn( const ImportInfo& rInfo );
    /** Closes *this* table based on preformatted text (</pre> tag).
        @return  Pointer to the parent table. */
    ScHTMLTable*        PreOff( const ImportInfo& rInfo );

    /** Starts next row (<tr> tag).
        @descr  Cell address is invalid until first call of DataOn(). */
    void                RowOn( const ImportInfo& rInfo );
    /** Closes the current row (</tr> tag).
        @descr  Cell address is invalid until call of RowOn() and DataOn(). */
    void                RowOff( const ImportInfo& rInfo );
    /** Starts the next cell (<td> or <th> tag). */
    void                DataOn( const ImportInfo& rInfo );
    /** Closes the current cell (</td> or </th> tag).
        @descr  Cell address is invalid until next call of DataOn(). */
    void                DataOff( const ImportInfo& rInfo );

    /** Starts the body of the HTML document (<body> tag). */
    void                BodyOn( const ImportInfo& rInfo );
    /** Closes the body of the HTML document (</body> tag). */
    void                BodyOff( const ImportInfo& rInfo );

    /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
        @descr  Used to close this table object regardless of the opening tag type.
        @return  Pointer to the parent table, or this, if no parent found. */
    ScHTMLTable*        CloseTable( const ImportInfo& rInfo );

    /** Returns the resulting document row/column count of the specified HTML row/column. */
    SCCOLROW            GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
    /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
    SCCOLROW            GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const;
    /** Returns the total document row/column count in the specified direction. */
    SCCOLROW            GetDocSize( ScHTMLOrient eOrient ) const;
    /** Returns the total document row/column count of the specified HTML cell. */
    ScHTMLSize          GetDocSize( const ScHTMLPos& rCellPos ) const;

    /** Returns the resulting Calc position of the top left edge of the table. */
    inline const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
    /** Calculates the resulting Calc position of the specified HTML column/row. */
    SCCOLROW            GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos = 0 ) const;
    /** Calculates the resulting Calc position of the specified HTML cell. */
    ScHTMLPos           GetDocPos( const ScHTMLPos& rCellPos ) const;

    /** Calculates the current Calc document area of this table. */
    void                GetDocRange( ScRange& rRange ) const;

    /** Applies border formatting to the passed document. */
    void                ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;

protected:
    /** Creates a new HTML table without parent.
        @descr  This constructor is used to create the "global table". */
    explicit            ScHTMLTable(
                            SfxItemPool& rPool,
                            EditEngine& rEditEngine,
                            ScEEParseList& rEEParseList,
                            ScHTMLTableId& rnUnusedId );

    /** Fills all empty cells in this and nested tables with dummy parse entries. */
    void                FillEmptyCells();
    /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
    void                RecalcDocSize();
    /** Recalculates the position of all cell entries and nested tables.
        @param rBasePos  The origin of the table in the Calc document. */
    void                RecalcDocPos( const ScHTMLPos& rBasePos );

private:
    typedef ::std::auto_ptr< ScHTMLTableMap >           ScHTMLTableMapPtr;
    typedef ::std::auto_ptr< SfxItemSet >               SfxItemSetPtr;
    typedef ::std::vector< SCCOLROW >                   ScSizeVec;
    typedef ::std::list< ScHTMLEntry* >                 ScHTMLEntryList;
    typedef ::std::map< ScHTMLPos, ScHTMLEntryList >    ScHTMLEntryMap;
    typedef ::std::auto_ptr< ScHTMLEntry >              ScHTMLEntryPtr;

    /** Returns true, if the current cell does not contain an entry yet. */
    bool                IsEmptyCell() const;
    /** Returns the item set from cell, row, or table, depending on current state. */
    const SfxItemSet&   GetCurrItemSet() const;

    /** Returns true, if import info represents a space character. */
    static bool         IsSpaceCharInfo( const ImportInfo& rInfo );

    /** Creates and returns a new empty flying entry at position (0,0). */
    ScHTMLEntryPtr      CreateEntry() const;
    /** Creates a new flying entry.
        @param rInfo  Contains the initial edit engine selection for the entry. */
    void                CreateNewEntry( const ImportInfo& rInfo );

    /** Inserts an empty line in front of the next entry. */
    void                InsertLeadingEmptyLine();

    /** Pushes the passed entry into the list of the current cell. */
    void                ImplPushEntryToList( ScHTMLEntryList& rEntryList, ScHTMLEntryPtr& rxEntry );
    /** Tries to insert the entry into the current cell.
        @descr  If insertion is not possible (i.e., currently no cell open), the
        entry will be inserted into the parent table.
        @return  true = Entry has been pushed into the current cell; false = Entry dropped. */
    bool                PushEntry( ScHTMLEntryPtr& rxEntry );
    /** Puts the current entry into the entry list, if it is not empty.
        @param rInfo  The import info struct containing the end position of the current entry.
        @param bLastInCell  true = If cell is still empty, put this entry always.
        @return  true = Entry has been pushed into the current cell; false = Entry dropped. */
    bool                PushEntry( const ImportInfo& rInfo, bool bLastInCell = false );
    /** Pushes a new entry into current cell which references a nested table.
        @return  true = Entry has been pushed into the current cell; false = Entry dropped. */
    bool                PushTableEntry( ScHTMLTableId nTableId );

    /** Tries to find a table from the table container.
        @descr  Assumes that the table is located in the current container or
        that the passed table identifier is 0.
        @param nTableId  Unique identifier of the table or 0. */
    ScHTMLTable*        GetExistingTable( ScHTMLTableId nTableId ) const;
    /** Inserts a nested table in the current cell at the specified position.
        @param bPreFormText  true = New table is based on preformatted text (<pre> tag). */
    ScHTMLTable*        InsertNestedTable( const ImportInfo& rInfo, bool bPreFormText );

    /** Inserts a new cell in an unused position, starting from current cell position. */
    void                InsertNewCell( const ScHTMLSize& rSpanSize );

    /** Set internal states for a new table row. */
    void                ImplRowOn();
    /** Set internal states for leaving a table row. */
    void                ImplRowOff();
    /** Set internal states for entering a new table cell. */
    void                ImplDataOn( const ScHTMLSize& rSpanSize );
    /** Set internal states for leaving a table cell. */
    void                ImplDataOff();

    /** Inserts additional formatting options from import info into the item set. */
    void                ProcessFormatOptions( SfxItemSet& rItemSet, const ImportInfo& rInfo );

    /** Updates the document column/row size of the specified column or row.
        @descr  Only increases the present count, never decreases. */
    void                SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize );
    /** Calculates and sets the resulting size the cell needs in the document.
        @descr  Reduces the needed size in merged cells.
        @param nCellPos  The first column/row position of the (merged) cell.
        @param nCellSpan  The cell spanning in the specified orientation.
        @param nRealDocSize  The raw document size of all entries of the cell. */
    void                CalcNeededDocSize(
                            ScHTMLOrient eOrient, SCCOLROW nCellPos,
                            SCCOLROW nCellSpan, SCCOLROW nRealDocSize );

private:
    ScHTMLTable*        mpParentTable;      /// Pointer to parent table.
    ScHTMLTableMapPtr   mxNestedTables;     /// Table of nested HTML tables.
    String              maTableName;        /// Table name from <table id> option.
    ScHTMLTableAutoId   maTableId;          /// Unique identifier of this table.
    SfxItemSet          maTableItemSet;     /// Items for the entire table.
    SfxItemSetPtr       mxRowItemSet;       /// Items for the current table row.
    SfxItemSetPtr       mxDataItemSet;      /// Items for the current cell.
    ScRangeList         maHMergedCells;     /// List of all horizontally merged cells.
    ScRangeList         maVMergedCells;     /// List of all vertically merged cells.
    ScRangeList         maUsedCells;        /// List of all used cells.
    EditEngine&         mrEditEngine;       /// Edit engine (from ScEEParser).
    ScEEParseList&      mrEEParseList;      /// List that owns the parse entries (from ScEEParser).
    ScHTMLEntryMap      maEntryMap;         /// List of entries for each cell.
    ScHTMLEntryList*    mpCurrEntryList;    /// Current entry list from map for faster access.
    ScHTMLEntryPtr      mxCurrEntry;        /// Working entry, not yet inserted in a list.
    ScSizeVec           maCumSizes[ 2 ];    /// Cumulated cell counts for each HTML table column/row.
    ScHTMLSize          maSize;             /// Size of the table.
    ScHTMLPos           maCurrCell;         /// Address of current cell to fill.
    ScHTMLPos           maDocBasePos;       /// Resulting base address in a Calc document.
    bool                mbBorderOn;         /// true = Table borders on.
    bool                mbPreFormText;      /// true = Table from preformatted text (<pre> tag).
    bool                mbRowOn;            /// true = Inside of <tr> </tr>.
    bool                mbDataOn;           /// true = Inside of <td> </td> or <th> </th>.
    bool                mbPushEmptyLine;    /// true = Insert empty line before current entry.
};
554 
555 // ----------------------------------------------------------------------------
556 
/** The "global table" representing the entire HTML document. */
class ScHTMLGlobalTable : public ScHTMLTable
{
public:
    /** Creates the global table; takes the same parameters as the protected
        parent-less constructor of ScHTMLTable. */
    explicit            ScHTMLGlobalTable(
                            SfxItemPool& rPool,
                            EditEngine& rEditEngine,
                            ScEEParseList& rEEParseList,
                            ScHTMLTableId& rnUnusedId );

    virtual             ~ScHTMLGlobalTable();

    /** Recalculates sizes and resulting positions of all document entries. */
    void                Recalc();
};
572 
573 // ============================================================================
574 
/** The HTML parser for data queries. Focuses on data import, not on layout.

    Builds the table structure correctly, ignores extended formatting like
    pictures or column widths.
 */
class ScHTMLQueryParser : public ScHTMLParser
{
public:
    explicit            ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
    virtual             ~ScHTMLQueryParser();

    /** Implements ScHTMLParser::Read(). */
    virtual sal_uLong       Read( SvStream& rStrm, const String& rBaseURL  );

    /** Returns the "global table" which contains the entire HTML document. */
    virtual const ScHTMLTable* GetGlobalTable() const;

private:
    /** Handles all possible tags in the HTML document. */
    void                ProcessToken( const ImportInfo& rInfo );
    /** Inserts a text portion into current entry. */
    void                InsertText( const ImportInfo& rInfo );
    /** Processes the <font> tag. */
    void                FontOn( const ImportInfo& rInfo );

    /** Processes the <meta> tag. */
    void                MetaOn( const ImportInfo& rInfo );
    /** Opens the title of the HTML document (<title> tag). */
    void                TitleOn( const ImportInfo& rInfo );
    /** Closes the title of the HTML document (</title> tag). */
    void                TitleOff( const ImportInfo& rInfo );

    /** Opens a new table at the current position. */
    void                TableOn( const ImportInfo& rInfo );
    /** Closes the current table. */
    void                TableOff( const ImportInfo& rInfo );
    /** Opens a new table based on preformatted text. */
    void                PreOn( const ImportInfo& rInfo );
    /** Closes the current preformatted text table. */
    void                PreOff( const ImportInfo& rInfo );

    /** Closes the current table, regardless of the opening tag. */
    void                CloseTable( const ImportInfo& rInfo );

    /** Link handler receiving the import info objects (declared via DECL_LINK). */
    DECL_LINK( HTMLImportHdl, const ImportInfo* );

private:
    typedef ::std::auto_ptr< ScHTMLGlobalTable >    ScHTMLGlobalTablePtr;

    String              maTitle;            /// The title of the document.
    ScHTMLGlobalTablePtr mxGlobTable;       /// Contains the entire imported document.
    ScHTMLTable*        mpCurrTable;        /// Pointer to current table (performance).
    ScHTMLTableId       mnUnusedId;         /// First unused table identifier.
    bool                mbTitleOn;          /// true = Inside of <title> </title>.
};
629 
630 
631 // ============================================================================
632 
633 #endif
634 
635