xref: /trunk/main/editeng/source/editeng/eehtml.cxx (revision 209925ef)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_editeng.hxx"
26 
27 #include <vcl/wrkwin.hxx>
28 #include <vcl/dialog.hxx>
29 #include <vcl/msgbox.hxx>
30 #include <vcl/svapp.hxx>
31 #include <eehtml.hxx>
32 #include <impedit.hxx>
33 #include <editeng/adjitem.hxx>
34 #include <editeng/flditem.hxx>
35 #include <tools/urlobj.hxx>
36 #include <editeng/fhgtitem.hxx>
37 #include <editeng/fontitem.hxx>
38 #include <editeng/ulspitem.hxx>
39 #include <editeng/wghtitem.hxx>
40 #include <svtools/htmltokn.h>
41 #include <svtools/htmlkywd.hxx>
42 #include <tools/tenccvt.hxx>
43 
// Values stored in nLastAction by ImpInsertText() / ImpInsertParaBreak()
// to record which kind of insert operation ran most recently.
#define ACTION_INSERTTEXT		  1
#define ACTION_INSERTPARABRK	  2

// Pseudo "heading level" handed to ImpSetStyleSheet() to request the
// preformatted-text formatting instead of a heading level (1-6) or 0 (off).
#define STYLE_PRE		  		101
48 
EditHTMLParser(SvStream & rIn,const String & rBaseURL,SvKeyValueIterator * pHTTPHeaderAttrs)49 EditHTMLParser::EditHTMLParser( SvStream& rIn, const String& rBaseURL, SvKeyValueIterator* pHTTPHeaderAttrs )
50 	: HTMLParser( rIn, true )
51     , aBaseURL( rBaseURL )
52 {
53 	pImpEditEngine = 0;
54 	pCurAnchor = 0;
55 	bInPara = sal_False;
56 	bWasInPara = sal_False;
57 	nInTable = 0;
58 	nInCell = 0;
59     bInTitle = sal_False;
60 	nDefListLevel = 0;
61 	nBulletLevel = 0;
62 	nNumberingLevel = 0;
63 	bFieldsInserted = sal_False;
64 
65     DBG_ASSERT( RTL_TEXTENCODING_DONTKNOW == GetSrcEncoding( ), "EditHTMLParser::EditHTMLParser: Where does the encoding come from?" );
66     DBG_ASSERT( !IsSwitchToUCS2(), "EditHTMLParser::::EditHTMLParser: Switch to UCS2?" );
67 
68     // Although the real default encoding is ISO8859-1, we use MS-1252
69     // als default encoding.
70     SetSrcEncoding( GetExtendedCompatibilityTextEncoding(  RTL_TEXTENCODING_ISO_8859_1 ) );
71 
72     // If the file starts with a BOM, switch to UCS2.
73     SetSwitchToUCS2( sal_True );
74 
75     if ( pHTTPHeaderAttrs )
76         SetEncodingByHTTPHeader( pHTTPHeaderAttrs );
77 }
78 
~EditHTMLParser()79 EditHTMLParser::~EditHTMLParser()
80 {
81 	delete pCurAnchor;
82 }
83 
CallParser(ImpEditEngine * pImpEE,const EditPaM & rPaM)84 SvParserState EditHTMLParser::CallParser( ImpEditEngine* pImpEE, const EditPaM& rPaM )
85 {
86 	DBG_ASSERT( pImpEE, "CallParser: ImpEditEngine ?!" );
87 	pImpEditEngine = pImpEE;
88 	SvParserState _eState = SVPAR_NOTSTARTED;
89 	if ( pImpEditEngine )
90 	{
91 		// Umbrechmimik vom RTF-Import einbauen?
92 		aCurSel = EditSelection( rPaM, rPaM );
93 
94 		if ( pImpEditEngine->aImportHdl.IsSet() )
95 		{
96 			ImportInfo aImportInfo( HTMLIMP_START, this, pImpEditEngine->CreateESel( aCurSel ) );
97 			pImpEditEngine->aImportHdl.Call( &aImportInfo );
98 		}
99 
100 		ImpSetStyleSheet( 0 );
101 		_eState = HTMLParser::CallParser();
102 
103 		if ( pImpEditEngine->aImportHdl.IsSet() )
104 		{
105 			ImportInfo aImportInfo( HTMLIMP_END, this, pImpEditEngine->CreateESel( aCurSel ) );
106 			pImpEditEngine->aImportHdl.Call( &aImportInfo );
107 		}
108 
109 		if ( bFieldsInserted )
110 			pImpEditEngine->UpdateFields();
111 	}
112 	return _eState;
113 }
114 
NextToken(int nToken)115 void EditHTMLParser::NextToken( int nToken )
116 {
117 	#ifdef DBG_UTIL
118 		HTML_TOKEN_IDS xID = (HTML_TOKEN_IDS)nToken;
119 		(void)xID;
120 	#endif
121 
122 	switch( nToken )
123 	{
124     case HTML_META:
125     {
126 		const HTMLOptions *_pOptions = GetOptions();
127 		sal_uInt16 nArrLen = _pOptions->Count();
128         sal_Bool bEquiv = sal_False;
129 		for ( sal_uInt16 i = 0; i < nArrLen; i++ )
130 		{
131 			const HTMLOption *pOption = (*_pOptions)[i];
132 			switch( pOption->GetToken() )
133 			{
134 				case HTML_O_HTTPEQUIV:
135 				{
136                     bEquiv = sal_True;
137 				}
138 				break;
139 				case HTML_O_CONTENT:
140 				{
141                     if ( bEquiv )
142                     {
143                         rtl_TextEncoding eEnc = GetEncodingByMIME( pOption->GetString() );
144                         if ( eEnc != RTL_TEXTENCODING_DONTKNOW )
145                             SetSrcEncoding( eEnc );
146                     }
147 				}
148 				break;
149 			}
150 		}
151 
152     }
153     break;
154 	case HTML_PLAINTEXT_ON:
155 	case HTML_PLAINTEXT2_ON:
156 		bInPara = sal_True;
157 	break;
158 	case HTML_PLAINTEXT_OFF:
159 	case HTML_PLAINTEXT2_OFF:
160 		bInPara = sal_False;
161 	break;
162 
163 	case HTML_LINEBREAK:
164 	case HTML_NEWPARA:
165 	{
166 		if ( ( bInPara || nInTable ) &&
167 			( ( nToken == HTML_LINEBREAK ) || HasTextInCurrentPara() ) )
168 		{
169 			ImpInsertParaBreak();
170 		}
171 	}
172 	break;
173 	case HTML_HORZRULE:
174 	{
175 		if ( HasTextInCurrentPara() )
176 			ImpInsertParaBreak();
177 		ImpInsertParaBreak();
178 	}
179 	case HTML_NONBREAKSPACE:
180 	{
181 		if ( bInPara )
182 		{
183 			ImpInsertText( String( RTL_CONSTASCII_USTRINGPARAM( " " ) ) );
184 		}
185 	}
186 	break;
187 	case HTML_TEXTTOKEN:
188 	{
189 	    // #i110937# for <title> content, call aImportHdl (no SkipGroup), but don't insert the text into the EditEngine
190         if (!bInTitle)
191         {
192             if ( !bInPara )
193                 StartPara( sal_False );
194 
195             // if ( bInPara || pCurAnchor )
196 
197 			String aText = aToken;
198 			if ( aText.Len() && ( aText.GetChar( 0 ) == ' ' )
199 					&& ThrowAwayBlank() && !IsReadPRE() )
200 				aText.Erase( 0, 1 );
201 
202 			if ( pCurAnchor )
203 			{
204 				pCurAnchor->aText += aText;
205 			}
206 			else
207 			{
208 				// Nur bis HTML mit 319 geschrieben ?!
209 				if ( IsReadPRE() )
210 				{
211 					sal_uInt16 nTabPos = aText.Search( '\t', 0 );
212 					while ( nTabPos != STRING_NOTFOUND )
213 					{
214 						aText.Erase( nTabPos, 1 );
215 						aText.Insert( String( RTL_CONSTASCII_USTRINGPARAM( "        " ) ), nTabPos );
216 						nTabPos = aText.Search( '\t', nTabPos+8 );
217 					}
218 				}
219 				ImpInsertText( aText );
220 			}
221 		}
222 	}
223 	break;
224 
225 	case HTML_CENTER_ON:
226 	case HTML_CENTER_OFF:	// if ( bInPara )
227 							{
228 								sal_uInt32 nNode = pImpEditEngine->GetEditDoc().GetPos( aCurSel.Max().GetNode() );
229 								SfxItemSet aItems( aCurSel.Max().GetNode()->GetContentAttribs().GetItems() );
230 								aItems.ClearItem( EE_PARA_JUST );
231 								if ( nToken == HTML_CENTER_ON )
232 									aItems.Put( SvxAdjustItem( SVX_ADJUST_CENTER, EE_PARA_JUST ) );
233 								pImpEditEngine->SetParaAttribs( nNode, aItems );
234 							}
235 							break;
236 
237 	case HTML_ANCHOR_ON:	AnchorStart();
238 							break;
239 	case HTML_ANCHOR_OFF:	AnchorEnd();
240 							break;
241 
242 	case HTML_PARABREAK_ON:
243 		if( bInPara && HasTextInCurrentPara() )
244 			EndPara( sal_True );
245 		StartPara( sal_True );
246 		break;
247 
248 	case HTML_PARABREAK_OFF:
249 		if( bInPara )
250 			EndPara( sal_True );
251 		break;
252 
253 	case HTML_HEAD1_ON:
254 	case HTML_HEAD2_ON:
255 	case HTML_HEAD3_ON:
256 	case HTML_HEAD4_ON:
257 	case HTML_HEAD5_ON:
258 	case HTML_HEAD6_ON:
259 	{
260 		HeadingStart( nToken );
261 	}
262 	break;
263 
264 	case HTML_HEAD1_OFF:
265 	case HTML_HEAD2_OFF:
266 	case HTML_HEAD3_OFF:
267 	case HTML_HEAD4_OFF:
268 	case HTML_HEAD5_OFF:
269 	case HTML_HEAD6_OFF:
270 	{
271 		HeadingEnd( nToken );
272 	}
273 	break;
274 
275 	case HTML_PREFORMTXT_ON:
276 	case HTML_XMP_ON:
277 	case HTML_LISTING_ON:
278 	{
279 		StartPara( sal_True );
280 		ImpSetStyleSheet( STYLE_PRE );
281 	}
282 	break;
283 
284 	case HTML_DEFLIST_ON:
285 	{
286 		nDefListLevel++;
287 	}
288 	break;
289 
290 	case HTML_DEFLIST_OFF:
291 	{
292 		if( nDefListLevel )
293 			nDefListLevel--;
294 	}
295 	break;
296 
297 	case HTML_TABLE_ON:		nInTable++;
298 							break;
299 	case HTML_TABLE_OFF:	DBG_ASSERT( nInTable, "Nicht in Table, aber TABLE_OFF?" );
300 							nInTable--;
301 							break;
302 
303 	case HTML_TABLEHEADER_ON:
304 	case HTML_TABLEDATA_ON:
305 		nInCell++;
306 	// fallthru
307 	case HTML_BLOCKQUOTE_ON:
308 	case HTML_BLOCKQUOTE_OFF:
309 	case HTML_BLOCKQUOTE30_ON:
310 	case HTML_BLOCKQUOTE30_OFF:
311 	case HTML_LISTHEADER_ON:
312 	case HTML_LI_ON:
313 	case HTML_DD_ON:
314 	case HTML_DT_ON:
315 	case HTML_ORDERLIST_ON:
316 	case HTML_UNORDERLIST_ON:
317 	{
318 		sal_Bool bHasText = HasTextInCurrentPara();
319 		if ( bHasText )
320 			ImpInsertParaBreak();
321 		StartPara( sal_False );
322 	}
323 	break;
324 
325 	case HTML_TABLEHEADER_OFF:
326 	case HTML_TABLEDATA_OFF:
327 	{
328 		if ( nInCell )
329 			nInCell--;
330 	}
331 	// fallthru
332 	case HTML_LISTHEADER_OFF:
333 	case HTML_LI_OFF:
334 	case HTML_DD_OFF:
335 	case HTML_DT_OFF:
336 	case HTML_ORDERLIST_OFF:
337 	case HTML_UNORDERLIST_OFF:	EndPara( sal_False );
338 								break;
339 
340 	case HTML_TABLEROW_ON:
341 	case HTML_TABLEROW_OFF:	// Nur nach einem CELL ein RETURN, fuer Calc
342 
343 	case HTML_COL_ON:
344 	case HTML_COLGROUP_ON:
345 	case HTML_COLGROUP_OFF: break;
346 
347 	case HTML_FONT_ON:		// ...
348 							break;
349 	case HTML_FONT_OFF:		// ...
350 							break;
351 
352 
353 	// #58335# kein SkipGroup on/off auf inline markup etc.
354 
355     case HTML_TITLE_ON:
356         bInTitle = sal_True;
357         break;
358     case HTML_TITLE_OFF:
359         bInTitle = sal_False;
360         break;
361 
362 	// globals
363 	case HTML_HTML_ON:
364 	case HTML_HTML_OFF:
365 	case HTML_BODY_ON:
366 	case HTML_BODY_OFF:
367 	case HTML_HEAD_ON:
368 	case HTML_HEAD_OFF:
369 	case HTML_FORM_ON:
370 	case HTML_FORM_OFF:
371 	case HTML_THEAD_ON:
372 	case HTML_THEAD_OFF:
373 	case HTML_TBODY_ON:
374 	case HTML_TBODY_OFF:
375 	// inline elements, structural markup
376 	// HTML 3.0
377 	case HTML_BANNER_ON:
378 	case HTML_BANNER_OFF:
379 	case HTML_DIVISION_ON:
380 	case HTML_DIVISION_OFF:
381 //	case HTML_LISTHEADER_ON:		//! special handling
382 //	case HTML_LISTHEADER_OFF:
383 	case HTML_NOTE_ON:
384 	case HTML_NOTE_OFF:
385 	// inline elements, logical markup
386 	// HTML 2.0
387 	case HTML_ADDRESS_ON:
388 	case HTML_ADDRESS_OFF:
389 //	case HTML_BLOCKQUOTE_ON:		//! extra Behandlung
390 //	case HTML_BLOCKQUOTE_OFF:
391 	case HTML_CITIATION_ON:
392 	case HTML_CITIATION_OFF:
393 	case HTML_CODE_ON:
394 	case HTML_CODE_OFF:
395 	case HTML_DEFINSTANCE_ON:
396 	case HTML_DEFINSTANCE_OFF:
397 	case HTML_EMPHASIS_ON:
398 	case HTML_EMPHASIS_OFF:
399 	case HTML_KEYBOARD_ON:
400 	case HTML_KEYBOARD_OFF:
401 	case HTML_SAMPLE_ON:
402 	case HTML_SAMPLE_OFF:
403 	case HTML_STRIKE_ON:
404 	case HTML_STRIKE_OFF:
405 	case HTML_STRONG_ON:
406 	case HTML_STRONG_OFF:
407 	case HTML_VARIABLE_ON:
408 	case HTML_VARIABLE_OFF:
409 	// HTML 3.0
410 	case HTML_ABBREVIATION_ON:
411 	case HTML_ABBREVIATION_OFF:
412 	case HTML_ACRONYM_ON:
413 	case HTML_ACRONYM_OFF:
414 	case HTML_AUTHOR_ON:
415 	case HTML_AUTHOR_OFF:
416 //	case HTML_BLOCKQUOTE30_ON:		//! extra Behandlung
417 //	case HTML_BLOCKQUOTE30_OFF:
418 	case HTML_DELETEDTEXT_ON:
419 	case HTML_DELETEDTEXT_OFF:
420 	case HTML_INSERTEDTEXT_ON:
421 	case HTML_INSERTEDTEXT_OFF:
422 	case HTML_LANGUAGE_ON:
423 	case HTML_LANGUAGE_OFF:
424 	case HTML_PERSON_ON:
425 	case HTML_PERSON_OFF:
426 	case HTML_SHORTQUOTE_ON:
427 	case HTML_SHORTQUOTE_OFF:
428 	case HTML_SUBSCRIPT_ON:
429 	case HTML_SUBSCRIPT_OFF:
430 	case HTML_SUPERSCRIPT_ON:
431 	case HTML_SUPERSCRIPT_OFF:
432 	// inline elements, visual markup
433 	// HTML 2.0
434 	case HTML_BOLD_ON:
435 	case HTML_BOLD_OFF:
436 	case HTML_ITALIC_ON:
437 	case HTML_ITALIC_OFF:
438 	case HTML_TELETYPE_ON:
439 	case HTML_TELETYPE_OFF:
440 	case HTML_UNDERLINE_ON:
441 	case HTML_UNDERLINE_OFF:
442 	// HTML 3.0
443 	case HTML_BIGPRINT_ON:
444 	case HTML_BIGPRINT_OFF:
445 	case HTML_STRIKETHROUGH_ON:
446 	case HTML_STRIKETHROUGH_OFF:
447 	case HTML_SMALLPRINT_ON:
448 	case HTML_SMALLPRINT_OFF:
449 	// figures
450 	case HTML_FIGURE_ON:
451 	case HTML_FIGURE_OFF:
452 	case HTML_CAPTION_ON:
453 	case HTML_CAPTION_OFF:
454 	case HTML_CREDIT_ON:
455 	case HTML_CREDIT_OFF:
456 	// misc
457 	case HTML_DIRLIST_ON:
458 	case HTML_DIRLIST_OFF:
459 	case HTML_FOOTNOTE_ON:			//! landen so im Text
460 	case HTML_FOOTNOTE_OFF:
461 	case HTML_MENULIST_ON:
462 	case HTML_MENULIST_OFF:
463 //	case HTML_PLAINTEXT_ON:			//! extra Behandlung
464 //	case HTML_PLAINTEXT_OFF:
465 //	case HTML_PREFORMTXT_ON:		//! extra Behandlung
466 //	case HTML_PREFORMTXT_OFF:
467 	case HTML_SPAN_ON:
468 	case HTML_SPAN_OFF:
469 	// obsolete
470 //	case HTML_XMP_ON:				//! extra Behandlung
471 //	case HTML_XMP_OFF:
472 //	case HTML_LISTING_ON:			//! extra Behandlung
473 //	case HTML_LISTING_OFF:
474 	// Netscape
475 	case HTML_BLINK_ON:
476 	case HTML_BLINK_OFF:
477 	case HTML_NOBR_ON:
478 	case HTML_NOBR_OFF:
479 	case HTML_NOEMBED_ON:
480 	case HTML_NOEMBED_OFF:
481 	case HTML_NOFRAMES_ON:
482 	case HTML_NOFRAMES_OFF:
483 	// Internet Explorer
484 	case HTML_MARQUEE_ON:
485 	case HTML_MARQUEE_OFF:
486 //	case HTML_PLAINTEXT2_ON:		//! extra Behandlung
487 //	case HTML_PLAINTEXT2_OFF:
488 	break;
489 
490 	default:
491 	{
492 		if ( nToken & HTML_TOKEN_ONOFF )
493 		{
494 			if ( ( nToken == HTML_UNKNOWNCONTROL_ON ) || ( nToken == HTML_UNKNOWNCONTROL_OFF ) )
495 			{
496 				;
497 			}
498 			else if ( !(nToken & 1) )
499 			{
500 				DBG_ASSERT( !( nToken & 1 ), "Kein Start-Token ?!" );
501 				SkipGroup( nToken + 1 );
502 			}
503 		}
504 	}
505 	}	// SWITCH
506 
507 	if ( pImpEditEngine->aImportHdl.IsSet() )
508 	{
509 		ImportInfo aImportInfo( HTMLIMP_NEXTTOKEN, this, pImpEditEngine->CreateESel( aCurSel ) );
510 		aImportInfo.nToken = nToken;
511 		aImportInfo.nTokenValue = (short)nTokenValue;
512 		if ( nToken == HTML_TEXTTOKEN )
513 			aImportInfo.aText = aToken;
514 		pImpEditEngine->aImportHdl.Call( &aImportInfo );
515 	}
516 
517 }
518 
ImpInsertParaBreak()519 void EditHTMLParser::ImpInsertParaBreak()
520 {
521 	if ( pImpEditEngine->aImportHdl.IsSet() )
522 	{
523 		ImportInfo aImportInfo( HTMLIMP_INSERTPARA, this, pImpEditEngine->CreateESel( aCurSel ) );
524 		pImpEditEngine->aImportHdl.Call( &aImportInfo );
525 	}
526 	aCurSel = pImpEditEngine->ImpInsertParaBreak( aCurSel );
527 	nLastAction = ACTION_INSERTPARABRK;
528 }
529 
ImpSetAttribs(const SfxItemSet & rItems,EditSelection * pSel)530 void EditHTMLParser::ImpSetAttribs( const SfxItemSet& rItems, EditSelection* pSel )
531 {
532 	// pSel, wenn Zeichenattribute, sonst Absatzattribute fuer den
533 	// aktuellen Absatz.
534 	DBG_ASSERT( pSel || ( aCurSel.Min().GetNode() == aCurSel.Max().GetNode() ), "ImpInsertAttribs: Selektion?" );
535 
536 	EditPaM aStartPaM( pSel ? pSel->Min() : aCurSel.Min() );
537 	EditPaM aEndPaM( pSel ? pSel->Max() : aCurSel.Max() );
538 
539 	if ( !pSel )
540 	{
541 		aStartPaM.SetIndex( 0 );
542 		aEndPaM.SetIndex( aEndPaM.GetNode()->Len() );
543 	}
544 
545 	if ( pImpEditEngine->aImportHdl.IsSet() )
546 	{
547 		EditSelection aSel( aStartPaM, aEndPaM );
548 		ImportInfo aImportInfo( HTMLIMP_SETATTR, this, pImpEditEngine->CreateESel( aSel ) );
549 		aImportInfo.pAttrs = (void*)&rItems;
550 		pImpEditEngine->aImportHdl.Call( &aImportInfo );
551 	}
552 
553 	ContentNode* pSN = aStartPaM.GetNode();
554 	sal_uInt32 nStartNode = pImpEditEngine->GetEditDoc().GetPos( pSN );
555 
556 	// Wenn ein Attribut von 0 bis aktuelle Absatzlaenge geht,
557 	// soll es ein Absatz-Attribut sein!
558 
559 	// Achtung: Selektion kann ueber mehrere Absaetze gehen.
560 	// Alle vollstaendigen Absaetze sind Absatzattribute...
561 
562 	// HTML eigentlich nicht:
563 #ifdef DBG_UTIL
564 	ContentNode* pEN = aEndPaM.GetNode();
565 	sal_uInt32 nEndNode = pImpEditEngine->GetEditDoc().GetPos( pEN );
566 	DBG_ASSERT( nStartNode == nEndNode, "ImpSetAttribs: Mehrere Absaetze?" );
567 #endif
568 
569 /*
570 	for ( sal_uInt32 z = nStartNode+1; z < nEndNode; z++ )
571 	{
572 		DBG_ASSERT( pImpEditEngine->GetEditDoc().SaveGetObject( z ), "Node existiert noch nicht(RTF)" );
573 		pImpEditEngine->SetParaAttribs( z, rSet.GetAttrSet() );
574 	}
575 
576 	if ( aStartPaM.GetNode() != aEndPaM.GetNode() )
577 	{
578 		// Den Rest des StartNodes...
579 		if ( aStartPaM.GetIndex() == 0 )
580 			pImpEditEngine->SetParaAttribs( nStartNode, rSet.GetAttrSet() );
581 		else
582 			pImpEditEngine->SetAttribs( EditSelection( aStartPaM, EditPaM( aStartPaM.GetNode(), aStartPaM.GetNode()->Len() ) ), rSet.GetAttrSet() );
583 
584 		// Den Anfang des EndNodes....
585 		if ( aEndPaM.GetIndex() == aEndPaM.GetNode()->Len() )
586 			pImpEditEngine->SetParaAttribs( nEndNode, rSet.GetAttrSet() );
587 		else
588 			pImpEditEngine->SetAttribs( EditSelection( EditPaM( aEndPaM.GetNode(), 0 ), aEndPaM ), rSet.GetAttrSet() );
589 	}
590 	else
591 */
592 	{
593 		if ( ( aStartPaM.GetIndex() == 0 ) && ( aEndPaM.GetIndex() == aEndPaM.GetNode()->Len() ) )
594 		{
595 			// Muesse gemergt werden:
596 			SfxItemSet aItems( pImpEditEngine->GetParaAttribs( nStartNode ) );
597 			aItems.Put( rItems );
598 			pImpEditEngine->SetParaAttribs( nStartNode, aItems );
599 		}
600 		else
601 			pImpEditEngine->SetAttribs( EditSelection( aStartPaM, aEndPaM ), rItems );
602 	}
603 }
604 
ImpSetStyleSheet(sal_uInt16 nHLevel)605 void EditHTMLParser::ImpSetStyleSheet( sal_uInt16 nHLevel )
606 {
607 	/*
608 		nHLevel:	0: 			Ausschalten
609 					1-6: 		Heading
610 					STYLE_PRE: 	Preformatted
611 	*/
612 
613 //		if ( pImpEditEngine->GetStatus().DoImportRTFStyleSheets() )
614 //		{
615 //			SvxRTFStyleType* pS = GetStyleTbl().Get( rSet.StyleNo() );
616 //			DBG_ASSERT( pS, "Vorlage in RTF nicht definiert!" );
617 //			if ( pS )
618 //				pImpEditEngine->SetStyleSheet( EditSelection( aStartPaM, aEndPaM ), pS->sName, SFX_STYLE_FAMILY_ALL );
619 //		}
620 //		else
621 		{
622 			// Harte Attribute erzeugen...
623 			// Reicht fuer Calc, bei StyleSheets muesste noch geklaert werden,
624 			// dass diese auch in der App liegen sollten, damit sie beim
625 			// fuettern in eine andere Engine auch noch da sind...
626 
627 			sal_uInt32 nNode = pImpEditEngine->GetEditDoc().GetPos( aCurSel.Max().GetNode() );
628 //			SfxItemSet aItems( pImpEditEngine->GetEmptyItemSet() );
629 			SfxItemSet aItems( aCurSel.Max().GetNode()->GetContentAttribs().GetItems() );
630 
631 			aItems.ClearItem( EE_PARA_ULSPACE );
632 			aItems.ClearItem( EE_CHAR_FONTHEIGHT );
633 			aItems.ClearItem( EE_CHAR_FONTINFO );
634 			aItems.ClearItem( EE_CHAR_WEIGHT );
635 
636 			// Fett in den ersten 3 Headings
637 			if ( ( nHLevel >= 1 ) && ( nHLevel <= 3 ) )
638 			{
639 				SvxWeightItem aWeightItem( WEIGHT_BOLD, EE_CHAR_WEIGHT );
640 				aItems.Put( aWeightItem );
641 			}
642 
643 			// Fonthoehe und Abstaende, wenn LogicToLogic moeglich:
644 			MapUnit eUnit = pImpEditEngine->GetRefMapMode().GetMapUnit();
645 			if ( ( eUnit != MAP_PIXEL ) && ( eUnit != MAP_SYSFONT ) &&
646 				 ( eUnit != MAP_APPFONT ) && ( eUnit != MAP_RELATIVE ) )
647 			{
648 				long nPoints = 10;
649 				if ( nHLevel == 1 )
650 					nPoints = 22;
651 				else if ( nHLevel == 2 )
652 					nPoints = 16;
653 				else if ( nHLevel == 3 )
654 					nPoints = 12;
655 				else if ( nHLevel == 4 )
656 					nPoints = 11;
657 
658 				nPoints = OutputDevice::LogicToLogic( nPoints, MAP_POINT, eUnit );
659 				SvxFontHeightItem aHeightItem( nPoints, 100, EE_CHAR_FONTHEIGHT );
660 				aItems.Put( aHeightItem );
661 
662 				// Absatzabstaende, wenn Heading:
663 				if ( !nHLevel || ((nHLevel >= 1) && (nHLevel <= 6)) )
664 				{
665 					SvxULSpaceItem aULSpaceItem( EE_PARA_ULSPACE );
666 					aULSpaceItem.SetUpper( (sal_uInt16)OutputDevice::LogicToLogic( 42, MAP_10TH_MM, eUnit ) );
667 					aULSpaceItem.SetLower( (sal_uInt16)OutputDevice::LogicToLogic( 35, MAP_10TH_MM, eUnit ) );
668 					aItems.Put( aULSpaceItem );
669 				}
670 			}
671 
672 			// Bei Pre einen proportionalen Font waehlen
673 			if ( nHLevel == STYLE_PRE )
674 			{
675 				Font aFont = OutputDevice::GetDefaultFont( DEFAULTFONT_FIXED, LANGUAGE_SYSTEM, 0 );
676 				SvxFontItem aFontItem( aFont.GetFamily(), aFont.GetName(), XubString(), aFont.GetPitch(), aFont.GetCharSet(), EE_CHAR_FONTINFO );
677 				aItems.Put( aFontItem );
678 			}
679 
680 			pImpEditEngine->SetParaAttribs( nNode, aItems );
681 		}
682 }
683 
ImpInsertText(const String & rText)684 void EditHTMLParser::ImpInsertText( const String& rText )
685 {
686 	String aText( rText );
687 	if ( pImpEditEngine->aImportHdl.IsSet() )
688 	{
689 		ImportInfo aImportInfo( HTMLIMP_INSERTTEXT, this, pImpEditEngine->CreateESel( aCurSel ) );
690 		aImportInfo.aText = aText;
691 		pImpEditEngine->aImportHdl.Call( &aImportInfo );
692 	}
693 
694 	aCurSel = pImpEditEngine->ImpInsertText( aCurSel, aText );
695 	nLastAction = ACTION_INSERTTEXT;
696 }
697 
SkipGroup(int nEndToken)698 void EditHTMLParser::SkipGroup( int nEndToken )
699 {
700 	// #69109# groups in cells are closed upon leaving the cell, because those
701 	// ******* web authors don't know their job
702 	// for example: <td><form></td>   lacks a closing </form>
703 	sal_uInt8 nCellLevel = nInCell;
704 	int nToken;
705 	while( nCellLevel <= nInCell && ( (nToken = GetNextToken() ) != nEndToken ) && nToken )
706 	{
707 		switch ( nToken )
708 		{
709 			case HTML_TABLEHEADER_ON:
710 			case HTML_TABLEDATA_ON:
711 				nInCell++;
712 			break;
713 			case HTML_TABLEHEADER_OFF:
714 			case HTML_TABLEDATA_OFF:
715 				if ( nInCell )
716 					nInCell--;
717 			break;
718 		}
719 	}
720 }
721 
StartPara(sal_Bool bReal)722 void EditHTMLParser::StartPara( sal_Bool bReal )
723 {
724 	if ( bReal )
725 	{
726 		const HTMLOptions *_pOptions = GetOptions();
727 		sal_uInt16 nArrLen = _pOptions->Count();
728 		SvxAdjust eAdjust = SVX_ADJUST_LEFT;
729 		for ( sal_uInt16 i = 0; i < nArrLen; i++ )
730 		{
731 			const HTMLOption *pOption = (*_pOptions)[i];
732 			switch( pOption->GetToken() )
733 			{
734 				case HTML_O_ALIGN:
735 				{
736 					if ( pOption->GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_right ) == COMPARE_EQUAL )
737 						eAdjust = SVX_ADJUST_RIGHT;
738 					else if ( pOption->GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_middle ) == COMPARE_EQUAL )
739 						eAdjust = SVX_ADJUST_CENTER;
740 					else if ( pOption->GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_center ) == COMPARE_EQUAL )
741 						eAdjust = SVX_ADJUST_CENTER;
742 					else
743 						eAdjust = SVX_ADJUST_LEFT;
744 				}
745 				break;
746 			}
747 		}
748 		SfxItemSet aItemSet( pImpEditEngine->GetEmptyItemSet() );
749 		aItemSet.Put( SvxAdjustItem( eAdjust, EE_PARA_JUST ) );
750 		ImpSetAttribs( aItemSet );
751 	}
752 	bInPara = sal_True;
753 }
754 
EndPara(sal_Bool)755 void EditHTMLParser::EndPara( sal_Bool )
756 {
757 	if ( bInPara )
758 	{
759 		sal_Bool bHasText = HasTextInCurrentPara();
760 		if ( bHasText )
761 			ImpInsertParaBreak();
762 		// Nur, wenn ohne Absatzabstaende gearbeitet wird...
763 //		if ( !nInTable && bReal && (nNumberingLevel<=1) && (nBulletLevel<=1) )
764 //			ImpInsertParaBreak();
765 	}
766 	bInPara = sal_False;
767 }
768 
ThrowAwayBlank()769 sal_Bool EditHTMLParser::ThrowAwayBlank()
770 {
771 	// Ein Blank muss weggeschmissen werden, wenn der neue Text mit einem
772 	// Blank beginnt und der aktuelle Absatz leer ist oder mit einem
773 	// Blank endet...
774 	ContentNode* pNode = aCurSel.Max().GetNode();
775 	if ( pNode->Len() && ( pNode->GetChar( pNode->Len()-1 ) != ' ' ) )
776 		return sal_False;
777 	return sal_True;
778 }
779 
HasTextInCurrentPara()780 sal_Bool EditHTMLParser::HasTextInCurrentPara()
781 {
782 	return aCurSel.Max().GetNode()->Len() ? sal_True : sal_False;
783 }
784 
AnchorStart()785 void EditHTMLParser::AnchorStart()
786 {
787 	// Anker im Anker ignoriern
788 	if ( !pCurAnchor )
789 	{
790 		const HTMLOptions* _pOptions = GetOptions();
791 		sal_uInt16 nArrLen = _pOptions->Count();
792 
793 		String aRef;
794 
795 		for ( sal_uInt16 i = 0; i < nArrLen; i++ )
796 		{
797 			const HTMLOption* pOption = (*_pOptions)[i];
798 			switch( pOption->GetToken() )
799 			{
800 				case HTML_O_HREF:
801 					aRef = pOption->GetString();
802 				break;
803 			}
804 		}
805 
806 		if ( aRef.Len() )
807 		{
808 			String aURL = aRef;
809 			if ( aURL.Len() && ( aURL.GetChar( 0 ) != '#' ) )
810 			{
811 				INetURLObject aTargetURL;
812                 INetURLObject aRootURL( aBaseURL );
813 				aRootURL.GetNewAbsURL( aRef, &aTargetURL );
814 				aURL = aTargetURL.GetMainURL( INetURLObject::DECODE_TO_IURI );
815             }
816 			pCurAnchor = new AnchorInfo;
817 			pCurAnchor->aHRef = aURL;
818 		}
819 	}
820 }
821 
AnchorEnd()822 void EditHTMLParser::AnchorEnd()
823 {
824 	if ( pCurAnchor )
825 	{
826 		// Als URL-Feld einfuegen...
827 		SvxFieldItem aFld( SvxURLField( pCurAnchor->aHRef, pCurAnchor->aText, SVXURLFORMAT_REPR ), EE_FEATURE_FIELD  );
828 		aCurSel = pImpEditEngine->InsertField( aCurSel, aFld );
829 		bFieldsInserted = sal_True;
830 		delete pCurAnchor;
831 		pCurAnchor = 0;
832 
833 		if ( pImpEditEngine->aImportHdl.IsSet() )
834 		{
835 			ImportInfo aImportInfo( HTMLIMP_INSERTFIELD, this, pImpEditEngine->CreateESel( aCurSel ) );
836 			pImpEditEngine->aImportHdl.Call( &aImportInfo );
837 		}
838 	}
839 }
840 
HeadingStart(int nToken)841 void EditHTMLParser::HeadingStart( int nToken )
842 {
843 	bWasInPara = bInPara;
844 	StartPara( sal_False );
845 
846 	if ( bWasInPara && HasTextInCurrentPara() )
847 		ImpInsertParaBreak();
848 
849 	sal_uInt16 nId = sal::static_int_cast< sal_uInt16 >(
850         1 + ( ( nToken - HTML_HEAD1_ON ) / 2 ) );
851 	DBG_ASSERT( (nId >= 1) && (nId <= 9), "HeadingStart: ID kann nicht stimmen!" );
852 	ImpSetStyleSheet( nId );
853 }
854 
HeadingEnd(int)855 void EditHTMLParser::HeadingEnd( int )
856 {
857 	EndPara( sal_False );
858 	ImpSetStyleSheet( 0 );
859 
860 	if ( bWasInPara )
861 	{
862 		bInPara = sal_True;
863 		bWasInPara = sal_False;
864 	}
865 }
866