xref: /aoo42x/main/svtools/source/svrtf/parrtf.cxx (revision cdf0e10c)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_svtools.hxx"
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil -*- */
32*cdf0e10cSrcweir 
33*cdf0e10cSrcweir #include <stdio.h>		                // for EOF
34*cdf0e10cSrcweir #include <rtl/tencinfo.h>
35*cdf0e10cSrcweir #include <tools/stream.hxx>
36*cdf0e10cSrcweir #include <tools/debug.hxx>
37*cdf0e10cSrcweir #include <svtools/rtftoken.h>
38*cdf0e10cSrcweir #include <svtools/rtfkeywd.hxx>
39*cdf0e10cSrcweir #include <svtools/parrtf.hxx>
40*cdf0e10cSrcweir 
// Upper bound on the number of characters ScanText() gathers into one text
// run before it stops and hands the run back.
const int MAX_STRING_LEN = 1024;
// Size of the chunk buffer _GetNextToken() uses while collecting the letters
// of a control word.
const int MAX_TOKEN_LEN = 128;

// ASCII-only character classification. The argument is fully parenthesized
// so that expression arguments (e.g. a conditional expression) are classified
// as a whole. It is still evaluated more than once, so it must be free of
// side effects.
#define RTF_ISDIGIT( c ) ( (c) >= '0' && (c) <= '9' )
#define RTF_ISALPHA( c ) ( ( (c) >= 'A' && (c) <= 'Z' ) || ( (c) >= 'a' && (c) <= 'z' ) )
46*cdf0e10cSrcweir 
47*cdf0e10cSrcweir SvRTFParser::SvRTFParser( SvStream& rIn, sal_uInt8 nStackSize )
48*cdf0e10cSrcweir 	: SvParser( rIn, nStackSize ),
49*cdf0e10cSrcweir 	eUNICodeSet( RTL_TEXTENCODING_MS_1252 ), 	// default ist ANSI-CodeSet
50*cdf0e10cSrcweir 	nUCharOverread( 1 )
51*cdf0e10cSrcweir {
52*cdf0e10cSrcweir 	// default ist ANSI-CodeSet
53*cdf0e10cSrcweir 	SetSrcEncoding( RTL_TEXTENCODING_MS_1252 );
54*cdf0e10cSrcweir 	bRTF_InTextRead = false;
55*cdf0e10cSrcweir }
56*cdf0e10cSrcweir 
57*cdf0e10cSrcweir SvRTFParser::~SvRTFParser()
58*cdf0e10cSrcweir {
59*cdf0e10cSrcweir }
60*cdf0e10cSrcweir 
61*cdf0e10cSrcweir 
62*cdf0e10cSrcweir 
63*cdf0e10cSrcweir 
64*cdf0e10cSrcweir int SvRTFParser::_GetNextToken()
65*cdf0e10cSrcweir {
66*cdf0e10cSrcweir 	int nRet = 0;
67*cdf0e10cSrcweir 	do {
68*cdf0e10cSrcweir 		int bNextCh = true;
69*cdf0e10cSrcweir 		switch( nNextCh )
70*cdf0e10cSrcweir 		{
71*cdf0e10cSrcweir 		case '\\':
72*cdf0e10cSrcweir 			{
73*cdf0e10cSrcweir 				// Steuerzeichen
74*cdf0e10cSrcweir 				switch( nNextCh = GetNextChar() )
75*cdf0e10cSrcweir 				{
76*cdf0e10cSrcweir 				case '{':
77*cdf0e10cSrcweir 				case '}':
78*cdf0e10cSrcweir 				case '\\':
79*cdf0e10cSrcweir 				case '+':		// habe ich in einem RTF-File gefunden
80*cdf0e10cSrcweir 				case '~':		// nonbreaking space
81*cdf0e10cSrcweir 				case '-':		// optional hyphen
82*cdf0e10cSrcweir 				case '_':		// nonbreaking hyphen
83*cdf0e10cSrcweir 				case '\'':		// HexValue
84*cdf0e10cSrcweir 					nNextCh = '\\';
85*cdf0e10cSrcweir 					rInput.SeekRel( -1 );
86*cdf0e10cSrcweir 					ScanText();
87*cdf0e10cSrcweir 					nRet = RTF_TEXTTOKEN;
88*cdf0e10cSrcweir 					bNextCh = 0 == nNextCh;
89*cdf0e10cSrcweir 					break;
90*cdf0e10cSrcweir 
91*cdf0e10cSrcweir 				case '*':		// ignoreflag
92*cdf0e10cSrcweir 					nRet = RTF_IGNOREFLAG;
93*cdf0e10cSrcweir 					break;
94*cdf0e10cSrcweir 				case ':':	 	// subentry in an index entry
95*cdf0e10cSrcweir 					nRet = RTF_SUBENTRYINDEX;
96*cdf0e10cSrcweir 					break;
97*cdf0e10cSrcweir 				case '|':		// formula-charakter
98*cdf0e10cSrcweir 					nRet = RTF_FORMULA;
99*cdf0e10cSrcweir 					break;
100*cdf0e10cSrcweir 
101*cdf0e10cSrcweir 				case 0x0a:
102*cdf0e10cSrcweir 				case 0x0d:
103*cdf0e10cSrcweir 					nRet = RTF_PAR;
104*cdf0e10cSrcweir 					break;
105*cdf0e10cSrcweir 
106*cdf0e10cSrcweir 				default:
107*cdf0e10cSrcweir 					if( RTF_ISALPHA( nNextCh ) )
108*cdf0e10cSrcweir 					{
109*cdf0e10cSrcweir 						aToken = '\\';
110*cdf0e10cSrcweir 						{
111*cdf0e10cSrcweir 							String aStrBuffer;
112*cdf0e10cSrcweir 							sal_Unicode* pStr = aStrBuffer.AllocBuffer(
113*cdf0e10cSrcweir 															MAX_TOKEN_LEN );
114*cdf0e10cSrcweir 							xub_StrLen nStrLen = 0;
115*cdf0e10cSrcweir 							do {
116*cdf0e10cSrcweir 								*(pStr + nStrLen++) = nNextCh;
117*cdf0e10cSrcweir 								if( MAX_TOKEN_LEN == nStrLen )
118*cdf0e10cSrcweir 								{
119*cdf0e10cSrcweir 									aToken += aStrBuffer;
120*cdf0e10cSrcweir 									aToken.GetBufferAccess();  // make unique string!
121*cdf0e10cSrcweir 									nStrLen = 0;
122*cdf0e10cSrcweir 								}
123*cdf0e10cSrcweir 								nNextCh = GetNextChar();
124*cdf0e10cSrcweir 							} while( RTF_ISALPHA( nNextCh ) );
125*cdf0e10cSrcweir 							if( nStrLen )
126*cdf0e10cSrcweir 							{
127*cdf0e10cSrcweir 								aStrBuffer.ReleaseBufferAccess( nStrLen );
128*cdf0e10cSrcweir 								aToken += aStrBuffer;
129*cdf0e10cSrcweir 							}
130*cdf0e10cSrcweir 						}
131*cdf0e10cSrcweir 
132*cdf0e10cSrcweir 						// Minus fuer numerischen Parameter
133*cdf0e10cSrcweir 						int bNegValue = false;
134*cdf0e10cSrcweir 						if( '-' == nNextCh )
135*cdf0e10cSrcweir 						{
136*cdf0e10cSrcweir 							bNegValue = true;
137*cdf0e10cSrcweir 							nNextCh = GetNextChar();
138*cdf0e10cSrcweir 						}
139*cdf0e10cSrcweir 
140*cdf0e10cSrcweir 						// evt. Numerischer Parameter
141*cdf0e10cSrcweir 						if( RTF_ISDIGIT( nNextCh ) )
142*cdf0e10cSrcweir 						{
143*cdf0e10cSrcweir 							nTokenValue = 0;
144*cdf0e10cSrcweir 							do {
145*cdf0e10cSrcweir 								nTokenValue *= 10;
146*cdf0e10cSrcweir 								nTokenValue += nNextCh - '0';
147*cdf0e10cSrcweir 								nNextCh = GetNextChar();
148*cdf0e10cSrcweir 							} while( RTF_ISDIGIT( nNextCh ) );
149*cdf0e10cSrcweir 							if( bNegValue )
150*cdf0e10cSrcweir 								nTokenValue = -nTokenValue;
151*cdf0e10cSrcweir 							bTokenHasValue=true;
152*cdf0e10cSrcweir 						}
153*cdf0e10cSrcweir 						else if( bNegValue )		// das Minus wieder zurueck
154*cdf0e10cSrcweir 						{
155*cdf0e10cSrcweir 							nNextCh = '-';
156*cdf0e10cSrcweir 							rInput.SeekRel( -1 );
157*cdf0e10cSrcweir 						}
158*cdf0e10cSrcweir 						if( ' ' == nNextCh )		// Blank gehoert zum Token!
159*cdf0e10cSrcweir 							nNextCh = GetNextChar();
160*cdf0e10cSrcweir 
161*cdf0e10cSrcweir 						// suche das Token in der Tabelle:
162*cdf0e10cSrcweir 						if( 0 == (nRet = GetRTFToken( aToken )) )
163*cdf0e10cSrcweir 							// Unknown Control
164*cdf0e10cSrcweir 							nRet = RTF_UNKNOWNCONTROL;
165*cdf0e10cSrcweir 
166*cdf0e10cSrcweir 						// bug 76812 - unicode token handled as normal text
167*cdf0e10cSrcweir 						bNextCh = false;
168*cdf0e10cSrcweir 						switch( nRet )
169*cdf0e10cSrcweir 						{
170*cdf0e10cSrcweir 						case RTF_UC:
171*cdf0e10cSrcweir 							if( 0 <= nTokenValue )
172*cdf0e10cSrcweir 							{
173*cdf0e10cSrcweir 								nUCharOverread = (sal_uInt8)nTokenValue;
174*cdf0e10cSrcweir #if 1
175*cdf0e10cSrcweir                                 //cmc: other ifdef breaks #i3584
176*cdf0e10cSrcweir 								aParserStates.top().
177*cdf0e10cSrcweir 									nUCharOverread = nUCharOverread;
178*cdf0e10cSrcweir #else
179*cdf0e10cSrcweir 								if( !nUCharOverread )
180*cdf0e10cSrcweir 									nUCharOverread = aParserStates.top().nUCharOverread;
181*cdf0e10cSrcweir 								else
182*cdf0e10cSrcweir 									aParserStates.top().
183*cdf0e10cSrcweir 										nUCharOverread = nUCharOverread;
184*cdf0e10cSrcweir #endif
185*cdf0e10cSrcweir 							}
186*cdf0e10cSrcweir 							aToken.Erase(); // #i47831# erase token to prevent the token from beeing treated as text
187*cdf0e10cSrcweir 							// read next token
188*cdf0e10cSrcweir 							nRet = 0;
189*cdf0e10cSrcweir 							break;
190*cdf0e10cSrcweir 
191*cdf0e10cSrcweir 						case RTF_UPR:
192*cdf0e10cSrcweir 							if (!_inSkipGroup) {
193*cdf0e10cSrcweir 							// UPR - overread the group with the ansi
194*cdf0e10cSrcweir 							//       informations
195*cdf0e10cSrcweir 							while( '{' != _GetNextToken() )
196*cdf0e10cSrcweir 								;
197*cdf0e10cSrcweir 							SkipGroup();
198*cdf0e10cSrcweir 							_GetNextToken();  // overread the last bracket
199*cdf0e10cSrcweir 							nRet = 0;
200*cdf0e10cSrcweir 							}
201*cdf0e10cSrcweir 							break;
202*cdf0e10cSrcweir 
203*cdf0e10cSrcweir 						case RTF_U:
204*cdf0e10cSrcweir 							if( !bRTF_InTextRead )
205*cdf0e10cSrcweir 							{
206*cdf0e10cSrcweir 								nRet = RTF_TEXTTOKEN;
207*cdf0e10cSrcweir 								aToken = (sal_Unicode)nTokenValue;
208*cdf0e10cSrcweir 
209*cdf0e10cSrcweir 								// overread the next n "RTF" characters. This
210*cdf0e10cSrcweir 								// can be also \{, \}, \'88
211*cdf0e10cSrcweir 								for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
212*cdf0e10cSrcweir 								{
213*cdf0e10cSrcweir 									sal_Unicode cAnsi = nNextCh;
214*cdf0e10cSrcweir 									while( 0xD == cAnsi )
215*cdf0e10cSrcweir 										cAnsi = GetNextChar();
216*cdf0e10cSrcweir 									while( 0xA == cAnsi )
217*cdf0e10cSrcweir 										cAnsi = GetNextChar();
218*cdf0e10cSrcweir 
219*cdf0e10cSrcweir 									if( '\\' == cAnsi &&
220*cdf0e10cSrcweir 										'\'' == ( cAnsi = GetNextChar() ))
221*cdf0e10cSrcweir 										// HexValue ueberlesen
222*cdf0e10cSrcweir 										cAnsi = GetHexValue();
223*cdf0e10cSrcweir 									nNextCh = GetNextChar();
224*cdf0e10cSrcweir 								}
225*cdf0e10cSrcweir 								ScanText();
226*cdf0e10cSrcweir 								bNextCh = 0 == nNextCh;
227*cdf0e10cSrcweir 							}
228*cdf0e10cSrcweir 							break;
229*cdf0e10cSrcweir 						}
230*cdf0e10cSrcweir 					}
231*cdf0e10cSrcweir 					else if( SVPAR_PENDING != eState )
232*cdf0e10cSrcweir 					{
233*cdf0e10cSrcweir 						// Bug 34631 - "\ " ueberlesen - Blank als Zeichen
234*cdf0e10cSrcweir 						// eState = SVPAR_ERROR;
235*cdf0e10cSrcweir 						bNextCh = false;
236*cdf0e10cSrcweir 					}
237*cdf0e10cSrcweir 					break;
238*cdf0e10cSrcweir 				}
239*cdf0e10cSrcweir 			}
240*cdf0e10cSrcweir 			break;
241*cdf0e10cSrcweir 
242*cdf0e10cSrcweir 		case sal_Unicode(EOF):
243*cdf0e10cSrcweir 			eState = SVPAR_ACCEPTED;
244*cdf0e10cSrcweir 			nRet = nNextCh;
245*cdf0e10cSrcweir 			break;
246*cdf0e10cSrcweir 
247*cdf0e10cSrcweir 		case '{':
248*cdf0e10cSrcweir 			{
249*cdf0e10cSrcweir 				if( 0 <= nOpenBrakets )
250*cdf0e10cSrcweir 				{
251*cdf0e10cSrcweir 					RtfParserState_Impl aState( nUCharOverread, GetSrcEncoding() );
252*cdf0e10cSrcweir                     aParserStates.push( aState );
253*cdf0e10cSrcweir 				}
254*cdf0e10cSrcweir 				++nOpenBrakets;
255*cdf0e10cSrcweir                 DBG_ASSERT(
256*cdf0e10cSrcweir                     static_cast<size_t>(nOpenBrakets) == aParserStates.size(),
257*cdf0e10cSrcweir                     "ParserStateStack unequal to bracket count" );
258*cdf0e10cSrcweir 				nRet = nNextCh;
259*cdf0e10cSrcweir 			}
260*cdf0e10cSrcweir 			break;
261*cdf0e10cSrcweir 
262*cdf0e10cSrcweir 		case '}':
263*cdf0e10cSrcweir 			--nOpenBrakets;
264*cdf0e10cSrcweir 			if( 0 <= nOpenBrakets )
265*cdf0e10cSrcweir 			{
266*cdf0e10cSrcweir                 aParserStates.pop();
267*cdf0e10cSrcweir 				if( !aParserStates.empty() )
268*cdf0e10cSrcweir 				{
269*cdf0e10cSrcweir 					const RtfParserState_Impl& rRPS =
270*cdf0e10cSrcweir 							aParserStates.top();
271*cdf0e10cSrcweir 					nUCharOverread = rRPS.nUCharOverread;
272*cdf0e10cSrcweir 					SetSrcEncoding( rRPS.eCodeSet );
273*cdf0e10cSrcweir 				}
274*cdf0e10cSrcweir 				else
275*cdf0e10cSrcweir 				{
276*cdf0e10cSrcweir 					nUCharOverread = 1;
277*cdf0e10cSrcweir 					SetSrcEncoding( GetCodeSet() );
278*cdf0e10cSrcweir 				}
279*cdf0e10cSrcweir 			}
280*cdf0e10cSrcweir             DBG_ASSERT(
281*cdf0e10cSrcweir                 static_cast<size_t>(nOpenBrakets) == aParserStates.size(),
282*cdf0e10cSrcweir                 "ParserStateStack unequal to bracket count" );
283*cdf0e10cSrcweir 			nRet = nNextCh;
284*cdf0e10cSrcweir 			break;
285*cdf0e10cSrcweir 
286*cdf0e10cSrcweir 		case 0x0d:
287*cdf0e10cSrcweir 		case 0x0a:
288*cdf0e10cSrcweir 			break;
289*cdf0e10cSrcweir 
290*cdf0e10cSrcweir 		default:
291*cdf0e10cSrcweir 			// es folgt normaler Text
292*cdf0e10cSrcweir 			ScanText();
293*cdf0e10cSrcweir 			nRet = RTF_TEXTTOKEN;
294*cdf0e10cSrcweir 			bNextCh = 0 == nNextCh;
295*cdf0e10cSrcweir 			break;
296*cdf0e10cSrcweir 		}
297*cdf0e10cSrcweir 
298*cdf0e10cSrcweir 		if( bNextCh )
299*cdf0e10cSrcweir 			nNextCh = GetNextChar();
300*cdf0e10cSrcweir 
301*cdf0e10cSrcweir 	} while( !nRet && SVPAR_WORKING == eState );
302*cdf0e10cSrcweir 	return nRet;
303*cdf0e10cSrcweir }
304*cdf0e10cSrcweir 
305*cdf0e10cSrcweir 
306*cdf0e10cSrcweir sal_Unicode SvRTFParser::GetHexValue()
307*cdf0e10cSrcweir {
308*cdf0e10cSrcweir 	// Hex-Wert sammeln
309*cdf0e10cSrcweir 	register int n;
310*cdf0e10cSrcweir 	register sal_Unicode nHexVal = 0;
311*cdf0e10cSrcweir 
312*cdf0e10cSrcweir 	for( n = 0; n < 2; ++n )
313*cdf0e10cSrcweir 	{
314*cdf0e10cSrcweir 		nHexVal *= 16;
315*cdf0e10cSrcweir 		nNextCh = GetNextChar();
316*cdf0e10cSrcweir 		if( nNextCh >= '0' && nNextCh <= '9' )
317*cdf0e10cSrcweir 			nHexVal += (nNextCh - 48);
318*cdf0e10cSrcweir 		else if( nNextCh >= 'a' && nNextCh <= 'f' )
319*cdf0e10cSrcweir 			nHexVal += (nNextCh - 87);
320*cdf0e10cSrcweir 		else if( nNextCh >= 'A' && nNextCh <= 'F' )
321*cdf0e10cSrcweir 			nHexVal += (nNextCh - 55);
322*cdf0e10cSrcweir 	}
323*cdf0e10cSrcweir 	return nHexVal;
324*cdf0e10cSrcweir }
325*cdf0e10cSrcweir 
326*cdf0e10cSrcweir void SvRTFParser::ScanText( const sal_Unicode cBreak )
327*cdf0e10cSrcweir {
328*cdf0e10cSrcweir 	String aStrBuffer;
329*cdf0e10cSrcweir 	int bWeiter = true;
330*cdf0e10cSrcweir 	while( bWeiter && IsParserWorking() && aStrBuffer.Len() < MAX_STRING_LEN)
331*cdf0e10cSrcweir 	{
332*cdf0e10cSrcweir 		int bNextCh = true;
333*cdf0e10cSrcweir 		switch( nNextCh )
334*cdf0e10cSrcweir 		{
335*cdf0e10cSrcweir 		case '\\':
336*cdf0e10cSrcweir 			{
337*cdf0e10cSrcweir 				switch (nNextCh = GetNextChar())
338*cdf0e10cSrcweir 				{
339*cdf0e10cSrcweir 				case '\'':
340*cdf0e10cSrcweir 					{
341*cdf0e10cSrcweir 
342*cdf0e10cSrcweir #if 0
343*cdf0e10cSrcweir                         // #i35653 patch from cmc
344*cdf0e10cSrcweir                         ByteString aByteString(static_cast<char>(GetHexValue()));
345*cdf0e10cSrcweir                         if (aByteString.Len())
346*cdf0e10cSrcweir                             aStrBuffer.Append(String(aByteString, GetSrcEncoding()));
347*cdf0e10cSrcweir #else
348*cdf0e10cSrcweir                         ByteString aByteString;
349*cdf0e10cSrcweir                         while (1)
350*cdf0e10cSrcweir                         {
351*cdf0e10cSrcweir                             aByteString.Append((char)GetHexValue());
352*cdf0e10cSrcweir 
353*cdf0e10cSrcweir                             bool bBreak = false;
354*cdf0e10cSrcweir                             sal_Char nSlash = '\\';
355*cdf0e10cSrcweir                             while (!bBreak)
356*cdf0e10cSrcweir                             {
357*cdf0e10cSrcweir 								wchar_t __next=GetNextChar();
358*cdf0e10cSrcweir 								if (__next>0xFF) // fix for #i43933# and #i35653#
359*cdf0e10cSrcweir 								{
360*cdf0e10cSrcweir 									if (aByteString.Len())
361*cdf0e10cSrcweir 										aStrBuffer.Append(String(aByteString, GetSrcEncoding()));
362*cdf0e10cSrcweir 									aStrBuffer.Append((sal_Unicode)__next);
363*cdf0e10cSrcweir 
364*cdf0e10cSrcweir 									aByteString.Erase();
365*cdf0e10cSrcweir 									continue;
366*cdf0e10cSrcweir 								}
367*cdf0e10cSrcweir                                 nSlash = (sal_Char)__next;
368*cdf0e10cSrcweir                                 while (nSlash == 0xD || nSlash == 0xA)
369*cdf0e10cSrcweir                                     nSlash = (sal_Char)GetNextChar();
370*cdf0e10cSrcweir 
371*cdf0e10cSrcweir                                 switch (nSlash)
372*cdf0e10cSrcweir                                 {
373*cdf0e10cSrcweir                                     case '{':
374*cdf0e10cSrcweir                                     case '}':
375*cdf0e10cSrcweir                                     case '\\':
376*cdf0e10cSrcweir                                         bBreak = true;
377*cdf0e10cSrcweir                                         break;
378*cdf0e10cSrcweir                                     default:
379*cdf0e10cSrcweir                                         aByteString.Append(nSlash);
380*cdf0e10cSrcweir                                         break;
381*cdf0e10cSrcweir                                 }
382*cdf0e10cSrcweir                             }
383*cdf0e10cSrcweir 
384*cdf0e10cSrcweir                             nNextCh = GetNextChar();
385*cdf0e10cSrcweir 
386*cdf0e10cSrcweir                             if (nSlash != '\\' || nNextCh != '\'')
387*cdf0e10cSrcweir                             {
388*cdf0e10cSrcweir                                 rInput.SeekRel(-1);
389*cdf0e10cSrcweir                                 nNextCh = nSlash;
390*cdf0e10cSrcweir                                 break;
391*cdf0e10cSrcweir                             }
392*cdf0e10cSrcweir                         }
393*cdf0e10cSrcweir 
394*cdf0e10cSrcweir                         bNextCh = false;
395*cdf0e10cSrcweir 
396*cdf0e10cSrcweir                         if (aByteString.Len())
397*cdf0e10cSrcweir                             aStrBuffer.Append(String(aByteString, GetSrcEncoding()));
398*cdf0e10cSrcweir #endif
399*cdf0e10cSrcweir                     }
400*cdf0e10cSrcweir 					break;
401*cdf0e10cSrcweir 				case '\\':
402*cdf0e10cSrcweir 				case '}':
403*cdf0e10cSrcweir 				case '{':
404*cdf0e10cSrcweir 				case '+':		// habe ich in einem RTF-File gefunden
405*cdf0e10cSrcweir 					aStrBuffer.Append(nNextCh);
406*cdf0e10cSrcweir 					break;
407*cdf0e10cSrcweir 				case '~':		// nonbreaking space
408*cdf0e10cSrcweir 					aStrBuffer.Append(static_cast< sal_Unicode >(0xA0));
409*cdf0e10cSrcweir 					break;
410*cdf0e10cSrcweir 				case '-':		// optional hyphen
411*cdf0e10cSrcweir 					aStrBuffer.Append(static_cast< sal_Unicode >(0xAD));
412*cdf0e10cSrcweir 					break;
413*cdf0e10cSrcweir 				case '_':		// nonbreaking hyphen
414*cdf0e10cSrcweir 					aStrBuffer.Append(static_cast< sal_Unicode >(0x2011));
415*cdf0e10cSrcweir 					break;
416*cdf0e10cSrcweir 
417*cdf0e10cSrcweir 				case 'u':
418*cdf0e10cSrcweir 					// UNI-Code Zeichen lesen
419*cdf0e10cSrcweir 					{
420*cdf0e10cSrcweir 						nNextCh = GetNextChar();
421*cdf0e10cSrcweir 						rInput.SeekRel( -2 );
422*cdf0e10cSrcweir 
423*cdf0e10cSrcweir 						if( '-' == nNextCh || RTF_ISDIGIT( nNextCh ) )
424*cdf0e10cSrcweir 						{
425*cdf0e10cSrcweir 							bRTF_InTextRead = true;
426*cdf0e10cSrcweir 
427*cdf0e10cSrcweir 							String sSave( aToken );
428*cdf0e10cSrcweir 							nNextCh = '\\';
429*cdf0e10cSrcweir                             #ifdef DBG_UTIL
430*cdf0e10cSrcweir 							int nToken =
431*cdf0e10cSrcweir                             #endif
432*cdf0e10cSrcweir                                 _GetNextToken();
433*cdf0e10cSrcweir 							DBG_ASSERT( RTF_U == nToken, "doch kein UNI-Code Zeichen" );
434*cdf0e10cSrcweir 							// dont convert symbol chars
435*cdf0e10cSrcweir 							aStrBuffer.Append(
436*cdf0e10cSrcweir                                 static_cast< sal_Unicode >(nTokenValue));
437*cdf0e10cSrcweir 
438*cdf0e10cSrcweir 							// overread the next n "RTF" characters. This
439*cdf0e10cSrcweir 							// can be also \{, \}, \'88
440*cdf0e10cSrcweir 							for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
441*cdf0e10cSrcweir 							{
442*cdf0e10cSrcweir 								sal_Unicode cAnsi = nNextCh;
443*cdf0e10cSrcweir 								while( 0xD == cAnsi )
444*cdf0e10cSrcweir 									cAnsi = GetNextChar();
445*cdf0e10cSrcweir 								while( 0xA == cAnsi )
446*cdf0e10cSrcweir 									cAnsi = GetNextChar();
447*cdf0e10cSrcweir 
448*cdf0e10cSrcweir 								if( '\\' == cAnsi &&
449*cdf0e10cSrcweir 									'\'' == ( cAnsi = GetNextChar() ))
450*cdf0e10cSrcweir 									// HexValue ueberlesen
451*cdf0e10cSrcweir 									cAnsi = GetHexValue();
452*cdf0e10cSrcweir 								nNextCh = GetNextChar();
453*cdf0e10cSrcweir 							}
454*cdf0e10cSrcweir 							bNextCh = false;
455*cdf0e10cSrcweir 							aToken = sSave;
456*cdf0e10cSrcweir 							bRTF_InTextRead = false;
457*cdf0e10cSrcweir 						}
458*cdf0e10cSrcweir 						else
459*cdf0e10cSrcweir 						{
460*cdf0e10cSrcweir 							nNextCh = '\\';
461*cdf0e10cSrcweir 							bWeiter = false;		// Abbrechen, String zusammen
462*cdf0e10cSrcweir 						}
463*cdf0e10cSrcweir 					}
464*cdf0e10cSrcweir 					break;
465*cdf0e10cSrcweir 
466*cdf0e10cSrcweir 				default:
467*cdf0e10cSrcweir 					rInput.SeekRel( -1 );
468*cdf0e10cSrcweir 					nNextCh = '\\';
469*cdf0e10cSrcweir 					bWeiter = false;		// Abbrechen, String zusammen
470*cdf0e10cSrcweir 					break;
471*cdf0e10cSrcweir 				}
472*cdf0e10cSrcweir 			}
473*cdf0e10cSrcweir 			break;
474*cdf0e10cSrcweir 
475*cdf0e10cSrcweir 		case sal_Unicode(EOF):
476*cdf0e10cSrcweir 				eState = SVPAR_ERROR;
477*cdf0e10cSrcweir 				// weiter
478*cdf0e10cSrcweir 		case '{':
479*cdf0e10cSrcweir 		case '}':
480*cdf0e10cSrcweir 			bWeiter = false;
481*cdf0e10cSrcweir 			break;
482*cdf0e10cSrcweir 
483*cdf0e10cSrcweir 		case 0x0a:
484*cdf0e10cSrcweir 		case 0x0d:
485*cdf0e10cSrcweir 			break;
486*cdf0e10cSrcweir 
487*cdf0e10cSrcweir 		default:
488*cdf0e10cSrcweir 			if( nNextCh == cBreak || aStrBuffer.Len() >= MAX_STRING_LEN)
489*cdf0e10cSrcweir 				bWeiter = false;
490*cdf0e10cSrcweir 			else
491*cdf0e10cSrcweir 			{
492*cdf0e10cSrcweir 				do {
493*cdf0e10cSrcweir 					// alle anderen Zeichen kommen in den Text
494*cdf0e10cSrcweir 					aStrBuffer.Append(nNextCh);
495*cdf0e10cSrcweir 
496*cdf0e10cSrcweir 					if (sal_Unicode(EOF) == (nNextCh = GetNextChar()))
497*cdf0e10cSrcweir 					{
498*cdf0e10cSrcweir                         if (aStrBuffer.Len())
499*cdf0e10cSrcweir 		                    aToken += aStrBuffer;
500*cdf0e10cSrcweir 						return;
501*cdf0e10cSrcweir 					}
502*cdf0e10cSrcweir 				} while
503*cdf0e10cSrcweir                 (
504*cdf0e10cSrcweir                     (RTF_ISALPHA(nNextCh) || RTF_ISDIGIT(nNextCh)) &&
505*cdf0e10cSrcweir                     (aStrBuffer.Len() < MAX_STRING_LEN)
506*cdf0e10cSrcweir                 );
507*cdf0e10cSrcweir 				bNextCh = false;
508*cdf0e10cSrcweir 			}
509*cdf0e10cSrcweir 		}
510*cdf0e10cSrcweir 
511*cdf0e10cSrcweir 		if( bWeiter && bNextCh )
512*cdf0e10cSrcweir 			nNextCh = GetNextChar();
513*cdf0e10cSrcweir 	}
514*cdf0e10cSrcweir 
515*cdf0e10cSrcweir 	if (aStrBuffer.Len())
516*cdf0e10cSrcweir 		aToken += aStrBuffer;
517*cdf0e10cSrcweir }
518*cdf0e10cSrcweir 
519*cdf0e10cSrcweir 
520*cdf0e10cSrcweir short SvRTFParser::_inSkipGroup=0;
521*cdf0e10cSrcweir 
522*cdf0e10cSrcweir void SvRTFParser::SkipGroup()
523*cdf0e10cSrcweir {
524*cdf0e10cSrcweir short nBrackets=1;
525*cdf0e10cSrcweir if (_inSkipGroup>0)
526*cdf0e10cSrcweir 	return;
527*cdf0e10cSrcweir _inSkipGroup++;
528*cdf0e10cSrcweir #if 1	//#i16185# fecking \bin keyword
529*cdf0e10cSrcweir     do
530*cdf0e10cSrcweir     {
531*cdf0e10cSrcweir         switch (nNextCh)
532*cdf0e10cSrcweir         {
533*cdf0e10cSrcweir             case '{':
534*cdf0e10cSrcweir                 ++nBrackets;
535*cdf0e10cSrcweir                 break;
536*cdf0e10cSrcweir             case '}':
537*cdf0e10cSrcweir 				if (!--nBrackets) {
538*cdf0e10cSrcweir 					_inSkipGroup--;
539*cdf0e10cSrcweir                     return;
540*cdf0e10cSrcweir 				}
541*cdf0e10cSrcweir                 break;
542*cdf0e10cSrcweir         }
543*cdf0e10cSrcweir         int nToken = _GetNextToken();
544*cdf0e10cSrcweir         if (nToken == RTF_BIN)
545*cdf0e10cSrcweir         {
546*cdf0e10cSrcweir             rInput.SeekRel(-1);
547*cdf0e10cSrcweir             rInput.SeekRel(nTokenValue);
548*cdf0e10cSrcweir 		    nNextCh = GetNextChar();
549*cdf0e10cSrcweir         }
550*cdf0e10cSrcweir 		while (nNextCh==0xa || nNextCh==0xd)
551*cdf0e10cSrcweir 		{
552*cdf0e10cSrcweir 			nNextCh = GetNextChar();
553*cdf0e10cSrcweir 		}
554*cdf0e10cSrcweir     } while (sal_Unicode(EOF) != nNextCh && IsParserWorking());
555*cdf0e10cSrcweir #else
556*cdf0e10cSrcweir 	sal_Unicode cPrev = 0;
557*cdf0e10cSrcweir 	do {
558*cdf0e10cSrcweir 		switch( nNextCh )
559*cdf0e10cSrcweir 		{
560*cdf0e10cSrcweir 		case '{':
561*cdf0e10cSrcweir 			if( '\\' != cPrev )
562*cdf0e10cSrcweir 				++nBrackets;
563*cdf0e10cSrcweir 			break;
564*cdf0e10cSrcweir 
565*cdf0e10cSrcweir 		case '}':
566*cdf0e10cSrcweir 			if( '\\' != cPrev && !--nBrackets )
567*cdf0e10cSrcweir 				return;
568*cdf0e10cSrcweir 			break;
569*cdf0e10cSrcweir 
570*cdf0e10cSrcweir 		case '\\':
571*cdf0e10cSrcweir 			if( '\\' == cPrev )
572*cdf0e10cSrcweir 				nNextCh = 0;
573*cdf0e10cSrcweir 			break;
574*cdf0e10cSrcweir 		}
575*cdf0e10cSrcweir 		cPrev = nNextCh;
576*cdf0e10cSrcweir 		nNextCh = GetNextChar();
577*cdf0e10cSrcweir 	} while( sal_Unicode(EOF) != nNextCh && IsParserWorking() );
578*cdf0e10cSrcweir #endif
579*cdf0e10cSrcweir 
580*cdf0e10cSrcweir 	if( SVPAR_PENDING != eState && '}' != nNextCh )
581*cdf0e10cSrcweir 		eState = SVPAR_ERROR;
582*cdf0e10cSrcweir 	_inSkipGroup--;
583*cdf0e10cSrcweir }
584*cdf0e10cSrcweir 
585*cdf0e10cSrcweir void SvRTFParser::ReadUnknownData()	{ SkipGroup(); }
586*cdf0e10cSrcweir void SvRTFParser::ReadBitmapData()	{ SkipGroup(); }
587*cdf0e10cSrcweir void SvRTFParser::ReadOLEData()		{ SkipGroup(); }
588*cdf0e10cSrcweir 
589*cdf0e10cSrcweir 
590*cdf0e10cSrcweir SvParserState SvRTFParser::CallParser()
591*cdf0e10cSrcweir {
592*cdf0e10cSrcweir 	sal_Char cFirstCh;
593*cdf0e10cSrcweir     nNextChPos = rInput.Tell();
594*cdf0e10cSrcweir 	rInput >> cFirstCh; nNextCh = cFirstCh;
595*cdf0e10cSrcweir 	eState = SVPAR_WORKING;
596*cdf0e10cSrcweir 	nOpenBrakets = 0;
597*cdf0e10cSrcweir 	SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 );
598*cdf0e10cSrcweir 	eUNICodeSet = RTL_TEXTENCODING_MS_1252; 	// default ist ANSI-CodeSet
599*cdf0e10cSrcweir 
600*cdf0e10cSrcweir 	// die 1. beiden Token muessen '{' und \\rtf sein !!
601*cdf0e10cSrcweir 	if( '{' == GetNextToken() && RTF_RTF == GetNextToken() )
602*cdf0e10cSrcweir 	{
603*cdf0e10cSrcweir 		AddRef();
604*cdf0e10cSrcweir 		Continue( 0 );
605*cdf0e10cSrcweir 		if( SVPAR_PENDING != eState )
606*cdf0e10cSrcweir 			ReleaseRef();		// dann brauchen wir den Parser nicht mehr!
607*cdf0e10cSrcweir 	}
608*cdf0e10cSrcweir 	else
609*cdf0e10cSrcweir 		eState = SVPAR_ERROR;
610*cdf0e10cSrcweir 
611*cdf0e10cSrcweir 	return eState;
612*cdf0e10cSrcweir }
613*cdf0e10cSrcweir 
614*cdf0e10cSrcweir void SvRTFParser::Continue( int nToken )
615*cdf0e10cSrcweir {
616*cdf0e10cSrcweir //	DBG_ASSERT( SVPAR_CS_DONTKNOW == GetCharSet(),
617*cdf0e10cSrcweir //				"Zeichensatz wurde geaendert." );
618*cdf0e10cSrcweir 
619*cdf0e10cSrcweir 	if( !nToken )
620*cdf0e10cSrcweir 		nToken = GetNextToken();
621*cdf0e10cSrcweir 
622*cdf0e10cSrcweir 	while( IsParserWorking() )
623*cdf0e10cSrcweir 	{
624*cdf0e10cSrcweir 		SaveState( nToken );
625*cdf0e10cSrcweir 		switch( nToken )
626*cdf0e10cSrcweir 		{
627*cdf0e10cSrcweir 		case '}':
628*cdf0e10cSrcweir 			if( nOpenBrakets )
629*cdf0e10cSrcweir 				goto NEXTTOKEN;
630*cdf0e10cSrcweir 			eState = SVPAR_ACCEPTED;
631*cdf0e10cSrcweir 			break;
632*cdf0e10cSrcweir 
633*cdf0e10cSrcweir 		case '{':
634*cdf0e10cSrcweir 			// eine unbekannte Gruppe ?
635*cdf0e10cSrcweir 			{
636*cdf0e10cSrcweir 				if( RTF_IGNOREFLAG != GetNextToken() )
637*cdf0e10cSrcweir 					nToken = SkipToken( -1 );
638*cdf0e10cSrcweir 				else if( RTF_UNKNOWNCONTROL != GetNextToken() )
639*cdf0e10cSrcweir 					nToken = SkipToken( -2 );
640*cdf0e10cSrcweir 				else
641*cdf0e10cSrcweir 				{
642*cdf0e10cSrcweir 					// gleich herausfiltern
643*cdf0e10cSrcweir 					ReadUnknownData();
644*cdf0e10cSrcweir 					nToken = GetNextToken();
645*cdf0e10cSrcweir 					if( '}' != nToken )
646*cdf0e10cSrcweir 						eState = SVPAR_ERROR;
647*cdf0e10cSrcweir 					break;		// auf zum naechsten Token!!
648*cdf0e10cSrcweir 				}
649*cdf0e10cSrcweir 			}
650*cdf0e10cSrcweir 			goto NEXTTOKEN;
651*cdf0e10cSrcweir 
652*cdf0e10cSrcweir 		case RTF_UNKNOWNCONTROL:
653*cdf0e10cSrcweir 			break;		// unbekannte Token ueberspringen
654*cdf0e10cSrcweir 		case RTF_NEXTTYPE:
655*cdf0e10cSrcweir 		case RTF_ANSITYPE:
656*cdf0e10cSrcweir             SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 );
657*cdf0e10cSrcweir             break;
658*cdf0e10cSrcweir 		case RTF_MACTYPE:
659*cdf0e10cSrcweir             SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_APPLE_ROMAN );
660*cdf0e10cSrcweir             break;
661*cdf0e10cSrcweir 		case RTF_PCTYPE:
662*cdf0e10cSrcweir             SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_IBM_437 );
663*cdf0e10cSrcweir             break;
664*cdf0e10cSrcweir 		case RTF_PCATYPE:
665*cdf0e10cSrcweir             SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_IBM_850 );
666*cdf0e10cSrcweir             break;
667*cdf0e10cSrcweir 		case RTF_ANSICPG:
668*cdf0e10cSrcweir             eCodeSet = rtl_getTextEncodingFromWindowsCodePage(nTokenValue);
669*cdf0e10cSrcweir             SetSrcEncoding(eCodeSet);
670*cdf0e10cSrcweir 			break;
671*cdf0e10cSrcweir 		default:
672*cdf0e10cSrcweir NEXTTOKEN:
673*cdf0e10cSrcweir 			NextToken( nToken );
674*cdf0e10cSrcweir 			break;
675*cdf0e10cSrcweir 		}
676*cdf0e10cSrcweir 		if( IsParserWorking() )
677*cdf0e10cSrcweir 			SaveState( 0 );			// bis hierhin abgearbeitet,
678*cdf0e10cSrcweir 									// weiter mit neuem Token!
679*cdf0e10cSrcweir 		nToken = GetNextToken();
680*cdf0e10cSrcweir 	}
681*cdf0e10cSrcweir 	if( SVPAR_ACCEPTED == eState && 0 < nOpenBrakets )
682*cdf0e10cSrcweir 		eState = SVPAR_ERROR;
683*cdf0e10cSrcweir }
684*cdf0e10cSrcweir 
685*cdf0e10cSrcweir void SvRTFParser::SetEncoding( rtl_TextEncoding eEnc )
686*cdf0e10cSrcweir {
687*cdf0e10cSrcweir 	if (eEnc == RTL_TEXTENCODING_DONTKNOW)
688*cdf0e10cSrcweir 		eEnc = GetCodeSet();
689*cdf0e10cSrcweir 
690*cdf0e10cSrcweir 	if (!aParserStates.empty())
691*cdf0e10cSrcweir 		aParserStates.top().eCodeSet = eEnc;
692*cdf0e10cSrcweir 	SetSrcEncoding(eEnc);
693*cdf0e10cSrcweir }
694*cdf0e10cSrcweir 
#ifdef USED
// Only compiled when USED is defined: both functions simply forward to the
// SvParser implementation of the same name.

void SvRTFParser::SaveState( int nToken )
{
	SvParser::SaveState( nToken );
}

void SvRTFParser::RestoreState()
{
	SvParser::RestoreState();
}
#endif
706*cdf0e10cSrcweir 
707*cdf0e10cSrcweir /* vi:set tabstop=4 shiftwidth=4 expandtab: */
708