xref: /aoo42x/main/svtools/source/svrtf/parrtf.cxx (revision facfa769)
15900e8ecSAndrew Rist /**************************************************************
2*facfa769Smseidel  *
35900e8ecSAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
45900e8ecSAndrew Rist  * or more contributor license agreements.  See the NOTICE file
55900e8ecSAndrew Rist  * distributed with this work for additional information
65900e8ecSAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
75900e8ecSAndrew Rist  * to you under the Apache License, Version 2.0 (the
85900e8ecSAndrew Rist  * "License"); you may not use this file except in compliance
95900e8ecSAndrew Rist  * with the License.  You may obtain a copy of the License at
10*facfa769Smseidel  *
115900e8ecSAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*facfa769Smseidel  *
135900e8ecSAndrew Rist  * Unless required by applicable law or agreed to in writing,
145900e8ecSAndrew Rist  * software distributed under the License is distributed on an
155900e8ecSAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
165900e8ecSAndrew Rist  * KIND, either express or implied.  See the License for the
175900e8ecSAndrew Rist  * specific language governing permissions and limitations
185900e8ecSAndrew Rist  * under the License.
19*facfa769Smseidel  *
205900e8ecSAndrew Rist  *************************************************************/
215900e8ecSAndrew Rist 
225900e8ecSAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_svtools.hxx"
26cdf0e10cSrcweir 
27*facfa769Smseidel #include <stdio.h> // for EOF
28cdf0e10cSrcweir #include <rtl/tencinfo.h>
29cdf0e10cSrcweir #include <tools/stream.hxx>
30cdf0e10cSrcweir #include <tools/debug.hxx>
31cdf0e10cSrcweir #include <svtools/rtftoken.h>
32cdf0e10cSrcweir #include <svtools/rtfkeywd.hxx>
33cdf0e10cSrcweir #include <svtools/parrtf.hxx>
34cdf0e10cSrcweir 
// Upper bound on the number of characters ScanText() collects per text token.
const int MAX_STRING_LEN = 1024;
// Chunk size of the buffer used while collecting a control word.
const int MAX_TOKEN_LEN = 128;

// ASCII-only character classification (deliberately locale independent).
// The argument is fully parenthesized so that expression arguments expand
// correctly; note that it is still evaluated more than once.
#define RTF_ISDIGIT( c ) ( (c) >= '0' && (c) <= '9' )
#define RTF_ISALPHA( c ) ( ((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') )
40cdf0e10cSrcweir 
SvRTFParser(SvStream & rIn,sal_uInt8 nStackSize)41cdf0e10cSrcweir SvRTFParser::SvRTFParser( SvStream& rIn, sal_uInt8 nStackSize )
42cdf0e10cSrcweir 	: SvParser( rIn, nStackSize ),
43*facfa769Smseidel 	eUNICodeSet( RTL_TEXTENCODING_MS_1252 ), // default is ANSI code set
44cdf0e10cSrcweir 	nUCharOverread( 1 )
45cdf0e10cSrcweir {
46*facfa769Smseidel 	// default is ANSI code set
47cdf0e10cSrcweir 	SetSrcEncoding( RTL_TEXTENCODING_MS_1252 );
48cdf0e10cSrcweir 	bRTF_InTextRead = false;
49cdf0e10cSrcweir }
50cdf0e10cSrcweir 
~SvRTFParser()51cdf0e10cSrcweir SvRTFParser::~SvRTFParser()
52cdf0e10cSrcweir {
53cdf0e10cSrcweir }
54cdf0e10cSrcweir 
55cdf0e10cSrcweir 
56cdf0e10cSrcweir 
57cdf0e10cSrcweir 
_GetNextToken()58cdf0e10cSrcweir int SvRTFParser::_GetNextToken()
59cdf0e10cSrcweir {
60cdf0e10cSrcweir 	int nRet = 0;
61cdf0e10cSrcweir 	do {
62cdf0e10cSrcweir 		int bNextCh = true;
63cdf0e10cSrcweir 		switch( nNextCh )
64cdf0e10cSrcweir 		{
65cdf0e10cSrcweir 		case '\\':
66cdf0e10cSrcweir 			{
67cdf0e10cSrcweir 				// Steuerzeichen
68cdf0e10cSrcweir 				switch( nNextCh = GetNextChar() )
69cdf0e10cSrcweir 				{
70cdf0e10cSrcweir 				case '{':
71cdf0e10cSrcweir 				case '}':
72cdf0e10cSrcweir 				case '\\':
73cdf0e10cSrcweir 				case '+':		// habe ich in einem RTF-File gefunden
74cdf0e10cSrcweir 				case '~':		// nonbreaking space
75cdf0e10cSrcweir 				case '-':		// optional hyphen
76cdf0e10cSrcweir 				case '_':		// nonbreaking hyphen
77cdf0e10cSrcweir 				case '\'':		// HexValue
78cdf0e10cSrcweir 					nNextCh = '\\';
79cdf0e10cSrcweir 					rInput.SeekRel( -1 );
80cdf0e10cSrcweir 					ScanText();
81cdf0e10cSrcweir 					nRet = RTF_TEXTTOKEN;
82cdf0e10cSrcweir 					bNextCh = 0 == nNextCh;
83cdf0e10cSrcweir 					break;
84cdf0e10cSrcweir 
85cdf0e10cSrcweir 				case '*':		// ignoreflag
86cdf0e10cSrcweir 					nRet = RTF_IGNOREFLAG;
87cdf0e10cSrcweir 					break;
88cdf0e10cSrcweir 				case ':':	 	// subentry in an index entry
89cdf0e10cSrcweir 					nRet = RTF_SUBENTRYINDEX;
90cdf0e10cSrcweir 					break;
91*facfa769Smseidel 				case '|':		// formula-character
92cdf0e10cSrcweir 					nRet = RTF_FORMULA;
93cdf0e10cSrcweir 					break;
94cdf0e10cSrcweir 
95cdf0e10cSrcweir 				case 0x0a:
96cdf0e10cSrcweir 				case 0x0d:
97cdf0e10cSrcweir 					nRet = RTF_PAR;
98cdf0e10cSrcweir 					break;
99cdf0e10cSrcweir 
100cdf0e10cSrcweir 				default:
101cdf0e10cSrcweir 					if( RTF_ISALPHA( nNextCh ) )
102cdf0e10cSrcweir 					{
103cdf0e10cSrcweir 						aToken = '\\';
104cdf0e10cSrcweir 						{
105cdf0e10cSrcweir 							String aStrBuffer;
106cdf0e10cSrcweir 							sal_Unicode* pStr = aStrBuffer.AllocBuffer(
107cdf0e10cSrcweir 															MAX_TOKEN_LEN );
108cdf0e10cSrcweir 							xub_StrLen nStrLen = 0;
109cdf0e10cSrcweir 							do {
110cdf0e10cSrcweir 								*(pStr + nStrLen++) = nNextCh;
111cdf0e10cSrcweir 								if( MAX_TOKEN_LEN == nStrLen )
112cdf0e10cSrcweir 								{
113cdf0e10cSrcweir 									aToken += aStrBuffer;
114*facfa769Smseidel 									aToken.GetBufferAccess(); // make unique string!
115cdf0e10cSrcweir 									nStrLen = 0;
116cdf0e10cSrcweir 								}
117cdf0e10cSrcweir 								nNextCh = GetNextChar();
118cdf0e10cSrcweir 							} while( RTF_ISALPHA( nNextCh ) );
119cdf0e10cSrcweir 							if( nStrLen )
120cdf0e10cSrcweir 							{
121cdf0e10cSrcweir 								aStrBuffer.ReleaseBufferAccess( nStrLen );
122cdf0e10cSrcweir 								aToken += aStrBuffer;
123cdf0e10cSrcweir 							}
124cdf0e10cSrcweir 						}
125cdf0e10cSrcweir 
126cdf0e10cSrcweir 						// Minus fuer numerischen Parameter
127cdf0e10cSrcweir 						int bNegValue = false;
128cdf0e10cSrcweir 						if( '-' == nNextCh )
129cdf0e10cSrcweir 						{
130cdf0e10cSrcweir 							bNegValue = true;
131cdf0e10cSrcweir 							nNextCh = GetNextChar();
132cdf0e10cSrcweir 						}
133cdf0e10cSrcweir 
134cdf0e10cSrcweir 						// evt. Numerischer Parameter
135cdf0e10cSrcweir 						if( RTF_ISDIGIT( nNextCh ) )
136cdf0e10cSrcweir 						{
137cdf0e10cSrcweir 							nTokenValue = 0;
138cdf0e10cSrcweir 							do {
139cdf0e10cSrcweir 								nTokenValue *= 10;
140cdf0e10cSrcweir 								nTokenValue += nNextCh - '0';
141cdf0e10cSrcweir 								nNextCh = GetNextChar();
142cdf0e10cSrcweir 							} while( RTF_ISDIGIT( nNextCh ) );
143cdf0e10cSrcweir 							if( bNegValue )
144cdf0e10cSrcweir 								nTokenValue = -nTokenValue;
145cdf0e10cSrcweir 							bTokenHasValue=true;
146cdf0e10cSrcweir 						}
147*facfa769Smseidel 						else if( bNegValue ) // das Minus wieder zurueck
148cdf0e10cSrcweir 						{
149cdf0e10cSrcweir 							nNextCh = '-';
150cdf0e10cSrcweir 							rInput.SeekRel( -1 );
151cdf0e10cSrcweir 						}
152*facfa769Smseidel 						if( ' ' == nNextCh ) // Blank gehoert zum Token!
153cdf0e10cSrcweir 							nNextCh = GetNextChar();
154cdf0e10cSrcweir 
155cdf0e10cSrcweir 						// suche das Token in der Tabelle:
156cdf0e10cSrcweir 						if( 0 == (nRet = GetRTFToken( aToken )) )
157cdf0e10cSrcweir 							// Unknown Control
158cdf0e10cSrcweir 							nRet = RTF_UNKNOWNCONTROL;
159cdf0e10cSrcweir 
160cdf0e10cSrcweir 						// bug 76812 - unicode token handled as normal text
161cdf0e10cSrcweir 						bNextCh = false;
162cdf0e10cSrcweir 						switch( nRet )
163cdf0e10cSrcweir 						{
164cdf0e10cSrcweir 						case RTF_UC:
165cdf0e10cSrcweir 							if( 0 <= nTokenValue )
166cdf0e10cSrcweir 							{
167cdf0e10cSrcweir 								nUCharOverread = (sal_uInt8)nTokenValue;
168cdf0e10cSrcweir #if 1
169*facfa769Smseidel 								// cmc: other ifdef breaks #i3584
170cdf0e10cSrcweir 								aParserStates.top().
171cdf0e10cSrcweir 									nUCharOverread = nUCharOverread;
172cdf0e10cSrcweir #else
173cdf0e10cSrcweir 								if( !nUCharOverread )
174cdf0e10cSrcweir 									nUCharOverread = aParserStates.top().nUCharOverread;
175cdf0e10cSrcweir 								else
176cdf0e10cSrcweir 									aParserStates.top().
177cdf0e10cSrcweir 										nUCharOverread = nUCharOverread;
178cdf0e10cSrcweir #endif
179cdf0e10cSrcweir 							}
180d92770c0Smseidel 							aToken.Erase(); // #i47831# erase token to prevent the token from being treated as text
181cdf0e10cSrcweir 							// read next token
182cdf0e10cSrcweir 							nRet = 0;
183cdf0e10cSrcweir 							break;
184cdf0e10cSrcweir 
185cdf0e10cSrcweir 						case RTF_UPR:
186cdf0e10cSrcweir 							if (!_inSkipGroup) {
187cdf0e10cSrcweir 							// UPR - overread the group with the ansi
188*facfa769Smseidel 							//       information
189cdf0e10cSrcweir 							while( '{' != _GetNextToken() )
190cdf0e10cSrcweir 								;
191cdf0e10cSrcweir 							SkipGroup();
192*facfa769Smseidel 							_GetNextToken(); // overread the last bracket
193cdf0e10cSrcweir 							nRet = 0;
194cdf0e10cSrcweir 							}
195cdf0e10cSrcweir 							break;
196cdf0e10cSrcweir 
197cdf0e10cSrcweir 						case RTF_U:
198cdf0e10cSrcweir 							if( !bRTF_InTextRead )
199cdf0e10cSrcweir 							{
200cdf0e10cSrcweir 								nRet = RTF_TEXTTOKEN;
201cdf0e10cSrcweir 								aToken = (sal_Unicode)nTokenValue;
202cdf0e10cSrcweir 
203cdf0e10cSrcweir 								// overread the next n "RTF" characters. This
204cdf0e10cSrcweir 								// can be also \{, \}, \'88
205cdf0e10cSrcweir 								for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
206cdf0e10cSrcweir 								{
207cdf0e10cSrcweir 									sal_Unicode cAnsi = nNextCh;
208cdf0e10cSrcweir 									while( 0xD == cAnsi )
209cdf0e10cSrcweir 										cAnsi = GetNextChar();
210cdf0e10cSrcweir 									while( 0xA == cAnsi )
211cdf0e10cSrcweir 										cAnsi = GetNextChar();
212cdf0e10cSrcweir 
213cdf0e10cSrcweir 									if( '\\' == cAnsi &&
214cdf0e10cSrcweir 										'\'' == ( cAnsi = GetNextChar() ))
215cdf0e10cSrcweir 										// HexValue ueberlesen
216cdf0e10cSrcweir 										cAnsi = GetHexValue();
217cdf0e10cSrcweir 									nNextCh = GetNextChar();
218cdf0e10cSrcweir 								}
219cdf0e10cSrcweir 								ScanText();
220cdf0e10cSrcweir 								bNextCh = 0 == nNextCh;
221cdf0e10cSrcweir 							}
222cdf0e10cSrcweir 							break;
223cdf0e10cSrcweir 						}
224cdf0e10cSrcweir 					}
225cdf0e10cSrcweir 					else if( SVPAR_PENDING != eState )
226cdf0e10cSrcweir 					{
227cdf0e10cSrcweir 						// Bug 34631 - "\ " ueberlesen - Blank als Zeichen
228cdf0e10cSrcweir 						// eState = SVPAR_ERROR;
229cdf0e10cSrcweir 						bNextCh = false;
230cdf0e10cSrcweir 					}
231cdf0e10cSrcweir 					break;
232cdf0e10cSrcweir 				}
233cdf0e10cSrcweir 			}
234cdf0e10cSrcweir 			break;
235cdf0e10cSrcweir 
236cdf0e10cSrcweir 		case sal_Unicode(EOF):
237cdf0e10cSrcweir 			eState = SVPAR_ACCEPTED;
238cdf0e10cSrcweir 			nRet = nNextCh;
239cdf0e10cSrcweir 			break;
240cdf0e10cSrcweir 
241cdf0e10cSrcweir 		case '{':
242cdf0e10cSrcweir 			{
243cdf0e10cSrcweir 				if( 0 <= nOpenBrakets )
244cdf0e10cSrcweir 				{
245cdf0e10cSrcweir 					RtfParserState_Impl aState( nUCharOverread, GetSrcEncoding() );
246*facfa769Smseidel 					aParserStates.push( aState );
247cdf0e10cSrcweir 				}
248cdf0e10cSrcweir 				++nOpenBrakets;
249*facfa769Smseidel 				DBG_ASSERT(
250*facfa769Smseidel 					static_cast<size_t>(nOpenBrakets) == aParserStates.size(),
251*facfa769Smseidel 					"ParserStateStack unequal to bracket count" );
252cdf0e10cSrcweir 				nRet = nNextCh;
253cdf0e10cSrcweir 			}
254cdf0e10cSrcweir 			break;
255cdf0e10cSrcweir 
256cdf0e10cSrcweir 		case '}':
257cdf0e10cSrcweir 			--nOpenBrakets;
258cdf0e10cSrcweir 			if( 0 <= nOpenBrakets )
259cdf0e10cSrcweir 			{
260*facfa769Smseidel 				aParserStates.pop();
261cdf0e10cSrcweir 				if( !aParserStates.empty() )
262cdf0e10cSrcweir 				{
263cdf0e10cSrcweir 					const RtfParserState_Impl& rRPS =
264cdf0e10cSrcweir 							aParserStates.top();
265cdf0e10cSrcweir 					nUCharOverread = rRPS.nUCharOverread;
266cdf0e10cSrcweir 					SetSrcEncoding( rRPS.eCodeSet );
267cdf0e10cSrcweir 				}
268cdf0e10cSrcweir 				else
269cdf0e10cSrcweir 				{
270cdf0e10cSrcweir 					nUCharOverread = 1;
271cdf0e10cSrcweir 					SetSrcEncoding( GetCodeSet() );
272cdf0e10cSrcweir 				}
273cdf0e10cSrcweir 			}
274*facfa769Smseidel 			DBG_ASSERT(
275*facfa769Smseidel 				static_cast<size_t>(nOpenBrakets) == aParserStates.size(),
276*facfa769Smseidel 				"ParserStateStack unequal to bracket count" );
277cdf0e10cSrcweir 			nRet = nNextCh;
278cdf0e10cSrcweir 			break;
279cdf0e10cSrcweir 
280cdf0e10cSrcweir 		case 0x0d:
281cdf0e10cSrcweir 		case 0x0a:
282cdf0e10cSrcweir 			break;
283cdf0e10cSrcweir 
284cdf0e10cSrcweir 		default:
285cdf0e10cSrcweir 			// es folgt normaler Text
286cdf0e10cSrcweir 			ScanText();
287cdf0e10cSrcweir 			nRet = RTF_TEXTTOKEN;
288cdf0e10cSrcweir 			bNextCh = 0 == nNextCh;
289cdf0e10cSrcweir 			break;
290cdf0e10cSrcweir 		}
291cdf0e10cSrcweir 
292cdf0e10cSrcweir 		if( bNextCh )
293cdf0e10cSrcweir 			nNextCh = GetNextChar();
294cdf0e10cSrcweir 
295cdf0e10cSrcweir 	} while( !nRet && SVPAR_WORKING == eState );
296cdf0e10cSrcweir 	return nRet;
297cdf0e10cSrcweir }
298cdf0e10cSrcweir 
299cdf0e10cSrcweir 
GetHexValue()300cdf0e10cSrcweir sal_Unicode SvRTFParser::GetHexValue()
301cdf0e10cSrcweir {
302cdf0e10cSrcweir 	// Hex-Wert sammeln
303cdf0e10cSrcweir 	register int n;
304cdf0e10cSrcweir 	register sal_Unicode nHexVal = 0;
305cdf0e10cSrcweir 
306cdf0e10cSrcweir 	for( n = 0; n < 2; ++n )
307cdf0e10cSrcweir 	{
308cdf0e10cSrcweir 		nHexVal *= 16;
309cdf0e10cSrcweir 		nNextCh = GetNextChar();
310cdf0e10cSrcweir 		if( nNextCh >= '0' && nNextCh <= '9' )
311cdf0e10cSrcweir 			nHexVal += (nNextCh - 48);
312cdf0e10cSrcweir 		else if( nNextCh >= 'a' && nNextCh <= 'f' )
313cdf0e10cSrcweir 			nHexVal += (nNextCh - 87);
314cdf0e10cSrcweir 		else if( nNextCh >= 'A' && nNextCh <= 'F' )
315cdf0e10cSrcweir 			nHexVal += (nNextCh - 55);
316cdf0e10cSrcweir 	}
317cdf0e10cSrcweir 	return nHexVal;
318cdf0e10cSrcweir }
319cdf0e10cSrcweir 
ScanText(const sal_Unicode cBreak)320cdf0e10cSrcweir void SvRTFParser::ScanText( const sal_Unicode cBreak )
321cdf0e10cSrcweir {
322cdf0e10cSrcweir 	String aStrBuffer;
323cdf0e10cSrcweir 	int bWeiter = true;
324cdf0e10cSrcweir 	while( bWeiter && IsParserWorking() && aStrBuffer.Len() < MAX_STRING_LEN)
325cdf0e10cSrcweir 	{
326cdf0e10cSrcweir 		int bNextCh = true;
327cdf0e10cSrcweir 		switch( nNextCh )
328cdf0e10cSrcweir 		{
329cdf0e10cSrcweir 		case '\\':
330cdf0e10cSrcweir 			{
331cdf0e10cSrcweir 				switch (nNextCh = GetNextChar())
332cdf0e10cSrcweir 				{
333cdf0e10cSrcweir 				case '\'':
334cdf0e10cSrcweir 					{
335cdf0e10cSrcweir 
336cdf0e10cSrcweir #if 0
337*facfa769Smseidel 						// #i35653 patch from cmc
338*facfa769Smseidel 						ByteString aByteString(static_cast<char>(GetHexValue()));
339*facfa769Smseidel 						if (aByteString.Len())
340*facfa769Smseidel 							aStrBuffer.Append(String(aByteString, GetSrcEncoding()));
341cdf0e10cSrcweir #else
342*facfa769Smseidel 						ByteString aByteString;
343*facfa769Smseidel 						while (1)
344*facfa769Smseidel 						{
345*facfa769Smseidel 							aByteString.Append((char)GetHexValue());
346*facfa769Smseidel 
347*facfa769Smseidel 							bool bBreak = false;
348*facfa769Smseidel 							sal_Char nSlash = '\\';
349*facfa769Smseidel 							while (!bBreak)
350*facfa769Smseidel 							{
351cdf0e10cSrcweir 								wchar_t __next=GetNextChar();
352cdf0e10cSrcweir 								if (__next>0xFF) // fix for #i43933# and #i35653#
353cdf0e10cSrcweir 								{
354cdf0e10cSrcweir 									if (aByteString.Len())
355cdf0e10cSrcweir 										aStrBuffer.Append(String(aByteString, GetSrcEncoding()));
356cdf0e10cSrcweir 									aStrBuffer.Append((sal_Unicode)__next);
357cdf0e10cSrcweir 
358cdf0e10cSrcweir 									aByteString.Erase();
359cdf0e10cSrcweir 									continue;
360cdf0e10cSrcweir 								}
361*facfa769Smseidel 								nSlash = (sal_Char)__next;
362*facfa769Smseidel 								while (nSlash == 0xD || nSlash == 0xA)
363*facfa769Smseidel 									nSlash = (sal_Char)GetNextChar();
364*facfa769Smseidel 
365*facfa769Smseidel 								switch (nSlash)
366*facfa769Smseidel 								{
367*facfa769Smseidel 									case '{':
368*facfa769Smseidel 									case '}':
369*facfa769Smseidel 									case '\\':
370*facfa769Smseidel 										bBreak = true;
371*facfa769Smseidel 										break;
372*facfa769Smseidel 									default:
373*facfa769Smseidel 										aByteString.Append(nSlash);
374*facfa769Smseidel 										break;
375*facfa769Smseidel 								}
376*facfa769Smseidel 							}
377*facfa769Smseidel 
378*facfa769Smseidel 							nNextCh = GetNextChar();
379*facfa769Smseidel 
380*facfa769Smseidel 							if (nSlash != '\\' || nNextCh != '\'')
381*facfa769Smseidel 							{
382*facfa769Smseidel 								rInput.SeekRel(-1);
383*facfa769Smseidel 								nNextCh = nSlash;
384*facfa769Smseidel 								break;
385*facfa769Smseidel 							}
386*facfa769Smseidel 						}
387*facfa769Smseidel 
388*facfa769Smseidel 						bNextCh = false;
389*facfa769Smseidel 
390*facfa769Smseidel 						if (aByteString.Len())
391*facfa769Smseidel 							aStrBuffer.Append(String(aByteString, GetSrcEncoding()));
392cdf0e10cSrcweir #endif
393*facfa769Smseidel 					}
394cdf0e10cSrcweir 					break;
395cdf0e10cSrcweir 				case '\\':
396cdf0e10cSrcweir 				case '}':
397cdf0e10cSrcweir 				case '{':
398cdf0e10cSrcweir 				case '+':		// habe ich in einem RTF-File gefunden
399cdf0e10cSrcweir 					aStrBuffer.Append(nNextCh);
400cdf0e10cSrcweir 					break;
401cdf0e10cSrcweir 				case '~':		// nonbreaking space
402cdf0e10cSrcweir 					aStrBuffer.Append(static_cast< sal_Unicode >(0xA0));
403cdf0e10cSrcweir 					break;
404cdf0e10cSrcweir 				case '-':		// optional hyphen
405cdf0e10cSrcweir 					aStrBuffer.Append(static_cast< sal_Unicode >(0xAD));
406cdf0e10cSrcweir 					break;
407cdf0e10cSrcweir 				case '_':		// nonbreaking hyphen
408cdf0e10cSrcweir 					aStrBuffer.Append(static_cast< sal_Unicode >(0x2011));
409cdf0e10cSrcweir 					break;
410cdf0e10cSrcweir 
411cdf0e10cSrcweir 				case 'u':
412cdf0e10cSrcweir 					// UNI-Code Zeichen lesen
413cdf0e10cSrcweir 					{
414cdf0e10cSrcweir 						nNextCh = GetNextChar();
415cdf0e10cSrcweir 						rInput.SeekRel( -2 );
416cdf0e10cSrcweir 
417cdf0e10cSrcweir 						if( '-' == nNextCh || RTF_ISDIGIT( nNextCh ) )
418cdf0e10cSrcweir 						{
419cdf0e10cSrcweir 							bRTF_InTextRead = true;
420cdf0e10cSrcweir 
421cdf0e10cSrcweir 							String sSave( aToken );
422cdf0e10cSrcweir 							nNextCh = '\\';
423*facfa769Smseidel 							#ifdef DBG_UTIL
424cdf0e10cSrcweir 							int nToken =
425*facfa769Smseidel 							#endif
426*facfa769Smseidel 								_GetNextToken();
427cdf0e10cSrcweir 							DBG_ASSERT( RTF_U == nToken, "doch kein UNI-Code Zeichen" );
428cdf0e10cSrcweir 							// dont convert symbol chars
429cdf0e10cSrcweir 							aStrBuffer.Append(
430*facfa769Smseidel 								static_cast< sal_Unicode >(nTokenValue));
431cdf0e10cSrcweir 
432cdf0e10cSrcweir 							// overread the next n "RTF" characters. This
433cdf0e10cSrcweir 							// can be also \{, \}, \'88
434cdf0e10cSrcweir 							for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
435cdf0e10cSrcweir 							{
436cdf0e10cSrcweir 								sal_Unicode cAnsi = nNextCh;
437cdf0e10cSrcweir 								while( 0xD == cAnsi )
438cdf0e10cSrcweir 									cAnsi = GetNextChar();
439cdf0e10cSrcweir 								while( 0xA == cAnsi )
440cdf0e10cSrcweir 									cAnsi = GetNextChar();
441cdf0e10cSrcweir 
442cdf0e10cSrcweir 								if( '\\' == cAnsi &&
443cdf0e10cSrcweir 									'\'' == ( cAnsi = GetNextChar() ))
444cdf0e10cSrcweir 									// HexValue ueberlesen
445cdf0e10cSrcweir 									cAnsi = GetHexValue();
446cdf0e10cSrcweir 								nNextCh = GetNextChar();
447cdf0e10cSrcweir 							}
448cdf0e10cSrcweir 							bNextCh = false;
449cdf0e10cSrcweir 							aToken = sSave;
450cdf0e10cSrcweir 							bRTF_InTextRead = false;
451cdf0e10cSrcweir 						}
452cdf0e10cSrcweir 						else
453cdf0e10cSrcweir 						{
454cdf0e10cSrcweir 							nNextCh = '\\';
455*facfa769Smseidel 							bWeiter = false; // Abbrechen, String zusammen
456cdf0e10cSrcweir 						}
457cdf0e10cSrcweir 					}
458cdf0e10cSrcweir 					break;
459cdf0e10cSrcweir 
460cdf0e10cSrcweir 				default:
461cdf0e10cSrcweir 					rInput.SeekRel( -1 );
462cdf0e10cSrcweir 					nNextCh = '\\';
463*facfa769Smseidel 					bWeiter = false; // Abbrechen, String zusammen
464cdf0e10cSrcweir 					break;
465cdf0e10cSrcweir 				}
466cdf0e10cSrcweir 			}
467cdf0e10cSrcweir 			break;
468cdf0e10cSrcweir 
469cdf0e10cSrcweir 		case sal_Unicode(EOF):
470cdf0e10cSrcweir 				eState = SVPAR_ERROR;
471cdf0e10cSrcweir 				// weiter
472cdf0e10cSrcweir 		case '{':
473cdf0e10cSrcweir 		case '}':
474cdf0e10cSrcweir 			bWeiter = false;
475cdf0e10cSrcweir 			break;
476cdf0e10cSrcweir 
477cdf0e10cSrcweir 		case 0x0a:
478cdf0e10cSrcweir 		case 0x0d:
479cdf0e10cSrcweir 			break;
480cdf0e10cSrcweir 
481cdf0e10cSrcweir 		default:
482cdf0e10cSrcweir 			if( nNextCh == cBreak || aStrBuffer.Len() >= MAX_STRING_LEN)
483cdf0e10cSrcweir 				bWeiter = false;
484cdf0e10cSrcweir 			else
485cdf0e10cSrcweir 			{
486cdf0e10cSrcweir 				do {
487cdf0e10cSrcweir 					// alle anderen Zeichen kommen in den Text
488cdf0e10cSrcweir 					aStrBuffer.Append(nNextCh);
489cdf0e10cSrcweir 
490cdf0e10cSrcweir 					if (sal_Unicode(EOF) == (nNextCh = GetNextChar()))
491cdf0e10cSrcweir 					{
492*facfa769Smseidel 						if (aStrBuffer.Len())
493*facfa769Smseidel 							aToken += aStrBuffer;
494cdf0e10cSrcweir 						return;
495cdf0e10cSrcweir 					}
496cdf0e10cSrcweir 				} while
497*facfa769Smseidel 				(
498*facfa769Smseidel 					(RTF_ISALPHA(nNextCh) || RTF_ISDIGIT(nNextCh)) &&
499*facfa769Smseidel 					(aStrBuffer.Len() < MAX_STRING_LEN)
500*facfa769Smseidel 				);
501cdf0e10cSrcweir 				bNextCh = false;
502cdf0e10cSrcweir 			}
503cdf0e10cSrcweir 		}
504cdf0e10cSrcweir 
505cdf0e10cSrcweir 		if( bWeiter && bNextCh )
506cdf0e10cSrcweir 			nNextCh = GetNextChar();
507cdf0e10cSrcweir 	}
508cdf0e10cSrcweir 
509cdf0e10cSrcweir 	if (aStrBuffer.Len())
510cdf0e10cSrcweir 		aToken += aStrBuffer;
511cdf0e10cSrcweir }
512cdf0e10cSrcweir 
513cdf0e10cSrcweir 
514cdf0e10cSrcweir short SvRTFParser::_inSkipGroup=0;
515cdf0e10cSrcweir 
SkipGroup()516cdf0e10cSrcweir void SvRTFParser::SkipGroup()
517cdf0e10cSrcweir {
518cdf0e10cSrcweir short nBrackets=1;
519*facfa769Smseidel if (_inSkipGroup>0)
520cdf0e10cSrcweir 	return;
521cdf0e10cSrcweir _inSkipGroup++;
522*facfa769Smseidel #if 1 // #i16185# fecking \bin keyword
523*facfa769Smseidel 	do
524*facfa769Smseidel 	{
525*facfa769Smseidel 		switch (nNextCh)
526*facfa769Smseidel 		{
527*facfa769Smseidel 			case '{':
528*facfa769Smseidel 				++nBrackets;
529*facfa769Smseidel 				break;
530*facfa769Smseidel 			case '}':
531cdf0e10cSrcweir 				if (!--nBrackets) {
532cdf0e10cSrcweir 					_inSkipGroup--;
533*facfa769Smseidel 					return;
534cdf0e10cSrcweir 				}
535*facfa769Smseidel 				break;
536*facfa769Smseidel 		}
537*facfa769Smseidel 		int nToken = _GetNextToken();
538*facfa769Smseidel 		if (nToken == RTF_BIN)
539*facfa769Smseidel 		{
540*facfa769Smseidel 			rInput.SeekRel(-1);
541*facfa769Smseidel 			rInput.SeekRel(nTokenValue);
542*facfa769Smseidel 			nNextCh = GetNextChar();
543*facfa769Smseidel 		}
544cdf0e10cSrcweir 		while (nNextCh==0xa || nNextCh==0xd)
545cdf0e10cSrcweir 		{
546cdf0e10cSrcweir 			nNextCh = GetNextChar();
547cdf0e10cSrcweir 		}
548*facfa769Smseidel 	} while (sal_Unicode(EOF) != nNextCh && IsParserWorking());
549cdf0e10cSrcweir #else
550cdf0e10cSrcweir 	sal_Unicode cPrev = 0;
551cdf0e10cSrcweir 	do {
552cdf0e10cSrcweir 		switch( nNextCh )
553cdf0e10cSrcweir 		{
554cdf0e10cSrcweir 		case '{':
555cdf0e10cSrcweir 			if( '\\' != cPrev )
556cdf0e10cSrcweir 				++nBrackets;
557cdf0e10cSrcweir 			break;
558cdf0e10cSrcweir 
559cdf0e10cSrcweir 		case '}':
560cdf0e10cSrcweir 			if( '\\' != cPrev && !--nBrackets )
561cdf0e10cSrcweir 				return;
562cdf0e10cSrcweir 			break;
563cdf0e10cSrcweir 
564cdf0e10cSrcweir 		case '\\':
565cdf0e10cSrcweir 			if( '\\' == cPrev )
566cdf0e10cSrcweir 				nNextCh = 0;
567cdf0e10cSrcweir 			break;
568cdf0e10cSrcweir 		}
569cdf0e10cSrcweir 		cPrev = nNextCh;
570cdf0e10cSrcweir 		nNextCh = GetNextChar();
571cdf0e10cSrcweir 	} while( sal_Unicode(EOF) != nNextCh && IsParserWorking() );
572cdf0e10cSrcweir #endif
573cdf0e10cSrcweir 
574cdf0e10cSrcweir 	if( SVPAR_PENDING != eState && '}' != nNextCh )
575cdf0e10cSrcweir 		eState = SVPAR_ERROR;
576cdf0e10cSrcweir 	_inSkipGroup--;
577cdf0e10cSrcweir }
578cdf0e10cSrcweir 
ReadUnknownData()579cdf0e10cSrcweir void SvRTFParser::ReadUnknownData()	{ SkipGroup(); }
ReadBitmapData()580cdf0e10cSrcweir void SvRTFParser::ReadBitmapData()	{ SkipGroup(); }
ReadOLEData()581cdf0e10cSrcweir void SvRTFParser::ReadOLEData()		{ SkipGroup(); }
582cdf0e10cSrcweir 
583cdf0e10cSrcweir 
CallParser()584cdf0e10cSrcweir SvParserState SvRTFParser::CallParser()
585cdf0e10cSrcweir {
586cdf0e10cSrcweir 	sal_Char cFirstCh;
587*facfa769Smseidel 	nNextChPos = rInput.Tell();
588cdf0e10cSrcweir 	rInput >> cFirstCh; nNextCh = cFirstCh;
589cdf0e10cSrcweir 	eState = SVPAR_WORKING;
590cdf0e10cSrcweir 	nOpenBrakets = 0;
591cdf0e10cSrcweir 	SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 );
592cdf0e10cSrcweir 	eUNICodeSet = RTL_TEXTENCODING_MS_1252; 	// default ist ANSI-CodeSet
593cdf0e10cSrcweir 
594cdf0e10cSrcweir 	// die 1. beiden Token muessen '{' und \\rtf sein !!
595cdf0e10cSrcweir 	if( '{' == GetNextToken() && RTF_RTF == GetNextToken() )
596cdf0e10cSrcweir 	{
597cdf0e10cSrcweir 		AddRef();
598cdf0e10cSrcweir 		Continue( 0 );
599cdf0e10cSrcweir 		if( SVPAR_PENDING != eState )
600cdf0e10cSrcweir 			ReleaseRef();		// dann brauchen wir den Parser nicht mehr!
601cdf0e10cSrcweir 	}
602cdf0e10cSrcweir 	else
603cdf0e10cSrcweir 		eState = SVPAR_ERROR;
604cdf0e10cSrcweir 
605cdf0e10cSrcweir 	return eState;
606cdf0e10cSrcweir }
607cdf0e10cSrcweir 
Continue(int nToken)608cdf0e10cSrcweir void SvRTFParser::Continue( int nToken )
609cdf0e10cSrcweir {
610cdf0e10cSrcweir //	DBG_ASSERT( SVPAR_CS_DONTKNOW == GetCharSet(),
611cdf0e10cSrcweir //				"Zeichensatz wurde geaendert." );
612cdf0e10cSrcweir 
613cdf0e10cSrcweir 	if( !nToken )
614cdf0e10cSrcweir 		nToken = GetNextToken();
615cdf0e10cSrcweir 
616cdf0e10cSrcweir 	while( IsParserWorking() )
617cdf0e10cSrcweir 	{
618cdf0e10cSrcweir 		SaveState( nToken );
619cdf0e10cSrcweir 		switch( nToken )
620cdf0e10cSrcweir 		{
621cdf0e10cSrcweir 		case '}':
622cdf0e10cSrcweir 			if( nOpenBrakets )
623cdf0e10cSrcweir 				goto NEXTTOKEN;
624cdf0e10cSrcweir 			eState = SVPAR_ACCEPTED;
625cdf0e10cSrcweir 			break;
626cdf0e10cSrcweir 
627cdf0e10cSrcweir 		case '{':
628cdf0e10cSrcweir 			// eine unbekannte Gruppe ?
629cdf0e10cSrcweir 			{
630cdf0e10cSrcweir 				if( RTF_IGNOREFLAG != GetNextToken() )
631cdf0e10cSrcweir 					nToken = SkipToken( -1 );
632cdf0e10cSrcweir 				else if( RTF_UNKNOWNCONTROL != GetNextToken() )
633cdf0e10cSrcweir 					nToken = SkipToken( -2 );
634cdf0e10cSrcweir 				else
635cdf0e10cSrcweir 				{
636cdf0e10cSrcweir 					// gleich herausfiltern
637cdf0e10cSrcweir 					ReadUnknownData();
638cdf0e10cSrcweir 					nToken = GetNextToken();
639cdf0e10cSrcweir 					if( '}' != nToken )
640cdf0e10cSrcweir 						eState = SVPAR_ERROR;
641cdf0e10cSrcweir 					break;		// auf zum naechsten Token!!
642cdf0e10cSrcweir 				}
643cdf0e10cSrcweir 			}
644cdf0e10cSrcweir 			goto NEXTTOKEN;
645cdf0e10cSrcweir 
646cdf0e10cSrcweir 		case RTF_UNKNOWNCONTROL:
647cdf0e10cSrcweir 			break;		// unbekannte Token ueberspringen
648cdf0e10cSrcweir 		case RTF_NEXTTYPE:
649*facfa769Smseidel 		case RTF_ANSITYPE:
650*facfa769Smseidel 			SetEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 );
651*facfa769Smseidel 			break;
652*facfa769Smseidel 		case RTF_MACTYPE:
653*facfa769Smseidel 			SetEncoding( eCodeSet = RTL_TEXTENCODING_APPLE_ROMAN );
654*facfa769Smseidel 			break;
655*facfa769Smseidel 		case RTF_PCTYPE:
656*facfa769Smseidel 			SetEncoding( eCodeSet = RTL_TEXTENCODING_IBM_437 );
657*facfa769Smseidel 			break;
658*facfa769Smseidel 		case RTF_PCATYPE:
659*facfa769Smseidel 			SetEncoding( eCodeSet = RTL_TEXTENCODING_IBM_850 );
660*facfa769Smseidel 			break;
661cdf0e10cSrcweir 		case RTF_ANSICPG:
662*facfa769Smseidel 			eCodeSet = rtl_getTextEncodingFromWindowsCodePage(nTokenValue);
663*facfa769Smseidel 			SetEncoding(eCodeSet);
664cdf0e10cSrcweir 			break;
665cdf0e10cSrcweir 		default:
666cdf0e10cSrcweir NEXTTOKEN:
667cdf0e10cSrcweir 			NextToken( nToken );
668cdf0e10cSrcweir 			break;
669cdf0e10cSrcweir 		}
670cdf0e10cSrcweir 		if( IsParserWorking() )
671cdf0e10cSrcweir 			SaveState( 0 );			// bis hierhin abgearbeitet,
672cdf0e10cSrcweir 									// weiter mit neuem Token!
673cdf0e10cSrcweir 		nToken = GetNextToken();
674cdf0e10cSrcweir 	}
675cdf0e10cSrcweir 	if( SVPAR_ACCEPTED == eState && 0 < nOpenBrakets )
676cdf0e10cSrcweir 		eState = SVPAR_ERROR;
677cdf0e10cSrcweir }
678cdf0e10cSrcweir 
SetEncoding(rtl_TextEncoding eEnc)679cdf0e10cSrcweir void SvRTFParser::SetEncoding( rtl_TextEncoding eEnc )
680cdf0e10cSrcweir {
681cdf0e10cSrcweir 	if (eEnc == RTL_TEXTENCODING_DONTKNOW)
682cdf0e10cSrcweir 		eEnc = GetCodeSet();
683cdf0e10cSrcweir 
684cdf0e10cSrcweir 	if (!aParserStates.empty())
685cdf0e10cSrcweir 		aParserStates.top().eCodeSet = eEnc;
686cdf0e10cSrcweir 	SetSrcEncoding(eEnc);
687cdf0e10cSrcweir }
688cdf0e10cSrcweir 
#ifdef USED
// Only compiled when USED is defined: both functions simply forward to
// the SvParser implementation.
void SvRTFParser::SaveState( int nToken )
{
	SvParser::SaveState( nToken );
}

void SvRTFParser::RestoreState()
{
	SvParser::RestoreState();
}
#endif
700cdf0e10cSrcweir 
701*facfa769Smseidel /* vim: set noet sw=4 ts=4: */
702