xref: /aoo42x/main/svtools/source/svrtf/parrtf.cxx (revision 5900e8ec)
1*5900e8ecSAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*5900e8ecSAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*5900e8ecSAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*5900e8ecSAndrew Rist  * distributed with this work for additional information
6*5900e8ecSAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*5900e8ecSAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*5900e8ecSAndrew Rist  * "License"); you may not use this file except in compliance
9*5900e8ecSAndrew Rist  * with the License.  You may obtain a copy of the License at
10*5900e8ecSAndrew Rist  *
11*5900e8ecSAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*5900e8ecSAndrew Rist  *
13*5900e8ecSAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*5900e8ecSAndrew Rist  * software distributed under the License is distributed on an
15*5900e8ecSAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*5900e8ecSAndrew Rist  * KIND, either express or implied.  See the License for the
17*5900e8ecSAndrew Rist  * specific language governing permissions and limitations
18*5900e8ecSAndrew Rist  * under the License.
19*5900e8ecSAndrew Rist  *
20*5900e8ecSAndrew Rist  *************************************************************/
21*5900e8ecSAndrew Rist 
22*5900e8ecSAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_svtools.hxx"
26cdf0e10cSrcweir 
27cdf0e10cSrcweir /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil -*- */
28cdf0e10cSrcweir 
29cdf0e10cSrcweir #include <stdio.h>		                // for EOF
30cdf0e10cSrcweir #include <rtl/tencinfo.h>
31cdf0e10cSrcweir #include <tools/stream.hxx>
32cdf0e10cSrcweir #include <tools/debug.hxx>
33cdf0e10cSrcweir #include <svtools/rtftoken.h>
34cdf0e10cSrcweir #include <svtools/rtfkeywd.hxx>
35cdf0e10cSrcweir #include <svtools/parrtf.hxx>
36cdf0e10cSrcweir 
// Upper bound (in characters) for one text run collected by ScanText().
const int MAX_STRING_LEN = 1024;
// Size of the chunk buffer used while reading a control word name.
const int MAX_TOKEN_LEN = 128;

// ASCII-only character classification. The argument is parenthesized so that
// expressions containing low-precedence operators (e.g. ?:) classify
// correctly. NOTE: the argument is evaluated more than once, so do not pass
// expressions with side effects.
#define RTF_ISDIGIT( c ) ( (c) >= '0' && (c) <= '9' )
#define RTF_ISALPHA( c ) ( ((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') )
42cdf0e10cSrcweir 
43cdf0e10cSrcweir SvRTFParser::SvRTFParser( SvStream& rIn, sal_uInt8 nStackSize )
44cdf0e10cSrcweir 	: SvParser( rIn, nStackSize ),
45cdf0e10cSrcweir 	eUNICodeSet( RTL_TEXTENCODING_MS_1252 ), 	// default ist ANSI-CodeSet
46cdf0e10cSrcweir 	nUCharOverread( 1 )
47cdf0e10cSrcweir {
48cdf0e10cSrcweir 	// default ist ANSI-CodeSet
49cdf0e10cSrcweir 	SetSrcEncoding( RTL_TEXTENCODING_MS_1252 );
50cdf0e10cSrcweir 	bRTF_InTextRead = false;
51cdf0e10cSrcweir }
52cdf0e10cSrcweir 
53cdf0e10cSrcweir SvRTFParser::~SvRTFParser()
54cdf0e10cSrcweir {
55cdf0e10cSrcweir }
56cdf0e10cSrcweir 
57cdf0e10cSrcweir 
58cdf0e10cSrcweir 
59cdf0e10cSrcweir 
60cdf0e10cSrcweir int SvRTFParser::_GetNextToken()
61cdf0e10cSrcweir {
62cdf0e10cSrcweir 	int nRet = 0;
63cdf0e10cSrcweir 	do {
64cdf0e10cSrcweir 		int bNextCh = true;
65cdf0e10cSrcweir 		switch( nNextCh )
66cdf0e10cSrcweir 		{
67cdf0e10cSrcweir 		case '\\':
68cdf0e10cSrcweir 			{
69cdf0e10cSrcweir 				// Steuerzeichen
70cdf0e10cSrcweir 				switch( nNextCh = GetNextChar() )
71cdf0e10cSrcweir 				{
72cdf0e10cSrcweir 				case '{':
73cdf0e10cSrcweir 				case '}':
74cdf0e10cSrcweir 				case '\\':
75cdf0e10cSrcweir 				case '+':		// habe ich in einem RTF-File gefunden
76cdf0e10cSrcweir 				case '~':		// nonbreaking space
77cdf0e10cSrcweir 				case '-':		// optional hyphen
78cdf0e10cSrcweir 				case '_':		// nonbreaking hyphen
79cdf0e10cSrcweir 				case '\'':		// HexValue
80cdf0e10cSrcweir 					nNextCh = '\\';
81cdf0e10cSrcweir 					rInput.SeekRel( -1 );
82cdf0e10cSrcweir 					ScanText();
83cdf0e10cSrcweir 					nRet = RTF_TEXTTOKEN;
84cdf0e10cSrcweir 					bNextCh = 0 == nNextCh;
85cdf0e10cSrcweir 					break;
86cdf0e10cSrcweir 
87cdf0e10cSrcweir 				case '*':		// ignoreflag
88cdf0e10cSrcweir 					nRet = RTF_IGNOREFLAG;
89cdf0e10cSrcweir 					break;
90cdf0e10cSrcweir 				case ':':	 	// subentry in an index entry
91cdf0e10cSrcweir 					nRet = RTF_SUBENTRYINDEX;
92cdf0e10cSrcweir 					break;
93cdf0e10cSrcweir 				case '|':		// formula-charakter
94cdf0e10cSrcweir 					nRet = RTF_FORMULA;
95cdf0e10cSrcweir 					break;
96cdf0e10cSrcweir 
97cdf0e10cSrcweir 				case 0x0a:
98cdf0e10cSrcweir 				case 0x0d:
99cdf0e10cSrcweir 					nRet = RTF_PAR;
100cdf0e10cSrcweir 					break;
101cdf0e10cSrcweir 
102cdf0e10cSrcweir 				default:
103cdf0e10cSrcweir 					if( RTF_ISALPHA( nNextCh ) )
104cdf0e10cSrcweir 					{
105cdf0e10cSrcweir 						aToken = '\\';
106cdf0e10cSrcweir 						{
107cdf0e10cSrcweir 							String aStrBuffer;
108cdf0e10cSrcweir 							sal_Unicode* pStr = aStrBuffer.AllocBuffer(
109cdf0e10cSrcweir 															MAX_TOKEN_LEN );
110cdf0e10cSrcweir 							xub_StrLen nStrLen = 0;
111cdf0e10cSrcweir 							do {
112cdf0e10cSrcweir 								*(pStr + nStrLen++) = nNextCh;
113cdf0e10cSrcweir 								if( MAX_TOKEN_LEN == nStrLen )
114cdf0e10cSrcweir 								{
115cdf0e10cSrcweir 									aToken += aStrBuffer;
116cdf0e10cSrcweir 									aToken.GetBufferAccess();  // make unique string!
117cdf0e10cSrcweir 									nStrLen = 0;
118cdf0e10cSrcweir 								}
119cdf0e10cSrcweir 								nNextCh = GetNextChar();
120cdf0e10cSrcweir 							} while( RTF_ISALPHA( nNextCh ) );
121cdf0e10cSrcweir 							if( nStrLen )
122cdf0e10cSrcweir 							{
123cdf0e10cSrcweir 								aStrBuffer.ReleaseBufferAccess( nStrLen );
124cdf0e10cSrcweir 								aToken += aStrBuffer;
125cdf0e10cSrcweir 							}
126cdf0e10cSrcweir 						}
127cdf0e10cSrcweir 
128cdf0e10cSrcweir 						// Minus fuer numerischen Parameter
129cdf0e10cSrcweir 						int bNegValue = false;
130cdf0e10cSrcweir 						if( '-' == nNextCh )
131cdf0e10cSrcweir 						{
132cdf0e10cSrcweir 							bNegValue = true;
133cdf0e10cSrcweir 							nNextCh = GetNextChar();
134cdf0e10cSrcweir 						}
135cdf0e10cSrcweir 
136cdf0e10cSrcweir 						// evt. Numerischer Parameter
137cdf0e10cSrcweir 						if( RTF_ISDIGIT( nNextCh ) )
138cdf0e10cSrcweir 						{
139cdf0e10cSrcweir 							nTokenValue = 0;
140cdf0e10cSrcweir 							do {
141cdf0e10cSrcweir 								nTokenValue *= 10;
142cdf0e10cSrcweir 								nTokenValue += nNextCh - '0';
143cdf0e10cSrcweir 								nNextCh = GetNextChar();
144cdf0e10cSrcweir 							} while( RTF_ISDIGIT( nNextCh ) );
145cdf0e10cSrcweir 							if( bNegValue )
146cdf0e10cSrcweir 								nTokenValue = -nTokenValue;
147cdf0e10cSrcweir 							bTokenHasValue=true;
148cdf0e10cSrcweir 						}
149cdf0e10cSrcweir 						else if( bNegValue )		// das Minus wieder zurueck
150cdf0e10cSrcweir 						{
151cdf0e10cSrcweir 							nNextCh = '-';
152cdf0e10cSrcweir 							rInput.SeekRel( -1 );
153cdf0e10cSrcweir 						}
154cdf0e10cSrcweir 						if( ' ' == nNextCh )		// Blank gehoert zum Token!
155cdf0e10cSrcweir 							nNextCh = GetNextChar();
156cdf0e10cSrcweir 
157cdf0e10cSrcweir 						// suche das Token in der Tabelle:
158cdf0e10cSrcweir 						if( 0 == (nRet = GetRTFToken( aToken )) )
159cdf0e10cSrcweir 							// Unknown Control
160cdf0e10cSrcweir 							nRet = RTF_UNKNOWNCONTROL;
161cdf0e10cSrcweir 
162cdf0e10cSrcweir 						// bug 76812 - unicode token handled as normal text
163cdf0e10cSrcweir 						bNextCh = false;
164cdf0e10cSrcweir 						switch( nRet )
165cdf0e10cSrcweir 						{
166cdf0e10cSrcweir 						case RTF_UC:
167cdf0e10cSrcweir 							if( 0 <= nTokenValue )
168cdf0e10cSrcweir 							{
169cdf0e10cSrcweir 								nUCharOverread = (sal_uInt8)nTokenValue;
170cdf0e10cSrcweir #if 1
171cdf0e10cSrcweir                                 //cmc: other ifdef breaks #i3584
172cdf0e10cSrcweir 								aParserStates.top().
173cdf0e10cSrcweir 									nUCharOverread = nUCharOverread;
174cdf0e10cSrcweir #else
175cdf0e10cSrcweir 								if( !nUCharOverread )
176cdf0e10cSrcweir 									nUCharOverread = aParserStates.top().nUCharOverread;
177cdf0e10cSrcweir 								else
178cdf0e10cSrcweir 									aParserStates.top().
179cdf0e10cSrcweir 										nUCharOverread = nUCharOverread;
180cdf0e10cSrcweir #endif
181cdf0e10cSrcweir 							}
182cdf0e10cSrcweir 							aToken.Erase(); // #i47831# erase token to prevent the token from beeing treated as text
183cdf0e10cSrcweir 							// read next token
184cdf0e10cSrcweir 							nRet = 0;
185cdf0e10cSrcweir 							break;
186cdf0e10cSrcweir 
187cdf0e10cSrcweir 						case RTF_UPR:
188cdf0e10cSrcweir 							if (!_inSkipGroup) {
189cdf0e10cSrcweir 							// UPR - overread the group with the ansi
190cdf0e10cSrcweir 							//       informations
191cdf0e10cSrcweir 							while( '{' != _GetNextToken() )
192cdf0e10cSrcweir 								;
193cdf0e10cSrcweir 							SkipGroup();
194cdf0e10cSrcweir 							_GetNextToken();  // overread the last bracket
195cdf0e10cSrcweir 							nRet = 0;
196cdf0e10cSrcweir 							}
197cdf0e10cSrcweir 							break;
198cdf0e10cSrcweir 
199cdf0e10cSrcweir 						case RTF_U:
200cdf0e10cSrcweir 							if( !bRTF_InTextRead )
201cdf0e10cSrcweir 							{
202cdf0e10cSrcweir 								nRet = RTF_TEXTTOKEN;
203cdf0e10cSrcweir 								aToken = (sal_Unicode)nTokenValue;
204cdf0e10cSrcweir 
205cdf0e10cSrcweir 								// overread the next n "RTF" characters. This
206cdf0e10cSrcweir 								// can be also \{, \}, \'88
207cdf0e10cSrcweir 								for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
208cdf0e10cSrcweir 								{
209cdf0e10cSrcweir 									sal_Unicode cAnsi = nNextCh;
210cdf0e10cSrcweir 									while( 0xD == cAnsi )
211cdf0e10cSrcweir 										cAnsi = GetNextChar();
212cdf0e10cSrcweir 									while( 0xA == cAnsi )
213cdf0e10cSrcweir 										cAnsi = GetNextChar();
214cdf0e10cSrcweir 
215cdf0e10cSrcweir 									if( '\\' == cAnsi &&
216cdf0e10cSrcweir 										'\'' == ( cAnsi = GetNextChar() ))
217cdf0e10cSrcweir 										// HexValue ueberlesen
218cdf0e10cSrcweir 										cAnsi = GetHexValue();
219cdf0e10cSrcweir 									nNextCh = GetNextChar();
220cdf0e10cSrcweir 								}
221cdf0e10cSrcweir 								ScanText();
222cdf0e10cSrcweir 								bNextCh = 0 == nNextCh;
223cdf0e10cSrcweir 							}
224cdf0e10cSrcweir 							break;
225cdf0e10cSrcweir 						}
226cdf0e10cSrcweir 					}
227cdf0e10cSrcweir 					else if( SVPAR_PENDING != eState )
228cdf0e10cSrcweir 					{
229cdf0e10cSrcweir 						// Bug 34631 - "\ " ueberlesen - Blank als Zeichen
230cdf0e10cSrcweir 						// eState = SVPAR_ERROR;
231cdf0e10cSrcweir 						bNextCh = false;
232cdf0e10cSrcweir 					}
233cdf0e10cSrcweir 					break;
234cdf0e10cSrcweir 				}
235cdf0e10cSrcweir 			}
236cdf0e10cSrcweir 			break;
237cdf0e10cSrcweir 
238cdf0e10cSrcweir 		case sal_Unicode(EOF):
239cdf0e10cSrcweir 			eState = SVPAR_ACCEPTED;
240cdf0e10cSrcweir 			nRet = nNextCh;
241cdf0e10cSrcweir 			break;
242cdf0e10cSrcweir 
243cdf0e10cSrcweir 		case '{':
244cdf0e10cSrcweir 			{
245cdf0e10cSrcweir 				if( 0 <= nOpenBrakets )
246cdf0e10cSrcweir 				{
247cdf0e10cSrcweir 					RtfParserState_Impl aState( nUCharOverread, GetSrcEncoding() );
248cdf0e10cSrcweir                     aParserStates.push( aState );
249cdf0e10cSrcweir 				}
250cdf0e10cSrcweir 				++nOpenBrakets;
251cdf0e10cSrcweir                 DBG_ASSERT(
252cdf0e10cSrcweir                     static_cast<size_t>(nOpenBrakets) == aParserStates.size(),
253cdf0e10cSrcweir                     "ParserStateStack unequal to bracket count" );
254cdf0e10cSrcweir 				nRet = nNextCh;
255cdf0e10cSrcweir 			}
256cdf0e10cSrcweir 			break;
257cdf0e10cSrcweir 
258cdf0e10cSrcweir 		case '}':
259cdf0e10cSrcweir 			--nOpenBrakets;
260cdf0e10cSrcweir 			if( 0 <= nOpenBrakets )
261cdf0e10cSrcweir 			{
262cdf0e10cSrcweir                 aParserStates.pop();
263cdf0e10cSrcweir 				if( !aParserStates.empty() )
264cdf0e10cSrcweir 				{
265cdf0e10cSrcweir 					const RtfParserState_Impl& rRPS =
266cdf0e10cSrcweir 							aParserStates.top();
267cdf0e10cSrcweir 					nUCharOverread = rRPS.nUCharOverread;
268cdf0e10cSrcweir 					SetSrcEncoding( rRPS.eCodeSet );
269cdf0e10cSrcweir 				}
270cdf0e10cSrcweir 				else
271cdf0e10cSrcweir 				{
272cdf0e10cSrcweir 					nUCharOverread = 1;
273cdf0e10cSrcweir 					SetSrcEncoding( GetCodeSet() );
274cdf0e10cSrcweir 				}
275cdf0e10cSrcweir 			}
276cdf0e10cSrcweir             DBG_ASSERT(
277cdf0e10cSrcweir                 static_cast<size_t>(nOpenBrakets) == aParserStates.size(),
278cdf0e10cSrcweir                 "ParserStateStack unequal to bracket count" );
279cdf0e10cSrcweir 			nRet = nNextCh;
280cdf0e10cSrcweir 			break;
281cdf0e10cSrcweir 
282cdf0e10cSrcweir 		case 0x0d:
283cdf0e10cSrcweir 		case 0x0a:
284cdf0e10cSrcweir 			break;
285cdf0e10cSrcweir 
286cdf0e10cSrcweir 		default:
287cdf0e10cSrcweir 			// es folgt normaler Text
288cdf0e10cSrcweir 			ScanText();
289cdf0e10cSrcweir 			nRet = RTF_TEXTTOKEN;
290cdf0e10cSrcweir 			bNextCh = 0 == nNextCh;
291cdf0e10cSrcweir 			break;
292cdf0e10cSrcweir 		}
293cdf0e10cSrcweir 
294cdf0e10cSrcweir 		if( bNextCh )
295cdf0e10cSrcweir 			nNextCh = GetNextChar();
296cdf0e10cSrcweir 
297cdf0e10cSrcweir 	} while( !nRet && SVPAR_WORKING == eState );
298cdf0e10cSrcweir 	return nRet;
299cdf0e10cSrcweir }
300cdf0e10cSrcweir 
301cdf0e10cSrcweir 
302cdf0e10cSrcweir sal_Unicode SvRTFParser::GetHexValue()
303cdf0e10cSrcweir {
304cdf0e10cSrcweir 	// Hex-Wert sammeln
305cdf0e10cSrcweir 	register int n;
306cdf0e10cSrcweir 	register sal_Unicode nHexVal = 0;
307cdf0e10cSrcweir 
308cdf0e10cSrcweir 	for( n = 0; n < 2; ++n )
309cdf0e10cSrcweir 	{
310cdf0e10cSrcweir 		nHexVal *= 16;
311cdf0e10cSrcweir 		nNextCh = GetNextChar();
312cdf0e10cSrcweir 		if( nNextCh >= '0' && nNextCh <= '9' )
313cdf0e10cSrcweir 			nHexVal += (nNextCh - 48);
314cdf0e10cSrcweir 		else if( nNextCh >= 'a' && nNextCh <= 'f' )
315cdf0e10cSrcweir 			nHexVal += (nNextCh - 87);
316cdf0e10cSrcweir 		else if( nNextCh >= 'A' && nNextCh <= 'F' )
317cdf0e10cSrcweir 			nHexVal += (nNextCh - 55);
318cdf0e10cSrcweir 	}
319cdf0e10cSrcweir 	return nHexVal;
320cdf0e10cSrcweir }
321cdf0e10cSrcweir 
322cdf0e10cSrcweir void SvRTFParser::ScanText( const sal_Unicode cBreak )
323cdf0e10cSrcweir {
324cdf0e10cSrcweir 	String aStrBuffer;
325cdf0e10cSrcweir 	int bWeiter = true;
326cdf0e10cSrcweir 	while( bWeiter && IsParserWorking() && aStrBuffer.Len() < MAX_STRING_LEN)
327cdf0e10cSrcweir 	{
328cdf0e10cSrcweir 		int bNextCh = true;
329cdf0e10cSrcweir 		switch( nNextCh )
330cdf0e10cSrcweir 		{
331cdf0e10cSrcweir 		case '\\':
332cdf0e10cSrcweir 			{
333cdf0e10cSrcweir 				switch (nNextCh = GetNextChar())
334cdf0e10cSrcweir 				{
335cdf0e10cSrcweir 				case '\'':
336cdf0e10cSrcweir 					{
337cdf0e10cSrcweir 
338cdf0e10cSrcweir #if 0
339cdf0e10cSrcweir                         // #i35653 patch from cmc
340cdf0e10cSrcweir                         ByteString aByteString(static_cast<char>(GetHexValue()));
341cdf0e10cSrcweir                         if (aByteString.Len())
342cdf0e10cSrcweir                             aStrBuffer.Append(String(aByteString, GetSrcEncoding()));
343cdf0e10cSrcweir #else
344cdf0e10cSrcweir                         ByteString aByteString;
345cdf0e10cSrcweir                         while (1)
346cdf0e10cSrcweir                         {
347cdf0e10cSrcweir                             aByteString.Append((char)GetHexValue());
348cdf0e10cSrcweir 
349cdf0e10cSrcweir                             bool bBreak = false;
350cdf0e10cSrcweir                             sal_Char nSlash = '\\';
351cdf0e10cSrcweir                             while (!bBreak)
352cdf0e10cSrcweir                             {
353cdf0e10cSrcweir 								wchar_t __next=GetNextChar();
354cdf0e10cSrcweir 								if (__next>0xFF) // fix for #i43933# and #i35653#
355cdf0e10cSrcweir 								{
356cdf0e10cSrcweir 									if (aByteString.Len())
357cdf0e10cSrcweir 										aStrBuffer.Append(String(aByteString, GetSrcEncoding()));
358cdf0e10cSrcweir 									aStrBuffer.Append((sal_Unicode)__next);
359cdf0e10cSrcweir 
360cdf0e10cSrcweir 									aByteString.Erase();
361cdf0e10cSrcweir 									continue;
362cdf0e10cSrcweir 								}
363cdf0e10cSrcweir                                 nSlash = (sal_Char)__next;
364cdf0e10cSrcweir                                 while (nSlash == 0xD || nSlash == 0xA)
365cdf0e10cSrcweir                                     nSlash = (sal_Char)GetNextChar();
366cdf0e10cSrcweir 
367cdf0e10cSrcweir                                 switch (nSlash)
368cdf0e10cSrcweir                                 {
369cdf0e10cSrcweir                                     case '{':
370cdf0e10cSrcweir                                     case '}':
371cdf0e10cSrcweir                                     case '\\':
372cdf0e10cSrcweir                                         bBreak = true;
373cdf0e10cSrcweir                                         break;
374cdf0e10cSrcweir                                     default:
375cdf0e10cSrcweir                                         aByteString.Append(nSlash);
376cdf0e10cSrcweir                                         break;
377cdf0e10cSrcweir                                 }
378cdf0e10cSrcweir                             }
379cdf0e10cSrcweir 
380cdf0e10cSrcweir                             nNextCh = GetNextChar();
381cdf0e10cSrcweir 
382cdf0e10cSrcweir                             if (nSlash != '\\' || nNextCh != '\'')
383cdf0e10cSrcweir                             {
384cdf0e10cSrcweir                                 rInput.SeekRel(-1);
385cdf0e10cSrcweir                                 nNextCh = nSlash;
386cdf0e10cSrcweir                                 break;
387cdf0e10cSrcweir                             }
388cdf0e10cSrcweir                         }
389cdf0e10cSrcweir 
390cdf0e10cSrcweir                         bNextCh = false;
391cdf0e10cSrcweir 
392cdf0e10cSrcweir                         if (aByteString.Len())
393cdf0e10cSrcweir                             aStrBuffer.Append(String(aByteString, GetSrcEncoding()));
394cdf0e10cSrcweir #endif
395cdf0e10cSrcweir                     }
396cdf0e10cSrcweir 					break;
397cdf0e10cSrcweir 				case '\\':
398cdf0e10cSrcweir 				case '}':
399cdf0e10cSrcweir 				case '{':
400cdf0e10cSrcweir 				case '+':		// habe ich in einem RTF-File gefunden
401cdf0e10cSrcweir 					aStrBuffer.Append(nNextCh);
402cdf0e10cSrcweir 					break;
403cdf0e10cSrcweir 				case '~':		// nonbreaking space
404cdf0e10cSrcweir 					aStrBuffer.Append(static_cast< sal_Unicode >(0xA0));
405cdf0e10cSrcweir 					break;
406cdf0e10cSrcweir 				case '-':		// optional hyphen
407cdf0e10cSrcweir 					aStrBuffer.Append(static_cast< sal_Unicode >(0xAD));
408cdf0e10cSrcweir 					break;
409cdf0e10cSrcweir 				case '_':		// nonbreaking hyphen
410cdf0e10cSrcweir 					aStrBuffer.Append(static_cast< sal_Unicode >(0x2011));
411cdf0e10cSrcweir 					break;
412cdf0e10cSrcweir 
413cdf0e10cSrcweir 				case 'u':
414cdf0e10cSrcweir 					// UNI-Code Zeichen lesen
415cdf0e10cSrcweir 					{
416cdf0e10cSrcweir 						nNextCh = GetNextChar();
417cdf0e10cSrcweir 						rInput.SeekRel( -2 );
418cdf0e10cSrcweir 
419cdf0e10cSrcweir 						if( '-' == nNextCh || RTF_ISDIGIT( nNextCh ) )
420cdf0e10cSrcweir 						{
421cdf0e10cSrcweir 							bRTF_InTextRead = true;
422cdf0e10cSrcweir 
423cdf0e10cSrcweir 							String sSave( aToken );
424cdf0e10cSrcweir 							nNextCh = '\\';
425cdf0e10cSrcweir                             #ifdef DBG_UTIL
426cdf0e10cSrcweir 							int nToken =
427cdf0e10cSrcweir                             #endif
428cdf0e10cSrcweir                                 _GetNextToken();
429cdf0e10cSrcweir 							DBG_ASSERT( RTF_U == nToken, "doch kein UNI-Code Zeichen" );
430cdf0e10cSrcweir 							// dont convert symbol chars
431cdf0e10cSrcweir 							aStrBuffer.Append(
432cdf0e10cSrcweir                                 static_cast< sal_Unicode >(nTokenValue));
433cdf0e10cSrcweir 
434cdf0e10cSrcweir 							// overread the next n "RTF" characters. This
435cdf0e10cSrcweir 							// can be also \{, \}, \'88
436cdf0e10cSrcweir 							for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
437cdf0e10cSrcweir 							{
438cdf0e10cSrcweir 								sal_Unicode cAnsi = nNextCh;
439cdf0e10cSrcweir 								while( 0xD == cAnsi )
440cdf0e10cSrcweir 									cAnsi = GetNextChar();
441cdf0e10cSrcweir 								while( 0xA == cAnsi )
442cdf0e10cSrcweir 									cAnsi = GetNextChar();
443cdf0e10cSrcweir 
444cdf0e10cSrcweir 								if( '\\' == cAnsi &&
445cdf0e10cSrcweir 									'\'' == ( cAnsi = GetNextChar() ))
446cdf0e10cSrcweir 									// HexValue ueberlesen
447cdf0e10cSrcweir 									cAnsi = GetHexValue();
448cdf0e10cSrcweir 								nNextCh = GetNextChar();
449cdf0e10cSrcweir 							}
450cdf0e10cSrcweir 							bNextCh = false;
451cdf0e10cSrcweir 							aToken = sSave;
452cdf0e10cSrcweir 							bRTF_InTextRead = false;
453cdf0e10cSrcweir 						}
454cdf0e10cSrcweir 						else
455cdf0e10cSrcweir 						{
456cdf0e10cSrcweir 							nNextCh = '\\';
457cdf0e10cSrcweir 							bWeiter = false;		// Abbrechen, String zusammen
458cdf0e10cSrcweir 						}
459cdf0e10cSrcweir 					}
460cdf0e10cSrcweir 					break;
461cdf0e10cSrcweir 
462cdf0e10cSrcweir 				default:
463cdf0e10cSrcweir 					rInput.SeekRel( -1 );
464cdf0e10cSrcweir 					nNextCh = '\\';
465cdf0e10cSrcweir 					bWeiter = false;		// Abbrechen, String zusammen
466cdf0e10cSrcweir 					break;
467cdf0e10cSrcweir 				}
468cdf0e10cSrcweir 			}
469cdf0e10cSrcweir 			break;
470cdf0e10cSrcweir 
471cdf0e10cSrcweir 		case sal_Unicode(EOF):
472cdf0e10cSrcweir 				eState = SVPAR_ERROR;
473cdf0e10cSrcweir 				// weiter
474cdf0e10cSrcweir 		case '{':
475cdf0e10cSrcweir 		case '}':
476cdf0e10cSrcweir 			bWeiter = false;
477cdf0e10cSrcweir 			break;
478cdf0e10cSrcweir 
479cdf0e10cSrcweir 		case 0x0a:
480cdf0e10cSrcweir 		case 0x0d:
481cdf0e10cSrcweir 			break;
482cdf0e10cSrcweir 
483cdf0e10cSrcweir 		default:
484cdf0e10cSrcweir 			if( nNextCh == cBreak || aStrBuffer.Len() >= MAX_STRING_LEN)
485cdf0e10cSrcweir 				bWeiter = false;
486cdf0e10cSrcweir 			else
487cdf0e10cSrcweir 			{
488cdf0e10cSrcweir 				do {
489cdf0e10cSrcweir 					// alle anderen Zeichen kommen in den Text
490cdf0e10cSrcweir 					aStrBuffer.Append(nNextCh);
491cdf0e10cSrcweir 
492cdf0e10cSrcweir 					if (sal_Unicode(EOF) == (nNextCh = GetNextChar()))
493cdf0e10cSrcweir 					{
494cdf0e10cSrcweir                         if (aStrBuffer.Len())
495cdf0e10cSrcweir 		                    aToken += aStrBuffer;
496cdf0e10cSrcweir 						return;
497cdf0e10cSrcweir 					}
498cdf0e10cSrcweir 				} while
499cdf0e10cSrcweir                 (
500cdf0e10cSrcweir                     (RTF_ISALPHA(nNextCh) || RTF_ISDIGIT(nNextCh)) &&
501cdf0e10cSrcweir                     (aStrBuffer.Len() < MAX_STRING_LEN)
502cdf0e10cSrcweir                 );
503cdf0e10cSrcweir 				bNextCh = false;
504cdf0e10cSrcweir 			}
505cdf0e10cSrcweir 		}
506cdf0e10cSrcweir 
507cdf0e10cSrcweir 		if( bWeiter && bNextCh )
508cdf0e10cSrcweir 			nNextCh = GetNextChar();
509cdf0e10cSrcweir 	}
510cdf0e10cSrcweir 
511cdf0e10cSrcweir 	if (aStrBuffer.Len())
512cdf0e10cSrcweir 		aToken += aStrBuffer;
513cdf0e10cSrcweir }
514cdf0e10cSrcweir 
515cdf0e10cSrcweir 
516cdf0e10cSrcweir short SvRTFParser::_inSkipGroup=0;
517cdf0e10cSrcweir 
518cdf0e10cSrcweir void SvRTFParser::SkipGroup()
519cdf0e10cSrcweir {
520cdf0e10cSrcweir short nBrackets=1;
521cdf0e10cSrcweir if (_inSkipGroup>0)
522cdf0e10cSrcweir 	return;
523cdf0e10cSrcweir _inSkipGroup++;
524cdf0e10cSrcweir #if 1	//#i16185# fecking \bin keyword
525cdf0e10cSrcweir     do
526cdf0e10cSrcweir     {
527cdf0e10cSrcweir         switch (nNextCh)
528cdf0e10cSrcweir         {
529cdf0e10cSrcweir             case '{':
530cdf0e10cSrcweir                 ++nBrackets;
531cdf0e10cSrcweir                 break;
532cdf0e10cSrcweir             case '}':
533cdf0e10cSrcweir 				if (!--nBrackets) {
534cdf0e10cSrcweir 					_inSkipGroup--;
535cdf0e10cSrcweir                     return;
536cdf0e10cSrcweir 				}
537cdf0e10cSrcweir                 break;
538cdf0e10cSrcweir         }
539cdf0e10cSrcweir         int nToken = _GetNextToken();
540cdf0e10cSrcweir         if (nToken == RTF_BIN)
541cdf0e10cSrcweir         {
542cdf0e10cSrcweir             rInput.SeekRel(-1);
543cdf0e10cSrcweir             rInput.SeekRel(nTokenValue);
544cdf0e10cSrcweir 		    nNextCh = GetNextChar();
545cdf0e10cSrcweir         }
546cdf0e10cSrcweir 		while (nNextCh==0xa || nNextCh==0xd)
547cdf0e10cSrcweir 		{
548cdf0e10cSrcweir 			nNextCh = GetNextChar();
549cdf0e10cSrcweir 		}
550cdf0e10cSrcweir     } while (sal_Unicode(EOF) != nNextCh && IsParserWorking());
551cdf0e10cSrcweir #else
552cdf0e10cSrcweir 	sal_Unicode cPrev = 0;
553cdf0e10cSrcweir 	do {
554cdf0e10cSrcweir 		switch( nNextCh )
555cdf0e10cSrcweir 		{
556cdf0e10cSrcweir 		case '{':
557cdf0e10cSrcweir 			if( '\\' != cPrev )
558cdf0e10cSrcweir 				++nBrackets;
559cdf0e10cSrcweir 			break;
560cdf0e10cSrcweir 
561cdf0e10cSrcweir 		case '}':
562cdf0e10cSrcweir 			if( '\\' != cPrev && !--nBrackets )
563cdf0e10cSrcweir 				return;
564cdf0e10cSrcweir 			break;
565cdf0e10cSrcweir 
566cdf0e10cSrcweir 		case '\\':
567cdf0e10cSrcweir 			if( '\\' == cPrev )
568cdf0e10cSrcweir 				nNextCh = 0;
569cdf0e10cSrcweir 			break;
570cdf0e10cSrcweir 		}
571cdf0e10cSrcweir 		cPrev = nNextCh;
572cdf0e10cSrcweir 		nNextCh = GetNextChar();
573cdf0e10cSrcweir 	} while( sal_Unicode(EOF) != nNextCh && IsParserWorking() );
574cdf0e10cSrcweir #endif
575cdf0e10cSrcweir 
576cdf0e10cSrcweir 	if( SVPAR_PENDING != eState && '}' != nNextCh )
577cdf0e10cSrcweir 		eState = SVPAR_ERROR;
578cdf0e10cSrcweir 	_inSkipGroup--;
579cdf0e10cSrcweir }
580cdf0e10cSrcweir 
581cdf0e10cSrcweir void SvRTFParser::ReadUnknownData()	{ SkipGroup(); }
582cdf0e10cSrcweir void SvRTFParser::ReadBitmapData()	{ SkipGroup(); }
583cdf0e10cSrcweir void SvRTFParser::ReadOLEData()		{ SkipGroup(); }
584cdf0e10cSrcweir 
585cdf0e10cSrcweir 
586cdf0e10cSrcweir SvParserState SvRTFParser::CallParser()
587cdf0e10cSrcweir {
588cdf0e10cSrcweir 	sal_Char cFirstCh;
589cdf0e10cSrcweir     nNextChPos = rInput.Tell();
590cdf0e10cSrcweir 	rInput >> cFirstCh; nNextCh = cFirstCh;
591cdf0e10cSrcweir 	eState = SVPAR_WORKING;
592cdf0e10cSrcweir 	nOpenBrakets = 0;
593cdf0e10cSrcweir 	SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 );
594cdf0e10cSrcweir 	eUNICodeSet = RTL_TEXTENCODING_MS_1252; 	// default ist ANSI-CodeSet
595cdf0e10cSrcweir 
596cdf0e10cSrcweir 	// die 1. beiden Token muessen '{' und \\rtf sein !!
597cdf0e10cSrcweir 	if( '{' == GetNextToken() && RTF_RTF == GetNextToken() )
598cdf0e10cSrcweir 	{
599cdf0e10cSrcweir 		AddRef();
600cdf0e10cSrcweir 		Continue( 0 );
601cdf0e10cSrcweir 		if( SVPAR_PENDING != eState )
602cdf0e10cSrcweir 			ReleaseRef();		// dann brauchen wir den Parser nicht mehr!
603cdf0e10cSrcweir 	}
604cdf0e10cSrcweir 	else
605cdf0e10cSrcweir 		eState = SVPAR_ERROR;
606cdf0e10cSrcweir 
607cdf0e10cSrcweir 	return eState;
608cdf0e10cSrcweir }
609cdf0e10cSrcweir 
610cdf0e10cSrcweir void SvRTFParser::Continue( int nToken )
611cdf0e10cSrcweir {
612cdf0e10cSrcweir //	DBG_ASSERT( SVPAR_CS_DONTKNOW == GetCharSet(),
613cdf0e10cSrcweir //				"Zeichensatz wurde geaendert." );
614cdf0e10cSrcweir 
615cdf0e10cSrcweir 	if( !nToken )
616cdf0e10cSrcweir 		nToken = GetNextToken();
617cdf0e10cSrcweir 
618cdf0e10cSrcweir 	while( IsParserWorking() )
619cdf0e10cSrcweir 	{
620cdf0e10cSrcweir 		SaveState( nToken );
621cdf0e10cSrcweir 		switch( nToken )
622cdf0e10cSrcweir 		{
623cdf0e10cSrcweir 		case '}':
624cdf0e10cSrcweir 			if( nOpenBrakets )
625cdf0e10cSrcweir 				goto NEXTTOKEN;
626cdf0e10cSrcweir 			eState = SVPAR_ACCEPTED;
627cdf0e10cSrcweir 			break;
628cdf0e10cSrcweir 
629cdf0e10cSrcweir 		case '{':
630cdf0e10cSrcweir 			// eine unbekannte Gruppe ?
631cdf0e10cSrcweir 			{
632cdf0e10cSrcweir 				if( RTF_IGNOREFLAG != GetNextToken() )
633cdf0e10cSrcweir 					nToken = SkipToken( -1 );
634cdf0e10cSrcweir 				else if( RTF_UNKNOWNCONTROL != GetNextToken() )
635cdf0e10cSrcweir 					nToken = SkipToken( -2 );
636cdf0e10cSrcweir 				else
637cdf0e10cSrcweir 				{
638cdf0e10cSrcweir 					// gleich herausfiltern
639cdf0e10cSrcweir 					ReadUnknownData();
640cdf0e10cSrcweir 					nToken = GetNextToken();
641cdf0e10cSrcweir 					if( '}' != nToken )
642cdf0e10cSrcweir 						eState = SVPAR_ERROR;
643cdf0e10cSrcweir 					break;		// auf zum naechsten Token!!
644cdf0e10cSrcweir 				}
645cdf0e10cSrcweir 			}
646cdf0e10cSrcweir 			goto NEXTTOKEN;
647cdf0e10cSrcweir 
648cdf0e10cSrcweir 		case RTF_UNKNOWNCONTROL:
649cdf0e10cSrcweir 			break;		// unbekannte Token ueberspringen
650cdf0e10cSrcweir 		case RTF_NEXTTYPE:
651cdf0e10cSrcweir 		case RTF_ANSITYPE:
652cdf0e10cSrcweir             SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 );
653cdf0e10cSrcweir             break;
654cdf0e10cSrcweir 		case RTF_MACTYPE:
655cdf0e10cSrcweir             SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_APPLE_ROMAN );
656cdf0e10cSrcweir             break;
657cdf0e10cSrcweir 		case RTF_PCTYPE:
658cdf0e10cSrcweir             SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_IBM_437 );
659cdf0e10cSrcweir             break;
660cdf0e10cSrcweir 		case RTF_PCATYPE:
661cdf0e10cSrcweir             SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_IBM_850 );
662cdf0e10cSrcweir             break;
663cdf0e10cSrcweir 		case RTF_ANSICPG:
664cdf0e10cSrcweir             eCodeSet = rtl_getTextEncodingFromWindowsCodePage(nTokenValue);
665cdf0e10cSrcweir             SetSrcEncoding(eCodeSet);
666cdf0e10cSrcweir 			break;
667cdf0e10cSrcweir 		default:
668cdf0e10cSrcweir NEXTTOKEN:
669cdf0e10cSrcweir 			NextToken( nToken );
670cdf0e10cSrcweir 			break;
671cdf0e10cSrcweir 		}
672cdf0e10cSrcweir 		if( IsParserWorking() )
673cdf0e10cSrcweir 			SaveState( 0 );			// bis hierhin abgearbeitet,
674cdf0e10cSrcweir 									// weiter mit neuem Token!
675cdf0e10cSrcweir 		nToken = GetNextToken();
676cdf0e10cSrcweir 	}
677cdf0e10cSrcweir 	if( SVPAR_ACCEPTED == eState && 0 < nOpenBrakets )
678cdf0e10cSrcweir 		eState = SVPAR_ERROR;
679cdf0e10cSrcweir }
680cdf0e10cSrcweir 
681cdf0e10cSrcweir void SvRTFParser::SetEncoding( rtl_TextEncoding eEnc )
682cdf0e10cSrcweir {
683cdf0e10cSrcweir 	if (eEnc == RTL_TEXTENCODING_DONTKNOW)
684cdf0e10cSrcweir 		eEnc = GetCodeSet();
685cdf0e10cSrcweir 
686cdf0e10cSrcweir 	if (!aParserStates.empty())
687cdf0e10cSrcweir 		aParserStates.top().eCodeSet = eEnc;
688cdf0e10cSrcweir 	SetSrcEncoding(eEnc);
689cdf0e10cSrcweir }
690cdf0e10cSrcweir 
#ifdef USED
// Only compiled when USED is defined: both functions just forward to the
// SvParser implementation.
void SvRTFParser::SaveState( int nToken )
{
    this->SvParser::SaveState( nToken );
}

void SvRTFParser::RestoreState()
{
    this->SvParser::RestoreState();
}
#endif
702cdf0e10cSrcweir 
703cdf0e10cSrcweir /* vi:set tabstop=4 shiftwidth=4 expandtab: */
704