1*5900e8ecSAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*5900e8ecSAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*5900e8ecSAndrew Rist * or more contributor license agreements. See the NOTICE file 5*5900e8ecSAndrew Rist * distributed with this work for additional information 6*5900e8ecSAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*5900e8ecSAndrew Rist * to you under the Apache License, Version 2.0 (the 8*5900e8ecSAndrew Rist * "License"); you may not use this file except in compliance 9*5900e8ecSAndrew Rist * with the License. You may obtain a copy of the License at 10*5900e8ecSAndrew Rist * 11*5900e8ecSAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*5900e8ecSAndrew Rist * 13*5900e8ecSAndrew Rist * Unless required by applicable law or agreed to in writing, 14*5900e8ecSAndrew Rist * software distributed under the License is distributed on an 15*5900e8ecSAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*5900e8ecSAndrew Rist * KIND, either express or implied. See the License for the 17*5900e8ecSAndrew Rist * specific language governing permissions and limitations 18*5900e8ecSAndrew Rist * under the License. 19*5900e8ecSAndrew Rist * 20*5900e8ecSAndrew Rist *************************************************************/ 21*5900e8ecSAndrew Rist 22*5900e8ecSAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 25cdf0e10cSrcweir #include "precompiled_svtools.hxx" 26cdf0e10cSrcweir 27cdf0e10cSrcweir /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil -*- */ 28cdf0e10cSrcweir 29cdf0e10cSrcweir #include <stdio.h> // for EOF 30cdf0e10cSrcweir #include <rtl/tencinfo.h> 31cdf0e10cSrcweir #include <tools/stream.hxx> 32cdf0e10cSrcweir #include <tools/debug.hxx> 33cdf0e10cSrcweir #include <svtools/rtftoken.h> 34cdf0e10cSrcweir #include <svtools/rtfkeywd.hxx> 35cdf0e10cSrcweir #include <svtools/parrtf.hxx> 36cdf0e10cSrcweir 37cdf0e10cSrcweir const int MAX_STRING_LEN = 1024; 38cdf0e10cSrcweir const int MAX_TOKEN_LEN = 128; 39cdf0e10cSrcweir 40cdf0e10cSrcweir #define RTF_ISDIGIT( c ) (c >= '0' && c <= '9') 41cdf0e10cSrcweir #define RTF_ISALPHA( c ) ( (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ) 42cdf0e10cSrcweir 43cdf0e10cSrcweir SvRTFParser::SvRTFParser( SvStream& rIn, sal_uInt8 nStackSize ) 44cdf0e10cSrcweir : SvParser( rIn, nStackSize ), 45cdf0e10cSrcweir eUNICodeSet( RTL_TEXTENCODING_MS_1252 ), // default ist ANSI-CodeSet 46cdf0e10cSrcweir nUCharOverread( 1 ) 47cdf0e10cSrcweir { 48cdf0e10cSrcweir // default ist ANSI-CodeSet 49cdf0e10cSrcweir SetSrcEncoding( RTL_TEXTENCODING_MS_1252 ); 50cdf0e10cSrcweir bRTF_InTextRead = false; 51cdf0e10cSrcweir } 52cdf0e10cSrcweir 53cdf0e10cSrcweir SvRTFParser::~SvRTFParser() 54cdf0e10cSrcweir { 55cdf0e10cSrcweir } 56cdf0e10cSrcweir 57cdf0e10cSrcweir 58cdf0e10cSrcweir 59cdf0e10cSrcweir 60cdf0e10cSrcweir int SvRTFParser::_GetNextToken() 61cdf0e10cSrcweir { 62cdf0e10cSrcweir int nRet = 0; 63cdf0e10cSrcweir do { 64cdf0e10cSrcweir int bNextCh = true; 65cdf0e10cSrcweir switch( nNextCh ) 66cdf0e10cSrcweir { 67cdf0e10cSrcweir case '\\': 68cdf0e10cSrcweir { 69cdf0e10cSrcweir // Steuerzeichen 70cdf0e10cSrcweir switch( nNextCh = GetNextChar() ) 71cdf0e10cSrcweir { 72cdf0e10cSrcweir case '{': 73cdf0e10cSrcweir case '}': 74cdf0e10cSrcweir case '\\': 75cdf0e10cSrcweir case '+': // habe ich in einem RTF-File gefunden 76cdf0e10cSrcweir case '~': // nonbreaking space 77cdf0e10cSrcweir case '-': // optional hyphen 78cdf0e10cSrcweir case '_': // nonbreaking hyphen 79cdf0e10cSrcweir case '\'': // HexValue 80cdf0e10cSrcweir nNextCh = '\\'; 81cdf0e10cSrcweir rInput.SeekRel( -1 ); 82cdf0e10cSrcweir ScanText(); 83cdf0e10cSrcweir nRet = RTF_TEXTTOKEN; 84cdf0e10cSrcweir bNextCh = 0 == nNextCh; 85cdf0e10cSrcweir break; 86cdf0e10cSrcweir 87cdf0e10cSrcweir case '*': // ignoreflag 88cdf0e10cSrcweir nRet = RTF_IGNOREFLAG; 89cdf0e10cSrcweir break; 90cdf0e10cSrcweir case ':': // subentry in an index entry 91cdf0e10cSrcweir nRet = RTF_SUBENTRYINDEX; 92cdf0e10cSrcweir break; 93cdf0e10cSrcweir case '|': // formula-charakter 94cdf0e10cSrcweir nRet = RTF_FORMULA; 95cdf0e10cSrcweir break; 96cdf0e10cSrcweir 97cdf0e10cSrcweir case 0x0a: 98cdf0e10cSrcweir case 0x0d: 99cdf0e10cSrcweir nRet = RTF_PAR; 100cdf0e10cSrcweir break; 101cdf0e10cSrcweir 102cdf0e10cSrcweir default: 103cdf0e10cSrcweir if( RTF_ISALPHA( nNextCh ) ) 104cdf0e10cSrcweir { 105cdf0e10cSrcweir aToken = '\\'; 106cdf0e10cSrcweir { 107cdf0e10cSrcweir String aStrBuffer; 108cdf0e10cSrcweir sal_Unicode* pStr = aStrBuffer.AllocBuffer( 109cdf0e10cSrcweir MAX_TOKEN_LEN ); 110cdf0e10cSrcweir xub_StrLen nStrLen = 0; 111cdf0e10cSrcweir do { 112cdf0e10cSrcweir *(pStr + nStrLen++) = nNextCh; 113cdf0e10cSrcweir if( MAX_TOKEN_LEN == nStrLen ) 114cdf0e10cSrcweir { 115cdf0e10cSrcweir aToken += aStrBuffer; 116cdf0e10cSrcweir aToken.GetBufferAccess(); // make unique string! 117cdf0e10cSrcweir nStrLen = 0; 118cdf0e10cSrcweir } 119cdf0e10cSrcweir nNextCh = GetNextChar(); 120cdf0e10cSrcweir } while( RTF_ISALPHA( nNextCh ) ); 121cdf0e10cSrcweir if( nStrLen ) 122cdf0e10cSrcweir { 123cdf0e10cSrcweir aStrBuffer.ReleaseBufferAccess( nStrLen ); 124cdf0e10cSrcweir aToken += aStrBuffer; 125cdf0e10cSrcweir } 126cdf0e10cSrcweir } 127cdf0e10cSrcweir 128cdf0e10cSrcweir // Minus fuer numerischen Parameter 129cdf0e10cSrcweir int bNegValue = false; 130cdf0e10cSrcweir if( '-' == nNextCh ) 131cdf0e10cSrcweir { 132cdf0e10cSrcweir bNegValue = true; 133cdf0e10cSrcweir nNextCh = GetNextChar(); 134cdf0e10cSrcweir } 135cdf0e10cSrcweir 136cdf0e10cSrcweir // evt. Numerischer Parameter 137cdf0e10cSrcweir if( RTF_ISDIGIT( nNextCh ) ) 138cdf0e10cSrcweir { 139cdf0e10cSrcweir nTokenValue = 0; 140cdf0e10cSrcweir do { 141cdf0e10cSrcweir nTokenValue *= 10; 142cdf0e10cSrcweir nTokenValue += nNextCh - '0'; 143cdf0e10cSrcweir nNextCh = GetNextChar(); 144cdf0e10cSrcweir } while( RTF_ISDIGIT( nNextCh ) ); 145cdf0e10cSrcweir if( bNegValue ) 146cdf0e10cSrcweir nTokenValue = -nTokenValue; 147cdf0e10cSrcweir bTokenHasValue=true; 148cdf0e10cSrcweir } 149cdf0e10cSrcweir else if( bNegValue ) // das Minus wieder zurueck 150cdf0e10cSrcweir { 151cdf0e10cSrcweir nNextCh = '-'; 152cdf0e10cSrcweir rInput.SeekRel( -1 ); 153cdf0e10cSrcweir } 154cdf0e10cSrcweir if( ' ' == nNextCh ) // Blank gehoert zum Token! 155cdf0e10cSrcweir nNextCh = GetNextChar(); 156cdf0e10cSrcweir 157cdf0e10cSrcweir // suche das Token in der Tabelle: 158cdf0e10cSrcweir if( 0 == (nRet = GetRTFToken( aToken )) ) 159cdf0e10cSrcweir // Unknown Control 160cdf0e10cSrcweir nRet = RTF_UNKNOWNCONTROL; 161cdf0e10cSrcweir 162cdf0e10cSrcweir // bug 76812 - unicode token handled as normal text 163cdf0e10cSrcweir bNextCh = false; 164cdf0e10cSrcweir switch( nRet ) 165cdf0e10cSrcweir { 166cdf0e10cSrcweir case RTF_UC: 167cdf0e10cSrcweir if( 0 <= nTokenValue ) 168cdf0e10cSrcweir { 169cdf0e10cSrcweir nUCharOverread = (sal_uInt8)nTokenValue; 170cdf0e10cSrcweir #if 1 171cdf0e10cSrcweir //cmc: other ifdef breaks #i3584 172cdf0e10cSrcweir aParserStates.top(). 173cdf0e10cSrcweir nUCharOverread = nUCharOverread; 174cdf0e10cSrcweir #else 175cdf0e10cSrcweir if( !nUCharOverread ) 176cdf0e10cSrcweir nUCharOverread = aParserStates.top().nUCharOverread; 177cdf0e10cSrcweir else 178cdf0e10cSrcweir aParserStates.top(). 179cdf0e10cSrcweir nUCharOverread = nUCharOverread; 180cdf0e10cSrcweir #endif 181cdf0e10cSrcweir } 182cdf0e10cSrcweir aToken.Erase(); // #i47831# erase token to prevent the token from beeing treated as text 183cdf0e10cSrcweir // read next token 184cdf0e10cSrcweir nRet = 0; 185cdf0e10cSrcweir break; 186cdf0e10cSrcweir 187cdf0e10cSrcweir case RTF_UPR: 188cdf0e10cSrcweir if (!_inSkipGroup) { 189cdf0e10cSrcweir // UPR - overread the group with the ansi 190cdf0e10cSrcweir // informations 191cdf0e10cSrcweir while( '{' != _GetNextToken() ) 192cdf0e10cSrcweir ; 193cdf0e10cSrcweir SkipGroup(); 194cdf0e10cSrcweir _GetNextToken(); // overread the last bracket 195cdf0e10cSrcweir nRet = 0; 196cdf0e10cSrcweir } 197cdf0e10cSrcweir break; 198cdf0e10cSrcweir 199cdf0e10cSrcweir case RTF_U: 200cdf0e10cSrcweir if( !bRTF_InTextRead ) 201cdf0e10cSrcweir { 202cdf0e10cSrcweir nRet = RTF_TEXTTOKEN; 203cdf0e10cSrcweir aToken = (sal_Unicode)nTokenValue; 204cdf0e10cSrcweir 205cdf0e10cSrcweir // overread the next n "RTF" characters. This 206cdf0e10cSrcweir // can be also \{, \}, \'88 207cdf0e10cSrcweir for( sal_uInt8 m = 0; m < nUCharOverread; ++m ) 208cdf0e10cSrcweir { 209cdf0e10cSrcweir sal_Unicode cAnsi = nNextCh; 210cdf0e10cSrcweir while( 0xD == cAnsi ) 211cdf0e10cSrcweir cAnsi = GetNextChar(); 212cdf0e10cSrcweir while( 0xA == cAnsi ) 213cdf0e10cSrcweir cAnsi = GetNextChar(); 214cdf0e10cSrcweir 215cdf0e10cSrcweir if( '\\' == cAnsi && 216cdf0e10cSrcweir '\'' == ( cAnsi = GetNextChar() )) 217cdf0e10cSrcweir // HexValue ueberlesen 218cdf0e10cSrcweir cAnsi = GetHexValue(); 219cdf0e10cSrcweir nNextCh = GetNextChar(); 220cdf0e10cSrcweir } 221cdf0e10cSrcweir ScanText(); 222cdf0e10cSrcweir bNextCh = 0 == nNextCh; 223cdf0e10cSrcweir } 224cdf0e10cSrcweir break; 225cdf0e10cSrcweir } 226cdf0e10cSrcweir } 227cdf0e10cSrcweir else if( SVPAR_PENDING != eState ) 228cdf0e10cSrcweir { 229cdf0e10cSrcweir // Bug 34631 - "\ " ueberlesen - Blank als Zeichen 230cdf0e10cSrcweir // eState = SVPAR_ERROR; 231cdf0e10cSrcweir bNextCh = false; 232cdf0e10cSrcweir } 233cdf0e10cSrcweir break; 234cdf0e10cSrcweir } 235cdf0e10cSrcweir } 236cdf0e10cSrcweir break; 237cdf0e10cSrcweir 238cdf0e10cSrcweir case sal_Unicode(EOF): 239cdf0e10cSrcweir eState = SVPAR_ACCEPTED; 240cdf0e10cSrcweir nRet = nNextCh; 241cdf0e10cSrcweir break; 242cdf0e10cSrcweir 243cdf0e10cSrcweir case '{': 244cdf0e10cSrcweir { 245cdf0e10cSrcweir if( 0 <= nOpenBrakets ) 246cdf0e10cSrcweir { 247cdf0e10cSrcweir RtfParserState_Impl aState( nUCharOverread, GetSrcEncoding() ); 248cdf0e10cSrcweir aParserStates.push( aState ); 249cdf0e10cSrcweir } 250cdf0e10cSrcweir ++nOpenBrakets; 251cdf0e10cSrcweir DBG_ASSERT( 252cdf0e10cSrcweir static_cast<size_t>(nOpenBrakets) == aParserStates.size(), 253cdf0e10cSrcweir "ParserStateStack unequal to bracket count" ); 254cdf0e10cSrcweir nRet = nNextCh; 255cdf0e10cSrcweir } 256cdf0e10cSrcweir break; 257cdf0e10cSrcweir 258cdf0e10cSrcweir case '}': 259cdf0e10cSrcweir --nOpenBrakets; 260cdf0e10cSrcweir if( 0 <= nOpenBrakets ) 261cdf0e10cSrcweir { 262cdf0e10cSrcweir aParserStates.pop(); 263cdf0e10cSrcweir if( !aParserStates.empty() ) 264cdf0e10cSrcweir { 265cdf0e10cSrcweir const RtfParserState_Impl& rRPS = 266cdf0e10cSrcweir aParserStates.top(); 267cdf0e10cSrcweir nUCharOverread = rRPS.nUCharOverread; 268cdf0e10cSrcweir SetSrcEncoding( rRPS.eCodeSet ); 269cdf0e10cSrcweir } 270cdf0e10cSrcweir else 271cdf0e10cSrcweir { 272cdf0e10cSrcweir nUCharOverread = 1; 273cdf0e10cSrcweir SetSrcEncoding( GetCodeSet() ); 274cdf0e10cSrcweir } 275cdf0e10cSrcweir } 276cdf0e10cSrcweir DBG_ASSERT( 277cdf0e10cSrcweir static_cast<size_t>(nOpenBrakets) == aParserStates.size(), 278cdf0e10cSrcweir "ParserStateStack unequal to bracket count" ); 279cdf0e10cSrcweir nRet = nNextCh; 280cdf0e10cSrcweir break; 281cdf0e10cSrcweir 282cdf0e10cSrcweir case 0x0d: 283cdf0e10cSrcweir case 0x0a: 284cdf0e10cSrcweir break; 285cdf0e10cSrcweir 286cdf0e10cSrcweir default: 287cdf0e10cSrcweir // es folgt normaler Text 288cdf0e10cSrcweir ScanText(); 289cdf0e10cSrcweir nRet = RTF_TEXTTOKEN; 290cdf0e10cSrcweir bNextCh = 0 == nNextCh; 291cdf0e10cSrcweir break; 292cdf0e10cSrcweir } 293cdf0e10cSrcweir 294cdf0e10cSrcweir if( bNextCh ) 295cdf0e10cSrcweir nNextCh = GetNextChar(); 296cdf0e10cSrcweir 297cdf0e10cSrcweir } while( !nRet && SVPAR_WORKING == eState ); 298cdf0e10cSrcweir return nRet; 299cdf0e10cSrcweir } 300cdf0e10cSrcweir 301cdf0e10cSrcweir 302cdf0e10cSrcweir sal_Unicode SvRTFParser::GetHexValue() 303cdf0e10cSrcweir { 304cdf0e10cSrcweir // Hex-Wert sammeln 305cdf0e10cSrcweir register int n; 306cdf0e10cSrcweir register sal_Unicode nHexVal = 0; 307cdf0e10cSrcweir 308cdf0e10cSrcweir for( n = 0; n < 2; ++n ) 309cdf0e10cSrcweir { 310cdf0e10cSrcweir nHexVal *= 16; 311cdf0e10cSrcweir nNextCh = GetNextChar(); 312cdf0e10cSrcweir if( nNextCh >= '0' && nNextCh <= '9' ) 313cdf0e10cSrcweir nHexVal += (nNextCh - 48); 314cdf0e10cSrcweir else if( nNextCh >= 'a' && nNextCh <= 'f' ) 315cdf0e10cSrcweir nHexVal += (nNextCh - 87); 316cdf0e10cSrcweir else if( nNextCh >= 'A' && nNextCh <= 'F' ) 317cdf0e10cSrcweir nHexVal += (nNextCh - 55); 318cdf0e10cSrcweir } 319cdf0e10cSrcweir return nHexVal; 320cdf0e10cSrcweir } 321cdf0e10cSrcweir 322cdf0e10cSrcweir void SvRTFParser::ScanText( const sal_Unicode cBreak ) 323cdf0e10cSrcweir { 324cdf0e10cSrcweir String aStrBuffer; 325cdf0e10cSrcweir int bWeiter = true; 326cdf0e10cSrcweir while( bWeiter && IsParserWorking() && aStrBuffer.Len() < MAX_STRING_LEN) 327cdf0e10cSrcweir { 328cdf0e10cSrcweir int bNextCh = true; 329cdf0e10cSrcweir switch( nNextCh ) 330cdf0e10cSrcweir { 331cdf0e10cSrcweir case '\\': 332cdf0e10cSrcweir { 333cdf0e10cSrcweir switch (nNextCh = GetNextChar()) 334cdf0e10cSrcweir { 335cdf0e10cSrcweir case '\'': 336cdf0e10cSrcweir { 337cdf0e10cSrcweir 338cdf0e10cSrcweir #if 0 339cdf0e10cSrcweir // #i35653 patch from cmc 340cdf0e10cSrcweir ByteString aByteString(static_cast<char>(GetHexValue())); 341cdf0e10cSrcweir if (aByteString.Len()) 342cdf0e10cSrcweir aStrBuffer.Append(String(aByteString, GetSrcEncoding())); 343cdf0e10cSrcweir #else 344cdf0e10cSrcweir ByteString aByteString; 345cdf0e10cSrcweir while (1) 346cdf0e10cSrcweir { 347cdf0e10cSrcweir aByteString.Append((char)GetHexValue()); 348cdf0e10cSrcweir 349cdf0e10cSrcweir bool bBreak = false; 350cdf0e10cSrcweir sal_Char nSlash = '\\'; 351cdf0e10cSrcweir while (!bBreak) 352cdf0e10cSrcweir { 353cdf0e10cSrcweir wchar_t __next=GetNextChar(); 354cdf0e10cSrcweir if (__next>0xFF) // fix for #i43933# and #i35653# 355cdf0e10cSrcweir { 356cdf0e10cSrcweir if (aByteString.Len()) 357cdf0e10cSrcweir aStrBuffer.Append(String(aByteString, GetSrcEncoding())); 358cdf0e10cSrcweir aStrBuffer.Append((sal_Unicode)__next); 359cdf0e10cSrcweir 360cdf0e10cSrcweir aByteString.Erase(); 361cdf0e10cSrcweir continue; 362cdf0e10cSrcweir } 363cdf0e10cSrcweir nSlash = (sal_Char)__next; 364cdf0e10cSrcweir while (nSlash == 0xD || nSlash == 0xA) 365cdf0e10cSrcweir nSlash = (sal_Char)GetNextChar(); 366cdf0e10cSrcweir 367cdf0e10cSrcweir switch (nSlash) 368cdf0e10cSrcweir { 369cdf0e10cSrcweir case '{': 370cdf0e10cSrcweir case '}': 371cdf0e10cSrcweir case '\\': 372cdf0e10cSrcweir bBreak = true; 373cdf0e10cSrcweir break; 374cdf0e10cSrcweir default: 375cdf0e10cSrcweir aByteString.Append(nSlash); 376cdf0e10cSrcweir break; 377cdf0e10cSrcweir } 378cdf0e10cSrcweir } 379cdf0e10cSrcweir 380cdf0e10cSrcweir nNextCh = GetNextChar(); 381cdf0e10cSrcweir 382cdf0e10cSrcweir if (nSlash != '\\' || nNextCh != '\'') 383cdf0e10cSrcweir { 384cdf0e10cSrcweir rInput.SeekRel(-1); 385cdf0e10cSrcweir nNextCh = nSlash; 386cdf0e10cSrcweir break; 387cdf0e10cSrcweir } 388cdf0e10cSrcweir } 389cdf0e10cSrcweir 390cdf0e10cSrcweir bNextCh = false; 391cdf0e10cSrcweir 392cdf0e10cSrcweir if (aByteString.Len()) 393cdf0e10cSrcweir aStrBuffer.Append(String(aByteString, GetSrcEncoding())); 394cdf0e10cSrcweir #endif 395cdf0e10cSrcweir } 396cdf0e10cSrcweir break; 397cdf0e10cSrcweir case '\\': 398cdf0e10cSrcweir case '}': 399cdf0e10cSrcweir case '{': 400cdf0e10cSrcweir case '+': // habe ich in einem RTF-File gefunden 401cdf0e10cSrcweir aStrBuffer.Append(nNextCh); 402cdf0e10cSrcweir break; 403cdf0e10cSrcweir case '~': // nonbreaking space 404cdf0e10cSrcweir aStrBuffer.Append(static_cast< sal_Unicode >(0xA0)); 405cdf0e10cSrcweir break; 406cdf0e10cSrcweir case '-': // optional hyphen 407cdf0e10cSrcweir aStrBuffer.Append(static_cast< sal_Unicode >(0xAD)); 408cdf0e10cSrcweir break; 409cdf0e10cSrcweir case '_': // nonbreaking hyphen 410cdf0e10cSrcweir aStrBuffer.Append(static_cast< sal_Unicode >(0x2011)); 411cdf0e10cSrcweir break; 412cdf0e10cSrcweir 413cdf0e10cSrcweir case 'u': 414cdf0e10cSrcweir // UNI-Code Zeichen lesen 415cdf0e10cSrcweir { 416cdf0e10cSrcweir nNextCh = GetNextChar(); 417cdf0e10cSrcweir rInput.SeekRel( -2 ); 418cdf0e10cSrcweir 419cdf0e10cSrcweir if( '-' == nNextCh || RTF_ISDIGIT( nNextCh ) ) 420cdf0e10cSrcweir { 421cdf0e10cSrcweir bRTF_InTextRead = true; 422cdf0e10cSrcweir 423cdf0e10cSrcweir String sSave( aToken ); 424cdf0e10cSrcweir nNextCh = '\\'; 425cdf0e10cSrcweir #ifdef DBG_UTIL 426cdf0e10cSrcweir int nToken = 427cdf0e10cSrcweir #endif 428cdf0e10cSrcweir _GetNextToken(); 429cdf0e10cSrcweir DBG_ASSERT( RTF_U == nToken, "doch kein UNI-Code Zeichen" ); 430cdf0e10cSrcweir // dont convert symbol chars 431cdf0e10cSrcweir aStrBuffer.Append( 432cdf0e10cSrcweir static_cast< sal_Unicode >(nTokenValue)); 433cdf0e10cSrcweir 434cdf0e10cSrcweir // overread the next n "RTF" characters. This 435cdf0e10cSrcweir // can be also \{, \}, \'88 436cdf0e10cSrcweir for( sal_uInt8 m = 0; m < nUCharOverread; ++m ) 437cdf0e10cSrcweir { 438cdf0e10cSrcweir sal_Unicode cAnsi = nNextCh; 439cdf0e10cSrcweir while( 0xD == cAnsi ) 440cdf0e10cSrcweir cAnsi = GetNextChar(); 441cdf0e10cSrcweir while( 0xA == cAnsi ) 442cdf0e10cSrcweir cAnsi = GetNextChar(); 443cdf0e10cSrcweir 444cdf0e10cSrcweir if( '\\' == cAnsi && 445cdf0e10cSrcweir '\'' == ( cAnsi = GetNextChar() )) 446cdf0e10cSrcweir // HexValue ueberlesen 447cdf0e10cSrcweir cAnsi = GetHexValue(); 448cdf0e10cSrcweir nNextCh = GetNextChar(); 449cdf0e10cSrcweir } 450cdf0e10cSrcweir bNextCh = false; 451cdf0e10cSrcweir aToken = sSave; 452cdf0e10cSrcweir bRTF_InTextRead = false; 453cdf0e10cSrcweir } 454cdf0e10cSrcweir else 455cdf0e10cSrcweir { 456cdf0e10cSrcweir nNextCh = '\\'; 457cdf0e10cSrcweir bWeiter = false; // Abbrechen, String zusammen 458cdf0e10cSrcweir } 459cdf0e10cSrcweir } 460cdf0e10cSrcweir break; 461cdf0e10cSrcweir 462cdf0e10cSrcweir default: 463cdf0e10cSrcweir rInput.SeekRel( -1 ); 464cdf0e10cSrcweir nNextCh = '\\'; 465cdf0e10cSrcweir bWeiter = false; // Abbrechen, String zusammen 466cdf0e10cSrcweir break; 467cdf0e10cSrcweir } 468cdf0e10cSrcweir } 469cdf0e10cSrcweir break; 470cdf0e10cSrcweir 471cdf0e10cSrcweir case sal_Unicode(EOF): 472cdf0e10cSrcweir eState = SVPAR_ERROR; 473cdf0e10cSrcweir // weiter 474cdf0e10cSrcweir case '{': 475cdf0e10cSrcweir case '}': 476cdf0e10cSrcweir bWeiter = false; 477cdf0e10cSrcweir break; 478cdf0e10cSrcweir 479cdf0e10cSrcweir case 0x0a: 480cdf0e10cSrcweir case 0x0d: 481cdf0e10cSrcweir break; 482cdf0e10cSrcweir 483cdf0e10cSrcweir default: 484cdf0e10cSrcweir if( nNextCh == cBreak || aStrBuffer.Len() >= MAX_STRING_LEN) 485cdf0e10cSrcweir bWeiter = false; 486cdf0e10cSrcweir else 487cdf0e10cSrcweir { 488cdf0e10cSrcweir do { 489cdf0e10cSrcweir // alle anderen Zeichen kommen in den Text 490cdf0e10cSrcweir aStrBuffer.Append(nNextCh); 491cdf0e10cSrcweir 492cdf0e10cSrcweir if (sal_Unicode(EOF) == (nNextCh = GetNextChar())) 493cdf0e10cSrcweir { 494cdf0e10cSrcweir if (aStrBuffer.Len()) 495cdf0e10cSrcweir aToken += aStrBuffer; 496cdf0e10cSrcweir return; 497cdf0e10cSrcweir } 498cdf0e10cSrcweir } while 499cdf0e10cSrcweir ( 500cdf0e10cSrcweir (RTF_ISALPHA(nNextCh) || RTF_ISDIGIT(nNextCh)) && 501cdf0e10cSrcweir (aStrBuffer.Len() < MAX_STRING_LEN) 502cdf0e10cSrcweir ); 503cdf0e10cSrcweir bNextCh = false; 504cdf0e10cSrcweir } 505cdf0e10cSrcweir } 506cdf0e10cSrcweir 507cdf0e10cSrcweir if( bWeiter && bNextCh ) 508cdf0e10cSrcweir nNextCh = GetNextChar(); 509cdf0e10cSrcweir } 510cdf0e10cSrcweir 511cdf0e10cSrcweir if (aStrBuffer.Len()) 512cdf0e10cSrcweir aToken += aStrBuffer; 513cdf0e10cSrcweir } 514cdf0e10cSrcweir 515cdf0e10cSrcweir 516cdf0e10cSrcweir short SvRTFParser::_inSkipGroup=0; 517cdf0e10cSrcweir 518cdf0e10cSrcweir void SvRTFParser::SkipGroup() 519cdf0e10cSrcweir { 520cdf0e10cSrcweir short nBrackets=1; 521cdf0e10cSrcweir if (_inSkipGroup>0) 522cdf0e10cSrcweir return; 523cdf0e10cSrcweir _inSkipGroup++; 524cdf0e10cSrcweir #if 1 //#i16185# fecking \bin keyword 525cdf0e10cSrcweir do 526cdf0e10cSrcweir { 527cdf0e10cSrcweir switch (nNextCh) 528cdf0e10cSrcweir { 529cdf0e10cSrcweir case '{': 530cdf0e10cSrcweir ++nBrackets; 531cdf0e10cSrcweir break; 532cdf0e10cSrcweir case '}': 533cdf0e10cSrcweir if (!--nBrackets) { 534cdf0e10cSrcweir _inSkipGroup--; 535cdf0e10cSrcweir return; 536cdf0e10cSrcweir } 537cdf0e10cSrcweir break; 538cdf0e10cSrcweir } 539cdf0e10cSrcweir int nToken = _GetNextToken(); 540cdf0e10cSrcweir if (nToken == RTF_BIN) 541cdf0e10cSrcweir { 542cdf0e10cSrcweir rInput.SeekRel(-1); 543cdf0e10cSrcweir rInput.SeekRel(nTokenValue); 544cdf0e10cSrcweir nNextCh = GetNextChar(); 545cdf0e10cSrcweir } 546cdf0e10cSrcweir while (nNextCh==0xa || nNextCh==0xd) 547cdf0e10cSrcweir { 548cdf0e10cSrcweir nNextCh = GetNextChar(); 549cdf0e10cSrcweir } 550cdf0e10cSrcweir } while (sal_Unicode(EOF) != nNextCh && IsParserWorking()); 551cdf0e10cSrcweir #else 552cdf0e10cSrcweir sal_Unicode cPrev = 0; 553cdf0e10cSrcweir do { 554cdf0e10cSrcweir switch( nNextCh ) 555cdf0e10cSrcweir { 556cdf0e10cSrcweir case '{': 557cdf0e10cSrcweir if( '\\' != cPrev ) 558cdf0e10cSrcweir ++nBrackets; 559cdf0e10cSrcweir break; 560cdf0e10cSrcweir 561cdf0e10cSrcweir case '}': 562cdf0e10cSrcweir if( '\\' != cPrev && !--nBrackets ) 563cdf0e10cSrcweir return; 564cdf0e10cSrcweir break; 565cdf0e10cSrcweir 566cdf0e10cSrcweir case '\\': 567cdf0e10cSrcweir if( '\\' == cPrev ) 568cdf0e10cSrcweir nNextCh = 0; 569cdf0e10cSrcweir break; 570cdf0e10cSrcweir } 571cdf0e10cSrcweir cPrev = nNextCh; 572cdf0e10cSrcweir nNextCh = GetNextChar(); 573cdf0e10cSrcweir } while( sal_Unicode(EOF) != nNextCh && IsParserWorking() ); 574cdf0e10cSrcweir #endif 575cdf0e10cSrcweir 576cdf0e10cSrcweir if( SVPAR_PENDING != eState && '}' != nNextCh ) 577cdf0e10cSrcweir eState = SVPAR_ERROR; 578cdf0e10cSrcweir _inSkipGroup--; 579cdf0e10cSrcweir } 580cdf0e10cSrcweir 581cdf0e10cSrcweir void SvRTFParser::ReadUnknownData() { SkipGroup(); } 582cdf0e10cSrcweir void SvRTFParser::ReadBitmapData() { SkipGroup(); } 583cdf0e10cSrcweir void SvRTFParser::ReadOLEData() { SkipGroup(); } 584cdf0e10cSrcweir 585cdf0e10cSrcweir 586cdf0e10cSrcweir SvParserState SvRTFParser::CallParser() 587cdf0e10cSrcweir { 588cdf0e10cSrcweir sal_Char cFirstCh; 589cdf0e10cSrcweir nNextChPos = rInput.Tell(); 590cdf0e10cSrcweir rInput >> cFirstCh; nNextCh = cFirstCh; 591cdf0e10cSrcweir eState = SVPAR_WORKING; 592cdf0e10cSrcweir nOpenBrakets = 0; 593cdf0e10cSrcweir SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 ); 594cdf0e10cSrcweir eUNICodeSet = RTL_TEXTENCODING_MS_1252; // default ist ANSI-CodeSet 595cdf0e10cSrcweir 596cdf0e10cSrcweir // die 1. beiden Token muessen '{' und \\rtf sein !! 597cdf0e10cSrcweir if( '{' == GetNextToken() && RTF_RTF == GetNextToken() ) 598cdf0e10cSrcweir { 599cdf0e10cSrcweir AddRef(); 600cdf0e10cSrcweir Continue( 0 ); 601cdf0e10cSrcweir if( SVPAR_PENDING != eState ) 602cdf0e10cSrcweir ReleaseRef(); // dann brauchen wir den Parser nicht mehr! 603cdf0e10cSrcweir } 604cdf0e10cSrcweir else 605cdf0e10cSrcweir eState = SVPAR_ERROR; 606cdf0e10cSrcweir 607cdf0e10cSrcweir return eState; 608cdf0e10cSrcweir } 609cdf0e10cSrcweir 610cdf0e10cSrcweir void SvRTFParser::Continue( int nToken ) 611cdf0e10cSrcweir { 612cdf0e10cSrcweir // DBG_ASSERT( SVPAR_CS_DONTKNOW == GetCharSet(), 613cdf0e10cSrcweir // "Zeichensatz wurde geaendert." ); 614cdf0e10cSrcweir 615cdf0e10cSrcweir if( !nToken ) 616cdf0e10cSrcweir nToken = GetNextToken(); 617cdf0e10cSrcweir 618cdf0e10cSrcweir while( IsParserWorking() ) 619cdf0e10cSrcweir { 620cdf0e10cSrcweir SaveState( nToken ); 621cdf0e10cSrcweir switch( nToken ) 622cdf0e10cSrcweir { 623cdf0e10cSrcweir case '}': 624cdf0e10cSrcweir if( nOpenBrakets ) 625cdf0e10cSrcweir goto NEXTTOKEN; 626cdf0e10cSrcweir eState = SVPAR_ACCEPTED; 627cdf0e10cSrcweir break; 628cdf0e10cSrcweir 629cdf0e10cSrcweir case '{': 630cdf0e10cSrcweir // eine unbekannte Gruppe ? 631cdf0e10cSrcweir { 632cdf0e10cSrcweir if( RTF_IGNOREFLAG != GetNextToken() ) 633cdf0e10cSrcweir nToken = SkipToken( -1 ); 634cdf0e10cSrcweir else if( RTF_UNKNOWNCONTROL != GetNextToken() ) 635cdf0e10cSrcweir nToken = SkipToken( -2 ); 636cdf0e10cSrcweir else 637cdf0e10cSrcweir { 638cdf0e10cSrcweir // gleich herausfiltern 639cdf0e10cSrcweir ReadUnknownData(); 640cdf0e10cSrcweir nToken = GetNextToken(); 641cdf0e10cSrcweir if( '}' != nToken ) 642cdf0e10cSrcweir eState = SVPAR_ERROR; 643cdf0e10cSrcweir break; // auf zum naechsten Token!! 644cdf0e10cSrcweir } 645cdf0e10cSrcweir } 646cdf0e10cSrcweir goto NEXTTOKEN; 647cdf0e10cSrcweir 648cdf0e10cSrcweir case RTF_UNKNOWNCONTROL: 649cdf0e10cSrcweir break; // unbekannte Token ueberspringen 650cdf0e10cSrcweir case RTF_NEXTTYPE: 651cdf0e10cSrcweir case RTF_ANSITYPE: 652cdf0e10cSrcweir SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 ); 653cdf0e10cSrcweir break; 654cdf0e10cSrcweir case RTF_MACTYPE: 655cdf0e10cSrcweir SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_APPLE_ROMAN ); 656cdf0e10cSrcweir break; 657cdf0e10cSrcweir case RTF_PCTYPE: 658cdf0e10cSrcweir SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_IBM_437 ); 659cdf0e10cSrcweir break; 660cdf0e10cSrcweir case RTF_PCATYPE: 661cdf0e10cSrcweir SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_IBM_850 ); 662cdf0e10cSrcweir break; 663cdf0e10cSrcweir case RTF_ANSICPG: 664cdf0e10cSrcweir eCodeSet = rtl_getTextEncodingFromWindowsCodePage(nTokenValue); 665cdf0e10cSrcweir SetSrcEncoding(eCodeSet); 666cdf0e10cSrcweir break; 667cdf0e10cSrcweir default: 668cdf0e10cSrcweir NEXTTOKEN: 669cdf0e10cSrcweir NextToken( nToken ); 670cdf0e10cSrcweir break; 671cdf0e10cSrcweir } 672cdf0e10cSrcweir if( IsParserWorking() ) 673cdf0e10cSrcweir SaveState( 0 ); // bis hierhin abgearbeitet, 674cdf0e10cSrcweir // weiter mit neuem Token! 675cdf0e10cSrcweir nToken = GetNextToken(); 676cdf0e10cSrcweir } 677cdf0e10cSrcweir if( SVPAR_ACCEPTED == eState && 0 < nOpenBrakets ) 678cdf0e10cSrcweir eState = SVPAR_ERROR; 679cdf0e10cSrcweir } 680cdf0e10cSrcweir 681cdf0e10cSrcweir void SvRTFParser::SetEncoding( rtl_TextEncoding eEnc ) 682cdf0e10cSrcweir { 683cdf0e10cSrcweir if (eEnc == RTL_TEXTENCODING_DONTKNOW) 684cdf0e10cSrcweir eEnc = GetCodeSet(); 685cdf0e10cSrcweir 686cdf0e10cSrcweir if (!aParserStates.empty()) 687cdf0e10cSrcweir aParserStates.top().eCodeSet = eEnc; 688cdf0e10cSrcweir SetSrcEncoding(eEnc); 689cdf0e10cSrcweir } 690cdf0e10cSrcweir 691cdf0e10cSrcweir #ifdef USED 692cdf0e10cSrcweir void SvRTFParser::SaveState( int nToken ) 693cdf0e10cSrcweir { 694cdf0e10cSrcweir SvParser::SaveState( nToken ); 695cdf0e10cSrcweir } 696cdf0e10cSrcweir 697cdf0e10cSrcweir void SvRTFParser::RestoreState() 698cdf0e10cSrcweir { 699cdf0e10cSrcweir SvParser::RestoreState(); 700cdf0e10cSrcweir } 701cdf0e10cSrcweir #endif 702cdf0e10cSrcweir 703cdf0e10cSrcweir /* vi:set tabstop=4 shiftwidth=4 expandtab: */ 704