1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_svtools.hxx" 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil -*- */ 32*cdf0e10cSrcweir 33*cdf0e10cSrcweir #include <stdio.h> // for EOF 34*cdf0e10cSrcweir #include <rtl/tencinfo.h> 35*cdf0e10cSrcweir #include <tools/stream.hxx> 36*cdf0e10cSrcweir #include <tools/debug.hxx> 37*cdf0e10cSrcweir #include <svtools/rtftoken.h> 38*cdf0e10cSrcweir #include <svtools/rtfkeywd.hxx> 39*cdf0e10cSrcweir #include <svtools/parrtf.hxx> 40*cdf0e10cSrcweir 41*cdf0e10cSrcweir const int MAX_STRING_LEN = 1024; 42*cdf0e10cSrcweir const int MAX_TOKEN_LEN = 128; 43*cdf0e10cSrcweir 44*cdf0e10cSrcweir #define RTF_ISDIGIT( c ) (c >= '0' && c <= '9') 45*cdf0e10cSrcweir #define RTF_ISALPHA( c ) ( (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ) 46*cdf0e10cSrcweir 47*cdf0e10cSrcweir SvRTFParser::SvRTFParser( SvStream& rIn, sal_uInt8 nStackSize ) 48*cdf0e10cSrcweir : SvParser( rIn, nStackSize ), 49*cdf0e10cSrcweir eUNICodeSet( RTL_TEXTENCODING_MS_1252 ), // default ist ANSI-CodeSet 50*cdf0e10cSrcweir nUCharOverread( 1 ) 51*cdf0e10cSrcweir { 52*cdf0e10cSrcweir // default ist ANSI-CodeSet 53*cdf0e10cSrcweir SetSrcEncoding( RTL_TEXTENCODING_MS_1252 ); 54*cdf0e10cSrcweir bRTF_InTextRead = false; 55*cdf0e10cSrcweir } 56*cdf0e10cSrcweir 57*cdf0e10cSrcweir SvRTFParser::~SvRTFParser() 58*cdf0e10cSrcweir { 59*cdf0e10cSrcweir } 60*cdf0e10cSrcweir 61*cdf0e10cSrcweir 62*cdf0e10cSrcweir 63*cdf0e10cSrcweir 64*cdf0e10cSrcweir int SvRTFParser::_GetNextToken() 65*cdf0e10cSrcweir { 66*cdf0e10cSrcweir int nRet = 0; 67*cdf0e10cSrcweir do { 68*cdf0e10cSrcweir int bNextCh = true; 69*cdf0e10cSrcweir switch( nNextCh ) 70*cdf0e10cSrcweir { 71*cdf0e10cSrcweir case '\\': 72*cdf0e10cSrcweir { 73*cdf0e10cSrcweir // Steuerzeichen 74*cdf0e10cSrcweir switch( nNextCh = GetNextChar() ) 75*cdf0e10cSrcweir { 76*cdf0e10cSrcweir case '{': 77*cdf0e10cSrcweir case '}': 78*cdf0e10cSrcweir case '\\': 79*cdf0e10cSrcweir case '+': // habe ich in einem RTF-File gefunden 80*cdf0e10cSrcweir case '~': // nonbreaking space 81*cdf0e10cSrcweir case '-': // optional hyphen 82*cdf0e10cSrcweir case '_': // nonbreaking hyphen 83*cdf0e10cSrcweir case '\'': // HexValue 84*cdf0e10cSrcweir nNextCh = '\\'; 85*cdf0e10cSrcweir rInput.SeekRel( -1 ); 86*cdf0e10cSrcweir ScanText(); 87*cdf0e10cSrcweir nRet = RTF_TEXTTOKEN; 88*cdf0e10cSrcweir bNextCh = 0 == nNextCh; 89*cdf0e10cSrcweir break; 90*cdf0e10cSrcweir 91*cdf0e10cSrcweir case '*': // ignoreflag 92*cdf0e10cSrcweir nRet = RTF_IGNOREFLAG; 93*cdf0e10cSrcweir break; 94*cdf0e10cSrcweir case ':': // subentry in an index entry 95*cdf0e10cSrcweir nRet = RTF_SUBENTRYINDEX; 96*cdf0e10cSrcweir break; 97*cdf0e10cSrcweir case '|': // formula-charakter 98*cdf0e10cSrcweir nRet = RTF_FORMULA; 99*cdf0e10cSrcweir break; 100*cdf0e10cSrcweir 101*cdf0e10cSrcweir case 0x0a: 102*cdf0e10cSrcweir case 0x0d: 103*cdf0e10cSrcweir nRet = RTF_PAR; 104*cdf0e10cSrcweir break; 105*cdf0e10cSrcweir 106*cdf0e10cSrcweir default: 107*cdf0e10cSrcweir if( RTF_ISALPHA( nNextCh ) ) 108*cdf0e10cSrcweir { 109*cdf0e10cSrcweir aToken = '\\'; 110*cdf0e10cSrcweir { 111*cdf0e10cSrcweir String aStrBuffer; 112*cdf0e10cSrcweir sal_Unicode* pStr = aStrBuffer.AllocBuffer( 113*cdf0e10cSrcweir MAX_TOKEN_LEN ); 114*cdf0e10cSrcweir xub_StrLen nStrLen = 0; 115*cdf0e10cSrcweir do { 116*cdf0e10cSrcweir *(pStr + nStrLen++) = nNextCh; 117*cdf0e10cSrcweir if( MAX_TOKEN_LEN == nStrLen ) 118*cdf0e10cSrcweir { 119*cdf0e10cSrcweir aToken += aStrBuffer; 120*cdf0e10cSrcweir aToken.GetBufferAccess(); // make unique string! 121*cdf0e10cSrcweir nStrLen = 0; 122*cdf0e10cSrcweir } 123*cdf0e10cSrcweir nNextCh = GetNextChar(); 124*cdf0e10cSrcweir } while( RTF_ISALPHA( nNextCh ) ); 125*cdf0e10cSrcweir if( nStrLen ) 126*cdf0e10cSrcweir { 127*cdf0e10cSrcweir aStrBuffer.ReleaseBufferAccess( nStrLen ); 128*cdf0e10cSrcweir aToken += aStrBuffer; 129*cdf0e10cSrcweir } 130*cdf0e10cSrcweir } 131*cdf0e10cSrcweir 132*cdf0e10cSrcweir // Minus fuer numerischen Parameter 133*cdf0e10cSrcweir int bNegValue = false; 134*cdf0e10cSrcweir if( '-' == nNextCh ) 135*cdf0e10cSrcweir { 136*cdf0e10cSrcweir bNegValue = true; 137*cdf0e10cSrcweir nNextCh = GetNextChar(); 138*cdf0e10cSrcweir } 139*cdf0e10cSrcweir 140*cdf0e10cSrcweir // evt. Numerischer Parameter 141*cdf0e10cSrcweir if( RTF_ISDIGIT( nNextCh ) ) 142*cdf0e10cSrcweir { 143*cdf0e10cSrcweir nTokenValue = 0; 144*cdf0e10cSrcweir do { 145*cdf0e10cSrcweir nTokenValue *= 10; 146*cdf0e10cSrcweir nTokenValue += nNextCh - '0'; 147*cdf0e10cSrcweir nNextCh = GetNextChar(); 148*cdf0e10cSrcweir } while( RTF_ISDIGIT( nNextCh ) ); 149*cdf0e10cSrcweir if( bNegValue ) 150*cdf0e10cSrcweir nTokenValue = -nTokenValue; 151*cdf0e10cSrcweir bTokenHasValue=true; 152*cdf0e10cSrcweir } 153*cdf0e10cSrcweir else if( bNegValue ) // das Minus wieder zurueck 154*cdf0e10cSrcweir { 155*cdf0e10cSrcweir nNextCh = '-'; 156*cdf0e10cSrcweir rInput.SeekRel( -1 ); 157*cdf0e10cSrcweir } 158*cdf0e10cSrcweir if( ' ' == nNextCh ) // Blank gehoert zum Token! 159*cdf0e10cSrcweir nNextCh = GetNextChar(); 160*cdf0e10cSrcweir 161*cdf0e10cSrcweir // suche das Token in der Tabelle: 162*cdf0e10cSrcweir if( 0 == (nRet = GetRTFToken( aToken )) ) 163*cdf0e10cSrcweir // Unknown Control 164*cdf0e10cSrcweir nRet = RTF_UNKNOWNCONTROL; 165*cdf0e10cSrcweir 166*cdf0e10cSrcweir // bug 76812 - unicode token handled as normal text 167*cdf0e10cSrcweir bNextCh = false; 168*cdf0e10cSrcweir switch( nRet ) 169*cdf0e10cSrcweir { 170*cdf0e10cSrcweir case RTF_UC: 171*cdf0e10cSrcweir if( 0 <= nTokenValue ) 172*cdf0e10cSrcweir { 173*cdf0e10cSrcweir nUCharOverread = (sal_uInt8)nTokenValue; 174*cdf0e10cSrcweir #if 1 175*cdf0e10cSrcweir //cmc: other ifdef breaks #i3584 176*cdf0e10cSrcweir aParserStates.top(). 177*cdf0e10cSrcweir nUCharOverread = nUCharOverread; 178*cdf0e10cSrcweir #else 179*cdf0e10cSrcweir if( !nUCharOverread ) 180*cdf0e10cSrcweir nUCharOverread = aParserStates.top().nUCharOverread; 181*cdf0e10cSrcweir else 182*cdf0e10cSrcweir aParserStates.top(). 183*cdf0e10cSrcweir nUCharOverread = nUCharOverread; 184*cdf0e10cSrcweir #endif 185*cdf0e10cSrcweir } 186*cdf0e10cSrcweir aToken.Erase(); // #i47831# erase token to prevent the token from beeing treated as text 187*cdf0e10cSrcweir // read next token 188*cdf0e10cSrcweir nRet = 0; 189*cdf0e10cSrcweir break; 190*cdf0e10cSrcweir 191*cdf0e10cSrcweir case RTF_UPR: 192*cdf0e10cSrcweir if (!_inSkipGroup) { 193*cdf0e10cSrcweir // UPR - overread the group with the ansi 194*cdf0e10cSrcweir // informations 195*cdf0e10cSrcweir while( '{' != _GetNextToken() ) 196*cdf0e10cSrcweir ; 197*cdf0e10cSrcweir SkipGroup(); 198*cdf0e10cSrcweir _GetNextToken(); // overread the last bracket 199*cdf0e10cSrcweir nRet = 0; 200*cdf0e10cSrcweir } 201*cdf0e10cSrcweir break; 202*cdf0e10cSrcweir 203*cdf0e10cSrcweir case RTF_U: 204*cdf0e10cSrcweir if( !bRTF_InTextRead ) 205*cdf0e10cSrcweir { 206*cdf0e10cSrcweir nRet = RTF_TEXTTOKEN; 207*cdf0e10cSrcweir aToken = (sal_Unicode)nTokenValue; 208*cdf0e10cSrcweir 209*cdf0e10cSrcweir // overread the next n "RTF" characters. This 210*cdf0e10cSrcweir // can be also \{, \}, \'88 211*cdf0e10cSrcweir for( sal_uInt8 m = 0; m < nUCharOverread; ++m ) 212*cdf0e10cSrcweir { 213*cdf0e10cSrcweir sal_Unicode cAnsi = nNextCh; 214*cdf0e10cSrcweir while( 0xD == cAnsi ) 215*cdf0e10cSrcweir cAnsi = GetNextChar(); 216*cdf0e10cSrcweir while( 0xA == cAnsi ) 217*cdf0e10cSrcweir cAnsi = GetNextChar(); 218*cdf0e10cSrcweir 219*cdf0e10cSrcweir if( '\\' == cAnsi && 220*cdf0e10cSrcweir '\'' == ( cAnsi = GetNextChar() )) 221*cdf0e10cSrcweir // HexValue ueberlesen 222*cdf0e10cSrcweir cAnsi = GetHexValue(); 223*cdf0e10cSrcweir nNextCh = GetNextChar(); 224*cdf0e10cSrcweir } 225*cdf0e10cSrcweir ScanText(); 226*cdf0e10cSrcweir bNextCh = 0 == nNextCh; 227*cdf0e10cSrcweir } 228*cdf0e10cSrcweir break; 229*cdf0e10cSrcweir } 230*cdf0e10cSrcweir } 231*cdf0e10cSrcweir else if( SVPAR_PENDING != eState ) 232*cdf0e10cSrcweir { 233*cdf0e10cSrcweir // Bug 34631 - "\ " ueberlesen - Blank als Zeichen 234*cdf0e10cSrcweir // eState = SVPAR_ERROR; 235*cdf0e10cSrcweir bNextCh = false; 236*cdf0e10cSrcweir } 237*cdf0e10cSrcweir break; 238*cdf0e10cSrcweir } 239*cdf0e10cSrcweir } 240*cdf0e10cSrcweir break; 241*cdf0e10cSrcweir 242*cdf0e10cSrcweir case sal_Unicode(EOF): 243*cdf0e10cSrcweir eState = SVPAR_ACCEPTED; 244*cdf0e10cSrcweir nRet = nNextCh; 245*cdf0e10cSrcweir break; 246*cdf0e10cSrcweir 247*cdf0e10cSrcweir case '{': 248*cdf0e10cSrcweir { 249*cdf0e10cSrcweir if( 0 <= nOpenBrakets ) 250*cdf0e10cSrcweir { 251*cdf0e10cSrcweir RtfParserState_Impl aState( nUCharOverread, GetSrcEncoding() ); 252*cdf0e10cSrcweir aParserStates.push( aState ); 253*cdf0e10cSrcweir } 254*cdf0e10cSrcweir ++nOpenBrakets; 255*cdf0e10cSrcweir DBG_ASSERT( 256*cdf0e10cSrcweir static_cast<size_t>(nOpenBrakets) == aParserStates.size(), 257*cdf0e10cSrcweir "ParserStateStack unequal to bracket count" ); 258*cdf0e10cSrcweir nRet = nNextCh; 259*cdf0e10cSrcweir } 260*cdf0e10cSrcweir break; 261*cdf0e10cSrcweir 262*cdf0e10cSrcweir case '}': 263*cdf0e10cSrcweir --nOpenBrakets; 264*cdf0e10cSrcweir if( 0 <= nOpenBrakets ) 265*cdf0e10cSrcweir { 266*cdf0e10cSrcweir aParserStates.pop(); 267*cdf0e10cSrcweir if( !aParserStates.empty() ) 268*cdf0e10cSrcweir { 269*cdf0e10cSrcweir const RtfParserState_Impl& rRPS = 270*cdf0e10cSrcweir aParserStates.top(); 271*cdf0e10cSrcweir nUCharOverread = rRPS.nUCharOverread; 272*cdf0e10cSrcweir SetSrcEncoding( rRPS.eCodeSet ); 273*cdf0e10cSrcweir } 274*cdf0e10cSrcweir else 275*cdf0e10cSrcweir { 276*cdf0e10cSrcweir nUCharOverread = 1; 277*cdf0e10cSrcweir SetSrcEncoding( GetCodeSet() ); 278*cdf0e10cSrcweir } 279*cdf0e10cSrcweir } 280*cdf0e10cSrcweir DBG_ASSERT( 281*cdf0e10cSrcweir static_cast<size_t>(nOpenBrakets) == aParserStates.size(), 282*cdf0e10cSrcweir "ParserStateStack unequal to bracket count" ); 283*cdf0e10cSrcweir nRet = nNextCh; 284*cdf0e10cSrcweir break; 285*cdf0e10cSrcweir 286*cdf0e10cSrcweir case 0x0d: 287*cdf0e10cSrcweir case 0x0a: 288*cdf0e10cSrcweir break; 289*cdf0e10cSrcweir 290*cdf0e10cSrcweir default: 291*cdf0e10cSrcweir // es folgt normaler Text 292*cdf0e10cSrcweir ScanText(); 293*cdf0e10cSrcweir nRet = RTF_TEXTTOKEN; 294*cdf0e10cSrcweir bNextCh = 0 == nNextCh; 295*cdf0e10cSrcweir break; 296*cdf0e10cSrcweir } 297*cdf0e10cSrcweir 298*cdf0e10cSrcweir if( bNextCh ) 299*cdf0e10cSrcweir nNextCh = GetNextChar(); 300*cdf0e10cSrcweir 301*cdf0e10cSrcweir } while( !nRet && SVPAR_WORKING == eState ); 302*cdf0e10cSrcweir return nRet; 303*cdf0e10cSrcweir } 304*cdf0e10cSrcweir 305*cdf0e10cSrcweir 306*cdf0e10cSrcweir sal_Unicode SvRTFParser::GetHexValue() 307*cdf0e10cSrcweir { 308*cdf0e10cSrcweir // Hex-Wert sammeln 309*cdf0e10cSrcweir register int n; 310*cdf0e10cSrcweir register sal_Unicode nHexVal = 0; 311*cdf0e10cSrcweir 312*cdf0e10cSrcweir for( n = 0; n < 2; ++n ) 313*cdf0e10cSrcweir { 314*cdf0e10cSrcweir nHexVal *= 16; 315*cdf0e10cSrcweir nNextCh = GetNextChar(); 316*cdf0e10cSrcweir if( nNextCh >= '0' && nNextCh <= '9' ) 317*cdf0e10cSrcweir nHexVal += (nNextCh - 48); 318*cdf0e10cSrcweir else if( nNextCh >= 'a' && nNextCh <= 'f' ) 319*cdf0e10cSrcweir nHexVal += (nNextCh - 87); 320*cdf0e10cSrcweir else if( nNextCh >= 'A' && nNextCh <= 'F' ) 321*cdf0e10cSrcweir nHexVal += (nNextCh - 55); 322*cdf0e10cSrcweir } 323*cdf0e10cSrcweir return nHexVal; 324*cdf0e10cSrcweir } 325*cdf0e10cSrcweir 326*cdf0e10cSrcweir void SvRTFParser::ScanText( const sal_Unicode cBreak ) 327*cdf0e10cSrcweir { 328*cdf0e10cSrcweir String aStrBuffer; 329*cdf0e10cSrcweir int bWeiter = true; 330*cdf0e10cSrcweir while( bWeiter && IsParserWorking() && aStrBuffer.Len() < MAX_STRING_LEN) 331*cdf0e10cSrcweir { 332*cdf0e10cSrcweir int bNextCh = true; 333*cdf0e10cSrcweir switch( nNextCh ) 334*cdf0e10cSrcweir { 335*cdf0e10cSrcweir case '\\': 336*cdf0e10cSrcweir { 337*cdf0e10cSrcweir switch (nNextCh = GetNextChar()) 338*cdf0e10cSrcweir { 339*cdf0e10cSrcweir case '\'': 340*cdf0e10cSrcweir { 341*cdf0e10cSrcweir 342*cdf0e10cSrcweir #if 0 343*cdf0e10cSrcweir // #i35653 patch from cmc 344*cdf0e10cSrcweir ByteString aByteString(static_cast<char>(GetHexValue())); 345*cdf0e10cSrcweir if (aByteString.Len()) 346*cdf0e10cSrcweir aStrBuffer.Append(String(aByteString, GetSrcEncoding())); 347*cdf0e10cSrcweir #else 348*cdf0e10cSrcweir ByteString aByteString; 349*cdf0e10cSrcweir while (1) 350*cdf0e10cSrcweir { 351*cdf0e10cSrcweir aByteString.Append((char)GetHexValue()); 352*cdf0e10cSrcweir 353*cdf0e10cSrcweir bool bBreak = false; 354*cdf0e10cSrcweir sal_Char nSlash = '\\'; 355*cdf0e10cSrcweir while (!bBreak) 356*cdf0e10cSrcweir { 357*cdf0e10cSrcweir wchar_t __next=GetNextChar(); 358*cdf0e10cSrcweir if (__next>0xFF) // fix for #i43933# and #i35653# 359*cdf0e10cSrcweir { 360*cdf0e10cSrcweir if (aByteString.Len()) 361*cdf0e10cSrcweir aStrBuffer.Append(String(aByteString, GetSrcEncoding())); 362*cdf0e10cSrcweir aStrBuffer.Append((sal_Unicode)__next); 363*cdf0e10cSrcweir 364*cdf0e10cSrcweir aByteString.Erase(); 365*cdf0e10cSrcweir continue; 366*cdf0e10cSrcweir } 367*cdf0e10cSrcweir nSlash = (sal_Char)__next; 368*cdf0e10cSrcweir while (nSlash == 0xD || nSlash == 0xA) 369*cdf0e10cSrcweir nSlash = (sal_Char)GetNextChar(); 370*cdf0e10cSrcweir 371*cdf0e10cSrcweir switch (nSlash) 372*cdf0e10cSrcweir { 373*cdf0e10cSrcweir case '{': 374*cdf0e10cSrcweir case '}': 375*cdf0e10cSrcweir case '\\': 376*cdf0e10cSrcweir bBreak = true; 377*cdf0e10cSrcweir break; 378*cdf0e10cSrcweir default: 379*cdf0e10cSrcweir aByteString.Append(nSlash); 380*cdf0e10cSrcweir break; 381*cdf0e10cSrcweir } 382*cdf0e10cSrcweir } 383*cdf0e10cSrcweir 384*cdf0e10cSrcweir nNextCh = GetNextChar(); 385*cdf0e10cSrcweir 386*cdf0e10cSrcweir if (nSlash != '\\' || nNextCh != '\'') 387*cdf0e10cSrcweir { 388*cdf0e10cSrcweir rInput.SeekRel(-1); 389*cdf0e10cSrcweir nNextCh = nSlash; 390*cdf0e10cSrcweir break; 391*cdf0e10cSrcweir } 392*cdf0e10cSrcweir } 393*cdf0e10cSrcweir 394*cdf0e10cSrcweir bNextCh = false; 395*cdf0e10cSrcweir 396*cdf0e10cSrcweir if (aByteString.Len()) 397*cdf0e10cSrcweir aStrBuffer.Append(String(aByteString, GetSrcEncoding())); 398*cdf0e10cSrcweir #endif 399*cdf0e10cSrcweir } 400*cdf0e10cSrcweir break; 401*cdf0e10cSrcweir case '\\': 402*cdf0e10cSrcweir case '}': 403*cdf0e10cSrcweir case '{': 404*cdf0e10cSrcweir case '+': // habe ich in einem RTF-File gefunden 405*cdf0e10cSrcweir aStrBuffer.Append(nNextCh); 406*cdf0e10cSrcweir break; 407*cdf0e10cSrcweir case '~': // nonbreaking space 408*cdf0e10cSrcweir aStrBuffer.Append(static_cast< sal_Unicode >(0xA0)); 409*cdf0e10cSrcweir break; 410*cdf0e10cSrcweir case '-': // optional hyphen 411*cdf0e10cSrcweir aStrBuffer.Append(static_cast< sal_Unicode >(0xAD)); 412*cdf0e10cSrcweir break; 413*cdf0e10cSrcweir case '_': // nonbreaking hyphen 414*cdf0e10cSrcweir aStrBuffer.Append(static_cast< sal_Unicode >(0x2011)); 415*cdf0e10cSrcweir break; 416*cdf0e10cSrcweir 417*cdf0e10cSrcweir case 'u': 418*cdf0e10cSrcweir // UNI-Code Zeichen lesen 419*cdf0e10cSrcweir { 420*cdf0e10cSrcweir nNextCh = GetNextChar(); 421*cdf0e10cSrcweir rInput.SeekRel( -2 ); 422*cdf0e10cSrcweir 423*cdf0e10cSrcweir if( '-' == nNextCh || RTF_ISDIGIT( nNextCh ) ) 424*cdf0e10cSrcweir { 425*cdf0e10cSrcweir bRTF_InTextRead = true; 426*cdf0e10cSrcweir 427*cdf0e10cSrcweir String sSave( aToken ); 428*cdf0e10cSrcweir nNextCh = '\\'; 429*cdf0e10cSrcweir #ifdef DBG_UTIL 430*cdf0e10cSrcweir int nToken = 431*cdf0e10cSrcweir #endif 432*cdf0e10cSrcweir _GetNextToken(); 433*cdf0e10cSrcweir DBG_ASSERT( RTF_U == nToken, "doch kein UNI-Code Zeichen" ); 434*cdf0e10cSrcweir // dont convert symbol chars 435*cdf0e10cSrcweir aStrBuffer.Append( 436*cdf0e10cSrcweir static_cast< sal_Unicode >(nTokenValue)); 437*cdf0e10cSrcweir 438*cdf0e10cSrcweir // overread the next n "RTF" characters. This 439*cdf0e10cSrcweir // can be also \{, \}, \'88 440*cdf0e10cSrcweir for( sal_uInt8 m = 0; m < nUCharOverread; ++m ) 441*cdf0e10cSrcweir { 442*cdf0e10cSrcweir sal_Unicode cAnsi = nNextCh; 443*cdf0e10cSrcweir while( 0xD == cAnsi ) 444*cdf0e10cSrcweir cAnsi = GetNextChar(); 445*cdf0e10cSrcweir while( 0xA == cAnsi ) 446*cdf0e10cSrcweir cAnsi = GetNextChar(); 447*cdf0e10cSrcweir 448*cdf0e10cSrcweir if( '\\' == cAnsi && 449*cdf0e10cSrcweir '\'' == ( cAnsi = GetNextChar() )) 450*cdf0e10cSrcweir // HexValue ueberlesen 451*cdf0e10cSrcweir cAnsi = GetHexValue(); 452*cdf0e10cSrcweir nNextCh = GetNextChar(); 453*cdf0e10cSrcweir } 454*cdf0e10cSrcweir bNextCh = false; 455*cdf0e10cSrcweir aToken = sSave; 456*cdf0e10cSrcweir bRTF_InTextRead = false; 457*cdf0e10cSrcweir } 458*cdf0e10cSrcweir else 459*cdf0e10cSrcweir { 460*cdf0e10cSrcweir nNextCh = '\\'; 461*cdf0e10cSrcweir bWeiter = false; // Abbrechen, String zusammen 462*cdf0e10cSrcweir } 463*cdf0e10cSrcweir } 464*cdf0e10cSrcweir break; 465*cdf0e10cSrcweir 466*cdf0e10cSrcweir default: 467*cdf0e10cSrcweir rInput.SeekRel( -1 ); 468*cdf0e10cSrcweir nNextCh = '\\'; 469*cdf0e10cSrcweir bWeiter = false; // Abbrechen, String zusammen 470*cdf0e10cSrcweir break; 471*cdf0e10cSrcweir } 472*cdf0e10cSrcweir } 473*cdf0e10cSrcweir break; 474*cdf0e10cSrcweir 475*cdf0e10cSrcweir case sal_Unicode(EOF): 476*cdf0e10cSrcweir eState = SVPAR_ERROR; 477*cdf0e10cSrcweir // weiter 478*cdf0e10cSrcweir case '{': 479*cdf0e10cSrcweir case '}': 480*cdf0e10cSrcweir bWeiter = false; 481*cdf0e10cSrcweir break; 482*cdf0e10cSrcweir 483*cdf0e10cSrcweir case 0x0a: 484*cdf0e10cSrcweir case 0x0d: 485*cdf0e10cSrcweir break; 486*cdf0e10cSrcweir 487*cdf0e10cSrcweir default: 488*cdf0e10cSrcweir if( nNextCh == cBreak || aStrBuffer.Len() >= MAX_STRING_LEN) 489*cdf0e10cSrcweir bWeiter = false; 490*cdf0e10cSrcweir else 491*cdf0e10cSrcweir { 492*cdf0e10cSrcweir do { 493*cdf0e10cSrcweir // alle anderen Zeichen kommen in den Text 494*cdf0e10cSrcweir aStrBuffer.Append(nNextCh); 495*cdf0e10cSrcweir 496*cdf0e10cSrcweir if (sal_Unicode(EOF) == (nNextCh = GetNextChar())) 497*cdf0e10cSrcweir { 498*cdf0e10cSrcweir if (aStrBuffer.Len()) 499*cdf0e10cSrcweir aToken += aStrBuffer; 500*cdf0e10cSrcweir return; 501*cdf0e10cSrcweir } 502*cdf0e10cSrcweir } while 503*cdf0e10cSrcweir ( 504*cdf0e10cSrcweir (RTF_ISALPHA(nNextCh) || RTF_ISDIGIT(nNextCh)) && 505*cdf0e10cSrcweir (aStrBuffer.Len() < MAX_STRING_LEN) 506*cdf0e10cSrcweir ); 507*cdf0e10cSrcweir bNextCh = false; 508*cdf0e10cSrcweir } 509*cdf0e10cSrcweir } 510*cdf0e10cSrcweir 511*cdf0e10cSrcweir if( bWeiter && bNextCh ) 512*cdf0e10cSrcweir nNextCh = GetNextChar(); 513*cdf0e10cSrcweir } 514*cdf0e10cSrcweir 515*cdf0e10cSrcweir if (aStrBuffer.Len()) 516*cdf0e10cSrcweir aToken += aStrBuffer; 517*cdf0e10cSrcweir } 518*cdf0e10cSrcweir 519*cdf0e10cSrcweir 520*cdf0e10cSrcweir short SvRTFParser::_inSkipGroup=0; 521*cdf0e10cSrcweir 522*cdf0e10cSrcweir void SvRTFParser::SkipGroup() 523*cdf0e10cSrcweir { 524*cdf0e10cSrcweir short nBrackets=1; 525*cdf0e10cSrcweir if (_inSkipGroup>0) 526*cdf0e10cSrcweir return; 527*cdf0e10cSrcweir _inSkipGroup++; 528*cdf0e10cSrcweir #if 1 //#i16185# fecking \bin keyword 529*cdf0e10cSrcweir do 530*cdf0e10cSrcweir { 531*cdf0e10cSrcweir switch (nNextCh) 532*cdf0e10cSrcweir { 533*cdf0e10cSrcweir case '{': 534*cdf0e10cSrcweir ++nBrackets; 535*cdf0e10cSrcweir break; 536*cdf0e10cSrcweir case '}': 537*cdf0e10cSrcweir if (!--nBrackets) { 538*cdf0e10cSrcweir _inSkipGroup--; 539*cdf0e10cSrcweir return; 540*cdf0e10cSrcweir } 541*cdf0e10cSrcweir break; 542*cdf0e10cSrcweir } 543*cdf0e10cSrcweir int nToken = _GetNextToken(); 544*cdf0e10cSrcweir if (nToken == RTF_BIN) 545*cdf0e10cSrcweir { 546*cdf0e10cSrcweir rInput.SeekRel(-1); 547*cdf0e10cSrcweir rInput.SeekRel(nTokenValue); 548*cdf0e10cSrcweir nNextCh = GetNextChar(); 549*cdf0e10cSrcweir } 550*cdf0e10cSrcweir while (nNextCh==0xa || nNextCh==0xd) 551*cdf0e10cSrcweir { 552*cdf0e10cSrcweir nNextCh = GetNextChar(); 553*cdf0e10cSrcweir } 554*cdf0e10cSrcweir } while (sal_Unicode(EOF) != nNextCh && IsParserWorking()); 555*cdf0e10cSrcweir #else 556*cdf0e10cSrcweir sal_Unicode cPrev = 0; 557*cdf0e10cSrcweir do { 558*cdf0e10cSrcweir switch( nNextCh ) 559*cdf0e10cSrcweir { 560*cdf0e10cSrcweir case '{': 561*cdf0e10cSrcweir if( '\\' != cPrev ) 562*cdf0e10cSrcweir ++nBrackets; 563*cdf0e10cSrcweir break; 564*cdf0e10cSrcweir 565*cdf0e10cSrcweir case '}': 566*cdf0e10cSrcweir if( '\\' != cPrev && !--nBrackets ) 567*cdf0e10cSrcweir return; 568*cdf0e10cSrcweir break; 569*cdf0e10cSrcweir 570*cdf0e10cSrcweir case '\\': 571*cdf0e10cSrcweir if( '\\' == cPrev ) 572*cdf0e10cSrcweir nNextCh = 0; 573*cdf0e10cSrcweir break; 574*cdf0e10cSrcweir } 575*cdf0e10cSrcweir cPrev = nNextCh; 576*cdf0e10cSrcweir nNextCh = GetNextChar(); 577*cdf0e10cSrcweir } while( sal_Unicode(EOF) != nNextCh && IsParserWorking() ); 578*cdf0e10cSrcweir #endif 579*cdf0e10cSrcweir 580*cdf0e10cSrcweir if( SVPAR_PENDING != eState && '}' != nNextCh ) 581*cdf0e10cSrcweir eState = SVPAR_ERROR; 582*cdf0e10cSrcweir _inSkipGroup--; 583*cdf0e10cSrcweir } 584*cdf0e10cSrcweir 585*cdf0e10cSrcweir void SvRTFParser::ReadUnknownData() { SkipGroup(); } 586*cdf0e10cSrcweir void SvRTFParser::ReadBitmapData() { SkipGroup(); } 587*cdf0e10cSrcweir void SvRTFParser::ReadOLEData() { SkipGroup(); } 588*cdf0e10cSrcweir 589*cdf0e10cSrcweir 590*cdf0e10cSrcweir SvParserState SvRTFParser::CallParser() 591*cdf0e10cSrcweir { 592*cdf0e10cSrcweir sal_Char cFirstCh; 593*cdf0e10cSrcweir nNextChPos = rInput.Tell(); 594*cdf0e10cSrcweir rInput >> cFirstCh; nNextCh = cFirstCh; 595*cdf0e10cSrcweir eState = SVPAR_WORKING; 596*cdf0e10cSrcweir nOpenBrakets = 0; 597*cdf0e10cSrcweir SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 ); 598*cdf0e10cSrcweir eUNICodeSet = RTL_TEXTENCODING_MS_1252; // default ist ANSI-CodeSet 599*cdf0e10cSrcweir 600*cdf0e10cSrcweir // die 1. beiden Token muessen '{' und \\rtf sein !! 601*cdf0e10cSrcweir if( '{' == GetNextToken() && RTF_RTF == GetNextToken() ) 602*cdf0e10cSrcweir { 603*cdf0e10cSrcweir AddRef(); 604*cdf0e10cSrcweir Continue( 0 ); 605*cdf0e10cSrcweir if( SVPAR_PENDING != eState ) 606*cdf0e10cSrcweir ReleaseRef(); // dann brauchen wir den Parser nicht mehr! 607*cdf0e10cSrcweir } 608*cdf0e10cSrcweir else 609*cdf0e10cSrcweir eState = SVPAR_ERROR; 610*cdf0e10cSrcweir 611*cdf0e10cSrcweir return eState; 612*cdf0e10cSrcweir } 613*cdf0e10cSrcweir 614*cdf0e10cSrcweir void SvRTFParser::Continue( int nToken ) 615*cdf0e10cSrcweir { 616*cdf0e10cSrcweir // DBG_ASSERT( SVPAR_CS_DONTKNOW == GetCharSet(), 617*cdf0e10cSrcweir // "Zeichensatz wurde geaendert." ); 618*cdf0e10cSrcweir 619*cdf0e10cSrcweir if( !nToken ) 620*cdf0e10cSrcweir nToken = GetNextToken(); 621*cdf0e10cSrcweir 622*cdf0e10cSrcweir while( IsParserWorking() ) 623*cdf0e10cSrcweir { 624*cdf0e10cSrcweir SaveState( nToken ); 625*cdf0e10cSrcweir switch( nToken ) 626*cdf0e10cSrcweir { 627*cdf0e10cSrcweir case '}': 628*cdf0e10cSrcweir if( nOpenBrakets ) 629*cdf0e10cSrcweir goto NEXTTOKEN; 630*cdf0e10cSrcweir eState = SVPAR_ACCEPTED; 631*cdf0e10cSrcweir break; 632*cdf0e10cSrcweir 633*cdf0e10cSrcweir case '{': 634*cdf0e10cSrcweir // eine unbekannte Gruppe ? 635*cdf0e10cSrcweir { 636*cdf0e10cSrcweir if( RTF_IGNOREFLAG != GetNextToken() ) 637*cdf0e10cSrcweir nToken = SkipToken( -1 ); 638*cdf0e10cSrcweir else if( RTF_UNKNOWNCONTROL != GetNextToken() ) 639*cdf0e10cSrcweir nToken = SkipToken( -2 ); 640*cdf0e10cSrcweir else 641*cdf0e10cSrcweir { 642*cdf0e10cSrcweir // gleich herausfiltern 643*cdf0e10cSrcweir ReadUnknownData(); 644*cdf0e10cSrcweir nToken = GetNextToken(); 645*cdf0e10cSrcweir if( '}' != nToken ) 646*cdf0e10cSrcweir eState = SVPAR_ERROR; 647*cdf0e10cSrcweir break; // auf zum naechsten Token!! 648*cdf0e10cSrcweir } 649*cdf0e10cSrcweir } 650*cdf0e10cSrcweir goto NEXTTOKEN; 651*cdf0e10cSrcweir 652*cdf0e10cSrcweir case RTF_UNKNOWNCONTROL: 653*cdf0e10cSrcweir break; // unbekannte Token ueberspringen 654*cdf0e10cSrcweir case RTF_NEXTTYPE: 655*cdf0e10cSrcweir case RTF_ANSITYPE: 656*cdf0e10cSrcweir SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 ); 657*cdf0e10cSrcweir break; 658*cdf0e10cSrcweir case RTF_MACTYPE: 659*cdf0e10cSrcweir SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_APPLE_ROMAN ); 660*cdf0e10cSrcweir break; 661*cdf0e10cSrcweir case RTF_PCTYPE: 662*cdf0e10cSrcweir SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_IBM_437 ); 663*cdf0e10cSrcweir break; 664*cdf0e10cSrcweir case RTF_PCATYPE: 665*cdf0e10cSrcweir SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_IBM_850 ); 666*cdf0e10cSrcweir break; 667*cdf0e10cSrcweir case RTF_ANSICPG: 668*cdf0e10cSrcweir eCodeSet = rtl_getTextEncodingFromWindowsCodePage(nTokenValue); 669*cdf0e10cSrcweir SetSrcEncoding(eCodeSet); 670*cdf0e10cSrcweir break; 671*cdf0e10cSrcweir default: 672*cdf0e10cSrcweir NEXTTOKEN: 673*cdf0e10cSrcweir NextToken( nToken ); 674*cdf0e10cSrcweir break; 675*cdf0e10cSrcweir } 676*cdf0e10cSrcweir if( IsParserWorking() ) 677*cdf0e10cSrcweir SaveState( 0 ); // bis hierhin abgearbeitet, 678*cdf0e10cSrcweir // weiter mit neuem Token! 679*cdf0e10cSrcweir nToken = GetNextToken(); 680*cdf0e10cSrcweir } 681*cdf0e10cSrcweir if( SVPAR_ACCEPTED == eState && 0 < nOpenBrakets ) 682*cdf0e10cSrcweir eState = SVPAR_ERROR; 683*cdf0e10cSrcweir } 684*cdf0e10cSrcweir 685*cdf0e10cSrcweir void SvRTFParser::SetEncoding( rtl_TextEncoding eEnc ) 686*cdf0e10cSrcweir { 687*cdf0e10cSrcweir if (eEnc == RTL_TEXTENCODING_DONTKNOW) 688*cdf0e10cSrcweir eEnc = GetCodeSet(); 689*cdf0e10cSrcweir 690*cdf0e10cSrcweir if (!aParserStates.empty()) 691*cdf0e10cSrcweir aParserStates.top().eCodeSet = eEnc; 692*cdf0e10cSrcweir SetSrcEncoding(eEnc); 693*cdf0e10cSrcweir } 694*cdf0e10cSrcweir 695*cdf0e10cSrcweir #ifdef USED 696*cdf0e10cSrcweir void SvRTFParser::SaveState( int nToken ) 697*cdf0e10cSrcweir { 698*cdf0e10cSrcweir SvParser::SaveState( nToken ); 699*cdf0e10cSrcweir } 700*cdf0e10cSrcweir 701*cdf0e10cSrcweir void SvRTFParser::RestoreState() 702*cdf0e10cSrcweir { 703*cdf0e10cSrcweir SvParser::RestoreState(); 704*cdf0e10cSrcweir } 705*cdf0e10cSrcweir #endif 706*cdf0e10cSrcweir 707*cdf0e10cSrcweir /* vi:set tabstop=4 shiftwidth=4 expandtab: */ 708