1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_svl.hxx" 30*cdf0e10cSrcweir #include <svl/lngmisc.hxx> 31*cdf0e10cSrcweir #include <tools/solar.h> 32*cdf0e10cSrcweir #include <tools/string.hxx> 33*cdf0e10cSrcweir #include <tools/debug.hxx> 34*cdf0e10cSrcweir #include <rtl/ustrbuf.hxx> 35*cdf0e10cSrcweir #include <rtl/ustring.hxx> 36*cdf0e10cSrcweir 37*cdf0e10cSrcweir using namespace rtl; 38*cdf0e10cSrcweir 39*cdf0e10cSrcweir namespace linguistic 40*cdf0e10cSrcweir { 41*cdf0e10cSrcweir 42*cdf0e10cSrcweir /////////////////////////////////////////////////////////////////////////// 43*cdf0e10cSrcweir 44*cdf0e10cSrcweir sal_Int32 GetNumControlChars( const OUString &rTxt ) 45*cdf0e10cSrcweir { 46*cdf0e10cSrcweir sal_Int32 nCnt = 0; 47*cdf0e10cSrcweir sal_Int32 nLen = rTxt.getLength(); 48*cdf0e10cSrcweir for (sal_Int32 i = 0; i < nLen; ++i) 49*cdf0e10cSrcweir { 50*cdf0e10cSrcweir if (IsControlChar( rTxt[i] )) 51*cdf0e10cSrcweir ++nCnt; 52*cdf0e10cSrcweir } 53*cdf0e10cSrcweir return nCnt; 54*cdf0e10cSrcweir } 55*cdf0e10cSrcweir 56*cdf0e10cSrcweir 57*cdf0e10cSrcweir sal_Bool RemoveHyphens( OUString &rTxt ) 58*cdf0e10cSrcweir { 59*cdf0e10cSrcweir sal_Bool bModified = sal_False; 60*cdf0e10cSrcweir if (HasHyphens( rTxt )) 61*cdf0e10cSrcweir { 62*cdf0e10cSrcweir String aTmp( rTxt ); 63*cdf0e10cSrcweir aTmp.EraseAllChars( SVT_SOFT_HYPHEN ); 64*cdf0e10cSrcweir aTmp.EraseAllChars( SVT_HARD_HYPHEN ); 65*cdf0e10cSrcweir rTxt = aTmp; 66*cdf0e10cSrcweir bModified = sal_True; 67*cdf0e10cSrcweir } 68*cdf0e10cSrcweir return bModified; 69*cdf0e10cSrcweir } 70*cdf0e10cSrcweir 71*cdf0e10cSrcweir 72*cdf0e10cSrcweir sal_Bool RemoveControlChars( OUString &rTxt ) 73*cdf0e10cSrcweir { 74*cdf0e10cSrcweir sal_Bool bModified = sal_False; 75*cdf0e10cSrcweir sal_Int32 nCtrlChars = GetNumControlChars( rTxt ); 76*cdf0e10cSrcweir if (nCtrlChars) 77*cdf0e10cSrcweir { 78*cdf0e10cSrcweir sal_Int32 nLen = rTxt.getLength(); 79*cdf0e10cSrcweir sal_Int32 nSize = nLen - nCtrlChars; 80*cdf0e10cSrcweir OUStringBuffer aBuf( nSize ); 81*cdf0e10cSrcweir aBuf.setLength( nSize ); 82*cdf0e10cSrcweir sal_Int32 nCnt = 0; 83*cdf0e10cSrcweir for (sal_Int32 i = 0; i < nLen; ++i) 84*cdf0e10cSrcweir { 85*cdf0e10cSrcweir sal_Unicode cChar = rTxt[i]; 86*cdf0e10cSrcweir if (!IsControlChar( cChar )) 87*cdf0e10cSrcweir { 88*cdf0e10cSrcweir DBG_ASSERT( nCnt < nSize, "index out of range" ); 89*cdf0e10cSrcweir aBuf.setCharAt( nCnt++, cChar ); 90*cdf0e10cSrcweir } 91*cdf0e10cSrcweir } 92*cdf0e10cSrcweir DBG_ASSERT( nCnt == nSize, "wrong size" ); 93*cdf0e10cSrcweir rTxt = aBuf.makeStringAndClear(); 94*cdf0e10cSrcweir bModified = sal_True; 95*cdf0e10cSrcweir } 96*cdf0e10cSrcweir return bModified; 97*cdf0e10cSrcweir } 98*cdf0e10cSrcweir 99*cdf0e10cSrcweir 100*cdf0e10cSrcweir // non breaking field character 101*cdf0e10cSrcweir #define CH_TXTATR_INWORD ((sal_Char) 0x02) 102*cdf0e10cSrcweir 103*cdf0e10cSrcweir sal_Bool ReplaceControlChars( rtl::OUString &rTxt, sal_Char /*aRplcChar*/ ) 104*cdf0e10cSrcweir { 105*cdf0e10cSrcweir // the resulting string looks like this: 106*cdf0e10cSrcweir // 1. non breaking field characters get removed 107*cdf0e10cSrcweir // 2. remaining control characters will be replaced by ' ' 108*cdf0e10cSrcweir 109*cdf0e10cSrcweir sal_Bool bModified = sal_False; 110*cdf0e10cSrcweir sal_Int32 nCtrlChars = GetNumControlChars( rTxt ); 111*cdf0e10cSrcweir if (nCtrlChars) 112*cdf0e10cSrcweir { 113*cdf0e10cSrcweir sal_Int32 nLen = rTxt.getLength(); 114*cdf0e10cSrcweir OUStringBuffer aBuf( nLen ); 115*cdf0e10cSrcweir sal_Int32 nCnt = 0; 116*cdf0e10cSrcweir for (sal_Int32 i = 0; i < nLen; ++i) 117*cdf0e10cSrcweir { 118*cdf0e10cSrcweir sal_Unicode cChar = rTxt[i]; 119*cdf0e10cSrcweir if (CH_TXTATR_INWORD != cChar) 120*cdf0e10cSrcweir { 121*cdf0e10cSrcweir if (IsControlChar( cChar )) 122*cdf0e10cSrcweir cChar = ' '; 123*cdf0e10cSrcweir DBG_ASSERT( nCnt < nLen, "index out of range" ); 124*cdf0e10cSrcweir aBuf.setCharAt( nCnt++, cChar ); 125*cdf0e10cSrcweir } 126*cdf0e10cSrcweir } 127*cdf0e10cSrcweir aBuf.setLength( nCnt ); 128*cdf0e10cSrcweir rTxt = aBuf.makeStringAndClear(); 129*cdf0e10cSrcweir bModified = sal_True; 130*cdf0e10cSrcweir } 131*cdf0e10cSrcweir return bModified; 132*cdf0e10cSrcweir } 133*cdf0e10cSrcweir 134*cdf0e10cSrcweir 135*cdf0e10cSrcweir String GetThesaurusReplaceText( const String &rText ) 136*cdf0e10cSrcweir { 137*cdf0e10cSrcweir // The strings for synonyms returned by the thesaurus sometimes have some 138*cdf0e10cSrcweir // explanation text put in between '(' and ')' or a trailing '*'. 139*cdf0e10cSrcweir // These parts should not be put in the ReplaceEdit Text that may get 140*cdf0e10cSrcweir // inserted into the document. Thus we strip them from the text. 141*cdf0e10cSrcweir 142*cdf0e10cSrcweir String aText( rText ); 143*cdf0e10cSrcweir 144*cdf0e10cSrcweir xub_StrLen nPos = aText.Search( sal_Unicode('(') ); 145*cdf0e10cSrcweir while (STRING_NOTFOUND != nPos) 146*cdf0e10cSrcweir { 147*cdf0e10cSrcweir xub_StrLen nEnd = aText.Search( sal_Unicode(')'), nPos ); 148*cdf0e10cSrcweir if (STRING_NOTFOUND != nEnd) 149*cdf0e10cSrcweir aText.Erase( nPos, nEnd-nPos+1 ); 150*cdf0e10cSrcweir else 151*cdf0e10cSrcweir break; 152*cdf0e10cSrcweir nPos = aText.Search( sal_Unicode('(') ); 153*cdf0e10cSrcweir } 154*cdf0e10cSrcweir 155*cdf0e10cSrcweir nPos = aText.Search( sal_Unicode('*') ); 156*cdf0e10cSrcweir if (STRING_NOTFOUND != nPos) 157*cdf0e10cSrcweir aText.Erase( nPos ); 158*cdf0e10cSrcweir 159*cdf0e10cSrcweir // remove any possible remaining ' ' that may confuse the thesaurus 160*cdf0e10cSrcweir // when it gets called with the text 161*cdf0e10cSrcweir aText.EraseLeadingAndTrailingChars( sal_Unicode(' ') ); 162*cdf0e10cSrcweir 163*cdf0e10cSrcweir return aText; 164*cdf0e10cSrcweir } 165*cdf0e10cSrcweir 166*cdf0e10cSrcweir /////////////////////////////////////////////////////////////////////////// 167*cdf0e10cSrcweir 168*cdf0e10cSrcweir } // namespace linguistic 169*cdf0e10cSrcweir 170