1*b1cdbd2cSJim Jagielski /**************************************************************
2*b1cdbd2cSJim Jagielski *
3*b1cdbd2cSJim Jagielski * Licensed to the Apache Software Foundation (ASF) under one
4*b1cdbd2cSJim Jagielski * or more contributor license agreements. See the NOTICE file
5*b1cdbd2cSJim Jagielski * distributed with this work for additional information
6*b1cdbd2cSJim Jagielski * regarding copyright ownership. The ASF licenses this file
7*b1cdbd2cSJim Jagielski * to you under the Apache License, Version 2.0 (the
8*b1cdbd2cSJim Jagielski * "License"); you may not use this file except in compliance
9*b1cdbd2cSJim Jagielski * with the License. You may obtain a copy of the License at
10*b1cdbd2cSJim Jagielski *
11*b1cdbd2cSJim Jagielski * http://www.apache.org/licenses/LICENSE-2.0
12*b1cdbd2cSJim Jagielski *
13*b1cdbd2cSJim Jagielski * Unless required by applicable law or agreed to in writing,
14*b1cdbd2cSJim Jagielski * software distributed under the License is distributed on an
15*b1cdbd2cSJim Jagielski * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*b1cdbd2cSJim Jagielski * KIND, either express or implied. See the License for the
17*b1cdbd2cSJim Jagielski * specific language governing permissions and limitations
18*b1cdbd2cSJim Jagielski * under the License.
19*b1cdbd2cSJim Jagielski *
20*b1cdbd2cSJim Jagielski *************************************************************/
21*b1cdbd2cSJim Jagielski
22*b1cdbd2cSJim Jagielski
23*b1cdbd2cSJim Jagielski
24*b1cdbd2cSJim Jagielski // MARKER(update_precomp.py): autogen include statement, do not remove
25*b1cdbd2cSJim Jagielski #include "precompiled_svl.hxx"
26*b1cdbd2cSJim Jagielski #include <svl/lngmisc.hxx>
27*b1cdbd2cSJim Jagielski #include <tools/solar.h>
28*b1cdbd2cSJim Jagielski #include <tools/string.hxx>
29*b1cdbd2cSJim Jagielski #include <tools/debug.hxx>
30*b1cdbd2cSJim Jagielski #include <rtl/ustrbuf.hxx>
31*b1cdbd2cSJim Jagielski #include <rtl/ustring.hxx>
32*b1cdbd2cSJim Jagielski
33*b1cdbd2cSJim Jagielski using namespace rtl;
34*b1cdbd2cSJim Jagielski
35*b1cdbd2cSJim Jagielski namespace linguistic
36*b1cdbd2cSJim Jagielski {
37*b1cdbd2cSJim Jagielski
38*b1cdbd2cSJim Jagielski ///////////////////////////////////////////////////////////////////////////
39*b1cdbd2cSJim Jagielski
GetNumControlChars(const OUString & rTxt)40*b1cdbd2cSJim Jagielski sal_Int32 GetNumControlChars( const OUString &rTxt )
41*b1cdbd2cSJim Jagielski {
42*b1cdbd2cSJim Jagielski sal_Int32 nCnt = 0;
43*b1cdbd2cSJim Jagielski sal_Int32 nLen = rTxt.getLength();
44*b1cdbd2cSJim Jagielski for (sal_Int32 i = 0; i < nLen; ++i)
45*b1cdbd2cSJim Jagielski {
46*b1cdbd2cSJim Jagielski if (IsControlChar( rTxt[i] ))
47*b1cdbd2cSJim Jagielski ++nCnt;
48*b1cdbd2cSJim Jagielski }
49*b1cdbd2cSJim Jagielski return nCnt;
50*b1cdbd2cSJim Jagielski }
51*b1cdbd2cSJim Jagielski
52*b1cdbd2cSJim Jagielski
RemoveHyphens(OUString & rTxt)53*b1cdbd2cSJim Jagielski sal_Bool RemoveHyphens( OUString &rTxt )
54*b1cdbd2cSJim Jagielski {
55*b1cdbd2cSJim Jagielski sal_Bool bModified = sal_False;
56*b1cdbd2cSJim Jagielski if (HasHyphens( rTxt ))
57*b1cdbd2cSJim Jagielski {
58*b1cdbd2cSJim Jagielski String aTmp( rTxt );
59*b1cdbd2cSJim Jagielski aTmp.EraseAllChars( SVT_SOFT_HYPHEN );
60*b1cdbd2cSJim Jagielski aTmp.EraseAllChars( SVT_HARD_HYPHEN );
61*b1cdbd2cSJim Jagielski rTxt = aTmp;
62*b1cdbd2cSJim Jagielski bModified = sal_True;
63*b1cdbd2cSJim Jagielski }
64*b1cdbd2cSJim Jagielski return bModified;
65*b1cdbd2cSJim Jagielski }
66*b1cdbd2cSJim Jagielski
67*b1cdbd2cSJim Jagielski
RemoveControlChars(OUString & rTxt)68*b1cdbd2cSJim Jagielski sal_Bool RemoveControlChars( OUString &rTxt )
69*b1cdbd2cSJim Jagielski {
70*b1cdbd2cSJim Jagielski sal_Bool bModified = sal_False;
71*b1cdbd2cSJim Jagielski sal_Int32 nCtrlChars = GetNumControlChars( rTxt );
72*b1cdbd2cSJim Jagielski if (nCtrlChars)
73*b1cdbd2cSJim Jagielski {
74*b1cdbd2cSJim Jagielski sal_Int32 nLen = rTxt.getLength();
75*b1cdbd2cSJim Jagielski sal_Int32 nSize = nLen - nCtrlChars;
76*b1cdbd2cSJim Jagielski OUStringBuffer aBuf( nSize );
77*b1cdbd2cSJim Jagielski aBuf.setLength( nSize );
78*b1cdbd2cSJim Jagielski sal_Int32 nCnt = 0;
79*b1cdbd2cSJim Jagielski for (sal_Int32 i = 0; i < nLen; ++i)
80*b1cdbd2cSJim Jagielski {
81*b1cdbd2cSJim Jagielski sal_Unicode cChar = rTxt[i];
82*b1cdbd2cSJim Jagielski if (!IsControlChar( cChar ))
83*b1cdbd2cSJim Jagielski {
84*b1cdbd2cSJim Jagielski DBG_ASSERT( nCnt < nSize, "index out of range" );
85*b1cdbd2cSJim Jagielski aBuf.setCharAt( nCnt++, cChar );
86*b1cdbd2cSJim Jagielski }
87*b1cdbd2cSJim Jagielski }
88*b1cdbd2cSJim Jagielski DBG_ASSERT( nCnt == nSize, "wrong size" );
89*b1cdbd2cSJim Jagielski rTxt = aBuf.makeStringAndClear();
90*b1cdbd2cSJim Jagielski bModified = sal_True;
91*b1cdbd2cSJim Jagielski }
92*b1cdbd2cSJim Jagielski return bModified;
93*b1cdbd2cSJim Jagielski }
94*b1cdbd2cSJim Jagielski
95*b1cdbd2cSJim Jagielski
96*b1cdbd2cSJim Jagielski // non breaking field character
97*b1cdbd2cSJim Jagielski #define CH_TXTATR_INWORD ((sal_Char) 0x02)
98*b1cdbd2cSJim Jagielski
ReplaceControlChars(rtl::OUString & rTxt,sal_Char)99*b1cdbd2cSJim Jagielski sal_Bool ReplaceControlChars( rtl::OUString &rTxt, sal_Char /*aRplcChar*/ )
100*b1cdbd2cSJim Jagielski {
101*b1cdbd2cSJim Jagielski // the resulting string looks like this:
102*b1cdbd2cSJim Jagielski // 1. non breaking field characters get removed
103*b1cdbd2cSJim Jagielski // 2. remaining control characters will be replaced by ' '
104*b1cdbd2cSJim Jagielski
105*b1cdbd2cSJim Jagielski sal_Bool bModified = sal_False;
106*b1cdbd2cSJim Jagielski sal_Int32 nCtrlChars = GetNumControlChars( rTxt );
107*b1cdbd2cSJim Jagielski if (nCtrlChars)
108*b1cdbd2cSJim Jagielski {
109*b1cdbd2cSJim Jagielski sal_Int32 nLen = rTxt.getLength();
110*b1cdbd2cSJim Jagielski OUStringBuffer aBuf( nLen );
111*b1cdbd2cSJim Jagielski sal_Int32 nCnt = 0;
112*b1cdbd2cSJim Jagielski for (sal_Int32 i = 0; i < nLen; ++i)
113*b1cdbd2cSJim Jagielski {
114*b1cdbd2cSJim Jagielski sal_Unicode cChar = rTxt[i];
115*b1cdbd2cSJim Jagielski if (CH_TXTATR_INWORD != cChar)
116*b1cdbd2cSJim Jagielski {
117*b1cdbd2cSJim Jagielski if (IsControlChar( cChar ))
118*b1cdbd2cSJim Jagielski cChar = ' ';
119*b1cdbd2cSJim Jagielski DBG_ASSERT( nCnt < nLen, "index out of range" );
120*b1cdbd2cSJim Jagielski aBuf.setCharAt( nCnt++, cChar );
121*b1cdbd2cSJim Jagielski }
122*b1cdbd2cSJim Jagielski }
123*b1cdbd2cSJim Jagielski aBuf.setLength( nCnt );
124*b1cdbd2cSJim Jagielski rTxt = aBuf.makeStringAndClear();
125*b1cdbd2cSJim Jagielski bModified = sal_True;
126*b1cdbd2cSJim Jagielski }
127*b1cdbd2cSJim Jagielski return bModified;
128*b1cdbd2cSJim Jagielski }
129*b1cdbd2cSJim Jagielski
130*b1cdbd2cSJim Jagielski
GetThesaurusReplaceText(const String & rText)131*b1cdbd2cSJim Jagielski String GetThesaurusReplaceText( const String &rText )
132*b1cdbd2cSJim Jagielski {
133*b1cdbd2cSJim Jagielski // The strings for synonyms returned by the thesaurus sometimes have some
134*b1cdbd2cSJim Jagielski // explanation text put in between '(' and ')' or a trailing '*'.
135*b1cdbd2cSJim Jagielski // These parts should not be put in the ReplaceEdit Text that may get
136*b1cdbd2cSJim Jagielski // inserted into the document. Thus we strip them from the text.
137*b1cdbd2cSJim Jagielski
138*b1cdbd2cSJim Jagielski String aText( rText );
139*b1cdbd2cSJim Jagielski
140*b1cdbd2cSJim Jagielski xub_StrLen nPos = aText.Search( sal_Unicode('(') );
141*b1cdbd2cSJim Jagielski while (STRING_NOTFOUND != nPos)
142*b1cdbd2cSJim Jagielski {
143*b1cdbd2cSJim Jagielski xub_StrLen nEnd = aText.Search( sal_Unicode(')'), nPos );
144*b1cdbd2cSJim Jagielski if (STRING_NOTFOUND != nEnd)
145*b1cdbd2cSJim Jagielski aText.Erase( nPos, nEnd-nPos+1 );
146*b1cdbd2cSJim Jagielski else
147*b1cdbd2cSJim Jagielski break;
148*b1cdbd2cSJim Jagielski nPos = aText.Search( sal_Unicode('(') );
149*b1cdbd2cSJim Jagielski }
150*b1cdbd2cSJim Jagielski
151*b1cdbd2cSJim Jagielski nPos = aText.Search( sal_Unicode('*') );
152*b1cdbd2cSJim Jagielski if (STRING_NOTFOUND != nPos)
153*b1cdbd2cSJim Jagielski aText.Erase( nPos );
154*b1cdbd2cSJim Jagielski
155*b1cdbd2cSJim Jagielski // remove any possible remaining ' ' that may confuse the thesaurus
156*b1cdbd2cSJim Jagielski // when it gets called with the text
157*b1cdbd2cSJim Jagielski aText.EraseLeadingAndTrailingChars( sal_Unicode(' ') );
158*b1cdbd2cSJim Jagielski
159*b1cdbd2cSJim Jagielski return aText;
160*b1cdbd2cSJim Jagielski }
161*b1cdbd2cSJim Jagielski
162*b1cdbd2cSJim Jagielski ///////////////////////////////////////////////////////////////////////////
163*b1cdbd2cSJim Jagielski
164*b1cdbd2cSJim Jagielski } // namespace linguistic
165*b1cdbd2cSJim Jagielski
166