xref: /aoo41x/main/svl/source/misc/lngmisc.cxx (revision cdf0e10c)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_svl.hxx"
30*cdf0e10cSrcweir #include <svl/lngmisc.hxx>
31*cdf0e10cSrcweir #include <tools/solar.h>
32*cdf0e10cSrcweir #include <tools/string.hxx>
33*cdf0e10cSrcweir #include <tools/debug.hxx>
34*cdf0e10cSrcweir #include <rtl/ustrbuf.hxx>
35*cdf0e10cSrcweir #include <rtl/ustring.hxx>
36*cdf0e10cSrcweir 
37*cdf0e10cSrcweir using namespace rtl;
38*cdf0e10cSrcweir 
39*cdf0e10cSrcweir namespace linguistic
40*cdf0e10cSrcweir {
41*cdf0e10cSrcweir 
42*cdf0e10cSrcweir ///////////////////////////////////////////////////////////////////////////
43*cdf0e10cSrcweir 
44*cdf0e10cSrcweir sal_Int32 GetNumControlChars( const OUString &rTxt )
45*cdf0e10cSrcweir {
46*cdf0e10cSrcweir 	sal_Int32 nCnt = 0;
47*cdf0e10cSrcweir 	sal_Int32 nLen = rTxt.getLength();
48*cdf0e10cSrcweir 	for (sal_Int32 i = 0;  i < nLen;  ++i)
49*cdf0e10cSrcweir 	{
50*cdf0e10cSrcweir 		if (IsControlChar( rTxt[i] ))
51*cdf0e10cSrcweir 			++nCnt;
52*cdf0e10cSrcweir 	}
53*cdf0e10cSrcweir 	return nCnt;
54*cdf0e10cSrcweir }
55*cdf0e10cSrcweir 
56*cdf0e10cSrcweir 
57*cdf0e10cSrcweir sal_Bool RemoveHyphens( OUString &rTxt )
58*cdf0e10cSrcweir {
59*cdf0e10cSrcweir 	sal_Bool bModified = sal_False;
60*cdf0e10cSrcweir 	if (HasHyphens( rTxt ))
61*cdf0e10cSrcweir 	{
62*cdf0e10cSrcweir 		String aTmp( rTxt );
63*cdf0e10cSrcweir 		aTmp.EraseAllChars( SVT_SOFT_HYPHEN );
64*cdf0e10cSrcweir 		aTmp.EraseAllChars( SVT_HARD_HYPHEN );
65*cdf0e10cSrcweir 		rTxt = aTmp;
66*cdf0e10cSrcweir 		bModified = sal_True;
67*cdf0e10cSrcweir 	}
68*cdf0e10cSrcweir 	return bModified;
69*cdf0e10cSrcweir }
70*cdf0e10cSrcweir 
71*cdf0e10cSrcweir 
72*cdf0e10cSrcweir sal_Bool RemoveControlChars( OUString &rTxt )
73*cdf0e10cSrcweir {
74*cdf0e10cSrcweir 	sal_Bool bModified = sal_False;
75*cdf0e10cSrcweir 	sal_Int32 nCtrlChars = GetNumControlChars( rTxt );
76*cdf0e10cSrcweir 	if (nCtrlChars)
77*cdf0e10cSrcweir 	{
78*cdf0e10cSrcweir 		sal_Int32 nLen	= rTxt.getLength();
79*cdf0e10cSrcweir 		sal_Int32 nSize = nLen - nCtrlChars;
80*cdf0e10cSrcweir 		OUStringBuffer aBuf( nSize );
81*cdf0e10cSrcweir 		aBuf.setLength( nSize );
82*cdf0e10cSrcweir 		sal_Int32 nCnt = 0;
83*cdf0e10cSrcweir 		for (sal_Int32 i = 0;  i < nLen;  ++i)
84*cdf0e10cSrcweir 		{
85*cdf0e10cSrcweir 			sal_Unicode cChar = rTxt[i];
86*cdf0e10cSrcweir 			if (!IsControlChar( cChar ))
87*cdf0e10cSrcweir 			{
88*cdf0e10cSrcweir 				DBG_ASSERT( nCnt < nSize, "index out of range" );
89*cdf0e10cSrcweir 				aBuf.setCharAt( nCnt++, cChar );
90*cdf0e10cSrcweir 			}
91*cdf0e10cSrcweir 		}
92*cdf0e10cSrcweir 		DBG_ASSERT( nCnt == nSize, "wrong size" );
93*cdf0e10cSrcweir 		rTxt = aBuf.makeStringAndClear();
94*cdf0e10cSrcweir 		bModified = sal_True;
95*cdf0e10cSrcweir 	}
96*cdf0e10cSrcweir 	return bModified;
97*cdf0e10cSrcweir }
98*cdf0e10cSrcweir 
99*cdf0e10cSrcweir 
100*cdf0e10cSrcweir // non breaking field character
101*cdf0e10cSrcweir #define CH_TXTATR_INWORD    ((sal_Char) 0x02)
102*cdf0e10cSrcweir 
103*cdf0e10cSrcweir sal_Bool ReplaceControlChars( rtl::OUString &rTxt, sal_Char /*aRplcChar*/ )
104*cdf0e10cSrcweir {
105*cdf0e10cSrcweir     // the resulting string looks like this:
106*cdf0e10cSrcweir     // 1. non breaking field characters get removed
107*cdf0e10cSrcweir     // 2. remaining control characters will be replaced by ' '
108*cdf0e10cSrcweir 
109*cdf0e10cSrcweir     sal_Bool bModified = sal_False;
110*cdf0e10cSrcweir     sal_Int32 nCtrlChars = GetNumControlChars( rTxt );
111*cdf0e10cSrcweir     if (nCtrlChars)
112*cdf0e10cSrcweir     {
113*cdf0e10cSrcweir         sal_Int32 nLen  = rTxt.getLength();
114*cdf0e10cSrcweir         OUStringBuffer aBuf( nLen );
115*cdf0e10cSrcweir         sal_Int32 nCnt = 0;
116*cdf0e10cSrcweir         for (sal_Int32 i = 0;  i < nLen;  ++i)
117*cdf0e10cSrcweir         {
118*cdf0e10cSrcweir             sal_Unicode cChar = rTxt[i];
119*cdf0e10cSrcweir             if (CH_TXTATR_INWORD != cChar)
120*cdf0e10cSrcweir             {
121*cdf0e10cSrcweir                 if (IsControlChar( cChar ))
122*cdf0e10cSrcweir                     cChar = ' ';
123*cdf0e10cSrcweir                 DBG_ASSERT( nCnt < nLen, "index out of range" );
124*cdf0e10cSrcweir                 aBuf.setCharAt( nCnt++, cChar );
125*cdf0e10cSrcweir             }
126*cdf0e10cSrcweir         }
127*cdf0e10cSrcweir         aBuf.setLength( nCnt );
128*cdf0e10cSrcweir         rTxt = aBuf.makeStringAndClear();
129*cdf0e10cSrcweir         bModified = sal_True;
130*cdf0e10cSrcweir     }
131*cdf0e10cSrcweir     return bModified;
132*cdf0e10cSrcweir }
133*cdf0e10cSrcweir 
134*cdf0e10cSrcweir 
135*cdf0e10cSrcweir String GetThesaurusReplaceText( const String &rText )
136*cdf0e10cSrcweir {
137*cdf0e10cSrcweir     // The strings for synonyms returned by the thesaurus sometimes have some
138*cdf0e10cSrcweir     // explanation text put in between '(' and ')' or a trailing '*'.
139*cdf0e10cSrcweir     // These parts should not be put in the ReplaceEdit Text that may get
140*cdf0e10cSrcweir     // inserted into the document. Thus we strip them from the text.
141*cdf0e10cSrcweir 
142*cdf0e10cSrcweir     String aText( rText );
143*cdf0e10cSrcweir 
144*cdf0e10cSrcweir     xub_StrLen nPos = aText.Search( sal_Unicode('(') );
145*cdf0e10cSrcweir     while (STRING_NOTFOUND != nPos)
146*cdf0e10cSrcweir     {
147*cdf0e10cSrcweir         xub_StrLen nEnd = aText.Search( sal_Unicode(')'), nPos );
148*cdf0e10cSrcweir         if (STRING_NOTFOUND != nEnd)
149*cdf0e10cSrcweir             aText.Erase( nPos, nEnd-nPos+1 );
150*cdf0e10cSrcweir         else
151*cdf0e10cSrcweir             break;
152*cdf0e10cSrcweir         nPos = aText.Search( sal_Unicode('(') );
153*cdf0e10cSrcweir     }
154*cdf0e10cSrcweir 
155*cdf0e10cSrcweir     nPos = aText.Search( sal_Unicode('*') );
156*cdf0e10cSrcweir     if (STRING_NOTFOUND != nPos)
157*cdf0e10cSrcweir         aText.Erase( nPos );
158*cdf0e10cSrcweir 
159*cdf0e10cSrcweir     // remove any possible remaining ' ' that may confuse the thesaurus
160*cdf0e10cSrcweir     // when it gets called with the text
161*cdf0e10cSrcweir     aText.EraseLeadingAndTrailingChars( sal_Unicode(' ') );
162*cdf0e10cSrcweir 
163*cdf0e10cSrcweir     return aText;
164*cdf0e10cSrcweir }
165*cdf0e10cSrcweir 
166*cdf0e10cSrcweir ///////////////////////////////////////////////////////////////////////////
167*cdf0e10cSrcweir 
168*cdf0e10cSrcweir } // namespace linguistic
169*cdf0e10cSrcweir 
170