1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_lingucomponent.hxx"
26 
27 
28 #include <com/sun/star/uno/Reference.h>
29 #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
30 
31 #include <cppuhelper/factory.hxx>	// helper for factories
32 #include <com/sun/star/registry/XRegistryKey.hpp>
33 #include <i18npool/mslangid.hxx>
34 #include <unotools/pathoptions.hxx>
35 #include <unotools/useroptions.hxx>
36 #include <tools/debug.hxx>
37 #include <unotools/processfactory.hxx>
38 #include <osl/mutex.hxx>
39 
40 #include <hyphen.h>
41 #include <hyphenimp.hxx>
42 
43 #include <linguistic/hyphdta.hxx>
44 #include <rtl/ustring.hxx>
45 #include <rtl/ustrbuf.hxx>
46 #include <rtl/textenc.h>
47 
48 #include <linguistic/lngprops.hxx>
49 #include <unotools/pathoptions.hxx>
50 #include <unotools/useroptions.hxx>
51 #include <unotools/lingucfg.hxx>
52 #include <osl/file.hxx>
53 
54 #include <stdio.h>
55 #include <string.h>
56 
57 #include <list>
58 #include <set>
59 
60 using namespace utl;
61 using namespace osl;
62 using namespace rtl;
63 using namespace com::sun::star;
64 using namespace com::sun::star::beans;
65 using namespace com::sun::star::lang;
66 using namespace com::sun::star::uno;
67 using namespace com::sun::star::linguistic2;
68 using namespace linguistic;
69 
// values assigned to capitalization types
71 #define CAPTYPE_UNKNOWN 0
72 #define CAPTYPE_NOCAP   1
73 #define CAPTYPE_INITCAP 2
74 #define CAPTYPE_ALLCAP  3
75 #define CAPTYPE_MIXED   4
76 
77 // min, max
78 
79 //#define Min(a,b) (a < b ? a : b)
80 #define Max(a,b) (a > b ? a : b)
81 
82 ///////////////////////////////////////////////////////////////////////////
83 
84 
Hyphenator()85 Hyphenator::Hyphenator() :
86 	aEvtListeners	( GetLinguMutex() )
87 {
88 	bDisposing = sal_False;
89 	pPropHelper = NULL;
90     aDicts = NULL;
91     numdict = 0;
92 }
93 
94 
~Hyphenator()95 Hyphenator::~Hyphenator()
96 {
97 	if (pPropHelper)
98 		pPropHelper->RemoveAsPropListener();
99 
100     if ((numdict) && (aDicts))
101     {
102         for (int i=0; i < numdict; i++)
103         {
104             if (aDicts[i].apCC) delete aDicts[i].apCC;
105             aDicts[i].apCC = NULL;
106         }
107 	}
108     if (aDicts) delete[] aDicts;
109 	aDicts = NULL;
110     numdict = 0;
111 }
112 
113 
GetPropHelper_Impl()114 PropertyHelper_Hyphen & Hyphenator::GetPropHelper_Impl()
115 {
116 	if (!pPropHelper)
117 	{
118 		Reference< XPropertySet	>	xPropSet( GetLinguProperties(), UNO_QUERY );
119 
120 		pPropHelper	= new PropertyHelper_Hyphen ((XHyphenator *) this, xPropSet );
121 		xPropHelper = pPropHelper;
122 		pPropHelper->AddAsPropListener();	//! after a reference is established
123 	}
124 	return *pPropHelper;
125 
126 }
127 
128 
getLocales()129 Sequence< Locale > SAL_CALL Hyphenator::getLocales()
130 		throw(RuntimeException)
131 {
132 	MutexGuard	aGuard( GetLinguMutex() );
133 
134     // this routine should return the locales supported by the installed
135     // dictionaries.
136 
137     if (!numdict)
138     {
139         SvtLinguConfig aLinguCfg;
140 
141         // get list of dictionaries-to-use
142 		// (or better speaking: the list of dictionaries using the
143 		// new configuration entries).
144         std::list< SvtLinguConfigDictionaryEntry > aDics;
145         uno::Sequence< rtl::OUString > aFormatList;
146         aLinguCfg.GetSupportedDictionaryFormatsFor( A2OU("Hyphenators"),
147                 A2OU("org.openoffice.lingu.LibHnjHyphenator"), aFormatList );
148         sal_Int32 nLen = aFormatList.getLength();
149         for (sal_Int32 i = 0;  i < nLen;  ++i)
150         {
151             std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
152                     aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
153             aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
154         }
155 
156         //!! for compatibility with old dictionaries (the ones not using extensions
157         //!! or new configuration entries, but still using the dictionary.lst file)
158 		//!! Get the list of old style spell checking dictionaries to use...
159         std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
160 				GetOldStyleDics( "HYPH" ) );
161 
162 		// to prefer dictionaries with configuration entries we will only
163 		// use those old style dictionaries that add a language that
164 		// is not yet supported by the list od new style dictionaries
165 		MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
166 
167         numdict = aDics.size();
168         if (numdict)
169         {
170             // get supported locales from the dictionaries-to-use...
171             sal_Int32 k = 0;
172             std::set< rtl::OUString, lt_rtl_OUString > aLocaleNamesSet;
173             std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt;
174             for (aDictIt = aDics.begin();  aDictIt != aDics.end();  ++aDictIt)
175             {
176                 uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames );
177                 sal_Int32 nLen2 = aLocaleNames.getLength();
178                 for (k = 0;  k < nLen2;  ++k)
179                 {
180                     aLocaleNamesSet.insert( aLocaleNames[k] );
181                 }
182             }
183             // ... and add them to the resulting sequence
184             aSuppLocales.realloc( aLocaleNamesSet.size() );
185             std::set< rtl::OUString, lt_rtl_OUString >::const_iterator aItB;
186             k = 0;
187             for (aItB = aLocaleNamesSet.begin();  aItB != aLocaleNamesSet.end();  ++aItB)
188             {
189                 Locale aTmp( MsLangId::convertLanguageToLocale(
190                         MsLangId::convertIsoStringToLanguage( *aItB )));
191                 aSuppLocales[k++] = aTmp;
192             }
193 
194             //! For each dictionary and each locale we need a seperate entry.
195             //! If this results in more than one dictionary per locale than (for now)
196 			//! it is undefined which dictionary gets used.
197 			//! In the future the implementation should support using several dictionaries
198 			//! for one locale.
199 			numdict = 0;
200             for (aDictIt = aDics.begin();  aDictIt != aDics.end();  ++aDictIt)
201 				numdict = numdict + aDictIt->aLocaleNames.getLength();
202 
203             // add dictionary information
204             aDicts = new HDInfo[numdict];
205 
206             k = 0;
207             for (aDictIt = aDics.begin();  aDictIt != aDics.end();  ++aDictIt)
208             {
209                 if (aDictIt->aLocaleNames.getLength() > 0 &&
210                     aDictIt->aLocations.getLength() > 0)
211                 {
212                     uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames );
213                     sal_Int32 nLocales = aLocaleNames.getLength();
214 
215                     // currently only one language per dictionary is supported in the actual implementation...
216                     // Thus here we work-around this by adding the same dictionary several times.
217                     // Once for each of it's supported locales.
218                     for (sal_Int32 i = 0;  i < nLocales;  ++i)
219                     {
220                         aDicts[k].aPtr = NULL;
221                         aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
222                         aDicts[k].aLoc = MsLangId::convertLanguageToLocale(
223                                         MsLangId::convertIsoStringToLanguage( aDictIt->aLocaleNames[i] ));
224                         aDicts[k].apCC = new CharClass( aDicts[k].aLoc );
225                         // also both files have to be in the same directory and the
226                         // file names must only differ in the extension (.aff/.dic).
227                         // Thus we use the first location only and strip the extension part.
228                         rtl::OUString aLocation = aDictIt->aLocations[0];
229                         sal_Int32 nPos = aLocation.lastIndexOf( '.' );
230                         aLocation = aLocation.copy( 0, nPos );
231                         aDicts[k].aName = aLocation;
232 
233                         ++k;
234                     }
235                 }
236             }
237             DBG_ASSERT( k == numdict, "index mismatch?" );
238         }
239         else
240         {
241             /* no dictionary found so register no dictionaries */
242             numdict = 0;
243             aDicts = NULL;
244             aSuppLocales.realloc(0);
245         }
246     }
247 
248     return aSuppLocales;
249 }
250 
251 
252 
hasLocale(const Locale & rLocale)253 sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
254 		throw(RuntimeException)
255 {
256 	MutexGuard	aGuard( GetLinguMutex() );
257 
258 	sal_Bool bRes = sal_False;
259 	if (!aSuppLocales.getLength())
260 		getLocales();
261 
262 	const Locale *pLocale = aSuppLocales.getConstArray();
263 	sal_Int32 nLen = aSuppLocales.getLength();
264 	for (sal_Int32 i = 0;  i < nLen;  ++i)
265 	{
266 		if (rLocale == pLocale[i])
267 		{
268 			bRes = sal_True;
269 			break;
270 		}
271 	}
272 	return bRes;
273 }
274 
275 
/** Looks for the right-most hyphenation point of aWord that satisfies the
    current hyphenation settings (minimal leading/trailing characters, minimal
    word length) and lies before the limit derived from nMaxLeading.

    @param aWord        the word to hyphenate
    @param aLocale      selects the dictionary; it has to equal the locale of
                        an entry of the dictionary table
    @param nMaxLeading  handed to GetPosInWordToCheck() to obtain the upper
                        limit for the hyphenation position
    @param aProperties  temporary property values that are applied to the
                        property helper for this call
    @return a HyphenatedWord describing the chosen position, or an empty
            reference if no dictionary matches aLocale, the dictionary can not
            be loaded, its text encoding is unknown, the hyphenation library
            reports a failure, or no position qualifies
*/
Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const ::rtl::OUString& aWord,
       const ::com::sun::star::lang::Locale& aLocale,
       sal_Int16 nMaxLeading,
       const ::com::sun::star::beans::PropertyValues& aProperties )
       throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException)
{
	int nHyphenationPos = -1;           // chosen position, -1: none found
    int nHyphenationPosAlt = -1;        // start of the replaced part (discretionary hyphenation only)
    int nHyphenationPosAltHyph = -1;    // hyphen position within the changed word (discretionary hyphenation only)
	int wordlen;
	char *hyphens;
    char *lcword;
    int k = 0;

    PropertyHelper_Hyphen & rHelper = GetPropHelper();
    rHelper.SetTmpPropVals(aProperties);
	sal_Int16 minTrail = rHelper.GetMinTrailing();
	sal_Int16 minLead = rHelper.GetMinLeading();
	sal_Int16 minLen = rHelper.GetMinWordLength();

	HyphenDict *dict = NULL;
    rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
    CharClass * pCC = NULL;

	Reference< XHyphenatedWord > xRes;

    // search the dictionary table for the locale; there is no early exit,
    // thus the last matching entry wins
    k = -1;
    for (int j = 0; j < numdict; j++)
    {
        if (aLocale == aDicts[j].aLoc)
            k = j;
    }

    // if we have a hyphenation dictionary matching this locale
    if (k != -1)
    {
        // if this dictionary has not been loaded yet do that
        if (!aDicts[k].aPtr)
        {
            OUString DictFN = aDicts[k].aName + A2OU(".dic");
            OUString dictpath;

            osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
            OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );

#if defined(WNT)
            // workaround for a Windows specific problem: the
            // path length in calls to 'fopen' is limited to somewhat
            // about 120+ characters which will usually be exceeded when
            // using dictionaries as extensions.
            sTmp = Win_GetShortPathName( dictpath );
#endif

            if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
            {
               fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) );
               return NULL;
            }
            // remember the loaded dictionary and its encoding for later calls
            aDicts[k].aPtr = dict;
            aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
        }

        // otherwise hyphenate the word with that dictionary
        dict = aDicts[k].aPtr;
        eEnc = aDicts[k].eEnc;
        pCC =  aDicts[k].apCC;

        // we don't want to work with a default text encoding since following incorrect
        // results may occur only for specific text and thus may be hard to notice.
        // Thus better always make a clean exit here if the text encoding is in question.
        // Hopefully something not working at all will raise proper attention quickly. ;-)
        DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
        if (eEnc == RTL_TEXTENCODING_DONTKNOW)
            return NULL;

        // capitalization of the original word; needed further below to adapt
        // the replacement text of a discretionary hyphenation
        sal_uInt16 ct = CAPTYPE_UNKNOWN;
        ct = capitalType(aWord, pCC);

        // first convert any smart quotes or apostrophes to normal ones
	    OUStringBuffer rBuf(aWord);
        sal_Int32 nc = rBuf.getLength();
        sal_Unicode ch;
        for (sal_Int32 ix=0; ix < nc; ix++)
        {
	        ch = rBuf.charAt(ix);
            if ((ch == 0x201C) || (ch == 0x201D))
                rBuf.setCharAt(ix,(sal_Unicode)0x0022);
            if ((ch == 0x2018) || (ch == 0x2019))
                rBuf.setCharAt(ix,(sal_Unicode)0x0027);
        }
        OUString nWord(rBuf.makeStringAndClear());

        // now convert word to all lowercase for pattern recognition
        OUString nTerm(makeLowerCase(nWord, pCC));

        // now convert word to needed encoding
        OString encWord(OU2ENC(nTerm,eEnc));

        // both buffers are sized by the length of the encoded word
	    wordlen = encWord.getLength();
        lcword = new char[wordlen + 1];
	    hyphens = new char[wordlen + 5];

        // the following three arrays are handed to hnj_hyphen_hyphenate3()
        // by address and released with free() below
        char ** rep = NULL; // replacements of discretionary hyphenation
        int * pos = NULL; // array of [hyphenation point] minus [deletion position]
        int * cut = NULL; // length of deletions in original word

        // copy converted word into simple char buffer
        strcpy(lcword,encWord.getStr());

        // now strip off any ending periods
        // (afterwards n is the length of the word without trailing periods)
        int n = wordlen-1;
        while((n >=0) && (lcword[n] == '.'))
            n--;
        n++;
        if (n > 0)
        {
            // a non-zero return value is treated as failure
            const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword, n, hyphens, NULL,
                    &rep, &pos, &cut, minLead, minTrail,
                    Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead  - Max(dict->lhmin, 2))),
                    Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
            if (bFailed)
            {
                //whoops something did not work
                delete[] hyphens;
                delete[] lcword;
                if (rep)
                {
                    for(int j = 0; j < n; j++)
                    {
                        if (rep[j]) free(rep[j]);
                    }
                    free(rep);
                }
                if (pos) free(pos);
                if (cut) free(cut);
                return NULL;
            }
        }

        // now backfill hyphens[] for any removed trailing periods
        for (int c = n; c < wordlen; c++) hyphens[c] = '0';
        hyphens[wordlen] = '\0';

	    sal_Int32 Leading =  GetPosInWordToCheck( aWord, nMaxLeading );

        // Check all positions in ascending order.  Every hit overwrites the
        // previous one, thus the right-most qualifying position remains.
	    for (sal_Int32 i = 0; i < n; i++)
	    {
            int leftrep = 0;
            sal_Bool hit = (n >= minLen);
            if (!rep || !rep[i] || (i >= n))
            {
                // plain hyphenation: an odd value in hyphens[i] marks a
                // hyphenation point at position i
                hit = hit && (hyphens[i]&1) && (i < Leading);
                hit = hit && (i >= (minLead-1) );
                hit = hit && ((n - i - 1) >= minTrail);
            }
            else
            {
                // calculate change character length before hyphenation point signed with '='
                // (for UTF-8 the continuation bytes, i.e. those of the form
                // 10xxxxxx, are not counted)
                for (char * c = rep[i]; *c && (*c != '='); c++)
                {
                    if (eEnc == RTL_TEXTENCODING_UTF8)
                    {
                        if (((unsigned char) *c) >> 6 != 2)
                            leftrep++;
                    }
                    else
                        leftrep++;
                }
                hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading);
                hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) );
                hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail);
            }
            if (hit)
            {
                nHyphenationPos = i;
                if (rep && (i < n) && rep[i])
                {
                    nHyphenationPosAlt = i - pos[i];
                    nHyphenationPosAltHyph = i + leftrep - pos[i];
                }
            }
        }

        if (nHyphenationPos  == -1)
        {
            xRes = NULL;
        }
        else
        {
            if (rep && rep[nHyphenationPos])
            {
                // remove equal sign
                // (done in place: everything from the '=' on is moved one
                // character to the left)
                char * s = rep[nHyphenationPos];
                int eq = 0;
                for (; *s; s++)
                {
                    if (*s == '=') eq = 1;
                    if (eq) *s = *(s + 1);
                }
                OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc);
                OUString repHyph;
                // let the replacement text follow the capitalization of the word
                switch (ct)
                {
                    case CAPTYPE_ALLCAP:
                    {
                        repHyph = makeUpperCase(repHyphlow, pCC);
                        break;
                    }
                    case CAPTYPE_INITCAP:
                    {
                        if (nHyphenationPosAlt == 0)
                            repHyph = makeInitCap(repHyphlow, pCC);
                        else
                             repHyph = repHyphlow;
                        break;
                    }
                    default:
                    {
                        repHyph = repHyphlow;
                        break;
                    }
                }

                // handle shortening
                // (use the smaller of the two positions)
                sal_Int16 nPos = (sal_Int16) ((nHyphenationPosAltHyph < nHyphenationPos) ?
                nHyphenationPosAltHyph : nHyphenationPos);
                // discretionary hyphenation
                xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ), nPos,
                    aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph),
                    (sal_Int16) nHyphenationPosAltHyph);
            }
            else
            {
                // plain hyphenation: the word itself stays unchanged
                xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ),
                    (sal_Int16)nHyphenationPos, aWord, (sal_Int16) nHyphenationPos);
            }
        }

        // release the work buffers and the arrays obtained from the library
        delete[] lcword;
	    delete[] hyphens;
        if (rep)
        {
            for(int j = 0; j < n; j++)
            {
                if (rep[j]) free(rep[j]);
            }
            free(rep);
        }
        if (pos) free(pos);
        if (cut) free(cut);
	    return xRes;
	}
    // no dictionary for this locale
    return NULL;
}
530 
531 
queryAlternativeSpelling(const::rtl::OUString &,const::com::sun::star::lang::Locale &,sal_Int16,const::com::sun::star::beans::PropertyValues &)532 Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
533         const ::rtl::OUString& /*aWord*/,
534         const ::com::sun::star::lang::Locale& /*aLocale*/,
535         sal_Int16 /*nIndex*/,
536         const ::com::sun::star::beans::PropertyValues& /*aProperties*/ )
537         throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
538 {
539   /* alternative spelling isn't supported by tex dictionaries */
540   /* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */
541   /* TASK: implement queryAlternativeSpelling() */
542   return NULL;
543 }
544 
createPossibleHyphens(const::rtl::OUString & aWord,const::com::sun::star::lang::Locale & aLocale,const::com::sun::star::beans::PropertyValues & aProperties)545 Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const ::rtl::OUString& aWord,
546         const ::com::sun::star::lang::Locale& aLocale,
547         const ::com::sun::star::beans::PropertyValues& aProperties )
548         throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
549 {
550     int wordlen;
551     char *hyphens;
552     char *lcword;
553     int k;
554 
555     PropertyHelper_Hyphen & rHelper = GetPropHelper();
556     rHelper.SetTmpPropVals(aProperties);
557     sal_Int16 minTrail = rHelper.GetMinTrailing();
558     sal_Int16 minLead = rHelper.GetMinLeading();
559 
560     HyphenDict *dict = NULL;
561     rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
562     CharClass* pCC = NULL;
563 
564     Reference< XPossibleHyphens > xRes;
565 
566     k = -1;
567     for (int j = 0; j < numdict; j++)
568     {
569         if (aLocale == aDicts[j].aLoc) k = j;
570     }
571 
572     // if we have a hyphenation dictionary matching this locale
573     if (k != -1)
574     {
575         // if this dictioanry has not been loaded yet do that
576         if (!aDicts[k].aPtr)
577         {
578             OUString DictFN = aDicts[k].aName + A2OU(".dic");
579             OUString dictpath;
580 
581             osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
582             OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
583 
584 #if defined(WNT)
585             // workaround for Windows specifc problem that the
586             // path length in calls to 'fopen' is limted to somewhat
587             // about 120+ characters which will usually be exceed when
588             // using dictionaries as extensions.
589             sTmp = Win_GetShortPathName( dictpath );
590 #endif
591 
592             if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
593             {
594                fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) );
595                return NULL;
596             }
597             aDicts[k].aPtr = dict;
598             aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
599         }
600 
601         // other wise hyphenate the word with that dictionary
602         dict = aDicts[k].aPtr;
603         eEnc = aDicts[k].eEnc;
604         pCC  = aDicts[k].apCC;
605 
606         // we don't want to work with a default text encoding since following incorrect
607         // results may occur only for specific text and thus may be hard to notice.
608         // Thus better always make a clean exit here if the text encoding is in question.
609         // Hopefully something not working at all will raise proper attention quickly. ;-)
610         DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
611         if (eEnc == RTL_TEXTENCODING_DONTKNOW)
612             return NULL;
613 
614         // first handle smart quotes both single and double
615         OUStringBuffer rBuf(aWord);
616         sal_Int32 nc = rBuf.getLength();
617         sal_Unicode ch;
618         for (sal_Int32 ix=0; ix < nc; ix++)
619         {
620             ch = rBuf.charAt(ix);
621             if ((ch == 0x201C) || (ch == 0x201D))
622                 rBuf.setCharAt(ix,(sal_Unicode)0x0022);
623             if ((ch == 0x2018) || (ch == 0x2019))
624                 rBuf.setCharAt(ix,(sal_Unicode)0x0027);
625         }
626         OUString nWord(rBuf.makeStringAndClear());
627 
628         // now convert word to all lowercase for pattern recognition
629         OUString nTerm(makeLowerCase(nWord, pCC));
630 
631         // now convert word to needed encoding
632         OString encWord(OU2ENC(nTerm,eEnc));
633 
634         wordlen = encWord.getLength();
635         lcword = new char[wordlen+1];
636         hyphens = new char[wordlen+5];
637         char ** rep = NULL; // replacements of discretionary hyphenation
638         int * pos = NULL; // array of [hyphenation point] minus [deletion position]
639         int * cut = NULL; // length of deletions in original word
640 
641         // copy converted word into simple char buffer
642         strcpy(lcword,encWord.getStr());
643 
644         // first remove any trailing periods
645         int n = wordlen-1;
646         while((n >=0) && (lcword[n] == '.'))
647             n--;
648         n++;
649         // fprintf(stderr,"hyphenate... %s\n",lcword); fflush(stderr);
650         if (n > 0)
651         {
652             const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL,
653                     &rep, &pos, &cut, minLead, minTrail,
654                     Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
655                     Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
656             if (bFailed)
657             {
658                 delete[] hyphens;
659                 delete[] lcword;
660 
661                 if (rep)
662                 {
663                     for(int j = 0; j < n; j++)
664                     {
665                         if (rep[j]) free(rep[j]);
666                     }
667                     free(rep);
668                 }
669                 if (pos) free(pos);
670                 if (cut) free(cut);
671 
672                 return NULL;
673             }
674         }
675         // now backfill hyphens[] for any removed periods
676         for (int c = n; c < wordlen; c++)
677             hyphens[c] = '0';
678         hyphens[wordlen] = '\0';
679         // fprintf(stderr,"... %s\n",hyphens); fflush(stderr);
680 
681         sal_Int16 nHyphCount = 0;
682         sal_Int16 i;
683 
684         for ( i = 0; i < encWord.getLength(); i++)
685         {
686             if (hyphens[i]&1 && (!rep || !rep[i]))
687                 nHyphCount++;
688         }
689 
690         Sequence< sal_Int16 > aHyphPos(nHyphCount);
691         sal_Int16 *pPos = aHyphPos.getArray();
692         OUStringBuffer hyphenatedWordBuffer;
693         OUString hyphenatedWord;
694         nHyphCount = 0;
695 
696         for (i = 0; i < nWord.getLength(); i++)
697         {
698             hyphenatedWordBuffer.append(aWord[i]);
699             // hyphenation position (not alternative)
700             if (hyphens[i]&1 && (!rep || !rep[i]))
701             {
702                 pPos[nHyphCount] = i;
703                 hyphenatedWordBuffer.append(sal_Unicode('='));
704                 nHyphCount++;
705             }
706         }
707 
708         hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
709         //fprintf(stderr,"result is %s\n",OU2A(hyphenatedWord));
710         //fflush(stderr);
711 
712         xRes = new PossibleHyphens( aWord, LocaleToLanguage( aLocale ),
713                   hyphenatedWord, aHyphPos );
714 
715         delete[] hyphens;
716         delete[] lcword;
717 
718         if (rep)
719         {
720             for(int j = 0; j < n; j++)
721             {
722                 if (rep[j]) free(rep[j]);
723             }
724             free(rep);
725         }
726         if (pos) free(pos);
727         if (cut) free(cut);
728 
729         return xRes;
730     }
731 
732     return NULL;
733 }
734 
capitalType(const OUString & aTerm,CharClass * pCC)735 sal_uInt16 SAL_CALL Hyphenator::capitalType(const OUString& aTerm, CharClass * pCC)
736 {
737     sal_Int32 tlen = aTerm.getLength();
738     if ((pCC) && (tlen))
739     {
740         String aStr(aTerm);
741         sal_Int32 nc = 0;
742         for (xub_StrLen tindex = 0; tindex < tlen;  tindex++)
743         {
744             if (pCC->getCharacterType(aStr,tindex) & ::com::sun::star::i18n::KCharacterType::UPPER)
745                 nc++;
746         }
747 
748         if (nc == 0)
749             return (sal_uInt16) CAPTYPE_NOCAP;
750         if (nc == tlen)
751             return (sal_uInt16) CAPTYPE_ALLCAP;
752         if ((nc == 1) && (pCC->getCharacterType(aStr,0) & ::com::sun::star::i18n::KCharacterType::UPPER))
753             return (sal_uInt16) CAPTYPE_INITCAP;
754 
755         return (sal_uInt16) CAPTYPE_MIXED;
756     }
757     return (sal_uInt16) CAPTYPE_UNKNOWN;
758 }
759 
/** Returns aTerm converted to lower case by pCC; without a CharClass the
    term is returned unchanged. */
OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC)
{
    if (!pCC)
        return aTerm;

    const sal_Int32 nLength = aTerm.getLength();
    return pCC->toLower_rtl(aTerm, 0, nLength);
}
766 
/** Returns aTerm converted to upper case by pCC; without a CharClass the
    term is returned unchanged. */
OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC)
{
    if (!pCC)
        return aTerm;

    const sal_Int32 nLength = aTerm.getLength();
    return pCC->toUpper_rtl(aTerm, 0, nLength);
}
773 
774 
/** Returns aTerm with its first character in upper case and all following
    characters in lower case.  Without a CharClass, or for an empty term,
    the term is returned unchanged. */
OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC)
{
    const sal_Int32 nLength = aTerm.getLength();
    if (!pCC || !nLength)
        return aTerm;

    // the first character on its own
    OUString aFirst = aTerm.copy(0,1);

    if (nLength == 1)
        return pCC->toUpper_rtl(aFirst, 0, 1);

    return ( pCC->toUpper_rtl(aFirst, 0, 1) + pCC->toLower_rtl(aTerm,1,(nLength-1)) );
}
788 
789 
/** Factory function: creates a new Hyphenator and returns it as XInterface.
    The service manager argument is not used. */
Reference< XInterface > SAL_CALL Hyphenator_CreateInstance(
        const Reference< XMultiServiceFactory > & /*rSMgr*/ )
        throw(Exception)
{
	Hyphenator *pHyphenator = new Hyphenator;
	Reference< XInterface > xInstance = (cppu::OWeakObject*) pHyphenator;
	return xInstance;
}
797 
798 
addLinguServiceEventListener(const Reference<XLinguServiceEventListener> & rxLstnr)799 sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
800         const Reference< XLinguServiceEventListener >& rxLstnr )
801         throw(RuntimeException)
802 {
803 	MutexGuard	aGuard( GetLinguMutex() );
804 
805 	sal_Bool bRes = sal_False;
806 	if (!bDisposing && rxLstnr.is())
807 	{
808 		bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
809 	}
810 	return bRes;
811 }
812 
813 
removeLinguServiceEventListener(const Reference<XLinguServiceEventListener> & rxLstnr)814 sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
815         const Reference< XLinguServiceEventListener >& rxLstnr )
816         throw(RuntimeException)
817 {
818 	MutexGuard	aGuard( GetLinguMutex() );
819 
820 	sal_Bool bRes = sal_False;
821 	if (!bDisposing && rxLstnr.is())
822 	{
823 		DBG_ASSERT( xPropHelper.is(), "xPropHelper non existent" );
824 		bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
825 	}
826 	return bRes;
827 }
828 
829 
getServiceDisplayName(const Locale &)830 OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ )
831         throw(RuntimeException)
832 {
833 	MutexGuard	aGuard( GetLinguMutex() );
834 	return A2OU( "Libhyphen Hyphenator" );
835 }
836 
837 
initialize(const Sequence<Any> & rArguments)838 void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
839 		throw(Exception, RuntimeException)
840 {
841 	MutexGuard	aGuard( GetLinguMutex() );
842 
843 	if (!pPropHelper)
844 	{
845 		sal_Int32 nLen = rArguments.getLength();
846 		if (2 == nLen)
847 		{
848 			Reference< XPropertySet	>	xPropSet;
849 			rArguments.getConstArray()[0] >>= xPropSet;
850 			//rArguments.getConstArray()[1] >>= xDicList;
851 
852 			//! Pointer allows for access of the non-UNO functions.
853 			//! And the reference to the UNO-functions while increasing
854 			//! the ref-count and will implicitly free the memory
855 			//! when the object is not longer used.
856 			pPropHelper = new PropertyHelper_Hyphen( (XHyphenator *) this, xPropSet );
857 			xPropHelper = pPropHelper;
858 			pPropHelper->AddAsPropListener();	//! after a reference is established
859 		}
860         else
861         {
862 			DBG_ERROR( "wrong number of arguments in sequence" );
863         }
864 	}
865 }
866 
867 
dispose()868 void SAL_CALL Hyphenator::dispose()
869 		throw(RuntimeException)
870 {
871 	MutexGuard	aGuard( GetLinguMutex() );
872 
873 	if (!bDisposing)
874 	{
875 		bDisposing = sal_True;
876 		EventObject	aEvtObj( (XHyphenator *) this );
877 		aEvtListeners.disposeAndClear( aEvtObj );
878 	}
879 }
880 
881 
addEventListener(const Reference<XEventListener> & rxListener)882 void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
883 		throw(RuntimeException)
884 {
885 	MutexGuard	aGuard( GetLinguMutex() );
886 
887 	if (!bDisposing && rxListener.is())
888 		aEvtListeners.addInterface( rxListener );
889 }
890 
891 
removeEventListener(const Reference<XEventListener> & rxListener)892 void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
893 		throw(RuntimeException)
894 {
895 	MutexGuard	aGuard( GetLinguMutex() );
896 
897 	if (!bDisposing && rxListener.is())
898 		aEvtListeners.removeInterface( rxListener );
899 }
900 
901 
902 ///////////////////////////////////////////////////////////////////////////
903 // Service specific part
904 //
905 
getImplementationName()906 OUString SAL_CALL Hyphenator::getImplementationName()
907 		throw(RuntimeException)
908 {
909 	MutexGuard	aGuard( GetLinguMutex() );
910 
911 	return getImplementationName_Static();
912 }
913 
914 
supportsService(const OUString & ServiceName)915 sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
916 		throw(RuntimeException)
917 {
918 	MutexGuard	aGuard( GetLinguMutex() );
919 
920 	Sequence< OUString > aSNL = getSupportedServiceNames();
921 	const OUString * pArray = aSNL.getConstArray();
922 	for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
923 		if( pArray[i] == ServiceName )
924 			return sal_True;
925 	return sal_False;
926 }
927 
928 
getSupportedServiceNames()929 Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
930 		throw(RuntimeException)
931 {
932 	MutexGuard	aGuard( GetLinguMutex() );
933 
934 	return getSupportedServiceNames_Static();
935 }
936 
937 
getSupportedServiceNames_Static()938 Sequence< OUString > Hyphenator::getSupportedServiceNames_Static()
939 		throw()
940 {
941 	MutexGuard	aGuard( GetLinguMutex() );
942 
943 	Sequence< OUString > aSNS( 1 );	// auch mehr als 1 Service moeglich
944 	aSNS.getArray()[0] = A2OU( SN_HYPHENATOR );
945 	return aSNS;
946 }
947 
Hyphenator_getFactory(const sal_Char * pImplName,XMultiServiceFactory * pServiceManager,void *)948 void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName,
949 			XMultiServiceFactory * pServiceManager, void *  )
950 {
951 	void * pRet = 0;
952 	if ( !Hyphenator::getImplementationName_Static().compareToAscii( pImplName ) )
953 	{
954 		Reference< XSingleServiceFactory > xFactory =
955 			cppu::createOneInstanceFactory(
956 				pServiceManager,
957 				Hyphenator::getImplementationName_Static(),
958 				Hyphenator_CreateInstance,
959 				Hyphenator::getSupportedServiceNames_Static());
960 		// acquire, because we return an interface pointer instead of a reference
961 		xFactory->acquire();
962 		pRet = xFactory.get();
963 	}
964 	return pRet;
965 }
966 
967 
968 ///////////////////////////////////////////////////////////////////////////
969 
// Remove the CAPTYPE_* capitalization-type macros again at the end of the
// file (presumably they are #define'd further up; not visible from here).
#undef CAPTYPE_UNKNOWN
#undef CAPTYPE_NOCAP
#undef CAPTYPE_INITCAP
#undef CAPTYPE_ALLCAP
#undef CAPTYPE_MIXED
975