1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_guesslang.hxx"
26 
27 #include <iostream>
28 
29 #include <tools/debug.hxx>
30 
31 #include <sal/config.h>
32 #include <cppuhelper/factory.hxx>
33 #include <cppuhelper/implementationentry.hxx>
34 #include <cppuhelper/implbase2.hxx>
35 #include <tools/string.hxx>
36 
37 #include <simpleguesser.hxx>
38 #include <guess.hxx>
39 
40 //#include <cppuhelper/queryinterface.hxx> // helper for queryInterface() impl
41 
42 //#include <com/sun/star/lang/XMultiServiceFactory.hpp>
43 #include <com/sun/star/registry/XRegistryKey.hpp>
44 #include <com/sun/star/lang/XServiceInfo.hpp>
45 #include <com/sun/star/linguistic2/XLanguageGuessing.hpp>
46 #include <unotools/pathoptions.hxx>
47 #include <unotools/localfilehelper.hxx>
48 #include <osl/thread.h>
49 
50 using namespace ::rtl;
51 using namespace ::osl;
52 using namespace ::cppu;
53 using namespace ::com::sun::star;
54 using namespace ::com::sun::star::uno;
55 using namespace ::com::sun::star::lang;
56 using namespace ::com::sun::star::linguistic2;
57 
58 namespace css = ::com::sun::star;
59 
60 //==================================================================================================
61 
62 #define A2OU(x) ::rtl::OUString::createFromAscii( x )
63 
64 #define SERVICENAME     "com.sun.star.linguistic2.LanguageGuessing"
65 
66 #define IMPLNAME        "com.sun.star.lingu2.LanguageGuessing"
67 
getSupportedServiceNames_LangGuess_Impl()68 static Sequence< OUString > getSupportedServiceNames_LangGuess_Impl()
69 {
70     Sequence<OUString> names(1);
71     names[0] = A2OU( SERVICENAME );
72     return names;
73 }
74 
getImplementationName_LangGuess_Impl()75 static OUString getImplementationName_LangGuess_Impl()
76 {
77     return A2OU( IMPLNAME );
78 }
79 
GetLangGuessMutex()80 static osl::Mutex &  GetLangGuessMutex()
81 {
82     static osl::Mutex aMutex;
83     return aMutex;
84 }
85 
86 
87 class LangGuess_Impl :
88     public ::cppu::WeakImplHelper2<
89         XLanguageGuessing,
90         XServiceInfo >
91 {
92     SimpleGuesser   m_aGuesser;
93     bool            m_bInitialized;
94     css::uno::Reference< css::uno::XComponentContext >  m_xContext;
95 
96     LangGuess_Impl( const LangGuess_Impl & ); // not defined
97     LangGuess_Impl & operator =( const LangGuess_Impl & ); // not defined
98 
~LangGuess_Impl()99     virtual ~LangGuess_Impl() {}
100     void    EnsureInitialized();
101 
102 public:
103     explicit LangGuess_Impl(css::uno::Reference< css::uno::XComponentContext > const & rxContext);
104 
105     // XServiceInfo	implementation
106     virtual OUString SAL_CALL getImplementationName(  ) throw(RuntimeException);
107     virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) throw(RuntimeException);
108     virtual Sequence< OUString > SAL_CALL getSupportedServiceNames(  ) throw(RuntimeException);
109     static Sequence< OUString > SAL_CALL getSupportedServiceNames_Static(  );
110 
111     // XLanguageGuessing implementation
112     virtual ::com::sun::star::lang::Locale SAL_CALL guessPrimaryLanguage( const ::rtl::OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
113     virtual void SAL_CALL disableLanguages( const ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale >& aLanguages ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
114     virtual void SAL_CALL enableLanguages( const ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale >& aLanguages ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
115     virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getAvailableLanguages(  ) throw (::com::sun::star::uno::RuntimeException);
116     virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getEnabledLanguages(  ) throw (::com::sun::star::uno::RuntimeException);
117     virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getDisabledLanguages(  ) throw (::com::sun::star::uno::RuntimeException);
118 
119     // implementation specific
120     void SetFingerPrintsDB( const rtl::OUString &fileName ) throw (RuntimeException);
121 
122     static const OUString & SAL_CALL getImplementationName_Static() throw();
123 
124 };
125 
126 //*************************************************************************
127 
LangGuess_Impl(css::uno::Reference<css::uno::XComponentContext> const & rxContext)128 LangGuess_Impl::LangGuess_Impl(css::uno::Reference< css::uno::XComponentContext > const & rxContext) :
129     m_bInitialized( false ),
130     m_xContext( rxContext )
131 {
132 }
133 
134 //*************************************************************************
135 
EnsureInitialized()136 void LangGuess_Impl::EnsureInitialized()
137 {
138     if (!m_bInitialized)
139     {
140         // set this to true at the very start to prevent loops because of
141         // implicitly called functions below
142         m_bInitialized = true;
143 
144         // set default fingerprint path to where those get installed
145         String aPhysPath;
146         String aURL( SvtPathOptions().GetFingerprintPath() );
147         utl::LocalFileHelper::ConvertURLToPhysicalName( aURL, aPhysPath );
148 #ifdef WNT
149             aPhysPath += '\\';
150 #else
151             aPhysPath += '/';
152 #endif
153 
154         SetFingerPrintsDB( aPhysPath );
155 
156         //
157         // disable currently not functional languages...
158         //
159         struct LangCountry
160         {
161             const char *pLang;
162             const char *pCountry;
163         };
164         LangCountry aDisable[] =
165         {
166             {"gv", ""}, {"sco", ""},                            // no lang-id available yet...
167 //            {"hy", ""}, {"drt", ""},                          // 0 bytes fingerprints...
168             {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""}, // not yet correct functional...
169             {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""},
170             {"sa", ""}, {"ta", ""}, {"th", ""},
171             {"qu", ""}, {"yi", ""}
172         };
173         sal_Int32 nNum = sizeof(aDisable) / sizeof(aDisable[0]);
174         Sequence< Locale > aDisableSeq( nNum );
175         Locale *pDisableSeq = aDisableSeq.getArray();
176         for (sal_Int32 i = 0;  i < nNum;  ++i)
177         {
178             Locale aLocale;
179             aLocale.Language = OUString::createFromAscii( aDisable[i].pLang );
180             aLocale.Country  = OUString::createFromAscii( aDisable[i].pCountry );
181             pDisableSeq[i] = aLocale;
182         }
183         disableLanguages( aDisableSeq );
184         DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" );
185     }
186 }
187 
188 //*************************************************************************
189 
190 /* TL: currently not part of the API
191 Sequence< com::sun::star::lang::Locale > SAL_CALL LangGuess_Impl::guessLanguages(
192         const rtl::OUString   &rText,
193         sal_Int32       nStartPos,
194         sal_Int32       nLen )
195     throw (RuntimeException)
196 {
197     Sequence< com::sun::star::lang::Locale > aRes;
198 
199     OString o = OUStringToOString( rText, RTL_TEXTENCODING_UTF8 );
200     vector<Guess> gs = m_aGuesser.GuessLanguage(o.pData->buffer);
201 
202     aRes.realloc(gs.size());
203 
204     com::sun::star::lang::Locale *pRes = aRes.getArray();
205 
206 #ifdef DEBUG
207     std::cout << " We have " << gs.size() << " candidates" << std::endl;
208 #endif
209 
210     for(int i = 0; i < gs.size() ; i++ ){
211         com::sun::star::lang::Locale current_aRes;
212 
213         current_aRes.Language   = A2OU( gs[i].getLanguage().c_str() );
214         current_aRes.Country    = A2OU( gs[i].getCountry().c_str() );
215 
216         pRes[i] = current_aRes;
217     }
218 
219     return aRes;
220 }
221 */
222 //*************************************************************************
223 
guessPrimaryLanguage(const::rtl::OUString & rText,::sal_Int32 nStartPos,::sal_Int32 nLen)224 Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage(
225         const ::rtl::OUString& rText,
226         ::sal_Int32 nStartPos,
227         ::sal_Int32 nLen )
228     throw (lang::IllegalArgumentException, uno::RuntimeException)
229 {
230     osl::MutexGuard aGuard( GetLangGuessMutex() );
231 
232     EnsureInitialized();
233 
234 	lang::Locale aRes;
235 	if (nStartPos >=0 && nLen >= 0 && nStartPos + nLen <= rText.getLength())
236 	{
237 		OString o( OUStringToOString( rText.copy(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) );
238         Guess g = m_aGuesser.GuessPrimaryLanguage((char*)o.getStr());
239 		aRes.Language   = OUString::createFromAscii(g.GetLanguage().c_str());
240 		aRes.Country    = OUString::createFromAscii(g.GetCountry().c_str());
241 	}
242 	else
243 		throw lang::IllegalArgumentException();
244 
245 	return aRes;
246 }
247 
248 //*************************************************************************
249 #define DEFAULT_CONF_FILE_NAME "fpdb.conf"
250 
SetFingerPrintsDB(const rtl::OUString & filePath)251 void LangGuess_Impl::SetFingerPrintsDB(
252         const rtl::OUString &filePath )
253     throw (RuntimeException)
254 {
255 	//! text encoding for file name / path needs to be in the same encoding the OS uses
256     OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() );
257     OString conf_file_name( DEFAULT_CONF_FILE_NAME );
258     OString conf_file_path(path);
259     conf_file_path += conf_file_name;
260 
261     //cout << "Conf file : " << conf_file_path.getStr() << " directory : " << path.getStr() << endl;
262 
263     m_aGuesser.SetDBPath((const char*)conf_file_path.getStr(), (const char*)path.getStr());
264 }
265 
266 //*************************************************************************
getAvailableLanguages()267 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages(  )
268         throw (uno::RuntimeException)
269 {
270     osl::MutexGuard aGuard( GetLangGuessMutex() );
271 
272     EnsureInitialized();
273 
274     Sequence< com::sun::star::lang::Locale > aRes;
275     vector<Guess> gs = m_aGuesser.GetAllManagedLanguages();
276     aRes.realloc(gs.size());
277 
278     com::sun::star::lang::Locale *pRes = aRes.getArray();
279 
280     for(size_t i = 0; i < gs.size() ; i++ ){
281         com::sun::star::lang::Locale current_aRes;
282         current_aRes.Language   = A2OU( gs[i].GetLanguage().c_str() );
283         current_aRes.Country    = A2OU( gs[i].GetCountry().c_str() );
284         pRes[i] = current_aRes;
285     }
286 
287     return aRes;
288 }
289 
290 //*************************************************************************
getEnabledLanguages()291 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages(  )
292         throw (uno::RuntimeException)
293 {
294     osl::MutexGuard aGuard( GetLangGuessMutex() );
295 
296     EnsureInitialized();
297 
298     Sequence< com::sun::star::lang::Locale > aRes;
299     vector<Guess> gs = m_aGuesser.GetAvailableLanguages();
300     aRes.realloc(gs.size());
301 
302     com::sun::star::lang::Locale *pRes = aRes.getArray();
303 
304     for(size_t i = 0; i < gs.size() ; i++ ){
305         com::sun::star::lang::Locale current_aRes;
306         current_aRes.Language   = A2OU( gs[i].GetLanguage().c_str() );
307         current_aRes.Country    = A2OU( gs[i].GetCountry().c_str() );
308         pRes[i] = current_aRes;
309     }
310 
311     return aRes;
312 }
313 
314 //*************************************************************************
getDisabledLanguages()315 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages(  )
316         throw (uno::RuntimeException)
317 {
318     osl::MutexGuard aGuard( GetLangGuessMutex() );
319 
320     EnsureInitialized();
321 
322     Sequence< com::sun::star::lang::Locale > aRes;
323     vector<Guess> gs = m_aGuesser.GetUnavailableLanguages();
324     aRes.realloc(gs.size());
325 
326     com::sun::star::lang::Locale *pRes = aRes.getArray();
327 
328     for(size_t i = 0; i < gs.size() ; i++ ){
329         com::sun::star::lang::Locale current_aRes;
330         current_aRes.Language   = A2OU( gs[i].GetLanguage().c_str() );
331         current_aRes.Country    = A2OU( gs[i].GetCountry().c_str() );
332         pRes[i] = current_aRes;
333     }
334 
335     return aRes;
336 }
337 
338 //*************************************************************************
disableLanguages(const uno::Sequence<Locale> & rLanguages)339 void SAL_CALL LangGuess_Impl::disableLanguages(
340         const uno::Sequence< Locale >& rLanguages )
341     throw (lang::IllegalArgumentException, uno::RuntimeException)
342 {
343     osl::MutexGuard aGuard( GetLangGuessMutex() );
344 
345     EnsureInitialized();
346 
347     sal_Int32 nLanguages = rLanguages.getLength();
348     const Locale *pLanguages = rLanguages.getConstArray();
349 
350     for (sal_Int32 i = 0;  i < nLanguages;  ++i)
351     {
352         string language;
353 
354         OString l = OUStringToOString( pLanguages[i].Language, RTL_TEXTENCODING_ASCII_US );
355         OString c = OUStringToOString( pLanguages[i].Country, RTL_TEXTENCODING_ASCII_US );
356 
357         language += l.getStr();
358         language += "-";
359         language += c.getStr();
360         m_aGuesser.DisableLanguage(language);
361     }
362 }
363 
364 //*************************************************************************
enableLanguages(const uno::Sequence<Locale> & rLanguages)365 void SAL_CALL LangGuess_Impl::enableLanguages(
366         const uno::Sequence< Locale >& rLanguages )
367     throw (lang::IllegalArgumentException, uno::RuntimeException)
368 {
369     osl::MutexGuard aGuard( GetLangGuessMutex() );
370 
371     EnsureInitialized();
372 
373     sal_Int32 nLanguages = rLanguages.getLength();
374     const Locale *pLanguages = rLanguages.getConstArray();
375 
376     for (sal_Int32 i = 0;  i < nLanguages;  ++i)
377     {
378         string language;
379 
380         OString l = OUStringToOString( pLanguages[i].Language, RTL_TEXTENCODING_ASCII_US );
381         OString c = OUStringToOString( pLanguages[i].Country, RTL_TEXTENCODING_ASCII_US );
382 
383         language += l.getStr();
384         language += "-";
385         language += c.getStr();
386         m_aGuesser.EnableLanguage(language);
387     }
388 }
389 
390 //*************************************************************************
getImplementationName()391 OUString SAL_CALL LangGuess_Impl::getImplementationName(  )
392 	throw(RuntimeException)
393 {
394     osl::MutexGuard aGuard( GetLangGuessMutex() );
395     return A2OU( IMPLNAME );
396 }
397 
398 //*************************************************************************
supportsService(const OUString & ServiceName)399 sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName )
400 	throw(RuntimeException)
401 {
402     osl::MutexGuard aGuard( GetLangGuessMutex() );
403 	Sequence< OUString > aSNL = getSupportedServiceNames();
404 	const OUString * pArray = aSNL.getArray();
405 	for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
406 		if( pArray[i] == ServiceName )
407 			return sal_True;
408 	return sal_False;
409 }
410 
411 //*************************************************************************
getSupportedServiceNames()412 Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames(  )
413 	throw(RuntimeException)
414 {
415     osl::MutexGuard aGuard( GetLangGuessMutex() );
416 	return getSupportedServiceNames_Static();
417 }
418 
419 //*************************************************************************
getSupportedServiceNames_Static()420 Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames_Static(  )
421 {
422     OUString aName( A2OU( SERVICENAME ) );
423 	return Sequence< OUString >( &aName, 1 );
424 }
425 
426 //*************************************************************************
427 
428 
429 /**
430  * Function to create a new component instance; is needed by factory helper implementation.
431  * @param xMgr service manager to if the components needs other component instances
432  */
LangGuess_Impl_create(Reference<XComponentContext> const & xContext)433 Reference< XInterface > SAL_CALL LangGuess_Impl_create(
434     Reference< XComponentContext > const & xContext )
435     SAL_THROW( () )
436 {
437     return static_cast< ::cppu::OWeakObject * >( new LangGuess_Impl(xContext) );
438 }
439 
440 //##################################################################################################
441 //#### EXPORTED ### functions to allow for registration and creation of the UNO component
442 //##################################################################################################
443 
444 static struct ::cppu::ImplementationEntry s_component_entries [] =
445 {
446     {
447         LangGuess_Impl_create, getImplementationName_LangGuess_Impl,
448         getSupportedServiceNames_LangGuess_Impl,
449         ::cppu::createSingleComponentFactory,
450         0, 0
451     },
452     { 0, 0, 0, 0, 0, 0 }
453 };
454 
455 extern "C"
456 {
457 
component_getImplementationEnvironment(sal_Char const ** ppEnvTypeName,uno_Environment **)458 SAL_DLLPUBLIC_EXPORT void SAL_CALL component_getImplementationEnvironment(
459     sal_Char const ** ppEnvTypeName, uno_Environment ** /*ppEnv*/ )
460 {
461     *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME;
462 }
463 
component_getFactory(sal_Char const * implName,lang::XMultiServiceFactory * xMgr,registry::XRegistryKey * xRegistry)464 SAL_DLLPUBLIC_EXPORT void * SAL_CALL component_getFactory(
465     sal_Char const * implName, lang::XMultiServiceFactory * xMgr,
466     registry::XRegistryKey * xRegistry )
467 {
468     return ::cppu::component_getFactoryHelper(
469         implName, xMgr, xRegistry, s_component_entries );
470 }
471 
472 }
473 
474