1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_lingucomponent.hxx"
30 
31 #include <iostream>
32 
33 #include <tools/debug.hxx>
34 
35 #include <sal/config.h>
36 #include <cppuhelper/factory.hxx>
37 #include <cppuhelper/implementationentry.hxx>
38 #include <cppuhelper/implbase2.hxx>
39 #include <tools/string.hxx>
40 
41 #include <simpleguesser.hxx>
42 #include <guess.hxx>
43 
44 //#include <cppuhelper/queryinterface.hxx> // helper for queryInterface() impl
45 
46 //#include <com/sun/star/lang/XMultiServiceFactory.hpp>
47 #include <com/sun/star/registry/XRegistryKey.hpp>
48 #include <com/sun/star/lang/XServiceInfo.hpp>
49 #include <com/sun/star/linguistic2/XLanguageGuessing.hpp>
50 #include <unotools/pathoptions.hxx>
51 #include <unotools/localfilehelper.hxx>
52 #include <osl/thread.h>
53 
54 using namespace ::rtl;
55 using namespace ::osl;
56 using namespace ::cppu;
57 using namespace ::com::sun::star;
58 using namespace ::com::sun::star::uno;
59 using namespace ::com::sun::star::lang;
60 using namespace ::com::sun::star::linguistic2;
61 
62 namespace css = ::com::sun::star;
63 
64 //==================================================================================================
65 
66 #define A2OU(x) ::rtl::OUString::createFromAscii( x )
67 
68 #define SERVICENAME     "com.sun.star.linguistic2.LanguageGuessing"
69 
70 #define IMPLNAME        "com.sun.star.lingu2.LanguageGuessing"
71 
72 static Sequence< OUString > getSupportedServiceNames_LangGuess_Impl()
73 {
74     Sequence<OUString> names(1);
75     names[0] = A2OU( SERVICENAME );
76     return names;
77 }
78 
79 static OUString getImplementationName_LangGuess_Impl()
80 {
81     return A2OU( IMPLNAME );
82 }
83 
84 static osl::Mutex &  GetLangGuessMutex()
85 {
86     static osl::Mutex aMutex;
87     return aMutex;
88 }
89 
90 
/**
 * UNO component that implements XLanguageGuessing and XServiceInfo on top
 * of a SimpleGuesser.
 *
 * The XLanguageGuessing methods lock the mutex returned by
 * GetLangGuessMutex() and call EnsureInitialized() before they touch
 * m_aGuesser, so the guesser is only configured on first use.
 */
class LangGuess_Impl :
    public ::cppu::WeakImplHelper2<
        XLanguageGuessing,
        XServiceInfo >
{
    // the guesser all interface calls are forwarded to
    SimpleGuesser   m_aGuesser;
    // set by EnsureInitialized() once the one-time setup has been started
    bool            m_bInitialized;
    // component context handed in at construction time
    css::uno::Reference< css::uno::XComponentContext >  m_xContext;

    // copying is not supported: declared private and left without definition
    LangGuess_Impl( const LangGuess_Impl & ); // not defined
    LangGuess_Impl & operator =( const LangGuess_Impl & ); // not defined

    // private destructor: instances cannot be deleted directly from outside
    // NOTE(review): presumably destruction goes through the ref-counting of
    // the WeakImplHelper2 base - confirm
    virtual ~LangGuess_Impl() {}

    // performs the one-time setup (fingerprint path, default disabled
    // languages) on the first call; later calls do nothing
    void    EnsureInitialized();

public:
    explicit LangGuess_Impl(css::uno::Reference< css::uno::XComponentContext > const & rxContext);

    // XServiceInfo implementation
    virtual OUString SAL_CALL getImplementationName(  ) throw(RuntimeException);
    virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) throw(RuntimeException);
    virtual Sequence< OUString > SAL_CALL getSupportedServiceNames(  ) throw(RuntimeException);
    static Sequence< OUString > SAL_CALL getSupportedServiceNames_Static(  );

    // XLanguageGuessing implementation
    virtual ::com::sun::star::lang::Locale SAL_CALL guessPrimaryLanguage( const ::rtl::OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
    virtual void SAL_CALL disableLanguages( const ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale >& aLanguages ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
    virtual void SAL_CALL enableLanguages( const ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale >& aLanguages ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
    virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getAvailableLanguages(  ) throw (::com::sun::star::uno::RuntimeException);
    virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getEnabledLanguages(  ) throw (::com::sun::star::uno::RuntimeException);
    virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getDisabledLanguages(  ) throw (::com::sun::star::uno::RuntimeException);

    // implementation specific
    // points the guesser at the fingerprint directory; the argument is a
    // directory path that already ends with a path separator
    void SetFingerPrintsDB( const rtl::OUString &fileName ) throw (RuntimeException);

    // NOTE(review): only declared here; no definition is visible in the
    // reviewed part of this file - confirm whether it is still needed
    static const OUString & SAL_CALL getImplementationName_Static() throw();

};
129 
130 //*************************************************************************
131 
132 LangGuess_Impl::LangGuess_Impl(css::uno::Reference< css::uno::XComponentContext > const & rxContext) :
133     m_bInitialized( false ),
134     m_xContext( rxContext )
135 {
136 }
137 
138 //*************************************************************************
139 
140 void LangGuess_Impl::EnsureInitialized()
141 {
142     if (!m_bInitialized)
143     {
144         // set this to true at the very start to prevent loops because of
145         // implicitly called functions below
146         m_bInitialized = true;
147 
148         // set default fingerprint path to where those get installed
149         String aPhysPath;
150         String aURL( SvtPathOptions().GetFingerprintPath() );
151         utl::LocalFileHelper::ConvertURLToPhysicalName( aURL, aPhysPath );
152 #ifdef WNT
153             aPhysPath += '\\';
154 #else
155             aPhysPath += '/';
156 #endif
157 
158         SetFingerPrintsDB( aPhysPath );
159 
160         //
161         // disable currently not functional languages...
162         //
163         struct LangCountry
164         {
165             const char *pLang;
166             const char *pCountry;
167         };
168         LangCountry aDisable[] =
169         {
170             {"gv", ""}, {"sco", ""},                            // no lang-id available yet...
171 //            {"hy", ""}, {"drt", ""},                          // 0 bytes fingerprints...
172             {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""}, // not yet correct functional...
173             {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""},
174             {"sa", ""}, {"ta", ""}, {"th", ""},
175             {"qu", ""}, {"yi", ""}
176         };
177         sal_Int32 nNum = sizeof(aDisable) / sizeof(aDisable[0]);
178         Sequence< Locale > aDisableSeq( nNum );
179         Locale *pDisableSeq = aDisableSeq.getArray();
180         for (sal_Int32 i = 0;  i < nNum;  ++i)
181         {
182             Locale aLocale;
183             aLocale.Language = OUString::createFromAscii( aDisable[i].pLang );
184             aLocale.Country  = OUString::createFromAscii( aDisable[i].pCountry );
185             pDisableSeq[i] = aLocale;
186         }
187         disableLanguages( aDisableSeq );
188         DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" );
189     }
190 }
191 
192 //*************************************************************************
193 
194 /* TL: currently not part of the API
195 Sequence< com::sun::star::lang::Locale > SAL_CALL LangGuess_Impl::guessLanguages(
196         const rtl::OUString   &rText,
197         sal_Int32       nStartPos,
198         sal_Int32       nLen )
199     throw (RuntimeException)
200 {
201     Sequence< com::sun::star::lang::Locale > aRes;
202 
203     OString o = OUStringToOString( rText, RTL_TEXTENCODING_UTF8 );
204     vector<Guess> gs = m_aGuesser.GuessLanguage(o.pData->buffer);
205 
206     aRes.realloc(gs.size());
207 
208     com::sun::star::lang::Locale *pRes = aRes.getArray();
209 
210 #ifdef DEBUG
211     std::cout << " We have " << gs.size() << " candidates" << std::endl;
212 #endif
213 
214     for(int i = 0; i < gs.size() ; i++ ){
215         com::sun::star::lang::Locale current_aRes;
216 
217         current_aRes.Language   = A2OU( gs[i].getLanguage().c_str() );
218         current_aRes.Country    = A2OU( gs[i].getCountry().c_str() );
219 
220         pRes[i] = current_aRes;
221     }
222 
223     return aRes;
224 }
225 */
226 //*************************************************************************
227 
228 Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage(
229         const ::rtl::OUString& rText,
230         ::sal_Int32 nStartPos,
231         ::sal_Int32 nLen )
232     throw (lang::IllegalArgumentException, uno::RuntimeException)
233 {
234     osl::MutexGuard aGuard( GetLangGuessMutex() );
235 
236     EnsureInitialized();
237 
238 	lang::Locale aRes;
239 	if (nStartPos >=0 && nLen >= 0 && nStartPos + nLen <= rText.getLength())
240 	{
241 		OString o( OUStringToOString( rText.copy(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) );
242         Guess g = m_aGuesser.GuessPrimaryLanguage((char*)o.getStr());
243 		aRes.Language   = OUString::createFromAscii(g.GetLanguage().c_str());
244 		aRes.Country    = OUString::createFromAscii(g.GetCountry().c_str());
245 	}
246 	else
247 		throw lang::IllegalArgumentException();
248 
249 	return aRes;
250 }
251 
252 //*************************************************************************
253 #define DEFAULT_CONF_FILE_NAME "fpdb.conf"
254 
255 void LangGuess_Impl::SetFingerPrintsDB(
256         const rtl::OUString &filePath )
257     throw (RuntimeException)
258 {
259 	//! text encoding for file name / path needs to be in the same encoding the OS uses
260     OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() );
261     OString conf_file_name( DEFAULT_CONF_FILE_NAME );
262     OString conf_file_path(path);
263     conf_file_path += conf_file_name;
264 
265     //cout << "Conf file : " << conf_file_path.getStr() << " directory : " << path.getStr() << endl;
266 
267     m_aGuesser.SetDBPath((const char*)conf_file_path.getStr(), (const char*)path.getStr());
268 }
269 
270 //*************************************************************************
271 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages(  )
272         throw (uno::RuntimeException)
273 {
274     osl::MutexGuard aGuard( GetLangGuessMutex() );
275 
276     EnsureInitialized();
277 
278     Sequence< com::sun::star::lang::Locale > aRes;
279     vector<Guess> gs = m_aGuesser.GetAllManagedLanguages();
280     aRes.realloc(gs.size());
281 
282     com::sun::star::lang::Locale *pRes = aRes.getArray();
283 
284     for(size_t i = 0; i < gs.size() ; i++ ){
285         com::sun::star::lang::Locale current_aRes;
286         current_aRes.Language   = A2OU( gs[i].GetLanguage().c_str() );
287         current_aRes.Country    = A2OU( gs[i].GetCountry().c_str() );
288         pRes[i] = current_aRes;
289     }
290 
291     return aRes;
292 }
293 
294 //*************************************************************************
295 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages(  )
296         throw (uno::RuntimeException)
297 {
298     osl::MutexGuard aGuard( GetLangGuessMutex() );
299 
300     EnsureInitialized();
301 
302     Sequence< com::sun::star::lang::Locale > aRes;
303     vector<Guess> gs = m_aGuesser.GetAvailableLanguages();
304     aRes.realloc(gs.size());
305 
306     com::sun::star::lang::Locale *pRes = aRes.getArray();
307 
308     for(size_t i = 0; i < gs.size() ; i++ ){
309         com::sun::star::lang::Locale current_aRes;
310         current_aRes.Language   = A2OU( gs[i].GetLanguage().c_str() );
311         current_aRes.Country    = A2OU( gs[i].GetCountry().c_str() );
312         pRes[i] = current_aRes;
313     }
314 
315     return aRes;
316 }
317 
318 //*************************************************************************
319 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages(  )
320         throw (uno::RuntimeException)
321 {
322     osl::MutexGuard aGuard( GetLangGuessMutex() );
323 
324     EnsureInitialized();
325 
326     Sequence< com::sun::star::lang::Locale > aRes;
327     vector<Guess> gs = m_aGuesser.GetUnavailableLanguages();
328     aRes.realloc(gs.size());
329 
330     com::sun::star::lang::Locale *pRes = aRes.getArray();
331 
332     for(size_t i = 0; i < gs.size() ; i++ ){
333         com::sun::star::lang::Locale current_aRes;
334         current_aRes.Language   = A2OU( gs[i].GetLanguage().c_str() );
335         current_aRes.Country    = A2OU( gs[i].GetCountry().c_str() );
336         pRes[i] = current_aRes;
337     }
338 
339     return aRes;
340 }
341 
342 //*************************************************************************
343 void SAL_CALL LangGuess_Impl::disableLanguages(
344         const uno::Sequence< Locale >& rLanguages )
345     throw (lang::IllegalArgumentException, uno::RuntimeException)
346 {
347     osl::MutexGuard aGuard( GetLangGuessMutex() );
348 
349     EnsureInitialized();
350 
351     sal_Int32 nLanguages = rLanguages.getLength();
352     const Locale *pLanguages = rLanguages.getConstArray();
353 
354     for (sal_Int32 i = 0;  i < nLanguages;  ++i)
355     {
356         string language;
357 
358         OString l = OUStringToOString( pLanguages[i].Language, RTL_TEXTENCODING_ASCII_US );
359         OString c = OUStringToOString( pLanguages[i].Country, RTL_TEXTENCODING_ASCII_US );
360 
361         language += l.getStr();
362         language += "-";
363         language += c.getStr();
364         m_aGuesser.DisableLanguage(language);
365     }
366 }
367 
368 //*************************************************************************
369 void SAL_CALL LangGuess_Impl::enableLanguages(
370         const uno::Sequence< Locale >& rLanguages )
371     throw (lang::IllegalArgumentException, uno::RuntimeException)
372 {
373     osl::MutexGuard aGuard( GetLangGuessMutex() );
374 
375     EnsureInitialized();
376 
377     sal_Int32 nLanguages = rLanguages.getLength();
378     const Locale *pLanguages = rLanguages.getConstArray();
379 
380     for (sal_Int32 i = 0;  i < nLanguages;  ++i)
381     {
382         string language;
383 
384         OString l = OUStringToOString( pLanguages[i].Language, RTL_TEXTENCODING_ASCII_US );
385         OString c = OUStringToOString( pLanguages[i].Country, RTL_TEXTENCODING_ASCII_US );
386 
387         language += l.getStr();
388         language += "-";
389         language += c.getStr();
390         m_aGuesser.EnableLanguage(language);
391     }
392 }
393 
394 //*************************************************************************
395 OUString SAL_CALL LangGuess_Impl::getImplementationName(  )
396 	throw(RuntimeException)
397 {
398     osl::MutexGuard aGuard( GetLangGuessMutex() );
399     return A2OU( IMPLNAME );
400 }
401 
402 //*************************************************************************
403 sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName )
404 	throw(RuntimeException)
405 {
406     osl::MutexGuard aGuard( GetLangGuessMutex() );
407 	Sequence< OUString > aSNL = getSupportedServiceNames();
408 	const OUString * pArray = aSNL.getArray();
409 	for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
410 		if( pArray[i] == ServiceName )
411 			return sal_True;
412 	return sal_False;
413 }
414 
415 //*************************************************************************
416 Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames(  )
417 	throw(RuntimeException)
418 {
419     osl::MutexGuard aGuard( GetLangGuessMutex() );
420 	return getSupportedServiceNames_Static();
421 }
422 
423 //*************************************************************************
424 Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames_Static(  )
425 {
426     OUString aName( A2OU( SERVICENAME ) );
427 	return Sequence< OUString >( &aName, 1 );
428 }
429 
430 //*************************************************************************
431 
432 
433 /**
434  * Function to create a new component instance; is needed by factory helper implementation.
435  * @param xMgr service manager to if the components needs other component instances
436  */
437 Reference< XInterface > SAL_CALL LangGuess_Impl_create(
438     Reference< XComponentContext > const & xContext )
439     SAL_THROW( () )
440 {
441     return static_cast< ::cppu::OWeakObject * >( new LangGuess_Impl(xContext) );
442 }
443 
444 //##################################################################################################
445 //#### EXPORTED ### functions to allow for registration and creation of the UNO component
446 //##################################################################################################
447 
448 static struct ::cppu::ImplementationEntry s_component_entries [] =
449 {
450     {
451         LangGuess_Impl_create, getImplementationName_LangGuess_Impl,
452         getSupportedServiceNames_LangGuess_Impl,
453         ::cppu::createSingleComponentFactory,
454         0, 0
455     },
456     { 0, 0, 0, 0, 0, 0 }
457 };
458 
459 extern "C"
460 {
461 
// Exported entry point: reports the environment type name of this library
// by writing CPPU_CURRENT_LANGUAGE_BINDING_NAME to *ppEnvTypeName.
// The second parameter is not used.
void SAL_CALL component_getImplementationEnvironment(
    sal_Char const ** ppEnvTypeName, uno_Environment ** /*ppEnv*/ )
{
    *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME;
}
467 
// Exported entry point: returns the factory for the implementation named
// implName. All arguments are forwarded unchanged, together with
// s_component_entries, to ::cppu::component_getFactoryHelper(), whose result
// is returned as is.
void * SAL_CALL component_getFactory(
    sal_Char const * implName, lang::XMultiServiceFactory * xMgr,
    registry::XRegistryKey * xRegistry )
{
    return ::cppu::component_getFactoryHelper(
        implName, xMgr, xRegistry, s_component_entries );
}
475 
476 }
477 
478