1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_guesslang.hxx"
26
27 #include <iostream>
28
29 #include <tools/debug.hxx>
30
31 #include <sal/config.h>
32 #include <cppuhelper/factory.hxx>
33 #include <cppuhelper/implementationentry.hxx>
34 #include <cppuhelper/implbase2.hxx>
35 #include <tools/string.hxx>
36
37 #include <simpleguesser.hxx>
38 #include <guess.hxx>
39
40 //#include <cppuhelper/queryinterface.hxx> // helper for queryInterface() impl
41
42 //#include <com/sun/star/lang/XMultiServiceFactory.hpp>
43 #include <com/sun/star/registry/XRegistryKey.hpp>
44 #include <com/sun/star/lang/XServiceInfo.hpp>
45 #include <com/sun/star/linguistic2/XLanguageGuessing.hpp>
46 #include <unotools/pathoptions.hxx>
47 #include <unotools/localfilehelper.hxx>
48 #include <osl/thread.h>
49
50 using namespace ::rtl;
51 using namespace ::osl;
52 using namespace ::cppu;
53 using namespace ::com::sun::star;
54 using namespace ::com::sun::star::uno;
55 using namespace ::com::sun::star::lang;
56 using namespace ::com::sun::star::linguistic2;
57
58 namespace css = ::com::sun::star;
59
60 //==================================================================================================
61
// Shorthand: builds an OUString from an ASCII C string.
#define A2OU(x) ::rtl::OUString::createFromAscii( x )

// Service name reported by this component's getSupportedServiceNames functions.
#define SERVICENAME "com.sun.star.linguistic2.LanguageGuessing"

// Implementation name reported by this component's getImplementationName functions.
#define IMPLNAME "com.sun.star.lingu2.LanguageGuessing"
67
getSupportedServiceNames_LangGuess_Impl()68 static Sequence< OUString > getSupportedServiceNames_LangGuess_Impl()
69 {
70 Sequence<OUString> names(1);
71 names[0] = A2OU( SERVICENAME );
72 return names;
73 }
74
getImplementationName_LangGuess_Impl()75 static OUString getImplementationName_LangGuess_Impl()
76 {
77 return A2OU( IMPLNAME );
78 }
79
GetLangGuessMutex()80 static osl::Mutex & GetLangGuessMutex()
81 {
82 static osl::Mutex aMutex;
83 return aMutex;
84 }
85
86
87 class LangGuess_Impl :
88 public ::cppu::WeakImplHelper2<
89 XLanguageGuessing,
90 XServiceInfo >
91 {
92 SimpleGuesser m_aGuesser;
93 bool m_bInitialized;
94 css::uno::Reference< css::uno::XComponentContext > m_xContext;
95
96 LangGuess_Impl( const LangGuess_Impl & ); // not defined
97 LangGuess_Impl & operator =( const LangGuess_Impl & ); // not defined
98
~LangGuess_Impl()99 virtual ~LangGuess_Impl() {}
100 void EnsureInitialized();
101
102 public:
103 explicit LangGuess_Impl(css::uno::Reference< css::uno::XComponentContext > const & rxContext);
104
105 // XServiceInfo implementation
106 virtual OUString SAL_CALL getImplementationName( ) throw(RuntimeException);
107 virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) throw(RuntimeException);
108 virtual Sequence< OUString > SAL_CALL getSupportedServiceNames( ) throw(RuntimeException);
109 static Sequence< OUString > SAL_CALL getSupportedServiceNames_Static( );
110
111 // XLanguageGuessing implementation
112 virtual ::com::sun::star::lang::Locale SAL_CALL guessPrimaryLanguage( const ::rtl::OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
113 virtual void SAL_CALL disableLanguages( const ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale >& aLanguages ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
114 virtual void SAL_CALL enableLanguages( const ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale >& aLanguages ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
115 virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getAvailableLanguages( ) throw (::com::sun::star::uno::RuntimeException);
116 virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getEnabledLanguages( ) throw (::com::sun::star::uno::RuntimeException);
117 virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getDisabledLanguages( ) throw (::com::sun::star::uno::RuntimeException);
118
119 // implementation specific
120 void SetFingerPrintsDB( const rtl::OUString &fileName ) throw (RuntimeException);
121
122 static const OUString & SAL_CALL getImplementationName_Static() throw();
123
124 };
125
126 //*************************************************************************
127
LangGuess_Impl(css::uno::Reference<css::uno::XComponentContext> const & rxContext)128 LangGuess_Impl::LangGuess_Impl(css::uno::Reference< css::uno::XComponentContext > const & rxContext) :
129 m_bInitialized( false ),
130 m_xContext( rxContext )
131 {
132 }
133
134 //*************************************************************************
135
EnsureInitialized()136 void LangGuess_Impl::EnsureInitialized()
137 {
138 if (!m_bInitialized)
139 {
140 // set this to true at the very start to prevent loops because of
141 // implicitly called functions below
142 m_bInitialized = true;
143
144 // set default fingerprint path to where those get installed
145 String aPhysPath;
146 String aURL( SvtPathOptions().GetFingerprintPath() );
147 utl::LocalFileHelper::ConvertURLToPhysicalName( aURL, aPhysPath );
148 #ifdef WNT
149 aPhysPath += '\\';
150 #else
151 aPhysPath += '/';
152 #endif
153
154 SetFingerPrintsDB( aPhysPath );
155
156 //
157 // disable currently not functional languages...
158 //
159 struct LangCountry
160 {
161 const char *pLang;
162 const char *pCountry;
163 };
164 LangCountry aDisable[] =
165 {
166 {"gv", ""}, {"sco", ""}, // no lang-id available yet...
167 // {"hy", ""}, {"drt", ""}, // 0 bytes fingerprints...
168 {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""}, // not yet correct functional...
169 {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""},
170 {"sa", ""}, {"ta", ""}, {"th", ""},
171 {"qu", ""}, {"yi", ""}
172 };
173 sal_Int32 nNum = sizeof(aDisable) / sizeof(aDisable[0]);
174 Sequence< Locale > aDisableSeq( nNum );
175 Locale *pDisableSeq = aDisableSeq.getArray();
176 for (sal_Int32 i = 0; i < nNum; ++i)
177 {
178 Locale aLocale;
179 aLocale.Language = OUString::createFromAscii( aDisable[i].pLang );
180 aLocale.Country = OUString::createFromAscii( aDisable[i].pCountry );
181 pDisableSeq[i] = aLocale;
182 }
183 disableLanguages( aDisableSeq );
184 DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" );
185 }
186 }
187
188 //*************************************************************************
189
190 /* TL: currently not part of the API
191 Sequence< com::sun::star::lang::Locale > SAL_CALL LangGuess_Impl::guessLanguages(
192 const rtl::OUString &rText,
193 sal_Int32 nStartPos,
194 sal_Int32 nLen )
195 throw (RuntimeException)
196 {
197 Sequence< com::sun::star::lang::Locale > aRes;
198
199 OString o = OUStringToOString( rText, RTL_TEXTENCODING_UTF8 );
200 vector<Guess> gs = m_aGuesser.GuessLanguage(o.pData->buffer);
201
202 aRes.realloc(gs.size());
203
204 com::sun::star::lang::Locale *pRes = aRes.getArray();
205
206 #ifdef DEBUG
207 std::cout << " We have " << gs.size() << " candidates" << std::endl;
208 #endif
209
210 for(int i = 0; i < gs.size() ; i++ ){
211 com::sun::star::lang::Locale current_aRes;
212
213 current_aRes.Language = A2OU( gs[i].getLanguage().c_str() );
214 current_aRes.Country = A2OU( gs[i].getCountry().c_str() );
215
216 pRes[i] = current_aRes;
217 }
218
219 return aRes;
220 }
221 */
222 //*************************************************************************
223
guessPrimaryLanguage(const::rtl::OUString & rText,::sal_Int32 nStartPos,::sal_Int32 nLen)224 Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage(
225 const ::rtl::OUString& rText,
226 ::sal_Int32 nStartPos,
227 ::sal_Int32 nLen )
228 throw (lang::IllegalArgumentException, uno::RuntimeException)
229 {
230 osl::MutexGuard aGuard( GetLangGuessMutex() );
231
232 EnsureInitialized();
233
234 lang::Locale aRes;
235 if (nStartPos >=0 && nLen >= 0 && nStartPos + nLen <= rText.getLength())
236 {
237 OString o( OUStringToOString( rText.copy(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) );
238 Guess g = m_aGuesser.GuessPrimaryLanguage((char*)o.getStr());
239 aRes.Language = OUString::createFromAscii(g.GetLanguage().c_str());
240 aRes.Country = OUString::createFromAscii(g.GetCountry().c_str());
241 }
242 else
243 throw lang::IllegalArgumentException();
244
245 return aRes;
246 }
247
248 //*************************************************************************
249 #define DEFAULT_CONF_FILE_NAME "fpdb.conf"
250
SetFingerPrintsDB(const rtl::OUString & filePath)251 void LangGuess_Impl::SetFingerPrintsDB(
252 const rtl::OUString &filePath )
253 throw (RuntimeException)
254 {
255 //! text encoding for file name / path needs to be in the same encoding the OS uses
256 OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() );
257 OString conf_file_name( DEFAULT_CONF_FILE_NAME );
258 OString conf_file_path(path);
259 conf_file_path += conf_file_name;
260
261 //cout << "Conf file : " << conf_file_path.getStr() << " directory : " << path.getStr() << endl;
262
263 m_aGuesser.SetDBPath((const char*)conf_file_path.getStr(), (const char*)path.getStr());
264 }
265
266 //*************************************************************************
getAvailableLanguages()267 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages( )
268 throw (uno::RuntimeException)
269 {
270 osl::MutexGuard aGuard( GetLangGuessMutex() );
271
272 EnsureInitialized();
273
274 Sequence< com::sun::star::lang::Locale > aRes;
275 vector<Guess> gs = m_aGuesser.GetAllManagedLanguages();
276 aRes.realloc(gs.size());
277
278 com::sun::star::lang::Locale *pRes = aRes.getArray();
279
280 for(size_t i = 0; i < gs.size() ; i++ ){
281 com::sun::star::lang::Locale current_aRes;
282 current_aRes.Language = A2OU( gs[i].GetLanguage().c_str() );
283 current_aRes.Country = A2OU( gs[i].GetCountry().c_str() );
284 pRes[i] = current_aRes;
285 }
286
287 return aRes;
288 }
289
290 //*************************************************************************
getEnabledLanguages()291 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages( )
292 throw (uno::RuntimeException)
293 {
294 osl::MutexGuard aGuard( GetLangGuessMutex() );
295
296 EnsureInitialized();
297
298 Sequence< com::sun::star::lang::Locale > aRes;
299 vector<Guess> gs = m_aGuesser.GetAvailableLanguages();
300 aRes.realloc(gs.size());
301
302 com::sun::star::lang::Locale *pRes = aRes.getArray();
303
304 for(size_t i = 0; i < gs.size() ; i++ ){
305 com::sun::star::lang::Locale current_aRes;
306 current_aRes.Language = A2OU( gs[i].GetLanguage().c_str() );
307 current_aRes.Country = A2OU( gs[i].GetCountry().c_str() );
308 pRes[i] = current_aRes;
309 }
310
311 return aRes;
312 }
313
314 //*************************************************************************
getDisabledLanguages()315 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages( )
316 throw (uno::RuntimeException)
317 {
318 osl::MutexGuard aGuard( GetLangGuessMutex() );
319
320 EnsureInitialized();
321
322 Sequence< com::sun::star::lang::Locale > aRes;
323 vector<Guess> gs = m_aGuesser.GetUnavailableLanguages();
324 aRes.realloc(gs.size());
325
326 com::sun::star::lang::Locale *pRes = aRes.getArray();
327
328 for(size_t i = 0; i < gs.size() ; i++ ){
329 com::sun::star::lang::Locale current_aRes;
330 current_aRes.Language = A2OU( gs[i].GetLanguage().c_str() );
331 current_aRes.Country = A2OU( gs[i].GetCountry().c_str() );
332 pRes[i] = current_aRes;
333 }
334
335 return aRes;
336 }
337
338 //*************************************************************************
disableLanguages(const uno::Sequence<Locale> & rLanguages)339 void SAL_CALL LangGuess_Impl::disableLanguages(
340 const uno::Sequence< Locale >& rLanguages )
341 throw (lang::IllegalArgumentException, uno::RuntimeException)
342 {
343 osl::MutexGuard aGuard( GetLangGuessMutex() );
344
345 EnsureInitialized();
346
347 sal_Int32 nLanguages = rLanguages.getLength();
348 const Locale *pLanguages = rLanguages.getConstArray();
349
350 for (sal_Int32 i = 0; i < nLanguages; ++i)
351 {
352 string language;
353
354 OString l = OUStringToOString( pLanguages[i].Language, RTL_TEXTENCODING_ASCII_US );
355 OString c = OUStringToOString( pLanguages[i].Country, RTL_TEXTENCODING_ASCII_US );
356
357 language += l.getStr();
358 language += "-";
359 language += c.getStr();
360 m_aGuesser.DisableLanguage(language);
361 }
362 }
363
364 //*************************************************************************
enableLanguages(const uno::Sequence<Locale> & rLanguages)365 void SAL_CALL LangGuess_Impl::enableLanguages(
366 const uno::Sequence< Locale >& rLanguages )
367 throw (lang::IllegalArgumentException, uno::RuntimeException)
368 {
369 osl::MutexGuard aGuard( GetLangGuessMutex() );
370
371 EnsureInitialized();
372
373 sal_Int32 nLanguages = rLanguages.getLength();
374 const Locale *pLanguages = rLanguages.getConstArray();
375
376 for (sal_Int32 i = 0; i < nLanguages; ++i)
377 {
378 string language;
379
380 OString l = OUStringToOString( pLanguages[i].Language, RTL_TEXTENCODING_ASCII_US );
381 OString c = OUStringToOString( pLanguages[i].Country, RTL_TEXTENCODING_ASCII_US );
382
383 language += l.getStr();
384 language += "-";
385 language += c.getStr();
386 m_aGuesser.EnableLanguage(language);
387 }
388 }
389
390 //*************************************************************************
getImplementationName()391 OUString SAL_CALL LangGuess_Impl::getImplementationName( )
392 throw(RuntimeException)
393 {
394 osl::MutexGuard aGuard( GetLangGuessMutex() );
395 return A2OU( IMPLNAME );
396 }
397
398 //*************************************************************************
supportsService(const OUString & ServiceName)399 sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName )
400 throw(RuntimeException)
401 {
402 osl::MutexGuard aGuard( GetLangGuessMutex() );
403 Sequence< OUString > aSNL = getSupportedServiceNames();
404 const OUString * pArray = aSNL.getArray();
405 for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
406 if( pArray[i] == ServiceName )
407 return sal_True;
408 return sal_False;
409 }
410
411 //*************************************************************************
getSupportedServiceNames()412 Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames( )
413 throw(RuntimeException)
414 {
415 osl::MutexGuard aGuard( GetLangGuessMutex() );
416 return getSupportedServiceNames_Static();
417 }
418
419 //*************************************************************************
getSupportedServiceNames_Static()420 Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames_Static( )
421 {
422 OUString aName( A2OU( SERVICENAME ) );
423 return Sequence< OUString >( &aName, 1 );
424 }
425
426 //*************************************************************************
427
428
429 /**
430 * Function to create a new component instance; is needed by factory helper implementation.
431 * @param xMgr service manager to if the components needs other component instances
432 */
LangGuess_Impl_create(Reference<XComponentContext> const & xContext)433 Reference< XInterface > SAL_CALL LangGuess_Impl_create(
434 Reference< XComponentContext > const & xContext )
435 SAL_THROW( () )
436 {
437 return static_cast< ::cppu::OWeakObject * >( new LangGuess_Impl(xContext) );
438 }
439
440 //##################################################################################################
441 //#### EXPORTED ### functions to allow for registration and creation of the UNO component
442 //##################################################################################################
443
444 static struct ::cppu::ImplementationEntry s_component_entries [] =
445 {
446 {
447 LangGuess_Impl_create, getImplementationName_LangGuess_Impl,
448 getSupportedServiceNames_LangGuess_Impl,
449 ::cppu::createSingleComponentFactory,
450 0, 0
451 },
452 { 0, 0, 0, 0, 0, 0 }
453 };
454
455 extern "C"
456 {
457
component_getImplementationEnvironment(sal_Char const ** ppEnvTypeName,uno_Environment **)458 SAL_DLLPUBLIC_EXPORT void SAL_CALL component_getImplementationEnvironment(
459 sal_Char const ** ppEnvTypeName, uno_Environment ** /*ppEnv*/ )
460 {
461 *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME;
462 }
463
component_getFactory(sal_Char const * implName,lang::XMultiServiceFactory * xMgr,registry::XRegistryKey * xRegistry)464 SAL_DLLPUBLIC_EXPORT void * SAL_CALL component_getFactory(
465 sal_Char const * implName, lang::XMultiServiceFactory * xMgr,
466 registry::XRegistryKey * xRegistry )
467 {
468 return ::cppu::component_getFactoryHelper(
469 implName, xMgr, xRegistry, s_component_entries );
470 }
471
472 }
473
474