/************************************************************************* * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * Copyright 2000, 2010 Oracle and/or its affiliates. * * OpenOffice.org - a multi-platform office productivity suite * * This file is part of OpenOffice.org. * * OpenOffice.org is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License version 3 * only, as published by the Free Software Foundation. * * OpenOffice.org is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License version 3 for more details * (a copy is included in the LICENSE file that accompanied this code). * * You should have received a copy of the GNU Lesser General Public License * version 3 along with OpenOffice.org. If not, see * * for a copy of the LGPLv3 License. * ************************************************************************/ // MARKER(update_precomp.py): autogen include statement, do not remove #include "precompiled_lingucomponent.hxx" #include #include // helper for factories #include #include #include #include #include #include #include #include #include #include #include #include "nthesimp.hxx" #include #include #include "nthesdta.hxx" #include #include #include #include // values asigned to capitalization types #define CAPTYPE_UNKNOWN 0 #define CAPTYPE_NOCAP 1 #define CAPTYPE_INITCAP 2 #define CAPTYPE_ALLCAP 3 #define CAPTYPE_MIXED 4 // XML-header to query SPELLML support #define SPELLML_SUPPORT "" using namespace utl; using namespace osl; using namespace rtl; using namespace com::sun::star; using namespace com::sun::star::beans; using namespace com::sun::star::lang; using namespace com::sun::star::uno; using namespace com::sun::star::linguistic2; using namespace linguistic; /////////////////////////////////////////////////////////////////////////// static uno::Reference< XLinguServiceManager > GetLngSvcMgr_Impl() { uno::Reference< XLinguServiceManager > xRes; uno::Reference< XMultiServiceFactory > xMgr = getProcessServiceFactory(); if (xMgr.is()) { xRes = uno::Reference< XLinguServiceManager > ( xMgr->createInstance( OUString( RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.linguistic2.LinguServiceManager" ) ) ), UNO_QUERY ) ; } return xRes; } Thesaurus::Thesaurus() : aEvtListeners ( GetLinguMutex() ) { bDisposing = sal_False; pPropHelper = NULL; aThes = NULL; aCharSetInfo = NULL; aTEncs = NULL; aTLocs = NULL; aTNames = NULL; numthes = 0; } Thesaurus::~Thesaurus() { if (aThes) { for (int i = 0; i < numthes; i++) { if (aThes[i]) delete aThes[i]; aThes[i] = NULL; } delete[] aThes; } aThes = NULL; if (aCharSetInfo) { for (int i = 0; i < numthes; i++) { if (aCharSetInfo[i]) delete aCharSetInfo[i]; aCharSetInfo[i] = NULL; } delete[] aCharSetInfo; } aCharSetInfo = NULL; numthes = 0; if (aTEncs) delete[] aTEncs; aTEncs = NULL; if (aTLocs) delete[] aTLocs; aTLocs = NULL; if (aTNames) delete[] aTNames; aTNames = NULL; if (pPropHelper) pPropHelper->RemoveAsPropListener(); } PropertyHelper_Thes & Thesaurus::GetPropHelper_Impl() { if (!pPropHelper) { Reference< XPropertySet > xPropSet( GetLinguProperties(), UNO_QUERY ); pPropHelper = new PropertyHelper_Thes( (XThesaurus *) this, xPropSet ); xPropHelper = pPropHelper; pPropHelper->AddAsPropListener(); //! after a reference is established } return *pPropHelper; } Sequence< Locale > SAL_CALL Thesaurus::getLocales() throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); // this routine should return the locales supported by the installed // dictionaries. if (!numthes) { SvtLinguConfig aLinguCfg; // get list of dictionaries-to-use std::list< SvtLinguConfigDictionaryEntry > aDics; uno::Sequence< rtl::OUString > aFormatList; aLinguCfg.GetSupportedDictionaryFormatsFor( A2OU("Thesauri"), A2OU("org.openoffice.lingu.new.Thesaurus"), aFormatList ); sal_Int32 nLen = aFormatList.getLength(); for (sal_Int32 i = 0; i < nLen; ++i) { std::vector< SvtLinguConfigDictionaryEntry > aTmpDic( aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) ); aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() ); } //!! for compatibility with old dictionaries (the ones not using extensions //!! or new configuration entries, but still using the dictionary.lst file) //!! Get the list of old style spell checking dictionaries to use... std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics( GetOldStyleDics( "THES" ) ); // to prefer dictionaries with configuration entries we will only // use those old style dictionaries that add a language that // is not yet supported by the list od new style dictionaries MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics ); numthes = aDics.size(); if (numthes) { // get supported locales from the dictionaries-to-use... sal_Int32 k = 0; std::set< rtl::OUString, lt_rtl_OUString > aLocaleNamesSet; std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt; for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt) { uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames ); sal_Int32 nLen2 = aLocaleNames.getLength(); for (k = 0; k < nLen2; ++k) { aLocaleNamesSet.insert( aLocaleNames[k] ); } } // ... and add them to the resulting sequence aSuppLocales.realloc( aLocaleNamesSet.size() ); std::set< rtl::OUString, lt_rtl_OUString >::const_iterator aItB; k = 0; for (aItB = aLocaleNamesSet.begin(); aItB != aLocaleNamesSet.end(); ++aItB) { Locale aTmp( MsLangId::convertLanguageToLocale( MsLangId::convertIsoStringToLanguage( *aItB ))); aSuppLocales[k++] = aTmp; } //! For each dictionary and each locale we need a seperate entry. //! If this results in more than one dictionary per locale than (for now) //! it is undefined which dictionary gets used. //! In the future the implementation should support using several dictionaries //! for one locale. numthes = 0; for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt) numthes = numthes + aDictIt->aLocaleNames.getLength(); // add dictionary information aThes = new MyThes* [numthes]; aTEncs = new rtl_TextEncoding [numthes]; aTLocs = new Locale [numthes]; aTNames = new OUString [numthes]; aCharSetInfo = new CharClass* [numthes]; k = 0; for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt) { if (aDictIt->aLocaleNames.getLength() > 0 && aDictIt->aLocations.getLength() > 0) { uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames ); sal_Int32 nLocales = aLocaleNames.getLength(); // currently only one language per dictionary is supported in the actual implementation... // Thus here we work-around this by adding the same dictionary several times. // Once for each of it's supported locales. for (sal_Int32 i = 0; i < nLocales; ++i) { aThes[k] = NULL; aTEncs[k] = RTL_TEXTENCODING_DONTKNOW; aTLocs[k] = MsLangId::convertLanguageToLocale( MsLangId::convertIsoStringToLanguage( aDictIt->aLocaleNames[i] )); aCharSetInfo[k] = new CharClass( aTLocs[k] ); // also both files have to be in the same directory and the // file names must only differ in the extension (.aff/.dic). // Thus we use the first location only and strip the extension part. rtl::OUString aLocation = aDictIt->aLocations[0]; sal_Int32 nPos = aLocation.lastIndexOf( '.' ); aLocation = aLocation.copy( 0, nPos ); aTNames[k] = aLocation; ++k; } } } DBG_ASSERT( k == numthes, "index mismatch?" ); } else { /* no dictionary found so register no dictionaries */ numthes = 0; aThes = NULL; aTEncs = NULL; aTLocs = NULL; aTNames = NULL; aCharSetInfo = NULL; aSuppLocales.realloc(0); } } return aSuppLocales; } sal_Bool SAL_CALL Thesaurus::hasLocale(const Locale& rLocale) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); sal_Bool bRes = sal_False; if (!aSuppLocales.getLength()) getLocales(); sal_Int32 nLen = aSuppLocales.getLength(); for (sal_Int32 i = 0; i < nLen; ++i) { const Locale *pLocale = aSuppLocales.getConstArray(); if (rLocale == pLocale[i]) { bRes = sal_True; break; } } return bRes; } Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL Thesaurus::queryMeanings( const OUString& qTerm, const Locale& rLocale, const PropertyValues& rProperties) throw(IllegalArgumentException, RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); uno::Sequence< Reference< XMeaning > > aMeanings( 1 ); uno::Sequence< Reference< XMeaning > > noMeanings( 0 ); uno::Reference< XLinguServiceManager > xLngSvcMgr( GetLngSvcMgr_Impl() ); uno::Reference< XSpellChecker1 > xSpell; OUString rTerm(qTerm); OUString pTerm(qTerm); sal_uInt16 ct = CAPTYPE_UNKNOWN; sal_Int32 stem = 0; sal_Int32 stem2 = 0; sal_Int16 nLanguage = LocaleToLanguage( rLocale ); if (nLanguage == LANGUAGE_NONE || !rTerm.getLength()) return noMeanings; if (!hasLocale( rLocale )) #ifdef LINGU_EXCEPTIONS throw( IllegalArgumentException() ); #else return noMeanings; #endif if (prevTerm == qTerm && prevLocale == nLanguage) return prevMeanings; mentry * pmean = NULL; sal_Int32 nmean = 0; PropertyHelper_Thes &rHelper = GetPropHelper(); rHelper.SetTmpPropVals( rProperties ); MyThes * pTH = NULL; rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; CharClass * pCC = NULL; // find the first thesaurus that matches the locale for (int i =0; i < numthes; i++) { if (rLocale == aTLocs[i]) { // open up and intialize this thesaurus if need be if (!aThes[i]) { OUString datpath = aTNames[i] + A2OU(".dat"); OUString idxpath = aTNames[i] + A2OU(".idx"); OUString ndat; OUString nidx; osl::FileBase::getSystemPathFromFileURL(datpath,ndat); osl::FileBase::getSystemPathFromFileURL(idxpath,nidx); OString aTmpidx(OU2ENC(nidx,osl_getThreadTextEncoding())); OString aTmpdat(OU2ENC(ndat,osl_getThreadTextEncoding())); #if defined(WNT) // workaround for Windows specifc problem that the // path length in calls to 'fopen' is limted to somewhat // about 120+ characters which will usually be exceed when // using dictionaries as extensions. aTmpidx = Win_GetShortPathName( nidx ); aTmpdat = Win_GetShortPathName( ndat ); #endif aThes[i] = new MyThes(aTmpidx.getStr(),aTmpdat.getStr()); if (aThes[i]) aTEncs[i] = getTextEncodingFromCharset(aThes[i]->get_th_encoding()); } pTH = aThes[i]; eEnc = aTEncs[i]; pCC = aCharSetInfo[i]; if (pTH) break; } } // we don't want to work with a default text encoding since following incorrect // results may occur only for specific text and thus may be hard to notice. // Thus better always make a clean exit here if the text encoding is in question. // Hopefully something not working at all will raise proper attention quickly. ;-) DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); if (eEnc == RTL_TEXTENCODING_DONTKNOW) return noMeanings; while (pTH) { // convert word to all lower case for searching if (!stem) ct = capitalType(rTerm, pCC); OUString nTerm(makeLowerCase(rTerm, pCC)); OString aTmp( OU2ENC(nTerm, eEnc) ); nmean = pTH->Lookup(aTmp.getStr(),aTmp.getLength(),&pmean); if (nmean) aMeanings.realloc( nmean ); mentry * pe = pmean; OUString codeTerm = qTerm; Reference< XSpellAlternatives > xTmpRes2; if (stem) { xTmpRes2 = xSpell->spell( A2OU("") + pTerm + A2OU(""), nLanguage, rProperties ); if (xTmpRes2.is()) { Sequenceseq = xTmpRes2->getAlternatives(); if (seq.getLength() > 0) { codeTerm = seq[0]; stem2 = 1; } #if 0 OString o = OUStringToOString(codeTerm, RTL_TEXTENCODING_UTF8); fprintf(stderr, "CODETERM: %s\n", o.pData->buffer); #endif } } for (int j = 0; j < nmean; j++) { int count = pe->count; if (count) { Sequence< OUString > aStr( count ); OUString *pStr = aStr.getArray(); for (int i=0; i < count; i++) { OUString sTerm(pe->psyns[i],strlen(pe->psyns[i]),eEnc ); sal_Int32 catpos = sTerm.indexOf('('); sal_Int32 catpos2 = 0; OUString catst; OUString catst2; if (catpos > 2) { // remove category name for affixation and casing catst = A2OU(" ") + sTerm.copy(catpos); sTerm = sTerm.copy(0, catpos); sTerm = sTerm.trim(); } // generate synonyms with affixes if (stem && stem2) { Reference< XSpellAlternatives > xTmpRes; xTmpRes = xSpell->spell( A2OU("") + sTerm + A2OU("") + codeTerm + A2OU(""), nLanguage, rProperties ); if (xTmpRes.is()) { Sequenceseq = xTmpRes->getAlternatives(); if (seq.getLength() > 0) sTerm = seq[0]; } } if (catpos2) sTerm = catst2 + sTerm; sal_uInt16 ct1 = capitalType(sTerm, pCC); if (CAPTYPE_MIXED == ct1) ct = ct1; OUString cTerm; switch (ct) { case CAPTYPE_ALLCAP: cTerm = makeUpperCase(sTerm, pCC); break; case CAPTYPE_INITCAP: cTerm = makeInitCap(sTerm, pCC); break; default: cTerm = sTerm; break; } OUString aAlt( cTerm + catst); pStr[i] = aAlt; } #if 0 Meaning * pMn = new Meaning(rTerm,nLanguage,rHelper); #endif Meaning * pMn = new Meaning(rTerm,nLanguage); OUString dTerm(pe->defn,strlen(pe->defn),eEnc ); pMn->SetMeaning(dTerm); pMn->SetSynonyms(aStr); Reference* pMeaning = aMeanings.getArray(); pMeaning[j] = pMn; } pe++; } pTH->CleanUpAfterLookup(&pmean,nmean); if (nmean) { prevTerm = qTerm; prevMeanings = aMeanings; prevLocale = nLanguage; return aMeanings; } if (stem || !xLngSvcMgr.is()) return noMeanings; stem = 1; xSpell = uno::Reference< XSpellChecker1 >( xLngSvcMgr->getSpellChecker(), UNO_QUERY ); if (!xSpell.is() || !xSpell->isValid( A2OU(SPELLML_SUPPORT), nLanguage, rProperties )) return noMeanings; Reference< XSpellAlternatives > xTmpRes; xTmpRes = xSpell->spell( A2OU("") + rTerm + A2OU(""), nLanguage, rProperties ); if (xTmpRes.is()) { Sequenceseq = xTmpRes->getAlternatives(); #if 0 for (int i = 0; i < seq.getLength(); i++) { OString o = OUStringToOString(seq[i], RTL_TEXTENCODING_UTF8); fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer); } #endif if (seq.getLength() > 0) { rTerm = seq[0]; // XXX Use only the first stem continue; } } // stem the last word of the synonym (for categories after affixation) rTerm = rTerm.trim(); sal_Int32 pos = rTerm.lastIndexOf(' '); if (!pos) return noMeanings; xTmpRes = xSpell->spell( A2OU("") + rTerm.copy(pos + 1) + A2OU(""), nLanguage, rProperties ); if (xTmpRes.is()) { Sequenceseq = xTmpRes->getAlternatives(); if (seq.getLength() > 0) { pTerm = rTerm.copy(pos + 1); rTerm = rTerm.copy(0, pos + 1) + seq[0]; #if 0 for (int i = 0; i < seq.getLength(); i++) { OString o = OUStringToOString(seq[i], RTL_TEXTENCODING_UTF8); fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer); } #endif continue; } } break; } return noMeanings; } Reference< XInterface > SAL_CALL Thesaurus_CreateInstance( const Reference< XMultiServiceFactory > & /*rSMgr*/ ) throw(Exception) { Reference< XInterface > xService = (cppu::OWeakObject*) new Thesaurus; return xService; } OUString SAL_CALL Thesaurus::getServiceDisplayName( const Locale& /*rLocale*/ ) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); return A2OU( "OpenOffice.org New Thesaurus" ); } void SAL_CALL Thesaurus::initialize( const Sequence< Any >& rArguments ) throw(Exception, RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); if (!pPropHelper) { sal_Int32 nLen = rArguments.getLength(); if (1 == nLen) { Reference< XPropertySet > xPropSet; rArguments.getConstArray()[0] >>= xPropSet; //! Pointer allows for access of the non-UNO functions. //! And the reference to the UNO-functions while increasing //! the ref-count and will implicitly free the memory //! when the object is not longer used. pPropHelper = new PropertyHelper_Thes( (XThesaurus *) this, xPropSet ); xPropHelper = pPropHelper; pPropHelper->AddAsPropListener(); //! after a reference is established } else DBG_ERROR( "wrong number of arguments in sequence" ); } } sal_uInt16 SAL_CALL Thesaurus::capitalType(const OUString& aTerm, CharClass * pCC) { sal_Int32 tlen = aTerm.getLength(); if ((pCC) && (tlen)) { String aStr(aTerm); sal_Int32 nc = 0; for (sal_uInt16 tindex = 0; tindex < tlen; tindex++) { if (pCC->getCharacterType(aStr,tindex) & ::com::sun::star::i18n::KCharacterType::UPPER) nc++; } if (nc == 0) return (sal_uInt16) CAPTYPE_NOCAP; if (nc == tlen) return (sal_uInt16) CAPTYPE_ALLCAP; if ((nc == 1) && (pCC->getCharacterType(aStr,0) & ::com::sun::star::i18n::KCharacterType::UPPER)) return (sal_uInt16) CAPTYPE_INITCAP; return (sal_uInt16) CAPTYPE_MIXED; } return (sal_uInt16) CAPTYPE_UNKNOWN; } OUString SAL_CALL Thesaurus::makeLowerCase(const OUString& aTerm, CharClass * pCC) { if (pCC) return pCC->toLower_rtl(aTerm, 0, aTerm.getLength()); return aTerm; } OUString SAL_CALL Thesaurus::makeUpperCase(const OUString& aTerm, CharClass * pCC) { if (pCC) return pCC->toUpper_rtl(aTerm, 0, aTerm.getLength()); return aTerm; } OUString SAL_CALL Thesaurus::makeInitCap(const OUString& aTerm, CharClass * pCC) { sal_Int32 tlen = aTerm.getLength(); if ((pCC) && (tlen)) { OUString bTemp = aTerm.copy(0,1); if (tlen > 1) { return ( pCC->toUpper_rtl(bTemp, 0, 1) + pCC->toLower_rtl(aTerm,1,(tlen-1)) ); } return pCC->toUpper_rtl(bTemp, 0, 1); } return aTerm; } void SAL_CALL Thesaurus::dispose() throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); if (!bDisposing) { bDisposing = sal_True; EventObject aEvtObj( (XThesaurus *) this ); aEvtListeners.disposeAndClear( aEvtObj ); } } void SAL_CALL Thesaurus::addEventListener( const Reference< XEventListener >& rxListener ) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); if (!bDisposing && rxListener.is()) aEvtListeners.addInterface( rxListener ); } void SAL_CALL Thesaurus::removeEventListener( const Reference< XEventListener >& rxListener ) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); if (!bDisposing && rxListener.is()) aEvtListeners.removeInterface( rxListener ); } /////////////////////////////////////////////////////////////////////////// // Service specific part // OUString SAL_CALL Thesaurus::getImplementationName() throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); return getImplementationName_Static(); } sal_Bool SAL_CALL Thesaurus::supportsService( const OUString& ServiceName ) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); Sequence< OUString > aSNL = getSupportedServiceNames(); const OUString * pArray = aSNL.getConstArray(); for( sal_Int32 i = 0; i < aSNL.getLength(); i++ ) if( pArray[i] == ServiceName ) return sal_True; return sal_False; } Sequence< OUString > SAL_CALL Thesaurus::getSupportedServiceNames() throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); return getSupportedServiceNames_Static(); } Sequence< OUString > Thesaurus::getSupportedServiceNames_Static() throw() { MutexGuard aGuard( GetLinguMutex() ); Sequence< OUString > aSNS( 1 ); // auch mehr als 1 Service moeglich aSNS.getArray()[0] = A2OU( SN_THESAURUS ); return aSNS; } void * SAL_CALL Thesaurus_getFactory( const sal_Char * pImplName, XMultiServiceFactory * pServiceManager, void * ) { void * pRet = 0; if ( !Thesaurus::getImplementationName_Static().compareToAscii( pImplName ) ) { Reference< XSingleServiceFactory > xFactory = cppu::createOneInstanceFactory( pServiceManager, Thesaurus::getImplementationName_Static(), Thesaurus_CreateInstance, Thesaurus::getSupportedServiceNames_Static()); // acquire, because we return an interface pointer instead of a reference xFactory->acquire(); pRet = xFactory.get(); } return pRet; } /////////////////////////////////////////////////////////////////////////// #undef CAPTYPE_UNKNOWN #undef CAPTYPE_NOCAP #undef CAPTYPE_INITCAP #undef CAPTYPE_ALLCAP #undef CAPTYPE_MIXED