1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_lingucomponent.hxx" 30 31 #if defined(WNT) 32 #include <tools/prewin.h> 33 #endif 34 35 #if defined(WNT) 36 #include <Windows.h> 37 #endif 38 39 #if defined(WNT) 40 #include <tools/postwin.h> 41 #endif 42 43 44 #include <osl/thread.h> 45 #include <osl/file.hxx> 46 #include <tools/debug.hxx> 47 #include <tools/urlobj.hxx> 48 #include <i18npool/mslangid.hxx> 49 #include <unotools/lingucfg.hxx> 50 #include <unotools/pathoptions.hxx> 51 #include <rtl/ustring.hxx> 52 #include <rtl/string.hxx> 53 #include <rtl/tencinfo.h> 54 #include <linguistic/misc.hxx> 55 56 #include <set> 57 #include <vector> 58 #include <string.h> 59 60 #include <lingutil.hxx> 61 #include <dictmgr.hxx> 62 63 64 65 66 using ::com::sun::star::lang::Locale; 67 using namespace ::com::sun::star; 68 69 #if 0 70 ////////////////////////////////////////////////////////////////////// 71 72 String GetDirectoryPathFromFileURL( const String &rFileURL ) 73 { 74 // get file URL 75 INetURLObject aURLObj; 76 aURLObj.SetSmartProtocol( INET_PROT_FILE ); 77 aURLObj.SetSmartURL( rFileURL ); 78 aURLObj.removeSegment(); 79 DBG_ASSERT( !aURLObj.HasError(), "invalid URL" ); 80 String aRes = aURLObj.GetMainURL( INetURLObject::DECODE_TO_IURI ); 81 return aRes; 82 } 83 #endif 84 85 #if defined(WNT) 86 rtl::OString Win_GetShortPathName( const rtl::OUString &rLongPathName ) 87 { 88 rtl::OString aRes; 89 90 sal_Unicode aShortBuffer[1024] = {0}; 91 sal_Int32 nShortBufSize = sizeof( aShortBuffer ) / sizeof( aShortBuffer[0] ); 92 93 // use the version of 'GetShortPathName' that can deal with Unicode... 94 sal_Int32 nShortLen = GetShortPathNameW( 95 reinterpret_cast<LPCWSTR>( rLongPathName.getStr() ), 96 reinterpret_cast<LPWSTR>( aShortBuffer ), 97 nShortBufSize ); 98 99 if (nShortLen < nShortBufSize) // conversion successful? 100 aRes = rtl::OString( OU2ENC( rtl::OUString( aShortBuffer, nShortLen ), osl_getThreadTextEncoding()) ); 101 else 102 DBG_ERROR( "Win_GetShortPathName: buffer to short" ); 103 104 return aRes; 105 } 106 #endif //defined(WNT) 107 108 ////////////////////////////////////////////////////////////////////// 109 110 // build list of old style diuctionaries (not as extensions) to use. 111 // User installed dictionaries (the ones residing in the user paths) 112 // will get precedence over system installed ones for the same language. 113 std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType ) 114 { 115 std::vector< SvtLinguConfigDictionaryEntry > aRes; 116 117 if (!pDicType) 118 return aRes; 119 120 rtl::OUString aFormatName; 121 String aDicExtension; 122 #ifdef SYSTEM_DICTS 123 rtl::OUString aSystemDir; 124 rtl::OUString aSystemPrefix; 125 rtl::OUString aSystemSuffix; 126 #endif 127 bool bSpell = false; 128 bool bHyph = false; 129 bool bThes = false; 130 if (strcmp( pDicType, "DICT" ) == 0) 131 { 132 aFormatName = A2OU("DICT_SPELL"); 133 aDicExtension = String::CreateFromAscii( ".dic" ); 134 #ifdef SYSTEM_DICTS 135 aSystemDir = A2OU( DICT_SYSTEM_DIR ); 136 aSystemSuffix = aDicExtension; 137 #endif 138 bSpell = true; 139 } 140 else if (strcmp( pDicType, "HYPH" ) == 0) 141 { 142 aFormatName = A2OU("DICT_HYPH"); 143 aDicExtension = String::CreateFromAscii( ".dic" ); 144 #ifdef SYSTEM_DICTS 145 aSystemDir = A2OU( HYPH_SYSTEM_DIR ); 146 aSystemPrefix = A2OU( "hyph_" ); 147 aSystemSuffix = aDicExtension; 148 #endif 149 bHyph = true; 150 } 151 else if (strcmp( pDicType, "THES" ) == 0) 152 { 153 aFormatName = A2OU("DICT_THES"); 154 aDicExtension = String::CreateFromAscii( ".dat" ); 155 #ifdef SYSTEM_DICTS 156 aSystemDir = A2OU( THES_SYSTEM_DIR ); 157 aSystemPrefix = A2OU( "th_" ); 158 aSystemSuffix = A2OU( "_v2.dat" ); 159 #endif 160 bThes = true; 161 } 162 163 164 if (aFormatName.getLength() == 0 || aDicExtension.Len() == 0) 165 return aRes; 166 167 // set of languages to remember the language where it is already 168 // decided to make use of the dictionary. 169 std::set< LanguageType > aDicLangInUse; 170 171 #ifdef SYSTEM_DICTS 172 osl::Directory aSystemDicts(aSystemDir); 173 if (aSystemDicts.open() == osl::FileBase::E_None) 174 { 175 osl::DirectoryItem aItem; 176 osl::FileStatus aFileStatus(FileStatusMask_FileURL); 177 while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None) 178 { 179 aItem.getFileStatus(aFileStatus); 180 rtl::OUString sPath = aFileStatus.getFileURL(); 181 if (sPath.lastIndexOf(aSystemSuffix) == sPath.getLength()-aSystemSuffix.getLength()) 182 { 183 sal_Int32 nStartIndex = sPath.lastIndexOf(sal_Unicode('/')) + 1; 184 if (!sPath.match(aSystemPrefix, nStartIndex)) 185 continue; 186 rtl::OUString sChunk = sPath.copy(0, sPath.getLength() - aSystemSuffix.getLength()); 187 sal_Int32 nIndex = nStartIndex + aSystemPrefix.getLength(); 188 rtl::OUString sLang = sChunk.getToken( 0, '_', nIndex ); 189 if (!sLang.getLength()) 190 continue; 191 rtl::OUString sRegion; 192 if (nIndex != -1) 193 sRegion = sChunk.copy( nIndex, sChunk.getLength() - nIndex ); 194 195 // Thus we first get the language of the dictionary 196 LanguageType nLang = MsLangId::convertIsoNamesToLanguage( 197 sLang, sRegion ); 198 199 if (aDicLangInUse.count( nLang ) == 0) 200 { 201 // remember the new language in use 202 aDicLangInUse.insert( nLang ); 203 204 // add the dictionary to the resulting vector 205 SvtLinguConfigDictionaryEntry aDicEntry; 206 aDicEntry.aLocations.realloc(1); 207 aDicEntry.aLocaleNames.realloc(1); 208 rtl::OUString aLocaleName( MsLangId::convertLanguageToIsoString( nLang ) ); 209 aDicEntry.aLocations[0] = sPath; 210 aDicEntry.aFormatName = aFormatName; 211 aDicEntry.aLocaleNames[0] = aLocaleName; 212 aRes.push_back( aDicEntry ); 213 } 214 } 215 } 216 } 217 218 #endif 219 220 return aRes; 221 } 222 223 224 void MergeNewStyleDicsAndOldStyleDics( 225 std::list< SvtLinguConfigDictionaryEntry > &rNewStyleDics, 226 const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics ) 227 { 228 // get list of languages supported by new style dictionaries 229 std::set< LanguageType > aNewStyleLanguages; 230 std::list< SvtLinguConfigDictionaryEntry >::const_iterator aIt; 231 for (aIt = rNewStyleDics.begin() ; aIt != rNewStyleDics.end(); ++aIt) 232 { 233 const uno::Sequence< rtl::OUString > aLocaleNames( aIt->aLocaleNames ); 234 sal_Int32 nLocaleNames = aLocaleNames.getLength(); 235 for (sal_Int32 k = 0; k < nLocaleNames; ++k) 236 { 237 LanguageType nLang = MsLangId::convertIsoStringToLanguage( aLocaleNames[k] ); 238 aNewStyleLanguages.insert( nLang ); 239 } 240 } 241 242 // now check all old style dictionaries if they will add a not yet 243 // added language. If so add them to the resulting vector 244 std::vector< SvtLinguConfigDictionaryEntry >::const_iterator aIt2; 245 for (aIt2 = rOldStyleDics.begin(); aIt2 != rOldStyleDics.end(); ++aIt2) 246 { 247 sal_Int32 nOldStyleDics = aIt2->aLocaleNames.getLength(); 248 249 // old style dics should only have one language listed... 250 DBG_ASSERT( nOldStyleDics, "old style dictionary with more then one language found!"); 251 if (nOldStyleDics > 0) 252 { 253 LanguageType nLang = MsLangId::convertIsoStringToLanguage( aIt2->aLocaleNames[0] ); 254 255 if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_NONE) 256 { 257 DBG_ERROR( "old style dictionary with invalid language found!" ); 258 continue; 259 } 260 261 // language not yet added? 262 if (aNewStyleLanguages.count( nLang ) == 0) 263 rNewStyleDics.push_back( *aIt2 ); 264 } 265 else 266 { 267 DBG_ERROR( "old style dictionary with no language found!" ); 268 } 269 } 270 } 271 272 273 rtl_TextEncoding getTextEncodingFromCharset(const sal_Char* pCharset) 274 { 275 // default result: used to indicate that we failed to get the proper encoding 276 rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW; 277 278 if (pCharset) 279 { 280 eRet = rtl_getTextEncodingFromMimeCharset(pCharset); 281 if (eRet == RTL_TEXTENCODING_DONTKNOW) 282 eRet = rtl_getTextEncodingFromUnixCharset(pCharset); 283 if (eRet == RTL_TEXTENCODING_DONTKNOW) 284 { 285 if (strcmp("ISCII-DEVANAGARI", pCharset) == 0) 286 eRet = RTL_TEXTENCODING_ISCII_DEVANAGARI; 287 } 288 } 289 return eRet; 290 } 291 292 ////////////////////////////////////////////////////////////////////// 293 294