# Copyright (c) 2003, WiseGuys Internet B.V.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# - Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
#
# - Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
#
# - Neither the name of the WiseGuys Internet B.V. nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

# A sample config file for the language models
# provided with Gertjan van Noord's language guesser
# (http://odur.let.rug.nl/~vannoord/TextCat/)
#
# Notes:
# - You may consider eliminating a couple of small languages from this
#   list because they cause false positives with big languages and are
#   bad for performance. (Do you really want to recognize Drents?)
# - Putting the most probable languages at the top of the list
#   improves performance, because this will raise the threshold for
#   likely candidates more quickly.
#

# This file has been modified for OOo by Jocelyn MERAND
# (joc.merATgmail.com) to include country and encoding.
# Guess strings are formed as follows: language-country-encoding
# (e.g. zh-CN-utf8; the country part may be empty, as in af--utf8).

afrikaans.lm af--utf8
albanian.lm sq--utf8
amharic_utf.lm am--utf8
arabic.lm ar--utf8
basque.lm eu--utf8
belarus.lm be--utf8
bosnian.lm bs--utf8
breton.lm br--utf8
catalan.lm ca--utf8
chinese_simplified.lm zh-CN-utf8
chinese_traditional.lm zh-TW-utf8
croatian.lm hr--utf8
czech.lm cs--utf8
danish.lm da--utf8
dutch.lm nl--utf8
english.lm en--utf8
esperanto.lm eo--utf8
estonian.lm et--utf8
finnish.lm fi--utf8
french.lm fr--utf8
frisian.lm fy--utf8
georgian.lm ka--utf8
german.lm de--utf8
greek.lm el--utf8
hebrew.lm he--utf8
hindi.lm hi--utf8
hungarian.lm hu--utf8
icelandic.lm is--utf8
indonesian.lm id--utf8
irish_gaelic.lm ga--utf8
italian.lm it--utf8
japanese.lm ja--utf8
korean.lm ko--utf8
latin.lm la--utf8
latvian.lm lv--utf8
lithuanian.lm lt--utf8
luxembourgish.lm lb--utf8
malay.lm ms--utf8
manx_gaelic.lm gv--utf8
marathi.lm mr--utf8
mongolian_cyrillic.lm mn--utf8
nepali.lm ne--utf8
norwegian.lm nb--utf8 # Norwegian (Bokmal)
persian.lm fa--utf8 # Farsi
polish.lm pl--utf8
portuguese.lm pt-PT-utf8
quechua.lm qu--utf8
romanian.lm ro--utf8
romansh.lm rm--utf8
russian.lm ru--utf8
sanskrit.lm sa--utf8
scots.lm sco--utf8
scots_gaelic.lm gd--utf8
serbian.lm sr--utf-8
serbian-latin.lm sh--utf-8
slovak_ascii.lm sk-SK-utf8
slovenian.lm sl--utf8
spanish.lm es--utf8
swahili.lm sw--utf8
swedish.lm sv--utf8
tagalog.lm tl--utf8
tamil.lm ta--utf8
thai.lm th--utf8
turkish.lm tr--utf8
ukrainian.lm uk--utf8
vietnamese.lm vi--utf8
welsh.lm cy--utf8
yiddish_utf.lm yi--utf8
zulu.lm zu--utf8