# Copyright (c) 2003, WiseGuys Internet B.V.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# - Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
#
# - Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
#
# - Neither the name of the WiseGuys Internet B.V. nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# A sample config file for the language models
# provided with Gertjan van Noord's language guesser
# (http://odur.let.rug.nl/~vannoord/TextCat/)
#
# Notes:
# - You may consider eliminating a couple of small languages from this
#   list because they cause false positives with big languages and are
#   bad for performance. (Do you really want to recognize Drents?)
# - Putting the most probable languages at the top of the list
#   improves performance, because this will raise the threshold for
#   likely candidates more quickly.
#

# This file has been modified (for OOo by Jocelyn MERAND joc.mer@gmail.com) to include country and encoding.
# Guess strings are formed as follows: language-country-encoding

afrikaans.lm af--utf8
albanian.lm sq--utf8
amharic_utf.lm am--utf8
arabic.lm ar--utf8
basque.lm eu--utf8
belarus.lm be--utf8
bosnian.lm bs--utf8
breton.lm br--utf8
catalan.lm ca--utf8
chinese_simplified.lm zh-CN-utf8
chinese_traditional.lm zh-TW-utf8
croatian.lm hr--utf8
czech.lm cs--utf8
danish.lm da--utf8
dutch.lm nl--utf8
english.lm en--utf8
esperanto.lm eo--utf8
estonian.lm et--utf8
finnish.lm fi--utf8
french.lm fr--utf8
frisian.lm fy--utf8
georgian.lm ka--utf8
german.lm de--utf8
greek.lm el--utf8
hebrew.lm he--utf8
hindi.lm hi--utf8
hungarian.lm hu--utf8
icelandic.lm is--utf8
indonesian.lm id--utf8
irish_gaelic.lm ga--utf8
italian.lm it--utf8
japanese.lm ja--utf8
korean.lm ko--utf8
latin.lm la--utf8
latvian.lm lv--utf8
lithuanian.lm lt--utf8
luxembourgish.lm lb--utf8
malay.lm ms--utf8
manx_gaelic.lm gv--utf8
marathi.lm mr--utf8
mongolian_cyrillic.lm mn--utf8
nepali.lm ne--utf8
norwegian.lm nb--utf8 # Norwegian (Bokmal)
persian.lm fa--utf8 # Farsi
polish.lm pl--utf8
portuguese.lm pt-PT-utf8
quechua.lm qu--utf8
romanian.lm ro--utf8
romansh.lm rm--utf8
russian.lm ru--utf8
sanskrit.lm sa--utf8
scots.lm sco--utf8
scots_gaelic.lm gd--utf8
# NOTE(review): the next two entries spell the encoding "utf-8" while every other entry uses "utf8" - confirm whether this is intentional.
serbian.lm sr--utf-8
serbian-latin.lm sh--utf-8
slovak_ascii.lm sk-SK-utf8
slovenian.lm sl--utf8
spanish.lm es--utf8
swahili.lm sw--utf8
swedish.lm sv--utf8
tagalog.lm tl--utf8
tamil.lm ta--utf8
thai.lm th--utf8
turkish.lm tr--utf8
ukrainian.lm uk--utf8
vietnamese.lm vi--utf8
welsh.lm cy--utf8
yiddish_utf.lm yi--utf8
zulu.lm zu--utf8