1*cdf0e10cSrcweir#
2*cdf0e10cSrcweir# A sample config file for the language models
3*cdf0e10cSrcweir# provided with Gertjan van Noord's language guesser
4*cdf0e10cSrcweir# (http://odur.let.rug.nl/~vannoord/TextCat/)
5*cdf0e10cSrcweir#
6*cdf0e10cSrcweir# Notes:
7*cdf0e10cSrcweir# - You may consider eliminating a couple of small languages from this
8*cdf0e10cSrcweir# list because they cause false positives with big languages and are
9*cdf0e10cSrcweir# bad for performance. (Do you really want to recognize Drents?)
10*cdf0e10cSrcweir# - Putting the most probable languages at the top of the list
11*cdf0e10cSrcweir# improves performance, because this will raise the threshold for
12*cdf0e10cSrcweir# likely candidates more quickly.
13*cdf0e10cSrcweir#
14*cdf0e10cSrcweir
15*cdf0e10cSrcweir# This file has been modified for OOo (by Jocelyn MERAND, joc.mer@gmail.com) to include country and encoding.
16*cdf0e10cSrcweir# Guess strings are formed as follows: language-country-encoding (the country may be empty, e.g. en--utf8).
17*cdf0e10cSrcweir
18*cdf0e10cSrcweirafrikaans.lm                         af--utf8
19*cdf0e10cSrcweiralbanian.lm                          sq--utf8
20*cdf0e10cSrcweiramharic_utf.lm                       am--utf8
21*cdf0e10cSrcweirarabic.lm                            ar--utf8
22*cdf0e10cSrcweirbasque.lm                            eu--utf8
23*cdf0e10cSrcweirbelarus.lm                           be--utf8
24*cdf0e10cSrcweirbosnian.lm                           bs--utf8
25*cdf0e10cSrcweirbreton.lm                            br--utf8
26*cdf0e10cSrcweircatalan.lm                           ca--utf8
27*cdf0e10cSrcweirchinese_simplified.lm                zh-CN-utf8
28*cdf0e10cSrcweirchinese_traditional.lm               zh-TW-utf8
29*cdf0e10cSrcweircroatian.lm                          hr--utf8
30*cdf0e10cSrcweirczech.lm                             cs--utf8
31*cdf0e10cSrcweirdanish.lm                            da--utf8
32*cdf0e10cSrcweirdutch.lm                             nl--utf8
33*cdf0e10cSrcweirenglish.lm                           en--utf8
34*cdf0e10cSrcweiresperanto.lm                         eo--utf8
35*cdf0e10cSrcweirestonian.lm                          et--utf8
36*cdf0e10cSrcweirfinnish.lm                           fi--utf8
37*cdf0e10cSrcweirfrench.lm                            fr--utf8
38*cdf0e10cSrcweirfrisian.lm                           fy--utf8
39*cdf0e10cSrcweirgeorgian.lm                          ka--utf8
40*cdf0e10cSrcweirgerman.lm                            de--utf8
41*cdf0e10cSrcweirgreek.lm                             el--utf8
42*cdf0e10cSrcweirhebrew.lm                            he--utf8
43*cdf0e10cSrcweirhindi.lm                             hi--utf8
44*cdf0e10cSrcweirhungarian.lm                         hu--utf8
45*cdf0e10cSrcweiricelandic.lm                         is--utf8
46*cdf0e10cSrcweirindonesian.lm                        id--utf8
47*cdf0e10cSrcweirirish_gaelic.lm                      ga--utf8
48*cdf0e10cSrcweiritalian.lm                           it--utf8
49*cdf0e10cSrcweirjapanese.lm                          ja--utf8
50*cdf0e10cSrcweirkorean.lm                            ko--utf8
51*cdf0e10cSrcweirlatin.lm                             la--utf8
52*cdf0e10cSrcweirlatvian.lm                           lv--utf8
53*cdf0e10cSrcweirlithuanian.lm                        lt--utf8
54*cdf0e10cSrcweirluxembourgish.lm                     lb--utf8
55*cdf0e10cSrcweirmalay.lm                             ms--utf8
56*cdf0e10cSrcweirmanx_gaelic.lm                       gv--utf8
57*cdf0e10cSrcweirmarathi.lm                           mr--utf8
58*cdf0e10cSrcweirmongolian_cyrillic.lm                mn--utf8
59*cdf0e10cSrcweirnepali.lm                            ne--utf8
60*cdf0e10cSrcweirnorwegian.lm                         nb--utf8       # Norwegian (Bokmal)
61*cdf0e10cSrcweirpersian.lm                           fa--utf8       # Farsi
62*cdf0e10cSrcweirpolish.lm                            pl--utf8
63*cdf0e10cSrcweirportuguese.lm                        pt-PT-utf8
64*cdf0e10cSrcweirquechua.lm                           qu--utf8
65*cdf0e10cSrcweirromanian.lm                          ro--utf8
66*cdf0e10cSrcweirromansh.lm                           rm--utf8
67*cdf0e10cSrcweirrussian.lm                           ru--utf8
68*cdf0e10cSrcweirsanskrit.lm                          sa--utf8
69*cdf0e10cSrcweirscots.lm                             sco--utf8
70*cdf0e10cSrcweirscots_gaelic.lm                      gd--utf8
71*cdf0e10cSrcweirserbian.lm                           sr--utf8
72*cdf0e10cSrcweirserbian-latin.lm                     sh--utf8
73*cdf0e10cSrcweirslovak_ascii.lm                      sk-SK-utf8
74*cdf0e10cSrcweirslovenian.lm                         sl--utf8
75*cdf0e10cSrcweirspanish.lm                           es--utf8
76*cdf0e10cSrcweirswahili.lm                           sw--utf8
77*cdf0e10cSrcweirswedish.lm                           sv--utf8
78*cdf0e10cSrcweirtagalog.lm                           tl--utf8
79*cdf0e10cSrcweirtamil.lm                             ta--utf8
80*cdf0e10cSrcweirthai.lm                              th--utf8
81*cdf0e10cSrcweirturkish.lm                           tr--utf8
82*cdf0e10cSrcweirukrainian.lm                         uk--utf8
83*cdf0e10cSrcweirvietnamese.lm                        vi--utf8
84*cdf0e10cSrcweirwelsh.lm                             cy--utf8
85*cdf0e10cSrcweiryiddish_utf.lm                       yi--utf8
86*cdf0e10cSrcweirzulu.lm                              zu--utf8
87