1 /*************************************************************************** 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 #ifndef SIMPLEGUESSER_H 28 #define SIMPLEGUESSER_H 29 30 #include <string.h> 31 #include <string> 32 #include <cstdlib> 33 #include <vector> 34 #include <guess.hxx> 35 36 #define MAX_STRING_LENGTH_TO_ANALYSE 200 37 38 using namespace std; 39 40 /** 41 @author Jocelyn Merand 42 */ 43 class SimpleGuesser{ 44 public: 45 /**inits the object with conf file "./conf.txt"*/ 46 SimpleGuesser(); 47 48 /** Compares the current Simpleguesser with an other 49 * @param SimpleGuesser& sg the other guesser to compare 50 */ 51 void operator=(SimpleGuesser& sg); 52 53 /** 54 * destroy the object 55 */ 56 ~SimpleGuesser(); 57 58 /** 59 * Analyze a text and return the most probable languages of the text 60 * @param char* text is the text to analyze 61 * @return the list of guess 62 */ 63 vector<Guess> GuessLanguage(char* text); 64 65 /** 66 * Analyze a text and return the most probable language of the text 67 * @param char* text is the text to analyze 68 * @return the guess (containing language) 69 */ 70 Guess GuessPrimaryLanguage(char* text); 71 72 /** 73 * List all available languages (possibly to be in guesses) 74 * @return the list of languages 75 */ 76 vector<Guess> GetAvailableLanguages(); 77 78 /** 79 * List all languages (possibly in guesses or not) 80 * @return the list of languages 81 */ 82 vector<Guess> GetAllManagedLanguages(); 83 84 /** 85 * List all Unavailable languages (disable for any reason) 86 * @return the list of languages 87 */ 88 vector<Guess> GetUnavailableLanguages(); 89 90 /** 91 * Mark a language enabled 92 * @param string lang the language to enable (build like language-COUNTRY-encoding) 93 */ 94 void EnableLanguage(string lang); 95 96 /** 97 * Mark a language disabled 98 * @param string lang the language to disable (build like language-COUNTRY-encoding) 99 */ 100 void DisableLanguage(string lang); 101 102 /** 103 * Load a new DB of fingerprints 104 * @param const char* thePathOfConfFile self explaining 105 * @param const char* prefix is the path where the directory witch contains fingerprint files is stored 106 */ 107 void SetDBPath(const char* thePathOfConfFile, const char* prefix); 108 109 protected: 110 111 //Where typical fingerprints (n-gram tables) are stored 112 void* h; 113 114 //Is used to select languages into the fingerprints DB, the mask is used to indicate if we want enabled disabled or both 115 vector<Guess> GetManagedLanguages(const char mask); 116 117 //Like getManagedLanguages, this function enable or disable a language and it depends of the mask 118 void XableLanguage(string lang, char mask); 119 }; 120 121 #endif 122