1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 #ifndef SIMPLEGUESSER_H 24 #define SIMPLEGUESSER_H 25 26 #include <string.h> 27 #include <string> 28 #include <cstdlib> 29 #include <vector> 30 #include <guess.hxx> 31 32 #define MAX_STRING_LENGTH_TO_ANALYSE 200 33 34 using namespace std; 35 36 /** 37 @author Jocelyn Merand 38 */ 39 class SimpleGuesser{ 40 public: 41 /**inits the object with conf file "./conf.txt"*/ 42 SimpleGuesser(); 43 44 /** Compares the current Simpleguesser with an other 45 * @param SimpleGuesser& sg the other guesser to compare 46 */ 47 void operator=(SimpleGuesser& sg); 48 49 /** 50 * destroy the object 51 */ 52 ~SimpleGuesser(); 53 54 /** 55 * Analyze a text and return the most probable languages of the text 56 * @param char* text is the text to analyze 57 * @return the list of guess 58 */ 59 vector<Guess> GuessLanguage(char* text); 60 61 /** 62 * Analyze a text and return the most probable language of the text 63 * @param char* text is the text to analyze 64 * @return the guess (containing language) 65 */ 66 Guess GuessPrimaryLanguage(char* text); 67 68 /** 69 * List all available languages (possibly to be in guesses) 70 * @return the list of languages 71 */ 72 vector<Guess> GetAvailableLanguages(); 73 74 /** 75 * List all languages (possibly in guesses or not) 76 * @return the list of languages 77 */ 78 vector<Guess> GetAllManagedLanguages(); 79 80 /** 81 * List all Unavailable languages (disable for any reason) 82 * @return the list of languages 83 */ 84 vector<Guess> GetUnavailableLanguages(); 85 86 /** 87 * Mark a language enabled 88 * @param string lang the language to enable (build like language-COUNTRY-encoding) 89 */ 90 void EnableLanguage(string lang); 91 92 /** 93 * Mark a language disabled 94 * @param string lang the language to disable (build like language-COUNTRY-encoding) 95 */ 96 void DisableLanguage(string lang); 97 98 /** 99 * Load a new DB of fingerprints 100 * @param const char* thePathOfConfFile self explaining 101 * @param const char* prefix is the path where the directory witch contains fingerprint files is stored 102 */ 103 void SetDBPath(const char* thePathOfConfFile, const char* prefix); 104 105 protected: 106 107 //Where typical fingerprints (n-gram tables) are stored 108 void* h; 109 110 //Is used to select languages into the fingerprints DB, the mask is used to indicate if we want enabled disabled or both 111 vector<Guess> GetManagedLanguages(const char mask); 112 113 //Like getManagedLanguages, this function enable or disable a language and it depends of the mask 114 void XableLanguage(string lang, char mask); 115 }; 116 117 #endif 118