1*f7bd9df4SAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*f7bd9df4SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*f7bd9df4SAndrew Rist * or more contributor license agreements. See the NOTICE file 5*f7bd9df4SAndrew Rist * distributed with this work for additional information 6*f7bd9df4SAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*f7bd9df4SAndrew Rist * to you under the Apache License, Version 2.0 (the 8*f7bd9df4SAndrew Rist * "License"); you may not use this file except in compliance 9*f7bd9df4SAndrew Rist * with the License. You may obtain a copy of the License at 10*f7bd9df4SAndrew Rist * 11*f7bd9df4SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*f7bd9df4SAndrew Rist * 13*f7bd9df4SAndrew Rist * Unless required by applicable law or agreed to in writing, 14*f7bd9df4SAndrew Rist * software distributed under the License is distributed on an 15*f7bd9df4SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*f7bd9df4SAndrew Rist * KIND, either express or implied. See the License for the 17*f7bd9df4SAndrew Rist * specific language governing permissions and limitations 18*f7bd9df4SAndrew Rist * under the License. 19*f7bd9df4SAndrew Rist * 20*f7bd9df4SAndrew Rist *************************************************************/ 21*f7bd9df4SAndrew Rist 22*f7bd9df4SAndrew Rist 23cdf0e10cSrcweir #ifndef _XDICTIONARY_H_ 24cdf0e10cSrcweir #define _XDICTIONARY_H_ 25cdf0e10cSrcweir 26cdf0e10cSrcweir #include <sal/types.h> 27cdf0e10cSrcweir #include <osl/module.h> 28cdf0e10cSrcweir 29cdf0e10cSrcweir #include <com/sun/star/i18n/Boundary.hpp> 30cdf0e10cSrcweir 31cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n { 32cdf0e10cSrcweir 33cdf0e10cSrcweir // Whether to use cell boundary code, currently unused but prepared. 34cdf0e10cSrcweir #define USE_CELL_BOUNDARY_CODE 0 35cdf0e10cSrcweir 36cdf0e10cSrcweir #define CACHE_MAX 32 // max cache structure number 37cdf0e10cSrcweir #define DEFAULT_SIZE 256 // for boundary size, to avoid alloc and release memory 38cdf0e10cSrcweir 39cdf0e10cSrcweir // cache structure. 40cdf0e10cSrcweir struct WordBreakCache { 41cdf0e10cSrcweir sal_Int32 length; // contents length saved here. 42cdf0e10cSrcweir sal_Unicode *contents; // seperated segment contents. 43cdf0e10cSrcweir sal_Int32* wordboundary; // word boundaries in segments. 44cdf0e10cSrcweir sal_Int32 size; // size of wordboundary 45cdf0e10cSrcweir 46cdf0e10cSrcweir WordBreakCache(); 47cdf0e10cSrcweir sal_Bool equals(const sal_Unicode *str, Boundary& boundary); // checking cached string 48cdf0e10cSrcweir }; 49cdf0e10cSrcweir 50cdf0e10cSrcweir class xdictionary 51cdf0e10cSrcweir { 52cdf0e10cSrcweir private: 53cdf0e10cSrcweir const sal_uInt8 * existMark; 54cdf0e10cSrcweir const sal_Int16 * index1; 55cdf0e10cSrcweir const sal_Int32 * index2; 56cdf0e10cSrcweir const sal_Int32 * lenArray; 57cdf0e10cSrcweir const sal_Unicode* dataArea; 58cdf0e10cSrcweir oslModule hModule; 59cdf0e10cSrcweir Boundary boundary; 60cdf0e10cSrcweir sal_Bool japaneseWordBreak; 61cdf0e10cSrcweir 62cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE 63cdf0e10cSrcweir // For CTL breakiterator, where the word boundary should not be inside cell. 64cdf0e10cSrcweir sal_Bool useCellBoundary; 65cdf0e10cSrcweir sal_Int32* cellBoundary; 66cdf0e10cSrcweir #endif 67cdf0e10cSrcweir 68cdf0e10cSrcweir public: 69cdf0e10cSrcweir xdictionary(const sal_Char *lang); 70cdf0e10cSrcweir ~xdictionary(); 71cdf0e10cSrcweir Boundary nextWord( const rtl::OUString& rText, sal_Int32 nPos, sal_Int16 wordType); 72cdf0e10cSrcweir Boundary previousWord( const rtl::OUString& rText, sal_Int32 nPos, sal_Int16 wordType); 73cdf0e10cSrcweir Boundary getWordBoundary( const rtl::OUString& rText, sal_Int32 nPos, sal_Int16 wordType, sal_Bool bDirection ); 74cdf0e10cSrcweir void setJapaneseWordBreak(); 75cdf0e10cSrcweir 76cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE 77cdf0e10cSrcweir void setCellBoundary(sal_Int32* cellArray); 78cdf0e10cSrcweir #endif 79cdf0e10cSrcweir 80cdf0e10cSrcweir private: 81cdf0e10cSrcweir WordBreakCache cache[CACHE_MAX]; 82cdf0e10cSrcweir 83cdf0e10cSrcweir sal_Bool seekSegment(const rtl::OUString& rText, sal_Int32 pos, Boundary& boundary); 84cdf0e10cSrcweir WordBreakCache& getCache(const sal_Unicode *text, Boundary& boundary); 85cdf0e10cSrcweir sal_Bool exists(const sal_uInt32 u); 86cdf0e10cSrcweir sal_Int32 getLongestMatch(const sal_Unicode *text, sal_Int32 len); 87cdf0e10cSrcweir }; 88cdf0e10cSrcweir 89cdf0e10cSrcweir } } } } 90cdf0e10cSrcweir 91cdf0e10cSrcweir #endif 92cdf0e10cSrcweir 93