1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_i18npool.hxx"
26 
27 #define BREAKITERATOR_ALL
28 #include <breakiterator_cjk.hxx>
29 #include <localedata.hxx>
30 #include <i18nutil/unicode.hxx>
31 
32 using namespace ::com::sun::star::uno;
33 using namespace ::com::sun::star::lang;
34 using namespace ::rtl;
35 
36 namespace com { namespace sun { namespace star { namespace i18n {
37 
38 //      ----------------------------------------------------
39 //      class BreakIterator_CJK
40 //      ----------------------------------------------------;
41 
BreakIterator_CJK()42 BreakIterator_CJK::BreakIterator_CJK() :
43     dict( NULL ),
44     hangingCharacters()
45 {
46         cBreakIterator = "com.sun.star.i18n.BreakIterator_CJK";
47 }
48 
49 Boundary SAL_CALL
previousWord(const OUString & text,sal_Int32 anyPos,const lang::Locale & nLocale,sal_Int16 wordType)50 BreakIterator_CJK::previousWord(const OUString& text, sal_Int32 anyPos,
51         const lang::Locale& nLocale, sal_Int16 wordType) throw(RuntimeException)
52 {
53         if (dict) {
54             result = dict->previousWord(text, anyPos, wordType);
55             // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
56             if (result.endPos - result.startPos != 1 ||
57                     getScriptType(text, result.startPos) == ScriptType::ASIAN)
58                 return result;
59             result = BreakIterator_Unicode::getWordBoundary(text, result.startPos, nLocale, wordType, true);
60             if (result.endPos < anyPos)
61                 return result;
62         }
63         return BreakIterator_Unicode::previousWord(text, anyPos, nLocale, wordType);
64 }
65 
66 Boundary SAL_CALL
nextWord(const OUString & text,sal_Int32 anyPos,const lang::Locale & nLocale,sal_Int16 wordType)67 BreakIterator_CJK::nextWord(const OUString& text, sal_Int32 anyPos,
68         const lang::Locale& nLocale, sal_Int16 wordType) throw(RuntimeException)
69 {
70         if (dict) {
71             result = dict->nextWord(text, anyPos, wordType);
72             // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
73             if (result.endPos - result.startPos != 1 ||
74                     getScriptType(text, result.startPos) == ScriptType::ASIAN)
75                 return result;
76             result = BreakIterator_Unicode::getWordBoundary(text, result.startPos, nLocale, wordType, true);
77             if (result.startPos > anyPos)
78                 return result;
79         }
80         return BreakIterator_Unicode::nextWord(text, anyPos, nLocale, wordType);
81 }
82 
83 Boundary SAL_CALL
getWordBoundary(const OUString & text,sal_Int32 anyPos,const lang::Locale & nLocale,sal_Int16 wordType,sal_Bool bDirection)84 BreakIterator_CJK::getWordBoundary( const OUString& text, sal_Int32 anyPos,
85         const lang::Locale& nLocale, sal_Int16 wordType, sal_Bool bDirection )
86         throw(RuntimeException)
87 {
88         if (dict) {
89             result = dict->getWordBoundary(text, anyPos, wordType, bDirection);
90             // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
91             if (result.endPos - result.startPos != 1 ||
92                     getScriptType(text, result.startPos) == ScriptType::ASIAN)
93                 return result;
94         }
95         return BreakIterator_Unicode::getWordBoundary(text, anyPos, nLocale, wordType, bDirection);
96 }
97 
getLineBreak(const OUString & Text,sal_Int32 nStartPos,const lang::Locale &,sal_Int32,const LineBreakHyphenationOptions &,const LineBreakUserOptions & bOptions)98 LineBreakResults SAL_CALL BreakIterator_CJK::getLineBreak(
99         const OUString& Text, sal_Int32 nStartPos,
100         const lang::Locale& /*rLocale*/, sal_Int32 /*nMinBreakPos*/,
101         const LineBreakHyphenationOptions& /*hOptions*/,
102         const LineBreakUserOptions& bOptions ) throw(RuntimeException)
103 {
104         LineBreakResults lbr;
105 
106         if (bOptions.allowPunctuationOutsideMargin &&
107                 hangingCharacters.indexOf(Text[nStartPos]) != -1 &&
108                 (Text.iterateCodePoints( &nStartPos, 1), nStartPos == Text.getLength())) {
109             ; // do nothing
110         } else if (bOptions.applyForbiddenRules && 0 < nStartPos && nStartPos < Text.getLength()) {
111             while (nStartPos > 0 &&
112                     (bOptions.forbiddenBeginCharacters.indexOf(Text[nStartPos]) != -1 ||
113                     bOptions.forbiddenEndCharacters.indexOf(Text[nStartPos-1]) != -1))
114                 Text.iterateCodePoints( &nStartPos, -1);
115         }
116 
117         lbr.breakIndex = nStartPos;
118         lbr.breakType = BreakType::WORDBOUNDARY;
119         return lbr;
120 }
121 
// Build a lang::Locale from two ASCII C strings (language, country); the
// third constructor argument is an empty OUString.
#define LOCALE(language, country) \
        lang::Locale( \
                OUString::createFromAscii(language), \
                OUString::createFromAscii(country), \
                OUString())
123 //      ----------------------------------------------------
124 //      class BreakIterator_zh
125 //      ----------------------------------------------------;
BreakIterator_zh()126 BreakIterator_zh::BreakIterator_zh()
127 {
128         dict = new xdictionary("zh");
129         hangingCharacters = LocaleData().getHangingCharacters(LOCALE("zh", "CN"));
130         cBreakIterator = "com.sun.star.i18n.BreakIterator_zh";
131 }
132 
~BreakIterator_zh()133 BreakIterator_zh::~BreakIterator_zh()
134 {
135         delete dict;
136 }
137 
138 //      ----------------------------------------------------
139 //      class BreakIterator_zh_TW
140 //      ----------------------------------------------------;
BreakIterator_zh_TW()141 BreakIterator_zh_TW::BreakIterator_zh_TW()
142 {
143         dict = new xdictionary("zh");
144         hangingCharacters = LocaleData().getHangingCharacters(LOCALE("zh", "TW"));
145         cBreakIterator = "com.sun.star.i18n.BreakIterator_zh_TW";
146 }
147 
~BreakIterator_zh_TW()148 BreakIterator_zh_TW::~BreakIterator_zh_TW()
149 {
150         delete dict;
151 }
152 
153 //      ----------------------------------------------------
154 //      class BreakIterator_ja
155 //      ----------------------------------------------------;
BreakIterator_ja()156 BreakIterator_ja::BreakIterator_ja()
157 {
158         dict = new xdictionary("ja");
159         dict->setJapaneseWordBreak();
160         hangingCharacters = LocaleData().getHangingCharacters(LOCALE("ja", "JP"));
161         cBreakIterator = "com.sun.star.i18n.BreakIterator_ja";
162 }
163 
~BreakIterator_ja()164 BreakIterator_ja::~BreakIterator_ja()
165 {
166         delete dict;
167 }
168 
169 //      ----------------------------------------------------
170 //      class BreakIterator_ko
171 //      ----------------------------------------------------;
BreakIterator_ko()172 BreakIterator_ko::BreakIterator_ko()
173 {
174         hangingCharacters = LocaleData().getHangingCharacters(LOCALE("ko", "KR"));
175         cBreakIterator = "com.sun.star.i18n.BreakIterator_ko";
176 }
177 
~BreakIterator_ko()178 BreakIterator_ko::~BreakIterator_ko()
179 {
180 }
181 
182 } } } }
183