1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 #ifndef SVX_HANGUL_HANJA_CONVERSION_HXX
24 #define SVX_HANGUL_HANJA_CONVERSION_HXX
25 
26 #include <vcl/window.hxx>
27 #include <memory>
28 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
29 #include <com/sun/star/lang/Locale.hpp>
30 #include <com/sun/star/uno/Sequence.hxx>
31 #include "editeng/editengdllapi.h"
32 
33 //.............................................................................
34 namespace editeng
35 {
36 //.............................................................................
37 
38 	class HangulHanjaConversion_Impl;
39 
40 	//=========================================================================
41 	//= HangulHanjaConversion
42 	//=========================================================================
43 	/** encapsulates Hangul-Hanja conversion functionality
44 
45 		<p>terminology:
46 			<ul><li>A <b>text <em>portion</em></b> is some (potentially large) piece of text
47 				which is to be analyzed for convertible sub-strings.</li>
48 				<li>A <b>text <em>unit</em></b> is a sub string in a text portion, which is
49 				to be converted as a whole.</li>
50 			</ul>
51 			For instance, you could have two independent selections within your document, which are then
52 			two text portions. A text unit would be single Hangul/Hanja words within a portion, or even
53 			single Hangul syllabills when "replace by character" is enabled.
54 		</p>
55 	*/
56 	class EDITENG_DLLPUBLIC HangulHanjaConversion
57 	{
58 		friend class HangulHanjaConversion_Impl;
59 
60 	public:
61 		enum ReplacementAction
62 		{
63 			eExchange,				// simply exchange one text with another
64 			eReplacementBracketed,	// keep the original, and put the replacement in brackets after it
65 			eOriginalBracketed,		// replace the original text, but put it in brackeds after the replacement
66 			eReplacementAbove,		// keep the original, and put the replacement text as ruby text above it
67 			eOriginalAbove,			// replace the original text, but put it as ruby text above it
68 			eReplacementBelow,		// keep the original, and put the replacement text as ruby text below it
69 			eOriginalBelow			// replace the original text, but put it as ruby text below it
70 		};
71 
72         enum ConversionType             // does not specify direction...
73         {
74             eConvHangulHanja,           // Korean Hangul/Hanja conversion
75             eConvSimplifiedTraditional  // Chinese simplified / Chinese traditional conversion
76         };
77 
78         // Note: conversion direction for eConvSimplifiedTraditional is
79         // specified by source language.
80         // This one is for Hangul/Hanja where source and target language
81         // are the same.
82         enum ConversionDirection
83 		{
84 			eHangulToHanja,
85 			eHanjaToHangul
86 		};
87 
88 		enum ConversionFormat
89 		{
90             eSimpleConversion,          // used for simplified / traditional Chinese as well
91 			eHangulBracketed,
92 			eHanjaBracketed,
93 			eRubyHanjaAbove,
94 			eRubyHanjaBelow,
95 			eRubyHangulAbove,
96 			eRubyHangulBelow
97 		};
98 
99 	private:
100 		::std::auto_ptr< HangulHanjaConversion_Impl >	m_pImpl;
101 
102 		// used to set initial values of m_pImpl object from saved ones
103 		static sal_Bool				m_bUseSavedValues;	// defines if the followng two values should be used for initialization
104 		static sal_Bool				m_bTryBothDirectionsSave;
105         static ConversionDirection	m_ePrimaryConversionDirectionSave;
106 
107 		// Forbidden and not implemented.
108 		HangulHanjaConversion (const HangulHanjaConversion &);
109 		HangulHanjaConversion & operator= (const HangulHanjaConversion &);
110 
111     public:
112         HangulHanjaConversion(
113             Window* _pUIParent,
114             const ::com::sun::star::uno::Reference< ::com::sun::star::lang::XMultiServiceFactory >& _rxORB,
115             const ::com::sun::star::lang::Locale& _rSourceLocale,
116             const ::com::sun::star::lang::Locale& _rTargetLocale,
117             const Font* _pTargetFont,
118             sal_Int32 nOptions,
119             sal_Bool _bIsInteractive
120         );
121 
122         virtual ~HangulHanjaConversion( );
123 
124         // converts the whole document
125         void    ConvertDocument();
126 
127         LanguageType    GetSourceLanguage() const;
128         LanguageType    GetTargetLanguage() const;
129         const Font *    GetTargetFont() const;
130         sal_Int32       GetConversionOptions() const;
131         sal_Bool        IsInteractive() const;
132 
133         // chinese text conversion
134         static inline sal_Bool IsSimplified( LanguageType nLang );
135         static inline sal_Bool IsTraditional( LanguageType nLang );
136         static inline sal_Bool IsChinese( LanguageType nLang );
137         static inline sal_Bool IsSimilarChinese( LanguageType nLang1, LanguageType nLang2 );
138 
139 		// used to specify that the conversion direction states from the
140 		// last incarnation should be used as
141 		// initial conversion direction for the next incarnation.
142 		// (A hack used to transport a state information from
143 		// one incarnation to the next. Used in Writers text conversion...)
144 		static void		SetUseSavedConversionDirectionState( sal_Bool bVal );
145 		static sal_Bool IsUseSavedConversionDirectionState();
146 
147 	protected:
148 		/** retrieves the next text portion which is to be analyzed
149 
150 			<p>pseudo-abstract, needs to be overridden</p>
151 
152 			@param _rNextPortion
153 				upon return, this must contain the next text portion
154             @param _rLangOfPortion
155                 upon return, this must contain the language for the found text portion.
156                 (necessary for Chinese translation since there are 5 language variants
157                 too look for even if the 'source' language usually is only 'simplified'
158                 or 'traditional'.)
159 		*/
160         virtual void    GetNextPortion(
161                 ::rtl::OUString& /* [out] */ _rNextPortion,
162                 LanguageType& /* [out] */ _rLangOfPortion,
163                 sal_Bool /* [in] */ _bAllowImplicitChangesForNotConvertibleText );
164 
165 		/** announces a new "current unit"
166 
167 			<p>This will be called whenever it is necessary to interactively ask the user for
168 			a conversion. In such a case, a range within the current portion (see <member>GetNextPortion</member>)
169 			is presented to the user for chosing a substitution. Additionally, this method is called,
170 			so that derived classes can e.g. highlight this text range in a document view.</p>
171 
172 			<p>Note that the indexes are relative to the most recent replace action. See
173 			<member>ReplaceUnit</member> for details.</p>
174 
175 			@param _nUnitStart
176 				the start index of the unit
177 
178 			@param _nUnitEnd
179 				the start index (exclusively!) of the unit.
180 
181             @param _bAllowImplicitChangesForNotConvertibleText
182                 allows implicit changes other than the text itself for the
183                 text parts not being convertible.
184                 Used for chinese translation to attribute all not convertible
185                 text (e.g. western text, empty paragraphs, spaces, ...) to
186                 the target language and target font of the conversion.
187                 This is to ensure that after the conversion any new text entered
188                 anywhere in the document will have the target language (of course
189                 CJK Language only) and target font (CJK font only) set.
190 
191 			@see GetNextPortion
192 		*/
193 		virtual void	HandleNewUnit( const sal_Int32 _nUnitStart, const sal_Int32 _nUnitEnd );
194 
195 		/** replaces a text unit within a text portion with a new text
196 
197 			<p>pseudo-abstract, needs to be overridden</p>
198 
199 			<p>Note an important thing about the indicies: They are always relative to the <em>previous
200 			call</em> of ReplaceUnit. This means whe you get a call to ReplaceUnit, and replace some text
201 			in your document, than you have to remember the document position immediately <em>behind</em>
202 			the changed text. In a next call to ReplaceUnit, an index of <em>0</em> will denote exactly
203 			this position behind the previous replacement<br/>
204 			The reaons for this is that this class here does not know anything about your document structure,
205 			so after a replacement took place, it's impossible to address anything in the range from the
206 			beginning of the portion up to the replaced text.<br/>
207 			In the very first call to ReplaceUnit, an index of <em>0</em> denotes the very first position of
208 			the current portion.</p>
209 
210             <p>If the language of the text to be replaced is different from
211             the target language (as given by 'GetTargetLanguage') for example
212             when converting simplified Chinese from/to traditional Chinese
213             the language attribute of the new text has to be changed as well,
214 			**and** the font is to be set to the default (document) font for
215 			that language.</p>
216 
217 			@param _nUnitStart
218 				the start index of the range to replace
219 
220 			@param _nUnitEnd
221 				the end index (exclusively!) of the range to replace. E.g., an index
222 				pair (4,5) indicates a range of length 1.
223 
224             @param _rOrigText
225                 the original text to be replaced (as returned by GetNextPortion).
226                 Since in Chinese conversion the original text is needed as well
227                 in order to only do the minimal necassry text changes and to keep
228                 as much attributes as possible this is supplied here as well.
229 
230 			@param _rReplaceWith
231 				The replacement text
232 
233             @param _rOffsets
234                 An sequence matching the indices (characters) of _rReplaceWith
235                 to the indices of the characters in the original text they are
236                 replacing.
237                 This is necessary since some portions of the text may get
238                 converted in portions of different length than the original.
239                 The sequence will be empty if all conversions in the text are
240                 of equal length. That is if always the character at index i in
241                 _rOffsets is replacing the character at index i in the original
242                 text for all valid index values of i.
243 
244             @param _eAction
245 				replacement action to take
246 
247             @param pNewUnitLanguage
248                 if the replacement unit is required to have a new language that
249                 is specified here. If the language is to be left unchanged this
250                 is the 0 pointer.
251 		*/
252 		virtual void	ReplaceUnit(
253 							const sal_Int32 _nUnitStart, const sal_Int32 _nUnitEnd,
254                             const ::rtl::OUString& _rOrigText,
255                             const ::rtl::OUString& _rReplaceWith,
256                             const ::com::sun::star::uno::Sequence< sal_Int32 > &_rOffsets,
257                             ReplacementAction _eAction,
258                             LanguageType *pNewUnitLanguage
259 						);
260 
261         /** specifies if rubies are supported by the document implementing
262             this class.
263 
264             @return
265                 <TRUE/> if rubies are supported.
266         */
267         virtual sal_Bool    HasRubySupport() const;
268 	};
269 
IsSimplified(LanguageType nLang)270     sal_Bool HangulHanjaConversion::IsSimplified( LanguageType nLang )
271     {
272         return  nLang == LANGUAGE_CHINESE_SIMPLIFIED ||
273                 nLang == LANGUAGE_CHINESE_SINGAPORE;
274     }
275 
IsTraditional(LanguageType nLang)276     sal_Bool HangulHanjaConversion::IsTraditional( LanguageType nLang )
277     {
278         return  nLang == LANGUAGE_CHINESE_TRADITIONAL ||
279                 nLang == LANGUAGE_CHINESE_HONGKONG ||
280                 nLang == LANGUAGE_CHINESE_MACAU;
281     }
282 
IsChinese(LanguageType nLang)283     sal_Bool HangulHanjaConversion::IsChinese( LanguageType nLang )
284     {
285         return IsTraditional( nLang ) || IsSimplified( nLang );
286     }
287 
IsSimilarChinese(LanguageType nLang1,LanguageType nLang2)288     sal_Bool HangulHanjaConversion::IsSimilarChinese( LanguageType nLang1, LanguageType nLang2 )
289     {
290         return (IsTraditional(nLang1) && IsTraditional(nLang2)) ||
291                (IsSimplified(nLang1)  && IsSimplified(nLang2));
292     }
293 
294 //.............................................................................
}	// namespace editeng
296 //.............................................................................
297 
298 #endif // SVX_HANGUL_HANJA_CONVERSION_HXX
299