xref: /aoo41x/main/sw/source/core/text/guess.cxx (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_sw.hxx"
30 
31 
32 #include <ctype.h>
33 #include <editeng/unolingu.hxx>
34 #include <tools/shl.hxx>    // needed for SW_MOD() macro
35 #include <errhdl.hxx>   // ASSERTs
36 #include <dlelstnr.hxx>
37 #include <swmodule.hxx>
38 #include <IDocumentSettingAccess.hxx>
39 #include <txtcfg.hxx>
40 #include <guess.hxx>
41 #include <inftxt.hxx>
42 #include <pagefrm.hxx>
43 #include <pagedesc.hxx> // SwPageDesc
44 #include <tgrditem.hxx>
45 #include <com/sun/star/i18n/BreakType.hpp>
46 #include <com/sun/star/i18n/WordType.hpp>
47 #include <unotools/charclass.hxx>
48 #include <porfld.hxx>
49 
50 using ::rtl::OUString;
51 using namespace ::com::sun::star;
52 using namespace ::com::sun::star::uno;
53 using namespace ::com::sun::star::i18n;
54 using namespace ::com::sun::star::beans;
55 using namespace ::com::sun::star::linguistic2;
56 
// 0x3000 is the Unicode IDEOGRAPHIC SPACE (full-width blank). The line break
// code in this file treats it like CH_BLANK when looking for break positions.
57 #define CH_FULL_BLANK 0x3000
58 
59 /*************************************************************************
60  *						SwTxtGuess::Guess
61  *
62  * provides information for line break calculation
63  * returns true if no line break has to be performed
64  * otherwise possible break or hyphenation position is determined
    *
    * Parameters:
    *   rPor        the text portion being formatted; queried for its kind
    *               (field group, drop portion, footnote portion) and its
    *               address is passed to rInf.SetMaxWidthDiff()
    *   rInf        formatting info: supplies text, start index, length,
    *               available width, font and hyphenation settings.
    *               Its text and index are modified temporarily while a
    *               preceding field is expanded and are restored before
    *               returning.
    *   nPorHeight  portion height; used for the quick "might fit" pre-check
    *               and for the extra italic width (nPorHeight / 12)
    *
    * Members written (depending on the path taken):
    *   nCutPos, nBreakPos, nBreakStart, nBreakWidth, nFieldDiff,
    *   xHyphWord, pHanging
    *
    * Returns sal_True if the examined text fits into the remaining line
    * width, sal_False if the text is empty or a break position had to be
    * determined.
65  *************************************************************************/
66 
67 sal_Bool SwTxtGuess::Guess( const SwTxtPortion& rPor, SwTxtFormatInfo &rInf,
68                             const KSHORT nPorHeight )
69 {
70 	nCutPos = rInf.GetIdx();
71 
72 	// Empty strings are always 0
73 	if( !rInf.GetLen() || !rInf.GetTxt().Len() )
74 		return sal_False;
75 
76     ASSERT( rInf.GetIdx() < rInf.GetTxt().Len(),
77 			"+SwTxtGuess::Guess: invalid SwTxtFormatInfo" );
78 
79     ASSERT( nPorHeight, "+SwTxtGuess::Guess: no height" );
80 
       // output parameters of the GetTxtSize calls below
81     sal_uInt16 nMinSize;
82     sal_uInt16 nMaxSizeDiff;
83 
84     const SwScriptInfo& rSI =
85             ((SwParaPortion*)rInf.GetParaPortion())->GetScriptInfo();
86 
       // compression value handed to GetTxtSize/GetTxtBreak: 10000 only for
       // CJK text with compression changes outside of multi portions,
       // field groups and drop portions, otherwise 0
87     sal_uInt16 nMaxComp = ( SW_CJK == rInf.GetFont()->GetActual() ) &&
88                         rSI.CountCompChg() &&
89                         ! rInf.IsMulti() &&
90                         ! rPor.InFldGrp() &&
91                         ! rPor.IsDropPortion() ?
92                         10000 :
93                             0 ;
94 
       // width left on the line and number of characters to examine
       // (limited by both the end of the text and rInf.GetLen())
95     SwTwips nLineWidth = rInf.Width() - rInf.X();
96 	xub_StrLen nMaxLen = rInf.GetTxt().Len() - rInf.GetIdx();
97 
98     if ( rInf.GetLen() < nMaxLen )
99         nMaxLen = rInf.GetLen();
100 
101     if( !nMaxLen )
102 		return sal_False;
103 
        // extra width reserved for italic text; it is taken off the
        // available line width here and added to nBreakWidth again later
104 	KSHORT nItalic = 0;
105     if( ITALIC_NONE != rInf.GetFont()->GetItalic() && !rInf.NotEOL() )
106     {
107         sal_Bool bAddItalic = sal_True;
108 
109         // do not add extra italic value if we have an active character grid
110         if ( rInf.SnapToGrid() )
111         {
                // GETGRID is expected to provide the pGrid used below
112             GETGRID( rInf.GetTxtFrm()->FindPageFrm() )
113             bAddItalic = !pGrid || GRID_LINES_CHARS != pGrid->GetGridType();
114         }
115 
116         // do not add extra italic value for an isolated blank:
117         if ( 1 == rInf.GetLen() &&
118              CH_BLANK == rInf.GetTxt().GetChar( rInf.GetIdx() ) )
119             bAddItalic = sal_False;
120 
121         nItalic = bAddItalic ? nPorHeight / 12 : 0;
122 
123         nLineWidth -= nItalic;
124 
125         // --> FME 2005-05-13 #i46524# LineBreak bug with italics
126         if ( nLineWidth < 0 ) nLineWidth = 0;
127         // <--
128 	}
129 
130 	// first check if everything fits to line
        // (the text is only measured if twice the line width exceeds
        // nMaxLen * nPorHeight; otherwise we go straight to GetTxtBreak)
131     if ( long ( nLineWidth ) * 2 > long ( nMaxLen ) * nPorHeight )
132 	{
133         // call GetTxtSize with maximum compression (for kanas)
134         rInf.GetTxtSize( &rSI, rInf.GetIdx(), nMaxLen,
135                          nMaxComp, nMinSize, nMaxSizeDiff );
136 
137         nBreakWidth = nMinSize;
138 
139 		if ( nBreakWidth <= nLineWidth )
140 		{
141 			// portion fits to line
142 			nCutPos = rInf.GetIdx() + nMaxLen;
                // the italic extra width is only added if the portion ends
                // at the end of the text or in front of a manual line break
143 			if( nItalic &&
144                 ( nCutPos >= rInf.GetTxt().Len() ||
145                   // --> FME 2005-05-13 #i48035# Needed for CalcFitToContent
146                   // if first line ends with a manual line break
147                   rInf.GetTxt().GetChar( nCutPos ) == CH_BREAK ) )
148                   // <--
149 				nBreakWidth = nBreakWidth + nItalic;
150 
151             // save maximum width for later use
152             if ( nMaxSizeDiff )
153                 rInf.SetMaxWidthDiff( (sal_uLong)&rPor, nMaxSizeDiff );
154 
155             return sal_True;
156 		}
157 	}
158 
159 	sal_Bool bHyph = rInf.IsHyphenate() && !rInf.IsHyphForbud();
160 	xub_StrLen nHyphPos = 0;
161 
162 	// nCutPos is the first character not fitting to the current line
163 	// nHyphPos is the first character not fitting to the current line,
164 	// considering an additional "-" for hyphenation
165 	if( bHyph )
166 	{
167         nCutPos = rInf.GetTxtBreak( nLineWidth, nMaxLen, nMaxComp, nHyphPos );
168 
            // fall back to the character in front of the portion if
            // GetTxtBreak left nHyphPos at 0
169         if ( !nHyphPos && rInf.GetIdx() )
170 			nHyphPos = rInf.GetIdx() - 1;
171 	}
172 	else
173     {
174         nCutPos = rInf.GetTxtBreak( nLineWidth, nMaxLen, nMaxComp );
175 
176 #ifdef DBG_UTIL
            // debug only: verify that the text up to nCutPos really fits
177         if ( STRING_LEN != nCutPos )
178         {
179             rInf.GetTxtSize( &rSI, rInf.GetIdx(), nCutPos - rInf.GetIdx(),
180                              nMaxComp, nMinSize, nMaxSizeDiff );
181             ASSERT( nMinSize <= nLineWidth, "What a Guess!!!" );
182         }
183 #endif
184     }
185 
186 	if( nCutPos > rInf.GetIdx() + nMaxLen )
187 	{
188 		// second check if everything fits to line
189 		nCutPos = nBreakPos = rInf.GetIdx() + nMaxLen - 1;
190         rInf.GetTxtSize( &rSI, rInf.GetIdx(), nMaxLen, nMaxComp,
191                          nMinSize, nMaxSizeDiff );
192 
193         nBreakWidth = nMinSize;
194 
195 		// The following comparison should actually always yield sal_True;
196 		// otherwise GetTxtBreak probably ran into a pixel rounding error...
197 		if ( nBreakWidth <= nLineWidth )
198 		{
199 			if( nItalic && ( nBreakPos + 1 ) >= rInf.GetTxt().Len() )
200 				nBreakWidth = nBreakWidth + nItalic;
201 
202             // save maximum width for later use
203             if ( nMaxSizeDiff )
204                 rInf.SetMaxWidthDiff( (sal_uLong)&rPor, nMaxSizeDiff );
205 
206             return sal_True;
207 		}
208 	}
209 
210     // we have to trigger an underflow for a footnote portion
211     // which does not fit to the current line
212     if ( rPor.IsFtnPortion() )
213     {
214         nBreakPos = rInf.GetIdx();
            // NOTE(review): nCutPos receives a length (GetLen) here, not an
            // index like on every other path - confirm that this is intended
215         nCutPos = rInf.GetLen();
216         return sal_False;
217     }
218 
        // number of characters of this portion that stay on the current line
219     xub_StrLen nPorLen = 0;
220     // do not call the break iterator if nCutPos is a blank
221     xub_Unicode cCutChar = rInf.GetTxt().GetChar( nCutPos );
222     if( CH_BLANK == cCutChar || CH_FULL_BLANK == cCutChar )
223     {
224 		nBreakPos = nCutPos;
225 		xub_StrLen nX = nBreakPos;
226 
227         // we step back until a non blank character has been found
228         // or there is only one more character left
229         while( nX && nBreakPos > rInf.GetLineStart() + 1 &&
230                ( CH_BLANK == ( cCutChar = rInf.GetChar( --nX ) ) ||
231                  CH_FULL_BLANK == cCutChar ) )
232             --nBreakPos;
233 
234 		if( nBreakPos > rInf.GetIdx() )
235 			nPorLen = nBreakPos - rInf.GetIdx();
            // skip the blanks following the cut position; the first
            // non-blank character (or the end of the text) becomes nBreakStart
236         while( ++nCutPos < rInf.GetTxt().Len() &&
237                ( CH_BLANK == ( cCutChar = rInf.GetChar( nCutPos ) ) ||
238                  CH_FULL_BLANK == cCutChar ) )
239 			; // nothing
240 
241 		nBreakStart = nCutPos;
242 	}
243 	else if( pBreakIt->GetBreakIter().is() )
244 	{
245         // New: We should have a look into the last portion, if it was a
246         // field portion. For this, we expand the text of the field portion
247         // into our string. If the line break position is inside or before
248         // the field portion, we trigger an underflow.
249 
250         xub_StrLen nOldIdx = rInf.GetIdx();
251         xub_Unicode cFldChr = 0;
252 
253 #if OSL_DEBUG_LEVEL > 1
254         XubString aDebugString;
255 #endif
256 
257         // be careful: a field portion can be both: 0x01 (common field)
258         // or 0x02 (the follow of a footnote)
259         if ( rInf.GetLast() && rInf.GetLast()->InFldGrp() &&
260              ! rInf.GetLast()->IsFtnPortion() &&
261              rInf.GetIdx() > rInf.GetLineStart() &&
262              CH_TXTATR_BREAKWORD ==
263                 ( cFldChr = rInf.GetTxt().GetChar( rInf.GetIdx() - 1 ) ) )
264         {
265             SwFldPortion* pFld = (SwFldPortion*)rInf.GetLast();
266             XubString aTxt;
267             pFld->GetExpTxt( rInf, aTxt );
268 
269             if ( aTxt.Len() )
270             {
                    // the one placeholder character is replaced by the
                    // expanded text, so all positions behind it move by
                    // nFieldDiff
271                 nFieldDiff = aTxt.Len() - 1;
272                 nCutPos = nCutPos + nFieldDiff;
273                 nHyphPos = nHyphPos + nFieldDiff;
274 
275 #if OSL_DEBUG_LEVEL > 1
276                 aDebugString = rInf.GetTxt();
277 #endif
278 
                    // constness is cast away: the string is modified in
                    // place and repaired again at the end of this branch
279                 XubString& rOldTxt = (XubString&)rInf.GetTxt();
280                 rOldTxt.Erase( rInf.GetIdx() - 1, 1 );
281                 rOldTxt.Insert( aTxt, rInf.GetIdx() - 1 );
282                 rInf.SetIdx( rInf.GetIdx() + nFieldDiff );
283             }
284             else
                    // nothing expanded: cFldChr == 0 marks "string untouched"
285                 cFldChr = 0;
286         }
287 
288         LineBreakHyphenationOptions aHyphOpt;
289 		Reference< XHyphenator >  xHyph;
290 		if( bHyph )
291 		{
292 			xHyph = ::GetHyphenator();
293 			aHyphOpt = LineBreakHyphenationOptions( xHyph,
294 								rInf.GetHyphValues(), nHyphPos );
295 		}
296 
297         // Get Language for break iterator.
298         // We have to switch the current language if we have a script
299         // change at nCutPos. Otherwise LATIN punctuation would never
300         // be allowed to be hanging punctuation.
301         // NEVER call GetLang if the string has been modified!!!
302         LanguageType aLang = rInf.GetFont()->GetLanguage();
303 
304         // If we are inside a field portion, we use a temporary string which
305         // differs from the string at the textnode. Therefore we are not allowed
306         // to call the GetLang function.
307         if ( nCutPos && ! rPor.InFldGrp() )
308         {
309             const CharClass& rCC = GetAppCharClass();
310 
311             // step back until a non-punctuation character is reached
312             xub_StrLen nLangIndex = nCutPos;
313 
314             // If a field has been expanded right in front of us we do not
315             // step further than the beginning of the expanded field
316             // (which is the position of the field placeholder in our
317             // original string).
318             const xub_StrLen nDoNotStepOver = CH_TXTATR_BREAKWORD == cFldChr ?
319                                               rInf.GetIdx() - nFieldDiff - 1:
320                                               0;
321 
322             while ( nLangIndex > nDoNotStepOver &&
323                     ! rCC.isLetterNumeric( rInf.GetTxt(), nLangIndex ) )
324                 --nLangIndex;
325 
326             // last "real" character is not inside our current portion
327             // we have to check the script type of the last "real" character
328             if ( nLangIndex < rInf.GetIdx() )
329             {
330                 sal_uInt16 nScript = pBreakIt->GetRealScriptOfText( rInf.GetTxt(),
331                                                                 nLangIndex );
332                 ASSERT( nScript, "Script is not between 1 and 4" );
333 
334                 // compare current script with script from last "real" character
335                 if ( nScript - 1 != rInf.GetFont()->GetActual() )
336                     aLang = rInf.GetTxtFrm()->GetTxtNode()->GetLang(
337                         CH_TXTATR_BREAKWORD == cFldChr ?
338                         nDoNotStepOver :
339                         nLangIndex, 0, nScript );
340             }
341         }
342 
343 		const ForbiddenCharacters aForbidden(
344                 *rInf.GetTxtFrm()->GetNode()->getIDocumentSettingAccess()->getForbiddenCharacters( aLang, true ) );
345 
346         const sal_Bool bAllowHanging = rInf.IsHanging() && ! rInf.IsMulti() &&
347                                       ! rPor.InFldGrp();
348 
349 		LineBreakUserOptions aUserOpt(
350 				aForbidden.beginLine, aForbidden.endLine,
351                 rInf.HasForbiddenChars(), bAllowHanging, sal_False );
352 
353         //! register listener to LinguServiceEvents now in order to get
354         //! notified about relevant changes in the future
355         SwModule *pModule = SW_MOD();
356         if (!pModule->GetLngSvcEvtListener().is())
357             pModule->CreateLngSvcEvtListener();
358 
359         // !!! We must have a local copy of the locale, because inside
360         // getLineBreak the LinguEventListener can trigger a new formatting,
361         // which can corrupt the locale pointer inside pBreakIt.
362         const lang::Locale aLocale = pBreakIt->GetLocale( aLang );
363 
364         // determines first possible line break from nCutPos to
365         // start index of current line
366         LineBreakResults aResult = pBreakIt->GetBreakIter()->getLineBreak(
367             rInf.GetTxt(), nCutPos, aLocale,
368             rInf.GetLineStart(), aHyphOpt, aUserOpt );
369 
370         nBreakPos = (xub_StrLen)aResult.breakIndex;
371 
372         // if we are formatting multi portions we want to allow line breaks
373         // at the border between single line and multi line portion
374         // we have to be careful with footnote portions, they always come in
375         // with an index 0
376         if ( nBreakPos < rInf.GetLineStart() && rInf.IsFirstMulti() &&
377              ! rInf.IsFtnInside() )
378             nBreakPos = rInf.GetLineStart();
379 
380         nBreakStart = nBreakPos;
381 
            // from here on bHyph tells whether the break iterator actually
            // reported a hyphenation break
382         bHyph = BreakType::HYPHENATION == aResult.breakType;
383 
384 		if ( bHyph && nBreakPos != STRING_LEN)
385 		{
386 			// found hyphenation position within line
387 			// nBreakPos is set to the hyphenation position
388 			xHyphWord = aResult.rHyphenatedWord;
389             nBreakPos += xHyphWord->getHyphenationPos() + 1;
390 
391 #if OSL_DEBUG_LEVEL > 1
392             // e.g., Schif-fahrt, refers to our string
393             const String aWord = xHyphWord->getWord();
394             // e.g., Schiff-fahrt, refers to the word after hyphenation
395             const String aHyphenatedWord = xHyphWord->getHyphenatedWord();
396             // e.g., Schif-fahrt: 5, refers to our string
397             const sal_uInt16 nHyphenationPos = xHyphWord->getHyphenationPos();
398             (void)nHyphenationPos;
399             // e.g., Schiff-fahrt: 6, refers to the word after hyphenation
400             const sal_uInt16 nHyphenPos = xHyphWord->getHyphenPos();
401             (void)nHyphenPos;
402 #endif
403 
404             // if not in interactive mode, we have to break behind a soft hyphen
405             if ( ! rInf.IsInterHyph() && rInf.GetIdx() )
406             {
407                 const long nSoftHyphPos =
408                         xHyphWord->getWord().indexOf( CHAR_SOFTHYPHEN );
409 
                    // a break position in front of this portion makes the
                    // check below leave nPorLen at 0
410                 if ( nSoftHyphPos >= 0 &&
411                      nBreakStart + nSoftHyphPos <= nBreakPos &&
412                      nBreakPos > rInf.GetLineStart() )
413                     nBreakPos = rInf.GetIdx() - 1;
414             }
415 
416             if( nBreakPos >= rInf.GetIdx() )
417 			{
418 				nPorLen = nBreakPos - rInf.GetIdx();
                    // the text already has a '-' in front of the break
                    // position: drop the hyphenated word
419 				if( '-' == rInf.GetTxt().GetChar( nBreakPos - 1 ) )
420 					xHyphWord = NULL;
421 			}
422 		}
423         else if ( !bHyph && nBreakPos >= rInf.GetLineStart() )
424 		{
425             ASSERT( nBreakPos != STRING_LEN, "we should have found a break pos" );
426 
427 			// found break position within line
428 			xHyphWord = NULL;
429 
430             // check, if break position is soft hyphen and an underflow
431             // has to be triggered
432             if( nBreakPos > rInf.GetLineStart() && rInf.GetIdx() &&
433                 CHAR_SOFTHYPHEN == rInf.GetTxt().GetChar( nBreakPos - 1 ) )
434 				nBreakPos = rInf.GetIdx() - 1;
435 
436             // Delete any blanks at the end of a line, but be careful:
437             // If a field has been expanded, we do not want to delete any
438             // blanks inside the field portion. This would cause an unwanted
439             // underflow
440             xub_StrLen nX = nBreakPos;
441             while( nX > rInf.GetLineStart() &&
442                    ( CH_TXTATR_BREAKWORD != cFldChr || nX > rInf.GetIdx() ) &&
443                    ( CH_BLANK == rInf.GetChar( --nX ) ||
444                      CH_FULL_BLANK == rInf.GetChar( nX ) ) )
445                 nBreakPos = nX;
446             if( nBreakPos > rInf.GetIdx() )
447 				nPorLen = nBreakPos - rInf.GetIdx();
448 		}
449         else
450         {
451 			// no line break found, setting nBreakPos to STRING_LEN
452 			// causes a break cut
453 			nBreakPos = STRING_LEN;
454 			ASSERT( nCutPos >= rInf.GetIdx(), "Deep cut" );
455 			nPorLen = nCutPos - rInf.GetIdx();
456 		}
457 
            // the break position lies behind the cut position: the characters
            // in between are put into a hanging portion and this portion
            // ends at nCutPos
458 		if( nBreakPos > nCutPos && nBreakPos != STRING_LEN )
459 		{
460             const xub_StrLen nHangingLen = nBreakPos - nCutPos;
461             SwPosSize aTmpSize = rInf.GetTxtSize( &rSI, nCutPos,
462                                                   nHangingLen, 0 );
463 			ASSERT( !pHanging, "A hanging portion is hanging around" );
464 			pHanging = new SwHangingPortion( aTmpSize );
465             pHanging->SetLen( nHangingLen );
466             nPorLen = nCutPos - rInf.GetIdx();
467         }
468 
469         // If we expanded a field, we must repair the original string.
470         // In case we do not trigger an underflow, we correct the nBreakPos
471         // value, but we cannot correct the nBreakStart value:
472         // If we have found a hyphenation position, nBreakStart can lie before
473         // the field.
474         if ( CH_TXTATR_BREAKWORD == cFldChr )
475         {
                // break in front of the (shifted) index: report the position
                // of the field placeholder in the original string
476             if ( nBreakPos < rInf.GetIdx() )
477                 nBreakPos = nOldIdx - 1;
478             else if ( STRING_LEN != nBreakPos )
479             {
480                 ASSERT( nBreakPos >= nFieldDiff, "I've got field trouble!" );
481                 nBreakPos = nBreakPos - nFieldDiff;
482             }
483 
484             ASSERT( nCutPos >= rInf.GetIdx() && nCutPos >= nFieldDiff,
485                     "I've got field trouble, part2!" );
486             nCutPos = nCutPos - nFieldDiff;
487 
                // put the single placeholder character back in place of the
                // expanded field text and restore the original index
488             XubString& rOldTxt = (XubString&)rInf.GetTxt();
489             rOldTxt.Erase( nOldIdx - 1, nFieldDiff + 1 );
490             rOldTxt.Insert( cFldChr, nOldIdx - 1 );
491             rInf.SetIdx( nOldIdx );
492 
493 #if OSL_DEBUG_LEVEL > 1
494             ASSERT( aDebugString == rInf.GetTxt(),
495                     "Somebody, somebody, somebody put something in my string" );
496 #endif
497         }
498     }
499 
        // measure the part of the portion that stays on this line
500 	if( nPorLen )
501     {
502         rInf.GetTxtSize( &rSI, rInf.GetIdx(), nPorLen,
503                          nMaxComp, nMinSize, nMaxSizeDiff );
504 
505         // save maximum width for later use
506         if ( nMaxSizeDiff )
507             rInf.SetMaxWidthDiff( (sal_uLong)&rPor, nMaxSizeDiff );
508 
509         nBreakWidth = nItalic + nMinSize;
510     }
511 	else
512 		nBreakWidth = 0;
513 
        // with a hanging portion the break is reported at the cut position
514     if( pHanging )
515         nBreakPos = nCutPos;
516 
517     return sal_False;
518 }
519 
520 /*************************************************************************
521  *						SwTxtGuess::AlternativeSpelling
522  *************************************************************************/
523 
524 // returns true if word at position nPos has a diffenrent spelling
525 // if hyphenated at this position (old german spelling)
526 
527 sal_Bool SwTxtGuess::AlternativeSpelling( const SwTxtFormatInfo &rInf,
528 	const xub_StrLen nPos )
529 {
530 	// get word boundaries
531 	xub_StrLen nWordLen;
532 
533 	Boundary aBound =
534 		pBreakIt->GetBreakIter()->getWordBoundary( rInf.GetTxt(), nPos,
535 		pBreakIt->GetLocale( rInf.GetFont()->GetLanguage() ),
536 		WordType::DICTIONARY_WORD, sal_True );
537 	nBreakStart = (xub_StrLen)aBound.startPos;
538     nWordLen = static_cast<xub_StrLen>(aBound.endPos - nBreakStart);
539 
540     // if everything else fails, we want to cut at nPos
541     nCutPos = nPos;
542 
543 	XubString aTxt( rInf.GetTxt().Copy( nBreakStart, nWordLen ) );
544 
545 	// check, if word has alternative spelling
546 	Reference< XHyphenator >  xHyph( ::GetHyphenator() );
547 	ASSERT( xHyph.is(), "Hyphenator is missing");
548 	//! subtract 1 since the UNO-interface is 0 based
549 	xHyphWord =	xHyph->queryAlternativeSpelling( OUString(aTxt),
550 						pBreakIt->GetLocale( rInf.GetFont()->GetLanguage() ),
551 						nPos - nBreakStart, rInf.GetHyphValues() );
552 	return xHyphWord.is() && xHyphWord->isAlternativeSpelling();
553 }
554 
555