xref: /trunk/main/sw/source/core/text/guess.cxx (revision ee093aae)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_sw.hxx"
26 
27 #include <ctype.h>
28 #include <editeng/unolingu.hxx>
29 #include <tools/shl.hxx>    // needed for SW_MOD() macro
30 #include <errhdl.hxx>   // ASSERTs
31 #include <dlelstnr.hxx>
32 #include <swmodule.hxx>
33 #include <IDocumentSettingAccess.hxx>
34 #include <txtcfg.hxx>
35 #include <guess.hxx>
36 #include <inftxt.hxx>
37 #include <pagefrm.hxx>
38 #include <pagedesc.hxx> // SwPageDesc
39 #include <tgrditem.hxx>
40 #include <com/sun/star/i18n/BreakType.hpp>
41 #include <com/sun/star/i18n/WordType.hpp>
42 #include <unotools/charclass.hxx>
43 #include <porfld.hxx>
44 #include <paratr.hxx>
45 
46 using ::rtl::OUString;
47 using namespace ::com::sun::star;
48 using namespace ::com::sun::star::uno;
49 using namespace ::com::sun::star::i18n;
50 using namespace ::com::sun::star::beans;
51 using namespace ::com::sun::star::linguistic2;
52 
53 #define CH_FULL_BLANK 0x3000
54 
55 /*************************************************************************
56  *						SwTxtGuess::Guess
57  *
58  * provides information for line break calculation
59  * returns true if no line break has to be performed
60  * otherwise possible break or hyphenation position is determined
61  *************************************************************************/
62 
// Decides whether the text of the portion rPor, starting at rInf.GetIdx(),
// fits into the width that is left on the current line and, if it does not,
// determines where the line has to be broken.
//   rPor       - portion being formatted; queried for its field, footnote and
//                drop state, its address is handed to SetMaxWidthDiff
//   rInf       - formatting info supplying text, index, length, font, line
//                start and width
//   nPorHeight - portion height; used for the quick fit estimate and for the
//                extra italic value (nPorHeight / 12)
// Returns sal_True if the text fits (nCutPos and nBreakWidth are set).
// Returns sal_False for empty input or if a break is required; in the latter
// case nCutPos, nBreakPos, nBreakStart and nBreakWidth are filled and
// xHyphWord / pHanging may be set.
63 sal_Bool SwTxtGuess::Guess( const SwTxtPortion& rPor, SwTxtFormatInfo &rInf,
64                             const KSHORT nPorHeight )
65 {
66 	nCutPos = rInf.GetIdx();
67 
68     // An empty portion or an empty text: nothing to break, report sal_False
69 	if( !rInf.GetLen() || !rInf.GetTxt().Len() )
70 		return sal_False;
71 
72     ASSERT( rInf.GetIdx() < rInf.GetTxt().Len(),
73 			"+SwTxtGuess::Guess: invalid SwTxtFormatInfo" );
74 
75     ASSERT( nPorHeight, "+SwTxtGuess::Guess: no height" );
76 
77     sal_uInt16 nMinSize;
78     sal_uInt16 nMaxSizeDiff;
79 
80     const SwScriptInfo& rSI =
81             ((SwParaPortion*)rInf.GetParaPortion())->GetScriptInfo();
82 
    // nMaxComp is the compression value handed to GetTxtSize / GetTxtBreak:
    // 10000 only for a CJK font with compression changes in the script info
    // and outside of multi, field and drop portions, otherwise 0
83     sal_uInt16 nMaxComp = ( SW_CJK == rInf.GetFont()->GetActual() ) &&
84                         rSI.CountCompChg() &&
85                         ! rInf.IsMulti() &&
86                         ! rPor.InFldGrp() &&
87                         ! rPor.IsDropPortion() ?
88                         10000 :
89                             0 ;
90 
    // nLineWidth: total width minus the current X position
    // nMaxLen: characters left in the text, capped by the portion length
91     SwTwips nLineWidth = rInf.Width() - rInf.X();
92 	xub_StrLen nMaxLen = rInf.GetTxt().Len() - rInf.GetIdx();
93 
94     if ( rInf.GetLen() < nMaxLen )
95         nMaxLen = rInf.GetLen();
96 
97     if( !nMaxLen )
98 		return sal_False;
99 
100 	KSHORT nItalic = 0;
101     if( ITALIC_NONE != rInf.GetFont()->GetItalic() && !rInf.NotEOL() )
102     {
103         sal_Bool bAddItalic = sal_True;
104 
105         // do not add extra italic value if we have an active character grid
106         if ( rInf.SnapToGrid() )
107         {
108             GETGRID( rInf.GetTxtFrm()->FindPageFrm() ) // macro; presumably declares pGrid used below
109             bAddItalic = !pGrid || GRID_LINES_CHARS != pGrid->GetGridType();
110         }
111 
112         // do not add extra italic value for an isolated blank:
113         if ( 1 == rInf.GetLen() &&
114              CH_BLANK == rInf.GetTxt().GetChar( rInf.GetIdx() ) )
115             bAddItalic = sal_False;
116 
117         nItalic = bAddItalic ? nPorHeight / 12 : 0; // extra italic value: 1/12 of the height
118 
119         nLineWidth -= nItalic;
120 
121         // --> FME 2005-05-13 #i46524# LineBreak bug with italics
122         if ( nLineWidth < 0 ) nLineWidth = 0;
123         // <--
124 	}
125 
126 	// first check if everything fits to line; the text is only measured if
    // twice the line width exceeds nMaxLen * nPorHeight
127     if ( long ( nLineWidth ) * 2 > long ( nMaxLen ) * nPorHeight )
128 	{
129         // call GetTxtSize with maximum compression (for kanas)
130         rInf.GetTxtSize( &rSI, rInf.GetIdx(), nMaxLen,
131                          nMaxComp, nMinSize, nMaxSizeDiff );
132 
133         nBreakWidth = nMinSize;
134 
135 		if ( nBreakWidth <= nLineWidth )
136 		{
137 			// portion fits to line
138 			nCutPos = rInf.GetIdx() + nMaxLen;
139 			if( nItalic &&
140                 ( nCutPos >= rInf.GetTxt().Len() ||
141                   // --> FME 2005-05-13 #i48035# Needed for CalcFitToContent
142                   // if first line ends with a manual line break
143                   rInf.GetTxt().GetChar( nCutPos ) == CH_BREAK ) )
144                   // <--
145 				nBreakWidth = nBreakWidth + nItalic;
146 
147             // save maximum width for later use, stored under the portion's address
148             if ( nMaxSizeDiff )
149                 rInf.SetMaxWidthDiff( (sal_uLong)&rPor, nMaxSizeDiff );
150 
151             return sal_True;
152 		}
153 	}
154 
155 	sal_Bool bHyph = rInf.IsHyphenate() && !rInf.IsHyphForbud();
156 	xub_StrLen nHyphPos = 0;
157 
158 	// nCutPos is the first character not fitting to the current line
159 	// nHyphPos is the first character not fitting to the current line,
160 	// considering an additional "-" for hyphenation
161 	if( bHyph )
162 	{
163         nCutPos = rInf.GetTxtBreak( nLineWidth, nMaxLen, nMaxComp, nHyphPos );
164 
        // no hyphenation position reported: fall back to the character
        // in front of the portion start
165         if ( !nHyphPos && rInf.GetIdx() )
166 			nHyphPos = rInf.GetIdx() - 1;
167 	}
168 	else
169     {
170         nCutPos = rInf.GetTxtBreak( nLineWidth, nMaxLen, nMaxComp );
171 
172 #ifdef DBG_UTIL
        // debug-only sanity check: the text up to nCutPos must fit
173         if ( STRING_LEN != nCutPos )
174         {
175             rInf.GetTxtSize( &rSI, rInf.GetIdx(), nCutPos - rInf.GetIdx(),
176                              nMaxComp, nMinSize, nMaxSizeDiff );
177             ASSERT( nMinSize <= nLineWidth, "What a Guess!!!" );
178         }
179 #endif
180     }
181 
182 	if( nCutPos > rInf.GetIdx() + nMaxLen )
183 	{
184 		// second check if everything fits to line
185 		nCutPos = nBreakPos = rInf.GetIdx() + nMaxLen - 1;
186         rInf.GetTxtSize( &rSI, rInf.GetIdx(), nMaxLen, nMaxComp,
187                          nMinSize, nMaxSizeDiff );
188 
189         nBreakWidth = nMinSize;
190 
191         // The following comparison should always give sal_True, otherwise
192         // a pixel rounding error in GetTxtBreak will appear
193 		if ( nBreakWidth <= nLineWidth )
194 		{
195 			if( nItalic && ( nBreakPos + 1 ) >= rInf.GetTxt().Len() )
196 				nBreakWidth = nBreakWidth + nItalic;
197 
198             // save maximum width for later use, stored under the portion's address
199             if ( nMaxSizeDiff )
200                 rInf.SetMaxWidthDiff( (sal_uLong)&rPor, nMaxSizeDiff );
201 
202             return sal_True;
203 		}
204 	}
205 
206     // we have to trigger an underflow for a footnote portion
207     // which does not fit to the current line
208     if ( rPor.IsFtnPortion() )
209     {
210         nBreakPos = rInf.GetIdx();
        // NOTE(review): nCutPos receives a length here, not an index; a
        // comment further below states that footnote portions always come in
        // with index 0, which would make both equal - confirm
211         nCutPos = rInf.GetLen();
212         return sal_False;
213     }
214 
215     xub_StrLen nPorLen = 0;
216     // do not call the break iterator if the character at nCutPos is a blank
217     xub_Unicode cCutChar = rInf.GetTxt().GetChar( nCutPos );
218     if( CH_BLANK == cCutChar || CH_FULL_BLANK == cCutChar )
219     {
220 		nBreakPos = nCutPos;
221 		xub_StrLen nX = nBreakPos;
222 
223         const SvxAdjust& rAdjust = rInf.GetTxtFrm()->GetTxtNode()->GetSwAttrSet().GetAdjust().GetAdjust();
224         if ( rAdjust == SVX_ADJUST_LEFT )
225         {
226             // we step back until a non blank character has been found
227             // or there is only one more character left
            // NOTE(review): unlike the else branch, the guard compares
            // nBreakPos with the text length instead of the line start.
            // nBreakPos was just set to nCutPos, whose character was read
            // from the text above, so this loop presumably never iterates
            // for left-adjusted text - confirm that this is intended.
228             while( nX && nBreakPos > rInf.GetTxt().Len() &&
229                    ( CH_BLANK == ( cCutChar = rInf.GetChar( --nX ) ) ||
230                      CH_FULL_BLANK == cCutChar ) )
231                 --nBreakPos;
232         }
233         else
234         {
            // step back over the blanks in front of the cut position, but
            // stop one character behind the line start
235             while( nX && nBreakPos > rInf.GetLineStart() + 1 &&
236                    ( CH_BLANK == ( cCutChar = rInf.GetChar( --nX ) ) ||
237                      CH_FULL_BLANK == cCutChar ) )
238                 --nBreakPos;
239         }
240 
241         if( nBreakPos > rInf.GetIdx() )
242             nPorLen = nBreakPos - rInf.GetIdx();
        // skip the blanks following the cut position; the next line starts
        // at the first non-blank character
243         while( ++nCutPos < rInf.GetTxt().Len() &&
244                ( CH_BLANK == ( cCutChar = rInf.GetChar( nCutPos ) ) ||
245                  CH_FULL_BLANK == cCutChar ) )
246 			; // nothing
247 
248 		nBreakStart = nCutPos;
249 	}
250 	else if( pBreakIt->GetBreakIter().is() )
251 	{
252         // New: We should have a look into the last portion, if it was a
253         // field portion. For this, we expand the text of the field portion
254         // into our string. If the line break position is inside or before
255         // the field portion, we trigger an underflow.
256 
257         xub_StrLen nOldIdx = rInf.GetIdx();
258         xub_Unicode cFldChr = 0;
259 
260 #if OSL_DEBUG_LEVEL > 1
261         XubString aDebugString;
262 #endif
263 
264         // be careful: a field portion can be both: 0x01 (common field)
265         // or 0x02 (the follow of a footnote)
266         if ( rInf.GetLast() && rInf.GetLast()->InFldGrp() &&
267              ! rInf.GetLast()->IsFtnPortion() &&
268              rInf.GetIdx() > rInf.GetLineStart() &&
269              CH_TXTATR_BREAKWORD ==
270                 ( cFldChr = rInf.GetTxt().GetChar( rInf.GetIdx() - 1 ) ) )
271         {
272             SwFldPortion* pFld = (SwFldPortion*)rInf.GetLast();
273             XubString aTxt;
274             pFld->GetExpTxt( rInf, aTxt );
275 
276             if ( aTxt.Len() )
277             {
                // the placeholder character is replaced by the expanded
                // text, so all positions shift by its length minus one
278                 nFieldDiff = aTxt.Len() - 1;
279                 nCutPos = nCutPos + nFieldDiff;
280                 nHyphPos = nHyphPos + nFieldDiff;
281 
282 #if OSL_DEBUG_LEVEL > 1
283                 aDebugString = rInf.GetTxt();
284 #endif
285 
                // the const of GetTxt() is cast away: the string is patched
                // in place and repaired at the end of this branch
286                 XubString& rOldTxt = (XubString&)rInf.GetTxt();
287                 rOldTxt.Erase( rInf.GetIdx() - 1, 1 );
288                 rOldTxt.Insert( aTxt, rInf.GetIdx() - 1 );
289                 rInf.SetIdx( rInf.GetIdx() + nFieldDiff );
290             }
291             else
292                 cFldChr = 0; // empty expansion: treat as if no field was found
293         }
294 
295         LineBreakHyphenationOptions aHyphOpt;
296 		Reference< XHyphenator >  xHyph;
297 		if( bHyph )
298 		{
299 			xHyph = ::GetHyphenator();
300 			aHyphOpt = LineBreakHyphenationOptions( xHyph,
301 								rInf.GetHyphValues(), nHyphPos );
302 		}
303 
304         // Get Language for break iterator.
305         // We have to switch the current language if we have a script
306         // change at nCutPos. Otherwise LATIN punctuation would never
307         // be allowed to be hanging punctuation.
308         // NEVER call GetLang if the string has been modified!!!
309         LanguageType aLang = rInf.GetFont()->GetLanguage();
310 
311         // If we are inside a field portion, we use a temporary string which
312         // differs from the string at the textnode. Therefore we are not allowed
313         // to call the GetLang function.
314         if ( nCutPos && ! rPor.InFldGrp() )
315         {
316             const CharClass& rCC = GetAppCharClass();
317 
318             // step back until a non-punctuation character is reached
319             xub_StrLen nLangIndex = nCutPos;
320 
321             // If a field has been expanded right in front of us we do not
322             // step further than the beginning of the expanded field
323             // (which is the position of the field placeholder in our
324             // original string).
325             const xub_StrLen nDoNotStepOver = CH_TXTATR_BREAKWORD == cFldChr ?
326                                               rInf.GetIdx() - nFieldDiff - 1:
327                                               0;
328 
329             while ( nLangIndex > nDoNotStepOver &&
330                     ! rCC.isLetterNumeric( rInf.GetTxt(), nLangIndex ) )
331                 --nLangIndex;
332 
333             // last "real" character is not inside our current portion
334             // we have to check the script type of the last "real" character
335             if ( nLangIndex < rInf.GetIdx() )
336             {
337                 sal_uInt16 nScript = pBreakIt->GetRealScriptOfText( rInf.GetTxt(),
338                                                                 nLangIndex );
339                 ASSERT( nScript, "Script is not between 1 and 4" );
340 
341                 // compare current script with script from last "real" character
342                 if ( nScript - 1 != rInf.GetFont()->GetActual() )
343                     aLang = rInf.GetTxtFrm()->GetTxtNode()->GetLang(
344                         CH_TXTATR_BREAKWORD == cFldChr ?
345                         nDoNotStepOver :
346                         nLangIndex, 0, nScript );
347             }
348         }
349 
350 		const ForbiddenCharacters aForbidden(
351                 *rInf.GetTxtFrm()->GetNode()->getIDocumentSettingAccess()->getForbiddenCharacters( aLang, true ) );
352 
        // hanging punctuation is only offered outside of multi and field portions
353         const sal_Bool bAllowHanging = rInf.IsHanging() && ! rInf.IsMulti() &&
354                                       ! rPor.InFldGrp();
355 
356 		LineBreakUserOptions aUserOpt(
357 				aForbidden.beginLine, aForbidden.endLine,
358                 rInf.HasForbiddenChars(), bAllowHanging, sal_False );
359 
360         //! register listener to LinguServiceEvents now in order to get
361         //! notified about relevant changes in the future
362         SwModule *pModule = SW_MOD();
363         if (!pModule->GetLngSvcEvtListener().is())
364             pModule->CreateLngSvcEvtListener();
365 
366         // !!! We must have a local copy of the locale, because inside
367         // getLineBreak the LinguEventListener can trigger a new formatting,
368         // which can corrupt the locale pointer inside pBreakIt.
369         const lang::Locale aLocale = pBreakIt->GetLocale( aLang );
370 
371         // determines first possible line break from nCutPos to
372         // start index of current line
373         LineBreakResults aResult = pBreakIt->GetBreakIter()->getLineBreak(
374             rInf.GetTxt(), nCutPos, aLocale,
375             rInf.GetLineStart(), aHyphOpt, aUserOpt );
376 
377         nBreakPos = (xub_StrLen)aResult.breakIndex;
378 
379         // if we are formatting multi portions we want to allow line breaks
380         // at the border between single line and multi line portion
381         // we have to be careful with footnote portions, they always come in
382         // with an index 0
383         if ( nBreakPos < rInf.GetLineStart() && rInf.IsFirstMulti() &&
384              ! rInf.IsFtnInside() )
385             nBreakPos = rInf.GetLineStart();
386 
387         nBreakStart = nBreakPos;
388 
389         bHyph = BreakType::HYPHENATION == aResult.breakType;
390 
391 		if ( bHyph && nBreakPos != STRING_LEN)
392 		{
393 			// found hyphenation position within line
394 			// nBreakPos is set to the hyphenation position
395 			xHyphWord = aResult.rHyphenatedWord;
396             nBreakPos += xHyphWord->getHyphenationPos() + 1;
397 
398 #if OSL_DEBUG_LEVEL > 1
399             // e.g., Schif-fahrt, refers to our string
400             const String aWord = xHyphWord->getWord();
401             // e.g., Schiff-fahrt, refers to the word after hyphenation
402             const String aHyphenatedWord = xHyphWord->getHyphenatedWord();
403             // e.g., Schif-fahrt: 5, refers to our string
404             const sal_uInt16 nHyphenationPos = xHyphWord->getHyphenationPos();
405             (void)nHyphenationPos;
406             // e.g., Schiff-fahrt: 6, refers to the word after hyphenation
407             const sal_uInt16 nHyphenPos = xHyphWord->getHyphenPos();
408             (void)nHyphenPos;
409 #endif
410 
411             // if not in interactive mode, we have to break behind a soft hyphen
412             if ( ! rInf.IsInterHyph() && rInf.GetIdx() )
413             {
414                 const long nSoftHyphPos =
415                         xHyphWord->getWord().indexOf( CHAR_SOFTHYPHEN );
416 
417                 if ( nSoftHyphPos >= 0 &&
418                      nBreakStart + nSoftHyphPos <= nBreakPos &&
419                      nBreakPos > rInf.GetLineStart() )
420                     nBreakPos = rInf.GetIdx() - 1;
421             }
422 
423             if( nBreakPos >= rInf.GetIdx() )
424 			{
425 				nPorLen = nBreakPos - rInf.GetIdx();
                // the text already has a '-' in front of the break position:
                // drop the hyphenated word
426 				if( '-' == rInf.GetTxt().GetChar( nBreakPos - 1 ) )
427 					xHyphWord = NULL;
428 			}
429 		}
430         else if ( !bHyph && nBreakPos >= rInf.GetLineStart() )
431 		{
432             ASSERT( nBreakPos != STRING_LEN, "we should have found a break pos" );
433 
434 			// found break position within line
435 			xHyphWord = NULL;
436 
437             // check, if break position is soft hyphen and an underflow
438             // has to be triggered
439             if( nBreakPos > rInf.GetLineStart() && rInf.GetIdx() &&
440                 CHAR_SOFTHYPHEN == rInf.GetTxt().GetChar( nBreakPos - 1 ) )
441 				nBreakPos = rInf.GetIdx() - 1;
442 
443             const SvxAdjust& rAdjust = rInf.GetTxtFrm()->GetTxtNode()->GetSwAttrSet().GetAdjust().GetAdjust();
444             if( rAdjust != SVX_ADJUST_LEFT )
445             {
446                 // Delete any blanks at the end of a line, but be careful:
447                 // If a field has been expanded, we do not want to delete any
448                 // blanks inside the field portion. This would cause an unwanted
449                 // underflow
450                 xub_StrLen nX = nBreakPos;
451                 while( nX > rInf.GetLineStart() &&
452                        ( CH_TXTATR_BREAKWORD != cFldChr || nX > rInf.GetIdx() ) &&
453                        ( CH_BLANK == rInf.GetChar( --nX ) ||
454                          CH_FULL_BLANK == rInf.GetChar( nX ) ) )
455                     nBreakPos = nX;
456             }
457             if( nBreakPos > rInf.GetIdx() )
458 				nPorLen = nBreakPos - rInf.GetIdx();
459 		}
460         else
461         {
462 			// no line break found, setting nBreakPos to STRING_LEN
463 			// causes a break cut
464 			nBreakPos = STRING_LEN;
465 			ASSERT( nCutPos >= rInf.GetIdx(), "Deep cut" );
466 			nPorLen = nCutPos - rInf.GetIdx();
467 		}
468 
        // the break lies behind the cut position: the characters in between
        // are put into a hanging portion and the portion itself ends at nCutPos
469 		if( nBreakPos > nCutPos && nBreakPos != STRING_LEN )
470 		{
471             const xub_StrLen nHangingLen = nBreakPos - nCutPos;
472             SwPosSize aTmpSize = rInf.GetTxtSize( &rSI, nCutPos,
473                                                   nHangingLen, 0 );
474 			ASSERT( !pHanging, "A hanging portion is hanging around" );
475 			pHanging = new SwHangingPortion( aTmpSize );
476             pHanging->SetLen( nHangingLen );
477             nPorLen = nCutPos - rInf.GetIdx();
478         }
479 
480         // If we expanded a field, we must repair the original string.
481         // In case we do not trigger an underflow, we correct the nBreakPos
482         // value, but we cannot correct the nBreakStart value:
483         // If we have found a hyphenation position, nBreakStart can lie before
484         // the field.
485         if ( CH_TXTATR_BREAKWORD == cFldChr )
486         {
487             if ( nBreakPos < rInf.GetIdx() )
488                 nBreakPos = nOldIdx - 1;
489             else if ( STRING_LEN != nBreakPos )
490             {
491                 ASSERT( nBreakPos >= nFieldDiff, "I've got field trouble!" );
492                 nBreakPos = nBreakPos - nFieldDiff;
493             }
494 
495             ASSERT( nCutPos >= rInf.GetIdx() && nCutPos >= nFieldDiff,
496                     "I've got field trouble, part2!" );
497             nCutPos = nCutPos - nFieldDiff;
498 
            // remove the expanded field text and put the placeholder back
499             XubString& rOldTxt = (XubString&)rInf.GetTxt();
500             rOldTxt.Erase( nOldIdx - 1, nFieldDiff + 1 );
501             rOldTxt.Insert( cFldChr, nOldIdx - 1 );
502             rInf.SetIdx( nOldIdx );
503 
504 #if OSL_DEBUG_LEVEL > 1
505             ASSERT( aDebugString == rInf.GetTxt(),
506                     "Somebody, somebody, somebody put something in my string" );
507 #endif
508         }
509     }
510 
    // measure the part of the portion that stays on the current line
511 	if( nPorLen )
512     {
513         rInf.GetTxtSize( &rSI, rInf.GetIdx(), nPorLen,
514                          nMaxComp, nMinSize, nMaxSizeDiff );
515 
516         // save maximum width for later use, stored under the portion's address
517         if ( nMaxSizeDiff )
518             rInf.SetMaxWidthDiff( (sal_uLong)&rPor, nMaxSizeDiff );
519 
520         nBreakWidth = nItalic + nMinSize;
521     }
522 	else
523 		nBreakWidth = 0;
524 
    // with a hanging portion the break is reported at the cut position
525     if( pHanging )
526         nBreakPos = nCutPos;
527 
528     return sal_False;
529 }
530 
531 /*************************************************************************
532  *						SwTxtGuess::AlternativeSpelling
533  *************************************************************************/
534 
535 // returns true if the word at position nPos has a different spelling
536 // if hyphenated at this position (old German spelling)
537 
// Queries the hyphenator whether the word around nPos is spelled differently
// when it is hyphenated at nPos.
//   rInf - formatting info supplying the text, the font language and the
//          hyphenation values
//   nPos - text index of the hyphenation position in question
// Sets nBreakStart to the start of the word, nCutPos to nPos and xHyphWord to
// the hyphenator's answer (cleared if no hyphenator is available).
// Returns sal_True only if the hyphenator reports an alternative spelling.
538 sal_Bool SwTxtGuess::AlternativeSpelling( const SwTxtFormatInfo &rInf,
539 	const xub_StrLen nPos )
540 {
541 	// local copy of the locale, looked up once and used for both UNO calls
542 	const lang::Locale aLocale = pBreakIt->GetLocale( rInf.GetFont()->GetLanguage() );
543 
544 	// get word boundaries
545 	const Boundary aBound = pBreakIt->GetBreakIter()->getWordBoundary(
546 		rInf.GetTxt(), nPos, aLocale, WordType::DICTIONARY_WORD, sal_True );
547 	nBreakStart = (xub_StrLen)aBound.startPos;
548 	const xub_StrLen nWordLen = static_cast<xub_StrLen>(aBound.endPos - nBreakStart);
549 
550 	// if everything else fails, we want to cut at nPos
551 	nCutPos = nPos;
552 
553 	// do not rely on the ASSERT alone (presumably a debug-only diagnostic):
554 	// without a hyphenator there is no alternative spelling to report
555 	Reference< XHyphenator >  xHyph( ::GetHyphenator() );
556 	ASSERT( xHyph.is(), "Hyphenator is missing");
557 	xHyphWord = NULL;
558 	if ( !xHyph.is() )
559 		return sal_False;
560 	const XubString aTxt( rInf.GetTxt().Copy( nBreakStart, nWordLen ) );
561 	xHyphWord = xHyph->queryAlternativeSpelling( OUString(aTxt), aLocale,
562 						nPos - nBreakStart, rInf.GetHyphValues() ); // index relative to the word start
563 	return xHyphWord.is() && xHyphWord->isAlternativeSpelling();
564 }
565 
566