1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_sw.hxx"
26 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil -*- */
27
28
29 #include <tools/stream.hxx>
30 #include <hintids.hxx>
31 #include <rtl/tencinfo.h>
32 #include <sfx2/printer.hxx>
33 #include <editeng/fontitem.hxx>
34 #include <editeng/langitem.hxx>
35 #include <editeng/brkitem.hxx>
36 #include <editeng/scripttypeitem.hxx>
37 #include <shellio.hxx>
38 #include <doc.hxx>
39 #include <swtypes.hxx>
40 #include <ndtxt.hxx>
41 #include <pam.hxx>
42 #include <frmatr.hxx>
43 #include <fltini.hxx>
44 #include <pagedesc.hxx>
45 #include <breakit.hxx>
46 #include <swerror.h>
47 #ifndef _STATSTR_HRC
48 #include <statstr.hrc> // ResId fuer Statusleiste
49 #endif
50 #include <mdiexp.hxx> // ...Percent()
51 #include <poolfmt.hxx>
52
53 #include "vcl/metric.hxx"
54
55 #define ASC_BUFFLEN 4096
56
57 class SwASCIIParser
58 {
59 SwDoc* pDoc;
60 SwPaM* pPam;
61 SvStream& rInput;
62 sal_Char* pArr;
63 const SwAsciiOptions& rOpt;
64 SfxItemSet* pItemSet;
65 long nFileSize;
66 sal_uInt16 nScript;
67 bool bNewDoc;
68
69 sal_uLong ReadChars();
70 void InsertText( const String& rStr );
71
72 public:
73 SwASCIIParser( SwDoc* pD, const SwPaM& rCrsr, SvStream& rIn,
74 int bReadNewDoc, const SwAsciiOptions& rOpts );
75 ~SwASCIIParser();
76
77 sal_uLong CallParser();
78 };
79
80
81 // Aufruf fuer die allg. Reader-Schnittstelle
Read(SwDoc & rDoc,const String &,SwPaM & rPam,const String &)82 sal_uLong AsciiReader::Read( SwDoc &rDoc, const String&, SwPaM &rPam, const String & )
83 {
84 if( !pStrm )
85 {
86 ASSERT( !this, "ASCII-Read ohne Stream" );
87 return ERR_SWG_READ_ERROR;
88 }
89
90 //JP 18.01.96: Alle Ueberschriften sind normalerweise ohne
91 // Kapitelnummer. Darum hier explizit abschalten
92 // weil das Default jetzt wieder auf AN ist.
93 if( !bInsertMode )
94 Reader::SetNoOutlineNum( rDoc );
95
96 SwASCIIParser* pParser = new SwASCIIParser( &rDoc, rPam, *pStrm,
97 !bInsertMode, aOpt.GetASCIIOpts() );
98 sal_uLong nRet = pParser->CallParser();
99
100 delete pParser;
101 // after Read reset the options
102 aOpt.ResetASCIIOpts();
103 return nRet;
104 }
105
SwASCIIParser(SwDoc * pD,const SwPaM & rCrsr,SvStream & rIn,int bReadNewDoc,const SwAsciiOptions & rOpts)106 SwASCIIParser::SwASCIIParser(SwDoc* pD, const SwPaM& rCrsr, SvStream& rIn,
107 int bReadNewDoc, const SwAsciiOptions& rOpts)
108 : pDoc(pD), rInput(rIn), rOpt(rOpts), nScript(0), bNewDoc(bReadNewDoc)
109 {
110 pPam = new SwPaM( *rCrsr.GetPoint() );
111 pArr = new sal_Char [ ASC_BUFFLEN + 2 ];
112
113 pItemSet = new SfxItemSet( pDoc->GetAttrPool(),
114 RES_CHRATR_FONT, RES_CHRATR_LANGUAGE,
115 RES_CHRATR_CJK_FONT, RES_CHRATR_CJK_LANGUAGE,
116 RES_CHRATR_CTL_FONT, RES_CHRATR_CTL_LANGUAGE,
117 0 );
118
119 // set defaults from the options
120 if( rOpt.GetLanguage() )
121 {
122 SvxLanguageItem aLang( (LanguageType)rOpt.GetLanguage(),
123 RES_CHRATR_LANGUAGE );
124 pItemSet->Put( aLang );
125 pItemSet->Put( aLang, RES_CHRATR_CJK_LANGUAGE );
126 pItemSet->Put( aLang, RES_CHRATR_CTL_LANGUAGE );
127 }
128 if( rOpt.GetFontName().Len() )
129 {
130 Font aTextFont( rOpt.GetFontName(), Size( 0, 10 ) );
131 if( pDoc->getPrinter( false ) )
132 aTextFont = pDoc->getPrinter( false )->GetFontMetric( aTextFont );
133 SvxFontItem aFont( aTextFont.GetFamily(), aTextFont.GetName(),
134 aEmptyStr, aTextFont.GetPitch(), aTextFont.GetCharSet(), RES_CHRATR_FONT );
135 pItemSet->Put( aFont );
136 pItemSet->Put( aFont, RES_CHRATR_CJK_FONT );
137 pItemSet->Put( aFont, RES_CHRATR_CTL_FONT );
138 }
139 }
140
~SwASCIIParser()141 SwASCIIParser::~SwASCIIParser()
142 {
143 delete pPam;
144 delete [] pArr;
145 delete pItemSet;
146 }
147
148
149 // Aufruf des Parsers
CallParser()150 sal_uLong SwASCIIParser::CallParser()
151 {
152 rInput.Seek(STREAM_SEEK_TO_END);
153 rInput.ResetError();
154
155 nFileSize = rInput.Tell();
156 rInput.Seek(STREAM_SEEK_TO_BEGIN);
157 rInput.ResetError();
158
159 ::StartProgress( STR_STATSTR_W4WREAD, 0, nFileSize, pDoc->GetDocShell() );
160
161 SwPaM* pInsPam = 0;
162 xub_StrLen nSttCntnt = 0;
163 if (!bNewDoc)
164 {
165 const SwNodeIndex& rTmp = pPam->GetPoint()->nNode;
166 pInsPam = new SwPaM( rTmp, rTmp, 0, -1 );
167 nSttCntnt = pPam->GetPoint()->nContent.GetIndex();
168 }
169
170 SwTxtFmtColl *pColl = 0;
171
172 if (bNewDoc)
173 {
174 pColl = pDoc->GetTxtCollFromPool(RES_POOLCOLL_HTML_PRE, false);
175 if (!pColl)
176 pColl = pDoc->GetTxtCollFromPool(RES_POOLCOLL_STANDARD,false);
177 if (pColl)
178 pDoc->SetTxtFmtColl(*pPam, pColl);
179 }
180
181 sal_uLong nError = ReadChars();
182
183 if( pItemSet )
184 {
185 // set only the attribute, for scanned scripts.
186 if( !( SCRIPTTYPE_LATIN & nScript ))
187 {
188 pItemSet->ClearItem( RES_CHRATR_FONT );
189 pItemSet->ClearItem( RES_CHRATR_LANGUAGE );
190 }
191 if( !( SCRIPTTYPE_ASIAN & nScript ))
192 {
193 pItemSet->ClearItem( RES_CHRATR_CJK_FONT );
194 pItemSet->ClearItem( RES_CHRATR_CJK_LANGUAGE );
195 }
196 if( !( SCRIPTTYPE_COMPLEX & nScript ))
197 {
198 pItemSet->ClearItem( RES_CHRATR_CTL_FONT );
199 pItemSet->ClearItem( RES_CHRATR_CTL_LANGUAGE );
200 }
201 if( pItemSet->Count() )
202 {
203 if( bNewDoc )
204 {
205 if (pColl)
206 {
207 // Using the pool defaults for the font causes significant
208 // trouble for the HTML filter, because it is not able
209 // to export the pool defaults (or to be more precice:
210 // the HTML filter is not able to detect whether a pool
211 // default has changed or not. Even a comparison with the
212 // HTMLi template does not work, because the defaults are
213 // not copied when a new doc is created. The result of
214 // comparing pool defaults therfor would be that the
215 // defaults are exported always if the have changed for
216 // text documents in general. That's not sensible, as well
217 // as it is not sensible to export them always.
218 sal_uInt16 aWhichIds[4] =
219 {
220 RES_CHRATR_FONT, RES_CHRATR_CJK_FONT,
221 RES_CHRATR_CTL_FONT, 0
222 };
223 sal_uInt16 *pWhichIds = aWhichIds;
224 while (*pWhichIds)
225 {
226 const SfxPoolItem *pItem;
227 if (SFX_ITEM_SET == pItemSet->GetItemState(*pWhichIds,
228 false, &pItem))
229 {
230 pColl->SetFmtAttr( *pItem );
231 pItemSet->ClearItem( *pWhichIds );
232 }
233 ++pWhichIds;
234 }
235 }
236 if (pItemSet->Count())
237 pDoc->SetDefault(*pItemSet);
238 }
239 else if( pInsPam )
240 {
241 // then set over the insert range the defined attributes
242 *pInsPam->GetMark() = *pPam->GetPoint();
243 pInsPam->GetPoint()->nNode++;
244 pInsPam->GetPoint()->nContent.Assign(
245 pInsPam->GetCntntNode(), nSttCntnt );
246
247 // !!!!!
248 ASSERT( !this, "Have to change - hard attr. to para. style" );
249 pDoc->InsertItemSet( *pInsPam, *pItemSet, 0 );
250 }
251 }
252 delete pItemSet, pItemSet = 0;
253 }
254
255 if( pInsPam )
256 delete pInsPam;
257
258 ::EndProgress( pDoc->GetDocShell() );
259 return nError;
260 }
261
ReadChars()262 sal_uLong SwASCIIParser::ReadChars()
263 {
264 sal_Unicode *pStt = 0, *pEnd = 0, *pLastStt = 0;
265 long nReadCnt = 0, nLineLen = 0;
266 sal_Unicode cLastCR = 0;
267 bool bSwapUnicode = false;
268
269 const SwAsciiOptions *pUseMe=&rOpt;
270 SwAsciiOptions aEmpty;
271 if (nFileSize >= 2 &&
272 aEmpty.GetFontName() == rOpt.GetFontName() &&
273 aEmpty.GetCharSet() == rOpt.GetCharSet() &&
274 aEmpty.GetLanguage() == rOpt.GetLanguage() &&
275 aEmpty.GetParaFlags() == rOpt.GetParaFlags())
276 {
277 sal_uLong nLen, nOrig;
278 nOrig = nLen = rInput.Read(pArr, ASC_BUFFLEN);
279 CharSet eCharSet;
280 bool bRet = SwIoSystem::IsDetectableText(pArr, nLen, &eCharSet, &bSwapUnicode);
281 ASSERT(bRet, "Autodetect of text import without nag dialog must "
282 "have failed");
283 if (bRet && eCharSet != RTL_TEXTENCODING_DONTKNOW)
284 {
285 aEmpty.SetCharSet(eCharSet);
286 rInput.SeekRel(-(long(nLen)));
287 }
288 else
289 rInput.SeekRel(-(long(nOrig)));
290 pUseMe=&aEmpty;
291 }
292
293 rtl_TextToUnicodeConverter hConverter=0;
294 rtl_TextToUnicodeContext hContext=0;
295 CharSet currentCharSet = pUseMe->GetCharSet();
296 if (RTL_TEXTENCODING_UCS2 != currentCharSet)
297 {
298 if( currentCharSet == RTL_TEXTENCODING_DONTKNOW )
299 currentCharSet = RTL_TEXTENCODING_ASCII_US;
300 hConverter = rtl_createTextToUnicodeConverter( currentCharSet );
301 ASSERT( hConverter, "no string convert avaiable" );
302 if (!hConverter)
303 return ERROR_SW_READ_BASE;
304 bSwapUnicode = false;
305 hContext = rtl_createTextToUnicodeContext( hConverter );
306 }
307 else if (pUseMe != &aEmpty) //Already successfully figured out type
308 {
309 rInput.StartReadingUnicodeText( currentCharSet );
310 bSwapUnicode = rInput.IsEndianSwap();
311 }
312
313 String sWork;
314 sal_uLong nArrOffset = 0;
315
316 do {
317 if( pStt >= pEnd )
318 {
319 if( pLastStt != pStt )
320 InsertText( String( pLastStt ));
321
322 // lese einen neuen Block ein
323 sal_uLong lGCount;
324 if( SVSTREAM_OK != rInput.GetError() || 0 == (lGCount =
325 rInput.Read( pArr + nArrOffset,
326 ASC_BUFFLEN - nArrOffset )))
327 break; // aus der WHILE-Schleife heraus
328
329 /*
330 #98380#
331 If there was some unconverted bytes on the last cycle then they
332 were put at the beginning of the array, so total bytes available
333 to convert this cycle includes them. If we found 0 following bytes
334 then we ignore the previous partial character.
335 */
336 lGCount+=nArrOffset;
337
338 if( hConverter )
339 {
340 sal_uInt32 nInfo;
341 sal_Size nNewLen = lGCount, nCntBytes;
342 sal_Unicode* pBuf = sWork.AllocBuffer( static_cast< xub_StrLen >(nNewLen) );
343
344 nNewLen = rtl_convertTextToUnicode( hConverter, hContext,
345 pArr, lGCount, pBuf, nNewLen,
346 (
347 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
348 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
349 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT |
350 RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE
351 ),
352 &nInfo,
353 &nCntBytes );
354 if( 0 != ( nArrOffset = lGCount - nCntBytes ) )
355 memmove( pArr, pArr + nCntBytes, nArrOffset );
356 sWork.ReleaseBufferAccess( static_cast< xub_StrLen >(nNewLen) );
357
358 pStt = pLastStt = sWork.GetBufferAccess();
359 pEnd = pStt + nNewLen;
360 }
361 else
362 {
363 pStt = pLastStt = (sal_Unicode*)pArr;
364 pEnd = (sal_Unicode*)(pArr + lGCount);
365
366 if( bSwapUnicode )
367 {
368 sal_Char* pF = pArr, *pN = pArr + 1;
369 for( sal_uLong n = 0; n < lGCount; n += 2, pF += 2, pN += 2 )
370 {
371 sal_Char c = *pF;
372 *pF = *pN;
373 *pN = c;
374 }
375 }
376 }
377
378 *pEnd = 0;
379 nReadCnt += lGCount;
380
381 ::SetProgressState( nReadCnt, pDoc->GetDocShell() );
382
383 if( cLastCR )
384 {
385 if( 0x0a == *pStt && 0x0d == cLastCR )
386 pLastStt = ++pStt;
387 cLastCR = 0;
388 nLineLen = 0;
389 // JP 03.04.96: das letze am Ende nehmen wir nicht
390 if( !rInput.IsEof() || !(pEnd == pStt ||
391 ( !*pEnd && pEnd == pStt+1 ) ) )
392 pDoc->SplitNode( *pPam->GetPoint(), false );
393 }
394 }
395
396 bool bIns = true, bSplitNode = false;
397 switch( *pStt )
398 {
399 //JP 12.11.2001: task 94636 - don't ignore all behind the zero character,
400 // change it to the default "control character"
401 // case 0:
402 // pEnd = pStt;
403 // bIns = false ;
404 // break;
405
406 case 0x0a: if( LINEEND_LF == pUseMe->GetParaFlags() )
407 {
408 bIns = false;
409 *pStt = 0;
410 ++pStt;
411
412 // JP 03.04.96: das letze am Ende nehmen wir nicht
413 if( !rInput.IsEof() || pEnd != pStt )
414 bSplitNode = true;
415 }
416 break;
417
418 case 0x0d: if( LINEEND_LF != pUseMe->GetParaFlags() )
419 {
420 bIns = false;
421 *pStt = 0;
422 ++pStt;
423
424 bool bChkSplit = false;
425 if( LINEEND_CRLF == pUseMe->GetParaFlags() )
426 {
427 if( pStt == pEnd )
428 cLastCR = 0x0d;
429 else if( 0x0a == *pStt )
430 {
431 ++pStt;
432 bChkSplit = true;
433 }
434 }
435 else
436 bChkSplit = true;
437
438 // JP 03.04.96: das letze am Ende nehmen wir nicht
439 if( bChkSplit && ( !rInput.IsEof() || pEnd != pStt ))
440 bSplitNode = true;
441 }
442 break;
443
444 case 0x0c:
445 {
446 // dann mal einen harten Seitenumbruch einfuegen
447 *pStt++ = 0;
448 if( nLineLen )
449 {
450 // Change to charset system!!!!
451 //rOpt.GetCharSet();
452 InsertText( String( pLastStt ));
453 }
454 pDoc->SplitNode( *pPam->GetPoint(), false );
455 pDoc->InsertPoolItem(
456 *pPam, SvxFmtBreakItem( SVX_BREAK_PAGE_BEFORE, RES_BREAK ), 0);
457 pLastStt = pStt;
458 nLineLen = 0;
459 bIns = false;
460 }
461 break;
462
463 case 0x1a:
464 if( nReadCnt == nFileSize && pStt+1 == pEnd )
465 *pStt = 0;
466 else
467 *pStt = '#'; // Ersatzdarstellung
468 break;
469
470 case '\t': break;
471
472 default:
473 if( ' ' > *pStt )
474 // Ctrl-Zchn gefunden ersetze durch '#'
475 *pStt = '#';
476 break;
477 }
478
479 if( bIns )
480 {
481 if( ( nLineLen >= MAX_ASCII_PARA - 100 ) &&
482 ( ( *pStt == ' ' ) || ( nLineLen >= MAX_ASCII_PARA - 1 ) ) )
483 {
484 sal_Unicode c = *pStt;
485 *pStt = 0;
486 InsertText( String( pLastStt ));
487 pDoc->SplitNode( *pPam->GetPoint(), false );
488 pLastStt = pStt;
489 nLineLen = 0;
490 *pStt = c;
491 }
492 ++pStt;
493 ++nLineLen;
494 }
495 else if( bSplitNode )
496 {
497 // es wurde ein CR/LF erkannt, also speichere den Text
498
499 InsertText( String( pLastStt ));
500 pDoc->SplitNode( *pPam->GetPoint(), false );
501 pLastStt = pStt;
502 nLineLen = 0;
503 }
504 } while(true);
505
506 if( hConverter )
507 {
508 rtl_destroyTextToUnicodeContext( hConverter, hContext );
509 rtl_destroyTextToUnicodeConverter( hConverter );
510 }
511 return 0;
512 }
513
InsertText(const String & rStr)514 void SwASCIIParser::InsertText( const String& rStr )
515 {
516 pDoc->InsertString( *pPam, rStr );
517 if( pItemSet && pBreakIt && nScript != ( SCRIPTTYPE_LATIN |
518 SCRIPTTYPE_ASIAN |
519 SCRIPTTYPE_COMPLEX ) )
520 nScript |= pBreakIt->GetAllScriptsOfText( rStr );
521 }
522
523 /* vi:set tabstop=4 shiftwidth=4 expandtab: */
524