1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // TODO: Woher? 29 #define Max( a, b ) (((a)>(b)) ? (a) : (b) ) 30 #define Min( a, b ) (((a)<(b)) ? (a) : (b) ) 31 32 /* 33 * 34 * Text2UnicodeConverter 35 * 36 **/ 37 namespace sax_expatwrap { 38 39 class Text2UnicodeConverter 40 { 41 42 public: 43 Text2UnicodeConverter( const ::rtl::OString & sEncoding ); 44 ~Text2UnicodeConverter(); 45 46 ::com::sun::star::uno::Sequence < sal_Unicode > convert( const ::com::sun::star::uno::Sequence<sal_Int8> & ); 47 sal_Bool canContinue() { return m_bCanContinue; } 48 49 private: 50 void init( rtl_TextEncoding encoding ); 51 52 rtl_TextToUnicodeConverter m_convText2Unicode; 53 rtl_TextToUnicodeContext m_contextText2Unicode; 54 sal_Bool m_bCanContinue; 55 sal_Bool m_bInitialized; 56 rtl_TextEncoding m_rtlEncoding; 57 ::com::sun::star::uno::Sequence<sal_Int8> m_seqSource; 58 }; 59 60 /*---------------------------------------- 61 * 62 * Unicode2TextConverter 63 * 64 **-----------------------------------------*/ 65 class Unicode2TextConverter 66 { 67 public: 68 Unicode2TextConverter( rtl_TextEncoding encoding ); 69 ~Unicode2TextConverter(); 70 71 inline ::com::sun::star::uno::Sequence<sal_Int8> convert( const ::rtl::OUString &s ) 72 { 73 return convert( s.getStr() , s.getLength() ); 74 } 75 ::com::sun::star::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength ); 76 sal_Bool canContinue() { return m_bCanContinue; } 77 78 private: 79 void init( rtl_TextEncoding encoding ); 80 81 rtl_UnicodeToTextConverter m_convUnicode2Text; 82 rtl_UnicodeToTextContext m_contextUnicode2Text; 83 sal_Bool m_bCanContinue; 84 sal_Bool m_bInitialized; 85 rtl_TextEncoding m_rtlEncoding; 86 ::com::sun::star::uno::Sequence<sal_Unicode> m_seqSource; 87 }; 88 89 90 91 /*---------------------------------------- 92 * 93 * XMLFile2UTFConverter 94 * 95 **-----------------------------------------*/ 96 class XMLFile2UTFConverter 97 { 98 public: 99 XMLFile2UTFConverter( ): 100 m_bStarted( sal_False ), 101 m_pText2Unicode( 0 ), 102 m_pUnicode2Text( 0 ) 103 {} 104 105 ~XMLFile2UTFConverter(); 106 107 void setInputStream( ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > &r ) { m_in = r; } 108 void setEncoding( const ::rtl::OString &s ) { m_sEncoding = s; } 109 110 111 112 // @param nMaxToRead The number of chars, that should be read. Note that this is no exact number. There 113 // may be returned less or more bytes than ordered. 114 sal_Int32 readAndConvert( ::com::sun::star::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead ) 115 throw ( ::com::sun::star::io::IOException, 116 ::com::sun::star::io::NotConnectedException , 117 ::com::sun::star::io::BufferSizeExceededException , 118 ::com::sun::star::uno::RuntimeException ); 119 120 private: 121 122 // Called only on first Sequence of bytes. Tries to figure out file format and encoding information. 123 // @return TRUE, when encoding information could be retrieved 124 // @return FALSE, when no encoding information was found in file 125 sal_Bool scanForEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq ); 126 127 // Called only on first Sequence of bytes. Tries to figure out 128 // if enough data is available to scan encoding 129 // @return TRUE, when encoding is retrievable 130 // @return FALSE, when more data is needed 131 sal_Bool isEncodingRecognizable( const ::com::sun::star::uno::Sequence< sal_Int8 > & seq ); 132 133 // When encoding attribute is within the text (in the first line), it is removed. 134 void removeEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq ); 135 136 // Initializes decoding depending on m_sEncoding setting 137 void initializeDecoding(); 138 private: 139 ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > m_in; 140 141 sal_Bool m_bStarted; 142 ::rtl::OString m_sEncoding; 143 144 Text2UnicodeConverter *m_pText2Unicode; 145 Unicode2TextConverter *m_pUnicode2Text; 146 }; 147 } 148