1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // TODO: Woher? 25 #define Max( a, b ) (((a)>(b)) ? (a) : (b) ) 26 #define Min( a, b ) (((a)<(b)) ? (a) : (b) ) 27 28 /* 29 * 30 * Text2UnicodeConverter 31 * 32 **/ 33 namespace sax_expatwrap { 34 35 class Text2UnicodeConverter 36 { 37 38 public: 39 Text2UnicodeConverter( const ::rtl::OString & sEncoding ); 40 ~Text2UnicodeConverter(); 41 42 ::com::sun::star::uno::Sequence < sal_Unicode > convert( const ::com::sun::star::uno::Sequence<sal_Int8> & ); canContinue()43 sal_Bool canContinue() { return m_bCanContinue; } 44 45 private: 46 void init( rtl_TextEncoding encoding ); 47 48 rtl_TextToUnicodeConverter m_convText2Unicode; 49 rtl_TextToUnicodeContext m_contextText2Unicode; 50 sal_Bool m_bCanContinue; 51 sal_Bool m_bInitialized; 52 rtl_TextEncoding m_rtlEncoding; 53 ::com::sun::star::uno::Sequence<sal_Int8> m_seqSource; 54 }; 55 56 /*---------------------------------------- 57 * 58 * Unicode2TextConverter 59 * 60 **-----------------------------------------*/ 61 class Unicode2TextConverter 62 { 63 public: 64 Unicode2TextConverter( rtl_TextEncoding encoding ); 65 ~Unicode2TextConverter(); 66 convert(const::rtl::OUString & s)67 inline ::com::sun::star::uno::Sequence<sal_Int8> convert( const ::rtl::OUString &s ) 68 { 69 return convert( s.getStr() , s.getLength() ); 70 } 71 ::com::sun::star::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength ); canContinue()72 sal_Bool canContinue() { return m_bCanContinue; } 73 74 private: 75 void init( rtl_TextEncoding encoding ); 76 77 rtl_UnicodeToTextConverter m_convUnicode2Text; 78 rtl_UnicodeToTextContext m_contextUnicode2Text; 79 sal_Bool m_bCanContinue; 80 sal_Bool m_bInitialized; 81 rtl_TextEncoding m_rtlEncoding; 82 ::com::sun::star::uno::Sequence<sal_Unicode> m_seqSource; 83 }; 84 85 86 87 /*---------------------------------------- 88 * 89 * XMLFile2UTFConverter 90 * 91 **-----------------------------------------*/ 92 class XMLFile2UTFConverter 93 { 94 public: XMLFile2UTFConverter()95 XMLFile2UTFConverter( ): 96 m_bStarted( sal_False ), 97 m_pText2Unicode( 0 ), 98 m_pUnicode2Text( 0 ) 99 {} 100 101 ~XMLFile2UTFConverter(); 102 setInputStream(::com::sun::star::uno::Reference<::com::sun::star::io::XInputStream> & r)103 void setInputStream( ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > &r ) { m_in = r; } setEncoding(const::rtl::OString & s)104 void setEncoding( const ::rtl::OString &s ) { m_sEncoding = s; } 105 106 107 108 // @param nMaxToRead The number of chars, that should be read. Note that this is no exact number. There 109 // may be returned less or more bytes than ordered. 110 sal_Int32 readAndConvert( ::com::sun::star::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead ) 111 throw ( ::com::sun::star::io::IOException, 112 ::com::sun::star::io::NotConnectedException , 113 ::com::sun::star::io::BufferSizeExceededException , 114 ::com::sun::star::uno::RuntimeException ); 115 116 private: 117 118 // Called only on first Sequence of bytes. Tries to figure out file format and encoding information. 119 // @return TRUE, when encoding information could be retrieved 120 // @return FALSE, when no encoding information was found in file 121 sal_Bool scanForEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq ); 122 123 // Called only on first Sequence of bytes. Tries to figure out 124 // if enough data is available to scan encoding 125 // @return TRUE, when encoding is retrievable 126 // @return FALSE, when more data is needed 127 sal_Bool isEncodingRecognizable( const ::com::sun::star::uno::Sequence< sal_Int8 > & seq ); 128 129 // When encoding attribute is within the text (in the first line), it is removed. 130 void removeEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq ); 131 132 // Initializes decoding depending on m_sEncoding setting 133 void initializeDecoding(); 134 private: 135 ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > m_in; 136 137 sal_Bool m_bStarted; 138 ::rtl::OString m_sEncoding; 139 140 Text2UnicodeConverter *m_pText2Unicode; 141 Unicode2TextConverter *m_pUnicode2Text; 142 }; 143 } 144