# *************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# *************************************************************

import getopt
import sys
import uno
from unohelper import Base, systemPathToFileUrl, absolutize
from os import getcwd

from com.sun.star.beans import PropertyValue
from com.sun.star.beans.PropertyState import DIRECT_VALUE
from com.sun.star.uno import Exception as UnoException
from com.sun.star.io import IOException, XInputStream, XOutputStream


class OutputStream(Base, XOutputStream):
    """XOutputStream implementation that forwards written data to stdout."""

    def __init__(self):
        # Set to 1 once closeOutput() has been called.
        self.closed = 0

    def closeOutput(self):
        """Record that the stream was closed; stdout itself stays open."""
        self.closed = 1

    def writeBytes(self, seq):
        """Write the payload of the byte sequence *seq* to stdout.

        ``seq.value`` is written through the binary layer of stdout when one
        exists, because a text-mode stream rejects ``bytes`` objects.
        """
        data = seq.value
        raw = getattr(sys.stdout, "buffer", None)
        if raw is None:
            # No binary layer available: write to the stream directly.
            sys.stdout.write(data)
        else:
            # Flush pending text output first so it is not reordered
            # behind data written straight to the underlying buffer.
            sys.stdout.flush()
            raw.write(data)

    def flush(self):
        pass


def main():
    """Convert every file named on the command line and print it to stdout.

    Options are parsed with getopt (see usage()).  The script resolves the
    remote component context through the UNO URL, loads each document hidden
    and stores it to "private:stream" with an OutputStream instance passed as
    the "OutputStream" property.  The process always ends through sys.exit();
    the exit status is 1 if an error was reported and 0 otherwise.
    """
    retVal = 0

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hc:", ["help", "connection-string=", "html"])
        url = "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
        filterName = "Text (Encoded)"
        for o, a in opts:
            if o in ("-h", "--help"):
                usage()
                sys.exit()
            if o in ("-c", "--connection-string"):
                url = "uno:" + a + ";urp;StarOffice.ComponentContext"
            if o == "--html":
                filterName = "HTML (StarWriter)"

        # NOTE(review): this goes to stdout, ahead of the converted text.
        print(filterName)
        if not args:
            usage()
            sys.exit()

        ctxLocal = uno.getComponentContext()
        smgrLocal = ctxLocal.ServiceManager

        resolver = smgrLocal.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", ctxLocal)
        ctx = resolver.resolve(url)
        smgr = ctx.ServiceManager

        desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)

        cwd = systemPathToFileUrl(getcwd())
        outProps = (
            PropertyValue("FilterName", 0, filterName, 0),
            PropertyValue("OutputStream", 0, OutputStream(), 0))
        inProps = PropertyValue("Hidden", 0, True, 0),
        for path in args:
            # Start every iteration without a document so that a failed load
            # never disposes the document of a previous iteration again.
            doc = None
            try:
                fileUrl = uno.absolutize(cwd, systemPathToFileUrl(path))
                doc = desktop.loadComponentFromURL(fileUrl, "_blank", 0, inProps)

                if not doc:
                    raise UnoException("Couldn't open stream for unknown reason", None)

                doc.storeToURL("private:stream", outProps)
            except IOException as e:
                sys.stderr.write("Error during conversion: " + e.Message + "\n")
                retVal = 1
            except UnoException as e:
                sys.stderr.write("Error (" + repr(e.__class__) + ") during conversion:" + e.Message + "\n")
                retVal = 1
            finally:
                # Dispose the document on every path, including exceptions
                # that are not handled above.
                if doc:
                    doc.dispose()

    except UnoException as e:
        sys.stderr.write("Error (" + repr(e.__class__) + ") :" + e.Message + "\n")
        retVal = 1
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + "\n")
        usage()
        retVal = 1

    sys.exit(retVal)


def usage():
    """Write the command line help text to stderr."""
    sys.stderr.write(
        "usage: ooextract.py --help |\n"
        " [-c <connection-string> | --connection-string=<connection-string>]\n"
        " [--html]\n"
        " file1 file2 ...\n"
        "\n"
        "Extracts plain text from documents and prints it to stdout.\n"
        "Requires an OpenOffice.org instance to be running. The script and the\n"
        "running OpenOffice.org instance must be able to access the file with\n"
        "by the same system path.\n"
        "\n"
        "-c <connection-string> | --connection-string=<connection-string>\n"
        " The connection-string part of a uno url to where the\n"
        " the script should connect to in order to do the conversion.\n"
        " The strings defaults to socket,host=localhost,port=2002\n"
        "--html \n"
        " Instead of the text filter, the writer html filter is used\n"
    )


main()