"""Extract text (or HTML) from documents through a running OpenOffice.org.

The script connects to an already running office instance over a UNO URL,
loads every file named on the command line as a hidden document, and stores
it through an export filter into a stream that is forwarded to stdout.
"""
import getopt
import sys
from os import getcwd

# NOTE: ``uno`` has to be imported before any ``com.sun.star`` module.
import uno
from unohelper import Base, systemPathToFileUrl, absolutize

from com.sun.star.beans import PropertyValue
from com.sun.star.beans.PropertyState import DIRECT_VALUE
from com.sun.star.uno import Exception as UnoException
from com.sun.star.io import IOException, XInputStream, XOutputStream


class OutputStream(Base, XOutputStream):
    """XOutputStream implementation that forwards everything to stdout."""

    def __init__(self):
        # Set to 1 by closeOutput(); nothing in this script reads it back.
        self.closed = 0

    def closeOutput(self):
        """Record that the stream was closed (stdout itself stays open)."""
        self.closed = 1

    def writeBytes(self, seq):
        """Write the payload of ``seq`` (read from ``seq.value``) to stdout.

        On Python 3 ``sys.stdout`` is a text stream that rejects bytes, so
        the underlying binary buffer is used when it exists; on Python 2
        ``sys.stdout`` itself accepts the data.
        """
        sink = getattr(sys.stdout, "buffer", sys.stdout)
        sink.write(seq.value)

    def flush(self):
        """No-op."""
        pass


def main():
    """Parse the command line, convert every given file, then exit.

    Options: ``-h``/``--help`` prints the usage and exits,
    ``-c``/``--connection-string`` overrides the connection part of the UNO
    URL, ``--html`` selects the Writer HTML filter instead of the text one.

    Always terminates via ``sys.exit``: status 1 if an option was invalid or
    any UNO error was reported, status 0 otherwise (including the case where
    only the usage text is shown).
    """
    exit_status = 0

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hc:", ["help", "connection-string=", "html"])
        url = "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
        filterName = "Text (Encoded)"
        for o, a in opts:
            if o in ("-h", "--help"):
                usage()
                sys.exit()
            if o in ("-c", "--connection-string"):
                url = "uno:" + a + ";urp;StarOffice.ComponentContext"
            if o == "--html":
                filterName = "HTML (StarWriter)"

        # NOTE(review): the filter name is echoed to stdout ahead of the
        # converted data, exactly as before. Flush right away so it cannot
        # be reordered behind bytes written straight to the binary buffer.
        sys.stdout.write(filterName + "\n")
        sys.stdout.flush()

        if not args:
            usage()
            sys.exit()

        # Bootstrap a local context only to obtain the URL resolver, then
        # switch to the context of the remote office instance.
        ctxLocal = uno.getComponentContext()
        smgrLocal = ctxLocal.ServiceManager
        resolver = smgrLocal.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", ctxLocal)
        ctx = resolver.resolve(url)
        smgr = ctx.ServiceManager

        desktop = smgr.createInstanceWithContext(
            "com.sun.star.frame.Desktop", ctx)

        cwd = systemPathToFileUrl(getcwd())
        outProps = (
            PropertyValue("FilterName", 0, filterName, 0),
            PropertyValue("OutputStream", 0, OutputStream(), 0),
        )
        inProps = (PropertyValue("Hidden", 0, True, 0),)

        for path in args:
            # Reset per file: a failed load must not leave the previous,
            # already disposed document to be disposed a second time.
            doc = None
            try:
                fileUrl = uno.absolutize(cwd, systemPathToFileUrl(path))
                doc = desktop.loadComponentFromURL(fileUrl, "_blank", 0, inProps)

                if not doc:
                    raise UnoException(
                        "Couldn't open stream for unknown reason", None)

                doc.storeToURL("private:stream", outProps)
            except IOException as e:
                sys.stderr.write("Error during conversion: " + e.Message + "\n")
                exit_status = 1
            except UnoException as e:
                sys.stderr.write(
                    "Error (" + repr(e.__class__) + ") during conversion:"
                    + e.Message + "\n")
                exit_status = 1
            finally:
                if doc:
                    doc.dispose()

    except UnoException as e:
        sys.stderr.write("Error (" + repr(e.__class__) + ") :" + e.Message + "\n")
        exit_status = 1
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + "\n")
        usage()
        exit_status = 1

    sys.exit(exit_status)


def usage():
    """Write the command-line help text to stderr."""
    sys.stderr.write(
        "usage: ooextract.py -h | --help |\n"
        "       [-c <connection-string> | --connection-string=<connection-string>]\n"
        "       [--html]\n"
        "       file1 file2 ...\n"
        "\n"
        "Extracts plain text from documents and prints it to stdout.\n"
        "Requires an OpenOffice.org instance to be running. The script and the\n"
        "running OpenOffice.org instance must be able to access the file\n"
        "by the same system path.\n"
        "\n"
        "-c <connection-string> | --connection-string=<connection-string>\n"
        "        The connection-string part of a uno url to where the\n"
        "        script should connect to in order to do the conversion.\n"
        "        The string defaults to socket,host=localhost,port=2002\n"
        "--html \n"
        "        Instead of the text filter, the writer html filter is used\n"
    )


if __name__ == "__main__":
    main()