xref: /aoo41x/main/pyuno/demo/ooextract.py (revision cdf0e10c)
1*cdf0e10cSrcweirimport getopt,sys
2*cdf0e10cSrcweirimport uno
3*cdf0e10cSrcweirfrom unohelper import Base,systemPathToFileUrl, absolutize
4*cdf0e10cSrcweirfrom os import getcwd
5*cdf0e10cSrcweir
6*cdf0e10cSrcweirfrom com.sun.star.beans import PropertyValue
7*cdf0e10cSrcweirfrom com.sun.star.beans.PropertyState import DIRECT_VALUE
8*cdf0e10cSrcweirfrom com.sun.star.uno import Exception as UnoException
9*cdf0e10cSrcweirfrom com.sun.star.io import IOException,XInputStream, XOutputStream
10*cdf0e10cSrcweir
class OutputStream(Base, XOutputStream):
    """XOutputStream implementation that forwards written data to stdout.

    An instance is handed to the office as the "OutputStream" store
    property, so the exported document is emitted on this process's
    standard output instead of being written to a file.
    """

    def __init__(self):
        # Flipped to 1 by closeOutput(); nothing in this class reads it back.
        self.closed = 0

    def closeOutput(self):
        """Record that the stream has been closed."""
        self.closed = 1

    def writeBytes(self, seq):
        """Write the payload of ``seq`` (its ``value`` attribute) to stdout.

        NOTE(review): ``seq`` is presumably a ``uno.ByteSequence`` whose
        ``value`` is a byte string -- confirm against the PyUNO bridge.
        """
        binary_out = getattr(sys.stdout, "buffer", None)
        if binary_out is None:
            # No separate binary layer (e.g. Python 2): stdout takes the
            # byte string directly.
            sys.stdout.write(seq.value)
        else:
            # Text-mode stdout rejects bytes; use the underlying binary
            # stream, flushing pending text first to keep output in order.
            sys.stdout.flush()
            binary_out.write(seq.value)

    def flush(self):
        """Flush stdout so everything written so far reaches the consumer."""
        sys.stdout.flush()
23*cdf0e10cSrcweir
24*cdf0e10cSrcweir
def main():
    """Convert every file named on the command line and stream it to stdout.

    Parses the command line, resolves the component context of a running
    office instance through the UNO URL resolver, loads each given file as
    a hidden document and stores it to ``private:stream`` with an
    ``OutputStream`` sink, using the selected export filter.

    Options:
        -h, --help                 print usage and exit
        -c, --connection-string=   connection part of the UNO URL
        --html                     use the Writer HTML filter instead of
                                   the encoded-text filter

    Always terminates the process through ``sys.exit``: status 0 on success
    (and after printing usage for --help or a missing file list), status 1
    if option parsing, the connection, or any conversion failed.
    """
    retVal = 0

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hc:", ["help", "connection-string=", "html"])
        url = "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
        filterName = "Text (Encoded)"
        for o, a in opts:
            if o in ("-h", "--help"):
                usage()
                sys.exit()
            if o in ("-c", "--connection-string"):
                url = "uno:" + a + ";urp;StarOffice.ComponentContext"
            if o == "--html":
                filterName = "HTML (StarWriter)"

        # NOTE(review): the filter name goes to stdout ahead of the converted
        # text; kept because existing consumers may expect that first line.
        sys.stdout.write(filterName + "\n")
        if not args:
            usage()
            sys.exit()

        # Bootstrap a local context only to obtain the resolver, then work
        # with the context and service manager of the remote office.
        ctxLocal = uno.getComponentContext()
        smgrLocal = ctxLocal.ServiceManager

        resolver = smgrLocal.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", ctxLocal)
        ctx = resolver.resolve(url)
        smgr = ctx.ServiceManager

        desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)

        cwd = systemPathToFileUrl(getcwd())
        outProps = (
            PropertyValue("FilterName", 0, filterName, 0),
            PropertyValue("OutputStream", 0, OutputStream(), 0),
        )
        # Load properties must be a sequence, hence the one-element tuple.
        inProps = (PropertyValue("Hidden", 0, True, 0),)

        for path in args:
            # Reset per file: a failed load must not leave the previous,
            # already disposed document to be disposed a second time.
            doc = None
            try:
                fileUrl = uno.absolutize(cwd, systemPathToFileUrl(path))
                doc = desktop.loadComponentFromURL(fileUrl, "_blank", 0, inProps)

                if not doc:
                    raise UnoException("Couldn't open stream for unknown reason", None)

                doc.storeToURL("private:stream", outProps)
            except IOException as e:
                sys.stderr.write("Error during conversion: " + e.Message + "\n")
                retVal = 1
            except UnoException as e:
                sys.stderr.write("Error (" + repr(e.__class__) + ") during conversion:" + e.Message + "\n")
                retVal = 1
            finally:
                # Release the document however the conversion ended.
                if doc:
                    doc.dispose()

    except UnoException as e:
        sys.stderr.write("Error (" + repr(e.__class__) + ") :" + e.Message + "\n")
        retVal = 1
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + "\n")
        usage()
        retVal = 1

    sys.exit(retVal)
90*cdf0e10cSrcweir
def usage():
    """Write the command-line help text for ooextract.py to stderr."""
    help_lines = (
        "usage: ooextract.py --help |\n",
        "       [-c <connection-string> | --connection-string=<connection-string>\n",
        "       file1 file2 ...\n",
        "\n",
        "Extracts plain text from documents and prints it to stdout.\n",
        "Requires an OpenOffice.org instance to be running. The script and the\n",
        "running OpenOffice.org instance must be able to access the file with\n",
        "by the same system path.\n",
        "\n",
        "-c <connection-string> | --connection-string=<connection-string>\n",
        "        The connection-string part of a uno url to where the\n",
        "        the script should connect to in order to do the conversion.\n",
        "        The strings defaults to socket,host=localhost,port=2002\n",
        "--html \n",
        "        Instead of the text filter, the writer html filter is used\n",
    )
    # Emit the whole text with a single write call.
    sys.stderr.write("".join(help_lines))
108*cdf0e10cSrcweir
# Run the converter only when executed as a script, so importing this module
# does not connect to the office or call sys.exit().
if __name__ == "__main__":
    main()
110