xref: /aoo4110/main/pyuno/demo/ooextract.py (revision b1cdbd2c)
1# *************************************************************
2#
3#  Licensed to the Apache Software Foundation (ASF) under one
4#  or more contributor license agreements.  See the NOTICE file
5#  distributed with this work for additional information
6#  regarding copyright ownership.  The ASF licenses this file
7#  to you under the Apache License, Version 2.0 (the
8#  "License"); you may not use this file except in compliance
9#  with the License.  You may obtain a copy of the License at
10#
11#    http://www.apache.org/licenses/LICENSE-2.0
12#
13#  Unless required by applicable law or agreed to in writing,
14#  software distributed under the License is distributed on an
15#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16#  KIND, either express or implied.  See the License for the
17#  specific language governing permissions and limitations
18#  under the License.
19#
20# *************************************************************
21
22import getopt,sys
23import uno
24from unohelper import Base,systemPathToFileUrl, absolutize
25from os import getcwd
26
27from com.sun.star.beans import PropertyValue
28from com.sun.star.beans.PropertyState import DIRECT_VALUE
29from com.sun.star.uno import Exception as UnoException
30from com.sun.star.io import IOException,XInputStream, XOutputStream
31
class OutputStream( Base, XOutputStream ):
    """XOutputStream implementation that forwards everything to stdout.

    An instance is handed to the office as the "OutputStream" store
    property, so the exported document is delivered through writeBytes().
    """

    def __init__( self ):
        # 0 while the stream is open, 1 once closeOutput() has been called.
        self.closed = 0

    def closeOutput(self):
        """Record that the stream was closed; stdout itself stays open."""
        self.closed = 1

    def writeBytes( self, seq ):
        """Write the raw bytes carried by *seq* to standard output.

        seq -- object whose ``value`` attribute holds the bytes to write
               (presumably a uno.ByteSequence -- confirm against pyuno).
        """
        data = seq.value
        # Python 3 exposes the binary layer as sys.stdout.buffer and the text
        # layer rejects bytes; Python 2 has no such attribute and accepts the
        # byte string directly.
        binary = getattr( sys.stdout, "buffer", None )
        if binary is None:
            sys.stdout.write( data )
        else:
            # Push out pending text first so the output order is preserved.
            sys.stdout.flush()
            binary.write( data )

    def flush( self ):
        """Flush everything written so far to the underlying stdout."""
        sys.stdout.flush()
44
45
def main():
    """Export every document named on the command line to stdout.

    Parses the options from sys.argv, resolves the remote component context
    of a running office through the UNO URL resolver, then loads each file
    hidden and stores it to "private:stream" with the selected export filter
    and an OutputStream instance as the sink.

    The function never returns normally: it ends with sys.exit(). The exit
    status is 1 if an option was invalid or any UNO error was reported and
    0 otherwise (including --help and the "no files given" case).
    """
    retVal = 0

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hc:",["help", "connection-string=" , "html"])
        url = "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
        filterName = "Text (Encoded)"
        for o, a in opts:
            if o in ("-h", "--help"):
                usage()
                sys.exit()
            if o in ("-c", "--connection-string" ):
                url = "uno:" + a + ";urp;StarOffice.ComponentContext"
            if o == "--html":
                filterName = "HTML (StarWriter)"

        # NOTE(review): this goes to stdout, i.e. into the same stream as the
        # extracted document text -- confirm whether that is intended.
        print(filterName)
        if not args:
            usage()
            sys.exit()

        # Bootstrap a local context only to obtain the resolver, then switch
        # to the context and service manager of the running office.
        ctxLocal = uno.getComponentContext()
        smgrLocal = ctxLocal.ServiceManager

        resolver = smgrLocal.createInstanceWithContext(
                 "com.sun.star.bridge.UnoUrlResolver", ctxLocal )
        ctx = resolver.resolve( url )
        smgr = ctx.ServiceManager

        desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx )

        cwd = systemPathToFileUrl( getcwd() )
        outProps = (
            PropertyValue( "FilterName" , 0, filterName , 0 ),
            PropertyValue( "OutputStream",0, OutputStream(),0))
        # The trailing comma makes this a one-element tuple of properties.
        inProps = PropertyValue( "Hidden" , 0 , True, 0 ),
        for path in args:
            # Start every iteration without a document so that a failed load
            # never disposes the (already disposed) document of the previous
            # file a second time.
            doc = None
            try:
                fileUrl = uno.absolutize( cwd, systemPathToFileUrl(path) )
                doc = desktop.loadComponentFromURL( fileUrl , "_blank", 0,inProps)

                if not doc:
                    raise UnoException( "Couldn't open stream for unknown reason", None )

                doc.storeToURL("private:stream",outProps)
            except IOException as e:
                sys.stderr.write( "Error during conversion: " + e.Message + "\n" )
                retVal = 1
            except UnoException as e:
                sys.stderr.write( "Error ("+repr(e.__class__)+") during conversion:" + e.Message + "\n" )
                retVal = 1
            finally:
                # Release the hidden document even if an unexpected exception
                # is on its way out of the loop.
                if doc:
                    doc.dispose()

    except UnoException as e:
        sys.stderr.write( "Error ("+repr(e.__class__)+") :" + e.Message + "\n" )
        retVal = 1
    except getopt.GetoptError as e:
        sys.stderr.write( str(e) + "\n" )
        usage()
        retVal = 1

    sys.exit(retVal)
111
def usage():
    """Write the command-line help text to stderr."""
    sys.stderr.write(
        "usage: ooextract.py --help |\n"
        "       [-c <connection-string> | --connection-string=<connection-string>]\n"
        "       [--html]\n"
        "       file1 file2 ...\n"
        "\n"
        "Extracts plain text from documents and prints it to stdout.\n"
        "Requires an OpenOffice.org instance to be running. The script and the\n"
        "running OpenOffice.org instance must be able to access the file\n"
        "by the same system path.\n"
        "\n"
        "-c <connection-string> | --connection-string=<connection-string>\n"
        "        The connection-string part of a uno url to where\n"
        "        the script should connect to in order to do the conversion.\n"
        "        The string defaults to socket,host=localhost,port=2002\n"
        "--html\n"
        "        Instead of the text filter, the writer html filter is used\n"
        )
129
# Run the conversion only when executed as a script, so that importing the
# module does not connect to the office and exit the interpreter.
if __name__ == "__main__":
    main()
131