xref: /aoo41x/main/pyuno/demo/ooextract.py (revision d912c6c5)
# *************************************************************
#
#  Licensed to the Apache Software Foundation (ASF) under one
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing,
#  software distributed under the License is distributed on an
#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#  KIND, either express or implied.  See the License for the
#  specific language governing permissions and limitations
#  under the License.
#
# *************************************************************
21bd8ef897SAndrew Rist
22cdf0e10cSrcweirimport getopt,sys
23cdf0e10cSrcweirimport uno
24cdf0e10cSrcweirfrom unohelper import Base,systemPathToFileUrl, absolutize
25cdf0e10cSrcweirfrom os import getcwd
26cdf0e10cSrcweir
27cdf0e10cSrcweirfrom com.sun.star.beans import PropertyValue
28cdf0e10cSrcweirfrom com.sun.star.beans.PropertyState import DIRECT_VALUE
29cdf0e10cSrcweirfrom com.sun.star.uno import Exception as UnoException
30cdf0e10cSrcweirfrom com.sun.star.io import IOException,XInputStream, XOutputStream
31cdf0e10cSrcweir
class OutputStream( Base, XOutputStream ):
    """Output stream object that forwards every received chunk to stdout.

    An instance is handed to the office as the "OutputStream" property of a
    store request, so the exported document ends up on the standard output
    of this script.
    """

    def __init__( self ):
        # Set to 1 by closeOutput(); nothing else in this class reads it.
        self.closed = 0

    def closeOutput(self):
        """Record that the stream has been closed."""
        self.closed = 1

    def writeBytes( self, seq ):
        """Write the payload carried by seq (its .value attribute) to stdout.

        NOTE(review): seq is presumably a uno.ByteSequence whose .value is a
        byte string -- confirm against the pyuno version in use.
        """
        # Push out anything already printed through the text layer so that
        # the raw bytes written below cannot overtake it.
        sys.stdout.flush()
        # Python 3 text streams reject byte strings; their binary layer is
        # exposed as .buffer.  Python 2 file objects have no such attribute
        # and accept the byte string directly.
        rawOut = getattr( sys.stdout, "buffer", sys.stdout )
        rawOut.write( seq.value )

    def flush( self ):
        """Flush everything written so far through to stdout."""
        sys.stdout.flush()
44cdf0e10cSrcweir
45cdf0e10cSrcweir
def main():
    """Convert every document named on the command line and stream it to stdout.

    Parses the command-line options, connects to the office instance
    addressed by the UNO URL, loads each file hidden and stores it to
    "private:stream" through the selected export filter.

    The function always terminates through sys.exit(): with status 1 when an
    option was invalid or a UNO error was reported, with status 0 otherwise
    (including after printing the usage text for --help or missing files).
    """
    retVal = 0

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hc:", ["help", "connection-string=", "html"] )
        url = "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
        filterName = "Text (Encoded)"
        for o, a in opts:
            if o in ("-h", "--help"):
                usage()
                sys.exit()
            elif o in ("-c", "--connection-string"):
                url = "uno:" + a + ";urp;StarOffice.ComponentContext"
            elif o == "--html":
                filterName = "HTML (StarWriter)"

        # The selected filter name is echoed on stdout before any document.
        print(filterName)
        if not args:
            usage()
            sys.exit()

        # Bootstrap a local component context and use it to resolve the
        # context of the office instance addressed by url.
        ctxLocal = uno.getComponentContext()
        smgrLocal = ctxLocal.ServiceManager

        resolver = smgrLocal.createInstanceWithContext(
                 "com.sun.star.bridge.UnoUrlResolver", ctxLocal )
        ctx = resolver.resolve( url )
        smgr = ctx.ServiceManager

        desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx )

        cwd = systemPathToFileUrl( getcwd() )
        outProps = (
            PropertyValue( "FilterName" , 0, filterName , 0 ),
            PropertyValue( "OutputStream", 0, OutputStream(), 0 ) )
        inProps = ( PropertyValue( "Hidden" , 0 , True, 0 ), )
        for path in args:
            # Start every file without a document handle: otherwise a failed
            # load would leave the previous, already disposed document in
            # doc and dispose it a second time.
            doc = None
            try:
                fileUrl = uno.absolutize( cwd, systemPathToFileUrl(path) )
                doc = desktop.loadComponentFromURL( fileUrl , "_blank", 0, inProps )

                if not doc:
                    raise UnoException( "Couldn't open stream for unknown reason", None )

                doc.storeToURL( "private:stream", outProps )
            except IOException as e:
                sys.stderr.write( "Error during conversion: " + e.Message + "\n" )
                retVal = 1
            except UnoException as e:
                sys.stderr.write( "Error ("+repr(e.__class__)+") during conversion:" + e.Message + "\n" )
                retVal = 1
            finally:
                # Release the loaded document whether or not the export worked.
                if doc:
                    doc.dispose()

    except UnoException as e:
        sys.stderr.write( "Error ("+repr(e.__class__)+") :" + e.Message + "\n" )
        retVal = 1
    except getopt.GetoptError as e:
        sys.stderr.write( str(e) + "\n" )
        usage()
        retVal = 1

    sys.exit(retVal)
111*d912c6c5SPedro Giffuni
def usage():
    """Write the command-line help text to stderr."""
    sys.stderr.write( "usage: ooextract.py -h | --help |\n"+
                  "       [-c <connection-string> | --connection-string=<connection-string>]\n"+
                  "       [--html]\n"+
                  "       file1 file2 ...\n"+
                  "\n" +
                  "Extracts plain text from documents and prints it to stdout.\n" +
                  "Requires an OpenOffice.org instance to be running. The script and the\n"+
                  "running OpenOffice.org instance must be able to access the file\n"+
                  "by the same system path.\n" +
                  "\n"+
                  "-c <connection-string> | --connection-string=<connection-string>\n" +
                  "        The connection-string part of a uno url to which the\n" +
                  "        script should connect in order to do the conversion.\n" +
                  "        The string defaults to socket,host=localhost,port=2002\n" +
                  "--html\n" +
                  "        Instead of the text filter, the writer html filter is used\n"
                  )
129cdf0e10cSrcweir
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
131