#!/usr/bin/env python
# script to convert openoffice calc files to CSV including cell notes
# (c) May 2005 Pablo Hoffman
# License: GPL
"""Convert an OpenOffice Calc (.sxc) file to CSV, including cell notes.

The script reads ``content.xml`` from the input archive, walks it with an
expat parser and writes one CSV line per table row that contains data.
Cells that have text are written in double quotes; when such a cell also
carries a note, the note lines are appended in parentheses, joined with
``notejoinchar``.  Empty cells are written as empty, unquoted fields.

Command line: ``sxc_file csv_file [encoding] [separator]``
"""

import io
import sys
import xml.parsers.expat
from zipfile import ZipFile

# Parser state shared by the expat handlers below.
incell, innote, isdata = 0, 0, 0
rows, notelines = [], []
celldata, celltext = "", ""
notejoinchar = " - "  # char to use when joining multi-line notes

# Run-time settings; parseargs() overwrites them from the command line.
sxcfile, csvfile = None, None
encoding = "utf_8"
separator = ","
csv = None  # writable text stream that receives the CSV rows


def _quote(field):
    """Return *field* wrapped in double quotes, with embedded quotes doubled."""
    return '"%s"' % field.replace('"', '""')


def start_element(name, attrs):
    """Expat start-tag handler: record entry into rows, cells, notes, paragraphs.

    ``attrs`` is accepted because expat passes it, but it is not used.
    NOTE(review): this means repeat attributes such as
    ``table:number-columns-repeated`` are not expanded -- confirm whether
    column alignment matters for your input files.
    """
    global incell, innote, rows, notelines, isdata, celldata, celltext
    if name == "table:table-cell":
        incell = 1
        # Reset everything collected for the previous cell; otherwise an
        # empty cell would repeat the text of the cell before it.
        celldata = ""
        celltext = ""
        notelines = []
    elif name == "office:annotation":
        innote = 1
    elif name == "table:table-row":
        rows = []
    elif name == "text:p":
        if innote:
            # Each paragraph inside a note is one note line.
            notelines.append("")
        elif incell and celltext:
            # A further paragraph in the same cell: keep the line break.
            celltext += "\n"
        isdata = 1


def end_element(name):
    """Expat end-tag handler: finish cells and write out completed rows.

    At the end of a cell the collected text (and notes, if any) is turned
    into one CSV field and appended to the current row.  At the end of a row
    the fields are joined with ``separator`` and written to ``csv``, unless
    every field in the row is empty.
    """
    global incell, innote, rows, celldata, isdata
    if name == "table:table-cell":
        incell = 0
        notes = [line for line in notelines if line]
        if celltext and notes:
            celldata = _quote("%s (%s)" % (celltext, notejoinchar.join(notes)))
        elif celltext:
            celldata = _quote(celltext)
        else:
            # No text: the field stays empty (notes alone are not written).
            celldata = ""
        rows.append(celldata)
    elif name == "table:table-row":
        # Look at the whole row, not just its last cell, so rows that end
        # in an empty cell are still written.
        if any(rows):
            csv.write("%s\n" % separator.join(rows))
    elif name == "office:annotation":
        innote = 0
    elif name == "text:p":
        isdata = 0


def char_data(data):
    """Expat character-data handler: collect paragraph text.

    Text is only collected inside ``text:p``.  Inside a note it extends the
    current note line; otherwise, inside a cell, it extends the cell text.
    Text is appended rather than assigned because one paragraph can reach
    this handler in several pieces.
    """
    global celltext
    if not isdata:
        return
    if innote:
        if notelines:
            notelines[-1] += data
        else:
            notelines.append(data)
    elif incell:
        celltext += data


def parseargs():
    """Read the command line into ``sxcfile``, ``csvfile``, ``encoding``, ``separator``.

    Prints the usage text and exits with status 1 when fewer than two
    arguments are given.  ``encoding`` defaults to ``utf_8`` and
    ``separator`` to a comma; the word ``tab`` selects a tab character.
    """
    global sxcfile, csvfile, encoding, separator
    argv = sys.argv
    if len(argv) < 3:
        # Each print() gets a single argument, so the Python 2 print
        # statement accepts these lines as well.
        print("usage: %s sxc_file csv_file [encoding] [separator]\n" % argv[0])
        print("sxc_file: input OpenOffice calc file")
        print("csv_file: output comma separated file")
        print("encoding: utf_8, latin_1, etc. defaults to utf_8. for more encodings see:")
        print(" http://www.python.org/doc/2.4.1/lib/standard-encodings.html")
        print("separator: separator to use in output file. defaults to , (comma)")
        print(" enter 'tab' to use tabulator")
        sys.exit(1)

    sxcfile = argv[1]
    csvfile = argv[2]
    encoding = argv[3] if len(argv) > 3 else "utf_8"
    separator = argv[4] if len(argv) > 4 else ","
    if separator == 'tab':
        separator = "\t"


def convert(xmldata, out):
    """Parse the ``content.xml`` bytes in *xmldata* and write CSV rows to *out*.

    *out* is any object with a ``write`` method that accepts text.  The
    handler state is reset first, so the function can be called repeatedly.
    Raises ``xml.parsers.expat.ExpatError`` when the XML is malformed or
    incomplete.
    """
    global csv, incell, innote, isdata, rows, notelines, celldata, celltext
    csv = out
    incell, innote, isdata = 0, 0, 0
    rows, notelines = [], []
    celldata, celltext = "", ""

    # setup the XML parser
    p = xml.parsers.expat.ParserCreate()
    p.buffer_text = True  # deliver contiguous text in as few calls as possible
    p.StartElementHandler = start_element
    p.EndElementHandler = end_element
    p.CharacterDataHandler = char_data
    # The second argument marks this as the final chunk, so a truncated
    # document is reported instead of being accepted silently.
    p.Parse(xmldata, True)


def main():
    """Command-line entry point: read the sxc file and write the CSV file."""
    parseargs()

    # open the sxc file and get its contents
    with ZipFile(sxcfile) as archive:
        xmldata = archive.read("content.xml")

    # parse the XML data and write the output file; the text is encoded
    # once, at the file boundary
    with io.open(csvfile, "w", encoding=encoding) as out:
        convert(xmldata, out)


if __name__ == "__main__":
    main()