logilab/doctools

view transformer.py @ 0:cc367abb080e

forget the past. forget the past.
author root
date Wed, 26 Apr 2006 10:48:09 +0000
parents
children 5f66dad05f6b
line source
1 # -*- coding: ISO-8859-1 -*-
3 # Copyright (c) 2000-2003 LOGILAB S.A. (Paris, FRANCE).
4 # http://www.logilab.fr/ -- mailto:contact@logilab.fr
5 #
6 # This program is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU General Public License as published by the Free Software
8 # Foundation; either version 2 of the License, or (at your option) any later
9 # version.
10 #
11 # This program is distributed in the hope that it will be useful, but WITHOUT
12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along with
16 # this program; if not, write to the Free Software Foundation, Inc.,
17 # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 """ %prog [options] <input_file>
21 Transform ReST / DOCBOOK XML to different output formats
22 """
24 __revision__ = "$Id: transformer.py,v 1.15 2006-02-27 09:03:13 nico Exp $"
26 import os
27 import sys
28 from os.path import isabs, join, exists, expanduser, splitext, basename#, exists
29 from cStringIO import StringIO
30 from commands import getstatusoutput
31 from xml.sax import ContentHandler, make_parser, SAXNotRecognizedException
32 from xml.sax import InputSource, ErrorHandler
33 from xml.sax.handler import feature_external_pes, feature_external_ges
34 from logilab.common.configuration import OptionsManagerMixIn
35 from logilab.common.configuration import OptionsProviderMixIn
37 from logilab.doctools.__pkginfo__ import version
39 #import tempfile
40 #tempfile.tempdir='.'
# file extensions recognised for each supported input format
REST_EXTENSIONS = ('.txt', '.rst', '.rest')
DOCBOOK_EXTENSIONS = ('.xml', '.dbk')
FO_EXTENSIONS = ('.fo',)

# Configuration file lookup order:
#   1. the file named by the MKDOCRC environment variable, when it exists
#   2. ~/.mkdocrc (or ./.mkdocrc when '~' cannot be expanded)
#   3. /etc/mkdocrc
# MKDOCRC is None when none of them exists.
_ENV_MKDOCRC = os.environ.get('MKDOCRC')
if _ENV_MKDOCRC and exists(_ENV_MKDOCRC):
    MKDOCRC = _ENV_MKDOCRC
else:
    USER_HOME = expanduser('~')
    if USER_HOME == '~':
        # expanduser() returned its argument untouched: no known home directory
        MKDOCRC = ".mkdocrc"
    else:
        MKDOCRC = join(USER_HOME, '.mkdocrc')
    if not exists(MKDOCRC):
        if exists('/etc/mkdocrc'):
            MKDOCRC = '/etc/mkdocrc'
        else:
            MKDOCRC = None

# help section appended to the command line usage message
ENV_HELP = """
The following environment variables are used :
* MKDOCRC
path to the configuration file. If not found, it will use the first
existant file in ~/.mkdocrc, /etc/mkdocrc.
"""
if MKDOCRC:
    ENV_HELP += 'The current configuration file in use is %s.'% MKDOCRC
else:
    ENV_HELP += 'No configuration file has been found for this run.'
71 # exceptions ##################################################################
class FormattingException(Exception):
    """Error signalling that a transformation step could not be completed."""
class GuessException(Exception):
    """raised when we are not able to guess something"""


class InputGuessException(GuessException):
    """raised when we are not able to guess the input file format"""


class OutputGuessException(GuessException):
    """raised when we are not able to guess how to produce the output
    (for instance when no main style sheet can be determined)
    """
85 # utilities ###################################################################
class PIManager(ContentHandler):
    """SAX handler collecting style sheets declared by processing instructions.

    Two processing instructions are recognised:

    * ``<?logidoc-style target="<format>" xslt="<stylesheet>"?>`` declares the
      main style sheet for an output format. The first declaration seen for a
      given target is kept, later ones are ignored with a warning.
    * ``<?logidoc-preprocess xslt="<stylesheet>"?>`` appends a pre-processing
      style sheet.

    After a parse, `preprocess` is the list of pre-processing style sheets in
    document order and `xslts` maps each target to its style sheet.
    """

    def __init__(self, quiet=1):
        ContentHandler.__init__(self)
        # stored for callers; this class itself never consults it
        self.quiet = quiet
        # make the handler usable even before the first from*() call
        self.reset()

    def processingInstruction(self, target, data):
        """Receive notification of a processing instruction.

        Instructions other than logidoc-style / logidoc-preprocess are
        ignored. Raises FormattingException on a malformed or incomplete
        logidoc instruction.
        """
        if target == 'logidoc-style':
            self._handle_style(data)
        elif target == 'logidoc-preprocess':
            self._handle_preprocess(data)

    def _split_attribute(self, attr, instruction):
        """return the (name, unquoted value) pair of a name="value" token"""
        if '=' not in attr:
            msg = 'Bad %s attribute %s'
            raise FormattingException(msg % (instruction, attr))
        # split on the first '=' only: the value itself may contain one
        name, value = attr.split('=', 1)
        # drop the first and last characters, expected to be the quotes
        return name, value[1:-1]

    def _handle_style(self, data):
        """register the main style sheet declared by a logidoc-style PI"""
        target, xslt = None, None
        # split() copes with repeated, leading or trailing white space
        for attr in data.split():
            name, value = self._split_attribute(attr, 'logidoc-style')
            if name == 'target':
                target = value
            elif name == 'xslt':
                xslt = value
            else:
                msg = 'Bad logidoc-style attribute %s'
                raise FormattingException(msg % name)
        if not target:
            msg = 'Bad logidoc-style, missing target'
            raise FormattingException(msg)
        if not xslt:
            msg = 'Bad logidoc-style, missing xslt'
            raise FormattingException(msg)
        if target not in self.xslts:
            self.xslts[target] = xslt
        else:
            print('Warning: ignoring %s stylesheet for %s' % (xslt, target))

    def _handle_preprocess(self, data):
        """register the style sheet declared by a logidoc-preprocess PI"""
        name, value = self._split_attribute(data.strip(),
                                            'logidoc-preprocess')
        if name != 'xslt':
            msg = 'Bad logidoc-preprocess attribute %s'
            raise FormattingException(msg % name)
        self.preprocess.append(value)

    def reset(self):
        """forget every style sheet collected so far"""
        self.preprocess = []
        self.xslts = {}

    def fromFile(self, filename):
        """parse the file `filename`, return (preprocess, xslts)"""
        inputsource = InputSource('file://'+os.path.abspath(filename))
        return self.fromInputSource(inputsource)

    def fromString(self, string):
        """parse the document held in `string`, return (preprocess, xslts)"""
        inputsource = InputSource()
        inputsource.setByteStream(StringIO(string))
        # dummy system id, set so the source has one although it is not a file
        inputsource.setSystemId('file://'+os.path.abspath("hardcoded_string"))
        return self.fromInputSource(inputsource)

    def fromInputSource(self, inputsource):
        """parse a SAX input source, return (preprocess, xslts)

        Previously collected style sheets are discarded first.
        """
        self.reset()
        parser = make_parser()
        parser.setContentHandler(self)
        parser.setErrorHandler(ErrorHandler())
        # NOTE: external entity handling is left to the parser defaults; the
        # feature_external_ges / feature_external_pes toggles that used to
        # follow here were disabled.
        parser.parse(inputsource)
        return self.preprocess, self.xslts
def exec_cmd(cmd):
    """Run the command line `cmd` and return what it printed.

    Raises FormattingException, quoting the command, its exit status and its
    output, when the command ends with a non-zero status.
    """
    status, output = getstatusoutput(cmd)
    if status == 0:
        return output
    details = (cmd, status, output)
    raise FormattingException('"%s" returned status %s\n%s' % details)
def guess_format(filename):
    """Guess the format of `filename` from its extension.

    The extension is compared case-insensitively, so 'doc.XML' is handled
    like 'doc.xml'.

    Returns 'rest', 'docbook' or 'fo'; raises InputGuessException when the
    extension matches none of the known ones.
    """
    ext = splitext(filename)[1].lower()
    known_formats = ((REST_EXTENSIONS, 'rest'),
                     (DOCBOOK_EXTENSIONS, 'docbook'),
                     (FO_EXTENSIONS, 'fo'))
    for extensions, file_format in known_formats:
        if ext in extensions:
            return file_format
    raise InputGuessException("Unable to guess file format from %s" % filename)
def xmlproc_output(output, checked):
    """Echo xmlproc diagnostics and fail when errors were reported.

    Every line of `output` starting with 'E:' or 'W:' is printed. The number
    of errors is then read from the "<n> error(s)" summary expected on the
    last line.

    `checked` is the quality being verified ('well formed', 'valid'); it is
    only used in the error message.

    Raises FormattingException when the error count is not 0 or when no
    summary can be found.
    """
    lines = output.strip().split('\n')
    for line in lines:
        if line[0:2] in ('E:', 'W:'):
            print(line)
    # NOTE(review): only the last line is searched for the summary
    summary = lines[-1]
    end = summary.find(" error(s)")
    if end == -1:
        # explicit check rather than an assert, which python -O would strip
        raise FormattingException(
            'Unable to find the error count in xmlproc output:\n%s' % output)
    # the count is the word right before " error(s)"
    start = summary.rfind(" ", 0, end)
    if summary[start + 1:end] != "0":
        raise FormattingException('Not a %s xml file' % checked)
190 # the transformer #############################################################
192 # FIXME: we need to add transformation steps in order to integrate
193 # pybill and trf-session. This code should be reworked to introduce a
194 # "Chainon" (link) object representing an elementary transformation; the
195 # links could then be chained with a nice lazy generator/producer
class Transformer(OptionsManagerMixIn, OptionsProviderMixIn):
    """Drive the conversion of a ReST / DOCBOOK / FO file to another format.

    The conversion is a chain of elementary steps (ReST -> DOCBOOK,
    pre-processing style sheets, DOCBOOK -> HTML or FO, FO -> PDF) selected
    from the options declared below, which are read through `self.config`.
    """
    name = 'MAIN'
    options = (
        # main options
        ('target',
         {'type': 'choice',
          'choices': ('docbook', 'html', 'fo', 'pdf'),
          'default' : 'pdf',
          'metavar' : "<format>",
          'help': "output format. Available format are docbook (if input is a "
                  "ReST file), html and pdf."
          }),
        ('source',
         {'type': 'choice',
          'choices': ('rest', 'docbook', 'fo'),
          'metavar' : "<format>",
          'help': "source format. Available format are rest, docbook, fo. If "
                  "not specified, source format will be guessed from the "
                  "file's extension."
          }),
        ('check',
         {'type' : 'yn',
          'metavar' : '<y_or_n>',
          'default' : 1,
          'help': "tell if we should check that the docbook input file is "
                  "well formed xml."
          }),
        ('validate',
         {'type' : 'yn',
          'metavar' : '<y_or_n>',
          'default' : 0,
          'help': "tell if we should validate the docbook input file."
          }),
        ('preprocess',
         {'type': 'string',
          'action': 'append',
          'default': (),
          'metavar' : '<xslt>',
          'help': "add a pre-processing style sheet. You can set this option "
                  "multiple times. If not specified, preprocessing."
          }),
        ('stylesheet',
         {'type': 'string',
          'metavar' : '<xslt>',
          'help': "set the main style sheet."
          }),
        ('ignore-pi',
         {'action': 'store_true',
          'dest': 'ignore_pi',
          'help': "Do not try to guess main/pre-process stylesheets from "
                  "processing instruction."
          }),
        ('keep',
         {'action': 'store_true',
          'help': "Keep temporary files."
          }),
        ('quiet',
         {'action': 'store_true',
          'help': "Do not display information about what we're doing..."
          }),

        # FOP related options
        ('fop',
         {'type': 'string',
          'default' : 'fop',
          'metavar' : "<binpath>",
          'help': "path of the fop executable."
          }),
        ('fop-options',
         {'type': 'string',
          'dest' : 'fop_opts',
          'default' : '',
          'metavar' : "<options list>",
          'help': "options given to the fop executable."
          }),

        # xsltproc related options
        ('xsltproc',
         {'type': 'string',
          'default' : 'xsltproc',
          'metavar' : "<binpath>",
          'help': "path of the xsltproc executable."
          }),
        ('xsltproc-options',
         {'type': 'string',
          'dest' : 'xsltproc_opts',
          'default' : '--xinclude --catalogs',
          'metavar' : "<options list>",
          'help': "options given to the xsltproc executable."
          }),
        ('param',
         {'type': 'named',
          'action' : 'append',
          'default' : (),
          'dest': 'parameters',
          'metavar' : "<name>=<value>",
          'help': "sets the <name> stylesheet parameter to <value>. You may "
                  "set this option multiple times. Parameters are given to "
                  "the xslt processor."
          }),
        # ReST related options
        ('doctype',
         {'type': 'string',
          'default' : 'book',
          'metavar' : "<doctype>",
          'help': "doctype to use when converting ReST to DOCBOOK."
          }),

        ## FIXME Path to xslt directory has not to be specified.
        ## TODO : Remove all references to xslt root and use id in catalog
        # xslts location
        ('xsltroot',
         {'type': 'string',
          'metavar' : "<xslt directory>",
          'default' : '/usr/share/sgml/logilab-xml/stylesheet/',
          'help': "directory where logilab's stylesheets are located."
          }),
        )

    def __init__(self):
        OptionsManagerMixIn.__init__(self, usage=__doc__, version=version,
                                     config_file=MKDOCRC, quiet=1)
        OptionsProviderMixIn.__init__(self)
        self.register_options_provider(self)
        self.add_help_section('Environment variables', ENV_HELP)
        # used to read style sheets from docbook processing instructions
        self.pimanager = PIManager()

    def _announce(self, message):
        """print a separator then `message`, unless running quietly"""
        if not self.config.quiet:
            print('-' * 80)
            print(message)

    def xslt_transform(self, input_file, output_file, xslt_file):
        """xsltproc based transformation

        Apply `xslt_file` to `input_file`, writing the result to
        `output_file`, whose name is returned. Every configured style sheet
        parameter is passed as a quoted string value. Raises
        FormattingException when xsltproc fails.
        """
        self._announce("Transforms %s to %s using %s" % (
            input_file, output_file, xslt_file))
        cmd = [self.config.xsltproc, self.config.xsltproc_opts,
               "--output ", output_file]
        for name, value in self.config.parameters:
            cmd.append('--param')
            cmd.append(name)
            # inner single quotes make the value an XPath string literal,
            # outer double quotes protect it from the shell
            cmd.append("\"'" + value + "'\"")
        cmd.append(xslt_file)
        cmd.append(input_file)
        # executes transformation command line
        output = exec_cmd(' '.join(cmd))
        if not self.config.quiet:
            print(output)
        return output_file

    def fop_transform(self, fo_file, output_file):
        """FOP based transformation

        Run the configured fop executable on `fo_file` to produce
        `output_file`, whose name is returned. Raises FormattingException
        when fop fails.
        """
        self._announce("Transforms Formatting Objects to PDF (%s -> %s)" % (
            fo_file, output_file))
        # executes transformation command line
        output = exec_cmd("%s %s %s %s" %(self.config.fop, self.config.fop_opts,
                                          fo_file, output_file))
        if not self.config.quiet:
            print(output)
        return output_file

    def rest_transform(self, rest_file, output_file):
        """transforms Restructured Text to DOCBOOK XML

        `rest_file` is read as ISO-8859-1 and `output_file` written as
        UTF-8; the name of the output file is returned. Any error raised by
        docutils is turned into a FormattingException.
        """
        self._announce(
            "Transforms Restructured Text to DOCBOOK XML (%s -> %s)" % (
                rest_file, output_file))
        from docutils import core, io
        from logilab.doctools.rest_docbook import Writer
        writer = Writer()
        pub = core.Publisher(writer=writer)
        pub.set_reader(reader_name='standalone',
                       parser_name='restructuredtext', parser=None)
        pub.source = io.FileInput(source_path=rest_file, encoding='ISO-8859-1')
        pub.destination = io.FileOutput(destination_path=output_file,
                                        encoding='UTF-8')
        # FIXME : find the way to specify docutils no parsing args
        #         hint: use core.publish_programmatically ?
        # Hide our own command line from docutils while it publishes, then
        # put it back so the rest of the program still sees it.
        saved_argv = sys.argv
        sys.argv = [sys.argv[0]]
        try:
            try:
                pub.publish(settings_overrides={'output_encoding': 'UTF-8',
                                                'report_level': 2,
                                                'doctype' : self.config.doctype,
                                                })
            except Exception:
                # sys.exc_info() keeps this valid on every python version
                raise FormattingException(str(sys.exc_info()[1]))
        finally:
            sys.argv = saved_argv
        return output_file

    def check_xml(self, xml_file):
        """check the given xml file is well formed XML

        Returns `xml_file`; raises FormattingException otherwise.
        """
        self._announce('Checking %s' % xml_file)
        output = exec_cmd("xmlproc_parse " + xml_file)
        xmlproc_output(output, 'well formed')
        return xml_file

    def validate_xml(self, xml_file):
        """check the given xml file is valid XML

        Returns `xml_file`; raises FormattingException otherwise.
        """
        self._announce('Validating %s' % xml_file)
        output = exec_cmd("xmlproc_val " + xml_file)
        xmlproc_output(output, 'valid')
        return xml_file

    def transform(self, filename):
        """run transforms on filename

        Returns the name of the produced file. Intermediate files are
        removed (unless the 'keep' option is set) whatever the target format.

        Raises InputGuessException when the source format cannot be guessed,
        OutputGuessException when a needed main style sheet is unknown and
        FormattingException when a step fails.
        """
        # get transform parameters
        to_remove = []
        source_format = self.config.source
        dest_format = self.config.target
        preprocess = self.config.preprocess
        stylesheet = self.config.stylesheet
        base = splitext(basename(filename))[0]

        if not source_format:
            source_format = guess_format(filename)

        # complete missing style sheets from the docbook processing
        # instructions
        # NOTE(review): the 'check' and 'validate' options are not consulted
        # here, check_xml() / validate_xml() are never called by this method
        if source_format == 'docbook':
            if not self.config.ignore_pi and (not preprocess or not stylesheet):
                self.pimanager.quiet = self.config.quiet
                preproc, styles = self.pimanager.fromFile(filename)
                preprocess = preprocess or preproc
                stylesheet = stylesheet or styles.get(dest_format)

        # check we have a main stylesheet: every xslt based final step needs
        # one, only FO -> FO / PDF and * -> docbook can do without
        needs_stylesheet = dest_format == 'html' or (
            dest_format != 'docbook' and source_format != 'fo')
        if needs_stylesheet and not stylesheet:
            raise OutputGuessException('Unable to guess the main style sheet')

        # transform ReST to docbook ?
        if source_format == 'rest':
            filename = self.rest_transform(filename, base + '.xml')
            if dest_format != 'docbook':
                to_remove.append(filename)

        # are we arrived ?
        if dest_format == 'docbook':
            return filename

        # preprocessing
        for preprocess_xslt in preprocess:
            xslt = self.absolute_stylesheet(preprocess_xslt, 'pre-process')
            output = '%s.%s.xml' % (base, preprocess_xslt)
            filename = self.xslt_transform(filename, output, xslt)
            to_remove.append(output)

        # finalization
        if dest_format == 'html':
            # transform DOCBOOK to HTML
            xslt = self.absolute_stylesheet(stylesheet, 'html')
            filename = self.xslt_transform(filename, base + '.html', xslt)
        else:
            if source_format != 'fo':
                # transform DOCBOOK to FO
                xslt = self.absolute_stylesheet(stylesheet, 'fo')
                filename = self.xslt_transform(filename, base + '.fo', xslt)
                if dest_format != 'fo':
                    # the FO file is only a step on the way to PDF
                    to_remove.append(filename)
            if dest_format != 'fo':
                # transform FO to PDF
                filename = self.fop_transform(filename, base + '.pdf')

        # never remove the file we are about to hand back to the caller
        self.clean([path for path in to_remove if path != filename])
        return filename

    def absolute_stylesheet(self, stylesheet, type):
        """return the absolute path of the given stylesheet

        An absolute path is returned untouched. A relative '.xsl' / '.xslt'
        file name is looked up in <xsltroot>/<type>/; any other name is
        taken as a directory of <xsltroot>/<type>/ holding a 'root.xsl'.
        """
        if isabs(stylesheet):
            return stylesheet
        if stylesheet.endswith('.xsl') or stylesheet.endswith('.xslt'):
            return join(self.config.xsltroot, type, stylesheet)
        return join(self.config.xsltroot, type, stylesheet, 'root.xsl')

    def clean(self, files):
        """remove temporary files, unless configuration tells to keep them"""
        if self.config.keep:
            return
        if not self.config.quiet and files:
            print('-' * 80)
            print('Removing temporary files')
        for path in files:
            # a name listed twice (e.g. the same pre-processing style sheet
            # given twice) is already gone the second time
            if exists(path):
                os.remove(path)