logilab/doctools

view xmlformat.py @ 0:cc367abb080e

forget the past. forget the past.
author root
date Wed, 26 Apr 2006 10:48:09 +0000
parents
children c3e73486dd16
line source
#!/usr/bin/python
'''%(PROG)s: format XML source code as DocBook or HTML, using roles for highlighting

USAGE: %(PROG)s [OPTIONS] <input.xml>...

OPTIONS:
  -h / --help
     display this help message and exit

  -o / --output <OUTPUT_FILE>
     write results in file <OUTPUT_FILE>. Without -o or -s, each input
     is written to <input>_dcbk.xml in the current directory.
  -s / --stdout
     write results to standard output.
  -e / --encoding <ENCODING>
     specify encoding to use in outputs. Default to UTF-8.

  -n / --no-head
     do not insert output headers.

  -f / --format <OUTPUT_FORMAT>
     set output format. Default to %(DEFAULT_FORMAT)s.
     Available formats are %(FORMATS)s.
'''
25 __revision__ = "$Id: xmlformat.py,v 1.7 2004-10-31 02:18:06 nico Exp $"
27 import sys
28 from os.path import basename
29 from xml.dom.ext import SplitQName
30 from xml.sax.handler import ContentHandler
# Output formats accepted by the --format option; the first one is the default.
FORMATS = ('docbook', 'extended-docbook', 'html')
DEFAULT_FORMAT = FORMATS[0]

# Name this script was invoked under, shown in the usage message.
PROG = basename(sys.argv[0])
# Token kinds, used as keys of the markup tables below.  Each table maps a
# kind to the (opening, closing) markup written around tokens of that kind.
(_ROOT, _STRING, _COMMENT, _NAME, _KEYWORD, _TEXT, _HEAD) = range(7)

# Fragments shared by several tables.
_NO_MARKUP = ('', '')
_ARTICLE_HEAD = ('<?xml version="1.0" encoding="%s"?>\n<article>',
                 '</article>')


def _emphasis(role=None):
    """Return the (opening, closing) pair of an <emphasis>, with optional role."""
    if role is None:
        return ('<emphasis>', '</emphasis>')
    return ('<emphasis role="%s">' % role, '</emphasis>')


def _font(color):
    """Return the (opening, closing) pair of an HTML <font> of the given color."""
    return ('<font color="%s">' % color, '</font>')


# DocBook with one emphasis role per token kind ('extended-docbook' format).
LOGILAB = {
    _HEAD:    _ARTICLE_HEAD,
    _ROOT:    ('<programlisting role="python">', '</programlisting>'),
    _STRING:  _emphasis('string'),
    _COMMENT: _emphasis('comment'),
    _NAME:    _emphasis('name'),
    _KEYWORD: _emphasis('keyword'),
    _TEXT:    _NO_MARKUP,
    }

# Plain DocBook ('docbook' format).
DOCBOOK = {
    _HEAD:    _ARTICLE_HEAD,
    _ROOT:    ('<programlisting>', '</programlisting>'),
    _STRING:  _emphasis(),
    _COMMENT: _emphasis(),
    _NAME:    _NO_MARKUP,
    _KEYWORD: _emphasis('bold'),
    _TEXT:    _NO_MARKUP,
    }

# HTML with colored <font> elements ('html' format).
HTML = {
    _HEAD:    ('<!doctype html public "-//w3c//dtd html 4.0 transitional//en">\n'
               '<html>\n'
               '<head>\n'
               '<meta http-equiv="Content-Type" content="text/html; charset=%s">\n'
               '<link rel="stylesheet" type="text/css" href="/intranet.css">\n'
               '</head>\n'
               '<body>',
               '</body>\n</html>'),
    _ROOT:    ('<div>', '</div>'),
    _STRING:  _font('#004080'),
    _COMMENT: _font('#008000'),
    _NAME:    _NO_MARKUP,
    _KEYWORD: _font('#C00000'),
    _TEXT:    _NO_MARKUP,
    }
## full sax handler, print each event to output ###############################

class XmlFormatSaxHandler(ContentHandler):
    """
    Format an xmlfile to docbook or html

    Every SAX event (content, lexical and declaration handlers) is written
    to the output stream as escaped XML source, each token being wrapped
    in the opening/closing markup that the active format table (DOCBOOK,
    LOGILAB or HTML) associates with its kind.
    """

    def __init__(self, head=1, output=sys.stdout, encoding='UTF-8'):
        """
        head: when true, the header and footer of the format are written
        output: file-like object receiving the formatted document
        encoding: encoding applied to the text written to output
        """
        ContentHandler.__init__(self)
        self._out = output
        self._cod = encoding
        self._head = head
        # Markup table in use until set_format() selects another one.
        self._o_d = LOGILAB
        # Not read anywhere in this class; kept for compatibility.
        self._ind = 0

    def set_format(self, format):
        """Select the markup table by format name.

        An unknown name leaves the current table unchanged.
        """
        tables = {'docbook': DOCBOOK,
                  'extended-docbook': LOGILAB,
                  'html': HTML}
        self._o_d = tables.get(format, self._o_d)

    ## output helpers #########################################################
    def _write(self, text):
        """Write text to the output, encoding it first when it is unicode.

        The text is encoded once it is fully formatted: encoding the format
        string instead lets unicode names and values reach write() unencoded.
        """
        if isinstance(text, type(u'')):
            text = text.encode(self._cod)
        self._out.write(text)

    def _escape(self, text):
        """Return text with '&' and '<' replaced by their entities."""
        # '&' goes first so that the '&' of '&lt;' is not escaped again.
        return ('%s' % text).replace('&', '&amp;').replace('<', '&lt;')

    def _tag(self, kind, text):
        """Return text wrapped in the markup of the given token kind."""
        opening, closing = self._o_d[kind]
        return '%s%s%s' % (opening, text, closing)

    def _literal(self, text):
        """Return text rendered as an escaped, double-quoted string token."""
        return self._tag(_STRING, '"%s"' % self._escape(text))

    def _qname(self, name, bare_kind):
        """Return the markup of a qualified name.

        A prefixed name is rendered as keyword:name; a name without prefix
        is rendered with the markup of bare_kind.
        """
        prefix, local = SplitQName(name)
        if prefix:
            return '%s:%s' % (self._tag(_KEYWORD, prefix),
                              self._tag(_NAME, local))
        return self._tag(bare_kind, local)

    def _external_id(self, public_id, system_id):
        """Return the ' PUBLIC "..." "..."' or ' SYSTEM "..."' part of a declaration."""
        if public_id:
            return ' PUBLIC %s %s' % (self._literal(public_id),
                                      self._literal(system_id))
        return ' SYSTEM %s' % self._literal(system_id)

    ## content handler ########################################################
    def startDocument(self):
        """Write the header (when enabled) and open the root markup."""
        if self._head:
            self._write(self._o_d[_HEAD][0] % self._cod)
        self._write(self._o_d[_ROOT][0])

    def endDocument(self):
        """Close the root markup and write the footer (when enabled)."""
        self._write(self._o_d[_ROOT][1])
        if self._head:
            self._write(self._o_d[_HEAD][1])

    def startElement(self, name, attrs):
        """Write a start tag with its attributes."""
        parts = ['&lt;', self._qname(name, _KEYWORD)]
        for key, val in attrs.items():
            parts.append(' %s=%s' % (self._qname(key, _NAME),
                                     self._literal(val)))
        # The tag is closed once, after all the attributes.
        parts.append('>')
        self._write(''.join(parts))

    def endElement(self, name):
        """Write an end tag."""
        self._write('&lt;/%s>' % self._qname(name, _KEYWORD))

    def processingInstruction(self, target, data):
        """Write a processing instruction, closed by '?>'."""
        self._write('&lt;?%s %s?>' % (
            self._tag(_NAME, target),
            self._tag(_STRING, self._escape(data))))

    def characters(self, ch):
        """Write character data."""
        self._write(self._tag(_TEXT, self._escape(ch)))

    ## lexical handler ########################################################
    def comment(self, comment):
        """Write a comment."""
        self._write(self._tag(_COMMENT,
                              '&lt;!--%s-->' % self._escape(comment)))

    def startCDATA(self):
        """Write the opening of a CDATA section."""
        # Not read anywhere in this class; kept for compatibility.
        self.cdata = 0
        self._write('&lt;%s' % self._tag(_KEYWORD, '![CDATA['))

    def endCDATA(self):
        """Write the end of a CDATA section."""
        self._write('%s>' % self._tag(_KEYWORD, ']]'))

    def startDTD(self, name, public_id, system_id):
        """Write the opening of a document type declaration."""
        text = '&lt;%s %s' % (self._tag(_KEYWORD, '!DOCTYPE'), name)
        # A doctype holding only an internal subset has no external id.
        if public_id or system_id:
            text = text + self._external_id(public_id, system_id)
        self._write(text + ' [\n')

    def endDTD(self):
        """Write the end of a document type declaration."""
        self._write(']>\n')

    def startEntity(self, name):
        """Entity boundaries are not rendered."""
        pass

    def endEntity(self, name):
        """Entity boundaries are not rendered."""
        pass

    ## decl handler ###########################################################
    def internalEntityDecl(self, name, value):
        """Write an internal entity declaration, its value as a quoted literal."""
        self._write('&lt;%s %s %s>\n' % (
            self._tag(_KEYWORD, '!ENTITY'), name, self._literal(value)))

    def externalEntityDecl(self, name, public_id, system_id):
        """Write an external entity declaration."""
        self._write('&lt;%s %s%s>\n' % (
            self._tag(_KEYWORD, '!ENTITY'), name,
            self._external_id(public_id, system_id)))

    def elementDecl(self, elem_name, content_model):
        """Write an element declaration."""
        c_m = _decode_content_model(content_model)
        self._write('&lt;%s %s %s>\n' % (
            self._tag(_KEYWORD, '!ELEMENT'), elem_name, c_m))

    def attributeDecl(self, elem_name, attr_name, type_d, value_def, value):
        """Write an attribute declaration.

        type_d is either a type name or a sequence of allowed values,
        rendered as '(a|b|c)'.  value_def is written after the type when
        it is not None.
        """
        if isinstance(type_d, (list, tuple)):
            type_text = '(%s)' % '|'.join(['%s' % pos for pos in type_d])
        else:
            type_text = type_d
        fields = [elem_name, attr_name, type_text]
        if value_def is not None:
            fields.append(value_def)
        # NOTE(review): value is presumably the default value of the
        # attribute; it has never been written out -- confirm whether it
        # should be.
        self._write('&lt;%s %s>\n' % (
            self._tag(_KEYWORD, '!ATTLIST'),
            ' '.join(['%s' % field for field in fields])))
# Positions of the fields of a content model group: operator, members and
# occurrence indicator.
C_OP, C_VAL, C_NUM = 0, 1, 2

# Text written between the members of a group, keyed by the group operator.
_GROUP_SEPARATORS = {',': ', ', '|': '|'}


def _decode_content_model(content_m):
    """Return the DTD source text of a content model.

    content_m is a sequence.  When its first item is ',' or '|' it is a
    group: item C_VAL holds the member content models, decoded
    recursively, and item C_NUM the occurrence indicator appended after
    the closing parenthesis.  Anything else is a leaf, rendered as its
    first item immediately followed by its last item.

    An empty group is rendered with balanced parentheses, '()'.
    """
    operator = content_m[C_OP]
    if operator in _GROUP_SEPARATORS:
        members = [_decode_content_model(c_m) for c_m in content_m[C_VAL]]
        return '(%s)%s' % (_GROUP_SEPARATORS[operator].join(members),
                           content_m[C_NUM])
    # Leaves may be shorter than groups, hence the index from the end.
    return '%s%s' % (operator, content_m[-1])
def _set_optional_handler(parser, prop, handler):
    """Attach handler to the SAX property prop, tolerating a refusal.

    Not every SAX driver supports the lexical and declaration handler
    properties; a refusal is reported on stderr and formatting goes on
    without the corresponding events.
    """
    from xml.sax import SAXException
    try:
        parser.setProperty(prop, handler)
    except SAXException:
        # sys.exc_info() instead of a named except clause keeps the syntax
        # valid on every Python version.  The message goes to stderr since
        # stdout may be the destination of the formatted document.
        sys.stderr.write('%s\n' % sys.exc_info()[1])


def run(args):
    """
    main

    Parse the command line options found in args, then format each
    remaining argument as an input file.  All inputs go to the file
    given with -o / --output or to stdout with -s / --stdout (the last
    of the two options wins).  Otherwise each '.xml' or '.dtd' input is
    written to '<name>_dcbk.xml' in the current directory and inputs
    with another extension are skipped with a message on stderr.

    Raises ValueError for an unknown output format and getopt.GetoptError
    for an invalid option.
    """
    import getopt
    import os
    from xml.sax import make_parser
    from xml.sax.handler import property_lexical_handler
    from xml.sax.handler import property_declaration_handler
    ## get options
    options, args = getopt.getopt(
        args, 'he:o:sf:n',
        ['help', 'encoding=', 'output=', 'stdout', 'format=', 'no-head'])
    encod, output, to_stdout, head, fmt = 'UTF-8', None, 0, 1, DEFAULT_FORMAT
    for opt, val in options:
        if opt in ('-h', '--help'):
            sys.stdout.write('%s\n' % (__doc__ % globals()))
            return
        elif opt in ('-o', '--output'):
            # The file is opened only once the inputs are known, so that it
            # is not truncated when there is nothing to write.
            output, to_stdout = val, 0
        elif opt in ('-s', '--stdout'):
            to_stdout = 1
        elif opt in ('-f', '--format'):
            val = val.lower()
            if val not in FORMATS:
                raise ValueError('Unknown format %s' % val)
            fmt = val
        elif opt in ('-e', '--encoding'):
            encod = val
        elif opt in ('-n', '--no-head'):
            head = 0
    if not args:
        sys.stdout.write('%s\n' % (__doc__ % globals()))
        return
    ## transforms source files (xmlproc supports property_lexical_handler
    ## while pyexpat doesn't)
    #p = make_parser(['xml.sax.drivers2.drv_xmlproc'])
    parser = make_parser()
    # Destination shared by every input, or None for one output per input.
    if to_stdout:
        shared = sys.stdout
    elif output:
        shared = open(output, 'w')
    else:
        shared = None
    try:
        for filename in args:
            if shared is None and filename[-4:] not in ('.xml', '.dtd'):
                sys.stderr.write('Unknown extension %s, ignored file %s\n'
                                 % (filename[-4:], filename))
                continue
            # The source is opened first: a missing input must not leave
            # an empty output file behind.
            source = open(filename, 'r')
            try:
                if shared is None:
                    dest = open('%s_dcbk.xml'
                                % os.path.basename(filename)[:-4], 'w+')
                else:
                    dest = shared
                try:
                    ## prepare handler
                    handler = XmlFormatSaxHandler(head, dest, encod)
                    handler.set_format(fmt)
                    parser.setContentHandler(handler)
                    _set_optional_handler(parser, property_lexical_handler,
                                          handler)
                    _set_optional_handler(parser,
                                          property_declaration_handler,
                                          handler)
                    sys.stderr.write("Formatting %s ...\n" % filename)
                    ## parse and write colorized version to output file
                    parser.parse(source)
                finally:
                    if dest is not shared:
                        dest.close()
            finally:
                source.close()
    finally:
        if shared is not None and shared is not sys.stdout:
            shared.close()
if __name__ == "__main__":
    # Run as a script: pass run() everything after the program name.
    _cli_args = sys.argv[1:]
    run(_cli_args)