logilab/doctools

view py2db.py @ 0:cc367abb080e

forget the past. forget the past.
author root
date Wed, 26 Apr 2006 10:48:09 +0000
parents
children c3e73486dd16
line source
1 #!/usr/bin/python
2 # -*- coding: ISO-8859-1 -*-
3 """%(PROG)s: format Python source code to xml docbook using roles
5 USAGE: %(PROG)s [OPTIONS] <input.py>...
7 OPTIONS:
8 -h / --help
9 display this help message and exit
11 -r / --root "rootstring"
12 insert "rootstring" as root
14 -f / --format <OUTPUT_FORMAT>
15 set output format. Default to %(DEFAULT_FORMAT)s.
16 Available formats are %(FORMATS)s.
18 -s / --stdout
19 write results to standard output
20 """
21 ## Original code from active state recipe
22 ## 'Colorize Python source using the built-in tokenizer'
23 ## posted by Jürgen Hermann and modified to obtain docbook instead of colored
24 ## html
26 ## ----------------------------------------------------------------------------
27 ## MoinMoin - Python Source Parser
29 ## This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
30 ## Python source code to HTML markup, rendering comments, keywords, operators,
31 ## numeric and string literals in different colors.
33 ## It shows how to use the built-in keyword, token and tokenize modules
34 ## to scan Python source code and re-emit it with no changes to its
35 ## original formatting (which is the hard part).
37 __revision__ = '$Id: py2db.py,v 1.6 2004-10-31 02:18:06 nico Exp $'
39 import sys, cStringIO
40 import keyword, token, tokenize
41 from xml.sax.saxutils import escape
42 from os.path import basename
45 ## Python Source Parser #####################################################
# Pseudo token types, numbered above every real token/non-terminal id so they
# cannot collide with values from the ``token`` module.
_KEYWORD = token.NT_OFFSET + 1
_TEXT = token.NT_OFFSET + 2


class Parser:
    """Tokenize Python source text and write it out with markup around tokens.

    The source is re-emitted with its original inter-token whitespace; each
    token is XML-escaped and wrapped in the (opening, closing) pair that
    ``tags`` associates with its token type.
    """

    def __init__(self, raw, tags, out=sys.stdout):
        """Store the source text and the output settings.

        raw  -- Python source as a string; tabs are expanded and leading and
                trailing whitespace is stripped before processing.
        tags -- mapping from token type to an (opening, closing) string pair.
                It must contain the ``_TEXT`` key, used as the fallback.
        out  -- object with a ``write`` method receiving the result.
        """
        self.raw = raw.expandtabs().strip()
        self.out = out
        self.tags = tags

    def format(self, root=''):
        """Parse the stored source and write the marked-up listing.

        root -- optional element name; when non-empty the listing is wrapped
                in ``<root>`` ... ``</root>``.

        A ``tokenize.TokenError`` stops the listing early: the error and the
        remaining source are reported on standard error (not on ``out``, so
        the generated XML is not polluted) and the closing tags are still
        written.
        """
        # self.lines[row] is the offset in self.raw of the first character of
        # 1-based line `row`; index 0 is a filler and the final entry is
        # len(self.raw), which serves tokens reported past the last line.
        self.lines = [0, 0]
        pos = 0
        while True:
            pos = self.raw.find('\n', pos) + 1
            if not pos:
                break
            self.lines.append(pos)
        self.lines.append(len(self.raw))

        # parse the source and write it
        self.pos = 0
        if root:
            self.out.write('<%s>\n' % root)
        self.out.write(' <programlisting role="python">\n')
        try:
            # generate_tokens() yields the same 5-tuples that the old
            # callback-style tokenize.tokenize() passed to its token eater,
            # and is available on both Python 2 and Python 3.
            for tok in tokenize.generate_tokens(self._make_readline()):
                self(*tok)
        except tokenize.TokenError as ex:
            msg = ex.args[0]
            line = ex.args[1][0]
            # Clamp so an unexpected row number cannot raise IndexError while
            # reporting the original problem.
            start = self.lines[min(line, len(self.lines) - 1)]
            sys.stderr.write("ERROR: %s%s\n\n" % (msg, self.raw[start:]))
        self.out.write('\n </programlisting>\n')
        if root:
            self.out.write('</%s>\n' % root)

    def _make_readline(self):
        """Return a readline-style callable over self.raw.

        Lines are cut at the offsets stored in self.lines (so only at '\\n'),
        and '' is returned once the text is exhausted.
        """
        bounds = self.lines[1:]
        spans = iter(zip(bounds, bounds[1:]))

        def readline():
            span = next(spans, None)
            if span is None:
                return ''
            return self.raw[span[0]:span[1]]

        return readline

    def __call__(self, toktype, toktext, start, end, line):
        """Handle one token: write pending whitespace and the tagged token.

        toktype -- numeric token type reported by the tokenizer.
        toktext -- text of the token.
        start   -- (row, column) where the token begins; rows are 1-based.
        end     -- (row, column) where the token ends (unused).
        line    -- the physical source line holding the token (unused).
        """
        srow, scol = start

        ## calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)

        ## handle newlines
        if toktype in (token.NEWLINE, tokenize.NL):
            self.out.write('\n')
            return

        ## send the original whitespace, if needed
        if newpos > oldpos:
            self.out.write(self.raw[oldpos:newpos])

        ## skip indenting tokens; rewinding self.pos lets the indentation be
        ## written as whitespace in front of the next real token
        if toktype in (token.INDENT, token.DEDENT):
            self.pos = newpos
            return

        ## map token type to a group
        if token.LPAR <= toktype <= token.OP:
            toktype = token.OP
        elif toktype == token.NAME and keyword.iskeyword(toktext):
            toktype = _KEYWORD

        t_tags = self.tags.get(toktype, self.tags[_TEXT])

        ## send text
        self.out.write(t_tags[0])
        self.out.write(escape(toktext))
        self.out.write(t_tags[1])
132 ## Command line ###############################################################
# Shared pieces of the markup tables below.
_PLAIN = ('', '')
_END = '</emphasis>'

# Token type -> role name used by the 'extended-docbook' format, where every
# recognised token class gets its own <emphasis role="..."> wrapper.
_ROLES = (
    (token.NUMBER, 'number'),
    (token.OP, 'op'),
    (token.STRING, 'string'),
    (tokenize.COMMENT, 'comment'),
    (token.NAME, 'name'),
    (token.ERRORTOKEN, 'error'),  # ?
    (_KEYWORD, 'keyword'),
)
_TAGS = dict([(toktype, ('<emphasis role="%s">' % role, _END))
              for toktype, role in _ROLES])
_TAGS[_TEXT] = _PLAIN

# Markup for the plain 'docbook' format: only strings, comments and keywords
# are emphasised, everything else is written without any wrapper.
_STANDARDS_TAGS = {
    token.NUMBER: _PLAIN,
    token.OP: _PLAIN,
    token.STRING: ('<emphasis>', _END),
    tokenize.COMMENT: ('<emphasis>', _END),
    token.NAME: _PLAIN,
    token.ERRORTOKEN: _PLAIN,  # ?
    _KEYWORD: ('<emphasis role="bold">', _END),
    _TEXT: _PLAIN,
}

# Values interpolated into the module docstring when help is displayed.
FORMATS = ('docbook', 'extended-docbook')
DEFAULT_FORMAT = FORMATS[0]
PROG = basename(sys.argv[0])
def run(args):
    """Convert the Python files named on the command line to DocBook.

    args -- list of command line arguments, without the program name.

    Recognised options:
      -h / --help            write the module help text and return
      -r / --root NAME       wrap each listing in a <NAME> element
      -f / --format FORMAT   one of FORMATS; when given several times the
                             last one wins
      -s / --stdout          write to standard output instead of <name>.xml

    Arguments that do not end in '.py' are reported on standard error and
    skipped.

    Raises ValueError for an unknown format name; getopt.GetoptError
    propagates for malformed options.
    """
    import getopt

    ## get options
    opts, args = getopt.getopt(args,
                               'hr:f:s',
                               ['help', 'root=', 'format=', 'stdout'])
    root, ext, stdout = '', 0, 0
    for name, value in opts:
        if name in ('-h', '--help'):
            sys.stdout.write(__doc__ % globals() + '\n')
            return
        elif name in ('-r', '--root'):
            root = value
        elif name in ('-f', '--format'):
            val = value.lower()
            if val not in FORMATS:
                # String exceptions are not valid Python; raise a real one.
                raise ValueError('Unknown format %s' % val)
            ext = (val == 'extended-docbook')
        elif name in ('-s', '--stdout'):
            stdout = 1

    ## transforms source files
    for path in args:
        if not path.endswith('.py'):
            sys.stderr.write('Unknown extension, ignored file %s\n' % path)
            continue

        # Read the whole source first so the input file is closed even if
        # formatting fails later.
        source = open(path, 'r')
        try:
            text = source.read()
        finally:
            source.close()

        if ext:
            tags = _TAGS
        else:
            tags = _STANDARDS_TAGS

        if stdout:
            dest = sys.stdout
        else:
            dest = open('%s.xml' % path[:-3], 'w')
        sys.stderr.write("Formatting...\n")
        try:
            ## write the marked-up listing to the destination
            Parser(text, tags, dest).format(root)
        finally:
            # Never close sys.stdout: it is shared by every remaining file.
            if dest is not sys.stdout:
                dest.close()


if __name__ == "__main__":
    run(sys.argv[1:])