blabla
# *
# */
#
# bla_bla_bla
# bilip_bilip
#
# /* - this comment acts as a separator - */
#
# blo_blo_blo
#
#
# will only keep the first two lines of sources with
# the "blabla" block
#
# However, the comment will be kept, with following source lines
# if it contains a starting '#' or '@' as in:
#
# /*@.....*/
# /*#.....*/
# /* @.....*/
# /* #.....*/
#
##############################################################################
#
# The DocCode class is used to store source code lines
#
# self.lines contains a list of source code lines that will
# be dumped as HTML between `code_header' and `code_footer'.
#
# the object is filled line by line by the parser, it strips the
# leading "margin" space from each input line before storing it
# in self.lines
#
class DocCode:
    """Hold the lines of one source code block.

    `self.lines` is the list of stored lines.  `self.margin` is the
    number of leading columns that `add` removes from an incoming line
    when those columns contain nothing but whitespace.

    Output uses single-argument `print(...)`, which produces the same
    text whether `print` is a statement or a function.
    """

    def __init__(self, margin=0):
        self.lines = []
        self.margin = margin

    def add(self, line):
        """Store `line`, dropping its first `margin` columns if blank."""
        if line[:self.margin].strip() == "":
            line = line[self.margin:]
        self.lines.append(line)

    def dump(self):
        """Print every stored line prefixed with "--", then an empty line."""
        for line in self.lines:
            print("--" + line)
        print("")

    def get_identifier(self):
        """Return a placeholder; this function should never be called."""
        return "UNKNOWN_CODE_IDENTIFIER!!"

    def _significant_lines(self):
        """Return the stored lines without the trailing blank ones."""
        end = len(self.lines)
        # Walk back from the end while the line just before `end` is
        # blank.  The previous code started one position too early and
        # therefore always lost the last line, blank or not.
        while end > 0 and self.lines[end - 1].strip() == "":
            end = end - 1
        return self.lines[:end]

    def dump_html(self):
        """Print the code lines between `code_header` and `code_footer`.

        Trailing blank lines are not printed; every other line,
        including the last non-blank one, is.
        """
        print(code_header)
        for line in self._significant_lines():
            print(line)
        print(code_footer)
##############################################################################
#
# The DocParagraph is used to store text paragraphs
# self.words is simply a list of words for the paragraph
#
# the paragraph is filled line by line by the parser..
#
class DocParagraph:
    """Collect the words of one text paragraph.

    `self.words` is a flat list of words; the parser feeds the
    paragraph one line at a time through `add`.
    """

    def __init__(self):
        self.words = []

    def add(self, line):
        """Split `line` on whitespace and append the resulting words."""
        self.words.extend(line.split())

    def get_identifier(self):
        """Return the first word of the paragraph.

        An empty paragraph (which should never happen) yields a
        placeholder string instead.
        """
        if not self.words:
            return "UNKNOWN_PARA_IDENTIFIER!!"
        return self.words[0]

    def dump(self):
        """Print the words, starting a new line once 50 columns are passed.

        Every word is followed by a single space, so printed lines end
        with a space.
        """
        max_width = 50
        pending = ""
        for word in self.words:
            piece = word + " "
            # flush the pending text when this word would not fit
            if len(pending) + len(piece) > max_width:
                print(pending)
                pending = ""
            pending = pending + piece

        if pending:
            print(pending)

    def dump_html(self):
        """Print the paragraph between `para_header` and `para_footer`."""
        print(para_header)
        self.dump()
        print(para_footer)
###########################################################################
#
# DocContent is used to store the content of a given marker.
#
# the "self.items" list contains (field,elements) records, where
# "field" corresponds to a given structure field or function
# parameter (indicated by a "::"), or None for a normal section
# of text/code
#
# hence, the following example:
#
#
# this is an example of what can be put in a content section,
#
# a second line of example text
#
# x :: a simple test field, with some content
# y :: even before, this field has some code content
# {
# y = x+2;
# }
#
# should be stored as
# [ ( None, [ DocParagraph, DocParagraph] ),
# ( "x", [ DocParagraph ] ),
# ( "y", [ DocParagraph, DocCode ] ) ]
#
# in self.items
#
# the DocContent object is entirely built at creation time, you must
# pass a list of input text lines in the "lines_list" parameter.
#
#
class DocContent:
# Parsed content of one marker.  self.items is a list of
# ( field, elements ) records built once by __init__ from a list of
# text lines; "elements" holds DocParagraph and DocCode objects.
#
# NOTE(review): this copy of the file carries no indentation, so the
# comments below describe the apparent nesting -- confirm against a
# properly indented copy.
def __init__( self, lines_list ):
# Split lines_list into fields, paragraphs and code blocks and store
# the result in self.items.
self.items = []
code_mode = 0
code_margin = 0
# NOTE(review): "text" is not referenced anywhere else in this class
text = []
paragraph = None # represents the current DocParagraph
code = None # represents the current DocCode
elements = [] # the list of elements for the current field,
# contains DocParagraph or DocCode objects
field = None # the current field
for aline in lines_list:
if code_mode == 0:
# text mode: "margin" is the number of leading whitespace
# characters removed from the input line
line = string.lstrip( aline )
l = len( line )
margin = len( aline ) - l
# an empty line, or a line made of a single '{', ends the
# current paragraph
if l == 0 or line == '{':
if paragraph:
elements.append( paragraph )
paragraph = None
# an empty line has nothing more to contribute
if line == "":
continue
# the line is '{': switch to code mode and remember the
# column of the opening brace
code_mode = 1
code_margin = margin
code = None
continue
words = string.split( line )
# test for a field delimiter on the start of the line, i.e.
# the token `::' as the second word
#
if len( words ) >= 2 and words[1] == "::":
# start a new field - complete current paragraph if any
if paragraph:
elements.append( paragraph )
paragraph = None
# append previous "field" to self.items; this happens even
# when the previous field is None and "elements" is empty
self.items.append( ( field, elements ) )
# start new field and elements list
field = words[0]
elements = []
words = words[2 :]
# append remaining words to current paragraph
if len( words ) > 0:
line = string.join( words )
if not paragraph:
paragraph = DocParagraph()
paragraph.add( line )
else:
# we're in code mode..
line = aline
# the code block ends with a line that has a single '}' on it
# that is located at the same column as the opening
# brace; the comparison is exact, so any trailing character
# on the line prevents a match
if line == " " * code_margin + '}':
if code:
elements.append( code )
code = None
code_mode = 0
code_margin = 0
# otherwise, add the line to the current code block; the DocCode
# is created without a margin argument
else:
if not code:
code = DocCode()
code.add( line )
# flush the pending paragraph / code block and the last field
if paragraph:
elements.append( paragraph )
if code:
elements.append( code )
self.items.append( ( field, elements ) )
def get_identifier( self ):
# Return the identifier of the first element of the first item; the
# loop returns on its first iteration.
if self.items:
item = self.items[0]
for element in item[1]:
return element.get_identifier()
# should never happen
return "UNKNOWN_CONTENT_IDENTIFIER!!"
def dump( self ):
# Dump every element of every item through element.dump(); items that
# have a field get an extra line printed before and after.
# NOTE(review): the literals "" and " " printed around fields look
# like they may have lost some text -- TODO confirm.
for item in self.items:
field = item[0]
if field:
print ""
for element in item[1]:
element.dump()
if field:
print " "
def dump_html( self ):
# Dump every element of every item through element.dump_html().
# "in_table" records whether the previous item had a field, so that
# something is printed when a run of field items starts, continues
# and ends.
#
# NOTE(review): several string literals in this method are empty or
# span two physical lines in this copy of the file; it looks like the
# text they contained was lost.  Restore them from a pristine copy
# before running -- TODO confirm.
n = len( self.items )
in_table = 0
for i in range( n ):
item = self.items[i]
field = item[0]
if not field:
# plain section: leave the field run first, if one is open
if in_table:
print ""
in_table = 0
for element in item[1]:
element.dump_html()
else:
# field section: enter the field run, or separate this field
# from the previous one
if not in_table:
print ""
in_table = 1
else:
print " |
"
print "" + field + " | "
for element in item[1]:
element.dump_html()
# leave the field run if the last item had a field
if in_table:
print " |
"
######################################################################################
#
#
# The DocBlock class is used to store a given comment block. It contains
# a list of markers, as well as a list of contents for each marker.
#
# "self.items" is a list of ( marker, contents ) elements, where
# 'marker' is a lowercase marker string, and 'contents' is a DocContent
# object
#
# "self.source" is simply a list of text lines taken from the
# uncommented source itself..
#
# finally, "self.identifier" is a simple identifier used to
# uniquely identify the block
#
class DocBlock:
# One comment block: self.items is a list of ( marker, DocContent )
# pairs, self.source the source lines attached to the block and
# self.identifier the identifier taken from the first content added.
#
# NOTE(review): this copy of the file carries no indentation, so the
# comments below describe the apparent nesting -- confirm against a
# properly indented copy.
#
# NOTE(review): both default arguments are list objects created once,
# when the function is defined; source_line_list can be stored as-is
# in self.source.
def __init__( self, block_line_list = [], source_line_list = [] ):
self.items = [] # current ( marker, contents ) list
self.identifier = None
marker = None # current marker
content = [] # current content lines list
# characters accepted in a marker name
alphanum = string.letters + string.digits + "_"
for line in block_line_list:
line2 = string.lstrip( line )
l = len( line2 )
margin = len( line ) - l
# a marker is only looked for on lines longer than 3 characters
# (leading whitespace excluded)
if l > 3:
# a marker starts with '<' and ends with '>', or starts with '@'
# and ends with ':'
ender = None
if line2[0] == '<':
ender = '>'
elif line2[0] == '@':
ender = ':'
if ender:
# skip over the marker name
i = 1
while i < l and line2[i] in alphanum:
i = i + 1
if i < l and line2[i] == ender:
# a new marker was found: store the previous marker with its
# content, if any; lines collected while "marker" is still
# None are dropped here
if marker and content:
self.add( marker, content )
marker = line2[1 : i]
content = []
# keep the text that follows the marker, re-indented with the
# margin of the original line
line2 = string.lstrip( line2[i + 1 :] )
l = len( line2 )
line = " " * margin + line2
content.append( line )
# store the last marker
if marker and content:
self.add( marker, content )
# the source lines are only kept when at least one item was stored
self.source = []
if self.items:
self.source = source_line_list
# this function is used to add a new element to self.items
# 'marker' is a marker string, or None
# 'lines' is a list of text lines used to build a DocContent
# object
#
def add( self, marker, lines ):
# remove the first and last empty lines from the content list
l = len( lines )
if l > 0:
i = 0
while l > 0 and string.strip( lines[l - 1] ) == "":
l = l - 1
while i < l and string.strip( lines[i] ) == "":
i = i + 1
lines = lines[i : l]
l = len( lines )
# add a new marker only if its marker and its content list aren't empty
if l > 0 and marker:
content = DocContent(lines)
# markers are stored in lowercase
self.items.append( ( string.lower(marker), content ) )
# the first content added provides the block identifier
if not self.identifier:
self.identifier = content.get_identifier()
def dump( self ):
# Print each marker between square brackets, followed by its content.
for i in range( len( self.items ) ):
print "[" + self.items[i][0] + "]"
content = self.items[i][1]
content.dump()
def dump_html( self ):
# Print the block: block_header, the identifier, the source lines
# between source_header and source_footer, then the contents of the
# markers.  Nothing is printed for a block without items.
#
# NOTE(review): several string literals in this method are empty or
# span two physical lines in this copy of the file; it looks like the
# text they contained was lost.  Restore them from a pristine copy
# before running -- TODO confirm.
types = [ 'type', 'struct', 'functype', 'function', 'constant',
'enum', 'macro' ]
if not self.items:
return
# start of a block
print block_header
print "" + self.identifier + "
"
# print source code
# NOTE(review): this return happens after block_header was printed
# and before block_footer is -- confirm that this is intended.
if not self.source:
return
lines = self.source
# "l" ends up as the index of the last non-blank source line
l = len( lines ) - 1
while l >= 0 and string.strip( lines[l] ) == "":
l = l - 1
print source_header
for line in lines[0 : l + 1]:
print line
print source_footer
# dump each (marker,content) element; the content of markers listed
# in "types" is not dumped
for element in self.items:
marker = element[0]
content = element[1]
if marker == "description":
print ""
content.dump_html()
print "
"
elif not (marker in types):
print "" + marker + "
"
print ""
content.dump_html()
print "
"
print ""
print block_footer
# filter a given list of DocBlocks. Returns a new list
# of DocBlock objects that only contains the elements whose
# "type" (i.e. first marker) is in the "types" parameter
#
def filter_blocks(block_list, types):
    """Return the blocks of `block_list` whose first marker is in `types`.

    Blocks with an empty `items` list are left out.  The input list is
    not modified and the order of the blocks is preserved.
    """
    return [
        candidate
        for candidate in block_list
        if candidate.items and candidate.items[0][0] in types
    ]
# perform a lexicographical comparison of two DocBlock
# objects. Returns -1, 0 or 1
#
def block_lexicographical_compare(b1, b2):
    """Compare two blocks by their lowercased identifiers.

    Returns -1, 0 or 1.  A block without identifier sorts first: the
    result is -1 when `b1` has none (whatever `b2` holds) and 1 when
    only `b2` has none.
    """
    if not b1.identifier:
        return -1
    if not b2.identifier:
        return 1

    left = b1.identifier.lower()
    right = b2.identifier.lower()
    # the difference of the two comparisons is -1, 0 or 1
    return (left > right) - (left < right)
def block_make_list(source_block_list):
    """Build one DocBlock per entry of `source_block_list`.

    Each entry is indexed as entry[0] (passed as the comment lines) and
    entry[1] (passed as the source lines).
    """
    return [DocBlock(entry[0], entry[1]) for entry in source_block_list]
# dump a list of blocks as a single HTML page
#
def dump_html_1(block_list):
    """Print `html_header`, every block's HTML dump, then `html_footer`."""
    print(html_header)
    for doc_block in block_list:
        doc_block.dump_html()
    print(html_footer)
def make_block_list():
    """Parse the input and extract the comment blocks it contains.

    Lines are read through `fileinput.input()`.  The result is a list
    of two-element sequences `(block, source)`, where `block` is the
    list of lines found inside a documentation comment and `source` is
    the list of source lines that follow it.

    A documentation comment starts with a line made of '/**' followed
    only by '*' (format 1), or by '*' and a closing '/' (format 2).
    The source section that follows a comment ends at the next line
    starting with '/*', unless that comment starts with '@' or '#',
    optionally preceded by one space ('/*@', '/*#', '/* @', '/* #');
    such a comment is kept as part of the source.
    """
    blocks = []
    block = []
    source = []

    # "state" stores the state of the parser:
    #
    #   0 - wait for beginning of comment
    #   1 - parse comment format 1
    #   2 - parse comment format 2
    #
    #   4 - wait for beginning of source
    #   5 - process source
    #
    state = 0

    for line in fileinput.input():
        # drop the line terminator
        if line.endswith("\n"):
            line = line[:-1]

        stripped = line.strip()
        length = len(stripped)

        # A comment met while waiting for, or processing, source ends
        # the current block -- unless it is one of the "kept" forms.
        # The marker character sits right after '/*', i.e. at index 2
        # (or at index 3 after a single space).
        if state >= 4 and length > 2 and stripped[:2] == "/*":
            kept = length >= 4 and (
                stripped[2] in "@#" or stripped[2:4] in (" @", " #")
            )
            if not kept:
                blocks.append((block, source))
                state = 0

        if state == 0:
            # wait for beginning of comment
            if length > 3 and stripped[:3] == "/**":
                i = 3
                while i < length and stripped[i] == "*":
                    i = i + 1

                if i == length:
                    # '/**' followed by any number of '*': format 1
                    block = []
                    source = []
                    state = 1
                elif i == length - 1 and stripped[i] == "/":
                    # the same, closed by a '/': format 2
                    block = []
                    source = []
                    state = 2

        elif state == 1:
            if length == 0 or stripped[0] != "*":
                # every line of a format 1 comment starts with '*';
                # anything else means the block must be forgotten
                block = []
                state = 0
            else:
                i = 1
                while i < length and stripped[i] == "*":
                    i = i + 1

                if i < length and stripped[i] == "/":
                    # a run of '*' followed by '/' ends the comment
                    if block:
                        state = 4
                    else:
                        state = 0
                else:
                    # otherwise keep what follows the leading '*'s
                    block.append(stripped[i:])

                # a comment line is never part of the source
                continue

        elif state == 2:
            if length < 4 or stripped[:2] != "/*" or stripped[-2:] != "*/":
                # not a '/* ... */' line: the comment is over, and this
                # very line is handled as source below
                if block:
                    state = 4
                else:
                    state = 0
            else:
                # remove the comment delimiters, then right-strip
                inner = stripped[2:-2].rstrip()

                if inner.count("*") == length - 4:
                    # nothing but '*' between the delimiters: end of
                    # the format 2 block
                    if block:
                        state = 4
                    else:
                        state = 0
                else:
                    block.append(inner)

                # a comment line is never part of the source
                continue

        # source processing: blank lines before the first source line
        # are skipped, later ones are kept
        if state >= 4:
            if length > 0:
                state = 5
            if state == 5:
                source.append(line)

    # the input ended while a block was still open
    if state >= 4:
        blocks.append([block, source])

    return blocks
# This function is only used for debugging
#
def dump_block_list(list):
    """Print the raw lines of every block; only used for debugging.

    Each block is introduced by a separator line and shows its comment
    lines followed by its source lines.  A closing line is printed
    once, after the last block.
    """
    for entry in list:
        print("----------------------------------------")
        for part in (entry[0], entry[1]):
            for text in part:
                print(text)

    print("---------the end-----------------------")
def main(argv):
    """Extract the comment blocks of the input and print them as HTML.

    `argv` is accepted but not used here; the input lines are read by
    make_block_list().  Only blocks whose first marker is one of
    'type', 'macro', 'enum', 'constant' or 'functype' are printed,
    ordered with block_lexicographical_compare.
    """
    sys.stderr.write("extracting comment blocks from sources...\n")

    raw_blocks = make_block_list()
    doc_blocks = block_make_list(raw_blocks)

    wanted = ['type', 'macro', 'enum', 'constant', 'functype']
    selected = filter_blocks(doc_blocks, wanted)
    selected.sort(block_lexicographical_compare)

    # pass `doc_blocks' instead of `selected' to dump every block
    dump_html_1(selected)
# Run the main program when this file is executed as a script
if __name__ == "__main__":
    main(sys.argv)
# eof