#!/usr/bin/env python2
#
# Note: this code is Python 3 ready, but trips over Debian bug 764848.
# In Python 3.4.3, subprocess.getstatusoutput() returns an incorrect
# status value for normal exits with nonzero status that makes it look
# like the subprocess has been signaled.
#
# Run doclifter against an entire manual tree.
# Sees all files in section 1 through 8 by default.
#
# SPDX-License-Identifier: BSD-2-Clause
from __future__ import print_function
import sys, os, getopt, signal, time, re, subprocess, stat
import thread, threading, Queue
try:
getstatusoutput = subprocess.getstatusoutput
getoutput = subprocess.getoutput
except AttributeError:
import commands
getstatusoutput = commands.getstatusoutput
getoutput = commands.getoutput
# Global configuration and shared run state, mutated by doclifter_driver()
# and the worker functions below.
mandir = "/usr/share/man"  # Root of the manual hierarchy (-I overrides).
patchdir = os.path.abspath("prepatch")  # Where pre-conversion patches live (-p overrides).
outdir = None  # Output directory; chosen in doclifter_driver().
patched = 0  # Count of pages that had a patch available.
makehtml = False  # Also generate HTML output? (-h sets this.)
xslfragment = None  # Path of the citerefentry XSL fragment, once written.
processed = set([])  # Patch stems not yet consumed during a mass run.
excluded_files = []  # Files to skip, loaded by the -X option.
WORKERS = 12 # Adjust proportionately to your number of processors.
def manfile(section, basename=""):
    "Return a manual file or directory based on section name."
    if not basename:
        # No basename given: hand back the section directory itself.
        return "%s/man%s/" % (mandir, section)
    if basename.startswith("/"):
        # Absolute path: pass it through untouched.
        return basename
    if basename.endswith((".gz", ".bz2", ".Z")):
        # Already a full filename with a compression suffix.
        return "%s/man%s/%s" % (mandir, section, basename)
    # Bare stem: build the conventional path.  Note the directory uses
    # only the leading character of the section (e.g. "3tcl" -> man3).
    return "%s/man%s/%s.%s.gz" % (mandir, section[:1], basename, section)
def analyze_manpage(manpage):
    """Provide log annotations based on content.

    Scans the page for telltale marker strings left by documentation
    generators; returns one "Generated from X" line per marker found
    (empty string if none).  Pages flagged this way were machine-made
    and should not be hand-lifted.
    """
    # (marker substring, generator name) pairs.  Some spellings are
    # deliberate historical typos emitted by old tool versions
    # ("reStructeredText") -- do not "fix" them.
    exclusions = (
        ("<html>", "HTML"),
        ("auto-generated by docbook2man-spec", "DocBook"),
        ("automatically generated by docbook2man", "DocBook"),
        ("Generated by db2man.xsl", "XML DocBook"),
        ("Automatically generated by Pod::Man", "Pod::Man"),
        ("Man page generated from reStructeredText", "reStructuredText"),
        ("Man page generated from reStructuredText", "reStructuredText"),
        ("Generator: DocBook XSL Stylesheets", "DocBook stylesheets"),
        ("Generated by docutils manpage writer", "docutils"),
        ("DocBook SGML with docbook-to-man", "DocBook SGML"),
        ("Doxygen", "Doxygen"),
        ("created with latex2man", "latex2man")
    )
    output = ""
    # Read as latin-1 so arbitrary bytes can never raise a decode error;
    # 'with' guarantees the handle is closed even if the read fails
    # (the original leaked the descriptor on exception).
    with open(manpage, "rb") as fp:
        text = fp.read().decode('latin-1')
    for (pattern, generator) in exclusions:
        if pattern in text:
            output += "Generated from %s\n" % generator
    return output
def fetch_page(file, localcopy, patch):
    """Grab a local copy of a man page, patching if needed.

    Decompresses (or copies) `file` to `localcopy`, then applies `patch`
    if that patch file exists.  Returns (status, output): status 0 on
    success, 1 on copy failure.
    """
    output = ""
    # Choose the decompression tool by filename suffix.
    if file.endswith(".gz"):
        cstat = os.system("gunzip <%s >%s" % (file, localcopy))
    elif file.endswith(".bz2"):
        cstat = os.system("bunzip2 <%s >%s" % (file, localcopy))
    elif file.endswith(".Z"):
        cstat = os.system("uncompress <%s >%s" % (file, localcopy))
    else:
        cstat = os.system("cp %s %s" % (file, localcopy))
    if os.WIFSIGNALED(cstat) or os.WEXITSTATUS(cstat):
        # Bug fix: the status used to be returned as a stray third tuple
        # element instead of being interpolated into the message, which
        # made callers' two-element unpacking raise ValueError on the
        # very path that was supposed to report the error.
        return (1, output + "manlifter: copy failed, status %d" % cstat)
    if os.path.exists(patch):
        # Apply the prepatch in place; discard patch(1) droppings.
        stem = os.path.basename(localcopy)
        patch = getoutput("patch -d%s --version-control=never <%s" % (outdir, patch,))
        os.system("rm -f %s/%s.orig %s/%s.rej" % (outdir, stem, outdir, stem))
        if patch:
            output += patch + "\n"
    return (0, output)
def getstem(file):
    "Reduce the name of a man page or generated HTML file to its stem"
    # Peel off a trailing .xml first, then drop the last dot-separated
    # component (the manual-section suffix).
    if file.endswith(".xml"):
        file = file[:-len(".xml")]
    pieces = file.split(".")
    return ".".join(pieces[:-1])
def make_xml(source, options, withsect):
    """Make XML from specified man page.

    Runs doclifter over `source`; returns (status, link-target-or-None,
    captured output).  Status 2 means the page was a .so inclusion stub.
    """
    sectopt = "-S " + withsect if withsect else ""
    (doclifter_status, output) = getstatusoutput("doclifter -I %s %s %s %s" % (mandir, sectopt, options, source))
    if output:
        output += "\n"
    if not os.WIFEXITED(doclifter_status):
        # Should never happen, but has been triggered by Python 3 versions
        # with a buggy getstatusoutput() implementation.
        raise ValueError
    doclifter_status = os.WEXITSTATUS(doclifter_status)
    lxmlloc = None
    if doclifter_status == 2:
        # The page is a .so wrapper: chase the inclusion target so the
        # caller can symlink to that page's translation instead.
        with open(source) as fp:
            contents = fp.read()
        inclusions = re.search(r"\.so\s+(.*)", contents)
        if inclusions:
            lxmlloc = os.path.join(outdir, getstem(inclusions.group(1)) + ".xml")
        return (2, lxmlloc, output)
    return (doclifter_status, None, output)
def validate(translation):
    """Validate an XML file produced by translation.

    Returns (status, output): 0 = OK or deliberately skipped,
    6 = xmllint reported errors, -1 = xmllint was signaled.
    """
    output = ""
    # If it has entity inclusions it won't validate, so don't try.
    # This is only a good idea because man pages that have these are
    # usually trivial wrappers like builtins.1
    try:
        with open(translation) as fp:
            text = fp.read()
        inclusions = re.search("<!ENTITY.*SYSTEM '(.*)'>", text)
        if inclusions:
            output += "Won't validate due to entity inclusion of %s\n" % inclusions.group(1)
            return (0, output)
        if "<equation" in text:
            output += "Won't validate due to MathML inclusions\n"
            return (0, output)
    except IOError:
        # Record the absence but still fall through and let xmllint
        # produce its own complaint about the missing file.
        output += "%s is missing.\n" % translation
    # Run the validation checker
    (bstat, validate_out) = getstatusoutput("xmllint --xinclude --valid %s >/dev/null" % translation)
    if validate_out:
        output += validate_out + "\n"
    if os.WIFSIGNALED(bstat):
        output += "Bailing out of xmllint...\n"
        return (-1, output)
    xmllint_error_status = os.WEXITSTATUS(bstat)
    if xmllint_error_status:
        output += "xmllint error status:%s\n" % xmllint_error_status
        return (6, output)
    return (0, output)
def format(translation, fmt, xslfragment):
    """Format an XML file to a specified format via xmlto.

    Returns (status, output): 0 = OK, 6 = xmlto failed, -1 = signaled.
    """
    output = ""
    # Splice in the XSL customization layer when one was generated.
    if xslfragment:
        command = "xmlto -m %s %s %s" % (xslfragment, fmt, translation)
    else:
        command = "xmlto %s %s" % (fmt, translation)
    (bstat, format_out) = getstatusoutput(command)
    if format_out:
        output += format_out + "\n"
    if os.WIFSIGNALED(bstat):
        output += "Bailing out of %s formatting...\n" % fmt
        return (-1, output)
    format_error_status = os.WEXITSTATUS(bstat)
    if format_error_status:
        output += "format error status:%s\n" % format_error_status
        return (6, output)
    return (0, output)
def deploy(source, target):
    """Move a finished translation to its permanent location.

    Returns (0, "") on success, or (3, message) when the rename fails,
    with a best-guess diagnosis in the message.
    """
    try:
        os.rename(source, target)
    except OSError as e:
        # Diagnose the most likely cause.  Bug fix: the original used two
        # independent `if` statements, so the second test's else-arm
        # clobbered a "source file is nonexistent" diagnosis with
        # "unknown"; an elif chain keeps the first match.
        if not os.path.exists(source):
            reason = "source file is nonexistent"
        elif not os.path.exists(os.path.dirname(source)):
            reason = "source directory is nonexistent"
        else:
            reason = "unknown"
        return (3, "Rename of %s to %s failed, errno = %d, because %s" % (source, target, e.errno, reason))
    return (0, "")
def makelink(source, target):
    "Best-effort symlink from target to source; an existing link is fine."
    src = os.path.abspath(source)
    dst = os.path.abspath(target)
    try:
        os.symlink(src, dst)
    except OSError:
        # Typically EEXIST from a previous run; deliberately ignored.
        pass
def singlerun(fn, options, tmpstem=None, batchmode=False):
    """Test-format a single file.

    Fetches `fn` into outdir, lifts it to XML with doclifter, optionally
    validates and/or formats it to HTML, and returns a 3-tuple
    (status, foundpatch, output) where status is 0 on success, the
    doclifter/validate/format status otherwise, foundpatch says whether
    a prepatch existed, and output is the accumulated log text.
    In batchmode, results are deployed into the outdir tree and
    up-to-date conversions are skipped.
    """
    if tmpstem is None:
        # Unique scratch-file stem per process/thread.
        tmpstem = "foo" + repr(os.getpid()) + ":" + str(thread.get_ident())
    global patched
    foundpatch = False
    if not os.path.exists(fn):
        return (0, False, "")
    output = ""
    # Strip any compression suffix to get "name.section".
    if fn[-3:] == ".gz":
        withsect = os.path.basename(fn)[:-3]
    elif fn[-4:] == ".bz2":
        withsect = os.path.basename(fn)[:-4]
    elif fn[-2:] == ".Z":
        withsect = os.path.basename(fn)[:-2]
    else:
        withsect = os.path.basename(fn)
    # NOTE(review): rindex raises ValueError if the basename has no dot;
    # callers appear to pass only name.section files -- confirm.
    dot = withsect.rindex(".")
    section = withsect[dot+1:dot+2]
    subdir = os.path.join(outdir, "man" + section)
    stem = getstem(withsect)
    xmlloc = os.path.join(subdir, stem + ".xml")
    # Count patches here so our stats won't be off
    patch = os.path.join(patchdir, withsect + ".patch")
    if os.path.exists(patch):
        patched += 1
        foundpatch = True
    try:
        global processed
        tmpstem = os.path.join(outdir, tmpstem)
        source = tmpstem + ".man"
        # Grab the actual manual page
        localcopy = os.path.join(outdir, withsect)
        (status, output) = fetch_page(fn, localcopy, patch)
        if (status):
            return (status, False, output)
        # Save work by doing conversions only as needed
        analysis = analyze_manpage(localcopy)
        rebuild_xml = True
        if batchmode and os.path.exists(xmlloc):
            if os.stat(fn).st_mtime < os.lstat(xmlloc).st_mtime:
                output += "XML conversion is up to date.\n"
                processed.discard(withsect)
                rebuild_xml = False
        # Machine-generated pages are reported and skipped (status 7).
        for (excluded, caption) in (("DocBook", "from DocBook masters."),
                                    ("Doxygen", "by Doxygen."),
                                    ("latex2man", "by latex2man.")):
            if batchmode and excluded in analysis:
                output += "Made " + caption + "\n"
                processed.discard(withsect)
                return (7, False, output)
        htmlloc = os.path.join(subdir, stem + ".html")
        if rebuild_xml:
            # Note the the patch was used
            processed.discard(withsect)
            # Add any annotations
            output += analysis
            # Save the location of the page
            loc = tmpstem + ".loc"
            lfp = open(loc, "w")
            lfp.write(withsect)
            lfp.close()
            # Move the source file into the output directory
            os.rename(localcopy, source)
            # Run the translator
            (doclifter_status, lxmlloc, note) = make_xml(source, options, withsect if batchmode else None)
            output += note
            if doclifter_status not in (0, 2):
                if not batchmode:
                    output += "doclifter error status: %s\n" % doclifter_status
                return (doclifter_status, foundpatch, output)
            translation = tmpstem + ".man.xml"
            # Warn about FIX-ME problems
            output += getoutput("grep FIX-ME " + translation + " 2>/dev/null")
            # If the translation went through, cleaning up consists
            # of putting this in its permanent location.
            try:
                # This will foo up if we ever have to symlink between dirs
                if batchmode and not os.path.exists(subdir):
                    os.mkdir(subdir)
            except OSError as e:
                return(3, foundpatch, output + "Creation of %s failed, errno = %d\n"%(subdir,e.errno))
            if doclifter_status == 2:
                # .so wrapper: just symlink to the target's translation.
                makelink(lxmlloc, xmlloc)
            if doclifter_status == 0:
                if not makehtml:
                    (status, more) = validate(translation)
                    output += more
                    if batchmode and status:
                        # Failed validation: discard both outputs.
                        os.remove(translation)
                        try:
                            os.remove(htmlloc)
                        except OSError:
                            pass
                        return (status, foundpatch, output)
                if batchmode:
                    (status, more) = deploy(translation, xmlloc)
                    translation = xmlloc
                    output += more
                    if status:
                        return (status, foundpatch, output)
        # Save work by doing HTML conversions only as needed
        rebuild_html = makehtml
        if batchmode and os.path.exists(htmlloc):
            if os.stat(xmlloc).st_mtime < os.lstat(htmlloc).st_mtime:
                output += "HTML conversion is up to date\n"
                rebuild_html = False
        if rebuild_html:
            # NOTE(review): if rebuild_xml was False above, `translation`
            # is unbound on the format() path below -- confirm against
            # upstream whether that path is actually reachable.
            if batchmode:
                htmlloc = os.path.join(subdir, stem + ".html")
            else:
                htmlloc = stem + ".html"
            if batchmode and stat.S_ISLNK(os.lstat(xmlloc).st_mode):
                # XML is a symlink (.so wrapper); mirror it for HTML.
                makelink(os.readlink(xmlloc)[:-4]+".html", htmlloc)
            else:
                (status, more) = format(translation, "xhtml-nochunks", xslfragment)
                output += more
                if status:
                    if batchmode:
                        os.remove(xmlloc)
                        try:
                            os.remove(htmlloc)
                        except OSError:
                            pass
                    return (status, foundpatch, output)
    finally:
        # Clean up
        # NOTE(review): `source` is unbound here if the very first
        # statements of the try block raised -- confirm intent.
        if batchmode:
            if os.path.exists(source):
                os.remove(source)
    return (0, foundpatch, output)
def sectionfiles(sections):
    "Generate files corresponding to a list of sections."
    candidates = []
    for section in sections:
        directory = manfile(section)
        candidates.extend(manfile(section, entry) for entry in os.listdir(directory))
    candidates.sort()
    # Skip dotfiles left lying around in the manual directories.
    candidates = [fn for fn in candidates if not os.path.basename(fn).startswith(".")]
    # Until Sam Hocevar stops installing build intermediates. Bletch!
    return [fn for fn in candidates if "_build_libcaca" not in fn]
# Progress counters shared between massrun() and its SIGUSR2 report handler.
total = eligible = starttime = 0
def report_elapsed(elapsed):
    """Report elapsed time in friendly "HHh:MMm:SSs" format.

    `elapsed` is a duration in whole seconds.
    """
    # Use floor division so the fields stay integral under Python 3,
    # where / on ints yields floats (the old code worked only because
    # %02d happens to truncate).
    hours = elapsed // 3600
    minutes = (elapsed % 3600) // 60
    seconds = elapsed % 60
    return "%02dh:%02dm:%02ds" % (hours, minutes, seconds)
def massrun(files, options, profiling):
    """Test against all files in specified sections.

    Runs singlerun() in batch mode over every file (minus exclusions),
    tallying outcomes by status code and printing a per-file log leader
    "! file=status" that statistics() and errorclean() later parse.
    SIGUSR2 dumps a progress report; SIGHUP/SIGINT/SIGTERM bail out.
    """
    def bailout(signum, frame):
        # Signal handler: remove profiling temp files and exit cleanly.
        print("\nBailing out with signal %d..." % signum)
        os.system("rm -f doclifter_test%s.py doclifter_test%s.py[co]" % (os.getpid(), os.getpid()))
        sys.exit(0)
    global total, eligible, starttime
    total = 0
    starttime = int(time.time())
    eligible = len(files)
    doclifter_error_count = xmllint_error_count = docbook_count = total = 0
    def report(sig, frame, out=sys.stderr):
        # Progress/statistics line; also wired to SIGUSR2 so a long run
        # can be polled from outside without interrupting it.
        ftotal = float(total)
        elapsed = int(time.time()) - starttime
        out.write("\n%%%d of %d files in %s, %d OK, %d preconverted, %d patched, %d doclifter errors, %d validation failures, %2.2f%% good.\n" % \
            (total, eligible, report_elapsed(elapsed),
             (total - doclifter_error_count - xmllint_error_count),
             docbook_count,
             patched,
             doclifter_error_count,
             xmllint_error_count,
             (ftotal-doclifter_error_count-xmllint_error_count-patched)*100.0/ftotal))
    def test(fn, options):
        # Convert one page, timing it; the "! file=status" leader (with
        # a trailing * when a patch was found) keys the log parsers.
        before = time.time()
        (status, patched, output) = singlerun(fn=fn, options=options, batchmode=True)
        after = time.time()
        summary = "! %s=%d%s (%2.2f)\n" % (fn, status, " *"[patched], after-before)
        return (status, summary + output + "\n")
    signal.signal(signal.SIGUSR2, report)
    signal.signal(signal.SIGHUP, bailout)
    signal.signal(signal.SIGINT, bailout)
    signal.signal(signal.SIGTERM, bailout)
    print("%Test started", time.ctime())
    if profiling:
        print("%Profiling enabled.\n")
    else:
        print("%Profiling not enabled.\n")
    try:
        # Producer/consumer: one worker thread feeds results through a
        # bounded queue; the main thread drains it and tallies statuses.
        q = Queue.Queue(maxsize=WORKERS)
        def stuffone(fn):
            (status, output) = test(fn=fn, options=options)
            q.put((fn, status, output), block=True)
        def stuffall():
            for fn in files:
                if fn not in excluded_files:
                    stuffone(fn)
                    #s = threading.Thread(target=stuffone, args=(fn,))
                    #s.start()
                    #s.name = fn
        t = threading.Thread(target=stuffall)
        t.start()
        t.name = "MASTER"
        while t.is_alive():
            (fn, status, output) = q.get(block=True)
            sys.stdout.write(output)
            sys.stdout.flush()
            if status == -1:
                # A formatter was signaled: abandon the whole run.
                break
            elif status in (1, 4): # Doclifter parse or internal error.
                doclifter_error_count += 1
            elif status == 2: # .so inclusion
                pass
            elif status in (3, 5): # File I/O error or keyboard interrupt
                pass
            elif status == 6: # Validation failure
                xmllint_error_count += 1
            elif status == 7:
                # Page was already machine-generated from richer markup.
                docbook_count += 1
            total = total + 1
    except KeyboardInterrupt:
        pass
    sys.stderr.write("%% Waiting on thread termination... \n")
    for t in threading.enumerate():
        sys.stderr.write("***** Zombie thread %d = %s\n" % (t.ident, t.name))
        if t != threading.current_thread():
            t.join()
    report(0, sys.stdout)
# XHTML boilerplate wrapped around each generated section index.
htmlheader = '''
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Manlifter contents page</title>
</head>
<body>
'''
htmltrailer = "</body>\n</html>\n"
def genindex(ofp):
    "Write per-section HTML index lists for the tree under xmlman/ to ofp."
    # Collect all section/name/description triples
    entries = []
    section_re = re.compile("/man([^/]*)")
    extract_re = re.compile("<refpurpose>([^<]*)</refpurpose>")
    section_dict = {}
    for (root, dirs, files) in os.walk('xmlman'):
        for fname in files:
            try:
                if not fname.endswith(".xml"):
                    continue
                # The manual section is encoded in the manN directory name.
                m = section_re.search(root)
                if not m:
                    continue
                section = m.group(1)
                section_dict[section] = []
                # Page name is the filename minus its .xml suffix.
                name = ".".join(fname.split(".")[:-1])
                # Pull the one-line description out of refpurpose.
                path = os.path.join(root, fname)
                fp = open(path)
                contents = fp.read()
                fp.close()
                m = extract_re.search(contents)
                description = m.group(1) if m else "(no description)"
                # Build an index entry
                entries.append((section, name, description))
            except IOError:
                pass
    entries.sort()  # In case the directory was pieced together by several runs
    for (section, name, description) in entries:
        section_dict[section].append((name, description))
    for section in sorted(section_dict.keys()):
        ofp.write(htmlheader)
        ofp.write("<h1>%s:</h1>\n<dl>\n" % section)
        for (name, description) in section_dict[section]:
            ofp.write("<dt><a href='man%s/%s.html'>%s</a></dt><dd>%s</dd>\n"
                      % (section, name, name, description))
        ofp.write("</dl>\n")
        ofp.write(htmltrailer)
def statistics():
    """Summarize a mass-run log read from standard input.

    Counts outcomes per status code using the "! file=status" leaders
    that massrun() emits, tallies warning paragraphs, and prints a
    percentage breakdown plus patch statistics.
    """
    global patched
    # Legend for each status-code slot, indexed by the numeric status.
    legends = (
        "OK ", # No error
        "???", # Unliftable (normal error status)
        ".so", # failure due to inclusion
        "I/O", # I/O failure, could not reach page
        "!!!", # Internal error, doclifter blew up
        "^C ", # Translation interrupted
        "XML", # XML validation failure
        "NOP", # Already in DocBook
    )
    counts = [0] * len(legends)
    warnings = 0
    # Latch set when a warning line is seen; the blank line that ends a
    # per-page paragraph then counts one warning.
    warn_latch = False
    while True:
        line = sys.stdin.readline()
        if not line:
            break
        elif not line.strip():
            # Paragraph boundary: count a pending warning, if any.
            # NOTE(review): the latch is only reset by the next "!"
            # leader, so consecutive blank lines each count -- confirm
            # whether logs can contain double blanks.
            if warn_latch:
                warnings += 1
            continue
        if "warning -" in line:
            warn_latch = True
        if line[0] != '!':
            continue
        # A new "! file=status" leader: reset the latch and parse it.
        warn_latch = False
        line = line[2:]
        rcolon = line.rindex("=")
        file = line[:rcolon]
        retval = line[rcolon+1:].split()[0]
        if retval.endswith("*"):
            # Trailing * marks a page that had a prepatch.
            patched += 1
            retval = retval[:-1]
        # Strip any compression suffix before basename extraction.
        if file.endswith(".gz"):
            file = file[:-3]
        elif file.endswith(".bz2"):
            file = file[:-4]
        elif file.endswith(".Z"):
            file = file[:-2]
        file = os.path.basename(file)
        counts[int(retval)] += 1
    total = sum(counts)
    for (i, count) in enumerate(counts):
        print("%d = %s: %5d %2.2f%%" % (i, legends[i], count, (count * 1.0)*100/total))
    good = counts[0]
    bad = sum(counts[1:7])
    print("Total: %d Errors: %d Warnings: %d" % (total, bad, warnings))
    print("Patched: %d (%2.2f%%)" % (patched, patched*100/float(total)))
    print("With patches: %d (%2.2f%%)" % (good, good*100/float(total)))
    print("Without patches: %d (%2.2f%%)" % (good-patched, (good-patched)*100/float(total)))
def errorclean(error_only, pattern):
    """Filter a mass-run log from stdin down to the interesting failures.

    With `pattern`, emit just the page names whose trailing messages
    match it.  Otherwise emit header+messages (or just the name when
    `error_only`) for pages whose status/messages look like real
    problems, suppressing known-benign cases.
    """
    if pattern:
        pattern = re.compile(pattern)
    pagename = re.compile(r"! (.*)=([0-9]+)")
    while 1:
        header = sys.stdin.readline()
        if not header:
            break
        # Look for a log leader
        m = pagename.search(header)
        if not m:
            continue
        subject = m.group(1)
        status = int(m.group(2))
        # Collect following error messages up to a blank line
        trailer = ''
        while 1:
            line = sys.stdin.readline()
            trailer += line
            if not line or not line.strip():
                break
        if pattern:
            # Emit by pattern
            if pattern.search(trailer):
                sys.stdout.write(subject+"\n")
        else:
            # Emit some of them by status
            def matches(s): return trailer.find(s) > -1
            # Clean successes are boring unless they carried warnings.
            if status == 0 and not matches("warning") and not matches("FAILED") and not matches("offset") and not matches("Reversed"):
                continue
            # Empty pages are a known-benign lift failure.
            if status == 1 and (matches("page is empty") or matches("page has no text")):
                continue
            # 2 = .so wrapper, 7 = already DocBook: not errors.
            if status in (2, 7):
                continue
            # Otherwise, emit
            if error_only:
                print(subject)
            else:
                sys.stdout.write(header + trailer)
def patchman(stem="foobar"):
    "Make a patch against the last page lifted."
    trialpage = stem + ".man"
    if not os.path.exists(trialpage):
        sys.stderr.write("manlifter: no trial page waiting.\n")
        raise SystemExit(1)
    if not os.path.exists(stem + ".loc"):
        sys.stderr.write("manlifter: no saved page location.\n")
        raise SystemExit(1)
    # Retrieve the location of the last page
    lfp = open(stem + ".loc")
    withsect = lfp.read()
    lfp.close()
    # Refuse to clobber an existing patch.
    patch = os.path.join(patchdir, withsect + ".patch")
    if os.path.exists(patch):
        sys.stderr.write("manlifter: patch for %s already exists.\n" % withsect)
        raise SystemExit(1)
    # Copy the page aside, let the user edit one copy, diff the two.
    unpatched = withsect + "-unpatched"
    try:
        os.system("cp %s %s" % (trialpage, unpatched))
        os.system("cp %s %s" % (trialpage, withsect))
        if os.system(os.getenv("EDITOR") + " " + withsect) == 0:
            os.system("diff -u %s %s >%s" % (unpatched, withsect, patch))
    finally:
        os.system("rm -f %s %s %s" % (withsect, withsect + "~", unpatched))
# XSL customization layer: renders citerefentry elements as hyperlinks
# into the generated HTML tree (the %s is filled with the output dir).
citereftemplate = '''
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<xsl:param name="citerefentry.link" select="1"/>
<xsl:template name="generate.citerefentry.link">
<xsl:text>%s</xsl:text>
<xsl:text>/man</xsl:text>
<xsl:value-of select="manvolnum"/>
<xsl:text>/</xsl:text>
<xsl:value-of select="refentrytitle"/>
<xsl:text>.html</xsl:text>
</xsl:template>
</xsl:stylesheet>
'''
def doclifter_driver(options, arguments):
    """Lift old markup to new.

    Top-level driver: interprets getopt-style `options`, then either
    lifts the named pages in `arguments`, runs one of the log-analysis
    modes, or mass-converts whole manual sections into outdir.
    """
    global mandir, makehtml, outdir, xslfragment, patchdir, makepatch, excluded_files
    filelist = []
    sections = []
    callopts = ""          # Options passed straight through to doclifter.
    patchlift = False
    makehtml = False
    errorfilter = False
    quiet = False
    fval = None            # Argument of -f (file list) / pattern for -e.
    makepatch = False
    profiling = False
    excluded_files = []
    for (switch, val) in options:
        if (switch == '-d'):
            callopts += " -d " + val
        elif (switch == '-e'):
            errorfilter = True
        elif (switch == '-f'): # Translate files in the specified list
            fval = val
        elif (switch == '-h'):
            makehtml = True
        elif (switch == '-I'): # Specify the root of the manual hierarchy
            mandir = val
        elif (switch == '-m'): # Make a patch from the last fetched page
            makepatch = True
        elif (switch == '-M'): # Make a patch with specified page
            patchlift = True
        elif (switch == '-p'): # Specify patch directory
            patchdir = os.path.abspath(val)
        elif (switch == '-P'):
            profiling = True
        elif (switch in ("-q", '-v', '-w')): # Set verbosity level
            quiet = True
            callopts += " " + switch
        elif (switch == '-s'): # Specify search list of sections
            sections.append(val)
        elif (switch == '-S'): # Generate statistics from log on stdin
            statistics()
            sys.exit(0)
        elif (switch == '-X'):
            excluded_files = open(val).read().split()
    if not sections:
        sections = ["1", "2", "3", "4", "5", "6", "7", "8"]
    if not outdir:
        # Batch runs build a tree under xmlman/; single-page runs work
        # in the current directory.
        if not arguments:
            outdir = 'xmlman'
        else:
            outdir = '.'
    # Clean/create the output directory
    if not arguments:
        if not os.path.exists(outdir):
            os.mkdir(outdir)
        # Create XSL fragment for making refentries into links
        xslfragment = os.path.abspath(os.path.join(outdir, "citerefentry.xsl"))
        fp = open(xslfragment, "w")
        fp.write(citereftemplate % outdir)
        fp.close()
    try:
        # Process args, if present
        if arguments:
            found = False
            for file in arguments:
                # Try each section in turn until the page is found.
                for section in sections:
                    manpage = manfile(section, file)
                    print("Trying", manpage)
                    if os.path.exists(manpage):
                        (status, patched, output) = singlerun(manpage, callopts, "foobar", batchmode=False)
                        print(output)
                        found = True
                        break
                if patchlift:
                    patchman()
            if not found:
                print("Not found.")
        elif makepatch:
            patchman()
        elif errorfilter:
            errorclean(quiet, fval)
        elif fval:
            # -f mode: convert exactly the files listed in fval.
            fp = open(fval)
            filelist = [x.rstrip() for x in fp.readlines()]
            fp.close()
            massrun(filelist, callopts, profiling)
        else:
            # Full batch over all selected sections; track which
            # prepatches go unused so stale ones can be reported.
            global processed
            processed = set([])
            if os.path.exists(patchdir):
                processed = set([x.replace(".patch", "").replace(".correction", "") for x in os.listdir(patchdir)])
            massrun(sectionfiles(sections), callopts, profiling)
            if processed:
                print("%% %d patches not used:" % len(processed))
                for file in processed:
                    print(file)
    finally:
        pass
        #os.remove(xslfragment)
    # Now, rebuild the index page
    if makehtml:
        fp = open(os.path.join(outdir, "index.html"), "w")
        genindex(fp)
        fp.close()
if __name__ == "__main__":
    # Find a copy of doclifter
    # (for/else: the else arm runs only if no candidate was found).
    for pathdir in ["."] + os.environ["PATH"].split(":"):
        where = os.path.join(pathdir, "doclifter")
        if os.path.exists(where):
            break
    else:
        sys.stderr.write("manlifter: can't find doclifter!\n")
        sys.exit(1)
    # Gather options
    (options, arguments) = getopt.getopt(sys.argv[1:], "d:ef:hI:mMp:Pqs:SvwX:")
    doclifter_driver(options, arguments)
# End