Codebase list doclifter / upstream/2.20
Import Upstream version 2.20 Boyuan Yang 2 years ago
21 changed file(s) with 4287 addition(s) and 2807 deletion(s).
0 *.html
1 *.1
2 *.tar.gz
3 xmlman
4 prepatch
5 SHIPPER.*
6
0 extralines = "<p>You can browse a report on <a href='bugs.html'>manual-page bugs found by doclifter</a>.</p>\n"
3737 doclifter-$(VERSION).md5: doclifter-$(VERSION).tar.gz
3838 @md5sum doclifter-$(VERSION).tar.gz >doclifter-$(VERSION).md5
3939
40 # Note: This will show a spurious diff if pic2plot is not installed.
4041 check:
4142 @cd tests >/dev/null; make --quiet
4243
6364 release: doclifter-$(VERSION).tar.gz doclifter-$(VERSION).md5 doclifter.html manlifter.html
6465 shipper version=$(VERSION) | sh -e -x
6566
66 htmlclean: rm *.html
67 htmlclean:
68 rm *.html
6769 refresh: htmlclean doclifter.html manlifter.html
6870 shipper -N -w version=$(VERSION) | sh -e -x
6971
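The `doclifter-$(VERSION).md5` target above just runs `md5sum` over the release tarball. As an illustrative sketch (not part of the distribution), the same digest can be computed from Python with the standard hashlib module:

```python
import hashlib

def md5_digest(path):
    """Hex MD5 of a file, matching the checksum md5sum prints for it."""
    h = hashlib.md5()
    with open(path, "rb") as f:
        # Read in chunks so large tarballs don't have to fit in memory.
        for chunk in iter(lambda: f.read(8192), b""):
            h.update(chunk)
    return h.hexdigest()
```

The Makefile redirects `md5sum`'s output to `doclifter-$(VERSION).md5`; the function above returns only the digest string, without the trailing filename.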
+323
-0
NEWS
0 = doclifter history =
1
2 2.20: 2021-09-20::
3 Handle \[en] in NAME sections.
4 Use modern GCC error-line tags.
5 Handle groff hex escapes like \\*[u92DC].
6 Ubuntu has abolished /usr/bin/python, change shebang to python3.
7
8 2.19: 2019-03-20::
9 Handle .Bf/.Ef in mdoc.
10
11 2.18: 2018-06-12::
12 .in +4/.EX...EE/.in is now translated structurally.
13 Cope gracefully with some idiosyncrasies in OpenSSL library pages.
14 Recognize new-style pod2man header.
15 Ignore .Dd $Mdocdate$. Automatically lift 2-digit dates to 4 digits.
16 Mwww fix to .MTO generation; also, ignore .LINKSTYLE.
17 In mm, interpret .DS/.DE as informal figure and .FG as a caption element.
18 Process mdoc .Lk request.
19
20 2.17: 2016-03-08::
21 Add role mapping for bold highlights.
22 Fix list syntax not being terminated by PP.
23 Fix a bug that caused pages containing PIC diagrams to get clobbered.
24 Fix a bug introduced in 2.16 that caused spurious failures under Python 2.
25 Error messages for command and function syntax parse failures have improved.
26
27 2.16: 2016-02-25::
28 Code now runs under either Python 2 or Python 3.
29 Possible input encodings can be specified with the -i option.
30 Change default output encoding from ISO-8859-1 to UTF-8.
31 Handle .de co and .de au in NAME sections; the groff suite needs this.
32
33 2.15: 2014-06-03::
34 The usual minor improvements for strange edge cases.
35 Work around bugginess of --postvalid in recent xmllint versions.
36 Cleanup for new version of pylint.
37
38 2.14: 2014-03-18::
39 BSD port fix.
40
41 2.13: 2013-09-27::
42 Simplify and improve troff expression evaluation.
43
44 2.12: 2013-06-17::
45 New logic prevents spurious warnings from .in +N just before .nf.
46 Many more instances of .ta are now automatically handled.
47 Multi-file compilation was broken, is now repaired.
48
49 2.11: 2013-06-01::
50 W3C moved a math DTD; cope.
51 Improved .Bl/.El handling and updated canned strings in mdoc.
52 Accept \(hy in name sections.
53 Handle &numsp;, inadvertently omitted from DocBook v4.
54 Added -V for version option.
55
56 2.10: 2013-03-17::
57 Preserve trailing comments after table rows (example: matherr(3)).
58 Add support for some previously missing groff extension glyphs.
59 Improved handling of .Bd/.Be in mdoc.
60
61 2.9: 2012-07-30::
62 Handle foojzs pages better. Interpret some cases of .rj.
63 Recognize "Feature Test" as a function synopsis ender.
64 Handle m, r and d troff conditionals.
65 Process .ti with positive indent into <blockquote> around the following line.
66 Support all mdoc special-character strings.
67 Improved recognition of program listings.
68 Fix brown-paper-bag bug in processing of mdoc.
69
70 2.8: 2012-06-24::
71 Fix a bug in command-synopsis parsing pointed out by Tom Browder.
72 Lifts 97% of 11029 pages in a full Ubuntu Precise Pangolin release.
73
74 2.7: 2011-08-23::
75 Improvement for lynxprep handling by Jon Vyse.
76
77 2.6: 2010-11-26::
78 Clean up glitches revealed by pychecker. Fix buggy interpretation of ms .AI
79 macro. Map TBL "box" attribute to Docbook frame="border".
80
81 2.5: 2010-10-19::
82 Handle groff \m color extension. Deal gracefully with manpages generated by
83 reStructuredText. Cope with groff-style \F font escapes better.
84 Partial interpretation of troff \h.
85
86 2.4: 2010-07-22::
87 eqn markup is now handled if the eqn -TMathml switch produces results.
88 Bell Labs or Berkeley meaning of .P1 is dispatched to depending on
89 whether .P2 is present. Added -w option for strict portability checking.
90 Fedora bug 220736 fixed. All troff glyphs are now mapped (added
91 bracket-pile characters, yogh, hooked-o, and underdot). You are now
92 warned of sequences that look like glyphs but can't be mapped.
93 Table handling for mdoc pages has been much improved. Tests for
94 requests that can't be turned into structure are stricter.
95 Appropriate cases of \o are now translated into Latin-1 and Latin-2
96 letters with accents. Inline ad-hoc tables made with .ta and
97 literal tabs are now lifted. Groff extended escapes $* and $@ are
98 now handled. Speed optimizations so it's about 30% faster, and a
99 profiling switch on manlifter. Rudimentary DocBook V5 translation, but
100 inclusions and character entities are iffy and untested.
101 Lifts 94% of 11863 pages in a full Ubuntu Lucid Lynx install.
102
103 2.3: 2006-12-25::
104 Work around a bug in db2man.xsl. Implement Markus Hoenicka's
105 requested behavior for multiple-file conversions. Implement
106 translation of groff extended .cc and .c2 requests. Ignore
107 the .TA macro that occurs duplicatively with .ta in X.org
108 manual pages. Cope with unresolved .Sx references in mdoc.
109 Handle .Ex and .Ee. Cope with X consortium macro preamble better.
110 .RS/.RE is now fully handled, no more spurious warnings.
111
112 2.2: 2005-01-15::
113 Have manlifter create subdirectories and the xslfragment
114 only in batch mode. Use current list indent on block start/end.
115
116 2.1: 2005-01-14::
117 Interrupt handlers are refactored so manlifter can be aborted with
118 a single ^C; as a result, exit values 4 and 5 have swapped places.
119 In manlifter, don't remove the result file unless we're in batchmode.
120 Lifts 96% of 11121 pages in a full Fedora Core 3 install.
121
122 2.0: 2004-12-24::
123 Added manlifter to the distribution. doclifter no longer strips off file
124 extensions before appending .xml. Major improvement in parsing of
125 displays; C function prototypes are now recognized in them.
126
127 1.15: 2004-11-20::
128 Fix logic for flushing mdoc namediv. Handle tables within mdoc
129 lists better. Strip out some pod2man-generated cliches.
130
131 1.14: 2004-09-02::
132 Added -e option to set encoding in the output XML. Documented
133 required file extensions for mm, me, ms. Now lift some trivial uses
134 of eqn(1) markup. Better handling of mm header markup. Interpret
135 the X source distribution's local macros as a secondary markup.
136 Nuke &hairsp;: it's documented but apparently not actually defined.
137
138 1.13: 2004-08-13::
139 Manual date now goes in refentryinfo, as Steve Cheng suggested.
140 Restored correct parsing of multicommand synopses.
141
142 1.12: 2004-07-27::
143 Implemented handling of mdoc .Brq macro. Code no longer chokes on
144 multiple Synopsis headers.
145
146 1.11: 2004-07-26::
147 Speed optimizations. Improved pod2man detection. Close <anchor/> properly.
148 .UN before .SH or .SS sets the XML ID of the generated section.
149 Boldfaced lines immediately before tables are interpreted as titles. .UR now
150 generates <link> for local links. Improved .RE handling that fixes
151 a couple of edge cases. Multiline table entries are now interpreted as full
152 blocks, so commands work normally there.
153
154 1.10: 2004-07-06::
155 Enhance to handle lynx dump pages. Fix .Fa interpretation.
156 Gets 96% of 10862 Fedora Core 2 pages.
157
158 1.9: 2004-06-01::
159 John Franklin's support for the Vt macro in mdoc. Tuning for Psyco.
160
161 1.8: 2004-03-01::
162 Avoid choking on malformed tables in Qt pages. Fix minor bug in handling
163 of nested .if/.ie requests. Process Fa macro correctly.
164
165 1.7: 2004-02-17::
166 Handle .Pa tags in synopses better. This version lifts 96% of 10316
167 man pages in a full Fedora Core 1 installation.
168
169 1.6: 2004-01-02::
170 Simpler, better entity translation logic; the -s and -x options are gone.
171
172 1.5: 2003-12-29::
173 Fixes to RPM packaging.
174
175 1.4: 2003-12-26::
176 Handle .TQ reduction. Translate attempts to fake up double quotes
177 in text with `` and ''. Catch a few more .RS/.RE cases.
178
179 1.3: 2003-12-15::
180 Process .RS/.RE tags to generate list nesting. As a side effect,
181 this change fixes bad interactions between .ig and .TP. Fix a bug in
182 gathering hints from function prologs. Use <varname>
183 rather than <symbol> for variables. Work around a common bug in ISC
184 man pages. Evaluate one-line .el macros properly.
185
186 1.2: 2003-12-08::
187 SGML-generation support removed; the -s and -x options now
188 control whether troff special characters are translated to the
189 ISO entity set or the XHTML entity set.
190 Corrected a bug in processing of synopses with multiple commands.
191 Handle \f[012434] font changes in synopses correctly. -D option
192 supports posting hints at startup time. Better recognition of
193 filenames and commands in running text. Error return values
194 now convey more information.
195
196 1.1: 2003-12-04::
197 Fixed a bug in conditional evaluation that twadmin(8) tickled.
198 Better detection of pure inclusions. Better blank-section
199 elimination. Kleene star recognized as ... synonym in command
200 synopses. Correct some bugs in semantic-hint collection.
201 Limited internationalization -- recognize "NAME" in a couple
202 of different languages. Recognize Qt manual pages and use their
203 conventions. Better lifting of mandoc-generated pages. Translate
204 groff-style \[...] escapes, flag unknown ones. Can now parse
205 K&R style function prototypes as well as ANSI ones. This version
206 lifts 96% of 9829 manual pages in a full Red Hat 9 installation
207 with Fedora updates to *validated* XML-DocBook.
208
209 1.0.6: 2003-11-20::
210 Lots of changes made so the XML output will pass validation.
211 Appropriately wrap <citerefentry> sequences generated from SEE ALSO.
212 Clean up generated <sbrk/> tags when we don't find a function or
213 command synopsis. Push back folded highlights so paragraph generation
214 doesn't get screwed up. Don't generate invalid class attribute for
215 <programlisting> and <symbol>. The .RS command no longer ends a
216 .IP or .TP entry. Correct .Ql so it doesn't generate spurious
217 line breaks. Fix a bug in <group> syntax processing. Declare <envar>
218 <constant>, and <errorcode> in-line tags (avoids foul-ups in processing
219 mdoc). Fix lexer bug that dropped a character after triple-quote in tokens.
220 Evaluate groff-style \\n[...] register escapes, \\n(.$., \w, and .g.
221 Generate IDs correctly even when section titles are in CJK. Accept
222 options starting with +. The mdoc interpreter can now deal with an
223 out-of-order Synopsis section. This version lifts 94% of 9829 manual
224 pages in a full Red Hat 9 installation with Fedora updates.
225
226 1.0.5: 2003-10-21::
227 Translate Version 8 .L macro and friends. Fix some minor markup
228 problems in doclifter.xml.
229
230 1.0.4: 2003-03-18::
231 Improved synopsis line detection. Added Berkeley Bsx, Ox, Nx macros.
232 Implemented extended groff ab, als, nop and return requests. Some
233 effort is now made to identify markup that refers to section headers
234 and lift it to link tags (by Aaron Hawley). Corrected erroneous
235 handling of string quotes around request arguments. Implemented
236 mdoc .Ex and .Rv macros. Better handling of \d, \u, and \v troff
237 requests.
238
239 1.0.3: 2003-02-14::
240 Enable translation of PIC diagrams using pic2plot. Lift highlighted
241 .*_t and errno appropriately. Implemented .fam and \F groff
242 extensions. Improved synopsis line detection. Interpret DS/DE in
243 manual pages (it's illegal but unambiguous). Work around common
244 error of putting an opening ' at the left margin. This version lifts
245 96% of 6705 manual pages in a Red Hat 8.0 install.
246
247 1.0.2: 2003-02-13::
248 Corrected a bug in processing of the .SM highlight on manual pages.
249 This affects other font changers with two-character names, notably CW.
250
251 1.0.1: 2002-09-17::
252 Prevent some false matches on mwww macros. Don't generate
253 entity inclusion files into the internal set; Tim Waugh says
254 it's not necessary and indeed it seems not to be when I'm using xmlto.
255
256 1.0.0: 2002-08-17::
257 Better firewalling against unbalanced font changes -- unbalanced
258 markup can no longer break the translation. Drastically
259 improved parsing of function prototypes. Support for mwww macros.
260 Added groff-1.18's euro and micro signs. Also now interpreting
261 various historical fossils from Ultrix and elsewhere that show up
262 on Linux manual pages. This version lifts 95% of the 5548 man pages
263 in a full Red Hat 7.3 workstation install.
264
265 0.99: 2002-07-30::
266 C declaration parsing for sections 2 and 3 by Pradeep Padala.
267 Improvements in command-synopsis parsing. Applied Michael
268 Smith's XML-compliance patch. Default changed to XML to
269 go with 7.3 toolchain. Added -s option. This version lifts
270 97% of 4253 man pages in a full Red Hat 7.3 install.
271
272 0.95: 2002-07-15::
273 Point release for Pradeep Padala.
274 Added Windows port fix. Better command marking from synopsis
275 sections. Oops, allow tildes in URLs. We can handle multiple
276 stacked .TP entries now. Implement groff mso, ignore ftr.
277 Crude, non-semantic lifting of function synopses.
278
279 0.9: 2001-11-09::
280 Add mm support. Resolve Latin 1 and Latin 2 entities correctly.
281 Don't let line numbering be confused by saved sections. Handle
282 non-syntactic [] in optional filename extensions. Improved
283 generation of included entities.
284
285 0.8: 2001-11-05::
286 Catch and foil attempts to rename immutable macros. Better
287 handling of unbalanced highlights. Multiple name lines are
288 now passed through with the first one parsed. 96% success on
289 sections 1 and 8 of a full Red Hat install.
290
291 0.7: 2001-10-31::
292 Multiple vertically-stacked hanging tags are now translated into
293 Synopsis sections. Mdoc bibliography macros are supported.
294 Paragraphed text in Synopsis sections is now handled. Tcl/Tk
295 extension macros are processed. 92% success on section 1 of a
296 full Red Hat install.
297
298 0.6: 2001-10-18::
299 Better handling of weird highlight and paragraphing combinations.
300
301 0.5: 2001-10-02::
302 Support for Berkeley mdoc.
303
304 0.4: 2001-09-05::
305 Don't rely on compilerlike.py being available.
306
307 0.3: 2001-09-04::
308 More steps towards mandoc interpretation. Checkpoint release for
309 Jorge Godoy and Jeffrey Franks.
310
311 0.2: 2001-08-31::
312 Extra arguments of .TH are now passed through. \c is now handled
313 properly rather than just being nuked. Now formats a dozen or
314 so more problem pages. Magic inclusion semantics and -I. Full
315 support for extended groff_char(7) characters. Implemented .tr.
316
317 0.1: 2001-08-27::
318 Initial build.
319
320 0.0: 2001-08-16::
321 First RCS commit. Project launched.
322
4949 N Extraneous . at start of line.
5050 O Command-line options described are not actually implemented.
5151 P Removed unnecessary \c that confused the doclifter parser.
52 Q Missing Description header.
52 Q .UR macro needs trailing .UE.
5353 R .ce markup can't be structurally translated, and is likely
5454 to cause rendering flaws in generated HTML.
5555 S DEPRECATED: in function syntax cannot be translated. Also, the
129129 manual page with the indicated corrections.
130130 z Garbled comment syntax.
131131 %%
132 b|acl.5 |I |https://savannah.nongnu.org/bugs/index.php?39096
133 s|afmtodit.1 |k |bug-groff@gnu.org
132 y|acl.5 | |
133 y|afmtodit.1 | |bug-groff@gnu.org
134134 b|american.5,english.5 | |geoff@cs.hmc.edu
135135 3n|analog.1 |CZ |analog-author@lists.meer.net
136 2n|AnyEvent::FAQ.3pm |W |Marc Lehmann <schmorp@schmorp.de>
136 y|AnyEvent::FAQ.3pm | |Marc Lehmann <schmorp@schmorp.de>
137137 1n|audispd.8 |* |sgrubb@redhat.com
138 1n|autosp.1 |Q |Bob Tennent <rdt@cs.queensu.ca>
138 y|autosp.1 | |Bob Tennent <rdt@cs.queensu.ca>
139139 2n|B::Hooks::EndOfScope::PP.3pm,B::Hooks::EndOfScope::XS.3pm|W |Florian Ragwitz <rafl@debian.org>
140140 2n|bash.1 |L |chet.ramey@case.edu, bug-bash@gnu.org
141141 3n|btcflash.8 |J |Daniel Baumann <daniel@debian.org>
143143 p|bzfs.6 |o |https://github.com/BZFlag-Dev/bzflag/pull/149
144144 3n|bzr.1,bzr.bzr.1 |JX |bazaar@lists.canonical.com
145145 b|calc_tickadj.1 |C |
146 p|c_rehash.1ssl,openssl-c_rehash.1ssl,openssl-rehash.1ssl,rehash.1ssl |W |https://github.com/openssl/openssl/pull/6267
146 y|c_rehash.1ssl,openssl-c_rehash.1ssl,openssl-rehash.1ssl,rehash.1ssl | |https://github.com/openssl/openssl/pull/6267
147147 1n|calendar.1 |X |Debian Bsdmainutils Team <pkg-bsdmainutils@teams.debian.net>
148148 3n|cdparanoia.1 |L |paranoia-dev@xiph.org
149149 b|cgroups.7,cgroup_namespaces.7|v |https://github.com/mkerrisk/man-pages/pull/10
151151 b|chmoddic.1 |BC |Canna@nec.co.jp
152152 3n|chroot.2 |EL |bug-coreutils@gnu.org
153153 2n|claws-mail.1 |L |paul@claws-mail.org
154 p|cmake.1 |B |https://gitlab.kitware.com/cmake/cmake/issues/17917
154 y|cmake.1 | |
155155 1n|CPAN::Meta::History::Meta_1_2.3pm,CPAN::Meta::History::Meta_1_3.3pm,CPAN::Meta::History::Meta_1_4.3pm|t |Ken Williams <kwilliams@cpan.org>
156156 b|co.1,ident.1 |o |rcs-bugs@gnu.org
157 p|cpio.1 |l |bug-cpio@gnu.org
157 y|cpio.1 | |bug-cpio@gnu.org
158158 1n|codepage.1 |C |kbd@lists.altlinux.org
159159 3n|compose.1,edit.1 |*y |mime-support@plessy.org
160160 1n|CURLOPT_PROXY_CAPATH.3|XY |Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
163163 p|DECLARE_LHASH_OF.3ssl,OPENSSL_LH_COMPFUNC.3ssl,OPENSSL_LH_HASHFUNC.3ssl,OPENSSL_LH_DOALL_FUNC.3ssl,LHASH_DOALL_ARG_FN_TYPE.3ssl,IMPLEMENT_LHASH_HASH_FN.3ssl,IMPLEMENT_LHASH_COMP_FN.3ssl,lh_TYPE_new.3ssl,lh_TYPE_free.3ssl,lh_TYPE_insert.3ssl,lh_TYPE_delete.3ssl,lh_TYPE_retrieve.3ssl,lh_TYPE_doall.3ssl,lh_TYPE_doall_arg.3ssl,lh_TYPE_error.3ssl | |kurt@openssl.org
164164 s|DEFINE_STACK_OF.3ssl,DEFINE_SPECIAL_STACK_OF.3ssl,DEFINE_STACK_OF_CONST.3ssl,DEFINE_SPECIAL_STACK_OF_CONST.3ssl,OPENSSL_sk_deep_copy.3ssl,OPENSSL_sk_delete.3ssl,OPENSSL_sk_delete_ptr.3ssl,OPENSSL_sk_dup.3ssl,OPENSSL_sk_find.3ssl,OPENSSL_sk_find_ex.3ssl,OPENSSL_sk_free.3ssl,OPENSSL_sk_insert.3ssl,OPENSSL_sk_is_sorted.3ssl,OPENSSL_sk_new.3ssl,OPENSSL_sk_new_null.3ssl,OPENSSL_sk_num.3ssl,OPENSSL_sk_pop.3ssl,OPENSSL_sk_pop_free.3ssl,OPENSSL_sk_push.3ssl,OPENSSL_sk_set.3ssl,OPENSSL_sk_set_cmp_func.3ssl,OPENSSL_sk_shift.3ssl,OPENSSL_sk_sort.3ssl,OPENSSL_sk_unshift.3ssl,OPENSSL_sk_value.3ssl,OPENSSL_sk.3ssl,zero.3ssl,sk_TYPE_num.3ssl,sk_TYPE_value.3ssl,sk_TYPE_new.3ssl,sk_TYPE_new_null.3ssl,sk_TYPE_free.3ssl,sk_TYPE_zero.3ssl,sk_TYPE_delete.3ssl,sk_TYPE_delete_ptr.3ssl,sk_TYPE_push.3ssl,sk_TYPE_unshift.3ssl,sk_TYPE_pop.3ssl,sk_TYPE_shift.3ssl,sk_TYPE_pop_free.3ssl,sk_TYPE_insert.3ssl,sk_TYPE_set.3ssl,sk_TYPE_find.3ssl,sk_TYPE_find_ex.3ssl,sk_TYPE_sort.3ssl,sk_TYPE_is_sorted.3ssl,sk_TYPE_dup.3ssl,sk_TYPE_deep_copy.3ssl,sk_TYPE_set_cmp_func.3ssl|n |kurt@openssl.org
165165 b|Parse::DebControl::Error.3pm|Wy |
166 1n|devlink.8 |C |netdev@vger.kernel.orgq
166 y|devlink.8 |C |netdev@vger.kernel.org
167 n|dfu-programmer.1 |Q |Weston Schmidt <weston_schmidt@alumni.purdue.edu>
167168 s|devnag.1 |J |tex-live@tug.org
168169 s|dh_install.1 |iy |
169170 p|dhclient.8 |U |dhcp-bugs@isc.org
170 3n|dkms.8 |XJ |dkms-devel@dell.com
171 y|dkms.8 | |dkms-devel@dell.com
171172 2n|dmcs.1,mcs.1,gmcs.1 |LA |mono-docs-list@lists.ximian.com
172173 1n|dmstats.8 |C |Bryn M. Reeves <bmr@redhat.com>
173174 b|dosbox.1 |L |
174 1n|driverless.1 |L |cups-devel@cups.org
175 y|driverless.1 | |
175176 b|dump-acct.8 |U |https://savannah.gnu.org/bugs/index.php?54040
176177 b|dv2dt.1 |C |
177178 4n|dvipdf.1,font2c.1 |R |epm@easysw.com
178179 2n|edgepaint.1 |W |Yifan Hu <yifanhu@research.att.com>
179 p|editres.1 |I |xorg-devel@lists.freedesktop.org
180 y|editres.1 | |xorg-devel@lists.freedesktop.org
180181 4n|e2fsck.8 |o |tytso@thunk.org
181182 3n|e2image.8 |J |tytso@thunk.org
182183 4n|efax.1 |Jug |edc@cce.com
183 s|eqn.1,geqn.1 | |bug-groff@gnu.org
184 y|eqn.1,geqn.1 | |bug-groff@gnu.org
184185 2n|irb.1,irb2.5.1 |a |ruby-doc@ruby-lang.org
185 1n|ethtool.8 |P |netdev@vger.kernel.org
186 1n|ethtool.8 |L |netdev@vger.kernel.org
186187 p|exiv2.1 |L |robin@clanmills.com
187188 p|extractres.1 |R |https://github.com/rrthomas/psutils/pull/4
188189 3n|f2py.1,f2py2.7.1 |C |f2py-users@cens.ioc.ee
193194 2n|fuzzyflakes.6x |C |Barry Dmytro <badcherry@mailc.net>
194195 1n|fwup_get_fw_type.3,fwup_get_fw_version.3,fwup_get_guid.3.gz,fwup_get_last_attempt_info.3,fwup_get_lowest_supported_version.3,fwup_get_ux_capsule_info.3,fwup_resource_iter_create.3,fwup_resource_iter_destroy.3,fwup_resource_iter_next.3,fwup_resource_iter_next.3,fwup_set_up_update.3,fwup_supported.3,libfwup.3,fwup_clear_status.3,fwup_get_guid.3,libfwup.h.3 |IXc |Peter Jones <pjones@redhat.com>
195196 3n|gacutil.1,cli-gacutil.1|N |mono-docs-list@lists.ximian.com
196 s|gdiffmk.1 |Wk |bug-groff@gnu.org
197 y|gdiffmk.1 |Wk |bug-groff@gnu.org
197198 3n|genisoimage.1 |oy |Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
198199 s|getafm.1 |R |
199200 1n|getty.8,agetty.8 |m |Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
200201 b|gftodvi.1 |I |
201202 p|gource.1 |C |https://github.com/acaudwell/Gource/pull/155
202203 3n|gpm-types.7 |JC |gpm@lists.linux.it
203 p|grn.1 |J |bug-groff@gnu.org
204 s|groff.1 | |bug-groff@gnu.org
205 s|groff_char.7 | |bug-groff@gnu.org
204 y|grn.1 | |bug-groff@gnu.org
205 n|groff.1 |e |bug-groff@gnu.org
206 y|groff_char.7 | |bug-groff@gnu.org
206207 s|groff_man.7 | |bug-groff@gnu.org
207 s|groff_tmac.5 | |bug-groff@gnu.org
208 s|groffer.1 | |bug-groff@gnu.org
209 s|grog.1 | |bug-groff@gnu.org
210 s|gropdf.1 | |bug-groff@gnu.org
211 s|gtbl.1,tbl.1 |* |bug-groff@gnu.org
208 n|groff_tmac.5 |e |bug-groff@gnu.org
209 y|groffer.1 | |bug-groff@gnu.org
210 y|grog.1 | |bug-groff@gnu.org
211 n|gropdf.1 |eP |bug-groff@gnu.org
212 y|gtbl.1,tbl.1 | |bug-groff@gnu.org
212213 b|gthumb.1 |L |
213 b|gvcolor.1 |C |https://gitlab.com/graphviz/graphviz/issues/1384
214 b|gvpack.1 |C |https://gitlab.com/graphviz/graphviz/issues/1385
214 y|gvcolor.1 | |
215 y|gvpack.1 | |
215216 1n|hddtemp.8 |L* |hddtemp-dev@nongnu.org
216217 8n|hfsutils.1 |HJ |Robert Leslie <rob@mars.org>
217218 b|hosts_access.5,hosts.allow.5,hosts.deny.5,hosts_options.5|I |Wietse Venema <wietse@porcupine.org>
239240 3n|lam.7,LAM.7 |L |lam-devel@lam-mpi.org
240241 3n|lam-helpfile.5 |I |lam-devel@lam-mpi.org
241242 b|lastcomm.1 |I |https://savannah.gnu.org/bugs/index.php?39134
242 p|lgftp.1 |I |lav@yars.free.net
243 p|lftp.1 |I |lav@yars.free.net
243244 3n|libcaca-authors.3caca|W |Sam Hocevar <sam@hocevar.net>
244245 3n|libcaca-canvas.3caca |WJ |Sam Hocevar <sam@hocevar.net>
245246 3n|libcaca-env.3caca |WL |Sam Hocevar <sam@hocevar.net>
246247 3n|libcaca-font.3caca |WJ |Sam Hocevar <sam@hocevar.net>
247248 3n|libcaca-ruby.3caca |W |Sam Hocevar <sam@hocevar.net>
248249 3n|libcaca-tutorial.3caca|W |Sam Hocevar <sam@hocevar.net>
249 p|libinput.4 |L |xorg-devel@lists.freedesktop.org
250 y|libinput.4 | |xorg-devel@lists.freedesktop.org
250251 p|libpng.3 |SJ |png-mng-implement@lists.sourceforge.net
251252 3n|libtiff.3tiff |I |tiff@lists.maptools.org
252253 1n|linkicc.1,transicc.1 |L |Shiju p. Nair <shiju.p@gmail.com>, info@littlecm1s.com
253254 1n| List::Compare::Functional.3pm|HY |Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
254255 3n|list_audio_tracks.1 |W |Heiko Eissfeldt <heiko@colossus.escape.de>, debburn-devel@lists.alioth.debian.org
255 4n|ln.1 |j |bug-coreutils@gnu.org
256 y|ln.1 | |bug-coreutils@gnu.org
256257 1n|loadkeys.1 |U |kbd@lists.altlinux.org
257258 3n|locate.findutils.1 |U |bug-findutils@gnu.org
258259 1n|logilab-pytest.1 |Zw |python-projects@lists.logilab.org
259260 8n|lpr.1 |U |papowell@lprng.com
260 1n|lynx.1,www-browser.1 |I |lynx-dev@nongnu.org
261 y|lynx.1,www-browser.1 |I |lynx-dev@nongnu.org
261262 1n|mailx.1posix |l |mtk-manpages@gmx.net
262263 3n|makeindex.1 |J |beebe@math.utah.edu
263264 p|mathspic.1 |JWt |Dick Nickalls <dick@nickalls.org>
287288 s|ntp-wait.8,ntp-keygen.8,ntp.keys.5|C |
288289 3n|nvidia-settings.1 |IxY |ubuntu-devel-discuss@lists.ubuntu.com
289290 3n|nvidia-smi.1 |IfY |ubuntu-devel-discuss@lists.ubuntu.com
290 p|objcopy.1,objdump.1,x86_64-linux-gnu-objcopy.1,x86_64-linux-gnu-objdump.1 |U |bug-binutils@gnu.org
291 p|objdump.1,x86_64-linux-gnu-objdump.1 |U |bug-binutils@gnu.org
291292 3n|ode.1 |e |bug-plotutils@gnu.org
292293 3n|omfonts.1 |W |Norbert Preining <preining@logic.at>
293294 4n|openvt.1,open.1 |L |aeb@cwi.nl
300301 3n|pbget.1,pbput.1,pbputs.1 |W |Dustin Kirkland <kirkland@ubuntu.com>
301302 3n|pbmtextps.1 |C |Bryan Henderson <bryanh@giraffe-data.com>
302303 3n|pcap-filter.7 |I |tcpdump-workers@lists.tcpdump.org
303 p|pdfroff.1 |X |bug-groff@gnu.org
304 y|pdfroff.1 | |bug-groff@gnu.org
304305 1n|pdcp.1 |J |garlick@llnl.gov
305306 1n|pdsh.1,pdsh.bin.1 |J |garlick@llnl.gov
306307 1n|rpdcp.1 |z |garlick@llnl.gov
312313 3n|pnmtofiasco.1 |e |Bryan Henderson <bryanh@giraffe-data.com>
313314 3n|policytool.1 |Wy |openjdk@lists.launchpad.net
314315 1n|postqueue.1 |C |Wietse Venema <wietse@porcupine.org>
315 s|preconv.1 | |bug-groff@gnu.org
316 y|preconv.1 | |bug-groff@gnu.org
316317 2n|prlimit.1 |U |Davidlohr Bueso <dave@gnu.org>
317318 b|proc.5,procfs.5 |vL |https://github.com/mkerrisk/man-pages/pull/10
318 b|pstree.1,pstree.x11.1 |C |Craig Small <csmall@small.dropbear.id.au>
319 y|pstree.1,pstree.x11.1 | |Craig Small <csmall@small.dropbear.id.au>
319320 b|pstops.1 |R |
320321 b|ps2epsi.1 |j |
321322 b|ps2pdfwr.1 |R |
322 2n|pylint.1 |J |code-quality@python.org
323 y|pylint.1 |J |code-quality@python.org
323324 2n|rake2.1.1 |L |ruby-doc@ruby-lang.org
324325 b|random.7 |m |mtk-manpages@gmx.net
325326 b|rcsfile.5 |d |rcs-bugs@gnu.org
326327 1n|rdma.8 |C |Leon Romanovsky <leonro@mellanox.com>
327 s|refer.1 | |bug-groff@gnu.org
328 y|refer.1 | |bug-groff@gnu.org
328329 3n|regulatory.bin.5 |w |linux-wireless@vger.kernel.org
329330 2n|request-key.8 |q |David Howells <dhowells@redhat.com>
330331 2n|request-key.conf.5 |q |David Howells <dhowells@redhat.com>
335336 3n|rlwrap.1,readline-editor.1|J |Chet Ramey <chet.ramey@case.edu>
336337 3n|rmid.1 |Wy |openjdk@lists.launchpad.net
337338 3n|rmiregistry.1 |Wy |openjdk@lists.launchpad.net
338 s|roff.7 | |bug-groff@gnu.org
339 n|roff.7 |e |bug-groff@gnu.org
339340 b|rotatelogs.8 |L*< |
340341 p|s3.4 |I |xorg-devel@lists.freedesktop.org
341 1n|sane-lexmark.5 |L |https://alioth.debian.org/tracker/index.php?func=detail&aid=315955&group_id=30186&atid=410366
342 b|sane-lexmark.5 |L |https://alioth.debian.org/tracker/index.php?func=detail&aid=315955&group_id=30186&atid=410366
342343 1n|scapy.1 |l |Philippe Biondi <phil@secdev.org>
343 p|screen.1 |LI |screen-devel@gnu.org
344 y|screen.1 |LI |screen-devel@gnu.org
344345 b|SDL_Init.3 |L |sdl@lists.libsdl.org
345346 b|SDL_CDPlayTracks.3 |n |docs@lists.libsdl.org
346 b|seccomp.2 |h |https://github.com/mkerrisk/man-pages/pull/10
347 y|seccomp.2 | |
347348 3n|see.1,run-mailcap.1,print.1 |C |mime-support@plessy.org
348349 2n|semanage-user.8,semanage-boolean.8,semanage-module.8,semanage-permissive.8|B |Daniel Walsh <dwalsh@redhat.com>
349350 2n|semanage-fcontext.8 |BU |Daniel Walsh <dwalsh@redhat.com>
350351 b|semop.2,semtimedop.2 |v |https://github.com/mkerrisk/man-pages/pull/10
351 p|setcap.8 |C |Andrew G. Morgan <morgan@kernel.org>
352 y|setcap.8 | |Andrew G. Morgan <morgan@kernel.org>
352353 2n|sg_xcopy.8 |l |Douglas Gilbert <dgilbert@interlog.com>
353354 3n|sgmlspl.1 |L |Ardo van Rangelrooij <ardo@debian.org>
354355 b|slapd.conf.5 |LI |OpenLDAP-devel@OpenLDAP.org
360361 b|ssh_config.5,sshd_config.5|F |
361362 1n|SSL_get_cipher_name.3ssl,SSL_get_current_cipher.3ssl,SSL_get_cipher_bits.3ssl, SSL_get_cipher_version.3ssl|p |kurt@openssl.org
362363 1n|suffixes.7 |l |Michael Kerrisk <mtk.manpages@gmail.com>
363 1n|synctex.1 |L |tex-live@tug.org
364 y|synctex.1 |L |tex-live@tug.org
364365 b|sysfs.5 |* |Michael Kerrisk <mtk.manpages@gmail.com>
365366 8n|rb.1,rx.1,rz.1,sb.1,sx.1,sz.1|e |Uwe Ohse <uwe@ohse.de>
366367 p|tar.1 |l |bug-tar@gnu.org
367 1n|tc-matchall.8,tc-sample.8|h |stephen@networkplumber.org
368 1n|tc-matchall.8 |h |stephen@networkplumber.org
369 y|tc-sample.8 | |stephen@networkplumber.org
368370 p|tc-cbq-details.8,tc-cbq.8,tc-mqprio.8,tc-prio.8,tc-htb.8|B |netdev@vger.kernel.org
369371 1n|tc-bpf.8 |CL |netdev@vger.kernel.org
370372 1n|tc-nat.8 |C |netdev@vger.kernel.org
374376 1n|tcpdump.8 |l |tcpdump-workers@lists.tcpdump.org
375377 3n|tek2plot.1 |W |bug-plotutils@gnu.org
376378 3n|test.1,[.1 |CO |bug-coreutils@gnu.org
377 1n|thermal-conf.xml.5 |L |Colin King <colin.king@ubuntu.com>
379 y|thermal-conf.xml.5 | |Colin King <colin.king@ubuntu.com>
378380 3n|TIFFGetField.3tiff |I |tiff@lists.maptools.org
379381 3n|TIFFmemory.3tiff |b |tiff@lists.maptools.org
380382 3n|tnameserv.1 |Wy |openjdk@lists.launchpad.net
381 b|tidy.1 |m |tidy-develop@lists.sourceforge.net,html-tidy@w3.org
383 y|tidy.1 | |tidy-develop@lists.sourceforge.net,html-tidy@w3.org
382384 1n|top.1 |CY |Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
383385 8n|tune2fs.8 |C |tytso@thunk.org
384 1n|ubuntu-advantage.1,ua.1 |L |Ubuntu Desktop Team <ubuntu-desktop@lists.ubuntu.com>
386 y|ubuntu-advantage.1,ua.1 | |Ubuntu Desktop Team <ubuntu-desktop@lists.ubuntu.com>
385387 2n|unrar.1,unrar-nonfree.1|C |Petr Cech <cech@debian.org>
386388 b|upstart-events.7 |I |upstart-devel@lists.ubuntu.com
387389 b|usb-creator-gtk.8 |W |Roderick B. Greening <roderick.greening@gmail.com>
389391 3n|uuencode.1posix |I |Francesco Paolo Lovergine <frankie@debian.org>
390392 2n|winemaker.1 |U |wine-devel@winehq.org
391393 1n|vim-addon-manager.1,vam.1,vim-addons.1 |V |James Vega <jamessan@debian.org>
392 p|xlogo.1 |I |xorg-devel@lists.freedesktop.org
394 y|xlogo.1 | |xorg-devel@lists.freedesktop.org
393395 1n|X509_SIG_getm.3ssl,X509_SIG_get0.3ssl|p |https://github.com/openssl/openssl/pull/6429
394396 2n|XML::LibXML::Pattern.3pm|W |perl-xml@listserv.ActiveState.com
395397 2n|XML::LibXML::Reader.3pm|W |perl-xml@listserv.ActiveState.com
397399 2n|XML::LibXML::XPathExpression.3pm|W |perl-xml@listserv.ActiveState.com
398400 gA|xmlto.1 |I |tim@cyberelk.net
399401 3n|Xserver.1 |I |xorg-devel@lists.freedesktop.org
400 4n|xterm.1 |LI |xorg-devel@lists.freedesktop.org
402 y|xterm.1 | |xorg-devel@lists.freedesktop.org
401403 b|zic.8 |o |https://github.com/mkerrisk/man-pages/pull/10
402404 1n|zip.1 |JC |Info-ZIP-Dev@goatley.com
403405 3n|zipinfo.1 |* |newt@pobox.com
1010 table markup, PIC into SVG, and EQN into MathML (relying on pic2svg
1111 and GNU eqn for the last two).
1212
13 Install by doing, "make install". To install to a prefix other than
13 Install by doing "make install". To install to a prefix other than
1414 the default (/usr), set a PREFIX environment variable,
1515 e.g. "PREFIX=/usr/local make -e install".
1616
22 * Docbook 5 translation is incomplete; inclusions won't work,
33 entities are untested.
44
5 * doclifter doesn't lnow about groff compatibility mode; de and de1 are
5 * doclifter doesn't know about groff compatibility mode; de and de1 are
66 treated as equivalent.
77
88 See the PATCHES file for other, more minor problems mainly due to bad markup.
0 #!/usr/bin/env python3
1 import string, os, sys, os.path
2
3 subdir = "prepatch"
4
5 distro = "Xubuntu 18.04 with some extras"
6 future = "next Xubuntu release"
7
8 CATEGORY_COUNT = 8
9
10 class Buglist:
11 def __init__(self, filename="PATCHES"):
12 with open(filename) as file:
13 self.codes = {}
14 while True:
15 line = file.readline()
16 if line == "%%\n":
17 break
18 if line[0] == "#":
19 continue
20 if not line[0].isspace():
21 key = line[0]
22 self.codes[key] = ""
23 line = line[1:]
24 self.codes[key] += line.strip() + "\n"
25
26 self.maildict = {}
27 self.lines = []
28 self.fields = []
29 self.typeseen = {}
30 self.promised = 0
31 self.counts = [0] * CATEGORY_COUNT
32 self.patched = 0
33 self.warnings = 0
34 while True:
35 line = file.readline()
36 if not line:
37 break
38 self.lines.append(line)
39 fields = line.split("|")
40 try:
41 (status, pages, problems, mailto) = [f.strip() for f in fields]
42 if 'p' in status:
43 self.promised += len(pages.split(","))
44 except ValueError:
45 print(line)
46 sys.exit(1)
47 for c in problems:
48 self.typeseen[c] = True
49 self.fields.append([f.strip() for f in fields])
50 if mailto not in self.maildict:
51 self.maildict[mailto] = []
52 self.maildict[mailto].append((status, pages, problems))
53 try:
54 with open("full.log") as lf:
55 warn_latch = False
56 while True:
57 line = lf.readline()
58 if not line:
59 break
60 elif not line.strip():
61 if warn_latch:
62 self.warnings += 1
63 continue
64 if "warning -" in line:
65 warn_latch = True
66 if line[0] != '!':
67 continue
68 warn_latch = False
69 line = line[2:]
70 rcolon = line.rindex("=")
71 retval = line[rcolon+1:].split()[0]
72 if retval.endswith("*"):
73 self.patched += 1
74 retval = retval[:-1]
75 self.counts[int(retval)] += 1
76 except (OSError, IOError):
77 sys.stderr.write("Conversion log is missing.\n")
78
79 def pagelist(self, include="", exclude=""):
80 lst = []
81 for (status, pages, problems, mailto) in self.fields:
82 addit = not include
83 for c in include:
84 if c in status:
85 addit = True
86 break
87 for c in exclude:
88 if c in status:
89 addit = False
90 break
91 if addit:
92 lst += [p.strip() for p in pages.split(",")]
93 lst.sort()
94 return lst
95
96 def filestem(x):
97 if x.endswith(".patch"):
98 return x[:-6]
99 elif x.endswith(".correction"):
100 return x[:-11]
101 else:
102 return x
103
104 def pagetofile(page):
105 page = os.path.join(subdir, page)
106 if os.path.exists(page + ".patch"):
107 fp = open(page + ".patch")
108 txt = fp.read()
109 fp.close()
110 # manlifter doesn't pick up corrections
111 elif os.path.exists(page + ".correction"):
112 fp = open(page + ".correction")
113 txt = fp.read()
114 fp.close()
115 else:
116 txt = None
117 return txt
118
119 if __name__ == '__main__':
120 import getopt
121 (options, arguments) = getopt.getopt(sys.argv[1:], "e:")
122 error_type_filter = None
123 for (opt, val) in options:
124 if opt == '-e':
125 error_type_filter = val
126
127 bugs = Buglist()
128 if error_type_filter: # report on what entries contain specified code
129 for (status, pages, problems, mailto) in bugs.fields:
130 if error_type_filter in problems:
131 print("|".join((status, pages, problems, mailto)))
132 else:
133 # Default action is to sanity-check the database
134 files = set(filestem(x) for x in os.listdir(subdir))
135 pages = set(bugs.pagelist())
136 unresolved = set(bugs.pagelist(exclude='y*'))
137 resolved = set(bugs.pagelist(include='y'))
138 counts = {}
139 for page in pages:
140 counts[page] = counts.get(page, 0) + 1
141 duplicates = [x for x in pages if counts[x] > 1]
142 if duplicates:
143 print("Duplicates:", duplicates)
144 print("%d unresolved patches, %d resolved, %d promised, %d total, %d files." % (len(unresolved), len(resolved), bugs.promised, len(pages), len(files)))
145 if files - pages:
146 print("These files have no buglist entry:", files - pages)
147 if unresolved - files:
148 print("These bugs have no filelist entry:", unresolved - files)
149 if files != unresolved:
150 print("Leftovers:", " ".join(files - unresolved))
151 nonbugs = []
152 for c in bugs.codes.keys():
153 if c not in bugs.typeseen:
154 nonbugs.append(c)
155 if nonbugs:
156 print("These bug types no longer occur: ", ", ".join(nonbugs))
157 available = list("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
158 for c in bugs.codes.keys():
159 available.remove(c)
160 if available:
161 print("These bug keys are available: ", ", ".join(available))
162
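The Buglist loader above consumes the PATCHES database, a pipe-delimited table of status|pages|problems|mailto records in which a 'p' in the status column marks pages whose fix has been promised upstream. A minimal, self-contained sketch of that record format (parse_entry is an illustrative helper, not part of doclifter; the sample address is made up):

```python
# Sketch: split one PATCHES entry the way Buglist.__init__ does.
# Record layout (status|pages|problems|mailto) is taken from the script above.
def parse_entry(line):
    fields = [f.strip() for f in line.split("|")]
    if len(fields) != 4:
        raise ValueError(line)
    status, pages, problems, mailto = fields
    # A 'p' in the status column means a fix was promised for these pages.
    promised = len(pages.split(",")) if "p" in status else 0
    return status, pages.split(","), problems, mailto, promised

status, pages, problems, mailto, promised = \
    parse_entry("p|foo.1,bar.1 |IC |maint@example.org")
```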
0 #!/usr/bin/env python3
1
2 class LineTokenizer:
3 "Make a collection of lines available either as lines or tokens."
4 def __init__(self, lines):
5 self.lines = lines
6 self.pretokenizer = None
7 self.token_index = 0
8 self.tokens = []
9 self.tokenize()
10 def popline(self):
11 "Grab the next line and make it the token buffer."
12 if not self.lines:
13 #sys.stderr.write("Popline returns None\n")
14 return None
15 else:
16 #sys.stderr.write("Popline starts with: %s\n" % self)
17 res = self.lines[0]
18 self.lines.pop(0)
19 self.tokens = []
20 if self.lines:
21 self.tokenize(self.pretokenizer)
22 #sys.stderr.write("In popline, I return %s: %s\n" % (repr(res), self))
23 return res
24 def pushline(self, line):
25 "Replace the token buffer with the current line."
26 self.lines = [line] + self.lines
27 self.tokenize(self.pretokenizer)
28 #sys.stderr.write("Pushline leaves: %s\n" % self)
29 def peekline(self):
30 "Return the token buffer"
31 if not self.lines:
32 return None
33 else:
34 return self.lines[0]
35 def tokenize(self, new_pretokenizer=None):
36 "Split a line on tabs and whitespaces, but not linefeeds."
37 self.pretokenizer = new_pretokenizer
38 if self.lines:
39 if self.pretokenizer:
40 line = self.pretokenizer(self.lines[0])
41 else:
42 line = self.lines[0]
43 self.tokens = line.split()
44 #sys.stderr.write("In tokenize, I split: " + repr(self) + '\n')
45 def restore_newlines(self):
46 self.tokens.append("\n")
47 for i in range(len(self.lines)):
48 self.lines[i] += "\n"
49 def token_pop(self, count=1):
50 "Get a token."
51 if not self.lines:
52 return None
53 #sys.stderr.write("In token_pop, I see: " + repr(self) + '\n')
54 res = self.tokens[0]
55 self.tokens = self.tokens[count:]
56 if not self.tokens:
57 if not self.lines:
58 #sys.stderr.write("In token_pop, I return None: " + repr(self) + '\n')
59 return None
60 self.popline()
61 self.token_index += 1
62 #sys.stderr.write("In token_pop, I return: " + repr(self) + '\n')
63 return res
64 def token_peek(self):
65 "Peek at the next token."
66 if self.tokens:
67 return self.tokens[0]
68 else:
69 return None # list empty means we're out of data
70
71 def token_push(self, tok):
72 "Push back a token."
73 self.tokens = [tok] + self.tokens
74 # We do *not* alter the source line!
75 self.token_index -= 1
76 def __str__(self):
77 "Display the state of the object."
78 return "<tokens=%s, lines=%s>" % (self.tokens, self.lines)
79 __repr__ = __str__
80
81 # Python 3: the old "exceptions" module is gone; Exception is a builtin.
82
83 class CDeclarationError(Exception):
84 def __init__(self, message, retval=1):
85 self.message = message
86 self.retval = retval
87
88 class CDeclarationParser:
89 "Parse a C declaration."
90 def __init__(self, io, handler, notifier=lambda x: None):
91 self.io = io
92 self.handler = handler
93 self.notifier = notifier
94 self.tokencount = 0
95 self.state_stack = []
96 self.construct_stack = []
97 self.typedefs = {}
98 self.declaration()
99 def token_peek(self):
100 return self.io.token_peek() # FIXME: skip C comments
101 def token_pop(self):
102 self.tokencount += 1
103 return self.io.token_pop() # FIXME: skip C comments
104 def checkin(self, label, opt):
105 print(" " * len(self.construct_stack) + "->" + label + " " + ("required", "optional")[opt])
106 self.state_stack.append(self.tokencount)
107 self.construct_stack.append(label)
108 def checkout(self, label, opt):
109 nonempty = self.state_stack[-1] < self.tokencount
110 self.state_stack.pop()
111 self.construct_stack.pop()
112 if not opt and not nonempty:
113 raise CDeclarationError("missing required element in " + label)
114 else:
115 print(" " * len(self.construct_stack) + "<-" + label + " " + ("not found", "found")[nonempty])
116 return nonempty
117 def expect(self, *args):
118 yes = self.token_peek() in args
119 print(" " * len(self.construct_stack) + "expect" + repr(args) + " = " + "ny"[yes] + " (" + str(self.token_peek()) + ")")
120 if yes:
121 # One of only two places we pop the token stack
122 print("Popped terminal:", self.token_pop())
123 return yes
124 def identifier(self, opt):
125 self.checkin("identifier", opt)
126 id = self.token_peek()
127 if not id or (not id[0].isalpha() and id[0] != "_"):
128 raise CDeclarationError("saw %s where identifier expected " % id)
129 else:
130 # And here is the other.
131 self.handler(id, self.construct_stack)
132 self.token_pop()
133 self.checkout("identifier", opt)
134 # Rest is hand-compiled from "A.13 of the C Programming Language", 2nd ed.
135 # It covers the entire declaration syntax, except for initializers.
136 # It does not cover function definitions. There are two things we
137 # don't do quite right because they can't be done LL(1), see below.
138 def declaration(self):
139 self.declaration_specifiers(opt=0)
140 self.init_declarator_list(opt=0) # Technically, is opt
141 self.expect(";")
142 def declaration_list(self, opt):
143 self.checkin("declaration_list", opt)
144 self.declaration_specifiers(opt=0)
145 while self.declaration_specifiers(opt=1):
146 continue
147 return self.checkout("declaration_list", opt)
148 def declaration_specifiers(self, opt):
149 self.checkin("declaration_specifiers", opt)
150 if \
151 self.storage_class_specifier(opt=1) or \
152 self.type_specifier(opt=1) or \
153 self.type_qualifier(opt=1):
154 self.declaration_specifiers(opt=1)
155 return self.checkout("declaration_specifiers", opt)
156 def storage_class_specifier(self, opt):
157 self.checkin("storage_class_specifier", opt)
158 self.expect("static", "extern", "typedef") # auto, register
159 return self.checkout("storage_class_specifier", opt)
160 def type_specifier(self, opt):
161 self.checkin("type_specifier", opt)
162 self.expect("void", "char", "short", "int", "long", "float", "double", "signed", "unsigned") \
163 or \
164 self.struct_or_union_specifier(opt) \
165 or \
166 self.identifier(opt)
167 # We've left out one legal alternative, an enum, because we
168 # never expect to encounter it on a man page.
169 # We accept any identifier here because we can't know all typedefs
170 # in advance -- that would require parsing include files and a
171 # semi-infinite amount of hair.
172 return self.checkout("type_specifier", opt)
173 def type_qualifier(self, opt):
174 self.checkin("type_qualifier", opt)
175 self.expect("const", "volatile")
176 return self.checkout("type_qualifier", opt)
177 def struct_or_union_specifier(self, opt):
178 self.checkin("struct_or_union_specifier", opt)
179 if not (self.expect("struct","union") and self.identifier(opt=0)):
180 self.expect("{") and \
181 self.struct_declaration_list(opt=0) and \
182 self.expect("}")
183 return self.checkout("struct_or_union_specifier", opt)
184 def struct_declaration_list(self, opt):
185 self.checkin("struct_declaration_list", opt)
186 self.struct_declaration(opt=0)
187 while self.struct_declaration(opt=1):
188 continue
189 return self.checkout("struct_declaration_list", opt)
190 def init_declarator_list(self, opt):
191 self.checkin("init_declarator_list", opt)
192 # This is where we lop off the initializer branch.
193 self.declarator(opt=0)
194 while self.expect(","):
195 self.declarator(opt=1)
196 return self.checkout("init_declarator_list", opt)
197 def struct_declaration(self, opt):
198 self.checkin("struct_declaration", opt)
199 self.specifier_qualifier_list(opt=0)
200 self.struct_declarator_list(opt=0)
201 return self.checkout("struct_declaration", opt)
202 def specifier_qualifier_list(self, opt):
203 self.checkin("specifier_qualifier_list", opt)
204 self.type_specifier(opt=0) or self.type_qualifier(opt=0)
205 while self.type_specifier(opt=1) or self.type_qualifier(opt=1):
206 continue
207 self.specifier_qualifier_list(opt=1)
208 return self.checkout("specifier_qualifier_list", opt)
209 def struct_declarator_list(self, opt):
210 self.checkin("struct_declarator_list", opt)
211 self.struct_declarator(opt=0)
212 while self.struct_declarator(opt=1):
213 continue
214 return self.checkout("struct_declarator_list", opt)
215 def struct_declarator(self, opt):
216 self.checkin("struct_declarator", opt)
217 self.declarator(opt=1)
218 if self.expect(":"):
219 self.constant_expression(opt=0)
220 return self.checkout("struct_declarator", opt)
221 def declarator(self, opt):
222 self.checkin("declarator", opt)
223 self.pointer(opt=1)
224 self.direct_declarator(opt=0)
225 return self.checkout("declarator", opt)
226 def direct_declarator(self, opt):
227 self.checkin("direct_declarator", opt)
228 # This is not quite the A.13 production, which is
229 # direct-declarator:
230 # identifier
231 # ( declarator)
232 # direct-declarator [ constant-expression? ]...
233 # direct-declarator ( parameter-type-list )
234 # direct-declarator ( identifier-list )
235 # This can't be parsed LL(1) the way we're doing it. We implement this:
236 # direct-declarator-prefix:
237 # identifier
238 # ( declarator )
239 # direct-declarator:
240 # direct-declarator-prefix [ constant-expression? ]...
241 # direct-declarator-prefix ( parameter-type-list )
242 # direct-declarator-prefix ( identifier-list )
243 # This won't catch weird shit like foo(bar)[MUMBLE].
244 # Let's hope it doesn't miss any real-world cases.
245 if self.expect("("):
246 self.declarator(opt=0)
247 self.expect(")")
248 else:
249 self.identifier(opt=0)
250 if self.expect("("):
251 self.parameter_type_list(opt=0) or self.identifier_list(1)
252 self.expect(")")
253 else:
254 while self.expect("["):
255 self.constant_expression(opt=1)
256 self.expect("]")
257 return self.checkout("direct_declarator", opt)
258 def pointer(self, opt):
259 self.checkin("pointer", opt)
260 self.expect("*")
261 if self.type_qualifier_list(opt=1):
262 self.pointer(opt=1)
263 return self.checkout("pointer", opt)
264 def type_qualifier_list(self, opt):
265 self.checkin("type_qualifier_list", opt)
266 while self.type_qualifier(opt=1):
267 continue
268 return self.checkout("type_qualifier_list", opt)
269 def parameter_type_list(self, opt):
270 self.checkin("parameter_type_list", opt)
271 self.parameter_list(opt=0)
272 if self.expect(","):
273 self.expect("...")
274 return self.checkout("parameter_type_list", opt)
275 def parameter_list(self, opt):
276 self.checkin("parameter_list", opt)
277 self.parameter_declaration(opt=0)
278 while self.expect(","):
279 self.parameter_declaration(opt=0)
280 return self.checkout("parameter_list", opt)
281 def parameter_declaration(self, opt):
282 self.checkin("parameter_declaration", opt)
283 self.declaration_specifiers(opt=0)
284 self.declarator(opt=0) \
285 or \
286 self.abstract_declarator(opt=1)
287 return self.checkout("parameter_declaration", opt)
288 def identifier_list(self, opt):
289 self.checkin("identifier_list", opt)
290 self.identifier(opt=0)
291 while self.expect(","):
292 self.identifier(opt=0)
293 return self.checkout("identifier_list", opt)
294 def type_name(self, opt):
295 self.checkin("type_name", opt)
296 self.specifier_qualifier_list(opt=0)
297 self.abstract_declarator(opt=1)
298 return self.checkout("type_name", opt)
299 def abstract_declarator(self, opt):
300 self.checkin("abstract_declarator", opt)
301 if self.pointer(opt=1):
302 self.direct_abstract_declarator(opt=1)
303 else:
304 self.direct_abstract_declarator(opt=0)
305 return self.checkout("abstract_declarator", opt)
306 def direct_abstract_declarator(self, opt):
307 self.checkin("direct_abstract_declarator", opt)
308 # The actual production is
309 # direct_abstract_declarator:
310 # ( abstract-declarator )
311 # direct_abstract_declarator? [ constant-expression? ]
312 # direct_abstract_declarator? ( parameter-type-list? )
313 # This too cannot be parsed LL(1), so we do this:
314 # direct_abstract_declarator-prefix:
315 # ( abstract_declarator ) [ constant-expression? ]...
316 # ( abstract_declarator ) ( parameter-type-list? )
317 # and hope it's good enough to catch the real-world cases.
318 if self.expect("("):
319 self.abstract_declarator(opt=0)
320 self.expect(")")
321 if self.expect("("):
322 self.parameter_type_list(opt=0)
323 self.expect(")")
324 else:
325 while self.expect("["):
326 self.constant_expression(opt=1)
327 self.expect("]")
328 return self.checkout("direct_abstract_declarator", opt)
329 def typedef_name(self, opt):
330 self.checkin("typedef_name", opt)
331 self.identifier(opt=0)
332 return self.checkout("typedef_name", opt)
333 def constant_expression(self, opt):
334 self.checkin("constant_expression", opt)
335 # Full grammar has expressions here.
336 # All we're ever going to see is constants, thank goodness.
337 self.identifier(opt=1)
338 return self.checkout("constant_expression", opt)
339
340 if __name__ == "__main__":
341 import sys
342 def post(token, stack):
343 print("I see %s with %s" % (token, stack))
344 def notify(msg):
345 print(msg)
346 io = LineTokenizer(sys.stdin.readlines())
347 print(io)
348 try:
349 CDeclarationParser(io, post, notify)
350 except CDeclarationError as e:
351 print(e.message)
352
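CDeclarationParser's checkin/expect/checkout trio above amounts to LL(1) recursive descent without backtracking: each nonterminal records the token count on entry, and an optional production "succeeds" exactly when it has consumed at least one token since then. The same bookkeeping on a toy qualifier grammar (ToyParser is illustrative only, not doclifter code):

```python
# Toy LL(1) recursive descent using the same checkin/checkout idea as
# CDeclarationParser: an optional production succeeds iff it consumed tokens.
class ToyParser:
    def __init__(self, tokens):
        self.tokens = list(tokens)
        self.count = 0              # tokens consumed so far
    def expect(self, *alts):
        # Consume the next token only if it is one of the expected terminals.
        if self.tokens and self.tokens[0] in alts:
            self.tokens.pop(0)
            self.count += 1
            return True
        return False
    def qualifier_list(self, opt):
        mark = self.count           # checkin: remember entry position
        while self.expect("const", "volatile"):
            continue
        found = self.count > mark   # checkout: did we consume anything?
        if not opt and not found:
            raise SyntaxError("missing qualifier")
        return found

p = ToyParser(["const", "volatile", "int"])
consumed = p.qualifier_list(opt=1)
```

The payoff of the scheme is that optional and required productions share one code path; only the checkout step decides whether an empty match is an error.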
0 #!/bin/sh
1 # Clone a patch (specified by NAME.SECTION) to any number of new copies
2 # specified likewise. Useful for cases like XF86VM.3 and its clones.
3 base=$1
4 shift
5 for copy in "$@";
6 do
7 sed "1,2s/${base}/${copy}/" <prepatch/${base}.patch >prepatch/${copy}.patch
8 done
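clonepatch's sed expression `1,2s/${base}/${copy}/` renames the page only in the first two lines of each patch, typically the header lines that name the file. A rough Python equivalent of that edit, for illustration (clone_patch is a hypothetical helper, not shipped with doclifter):

```python
def clone_patch(text, base, copy):
    # Mimic "sed 1,2s/base/copy/": one substitution per line, lines 1-2 only.
    lines = text.splitlines(keepends=True)
    for i in range(min(2, len(lines))):
        lines[i] = lines[i].replace(base, copy, 1)
    return "".join(lines)

src = "--- a/XF86VM.3\n+++ b/XF86VM.3\n body mentions XF86VM.3\n"
out = clone_patch(src, "XF86VM.3", "XF86VMMode.3")
```

Note that, like the sed version, this leaves later mentions of the base name in the patch body untouched.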
0 # This is not a real Debian control file
1 # It's project metadata for the shipper tool
2
3 Package: doclifter
4
5 Description: Lift documents in nroff markups to XML-DocBook.
6 Lifting documents from presentation level to semantic level is hard,
7 and a really good job requires human polishing. This tool aims to do
8 everything that can be mechanized, and to preserve any troff-level
9 information that might have structural implications in XML comments.
10 This tool does the hard parts. TBL tables are translated into DocBook
11 table markup, PIC into SVG, and EQN into MathML (relying on pic2svg
12 and GNU eqn for the last two).
13
14 Homepage: http://www.catb.org/~esr/doclifter
15
16 XBS-Destinations: mailto:ubuntu-devel-discuss@lists.ubuntu.com
17
18 XBS-HTML-Target: index.html
19
20 XBS-Repository-URL: https://gitlab.com/esr/doclifter
21
22 XBS-Debian-Packages: doclifter
23
24 XBS-Logo: doclifter-logo.png
25
26 XBS-VC-Tag-Template: %(version)s
27
28 XBS-Validate: make check
+1378
-1366
doclifter less more
0 #!/usr/bin/env python
0 #!/usr/bin/env python3
11 r"""
22 doclifter: translate man/mdoc/ms/me/mm sources to DocBook.
33
44 By Eric S. Raymond, copyright 2002, 2006, 2007.
5 Released as open source under the BSD license.
65
76 This comment is addressed to you if you want to add support for another
87 macro package to doclifter. Or if you have encountered a bug in doclifter
2221
2322 TroffInterpreter provides I/O and other basic services for a stack of request
2423 interpreters. Interpreters get added to the stack when TroffInterpreter
25 recognizes certain patterns in the input; see the table interpreter_dispatch
24 recognizes certain patterns in the input; see the table interpreterDispatch
2625 for details. If a string pattern added to this table is length 2,
2726 TroffInterpreter will assume it is a request name and check to make sure
2827 that it's not a macro.
5352 exclusive macro set in the stack, if there is one; otherwise it will be the
5453 top tag of the most recently added interpreter.
5554
56 ignore_set
55 ignoreSet
5756 Tags to ignore. List here any presentation-level tags that don't have
5857 structural implications. They will be silently discarded.
5958 Note: there is a potential subtle gotcha in the handling of ignore
6059 sets. The code presently assumes that no tag in any interpreter's
6160 ignore set is handled by any other interpreter.
6261
63 complain_set
62 complainSet
6463 Tags to complain about. Put here things that can't be translated out
6564 of presentation level but that might have structural meaning (such as
6665 indentation changes). The user will be warned on stderr when these
6766 come up. Otherwise they're ignored.
6867
69 parabreak_set
68 parabreakSet
7069 The set of tags that forces a new paragraph without changing the
7170 document section. Used to recognize the end of lists.
7271
73 sectionbreak_set
72 sectionbreakSet
7473 The set of tags that forces a new document section. Things that
7574 are going to translate to a DocBook sect, refsect, or section tag
7675 should go here.
7776
78 listbreak_set
77 listbreakSet
7978 The set of tags that forces an end to a list section. Normally
80 includes everything in the sectionbreak_set.
81
82 scoped_set
79 includes everything in the sectionbreakSet.
80
81 scopedSet
8382 The set of list tags that is scoped, e.g. has an end tag and should *not*
8483 be interrupted by list breakers.
8584
8786 Special-character to ISO literal mappings. These are applied late
8887 in the translation, *after* string and macro evaluation.
8988 It's also useful to know that your request interpreter can call the
90 function declare_body_start() to tell the framework class where the
89 function declareBodyStart() to tell the framework class where the
9190 body of the document starts (as opposed to the preamble full of troff
9291 requests). This information is used to restrict the scope of
9392 character translations.
127126 intrinsic, because the troff request language is grubby.
128127 """
129128 # SPDX-License-Identifier: BSD-2-Clause
129 # Runs under both Python 2 and Python 3; preserve this property!
130130
131131 import sys, os, glob, re, string, tempfile, time, pprint, subprocess, io
132132
142142 import commands
143143 getstatusoutput = commands.getstatusoutput
144144
145 version = "2.19"
145 version = "2.20"
146146
147147 # This is a speed hack recommended by Armin Rigo. It cuts runtime by about 33%
148148 # and makes it possible for psyco 1.2 to reduce runtime another 33%.
149 re_cache = {}
150 def re_compile(st, flags=0):
149 reCache = {}
150 def reCompile(st, flags=0):
151151 try:
152 return re_cache[st]
152 return reCache[st]
153153 except KeyError:
154 r = re_cache[st] = re.compile(st, flags)
154 r = reCache[st] = re.compile(st, flags)
155155 return r
156156
157157 # In order: Dutch, English/German, French, Italian, Norwegian/Danish, Polish,
158158 # Spanish, Swedish.
159 name_synonyms = re.compile("^(naam|name|nom|nome|navn|nazwa|nombre|namn)$", re.I)
159 nameSynonyms = re.compile("^(naam|name|nom|nome|navn|nazwa|nombre|namn)$", re.I)
160160
161161 # How to detect synopses
162 synopsis_label = re.compile("SYNOPSIS$", re.I)
163 synopsis_header = re.compile(r'\.S[Hh]\s*"?(?:SYNOPSIS)"?$', re.I)
164 description_label = re.compile("DESCRIPTION$", re.I)
162 synopsisLabel = re.compile("SYNOPSIS$", re.I)
163 synopsisHeader = re.compile(r'\.S[Hh]\s*"?(?:SYNOPSIS)"?$', re.I)
164 descriptionLabel = re.compile("DESCRIPTION$", re.I)
165165
166166 # Qt part descriptions. It's OK to see these in function synopses, we just
167167 # turn them into an info section.
168 qt_headers = ("Public Members", "Public Slots", "Signals",
168 qtHeaders = ("Public Members", "Public Slots", "Signals",
169169 "Static Public Members", "Properties", "Protected Members",)
170170 # Used to distinguish first-level section headers from second-level ones
171171 # when the Qt grotty hack is enabled.
172 caps_header = re.compile("^[A-Z ]*$")
172 capsHeader = re.compile("^[A-Z ]*$")
173173 # These have to be messed with by the Qt grotty hack.
174 qt_invert = ("Property Documentation", "Member Type Documentation")
174 qtInvert = ("Property Documentation", "Member Type Documentation")
175175
176176 blankline = re.compile(r"^\s*$")
177177
179179 endtag = re.compile("<[^>]*>$")
180180
181181 # Used in C syntax recognition
182 c_declarators = ("void", "char", "short", "int",
182 cDeclarators = ("void", "char", "short", "int",
183183 "long", "float", "double", "signed",
184184 "unsigned", "typedef", "struct",
185185 "union", "enum", "const", "volatile",
186186 "inline", "restricted", # C9X
187187 "virtual",) # C++
188 c_source_re = re.compile("|".join(c_declarators))
188 cSourceRe = re.compile("|".join(cDeclarators))
189189
190190 # Used to strip headers off generated HTML documents.
191191 xmlheader = re.compile(r"<\?.*\?>\n")
196196 # Match an RFC822 email address, possibly with surrounding <>.
197197 # This is the right thing because the XSL stylesheets surround
198198 # <email> content with <> on output.
199 email_re = re.compile(r"\b(?:&lt;)?(?P<email>[-\w_.]+@[-\w_.]+)(?:&gt;)?\b")
199 emailRe = re.compile(r"\b(?:&lt;)?(?P<email>[-\w_.]+@[-\w_.]+)(?:&gt;)?\b")
200200
201201 # Match an URL. This pattern is carefully constructed not to eat
202202 # a following period if (as is often the case) it occurs at the
203203 # end of a sentence.
204 url_re=re.compile(r"(?P<url>\b(http|ftp|telnet|mailto)://[-_%\w/&;.~]+[-_%\w/&;])")
205
206 # Match a xmlns URL in the top level tag, so that the url_re does not try to ulink-ize it.
207 xmlns_re=re.compile(r"\w xmlns='http://docbook.org/ns/docbook'")
204 urlRe=re.compile(r"(?P<url>\b(http|ftp|telnet|mailto)://[-_%\w/&;.~]+[-_%\w/&;])")
205
206 # Match a xmlns URL in the top level tag, so that the urlRe does not try to ulink-ize it.
207 xmlnsRe=re.compile(r"\w xmlns='http://docbook.org/ns/docbook'")
208208
209209 # Match a troff highlight
210 troff_highlight = re.compile(r"(\\[fF]\([A-Z][A-Z])|(\\f\[[A-Z]*\])|(\\[fF][A-Z0-9])|(\\F\[\])")
211 troff_highlight_stripper = re.compile(r"^\.[BI] ")
210 troffHighlight = re.compile(r"(\\[fF]\([A-Z][A-Z])|(\\f\[[A-Z]*\])|(\\[fF][A-Z0-9])|(\\F\[\])")
211 troffHighlightStripper = re.compile(r"^\.[BI] ")
212212
213213 # Match a glue token with all preceding and following whitespace
214214 hotglue = re.compile(r"\s*@GLUE@\s*")
215215 cleantag = re.compile(r"</([a-z]+)><\1>")
216216
217217 # Match an identifier token in C or Python
218 id_re = re.compile("^[_a-zA-Z][_a-zA-Z0-9]*$")
218 idRe = re.compile("^[_a-zA-Z][_a-zA-Z0-9]*$")
219219
220220 # List how troff specials that can appear as list tags map into
221221 # DocBook mark types. According to Norm Walsh's DSSL and XSL
226226 # "box" as a synonym for "square". We map dash to box here for consistency
227227 # with the -dash/-bullet distinction in mdoc, where -dash can only
228228 # reasonably be mapped to box rather than disc.
229 ip_tag_mapping = {
229 ipTagMapping = {
230230 r"\(bu":"bullet",
231231 r"\(sq":"box",
232232 "*" : "bullet",
234234 }
235235
236236 # Add this to the V4 preamble when we have MathML elements
237 mathml_entities = '''<!ENTITY % MATHML.prefixed "INCLUDE">
237 mathmlEntities = '''<!ENTITY % MATHML.prefixed "INCLUDE">
238238 <!ENTITY % MATHML.prefix "mml">
239239 <!ENTITY % equation.content "(alt?, (graphic+|mediaobject+|mml:math))">
240240 <!ENTITY % inlineequation.content
262262 """
263263
264264 # Verbosity thresholds for debugging
265 general_verbosity = "g" # More details on warnings
266 section_verbosity = "s" # Show section pushes and pops
267 classify_verbosity = "c" # Show section classification details
268 parse_verbosity = "p" # Show synopsis parse details
269 macro_verbosity = "m" # Show expression evaluation details
270 highlight_verbosity = 'h' # Show highlight resolution details
271 io_verbosity = "i" # Show low-level I/O
272 interpreter_verbosity = "z" # Show low-level interpreter checks
273 bsd_verbosity = 'b' # BSD macroexpansion
274 tokenizer_verbosity = 'x' # Tokenizer verbosity
275 timing_verbosity = 't' # Execution profiling
276 supersub_verbosity = 'u' # Super/subscript recognition velocity.
277 namesection_verbosity = 'n' # Name section parsing
265 generalVerbosity = "g" # More details on warnings
266 sectionVerbosity = "s" # Show section pushes and pops
267 classifyVerbosity = "c" # Show section classification details
268 parseVerbosity = "p" # Show synopsis parse details
269 macroVerbosity = "m" # Show expression evaluation details
270 highlightVerbosity = 'h' # Show highlight resolution details
271 ioVerbosity = "i" # Show low-level I/O
272 interpreterVerbosity = "z" # Show low-level interpreter checks
273 bsdVerbosity = 'b' # BSD macroexpansion
274 tokenizerVerbosity = 'x' # Tokenizer verbosity
275 timingVerbosity = 't' # Execution profiling
276 supersubVerbosity = 'u' # Super/subscript recognition velocity.
277 namesectionVerbosity = 'n' # Name section parsing
278278
279279 def deemphasize(st):
280280 "Throw out highlighting info from a string."
281 return troff_highlight.sub("", st)
282
283 def is_command(line):
281 return troffHighlight.sub("", st)
282
283 def isCommand(line):
284284 # This works around a common bug -- string-enclosing ' at the left margin
285285 return len(line) > 1 and \
286 (line[0] == TroffInterpreter.ctrl or (line[0] == TroffInterpreter.ctrl_nobreak and line[1:].find(TroffInterpreter.ctrl_nobreak) == -1))
287
288 def is_comment(line):
286 (line[0] == TroffInterpreter.ctrl or (line[0] == TroffInterpreter.ctrlNobreak and line[1:].find(TroffInterpreter.ctrlNobreak) == -1))
287
288 def isComment(line):
289289 # The malformed crap people write as troff comments is amazing...
290290 line = line.replace(" ", "").replace("\t", "")
291 return line == TroffInterpreter.ctrl or line == TroffInterpreter.ctrl_nobreak or line[:3] in (r'.\"', r'/\"', r'./"', r".\'", '\'\\"', r'\'\"', r'\".', r"...", r"'''", r"\!.") or line[:2] in (r'."', r".'", r'\"', r"'#", r"\#") or line[:4] in (r'.\\"', r"'.\"")
292
293 def match_command(line, tag):
291 return line == TroffInterpreter.ctrl or line == TroffInterpreter.ctrlNobreak or line[:3] in (r'.\"', r'/\"', r'./"', r".\'", '\'\\"', r'\'\"', r'\".', r"...", r"'''", r"\!.") or line[:2] in (r'."', r".'", r'\"', r"'#", r"\#") or line[:4] in (r'.\\"', r"'.\"")
292
293 def matchCommand(line, tag):
294294 # Cope with the possibility of spaces after the dot
295 if not line or line[0] not in (TroffInterpreter.ctrl, TroffInterpreter.ctrl_nobreak):
295 if not line or line[0] not in (TroffInterpreter.ctrl, TroffInterpreter.ctrlNobreak):
296296 return False
297297 tokens = line[1:].strip().split()
298298 return tokens and tokens[0] == tag
309309 #def untagged(pattern):
310310 # "Transform the pattern to guarantee that it won't match marked-up text."
311311 # # Warning! Only really works with fixed-length patterns.
312 # return re_compile("(?<!>)" + pattern.pattern + "(?!</)")
312 # return reCompile("(?<!>)" + pattern.pattern + "(?!</)")
313313
314314 def fontclose(istr):
315315 "Make sure we exit interpretation of the given string in normal font."
316 last_font_escape = istr.rfind(r'\f')
317 if last_font_escape > -1 and istr[last_font_escape+2] not in "R":
316 lastFontEscape = istr.rfind(r'\f')
317 if lastFontEscape > -1 and istr[lastFontEscape+2] not in "R":
318318 istr += r"\fR"
319 istr = re_compile(r"\f[^P]\fR$").sub(r"\fR", istr)
320 last_font_escape = istr.rfind(r'\F')
321 if last_font_escape > -1 and istr[last_font_escape+2:last_font_escape+4] != "[]":
319 istr = reCompile(r"\f[^P]\fR$").sub(r"\fR", istr)
320 lastFontEscape = istr.rfind(r'\F')
321 if lastFontEscape > -1 and istr[lastFontEscape+2:lastFontEscape+4] != "[]":
322322 istr += r"\f[]"
323323 return istr
324324
325 def get_xml_char(istr):
325 def getXmlChar(istr):
326326 "Extract a leading character or XML escape from the string."
327327 if len(istr) == 0:
328328 return ""
334334 take += 1
335335 return istr[:take+1]
336336
337 def make_comment(istr):
337 def makeComment(istr):
338338 if istr.startswith("."):
339339 istr = istr[1:]
340340 istr = istr.replace(r'\"', "").replace(r'\\"', "").replace(r'\(co', "(C)")
343343
344344 def lineparse(line):
345345 "Parse arguments of a dot macro."
346 if not is_command(line):
346 if not isCommand(line):
347347 return None
348348 #stderr.write("About to parse: " + line + "\n")
349349 tokens = [line[0]]
451451 "Apply all known hints to lift tokens in a text string."
452452 # stderr.write("Marked tokens:" + repr(self.dictionary) + "\n")
453453 for (token, tag) in list(self.dictionary.items()):
454 with_hi = r"<emphasis\s+remap='[A-Z]+'>(%s)</emphasis>" % token
455 #stdout.write("marking %s as %s via %s\n" % (token, tag, with_hi))
454 withHi = r"<emphasis\s+remap='[A-Z]+'>(%s)</emphasis>" % token
455 #stdout.write("marking %s as %s via %s\n" % (token, tag, withHi))
456456 try:
457457 ender = tag.split()[0] # discard attributes
458 text = re_compile(with_hi).sub(r"<%s>\1</%s>"%(tag,ender),text)
459 text = re_compile(r"\b("+token+")\b").sub(r"<%s>\1</%s>" % (tag, ender), text)
458 text = reCompile(withHi).sub(r"<%s>\1</%s>"%(tag,ender),text)
459 text = reCompile(r"\b("+token+")\b").sub(r"<%s>\1</%s>" % (tag, ender), text)
460460 except re.sre_compile.error:
461461 pass
462462 return text
472472 if line.startswith('.\\" | '):
473473 # Someday we'll have more declarations
474474 try:
475 (mark, token, as_word, markup) = line[5:].split()
476 if mark != "mark" or as_word != "as":
475 (mark, token, asWord, markup) = line[5:].split()
476 if mark != "mark" or asWord != "as":
477477 continue
478478 self.post(token, markup)
479479 except ValueError:
493493 self.type = ftype
494494 self.count = 0
495495 def __repr__(self):
496 return "<Frame: " + repr(self.__dict__) + ">"
496	        return "<Frame: " + repr(self.__dict__) + ">"
497497
498498 class DocLifter:
499499 "DocBook translation of generic troff macros."
501501 # The second element is a regexp to match to the tag content.
502502 # If the regexp matches, the bracketing emphasis tags are replaced
503503 # with the semantic tag in the third column.
504 lift_highlights = [(re_compile(r"<emphasis\s+remap='%s'>(%s)</emphasis>" % (x[0], x[1])), x[2]) for x in (
504 liftHighlights = [(reCompile(r"<emphasis\s+remap='%s'>(%s)</emphasis>" % (x[0], x[1])), x[2]) for x in (
505505 ("SM", r"[A-Z.]*", "acronym"), # Historical -- SM is rare
506506 ("SM", r"[A-Z]+_[A-Z_]+", "envar"), # In bison.1, cvs.1
507507 ("[BI]",r"-[^<]+", "option"), # likely command option man(7)
510510 ("[BI]",r"\.[a-zA-Z][^<]*", "markup"), # roff markup
511511 ("[BI]",r"/[^<]+", "filename"), # Marked filenames
512512 ("[BI]",r"~/[^<]*", "filename"), # Home directory filenames
513 ("[BI]",email_re.pattern,"email"), # email addresses
513 ("[BI]",emailRe.pattern,"email"), # email addresses
514514 ("[BI]",r"SIG[A-Z]+", "constant"), # signal
515515 ("[BI]",r"errno", "varname"), # variable
516516 ("[BI]",r"[a-z_]*_t", "type"),
592592 ("[BI]","PRINTER", "envar"),
593593 ("[BI]","LPDEST", "envar"),
594594 )]
595 post_translation_patterns = (
595 postTranslationPatterns = (
596596 # man(7)-style man-page references
597597 (re.compile(r"<emphasis (role='[a-z_]*' )?remap='[BI]'>([^ ]+)</emphasis>(?:&zerosp;|&thinsp;)?\(([0-9]+[A-Za-z]?)\)"),
598598 r"<citerefentry><refentrytitle>\2</refentrytitle><manvolnum>\3</manvolnum></citerefentry>"),
603603 # these never occur in program listings.
604604 (re.compile("``([^`']+)''"), r"&ldquo;\1&rdquo;"),
605605 )
606 post_lift_patterns = (
606 postLiftPatterns = (
607607 # Find a highlight directly after an <option> makes it <replaceable>
608608 (re.compile(r"(<option>[^ ]+</option>\s*)<emphasis remap='[BI]'>([^<]+)</emphasis>"),
609609 r"\1<replaceable>\2</replaceable>"),
638638 # looks like an id that was created during translation, then the
639639 # emphasis tags are replaced with a link tag with the target being
640640 # the result of converting the tag contents to an XML id.
641 lift_links = (
641 liftLinks = (
642642 ("SM", r"[A-Z ]+"), # Used in RCS and others
643643 ("Em", r"[A-Z ]+"), # Used in csh.1
644644 ("B", r"[A-Z ]+"), # Used in java.1, refer.1
647647 # These are entities that don't exist in the ISO set but are in Unicode.
648648 # They may be generated by our translation logic. If so, the right
649649 # entity declaration has to get emitted into the preamble.
650 pseudo_entities = (
650 pseudoEntities = (
651651 # These are from troff classic
652652 ("lh", "&#x261E;"), # Hand pointing left
653653 ("rh", "&#x261C;"), # Hand pointing right
706706 def __reinit__(self):
707707 "Null out the parser state."
708708 self.toptag = None
709 self.ignore_set = set([])
710 self.listbreak_set = set([])
711 self.scoped_set = set([])
712 self.complain_set = set([])
709 self.ignoreSet = set([])
710 self.listbreakSet = set([])
711 self.scopedSet = set([])
712 self.complainSet = set([])
713713 self.outsubst = []
714714 self.sectname = None
715715 self.nonblanks = 0
718718 self.sectionhooks = []
719719 self.fontfamily = ""
720720 self.synopsis = None # Never gets this value once one has been seen
721 self.synopsis_flushed = False
722 self.section_count = 0
721 self.synopsisFlushed = False
722 self.sectionCount = 0
723723 self.transplant = []
724724 self.complaints = {}
725 self.stash_indents = []
726 self.trap_prefix = None
727 self.trap_suffix = None
725 self.stashIndents = []
726 self.trapPrefix = None
727 self.trapSuffix = None
728728 self.errorcount = 0
729 self.stash_id = None
729 self.stashId = None
730730 self.displaystack = []
731 self.pic_seen = False
732 self.eqn_seen = False
731 self.picSeen = False
732 self.eqnSeen = False
733733 self.localentities = []
734734 self.lines = None
735735 self.eqnsub = None
739739 self.physlines = 0
740740 self.highlight = "R"
741741 self.oldhighlight = "R"
742 self.in_preamble = True
743 self.eqn_processed = False
744 self.body_start = 0
745 self.need_para = False
742 self.inPreamble = True
743 self.eqnProcessed = False
744 self.bodyStart = 0
745 self.needPara = False
746746 self.sectiondepth = 0
747747 self.output = []
748748 self.inclusions = []
760760 quiet=0,
761761 portability=0,
762762 includepath="",
763 in_encodings=(),
764 out_encoding='UTF-8',
763 inEncodings=(),
764 outEncoding='UTF-8',
765765 docbook5=True):
766766 self.verbose = verbose
767767 self.quiet = quiet
768768 self.portability = portability
769769 self.includepath = includepath
770 self.in_encodings = in_encodings
771 self.out_encoding = out_encoding
770 self.inEncodings = inEncodings
771 self.outEncoding = outEncoding
772772 self.docbook5 = docbook5
773773 self.__reinit__()
774774
775 def body_section(self):
775 def bodySection(self):
776776 "Are we in a section that corresponds to a real refentry section?"
777 return self.body_start
778 def declare_body_start(self):
777 return self.bodyStart
778 def declareBodyStart(self):
779779 "Latch the location where the document body starts."
780 if not self.body_start:
781 self.body_start = len(self.output)
780 if not self.bodyStart:
781 self.bodyStart = len(self.output)
782782 if not self.quiet:
783 self.emit(make_comment("body begins here"))
783 self.emit(makeComment("body begins here"))
784784
785785 # I/O utility code
786786 def popline(self):
805805 continue
806806 else:
807807 line += physline
808 if io_verbosity in self.verbose:
808 if ioVerbosity in self.verbose:
809809 self.notify("popped: " + line)
810810 self.lineno += self.physlines
811 return self.troff.expand_strings(line)
811 return self.troff.expandStrings(line)
812812 return None
813813 def pushline(self, line):
814814 "Push a line back on to the input source"
815 if io_verbosity in self.verbose:
815 if ioVerbosity in self.verbose:
816816 self.notify("pushed: %s" % line)
817817 self.lines = [line] + self.lines
818818 self.lineno -= self.physlines
822822 line = self.popline()
823823 self.pushline(line)
824824 return line
825 def macro_return(self):
825 def macroReturn(self):
826826 "Skip the remainder of the current macro."
827827 if not self.troff.macroargs:
828828 self.notify("warning: return outside of macro")
838838 def notify(self, msg):
839839 "C-compiler-like error message format."
840840 if self.troff.macronames:
841 msg = '"%s", line %d, expanding %s: %s\n' % (spoofname or self.file, self.lineno, self.troff.macronames[-1], msg)
841 msg = '%s:%d: expanding %s: %s\n' % (spoofname or self.file, self.lineno, self.troff.macronames[-1], msg)
842842 else:
843 msg = '"%s", line %d: %s\n' % (spoofname or self.file, self.lineno, msg)
843 msg = '%s:%d: %s\n' % (spoofname or self.file, self.lineno, msg)
844844 stderr.write(msg)
845845 return msg
846846 def filewarn(self, msg):
858858 if complain == None:
859859 complain = not self.quiet
860860 if complain:
861 self.emit(make_comment(" ".join(tokens)))
862 def trap_emit(self, prefix, suffix=""):
863 self.trap_prefix = prefix
864 self.trap_suffix = suffix
865 self.need_para = False
861 self.emit(makeComment(" ".join(tokens)))
862 def trapEmit(self, prefix, suffix=""):
863 self.trapPrefix = prefix
864 self.trapSuffix = suffix
865 self.needPara = False
866866 def emit(self, line, trans=1):
867867 "Emit output."
868 if io_verbosity in self.verbose:
868 if ioVerbosity in self.verbose:
869869 self.notify("emit(%s, trans=%d)" % (repr(line), trans))
870870 # Perhaps we've set a line trap?
871 if self.trap_prefix or self.trap_suffix:
871 if self.trapPrefix or self.trapSuffix:
872872 if not line.startswith("<!--") and not blankline.match(line):
873 self.pushline(self.trap_prefix + line + self.trap_suffix)
874 self.trap_prefix = self.trap_suffix = ""
873 self.pushline(self.trapPrefix + line + self.trapSuffix)
874 self.trapPrefix = self.trapSuffix = ""
875875 return
876876 # This test avoids a lot of expensive code on most lines
877877 if '\\' in line:
878878 # Where entity expansion gets done
879 line = self.expand_entities(line)
879 line = self.expandEntities(line)
880880 # Handle troff point changes.
881 if self.in_synopsis():
882 line = re_compile(r"\\s([+-]?[0-9]+)").sub("", line)
883 else:
884 line = re_compile(r"\\s([+-]?[0-9]+)").sub(r"<?troff ps \1?>",line)
881 if self.inSynopsis():
882 line = reCompile(r"\\s([+-]?[0-9]+)").sub("", line)
883 else:
884 line = reCompile(r"\\s([+-]?[0-9]+)").sub(r"<?troff ps \1?>",line)
885885 # Some escape translations should be done at this point.
886886 # This deals with some uses of \h for temporary indenting.
887887 # There's an example in tvtime-command.1.
888 spacer = re_compile(r"\\h'([0-9]+)n'")
888 spacer = reCompile(r"\\h'([0-9]+)n'")
889889 while True:
890890 m = spacer.search(line)
891891 if m:
896896 while '\\n' in line:
897897 before = line[:line.find('\\n')]
898898 after = line[line.find('\\n'):]
899 (head, tail) = self.troff.eval_term(after)
899 (head, tail) = self.troff.evalTerm(after)
900900 line = before + head + tail
901901 # Check to see if output translation is enabled.
902902 if trans and self.outsubst:
903 do_xlate = True
903 doXlate = True
904904 translated = ""
905905 i = 0
906906 while i < len(line):
907907 if line[i] == '<':
908 do_xlate = 0
909 if not do_xlate:
908 doXlate = 0
909 if not doXlate:
910910 if line[i] == '>':
911 do_xlate = True
911 doXlate = True
912912 translated += line[i]
913913 i += 1
914914 else:
946946 return self.diversion and self.diversion[-1].strip().endswith(trailer)
947947
948948 # Synopsis handling
949 def flush_transplant(self):
950 if self.synopsis_flushed:
949 def flushTransplant(self):
950 if self.synopsisFlushed:
951951 return
952952 else:
953 self.synopsis_flushed = True
953 self.synopsisFlushed = True
954954 if self.synopsis:
955955 (parsed, warnuser) = self.synopsis.transform()
956956 if self.docbook5:
957957 self.emit("<refsynopsisdiv xml:id='%s'>\n%s</refsynopsisdiv>\n" \
958 % (self.make_id_from_title('synopsis'), parsed))
958 % (self.makeIdFromTitle('synopsis'), parsed))
959959 else:
960960 self.emit("<refsynopsisdiv id='%s'>\n%s</refsynopsisdiv>\n" \
961 % (self.make_id_from_title('synopsis'), parsed))
961 % (self.makeIdFromTitle('synopsis'), parsed))
962962 if warnuser:
963963 self.warning("dubious content in Synopsis")
964964 # If there's a transplant, emit it now.
965 self.declare_body_start()
965 self.declareBodyStart()
966966 self.output += self.transplant
967967
968968 # Section-break handlers
969 def end_paragraph(self, label="random"):
969 def endParagraph(self, label="random"):
970970 "Close the current paragraph, if we're in one."
971 if section_verbosity in self.verbose:
972 self.notify("end_paragraph(%s)" % label)
971 if sectionVerbosity in self.verbose:
972 self.notify("endParagraph(%s)" % label)
973973 self.troff.nf = False
974 self.need_para = False
975 def need_paragraph(self):
974 self.needPara = False
975 def needParagraph(self):
976976 "Cause <para> to be prepended to next text line."
977 if section_verbosity in self.verbose:
978 self.notify("need_paragraph()")
979 self.need_para = True
977 if sectionVerbosity in self.verbose:
978 self.notify("needParagraph()")
979 self.needPara = True
980980 def paragraph(self, remap=""):
981981 "Replace generic paragraph-start macro with blank line."
982 if section_verbosity in self.verbose:
982 if sectionVerbosity in self.verbose:
983983 self.notify("paragraph(remap='%s')" % remap)
984 self.end_paragraph("paragraph")
984 self.endParagraph("paragraph")
985985 if not self.quiet:
986986 if remap:
987 self.emit(make_comment(remap))
987 self.emit(makeComment(remap))
988988 else:
989989 self.emit("")
990 self.need_paragraph()
991 def pop_section(self, depth):
990 self.needParagraph()
991 def popSection(self, depth):
992992 "Pop to new section level."
993 if section_verbosity in self.verbose:
994 self.notify("pop_section(%d)" % depth)
993 if sectionVerbosity in self.verbose:
994 self.notify("popSection(%d)" % depth)
995995 self.poplist() # Terminate all list structure
996996 toplevel = (depth == 1) and (self.sectiondepth == 1)
997997 self.troff.nf = False
998 self.end_paragraph(label="pop_section")
999 self.need_para = False
998 self.endParagraph(label="popSection")
999 self.needPara = False
10001000 # Execute any traps user might have planted.
10011001 for hook in self.sectionhooks:
10021002 hook()
10191019 self.emit("</%s%d>" % (divider, self.sectiondepth - i))
10201020 finally:
10211021 self.sectiondepth = depth
1022 def push_section(self, depth, title, makeid=True):
1022 def pushSection(self, depth, title, makeid=True):
10231023 "Start new section."
1024 self.section_count += 1
1025 if section_verbosity in self.verbose:
1026 self.notify("push_section(%d, %s)" % (depth, title))
1027 self.pop_section(depth)
1024 self.sectionCount += 1
1025 if sectionVerbosity in self.verbose:
1026 self.notify("pushSection(%d, %s)" % (depth, title))
1027 self.popSection(depth)
10281028 if self.toptag == "refentry":
10291029 ref = "ref"
10301030 else:
10311031 ref = ""
1032 if self.stash_id:
1032 if self.stashId:
10331033 if self.docbook5:
1034 sid = " xml:id='%s'" % self.stash_id
1035 self.stash_id = None
1036 else:
1037 sid = " id='%s'" % self.stash_id
1038 self.stash_id = None
1034 sid = " xml:id='%s'" % self.stashId
1035 self.stashId = None
1036 else:
1037 sid = " id='%s'" % self.stashId
1038 self.stashId = None
10391039 elif makeid:
10401040 if self.docbook5:
1041 sid = " xml:id='%s'" % self.make_id_from_title(title)
1042 else:
1043 sid = " id='%s'" % self.make_id_from_title(title)
1041 sid = " xml:id='%s'" % self.makeIdFromTitle(title)
1042 else:
1043 sid = " id='%s'" % self.makeIdFromTitle(title)
10441044 else:
10451045 sid = ""
10461046 self.emit("\n<%ssect%d%s><title>%s\\fR</title>" % (ref, depth, sid, title))
1047 self.need_paragraph()
1047 self.needParagraph()
10481048 self.sectiondepth = depth
10491049 self.sectname = title
10501050 self.sectionhooks = []
10511051 self.nonblanks = 0
1052 def paragraph_break(self, line):
1052 def paragraphBreak(self, line):
10531053 "Are we looking at a paragraph break command?"
10541054 if line.startswith(TroffInterpreter.ctrl + "end"):
10551055 return True
1056 elif not is_command(line):
1056 elif not isCommand(line):
10571057 return False
10581058 else:
10591059 tokens = lineparse(line)
10601060 if tokens:
10611061 for interpreter in self.interpreters:
1062 if tokens[0][1:] in interpreter.parabreak_set:
1062 if tokens[0][1:] in interpreter.parabreakSet:
10631063 return True
1064 if tokens[0][1:] in interpreter.sectionbreak_set:
1064 if tokens[0][1:] in interpreter.sectionbreakSet:
10651065 return True
10661066 return False
1067 def section_break(self, line):
1067 def sectionBreak(self, line):
10681068 "Are we looking at a section break command?"
10691069 if line.startswith(TroffInterpreter.ctrl + "end"):
10701070 return True
1071 elif not is_command(line):
1071 elif not isCommand(line):
10721072 return False
10731073 else:
10741074 tokens = lineparse(line)
10751075 if tokens:
10761076 for interpreter in self.interpreters:
1077 if tokens[0][1:] in interpreter.sectionbreak_set:
1077 if tokens[0][1:] in interpreter.sectionbreakSet:
10781078 return True
10791079 return False
10801080
10811081 def indent(self):
1082 return len(self.stash_indents) * " "
1083
1084 def begin_block(self, markup, remap=""):
1082 return len(self.stashIndents) * " "
1083
1084 def beginBlock(self, markup, remap=""):
10851085 "Begin a block-context markup section."
1086 if io_verbosity in self.verbose:
1087 self.notify("begin_block(%s)" % markup)
1088 self.end_paragraph(label="begin_block")
1086 if ioVerbosity in self.verbose:
1087 self.notify("beginBlock(%s)" % markup)
1088 self.endParagraph(label="beginBlock")
10891089 if remap and not self.quiet:
10901090 remap = " remap='" + remap + "'"
10911091 if markup in ("literallayout", "programlisting"):
10921092 self.troff.nf = True
1093 if io_verbosity in self.verbose:
1093 if ioVerbosity in self.verbose:
10941094 self.warning("begin display collection")
10951095 self.displaystack.append((markup, remap, DisplayParser(self,
10961096 True,
10991099 else:
11001100 self.emit(self.indent() + "<" + markup + remap + ">")
11011101 if markup != "inlineequation":
1102 self.need_paragraph()
1103
1104 def end_block(self, markup, remap=""):
1102 self.needParagraph()
1103
1104 def endBlock(self, markup, remap=""):
11051105 "End a block-context markup section."
11061106 # FIXME: use ends to ignore stray things that look like terminators
1107 if io_verbosity in self.verbose:
1108 self.notify("end_block(markup='%s', remap='%s')" % (markup, remap))
1107 if ioVerbosity in self.verbose:
1108 self.notify("endBlock(markup='%s', remap='%s')" % (markup, remap))
11091109 if remap and not self.quiet:
11101110 remap = " <!-- remap='" + remap + "' -->"
11111111 self.troff.nf = False
11121112 # Turn off all font highlights -- technically incorrect,
11131113 # but almost always the right thing to do. We also
11141114 # probably need an end-paragraph here, but that will be
1115 # taken care of by close_tags() later on.
1115 # taken care of by closeTags() later on.
11161116 if self.displaystack:
11171117 (beginmarkup, beginremap, display) = self.displaystack.pop()
11181118 (parsed, _) = display.transform()
11211121 else:
11221122 self.emit(self.indent() + r"\fR</%s>" % markup + remap)
11231123 if markup != "inlineequation":
1124 self.need_paragraph()
1124 self.needParagraph()
11251125
11261126 def pushlist(self, cmd, ltype=None):
1127 if section_verbosity in self.verbose:
1127 if sectionVerbosity in self.verbose:
11281128 self.notify("pushlist(%s, %s)" % (cmd, ltype))
1129 self.stash_indents.append(Frame(cmd, ltype))
1129 self.stashIndents.append(Frame(cmd, ltype))
11301130
11311131 def poplist(self, backto=None, remap=""):
11321132 "Pop levels off the list stack until we've removed specified command."
1133 if section_verbosity in self.verbose:
1134 self.notify("poplist(%s) %s" % (backto, self.stash_indents))
1135 while self.stash_indents:
1136 frame = self.stash_indents[-1]
1133 if sectionVerbosity in self.verbose:
1134 self.notify("poplist(%s) %s" % (backto, self.stashIndents))
1135 while self.stashIndents:
1136 frame = self.stashIndents[-1]
11371137 if frame.type == "variablelist":
1138 self.emit_variablelist("end")
1138 self.emitVariablelist("end")
11391139 elif frame.type == "itemizedlist":
1140 self.emit_itemizedlist("end")
1140 self.emitItemizedlist("end")
11411141 elif frame.type == "blockquote":
1142 self.end_block("blockquote", remap=remap)
1143 self.stash_indents.pop()
1144 else:
1145 self.stash_indents.pop()
1142 self.endBlock("blockquote", remap=remap)
1143 self.stashIndents.pop()
1144 else:
1145 self.stashIndents.pop()
11461146 if frame.command == backto:
11471147 break
1148 if section_verbosity in self.verbose:
1149 self.notify("after popping %s" % (self.stash_indents,))
1150
1151 def last_tag(self, lookfor):
1148 if sectionVerbosity in self.verbose:
1149 self.notify("after popping %s" % (self.stashIndents,))
1150
1151 def lastTag(self, lookfor):
11521152 "What was the last actual tag emitted?"
11531153 if not self.diversion:
11541154 return False
11631163 return False
11641164 back -= 1
11651165
1166 def emit_variablelist(self, cmd, term=None):
1166 def emitVariablelist(self, cmd, term=None):
11671167 "Emit a portion of variable-list markup."
1168 if section_verbosity in self.verbose:
1169 self.notify("emit_variablelist(%s, %s) %s"%(cmd, repr(term), self.stash_indents))
1168 if sectionVerbosity in self.verbose:
1169 self.notify("emitVariablelist(%s, %s) %s"%(cmd, repr(term), self.stashIndents))
11701170 if cmd == "end":
1171 if self.stash_indents:
1171 if self.stashIndents:
11721172 indent = self.indent()
1173 self.stash_indents.pop()
1173 self.stashIndents.pop()
11741174 # Empty item with a bunch of .TP lines before it.
11751175 # Retroactively hack this into an itemized list.
1176 if self.last_tag("<listitem"):
1176 if self.lastTag("<listitem"):
11771177 if self.verbose:
11781178 self.warning("variable-list header just before section break")
1179 if section_verbosity in self.verbose:
1179 if sectionVerbosity in self.verbose:
11801180 self.notify("remaking as itemized")
11811181 backup = -1
11821182 while True:
11941194 self.emit("%s</itemizedlist>" % indent[2:])
11951195 else:
11961196 # List has a <para> or something at start.
1197 self.end_paragraph()
1197 self.endParagraph()
11981198 self.emit("%s</listitem>" % indent)
11991199 self.emit("%s</varlistentry>" % indent)
12001200 self.emit("%s</variablelist>" % indent[2:])
12011201 return
12021202 # All cases below emit a <term> at least
1203 if not self.stash_indents or self.stash_indents[-1].command != cmd:
1203 if not self.stashIndents or self.stashIndents[-1].command != cmd:
12041204 if self.quiet:
12051205 remap = ""
12061206 else:
12081208 self.emit("%s<variablelist%s>" % (self.indent(), remap))
12091209 self.pushlist(cmd, "variablelist")
12101210 indent = self.indent()
1211 back = self.last_tag("<listitem")
1211 back = self.lastTag("<listitem")
12121212 if back:
12131213 self.diversion[back] = "<!-- DELETE ME! -->"
12141214 else:
1215 if self.stash_indents[-1].count > 0:
1215 if self.stashIndents[-1].count > 0:
12161216 self.emit("%s</listitem>" % indent)
12171217 self.emit("%s</varlistentry>" % indent)
12181218 self.emit("%s<varlistentry>" % indent)
12221222 for item in term:
12231223 self.emit("%s<term>%s</term>" % (indent, fontclose(item)))
12241224 self.emit("%s<listitem>" % indent)
1225 self.stash_indents[-1].count += 1
1226 self.need_paragraph()
1227
1228 def emit_itemizedlist(self, cmd, bullet=None):
1225 self.stashIndents[-1].count += 1
1226 self.needParagraph()
1227
1228 def emitItemizedlist(self, cmd, bullet=None):
12291229 "Emit a portion of itemized-list markup."
1230 if section_verbosity in self.verbose:
1231 self.notify("emit_itemizedlist(%s) %s"%(cmd, self.stash_indents))
1230 if sectionVerbosity in self.verbose:
1231 self.notify("emitItemizedlist(%s) %s"%(cmd, self.stashIndents))
12321232 if cmd == "end":
1233 if self.stash_indents:
1233 if self.stashIndents:
12341234 indent = self.indent()
1235 self.stash_indents.pop()
1236 if self.last_tag("<listitem"):
1235 self.stashIndents.pop()
1236 if self.lastTag("<listitem"):
12371237 self.emit("<para> <!-- FIXME: blank list item -->")
12381238 self.warning("blank itemizedlist item, look for FIXME")
1239 self.end_paragraph()
1239 self.endParagraph()
12401240 self.emit("%s</listitem>" % indent)
12411241 self.emit("%s</itemizedlist>" % indent[2:])
12421242 return
12431243 # All cases below emit a <listitem> at least
1244 if not self.stash_indents or self.stash_indents[-1].command != cmd:
1244 if not self.stashIndents or self.stashIndents[-1].command != cmd:
12451245 if self.quiet:
12461246 remap = ""
12471247 else:
12491249 self.emit("%s<itemizedlist%s>" % (self.indent(), remap))
12501250 self.pushlist(cmd, "itemizedlist")
12511251 indent = self.indent()
1252 back = self.last_tag("<listitem")
1252 back = self.lastTag("<listitem")
12531253 if back:
12541254 self.diversion[back] = "<!-- DELETE ME! -->"
12551255 else:
1256 if self.stash_indents[-1].count > 0:
1256 if self.stashIndents[-1].count > 0:
12571257 self.emit("%s</listitem>" % indent)
12581258 self.emit("%s<listitem override='%s'>" % (indent, bullet))
1259 self.stash_indents[-1].count += 1
1260 self.need_paragraph()
1259 self.stashIndents[-1].count += 1
1260 self.needParagraph()
12611261
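The variable-list and itemized-list emitters above both drive the same indent stack: opening a list pushes a frame, and popping a frame emits the matching close tags. A minimal sketch of that bookkeeping, with invented names and none of the retroactive list-rewriting the real code performs:

```python
class ListStack:
    # Illustrative sketch of doclifter's nested-list tracking: each list
    # macro pushes a frame recording which close tags it owes, and popping
    # back to a command emits those closes in reverse order.
    def __init__(self):
        self.frames = []

    def push(self, cmd, ltype):
        self.frames.append((cmd, ltype))
        return "<%s>" % ltype

    def pop_to(self, backto):
        out = []
        while self.frames:
            cmd, ltype = self.frames.pop()
            out.append("</%s>" % ltype)
            if cmd == backto:
                break
        return "".join(out)
```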
12621262 # Highlight handling
1263 def change_highlight(self, htype, prefix='f'):
1263 def changeHighlight(self, htype, prefix='f'):
12641264 if prefix == 'F': # groff font family change
12651265 if htype == 'T':
12661266 htype = ''
12691269 else: # ordinary font change
12701270 real = htype
12711271 pop = False
1272 if highlight_verbosity in self.verbose:
1273 log = "change_highlight(%s) from %s" % (real, self.highlight)
1272 if highlightVerbosity in self.verbose:
1273 log = "changeHighlight(%s) from %s" % (real, self.highlight)
12741274 if htype == "0":
12751275 pop = True
12761276 htype = self.oldhighlight
12861286 pop = True
12871287 htype = self.oldhighlight
12881288 elif htype == self.highlight:
1289 if highlight_verbosity in self.verbose:
1289 if highlightVerbosity in self.verbose:
12901290 self.notify(log + " is a no-op")
12911291 return ""
1292 if highlight_verbosity in self.verbose:
1292 if highlightVerbosity in self.verbose:
12931293 log += ", mapped to %s" % htype
12941294 if self.highlight == "R" or self.highlight == "[]":
12951295 newhi = ""
13051305 newhi += "<emphasis %sremap='%s%s'>" % (role, self.fontfamily,htype)
13061306 self.oldhighlight = self.highlight
13071307 self.highlight = htype
1308 if highlight_verbosity in self.verbose:
1308 if highlightVerbosity in self.verbose:
13091309 self.notify(log + (", used %s, last = %s" % (newhi, self.oldhighlight)))
13101310 return newhi
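Stripped of font-family, role, and previous-font tracking, the highlight-change translation above reduces to a small state step: close the current emphasis if one is open, then open a new one unless returning to roman. A simplified sketch, not the actual changeHighlight:

```python
def change_highlight(new, current):
    # Sketch of doclifter's font-change translation: emit the closing
    # </emphasis> for the current non-roman font and/or an opening
    # <emphasis remap='X'> for the new one.  The real method also handles
    # \fP-style pops, groff families, and role attributes.
    out = ''
    if current not in ('R', ''):
        out += '</emphasis>'
    if new not in ('R', ''):
        out += "<emphasis remap='%s'>" % new
    return out
```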
1311 def direct_highlight(self, highlight, args, trailer=""):
1311 def directHighlight(self, highlight, args, trailer=""):
13121312 "Translate man(7)-style ordinary highlight macros."
13131313 if not args:
13141314 line = self.popline()
13161316 # .B
13171317 # .BI -G num
13181318 # on the gcc.1 man page.
1319 if line is None or is_command(line):
1319 if line is None or isCommand(line):
13201320 self.pushline(line)
1321 return make_comment("%s elided" % highlight)
1321 return makeComment("%s elided" % highlight)
13221322 else:
13231323 line = " ".join(args)
13241324 if not trailer and line[-2:] == "\\c":
13361336 transformed += r"\fR" # Yes, see the definition of an-trap.
13371337 transformed += trailer
13381338 return transformed
1339 def alternating_highlight(self, highlight, words, trailer=""):
1339 def alternatingHighlight(self, highlight, words, trailer=""):
13401340 "Translate the screwy man(7)-style alternating highlight macros."
13411341 if not words:
13421342 nextl = self.popline()
13441344 # .BR
13451345 # .SH CUSTOMIZATION
13461346 # on the MAKEDEV.8 manual page.
1347 if nextl is None or is_command(nextl) or blankline.search(nextl):
1347 if nextl is None or isCommand(nextl) or blankline.search(nextl):
13481348 if nextl is not None:
13491349 self.pushline(nextl)
1350 return make_comment("bogus %s elided" % highlight)
1350 return makeComment("bogus %s elided" % highlight)
13511351 else:
13521352 words = nextl.split()
13531353 if not trailer and words[-1][-2:] == "\\c":
13831383 self.warning("index macro has more than three arguments.")
13841384 return "<indexterm><primary>%s</primary><secondary>%s</secondary><tertiary>%s</tertiary></indexterm> <!-- %s -->" % (args[0], args[1], args[2], " ".join(args[3:]))
13851385
1386 def id_from_title(self, istr):
1386 def idFromTitle(self, istr):
13871387 "Turn a string into a section ID usable in link declarations."
13881388 # First, remove any trailing section of the title in parens
13891389 istr = re.sub(r" \(.*", "", istr)
14071407 return self.name + "-" + newid
14081408 else:
14091409 return newid
1410 def make_id_from_title(self, st):
1411 sid = self.id_from_title(st)
1410 def makeIdFromTitle(self, st):
1411 sid = self.idFromTitle(st)
14121412 # We allow duplicate sections, but warn about them
14131413 if sid not in self.idlist:
14141414 self.idlist[sid] = 1
14181418 #self.error("more than one section is named %s" % st)
14191419 return sid + repr(self.idlist[sid])
14201420
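The ID-generation pair above (idFromTitle plus the duplicate-tracking wrapper) can be sketched as one helper. The slug rules here are simplified stand-ins for the original's transformations; only the duplicate-counter behavior is meant to match:

```python
import re

def id_from_title(title, seen):
    # Sketch of doclifter's section-ID generation: slugify the title,
    # then disambiguate repeated sections by appending a counter.
    slug = re.sub(r' \(.*', '', title)               # drop trailing parens
    slug = re.sub(r'[^a-zA-Z0-9]+', '_', slug).strip('_').lower()
    if slug not in seen:
        seen[slug] = 1
        return slug
    seen[slug] += 1
    return slug + str(seen[slug])
```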
1421 def id_exists(self, sid):
1421 def idExists(self, sid):
14221422 "Test whether an id already exists"
14231423 return sid in self.idlist
14241424 def TBL(self, enddelim=None):
15831583 self.emit(" <thead>")
15841584 # OK, finally ready to emit the table
15851585 for i in range(len(data)):
1586 if is_comment(data[i][0]):
1587 self.emit(make_comment("\t".join(data[i])))
1586 if isComment(data[i][0]):
1587 self.emit(makeComment("\t".join(data[i])))
15881588 continue
15891589 if rowsep[i]:
15901590 self.emit(" <row rowsep='1'>")
16281628 content = str(data[i][j])
16291629 if '\n' in content:
16301630 interpreted = []
1631 self.interpret_block(content.split("\n"), interpreted)
1631 self.interpretBlock(content.split("\n"), interpreted)
16321632 content = "\n".join(interpreted)
16331633 line += content
1634 if troff_highlight.search(line) is not None:
1634 if troffHighlight.search(line) is not None:
16351635 line += r"\fR"
1636 self.emit(self.troff.expand_strings(line) + "</entry>")
1636 self.emit(self.troff.expandStrings(line) + "</entry>")
16371637 comment = comments.pop(0)
16381638 if comment is None:
16391639 trailer = ""
16401640 else:
1641 trailer = " " + make_comment(comment)
1641 trailer = " " + makeComment(comment)
16421642 self.emit(" </row>" + trailer)
16431643 if i == lastheaderline:
16441644 if lastheaderline > -1:
16581658 nondelimlines = 0
16591659 while self.lines:
16601660 line = self.popline()
1661 if not line.startswith("delim") and not match_command(line, "EN"):
1661 if not line.startswith("delim") and not matchCommand(line, "EN"):
16621662 nondelimlines += 1
16631663 else:
16641664 tokens = line.split()
16661666 if tokens[1] == "off":
16671667 self.eqnsub = None
16681668 if not self.quiet:
1669 self.emit(make_comment("eqn delimiters off."))
1669 self.emit(makeComment("eqn delimiters off."))
16701670 else:
16711671 es = re.escape(tokens[1][0])
16721672 ee = re.escape(tokens[1][1])
1673 self.eqnsub = re_compile("([^" + es + "]*)" + es + "([^" + ee + "]+)"+ ee +"(.*)")
1673 self.eqnsub = reCompile("([^" + es + "]*)" + es + "([^" + ee + "]+)"+ ee +"(.*)")
16741674 if not self.quiet:
1675 self.emit(make_comment("eqn delimiters set to %s%s" % (tokens[1][0],tokens[1][1])))
1675 self.emit(makeComment("eqn delimiters set to %s%s" % (tokens[1][0],tokens[1][1])))
16761676 else:
16771677 self.eqnsub = None
1678 if match_command(line, "EN"):
1678 if matchCommand(line, "EN"):
16791679 break
16801680 eqnlines.append(line)
16811681 if nondelimlines:
1682 if self.in_preamble:
1683 self.emit(make_comment(startline.strip()))
1682 if self.inPreamble:
1683 self.emit(makeComment(startline.strip()))
16841684 for line in eqnlines:
1685 self.emit(make_comment(line))
1686 self.emit(make_comment(startline[0] + "EN"))
1685 self.emit(makeComment(line))
1686 self.emit(makeComment(startline[0] + "EN"))
16871687 else:
16881688 # If we could not pretranslate to MathML, and there
16891689 # are non-delimiter lines, and we're not in preamble
16901690	            # it's appropriate to issue a warning here.
1691 if not self.eqn_processed:
1692 if not self.eqn_seen:
1693 self.eqn_seen = True
1691 if not self.eqnProcessed:
1692 if not self.eqnSeen:
1693 self.eqnSeen = True
16941694 self.warning("eqn(1) markup not translated.")
1695 self.begin_block("literallayout", remap="EQN")
1695 self.beginBlock("literallayout", remap="EQN")
16961696 else:
1697 self.begin_block("equation", remap="EQN")
1697 self.beginBlock("equation", remap="EQN")
16981698 for line in eqnlines:
16991699 self.emit(line)
1700 if not self.eqn_processed:
1701 self.end_block("literallayout")
1700 if not self.eqnProcessed:
1701 self.endBlock("literallayout")
17021702 else:
1703 self.end_block("equation")
1703 self.endBlock("equation")
17041704
17051705 def PIC(self, line):
17061706 "Wrap and emit a PIC section."
17071707 # Because xmlto(1) can't render raw PIC diagrams,
17081708 # try converting them to SVG with pic2plot(1); warn if that fails.
1709 pic_diagram = ".PS\n"
1709 picDiagram = ".PS\n"
17101710 while True:
17111711 line = self.popline()
17121712 if not line:
17131713 self.error("missing .PE")
1714 pic_diagram += line + "\n"
1714 picDiagram += line + "\n"
17151715 if line[1:].startswith("PE"):
17161716 break
17171717 fp = tempfile.NamedTemporaryFile(prefix="doclifter")
17181718 # This is high-byte-preserving
17191719 try:
1720 fp.write(pic_diagram.encode('latin-1'))
1720 fp.write(picDiagram.encode('latin-1'))
17211721 except UnicodeDecodeError:
1722 fp.write(pic_diagram) # This means we're in Python2
1722 fp.write(picDiagram) # This means we're in Python2
17231723 fp.flush()
17241724 (status, svg) = getstatusoutput("pic2plot -T svg <" + fp.name)
17251725 fp.close()
17301730 else:
17311731 self.warning("pic(1) markup not translated.")
17321732 self.emit("<literallayout remap='PIC'>")
1733 self.emit(pic_diagram)
1733 self.emit(picDiagram)
17341734 self.emit("</literallayout>")
17351735
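The .PS/.PE accumulation loop in PIC() above can be sketched as a standalone helper (hypothetical name `collect_pic`, not part of doclifter; it keeps the original's quirk of matching any control character before PE):

```python
def collect_pic(lines):
    """Accumulate a PIC diagram from an iterator of troff lines.

    Consumes lines through the terminating .PE request and returns the
    diagram text including that terminator, or None if the input runs
    out first (doclifter reports a 'missing .PE' error in that case).
    """
    diagram = ".PS\n"
    for line in lines:
        diagram += line + "\n"
        if line[1:].startswith("PE"):  # matches .PE and 'PE alike
            return diagram
    return None
```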
17361736 def ignore(self, cmd):
17371737 "Declare that we want to ignore a command."
1738 self.ignore_set.add(cmd)
1738 self.ignoreSet.add(cmd)
17391739
17401740 def unignore(self, cmd):
17411741 "Declare that we want to stop ignoring a command."
1742 if cmd in self.ignore_set:
1743 self.ignore_set.remove(cmd)
1742 if cmd in self.ignoreSet:
1743 self.ignoreSet.remove(cmd)
17441744
17451745 def ignorable(self, command, nocomplaints=0):
17461746 "Can this command be safely ignored?"
17471747 if not command:
17481748 return False
17491749 command = command.split()[0] # only look at first token
1750 if command[0] in (TroffInterpreter.ctrl,TroffInterpreter.ctrl_nobreak):
1750 if command[0] in (TroffInterpreter.ctrl,TroffInterpreter.ctrlNobreak):
17511751 command = command[1:]
1752 return command in self.ignore_set or (nocomplaints and command in self.complain_set)
1752 return command in self.ignoreSet or (nocomplaints and command in self.complainSet)
17531753
17541754 def execute(self, line, command, tokens):
17551755 "Try to interpret this command using each interpreter in the stack."
1756 if command in self.ignore_set:
1756 if command in self.ignoreSet:
17571757 self.passthrough(tokens)
17581758 return True
1759 if command in self.complain_set:
1759 if command in self.complainSet:
17601760 self.complaints[command] = True
1761 if general_verbosity in self.verbose:
1761 if generalVerbosity in self.verbose:
17621762 self.notify(command + " seen")
17631763 self.passthrough(tokens, complain=True)
17641764 return True
1765 listbreaker = (command in self.listbreak_set or command == 'end')
1765 listbreaker = (command in self.listbreakSet or command == 'end')
17661766 if listbreaker:
17671767 # So .TP+.SH doesn't hose us, basically...
1768 self.trap_prefix = self.trap_suffix = ""
1769 if interpreter_verbosity in self.verbose:
1768 self.trapPrefix = self.trapSuffix = ""
1769 if interpreterVerbosity in self.verbose:
17701770 self.notify("after ignore check, interpreter sees: " + repr(tokens))
17711771 # Maybe this command closes a list?
1772 if self.stash_indents and listbreaker:
1773 if io_verbosity in self.verbose:
1774 self.notify("list closer %s: %s"%(command,self.stash_indents))
1775 enclosing = self.stash_indents[-1].command
1776 if enclosing not in self.scoped_set and command in self.listbreak_set and enclosing != command:
1777 while len(self.stash_indents):
1778 self.poplist(self.stash_indents[-1].command)
1772 if self.stashIndents and listbreaker:
1773 if ioVerbosity in self.verbose:
1774 self.notify("list closer %s: %s"%(command,self.stashIndents))
1775 enclosing = self.stashIndents[-1].command
1776 if enclosing not in self.scopedSet and command in self.listbreakSet and enclosing != command:
1777 while len(self.stashIndents):
1778 self.poplist(self.stashIndents[-1].command)
17791779 # Here is where string expansion in command arguments gets done:
17801780 stripped = []
17811781 for arg in stripquotes(tokens):
17951795 return True
17961796 return False
17971797
1798 def in_synopsis(self):
1799 return self.sectname and synopsis_label.match(self.sectname)
1800
1801 def interpret_block(self, lines, divert=None):
1798 def inSynopsis(self):
1799 return self.sectname and synopsisLabel.match(self.sectname)
1800
1801 def interpretBlock(self, lines, divert=None):
18021802 # Line-by-line translation
18031803 self.pushdown.append(self.lines)
18041804 self.lines = lines
18091809 try:
18101810 while self.lines:
18111811 line = self.popline()
1812 if interpreter_verbosity in self.verbose:
1812 if interpreterVerbosity in self.verbose:
18131813 self.notify("interpreter sees: %s" % line)
18141814 if line is None:
18151815 break
18231823 # .TP, but .PP is. We'll make up our own command and
18241824 # pass it through for the interpreters to munch on.
18251825 if line == '':
1826 if self.body_section() and not self.troff.nf:
1826 if self.bodySection() and not self.troff.nf:
18271827 self.pushline(TroffInterpreter.ctrl + "blank")
18281828 # Treat blank lines in synopses as break commands;
18291829 # see cpio.1 for an example of why this is necessary.
1830 elif self.in_synopsis():
1830 elif self.inSynopsis():
18311831 self.pushline(TroffInterpreter.ctrl + "br")
18321832 else:
18331833 self.emit('')
18401840 doit = (line != transformed)
18411841 line = transformed
18421842 # Could be a comment. Handle various ugly undocumented things.
1843 if is_comment(line):
1843 if isComment(line):
18441844 if line[3:]:
1845 line = make_comment(line)
1845 line = makeComment(line)
18461846 else:
18471847 line = ""
18481848 self.emit(line)
18521852 # a command. It's probably going to bite us someday.
18531853 if line == r"\}":
18541854 self.pushline(TroffInterpreter.ctrl + r"\}")
1855 if macro_verbosity in self.verbose:
1855 if macroVerbosity in self.verbose:
18561856 self.warning(r"adventitious \} should probably be .\}")
18571857 continue
18581858 # If no command, emit, and go on.
1859 if not is_command(line):
1859 if not isCommand(line):
18601860 # Note: This should be the only place where plain text
18611861 # is emitted. When in doubt, use pushline() rather
18621862 # emit -- that will send the generated text back through
18631863 # here.
1864 if self.need_para and line and not line[:4] == "<!--":
1864 if self.needPara and line and not line[:4] == "<!--":
18651865 line = "<para>" + line
1866 self.need_para = False
1866 self.needPara = False
18671867 self.emit(line)
18681868 continue
18691869 # We've got a dot command. Try to interpret it as a request.
18921892 # We were not able to find an interpretation
18931893 # stderr.write("Raw line:" + line + "\n")
18941894 # stderr.write("Tokens:" + repr(tokens) + "\n")
1895 self.emit(make_comment(line))
1895 self.emit(makeComment(line))
18961896 self.error("uninterpreted '%s' command" % command)
18971897 except LiftException as e:
18981898 self.error('"%s" on %s' % (str(e), repr(line)))
18991899 except (SystemExit, KeyboardInterrupt):
1900 self.error('"%s", line %d: internal error.' % (spoofname or self.file,self.lineno))
1900 self.error('%s:%d: internal error.' % (spoofname or self.file,self.lineno))
19011901 except:
19021902 # Pass the exception upwards for debugging purposes
19031903 if not self.quiet:
1904 self.error("exception %s on: %s" % (sys.exc_info()[0], line))
1904 self.error("exception %s on: %s" % (sys.exc_info()[0], line))
19051905 raise
19061906 self.lines = self.pushdown.pop()
19071907 if divert is not None:
19231923 return True
19241924 return False
19251925
1926 def expand_entities(self, line):
1926 def expandEntities(self, line):
19271927 "Expand character entities."
19281928 # Specials associated with troff and various macro sets
19291929 for interpreter in self.interpreters:
19401940 if start == -1:
19411941 break
19421942 elif line[start:].startswith("\\[char"):
1943 m = re.match(r"[0-9]+(?=\135)", line[start+6:])
1943 m = re.match(r"[0-9]+(?=\x5D)", line[start+6:])
19441944 if m:
19451945 line = line[:start] + "&#" + m.group(0) + ";" + line[start + 6 + len(m.group(0)) + 1:]
19461946 oldstart = start + 2
19471947 elif line[start:].startswith("\\[u"):
1948 m = re.match(r"[0-9]+(?=\135)", line[start+3:])
1948 m = re.match(r"[0-9]+(?=\x5D)", line[start+3:])
19491949 if m:
19501950 line = line[:start] + "&#" + m.group(0) + ";" + line[start + 3 + len(m.group(0)) + 1:]
1951 else:
1952 m = re.match(r"[0-9A-F]+(?=\x5D)", line[start+3:])
1953 if m:
1954 line = line[:start] + "&#x" + m.group(0) + ";" + line[start + 3 + len(m.group(0)) + 1:]
19511955 oldstart = start + 2
19521956 else:
19531957 oldstart += 1
19541958 return line
19551959
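The \[charNNN] / \[u...] lifting done in expandEntities above (this release's NEWS entry "Handle groff hex escapes like \*[u92DC]") can be sketched in isolation; `lift_char_escapes` is a hypothetical helper, not doclifter's method:

```python
import re

def lift_char_escapes(line):
    r"""Rewrite groff \[charNNN] and \[u...] escapes as XML character
    references.  An all-decimal \[u...] body becomes a decimal
    reference; otherwise it is taken as hex, e.g. \[u92DC] -> &#x92DC;.
    """
    # \[charNNN] is always decimal.
    line = re.sub(r"\\\[char([0-9]+)\]", r"&#\1;", line)

    def usub(m):
        body = m.group(1)
        # Decimal first, hex fallback -- same order as expandEntities.
        return "&#%s;" % body if body.isdigit() else "&#x%s;" % body

    return re.sub(r"\\\[u([0-9A-F]+)\]", usub, line)
```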
1956 def hack_translations(self, line):
1960 def hackTranslations(self, line):
19571961 "Perform troff font-escape translations."
19581962 if line[:4] != "<!--" and '\\' in line:
19591963 # Translate font escapes. We do this late in order to get
19601964 # uniform handling of those that were generated either by
19611965 # macros or by inline font escapes in the source text.
19621966 while True:
1963 esc = troff_highlight.search(line)
1967 esc = troffHighlight.search(line)
19641968 if not esc:
19651969 break
19661970 else:
19671971 sl = esc.start()
19681972 if line[sl+2:sl+4] == "[]":
1969 line = line[:sl]+self.change_highlight("P")+line[sl+4:]
1973 line = line[:sl]+self.changeHighlight("P")+line[sl+4:]
19701974 elif line[sl+2] == "[":
19711975 end = sl + 2 + line[sl+2:].find("]")
1972 line = line[:sl]+self.change_highlight(line[sl+3:end],line[sl+1])+line[end+1:]
1976 line = line[:sl]+self.changeHighlight(line[sl+3:end],line[sl+1])+line[end+1:]
19731977 elif line[sl+2] == "(":
1974 line = line[:sl]+self.change_highlight(line[sl+3:sl+5],line[sl+1])+line[sl+5:]
1978 line = line[:sl]+self.changeHighlight(line[sl+3:sl+5],line[sl+1])+line[sl+5:]
19751979 else:
1976 line = line[:sl]+self.change_highlight(line[sl+2:sl+3],line[sl+1])+line[sl+3:]
1980 line = line[:sl]+self.changeHighlight(line[sl+2:sl+3],line[sl+1])+line[sl+3:]
19771981 return line
19781982
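The three \f syntaxes distinguished in hackTranslations above can be sketched standalone (hypothetical helper; the real code also does the changeHighlight bookkeeping on each match):

```python
def split_font_escape(s, i):
    """Parse a troff font escape starting at s[i] (s[i:i+2] == '\\f').

    Returns (fontname, end) for the three syntaxes handled above:
    \\fX (one-char name), \\f(XX (two-char name), and \\f[NAME]
    (long name); \\f[] means 'previous font', mapped to P.
    """
    if s[i+2:i+4] == "[]":
        return ("P", i + 4)
    if s[i+2] == "[":
        end = s.index("]", i + 3)
        return (s[i+3:end], end + 1)
    if s[i+2] == "(":
        return (s[i+3:i+5], i + 5)
    return (s[i+2], i + 3)
```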
1979 def lift_link(self, line):
1983 def liftLink(self, line):
19801984 "Checks highlighted content to see if it's an XML id which exists"
19811985 # Currently, matches only <emphasis> highlights
1982 if not re_compile("<emphasis").match(line):
1986 if not reCompile("<emphasis").match(line):
19831987 return line
1984 for (link_highlight, re_content) in self.lift_links:
1985 lift = re_compile(r"<emphasis\s+remap='%s'>(%s)</emphasis>" % (link_highlight, re_content))
1988 for (linkHighlight, reContent) in self.liftLinks:
1989 lift = reCompile(r"<emphasis\s+remap='%s'>(%s)</emphasis>" % (linkHighlight, reContent))
19861990 if lift.match(line):
19871991 content = lift.sub(r"\1", line)
19881992 trailer = ""
19891993 if content.endswith("</para>"):
19901994 content = content[:-7]
19911995 trailer = "</para>"
1992 sid = self.id_from_title(content)
1993 if self.id_exists(sid):
1996 sid = self.idFromTitle(content)
1997 if self.idExists(sid):
19941998 return '<link linkend="%s">%s</link>%s' % (sid, content, trailer)
19951999 return line
19962000
1997 def is_active(self, macro_set):
2001 def isActive(self, macroSet):
19982002 "Is a given macro set (specified by name) active?"
1999 return macro_set in [x.__class__.name for x in self.interpreters]
2000
2001 def activate(self, macro_set):
2003 return macroSet in [x.__class__.name for x in self.interpreters]
2004
2005 def activate(self, macroSet):
20022006 "Activate a given macro set."
20032007 # Don't put duplicate instances in the interpreter list.
2004 if not self.is_active(macro_set.name):
2005 if hasattr(macro_set, "requires"):
2006 for ancestor in macro_set.requires:
2008 if not self.isActive(macroSet.name):
2009 if hasattr(macroSet, "requires"):
2010 for ancestor in macroSet.requires:
20072011 self.activate(ancestor)
2008 newinstance = macro_set(self, self.verbose)
2012 newinstance = macroSet(self, self.verbose)
20092013 self.interpreters = [newinstance] + self.interpreters
2010 if general_verbosity in self.verbose:
2011 stderr.write("%s uses %s macros...\n" % (spoofname or self.file, macro_set.name))
2012
2013 def close_tags(self, before, tag, tight):
2014 if generalVerbosity in self.verbose:
2015 stderr.write("%s uses %s macros...\n" % (spoofname or self.file, macroSet.name))
2016
2017 def closeTags(self, before, tag, tight):
20142018 "Generate close tags for a block-level open tag."
20152019 state = 0
20162020 after = ""
20172021 # This is an RE in case a tag instance has attributes.
2018 opentag = re_compile("<" + tag + r"\b[^>]*>")
2022 opentag = reCompile("<" + tag + r"\b[^>]*>")
20192023 closetag = "</" + tag + ">"
20202024 closetaglength = len(closetag)
20212025 closer = "\\fR" + closetag
21772181 # Parse semantic hints from the text, if present.
21782182 # Yes, they should go to the global registry.
21792183 globalhints.read(text)
2180 if timing_verbosity in self.verbose:
2184 if timingVerbosity in self.verbose:
21812185 now = time.time()
21822186 stderr.write("timing: hint processing = %2.2f\n" % (now-self.basetime,))
21832187 self.basetime = now
21872191 # uncommon error.
21882192 triggers = []
21892193 # Find uses of each trigger, sort by position of first occurrence
2190 for (pattern, consumer) in list(interpreter_dispatch.items()):
2194 for (pattern, consumer) in list(interpreterDispatch.items()):
21912195 # If the file has an extension, we can exclude some possibilities
21922196 if "." in cfile:
2193 required = required_extensions.get(consumer)
2197 required = requiredExtensions.get(consumer)
21942198 if required and not cfile.endswith(required):
21952199 continue
21962200 # Otherwise look for first uses of trigger patterns
22122216 triggered = [x[1] for x in triggers]
22132217 # Now walk through the list from the front, doing exclusions
22142218 if triggered:
2215 exclusion_lock = False
2219 exclusionLock = False
22162220 for consumer in triggered:
22172221 # Only allow one exclusive macro set. This is how we
22182222 # avoid, e.g., the presence of mdoc macros after a man
22192223 # page header causing confusion.
22202224 if consumer.exclusive:
2221 if exclusion_lock:
2225 if exclusionLock:
22222226 continue
22232227 else:
2224 exclusion_lock = True
2228 exclusionLock = True
22252229 self.toptag = consumer.toptag
22262230 # Troff commands get evaluated first
22272231 self.activate(consumer)
22372241 stderr.write("Unknown format so defaulting to a lynx dump...\n")
22382242 text = DocLifter.lynxprep(text, self.troff)
22392243 self.activate(ManInterpreter)
2240 if timing_verbosity in self.verbose:
2244 if timingVerbosity in self.verbose:
22412245 now = time.time()
22422246 stderr.write("timing: macro identification = %2.2f\n" % (now-self.basetime,))
22432247 self.basetime = now
22512255 text = text.replace(ugly, entity)
22522256 # Allow the interpreters to preprocess the output.
22532257 for interpreter in self.interpreters:
2254 self.listbreak_set |= interpreter.listbreak_set
2255 self.scoped_set |= interpreter.scoped_set
2256 self.ignore_set |= interpreter.ignore_set
2257 self.complain_set |= interpreter.complain_set
2258 self.listbreakSet |= interpreter.listbreakSet
2259 self.scopedSet |= interpreter.scopedSet
2260 self.ignoreSet |= interpreter.ignoreSet
2261 self.complainSet |= interpreter.complainSet
22582262 text = interpreter.preprocess(text)
2259 if timing_verbosity in self.verbose:
2263 if timingVerbosity in self.verbose:
22602264 now = time.time()
22612265 stderr.write("timing: preprocessing = %2.2f\n" % (now-self.basetime,))
22622266 self.basetime = now
22632267 # Split it into lines
22642268 toplevel = text.split("\n")
22652269 # Check for pure inclusions
2266 def is_inclusion(x):
2270 def isInclusion(x):
22672271 return x[:4] == TroffInterpreter.ctrl + "so "
2268 nonempty = [x for x in toplevel if x != "" and not is_comment(x)]
2269 if len(nonempty) == 1 and is_inclusion(nonempty[0]):
2272 nonempty = [x for x in toplevel if x != "" and not isComment(x)]
2273 if len(nonempty) == 1 and isInclusion(nonempty[0]):
22702274 raise LiftException(self, "see " + nonempty[0].strip()[4:], 2)
2271 elif len(nonempty) > 1 and len(list(filter(is_inclusion, nonempty))) == len(nonempty):
2275 elif len(nonempty) > 1 and len(list(filter(isInclusion, nonempty))) == len(nonempty):
22722276 raise LiftException(self, "consists entirely of inclusions", 2)
22732277 # If it's not a pure inclusion, warn if we don't have a macro set.
22742278 if len(self.interpreters) == 1:
22892293 # Is there any text at all in this file?
22902294 textcount = 0
22912295 for line in toplevel:
2292 if not is_command(line) and not is_comment(line):
2296 if not isCommand(line) and not isComment(line):
22932297 textcount += 1
22942298 if textcount == 0:
22952299 raise LiftException(self, "warning: page has no text")
22962300 # Plant a sentinel at the end to force paragraph and list closes
22972301 i = -1
2298 if not toplevel[i] and is_comment(toplevel[i]):
2302 if not toplevel[i] and isComment(toplevel[i]):
22992303 i -= 1
23002304 toplevel.insert(len(toplevel)-i, TroffInterpreter.ctrl + "end")
23012305 # Emit the top-level tag, with an id that will direct the
23032307 if self.toptag:
23042308 if self.file != "stdin":
23052309 if self.docbook5:
2306 self.emit("<%s xmlns='http://docbook.org/ns/docbook' version='5.0' xml:lang='en' xml:id='%s'>" % (self.toptag, self.make_id_from_title(os.path.basename(cfile))))
2310 self.emit("<%s xmlns='http://docbook.org/ns/docbook' version='5.0' xml:lang='en' xml:id='%s'>" % (self.toptag, self.makeIdFromTitle(os.path.basename(cfile))))
23072311 else:
2308 self.emit("<%s id='%s'>" % (self.toptag, self.make_id_from_title(os.path.basename(cfile))))
2312 self.emit("<%s id='%s'>" % (self.toptag, self.makeIdFromTitle(os.path.basename(cfile))))
23092313 else:
23102314 self.emit("<%s>" % self.toptag)
2311 if timing_verbosity in self.verbose:
2315 if timingVerbosity in self.verbose:
23122316 now = time.time()
23132317 stderr.write("timing: preparation = %2.2f\n" % (now-self.basetime,))
23142318 self.basetime = now
23152319 # Now interpret all the commands in the block
23162320 self.lineno = len(toplevel) + self.lineno
2317 self.interpret_block(toplevel)
2318 if timing_verbosity in self.verbose:
2321 self.interpretBlock(toplevel)
2322 if timingVerbosity in self.verbose:
23192323 now = time.time()
23202324 stderr.write("timing: block interpretation = %2.2f\n" % (now-self.basetime,))
23212325 self.basetime = now
23222326 # Wrap it up
2323 self.pop_section(1)
2327 self.popSection(1)
23242328 for interpreter in self.interpreters:
23252329 if hasattr(interpreter, "wrapup"):
23262330 interpreter.wrapup()
23272331 if self.toptag:
23282332 self.emit("</%s>\n" % self.toptag)
2329 if timing_verbosity in self.verbose:
2333 if timingVerbosity in self.verbose:
23302334 now = time.time()
23312335 stderr.write("timing: wrapup = %2.2f\n" % (now-self.basetime,))
23322336 self.basetime = now
23332337 # Close paragraphs properly. Note: we're going to run
23342338 # all the lines together for this and split them up
2335 # again afterwards. Because body_start is a line index,
2339 # again afterwards. Because bodyStart is a line index,
23362340 # we have to not insert or delete lines here.
23372341 before = "\n".join(self.output)
2338 after = self.close_tags(before, "para", tight=True)
2339 after = self.close_tags(after, "literallayout", tight=False)
2342 after = self.closeTags(before, "para", tight=True)
2343 after = self.closeTags(after, "literallayout", tight=False)
23402344 self.output = after.split("\n")
2341 if timing_verbosity in self.verbose:
2345 if timingVerbosity in self.verbose:
23422346 now = time.time()
23432347 stderr.write("timing: tag closing = %2.2f\n" % (now-self.basetime,))
23442348 self.basetime = now
23462350 # In that case return None to suppress output
23472351 if not self.interpreters and not [x for x in self.output if x[:4]=="<!--" or blankline.match(x)]:
23482352 return None
2349 if timing_verbosity in self.verbose:
2353 if timingVerbosity in self.verbose:
23502354 now = time.time()
23512355 stderr.write("timing: emptiness check = %2.2f\n" % (now-self.basetime,))
23522356 self.basetime = now
23532357 # Time for post-translations
23542358 self.highlight = "R"
2355 if highlight_verbosity in self.verbose:
2359 if highlightVerbosity in self.verbose:
23562360 self.filewarn("Highlight resolution begins")
2357 for j in range(self.body_start, len(self.output)):
2358 if highlight_verbosity in self.verbose:
2361 for j in range(self.bodyStart, len(self.output)):
2362 if highlightVerbosity in self.verbose:
23592363 self.notify("Before: " + self.output[j])
2360 self.output[j] = self.hack_translations(self.output[j])
2361 if highlight_verbosity in self.verbose:
2364 self.output[j] = self.hackTranslations(self.output[j])
2365 if highlightVerbosity in self.verbose:
23622366 self.notify("After: " + self.output[j])
2363 if highlight_verbosity in self.verbose:
2367 if highlightVerbosity in self.verbose:
23642368 self.filewarn("Highlight resolution ends")
2365 if timing_verbosity in self.verbose:
2369 if timingVerbosity in self.verbose:
23662370 now = time.time()
23672371 stderr.write("timing: translation hacks = %2.2f\n" % (now-self.basetime,))
23682372 self.basetime = now
2369 for j in range(self.body_start, len(self.output)):
2370 self.output[j] = self.lift_link(self.output[j])
2371 if timing_verbosity in self.verbose:
2373 for j in range(self.bodyStart, len(self.output)):
2374 self.output[j] = self.liftLink(self.output[j])
2375 if timingVerbosity in self.verbose:
23722376 now = time.time()
23732377 stderr.write("timing: link lifting = %2.2f\n" % (now-self.basetime,))
23742378 self.basetime = now
23752379 # OK, now do pattern-based markup lifting on the DocBook markup
2376 head = "\n".join(self.output[:self.body_start]) + "\n"
2377 body = "\n".join(self.output[self.body_start:]) + "\n"
2380 head = "\n".join(self.output[:self.bodyStart]) + "\n"
2381 body = "\n".join(self.output[self.bodyStart:]) + "\n"
23782382 self.output = []
2379 for (pattern, substitute) in DocLifter.post_translation_patterns:
2383 for (pattern, substitute) in DocLifter.postTranslationPatterns:
23802384 body = pattern.sub(substitute, body)
2381 for (r, s) in DocLifter.lift_highlights:
2385 for (r, s) in DocLifter.liftHighlights:
23822386 ender = s.split()[0] # discard attributes
23832387 body = r.sub(r"<%s>\1</%s>" % (s, ender), body)
2384 for (pattern, substitute) in DocLifter.post_lift_patterns:
2388 for (pattern, substitute) in DocLifter.postLiftPatterns:
23852389 body = pattern.sub(substitute, body)
23862390 # Semantic lifting based on the hints dictionary
23872391 text = head + self.localhints.apply(globalhints.apply(body))
2388 if timing_verbosity in self.verbose:
2392 if timingVerbosity in self.verbose:
23892393 now = time.time()
23902394 stderr.write("timing: semantic lifting = %2.2f\n" % (now-self.basetime,))
23912395 self.basetime = now
24002404 # This avoids error messages based on (for example) untraversed
24012405 # branches in .if and .ie constructs.
24022406 badescapes = []
2403 commentless = re_compile("<!-- .* -->").sub("", text)
2407 commentless = reCompile("<!-- .* -->").sub("", text)
24042408 if "<!-- .ig" in commentless:
24052409 lines = commentless.split("\n")
24062410 state = 0
24492453 # Optimize comments
24502454 text = text.replace(" -->\n<!-- ", "\n")
24512455 # We're done
2452 if timing_verbosity in self.verbose:
2456 if timingVerbosity in self.verbose:
24532457 now = time.time()
24542458 self.filewarn("timing: post-processing = %2.2f\n" % (now-self.basetime,))
24552459 self.basetime = now
24562460 preamble = ""
2457 preamble = "<?xml version=\"1.0\" encoding=\"%s\"?>\n" % self.out_encoding
2461 preamble = "<?xml version=\"1.0\" encoding=\"%s\"?>\n" % self.outEncoding
24582462 # Are there any entity references in this document?
24592463 entities = ""
2460 for (entity, uni) in DocLifter.pseudo_entities + tuple(self.localentities):
2464 for (entity, uni) in DocLifter.pseudoEntities + tuple(self.localentities):
24612465 if text.find("&" + entity + ";") > -1:
24622466 entities += "<!ENTITY %s '%s'>\n" % (entity, uni)
24632467 # FIXME: if docbook5 is on, inclusions won't work.
24702474 if entities or self.inclusions or "<mml:m" in text:
24712475 preamble += " [\n"
24722476 if "<mml:m" in text:
2473 preamble += mathml_entities
2477 preamble += mathmlEntities
24742478 for (entity, ifile) in self.inclusions:
24752479 preamble += "<!ENTITY %s SYSTEM '%s'>\n" % (entity, ifile)
24762480 preamble += entities
24822486 # If there were accumulated errors during processing, time to bail.
24832487 if self.errorcount:
24842488 raise LiftException(self, "there were %d errors during translation" % self.errorcount)
2485 if timing_verbosity in self.verbose:
2489 if timingVerbosity in self.verbose:
24862490 now = time.time()
24872491 self.filewarn("timing: document generation = %2.2f\n" % (now-self.basetime,))
24882492 self.basetime = now
25152519 # (3) re-encode the Unicode to a byte stream, because our framework
25162520 # code is expecting that.
25172521 try:
2518 text = text.encode(binary_encoding)
2522 text = text.encode(binaryEncoding)
25192523 except UnicodeDecodeError:
25202524 pass # Failure means we're in Python 2
2521 for encoding in self.in_encodings:
2525 for encoding in self.inEncodings:
25222526 try:
25232527 text = text.decode(encoding)
25242528 break
25272531 else:
25282532 self.troff.lineno = 0
25292533 raise LiftException(self, "Unicode decoding error")
2530 return text.encode(self.out_encoding)
2534 return text.encode(self.outEncoding)
25312535
25322536 class TroffInterpreter:
25332537 "Interpret troff requests (does macroexpansion)."
25342538 name = "troff"
25352539 exclusive = False
25362540 toptag = ""
2537 immutable_set = set([])
2538 ignore_set = set([
2541 immutableSet = set([])
2542 ignoreSet = set([
25392543 # Ignore .. outside macro context, some people use it as a no-op.
25402544 ".",
25412545 # Just ignore most font/character-size/page controls
25652569 # We can't do anything with the groff defcolor or it requests, alas.
25662570 "defcolor", "it",
25672571 ])
2568 complain_set = set([
2572 complainSet = set([
25692573 # Complain about stuff that produces gross motions.
25702574 "ne","mk","rt","ce",
25712575 # We could do much of section 10, but these are a sign of trouble.
25732577 # We can't handle environments, insertions, or next file.
25742578 "ev","rd","nx","pi",
25752579 ])
2576 parabreak_set = set(["bp","ti"])
2577 sectionbreak_set = set([])
2578 listbreak_set = set([])
2579 scoped_set = set([])
2580 entity_translations = (
2580 parabreakSet = set(["bp","ti"])
2581 sectionbreakSet = set([])
2582 listbreakSet = set([])
2583 scopedSet = set([])
2584 entityTranslations = (
25812585 # The entire list of characters described in the groff/troff reference
25822586 # is included here. Where there is no ISO equivalent, the second
25832587 # member will be an alias for a Unicode literal. This transformation
30383042 (r"o'Ao'", "&Aring;"), # ISOlat1
30393043 (r"o'ao'", "&aring;"), # ISOlat1
30403044 )
3041 xmlify_patterns = [(re_compile(x[0]), x[1]) for x in (
3045 xmlifyPatterns = [(reCompile(x[0]), x[1]) for x in (
30423046 # Order of these & substitutions is significant.
30433047 # These have to go early, otherwise you mess up tags
30443048 # generated by requests.
30523056 # Blank lines go away
30533057 (r'^\.\\"\s+-*$', ""),
30543058 )]
3055 prefix_lifts = [(re_compile("%s <emphasis remap='[BI]'>([^<]+)</emphasis>" % x[0]), r"file <%s>\1</%s>" % (x[1], x[1])) for x in (
3059 prefixLifts = [(reCompile("%s <emphasis remap='[BI]'>([^<]+)</emphasis>" % x[0]), r"file <%s>\1</%s>" % (x[1], x[1])) for x in (
30563060 ("file", "filename"),
30573061 ("command", "command"),
30583062 ("option", "option"),
30603064
30613065 # These are interpreter state which can be modified by the .cc command
30623066 ctrl = "."
3063 ctrl_nobreak = "'"
3067 ctrlNobreak = "'"
30643068
30653069 def __init__(self, source, verbose):
30663070 self.source = source
30743078 self.linenos = [] # Line number stack (only used in error msgs)
30753079 self.nf = False # Initially we're filling and adjusting
30763080 self.screen = False # Initially we're not in a screen context
3077 self.in_block = False # Initially we're not in a block context
3081 self.inBlock = False # Initially we're not in a block context
30783082 self.ifstack = []
30793083 self.evaldepth = ""
30803084 self.longnames = []
3081 self.groff_features = []
3082 self.nonportable_features = []
3083 self.entities_from_strings = False
3084 self.ignore_outdent = None
3085 self.groffFeatures = []
3086 self.nonportableFeatures = []
3087 self.entitiesFromStrings = False
3088 self.ignoreOutdent = None
30853089 self.registers = { # Register table for nr, rr, rnn
30863090 ".g": '0',
30873091 ".$": lambda: str(len([x for x in self.macroargs and self.macroargs[-1] if x])),
30923096 if prefix not in instance.translations:
30933097 instance.translations[prefix] = []
30943098 instance.translations[prefix].append((glyph, entity))
3095 for (glyph, entity) in TroffInterpreter.entity_translations:
3099 for (glyph, entity) in TroffInterpreter.entityTranslations:
30963100 if glyph.startswith("o'"):
30973101 tmerge(TroffInterpreter, "\\o", r"\%s" % glyph, entity)
30983102 else:
31043108 if len(glyph) >= 2:
31053109 tmerge(TroffInterpreter, "\\[", r"\[%s]" % glyph,entity)
31063110
3107 def expand_strings(self, line):
3111 def expandStrings(self, line):
31083112 "Expand strings in the given line."
31093113 if '\\' not in line:
31103114 return line
31263130 line = line.replace(r"\$@", " ".join(['"%s"' % s for s in self.macroargs[-1]]))
31273131 return line
31283132
3129 def eval_register(self, key):
3133 def evalRegister(self, key):
31303134 val = self.registers[key]
31313135 if type(val) in (type(""), type(0)):
31323136 return val
31333137 else:
31343138 return val() # the Python-2 builtin apply() is gone under the python3 shebang
31353139
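The computed-register dispatch in evalRegister can be sketched standalone (hypothetical helper; register names and values here are illustrative, mirroring the registers table initialized in __init__ above):

```python
def eval_register(registers, key):
    """Read a troff number register.

    Plain registers hold strings or ints; computed registers (like .$,
    the macro argument count) hold zero-argument callables that are
    invoked on every read, so their value tracks interpreter state.
    """
    val = registers[key]
    return val() if callable(val) else val
```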
3136 def eval_term(self, exp):
3140 def evalTerm(self, exp):
31373141 "Pop a term off an expression string and evaluate it."
31383142 if exp == "":
31393143 return ("", "")
32363240 register = exp[0]
32373241 if register in self.registers:
32383242 if increment is not None:
3239 self.registers[register] = str(int(self.eval_register(register)) + increment)
3240 return (self.eval_register(register), exp[end:])
3243 self.registers[register] = str(int(self.evalRegister(register)) + increment)
3244 return (self.evalRegister(register), exp[end:])
32413245 else:
32423246 if increment is not None:
32433247 self.registers[register] = str(increment)
32913295 return (v, expcopy)
32923296 # Some groff pages actually use this!
32933297 elif exp.startswith(r"\B'") and "'" in exp[3:]:
3294 self.groff_features.append(r"\B")
3298 self.groffFeatures.append(r"\B")
32953299 end = 3 + exp[3:].find("'")
32963300 if re.compile("[0-9]+$").match(exp[3:end]):
32973301 return ('1', exp[:end+1])
33243328 end = -1
33253329 if end == -1:
33263330 self.source.error("unterminated parenthetical %s" % exp)
3327 return (self.eval_expr(exp[1:end]), exp[end+1:])
3331 return (self.evalExpr(exp[1:end]), exp[end+1:])
33283332 # Maybe we couldn't do anything with it.
33293333 else:
33303334 return ("", exp)
33313335
3332 def eval_expr(self, exp):
3336 def evalExpr(self, exp):
33333337 "Evaluate expressions for use in groff conditionals."
3334 if macro_verbosity in self.source.verbose:
3338 if macroVerbosity in self.source.verbose:
33353339 self.evaldepth += " "
3336 self.source.notify("%s->eval_expr(%s)" % (self.evaldepth, exp))
3340 self.source.notify("%s->evalExpr(%s)" % (self.evaldepth, exp))
33373341 # Accept ! prefix
33383342 if exp and exp[0] == "!":
3339 v = self.eval_expr(exp[1:])
3343 v = self.evalExpr(exp[1:])
33403344 v = str(int(len(v)==0 or v == '0'))
33413345 elif exp and exp[0] == "(":
3342 v = self.eval_expr(exp[1:-1])
3346 v = self.evalExpr(exp[1:-1])
33433347 else:
33443348 # Expression can consist of a single term only
3345 (head, tail) = self.eval_term(exp)
3346 if macro_verbosity in self.source.verbose:
3349 (head, tail) = self.evalTerm(exp)
3350 if macroVerbosity in self.source.verbose:
33473351 self.source.notify("%s(hd, tail)=(%s, %s)" % (self.evaldepth,head, tail))
33483352 if tail == "":
33493353 v = head
33503354 # Arithmetic
33513355 elif tail.startswith("+"):
3352 v = repr(int(head) + int(self.eval_expr(tail[1:])))
3356 v = repr(int(head) + int(self.evalExpr(tail[1:])))
33533357 elif tail.startswith("-"):
3354 v = repr(int(head) - int(self.eval_expr(tail[1:])))
3358 v = repr(int(head) - int(self.evalExpr(tail[1:])))
33553359 elif tail.startswith("*"):
3356 v = repr(int(head) * int(self.eval_expr(tail[1:])))
3360 v = repr(int(head) * int(self.evalExpr(tail[1:])))
33573361 elif tail.startswith("/"):
3358 v = repr(int(head) / int(self.eval_expr(tail[1:])))
3362 v = repr(int(head) / int(self.evalExpr(tail[1:])))
33593363 elif tail.startswith("%"):
3360 v = repr(int(head) % int(self.eval_expr(tail[1:])))
3364 v = repr(int(head) % int(self.evalExpr(tail[1:])))
33613365 # Logical operators
33623366 elif tail.startswith("&amp;"):
3363 v = repr(int(int(head) and int(self.eval_expr(tail[5:]))))
3367 v = repr(int(int(head) and int(self.evalExpr(tail[5:]))))
33643368 elif tail.startswith(":"):
3365 v = repr(int(int(head) or int(self.eval_expr(tail[5:]))))
3369 v = repr(int(int(head) or int(self.evalExpr(tail[5:]))))
33663370 # Relationals
33673371 elif tail.startswith("=="):
3368 v = repr(int(int(head) == int(self.eval_expr(tail[2:]))))
3372 v = repr(int(int(head) == int(self.evalExpr(tail[2:]))))
33693373 elif tail.startswith("="):
3370 v = repr(int(int(head) == int(self.eval_expr(tail[1:]))))
3374 v = repr(int(int(head) == int(self.evalExpr(tail[1:]))))
33713375 elif tail.startswith("&gt;"):
3372 v = repr(int(int(head) > int(self.eval_expr(tail[4:]))))
3376 v = repr(int(int(head) > int(self.evalExpr(tail[4:]))))
33733377 elif tail.startswith("&lt;"):
3374 v = repr(int(int(head) < int(self.eval_expr(tail[4:]))))
3378 v = repr(int(int(head) < int(self.evalExpr(tail[4:]))))
33753379 elif tail.startswith("&gt;="):
3376 v = repr(int(int(head) >= int(self.eval_expr(tail[5:]))))
3380 v = repr(int(int(head) >= int(self.evalExpr(tail[5:]))))
33773381 elif tail.startswith("&lt;="):
3378 v = repr(int(int(head) <= int(self.eval_expr(tail[5:]))))
3382 v = repr(int(int(head) <= int(self.evalExpr(tail[5:]))))
33793383 # Max/min
33803384 elif tail.startswith("&gt;?"):
3381 v = repr(max(int(head), int(self.eval_expr(tail[5:]))))
3385 v = repr(max(int(head), int(self.evalExpr(tail[5:]))))
33823386 elif tail.startswith("&lt;?"):
3383 v = repr(min(int(head), int(self.eval_expr(tail[5:]))))
3387 v = repr(min(int(head), int(self.evalExpr(tail[5:]))))
33843388 # We don't know what's going on, just call it true.
33853389 else:
33863390 self.source.error("bogus-looking expression %s" % exp)
33873391 v = '1'
3388 if macro_verbosity in self.source.verbose:
3389 self.source.notify("%s<-eval_expr(%s) -> %s" % (self.evaldepth, exp, v))
3392 if macroVerbosity in self.source.verbose:
3393 self.source.notify("%s<-evalExpr(%s) -> %s" % (self.evaldepth, exp, v))
33903394 self.evaldepth = self.evaldepth[:-1]
33913395 return v
33923396
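The `evalExpr`/`evalTerm` pair in the hunk above implements a small recursive-descent evaluator for groff conditional expressions: peel one term off the front of the string, then combine it with the recursively evaluated remainder according to the operator that follows. A minimal standalone sketch of the same scheme (hypothetical names; integer terms only, whole-expression parentheses only — the real code additionally resolves number registers and operates on XML-escaped operators such as `&gt;=`):

```python
import re

def eval_term(exp):
    "Pop a leading integer term; return (value, rest-of-expression)."
    m = re.match(r"-?\d+", exp)
    if not m:
        raise ValueError("unparseable term in %r" % exp)
    return int(m.group()), exp[m.end():]

def eval_expr(exp):
    "Evaluate a groff-style conditional expression (integer subset)."
    if exp.startswith("!"):                 # logical negation prefix
        return int(eval_expr(exp[1:]) == 0)
    if exp.startswith("(") and exp.endswith(")"):
        return eval_expr(exp[1:-1])
    head, tail = eval_term(exp)
    if tail == "":
        return head
    # Two-character relationals are tried before their one-character
    # prefixes so ">=" is not consumed as ">".
    for op, fn in ((">=", lambda a, b: int(a >= b)),
                   ("<=", lambda a, b: int(a <= b)),
                   ("==", lambda a, b: int(a == b)),
                   ("=",  lambda a, b: int(a == b)),
                   (">",  lambda a, b: int(a > b)),
                   ("<",  lambda a, b: int(a < b)),
                   ("+",  lambda a, b: a + b),
                   ("-",  lambda a, b: a - b),
                   ("*",  lambda a, b: a * b),
                   ("/",  lambda a, b: a // b),
                   ("&",  lambda a, b: int(bool(a) and bool(b))),
                   (":",  lambda a, b: int(bool(a) or bool(b)))):
        if tail.startswith(op):
            return fn(head, eval_expr(tail[len(op):]))
    raise ValueError("bogus-looking expression %r" % exp)
```

Right recursion on the tail means chains like `2+3-1` evaluate without an explicit loop, which matches the shape of the `elif tail.startswith(...)` ladder above.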
33933397 def skiptoend(self, tokens):
33943398 "Skip command lines in a conditional arm we're going to ignore"
3395 if macro_verbosity in self.verbose:
3399 if macroVerbosity in self.verbose:
33963400 self.source.notify("skiptoend(%s) started" % repr(tokens))
33973401 wholeline = "".join(tokens)
33983402 brace = wholeline.find("{")
34063410 elsedepth -= 1
34073411 if elsedepth == 0:
34083412 break
3409 if macro_verbosity in self.verbose:
3413 if macroVerbosity in self.verbose:
34103414 self.source.notify("skiptoend(%s) finished" % repr(tokens))
34113415
34123416 def interpret(self, line, tokens, _):
34133417 command = tokens[0][1:]
34143418 args = tokens[1:]
34153419 if len(command) > 2:
3416 self.nonportable_features.append(command)
3420 self.nonportableFeatures.append(command)
34173421 # .nl is apparently an undocumented synonym for .br in groff(1).
34183422 if command == "br" or command == "nl":
3419 if self.source.in_synopsis():
3423 if self.source.inSynopsis():
34203424 self.source.emit("<sbr/>")
3421 elif self.source.body_section() and not self.nf:
3425 elif self.source.bodySection() and not self.nf:
34223426 self.source.paragraph()
34233427 elif command == "ti":
3424 if self.source.in_synopsis():
3428 if self.source.inSynopsis():
34253429 pass
34263430 elif self.source.troff.macronames:
34273431 self.source.warning(".ti seen in macro body")
34363440 self.source.emit(self.source.popline())
34373441 self.source.emit("</para></blockquote>")
34383442 elif command == "in":
3439 if self.source.in_synopsis():
3443 if self.source.inSynopsis():
34403444 pass
34413445 # Hacky way of dealing with
34423446 # .in +4n
34583462 # emitting a warning about an unstructured .in call. This
34593463 # case fires on a lot of the Linux core manpages.
34603464 if self.source.peekline().startswith(".EX") and args and args[0].startswith("+"):
3461 self.ignore_outdent = args[0]
3465 self.ignoreOutdent = args[0]
34623466 self.source.passthrough([command] + args)
34633467 # Hacky way of dealing with
34643468 # .in +4n
34673471 # it as though the .nf had come first. This way we avoid
34683472 # emitting a warning about an unstructured .in call.
34693473 elif self.source.peekline().startswith(".nf") and args and args[0].startswith("+"):
3470 self.ignore_outdent = args[0]
3474 self.ignoreOutdent = args[0]
34713475 self.source.passthrough([command] + args)
34723476 # Some .in pairs associated with displays can be ignored
3473 elif self.source.troff.nf and args and self.ignore_outdent is None:
3474 self.ignore_outdent = args[0]
3475 elif self.ignore_outdent is not None:
3477 elif self.source.troff.nf and args and self.ignoreOutdent is None:
3478 self.ignoreOutdent = args[0]
3479 elif self.ignoreOutdent is not None:
34763480 if not args:
34773481 pass # bare .in outdenting a display
3478 elif args[0][0]=='-' and args[0][1:]==self.ignore_outdent[1:]:
3482 elif args[0][0]=='-' and args[0][1:]==self.ignoreOutdent[1:]:
34793483 pass # matching outdent
34803484 else:
3481 self.source.warning("closing %s %s doesn't match opener %s" % (command, args.join(" "), self.ignore_outdent))
3485 self.source.warning("closing %s %s doesn't match opener %s" % (command, args.join(" "), self.ignoreOutdent))
34823486 self.source.passthrough([command] + args)
3483 self.ignore_outdent = None
3484 elif not self.source.in_synopsis():
3487 self.ignoreOutdent = None
3488 elif not self.source.inSynopsis():
34853489 if args[0] == "+4" and not self.rsin:
34863490 self.source.emit(".RS")
34873491 self.rsin = True
35213525 self.source.passthrough(self.source.popline().split())
35223526 # Decompile ad-hoc tables. But don't be fooled by C source listings
35233527 # that happen to have embedded tabs.
3524 if not c_source_re.search(self.source.peekline()) \
3528 if not cSourceRe.search(self.source.peekline()) \
35253529 and '\t' in self.source.peekline():
35263530 # Aha, it's a table line
35273531 table = []
35283532 maxtabs = 0
35293533 while True:
35303534 bodyline = self.source.peekline()
3531 if match_command(bodyline, "br"):
3535 if matchCommand(bodyline, "br"):
35323536 self.source.popline()
3533 elif '\t' in bodyline and not is_command(bodyline):
3537 elif '\t' in bodyline and not isCommand(bodyline):
35343538 if len(args) == 1:
35353539 # Wacky special case. Some pages, like
35363540 # enscript.1, have an ad-hoc table intended
35483552 # Queue up the generated table to be turned into DocBook.
35493553 self.source.lines = table + self.source.lines
35503554 else:
3551 if not self.source.in_synopsis() and not parseable and [s for s in self.source.lines if '\t' in s]:
3555 if not self.source.inSynopsis() and not parseable and [s for s in self.source.lines if '\t' in s]:
35523556 self.source.warning("uninterpretable .ta seen in body section")
35533557 self.source.passthrough([command] + args)
35543558 elif command == "sp":
35553559 # Treat this as a paragraph break in body text.
3556 if self.source.body_section() and not self.nf:
3557 self.source.end_paragraph(label="sp")
3560 if self.source.bodySection() and not self.nf:
3561 self.source.endParagraph(label="sp")
35583562 # Always insert the space, it can't hurt and may help
35593563 # (e.g in function synopses).
35603564 lines = 1
35653569 pass
35663570 for i in range(lines):
35673571 self.source.diversion.append("")
3568 if self.source.body_section() and not self.nf:
3569 self.source.need_para = True
3572 if self.source.bodySection() and not self.nf:
3573 self.source.needPara = True
35703574 elif command == "Sp" and "Sp" not in self.longnames:
35713575 # Catches a common error. See for example mono.1
35723576 self.source.pushline(".sp")
35733577 elif command == "bp":
3574 self.nonportable_features.append("bp")
3578 self.nonportableFeatures.append("bp")
35753579 if self.nf:
35763580 # Breaking up display markup causes too many problems
35773581 # at validation time to be worth it.
35923596 if args and args[0]!="R" and self.source.peekline():
35933597 # skip any number of things like .in
35943598 while self.source.ignorable(self.source.peekline(), nocomplaints=1):
3595 self.source.emit(make_comment(self.source.popline()))
3599 self.source.emit(makeComment(self.source.popline()))
35963600 if self.source.peekline()[1:3] == "nf":
35973601 self.source.popline()
3598 self.source.end_paragraph(label="ft")
3602 self.source.endParagraph(label="ft")
35993603 if self.source.quiet:
36003604 trailer = ""
36013605 else:
36223626 while blankline.match(self.source.peekline()):
36233627 self.source.popline()
36243628 while self.source.ignorable(self.source.peekline(), nocomplaints=1):
3625 self.source.emit(make_comment(self.source.popline()))
3629 self.source.emit(makeComment(self.source.popline()))
36263630 nextl = self.source.peekline()
36273631 if nextl and nextl[0:3] == TroffInterpreter.ctrl + "ft":
36283632 fontswitch = nextl
36443648 else:
36453649 self.source.emit(r"\fR</literallayout>" + trailer)
36463650 self.nf = False
3647 self.source.need_paragraph()
3651 self.source.needParagraph()
36483652 elif command in ("nf", "NF"): # .NF is an oddly common typo
3649 self.source.end_paragraph(label="nf")
3653 self.source.endParagraph(label="nf")
36503654 if self.source.peekline() == TroffInterpreter.ctrl + "ft CW":
36513655 self.source.popline()
3652 self.source.end_paragraph()
3656 self.source.endParagraph()
36533657 self.source.emit("<screen remap='.nf .ft CW'>")
36543658 self.screen = True
36553659 self.nf = True
36563660 else:
36573661 self.source.emit("<literallayout remap='.nf'>")
3658 self.source.end_paragraph()
3662 self.source.endParagraph()
36593663 self.nf = True
36603664 elif command in ("ul", "cu"):
3661 self.nonportable_features.append(command)
3665 self.nonportableFeatures.append(command)
36623666 if args:
36633667 try:
36643668 forlines = int(args)
36703674 for i in range(min(forlines, len(self.source.lines))):
36713675 self.source.lines[i] = r"\fU" + self.source.lines[i] + r"\fP"
36723676 elif command == "tr":
3673 self.nonportable_features.append("tr")
3674 args[0] = self.source.expand_entities(args[0])
3677 self.nonportableFeatures.append("tr")
3678 args[0] = self.source.expandEntities(args[0])
36753679 while True:
3676 frompart = get_xml_char(args[0])
3680 frompart = getXmlChar(args[0])
36773681 args[0] = args[0][len(frompart):]
3678 topart = get_xml_char(args[0])
3682 topart = getXmlChar(args[0])
36793683 args[0] = args[0][len(topart):]
36803684 if not frompart:
36813685 break
36873691 for (special, translation) in interpreter.translations[prefix]:
36883692 if topart == special:
36893693 topart = translation
3690 if general_verbosity in self.source.verbose:
3694 if generalVerbosity in self.source.verbose:
36913695 self.source.notify("tr: %s -> %s" % (frompart, topart))
36923696 self.source.outsubst.append((frompart, topart))
36933697 elif command == "tm":
36943698 stderr.write(" ".join(args) + "\n")
3695 elif command == "mso" and args[0] in mso_dispatch:
3696 self.source.activate(mso_dispatch[args[0]])
3699 elif command == "mso" and args[0] in msoDispatch:
3700 self.source.activate(msoDispatch[args[0]])
36973701 elif command in ("so", "mso"):
36983702 # Should we report mso as a groff feature? Unclear...
36993703 # non-groff implementations are supposed to ignore these.
37213725 try:
37223726 text = self.preprocess(open(path).read())
37233727 lines = list(map(string.rstrip, text.split("\n")))
3724 if [x for x in lines if x and not (is_comment(x) or is_command(x))]:
3728 if [x for x in lines if x and not (isComment(x) or isCommand(x))]:
37253729 self.source.warning(ifile + " contains text -- generating entity reference.")
37263730 path = None
37273731 else:
3728 if general_verbosity in self.verbose:
3732 if generalVerbosity in self.verbose:
37293733 self.source.notify("including" + path)
37303734 lines = ["<!-- *** start include from %s *** -->" % path] \
37313735 + lines \
37713775 value = line[i:]
37723776 value = value.replace(r"\'", "&apos;")
37733777 value = value.replace("'", "&apos;")
3774 if self.source.in_preamble and self.entities_from_strings:
3778 if self.source.inPreamble and self.entitiesFromStrings:
37753779 self.source.localentities.append((stringname, value))
37763780 self.strings[stringname] = "&" + stringname + ";"
37773781 else:
37923796 newname = tokens[2]
37933797 suppressed = False
37943798 for interpreter in self.source.interpreters:
3795 if oldname in interpreter.immutable_set:
3799 if oldname in interpreter.immutableSet:
37963800 suppressed = True
37973801 break
37983802 if suppressed:
38073811 self.macros[newname] = self.macros[oldname]
38083812 del self.macros[oldname]
38093813 elif command == "em":
3810 self.nonportable_features.append("em")
3814 self.nonportableFeatures.append("em")
38113815 if len(tokens) == 1:
38123816 self.macroend = ".."
38133817 else:
38143818 self.macroend = tokens[1]
38153819 elif command in ("de", "de1", "am"):
3816 if macro_verbosity in self.verbose:
3820 if macroVerbosity in self.verbose:
38173821 self.source.notify("macro definition begins")
38183822 name = tokens[1]
38193823 if len(name) > 2:
38273831 isused = [x for x in self.source.lines if type(x) == type("") and x[0:len(name)+1]==TroffInterpreter.ctrl+name]
38283832 suppressed = False
38293833 for interpreter in self.source.interpreters:
3830 if name in interpreter.immutable_set:
3834 if name in interpreter.immutableSet:
38313835 suppressed = True
38323836 break
38333837 # We don't want macro listings showing up in the Synopsis section.
38343838 # They play hell with the Synopsis parser...
3835 listing = isused and self.source.body_section() and not suppressed and not self.source.quiet
3839 listing = isused and self.source.bodySection() and not suppressed and not self.source.quiet
38363840 if listing:
38373841 self.source.emit("<!-- Macro definition:")
38383842 self.source.emit("%s %s" % (command, name))
38453849 if linetoks:
38463850 for interpreter in self.source.interpreters:
38473851 command = linetoks[0][1:]
3848 if command in interpreter.ignore_set:
3852 if command in interpreter.ignoreSet:
38493853 line += '\t.\\" IGNORED'
3850 elif command in interpreter.complain_set:
3854 elif command in interpreter.complainSet:
38513855 line += '\t.\\" IGNORED'
38523856 if not suppressed:
38533857 self.source.warning("untranslatable %s in %s definition" % (command, name))
38653869 self.source.emit("-->", trans=0)
38663870 if suppressed:
38673871 del self.macros[name]
3868 self.source.emit(make_comment("%s listing suppressed (immutable)"%name))
3872 self.source.emit(makeComment("%s listing suppressed (immutable)"%name))
38693873 elif not isused:
3870 self.source.emit(make_comment("%s listing suppressed (not used)"%name))
3874 self.source.emit(makeComment("%s listing suppressed (not used)"%name))
38713875 # OK, now perform macro reduction. Recognize macros that are
38723876 # presentation-level hacks around various standard constructs
38733877 # that we want to be able to recognize and elide.
38743878 for interpreter in self.source.interpreters:
38753879 if hasattr(interpreter, "reductions"):
3876 list(map(lambda x: self.conditionally_replace(x[0], x[1]), list(interpreter.reductions.items())))
3880 list(map(lambda x: self.conditionallyReplace(x[0], x[1]), list(interpreter.reductions.items())))
38773881 # Implementation of numeric registers
38783882 elif command == "nr":
38793883 reg = args[0]
38883892 else:
38893893 baseval = '0'
38903894 val = str(eval(baseval+val))
3891 if macro_verbosity in self.verbose:
3895 if macroVerbosity in self.verbose:
38923896 self.source.warning("register %s = %s" % (reg, val))
38933897 self.registers[reg] = val
38943898 elif command == "rr":
38963900 if reg in self.registers:
38973901 del self.registers[reg]
38983902 elif command == "rnn": # Groff extension
3899 self.groff_features.append("rnn")
3903 self.groffFeatures.append("rnn")
39003904 reg = args[0]
39013905 new = args[1]
39023906 if reg and new in self.registers:
39173921 self.source.error("malformed conditional %s" % command)
39183922 return True
39193923 # Evaluate the guard
3920 guardval = self.eval_expr(tokens[1])
3924 guardval = self.evalExpr(tokens[1])
39213925 guardval = len(guardval) and guardval != '0'
3922 if macro_verbosity in self.verbose:
3926 if macroVerbosity in self.verbose:
39233927 self.source.notify("if condition stack push %s from: %s" % (guardval, repr(tokens)))
39243928 self.ifstack.append(guardval)
39253929 if command == "ie":
3926 if macro_verbosity in self.verbose:
3930 if macroVerbosity in self.verbose:
39273931 self.source.notify("else condition stack %s push from: %s" % (guardval, repr(tokens)))
39283932 self.ifstack.append(guardval)
39293933 # If it's a one-liner and condition true, push back remaining text,
39373941 # conditionals. Ugh...
39383942 if guardval:
39393943 if len(tokens) > 2 and not tokens[2].startswith(r"\{") and tokens[2] != TroffInterpreter.ctrl + "if":
3940 if macro_verbosity in self.verbose:
3944 if macroVerbosity in self.verbose:
39413945 self.source.notify("if or ie does one-line pushback")
39423946 self.source.pushline(TroffInterpreter.ctrl + r"\}")
39433947 if tokens[2].startswith(r"\{"):
39443948 tokens[2] = tokens[2][2:]
3945 if macro_verbosity in self.verbose:
3949 if macroVerbosity in self.verbose:
39463950 self.source.notify("pushing back: " + repr(tokens[2:]))
39473951 self.source.pushline(" ".join(tokens[2:]))
39483952 else:
39683972 # in the a2p man page, forgetting that this needs an .ie
39693973 # rather than .if in order for the stack operations to
39703974 # balance. Let's not pop the stack and die.
3971 if general_verbosity in self.verbose:
3975 if generalVerbosity in self.verbose:
39723976 self.source.warning("unbalanced condition-stack operation")
39733977 condition = True # Works out right if the guard was true.
39743978 else:
39753979 condition = self.ifstack[-1]
3976 if macro_verbosity in self.verbose:
3980 if macroVerbosity in self.verbose:
39773981 self.source.notify(".el is %s" % condition)
39783982 # If it's a one-liner and condition false, push back remaining text
39793983 oneliner = len(tokens) > 1 and tokens[1][:2] != r"\{"
39833987 # If condition is true we need to do a skip now
39843988 if condition and not oneliner:
39853989 self.skiptoend(tokens)
3986 if macro_verbosity in self.verbose:
3990 if macroVerbosity in self.verbose:
39873991 self.source.notify("stack state after .el: %s" % self.ifstack)
39883992 elif command == r"\}" or command == r".\}":
39893993 if self.ifstack: # See above note on a2p
3990 if macro_verbosity in self.verbose:
3994 if macroVerbosity in self.verbose:
39913995 self.source.notify("stack pop from: " + repr(tokens))
39923996 self.ifstack.pop()
39933997 elif command == "nop": # groff extension
3994 self.groff_features.append("nop")
3998 self.groffFeatures.append("nop")
39953999 if args:
39964000 self.source.pushline(" ".join(args))
39974001 elif command == "return": # groff extension
3998 self.groff_features.append("return")
3999 self.source.macro_return()
4002 self.groffFeatures.append("return")
4003 self.source.macroReturn()
40004004 elif command == "ig":
40014005 if not args:
40024006 args = ['.']
4003 if self.source.body_section():
4004 self.source.end_paragraph(label="ft")
4007 if self.source.bodySection():
4008 self.source.endParagraph(label="ft")
40054009 self.source.emit("<!-- " + " ".join(tokens))
40064010 while self.source.lines:
40074011 line = self.source.popline()
40124016 line = line.replace(r"\-", "&mdash;")
40134017 line = line.replace("-", "&mdash;")
40144018 self.source.emit(line)
4015 if self.source.body_section():
4016 self.source.need_paragraph()
4019 if self.source.bodySection():
4020 self.source.needParagraph()
40174021 # Debugging
40184022 elif command == "pm": # For debugging
40194023 stderr.write("Strings: " + repr(self.strings) + "\n")
40274031 self.source.lines = self.macros[command] + [self.source.lineno] + self.source.lines
40284032 # Extended groff macros
40294033 elif command == "do":
4030 self.groff_features.append("do") # Only ever used within macro packages.
4034 self.groffFeatures.append("do") # Only ever used within macro packages.
40314035 elif command == "cc":
4032 self.nonportable_features.append("cc")
4036 self.nonportableFeatures.append("cc")
40334037 TroffInterpreter.ctrl = args[0]
40344038 elif command == "c2":
4035 self.nonportable_features.append("c2")
4036 TroffInterpreter.ctrl_break = args[0]
4039 self.nonportableFeatures.append("c2")
4040 TroffInterpreter.ctrlBreak = args[0]
40374041 elif command == "ab":
40384042 if not args:
40394043 args = ["User Abort"]
40534057 else:
40544058 self.source.error("attempt to alias undefined name %s"%args[1])
40554059 elif command == "shift":
4056 self.groff_features.append("shift")
4060 self.groffFeatures.append("shift")
40574061 if len(args):
40584062 shiftby = int(args[0])
40594063 else:
40604064 shiftby = 1
40614065 self.macroargs[-1] = self.macroargs[-1][shiftby:]
40624066 elif command == "PSPIC":
4063 self.groff_features.append("PSPIC")
4067 self.groffFeatures.append("PSPIC")
40644068 ifile = args[0]
40654069 self.source.pushline('<mediaobject>\n<imageobject><imagedata fileref="%s" format="EPS"/></imageobject>\n</mediaobject>' % ifile)
40664070 elif command == "DOCLIFTER-HR":
40704074 return False
40714075 # Was there a trailing close bracket? Then push it back.
40724076 if len(tokens) > 1 and tokens[-1] == r"\}":
4073 if macro_verbosity in self.verbose:
4077 if macroVerbosity in self.verbose:
40744078 self.source.notify("pushing back a trailing bracket")
40754079 self.source.pushline(TroffInterpreter.ctrl + r"\}")
40764080 return True
4077 def conditionally_replace(self, wrapper, standard):
4081 def conditionallyReplace(self, wrapper, standard):
40784082 "Replace a wrapper with a standard macro if the wrapper contains it."
40794083 if wrapper in self.macros and [x for x in self.macros[wrapper] if x.find(standard) > -1]:
40804084 if not self.source.quiet:
4081 self.source.emit(make_comment("%s reduced to %s" % (wrapper, standard)))
4082 m = re_compile("^." + wrapper)
4085 self.source.emit(makeComment("%s reduced to %s" % (wrapper, standard)))
4086 m = reCompile("^." + wrapper)
40834087 self.source.lines = [m.sub(TroffInterpreter.ctrl+standard, x) for x in self.source.lines]
4084 def xmlify_line(self, line):
4088 def xmlifyLine(self, line):
40854089 "XMLify a line of text, replacing magic characters with escapes."
4086 for (pattern, substitute) in TroffInterpreter.xmlify_patterns:
4090 for (pattern, substitute) in TroffInterpreter.xmlifyPatterns:
40874091 line = pattern.sub(substitute, line)
40884092 return line
40894093 def preprocess(self, text):
40904094 if r'\*[' in text:
4091 self.groff_features.append(r"\*[")
4095 self.groffFeatures.append(r"\*[")
40924096 # Fixes an error found in OpenLDAP pages that might be generic.
40934097 # This substitution enables a later stage to detect formations
40944098 # that should turn into literallayout tag pairs.
41104114 (status, mathml) = getstatusoutput("eqn -TMathML <" + fp.name)
41114115 fp.close()
41124116 if status == 0 and "<math" in mathml and not "<merror" in mathml:
4113 self.source.eqn_processed = True
4117 self.source.eqnProcessed = True
41144118 # Reduce trivial equations
4115 mathml = re_compile(r"<math><m[ion]>(\w*)</m[ion]></math>").sub(r'\\fI\1\\fP', mathml)
4119 mathml = reCompile(r"<math><m[ion]>(\w*)</m[ion]></math>").sub(r'\\fI\1\\fP', mathml)
41164120 # Now make sure there is a newline before trailing .ENs
41174121 mathml = mathml.replace("</math>.EN", "</math>\n.EN")
41184122 # FIXME: this optimization should be done in eqn, really
41194123 mathml = mathml.replace("</mi><mi>","").replace("</mn><mn>","")
41204124 # FIXME: maybe there's some way for eqn -TMathML to do these?
4121 mathml = re_compile(r"\\f[BIRP0-9]").sub("", mathml)
4125 mathml = reCompile(r"\\f[BIRP0-9]").sub("", mathml)
41224126 # Mark MathML markup so we can put it in in the right namespace
41234127 # before validation. We can't put the "mml:" prefix in here,
41244128 # : in inline-equation markup has a bad interaction with the
41374141 # Fix a common error -- beginning a line with a string quote
41384142 # that isn't supposed to be a non-breaking request (example at
41394143 # eject.1).
4140 if line and line[0]=="'" and not is_comment(line) and len(line)>3:
4144 if line and line[0]=="'" and not isComment(line) and len(line)>3:
41414145 if line[1] not in string.letters or line[2] not in string.letters:
41424146 line = r"\&" + line
41434147 # Don't allow ellipses to be mistaken for
4144 # commands (TCL_EvalTokens.3).
4145 ellipsis = re_compile(r"^(\s+)(\.\.\..*)")
4148 # commands (TCLEvalTokens.3).
4149 ellipsis = reCompile(r"^(\s+)(\.\.\..*)")
41464150 seen = ellipsis.match(line)
41474151 if seen:
41484152 line = seen.group(1) + r"\&" + seen.group(2)
41494153 # Compute our enclosure status
4150 if is_command(line):
4151 if line[1:].startswith("EQ") and self.source.eqn_processed:
4154 if isCommand(line):
4155 if line[1:].startswith("EQ") and self.source.eqnProcessed:
41524156 enclosed = "EQ"
41534157 elif line[1:].startswith("PS"):
41544158 enclosed = "PS"
41584162 # balanced MathML expressions in the mml namespace.
41594163 if not enclosed:
41604164 if "<MaGiC%CoOkIeath>" not in line or "</MaGiC%CoOkIeath>"not in line:
4161 line = self.xmlify_line(line)
4165 line = self.xmlifyLine(line)
41624166 else:
41634167 before = line
41644168 filtered = ""
41664170 s = before.find("<MaGiC%CoOkIeath")
41674171 if s == -1:
41684172 break
4169 filtered += self.xmlify_line(before[:s])
4173 filtered += self.xmlifyLine(before[:s])
41704174 filtered += "<inlineequation>"
41714175 before = before[s:]
41724176 t = before.find("</MaGiC%CoOkIeath>") + 18
41864190 # Convert vertical motions to superscript/subscript operations.
41874191 # Protecct double backslashes first.
41884192 text = text.replace(r"\\", r"@\\@")
4189 upmotion = re_compile(r"\\v'\-\.[0-9]+[mnv]'|\\u(\.[0-9]+[mnv])?")
4190 downmotion = re_compile(r"\\v'\+?\.[0-9]+[mnv]'|\\d(\.[0-9]+[mnv])?")
4193 upmotion = reCompile(r"\\v'\-\.[0-9]+[mnv]'|\\u(\.[0-9]+[mnv])?")
4194 downmotion = reCompile(r"\\v'\+?\.[0-9]+[mnv]'|\\d(\.[0-9]+[mnv])?")
41914195 direction = None
41924196 while True:
41934197 upward = upmotion.search(text)
42054209 + r"<superscript>" \
42064210 + text[upward.end():]
42074211 direction = 'up'
4208 if supersub_verbosity in self.verbose:
4212 if supersubVerbosity in self.verbose:
42094213 self.source.notify("Starting from None, I see upward %s at %d" % (upward.string[upward.start(0):upward.end(0)], upward.start(0)))
42104214 elif downward:
42114215 text = text[:downward.start()] \
42124216 + r"<subscript>" \
42134217 + text[downward.end():]
42144218 direction = 'down'
4215 if supersub_verbosity in self.verbose:
4219 if supersubVerbosity in self.verbose:
42164220 self.source.notify("Starting from None, I see downward %s at %d" % (downward.string[downward.start(0):downward.end(0)], downward.start(0)))
42174221 else:
42184222 self.source.error("error in vertical-motion match")
42254229 + r"</superscript>" \
42264230 + text[downward.end():]
42274231 direction = None
4228 if supersub_verbosity in self.verbose:
4232 if supersubVerbosity in self.verbose:
42294233 self.source.notify("Starting from up, I see downward %s at %d" % (downward.string[downward.start(0):downward.end(0)], downward.start(0)))
42304234 else:
42314235 self.source.error("error in vertical-motion match (up)")
42354239 + r"</subscript>" \
42364240 + text[upward.end():]
42374241 direction = None
4238 if supersub_verbosity in self.verbose:
4242 if supersubVerbosity in self.verbose:
42394243 self.source.notify("Starting from down, I see upward %s at %d" % (upward.string[upward.start(0):upward.end(0)], upward.start(0)))
42404244 elif downward:
42414245 self.source.error("two downward motions in a row")
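When the motions come in simple `\u`...`\d` pairs, the up/down matching above reduces to a pair of substitutions. A hedged sketch (hypothetical `lift_motions` helper; the real code also tracks explicit `\v'...'` distances and reports unbalanced sequences as errors):

```python
import re

def lift_motions(text):
    # \u opens an upward motion (superscript) closed by \d, and a
    # leading \d opens a downward motion (subscript) closed by \u.
    text = re.sub(r"\\u(.*?)\\d", r"<superscript>\1</superscript>", text)
    text = re.sub(r"\\d(.*?)\\u", r"<subscript>\1</subscript>", text)
    return text
```

Ordering matters: superscript pairs are rewritten first, so a `\d` that closes a `\u` is consumed before the subscript pass can see it.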
42574261 # Now some pattern lifting to be applied after all macro sets.
42584262 # This transforms literallayouts with program text inside them
42594263 # into programlistings.
4260 keyword_lifter = \
4264 keywordLifter = \
42614265 r"(<(?:literallayout|screen)(?: remap='([^']*)')?>(?:\n*<emphasis remap='[A-Z]*'>)?)" \
42624266 r"([^<]*(%s)[^<]*)" \
42634267 r"((</emphasis>\n?)?</(?:literallayout|screen)>)"
42644268 # Continue by recognizing source-code listings and screenshots
42654269 # of command examples.
4266 literal_lifts = (
4270 literalLifts = (
42674271 (r"char|bool|int|float|struct|union|typedef|#define|#undef", "programlisting"), # C
42684272 (r"@_", "programlisting"), # Perl
42694273 (r"\ndef|elif|try|except", "programlisting"), # Python
42704274 (r"mov|jmp", "programlisting"), # Assembler
42714275 (r"\nbash\$|\n\$", "screen")
42724276 )
4273 for (keywords, ltype) in literal_lifts:
4274 listing = re_compile(keyword_lifter % keywords)
4277 for (keywords, ltype) in literalLifts:
4278 listing = reCompile(keywordLifter % keywords)
42754279 ender = ltype.split()[0]
42764280 text = listing.sub(r"<%s remap='\2'>\3</%s>" % (ltype,ender), text)
42774281 # Here is another hack to lift filenames and similar things.
42784282 # Also, handle groff color extension. Alas, no tag-balance checking.
4279 color_lifts = [(re_compile(x[0]), x[1]) for x in (
4283 colorLifts = [(reCompile(x[0]), x[1]) for x in (
42804284 ("\\\\m\[\]", r"</phrase>"),
42814285 ("\\\\m\[([a-z]+)\]", r"<phrase remap='color:\1'>"),
42824286 )]
4283 for (regexp, inline) in TroffInterpreter.prefix_lifts + color_lifts:
4287 for (regexp, inline) in TroffInterpreter.prefixLifts + colorLifts:
42844288 text = regexp.sub(inline, text)
42854289 # And we may need to emit some compatibility warnings
42864290 if self.source.portability:
4287 if self.nonportable_features:
4288 self.nonportable_features = list(set(self.nonportable_features))
4289 self.source.filewarn("portability warning: nonportable requests '%s' seen.\n" % ", ".join(self.nonportable_features))
4291 if self.nonportableFeatures:
4292 self.nonportableFeatures = list(set(self.nonportableFeatures))
4293 self.source.filewarn("portability warning: nonportable requests '%s' seen.\n" % ", ".join(self.nonportableFeatures))
42904294 if self.source.portability >= 2:
42914295 if self.longnames:
42924296 self.source.filewarn("portability warning: groff-style long macro names '%s' seen." % ", ".join(self.longnames))
4293 if self.groff_features:
4294 self.groff_features = list(set(self.groff_features))
4297 if self.groffFeatures:
4298 self.groffFeatures = list(set(self.groffFeatures))
42954299 self.source.filewarn(
42964300 "portability warning: groff extension%s '%s'." % \
4297 (("", "s")[len(self.groff_features) > 1,
4298 ", ".join(self.groff_features)))
4301 (("", "s")[len(self.groffFeatures) > 1],
4302 ", ".join(self.groffFeatures)))
42994303 return text
43004304
43014305 #
43024306 # Some formatting functions are common across more than one macro set.
43034307 #
43044308
4305 def skip_ignorables(source):
4309 def skipIgnorables(source):
43064310 "Skip blank lines and ignorable commands."
43074311 while source.lines:
43084312 line = source.popline()
43114315 break
43124316 elif line == None:
43134317 break
4314 elif line in ("", TroffInterpreter.ctrl, TroffInterpreter.ctrl_nobreak): # Skip blank or null lines
4318 elif line in ("", TroffInterpreter.ctrl, TroffInterpreter.ctrlNobreak): # Skip blank or null lines
43154319 continue
4316 elif source.paragraph_break(line): # Skip ordinary paragraphs
4320 elif source.paragraphBreak(line): # Skip ordinary paragraphs
43174321 continue
43184322 else:
4319 if not is_command(line): # Non-blank text line
4323 if not isCommand(line): # Non-blank text line
43204324 source.pushline(line)
43214325 break
43224326 else:
43264330 source.pushline(" ".join(tokens))
43274331 break
43284332
4329 def gather_lines(source):
4333 def gatherLines(source):
43304334 "Gather text lines until we hit a command."
43314335 res = []
43324336 while source.lines:
43334337 line = source.popline()
4334 if is_command(line) and line[1] in "ABCDEFGHIJKLMNOPQRSTUVWXYZ":
4338 if isCommand(line) and line[1] in "ABCDEFGHIJKLMNOPQRSTUVWXYZ":
43354339 source.pushline(line)
43364340 break
4337 if not (is_command(line) and source.ignorable(line)):
4341 if not (isCommand(line) and source.ignorable(line)):
43384342 res.append(line)
43394343 return res
43404344
4341 def gather_item(source, tag=None):
4345 def gatherItem(source, tag=None):
43424346 "Gather item, emitting opening and closing listitem tags."
4343 if section_verbosity in source.verbose:
4344 source.notify("gather_item(%s)\n" % tag)
4347 if sectionVerbosity in source.verbose:
4348 source.notify("gatherItem(%s)\n" % tag)
43454349 if tag:
43464350 source.emit("<" + tag + ">")
4347 source.need_paragraph()
4351 source.needParagraph()
43484352 savesect = []
43494353 outlines = []
43504354 # Discard commands that generate nothing
4351 skip_ignorables(source)
4355 skipIgnorables(source)
43524356 # Now gather the list item proper
43534357 source.listitem = True
4354 if section_verbosity in source.verbose:
4358 if sectionVerbosity in source.verbose:
43554359 stderr.write("gathering list item\n")
43564360 while source.lines:
43574361 line = source.popline()
43694373 elif line.startswith(TroffInterpreter.ctrl + "blank"):
43704374 # The point is not to end the list on these.
43714375 savesect.append(TroffInterpreter.ctrl + "blank")
4372 elif source.section_break(line):
4376 elif source.sectionBreak(line):
43734377 # Push back any blank lines before the section break.
43744378 # This avoids generating some spurious paragraph()
43754379 # calls that can litter the output with extra close tags.
43784382 savesect.pop()
43794383 source.pushline(line)
43804384 break
4381 elif source.paragraph_break(line):
4385 elif source.paragraphBreak(line):
43824386 source.pushline(line)
43834387 break
43844388 else:
43854389 savesect.append(line)
4386 if interpreter_verbosity in source.verbose:
4390 if interpreterVerbosity in source.verbose:
43874391 source.notify("interpreting savesect: " + repr(savesect))
4388 source.interpret_block(savesect, outlines)
4389 if interpreter_verbosity in source.verbose:
4392 source.interpretBlock(savesect, outlines)
4393 if interpreterVerbosity in source.verbose:
43904394 source.notify("interpretation of savesect complete\n")
43914395 if [x for x in outlines if not not x and x[:4] != "<!--"]:
43924396 list(map(source.emit, outlines))
43944398 source.warning("empty list item, see FIXME")
43954399 source.emit("<para>&nbsp;</para> <!-- FIXME: empty list item -->")
43964400 source.listitem = False
4397 source.end_paragraph(label="gather_item")
4401 source.endParagraph(label="gatherItem")
43984402 if tag:
43994403 source.emit(r"</" + tag + ">")
4400 if section_verbosity in source.verbose:
4401 source.notify("gather_item(%s)\n" % tag)
4402
4403 def gather_simplelist(cmd, source):
4404 if sectionVerbosity in source.verbose:
4405 source.notify("gatherItem(%s)\n" % tag)
4406
4407 def gatherSimplelist(cmd, source):
44044408 "Gather listitems, terminate when you see a dot command."
44054409 while len(source.lines):
44064410 line = source.popline()
44084412 source.pushline(line)
44094413 break
44104414 else:
4411 gather_item(source, "listitem")
4412
4413 def gather_itemizedlist(cmd, source, bullet):
4415 gatherItem(source, "listitem")
4416
4417 def gatherItemizedlist(cmd, source, bullet):
44144418 "Translate to bullet-list markup -- used in both man and me macros."
44154419 source.emit("<itemizedlist mark='%s'>" % bullet)
4416 gather_simplelist(cmd, source)
4420 gatherSimplelist(cmd, source)
44174421 source.emit("</itemizedlist>\n")
44184422
4419 def gather_orderedlist(cmd, source, bullet):
4423 def gatherOrderedlist(cmd, source, bullet):
44204424 "Translate to numbered-list markup."
44214425 source.emit("<orderedlist mark='%s'>" % bullet)
4422 gather_simplelist(cmd, source)
4426 gatherSimplelist(cmd, source)
44234427 source.emit("</orderedlist>\n")
44244428
4425 def parse_name_section(nameline):
4429 def parseNameSection(nameline):
44264430 "Parse a NAME -- description line."
44274431 nameline = deemphasize(nameline)
44284432 nameline = nameline.replace("\t", r' ')
44294433 # Unicode en dash - E2 80 93 or 342 200 223.
44304434 nameline = nameline.replace("\342\200\223", r'-')
4435 nameline = nameline.replace(r" \[en] ", r' \- ')
44314436 nameline = nameline.replace(r" \-\- ", r' \- ')
44324437 nameline = nameline.replace(r" \&amp;\- ", r' \- ')
44334438 nameline = nameline.replace(" - ", r' \- ')
44894494 res += repr(child)
44904495 return ("<%s>" % self.type) + res + ("</%s>" % self.type)
44914496
4492 def is_file_or_command_name(tok):
4497 def isFileOrCommandName(tok):
44934498 # Yes, some legitimate commands begin with digits;
44944499 # 411toppm is a good example.
44954500 if not tok:
45174522 # following arguments together on the man page, with the boundary
45184523 # marked by a highlight change. Replace these with a glue token so
45194524 # there will be a parseable boundary there.
4520 ln=DisplayParser.old_style_option_glue.sub(r"\1 @GLUE@ \2",ln)
4525 ln=DisplayParser.oldStyleOptionGlue.sub(r"\1 @GLUE@ \2",ln)
45214526 # We have now extracted all the semantic information we can from
45224527 # highlight boundaries.
45234528 ln = deemphasize(ln)
45354540 self.lines = lines
45364541 self.verbose = verbose
45374542 self.pretokenizer = None
4538 self.token_index = 0
4543 self.tokenIndex = 0
45394544 self.lookahead = []
45404545 self.lookbehind = []
45414546 self.savedlines = []
45704575 return None
45714576 else:
45724577 return self.lines[0]
4573 def tokenize(self, new_pretokenizer=None):
4578 def tokenize(self, newPretokenizer=None):
45744579 "Split a line on whitespace, but preserve \n as a token."
45754580 if self.verbose:
4576 stdout.write("tokenize: %s\n" % (new_pretokenizer,))
4577 self.pretokenizer = new_pretokenizer
4581 stdout.write("tokenize: %s\n" % (newPretokenizer,))
4582 self.pretokenizer = newPretokenizer
45784583 if self.lines:
45794584 if self.pretokenizer:
45804585 line = self.pretokenizer(self.lines[0])
45854590 self.lookahead.append('\n')
45864591 if self.verbose:
45874592 stdout.write("tokenize: split %s to get %s\n"%(repr(line),self))
4588 def token_pop(self, count=1):
4593 def tokenPop(self, count=1):
45894594 "Get a token."
4590 res = self.token_peek(count)
4595 res = self.tokenPeek(count)
45914596 self.lookbehind += self.lookahead[:count]
45924597 self.lookahead = self.lookahead[count:]
4593 self.token_index += count
4598 self.tokenIndex += count
45944599 if self.verbose:
4595 stdout.write("token_pop: returns %s, from %s\n" % (repr(res), self))
4600 stdout.write("tokenPop: returns %s, from %s\n" % (repr(res), self))
45964601 return res
4597 def token_push(self, tok):
4602 def tokenPush(self, tok):
45984603 "Put back a token."
45994604 if self.verbose:
4600 stdout.write("token_push: %s, to %s\n" % (repr(tok), self))
4605 stdout.write("tokenPush: %s, to %s\n" % (repr(tok), self))
46014606 if not self.lines:
46024607 self.lines = [tok]
46034608 elif not self.lookahead:
46044609 self.lines = [tok] + self.lines
46054610 self.lookahead = [tok] + self.lookahead
46064611 if self.verbose:
4607 stdout.write("token_push: ends with %s\n" % self)
4608 def token_peek(self, count=1):
4612 stdout.write("tokenPush: ends with %s\n" % self)
4613 def tokenPeek(self, count=1):
46094614 "Peek at the next token. The count argument can only index into the next line."
46104615 if not self.lookahead and not self.lines:
46114616 return None
46124617 if self.verbose:
4613 stdout.write("token_peek: I see " + repr(self) + '\n')
4618 stdout.write("tokenPeek: I see " + repr(self) + '\n')
46144619 while len(self.lookahead) == 0:
46154620 if not self.lines:
46164621 if self.verbose:
4617 stdout.write("token_peek: I return None: "+repr(self)+'\n')
4622 stdout.write("tokenPeek: I return None: "+repr(self)+'\n')
46184623 return None
46194624 self.popline()
46204625 if self.verbose:
4621 stdout.write("token_peek: I return %s from %s\n" % (repr(self.lookahead[count-1]), self))
4626 stdout.write("tokenPeek: I return %s from %s\n" % (repr(self.lookahead[count-1]), self))
46224627 return self.lookahead[count-1]
46234628
46244629 def checkpoint(self):
46454650 class FunctionSynopsisParser:
46464651 "Consume a function synopsis and return markup."
46474652 # Candidate lines for FuncSynopsisInfo
4648 language_lines = (
4649 (re_compile(r"^\s*#\s*(define|undef|include|if\s|ifn?def|endif)"), "C"),
4650 (re_compile(r"^\s*extern.*;$"), "C"),
4651 (re_compile(r"^\s*typedef.*;$"), "C"),
4652 (re_compile(r"^\s*import\s"), "Python"),
4653 (re_compile(r"^\s*use\s.*;"), "Perl"),
4654 (re_compile(r"#\s*perl"), "Perl"),
4653 languageLines = (
4654 (reCompile(r"^\s*#\s*(define|undef|include|if\s|ifn?def|endif)"), "C"),
4655 (reCompile(r"^\s*extern.*;$"), "C"),
4656 (reCompile(r"^\s*typedef.*;$"), "C"),
4657 (reCompile(r"^\s*import\s"), "Python"),
4658 (reCompile(r"^\s*use\s.*;"), "Perl"),
4659 (reCompile(r"#\s*perl"), "Perl"),
46554660 # Allow lines that resemble variable settings
46564661 # This isn't actually limited to C...
4657 (re_compile(r"[a-z_]+ = "), "C"),
4662 (reCompile(r"[a-z_]+ = "), "C"),
46584663 )
46594664 # These patterns identify lines that are probably code
4660 language_fragments = (
4665 languageFragments = (
46614666 # This is looking for the stuff that one finds around the left
46624667 # paren of a C declaration. This is something we're quite unlikely
46634668 # to see in running text.
4664 (re_compile(r"[a-z][a-z][a-z]\([_a-zA-Z][_a-zA-Z0-9]+[, ]"), "C"),
4669 (reCompile(r"[a-z][a-z][a-z]\([_a-zA-Z][_a-zA-Z0-9]+[, ]"), "C"),
46654670 # Look for lines led with C declarations
4666 (re_compile(r"^\s*(int|char|long)\s"), "C"),
4671 (reCompile(r"^\s*(int|char|long)\s"), "C"),
46674672 # Someday, use these
4668 #(re_compile(r"^\s*def\s"), "Python"),
4669 #(re_compile(r"^\s*class\s"), "Python"),
4673 #(reCompile(r"^\s*def\s"), "Python"),
4674 #(reCompile(r"^\s*class\s"), "Python"),
46704675 )
4671 token_pairs = (
4672 (re_compile(r"^\s*/\*"), re_compile(r"\*/$"), "C","C comment"),
4676 tokenPairs = (
4677 (reCompile(r"^\s*/\*"), reCompile(r"\*/$"), "C","C comment"),
46734678 # typedef/struct/union end only on ^} because they can have {} inside
4674 (re_compile(r"^\s*typedef.*{$"), re_compile(r"^}"), "C","C typedef"),
4675 (re_compile(r"^\s*typedef.*enum.*{$"), re_compile(r"^}"), "C","C typedef enum"),
4676 (re_compile(r"^\s*struct.*{$"), re_compile(r"^}"), "C","C struct"),
4677 (re_compile(r"^\s*union.*{$"), re_compile(r"^}"), "C","C union"),
4679 (reCompile(r"^\s*typedef.*{$"), reCompile(r"^}"), "C","C typedef"),
4680 (reCompile(r"^\s*typedef.*enum.*{$"), reCompile(r"^}"), "C","C typedef enum"),
4681 (reCompile(r"^\s*struct.*{$"), reCompile(r"^}"), "C","C struct"),
4682 (reCompile(r"^\s*union.*{$"), reCompile(r"^}"), "C","C union"),
46784683 # With enum we can be a bit more relaxed
4679 (re_compile(r"^\s*enum\b"), re_compile(r"};?"), "C","C enum"),
4680 (re_compile(r"^\s*extern\b"), re_compile(r"&semi;$"), "C","C extern"),
4684 (reCompile(r"^\s*enum\b"), reCompile(r"};?"), "C","C enum"),
4685 (reCompile(r"^\s*extern\b"), reCompile(r"&semi;$"), "C","C extern"),
46814686 )
4682 openssl_stack_line = re_compile(r"STACK_OF[A-Z_]*\([A-Za-z, ]+\)(&semi;)?$")
4683 openssl_lhash_line = re_compile(r"LHASH_OF\([A-Za-z, ]+\)(&semi;)?$")
4687 opensslStackLine = reCompile(r"STACK_OF[A-Z_]*\([A-Za-z, ]+\)(&semi;)?$")
4688 opensslLhashLine = reCompile(r"LHASH_OF\([A-Za-z, ]+\)(&semi;)?$")
46844689 def __init__(self, iop, source):
46854690 self.io = iop
46864691 self.source = source
46874692 self.output = ""
46884693 self.language = None
46894694 self.error = None
4690 self.seen_ansi = False
4695 self.seenAnsi = False
46914696 # Shortcut: assume | and ') (' and ] [ can never occur in a function
46924697 # synopsis (the middle two filter out some Perl code examples).
46934698 # Make an exception for || as this never occurs in those but may mean
46944699 # there is code for a disjunction of feature macros, as in logf(3).
46954700 # Look for these and return immediately if we find them.
46964701 if [x for x in self.io.lines if ("||" not in x and "|" in x) or "('" in x or "')" in x or "] [" in x]:
4697 if classify_verbosity in self.source.verbose:
4702 if classifyVerbosity in self.source.verbose:
46984703 self.source.notify("can't be a function synopsis, contains | or '] ['")
46994704 self.error = "<!-- contains | or '] [' -->"
47004705 return
47034708 # (We used to test for ; but XML entity expansions messed that up.)
47044709 if not self.io.lines[0].startswith("#include"):
47054710 if not [x for x in self.io.lines if "(" in x]:
4706 if classify_verbosity in self.source.verbose:
4711 if classifyVerbosity in self.source.verbose:
47074712 self.source.notify("can't be a function synopsis, does not contain (")
47084713 self.error = "<!-- does not contain ( -->"
47094714 return
47104715 # Otherwise time for a normal parse
4711 self.io.tokenize(self.__pretokenizer)
4716 self.io.tokenize(self._Pretokenizer)
47124717 try:
47134718 try:
4714 if classify_verbosity in self.source.verbose:
4719 if classifyVerbosity in self.source.verbose:
47154720 self.source.notify("beginning function synopsis parse: " + repr(self.io))
47164721 self.output = ""
47174722 while self.io.lines:
4718 info = self.__parse_function_synopsis_info()
4719 proto = self.__parse_function_prototype()
4723 info = self._ParseFunctionSynopsisInfo()
4724 proto = self._ParseFunctionPrototype()
47204725 if info or proto:
47214726 self.output += info + proto
47224727 else:
47244729 if self.output:
47254730 self.output = "<funcsynopsis>\n"+self.output+"</funcsynopsis>\n"
47264731 finally:
4727 if classify_verbosity in self.source.verbose:
4732 if classifyVerbosity in self.source.verbose:
47284733 self.source.notify("ending function synopsis parse: " + self.output)
47294734 except LiftException as e:
47304735 self.error = "function synopsis parse failed "
4731 if self.io.token_peek() is None:
4736 if self.io.tokenPeek() is None:
47324737 self.error += "at end of synopsis: %s" % (e.message)
47334738 else:
47344739 self.error += "on `%s' (%d): %s" % \
4735 (self.io.token_peek(), self.io.token_index, e.message)
4736 if classify_verbosity in self.source.verbose:
4740 (self.io.tokenPeek(), self.io.tokenIndex, e.message)
4741 if classifyVerbosity in self.source.verbose:
47374742 self.source.notify(self.error)
47384743 # Since we can detect function synopses reliably, check here
47394744 # and make self.output nonempty so we'll error out and not try
47404745 # doing a command parse.
4741 if list(filter(self.is_sourcecode, self.io.lines)):
4746 if list(filter(self.isSourcecode, self.io.lines)):
47424747 self.output = "<!-- invalid function synopsis -->"
47434748 self.io.tokenize()
47444749
4745 def is_sourcecode(self, text):
4750 def isSourcecode(self, text):
47464751 "Recognize that a line is source code."
47474752 if blankline.search(text):
47484753 return True
4749 for (pattern, dummy) in FunctionSynopsisParser.language_lines:
4754 for (pattern, dummy) in FunctionSynopsisParser.languageLines:
47504755 if pattern.search(text):
47514756 return True
4752 for (pattern, dummy) in FunctionSynopsisParser.language_fragments:
4757 for (pattern, dummy) in FunctionSynopsisParser.languageFragments:
47534758 if pattern.search(text):
47544759 return True
47554760 return False
47564761
4757 def __pretokenizer(self, line):
4762 def _Pretokenizer(self, line):
47584763 line = detroff(line)
47594764 # OpenSSL pages have some weird type-macro generation stuff going on.
47604765 line = re.sub(r'STACK_OF([A-Z_]*)\(([A-Za-z_]*)\)', r"STACK_OF\1@GLUE1@\2@GLUE2@", line)
47664771 line = line.replace("@GLUE1@", "(").replace("@GLUE2@", ")")
47674772 return line
47684773
4769 def __detokenize(self, line):
4774 def _Detokenize(self, line):
47704775 return line.replace("[ ]", "[]").replace("* ", "*") \
47714776 .replace(" &semi; ", "&semi;").replace(" ~ ", "~")
47724777
4773 def __parse_paramdef(self, arg):
4778 def _ParseParamdef(self, arg):
47744779 "We've been handed a formal argument; parse it into a ParamDef."
47754780 if not arg: # Triggered by ,) which can be generated by mdoc
47764781 return ""
47894794 last = rindex("(", arg)
47904795 # Now look for the rightmost token that resembles a name.
47914796 # There's your parameter.
4792 param_ind = -1
4797 paramInd = -1
47934798 for i in range(last):
47944799 if arg[last - i][0].isalpha():
4795 param_ind = last - i
4800 paramInd = last - i
47964801 break
4797 if param_ind == -1:
4802 if paramInd == -1:
47984803 prolog = " ".join(arg)
47994804 var = ""
48004805 epilog = ""
48014806 else:
4802 prolog = " ".join(arg[:param_ind])
4803 var = arg[param_ind]
4804 epilog = " ".join(arg[param_ind+1:])
4805 prolog = self.__detokenize(prolog)
4806 epilog = self.__detokenize(epilog)
4807 prolog = " ".join(arg[:paramInd])
4808 var = arg[paramInd]
4809 epilog = " ".join(arg[paramInd+1:])
4810 prolog = self._Detokenize(prolog)
4811 epilog = self._Detokenize(epilog)
48074812 self.source.localhints.post(var, "varname role='parameter'")
48084813 return " <paramdef>" + prolog + " <parameter>" + var + "</parameter>" + epilog + "</paramdef>\n"
48094814
4810 def __parse_function_prototype(self):
4815 def _ParseFunctionPrototype(self):
48114816 "Parse a C or C++ function prototype."
4812 if classify_verbosity in self.source.verbose:
4817 if classifyVerbosity in self.source.verbose:
48134818 self.source.notify("beginning function prototype parse, language %s" % self.language)
48144819 try:
4815 if classify_verbosity in self.source.verbose:
4816 self.source.notify("parse_function_prototype() sees: " + repr(self.io))
4820 if classifyVerbosity in self.source.verbose:
4821 self.source.notify("parseFunctionPrototype() sees: " + repr(self.io))
48174822 # Seek the name token.
48184823 parendepth = 0
48194824 name = None
48204825 prolog = []
4821 hint_dict = {}
4826 hintDict = {}
48224827 seentype = False
48234828 self.io.checkpoint()
48244829 # Munch the part before the formals
48254830 while True:
4826 tok = self.io.token_pop()
4827 if classify_verbosity in self.source.verbose:
4831 tok = self.io.tokenPop()
4832 if classifyVerbosity in self.source.verbose:
48284833 self.source.notify("looking at %s" % repr(tok))
4829 tnext = self.io.token_peek()
4834 tnext = self.io.tokenPeek()
48304835 # The sequence \n( should be treated like (, so a function
48314836 # prototype with a line break just after the name is detected.
48324837 if tnext == '\n':
4833 self.io.token_pop()
4834 second = self.io.token_peek()
4835 if classify_verbosity in self.source.verbose:
4838 self.io.tokenPop()
4839 second = self.io.tokenPeek()
4840 if classifyVerbosity in self.source.verbose:
48364841 self.source.notify("newline special case sees %s" % repr(second))
48374842 if second != '(':
4838 self.io.token_push('\n')
4843 self.io.tokenPush('\n')
48394844 else:
48404845 tnext = second
48414846 # We shouldn't run out of tokens here
48424847 if tok is None:
4843 if classify_verbosity in self.source.verbose:
4848 if classifyVerbosity in self.source.verbose:
48444849 self.source.notify("C prototype parse failed while looking for (")
48454850 self.io.unroll()
48464851 return ""
48524857 continue
48534858 # Accumulate C keywords. STACK_OF is part of a kludge to pass
48544859 # through a weird construction that the OpenSSL pages use.
4855 if tok in c_declarators or tok.startswith('operator') or "STACK_OF" in tok:
4856 if classify_verbosity in self.source.verbose:
4860 if tok in cDeclarators or tok.startswith('operator') or "STACK_OF" in tok:
4861 if classifyVerbosity in self.source.verbose:
48574862 self.source.notify("Treating %s as declarator" % tok)
4858 elif not id_re.match(tok) and not tok in ("(", ")", "*", "&", "~"):
4859 if classify_verbosity in self.source.verbose:
4863 elif not idRe.match(tok) and not tok in ("(", ")", "*", "&", "~"):
4864 if classifyVerbosity in self.source.verbose:
48604865 self.source.notify("illegal token %s while looking for declaration specifiers" % tok)
48614866 self.io.unroll()
48624867 return ""
48644869 # the function name, rather than some flukey typedef in the
48654870 # declaration. This will do the right thing with
48664871 # struct foo *bar(x, y)
4867 elif not name and id_re.match(tok):
4868 if tnext and not id_re.match(tnext) and tnext != '\n':
4872 elif not name and idRe.match(tok):
4873 if tnext and not idRe.match(tnext) and tnext != '\n':
48694874 name = tok
4870 if classify_verbosity in self.source.verbose:
4875 if classifyVerbosity in self.source.verbose:
48714876 self.source.notify("name is %s, non-identifier is %s" % (name, repr(tnext)))
48724877 elif seentype:
4873 if classify_verbosity in self.source.verbose:
4878 if classifyVerbosity in self.source.verbose:
48744879 self.source.notify("looks like text, not a function declaration: %s" % tok)
48754880 self.io.unroll()
48764881 return ""
48774882 else: # Could be a typedef
4878 if classify_verbosity in self.source.verbose:
4883 if classifyVerbosity in self.source.verbose:
48794884 self.source.notify("treating %s as a type" % tok)
4880 hint_dict[tok] = "type"
4885 hintDict[tok] = "type"
48814886 seentype = True
48824887 elif name and parendepth == 0 and tok == "(":
48834888 break
48864891 elif tok == ')':
48874892 parendepth -= 1
48884893 elif tok in ("struct", "union", "enum"):
4889 hint_dict[tok + " " + tnext] = "type"
4894 hintDict[tok + " " + tnext] = "type"
48904895 prolog.append(tok)
4891 tok = self.io.token_pop()
4892 tnext = self.io.token_peek()
4896 tok = self.io.tokenPop()
4897 tnext = self.io.tokenPeek()
48934898 prolog.append(tok)
48944899 # Kluge to deal with C++ declarators
48954900 if self.io.lookahead[:2] == [")", "("]:
4896 self.io.token_pop(2)
4901 self.io.tokenPop(2)
48974902 prolog += " ()"
48984903 if not name:
4899 if general_verbosity in self.source.verbose:
4904 if generalVerbosity in self.source.verbose:
49004905 self.source.notify("no name in apparent function declaration.")
49014906 self.io.unroll()
49024907 return ""
4903 if parse_verbosity in self.source.verbose:
4908 if parseVerbosity in self.source.verbose:
49044909 self.source.notify("Function name: " + name)
49054910 prolog[prolog.index(name)] = "<function>" + name + "</function>"
4906 hint_dict[name] = "function"
4907 prolog = self.__detokenize(" ".join(prolog))
4911 hintDict[name] = "function"
4912 prolog = self._Detokenize(" ".join(prolog))
49084913 # Is this an old-style or a new-style declaration?
4909 firstformal = self.io.token_pop()
4914 firstformal = self.io.tokenPop()
49104915 argcount = parendepth = 0
4911 formal_args = ""
4916 formalArgs = ""
49124917 newstyle = False
49134918 if firstformal == ')':
49144919 # No formals at all. Treat as K&R style
4915 if parse_verbosity in self.source.verbose:
4920 if parseVerbosity in self.source.verbose:
49164921 self.source.notify("no formals")
49174922 else:
4918 if self.io.token_peek() in (")", ","):
4923 if self.io.tokenPeek() in (")", ","):
49194924 # Just one token in the formal. This case is ambiguous;
49204925 # could be a K&R-style declaration, or could be an ANSI
49214926 # declaration like
49244929 # This is why we track whether we've seen ANSI C constructions.
49254930 # We also want to catch the case of
49264931 # int foo(void)
4927 # here, that's what the c_declarators check is about.
4928 self.io.token_push(firstformal)
4929 newstyle = self.seen_ansi or firstformal in c_declarators or self.io.lines[0].strip().endswith("&semi;")
4932 # here, that's what the cDeclarators check is about.
4933 self.io.tokenPush(firstformal)
4934 newstyle = self.seenAnsi or firstformal in cDeclarators or self.io.lines[0].strip().endswith("&semi;")
49304935 else:
49314936 # More than one identifier in the formal
4932 self.io.token_push(firstformal)
4933 self.seen_ansi = newstyle = True
4934 if parse_verbosity in self.source.verbose:
4937 self.io.tokenPush(firstformal)
4938 self.seenAnsi = newstyle = True
4939 if parseVerbosity in self.source.verbose:
49354940 if newstyle:
49364941 self.source.notify("ANSI-style declaration of %s"% name)
49374942 else:
49444949 else:
49454950 terminator = '&semi;'
49464951 formalnames = [[]]
4947 if self.io.token_peek() == ")": # Excludes no-args case
4948 self.io.token_pop()
4952 if self.io.tokenPeek() == ")": # Excludes no-args case
4953 self.io.tokenPop()
49494954 else:
49504955 while True:
4951 tok = self.io.token_pop()
4956 tok = self.io.tokenPop()
49524957 if not tok:
49534958 # If we ran out of tokens without seeing a
49544959 # balancing ), this isn't a C prototype at all.
49554960 # Bail out.
4956 if general_verbosity in self.source.verbose:
4961 if generalVerbosity in self.source.verbose:
49574962 self.source.warning("no balancing )")
49584963 self.io.unroll()
49594964 return ""
49754980 # Are we looking at an old-style declaration with *nothing*
49764981 # but formals? If so, head off any attempt to parse them,
49774982 # it will only come to grief.
4978 no_declarations = False
4979 maybe_semi = self.io.token_pop()
4980 if maybe_semi == "&semi;":
4981 no_declarations = True
4982 elif maybe_semi != "\n":
4983 if classify_verbosity in self.source.verbose:
4983 noDeclarations = False
4984 maybeSemi = self.io.tokenPop()
4985 if maybeSemi == "&semi;":
4986 noDeclarations = True
4987 elif maybeSemi != "\n":
4988 if classifyVerbosity in self.source.verbose:
49844989 self.source.warning("suspicious token %s after )" \
4985 % maybe_semi)
4986 self.io.token_push(maybe_semi)
4990 % maybeSemi)
4991 self.io.tokenPush(maybeSemi)
49874992 else:
49884993 # A second newline here means there is whitespace where
49894994 # we're expecting the parameter declarations. This
49904995 # happens a lot on the Tcl pages. Give up.
4991 maybe_newline = self.io.token_peek()
4992 if maybe_newline in ("\n", "<sbr/>", None):
4993 no_declarations = True
4996 maybeNewline = self.io.tokenPeek()
4997 if maybeNewline in ("\n", "<sbr/>", None):
4998 noDeclarations = True
49944999 else:
49955000 # We're probably looking at the first declarator.
4996 self.io.token_push(maybe_semi)
5001 self.io.tokenPush(maybeSemi)
49975002 # If there are no declarations, use the formal names we
49985003 # stashed away. It's better than nothing.
4999 if no_declarations:
5000 if parse_verbosity in self.source.verbose:
5004 if noDeclarations:
5005 if parseVerbosity in self.source.verbose:
50015006 self.source.notify("no parameter declarations")
50025007 for param in formalnames:
5003 formal_args += "<paramdef><parameter>%s</parameter></paramdef>\n" % " ".join(param)
5004 formal_args = self.__detokenize(formal_args)
5008 formalArgs += "<paramdef><parameter>%s</parameter></paramdef>\n" % " ".join(param)
5009 formalArgs = self._Detokenize(formalArgs)
50055010 argcount = 0
50065011 # Go get the prototype formals. If this is a new-style
50075012 # declaration, terminate on seeing a top-level ). If it's
50125017 while newstyle or argcount:
50135018 formal = []
50145019 while True:
5015 tok = self.io.token_pop()
5016 if parse_verbosity in self.source.verbose:
5020 tok = self.io.tokenPop()
5021 if parseVerbosity in self.source.verbose:
50175022 self.source.notify("Token (%d): %s %s" % (parendepth, tok, self.io.lookahead))
50185023 if tok is None:
5019 if parse_verbosity in self.source.verbose:
5024 if parseVerbosity in self.source.verbose:
50205025 self.source.warning("unexpected end of token list")
50215026 self.io.unroll()
50225027 return ""
50365041 break # End of formal
50375042 formal.append(tok)
50385043 # Formal argument should be complete. Hand it off for analysis
5039 if parse_verbosity in self.source.verbose:
5044 if parseVerbosity in self.source.verbose:
50405045 self.source.notify("Formal: %s" % formal)
5041 formal_args += self.__parse_paramdef(formal)
5046 formalArgs += self._ParseParamdef(formal)
50425047 argcount -= 1
50435048 # We've gathered all the argument markup
5044 if formal_args == "<paramdef><parameter>void</parameter></paramdef>":
5045 formal_args = " <void/>"
5046 if formal_args == "<paramdef><parameter>...</parameter></paramdef>":
5047 formal_args = " <varargs/>\n"
5048 if not formal_args:
5049 if formalArgs == "<paramdef><parameter>void</parameter></paramdef>":
5050 formalArgs = " <void/>"
5051 if formalArgs == "<paramdef><parameter>...</parameter></paramdef>":
5052 formalArgs = " <varargs/>\n"
5053 if not formalArgs:
50495054 if newstyle:
5050 formal_args = "<varargs/>"
5055 formalArgs = "<varargs/>"
50515056 else:
5052 formal_args = "<void/>"
5057 formalArgs = "<void/>"
50535058 # Consume optional semicolons following the close paren
5054 if self.io.token_peek() in ("&semi;", ";"):
5055 self.io.token_pop()
5056 if parse_verbosity in self.source.verbose:
5059 if self.io.tokenPeek() in ("&semi;", ";"):
5060 self.io.tokenPop()
5061 if parseVerbosity in self.source.verbose:
50575062 self.source.notify("ate trailing semi")
5058 if self.io.token_peek() not in (None, "\n", "<sbr/>"):
5059 if parse_verbosity in self.source.verbose:
5060 self.source.warning("trailing junk '%s' after prototype" % self.io.token_peek())
5063 if self.io.tokenPeek() not in (None, "\n", "<sbr/>"):
5064 if parseVerbosity in self.source.verbose:
5065 self.source.warning("trailing junk '%s' after prototype" % self.io.tokenPeek())
50615066 self.io.unroll()
50625067 return ""
50635068 else:
50645069 # If we're at end of line, consume the line so the next
50655070 # go-around of the function synopsis parser won't see it.
5066 while self.io.token_peek() == "\n":
5067 self.io.token_pop()
5068 if parse_verbosity in self.source.verbose:
5071 while self.io.tokenPeek() == "\n":
5072 self.io.tokenPop()
5073 if parseVerbosity in self.source.verbose:
50695074 self.source.notify("ate trailing newline")
50705075 # Now we can assemble the actual prolog...
50715076 prolog = "<funcdef>" + prolog + "</funcdef>\n"
50725077 # Now assemble and return it.
5073 if prolog or formal_args:
5074 output="<funcprototype>\n"+prolog+formal_args+"</funcprototype>\n"
5078 if prolog or formalArgs:
5079 output="<funcprototype>\n"+prolog+formalArgs+"</funcprototype>\n"
50755080 # Since the parse succeeded, the semantic hints we gathered
50765081 # are good
5077 #stdout.write("Hint dictionary from function synopsis is %s\n" % hint_dict)
5078 for (hid, htype) in list(hint_dict.items()):
5082 #stdout.write("Hint dictionary from function synopsis is %s\n" % hintDict)
5083 for (hid, htype) in list(hintDict.items()):
50795084 self.source.localhints.post(hid, htype)
50805085 finally:
5081 if classify_verbosity in self.source.verbose:
5086 if classifyVerbosity in self.source.verbose:
50825087 self.source.notify("ending function prototype parse")
50835088 return output
50845089
5085 def __detect_passthroughs(self, line=None):
5090 def _DetectPassthroughs(self, line=None):
50865091 # Detect language-specific line pattern
50875092 if line is None:
50885093 line = self.io.peekline()
5089 for (pattern, lang) in FunctionSynopsisParser.language_lines:
5094 for (pattern, lang) in FunctionSynopsisParser.languageLines:
50905095 if pattern.search(line):
50915096 return lang
50925097 return None
5093 def __parse_function_synopsis_info(self):
5098 def _ParseFunctionSynopsisInfo(self):
50945099 # Accept any number of lines as a FuncSynopsisInfo
5095 if classify_verbosity in self.source.verbose:
5100 if classifyVerbosity in self.source.verbose:
50965101 self.source.notify("beginning function synopsis info parse")
50975102 synopsisinfo = ""
50985103 while True:
5099 skip_ignorables(self.source)
5104 skipIgnorables(self.source)
51005105 line = self.io.peekline()
5101 if classify_verbosity in self.source.verbose:
5106 if classifyVerbosity in self.source.verbose:
51025107 self.source.notify("candidate line: %s" % repr(line))
51035108 if line is None:
51045109 break
51225127 self.io.popline()
51235128 continue
51245129 # Also pass through anything that looks like a Qt section header
5125 if line.strip() in qt_headers:
5130 if line.strip() in qtHeaders:
51265131 synopsisinfo += line
51275132 self.io.popline()
51285133 continue
51295134 # Other things, like cpp directives, should pass through as well.
51305135 # Test for single-line typedefs here so as not to have a bad
51315136 # interaction with the token-pair code below.
5132 lang = self.__detect_passthroughs(line)
5137 lang = self._DetectPassthroughs(line)
51335138 if lang:
5134 if classify_verbosity in self.source.verbose:
5139 if classifyVerbosity in self.source.verbose:
51355140 self.source.notify("from %s language identified as %s\n"% (repr(line), lang))
51365141 self.language = lang
51375142 synopsisinfo += line
51385143 self.io.popline()
51395144 continue
51405145 # Pass through funky OpenSSL macro prototypes
5141 if FunctionSynopsisParser.openssl_stack_line.search(line):
5146 if FunctionSynopsisParser.opensslStackLine.search(line):
51425147 synopsisinfo += line
51435148 self.io.popline()
51445149 continue
5145 if FunctionSynopsisParser.openssl_lhash_line.search(line):
5150 if FunctionSynopsisParser.opensslLhashLine.search(line):
51465151 synopsisinfo += line
51475152 self.io.popline()
51485153 continue
51765181 continue
51775182 # Pass through line sequences bracketed by specified token pairs.
51785183 # This is where we catch stuff like multiline struct declarations.
5179 for (start,end,lang,errmsg) in FunctionSynopsisParser.token_pairs:
5184 for (start,end,lang,errmsg) in FunctionSynopsisParser.tokenPairs:
51805185 if start.match(line):
5181 if parse_verbosity in self.source.verbose:
5186 if parseVerbosity in self.source.verbose:
51825187 self.source.notify("Declaration starts with %s, should end with %s" % (start.pattern, end.pattern))
51835188 while self.io.lines:
51845189 line = detroff(self.io.popline())
5185 if parse_verbosity in self.source.verbose:
5190 if parseVerbosity in self.source.verbose:
51865191 self.source.notify(repr(line))
51875192 synopsisinfo += line
51885193 # This is the magic that allows us to avoid elaborate
51955200 else:
51965201 # Nothing we recognize. Stop, and don't pop the current line
51975202 break
5198 if classify_verbosity in self.source.verbose:
5203 if classifyVerbosity in self.source.verbose:
51995204 self.source.notify("ending function synopsis info parse")
52005205 if synopsisinfo:
52015206 return "<funcsynopsisinfo>\n"+synopsisinfo+"</funcsynopsisinfo>\n"
52045209
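The token-pair pass-through in the hunk above (the loop that brackets multiline struct declarations between start and end patterns) can be sketched on its own. The `TOKEN_PAIRS` table below is illustrative only; doclifter's real `tokenPairs` entries are larger and carry a language tag and error message per pair.

```python
import re

# Illustrative start/end patterns; doclifter's real tokenPairs table
# differs and carries a language tag and error message per pair.
TOKEN_PAIRS = [
    (re.compile(r"^\s*(typedef\s+)?struct\b"), re.compile(r"};")),
    (re.compile(r"^\s*enum\b"), re.compile(r"};")),
]

def passthrough(lines):
    """Collect runs of lines bracketed by start/end patterns, as the
    funcsynopsisinfo scanner does: once a start pattern matches, every
    line up to and including the matching end line passes through."""
    out, i = [], 0
    while i < len(lines):
        for start, end in TOKEN_PAIRS:
            if start.match(lines[i]):
                while i < len(lines):
                    out.append(lines[i])
                    i += 1
                    if end.search(out[-1]):
                        break
                break
        else:
            i += 1
    return out
```

This is the "magic" the comment refers to: no grammar for C declarations is needed, only a start pattern and a matching terminator.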
52055210 class CommandSynopsisSequenceParser:
52065211 "Parse a sequence of command synopses."
5207 opt_file_ext = re_compile(r"\[\.([a-zA-Z|.]+)\]")
5208 force_text = re_compile(r"\s[a-z]+\s[a-z]+\s[a-z]+\s[a-z]+\s")
5212 optFileExt = reCompile(r"\[\.([a-zA-Z|.]+)\]")
5213 forceText = reCompile(r"\s[a-z]+\s[a-z]+\s[a-z]+\s[a-z]+\s")
52095214
52105215 def __init__(self, iop, source, refnames):
52115216 self.io = iop
52195224 self.groupnest = 0
52205225 self.lastnest = []
52215226 # Arrange for lexical analysis to work
5222 self.io.tokenize(self.__pretokenize)
5223 if bsd_verbosity in self.source.verbose:
5227 self.io.tokenize(self._Pretokenize)
5228 if bsdVerbosity in self.source.verbose:
52245229 self.source.notify("before reexpansion:" + repr(self.io))
52255230 while True:
52265231 nextl = self.io.peekline()
52355240 if line.startswith("<sbr/>"):
52365241 break
52375242 nextpart.append(line)
5238 if not list(filter(self.is_command_synopsis_line, nextpart)):
5243 if not list(filter(self.isCommandSynopsisLine, nextpart)):
52395244 break
5240 output = self.parse_command_synopsis()
5245 output = self.parseCommandSynopsis()
52415246 if not output:
52425247 break
52435248 self.output += output
52455250 break
52465251 self.io.tokenize() # Restore normal tokenization
52475252
5248 def __pretokenize(self, ln):
5253 def _Pretokenize(self, ln):
52495254 ln = detroff(ln)
52505255 # Fix a perldoc problem
52515256 ln = ln.replace(r"\*(--", "--")
52525257 # Remove ordinary troff highlight macros
5253 ln = troff_highlight_stripper.sub("", ln)
5258 ln = troffHighlightStripper.sub("", ln)
52545259 # Convert . . . to ...
52555260 ln = re.sub(r"\.\s+\.\s+\.", r"...", ln)
52565261 # Grotty little hack to make lexical analysis trivial. I got
52575262 # this idea from something I read about the first FORTRAN compiler.
5258 ln = CommandSynopsisSequenceParser.opt_file_ext.sub(r".@LB@\1@RB@", ln)
5263 ln = CommandSynopsisSequenceParser.optFileExt.sub(r".@LB@\1@RB@", ln)
52595264 ln = ln.replace(r"|.", r"&verbar;.")
52605265 ln = ln.replace("][", "] @GLUE@ [")
52615266 ln = ln.replace("|", " | ").replace("...", " ... ")
52675272 #ln = ln.replace(" &lt;", " &lt; ").replace("&gt;", " &gt; ")
52685273 return ln
52695274
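The "first FORTRAN compiler" trick that `_Pretokenize` alludes to is whitespace-padding metacharacters so that a plain `split()` becomes a sufficient lexer. A minimal sketch, omitting the troff-highlight stripping and `[.ext]` handling the real method performs:

```python
import re

def pretokenize(ln):
    """Pad synopsis metacharacters with spaces so str.split() lexes the
    line -- the trick _Pretokenize uses.  @GLUE@ records that two
    bracket groups were adjacent with no intervening space."""
    ln = re.sub(r"\.\s+\.\s+\.", "...", ln)   # ". . ." -> "..."
    ln = ln.replace("][", "] @GLUE@ [")       # preserve adjacency info
    ln = ln.replace("|", " | ").replace("...", " ... ")
    ln = ln.replace("[", " [ ").replace("]", " ] ")
    return ln.split()
```

After this pass the synopsis parser never needs character-level lexing; every token boundary is a space.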
5270 def is_command_synopsis_line(self, rawline):
5275 def isCommandSynopsisLine(self, rawline):
52715276 "Does this look like a command synopsis, not just a string of words?"
52725277 line = detroff(rawline)
52735278 # Pipe bar is a sure sign. So is equals, for GNU-style declarations.
52765281 # Don't be fooled by {}[] that are actually part of C declarations.
52775282 # Otherwise we can end up trying to parse as command synopses some
52785283 # things that should be treated as plain text. cph(1) is an example.
5279 has_c_keywords = False
5280 for keyword in c_declarators:
5284 hasCKeywords = False
5285 for keyword in cDeclarators:
52815286 if re.search(r"\b" + keyword + r"\b", line):
5282 has_c_keywords = True
5287 hasCKeywords = True
52835288 break
52845289 # Look for special characters that could be part of either
52855290 # function or command synopsis.
52885293 if c in line:
52895294 ambiguous = True
52905295 break
5291 if ambiguous and not has_c_keywords:
5296 if ambiguous and not hasCKeywords:
52925297 return 2
52935298 # We don't want to be fooled by text lines or option lists that
52945299 # begin with a dash but continue with running text.
5295 if CommandSynopsisSequenceParser.force_text.search(line):
5300 if CommandSynopsisSequenceParser.forceText.search(line):
52965301 return 0
52975302 # If the line begins with one of the command's aliases, always treat
52985303 # as a synopsis line. This catches the important special case where
53105315 if line.find("<command") > -1:
53115316 return 6
53125317 # In mdoc, synopsis sections aren't allowed to contain running text.
5313 if self.source.in_synopsis() and self.source.is_active("mdoc"):
5318 if self.source.inSynopsis() and self.source.isActive("mdoc"):
53145319 return 7
53155320 # Look for option starts in syntax sections only.
53165321 if line[0] == '-' or line.find(" -") > -1:
53205325 # If the first token is bolded, that probably means it's a command
53215326 # name that doesn't happen to match anything in the name section.
53225327 # Apply this test only when we're in a synopsis section.
5323 if self.source.in_synopsis() and rawline.startswith(r"\fB") or rawline.startswith(TroffInterpreter.ctrl + r"B "):
5328 if self.source.inSynopsis() and rawline.startswith(r"\fB") or rawline.startswith(TroffInterpreter.ctrl + r"B "):
53245329 return 9
53255330 # Nope, doesn't look like a command synopsis line
5326 if classify_verbosity in self.source.verbose:
5331 if classifyVerbosity in self.source.verbose:
53275332 self.source.notify("'%s' does not look like a synopsis line" % line.rstrip())
53285333 return 0
53295334
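`isCommandSynopsisLine` returns a distinct nonzero code for each heuristic so that verbose mode can report which test fired. A stripped-down analogue of the idea (the reason numbers and the particular tests below are illustrative, not doclifter's):

```python
import re

# Four lowercase words in a row strongly suggests running text,
# mirroring the forceText pattern above.
FORCE_TEXT = re.compile(r"\s[a-z]+\s[a-z]+\s[a-z]+\s[a-z]+\s")

def looks_like_synopsis(line, names=()):
    """Return a nonzero reason code when a line looks like command
    syntax rather than prose; 0 means 'treat as plain text'.
    Reason codes here are illustrative, not doclifter's."""
    if "|" in line or "=" in line:
        return 1          # pipe bar / GNU-style --opt=val: sure signs
    if FORCE_TEXT.search(line):
        return 0          # running text, even if it starts with a dash
    if line.split() and line.split()[0] in names:
        return 2          # starts with a known alias of the command
    if line.startswith("-") or " -" in line:
        return 3          # option start
    return 0
```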
5330 def parse_command_synopsis(self):
5335 def parseCommandSynopsis(self):
53315336 "Translate a synopsis line -- here is where the heavy work starts."
5332 if classify_verbosity in self.source.verbose:
5333 self.source.notify("parse_command_synopsis begins: refnames are %s" % list(self.refnames.keys()))
5337 if classifyVerbosity in self.source.verbose:
5338 self.source.notify("parseCommandSynopsis begins: refnames are %s" % list(self.refnames.keys()))
53345339 output = ""
53355340 try:
53365341 self.callnest = ""
53375342 self.groupnest = 0
5338 command = self.io.token_pop()
5343 command = self.io.tokenPop()
53395344 if command is None:
53405345 return ""
53415346 self.refnames[command] = True
5342 if parse_verbosity in self.source.verbose:
5347 if parseVerbosity in self.source.verbose:
53435348 self.source.notify("Command is %s" % command)
5344 if command in self.refnames or is_file_or_command_name(command):
5349 if command in self.refnames or isFileOrCommandName(command):
53455350 globalhints.post(command, "command")
53465351 output += (" <command>%s</command>" % command)
53475352 else:
5348 self.io.token_push(command)
5353 self.io.tokenPush(command)
53495354 raise LiftException(self.source, "first token %s in synopsis looks wrong." % command)
53505355 self.io.checkpoint()
53515356 while self.io.lines:
5352 if is_nltext_line(self.io.lines[0]):
5357 if isNltextLine(self.io.lines[0]):
53535358 break
5354 arg = self.__compile_arg()
5359 arg = self._CompileArg()
53555360 if arg == None:
53565361 break
53575362 output += " " + repr(arg) + "\n"
53585363 # This is where we short-stop the command-synopsis parser
53595364 # from eating trailing text sections.
53605365 if repr(arg) == "<sbr/>" and self.io.lines and \
5361 not self.is_command_synopsis_line(self.io.lines[0]):
5366 not self.isCommandSynopsisLine(self.io.lines[0]):
53625367 break
53635368 if output:
53645369 return "<cmdsynopsis>\n"+output+"</cmdsynopsis>\n"
53665371 return ""
53675372 except LiftException as e:
53685373 self.error = "command synopsis parse failed "
5369 if self.io.token_peek() is None:
5374 if self.io.tokenPeek() is None:
53705375 self.error += "at end of synopsis: %s" % (e.message)
53715376 else:
53725377 self.error += "on `%s' (%d): %s" % \
5373 (self.io.token_peek(), self.io.token_index, e.message)
5378 (self.io.tokenPeek(), self.io.tokenIndex, e.message)
53745379 self.io.unroll()
53755380 # Generate a useful error message:
53765381 self.context = "\n"
53795384 self.context += " $ "
53805385 self.context += " ".join(self.io.lookbehind[self.lastnest[-1]:])
53815386 else:
5382 self.context += " ".join(self.io.lookbehind[:self.io.token_index])
5387 self.context += " ".join(self.io.lookbehind[:self.io.tokenIndex])
53835388 self.context += " ^ "
5384 self.context += " ".join(self.io.lookbehind[self.io.token_index:])
5385 return "\n" + make_comment("\n" + self.error + "\n" + self.context) + "\n"
5389 self.context += " ".join(self.io.lookbehind[self.io.tokenIndex:])
5390 return "\n" + makeComment("\n" + self.error + "\n" + self.context) + "\n"
53865391
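The error context assembled in the exception handler above is a caret display: the tokens already consumed, a ` ^ ` marker, then the unconsumed remainder. Isolated, the idea is just:

```python
def error_context(tokens, index):
    """Build the caret-style context string parseCommandSynopsis emits
    on failure: consumed tokens, ' ^ ', then the rest."""
    return " ".join(tokens[:index]) + " ^ " + " ".join(tokens[index:])
```

The `lastnest` stack in the real code refines this by rewinding to the start of the innermost unclosed group when one exists.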
53875392 # Lexical tests
5388 def __is_next_special(self):
5389 if self.io.token_peek() in ("[", "]", "{", "}", "|", "...", "*"):
5393 def _IsNextSpecial(self):
5394 if self.io.tokenPeek() in ("[", "]", "{", "}", "|", "...", "*"):
53905395 self.confirmed = True
53915396 return True
53925397 else:
53935398 return False
5394 def __is_next_command(self):
5395 return self.io.token_peek() in self.refnames or globalhints.get(self.io.token_peek()) == "command"
5396 def __is_next_option(self):
5397 tnext = self.io.token_peek()
5399 def _IsNextCommand(self):
5400 return self.io.tokenPeek() in self.refnames or globalhints.get(self.io.tokenPeek()) == "command"
5401 def _IsNextOption(self):
5402 tnext = self.io.tokenPeek()
53985403 if tnext and tnext[0] in ('-', '+') or tnext.startswith("&plusmn;"):
53995404 self.confirmed = True
54005405 return True
54025407 return True
54035408 else:
54045409 return False
5405 def __is_next_numeric(self):
5410 def _IsNextNumeric(self):
54065411 try:
5407 int(self.io.token_peek())
5412 int(self.io.tokenPeek())
54085413 return True
54095414 except (ValueError, TypeError):
54105415 return False
5411 def __is_next_replaceable(self):
5412 tnext = self.io.token_peek()
5416 def _IsNextReplaceable(self):
5417 tnext = self.io.tokenPeek()
54135418 if tnext is None:
54145419 return False
54155420 # Good reasons for accepting funky leader characters:
54235428 # / -- dummy filename arguments
54245429 # \ -- TeX commands such as luatex.1
54255430 # & -- TeX commands such as luatex.1
5426 elif tnext[0].isalpha() or tnext[0] in "./=:'\"@%,#?\\&" or (tnext[:4] == "&lt;" and tnext != "&lt;") or self.__is_next_numeric() or is_file_or_command_name(tnext):
5431 elif tnext[0].isalpha() or tnext[0] in "./=:'\"@%,#?\\&" or (tnext[:4] == "&lt;" and tnext != "&lt;") or self._IsNextNumeric() or isFileOrCommandName(tnext):
54275432 return True
54285433 # nm.1
54295434 elif re.match("[0-9]+_[0-9]+", tnext):
54325437 else:
54335438 return False
54345439 # Manual-synopsis grammar
5435 def __compile_arg(self):
5440 def _CompileArg(self):
54365441 try:
54375442 self.callnest += " "
5438 if parse_verbosity in self.source.verbose:
5439 self.source.notify(self.callnest + "compile_arg(" + repr(self.io.token_peek()) + ")")
5440 res = self.__compile_arg1()
5443 if parseVerbosity in self.source.verbose:
5444 self.source.notify(self.callnest + "compileArg(" + repr(self.io.tokenPeek()) + ")")
5445 res = self._CompileArg1()
54415446 if res == None:
54425447 res = None # Failure is signaled by throwing an exception
54435448 else:
5444 while self.io.token_peek() == "\n":
5445 self.io.token_pop()
5446 if self.io.token_peek() in ("...", "*"):
5447 self.io.token_pop()
5449 while self.io.tokenPeek() == "\n":
5450 self.io.tokenPop()
5451 if self.io.tokenPeek() in ("...", "*"):
5452 self.io.tokenPop()
54485453 res.repeat = 1
5449 elif self.io.token_peek() == "|":
5450 self.io.token_pop()
5454 elif self.io.tokenPeek() == "|":
5455 self.io.tokenPop()
54515456 first = res
54525457 res = ParseNode("group")
54535458 res.children.append(first)
54545459 self.callnest += " "
5455 if parse_verbosity in self.source.verbose:
5460 if parseVerbosity in self.source.verbose:
54565461 self.source.notify("%sentering alternation"%self.callnest)
54575462 while True:
5458 if self.io.token_peek() in ("|", "\n"):
5459 self.io.token_pop()
5463 if self.io.tokenPeek() in ("|", "\n"):
5464 self.io.tokenPop()
54605465 continue
5461 if self.io.token_peek() not in ("]", "}") and not self.__is_next_command():
5462 element = self.__compile_arg1()
5466 if self.io.tokenPeek() not in ("]", "}") and not self._IsNextCommand():
5467 element = self._CompileArg1()
54635468 if element:
54645469 res.children.append(element)
54655470 else:
54665471 return res
54675472 continue
54685473 break
5469 if parse_verbosity in self.source.verbose:
5474 if parseVerbosity in self.source.verbose:
54705475 self.source.notify("%sexiting alternation"%self.callnest)
54715476 self.callnest = self.callnest[:-2]
5472 elif self.io.token_peek() == "@GLUE@":
5473 res = ParseNode(self.io.token_pop())
5474 if parse_verbosity in self.source.verbose:
5475 self.source.notify("%scompile_arg() returns %s: tokens are %s" % (self.callnest, repr(res), self.io.lookahead))
5477 elif self.io.tokenPeek() == "@GLUE@":
5478 res = ParseNode(self.io.tokenPop())
5479 if parseVerbosity in self.source.verbose:
5480 self.source.notify("%scompileArg() returns %s: tokens are %s" % (self.callnest, repr(res), self.io.lookahead))
54765481 finally:
54775482 self.callnest = self.callnest[:-2]
54785483 return res
5479 def __compile_arg1(self):
5484 def _CompileArg1(self):
54805485 try:
54815486 self.callnest += " "
5482 if parse_verbosity in self.source.verbose:
5483 self.source.notify(self.callnest + "compile_arg1(%s, %s)" % (repr(self.io.token_peek()), self.io.lookahead))
5487 if parseVerbosity in self.source.verbose:
5488 self.source.notify(self.callnest + "compileArg1(%s, %s)" % (repr(self.io.tokenPeek()), self.io.lookahead))
54845489 # Now get an argument
5485 if self.io.token_peek() is None:
5490 if self.io.tokenPeek() is None:
54865491 if self.groupnest == 0:
54875492 res = None
54885493 else:
54895494 raise LiftException(self.source, "unbalanced group in synopsis markup")
5490 elif self.io.token_peek() == "<sbr/>":
5491 self.io.token_pop()
5492 while self.io.token_peek() == '\n':
5493 self.io.token_pop()
5494 if not self.__is_next_command():
5495 elif self.io.tokenPeek() == "<sbr/>":
5496 self.io.tokenPop()
5497 while self.io.tokenPeek() == '\n':
5498 self.io.tokenPop()
5499 if not self._IsNextCommand():
54955500 res = ParseNode("sbr")
54965501 elif self.groupnest == 0:
54975502 res = None
54985503 else:
54995504 raise LiftException(self.source, "unterminated group in synopsis")
5500 elif self.io.token_peek() == "\n":
5501 self.io.token_pop()
5502 if self.groupnest == 0 and self.__is_next_command():
5505 elif self.io.tokenPeek() == "\n":
5506 self.io.tokenPop()
5507 if self.groupnest == 0 and self._IsNextCommand():
55035508 res = None
55045509 else:
55055510 res = ParseNode("\n")
5506 elif self.__is_next_option():
5507 option = self.io.token_pop()
5508 oldstyle = self.io.token_peek() == "@GLUE@"
5511 elif self._IsNextOption():
5512 option = self.io.tokenPop()
5513 oldstyle = self.io.tokenPeek() == "@GLUE@"
55095514 if oldstyle:
5510 self.io.token_pop()
5515 self.io.tokenPop()
55115516 res = ParseNode("arg")
55125517 gnustyle = option.split("=")
55135518 if len(gnustyle) > 1:
55175522 else:
55185523 optnode = ParseNode("option", option)
55195524 res.children.append(optnode)
5520 if self.io.lookahead and self.__is_next_replaceable():
5521 res.children.append(ParseNode("replaceable",self.io.token_pop()))
5525 if self.io.lookahead and self._IsNextReplaceable():
5526 res.children.append(ParseNode("replaceable",self.io.tokenPop()))
55225527 if oldstyle:
55235528 optnode.glue = ""
55245529 else:
55255530 optnode.glue = " "
55265531 self.source.localhints.post(re.escape(optnode.token), "option")
5527 elif self.__is_next_replaceable():
5532 elif self._IsNextReplaceable():
55285533 res = ParseNode("arg")
5529 res.children.append(ParseNode("replaceable", self.io.token_pop()))
5530 elif self.io.token_peek() and self.io.token_peek()[:4] in ("&lt;", "&gt;"):
5534 res.children.append(ParseNode("replaceable", self.io.tokenPop()))
5535 elif self.io.tokenPeek() and self.io.tokenPeek()[:4] in ("&lt;", "&gt;"):
55315536 res = ParseNode("redirect", None, "plain")
5532 res.token = self.io.token_pop()
5533 elif self.io.token_peek() in ("[", "{"):
5537 res.token = self.io.tokenPop()
5538 elif self.io.tokenPeek() in ("[", "{"):
55345539 self.callnest += " "
5535 if parse_verbosity in self.source.verbose:
5540 if parseVerbosity in self.source.verbose:
55365541 self.source.notify("%sentering group"%self.callnest)
55375542 self.groupnest += 1
5538 self.lastnest.append(self.io.token_index)
5539 self.io.token_pop()
5540 if self.io.token_peek() == "{":
5543 self.lastnest.append(self.io.tokenIndex)
5544 self.io.tokenPop()
5545 if self.io.tokenPeek() == "{":
55415546 required = "req"
55425547 else:
55435548 required = "opt"
55445549 lst = []
55455550 while True:
5546 if self.io.token_peek() == '\n':
5547 self.io.token_pop()
5551 if self.io.tokenPeek() == '\n':
5552 self.io.tokenPop()
55485553 continue
5549 if self.io.token_peek() not in (None, "]", "}"):
5550 lst.append(self.__compile_arg())
5554 if self.io.tokenPeek() not in (None, "]", "}"):
5555 lst.append(self._CompileArg())
55515556 continue
55525557 break
55535558 if len(lst) == 1:
55565561 res = ParseNode("arg")
55575562 res.children = lst
55585563 res.choice = required
5559 if self.io.token_peek() is None or self.io.token_peek() == "<sbr/>":
5564 if self.io.tokenPeek() is None or self.io.tokenPeek() == "<sbr/>":
55605565 raise LiftException(self.source, "expecting ] or }")
55615566 else:
5562 self.io.token_pop()
5567 self.io.tokenPop()
55635568 self.lastnest.pop()
55645569 self.groupnest -= 1
5565 if parse_verbosity in self.source.verbose:
5570 if parseVerbosity in self.source.verbose:
55665571 self.source.notify("%sexiting group"%self.callnest)
55675572 self.callnest = self.callnest[:-2]
55685573 else:
55695574 raise LiftException(self.source, "expecting argument")
5570 if parse_verbosity in self.source.verbose:
5571 self.source.notify("%scompile_arg1() returns %s: tokens are %s" % (self.callnest, res, " ".join(self.io.lookahead)))
5575 if parseVerbosity in self.source.verbose:
5576 self.source.notify("%scompileArg1() returns %s: tokens are %s" % (self.callnest, res, " ".join(self.io.lookahead)))
55725577 finally:
55735578 self.callnest = self.callnest[:-2]
55745579 return res
55755580
55765581
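The manual-synopsis grammar handled by `_CompileArg`/`_CompileArg1` above reduces, at its core, to recursive descent over bracket groups with a postfix repeat marker. A toy version, assuming a pre-tokenized list and ignoring options, commands, alternation, and `<sbr/>`:

```python
def parse_group(tokens, i=0, closer=None):
    """Minimal recursive descent for the bracket grammar: '[' opens an
    optional group, '{' a required one, '...' marks repetition of the
    preceding node.  Returns (children, next_index).  A hypothetical
    simplification of _CompileArg1, not doclifter's actual code."""
    children = []
    while i < len(tokens):
        tok = tokens[i]
        if tok in ("]", "}"):
            if tok != closer:
                raise ValueError("unbalanced group in synopsis markup")
            return children, i + 1
        if tok in ("[", "{"):
            sub, i = parse_group(tokens, i + 1, "]" if tok == "[" else "}")
            children.append(("opt" if tok == "[" else "req", sub))
        elif tok == "...":
            children.append(("repeat", children.pop()))
            i += 1
        else:
            children.append(("arg", tok))
            i += 1
    if closer:
        raise ValueError("unterminated group in synopsis")
    return children, i
```

The two `raise` branches correspond to the "unbalanced group" and "unterminated group" `LiftException`s in the hunk above.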
5577 def is_nltext_line(line):
5582 def isNltextLine(line):
55785583 "Are there patterns here that must be natural language?"
55795584 if line is None:
55805585 return False
56045609
56055610 class DisplayParser:
56065611 "Parse a block into function synopsis, command synopsis or display text."
5607 old_style_option_glue = re_compile(r"([^A-Za-z]-[A-Za-z]*)(?:\f.)([A-Za-z])")
5608 unparseable = re_compile(r"\$[A-Za-z]|=&gt;|[^:]//|@load") # Perl, Awk, and other nightmares
5609 def __init__(self, source, try_synopsis, literal, refnames=None):
5612 oldStyleOptionGlue = reCompile(r"([^A-Za-z]-[A-Za-z]*)(?:\f.)([A-Za-z])")
5613 unparseable = reCompile(r"\$[A-Za-z]|=&gt;|[^:]//|@load") # Perl, Awk, and other nightmares
5614 def __init__(self, source, trySynopsis, literal, refnames=None):
56105615 "Arrange the interpreter to accumulate synopsis lines in this object."
56115616 self.source = source
5612 self.try_synopsis = try_synopsis
5617 self.trySynopsis = trySynopsis
56135618 self.literal = literal
56145619 self.refnames = refnames
56155620 if self.refnames is None:
56215626 source.ignore("fi")
56225627 source.ignore("ft")
56235628 source.ignore("ti")
5624 # Some pages (e.g. xdr_char.3) use this inside .EX/.EE pairs
5629 # Some pages (e.g. xdrChar.3) use this inside .EX/.EE pairs
56255630 source.ignore("PP")
56265631 # .ta conveys no information in a Synopsis section,
56275632 # but outside one it may be our only clue that the man page
56285633 # author kluged up a table inline. So don't disable
56295634 # processing it in that case.
5630 if source.in_synopsis():
5635 if source.inSynopsis():
56315636 source.ignore("ta")
56325637 source.ignore("ce")
56335638 source.unignore("br")
56345639 source.unignore("nl")
56355640 source.unignore("in")
5636 def __wrap(self):
5641 def _Wrap(self):
56375642 # Re-enable normal commands
56385643 self.source.diversion = self.source.output
56395644 self.source.unignore("nf")
56415646 self.source.unignore("ft")
56425647 self.source.unignore("ti")
56435648 self.source.unignore("PP")
5644 if self.source.in_synopsis():
5649 if self.source.inSynopsis():
56455650 self.source.unignore("ta")
56465651 self.source.unignore("ce")
56475652 self.source.ignore("br")
56485653 self.source.ignore("nl")
56495654 self.source.ignore("in")
5650 def __detect_unparseable_synopsis(self):
5655 def _DetectUnparseableSynopsis(self):
56515656 "Detect stuff we just shouldn't try to parse."
56525657 # Blank sections
56535658 text = self.io.text().strip()
56635668 if text.split()[0].isupper() and self.source.find("SQL", backwards=True):
56645669 return True
56655670 return False
5666 def __emit_text(self, lines):
5671 def _EmitText(self, lines):
56675672 if not lines:
56685673 return ""
5669 if io_verbosity in self.source.verbose:
5670 self.source.notify("__emit_text('''%s''')\n" % "".join(lines))
5674 if ioVerbosity in self.source.verbose:
5675 self.source.notify("_EmitText('''%s''')\n" % "".join(lines))
56715676 for i in range(len(lines)):
56725677 if lines[i].startswith("<sbr"):
56735678 lines[i] = "\n"
56745679 # All set up. Now block-interpret this like ordinary running
56755680 # text, so ordinary commands will work.
56765681 tempout = []
5677 if self.source.in_synopsis():
5678 self.source.need_paragraph()
5679 self.source.interpret_block(lines, tempout)
5680 if self.source.in_synopsis():
5681 self.source.end_paragraph()
5682 if classify_verbosity in self.source.verbose:
5682 if self.source.inSynopsis():
5683 self.source.needParagraph()
5684 self.source.interpretBlock(lines, tempout)
5685 if self.source.inSynopsis():
5686 self.source.endParagraph()
5687 if classifyVerbosity in self.source.verbose:
56835688 self.source.notify("got unknown section")
56845689 lines = []
56855690 text = "".join(tempout)
56885693 return text
56895694 def transform(self):
56905695 "Parse and transform the display section we've gathered."
5691 if classify_verbosity in self.source.verbose:
5696 if classifyVerbosity in self.source.verbose:
56925697 self.source.notify("display parse begins, refnames = %s"%self.refnames)
56935698 # Undo redirection and re-enable normal commands.
5694 self.__wrap()
5699 self._Wrap()
56955700 # First, fold the lines. We have to handle continuations
56965701 # explicitly, since we may be outside the body section.
56975702 processed = []
56985703 for line in self.synopses:
56995704 if line[:4] != "<!--":
5700 line = self.source.expand_entities(line)
5705 line = self.source.expandEntities(line)
57015706 if processed and processed[-1][-2:] == "\\c":
57025707 processed[-1] = processed[-1][:-2] + line
57035708 else:
57045709 processed.append(line+"\n")
57055710 # Translate troff characters and XMLlify everything.
5706 if classify_verbosity in self.source.verbose:
5711 if classifyVerbosity in self.source.verbose:
57075712 self.source.notify("Before tokenization: %s\n" % processed)
57085713 self.io = LineTokenizer(processed,
5709 tokenizer_verbosity in self.source.verbose)
5710 if classify_verbosity in self.source.verbose:
5714 tokenizerVerbosity in self.source.verbose)
5715 if classifyVerbosity in self.source.verbose:
57115716 self.source.notify("After tokenization: \n" + "".join(self.io.lines))
57125717
57135718 # This code is failure-prone. It is coping as best it can with a mess.
57415746 # structured stuff.
57425747 parsepass = errors = 0
57435748 out = ""
5744 if not self.try_synopsis or self.__detect_unparseable_synopsis():
5749 if not self.trySynopsis or self._DetectUnparseableSynopsis():
57455750 err = None
57465751 out += "<synopsis>\n" + self.io.text().replace("<sbr/>", "\n") + "</synopsis>\n"
5747 if classify_verbosity in self.source.verbose:
5752 if classifyVerbosity in self.source.verbose:
57485753 self.source.notify("got unparseable synopsis ")
57495754 else:
57505755 classified = False
57515756 stash = []
57525757 while self.io.lines:
57535758 parsepass += 1
5754 if classify_verbosity in self.source.verbose:
5759 if classifyVerbosity in self.source.verbose:
57555760 self.source.notify("pass %d begins"% parsepass)
57565761 # Try to get a function synopsis
57575762 obj = FunctionSynopsisParser(self.io, self.source)
57585763 err = obj.error
57595764 if not err and obj.output:
5760 out += self.__emit_text(stash)
5765 out += self._EmitText(stash)
57615766 out += obj.output.replace("<sbr/>", "\n")
5762 if classify_verbosity in self.source.verbose:
5767 if classifyVerbosity in self.source.verbose:
57635768 self.source.notify("got function synopsis")
57645769 continue
57655770 elif obj.output and obj.language:
5766 if self.source.in_synopsis():
5771 if self.source.inSynopsis():
57675772 self.source.error(obj.error)
5768 elif classify_verbosity in self.source.verbose:
5773 elif classifyVerbosity in self.source.verbose:
57695774 self.source.warning(obj.error)
57705775 # Look for unambiguous natural language. This has to be
57715776 # done first because synopses for libraries not infrequently
57725777 # contain link instructions that can be mistaken for command
57735778 # synopses.
5774 while is_nltext_line(self.io.peekline()):
5779 while isNltextLine(self.io.peekline()):
57755780 classified = True
57765781 nextl = self.io.popline()
5777 if classify_verbosity in self.source.verbose:
5782 if classifyVerbosity in self.source.verbose:
57785783 self.source.warning("stashing '%s' (NL)" % repr(nextl))
57795784 stash.append(nextl)
57805785 # Now perhaps try for a command synopsis
5781 if self.source.in_synopsis():
5786 if self.source.inSynopsis():
57825787 obj = CommandSynopsisSequenceParser(self.io, self.source, self.refnames)
57835788 err = obj.error
57845789 if not err and obj.output:
5785 out += self.__emit_text(stash)
5790 out += self._EmitText(stash)
57865791 out += obj.output
5787 if classify_verbosity in self.source.verbose:
5792 if classifyVerbosity in self.source.verbose:
57885793 self.source.notify("got command synopsis '%s'"%out)
57895794 continue
57905795 elif obj.output and obj.confirmed:
57915796 errors += 1
57925797 out += obj.output
5793 if self.source.in_synopsis():
5798 if self.source.inSynopsis():
57945799 self.source.error(obj.error)
57955800 if self.source.verbose:
57965801 self.source.error("error context: %s" % obj.context)
5797 elif classify_verbosity in self.source.verbose:
5802 elif classifyVerbosity in self.source.verbose:
57985803 self.source.warning(obj.error)
57995804 # Look for a filename - some manual pages for
58005805 # configuration files just give the full path of the
58025807 # very interesting, but it does avoid throwing a
58035808 # warning.
58045809 line = self.io.peekline()
5805 if classify_verbosity in self.source.verbose:
5810 if classifyVerbosity in self.source.verbose:
58065811 self.source.notify("checking for plain filename")
58075812 if line and re.match(r"/[\S]*$", line):
5808 out += self.__emit_text(stash)
5813 out += self._EmitText(stash)
58095814 fnpart = "<filename>" + line.strip() + "</filename>"
58105815 if self.literal:
58115816 out += fnpart + "\n"
58125817 else:
58135818 out += "<para>" + fnpart + "</para>\n"
5814 if classify_verbosity in self.source.verbose:
5819 if classifyVerbosity in self.source.verbose:
58155820 self.source.notify("found plain filename")
58165821 self.io.popline()
58175822 classified = True
58215826 nextl = self.io.popline()
58225827 if nextl:
58235828 classified = False
5824 if classify_verbosity in self.source.verbose:
5829 if classifyVerbosity in self.source.verbose:
58255830 self.source.warning("stashing %s" % repr(nextl))
58265831 stash.append(nextl)
58275832 # We've pulled as much of the section as we can into structured
58285833 # markup. If there's anything left, treat it as plain text.
58295834 if stash:
5830 if classify_verbosity in self.source.verbose:
5835 if classifyVerbosity in self.source.verbose:
58315836 self.source.warning("emitting stash %s" % repr(stash))
5832 out += self.__emit_text(stash)
5837 out += self._EmitText(stash)
58335838 # Postprocess the output to remove glue and clean up empty tags
58345839 out = hotglue.sub("", out)
58355840 out = cleantag.sub("", out)
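`DisplayParser.transform`'s main loop above is a classify-or-stash scheme: structured parsers get first crack at the remaining lines, and anything unrecognized is stashed for later emission as plain text. A skeleton using a hypothetical `(consumed, markup)` parser interface, much simpler than doclifter's parser objects:

```python
def classify_display(lines, parsers):
    """Skeleton of the transform loop: try each structured parser in
    turn; lines nothing recognizes are stashed and flushed as plain
    text.  'parsers' take a line list and return (consumed, markup),
    with consumed == 0 meaning 'no match' -- a hypothetical interface."""
    out, stash, i = [], [], 0
    while i < len(lines):
        for parse in parsers:
            consumed, markup = parse(lines[i:])
            if consumed:
                if stash:                  # flush pending plain text
                    out.append(("text", stash))
                    stash = []
                out.append(("markup", markup))
                i += consumed
                break
        else:
            stash.append(lines[i])         # nothing matched: stash it
            i += 1
    if stash:
        out.append(("text", stash))
    return out
```

The real loop also tracks pass numbers and error counts, and demotes a whole section to `<synopsis>` when nothing structured can be recovered.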
58995904 name = "man"
59005905 exclusive = True
59015906 toptag = "refentry"
5902 immutable_set = set(["B", "I","R" ,"SM","CB","CR",
5907 immutableSet = set(["B", "I","R" ,"SM","CB","CR",
59035908 "BI","BR","IB","IR",
59045909 "IL","RB","RI","RL","SB","LB","LI","LR",
59055910 "P" ,"PP","LP","HP",
59065911 "IP","RS","RE","SH","SS","TP",
59075912 "UE","UN","UR","IX","BY",])
5908 ignore_set = set(["PD", "DT",
5913 ignoreSet = set(["PD", "DT",
59095914 # Undocumented and obscure
59105915 "LO", "PU", "UC", "l",
59115916 # Extensions from mtools doc set; we can safely ignore them
59175922 # Occurs in X Consortium manpages redundant with .ta,
59185923 # but not all such man pages have an identifiable X header.
59195924 "TA",])
5920 complain_set = set([])
5921 parabreak_set = set(["blank","P","PP","LP","HP","IP","TP",])
5922 sectionbreak_set = set(["SH","SS",])
5923 listbreak_set = set(["P","PP","LP","HP","SH","SS",])
5924 scoped_set = set(["RS"])
5925 complainSet = set([])
5926 parabreakSet = set(["blank","P","PP","LP","HP","IP","TP",])
5927 sectionbreakSet = set(["SH","SS",])
5928 listbreakSet = set(["P","PP","LP","HP","SH","SS",])
5929 scopedSet = set(["RS"])
59255930 translations = {
59265931 "\\*" : [
59275932 (r"\*R", "&reg;"),
59375942 (r"\*(WB", "WEB"),
59385943 # Some BSD pages use this without defining it
59395944 (r"\*(Ps", "Postscript"),
5945 ],
5946 "\\*[" : [
5947 # Some groff pages use this with a conditional
5948 # definition that we can't handle gracefully.
5949 (r"\*[tx]", "TeX"),
5950 # A rather understandable typo
5951 (r"\*[tex]", "TeX"),
59405952 ]
59415953 }
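The `translations` tables above map predefined troff string escapes (requested with `\*x`, `\*(xx`, or `\*[xxx]`) to fixed replacement text. A minimal sketch of how such a table could be applied, using a flattened stand-in table and a hypothetical `expand_strings` helper rather than the interpreter's real machinery:

```python
# Simplified stand-in for the nested translations tables above:
# troff string escapes mapped to their plain-text expansions.
translations = {
    r"\*R": "&reg;",
    r"\*(Ps": "Postscript",
    r"\*[tx]": "TeX",
}

def expand_strings(line, table):
    # Substitute longer escapes first so r"\*(Ps" is not shadowed
    # by a shorter escape that happens to be its prefix.
    for escape in sorted(table, key=len, reverse=True):
        line = line.replace(escape, table[escape])
    return line

print(expand_strings(r"Formatted with \*[tx] for \*(Ps output.", translations))
```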
59425954 # Tricky interaction with pod2man here; the Ip reduction will get called if
59485960 def __init__(self, source, verbose=0):
59495961 self.source = source
59505962 self.verbose = verbose
5951 self.hack_urls = True
5963 self.hackUrls = True
59525964 self.authors = None
59535965 self.volnum = []
59545966 self.refnames = {}
5955 self.seen_DS = False
5956 self.have_name = False
5957 self.stash_linkender = None
5967 self.seenDS = False
5968 self.haveName = False
5969 self.stashLinkender = None
59585970 self.manual = ""
59595971 self.msrc = ""
59605972 #self.systype = None
59635975 # to be locally overridden (which wouldn't be possible if it
59645976 # were on the ignore list.)
59655977 self.source.troff.macros["Id"] = []
5966 def fold_highlights(self, cmd, args):
5978 def foldHighlights(self, cmd, args):
59675979 # We need this to be a separate entry point for TP tag processing.
59685980 # .R is not one of the documented font-change macros, but it is
59695981 # occasionally used anyway (eg by sz.1) -- derived from Ultrix.
59705982 # .CB and .CR are groff extensions.
59715983 if cmd in ("B", "I", "R", "L", "SM", "CB", "CR"):
5972 return self.source.direct_highlight(cmd, args)
5984 return self.source.directHighlight(cmd, args)
59735985 elif cmd in ("BI","BR","BL",
59745986 "IB","IR","IL",
59755987 "RB","RI","RL",
59765988 "LI","LR","LB",
59775989 "SB"):
5978 return self.source.alternating_highlight(cmd, args)
5990 return self.source.alternatingHighlight(cmd, args)
59795991 else:
59805992 return None
59815993 @staticmethod
5982 def start_synopsis(args):
5994 def startSynopsis(args):
 59835995 "Are we looking at the start of a synopsis?"
59845996 # Must accept "SYNOPSIS" but reject "SYNOPSIS AND DESCRIPTION",
 59855997 # otherwise xdr(3) and related pages will be misparsed.
5986 return list(filter(synopsis_label.search, args)) \
5987 and not list(filter(description_label.search, args))
5988 def end_synopsis(self):
5989 self.source.sectionhooks.remove(self.end_synopsis)
5990 self.source.flush_transplant()
5998 return list(filter(synopsisLabel.search, args)) \
5999 and not list(filter(descriptionLabel.search, args))
6000 def endSynopsis(self):
6001 self.source.sectionhooks.remove(self.endSynopsis)
6002 self.source.flushTransplant()
59916003 self.source.unignore("Ve") # For Perl generated man pages
59926004 self.source.unignore("Vb") # For Perl generated man pages
59936005 self.source.unignore("Ip") # For Perl generated man pages
59946006 self.source.unignore("HP")
59956007 self.source.unignore("RS")
59966008 self.source.unignore("RE")
5997 def interpret(self, dummy_line, tokens, dummy_caller):
6009 def interpret(self, dummyLine, tokens, dummyCaller):
59986010 cmd = tokens[0][1:]
59996011 args = tokens[1:]
60006012 # Highlighting
6001 highlighted = self.fold_highlights(cmd, args)
6013 highlighted = self.foldHighlights(cmd, args)
60026014 if highlighted:
60036015 self.source.pushline(highlighted)
60046016 # Sectioning
60056017 elif cmd in ("blank", "P","PP","LP","HP") or (cmd=="IP" and (not args or not args[0])):
6006 if self.source.body_section():
6018 if self.source.bodySection():
60076019 self.source.paragraph()
6008 elif self.source.in_synopsis():
6020 elif self.source.inSynopsis():
60096021 self.source.emit("<sbr/>")
60106022 return True
60116023 elif cmd == "SH":
60166028 # apparently as a half-assed way to resume paragraphing after
60176029 # a list.
60186030 elif args[0] == "":
6019 self.source.need_paragraph()
6031 self.source.needParagraph()
60206032 return True
60216033 # Handle nasty perversity in cvsversion.1 that might be repeated
60226034 elif args[0].find("--") > -1:
60276039 # Skip blank lines and paragraph commands
60286040 while True:
60296041 line = self.source.popline()
6030 # Can't use paragraph_break() here lest we skip .TP or .IP
6042 # Can't use paragraphBreak() here lest we skip .TP or .IP
60316043 if line and not line[:3] in (TroffInterpreter.ctrl + "PP", TroffInterpreter.ctrl + "LP", TroffInterpreter.ctrl + "P"):
60326044 self.source.pushline(line)
60336045 break
60356047 # Now do processing that is specific to the section type.
60366048 # The self.source.synopsis check avoids croaking on CrtImgType(3)
60376049 # and other pages that use NAME as a body section name
6038 if name_synonyms.match(deemphasize(args[0])) and len(args)==1 and not self.source.synopsis:
6050 if nameSynonyms.match(deemphasize(args[0])) and len(args)==1 and not self.source.synopsis:
60396051 self.source.sectname = "NAME"
6040 if namesection_verbosity in self.verbose:
6052 if namesectionVerbosity in self.verbose:
60416053 self.source.notify("I see a name section")
6042 self.have_name = True
6054 self.haveName = True
60436055 namesects = [""]
60446056 self.source.ignore("nf")
60456057 self.source.ignore("fi")
60496061 while True:
60506062 line = self.source.popline()
60516063 # Here's how we exit processing name sections
6052 if line is None or self.source.section_break(line):
6064 if line is None or self.source.sectionBreak(line):
60536065 self.source.pushline(line)
60546066 break
60556067 # Cope with man pages generated by Texinfo that have a
60596071 self.source.pushline(line)
60606072 break
60616073 # Discard other blank lines and comments
6062 if not line or is_comment(line):
6074 if not line or isComment(line):
60636075 continue
60646076 # Discard lines consisting only of a command leader
60656077 # (as in groff_mdoc(7)).
60666078 if line == TroffInterpreter.ctrl:
60676079 continue
60686080 # Maybe we ought to generate something here?
6069 if match_command(line, "IX"):
6081 if matchCommand(line, "IX"):
60706082 continue
60716083 # Cope with some Pod2Man brain-death. It issues lines like
60726084 # .IP "\fBfoo\fR \- foo the bar" 4
60756087 if m:
60766088 line = m.group(1)
60776089 # Cope with .TP in name sections
6078 if match_command(line, "TP"):
6090 if matchCommand(line, "TP"):
60796091 continue
6080 if is_command(line) and self.source.ignorable(line):
6092 if isCommand(line) and self.source.ignorable(line):
60816093 continue
60826094 # Dash on a line means we start a new namediv,
60836095 # providing some previous line has started one.
6084 if namesection_verbosity in self.verbose:
6096 if namesectionVerbosity in self.verbose:
60856097 self.source.notify("Before section test: %s\n" % repr(line))
60866098 if r"\-" in line and r"\-" in namesects[-1] and not namesects[-1].endswith(r"\-"):
6087 if namesection_verbosity in self.verbose:
6099 if namesectionVerbosity in self.verbose:
60886100 self.source.notify("New name section")
60896101 namesects.append("")
60906102 # So does a break or paragraphing command
6091 if line.startswith(".br") or self.source.paragraph_break(line):
6103 if line.startswith(".br") or self.source.paragraphBreak(line):
60926104 namesects.append("")
60936105 continue
60946106 # Some selinux pages require this:
61006112 break
61016113 # All other commands have to be ignored;
61026114 # this is necessary to throw out Pod2Man generated crud
6103 if is_command(line):
6115 if isCommand(line):
61046116 continue
61056117 # Finally, something we can append to the current namesect
61066118 namesects[-1] += " " + line
6107 if namesection_verbosity in self.verbose:
6119 if namesectionVerbosity in self.verbose:
61086120 self.source.notify("Assembled name sections: %s\n" % repr(namesects))
61096121 self.source.unignore("nf")
61106122 self.source.unignore("fi")
61196131 # we don't have enough marker information to parse
61206132 # it, so just barf.
61216133 try:
6122 (name, description) = parse_name_section(namesect)
6134 (name, description) = parseNameSection(namesect)
61236135 except (TypeError, ValueError):
61246136 self.source.error("ill-formed NAME section '%s' in %s, giving up." % (namesect, self.source.file))
61256137 return
61266138 self.source.emit("<refnamediv>")
61276139 for nid in [x.strip() for x in name.split(",")]:
6128 nid = troff_highlight_stripper.sub("", nid)
6140 nid = troffHighlightStripper.sub("", nid)
61296141 self.refnames[nid] = True
61306142 self.source.emit("<refname>%s</refname>" % nid)
61316143 self.source.emit("<refpurpose>%s</refpurpose>"%description)
61326144 self.source.emit("</refnamediv>")
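The hunk above assembles each accumulated name section and splits it into comma-separated `<refname>` entries plus one `<refpurpose>`. A minimal sketch of that split under simplified assumptions (the helper name and regex here are illustrative, not the real `parse_name_section`, and ignore highlight stripping):

```python
import re

# Split a man-page NAME line like "ls, dir \- list directory contents"
# into the name list and the purpose string, mirroring the
# refnamediv emission above in miniature.
def split_name_section(namesect):
    m = re.match(r"\s*(.*?)\s*\\?-+\s*(.*)", namesect)
    if not m:
        raise ValueError("ill-formed NAME section")
    names = [n.strip() for n in m.group(1).split(",")]
    purpose = m.group(2).strip()
    return names, purpose

print(split_name_section(r"ls, dir \- list directory contents"))
```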
6133 elif ManInterpreter.start_synopsis(args) and not self.source.synopsis:
6134 self.source.end_paragraph()
6145 elif ManInterpreter.startSynopsis(args) and not self.source.synopsis:
6146 self.source.endParagraph()
61356147 self.source.sectname = "SYNOPSIS"
61366148 self.source.ignore("RS")
61376149 self.source.ignore("RE")
61396151 self.source.ignore("Ve") # For Perl generated man pages
61406152 self.source.ignore("Vb") # For Perl generated man pages
61416153 self.source.ignore("Ip") # For Perl generated man pages
6142 self.source.sectionhooks.append(self.end_synopsis)
6143 try_synopsis = self.volnum != "3pm" and self.manual.find("Perl") == -1 or self.msrc.find("perl ") == -1
6154 self.source.sectionhooks.append(self.endSynopsis)
6155 trySynopsis = self.volnum != "3pm" and self.manual.find("Perl") == -1 or self.msrc.find("perl ") == -1
61446156 self.source.synopsis = DisplayParser(self.source,
6145 try_synopsis,
6157 trySynopsis,
61466158 False,
61476159 self.refnames)
6148 elif not self.source.synopsis and self.source.find(synopsis_header):
6149 if section_verbosity in self.source.verbose:
6160 elif not self.source.synopsis and self.source.find(synopsisHeader):
6161 if sectionVerbosity in self.source.verbose:
61506162 self.source.notify("transplanting section...")
61516163 self.source.diversion = self.source.transplant
6152 self.source.push_section(1, " ".join(args))
6153 else:
6154 self.source.declare_body_start()
6155 self.source.push_section(1, " ".join(args))
6164 self.source.pushSection(1, " ".join(args))
6165 else:
6166 self.source.declareBodyStart()
6167 self.source.pushSection(1, " ".join(args))
61566168 elif cmd == "SS":
61576169 self.source.diversion = self.source.output
61586170 if not args:
61596171 args = self.source.popline().split()
6160 if self.source.body_section():
6172 if self.source.bodySection():
61616173 # Normally SS sections are at depth 2,
61626174 # but there are exceptions...
61636175 if not self.source.nonblanks:
61676179 else:
61686180 newdepth = 2
61696181 # Now that we've calculated the new depth...
6170 self.source.push_section(newdepth, " ".join(args).strip())
6171 elif args[0] in qt_headers:
6182 self.source.pushSection(newdepth, " ".join(args).strip())
6183 elif args[0] in qtHeaders:
61726184 self.source.pushline(args[0])
61736185 else:
61746186 # In case the Synopsis section contains a subsection,
61756187 # as in cph.1, we want to start a new *first* level section.
6176 self.source.push_section(1, " ".join(args))
6188 self.source.pushSection(1, " ".join(args))
61776189 elif cmd == "TH":
61786190 args = args[:5]
61796191 args += (5 - len(args)) * [""]
61806192 (title, self.volnum, date, self.msrc, self.manual) = args
6181 self.source.in_preamble = False
6182 if io_verbosity in self.source.verbose:
6193 self.source.inPreamble = False
6194 if ioVerbosity in self.source.verbose:
61836195 self.source.notify("exiting preamble")
61846196 # The .TH fields are often abused. Check that the date at
61856197 # least has a number in it; if not, assume the date field was
62076219 # Lists
62086220 elif cmd == "IP":
62096221 # Ignore this if in a Synopsis section.
6210 if self.source.in_synopsis():
6222 if self.source.inSynopsis():
62116223 self.source.pushline(" ".join(args))
62126224 else:
6213 self.source.end_paragraph(label=cmd)
6225 self.source.endParagraph(label=cmd)
62146226 # Discard second argument of IP tag
62156227 args = args[:1]
62166228 # Some tags can turn into an ItemizedList. Give
62196231 # perlhack(1) and gcc(1) are pages where this actually matters.
62206232 bullet = None
62216233 if len(args):
6222 bullet = ip_tag_mapping.get(args[0])
6234 bullet = ipTagMapping.get(args[0])
62236235 if bullet:
6224 self.source.emit_itemizedlist("IP+"+bullet, bullet)
6236 self.source.emitItemizedlist("IP+"+bullet, bullet)
62256237 # Otherwise, emit a variable list
62266238 else:
6227 self.source.emit_variablelist(cmd, " ".join(args))
6239 self.source.emitVariablelist(cmd, " ".join(args))
62286240 elif cmd == "TP":
62296241 # Ignore this if in a Synopsis section.
6230 if self.source.in_synopsis():
6242 if self.source.inSynopsis():
62316243 self.source.pushline(" ".join(args))
62326244 elif blankline.match(self.source.peekline()):
62336245 pass # Common malformation at end of lists
6234 elif self.source.paragraph_break(self.source.peekline()):
6246 elif self.source.paragraphBreak(self.source.peekline()):
62356247 pass # Another common malformation at end of lists
62366248 else:
6237 self.source.end_paragraph(label=cmd)
6249 self.source.endParagraph(label=cmd)
62386250 # Can't process this until one more text line has been emitted
6239 self.source.trap_emit(".TPINTERNAL ")
6251 self.source.trapEmit(".TPINTERNAL ")
62406252 elif cmd == "TPINTERNAL":
6241 self.source.emit_variablelist("TP", " ".join(args))
6253 self.source.emitVariablelist("TP", " ".join(args))
62426254 # Relative indent changes
62436255 elif cmd == "RS":
62446256 # Check for no-ops generated by pod2man.
62546266 self.source.pushlist("RS")
62556267 return True
62566268 # No markup for .nf+.RS, too, but note it on the stack
6257 elif self.source.last_tag("<literallayout"):
6269 elif self.source.lastTag("<literallayout"):
62586270 self.source.pushlist("RS")
62596271 return True
62606272 # If we're in list content, nest the list a level deeper
6261 elif self.source.stash_indents:
6273 elif self.source.stashIndents:
62626274 if nextl.startswith(".TP") or nextl.startswith(".IP"):
62636275 self.source.pushlist("RS", None)
62646276 else:
6265 self.source.begin_block("blockquote", remap='RS')
6277 self.source.beginBlock("blockquote", remap='RS')
62666278 self.source.pushlist("RS", "blockquote")
6267 self.source.need_paragraph()
6279 self.source.needParagraph()
62686280 return True
62696281 # Next check for single-line .RS/.RE blocks.
62706282 # This will fail if the line has a highlight.
6271 elif not is_command(self.source.peekline()):
6283 elif not isCommand(self.source.peekline()):
62726284 text = self.source.popline()
62736285 if self.source.peekline() == TroffInterpreter.ctrl + "RE":
62746286 self.source.popline()
6275 self.source.begin_block("literallayout", remap='RS')
6287 self.source.beginBlock("literallayout", remap='RS')
62766288 self.source.emit(text)
6277 self.source.end_block("literallayout", remap='RE')
6289 self.source.endBlock("literallayout", remap='RE')
62786290 return True
62796291 else:
62806292 self.source.pushline(text)
62816293 # Fall through
62826294 # None of the special cases fired. Punt; treat as blockquote
62836295 self.source.pushlist("RS", "blockquote")
6284 self.source.begin_block("blockquote", remap='RS')
6296 self.source.beginBlock("blockquote", remap='RS')
62856297 elif cmd == "RE":
62866298 self.source.poplist("RS", remap="RE")
6287 self.source.need_paragraph()
6299 self.source.needParagraph()
62886300 # FSF extension macros
62896301 elif cmd == "UE": # End of link text
6290 if self.source.body_section():
6291 self.source.pushline(self.stash_linkender)
6302 if self.source.bodySection():
6303 self.source.pushline(self.stashLinkender)
62926304 elif cmd == "UN": # Anchor for a hyperlink target
62936305 if not args:
62946306 self.source.error("UN macro requires an argument")
6295 elif self.source.body_section():
6307 elif self.source.bodySection():
62966308 if self.source.peekline()[:3] in (TroffInterpreter.ctrl + "SH", TroffInterpreter.ctrl + "SS"):
6297 self.source.stash_id = args[0]
6309 self.source.stashId = args[0]
62986310 else:
62996311 if self.source.docbook5:
6300 self.source.pushline("<anchor xml:id='%s'/>" % self.source.make_id_from_title(tokens[1]))
6312 self.source.pushline("<anchor xml:id='%s'/>" % self.source.makeIdFromTitle(tokens[1]))
63016313 else:
6302 self.source.pushline("<anchor id='%s'/>" % self.source.make_id_from_title(tokens[1]))
6314 self.source.pushline("<anchor id='%s'/>" % self.source.makeIdFromTitle(tokens[1]))
63036315 elif cmd == "UR": # Start of link text
63046316 if not args:
63056317 self.source.error("UR macro requires an argument")
6306 elif self.source.body_section():
6318 elif self.source.bodySection():
63076319 if args[0][0] == "#":
6308 self.source.pushline("<link linkend='%s'>" % self.source.id_from_title(args[0][1:]))
6309 self.stash_linkender = "</link>"
6320 self.source.pushline("<link linkend='%s'>" % self.source.idFromTitle(args[0][1:]))
6321 self.stashLinkender = "</link>"
63106322 else:
63116323 self.source.pushline("<ulink url='%s'>" % args[0])
6312 self.stash_linkender = "</ulink>"
6313 self.hack_urls = False
6324 self.stashLinkender = "</ulink>"
6325 self.hackUrls = False
63146326 elif cmd == "ME": # End of link text
6315 if self.source.body_section():
6316 self.source.pushline(self.stash_linkender)
6327 if self.source.bodySection():
6328 self.source.pushline(self.stashLinkender)
63176329 elif cmd == "MT": # Start of mail address text
63186330 if not args:
63196331 self.source.error("MT macro requires an argument")
6320 elif self.source.body_section():
6332 elif self.source.bodySection():
63216333 self.source.pushline("<ulink url='mailto:%s'>" % args[0])
6322 self.stash_linkender = "</ulink>"
6334 self.stashLinkender = "</ulink>"
63236335 # Indexing
63246336 elif cmd == "IX":
6325 if self.source.body_section() and len(tokens) > 1:
6337 if self.source.bodySection() and len(tokens) > 1:
63266338 # Discard Perl section indicators
63276339 if tokens[1] in ("Name","Title","Header","Subsection","Item"):
63286340 tokens = tokens[2:]
63366348 elif cmd == "CT":
63376349 self.source.pushline("&lt;CTRL/%s&lt;" % args[0])
63386350 elif cmd == "Ds":
6339 if not self.source.in_synopsis():
6340 self.source.begin_block("literallayout", remap="Ds")
6351 if not self.source.inSynopsis():
6352 self.source.beginBlock("literallayout", remap="Ds")
63416353 elif cmd == "De":
6342 if not self.source.in_synopsis():
6343 self.source.end_block("literallayout", remap="De")
6354 if not self.source.inSynopsis():
6355 self.source.endBlock("literallayout", remap="De")
63446356 elif cmd == "EX" and "EX" not in self.source.troff.macros:
6345 if not self.source.in_synopsis():
6346 self.source.begin_block("literallayout", remap="EX")
6357 if not self.source.inSynopsis():
6358 self.source.beginBlock("literallayout", remap="EX")
63476359 elif cmd == "EE" and "EE" not in self.source.troff.macros:
6348 if not self.source.in_synopsis():
6349 self.source.end_block("literallayout", remap="EE")
6360 if not self.source.inSynopsis():
6361 self.source.endBlock("literallayout", remap="EE")
63506362 elif cmd == "NT" and "NT" not in self.source.troff.macros:
6351 self.source.begin_block("note", remap="NT")
6363 self.source.beginBlock("note", remap="NT")
63526364 elif cmd == "NE" and "NE" not in self.source.troff.macros:
6353 self.source.end_block("note", remap="NE")
6365 self.source.endBlock("note", remap="NE")
63546366 elif cmd == "RN":
63556367 self.source.pushline("<keycap>RETURN</keycap>")
63566368 elif cmd == "PN":
63966408 self.source.popline()
63976409 self.source.popline()
63986410 elif self.source.find("DE"):
6399 self.source.begin_block("literallayout", remap='DS')
6400 self.seen_DS = True
6411 self.source.beginBlock("literallayout", remap='DS')
6412 self.seenDS = True
64016413 elif cmd == "DE":
6402 if self.seen_DS:
6403 self.source.end_block("literallayout", remap='DE')
6414 if self.seenDS:
6415 self.source.endBlock("literallayout", remap='DE')
64046416 else:
64056417 return False
64066418 # Groff extensions
64136425 pass
64146426 # Use our reductions as fallbacks
64156427 elif cmd in ManInterpreter.reductions:
6416 replace_with = ManInterpreter.reductions[cmd]
6417 self.source.pushline(TroffInterpreter.ctrl + replace_with + " " + quoteargs(args))
6428 replaceWith = ManInterpreter.reductions[cmd]
6429 self.source.pushline(TroffInterpreter.ctrl + replaceWith + " " + quoteargs(args))
64186430 # Recover from some common typos
64196431 elif cmd[0] in "BIR" and cmd[1].islower() and len(cmd) > 3:
64206432 newtokens = [TroffInterpreter.ctrl + cmd[:1]] + [cmd[1:]] + tokens[1:]
64426454 # groff(1) accepts it, groffer(1) complains and accepts it.
64436455 text = text.replace("\n.I&nbsp;", "\n.I ")
64446456 # Some versions of db2man.xsl have a bad bug. Work around it.
6445 text = re_compile("(\\.TH.*)\\.SH NAME").sub(r"\1\n.SH NAME", text)
6457 text = reCompile("(\\.TH.*)\\.SH NAME").sub(r"\1\n.SH NAME", text)
64466458 # Some versions of netpbm makeman have a bad bug. Work around it.
6447 text = re_compile("(\\.UN.*)\\.SH").sub(r"\1\n.SH", text)
6459 text = reCompile("(\\.UN.*)\\.SH").sub(r"\1\n.SH", text)
64486460 # Reverse a transformation that db2man does when translating <note>.
64496461 # FIXME: turn the following paragraph into a <note>.
64506462 text = text.replace(".it 1 an-trap\n.nr an-no-space-flag 1\n.nr an-break-flag 1\n.br\n", "")
64516463 # Brain-damage emitted often by Pod2Man, occasionally by humans
6452 text = re_compile("\\.PD.*\n+(.S[hHsS])").sub("\n\n\\1", text)
6453 text = re_compile("\\.RS.*\n+(.S[hHsS])").sub("\n\n\\1", text)
6464 text = reCompile("\\.PD.*\n+(.S[hHsS])").sub("\n\n\\1", text)
6465 text = reCompile("\\.RS.*\n+(.S[hHsS])").sub("\n\n\\1", text)
64546466 # Cheating way to avoid some annoying warnings on function pages
6455 if "NAME\nfeature_test_macros" not in text:
6456 text = re_compile("(\\.in [+-][0-9]*n)?(\nFeature Test)", re.I).sub("\n.SH FEATURE TEST\n\n\\2", text)
 6467 if "NAME\nfeature_test_macros" not in text:
6468 text = reCompile("(\\.in [+-][0-9]*n)?(\nFeature Test)", re.I).sub("\n.SH FEATURE TEST\n\n\\2", text)
64576469 return text
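The preprocess pass above repairs known generator bugs with regex rewrites before macro interpretation. A sketch of the db2man workaround, which re-breaks a `.SH NAME` request that some generators fuse onto the end of the `.TH` line (helper name is hypothetical):

```python
import re

# Some db2man versions emit ".TH ... .SH NAME" on one line;
# split the fused .SH NAME back onto its own line.
def split_fused_th(text):
    return re.sub(r"(\.TH.*)\.SH NAME", r"\1\n.SH NAME", text)

broken = '.TH FOO 1 "March 2021".SH NAME\nfoo \\- do things\n'
print(split_fused_th(broken))
```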
64586470 def postprocess(self, text):
64596471 # Page might be generated crap with no sections, which can't be lifted.
64606472 # This happens with some pod2man pages.
6461 if not self.source.section_count:
6473 if not self.source.sectionCount:
64626474 if self.source.verbose:
64636475 self.source.warning("Pod2Man page with no body.")
64646476 text = text.replace("</refentry>", "") + empty
6465 elif not self.have_name:
6477 elif not self.haveName:
64666478 raise LiftException(self.source, "no name section in %s, can't be lifted." % self.source.file)
64676479 # If there was no explicit URL markup, process implicit ones
6468 if self.hack_urls and not self.source.is_active("mwww") and not xmlns_re.search(text):
6469 text = url_re.sub(r"<ulink url='\g<url>'>\g<url></ulink>", text)
6480 if self.hackUrls and not self.source.isActive("mwww") and not xmlnsRe.search(text):
6481 text = urlRe.sub(r"<ulink url='\g<url>'>\g<url></ulink>", text)
64706482 foundit = text.rfind("SEE ALSO")
64716483 if foundit > -1:
64726484 before = text[:foundit]
64916503 name = "pod2man"
64926504 exclusive = False
64936505 toptag = "refentry"
6494 immutable_set = set(["Sp","Ip","Sh","Vb","Ve",])
6495 ignore_set = set([])
6496 complain_set = set([])
6497 parabreak_set = set(["Sp", "Ip",])
6498 sectionbreak_set = set(["Sh",])
6499 listbreak_set = set(["Sh",])
6500 scoped_set = set([])
6506 immutableSet = set(["Sp","Ip","Sh","Vb","Ve",])
6507 ignoreSet = set([])
6508 complainSet = set([])
6509 parabreakSet = set(["Sp", "Ip",])
6510 sectionbreakSet = set(["Sh",])
6511 listbreakSet = set(["Sh",])
6512 scopedSet = set([])
65016513 translations = {
65026514 "\\*" : [
65036515 (r'\*`', "&acute;"),
65226534 def __init__(self, source, verbose=0):
65236535 self.source = source
65246536 self.verbose = verbose
6525 def interpret(self, dummy_line, tokens, dummy_caller):
6537 def interpret(self, dummyLine, tokens, dummyCaller):
65266538 cmd = tokens[0][1:]
65276539 args = tokens[1:]
65286540 # Sectioning
65296541 if cmd == "Sp" or cmd=="Ip" and (not args or not args[0] or args[0][0] in string.digits):
6530 if self.source.body_section():
6542 if self.source.bodySection():
65316543 self.source.paragraph()
6532 elif self.source.in_synopsis():
6544 elif self.source.inSynopsis():
65336545 self.source.emit("<sbr/>")
65346546 elif cmd == "Sh":
65356547 self.source.pushline(quoteargs([TroffInterpreter.ctrl + "SS"] + args))
65366548 elif cmd == "Vb":
6537 if self.source.body_section():
6538 self.source.begin_block("literallayout", remap="Vb")
6549 if self.source.bodySection():
6550 self.source.beginBlock("literallayout", remap="Vb")
65396551 elif cmd == "Ve":
6540 if self.source.body_section():
6541 self.source.end_block("literallayout", remap="Ve")
6552 if self.source.bodySection():
6553 self.source.endBlock("literallayout", remap="Ve")
65426554 elif cmd == "Ip":
65436555 if tokens[1]:
6544 self.source.emit_variablelist("Ip", tokens[1])
6545 else:
6546 self.source.emit_itemizedlist("Ip", 'bullet')
6556 self.source.emitVariablelist("Ip", tokens[1])
6557 else:
6558 self.source.emitItemizedlist("Ip", 'bullet')
65476559 else:
65486560 return False
65496561 return True
65556567 # special characters \*(--, \*(PI, \*(L", \*(R", \*(C+, \*(C',
65566568 # and \*(`.S
65576569 lines = text.split("\n")
6558 starter = re_compile(r"\.[ST]H")
6570 starter = reCompile(r"\.[ST]H")
65596571 while not starter.match(lines[0]):
65606572 lines.pop(0)
65616573 self.source.lineno += 1
65756587 name = "reStructuredText"
65766588 exclusive = False
65776589 toptag = "refentry"
6578 immutable_set = set([])
6579 ignore_set = set([])
6580 complain_set = set([])
6581 parabreak_set = set([])
6582 sectionbreak_set = set([])
6583 listbreak_set = set([])
6584 scoped_set = set([])
6590 immutableSet = set([])
6591 ignoreSet = set([])
6592 complainSet = set([])
6593 parabreakSet = set([])
6594 sectionbreakSet = set([])
6595 listbreakSet = set([])
6596 scopedSet = set([])
65856597 translations = {}
65866598 requires = [ManInterpreter]
65876599 def __init__(self, source, verbose=0):
65886600 self.source = source
65896601 self.verbose = verbose
6590 def interpret(self, dummy_line, tokens, dummy_caller):
6602 def interpret(self, dummyLine, tokens, dummyCaller):
65916603 cmd = tokens[0][1:]
65926604 # Ignore indent commands for now. It's possible we might want
65936605 # to map them to .RS/.RE later (and put INDENT in the scoped
66056617 # can emulate those easily enough.
66066618 lines = text.split("\n")
66076619 savelines = []
6608 cookie = re_compile(r"nr rst2man-indent-level 0")
6609 starter = re_compile(r"\.SH")
6620 cookie = reCompile(r"nr rst2man-indent-level 0")
6621 starter = reCompile(r"\.SH")
66106622 while lines and not cookie.search(lines[0]):
66116623 savelines.append(lines.pop(0))
66126624 if savelines:
66286640 name = "DocBook"
66296641 exclusive = False
66306642 toptag = "refentry"
6631 immutable_set = set([])
6632 ignore_set = set([])
6633 complain_set = set([])
6634 parabreak_set = set([])
6635 sectionbreak_set = set([])
6636 listbreak_set = set([])
6637 scoped_set = set([])
6643 immutableSet = set([])
6644 ignoreSet = set([])
6645 complainSet = set([])
6646 parabreakSet = set([])
6647 sectionbreakSet = set([])
6648 listbreakSet = set([])
6649 scopedSet = set([])
66386650 translations = {}
66396651 requires = [ManInterpreter]
66406652 def __init__(self, source, verbose=0):
66416653 self.source = source
66426654 self.verbose = verbose
6643 def interpret(self, dummy, tokens, dummy_caller):
6655 def interpret(self, dummy, tokens, dummyCaller):
66446656 cmd = tokens[0][1:]
66456657 # The generated inclusion defines some new commands.
66466658 # We might want or need to interpret these sometime.
66586670 # Comment out macro definitions in the stylesheet-generated inclusion.
66596671 # Better than removing them, because line numbers won't be perturbed.
66606672 lines = text.split("\n")
6661 th = re_compile(r"\.TH")
6662 starter = re_compile(r"(MAIN CONTENT STARTS HERE|^\.SH)")
6673 th = reCompile(r"\.TH")
6674 starter = reCompile(r"(MAIN CONTENT STARTS HERE|^\.SH)")
66636675 i = 0
66646676 while not th.search(lines[i]):
66656677 i += 1
66776689 name = "foojzs"
66786690 exclusive = False
66796691 toptag = "refentry"
6680 immutable_set = set([])
6681 ignore_set = set([])
6682 complain_set = set([])
6683 parabreak_set = set([])
6684 sectionbreak_set = set([])
6685 listbreak_set = set([])
6686 scoped_set = set([])
6692 immutableSet = set([])
6693 ignoreSet = set([])
6694 complainSet = set([])
6695 parabreakSet = set([])
6696 sectionbreakSet = set([])
6697 listbreakSet = set([])
6698 scopedSet = set([])
66876699 translations = {}
66886700 requires = [ManInterpreter]
66896701 def __init__(self, source, verbose=0):
66906702 self.source = source
66916703 self.verbose = verbose
6692 def interpret(self, dummy, dummy_tokens, dummy_caller):
6704 def interpret(self, dummy, dummyTokens, dummyCaller):
66936705 return False
66946706 def preprocess(self, text):
66956707 # Replace macro definitions in the header with blank lines.
66966708 # Better than removing them, because line numbers won't be perturbed.
66976709 lines = text.split("\n")
6698 th = re_compile(r"\.TH")
6699 starter = re_compile(r"\.SH")
6710 th = reCompile(r"\.TH")
6711 starter = reCompile(r"\.SH")
67006712 i = 0
67016713 while not th.search(lines[i]):
67026714 i += 1
67166728 toptag = "refentry"
67176729 # Some of the X local macros (Ds, De, NT, NE, PN) are Ultrix extensions
67186730 # already handled by ManInterpreter.
6719 immutable_set = set(["FD","FN","IN","ZN","hN"])
6720 ignore_set = set(["IN"])
6721 complain_set = set([])
6722 parabreak_set = set([])
6723 sectionbreak_set = set([])
6724 listbreak_set = set([])
6725 scoped_set = set([])
6731 immutableSet = set(["FD","FN","IN","ZN","hN"])
6732 ignoreSet = set(["IN"])
6733 complainSet = set([])
6734 parabreakSet = set([])
6735 sectionbreakSet = set([])
6736 listbreakSet = set([])
6737 scopedSet = set([])
67266738 translations = {}
67276739 reductions = {}
67286740 requires = [ManInterpreter]
67296741 def __init__(self, source, verbose=0):
67306742 self.source = source
67316743 self.verbose = verbose
6732 self.source.troff.entities_from_strings = True
6733 def interpret(self, dummy, tokens, dummy_caller):
6744 self.source.troff.entitiesFromStrings = True
6745 def interpret(self, dummy, tokens, dummyCaller):
67346746 cmd = tokens[0][1:]
67356747 args = tokens[1:]
67366748 # .Ds and .De are already handled by ManInterpreter
67376749 if cmd == "FD":
67386750 # This wants to be a keep, but DocBook can't express that.
6739 self.source.begin_block("literallayout", remap="FD")
6751 self.source.beginBlock("literallayout", remap="FD")
67406752 elif cmd == "FN":
6741 self.source.end_block("literallayout", remap="FN")
6753 self.source.endBlock("literallayout", remap="FN")
67426754 elif cmd == "ZN":
67436755 self.source.pushline("<symbol role='ZN'>%s</symbol>%s" % (args[0], "".join(args[1:])))
67446756 elif cmd == "Pn":
67466758 elif cmd == "hN":
67476759 self.source.pushline("<symbol>&lt;%s&gt;</symbol>%s" % (args[0], "".join(args[1:])))
67486760 elif cmd == "C{":
6749 self.source.begin_block("programlisting", remap="C{")
6761 self.source.beginBlock("programlisting", remap="C{")
67506762 elif cmd == "C}":
6751 self.source.end_block("programlisting", remap="C}")
6763 self.source.endBlock("programlisting", remap="C}")
67526764 elif cmd == "NT":
6753 self.source.begin_block("note", remap="NT")
6765 self.source.beginBlock("note", remap="NT")
67546766 elif cmd == "NE":
6755 self.source.end_block("note", remap="NE")
6767 self.source.endBlock("note", remap="NE")
67566768 else:
67576769 return False
67586770 return True
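Each of these macro-set classes follows the same contract: `interpret()` consumes one request and returns `True`, or returns `False` so the next interpreter in the stack gets a chance. A toy sketch of that dispatch protocol (the class and function names here are illustrative, not doclifter's own):

```python
class EchoInterpreter:
    """Toy interpreter following the protocol above: handle a
    recognized request and return True, or return False to pass
    the request down the interpreter stack."""
    def interpret(self, line, tokens, caller):
        cmd = tokens[0][1:]          # strip the leading control char
        if cmd == "NT":
            return True              # would open a <note> block here
        return False                 # not ours; let the next one try

def dispatch(interpreters, tokens):
    """Walk the stack until some interpreter claims the request."""
    for interp in interpreters:
        if interp.interpret(None, tokens, None):
            return interp
    return None
```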
67816793 name = "AST/CERN"
67826794 exclusive = False
67836795 toptag = "refentry"
6784 immutable_set = set(["FN", "DS", "DE"])
6785 ignore_set = set(["SP"])
6786 complain_set = set([])
6787 parabreak_set = set([])
6788 sectionbreak_set = set([])
6789 listbreak_set = set([])
6790 scoped_set = set([])
6796 immutableSet = set(["FN", "DS", "DE"])
6797 ignoreSet = set(["SP"])
6798 complainSet = set([])
6799 parabreakSet = set([])
6800 sectionbreakSet = set([])
6801 listbreakSet = set([])
6802 scopedSet = set([])
67916803 translations = {}
67926804 reductions = {}
67936805 requires = [ManInterpreter]
67946806 def __init__(self, source, verbose=0):
67956807 self.source = source
67966808 self.verbose = verbose
6797 self.source.troff.entities_from_strings = True
6809 self.source.troff.entitiesFromStrings = True
67986810 self.headerset = set([])
67996811 assert self.source.interpreters[0].name == "man"
6800 def interpret(self, dummy, tokens, dummy_caller):
6812 def interpret(self, dummy, tokens, dummyCaller):
68016813 cmd = tokens[0][1:]
68026814 args = tokens[1:]
68036815 if cmd in ("H0", "H1", "H2", "H3", "H4"):
68046816 self.headerset.add(cmd)
6805 self.source.push_section(len(self.headerset)+1, " ".join(args))
6817 self.source.pushSection(len(self.headerset)+1, " ".join(args))
68066818 elif cmd == "OP":
68076819 # Here's what we're emulating:
68086820 #
68646876 options.append("--" + args[1])
68656877 if args[3] != '-':
68666878 options[1] += "=<emphasis>%s</emphasis>" % args[3]
6867 self.source.emit_variablelist("OP", options)
6879 self.source.emitVariablelist("OP", options)
68686880 elif cmd in ("SH", "SS"):
68696881 self.headerset = set([])
6870 self.source.interpreters[1].interpret(dummy, tokens, dummy_caller)
6882 self.source.interpreters[1].interpret(dummy, tokens, dummyCaller)
68716883 else:
68726884 return False
68736885 return True
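The `.OP` handling above reduces to building a list of option terms and handing them to `emitVariablelist`. A minimal sketch of that reduction, assuming a short option, a long option, and an optional argument name where `-` means "absent" (the helper name and placeholder convention are assumptions, not the actual doclifter API):

```python
def op_terms(short, longopt, argname):
    """Build DocBook term strings for an AST/CERN-style .OP request.
    Hypothetical helper mirroring the pattern above."""
    options = []
    if short != '-':
        options.append("-" + short)
    options.append("--" + longopt)
    if argname != '-':
        # Attach the argument to the long form, as the code above does.
        options[-1] += "=<emphasis>%s</emphasis>" % argname
    return options
```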
68936905 name = "tkman"
68946906 exclusive = False
68956907 toptag = "refentry"
6896 immutable_set = set(["AP","AS","BS","BE","CS","CE",
6908 immutableSet = set(["AP","AS","BS","BE","CS","CE",
68976909 "VS","VE","DS","DE","SO","SE","OP",
68986910 "UL","^B","QW","PQ"])
6899 ignore_set = set([])
6900 complain_set = set([])
6901 parabreak_set = set(["AP",])
6902 sectionbreak_set = set([])
6903 listbreak_set = set([])
6904 scoped_set = set([])
6911 ignoreSet = set([])
6912 complainSet = set([])
6913 parabreakSet = set(["AP",])
6914 sectionbreakSet = set([])
6915 listbreakSet = set([])
6916 scopedSet = set([])
69056917 translations = {}
69066918 requires = [ManInterpreter]
69076919 def __init__(self, source, verbose=0):
69086920 self.source = source
69096921 self.verbose = verbose
6910 def interpret(self, dummy, tokens, dummy_caller):
6922 def interpret(self, dummy, tokens, dummyCaller):
69116923 cmd = tokens[0][1:]
69126924 args = tokens[1:]
69136925 # Documentation for these is taken from the wish.1 header.
69196931 # and indent is equivalent to second arg of .IP (shouldn't ever be
69206932 # needed; use .AS below instead)
69216933 if cmd == "AP":
6922 if not self.source.body_section(): return True
6923 self.source.end_paragraph(label="AP")
6934 if not self.source.bodySection(): return True
6935 self.source.endParagraph(label="AP")
69246936 self.source.emit("<informaltable>\n<tgroup cols='3'>\n<tbody>\n")
69256937 self.source.pushline(quoteargs(tokens))
69266938 while self.source.lines:
69296941 while len(tokens) < 4:
69306942 tokens.append("")
69316943 self.source.emit("<row><entry>%s</entry><entry>%s</entry><entry>%s</entry>" % (tokens[1], tokens[2], tokens[3]))
6932 if tokens[1] not in c_declarators:
6944 if tokens[1] not in cDeclarators:
69336945 globalhints.post(tokens[1], "type")
6934 gather_item(self.source, "entry")
6946 gatherItem(self.source, "entry")
69356947 self.source.emit("</row>")
6936 if self.source.section_break(self.source.peekline()):
6948 if self.source.sectionBreak(self.source.peekline()):
69376949 break
69386950 self.source.emit("</tbody>\n</tgroup>\n</informaltable>\n")
69396951 # .AS ?type? ?name?
69546966 # .CS
69556967 # Begin code excerpt.
69566968 elif cmd == "CS":
6957 self.source.begin_block("programlisting", remap="CS")
6969 self.source.beginBlock("programlisting", remap="CS")
69586970 # .CE
69596971 # End code excerpt.
69606972 elif cmd == "CE":
6961 self.source.end_block("programlisting", remap="CE")
6973 self.source.endBlock("programlisting", remap="CE")
69626974 # .VS ?version? ?br?
69636975 # Begin vertical sidebar, for use in marking newly-changed parts
69646976 # of man pages. The first argument is ignored and used for recording
69846996 # .SE
69856997 # End of list of standard options for a Tk widget.
69866998 elif cmd == "SO":
6987 self.source.push_section(1, 'STANDARD OPTIONS')
6999 self.source.pushSection(1, 'STANDARD OPTIONS')
69887000 self.source.pushline("l l l l.")
69897001 self.source.TBL(TroffInterpreter.ctrl + "SE")
69907002 elif cmd == "OP":
70297041 name = "mdoc"
70307042 exclusive = True
70317043 toptag = "refentry"
7032 immutable_set = set([])
7033 ignore_set = set(["blank", "Bk", "Ek",])
7034 complain_set = set(["Db",])
7035 parabreak_set = set(["Pp",])
7036 sectionbreak_set = set(["Sh", "Ss"])
7037 listbreak_set = set([])
7038 scoped_set = set([])
7044 immutableSet = set([])
7045 ignoreSet = set(["blank", "Bk", "Ek",])
7046 complainSet = set(["Db",])
7047 parabreakSet = set(["Pp",])
7048 sectionbreakSet = set(["Sh", "Ss"])
7049 listbreakSet = set([])
7050 scopedSet = set([])
70397051 translations = {
70407052 "\\*" : [
70417053 (r"\*q", '"'),
71287140 "Qo","Qq","Sc","So","Sq","St","Sx","Sy",
71297141 "Ta","Tn","Ux","Xc","Xo",])
71307142 # Substitution strings for the St request
7131 st_dict = {
7143 stDict = {
71327144 # ANSI/ISO C
71337145 "-ansiC-89": "ANSI X3.159-1989 (ANSI C)",
71347146 "-ansiC": "ANSI X3.159-1989 (ANSI C)",
71777189 "-iso8601": "ISO 8601",
71787190 }
71797191
7180 lb_dict = {
7192 lbDict = {
71817193 "libarchive": "Reading and Writing Streaming Archives Library (libarchive, -larchive)",
71827194 "libarm": "ARM Architecture Library (libarm, -larm)",
71837195 "libarm32": "ARM32 Architecture Library (libarm32, -larm32)",
72707282 self.liststack = []
72717283 self.bdstack = []
72727284 self.itemcount = []
7273 self.suppress_callables = False
7285 self.suppressCallables = False
72747286 self.spacemode = True
72757287 self.biblio = []
72767288 self.inref = False
72777289 self.refnames = {}
72787290 self.volnum = None
7279 self.refmeta_flushed = False
7280 self.stash_linkender = None
7291 self.refmetaFlushed = False
7292 self.stashLinkender = None
72817293 self.rowcount = 0 # Implicit assumption that we never nest tables
7282 def flush_refmeta(self):
7283 if not self.refmeta_flushed:
7294 def flushRefmeta(self):
7295 if not self.refmetaFlushed:
72847296 self.source.emit("<refmeta>")
72857297 self.source.emit("<refentrytitle>%s</refentrytitle>" % self.title[0])
72867298 self.source.emit("<manvolnum>%s</manvolnum>"%self.volnum)
72877299 self.source.emit("</refmeta>")
72887300 self.source.emit("")
72897301 if self.source.docbook5:
7290 self.source.emit("<refnamediv xml:id='%s'>" % self.source.make_id_from_title('purpose'))
7291 else:
7292 self.source.emit("<refnamediv id='%s'>" % self.source.make_id_from_title('purpose'))
7302 self.source.emit("<refnamediv xml:id='%s'>" % self.source.makeIdFromTitle('purpose'))
7303 else:
7304 self.source.emit("<refnamediv id='%s'>" % self.source.makeIdFromTitle('purpose'))
72937305 self.source.emit("<refname>%s</refname>" % self.name)
72947306 self.source.emit("<refpurpose>%s</refpurpose>" % self.desc)
72957307 self.source.emit("</refnamediv>")
72967308 self.manual = "BSD"
7297 self.refmeta_flushed = True
7309 self.refmetaFlushed = True
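The emission sequence in `flushRefmeta` produces a fixed DocBook skeleton. A string-building sketch of the same output, assuming `makeIdFromTitle('purpose')` simply yields `purpose` and using a plain join in place of the interpreter's `emit()` calls:

```python
def refmeta_skeleton(title, volnum, name, desc, docbook5=False):
    """Mirror of the refmeta/refnamediv emission above (a sketch,
    not the real method; id generation is simplified)."""
    idattr = "xml:id" if docbook5 else "id"
    return "\n".join([
        "<refmeta>",
        "<refentrytitle>%s</refentrytitle>" % title,
        "<manvolnum>%s</manvolnum>" % volnum,
        "</refmeta>",
        "",
        "<refnamediv %s='purpose'>" % idattr,
        "<refname>%s</refname>" % name,
        "<refpurpose>%s</refpurpose>" % desc,
        "</refnamediv>",
    ])
```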
72987310 def hasargs(self, cmd, args):
72997311 "Check to see if there is a macro argument available."
73007312 if not args:
73027314 return False
73037315 else:
73047316 return True
7305 def interpret(self, dummy, tokens, dummy_caller):
7317 def interpret(self, dummy, tokens, dummyCaller):
73067318 tokens[0] = tokens[0][1:]
73077319 # First, collect any additional arguments implied by o/c enclosures.
73087320 for c in ("A", "B", "D", "O", "P", "Q", "S", "X"):
73467358 self.source.emit("is currently in beta test.")
73477359 elif command == "Cd":
73487360
7349 if self.source.in_synopsis():
7361 if self.source.inSynopsis():
73507362 self.source.pushline(".br")
73517363 self.source.pushline(" ".join(args))
73527364 self.source.pushline(".br")
73977409 self.source.pushline(args[0] + "(")
73987410 elif command == "Ft":
73997411 if self.hasargs("Ft", args):
7400 if self.source.body_section():
7412 if self.source.bodySection():
74017413 self.source.pushline(string.join(self.encloseargs(args,
74027414 "<type remap='Ft'>", "</type>")))
74037415 else:
74087420 self.source.pushline("The %s() function returns the value 0 if successful; otherwise the value -1 is returned and the global variable errno is set to indicate the error." % args[1])
74097421 # Hyperlinks
74107422 elif command == "UE":
7411 if not self.stash_linkender:
7423 if not self.stashLinkender:
74127424 self.source.error(".UE with no preceding .UR")
74137425 else:
7414 self.source.emit(self.stash_linkender)
7426 self.source.emit(self.stashLinkender)
74157427 elif command == "UN":
74167428 if self.source.docbook5:
7417 self.source.pushline("<anchor xml:id='%s'>" % self.source.make_id_from_title(args[0]))
7418 else:
7419 self.source.pushline("<anchor id='%s'>" % self.source.make_id_from_title(args[0]))
7429 self.source.pushline("<anchor xml:id='%s'>" % self.source.makeIdFromTitle(args[0]))
7430 else:
7431 self.source.pushline("<anchor id='%s'>" % self.source.makeIdFromTitle(args[0]))
74207432 elif command == "UR":
74217433 if args[0][0] != "#":
7422 self.source.pushline("<ulink url='%s'>" % self.source.id_from_title(args[0]))
7423 self.stash_linkender = "</ulink>"
7424 else:
7425 self.source.pushline("<link linkend='%s'>" % self.source.id_from_title(args[0][1:]))
7426 self.stash_linkender = "</link>"
7434 self.source.pushline("<ulink url='%s'>" % self.source.idFromTitle(args[0]))
7435 self.stashLinkender = "</ulink>"
7436 else:
7437 self.source.pushline("<link linkend='%s'>" % self.source.idFromTitle(args[0][1:]))
7438 self.stashLinkender = "</link>"
74277439 # Structure requests
74287440 elif command == "Dd":
74297441 if args[0] == "$Mdocdate$":
74307442 args.pop(0)
74317443 else:
7432 # acpi_fakekey.1 has an ISO date that doesn't conform to mdoc.
7444 # acpi_fakekey.1 has an ISO date that doesn't conform to mdoc.
74337445 if re.compile("[0-9]+-[0-9]+-[0-9]+").search(args[0]):
74347446 (self.year, self.month, self.day) = args[0].split("-")
74357447 # synctex.1 does this - might happen elsewhere
74627474 self.source.diversion = self.source.output
74637475 if not args:
74647476 args = self.source.popline().split()
7465 if name_synonyms.match(args[0]):
7477 if nameSynonyms.match(args[0]):
74667478 self.source.sectname = "Name"
74677479 # Copes with lines that are blank or a dot only (groff_mdoc...)
7468 while self.source.peekline() in ("", TroffInterpreter.ctrl, TroffInterpreter.ctrl_nobreak):
7480 while self.source.peekline() in ("", TroffInterpreter.ctrl, TroffInterpreter.ctrlNobreak):
74697481 self.source.popline()
74707482 # Kluge -- it turns out that some web pages (like ash.1)
74717483 # don't use the mandoc macros. Instead they use a Linux-
74727484 # style "NAME - description" header. Test for this.
74737485 line = self.source.popline()
7474 if is_command(line):
7486 if isCommand(line):
74757487 # Macros will handle it
74767488 self.source.pushline(line)
74777489 else:
74787490 # Parse it explicitly.
7479 (self.name, self.desc) = parse_name_section(line)
7480 else:
7481 self.flush_refmeta()
7482 if synopsis_label.search(args[0]):
7483 self.source.end_paragraph()
7491 (self.name, self.desc) = parseNameSection(line)
7492 else:
7493 self.flushRefmeta()
7494 if synopsisLabel.search(args[0]):
7495 self.source.endParagraph()
74847496 self.source.sectname = "Synopsis"
74857497 self.source.synopsis = DisplayParser(self.source,
74867498 True,
74877499 False,
74887500 self.refnames)
7489 self.source.sectionhooks.append(self.source.flush_transplant)
7501 self.source.sectionhooks.append(self.source.flushTransplant)
74907502 return True # Declaration macros will do the work
7491 elif not self.source.synopsis and self.source.find(synopsis_header):
7492 if section_verbosity in self.source.verbose:
7503 elif not self.source.synopsis and self.source.find(synopsisHeader):
7504 if sectionVerbosity in self.source.verbose:
74937505 self.source.notify("transplanting section...")
74947506 self.source.diversion = self.source.transplant
7495 self.source.push_section(1, " ".join(args))
7507 self.source.pushSection(1, " ".join(args))
74967508 else:
7497 self.flush_refmeta()
7498 self.source.declare_body_start()
7509 self.flushRefmeta()
7510 self.source.declareBodyStart()
74997511 # in case someone forgets to close a list (see mktemp.1).
75007512 for _ in self.liststack:
75017513 self.source.pushline(TroffInterpreter.ctrl + "El")
7502 self.source.push_section(1, " ".join(args))
7514 self.source.pushSection(1, " ".join(args))
75037515 elif command == "Ss":
75047516 self.source.diversion = self.source.output
75057517 # in case someone forgets to close a list
75067518 for _ in self.liststack:
75077519 self.source.pushline(TroffInterpreter.ctrl + "El")
7508 self.source.push_section(2, " ".join(args))
7520 self.source.pushSection(2, " ".join(args))
75097521 elif command == "Pp":
7510 if self.source.body_section():
7522 if self.source.bodySection():
75117523 self.source.paragraph()
75127524 else:
75137525 self.source.emit("<sbr/>")
75307542 enclosuretype = "blockquote"
75317543 elif '-unfilled' in args:
75327544 enclosuretype = 'literallayout'
7533 self.source.begin_block("literallayout", remap="Bd")
7545 self.source.beginBlock("literallayout", remap="Bd")
75347546 elif '-literal' in args:
75357547 enclosuretype = 'programlisting'
7536 self.source.begin_block("programlisting", remap="Bd")
7548 self.source.beginBlock("programlisting", remap="Bd")
75377549 self.bdstack.append(enclosuretype)
75387550 elif command == "Ed":
75397551 dtype = self.bdstack.pop()
75407552 if dtype == 'blockquote':
75417553 self.source.poplist("Bd", remap="Ed (list)")
75427554 elif dtype is not None:
7543 self.source.end_block(dtype, remap="Ed (block)")
7544 self.source.need_paragraph()
7555 self.source.endBlock(dtype, remap="Ed (block)")
7556 self.source.needParagraph()
75457557 # List markup
75467558 elif command == "Bl":
75477559 header = " ".join(tokens)
75497561 # (as in ash(1)). Pass it through before emitting the list header.
75507562 while True:
75517563 nextl = self.source.popline()
7552 if match_command(nextl, "It"):
7564 if matchCommand(nextl, "It"):
75537565 self.source.pushline(nextl)
75547566 break
75557567 else:
7556 self.source.interpret_block([nextl])
7557 self.source.end_paragraph(label="Bl")
7568 self.source.interpretBlock([nextl])
7569 self.source.endParagraph(label="Bl")
75587570 hasbodies = xoskip = False
75597571 depth = 1
75607572 for future in self.source.lines:
7561 if match_command(future, "El"):
7573 if matchCommand(future, "El"):
75627574 depth -= 1
75637575 if depth == 0:
75647576 break
7565 elif match_command(future, "Bl"):
7577 elif matchCommand(future, "Bl"):
75667578 depth += 1
75677579 nextl = future.strip()
7568 if is_comment(nextl):
7580 if isComment(nextl):
75697581 continue
75707582 elif 'Xo' in nextl:
75717583 xoskip = True
7572 elif match_command("Xc", nextl):
7584 elif matchCommand("Xc", nextl):
75737585 xoskip = False
7574 if not xoskip and not match_command(nextl, "It"):
7586 if not xoskip and not matchCommand(nextl, "It"):
75757587 hasbodies = True
7576 if bsd_verbosity in self.source.verbose:
7588 if bsdVerbosity in self.source.verbose:
75777589 self.source.notify("%s has bodies? %s" % (header, hasbodies))
75787590 self.itemcount.append(0)
75797591 if "-column" in tokens[1:]:
75877599 self.source.emit("<itemizedlist remap='%s' mark='bullet'>" % header)
75887600 self.liststack.append("</itemizedlist>")
75897601 elif "-dash" in tokens[1:] or "-hyphen" in tokens[1:]:
7590 # See the comment near ip_tag_mapping
7602 # See the comment near ipTagMapping
75917603 self.source.emit("<itemizedlist remap=%s mark='box'>" % repr(header))
75927604 self.liststack.append("</itemizedlist>")
75937605 elif "-item" in tokens[1:]:
76027614 elif "-diag" in tokens[1:]:
76037615 self.source.emit("<variablelist remap=%s>"% repr(header))
76047616 self.liststack.append("</variablelist>")
7605 self.suppress_callables = True
7617 self.suppressCallables = True
76067618 elif "-hang" in tokens[1:]:
76077619 self.source.emit("<variablelist remap=%s>"% repr(header))
76087620 self.liststack.append("</variablelist>")
76407652 else:
76417653 tagline = ""
76427654 if self.itemcount[-1]:
7643 self.source.end_paragraph(label="It")
7655 self.source.endParagraph(label="It")
76447656 self.source.emit("</listitem>")
76457657 if self.liststack[-1] == "</variablelist>":
76467658 self.source.emit("</varlistentry>")
76487660 termlines = [tagline]
76497661 while True:
76507662 nextl = self.source.popline()
7651 if match_command(nextl, "It"):
7663 if matchCommand(nextl, "It"):
76527664 digested = lineparse(nextl)
76537665 digested = self.macroeval(["No"] + digested[1:])
76547666 termlines.append(digested)
76687680 self.source.emit("<listitem>")
76697681 elif self.liststack[-1] == "</orderedlist>":
76707682 self.source.emit("<listitem>")
7671 self.source.need_paragraph()
7683 self.source.needParagraph()
76727684 elif command == "El":
76737685 if self.liststack[-1] == "</table>":
76747686 self.source.emit(" </tbody></tgroup>")
76757687 else:
7676 self.source.end_paragraph(label="El")
7688 self.source.endParagraph(label="El")
76777689 if self.liststack[-1] == "</variablelist>":
76787690 self.source.emit("</listitem>")
76797691 self.source.emit("</varlistentry>")
76847696 self.source.emit("</listitem>")
76857697 self.source.emit(self.liststack.pop())
76867698 self.itemcount.pop()
7687 self.source.need_paragraph()
7699 self.source.needParagraph()
76887700 elif command == "Rs":
76897701 self.biblio.append({})
76907702 self.biblio[-1]["id"] = repr(len(self.biblio))
76947706 if self.source.output[-1] == "</variablelist>":
76957707 self.source.output = self.source.output[:-1]
76967708 else:
7697 self.source.end_paragraph(label="Re")
7709 self.source.endParagraph(label="Re")
76987710 self.source.emit("<variablelist>")
76997711 # We'd like to emit a <bibliography> here, but the DocBook DTD
77007712 # doesn't permit it.
77017713 if self.source.docbook5:
7702 self.source.emit("<varlistentry xml:id='%s'>" % self.source.make_id_from_title("ref" + repr(len(self.biblio))))
7703 else:
7704 self.source.emit("<varlistentry id='%s'>" % self.source.make_id_from_title("ref" + repr(len(self.biblio))))
7714 self.source.emit("<varlistentry xml:id='%s'>" % self.source.makeIdFromTitle("ref" + repr(len(self.biblio))))
7715 else:
7716 self.source.emit("<varlistentry id='%s'>" % self.source.makeIdFromTitle("ref" + repr(len(self.biblio))))
77057717 self.source.emit("<term>[%s]</term>" % len(self.biblio))
77067718 self.source.emit("<listitem><para>")
77077719 for (fld, tag) in ( \
77407752 # Machinery for evaluating parsed macros begins here
77417753 def evalmacro(self, args):
77427754 "Pop args off the stack and evaluate any associated macro."
7743 if bsd_verbosity in self.source.verbose:
7755 if bsdVerbosity in self.source.verbose:
77447756 self.source.notify("evalmacro(%s)" % ", ".join(map(repr, args)))
77457757 cmd = args.pop(0)
7746 if cmd in self.ignore_set: # In case we get keeps with .Oo/Oc
7758 if cmd in self.ignoreSet: # In case we get keeps with .Oo/Oc
77477759 while True:
77487760 end = args.pop(0)
77497761 if end == '\n':
77877799 return ["BSD UNIX"]
77887800 else:
77897801 return ["-".join(["%sBSD" % args[0]] + args[1:])]
7790 return self.process_punct(args, bxhelper, True)
7802 return self.processPunct(args, bxhelper, True)
77917803 elif cmd == "Cm":
77927804 if self.hasargs("Cm", args):
77937805 return self.styleargs(args, "command")
78137825 if self.hasargs("Ev", args):
78147826 return self.styleargs(args, "envar")
78157827 elif cmd == "Fa":
7816 if self.source.in_synopsis():
7817 return [x+"," for x in self.process_punct(args)]
7828 if self.source.inSynopsis():
7829 return [x+"," for x in self.processPunct(args)]
78187830 else:
78197831 return self.styleargs(args, "emphasis", "remap='Fa'")
78207832 elif cmd == "Fl":
78347846 if self.hasargs("Ic", args):
78357847 return self.styleargs(args, "command", "remap='Ic'")
78367848 elif cmd == "Lb":
7837 return self.process_punct(args, self.lbhook, True)
7849 return self.processPunct(args, self.lbhook, True)
78387850 elif cmd == "Li":
78397851 return self.styleargs(args, "literal")
78407852 elif cmd == "Ms":
78457857 savesect = [" ".join(self.encloseargs(args, "", ""))]
78467858 while True:
78477859 line = self.source.popline()
7848 if match_command(line, "Sh"):
7860 if matchCommand(line, "Sh"):
78497861 self.source.pushline(line)
78507862 break
78517863 else:
78527864 savesect.append(line)
78537865 lines = []
7854 self.source.interpret_block(savesect, lines)
7866 self.source.interpretBlock(savesect, lines)
78557867 self.desc = " ".join(lines)
7856 if not self.source.body_section():
7868 if not self.source.bodySection():
78577869 return []
78587870 else:
78597871 return self.desc
78627874 if not self.name:
78637875 self.name = name
78647876 self.refnames[name] = True
7865 if self.source.sectname and name_synonyms.match(self.source.sectname):
7877 if self.source.sectname and nameSynonyms.match(self.source.sectname):
78667878 return []
78677879 else:
78687880 if not name:
78797891 elif cmd == "Op":
78807892 return self.styleargs(args, ("[@GLUE@", "@GLUE@]"))
78817893 elif cmd == "Pa":
7882 if self.source.in_synopsis():
7894 if self.source.inSynopsis():
78837895 return self.styleargs(args, "replaceable")
78847896 else:
78857897 return self.styleargs(args, "filename")
79207932 elif cmd == "Sq":
79217933 return self.encloseargs(args, "`@GLUE@", "@GLUE@\'")
79227934 elif cmd == "St":
7923 return self.process_punct(args, self.sthook, True)
7935 return self.processPunct(args, self.sthook, True)
79247936 elif cmd == "Sx":
79257937 #title = " ".join(args)
7926 return self.process_punct(args, lambda x: ["<link role='Sx' linkend='%s'>%s</link>" % (self.source.id_from_title(" ".join(x)), " ".join(x))], False)
7938 return self.processPunct(args, lambda x: ["<link role='Sx' linkend='%s'>%s</link>" % (self.source.idFromTitle(" ".join(x)), " ".join(x))], False)
79277939 elif cmd == "Sy":
79287940 return self.styleargs(args, "emphasis", 'remap="Sy"')
79297941 elif cmd == "Ta":
79457957 elif cmd == "Xo":
79467958 return self.replacemacro(args, "")
79477959 elif cmd == "Xr":
7948 return self.process_punct(args, self.xrhook, False)
7960 return self.processPunct(args, self.xrhook, False)
79497961 elif cmd[0] == "%":
7950 lst = self.process_punct(args, lambda x: self.bibliohook(cmd[1], x), True)
7962 lst = self.processPunct(args, lambda x: self.bibliohook(cmd[1], x), True)
79517963 if self.inref:
79527964 return []
79537965 else:
79847996 else:
79857997 raise LiftException(self.source, "unresolved reference to '%s'" % ref)
79867998 def sthook(self, args):
7987 if args[0] in MdocInterpreter.st_dict:
7988 return["<citetitle>" + MdocInterpreter.st_dict[args[0]] + "</citetitle>"]
7999 if args[0] in MdocInterpreter.stDict:
8000 return["<citetitle>" + MdocInterpreter.stDict[args[0]] + "</citetitle>"]
79898001 else:
79908002 raise LiftException(self.source, "unknown St macro '%s'" % args[0])
79918003 def lbhook(self, args):
7992 if args[0] in MdocInterpreter.lb_dict:
7993 return["<citetitle>" + MdocInterpreter.lb_dict[args[0]] + "</citetitle>"]
8004 if args[0] in MdocInterpreter.lbDict:
8005 return["<citetitle>" + MdocInterpreter.lbDict[args[0]] + "</citetitle>"]
79948006 else:
79958007 raise LiftException(self.source, "unknown Lb macro '%s'" % args[0])
79968008 def xrhook(self, args):
79988010 return ["<citerefentry><refentrytitle>%s</refentrytitle></citerefentry>" % args[0]]
79998011 else:
80008012 return ["<citerefentry><refentrytitle>%s</refentrytitle><manvolnum>%s</manvolnum></citerefentry>" % (args[0], args[1])]
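The hook functions translate mdoc abbreviations into DocBook inline markup: `sthook` looks the standard name up in the dictionary, and `xrhook` wraps a cross-reference in `citerefentry`. A standalone sketch with a one-entry excerpt of the table above:

```python
ST_DICT = {"-ansiC": "ANSI X3.159-1989 (ANSI C)"}  # excerpt of stDict above

def sthook(args):
    """Map a .St abbreviation to a citetitle, as above."""
    if args[0] in ST_DICT:
        return ["<citetitle>" + ST_DICT[args[0]] + "</citetitle>"]
    raise ValueError("unknown St macro %r" % args[0])

def xrhook(args):
    """Map a .Xr page/volume pair to a citerefentry, as above."""
    if len(args) == 1:
        return ["<citerefentry><refentrytitle>%s</refentrytitle>"
                "</citerefentry>" % args[0]]
    return ["<citerefentry><refentrytitle>%s</refentrytitle>"
            "<manvolnum>%s</manvolnum></citerefentry>" % (args[0], args[1])]
```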
8001 def extractargs(self, args, stop_on_callable=0):
8013 def extractargs(self, args, stopOnCallable=0):
80028014 operands = []
80038015 while args:
8004 if stop_on_callable and args[0] in MdocInterpreter.callable:
8016 if stopOnCallable and args[0] in MdocInterpreter.callable:
80058017 break
80068018 this = args.pop(0)
80078019 operands.append(this)
80088020 if this == '\n':
80098021 break
80108022 return operands
8011 def process_punct(self, args, hook=None, stop_on_callable=False):
8023 def processPunct(self, args, hook=None, stopOnCallable=False):
80128024 "Wrap required processing of punctuation around an evaluation."
80138025 prepunct = []
80148026 postpunct = []
80198031 postpunct = [args.pop()] + postpunct
80208032 operands = []
80218033 while args:
8022 if stop_on_callable and args[0] in MdocInterpreter.callable:
8034 if stopOnCallable and args[0] in MdocInterpreter.callable:
80238035 break
80248036 this = args.pop(0)
80258037 operands.append(this)
80408052 return result
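`processPunct` peels punctuation tokens off both ends of the argument list, runs the hook on the bare operands, then reattaches the punctuation outside the generated markup. A simplified sketch of that wrapper (the punctuation set and the omission of the callable-stop logic are assumptions):

```python
PUNCT = set(".,;:()[]")

def process_punct(args, hook=lambda xs: xs):
    """Sketch of the punctuation wrapper above: strip leading and
    trailing punctuation tokens, transform the operands, reattach."""
    pre, post = [], []
    while args and args[0] in PUNCT:
        pre.append(args.pop(0))
    while args and args[-1] in PUNCT:
        post.insert(0, args.pop())
    return pre + hook(args) + post
```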
80418053 def encloseargs(self, args, opener, closer):
80428054 "Grab and process arguments for an enclosure macro."
8043 return self.process_punct(args, lambda x: [opener] + x + [closer], False)
8044 def stylehook(self, args, tag, attr, dummy_prefix):
8055 return self.processPunct(args, lambda x: [opener] + x + [closer], False)
8056 def stylehook(self, args, tag, attr, dummyPrefix):
80458057 "Wrap non-punctuation characters in given tag pair."
80468058 result = []
80478059 if attr:
80598071 result.append(start + arg + end)
80608072 return result
80618073 def styleargs(self, args, tag, attribute="", prefix=""):
8062 return self.process_punct(args, lambda x: self.stylehook(x, tag, attribute, prefix), 1)
8074 return self.processPunct(args, lambda x: self.stylehook(x, tag, attribute, prefix), 1)
80638075 def replacemacro(self, args, withmac):
8064 return self.process_punct(args, lambda x: [withmac] + x, 1)
8076 return self.processPunct(args, lambda x: [withmac] + x, 1)
80658077 def macroeval(self, args):
80668078 "Evaluate a macro, returning a list."
8067 if bsd_verbosity in self.source.verbose:
8079 if bsdVerbosity in self.source.verbose:
80688080 self.source.notify("macroeval%s\n" % (tuple(args),))
80698081
80708082 if args[0][0] == '.':
80768088 if not self.spacemode and len(nextpart) > 1:
80778089 for ind in range(len(nextpart)):
80788090 nextpart.insert(2*ind+1, "@GLUE@")
8079 if bsd_verbosity in self.source.verbose:
8091 if bsdVerbosity in self.source.verbose:
80808092 self.source.notify("evalmacro -> %s" % nextpart)
80818093 result += nextpart
80828094 # Glue the results together
80838095 result = " ".join(result)
80848096 result = hotglue.sub("", result)
80858097 result = cleantag.sub("", result)
8086 if bsd_verbosity in self.source.verbose:
8098 if bsdVerbosity in self.source.verbose:
80878099 self.source.notify("macroeval -> %s\n" % repr(result))
80888100 return result
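The glue pass at the end of `macroeval` joins the evaluated fragments with spaces, then collapses the join wherever an `@GLUE@` marker says no space is wanted. A minimal sketch (the regex stands in for the module-level `hotglue` pattern, whose exact form is not shown here):

```python
import re

# "@GLUE@" plus any adjacent single spaces vanishes, fusing its
# neighbors together -- this is how "[@GLUE@ -x @GLUE@]" becomes "[-x]".
hotglue = re.compile(r" ?@GLUE@ ?")

def glue_join(tokens):
    return hotglue.sub("", " ".join(tokens))
```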
80898101 def preprocess(self, text):
80908102 return text
80918103 def postprocess(self, text):
80928104 # It's not an error for Sx references to point elsewhere
8093 link_re = re_compile("<link role='Sx' linkend='([A-Za-z_]*)'>([A-Za-z_]*)</link>")
8105 linkRe = reCompile("<link role='Sx' linkend='([A-Za-z_]*)'>([A-Za-z_]*)</link>")
80948106 while True:
8095 m = link_re.search(text)
8107 m = linkRe.search(text)
80968108 if m:
80978109 linkstart = m.start(0)
80988110 linkend = m.end(0)
80998111 mid = m.group(1)
81008112 label = m.group(2)
8101 if self.source.id_exists(mid):
8113 if self.source.idExists(mid):
81028114 text = text[:linkstart+6] + text[linkstart+15:]
81038115 else:
81048116 self.source.warning("unresolved Sx label %s" % label)
81118123 # groff_mdoc(7).
81128124 text = text.replace("<listitem>\n</listitem>", "")
81138125 # Sanity check
8114 if not self.source.section_count:
8126 if not self.source.sectionCount:
81158127 raise LiftException(self.source, "no mdoc section structure, can't be lifted.")
81168128 return text
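The Sx fixup in `postprocess` scans for links tagged `role='Sx'`: where the target id exists the role attribute is dropped, otherwise the link is left alone and a warning is issued. A sketch of the same pass written with `finditer` instead of the repeated-search loop (the attribute layout is taken from the handler above; the predicate argument is an assumption standing in for `idExists`):

```python
import re

def resolve_sx(text, id_exists):
    """Drop role='Sx' from links whose target exists; keep the rest."""
    link_re = re.compile(r"<link role='Sx' linkend='([A-Za-z_]*)'>")
    out, pos = [], 0
    for m in link_re.finditer(text):
        out.append(text[pos:m.start()])
        if id_exists(m.group(1)):
            out.append("<link linkend='%s'>" % m.group(1))
        else:
            out.append(m.group(0))  # unresolved: leave as-is
        pos = m.end()
    out.append(text[pos:])
    return "".join(out)
```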
81178129
81208132 name = "ms"
81218133 exclusive = True
81228134 toptag = "article"
8123 immutable_set = set([])
8124 ignore_set = set([
8135 immutableSet = set([])
8136 ignoreSet = set([
81258137 # Ignore presentation-level-only requests from Bell Labs.
81268138 "RP", "ND", "DA", "1C", "2C", "MC",
81278139 "BX", "KS", "KE", "KF",
81328144 # they occur in ms papers, probably as relics from mm.
81338145 "MH", "CS", "D3"
81348146 ])
8135 complain_set = set(["RS", "RE",])
8136 parabreak_set = set(["blank","PP", "LP", "XP", "IP",])
8137 sectionbreak_set = set(["NH", "SH", "SC",])
8138 listbreak_set = set(["PP", "LP", "XP", "NH", "SH", "SC",])
8139 scoped_set = set([])
8147 complainSet = set(["RS", "RE",])
8148 parabreakSet = set(["blank","PP", "LP", "XP", "IP",])
8149 sectionbreakSet = set(["NH", "SH", "SC",])
8150 listbreakSet = set(["PP", "LP", "XP", "NH", "SH", "SC",])
8151 scopedSet = set([])
81408152 translations = {
81418153 "\\*" : [
81428154 # The Bell Labs prefix diacriticals
82338245 self.source.emit(r"</sidebar>")
82348246 # Commands for front matter
82358247 elif command == "TL":
8236 self.source.declare_body_start()
8237 self.TL = gather_lines(self.source)
8248 self.source.declareBodyStart()
8249 self.TL = gatherLines(self.source)
82388250 return True
82398251 elif command == "OK": # Undocumented -- probably some Bell Labs thing
8240 gather_lines(self.source)
8252 gatherLines(self.source)
82418253 return True
82428254 elif command == "AU":
8243 self.AU = gather_lines(self.source)
8255 self.AU = gatherLines(self.source)
82448256 return True
82458257 elif command == "AI":
8246 self.AI = gather_lines(self.source)
8258 self.AI = gatherLines(self.source)
82478259 return True
82488260 elif command == "AB":
82498261 self.AB = []
82528264 tokens = lineparse(line)
82538265 if tokens and tokens[0][1:3] == "AE":
82548266 break
8255 if not (is_command(line) and self.source.ignorable(line)):
8267 if not (isCommand(line) and self.source.ignorable(line)):
82568268 self.AB.append(line)
82578269 return True
82588270 # Here's where we analyze the front matter and generate the header
82598271 if not self.flushed:
8260 self.source.in_preamble = False
8261 if io_verbosity in self.source.verbose:
8272 self.source.inPreamble = False
8273 if ioVerbosity in self.source.verbose:
82628274 self.source.notify("exiting preamble")
82638275 self.flushed = True
82648276 # If there's only one line of authors, try to break it up by
82838295 digested.append(author)
82848296 # OK, we've got enough info to generate the header
82858297 if self.TL or self.AU or self.AI or self.AB:
8286 self.source.end_paragraph(label="ms header")
8298 self.source.endParagraph(label="ms header")
82878299 self.source.emit("<articleinfo>")
82888300 if self.TL:
82898301 self.source.emit("<title>")
8290 caller.interpret_block(self.TL)
8302 caller.interpretBlock(self.TL)
82918303 self.source.emit("</title>")
82928304 for self.author in digested:
82938305 if self.author.nonempty():
82948306 self.source.emit(repr(author))
82958307 if self.AB:
82968308 self.source.emit("<abstract>")
8297 self.source.need_paragraph()
8298 caller.interpret_block(self.AB)
8299 self.source.end_paragraph(label="AB")
8309 self.source.needParagraph()
8310 caller.interpretBlock(self.AB)
8311 self.source.endParagraph(label="AB")
83008312 self.source.emit("</abstract>")
83018313 self.source.emit("</articleinfo>")
83028314 if command in ("blank","PP","LP","XP") or command == "IP" and len(tokens) == 1:
83078319 newdepth = int(tokens[1])
83088320 except ValueError:
83098321 newdepth = 1
8310 self.source.push_section(newdepth, title)
8322 self.source.pushSection(newdepth, title)
83118323 elif command == "IP":
83128324 # If no tag is specified, treat as ordinary paragraph.
8313 self.source.end_paragraph(label="IP")
8325 self.source.endParagraph(label="IP")
83148326 # Some tags can turn into an itemized list.
8315 if tokens[1] in ip_tag_mapping:
8327 if tokens[1] in ipTagMapping:
83168328 self.source.pushline(quoteargs(tokens))
8317 gather_itemizedlist(TroffInterpreter.ctrl + "IP", self.source,
8318 ip_tag_mapping[tokens[1]])
8329 gatherItemizedlist(TroffInterpreter.ctrl + "IP", self.source,
8330 ipTagMapping[tokens[1]])
83198331 # Otherwise, emit a variable list
83208332 else:
8321 self.source.emit_variablelist(command, tokens[1])
8333 self.source.emitVariablelist(command, tokens[1])
83228334 elif command == "QP":
8323 self.source.begin_block("blockquote", remap="QP")
8335 self.source.beginBlock("blockquote", remap="QP")
83248336 while self.source.lines:
83258337 line = self.source.popline()
8326 if is_command(line):
8338 if isCommand(line):
83278339 self.source.pushline(line)
83288340 break
83298341 self.source.emit(line)
8330 self.source.end_block("blockquote", remap="QE")
8342 self.source.endBlock("blockquote", remap="QE")
83318343 elif command == "DS":
8332 self.source.begin_block("literallayout", remap='DS')
8344 self.source.beginBlock("literallayout", remap='DS')
83338345 elif command == "DE":
8334 self.source.end_block("literallayout", remap='DE')
8346 self.source.endBlock("literallayout", remap='DE')
83358347 elif command == "FS":
8336 self.source.begin_block("footnote", remap='FS')
8348 self.source.beginBlock("footnote", remap='FS')
83378349 elif command == "FE":
8338 self.source.end_block("footnote", remap='FE')
8350 self.source.endBlock("footnote", remap='FE')
83398351 elif command == "QS":
8340 self.source.begin_block("blockquote", remap='QS')
8352 self.source.beginBlock("blockquote", remap='QS')
83418353 elif command == "QE":
8342 self.source.end_block("blockquote", remap='QE')
8354 self.source.endBlock("blockquote", remap='QE')
83438355 # Undocumented Bell Labs-isms begin here
83448356 elif command == "UX":
83458357 self.source.pushline("<productname>Unix</productname>")
83488360 self.source.pushline("<productname>%s</productname>" % args[0])
83498361 return True
83508362 elif command == "SC":
8351 self.source.push_section(1, args[0])
8363 self.source.pushSection(1, args[0])
83528364 elif command == "P1" and self.source.find("P2"):
8353 self.source.begin_block("programlisting", remap='P1')
8365 self.source.beginBlock("programlisting", remap='P1')
83548366 elif command == "P2":
8355 self.source.end_block("programlisting", remap='P2')
8367 self.source.endBlock("programlisting", remap='P2')
83568368 else:
83578369 return False
83588370 return True
83668378 name = "me"
83678379 exclusive = True
83688380 toptag = "article"
8369 immutable_set = set([])
8370 ignore_set = set(["1c","2c","bc","bl","ef","eh","ep","fo",
8381 immutableSet = set([])
8382 ignoreSet = set(["1c","2c","bc","bl","ef","eh","ep","fo",
83718383 "he","hx","m1","m2","m3","m4","n1","n2",
83728384 "of","oh","tp","xl","xp","sk","(z",")z",
83738385 "sz","(l",")l",
83748386 ])
8375 complain_set = set(["ba","bx","ix","(b",")b","(c",")c","pa",
8387 complainSet = set(["ba","bx","ix","(b",")b","(c",")c","pa",
83768388 "sx","uh",".$p",".$c",".$f",".$h",".$s",
83778389 "+c","(x",")x",
83788390 ])
8379 parabreak_set = set(["blank","lp","pp","ip","np",])
8380 sectionbreak_set = set(["sh",])
8381 listbreak_set = set(["lp","pp","np","sh",])
8382 scoped_set = set([])
8391 parabreakSet = set(["blank","lp","pp","ip","np",])
8392 sectionbreakSet = set(["sh",])
8393 listbreakSet = set(["lp","pp","np","sh",])
8394 scopedSet = set([])
83838395 translations = {
83848396 "\\*" : [
83858397 (r"\*-", "&ndash;"), # Not quite right, supposed to be 3/4 dash
84088420 self.source = source
84098421 self.verbose = verbose
84108422 self.delay = []
8411 self.in_abstract = False
8412 self.source.in_preamble = False
8413 if io_verbosity in self.source.verbose:
8423 self.inAbstract = False
8424 self.source.inPreamble = False
8425 if ioVerbosity in self.source.verbose:
84148426 self.source.notify("exiting preamble")
8415 def interpret(self, dummy, tokens, dummy_caller):
8427 def interpret(self, dummy, tokens, dummyCaller):
84168428 cmd = tokens[0][1:]
84178429 args = tokens[1:]
84188430 if cmd in ("b", "bi", "i", "r", "rb", "sm", "u"):
84208432 trailer = ""
84218433 else:
84228434 trailer = args[1]
8423 self.source.pushline(self.source.direct_highlight(cmd.upper(), [args[0]], trailer))
8435 self.source.pushline(self.source.directHighlight(cmd.upper(), [args[0]], trailer))
84248436 elif cmd == "q":
84258437 if len(args) <= 2:
84268438 trailer = ""
84288440 trailer = args[1]
84298441 self.source.pushline("<quote>%s</quote>%s" % (args[0], trailer))
84308442 elif cmd in ("blank", "lp", "pp"):
8431 self.source.declare_body_start()
8443 self.source.declareBodyStart()
84328444 self.source.paragraph()
84338445 elif cmd == "ip":
8434 self.source.emit_variablelist("ip", args[1])
8446 self.source.emitVariablelist("ip", args[1])
84358447 elif cmd == "bp":
84368448 self.source.pushline(quoteargs(tokens))
8437 gather_itemizedlist(TroffInterpreter.ctrl + "bp", self.source, "bullet")
8449 gatherItemizedlist(TroffInterpreter.ctrl + "bp", self.source, "bullet")
84388450 elif cmd == "np":
84398451 self.source.pushline(quoteargs(tokens))
8440 gather_orderedlist(TroffInterpreter.ctrl + "np", self.source, "bullet")
8452 gatherOrderedlist(TroffInterpreter.ctrl + "np", self.source, "bullet")
84418453 elif cmd == "(q":
8442 self.source.begin_block("blockquote", remap='(q')
8454 self.source.beginBlock("blockquote", remap='(q')
84438455 elif cmd == ")q":
8444 self.source.end_block("blockquote", remap=')q')
8456 self.source.endBlock("blockquote", remap=')q')
84458457 elif cmd == "(f":
8446 self.source.begin_block("footnote", remap='(q')
8458 self.source.beginBlock("footnote", remap='(q')
84478459 elif cmd == ")f":
8448 self.source.end_block("footnote", remap=')q')
8460 self.source.endBlock("footnote", remap=')q')
84498461 elif cmd == "(d":
84508462 self.source.diversion = self.delay
84518463 elif cmd == ")d":
84548466 self.source.output += self.delay
84558467 self.delay = []
84568468 elif cmd == "sh":
8457 self.source.push_section(int(tokens[1]), tokens[2])
8469 self.source.pushSection(int(tokens[1]), tokens[2])
84588470 elif cmd == "++":
84598471 if tokens[1] == "AB":
8460 self.in_abstract = True
8472 self.inAbstract = True
84618473 self.source.emit("<abstract>")
8462 elif self.in_abstract:
8463 self.in_abstract = False
8474 elif self.inAbstract:
8475 self.inAbstract = False
84648476 self.source.emit("</abstract>")
84658477 else:
84668478 return False
84758487 name = "mm"
84768488 exclusive = True
84778489 toptag = "article"
8478 immutable_set = set(["B", "I", "R",
8490 immutableSet = set(["B", "I", "R",
84798491 "BI", "BR", "IB", "IR", "RB", "RI",
84808492 "AE", "AF", "AL", "RL", "APP", "APPSK",
84818493 "AS", "AT", "AU", "B1", "B2", "BE",
84848496 "IND", "LB", "LC", "LE", "LI", "P",
84858497 "RF", "SM", "TL", "VERBOFF", "VERBON",
84868498 "WA", "WE", ])
8487 ignore_set = set([")E", "1C", "2C", "AST", "AV", "AVL",
8499 ignoreSet = set([")E", "1C", "2C", "AST", "AV", "AVL",
84888500 "COVER", "COVEND", "EF", "EH", "EDP",
84898501 "EPIC", "FC", "FD", "HC", "HM",
84908502 "GETR", "GETST",
84938505 "PGFORM", "PGNH", "PE", "PF", "PH",
84948506 "RP", "S", "SA", "SP",
84958507 "SG", "SK", "TAB", "TB", "TC", "VM", "WC"])
8496 complain_set = set(["EC", "EX",
8508 complainSet = set(["EC", "EX",
84978509 "GETHN", "GETPN", "GETR", "GETST",
84988510 "LT", "LD", "LO",
84998511 "MOVE", "MULB", "MULN", "MULE", "NCOL",
85008512 "nP", "PIC", "RD", "RS", "RE", "SETR", ])
8501 parabreak_set = set([])
8502 sectionbreak_set = set([])
8503 listbreak_set = set([])
8504 scoped_set = set([])
8513 parabreakSet = set([])
8514 sectionbreakSet = set([])
8515 listbreakSet = set([])
8516 scopedSet = set([])
85058517 translations = {
85068518 "\\*" : [
85078519 (r"\*F", ""), # Assumes that footnote marks are adjacent to footnotes
85228534 self.flushed = False
85238535 self.author = Author()
85248536 self.TL = self.AS = self.FG = None
8525 def end_list(self):
8537 def endList(self):
85268538 if self.listcount[-1]:
8527 self.source.end_paragraph(label="end_list")
8539 self.source.endParagraph(label="endList")
85288540 self.source.emit("</listitem>")
85298541 if self.liststack[-1] == "</variablelist>":
85308542 self.source.emit("</varlistentry>")
85318543 self.source.emit(self.liststack.pop())
85328544 self.listcount.pop()
8533 def fold_highlights(self, cmd, args):
8545 def foldHighlights(self, cmd, args):
85348546 # We need this to be a separate entry point for TP tag processing.
85358547 if cmd in ("B", "I", "R"):
8536 return self.source.alternating_highlight(cmd + "P", args)
8548 return self.source.alternatingHighlight(cmd + "P", args)
85378549 elif cmd in ("BI", "BR", "IB", "IR", "RB", "RI"):
8538 return self.source.alternating_highlight(cmd, args)
8550 return self.source.alternatingHighlight(cmd, args)
85398551 else:
85408552 return None
85418553 def interpret(self, dummy, tokens, caller):
85428554 cmd = tokens[0][1:]
85438555 args = tokens[1:]
85448556 # Highlighting
8545 highlighted = self.fold_highlights(cmd, args)
8557 highlighted = self.foldHighlights(cmd, args)
85468558 if highlighted:
85478559 self.source.emit(highlighted)
85488560 return True
85498561 # Commands for front matter
85508562 elif cmd == "TL":
8551 self.source.declare_body_start()
8552 self.TL = gather_lines(self.source)
8563 self.source.declareBodyStart()
8564 self.TL = gatherLines(self.source)
85538565 return True
85548566 elif cmd == "AF":
85558567 self.author.orgname = args[0]
85688580 tokens = lineparse(line)
85698581 if tokens and tokens[0][1:3] == "AE":
85708582 break
8571 if not (is_command(line) and self.source.ignorable(line)):
8583 if not (isCommand(line) and self.source.ignorable(line)):
85728584 self.AS.append(line)
85738585 return True
85748586 # Here's where we analyze the front matter and generate the header
85758587 if not self.flushed:
8576 self.source.in_preamble = False
8577 if io_verbosity in self.source.verbose:
8588 self.source.inPreamble = False
8589 if ioVerbosity in self.source.verbose:
85788590 self.source.notify("exiting preamble")
85798591 self.flushed = True
85808592 # OK, we've got enough info to generate the header
85818593 if self.TL or self.AS or self.author.nonempty():
8582 self.source.end_paragraph(label="mm header")
8594 self.source.endParagraph(label="mm header")
85838595 self.source.emit("<articleinfo>")
85848596 if self.TL:
85858597 self.source.emit("<title>")
8586 caller.interpret_block(self.TL)
8598 caller.interpretBlock(self.TL)
85878599 self.source.emit("</title>")
85888600 if self.author.nonempty():
85898601 self.source.emit(repr(self.author))
85908602 if self.AS:
85918603 self.source.emit("<abstract>")
8592 self.source.need_paragraph()
8593 caller.interpret_block(self.AS)
8594 self.source.end_paragraph(label="AS")
8604 self.source.needParagraph()
8605 caller.interpretBlock(self.AS)
8606 self.source.endParagraph(label="AS")
85958607 self.source.emit("</abstract>")
85968608 self.source.emit("</articleinfo>")
85978609 # Ordinary formatting commands.
86188630 self.source.emit("<appendix><title>%s</title>" % name)
86198631 elif cmd == "AS":
86208632 self.source.emit("<abstract>")
8621 self.source.need_paragraph()
8633 self.source.needParagraph()
86228634 elif cmd == "B1":
8623 self.source.begin_block("sidebar", remap="B1")
8635 self.source.beginBlock("sidebar", remap="B1")
86248636 elif cmd == "B2":
8625 self.source.end_block(r"sidebar", remap="B2")
8637 self.source.endBlock(r"sidebar", remap="B2")
86268638 elif cmd == "BE":
86278639 self.source.paragraph("End of BS/BE block")
86288640 elif cmd == "BL" or cmd == "ML":
86418653 self.liststack.append("</variablelist>")
86428654 self.listcount.append(0)
86438655 elif cmd == "DE":
8644 self.source.end_block("literallayout", remap="DE")
8656 self.source.endBlock("literallayout", remap="DE")
86458657 if self.FG:
86468658 self.emit("<caption><phrase>%s</phrase></caption>" % self.FG)
86478659 self.source.emit("</informalfigure>")
86548666 self.liststack.append("</itemizedlist>")
86558667 elif cmd == "DS" or cmd == "DF":
86568668 self.source.emit("<informalfigure>")
8657 self.source.begin_block("literallayout", remap=cmd)
8669 self.source.beginBlock("literallayout", remap=cmd)
86588670 elif cmd == "FE":
8659 self.source.end_block("footnote", remap="FE")
8671 self.source.endBlock("footnote", remap="FE")
86608672 elif cmd == "FG":
86618673 self.source.FG = args[0]
86628674 elif cmd == "FS":
8663 self.source.begin_block("footnote", remap="FE")
8675 self.source.beginBlock("footnote", remap="FE")
86648676 elif cmd == "H":
86658677 for level in self.liststack:
8666 self.end_list()
8678 self.endList()
86678679 level = int(args[0])
8668 heading_text = heading_suffix = ""
8680 headingText = headingSuffix = ""
86698681 if len(args) > 1:
8670 heading_text = args[1]
8682 headingText = args[1]
86718683 if len(args) > 2:
8672 heading_suffix = args[1]
8673 self.source.push_section(level, heading_text + heading_suffix)
8684 headingSuffix = args[1]
8685 self.source.pushSection(level, headingText + headingSuffix)
86748686 elif cmd == "HU":
8675 heading_text = args[0]
8687 headingText = args[0]
86768688 for level in self.liststack:
8677 self.end_list()
8678 self.source.push_section(self.source.sectiondepth, heading_text, makeid=0)
8689 self.endList()
8690 self.source.pushSection(self.source.sectiondepth, headingText, makeid=0)
86798691 # We can ignore H[XYZ] as they are user-defined exits
86808692 elif cmd == "IA":
86818693 self.source.emit("<!-- Start IA address spec: " + repr(args))
87058717 self.listcount.append(0)
87068718 elif cmd == "LC":
87078719 for level in self.liststack:
8708 self.end_list()
8720 self.endList()
87098721 elif cmd == "LE":
8710 self.end_list()
8722 self.endList()
87118723 elif cmd == "LI":
87128724 mark = ""
87138725 if len(args) > 0:
87148726 mark = args[0] # FIXME: process second argument
87158727 # End previous entry
87168728 if self.listcount[-1]:
8717 self.source.end_paragraph(label="LI")
8729 self.source.endParagraph(label="LI")
87188730 self.source.emit("</listitem>")
87198731 if self.liststack[-1] == "</variablelist>":
87208732 self.source.emit("</varlistentry>")
87238735 self.source.emit("<varlistentry>")
87248736 self.source.emit("<term>%s</term>" % fontclose(mark))
87258737 self.source.emit("<listitem>")
8726 self.source.need_paragraph()
8738 self.source.needParagraph()
87278739 # Bump counter
87288740 self.listcount[-1] += 1
87298741 elif cmd == "P" or cmd == "blank":
87378749 self.source.pushline(r"\fS%s\fP%s" % args)
87388750 # We can ignore user exits, TP, TX, TY.
87398751 elif cmd == "VERBOFF":
8740 self.source.end_block("literallayout", remap='VERBOFF')
8752 self.source.endBlock("literallayout", remap='VERBOFF')
87418753 elif cmd == "VERBON":
8742 self.source.begin_block("literallayout", remap='VERBON')
8754 self.source.beginBlock("literallayout", remap='VERBON')
87438755 elif cmd == "WA":
87448756 self.source.emit("<!-- Start WA address spec: " + repr(args))
87458757 elif cmd == "WE":
87588770 name = "mwww"
87598771 exclusive = False
87608772 toptag = "article"
8761 immutable_set = set(["HX", "BCL", "BGIMG",
8773 immutableSet = set(["HX", "BCL", "BGIMG",
87628774 "URL", "MTO", "FTP", "IMG", "HTML",
87638775 "TAG", "HR",])
8764 ignore_set = set(["HX", "BCL", "BGIMG",
8776 ignoreSet = set(["HX", "BCL", "BGIMG",
87658777 "HTML", "HR", "LK", "NHR",
87668778 "HnS", "HnE", "DC", "HTL", "LINKSTYLE"])
8767 complain_set = set([])
8768 parabreak_set = set([])
8769 sectionbreak_set = set([])
8770 listbreak_set = set([])
8771 scoped_set = set([])
8779 complainSet = set([])
8780 parabreakSet = set([])
8781 sectionbreakSet = set([])
8782 listbreakSet = set([])
8783 scopedSet = set([])
87728784 translations = {}
87738785 reductions = {}
87748786 def __init__(self, source, verbose=0):
87758787 self.source = source
87768788 self.verbose = verbose
8777 def interpret(self, dummy_line, tokens, dummy_caller):
8789 def interpret(self, dummyLine, tokens, dummyCaller):
87788790 cmd = tokens[0][1:]
87798791 args = tokens[1:]
87808792 if len(args) == 1:
87818793 args.append("")
87828794 if len(args) == 2:
87838795 args.append("")
8784 def make_url(url, txt, after):
8796 def makeUrl(url, txt, after):
87858797 return '<ulink url="%s">%s</ulink>%s' % (url,txt,after)
87868798 # Ordinary formatting commands.
87878799 if cmd == "URL":
8788 self.source.pushline(make_url(args[0], args[1], args[2]))
8800 self.source.pushline(makeUrl(args[0], args[1], args[2]))
87898801 elif cmd == "MTO":
8790 self.source.pushline(make_url("mailto:"+args[0], args[1], args[2]))
8802 self.source.pushline(makeUrl("mailto:"+args[0], args[1], args[2]))
87918803 elif cmd == "FTP":
8792 self.source.pushline(make_url(args[0], args[1], args[2]))
8804 self.source.pushline(makeUrl(args[0], args[1], args[2]))
87938805 elif cmd == "IMG":
87948806 ifile = args[1]
87958807 self.source.pushline('<mediaobject>\n<imageobject><imagedata fileref="%s"/></imageobject>\n</mediaobject>' % ifile)
87988810 self.source.pushline('<mediaobject>\n<imageobject><imagedata fileref="%s"/></imageobject>\n</mediaobject>' % ifile)
87998811 elif cmd == "TAG":
88008812 if self.source.docbook5:
8801 self.source.pushline('<anchor xml:id="%s"/>' % (self.source.make_id_from_title(args[0]),))
8802 else:
8803 self.source.pushline('<anchor id="%s"/>' % (self.source.make_id_from_title(args[0]),))
8813 self.source.pushline('<anchor xml:id="%s"/>' % (self.source.makeIdFromTitle(args[0]),))
8814 else:
8815 self.source.pushline('<anchor id="%s"/>' % (self.source.makeIdFromTitle(args[0]),))
88048816 elif cmd == "ULS":
88058817 self.source.pushline("<itemizedlist>")
88068818 elif cmd == "ULE":
88188830
88198831 # This is how we autodetect the right macro set:
88208832
8821 interpreter_dispatch = {
8833 interpreterDispatch = {
88228834 "pp": MeInterpreter,
88238835 "Dt": MdocInterpreter,
88248836 "Dd": MdocInterpreter,
88518863 "TAG": MwwwInterpreter,
88528864 }
88538865
8854 mso_dispatch = {
8866 msoDispatch = {
88558867 "e.tmac": MeInterpreter,
88568868 "doc.tmac": MdocInterpreter,
88578869 "s.tmac": MsInterpreter,
88608872 "www.tmac": MwwwInterpreter,
88618873 }
88628874
8863 required_extensions = {
8875 requiredExtensions = {
88648876 MeInterpreter: "me",
88658877 MsInterpreter: "ms",
88668878 MmInterpreter: "mm",
88708882 # Invocation machinery starts here
88718883 #
88728884
8873 binary_encoding = 'latin-1'
8885 binaryEncoding = 'latin-1'
88748886
88758887 def stringize(o):
88768888 "Turn a byte string into Unicode that preserves 0x80..0xff."
88778889 if isinstance(o, str):
88788890 return o
88798891 if isinstance(o, bytes):
8880 return str(o, encoding=binary_encoding)
8892 return str(o, encoding=binaryEncoding)
88818893 raise ValueError
88828894
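The latin-1 trick used by `stringize` above is worth spelling out: decoding bytes as latin-1 maps every byte value 0x00–0xFF to the Unicode code point of the same number, so arbitrary binary input survives the round trip through `str` losslessly. A standalone sketch (independent of doclifter):

```python
# Latin-1 decoding maps each byte to the code point of the same value,
# so any byte string round-trips through str without loss.
data = bytes(range(256))
text = data.decode("latin-1")
assert all(ord(ch) == b for ch, b in zip(text, data))
assert text.encode("latin-1") == data
```

This is why `binaryEncoding = 'latin-1'` lets the lifter read and re-emit bytes it does not otherwise interpret.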
8883 def make_std_wrapper(stream):
8895 def makeStdWrapper(stream):
88848896 "Standard input/output wrapper factory function for binary I/O"
88858897 if isinstance(stream, io.TextIOWrapper):
88868898 # newline="\n" ensures that Python 3 won't mangle line breaks
8887 # line_buffering=True ensures that interactive command sessions work as expected
8888 return io.TextIOWrapper(stream.buffer, encoding=binary_encoding, newline="\n", line_buffering=True)
8899 # lineBuffering=True ensures that interactive command sessions work as expected
8900 return io.TextIOWrapper(stream.buffer, encoding=binaryEncoding, newline="\n", line_buffering=True)
88898901 return stream
88908902
8891 sys.stdin = make_std_wrapper(sys.stdin)
8892 sys.stdout = make_std_wrapper(sys.stdout)
8893 sys.stderr = make_std_wrapper(sys.stderr)
8894
8895 def transfile(name, arguments, translate_data, trans_filename=None):
8903 sys.stdin = makeStdWrapper(sys.stdin)
8904 sys.stdout = makeStdWrapper(sys.stdout)
8905 sys.stderr = makeStdWrapper(sys.stderr)
8906
8907 def transfile(name, arguments, translateData, transFilename=None):
88968908 "Read input sources entire and transform them in memory."
88978909 if not arguments:
8898 outdoc = translate_data(name, "stdin", sys.stdin.read(), False)
8910 outdoc = translateData(name, "stdin", sys.stdin.read(), False)
88998911 if outdoc:
89008912 stdout.write(stringize(outdoc))
89018913 else:
89108922 stderr.write("%s: can't open tempfile" % name)
89118923 return True
89128924 try:
8913 outdoc = translate_data(name,
8925 outdoc = translateData(name,
89148926 ifile,
89158927 stringize(indoc),
89168928 len(arguments)>1)
89258937 else:
89268938 outfp.write(outdoc)
89278939 outfp.close() # under Windows you can't rename an open file
8928 if not trans_filename:
8940 if not transFilename:
89298941 os.rename(tmpfile, ifile)
8930 elif type(trans_filename) == type(""):
8931 os.rename(tmpfile, ifile + trans_filename)
8942 elif type(transFilename) == type(""):
8943 os.rename(tmpfile, ifile + transFilename)
89328944 else:
8933 os.rename(tmpfile, trans_filename(ifile))
8945 os.rename(tmpfile, transFilename(ifile))
89348946
89358947 stdout = sys.stdout
89368948 stderr = sys.stderr
89388950 globalhints = SemanticHintsRegistry()
89398951 spoofname = None
89408952
8941 def main(args, dummy_mainout=stdout, mainerr=stderr):
8953 def main(args, dummyMainout=stdout, mainerr=stderr):
89428954 #global globalhints, pretty
89438955 global spoofname
89448956 import getopt
89488960 quiet = False
89498961 portability = 0
89508962 docbook5 = False
8951 verbosity_level = 0
8963 verbosityLevel = 0
89528964 verbosity = None
8953 in_encodings = ('UTF-8', 'ISO-8859-1')
8954 out_encoding = "UTF-8"
8965 inEncodings = ('UTF-8', 'ISO-8859-1')
8966 outEncoding = "UTF-8"
89558967 for (switch, val) in options:
89568968 if switch == "-d":
89578969 verbosity = val
89588970 elif switch == "-e":
8959 out_encoding = val
8971 outEncoding = val
89608972 elif switch == "-i":
8961 in_encodings = val.split(',')
8973 inEncodings = val.split(',')
89628974 elif switch == "-D":
89638975 globalhints.post(*val.split("="))
89648976 elif switch == "-I":
89708982 elif switch == '-x':
89718983 docbook5 += 1
89728984 elif switch == '-v':
8973 verbosity_level += 1
8985 verbosityLevel += 1
89748986 elif switch == '-w':
89758987 portability += 1
89768988 elif switch == '-S':
89798991 sys.stdout.write("doclifter version %s\n" % version)
89808992 sys.exit(0)
89818993 if not verbosity:
8982 verbosity = "gpscmibz"[:verbosity_level]
8994 verbosity = "gpscmibz"[:verbosityLevel]
89838995 try:
89848996 lifter = DocLifter(verbosity,
89858997 quiet,
89868998 portability,
89878999 includepath,
8988 in_encodings,
8989 out_encoding,
9000 inEncodings,
9001 outEncoding,
89909002 docbook5)
89919003 transfile("doclifter", arguments, lifter, ".xml")
89929004 if hintfile:
+0
-1193
doclifter.1
0 '\" t
1 .\" Title: doclifter
2 .\" Author: [see the "Author" section]
3 .\" Generator: DocBook XSL Stylesheets v1.79.1 <http://docbook.sf.net/>
4 .\" Date: 06/12/2018
5 .\" Manual: Documentation Tools
6 .\" Source: doclifter
7 .\" Language: English
8 .\"
9 .TH "DOCLIFTER" "1" "06/12/2018" "doclifter" "Documentation Tools"
10 .\" -----------------------------------------------------------------
11 .\" * Define some portability stuff
12 .\" -----------------------------------------------------------------
13 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
14 .\" http://bugs.debian.org/507673
15 .\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
16 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
17 .ie \n(.g .ds Aq \(aq
18 .el .ds Aq '
19 .\" -----------------------------------------------------------------
20 .\" * set default formatting
21 .\" -----------------------------------------------------------------
22 .\" disable hyphenation
23 .nh
24 .\" disable justification (adjust text to left margin only)
25 .ad l
26 .\" -----------------------------------------------------------------
27 .\" * MAIN CONTENT STARTS HERE *
28 .\" -----------------------------------------------------------------
29 .SH "NAME"
30 doclifter \- translate troff requests into DocBook
31 .SH "SYNOPSIS"
32 .HP \w'\fBdoclifter\fR\ 'u
33 \fBdoclifter\fR [\-o\ \fIoutput\-location\fR] [\-e\ \fIoutput\-encoding\fR] [\-i\ \fIinput\-encodings\fR] [\-h\ \fIhintfile\fR] [\-q] [\-x] [\-v] [\-w] [\-V] [\-D\ \fItoken=type\fR] [\-I\ \fIpath\fR] [\-S\ \fIspoofname\fR] \fIfile\fR...
34 .SH "DESCRIPTION"
35 .PP
36 \fBdoclifter\fR
37 translates documents written in troff macros to DocBook\&. Structural subsets of the requests in
38 \fBman\fR(7),
39 \fBmdoc\fR(7),
40 \fBms\fR(7),
41 \fBme\fR(7),
42 \fBmm\fR(7), and
43 \fBtroff\fR(1)
44 are supported\&.
45 .PP
46 The translation brings over all the structure of the original document at section, subsection, and paragraph level\&. Command and C function synopses are translated into DocBook markup, not just a verbatim display\&. Tables (TBL markup) are translated into DocBook table markup\&. PIC diagrams are translated into SVG\&. Troff\-level information that might have structural implications is preserved in XML comments\&.
47 .PP
48 Where possible, font\-change macros are translated into structural markup\&.
49 \fBdoclifter\fR
50 recognizes stereotyped patterns of markup and content (such as the use of italics in a FILES section to mark filenames) and lifts them\&. A means to edit, add, and save semantic hints about highlighting is supported\&.
51 .PP
52 Some cliches are recognized and lifted to structural markup even without highlighting\&. Patterns recognized include such things as URLs, email addresses, man page references, and C program listings\&.
53 .PP
54 The
55 \fB\&.in\fR
56 and
57 \fB\&.ti\fR
58 requests are passed through with complaints\&. They indicate presentation\-level markup that
59 \fBdoclifter\fR
60 cannot translate into structure; the output will require hand\-fixing\&.
61 .PP
62 The
63 \fB\&.ta\fR
64 request is passed through with a complaint unless the immediately following text lines contain a tab, in which case the following span of lines containing tabs is lifted to a table\&.
65 .PP
66 Under some circumstances,
67 \fBdoclifter\fR
68 can even lift formatted manual pages and the text output produced by
69 \fBlynx\fR(1)
70 from HTML\&. If it finds no macros in the input, but does find a NAME section header, it tries to interpret the plain text as a manual page (skipping boilerplate headers and footers generated by
71 \fBlynx\fR(1))\&. Translations produced in this way will be prone to miss structural features, but this fallback is good enough for simple man pages\&.
72 .PP
73 \fBdoclifter\fR
74 does not do a perfect job, merely a surprisingly good one\&. Final polish should be applied by a human being capable of recognizing patterns too subtle for a computer\&. But
75 \fBdoclifter\fR
76 will almost always produce translations that are good enough to be usable before hand\-hacking\&.
77 .PP
78 See the
79 Troubleshooting
80 section for discussion of how to solve document conversion problems\&.
81 .SH "OPTIONS"
82 .PP
83 If called without arguments
84 \fBdoclifter\fR
85 acts as a filter, translating troff source input on standard input to DocBook markup on standard output\&. If called with arguments, each argument file is translated separately (but hints are retained, see below); the suffix
86 \&.xml
87 is given to the translated output\&.
88 .PP
89 \-o
90 .RS 4
91 Set the output location where files will be saved\&. Defaults to current working directory\&.
92 .RE
93 .PP
94 \-h
95 .RS 4
96 Name a file to which information on semantic hints gathered during analysis should be written\&.
97 .RE
98 .PP
99 \-D
100 .RS 4
101 The
102 \fB\-D\fR
103 allows you to post a hint\&. This may be useful, for example, if
104 \fBdoclifter\fR
105 is mis\-parsing a synopsis because it doesn\*(Aqt recognize a token as a command\&. This hint is merged after hints in the input source have been read\&.
106 .RE
107 .PP
108 \-I
109 .RS 4
110 The
111 \fB\-I\fR
112 option adds its argument to the include path used when doclifter searches for inclusions\&. The include path is initially just the current directory\&.
113 .RE
114 .PP
115 \-S
116 .RS 4
117 Set the filename to be used in error and warning messages\&. This is mainly intended for use by test scripts\&.
118 .RE
119 .PP
120 \-e
121 .RS 4
122 The
123 \fB\-e\fR
124 allows you to set the output encoding of the XML and the encoding field to be emitted in its header\&. It defaults to UTF\-8\&.
125 .RE
126 .PP
127 \-i
128 .RS 4
129 The
130 \fB\-i\fR
131 allows you to set a comma\-separated list of encodings to be looked for in the input\&. The default is "ISO\-8859\-1,UTF\-8", which should cover almost all cases\&.
132 .RE
133 .PP
134 \-q
135 .RS 4
136 Normally, requests that
137 \fBdoclifter\fR
138 could not interpret (usually because they\*(Aqre presentation\-level) are passed through to XML comments in the output\&. The \-q option suppresses this\&. It also suppresses listing of macros\&. Messages about requests that are unrecognized or cannot be translated go to standard error whatever the state of this option\&. This option is intended to reduce clutter when you believe you have a clean lift of a document and want to lose the troff legacy\&.
139 .RE
140 .PP
141 \-x
142 .RS 4
143 The \-x option requests that
144 \fBdoclifter\fR
145 generate DocBook version 5 compatible xml content, rather than its default DocBook version 4\&.4 output\&. Inclusions and entities may not be handled correctly with this switch enabled\&.
146 .RE
147 .PP
148 \-v
149 .RS 4
150 The \-v option makes
151 \fBdoclifter\fR
152 noisier about what it\*(Aqs doing\&. This is mainly useful for debugging\&.
153 .RE
154 .PP
155 \-w
156 .RS 4
157 Enable strict portability checking\&. Multiple instances of \-w increase the strictness\&. See
158 the section called \(lqPORTABILITY CHECKING\(rq\&.
159 .RE
160 .PP
161 \-V
162 .RS 4
163 With this option, the program emits a version message and exits\&.
164 .RE
165 .SH "TRANSLATION RULES"
166 .PP
167 Overall, you can expect that font changes will be turned into
168 Emphasis
169 macros with a
170 Remap
171 attribute taken from the troff font name\&. The basic font names are R, I, B, U, CW, and SM\&.
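As an illustration (a sketch of the mapping; exact attribute quoting may differ in real output), a font alternation in troff source such as:

```troff
\fBdoclifter\fR reads \fItroff\fR input.
```

would typically lift to:

```xml
<emphasis remap='B'>doclifter</emphasis> reads <emphasis remap='I'>troff</emphasis> input.
```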
172 .PP
173 Troff and macro\-package special character escapes are mapped into ISO character entities\&.
174 .PP
175 When
176 \fBdoclifter\fR
177 encounters a
178 \fB\&.so\fR
179 directive, it searches for the file\&. If it can get read access to the file, and open it, and the file consists entirely of command lines and comments, then it is included\&. If any of these conditions fails, an entity reference for it is generated\&.
180 .PP
181 \fBdoclifter\fR
182 performs special parsing when it recognizes a display such as is generated by
183 \fB\&.DS/\&.DE\fR\&. It repeatedly tries to parse first a function synopsis, and then plain text off what remains in the display\&. Thus, most inline C function prototypes will be lifted to structured markup\&.
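For instance, a display containing a C prototype (the DocBook below is an illustrative sketch of the kind of funcsynopsis markup this parsing produces, not guaranteed verbatim output):

```troff
.DS
int stat(const char *path, struct stat *buf);
.DE
```

might lift to:

```xml
<funcsynopsis>
<funcprototype>
<funcdef>int <function>stat</function></funcdef>
<paramdef>const char *<parameter>path</parameter></paramdef>
<paramdef>struct stat *<parameter>buf</parameter></paramdef>
</funcprototype>
</funcsynopsis>
```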
184 .PP
185 Some notes on specific translations:
186 .SS "Man Translation"
187 .PP
188 \fBdoclifter\fR
189 does a good job on most man pages\&. It knows about the extended
190 \fBUR\fR/\fBUE\fR/\fBUN\fR
191 and
192 \fBURL\fR
193 requests supported under Linux\&. If any
194 \fB\&.UR\fR
195 request is present, it will translate these but not wrap URLs outside them with
196 Ulink
197 tags\&. It also knows about the extended
198 \fB\&.L\fR
199 (literal) font markup from Bell Labs Version 8, and its friends\&.
200 .PP
201 The
202 \fB\&.TH\fR
203 macro is used to generate a
204 RefMeta
205 section\&. If present, the date/source/manual arguments (see
206 \fBman\fR(7)) are wrapped in
207 RefMiscInfo
208 tag pairs with those class attributes\&. Note that
209 \fBdoclifter\fR
210 does not change the date\&.
211 .PP
212 \fBdoclifter\fR
213 performs special parsing when it recognizes a synopsis section\&. It repeatedly tries to parse first a function synopsis, then a command synopsis, and then plain text off what remains in the section\&.
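.PP
For example, a synopsis line like this one (the command and its options are hypothetical):
.sp
.if n \{\
.RS 4
.\}
.nf
foo [\-v] [\-o \fIfile\fR] \fIarg\fR \&.\&.\&.
.fi
.if n \{\
.RE
.\}
.sp
will normally be lifted to
CmdSynopsis
markup, with each bracketed group becoming an optional
Arg
element and the trailing ellipsis becoming a repeat indication\&.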
214 .PP
215 The following man macros are translated into emphasis tags with a remap attribute:
216 \fB\&.B\fR,
217 \fB\&.I\fR,
218 \fB\&.L\fR,
219 \fB\&.BI\fR,
220 \fB\&.BR\fR,
221 \fB\&.BL\fR,
222 \fB\&.IB\fR,
223 \fB\&.IR\fR,
224 \fB\&.IL\fR,
225 \fB\&.RB\fR,
226 \fB\&.RI\fR,
227 \fB\&.RL\fR,
228 \fB\&.LB\fR,
229 \fB\&.LI\fR,
230 \fB\&.LR\fR,
231 \fB\&.SB\fR,
232 \fB\&.SM\fR\&. Some stereotyped patterns involving these macros are recognized and turned into semantic markup\&.
233 .PP
234 The following macros are translated into paragraph breaks:
235 \fB\&.LP\fR,
236 \fB\&.PP\fR,
237 \fB\&.P\fR,
238 \fB\&.HP\fR, and the single\-argument form of
239 \fB\&.IP\fR\&.
240 .PP
241 The two\-argument form of
242 \fB\&.IP\fR
243 is translated either as a
244 VariableList
245 (usually) or
246 ItemizedList
247 (if the tag is the troff bullet or square character)\&.
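.PP
A sketch of the usual lifting (the option and its description are hypothetical):
.sp
.if n \{\
.RS 4
.\}
.nf
\&.IP "\-x" 4
Enable the x feature\&.
.fi
.if n \{\
.RE
.\}
.sp
becomes a
VariableList
entry with
\fB\-x\fR
as the term and the following text as the item body\&.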
248 .PP
249 The following macros are translated semantically:
250 \fB\&.SH\fR, \fB\&.SS\fR,
251 \fB\&.TP\fR,
252 \fB\&.UR\fR,
253 \fB\&.UE\fR,
254 \fB\&.UN\fR,
255 \fB\&.IX\fR\&. A
256 \fB\&.UN\fR
257 call just before
258 \fB\&.SH\fR
259 or
260 \fB\&.SS\fR
261 sets the ID for the new section\&.
262 .PP
263 The
264 \fB\e*R\fR,
265 \fB\e*(Tm\fR,
266 \fB\e*(lq\fR, and
267 \fB\e*(rq\fR
268 symbols are translated\&.
269 .PP
270 The following (purely presentation\-level) macros are ignored:
271 \fB\&.PD\fR, \fB\&.DT\fR\&.
272 .PP
273 The
274 \fB\&.RS\fR/\fB\&.RE\fR
275 macros are translated differently depending on whether or not they precede list markup\&. When
276 \fB\&.RS\fR
277 occurs just before
278 \fB\&.TP\fR
279 or
280 \fB\&.IP\fR
281 the result is nested lists\&. Otherwise, the
282 \fB\&.RS\fR/\fB\&.RE\fR
283 pair is translated into a
284 Blockquote
285 tag\-pair\&.
286 .PP
287 \fB\&.DS\fR/\fB\&.DE\fR
288 is not part of the documented man macro set, but is recognized because it shows up with some frequency on legacy man pages from older Unixes\&.
289 .PP
290 Certain extension macros originally defined under Ultrix are translated structurally, including those that occasionally show up on the manual pages of Linux and other open\-source Unixes\&.
291 \fB\&.EX\fR/\fB\&.EE\fR
292 (and the synonyms
293 \fB\&.Ex\fR/\fB\&.Ee\fR),
294 \fB\&.Ds\fR/\fB\&.De\fR,
296 \fB\&.NT\fR/\fB\&.NE\fR,
297 \fB\&.PN\fR, and
298 \fB\&.MS\fR
299 are translated structurally\&.
300 .PP
301 The following extension macros used by the X distribution are also recognized and translated structurally:
302 \fB\&.FD\fR,
303 \fB\&.FN\fR,
304 \fB\&.IN\fR,
305 \fB\&.ZN\fR,
306 \fB\&.hN\fR, and
307 \fB\&.C{\fR/\fB\&.C}\fR\&.
308 The
309 \fB\&.TA\fR
310 and
311 \fB\&.IN\fR
312 requests are ignored\&.
313 .PP
314 When the man macros are active, any
315 \fB\&.Pp\fR
316 macro definition containing the request
317 \fB\&.PP\fR
318 will be ignored, and all instances of
319 \fB\&.Pp\fR
320 replaced with
321 \fB\&.PP\fR\&. Similarly,
322 \fB\&.Tp\fR
323 will be replaced with
324 \fB\&.TP\fR\&. This is the least painful way to deal with some frequently\-encountered stereotyped wrapper definitions that would otherwise cause serious interpretation problems\&.
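.PP
A minimal instance of the kind of wrapper definition this handles:
.sp
.if n \{\
.RS 4
.\}
.nf
\&.de Pp
\&.PP
\&..
.fi
.if n \{\
.RE
.\}
.sp
The definition itself is discarded, and every later
\fB\&.Pp\fR
call is treated as a plain
\fB\&.PP\fR\&.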
325 .PP
326 Known problem areas with man translation:
327 .sp
328 .RS 4
329 .ie n \{\
330 \h'-04'\(bu\h'+03'\c
331 .\}
332 .el \{\
333 .sp -1
334 .IP \(bu 2.3
335 .\}
336 Weird uses of
337 \fB\&.TP\fR\&. These will sometimes generate invalid XML and sometimes result in a FIXME comment in the generated XML (a warning message will also go to standard error)\&.
338 .RE
339 .sp
340 .RS 4
341 .ie n \{\
342 \h'-04'\(bu\h'+03'\c
343 .\}
344 .el \{\
345 .sp -1
346 .IP \(bu 2.3
347 .\}
348 It is debatable how the man macros
349 \fB\&.HP\fR
350 and
351 \fB\&.IP\fR
352 without tag should be translated\&. We treat them as an ordinary paragraph break\&. We could visually simulate a hanging paragraph with list markup, but this would not be a structural translation\&.
353 .RE
354 .SS "Pod2man Translation"
355 .PP
356 \fBdoclifter\fR
357 recognizes the extension macros produced by
358 \fBpod2man\fR
359 (\fB\&.Sh\fR,
360 \fB\&.Sp\fR,
361 \fB\&.Ip\fR,
362 \fB\&.Vb\fR,
363 \fB\&.Ve\fR) and translates them structurally\&.
364 .PP
365 The results of lifting pages produced by
366 \fBpod2man\fR
367 should be checked carefully by eyeball, especially the rendering of command and function synopses\&.
368 \fBPod2man\fR
369 generates rather perverse markup;
370 \fBdoclifter\fR\*(Aqs struggle to untangle it is sometimes in vain\&.
371 .PP
372 If possible, generate your DocBook from the POD sources\&. There is a
373 pod2docbook
374 module on CPAN that does this\&.
375 .SS "Tkman Translation"
376 .PP
377 \fBdoclifter\fR
378 recognizes the extension macros used by the Tcl/Tk documentation system:
379 \fB\&.AP\fR,
380 \fB\&.AS\fR,
381 \fB\&.BS\fR,
382 \fB\&.BE\fR,
383 \fB\&.CS\fR,
384 \fB\&.CE\fR,
385 \fB\&.DS\fR,
386 \fB\&.DE\fR,
387 \fB\&.SO\fR,
388 \fB\&.SE\fR,
389 \fB\&.UL\fR,
390 \fB\&.VS\fR,
391 \fB\&.VE\fR\&. The
392 \fB\&.AP\fR,
393 \fB\&.CS\fR,
394 \fB\&.CE\fR,
395 \fB\&.SO\fR,
396 \fB\&.SE\fR,
397 \fB\&.UL\fR,
398 \fB\&.QW\fR
399 and
400 \fB\&.PQ\fR
401 macros are translated structurally\&.
402 .SS "Mandoc Translation"
403 .PP
404 \fBdoclifter\fR
405 should be able to do an excellent job on most
406 \fBmdoc\fR(7)
407 pages, because this macro package expresses a lot of semantic structure\&.
408 .PP
409 Known problems with mandoc translation: All
410 \fB\&.Bd\fR/\fB\&.Ed\fR
411 display blocks are translated as
412 LiteralLayout
413 tag pairs\&.
415 .SS "Ms Translation"
416 .PP
417 \fBdoclifter\fR
418 does a good job on most ms pages\&. One weak spot to watch out for is the generation of Author and Affiliation tags\&. The heuristics used to mine this information out of the
419 \fB\&.AU\fR
420 section work for authors who format their names in the way usual for English (e\&.g\&. "M\&. E\&. Lesk", "Eric S\&. Raymond") but are quite brittle\&.
421 .PP
422 For a document to be recognized as containing ms markup, it must have the extension
423 \&.ms\&. This avoids problems with false positives\&.
424 .PP
425 The
426 \fB\&.TL\fR,
427 \fB\&.AU\fR,
428 \fB\&.AI\fR, and
429 \fB\&.AE\fR
430 macros turn into article metainformation in the expected way\&. The
431 \fB\&.PP\fR,
432 \fB\&.LP\fR,
433 \fB\&.SH\fR, and
434 \fB\&.NH\fR
435 macros turn into paragraph and section structure\&. The tagged form of
436 \fB\&.IP\fR
437 is translated either as a
438 VariableList
439 (usually) or
440 ItemizedList
441 (if the tag is the troff bullet or square character); the untagged version is treated as an ordinary paragraph break\&.
442 .PP
443 The
444 \fB\&.DS\fR/\fB\&.DE\fR
445 pair is translated to a
446 LiteralLayout
447 tag pair\&. The
449 \fB\&.FS\fR/\fB\&.FE\fR
450 pair is translated to a
451 Footnote
452 tag pair\&. The
453 \fB\&.QP\fR/\fB\&.QS\fR/\fB\&.QE\fR
454 requests define
455 BlockQuotes\&.
456 .PP
457 The
458 \fB\&.UL\fR
459 font change is mapped to U\&.
460 \fB\&.SM\fR
461 and
462 \fB\&.LG\fR
463 become numeric plus or minus size steps suffixed to the
464 Remap
465 attribute\&.
466 .PP
467 The
468 \fB\&.B1\fR
469 and
470 \fB\&.B2\fR
471 box macros are translated to a
472 Sidebar
473 tag pair\&.
474 .PP
475 All macros relating to page footers, multicolumn mode, and keeps are ignored (\fB\&.ND\fR,
476 \fB\&.DA\fR,
477 \fB\&.1C\fR,
478 \fB\&.2C\fR,
479 \fB\&.MC\fR,
480 \fB\&.BX\fR,
481 \fB\&.KS\fR,
482 \fB\&.KE\fR,
483 \fB\&.KF\fR)\&. The
484 \fB\&.R\fR,
485 \fB\&.RS\fR, and
486 \fB\&.RE\fR
487 macros are ignored as well\&.
488 .SS "Me Translation"
489 .PP
490 Translation of me documents tends to produce crude results that need a lot of hand\-hacking\&. The format has little usable structure, and documents written in it tend to use a lot of low\-level troff macros; both these properties tend to confuse
491 \fBdoclifter\fR\&.
492 .PP
493 For a document to be recognized as containing me markup, it must have the extension
494 \&.me\&. This avoids problems with false positives\&.
495 .PP
496 The following macros are translated into paragraph breaks:
497 \fB\&.lp\fR,
498 \fB\&.pp\fR\&. The
499 \fB\&.ip\fR
500 macro is translated into a
501 VariableList\&. The
502 \fB\&.bp\fR
503 macro is translated into an
504 ItemizedList\&. The
505 \fB\&.np\fR
506 macro is translated into an
507 OrderedList\&.
508 .PP
509 The b, i, and r fonts are mapped to emphasis tags with B, I, and R
510 Remap
511 attributes\&. The
512 \fB\&.rb\fR
513 ("real bold") font is treated the same as
514 \fB\&.b\fR\&.
515 .PP
516 \fB\&.q(\fR/\fB\&.q)\fR
517 is translated structurally\&.
519 .PP
520 Most other requests are ignored\&.
521 .SS "Mm Translation"
522 .PP
523 Memorandum Macros documents translate well, as these macros carry a lot of structural information\&. The translation rules are tuned for Memorandum or Released Paper styles; information associated with external\-letter style will be preserved in comments\&.
524 .PP
525 For a document to be recognized as containing mm markup, it must have the extension
526 \&.mm\&. This avoids problems with false positives\&.
527 .PP
528 The following highlight macros are translated into Emphasis tags:
529 \fB\&.B\fR,
530 \fB\&.I\fR,
531 \fB\&.R\fR,
532 \fB\&.BI\fR,
533 \fB\&.BR\fR,
534 \fB\&.IB\fR,
535 \fB\&.IR\fR,
536 \fB\&.RB\fR,
537 \fB\&.RI\fR\&.
538 .PP
539 The following macros are structurally translated:
540 \fB\&.AE\fR,
541 \fB\&.AF\fR,
542 \fB\&.AL\fR,
543 \fB\&.RL\fR,
544 \fB\&.APP\fR,
545 \fB\&.APPSK\fR,
546 \fB\&.AS\fR,
547 \fB\&.AT\fR,
548 \fB\&.AU\fR,
549 \fB\&.B1\fR,
550 \fB\&.B2\fR,
551 \fB\&.BE\fR,
552 \fB\&.BL\fR,
553 \fB\&.ML\fR,
554 \fB\&.BS\fR,
555 \fB\&.BVL\fR,
556 \fB\&.VL\fR,
557 \fB\&.DE\fR,
558 \fB\&.DL\fR,
559 \fB\&.DS\fR,
560 \fB\&.FE\fR,
561 \fB\&.FS\fR,
562 \fB\&.H\fR,
563 \fB\&.HU\fR,
564 \fB\&.IA\fR,
565 \fB\&.IE\fR,
566 \fB\&.IND\fR,
567 \fB\&.LB\fR,
568 \fB\&.LC\fR,
569 \fB\&.LE\fR,
570 \fB\&.LI\fR,
571 \fB\&.P\fR,
572 \fB\&.RF\fR,
573 \fB\&.SM\fR,
574 \fB\&.TL\fR,
575 \fB\&.VERBOFF\fR,
576 \fB\&.VERBON\fR,
577 \fB\&.WA\fR,
578 \fB\&.WE\fR\&.
579 .PP
580 The following macros are ignored:
581 .PP
582 \ \&\fB\&.)E\fR,
583 \fB\&.1C\fR,
584 \fB\&.2C\fR,
585 \fB\&.AST\fR,
586 \fB\&.AV\fR,
587 \fB\&.AVL\fR,
588 \fB\&.COVER\fR,
589 \fB\&.COVEND\fR,
590 \fB\&.EF\fR,
591 \fB\&.EH\fR,
592 \fB\&.EDP\fR,
593 \fB\&.EPIC\fR,
594 \fB\&.FC\fR,
595 \fB\&.FD\fR,
596 \fB\&.HC\fR,
597 \fB\&.HM\fR,
598 \fB\&.GETR\fR,
599 \fB\&.GETST\fR,
600 \fB\&.HM\fR,
601 \fB\&.INITI\fR,
602 \fB\&.INITR\fR,
603 \fB\&.INDP\fR,
604 \fB\&.ISODATE\fR,
605 \fB\&.MT\fR,
606 \fB\&.NS\fR,
607 \fB\&.ND\fR,
608 \fB\&.OF\fR,
609 \fB\&.OH\fR,
610 \fB\&.OP\fR,
611 \fB\&.PGFORM\fR,
612 \fB\&.PGNH\fR,
613 \fB\&.PE\fR,
614 \fB\&.PF\fR,
615 \fB\&.PH\fR,
616 \fB\&.RP\fR,
617 \fB\&.S\fR,
618 \fB\&.SA\fR,
619 \fB\&.SP\fR,
620 \fB\&.SG\fR,
621 \fB\&.SK\fR,
622 \fB\&.TAB\fR,
623 \fB\&.TB\fR,
624 \fB\&.TC\fR,
625 \fB\&.VM\fR,
626 \fB\&.WC\fR\&.
627 .PP
628 The following macros generate warnings:
629 \fB\&.EC\fR,
630 \fB\&.EX\fR,
631 \fB\&.GETHN\fR,
632 \fB\&.GETPN\fR,
633 \fB\&.GETR\fR,
634 \fB\&.GETST\fR,
635 \fB\&.LT\fR,
636 \fB\&.LD\fR,
637 \fB\&.LO\fR,
638 \fB\&.MOVE\fR,
639 \fB\&.MULB\fR,
640 \fB\&.MULN\fR,
641 \fB\&.MULE\fR,
642 \fB\&.NCOL\fR,
643 \fB\&.nP\fR,
644 \fB\&.PIC\fR,
645 \fB\&.RD\fR,
646 \fB\&.RS\fR,
647 \fB\&.RE\fR,
648 \fB\&.SETR\fR\&.
649 .PP
650 Pairs of
651 \fB\&.DS\fR/\fB\&.DE\fR
652 are interpreted as informal figures\&. If an
653 \fB\&.FG\fR
654 is present it becomes a caption element\&.
655 .PP
656 \ \&\fB\&.BS\fR/\fB\&.BE\fR
657 and
658 \fB\&.IA\fR/\fB\&.IE\fR
659 pairs are passed through\&. The text inside them may need to be deleted or moved\&.
660 .PP
661 The mark argument of
662 \fB\&.ML\fR
663 is ignored; the following list is formatted as a normal
664 ItemizedList\&.
665 .PP
666 The contents of
667 \fB\&.DS\fR/\fB\&.DE\fR
668 or
669 \fB\&.DF\fR/\fB\&.DE\fR
670 gets turned into a
671 Screen
672 display\&. Arguments controlling presentation\-level formatting are ignored\&.
673 .SS "Mwww Translation"
674 .PP
675 The mwww macros are an extension to the man macros supported by
676 \fBgroff\fR(1)
677 for producing web pages\&.
678 .PP
679 The
680 \fBURL\fR,
681 \fBFTP\fR,
682 \fBMAILTO\fR,
684 \fBIMAGE\fR,
685 \fBTAG\fR
686 tags are translated structurally\&. The
687 \fBHTMLINDEX\fR,
688 \fBBODYCOLOR\fR,
689 \fBBACKGROUND\fR,
690 \fBHTML\fR, and
691 \fBLINE\fR
692 tags are ignored\&.
693 .SS "TBL Translation"
694 .PP
695 All structural features of TBL tables are translated, including both horizontal and vertical spanning with \(oqs\(cq and \(oq^\(cq\&. The \(oql\(cq, \(oqr\(cq, and \(oqc\(cq formats are supported; the \(oqn\(cq column format is rendered as \(oqr\(cq\&. Line continuations with
696 T{
697 and
698 T}
699 are handled correctly\&. So is
700 \fB\&.TH\fR\&.
701 .PP
702 The
703 \fBexpand\fR,
704 \fBbox\fR,
705 \fBdoublebox\fR,
706 \fBallbox\fR,
707 \fBcenter\fR,
708 \fBleft\fR, and
709 \fBright\fR
710 options are supported\&. The GNU synonyms
711 \fBframe\fR
712 and
713 \fBdoubleframe\fR
714 are also recognized\&. But the distinction between single and double rules and boxes is lost\&.
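.PP
An illustrative table that exercises several of these features (the data is made up):
.sp
.if n \{\
.RS 4
.\}
.nf
\&.TS
center box;
c s
l n\&.
Totals
alpha	10
beta	20
\&.TE
.fi
.if n \{\
.RE
.\}
.sp
Here the \(oqs\(cq span in the first row is preserved, and the \(oqn\(cq column is rendered as \(oqr\(cq\&.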
715 .PP
716 Table continuations (\&.T&) are not supported\&.
717 .PP
718 If the first nonempty line of text immediately before a table is boldfaced, it is interpreted as a title for the table and the table is generated using a
719 table
720 and
721 title\&. Otherwise the table is translated with
722 informaltable\&.
723 .PP
724 Most other presentation\-level TBL commands are ignored\&. The \(oqb\(cq format qualifier is processed, but point size and width qualifiers are not\&.
725 .SS "Pic Translation"
726 .PP
727 PIC sections are translated to SVG\&.
728 doclifter
729 calls out to
730 \fBpic2plot\fR(1)
731 to accomplish this; you must have that utility installed for PIC translation to work\&.
732 .SS "Eqn Translation"
733 .PP
734 EQN sections are filtered into embedded MathML with
735 \fBeqn \-TMathML\fR
736 if possible, otherwise passed through enclosed in
737 LiteralLayout
738 tags\&. After a delim statement has been seen, inline eqn delimiters are translated into an XML processing instruction\&. Exception: inline eqn equations consisting of a single character are translated to an
739 Emphasis
740 with a Role attribute of eqn\&.
741 .SS "Troff Translation"
742 .PP
743 The troff translation is meant only to support interpretation of the macro sets\&. It is not useful standalone\&.
744 .PP
745 The
746 \fB\&.nf\fR
747 and
748 \fB\&.fi\fR
749 macros are interpreted as literal\-layout boundaries\&. Calls to the
750 \fB\&.so\fR
751 macro either cause inclusion or are translated into XML entity inclusions (see above)\&. Calls to the
752 \fB\&.ul\fR
753 and
754 \fB\&.cu\fR
755 macros cause following lines to be wrapped in an
756 Emphasis
757 tag with a
758 Remap
759 attribute of "U"\&. Calls to
760 \fB\&.ft\fR
761 generate corresponding start or end emphasis tags\&. Calls to
762 \fB\&.tr\fR
763 cause character translation on output\&. Calls to
764 \fB\&.bp\fR
765 generate a
766 BeginPage
767 tag (in paragraphed text only)\&. Calls to
768 \fB\&.sp\fR
769 generate a paragraph break (in paragraphed text only)\&. Calls to
770 \fB\&.ti\fR
771 wrap the following line in a
772 BlockQuote\&.
773 These are the only troff requests we translate to DocBook\&. The rest of the troff emulation exists because macro packages use it internally to expand macros into elements that might be structural\&.
774 .PP
775 Requests relating to macro definitions and strings (\fB\&.ds\fR,
776 \fB\&.as\fR,
777 \fB\&.de\fR,
778 \fB\&.am\fR,
779 \fB\&.rm\fR,
780 \fB\&.rn\fR,
781 \fB\&.em\fR) are processed and expanded\&. The
782 \fB\&.ig\fR
783 macro is also processed\&.
784 .PP
785 Conditional macros (\fB\&.if\fR,
786 \fB\&.ie\fR,
787 \fB\&.el\fR) are handled\&. The built\-in conditions o, n, t, e, and c are evaluated as if for
788 nroff
789 on page one of a document\&. The m, d, and r troff conditionals are also interpreted\&. String comparisons are evaluated by straight textual comparison\&. All numeric expressions evaluate to true\&.
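.PP
For example, in this fragment (illustrative only) the first branch is kept and the second is discarded, because the n condition is true under the nroff assumption:
.sp
.if n \{\
.RS 4
.\}
.nf
\&.ie n This sentence is kept\&.
\&.el This sentence is dropped\&.
.fi
.if n \{\
.RE
.\}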
790 .PP
791 The extended
792 groff
793 requests
794 \fBcc\fR,
795 \fBc2\fR,
796 \fBab\fR,
797 \fBals\fR,
798 \fBdo\fR,
799 \fBnop\fR, and
800 \fBreturn\fR
801 and
802 \fBshift\fR
803 are interpreted\&. Its
804 \fB\&.PSPIC\fR
805 extension is translated into a
806 MediaObject\&.
807 .PP
808 The
809 \fB\&.tm\fR
810 macro writes its arguments to standard error (with
811 \fB\-t\fR)\&. The
812 \fB\&.pm\fR
813 macro reports on defined macros and strings\&. These facilities may aid in debugging your translation\&.
814 .PP
815 Some troff escape sequences are lifted:
816 .sp
817 .RS 4
818 .ie n \{\
819 \h'-04' 1.\h'+01'\c
820 .\}
821 .el \{\
822 .sp -1
823 .IP " 1." 4.2
824 .\}
825 The \ee and \e\e escapes become a bare backslash, \e\&. a period, and \e\- a bare dash\&.
826 .RE
827 .sp
828 .RS 4
829 .ie n \{\
830 \h'-04' 2.\h'+01'\c
831 .\}
832 .el \{\
833 .sp -1
834 .IP " 2." 4.2
835 .\}
836 The troff escapes \e^, \e`, \e\*(Aq, \e&, \e0, and \e| are lifted to equivalent ISO special spacing characters\&.
837 .RE
838 .sp
839 .RS 4
840 .ie n \{\
841 \h'-04' 3.\h'+01'\c
842 .\}
843 .el \{\
844 .sp -1
845 .IP " 3." 4.2
846 .\}
847 A \e followed by space is translated to an ISO non\-breaking space entity\&.
848 .RE
849 .sp
850 .RS 4
851 .ie n \{\
852 \h'-04' 4.\h'+01'\c
853 .\}
854 .el \{\
855 .sp -1
856 .IP " 4." 4.2
857 .\}
858 A \e~ is also translated to an ISO non\-breaking space entity; properly this should be a space that can\*(Aqt be used for a linebreak but stretches like ordinary whitepace during line adjustment, but there is no ISO or Unicode entity for that\&.
859 .RE
860 .sp
861 .RS 4
862 .ie n \{\
863 \h'-04' 5.\h'+01'\c
864 .\}
865 .el \{\
866 .sp -1
867 .IP " 5." 4.2
868 .\}
869 The \eu and \ed half\-line motion vertical motion escapes, when paired, become
870 \fBSuperscript\fR
871 or
872 \fBSubscript\fR
873 tags\&.
874 .RE
875 .sp
876 .RS 4
877 .ie n \{\
878 \h'-04' 6.\h'+01'\c
879 .\}
880 .el \{\
881 .sp -1
882 .IP " 6." 4.2
883 .\}
884 The \ec escape is handled as a line continuation in circumstances where that matters (e\&.g\&. for token\-pasting)\&.
885 .RE
886 .sp
887 .RS 4
888 .ie n \{\
889 \h'-04' 7.\h'+01'\c
890 .\}
891 .el \{\
892 .sp -1
893 .IP " 7." 4.2
894 .\}
895 The \ef escape for font changes is translated in various context\-dependent ways\&. First,
896 \fBdoclifter\fR
897 looks for cliches involving font changes that have semantic meaning, and lifts to a structural tag\&. If it can\*(Aqt do that, it generates an
898 Emphasis
899 tag\&.
900 .RE
901 .sp
902 .RS 4
903 .ie n \{\
904 \h'-04' 8.\h'+01'\c
905 .\}
906 .el \{\
907 .sp -1
908 .IP " 8." 4.2
909 .\}
910 The \em[] extension is translated into a
911 phrase
912 span with a remap attribute carrying the color\&. Note: Stylesheets typically won\*(Aqt render this!
913 .RE
914 .sp
915 .RS 4
916 .ie n \{\
917 \h'-04' 9.\h'+01'\c
918 .\}
919 .el \{\
920 .sp -1
921 .IP " 9." 4.2
922 .\}
923 Some uses of the \eo request are translated: pairs with a letter followed by one of the characters ` \*(Aq : ^ o ~ are translated to combining forms with diacriticals acute, grave, umlaut, circumflex, ring, and tilde respectively if the corresponding Latin\-1 or Latin\-2 character exists as an ISO literal\&.
924 .RE
925 .PP
926 Other escapes than these will yield warnings or errors\&.
927 .PP
928 All other troff requests are ignored but passed through into XML comments\&. A few (such as
929 \fB\&.ce\fR) also trigger a warning message\&.
930 .SH "PORTABILITY CHECKING"
931 .PP
932 When portability checking is enabled,
933 \fBdoclifter\fR
934 emits portability warnings about markup which it can handle but which will break various other viewers and interpreters\&.
935 .sp
936 .RS 4
937 .ie n \{\
938 \h'-04' 1.\h'+01'\c
939 .\}
940 .el \{\
941 .sp -1
942 .IP " 1." 4.2
943 .\}
944 At level 1, it will warn about constructions that would break
945 \fBman2html\fR(1) (the C program distributed with Linux
946 \fBman\fR(1), not the older and much less capable Perl script)\&. A close derivative of this code is used in GNOME
947 yelp\&. This should be the minimum level of portability you aim for, and corresponds to what is recommended on the
948 \fBgroff_man\fR(7)
949 manual page\&.
950 .RE
951 .sp
952 .RS 4
953 .ie n \{\
954 \h'-04' 2.\h'+01'\c
955 .\}
956 .el \{\
957 .sp -1
958 .IP " 2." 4.2
959 .\}
960 At level 2, it will warn about constructions that will break portability back to the Unix classic tools (including long macro names and glyph references with \e[])\&.
961 .RE
962 .SH "SEMANTIC ANALYSIS"
963 .PP
964 \fBdoclifter\fR
965 keeps two lists of semantic hints that it picks up from analyzing source documents (especially from parsing command and function synopses)\&. The local list includes:
966 .sp
967 .RS 4
968 .ie n \{\
969 \h'-04'\(bu\h'+03'\c
970 .\}
971 .el \{\
972 .sp -1
973 .IP \(bu 2.3
974 .\}
975 Names of function formal arguments
976 .RE
977 .sp
978 .RS 4
979 .ie n \{\
980 \h'-04'\(bu\h'+03'\c
981 .\}
982 .el \{\
983 .sp -1
984 .IP \(bu 2.3
985 .\}
986 Names of command options
987 .RE
988 .PP
989 Local hints are used to mark up the individual page from which they are gathered\&. The global list includes:
990 .sp
991 .RS 4
992 .ie n \{\
993 \h'-04'\(bu\h'+03'\c
994 .\}
995 .el \{\
996 .sp -1
997 .IP \(bu 2.3
998 .\}
999 Names of functions
1000 .RE
1001 .sp
1002 .RS 4
1003 .ie n \{\
1004 \h'-04'\(bu\h'+03'\c
1005 .\}
1006 .el \{\
1007 .sp -1
1008 .IP \(bu 2.3
1009 .\}
1010 Names of commands
1011 .RE
1012 .sp
1013 .RS 4
1014 .ie n \{\
1015 \h'-04'\(bu\h'+03'\c
1016 .\}
1017 .el \{\
1018 .sp -1
1019 .IP \(bu 2.3
1020 .\}
1021 Names of function return types
1022 .RE
1023 .PP
1024 If
1025 \fBdoclifter\fR
1026 is applied to multiple files, the global list is retained in memory\&. You can dump a report of global hints at the end of the run with the
1027 \fB\-h\fR
1028 option\&. The format of the hints is as follows:
1029 .sp
1030 .if n \{\
1031 .RS 4
1032 .\}
1033 .nf
1034 \ \&\&.\e" | mark <phrase> as <markup>
1035 .fi
1036 .if n \{\
1037 .RE
1038 .\}
1039 .PP
1040 where
1041 \fB<phrase>\fR
1042 is an item of text and
1043 \fB<markup>\fR
1044 is the DocBook markup text it should be wrapped with whenever it appeared either highlighted or as a word surrounded by whitespace in the source text\&.
1045 .PP
1046 Hints derived from earlier files are also applied to later ones\&. This behavior may be useful when lifting collections of documents that apply to a function or command library\&. What should be more useful is the fact that a hints file dumped with
1047 \fB\-h\fR
1048 can be one of the file arguments to
1049 \fBdoclifter\fR; the code detects this special case and does not write XML output for such a file\&. Thus, a good procedure for lifting a large library is to generate a hints file with a first run, inspect it to delete false positives, and use it as the first input to a second run\&.
1050 .PP
1051 It is also possible to include a hints file directly in a troff sourcefile\&. This may be useful if you want to enrich the file by stages before converting to XML\&.
1052 .SH "TROUBLESHOOTING"
1053 .PP
1054 \fBdoclifter\fR
1055 tries to warn about problems that it can diagnose but not fix by itself\&. When it says
1056 "look for FIXME", do that in the generated XML; the markup around that token may be wrong\&.
1057 .PP
1058 Occasionally (less than 2% of the time)
1059 \fBdoclifter\fR
1060 will produce invalid DocBook markup even from correct troff markup\&. Usually this results from strange constructions in the source page, or macro calls that are beyond the ability of
1061 \fBdoclifter\fR\*(Aqs macro processor to get right\&. Here are some things to watch for, and how to fix them:
1062 .SS "Malformed command synopses\&."
1063 .PP
1064 If you get a message that says
1065 "command synopsis parse failed", try rewriting the synopsis in your manual page source\&. The most common cause of failure is unbalanced [] groupings, a bug that can be very difficult to notice by eyeball\&. To assist with this, the error message includes a token number in parentheses indicating on which token the parse failed\&.
1066 .PP
1067 For more information, use the \-v option\&. This will trigger a dump telling you what the command synopsis looked like after preprocessing, and indicate on which token the parse failed (both with a token number and a caret sign inserted in the dump of the synopsis tokens)\&. To help you locate unbalanced [] groupings, the error token dump tries to insert \(oq$\(cq at the point of the last nesting\-depth increase, but the code that does this is failure\-prone\&.
1068 .SS "Confusing macro calls\&."
1069 .PP
1070 Some manual page authors replace standard requests (like
1071 \fB\&.PP\fR,
1072 \fB\&.SH\fR
1073 and
1074 \fB\&.TP\fR) with versions that do different things in
1075 \fBnroff\fR
1076 and
1077 \fBtroff\fR
1078 environments\&. While
1079 \fBdoclifter\fR
1080 tries to cope and usually does a good job, the quirks of [nt]roff are legion and confusing macro calls sometimes lead to bad XML being generated\&. A common symptom of such problems is unclosed
1081 Emphasis
1082 tags\&.
1083 .SS "Malformed list syntax\&."
1084 .PP
1085 The manual\-page parser can be confused by
1086 \fB\&.TP\fR
1087 constructs that have header tags but no following body\&. If the XML produced doesn\*(Aqt validate, and the problem seems to be a misplaced
1088 listitem
1089 tag, try using the verbose (\-v) option\&. This will enable line\-numbered warnings that may help you zero in on the problem\&.
1090 .SS "Section nesting problems with SS\&."
1091 .PP
1092 The message
1093 "possible section nesting error"
1094 means that the program has seen two adjacent subsection headers\&. In man pages, subsections don\*(Aqt have a depth argument, so
1095 \fBdoclifter\fR
1096 cannot be certain how subsections should be nested\&. Any subsection heading between the indicated line and the beginning of the next top\-level section might be wrong and require correcting by hand\&.
1097 .SS "Bad output with no doclifter error message"
1098 .PP
1099 If you\*(Aqre translating a page that uses user\-defined macros, and doclifter fails to complain about it but you get bad output, the first thing to do is simplify or eliminate the user\-defined macros\&. Replace them with stock requests where possible\&.
1100 .SH "IMPROVING TRANSLATION QUALITY"
1101 .PP
1102 There are a few constructions that are a good idea to check by hand after lifting a page\&.
1103 .PP
1104 Look near the
1105 BlockQuote
1106 tags\&. The troff temporary indent request (\fB\&.ti\fR) is translated into a
1107 BlockQuote
1108 wrapper around the following line\&. Sometimes
1109 LiteralLayout
1110 or
1111 ProgramListing
1112 would be a better translation, but
1113 \fBdoclifter\fR
1114 has no way to know this\&.
1115 .PP
1116 It is not possible to unambiguously detect candidates for wrapping in a DocBook
1117 option
1118 tag in running text\&. If you care, you\*(Aqll have to check for these and fix them by hand\&.
1119 .SH "BUGS AND LIMITATIONS"
1120 .PP
1121 About 3% of man pages will either make this program throw error status 1 or generate invalid XML\&. In almost all such cases the misbehavior is triggered by markup bugs in the source that are too severe to be coped with\&.
1122 .PP
1123 Equation number arguments of EQN calls are ignored\&.
1124 .PP
1125 Semicolon used as a TBL field separator will lead to garbled tables\&. The easiest way to fix this is by patching the source\&.
1126 .PP
1127 The function\-synopsis parser is crude (it\*(Aqs not a compiler) and prone to errors\&. Function\-synopsis markup should be checked carefully by a human\&.
1128 .PP
1129 If a man page has both paragraphed text in a Synopsis section and also a body section before the Synopsis section, bad things will happen\&.
1130 .PP
1131 Running text (e\&.g\&., explanatory notes) at the end of a Synopsis section cannot reliably be distinguished from synopsis\-syntax markup\&. (This problem is AI\-complete\&.)
1132 .PP
1133 Some firewalls put in to cope with common malformations in troff code mean that the tail end of a span between two
1134 \fB\ef{B,I,U,(CW}\fR
1135 or
1136 \fB\&.ft\fR
1137 highlight changes may not be completely covered by corresponding
1138 Emphasis
1139 macros if (for example) the span crosses a boundary between filled and unfilled (\fB\&.nf\fR/\fB\&.fi\fR) text\&.
1140 .PP
1141 The treatment of conditionals relies on the assumption that conditional macros never generate structural or font\-highlight markup that differs between the if and else branches\&. This appears to be true of all the standard macro packages, but if you roll any of your own macros you\*(Aqre on your own\&.
1142 .PP
1143 Macro definitions in a manual page NAME section are not interpreted\&.
1144 .PP
1145 Uses of \ec for line continuation sometimes are not translated, leaving the \ec in the output XML\&. The program will print a warning when this occurs\&.
1150 .PP
1151 The line numbers in
1152 \fBdoclifter\fR
1153 error messages are unreliable in the presence of
1154 \fB\&.EQ/\&.EN\fR,
1155 \fB\&.PS/\&.PE\fR, and quantum fluctuations\&.
1156 .SH "OLD MACRO SETS"
1157 .PP
1158 There is a conflict between Berkeley ms\*(Aqs documented
1159 \fB\&.P1\fR
1160 print\-header\-on\-page request and an undocumented Bell Labs use for displayed program and equation listings\&. The
1161 \fBms\fR
1162 translator uses the Bell Labs interpretation when
1163 \fB\&.P2\fR
1164 is present in the document, and otherwise ignores the request\&.
1165 .SH "RETURN VALUES"
1166 .PP
1167 On successful completion, the program returns status 0\&. It returns 1 if some file or standard input could not be translated\&. It returns 2 if one of the input sources was a
1168 \fB\&.so\fR
1169 inclusion\&. It returns 3 if there is an error in reading or writing files\&. It returns 4 to indicate an internal error\&. It returns 5 when aborted by a keyboard interrupt\&.
1170 .PP
1171 Note that a zero return does not guarantee that the output is valid DocBook\&. It will almost always (as in, more than 98% of cases) be syntactically valid XML, but in some rare cases fixups by hand may be necessary to meet the semantics of the DocBook DTD\&. Validation problems are most likely to occur with complicated list markup\&.
1172 .SH "REQUIREMENTS"
1173 .PP
1174 The
1175 \fBpic2plot\fR(1)
1176 utility must be installed in order to translate PIC diagrams to SVG\&.
1177 .SH "SEE ALSO"
1178 .PP
1179 \fBman\fR(7),
1180 \fBmdoc\fR(7),
1181 \fBms\fR(7),
1182 \fBme\fR(7),
1183 \fBmm\fR(7),
1184 \fBmwww\fR(7),
1185 \fBtroff\fR(1)\&.
1186 .SH "AUTHOR"
1187 .PP
1188 Eric S\&. Raymond
1189 <esr@thyrsus\&.com>
1190 .PP
1191 There is a project web page at
1192 \m[blue]\fBhttp://www\&.catb\&.org/~esr/doclifter/\fR\m[]\&.
0 ;; doclifter.el -- lift markup in a manual page converted by doclifter
1
2 (defun lift-next ()
3 "Lift the next remapped emphasis highlight."
4 (interactive)
5 (if (re-search-forward "<emphasis remap='[A-Z]*'>[^<]*</emphasis>" nil t)
6 (progn
7 (let ((start (match-beginning 0)) (end (match-end 0)))
8 (add-text-properties start end '(face (:background "green")))
9 (call-interactively 'lift-at-point)
10 (remove-text-properties start end '(face nil))
11 t))
12 )
13 )
14
15 (defun lift-at-point (type)
16 "Lift a remapped emphasis highlight starting at point."
17 (interactive "sTag: ")
18 (if (re-search-forward "<emphasis remap='[A-Z]*'>\\([^<]*\\)</emphasis>")
19 (replace-match (concat "<" type ">\\1</" type ">") t nil)))
20
21 (defun bump-sendcounts ()
22 "Bump send counts on each piece of bugmail."
23 (interactive)
24 (if (re-search-forward "%%" nil t)
25 (progn
26 (forward-line 1)
27 (while (not (eobp))
28 (cond ((looking-at "[yb]")
29 nil)
30 ((looking-at "[0-9]+")
31 (let* ((s (match-beginning 0))
32 (e (match-end 0))
33 (d (buffer-substring s e)))
34 (delete-region s e)
35 (insert (format "%d" (1+ (string-to-number d))))
36 ))
37 (t
38 (insert "1")))
39 (forward-line 1))
40 )))
41
42 ;; End
0 <?xml version="1.0"?>
1 <!DOCTYPE refentry PUBLIC
2 "-//OASIS//DTD DocBook XML V4.1.2//EN"
3 "docbook/docbookx.dtd"[
4 <!ENTITY howto "http://www.linuxdoc.org/HOWTO/">
5 <!ENTITY mini-howto "http://www.linuxdoc.org/HOWTO/mini/">
6 ]>
7 <article>
8 <articleinfo>
9 <title>Linux Man Page HOWTO</title>
10
11 <author>
12 <firstname>Jens</firstname>
13 <surname>Schweikhardt</surname>
14 <affiliation>
15 <orgname><ulink url="http://www.schweikhardt.net/">
16 http://www.schweikhardt.net/</ulink></orgname>
17 <address>
18 <email><ulink url="mailto:howto@schweikhardt.net">howto at schweikhardt dot net</ulink></email>
19 </address>
20 </affiliation>
21 </author>
22 <copyright>
23 <year>1995-2002</year>
24 </copyright>
25
26 <revhistory>
27 <revision>
28 <revnumber>1.5</revnumber>
29 <date>March 6 2001</date>
30 <authorinitials>js</authorinitials>
31 <revremark>
32 HTML source now passes <command>weblint -pedantic</command>.
33 <link linkend="q6">Paragraph 6:</link> Added workarounds for
34 <application>tbl</application> screw-ups. Added appendices
35 <link linkend="acknowledgements">B)</link> and changelog.
36 Added RCS Id.
37 </revremark>
38 </revision>
39 <revision>
40 <revnumber>1.4</revnumber>
41 <date>August 9 2001</date>
42 <authorinitials>js</authorinitials>
43 <revremark>
44 Howto put under a two clause BSD license.
45 </revremark>
46 </revision>
47 <revision>
48 <revnumber>1.3</revnumber>
49 <date>August 20 2001</date>
50 <authorinitials>js</authorinitials>
51 <revremark>
52 Improved grammar. Use a numbered list for the TOC.
53 </revremark>
54 </revision>
55 <revision>
56 <revnumber>1.2</revnumber>
57 <date>October 28 2001</date>
58 <authorinitials>js</authorinitials>
59 <revremark>
60 Added refs to mdoc(7), mdoc.samples(7) and groff_man(7).
61 </revremark>
62 </revision>
63 <revision>
64 <revnumber>1.1</revnumber>
65 <date>April 28 2002</date>
66 <authorinitials>js</authorinitials>
67 <revremark>
68 Fix a grammar bogon by s/particular/particularly/.
69 </revremark>
70 </revision>
71 <revision>
72 <revnumber>1.0</revnumber>
73 <date>April 30 2002</date>
74 <authorinitials>js</authorinitials>
75 <revremark>
76 Update the link to the groff_mdoc BSD tutorial.
77 </revremark>
78 </revision>
79 </revhistory>
80
81 <abstract>
82 <para>This document describes the tools and conventions used for writing
83 man pages under Linux.</para>
84 </abstract>
85 </articleinfo>
86
87 <sect1 id="introduction"><title>Introduction</title>
88
89 <para>See further information on <link linkend="copying">copying
90 conditions</link> below.</para>
91
92 <para>$Id$</para>
93
94 <para>The author's latest version is at <ulink
95 url="http://www.schweikhardt.net/man_page_howto.html">http://www.schweikhardt.net/man_page_howto.html</ulink>. Corrections
96 and suggestions welcome! </para>
97
98 <para>This HOWTO explains what you should bear in mind when you are
99 going to write on-line documentation -- a so-called man page -- that
100 you want to make accessible via the <citerefentry>
101 <refentrytitle>man</refentrytitle> <manvolnum>1</manvolnum>
102 </citerefentry> command. Throughout this HOWTO, a manual entry is
103 simply referred to as a man page, regardless of actual length and
104 without sexist intention. </para>
105
106 </sect1>
107 <sect1 id="q1"><title>A few thoughts on documentation</title>
108
109 <para>Why do we write documentation? Silly question. Because we want
110 others to be able to use our program, library function or whatever we
111 have written and made available. But writing documentation is not all
112 there is to it: </para>
113
114 <itemizedlist>
115
116 <listitem><para>Documentation must be accessible. If it's hidden in
117 some non-standard place where the documentation-related tools won't
118 find it -- how can it serve its purpose?</para></listitem>
119
120 <listitem><para>Documentation must be reliable and accurate. There's
121 nothing more annoying than having program behaviour and documentation
122 disagree. Users will curse you, send you hate mail and throw your work
123 into the bit bucket, with the firm intent to never install anything
124 written by that jerk again.</para></listitem>
125 </itemizedlist>
126
127 <para>The historical and well known way documentation is accessed on
128 UNIX is via the man(1) command. This HOWTO describes what you have to
129 do to write a man page that will be correctly processed by the
130 documentation-related tools. The most important of these tools are
131 <citerefentry>
132 <refentrytitle>man</refentrytitle> <manvolnum>1</manvolnum>
133 </citerefentry>,
134 <citerefentry>
135 <refentrytitle>xman</refentrytitle> <manvolnum>1</manvolnum>
136 </citerefentry>,
137 <citerefentry>
138 <refentrytitle>makewhatis</refentrytitle> <manvolnum>8</manvolnum>
139 </citerefentry>,
140 <citerefentry>
141 <refentrytitle>catman</refentrytitle> <manvolnum>8</manvolnum>
142 </citerefentry>.
143 Reliability and accuracy of the
144 information are, of course, up to you. But even in this respect you will
145 find <link linkend="q9">some ideas below</link> that help you avoid some common
146 glitches. </para>
147 </sect1>
148
149 <sect1 id="xml"><title>Why not to write a man page!</title>
150
151 <para>Man pages will remain an important distribution format, but
152 nowadays there are good reasons to consider writing your documentation
153 masters in a richer, more Web-friendly format. Many open-source
154 projects (including the Linux kernel, GNOME, KDE, FreeBSD, the Free
155 Software Foundation, and the Linux Documentation Project) have agreed
156 to use XML-DocBook as their preferred master format.</para>
157
158 <para>There are many good reasons for this, including the fact that
159 moving to XML will give you the ability to easily generate both HTML and
160 print output from the same masters. Ultimately, moving to XML will
161 allow rich searching and indexing of documentation in ways that troff
162 markup cannot support.</para>
163
164 <para>Now that the <ulink
165 url="http://cyberelk.net/tim/xmlto/">xmlto</ulink> toolchain is
166 generally available (it shipped with Red Hat 7.3), producing man pages
167 with XML-DocBook is easy. Write your page as a docbook
168 <sgmltag>RefEntry</sgmltag>. You will then be able to generate troff
169 markup from a document named <filename>foobar.xml</filename> with this
170 command:</para>
171
172 <programlisting>
173 bash$ xmlto man foobar.xml
174 </programlisting>
175
176 <para>Note that this gives you <application>troff</application> macro
177 source that can be installed in the man hierarchy just as though you
178 had written it by hand. It is good practice to ship both the XML source
179 and the generated man page source with your distribution.</para>
180
181 <para>If you have an existing man page you want to convert up to an
182 XML <sgmltag>RefEntry</sgmltag>, that's also easy to do. Please
183 consider converting your existing documentation with <ulink
184 url="http://www.catb.org/~esr/doclifter/">doclifter</ulink>.</para>
185
186 <para>Some of the good-practice guidelines we'll recommend below make
187 it easy for conversion tools like <application>doclifter</application>
188 to do their work, so that even if you choose to keep maintaining your
189 documentation as
190 <citerefentry>
191 <refentrytitle>man</refentrytitle><manvolnum>7</manvolnum>
192 </citerefentry>
193 sources, distribution-makers and others will be
194 able to automatically translate it into something XML tools can work
195 on.</para>
196 </sect1>
197
198 <sect1><title>If you decide to write in man(7) format...</title>
199
200 <para>It's not 1978 any more; there are more devices in the world than
201 the now-extinct CAT phototypesetter <application>troff</application>
202 was designed for, and you don't really know which one your readers
203 will be using to view your page. So it's much more important than it
204 used to be to write your man page structurally, and avoid trying to
205 pull visual tricks with low-level <application>troff</application>
206 requests. They probably won't come out as you intended,
207 anyway &mdash; man page viewers and parsers often don't know how
208 to interpret those tricks.</para>
209
210 <para>Here are the safe requests:</para>
211
212 <table><title>The Good</title>
213 <tgroup cols="2" colsep="1" rowsep="1">
214 <tbody>
215 <row>
216 <entry>.SH, .SS</entry>
217 <entry>Section and subsection headers. Safe as houses.</entry>
218 </row>
219 <row>
220 <entry>.P, .PP</entry>
221 <entry>New-paragraph macros. Equally safe.</entry>
222 </row>
223 <row>
224 <entry>.TP</entry>
225 <entry>All-purpose tagged list construct. Be careful what you
226 put after these; remember that <markup>.PP</markup> resets
227 the indent and ends the list.</entry>
228 </row>
229 <row>
230 <entry>.UR, .UE, .UN</entry>
231 <entry>Hyperlink macros. Generally harmless when misinterpreted.</entry>
232 </row>
233 </tbody>
234 </tgroup>
235 </table>
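
<para>A short fragment using only these safe requests (the option and
its description are invented for illustration):</para>

<programlisting>
.SH OPTIONS
.TP
.B -v
Report the program version and exit.
.PP
Options may be given in any order.
</programlisting>

<para>Note how the <markup>.PP</markup> after the <markup>.TP</markup>
entry ends the tagged list and returns to ordinary paragraph text.</para>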
236
237 <para>If you want to make your man page device-dependent and hard to
238 parse, here are some excellent ways to accomplish that:</para>
239
240 <table><title>The Bad</title>
241 <tgroup cols="2" colsep="1" rowsep="1">
242 <tbody>
243 <row>
244 <entry>.LP</entry>
245 <entry>Synonym for <markup>.PP</markup>. Historically, <markup>.LP</markup>
246 had a slightly different effect on whitespace than
247 <markup>.PP</markup>, but the distinction is lost in modern
248 versions. Still, you never know where your page will be read,
249 nor with how archaic a version of the man macros....</entry>
250 </row>
251 <row>
252 <entry>.RS, .RE</entry>
253 <entry>Change relative indent. Confuses the heck out of programs,
254 especially when it's intermingled with stuff like
255 <markup>.TP</markup>. Avoid.</entry>
256 </row>
257 <row>
258 <entry>.ta, .DT</entry>
259 <entry>Troff-level tab set, and tab reset. Dubious in a modern,
260 multi-device environment where you can't necessarily predict
261 tab settings.</entry>
262 </row>
263 <row>
264 <entry>.HP</entry>
265 <entry>Hanging-indent macro. Just messes with whitespace, so
266 you don't actually know what the results will be on
267 different devices.</entry>
268 </row>
269 <row>
270 <entry>.PD</entry>
271 <entry>Interparagraph-vertical-distance setting. Almost guaranteed
272 to be useless in a modern environment. What's that going to mean
273 on a web page, bunky?</entry>
274 </row>
275 <row>
276 <entry>.IP</entry>
277 <entry>Indented-paragraph macro. The two-argument version with a
278 bullet is not completely useless, but consider using
279 <markup>.TP</markup> instead. It's easy to get confused
280 and write the single-argument version, which
281 only messes with indentation.</entry>
282 </row>
283 </tbody>
284 </tgroup>
285 </table>
286
287 <para>If you care about being friendly to XML translation,
288 it's also wise to stay away from using low-level
289 <application>troff</application> constructs like <markup>.br</markup>,
290 <markup>.in</markup> and <markup>.ti</markup> to tweak the whitespace
291 in your page layout. The whitespace layout won't be preserved
292 anyway, and translator programs have a hard time interpreting these
293 structurally.</para>
294
295 <para>Finally, one bad construct we just can't get around using:</para>
296
297 <table><title>The Ugly</title>
298 <tgroup cols="2" colsep="1" rowsep="1">
299 <tbody>
300 <row>
301 <entry>.nf, .fi</entry>
302 <entry>The stop-filling and start-filling macros. Necessary
303 because stock man(7) doesn't have anything like Ultrix's
304 <markup>.EX</markup>/<markup>.EE</markup> or mm's
305 <markup>.DS</markup>/<markup>.DE</markup> for unfilled
306 displays (like program listings and command examples).</entry>
307 </row>
308 </tbody>
309 </tgroup>
310 </table>
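
<para>A sketch of how the pair is typically used for a command example
(the EXAMPLES section content shown is hypothetical):</para>

<programlisting>
.SH EXAMPLES
To frobnicate a single file, run:
.PP
.nf
foo -c /etc/foo.conf file.baz
.fi
</programlisting>

<para>Everything between <markup>.nf</markup> and <markup>.fi</markup>
is passed through with line breaks preserved, which is what you want
for command lines and program listings.</para>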
311 </sect1>
312
313 <para>There is more about good use of font macros <link linkend="q8">
314 later in the document</link>.</para>
315
316 <sect1 id="q2"><title>How are man pages accessed?</title>
317
318 <para>You need to know the precise mechanism for accessing man pages in
319 order to give your man page the right name and install it in the right
320 place. Each man page should be categorized in a specific section, denoted by
321 a single character. The most common sections under Linux, and their human
322 readable names, are: </para>
323
324 <informaltable>
325 <tgroup cols="2" colsep="1" rowsep="1">
326 <thead>
327 <row>
328 <entry>Section</entry>
329 <entry>The human readable name</entry></row>
330 </thead>
331 <tbody>
332 <row>
333 <entry>1</entry>
334 <entry>User commands that may be started by everyone.</entry></row>
335 <row>
336 <entry>2</entry>
337 <entry>System calls, that is, functions provided by the kernel.</entry>
338 </row>
339 <row>
340 <entry>3</entry>
341 <entry>Subroutines, that is, library functions.</entry>
342 </row>
343 <row>
344 <entry>4</entry>
345 <entry>Devices, that is, special files in the /dev directory.</entry>
346 </row>
347 <row>
348 <entry>5</entry>
349 <entry>File format descriptions, e.g. /etc/passwd.</entry>
350 </row>
351 <row>
352 <entry>6</entry>
353 <entry>Games, self-explanatory.</entry>
354 </row>
355 <row>
356 <entry>7</entry>
357 <entry>Miscellaneous, e.g. macro packages, conventions.</entry>
358 </row>
359 <row>
360 <entry>8</entry>
361 <entry>System administration tools that only root can execute.</entry>
362 </row>
363 <row>
364 <entry>9</entry>
365 <entry>Another (Linux specific) place for kernel routine documentation.</entry>
366 </row>
367 <row>
368 <entry>n</entry>
369 <entry>(Deprecated) New documentation, that may be moved to a more appropriate section.</entry>
370 </row>
371 <row>
372 <entry>o</entry>
373 <entry>(Deprecated) Old documentation, that may be kept for a grace period.</entry>
374 </row>
375 <row>
376 <entry>l</entry>
377 <entry>(Deprecated) Local documentation referring to this particular system.</entry>
378 </row>
379 </tbody>
380 </tgroup>
381 </informaltable>
382
383 <para>The name of the man page's source file (the input to the
384 formatting system) is the name of the command, function or file name,
385 followed by a dot, followed by the section character. If you write the
386 documentation on the format of the <filename>passwd</filename> file
387 you have to name the source file `passwd.5'. Here we also have an
388 example of a file name that is the same as a command name. There
389 might even be a library subroutine named passwd. Sectioning is the
390 usual way to resolve these ambiguities: The command description is
391 found in the file `passwd.1' and the hypothetical library subroutine
392 in `passwd.3'.</para>
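
<para>You can see this disambiguation at work by asking
<citerefentry><refentrytitle>man</refentrytitle>
<manvolnum>1</manvolnum></citerefentry> for a specific section
(a sketch; which pages exist depends on your installation):</para>

<programlisting>
man 1 passwd # the passwd command
man 5 passwd # the /etc/passwd file format
</programlisting>

<para>Without an explicit section argument, man searches the sections
in a fixed order and shows only the first match it finds.</para>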
393
394 <note><para>Sometimes additional characters are appended and the file name
395 looks for example like `xterm.1x' or `wish.1tk'. The intent is to
396 indicate that this is documentation for an X Window program or a Tk
397 application, respectively. Some manual browsers can make use of this
398 additional information. For example xman will use `xterm(x)' and
399 `wish(tk)' in the list of available documentation.</para></note>
400
401 <para>Please don't use the n, o and l sections; according to the File
402 System Standard these sections are deprecated. Stick to the numeric
403 sections. Beware of name clashes with existing programs, functions or
404 file names. It is certainly a bad idea to write yet another editor
405 and call it ed, sed (for smart ed) or red (for Rocky's ed). By making
406 sure your program's name is unique, you avoid having someone execute
407 your program but read someone else's man page, or vice versa. Checking
408 out the <ulink url="http://www.execpc.com/lsm/">Linux Software
409 Map</ulink> (LSM) database on a program name is a place to start
410 ensuring name uniqueness. </para>
411
412 <para>Now we know the name to give our file. The next decision is the
413 directory in which it will finally be installed (say, when the user
414 runs `<command>make install</command>' for your package.) On Linux,
415 all man pages are below directories listed in the environment variable
416 <envar>MANPATH</envar>. The doc-related tools use
417 <envar>MANPATH</envar> in the same way the shell uses PATH to locate
418 executables. In fact, <envar>MANPATH</envar> has the same format as
419 PATH. Each contains a colon-separated list of directories (with the
420 exception that <envar>MANPATH</envar> does not allow empty fields
421 and relative pathnames -- it uses absolute names only.) If
422 <envar>MANPATH</envar> is not set or not exported, a default will be
423 used that contains at least the /usr/man directory. To speed up the
424 search and to keep directories small, the directories specified by
425 <envar>MANPATH</envar> (the so-called base directories) contain a
426 bunch of subdirectories named `man&lt;s&gt;' where &lt;s&gt; stands
427 for the one-character section designator introduced in the <link
428 linkend="q2">table above</link>. Not all of the sections may be
429 represented by a subdirectory because there simply is no reason to
430 keep an empty `mano' subdirectory. However, there may be directories
431 named `cat&lt;s&gt;', `dvi&lt;s&gt;' and `ps&lt;s&gt;' which hold
432 documentation that is ready to display or print. More on this
433 later. The only other file in any base directory should be a file
434 named `whatis'. The purpose and creation of this file will also be
435 described under <link linkend="q12">paragraph 12</link>. The safest
436 way to have a man page for section &lt;s&gt; installed in the right
437 place is to put it in the directory /usr/man/man&lt;s&gt;. A good
438 <filename>Makefile</filename>, however, will allow the user to choose
439 a base directory, by means of a <application>make</application> variable,
440 <envar>MANDIR</envar>, say. Most of the GNU packages can be
441 configured with the <command>--prefix=/what/ever</command> option.
442 The manuals will then be installed under the base directory
443 <filename>/what/ever/man</filename>. I suggest you also provide a way
444 to do something similar. </para>
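
<para>A minimal <filename>Makefile</filename> sketch along these lines
(the variable name and target are only suggestions; remember that
recipe lines must begin with a tab):</para>

<programlisting>
MANDIR = /usr/man

install: foo.1
	install -d $(MANDIR)/man1
	install -m 644 foo.1 $(MANDIR)/man1/foo.1
</programlisting>

<para>The user can then say `<command>make MANDIR=/what/ever/man
install</command>' to select a different base directory.</para>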
445
446 <para>With the advent of the <ulink
447 url="ftp://tsx-11.mit.edu/pub/linux/docs/linux-standards/fsstnd/">Linux
448 File System Standard</ulink> (FS-Stnd), things became more
449 complicated. The FS-Stnd 1.2 states that </para>
450
451 <itemizedlist>
452 <listitem><para>"Provisions must be made in the structure of /usr/man
453 to support manual pages which are written in different (or multiple)
454 languages."</para></listitem>
455 </itemizedlist>
456
457 <para>This is achieved by introducing another directory level that
458 distinguishes between different languages. Quoting again from FS-Stnd
459 1.2: </para>
460
461 <itemizedlist>
462 <listitem><para>"This naming of language subdirectories of /usr/man is
463 based on Appendix E of the POSIX 1003.1 standard which describes the
464 locale identification string -- the most well accepted method to
465 describe a cultural environment. The &lt;locale&gt; string is:
466 &lt;language&gt;[_&lt;territory&gt;][.&lt;character-set&gt;][,&lt;version&gt;]"</para></listitem>
467 </itemizedlist>
468
469 <para>(See the FS-Stnd for a few common &lt;locale&gt; strings.)
470 According to these guidelines, we have our man pages in
471 <filename>/usr/man/&lt;locale&gt;/man[1-9lno]</filename>. The
472 formatted versions should then be in
473 <filename>/usr/man/&lt;locale&gt;/cat[1-9lno]</filename> of course,
474 otherwise we could only provide them for a single locale. <emphasis
475 role="strong">However</emphasis>, I cannot recommend switching to
476 that structure at this time. The FS-Stnd 1.2 also allows that </para>
477
478 <itemizedlist>
479 <listitem><para>"Systems which use a unique language and code set for
480 all manual pages may omit the &lt;locale&gt; substring and store all
481 manual pages in &lt;mandir&gt;. For example, systems which only have
482 English manual pages coded with ASCII, may store manual pages (the
483 man[1-9] directories) directly in /usr/man. (That is the
484 traditional circumstance and arrangement in fact.)"</para></listitem>
485 </itemizedlist>
486
487 <para>I would not switch until all tools (like xman, tkman, info and many
488 others that read man pages) can cope with the new structure.</para>
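
<para>For reference, the two layouts look like this (a sketch, using a
hypothetical German locale string):</para>

<programlisting>
/usr/man/man1/foo.1 (traditional layout)
/usr/man/de_DE.88591/man1/foo.1 (per-locale layout)
</programlisting>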
489 </sect1>
490
491 <sect1 id="q3"><title>How should a formatted man page look?</title>
492
493 <para>Let me present you an example. Below I will explain it in
494 detail. If you read this as plain text it won't show the different
495 typefaces (<emphasis role="strong">bold</emphasis> and
496 <emphasis>italics</emphasis>). Please refer to the paragraph <link
497 linkend="q8">What are the font conventions?</link> for further
498 explanations. Here comes the man page for the (hypothetical)
499 <application>foo</application> program. </para>
500
501 <literallayout>
502 FOO(1) User Manuals FOO(1)
503
504
505
506 <emphasis role="strong">NAME
507 </emphasis> foo - frobnicate the bar library
508
509 <emphasis role="strong">SYNOPSIS
510 </emphasis> <emphasis role="strong">foo [-bar] [-c</emphasis> <emphasis>config-file</emphasis> <emphasis role="strong">]</emphasis> <emphasis>file</emphasis> <emphasis role="strong">...
511
512 DESCRIPTION
513 </emphasis> <emphasis role="strong">foo</emphasis> frobnicates the bar library by tweaking internal symbol
514 tables. By default it parses all baz segments and rearranges
515 them in reverse order by time for the <emphasis role="strong">xyzzy</emphasis>(1) linker to
516 find them. The symdef entry is then compressed using the WBG
517 (Whiz-Bang-Gizmo) algorithm. All files are processed in the
518 order specified.
519
520 <emphasis role="strong">OPTIONS
521 </emphasis> -b Do not write `busy' to stdout while processing.
522
523 -c config-file
524 Use the alternate system wide <emphasis>config-file</emphasis> instead of
525 <emphasis>/etc/foo.conf</emphasis>. This overrides any <emphasis role="strong">FOOCONF</emphasis> environment
526 variable.
527
528 -a In addition to the baz segments, also parse the blurfl
529 headers.
530
531 -r Recursive mode. Operates as fast as lightning at the
532 expense of a megabyte of virtual memory.
533
534 <emphasis role="strong">FILES
535 </emphasis> <emphasis>/etc/foo.conf
536 </emphasis> The system wide configuration file. See <emphasis role="strong">foo</emphasis>(5) for fur-
537 ther details.
538 <emphasis>~/.foorc
539 </emphasis> Per user configuration file. See <emphasis role="strong">foo</emphasis>(5) for further
540 details.
541
542 <emphasis role="strong">ENVIRONMENT
543 </emphasis> FOOCONF
544 If non-null the full pathname for an alternate system
545 wide <emphasis>foo.conf</emphasis>. Overridden by the -c option.
546
547 <emphasis role="strong">DIAGNOSTICS
548 </emphasis> The following diagnostics may be issued on stderr:
549
550 Bad magic number.
551 The input file does not look like an archive file.
552 Old style baz segments.
553 foo can only handle new style baz segments. COBOL
554 object libraries are not supported in this version.
555
556 <emphasis role="strong">BUGS
557 </emphasis> The command name should have been chosen more carefully to
558 reflect its purpose.
559
560 <emphasis role="strong">AUTHOR
561 </emphasis> Jens Schweikhardt <ulink url="mailto:howto@schweikhardt.net">&lt;howto at schweikhardt dot net&gt;</ulink>
562 <emphasis role="strong">SEE ALSO
563 </emphasis> <emphasis role="strong">bar</emphasis>(1), <emphasis role="strong">foo</emphasis>(5), <emphasis role="strong">xyzzy</emphasis>(1)
564
565 Linux Last change: MARCH 1995 2
566
567
568
569 </literallayout>
570
571 <para>So how exactly did you generate that man page? I expected that
572 question; here's the source, Luke:</para>
573
574 <programlisting>
575 .\" Process this file with
576 .\" groff -man -Tascii foo.1
577 .\"
578 .TH FOO 1 "MARCH 1995" Linux "User Manuals"
579 .SH NAME
580 foo \- frobnicate the bar library
581 .SH SYNOPSIS
582 .B foo [-bar] [-c
583 .I config-file
584 .B ]
585 .I file
586 .B ...
587 .SH DESCRIPTION
588 .B foo
589 frobnicates the bar library by tweaking internal
590 symbol tables. By default it parses all baz segments
591 and rearranges them in reverse order by time for the
592 .BR xyzzy (1)
593 linker to find them. The symdef entry is then compressed
594 using the WBG (Whiz-Bang-Gizmo) algorithm.
595 All files are processed in the order specified.
596 .SH OPTIONS
597 .IP -b
598 Do not write `busy' to stdout while processing.
599 .IP "-c config-file"
600 Use the alternate system wide
601 .I config-file
602 instead of
603 .IR /etc/foo.conf .
604 This overrides any
605 .B FOOCONF
606 environment variable.
607 .IP -a
608 In addition to the baz segments, also parse the
609 blurfl headers.
610 .IP -r
611 Recursive mode. Operates as fast as lightning
612 at the expense of a megabyte of virtual memory.
613 .SH FILES
614 .I /etc/foo.conf
615 .RS
616 The system wide configuration file. See
617 .BR foo (5)
618 for further details.
619 .RE
620 .I ~/.foorc
621 .RS
622 Per user configuration file. See
623 .BR foo (5)
624 for further details.
625 .SH ENVIRONMENT
626 .IP FOOCONF
627 If non-null the full pathname for an alternate system wide
628 .IR foo.conf .
629 Overridden by the
630 .B -c
631 option.
632 .SH DIAGNOSTICS
633 The following diagnostics may be issued on stderr:
634
635 Bad magic number.
636 .RS
637 The input file does not look like an archive file.
638 .RE
639 Old style baz segments.
640 .RS
641 .B foo
642 can only handle new style baz segments. COBOL
643 object libraries are not supported in this version.
644 .SH BUGS
645 The command name should have been chosen more carefully
646 to reflect its purpose.
647 .SH AUTHOR
648 Jens Schweikhardt &lt;howto at schweikhardt dot net&gt;
649 .SH "SEE ALSO"
650 .BR bar (1),
651 .BR foo (5),
652 .BR xyzzy (1)
653 </programlisting>
654
655 <para>Here's the explanation as I promised. </para>
656
657 <sect2 id="NAME"><title>The NAME section</title>
658
659 <para>...is the only required section. Man pages without a name
660 section are as useful as refrigerators at the north pole. This section
661 also has a standardized format consisting of a comma-separated list of
662 program or function names, followed by a dash, followed by a short
663 (usually one line) description of the functionality the program (or
664 function, or file) is supposed to provide. By means of
665 <application>makewhatis</application>(8), the name sections make it
666 into the <application>whatis</application> database files.
667 <application>Makewhatis</application> is the reason the name section
668 must exist, and why it must adhere to the format I described. In the
669 <application>groff</application> source it must look like:</para>
670
671 <programlisting>
672 .SH NAME foo \- frobnicate the bar library
673 </programlisting>
674
675 <para>The \- is of importance here. The backslash is needed to make the dash
676 distinct from a hyphenation dash that may appear in either the command
677 name or the one line description. </para>
678
679 <para id="no_multiple_NAME_lines">Please do not make more than one of
680 these name lines. Some people have done this on man pages that
681 document multiple closely related commands, but it's not a good idea.
682 Linux <citerefentry>
683 <refentrytitle>makewhatis</refentrytitle><manvolnum>8</manvolnum>
684 </citerefentry> indexes on words found in DESCRIPTION and INTRODUCTION
685 sections as well as the NAME line, so as long as you put your key word
686 somewhere in that text you'll get the same effect. And <citerefentry>
687 <refentrytitle>apropos</refentrytitle><manvolnum>1</manvolnum>
688 </citerefentry> only displays the first description line anyway.
689 Finally, multiple name/description lines cannot be automatically
690 translated to <sgmltag>RefEntry</sgmltag> format, which permits
691 multiple names but only one description. So, if you're tempted to
692 write something like:</para>
693
694 <programlisting>
695 .SH NAME
696 mzip2, munzip2 \- a magic file compressor
697 .br
698 mzcat \- decompresses files to stdout
699 .br
700 mzip2recover \- recovers data from damaged mzip2 files
701 </programlisting>
702
703 <para>please do this instead:</para>
704
705 <programlisting>
706 .SH NAME
707 mzip2, munzip2, mzcat, mzip2recover \- file compression and decompression tools
708 .SH DESCRIPTION
709 .I mzcat
710 decompresses files to stdout
711 .PP
712 .I mzip2recover
713 recovers data from damaged mzip2 files
714 </programlisting>
715
716 </sect2>
717 <sect2><title>The SYNOPSIS section</title>
718
719 <para>...is intended to give a short overview of available program options.
720 For functions this section lists the corresponding include files and the prototype
721 so the programmer knows the type and number of arguments as well as the
722 return type.</para>
723
724 <para>When writing command synopses, remember the standard
725 notation:</para>
726
727 <informaltable>
728 <tgroup cols="2" colsep="1" rowsep="1">
729 <thead>
730 <row>
731 <entry>Notation:</entry>
732 <entry>Meaning</entry></row>
733 </thead>
734 <tbody>
735 <row>
736 <entry> arg...</entry>
737 <entry>Required argument, can be repeated any number of times. Please don't
738 use "arg+" for this!</entry>
739 </row>
740 <row>
741 <entry>[ arg ]</entry>
742 <entry>Optional argument &mdash; may be omitted</entry>
743 </row>
744 <row>
745 <entry>[ arg... ]</entry>
746 <entry>Optional argument &mdash; may be omitted, but can be repeated any
747 number of times. Please don't write "[arg]..." or "arg*" for
748 this!</entry>
749 </row>
750 <row>
751 <entry>[ arg1 | arg2 | ... argn ]</entry>
752 <entry>Any one of these options, optionally</entry>
753 </row>
754 <row>
755 <entry>{ arg1 | arg2 | ... argn }</entry>
756 <entry>Any one of these options, but at least one in the group
757 is required. Please don't write this with parentheses
758 instead of braces!</entry>
759 </row>
760 </tbody>
761 </tgroup>
762 </informaltable>
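
<para>Putting the notation together, a synopsis for a hypothetical
command might read:</para>

<programlisting>
foo [ -v ] { -e expression | -f script-file } file...
</programlisting>

<para>meaning: an optional -v, exactly one of -e or -f (each taking
its own argument), followed by one or more file operands.</para>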
763
764 <para>If you have a program with a bunch of single-character options,
765 please avoid writing the synopsis in the old terse Bell Labs style,
766 e.g:</para>
767
768 <programlisting>
769 doclifter [ -qhsvx ] file...
770 </programlisting>
771
772 <para>This is too easily confused with the newer X-style keyword
773 options like -geometry (which is why it's GNU practice to lead long
774 options with a double dash). For the benefit of programs that
775 actually parse command-synopsis notation, it is better to be more
776 explicit:</para>
777
778 <programlisting>
779 doclifter [ -q ] [ -h ] [ -s ] [ -v ] [ -x ] file...
780 </programlisting>
781
782 <para>Please don't include running text in this section. E.g. rather
783 than this:</para>
784
785 <programlisting>
786 .SH SYNOPSIS
787 .TP
788 .B ddd
789 .RB "[\|" \-\-help "\|]"
790 .RB "[\|" \-\-gdb "\|]"
791 .RB "[\|" \-\-dbx "\|]"
792 .TP
793 but usually just
794 .TP
795 .B ddd
796 .I program
797 </programlisting>
798
799 <para>write this:</para>
800
801 <programlisting>
802 .SH SYNOPSIS
803 .TP
804 .B ddd
805 .RB "[\|" \-\-help "\|]"
806 .RB "[\|" \-\-gdb "\|]"
807 .RB "[\|" \-\-dbx "\|]"
808 .SH USAGE
809 This program is usually just invoked as
810 .B ddd
811 .I program
812 </programlisting>
813
814 <para>DocBook has a markup sub-language for describing command
815 synopses that unfortunately doesn't have a way to describe text
816 annotations. Tools like <application>doclifter</application> that
817 try to translate man pages to DocBook cannot automatically know
818 what to do with such annotations.</para>
819
820 <para>If you're writing a description of C function entry points
821 to a library, there are two style guidelines to keep in mind:</para>
822
823 <para>First, put any struct declarations, typedefs, and externs of
824 variables <emphasis>before</emphasis> the function entry point
825 declarations, rather than after (just as you would in a program).</para>
826
827 <para>Second, use ANSI C prototype syntax rather than K&amp;R. That is,
828 instead of this:</para>
829
830 <programlisting>
831 int foo(ptr)
832 char *ptr;
833 </programlisting>
834
835 <para>write this:</para>
836
837 <programlisting>
838 int foo(char *ptr);
839 </programlisting>
840
841 <para>just as the function would appear in a modern header file.</para>
842
843 </sect2>
844 <sect2><title>The DESCRIPTION section</title>
845
846 <para>...eloquently explains why your sequence of 0s and 1s is worth
847 anything at all. Here's where you write down all your knowledge. This
848 is the Hall Of Fame. Win other programmers' and users' admiration by
849 making this section the source of reliable and detailed
information. Explain what the arguments are for, the file format, and
which algorithms do the dirty jobs.</para>
852
853 </sect2>
854 <sect2><title>The OPTIONS section</title>
855
856 <para>...gives a description of how each command-line option affects
857 program behaviour. You knew that, didn't you? If your program is
858 simple enough, this information may be folded into the DESCRIPTION
859 section.</para>
860
861 </sect2>
862 <sect2><title>The FILES section</title>
863
864 <para>...lists files the program or function uses. For example, it
865 lists configuration files, startup files, and files the program
866 directly operates on. It is a good idea to give the full pathname of
867 these files and to make the install process modify the directory part
868 to match user preferences: the <application>groff</application> manuals have a
869 default prefix of /usr/local, so they reference
870 <filename>/usr/local/lib/groff/*</filename> by default. However, if
871 you install using <command>make prefix=/opt/gnu</command> the
872 references in the man page change to
873 <filename>/opt/gnu/lib/groff/*</filename>.</para>
874
875 </sect2>
876 <sect2><title>The ENVIRONMENT section</title>
877
878 <para>...lists all environment variables that affect your program or function
879 and tells how, of course. Most commonly the variables will hold pathnames,
880 filenames or default options. </para>
881
882 </sect2>
883 <sect2><title>The DIAGNOSTICS section</title>
884
885 <para>...should give an overview of the most common error messages
886 from your program and how to cope with them. There's no need to
explain system error messages (from <citerefentry>
888 <refentrytitle>perror</refentrytitle><manvolnum>3</manvolnum>
889 </citerefentry>) or fatal signals (from <citerefentry>
890 <refentrytitle>psignal</refentrytitle><manvolnum>3</manvolnum>
891 </citerefentry>) as they can appear during execution of any
892 program. </para>
893
894 </sect2>
895 <sect2><title>The BUGS section</title>
896
897 <para>...should ideally be non-existent. If you're brave, you can describe
898 here the limitations, known inconveniences and features that others may regard
899 as misfeatures. If you're not so brave, rename it the TO DO section ;-)
900 </para>
901
902 </sect2>
903 <sect2><title>The AUTHOR section</title>
904
905 <para>...is nice to have in case there are gross errors in the documentation
906 or program behaviour (Bzzt!) and you want to mail a bug report. </para>
907
908 </sect2>
909 <sect2><title>The SEE ALSO section</title>
910
911 <para>...is a list of related man pages in alphabetical order. Conventionally,
912 it is the last section. You are free to invent other sections if they really
913 don't fit in one of those described so far.</para>
914
915 </sect2>
916 </sect1>
917 <sect1 id="q4"><title>How do I document several programs/functions
918 in a single man page?</title>
919
920 <para>Many programs (<application>grep</application>,
921 <application>egrep</application>) and functions (<function>printf</function>,
922 <function>fprintf</function>, ...) are documented in a single man
923 page. However, these man pages would be quite useless if they were
924 only accessible under one name. We cannot expect a user to remember
925 that the <application>egrep</application> man page is actually the
926 <application>grep</application> man page. It is therefore necessary to have
927 the man page available under different names. You have several
928 possibilities to achieve this: </para>
929
930 <orderedlist>
931 <listitem><para>have identical copies for each name.</para></listitem>
932 <listitem><para>connect all man pages using hard links.</para></listitem>
<listitem><para>use symbolic links pointing to the actual man
934 page.</para></listitem>
935 <listitem><para>use <application>groff</application>'s `source' mechanism provided
936 by the <markup>.so</markup> macro.</para></listitem>
937 </orderedlist>
938
939 <para>The first way is obviously a waste of disk space. The second is
940 not recommended because intelligent versions of the
941 <application>catman</application> program can save a lot of work by looking at
the file type or contents. Hard links will prevent
943 <application>catman</application> from being clever. (Note that
944 <application>catman</application>'s purpose is to format all man pages so they
945 can be displayed quickly.) The third alternative has a slight
946 drawback: if flexibility is a concern, you have to be aware that there
947 are file systems that do not support symbolic links. The upshot of
948 this is that the Best Thing (TM) is using <application>groff</application>'s
949 source mechanism. Here's how to do it: If you want to have your man
950 page available under the names `foo' and `bar' in section 1, then put
951 the man page in foo.1 and have bar.1 look like this: </para>
952
953 <programlisting>
954 .so man1/foo.1
955 </programlisting>
956
957 <para>It is important to specify the <filename>man1/</filename>
958 directory part as well as the file name `foo.1' because when
959 <application>groff</application> is run by the browser it will have
960 the manual base directory as its current working directory (cwd) and
961 <application>groff</application> interprets <markup>.so</markup> arguments
962 relative to the cwd. </para>
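
<para>For instance (hypothetical page names; in a real package the
stub file would be written by your install rules), the resulting
layout looks like this:</para>

<programlisting>
funnyprompt$ ls man/man1
bar.1  foo.1
funnyprompt$ cat man/man1/bar.1
.so man1/foo.1
</programlisting>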
963
964 </sect1>
965 <sect1 id="q5"><title>Which macro package should I use?</title>
966
967 <para>There are a number of macro packages especially designed for use
968 in writing man pages. Usually they are in the groff macro directory
969 <filename>/usr/lib/groff/tmac</filename>. The file names are
970 <filename>tmac.&lt;something&gt;</filename>, where &lt;something&gt;
971 is the argument to groff's -m option. Groff will use
972 <filename>tmac.&lt;something&gt;</filename> when it is given the
973 `<option>-m</option> &lt;something&gt;' option. Often the blank
974 between `<option>-m</option>' and `&lt;something&gt;' is omitted so we
975 may say `<command>groff -man</command>' when we are formatting man
976 pages using the <filename>tmac.an </filename> macro package. That's
977 the reason for the strange name `tmac.an'. Besides tmac.an there is
978 another popular macro package, <filename>tmac.doc</filename>, which
979 originated at the University of California at Berkeley. Many BSD man
980 pages use it and it seems that UCB has made it its standard for
981 documentation. The <filename>tmac.doc</filename> macros are much more
982 flexible but alas, there are manual browsers that will not use them
983 but always call <command>groff -man</command>. For example, all
984 <application>xman</application> programs I have seen will screw up on man
985 pages requiring <filename>tmac.doc</filename>. So do yourself a favor:
986 use <filename>tmac.an</filename> -- use of any other macro package is
987 considered harmful. <filename>tmac.andoc </filename>is a pseudo macro
988 package that takes a look at the source and then loads either
989 <filename>tmac.an</filename> or
990 <filename>tmac.doc</filename>. Actually, any man page browser should
991 use it but to this point, not all of them do, so it is best we cling
992 to ye olde <filename>tmac.an</filename>. Anything I tell you from now
993 on and concerning macros only holds true for
994 <filename>tmac.an</filename>. If you want to use the
995 <filename>tmac.doc</filename> macros anyway, here is a pointer to
996 detailed information on how to use them: <ulink
997 url="http://www.freebsd.org/cgi/man.cgi">http://www.FreeBSD.org/cgi/man.cgi</ulink>.
998 There is a searchable index form on the page. Enter
999 <filename>groff_mdoc</filename> and it will find you
1000 <citerefentry>
1001 <refentrytitle>groff_mdoc</refentrytitle> <manvolnum>7</manvolnum>
1002 </citerefentry>,
1003 a tutorial sampler for writing BSD man
1004 pages. Some distros (I'm told) also come with
1005 <citerefentry>
1006 <refentrytitle>mdoc</refentrytitle><manvolnum>7</manvolnum>
1007 </citerefentry>,
1008 <citerefentry>
1009 <refentrytitle>mdoc_samples</refentrytitle><manvolnum>7</manvolnum>
1010 </citerefentry>,
1011 and
1012 <citerefentry>
1013 <refentrytitle>groff_man</refentrytitle> <manvolnum>7</manvolnum>
1014 </citerefentry>.
1015 </para>
1016
1017 <para>The definitive dope for <application>troff</application>, with all
1018 macros explained, is the <citetitle>Troff User's Manual</citetitle>,
1019 available as <ulink
1020 url="http://cm.bell-labs.com/sys/doc/troff.html">html</ulink>, <ulink
1021 url="http://cm.bell-labs.com/sys/doc/troff.ps">PostScript (ps,
1022 760K)</ulink> or <ulink
1023 url="http://cm.bell-labs.com/sys/doc/troff.pdf">Portable Document
Format (pdf, 240K)</ulink>, by Joseph F. Ossanna and Brian
1025 W. Kernighan, revised November 1992. AT&amp;T Bell Labs have made it
1026 publicly available. Don't forget to check out the late great <ulink
url="http://www.kohala.com/start/">W. Richard Stevens's homepage</ulink>
1028 (famous for <citetitle>Unix Network Programming</citetitle> as well as the
1029 <emphasis>TCP/IP Illustrated</emphasis> trilogy), who also has a list of <ulink
1030 url="http://www.kohala.com/start/troff/troff.html">Troff
1031 Resources</ulink> including <application>tbl</application>,
1032 <application>eqn</application>, <application>pic</application> and other filters.</para>
1033
1034 </sect1>
1035 <sect1 id="q6"><title>What preprocessors may I use? </title>
1036
1037 <para>Groff comes with at least three preprocessors,
1038 <application>tbl</application>, <application>eqn</application>, and
1039 <application>pic</application> (on some systems they are named
1040 <application>gtbl</application>, <application>geqn</application> and
1041 <application>gpic</application>.) Their purpose is to translate preprocessor
1042 macros and their data to regular troff input. <application>Tbl</application>
1043 is a table preprocessor, <application>eqn</application> is an equations/maths
1044 preprocessor and <application>pic</application> is a picture
1045 preprocessor. Please refer to the man pages for more information on
1046 what functionality they provide. To put it in a nutshell: don't write
1047 man pages requiring <emphasis>any</emphasis> preprocessor. Eqn will
1048 generally produce terrible output for typewriter-like devices,
unfortunately the type of device on which 99% of all man pages are
viewed (well, mine at least). For example, XAllocColor.3x uses a few formulas
1051 with exponentiation. Due to the nature of typewriter-like devices, the
1052 exponent will be on the same line as the base. N to the power of two
1053 appears as `N2'. <application>Tbl</application> should be avoided because all
xman programs I have seen fail on tbl input. Xman 3.1.6 uses the following
1055 command to format man pages, e.g.
1056 <citerefentry>
1057 <refentrytitle>signal</refentrytitle> <manvolnum>7</manvolnum>
1058 </citerefentry>:</para>
1059
1060 <programlisting>
1061 gtbl /usr/man/man7/signal.7 | geqn | gtbl | \
1062 groff -Tascii -man /tmp/xmana01760 2&gt; /dev/null
1063 </programlisting>
1064
1065 <para>which screws up for sources using <application>gtbl</application>,
1066 because <application>gtbl</application> output is fed again into
1067 <application>gtbl</application>. The effect is a man page without your
1068 table. I don't know if it's a bug or a feature that
1069 <application>gtbl</application> chokes on its own output or if xman could be a
1070 little smarter and not use <application>gtbl</application> twice. Furthermore,
1071 some systems use <application>grog</application> to determine what options to
1072 pass to groff. Unfortunately grog sometimes guesses wrong and
1073 recommends <command>groff -t</command> when in fact
1074 <application>tbl</application> must not be used.</para>
1075
1076 <para>We are basically left with two workarounds for tables:</para>
1077
1078 <orderedlist>
1079 <listitem><para>Format the table yourself manually and put it between .nf and
1080 .fi lines so that it will be left unformatted. You won't have bold and
1081 italics this way but this beats having your table swallowed any
1082 day.</para></listitem>
1083
1084 <listitem><para>Use any <application>tbl</application> macros you like but
1085 distribute the <application>tbl</application> output instead of the
input. There is however this quirk with <application>grog</application>, which
1087 thinks that any file containing a line starting with
1088 <markup>.TS</markup> requires <application>tbl</application>.
1089 <application>Tbl</application> output for some reason unbeknownst to me still
1090 contains <markup>.TS</markup> and <markup>.TE</markup>. It seems you
1091 can simply remove them and have the result still look okay. YMMV, so
1092 please test this with your particular man
1093 page.</para></listitem></orderedlist>
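
<para>A minimal sketch of the first workaround: the table is formatted
by hand (the columns and contents here are invented for illustration)
and protected from filling by .nf/.fi:</para>

<programlisting>
.SH RETURN CODES
.nf
Code   Meaning
0      success
1      syntax error in the input
2      I/O error
.fi
</programlisting>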
1094
1095 <para>I have yet to see a man page requiring
1096 <application>pic</application> preprocessing. But I would not like
1097 it. As you can see above, <application>xman</application> will not use
1098 it and <application>groff</application> will certainly do the funky
1099 wadakiki on the input.</para>
1100
1101 </sect1>
1102 <sect1 id="q7"><title>Should I distribute source and/or
1103 already formatted documentation?</title>
1104
1105 <para>Let me give the pros and cons of a few selected
1106 possibilities:</para>
1107
1108 <variablelist>
1109 <varlistentry>
1110 <term>Source only:</term>
1111 <listitem>
1112 <itemizedlist>
1113 <listitem>
1114 <para><emphasis>Pro:</emphasis> smaller distribution package.</para>
1115 </listitem>
1116 <listitem>
1117 <para><emphasis>Con:</emphasis> inaccessible on systems without
1118 <application>groff</application>.</para>
1119 </listitem>
1120 </itemizedlist>
1121 </listitem>
1122 </varlistentry>
1123
1124 <varlistentry>
1125 <term>Uncompressed formatted only:</term>
1126 <listitem>
1127 <itemizedlist>
1128 <listitem>
1129 <para><emphasis>Pro:</emphasis> accessible even on systems without
1130 <application>groff</application>. </para>
1131 </listitem>
1132 <listitem>
1133 <para><emphasis>Con:</emphasis> the user can't generate a dvi or
1134 postscript file. </para>
1135 </listitem>
1136 <listitem>
1137 <para><emphasis>Con:</emphasis> waste of disk space on systems that
1138 also handle compressed pages.</para>
1139 </listitem>
1140 </itemizedlist>
1141 </listitem>
1142 </varlistentry>
1143
1144 <varlistentry>
1145 <term>Compressed formatted only: </term>
1146 <listitem>
1147 <itemizedlist>
1148 <listitem>
1149 <para><emphasis>Pro:</emphasis> accessible even on systems
1150 without <application>groff</application>. </para>
1151 </listitem>
1152 <listitem>
1153 <para><emphasis>Con:</emphasis> the user can't generate a dvi
1154 or postscript file. </para>
1155 </listitem>
1156 <listitem>
1157 <para><emphasis>Con:</emphasis> which compression format would you use?
1158 .Z? .z? .gz? All of them?</para>
1159 </listitem>
1160 </itemizedlist>
1161 </listitem>
1162 </varlistentry>
1163
1164 <varlistentry>
1165 <term>Source and uncompressed formatted:</term>
1166 <listitem>
1167 <itemizedlist>
1168 <listitem>
1169 <para><emphasis>Pro:</emphasis> accessible even on systems without
1170 <application>groff</application>.</para>
1171 </listitem>
1172 <listitem>
<para><emphasis>Con:</emphasis> larger distribution package.</para>
1174 </listitem>
1175 <listitem>
1176 <para><emphasis>Con:</emphasis> some systems may expect compressed
1177 formatted man pages.</para>
1178 </listitem>
1179 <listitem>
1180 <para><emphasis>Con:</emphasis> redundant information on systems equipped
1181 with <application>groff</application>.</para>
1182 </listitem>
1183 </itemizedlist>
1184 </listitem>
1185 </varlistentry>
1186 </variablelist>
1187
1188 <para>IMHO it is best to distribute source only. The argument that
1189 it's inaccessible on systems without <application>groff</application> does not
1190 matter. The 500+ man pages of the Linux Documentation Project are
1191 source only. The man pages of XFree86 are source only. The man pages
1192 from the FSF are source only. In fact, I have rarely seen software
1193 distributed with formatted man pages. If any sysadmin is really
1194 concerned about having man pages accessible then he also has
1195 <application>groff</application> installed.</para>
1196
1197 </sect1>
1198 <sect1 id="q8"><title>What are the font conventions?</title>
1199
1200 <para>First of all: don't use direct font operators like <markup>\fB</markup>,
1201 <markup>\fP</markup> etc. Use macros which take arguments. This way you
1202 avoid a common glitch: forgetting the font change at the end of the word
1203 and having the bold or italic extend up to the next font change. Believe
1204 me, it happens more often than you think. The <filename>tmac.an</filename>
1205 macros provide the following type faces: </para>
1206
1207 <informaltable>
1208 <tgroup cols="2" colsep="1" rowsep="1">
1209 <tbody>
1210 <row><entry>B</entry> <entry>Bold</entry></row>
1211 <row><entry>BI</entry> <entry>Bold alternating with italics</entry></row>
1212 <row><entry>BR</entry> <entry>Bold alternating with Roman </entry></row>
1213 <row><entry>I</entry> <entry>Italics</entry></row>
1214 <row><entry>IB</entry> <entry>Italics alternating with bold</entry></row>
1215 <row><entry>IR</entry> <entry>Italics alternating with Roman</entry></row>
1216 <row><entry>RB</entry> <entry>Roman alternating with bold</entry></row>
1217 <row><entry>RI</entry> <entry>Roman alternating with italics</entry></row>
1218 <row><entry>SM</entry> <entry>Small (scaled 9/10 of the regular size)</entry></row>
1219 <row><entry>SB</entry> <entry>Small bold (<emphasis>not</emphasis>
1220 small alternating with bold)</entry></row>
1221 </tbody>
1222 </tgroup>
1223 </informaltable>
1224
1225 <para>X alternating with Y means that the odd arguments are typeset in X while
1226 the even arguments are typeset in Y. For example </para>
1227
1228 <programlisting>
.BI "Arg 1 is Bold, " "Arg 2 is Italics, " "and Bold, " "and Italics."
1230 </programlisting>
1231
1232 <para>The double quotes are needed to include white space into an argument;
1233 without them, no white space appears between the alternating typefaces.
1234 In fact, you'll only need the macros for alternating typefaces in
1235 cases where you <emphasis>want</emphasis>
1236 to avoid white space between typeface changes.
1237 So much for what's available. Here's how you should make use of the different
1238 typefaces: (portions shamelessly stolen from man(7)) </para>
1239
1240 <para>Although there are many arbitrary conventions for man pages in the UNIX
1241 world, the existence of several hundred Linux-specific man pages defines
1242 our standards: For functions, the arguments are always specified using
1243 italics, even in the SYNOPSIS section, where the rest of the function is
1244 specified in bold: </para>
1245
1246 <programlisting>
.BI "myfunction(int " argc ", char **" argv );
1248 </programlisting>
1249
1250 <para>Filenames are always in italics, except in the SYNOPSIS section, where
1251 included files are in bold. So you should use </para>
1252
1253 <programlisting>
1254 .I /usr/include/stdio.h
1255 </programlisting>
1256
1257 <para>and</para>
1258
1259 <programlisting>
1260 .B #include &lt;stdio.h&gt;
1261 </programlisting>
1262
1263 <para>Special macros, which are usually in upper case, are in bold: </para>
1264
1265 <programlisting>
1266 .B MAXINT
1267 </programlisting>
1268
1269 <para>When enumerating a list of error codes, the codes are in
1270 bold. This list usually uses the <markup>.TP</markup> (paragraph with
1271 hanging tag) macro as follows: </para>
1272
1273 <programlisting>
.TP
.B EBADF
.IR fd " is not a valid file descriptor."
.TP
.B EINVAL
.IR fd " is unsuitable for reading."
1280 </programlisting>
1281
1282 <para>Any reference to another man page (or to the subject of the current
1283 man page) is in bold. If the manual section number is given, it is given
1284 in Roman, without any spaces: </para>
1285
1286 <programlisting>
1287 .BR man (7)
1288 </programlisting>
1289
<para>Acronyms look best when typeset in a small-caps typeface. So I
1291 recommend </para>
1292
<itemizedlist>
<listitem><para>.SM UNIX</para></listitem>
<listitem><para>.SM ASCII</para></listitem>
<listitem><para>.SM TAB</para></listitem>
<listitem><para>.SM NFS</para></listitem>
<listitem><para>.SM LALR(1)</para></listitem>
</itemizedlist>
1300 </sect1>
1301
1302 <sect1 id="q9"><title>Polishing your man page</title>
1303
1304 <para>Following are some guidelines that increase reliability, readability
1305 and 'formatability' of your documentation.</para>
1306
1307 <itemizedlist>
1308
1309 <listitem><para>Test examples to make sure they work (use cut and
1310 paste to give your shell the exact wording from the man page). Copy
1311 the output of your command into your man page, don't just type what
1312 you <emphasis>think</emphasis> your program will print.</para></listitem>
1313
<listitem><para>Proofread, run ispell, and have someone else read it,
1315 especially if you are not a native English speaker. The HOWTO you are
1316 reading has passed the latter test (special thanks to Michael Miller
1317 for a particularly heroic contribution! All the remaining rough edges
1318 are entirely my fault). Additional volunteers are always
1319 welcome.</para></listitem>
1320
1321 <listitem><para>Test your man page: Does <application>groff</application>
1322 complain when you format your man page? It's nice to have the
1323 <application>groff</application> command line in a comment. Does the
1324 <citerefentry>
1325 <refentrytitle>man</refentrytitle><manvolnum>1</manvolnum>
1326 </citerefentry>
1327 command complain when you call <command>man yourprog</command>?
1328 Does it produce the expected result? Will
1329 <citerefentry>
1330 <refentrytitle>xman</refentrytitle><manvolnum>1m</manvolnum>
1331 </citerefentry> and
1332 <citerefentry>
1333 <refentrytitle>tkman</refentrytitle><manvolnum>1tk</manvolnum>
1334 </citerefentry> cope
with your manual? XFree86 3.1 ships xman 3.1.6 (X11R6), which will try to
1336 uncompress using:</para>
1337
1338 <programlisting>
1339 gzip -c -d &lt; %s &gt; %s
1340 zcat &lt; %s &gt; %s
1341 </programlisting>
1342 </listitem>
1343
1344 <listitem><para>Will
1345 <citerefentry>
1346 <refentrytitle>makewhatis</refentrytitle><manvolnum>8</manvolnum>
1347 </citerefentry>
1348 be able to extract the one-line description from the NAME
1349 section?</para></listitem>
1350 </itemizedlist>
1351
1352 </sect1>
1353 <sect1 id="q10"><title>How do I get a plain text man page
1354 without all that ^H^_ stuff? </title>
1355
1356 <para>Have a look at
1357 <citerefentry>
1358 <refentrytitle>col</refentrytitle><manvolnum>1</manvolnum>
1359 </citerefentry>,
1360 because <application>col</application> can filter out backspace
1361 sequences. Just in case you can't wait that long: </para>
1362
1363 <programlisting>
1364 funnyprompt$ groff -t -e -mandoc -Tascii manpage.1 | \
1365 col -bx &gt; manpage.txt
1366 </programlisting>
1367
1368 <para>The <option>-t</option> and <option>-e</option> switches tell
1369 <application>groff</application> to preprocess using <application>tbl</application>
1370 and <application>eqn</application>. This is overkill for man pages that don't
1371 require preprocessing but it does no harm apart from a few CPU cycles
1372 wasted. On the other hand, not using <option>-t</option> when it is
1373 actually required does harm: the table is terribly formatted. You can
1374 even find out (well, "guess" is a better word) what command is needed
1375 to format a certain <application>groff</application> document (not just man
1376 pages) by issuing </para>
1377
1378 <programlisting>
funnyprompt$ grog /usr/man/man7/signal.7
groff -t -man /usr/man/man7/signal.7
1380 </programlisting>
1381
1382 <para>"Grog" stands for "GROff Guess", and it does what
1383 it says &mdash; guess. If it were perfect we wouldn't need options any more.
1384 I've seen it guess incorrectly on macro packages and on preprocessors.
1385 Here is a little perl script I wrote that can delete the page headers
1386 and footers, thereby saving you a few pages (and mother nature a tree)
1387 when printing long and elaborate man pages. Save it in a file named
<filename>strip-headers</filename> and make it executable with chmod 755. </para>
1389
1390 <programlisting>
1391 #!/usr/bin/perl -wn
1392 # make it slurp the whole file at once:
1393 undef $/;
1394 # delete first header:
1395 s/^\n*.*\n+//;
1396 # delete last footer:
1397 s/\n+.*\n+$/\n/g;
1398 # delete page breaks:
1399 s/\n\n+[^ \t].*\n\n+(\S+).*\1\n\n+/\n/g;
1400 # collapse two or more blank lines into a single one:
1401 s/\n{3,}/\n\n/g;
1402 # see what's left...
1403 print;
1404 </programlisting>
1405
1406 <para>You have to use it as the first filter after the
1407 <application>man</application> command as it relies on the number of
1408 newlines being output by <application>groff</application>. For
1409 example: </para>
1410
1411 <programlisting>
1412 funnyprompt$ man bash | strip-headers | col -bx &gt; bash.txt
1413 </programlisting>
1414 </sect1>
1415
1416 <sect1 id="q11"><title>How do I get a high quality PostScript
1417 man page?</title>
1418
1419 <programlisting>
1420 funnyprompt$ groff -t -e -mandoc -Tps manpage.1 &gt; manpage.ps
1421 </programlisting>
1422
1423 <para>Print or view that using your favorite PostScript
1424 printer/viewer. See <link linkend="q10">question 10)</link> for an
1425 explanation of the options.</para>
1426 </sect1>
1427
1428 <sect1 id="q12"><title>How do I get `apropos' and `whatis' to work? </title>
1429
1430 <para>Suppose you wonder what compilers are installed on your system
1431 and how these can be invoked. To answer this (frequently asked)
1432 question you say </para>
1433
1434 <programlisting>
1435 funnyprompt$ apropos compiler
1436 f77 (1) - Fortran 77 compiler
1437 gcc (1) - GNU C and C++ compiler
1438 pc (1) - Pascal compiler
1439 </programlisting>
1440
1441 <para><application>Apropos</application> and
1442 <application>whatis</application> are used to quickly report which man
1443 page has information on a certain topic. Both programs search a number
1444 of files named `whatis' that may be found in each of the manual base
1445 directories. As previously stated, the whatis data base files contain
1446 a one line entry for any man page in the respective directory tree. In
1447 fact, that line is exactly the NAME section (to be precise: joined on
1448 one line and with hyphenation removed; note that the section is
1449 mentioned within parentheses). The whatis database files are created
1450 with the
1451 <citerefentry>
1452 <refentrytitle>makewhatis</refentrytitle> <manvolnum>8</manvolnum>
1453 </citerefentry>
1454 program. There are several versions around, so please refer to the man
1455 page to determine what options are available. In order for
1456 <application>makewhatis</application> to be able to extract the NAME
1457 sections correctly it is important that you, the manual writer, adhere
1458 to the NAME section format described under <link linkend="q3">question
1459 3)</link>. The differences between
1460 <application>apropos</application> and
1461 <application>whatis</application> are simply where in the line they
1462 look, and what they are looking
1463 for. <application>Apropos</application> (which is equivalent to
<command>man -k</command>) searches for the argument string anywhere on
1465 the line, whereas <application>whatis</application> (equivalent to
1466 <command>man -f</command>) tries to match a complete command name only
1467 on the part before the dash. Consequently, `<command>whatis
1468 cc</command>' will report if there is a <application>cc</application>
1469 manual and remain quiet for <application>gcc</application>.</para>
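
<para>The extraction described above can be sketched with sed (a
hypothetical NAME section is fed in via printf; hard-coding the "(1)"
assumes a section 1 page, which a real makewhatis would derive from
the directory name):</para>

<programlisting>
printf '%s\n' '.TH FOO 1' '.SH NAME' 'foo \- frobnicate the bar' |
sed -n '/^\.SH NAME/{n;p;}' |
sed 's/ \\- / (1) - /'
# prints: foo (1) - frobnicate the bar
</programlisting>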
1470 </sect1>
1471
1472 <sect1><title>Corrections and suggestions welcome!</title>
1473
1474 <sect2 id="copying"><title>A) Copying conditions</title>
1475
1476 <para>Copyright 1995-2001 by Jens Schweikhardt. All rights reserved.</para>
1477 <literallayout>
1478 "Two clause" BSD License:
1479
1480 Redistribution and use in source and binary forms, with or without
1481 modification, are permitted provided that the following conditions
1482 are met:
1483 1. Redistributions of source code must retain the above copyright
1484 notice, this list of conditions and the following disclaimer.
1485 2. Redistributions in binary form must reproduce the above copyright
1486 notice, this list of conditions and the following disclaimer in the
1487 documentation and/or other materials provided with the distribution.
1488
1489 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1490 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1491 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1492 DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1493 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1494 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1495 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1496 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1497 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1498 IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1499 POSSIBILITY OF SUCH DAMAGE.
1500 </literallayout>
1501
1502 </sect2>
1503 <sect2 id="acknowledgements"><title>B) Acknowledgements</title>
1504
1505 <itemizedlist>
1506 <listitem><para>Eric S. Raymond for converting the HTML to
1507 XML-DocBook.</para></listitem>
1508 <listitem><para>Michael Miller for proofreading the whole HOWTO (in February
1509 2001); Gordon Torrie for many helpful grammar remarks (in August 2001). Any
1510 remaining grammar or style bogons are entirely my fault.</para></listitem>
1511 <listitem><para><ulink url="http://www.suse.de/">S.u.S.E. (.de)</ulink>
(or <ulink url="http://www.suse.com/">.com</ulink>)
1513 who are the only distributor to keep sending me a free copy of their
1514 latest product, acknowledging my work as a howto author.</para></listitem>
1515 </itemizedlist>
1516
1517 <para>If your name is missing here, drop me a note.</para>
1518 </sect2>
1519 </sect1>
1520
1521 </article>
5050 def analyze_manpage(manpage):
5151 "Provide log annotations based on content."
5252 exclusions = (
53 ("<html>", "This page is HTML"),
53 ("<html>", "HTML"),
5454 ("auto-generated by docbook2man-spec", "DocBook"),
5555 ("automatically generated by docbook2man", "DocBook"),
5656 ("Generated by db2man.xsl", "XML DocBook"),
+0
-194
manlifter.1 less more
0 '\" t
1 .\" Title: manlifter
2 .\" Author: [see the "Author" section]
3 .\" Generator: DocBook XSL Stylesheets v1.79.1 <http://docbook.sf.net/>
4 .\" Date: 06/12/2018
5 .\" Manual: Documentation Tools
6 .\" Source: manlifter
7 .\" Language: English
8 .\"
9 .TH "MANLIFTER" "1" "06/12/2018" "manlifter" "Documentation Tools"
10 .\" -----------------------------------------------------------------
11 .\" * Define some portability stuff
12 .\" -----------------------------------------------------------------
13 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
14 .\" http://bugs.debian.org/507673
15 .\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
16 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
17 .ie \n(.g .ds Aq \(aq
18 .el .ds Aq '
19 .\" -----------------------------------------------------------------
20 .\" * set default formatting
21 .\" -----------------------------------------------------------------
22 .\" disable hyphenation
23 .nh
24 .\" disable justification (adjust text to left margin only)
25 .ad l
26 .\" -----------------------------------------------------------------
27 .\" * MAIN CONTENT STARTS HERE *
28 .\" -----------------------------------------------------------------
29 .SH "NAME"
30 manlifter \- mass\-conversion script and test harness for doclifter
31 .SH "SYNOPSIS"
32 .HP \w'\fBmanlifter\fR\ 'u
33 \fBmanlifter\fR [\-d\ \fIoption\fR] [\-e] [\-f\ \fIlistfile\fR] [\-h] [\-I\ \fImandir\fR] [\-m] [\-M] [\-o\ \fIoutdir\fR] [\-p\ \fIpatch\-directory\fR] [\-P] [\-q] [\-v] [\-s\ \fIsection\fR] [\-X\ \fIexclude\fR] \fIname\fR...
34 .HP \w'\fBmanlifter\fR\ 'u
35 \fBmanlifter\fR [\-S]
36 .SH "DESCRIPTION"
37 .PP
38 \fBmanlifter\fR
39 is a script that sequences
40 \fBdoclifter\fR(1)
41 to convert an entire manual\-page tree to XML\-DocBook, optionally also generating HTML from the XML\&. Another use is as a torture\-test tool for doclifter; it logs errors to standard output and collects timings\&.
42 .PP
43 Called without any file arguments, manlifter tries to convert all eligible man pages installed on the system, placing the resulting XML files under
44 xmlman
45 in the current directory\&. Each successfully translated page foo\&.N is copied to manN/foo\&.xml beneath the output directory, regardless of what source directory it came from\&.
46 .PP
47 A manual page is considered ineligible for batch conversion if it contains text indicating it has been generated from DocBook masters or from Doxygen\&.
48 .PP
49 For each source file examined, if the destination file exists and is newer than the source, the conversion is skipped; thus, incremental runs of
50 \fBmanlifter\fR
51 do the least work needed to keep the target XML tree up to date\&. Likewise, in \-h mode derived HTML files are only made when necessary\&.
52 .PP
53 Stub pages that are just
54 \fB\&.so\fR
55 redirections are translated to corresponding symlinks of XML files (and, with \-h, HTML files)\&.
56 .PP
57 \fBmanlifter\fR
58 may also be called with a single file argument, which is interpreted as the stem name of a potential manual page\&.
59 \fBmanlifter\fR
60 then searches all selected manual sections for a matching page and attempts to convert it\&. In this case, a copy of the man page and the converted version are dropped immediately beneath the output directory, with the names foobar\&.man and foobar\&.man\&.xml, respectively\&. This mode is normally of interest only to
61 \fBdoclifter\fR
62 developers for debugging that program\&.
63 .PP
64 In either of the above cases,
65 \fBmanlifter\fR
66 will uncompress the file if it has a
67 \&.gz,
68 \&.bz2
69 or
70 \&.Z
71 suffix on the name\&.
72 .PP
73 Options are as follows:
74 .PP
75 \-d
76 .RS 4
77 Pass the string argument to each doclifter call as options\&. Each space\-separated token in the string becomes a separate argument in the call\&.
78 .RE
79 .PP
80 \-e
81 .RS 4
82 Run in log\-filter mode (mainly of interest to
83 \fBdoclifter\fR
84 developers)\&. In this mode,
85 \fBmanlifter\fR
86 reads a test log from standard input and filters it in a way dependent on the \-f and \-q options\&. If neither of these is given, messages from successful runs are stripped out and only errors passed through to standard output\&.
87 .RE
88 .PP
89 \-f
90 .RS 4
91 Normally, run doclifter on the files named by each line in the argument file\&. In error\-filter mode the argument is instead interpreted as a filtering regular expression\&.
92 .RE
93 .PP
94 \-h
95 .RS 4
96 Also generate HTML translations into the output directory\&. DocBook citerefentry markup is transformed to hyperlinks in the directory, and a contents listing is generated to
97 index\&.html\&.
98 .RE
99 .PP
100 \-I
101 .RS 4
102 Specify the root of the manual\-page tree\&. By default this is
103 /usr/share/man\&.
104 .RE
105 .PP
106 \-m
107 .RS 4
108 Make a patch to correct the last page fetched\&. It is copied, an editor is called on the copy (using the environment variable
109 \fB$EDITOR\fR), and then
110 \fBdiff\fR(1)
111 is called to drop the patch in the prepatch directory\&. Fails with an error if such a patch is already present\&.
112 .RE
113 .PP
114 \-M
115 .RS 4
116 Lift the specified files, then do the equivalent of the \-m option\&.
117 .RE
118 .PP
119 \-o
120 .RS 4
121 Set the output directory into which XML\-DocBook translations will be dropped\&. By default this is
122 xmlman
123 under the current directory in batch mode, or the current directory otherwise\&.
124 .RE
125 .PP
126 \-p
127 .RS 4
128 Interpret the argument as the name of a patch directory (the default name is
129 prepatch
130 under the current directory)\&. Each file named
131 foo\&.N\&.patch
132 is interpreted as a patch to be applied to the manual page foo(N) before doclifter translates it\&.
133 .RE
134 .PP
135 \-q
136 .RS 4
137 Normally, pass the \-q (quiet) option to each doclifter call\&. In error\-filter mode, return a list of files on which translation failed\&.
138 .RE
139 .PP
140 \-v
141 .RS 4
142 Pass the \-v (verbose) option to each doclifter call\&. This option can be repeated to increase the verbosity level\&.
143 .RE
144 .PP
145 \-s
146 .RS 4
147 Specify a section to scan\&. Use this with an argument; it should not be necessary when doing a conversion of the entire tree\&.
148 .RE
149 .PP
150 \-S
151 .RS 4
152 Compile error statistics from a
153 \fBmanlifter\fR
154 logfile presented on standard input\&. This option will be of interest mainly to
155 \fBdoclifter\fR
156 developers\&.
157 .RE
158 .PP
159 \-X
160 .RS 4
161 In batch mode exclude pages listed in the argument file\&. Meant to be used for pages that are known good and take an extremely long time to lift, in order to cut down the time for a test run\&. (Most pages lift in less than a half second, but a few can take 15 minutes or longer\&.)
162 .RE
163 .PP
164 \fBmanlifter\fR
165 emits a logfile to standard output\&. The file begins with a timestamp line and a blank line, and ends with a line giving run time and various interesting statistics\&. Between these are stanzas, separated by blank lines, one for each file on which
166 \fBdoclifter\fR
167 was run\&.
168 .PP
169 The first line of each stanza begins with "! ", followed by the pathname of the source manual page, followed by "=" and the return status of doclifter run on that file\&. Following that is a space and
170 \fBdoclifter\fR\*(Aqs runtime in seconds\&.
171 .PP
172 This initial line may be followed by information messages and the error output of the doclifter run\&.
173 .PP
174 \fBmanlifter\fR
175 must find a copy of
176 \fBdoclifter\fR
177 in either the current directory or one of the command directories in your
178 \fBPATH\fR
179 in order to run\&.
180 .SH "BUGS"
181 .PP
182 HTML generation is painfully slow\&. Unfortunately, there is little we can do to remedy this, because XSLT engines are painfully slow\&.
183 .SH "SEE ALSO"
184 .PP
185 \fBdoclifter\fR(1),
186 \fBxmlto\fR(1)
187 .SH "AUTHOR"
188 .PP
189 Eric S\&. Raymond
190 <esr@thyrsus\&.com>
191 .PP
192 There is a project web page at
193 \m[blue]\fBhttp://www\&.catb\&.org/~esr/doclifter/\fR\m[]\&.
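The logfile stanza format described above ("! ", pathname, "=", return status, space, runtime) is easy to scan mechanically. Here is a minimal sketch of a header parser; the regular expression is inferred from the DESCRIPTION prose, not from any documented grammar, so treat it as an assumption.

```python
import re

# Assumed layout, reconstructed from the man page prose:
#   "! <pathname>=<status> <runtime-in-seconds>"
STANZA_HEADER = re.compile(r'^! (?P<path>.+)=(?P<status>\d+) (?P<time>[0-9.]+)$')

def parse_stanza_header(line):
    "Return (pathname, status, runtime) for a stanza header line, or None."
    m = STANZA_HEADER.match(line)
    if m is None:
        return None
    return (m.group('path'), int(m.group('status')), float(m.group('time')))
```

Information messages and doclifter error output following the header can then be accumulated until the next blank line ends the stanza.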
0 #!/usr/bin/env python3
1 # Generate the bug summary page
2 # All the distro-dependent stuff is imported from buglist.
3
4 import string, buglist, time
5
6 bugs = buglist.Buglist()
7
8 old_accepted = 594 # Represents patches formerly tagged 'y'.
9
10 applied = len(bugs.pagelist(include="npr"))
11 pending = len(bugs.pagelist(include="n*"))
12 accepted = old_accepted + len(bugs.pagelist(include="yp"))
13 rejected = len(bugs.pagelist(include="r"))
14
15 total = sum(bugs.counts)
16 already = bugs.counts[7]
17 errors = sum(bugs.counts[1:7]) # Types 1-6
18
19 ok = (total - already) - errors - applied
20
21 def percent(x):
22 return x * 100.0 / (total - already)
23
24 print('''<?xml version="1.0" encoding="ISO-8859-1"?>
25 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
26 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
27 <html xmlns="http://www.w3.org/1999/xhtml">
28 <head>
29 <link rev="made" href="mailto:esr@snark.thyrsus.com" />
30 <link rel="stylesheet" href="/~esr/sitestyle.css" type="text/css" />
31 <meta name="description" content="" />
32 <meta name="keywords" content="" />
33 <meta name="MSSmartTagsPreventParsing" content="TRUE" />
34 <title>Manual page glitches</title>
35 </head>
36 <body>
37
38 <div id="Header">
39 <table width="100%%" cellpadding="0" summary="Canned page header">
40 <tr>
41 <td>Manual page glitches</td>
42 <td align="right">%s</td>
43 </tr>
44 </table>
45 </div>
46
47 <div id="Menu">
48 <hr/>
49 <a href="/~esr" title="My home page">Home Page</a><br />
50 <a href="/~esr/whatsnew.html" title="New on this site">What is New</a><br />
51 <a href="/~esr/sitemap.html" title="Map of the site">Site Map</a><br />
52 <a href="/~esr/software.html" title="Software I maintain">Software</a><br />
53 <a href="/~esr/projects.html" title="My projects">Projects</a><br />
54 <a href="/~esr/faqs/" title="My FAQ documents">HOWTOs</a><br />
55 <a href="/~esr/writings/" title="Essays and ruminations">Essays</a><br />
56 <a href="/~esr/personal.html" title="Portrait of the author">Personal</a><br />
57 <a href="http://www.ibiblio.org/esrblog/">Weblog</a><br/>
58 <a href="/~esr/netfreedom/">Freedom!</a><br />
59 <a href="/~esr/guns/">Firearms!</a><br />
60 <hr/>
61 <p>
62 <a href="http://validator.w3.org/check/referer"><img
63 src="http://www.w3.org/Icons/valid-xhtml10"
64 alt="Valid XHTML 1.0!" height="31" width="88" /></a>
65 </p>
66
67 </div>
68
69 <div id="Content">
70
71 <p>I maintain a man-page-to-DocBook converter, <a
72 href="index.html">doclifter</a>. A side effect of this program is that
73 it serves as a validator for the correctness and portability of the
74 markup used on Unix manual pages. I test it by running it against all
75 the manual pages in a full %s; there are %d of these on my development
76 machine, of which %d already have DocBook masters. It converts %d
77 (%02.2f%%) of the remaining %d into valid XML-DocBook.</p>
78
79 <p>Most of the remaining %02.2f%% of errors happen because groff(1)
80 and its kin have weak-to-nonexistent validity checking. Often,
81 doclifter fails because of outright errors in macro usage that groff
82 does not catch. Sometimes it fails on constructions that are legal but
83 perverse. Very occasionally it throws an error because a man page is
84 correct but has a structure that cannot be translated to DocBook. I
85 keep a database of patches for such problems, and periodically
86 try to push fix patches out to the manual-page maintainers.</p>
87
88 <p>(These are lower numbers and a higher error rate than in some
89 previous reports because I now use i3 rather than GNOME or KDE. Many
90 of the userland manuals that I used to check are no longer installed
91 where my test procedure can see them. Because bad markup tends to be
92 concentrated in the older manual pages of core tools, a larger random
93 sample pulls down the error rate.)</p>
94
95 <p>Even if you do not care about DocBook, this cleanup work benefits
96 all third-party manual page viewers, including the GNOME and KDE
97 documentation browsers; groff constructions that confuse doclifter
98 are very likely to produce visible problems on these.</p>
99
100 <p>The table below is a listing of the %d (%02.2f%%) pages on which
101 doclifter fails, but the failure can be prevented with a fix patch to
102 the manual page source. %d pages (%02.2f%%) remain intractable,
103 generally due to markup problems more severe than a point patch can
104 address. I am working with the individual projects responsible to get
105 those cleaned up.</p>
106
107 <p>It is likely that you are reading this because you have received
108 email telling you that patches are associated with your name or list
109 address. Please consider incorporating them, or equivalents, in your
110 next release. Also, please write back and tell me what you plan to do
111 so I can keep my database up-to-date.</p>
112
113 <p>If you are not already considering it, please think about moving
114 the documentation masters of your project to DocBook (or some format
115 from which you can generate DocBook). If everybody moved to using
116 DocBook as a common exchange format, it would become much easier to
117 support unified browsing of all system documentation with Web-like
118 hypertext capabilities, automatic indexing, and rich search
119 facilities.</p>
120
121 <p>Tools to generate man pages, HTML, and PostScript from DocBook
122 files are open-source and generally available. My program, doclifter,
123 should make moving your manual-page masters to DocBook a fairly
124 painless process.</p>
125
126 <p>Many major open source projects (including the Linux kernel, the
127 Linux Documentation Project, X.org, GNOME, KDE, and FreeBSD) have
128 already moved to DocBook or are in the process of doing so.</p>
129
130 <p>(Individual entries for accepted patches are no longer shown.) </p>
131
132 <p>Summary: %d patches pending, %d accepted, %d rejected.</p>
133
134 <p>Status codes are as follows:</p>
135
136 <br />
137
138 <table width='100%%' border='1'>
139 <tr><td>n</td>
140 <td>No response yet.</td>
141 </tr>
142
143 <tr><td>p</td>
144 <td>Maintainer has informed me that this is fixed in the masters, but
145 I have not seen the fix yet.</td>
146 </tr>
147
148 <tr><td>y</td>
149 <td>Accepted</td>
150 </tr>
151
152 <tr><td>r</td>
153 <td>Rejected</td>
154 </tr>
155
156 <tr><td>s</td>
157 <td>Superseded (page lifts correctly without the patch)</td>
158 </tr>
159
160 <tr><td>[0-9]+</td>
161 <td>number of mailings sent</td>
162 </tr>
163
164 <tr><td>b</td>
165 <td>Address is blocked</td>
166 </tr>
167 </table>
168
169 <br />
170
171 <p>Problem codes are explained after the table.</p>
172
173 <br clear='left'/>
174
175 <table width='100%%' border='1'>
176 <tr><td><b>Patch:</b></td><td><b>Problem code:</b></td><td>Status:</td></tr>
177 ''' % (time.strftime("%d %b %Y", time.gmtime()),
178 buglist.distro, total, already,
179 ok, percent(ok),
180 total - already,
181 100.00 - percent(ok),
182 applied, percent(applied),
183 errors, percent(errors),
184 pending, accepted, rejected))
185
186 for line in bugs.lines:
187 (status, pages, problems, mailto) = map(str.strip, line.split("|"))
188 if 'g' in status:
189 continue
190 mailto = mailto.replace("<", "&lt;").replace(">", "&gt;").replace("@", "&#x40;")
191 problems = " ".join(map(lambda x: "<a href='#%s'>%s</a>" % (x,x), problems))
192 pages = "<br />\n".join(map(lambda x: "<a href='prepatch/%s.patch'>%s</a>" % (x,x), pages.split(",")))+"<br />\n"
193 if 'p' in status or 'y' in status or 's' in status:
194 status = "<font color='green'>" + status + "</font>"
195 print("<tr><td>%s</td><td>%s</td><td>%s</td></tr>" % (pages, problems, status))
196 print("</table>\n\n<h1>Error codes:</h1>\n<dl>\n")
197
198 items = list(bugs.codes.items())
199 items.sort()
200 for (key, value) in items:
201 print("<dt><a name='%s'>%s</a></dt>" % (key, key))
202 print("<dd>%s</dd>" % value)
203
204 print('''
205 </dl>
206
207 </div>
208 <hr />
209 </body>
210 </html>
211
212 <!--
213 Local Variables:
214 compile-command: "(cd ~/WWW; upload doclifter/problems.html)"
215 End:
216 -->
217 ''')
218
0 #!/usr/bin/env python3
1 # Redo a patch that applied successfully but with an offset
2 # Give this a page and section as argument, e.g. "as.1".
3 import os, sys
4
5 page = sys.argv[1]
6 patch = page + ".patch"
7 fields = page.split(".")
8 stem = ".".join(fields[:-1])
9 section = fields[-1]
10 os.system("manlifter -s %s %s" % (section, stem))
11 os.rename("foobar.man", page + "-patched") # Save the patched version
12 os.rename("prepatch/" + patch, patch + "-old")
13 os.system("manlifter -s %s %s" % (section, stem))
14 os.rename("foobar.man", page + "-unpatched") # Save the unpatched version
15 os.rename(page + "-patched", page)
16 os.system("diff -u %s-unpatched %s >prepatch/%s" % (page, page, patch))
17 os.remove(page + "-unpatched")
18 os.remove(page)
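The script above builds its shell commands with %-interpolation through os.system(). A hedged alternative for the final diff step uses an argument list, so a page name containing spaces or shell metacharacters cannot be misinterpreted; write_patch() is a hypothetical helper sketched here, not part of manlifter. Note that diff(1) exits 0 for identical files, 1 when they differ, and 2 or more on trouble, so check=True would be wrong.

```python
import subprocess, tempfile, os

def write_patch(unpatched, patched, patch_path):
    """Regenerate a patch by diffing two saved page versions.
    Only a diff(1) exit status above 1 is treated as an error."""
    with open(patch_path, "w") as out:
        status = subprocess.run(["diff", "-u", unpatched, patched],
                                stdout=out).returncode
    if status > 1:
        raise RuntimeError("diff failed on %s vs %s" % (unpatched, patched))
    return status

# Tiny demonstration on throwaway files.
d = tempfile.mkdtemp()
old, new, patch = (os.path.join(d, n) for n in ("a", "b", "p"))
open(old, "w").write("x\n")
open(new, "w").write("y\n")
differs = write_patch(old, new, patch)
```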
0 #!/usr/bin/env python3
1 import string, buglist, smtplib, time, hashlib
2 import email.mime.multipart, email.mime.text
3
4 def send(contact, msg):
5 "Send a message."
6 try:
7 server = smtplib.SMTP('localhost')
8 #server.set_debuglevel(1)
9 server.sendmail("esr@thyrsus.com", [contact], msg)
10 server.quit()
11 print(contact, "OK")
12 except smtplib.SMTPServerDisconnected:
13 print("*** Disconnected while mailing", contact)
14 except smtplib.SMTPSenderRefused:
15 print("*** Host refused sender for", contact)
16 except smtplib.SMTPRecipientsRefused:
17 print("*** Recipient address refused for", contact)
18 except smtplib.SMTPDataError:
19 print("*** Message data refused for", contact)
20 except smtplib.SMTPConnectError:
21 print("*** Connection refused for", contact)
22 except smtplib.SMTPHeloError:
23 print("*** HELO refused for", contact)
24
25 def hashkey(patch):
26 body = "\n".join(patch.split('\n')[2:]) # Trim off the header lines
27 m = hashlib.md5()
28 m.update(body.encode("utf-8")) # hashlib requires bytes under Python 3
29 return m.digest()
30
31 def mailall(filter, hook):
32 duplicates = {}
33 for contact in bugs.maildict:
34 if not contact or contact.startswith("http"):
35 continue
36 template = '''\
37 This is automatically generated email about markup problems in a man
38 page for which you appear to be responsible. If you are not the right
39 person or list, please tell me so I can correct my database.
40
41 See http://catb.org/~esr/doclifter/bugs.html for details on how and
42 why these patches were generated. Feel free to email me with any
43 questions. Note: These patches do not change the modification date of
44 any manual page. You may wish to do that by hand.
45
46 I apologize if this message seems spammy or impersonal. The volume of
47 markup bugs I am tracking is over five hundred - there is no real
48 alternative to generating bugmail from a database and template.
49 '''
50 reminder = '''
51 My records indicate that you have accepted this patch, so this is just
52 a reminder.
53 '''
54 unique = {}
55 pagelist = []
56 page_to_problems = {}
57 for (status, pages, problems) in bugs.maildict[contact]:
58 if 'y' in status or 'r' in status or 'b' in status or 's' in status:
59 continue
60 if filter(status, pages, problems, contact):
61 # Setup
62 for page in pages.split(","):
63 pagelist.append(page)
64 duplicates[page] = []
65 page_to_problems[page] = problems
66 # Enable duplicate detection
67 for page in pagelist:
68 patch = buglist.pagetofile(page.strip())
69 if patch is None:
70 # Meant to generate a unique cookie
71 body = str(time.time())
72 else:
73 d = hashkey(patch)
74 if d in unique:
75 if not d in duplicates:
76 duplicates[d] = []
77 duplicates[d].append(page)
78 else:
79 unique[d] = page
80 # Now generate explanations
81 explanations = []
82 for page in unique.values():
83 patch = buglist.pagetofile(page.strip())
84 d = hashkey(patch)
85 explanation = "Problems with " + page + ":\n"
86 if d in duplicates:
87 explanation += "\n(Identical patches should apply to: %s)\n" \
88 % (" ".join(duplicates[d],))
89 if 'p' in status:
90 explanation += reminder
91 if page.endswith("pm"):
92 explanation += "\n(May reflect bugs in POD).\n"
93 explanation += "\n"
94 patch = buglist.pagetofile(page.strip())
95 for letter in page_to_problems[page]:
96 if letter.isalnum():
97 explanation += "%s\n" % (bugs.codes[letter])
98 elif letter != '*':
99 sys.stderr.write("Problem with %s\n" % page)
100 d = hashkey(patch)
101 if patch:
102 this = explanation + patch
103 else:
104 this = explanation + "(No patch.)\n"
105 explanations.append(this)
106 if explanations:
107 pagelist = ", ".join(pagelist)
108 if len(pagelist) > 100:
109 pagelist = "several man pages you maintain"
110 body = template \
111 + "\n--\n Eric S. Raymond\n"
112 msg = email.mime.multipart.MIMEMultipart()
113 msg["To"] = contact
114 msg["Subject"] = "Problems in " + pagelist
115 msg.attach(email.mime.text.MIMEText(body))
116 for x in explanations:
117 msg.attach(email.mime.text.MIMEText(x))
118 hook(contact, str(msg) + "\n")
119
120 if __name__ == '__main__':
121 import getopt, re, sys
122
123 bugs = buglist.Buglist()
124 selector = None
125 pagefilter = None
126 codefilter = None
127 addressfilter = None
128 hook = lambda contact, msg: sys.stdout.write(msg)
129 (options, arguments) = getopt.getopt(sys.argv[1:], "a:c:mp:s:")
130 for (switch, val) in options:
131 if switch == '-s': # Select on given status code
132 selector = val
133 elif switch == '-p': # Filter on given regexp matching pages
134 pagefilter = val
135 elif switch == '-a': # Filter on given regexp matching addressees
136 addressfilter = val
137 elif switch == '-c': # Filter on given regexp matching problems
138 codefilter = val
139 elif switch == '-m': # Actually send mail
140 hook = send
141 if selector:
142 selector = re.compile(selector)
143 if pagefilter:
144 pagefilter = re.compile(pagefilter)
145 if addressfilter:
146 addressfilter = re.compile(addressfilter)
147 if codefilter:
148 codefilter = re.compile(codefilter)
149
150 def filter(status, pages, problems, contact):
151 if selector and not selector.search(status):
152 return False
153 if pagefilter and not pagefilter.search(pages):
154 return False
155 if addressfilter and not addressfilter.search(contact):
156 return False
157 if codefilter and not codefilter.search(contact):
158 return False
159 return True
160
161 mailall(filter, hook)
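The duplicate detection in mailall() hashes each patch with its first two header lines trimmed, so patches that differ only in the file names they mention collapse to a single mailing. A minimal standalone sketch of that idea (with the bytes encoding hashlib requires under Python 3):

```python
import hashlib

def hashkey(patch):
    "Digest of a patch body, ignoring the two '---'/'+++' header lines."
    body = "\n".join(patch.split("\n")[2:])
    return hashlib.md5(body.encode("utf-8")).digest()

# Identical hunks under different headers hash the same, so the mailer
# would send one copy and note the others as duplicates.
a = "--- a/foo.1\n+++ b/foo.1\n@@ -1 +1 @@\n-x\n+y\n"
b = "--- a/bar.1\n+++ b/bar.1\n@@ -1 +1 @@\n-x\n+y\n"
```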
44 TESTLOADS := $(shell ls *.man | sed '/.man/s///')
55
66 default:
7 @setpython python2
7 @./setpython python2
88 @make -e --quiet regress
9 @setpython python3
9 @./setpython python3
1010 @make -e --quiet regress
11 @setpython python
11 @./setpython python
1212 @echo "No output (other than testfile stem names) is good news."
1313
1414 rebuild:
2121 echo $${file}; \
2222 if $(UTILBINDIR)/doclifter <$${file}.man >$(TESTDIRPARENT)/regress$$; \
2323 then diff -u $${file}.chk $(TESTDIRPARENT)/regress$$; \
24 if [ $$? -ne 0 ]; then echo "*** Output differs"; exit 1; fi \
2425 else echo "*** Nonzero return status on $${file}!"; exit 1; fi \
2526 done
2627 @rm -f $(TESTDIRPARENT)/regress