Commit 7fca73bbe6dcfbc5e10a735742aed88c25c302b8 - halibut

+31

-0

.gitignore less more

	0	/build.log
	1	/build.out
	2	/halibut
	3	/doc/*.html
	4	/doc/halibut.1
	5	/doc/halibut.info*
	6	/doc/halibut.pdf
	7	/doc/halibut.ps
	8	/doc/halibut.txt
	9	/doc/halibut.chm
	10	*.o
	11	.deps
	12	/Makefile
	13	/Makefile.in
	14	/aclocal.m4
	15	/autom4te.cache/
	16	/compile
	17	/configure
	18	/depcomp
	19	/install-sh
	20	/missing
	21	/stamp-h1
	22	/config.log
	23	/config.status
	24	/halibut.1
	25	/halibut.chm
	26	/halibut.info*
	27	/halibut.pdf
	28	/halibut.ps
	29	/halibut.txt
	30	/*.html

+23

-35

Buildscr less more

2	2
3	3	module halibut
4	4
5		ifnexist halibut/charset checkout charset halibut/charset
6
	5	# Make up a version number.
7	6	set Version $(!builddate).$(vcsid)
8	7	ifneq "$(RELEASE)" "" set Version $(RELEASE)
9	8

14	13	in halibut do echo '/* Generated by automated build script */' > version.h
15	14	in halibut do echo '$#define VERSION "version $(Version)"' >> version.h
16	15
17		set Rel
18		ifneq "$(RELEASE)" "" set Rel RELEASE=$(RELEASE)
	16	# Make the source archive.
	17	in . do ln -s halibut halibut-$(Version)
	18	in . do tar chzvf halibut-$(Version).tar.gz halibut-$(Version)
19	19
20		set Basename halibut-$(Version)
	20	# Build the Windows binary, using clang-cl.
	21	in . do mkdir buildwin
	22	in buildwin do cmake ../halibut -DCMAKE_TOOLCHAIN_FILE=$(cmake_toolchain_clangcl64) -DCMAKE_BUILD_TYPE=Release -DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded -DCMAKE_C_FLAGS_RELEASE="/MT /O2"
	23	in buildwin do make -j$(nproc) VERBOSE=1
	24	# Code-sign the Windows binary, if the local bob config provides a
	25	# script to do so. We assume here that the script accepts an -i option
	26	# to provide a 'more info' URL, and that it signs the file in place.
	27	ifneq "$(cross_winsigncode)" "" in buildwin do $(cross_winsigncode) -i https://www.chiark.greenend.org.uk/~sgtatham/halibut/ halibut.exe
21	28
22		# Make the source archive.
23		in halibut do ./release.sh $(Basename) $(Version)
	29	# Do a full Unix build, which will also build the docs, and also
	30	# checks that it _does_ build.
	31	in . do mkdir buildunix
	32	in buildunix do cmake ../halibut
	33	in buildunix do make -j$(nproc) VERBOSE=1
24	34
25		# Build a Windows binary of Halibut using clang-cl.
26		in halibut with clangcl64 do make CC='clang --target=x86_64-pc-windows-msvc18.0.0 -D_CRT_SECURE_NO_WARNINGS' CC_LINK='lld-link -defaultlib:libcmt -out:$$@' EXE=.exe
27		in halibut do mv build/halibut.exe .
28		in halibut do rm -rf build
29		delegate windows
30		# Code-sign the Windows binary, if the local bob config provides
31		# a script to do so. We assume here that the script accepts an -i
32		# option to provide a 'more info' URL, and that it signs the file
33		# in place.
34		ifneq "$(winsigncode)" "" in halibut do $(winsigncode) -i http://www.chiark.greenend.org.uk/~sgtatham/halibut/ halibut.exe
35		return halibut/halibut.exe
36		enddelegate
37
38		# Build a local binary of Halibut in order to build the docs. Make
39		# sure to tag it with the supplied version number, so that the
40		# release docs announce themselves as having been built with the
41		# release Halibut (e.g. PDF's Producer property).
42		in halibut do make $(Rel)
43
44		# And now build the docs.
45		in halibut/doc do make
46
47		deliver halibut/*.tar.gz $@
48		deliver halibut/halibut.exe $@
49		deliver halibut/doc/halibut.pdf $@
50		deliver halibut/doc/halibut.txt $@
51		deliver halibut/doc/halibut.chm $@
52		deliver halibut/doc/*.html $@
	35	deliver halibut-$(Version).tar.gz $@
	36	deliver buildwin/halibut.exe $@
	37	deliver buildunix/doc/manual/halibut.pdf $@
	38	deliver buildunix/doc/manual/halibut.txt $@
	39	deliver buildunix/doc/manual/halibut.chm $@
	40	deliver buildunix/doc/manual/*.html $@

+58

-0

CMakeLists.txt less more

	0	cmake_minimum_required(VERSION 3.5)
	1	project(halibut LANGUAGES C)
	2
	3	set(LIBCHARSET_LIBRARY_ONLY ON)
	4	include_directories(charset ${CMAKE_CURRENT_BINARY_DIR}/charset)
	5	add_subdirectory(charset)
	6
	7	if(CMAKE_SYSTEM_NAME MATCHES "Windows")
	8	add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
	9	endif()
	10
	11	add_executable(halibut
	12	biblio.c
	13	bk_html.c
	14	bk_info.c
	15	bk_man.c
	16	bk_paper.c
	17	bk_pdf.c
	18	bk_ps.c
	19	bk_text.c
	20	bk_whlp.c
	21	contents.c
	22	deflate.c
	23	error.c
	24	help.c
	25	huffman.c
	26	in_afm.c
	27	in_pf.c
	28	in_sfnt.c
	29	index.c
	30	input.c
	31	keywords.c
	32	licence.c
	33	lz77.c
	34	lzx.c
	35	main.c
	36	malloc.c
	37	misc.c
	38	psdata.c
	39	tree234.c
	40	ustring.c
	41	version.c
	42	wcwidth.c
	43	winchm.c
	44	winhelp.c)
	45	target_link_libraries(halibut charset)
	46
	47	if(CMAKE_VERSION VERSION_LESS 3.14)
	48	# CMake 3.13 and earlier required an explicit install destination.
	49	install(TARGETS halibut RUNTIME DESTINATION bin)
	50	else()
	51	# 3.14 and above selects a sensible default, which we should avoid
	52	# overriding here so that end users can override it using
	53	# CMAKE_INSTALL_BINDIR.
	54	install(TARGETS halibut)
	55	endif()
	56
	57	add_subdirectory(doc)

+7

-51

INSTALL less more

9	9	Building Halibut
10	10	----------------
11	11
12		If you have GNU make and gcc, you should simply be able to type
13		`make'. The Makefile will generate a `build' subdirectory, and will
14		put all the object files and binaries in there.
	12	Halibut is built using CMake <https://cmake.org/>. To compile in the
	13	simplest way (on any of Linux, Windows or Mac), run these commands in
	14	the source directory:
15	15
16		In a release archive, the Makefile will also check the source files
17		against a list of MD5 checksums, and if they match it will
18		automatically add the correct version number to the build. This is
19		_not_ a secure measure intended to enforce that only approved
20		Halibut sources are ever built into a binary with a given version
21		number; it is merely a sanity check against heavily modified copies
22		_accidentally_ confusing users expecting standard versions of
23		Halibut. Distribution maintainers are entirely at liberty, if they
24		choose, to modify Halibut source files as appropriate for their
25		distribution and then have the resulting binary call itself by the
26		original version number. If you run `make VERSION=x.y', the
27		resulting Halibut binary will call itself version x.y irrespective
28		of the md5sum manifest. (You may also need to do this if your build
29		system does not have the md5sum program.)
	16	cmake .
	17	cmake --build .
30	18
31		Halibut unfortunately does not yet come with an autoconf-generated
32		makefile, so if you do not have these utilities then you will have
33		to do the build manually. Look in the master `Makefile' to find the
34		list of source modules (they will be listed on the line starting
35		`MODULES :=', and continued on lines starting `MODULES +='), compile
36		those files with the C compiler of your choice, and link them
37		together into a binary. In addition to the modules on that list, you
38		will also need to compile `version.c', and if you wish your Halibut
39		binary to identify itself with a version number then you will have
40		to define the preprocessor symbol `VERSION' to the required version
41		number string. On Unix this can be done with a command such as
42
43		cc -c -DVERSION=\"0.9\" version.c
	19	(You'll also need to have a C compiler and some kind of build tool
	20	installed, such as gcc and make.)
44	21
45	22	Halibut's source files are intended to be almost entirely portable
46	23	ANSI C. If they fail to compile and run correctly on your compiler,
47	24	this might very well be considered a bug.
48
49		Building the Halibut manual
50		---------------------------
51
52		Once you have built Halibut itself, you might well want to build its
53		manual. If you're using GNU make, you can do this just by changing
54		into the `doc' subdirectory and typing `make'. (This relies on the
55		Halibut binary you built in the previous step being present in the
56		`build' subdirectory.)
57
58		Failing that, you will need to read the Makefile and run a manual
59		Halibut command, of the form
60
61		halibut --text=halibut.txt --html blurb.but intro.but [...] index.but
62
63		(The precise list of .but files is given at the top of doc/Makefile.)
64
65		This will build plain text documentation in `halibut.txt', and a set
66		of HTML files (*.html). It will also build a short man page
67		`halibut.1', although this is by no means a replacement for the full
68		manual.
69	25
70	26	Installing Halibut
71	27	------------------

+1

-1

LICENCE less more

0		Halibut is copyright (c) 1999-2017 Simon Tatham.
	0	Halibut is copyright (c) 1999-2021 Simon Tatham.
1	1
2	2	Permission is hereby granted, free of charge, to any person
3	3	obtaining a copy of this software and associated documentation files

+0

-130

~~Makefile~~ less more

0		# Halibut master makefile
1
2		# Currently depends on gcc, because:
3		# - the dependency tracking uses -MD in order to avoid needing an
4		# explicit `make depend' step
5		# - the definition of CFLAGS includes the gcc-specific flag
6		# `-Wall'
7		#
8		# Currently depends on GNU make, because:
9		# - the Makefile uses GNU ifdef / ifndef commands and GNU make `%'
10		# pattern rules
11		# - we use .PHONY
12
13		prefix=/usr/local
14		exec_prefix=$(prefix)
15		bindir=$(exec_prefix)/bin
16		INSTALL=install -c
17
18		.PHONY: all install clean spotless topclean release
19
20		ifdef RELEASE
21		ifndef VERSION
22		VERSION := $(RELEASE)
23		endif
24		else
25		CFLAGS += -g
26		endif
27
28		ifeq (x$(VERSION)y,xy)
29		RELDIR := halibut
30		else
31		RELDIR := halibut-$(VERSION)
32		endif
33
34		# `make' from top level will build in directory `build'
35		# `make BUILDDIR=foo' from top level will build in directory foo
36		ifndef REALBUILD
37		ifndef BUILDDIR
38		ifdef TEST
39		BUILDDIR := test
40		else
41		BUILDDIR := build
42		endif
43		endif
44
45		all install:
46		@test -d $(BUILDDIR) \|\| mkdir $(BUILDDIR)
47		@$(MAKE) -C $(BUILDDIR) -f ../Makefile $@ REALBUILD=yes
48
49		spotless: topclean
50		@test -d $(BUILDDIR) \|\| mkdir $(BUILDDIR)
51		@$(MAKE) -C $(BUILDDIR) -f ../Makefile spotless REALBUILD=yes
52
53		clean: topclean
54		@test -d $(BUILDDIR) \|\| mkdir $(BUILDDIR)
55		@$(MAKE) -C $(BUILDDIR) -f ../Makefile clean REALBUILD=yes
56
57		# Remove Halibut output files in the source directory (may
58		# have been created by running, for example, `build/halibut
59		# inputs/test.but').
60		topclean:
61		rm -f .html output. *.tar.gz
62
63		# Makef a release archive.
64		release: release.sh
65		./release.sh $(RELDIR) $(VERSION)
66
67		else
68
69		# The `real' makefile part.
70
71		CFLAGS += -Wall -W -ansi -pedantic
72
73		ifdef TEST
74		CFLAGS += -DLOGALLOC
75		LIBS += -lefence
76		endif
77
78		EXE =#
79
80		all: halibut$(EXE)
81
82		SRC := ../
83
84		ifeq ($(shell test -d $(SRC)charset && echo yes),yes)
85		LIBCHARSET_SRCDIR = $(SRC)charset/
86		else
87		LIBCHARSET_SRCDIR = $(SRC)../charset/
88		endif
89		LIBCHARSET_OBJDIR = ./#
90		LIBCHARSET_OBJPFX = cs-#
91		LIBCHARSET_GENPFX = charset-#
92		MD = -MD
93		CFLAGS += -I$(LIBCHARSET_SRCDIR) -I$(LIBCHARSET_OBJDIR)
94		include $(LIBCHARSET_SRCDIR)Makefile
95		CC_LINK = $(CC) -o $@
96
97		MODULES := main malloc ustring error help licence version misc tree234
98		MODULES += input in_afm in_pf in_sfnt keywords contents index biblio
99		MODULES += bk_text bk_html bk_whlp bk_man bk_info bk_paper bk_ps bk_pdf
100		MODULES += winhelp winchm deflate lzx lz77 huffman psdata wcwidth
101
102		OBJECTS := $(addsuffix .o,$(MODULES)) $(LIBCHARSET_OBJS)
103		DEPS := $(addsuffix .d,$(MODULES))
104
105		halibut$(EXE): $(OBJECTS)
106		$(CC_LINK) $(LFLAGS) $(OBJECTS) $(LIBS)
107
108		%.o: $(SRC)%.c
109		$(CC) $(CFLAGS) -MD -c $<
110
111		version.o: FORCE
112		$(CC) $(VDEF) -MD -c $(SRC)version.c
113
114		spotless:: clean
115		rm -f *.d
116
117		clean::
118		rm -f *.o halibut core
119
120		install:
121		mkdir -p $(prefix) $(bindir)
122		$(INSTALL) -m 755 halibut $(bindir)/halibut
123		$(MAKE) -C ../doc install prefix="$(prefix)" INSTALL="$(INSTALL)"
124
125		FORCE: # phony target to force version.o to be rebuilt every time
126
127		-include $(DEPS)
128
129		endif

+9

-8

biblio.c less more

19	19	return ustrdup(p);
20	20	}
21	21
22		static void cite_biblio(keywordlist kl, wchar_t key, filepos fpos) {
	22	static void cite_biblio(keywordlist kl, wchar_t key, filepos fpos,
	23	errorstate *es) {
23	24	keyword *kw = kw_lookup(kl, key);
24	25	if (!kw)
25		err_nosuchkw(&fpos, key);
	26	err_nosuchkw(es, &fpos, key);
26	27	else {
27	28	/*
28	29	* We've found a \k reference. If it's a

43	44	* entries are actually cited (or \nocite-ed).
44	45	*/
45	46
46		void gen_citations(paragraph source, keywordlist kl) {
	47	void gen_citations(paragraph source, keywordlist kl, errorstate *es) {
47	48	paragraph *para;
48	49	int bibnum = 0;
49	50

56	57	if (para->type == para_BR) {
57	58	keyword *kw = kw_lookup(kl, para->keyword);
58	59	if (!kw) {
59		err_nosuchkw(&para->fpos, para->keyword);
	60	err_nosuchkw(es, &para->fpos, para->keyword);
60	61	} else if (kw->text) {
61		err_multiBR(&para->fpos, para->keyword);
	62	err_multiBR(es, &para->fpos, para->keyword);
62	63	} else {
63	64	kw->text = dup_word_list(para->words);
64	65	}
65	66	} else if (para->type == para_NoCite) {
66	67	wchar_t *wp = para->keyword;
67	68	while (*wp) {
68		cite_biblio(kl, wp, para->fpos);
	69	cite_biblio(kl, wp, para->fpos, es);
69	70	wp = uadv(wp);
70	71	}
71	72	}

76	77	for (ptr = para->words; ptr; ptr = ptr->next) {
77	78	if (ptr->type == word_UpperXref \|\|
78	79	ptr->type == word_LowerXref)
79		cite_biblio(kl, ptr->text, ptr->fpos);
	80	cite_biblio(kl, ptr->text, ptr->fpos, es);
80	81	}
81	82	}
82	83

94	95	word *wd = smalloc(sizeof(word));
95	96	wd->text = gentext(++bibnum);
96	97	wd->type = word_Normal;
97		wd->breaks = FALSE;
	98	wd->breaks = false;
98	99	wd->alt = NULL;
99	100	wd->next = NULL;
100	101	wd->aux = 0;

+113

-89

bk_html.c less more

37	37	(p)->type == para_Title ? -1 : 0 )
38	38
39	39	typedef struct {
40		int number_at_all, just_numbers;
	40	bool number_at_all, just_numbers;
41	41	wchar_t *number_suffix;
42	42	} sectlevel;
43	43

46	46	sectlevel achapter, *asect;
47	47	int contents_depths; / 0=main, 1=chapter, 2=sect etc */
48	48	int ncdepths;
49		int address_section, visible_version_id;
50		int leaf_contains_contents, leaf_smallest_contents;
51		int navlinks;
52		int rellinks;
	49	bool address_section, visible_version_id;
	50	bool leaf_contains_contents;
	51	int leaf_smallest_contents;
	52	bool navlinks;
	53	bool rellinks;
53	54	char *contents_filename;
54	55	char *index_filename;
55	56	char *template_filename;

135	136	typedef struct {
136	137	htmlsect *section;
137	138	char *fragment;
138		int generated, referenced;
	139	bool generated, referenced;
139	140	} htmlindexref;
140	141
141	142	typedef struct {

148	149	void (write)(void write_ctx, const char *data, int len);
149	150	int charset, restrict_charset;
150	151	charset_state cstate;
	152	errorstate *es;
151	153	int ver;
152	154	enum {
153	155	HO_NEUTRAL, HO_IN_TAG, HO_IN_EMPTY_TAG, HO_IN_TEXT

190	192	ho->write = ho_write_file;
191	193	ho->write_ctx = fp;
192	194	} else {
193		err_cantopenw(filename);
	195	err_cantopenw(ho->es, filename);
194	196	ho->write = ho_write_ignore; /* saves conditionalising rest of code */
195	197	}
196	198	}

265	267	#define HO_HACK_QUOTENOTHING 2
266	268	#define HO_HACK_OMITQUOTES 4
267	269
268		static int html_fragment_compare(void av, void bv)
269		{
270		htmlfragment a = (htmlfragment )av;
271		htmlfragment b = (htmlfragment )bv;
	270	static int html_fragment_compare(const void av, const void bv, void *cmpctx)
	271	{
	272	const htmlfragment a = (const htmlfragment )av;
	273	const htmlfragment b = (const htmlfragment )bv;
272	274	int cmp;
273	275
274	276	if ((cmp = strcmp(a->file->filename, b->file->filename)) != 0)

277	279	return strcmp(a->fragment, b->fragment);
278	280	}
279	281
280		static int html_filename_compare(void av, void bv)
281		{
282		char a = (char )av;
283		char b = (char )bv;
	282	static int html_filename_compare(const void av, const void bv, void *cmpctx)
	283	{
	284	const char a = (const char )av;
	285	const char b = (const char )bv;
284	286
285	287	return strcmp(a, b);
286	288	}

312	314	static void html_text_nbsp(htmloutput ho, wchar_t const str);
313	315	static void html_text_limit(htmloutput ho, wchar_t const str, int maxlen);
314	316	static void html_text_limit_internal(htmloutput ho, wchar_t const text,
315		int maxlen, int quote_quotes, int nbsp);
	317	int maxlen, bool quote_quotes, bool nbsp);
316	318	static void html_nl(htmloutput *ho);
317	319	static void html_raw(htmloutput ho, char text);
318	320	static void html_raw_as_attr(htmloutput ho, char text);

332	334	htmlconfig *cfg);
333	335	static void html_section_title(htmloutput ho, htmlsect s,
334	336	htmlfile thisfile, keywordlist keywords,
335		htmlconfig *cfg, int real);
336
337		static htmlconfig html_configure(paragraph *source, int chm_mode)
	337	htmlconfig *cfg, bool real);
	338
	339	static htmlconfig html_configure(paragraph *source, bool chm_mode,
	340	errorstate *es)
338	341	{
339	342	htmlconfig ret;
340	343	paragraph *p;

343	346	* Defaults.
344	347	*/
345	348	ret.leaf_level = chm_mode ? -1 /* infinite */ : 2;
346		ret.achapter.just_numbers = FALSE;
347		ret.achapter.number_at_all = TRUE;
	349	ret.achapter.just_numbers = false;
	350	ret.achapter.number_at_all = true;
348	351	ret.achapter.number_suffix = L": ";
349	352	ret.nasect = 1;
350	353	ret.asect = snewn(ret.nasect, sectlevel);
351		ret.asect[0].just_numbers = TRUE;
352		ret.asect[0].number_at_all = TRUE;
	354	ret.asect[0].just_numbers = true;
	355	ret.asect[0].number_at_all = true;
353	356	ret.asect[0].number_suffix = L" ";
354	357	ret.ncdepths = 0;
355	358	ret.contents_depths = 0;
356		ret.visible_version_id = TRUE;
357		ret.address_section = chm_mode ? FALSE : TRUE;
358		ret.leaf_contains_contents = FALSE;
	359	ret.visible_version_id = true;
	360	ret.address_section = chm_mode ? false : true;
	361	ret.leaf_contains_contents = false;
359	362	ret.leaf_smallest_contents = 4;
360		ret.navlinks = chm_mode ? FALSE : TRUE;
361		ret.rellinks = TRUE;
	363	ret.navlinks = chm_mode ? false : true;
	364	ret.rellinks = true;
362	365	ret.single_filename = dupstr("Manual.html");
363	366	ret.contents_filename = dupstr("Contents.html");
364	367	ret.index_filename = dupstr("IndexPage.html");

426	429	for (p = source; p; p = p->next) {
427	430	if (p->type == para_Config) {
428	431	wchar_t *k = p->keyword;
429		int generic = FALSE;
	432	bool generic = false;
430	433
431	434	if (!chm_mode && !ustrnicmp(k, L"html-", 5)) {
432	435	k += 5;

439	442	/* In this mode, only accept directives that don't
440	443	* vary completely between the HTML and CHM output
441	444	* types. */
442		generic = TRUE;
	445	generic = true;
443	446	} else {
444	447	continue;
445	448	}
446	449
447	450	if (!ustricmp(k, L"restrict-charset")) {
448		ret.restrict_charset = charset_from_ustr(&p->fpos, uadv(k));
	451	ret.restrict_charset = charset_from_ustr(
	452	&p->fpos, uadv(k), es);
449	453	} else if (!ustricmp(k, L"output-charset")) {
450		ret.output_charset = charset_from_ustr(&p->fpos, uadv(k));
	454	ret.output_charset = charset_from_ustr(
	455	&p->fpos, uadv(k), es);
451	456	} else if (!ustricmp(k, L"version")) {
452	457	wchar_t *vername = uadv(k);
453	458	static const struct {

467	472	break;
468	473
469	474	if (i == lenof(versions))
470		err_htmlver(&p->fpos, vername);
	475	err_htmlver(es, &p->fpos, vername);
471	476	else
472	477	ret.htmlver = versions[i].ver;
473	478	} else if (!ustricmp(k, L"single-filename")) {

500	505	frag = adv(frag);
501	506	}
502	507	} else
503		err_cfginsufarg(&p->fpos, p->origkeyword, 1);
	508	err_cfginsufarg(es, &p->fpos, p->origkeyword, 1);
504	509	} else if (!ustricmp(k, L"chapter-numeric")) {
505	510	ret.achapter.just_numbers = utob(uadv(k));
506	511	} else if (!ustricmp(k, L"chapter-shownumber")) {

674	679	chmname = diskname;
675	680
676	681	if (chmname[0] == '#' \|\| chmname[0] == '$')
677		err_chm_badname(&p->fpos, chmname);
	682	err_chm_badname(es, &p->fpos, chmname);
678	683
679	684	if (ret.nchmextrafiles >= ret.chmextrafilesize) {
680	685	ret.chmextrafilesize = ret.nchmextrafiles * 5 / 4 + 32;

705	710	* turn both off.
706	711	*/
707	712	if (!ret.chm_filename ^ !ret.hhp_filename) {
708		err_chmnames();
	713	err_chmnames(es);
709	714	sfree(ret.chm_filename); ret.chm_filename = NULL;
710	715	sfree(ret.hhp_filename); ret.hhp_filename = NULL;
711	716	}

756	761	}
757	762
758	763	static void html_backend_common(paragraph sourceform, keywordlist keywords,
759		indexdata *idx, int chm_mode)
	764	indexdata idx, errorstate es, bool chm_mode)
760	765	{
761	766	paragraph *p;
762	767	htmlsect *topsect;

764	769	htmlfilelist files = { NULL, NULL, NULL, NULL, NULL, NULL };
765	770	htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL };
766	771	struct chm *chm = NULL;
767		int has_index, hhk_needed = FALSE;
768
769		conf = html_configure(sourceform, chm_mode);
	772	bool has_index, hhk_needed = false;
	773
	774	conf = html_configure(sourceform, chm_mode, es);
770	775
771	776	/*
772	777	* We're going to make heavy use of paragraphs' private data

777	782	for (p = sourceform; p; p = p->next)
778	783	p->private_data = NULL;
779	784
780		files.frags = newtree234(html_fragment_compare);
781		files.files = newtree234(html_filename_compare);
	785	files.frags = newtree234(html_fragment_compare, NULL);
	786	files.files = newtree234(html_filename_compare, NULL);
782	787
783	788	/*
784	789	* Start by figuring out into which file each piece of the

978	983	indextag *tag;
979	984	int i;
980	985
981		hr->referenced = hr->generated = FALSE;
	986	hr->referenced = hr->generated = false;
982	987	hr->section = lastsect;
983	988	{
984	989	char buf[40];

1036	1041
1037	1042	for (f = files.head; f; f = f->next) {
1038	1043	htmloutput ho;
1039		int displaying;
	1044	bool displaying;
1040	1045	enum LISTTYPE { NOLIST, UL, OL, DL };
1041	1046	enum ITEMTYPE { NOITEM, LI, DT, DD };
1042	1047	struct stackelement {

1058	1063	ho.charset = conf.output_charset;
1059	1064	ho.restrict_charset = conf.restrict_charset;
1060	1065	ho.cstate = charset_init_state;
	1066	ho.es = es;
1061	1067	ho.ver = conf.htmlver;
1062	1068	ho.state = HO_NEUTRAL;
1063	1069	ho.contents_level = 0;

1309	1315	html_fragment(&ho, sects.head->fragments[i]);
1310	1316	}
1311	1317
1312		html_section_title(&ho, sects.head, f, keywords, &conf, TRUE);
	1318	html_section_title(&ho, sects.head, f, keywords, &conf, true);
1313	1319
1314	1320	element_close(&ho, "h1");
1315	1321	}

1328	1334	{
1329	1335	int ntoc = 0, tocsize = 0, tocstartidx = 0;
1330	1336	htmlsect **toc = NULL;
1331		int leaf = TRUE;
	1337	bool leaf = true;
1332	1338
1333	1339	for (s = sects.head; s; s = s->next) {
1334	1340	htmlsect a, ac;

1350	1356	}
1351	1357
1352	1358	if (s->file != f && a != NULL)
1353		leaf = FALSE;
	1359	leaf = false;
1354	1360
1355	1361	if (a) {
1356	1362	if (adepth <= a->contents_depth) {

1398	1404	* Now go through the document and output some real
1399	1405	* text.
1400	1406	*/
1401		displaying = FALSE;
	1407	displaying = false;
1402	1408	for (s = sects.head; s; s = s->next) {
1403	1409	if (s->file == f) {
1404	1410	/*
1405	1411	* This section belongs in this file.
1406	1412	* Display it.
1407	1413	*/
1408		displaying = TRUE;
	1414	displaying = true;
1409	1415	} else {
1410	1416	/*
1411	1417	* Doesn't belong in this file, but it may be

1416	1422	htmlsect a, ac;
1417	1423	int depth, adepth;
1418	1424
1419		displaying = FALSE;
	1425	displaying = false;
1420	1426
1421	1427	/*
1422	1428	* Search up from this section until we find

1489	1495	html_fragment(&ho, s->fragments[i]);
1490	1496	}
1491	1497
1492		html_section_title(&ho, s, f, keywords, &conf, TRUE);
	1498	html_section_title(&ho, s, f, keywords, &conf, true);
1493	1499
1494	1500	element_close(&ho, htag);
1495	1501	}

1721	1727
1722	1728	html_href(&ho, f, hr->section->file,
1723	1729	hr->fragment);
1724		hr->referenced = TRUE;
	1730	hr->referenced = true;
1725	1731	if (p && p->kwtext)
1726	1732	html_words(&ho, p->kwtext, MARKUP\|LINKS,
1727	1733	f, keywords, &conf);

1754	1760	/*
1755	1761	* Footer.
1756	1762	*/
1757		int done_version_ids = FALSE;
	1763	bool done_version_ids = false;
1758	1764
1759	1765	if (conf.address_section)
1760	1766	element_empty(&ho, "hr");

1763	1769	html_raw(&ho, conf.body_end);
1764	1770
1765	1771	if (conf.address_section) {
1766		int started = FALSE;
	1772	bool started = false;
1767	1773	if (conf.htmlver == ISO_HTML) {
1768	1774	/*
1769	1775	* The ISO-HTML validator complains if

1781	1787	if (conf.addr_start) {
1782	1788	html_raw(&ho, conf.addr_start);
1783	1789	html_nl(&ho);
1784		started = TRUE;
	1790	started = true;
1785	1791	}
1786	1792	if (conf.visible_version_id) {
1787	1793	for (p = sourceform; p; p = p->next)

1793	1799	html_words(&ho, p->words, NOTHING,
1794	1800	f, keywords, &conf);
1795	1801	html_text(&ho, conf.post_versionid);
1796		started = TRUE;
	1802	started = true;
1797	1803	}
1798		done_version_ids = TRUE;
	1804	done_version_ids = true;
1799	1805	}
1800	1806	if (conf.addr_end) {
1801	1807	if (started)

1813	1819	* visible, I think we still have a duty to put
1814	1820	* them in an HTML comment.
1815	1821	*/
1816		int started = FALSE;
	1822	bool started = false;
1817	1823	for (p = sourceform; p; p = p->next)
1818	1824	if (p->type == para_VersionID) {
1819	1825	if (!started) {
1820	1826	html_raw(&ho, "<!-- version IDs:\n");
1821		started = TRUE;
	1827	started = true;
1822	1828	}
1823	1829	html_words(&ho, p->words, NOTHING,
1824	1830	f, keywords, &conf);

1843	1849	* if the index contains nothing.
1844	1850	*/
1845	1851	if (chm_mode \|\| conf.hhk_filename) {
1846		int ok = FALSE;
	1852	bool ok = false;
1847	1853	int i;
1848	1854	indexentry *entry;
1849	1855

1851	1857	htmlindex hi = (htmlindex )entry->backend_data;
1852	1858
1853	1859	if (hi->nrefs > 0) {
1854		ok = TRUE; /* found an index entry */
	1860	ok = true; /* found an index entry */
1855	1861	break;
1856	1862	}
1857	1863	}
1858	1864
1859	1865	if (ok)
1860		hhk_needed = TRUE;
	1866	hhk_needed = true;
1861	1867	}
1862	1868
1863	1869	/*

1878	1884	ho.charset = CS_CP1252; /* as far as I know, CHM is */
1879	1885	ho.restrict_charset = CS_CP1252; /* hardwired to this charset */
1880	1886	ho.cstate = charset_init_state;
	1887	ho.es = es;
1881	1888	ho.ver = HTML_4; /* shrug */
1882	1889	ho.state = HO_NEUTRAL;
1883	1890	ho.contents_level = 0;

1886	1893	ho_setup_rdstringc(&ho, &rs);
1887	1894
1888	1895	ho.hacklimit = 255;
1889		html_words(&ho, topsect->title->words, NOTHING,
1890		NULL, keywords, &conf);
	1896	if (topsect->title)
	1897	html_words(&ho, topsect->title->words, NOTHING,
	1898	NULL, keywords, &conf);
1891	1899
1892	1900	rdaddc(&rs, '\0');
1893	1901	chm_title(chm, rs.text);

1929	1937	ho.charset = CS_CP1252;
1930	1938	ho.restrict_charset = CS_CP1252;
1931	1939	ho.cstate = charset_init_state;
	1940	ho.es = es;
1932	1941	ho.ver = HTML_4; /* shrug */
1933	1942	ho.state = HO_NEUTRAL;
1934	1943	ho.contents_level = 0;

1972	1981
1973	1982	fp = fopen(fname, "rb");
1974	1983	if (!fp) {
1975		err_cantopen(fname);
	1984	err_cantopen(es, fname);
1976	1985	continue;
1977	1986	}
1978	1987

2002	2011	ho.charset = CS_CP1252; /* as far as I know, HHP files are */
2003	2012	ho.restrict_charset = CS_CP1252; /* hardwired to this charset */
2004	2013	ho.cstate = charset_init_state;
	2014	ho.es = es;
2005	2015	ho.ver = HTML_4; /* shrug */
2006	2016	ho.state = HO_NEUTRAL;
2007	2017	ho.contents_level = 0;

2026	2036	"Title=");
2027	2037
2028	2038	ho.hacklimit = 255;
2029		html_words(&ho, topsect->title->words, NOTHING,
2030		NULL, keywords, &conf);
	2039	if (topsect->title)
	2040	html_words(&ho, topsect->title->words, NOTHING,
	2041	NULL, keywords, &conf);
2031	2042
2032	2043	ho_string(&ho, "\n");
2033	2044

2100	2111	ho.charset = CS_CP1252; /* as far as I know, HHC files are */
2101	2112	ho.restrict_charset = CS_CP1252; /* hardwired to this charset */
2102	2113	ho.cstate = charset_init_state;
	2114	ho.es = es;
2103	2115	ho.ver = HTML_4; /* shrug */
2104	2116	ho.state = HO_NEUTRAL;
2105	2117	ho.contents_level = 0;

2126	2138	/*
2127	2139	* For each HTML file, write out a contents entry.
2128	2140	*/
2129		int depth, leaf = TRUE;
	2141	int depth;
	2142	bool leaf = true;
2130	2143
2131	2144	/*
2132	2145	* Determine the depth of this file in the contents

2159	2172	if (leaf && s->file != f) {
2160	2173	for (a = s; a; a = a->parent)
2161	2174	if (a->file == f) {
2162		leaf = FALSE;
	2175	leaf = false;
2163	2176	break;
2164	2177	}
2165	2178	}

2217	2230	ho.charset = CS_CP1252; /* as far as I know, HHK files are */
2218	2231	ho.restrict_charset = CS_CP1252; /* hardwired to this charset */
2219	2232	ho.cstate = charset_init_state;
	2233	ho.es = es;
2220	2234	ho.ver = HTML_4; /* shrug */
2221	2235	ho.state = HO_NEUTRAL;
2222	2236	ho.contents_level = 0;

2269	2283	hr->section->file->temp = 1;
2270	2284	}
2271	2285
2272		hr->referenced = TRUE;
	2286	hr->referenced = true;
2273	2287	}
2274	2288
2275	2289	ho_string(&ho, "</OBJECT>\n");

2300	2314
2301	2315	fp = fopen(conf.chm_filename, "wb");
2302	2316	if (!fp) {
2303		err_cantopenw(conf.chm_filename);
	2317	err_cantopenw(es, conf.chm_filename);
2304	2318	} else {
2305	2319	data = chm_build(chm, &len);
2306	2320	fwrite(data, 1, len, fp);

2323	2337	if (w->type == word_IndexRef) {
2324	2338	htmlindexref hr = (htmlindexref )w->private_data;
2325	2339
2326		assert(!hr->referenced == !hr->generated);
	2340	assert(hr->referenced == hr->generated);
2327	2341	}
2328	2342	}
2329	2343

2416	2430	}
2417	2431
2418	2432	void html_backend(paragraph sourceform, keywordlist keywords,
2419		indexdata idx, void unused)
	2433	indexdata idx, void unused, errorstate *es)
2420	2434	{
2421	2435	IGNORE(unused);
2422		html_backend_common(sourceform, keywords, idx, FALSE);
	2436	html_backend_common(sourceform, keywords, idx, es, false);
2423	2437	}
2424	2438
2425	2439	void chm_backend(paragraph sourceform, keywordlist keywords,
2426		indexdata idx, void unused)
	2440	indexdata idx, void unused, errorstate *es)
2427	2441	{
2428	2442	IGNORE(unused);
2429		html_backend_common(sourceform, keywords, idx, TRUE);
	2443	html_backend_common(sourceform, keywords, idx, es, true);
2430	2444	}
2431	2445
2432	2446	static void html_file_section(htmlconfig cfg, htmlfilelist files,

2620	2634	if (flags & INDEXENTS) {
2621	2635	htmlindexref hr = (htmlindexref )w->private_data;
2622	2636	html_fragment(ho, hr->fragment);
2623		hr->generated = TRUE;
	2637	hr->generated = true;
2624	2638	}
2625	2639	break;
2626	2640	case word_Normal:

2827	2841	ho_string(ho, " ");
2828	2842	ho_string(ho, name);
2829	2843	ho_string(ho, "=\"");
2830		html_text_limit_internal(ho, value, 0, TRUE, FALSE);
	2844	html_text_limit_internal(ho, value, 0, true, false);
2831	2845	html_charset_cleanup(ho);
2832	2846	ho_string(ho, "\"");
2833	2847	}

2835	2849	static void html_text(htmloutput ho, wchar_t const text)
2836	2850	{
2837	2851	return_mostly_to_neutral(ho);
2838		html_text_limit_internal(ho, text, 0, FALSE, FALSE);
	2852	html_text_limit_internal(ho, text, 0, false, false);
2839	2853	}
2840	2854
2841	2855	static void html_text_nbsp(htmloutput ho, wchar_t const text)
2842	2856	{
2843	2857	return_mostly_to_neutral(ho);
2844		html_text_limit_internal(ho, text, 0, FALSE, TRUE);
	2858	html_text_limit_internal(ho, text, 0, false, true);
2845	2859	}
2846	2860
2847	2861	static void html_text_limit(htmloutput ho, wchar_t const text, int maxlen)
2848	2862	{
2849	2863	return_mostly_to_neutral(ho);
2850		html_text_limit_internal(ho, text, maxlen, FALSE, FALSE);
	2864	html_text_limit_internal(ho, text, maxlen, false, false);
2851	2865	}
2852	2866
2853	2867	static void html_text_limit_internal(htmloutput ho, wchar_t const text,
2854		int maxlen, int quote_quotes, int nbsp)
	2868	int maxlen, bool quote_quotes, bool nbsp)
2855	2869	{
2856	2870	int textlen = ustrlen(text);
2857	2871	char outbuf[256];
2858		int bytes, err;
	2872	int bytes;
	2873	bool err;
2859	2874
2860	2875	if (ho->hackflags & (HO_HACK_QUOTEQUOTES \| HO_HACK_OMITQUOTES))
2861		quote_quotes = TRUE; /* override the input value */
	2876	quote_quotes = true; /* override the input value */
2862	2877
2863	2878	if (maxlen > 0 && textlen > maxlen)
2864	2879	textlen = maxlen;

2943	2958	rdaddc(&rs, '#');
2944	2959	rdaddsc(&rs, targetfrag);
2945	2960	}
	2961
	2962	/* If _neither_ of those conditions were true, we don't have a URL
	2963	* at all and will segfault when we pass url==NULL to element_attr.
	2964	*
	2965	* I think this can only occur as a knock-on effect from an input
	2966	* file error, but we still shouldn't crash, of course. */
	2967
2946	2968	url = rs.text;
2947	2969
2948	2970	element_open(ho, "a");
2949		element_attr(ho, "href", url);
2950		sfree(url);
	2971	if (url) {
	2972	element_attr(ho, "href", url);
	2973	sfree(url);
	2974	}
2951	2975	}
2952	2976
2953	2977	static void html_fragment(htmloutput ho, char const fragment)

3129	3153
3130	3154	p = NULL;
3131	3155
3132		while (find234(files->files, text, NULL)) {
	3156	while (find234(files->files, text)) {
3133	3157	if (!p) {
3134	3158	len = strlen(text);
3135	3159	p = text;

3182	3206
3183	3207	element_open(ho, "li");
3184	3208	html_href(ho, thisfile, s->file, s->fragments[0]);
3185		html_section_title(ho, s, thisfile, keywords, cfg, FALSE);
	3209	html_section_title(ho, s, thisfile, keywords, cfg, false);
3186	3210	element_close(ho, "a");
3187	3211	/* <li> will be closed by a later invocation */
3188	3212	}
3189	3213
3190	3214	static void html_section_title(htmloutput ho, htmlsect s, htmlfile *thisfile,
3191	3215	keywordlist keywords, htmlconfig cfg,
3192		int real)
	3216	bool real)
3193	3217	{
3194	3218	if (s->title) {
3195	3219	sectlevel *sl;

+53

-47

bk_info.c less more

79	79	rdstringc output;
80	80	int charset;
81	81	charset_state state;
82		int wcmode;
	82	bool wcmode;
83	83	} info_data;
84		#define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
	84	#define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, false }
85	85	static const info_data empty_info_data = EMPTY_INFO_DATA;
86	86
87	87	typedef struct node_tag node;
88	88	struct node_tag {
89	89	node *listnext;
90	90	node up, prev, next, lastchild;
91		int pos, started_menu, filenum;
	91	int pos, filenum;
	92	bool started_menu;
92	93	char *name;
93	94	info_data text;
94	95	};

116	117	static word info_transform_wordlist(word , keywordlist *);
117	118	static int info_check_index(word , node , indexdata *);
118	119
119		static int info_rdaddwc(info_data , word , word , int, infoconfig );
	120	static int info_rdaddwc(info_data , word , word , bool, infoconfig );
120	121
121	122	static node info_node_new(char name, int charset);
122		static char info_node_name_for_para(paragraph p, infoconfig *);
123		static char info_node_name_for_text(wchar_t text, infoconfig *);
124
125		static infoconfig info_configure(paragraph *source) {
	123	static char info_node_name_for_para(paragraph p, infoconfig *,
	124	errorstate *);
	125	static char info_node_name_for_text(wchar_t text, infoconfig *,
	126	errorstate *);
	127
	128	static infoconfig info_configure(paragraph source, errorstate es) {
126	129	infoconfig ret;
127	130	paragraph *p;
128	131	int n;

185	188	sfree(ret.filename);
186	189	ret.filename = dupstr(adv(p->origkeyword));
187	190	} else if (!ustricmp(p->keyword, L"info-charset")) {
188		ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
	191	ret.charset = charset_from_ustr(
	192	&p->fpos, uadv(p->keyword), es);
189	193	} else if (!ustricmp(p->keyword, L"info-max-file-size")) {
190	194	ret.maxfilesize = utoi(uadv(p->keyword));
191	195	} else if (!ustricmp(p->keyword, L"info-width")) {

305	309	}
306	310
307	311	void info_backend(paragraph sourceform, keywordlist keywords,
308		indexdata idx, void unused) {
	312	indexdata idx, void unused, errorstate *es) {
309	313	paragraph *p;
310	314	infoconfig conf;
311	315	word prefix, body, *wp;

314	318	int nesting, nestindent;
315	319	int indentb, indenta;
316	320	int filepos;
317		int has_index = FALSE;
	321	bool has_index = false;
318	322	info_data intro_text = EMPTY_INFO_DATA;
319	323	node topnode, currnode;
320	324	word bullet;

322	326
323	327	IGNORE(unused);
324	328
325		conf = info_configure(sourceform);
	329	conf = info_configure(sourceform, es);
326	330
327	331	/*
328	332	* Go through and create a node for each section.

342	346	node newnode, upnode;
343	347	char *nodename;
344	348
345		nodename = info_node_name_for_para(p, &conf);
	349	nodename = info_node_name_for_para(p, &conf, es);
346	350	newnode = info_node_new(nodename, conf.charset);
347	351	sfree(nodename);
348	352

383	387	ii->nnodes = ii->nodesize = 0;
384	388	ii->nodes = NULL;
385	389
386		ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
	390	ii->length = info_rdaddwc(&id, entry->text, NULL, false, &conf);
387	391
388	392	ii->text = id.output.text;
389	393

416	420	kw = *longname ? uadv(longname) : L"";
417	421
418	422	if (!*longname) {
419		err_cfginsufarg(&p->fpos, p->origkeyword, 3);
	423	err_cfginsufarg(es, &p->fpos, p->origkeyword, 3);
420	424	continue;
421	425	}
422	426

511	515
512	516	if (!currnode->up->started_menu) {
513	517	info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
514		currnode->up->started_menu = TRUE;
	518	currnode->up->started_menu = true;
515	519	}
516	520	info_menu_item(&currnode->up->text, currnode, p, &conf);
517	521

600	604	indexentry *entry;
601	605	char *nodename;
602	606
603		nodename = info_node_name_for_text(conf.index_text, &conf);
	607	nodename = info_node_name_for_text(conf.index_text, &conf, es);
604	608	newnode = info_node_new(nodename, conf.charset);
605	609	sfree(nodename);
606	610

705	709	*/
706	710	fp = fopen(conf.filename, "w");
707	711	if (!fp) {
708		err_cantopenw(conf.filename);
	712	err_cantopenw(es, conf.filename);
709	713	return;
710	714	}
711	715	fputs(intro_text.output.text, fp);

749	753	sprintf(fname, "%s-%d", conf.filename, filenum);
750	754	fp = fopen(fname, "w");
751	755	if (!fp) {
752		err_cantopenw(fname);
	756	err_cantopenw(es, fname);
753	757	return;
754	758	}
755	759	sfree(fname);

862	866	return ret;
863	867	}
864	868
865		static int info_rdaddwc(info_data id, word words, word *end, int xrefs,
	869	static int info_rdaddwc(info_data id, word words, word *end, bool xrefs,
866	870	infoconfig *cfg) {
867	871	int ret = 0;
868	872

906	910	if (cvt_ok(id->charset, words->text) \|\| !words->alt)
907	911	ret += info_rdadds(id, words->text);
908	912	else
909		ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
	913	ret += info_rdaddwc(id, words->alt, NULL, false, cfg);
910	914	} else if (removeattr(words->type) == word_WhiteSpace) {
911	915	ret += info_rdadd(id, L' ');
912	916	} else if (removeattr(words->type) == word_Quote) {

944	948	return ret;
945	949	}
946	950
947		static int info_width_internal(word words, int xrefs, infoconfig cfg);
948
949		static int info_width_internal_list(word words, int xrefs, infoconfig cfg) {
	951	static int info_width_internal(word words, bool xrefs, infoconfig cfg);
	952
	953	static int info_width_internal_list(word words, bool xrefs, infoconfig cfg) {
950	954	int w = 0;
951	955	while (words) {
952	956	w += info_width_internal(words, xrefs, cfg);

955	959	return w;
956	960	}
957	961
958		static int info_width_internal(word words, int xrefs, infoconfig cfg) {
	962	static int info_width_internal(word words, bool xrefs, infoconfig cfg) {
959	963	int wid;
960	964	int attr;
961	965

1033	1037
1034	1038	static int info_width_noxrefs(void ctx, word words)
1035	1039	{
1036		return info_width_internal(words, FALSE, (infoconfig *)ctx);
	1040	return info_width_internal(words, false, (infoconfig *)ctx);
1037	1041	}
1038	1042	static int info_width_xrefs(void ctx, word words)
1039	1043	{
1040		return info_width_internal(words, TRUE, (infoconfig *)ctx);
	1044	return info_width_internal(words, true, (infoconfig *)ctx);
1041	1045	}
1042	1046
1043	1047	static void info_heading(info_data text, word tprefix,

1049	1053
1050	1054	length = 0;
1051	1055	if (tprefix) {
1052		length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
	1056	length += info_rdaddwc(text, tprefix, NULL, false, cfg);
1053	1057	length += info_rdadds(text, cfg->sectsuffix);
1054	1058	}
1055	1059

1059	1063	wrapping = wrap_para(words, firstlinewidth, wrapwidth,
1060	1064	info_width_noxrefs, cfg, 0);
1061	1065	for (p = wrapping; p; p = p->next) {
1062		length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
	1066	length += info_rdaddwc(text, p->begin, p->end, false, cfg);
1063	1067	info_rdadd(text, L'\n');
1064	1068	if (*align.underline) {
1065	1069	while (length > 0) {

1099	1103	if (prefix) {
1100	1104	for (i = 0; i < indent; i++)
1101	1105	info_rdadd(text, L' ');
1102		e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
	1106	e = info_rdaddwc(text, prefix, NULL, false, cfg);
1103	1107	if (prefixextra)
1104	1108	e += info_rdadds(text, prefixextra);
1105	1109	/* If the prefix is too long, shorten the first line to fit. */

1121	1125	for (p = wrapping; p; p = p->next) {
1122	1126	for (i = 0; i < e; i++)
1123	1127	info_rdadd(text, L' ');
1124		info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
	1128	info_rdaddwc(text, p->begin, p->end, true, cfg);
1125	1129	info_rdadd(text, L'\n');
1126	1130	e = indent + extraindent;
1127	1131	}

1149	1153
1150	1154	static void info_versionid(info_data text, word words, infoconfig *cfg) {
1151	1155	info_rdadd(text, L'[');
1152		info_rdaddwc(text, words, NULL, FALSE, cfg);
	1156	info_rdaddwc(text, words, NULL, false, cfg);
1153	1157	info_rdadds(text, L"]\n");
1154	1158	}
1155	1159

1162	1166	n->text.charset = charset;
1163	1167	n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1164	1168	n->name = dupstr(name);
1165		n->started_menu = FALSE;
	1169	n->started_menu = false;
1166	1170
1167	1171	return n;
1168	1172	}
1169	1173
1170		static char info_node_name_core(info_data id, filepos *fpos)
	1174	static char info_node_name_core(info_data id, filepos fpos, errorstate es)
1171	1175	{
1172	1176	char p, q;
1173	1177

1178	1182	p = q = id->output.text;
1179	1183	while (*p) {
1180	1184	if (p == ':' \|\| p == ',' \|\| p == '(' \|\| p == ')') {
1181		err_infonodechar(fpos, *p);
	1185	err_infonodechar(es, fpos, *p);
1182	1186	} else {
1183	1187	q++ = p;
1184	1188	}

1189	1193	return id->output.text;
1190	1194	}
1191	1195
1192		static char info_node_name_for_para(paragraph par, infoconfig *cfg)
	1196	static char info_node_name_for_para(paragraph par, infoconfig *cfg,
	1197	errorstate *es)
1193	1198	{
1194	1199	info_data id = EMPTY_INFO_DATA;
1195	1200
1196	1201	id.charset = cfg->charset;
1197	1202	info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1198		NULL, FALSE, cfg);
1199		info_rdaddsc(&id, NULL);
1200
1201		return info_node_name_core(&id, &par->fpos);
1202		}
1203
1204		static char info_node_name_for_text(wchar_t text, infoconfig *cfg)
	1203	NULL, false, cfg);
	1204	info_rdaddsc(&id, "");
	1205
	1206	return info_node_name_core(&id, &par->fpos, es);
	1207	}
	1208
	1209	static char info_node_name_for_text(wchar_t text, infoconfig *cfg,
	1210	errorstate *es)
1205	1211	{
1206	1212	info_data id = EMPTY_INFO_DATA;
1207	1213

1209	1215	info_rdadds(&id, text);
1210	1216	info_rdaddsc(&id, NULL);
1211	1217
1212		return info_node_name_core(&id, NULL);
	1218	return info_node_name_core(&id, NULL, es);
1213	1219	}
1214	1220
1215	1221	static void info_menu_item(info_data text, node n, paragraph *p,

1233	1239	info_rdaddsc(text, "::");
1234	1240	if (p) {
1235	1241	info_rdaddc(text, ' ');
1236		info_rdaddwc(text, p->words, NULL, FALSE, cfg);
	1242	info_rdaddwc(text, p->words, NULL, false, cfg);
1237	1243	}
1238	1244	info_rdaddc(text, '\n');
1239	1245	}

1252	1258	{
1253	1259	if (!d->wcmode) {
1254	1260	d->state = charset_init_state;
1255		d->wcmode = TRUE;
	1261	d->wcmode = true;
1256	1262	}
1257	1263
1258	1264	if (wcs) {

1294	1300	rdaddsc(&d->output, buf);
1295	1301	}
1296	1302
1297		d->wcmode = FALSE;
	1303	d->wcmode = false;
1298	1304	}
1299	1305
1300	1306	if (cs) {

+43

-41

bk_man.c less more

8	8
9	9	typedef struct {
10	10	wchar_t *th;
11		int headnumbers;
	11	bool headnumbers;
12	12	int mindepth;
13	13	char *filename;
14	14	int charset;

16	16	} manconfig;
17	17
18	18	static void man_text(FILE , word ,
19		int newline, int quote_props, manconfig *conf);
	19	bool newline, int quote_props, manconfig *conf);
20	20	static void man_codepara(FILE , word , int charset);
21		static int man_convert(wchar_t const *s, int maxlen,
22		char **result, int quote_props,
23		int charset, charset_state *state);
	21	static bool man_convert(wchar_t const *s, int maxlen,
	22	char **result, int quote_props,
	23	int charset, charset_state *state);
24	24
25	25	/*
26	26	* My TROFF reference is "NROFF/TROFF User's Manual", Joseph

98	98	}
99	99
100	100	/*
101		* Return TRUE if we can represent the whole of the given string either
102		* in the output charset or as named characters; FALSE otherwise.
	101	* Return true if we can represent the whole of the given string either
	102	* in the output charset or as named characters; false otherwise.
103	103	*/
104		static int troff_ok(int charset, wchar_t *string) {
	104	static bool troff_ok(int charset, wchar_t *string) {
105	105	wchar_t test[2];
106	106	while (*string) {
107	107	test[0] = *string;
108	108	test[1] = 0;
109	109	if (!cvt_ok(charset, test) && !troffchar(*string))
110		return FALSE;
	110	return false;
111	111	string++;
112	112	}
113		return TRUE;
114		}
115
116		static manconfig man_configure(paragraph *source) {
	113	return true;
	114	}
	115
	116	static manconfig man_configure(paragraph source, errorstate es) {
117	117	paragraph *p;
118	118	manconfig ret;
119	119

121	121	* Defaults.
122	122	*/
123	123	ret.th = NULL;
124		ret.headnumbers = FALSE;
	124	ret.headnumbers = false;
125	125	ret.mindepth = 0;
126	126	ret.filename = dupstr("output.1");
127	127	ret.charset = CS_ASCII;

160	160	ret.th = snewn(ep - wp + 1, wchar_t);
161	161	memcpy(ret.th, wp, (ep - wp + 1) * sizeof(wchar_t));
162	162	} else if (!ustricmp(p->keyword, L"man-charset")) {
163		ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
	163	ret.charset = charset_from_ustr(
	164	&p->fpos, uadv(p->keyword), es);
164	165	} else if (!ustricmp(p->keyword, L"man-headnumbers")) {
165	166	ret.headnumbers = utob(uadv(p->keyword));
166	167	} else if (!ustricmp(p->keyword, L"man-mindepth")) {

219	220	#define QUOTE_LITERAL 4 /* defeat special meaning of `, ', - in troff */
220	221
221	222	void man_backend(paragraph sourceform, keywordlist keywords,
222		indexdata idx, void unused) {
	223	indexdata idx, void unused, errorstate *es) {
223	224	paragraph *p;
224	225	FILE *fp;
225	226	manconfig conf;
226		int had_described_thing;
	227	bool had_described_thing;
227	228
228	229	IGNORE(unused);
229	230	IGNORE(keywords);
230	231	IGNORE(idx);
231	232
232		conf = man_configure(sourceform);
	233	conf = man_configure(sourceform, es);
233	234
234	235	/*
235	236	* Open the output file.

239	240	else
240	241	fp = fopen(conf.filename, "w");
241	242	if (!fp) {
242		err_cantopenw(conf.filename);
	243	err_cantopenw(es, conf.filename);
243	244	return;
244	245	}
245	246

247	248	for (p = sourceform; p; p = p->next)
248	249	if (p->type == para_VersionID) {
249	250	fprintf(fp, ".\\\" ");
250		man_text(fp, p->words, TRUE, 0, &conf);
	251	man_text(fp, p->words, true, 0, &conf);
251	252	}
252	253
253	254	/* Standard preamble */

272	273	}
273	274	fputc('\n', fp);
274	275
275		had_described_thing = FALSE;
	276	had_described_thing = false;
276	277	#define cleanup_described_thing do { \
277	278	if (had_described_thing) \
278	279	fprintf(fp, "\n"); \
279		had_described_thing = FALSE; \
	280	had_described_thing = false; \
280	281	} while (0)
281	282
282	283	for (p = sourceform; p; p = p->next) switch (p->type) {

316	317	else
317	318	fprintf(fp, ".SH \"");
318	319	if (conf.headnumbers && p->kwtext) {
319		man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
	320	man_text(fp, p->kwtext, false, QUOTE_QUOTES, &conf);
320	321	fprintf(fp, " ");
321	322	}
322		man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf);
	323	man_text(fp, p->words, false, QUOTE_QUOTES, &conf);
323	324	fprintf(fp, "\"\n");
324	325	}
325	326	break;

341	342	case para_Copyright:
342	343	cleanup_described_thing;
343	344	fprintf(fp, ".PP\n");
344		man_text(fp, p->words, TRUE, 0, &conf);
	345	man_text(fp, p->words, true, 0, &conf);
345	346	break;
346	347
347	348	/*

362	363	sfree(bullettext);
363	364	} else if (p->type == para_NumberedList) {
364	365	fprintf(fp, ".IP \"");
365		man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
	366	man_text(fp, p->kwtext, false, QUOTE_QUOTES, &conf);
366	367	fprintf(fp, "\"\n");
367	368	} else if (p->type == para_Description) {
368	369	if (had_described_thing) {

380	381	}
381	382	} else if (p->type == para_BiblioCited) {
382	383	fprintf(fp, ".IP \"");
383		man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
	384	man_text(fp, p->kwtext, false, QUOTE_QUOTES, &conf);
384	385	fprintf(fp, "\"\n");
385	386	}
386		man_text(fp, p->words, TRUE, 0, &conf);
387		had_described_thing = FALSE;
	387	man_text(fp, p->words, true, 0, &conf);
	388	had_described_thing = false;
388	389	break;
389	390
390	391	case para_DescribedThing:
391	392	cleanup_described_thing;
392	393	fprintf(fp, ".IP \"");
393		man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf);
	394	man_text(fp, p->words, false, QUOTE_QUOTES, &conf);
394	395	fprintf(fp, "\"\n");
395		had_described_thing = TRUE;
	396	had_described_thing = true;
396	397	break;
397	398
398	399	case para_Rule:

447	448	*
448	449	* This function also does escaping of groff special characters.
449	450	*/
450		static int man_convert(wchar_t const *s, int maxlen,
451		char **result, int quote_props,
452		int charset, charset_state *state) {
	451	static bool man_convert(wchar_t const *s, int maxlen,
	452	char **result, int quote_props,
	453	int charset, charset_state *state) {
453	454	charset_state internal_state = CHARSET_INIT_STATE;
454		int slen, err;
	455	int slen;
455	456	char p = NULL, q;
456	457	int plen = 0, psize = 0;
457	458	rdstringc out = {0, 0, NULL};
458		int anyerr = 0;
	459	bool err, anyerr = false;
459	460
460	461	if (!state)
461	462	state = &internal_state;

468	469	psize = 384;
469	470	plen = 0;
470	471	p = snewn(psize, char);
471		err = 0;
	472	err = false;
472	473
473	474	while (slen > 0) {
474	475	int ret = charset_from_unicode(&s, &slen, p, psize,

527	528	if (err) {
528	529	char const tr = troffchar(s);
529	530	if (tr == NULL)
530		anyerr = TRUE;
	531	anyerr = true;
531	532	else
532	533	rdaddsc(&out, tr);
533	534	s++; slen--;

626	627
627	628	if (removeattr(text->type) == word_Normal) {
628	629	charset_state s2 = *state;
629		int len = ustrlen(text->text), hyphen = FALSE;
	630	int len = ustrlen(text->text);
	631	bool hyphen = false;
630	632
631	633	if (text->breaks && len > 0 && text->text[len - 1] == '-') {
632	634	len--;
633		hyphen = TRUE;
	635	hyphen = true;
634	636	}
635	637	if (len == 0 \|\|
636	638	man_convert(text->text, len, &c, quote_props, conf->charset,

677	679	return quote_props;
678	680	}
679	681
680		static void man_text(FILE fp, word text, int newline,
	682	static void man_text(FILE fp, word text, bool newline,
681	683	int quote_props, manconfig *conf) {
682	684	rdstringc t = { 0, 0, NULL };
683	685	charset_state state = CHARSET_INIT_STATE;

+119

-96

bk_paper.c less more

140	140	/* Flags for render_string() */
141	141	#define RS_NOLIG 1
142	142
143		static font_data make_std_font(font_list fontlist, char const *name);
	143	static font_data make_std_font(font_list fontlist, psdata *psd,
	144	const char *name);
144	145	static void wrap_paragraph(para_data pdata, word words,
145	146	int w, int i1, int i2, paper_conf *conf);
146	147	static page_data page_breaks(line_data first, line_data *last,

153	154	static void render_para(para_data pdata, paper_conf conf,
154	155	keywordlist keywords, indexdata idx,
155	156	paragraph index_placeholder, page_data index_page);
156		static int string_width(font_data font, wchar_t const string, int *errs,
	157	static int string_width(font_data font, wchar_t const string, bool *errs,
157	158	unsigned flags);
158	159	static int paper_width_simple(para_data pdata, word text, paper_conf *conf);
159	160	static para_data code_paragraph(int indent, word words, paper_conf *conf);

173	174	word *second);
174	175	static void fold_into_page(page_data dest, page_data src, int right_shift);
175	176
176		static int fonts_ok(wchar_t *string, ...)
	177	static bool fonts_ok(wchar_t *string, ...)
177	178	{
178	179	font_data *font;
179	180	va_list ap;
180		int ret = TRUE;
	181	bool ret = true;
181	182
182	183	va_start(ap, string);
183	184	while ( (font = va_arg(ap, font_data *)) != NULL) {
184		int errs;
	185	bool errs;
185	186	(void) string_width(font, string, &errs, 0);
186	187	if (errs) {
187		ret = FALSE;
	188	ret = false;
188	189	break;
189	190	}
190	191	}

194	195	}
195	196
196	197	static void paper_cfg_fonts(font_data *fonts, font_list fontlist,
197		wchar_t wp, filepos fpos) {
	198	wchar_t wp, filepos fpos, psdata *psd,
	199	errorstate *es) {
198	200	font_data *f;
199	201	char *fn;
200	202	int i;
201	203
202	204	for (i = 0; i < NFONTS && *wp; i++, wp = uadv(wp)) {
203	205	fn = utoa_dup(wp, CS_ASCII);
204		f = make_std_font(fontlist, fn);
	206	f = make_std_font(fontlist, psd, fn);
205	207	if (f)
206	208	fonts[i] = f;
207	209	else
208	210	/* FIXME: proper error */
209		err_nofont(fpos, wp);
210		}
211		}
212
213		static paper_conf paper_configure(paragraph source, font_list fontlist) {
	211	err_nofont(es, fpos, wp);
	212	}
	213	}
	214
	215	static paper_conf paper_configure(paragraph source, font_list fontlist,
	216	psdata psd, errorstate es) {
214	217	paragraph *p;
215	218	paper_conf ret;
216	219

234	237	ret.chapter_underline_thickness = 3 * UNITS_PER_PT;
235	238	ret.rule_thickness = 1 * UNITS_PER_PT;
236	239	ret.fbase.font_size = 12;
237		ret.fbase.fonts[FONT_NORMAL] = make_std_font(fontlist, "Times-Roman");
238		ret.fbase.fonts[FONT_EMPH] = make_std_font(fontlist, "Times-Italic");
239		ret.fbase.fonts[FONT_STRONG] = make_std_font(fontlist, "Times-Bold");
240		ret.fbase.fonts[FONT_CODE] = make_std_font(fontlist, "Courier");
	240	ret.fbase.fonts[FONT_NORMAL] =
	241	make_std_font(fontlist, psd, "Times-Roman");
	242	ret.fbase.fonts[FONT_EMPH] =
	243	make_std_font(fontlist, psd, "Times-Italic");
	244	ret.fbase.fonts[FONT_STRONG] =
	245	make_std_font(fontlist, psd, "Times-Bold");
	246	ret.fbase.fonts[FONT_CODE] =
	247	make_std_font(fontlist, psd, "Courier");
241	248	ret.fcode.font_size = 12;
242		ret.fcode.fonts[FONT_NORMAL] = make_std_font(fontlist, "Courier-Bold");
243		ret.fcode.fonts[FONT_EMPH] = make_std_font(fontlist, "Courier-Oblique");
244		ret.fcode.fonts[FONT_STRONG] = make_std_font(fontlist, "Courier-Bold");
245		ret.fcode.fonts[FONT_CODE] = make_std_font(fontlist, "Courier");
	249	ret.fcode.fonts[FONT_NORMAL] =
	250	make_std_font(fontlist, psd, "Courier-Bold");
	251	ret.fcode.fonts[FONT_EMPH] =
	252	make_std_font(fontlist, psd, "Courier-Oblique");
	253	ret.fcode.fonts[FONT_STRONG] =
	254	make_std_font(fontlist, psd, "Courier-Bold");
	255	ret.fcode.fonts[FONT_CODE] =
	256	make_std_font(fontlist, psd, "Courier");
246	257	ret.ftitle.font_size = 24;
247		ret.ftitle.fonts[FONT_NORMAL] = make_std_font(fontlist, "Helvetica-Bold");
	258	ret.ftitle.fonts[FONT_NORMAL] =
	259	make_std_font(fontlist, psd, "Helvetica-Bold");
248	260	ret.ftitle.fonts[FONT_EMPH] =
249		make_std_font(fontlist, "Helvetica-BoldOblique");
	261	make_std_font(fontlist, psd, "Helvetica-BoldOblique");
250	262	ret.ftitle.fonts[FONT_STRONG] =
251		make_std_font(fontlist, "Helvetica-Bold");
252		ret.ftitle.fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
	263	make_std_font(fontlist, psd,"Helvetica-Bold");
	264	ret.ftitle.fonts[FONT_CODE] =
	265	make_std_font(fontlist, psd, "Courier-Bold");
253	266	ret.fchapter.font_size = 20;
254		ret.fchapter.fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
	267	ret.fchapter.fonts[FONT_NORMAL] =
	268	make_std_font(fontlist, psd, "Helvetica-Bold");
255	269	ret.fchapter.fonts[FONT_EMPH] =
256		make_std_font(fontlist, "Helvetica-BoldOblique");
	270	make_std_font(fontlist, psd,"Helvetica-BoldOblique");
257	271	ret.fchapter.fonts[FONT_STRONG] =
258		make_std_font(fontlist, "Helvetica-Bold");
259		ret.fchapter.fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
	272	make_std_font(fontlist, psd,"Helvetica-Bold");
	273	ret.fchapter.fonts[FONT_CODE] =
	274	make_std_font(fontlist, psd, "Courier-Bold");
260	275	ret.nfsect = 3;
261	276	ret.fsect = snewn(ret.nfsect, font_cfg);
262	277	ret.fsect[0].font_size = 16;
263		ret.fsect[0].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
	278	ret.fsect[0].fonts[FONT_NORMAL] =
	279	make_std_font(fontlist, psd, "Helvetica-Bold");
264	280	ret.fsect[0].fonts[FONT_EMPH] =
265		make_std_font(fontlist, "Helvetica-BoldOblique");
	281	make_std_font(fontlist, psd,"Helvetica-BoldOblique");
266	282	ret.fsect[0].fonts[FONT_STRONG] =
267		make_std_font(fontlist, "Helvetica-Bold");
268		ret.fsect[0].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
	283	make_std_font(fontlist, psd,"Helvetica-Bold");
	284	ret.fsect[0].fonts[FONT_CODE] =
	285	make_std_font(fontlist, psd, "Courier-Bold");
269	286	ret.fsect[1].font_size = 14;
270		ret.fsect[1].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
	287	ret.fsect[1].fonts[FONT_NORMAL] =
	288	make_std_font(fontlist, psd, "Helvetica-Bold");
271	289	ret.fsect[1].fonts[FONT_EMPH] =
272		make_std_font(fontlist, "Helvetica-BoldOblique");
	290	make_std_font(fontlist, psd, "Helvetica-BoldOblique");
273	291	ret.fsect[1].fonts[FONT_STRONG] =
274		make_std_font(fontlist, "Helvetica-Bold");
275		ret.fsect[1].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
	292	make_std_font(fontlist, psd, "Helvetica-Bold");
	293	ret.fsect[1].fonts[FONT_CODE] =
	294	make_std_font(fontlist, psd, "Courier-Bold");
276	295	ret.fsect[2].font_size = 13;
277		ret.fsect[2].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
	296	ret.fsect[2].fonts[FONT_NORMAL] =
	297	make_std_font(fontlist, psd, "Helvetica-Bold");
278	298	ret.fsect[2].fonts[FONT_EMPH] =
279		make_std_font(fontlist, "Helvetica-BoldOblique");
	299	make_std_font(fontlist, psd, "Helvetica-BoldOblique");
280	300	ret.fsect[2].fonts[FONT_STRONG] =
281		make_std_font(fontlist, "Helvetica-Bold");
282		ret.fsect[2].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
	301	make_std_font(fontlist, psd, "Helvetica-Bold");
	302	ret.fsect[2].fonts[FONT_CODE] =
	303	make_std_font(fontlist, psd, "Courier-Bold");
283	304	ret.contents_indent_step = 24 * UNITS_PER_PT;
284	305	ret.contents_margin = 84 * UNITS_PER_PT;
285	306	ret.leader_separation = 12 * UNITS_PER_PT;

399	420	ret.pagenum_fontsize = utoi(uadv(p->keyword));
400	421	} else if (!ustricmp(p->keyword, L"paper-base-fonts")) {
401	422	paper_cfg_fonts(ret.fbase.fonts, fontlist, uadv(p->keyword),
402		&p->fpos);
	423	&p->fpos, psd, es);
403	424	} else if (!ustricmp(p->keyword, L"paper-code-font-size")) {
404	425	ret.fcode.font_size = utoi(uadv(p->keyword));
405	426	} else if (!ustricmp(p->keyword, L"paper-code-fonts")) {
406	427	paper_cfg_fonts(ret.fcode.fonts, fontlist, uadv(p->keyword),
407		&p->fpos);
	428	&p->fpos, psd, es);
408	429	} else if (!ustricmp(p->keyword, L"paper-title-font-size")) {
409	430	ret.ftitle.font_size = utoi(uadv(p->keyword));
410	431	} else if (!ustricmp(p->keyword, L"paper-title-fonts")) {
411	432	paper_cfg_fonts(ret.ftitle.fonts, fontlist, uadv(p->keyword),
412		&p->fpos);
	433	&p->fpos, psd, es);
413	434	} else if (!ustricmp(p->keyword, L"paper-chapter-font-size")) {
414	435	ret.fchapter.font_size = utoi(uadv(p->keyword));
415	436	} else if (!ustricmp(p->keyword, L"paper-chapter-fonts")) {
416	437	paper_cfg_fonts(ret.fchapter.fonts, fontlist, uadv(p->keyword),
417		&p->fpos);
	438	&p->fpos, psd, es);
418	439	} else if (!ustricmp(p->keyword, L"paper-section-font-size")) {
419	440	wchar_t *q = uadv(p->keyword);
420	441	int n = 0;

444	465	ret.fsect[i] = ret.fsect[ret.nfsect-1];
445	466	ret.nfsect = n+1;
446	467	}
447		paper_cfg_fonts(ret.fsect[n].fonts, fontlist, q, &p->fpos);
	468	paper_cfg_fonts(ret.fsect[n].fonts, fontlist, q, &p->fpos,
	469	psd, es);
448	470	}
449	471	}
450	472	}

520	542	}
521	543
522	544	void paper_pre_backend(paragraph sourceform, keywordlist *keywords,
523		indexdata *idx) {
	545	indexdata idx, psdata psd, errorstate *es) {
524	546	paragraph *p;
525	547	document *doc;
526		int indent, used_contents;
	548	int indent;
	549	bool used_contents;
527	550	para_data pdata, firstpara = NULL, *lastpara = NULL;
528	551	para_data firstcont, lastcont;
529	552	line_data firstline, lastline, firstcontline, lastcontline;
530	553	page_data *pages;
531	554	font_list *fontlist;
532	555	paper_conf *conf, ourconf;
533		int has_index;
	556	bool has_index;
534	557	int pagenum;
535	558	paragraph index_placeholder_para;
536		page_data *first_index_page;
537
538		init_std_fonts();
	559	page_data *first_index_page = NULL;
	560
	561	init_std_fonts(psd);
539	562	fontlist = snew(font_list);
540	563	fontlist->head = fontlist->tail = NULL;
541	564
542		ourconf = paper_configure(sourceform, fontlist);
	565	ourconf = paper_configure(sourceform, fontlist, psd, es);
543	566	conf = &ourconf;
544	567
545	568	/*

550	573	int i;
551	574	indexentry *entry;
552	575
553		has_index = FALSE;
	576	has_index = false;
554	577
555	578	for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
556	579	paper_idx *pi = snew(paper_idx);
557	580
558		has_index = TRUE;
	581	has_index = true;
559	582
560	583	pi->words = pi->lastword = NULL;
561	584	pi->lastpage = NULL;

662	685	* Do the main paragraph formatting.
663	686	*/
664	687	indent = 0;
665		used_contents = FALSE;
	688	used_contents = false;
666	689	firstline = lastline = NULL;
667	690	for (p = sourceform; p; p = p->next) {
668	691	p->private_data = NULL;

752	775	* contents section in before it.
753	776	*/
754	777	if (!used_contents && pdata->outline_level > 0) {
755		used_contents = TRUE;
	778	used_contents = true;
756	779	if (lastpara)
757	780	lastpara->next = firstcont;
758	781	else

1055	1078	doc->pages = pages;
1056	1079	doc->paper_width = conf->paper_width;
1057	1080	doc->paper_height = conf->paper_height;
	1081	doc->psd = psd;
1058	1082
1059	1083	/*
1060	1084	* Collect the section heading paragraphs into a document

1318	1342	ptype == para_Chapter \|\|
1319	1343	ptype == para_Appendix \|\|
1320	1344	ptype == para_UnnumberedChapter) {
1321		pdata->first->page_break = TRUE;
	1345	pdata->first->page_break = true;
1322	1346	pdata->first->space_before = conf->chapter_top_space;
1323	1347	pdata->last->space_after +=
1324	1348	(conf->chapter_underline_depth +

1345	1369	ldata->space_after = conf->base_para_spacing / 2;
1346	1370	else
1347	1371	ldata->space_after = conf->base_leading / 2;
1348		ldata->page_break = FALSE;
	1372	ldata->page_break = false;
1349	1373	}
1350	1374	}
1351	1375

1387	1411	return sme;
1388	1412	}
1389	1413
1390		static int new_sfmap_cmp(void a, void b)
1391		{
1392		glyph ga = (glyph )a;
1393		subfont_map_entry *sb = b;
	1414	static int new_sfmap_cmp(const void a, const void b, void *cmpctx)
	1415	{
	1416	const glyph ga = (const glyph )a;
	1417	const subfont_map_entry *sb = b;
1394	1418	glyph gb = sb->subfont->vector[sb->position];
1395	1419
1396	1420	if (ga < gb) return -1;

1403	1427	subfont_map_entry *sme;
1404	1428	int c;
1405	1429
1406		sme = find234(font->subfont_map, &g, new_sfmap_cmp);
	1430	sme = findcmp234(font->subfont_map, &g, new_sfmap_cmp, NULL);
1407	1431	if (sme) return sme;
1408	1432
1409	1433	/*

1419	1443	return encode_glyph_at(g, u, font->latest_subfont, c);
1420	1444	}
1421	1445
1422		static int sfmap_cmp(void *a, void *b)
1423		{
1424		subfont_map_entry *sa = a, *sb = b;
	1446	static int sfmap_cmp(const void *a, const void *b, void *cmpctx)
	1447	{
	1448	const subfont_map_entry *sa = a, *sb = b;
1425	1449	glyph ga = sa->subfont->vector[sa->position];
1426	1450	glyph gb = sb->subfont->vector[sb->position];
1427	1451

1430	1454	return 0;
1431	1455	}
1432	1456
1433		int width_cmp(void *a, void *b)
	1457	int width_cmp(const void *a, const void *b, void *cmpctx)
1434	1458	{
1435	1459	glyph_width const *wa = a, *wb = b;
1436	1460

1441	1465	return 0;
1442	1466	}
1443	1467
1444		int kern_cmp(void *a, void *b)
	1468	int kern_cmp(const void *a, const void *b, void *cmpctx)
1445	1469	{
1446	1470	kern_pair const *ka = a, *kb = b;
1447	1471

1456	1480	return 0;
1457	1481	}
1458	1482
1459		int lig_cmp(void *a, void *b)
	1483	int lig_cmp(const void *a, const void *b, void *cmpctx)
1460	1484	{
1461	1485	ligature const *la = a, *lb = b;
1462	1486

1475	1499	return (u < 0 || u > 0xFFFF ? NOGLYPH : fi->bmp[u]);
1476	1500	}
1477	1501
1478		void listfonts(void) {
	1502	void listfonts(psdata *psd) {
1479	1503	font_info const *fi;
1480	1504
1481		init_std_fonts();
1482		for (fi = all_fonts; fi; fi = fi->next)
	1505	init_std_fonts(psd);
	1506	for (fi = psd->all_fonts; fi; fi = fi->next)
1483	1507	printf("%s\n", fi->name);
1484	1508	}
1485	1509
1486		static font_data *make_std_font(font_list *fontlist, char const *name)
	1510	static font_data *make_std_font(font_list *fontlist, psdata *psd,
	1511	const char *name)
1487	1512	{
1488	1513	font_info const *fi;
1489	1514	font_data *f;

1494	1519	if (strcmp(fe->font->info->name, name) == 0)
1495	1520	return fe->font;
1496	1521
1497		for (fi = all_fonts; fi; fi = fi->next)
	1522	for (fi = psd->all_fonts; fi; fi = fi->next)
1498	1523	if (strcmp(fi->name, name) == 0) break;
1499	1524	if (!fi) return NULL;
1500	1525

1502	1527
1503	1528	f->list = fontlist;
1504	1529	f->info = fi;
1505		f->subfont_map = newtree234(sfmap_cmp);
	1530	f->subfont_map = newtree234(sfmap_cmp, NULL);
1506	1531
1507	1532	/*
1508	1533	* Our first subfont will contain all of US-ASCII. This isn't

1531	1556	glyph_width const *w;
1532	1557
1533	1558	wantw.glyph = index;
1534		w = find234(font->info->widths, &wantw, NULL);
	1559	w = find234(font->info->widths, &wantw);
1535	1560	if (!w) return 0;
1536	1561	return w->width;
1537	1562	}

1545	1570	return 0;
1546	1571	wantkp.left = lindex;
1547	1572	wantkp.right = rindex;
1548		kp = find234(font->info->kerns, &wantkp, NULL);
	1573	kp = find234(font->info->kerns, &wantkp);
1549	1574	if (kp == NULL)
1550	1575	return 0;
1551	1576	return kp->kern;

1560	1585	return NOGLYPH;
1561	1586	wantlig.left = lindex;
1562	1587	wantlig.right = rindex;
1563		lig = find234(font->info->ligs, &wantlig, NULL);
	1588	lig = find234(font->info->ligs, &wantlig);
1564	1589	if (lig == NULL)
1565	1590	return NOGLYPH;
1566	1591	return lig->lig;
1567	1592	}
1568	1593
1569		static int string_width(font_data *font, wchar_t const *string, int *errs,
	1594	static int string_width(font_data *font, wchar_t const *string, bool *errs,
1570	1595	unsigned flags)
1571	1596	{
1572	1597	int width = 0;
1573	1598	int nindex, index, oindex, lindex;
1574	1599
1575	1600	if (errs)
1576		*errs = 0;
	1601	*errs = false;
1577	1602
1578	1603	oindex = NOGLYPH;
1579	1604	index = utoglyph(font->info, *string);

1582	1607
1583	1608	if (index == NOGLYPH) {
1584	1609	if (errs)
1585		*errs = 1;
	1610	*errs = true;
1586	1611	} else {
1587	1612	if (!(flags & RS_NOLIG) &&
1588	1613	(lindex = find_lig(font, index, nindex)) != NOGLYPH) {

1618	1643	static int paper_width_internal(void *vctx, word *word, int *nspaces)
1619	1644	{
1620	1645	struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx;
1621		int style, type, findex, width, errs;
	1646	int style, type, findex, width;
	1647	bool errs;
1622	1648	wchar_t *str;
1623	1649	unsigned flags = 0;
1624	1650

2102	2128	keywordlist *keywords, indexdata *idx, paper_conf *conf)
2103	2129	{
2104	2130	while (text && text != text_end) {
2105		int style, type, findex, errs;
	2131	int style, type, findex;
	2132	bool errs;
2106	2133	wchar_t *str;
2107	2134	xref_dest dest;
2108	2135	unsigned flags = 0;

2323	2350	xr = NULL;
2324	2351
2325	2352	{
2326		int extra_indent, shortfall, spaces;
	2353	int extra_indent = 0, shortfall = 0, spaces = 0;
2327	2354	int just = ldata->pdata->justification;
2328	2355
2329	2356	/*

2337	2364	case JUST:
2338	2365	shortfall = ldata->hshortfall;
2339	2366	spaces = ldata->nspaces;
2340		extra_indent = 0;
2341		break;
2342		case LEFT:
2343		shortfall = spaces = extra_indent = 0;
2344	2367	break;
2345	2368	case RIGHT:
2346		shortfall = spaces = 0;
2347	2369	extra_indent = ldata->real_shortfall;
2348	2370	break;
2349	2371	}

2382	2404	cxref = NULL;
2383	2405	cxref_page = NULL;
2384	2406
	2407	assert(pdata->first);
2385	2408	for (ldata = pdata->first; ldata; ldata = ldata->next) {
2386	2409	/*
2387	2410	* If this is a contents entry, we expect to have a single

2560	2583	w->text = snewn(t-start+1, wchar_t);
2561	2584	memcpy(w->text, start, (t-start) * sizeof(wchar_t));
2562	2585	w->text[t-start] = '\0';
2563		w->breaks = FALSE;
	2586	w->breaks = false;
2564	2587	w->aux = 0;
2565	2588
2566	2589	if (ltail)

2734	2757	ret->alt = NULL;
2735	2758	ret->type = word_Normal;
2736	2759	ret->text = ustrdup(text);
2737		ret->breaks = FALSE;
	2760	ret->breaks = false;
2738	2761	ret->aux = 0;
2739	2762	return ret;
2740	2763	}

2746	2769	ret->alt = NULL;
2747	2770	ret->type = word_WhiteSpace;
2748	2771	ret->text = NULL;
2749		ret->breaks = TRUE;
	2772	ret->breaks = true;
2750	2773	ret->aux = 0;
2751	2774	return ret;
2752	2775	}

2758	2781	ret->alt = NULL;
2759	2782	ret->type = word_PageXref;
2760	2783	ret->text = NULL;
2761		ret->breaks = FALSE;
	2784	ret->breaks = false;
2762	2785	ret->aux = 0;
2763	2786	ret->private_data = page;
2764	2787	return ret;

2771	2794	ret->alt = NULL;
2772	2795	ret->type = word_XrefEnd;
2773	2796	ret->text = NULL;
2774		ret->breaks = FALSE;
	2797	ret->breaks = false;
2775	2798	ret->aux = 0;
2776	2799	return ret;
2777	2800	}

2779	2802	static word *prepare_contents_title(word *first, wchar_t *separator,
2780	2803	word *second)
2781	2804	{
2782		word *ret;
	2805	word *ret = NULL;
2783	2806	word **wptr, *w;
2784	2807
2785	2808	wptr = &ret;

+21

-16

bk_pdf.c less more

34	34	static void objref(object *o, object *dest);
35	35	static void objdest(object *o, page_data *p);
36	36
37		static int is_std_font(char const *name);
	37	static bool is_std_font(char const *name);
38	38
39	39	static void make_pages_node(object *node, object *parent, page_data *first,
40	40	page_data *last, object *resources,
41	41	object *mediabox);
42	42	static int make_outline(object *parent, outline_element *start, int n,
43		int open);
	43	bool open);
44	44	static int pdf_versionid(FILE *fp, word *words);
45	45
46	46	void pdf_backend(paragraph *sourceform, keywordlist *keywords,
47		indexdata *idx, void *vdoc) {
	47	indexdata *idx, void *vdoc, errorstate *es) {
48	48	document *doc = (document *)vdoc;
49	49	int font_index;
50	50	font_encoding *fe;

130	130	char fname[40];
131	131	char buf[80];
132	132	int i, prev;
133		object *font, *fontdesc;
	133	object *font, *fontdesc = NULL;
134	134	int flags;
135	135	font_info const *fi = fe->font->info;
136	136

300	300	objtext(cidfont, fe->font->info->name);
301	301	objtext(cidfont, "\n/CIDSystemInfo<</Registry(Adobe)"
302	302	"/Ordering(Identity)/Supplement 0>>\n");
303		objtext(cidfont, "/FontDescriptor ");
304		objref(cidfont, fontdesc);
	303	assert(fontdesc); /* TrueType fonts are never standard */
	304	objtext(cidfont, "/FontDescriptor ");
	305	objref(cidfont, fontdesc);
305	306	objtext(cidfont, "\n/W[0[");
306	307	for (i = 0; i < (int)sfnt_nglyphs(fe->font->info->fontfile); i++) {
307	308	char buf[20];

327	328	objtext(font, buf);
328	329	}
329	330	objtext(font, i % 8 ? "/" : "\n/");
330		objtext(font, glyph_extern(fe->vector[i]));
	331	objtext(font, glyph_extern(doc->psd, fe->vector[i]));
331	332	prev = i;
332	333	}
333	334

368	369	size_t len;
369	370	char *ffbuf;
370	371
371		pf_part1((font_info *)fi, &ffbuf, &len);
	372	pf_part1((font_info *)fi, &ffbuf, &len, es);
372	373	objstream_len(fontfile, ffbuf, len);
373	374	sfree(ffbuf);
374	375	sprintf(buf, "<<\n/Length1 %lu\n", (unsigned long)len);
375	376	objtext(fontfile, buf);
376		pf_part2((font_info *)fi, &ffbuf, &len);
	377	pf_part2((font_info *)fi, &ffbuf, &len, es);
377	378	objstream_len(fontfile, ffbuf, len);
378	379	sfree(ffbuf);
379	380	sprintf(buf, "/Length2 %lu\n", (unsigned long)len);

605	606
606	607	objtext(outlines, "<<\n/Type /Outlines\n");
607	608	topcount = make_outline(outlines, doc->outline_elements,
608		doc->n_outline_elements, TRUE);
	609	doc->n_outline_elements, true);
609	610	sprintf(buf, "/Count %d\n>>\n", topcount);
610	611	objtext(outlines, buf);
611	612	}

671	672	else
672	673	fp = fopen(filename, "wb");
673	674	if (!fp) {
674		err_cantopenw(filename);
	675	err_cantopenw(es, filename);
675	676	return;
676	677	}
677	678

783	784	"Symbol", "ZapfDingbats"
784	785	};
785	786
786		static int is_std_font(char const *name) {
	787	static bool is_std_font(char const *name) {
787	788	unsigned i;
788	789	for (i = 0; i < lenof(stdfonts); i++)
789	790	if (strcmp(name, stdfonts[i]) == 0)
790		return TRUE;
791		return FALSE;
	791	return true;
	792	return false;
792	793	}
793	794
794	795	static void make_pages_node(object *node, object *parent, page_data *first,

829	830	for (i = 0; i < TREE_BRANCH; i++) {
830	831	int number = (i+1) * count / TREE_BRANCH - i * count / TREE_BRANCH;
831	832	thisfirst = page;
	833	thislast = NULL;
832	834	while (number--) {
833	835	thislast = page;
834	836	page = page->next;
835	837	}
	838	assert(thislast);
836	839
837	840	if (thisfirst == thislast) {
838	841	objref(node, (object *)thisfirst->spare);

912	915	}
913	916
914	917	static int make_outline(object *parent, outline_element *items, int n,
915		int open)
	918	bool open)
916	919	{
917	920	int level, totalcount = 0;
918	921	outline_element *itemp;

964	967
965	968	if (itemp > items) {
966	969	char buf[80];
967		int count = make_outline(curr, items, itemp - items, FALSE);
	970	int count = make_outline(curr, items, itemp - items, false);
968	971	if (!open)
969	972	count = -count;
970	973	else

1020	1023	case word_Quote:
1021	1024	text = dupstr("'");
1022	1025	break;
	1026	default:
	1027	continue;
1023	1028	}
1024	1029
1025	1030	fputs(text, fp);

+6

-4

bk_ps.c less more

21	21	}
22	22
23	23	void ps_backend(paragraph *sourceform, keywordlist *keywords,
24		indexdata *idx, void *vdoc) {
	24	indexdata *idx, void *vdoc, errorstate *es) {
25	25	document *doc = (document *)vdoc;
26	26	int font_index;
27	27	font_encoding *fe;

52	52	else
53	53	fp = fopen(filename, "w");
54	54	if (!fp) {
55		err_cantopenw(filename);
	55	err_cantopenw(es, filename);
56	56	return;
57	57	}
58	58

204	204	if (fe->font->info->filetype == TYPE1)
205	205	pf_writeps(fe->font->info, fp);
206	206	else
207		sfnt_writeps(fe->font->info, fp);
	207	sfnt_writeps(fe->font->info, fp, doc->psd, es);
208	208	fprintf(fp, "%%%%EndResource\n");
209	209	} else {
210	210	fprintf(fp, "%%%%IncludeResource: font %s\n",

228	228	ps_token(fp, &cc, "{1 index /FID ne {def} {pop pop} ifelse} forall\n");
229	229	ps_token(fp, &cc, "/Encoding [\n");
230	230	for (i = 0; i < 256; i++)
231		ps_token(fp, &cc, "/%s", glyph_extern(fe->vector[i]));
	231	ps_token(fp, &cc, "/%s", glyph_extern(doc->psd, fe->vector[i]));
232	232	ps_token(fp, &cc, "] def\n");
233	233	ps_token(fp, &cc, "currentdict end\n");
234	234	ps_token(fp, &cc, "/fontname-%s exch definefont /%s exch def\n",

343	343	case word_Quote:
344	344	text = dupstr("'");
345	345	break;
	346	default:
	347	continue;
346	348	}
347	349
348	350	if (cc + strlen(text) > PS_MAXWIDTH)

+20

-17

bk_text.c less more

9	9	typedef enum { LEFT, LEFTPLUS, CENTRE } alignment;
10	10	typedef struct {
11	11	alignment align;
12		int number_at_all, just_numbers;
	12	bool number_at_all, just_numbers;
13	13	wchar_t *underline;
14	14	wchar_t *number_suffix;
15	15	} alignstruct;

20	20	int width;
21	21	alignstruct atitle, achapter, *asect;
22	22	int nasect;
23		int include_version_id;
24		int indent_preambles;
	23	bool include_version_id;
	24	bool indent_preambles;
25	25	int charset;
26	26	word bullet;
27	27	wchar_t lquote, rquote, *rule;

33	33	FILE *fp;
34	34	int charset;
35	35	charset_state state;
	36	errorstate *es;
36	37	} textfile;
37	38
38	39	static void text_heading(textfile *, word *, word *, word *, alignstruct,

54	55	return LEFT;
55	56	}
56	57
57		static textconfig text_configure(paragraph *source) {
	58	static textconfig text_configure(paragraph *source, errorstate *es) {
58	59	textconfig ret;
59	60	paragraph *p;
60	61	int n;

65	66	ret.bullet.next = NULL;
66	67	ret.bullet.alt = NULL;
67	68	ret.bullet.type = word_Normal;
68		ret.atitle.just_numbers = FALSE; /* ignored */
69		ret.atitle.number_at_all = TRUE; /* ignored */
	69	ret.atitle.just_numbers = false; /* ignored */
	70	ret.atitle.number_at_all = true; /* ignored */
70	71
71	72	/*
72	73	* Defaults.

79	80	ret.atitle.align = CENTRE;
80	81	ret.atitle.underline = L"\x2550\0=\0\0";
81	82	ret.achapter.align = LEFT;
82		ret.achapter.just_numbers = FALSE;
83		ret.achapter.number_at_all = TRUE;
	83	ret.achapter.just_numbers = false;
	84	ret.achapter.number_at_all = true;
84	85	ret.achapter.number_suffix = L": ";
85	86	ret.achapter.underline = L"\x203E\0-\0\0";
86	87	ret.nasect = 1;
87	88	ret.asect = snewn(ret.nasect, alignstruct);
88	89	ret.asect[0].align = LEFTPLUS;
89		ret.asect[0].just_numbers = TRUE;
90		ret.asect[0].number_at_all = TRUE;
	90	ret.asect[0].just_numbers = true;
	91	ret.asect[0].number_at_all = true;
91	92	ret.asect[0].number_suffix = L" ";
92	93	ret.asect[0].underline = L"\0";
93		ret.include_version_id = TRUE;
94		ret.indent_preambles = FALSE;
	94	ret.include_version_id = true;
	95	ret.indent_preambles = false;
95	96	ret.bullet.text = L"\x2022\0-\0\0";
96	97	ret.rule = L"\x2500\0-\0\0";
97	98	ret.filename = dupstr("output.txt");

130	131	if (!ustricmp(p->keyword, L"text-indent")) {
131	132	ret.indent = utoi(uadv(p->keyword));
132	133	} else if (!ustricmp(p->keyword, L"text-charset")) {
133		ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
	134	ret.charset = charset_from_ustr(
	135	&p->fpos, uadv(p->keyword), es);
134	136	} else if (!ustricmp(p->keyword, L"text-filename")) {
135	137	sfree(ret.filename);
136	138	ret.filename = dupstr(adv(p->origkeyword));

317	319	}
318	320
319	321	void text_backend(paragraph *sourceform, keywordlist *keywords,
320		indexdata *idx, void *unused) {
	322	indexdata *idx, void *unused, errorstate *es) {
321	323	paragraph *p;
322	324	textconfig conf;
323	325	word *prefix, *body, *wp;

331	333	IGNORE(keywords); /* we don't happen to need this */
332	334	IGNORE(idx); /* or this */
333	335
334		conf = text_configure(sourceform);
	336	conf = text_configure(sourceform, es);
335	337
336	338	/*
337	339	* Open the output file.

341	343	else
342	344	tf.fp = fopen(conf.filename, "w");
343	345	if (!tf.fp) {
344		err_cantopenw(conf.filename);
	346	err_cantopenw(es, conf.filename);
345	347	return;
346	348	}
347	349	tf.charset = conf.charset;
	350	tf.es = es;
348	351	tf.state = charset_init_state;
349	352
350	353	/* Do the title */

781	784	for (; text; text = text->next) if (text->type == word_WeakCode) {
782	785	int wid = ustrwid(text->text, tf->charset);
783	786	if (wid > width)
784		err_text_codeline(&text->fpos, wid, width);
	787	err_text_codeline(tf->es, &text->fpos, wid, width);
785	788	text_output_many(tf, indent, L' ');
786	789	text_output(tf, text->text);
787	790	text_output(tf, L"\n");

+19

-19

bk_whlp.c less more

49	49	static void whlp_rdadds(rdstringc *rs, const wchar_t *text, whlpconf *conf,
50	50	charset_state *state);
51	51	static void whlp_mkparagraph(struct bk_whlp_state *state,
52		int font, word *text, int subsidiary,
	52	int font, word *text, bool subsidiary,
53	53	whlpconf *conf);
54	54	static void whlp_navmenu(struct bk_whlp_state *state, paragraph *p,
55	55	whlpconf *conf);

149	149	}
150	150
151	151	void whlp_backend(paragraph *sourceform, keywordlist *keywords,
152		indexdata *idx, void *unused) {
	152	indexdata *idx, void *unused, errorstate *es) {
153	153	WHLP h;
154	154	char *cntname;
155	155	paragraph *p, *lastsect;

158	158	int i;
159	159	int nesting;
160	160	indexentry *ie;
161		int done_contents_topic = FALSE;
	161	bool done_contents_topic = false;
162	162	whlpconf conf;
163	163
164	164	IGNORE(unused);

206	206	{
207	207	int len = strlen(conf.filename);
208	208	if (len < 4 || conf.filename[len-4] != '.' ||
209		tolower(conf.filename[len-3] != 'h') ||
210		tolower(conf.filename[len-2] != 'l') ||
211		tolower(conf.filename[len-1] != 'p')) {
	209	tolower(conf.filename[len-3]) != 'h' ||
	210	tolower(conf.filename[len-2]) != 'l' ||
	211	tolower(conf.filename[len-1]) != 'p') {
212	212	char *newf;
213	213	newf = snewn(len + 5, char);
214	214	sprintf(newf, "%s.hlp", conf.filename);

222	222
223	223	state.cntfp = fopen(cntname, "wb");
224	224	if (!state.cntfp) {
225		err_cantopenw(cntname);
	225	err_cantopenw(es, cntname);
226	226	return;
227	227	}
228	228	state.cnt_last_level = -1; state.cnt_workaround = 0;

249	249	p->private_data = whlp_register_topic(h, rs.text, &errstr);
250	250	if (!p->private_data) {
251	251	p->private_data = whlp_register_topic(h, NULL, NULL);
252		err_winhelp_ctxclash(&p->fpos, rs.text, errstr);
	252	err_winhelp_ctxclash(es, &p->fpos, rs.text, errstr);
253	253	}
254	254	sfree(rs.text);
255	255	}

341	341	if (p->type == para_Title) {
342	342	whlp_begin_para(h, WHLP_PARA_NONSCROLL);
343	343	state.cstate = charset_init_state;
344		whlp_mkparagraph(&state, FONT_TITLE, p->words, FALSE, &conf);
	344	whlp_mkparagraph(&state, FONT_TITLE, p->words, false, &conf);
345	345	whlp_wtext(&state, NULL);
346	346	whlp_end_para(h);
347	347	whlp_rdaddwc(&rs, p->words, &conf, NULL);

430	430	whlp_navmenu(&state, p, &conf);
431	431	}
432	432
433		done_contents_topic = TRUE;
	433	done_contents_topic = true;
434	434	}
435	435
436	436	if (lastsect && lastsect->child) {

512	512	whlp_begin_para(h, WHLP_PARA_NONSCROLL);
513	513	state.cstate = charset_init_state;
514	514	if (p->kwtext) {
515		whlp_mkparagraph(&state, FONT_TITLE, p->kwtext, FALSE, &conf);
	515	whlp_mkparagraph(&state, FONT_TITLE, p->kwtext, false, &conf);
516	516	whlp_set_font(h, FONT_TITLE);
517	517	whlp_wtext(&state, conf.sectsuffix);
518	518	}
519		whlp_mkparagraph(&state, FONT_TITLE, p->words, FALSE, &conf);
	519	whlp_mkparagraph(&state, FONT_TITLE, p->words, false, &conf);
520	520	whlp_wtext(&state, NULL);
521	521	whlp_end_para(h);
522	522

562	562	if (p->type == para_Bullet) {
563	563	whlp_wtext(&state, conf.bullet);
564	564	} else {
565		whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, FALSE, &conf);
	565	whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, false, &conf);
566	566	whlp_wtext(&state, conf.listsuffix);
567	567	}
568	568	whlp_wtext(&state, NULL);

576	576	state.cstate = charset_init_state;
577	577
578	578	if (p->type == para_BiblioCited) {
579		whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, FALSE, &conf);
	579	whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, false, &conf);
580	580	whlp_wtext(&state, L" ");
581	581	}
582	582
583		whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE, &conf);
	583	whlp_mkparagraph(&state, FONT_NORMAL, p->words, false, &conf);
584	584	whlp_wtext(&state, NULL);
585	585	whlp_end_para(h);
586	586	break;

686	686	whlp_start_hyperlink(state->h, (WHLP_TOPIC)p->private_data);
687	687	state->cstate = charset_init_state;
688	688	if (p->kwtext) {
689		whlp_mkparagraph(state, FONT_NORMAL, p->kwtext, TRUE, conf);
	689	whlp_mkparagraph(state, FONT_NORMAL, p->kwtext, true, conf);
690	690	whlp_set_font(state->h, FONT_NORMAL);
691	691	whlp_wtext(state, conf->sectsuffix);
692	692	}
693		whlp_mkparagraph(state, FONT_NORMAL, p->words, TRUE, conf);
	693	whlp_mkparagraph(state, FONT_NORMAL, p->words, true, conf);
694	694	whlp_wtext(state, NULL);
695	695	whlp_end_hyperlink(state->h);
696	696	whlp_end_para(state->h);

698	698	}
699	699
700	700	static void whlp_mkparagraph(struct bk_whlp_state *state,
701		int font, word *text, int subsidiary,
	701	int font, word *text, bool subsidiary,
702	702	whlpconf *conf) {
703	703	keyword *kwl;
704	704	int deffont = font;

788	788	if (cvt_ok(conf->charset, text->text) || !text->alt)
789	789	whlp_wtext(state, text->text);
790	790	else
791		whlp_mkparagraph(state, deffont, text->alt, FALSE, conf);
	791	whlp_mkparagraph(state, deffont, text->alt, false, conf);
792	792	} else if (removeattr(text->type) == word_WhiteSpace) {
793	793	whlp_wtext(state, L" ");
794	794	} else if (removeattr(text->type) == word_Quote) {

+28

-0

charset/.gitignore less more

	0	/*.o
	1	/sbcsdat.c
	2	/sbcsdat.h
	3	/convcs
	4	/cstable
	5	/confuse
	6	/csshow
	7	/libcharset.a
	8	.deps
	9	.ninja_deps
	10	.ninja_log
	11	/Makefile
	12	/Makefile.in
	13	/aclocal.m4
	14	/autom4te.cache/
	15	/compile
	16	/configure
	17	/depcomp
	18	/install-sh
	19	/missing
	20	/stamp-h1
	21	/config.log
	22	/config.status
	23	/CMakeCache.txt
	24	/CMakeFiles
	25	/cmake_install.cmake
	26	/build.ninja
	27	/rules.ninja

+43

-0

charset/CMakeLists.txt less more

	0	# CMake-based build system.
	1
	2	# I don't want to have to edit the master list of source files in more
	3	# than one place, so let's get CMake to use its built-in file and
	4	# string handling abilities to read the list out of Makefile.am
	5	# alongside it.
	6
	7	cmake_minimum_required(VERSION 3.0)
	8	project(libcharset LANGUAGES C)
	9
	10	file(READ ${CMAKE_CURRENT_SOURCE_DIR}/Makefile.am MAKEFILE_AM)
	11	string(REPLACE "\\\n" " " MAKEFILE_AM ${MAKEFILE_AM})
	12	string(REGEX MATCHALL "[^ \n]* = [^\n]*" MAKEFILE_AM_DEFS ${MAKEFILE_AM})
	13
	14	foreach(MAKEFILE_AM_DEF ${MAKEFILE_AM_DEFS})
	15	if(${MAKEFILE_AM_DEF} MATCHES "^([a-z][^ ]*)_SOURCES = (.*)")
	16	set(TARGET ${CMAKE_MATCH_1})
	17	string(REGEX MATCHALL "[^ ]*\\.c" SOURCES ${CMAKE_MATCH_2})
	18	if ("libcharset_a" STREQUAL ${TARGET})
	19	add_library(charset STATIC ${SOURCES}
	20	${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.h)
	21	target_include_directories(charset PRIVATE
	22	${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
	23	elseif(NOT LIBCHARSET_LIBRARY_ONLY)
	24	add_executable(${TARGET} ${SOURCES}
	25	${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.h)
	26	target_include_directories(${TARGET} PRIVATE
	27	${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
	28	target_link_libraries(${TARGET} charset)
	29	endif()
	30	endif()
	31	endforeach()
	32
	33	add_custom_command(OUTPUT sbcsdat.c
	34	COMMAND perl ${CMAKE_CURRENT_SOURCE_DIR}/sbcsgen.pl
	35	${CMAKE_CURRENT_SOURCE_DIR}/sbcs.dat
	36	--source=${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.c
	37	DEPENDS sbcsgen.pl sbcs.dat)
	38	add_custom_command(OUTPUT sbcsdat.h
	39	COMMAND perl ${CMAKE_CURRENT_SOURCE_DIR}/sbcsgen.pl
	40	${CMAKE_CURRENT_SOURCE_DIR}/sbcs.dat
	41	--header=${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.h
	42	DEPENDS sbcsgen.pl sbcs.dat)

+0

-252

~~charset/Makefile~~ less more

0		# -- make --
1		#
2		# Makefile for libcharset.
3
4		# This Makefile should be sufficient to build libcharset and its
5		# demo application all on its own. However, it's also a valid
6		# Makefile _fragment_ which can be linked in to another program
7		# Makefile to allow libcharset to be built directly into its
8		# binary.
9
10		# To include this as part of another Makefile, you need to:
11		#
12		# - Define $(LIBCHARSET_SRCDIR) to be a directory prefix (i.e.
13		# probably ending in a slash) which allows access to the
14		# libcharset source files.
15		#
16		# - Define $(LIBCHARSET_OBJDIR) to be a directory prefix (i.e.
17		# probably ending in a slash) which allows access to the
18		# directory where the libcharset object files need to be put.
19		#
20		# - Define $(LIBCHARSET_OBJPFX) to be a filename prefix to be
21		# applied to the libcharset object files (in case, for example,
22		# the file names clash with those of the main application, and
23		# you need to call them cs-*.o to resolve the clash).
24		#
25		# - Define $(LIBCHARSET_GENPFX) to be a prefix to be added to
26		# targets such as `all' and `clean'. (Mostly the point of this
27		# is to get those targets out of the way for the Makefile
28		# fragment including us.)
29		#
30		# - If you need your compiler to use the -MD flag, define $(MD) to
31		# be `-MD'.
32		#
33		# This Makefile fragment will then define rules for building each
34		# object file, and will in turn define $(LIBCHARSET_OBJS) to be
35		# what you need to add to your link line.
36
37		$(LIBCHARSET_GENPFX)all: \
38		$(LIBCHARSET_OBJDIR)libcharset.a \
39		$(LIBCHARSET_OBJDIR)convcs \
40		$(LIBCHARSET_OBJDIR)cstable \
41		$(LIBCHARSET_OBJDIR)confuse \
42		$(LIBCHARSET_OBJDIR)csshow
43
44		$(LIBCHARSET_OBJDIR)convcs: $(LIBCHARSET_SRCDIR)convcs.c \
45		$(LIBCHARSET_OBJDIR)libcharset.a
46		$(CC) $(CFLAGS) -o $(LIBCHARSET_OBJDIR)convcs \
47		$(LIBCHARSET_SRCDIR)convcs.c \
48		$(LIBCHARSET_OBJDIR)libcharset.a
49
50		$(LIBCHARSET_OBJDIR)cstable: $(LIBCHARSET_SRCDIR)cstable.c \
51		$(LIBCHARSET_OBJDIR)libcharset.a \
52		$(LIBCHARSET_OBJDIR)sbcsdat.c
53		$(CC) $(CFLAGS) -I $(LIBCHARSET_OBJDIR). \
54		-o $(LIBCHARSET_OBJDIR)cstable \
55		$(LIBCHARSET_SRCDIR)cstable.c \
56		$(LIBCHARSET_OBJDIR)libcharset.a
57
58		$(LIBCHARSET_OBJDIR)confuse: $(LIBCHARSET_SRCDIR)confuse.c \
59		$(LIBCHARSET_OBJDIR)libcharset.a
60		$(CC) $(CFLAGS) -o $(LIBCHARSET_OBJDIR)confuse \
61		$(LIBCHARSET_SRCDIR)confuse.c \
62		$(LIBCHARSET_OBJDIR)libcharset.a
63
64		$(LIBCHARSET_OBJDIR)csshow: $(LIBCHARSET_SRCDIR)csshow.c \
65		$(LIBCHARSET_OBJDIR)libcharset.a
66		$(CC) $(CFLAGS) -o $(LIBCHARSET_OBJDIR)csshow \
67		$(LIBCHARSET_SRCDIR)csshow.c \
68		$(LIBCHARSET_OBJDIR)libcharset.a
69
70		LIBCHARSET_OBJS = \
71		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)big5enc.o \
72		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)big5set.o \
73		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)cns11643.o \
74		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)cp949.o \
75		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)emacsenc.o \
76		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)euc.o \
77		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)fromucs.o \
78		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)gb2312.o \
79		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)htmlcs.o \
80		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)hz.o \
81		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)iso2022.o \
82		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)iso2022s.o \
83		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)istate.o \
84		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)jisx0208.o \
85		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)jisx0212.o \
86		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)ksx1001.o \
87		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)locale.o \
88		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)localenc.o \
89		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)macenc.o \
90		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)mimeenc.o \
91		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)sbcs.o \
92		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)sbcsdat.o \
93		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)shiftjis.o \
94		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)slookup.o \
95		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)superset.o \
96		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)toucs.o \
97		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf16.o \
98		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf7.o \
99		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf8.o \
100		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)xenc.o \
101		# end of list
102
103		$(LIBCHARSET_OBJDIR)libcharset.a: $(LIBCHARSET_OBJS)
104		ar rcs $@ $(LIBCHARSET_OBJS)
105
106		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)big5enc.o: \
107		$(LIBCHARSET_SRCDIR)big5enc.c
108		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
109
110		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)big5set.o: \
111		$(LIBCHARSET_SRCDIR)big5set.c
112		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
113
114		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)cns11643.o: \
115		$(LIBCHARSET_SRCDIR)cns11643.c
116		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
117
118		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)cp949.o: \
119		$(LIBCHARSET_SRCDIR)cp949.c
120		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
121
122		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)emacsenc.o: \
123		$(LIBCHARSET_SRCDIR)emacsenc.c
124		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
125
126		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)euc.o: \
127		$(LIBCHARSET_SRCDIR)euc.c
128		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
129
130		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)fromucs.o: \
131		$(LIBCHARSET_SRCDIR)fromucs.c
132		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
133
134		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)gb2312.o: \
135		$(LIBCHARSET_SRCDIR)gb2312.c
136		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
137
138		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)htmlcs.o: \
139		$(LIBCHARSET_SRCDIR)htmlcs.c
140		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
141
142		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)hz.o: \
143		$(LIBCHARSET_SRCDIR)hz.c
144		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
145
146		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)iso2022.o: \
147		$(LIBCHARSET_SRCDIR)iso2022.c \
148		$(LIBCHARSET_OBJDIR)sbcsdat.h
149		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -I $(LIBCHARSET_OBJDIR). -c -o $@ $<
150
151		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)iso2022s.o: \
152		$(LIBCHARSET_SRCDIR)iso2022s.c \
153		$(LIBCHARSET_OBJDIR)sbcsdat.h
154		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -I $(LIBCHARSET_OBJDIR). -c -o $@ $<
155
156		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)istate.o: \
157		$(LIBCHARSET_SRCDIR)istate.c
158		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
159
160		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)jisx0208.o: \
161		$(LIBCHARSET_SRCDIR)jisx0208.c
162		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
163
164		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)jisx0212.o: \
165		$(LIBCHARSET_SRCDIR)jisx0212.c
166		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
167
168		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)ksx1001.o: \
169		$(LIBCHARSET_SRCDIR)ksx1001.c
170		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
171
172		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)locale.o: \
173		$(LIBCHARSET_SRCDIR)locale.c
174		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
175
176		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)localenc.o: \
177		$(LIBCHARSET_SRCDIR)localenc.c
178		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
179
180		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)macenc.o: \
181		$(LIBCHARSET_SRCDIR)macenc.c
182		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
183
184		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)mimeenc.o: \
185		$(LIBCHARSET_SRCDIR)mimeenc.c
186		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
187
188		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)sbcs.o: \
189		$(LIBCHARSET_SRCDIR)sbcs.c
190		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
191
192		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)shiftjis.o: \
193		$(LIBCHARSET_SRCDIR)shiftjis.c
194		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
195
196		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)slookup.o: \
197		$(LIBCHARSET_SRCDIR)slookup.c \
198		$(LIBCHARSET_OBJDIR)sbcsdat.c
199		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -I $(LIBCHARSET_OBJDIR). -c -o $@ $<
200
201		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)superset.o: \
202		$(LIBCHARSET_SRCDIR)superset.c
203		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
204
205		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)toucs.o: \
206		$(LIBCHARSET_SRCDIR)toucs.c
207		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
208
209		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf16.o: \
210		$(LIBCHARSET_SRCDIR)utf16.c
211		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
212
213		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf7.o: \
214		$(LIBCHARSET_SRCDIR)utf7.c
215		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
216
217		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf8.o: \
218		$(LIBCHARSET_SRCDIR)utf8.c
219		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
220
221		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)xenc.o: \
222		$(LIBCHARSET_SRCDIR)xenc.c
223		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
224
225		# This object file is special, because its source file is itself
226		# generated - and therefore goes in the object directory.
227
228		$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)sbcsdat.o: \
229		$(LIBCHARSET_OBJDIR)sbcsdat.c
230		$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
231
232		$(LIBCHARSET_OBJDIR)sbcsdat.c: \
233		$(LIBCHARSET_SRCDIR)sbcs.dat \
234		$(LIBCHARSET_SRCDIR)sbcsgen.pl
235		perl $(LIBCHARSET_SRCDIR)sbcsgen.pl \
236		$(LIBCHARSET_SRCDIR)sbcs.dat \
237		--source=$(LIBCHARSET_OBJDIR)sbcsdat.c
238
239		$(LIBCHARSET_OBJDIR)sbcsdat.h: \
240		$(LIBCHARSET_SRCDIR)sbcs.dat \
241		$(LIBCHARSET_SRCDIR)sbcsgen.pl
242		perl $(LIBCHARSET_SRCDIR)sbcsgen.pl \
243		$(LIBCHARSET_SRCDIR)sbcs.dat \
244		--header=$(LIBCHARSET_OBJDIR)sbcsdat.h
245
246		$(LIBCHARSET_GENPFX)clean:
247		rm -f $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)*.o \
248		$(LIBCHARSET_OBJDIR)libcharset.a \
249		$(LIBCHARSET_OBJDIR)sbcsdat.c \
250		$(LIBCHARSET_OBJDIR)sbcsdat.h \
251		$(LIBCHARSET_OBJDIR)convcs

+30

-0

charset/Makefile.am less more

	0	bin_PROGRAMS = convcs cstable csshow confuse
	1	lib_LIBRARIES = libcharset.a
	2
	3	convcs_SOURCES = convcs.c
	4	convcs_LDADD = libcharset.a
	5
	6	cstable_SOURCES = cstable.c
	7	cstable_LDADD = libcharset.a
	8
	9	csshow_SOURCES = csshow.c
	10	csshow_LDADD = libcharset.a
	11
	12	confuse_SOURCES = confuse.c
	13	confuse_LDADD = libcharset.a
	14
	15	libcharset_a_SOURCES = charset.h internal.h sbcsdat.h big5enc.c \
	16	big5set.c cns11643.c cp949.c emacsenc.c enum.h euc.c fromucs.c \
	17	gb2312.c htmlcs.c hz.c iso2022.c iso2022s.c iso6937.c istate.c \
	18	jisx0208.c jisx0212.c ksx1001.c locale.c localenc.c macenc.c \
	19	mimeenc.c sbcs.c sbcsdat.c shiftjis.c slookup.c superset.c toucs.c \
	20	utf16.c utf7.c utf8.c xenc.c
	21
	22	BUILT_SOURCES = sbcsdat.c sbcsdat.h
	23	CLEANFILES = sbcsdat.c sbcsdat.h
	24	sbcsdat.c: sbcsgen.pl sbcs.dat
	25	perl $^ --source=sbcsdat.c
	26	sbcsdat.h: sbcsgen.pl sbcs.dat
	27	perl $^ --header=sbcsdat.h
	28
	29	EXTRA_DIST = sbcsgen.pl sbcs.dat

+2

-0

charset/autogen.sh less more

	0	#!/bin/sh
	1	autoreconf -i && rm -rf autom4te.cache

+7

-7

charset/big5enc.c less more

56	56	* charset_state.
57	57	*/
58	58
59		static int write_big5(charset_spec const *charset, long int input_chr,
60		charset_state *state,
61		void (*emit)(void *ctx, long int output), void *emitctx)
	59	static bool write_big5(charset_spec const *charset, long int input_chr,
	60	charset_state *state,
	61	void (*emit)(void *ctx, long int output), void *emitctx)
62	62	{
63	63	UNUSEDARG(charset);
64	64	UNUSEDARG(state);
65	65
66	66	if (input_chr == -1)
67		return TRUE; /* stateless; no cleanup required */
	67	return true; /* stateless; no cleanup required */
68	68
69	69	if (input_chr < 0x80) {
70	70	emit(emitctx, input_chr);
71		return TRUE;
	71	return true;
72	72	} else {
73	73	int r, c;
74	74	if (unicode_to_big5(input_chr, &r, &c)) {
75	75	emit(emitctx, r + 0xA1);
76	76	emit(emitctx, c + 0x40);
77		return TRUE;
	77	return true;
78	78	} else {
79		return FALSE;
	79	return false;
80	80	}
81	81	}
82	82	}

+4

-4

charset/big5set.c less more

3964	3964	return big5_forward[r][c];
3965	3965	}
3966	3966
3967		/* This one returns 1 on success, 0 if the code point doesn't exist. */
3968		int unicode_to_big5(long int unicode, int *r, int *c)
	3967	/* This one returns true on success, false if the code point doesn't exist. */
	3968	bool unicode_to_big5(long int unicode, int *r, int *c)
3969	3969	{
3970	3970	int rr, cc;
3971	3971	long int uu;

3985	3985	else {
3986	3986	*r = rr;
3987	3987	*c = cc;
3988		return 1;
	3988	return true;
3989	3989	}
3990	3990	}
3991		return 0;
	3991	return false;
3992	3992	}
3993	3993
3994	3994	#ifdef TESTMODE

+33

-12

charset/charset.h less more

6	6	#define charset_charset_h
7	7
8	8	#include <stddef.h>
	9	#include <stdbool.h>
9	10
10	11	/*
11	12	* Enumeration that lists all the multibyte or single-byte

78	79	CS_UTF16,
79	80	CS_UTF16BE,
80	81	CS_UTF16LE,
	82	CS_UTF16BE_NO_BOM,
	83	CS_UTF16LE_NO_BOM,
81	84	CS_EUC_JP,
82	85	CS_EUC_CN,
83	86	CS_EUC_KR,

94	97	CS_BS4730,
95	98	CS_DEC_GRAPHICS,
96	99	CS_EUC_TW,
	100	CS_ISO6937,
	101	CS_ISO6937_EURO,
	102	CS_ITS,
	103	CS_SAIL,
97	104	CS_LIMIT /* dummy value indicating extent of enum */
98	105	} charset_t;
99	106

107	114	* charset_state mystate = CHARSET_INIT_STATE;
108	115	*/
109	116	#define CHARSET_INIT_STATE { 0L, 0L } /* a suitable initialiser */
	117
	118	#if defined __cplusplus
	119	extern "C" {
	120	#if 0
	121	}
	122	#endif
	123	#endif
110	124
111	125	/*
112	126	* This external variable contains the same data, but is provided

167	181	* If `error' is non-NULL and a character is found which cannot be
168	182	* expressed in the output charset, conversion will terminate at
169	183	* that character (so `input' points to the offending character)
170		* and `*error' will be set to TRUE; if `error' is non-NULL and no
	184	* and `*error' will be set to true; if `error' is non-NULL and no
171	185	* difficult characters are encountered, `*error' will be set to
172		* FALSE. If `error' is NULL, difficult characters will simply be
	186	* false. If `error' is NULL, difficult characters will simply be
173	187	* ignored.
174	188	*
175	189	* If `input' is NULL, this routine will output the necessary bytes

186	200
187	201	int charset_from_unicode(const wchar_t **input, int *inlen,
188	202	char *output, int outlen,
189		int charset, charset_state *state, int *error);
	203	int charset, charset_state *state, bool *error);
190	204
191	205	/*
192	206	* Convert X11 encoding names to and from our charset identifiers.

233	247	int charset_upgrade(int charset);
234	248
235	249	/*
236		* This function returns TRUE if the input charset is a vaguely
237		* sensible superset of ASCII. That is, it returns FALSE for 7-bit
	250	* This function returns true if the input charset is a vaguely
	251	* sensible superset of ASCII. That is, it returns false for 7-bit
238	252	* encoding formats such as HZ and UTF-7.
239	253	*/
240		int charset_contains_ascii(int charset);
241
242		/*
243		* This function returns TRUE if the input charset is single-byte.
244		*/
245		int charset_is_single_byte(int charset);
	254	bool charset_contains_ascii(int charset);
	255
	256	/*
	257	* This function returns true if the input charset is single-byte.
	258	*/
	259	bool charset_is_single_byte(int charset);
246	260
247	261	/*
248	262	* This function tries to deduce the CS_* identifier of the charset

284	298	* if (charset_exists(cs))
285	299	* do_stuff_with(cs);
286	300	*/
287		int charset_exists(int charset);
	301	bool charset_exists(int charset);
	302
	303	#if defined __cplusplus
	304	#if 0
	305	{
	306	#endif
	307	}
	308	#endif
288	309
289	310	#endif /* charset_charset_h */

+4

-4

charset/cns11643.c less more

13019	13019	return cns11643_forward((p*94+r)*94+c);
13020	13020	}
13021	13021
13022		/* This one returns 1 on success, 0 if the code point doesn't exist. */
13023		int unicode_to_cns11643(long int unicode, int *p, int *r, int *c)
	13022	/* This one returns true on success, false if the code point doesn't exist. */
	13023	bool unicode_to_cns11643(long int unicode, int *p, int *r, int *c)
13024	13024	{
13025	13025	int index, pp, rr, cc;
13026	13026	long int uu;

13044	13044	*p = pp;
13045	13045	*r = rr;
13046	13046	*c = cc;
13047		return 1;
	13047	return true;
13048	13048	}
13049	13049	}
13050		return 0;
	13050	return false;
13051	13051	}
13052	13052
13053	13053	#ifdef TESTMODE

+16

-0

charset/configure.ac less more

	0	# autoconf input for libcharset.
	1
	2	AC_INIT([libcharset], [NOVERSION], [anakin@pobox.com])
	3	AC_CONFIG_SRCDIR([charset.h])
	4	AC_CONFIG_AUX_DIR([.])
	5
	6	AM_INIT_AUTOMAKE(foreign)
	7
	8	AC_PROG_CC
	9	AC_PROG_RANLIB
	10	AC_PROG_INSTALL
	11
	12	AC_LANG([C])
	13
	14	AC_CONFIG_FILES([Makefile])
	15	AC_OUTPUT

+7

-5

charset/confuse.c less more

38	38	char *p = *++argv;
39	39	char *orig = p;
40	40	char *end;
41		int base = 16, semi_ok = 0;
	41	int base = 16;
	42	bool semi_ok = false;
42	43	wchar_t ch;
43	44
44	45	if ((p[0] == 'U' || p[0] == 'u') &&

52	53	p++;
53	54	else
54	55	base = 10;
55		semi_ok = 1;
56		} else if (mbtowc(&ch, p, strlen(p)) == strlen(p)) {
	56	semi_ok = true;
	57	} else if ((size_t)mbtowc(&ch, p, strlen(p)) == strlen(p)) {
57	58	chars[nchars++] = ch;
58	59	continue;
59	60	}

73	74	for (i = 0; i < nchars; i++) {
74	75	wchar_t inbuf[1];
75	76	const wchar_t *inptr;
76		int inlen, error, ret;
	77	int inlen, ret;
	78	bool error;
77	79
78	80	if (!charset_exists(cs)) {
79	81	encodings[i*CS_LIMIT+cs].len = 0;

83	85	inbuf[0] = chars[i];
84	86	inptr = inbuf;
85	87	inlen = 1;
86		error = 0;
	88	error = false;
87	89	ret = charset_from_unicode(&inptr, &inlen,
88	90	encodings[i*CS_LIMIT+cs].string,
89	91	MAXENCLEN, cs, NULL, &error);

+40

-26

charset/convcs.c less more

62	62	fputs(helptext, fp);
63	63	}
64	64
65		int match_long_opt(const char *argument, const char *optname, const char **val)
66		{
67		int optlen = strlen(optname);
	65	bool match_long_opt(const char *argument, const char *optname,
	66	const char **val)
	67	{
	68	size_t optlen = strlen(optname);
68	69	if (strcspn(argument, "=") != optlen)
69		return 0; /* not the right length to match */
	70	return false; /* not the right length to match */
70	71	if (memcmp(argument, optname, optlen) != 0)
71		return 0; /* doesn't match the leading text */
	72	return false; /* doesn't match the leading text */
72	73	if (argument[optlen])
73	74	*val = argument + optlen + 1;
74	75	else
75	76	*val = NULL;
76		return 1;
	77	return true;
77	78	}
78	79
79	80	static int srcset = CS_NONE;
80	81	static int dstset = CS_NONE;
81		static int html_mode = 0;
	82	static bool html_mode = false;
82	83	static const wchar_t *replacement_cooked = NULL;
83	84	static int replacement_cooked_len = 0;
84	85
85	86	int main(int argc, char **argv)
86	87	{
87		int doing_opts = 1;
	88	bool doing_opts = true;
88	89	int localeset;
89		charset_state instate = CHARSET_INIT_STATE;
90		charset_state outstate = CHARSET_INIT_STATE;
91		char inbuf[256], outbuf[256];
92		wchar_t midbuf[256];
93	90	const char *replacement_raw = NULL;
94	91	const char *inptr;
95		const wchar_t *midptr;
96		int rdlen, inlen, midlen, inret, midret;
	92	int inlen;
97	93	const char *infilename = NULL;
98	94
99	95	setlocale(LC_CTYPE, "");

104	100	const char *v;
105	101	if (*p == '-' && p[1] && doing_opts) {
106	102	if (!strcmp(p, "--")) {
107		doing_opts = 0;
	103	doing_opts = false;
108	104	} else if (match_long_opt(p, "--help", &v)) {
109	105	help(stdout);
110	106	return 0;

120	116	}
121	117	replacement_raw = v;
122	118	} else if (match_long_opt(p, "--html", &v)) {
123		html_mode = 1;
	119	html_mode = true;
124	120	} else {
125	121	fprintf(stderr, "convcs: unrecognised option '%s'\n", p);
126	122	return 1;
127	123	}
128	124	} else {
129		int cs;
130
131	125	if (srcset == CS_NONE) {
132	126	srcset = !strcmp(p, "-")? localeset : charset_from_localenc(p);
133	127	if (srcset == CS_NONE) {

220	214	if (html_srcset != CS_NONE) {
221	215	const char *output_cs_name = charset_to_mimeenc(dstset);
222	216	srcset = html_srcset;
223		assert(namepos + namelen <= rdret);
	217	assert(namepos + namelen <= (size_t)rdret);
224	218	convert_got_data(inbuf, namepos);
225	219	convert_got_data(output_cs_name, strlen(output_cs_name));
226	220	convert_got_data(inbuf + namepos + namelen,

230	224	}
231	225	}
232	226
233		while (1) {
234		if (!fgets(inbuf, sizeof(inbuf), infile))
235		break; /* EOF */
236
237		convert_got_data(inbuf, strlen(inbuf));
	227	bool eof = false;
	228	while (!eof) {
	229	/*
	230	* Manual loop on getc which has the feature of fgets that we
	231	* stop if we see a newline (so that when convcs is run
	232	* interactively in a terminal it will deliver each translated
	233	* line promptly), but also has the feature of fread that it
	234	* provides the correct buffer length even in the face of NUL
	235	* bytes in the input.
	236	*/
	237
	238	size_t nread = 0;
	239	while (nread < lenof(inbuf)) {
	240	int c = getc(infile);
	241	if (c == EOF) {
	242	eof = true;
	243	break;
	244	}
	245	inbuf[nread++] = c;
	246	if (c == '\n')
	247	break;
	248	}
	249
	250	if (nread)
	251	convert_got_data(inbuf, nread);
238	252	}
239	253	convert_done();
240	254	return 0;

263	277	lenof(midbuf), srcset,
264	278	&instate, replacement_cooked,
265	279	replacement_cooked_len)) > 0) {
266		int error;
	280	bool error;
267	281
268	282	midlen = inret;
269	283	midptr = midbuf;

276	290	while ( (midret = charset_from_unicode(&midptr, &midlen, outbuf,
277	291	lenof(outbuf), dstset,
278	292	&outstate, &error)) > 0 ||
279		error != 0) {
	293	error) {
280	294	fwrite(outbuf, 1, midret, stdout);
281	295	if (error) {
282	296	const wchar_t *repl_ptr = replacement_cooked;

+8

-8

charset/cp949.c less more

56	56	* charset_state.
57	57	*/
58	58
59		static int write_cp949(charset_spec const *charset, long int input_chr,
60		charset_state *state,
61		void (*emit)(void *ctx, long int output),
62		void *emitctx)
	59	static bool write_cp949(charset_spec const *charset, long int input_chr,
	60	charset_state *state,
	61	void (*emit)(void *ctx, long int output),
	62	void *emitctx)
63	63	{
64	64	UNUSEDARG(charset);
65	65	UNUSEDARG(state);
66	66
67	67	if (input_chr == -1)
68		return TRUE; /* stateless; no cleanup required */
	68	return true; /* stateless; no cleanup required */
69	69
70	70	if (input_chr < 0x80) {
71	71	emit(emitctx, input_chr);
72		return TRUE;
	72	return true;
73	73	} else {
74	74	int r, c;
75	75	if (unicode_to_cp949(input_chr, &r, &c)) {
76	76	emit(emitctx, r + 0x80);
77	77	emit(emitctx, c + 0x40);
78		return TRUE;
	78	return true;
79	79	} else {
80		return FALSE;
	80	return false;
81	81	}
82	82	}
83	83	}

+171

-36

charset/csshow.c less more

11	11	* terminal window, of course.
12	12	*
13	13	* Possible extra features:
14		* - configurable row len and table size.
	14	* - configurable row length.
15	15	* - option to disambiguate the various classes of failure in the
16	16	* output, e.g. if terminfo gives us control sequences to change
17	17	* colours then we could colour the missing characters differently

22	22	* of undisplayability. (In particular, don't forget to turn off
23	23	* the early exit when nothing in the range is printable at
24	24	* all.)
25		* - ability to display sub-blocks of multibyte encodings such as
26		* EUCs. But that would need some thought about how to sensibly
27		* index those tables.
28	25	*/
29	26
30		#define _XOPEN_SOURCE 500 /* for wcwidth and snprintf */
	27	/*
	28	* Feature macros I've found necessary to make the standard headers
	29	* declare wcwidth and snprintf (on various systems).
	30	*/
	31	#define _XOPEN_SOURCE 500
	32	#define _C99_SOURCE
31	33
32	34	#include <assert.h>
33	35	#include <stdio.h>

47	49	#include "charset.h"
48	50
49	51	static const char *helptext =
50		"usage: csshow ( CHARSET | BASE-UNICODE-VALUE )\n"
51		" e.g.: csshow Win1252\n"
52		" csshow U+2500\n"
	52	"usage: csshow CHARSET-NAME [ ENCODING-PREFIX-BYTE... ]\n"
	53	" e.g.: csshow Win1252 show a whole single-byte charset\n"
	54	" csshow Shift-JIS show all single-byte chars in a "
	55	"multibyte charset\n"
	56	" csshow Shift-JIS 9C show all chars encoded as 9C xx in "
	57	"Shift-JIS\n\n"
	58	" or: csshow BASE-UNICODE-VALUE [ +RANGE-LENGTH | END-UNICODE-VALUE ]\n"
	59	" e.g.: csshow U+2500 show 0x100 characters starting at U+2500 "
	60	"inclusive\n"
	61	" e.g.: csshow U+2500 +128 show a different number of characters\n"
	62	" e.g.: csshow U+2500 +0x80 same effect, but you can write the length "
	63	"in hex\n"
	64	" csshow U+2500 U+2580 or specify the (non-inclusive) range "
	65	"endpoint\n\n"
53	66	" also: csshow --help display this help text\n"
54	67	;
55	68

62	75	BAD_CHAR_IN_SOURCE_CHARSET,
63	76	BAD_CHAR_IN_OUTPUT_CHARSET,
64	77	UNPRINTABLE_CHAR,
	78	MULTIBYTE_INTRODUCER,
65	79	FIRST_PRINTABLE_VALUE,
66	80	COMBINING_CHAR = FIRST_PRINTABLE_VALUE,
67	81	WIDE_PRINTABLE_CHAR,

72	86	char buf[7]; /* maximum even theoretical UTF-8 code length, plus NUL */
73	87	};
74	88
	89	struct buf {
	90	char *data;
	91	size_t size, len;
	92	};
	93	static char *buf_add_space(struct buf *buf, size_t space)
	94	{
	95	char *toret;
	96
	97	if (buf->size - buf->len < space) {
	98	buf->size = (buf->len + space) * 5 / 4 + 64;
	99	buf->data = realloc(buf->data, buf->size);
	100	if (!buf->data) {
	101	fprintf(stderr, "csshow: out of memory\n");
	102	exit(1);
	103	}
	104	}
	105
	106	toret = buf->data + buf->len;
	107	buf->len += space;
	108	return toret;
	109	}
	110
	111	static enum Trans try_translate_from_source(
	112	const char *in, int inlen, int charset, wchar_t *wc_out)
	113	{
	114	const char *cp;
	115	int clen, ret0, ret1;
	116
	117	cp = in;
	118	clen = inlen;
	119	ret1 = charset_to_unicode(&cp, &clen, wc_out, 1, charset, NULL, L"?", 1);
	120
	121	cp = in;
	122	clen = inlen;
	123	ret0 = charset_to_unicode(&cp, &clen, wc_out, 1, charset, NULL, L"", 0);
	124
	125	if (ret0 == 1 && ret1 == 1) {
	126	/* Successful translation into Unicode */
	127	return NORMAL_PRINTABLE_CHAR;
	128	} else if (ret0 == 0 && ret1 == 0) {
	129	/* No output, even _with_ a replacement character
	130	* defined for bad chars, means the input
	131	* character has been absorbed into the charset
	132	* state but not _yet_ generated any output or
	133	* discovered an error. In other words, this is a
	134	* multibyte introducer. */
	135	return MULTIBYTE_INTRODUCER;
	136	} else {
	137	return BAD_CHAR_IN_SOURCE_CHARSET;
	138	}
	139	}
	140
75	141	int main(int argc, char **argv)
76	142	{
77		int doing_opts = 1;
	143	bool doing_opts = true;
78	144	int source_charset = CS_ASCII, output_charset = CS_NONE;
79	145	unsigned long base = 0, size = 0x100, rowlen = 0x10;
	146	struct buf prefix = { NULL, 0, 0 };
	147	enum ArgsState {
	148	AS_INITIAL,
	149	AS_UNICODE_ENDRANGE,
	150	AS_MBCS_PREFIX,
	151	AS_DONE
	152	} args_state = AS_INITIAL;
80	153
81	154	while (--argc > 0) {
82	155	const char *p = *++argv;
83	156	if (*p == '-' && doing_opts) {
84	157	if (!strcmp(p, "--")) {
85		doing_opts = 0;
	158	doing_opts = false;
86	159	} else if (!strcmp(p, "--help")) {
87	160	help(stdout);
88	161	return 0;

90	163	fprintf(stderr, "csshow: unrecognised option '%s'\n", p);
91	164	return 1;
92	165	}
93		} else {
	166	} else if (args_state == AS_INITIAL) {
	167	/*
	168	* First argument can be a Unicode code point or a
	169	* single-byte charset name.
	170	*/
	171
94	172	int cs;
95	173
96	174	if (toupper((unsigned char)p[0]) == 'U' &&
97	175	(p[1] == '-' || p[1] == '+')) {
98	176	source_charset = CS_NONE; /* means just translate Unicode */
99	177	base = strtoul(p+2, NULL, 16);
	178	args_state = AS_UNICODE_ENDRANGE;
100	179	} else if ((cs = charset_from_localenc(p)) != CS_NONE) {
101		if (!charset_is_single_byte(cs)) {
102		fprintf(stderr, "csshow: cannot display multibyte"
103		" charset %s\n", charset_to_localenc(cs));
104		return 1;
105		}
106	180	source_charset = cs;
107	181	base = 0;
	182	args_state = AS_MBCS_PREFIX;
108	183	} else {
109	184	fprintf(stderr, "csshow: unrecognised argument '%s'\n", p);
110	185	return 1;
111	186	}
	187	} else if (args_state == AS_UNICODE_ENDRANGE) {
	188	/*
	189	* If the first argument was a Unicode code point, then
	190	* the next argument is taken to be an end point for the
	191	* range, so that you can print larger ranges than 256
	192	* characters.
	193	*/
	194
	195	if (toupper((unsigned char)p[0]) == 'U' &&
	196	(p[1] == '-' || p[1] == '+')) {
	197	/* U+XXXX / U-XXXXXXXX specify the end code point of
	198	* the range. (Exclusive.) */
	199	size = strtoul(p+2, NULL, 16) - base;
	200	} else if (p[0] == '+') {
	201	/* +NNNN specifies the size of the range. We use
	202	* strtoul in base 0 so that decimal or 0xHEX are both
	203	* accepted. */
	204	size = strtoul(p+1, NULL, 0);
	205	}
	206
	207	/* No further arguments expected. */
	208	args_state = AS_DONE;
	209
	210	} else if (args_state == AS_MBCS_PREFIX) {
	211	/*
	212	* If the first argument was a charset name, then further
	213	* arguments are taken to be hex byte values to accumulate
	214	* into an encoding prefix. This allows you to say, for
	215	* example, 'csshow Shift-JIS 89' to see the slice of the
	216	* Shift-JIS encoding consisting of characters whose first
	217	* encoding byte is 0x89, indexed by their second byte.
	218	*/
	219
	220	*buf_add_space(&prefix, 1) = strtoul(p, NULL, 16);
	221	} else {
	222	fprintf(stderr, "csshow: extra argument '%s' unexpected\n", p);
	223	return 1;
112	224	}
113	225	}
114	226

123	235	struct translated_char *trans;
124	236	const char *rowheadfmt;
125	237	int rowheadwidth, colwidth;
126		int printed_a_line, skipped_a_line;
	238	bool printed_a_line, skipped_a_line;
127	239	unsigned long i, j;
	240	enum Trans transret;
	241	char *suffix_position = NULL;
	242	wchar_t wc;
	243
	244	if (source_charset != CS_NONE) {
	245	/*
	246	* First, check that the prefix doesn't already form a
	247	* completed character or an error.
	248	*/
	249	transret = try_translate_from_source(
	250	prefix.data, prefix.len, source_charset, &wc);
	251	if (transret == BAD_CHAR_IN_SOURCE_CHARSET) {
	252	fprintf(stderr, "csshow: prefix sequence is not valid\n");
	253	return 1;
	254	} else if (transret != MULTIBYTE_INTRODUCER) {
	255	fprintf(stderr, "csshow: prefix sequence generates output\n");
	256	return 1;
	257	}
	258
	259	/*
	260	* Make space in the prefix buffer to put each test byte on
	261	* the end.
	262	*/
	263	suffix_position = buf_add_space(&prefix, 1);
	264	}
128	265
129	266	trans = malloc(size * sizeof(struct translated_char));
130	267	if (!trans) {

138	275	*/
139	276	for (i = 0; i < size; i++) {
140	277	unsigned long charcode = base + i;
141		wchar_t wc;
142	278
143	279	trans[i].buf[0] = '\0';
144	280
145	281	if (source_charset == CS_NONE) {
146	282	wc = charcode;
147	283	} else {
148		char c = charcode;
149		const char *cp = &c;
150		int clen = 1;
151		int error = 0;
152
153		int ret = charset_to_unicode(
154		&cp, &clen, &wc, 1, source_charset, NULL, L"", 0);
155		if (ret != 1) {
156		trans[i].type = BAD_CHAR_IN_SOURCE_CHARSET;
	284	*suffix_position = charcode;
	285	transret = try_translate_from_source(
	286	prefix.data, prefix.len, source_charset, &wc);
	287
	288	if (transret != NORMAL_PRINTABLE_CHAR) {
	289	trans[i].type = transret;
157	290	continue;
158	291	}
159	292	}

161	294	{
162	295	const wchar_t *wcp = &wc;
163	296	int wclen = 1;
164		int error = 0;
	297	bool error = false;
165	298
166	299	int ret = charset_from_unicode(
167	300	&wcp, &wclen, trans[i].buf, sizeof(trans[i].buf) - 1,
168	301	output_charset, NULL, &error);
169	302
170		assert(ret < sizeof(trans[i].buf));
	303	assert(0 <= ret);
	304	assert((size_t)ret < sizeof(trans[i].buf));
171	305	trans[i].buf[ret] = '\0';
172	306
173	307	if (wclen != 0 || ret == 0 || error) {

267	401	printf("%-*X", colwidth, (unsigned)i);
268	402	printf("\n");
269	403
270		printed_a_line = skipped_a_line = 0;
	404	printed_a_line = false;
	405	skipped_a_line = false;
271	406
272	407	for (j = 0; j < size; j += rowlen) {
273	408	/* See if we're skipping this row completely. */
274		int skip = 1;
	409	bool skip = true;
275	410	for (i = 0; i < rowlen && j+i < size; i++)
276	411	if (trans[j+i].type >= FIRST_PRINTABLE_VALUE)
277		skip = 0;
	412	skip = false;
278	413	if (skip) {
279		skipped_a_line = 1;
	414	skipped_a_line = true;
280	415	continue;
281	416	}
282	417

288	423	if (skipped_a_line && printed_a_line) {
289	424	printf("\n");
290	425	}
291		skipped_a_line = 0;
292
293		printed_a_line = 1;
	426	skipped_a_line = false;
	427
	428	printed_a_line = true;
294	429	printf(rowheadfmt, (unsigned)(base + j));;
295	430	for (i = 0; i < rowlen && j+i < size; i++) {
296	431	int chars_left = colwidth;

+9

-8

charset/cstable.c less more

14	14	#include "sbcsdat.h"
15	15
16	16	#define ENUM_CHARSET(x) extern charset_spec const charset_##x;
17		#include "enum.c"
	17	#include "enum.h"
18	18	#undef ENUM_CHARSET
19	19	static charset_spec const *const cs_table[] = {
20	20	#define ENUM_CHARSET(x) &charset_##x,
21		#include "enum.c"
	21	#include "enum.h"
22	22	#undef ENUM_CHARSET
23	23	};
24	24	static const char *const cs_names[] = {
25	25	#define ENUM_CHARSET(x) #x,
26		#include "enum.c"
	26	#include "enum.h"
27	27	#undef ENUM_CHARSET
28	28	};
29	29
30	30	int main(int argc, char **argv)
31	31	{
32	32	long int c;
33		int internal_names = FALSE;
34		int verbose = FALSE;
	33	bool internal_names = false;
	34	bool verbose = false;
35	35
36	36	while (--argc) {
37	37	char *p = *++argv;
38	38	if (!strcmp(p, "-i"))
39		internal_names = TRUE;
	39	internal_names = true;
40	40	else if (!strcmp(p, "-v"))
41		verbose = TRUE;
	41	verbose = true;
42	42	}
43	43
44	44	for (c = 0; c < 0x30000; c++) {
45		int i, plane, row, col, chr;
	45	int plane, row, col, chr;
	46	size_t i;
46	47	char const *sep = "";
47	48
48	49	printf("U+%04x:", (unsigned)c);

+0

-28

~~charset/enum.c~~ less more

0		/*
1		* enum.c - enumerate all charsets defined by the library.
2		*
3		* This file maintains a list of every other source file which
4		* contains ENUM_CHARSET definitions. It #includes each one with
5		* ENUM_CHARSETS defined, which causes those source files to do
6		* nothing at all except call the ENUM_CHARSET macro on each
7		* charset they define.
8		*
9		* This file in turn is included from various other places, with
10		* the ENUM_CHARSET macro defined to various different things. This
11		* allows us to have multiple implementations of the master charset
12		* lookup table (a static one and a dynamic one).
13		*/
14
15		#define ENUM_CHARSETS
16		#include "sbcsdat.c"
17		#include "utf8.c"
18		#include "utf7.c"
19		#include "utf16.c"
20		#include "euc.c"
21		#include "iso2022.c"
22		#include "iso2022s.c"
23		#include "big5enc.c"
24		#include "shiftjis.c"
25		#include "hz.c"
26		#include "cp949.c"
27		#undef ENUM_CHARSETS

+29

-0

charset/enum.h less more

	0	/*
	1	* enum.h - enumerate all charsets defined by the library.
	2	*
	3	* This file maintains a list of every other source file which
	4	* contains ENUM_CHARSET definitions. It #includes each one with
	5	* ENUM_CHARSETS defined, which causes those source files to do
	6	* nothing at all except call the ENUM_CHARSET macro on each
	7	* charset they define.
	8	*
	9	* This file in turn is included from various other places, with
	10	* the ENUM_CHARSET macro defined to various different things. This
	11	* allows us to have multiple implementations of the master charset
	12	* lookup table (a static one and a dynamic one).
	13	*/
	14
	15	#define ENUM_CHARSETS
	16	#include "sbcsdat.c"
	17	#include "utf8.c"
	18	#include "utf7.c"
	19	#include "utf16.c"
	20	#include "euc.c"
	21	#include "iso2022.c"
	22	#include "iso2022s.c"
	23	#include "big5enc.c"
	24	#include "shiftjis.c"
	25	#include "hz.c"
	26	#include "cp949.c"
	27	#include "iso6937.c"
	28	#undef ENUM_CHARSETS

+7

-7

charset/euc.c less more

92	92	* charset_state.
93	93	*/
94	94
95		static int write_euc(charset_spec const *charset, long int input_chr,
96		charset_state *state,
97		void (*emit)(void *ctx, long int output), void *emitctx)
	95	static bool write_euc(charset_spec const *charset, long int input_chr,
	96	charset_state *state,
	97	void (*emit)(void *ctx, long int output), void *emitctx)
98	98	{
99	99	struct euc const *euc = (struct euc *)charset->data;
100	100	unsigned long c;

103	103	UNUSEDARG(state);
104	104
105	105	if (input_chr == -1)
106		return TRUE; /* stateless; no cleanup required */
	106	return true; /* stateless; no cleanup required */
107	107
108	108	/* ASCII is the easy bit, and is always the same. */
109	109	if (input_chr < 0x80) {
110	110	emit(emitctx, input_chr);
111		return TRUE;
	111	return true;
112	112	}
113	113
114	114	c = euc->from_ucs(input_chr);
115	115	if (!c) {
116		return FALSE;
	116	return false;
117	117	}
118	118
119	119	cset = c >> 28;

125	125
126	126	while (len--)
127	127	emit(emitctx, (c >> (8*len)) & 0xFF);
128		return TRUE;
	128	return true;
129	129	}
130	130
131	131	/*

+7

-7

charset/fromucs.c less more

8	8	char *output;
9	9	int outlen;
10	10	int writtenlen;
11		int stopped;
	11	bool stopped;
12	12	};
13	13
14	14	static void charset_emit(void *ctx, long int output)

22	22	param->outlen--;
23	23	param->writtenlen++;
24	24	} else {
25		param->stopped = 1;
	25	param->stopped = true;
26	26	}
27	27	}
28	28
29	29	int charset_from_unicode(const wchar_t **input, int *inlen,
30	30	char *output, int outlen,
31		int charset, charset_state *state, int *error)
	31	int charset, charset_state *state, bool *error)
32	32	{
33	33	charset_spec const *spec = charset_find_spec(charset);
34	34	charset_state localstate = CHARSET_INIT_STATE;

43	43	param.output = output;
44	44	param.outlen = outlen;
45	45	param.writtenlen = 0;
46		param.stopped = 0;
	46	param.stopped = false;
47	47
48	48	if (state)
49	49	localstate = *state; /* structure copy */
50	50	if (error)
51		*error = FALSE;
	51	*error = false;
52	52
53	53	while (*inlen > 0) {
54	54	int lenbefore = param.writtenlen;
55		int ret;
	55	bool ret;
56	56
57	57	if (input)
58	58	ret = spec->write(spec, **input, &localstate,

64	64	* We have hit a difficult character, which the user
65	65	* wants to know about. Leave now.
66	66	*/
67		*error = TRUE;
	67	*error = true;
68	68	return lenbefore;
69	69	}
70	70	if (param.stopped) {

+4

-4

charset/gb2312.c less more

2019	2019	return gb2312_forward[r][c];
2020	2020	}
2021	2021
2022		/* This one returns 1 on success, 0 if the code point doesn't exist. */
2023		int unicode_to_gb2312(long int unicode, int *r, int *c)
	2022	/* This one returns true on success, false if the code point doesn't exist. */
	2023	bool unicode_to_gb2312(long int unicode, int *r, int *c)
2024	2024	{
2025	2025	int rr, cc;
2026	2026	long int uu;

2040	2040	else {
2041	2041	*r = rr;
2042	2042	*c = cc;
2043		return 1;
	2043	return true;
2044	2044	}
2045	2045	}
2046		return 0;
	2046	return false;
2047	2047	}
2048	2048
2049	2049	#ifdef TESTMODE

+6

-6

charset/hz.c less more

89	89	}
90	90	}
91	91
92		static int write_hz(charset_spec const *charset, long int input_chr,
93		charset_state *state,
94		void (*emit)(void *ctx, long int output), void *emitctx)
	92	static bool write_hz(charset_spec const *charset, long int input_chr,
	93	charset_state *state,
	94	void (*emit)(void *ctx, long int output), void *emitctx)
95	95	{
96	96	int desired_state, r, c;
97	97

106	106	} else if (unicode_to_gb2312(input_chr, &r, &c)) {
107	107	desired_state = 1;
108	108	} else {
109		return FALSE;
	109	return false;
110	110	}
111	111
112	112	if (state->s0 != (unsigned)desired_state) {

116	116	}
117	117
118	118	if (input_chr < 0)
119		return TRUE; /* special case: just reset state */
	119	return true; /* special case: just reset state */
120	120
121	121	if (state->s0) {
122	122	/*

127	127	} else {
128	128	emit(emitctx, c);
129	129	}
130		return TRUE;
	130	return true;
131	131	}
132	132
133	133	const charset_spec charset_CS_HZ = {

+27

-24

charset/internal.h less more

4	4	#ifndef charset_internal_h
5	5	#define charset_internal_h
6	6
	7	#include <stdbool.h>
	8
7	9	/* This invariably comes in handy */
8	10	#define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
9	11
10	12	/* This is an invalid Unicode value used to indicate an error. */
11	13	#define ERROR 0xFFFFL /* Unicode value representing error */
12
13		#undef TRUE
14		#define TRUE 1
15		#undef FALSE
16		#define FALSE 0
17	14
18	15	typedef struct charset_spec charset_spec;
19	16	typedef struct sbcs_data sbcs_data;

35	32	* character set. The `emit' function expects to get byte
36	33	* values passed to it.
37	34	*
38		* A non-representable input character should cause a FALSE
	35	* A non-representable input character should cause a false
39	36	* return, _before_ `emit' is called. Successful conversion
40		* causes a TRUE return.
	37	* causes a true return.
41	38	*
42	39	* If `input_chr' is -1, this function must revert the encoding
43	40	* state to any default required at the end of a piece of
44	41	* encoded text.
45	42	*/
46		int (write)(charset_spec const charset, long int input_chr,
47		charset_state *state,
48		void (emit)(void ctx, long int output), void *emitctx);
	43	bool (write)(charset_spec const charset, long int input_chr,
	44	charset_state *state,
	45	void (emit)(void ctx, long int output), void *emitctx);
49	46	void const *data;
50	47	};
51	48

87	84	void read_sbcs(charset_spec const *charset, long int input_chr,
88	85	charset_state *state,
89	86	void (emit)(void ctx, long int output), void *emitctx);
90		int write_sbcs(charset_spec const *charset, long int input_chr,
91		charset_state *state,
92		void (emit)(void ctx, long int output), void *emitctx);
	87	bool write_sbcs(charset_spec const *charset, long int input_chr,
	88	charset_state *state,
	89	void (emit)(void ctx, long int output), void *emitctx);
93	90	long int sbcs_to_unicode(const struct sbcs_data *sd, long int input_chr);
94	91	long int sbcs_from_unicode(const struct sbcs_data *sd, long int input_chr);
95	92
96	93	void read_utf8(charset_spec const *charset, long int input_chr,
97	94	charset_state *state,
98	95	void (emit)(void ctx, long int output), void *emitctx);
99		int write_utf8(charset_spec const *charset, long int input_chr,
100		charset_state *state,
101		void (emit)(void ctx, long int output),
102		void *emitctx);
	96	bool write_utf8(charset_spec const *charset, long int input_chr,
	97	charset_state *state,
	98	void (emit)(void ctx, long int output),
	99	void *emitctx);
103	100
104	101	long int big5_to_unicode(int r, int c);
105		int unicode_to_big5(long int unicode, int r, int c);
	102	bool unicode_to_big5(long int unicode, int r, int c);
106	103	long int cns11643_to_unicode(int p, int r, int c);
107		int unicode_to_cns11643(long int unicode, int p, int r, int *c);
	104	bool unicode_to_cns11643(long int unicode, int p, int r, int *c);
108	105	long int cp949_to_unicode(int r, int c);
109		int unicode_to_cp949(long int unicode, int r, int c);
	106	bool unicode_to_cp949(long int unicode, int r, int c);
110	107	long int ksx1001_to_unicode(int r, int c);
111		int unicode_to_ksx1001(long int unicode, int r, int c);
	108	bool unicode_to_ksx1001(long int unicode, int r, int c);
112	109	long int gb2312_to_unicode(int r, int c);
113		int unicode_to_gb2312(long int unicode, int r, int c);
	110	bool unicode_to_gb2312(long int unicode, int r, int c);
114	111	long int jisx0208_to_unicode(int r, int c);
115		int unicode_to_jisx0208(long int unicode, int r, int c);
	112	bool unicode_to_jisx0208(long int unicode, int r, int c);
116	113	long int jisx0212_to_unicode(int r, int c);
117		int unicode_to_jisx0212(long int unicode, int r, int c);
	114	bool unicode_to_jisx0212(long int unicode, int r, int c);
118	115
119	116	/*
120	117	* Placate compiler warning about unused parameters, of which we

122	119	*/
123	120	#define UNUSEDARG(x) ( (x) = (x) )
124	121
	122	#ifdef __GNUC__
	123	#define DELIBERATE_FALLTHROUGH __attribute__ ((fallthrough));
	124	#else
	125	#define DELIBERATE_FALLTHROUGH ((void)0)
	126	#endif
	127
125	128	#endif /* charset_internal_h */

+47

-35

charset/iso2022.c less more

36	36
37	37	static long int emacs_big5_1_to_unicode(int, int);
38	38	static long int emacs_big5_2_to_unicode(int, int);
39		static int unicode_to_emacs_big5(long int, int , int , int *);
	39	static bool unicode_to_emacs_big5(long int, int , int , int *);
40	40	static long int cns11643_1_to_unicode(int, int);
41	41	static long int cns11643_2_to_unicode(int, int);
42	42	static long int cns11643_3_to_unicode(int, int);

45	45	static long int cns11643_6_to_unicode(int, int);
46	46	static long int cns11643_7_to_unicode(int, int);
47	47	static long int null_dbcs_to_unicode(int, int);
48		static int unicode_to_null_dbcs(long int, int , int );
49
50		typedef int (to_dbcs_t)(long int, int , int *);
51		typedef int (to_dbcs_planar_t)(long int, int , int , int );
	48	static bool unicode_to_null_dbcs(long int, int , int );
	49
	50	typedef bool (to_dbcs_t)(long int, int , int *);
	51	typedef bool (to_dbcs_planar_t)(long int, int , int , int );
52	52
53	53	/*
54	54	* These macros cast between to_dbcs_planar_t and to_dbcs_t, in

80	80	*
81	81	* We are permitted to use ?:, however, and that works quite well
82	82	* since the actual result of the sizeof expression _is_ evaluable
83		* at compile time. So here's my final answer:
	83	* at compile time. So here's my final answer.
	84	*
	85	* (The double cast of each function pointer from its original type
	86	* through void (*)(void) to the final type is there to suppress the
	87	* warning that later versions of gcc will otherwise give about
	88	* casting between different function pointer types. Apparently gcc
	89	* accepts void (*)(void) as the canonical type you use when
	90	* _deliberately_ doing that, so going via that deals with the
	91	* warning.)
84	92	*/
85	93	#define TYPECHECK(x,y) ( sizeof((x)) == sizeof((x)) ? (y) : (y) )
86		#define DEPLANARISE(x) TYPECHECK((x) == (to_dbcs_planar_t)NULL, (to_dbcs_t)(x))
87		#define REPLANARISE(x) TYPECHECK((x) == (to_dbcs_t)NULL, (to_dbcs_planar_t)(x))
	94	#define DEPLANARISE(x) TYPECHECK((x) == (to_dbcs_planar_t)NULL, \
	95	(to_dbcs_t)(void (*)(void))(x))
	96	#define REPLANARISE(x) TYPECHECK((x) == (to_dbcs_t)NULL, \
	97	(to_dbcs_planar_t)(void (*)(void))(x))
88	98
89	99	/*
90	100	* Values used in the `enable' field. Each of these identifies a

204	214	UNUSEDARG(c);
205	215	return ERROR;
206	216	}
207		static int unicode_to_null_dbcs(long int unicode, int r, int c)
	217	static bool unicode_to_null_dbcs(long int unicode, int r, int c)
208	218	{
209	219	UNUSEDARG(unicode);
210	220	UNUSEDARG(r);
211	221	UNUSEDARG(c);
212		return 0; /* failed to convert anything */
	222	return false; /* failed to convert anything */
213	223	}
214	224
215	225	/*

239	249	return big5_to_unicode(r, c);
240	250	}
241	251
242		static int unicode_to_emacs_big5(long int unicode, int p, int r, int *c)
	252	static bool unicode_to_emacs_big5(long int unicode, int p, int r, int *c)
243	253	{
244	254	int rr, cc, s;
245	255	if (!unicode_to_big5(unicode, &rr, &cc))
246		return 0;
	256	return false;
247	257	if (cc >= 64) {
248	258	cc -= 34;
249	259	assert(cc >= 64);

257	267	}
258	268	*r = s / 94;
259	269	*c = s % 94;
260		return 1;
	270	return true;
261	271	}
262	272
263	273	/* Wrappers for cns11643_to_unicode() */

594	604	break;
595	605	}
596	606	} else if ((input_chr & 0x80) \|\| MODE < ESCSEQ) {
597		int is_gl = 0;
	607	bool is_gl = false;
598	608	struct iso2022_subcharset const *subcs;
599	609	unsigned container;
600	610	long input_7bit;

613	623	container = (state->s1 >> 28) & 3;
614	624	else { /* GL */
615	625	container = state->s1 >> 30;
616		is_gl = 1;
	626	is_gl = true;
617	627	}
618	628	input_7bit = input_chr & ~0x80;
619	629	subcs = &iso2022_subcharsets[(state->s1 >> (container * 7)) & 0x7f];

763	773	switch (i2) {
764	774	case 0: /* Obsolete version of GZDM4 */
765	775	i2 = '(';
	776	DELIBERATE_FALLTHROUGH;
766	777	case '(': /* GZDM4 / case ')': / G1DM4 */
767	778	case '': / G2DM4 / case '+': / G3DM4 */
768	779	designate(state, i2 - '(', M4, 0, input_chr);

775	786	emit(emitctx, ERROR);
776	787	break;
777	788	}
	789	break;
778	790	case '%': /* DOCS */
779	791	/* XXX What's a reasonable way to handle an unrecognised DOCS? */
780	792	switch (i2) {

805	817	}
806	818	}
807	819
808		static void oselect(charset_state *state, int i, int right,
	820	static void oselect(charset_state *state, int i, bool right,
809	821	void (emit)(void ctx, long int output),
810	822	void *emitctx)
811	823	{

982	994	* exact output policy for compound text wants thinking about more
983	995	* carefully.
984	996	*/
985		static int write_iso2022(charset_spec const *charset, long int input_chr,
986		charset_state *state,
987		void (emit)(void ctx, long int output),
988		void *emitctx)
	997	static bool write_iso2022(charset_spec const *charset, long int input_chr,
	998	charset_state *state,
	999	void (emit)(void ctx, long int output),
	1000	void *emitctx)
989	1001	{
990	1002	int i;
991	1003	struct iso2022_subcharset const *subcs;

1016	1028	if (subcs->type == mode->ltype &&
1017	1029	subcs->i == mode->li &&
1018	1030	subcs->f == mode->lf)
1019		oselect(state, i, FALSE, NULL, NULL);
	1031	oselect(state, i, false, NULL, NULL);
1020	1032	if (subcs->type == mode->rtype &&
1021	1033	subcs->i == mode->ri &&
1022	1034	subcs->f == mode->rf)
1023		oselect(state, i, TRUE, NULL, NULL);
	1035	oselect(state, i, true, NULL, NULL);
1024	1036	}
1025	1037	}
1026	1038

1035	1047	if (subcs->type == mode->ltype &&
1036	1048	subcs->i == mode->li &&
1037	1049	subcs->f == mode->lf)
1038		oselect(state, i, FALSE, emit, emitctx);
	1050	oselect(state, i, false, emit, emitctx);
1039	1051	if (subcs->type == mode->rtype &&
1040	1052	subcs->i == mode->ri &&
1041	1053	subcs->f == mode->rf)
1042		oselect(state, i, TRUE, emit, emitctx);
1043		}
1044		return TRUE;
	1054	oselect(state, i, true, emit, emitctx);
	1055	}
	1056	return true;
1045	1057	}
1046	1058
1047	1059	/*

1050	1062	*/
1051	1063	if (input_chr <= 0x20 \|\| (input_chr >= 0x7F && input_chr < 0xA0)) {
1052	1064	emit(emitctx, input_chr);
1053		return TRUE;
	1065	return true;
1054	1066	}
1055	1067
1056	1068	/*

1102	1114	}
1103	1115
1104	1116	if ((unsigned)i < lenof(iso2022_subcharsets)) {
1105		int right;
	1117	bool right;
1106	1118
1107	1119	/*
1108	1120	* Our character is represented by c1 (and possibly also

1152	1164	}
1153	1165	}
1154	1166
1155		return TRUE;
	1167	return true;
1156	1168	}
1157	1169
1158	1170	/*

1167	1179
1168	1180	for (i = 0; (unsigned)i <= lenof(ctext_encodings); i++) {
1169	1181	charset_state substate;
1170		charset_spec const *subcs = ctext_encodings[i].subcs;
1171	1182
1172	1183	/*
1173	1184	* We assume that all character sets dealt with by DOCS

1177	1188	p = data;
1178	1189
1179	1190	if ((unsigned)i < lenof(ctext_encodings)) {
	1191	charset_spec const *subcs = ctext_encodings[i].subcs;
1180	1192	if ((mode->enable_mask & (1 << ctext_encodings[i].enable)) &&
1181	1193	subcs->write(subcs, input_chr, &substate,
1182	1194	write_to_pointer, &p)) {

1194	1206
1195	1207	if (cs != -2) {
1196	1208	docs_char(state, emit, emitctx, cs, data, p - data);
1197		return TRUE;
1198		}
1199		}
1200
1201		return FALSE;
	1209	return true;
	1210	}
	1211	}
	1212
	1213	return false;
1202	1214	}
1203	1215
1204	1216	/*

+22

-22

charset/iso2022s.c less more

77	77	/*
78	78	* Is this an 8-bit ISO 2022 subset?
79	79	*/
80		int eightbit;
	80	bool eightbit;
81	81
82	82	/*
83	83	* Function calls to do the actual translation.
84	84	*/
85	85	long int (*to_ucs)(int subcharset, unsigned long bytes);
86		int (from_ucs)(long int ucs, int subcharset, unsigned long *bytes);
	86	bool (from_ucs)(long int ucs, int subcharset, unsigned long *bytes);
87	87	};
88	88
89	89	static void read_iso2022s(charset_spec const *charset, long int input_chr,

325	325	}
326	326	}
327	327
328		static int write_iso2022s(charset_spec const *charset, long int input_chr,
329		charset_state *state,
330		void (emit)(void ctx, long int output),
331		void *emitctx)
	328	static bool write_iso2022s(charset_spec const *charset, long int input_chr,
	329	charset_state *state,
	330	void (emit)(void ctx, long int output),
	331	void *emitctx)
332	332	{
333	333	struct iso2022 const iso = (struct iso2022 )charset->data;
334	334	int subcharset, len, i, j, cont, topbit = 0;

346	346	* to go in.
347	347	*/
348	348	if (input_chr >= 0 && !iso->from_ucs(input_chr, &subcharset, &bytes))
349		return FALSE;
	349	return false;
350	350
351	351	if (!(state->s1 & 0x80000000)) {
352	352	state->s1 = iso->s1;

374	374	}
375	375	}
376	376
377		return TRUE;
	377	return true;
378	378	}
379	379
380	380	/*

436	436	while (len--)
437	437	emit(emitctx, ((bytes >> (8*len)) & 0xFF) \| topbit);
438	438
439		return TRUE;
	439	return true;
440	440	}
441	441
442	442	/*

450	450	return 0xA5;
451	451	else if (bytes == 0x7E)
452	452	return 0x203E;
453		/* else fall through to ASCII */
	453	DELIBERATE_FALLTHROUGH; /* else fall through to ASCII */
454	454	case 0: return bytes; /* one-byte ASCII */
455	455	/* (no break needed since all control paths have returned) */
456	456	case 2: return jisx0208_to_unicode(((bytes >> 8) & 0xFF) - 0x21,

458	458	default: return ERROR;
459	459	}
460	460	}
461		static int iso2022jp_from_ucs(long int ucs, int *subcharset,
	461	static bool iso2022jp_from_ucs(long int ucs, int *subcharset,
462	462	unsigned long *bytes)
463	463	{
464	464	int r, c;
465	465	if (ucs < 0x80) {
466	466	*subcharset = 0;
467	467	*bytes = ucs;
468		return 1;
	468	return true;
469	469	} else if (ucs == 0xA5 \|\| ucs == 0x203E) {
470	470	*subcharset = 1;
471	471	*bytes = (ucs == 0xA5 ? 0x5C : 0x7E);
472		return 1;
	472	return true;
473	473	} else if (unicode_to_jisx0208(ucs, &r, &c)) {
474	474	*subcharset = 2;
475	475	*bytes = ((r+0x21) << 8) \| (c+0x21);
476		return 1;
	476	return true;
477	477	} else {
478		return 0;
	478	return false;
479	479	}
480	480	}
481	481	static const struct iso2022_escape iso2022jp_escapes[] = {

486	486	};
487	487	static const struct iso2022 iso2022jp = {
488	488	iso2022jp_escapes, lenof(iso2022jp_escapes),
489		"\1\1\2", "\3", 0x80000000, NULL, FALSE,
	489	"\1\1\2", "\3", 0x80000000, NULL, false,
490	490	iso2022jp_to_ucs, iso2022jp_from_ucs
491	491	};
492	492	const charset_spec charset_CS_ISO2022_JP = {

505	505	default: return ERROR;
506	506	}
507	507	}
508		static int iso2022kr_from_ucs(long int ucs, int *subcharset,
509		unsigned long *bytes)
	508	static bool iso2022kr_from_ucs(long int ucs, int *subcharset,
	509	unsigned long *bytes)
510	510	{
511	511	int r, c;
512	512	if (ucs < 0x80) {
513	513	*subcharset = 0;
514	514	*bytes = ucs;
515		return 1;
	515	return true;
516	516	} else if (unicode_to_ksx1001(ucs, &r, &c)) {
517	517	*subcharset = 1;
518	518	*bytes = ((r+0x21) << 8) \| (c+0x21);
519		return 1;
	519	return true;
520	520	} else {
521		return 0;
	521	return false;
522	522	}
523	523	}
524	524	static const struct iso2022_escape iso2022kr_escapes[] = {

528	528	};
529	529	static const struct iso2022 iso2022kr = {
530	530	iso2022kr_escapes, lenof(iso2022kr_escapes),
531		"\1\2", "\2", 0x80000040, "\033$)C", FALSE,
	531	"\1\2", "\2", 0x80000040, "\033$)C", false,
532	532	iso2022kr_to_ucs, iso2022kr_from_ucs
533	533	};
534	534	const charset_spec charset_CS_ISO2022_KR = {

+336

-0

charset/iso6937.c less more

	0	/*
	1	* iso6937.c - the _almost_ single-byte character set ISO/IEC 6937.
	2	*
	3	* Also, a tiny variation on it which adds the Euro sign at the
	4	* previously unused position 0xA4, used in DVB metadata.
	5	*/
	6
	7	#ifndef ENUM_CHARSETS
	8
	9	#include "charset.h"
	10	#include "internal.h"
	11
	12	/*
	13	* ISO/IEC 6937 is a _mostly_ single-byte character set, except that
	14	* the 0xC0-0xCF range of bytes are introducer characters for two-byte
	15	* encodings of accented letters.
	16	*
	17	* You'd be forgiven for mistaking the bytes in the C0-CF range for
	18	* something more like combining characters, because the two-byte
	19	* encodings are organised in a very semantic way: each introducer
	20	* character corresponds to a specific diacritic mark, in the sense
	21	* that all the two-byte encodings beginning with that introducer byte
	22	* have an ASCII alphabetic character as their second byte and encode
	23	* that letter with the given diacritic.
	24	*
	25	* But it would be a mistake to consider this to have anything to do
	26	* with the Unicode combining characters for those diacritics, because
	27	* (a) the ISO 6937 diacritic bytes are _prefixes_, not combining
	28	* characters applied afterwards; (b) ISO 6937 specifies an exact list
	29	* of the permissible second bytes after each introducer; (c) the
	30	* right translation of one of these two-byte encodings is the single
	31	* Unicode code point for the accented letter, and not a separate pair
	32	* of (letter, combining character) code points.
	33	*
	34	* So this is better viewed as simply a multibyte _encoding_, just
	35	* with an unusually mnemonic organisation.
	36	*
	37	* Implementation strategy: the single-byte encodings for this charset
	38	* (or rather, this pair of very similar charsets) are handled by a
	39	* pair of mapping tables in sbcs.dat, only declared with the 'tables'
	40	* rather than 'charset' keyword so that sbcsgen.pl doesn't generate
	41	* the top-level charset_spec. So the read and write functions below
	42	* can call sbcs_to_unicode and sbcs_from_unicode on those tables just
	43	* like the ones in sbcs.c.
	44	*
	45	* The two-byte pairs are dealt with using the pair of mapping tables
	46	* below. These are generated by Perl from a minimal amount of
	47	* starting data that just gives each prefix character along with the
	48	* corresponding Unicode combining character and the list of letters
	49	* it's allowed to apply to; the Perl script runs over UnicodeData.txt
	50	* to achieve the translation of (letter, combining character) pairs
	51	* to precombined code points.
	52	*/
	53
	54	/*
	55
	56	perl -e '
	57	while (<<>>) {
	58	chomp; @_ = split /;/,$_; @d = split / /,$_[5];
	59	if (2 == @d) {
	60	($p, $s, $c) = (hex $d[0], hex $d[1], hex $_[0]);
	61	$combine{$p,$s} = $c if $p && $s && $c;
	62	}
	63	}
	64	@forward = (" ERROR,") x 0x400;
	65	for $t ( [0xC1, 0x300, "AEIOUaeiou" ],
	66	[0xC2, 0x301, "ACEILNORSUYZacegilnorsuyz" ],
	67	[0xC3, 0x302, "ACEGHIJOSUWYaceghijosuwy" ],
	68	[0xC4, 0x303, "AINOUainou" ],
	69	[0xC5, 0x304, "AEIOUaeiou" ],
	70	[0xC6, 0x306, "AGUagu" ],
	71	[0xC7, 0x307, "CEGIZcegz" ],
	72	[0xC8, 0x308, "AEIOUYaeiouy" ],
	73	[0xCA, 0x30A, "AUau" ],
	74	[0xCB, 0x327, "CGKLNRSTcklnrst" ],
	75	[0xCD, 0x30B, "OUou" ],
	76	[0xCE, 0x328, "AEIUaeiu" ],
	77	[0xCF, 0x30C, "CDELNRSTZcdelnrstz" ] ) {
	78	($prefix, $cc, $letters) = @$t;
	79	for $letter (unpack "C*", $letters) {
	80	$cp = $combine{$letter,$cc};
	81	$offset = ($prefix - 0xC0) * 0x40 + ($letter - 0x40);
	82	$forward[$offset] = sprintf " 0x%04x,", $cp;
	83	push @backward, [$cp, (sprintf " %d,", $offset)];
	84	}
	85	}
	86	@backward = map { $_->[1] } sort {$a->[0] <=> $b->[0]} @backward;
	87	print "static const unsigned short iso6937_2byte_forward[0x400] = {\n";
	88	$line = " ";
	89	for $e (@forward, "sentinel" x 100) {
	90	if (length($line.$e) > 77) { print "$line\n"; $line = " "; }
	91	$line .= $e;
	92	}
	93	print "};\n\n";
	94	$line = " ";
	95	print "static const unsigned short iso6937_2byte_backward[] = {\n";
	96	for $e (@backward, "sentinel" x 100) {
	97	if (length($line.$e) > 77) { print "$line\n"; $line = " "; }
	98	$line .= $e;
	99	}
	100	print "};\n\n";
	101	' UnicodeData.txt
	102
	103	*/
	104
	105	static const unsigned short iso6937_2byte_forward[0x400] = {
	106	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	107	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	108	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	109	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	110	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	111	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	112	ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c0, ERROR, ERROR, ERROR, 0x00c8,
	113	ERROR, ERROR, ERROR, 0x00cc, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00d2,
	114	ERROR, ERROR, ERROR, ERROR, ERROR, 0x00d9, ERROR, ERROR, ERROR, ERROR,
	115	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00e0, ERROR, ERROR,
	116	ERROR, 0x00e8, ERROR, ERROR, ERROR, 0x00ec, ERROR, ERROR, ERROR, ERROR,
	117	ERROR, 0x00f2, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00f9, ERROR, ERROR,
	118	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c1,
	119	ERROR, 0x0106, ERROR, 0x00c9, ERROR, ERROR, ERROR, 0x00cd, ERROR, ERROR,
	120	0x0139, ERROR, 0x0143, 0x00d3, ERROR, ERROR, 0x0154, 0x015a, ERROR,
	121	0x00da, ERROR, ERROR, ERROR, 0x00dd, 0x0179, ERROR, ERROR, ERROR, ERROR,
	122	ERROR, ERROR, 0x00e1, ERROR, 0x0107, ERROR, 0x00e9, ERROR, 0x01f5, ERROR,
	123	0x00ed, ERROR, ERROR, 0x013a, ERROR, 0x0144, 0x00f3, ERROR, ERROR,
	124	0x0155, 0x015b, ERROR, 0x00fa, ERROR, ERROR, ERROR, 0x00fd, 0x017a,
	125	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c2, ERROR, 0x0108, ERROR,
	126	0x00ca, ERROR, 0x011c, 0x0124, 0x00ce, 0x0134, ERROR, ERROR, ERROR,
	127	ERROR, 0x00d4, ERROR, ERROR, ERROR, 0x015c, ERROR, 0x00db, ERROR, 0x0174,
	128	ERROR, 0x0176, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00e2,
	129	ERROR, 0x0109, ERROR, 0x00ea, ERROR, 0x011d, 0x0125, 0x00ee, 0x0135,
	130	ERROR, ERROR, ERROR, ERROR, 0x00f4, ERROR, ERROR, ERROR, 0x015d, ERROR,
	131	0x00fb, ERROR, 0x0175, ERROR, 0x0177, ERROR, ERROR, ERROR, ERROR, ERROR,
	132	ERROR, ERROR, 0x00c3, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	133	0x0128, ERROR, ERROR, ERROR, ERROR, 0x00d1, 0x00d5, ERROR, ERROR, ERROR,
	134	ERROR, ERROR, 0x0168, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	135	ERROR, ERROR, ERROR, ERROR, 0x00e3, ERROR, ERROR, ERROR, ERROR, ERROR,
	136	ERROR, ERROR, 0x0129, ERROR, ERROR, ERROR, ERROR, 0x00f1, 0x00f5, ERROR,
	137	ERROR, ERROR, ERROR, ERROR, 0x0169, ERROR, ERROR, ERROR, ERROR, ERROR,
	138	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x0100, ERROR, ERROR, ERROR,
	139	0x0112, ERROR, ERROR, ERROR, 0x012a, ERROR, ERROR, ERROR, ERROR, ERROR,
	140	0x014c, ERROR, ERROR, ERROR, ERROR, ERROR, 0x016a, ERROR, ERROR, ERROR,
	141	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x0101, ERROR,
	142	ERROR, ERROR, 0x0113, ERROR, ERROR, ERROR, 0x012b, ERROR, ERROR, ERROR,
	143	ERROR, ERROR, 0x014d, ERROR, ERROR, ERROR, ERROR, ERROR, 0x016b, ERROR,
	144	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	145	0x0102, ERROR, ERROR, ERROR, ERROR, ERROR, 0x011e, ERROR, ERROR, ERROR,
	146	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	147	0x016c, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	148	ERROR, ERROR, 0x0103, ERROR, ERROR, ERROR, ERROR, ERROR, 0x011f, ERROR,
	149	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	150	ERROR, ERROR, 0x016d, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	151	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x010a, ERROR, 0x0116, ERROR,
	152	0x0120, ERROR, 0x0130, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	153	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x017b,
	154	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x010b, ERROR,
	155	0x0117, ERROR, 0x0121, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	156	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	157	ERROR, 0x017c, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c4, ERROR,
	158	ERROR, ERROR, 0x00cb, ERROR, ERROR, ERROR, 0x00cf, ERROR, ERROR, ERROR,
	159	ERROR, ERROR, 0x00d6, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00dc, ERROR,
	160	ERROR, ERROR, 0x0178, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	161	0x00e4, ERROR, ERROR, ERROR, 0x00eb, ERROR, ERROR, ERROR, 0x00ef, ERROR,
	162	ERROR, ERROR, ERROR, ERROR, 0x00f6, ERROR, ERROR, ERROR, ERROR, ERROR,
	163	0x00fc, ERROR, ERROR, ERROR, 0x00ff, ERROR, ERROR, ERROR, ERROR, ERROR,
	164	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	165	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	166	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	167	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	168	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	169	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	170	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c5, ERROR, ERROR, ERROR,
	171	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	172	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x016e, ERROR, ERROR, ERROR,
	173	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00e5, ERROR,
	174	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	175	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x016f, ERROR,
	176	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	177	ERROR, ERROR, 0x00c7, ERROR, ERROR, ERROR, 0x0122, ERROR, ERROR, ERROR,
	178	0x0136, 0x013b, ERROR, 0x0145, ERROR, ERROR, ERROR, 0x0156, 0x015e,
	179	0x0162, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	180	ERROR, ERROR, ERROR, ERROR, ERROR, 0x00e7, ERROR, ERROR, ERROR, ERROR,
	181	ERROR, ERROR, ERROR, 0x0137, 0x013c, ERROR, 0x0146, ERROR, ERROR, ERROR,
	182	0x0157, 0x015f, 0x0163, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	183	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	184	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	185	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	186	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	187	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	188	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	189	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	190	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	191	ERROR, ERROR, ERROR, 0x0150, ERROR, ERROR, ERROR, ERROR, ERROR, 0x0170,
	192	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	193	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	194	ERROR, ERROR, ERROR, ERROR, ERROR, 0x0151, ERROR, ERROR, ERROR, ERROR,
	195	ERROR, 0x0171, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	196	ERROR, ERROR, ERROR, 0x0104, ERROR, ERROR, ERROR, 0x0118, ERROR, ERROR,
	197	ERROR, 0x012e, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	198	ERROR, ERROR, ERROR, 0x0172, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	199	ERROR, ERROR, ERROR, ERROR, ERROR, 0x0105, ERROR, ERROR, ERROR, 0x0119,
	200	ERROR, ERROR, ERROR, 0x012f, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	201	ERROR, ERROR, ERROR, ERROR, ERROR, 0x0173, ERROR, ERROR, ERROR, ERROR,
	202	ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x010c,
	203	0x010e, 0x011a, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x013d, ERROR,
	204	0x0147, ERROR, ERROR, ERROR, 0x0158, 0x0160, 0x0164, ERROR, ERROR, ERROR,
	205	ERROR, ERROR, 0x017d, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	206	ERROR, 0x010d, 0x010f, 0x011b, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
	207	0x013e, ERROR, 0x0148, ERROR, ERROR, ERROR, 0x0159, 0x0161, 0x0165,
	208	ERROR, ERROR, ERROR, ERROR, ERROR, 0x017e, ERROR, ERROR, ERROR, ERROR,
	209	ERROR,
	210	};
	211
	212	static const unsigned short iso6937_2byte_backward[] = {
	213	65, 129, 193, 257, 513, 641, 707, 69, 133, 197, 517, 73, 137, 201, 521,
	214	270, 79, 143, 207, 271, 527, 85, 149, 213, 533, 153, 97, 161, 225, 289,
	215	545, 673, 739, 101, 165, 229, 549, 105, 169, 233, 553, 302, 111, 175,
	216	239, 303, 559, 117, 181, 245, 565, 185, 569, 321, 353, 385, 417, 897,
	217	929, 131, 163, 195, 227, 451, 483, 963, 995, 964, 996, 325, 357, 453,
	218	485, 901, 933, 965, 997, 199, 231, 391, 423, 455, 487, 711, 200, 232,
	219	265, 297, 329, 361, 905, 937, 457, 202, 234, 715, 747, 140, 172, 716,
	220	748, 972, 1004, 142, 174, 718, 750, 974, 1006, 335, 367, 847, 879, 146,
	221	178, 722, 754, 978, 1010, 147, 179, 211, 243, 723, 755, 979, 1011, 724,
	222	756, 980, 1012, 277, 309, 341, 373, 405, 437, 661, 693, 853, 885, 917,
	223	949, 215, 247, 217, 249, 537, 154, 186, 474, 506, 986, 1018, 167,
	224	};
	225
	226	/* This returns ERROR if the code point doesn't exist. */
	227	static long int iso6937_2byte_to_unicode(int prefix, int letter)
	228	{
	229	if (!(prefix >= 0xC0 && prefix < 0xD0 && letter >= 0x40 && letter < 0x80))
	230	return ERROR;
	231	return iso6937_2byte_forward[(prefix - 0xC0) * 0x40 + (letter - 0x40)];
	232	}
	233
	234	/* This returns true if it filled in the output values */
	235	static bool iso6937_2byte_from_unicode(long int cp, int prefix, int letter)
	236	{
	237	int lo = -1, hi = lenof(iso6937_2byte_backward);
	238
	239	while (hi - lo >= 2) {
	240	int mid = (hi + lo) / 2;
	241	int midpos = iso6937_2byte_backward[mid];
	242	long int midcp = iso6937_2byte_forward[midpos];
	243	if (cp == midcp) {
	244	*prefix = 0xC0 + (midpos >> 6);
	245	*letter = 0x40 + (midpos & 0x3F);
	246	return true;
	247	} else if (cp < midcp) {
	248	hi = mid;
	249	} else {
	250	lo = mid;
	251	}
	252	}
	253	return false;
	254	}
	255
	256	void read_iso6937(charset_spec const *charset, long int input_chr,
	257	charset_state *state,
	258	void (emit)(void ctx, long int output), void *emitctx)
	259	{
	260	const sbcs_data *sd = charset->data;
	261
	262	if (input_chr >= 0xC0 && input_chr < 0xD0) {
	263	/*
	264	* Input bytes in the C0-CF region of this encoding are
	265	* 'combining characters', but not in the Unicode sense of
	266	* mapping to separate Unicode code points. Instead, they're
	267	* prefixes which modify a specific set of subsequent printing
	268	* characters. Stash such a byte in the conversion state to
	269	* use in the next call.
	270	*/
	271	if (state->s0) {
	272	emit(emitctx, ERROR); /* the previous prefix was erroneous */
	273	}
	274	state->s0 = input_chr;
	275	} else {
	276	if (state->s0) {
	277	long int output = iso6937_2byte_to_unicode(state->s0, input_chr);
	278	emit(emitctx, output);
	279	state->s0 = 0;
	280
	281	/*
	282	* If we've successfully emitted a character, we're done.
	283	* Otherwise, we'll take the view that the ERROR we've
	284	* emitted corresponded to _just_ the misplaced prefix
	285	* byte, so we'll fall through to the emit() below which
	286	* will output the unmodified followup byte too.
	287	*/
	288	if (output != ERROR)
	289	return;
	290	}
	291
	292	emit(emitctx, sbcs_to_unicode(sd, input_chr));
	293	}
	294	}
	295
	296	bool write_iso6937(charset_spec const *charset, long int input_chr,
	297	charset_state *state,
	298	void (emit)(void ctx, long int output), void *emitctx)
	299	{
	300	const struct sbcs_data *sd = charset->data;
	301	long int ret;
	302	int prefix, letter;
	303
	304	UNUSEDARG(state);
	305
	306	if (input_chr == -1)
	307	return true; /* stateless; no cleanup required */
	308
	309	if ((ret = sbcs_from_unicode(sd, input_chr)) != ERROR) {
	310	emit(emitctx, ret);
	311	return true;
	312	} else if (iso6937_2byte_from_unicode(input_chr, &prefix, &letter)) {
	313	emit(emitctx, prefix);
	314	emit(emitctx, letter);
	315	return true;
	316	} else {
	317	return false;
	318	}
	319	}
	320
	321	extern const sbcs_data sbcsdata_ISO6937, sbcsdata_ISO6937_EURO;
	322
	323	const charset_spec charset_CS_ISO6937 = {
	324	CS_ISO6937, read_iso6937, write_iso6937, &sbcsdata_ISO6937
	325	};
	326	const charset_spec charset_CS_ISO6937_EURO = {
	327	CS_ISO6937_EURO, read_iso6937, write_iso6937, &sbcsdata_ISO6937_EURO
	328	};
	329
	330	#else /* ENUM_CHARSETS */
	331
	332	ENUM_CHARSET(CS_ISO6937)
	333	ENUM_CHARSET(CS_ISO6937_EURO)
	334
	335	#endif /* ENUM_CHARSETS */

+4

-4

charset/jisx0208.c less more

1951	1951	return jisx0208_forward[r][c];
1952	1952	}
1953	1953
1954		/* This one returns 1 on success, 0 if the code point doesn't exist. */
1955		int unicode_to_jisx0208(long int unicode, int r, int c)
	1954	/* This one returns true on success, false if the code point doesn't exist. */
	1955	bool unicode_to_jisx0208(long int unicode, int r, int c)
1956	1956	{
1957	1957	int rr, cc;
1958	1958	long int uu;

1972	1972	else {
1973	1973	*r = rr;
1974	1974	*c = cc;
1975		return 1;
	1975	return true;
1976	1976	}
1977	1977	}
1978		return 0;
	1978	return false;
1979	1979	}
1980	1980
1981	1981	#ifdef TESTMODE

+4

-4

charset/jisx0212.c less more

1849	1849	return jisx0212_forward[r][c];
1850	1850	}
1851	1851
1852		/* This one returns 1 on success, 0 if the code point doesn't exist. */
1853		int unicode_to_jisx0212(long int unicode, int r, int c)
	1852	/* This one returns true on success, false if the code point doesn't exist. */
	1853	bool unicode_to_jisx0212(long int unicode, int r, int c)
1854	1854	{
1855	1855	int rr, cc;
1856	1856	long int uu;

1870	1870	else {
1871	1871	*r = rr;
1872	1872	*c = cc;
1873		return 1;
	1873	return true;
1874	1874	}
1875	1875	}
1876		return 0;
	1876	return false;
1877	1877	}
1878	1878
1879	1879	#ifdef TESTMODE

+9

-9

charset/ksx1001.c less more

5168	5168	return cp949_forward[r][c];
5169	5169	}
5170	5170
5171		/* This one returns 1 on success, 0 if the code point doesn't exist. */
5172		int unicode_to_cp949(long int unicode, int *r, int *c)
	5171	/* This one returns true on success, false if the code point doesn't exist. */
	5172	bool unicode_to_cp949(long int unicode, int *r, int *c)
5173	5173	{
5174	5174	int rr, cc;
5175	5175	long int uu;

5189	5189	else {
5190	5190	*r = rr;
5191	5191	*c = cc;
5192		return 1;
	5192	return true;
5193	5193	}
5194	5194	}
5195		return 0;
	5195	return false;
5196	5196	}
5197	5197
5198	5198	/* Functions dealing with the KS X 1001 square subset */

5203	5203	return cp949_forward[r+0x21][c+0x61];
5204	5204	}
5205	5205
5206		/* This one returns 1 on success, 0 if the code point doesn't exist. */
5207		int unicode_to_ksx1001(long int unicode, int *r, int *c)
	5206	/* This one returns true on success, false if the code point doesn't exist. */
	5207	bool unicode_to_ksx1001(long int unicode, int *r, int *c)
5208	5208	{
5209	5209	int rr, cc;
5210	5210	if (!unicode_to_cp949(unicode, &rr, &cc))
5211		return 0;
	5211	return false;
5212	5212	rr -= 0x21;
5213	5213	cc -= 0x61;
5214	5214	if (rr < 0 || rr >= 94 || cc < 0 || cc >= 94)
5215		return 0;
	5215	return false;
5216	5216	*r = rr;
5217	5217	*c = cc;
5218		return 1;
	5218	return true;
5219	5219	}
5220	5220
5221	5221	#ifdef TESTMODE

+175

-153

charset/localenc.c less more

23	23	static const struct {
24	24	const char *name;
25	25	int charset;
26		int return_in_enum; /* enumeration misses some charsets */
	26	bool return_in_enum; /* enumeration misses some charsets */
27	27	} localencs[] = {
28		{ "<UNKNOWN>", CS_NONE, 0 },
29		{ "ASCII", CS_ASCII, 1 },
30		{ "BS 4730", CS_BS4730, 1 },
31		{ "BS-4730", CS_BS4730, 0 },
32		{ "BS4730", CS_BS4730, 0 },
33		{ "ISO-8859-1", CS_ISO8859_1, 1 },
34		{ "ISO-8859-1 with X11 line drawing", CS_ISO8859_1_X11, 0 },
35		{ "ISO-8859-1-X11", CS_ISO8859_1_X11, 0 },
36		{ "ISO8859-1-X11", CS_ISO8859_1_X11, 0 },
37		{ "ISO-8859-2", CS_ISO8859_2, 1 },
38		{ "ISO-8859-3", CS_ISO8859_3, 1 },
39		{ "ISO-8859-4", CS_ISO8859_4, 1 },
40		{ "ISO-8859-5", CS_ISO8859_5, 1 },
41		{ "ISO-8859-6", CS_ISO8859_6, 1 },
42		{ "ISO-8859-7", CS_ISO8859_7, 1 },
43		{ "ISO-8859-8", CS_ISO8859_8, 1 },
44		{ "ISO-8859-9", CS_ISO8859_9, 1 },
45		{ "ISO-8859-10", CS_ISO8859_10, 1 },
46		{ "ISO-8859-11", CS_ISO8859_11, 1 },
47		{ "ISO-8859-13", CS_ISO8859_13, 1 },
48		{ "ISO-8859-14", CS_ISO8859_14, 1 },
49		{ "ISO-8859-15", CS_ISO8859_15, 1 },
50		{ "ISO-8859-16", CS_ISO8859_16, 1 },
51		{ "CP437", CS_CP437, 1 },
52		{ "CP850", CS_CP850, 1 },
53		{ "CP852", CS_CP852, 1 },
54		{ "CP866", CS_CP866, 1 },
55		{ "CP874", CS_CP874, 1 },
56		{ "Win874", CS_CP874, 0 },
57		{ "Win-874", CS_CP874, 0 },
58		{ "CP1250", CS_CP1250, 1 },
59		{ "Win1250", CS_CP1250, 0 },
60		{ "CP1251", CS_CP1251, 1 },
61		{ "Win1251", CS_CP1251, 0 },
62		{ "CP1252", CS_CP1252, 1 },
63		{ "Win1252", CS_CP1252, 0 },
64		{ "CP1253", CS_CP1253, 1 },
65		{ "Win1253", CS_CP1253, 0 },
66		{ "CP1254", CS_CP1254, 1 },
67		{ "Win1254", CS_CP1254, 0 },
68		{ "CP1255", CS_CP1255, 1 },
69		{ "Win1255", CS_CP1255, 0 },
70		{ "CP1256", CS_CP1256, 1 },
71		{ "Win1256", CS_CP1256, 0 },
72		{ "CP1257", CS_CP1257, 1 },
73		{ "Win1257", CS_CP1257, 0 },
74		{ "CP1258", CS_CP1258, 1 },
75		{ "Win1258", CS_CP1258, 0 },
76		{ "KOI8-R", CS_KOI8_R, 1 },
77		{ "KOI8-U", CS_KOI8_U, 1 },
78		{ "KOI8-RU", CS_KOI8_RU, 1 },
79		{ "JIS X 0201", CS_JISX0201, 1 },
80		{ "JIS-X-0201", CS_JISX0201, 0 },
81		{ "JIS_X_0201", CS_JISX0201, 0 },
82		{ "JISX0201", CS_JISX0201, 0 },
83		{ "Mac Roman", CS_MAC_ROMAN, 1 },
84		{ "Mac-Roman", CS_MAC_ROMAN, 0 },
85		{ "MacRoman", CS_MAC_ROMAN, 0 },
86		{ "Mac Turkish", CS_MAC_TURKISH, 1 },
87		{ "Mac-Turkish", CS_MAC_TURKISH, 0 },
88		{ "MacTurkish", CS_MAC_TURKISH, 0 },
89		{ "Mac Croatian", CS_MAC_CROATIAN, 1 },
90		{ "Mac-Croatian", CS_MAC_CROATIAN, 0 },
91		{ "MacCroatian", CS_MAC_CROATIAN, 0 },
92		{ "Mac Iceland", CS_MAC_ICELAND, 1 },
93		{ "Mac-Iceland", CS_MAC_ICELAND, 0 },
94		{ "MacIceland", CS_MAC_ICELAND, 0 },
95		{ "Mac Romanian", CS_MAC_ROMANIAN, 1 },
96		{ "Mac-Romanian", CS_MAC_ROMANIAN, 0 },
97		{ "MacRomanian", CS_MAC_ROMANIAN, 0 },
98		{ "Mac Greek", CS_MAC_GREEK, 1 },
99		{ "Mac-Greek", CS_MAC_GREEK, 0 },
100		{ "MacGreek", CS_MAC_GREEK, 0 },
101		{ "Mac Cyrillic", CS_MAC_CYRILLIC, 1 },
102		{ "Mac-Cyrillic", CS_MAC_CYRILLIC, 0 },
103		{ "MacCyrillic", CS_MAC_CYRILLIC, 0 },
104		{ "Mac Thai", CS_MAC_THAI, 1 },
105		{ "Mac-Thai", CS_MAC_THAI, 0 },
106		{ "MacThai", CS_MAC_THAI, 0 },
107		{ "Mac Centeuro", CS_MAC_CENTEURO, 1 },
108		{ "Mac-Centeuro", CS_MAC_CENTEURO, 0 },
109		{ "MacCenteuro", CS_MAC_CENTEURO, 0 },
110		{ "Mac Symbol", CS_MAC_SYMBOL, 1 },
111		{ "Mac-Symbol", CS_MAC_SYMBOL, 0 },
112		{ "MacSymbol", CS_MAC_SYMBOL, 0 },
113		{ "Mac Dingbats", CS_MAC_DINGBATS, 1 },
114		{ "Mac-Dingbats", CS_MAC_DINGBATS, 0 },
115		{ "MacDingbats", CS_MAC_DINGBATS, 0 },
116		{ "Mac Roman (old)", CS_MAC_ROMAN_OLD, 0 },
117		{ "Mac-Roman-old", CS_MAC_ROMAN_OLD, 0 },
118		{ "MacRoman-old", CS_MAC_ROMAN_OLD, 0 },
119		{ "Mac Croatian (old)", CS_MAC_CROATIAN_OLD, 0 },
120		{ "Mac-Croatian-old", CS_MAC_CROATIAN_OLD, 0 },
121		{ "MacCroatian-old", CS_MAC_CROATIAN_OLD, 0 },
122		{ "Mac Iceland (old)", CS_MAC_ICELAND_OLD, 0 },
123		{ "Mac-Iceland-old", CS_MAC_ICELAND_OLD, 0 },
124		{ "MacIceland-old", CS_MAC_ICELAND_OLD, 0 },
125		{ "Mac Romanian (old)", CS_MAC_ROMANIAN_OLD, 0 },
126		{ "Mac-Romanian-old", CS_MAC_ROMANIAN_OLD, 0 },
127		{ "MacRomanian-old", CS_MAC_ROMANIAN_OLD, 0 },
128		{ "Mac Greek (old)", CS_MAC_GREEK_OLD, 0 },
129		{ "Mac-Greek-old", CS_MAC_GREEK_OLD, 0 },
130		{ "MacGreek-old", CS_MAC_GREEK_OLD, 0 },
131		{ "Mac Cyrillic (old)", CS_MAC_CYRILLIC_OLD, 0 },
132		{ "Mac-Cyrillic-old", CS_MAC_CYRILLIC_OLD, 0 },
133		{ "MacCyrillic-old", CS_MAC_CYRILLIC_OLD, 0 },
134		{ "Mac Ukraine", CS_MAC_UKRAINE, 1 },
135		{ "Mac-Ukraine", CS_MAC_UKRAINE, 0 },
136		{ "MacUkraine", CS_MAC_UKRAINE, 0 },
137		{ "Mac VT100", CS_MAC_VT100, 1 },
138		{ "Mac-VT100", CS_MAC_VT100, 0 },
139		{ "MacVT100", CS_MAC_VT100, 0 },
140		{ "Mac VT100 (old)", CS_MAC_VT100_OLD, 0 },
141		{ "Mac-VT100-old", CS_MAC_VT100_OLD, 0 },
142		{ "MacVT100-old", CS_MAC_VT100_OLD, 0 },
143		{ "Mac Roman (Pirard encoding)", CS_MAC_PIRARD, 0 },
144		{ "Mac Pirard", CS_MAC_PIRARD, 0 },
145		{ "Mac-Pirard", CS_MAC_PIRARD, 0 },
146		{ "MacPirard", CS_MAC_PIRARD, 0 },
147		{ "VISCII", CS_VISCII, 1 },
148		{ "HP ROMAN8", CS_HP_ROMAN8, 1 },
149		{ "HP-ROMAN8", CS_HP_ROMAN8, 0 },
150		{ "DEC MCS", CS_DEC_MCS, 1 },
151		{ "DEC-MCS", CS_DEC_MCS, 1 },
152		{ "DEC graphics", CS_DEC_GRAPHICS, 1 },
153		{ "DEC-graphics", CS_DEC_GRAPHICS, 0 },
154		{ "DECgraphics", CS_DEC_GRAPHICS, 0 },
155		{ "UTF-8", CS_UTF8, 1 },
156		{ "UTF-7", CS_UTF7, 1 },
157		{ "UTF-7-conservative", CS_UTF7_CONSERVATIVE, 0 },
158		{ "EUC-CN", CS_EUC_CN, 1 },
159		{ "EUC-KR", CS_EUC_KR, 1 },
160		{ "EUC-JP", CS_EUC_JP, 1 },
161		{ "EUC-TW", CS_EUC_TW, 1 },
162		{ "ISO-2022-JP", CS_ISO2022_JP, 1 },
163		{ "ISO-2022-KR", CS_ISO2022_KR, 1 },
164		{ "Big5", CS_BIG5, 1 },
165		{ "Shift-JIS", CS_SHIFT_JIS, 1 },
166		{ "HZ", CS_HZ, 1 },
167		{ "UTF-16BE", CS_UTF16BE, 1 },
168		{ "UTF-16LE", CS_UTF16LE, 1 },
169		{ "UTF-16", CS_UTF16, 1 },
170		{ "CP949", CS_CP949, 1 },
171		{ "PDFDocEncoding", CS_PDF, 1 },
172		{ "StandardEncoding", CS_PSSTD, 1 },
173		{ "COMPOUND_TEXT", CS_CTEXT, 1 },
174		{ "COMPOUND-TEXT", CS_CTEXT, 0 },
175		{ "COMPOUND TEXT", CS_CTEXT, 0 },
176		{ "COMPOUNDTEXT", CS_CTEXT, 0 },
177		{ "CTEXT", CS_CTEXT, 0 },
178		{ "ISO-2022", CS_ISO2022, 1 },
179		{ "ISO2022", CS_ISO2022, 0 },
	28	{ "<UNKNOWN>", CS_NONE, false },
	29	{ "ASCII", CS_ASCII, true },
	30	{ "BS 4730", CS_BS4730, true },
	31	{ "BS-4730", CS_BS4730, false },
	32	{ "BS4730", CS_BS4730, false },
	33	{ "ISO-8859-1", CS_ISO8859_1, true },
	34	{ "ISO-8859-1 with X11 line drawing", CS_ISO8859_1_X11, false },
	35	{ "ISO-8859-1-X11", CS_ISO8859_1_X11, false },
	36	{ "ISO8859-1-X11", CS_ISO8859_1_X11, false },
	37	{ "ISO-8859-2", CS_ISO8859_2, true },
	38	{ "ISO-8859-3", CS_ISO8859_3, true },
	39	{ "ISO-8859-4", CS_ISO8859_4, true },
	40	{ "ISO-8859-5", CS_ISO8859_5, true },
	41	{ "ISO-8859-6", CS_ISO8859_6, true },
	42	{ "ISO-8859-7", CS_ISO8859_7, true },
	43	{ "ISO-8859-8", CS_ISO8859_8, true },
	44	{ "ISO-8859-9", CS_ISO8859_9, true },
	45	{ "ISO-8859-10", CS_ISO8859_10, true },
	46	{ "ISO-8859-11", CS_ISO8859_11, true },
	47	{ "ISO-8859-13", CS_ISO8859_13, true },
	48	{ "ISO-8859-14", CS_ISO8859_14, true },
	49	{ "ISO-8859-15", CS_ISO8859_15, true },
	50	{ "ISO-8859-16", CS_ISO8859_16, true },
	51	{ "CP437", CS_CP437, true },
	52	{ "CP850", CS_CP850, true },
	53	{ "CP852", CS_CP852, true },
	54	{ "CP866", CS_CP866, true },
	55	{ "CP874", CS_CP874, true },
	56	{ "Win874", CS_CP874, false },
	57	{ "Win-874", CS_CP874, false },
	58	{ "CP1250", CS_CP1250, true },
	59	{ "Win1250", CS_CP1250, false },
	60	{ "CP1251", CS_CP1251, true },
	61	{ "Win1251", CS_CP1251, false },
	62	{ "CP1252", CS_CP1252, true },
	63	{ "Win1252", CS_CP1252, false },
	64	{ "CP1253", CS_CP1253, true },
	65	{ "Win1253", CS_CP1253, false },
	66	{ "CP1254", CS_CP1254, true },
	67	{ "Win1254", CS_CP1254, false },
	68	{ "CP1255", CS_CP1255, true },
	69	{ "Win1255", CS_CP1255, false },
	70	{ "CP1256", CS_CP1256, true },
	71	{ "Win1256", CS_CP1256, false },
	72	{ "CP1257", CS_CP1257, true },
	73	{ "Win1257", CS_CP1257, false },
	74	{ "CP1258", CS_CP1258, true },
	75	{ "Win1258", CS_CP1258, false },
	76	{ "KOI8-R", CS_KOI8_R, true },
	77	{ "KOI8R", CS_KOI8_R, false },
	78	{ "KOI8-U", CS_KOI8_U, true },
	79	{ "KOI8U", CS_KOI8_U, false },
	80	{ "KOI8-RU", CS_KOI8_RU, true },
	81	{ "KOI8RU", CS_KOI8_RU, false },
	82	{ "JIS X 0201", CS_JISX0201, true },
	83	{ "JIS-X-0201", CS_JISX0201, false },
	84	{ "JIS_X_0201", CS_JISX0201, false },
	85	{ "JISX0201", CS_JISX0201, false },
	86	{ "Mac Roman", CS_MAC_ROMAN, true },
	87	{ "Mac-Roman", CS_MAC_ROMAN, false },
	88	{ "MacRoman", CS_MAC_ROMAN, false },
	89	{ "Mac Turkish", CS_MAC_TURKISH, true },
	90	{ "Mac-Turkish", CS_MAC_TURKISH, false },
	91	{ "MacTurkish", CS_MAC_TURKISH, false },
	92	{ "Mac Croatian", CS_MAC_CROATIAN, true },
	93	{ "Mac-Croatian", CS_MAC_CROATIAN, false },
	94	{ "MacCroatian", CS_MAC_CROATIAN, false },
	95	{ "Mac Iceland", CS_MAC_ICELAND, true },
	96	{ "Mac-Iceland", CS_MAC_ICELAND, false },
	97	{ "MacIceland", CS_MAC_ICELAND, false },
	98	{ "Mac Romanian", CS_MAC_ROMANIAN, true },
	99	{ "Mac-Romanian", CS_MAC_ROMANIAN, false },
	100	{ "MacRomanian", CS_MAC_ROMANIAN, false },
	101	{ "Mac Greek", CS_MAC_GREEK, true },
	102	{ "Mac-Greek", CS_MAC_GREEK, false },
	103	{ "MacGreek", CS_MAC_GREEK, false },
	104	{ "Mac Cyrillic", CS_MAC_CYRILLIC, true },
	105	{ "Mac-Cyrillic", CS_MAC_CYRILLIC, false },
	106	{ "MacCyrillic", CS_MAC_CYRILLIC, false },
	107	{ "Mac Thai", CS_MAC_THAI, true },
	108	{ "Mac-Thai", CS_MAC_THAI, false },
	109	{ "MacThai", CS_MAC_THAI, false },
	110	{ "Mac Centeuro", CS_MAC_CENTEURO, true },
	111	{ "Mac-Centeuro", CS_MAC_CENTEURO, false },
	112	{ "MacCenteuro", CS_MAC_CENTEURO, false },
	113	{ "Mac Symbol", CS_MAC_SYMBOL, true },
	114	{ "Mac-Symbol", CS_MAC_SYMBOL, false },
	115	{ "MacSymbol", CS_MAC_SYMBOL, false },
	116	{ "Mac Dingbats", CS_MAC_DINGBATS, true },
	117	{ "Mac-Dingbats", CS_MAC_DINGBATS, false },
	118	{ "MacDingbats", CS_MAC_DINGBATS, false },
	119	{ "Mac Roman (old)", CS_MAC_ROMAN_OLD, false },
	120	{ "Mac-Roman-old", CS_MAC_ROMAN_OLD, false },
	121	{ "MacRoman-old", CS_MAC_ROMAN_OLD, false },
	122	{ "Mac Croatian (old)", CS_MAC_CROATIAN_OLD, false },
	123	{ "Mac-Croatian-old", CS_MAC_CROATIAN_OLD, false },
	124	{ "MacCroatian-old", CS_MAC_CROATIAN_OLD, false },
	125	{ "Mac Iceland (old)", CS_MAC_ICELAND_OLD, false },
	126	{ "Mac-Iceland-old", CS_MAC_ICELAND_OLD, false },
	127	{ "MacIceland-old", CS_MAC_ICELAND_OLD, false },
	128	{ "Mac Romanian (old)", CS_MAC_ROMANIAN_OLD, false },
	129	{ "Mac-Romanian-old", CS_MAC_ROMANIAN_OLD, false },
	130	{ "MacRomanian-old", CS_MAC_ROMANIAN_OLD, false },
	131	{ "Mac Greek (old)", CS_MAC_GREEK_OLD, false },
	132	{ "Mac-Greek-old", CS_MAC_GREEK_OLD, false },
	133	{ "MacGreek-old", CS_MAC_GREEK_OLD, false },
	134	{ "Mac Cyrillic (old)", CS_MAC_CYRILLIC_OLD, false },
	135	{ "Mac-Cyrillic-old", CS_MAC_CYRILLIC_OLD, false },
	136	{ "MacCyrillic-old", CS_MAC_CYRILLIC_OLD, false },
	137	{ "Mac Ukraine", CS_MAC_UKRAINE, true },
	138	{ "Mac-Ukraine", CS_MAC_UKRAINE, false },
	139	{ "MacUkraine", CS_MAC_UKRAINE, false },
	140	{ "Mac VT100", CS_MAC_VT100, true },
	141	{ "Mac-VT100", CS_MAC_VT100, false },
	142	{ "MacVT100", CS_MAC_VT100, false },
	143	{ "Mac VT100 (old)", CS_MAC_VT100_OLD, false },
	144	{ "Mac-VT100-old", CS_MAC_VT100_OLD, false },
	145	{ "MacVT100-old", CS_MAC_VT100_OLD, false },
	146	{ "Mac Roman (Pirard encoding)", CS_MAC_PIRARD, false },
	147	{ "Mac Pirard", CS_MAC_PIRARD, false },
	148	{ "Mac-Pirard", CS_MAC_PIRARD, false },
	149	{ "MacPirard", CS_MAC_PIRARD, false },
	150	{ "VISCII", CS_VISCII, true },
	151	{ "HP ROMAN8", CS_HP_ROMAN8, true },
	152	{ "HP-ROMAN8", CS_HP_ROMAN8, false },
	153	{ "DEC MCS", CS_DEC_MCS, true },
	154	{ "DEC-MCS", CS_DEC_MCS, true },
	155	{ "DEC graphics", CS_DEC_GRAPHICS, true },
	156	{ "DEC-graphics", CS_DEC_GRAPHICS, false },
	157	{ "DECgraphics", CS_DEC_GRAPHICS, false },
	158	{ "UTF-8", CS_UTF8, true },
	159	{ "UTF8", CS_UTF8, false },
	160	{ "UTF-7", CS_UTF7, true },
	161	{ "UTF7", CS_UTF7, false },
	162	{ "UTF-7-conservative", CS_UTF7_CONSERVATIVE, false },
	163	{ "EUC-CN", CS_EUC_CN, true },
	164	{ "EUC-KR", CS_EUC_KR, true },
	165	{ "EUC-JP", CS_EUC_JP, true },
	166	{ "EUC-TW", CS_EUC_TW, true },
	167	{ "ISO-2022-JP", CS_ISO2022_JP, true },
	168	{ "ISO-2022-KR", CS_ISO2022_KR, true },
	169	{ "Big5", CS_BIG5, true },
	170	{ "Shift-JIS", CS_SHIFT_JIS, true },
	171	{ "HZ", CS_HZ, true },
	172	{ "UTF-16BE", CS_UTF16BE, true },
	173	{ "UTF16BE", CS_UTF16BE, false },
	174	{ "UTF-16LE", CS_UTF16LE, true },
	175	{ "UTF16LE", CS_UTF16LE, false },
	176	{ "UTF-16BE-NO-BOM", CS_UTF16BE_NO_BOM, true },
	177	{ "UTF-16BE-NOBOM", CS_UTF16BE_NO_BOM, false },
	178	{ "UTF16BENOBOM", CS_UTF16BE_NO_BOM, false },
	179	{ "UTF-16LE-NO-BOM", CS_UTF16LE_NO_BOM, true },
	180	{ "UTF-16LE-NOBOM", CS_UTF16LE_NO_BOM, false },
	181	{ "UTF16LENOBOM", CS_UTF16LE_NO_BOM, false },
	182	{ "UTF-16", CS_UTF16, true },
	183	{ "UTF16", CS_UTF16, false },
	184	{ "CP949", CS_CP949, true },
	185	{ "PDFDocEncoding", CS_PDF, true },
	186	{ "StandardEncoding", CS_PSSTD, true },
	187	{ "COMPOUND_TEXT", CS_CTEXT, true },
	188	{ "COMPOUND-TEXT", CS_CTEXT, false },
	189	{ "COMPOUND TEXT", CS_CTEXT, false },
	190	{ "COMPOUNDTEXT", CS_CTEXT, false },
	191	{ "CTEXT", CS_CTEXT, false },
	192	{ "ISO-2022", CS_ISO2022, true },
	193	{ "ISO2022", CS_ISO2022, false },
	194	{ "ISO-6937", CS_ISO6937, true },
	195	{ "ISO6937", CS_ISO6937, false },
	196	{ "ISO-6937 with euro sign", CS_ISO6937_EURO, true },
	197	{ "ISO-6937-euro", CS_ISO6937_EURO, false },
	198	{ "ISO6937-euro", CS_ISO6937_EURO, false },
	199	{ "ITS", CS_ITS, true },
	200	{ "SAIL", CS_SAIL, true },
	201	{ "WAITS", CS_SAIL, false },
180	202	};
181	203
182	204	const char *charset_to_localenc(int charset)

+6

-6

charset/sbcs.c less more

50	50	return ERROR;
51	51	}
52	52
53		int write_sbcs(charset_spec const *charset, long int input_chr,
54		charset_state *state,
55		void (*emit)(void *ctx, long int output), void *emitctx)
	53	bool write_sbcs(charset_spec const *charset, long int input_chr,
	54	charset_state *state,
	55	void (*emit)(void *ctx, long int output), void *emitctx)
56	56	{
57	57	const struct sbcs_data *sd = charset->data;
58	58	long int ret;

60	60	UNUSEDARG(state);
61	61
62	62	if (input_chr == -1)
63		return TRUE; /* stateless; no cleanup required */
	63	return true; /* stateless; no cleanup required */
64	64
65	65	ret = sbcs_from_unicode(sd, input_chr);
66	66	if (ret == ERROR)
67		return FALSE;
	67	return false;
68	68
69	69	emit(emitctx, ret);
70		return TRUE;
	70	return true;
71	71	}

+120

-0

charset/sbcs.dat less more

1487	1487	2014 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1488	1488	XXXX 00C6 XXXX 00AA XXXX XXXX XXXX XXXX 0141 00D8 0152 00BA XXXX XXXX XXXX XXXX
1489	1489	XXXX 00E6 XXXX XXXX XXXX 0131 XXXX XXXX 0142 00F8 0153 00DF XXXX XXXX XXXX XXXX
	1490
	1491	ISO/IEC 6937. Or rather, this version is extended to add the usual
	1492	C0/C1 controls in the non-printing positions below 0xA0, on the
	1493	expectation that text encoded in this character set may still need
	1494	interleaving with the usual amenities such as newlines and terminal
	1495	escapes.
	1496
	1497	Source: https://en.wikipedia.org/wiki/ISO/IEC_6937 and manual
	1498	transcription.
	1499
	1500	ISO6937_EURO is a tiny modification to ISO/IEC 6937, used in DVB
	1501	(digital broadcast TV) in the metadata and EPG streams. Defined by
	1502	Annex A of the DVB standards document EN 300 468, it differs from
	1503	ordinary ISO6937 only in the addition of the euro sign in the unused
	1504	location 0xA4.
	1505
	1506	Only the translation tables for the single-byte encodings are stored
	1507	here. The rest of the implementation of this pair of charsets is in
	1508	custom code, and lives in iso6937.c.
	1509
	1510	tables ISO6937
	1511	0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F
	1512	0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F
	1513	0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F
	1514	0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F
	1515	0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F
	1516	0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F
	1517	0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F
	1518	0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F
	1519	0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 008A 008B 008C 008D 008E 008F
	1520	0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 009A 009B 009C 009D 009E 009F
	1521	00A0 00A1 00A2 00A3 XXXX 00A5 XXXX 00A7 00A4 2018 201C 00AB 2190 2191 2192 2193
	1522	00B0 00B1 00B2 00B3 00D7 00B5 00B6 00B7 00F7 2019 201D 00BB 00BC 00BD 00BE 00BF
	1523	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1524	2015 00B9 00AE 00A9 2122 266A 00AC 00A6 XXXX XXXX XXXX XXXX 215B 215C 215D 215E
	1525	2126 00C6 0110 00AA 0126 XXXX 0132 013F 0141 00D8 0152 00BA 00DE 0166 014A 0149
	1526	0138 00E6 0111 00F0 0127 0131 0133 0140 0142 00F8 0153 00DF 00FE 0167 014B 00AD
	1527
	1528	tables ISO6937_EURO
	1529	0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F
	1530	0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F
	1531	0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F
	1532	0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F
	1533	0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F
	1534	0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F
	1535	0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F
	1536	0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F
	1537	0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 008A 008B 008C 008D 008E 008F
	1538	0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 009A 009B 009C 009D 009E 009F
	1539	00A0 00A1 00A2 00A3 20AC 00A5 XXXX 00A7 00A4 2018 201C 00AB 2190 2191 2192 2193
	1540	00B0 00B1 00B2 00B3 00D7 00B5 00B6 00B7 00F7 2019 201D 00BB 00BC 00BD 00BE 00BF
	1541	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1542	2015 00B9 00AE 00A9 2122 266A 00AC 00A6 XXXX XXXX XXXX XXXX 215B 215C 215D 215E
	1543	2126 00C6 0110 00AA 0126 XXXX 0132 013F 0141 00D8 0152 00BA 00DE 0166 014A 0149
	1544	0138 00E6 0111 00F0 0127 0131 0133 0140 0142 00F8 0153 00DF 00FE 0167 014B 00AD
	1545
	1546	The ITS character set, standardised in the SUPDUP protocol (RFC
	1547	734). Fills in the whole C0 control space and 7F with graphic
	1548	characters, on the basis that SUPDUP has its own out-of-band way to
	1549	do terminal control.
	1550
	1551	RFC 734 doesn't give exact Unicode code points for its extra
	1552	characters (it couldn't have done so without time travel, of
	1553	course!). I've tried to choose the best representation in each case.
	1554	In particular, I've chosen position 0x16 to be U+2297 CIRCLED TIMES
	1555	rather than U+24E7 CIRCLED LATIN SMALL LETTER X. The RFC says
	1556	'circle-X', but since it also has 'circle-plus' at position 0x0D
	1557	(which is unambiguously U+2295 CIRCLED PLUS), my feeling is that the
	1558	notation in the RFC was not intended to be especially precise, and
	1559	the likely uses of the two characters match, i.e. both are intended
	1560	to be mathematical rather than literal.
	1561
	1562	charset CS_ITS
	1563	00B7 2193 03B1 03B2 2227 00AC 03B5 03C0 03BB 03B3 03B4 2191 00B1 2295 221E 2202
	1564	2282 2283 2229 222A 2200 2203 2297 2194 2190 2192 2260 25CA 2264 2265 2261 2228
	1565	0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002a 002b 002c 002d 002e 002f
	1566	0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003a 003b 003c 003d 003e 003f
	1567	0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004a 004b 004c 004d 004e 004f
	1568	0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005a 005b 005c 005d 005e 005f
	1569	0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006a 006b 006c 006d 006e 006f
	1570	0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007a 007b 007c 007d 007e 222b
	1571	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1572	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1573	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1574	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1575	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1576	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1577	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1578	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1579
	1580	The SAIL character set, used in the Stanford AI Lab's WAITS
	1581	operating system (a modified version of ITS).
	1582
	1583	Source: https://www.saildart.org/allow/sail-charset-utf8.html plus
	1584	some manual editing. The web page gives translations that supersede
	1585	ASCII for 00-1F, 5E, 5F, 7B-7F. The rest I've filled in with their
	1586	normal ASCII values, so in particular, CR, LF and tab still act as
	1587	expected.
	1588
	1589	Also, that web page lists code point 0C as "form feed as FF symbol",
	1590	but for some reason doesn't give the Unicode value U+240C for that.
	1591	I've filled it in.
	1592
	1593	charset CS_SAIL
	1594	0000 2193 03b1 03b2 2227 00ac 03b5 03c0 03bb 0009 000a 000b 240c 000d 221e 2202
	1595	2282 2283 2229 222a 2200 2203 2297 2194 005f 2192 007e 2260 2264 2265 2261 2228
	1596	0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002a 002b 002c 002d 002e 002f
	1597	0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003a 003b 003c 003d 003e 003f
	1598	0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004a 004b 004c 004d 004e 004f
	1599	0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005a 005b 005c 005d 2191 2190
	1600	0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006a 006b 006c 006d 006e 006f
	1601	0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007a 007b 007c 2387 007d 2408
	1602	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1603	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1604	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1605	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1606	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1607	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1608	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
	1609	XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX

+11

-8

charset/sbcsgen.pl less more

64	64	while (<INFH>) {
65	65	chomp;
66	66	y/\r\n//; # robustness in the face of strange line endings
67		if (/^charset (.*)$/) {
68		$charsetname = $1;
	67	if (/^(charset|tables) (.*)$/) {
	68	$tables_only = ($1 eq "tables");
	69	$charsetname = $2;
69	70	@vals = ();
70	71	@sortpriority = map { 0 } 0..255;
71	72	} elsif (/^sortpriority ([^-]*)-([^-]*) (.*)$/) {

77	78	if (scalar @vals > 256) {
78	79	die "$infile:$.: charset $charsetname has more than 256 values\n";
79	80	} elsif (scalar @vals == 256) {
80		&outcharset($charsetname, \@vals, \@sortpriority)
	81	&outcharset($charsetname, \@vals, \@sortpriority, $tables_only)
81	82	if defined $outfile;
82		push @charsetnames, $charsetname;
	83	push @charsetnames, $charsetname unless $tables_only;
83	84	$charsetname = undef;
84	85	@vals = ();
85	86	@sortpriority = map { 0 } 0..255;

127	128	close HEADERFH;
128	129	}
129	130
130		sub outcharset($$$) {
131		my ($name, $vals, $sortpriority) = @_;
	131	sub outcharset($$$$) {
	132	my ($name, $vals, $sortpriority, $tables_only) = @_;
132	133	my ($prefix, $i, @sorted);
133	134
134	135	print "const sbcs_data sbcsdata_$name = {\n";

169	170	}
170	171	printf "\n },\n %d\n", $j;
171	172	print "};\n";
172		print "const charset_spec charset_$name = {\n" .
173		" $name, read_sbcs, write_sbcs, &sbcsdata_$name\n};\n\n";
	173	unless ($tables_only) {
	174	print "const charset_spec charset_$name = {\n" .
	175	" $name, read_sbcs, write_sbcs, &sbcsdata_$name\n};\n\n";
	176	}
174	177	}

+10

-10

charset/shiftjis.c less more

76	76	* charset_state.
77	77	*/
78	78
79		static int write_sjis(charset_spec const *charset, long int input_chr,
80		charset_state *state,
81		void (*emit)(void *ctx, long int output), void *emitctx)
	79	static bool write_sjis(charset_spec const *charset, long int input_chr,
	80	charset_state *state,
	81	void (*emit)(void *ctx, long int output), void *emitctx)
82	82	{
83	83	UNUSEDARG(charset);
84	84	UNUSEDARG(state);
85	85
86	86	if (input_chr == -1)
87		return TRUE; /* stateless; no cleanup required */
	87	return true; /* stateless; no cleanup required */
88	88
89	89	if (input_chr < 0x80 && input_chr != 0x5C && input_chr != 0x7E) {
90	90	emit(emitctx, input_chr);
91		return TRUE;
	91	return true;
92	92	} else if (input_chr == 0xA5) {
93	93	emit(emitctx, 0x5C);
94		return TRUE;
	94	return true;
95	95	} else if (input_chr == 0x203E) {
96	96	emit(emitctx, 0x7E);
97		return TRUE;
	97	return true;
98	98	} else if (input_chr >= 0xFF61 && input_chr <= 0xFF9F) {
99	99	emit(emitctx, input_chr - (0xFF61 - 0xA1));
100		return TRUE;
	100	return true;
101	101	} else {
102	102	int r, c;
103	103	if (unicode_to_jisx0208(input_chr, &r, &c)) {

109	109	if (c >= 0x7F) c++;
110	110	emit(emitctx, r);
111	111	emit(emitctx, c);
112		return TRUE;
	112	return true;
113	113	} else {
114		return FALSE;
	114	return false;
115	115	}
116	116	}
117	117	}

+4

-4

charset/slookup.c less more

5	5	#include "internal.h"
6	6
7	7	#define ENUM_CHARSET(x) extern charset_spec const charset_##x;
8		#include "enum.c"
	8	#include "enum.h"
9	9	#undef ENUM_CHARSET
10	10
11	11	static charset_spec const *const cs_table[] = {
12	12
13	13	#define ENUM_CHARSET(x) &charset_##x,
14		#include "enum.c"
	14	#include "enum.h"
15	15	#undef ENUM_CHARSET
16	16
17	17	};

27	27	return NULL;
28	28	}
29	29
30		int charset_exists(int charset)
	30	bool charset_exists(int charset)
31	31	{
32	32	return charset_find_spec(charset) != NULL;
33	33	}
34	34
35		int charset_is_single_byte(int charset)
	35	bool charset_is_single_byte(int charset)
36	36	{
37	37	charset_spec const *spec = charset_find_spec(charset);
38	38	return spec && spec->read == read_sbcs;

+3

-3

charset/superset.c less more

52	52	}
53	53
54	54	/*
55		* This function returns TRUE if the input charset is a vaguely
56		* sensible superset of ASCII. That is, it returns FALSE for 7-bit
	55	* This function returns true if the input charset is a vaguely
	56	* sensible superset of ASCII. That is, it returns false for 7-bit
57	57	* encoding formats such as HZ and UTF-7.
58	58	*/
59		int charset_contains_ascii(int charset)
	59	bool charset_contains_ascii(int charset)
60	60	{
61	61	return (charset != CS_HZ &&
62	62	charset != CS_UTF7 &&

+3

-3

charset/toucs.c less more

10	10	int writtenlen;
11	11	const wchar_t *errstr;
12	12	int errlen;
13		int stopped;
	13	bool stopped;
14	14	};
15	15
16	16	static void unicode_emit(void *ctx, long int output)

45	45	param->writtenlen++;
46	46	}
47	47	} else {
48		param->stopped = 1;
	48	param->stopped = true;
49	49	}
50	50	}
51	51

63	63	param.errstr = errstr;
64	64	param.errlen = errlen;
65	65	param.writtenlen = 0;
66		param.stopped = 0;
	66	param.stopped = false;
67	67
68	68	if (state)
69	69	localstate = *state; /* structure copy */

+23

-11

charset/utf16.c less more

8	8
9	9	struct utf16 {
10	10	int s0; /* initial value of state->s0 */
	11	bool output_bom;
11	12	};
12	13
13	14	static void read_utf16(charset_spec const *charset, long int input_chr,

153	154	}
154	155	}
155	156
156		static int write_utf16(charset_spec const *charset, long int input_chr,
157		charset_state *state,
158		void (*emit)(void *ctx, long int output),
159		void *emitctx)
	157	static bool write_utf16(charset_spec const *charset, long int input_chr,
	158	charset_state *state,
	159	void (*emit)(void *ctx, long int output),
	160	void *emitctx)
160	161	{
161	162	struct utf16 const *utf = (struct utf16 *)charset->data;
162	163

167	168	*/
168	169
169	170	if (input_chr < 0)
170		return TRUE; /* no cleanup required */
	171	return true; /* no cleanup required */
171	172
172	173	if ((input_chr >= 0xD800 && input_chr < 0xE000) ||
173	174	input_chr >= 0x110000) {
174	175	/*
175	176	* We can't output surrogates, or anything above 0x10FFFF.
176	177	*/
177		return FALSE;
	178	return false;
178	179	}
179	180
180	181	if (!state->s0) {
181	182	state->s0 = 1;
182		emithl(emit, emitctx, utf->s0, 0xFEFF);
	183	if (utf->output_bom)
	184	emithl(emit, emitctx, utf->s0, 0xFEFF);
183	185	}
184	186
185	187	if (input_chr < 0x10000) {

190	192	emithl(emit, emitctx, utf->s0, 0xD800 | ((input_chr >> 10) & 0x3FF));
191	193	emithl(emit, emitctx, utf->s0, 0xDC00 | (input_chr & 0x3FF));
192	194	}
193		return TRUE;
	195	return true;
194	196	}
195	197
196		static const struct utf16 utf16_bigendian = { 0x20000 };
197		static const struct utf16 utf16_littleendian = { 0x10000 };
198		static const struct utf16 utf16_variable_endianness = { 0x30000 };
	198	static const struct utf16 utf16_bigendian = { 0x20000, true };
	199	static const struct utf16 utf16_littleendian = { 0x10000, true };
	200	static const struct utf16 utf16_bigendian_no_bom = { 0x20000, false };
	201	static const struct utf16 utf16_littleendian_no_bom = { 0x10000, false };
	202	static const struct utf16 utf16_variable_endianness = { 0x30000, true };
199	203
200	204	const charset_spec charset_CS_UTF16BE = {
201	205	CS_UTF16BE, read_utf16, write_utf16, &utf16_bigendian
202	206	};
203	207	const charset_spec charset_CS_UTF16LE = {
204	208	CS_UTF16LE, read_utf16, write_utf16, &utf16_littleendian
	209	};
	210	const charset_spec charset_CS_UTF16BE_NO_BOM = {
	211	CS_UTF16BE_NO_BOM, read_utf16, write_utf16, &utf16_bigendian_no_bom
	212	};
	213	const charset_spec charset_CS_UTF16LE_NO_BOM = {
	214	CS_UTF16LE_NO_BOM, read_utf16, write_utf16, &utf16_littleendian_no_bom
205	215	};
206	216	const charset_spec charset_CS_UTF16 = {
207	217	CS_UTF16, read_utf16, write_utf16, &utf16_variable_endianness

212	222	ENUM_CHARSET(CS_UTF16)
213	223	ENUM_CHARSET(CS_UTF16BE)
214	224	ENUM_CHARSET(CS_UTF16LE)
	225	ENUM_CHARSET(CS_UTF16BE_NO_BOM)
	226	ENUM_CHARSET(CS_UTF16LE_NO_BOM)
215	227
216	228	#endif /* ENUM_CHARSETS */

+8

-8

charset/utf7.c less more

164	164	* which will directly encode Set O characters and the other of
165	165	* which will cautiously base64 them.
166	166	*/
167		static int write_utf7(charset_spec const *charset, long int input_chr,
168		charset_state *state,
169		void (*emit)(void *ctx, long int output),
170		void *emitctx)
	167	static bool write_utf7(charset_spec const *charset, long int input_chr,
	168	charset_state *state,
	169	void (*emit)(void *ctx, long int output),
	170	void *emitctx)
171	171	{
172	172	unsigned long hws[2];
173	173	int nhws;

184	184	/*
185	185	* We can't output surrogates, or anything above 0x10FFFF.
186	186	*/
187		return FALSE;
	187	return false;
188	188	}
189	189
190	190	/*

222	222	emit(emitctx, input_chr);
223	223	if (input_chr == '+')
224	224	emit(emitctx, '-'); /* +- encodes + */
225		return TRUE;
	225	return true;
226	226	}
227	227
228	228	/*

237	237	input_chr -= 0x10000;
238	238	if (input_chr >= 0x100000) {
239	239	/* Anything above 0x10FFFF is outside UTF-7 range. */
240		return FALSE;
	240	return false;
241	241	}
242	242
243	243	nhws = 2;

275	275	emit(emitctx, base64_chars[out]);
276	276	}
277	277	}
278		return TRUE;
	278	return true;
279	279	}
280	280
281	281	const charset_spec charset_CS_UTF7 = {

+224

-224

charset/utf8.c less more

198	198	* charset_state.
199	199	*/
200	200
201		int write_utf8(charset_spec const *charset, long int input_chr,
202		charset_state *state,
203		void (*emit)(void *ctx, long int output),
204		void *emitctx)
	201	bool write_utf8(charset_spec const *charset, long int input_chr,
	202	charset_state *state,
	203	void (*emit)(void *ctx, long int output),
	204	void *emitctx)
205	205	{
206	206	UNUSEDARG(charset);
207	207	UNUSEDARG(state);
208	208
209	209	if (input_chr == -1)
210		return TRUE; /* stateless; no cleanup required */
	210	return true; /* stateless; no cleanup required */
211	211
212	212	/*
213	213	* Refuse to output any illegal code points.
214	214	*/
215	215	if (input_chr == 0xFFFE || input_chr == 0xFFFF ||
216	216	(input_chr >= 0xD800 && input_chr < 0xE000)) {
217		return FALSE;
	217	return false;
218	218	} else if (input_chr < 0x80) { /* one-byte character */
219	219	emit(emitctx, input_chr);
220		return TRUE;
	220	return true;
221	221	} else if (input_chr < 0x800) { /* two-byte character */
222	222	emit(emitctx, 0xC0 | (0x1F & (input_chr >> 6)));
223	223	emit(emitctx, 0x80 | (0x3F & (input_chr )));
224		return TRUE;
	224	return true;
225	225	} else if (input_chr < 0x10000) { /* three-byte character */
226	226	emit(emitctx, 0xE0 | (0x0F & (input_chr >> 12)));
227	227	emit(emitctx, 0x80 | (0x3F & (input_chr >> 6)));
228	228	emit(emitctx, 0x80 | (0x3F & (input_chr )));
229		return TRUE;
	229	return true;
230	230	} else if (input_chr < 0x200000) { /* four-byte character */
231	231	emit(emitctx, 0xF0 | (0x07 & (input_chr >> 18)));
232	232	emit(emitctx, 0x80 | (0x3F & (input_chr >> 12)));
233	233	emit(emitctx, 0x80 | (0x3F & (input_chr >> 6)));
234	234	emit(emitctx, 0x80 | (0x3F & (input_chr )));
235		return TRUE;
	235	return true;
236	236	} else if (input_chr < 0x4000000) {/* five-byte character */
237	237	emit(emitctx, 0xF8 | (0x03 & (input_chr >> 24)));
238	238	emit(emitctx, 0x80 | (0x3F & (input_chr >> 18)));
239	239	emit(emitctx, 0x80 | (0x3F & (input_chr >> 12)));
240	240	emit(emitctx, 0x80 | (0x3F & (input_chr >> 6)));
241	241	emit(emitctx, 0x80 | (0x3F & (input_chr )));
242		return TRUE;
	242	return true;
243	243	} else { /* six-byte character */
244	244	emit(emitctx, 0xFC | (0x01 & (input_chr >> 30)));
245	245	emit(emitctx, 0x80 | (0x3F & (input_chr >> 24)));

247	247	emit(emitctx, 0x80 | (0x3F & (input_chr >> 12)));
248	248	emit(emitctx, 0x80 | (0x3F & (input_chr >> 6)));
249	249	emit(emitctx, 0x80 | (0x3F & (input_chr )));
250		return TRUE;
	250	return true;
251	251	}
252	252	}
253	253

289	289	}
290	290	if (l != str[i]) {
291	291	printf("%d: char %d came out as %08x, should be %08x\n",
292		line, i, str[i], l);
	292	line, i, str[i], (unsigned)l);
293	293	total_errs++;
294	294	}
295	295	}

330	330	}
331	331	if (l != str[i]) {
332	332	printf("%d: char %d came out as %08x, should be %08x\n",
333		line, i, str[i], l);
	333	line, i, str[i], (unsigned)l);
334	334	total_errs++;
335	335	}
336	336	}

351	351	{
352	352	printf("read tests beginning\n");
353	353	utf8_read_test(TESTSTR("\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"),
354		0x000003BA, /* GREEK SMALL LETTER KAPPA */
355		0x00001F79, /* GREEK SMALL LETTER OMICRON WITH OXIA */
356		0x000003C3, /* GREEK SMALL LETTER SIGMA */
357		0x000003BC, /* GREEK SMALL LETTER MU */
358		0x000003B5, /* GREEK SMALL LETTER EPSILON */
359		0, -1);
	354	0x000003BAL, /* GREEK SMALL LETTER KAPPA */
	355	0x00001F79L, /* GREEK SMALL LETTER OMICRON WITH OXIA */
	356	0x000003C3L, /* GREEK SMALL LETTER SIGMA */
	357	0x000003BCL, /* GREEK SMALL LETTER MU */
	358	0x000003B5L, /* GREEK SMALL LETTER EPSILON */
	359	0L, -1L);
360	360	utf8_read_test(TESTSTR("\x00"),
361		0x00000000, /* <control> */
362		0, -1);
	361	0x00000000L, /* <control> */
	362	0L, -1L);
363	363	utf8_read_test(TESTSTR("\xC2\x80"),
364		0x00000080, /* <control> */
365		0, -1);
	364	0x00000080L, /* <control> */
	365	0L, -1L);
366	366	utf8_read_test(TESTSTR("\xE0\xA0\x80"),
367		0x00000800, /* <no name available> */
368		0, -1);
	367	0x00000800L, /* <no name available> */
	368	0L, -1L);
369	369	utf8_read_test(TESTSTR("\xF0\x90\x80\x80"),
370		0x00010000, /* <no name available> */
371		0, -1);
	370	0x00010000L, /* <no name available> */
	371	0L, -1L);
372	372	utf8_read_test(TESTSTR("\xF8\x88\x80\x80\x80"),
373		0x00200000, /* <no name available> */
374		0, -1);
	373	0x00200000L, /* <no name available> */
	374	0L, -1L);
375	375	utf8_read_test(TESTSTR("\xFC\x84\x80\x80\x80\x80"),
376		0x04000000, /* <no name available> */
377		0, -1);
	376	0x04000000L, /* <no name available> */
	377	0L, -1L);
378	378	utf8_read_test(TESTSTR("\x7F"),
379		0x0000007F, /* <control> */
380		0, -1);
	379	0x0000007FL, /* <control> */
	380	0L, -1L);
381	381	utf8_read_test(TESTSTR("\xDF\xBF"),
382		0x000007FF, /* <no name available> */
383		0, -1);
	382	0x000007FFL, /* <no name available> */
	383	0L, -1L);
384	384	utf8_read_test(TESTSTR("\xEF\xBF\xBD"),
385		0x0000FFFD, /* REPLACEMENT CHARACTER */
386		0, -1);
	385	0x0000FFFDL, /* REPLACEMENT CHARACTER */
	386	0L, -1L);
387	387	utf8_read_test(TESTSTR("\xEF\xBF\xBF"),
388	388	ERROR, /* <no name available> (invalid char) */
389		0, -1);
	389	0L, -1L);
390	390	utf8_read_test(TESTSTR("\xF7\xBF\xBF\xBF"),
391		0x001FFFFF, /* <no name available> */
392		0, -1);
	391	0x001FFFFFL, /* <no name available> */
	392	0L, -1L);
393	393	utf8_read_test(TESTSTR("\xFB\xBF\xBF\xBF\xBF"),
394		0x03FFFFFF, /* <no name available> */
395		0, -1);
	394	0x03FFFFFFL, /* <no name available> */
	395	0L, -1L);
396	396	utf8_read_test(TESTSTR("\xFD\xBF\xBF\xBF\xBF\xBF"),
397		0x7FFFFFFF, /* <no name available> */
398		0, -1);
	397	0x7FFFFFFFL, /* <no name available> */
	398	0L, -1L);
399	399	utf8_read_test(TESTSTR("\xED\x9F\xBF"),
400		0x0000D7FF, /* <no name available> */
401		0, -1);
	400	0x0000D7FFL, /* <no name available> */
	401	0L, -1L);
402	402	utf8_read_test(TESTSTR("\xEE\x80\x80"),
403		0x0000E000, /* <Private Use, First> */
404		0, -1);
	403	0x0000E000L, /* <Private Use, First> */
	404	0L, -1L);
405	405	utf8_read_test(TESTSTR("\xEF\xBF\xBD"),
406		0x0000FFFD, /* REPLACEMENT CHARACTER */
407		0, -1);
	406	0x0000FFFDL, /* REPLACEMENT CHARACTER */
	407	0L, -1L);
408	408	utf8_read_test(TESTSTR("\xF4\x8F\xBF\xBF"),
409		0x0010FFFF, /* <no name available> */
410		0, -1);
	409	0x0010FFFFL, /* <no name available> */
	410	0L, -1L);
411	411	utf8_read_test(TESTSTR("\xF4\x90\x80\x80"),
412		0x00110000, /* <no name available> */
413		0, -1);
	412	0x00110000L, /* <no name available> */
	413	0L, -1L);
414	414	utf8_read_test(TESTSTR("\x80"),
415	415	ERROR, /* (unexpected continuation byte) */
416		0, -1);
	416	0L, -1L);
417	417	utf8_read_test(TESTSTR("\xBF"),
418	418	ERROR, /* (unexpected continuation byte) */
419		0, -1);
	419	0L, -1L);
420	420	utf8_read_test(TESTSTR("\x80\xBF"),
421	421	ERROR, /* (unexpected continuation byte) */
422	422	ERROR, /* (unexpected continuation byte) */
423		0, -1);
	423	0L, -1L);
424	424	utf8_read_test(TESTSTR("\x80\xBF\x80"),
425	425	ERROR, /* (unexpected continuation byte) */
426	426	ERROR, /* (unexpected continuation byte) */
427	427	ERROR, /* (unexpected continuation byte) */
428		0, -1);
	428	0L, -1L);
429	429	utf8_read_test(TESTSTR("\x80\xBF\x80\xBF"),
430	430	ERROR, /* (unexpected continuation byte) */
431	431	ERROR, /* (unexpected continuation byte) */
432	432	ERROR, /* (unexpected continuation byte) */
433	433	ERROR, /* (unexpected continuation byte) */
434		0, -1);
	434	0L, -1L);
435	435	utf8_read_test(TESTSTR("\x80\xBF\x80\xBF\x80"),
436	436	ERROR, /* (unexpected continuation byte) */
437	437	ERROR, /* (unexpected continuation byte) */
438	438	ERROR, /* (unexpected continuation byte) */
439	439	ERROR, /* (unexpected continuation byte) */
440	440	ERROR, /* (unexpected continuation byte) */
441		0, -1);
	441	0L, -1L);
442	442	utf8_read_test(TESTSTR("\x80\xBF\x80\xBF\x80\xBF"),
443	443	ERROR, /* (unexpected continuation byte) */
444	444	ERROR, /* (unexpected continuation byte) */

446	446	ERROR, /* (unexpected continuation byte) */
447	447	ERROR, /* (unexpected continuation byte) */
448	448	ERROR, /* (unexpected continuation byte) */
449		0, -1);
	449	0L, -1L);
450	450	utf8_read_test(TESTSTR("\x80\xBF\x80\xBF\x80\xBF\x80"),
451	451	ERROR, /* (unexpected continuation byte) */
452	452	ERROR, /* (unexpected continuation byte) */

455	455	ERROR, /* (unexpected continuation byte) */
456	456	ERROR, /* (unexpected continuation byte) */
457	457	ERROR, /* (unexpected continuation byte) */
458		0, -1);
	458	0L, -1L);
459	459	utf8_read_test(TESTSTR("\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"),
460	460	ERROR, /* (unexpected continuation byte) */
461	461	ERROR, /* (unexpected continuation byte) */

521	521	ERROR, /* (unexpected continuation byte) */
522	522	ERROR, /* (unexpected continuation byte) */
523	523	ERROR, /* (unexpected continuation byte) */
524		0, -1);
	524	0L, -1L);
525	525	utf8_read_test(TESTSTR("\xC0\x20\xC1\x20\xC2\x20\xC3\x20\xC4\x20\xC5\x20\xC6\x20\xC7\x20"),
526	526	ERROR, /* (incomplete sequence) */
527		0x00000020, /* SPACE */
528		ERROR, /* (incomplete sequence) */
529		0x00000020, /* SPACE */
530		ERROR, /* (incomplete sequence) */
531		0x00000020, /* SPACE */
532		ERROR, /* (incomplete sequence) */
533		0x00000020, /* SPACE */
534		ERROR, /* (incomplete sequence) */
535		0x00000020, /* SPACE */
536		ERROR, /* (incomplete sequence) */
537		0x00000020, /* SPACE */
538		ERROR, /* (incomplete sequence) */
539		0x00000020, /* SPACE */
540		ERROR, /* (incomplete sequence) */
541		0x00000020, /* SPACE */
542		0, -1);
	527	0x00000020L, /* SPACE */
	528	ERROR, /* (incomplete sequence) */
	529	0x00000020L, /* SPACE */
	530	ERROR, /* (incomplete sequence) */
	531	0x00000020L, /* SPACE */
	532	ERROR, /* (incomplete sequence) */
	533	0x00000020L, /* SPACE */
	534	ERROR, /* (incomplete sequence) */
	535	0x00000020L, /* SPACE */
	536	ERROR, /* (incomplete sequence) */
	537	0x00000020L, /* SPACE */
	538	ERROR, /* (incomplete sequence) */
	539	0x00000020L, /* SPACE */
	540	ERROR, /* (incomplete sequence) */
	541	0x00000020L, /* SPACE */
	542	0L, -1L);
543	543	utf8_read_test(TESTSTR("\xE0\x20\xE1\x20\xE2\x20\xE3\x20\xE4\x20\xE5\x20\xE6\x20\xE7\x20\xE8\x20\xE9\x20\xEA\x20\xEB\x20\xEC\x20\xED\x20\xEE\x20\xEF\x20"),
544	544	ERROR, /* (incomplete sequence) */
545		0x00000020, /* SPACE */
546		ERROR, /* (incomplete sequence) */
547		0x00000020, /* SPACE */
548		ERROR, /* (incomplete sequence) */
549		0x00000020, /* SPACE */
550		ERROR, /* (incomplete sequence) */
551		0x00000020, /* SPACE */
552		ERROR, /* (incomplete sequence) */
553		0x00000020, /* SPACE */
554		ERROR, /* (incomplete sequence) */
555		0x00000020, /* SPACE */
556		ERROR, /* (incomplete sequence) */
557		0x00000020, /* SPACE */
558		ERROR, /* (incomplete sequence) */
559		0x00000020, /* SPACE */
560		ERROR, /* (incomplete sequence) */
561		0x00000020, /* SPACE */
562		ERROR, /* (incomplete sequence) */
563		0x00000020, /* SPACE */
564		ERROR, /* (incomplete sequence) */
565		0x00000020, /* SPACE */
566		ERROR, /* (incomplete sequence) */
567		0x00000020, /* SPACE */
568		ERROR, /* (incomplete sequence) */
569		0x00000020, /* SPACE */
570		ERROR, /* (incomplete sequence) */
571		0x00000020, /* SPACE */
572		ERROR, /* (incomplete sequence) */
573		0x00000020, /* SPACE */
574		ERROR, /* (incomplete sequence) */
575		0x00000020, /* SPACE */
576		0, -1);
	545	0x00000020L, /* SPACE */
	546	ERROR, /* (incomplete sequence) */
	547	0x00000020L, /* SPACE */
	548	ERROR, /* (incomplete sequence) */
	549	0x00000020L, /* SPACE */
	550	ERROR, /* (incomplete sequence) */
	551	0x00000020L, /* SPACE */
	552	ERROR, /* (incomplete sequence) */
	553	0x00000020L, /* SPACE */
	554	ERROR, /* (incomplete sequence) */
	555	0x00000020L, /* SPACE */
	556	ERROR, /* (incomplete sequence) */
	557	0x00000020L, /* SPACE */
	558	ERROR, /* (incomplete sequence) */
	559	0x00000020L, /* SPACE */
	560	ERROR, /* (incomplete sequence) */
	561	0x00000020L, /* SPACE */
	562	ERROR, /* (incomplete sequence) */
	563	0x00000020L, /* SPACE */
	564	ERROR, /* (incomplete sequence) */
	565	0x00000020L, /* SPACE */
	566	ERROR, /* (incomplete sequence) */
	567	0x00000020L, /* SPACE */
	568	ERROR, /* (incomplete sequence) */
	569	0x00000020L, /* SPACE */
	570	ERROR, /* (incomplete sequence) */
	571	0x00000020L, /* SPACE */
	572	ERROR, /* (incomplete sequence) */
	573	0x00000020L, /* SPACE */
	574	ERROR, /* (incomplete sequence) */
	575	0x00000020L, /* SPACE */
	576	0L, -1L);
577	577	utf8_read_test(TESTSTR("\xF0\x20\xF1\x20\xF2\x20\xF3\x20\xF4\x20\xF5\x20\xF6\x20\xF7\x20"),
578	578	ERROR, /* (incomplete sequence) */
579		0x00000020, /* SPACE */
580		ERROR, /* (incomplete sequence) */
581		0x00000020, /* SPACE */
582		ERROR, /* (incomplete sequence) */
583		0x00000020, /* SPACE */
584		ERROR, /* (incomplete sequence) */
585		0x00000020, /* SPACE */
586		ERROR, /* (incomplete sequence) */
587		0x00000020, /* SPACE */
588		ERROR, /* (incomplete sequence) */
589		0x00000020, /* SPACE */
590		ERROR, /* (incomplete sequence) */
591		0x00000020, /* SPACE */
592		ERROR, /* (incomplete sequence) */
593		0x00000020, /* SPACE */
594		0, -1);
	579	0x00000020L, /* SPACE */
	580	ERROR, /* (incomplete sequence) */
	581	0x00000020L, /* SPACE */
	582	ERROR, /* (incomplete sequence) */
	583	0x00000020L, /* SPACE */
	584	ERROR, /* (incomplete sequence) */
	585	0x00000020L, /* SPACE */
	586	ERROR, /* (incomplete sequence) */
	587	0x00000020L, /* SPACE */
	588	ERROR, /* (incomplete sequence) */
	589	0x00000020L, /* SPACE */
	590	ERROR, /* (incomplete sequence) */
	591	0x00000020L, /* SPACE */
	592	ERROR, /* (incomplete sequence) */
	593	0x00000020L, /* SPACE */
	594	0L, -1L);
595	595	utf8_read_test(TESTSTR("\xF8\x20\xF9\x20\xFA\x20\xFB\x20"),
596	596	ERROR, /* (incomplete sequence) */
597		0x00000020, /* SPACE */
598		ERROR, /* (incomplete sequence) */
599		0x00000020, /* SPACE */
600		ERROR, /* (incomplete sequence) */
601		0x00000020, /* SPACE */
602		ERROR, /* (incomplete sequence) */
603		0x00000020, /* SPACE */
604		0, -1);
	597	0x00000020L, /* SPACE */
	598	ERROR, /* (incomplete sequence) */
	599	0x00000020L, /* SPACE */
	600	ERROR, /* (incomplete sequence) */
	601	0x00000020L, /* SPACE */
	602	ERROR, /* (incomplete sequence) */
	603	0x00000020L, /* SPACE */
	604	0L, -1L);
605	605	utf8_read_test(TESTSTR("\xFC\x20\xFD\x20"),
606	606	ERROR, /* (incomplete sequence) */
607		0x00000020, /* SPACE */
608		ERROR, /* (incomplete sequence) */
609		0x00000020, /* SPACE */
610		0, -1);
	607	0x00000020L, /* SPACE */
	608	ERROR, /* (incomplete sequence) */
	609	0x00000020L, /* SPACE */
	610	0L, -1L);
611	611	utf8_read_test(TESTSTR("\xC0"),
612	612	ERROR, /* (incomplete sequence) */
613		0, -1);
	613	0L, -1L);
614	614	utf8_read_test(TESTSTR("\xE0\x80"),
615	615	ERROR, /* (incomplete sequence) */
616		0, -1);
	616	0L, -1L);
617	617	utf8_read_test(TESTSTR("\xF0\x80\x80"),
618	618	ERROR, /* (incomplete sequence) */
619		0, -1);
	619	0L, -1L);
620	620	utf8_read_test(TESTSTR("\xF8\x80\x80\x80"),
621	621	ERROR, /* (incomplete sequence) */
622		0, -1);
	622	0L, -1L);
623	623	utf8_read_test(TESTSTR("\xFC\x80\x80\x80\x80"),
624	624	ERROR, /* (incomplete sequence) */
625		0, -1);
	625	0L, -1L);
626	626	utf8_read_test(TESTSTR("\xDF"),
627	627	ERROR, /* (incomplete sequence) */
628		0, -1);
	628	0L, -1L);
629	629	utf8_read_test(TESTSTR("\xEF\xBF"),
630	630	ERROR, /* (incomplete sequence) */
631		0, -1);
	631	0L, -1L);
632	632	utf8_read_test(TESTSTR("\xF7\xBF\xBF"),
633	633	ERROR, /* (incomplete sequence) */
634		0, -1);
	634	0L, -1L);
635	635	utf8_read_test(TESTSTR("\xFB\xBF\xBF\xBF"),
636	636	ERROR, /* (incomplete sequence) */
637		0, -1);
	637	0L, -1L);
638	638	utf8_read_test(TESTSTR("\xFD\xBF\xBF\xBF\xBF"),
639	639	ERROR, /* (incomplete sequence) */
640		0, -1);
	640	0L, -1L);
641	641	utf8_read_test(TESTSTR("\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF"),
642	642	ERROR, /* (incomplete sequence) */
643	643	ERROR, /* (incomplete sequence) */

649	649	ERROR, /* (incomplete sequence) */
650	650	ERROR, /* (incomplete sequence) */
651	651	ERROR, /* (incomplete sequence) */
652		0, -1);
	652	0L, -1L);
653	653	utf8_read_test(TESTSTR("\xFE"),
654	654	ERROR, /* (invalid UTF-8 byte) */
655		0, -1);
	655	0L, -1L);
656	656	utf8_read_test(TESTSTR("\xFF"),
657	657	ERROR, /* (invalid UTF-8 byte) */
658		0, -1);
	658	0L, -1L);
659	659	utf8_read_test(TESTSTR("\xFE\xFE\xFF\xFF"),
660	660	ERROR, /* (invalid UTF-8 byte) */
661	661	ERROR, /* (invalid UTF-8 byte) */
662	662	ERROR, /* (invalid UTF-8 byte) */
663	663	ERROR, /* (invalid UTF-8 byte) */
664		0, -1);
	664	0L, -1L);
665	665	utf8_read_test(TESTSTR("\xC0\xAF"),
666	666	ERROR, /* SOLIDUS (overlong form of 2F) */
667		0, -1);
	667	0L, -1L);
668	668	utf8_read_test(TESTSTR("\xE0\x80\xAF"),
669	669	ERROR, /* SOLIDUS (overlong form of 2F) */
670		0, -1);
	670	0L, -1L);
671	671	utf8_read_test(TESTSTR("\xF0\x80\x80\xAF"),
672	672	ERROR, /* SOLIDUS (overlong form of 2F) */
673		0, -1);
	673	0L, -1L);
674	674	utf8_read_test(TESTSTR("\xF8\x80\x80\x80\xAF"),
675	675	ERROR, /* SOLIDUS (overlong form of 2F) */
676		0, -1);
	676	0L, -1L);
677	677	utf8_read_test(TESTSTR("\xFC\x80\x80\x80\x80\xAF"),
678	678	ERROR, /* SOLIDUS (overlong form of 2F) */
679		0, -1);
	679	0L, -1L);
680	680	utf8_read_test(TESTSTR("\xC1\xBF"),
681	681	ERROR, /* <control> (overlong form of 7F) */
682		0, -1);
	682	0L, -1L);
683	683	utf8_read_test(TESTSTR("\xE0\x9F\xBF"),
684	684	ERROR, /* <no name available> (overlong form of DF BF) */
685		0, -1);
	685	0L, -1L);
686	686	utf8_read_test(TESTSTR("\xF0\x8F\xBF\xBF"),
687	687	ERROR, /* <no name available> (overlong form of EF BF BF) (invalid char) */
688		0, -1);
	688	0L, -1L);
689	689	utf8_read_test(TESTSTR("\xF8\x87\xBF\xBF\xBF"),
690	690	ERROR, /* <no name available> (overlong form of F7 BF BF BF) */
691		0, -1);
	691	0L, -1L);
692	692	utf8_read_test(TESTSTR("\xFC\x83\xBF\xBF\xBF\xBF"),
693	693	ERROR, /* <no name available> (overlong form of FB BF BF BF BF) */
694		0, -1);
	694	0L, -1L);
695	695	utf8_read_test(TESTSTR("\xC0\x80"),
696	696	ERROR, /* <control> (overlong form of 00) */
697		0, -1);
	697	0L, -1L);
698	698	utf8_read_test(TESTSTR("\xE0\x80\x80"),
699	699	ERROR, /* <control> (overlong form of 00) */
700		0, -1);
	700	0L, -1L);
701	701	utf8_read_test(TESTSTR("\xF0\x80\x80\x80"),
702	702	ERROR, /* <control> (overlong form of 00) */
703		0, -1);
	703	0L, -1L);
704	704	utf8_read_test(TESTSTR("\xF8\x80\x80\x80\x80"),
705	705	ERROR, /* <control> (overlong form of 00) */
706		0, -1);
	706	0L, -1L);
707	707	utf8_read_test(TESTSTR("\xFC\x80\x80\x80\x80\x80"),
708	708	ERROR, /* <control> (overlong form of 00) */
709		0, -1);
	709	0L, -1L);
710	710	utf8_read_test(TESTSTR("\xED\xA0\x80"),
711	711	ERROR, /* <Non Private Use High Surrogate, First> (surrogate) */
712		0, -1);
	712	0L, -1L);
713	713	utf8_read_test(TESTSTR("\xED\xAD\xBF"),
714	714	ERROR, /* <Non Private Use High Surrogate, Last> (surrogate) */
715		0, -1);
	715	0L, -1L);
716	716	utf8_read_test(TESTSTR("\xED\xAE\x80"),
717	717	ERROR, /* <Private Use High Surrogate, First> (surrogate) */
718		0, -1);
	718	0L, -1L);
719	719	utf8_read_test(TESTSTR("\xED\xAF\xBF"),
720	720	ERROR, /* <Private Use High Surrogate, Last> (surrogate) */
721		0, -1);
	721	0L, -1L);
722	722	utf8_read_test(TESTSTR("\xED\xB0\x80"),
723	723	ERROR, /* <Low Surrogate, First> (surrogate) */
724		0, -1);
	724	0L, -1L);
725	725	utf8_read_test(TESTSTR("\xED\xBE\x80"),
726	726	ERROR, /* <no name available> (surrogate) */
727		0, -1);
	727	0L, -1L);
728	728	utf8_read_test(TESTSTR("\xED\xBF\xBF"),
729	729	ERROR, /* <Low Surrogate, Last> (surrogate) */
730		0, -1);
	730	0L, -1L);
731	731	utf8_read_test(TESTSTR("\xED\xA0\x80\xED\xB0\x80"),
732	732	ERROR, /* <Non Private Use High Surrogate, First> (surrogate) */
733	733	ERROR, /* <Low Surrogate, First> (surrogate) */
734		0, -1);
	734	0L, -1L);
735	735	utf8_read_test(TESTSTR("\xED\xA0\x80\xED\xBF\xBF"),
736	736	ERROR, /* <Non Private Use High Surrogate, First> (surrogate) */
737	737	ERROR, /* <Low Surrogate, Last> (surrogate) */
738		0, -1);
	738	0L, -1L);
739	739	utf8_read_test(TESTSTR("\xED\xAD\xBF\xED\xB0\x80"),
740	740	ERROR, /* <Non Private Use High Surrogate, Last> (surrogate) */
741	741	ERROR, /* <Low Surrogate, First> (surrogate) */
742		0, -1);
	742	0L, -1L);
743	743	utf8_read_test(TESTSTR("\xED\xAD\xBF\xED\xBF\xBF"),
744	744	ERROR, /* <Non Private Use High Surrogate, Last> (surrogate) */
745	745	ERROR, /* <Low Surrogate, Last> (surrogate) */
746		0, -1);
	746	0L, -1L);
747	747	utf8_read_test(TESTSTR("\xED\xAE\x80\xED\xB0\x80"),
748	748	ERROR, /* <Private Use High Surrogate, First> (surrogate) */
749	749	ERROR, /* <Low Surrogate, First> (surrogate) */
750		0, -1);
	750	0L, -1L);
751	751	utf8_read_test(TESTSTR("\xED\xAE\x80\xED\xBF\xBF"),
752	752	ERROR, /* <Private Use High Surrogate, First> (surrogate) */
753	753	ERROR, /* <Low Surrogate, Last> (surrogate) */
754		0, -1);
	754	0L, -1L);
755	755	utf8_read_test(TESTSTR("\xED\xAF\xBF\xED\xB0\x80"),
756	756	ERROR, /* <Private Use High Surrogate, Last> (surrogate) */
757	757	ERROR, /* <Low Surrogate, First> (surrogate) */
758		0, -1);
	758	0L, -1L);
759	759	utf8_read_test(TESTSTR("\xED\xAF\xBF\xED\xBF\xBF"),
760	760	ERROR, /* <Private Use High Surrogate, Last> (surrogate) */
761	761	ERROR, /* <Low Surrogate, Last> (surrogate) */
762		0, -1);
	762	0L, -1L);
763	763	utf8_read_test(TESTSTR("\xEF\xBF\xBE"),
764	764	ERROR, /* <no name available> (invalid char) */
765		0, -1);
	765	0L, -1L);
766	766	utf8_read_test(TESTSTR("\xEF\xBF\xBF"),
767	767	ERROR, /* <no name available> (invalid char) */
768		0, -1);
	768	0L, -1L);
769	769	printf("read tests completed\n");
770	770	printf("write tests beginning\n");
771	771	{
772	772	const static long str[] =
773	773	{0x03BAL, 0x1F79L, 0x03C3L, 0x03BCL, 0x03B5L, 0};
774	774	utf8_write_test(TESTSTR(str),
775		0xCE, 0xBA,
776		0xE1, 0xBD, 0xB9,
777		0xCF, 0x83,
778		0xCE, 0xBC,
779		0xCE, 0xB5,
780		0, -1);
	775	0xCEL, 0xBAL,
	776	0xE1L, 0xBDL, 0xB9L,
	777	0xCFL, 0x83L,
	778	0xCEL, 0xBCL,
	779	0xCEL, 0xB5L,
	780	0L, -1L);
781	781	}
782	782	{
783	783	const static long str[] = {0x0000L, 0};
784	784	utf8_write_test(TESTSTR(str),
785		0x00,
786		0, -1);
	785	0x00L,
	786	0L, -1L);
787	787	}
788	788	{
789	789	const static long str[] = {0x0080L, 0};
790	790	utf8_write_test(TESTSTR(str),
791		0xC2, 0x80,
792		0, -1);
	791	0xC2L, 0x80L,
	792	0L, -1L);
793	793	}
794	794	{
795	795	const static long str[] = {0x0800L, 0};
796	796	utf8_write_test(TESTSTR(str),
797		0xE0, 0xA0, 0x80,
798		0, -1);
	797	0xE0L, 0xA0L, 0x80L,
	798	0L, -1L);
799	799	}
800	800	{
801	801	const static long str[] = {0x00010000L, 0};
802	802	utf8_write_test(TESTSTR(str),
803		0xF0, 0x90, 0x80, 0x80,
804		0, -1);
	803	0xF0L, 0x90L, 0x80L, 0x80L,
	804	0L, -1L);
805	805	}
806	806	{
807	807	const static long str[] = {0x00200000L, 0};
808	808	utf8_write_test(TESTSTR(str),
809		0xF8, 0x88, 0x80, 0x80, 0x80,
810		0, -1);
	809	0xF8L, 0x88L, 0x80L, 0x80L, 0x80L,
	810	0L, -1L);
811	811	}
812	812	{
813	813	const static long str[] = {0x04000000L, 0};
814	814	utf8_write_test(TESTSTR(str),
815		0xFC, 0x84, 0x80, 0x80, 0x80, 0x80,
816		0, -1);
	815	0xFCL, 0x84L, 0x80L, 0x80L, 0x80L, 0x80L,
	816	0L, -1L);
817	817	}
818	818	{
819	819	const static long str[] = {0x007FL, 0};
820	820	utf8_write_test(TESTSTR(str),
821		0x7F,
822		0, -1);
	821	0x7FL,
	822	0L, -1L);
823	823	}
824	824	{
825	825	const static long str[] = {0x07FFL, 0};
826	826	utf8_write_test(TESTSTR(str),
827		0xDF, 0xBF,
828		0, -1);
	827	0xDFL, 0xBFL,
	828	0L, -1L);
829	829	}
830	830	{
831	831	const static long str[] = {0xFFFDL, 0};
832	832	utf8_write_test(TESTSTR(str),
833		0xEF, 0xBF, 0xBD,
834		0, -1);
	833	0xEFL, 0xBFL, 0xBDL,
	834	0L, -1L);
835	835	}
836	836	{
837	837	const static long str[] = {0xFFFFL, 0};
838	838	utf8_write_test(TESTSTR(str),
839	839	ERROR,
840		0, -1);
	840	0L, -1L);
841	841	}
842	842	{
843	843	const static long str[] = {0x001FFFFFL, 0};
844	844	utf8_write_test(TESTSTR(str),
845		0xF7, 0xBF, 0xBF, 0xBF,
846		0, -1);
	845	0xF7L, 0xBFL, 0xBFL, 0xBFL,
	846	0L, -1L);
847	847	}
848	848	{
849	849	const static long str[] = {0x03FFFFFFL, 0};
850	850	utf8_write_test(TESTSTR(str),
851		0xFB, 0xBF, 0xBF, 0xBF, 0xBF,
852		0, -1);
	851	0xFBL, 0xBFL, 0xBFL, 0xBFL, 0xBFL,
	852	0L, -1L);
853	853	}
854	854	{
855	855	const static long str[] = {0x7FFFFFFFL, 0};
856	856	utf8_write_test(TESTSTR(str),
857		0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF,
858		0, -1);
	857	0xFDL, 0xBFL, 0xBFL, 0xBFL, 0xBFL, 0xBFL,
	858	0L, -1L);
859	859	}
860	860	{
861	861	const static long str[] = {0xD7FFL, 0};
862	862	utf8_write_test(TESTSTR(str),
863		0xED, 0x9F, 0xBF,
864		0, -1);
	863	0xEDL, 0x9FL, 0xBFL,
	864	0L, -1L);
865	865	}
866	866	{
867	867	const static long str[] = {0xD800L, 0};
868	868	utf8_write_test(TESTSTR(str),
869	869	ERROR,
870		0, -1);
	870	0L, -1L);
871	871	}
872	872	{
873	873	const static long str[] = {0xD800L, 0xDC00L, 0};
874	874	utf8_write_test(TESTSTR(str),
875	875	ERROR,
876	876	ERROR,
877		0, -1);
	877	0L, -1L);
878	878	}
879	879	{
880	880	const static long str[] = {0xDFFFL, 0};
881	881	utf8_write_test(TESTSTR(str),
882	882	ERROR,
883		0, -1);
	883	0L, -1L);
884	884	}
885	885	{
886	886	const static long str[] = {0xE000L, 0};
887	887	utf8_write_test(TESTSTR(str),
888		0xEE, 0x80, 0x80,
889		0, -1);
	888	0xEEL, 0x80L, 0x80L,
	889	0L, -1L);
890	890	}
891	891	printf("write tests completed\n");
892	892

+9

-9

contents.c less more

10	10	struct numberstate_Tag {
11	11	int chapternum;
12	12	int appendixnum;
13		int ischapter;
	13	bool ischapter;
14	14	int *sectionlevels;
15	15	paragraph **currentsects;
16	16	paragraph *lastsect;

27	27	numberstate *ret = snew(numberstate);
28	28	ret->chapternum = 0;
29	29	ret->appendixnum = -1;
30		ret->ischapter = 1;
	30	ret->ischapter = true;
31	31	ret->oklevel = -1; /* not even in a chapter yet */
32	32	ret->maxsectlevel = 32;
33	33	ret->sectionlevels = snewn(ret->maxsectlevel, int);

52	52	mnewword->type = word_Normal;
53	53	mnewword->alt = NULL;
54	54	mnewword->next = NULL;
55		mnewword->breaks = FALSE;
	55	mnewword->breaks = false;
56	56	mnewword->aux = 0;
57	57	**wret = mnewword;
58	58	*wret = &mnewword->next;

64	64	mnewword->type = word_WhiteSpace;
65	65	mnewword->alt = NULL;
66	66	mnewword->next = NULL;
67		mnewword->breaks = FALSE;
	67	mnewword->breaks = false;
68	68	mnewword->aux = 0;
69	69	**wret = mnewword;
70	70	*wret = &mnewword->next;

128	128	}
129	129
130	130	word *number_mktext(numberstate *state, paragraph *p, wchar_t *category,
131		int *prev, int *errflag) {
	131	int *prev, bool *errflag, errorstate *es) {
132	132	word *ret = NULL;
133	133	word **ret2 = &ret;
134	134	word **pret = &ret;

149	149	dospace(&pret);
150	150	ret2 = pret;
151	151	donumber(&pret, state->chapternum);
152		state->ischapter = 1;
	152	state->ischapter = true;
153	153	state->oklevel = 0;
154	154	level = -1;
155	155	break;

157	157	case para_Subsect:
158	158	level = (p->type == para_Heading ? 0 : p->aux);
159	159	if (level > state->oklevel) {
160		err_sectjump(&p->fpos);
161		*errflag = TRUE;
	160	err_sectjump(es, &p->fpos);
	161	*errflag = true;
162	162	ret = NULL;
163	163	break;
164	164	}

193	193	dospace(&pret);
194	194	ret2 = pret;
195	195	doanumber(&pret, state->appendixnum);
196		state->ischapter = 0;
	196	state->ischapter = false;
197	197	state->oklevel = 0;
198	198	level = -1;
199	199	break;

+27

-25

deflate.c less more

331	331	int outlen, outsize;
332	332	unsigned long outbits;
333	333	int noutbits;
334		int firstblock;
	334	bool firstblock;
335	335	unsigned long *syms;
336	336	int symstart, nsyms;
337	337	int type;
338	338	unsigned long checksum;
339	339	unsigned long datasize;
340		int lastblock;
341		int finished;
	340	bool lastblock;
	341	bool finished;
342	342	unsigned char static_len1[288], static_len2[30];
343	343	int static_code1[288], static_code2[30];
344	344	struct huftrees sht;

434	434	int treesyms[286 + 30];
435	435	int codelen[19];
436	436	int i, ntreesrc, ntreesyms;
437		int dynamic, blklen;
	437	bool dynamic;
	438	int blklen;
438	439	struct huftrees dht;
439	440	const struct huftrees *ht;
440	441	#ifdef STATISTICS

962	963	out = snew(deflate_compress_ctx);
963	964	out->type = type;
964	965	out->outbits = out->noutbits = 0;
965		out->firstblock = TRUE;
	966	out->firstblock = true;
966	967	#ifdef STATISTICS
967	968	out->bitcount = 0;
968	969	#endif

972	973
973	974	out->checksum = (type == DEFLATE_TYPE_ZLIB ? 1 : 0);
974	975	out->datasize = 0;
975		out->lastblock = FALSE;
976		out->finished = FALSE;
	976	out->lastblock = false;
	977	out->finished = false;
977	978
978	979	/*
979	980	* Build the static Huffman tables now, so we'll have them

1059	1060	outbits(out, 0xFF02, 16); /* xflags, OS */
1060	1061	break;
1061	1062	}
1062		out->firstblock = FALSE;
	1063	out->firstblock = false;
1063	1064	}
1064	1065
1065	1066	/*
1066	1067	* Feed our data to the LZ77 compression phase.
1067	1068	*/
1068		lz77_compress(ectx, block, len, TRUE);
	1069	lz77_compress(ectx, block, len, true);
1069	1070
1070	1071	/*
1071	1072	* Update checksums and counters.

1111	1112	/*
1112	1113	* Output a block with BFINAL set.
1113	1114	*/
1114		out->lastblock = TRUE;
	1115	out->lastblock = true;
1115	1116	flushblock(out);
1116	1117
1117	1118	/*

1142	1143	break;
1143	1144	}
1144	1145
1145		out->finished = TRUE;
	1146	out->finished = true;
1146	1147	break;
1147	1148	}
1148	1149

1313	1314	CRC1, CRC2, ILEN1, ILEN2,
1314	1315	FINALSPIN
1315	1316	} state;
1316		int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len,
1317		lenrep, lastblock;
	1317	int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len, lenrep;
	1318	bool lastblock;
1318	1319	int uncomplen;
1319	1320	unsigned char lenlen[19];
1320	1321	unsigned char lengths[286 + 32];

1365	1366	dctx->nbits = 0;
1366	1367	dctx->winpos = 0;
1367	1368	dctx->type = type;
1368		dctx->lastblock = FALSE;
	1369	dctx->lastblock = false;
1369	1370	dctx->checksum = (type == DEFLATE_TYPE_ZLIB ? 1 : 0);
1370	1371	dctx->bytesout = 0;
1371	1372	dctx->gzflags = dctx->gzextralen = 0;

1622	1623	goto finished; /* done all we can */
1623	1624	bfinal = dctx->bits & 1;
1624	1625	if (bfinal)
1625		dctx->lastblock = TRUE;
	1626	dctx->lastblock = true;
1626	1627	EATBITS(1);
1627	1628	btype = dctx->bits & 3;
1628	1629	EATBITS(2);

2029	2030	int ret, err, outlen;
2030	2031	deflate_decompress_ctx *dhandle;
2031	2032	deflate_compress_ctx *chandle;
2032		int type = DEFLATE_TYPE_ZLIB, opts = TRUE;
2033		int compress = FALSE, decompress = FALSE;
2034		int got_arg = FALSE;
	2033	int type = DEFLATE_TYPE_ZLIB;
	2034	bool opts = true;
	2035	bool compress = false, decompress = false;
	2036	bool got_arg = false;
2035	2037	char *filename = NULL;
2036	2038	FILE *fp;
2037	2039
2038	2040	while (--argc) {
2039	2041	char *p = *++argv;
2040	2042
2041		got_arg = TRUE;
	2043	got_arg = true;
2042	2044
2043	2045	if (p[0] == '-' && opts) {
2044	2046	if (!strcmp(p, "-b"))

2046	2048	else if (!strcmp(p, "-g"))
2047	2049	type = DEFLATE_TYPE_GZIP;
2048	2050	else if (!strcmp(p, "-c"))
2049		compress = TRUE;
	2051	compress = true;
2050	2052	else if (!strcmp(p, "-d"))
2051		decompress = TRUE;
	2053	decompress = true;
2052	2054	else if (!strcmp(p, "-a"))
2053		analyse_level++, decompress = TRUE;
	2055	analyse_level++, decompress = true;
2054	2056	else if (!strcmp(p, "--"))
2055		opts = FALSE; /* next thing is filename */
	2057	opts = false; /* next thing is filename */
2056	2058	else {
2057	2059	fprintf(stderr, "unknown command line option '%s'\n", p);
2058	2060	return 1;

2158	2160	unsigned char buf[65536], *outbuf, *outbuf2;
2159	2161	int ret, err, outlen, outlen2;
2160	2162	int dlen = 0, clen = 0;
2161		int opts = TRUE;
	2163	int opts = true;
2162	2164
2163	2165	while (--argc) {
2164	2166	char *p = *++argv;
2165	2167
2166	2168	if (p[0] == '-' && opts) {
2167	2169	if (!strcmp(p, "--"))
2168		opts = FALSE; /* next thing is filename */
	2170	opts = false; /* next thing is filename */
2169	2171	else {
2170	2172	fprintf(stderr, "unknown command line option '%s'\n", p);
2171	2173	return 1;

+67

-0

doc/CMakeLists.txt less more

	0	# Halibut is used to build its own documentation. So we can only build
	1	# the documentation if we're not cross-compiling.
	2
	3	if(CMAKE_CROSSCOMPILING)
	4	message(WARNING "Not building the Halibut documentation in a cross-compile")
	5	else()
	6
	7	include(GNUInstallDirs)
	8
	9	set(HALIBUT $<TARGET_FILE:halibut>)
	10
	11	set(manual_sources
	12	${CMAKE_CURRENT_SOURCE_DIR}/blurb.but
	13	${CMAKE_CURRENT_SOURCE_DIR}/intro.but
	14	${CMAKE_CURRENT_SOURCE_DIR}/running.but
	15	${CMAKE_CURRENT_SOURCE_DIR}/input.but
	16	${CMAKE_CURRENT_SOURCE_DIR}/output.but
	17	${CMAKE_CURRENT_SOURCE_DIR}/licence.but
	18	${CMAKE_CURRENT_SOURCE_DIR}/manpage.but
	19	${CMAKE_CURRENT_SOURCE_DIR}/index.but
	20	)
	21
	22	# Do the manual build in a subdirectory, to avoid the install
	23	# command making a CMakeFiles directory in the output location.
	24	set(manual_dir ${CMAKE_CURRENT_BINARY_DIR}/manual)
	25	file(MAKE_DIRECTORY ${manual_dir})
	26	add_custom_target(manual ALL
	27	BYPRODUCTS
	28	${manual_dir}/index.html
	29	${manual_dir}/halibut.txt
	30	${manual_dir}/halibut.info
	31	${manual_dir}/halibut.ps
	32	${manual_dir}/halibut.pdf
	33	${manual_dir}/halibut.chm
	34	COMMAND
	35	${HALIBUT}
	36	--html
	37	--text=halibut.txt
	38	--info=halibut.info
	39	--ps=halibut.ps
	40	--pdf=halibut.pdf
	41	--chm=halibut.chm
	42	${manual_sources}
	43	WORKING_DIRECTORY ${manual_dir}
	44	DEPENDS halibut ${manual_sources})
	45
	46	add_custom_target(manpage ALL
	47	BYPRODUCTS
	48	halibut.1
	49	COMMAND
	50	${HALIBUT}
	51	--man=halibut.1
	52	${CMAKE_CURRENT_SOURCE_DIR}/manpage.but
	53	DEPENDS halibut ${CMAKE_CURRENT_SOURCE_DIR}/manpage.but)
	54
	55	install(DIRECTORY ${manual_dir}/
	56	DESTINATION ${CMAKE_INSTALL_DOCDIR}
	57	FILES_MATCHING PATTERN "*.html")
	58
	59	install(DIRECTORY ${manual_dir}/
	60	DESTINATION ${CMAKE_INSTALL_INFODIR}
	61	FILES_MATCHING PATTERN "*.info*")
	62
	63	install(FILES ${CMAKE_CURRENT_BINARY_DIR}/halibut.1
	64	DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
	65
	66	endif()

+0

-24

~~doc/Makefile~~ less more

0		mandir=$(prefix)/man
1		man1dir=$(mandir)/man1
2
3		CHAPTERS := $(SITE) blurb intro running input output licence manpage index
4
5		INPUTS = $(patsubst %,%.but,$(CHAPTERS))
6
7		HALIBUT = ../build/halibut
8
9		all: index.html halibut.1
10
11		index.html: $(INPUTS) $(HALIBUT)
12		$(HALIBUT) --text=halibut.txt --html --info=halibut.info \
13		--ps=halibut.ps --pdf=halibut.pdf --chm=halibut.chm $(INPUTS)
14
15		halibut.1: manpage.but
16		$(HALIBUT) --man=halibut.1 manpage.but
17
18		install:
19		mkdir -p $(man1dir)
20		$(INSTALL) -m 644 halibut.1 $(man1dir)/halibut.1
21
22		clean:
23		rm -f *.html *.txt *.hlp *.cnt *.1 *.info* *.ps *.pdf *.chm

+1

-1

doc/licence.but less more

0	0	\A{licence} Halibut Licence
1	1
2		Halibut is copyright (c) 1999-2017 Simon Tatham.
	2	Halibut is copyright (c) 1999-2021 Simon Tatham.
3	3
4	4	Permission is hereby granted, free of charge, to any person
5	5	obtaining a copy of this software and associated documentation files

+1

-1

doc/manpage.but less more

157	157
158	158	This man page isn't terribly complete.
159	159
160		\versionid Halibut version 1.2
	160	\versionid Halibut version 1.3

+176

-112

error.c less more

41	41	exit(EXIT_FAILURE);
42	42	}
43	43
44		void err_optnoarg(const char *sp)
45		{
	44	void err_optnoarg(errorstate *es, const char *sp)
	45	{
	46	es->fatal = true;
46	47	do_error(NULL, "option `-%s' requires an argument", sp);
47	48	}
48	49
49		void err_nosuchopt(const char *sp)
50		{
	50	void err_nosuchopt(errorstate *es, const char *sp)
	51	{
	52	es->fatal = true;
51	53	do_error(NULL, "unrecognised option `-%s'", sp);
52	54	}
53	55
54		void err_cmdcharset(const char *sp)
55		{
	56	void err_cmdcharset(errorstate *es, const char *sp)
	57	{
	58	es->fatal = true;
56	59	do_error(NULL, "character set `%s' not recognised", sp);
57	60	}
58	61
59		void err_futileopt(const char *sp, const char *sp2)
	62	void err_futileopt(errorstate *es, const char *sp, const char *sp2)
60	63	{
61	64	do_error(NULL, "warning: option `-%s' has no effect%s", sp, sp2);
62	65	}
63	66
64		void err_noinput(void)
65		{
	67	void err_noinput(errorstate *es)
	68	{
	69	es->fatal = true;
66	70	do_error(NULL, "no input files");
67	71	}
68	72
69		void err_cantopen(const char *sp)
70		{
	73	void err_cantopen(errorstate *es, const char *sp)
	74	{
	75	es->fatal = true;
71	76	do_error(NULL, "unable to open input file `%s'", sp);
72	77	}
73	78
74		void err_nodata(void)
75		{
	79	void err_nodata(errorstate *es)
	80	{
	81	es->fatal = true;
76	82	do_error(NULL, "no data in input files");
77	83	}
78	84
79		void err_brokencodepara(const filepos *fpos)
80		{
	85	void err_zerochar(errorstate *es, const filepos *fpos)
	86	{
	87	es->fatal = true;
	88	do_error(fpos, "the Unicode zero character is not permitted in input");
	89	}
	90
	91	void err_brokencodepara(errorstate *es, const filepos *fpos)
	92	{
	93	es->fatal = true;
81	94	do_error(fpos, "every line of a code paragraph should begin `\\c'");
82	95	}
83	96
84		void err_kwunclosed(const filepos *fpos)
85		{
	97	void err_kwunclosed(errorstate *es, const filepos *fpos)
	98	{
	99	es->fatal = true;
86	100	do_error(fpos, "expected `}' after paragraph keyword");
87	101	}
88	102
89		void err_kwexpected(const filepos *fpos)
90		{
	103	void err_kwexpected(errorstate *es, const filepos *fpos)
	104	{
	105	es->fatal = true;
91	106	do_error(fpos, "expected a paragraph keyword");
92	107	}
93	108
94		void err_kwillegal(const filepos *fpos)
95		{
	109	void err_kwillegal(errorstate *es, const filepos *fpos)
	110	{
	111	es->fatal = true;
96	112	do_error(fpos, "expected no paragraph keyword");
97	113	}
98	114
99		void err_kwtoomany(const filepos *fpos)
100		{
	115	void err_kwtoomany(errorstate *es, const filepos *fpos)
	116	{
	117	es->fatal = true;
101	118	do_error(fpos, "expected only one paragraph keyword");
102	119	}
103	120
104		void err_bodyillegal(const filepos *fpos)
105		{
	121	void err_bodyillegal(errorstate *es, const filepos *fpos)
	122	{
	123	es->fatal = true;
106	124	do_error(fpos, "expected no text after paragraph keyword");
107	125	}
108	126
109		void err_badparatype(const wchar_t *wsp, const filepos *fpos)
110		{
	127	void err_badparatype(errorstate *es, const wchar_t *wsp, const filepos *fpos)
	128	{
	129	es->fatal = true;
111	130	char *sp = utoa_locale_dup(wsp);
112	131	do_error(fpos, "command `%s' unrecognised at start of paragraph", sp);
113	132	sfree(sp);
114	133	}
115	134
116		void err_badmidcmd(const wchar_t *wsp, const filepos *fpos)
117		{
	135	void err_badmidcmd(errorstate *es, const wchar_t *wsp, const filepos *fpos)
	136	{
	137	es->fatal = true;
118	138	char *sp = utoa_locale_dup(wsp);
119	139	do_error(fpos, "command `%s' unexpected in mid-paragraph", sp);
120	140	sfree(sp);
121	141	}
122	142
123		void err_unexbrace(const filepos *fpos)
124		{
	143	void err_unexbrace(errorstate *es, const filepos *fpos)
	144	{
	145	es->fatal = true;
125	146	do_error(fpos, "brace character unexpected in mid-paragraph");
126	147	}
127	148
128		void err_explbr(const filepos *fpos)
129		{
	149	void err_explbr(errorstate *es, const filepos *fpos)
	150	{
	151	es->fatal = true;
130	152	do_error(fpos, "expected `{' after command");
131	153	}
132	154
133		void err_commenteof(const filepos *fpos)
134		{
	155	void err_commenteof(errorstate *es, const filepos *fpos)
	156	{
	157	es->fatal = true;
135	158	do_error(fpos, "end of file unexpected inside `\\#{...}' comment");
136	159	}
137	160
138		void err_kwexprbr(const filepos *fpos)
139		{
	161	void err_kwexprbr(errorstate *es, const filepos *fpos)
	162	{
	163	es->fatal = true;
140	164	do_error(fpos, "expected `}' after cross-reference");
141	165	}
142	166
143		void err_codequote(const filepos *fpos)
144		{
	167	void err_codequote(errorstate *es, const filepos *fpos)
	168	{
	169	es->fatal = true;
145	170	do_error(fpos, "unable to nest \\q{...} within \\c{...} or \\cw{...}");
146	171	}
147	172
148		void err_missingrbrace(const filepos *fpos)
149		{
	173	void err_missingrbrace(errorstate *es, const filepos *fpos)
	174	{
	175	es->fatal = true;
150	176	do_error(fpos, "unclosed braces at end of paragraph");
151	177	}
152	178
153		void err_missingrbrace2(const filepos *fpos)
154		{
	179	void err_missingrbrace2(errorstate *es, const filepos *fpos)
	180	{
	181	es->fatal = true;
155	182	do_error(fpos, "unclosed braces at end of input file");
156	183	}
157	184
158		void err_nestedstyles(const filepos *fpos)
159		{
	185	void err_nestedstyles(errorstate *es, const filepos *fpos)
	186	{
	187	es->fatal = true;
160	188	do_error(fpos, "unable to nest text styles");
161	189	}
162	190
163		void err_nestedindex(const filepos *fpos)
164		{
	191	void err_nestedindex(errorstate *es, const filepos *fpos)
	192	{
	193	es->fatal = true;
165	194	do_error(fpos, "unable to nest index markings");
166	195	}
167	196
168		void err_indexcase(const filepos *fpos, const wchar_t *wsp,
	197	void err_indexcase(errorstate *es, const filepos *fpos, const wchar_t *wsp,
169	198	const filepos *fpos2, const wchar_t *wsp2)
170	199	{
171	200	char *sp = utoa_locale_dup(wsp), *sp2 = utoa_locale_dup(wsp2);

176	205	sfree(sp2);
177	206	}
178	207
179		void err_nosuchkw(const filepos *fpos, const wchar_t *wsp)
180		{
	208	void err_nosuchkw(errorstate *es, const filepos *fpos, const wchar_t *wsp)
	209	{
	210	es->fatal = true;
181	211	char *sp = utoa_locale_dup(wsp);
182	212	do_error(fpos, "unable to resolve cross-reference to `%s'", sp);
183	213	sfree(sp);
184	214	}
185	215
186		void err_multiBR(const filepos *fpos, const wchar_t *wsp)
187		{
	216	void err_multiBR(errorstate *es, const filepos *fpos, const wchar_t *wsp)
	217	{
	218	es->fatal = true;
188	219	char *sp = utoa_locale_dup(wsp);
189	220	do_error(fpos, "multiple `\\BR' entries given for `%s'", sp);
190	221	sfree(sp);
191	222	}
192	223
193		void err_nosuchidxtag(const filepos *fpos, const wchar_t *wsp)
194		{
	224	void err_nosuchidxtag(errorstate *es, const filepos *fpos, const wchar_t *wsp)
	225	{
	226	es->fatal = true;
195	227	char *sp = utoa_locale_dup(wsp);
196	228	do_error(fpos, "`\\IM' on unknown index tag `%s'", sp);
197	229	sfree(sp);
198	230	}
199	231
200		void err_cantopenw(const char *sp)
201		{
	232	void err_cantopenw(errorstate *es, const char *sp)
	233	{
	234	es->fatal = true;
202	235	do_error(NULL, "unable to open output file `%s'", sp);
203	236	}
204	237
205		void err_macroexists(const filepos *fpos, const wchar_t *wsp)
206		{
	238	void err_macroexists(errorstate *es, const filepos *fpos, const wchar_t *wsp)
	239	{
	240	es->fatal = true;
207	241	char *sp = utoa_locale_dup(wsp);
208	242	do_error(fpos, "macro `%s' already defined", sp);
209	243	sfree(sp);
210	244	}
211	245
212		void err_sectjump(const filepos *fpos)
213		{
	246	void err_sectjump(errorstate *es, const filepos *fpos)
	247	{
	248	es->fatal = true;
214	249	do_error(fpos, "expected higher heading levels before this one");
215	250	}
216	251
217		void err_winhelp_ctxclash(const filepos *fpos, const char *sp, const char *sp2)
218		{
	252	void err_winhelp_ctxclash(errorstate *es, const filepos *fpos,
	253	const char *sp, const char *sp2)
	254	{
	255	es->fatal = true;
219	256	do_error(fpos, "Windows Help context id `%s' clashes with "
220	257	"previously defined `%s'", sp, sp2);
221	258	}
222	259
223		void err_multikw(const filepos *fpos, const filepos *fpos2, const wchar_t *wsp)
224		{
	260	void err_multikw(errorstate *es, const filepos *fpos, const filepos *fpos2,
	261	const wchar_t *wsp)
	262	{
	263	es->fatal = true;
225	264	char *sp = utoa_locale_dup(wsp);
226	265	do_error(fpos, "paragraph keyword `%s' already defined at %s:%d",
227	266	sp, fpos2->filename, fpos2->line);
228	267	sfree(sp);
229	268	}
230	269
231		void err_misplacedlcont(const filepos *fpos)
232		{
	270	void err_misplacedlcont(errorstate *es, const filepos *fpos)
	271	{
	272	es->fatal = true;
233	273	do_error(fpos, "\\lcont is only expected after a list item");
234	274	}
235	275
236		void err_sectmarkerinblock(const filepos *fpos, const char *sp)
237		{
	276	void err_sectmarkerinblock(errorstate *es, const filepos *fpos, const char *sp)
	277	{
	278	es->fatal = true;
238	279	do_error(fpos, "section headings are not supported within \\%s", sp);
239	280	}
240	281
241		void err_cfginsufarg(const filepos *fpos, const char *sp, int i)
242		{
	282	void err_cfginsufarg(errorstate *es, const filepos *fpos, const char *sp,
	283	int i)
	284	{
	285	es->fatal = true;
243	286	do_error(fpos, "\\cfg{%s} expects at least %d parameter%s",
244	287	sp, i, (i==1)?"":"s");
245	288	}
246	289
247		void err_infonodechar(const filepos *fpos, char c) /* fpos might be NULL */
248		{
	290	void err_infonodechar(errorstate *es, const filepos *fpos, char c)
	291	/* fpos might be NULL */
	292	{
	293	es->fatal = true;
249	294	do_error(fpos, "info output format does not support '%c' in"
250	295	" node names; removing", c);
251	296	}
252	297
253		void err_text_codeline(const filepos *fpos, int i, int j)
	298	void err_text_codeline(errorstate *es, const filepos *fpos, int i, int j)
254	299	{
255	300	do_error(fpos, "warning: code paragraph line is %d chars wide, wider"
256	301	" than body width %d", i, j);
257	302	}
258	303
259		void err_htmlver(const filepos *fpos, const wchar_t *wsp)
260		{
	304	void err_htmlver(errorstate *es, const filepos *fpos, const wchar_t *wsp)
	305	{
	306	es->fatal = true;
261	307	char *sp = utoa_locale_dup(wsp);
262	308	do_error(fpos, "unrecognised HTML version keyword `%s'", sp);
263	309	sfree(sp);
264	310	}
265	311
266		void err_charset(const filepos *fpos, const wchar_t *wsp)
267		{
	312	void err_charset(errorstate *es, const filepos *fpos, const wchar_t *wsp)
	313	{
	314	es->fatal = true;
268	315	char *sp = utoa_locale_dup(wsp);
269	316	do_error(fpos, "character set `%s' not recognised", sp);
270	317	sfree(sp);
271	318	}
272	319
273		void err_nofont(const filepos *fpos, const wchar_t *wsp)
274		{
	320	void err_nofont(errorstate *es, const filepos *fpos, const wchar_t *wsp)
	321	{
	322	es->fatal = true;
275	323	char *sp = utoa_locale_dup(wsp);
276	324	do_error(fpos, "font `%s' not recognised", sp);
277	325	sfree(sp);
278	326	}
279	327
280		void err_afmeof(const filepos *fpos)
281		{
	328	void err_afmeof(errorstate *es, const filepos *fpos)
	329	{
	330	es->fatal = true;
282	331	do_error(fpos, "AFM file ended unexpectedly");
283	332	}
284	333
285		void err_afmkey(const filepos *fpos, const char *sp)
286		{
	334	void err_afmkey(errorstate *es, const filepos *fpos, const char *sp)
	335	{
	336	es->fatal = true;
287	337	do_error(fpos, "required AFM key '%s' missing", sp);
288	338	}
289	339
290		void err_afmvers(const filepos *fpos)
291		{
	340	void err_afmvers(errorstate *es, const filepos *fpos)
	341	{
	342	es->fatal = true;
292	343	do_error(fpos, "unsupported AFM version");
293	344	}
294	345
295		void err_afmval(const filepos *fpos, const char *sp, int i)
296		{
	346	void err_afmval(errorstate *es, const filepos *fpos, const char *sp, int i)
	347	{
	348	es->fatal = true;
297	349	if (i == 1)
298	350	do_error(fpos, "AFM key '%s' requires a value", sp);
299	351	else
300	352	do_error(fpos, "AFM key '%s' requires %d values", sp, i);
301	353	}
302	354
303		void err_pfeof(const filepos *fpos)
304		{
	355	void err_pfeof(errorstate *es, const filepos *fpos)
	356	{
	357	es->fatal = true;
305	358	do_error(fpos, "Type 1 font file ended unexpectedly");
306	359	}
307	360
308		void err_pfhead(const filepos *fpos)
309		{
	361	void err_pfhead(errorstate *es, const filepos *fpos)
	362	{
	363	es->fatal = true;
310	364	do_error(fpos, "Type 1 font file header line invalid");
311	365	}
312	366
313		void err_pfbad(const filepos *fpos)
314		{
	367	void err_pfbad(errorstate *es, const filepos *fpos)
	368	{
	369	es->fatal = true;
315	370	do_error(fpos, "Type 1 font file invalid");
316	371	}
317	372
318		void err_pfnoafm(const filepos *fpos, const char *sp)
319		{
	373	void err_pfnoafm(errorstate *es, const filepos *fpos, const char *sp)
	374	{
	375	es->fatal = true;
320	376	do_error(fpos, "no metrics available for Type 1 font '%s'", sp);
321	377	}
322	378
323		void err_chmnames(void)
324		{
	379	void err_chmnames(errorstate *es)
	380	{
	381	es->fatal = true;
325	382	do_error(NULL, "only one of html-mshtmlhelp-chm and "
326	383	"html-mshtmlhelp-hhp found");
327	384	}
328	385
329		void err_sfntnotable(const filepos *fpos, const char *sp)
330		{
	386	void err_sfntnotable(errorstate *es, const filepos *fpos, const char *sp)
	387	{
	388	es->fatal = true;
331	389	do_error(fpos, "font has no '%s' table", sp);
332	390	}
333	391
334		void err_sfntnopsname(const filepos *fpos)
335		{
	392	void err_sfntnopsname(errorstate *es, const filepos *fpos)
	393	{
	394	es->fatal = true;
336	395	do_error(fpos, "font has no PostScript name");
337	396	}
338	397
339		void err_sfntbadtable(const filepos *fpos, const char *sp)
340		{
	398	void err_sfntbadtable(errorstate *es, const filepos *fpos, const char *sp)
	399	{
	400	es->fatal = true;
341	401	do_error(fpos, "font has an invalid '%s' table", sp);
342	402	}
343	403
344		void err_sfntnounicmap(const filepos *fpos)
345		{
	404	void err_sfntnounicmap(errorstate *es, const filepos *fpos)
	405	{
	406	es->fatal = true;
346	407	do_error(fpos, "font has no UCS-2 character map");
347	408	}
348	409
349		void err_sfnttablevers(const filepos *fpos, const char *sp)
350		{
	410	void err_sfnttablevers(errorstate *es, const filepos *fpos, const char *sp)
	411	{
	412	es->fatal = true;
351	413	do_error(fpos, "font has an unsupported '%s' table version", sp);
352	414	}
353	415
354		void err_sfntbadhdr(const filepos *fpos)
355		{
	416	void err_sfntbadhdr(errorstate *es, const filepos *fpos)
	417	{
	418	es->fatal = true;
356	419	do_error(fpos, "font has an invalid header");
357	420	}
358	421
359		void err_sfntbadglyph(const filepos *fpos, unsigned wc)
	422	void err_sfntbadglyph(errorstate *es, const filepos *fpos, unsigned wc)
360	423	{
361	424	do_error(fpos,
362	425	"warning: character U+%04X references a non-existent glyph",
363	426	wc);
364	427	}
365	428
366		void err_chm_badname(const filepos *fpos, const char *sp)
367		{
	429	void err_chm_badname(errorstate *es, const filepos *fpos, const char *sp)
	430	{
	431	es->fatal = true;
368	432	do_error(fpos, "CHM internal file name `%s' begins with"
369	433	" a reserved character", sp);
370	434	}

+119

-93

halibut.h less more

4	4	#include <wchar.h>
5	5	#include <time.h>
6	6	#include <string.h>
	7	#include <stdbool.h>
	8
	9	#ifdef BOOLIFY
	10	# include "boolify.h"
	11	#endif
7	12
8	13	#include "charset.h"
9	14

11	16	#define NORETURN __attribute__((__noreturn__))
12	17	#else
13	18	#define NORETURN /* nothing */
14		#endif
15
16		#ifndef TRUE
17		#define TRUE 1
18		#endif
19		#ifndef FALSE
20		#define FALSE 0
21	19	#endif
22	20
23	21	/* For suppressing unused-parameter warnings */

39	37	typedef struct indextag_Tag indextag;
40	38	typedef struct indexentry_Tag indexentry;
41	39	typedef struct macrostack_Tag macrostack;
	40	typedef struct errorstate_Tag errorstate;
	41	typedef struct psdata_Tag psdata;
42	42
43	43	/*
44	44	* Data structure to hold a file name and index, a line and a

61	61	int nfiles; /* how many in the list */
62	62	FILE *currfp; /* the currently open one */
63	63	int currindex; /* which one is that in the list */
64		int wantclose; /* does the current file want closing */
	64	bool wantclose; /* does the current file want closing */
65	65	pushback *pushback; /* pushed-back input characters */
66	66	int npushback, pushbacksize;
67	67	filepos pos;
68		int reportcols; /* report column numbers in errors */
	68	bool reportcols; /* report column numbers in errors */
69	69	macrostack *stack; /* macro expansions in force */
70	70	int defcharset, charset; /* character sets for input files */
71	71	charset_state csstate;
72	72	wchar_t wc[16]; /* wide chars from input conversion */
73	73	int nwc, wcpos; /* size of, and position in, wc[] */
74	74	char *pushback_chars; /* used to save input-encoding data */
	75	errorstate *es;
75	76	};
76	77
77	78	/*

135	136	word *next, *alt;
136	137	int type;
137	138	int aux;
138		int breaks; /* can a line break after it? */
	139	bool breaks; /* can a line break after it? */
139	140	wchar_t *text;
140	141	filepos fpos;
141	142

204	205	/*
205	206	* error.c
206	207	*/
	208	struct errorstate_Tag {
	209	bool fatal;
	210	};
207	211	/* out of memory */
208	212	void fatalerr_nomemory(void) NORETURN;
209	213	/* option `-%s' requires an argument */
210		void err_optnoarg(const char *sp);
	214	void err_optnoarg(errorstate *es, const char *sp);
211	215	/* unrecognised option `-%s' */
212		void err_nosuchopt(const char *sp);
	216	void err_nosuchopt(errorstate *es, const char *sp);
213	217	/* unrecognised charset %s (cmdline) */
214		void err_cmdcharset(const char *sp);
	218	void err_cmdcharset(errorstate *es, const char *sp);
215	219	/* futile option `-%s'%s */
216		void err_futileopt(const char *sp, const char *sp2);
	220	void err_futileopt(errorstate *es, const char *sp, const char *sp2);
217	221	/* no input files */
218		void err_noinput(void);
	222	void err_noinput(errorstate *es);
219	223	/* unable to open input file `%s' */
220		void err_cantopen(const char *sp);
	224	void err_cantopen(errorstate *es, const char *sp);
221	225	/* no data in input files */
222		void err_nodata(void);
	226	void err_nodata(errorstate *es);
	227	/* unexpected zero character in input file */
	228	void err_zerochar(errorstate *es, const filepos *fpos);
223	229	/* line in codepara didn't begin `\c' */
224		void err_brokencodepara(const filepos *fpos);
	230	void err_brokencodepara(errorstate *es, const filepos *fpos);
225	231	/* expected `}' after keyword */
226		void err_kwunclosed(const filepos *fpos);
	232	void err_kwunclosed(errorstate *es, const filepos *fpos);
227	233	/* paragraph type expects no keyword */
228		void err_kwexpected(const filepos *fpos);
	234	void err_kwexpected(errorstate *es, const filepos *fpos);
229	235	/* paragraph type expects a keyword */
230		void err_kwillegal(const filepos *fpos);
	236	void err_kwillegal(errorstate *es, const filepos *fpos);
231	237	/* paragraph type expects only 1 */
232		void err_kwtoomany(const filepos *fpos);
	238	void err_kwtoomany(errorstate *es, const filepos *fpos);
233	239	/* paragraph type expects only kws! */
234		void err_bodyillegal(const filepos *fpos);
	240	void err_bodyillegal(errorstate *es, const filepos *fpos);
235	241	/* invalid command at start of para */
236		void err_badparatype(const wchar_t *wsp, const filepos *fpos);
	242	void err_badparatype(errorstate *es, const wchar_t *wsp, const filepos *fpos);
237	243	/* invalid command in mid-para */
238		void err_badmidcmd(const wchar_t *wsp, const filepos *fpos);
	244	void err_badmidcmd(errorstate *es, const wchar_t *wsp, const filepos *fpos);
239	245	/* unexpected brace */
240		void err_unexbrace(const filepos *fpos);
	246	void err_unexbrace(errorstate *es, const filepos *fpos);
241	247	/* expected `{' after command */
242		void err_explbr(const filepos *fpos);
	248	void err_explbr(errorstate *es, const filepos *fpos);
243	249	/* EOF inside braced comment */
244		void err_commenteof(const filepos *fpos);
	250	void err_commenteof(errorstate *es, const filepos *fpos);
245	251	/* expected `}' after cross-ref */
246		void err_kwexprbr(const filepos *fpos);
	252	void err_kwexprbr(errorstate *es, const filepos *fpos);
247	253	/* \q within \c is not supported */
248		void err_codequote(const filepos *fpos);
	254	void err_codequote(errorstate *es, const filepos *fpos);
249	255	/* unclosed braces at end of para */
250		void err_missingrbrace(const filepos *fpos);
	256	void err_missingrbrace(errorstate *es, const filepos *fpos);
251	257	/* unclosed braces at end of file */
252		void err_missingrbrace2(const filepos *fpos);
	258	void err_missingrbrace2(errorstate *es, const filepos *fpos);
253	259	/* unable to nest text styles */
254		void err_nestedstyles(const filepos *fpos);
	260	void err_nestedstyles(errorstate *es, const filepos *fpos);
255	261	/* unable to nest `\i' thingys */
256		void err_nestedindex(const filepos *fpos);
	262	void err_nestedindex(errorstate *es, const filepos *fpos);
257	263	/* two \i differing only in case */
258		void err_indexcase(const filepos *fpos, const wchar_t *wsp,
	264	void err_indexcase(errorstate *es, const filepos *fpos, const wchar_t *wsp,
259	265	const filepos *fpos2, const wchar_t *wsp2);
260	266	/* unresolved cross-reference */
261		void err_nosuchkw(const filepos *fpos, const wchar_t *wsp);
	267	void err_nosuchkw(errorstate *es, const filepos *fpos, const wchar_t *wsp);
262	268	/* multiple \BRs on same keyword */
263		void err_multiBR(const filepos *fpos, const wchar_t *wsp);
	269	void err_multiBR(errorstate *es, const filepos *fpos, const wchar_t *wsp);
264	270	/* \IM on unknown index tag (warning) */
265		void err_nosuchidxtag(const filepos *fpos, const wchar_t *wsp);
	271	void err_nosuchidxtag(errorstate *es, const filepos *fpos,
	272	const wchar_t *wsp);
266	273	/* can't open output file for write */
267		void err_cantopenw(const char *sp);
	274	void err_cantopenw(errorstate *es, const char *sp);
268	275	/* this macro already exists */
269		void err_macroexists(const filepos *fpos, const wchar_t *wsp);
	276	void err_macroexists(errorstate *es, const filepos *fpos, const wchar_t *wsp);
270	277	/* jump a heading level, eg \C -> \S */
271		void err_sectjump(const filepos *fpos);
	278	void err_sectjump(errorstate *es, const filepos *fpos);
272	279	/* WinHelp context ID hash clash */
273		void err_winhelp_ctxclash(const filepos *fpos, const char *sp, const char *sp2);
	280	void err_winhelp_ctxclash(errorstate *es, const filepos *fpos,
	281	const char *sp, const char *sp2);
274	282	/* keyword clash in sections */
275		void err_multikw(const filepos *fpos, const filepos *fpos2, const wchar_t *wsp);
	283	void err_multikw(errorstate *es, const filepos *fpos, const filepos *fpos2,
	284	const wchar_t *wsp);
276	285	/* \lcont not after a list item */
277		void err_misplacedlcont(const filepos *fpos);
	286	void err_misplacedlcont(errorstate *es, const filepos *fpos);
278	287	/* section marker appeared in block */
279		void err_sectmarkerinblock(const filepos *fpos, const char *sp);
	288	void err_sectmarkerinblock(errorstate *es, const filepos *fpos,
	289	const char *sp);
280	290	/* \cfg{%s} insufficient args (<%d) */
281		void err_cfginsufarg(const filepos *fpos, const char *sp, int i);
	291	void err_cfginsufarg(errorstate *es, const filepos *fpos, const char *sp,
	292	int i);
282	293	/* colon/comma in node name in info */
283		void err_infonodechar(const filepos *fpos, char c) /* fpos might be NULL */;
	294	void err_infonodechar(errorstate *es, const filepos *fpos, char c)
	295	/* fpos might be NULL */;
284	296	/* \c line too long in text backend */
285		void err_text_codeline(const filepos *fpos, int i, int j);
	297	void err_text_codeline(errorstate *es, const filepos *fpos, int i, int j);
286	298	/* unrecognised HTML version keyword */
287		void err_htmlver(const filepos *fpos, const wchar_t *wsp);
	299	void err_htmlver(errorstate *es, const filepos *fpos, const wchar_t *wsp);
288	300	/* unrecognised character set name */
289		void err_charset(const filepos *fpos, const wchar_t *wsp);
	301	void err_charset(errorstate *es, const filepos *fpos, const wchar_t *wsp);
290	302	/* unrecognised font name */
291		void err_nofont(const filepos *fpos, const wchar_t *wsp);
	303	void err_nofont(errorstate *es, const filepos *fpos, const wchar_t *wsp);
292	304	/* eof in AFM file */
293		void err_afmeof(const filepos *fpos);
	305	void err_afmeof(errorstate *es, const filepos *fpos);
294	306	/* missing expected keyword in AFM */
295		void err_afmkey(const filepos *fpos, const char *sp);
	307	void err_afmkey(errorstate *es, const filepos *fpos, const char *sp);
296	308	/* unsupported AFM version */
297		void err_afmvers(const filepos *fpos);
	309	void err_afmvers(errorstate *es, const filepos *fpos);
298	310	/* missing value(s) for AFM key */
299		void err_afmval(const filepos *fpos, const char *sp, int i);
	311	void err_afmval(errorstate *es, const filepos *fpos, const char *sp, int i);
300	312	/* eof in Type 1 font file */
301		void err_pfeof(const filepos *fpos);
	313	void err_pfeof(errorstate *es, const filepos *fpos);
302	314	/* bad Type 1 header line */
303		void err_pfhead(const filepos *fpos);
	315	void err_pfhead(errorstate *es, const filepos *fpos);
304	316	/* otherwise invalide Type 1 font */
305		void err_pfbad(const filepos *fpos);
	317	void err_pfbad(errorstate *es, const filepos *fpos);
306	318	/* Type 1 font but no AFM */
307		void err_pfnoafm(const filepos *fpos, const char *sp);
	319	void err_pfnoafm(errorstate *es, const filepos *fpos, const char *sp);
308	320	/* need both or neither of hhp+chm */
309		void err_chmnames(void);
	321	void err_chmnames(errorstate *es);
310	322	/* required sfnt table missing */
311		void err_sfntnotable(const filepos *fpos, const char *sp);
	323	void err_sfntnotable(errorstate *es, const filepos *fpos, const char *sp);
312	324	/* sfnt has no PostScript name */
313		void err_sfntnopsname(const filepos *fpos);
	325	void err_sfntnopsname(errorstate *es, const filepos *fpos);
314	326	/* sfnt table not valid */
315		void err_sfntbadtable(const filepos *fpos, const char *sp);
	327	void err_sfntbadtable(errorstate *es, const filepos *fpos, const char *sp);
316	328	/* sfnt has no UCS-2 cmap */
317		void err_sfntnounicmap(const filepos *fpos);
	329	void err_sfntnounicmap(errorstate *es, const filepos *fpos);
318	330	/* sfnt table version unknown */
319		void err_sfnttablevers(const filepos *fpos, const char *sp);
	331	void err_sfnttablevers(errorstate *es, const filepos *fpos, const char *sp);
320	332	/* sfnt has bad header */
321		void err_sfntbadhdr(const filepos *fpos);
	333	void err_sfntbadhdr(errorstate *es, const filepos *fpos);
322	334	/* sfnt cmap references bad glyph */
323		void err_sfntbadglyph(const filepos *fpos, unsigned wc);
	335	void err_sfntbadglyph(errorstate *es, const filepos *fpos, unsigned wc);
324	336	/* CHM internal file names can't start with # or $ */
325		void err_chm_badname(const filepos *fpos, const char *sp);
	337	void err_chm_badname(errorstate *es, const filepos *fpos, const char *sp);
326	338
327	339	/*
328	340	* malloc.c

368	380	wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source);
369	381	wchar_t *ustrncpy(wchar_t *dest, wchar_t const *source, int n);
370	382	wchar_t utolower(wchar_t);
371		int uisalpha(wchar_t);
	383	bool uisalpha(wchar_t);
372	384	int ustrcmp(wchar_t lhs, wchar_t rhs);
373	385	int ustricmp(wchar_t const lhs, wchar_t const rhs);
374	386	int ustrnicmp(wchar_t const lhs, wchar_t const rhs, int maxlen);
375	387	int utoi(wchar_t const *);
376	388	double utof(wchar_t const *);
377		int utob(wchar_t const *);
378		int uisdigit(wchar_t);
	389	bool utob(wchar_t const *);
	390	bool uisdigit(wchar_t);
379	391	wchar_t ustrlow(wchar_t s);
380	392	wchar_t ustrftime(const wchar_t wfmt, const struct tm *timespec);
381		int cvt_ok(int charset, const wchar_t *s);
382		int charset_from_ustr(filepos fpos, const wchar_t name);
	393	bool cvt_ok(int charset, const wchar_t *s);
	394	int charset_from_ustr(filepos fpos, const wchar_t name, errorstate *);
383	395
384	396	/*
385	397	* wcwidth.c

433	445	void rdadds(rdstring rs, wchar_t const p);
434	446	wchar_t rdtrim(rdstring rs);
435	447	void rdaddc(rdstringc *rs, char c);
	448	void rdaddc_rep(rdstringc *rs, char c, int repeat);
436	449	void rdaddsc(rdstringc rs, char const p);
437	450	void rdaddsn(rdstringc rc, char const p, int len);
438	451	char rdtrimc(rdstringc rs);

454	467	paragraph *cmdline_cfg_new(void);
455	468	paragraph cmdline_cfg_simple(char string, ...);
456	469
	470	time_t current_time(void); /* use in place of time(NULL) */
	471
457	472	/*
458	473	* input.c
459	474	*/
460		paragraph read_input(input in, indexdata *idx);
	475	paragraph read_input(input in, indexdata idx, psdata psd);
461	476
462	477	/*
463	478	* in_afm.c
464	479	*/
465		void read_afm_file(input *in);
	480	void read_afm_file(input in, psdata psd);
466	481
467	482	/*
468	483	* in_pf.c
469	484	*/
470		void read_pfa_file(input *in);
471		void read_pfb_file(input *in);
	485	void read_pfa_file(input in, psdata psd);
	486	void read_pfb_file(input in, psdata psd);
472	487
473	488	/*
474	489	* in_sfnt.c
475	490	*/
476		void read_sfnt_file(input *in);
	491	void read_sfnt_file(input in, psdata psd);
477	492
478	493	/*
479	494	* keywords.c

493	508	paragraph *para; /* the paragraph referenced */
494	509	};
495	510	keyword kw_lookup(keywordlist , wchar_t *);
496		keywordlist get_keywords(paragraph );
	511	keywordlist get_keywords(paragraph , errorstate *);
497	512	void free_keywords(keywordlist *);
498		void subst_keywords(paragraph , keywordlist );
	513	void subst_keywords(paragraph , keywordlist , errorstate *);
499	514
500	515	/*
501	516	* index.c

536	551	void cleanup_index(indexdata *);
537	552	/* index_merge takes responsibility for freeing arg 3 iff implicit; never
538	553	* takes responsibility for arg 2 */
539		void index_merge(indexdata , int is_explicit, wchar_t , word , filepos );
	554	void index_merge(indexdata , bool is_explicit, wchar_t , word , filepos ,
	555	errorstate *es);
540	556	void build_index(indexdata *);
541	557	void index_debug(indexdata *);
542	558	indextag index_findtag(indexdata idx, wchar_t *name);

546	562	*/
547	563	numberstate *number_init(void);
548	564	void number_cfg(numberstate , paragraph );
549		word number_mktext(numberstate , paragraph , wchar_t , int , int );
	565	word number_mktext(numberstate , paragraph , wchar_t , int , bool ,
	566	errorstate *es);
550	567	void number_free(numberstate *);
551	568
552	569	/*
553	570	* biblio.c
554	571	*/
555		void gen_citations(paragraph , keywordlist );
	572	void gen_citations(paragraph , keywordlist , errorstate *);
556	573
557	574	/*
558	575	* bk_text.c
559	576	*/
560		void text_backend(paragraph , keywordlist , indexdata , void );
	577	void text_backend(paragraph , keywordlist , indexdata , void ,
	578	errorstate *);
561	579	paragraph text_config_filename(char filename);
562	580
563	581	/*
564	582	* bk_html.c
565	583	*/
566		void html_backend(paragraph , keywordlist , indexdata , void );
567		void chm_backend(paragraph , keywordlist , indexdata , void );
	584	void html_backend(paragraph , keywordlist , indexdata , void ,
	585	errorstate *);
	586	void chm_backend(paragraph , keywordlist , indexdata , void ,
	587	errorstate *);
568	588	paragraph html_config_filename(char filename);
569	589	paragraph chm_config_filename(char filename);
570	590
571	591	/*
572	592	* bk_whlp.c
573	593	*/
574		void whlp_backend(paragraph , keywordlist , indexdata , void );
	594	void whlp_backend(paragraph , keywordlist , indexdata , void ,
	595	errorstate *);
575	596	paragraph whlp_config_filename(char filename);
576	597
577	598	/*
578	599	* bk_man.c
579	600	*/
580		void man_backend(paragraph , keywordlist , indexdata , void );
	601	void man_backend(paragraph , keywordlist , indexdata , void ,
	602	errorstate *);
581	603	paragraph man_config_filename(char filename);
582	604
583	605	/*
584	606	* bk_info.c
585	607	*/
586		void info_backend(paragraph , keywordlist , indexdata , void );
	608	void info_backend(paragraph , keywordlist , indexdata , void ,
	609	errorstate *);
587	610	paragraph info_config_filename(char filename);
588	611
589	612	/*
590	613	* bk_paper.c
591	614	*/
592		void paper_pre_backend(paragraph , keywordlist , indexdata );
593		void listfonts(void);
	615	void paper_pre_backend(paragraph , keywordlist , indexdata , psdata *,
	616	errorstate *);
	617	void listfonts(psdata *);
594	618
595	619	/*
596	620	* bk_ps.c
597	621	*/
598		void ps_backend(paragraph , keywordlist , indexdata , void );
	622	void ps_backend(paragraph , keywordlist , indexdata , void ,
	623	errorstate *);
599	624	paragraph ps_config_filename(char filename);
600	625
601	626	/*
602	627	* bk_pdf.c
603	628	*/
604		void pdf_backend(paragraph , keywordlist , indexdata , void );
	629	void pdf_backend(paragraph , keywordlist , indexdata , void ,
	630	errorstate *);
605	631	paragraph pdf_config_filename(char filename);
606	632
607	633	#endif

+13

-2

huffman.c less more

273	273	* ----------------------------------
274	274	* maxprob - nactivesyms
275	275	*
276		* rounded up, of course. And we'll only even be trying
277		* this if
	276	* rounded up, of course. And we'll only even be trying this if
	277	* smallestfreq <= totalfreq / maxprob, which is precisely the
	278	* condition under which the numerator of this fraction is
	279	* positive.
	280	*
	281	* (As for the denominator, that could only be negative if there
	282	* were more than F_{n+2} symbols overall, in which case it
	283	* _wouldn't_ be possible to avoid having a symbol with
	284	* probability at most 1/F_{n+2}. So that is a constraint on the
	285	* input parameters to this function, which we enforce by
	286	* assertion.)
278	287	*/
279	288	num = totalfreq - smallestfreq * maxprob;
280	289	denom = maxprob - nactivesyms;
	290	assert(num > 0); /* this just restates the assert above */
	291	assert(denom > 0); /* this is a constraint on the function parameters */
281	292	adjust = (num + denom - 1) / denom;
282	293
283	294	/*

+43

-43

in_afm.c less more

12	12	in->pos.line++;
13	13	c = getc(in->currfp);
14	14	if (c == EOF) {
15		err_afmeof(&in->pos);
	15	err_afmeof(in->es, &in->pos);
16	16	return NULL;
17	17	}
18	18	line = snewn(len, char);

38	38	return line;
39	39	}
40	40
41		static int afm_require_key(char line, char const expected, input *in) {
	41	static bool afm_require_key(char line, char const expected, input *in) {
42	42	char *key = strtok(line, " \t");
43	43
44	44	if (strcmp(key, expected) == 0)
45		return TRUE;
46		err_afmkey(&in->pos, expected);
47		return FALSE;
	45	return true;
	46	err_afmkey(in->es, &in->pos, expected);
	47	return false;
48	48	}
49	49
50		void read_afm_file(input *in) {
	50	void read_afm_file(input in, psdata psd) {
51	51	char *line, *key, *val;
52	52	font_info *fi;
53	53	size_t i;
54	54
55	55	fi = snew(font_info);
56	56	fi->name = NULL;
57		fi->widths = newtree234(width_cmp);
	57	fi->widths = newtree234(width_cmp, NULL);
58	58	fi->fontfile = NULL;
59		fi->kerns = newtree234(kern_cmp);
60		fi->ligs = newtree234(lig_cmp);
	59	fi->kerns = newtree234(kern_cmp, NULL);
	60	fi->ligs = newtree234(lig_cmp, NULL);
61	61	fi->fontbbox[0] = fi->fontbbox[1] = fi->fontbbox[2] = fi->fontbbox[3] = 0;
62	62	fi->capheight = fi->xheight = fi->ascent = fi->descent = 0;
63	63	fi->stemh = fi->stemv = fi->italicangle = 0;

68	68	if (!line \|\| !afm_require_key(line, "StartFontMetrics", in))
69	69	goto giveup;
70	70	if (!(val = strtok(NULL, " \t"))) {
71		err_afmval(&in->pos, "StartFontMetrics", 1);
	71	err_afmval(in->es, &in->pos, "StartFontMetrics", 1);
72	72	goto giveup;
73	73	}
74	74	if (atof(val) >= 5.0) {
75		err_afmvers(&in->pos);
	75	err_afmvers(in->es, &in->pos);
76	76	goto giveup;
77	77	}
78	78	sfree(line);

82	82	goto giveup;
83	83	key = strtok(line, " \t");
84	84	if (strcmp(key, "EndFontMetrics") == 0) {
85		fi->next = all_fonts;
86		all_fonts = fi;
	85	fi->next = psd->all_fonts;
	86	psd->all_fonts = fi;
87	87	fclose(in->currfp);
88	88	return;
89	89	} else if (strcmp(key, "FontName") == 0) {
90	90	if (!(val = strtok(NULL, " \t"))) {
91		err_afmval(&in->pos, key, 1);
	91	err_afmval(in->es, &in->pos, key, 1);
92	92	goto giveup;
93	93	}
94	94	fi->name = dupstr(val);

96	96	int i;
97	97	for (i = 0; i < 3; i++) {
98	98	if (!(val = strtok(NULL, " \t"))) {
99		err_afmval(&in->pos, key, 4);
	99	err_afmval(in->es, &in->pos, key, 4);
100	100	goto giveup;
101	101	}
102	102	fi->fontbbox[i] = atof(val);
103	103	}
104	104	} else if (strcmp(key, "CapHeight") == 0) {
105	105	if (!(val = strtok(NULL, " \t"))) {
106		err_afmval(&in->pos, key, 1);
	106	err_afmval(in->es, &in->pos, key, 1);
107	107	goto giveup;
108	108	}
109	109	fi->capheight = atof(val);
110	110	} else if (strcmp(key, "XHeight") == 0) {
111	111	if (!(val = strtok(NULL, " \t"))) {
112		err_afmval(&in->pos, key, 1);
	112	err_afmval(in->es, &in->pos, key, 1);
113	113	goto giveup;
114	114	}
115	115	fi->xheight = atof(val);
116	116	} else if (strcmp(key, "Ascender") == 0) {
117	117	if (!(val = strtok(NULL, " \t"))) {
118		err_afmval(&in->pos, key, 1);
	118	err_afmval(in->es, &in->pos, key, 1);
119	119	goto giveup;
120	120	}
121	121	fi->ascent = atof(val);
122	122	} else if (strcmp(key, "Descender") == 0) {
123	123	if (!(val = strtok(NULL, " \t"))) {
124		err_afmval(&in->pos, key, 1);
	124	err_afmval(in->es, &in->pos, key, 1);
125	125	goto giveup;
126	126	}
127	127	fi->descent = atof(val);
128	128	} else if (strcmp(key, "CapHeight") == 0) {
129	129	if (!(val = strtok(NULL, " \t"))) {
130		err_afmval(&in->pos, key, 1);
	130	err_afmval(in->es, &in->pos, key, 1);
131	131	goto giveup;
132	132	}
133	133	fi->capheight = atof(val);
134	134	} else if (strcmp(key, "StdHW") == 0) {
135	135	if (!(val = strtok(NULL, " \t"))) {
136		err_afmval(&in->pos, key, 1);
	136	err_afmval(in->es, &in->pos, key, 1);
137	137	goto giveup;
138	138	}
139	139	fi->stemh = atof(val);
140	140	} else if (strcmp(key, "StdVW") == 0) {
141	141	if (!(val = strtok(NULL, " \t"))) {
142		err_afmval(&in->pos, key, 1);
	142	err_afmval(in->es, &in->pos, key, 1);
143	143	goto giveup;
144	144	}
145	145	fi->stemv = atof(val);
146	146	} else if (strcmp(key, "ItalicAngle") == 0) {
147	147	if (!(val = strtok(NULL, " \t"))) {
148		err_afmval(&in->pos, key, 1);
	148	err_afmval(in->es, &in->pos, key, 1);
149	149	goto giveup;
150	150	}
151	151	fi->italicangle = atof(val);
152	152	} else if (strcmp(key, "StartCharMetrics") == 0) {
153	153	int nglyphs, i;
154	154	if (!(val = strtok(NULL, " \t"))) {
155		err_afmval(&in->pos, key, 1);
	155	err_afmval(in->es, &in->pos, key, 1);
156	156	goto giveup;
157	157	}
158	158	nglyphs = atoi(val);

169	169	if (strcmp(key, "WX") == 0 \|\| strcmp(key, "W0X") == 0) {
170	170	if (!(val = strtok(NULL, " \t")) \|\|
171	171	!strcmp(val, ";")) {
172		err_afmval(&in->pos, key, 1);
	172	err_afmval(in->es, &in->pos, key, 1);
173	173	goto giveup;
174	174	}
175	175	width = atoi(val);
176	176	} else if (strcmp(key, "N") == 0) {
177	177	if (!(val = strtok(NULL, " \t")) \|\|
178	178	!strcmp(val, ";")) {
179		err_afmval(&in->pos, key, 1);
180		goto giveup;
181		}
182		g = glyph_intern(val);
	179	err_afmval(in->es, &in->pos, key, 1);
	180	goto giveup;
	181	}
	182	g = glyph_intern(psd, val);
183	183	} else if (strcmp(key, "L") == 0) {
184	184	glyph succ, lig;
185	185	if (!(val = strtok(NULL, " \t")) \|\|
186	186	!strcmp(val, ";")) {
187		err_afmval(&in->pos, key, 1);
188		goto giveup;
189		}
190		succ = glyph_intern(val);
191		if (!(val = strtok(NULL, " \t")) \|\|
192		!strcmp(val, ";")) {
193		err_afmval(&in->pos, key, 1);
194		goto giveup;
195		}
196		lig = glyph_intern(val);
	187	err_afmval(in->es, &in->pos, key, 1);
	188	goto giveup;
	189	}
	190	succ = glyph_intern(psd, val);
	191	if (!(val = strtok(NULL, " \t")) \|\|
	192	!strcmp(val, ";")) {
	193	err_afmval(in->es, &in->pos, key, 1);
	194	goto giveup;
	195	}
	196	lig = glyph_intern(psd, val);
197	197	if (g != NOGLYPH && succ != NOGLYPH &&
198	198	lig != NOGLYPH) {
199	199	ligature *l = snew(ligature);

229	229	strcmp(key, "StartKernPairs0") == 0) {
230	230	int nkerns, i;
231	231	if (!(val = strtok(NULL, " \t"))) {
232		err_afmval(&in->pos, key, 1);
	232	err_afmval(in->es, &in->pos, key, 1);
233	233	goto giveup;
234	234	}
235	235	nkerns = atoi(val);

247	247	nr = strtok(NULL, " \t");
248	248	val = strtok(NULL, " \t");
249	249	if (!val) {
250		err_afmval(&in->pos, key, 3);
	250	err_afmval(in->es, &in->pos, key, 3);
251	251	goto giveup;
252	252	}
253		l = glyph_intern(nl);
254		r = glyph_intern(nr);
	253	l = glyph_intern(psd, nl);
	254	r = glyph_intern(psd, nr);
255	255	if (l == -1 \|\| r == -1) continue;
256	256	kp = snew(kern_pair);
257	257	kp->left = l;

+24

-24

in_pf.c less more

41	41	size_t offset;
42	42	} pfstate;
43	43
44		static void pf_identify(t1_font *tf);
	44	static void pf_identify(t1_font tf, psdata , errorstate *);
45	45
46	46	static t1_data *load_pfb_file(FILE *fp, filepos *pos) {
47	47	t1_data *head = NULL, *tail = NULL;

94	94	return ret;
95	95	}
96	96
97		void read_pfa_file(input *in) {
	97	void read_pfa_file(input in, psdata psd) {
98	98	t1_font *tf = snew(t1_font);
99	99
100	100	tf->data = load_pfa_file(in->currfp, &in->pos);
101	101	tf->pos = in->pos;
102	102	tf->length1 = tf->length2 = 0;
103	103	fclose(in->currfp);
104		pf_identify(tf);
105		}
106
107		void read_pfb_file(input *in) {
	104	pf_identify(tf, psd, in->es);
	105	}
	106
	107	void read_pfb_file(input in, psdata psd) {
108	108	t1_font *tf = snew(t1_font);
109	109
110	110	tf->data = load_pfb_file(in->currfp, &in->pos);
111	111	tf->pos = in->pos;
112	112	tf->length1 = tf->length2 = 0;
113	113	fclose(in->currfp);
114		pf_identify(tf);
	114	pf_identify(tf, psd, in->es);
115	115	}
116	116	static char pf_read_token(pfstate );
117	117

161	161	return o + pf->offset;
162	162	}
163	163
164		static void pf_identify(t1_font *tf) {
	164	static void pf_identify(t1_font tf, psdata psd, errorstate *es) {
165	165	rdstringc rsc = { 0, 0, NULL };
166	166	char *p;
167	167	size_t len;

176	176	c = pf_getc(pf);
177	177	if (c == EOF) {
178	178	sfree(rsc.text);
179		err_pfeof(&tf->pos);
	179	err_pfeof(es, &tf->pos);
180	180	return;
181	181	}
182	182	rdaddc(&rsc, c);

184	184	p = rsc.text;
185	185	if ((p = strchr(p, ':')) == NULL) {
186	186	sfree(rsc.text);
187		err_pfhead(&tf->pos);
	187	err_pfhead(es, &tf->pos);
188	188	return;
189	189	}
190	190	p++;

195	195	fontname[len] = 0;
196	196	sfree(rsc.text);
197	197
198		for (fi = all_fonts; fi; fi = fi->next) {
	198	for (fi = psd->all_fonts; fi; fi = fi->next) {
199	199	if (strcmp(fi->name, fontname) == 0) {
200	200	fi->fontfile = tf;
201	201	fi->filetype = TYPE1;

203	203	return;
204	204	}
205	205	}
206		err_pfnoafm(&tf->pos, fontname);
	206	err_pfnoafm(es, &tf->pos, fontname);
207	207	sfree(fontname);
208	208	}
209	209
210	210	/*
211	211	* PostScript white space characters; PLRM3 table 3.1
212	212	*/
213		static int pf_isspace(int c) {
	213	static bool pf_isspace(int c) {
214	214	return c == 000 \|\| c == 011 \|\| c == 012 \|\| c == 014 \|\| c == 015 \|\|
215	215	c == ' ';
216	216	}

218	218	/*
219	219	* PostScript special characters; PLRM3 page 27
220	220	*/
221		static int pf_isspecial(int c) {
	221	static bool pf_isspecial(int c) {
222	222	return c == '(' \|\| c == ')' \|\| c == '<' \|\| c == '>' \|\| c == '[' \|\|
223	223	c == ']' \|\| c == '{' \|\| c == '}' \|\| c == '/' \|\| c == '%';
224	224	}

246	246	}
247	247	}
248	248
249		static size_t pf_length1(t1_font *tf) {
	249	static size_t pf_length1(t1_font tf, errorstate es) {
250	250	size_t ret;
251	251
252	252	ret = pf_findtoken(tf, 0, "eexec");
253	253	if (ret == (size_t)-1) {
254		err_pfeof(&tf->pos);
	254	err_pfeof(es, &tf->pos);
255	255	return 0;
256	256	}
257	257	return ret;
258	258	}
259	259
260		static size_t pf_length2(t1_font *tf) {
	260	static size_t pf_length2(t1_font tf, errorstate es) {
261	261	size_t ret;
262	262
263	263	if (tf->length1 == 0)
264		tf->length1 = pf_length1(tf);
	264	tf->length1 = pf_length1(tf, es);
265	265	ret = pf_findtoken(tf, tf->length1, "cleartomark");
266	266	if (ret == (size_t)-1) {
267		err_pfeof(&tf->pos);
	267	err_pfeof(es, &tf->pos);
268	268	return 0;
269	269	}
270	270	return ret - 12 - tf->length1; /* backspace over "cleartomark\n" */

324	324	char *bufp, size_t lenp) {
325	325	t1_data *td = tf->data;
326	326	size_t blk, i;
327		int havenybble = 0;
	327	bool havenybble = false;
328	328	char *p, nybble;
329	329
330	330	while (td && off >= td->length) {

362	362	/*
363	363	* Return the initial, unencrypted, part of a font.
364	364	*/
365		void pf_part1(font_info fi, char bufp, size_t lenp) {
	365	void pf_part1(font_info fi, char bufp, size_t lenp, errorstate *es) {
366	366	t1_font *tf = fi->fontfile;
367	367
368	368	if (tf->length1 == 0)
369		tf->length1 = pf_length1(tf);
	369	tf->length1 = pf_length1(tf, es);
370	370	pf_getascii(tf, 0, tf->length1, bufp, lenp);
371	371	}
372	372
373	373	/*
374	374	* Return the middle, encrypted, part of a font.
375	375	*/
376		void pf_part2(font_info fi, char bufp, size_t lenp) {
	376	void pf_part2(font_info fi, char bufp, size_t lenp, errorstate *es) {
377	377	t1_font *tf = fi->fontfile;
378	378
379	379	if (tf->length2 == 0)
380		tf->length2 = pf_length2(tf);
	380	tf->length2 = pf_length2(tf, es);
381	381	pf_getbinary(tf, tf->length1, tf->length2, bufp, lenp);
382	382	if (*lenp >= 256)
383	383	*lenp -= 256;

+96

-95

in_sfnt.c less more

85	85	}
86	86	#define d_end decode_end, 0, 0
87	87
88		static void decode(sfnt_decode dec, void src, void end, void *dest) {
	88	static void decode(const sfnt_decode dec, void src, void end, void *dest) {
89	89	while (dec->decoder != decode_end) {
90	90	if ((char *)src + dec->src_len > (char *)end) return NULL;
91	91	dec->decoder(src, (char *)dest + dec->dest_offset);

95	95	return src;
96	96	}
97	97
98		static void decoden(sfnt_decode dec, void src, void end, void *dest,
	98	static void decoden(const sfnt_decode dec, void src, void end, void *dest,
99	99	size_t size, size_t n) {
100	100	while (n-- && src) {
101	101	src = decode(dec, src, end, dest);

105	105	}
106	106
107	107	/* Decoding specs for simple data types */
108		sfnt_decode uint16_decode[] = { { d_uint16, 0 }, { d_end } };
109		sfnt_decode int16_decode[] = { { d_int16, 0 }, { d_end } };
110		sfnt_decode uint32_decode[] = { { d_uint32, 0 }, { d_end } };
	108	const sfnt_decode uint16_decode[] = { { d_uint16, 0 }, { d_end } };
	109	const sfnt_decode int16_decode[] = { { d_int16, 0 }, { d_end } };
	110	const sfnt_decode uint32_decode[] = { { d_uint32, 0 }, { d_end } };
111	111
112	112	/* Offset subdirectory -- the start of the file */
113	113	typedef struct offsubdir_Tag offsubdir;

115	115	unsigned scaler_type;
116	116	unsigned numTables;
117	117	};
118		sfnt_decode offsubdir_decode[] = {
	118	const sfnt_decode offsubdir_decode[] = {
119	119	{ d_uint32, offsetof(offsubdir, scaler_type) },
120	120	{ d_uint16, offsetof(offsubdir, numTables) },
121	121	{ d_skip(6) },

144	144	unsigned offset;
145	145	unsigned length;
146	146	};
147		sfnt_decode tabledir_decode[] = {
	147	const sfnt_decode tabledir_decode[] = {
148	148	{ d_uint32, offsetof(tabledir, tag) },
149	149	{ d_uint32, offsetof(tabledir, checkSum) },
150	150	{ d_uint32, offsetof(tabledir, offset) },

159	159	int sTypoAscender, sTypoDescender;
160	160	int sxHeight, sCapHeight;
161	161	};
162		sfnt_decode t_OS_2_v0_decode[] = {
	162	const sfnt_decode t_OS_2_v0_decode[] = {
163	163	{ d_uint16, offsetof(t_OS_2, version) },
164	164	{ d_skip(66) }, /* xAvgCharWidth, usWeightClass, usWidthClass, fsType, */
165	165	/* ySubscriptXSize, ySubscriptYSize, ySubscriptXOffset, */

169	169	/* achVendID, fsSelection, usFirstCharIndex, usLastCharIndex */
170	170	{ d_end }
171	171	};
172		sfnt_decode t_OS_2_v1_decode[] = {
	172	const sfnt_decode t_OS_2_v1_decode[] = {
173	173	{ d_uint16, offsetof(t_OS_2, version) },
174	174	{ d_skip(66) }, /* xAvgCharWidth, usWeightClass, usWidthClass, fsType, */
175	175	/* ySubscriptXSize, ySubscriptYSize, ySubscriptXOffset, */

183	183	/* ulCodePageRange1, ulCodePageRange2 */
184	184	{ d_end }
185	185	};
186		sfnt_decode t_OS_2_v2_decode[] = {
	186	const sfnt_decode t_OS_2_v2_decode[] = {
187	187	{ d_uint16, offsetof(t_OS_2, version) },
188	188	{ d_skip(66) }, /* xAvgCharWidth, usWeightClass, usWidthClass, fsType, */
189	189	/* ySubscriptXSize, ySubscriptYSize, ySubscriptXOffset, */

206	206	struct t_cmap_Tag {
207	207	unsigned numTables;
208	208	};
209		sfnt_decode t_cmap_decode[] = {
	209	const sfnt_decode t_cmap_decode[] = {
210	210	{ d_skip(2) },
211	211	{ d_uint16, offsetof(t_cmap, numTables) },
212	212	{ d_end }

217	217	unsigned encodingID;
218	218	unsigned offset;
219	219	};
220		sfnt_decode encodingrec_decode[] = {
	220	const sfnt_decode encodingrec_decode[] = {
221	221	{ d_uint16, offsetof(encodingrec, platformID) },
222	222	{ d_uint16, offsetof(encodingrec, encodingID) },
223	223	{ d_uint32, offsetof(encodingrec, offset) },

228	228	unsigned length;
229	229	unsigned segCountX2;
230	230	};
231		sfnt_decode cmap4_decode[] = {
	231	const sfnt_decode cmap4_decode[] = {
232	232	{ d_skip(2) }, /* format */
233	233	{ d_uint16, offsetof(cmap4, length) },
234	234	{ d_skip(2) }, /* language */

247	247	int xMin, yMin, xMax, yMax;
248	248	int indexToLocFormat;
249	249	};
250		sfnt_decode t_head_decode[] = {
	250	const sfnt_decode t_head_decode[] = {
251	251	{ d_uint32, offsetof(t_head, version) },
252	252	{ d_uint32, offsetof(t_head, fontRevision) },
253	253	{ d_skip(8) }, /* checkSumAdjustment, magicNumber, flags */

274	274	int metricDataFormat;
275	275	unsigned numOfLongHorMetrics;
276	276	};
277		sfnt_decode t_hhea_decode[] = {
	277	const sfnt_decode t_hhea_decode[] = {
278	278	{ d_uint32, offsetof(t_hhea, version) },
279	279	{ d_int16, offsetof(t_hhea, ascent) },
280	280	{ d_int16, offsetof(t_hhea, descent) },

286	286	};
287	287
288	288	/* Horizontal Metrics ('hmtx') table */
289		sfnt_decode longhormetric_decode[] = {
	289	const sfnt_decode longhormetric_decode[] = {
290	290	{ d_uint16, 0 },
291	291	{ d_skip(2) },
292	292	{ d_end }

298	298	unsigned version;
299	299	unsigned nTables;
300	300	};
301		sfnt_decode t_kern_v0_decode[] = {
	301	const sfnt_decode t_kern_v0_decode[] = {
302	302	{ d_uint16, offsetof(t_kern, version) },
303	303	{ d_uint16, offsetof(t_kern, nTables) },
304	304	{ d_end }

309	309	unsigned length;
310	310	unsigned coverage;
311	311	};
312		sfnt_decode kern_v0_subhdr_decode[] = {
	312	const sfnt_decode kern_v0_subhdr_decode[] = {
313	313	{ d_uint16, offsetof(kern_v0_subhdr, version) },
314	314	{ d_uint16, offsetof(kern_v0_subhdr, length) },
315	315	{ d_uint16, offsetof(kern_v0_subhdr, coverage) },

321	321	#define KERN_V0_OVERRIDE 0x0008
322	322	#define KERN_V0_FORMAT 0xff00
323	323	#define KERN_V0_FORMAT_0 0x0000
324		sfnt_decode t_kern_v1_decode[] = {
	324	const sfnt_decode t_kern_v1_decode[] = {
325	325	{ d_uint32, offsetof(t_kern, version) },
326	326	{ d_uint32, offsetof(t_kern, nTables) },
327	327	{ d_end }

331	331	unsigned length;
332	332	unsigned coverage;
333	333	};
334		sfnt_decode kern_v1_subhdr_decode[] = {
	334	const sfnt_decode kern_v1_subhdr_decode[] = {
335	335	{ d_uint32, offsetof(kern_v1_subhdr, length) },
336	336	{ d_uint16, offsetof(kern_v1_subhdr, coverage) },
337	337	{ d_skip(2) }, /* tupleIndex */

346	346	struct kern_f0_Tag {
347	347	unsigned nPairs;
348	348	};
349		sfnt_decode kern_f0_decode[] = {
	349	const sfnt_decode kern_f0_decode[] = {
350	350	{ d_uint16, offsetof(kern_f0, nPairs) },
351	351	{ d_skip(6) }, /* searchRange, entrySelector, rangeShift */
352	352	{ d_end }

357	357	unsigned right;
358	358	int value;
359	359	};
360		sfnt_decode kern_f0_pair_decode[] = {
	360	const sfnt_decode kern_f0_pair_decode[] = {
361	361	{ d_uint16, offsetof(kern_f0_pair, left) },
362	362	{ d_uint16, offsetof(kern_f0_pair, right) },
363	363	{ d_int16, offsetof(kern_f0_pair, value) },

370	370	unsigned version;
371	371	unsigned numGlyphs;
372	372	};
373		sfnt_decode t_maxp_decode[] = {
	373	const sfnt_decode t_maxp_decode[] = {
374	374	{ d_uint32, offsetof(t_maxp, version) },
375	375	{ d_uint16, offsetof(t_maxp, numGlyphs) },
376	376	{ d_end }

385	385	unsigned stringOffset;
386	386	namerecord *nameRecord;
387	387	};
388		sfnt_decode t_name_decode[] = {
	388	const sfnt_decode t_name_decode[] = {
389	389	{ d_uint16, offsetof(t_name, format) },
390	390	{ d_uint16, offsetof(t_name, count) },
391	391	{ d_uint16, offsetof(t_name, stringOffset) },

399	399	unsigned length;
400	400	unsigned offset;
401	401	};
402		sfnt_decode namerecord_decode[] = {
	402	const sfnt_decode namerecord_decode[] = {
403	403	{ d_uint16, offsetof(namerecord, platformID) },
404	404	{ d_uint16, offsetof(namerecord, encodingID) },
405	405	{ d_uint16, offsetof(namerecord, languageID) },

420	420	unsigned minMemType42;
421	421	unsigned maxMemType42;
422	422	};
423		sfnt_decode t_post_decode[] = {
	423	const sfnt_decode t_post_decode[] = {
424	424	{ d_uint32, offsetof(t_post, format) },
425	425	{ d_int32, offsetof(t_post, italicAngle) },
426	426	{ d_int16, offsetof(t_post, underlinePosition) },

451	451	unsigned minmem, maxmem;
452	452	};
453	453
454		static int sfnt_findtable(sfnt *sf, unsigned tag,
455		void startp, void endp) {
	454	static bool sfnt_findtable(sfnt *sf, unsigned tag,
	455	void startp, void endp) {
456	456	size_t i;
457	457
458	458	for (i = 0; i < sf->osd.numTables; i++) {
459	459	if (sf->td[i].tag == tag) {
460	460	*startp = (char *)sf->data + sf->td[i].offset;
461	461	*endp = (char *)*startp + sf->td[i].length;
462		return TRUE;
	462	return true;
463	463	}
464	464	}
465		return FALSE;
466		}
467
468		static char sfnt_psname(font_info fi) {
	465	return false;
	466	}
	467
	468	static char sfnt_psname(font_info fi, errorstate *es) {
469	469	sfnt *sf = fi->fontfile;
470	470	t_name name;
471	471	void ptr, end;

474	474	namerecord *nr;
475	475
476	476	if (!sfnt_findtable(sf, TAG_name, &ptr, &end)) {
477		err_sfntnotable(&sf->pos, "name");
	477	err_sfntnotable(es, &sf->pos, "name");
478	478	return NULL;
479	479	}
480	480	ptr = decode(t_name_decode, ptr, end, &name);

495	495	}
496	496	}
497	497	}
498		err_sfntnopsname(&sf->pos);
	498	err_sfntnopsname(es, &sf->pos);
499	499	return NULL;
500	500	}
501	501

519	519	}
520	520
521	521	/* Generate a name for a glyph that doesn't have one. */
522		static glyph genglyph(unsigned idx) {
523		char buf[11];
524		if (idx == 0) return glyph_intern(".notdef");
	522	static glyph genglyph(psdata *psd, unsigned idx) {
	523	char buf[64];
	524	if (idx == 0) return glyph_intern(psd, ".notdef");
525	525	sprintf(buf, "glyph%u", idx);
526		return glyph_intern(buf);
	526	return glyph_intern(psd, buf);
527	527	}
528	528
529	529	/*

532	532	* TODO: cope better with duplicated glyph names (usually .notdef)
533	533	* TODO: when presented with format 3.0, try to use 'CFF' if present.
534	534	*/
535		static void sfnt_mapglyphs(font_info *fi) {
	535	static void sfnt_mapglyphs(font_info fi, psdata psd, errorstate *es) {
536	536	sfnt *sf = fi->fontfile;
537	537	t_post post;
538	538	void ptr, end;

545	545	if (sfnt_findtable(sf, TAG_post, &ptr, &end)) {
546	546	ptr = decode(t_post_decode, ptr, end, &post);
547	547	if (ptr == NULL) {
548		err_sfntbadtable(&sf->pos, "post");
	548	err_sfntbadtable(es, &sf->pos, "post");
549	549	goto noglyphs;
550	550	}
551	551

555	555	switch (post.format) {
556	556	case 0x00010000:
557	557	if (sf->nglyphs != 258) {
558		err_sfntbadtable(&sf->pos, "post");
	558	err_sfntbadtable(es, &sf->pos, "post");
559	559	break;
560	560	}
561	561	sf->glyphsbyindex = (glyph *)tt_std_glyphs;
562	562	break;
563	563	case 0x00020000:
564	564	if ((char *)ptr + 2 > (char *)end) {
565		err_sfntbadtable(&sf->pos, "post");
	565	err_sfntbadtable(es, &sf->pos, "post");
566	566	break;
567	567	}
568	568	ptr = (char *)ptr + 2;
569	569	if ((char *)ptr + 2*sf->nglyphs > (char *)end) {
570		err_sfntbadtable(&sf->pos, "post");
	570	err_sfntbadtable(es, &sf->pos, "post");
571	571	break;
572	572	}
573	573	nextras = 0;

583	583	memcpy(tmp, sptr + 1, *sptr);
584	584	tmp[*sptr] = 0;
585	585	assert(i < nextras);
586		extraglyphs[i++] = glyph_intern(tmp);
	586	extraglyphs[i++] = glyph_intern(psd, tmp);
587	587	}
588	588	sf->glyphsbyindex = snewn(sf->nglyphs, glyph);
589	589	for (i = 0; i < sf->nglyphs; i++) {

593	593	else if (g < 258 + nextras)
594	594	sf->glyphsbyindex[i] = extraglyphs[g - 258];
595	595	else {
596		err_sfntbadtable(&sf->pos, "post");
597		sf->glyphsbyindex[i] = genglyph(i);
	596	err_sfntbadtable(es, &sf->pos, "post");
	597	sf->glyphsbyindex[i] = genglyph(psd, i);
598	598	}
599	599	}
600	600	sfree(extraglyphs);

602	602	case 0x00030000:
603	603	break;
604	604	default:
605		err_sfnttablevers(&sf->pos, "post");
	605	err_sfnttablevers(es, &sf->pos, "post");
606	606	break;
607	607	}
608	608	}

610	610	if (!sf->glyphsbyindex) {
611	611	sf->glyphsbyindex = snewn(sf->nglyphs, glyph);
612	612	for (i = 0; i < sf->nglyphs; i++)
613		sf->glyphsbyindex[i] = genglyph(i);
	613	sf->glyphsbyindex[i] = genglyph(psd, i);
614	614	}
615	615	/* Construct glyphsbyname */
616	616	sf->glyphsbyname = snewn(sf->nglyphs, unsigned short);

632	632	suflen = 4;
633	633	for (i = 0; i < sf->nglyphs; i++) {
634	634	char const *p;
635		p = strrchr(glyph_extern(sfnt_indextoglyph(sf, i)), '.');
	635	p = strrchr(glyph_extern(psd, sfnt_indextoglyph(sf, i)), '.');
636	636	if (p && !(p+1)[strspn(p+1, "0123456789")] && strlen(p+1) > suflen)
637	637	suflen = strlen(p+1);
638	638	}

642	642	if (prev == (this = sfnt_indextoglyph(sf, sf->glyphsbyname[i]))) {
643	643	char const *basename;
644	644	char *buf;
645		basename = glyph_extern(this);
	645	basename = glyph_extern(psd, this);
646	646	buf = snewn(strlen(basename) + 2 + suflen, char);
647	647	strcpy(buf, basename);
648	648	sprintf(buf + strlen(basename), ".%0*hu", suflen,
649	649	sf->glyphsbyname[i]);
650		sf->glyphsbyindex[sf->glyphsbyname[i]] = glyph_intern(buf);
	650	sf->glyphsbyindex[sf->glyphsbyname[i]] = glyph_intern(psd, buf);
651	651	sfree(buf);
652	652	}
653	653	prev = this;

675	675	/*
676	676	* Get data from 'hhea', 'hmtx', and 'OS/2' tables
677	677	*/
678		void sfnt_getmetrics(font_info *fi) {
	678	void sfnt_getmetrics(font_info fi, errorstate es) {
679	679	sfnt *sf = fi->fontfile;
680	680	t_hhea hhea;
681	681	t_OS_2 OS_2;

689	689	fi->fontbbox[2] = sf->head.xMax * FUNITS_PER_PT / sf->head.unitsPerEm;
690	690	fi->fontbbox[3] = sf->head.yMax * FUNITS_PER_PT / sf->head.unitsPerEm;
691	691	if (!sfnt_findtable(sf, TAG_hhea, &ptr, &end)) {
692		err_sfntnotable(&sf->pos, "hhea");
	692	err_sfntnotable(es, &sf->pos, "hhea");
693	693	return;
694	694	}
695	695	if (decode(t_hhea_decode, ptr, end, &hhea) == NULL) {
696		err_sfntbadtable(&sf->pos, "hhea");
	696	err_sfntbadtable(es, &sf->pos, "hhea");
697	697	return;
698	698	}
699	699	if ((hhea.version & 0xffff0000) != 0x00010000) {
700		err_sfnttablevers(&sf->pos, "hhea");
	700	err_sfnttablevers(es, &sf->pos, "hhea");
701	701	return;
702	702	}
703	703	fi->ascent = hhea.ascent;
704	704	fi->descent = hhea.descent;
705	705	if (hhea.metricDataFormat != 0) {
706		err_sfnttablevers(&sf->pos, "hmtx");
	706	err_sfnttablevers(es, &sf->pos, "hmtx");
707	707	return;
708	708	}
709	709	if (!sfnt_findtable(sf, TAG_hmtx, &ptr, &end)) {
710		err_sfntnotable(&sf->pos, "hmtx");
	710	err_sfntnotable(es, &sf->pos, "hmtx");
711	711	return;
712	712	}
713	713	hmtx = snewn(hhea.numOfLongHorMetrics, unsigned);
714	714	if (decoden(longhormetric_decode, ptr, end, hmtx, sizeof(*hmtx),
715	715	hhea.numOfLongHorMetrics) == NULL) {
716		err_sfntbadtable(&sf->pos, "hmtx");
	716	err_sfntbadtable(es, &sf->pos, "hmtx");
717	717	return;
718	718	}
719	719	for (i = 0; i < sf->nglyphs; i++) {

742	742	fi->descent = OS_2.sTypoDescender * FUNITS_PER_PT / sf->head.unitsPerEm;
743	743	return;
744	744	bados2:
745		err_sfntbadtable(&sf->pos, "OS/2");
	745	err_sfntbadtable(es, &sf->pos, "OS/2");
746	746	}
747	747
748	748	/*

754	754	* pairs for horizontal kerning of horizontal text, and ignores
755	755	* everything else.
756	756	*/
757		static void sfnt_getkern(font_info *fi) {
	757	static void sfnt_getkern(font_info fi, errorstate es) {
758	758	sfnt *sf = fi->fontfile;
759	759	t_kern kern;
760	760	unsigned version, i, j;

812	812	}
813	813	return;
814	814	bad:
815		err_sfntbadtable(&sf->pos, "kern");
	815	err_sfntbadtable(es, &sf->pos, "kern");
816	816	return;
817	817	}
818	818

824	824	* Unicode 1.1 with precomposed Hangul syllables. We only handle
825	825	* format 4 of this table, since that seems to be the only one in use.
826	826	*/
827		void sfnt_getmap(font_info *fi) {
	827	static void sfnt_getmap(font_info fi, errorstate es) {
828	828	sfnt *sf = fi->fontfile;
829	829	t_cmap cmap;
830	830	encodingrec *esd;

836	836	for (i = 0; i < lenof(fi->bmp); i++)
837	837	fi->bmp[i] = 0xFFFF;
838	838	if (!sfnt_findtable(sf, TAG_cmap, &ptr, &end)) {
839		err_sfntnotable(&sf->pos, "cmap");
	839	err_sfntnotable(es, &sf->pos, "cmap");
	840	return;
840	841	}
841	842	base = ptr;
842	843	ptr = decode(t_cmap_decode, ptr, end, &cmap);

885	886	idx = (k + idDelta[j]) & 0xffff;
886	887	if (idx != 0) {
887	888	if (idx > sf->nglyphs) {
888		err_sfntbadglyph(&sf->pos, k);
	889	err_sfntbadglyph(es, &sf->pos, k);
889	890	continue;
890	891	}
891	892	fi->bmp[k] = sfnt_indextoglyph(sf, idx);

896	897	for (k = startCode[j]; k <= endCode[j]; k++) {
897	898	if (startidx + k - startCode[j] >=
898	899	nglyphindex) {
899		err_sfntbadglyph(&sf->pos, k);
	900	err_sfntbadglyph(es, &sf->pos, k);
900	901	continue;
901	902	}
902	903	idx = glyphIndexArray[startidx + k - startCode[j]];
903	904	if (idx != 0) {
904	905	idx = (idx + idDelta[j]) & 0xffff;
905	906	if (idx > sf->nglyphs) {
906		err_sfntbadglyph(&sf->pos, k);
	907	err_sfntbadglyph(es, &sf->pos, k);
907	908	continue;
908	909	}
909	910	fi->bmp[k] = sfnt_indextoglyph(sf, idx);

916	917	}
917	918	}
918	919	}
919		err_sfntnounicmap(&sf->pos);
	920	err_sfntnounicmap(es, &sf->pos);
920	921	return;
921	922	bad:
922		err_sfntbadtable(&sf->pos, "cmap");
923		}
924
925		void read_sfnt_file(input *in) {
	923	err_sfntbadtable(es, &sf->pos, "cmap");
	924	}
	925
	926	void read_sfnt_file(input in, psdata psd) {
926	927	sfnt *sf = snew(sfnt);
927	928	size_t off = 0, got;
928	929	FILE *fp = in->currfp;

931	932	t_maxp maxp;
932	933
933	934	fi->name = NULL;
934		fi->widths = newtree234(width_cmp);
935		fi->kerns = newtree234(kern_cmp);
936		fi->ligs = newtree234(lig_cmp);
	935	fi->widths = newtree234(width_cmp, NULL);
	936	fi->kerns = newtree234(kern_cmp, NULL);
	937	fi->ligs = newtree234(lig_cmp, NULL);
937	938	fi->fontbbox[0] = fi->fontbbox[1] = fi->fontbbox[2] = fi->fontbbox[3] = 0;
938	939	fi->capheight = fi->xheight = fi->ascent = fi->descent = 0;
939	940	fi->stemh = fi->stemv = fi->italicangle = 0;

958	959	sf->nglyphs = 0;
959	960	ptr = decode(offsubdir_decode, sf->data, sf->end, &sf->osd);
960	961	if (ptr == NULL) {
961		err_sfntbadhdr(&sf->pos);
	962	err_sfntbadhdr(in->es, &sf->pos);
962	963	return;
963	964	}
964	965	sf->td = snewn(sf->osd.numTables, tabledir);
965	966	ptr = decoden(tabledir_decode, ptr, sf->end, sf->td, sizeof(*sf->td),
966	967	sf->osd.numTables);
967	968	if (ptr == NULL) {
968		err_sfntbadhdr(&sf->pos);
	969	err_sfntbadhdr(in->es, &sf->pos);
969	970	return;
970	971	}
971	972	if (!sfnt_findtable(sf, TAG_head, &ptr, &end)) {
972		err_sfntnotable(&sf->pos, "head");
	973	err_sfntnotable(in->es, &sf->pos, "head");
973	974	return;
974	975	}
975	976	if (decode(t_head_decode, ptr, end, &sf->head) == NULL) {
976		err_sfntbadtable(&sf->pos, "head");
	977	err_sfntbadtable(in->es, &sf->pos, "head");
977	978	return;
978	979	}
979	980	if ((sf->head.version & 0xffff0000) != 0x00010000) {
980		err_sfnttablevers(&sf->pos, "head");
	981	err_sfnttablevers(in->es, &sf->pos, "head");
981	982	return;
982	983	}
983	984	if (!sfnt_findtable(sf, TAG_maxp, &ptr, &end)) {
984		err_sfntnotable(&sf->pos, "maxp");
	985	err_sfntnotable(in->es, &sf->pos, "maxp");
985	986	return;
986	987	}
987	988	if (decode(t_maxp_decode, ptr, end, &maxp) == NULL) {
988		err_sfntbadtable(&sf->pos, "maxp");
	989	err_sfntbadtable(in->es, &sf->pos, "maxp");
989	990	return;
990	991	}
991	992	if (maxp.version < 0x00005000 \|\| maxp.version > 0x0001ffff) {
992		err_sfnttablevers(&sf->pos, "maxp");
	993	err_sfnttablevers(in->es, &sf->pos, "maxp");
993	994	return;
994	995	}
995	996	sf->nglyphs = maxp.numGlyphs;
996		fi->name = sfnt_psname(fi);
	997	fi->name = sfnt_psname(fi, in->es);
997	998	if (fi->name == NULL) return;
998		sfnt_mapglyphs(fi);
999		sfnt_getmetrics(fi);
1000		sfnt_getkern(fi);
1001		sfnt_getmap(fi);
1002		fi->next = all_fonts;
1003		all_fonts = fi;
	999	sfnt_mapglyphs(fi, psd, in->es);
	1000	sfnt_getmetrics(fi, in->es);
	1001	sfnt_getkern(fi, in->es);
	1002	sfnt_getmap(fi, in->es);
	1003	fi->next = psd->all_fonts;
	1004	psd->all_fonts = fi;
1004	1005	}
1005	1006
1006	1007	static int sizecmp(const void a, const void b) {

1015	1016	* <http://partners.adobe.com/public/developer/en/font/5012.Type42_Spec.pdf>
1016	1017	*/
1017	1018
1018		void sfnt_writeps(font_info const fi, FILE ofp) {
	1019	void sfnt_writeps(font_info const fi, FILE ofp, psdata psd, errorstate es) {
1019	1020	unsigned i, j, lastbreak;
1020	1021	sfnt *sf = fi->fontfile;
1021	1022	size_t *breaks, glyfoff, glyflen;

1052	1053	fprintf(ofp, "0 1 %u{currentfile token pop exch def}bind for\n",
1053	1054	sf->nglyphs - 1);
1054	1055	for (i = 0; i < sf->nglyphs; i++)
1055		ps_token(ofp, &cc, "/%s", glyph_extern(sfnt_indextoglyph(sf, i)));
	1056	ps_token(ofp, &cc, "/%s", glyph_extern(psd, sfnt_indextoglyph(sf, i)));
1056	1057	fprintf(ofp, "\nend readonly def\n");
1057	1058	fprintf(ofp, "/sfnts [<");
1058	1059	breaks = snewn(sf->osd.numTables + sf->nglyphs, size_t);

1060	1061	breaks[i] = sf->td[i].offset;
1061	1062	}
1062	1063	if (!sfnt_findtable(sf, TAG_glyf, &glyfptr, &glyfend)) {
1063		err_sfntnotable(&sf->pos, "glyf");
	1064	err_sfntnotable(es, &sf->pos, "glyf");
1064	1065	return;
1065	1066	}
1066	1067	glyfoff = (char )glyfptr - (char )sf->data;
1067	1068	glyflen = (char )glyfend - (char )glyfptr;
1068	1069	if (!sfnt_findtable(sf, TAG_loca, &locaptr, &locaend)) {
1069		err_sfntnotable(&sf->pos, "loca");
	1070	err_sfntnotable(es, &sf->pos, "loca");
1070	1071	return;
1071	1072	}
1072	1073	loca = snewn(sf->nglyphs, unsigned);

1099	1100	fprintf(ofp, "end /%s exch definefont\n", fi->name);
1100	1101	return;
1101	1102	badloca:
1102		err_sfntbadtable(&sf->pos, "loca");
	1103	err_sfntbadtable(es, &sf->pos, "loca");
1103	1104	}
1104	1105
1105	1106	void sfnt_data(font_info fi, char bufp, size_t lenp) {

+17

-17

index.c less more

5	5	#include <stdlib.h>
6	6	#include "halibut.h"
7	7
8		static int compare_tags(void av, void bv);
9		static int compare_entries(void av, void bv);
	8	static int compare_tags(const void av, const void bv, void *cmpctx);
	9	static int compare_entries(const void av, const void bv, void *cmpctx);
10	10
11	11	indexdata *make_index(void) {
12	12	indexdata *ret = snew(indexdata);
13		ret->tags = newtree234(compare_tags);
14		ret->entries = newtree234(compare_entries);
	13	ret->tags = newtree234(compare_tags, NULL);
	14	ret->entries = newtree234(compare_entries, NULL);
15	15	return ret;
16	16	}
17	17

26	26	return ret;
27	27	}
28	28
29		static int compare_tags(void av, void bv) {
30		indextag a = (indextag )av, b = (indextag )bv;
	29	static int compare_tags(const void av, const void bv, void *cmpctx) {
	30	const indextag a = (const indextag )av, b = (const indextag )bv;
31	31	return ustricmp(a->name, b->name);
32	32	}
33	33
34		static int compare_to_find_tag(void av, void bv) {
35		wchar_t a = (wchar_t )av;
36		indextag b = (indextag )bv;
	34	static int compare_to_find_tag(const void av, const void bv, void *cmpctx) {
	35	const wchar_t a = (const wchar_t )av;
	36	const indextag b = (const indextag )bv;
37	37	return ustricmp(a, b->name);
38	38	}
39	39
40		static int compare_entries(void av, void bv) {
	40	static int compare_entries(const void av, const void bv, void *cmpctx) {
41	41	indexentry a = (indexentry )av, b = (indexentry )bv;
42	42	return compare_wordlists(a->text, b->text);
43	43	}

46	46	* Back-end utility: find the indextag with a given name.
47	47	*/
48	48	indextag index_findtag(indexdata idx, wchar_t *name) {
49		return find234(idx->tags, name, compare_to_find_tag);
	49	return findcmp234(idx->tags, name, compare_to_find_tag, NULL);
50	50	}
51	51
52	52	/*

57	57	* Guarantee on calling sequence: all implicit merges are given
58	58	* before the explicit ones.
59	59	*/
60		void index_merge(indexdata idx, int is_explicit, wchar_t tags, word *text,
61		filepos *fpos) {
	60	void index_merge(indexdata idx, bool is_explicit, wchar_t tags, word *text,
	61	filepos fpos, errorstate es) {
62	62	indextag t, existing;
63	63
64	64	/*

98	98	* warn (and drop it, since it won't be referenced).
99	99	*/
100	100	if (is_explicit) {
101		err_nosuchidxtag(fpos, tags);
	101	err_nosuchidxtag(es, fpos, tags);
102	102	continue;
103	103	}
104	104

122	122	* see if the cases match.
123	123	*/
124	124	if (ustrcmp(t->name, existing->name)) {
125		err_indexcase(fpos, t->name,
	125	err_indexcase(es, fpos, t->name,
126	126	&existing->implicit_fpos, existing->name);
127	127	}
128	128

213	213	}
214	214
215	215	static void dbg_prtwordlist(int level, word *w);
216		static void dbg_prtmerge(int is_explicit, wchar_t tag, word text);
	216	static void dbg_prtmerge(bool is_explicit, wchar_t tag, word text);
217	217
218	218	void index_debug(indexdata *i) {
219	219	indextag *t;

239	239	}
240	240	}
241	241
242		static void dbg_prtmerge(int is_explicit, wchar_t tag, word text) {
	242	static void dbg_prtmerge(bool is_explicit, wchar_t tag, word text) {
243	243	printf("\\IM: %splicit: \"", is_explicit ? "ex" : "im");
244	244	for (; *tag; tag++)
245	245	putchar(*tag);

+173

-130

input.c less more

38	38	int ptr, npushback;
39	39	filepos pos;
40	40	};
41		static int macrocmp(void av, void bv) {
	41	static int macrocmp(const void av, const void bv, void *cmpctx) {
42	42	macro a = (macro )av, b = (macro )bv;
43	43	return ustrcmp(a->name, b->name);
44	44	}
45	45	static void macrodef(tree234 macros, wchar_t name, wchar_t *text,
46		filepos fpos) {
	46	filepos fpos, errorstate *es) {
47	47	macro *m = snew(macro);
48	48	m->name = name;
49	49	m->text = text;
50	50	if (add234(macros, m) != m) {
51		err_macroexists(&fpos, name);
	51	err_macroexists(es, &fpos, name);
52	52	sfree(name);
53	53	sfree(text);
54	54	}
55	55	}
56		static int macrolookup(tree234 macros, input in, wchar_t *name,
57		filepos *pos) {
	56	static bool macrolookup(tree234 macros, input in, wchar_t *name,
	57	filepos *pos) {
58	58	macro m, *gotit;
59	59	m.name = name;
60		gotit = find234(macros, &m, NULL);
	60	gotit = find234(macros, &m);
61	61	if (gotit) {
62	62	macrostack *expansion = snew(macrostack);
63	63	expansion->next = in->stack;

66	66	expansion->ptr = 0;
67	67	expansion->npushback = in->npushback;
68	68	in->stack = expansion;
69		return TRUE;
	69	return true;
70	70	} else
71		return FALSE;
	71	return false;
72	72	}
73	73	static void macrocleanup(tree234 *macros) {
74	74	int ti;

85	85	assert(cfg->type == para_Config);
86	86
87	87	if (!ustricmp(cfg->keyword, L"input-charset")) {
88		in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword));
	88	in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword),
	89	in->es);
89	90	}
90	91	}
91	92

168	169	NULL, 0);
169	170	assert(p == buf+1 && inlen == 0);
170	171
	172	for (int i = 0; i < in->nwc; i++) {
	173	if (in->wc[i] == 0) {
	174	/* The zero Unicode character is never legal */
	175	err_zerochar(in->es, pos);
	176	return EOF;
	177	}
	178	}
	179
171	180	in->wcpos = 0;
172	181	}
173	182	}
174	183
175		return in->wc[in->wcpos++];
	184	wchar_t wc = in->wc[in->wcpos++];
	185
	186	return wc;
176	187
177	188	} else
178	189	return EOF;

345	356	/* We expect hex characters thereafter. */
346	357	wchar_t *p = tok->text+1;
347	358	int n = 0;
	359	bool seen_a_char = false;
348	360	while (p && ishex(p)) {
	361	seen_a_char = true;
349	362	n = 16 * n + fromhex(*p);
350	363	p++;
351	364	}
352		if (!*p) {
	365	if (!*p && seen_a_char) {
353	366	tok->cmd = c_u;
354	367	tok->aux = n;
355	368	return;

477	490	* things other than whitespace, backslash, braces and
478	491	* hyphen. A hyphen terminates the word but is returned as
479	492	* part of it; everything else is pushed back for the next
480		* token. The `aux' field contains TRUE if the word ends in
	493	* token. The `aux' field contains true if the word ends in
481	494	* a hyphen.
482	495	*/
483		ret.aux = FALSE; /* assumed for now */
	496	ret.aux = false; /* assumed for now */
484	497	prevpos = 0;
485	498	while (1) {
486	499	if (iswhite(c) \|\| c=='{' \|\| c=='}' \|\| c=='\\' \|\| c==EOF) {

491	504	rdadd(&rs, c);
492	505	if (c == '-') {
493	506	prevpos = rsc.pos;
494		ret.aux = TRUE;
	507	ret.aux = true;
495	508	break; /* hyphen terminates word */
496	509	}
497	510	}

518	531	* telling code paragraphs from paragraphs which merely start with
519	532	* code).
520	533	*/
521		int isbrace(input *in) {
	534	bool isbrace(input *in) {
522	535	int c;
523	536	filepos cpos;
524	537

566	579	if (!hptrptr)
567	580	return NULL;
568	581	mnewword = snew(word);
	582	newword.private_data = NULL; /* placate gcc warning */
569	583	*mnewword = newword; /* structure copy */
570	584	mnewword->next = NULL;
571	585	**hptrptr = mnewword;

598	612	tree234 *macros) {
599	613	token t;
600	614	paragraph par;
601		word wd, whptr, idximplicit;
	615	word wd, whptr, idximplicit = NULL;
602	616	wchar_t utext[2], *wdtext;
603	617	int style, spcstyle;
604		int already;
605		int iswhite, seenwhite;
606		int type;
607		int prev_para_type;
	618	bool already;
	619	bool iswhite, seenwhite;
	620	int prev_para_type = para_NotParaType;
608	621	struct stack_item {
609	622	enum {
610	623	stack_nop = 0, /* do nothing (for error recovery) */

622	635	stack parsestk;
623	636	struct crossparaitem {
624	637	int type; /* currently c_lcont, c_quote or -1 */
625		int seen_lcont, seen_quote;
	638	bool seen_lcont, seen_quote;
626	639	};
627	640	stack crossparastk;
628		word indexword, uword, *iword;
	641	word indexword = NULL, uword, *iword;
629	642	word *idxwordlist;
630	643	rdstring indexstr;
631		int index_downcase, index_visible, indexing;
	644	bool index_downcase = false, index_visible = false, indexing;
632	645	const rdstring nullrs = { 0, 0, NULL };
633	646	wchar_t uchr;
634	647
635		t.text = NULL;
636		t.origtext = NULL;
637		already = FALSE;
	648	t = get_token(in);
	649	already = true;
	650
	651	/*
	652	* Ignore tok_white if it appears at the very start of the file.
	653	*
	654	* At the start of most paragraphs, tok_white is guaranteed not to
	655	* appear, because get_token will have folded it into the
	656	* preceding tok_eop (since a tok_eop is simply a sequence of
	657	* whitespace containing at least two newlines).
	658	*
	659	* The one exception is if there isn't a preceding tok_eop, i.e.
	660	* if the very first paragraph begins with something that lexes as
	661	* a tok_white. Easiest way to get round that is to ignore it
	662	* here, by unsetting the 'already' flag which will force a new
	663	* token to be fetched below.
	664	*/
	665	if (t.type == tok_white)
	666	already = false;
638	667
639	668	crossparastk = stk_new();
640	669

655	684	if (!already) {
656	685	dtor(t), t = get_token(in);
657	686	}
658		already = FALSE;
	687	already = false;
659	688	} while (t.type == tok_eop);
660	689	if (t.type == tok_eof)
661	690	break;

671	700	while (1) {
672	701	dtor(t), t = get_codepar_token(in);
673	702	wd.type = wtype;
674		wd.breaks = FALSE; /* shouldn't need this... */
	703	wd.breaks = false; /* shouldn't need this... */
675	704	wd.text = ustrdup(t.text);
676	705	wd.alt = NULL;
	706	wd.aux = 0;
677	707	wd.fpos = t.pos;
678	708	addword(wd, &whptr);
679	709	dtor(t), t = get_token(in);

686	716	if (t.type == tok_eop \|\| t.type == tok_eof \|\|
687	717	t.type == tok_rbrace) { /* might be } terminating \lcont */
688	718	if (t.type == tok_rbrace)
689		already = TRUE;
	719	already = true;
690	720	break;
691	721	} else if (t.type == tok_cmd && t.cmd == c_c) {
692	722	wtype = word_WeakCode;

697	727	wtype == word_WeakCode) {
698	728	wtype = word_Strong;
699	729	} else {
700		err_brokencodepara(&t.pos);
	730	err_brokencodepara(in->es, &t.pos);
701	731	prev_para_type = par.type;
702	732	addpara(par, ret);
703	733	while (t.type != tok_eop) /* error recovery: */

726	756	*/
727	757	dtor(t), t = get_token(in);
728	758	if (t.type != tok_lbrace) {
729		err_explbr(&t.pos);
	759	err_explbr(in->es, &t.pos);
730	760	continue;
731	761	}
732	762

738	768	do {
739	769	dtor(t), t = get_token(in);
740	770	} while (t.type == tok_white);
741		already = TRUE;
	771	already = true;
742	772
743	773	if (cmd == c_lcont) {
744	774	/*

749	779	*/
750	780	sitem = snew(struct crossparaitem);
751	781	stop = (struct crossparaitem *)stk_top(crossparastk);
752		if (stop)
	782	if (stop) {
753	783	sitem = stop;
754		else
755		sitem->seen_quote = sitem->seen_lcont = 0;
	784	} else {
	785	sitem->seen_quote = false;
	786	sitem->seen_lcont = false;
	787	}
756	788
757	789	if (prev_para_type == para_Bullet \|\|
758	790	prev_para_type == para_NumberedList \|\|
759	791	prev_para_type == para_Description) {
760	792	sitem->type = c_lcont;
761		sitem->seen_lcont = 1;
	793	sitem->seen_lcont = true;
762	794	par.type = para_LcontPush;
763	795	prev_para_type = par.type;
764	796	addpara(par, ret);

769	801	* don't give a cascade error.
770	802	*/
771	803	sitem->type = -1;
772		err_misplacedlcont(&t.pos);
	804	err_misplacedlcont(in->es, &t.pos);
773	805	}
774	806	} else {
775	807	/*

779	811	*/
780	812	sitem = snew(struct crossparaitem);
781	813	stop = (struct crossparaitem *)stk_top(crossparastk);
782		if (stop)
	814	if (stop) {
783	815	sitem = stop;
784		else
785		sitem->seen_quote = sitem->seen_lcont = 0;
	816	} else {
	817	sitem->seen_quote = false;
	818	sitem->seen_lcont = false;
	819	}
786	820	sitem->type = c_quote;
787		sitem->seen_quote = 1;
	821	sitem->seen_quote = true;
788	822	par.type = para_QuotePush;
789	823	prev_para_type = par.type;
790	824	addpara(par, ret);

794	828	} else if (t.type == tok_rbrace) {
795	829	struct crossparaitem *sitem = stk_pop(crossparastk);
796	830	if (!sitem)
797		err_unexbrace(&t.pos);
	831	err_unexbrace(in->es, &t.pos);
798	832	else {
799	833	switch (sitem->type) {
800	834	case c_lcont:

828	862	par.type = para_Normal;
829	863	if (t.type == tok_cmd) {
830	864	int needkw;
831		int is_macro = FALSE;
	865	bool is_macro = false;
832	866
833	867	par.fpos = t.pos;
834	868	switch (t.cmd) {

836	870	needkw = -1;
837	871	break;
838	872	case c__invalid:
839		err_badparatype(t.text, &t.pos);
	873	err_badparatype(in->es, t.text, &t.pos);
840	874	needkw = 4;
841	875	break;
842	876	case c__comment:

879	913	case c_cfg: needkw = 8; par.type = para_Config;
880	914	start_cmd = c_cfg; break;
881	915	case c_copyright: needkw = 32; par.type = para_Copyright; break;
882		case c_define: is_macro = TRUE; needkw = 1; break;
	916	case c_define: is_macro = true; needkw = 1; break;
883	917	/* For \nocite the keyword is _everything_ */
884	918	case c_nocite: needkw = 8; par.type = para_NoCite; break;
885	919	case c_preamble: needkw = 32; par.type = para_Normal; break;

895	929	par.type == para_UnnumberedChapter) {
896	930	struct crossparaitem *sitem = stk_top(crossparastk);
897	931	if (sitem && (sitem->seen_lcont \|\| sitem->seen_quote)) {
898		err_sectmarkerinblock( &t.pos,
899		(sitem->seen_lcont ? "lcont" : "quote"));
	932	err_sectmarkerinblock(
	933	in->es, &t.pos,
	934	(sitem->seen_lcont ? "lcont" : "quote"));
900	935	}
901	936	}
902	937

948	983	}
949	984	}
950	985	if (t.type != tok_rbrace) {
951		err_kwunclosed(&t.pos);
	986	err_kwunclosed(in->es, &t.pos);
952	987	continue;
953	988	}
954	989	rdadd(&rs, 0); /* add string terminator */

961	996
962	997	/* See whether we have the right number of keywords. */
963	998	if ((needkw & 48) && nkeys > 0)
964		err_kwillegal(&fp);
	999	err_kwillegal(in->es, &fp);
965	1000	if ((needkw & 11) && nkeys == 0)
966		err_kwexpected(&fp);
	1001	err_kwexpected(in->es, &fp);
967	1002	if ((needkw & 5) && nkeys > 1)
968		err_kwtoomany(&fp);
	1003	err_kwtoomany(in->es, &fp);
969	1004
970	1005	if (is_macro) {
971	1006	/*

984	1019	if (t.type == tok_eop \|\| t.type == tok_eof)
985	1020	break;
986	1021	}
987		macrodef(macros, rs.text, macrotext.text, fp);
	1022	macrodef(macros, rs.text, macrotext.text, fp, in->es);
988	1023	continue; /* next paragraph */
989	1024	}
990	1025

999	1034	if (t.type != tok_eop && t.type != tok_eof &&
1000	1035	(start_cmd == c__invalid \|\|
1001	1036	t.type != tok_cmd \|\| t.cmd != start_cmd)) {
1002		err_bodyillegal(&t.pos);
	1037	err_bodyillegal(in->es, &t.pos);
1003	1038	/* Error recovery: eat the rest of the paragraph */
1004	1039	while (t.type != tok_eop && t.type != tok_eof &&
1005	1040	(start_cmd == c__invalid \|\|

1007	1042	dtor(t), t = get_token(in);
1008	1043	}
1009	1044	if (t.type == tok_cmd)
1010		already = TRUE;/* inhibit get_token at top of loop */
	1045	already = true;/* inhibit get_token at top of loop */
1011	1046	prev_para_type = par.type;
1012	1047	addpara(par, ret);
1013	1048

1039	1074	parsestk = stk_new();
1040	1075	style = word_Normal;
1041	1076	spcstyle = word_WhiteSpace;
1042		indexing = FALSE;
1043		seenwhite = TRUE;
	1077	indexing = false;
	1078	seenwhite = true;
1044	1079	while (t.type != tok_eop && t.type != tok_eof) {
1045		iswhite = FALSE;
1046		already = FALSE;
	1080	iswhite = false;
	1081	already = false;
1047	1082
1048	1083	/* Handle implicit paragraph breaks after \IM, \BR etc */
1049	1084	if (start_cmd != c__invalid &&
1050	1085	t.type == tok_cmd && t.cmd == start_cmd) {
1051		already = TRUE; /* inhibit get_token at top of loop */
	1086	already = true; /* inhibit get_token at top of loop */
1052	1087	break;
1053	1088	}
1054	1089

1076	1111	wd.alt = NULL;
1077	1112	wd.aux = 0;
1078	1113	wd.fpos = t.pos;
1079		wd.breaks = FALSE;
	1114	wd.breaks = false;
1080	1115
1081	1116	/*
1082	1117	* Inhibit use of whitespace if it's (probably the

1085	1120	*/
1086	1121	if (start_cmd != c__invalid) {
1087	1122	dtor(t), t = get_token(in);
1088		already = TRUE;
	1123	already = true;
1089	1124	if (t.type == tok_cmd && t.cmd == start_cmd)
1090	1125	break;
1091	1126	}

1096	1131	addword(wd, &whptr);
1097	1132	if (indexing)
1098	1133	addword(wd, &idximplicit);
1099		iswhite = TRUE;
	1134	iswhite = true;
1100	1135	break;
1101	1136	case tok_word:
1102	1137	if (indexing)

1116	1151	}
1117	1152	break;
1118	1153	case tok_lbrace:
1119		err_unexbrace(&t.pos);
	1154	err_unexbrace(in->es, &t.pos);
1120	1155	/* Error recovery: push nop */
1121	1156	sitem = snew(struct stack_item);
1122	1157	sitem->type = stack_nop;

1132	1167	* wants popping. Accordingly, we treat it here
1133	1168	* as an indication that the paragraph is over.
1134	1169	*/
1135		already = TRUE;
	1170	already = true;
1136	1171	goto finished_para;
1137	1172	} else {
1138	1173	if (sitem->type & stack_ualt) {

1144	1179	spcstyle = word_WhiteSpace;
1145	1180	}
1146	1181	if (sitem->type & stack_idx) {
	1182	rdadds(&indexstr, L"");
1147	1183	indexword->text = ustrdup(indexstr.text);
1148	1184	if (index_downcase) {
1149	1185	word *w;

1155	1191	if (w->text)
1156	1192	ustrlow(w->text);
1157	1193	}
1158		indexing = FALSE;
	1194	indexing = false;
1159	1195	rdadd(&indexstr, L'\0');
1160		index_merge(idx, FALSE, indexstr.text,
1161		idxwordlist, &sitem->fpos);
	1196	index_merge(idx, false, indexstr.text,
	1197	idxwordlist, &sitem->fpos, in->es);
1162	1198	sfree(indexstr.text);
1163	1199	}
1164	1200	if (sitem->type & stack_hyper) {

1167	1203	wd.alt = NULL;
1168	1204	wd.aux = 0;
1169	1205	wd.fpos = t.pos;
1170		wd.breaks = FALSE;
	1206	wd.breaks = false;
1171	1207	if (!indexing \|\| index_visible)
1172	1208	addword(wd, &whptr);
1173	1209	if (indexing)

1179	1215	wd.alt = NULL;
1180	1216	wd.aux = quote_Close;
1181	1217	wd.fpos = t.pos;
1182		wd.breaks = FALSE;
	1218	wd.breaks = false;
1183	1219	if (!indexing \|\| index_visible)
1184	1220	addword(wd, &whptr);
1185	1221	if (indexing) {

1202	1238	*/
1203	1239	dtor(t), t = get_token(in);
1204	1240	if (t.type != tok_lbrace) {
1205		err_explbr(&t.pos);
	1241	err_explbr(in->es, &t.pos);
1206	1242	} else {
1207	1243	int braces = 1;
1208	1244	while (braces > 0) {

1212	1248	else if (t.type == tok_rbrace)
1213	1249	braces--;
1214	1250	else if (t.type == tok_eof) {
1215		err_commenteof(&t.pos);
	1251	err_commenteof(in->es, &t.pos);
1216	1252	break;
1217	1253	}
1218	1254	}
1219	1255	}
1220	1256	if (seenwhite) {
1221		already = TRUE;
	1257	already = true;
1222	1258	dtor(t), t = get_token(in);
1223	1259	if (t.type == tok_white) {
1224		iswhite = TRUE;
1225		already = FALSE;
	1260	iswhite = true;
	1261	already = false;
1226	1262	}
1227	1263	}
1228	1264	break;
1229	1265	case c_q:
1230		case c_cq:
1231		type = t.cmd;
	1266	case c_cq: {
	1267	int type = t.cmd;
1232	1268	dtor(t), t = get_token(in);
1233	1269	if (t.type != tok_lbrace) {
1234		err_explbr(&t.pos);
	1270	err_explbr(in->es, &t.pos);
1235	1271	} else {
1236	1272	/*
1237	1273	* Enforce that \q may not be used anywhere

1251	1287	wd.alt = NULL;
1252	1288	wd.aux = quote_Open;
1253	1289	wd.fpos = t.pos;
1254		wd.breaks = FALSE;
	1290	wd.breaks = false;
1255	1291	if (!indexing \|\| index_visible)
1256	1292	addword(wd, &whptr);
1257	1293	if (indexing) {

1260	1296	}
1261	1297	stype = stack_quote;
1262	1298	} else {
1263		err_codequote(&t.pos);
	1299	err_codequote(in->es, &t.pos);
1264	1300	stype = stack_nop;
1265	1301	}
1266	1302	sitem = snew(struct stack_item);

1268	1304	sitem->type = stype;
1269	1305	if (type == c_cq) {
1270	1306	if (style != word_Normal) {
1271		err_nestedstyles(&t.pos);
	1307	err_nestedstyles(in->es, &t.pos);
1272	1308	} else {
1273	1309	style = word_WeakCode;
1274	1310	spcstyle = tospacestyle(style);

1278	1314	stk_push(parsestk, sitem);
1279	1315	}
1280	1316	break;
	1317	}
1281	1318	case c_K:
1282	1319	case c_k:
1283	1320	case c_W:

1288	1325	* brace. No nesting; no arguments.
1289	1326	*/
1290	1327	wd.fpos = t.pos;
1291		wd.breaks = FALSE;
	1328	wd.breaks = false;
1292	1329	if (t.cmd == c_K)
1293	1330	wd.type = word_UpperXref;
1294	1331	else if (t.cmd == c_k)

1300	1337	dtor(t), t = get_token(in);
1301	1338	if (t.type != tok_lbrace) {
1302	1339	if (wd.type == word_Normal) {
1303		time_t thetime = time(NULL);
	1340	time_t thetime = current_time();
1304	1341	struct tm *broken = localtime(&thetime);
1305		already = TRUE;
	1342	already = true;
1306	1343	wdtext = ustrftime(NULL, broken);
1307	1344	wd.type = style;
1308	1345	} else {
1309		err_explbr(&t.pos);
	1346	err_explbr(in->es, &t.pos);
1310	1347	wdtext = NULL;
1311	1348	}
1312	1349	} else {

1319	1356	rdadds(&rs, t.text);
1320	1357	}
1321	1358	if (wd.type == word_Normal) {
1322		time_t thetime = time(NULL);
	1359	time_t thetime = current_time();
1323	1360	struct tm *broken = localtime(&thetime);
1324	1361	wdtext = ustrftime(rs.text, broken);
1325	1362	wd.type = style;

1328	1365	}
1329	1366	sfree(rs.text);
1330	1367	if (t.type != tok_rbrace) {
1331		err_kwexprbr(&t.pos);
	1368	err_kwexprbr(in->es, &t.pos);
1332	1369	}
1333	1370	}
1334	1371	wd.alt = NULL;

1358	1395	if (t.type == tok_cmd &&
1359	1396	(t.cmd == c_i \|\| t.cmd == c_ii)) {
1360	1397	if (indexing) {
1361		err_nestedindex(&t.pos);
	1398	err_nestedindex(in->es, &t.pos);
1362	1399	} else {
1363	1400	/* Add an index-reference word with no
1364	1401	* text as yet */

1366	1403	wd.text = NULL;
1367	1404	wd.alt = NULL;
1368	1405	wd.aux = 0;
1369		wd.breaks = FALSE;
	1406	wd.breaks = false;
1370	1407	indexword = addword(wd, &whptr);
1371	1408	/* Set up a rdstring to read the
1372	1409	* index text */
1373	1410	indexstr = nullrs;
1374	1411	/* Flags so that we do the Right
1375	1412	* Things with text */
1376		index_visible = (type != c_I);
1377		index_downcase = (type == c_ii);
1378		indexing = TRUE;
	1413	index_visible = (t.cmd != c_I);
	1414	index_downcase = (t.cmd == c_ii);
	1415	indexing = true;
1379	1416	idxwordlist = NULL;
1380	1417	idximplicit = &idxwordlist;
1381	1418

1390	1427	(t.cmd == c_e \|\| t.cmd == c_s \|\|
1391	1428	t.cmd == c_c \|\| t.cmd == c_cw)) {
1392	1429	if (style != word_Normal)
1393		err_nestedstyles(&t.pos);
	1430	err_nestedstyles(in->es, &t.pos);
1394	1431	else {
1395	1432	style = (t.cmd == c_c ? word_Code :
1396	1433	t.cmd == c_cw ? word_WeakCode :

1402	1439	dtor(t), t = get_token(in);
1403	1440	}
1404	1441	if (t.type != tok_lbrace) {
1405		err_explbr(&t.pos);
	1442	err_explbr(in->es, &t.pos);
1406	1443	sfree(sitem);
1407	1444	} else {
1408	1445	stk_push(parsestk, sitem);

1412	1449	case c_c:
1413	1450	case c_cw:
1414	1451	case c_e:
1415		case c_s:
1416		type = t.cmd;
	1452	case c_s: {
	1453	int type = t.cmd;
1417	1454	if (style != word_Normal) {
1418		err_nestedstyles(&t.pos);
	1455	err_nestedstyles(in->es, &t.pos);
1419	1456	/* Error recovery: eat lbrace, push nop. */
1420	1457	dtor(t), t = get_token(in);
1421	1458	sitem = snew(struct stack_item);

1425	1462	}
1426	1463	dtor(t), t = get_token(in);
1427	1464	if (t.type != tok_lbrace) {
1428		err_explbr(&t.pos);
	1465	err_explbr(in->es, &t.pos);
1429	1466	} else {
1430	1467	style = (type == c_c ? word_Code :
1431	1468	type == c_cw ? word_WeakCode :

1438	1475	stk_push(parsestk, sitem);
1439	1476	}
1440	1477	break;
	1478	}
1441	1479	case c_i:
1442	1480	case c_ii:
1443		case c_I:
1444		type = t.cmd;
	1481	case c_I: {
	1482	int type = t.cmd;
1445	1483	if (indexing) {
1446		err_nestedindex(&t.pos);
	1484	err_nestedindex(in->es, &t.pos);
1447	1485	/* Error recovery: eat lbrace, push nop. */
1448	1486	dtor(t), t = get_token(in);
1449	1487	sitem = snew(struct stack_item);

1463	1501	(t.cmd == c_e \|\| t.cmd == c_s \|\|
1464	1502	t.cmd == c_c \|\| t.cmd == c_cw)) {
1465	1503	if (style != word_Normal)
1466		err_nestedstyles(&t.pos);
	1504	err_nestedstyles(in->es, &t.pos);
1467	1505	else {
1468	1506	style = (t.cmd == c_c ? word_Code :
1469	1507	t.cmd == c_cw ? word_WeakCode :

1476	1514	}
1477	1515	if (t.type != tok_lbrace) {
1478	1516	sfree(sitem);
1479		err_explbr(&t.pos);
	1517	err_explbr(in->es, &t.pos);
1480	1518	} else {
1481	1519	/* Add an index-reference word with no text as yet */
1482	1520	wd.type = word_IndexRef;
1483	1521	wd.text = NULL;
1484	1522	wd.alt = NULL;
1485	1523	wd.aux = 0;
1486		wd.breaks = FALSE;
	1524	wd.breaks = false;
1487	1525	indexword = addword(wd, &whptr);
1488	1526	/* Set up a rdstring to read the index text */
1489	1527	indexstr = nullrs;
1490	1528	/* Flags so that we do the Right Things with text */
1491	1529	index_visible = (type != c_I);
1492	1530	index_downcase = (type == c_ii);
1493		indexing = TRUE;
	1531	indexing = true;
1494	1532	idxwordlist = NULL;
1495	1533	idximplicit = &idxwordlist;
1496	1534	/* Stack item to close the indexing on exit */
1497	1535	stk_push(parsestk, sitem);
1498	1536	}
1499	1537	break;
	1538	}
1500	1539	case c_u:
1501	1540	uchr = t.aux;
	1541	if (uchr == 0) {
	1542	err_zerochar(in->es, &t.pos);
	1543	break;
	1544	}
1502	1545	utext[0] = uchr; utext[1] = 0;
1503	1546	wd.type = style;
1504		wd.breaks = FALSE;
	1547	wd.breaks = false;
1505	1548	wd.alt = NULL;
1506	1549	wd.aux = 0;
1507	1550	wd.fpos = t.pos;

1534	1577	} else {
1535	1578	if (indexing)
1536	1579	rdadd(&indexstr, uchr);
1537		already = TRUE;
	1580	already = true;
1538	1581	}
1539	1582	break;
1540	1583	default:
1541	1584	if (!macrolookup(macros, in, t.text, &t.pos))
1542		err_badmidcmd(t.text, &t.pos);
	1585	err_badmidcmd(in->es, t.text, &t.pos);
1543	1586	break;
1544	1587	}
1545	1588	}

1552	1595	if (stk_top(parsestk)) {
1553	1596	while ((sitem = stk_pop(parsestk)))
1554	1597	sfree(sitem);
1555		err_missingrbrace(&t.pos);
	1598	err_missingrbrace(in->es, &t.pos);
1556	1599	}
1557	1600	stk_free(parsestk);
1558	1601	prev_para_type = par.type;

1568	1611	addpara(par, ret);
1569	1612	}
1570	1613	if (t.type == tok_eof)
1571		already = TRUE;
	1614	already = true;
1572	1615	}
1573	1616
1574	1617	if (stk_top(crossparastk)) {
1575	1618	void *p;
1576	1619
1577		err_missingrbrace2(&t.pos);
	1620	err_missingrbrace2(in->es, &t.pos);
1578	1621	while ((p = stk_pop(crossparastk)))
1579	1622	sfree(p);
1580	1623	}

1588	1631	stk_free(crossparastk);
1589	1632	}
1590	1633
1591		struct {
	1634	const struct {
1592	1635	char const *magic;
1593	1636	size_t nmagic;
1594		int binary;
1595		void (reader)(input );
	1637	bool binary;
	1638	void (reader)(input , psdata *);
1596	1639	} magics[] = {
1597		{ "%!FontType1-", 12, FALSE, &read_pfa_file },
1598		{ "%!PS-AdobeFont-", 15, FALSE, &read_pfa_file },
1599		{ "\x80\x01", 2, TRUE, &read_pfb_file },
1600		{ "StartFontMetrics", 16, FALSE, &read_afm_file },
1601		{ "\x00\x01\x00\x00", 4, TRUE, &read_sfnt_file },
1602		{ "true", 4, TRUE, &read_sfnt_file },
	1640	{ "%!FontType1-", 12, false, &read_pfa_file },
	1641	{ "%!PS-AdobeFont-", 15, false, &read_pfa_file },
	1642	{ "\x80\x01", 2, true, &read_pfb_file },
	1643	{ "StartFontMetrics", 16, false, &read_afm_file },
	1644	{ "\x00\x01\x00\x00", 4, true, &read_sfnt_file },
	1645	{ "true", 4, true, &read_sfnt_file },
1603	1646	};
1604	1647
1605		paragraph read_input(input in, indexdata *idx) {
	1648	paragraph read_input(input in, indexdata idx, psdata psd) {
1606	1649	paragraph *head = NULL;
1607	1650	paragraph **hptr = &head;
1608	1651	tree234 *macros;
1609	1652	char mag[16];
1610	1653	size_t len, i;
1611		int binary;
1612		void (reader)(input );
1613
1614		macros = newtree234(macrocmp);
	1654	bool binary;
	1655	void (reader)(input , psdata *);
	1656
	1657	macros = newtree234(macrocmp, NULL);
1615	1658
1616	1659	while (in->currindex < in->nfiles) {
1617	1660	setpos(in, in->filenames[in->currindex]);

1622	1665
1623	1666	if (!in->filenames[in->currindex]) {
1624	1667	in->currfp = stdin;
1625		in->wantclose = FALSE; /* don't fclose stdin */
	1668	in->wantclose = false; /* don't fclose stdin */
1626	1669	/*
1627	1670	* When reading standard input, we always expect to see
1628	1671	* an actual Halibut file and not any of the unusual

1636	1679	* looking at a text file type.
1637	1680	*/
1638	1681	in->currfp = fopen(in->filenames[in->currindex], "rb");
1639		binary = FALSE; /* default to Halibut source, which is text */
	1682	binary = false; /* default to Halibut source, which is text */
	1683	reader = NULL;
1640	1684	if (in->currfp) {
1641		in->wantclose = TRUE;
1642		reader = NULL;
	1685	in->wantclose = true;
1643	1686	len = fread(mag, 1, sizeof(mag), in->currfp);
1644	1687	for (i = 0; i < lenof(magics); i++) {
1645	1688	if (len >= magics[i].nmagic &&

1661	1704	if (reader == NULL) {
1662	1705	read_file(&hptr, in, idx, macros);
1663	1706	} else {
1664		(*reader)(in);
	1707	(*reader)(in, psd);
1665	1708	}
1666	1709	} else {
1667		err_cantopen(in->filenames[in->currindex]);
	1710	err_cantopen(in->es, in->filenames[in->currindex]);
1668	1711	}
1669	1712	in->currindex++;
1670	1713	}

+11

-11

keywords.c less more

6	6	#include <assert.h>
7	7	#include "halibut.h"
8	8
9		static int kwcmp(void av, void bv)
	9	static int kwcmp(const void av, const void bv, void *cmpctx)
10	10	{
11	11	const keyword a = (const keyword )av;
12	12	const keyword b = (const keyword )bv;
13	13	return ustrcmp(a->key, b->key);
14	14	}
15	15
16		static int kwfind(void av, void bv)
	16	static int kwfind(const void av, const void bv, void *cmpctx)
17	17	{
18	18	wchar_t a = (wchar_t )av;
19	19	const keyword b = (const keyword )bv;

21	21	}
22	22
23	23	keyword kw_lookup(keywordlist kl, wchar_t *str) {
24		return find234(kl->keys, str, kwfind);
	24	return findcmp234(kl->keys, str, kwfind, NULL);
25	25	}
26	26
27	27	/*

30	30	* collation, last at the top (so that we can Heapsort them when we
31	31	* finish).
32	32	*/
33		keywordlist get_keywords(paragraph source) {
34		int errors = FALSE;
	33	keywordlist get_keywords(paragraph source, errorstate *es) {
	34	bool errors = false;
35	35	keywordlist *kl = snew(keywordlist);
36	36	numberstate *n = number_init();
37	37	int prevpara = para_NotParaType;

39	39	number_cfg(n, source);
40	40
41	41	kl->size = 0;
42		kl->keys = newtree234(kwcmp);
	42	kl->keys = newtree234(kwcmp, NULL);
43	43	kl->nlooseends = kl->looseendssize = 0;
44	44	kl->looseends = NULL;
45	45	for (; source; source = source->next) {

61	61	* This also sets up the `parent', `child' and `sibling'
62	62	* links.
63	63	*/
64		source->kwtext = number_mktext(n, source, q, &prevpara, &errors);
	64	source->kwtext = number_mktext(n, source, q, &prevpara, &errors, es);
65	65
66	66	if (p && *p) {
67	67	if (source->kwtext \|\| source->type == para_Biblio) {

73	73	kw->para = source;
74	74	ret = add234(kl->keys, kw);
75	75	if (ret != kw) {
76		err_multikw(&source->fpos, &ret->para->fpos, p);
	76	err_multikw(es, &source->fpos, &ret->para->fpos, p);
77	77	sfree(kw);
78	78	/* FIXME: what happens to kw->text? Does it leak? */
79	79	}

112	112	sfree(kl);
113	113	}
114	114
115		void subst_keywords(paragraph source, keywordlist kl) {
	115	void subst_keywords(paragraph source, keywordlist kl, errorstate *es) {
116	116	for (; source; source = source->next) {
117	117	word *ptr;
118	118	for (ptr = source->words; ptr; ptr = ptr->next) {

123	123
124	124	kw = kw_lookup(kl, ptr->text);
125	125	if (!kw) {
126		err_nosuchkw(&ptr->fpos, ptr->text);
	126	err_nosuchkw(es, &ptr->fpos, ptr->text);
127	127	subst = NULL;
128	128	} else
129	129	subst = dup_word_list(kw->text);

138	138	close->alt = NULL;
139	139	close->type = word_XrefEnd;
140	140	close->fpos = ptr->fpos;
141		close->breaks = FALSE;
	141	close->breaks = false;
142	142	close->aux = 0;
143	143
144	144	close->next = ptr->next;

+1

-1

licence.c less more

4	4	#include <stdio.h>
5	5
6	6	static const char *const licencetext[] = {
7		"Halibut is copyright (c) 1999-2017 Simon Tatham.",
	7	"Halibut is copyright (c) 1999-2021 Simon Tatham.",
8	8	"",
9	9	"Permission is hereby granted, free of charge, to any person",
10	10	"obtaining a copy of this software and associated documentation files",

+2

-2

lz77.c less more

116	116	#define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)%st->winsize] : data[k] )
117	117
118	118	void lz77_compress(struct LZ77Context *ctx,
119		const unsigned char *data, int len, int compress)
	119	const unsigned char *data, int len, bool compress)
120	120	{
121	121	struct LZ77InternalContext *st = ctx->ictx;
122	122	int i, hash, distance, off, nmatch, matchlen, advance;

143	143	}
144	144	st->npending -= i;
145	145
146		defermatch.len = 0;
	146	defermatch.distance = defermatch.len = 0;
147	147	deferchr = '\0';
148	148	while (len > 0) {
149	149

+2

-2

lz77.h less more

27	27	/*
28	28	* Supply data to be compressed. Will update the private fields of
29	29	* the LZ77Context, and will call literal() and match() to output.
30		* If `compress' is FALSE, it will never emit a match, but will
	30	* If `compress' is false, it will never emit a match, but will
31	31	* instead call literal() for everything.
32	32	*/
33	33	void lz77_compress(struct LZ77Context *ctx,
34		const unsigned char *data, int len, int compress);
	34	const unsigned char *data, int len, bool compress);

+4

-4

lzx.c less more

218	218	lz77c.literal = lzx_literal;
219	219	lz77c.match = lzx_match;
220	220	lz77c.userdata = info;
221		lz77_compress(&lz77c, data, len, TRUE);
	221	lz77_compress(&lz77c, data, len, true);
222	222	lz77_cleanup(&lz77c);
223	223	}
224	224

396	396	size_t data_size, resets_size;
397	397	unsigned short bitbuffer;
398	398	int nbits;
399		int first_block;
	399	bool first_block;
400	400	} LZXBitstream;
401	401
402	402	void lzx_write_bits(LZXBitstream *bs, int value, int bits)

562	562	* the whole-file header.
563	563	*/
564	564	lzx_addsym(&header[0], LST_RAWBITS_BASE + 1, 0);
565		bs->first_block = FALSE;
	565	bs->first_block = false;
566	566	}
567	567	lzx_addsym(&header[0], LST_RAWBITS_BASE + 3, blocktype);
568	568	lzx_addsym(&header[0], LST_RAWBITS_BASE + 24, blocksize);

634	634	* block-boundary heuristics, but I don't really think it's
635	635	* worth it.
636	636	*/
637		bs.first_block = TRUE; /* reset every time we reset the LZ state */
	637	bs.first_block = true; /* reset every time we reset the LZ state */
638	638	lzx_encode_block(buf.syms, buf.nsyms, thislen, &hufs, &bs);
639	639
640	640	sfree(buf.syms);

+55

-42

main.c less more

6	6	#include <stdio.h>
7	7	#include <stdlib.h>
8	8	#include "halibut.h"
	9	#include "paper.h"
9	10
10	11	static void dbg_prtsource(paragraph *sourceform);
11	12	static void dbg_prtwordlist(int level, word *w);
12	13	static void dbg_prtkws(keywordlist *kws);
13	14
14	15	static const struct pre_backend {
15		void (func)(paragraph , keywordlist , indexdata *);
	16	void (func)(paragraph , keywordlist , indexdata , psdata ,
	17	errorstate *);
16	18	int bitfield;
17	19	} pre_backends[] = {
18	20	{paper_pre_backend, 0x0001}
19	21	};
20	22
21	23	static const struct backend {
22		char *name;
23		void (func)(paragraph , keywordlist , indexdata , void *);
	24	const char *name;
	25	void (func)(paragraph , keywordlist , indexdata , void *,
	26	errorstate *);
24	27	paragraph (filename)(char *filename);
25	28	int bitfield, prebackend_bitfield;
26	29	} backends[] = {

40	43	int main(int argc, char **argv) {
41	44	char **infiles;
42	45	int nfiles;
43		int nogo;
44		int errs;
45		int reportcols;
46		int list_fonts;
	46	bool nogo;
	47	bool reportcols;
	48	bool list_fonts;
47	49	int input_charset;
48		int debug;
	50	bool debug;
49	51	int backendbits, prebackbits;
50	52	int k, b;
51	53	paragraph cfg, cfg_tail;
52	54	void *pre_backend_data[16];
	55	errorstate es[1];
53	56
54	57	/*
55	58	* Use the specified locale everywhere. It'll be used for

67	70	*/
68	71	infiles = snewn(argc, char *);
69	72	nfiles = 0;
70		nogo = errs = FALSE;
71		reportcols = 0;
72		list_fonts = 0;
	73	nogo = false;
	74	reportcols = false;
	75	list_fonts = false;
73	76	input_charset = CS_ASCII;
74		debug = 0;
	77	debug = false;
75	78	backendbits = 0;
76	79	cfg = cfg_tail = NULL;
	80	es->fatal = false;
77	81
78	82	if (argc == 1) {
79	83	usage();

128	132	/* do nothing */;
129	133	} else if (!strcmp(opt, "-input-charset")) {
130	134	if (!val) {
131		errs = TRUE, err_optnoarg(opt);
	135	err_optnoarg(es, opt);
132	136	} else {
133	137	int charset = charset_from_localenc(val);
134	138	if (charset == CS_NONE) {
135		errs = TRUE, err_cmdcharset(val);
	139	err_cmdcharset(es, val);
136	140	} else {
137	141	input_charset = charset;
138	142	}
139	143	}
140	144	} else if (!strcmp(opt, "-help")) {
141	145	help();
142		nogo = TRUE;
	146	nogo = true;
143	147	} else if (!strcmp(opt, "-version")) {
144	148	showversion();
145		nogo = TRUE;
	149	nogo = true;
146	150	} else if (!strcmp(opt, "-licence") \|\|
147	151	!strcmp(opt, "-license")) {
148	152	licence();
149		nogo = TRUE;
	153	nogo = true;
150	154	} else if (!strcmp(opt, "-list-charsets")) {
151	155	listcharsets();
152		nogo = TRUE;
	156	nogo = true;
153	157	} else if (!strcmp(opt, "-list-fonts")) {
154		list_fonts = TRUE;
	158	list_fonts = true;
155	159	} else if (!strcmp(opt, "-precise")) {
156		reportcols = 1;
	160	reportcols = true;
157	161	} else {
158		errs = TRUE, err_nosuchopt(opt);
	162	err_nosuchopt(es, opt);
159	163	}
160	164	}
161	165	p = NULL;

171	175	switch (c) {
172	176	case 'h':
173	177	help();
174		nogo = TRUE;
	178	nogo = true;
175	179	break;
176	180	case 'V':
177	181	showversion();
178		nogo = TRUE;
	182	nogo = true;
179	183	break;
180	184	case 'L':
181	185	licence();
182		nogo = TRUE;
	186	nogo = true;
183	187	break;
184	188	case 'P':
185		reportcols = 1;
	189	reportcols = true;
186	190	break;
187	191	case 'd':
188		debug = TRUE;
	192	debug = true;
189	193	break;
190	194	}
191	195	break;

200	204	char opt[2];
201	205	opt[0] = c;
202	206	opt[1] = '\0';
203		errs = TRUE, err_optnoarg(opt);
	207	err_optnoarg(es, opt);
204	208	}
205	209	/*
206	210	* Now c is the option and p is the parameter.

224	228	*r = '\0';
225	229	/* XXX ad-hoc diagnostic */
226	230	if (!strcmp(s, "input-charset"))
227		err_futileopt("Cinput-charset",
	231	err_futileopt(es, "Cinput-charset",
228	232	"; use --input-charset");
229	233	cmdline_cfg_add(para, s);
230	234	r = s;

256	260	char opt[2];
257	261	opt[0] = c;
258	262	opt[1] = '\0';
259		errs = TRUE, err_nosuchopt(opt);
	263	err_nosuchopt(es, opt);
260	264	}
261	265	}
262	266	}

271	275	}
272	276	}
273	277
274		if (errs)
	278	if (es->fatal)
275	279	exit(EXIT_FAILURE);
276	280	if (nogo)
277	281	exit(EXIT_SUCCESS);

280	284	* Do the work.
281	285	*/
282	286	if (nfiles == 0 && !list_fonts) {
283		err_noinput();
	287	err_noinput(es);
284	288	usage();
285	289	exit(EXIT_FAILURE);
286	290	}

290	294	paragraph sourceform, p;
291	295	indexdata *idx;
292	296	keywordlist *keywords;
	297	psdata *psd;
293	298
294	299	in.filenames = infiles;
295	300	in.nfiles = nfiles;

300	305	in.reportcols = reportcols;
301	306	in.stack = NULL;
302	307	in.defcharset = input_charset;
	308	in.es = es;
303	309
304	310	idx = make_index();
305
306		sourceform = read_input(&in, idx);
	311	psd = psdata_new();
	312
	313	sourceform = read_input(&in, idx, psd);
307	314	if (list_fonts) {
308		listfonts();
	315	listfonts(psd);
309	316	exit(EXIT_SUCCESS);
310	317	}
311		if (!sourceform)
	318	if (es->fatal)
312	319	exit(EXIT_FAILURE);
	320	assert(sourceform);
313	321
314	322	/*
315	323	* Append the config directives acquired from the command

330	338
331	339	sfree(infiles);
332	340
333		keywords = get_keywords(sourceform);
	341	keywords = get_keywords(sourceform, es);
334	342	if (!keywords)
335	343	exit(EXIT_FAILURE);
336		gen_citations(sourceform, keywords);
337		subst_keywords(sourceform, keywords);
	344	gen_citations(sourceform, keywords, es);
	345	subst_keywords(sourceform, keywords, es);
338	346
339	347	for (p = sourceform; p; p = p->next)
340	348	if (p->type == para_IM)
341		index_merge(idx, TRUE, p->keyword, p->words, &p->fpos);
	349	index_merge(idx, true, p->keyword, p->words, &p->fpos, es);
342	350
343	351	build_index(idx);
344	352

366	374	* Select and run the pre-backends.
367	375	*/
368	376	prebackbits = 0;
	377	memset(pre_backend_data, 0, sizeof(pre_backend_data));
369	378	for (k = 0; k < (int)lenof(backends); k++)
370	379	if (backendbits == 0 \|\| (backendbits & backends[k].bitfield))
371	380	prebackbits \|= backends[k].prebackend_bitfield;

373	382	if (prebackbits & pre_backends[k].bitfield) {
374	383	assert(k < (int)lenof(pre_backend_data));
375	384	pre_backend_data[k] =
376		pre_backends[k].func(sourceform, keywords, idx);
	385	pre_backends[k].func(sourceform, keywords, idx, psd, es);
377	386	}
378	387
379	388	/*

394	403	break;
395	404	}
396	405
397		backends[k].func(sourceform, keywords, idx, pbd);
	406	backends[k].func(sourceform, keywords, idx, pbd, es);
398	407	}
399	408	}
400	409
401	410	free_para_list(sourceform);
402	411	free_keywords(keywords);
403	412	cleanup_index(idx);
404		}
	413	psdata_free(psd);
	414	}
	415
	416	if (es->fatal)
	417	exit(EXIT_FAILURE);
405	418
406	419	return 0;
407	420	}

+1

-1

misc/logalloc less more

14	14
15	15	$errors=0;
16	16
17		while (<>) {
	17	while (<<>>) {
18	18	$in=$out="";
19	19	($file, $line, $call, $in, $out)=($1,$2,$3,"",$4)
20	20	if /^(\S+) (\S+) (malloc\|strdup)$\S+$ returns (\S+)$/;

+33

-1

misc.c less more

1	1	* misc.c: miscellaneous useful items
2	2	*/
3	3
	4	#include <assert.h>
4	5	#include <stdarg.h>
	6	#include <stdlib.h>
	7	#include <time.h>
5	8	#include "halibut.h"
6	9
7	10	char adv(char s) {

97	100	rs->text = sresize(rs->text, rs->size, char);
98	101	}
99	102	memcpy(rs->text + rs->pos, p, len);
	103	rs->pos += len;
	104	rs->text[rs->pos] = 0;
	105	}
	106	void rdaddc_rep(rdstringc *rs, char c, int len) { /* append `len' copies of `c' to rs, keeping it NUL-terminated */
	107	if (len <= 0) {
	108	assert(len == 0); /* a negative count is a caller bug; zero is a no-op */
	109	return;
	110	}
	111	if (rs->pos >= rs->size - len) { /* not enough room for len chars plus the terminator */
	112	rs->size = rs->pos + len + 128; /* grow with 128 bytes of slack */
	113	rs->text = sresize(rs->text, rs->size, char);
	114	}
	115	memset(rs->text + rs->pos, c, len);
100	116	rs->pos += len;
101	117	rs->text[rs->pos] = 0; /* re-terminate after the appended run */
102	118	}

236	252
237	253	wp = NULL;
238	254	for (w = words; w; w = w->next) {
239		int both;
	255	bool both;
240	256	if (!isvis(w->type))
241	257	/* Invisible elements should not affect this calculation */
242	258	continue;

579	595
580	596	return p;
581	597	}
	598
	599	/*
	600	* Wrapper around the standard C time() function, which allows its
	601	* return value to be overridden by the environment variable
	602	* SOURCE_DATE_EPOCH, used to achieve reproducible builds by avoiding
	603	* baking different datestamps into repetitions of what ought to be
	604	* the same build.
	605	*/
	606	time_t current_time(void)
	607	{
	608	const char *epoch = getenv("SOURCE_DATE_EPOCH");
	609	if (epoch) /* variable is set: use it instead of the real clock */
	610	return atoll(epoch); /* NOTE(review): atoll gives no error indication, so a malformed value is not detected here */
	611
	612	return time(NULL);
	613	}

+20

-12

paper.h less more

43	43	page_data *pages;
44	44	outline_element *outline_elements;
45	45	int n_outline_elements;
	46	psdata *psd;
46	47	};
47	48
48	49	/*

76	77	* depend on the particular document. It gets generated when the font's
77	78	* metrics are read in.
78	79	*/
79
80		font_info *all_fonts;
81	80
82	81	struct font_info_Tag {
83	82	font_info *next;

276	275	* the heights of the three fonts in the pdata) because it's
277	276	* easier than looking it up repeatedly during page breaking.
278	277	*/
279		int page_break;
	278	bool page_break;
280	279	int space_before;
281	280	int space_after;
282	281	int line_height;

372	371	para_data *pdata;
373	372	};
374	373
	374	struct psdata_Tag {
	375	char **extraglyphs;
	376	glyph nextglyph;
	377	tree234 *extrabyname;
	378	font_info *all_fonts;
	379	};
	380
375	381	/*
376	382	* Functions exported from bk_paper.c
377	383	*/
378		int width_cmp(void , void ); /* use when setting up widths */
379		int kern_cmp(void , void ); /* use when setting up kern_pairs */
380		int lig_cmp(void , void ); /* use when setting up ligatures */
	384	int width_cmp(const void *, const void *, void *); /* use when setting up widths */
	385	int kern_cmp(const void *, const void *, void *); /* use when setting up kern_pairs */
	386	int lig_cmp(const void *, const void *, void *); /* use when setting up ligatures */
381	387	int find_width(font_data *, glyph);
382	388
383	389	/*
384	390	* Functions and data exported from psdata.c.
385	391	*/
386		glyph glyph_intern(char const *);
387		char const *glyph_extern(glyph);
	392	psdata *psdata_new(void);
	393	void psdata_free(psdata *);
	394	glyph glyph_intern(psdata , const char );
	395	char const glyph_extern(psdata , glyph);
388	396	wchar_t ps_glyph_to_unicode(glyph);
389	397	extern const char *const ps_std_glyphs[];
390	398	extern glyph const tt_std_glyphs[];
391		void init_std_fonts(void);
	399	void init_std_fonts(psdata *psd);
392	400	const int ps_std_font_widths(char const fontname);
393	401	const kern_pair ps_std_font_kerns(char const fontname);
394	402

411	419	/*
412	420	* Backend functions exported by in_pf.c
413	421	*/
414		void pf_part1(font_info fi, char bufp, size_t lenp);
415		void pf_part2(font_info fi, char bufp, size_t lenp);
	422	void pf_part1(font_info fi, char bufp, size_t lenp, errorstate *es);
	423	void pf_part2(font_info fi, char bufp, size_t lenp, errorstate *es);
416	424	void pf_writeps(font_info const fi, FILE ofp);
417	425
418	426	/*

422	430	glyph sfnt_indextoglyph(sfnt *sf, unsigned idx);
423	431	unsigned sfnt_glyphtoindex(sfnt *sf, glyph g);
424	432	unsigned sfnt_nglyphs(sfnt *sf);
425		void sfnt_writeps(font_info const fi, FILE ofp);
	433	void sfnt_writeps(font_info const fi, FILE ofp, psdata psd, errorstate es);
426	434	void sfnt_data(font_info fi, char bufp, size_t lenp);
427	435
428	436	#endif

+64

-32

psdata.c less more

1120	1120	"zretroflexhook", "zstroke", "zuhiragana", "zukatakana",
1121	1121	};
1122	1122
1123		char const **extraglyphs = NULL;
1124		glyph nextglyph = lenof(ps_glyphs_alphabetic);
1125		tree234 *extrabyname = NULL;
1126
1127		char const *glyph_extern(glyph glyph) {
	1123	#define EXTRAGLYPHSOFFSET lenof(ps_glyphs_alphabetic)
	1124
	1125	const char glyph_extern(psdata psd, glyph glyph) {
1128	1126	if (glyph == NOGLYPH) return ".notdef";
1129		if (glyph < lenof(ps_glyphs_alphabetic))
	1127	if (glyph < EXTRAGLYPHSOFFSET)
1130	1128	return ps_glyphs_alphabetic[glyph];
1131	1129	else
1132		return extraglyphs[glyph - lenof(ps_glyphs_alphabetic)];
	1130	return psd->extraglyphs[glyph - EXTRAGLYPHSOFFSET];
1133	1131	}
1134	1132
1135		static int glyphcmp(void a, void b) {
1136		glyph ga = (glyph )a, gb = (glyph )b;
1137		return strcmp(glyph_extern(ga), glyph_extern(gb));
	1133	static int glyphcmp(const void a, const void b, void *cmpctx) {
	1134	psdata psd = (psdata )cmpctx;
	1135	glyph ga = (const glyph )a, gb = (const glyph )b;
	1136	return strcmp(glyph_extern(psd, ga), glyph_extern(psd, gb));
1138	1137	}
1139	1138
1140		static int glyphcmp_search(void a, void b) {
1141		glyph gb = (glyph )b;
1142		return strcmp(a, glyph_extern(gb));
	1139	static int glyphcmp_search(const void a, const void b, void *cmpctx) {
	1140	psdata psd = (psdata )cmpctx;
	1141	glyph gb = (const glyph )b;
	1142	return strcmp(a, glyph_extern(psd, gb));
1143	1143	}
1144	1144
1145		glyph glyph_intern(char const *glyphname) {
	1145	psdata *psdata_new(void) /* allocate an empty psdata: no extra glyphs, no fonts */
	1146	{
	1147	psdata *psd = snew(psdata);
	1148	psd->extraglyphs = NULL;
	1149	psd->nextglyph = EXTRAGLYPHSOFFSET; /* first id after the built-in ps_glyphs_alphabetic table */
	1150	psd->extrabyname = newtree234(glyphcmp, psd); /* glyphcmp casts cmpctx to the psdata and reads it via glyph_extern, so the context must be psd, not NULL */
	1151	psd->all_fonts = NULL;
	1152	return psd;
	1153	}
	1154
	1155	void psdata_free(psdata *psd) /* release a psdata together with its glyph tables and font list */
	1156	{
	1157	glyph i, *gp;
	1158	while ((gp = delpos234(psd->extrabyname, 0)) != NULL) /* drain the by-name index, freeing each stored glyph id */
	1159	sfree(gp);
	1160	freetree234(psd->extrabyname);
	1161	for (i = EXTRAGLYPHSOFFSET; i < psd->nextglyph; i++) /* extra glyph ids run from EXTRAGLYPHSOFFSET up to nextglyph */
	1162	sfree(psd->extraglyphs[i - EXTRAGLYPHSOFFSET]);
	1163	sfree(psd->extraglyphs);
	1164	while (psd->all_fonts) { /* unlink and free one font_info per iteration */
	1165	font_info *fi = psd->all_fonts;
	1166	glyph_width *w;
	1167	psd->all_fonts = fi->next;
	1168	while ((w = delpos234(fi->widths, 0)) != NULL) /* width records are individually allocated */
	1169	sfree(w);
	1170	freetree234(fi->widths);
	1171	freetree234(fi->kerns); /* NOTE(review): kern elements are not freed; init_std_fonts inserts pointers into ps_std_fonts tables - confirm for fonts from other sources */
	1172	freetree234(fi->ligs); /* NOTE(review): same applies to ligature elements */
	1173	sfree(fi);
	1174	}
	1175	sfree(psd);
	1176	}
	1177
	1178	glyph glyph_intern(psdata psd, const char glyphname) {
1146	1179	int i, j, k, c;
1147	1180	glyph *gp;
1148	1181
1149	1182	i = -1;
1150		j = lenof(ps_glyphs_alphabetic);
	1183	j = EXTRAGLYPHSOFFSET;
1151	1184	while (j-i > 1) {
1152	1185	k = (i + j) / 2;
1153	1186	c = strcmp(glyphname, ps_glyphs_alphabetic[k]);

1160	1193	i = k;
1161	1194	}
1162	1195	/* Non-standard glyph. We may need to add it to our tree. */
1163		if (extrabyname == NULL)
1164		extrabyname = newtree234(glyphcmp);
1165		gp = find234(extrabyname, (void *)glyphname, glyphcmp_search);
	1196	gp = findcmp234(psd->extrabyname, (const void *)glyphname,
	1197	glyphcmp_search, psd);
1166	1198	if (gp) {
1167	1199	k = *gp;
1168	1200	} else {
1169		extraglyphs = sresize(extraglyphs, nextglyph, char const *);
1170		k = nextglyph++;
1171		extraglyphs[k - lenof(ps_glyphs_alphabetic)] = dupstr(glyphname);
	1201	psd->extraglyphs = sresize(psd->extraglyphs, psd->nextglyph, char *);
	1202	k = psd->nextglyph++;
	1203	psd->extraglyphs[k - EXTRAGLYPHSOFFSET] = dupstr(glyphname);
1172	1204	gp = snew(glyph);
1173	1205	*gp = k;
1174		add234(extrabyname, gp);
	1206	add234(psd->extrabyname, gp);
1175	1207	}
1176	1208	return k;
1177	1209	}

1852	1884	perl -e '
1853	1885	open G, "glyphnames.txt" or die;
1854	1886	chomp(@g = <G>); %g = map(($_, $i++), @g);
1855		while(<>){chomp;print"$g{$_}, "}
	1887	while(<<>>){chomp;print"$g{$_}, "}
1856	1888	print "NOGLYPH\n";' \| fold -sw68 \| sed 's/^/ /'
1857	1889
1858	1890	*/

4541	4573	}},
4542	4574	};
4543	4575
4544		void init_std_fonts(void) {
	4576	void init_std_fonts(psdata *psd) {
4545	4577	int i, j;
4546	4578	ligature const *lig;
4547	4579	kern_pair const *kern;
4548		static int done = FALSE;
	4580	static bool done = false;
4549	4581
4550	4582	if (done) return;
4551	4583	for (i = 0; i < (int)lenof(ps_std_fonts); i++) {

4553	4585	fi->fontfile = NULL;
4554	4586	fi->name = ps_std_fonts[i].name;
4555	4587	fi->filetype = TYPE1; /* for purposes of making subset fonts */
4556		fi->widths = newtree234(width_cmp);
	4588	fi->widths = newtree234(width_cmp, NULL);
4557	4589	for (j = 0; j < (int)lenof(fi->bmp); j++)
4558	4590	fi->bmp[j] = NOGLYPH;
4559	4591	for (j = 0; j < (int)lenof(ps_std_glyphs) - 1; j++) {
4560	4592	glyph_width *w = snew(glyph_width);
4561	4593	wchar_t ucs;
4562		w->glyph = glyph_intern(ps_std_glyphs[j]);
	4594	w->glyph = glyph_intern(psd, ps_std_glyphs[j]);
4563	4595	w->width = ps_std_fonts[i].widths[j];
4564	4596	add234(fi->widths, w);
4565	4597	ucs = ps_glyph_to_unicode(w->glyph);
4566	4598	assert(ucs != 0xFFFF);
4567	4599	fi->bmp[ucs] = w->glyph;
4568	4600	}
4569		fi->kerns = newtree234(kern_cmp);
	4601	fi->kerns = newtree234(kern_cmp, NULL);
4570	4602	for (kern = ps_std_fonts[i].kerns; kern->left != NOGLYPH; kern++)
4571	4603	add234(fi->kerns, (void *)kern);
4572		fi->ligs = newtree234(lig_cmp);
	4604	fi->ligs = newtree234(lig_cmp, NULL);
4573	4605	for (lig = ps_std_fonts[i].ligs; lig->left != NOGLYPH; lig++)
4574	4606	add234(fi->ligs, (void *)lig);
4575		fi->next = all_fonts;
4576		all_fonts = fi;
	4607	fi->next = psd->all_fonts;
	4608	psd->all_fonts = fi;
4577	4609	}
4578		done = TRUE;
	4610	done = true;
4579	4611	}
4580	4612
4581	4613	const int ps_std_font_widths(char const fontname)

+0

-29

~~release.sh~~ less more

0		#!/bin/sh
1
2		# Make a Halibut release archive.
3
4		RELDIR="$1"
5		VERSION="$2"
6
7		linkmirror() {
8		(cd "$1"; find . -name CVS -prune -o -name .svn -prune -o \
9		-name build -prune -o -name reltmp -prune -o -type d -print) \| \
10		while read dir; do mkdir -p "$2"/"$dir"; done
11		(cd "$1"; find . -name CVS -prune -o -name .svn -prune -o \
12		-name build -prune -o -name reltmp -prune -o \
13		-name '.orig' -prune -o -name '.rej' -prune -o \
14		-name '.txt' -prune -o -name '.html' -prune -o \
15		-name '*.1' -prune -o -name '.cvsignore' -prune -o \
16		-name '.gz' -prune -o -name '.[^.]' -prune -o \
17		-type f -print) \| \
18		while read file; do ln -s "$1"/"$file" "$2"/"$file"; done
19		}
20
21		linkmirror $PWD reltmp/$RELDIR
22		if ! test -d charset; then
23		linkmirror $PWD/../charset reltmp/$RELDIR/charset
24		fi
25
26		tar chzvoCf reltmp $RELDIR.tar.gz $RELDIR
27
28		rm -rf reltmp

+53

-34

tree234.c less more

46	46	struct tree234_Tag {
47	47	node234 *root;
48	48	cmpfn234 cmp;
	49	void *cmpctx;
49	50	};
50	51
51	52	struct node234_Tag {

58	59	/*
59	60	* Create a 2-3-4 tree.
60	61	*/
61		tree234 *newtree234(cmpfn234 cmp) {
	62	tree234 newtree234(cmpfn234 cmp, void cmpctx) {
62	63	tree234 *ret = snew(tree234);
63	64	LOG(("created tree %p\n", ret));
64	65	ret->root = NULL;
65	66	ret->cmp = cmp;
	67	ret->cmpctx = cmpctx;
66	68	return ret;
67	69	}
68	70

360	362	return NULL; /* error: index out of range */
361	363	}
362	364	} else {
363		if ((c = t->cmp(e, n->elems[0])) < 0)
	365	if ((c = t->cmp(e, n->elems[0], t->cmpctx)) < 0)
364	366	ki = 0;
365	367	else if (c == 0)
366	368	return n->elems[0]; /* already exists */
367		else if (n->elems[1] == NULL \|\| (c = t->cmp(e, n->elems[1])) < 0)
	369	else if (n->elems[1] == NULL \|\| (c = t->cmp(e, n->elems[1], t->cmpctx)) < 0)
368	370	ki = 1;
369	371	else if (c == 0)
370	372	return n->elems[1]; /* already exists */
371		else if (n->elems[2] == NULL \|\| (c = t->cmp(e, n->elems[2])) < 0)
	373	else if (n->elems[2] == NULL \|\| (c = t->cmp(e, n->elems[2], t->cmpctx)) < 0)
372	374	ki = 2;
373	375	else if (c == 0)
374	376	return n->elems[2]; /* already exists */

443	445	* as NULL, in which case the compare function from the tree proper
444	446	* will be used.
445	447	*/
446		void findrelpos234(tree234 t, void *e, cmpfn234 cmp,
447		int relation, int *index) {
	448	void findcmprelpos234(tree234 t, const void e, cmpfn234 cmp, void cmpctx,
	449	int relation, int *index) {
448	450	node234 *n;
449	451	void *ret;
450	452	int c;

452	454
453	455	if (t->root == NULL)
454	456	return NULL;
455
456		if (cmp == NULL)
457		cmp = t->cmp;
458	457
459	458	n = t->root;
460	459	/*

476	475	while (1) {
477	476	for (kcount = 0; kcount < 4; kcount++) {
478	477	if (kcount >= 3 \|\| n->elems[kcount] == NULL \|\|
479		(c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) {
	478	(c = cmpret ? cmpret : cmp(e, n->elems[kcount], cmpctx)) < 0) {
480	479	break;
481	480	}
482	481	if (n->kids[kcount]) idx += n->counts[kcount];

547	546	if (ret && index) *index = idx;
548	547	return ret;
549	548	}
550		void find234(tree234 t, void *e, cmpfn234 cmp) {
551		return findrelpos234(t, e, cmp, REL234_EQ, NULL);
552		}
553		void findrel234(tree234 t, void *e, cmpfn234 cmp, int relation) {
554		return findrelpos234(t, e, cmp, relation, NULL);
555		}
556		void findpos234(tree234 t, void e, cmpfn234 cmp, int index) {
557		return findrelpos234(t, e, cmp, REL234_EQ, index);
	549	void findcmp234(tree234 t, const void e, cmpfn234 cmp, void cmpctx) {
	550	return findcmprelpos234(t, e, cmp, cmpctx, REL234_EQ, NULL);
	551	}
	552	void findcmprel234(tree234 t, const void e, cmpfn234 cmp, void cmpctx,
	553	int relation) {
	554	return findcmprelpos234(t, e, cmp, cmpctx, relation, NULL);
	555	}
	556	void findcmppos234(tree234 t, const void e, cmpfn234 cmp, void cmpctx,
	557	int *index) {
	558	return findcmprelpos234(t, e, cmp, cmpctx, REL234_EQ, index);
	559	}
	560	void find234(tree234 t, const void *e) {
	561	return findcmprelpos234(t, e, t->cmp, t->cmpctx, REL234_EQ, NULL);
	562	}
	563	void findrel234(tree234 t, const void *e, int relation) {
	564	return findcmprelpos234(t, e, t->cmp, t->cmpctx, relation, NULL);
	565	}
	566	void findpos234(tree234 t, const void e, int index) {
	567	return findcmprelpos234(t, e, t->cmp, t->cmpctx, REL234_EQ, index);
	568	}
	569	void findrelpos234(tree234 t, const void e, int relation, int index) {
	570	return findcmprelpos234(t, e, t->cmp, t->cmpctx, relation, index);
558	571	}
559	572
560	573	/*

1005	1018	}
1006	1019	void del234(tree234 t, void *e) {
1007	1020	int index;
1008		if (!findrelpos234(t, e, NULL, REL234_EQ, &index))
	1021	if (!findrelpos234(t, e, REL234_EQ, &index))
1009	1022	return NULL; /* it wasn't in there anyway */
1010	1023	return delpos234_internal(t, index); /* it's there; delete it. */
1011	1024	}

1120	1133
1121	1134	if (t1->cmp) {
1122	1135	element = index234(t2, 0);
1123		element = findrelpos234(t1, element, NULL, REL234_GE, NULL);
	1136	element = findrelpos234(t1, element, REL234_GE, NULL);
1124	1137	if (element)
1125	1138	return NULL;
1126	1139	}

1140	1153
1141	1154	if (t2->cmp) {
1142	1155	element = index234(t1, size1-1);
1143		element = findrelpos234(t2, element, NULL, REL234_LE, NULL);
	1156	element = findrelpos234(t2, element, REL234_LE, NULL);
1144	1157	if (element)
1145	1158	return NULL;
1146	1159	}

1177	1190	t->root = NULL;
1178	1191	return ret;
1179	1192	}
	1193	assert(n);
1180	1194
1181	1195	/*
1182	1196	* Search down the tree to find the split point.

1332	1346	* over to it until it is greater than minimum
1333	1347	* size.
1334	1348	*/
1335		int undersized = (!sub->elems[0]);
	1349	bool undersized = (!sub->elems[0]);
1336	1350	LOG((" child %d is %ssize\n", ki,
1337	1351	undersized ? "under" : "minimum-"));
1338	1352	LOG((" neighbour is %s\n",

1371	1385	t->root = halves[1];
1372	1386	return halves[0];
1373	1387	}
1374		tree234 *splitpos234(tree234 *t, int index, int before) {
	1388	tree234 *splitpos234(tree234 *t, int index, bool before) { /* split t at position index; `before' selects which side is returned */
1375	1389	tree234 *ret;
1376	1390	node234 *n;
1377	1391	int count;

1379	1393	count = countnode234(t->root);
1380	1394	if (index < 0 || index > count)
1381	1395	return NULL; /* error */
1382		ret = newtree234(t->cmp);
	1396	ret = newtree234(t->cmp, t->cmpctx); /* the new tree shares t's comparator, so it must share its context too (as copytree234 does) */
1383	1397	n = split234_internal(t, index);
1384	1398	if (before) {
1385	1399	/* We want to return the ones before the index. */

1394	1408	}
1395	1409	return ret;
1396	1410	}
1397		tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel) {
	1411	tree234 *splitcmp234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx,
	1412	int rel) {
1398	1413	int before;
1399	1414	int index;
1400	1415

1406	1421	} else {
1407	1422	before = 0;
1408	1423	}
1409		if (!findrelpos234(t, e, cmp, rel, &index))
	1424	if (!findcmprelpos234(t, e, cmp, cmpctx, rel, &index))
1410	1425	index = 0;
1411	1426
1412	1427	return splitpos234(t, index+1, before);
	1428	}
	1429	tree234 *split234(tree234 *t, const void *e, int rel)
	1430	{
	1431	return splitcmp234(t, e, t->cmp, t->cmpctx, rel);
1413	1432	}
1414	1433
1415	1434	static node234 *copynode234(node234 *n, copyfn234 copyfn, void *copyfnstate) {

1438	1457	tree234 *copytree234(tree234 *t, copyfn234 copyfn, void *copyfnstate) {
1439	1458	tree234 *t2;
1440	1459
1441		t2 = newtree234(t->cmp);
	1460	t2 = newtree234(t->cmp, t->cmpctx);
1442	1461	if (t->root) {
1443	1462	t2->root = copynode234(t->root, copyfn, copyfnstate);
1444	1463	t2->root->parent = NULL;

1706	1725	for (i = -1; i < nelems; i++) {
1707	1726	void *lower = (i == -1 ? lowbound : node->elems[i]);
1708	1727	void *higher = (i+1 == nelems ? highbound : node->elems[i+1]);
1709		if (lower && higher && cmp(lower, higher) >= 0) {
	1728	if (lower && higher && cmp(lower, higher, cmpctx) >= 0) {
1710	1729	error("node %p: kid comparison [%d=%s,%d=%s] failed",
1711	1730	node, i, lower, i+1, higher);
1712	1731	}

1816	1835	realret = add234(tree, elem);
1817	1836
1818	1837	i = 0;
1819		while (i < arraylen && cmp(elem, array[i]) > 0)
	1838	while (i < arraylen && cmp(elem, array[i], NULL) > 0)
1820	1839	i++;
1821		if (i < arraylen && !cmp(elem, array[i])) {
	1840	if (i < arraylen && !cmp(elem, array[i], NULL)) {
1822	1841	void *retval = array[i]; /* expect that returned not elem */
1823	1842	if (realret != retval) {
1824	1843	error("add: retval was %p expected %p", realret, retval);

1862	1881	int i;
1863	1882
1864	1883	i = 0;
1865		while (i < arraylen && cmp(elem, array[i]) > 0)
	1884	while (i < arraylen && cmp(elem, array[i], NULL) > 0)
1866	1885	i++;
1867		if (i >= arraylen || cmp(elem, array[i]) != 0)
	1886	if (i >= arraylen || cmp(elem, array[i], NULL) != 0)
1868	1887	return; /* don't do it! */
1869	1888	delpostest(i);
1870	1889	}

1883	1902	return ((*seed) / 65536) % 32768;
1884	1903	}
1885	1904
1886		int mycmp(void *av, void *bv) {
	1905	int mycmp(const void *av, const void *bv, void *cmpctx) {
1887	1906	char const *a = (char const *)av;
1888	1907	char const *b = (char const *)bv;
1889	1908	return strcmp(a, b);

2142	2161	tree2 = newtree234(mycmp);
2143	2162	tree3 = newtree234(mycmp);
2144	2163	tree4 = newtree234(mycmp);
2145		assert(mycmp(strings[0], strings[1]) < 0); /* just in case :-) */
	2164	assert(mycmp(strings[0], strings[1], NULL) < 0); /* just in case :-) */
2146	2165	add234(tree2, strings[1]);
2147	2166	add234(tree4, strings[0]);
2148	2167	array[0] = strings[0];

+21

-11

tree234.h less more

27	27	#ifndef TREE234_H
28	28	#define TREE234_H
29	29
	30	#include <stdbool.h>
	31
30	32	/*
31	33	* This typedef is opaque outside tree234.c itself.
32	34	*/
33	35	typedef struct tree234_Tag tree234;
34	36
35		typedef int (*cmpfn234)(void *, void *);
	37	typedef int (*cmpfn234)(const void *av, const void *bv, void *cmpctx);
36	38
37	39	typedef void *(*copyfn234)(void *state, void *element);
38	40

41	43	* lookups by key will fail: you can only look things up by numeric
42	44	* index, and you have to use addpos234() and delpos234().
43	45	*/
44		tree234 *newtree234(cmpfn234 cmp);
	46	tree234 *newtree234(cmpfn234 cmp, void *cmpctx);
45	47
46	48	/*
47	49	* Free a 2-3-4 tree (not including freeing the elements).

127	129	enum {
128	130	REL234_EQ, REL234_LT, REL234_LE, REL234_GT, REL234_GE
129	131	};
130		void *find234(tree234 *t, void *e, cmpfn234 cmp);
131		void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation);
132		void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index);
133		void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp, int relation,
134		int *index);
	132	void *find234(tree234 *t, const void *e);
	133	void *findrel234(tree234 *t, const void *e, int relation);
	134	void *findpos234(tree234 *t, const void *e, int *index);
	135	void *findrelpos234(tree234 *t, const void *e, int relation, int *index);
	136	void *findcmp234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx);
	137	void *findcmprel234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx,
	138	int relation);
	139	void *findcmppos234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx,
	140	int *index);
	141	void *findcmprelpos234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx,
	142	int relation, int *index);
135	143
136	144	/*
137	145	* Delete an element e in a 2-3-4 tree. Does not free the element,

161	169	/*
162	170	* Split a tree234 into two valid tree234s.
163	171	*
164		* splitpos234 splits at a given index. If `before' is TRUE, the
	172	* splitpos234 splits at a given index. If `before' is true, the
165	173	* items at and after that index are left in t and the ones before
166		* are returned; if `before' is FALSE, the items before that index
	174	* are returned; if `before' is false, the items before that index
167	175	* are left in t and the rest are returned.
168	176	*
169	177	* split234 splits at a given key. You can pass any of the

171	179	* in the tree that satisfy the relation are returned; the
172	180	* remainder are left.
173	181	*/
174		tree234 *splitpos234(tree234 *t, int index, int before);
175		tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel);
	182	tree234 *splitpos234(tree234 *t, int index, bool before);
	183	tree234 *split234(tree234 *t, const void *e, int rel);
	184	tree234 *splitcmp234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx,
	185	int rel);
176	186
177	187	/*
178	188	* Join two tree234s together into a single one.

+30

-27

ustring.c less more

20	20	}
21	21
22	22	static char *ustrtoa_internal(wchar_t const *s, char *outbuf, int size,
23		int charset, int careful) {
24		int len, ret, err;
	23	int charset, bool careful) {
	24	int len, ret;
	25	bool err;
25	26	charset_state state = CHARSET_INIT_STATE;
26	27
27	28	if (!s) {

33	34	size--; /* leave room for terminating NUL */
34	35	*outbuf = '\0';
35	36	while (len > 0) {
36		err = 0;
	37	err = false;
37	38	ret = charset_from_unicode(&s, &len, outbuf, size, charset, &state,
38	39	(careful ? &err : NULL));
39	40	if (err)

55	56	}
56	57
57	58	char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset) {
58		return ustrtoa_internal(s, outbuf, size, charset, FALSE);
	59	return ustrtoa_internal(s, outbuf, size, charset, false);
59	60	}
60	61
61	62	char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset) {
62		return ustrtoa_internal(s, outbuf, size, charset, TRUE);
	63	return ustrtoa_internal(s, outbuf, size, charset, true);
63	64	}
64	65
65	66	wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset) {

86	87	return outbuf;
87	88	}
88	89
89		char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful)
	90	char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, bool careful)
90	91	{
91	92	char *outbuf;
92		int outpos, outlen, len, ret, err;
	93	int outpos, outlen, len, ret;
	94	bool err;
93	95	charset_state state = CHARSET_INIT_STATE;
94	96
95	97	if (!s) {

105	107	outbuf[outpos] = '\0';
106	108
107	109	while (len > 0) {
108		err = 0;
	110	err = false;
109	111	ret = charset_from_unicode(&s, &len,
110	112	outbuf + outpos, outlen - outpos - 1,
111	113	charset, &state, (careful ? &err : NULL));

137	139
138	140	char *utoa_dup(wchar_t const *s, int charset)
139	141	{
140		return utoa_internal_dup(s, charset, NULL, FALSE);
	142	return utoa_internal_dup(s, charset, NULL, false);
141	143	}
142	144
143	145	char *utoa_dup_len(wchar_t const *s, int charset, int *len)
144	146	{
145		return utoa_internal_dup(s, charset, len, FALSE);
	147	return utoa_internal_dup(s, charset, len, false);
146	148	}
147	149
148	150	char *utoa_careful_dup(wchar_t const *s, int charset)
149	151	{
150		return utoa_internal_dup(s, charset, NULL, TRUE);
	152	return utoa_internal_dup(s, charset, NULL, true);
151	153	}
152	154
153	155	wchar_t *ufroma_dup(char const *s, int charset) {

281	283	#endif
282	284	}
283	285
284		int uisalpha(wchar_t c) {
	286	bool uisalpha(wchar_t c) {
285	287	#ifdef HAS_ISWALPHA
286	288	return iswalpha(c);
287	289	#else

350	352	return ret;
351	353	}
352	354
353		int utob(wchar_t const *s) {
	355	bool utob(wchar_t const *s) {
354	356	if (!ustricmp(s, L"yes") || !ustricmp(s, L"y") ||
355	357	!ustricmp(s, L"true") || !ustricmp(s, L"t"))
356		return TRUE;
357		return FALSE;
358		}
359
360		int uisdigit(wchar_t c) {
	358	return true;
	359	return false;
	360	}
	361
	362	bool uisdigit(wchar_t c) {
361	363	return c >= L'0' && c <= L'9';
362	364	}
363	365

445	447	* Determine whether a Unicode string can be translated into a
446	448	* given charset without any missing characters.
447	449	*/
448		int cvt_ok(int charset, const wchar_t *s)
	450	bool cvt_ok(int charset, const wchar_t *s)
449	451	{
450	452	char buf[256];
451	453	charset_state state = CHARSET_INIT_STATE;
452		int err, len = ustrlen(s);
453
454		err = 0;
	454	bool err;
	455	int len = ustrlen(s);
	456
	457	err = false;
455	458	while (len > 0) {
456	459	(void)charset_from_unicode(&s, &len, buf, lenof(buf),
457	460	charset, &state, &err);
458	461	if (err)
459		return FALSE;
460		}
461		return TRUE;
	462	return false;
	463	}
	464	return true;
462	465	}
463	466
464	467	/*

469	472	* rely on always getting a valid charset id back from this
470	473	* function.
471	474	*/
472		int charset_from_ustr(filepos *fpos, const wchar_t *name)
	475	int charset_from_ustr(filepos *fpos, const wchar_t *name, errorstate *es)
473	476	{
474	477	char *csname;
475	478	int charset;

479	482
480	483	if (charset == CS_NONE) {
481	484	charset = CS_ASCII;
482		err_charset(fpos, name);
	485	err_charset(es, fpos, name);
483	486	}
484	487
485	488	sfree(csname);

+1

-1

version.h less more

0	0	/* Generated by automated build script */
1		#define VERSION "version 1.2"
	1	#define VERSION "version 1.3"

+6

-6

wcwidth.c less more

23	23	};
24	24
25	25	/* auxiliary function for binary search in interval table */
26		static int bisearch(wchar_t ucs, const struct interval *table, int max) {
	26	static bool bisearch(wchar_t ucs, const struct interval *table, int max) {
27	27	int min = 0;
28	28	int mid;
29	29
30	30	if (ucs < table[0].first || ucs > table[max].last)
31		return 0;
	31	return false;
32	32	while (max >= min) {
33	33	mid = (min + max) / 2;
34	34	if (ucs > table[mid].last)

36	36	else if (ucs < table[mid].first)
37	37	max = mid - 1;
38	38	else
39		return 1;
	39	return true;
40	40	}
41	41
42		return 0;
	42	return false;
43	43	}
44	44
45	45	int mk_wcwidth(wchar_t ucs)

143	143	wid = 0;
144	144
145	145	while (len > 0) {
146		int err;
	146	bool err;
147	147	wchar_t const *s_orig;
148	148
149		err = 0;
	149	err = false;
150	150	s_orig = s;
151	151	charset_from_unicode(&s, &len, buf, lenof(buf), charset, &state, &err);
152	152	wid += wcswidth(s_orig, s - s_orig);

+33

-35

winchm.c less more

172	172	return 0;
173	173	}
174	174
175		int chm_directory_entry_cmp(void *av, void *bv)
	175	int chm_directory_entry_cmp(const void *av, const void *bv, void *cmpctx)
176	176	{
177	177	const struct chm_directory_entry
178	178	*a = (const struct chm_directory_entry *)av,

180	180	return strcmp_chm(a->filename, b->filename);
181	181	}
182	182
183		int chm_directory_entry_find(void *av, void *bv)
	183	int chm_directory_entry_find(const void *av, const void *bv, void *cmpctx)
184	184	{
185	185	const char *a = (const char *)av;
186	186	const struct chm_directory_entry

234	234	PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size_field, rs->pos);
235	235	PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size2_field, rs->pos);
236	236
237		index = newtree234(NULL);
	237	index = newtree234(NULL, NULL);
238	238	curr_chunk = 0;
239	239	depth = 1;
240	240	/* Write out lowest-level PMGL chunks full of actual directory entries */

326	326	PUT_32BIT_LSB_FIRST(chunk.text + chunk_endlen_field,
327	327	chunksize - chunk.pos);
328	328	PUT_16BIT_LSB_FIRST(reversed_quickref.text, n_entries);
329		while (chunk.pos + reversed_quickref.pos < chunksize)
330		rdaddc(&chunk, 0); /* zero-pad */
	329	rdaddc_rep(&chunk, 0, chunksize - chunk.pos - reversed_quickref.pos);
331	330	for (i = reversed_quickref.pos - 2; i >= 0; i -= 2)
332	331	rdaddsn(&chunk, reversed_quickref.text+i, 2);
333	332

345	344	int index_index = 0;
346	345
347	346	prev_index = index;
348		index = newtree234(NULL);
	347	index = newtree234(NULL, NULL);
349	348	depth++;
350	349
351	350	while (index_index < count234(prev_index)) {

416	415	PUT_32BIT_LSB_FIRST(chunk.text + chunk_endlen_field,
417	416	chunksize - chunk.pos);
418	417	PUT_16BIT_LSB_FIRST(reversed_quickref.text, n_entries);
419		while (chunk.pos + reversed_quickref.pos < chunksize)
420		rdaddc(&chunk, 0); /* zero-pad */
	418	rdaddc_rep(&chunk, 0,
	419	chunksize - chunk.pos - reversed_quickref.pos);
421	420	for (i = reversed_quickref.pos - 2; i >= 0; i -= 2)
422	421	rdaddsn(&chunk, reversed_quickref.text+i, 2);
423	422

509	508	int strtab_offset;
510	509	};
511	510
512		static int chm_stringtab_cmp(void *av, void *bv)
	511	static int chm_stringtab_cmp(const void *av, const void *bv, void *cmpctx)
513	512	{
514	513	const struct chm_stringtab_entry
515	514	*a = (const struct chm_stringtab_entry *)av,

518	517	b->chm->stringsfile.text + b->strtab_offset);
519	518	}
520	519
521		static int chm_stringtab_find(void *av, void *bv)
	520	static int chm_stringtab_find(const void *av, const void *bv, void *cmpctx)
522	521	{
523	522	const char *a = (const char *)av;
524	523	const struct chm_stringtab_entry

534	533	if (!string)
535	534	return 0;
536	535
537		if ((ent = (struct chm_stringtab_entry *)find234(
538		chm->stringtab, (void *)string, chm_stringtab_find)) == NULL) {
	536	if ((ent = (struct chm_stringtab_entry *)findcmp234(
	537	chm->stringtab, (void *)string, chm_stringtab_find, NULL)) ==
	538	NULL) {
539	539	ent = snew(struct chm_stringtab_entry);
540	540	ent->chm = chm;
541	541
542	542	/* Pad to ensure the string doesn't cross a page boundary. */
543	543	size = strlen(string) + 1; /* include the NUL terminator */
544	544	assert(size < 0x1000); /* avoid really serious trouble */
545		while ((chm->stringsfile.pos ^ (chm->stringsfile.pos + size-1)) >> 12)
546		rdaddc(&chm->stringsfile, 0);
	545	if ((chm->stringsfile.pos ^ (chm->stringsfile.pos + size-1)) >> 12)
	546	rdaddc_rep(&chm->stringsfile, 0, 0xFFF & -chm->stringsfile.pos);
547	547
548	548	ent->strtab_offset = chm->stringsfile.pos;
549	549	rdaddsc(&chm->stringsfile, string);

556	556	struct chm *chm_new(void)
557	557	{
558	558	struct chm *chm = snew(struct chm);
559		chm->files = newtree234(chm_directory_entry_cmp);
560		chm->windows = newtree234(NULL);
561		chm->stringtab = newtree234(chm_stringtab_cmp);
	559	chm->files = newtree234(chm_directory_entry_cmp, NULL);
	560	chm->windows = newtree234(NULL, NULL);
	561	chm->stringtab = newtree234(chm_stringtab_cmp, NULL);
562	562	chm->content0 = empty_rdstringc;
563	563	chm->content1 = empty_rdstringc;
564	564	chm->outfile = empty_rdstringc;

640	640	static struct chm_directory_entry *chm_find_file(
641	641	struct chm *chm, const char *name)
642	642	{
643		return find234(chm->files, (void *)name, chm_directory_entry_find);
	643	return findcmp234(chm->files, (const void *)name,
	644	chm_directory_entry_find, NULL);
644	645	}
645	646
646	647	static char *add_leading_slash(const char *str)

748	749	int topics_offset_to_update;
749	750	};
750	751
751		int chm_urltbl_entry_cmp(void *av, void *bv)
	752	int chm_urltbl_entry_cmp(const void *av, const void *bv, void *cmpctx)
752	753	{
753	754	const struct chm_urltbl_entry
754	755	*a = (const struct chm_urltbl_entry *)av,

894	895
895	896	{
896	897	rdstringc winfile = {0, 0, NULL};
897		int i, j, s;
	898	int i, s;
898	899	struct chm_window *win;
899	900
900	901	RDADD_32BIT_LSB_FIRST(&winfile, count234(chm->windows));

949	950	RDADD_32BIT_LSB_FIRST(&winfile, 0); /* default nav pane = TOC */
950	951	RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane tabs at top */
951	952	RDADD_32BIT_LSB_FIRST(&winfile, 0); /* WM_NOTIFY id */
952		for (j = 0; j < 20; j++)
953		rdaddc(&winfile, 0); /* tab order block */
	953	rdaddc_rep(&winfile, 0, 20); /* tab order block */
954	954	RDADD_32BIT_LSB_FIRST(&winfile, 0); /* history to keep */
955	955	RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button target */
956	956	RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button target */

975	975	rdstringc topics = {0, 0, NULL};
976	976	rdstringc urltbl = {0, 0, NULL};
977	977	rdstringc urlstr = {0, 0, NULL};
978		int i, index, s, n_tocidx_3;
	978	int index, s, n_tocidx_3;
979	979	struct chm_directory_entry *contentsfile = NULL, *indexfile = NULL;
980	980	tree234 *urltbl_pre;
981	981	struct chm_urltbl_entry *urltbl_entry;
982	982
983		urltbl_pre = newtree234(chm_urltbl_entry_cmp);
984
985		for (i = 0; i < 0x1000; i++)
986		rdaddc(&tocidx, 0);
	983	urltbl_pre = newtree234(chm_urltbl_entry_cmp, NULL);
	984
	985	rdaddc_rep(&tocidx, 0, 0x1000);
987	986
988	987	/* Write a header of one zero byte at the start of #URLSTR.
989	988	* chmspec says this doesn't always appear, and is unclear on

1078	1077	* plus a NUL-terminated copy of the target file name / URL. */
1079	1078	urlstr_size = 8 + strlen(sect->url) + 1;
1080	1079	assert(urlstr_size < 0x1000); /* must _fit_ in a page! */
1081		while ((urlstr.pos ^ (urlstr.pos + urlstr_size - 1)) >> 12)
1082		rdaddc(&urlstr, 0);
	1080	if ((urlstr.pos ^ (urlstr.pos + urlstr_size - 1)) >> 12)
	1081	rdaddc_rep(&urlstr, 0, 0xFFF & -urlstr_size);
1083	1082
1084	1083	/*
1085	1084	* Save everything we know so far about the #URLTBL record

1228	1227	}
1229	1228
1230	1229	/* Align the current #TOCIDX offset to 16 bytes */
1231		while (tocidx.pos & 0xF)
1232		rdaddc(&tocidx, 0);
	1230	rdaddc_rep(&tocidx, 0, 0xF & -tocidx.pos);
1233	1231
1234	1232	/* #TOCIDX header field pointing at start of type-3 records */
1235	1233	PUT_32BIT_LSB_FIRST(tocidx.text + 0x4, tocidx.pos);

1294	1292	RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */
1295	1293	RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no merge files */
1296	1294	RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
1297		while (sysfile.pos - idxhdr_start < 4096)
1298		rdaddc(&sysfile, 0);
	1295	rdaddc_rep(&sysfile, 0, 4096 - (sysfile.pos - idxhdr_start));
1299	1296
1300	1297	chm_add_file_internal(chm, "/#IDXHDR", sysfile.text + idxhdr_start,
1301	1298	sysfile.pos - idxhdr_start,

1366	1363	int orig_decomp_size = chm->content1.pos;
1367	1364	size_t i;
1368	1365
1369		while (chm->content1.pos & 0x7FFF)
1370		rdaddc(&chm->content1, 0); /* pad to a realign-interval boundary */
	1366	/* Pad to a realign-interval boundary */
	1367	rdaddc_rep(&chm->content1, 0, 0x7FFF & -chm->content1.pos);
	1368
1371	1369	ef = lzx(chm->content1.text, chm->content1.pos, 0x8000, 0x10000);
1372	1370	chm_add_file_internal(
1373	1371	chm, "::DataSpace/Storage/MSCompressed/Content",

+23

-20

winhelp.c less more

214	214
215	215	/* The master index maps file names to help-file offsets. */
216	216
217		static int filecmp(void *av, void *bv)
	217	static int filecmp(const void *av, const void *bv, void *cmpctx)
218	218	{
219	219	const struct file *a = (const struct file *)av;
220	220	const struct file *b = (const struct file *)bv;

240	240
241	241	/* The |CONTEXT internal file maps help context hashes to TOPICOFFSETs. */
242	242
243		static int ctxcmp(void *av, void *bv)
	243	static int ctxcmp(const void *av, const void *bv, void *cmpctx)
244	244	{
245	245	const context *a = (const context *)av;
246	246	const context *b = (const context *)bv;

268	268
269	269	/* The |TTLBTREE internal file maps TOPICOFFSETs to title strings. */
270	270
271		static int ttlcmp(void *av, void *bv)
	271	static int ttlcmp(const void *av, const void *bv, void *cmpctx)
272	272	{
273	273	const context *a = (const context *)av;
274	274	const context *b = (const context *)bv;

298	298
299	299	/* The |KWBTREE internal file maps index strings to TOPICOFFSETs. */
300	300
301		static int idxcmp(void *av, void *bv)
	301	static int idxcmp(const void *av, const void *bv, void *cmpctx)
302	302	{
303	303	const struct indexrec *a = (const struct indexrec *)av;
304	304	const struct indexrec *b = (const struct indexrec *)bv;

336	336	* is by the low 16 bits of the number (above that is flags).
337	337	*/
338	338
339		static int tabcmp(void *av, void *bv)
	339	static int tabcmp(const void *av, const void *bv, void *cmpctx)
340	340	{
341	341	const int *a = (const int *)av;
342	342	const int *b = (const int *)bv;

348	348	}
349	349
350	350	/* The internal `fontnames' B-tree stores strings. */
351		static int fontcmp(void *av, void *bv)
	351	static int fontcmp(const void *av, const void *bv, void *cmpctx)
352	352	{
353	353	const char *a = (const char *)av;
354	354	const char *b = (const char *)bv;

1240	1240	whlp_file_add_short(f, 0x36C); /* magic number */
1241	1241	whlp_file_add_short(f, 33); /* minor version: HCW 4.00 Win95+ */
1242	1242	whlp_file_add_short(f, 1); /* major version */
1243		whlp_file_add_long(f, time(NULL)); /* generation date */
	1243	whlp_file_add_long(f, current_time()); /* generation date */
1244	1244	whlp_file_add_short(f, 0); /* flags=0 means no compression */
1245	1245
1246	1246	/*

1327	1327	*/
1328	1328	for (i = 0; (fontname = index234(h->fontnames, i)) != NULL; i++) {
1329	1329	char data[32];
1330		memset(data, i, sizeof(data));
1331		strncpy(data, fontname, sizeof(data));
	1330	size_t len = strlen(fontname);
	1331	if (len > sizeof(data))
	1332	len = sizeof(data);
	1333	memset(data, 0, sizeof(data));
	1334	memcpy(data, fontname, len);
1332	1335	whlp_file_add(f, data, sizeof(data));
1333	1336	}
1334	1337

1339	1342	int fontpos;
1340	1343	void *ret;
1341	1344
1342		ret = findpos234(h->fontnames, fontdesc->font, NULL, &fontpos);
	1345	ret = findpos234(h->fontnames, fontdesc->font, &fontpos);
1343	1346	assert(ret != NULL);
1344	1347
1345	1348	whlp_file_add_char(f, fontdesc->rendition);

1710	1713	/*
1711	1714	* Internal B-trees.
1712	1715	*/
1713		ret->files = newtree234(filecmp);
1714		ret->pre_contexts = newtree234(NULL);
1715		ret->contexts = newtree234(ctxcmp);
1716		ret->titles = newtree234(ttlcmp);
1717		ret->text = newtree234(NULL);
1718		ret->index = newtree234(idxcmp);
1719		ret->tabstops = newtree234(tabcmp);
1720		ret->fontnames = newtree234(fontcmp);
1721		ret->fontdescs = newtree234(NULL);
	1716	ret->files = newtree234(filecmp, NULL);
	1717	ret->pre_contexts = newtree234(NULL, NULL);
	1718	ret->contexts = newtree234(ctxcmp, NULL);
	1719	ret->titles = newtree234(ttlcmp, NULL);
	1720	ret->text = newtree234(NULL, NULL);
	1721	ret->index = newtree234(idxcmp, NULL);
	1722	ret->tabstops = newtree234(tabcmp, NULL);
	1723	ret->fontnames = newtree234(fontcmp, NULL);
	1724	ret->fontdescs = newtree234(NULL, NULL);
1722	1725
1723	1726	/*
1724	1727	* Some standard files.

1746	1749	int filecount, offset, index, filelen;
1747	1750	struct file *file, *map, *md;
1748	1751	context *ctx;
1749		int has_index;
	1752	bool has_index;
1750	1753
1751	1754	/*
1752	1755	* Lay out the topic section.