Codebase list halibut / 7fca73b
Import halibut_1.3.orig.tar.gz Colin Watson 2 years ago
86 changed file(s) with 3140 addition(s) and 2343 deletion(s). Raw diff Collapse all Expand all
0 /build.log
1 /build.out
2 /halibut
3 /doc/*.html
4 /doc/halibut.1
5 /doc/halibut.info*
6 /doc/halibut.pdf
7 /doc/halibut.ps
8 /doc/halibut.txt
9 /doc/halibut.chm
10 *.o
11 .deps
12 /Makefile
13 /Makefile.in
14 /aclocal.m4
15 /autom4te.cache/
16 /compile
17 /configure
18 /depcomp
19 /install-sh
20 /missing
21 /stamp-h1
22 /config.log
23 /config.status
24 /halibut.1
25 /halibut.chm
26 /halibut.info*
27 /halibut.pdf
28 /halibut.ps
29 /halibut.txt
30 /*.html
22
33 module halibut
44
5 ifnexist halibut/charset checkout charset halibut/charset
6
5 # Make up a version number.
76 set Version $(!builddate).$(vcsid)
87 ifneq "$(RELEASE)" "" set Version $(RELEASE)
98
1413 in halibut do echo '/* Generated by automated build script */' > version.h
1514 in halibut do echo '$#define VERSION "version $(Version)"' >> version.h
1615
17 set Rel
18 ifneq "$(RELEASE)" "" set Rel RELEASE=$(RELEASE)
16 # Make the source archive.
17 in . do ln -s halibut halibut-$(Version)
18 in . do tar chzvf halibut-$(Version).tar.gz halibut-$(Version)
1919
20 set Basename halibut-$(Version)
20 # Build the Windows binary, using clang-cl.
21 in . do mkdir buildwin
22 in buildwin do cmake ../halibut -DCMAKE_TOOLCHAIN_FILE=$(cmake_toolchain_clangcl64) -DCMAKE_BUILD_TYPE=Release -DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded -DCMAKE_C_FLAGS_RELEASE="/MT /O2"
23 in buildwin do make -j$(nproc) VERBOSE=1
24 # Code-sign the Windows binary, if the local bob config provides a
25 # script to do so. We assume here that the script accepts an -i option
26 # to provide a 'more info' URL, and that it signs the file in place.
27 ifneq "$(cross_winsigncode)" "" in buildwin do $(cross_winsigncode) -i https://www.chiark.greenend.org.uk/~sgtatham/halibut/ halibut.exe
2128
22 # Make the source archive.
23 in halibut do ./release.sh $(Basename) $(Version)
29 # Do a full Unix build, which will also build the docs, and also
30 # checks that it _does_ build.
31 in . do mkdir buildunix
32 in buildunix do cmake ../halibut
33 in buildunix do make -j$(nproc) VERBOSE=1
2434
25 # Build a Windows binary of Halibut using clang-cl.
26 in halibut with clangcl64 do make CC='clang --target=x86_64-pc-windows-msvc18.0.0 -D_CRT_SECURE_NO_WARNINGS' CC_LINK='lld-link -defaultlib:libcmt -out:$$@' EXE=.exe
27 in halibut do mv build/halibut.exe .
28 in halibut do rm -rf build
29 delegate windows
30 # Code-sign the Windows binary, if the local bob config provides
31 # a script to do so. We assume here that the script accepts an -i
32 # option to provide a 'more info' URL, and that it signs the file
33 # in place.
34 ifneq "$(winsigncode)" "" in halibut do $(winsigncode) -i http://www.chiark.greenend.org.uk/~sgtatham/halibut/ halibut.exe
35 return halibut/halibut.exe
36 enddelegate
37
38 # Build a local binary of Halibut in order to build the docs. Make
39 # sure to tag it with the supplied version number, so that the
40 # release docs announce themselves as having been built with the
41 # release Halibut (e.g. PDF's Producer property).
42 in halibut do make $(Rel)
43
44 # And now build the docs.
45 in halibut/doc do make
46
47 deliver halibut/*.tar.gz $@
48 deliver halibut/halibut.exe $@
49 deliver halibut/doc/halibut.pdf $@
50 deliver halibut/doc/halibut.txt $@
51 deliver halibut/doc/halibut.chm $@
52 deliver halibut/doc/*.html $@
35 deliver halibut-$(Version).tar.gz $@
36 deliver buildwin/halibut.exe $@
37 deliver buildunix/doc/manual/halibut.pdf $@
38 deliver buildunix/doc/manual/halibut.txt $@
39 deliver buildunix/doc/manual/halibut.chm $@
40 deliver buildunix/doc/manual/*.html $@
# Top-level build description for Halibut.
#
# Configures the bundled 'charset' subdirectory, builds the 'halibut'
# executable from the C sources listed below, installs it, and then
# descends into 'doc'.
cmake_minimum_required(VERSION 3.5)
project(halibut LANGUAGES C)

# Set before add_subdirectory(charset) so that the subdirectory can see it.
# NOTE(review): presumably this tells charset to build only its library;
# confirm against charset/CMakeLists.txt.
set(LIBCHARSET_LIBRARY_ONLY ON)

# Directory-scoped and placed before add_subdirectory(), so the charset
# subdirectory inherits these include paths as well as the halibut target.
# Keep this ordering unless charset is confirmed not to need them.
include_directories(charset ${CMAKE_CURRENT_BINARY_DIR}/charset)
add_subdirectory(charset)

add_executable(halibut
  biblio.c
  bk_html.c
  bk_info.c
  bk_man.c
  bk_paper.c
  bk_pdf.c
  bk_ps.c
  bk_text.c
  bk_whlp.c
  contents.c
  deflate.c
  error.c
  help.c
  huffman.c
  in_afm.c
  in_pf.c
  in_sfnt.c
  index.c
  input.c
  keywords.c
  licence.c
  lz77.c
  lzx.c
  main.c
  malloc.c
  misc.c
  psdata.c
  tree234.c
  ustring.c
  version.c
  wcwidth.c
  winchm.c
  winhelp.c)
target_link_libraries(halibut PRIVATE charset)

if(CMAKE_SYSTEM_NAME MATCHES "Windows")
  # Use target_compile_definitions() rather than add_compile_definitions():
  # the latter only exists in CMake 3.12 and later, which would break
  # configuration on the 3.5 minimum declared above.
  target_compile_definitions(halibut PRIVATE _CRT_SECURE_NO_WARNINGS)
endif()

if(CMAKE_VERSION VERSION_LESS 3.14)
  # CMake 3.13 and earlier required an explicit install destination.
  install(TARGETS halibut RUNTIME DESTINATION bin)
else()
  # 3.14 and above selects a sensible default, which we should avoid
  # overriding here so that end users can override it using
  # CMAKE_INSTALL_BINDIR.
  install(TARGETS halibut)
endif()

add_subdirectory(doc)
99 Building Halibut
1010 ----------------
1111
12 If you have GNU make and gcc, you should simply be able to type
13 `make'. The Makefile will generate a `build' subdirectory, and will
14 put all the object files and binaries in there.
12 Halibut is built using CMake <https://cmake.org/>. To compile in the
13 simplest way (on any of Linux, Windows or Mac), run these commands in
14 the source directory:
1515
16 In a release archive, the Makefile will also check the source files
17 against a list of MD5 checksums, and if they match it will
18 automatically add the correct version number to the build. This is
19 _not_ a secure measure intended to enforce that only approved
20 Halibut sources are ever built into a binary with a given version
21 number; it is merely a sanity check against heavily modified copies
22 _accidentally_ confusing users expecting standard versions of
23 Halibut. Distribution maintainers are entirely at liberty, if they
24 choose, to modify Halibut source files as appropriate for their
25 distribution and then have the resulting binary call itself by the
26 original version number. If you run `make VERSION=x.y', the
27 resulting Halibut binary will call itself version x.y irrespective
28 of the md5sum manifest. (You may also need to do this if your build
29 system does not have the md5sum program.)
16 cmake .
17 cmake --build .
3018
31 Halibut unfortunately does not yet come with an autoconf-generated
32 makefile, so if you do not have these utilities then you will have
33 to do the build manually. Look in the master `Makefile' to find the
34 list of source modules (they will be listed on the line starting
35 `MODULES :=', and continued on lines starting `MODULES +='), compile
36 those files with the C compiler of your choice, and link them
37 together into a binary. In addition to the modules on that list, you
38 will also need to compile `version.c', and if you wish your Halibut
39 binary to identify itself with a version number then you will have
40 to define the preprocessor symbol `VERSION' to the required version
41 number string. On Unix this can be done with a command such as
42
43 cc -c -DVERSION=\"0.9\" version.c
19 (You'll also need to have a C compiler and some kind of build tool
20 installed, such as gcc and make.)
4421
4522 Halibut's source files are intended to be almost entirely portable
4623 ANSI C. If they fail to compile and run correctly on your compiler,
4724 this might very well be considered a bug.
48
49 Building the Halibut manual
50 ---------------------------
51
52 Once you have built Halibut itself, you might well want to build its
53 manual. If you're using GNU make, you can do this just by changing
54 into the `doc' subdirectory and typing `make'. (This relies on the
55 Halibut binary you built in the previous step being present in the
56 `build' subdirectory.)
57
58 Failing that, you will need to read the Makefile and run a manual
59 Halibut command, of the form
60
61 halibut --text=halibut.txt --html blurb.but intro.but [...] index.but
62
63 (The precise list of .but files is given at the top of doc/Makefile.)
64
65 This will build plain text documentation in `halibut.txt', and a set
66 of HTML files (*.html). It will also build a short man page
67 `halibut.1', although this is by no means a replacement for the full
68 manual.
6925
7026 Installing Halibut
7127 ------------------
0 Halibut is copyright (c) 1999-2017 Simon Tatham.
0 Halibut is copyright (c) 1999-2021 Simon Tatham.
11
22 Permission is hereby granted, free of charge, to any person
33 obtaining a copy of this software and associated documentation files
+0
-130
Makefile less more
0 # Halibut master makefile
1
2 # Currently depends on gcc, because:
3 # - the dependency tracking uses -MD in order to avoid needing an
4 # explicit `make depend' step
5 # - the definition of CFLAGS includes the gcc-specific flag
6 # `-Wall'
7 #
8 # Currently depends on GNU make, because:
9 # - the Makefile uses GNU ifdef / ifndef commands and GNU make `%'
10 # pattern rules
11 # - we use .PHONY
12
13 prefix=/usr/local
14 exec_prefix=$(prefix)
15 bindir=$(exec_prefix)/bin
16 INSTALL=install -c
17
18 .PHONY: all install clean spotless topclean release
19
20 ifdef RELEASE
21 ifndef VERSION
22 VERSION := $(RELEASE)
23 endif
24 else
25 CFLAGS += -g
26 endif
27
28 ifeq (x$(VERSION)y,xy)
29 RELDIR := halibut
30 else
31 RELDIR := halibut-$(VERSION)
32 endif
33
34 # `make' from top level will build in directory `build'
35 # `make BUILDDIR=foo' from top level will build in directory foo
36 ifndef REALBUILD
37 ifndef BUILDDIR
38 ifdef TEST
39 BUILDDIR := test
40 else
41 BUILDDIR := build
42 endif
43 endif
44
45 all install:
46 @test -d $(BUILDDIR) || mkdir $(BUILDDIR)
47 @$(MAKE) -C $(BUILDDIR) -f ../Makefile $@ REALBUILD=yes
48
49 spotless: topclean
50 @test -d $(BUILDDIR) || mkdir $(BUILDDIR)
51 @$(MAKE) -C $(BUILDDIR) -f ../Makefile spotless REALBUILD=yes
52
53 clean: topclean
54 @test -d $(BUILDDIR) || mkdir $(BUILDDIR)
55 @$(MAKE) -C $(BUILDDIR) -f ../Makefile clean REALBUILD=yes
56
57 # Remove Halibut output files in the source directory (may
58 # have been created by running, for example, `build/halibut
59 # inputs/test.but').
60 topclean:
61 rm -f *.html output.* *.tar.gz
62
63 # Make a release archive.
64 release: release.sh
65 ./release.sh $(RELDIR) $(VERSION)
66
67 else
68
69 # The `real' makefile part.
70
71 CFLAGS += -Wall -W -ansi -pedantic
72
73 ifdef TEST
74 CFLAGS += -DLOGALLOC
75 LIBS += -lefence
76 endif
77
78 EXE =#
79
80 all: halibut$(EXE)
81
82 SRC := ../
83
84 ifeq ($(shell test -d $(SRC)charset && echo yes),yes)
85 LIBCHARSET_SRCDIR = $(SRC)charset/
86 else
87 LIBCHARSET_SRCDIR = $(SRC)../charset/
88 endif
89 LIBCHARSET_OBJDIR = ./#
90 LIBCHARSET_OBJPFX = cs-#
91 LIBCHARSET_GENPFX = charset-#
92 MD = -MD
93 CFLAGS += -I$(LIBCHARSET_SRCDIR) -I$(LIBCHARSET_OBJDIR)
94 include $(LIBCHARSET_SRCDIR)Makefile
95 CC_LINK = $(CC) -o $@
96
97 MODULES := main malloc ustring error help licence version misc tree234
98 MODULES += input in_afm in_pf in_sfnt keywords contents index biblio
99 MODULES += bk_text bk_html bk_whlp bk_man bk_info bk_paper bk_ps bk_pdf
100 MODULES += winhelp winchm deflate lzx lz77 huffman psdata wcwidth
101
102 OBJECTS := $(addsuffix .o,$(MODULES)) $(LIBCHARSET_OBJS)
103 DEPS := $(addsuffix .d,$(MODULES))
104
105 halibut$(EXE): $(OBJECTS)
106 $(CC_LINK) $(LFLAGS) $(OBJECTS) $(LIBS)
107
108 %.o: $(SRC)%.c
109 $(CC) $(CFLAGS) -MD -c $<
110
111 version.o: FORCE
112 $(CC) $(VDEF) -MD -c $(SRC)version.c
113
114 spotless:: clean
115 rm -f *.d
116
117 clean::
118 rm -f *.o halibut core
119
120 install:
121 mkdir -p $(prefix) $(bindir)
122 $(INSTALL) -m 755 halibut $(bindir)/halibut
123 $(MAKE) -C ../doc install prefix="$(prefix)" INSTALL="$(INSTALL)"
124
125 FORCE: # phony target to force version.o to be rebuilt every time
126
127 -include $(DEPS)
128
129 endif
1919 return ustrdup(p);
2020 }
2121
22 static void cite_biblio(keywordlist *kl, wchar_t *key, filepos fpos) {
22 static void cite_biblio(keywordlist *kl, wchar_t *key, filepos fpos,
23 errorstate *es) {
2324 keyword *kw = kw_lookup(kl, key);
2425 if (!kw)
25 err_nosuchkw(&fpos, key);
26 err_nosuchkw(es, &fpos, key);
2627 else {
2728 /*
2829 * We've found a \k reference. If it's a
4344 * entries are actually cited (or \nocite-ed).
4445 */
4546
46 void gen_citations(paragraph *source, keywordlist *kl) {
47 void gen_citations(paragraph *source, keywordlist *kl, errorstate *es) {
4748 paragraph *para;
4849 int bibnum = 0;
4950
5657 if (para->type == para_BR) {
5758 keyword *kw = kw_lookup(kl, para->keyword);
5859 if (!kw) {
59 err_nosuchkw(&para->fpos, para->keyword);
60 err_nosuchkw(es, &para->fpos, para->keyword);
6061 } else if (kw->text) {
61 err_multiBR(&para->fpos, para->keyword);
62 err_multiBR(es, &para->fpos, para->keyword);
6263 } else {
6364 kw->text = dup_word_list(para->words);
6465 }
6566 } else if (para->type == para_NoCite) {
6667 wchar_t *wp = para->keyword;
6768 while (*wp) {
68 cite_biblio(kl, wp, para->fpos);
69 cite_biblio(kl, wp, para->fpos, es);
6970 wp = uadv(wp);
7071 }
7172 }
7677 for (ptr = para->words; ptr; ptr = ptr->next) {
7778 if (ptr->type == word_UpperXref ||
7879 ptr->type == word_LowerXref)
79 cite_biblio(kl, ptr->text, ptr->fpos);
80 cite_biblio(kl, ptr->text, ptr->fpos, es);
8081 }
8182 }
8283
9495 word *wd = smalloc(sizeof(word));
9596 wd->text = gentext(++bibnum);
9697 wd->type = word_Normal;
97 wd->breaks = FALSE;
98 wd->breaks = false;
9899 wd->alt = NULL;
99100 wd->next = NULL;
100101 wd->aux = 0;
3737 (p)->type == para_Title ? -1 : 0 )
3838
3939 typedef struct {
40 int number_at_all, just_numbers;
40 bool number_at_all, just_numbers;
4141 wchar_t *number_suffix;
4242 } sectlevel;
4343
4646 sectlevel achapter, *asect;
4747 int *contents_depths; /* 0=main, 1=chapter, 2=sect etc */
4848 int ncdepths;
49 int address_section, visible_version_id;
50 int leaf_contains_contents, leaf_smallest_contents;
51 int navlinks;
52 int rellinks;
49 bool address_section, visible_version_id;
50 bool leaf_contains_contents;
51 int leaf_smallest_contents;
52 bool navlinks;
53 bool rellinks;
5354 char *contents_filename;
5455 char *index_filename;
5556 char *template_filename;
135136 typedef struct {
136137 htmlsect *section;
137138 char *fragment;
138 int generated, referenced;
139 bool generated, referenced;
139140 } htmlindexref;
140141
141142 typedef struct {
148149 void (*write)(void *write_ctx, const char *data, int len);
149150 int charset, restrict_charset;
150151 charset_state cstate;
152 errorstate *es;
151153 int ver;
152154 enum {
153155 HO_NEUTRAL, HO_IN_TAG, HO_IN_EMPTY_TAG, HO_IN_TEXT
190192 ho->write = ho_write_file;
191193 ho->write_ctx = fp;
192194 } else {
193 err_cantopenw(filename);
195 err_cantopenw(ho->es, filename);
194196 ho->write = ho_write_ignore; /* saves conditionalising rest of code */
195197 }
196198 }
265267 #define HO_HACK_QUOTENOTHING 2
266268 #define HO_HACK_OMITQUOTES 4
267269
268 static int html_fragment_compare(void *av, void *bv)
269 {
270 htmlfragment *a = (htmlfragment *)av;
271 htmlfragment *b = (htmlfragment *)bv;
270 static int html_fragment_compare(const void *av, const void *bv, void *cmpctx)
271 {
272 const htmlfragment *a = (const htmlfragment *)av;
273 const htmlfragment *b = (const htmlfragment *)bv;
272274 int cmp;
273275
274276 if ((cmp = strcmp(a->file->filename, b->file->filename)) != 0)
277279 return strcmp(a->fragment, b->fragment);
278280 }
279281
280 static int html_filename_compare(void *av, void *bv)
281 {
282 char *a = (char *)av;
283 char *b = (char *)bv;
282 static int html_filename_compare(const void *av, const void *bv, void *cmpctx)
283 {
284 const char *a = (const char *)av;
285 const char *b = (const char *)bv;
284286
285287 return strcmp(a, b);
286288 }
312314 static void html_text_nbsp(htmloutput *ho, wchar_t const *str);
313315 static void html_text_limit(htmloutput *ho, wchar_t const *str, int maxlen);
314316 static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
315 int maxlen, int quote_quotes, int nbsp);
317 int maxlen, bool quote_quotes, bool nbsp);
316318 static void html_nl(htmloutput *ho);
317319 static void html_raw(htmloutput *ho, char *text);
318320 static void html_raw_as_attr(htmloutput *ho, char *text);
332334 htmlconfig *cfg);
333335 static void html_section_title(htmloutput *ho, htmlsect *s,
334336 htmlfile *thisfile, keywordlist *keywords,
335 htmlconfig *cfg, int real);
336
337 static htmlconfig html_configure(paragraph *source, int chm_mode)
337 htmlconfig *cfg, bool real);
338
339 static htmlconfig html_configure(paragraph *source, bool chm_mode,
340 errorstate *es)
338341 {
339342 htmlconfig ret;
340343 paragraph *p;
343346 * Defaults.
344347 */
345348 ret.leaf_level = chm_mode ? -1 /* infinite */ : 2;
346 ret.achapter.just_numbers = FALSE;
347 ret.achapter.number_at_all = TRUE;
349 ret.achapter.just_numbers = false;
350 ret.achapter.number_at_all = true;
348351 ret.achapter.number_suffix = L": ";
349352 ret.nasect = 1;
350353 ret.asect = snewn(ret.nasect, sectlevel);
351 ret.asect[0].just_numbers = TRUE;
352 ret.asect[0].number_at_all = TRUE;
354 ret.asect[0].just_numbers = true;
355 ret.asect[0].number_at_all = true;
353356 ret.asect[0].number_suffix = L" ";
354357 ret.ncdepths = 0;
355358 ret.contents_depths = 0;
356 ret.visible_version_id = TRUE;
357 ret.address_section = chm_mode ? FALSE : TRUE;
358 ret.leaf_contains_contents = FALSE;
359 ret.visible_version_id = true;
360 ret.address_section = chm_mode ? false : true;
361 ret.leaf_contains_contents = false;
359362 ret.leaf_smallest_contents = 4;
360 ret.navlinks = chm_mode ? FALSE : TRUE;
361 ret.rellinks = TRUE;
363 ret.navlinks = chm_mode ? false : true;
364 ret.rellinks = true;
362365 ret.single_filename = dupstr("Manual.html");
363366 ret.contents_filename = dupstr("Contents.html");
364367 ret.index_filename = dupstr("IndexPage.html");
426429 for (p = source; p; p = p->next) {
427430 if (p->type == para_Config) {
428431 wchar_t *k = p->keyword;
429 int generic = FALSE;
432 bool generic = false;
430433
431434 if (!chm_mode && !ustrnicmp(k, L"html-", 5)) {
432435 k += 5;
439442 /* In this mode, only accept directives that don't
440443 * vary completely between the HTML and CHM output
441444 * types. */
442 generic = TRUE;
445 generic = true;
443446 } else {
444447 continue;
445448 }
446449
447450 if (!ustricmp(k, L"restrict-charset")) {
448 ret.restrict_charset = charset_from_ustr(&p->fpos, uadv(k));
451 ret.restrict_charset = charset_from_ustr(
452 &p->fpos, uadv(k), es);
449453 } else if (!ustricmp(k, L"output-charset")) {
450 ret.output_charset = charset_from_ustr(&p->fpos, uadv(k));
454 ret.output_charset = charset_from_ustr(
455 &p->fpos, uadv(k), es);
451456 } else if (!ustricmp(k, L"version")) {
452457 wchar_t *vername = uadv(k);
453458 static const struct {
467472 break;
468473
469474 if (i == lenof(versions))
470 err_htmlver(&p->fpos, vername);
475 err_htmlver(es, &p->fpos, vername);
471476 else
472477 ret.htmlver = versions[i].ver;
473478 } else if (!ustricmp(k, L"single-filename")) {
500505 frag = adv(frag);
501506 }
502507 } else
503 err_cfginsufarg(&p->fpos, p->origkeyword, 1);
508 err_cfginsufarg(es, &p->fpos, p->origkeyword, 1);
504509 } else if (!ustricmp(k, L"chapter-numeric")) {
505510 ret.achapter.just_numbers = utob(uadv(k));
506511 } else if (!ustricmp(k, L"chapter-shownumber")) {
674679 chmname = diskname;
675680
676681 if (chmname[0] == '#' || chmname[0] == '$')
677 err_chm_badname(&p->fpos, chmname);
682 err_chm_badname(es, &p->fpos, chmname);
678683
679684 if (ret.nchmextrafiles >= ret.chmextrafilesize) {
680685 ret.chmextrafilesize = ret.nchmextrafiles * 5 / 4 + 32;
705710 * turn both off.
706711 */
707712 if (!ret.chm_filename ^ !ret.hhp_filename) {
708 err_chmnames();
713 err_chmnames(es);
709714 sfree(ret.chm_filename); ret.chm_filename = NULL;
710715 sfree(ret.hhp_filename); ret.hhp_filename = NULL;
711716 }
756761 }
757762
758763 static void html_backend_common(paragraph *sourceform, keywordlist *keywords,
759 indexdata *idx, int chm_mode)
764 indexdata *idx, errorstate *es, bool chm_mode)
760765 {
761766 paragraph *p;
762767 htmlsect *topsect;
764769 htmlfilelist files = { NULL, NULL, NULL, NULL, NULL, NULL };
765770 htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL };
766771 struct chm *chm = NULL;
767 int has_index, hhk_needed = FALSE;
768
769 conf = html_configure(sourceform, chm_mode);
772 bool has_index, hhk_needed = false;
773
774 conf = html_configure(sourceform, chm_mode, es);
770775
771776 /*
772777 * We're going to make heavy use of paragraphs' private data
777782 for (p = sourceform; p; p = p->next)
778783 p->private_data = NULL;
779784
780 files.frags = newtree234(html_fragment_compare);
781 files.files = newtree234(html_filename_compare);
785 files.frags = newtree234(html_fragment_compare, NULL);
786 files.files = newtree234(html_filename_compare, NULL);
782787
783788 /*
784789 * Start by figuring out into which file each piece of the
978983 indextag *tag;
979984 int i;
980985
981 hr->referenced = hr->generated = FALSE;
986 hr->referenced = hr->generated = false;
982987 hr->section = lastsect;
983988 {
984989 char buf[40];
10361041
10371042 for (f = files.head; f; f = f->next) {
10381043 htmloutput ho;
1039 int displaying;
1044 bool displaying;
10401045 enum LISTTYPE { NOLIST, UL, OL, DL };
10411046 enum ITEMTYPE { NOITEM, LI, DT, DD };
10421047 struct stackelement {
10581063 ho.charset = conf.output_charset;
10591064 ho.restrict_charset = conf.restrict_charset;
10601065 ho.cstate = charset_init_state;
1066 ho.es = es;
10611067 ho.ver = conf.htmlver;
10621068 ho.state = HO_NEUTRAL;
10631069 ho.contents_level = 0;
13091315 html_fragment(&ho, sects.head->fragments[i]);
13101316 }
13111317
1312 html_section_title(&ho, sects.head, f, keywords, &conf, TRUE);
1318 html_section_title(&ho, sects.head, f, keywords, &conf, true);
13131319
13141320 element_close(&ho, "h1");
13151321 }
13281334 {
13291335 int ntoc = 0, tocsize = 0, tocstartidx = 0;
13301336 htmlsect **toc = NULL;
1331 int leaf = TRUE;
1337 bool leaf = true;
13321338
13331339 for (s = sects.head; s; s = s->next) {
13341340 htmlsect *a, *ac;
13501356 }
13511357
13521358 if (s->file != f && a != NULL)
1353 leaf = FALSE;
1359 leaf = false;
13541360
13551361 if (a) {
13561362 if (adepth <= a->contents_depth) {
13981404 * Now go through the document and output some real
13991405 * text.
14001406 */
1401 displaying = FALSE;
1407 displaying = false;
14021408 for (s = sects.head; s; s = s->next) {
14031409 if (s->file == f) {
14041410 /*
14051411 * This section belongs in this file.
14061412 * Display it.
14071413 */
1408 displaying = TRUE;
1414 displaying = true;
14091415 } else {
14101416 /*
14111417 * Doesn't belong in this file, but it may be
14161422 htmlsect *a, *ac;
14171423 int depth, adepth;
14181424
1419 displaying = FALSE;
1425 displaying = false;
14201426
14211427 /*
14221428 * Search up from this section until we find
14891495 html_fragment(&ho, s->fragments[i]);
14901496 }
14911497
1492 html_section_title(&ho, s, f, keywords, &conf, TRUE);
1498 html_section_title(&ho, s, f, keywords, &conf, true);
14931499
14941500 element_close(&ho, htag);
14951501 }
17211727
17221728 html_href(&ho, f, hr->section->file,
17231729 hr->fragment);
1724 hr->referenced = TRUE;
1730 hr->referenced = true;
17251731 if (p && p->kwtext)
17261732 html_words(&ho, p->kwtext, MARKUP|LINKS,
17271733 f, keywords, &conf);
17541760 /*
17551761 * Footer.
17561762 */
1757 int done_version_ids = FALSE;
1763 bool done_version_ids = false;
17581764
17591765 if (conf.address_section)
17601766 element_empty(&ho, "hr");
17631769 html_raw(&ho, conf.body_end);
17641770
17651771 if (conf.address_section) {
1766 int started = FALSE;
1772 bool started = false;
17671773 if (conf.htmlver == ISO_HTML) {
17681774 /*
17691775 * The ISO-HTML validator complains if
17811787 if (conf.addr_start) {
17821788 html_raw(&ho, conf.addr_start);
17831789 html_nl(&ho);
1784 started = TRUE;
1790 started = true;
17851791 }
17861792 if (conf.visible_version_id) {
17871793 for (p = sourceform; p; p = p->next)
17931799 html_words(&ho, p->words, NOTHING,
17941800 f, keywords, &conf);
17951801 html_text(&ho, conf.post_versionid);
1796 started = TRUE;
1802 started = true;
17971803 }
1798 done_version_ids = TRUE;
1804 done_version_ids = true;
17991805 }
18001806 if (conf.addr_end) {
18011807 if (started)
18131819 * visible, I think we still have a duty to put
18141820 * them in an HTML comment.
18151821 */
1816 int started = FALSE;
1822 bool started = false;
18171823 for (p = sourceform; p; p = p->next)
18181824 if (p->type == para_VersionID) {
18191825 if (!started) {
18201826 html_raw(&ho, "<!-- version IDs:\n");
1821 started = TRUE;
1827 started = true;
18221828 }
18231829 html_words(&ho, p->words, NOTHING,
18241830 f, keywords, &conf);
18431849 * if the index contains nothing.
18441850 */
18451851 if (chm_mode || conf.hhk_filename) {
1846 int ok = FALSE;
1852 bool ok = false;
18471853 int i;
18481854 indexentry *entry;
18491855
18511857 htmlindex *hi = (htmlindex *)entry->backend_data;
18521858
18531859 if (hi->nrefs > 0) {
1854 ok = TRUE; /* found an index entry */
1860 ok = true; /* found an index entry */
18551861 break;
18561862 }
18571863 }
18581864
18591865 if (ok)
1860 hhk_needed = TRUE;
1866 hhk_needed = true;
18611867 }
18621868
18631869 /*
18781884 ho.charset = CS_CP1252; /* as far as I know, CHM is */
18791885 ho.restrict_charset = CS_CP1252; /* hardwired to this charset */
18801886 ho.cstate = charset_init_state;
1887 ho.es = es;
18811888 ho.ver = HTML_4; /* *shrug* */
18821889 ho.state = HO_NEUTRAL;
18831890 ho.contents_level = 0;
18861893 ho_setup_rdstringc(&ho, &rs);
18871894
18881895 ho.hacklimit = 255;
1889 html_words(&ho, topsect->title->words, NOTHING,
1890 NULL, keywords, &conf);
1896 if (topsect->title)
1897 html_words(&ho, topsect->title->words, NOTHING,
1898 NULL, keywords, &conf);
18911899
18921900 rdaddc(&rs, '\0');
18931901 chm_title(chm, rs.text);
19291937 ho.charset = CS_CP1252;
19301938 ho.restrict_charset = CS_CP1252;
19311939 ho.cstate = charset_init_state;
1940 ho.es = es;
19321941 ho.ver = HTML_4; /* *shrug* */
19331942 ho.state = HO_NEUTRAL;
19341943 ho.contents_level = 0;
19721981
19731982 fp = fopen(fname, "rb");
19741983 if (!fp) {
1975 err_cantopen(fname);
1984 err_cantopen(es, fname);
19761985 continue;
19771986 }
19781987
20022011 ho.charset = CS_CP1252; /* as far as I know, HHP files are */
20032012 ho.restrict_charset = CS_CP1252; /* hardwired to this charset */
20042013 ho.cstate = charset_init_state;
2014 ho.es = es;
20052015 ho.ver = HTML_4; /* *shrug* */
20062016 ho.state = HO_NEUTRAL;
20072017 ho.contents_level = 0;
20262036 "Title=");
20272037
20282038 ho.hacklimit = 255;
2029 html_words(&ho, topsect->title->words, NOTHING,
2030 NULL, keywords, &conf);
2039 if (topsect->title)
2040 html_words(&ho, topsect->title->words, NOTHING,
2041 NULL, keywords, &conf);
20312042
20322043 ho_string(&ho, "\n");
20332044
21002111 ho.charset = CS_CP1252; /* as far as I know, HHC files are */
21012112 ho.restrict_charset = CS_CP1252; /* hardwired to this charset */
21022113 ho.cstate = charset_init_state;
2114 ho.es = es;
21032115 ho.ver = HTML_4; /* *shrug* */
21042116 ho.state = HO_NEUTRAL;
21052117 ho.contents_level = 0;
21262138 /*
21272139 * For each HTML file, write out a contents entry.
21282140 */
2129 int depth, leaf = TRUE;
2141 int depth;
2142 bool leaf = true;
21302143
21312144 /*
21322145 * Determine the depth of this file in the contents
21592172 if (leaf && s->file != f) {
21602173 for (a = s; a; a = a->parent)
21612174 if (a->file == f) {
2162 leaf = FALSE;
2175 leaf = false;
21632176 break;
21642177 }
21652178 }
22172230 ho.charset = CS_CP1252; /* as far as I know, HHK files are */
22182231 ho.restrict_charset = CS_CP1252; /* hardwired to this charset */
22192232 ho.cstate = charset_init_state;
2233 ho.es = es;
22202234 ho.ver = HTML_4; /* *shrug* */
22212235 ho.state = HO_NEUTRAL;
22222236 ho.contents_level = 0;
22692283 hr->section->file->temp = 1;
22702284 }
22712285
2272 hr->referenced = TRUE;
2286 hr->referenced = true;
22732287 }
22742288
22752289 ho_string(&ho, "</OBJECT>\n");
23002314
23012315 fp = fopen(conf.chm_filename, "wb");
23022316 if (!fp) {
2303 err_cantopenw(conf.chm_filename);
2317 err_cantopenw(es, conf.chm_filename);
23042318 } else {
23052319 data = chm_build(chm, &len);
23062320 fwrite(data, 1, len, fp);
23232337 if (w->type == word_IndexRef) {
23242338 htmlindexref *hr = (htmlindexref *)w->private_data;
23252339
2326 assert(!hr->referenced == !hr->generated);
2340 assert(hr->referenced == hr->generated);
23272341 }
23282342 }
23292343
24162430 }
24172431
24182432 void html_backend(paragraph *sourceform, keywordlist *keywords,
2419 indexdata *idx, void *unused)
2433 indexdata *idx, void *unused, errorstate *es)
24202434 {
24212435 IGNORE(unused);
2422 html_backend_common(sourceform, keywords, idx, FALSE);
2436 html_backend_common(sourceform, keywords, idx, es, false);
24232437 }
24242438
24252439 void chm_backend(paragraph *sourceform, keywordlist *keywords,
2426 indexdata *idx, void *unused)
2440 indexdata *idx, void *unused, errorstate *es)
24272441 {
24282442 IGNORE(unused);
2429 html_backend_common(sourceform, keywords, idx, TRUE);
2443 html_backend_common(sourceform, keywords, idx, es, true);
24302444 }
24312445
24322446 static void html_file_section(htmlconfig *cfg, htmlfilelist *files,
26202634 if (flags & INDEXENTS) {
26212635 htmlindexref *hr = (htmlindexref *)w->private_data;
26222636 html_fragment(ho, hr->fragment);
2623 hr->generated = TRUE;
2637 hr->generated = true;
26242638 }
26252639 break;
26262640 case word_Normal:
28272841 ho_string(ho, " ");
28282842 ho_string(ho, name);
28292843 ho_string(ho, "=\"");
2830 html_text_limit_internal(ho, value, 0, TRUE, FALSE);
2844 html_text_limit_internal(ho, value, 0, true, false);
28312845 html_charset_cleanup(ho);
28322846 ho_string(ho, "\"");
28332847 }
28352849 static void html_text(htmloutput *ho, wchar_t const *text)
28362850 {
28372851 return_mostly_to_neutral(ho);
2838 html_text_limit_internal(ho, text, 0, FALSE, FALSE);
2852 html_text_limit_internal(ho, text, 0, false, false);
28392853 }
28402854
28412855 static void html_text_nbsp(htmloutput *ho, wchar_t const *text)
28422856 {
28432857 return_mostly_to_neutral(ho);
2844 html_text_limit_internal(ho, text, 0, FALSE, TRUE);
2858 html_text_limit_internal(ho, text, 0, false, true);
28452859 }
28462860
28472861 static void html_text_limit(htmloutput *ho, wchar_t const *text, int maxlen)
28482862 {
28492863 return_mostly_to_neutral(ho);
2850 html_text_limit_internal(ho, text, maxlen, FALSE, FALSE);
2864 html_text_limit_internal(ho, text, maxlen, false, false);
28512865 }
28522866
28532867 static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
2854 int maxlen, int quote_quotes, int nbsp)
2868 int maxlen, bool quote_quotes, bool nbsp)
28552869 {
28562870 int textlen = ustrlen(text);
28572871 char outbuf[256];
2858 int bytes, err;
2872 int bytes;
2873 bool err;
28592874
28602875 if (ho->hackflags & (HO_HACK_QUOTEQUOTES | HO_HACK_OMITQUOTES))
2861 quote_quotes = TRUE; /* override the input value */
2876 quote_quotes = true; /* override the input value */
28622877
28632878 if (maxlen > 0 && textlen > maxlen)
28642879 textlen = maxlen;
29432958 rdaddc(&rs, '#');
29442959 rdaddsc(&rs, targetfrag);
29452960 }
2961
2962 /* If _neither_ of those conditions was true, we don't have a URL
2963 * at all, and would segfault if we passed url==NULL to element_attr.
2964 *
2965 * I think this can only occur as a knock-on effect from an input
2966 * file error, but we still shouldn't crash, of course. */
2967
29462968 url = rs.text;
29472969
29482970 element_open(ho, "a");
2949 element_attr(ho, "href", url);
2950 sfree(url);
2971 if (url) {
2972 element_attr(ho, "href", url);
2973 sfree(url);
2974 }
29512975 }
29522976
29532977 static void html_fragment(htmloutput *ho, char const *fragment)
31293153
31303154 p = NULL;
31313155
3132 while (find234(files->files, text, NULL)) {
3156 while (find234(files->files, text)) {
31333157 if (!p) {
31343158 len = strlen(text);
31353159 p = text;
31823206
31833207 element_open(ho, "li");
31843208 html_href(ho, thisfile, s->file, s->fragments[0]);
3185 html_section_title(ho, s, thisfile, keywords, cfg, FALSE);
3209 html_section_title(ho, s, thisfile, keywords, cfg, false);
31863210 element_close(ho, "a");
31873211 /* <li> will be closed by a later invocation */
31883212 }
31893213
31903214 static void html_section_title(htmloutput *ho, htmlsect *s, htmlfile *thisfile,
31913215 keywordlist *keywords, htmlconfig *cfg,
3192 int real)
3216 bool real)
31933217 {
31943218 if (s->title) {
31953219 sectlevel *sl;
7979 rdstringc output;
8080 int charset;
8181 charset_state state;
82 int wcmode;
82 bool wcmode;
8383 } info_data;
84 #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
84 #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, false }
8585 static const info_data empty_info_data = EMPTY_INFO_DATA;
8686
8787 typedef struct node_tag node;
8888 struct node_tag {
8989 node *listnext;
9090 node *up, *prev, *next, *lastchild;
91 int pos, started_menu, filenum;
91 int pos, filenum;
92 bool started_menu;
9293 char *name;
9394 info_data text;
9495 };
116117 static word *info_transform_wordlist(word *, keywordlist *);
117118 static int info_check_index(word *, node *, indexdata *);
118119
119 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
120 static int info_rdaddwc(info_data *, word *, word *, bool, infoconfig *);
120121
121122 static node *info_node_new(char *name, int charset);
122 static char *info_node_name_for_para(paragraph *p, infoconfig *);
123 static char *info_node_name_for_text(wchar_t *text, infoconfig *);
124
125 static infoconfig info_configure(paragraph *source) {
123 static char *info_node_name_for_para(paragraph *p, infoconfig *,
124 errorstate *);
125 static char *info_node_name_for_text(wchar_t *text, infoconfig *,
126 errorstate *);
127
128 static infoconfig info_configure(paragraph *source, errorstate *es) {
126129 infoconfig ret;
127130 paragraph *p;
128131 int n;
185188 sfree(ret.filename);
186189 ret.filename = dupstr(adv(p->origkeyword));
187190 } else if (!ustricmp(p->keyword, L"info-charset")) {
188 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
191 ret.charset = charset_from_ustr(
192 &p->fpos, uadv(p->keyword), es);
189193 } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
190194 ret.maxfilesize = utoi(uadv(p->keyword));
191195 } else if (!ustricmp(p->keyword, L"info-width")) {
305309 }
306310
307311 void info_backend(paragraph *sourceform, keywordlist *keywords,
308 indexdata *idx, void *unused) {
312 indexdata *idx, void *unused, errorstate *es) {
309313 paragraph *p;
310314 infoconfig conf;
311315 word *prefix, *body, *wp;
314318 int nesting, nestindent;
315319 int indentb, indenta;
316320 int filepos;
317 int has_index = FALSE;
321 bool has_index = false;
318322 info_data intro_text = EMPTY_INFO_DATA;
319323 node *topnode, *currnode;
320324 word bullet;
322326
323327 IGNORE(unused);
324328
325 conf = info_configure(sourceform);
329 conf = info_configure(sourceform, es);
326330
327331 /*
328332 * Go through and create a node for each section.
342346 node *newnode, *upnode;
343347 char *nodename;
344348
345 nodename = info_node_name_for_para(p, &conf);
349 nodename = info_node_name_for_para(p, &conf, es);
346350 newnode = info_node_new(nodename, conf.charset);
347351 sfree(nodename);
348352
383387 ii->nnodes = ii->nodesize = 0;
384388 ii->nodes = NULL;
385389
386 ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
390 ii->length = info_rdaddwc(&id, entry->text, NULL, false, &conf);
387391
388392 ii->text = id.output.text;
389393
416420 kw = *longname ? uadv(longname) : L"";
417421
418422 if (!*longname) {
419 err_cfginsufarg(&p->fpos, p->origkeyword, 3);
423 err_cfginsufarg(es, &p->fpos, p->origkeyword, 3);
420424 continue;
421425 }
422426
511515
512516 if (!currnode->up->started_menu) {
513517 info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
514 currnode->up->started_menu = TRUE;
518 currnode->up->started_menu = true;
515519 }
516520 info_menu_item(&currnode->up->text, currnode, p, &conf);
517521
600604 indexentry *entry;
601605 char *nodename;
602606
603 nodename = info_node_name_for_text(conf.index_text, &conf);
607 nodename = info_node_name_for_text(conf.index_text, &conf, es);
604608 newnode = info_node_new(nodename, conf.charset);
605609 sfree(nodename);
606610
705709 */
706710 fp = fopen(conf.filename, "w");
707711 if (!fp) {
708 err_cantopenw(conf.filename);
712 err_cantopenw(es, conf.filename);
709713 return;
710714 }
711715 fputs(intro_text.output.text, fp);
749753 sprintf(fname, "%s-%d", conf.filename, filenum);
750754 fp = fopen(fname, "w");
751755 if (!fp) {
752 err_cantopenw(fname);
756 err_cantopenw(es, fname);
753757 return;
754758 }
755759 sfree(fname);
862866 return ret;
863867 }
864868
865 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
869 static int info_rdaddwc(info_data *id, word *words, word *end, bool xrefs,
866870 infoconfig *cfg) {
867871 int ret = 0;
868872
906910 if (cvt_ok(id->charset, words->text) || !words->alt)
907911 ret += info_rdadds(id, words->text);
908912 else
909 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
913 ret += info_rdaddwc(id, words->alt, NULL, false, cfg);
910914 } else if (removeattr(words->type) == word_WhiteSpace) {
911915 ret += info_rdadd(id, L' ');
912916 } else if (removeattr(words->type) == word_Quote) {
944948 return ret;
945949 }
946950
947 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
948
949 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
951 static int info_width_internal(word *words, bool xrefs, infoconfig *cfg);
952
953 static int info_width_internal_list(word *words, bool xrefs, infoconfig *cfg) {
950954 int w = 0;
951955 while (words) {
952956 w += info_width_internal(words, xrefs, cfg);
955959 return w;
956960 }
957961
958 static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
962 static int info_width_internal(word *words, bool xrefs, infoconfig *cfg) {
959963 int wid;
960964 int attr;
961965
10331037
10341038 static int info_width_noxrefs(void *ctx, word *words)
10351039 {
1036 return info_width_internal(words, FALSE, (infoconfig *)ctx);
1040 return info_width_internal(words, false, (infoconfig *)ctx);
10371041 }
10381042 static int info_width_xrefs(void *ctx, word *words)
10391043 {
1040 return info_width_internal(words, TRUE, (infoconfig *)ctx);
1044 return info_width_internal(words, true, (infoconfig *)ctx);
10411045 }
10421046
10431047 static void info_heading(info_data *text, word *tprefix,
10491053
10501054 length = 0;
10511055 if (tprefix) {
1052 length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
1056 length += info_rdaddwc(text, tprefix, NULL, false, cfg);
10531057 length += info_rdadds(text, cfg->sectsuffix);
10541058 }
10551059
10591063 wrapping = wrap_para(words, firstlinewidth, wrapwidth,
10601064 info_width_noxrefs, cfg, 0);
10611065 for (p = wrapping; p; p = p->next) {
1062 length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
1066 length += info_rdaddwc(text, p->begin, p->end, false, cfg);
10631067 info_rdadd(text, L'\n');
10641068 if (*align.underline) {
10651069 while (length > 0) {
10991103 if (prefix) {
11001104 for (i = 0; i < indent; i++)
11011105 info_rdadd(text, L' ');
1102 e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
1106 e = info_rdaddwc(text, prefix, NULL, false, cfg);
11031107 if (prefixextra)
11041108 e += info_rdadds(text, prefixextra);
11051109 /* If the prefix is too long, shorten the first line to fit. */
11211125 for (p = wrapping; p; p = p->next) {
11221126 for (i = 0; i < e; i++)
11231127 info_rdadd(text, L' ');
1124 info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
1128 info_rdaddwc(text, p->begin, p->end, true, cfg);
11251129 info_rdadd(text, L'\n');
11261130 e = indent + extraindent;
11271131 }
11491153
11501154 static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
11511155 info_rdadd(text, L'[');
1152 info_rdaddwc(text, words, NULL, FALSE, cfg);
1156 info_rdaddwc(text, words, NULL, false, cfg);
11531157 info_rdadds(text, L"]\n");
11541158 }
11551159
11621166 n->text.charset = charset;
11631167 n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
11641168 n->name = dupstr(name);
1165 n->started_menu = FALSE;
1169 n->started_menu = false;
11661170
11671171 return n;
11681172 }
11691173
1170 static char *info_node_name_core(info_data *id, filepos *fpos)
1174 static char *info_node_name_core(info_data *id, filepos *fpos, errorstate *es)
11711175 {
11721176 char *p, *q;
11731177
11781182 p = q = id->output.text;
11791183 while (*p) {
11801184 if (*p == ':' || *p == ',' || *p == '(' || *p == ')') {
1181 err_infonodechar(fpos, *p);
1185 err_infonodechar(es, fpos, *p);
11821186 } else {
11831187 *q++ = *p;
11841188 }
11891193 return id->output.text;
11901194 }
11911195
1192 static char *info_node_name_for_para(paragraph *par, infoconfig *cfg)
1196 static char *info_node_name_for_para(paragraph *par, infoconfig *cfg,
1197 errorstate *es)
11931198 {
11941199 info_data id = EMPTY_INFO_DATA;
11951200
11961201 id.charset = cfg->charset;
11971202 info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1198 NULL, FALSE, cfg);
1199 info_rdaddsc(&id, NULL);
1200
1201 return info_node_name_core(&id, &par->fpos);
1202 }
1203
1204 static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg)
1203 NULL, false, cfg);
1204 info_rdaddsc(&id, "");
1205
1206 return info_node_name_core(&id, &par->fpos, es);
1207 }
1208
1209 static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg,
1210 errorstate *es)
12051211 {
12061212 info_data id = EMPTY_INFO_DATA;
12071213
12091215 info_rdadds(&id, text);
12101216 info_rdaddsc(&id, NULL);
12111217
1212 return info_node_name_core(&id, NULL);
1218 return info_node_name_core(&id, NULL, es);
12131219 }
12141220
12151221 static void info_menu_item(info_data *text, node *n, paragraph *p,
12331239 info_rdaddsc(text, "::");
12341240 if (p) {
12351241 info_rdaddc(text, ' ');
1236 info_rdaddwc(text, p->words, NULL, FALSE, cfg);
1242 info_rdaddwc(text, p->words, NULL, false, cfg);
12371243 }
12381244 info_rdaddc(text, '\n');
12391245 }
12521258 {
12531259 if (!d->wcmode) {
12541260 d->state = charset_init_state;
1255 d->wcmode = TRUE;
1261 d->wcmode = true;
12561262 }
12571263
12581264 if (wcs) {
12941300 rdaddsc(&d->output, buf);
12951301 }
12961302
1297 d->wcmode = FALSE;
1303 d->wcmode = false;
12981304 }
12991305
13001306 if (cs) {
88
99 typedef struct {
1010 wchar_t *th;
11 int headnumbers;
11 bool headnumbers;
1212 int mindepth;
1313 char *filename;
1414 int charset;
1616 } manconfig;
1717
1818 static void man_text(FILE *, word *,
19 int newline, int quote_props, manconfig *conf);
19 bool newline, int quote_props, manconfig *conf);
2020 static void man_codepara(FILE *, word *, int charset);
21 static int man_convert(wchar_t const *s, int maxlen,
22 char **result, int quote_props,
23 int charset, charset_state *state);
21 static bool man_convert(wchar_t const *s, int maxlen,
22 char **result, int quote_props,
23 int charset, charset_state *state);
2424
2525 /*
2626 * My TROFF reference is "NROFF/TROFF User's Manual", Joseph
9898 }
9999
100100 /*
101 * Return TRUE if we can represent the whole of the given string either
102 * in the output charset or as named characters; FALSE otherwise.
101 * Return true if we can represent the whole of the given string either
102 * in the output charset or as named characters; false otherwise.
103103 */
104 static int troff_ok(int charset, wchar_t *string) {
104 static bool troff_ok(int charset, wchar_t *string) {
105105 wchar_t test[2];
106106 while (*string) {
107107 test[0] = *string;
108108 test[1] = 0;
109109 if (!cvt_ok(charset, test) && !troffchar(*string))
110 return FALSE;
110 return false;
111111 string++;
112112 }
113 return TRUE;
114 }
115
116 static manconfig man_configure(paragraph *source) {
113 return true;
114 }
115
116 static manconfig man_configure(paragraph *source, errorstate *es) {
117117 paragraph *p;
118118 manconfig ret;
119119
121121 * Defaults.
122122 */
123123 ret.th = NULL;
124 ret.headnumbers = FALSE;
124 ret.headnumbers = false;
125125 ret.mindepth = 0;
126126 ret.filename = dupstr("output.1");
127127 ret.charset = CS_ASCII;
160160 ret.th = snewn(ep - wp + 1, wchar_t);
161161 memcpy(ret.th, wp, (ep - wp + 1) * sizeof(wchar_t));
162162 } else if (!ustricmp(p->keyword, L"man-charset")) {
163 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
163 ret.charset = charset_from_ustr(
164 &p->fpos, uadv(p->keyword), es);
164165 } else if (!ustricmp(p->keyword, L"man-headnumbers")) {
165166 ret.headnumbers = utob(uadv(p->keyword));
166167 } else if (!ustricmp(p->keyword, L"man-mindepth")) {
219220 #define QUOTE_LITERAL 4 /* defeat special meaning of `, ', - in troff */
220221
221222 void man_backend(paragraph *sourceform, keywordlist *keywords,
222 indexdata *idx, void *unused) {
223 indexdata *idx, void *unused, errorstate *es) {
223224 paragraph *p;
224225 FILE *fp;
225226 manconfig conf;
226 int had_described_thing;
227 bool had_described_thing;
227228
228229 IGNORE(unused);
229230 IGNORE(keywords);
230231 IGNORE(idx);
231232
232 conf = man_configure(sourceform);
233 conf = man_configure(sourceform, es);
233234
234235 /*
235236 * Open the output file.
239240 else
240241 fp = fopen(conf.filename, "w");
241242 if (!fp) {
242 err_cantopenw(conf.filename);
243 err_cantopenw(es, conf.filename);
243244 return;
244245 }
245246
247248 for (p = sourceform; p; p = p->next)
248249 if (p->type == para_VersionID) {
249250 fprintf(fp, ".\\\" ");
250 man_text(fp, p->words, TRUE, 0, &conf);
251 man_text(fp, p->words, true, 0, &conf);
251252 }
252253
253254 /* Standard preamble */
272273 }
273274 fputc('\n', fp);
274275
275 had_described_thing = FALSE;
276 had_described_thing = false;
276277 #define cleanup_described_thing do { \
277278 if (had_described_thing) \
278279 fprintf(fp, "\n"); \
279 had_described_thing = FALSE; \
280 had_described_thing = false; \
280281 } while (0)
281282
282283 for (p = sourceform; p; p = p->next) switch (p->type) {
316317 else
317318 fprintf(fp, ".SH \"");
318319 if (conf.headnumbers && p->kwtext) {
319 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
320 man_text(fp, p->kwtext, false, QUOTE_QUOTES, &conf);
320321 fprintf(fp, " ");
321322 }
322 man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf);
323 man_text(fp, p->words, false, QUOTE_QUOTES, &conf);
323324 fprintf(fp, "\"\n");
324325 }
325326 break;
341342 case para_Copyright:
342343 cleanup_described_thing;
343344 fprintf(fp, ".PP\n");
344 man_text(fp, p->words, TRUE, 0, &conf);
345 man_text(fp, p->words, true, 0, &conf);
345346 break;
346347
347348 /*
362363 sfree(bullettext);
363364 } else if (p->type == para_NumberedList) {
364365 fprintf(fp, ".IP \"");
365 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
366 man_text(fp, p->kwtext, false, QUOTE_QUOTES, &conf);
366367 fprintf(fp, "\"\n");
367368 } else if (p->type == para_Description) {
368369 if (had_described_thing) {
380381 }
381382 } else if (p->type == para_BiblioCited) {
382383 fprintf(fp, ".IP \"");
383 man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf);
384 man_text(fp, p->kwtext, false, QUOTE_QUOTES, &conf);
384385 fprintf(fp, "\"\n");
385386 }
386 man_text(fp, p->words, TRUE, 0, &conf);
387 had_described_thing = FALSE;
387 man_text(fp, p->words, true, 0, &conf);
388 had_described_thing = false;
388389 break;
389390
390391 case para_DescribedThing:
391392 cleanup_described_thing;
392393 fprintf(fp, ".IP \"");
393 man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf);
394 man_text(fp, p->words, false, QUOTE_QUOTES, &conf);
394395 fprintf(fp, "\"\n");
395 had_described_thing = TRUE;
396 had_described_thing = true;
396397 break;
397398
398399 case para_Rule:
447448 *
448449 * This function also does escaping of groff special characters.
449450 */
450 static int man_convert(wchar_t const *s, int maxlen,
451 char **result, int quote_props,
452 int charset, charset_state *state) {
451 static bool man_convert(wchar_t const *s, int maxlen,
452 char **result, int quote_props,
453 int charset, charset_state *state) {
453454 charset_state internal_state = CHARSET_INIT_STATE;
454 int slen, err;
455 int slen;
455456 char *p = NULL, *q;
456457 int plen = 0, psize = 0;
457458 rdstringc out = {0, 0, NULL};
458 int anyerr = 0;
459 bool err, anyerr = false;
459460
460461 if (!state)
461462 state = &internal_state;
468469 psize = 384;
469470 plen = 0;
470471 p = snewn(psize, char);
471 err = 0;
472 err = false;
472473
473474 while (slen > 0) {
474475 int ret = charset_from_unicode(&s, &slen, p, psize,
527528 if (err) {
528529 char const *tr = troffchar(*s);
529530 if (tr == NULL)
530 anyerr = TRUE;
531 anyerr = true;
531532 else
532533 rdaddsc(&out, tr);
533534 s++; slen--;
626627
627628 if (removeattr(text->type) == word_Normal) {
628629 charset_state s2 = *state;
629 int len = ustrlen(text->text), hyphen = FALSE;
630 int len = ustrlen(text->text);
631 bool hyphen = false;
630632
631633 if (text->breaks && len > 0 && text->text[len - 1] == '-') {
632634 len--;
633 hyphen = TRUE;
635 hyphen = true;
634636 }
635637 if (len == 0 ||
636638 man_convert(text->text, len, &c, quote_props, conf->charset,
677679 return quote_props;
678680 }
679681
680 static void man_text(FILE *fp, word *text, int newline,
682 static void man_text(FILE *fp, word *text, bool newline,
681683 int quote_props, manconfig *conf) {
682684 rdstringc t = { 0, 0, NULL };
683685 charset_state state = CHARSET_INIT_STATE;
140140 /* Flags for render_string() */
141141 #define RS_NOLIG 1
142142
143 static font_data *make_std_font(font_list *fontlist, char const *name);
143 static font_data *make_std_font(font_list *fontlist, psdata *psd,
144 const char *name);
144145 static void wrap_paragraph(para_data *pdata, word *words,
145146 int w, int i1, int i2, paper_conf *conf);
146147 static page_data *page_breaks(line_data *first, line_data *last,
153154 static void render_para(para_data *pdata, paper_conf *conf,
154155 keywordlist *keywords, indexdata *idx,
155156 paragraph *index_placeholder, page_data *index_page);
156 static int string_width(font_data *font, wchar_t const *string, int *errs,
157 static int string_width(font_data *font, wchar_t const *string, bool *errs,
157158 unsigned flags);
158159 static int paper_width_simple(para_data *pdata, word *text, paper_conf *conf);
159160 static para_data *code_paragraph(int indent, word *words, paper_conf *conf);
173174 word *second);
174175 static void fold_into_page(page_data *dest, page_data *src, int right_shift);
175176
176 static int fonts_ok(wchar_t *string, ...)
177 static bool fonts_ok(wchar_t *string, ...)
177178 {
178179 font_data *font;
179180 va_list ap;
180 int ret = TRUE;
181 bool ret = true;
181182
182183 va_start(ap, string);
183184 while ( (font = va_arg(ap, font_data *)) != NULL) {
184 int errs;
185 bool errs;
185186 (void) string_width(font, string, &errs, 0);
186187 if (errs) {
187 ret = FALSE;
188 ret = false;
188189 break;
189190 }
190191 }
194195 }
195196
196197 static void paper_cfg_fonts(font_data **fonts, font_list *fontlist,
197 wchar_t *wp, filepos *fpos) {
198 wchar_t *wp, filepos *fpos, psdata *psd,
199 errorstate *es) {
198200 font_data *f;
199201 char *fn;
200202 int i;
201203
202204 for (i = 0; i < NFONTS && *wp; i++, wp = uadv(wp)) {
203205 fn = utoa_dup(wp, CS_ASCII);
204 f = make_std_font(fontlist, fn);
206 f = make_std_font(fontlist, psd, fn);
205207 if (f)
206208 fonts[i] = f;
207209 else
208210 /* FIXME: proper error */
209 err_nofont(fpos, wp);
210 }
211 }
212
213 static paper_conf paper_configure(paragraph *source, font_list *fontlist) {
211 err_nofont(es, fpos, wp);
212 }
213 }
214
215 static paper_conf paper_configure(paragraph *source, font_list *fontlist,
216 psdata *psd, errorstate *es) {
214217 paragraph *p;
215218 paper_conf ret;
216219
234237 ret.chapter_underline_thickness = 3 * UNITS_PER_PT;
235238 ret.rule_thickness = 1 * UNITS_PER_PT;
236239 ret.fbase.font_size = 12;
237 ret.fbase.fonts[FONT_NORMAL] = make_std_font(fontlist, "Times-Roman");
238 ret.fbase.fonts[FONT_EMPH] = make_std_font(fontlist, "Times-Italic");
239 ret.fbase.fonts[FONT_STRONG] = make_std_font(fontlist, "Times-Bold");
240 ret.fbase.fonts[FONT_CODE] = make_std_font(fontlist, "Courier");
240 ret.fbase.fonts[FONT_NORMAL] =
241 make_std_font(fontlist, psd, "Times-Roman");
242 ret.fbase.fonts[FONT_EMPH] =
243 make_std_font(fontlist, psd, "Times-Italic");
244 ret.fbase.fonts[FONT_STRONG] =
245 make_std_font(fontlist, psd, "Times-Bold");
246 ret.fbase.fonts[FONT_CODE] =
247 make_std_font(fontlist, psd, "Courier");
241248 ret.fcode.font_size = 12;
242 ret.fcode.fonts[FONT_NORMAL] = make_std_font(fontlist, "Courier-Bold");
243 ret.fcode.fonts[FONT_EMPH] = make_std_font(fontlist, "Courier-Oblique");
244 ret.fcode.fonts[FONT_STRONG] = make_std_font(fontlist, "Courier-Bold");
245 ret.fcode.fonts[FONT_CODE] = make_std_font(fontlist, "Courier");
249 ret.fcode.fonts[FONT_NORMAL] =
250 make_std_font(fontlist, psd, "Courier-Bold");
251 ret.fcode.fonts[FONT_EMPH] =
252 make_std_font(fontlist, psd, "Courier-Oblique");
253 ret.fcode.fonts[FONT_STRONG] =
254 make_std_font(fontlist, psd, "Courier-Bold");
255 ret.fcode.fonts[FONT_CODE] =
256 make_std_font(fontlist, psd, "Courier");
246257 ret.ftitle.font_size = 24;
247 ret.ftitle.fonts[FONT_NORMAL] = make_std_font(fontlist, "Helvetica-Bold");
258 ret.ftitle.fonts[FONT_NORMAL] =
259 make_std_font(fontlist, psd, "Helvetica-Bold");
248260 ret.ftitle.fonts[FONT_EMPH] =
249 make_std_font(fontlist, "Helvetica-BoldOblique");
261 make_std_font(fontlist, psd, "Helvetica-BoldOblique");
250262 ret.ftitle.fonts[FONT_STRONG] =
251 make_std_font(fontlist, "Helvetica-Bold");
252 ret.ftitle.fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
263 make_std_font(fontlist, psd,"Helvetica-Bold");
264 ret.ftitle.fonts[FONT_CODE] =
265 make_std_font(fontlist, psd, "Courier-Bold");
253266 ret.fchapter.font_size = 20;
254 ret.fchapter.fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
267 ret.fchapter.fonts[FONT_NORMAL] =
268 make_std_font(fontlist, psd, "Helvetica-Bold");
255269 ret.fchapter.fonts[FONT_EMPH] =
256 make_std_font(fontlist, "Helvetica-BoldOblique");
270 make_std_font(fontlist, psd,"Helvetica-BoldOblique");
257271 ret.fchapter.fonts[FONT_STRONG] =
258 make_std_font(fontlist, "Helvetica-Bold");
259 ret.fchapter.fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
272 make_std_font(fontlist, psd,"Helvetica-Bold");
273 ret.fchapter.fonts[FONT_CODE] =
274 make_std_font(fontlist, psd, "Courier-Bold");
260275 ret.nfsect = 3;
261276 ret.fsect = snewn(ret.nfsect, font_cfg);
262277 ret.fsect[0].font_size = 16;
263 ret.fsect[0].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
278 ret.fsect[0].fonts[FONT_NORMAL] =
279 make_std_font(fontlist, psd, "Helvetica-Bold");
264280 ret.fsect[0].fonts[FONT_EMPH] =
265 make_std_font(fontlist, "Helvetica-BoldOblique");
281 make_std_font(fontlist, psd,"Helvetica-BoldOblique");
266282 ret.fsect[0].fonts[FONT_STRONG] =
267 make_std_font(fontlist, "Helvetica-Bold");
268 ret.fsect[0].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
283 make_std_font(fontlist, psd,"Helvetica-Bold");
284 ret.fsect[0].fonts[FONT_CODE] =
285 make_std_font(fontlist, psd, "Courier-Bold");
269286 ret.fsect[1].font_size = 14;
270 ret.fsect[1].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
287 ret.fsect[1].fonts[FONT_NORMAL] =
288 make_std_font(fontlist, psd, "Helvetica-Bold");
271289 ret.fsect[1].fonts[FONT_EMPH] =
272 make_std_font(fontlist, "Helvetica-BoldOblique");
290 make_std_font(fontlist, psd, "Helvetica-BoldOblique");
273291 ret.fsect[1].fonts[FONT_STRONG] =
274 make_std_font(fontlist, "Helvetica-Bold");
275 ret.fsect[1].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
292 make_std_font(fontlist, psd, "Helvetica-Bold");
293 ret.fsect[1].fonts[FONT_CODE] =
294 make_std_font(fontlist, psd, "Courier-Bold");
276295 ret.fsect[2].font_size = 13;
277 ret.fsect[2].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold");
296 ret.fsect[2].fonts[FONT_NORMAL] =
297 make_std_font(fontlist, psd, "Helvetica-Bold");
278298 ret.fsect[2].fonts[FONT_EMPH] =
279 make_std_font(fontlist, "Helvetica-BoldOblique");
299 make_std_font(fontlist, psd, "Helvetica-BoldOblique");
280300 ret.fsect[2].fonts[FONT_STRONG] =
281 make_std_font(fontlist, "Helvetica-Bold");
282 ret.fsect[2].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold");
301 make_std_font(fontlist, psd, "Helvetica-Bold");
302 ret.fsect[2].fonts[FONT_CODE] =
303 make_std_font(fontlist, psd, "Courier-Bold");
283304 ret.contents_indent_step = 24 * UNITS_PER_PT;
284305 ret.contents_margin = 84 * UNITS_PER_PT;
285306 ret.leader_separation = 12 * UNITS_PER_PT;
399420 ret.pagenum_fontsize = utoi(uadv(p->keyword));
400421 } else if (!ustricmp(p->keyword, L"paper-base-fonts")) {
401422 paper_cfg_fonts(ret.fbase.fonts, fontlist, uadv(p->keyword),
402 &p->fpos);
423 &p->fpos, psd, es);
403424 } else if (!ustricmp(p->keyword, L"paper-code-font-size")) {
404425 ret.fcode.font_size = utoi(uadv(p->keyword));
405426 } else if (!ustricmp(p->keyword, L"paper-code-fonts")) {
406427 paper_cfg_fonts(ret.fcode.fonts, fontlist, uadv(p->keyword),
407 &p->fpos);
428 &p->fpos, psd, es);
408429 } else if (!ustricmp(p->keyword, L"paper-title-font-size")) {
409430 ret.ftitle.font_size = utoi(uadv(p->keyword));
410431 } else if (!ustricmp(p->keyword, L"paper-title-fonts")) {
411432 paper_cfg_fonts(ret.ftitle.fonts, fontlist, uadv(p->keyword),
412 &p->fpos);
433 &p->fpos, psd, es);
413434 } else if (!ustricmp(p->keyword, L"paper-chapter-font-size")) {
414435 ret.fchapter.font_size = utoi(uadv(p->keyword));
415436 } else if (!ustricmp(p->keyword, L"paper-chapter-fonts")) {
416437 paper_cfg_fonts(ret.fchapter.fonts, fontlist, uadv(p->keyword),
417 &p->fpos);
438 &p->fpos, psd, es);
418439 } else if (!ustricmp(p->keyword, L"paper-section-font-size")) {
419440 wchar_t *q = uadv(p->keyword);
420441 int n = 0;
444465 ret.fsect[i] = ret.fsect[ret.nfsect-1];
445466 ret.nfsect = n+1;
446467 }
447 paper_cfg_fonts(ret.fsect[n].fonts, fontlist, q, &p->fpos);
468 paper_cfg_fonts(ret.fsect[n].fonts, fontlist, q, &p->fpos,
469 psd, es);
448470 }
449471 }
450472 }
520542 }
521543
522544 void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
523 indexdata *idx) {
545 indexdata *idx, psdata *psd, errorstate *es) {
524546 paragraph *p;
525547 document *doc;
526 int indent, used_contents;
548 int indent;
549 bool used_contents;
527550 para_data *pdata, *firstpara = NULL, *lastpara = NULL;
528551 para_data *firstcont, *lastcont;
529552 line_data *firstline, *lastline, *firstcontline, *lastcontline;
530553 page_data *pages;
531554 font_list *fontlist;
532555 paper_conf *conf, ourconf;
533 int has_index;
556 bool has_index;
534557 int pagenum;
535558 paragraph index_placeholder_para;
536 page_data *first_index_page;
537
538 init_std_fonts();
559 page_data *first_index_page = NULL;
560
561 init_std_fonts(psd);
539562 fontlist = snew(font_list);
540563 fontlist->head = fontlist->tail = NULL;
541564
542 ourconf = paper_configure(sourceform, fontlist);
565 ourconf = paper_configure(sourceform, fontlist, psd, es);
543566 conf = &ourconf;
544567
545568 /*
550573 int i;
551574 indexentry *entry;
552575
553 has_index = FALSE;
576 has_index = false;
554577
555578 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
556579 paper_idx *pi = snew(paper_idx);
557580
558 has_index = TRUE;
581 has_index = true;
559582
560583 pi->words = pi->lastword = NULL;
561584 pi->lastpage = NULL;
662685 * Do the main paragraph formatting.
663686 */
664687 indent = 0;
665 used_contents = FALSE;
688 used_contents = false;
666689 firstline = lastline = NULL;
667690 for (p = sourceform; p; p = p->next) {
668691 p->private_data = NULL;
752775 * contents section in before it.
753776 */
754777 if (!used_contents && pdata->outline_level > 0) {
755 used_contents = TRUE;
778 used_contents = true;
756779 if (lastpara)
757780 lastpara->next = firstcont;
758781 else
10551078 doc->pages = pages;
10561079 doc->paper_width = conf->paper_width;
10571080 doc->paper_height = conf->paper_height;
1081 doc->psd = psd;
10581082
10591083 /*
10601084 * Collect the section heading paragraphs into a document
13181342 ptype == para_Chapter ||
13191343 ptype == para_Appendix ||
13201344 ptype == para_UnnumberedChapter) {
1321 pdata->first->page_break = TRUE;
1345 pdata->first->page_break = true;
13221346 pdata->first->space_before = conf->chapter_top_space;
13231347 pdata->last->space_after +=
13241348 (conf->chapter_underline_depth +
13451369 ldata->space_after = conf->base_para_spacing / 2;
13461370 else
13471371 ldata->space_after = conf->base_leading / 2;
1348 ldata->page_break = FALSE;
1372 ldata->page_break = false;
13491373 }
13501374 }
13511375
13871411 return sme;
13881412 }
13891413
1390 static int new_sfmap_cmp(void *a, void *b)
1391 {
1392 glyph ga = *(glyph *)a;
1393 subfont_map_entry *sb = b;
1414 static int new_sfmap_cmp(const void *a, const void *b, void *cmpctx)
1415 {
1416 const glyph ga = *(const glyph *)a;
1417 const subfont_map_entry *sb = b;
13941418 glyph gb = sb->subfont->vector[sb->position];
13951419
13961420 if (ga < gb) return -1;
14031427 subfont_map_entry *sme;
14041428 int c;
14051429
1406 sme = find234(font->subfont_map, &g, new_sfmap_cmp);
1430 sme = findcmp234(font->subfont_map, &g, new_sfmap_cmp, NULL);
14071431 if (sme) return sme;
14081432
14091433 /*
14191443 return encode_glyph_at(g, u, font->latest_subfont, c);
14201444 }
14211445
1422 static int sfmap_cmp(void *a, void *b)
1423 {
1424 subfont_map_entry *sa = a, *sb = b;
1446 static int sfmap_cmp(const void *a, const void *b, void *cmpctx)
1447 {
1448 const subfont_map_entry *sa = a, *sb = b;
14251449 glyph ga = sa->subfont->vector[sa->position];
14261450 glyph gb = sb->subfont->vector[sb->position];
14271451
14301454 return 0;
14311455 }
14321456
1433 int width_cmp(void *a, void *b)
1457 int width_cmp(const void *a, const void *b, void *cmpctx)
14341458 {
14351459 glyph_width const *wa = a, *wb = b;
14361460
14411465 return 0;
14421466 }
14431467
1444 int kern_cmp(void *a, void *b)
1468 int kern_cmp(const void *a, const void *b, void *cmpctx)
14451469 {
14461470 kern_pair const *ka = a, *kb = b;
14471471
14561480 return 0;
14571481 }
14581482
1459 int lig_cmp(void *a, void *b)
1483 int lig_cmp(const void *a, const void *b, void *cmpctx)
14601484 {
14611485 ligature const *la = a, *lb = b;
14621486
14751499 return (u < 0 || u > 0xFFFF ? NOGLYPH : fi->bmp[u]);
14761500 }
14771501
1478 void listfonts(void) {
1502 void listfonts(psdata *psd) {
14791503 font_info const *fi;
14801504
1481 init_std_fonts();
1482 for (fi = all_fonts; fi; fi = fi->next)
1505 init_std_fonts(psd);
1506 for (fi = psd->all_fonts; fi; fi = fi->next)
14831507 printf("%s\n", fi->name);
14841508 }
14851509
1486 static font_data *make_std_font(font_list *fontlist, char const *name)
1510 static font_data *make_std_font(font_list *fontlist, psdata *psd,
1511 const char *name)
14871512 {
14881513 font_info const *fi;
14891514 font_data *f;
14941519 if (strcmp(fe->font->info->name, name) == 0)
14951520 return fe->font;
14961521
1497 for (fi = all_fonts; fi; fi = fi->next)
1522 for (fi = psd->all_fonts; fi; fi = fi->next)
14981523 if (strcmp(fi->name, name) == 0) break;
14991524 if (!fi) return NULL;
15001525
15021527
15031528 f->list = fontlist;
15041529 f->info = fi;
1505 f->subfont_map = newtree234(sfmap_cmp);
1530 f->subfont_map = newtree234(sfmap_cmp, NULL);
15061531
15071532 /*
15081533 * Our first subfont will contain all of US-ASCII. This isn't
15311556 glyph_width const *w;
15321557
15331558 wantw.glyph = index;
1534 w = find234(font->info->widths, &wantw, NULL);
1559 w = find234(font->info->widths, &wantw);
15351560 if (!w) return 0;
15361561 return w->width;
15371562 }
15451570 return 0;
15461571 wantkp.left = lindex;
15471572 wantkp.right = rindex;
1548 kp = find234(font->info->kerns, &wantkp, NULL);
1573 kp = find234(font->info->kerns, &wantkp);
15491574 if (kp == NULL)
15501575 return 0;
15511576 return kp->kern;
15601585 return NOGLYPH;
15611586 wantlig.left = lindex;
15621587 wantlig.right = rindex;
1563 lig = find234(font->info->ligs, &wantlig, NULL);
1588 lig = find234(font->info->ligs, &wantlig);
15641589 if (lig == NULL)
15651590 return NOGLYPH;
15661591 return lig->lig;
15671592 }
15681593
1569 static int string_width(font_data *font, wchar_t const *string, int *errs,
1594 static int string_width(font_data *font, wchar_t const *string, bool *errs,
15701595 unsigned flags)
15711596 {
15721597 int width = 0;
15731598 int nindex, index, oindex, lindex;
15741599
15751600 if (errs)
1576 *errs = 0;
1601 *errs = false;
15771602
15781603 oindex = NOGLYPH;
15791604 index = utoglyph(font->info, *string);
15821607
15831608 if (index == NOGLYPH) {
15841609 if (errs)
1585 *errs = 1;
1610 *errs = true;
15861611 } else {
15871612 if (!(flags & RS_NOLIG) &&
15881613 (lindex = find_lig(font, index, nindex)) != NOGLYPH) {
16181643 static int paper_width_internal(void *vctx, word *word, int *nspaces)
16191644 {
16201645 struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx;
1621 int style, type, findex, width, errs;
1646 int style, type, findex, width;
1647 bool errs;
16221648 wchar_t *str;
16231649 unsigned flags = 0;
16241650
21022128 keywordlist *keywords, indexdata *idx, paper_conf *conf)
21032129 {
21042130 while (text && text != text_end) {
2105 int style, type, findex, errs;
2131 int style, type, findex;
2132 bool errs;
21062133 wchar_t *str;
21072134 xref_dest dest;
21082135 unsigned flags = 0;
23232350 xr = NULL;
23242351
23252352 {
2326 int extra_indent, shortfall, spaces;
2353 int extra_indent = 0, shortfall = 0, spaces = 0;
23272354 int just = ldata->pdata->justification;
23282355
23292356 /*
23372364 case JUST:
23382365 shortfall = ldata->hshortfall;
23392366 spaces = ldata->nspaces;
2340 extra_indent = 0;
2341 break;
2342 case LEFT:
2343 shortfall = spaces = extra_indent = 0;
23442367 break;
23452368 case RIGHT:
2346 shortfall = spaces = 0;
23472369 extra_indent = ldata->real_shortfall;
23482370 break;
23492371 }
23822404 cxref = NULL;
23832405 cxref_page = NULL;
23842406
2407 assert(pdata->first);
23852408 for (ldata = pdata->first; ldata; ldata = ldata->next) {
23862409 /*
23872410 * If this is a contents entry, we expect to have a single
25602583 w->text = snewn(t-start+1, wchar_t);
25612584 memcpy(w->text, start, (t-start) * sizeof(wchar_t));
25622585 w->text[t-start] = '\0';
2563 w->breaks = FALSE;
2586 w->breaks = false;
25642587 w->aux = 0;
25652588
25662589 if (ltail)
27342757 ret->alt = NULL;
27352758 ret->type = word_Normal;
27362759 ret->text = ustrdup(text);
2737 ret->breaks = FALSE;
2760 ret->breaks = false;
27382761 ret->aux = 0;
27392762 return ret;
27402763 }
27462769 ret->alt = NULL;
27472770 ret->type = word_WhiteSpace;
27482771 ret->text = NULL;
2749 ret->breaks = TRUE;
2772 ret->breaks = true;
27502773 ret->aux = 0;
27512774 return ret;
27522775 }
27582781 ret->alt = NULL;
27592782 ret->type = word_PageXref;
27602783 ret->text = NULL;
2761 ret->breaks = FALSE;
2784 ret->breaks = false;
27622785 ret->aux = 0;
27632786 ret->private_data = page;
27642787 return ret;
27712794 ret->alt = NULL;
27722795 ret->type = word_XrefEnd;
27732796 ret->text = NULL;
2774 ret->breaks = FALSE;
2797 ret->breaks = false;
27752798 ret->aux = 0;
27762799 return ret;
27772800 }
27792802 static word *prepare_contents_title(word *first, wchar_t *separator,
27802803 word *second)
27812804 {
2782 word *ret;
2805 word *ret = NULL;
27832806 word **wptr, *w;
27842807
27852808 wptr = &ret;
3434 static void objref(object *o, object *dest);
3535 static void objdest(object *o, page_data *p);
3636
37 static int is_std_font(char const *name);
37 static bool is_std_font(char const *name);
3838
3939 static void make_pages_node(object *node, object *parent, page_data *first,
4040 page_data *last, object *resources,
4141 object *mediabox);
4242 static int make_outline(object *parent, outline_element *start, int n,
43 int open);
43 bool open);
4444 static int pdf_versionid(FILE *fp, word *words);
4545
4646 void pdf_backend(paragraph *sourceform, keywordlist *keywords,
47 indexdata *idx, void *vdoc) {
47 indexdata *idx, void *vdoc, errorstate *es) {
4848 document *doc = (document *)vdoc;
4949 int font_index;
5050 font_encoding *fe;
130130 char fname[40];
131131 char buf[80];
132132 int i, prev;
133 object *font, *fontdesc;
133 object *font, *fontdesc = NULL;
134134 int flags;
135135 font_info const *fi = fe->font->info;
136136
300300 objtext(cidfont, fe->font->info->name);
301301 objtext(cidfont, "\n/CIDSystemInfo<</Registry(Adobe)"
302302 "/Ordering(Identity)/Supplement 0>>\n");
303 objtext(cidfont, "/FontDescriptor ");
304 objref(cidfont, fontdesc);
303 assert(fontdesc); /* TrueType fonts are never standard */
304 objtext(cidfont, "/FontDescriptor ");
305 objref(cidfont, fontdesc);
305306 objtext(cidfont, "\n/W[0[");
306307 for (i = 0; i < (int)sfnt_nglyphs(fe->font->info->fontfile); i++) {
307308 char buf[20];
327328 objtext(font, buf);
328329 }
329330 objtext(font, i % 8 ? "/" : "\n/");
330 objtext(font, glyph_extern(fe->vector[i]));
331 objtext(font, glyph_extern(doc->psd, fe->vector[i]));
331332 prev = i;
332333 }
333334
368369 size_t len;
369370 char *ffbuf;
370371
371 pf_part1((font_info *)fi, &ffbuf, &len);
372 pf_part1((font_info *)fi, &ffbuf, &len, es);
372373 objstream_len(fontfile, ffbuf, len);
373374 sfree(ffbuf);
374375 sprintf(buf, "<<\n/Length1 %lu\n", (unsigned long)len);
375376 objtext(fontfile, buf);
376 pf_part2((font_info *)fi, &ffbuf, &len);
377 pf_part2((font_info *)fi, &ffbuf, &len, es);
377378 objstream_len(fontfile, ffbuf, len);
378379 sfree(ffbuf);
379380 sprintf(buf, "/Length2 %lu\n", (unsigned long)len);
605606
606607 objtext(outlines, "<<\n/Type /Outlines\n");
607608 topcount = make_outline(outlines, doc->outline_elements,
608 doc->n_outline_elements, TRUE);
609 doc->n_outline_elements, true);
609610 sprintf(buf, "/Count %d\n>>\n", topcount);
610611 objtext(outlines, buf);
611612 }
671672 else
672673 fp = fopen(filename, "wb");
673674 if (!fp) {
674 err_cantopenw(filename);
675 err_cantopenw(es, filename);
675676 return;
676677 }
677678
783784 "Symbol", "ZapfDingbats"
784785 };
785786
786 static int is_std_font(char const *name) {
787 static bool is_std_font(char const *name) {
787788 unsigned i;
788789 for (i = 0; i < lenof(stdfonts); i++)
789790 if (strcmp(name, stdfonts[i]) == 0)
790 return TRUE;
791 return FALSE;
791 return true;
792 return false;
792793 }
793794
794795 static void make_pages_node(object *node, object *parent, page_data *first,
829830 for (i = 0; i < TREE_BRANCH; i++) {
830831 int number = (i+1) * count / TREE_BRANCH - i * count / TREE_BRANCH;
831832 thisfirst = page;
833 thislast = NULL;
832834 while (number--) {
833835 thislast = page;
834836 page = page->next;
835837 }
838 assert(thislast);
836839
837840 if (thisfirst == thislast) {
838841 objref(node, (object *)thisfirst->spare);
912915 }
913916
914917 static int make_outline(object *parent, outline_element *items, int n,
915 int open)
918 bool open)
916919 {
917920 int level, totalcount = 0;
918921 outline_element *itemp;
964967
965968 if (itemp > items) {
966969 char buf[80];
967 int count = make_outline(curr, items, itemp - items, FALSE);
970 int count = make_outline(curr, items, itemp - items, false);
968971 if (!open)
969972 count = -count;
970973 else
10201023 case word_Quote:
10211024 text = dupstr("'");
10221025 break;
1026 default:
1027 continue;
10231028 }
10241029
10251030 fputs(text, fp);
2121 }
2222
2323 void ps_backend(paragraph *sourceform, keywordlist *keywords,
24 indexdata *idx, void *vdoc) {
24 indexdata *idx, void *vdoc, errorstate *es) {
2525 document *doc = (document *)vdoc;
2626 int font_index;
2727 font_encoding *fe;
5252 else
5353 fp = fopen(filename, "w");
5454 if (!fp) {
55 err_cantopenw(filename);
55 err_cantopenw(es, filename);
5656 return;
5757 }
5858
204204 if (fe->font->info->filetype == TYPE1)
205205 pf_writeps(fe->font->info, fp);
206206 else
207 sfnt_writeps(fe->font->info, fp);
207 sfnt_writeps(fe->font->info, fp, doc->psd, es);
208208 fprintf(fp, "%%%%EndResource\n");
209209 } else {
210210 fprintf(fp, "%%%%IncludeResource: font %s\n",
228228 ps_token(fp, &cc, "{1 index /FID ne {def} {pop pop} ifelse} forall\n");
229229 ps_token(fp, &cc, "/Encoding [\n");
230230 for (i = 0; i < 256; i++)
231 ps_token(fp, &cc, "/%s", glyph_extern(fe->vector[i]));
231 ps_token(fp, &cc, "/%s", glyph_extern(doc->psd, fe->vector[i]));
232232 ps_token(fp, &cc, "] def\n");
233233 ps_token(fp, &cc, "currentdict end\n");
234234 ps_token(fp, &cc, "/fontname-%s exch definefont /%s exch def\n",
343343 case word_Quote:
344344 text = dupstr("'");
345345 break;
346 default:
347 continue;
346348 }
347349
348350 if (cc + strlen(text) > PS_MAXWIDTH)
99 typedef enum { LEFT, LEFTPLUS, CENTRE } alignment;
1010 typedef struct {
1111 alignment align;
12 int number_at_all, just_numbers;
12 bool number_at_all, just_numbers;
1313 wchar_t *underline;
1414 wchar_t *number_suffix;
1515 } alignstruct;
2020 int width;
2121 alignstruct atitle, achapter, *asect;
2222 int nasect;
23 int include_version_id;
24 int indent_preambles;
23 bool include_version_id;
24 bool indent_preambles;
2525 int charset;
2626 word bullet;
2727 wchar_t *lquote, *rquote, *rule;
3333 FILE *fp;
3434 int charset;
3535 charset_state state;
36 errorstate *es;
3637 } textfile;
3738
3839 static void text_heading(textfile *, word *, word *, word *, alignstruct,
5455 return LEFT;
5556 }
5657
57 static textconfig text_configure(paragraph *source) {
58 static textconfig text_configure(paragraph *source, errorstate *es) {
5859 textconfig ret;
5960 paragraph *p;
6061 int n;
6566 ret.bullet.next = NULL;
6667 ret.bullet.alt = NULL;
6768 ret.bullet.type = word_Normal;
68 ret.atitle.just_numbers = FALSE; /* ignored */
69 ret.atitle.number_at_all = TRUE; /* ignored */
69 ret.atitle.just_numbers = false; /* ignored */
70 ret.atitle.number_at_all = true; /* ignored */
7071
7172 /*
7273 * Defaults.
7980 ret.atitle.align = CENTRE;
8081 ret.atitle.underline = L"\x2550\0=\0\0";
8182 ret.achapter.align = LEFT;
82 ret.achapter.just_numbers = FALSE;
83 ret.achapter.number_at_all = TRUE;
83 ret.achapter.just_numbers = false;
84 ret.achapter.number_at_all = true;
8485 ret.achapter.number_suffix = L": ";
8586 ret.achapter.underline = L"\x203E\0-\0\0";
8687 ret.nasect = 1;
8788 ret.asect = snewn(ret.nasect, alignstruct);
8889 ret.asect[0].align = LEFTPLUS;
89 ret.asect[0].just_numbers = TRUE;
90 ret.asect[0].number_at_all = TRUE;
90 ret.asect[0].just_numbers = true;
91 ret.asect[0].number_at_all = true;
9192 ret.asect[0].number_suffix = L" ";
9293 ret.asect[0].underline = L"\0";
93 ret.include_version_id = TRUE;
94 ret.indent_preambles = FALSE;
94 ret.include_version_id = true;
95 ret.indent_preambles = false;
9596 ret.bullet.text = L"\x2022\0-\0\0";
9697 ret.rule = L"\x2500\0-\0\0";
9798 ret.filename = dupstr("output.txt");
130131 if (!ustricmp(p->keyword, L"text-indent")) {
131132 ret.indent = utoi(uadv(p->keyword));
132133 } else if (!ustricmp(p->keyword, L"text-charset")) {
133 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
134 ret.charset = charset_from_ustr(
135 &p->fpos, uadv(p->keyword), es);
134136 } else if (!ustricmp(p->keyword, L"text-filename")) {
135137 sfree(ret.filename);
136138 ret.filename = dupstr(adv(p->origkeyword));
317319 }
318320
319321 void text_backend(paragraph *sourceform, keywordlist *keywords,
320 indexdata *idx, void *unused) {
322 indexdata *idx, void *unused, errorstate *es) {
321323 paragraph *p;
322324 textconfig conf;
323325 word *prefix, *body, *wp;
331333 IGNORE(keywords); /* we don't happen to need this */
332334 IGNORE(idx); /* or this */
333335
334 conf = text_configure(sourceform);
336 conf = text_configure(sourceform, es);
335337
336338 /*
337339 * Open the output file.
341343 else
342344 tf.fp = fopen(conf.filename, "w");
343345 if (!tf.fp) {
344 err_cantopenw(conf.filename);
346 err_cantopenw(es, conf.filename);
345347 return;
346348 }
347349 tf.charset = conf.charset;
350 tf.es = es;
348351 tf.state = charset_init_state;
349352
350353 /* Do the title */
781784 for (; text; text = text->next) if (text->type == word_WeakCode) {
782785 int wid = ustrwid(text->text, tf->charset);
783786 if (wid > width)
784 err_text_codeline(&text->fpos, wid, width);
787 err_text_codeline(tf->es, &text->fpos, wid, width);
785788 text_output_many(tf, indent, L' ');
786789 text_output(tf, text->text);
787790 text_output(tf, L"\n");
4949 static void whlp_rdadds(rdstringc *rs, const wchar_t *text, whlpconf *conf,
5050 charset_state *state);
5151 static void whlp_mkparagraph(struct bk_whlp_state *state,
52 int font, word *text, int subsidiary,
52 int font, word *text, bool subsidiary,
5353 whlpconf *conf);
5454 static void whlp_navmenu(struct bk_whlp_state *state, paragraph *p,
5555 whlpconf *conf);
149149 }
150150
151151 void whlp_backend(paragraph *sourceform, keywordlist *keywords,
152 indexdata *idx, void *unused) {
152 indexdata *idx, void *unused, errorstate *es) {
153153 WHLP h;
154154 char *cntname;
155155 paragraph *p, *lastsect;
158158 int i;
159159 int nesting;
160160 indexentry *ie;
161 int done_contents_topic = FALSE;
161 bool done_contents_topic = false;
162162 whlpconf conf;
163163
164164 IGNORE(unused);
206206 {
207207 int len = strlen(conf.filename);
208208 if (len < 4 || conf.filename[len-4] != '.' ||
209 tolower(conf.filename[len-3] != 'h') ||
210 tolower(conf.filename[len-2] != 'l') ||
211 tolower(conf.filename[len-1] != 'p')) {
209 tolower(conf.filename[len-3]) != 'h' ||
210 tolower(conf.filename[len-2]) != 'l' ||
211 tolower(conf.filename[len-1]) != 'p') {
212212 char *newf;
213213 newf = snewn(len + 5, char);
214214 sprintf(newf, "%s.hlp", conf.filename);
222222
223223 state.cntfp = fopen(cntname, "wb");
224224 if (!state.cntfp) {
225 err_cantopenw(cntname);
225 err_cantopenw(es, cntname);
226226 return;
227227 }
228228 state.cnt_last_level = -1; state.cnt_workaround = 0;
249249 p->private_data = whlp_register_topic(h, rs.text, &errstr);
250250 if (!p->private_data) {
251251 p->private_data = whlp_register_topic(h, NULL, NULL);
252 err_winhelp_ctxclash(&p->fpos, rs.text, errstr);
252 err_winhelp_ctxclash(es, &p->fpos, rs.text, errstr);
253253 }
254254 sfree(rs.text);
255255 }
341341 if (p->type == para_Title) {
342342 whlp_begin_para(h, WHLP_PARA_NONSCROLL);
343343 state.cstate = charset_init_state;
344 whlp_mkparagraph(&state, FONT_TITLE, p->words, FALSE, &conf);
344 whlp_mkparagraph(&state, FONT_TITLE, p->words, false, &conf);
345345 whlp_wtext(&state, NULL);
346346 whlp_end_para(h);
347347 whlp_rdaddwc(&rs, p->words, &conf, NULL);
430430 whlp_navmenu(&state, p, &conf);
431431 }
432432
433 done_contents_topic = TRUE;
433 done_contents_topic = true;
434434 }
435435
436436 if (lastsect && lastsect->child) {
512512 whlp_begin_para(h, WHLP_PARA_NONSCROLL);
513513 state.cstate = charset_init_state;
514514 if (p->kwtext) {
515 whlp_mkparagraph(&state, FONT_TITLE, p->kwtext, FALSE, &conf);
515 whlp_mkparagraph(&state, FONT_TITLE, p->kwtext, false, &conf);
516516 whlp_set_font(h, FONT_TITLE);
517517 whlp_wtext(&state, conf.sectsuffix);
518518 }
519 whlp_mkparagraph(&state, FONT_TITLE, p->words, FALSE, &conf);
519 whlp_mkparagraph(&state, FONT_TITLE, p->words, false, &conf);
520520 whlp_wtext(&state, NULL);
521521 whlp_end_para(h);
522522
562562 if (p->type == para_Bullet) {
563563 whlp_wtext(&state, conf.bullet);
564564 } else {
565 whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, FALSE, &conf);
565 whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, false, &conf);
566566 whlp_wtext(&state, conf.listsuffix);
567567 }
568568 whlp_wtext(&state, NULL);
576576 state.cstate = charset_init_state;
577577
578578 if (p->type == para_BiblioCited) {
579 whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, FALSE, &conf);
579 whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, false, &conf);
580580 whlp_wtext(&state, L" ");
581581 }
582582
583 whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE, &conf);
583 whlp_mkparagraph(&state, FONT_NORMAL, p->words, false, &conf);
584584 whlp_wtext(&state, NULL);
585585 whlp_end_para(h);
586586 break;
686686 whlp_start_hyperlink(state->h, (WHLP_TOPIC)p->private_data);
687687 state->cstate = charset_init_state;
688688 if (p->kwtext) {
689 whlp_mkparagraph(state, FONT_NORMAL, p->kwtext, TRUE, conf);
689 whlp_mkparagraph(state, FONT_NORMAL, p->kwtext, true, conf);
690690 whlp_set_font(state->h, FONT_NORMAL);
691691 whlp_wtext(state, conf->sectsuffix);
692692 }
693 whlp_mkparagraph(state, FONT_NORMAL, p->words, TRUE, conf);
693 whlp_mkparagraph(state, FONT_NORMAL, p->words, true, conf);
694694 whlp_wtext(state, NULL);
695695 whlp_end_hyperlink(state->h);
696696 whlp_end_para(state->h);
698698 }
699699
700700 static void whlp_mkparagraph(struct bk_whlp_state *state,
701 int font, word *text, int subsidiary,
701 int font, word *text, bool subsidiary,
702702 whlpconf *conf) {
703703 keyword *kwl;
704704 int deffont = font;
788788 if (cvt_ok(conf->charset, text->text) || !text->alt)
789789 whlp_wtext(state, text->text);
790790 else
791 whlp_mkparagraph(state, deffont, text->alt, FALSE, conf);
791 whlp_mkparagraph(state, deffont, text->alt, false, conf);
792792 } else if (removeattr(text->type) == word_WhiteSpace) {
793793 whlp_wtext(state, L" ");
794794 } else if (removeattr(text->type) == word_Quote) {
0 /*.o
1 /sbcsdat.c
2 /sbcsdat.h
3 /convcs
4 /cstable
5 /confuse
6 /csshow
7 /libcharset.a
8 .deps
9 .ninja_deps
10 .ninja_log
11 /Makefile
12 /Makefile.in
13 /aclocal.m4
14 /autom4te.cache/
15 /compile
16 /configure
17 /depcomp
18 /install-sh
19 /missing
20 /stamp-h1
21 /config.log
22 /config.status
23 /CMakeCache.txt
24 /CMakeFiles
25 /cmake_install.cmake
26 /build.ninja
27 /rules.ninja
# CMake-based build system.

# I don't want to have to edit the master list of source files in more
# than one place, so let's get CMake to use its built-in file and
# string handling abilities to read the list out of Makefile.am
# alongside it.

# 3.0 stays the oldest CMake we accept. The upper bound of the range
# declares the newest policy set this file opts in to, so that newer
# CMake releases which refuse a bare pre-3.5 minimum still accept it.
# (CMake older than 3.12 ignores the "...max" part.)
cmake_minimum_required(VERSION 3.0...3.12)
project(libcharset LANGUAGES C)

# Read Makefile.am, join backslash-continued lines into one line each,
# and pick out every "NAME = value" definition. The expansions are
# quoted so that the file contents always travel as a single argument.
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/Makefile.am" MAKEFILE_AM)
string(REPLACE "\\\n" " " MAKEFILE_AM "${MAKEFILE_AM}")
string(REGEX MATCHALL "[^ \n]* = [^\n]*" MAKEFILE_AM_DEFS "${MAKEFILE_AM}")

# Each lower-case <name>_SOURCES definition becomes a target:
# libcharset_a_SOURCES is the static library `charset'; every other
# one is a utility program linked against it, unless the including
# project has set LIBCHARSET_LIBRARY_ONLY.
foreach(MAKEFILE_AM_DEF IN LISTS MAKEFILE_AM_DEFS)
  if("${MAKEFILE_AM_DEF}" MATCHES "^([a-z][^ ]*)_SOURCES = (.*)")
    set(target_name "${CMAKE_MATCH_1}")
    # Keep only the .c files; headers listed in Makefile.am are
    # dropped. Tabs are excluded as well as spaces so that a
    # tab-indented continuation line cannot leak into a file name.
    string(REGEX MATCHALL "[^ \t]*\\.c" c_sources "${CMAKE_MATCH_2}")
    if("${target_name}" STREQUAL "libcharset_a")
      # The generated header is listed as a source so that it is
      # produced before anything that includes it is compiled.
      add_library(charset STATIC ${c_sources}
        "${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.h")
      target_include_directories(charset PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}")
    elseif(NOT LIBCHARSET_LIBRARY_ONLY)
      add_executable(${target_name} ${c_sources}
        "${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.h")
      target_include_directories(${target_name} PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}")
      target_link_libraries(${target_name} PRIVATE charset)
    endif()
  endif()
endforeach()

# sbcsdat.c and sbcsdat.h are generated into the build directory by
# running sbcsgen.pl over sbcs.dat. Inputs and outputs are given as
# absolute paths, and VERBATIM keeps argument quoting identical on
# every platform.
add_custom_command(
  OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.c"
  COMMAND perl "${CMAKE_CURRENT_SOURCE_DIR}/sbcsgen.pl"
    "${CMAKE_CURRENT_SOURCE_DIR}/sbcs.dat"
    "--source=${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.c"
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/sbcsgen.pl"
    "${CMAKE_CURRENT_SOURCE_DIR}/sbcs.dat"
  VERBATIM)
add_custom_command(
  OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.h"
  COMMAND perl "${CMAKE_CURRENT_SOURCE_DIR}/sbcsgen.pl"
    "${CMAKE_CURRENT_SOURCE_DIR}/sbcs.dat"
    "--header=${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.h"
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/sbcsgen.pl"
    "${CMAKE_CURRENT_SOURCE_DIR}/sbcs.dat"
  VERBATIM)
+0
-252
charset/Makefile less more
0 # -*- make -*-
1 #
2 # Makefile for libcharset.
3
4 # This Makefile should be sufficient to build libcharset and its
5 # demo application all on its own. However, it's also a valid
6 # Makefile _fragment_ which can be linked in to another program
7 # Makefile to allow libcharset to be built directly into its
8 # binary.
9
10 # To include this as part of another Makefile, you need to:
11 #
12 # - Define $(LIBCHARSET_SRCDIR) to be a directory prefix (i.e.
13 # probably ending in a slash) which allows access to the
14 # libcharset source files.
15 #
16 # - Define $(LIBCHARSET_OBJDIR) to be a directory prefix (i.e.
17 # probably ending in a slash) which allows access to the
18 # directory where the libcharset object files need to be put.
19 #
20 # - Define $(LIBCHARSET_OBJPFX) to be a filename prefix to be
21 # applied to the libcharset object files (in case, for example,
22 # the file names clash with those of the main application, and
23 # you need to call them cs-*.o to resolve the clash).
24 #
25 # - Define $(LIBCHARSET_GENPFX) to be a prefix to be added to
26 # targets such as `all' and `clean'. (Mostly the point of this
27 # is to get those targets out of the way for the Makefile
28 # fragment including us.)
29 #
30 # - If you need your compiler to use the -MD flag, define $(MD) to
31 # be `-MD'.
32 #
33 # This Makefile fragment will then define rules for building each
34 # object file, and will in turn define $(LIBCHARSET_OBJS) to be
35 # what you need to add to your link line.
36
37 $(LIBCHARSET_GENPFX)all: \
38 $(LIBCHARSET_OBJDIR)libcharset.a \
39 $(LIBCHARSET_OBJDIR)convcs \
40 $(LIBCHARSET_OBJDIR)cstable \
41 $(LIBCHARSET_OBJDIR)confuse \
42 $(LIBCHARSET_OBJDIR)csshow
43
44 $(LIBCHARSET_OBJDIR)convcs: $(LIBCHARSET_SRCDIR)convcs.c \
45 $(LIBCHARSET_OBJDIR)libcharset.a
46 $(CC) $(CFLAGS) -o $(LIBCHARSET_OBJDIR)convcs \
47 $(LIBCHARSET_SRCDIR)convcs.c \
48 $(LIBCHARSET_OBJDIR)libcharset.a
49
50 $(LIBCHARSET_OBJDIR)cstable: $(LIBCHARSET_SRCDIR)cstable.c \
51 $(LIBCHARSET_OBJDIR)libcharset.a \
52 $(LIBCHARSET_OBJDIR)sbcsdat.c
53 $(CC) $(CFLAGS) -I $(LIBCHARSET_OBJDIR). \
54 -o $(LIBCHARSET_OBJDIR)cstable \
55 $(LIBCHARSET_SRCDIR)cstable.c \
56 $(LIBCHARSET_OBJDIR)libcharset.a
57
58 $(LIBCHARSET_OBJDIR)confuse: $(LIBCHARSET_SRCDIR)confuse.c \
59 $(LIBCHARSET_OBJDIR)libcharset.a
60 $(CC) $(CFLAGS) -o $(LIBCHARSET_OBJDIR)confuse \
61 $(LIBCHARSET_SRCDIR)confuse.c \
62 $(LIBCHARSET_OBJDIR)libcharset.a
63
64 $(LIBCHARSET_OBJDIR)csshow: $(LIBCHARSET_SRCDIR)csshow.c \
65 $(LIBCHARSET_OBJDIR)libcharset.a
66 $(CC) $(CFLAGS) -o $(LIBCHARSET_OBJDIR)csshow \
67 $(LIBCHARSET_SRCDIR)csshow.c \
68 $(LIBCHARSET_OBJDIR)libcharset.a
69
70 LIBCHARSET_OBJS = \
71 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)big5enc.o \
72 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)big5set.o \
73 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)cns11643.o \
74 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)cp949.o \
75 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)emacsenc.o \
76 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)euc.o \
77 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)fromucs.o \
78 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)gb2312.o \
79 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)htmlcs.o \
80 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)hz.o \
81 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)iso2022.o \
82 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)iso2022s.o \
83 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)istate.o \
84 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)jisx0208.o \
85 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)jisx0212.o \
86 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)ksx1001.o \
87 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)locale.o \
88 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)localenc.o \
89 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)macenc.o \
90 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)mimeenc.o \
91 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)sbcs.o \
92 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)sbcsdat.o \
93 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)shiftjis.o \
94 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)slookup.o \
95 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)superset.o \
96 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)toucs.o \
97 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf16.o \
98 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf7.o \
99 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf8.o \
100 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)xenc.o \
101 # end of list
102
103 $(LIBCHARSET_OBJDIR)libcharset.a: $(LIBCHARSET_OBJS)
104 ar rcs $@ $(LIBCHARSET_OBJS)
105
106 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)big5enc.o: \
107 $(LIBCHARSET_SRCDIR)big5enc.c
108 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
109
110 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)big5set.o: \
111 $(LIBCHARSET_SRCDIR)big5set.c
112 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
113
114 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)cns11643.o: \
115 $(LIBCHARSET_SRCDIR)cns11643.c
116 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
117
118 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)cp949.o: \
119 $(LIBCHARSET_SRCDIR)cp949.c
120 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
121
122 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)emacsenc.o: \
123 $(LIBCHARSET_SRCDIR)emacsenc.c
124 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
125
126 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)euc.o: \
127 $(LIBCHARSET_SRCDIR)euc.c
128 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
129
130 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)fromucs.o: \
131 $(LIBCHARSET_SRCDIR)fromucs.c
132 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
133
134 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)gb2312.o: \
135 $(LIBCHARSET_SRCDIR)gb2312.c
136 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
137
138 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)htmlcs.o: \
139 $(LIBCHARSET_SRCDIR)htmlcs.c
140 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
141
142 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)hz.o: \
143 $(LIBCHARSET_SRCDIR)hz.c
144 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
145
146 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)iso2022.o: \
147 $(LIBCHARSET_SRCDIR)iso2022.c \
148 $(LIBCHARSET_OBJDIR)sbcsdat.h
149 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -I $(LIBCHARSET_OBJDIR). -c -o $@ $<
150
151 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)iso2022s.o: \
152 $(LIBCHARSET_SRCDIR)iso2022s.c \
153 $(LIBCHARSET_OBJDIR)sbcsdat.h
154 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -I $(LIBCHARSET_OBJDIR). -c -o $@ $<
155
156 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)istate.o: \
157 $(LIBCHARSET_SRCDIR)istate.c
158 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
159
160 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)jisx0208.o: \
161 $(LIBCHARSET_SRCDIR)jisx0208.c
162 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
163
164 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)jisx0212.o: \
165 $(LIBCHARSET_SRCDIR)jisx0212.c
166 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
167
168 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)ksx1001.o: \
169 $(LIBCHARSET_SRCDIR)ksx1001.c
170 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
171
172 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)locale.o: \
173 $(LIBCHARSET_SRCDIR)locale.c
174 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
175
176 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)localenc.o: \
177 $(LIBCHARSET_SRCDIR)localenc.c
178 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
179
180 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)macenc.o: \
181 $(LIBCHARSET_SRCDIR)macenc.c
182 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
183
184 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)mimeenc.o: \
185 $(LIBCHARSET_SRCDIR)mimeenc.c
186 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
187
188 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)sbcs.o: \
189 $(LIBCHARSET_SRCDIR)sbcs.c
190 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
191
192 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)shiftjis.o: \
193 $(LIBCHARSET_SRCDIR)shiftjis.c
194 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
195
196 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)slookup.o: \
197 $(LIBCHARSET_SRCDIR)slookup.c \
198 $(LIBCHARSET_OBJDIR)sbcsdat.c
199 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -I $(LIBCHARSET_OBJDIR). -c -o $@ $<
200
201 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)superset.o: \
202 $(LIBCHARSET_SRCDIR)superset.c
203 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
204
205 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)toucs.o: \
206 $(LIBCHARSET_SRCDIR)toucs.c
207 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
208
209 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf16.o: \
210 $(LIBCHARSET_SRCDIR)utf16.c
211 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
212
213 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf7.o: \
214 $(LIBCHARSET_SRCDIR)utf7.c
215 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
216
217 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf8.o: \
218 $(LIBCHARSET_SRCDIR)utf8.c
219 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
220
221 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)xenc.o: \
222 $(LIBCHARSET_SRCDIR)xenc.c
223 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
224
225 # This object file is special, because its source file is itself
226 # generated - and therefore goes in the object directory.
227
228 $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)sbcsdat.o: \
229 $(LIBCHARSET_OBJDIR)sbcsdat.c
230 $(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
231
232 $(LIBCHARSET_OBJDIR)sbcsdat.c: \
233 $(LIBCHARSET_SRCDIR)sbcs.dat \
234 $(LIBCHARSET_SRCDIR)sbcsgen.pl
235 perl $(LIBCHARSET_SRCDIR)sbcsgen.pl \
236 $(LIBCHARSET_SRCDIR)sbcs.dat \
237 --source=$(LIBCHARSET_OBJDIR)sbcsdat.c
238
239 $(LIBCHARSET_OBJDIR)sbcsdat.h: \
240 $(LIBCHARSET_SRCDIR)sbcs.dat \
241 $(LIBCHARSET_SRCDIR)sbcsgen.pl
242 perl $(LIBCHARSET_SRCDIR)sbcsgen.pl \
243 $(LIBCHARSET_SRCDIR)sbcs.dat \
244 --header=$(LIBCHARSET_OBJDIR)sbcsdat.h
245
246 $(LIBCHARSET_GENPFX)clean:
247 rm -f $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)*.o \
248 $(LIBCHARSET_OBJDIR)libcharset.a \
249 $(LIBCHARSET_OBJDIR)sbcsdat.c \
250 $(LIBCHARSET_OBJDIR)sbcsdat.h \
251 $(LIBCHARSET_OBJDIR)convcs
# Automake input for libcharset and its utility programs.
#
# The CMake build in this directory also reads the NAME_SOURCES
# assignments below, so keep each of them as a plain assignment with
# space-separated file names.

# Installed programs and the static library they link against.
bin_PROGRAMS = convcs cstable csshow confuse
lib_LIBRARIES = libcharset.a

convcs_SOURCES = convcs.c
convcs_LDADD = libcharset.a

cstable_SOURCES = cstable.c
cstable_LDADD = libcharset.a

csshow_SOURCES = csshow.c
csshow_LDADD = libcharset.a

confuse_SOURCES = confuse.c
confuse_LDADD = libcharset.a

# Master list of library sources and headers.
libcharset_a_SOURCES = charset.h internal.h sbcsdat.h big5enc.c \
    big5set.c cns11643.c cp949.c emacsenc.c enum.h euc.c fromucs.c \
    gb2312.c htmlcs.c hz.c iso2022.c iso2022s.c iso6937.c istate.c \
    jisx0208.c jisx0212.c ksx1001.c locale.c localenc.c macenc.c \
    mimeenc.c sbcs.c sbcsdat.c shiftjis.c slookup.c superset.c toucs.c \
    utf16.c utf7.c utf8.c xenc.c

# sbcsdat.c and sbcsdat.h are generated from sbcs.dat by sbcsgen.pl.
BUILT_SOURCES = sbcsdat.c sbcsdat.h
CLEANFILES = sbcsdat.c sbcsdat.h
sbcsdat.c: sbcsgen.pl sbcs.dat
	perl $^ --source=sbcsdat.c
sbcsdat.h: sbcsgen.pl sbcs.dat
	perl $^ --header=sbcsdat.h

# The generator and its input are not sources of any target, so they
# are listed here to be included in the distribution tarball.
EXTRA_DIST = sbcsgen.pl sbcs.dat
#!/bin/sh
# Generate the autotools build files (autoreconf -i installs any missing
# auxiliary files), then remove autoconf's cache directory. The cache is
# only removed if autoreconf succeeded.
autoreconf -i &&
    rm -rf autom4te.cache
5656 * charset_state.
5757 */
5858
59 static int write_big5(charset_spec const *charset, long int input_chr,
60 charset_state *state,
61 void (*emit)(void *ctx, long int output), void *emitctx)
59 static bool write_big5(charset_spec const *charset, long int input_chr,
60 charset_state *state,
61 void (*emit)(void *ctx, long int output), void *emitctx)
6262 {
6363 UNUSEDARG(charset);
6464 UNUSEDARG(state);
6565
6666 if (input_chr == -1)
67 return TRUE; /* stateless; no cleanup required */
67 return true; /* stateless; no cleanup required */
6868
6969 if (input_chr < 0x80) {
7070 emit(emitctx, input_chr);
71 return TRUE;
71 return true;
7272 } else {
7373 int r, c;
7474 if (unicode_to_big5(input_chr, &r, &c)) {
7575 emit(emitctx, r + 0xA1);
7676 emit(emitctx, c + 0x40);
77 return TRUE;
77 return true;
7878 } else {
79 return FALSE;
79 return false;
8080 }
8181 }
8282 }
39643964 return big5_forward[r][c];
39653965 }
39663966
3967 /* This one returns 1 on success, 0 if the code point doesn't exist. */
3968 int unicode_to_big5(long int unicode, int *r, int *c)
3967 /* This one returns true on success, false if the code point doesn't exist. */
3968 bool unicode_to_big5(long int unicode, int *r, int *c)
39693969 {
39703970 int rr, cc;
39713971 long int uu;
39853985 else {
39863986 *r = rr;
39873987 *c = cc;
3988 return 1;
3988 return true;
39893989 }
39903990 }
3991 return 0;
3991 return false;
39923992 }
39933993
39943994 #ifdef TESTMODE
66 #define charset_charset_h
77
88 #include <stddef.h>
9 #include <stdbool.h>
910
1011 /*
1112 * Enumeration that lists all the multibyte or single-byte
7879 CS_UTF16,
7980 CS_UTF16BE,
8081 CS_UTF16LE,
82 CS_UTF16BE_NO_BOM,
83 CS_UTF16LE_NO_BOM,
8184 CS_EUC_JP,
8285 CS_EUC_CN,
8386 CS_EUC_KR,
9497 CS_BS4730,
9598 CS_DEC_GRAPHICS,
9699 CS_EUC_TW,
100 CS_ISO6937,
101 CS_ISO6937_EURO,
102 CS_ITS,
103 CS_SAIL,
97104 CS_LIMIT /* dummy value indicating extent of enum */
98105 } charset_t;
99106
107114 * charset_state mystate = CHARSET_INIT_STATE;
108115 */
109116 #define CHARSET_INIT_STATE { 0L, 0L } /* a suitable initialiser */
117
118 #if defined __cplusplus
119 extern "C" {
120 #if 0
121 }
122 #endif
123 #endif
110124
111125 /*
112126 * This external variable contains the same data, but is provided
167181 * If `error' is non-NULL and a character is found which cannot be
168182 * expressed in the output charset, conversion will terminate at
169183 * that character (so `input' points to the offending character)
170 * and `*error' will be set to TRUE; if `error' is non-NULL and no
184 * and `*error' will be set to true; if `error' is non-NULL and no
171185 * difficult characters are encountered, `*error' will be set to
172 * FALSE. If `error' is NULL, difficult characters will simply be
186 * false. If `error' is NULL, difficult characters will simply be
173187 * ignored.
174188 *
175189 * If `input' is NULL, this routine will output the necessary bytes
186200
187201 int charset_from_unicode(const wchar_t **input, int *inlen,
188202 char *output, int outlen,
189 int charset, charset_state *state, int *error);
203 int charset, charset_state *state, bool *error);
190204
191205 /*
192206 * Convert X11 encoding names to and from our charset identifiers.
233247 int charset_upgrade(int charset);
234248
235249 /*
236 * This function returns TRUE if the input charset is a vaguely
237 * sensible superset of ASCII. That is, it returns FALSE for 7-bit
250 * This function returns true if the input charset is a vaguely
251 * sensible superset of ASCII. That is, it returns false for 7-bit
238252 * encoding formats such as HZ and UTF-7.
239253 */
240 int charset_contains_ascii(int charset);
241
242 /*
243 * This function returns TRUE if the input charset is single-byte.
244 */
245 int charset_is_single_byte(int charset);
254 bool charset_contains_ascii(int charset);
255
256 /*
257 * This function returns true if the input charset is single-byte.
258 */
259 bool charset_is_single_byte(int charset);
246260
247261 /*
248262 * This function tries to deduce the CS_* identifier of the charset
284298 * if (charset_exists(cs))
285299 * do_stuff_with(cs);
286300 */
287 int charset_exists(int charset);
301 bool charset_exists(int charset);
302
303 #if defined __cplusplus
304 #if 0
305 {
306 #endif
307 }
308 #endif
288309
289310 #endif /* charset_charset_h */
1301913019 return cns11643_forward((p*94+r)*94+c);
1302013020 }
1302113021
13022 /* This one returns 1 on success, 0 if the code point doesn't exist. */
13023 int unicode_to_cns11643(long int unicode, int *p, int *r, int *c)
13022 /* This one returns true on success, false if the code point doesn't exist. */
13023 bool unicode_to_cns11643(long int unicode, int *p, int *r, int *c)
1302413024 {
1302513025 int index, pp, rr, cc;
1302613026 long int uu;
1304413044 *p = pp;
1304513045 *r = rr;
1304613046 *c = cc;
13047 return 1;
13047 return true;
1304813048 }
1304913049 }
13050 return 0;
13050 return false;
1305113051 }
1305213052
1305313053 #ifdef TESTMODE
0 # autoconf input for libcharset.
1
2 AC_INIT([libcharset], [NOVERSION], [anakin@pobox.com])
3 AC_CONFIG_SRCDIR([charset.h])
4 AC_CONFIG_AUX_DIR([.])
5
6 AM_INIT_AUTOMAKE(foreign)
7
8 AC_PROG_CC
9 AC_PROG_RANLIB
10 AC_PROG_INSTALL
11
12 AC_LANG([C])
13
14 AC_CONFIG_FILES([Makefile])
15 AC_OUTPUT
3838 char *p = *++argv;
3939 char *orig = p;
4040 char *end;
41 int base = 16, semi_ok = 0;
41 int base = 16;
42 bool semi_ok = false;
4243 wchar_t ch;
4344
4445 if ((p[0] == 'U' || p[0] == 'u') &&
5253 p++;
5354 else
5455 base = 10;
55 semi_ok = 1;
56 } else if (mbtowc(&ch, p, strlen(p)) == strlen(p)) {
56 semi_ok = true;
57 } else if ((size_t)mbtowc(&ch, p, strlen(p)) == strlen(p)) {
5758 chars[nchars++] = ch;
5859 continue;
5960 }
7374 for (i = 0; i < nchars; i++) {
7475 wchar_t inbuf[1];
7576 const wchar_t *inptr;
76 int inlen, error, ret;
77 int inlen, ret;
78 bool error;
7779
7880 if (!charset_exists(cs)) {
7981 encodings[i*CS_LIMIT+cs].len = 0;
8385 inbuf[0] = chars[i];
8486 inptr = inbuf;
8587 inlen = 1;
86 error = 0;
88 error = false;
8789 ret = charset_from_unicode(&inptr, &inlen,
8890 encodings[i*CS_LIMIT+cs].string,
8991 MAXENCLEN, cs, NULL, &error);
6262 fputs(helptext, fp);
6363 }
6464
65 int match_long_opt(const char *argument, const char *optname, const char **val)
66 {
67 int optlen = strlen(optname);
65 bool match_long_opt(const char *argument, const char *optname,
66 const char **val)
67 {
68 size_t optlen = strlen(optname);
6869 if (strcspn(argument, "=") != optlen)
69 return 0; /* not the right length to match */
70 return false; /* not the right length to match */
7071 if (memcmp(argument, optname, optlen) != 0)
71 return 0; /* doesn't match the leading text */
72 return false; /* doesn't match the leading text */
7273 if (argument[optlen])
7374 *val = argument + optlen + 1;
7475 else
7576 *val = NULL;
76 return 1;
77 return true;
7778 }
7879
7980 static int srcset = CS_NONE;
8081 static int dstset = CS_NONE;
81 static int html_mode = 0;
82 static bool html_mode = false;
8283 static const wchar_t *replacement_cooked = NULL;
8384 static int replacement_cooked_len = 0;
8485
8586 int main(int argc, char **argv)
8687 {
87 int doing_opts = 1;
88 bool doing_opts = true;
8889 int localeset;
89 charset_state instate = CHARSET_INIT_STATE;
90 charset_state outstate = CHARSET_INIT_STATE;
91 char inbuf[256], outbuf[256];
92 wchar_t midbuf[256];
9390 const char *replacement_raw = NULL;
9491 const char *inptr;
95 const wchar_t *midptr;
96 int rdlen, inlen, midlen, inret, midret;
92 int inlen;
9793 const char *infilename = NULL;
9894
9995 setlocale(LC_CTYPE, "");
104100 const char *v;
105101 if (*p == '-' && p[1] && doing_opts) {
106102 if (!strcmp(p, "--")) {
107 doing_opts = 0;
103 doing_opts = false;
108104 } else if (match_long_opt(p, "--help", &v)) {
109105 help(stdout);
110106 return 0;
120116 }
121117 replacement_raw = v;
122118 } else if (match_long_opt(p, "--html", &v)) {
123 html_mode = 1;
119 html_mode = true;
124120 } else {
125121 fprintf(stderr, "convcs: unrecognised option '%s'\n", p);
126122 return 1;
127123 }
128124 } else {
129 int cs;
130
131125 if (srcset == CS_NONE) {
132126 srcset = !strcmp(p, "-")? localeset : charset_from_localenc(p);
133127 if (srcset == CS_NONE) {
220214 if (html_srcset != CS_NONE) {
221215 const char *output_cs_name = charset_to_mimeenc(dstset);
222216 srcset = html_srcset;
223 assert(namepos + namelen <= rdret);
217 assert(namepos + namelen <= (size_t)rdret);
224218 convert_got_data(inbuf, namepos);
225219 convert_got_data(output_cs_name, strlen(output_cs_name));
226220 convert_got_data(inbuf + namepos + namelen,
230224 }
231225 }
232226
233 while (1) {
234 if (!fgets(inbuf, sizeof(inbuf), infile))
235 break; /* EOF */
236
237 convert_got_data(inbuf, strlen(inbuf));
227 bool eof = false;
228 while (!eof) {
229 /*
230 * Manual loop on getc which has the feature of fgets that we
231 * stop if we see a newline (so that when convcs is run
232 * interactively in a terminal it will deliver each translated
233 * line promptly), but also has the feature of fread that it
234 * provides the correct buffer length even in the face of NUL
235 * bytes in the input.
236 */
237
238 size_t nread = 0;
239 while (nread < lenof(inbuf)) {
240 int c = getc(infile);
241 if (c == EOF) {
242 eof = true;
243 break;
244 }
245 inbuf[nread++] = c;
246 if (c == '\n')
247 break;
248 }
249
250 if (nread)
251 convert_got_data(inbuf, nread);
238252 }
239253 convert_done();
240254 return 0;
263277 lenof(midbuf), srcset,
264278 &instate, replacement_cooked,
265279 replacement_cooked_len)) > 0) {
266 int error;
280 bool error;
267281
268282 midlen = inret;
269283 midptr = midbuf;
276290 while ( (midret = charset_from_unicode(&midptr, &midlen, outbuf,
277291 lenof(outbuf), dstset,
278292 &outstate, &error)) > 0 ||
279 error != 0) {
293 error) {
280294 fwrite(outbuf, 1, midret, stdout);
281295 if (error) {
282296 const wchar_t *repl_ptr = replacement_cooked;
5656 * charset_state.
5757 */
5858
59 static int write_cp949(charset_spec const *charset, long int input_chr,
60 charset_state *state,
61 void (*emit)(void *ctx, long int output),
62 void *emitctx)
59 static bool write_cp949(charset_spec const *charset, long int input_chr,
60 charset_state *state,
61 void (*emit)(void *ctx, long int output),
62 void *emitctx)
6363 {
6464 UNUSEDARG(charset);
6565 UNUSEDARG(state);
6666
6767 if (input_chr == -1)
68 return TRUE; /* stateless; no cleanup required */
68 return true; /* stateless; no cleanup required */
6969
7070 if (input_chr < 0x80) {
7171 emit(emitctx, input_chr);
72 return TRUE;
72 return true;
7373 } else {
7474 int r, c;
7575 if (unicode_to_cp949(input_chr, &r, &c)) {
7676 emit(emitctx, r + 0x80);
7777 emit(emitctx, c + 0x40);
78 return TRUE;
78 return true;
7979 } else {
80 return FALSE;
80 return false;
8181 }
8282 }
8383 }
1111 * terminal window, of course.
1212 *
1313 * Possible extra features:
14 * - configurable row len and table size.
14 * - configurable row length.
1515 * - option to disambiguate the various classes of failure in the
1616 * output, e.g. if terminfo gives us control sequences to change
1717 * colours then we could colour the missing characters differently
2222 * of undisplayability. (In particular, don't forget to turn off
2323 * the early exit when nothing in the range is printable at
2424 * all.)
25 * - ability to display sub-blocks of multibyte encodings such as
26 * EUCs. But that would need some thought about how to sensibly
27 * index those tables.
2825 */
2926
30 #define _XOPEN_SOURCE 500 /* for wcwidth and snprintf */
27 /*
28 * Feature macros I've found necessary to make the standard headers
29 * declare wcwidth and snprintf (on various systems).
30 */
31 #define _XOPEN_SOURCE 500
32 #define _C99_SOURCE
3133
3234 #include <assert.h>
3335 #include <stdio.h>
4749 #include "charset.h"
4850
4951 static const char *helptext =
50 "usage: csshow ( CHARSET | BASE-UNICODE-VALUE )\n"
51 " e.g.: csshow Win1252\n"
52 " csshow U+2500\n"
52 "usage: csshow CHARSET-NAME [ ENCODING-PREFIX-BYTE... ]\n"
53 " e.g.: csshow Win1252 show a whole single-byte charset\n"
54 " csshow Shift-JIS show all single-byte chars in a "
55 "multibyte charset\n"
56 " csshow Shift-JIS 9C show all chars encoded as 9C xx in "
57 "Shift-JIS\n\n"
58 " or: csshow BASE-UNICODE-VALUE [ +RANGE-LENGTH | END-UNICODE-VALUE ]\n"
59 " e.g.: csshow U+2500 show 0x100 characters starting at U+2500 "
60 "inclusive\n"
61 " e.g.: csshow U+2500 +128 show a different number of characters\n"
62 " e.g.: csshow U+2500 +0x80 same effect, but you can write the length "
63 "in hex\n"
64 " csshow U+2500 U+2580 or specify the (non-inclusive) range "
65 "endpoint\n\n"
5366 " also: csshow --help display this help text\n"
5467 ;
5568
6275 BAD_CHAR_IN_SOURCE_CHARSET,
6376 BAD_CHAR_IN_OUTPUT_CHARSET,
6477 UNPRINTABLE_CHAR,
78 MULTIBYTE_INTRODUCER,
6579 FIRST_PRINTABLE_VALUE,
6680 COMBINING_CHAR = FIRST_PRINTABLE_VALUE,
6781 WIDE_PRINTABLE_CHAR,
7286 char buf[7]; /* maximum even theoretical UTF-8 code length, plus NUL */
7387 };
7488
89 struct buf {
90 char *data;
91 size_t size, len;
92 };
93 static char *buf_add_space(struct buf *buf, size_t space)
94 {
95 char *toret;
96
97 if (buf->size - buf->len < space) {
98 buf->size = (buf->len + space) * 5 / 4 + 64;
99 buf->data = realloc(buf->data, buf->size);
100 if (!buf->data) {
101 fprintf(stderr, "csshow: out of memory\n");
102 exit(1);
103 }
104 }
105
106 toret = buf->data + buf->len;
107 buf->len += space;
108 return toret;
109 }
110
111 static enum Trans try_translate_from_source(
112 const char *in, int inlen, int charset, wchar_t *wc_out)
113 {
114 const char *cp;
115 int clen, ret0, ret1;
116
117 cp = in;
118 clen = inlen;
119 ret1 = charset_to_unicode(&cp, &clen, wc_out, 1, charset, NULL, L"?", 1);
120
121 cp = in;
122 clen = inlen;
123 ret0 = charset_to_unicode(&cp, &clen, wc_out, 1, charset, NULL, L"", 0);
124
125 if (ret0 == 1 && ret1 == 1) {
126 /* Successful translation into Unicode */
127 return NORMAL_PRINTABLE_CHAR;
128 } else if (ret0 == 0 && ret1 == 0) {
129 /* No output, even _with_ a replacement character
130 * defined for bad chars, means the input
131 * character has been absorbed into the charset
132 * state but not _yet_ generated any output or
133 * discovered an error. In other words, this is a
134 * multibyte introducer. */
135 return MULTIBYTE_INTRODUCER;
136 } else {
137 return BAD_CHAR_IN_SOURCE_CHARSET;
138 }
139 }
140
75141 int main(int argc, char **argv)
76142 {
77 int doing_opts = 1;
143 bool doing_opts = true;
78144 int source_charset = CS_ASCII, output_charset = CS_NONE;
79145 unsigned long base = 0, size = 0x100, rowlen = 0x10;
146 struct buf prefix = { NULL, 0, 0 };
147 enum ArgsState {
148 AS_INITIAL,
149 AS_UNICODE_ENDRANGE,
150 AS_MBCS_PREFIX,
151 AS_DONE
152 } args_state = AS_INITIAL;
80153
81154 while (--argc > 0) {
82155 const char *p = *++argv;
83156 if (*p == '-' && doing_opts) {
84157 if (!strcmp(p, "--")) {
85 doing_opts = 0;
158 doing_opts = false;
86159 } else if (!strcmp(p, "--help")) {
87160 help(stdout);
88161 return 0;
90163 fprintf(stderr, "csshow: unrecognised option '%s'\n", p);
91164 return 1;
92165 }
93 } else {
166 } else if (args_state == AS_INITIAL) {
167 /*
168 * First argument can be a Unicode code point or a
169 * single-byte charset name.
170 */
171
94172 int cs;
95173
96174 if (toupper((unsigned char)p[0]) == 'U' &&
97175 (p[1] == '-' || p[1] == '+')) {
98176 source_charset = CS_NONE; /* means just translate Unicode */
99177 base = strtoul(p+2, NULL, 16);
178 args_state = AS_UNICODE_ENDRANGE;
100179 } else if ((cs = charset_from_localenc(p)) != CS_NONE) {
101 if (!charset_is_single_byte(cs)) {
102 fprintf(stderr, "csshow: cannot display multibyte"
103 " charset %s\n", charset_to_localenc(cs));
104 return 1;
105 }
106180 source_charset = cs;
107181 base = 0;
182 args_state = AS_MBCS_PREFIX;
108183 } else {
109184 fprintf(stderr, "csshow: unrecognised argument '%s'\n", p);
110185 return 1;
111186 }
187 } else if (args_state == AS_UNICODE_ENDRANGE) {
188 /*
189 * If the first argument was a Unicode code point, then
190 * the next argument is taken to be an end point for the
191 * range, so that you can print larger ranges than 256
192 * characters.
193 */
194
195 if (toupper((unsigned char)p[0]) == 'U' &&
196 (p[1] == '-' || p[1] == '+')) {
197 /* U+XXXX / U-XXXXXXXX specify the end code point of
198 * the range. (Exclusive.) */
199 size = strtoul(p+2, NULL, 16) - base;
200 } else if (p[0] == '+') {
201 /* +NNNN specifies the size of the range. We use
202 * strtoul in base 0 so that decimal or 0xHEX are both
203 * accepted. */
204 size = strtoul(p+1, NULL, 0);
205 }
206
207 /* No further arguments expected. */
208 args_state = AS_DONE;
209
210 } else if (args_state == AS_MBCS_PREFIX) {
211 /*
212 * If the first argument was a charset name, then further
213 * arguments are taken to be hex byte values to accumulate
214 * into an encoding prefix. This allows you to say, for
215 * example, 'csshow Shift-JIS 89' to see the slice of the
216 * Shift-JIS encoding consisting of characters whose first
217 * encoding byte is 0x89, indexed by their second byte.
218 */
219
220 *buf_add_space(&prefix, 1) = strtoul(p, NULL, 16);
221 } else {
222 fprintf(stderr, "csshow: extra argument '%s' unexpected\n", p);
223 return 1;
112224 }
113225 }
114226
123235 struct translated_char *trans;
124236 const char *rowheadfmt;
125237 int rowheadwidth, colwidth;
126 int printed_a_line, skipped_a_line;
238 bool printed_a_line, skipped_a_line;
127239 unsigned long i, j;
240 enum Trans transret;
241 char *suffix_position = NULL;
242 wchar_t wc;
243
244 if (source_charset != CS_NONE) {
245 /*
246 * First, check that the prefix doesn't already form a
247 * completed character or an error.
248 */
249 transret = try_translate_from_source(
250 prefix.data, prefix.len, source_charset, &wc);
251 if (transret == BAD_CHAR_IN_SOURCE_CHARSET) {
252 fprintf(stderr, "csshow: prefix sequence is not valid\n");
253 return 1;
254 } else if (transret != MULTIBYTE_INTRODUCER) {
255 fprintf(stderr, "csshow: prefix sequence generates output\n");
256 return 1;
257 }
258
259 /*
260 * Make space in the prefix buffer to put each test byte on
261 * the end.
262 */
263 suffix_position = buf_add_space(&prefix, 1);
264 }
128265
129266 trans = malloc(size * sizeof(struct translated_char));
130267 if (!trans) {
138275 */
139276 for (i = 0; i < size; i++) {
140277 unsigned long charcode = base + i;
141 wchar_t wc;
142278
143279 trans[i].buf[0] = '\0';
144280
145281 if (source_charset == CS_NONE) {
146282 wc = charcode;
147283 } else {
148 char c = charcode;
149 const char *cp = &c;
150 int clen = 1;
151 int error = 0;
152
153 int ret = charset_to_unicode(
154 &cp, &clen, &wc, 1, source_charset, NULL, L"", 0);
155 if (ret != 1) {
156 trans[i].type = BAD_CHAR_IN_SOURCE_CHARSET;
284 *suffix_position = charcode;
285 transret = try_translate_from_source(
286 prefix.data, prefix.len, source_charset, &wc);
287
288 if (transret != NORMAL_PRINTABLE_CHAR) {
289 trans[i].type = transret;
157290 continue;
158291 }
159292 }
161294 {
162295 const wchar_t *wcp = &wc;
163296 int wclen = 1;
164 int error = 0;
297 bool error = false;
165298
166299 int ret = charset_from_unicode(
167300 &wcp, &wclen, trans[i].buf, sizeof(trans[i].buf) - 1,
168301 output_charset, NULL, &error);
169302
170 assert(ret < sizeof(trans[i].buf));
303 assert(0 <= ret);
304 assert((size_t)ret < sizeof(trans[i].buf));
171305 trans[i].buf[ret] = '\0';
172306
173307 if (wclen != 0 || ret == 0 || error) {
267401 printf("%-*X", colwidth, (unsigned)i);
268402 printf("\n");
269403
270 printed_a_line = skipped_a_line = 0;
404 printed_a_line = false;
405 skipped_a_line = false;
271406
272407 for (j = 0; j < size; j += rowlen) {
273408 /* See if we're skipping this row completely. */
274 int skip = 1;
409 bool skip = true;
275410 for (i = 0; i < rowlen && j+i < size; i++)
276411 if (trans[j+i].type >= FIRST_PRINTABLE_VALUE)
277 skip = 0;
412 skip = false;
278413 if (skip) {
279 skipped_a_line = 1;
414 skipped_a_line = true;
280415 continue;
281416 }
282417
288423 if (skipped_a_line && printed_a_line) {
289424 printf("\n");
290425 }
291 skipped_a_line = 0;
292
293 printed_a_line = 1;
426 skipped_a_line = false;
427
428 printed_a_line = true;
294429 printf(rowheadfmt, (unsigned)(base + j));;
295430 for (i = 0; i < rowlen && j+i < size; i++) {
296431 int chars_left = colwidth;
1414 #include "sbcsdat.h"
1515
1616 #define ENUM_CHARSET(x) extern charset_spec const charset_##x;
17 #include "enum.c"
17 #include "enum.h"
1818 #undef ENUM_CHARSET
1919 static charset_spec const *const cs_table[] = {
2020 #define ENUM_CHARSET(x) &charset_##x,
21 #include "enum.c"
21 #include "enum.h"
2222 #undef ENUM_CHARSET
2323 };
2424 static const char *const cs_names[] = {
2525 #define ENUM_CHARSET(x) #x,
26 #include "enum.c"
26 #include "enum.h"
2727 #undef ENUM_CHARSET
2828 };
2929
3030 int main(int argc, char **argv)
3131 {
3232 long int c;
33 int internal_names = FALSE;
34 int verbose = FALSE;
33 bool internal_names = false;
34 bool verbose = false;
3535
3636 while (--argc) {
3737 char *p = *++argv;
3838 if (!strcmp(p, "-i"))
39 internal_names = TRUE;
39 internal_names = true;
4040 else if (!strcmp(p, "-v"))
41 verbose = TRUE;
41 verbose = true;
4242 }
4343
4444 for (c = 0; c < 0x30000; c++) {
45 int i, plane, row, col, chr;
45 int plane, row, col, chr;
46 size_t i;
4647 char const *sep = "";
4748
4849 printf("U+%04x:", (unsigned)c);
+0
-28
charset/enum.c less more
0 /*
1 * enum.c - enumerate all charsets defined by the library.
2 *
3 * This file maintains a list of every other source file which
4 * contains ENUM_CHARSET definitions. It #includes each one with
5 * ENUM_CHARSETS defined, which causes those source files to do
6 * nothing at all except call the ENUM_CHARSET macro on each
7 * charset they define.
8 *
9 * This file in turn is included from various other places, with
10 * the ENUM_CHARSET macro defined to various different things. This
11 * allows us to have multiple implementations of the master charset
12 * lookup table (a static one and a dynamic one).
13 */
14
15 #define ENUM_CHARSETS
16 #include "sbcsdat.c"
17 #include "utf8.c"
18 #include "utf7.c"
19 #include "utf16.c"
20 #include "euc.c"
21 #include "iso2022.c"
22 #include "iso2022s.c"
23 #include "big5enc.c"
24 #include "shiftjis.c"
25 #include "hz.c"
26 #include "cp949.c"
27 #undef ENUM_CHARSETS
0 /*
1 * enum.h - enumerate all charsets defined by the library.
2 *
3 * This file maintains a list of every other source file which
4 * contains ENUM_CHARSET definitions. It #includes each one with
5 * ENUM_CHARSETS defined, which causes those source files to do
6 * nothing at all except call the ENUM_CHARSET macro on each
7 * charset they define.
8 *
9 * This file in turn is included from various other places, with
10 * the ENUM_CHARSET macro defined to various different things. This
11 * allows us to have multiple implementations of the master charset
12 * lookup table (a static one and a dynamic one).
13 */
14
15 #define ENUM_CHARSETS
16 #include "sbcsdat.c"
17 #include "utf8.c"
18 #include "utf7.c"
19 #include "utf16.c"
20 #include "euc.c"
21 #include "iso2022.c"
22 #include "iso2022s.c"
23 #include "big5enc.c"
24 #include "shiftjis.c"
25 #include "hz.c"
26 #include "cp949.c"
27 #include "iso6937.c"
28 #undef ENUM_CHARSETS
9292 * charset_state.
9393 */
9494
95 static int write_euc(charset_spec const *charset, long int input_chr,
96 charset_state *state,
97 void (*emit)(void *ctx, long int output), void *emitctx)
95 static bool write_euc(charset_spec const *charset, long int input_chr,
96 charset_state *state,
97 void (*emit)(void *ctx, long int output), void *emitctx)
9898 {
9999 struct euc const *euc = (struct euc *)charset->data;
100100 unsigned long c;
103103 UNUSEDARG(state);
104104
105105 if (input_chr == -1)
106 return TRUE; /* stateless; no cleanup required */
106 return true; /* stateless; no cleanup required */
107107
108108 /* ASCII is the easy bit, and is always the same. */
109109 if (input_chr < 0x80) {
110110 emit(emitctx, input_chr);
111 return TRUE;
111 return true;
112112 }
113113
114114 c = euc->from_ucs(input_chr);
115115 if (!c) {
116 return FALSE;
116 return false;
117117 }
118118
119119 cset = c >> 28;
125125
126126 while (len--)
127127 emit(emitctx, (c >> (8*len)) & 0xFF);
128 return TRUE;
128 return true;
129129 }
130130
131131 /*
88 char *output;
99 int outlen;
1010 int writtenlen;
11 int stopped;
11 bool stopped;
1212 };
1313
1414 static void charset_emit(void *ctx, long int output)
2222 param->outlen--;
2323 param->writtenlen++;
2424 } else {
25 param->stopped = 1;
25 param->stopped = true;
2626 }
2727 }
2828
2929 int charset_from_unicode(const wchar_t **input, int *inlen,
3030 char *output, int outlen,
31 int charset, charset_state *state, int *error)
31 int charset, charset_state *state, bool *error)
3232 {
3333 charset_spec const *spec = charset_find_spec(charset);
3434 charset_state localstate = CHARSET_INIT_STATE;
4343 param.output = output;
4444 param.outlen = outlen;
4545 param.writtenlen = 0;
46 param.stopped = 0;
46 param.stopped = false;
4747
4848 if (state)
4949 localstate = *state; /* structure copy */
5050 if (error)
51 *error = FALSE;
51 *error = false;
5252
5353 while (*inlen > 0) {
5454 int lenbefore = param.writtenlen;
55 int ret;
55 bool ret;
5656
5757 if (input)
5858 ret = spec->write(spec, **input, &localstate,
6464 * We have hit a difficult character, which the user
6565 * wants to know about. Leave now.
6666 */
67 *error = TRUE;
67 *error = true;
6868 return lenbefore;
6969 }
7070 if (param.stopped) {
20192019 return gb2312_forward[r][c];
20202020 }
20212021
2022 /* This one returns 1 on success, 0 if the code point doesn't exist. */
2023 int unicode_to_gb2312(long int unicode, int *r, int *c)
2022 /* This one returns true on success, false if the code point doesn't exist. */
2023 bool unicode_to_gb2312(long int unicode, int *r, int *c)
20242024 {
20252025 int rr, cc;
20262026 long int uu;
20402040 else {
20412041 *r = rr;
20422042 *c = cc;
2043 return 1;
2043 return true;
20442044 }
20452045 }
2046 return 0;
2046 return false;
20472047 }
20482048
20492049 #ifdef TESTMODE
8989 }
9090 }
9191
92 static int write_hz(charset_spec const *charset, long int input_chr,
93 charset_state *state,
94 void (*emit)(void *ctx, long int output), void *emitctx)
92 static bool write_hz(charset_spec const *charset, long int input_chr,
93 charset_state *state,
94 void (*emit)(void *ctx, long int output), void *emitctx)
9595 {
9696 int desired_state, r, c;
9797
106106 } else if (unicode_to_gb2312(input_chr, &r, &c)) {
107107 desired_state = 1;
108108 } else {
109 return FALSE;
109 return false;
110110 }
111111
112112 if (state->s0 != (unsigned)desired_state) {
116116 }
117117
118118 if (input_chr < 0)
119 return TRUE; /* special case: just reset state */
119 return true; /* special case: just reset state */
120120
121121 if (state->s0) {
122122 /*
127127 } else {
128128 emit(emitctx, c);
129129 }
130 return TRUE;
130 return true;
131131 }
132132
133133 const charset_spec charset_CS_HZ = {
44 #ifndef charset_internal_h
55 #define charset_internal_h
66
7 #include <stdbool.h>
8
79 /* This invariably comes in handy */
810 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
911
1012 /* This is an invalid Unicode value used to indicate an error. */
1113 #define ERROR 0xFFFFL /* Unicode value representing error */
12
13 #undef TRUE
14 #define TRUE 1
15 #undef FALSE
16 #define FALSE 0
1714
1815 typedef struct charset_spec charset_spec;
1916 typedef struct sbcs_data sbcs_data;
3532 * character set. The `emit' function expects to get byte
3633 * values passed to it.
3734 *
38 * A non-representable input character should cause a FALSE
35 * A non-representable input character should cause a false
3936 * return, _before_ `emit' is called. Successful conversion
40 * causes a TRUE return.
37 * causes a true return.
4138 *
4239 * If `input_chr' is -1, this function must revert the encoding
4340 * state to any default required at the end of a piece of
4441 * encoded text.
4542 */
46 int (*write)(charset_spec const *charset, long int input_chr,
47 charset_state *state,
48 void (*emit)(void *ctx, long int output), void *emitctx);
43 bool (*write)(charset_spec const *charset, long int input_chr,
44 charset_state *state,
45 void (*emit)(void *ctx, long int output), void *emitctx);
4946 void const *data;
5047 };
5148
8784 void read_sbcs(charset_spec const *charset, long int input_chr,
8885 charset_state *state,
8986 void (*emit)(void *ctx, long int output), void *emitctx);
90 int write_sbcs(charset_spec const *charset, long int input_chr,
91 charset_state *state,
92 void (*emit)(void *ctx, long int output), void *emitctx);
87 bool write_sbcs(charset_spec const *charset, long int input_chr,
88 charset_state *state,
89 void (*emit)(void *ctx, long int output), void *emitctx);
9390 long int sbcs_to_unicode(const struct sbcs_data *sd, long int input_chr);
9491 long int sbcs_from_unicode(const struct sbcs_data *sd, long int input_chr);
9592
9693 void read_utf8(charset_spec const *charset, long int input_chr,
9794 charset_state *state,
9895 void (*emit)(void *ctx, long int output), void *emitctx);
99 int write_utf8(charset_spec const *charset, long int input_chr,
100 charset_state *state,
101 void (*emit)(void *ctx, long int output),
102 void *emitctx);
96 bool write_utf8(charset_spec const *charset, long int input_chr,
97 charset_state *state,
98 void (*emit)(void *ctx, long int output),
99 void *emitctx);
103100
104101 long int big5_to_unicode(int r, int c);
105 int unicode_to_big5(long int unicode, int *r, int *c);
102 bool unicode_to_big5(long int unicode, int *r, int *c);
106103 long int cns11643_to_unicode(int p, int r, int c);
107 int unicode_to_cns11643(long int unicode, int *p, int *r, int *c);
104 bool unicode_to_cns11643(long int unicode, int *p, int *r, int *c);
108105 long int cp949_to_unicode(int r, int c);
109 int unicode_to_cp949(long int unicode, int *r, int *c);
106 bool unicode_to_cp949(long int unicode, int *r, int *c);
110107 long int ksx1001_to_unicode(int r, int c);
111 int unicode_to_ksx1001(long int unicode, int *r, int *c);
108 bool unicode_to_ksx1001(long int unicode, int *r, int *c);
112109 long int gb2312_to_unicode(int r, int c);
113 int unicode_to_gb2312(long int unicode, int *r, int *c);
110 bool unicode_to_gb2312(long int unicode, int *r, int *c);
114111 long int jisx0208_to_unicode(int r, int c);
115 int unicode_to_jisx0208(long int unicode, int *r, int *c);
112 bool unicode_to_jisx0208(long int unicode, int *r, int *c);
116113 long int jisx0212_to_unicode(int r, int c);
117 int unicode_to_jisx0212(long int unicode, int *r, int *c);
114 bool unicode_to_jisx0212(long int unicode, int *r, int *c);
118115
119116 /*
120117 * Placate compiler warning about unused parameters, of which we
122119 */
123120 #define UNUSEDARG(x) ( (x) = (x) )
124121
122 #ifdef __GNUC__
123 #define DELIBERATE_FALLTHROUGH __attribute__ ((fallthrough));
124 #else
125 #define DELIBERATE_FALLTHROUGH ((void)0)
126 #endif
127
125128 #endif /* charset_internal_h */
3636
3737 static long int emacs_big5_1_to_unicode(int, int);
3838 static long int emacs_big5_2_to_unicode(int, int);
39 static int unicode_to_emacs_big5(long int, int *, int *, int *);
39 static bool unicode_to_emacs_big5(long int, int *, int *, int *);
4040 static long int cns11643_1_to_unicode(int, int);
4141 static long int cns11643_2_to_unicode(int, int);
4242 static long int cns11643_3_to_unicode(int, int);
4545 static long int cns11643_6_to_unicode(int, int);
4646 static long int cns11643_7_to_unicode(int, int);
4747 static long int null_dbcs_to_unicode(int, int);
48 static int unicode_to_null_dbcs(long int, int *, int *);
49
50 typedef int (*to_dbcs_t)(long int, int *, int *);
51 typedef int (*to_dbcs_planar_t)(long int, int *, int *, int *);
48 static bool unicode_to_null_dbcs(long int, int *, int *);
49
50 typedef bool (*to_dbcs_t)(long int, int *, int *);
51 typedef bool (*to_dbcs_planar_t)(long int, int *, int *, int *);
5252
5353 /*
5454 * These macros cast between to_dbcs_planar_t and to_dbcs_t, in
8080 *
8181 * We are permitted to use ?:, however, and that works quite well
8282 * since the actual result of the sizeof expression _is_ evaluable
83 * at compile time. So here's my final answer:
83 * at compile time. So here's my final answer.
84 *
85 * (The double cast of each function pointer from its original type
86 * through void (*)(void) to the final type is there to suppress the
87 * warning that later versions of gcc will otherwise give about
88 * casting between different function pointer types. Apparently gcc
89 * accepts void (*)(void) as the canonical type you use when
90 * _deliberately_ doing that, so going via that deals with the
91 * warning.)
8492 */
8593 #define TYPECHECK(x,y) ( sizeof((x)) == sizeof((x)) ? (y) : (y) )
86 #define DEPLANARISE(x) TYPECHECK((x) == (to_dbcs_planar_t)NULL, (to_dbcs_t)(x))
87 #define REPLANARISE(x) TYPECHECK((x) == (to_dbcs_t)NULL, (to_dbcs_planar_t)(x))
94 #define DEPLANARISE(x) TYPECHECK((x) == (to_dbcs_planar_t)NULL, \
95 (to_dbcs_t)(void (*)(void))(x))
96 #define REPLANARISE(x) TYPECHECK((x) == (to_dbcs_t)NULL, \
97 (to_dbcs_planar_t)(void (*)(void))(x))
8898
8999 /*
90100 * Values used in the `enable' field. Each of these identifies a
204214 UNUSEDARG(c);
205215 return ERROR;
206216 }
207 static int unicode_to_null_dbcs(long int unicode, int *r, int *c)
217 static bool unicode_to_null_dbcs(long int unicode, int *r, int *c)
208218 {
209219 UNUSEDARG(unicode);
210220 UNUSEDARG(r);
211221 UNUSEDARG(c);
212 return 0; /* failed to convert anything */
222 return false; /* failed to convert anything */
213223 }
214224
215225 /*
239249 return big5_to_unicode(r, c);
240250 }
241251
242 static int unicode_to_emacs_big5(long int unicode, int *p, int *r, int *c)
252 static bool unicode_to_emacs_big5(long int unicode, int *p, int *r, int *c)
243253 {
244254 int rr, cc, s;
245255 if (!unicode_to_big5(unicode, &rr, &cc))
246 return 0;
256 return false;
247257 if (cc >= 64) {
248258 cc -= 34;
249259 assert(cc >= 64);
257267 }
258268 *r = s / 94;
259269 *c = s % 94;
260 return 1;
270 return true;
261271 }
262272
263273 /* Wrappers for cns11643_to_unicode() */
594604 break;
595605 }
596606 } else if ((input_chr & 0x80) || MODE < ESCSEQ) {
597 int is_gl = 0;
607 bool is_gl = false;
598608 struct iso2022_subcharset const *subcs;
599609 unsigned container;
600610 long input_7bit;
613623 container = (state->s1 >> 28) & 3;
614624 else { /* GL */
615625 container = state->s1 >> 30;
616 is_gl = 1;
626 is_gl = true;
617627 }
618628 input_7bit = input_chr & ~0x80;
619629 subcs = &iso2022_subcharsets[(state->s1 >> (container * 7)) & 0x7f];
763773 switch (i2) {
764774 case 0: /* Obsolete version of GZDM4 */
765775 i2 = '(';
776 DELIBERATE_FALLTHROUGH;
766777 case '(': /* GZDM4 */ case ')': /* G1DM4 */
767778 case '*': /* G2DM4 */ case '+': /* G3DM4 */
768779 designate(state, i2 - '(', M4, 0, input_chr);
775786 emit(emitctx, ERROR);
776787 break;
777788 }
789 break;
778790 case '%': /* DOCS */
779791 /* XXX What's a reasonable way to handle an unrecognised DOCS? */
780792 switch (i2) {
805817 }
806818 }
807819
808 static void oselect(charset_state *state, int i, int right,
820 static void oselect(charset_state *state, int i, bool right,
809821 void (*emit)(void *ctx, long int output),
810822 void *emitctx)
811823 {
982994 * exact output policy for compound text wants thinking about more
983995 * carefully.
984996 */
985 static int write_iso2022(charset_spec const *charset, long int input_chr,
986 charset_state *state,
987 void (*emit)(void *ctx, long int output),
988 void *emitctx)
997 static bool write_iso2022(charset_spec const *charset, long int input_chr,
998 charset_state *state,
999 void (*emit)(void *ctx, long int output),
1000 void *emitctx)
9891001 {
9901002 int i;
9911003 struct iso2022_subcharset const *subcs;
10161028 if (subcs->type == mode->ltype &&
10171029 subcs->i == mode->li &&
10181030 subcs->f == mode->lf)
1019 oselect(state, i, FALSE, NULL, NULL);
1031 oselect(state, i, false, NULL, NULL);
10201032 if (subcs->type == mode->rtype &&
10211033 subcs->i == mode->ri &&
10221034 subcs->f == mode->rf)
1023 oselect(state, i, TRUE, NULL, NULL);
1035 oselect(state, i, true, NULL, NULL);
10241036 }
10251037 }
10261038
10351047 if (subcs->type == mode->ltype &&
10361048 subcs->i == mode->li &&
10371049 subcs->f == mode->lf)
1038 oselect(state, i, FALSE, emit, emitctx);
1050 oselect(state, i, false, emit, emitctx);
10391051 if (subcs->type == mode->rtype &&
10401052 subcs->i == mode->ri &&
10411053 subcs->f == mode->rf)
1042 oselect(state, i, TRUE, emit, emitctx);
1043 }
1044 return TRUE;
1054 oselect(state, i, true, emit, emitctx);
1055 }
1056 return true;
10451057 }
10461058
10471059 /*
10501062 */
10511063 if (input_chr <= 0x20 || (input_chr >= 0x7F && input_chr < 0xA0)) {
10521064 emit(emitctx, input_chr);
1053 return TRUE;
1065 return true;
10541066 }
10551067
10561068 /*
11021114 }
11031115
11041116 if ((unsigned)i < lenof(iso2022_subcharsets)) {
1105 int right;
1117 bool right;
11061118
11071119 /*
11081120 * Our character is represented by c1 (and possibly also
11521164 }
11531165 }
11541166
1155 return TRUE;
1167 return true;
11561168 }
11571169
11581170 /*
11671179
11681180 for (i = 0; (unsigned)i <= lenof(ctext_encodings); i++) {
11691181 charset_state substate;
1170 charset_spec const *subcs = ctext_encodings[i].subcs;
11711182
11721183 /*
11731184 * We assume that all character sets dealt with by DOCS
11771188 p = data;
11781189
11791190 if ((unsigned)i < lenof(ctext_encodings)) {
1191 charset_spec const *subcs = ctext_encodings[i].subcs;
11801192 if ((mode->enable_mask & (1 << ctext_encodings[i].enable)) &&
11811193 subcs->write(subcs, input_chr, &substate,
11821194 write_to_pointer, &p)) {
11941206
11951207 if (cs != -2) {
11961208 docs_char(state, emit, emitctx, cs, data, p - data);
1197 return TRUE;
1198 }
1199 }
1200
1201 return FALSE;
1209 return true;
1210 }
1211 }
1212
1213 return false;
12021214 }
12031215
12041216 /*
7777 /*
7878 * Is this an 8-bit ISO 2022 subset?
7979 */
80 int eightbit;
80 bool eightbit;
8181
8282 /*
8383 * Function calls to do the actual translation.
8484 */
8585 long int (*to_ucs)(int subcharset, unsigned long bytes);
86 int (*from_ucs)(long int ucs, int *subcharset, unsigned long *bytes);
86 bool (*from_ucs)(long int ucs, int *subcharset, unsigned long *bytes);
8787 };
8888
8989 static void read_iso2022s(charset_spec const *charset, long int input_chr,
325325 }
326326 }
327327
328 static int write_iso2022s(charset_spec const *charset, long int input_chr,
329 charset_state *state,
330 void (*emit)(void *ctx, long int output),
331 void *emitctx)
328 static bool write_iso2022s(charset_spec const *charset, long int input_chr,
329 charset_state *state,
330 void (*emit)(void *ctx, long int output),
331 void *emitctx)
332332 {
333333 struct iso2022 const *iso = (struct iso2022 *)charset->data;
334334 int subcharset, len, i, j, cont, topbit = 0;
346346 * to go in.
347347 */
348348 if (input_chr >= 0 && !iso->from_ucs(input_chr, &subcharset, &bytes))
349 return FALSE;
349 return false;
350350
351351 if (!(state->s1 & 0x80000000)) {
352352 state->s1 = iso->s1;
374374 }
375375 }
376376
377 return TRUE;
377 return true;
378378 }
379379
380380 /*
436436 while (len--)
437437 emit(emitctx, ((bytes >> (8*len)) & 0xFF) | topbit);
438438
439 return TRUE;
439 return true;
440440 }
441441
442442 /*
450450 return 0xA5;
451451 else if (bytes == 0x7E)
452452 return 0x203E;
453 /* else fall through to ASCII */
453 DELIBERATE_FALLTHROUGH; /* else fall through to ASCII */
454454 case 0: return bytes; /* one-byte ASCII */
455455 /* (no break needed since all control paths have returned) */
456456 case 2: return jisx0208_to_unicode(((bytes >> 8) & 0xFF) - 0x21,
458458 default: return ERROR;
459459 }
460460 }
461 static int iso2022jp_from_ucs(long int ucs, int *subcharset,
461 static bool iso2022jp_from_ucs(long int ucs, int *subcharset,
462462 unsigned long *bytes)
463463 {
464464 int r, c;
465465 if (ucs < 0x80) {
466466 *subcharset = 0;
467467 *bytes = ucs;
468 return 1;
468 return true;
469469 } else if (ucs == 0xA5 || ucs == 0x203E) {
470470 *subcharset = 1;
471471 *bytes = (ucs == 0xA5 ? 0x5C : 0x7E);
472 return 1;
472 return true;
473473 } else if (unicode_to_jisx0208(ucs, &r, &c)) {
474474 *subcharset = 2;
475475 *bytes = ((r+0x21) << 8) | (c+0x21);
476 return 1;
476 return true;
477477 } else {
478 return 0;
478 return false;
479479 }
480480 }
481481 static const struct iso2022_escape iso2022jp_escapes[] = {
486486 };
487487 static const struct iso2022 iso2022jp = {
488488 iso2022jp_escapes, lenof(iso2022jp_escapes),
489 "\1\1\2", "\3", 0x80000000, NULL, FALSE,
489 "\1\1\2", "\3", 0x80000000, NULL, false,
490490 iso2022jp_to_ucs, iso2022jp_from_ucs
491491 };
492492 const charset_spec charset_CS_ISO2022_JP = {
505505 default: return ERROR;
506506 }
507507 }
508 static int iso2022kr_from_ucs(long int ucs, int *subcharset,
509 unsigned long *bytes)
508 static bool iso2022kr_from_ucs(long int ucs, int *subcharset,
509 unsigned long *bytes)
510510 {
511511 int r, c;
512512 if (ucs < 0x80) {
513513 *subcharset = 0;
514514 *bytes = ucs;
515 return 1;
515 return true;
516516 } else if (unicode_to_ksx1001(ucs, &r, &c)) {
517517 *subcharset = 1;
518518 *bytes = ((r+0x21) << 8) | (c+0x21);
519 return 1;
519 return true;
520520 } else {
521 return 0;
521 return false;
522522 }
523523 }
524524 static const struct iso2022_escape iso2022kr_escapes[] = {
528528 };
529529 static const struct iso2022 iso2022kr = {
530530 iso2022kr_escapes, lenof(iso2022kr_escapes),
531 "\1\2", "\2", 0x80000040, "\033$)C", FALSE,
531 "\1\2", "\2", 0x80000040, "\033$)C", false,
532532 iso2022kr_to_ucs, iso2022kr_from_ucs
533533 };
534534 const charset_spec charset_CS_ISO2022_KR = {
0 /*
1 * iso6937.c - the _almost_ single-byte character set ISO/IEC 6937.
2 *
3 * Also, a tiny variation on it which adds the Euro sign at the
4 * previously unused position 0xA4, used in DVB metadata.
5 */
6
7 #ifndef ENUM_CHARSETS
8
9 #include "charset.h"
10 #include "internal.h"
11
12 /*
13 * ISO/IEC 6937 is a _mostly_ single-byte character set, except that
14 * the 0xC0-0xCF range of bytes are introducer characters for two-byte
15 * encodings of accented letters.
16 *
17 * You'd be forgiven for mistaking the bytes in the C0-CF range for
18 * something more like combining characters, because the two-byte
19 * encodings are organised in a very semantic way: each introducer
20 * character corresponds to a specific diacritic mark, in the sense
21 * that all the two-byte encodings beginning with that introducer byte
22 * have an ASCII alphabetic character as their second byte and encode
23 * that letter with the given diacritic.
24 *
25 * But it would be a mistake to consider this to have anything to do
26 * with the Unicode combining characters for those diacritics, because
27 * (a) the ISO 6937 diacritic bytes are _prefixes_, not combining
28 * characters applied afterwards; (b) ISO 6937 specifies an exact list
29 * of the permissible second bytes after each introducer; (c) the
30 * right translation of one of these two-byte encodings is the single
31 * Unicode code point for the accented letter, and not a separate pair
32 * of (letter, combining character) code points.
33 *
34 * So this is better viewed as simply a multibyte _encoding_, just
35 * with an unusually mnemonic organisation.
36 *
37 * Implementation strategy: the single-byte encodings for this charset
38 * (or rather, this pair of very similar charsets) are handled by a
39 * pair of mapping tables in sbcs.dat, only declared with the 'tables'
40 * rather than 'charset' keyword so that sbcsgen.pl doesn't generate
41 * the top-level charset_spec. So the read and write functions below
42 * can call sbcs_to_unicode and sbcs_from_unicode on those tables just
43 * like the ones in sbcs.c.
44 *
45 * The two-byte pairs are dealt with using the pair of mapping tables
46 * below. These are generated by Perl from a minimal amount of
47 * starting data that just gives each prefix character along with the
48 * corresponding Unicode combining character and the list of letters
49 * it's allowed to apply to; the Perl script runs over UnicodeData.txt
50 * to achieve the translation of (letter, combining character) pairs
51 * to precombined code points.
52 */
53
54 /*
55
56 perl -e '
57 while (<<>>) {
58 chomp; @_ = split /;/,$_; @d = split / /,$_[5];
59 if (2 == @d) {
60 ($p, $s, $c) = (hex $d[0], hex $d[1], hex $_[0]);
61 $combine{$p,$s} = $c if $p && $s && $c;
62 }
63 }
64 @forward = (" ERROR,") x 0x400;
65 for $t ( [0xC1, 0x300, "AEIOUaeiou" ],
66 [0xC2, 0x301, "ACEILNORSUYZacegilnorsuyz" ],
67 [0xC3, 0x302, "ACEGHIJOSUWYaceghijosuwy" ],
68 [0xC4, 0x303, "AINOUainou" ],
69 [0xC5, 0x304, "AEIOUaeiou" ],
70 [0xC6, 0x306, "AGUagu" ],
71 [0xC7, 0x307, "CEGIZcegz" ],
72 [0xC8, 0x308, "AEIOUYaeiouy" ],
73 [0xCA, 0x30A, "AUau" ],
74 [0xCB, 0x327, "CGKLNRSTcklnrst" ],
75 [0xCD, 0x30B, "OUou" ],
76 [0xCE, 0x328, "AEIUaeiu" ],
77 [0xCF, 0x30C, "CDELNRSTZcdelnrstz" ] ) {
78 ($prefix, $cc, $letters) = @$t;
79 for $letter (unpack "C*", $letters) {
80 $cp = $combine{$letter,$cc};
81 $offset = ($prefix - 0xC0) * 0x40 + ($letter - 0x40);
82 $forward[$offset] = sprintf " 0x%04x,", $cp;
83 push @backward, [$cp, (sprintf " %d,", $offset)];
84 }
85 }
86 @backward = map { $_->[1] } sort {$a->[0] <=> $b->[0]} @backward;
87 print "static const unsigned short iso6937_2byte_forward[0x400] = {\n";
88 $line = " ";
89 for $e (@forward, "sentinel" x 100) {
90 if (length($line.$e) > 77) { print "$line\n"; $line = " "; }
91 $line .= $e;
92 }
93 print "};\n\n";
94 $line = " ";
95 print "static const unsigned short iso6937_2byte_backward[] = {\n";
96 for $e (@backward, "sentinel" x 100) {
97 if (length($line.$e) > 77) { print "$line\n"; $line = " "; }
98 $line .= $e;
99 }
100 print "};\n\n";
101 ' UnicodeData.txt
102
103 */
104
105 static const unsigned short iso6937_2byte_forward[0x400] = {
106 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
107 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
108 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
109 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
110 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
111 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
112 ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c0, ERROR, ERROR, ERROR, 0x00c8,
113 ERROR, ERROR, ERROR, 0x00cc, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00d2,
114 ERROR, ERROR, ERROR, ERROR, ERROR, 0x00d9, ERROR, ERROR, ERROR, ERROR,
115 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00e0, ERROR, ERROR,
116 ERROR, 0x00e8, ERROR, ERROR, ERROR, 0x00ec, ERROR, ERROR, ERROR, ERROR,
117 ERROR, 0x00f2, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00f9, ERROR, ERROR,
118 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c1,
119 ERROR, 0x0106, ERROR, 0x00c9, ERROR, ERROR, ERROR, 0x00cd, ERROR, ERROR,
120 0x0139, ERROR, 0x0143, 0x00d3, ERROR, ERROR, 0x0154, 0x015a, ERROR,
121 0x00da, ERROR, ERROR, ERROR, 0x00dd, 0x0179, ERROR, ERROR, ERROR, ERROR,
122 ERROR, ERROR, 0x00e1, ERROR, 0x0107, ERROR, 0x00e9, ERROR, 0x01f5, ERROR,
123 0x00ed, ERROR, ERROR, 0x013a, ERROR, 0x0144, 0x00f3, ERROR, ERROR,
124 0x0155, 0x015b, ERROR, 0x00fa, ERROR, ERROR, ERROR, 0x00fd, 0x017a,
125 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c2, ERROR, 0x0108, ERROR,
126 0x00ca, ERROR, 0x011c, 0x0124, 0x00ce, 0x0134, ERROR, ERROR, ERROR,
127 ERROR, 0x00d4, ERROR, ERROR, ERROR, 0x015c, ERROR, 0x00db, ERROR, 0x0174,
128 ERROR, 0x0176, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00e2,
129 ERROR, 0x0109, ERROR, 0x00ea, ERROR, 0x011d, 0x0125, 0x00ee, 0x0135,
130 ERROR, ERROR, ERROR, ERROR, 0x00f4, ERROR, ERROR, ERROR, 0x015d, ERROR,
131 0x00fb, ERROR, 0x0175, ERROR, 0x0177, ERROR, ERROR, ERROR, ERROR, ERROR,
132 ERROR, ERROR, 0x00c3, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
133 0x0128, ERROR, ERROR, ERROR, ERROR, 0x00d1, 0x00d5, ERROR, ERROR, ERROR,
134 ERROR, ERROR, 0x0168, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
135 ERROR, ERROR, ERROR, ERROR, 0x00e3, ERROR, ERROR, ERROR, ERROR, ERROR,
136 ERROR, ERROR, 0x0129, ERROR, ERROR, ERROR, ERROR, 0x00f1, 0x00f5, ERROR,
137 ERROR, ERROR, ERROR, ERROR, 0x0169, ERROR, ERROR, ERROR, ERROR, ERROR,
138 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x0100, ERROR, ERROR, ERROR,
139 0x0112, ERROR, ERROR, ERROR, 0x012a, ERROR, ERROR, ERROR, ERROR, ERROR,
140 0x014c, ERROR, ERROR, ERROR, ERROR, ERROR, 0x016a, ERROR, ERROR, ERROR,
141 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x0101, ERROR,
142 ERROR, ERROR, 0x0113, ERROR, ERROR, ERROR, 0x012b, ERROR, ERROR, ERROR,
143 ERROR, ERROR, 0x014d, ERROR, ERROR, ERROR, ERROR, ERROR, 0x016b, ERROR,
144 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
145 0x0102, ERROR, ERROR, ERROR, ERROR, ERROR, 0x011e, ERROR, ERROR, ERROR,
146 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
147 0x016c, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
148 ERROR, ERROR, 0x0103, ERROR, ERROR, ERROR, ERROR, ERROR, 0x011f, ERROR,
149 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
150 ERROR, ERROR, 0x016d, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
151 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x010a, ERROR, 0x0116, ERROR,
152 0x0120, ERROR, 0x0130, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
153 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x017b,
154 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x010b, ERROR,
155 0x0117, ERROR, 0x0121, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
156 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
157 ERROR, 0x017c, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c4, ERROR,
158 ERROR, ERROR, 0x00cb, ERROR, ERROR, ERROR, 0x00cf, ERROR, ERROR, ERROR,
159 ERROR, ERROR, 0x00d6, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00dc, ERROR,
160 ERROR, ERROR, 0x0178, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
161 0x00e4, ERROR, ERROR, ERROR, 0x00eb, ERROR, ERROR, ERROR, 0x00ef, ERROR,
162 ERROR, ERROR, ERROR, ERROR, 0x00f6, ERROR, ERROR, ERROR, ERROR, ERROR,
163 0x00fc, ERROR, ERROR, ERROR, 0x00ff, ERROR, ERROR, ERROR, ERROR, ERROR,
164 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
165 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
166 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
167 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
168 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
169 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
170 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c5, ERROR, ERROR, ERROR,
171 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
172 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x016e, ERROR, ERROR, ERROR,
173 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00e5, ERROR,
174 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
175 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x016f, ERROR,
176 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
177 ERROR, ERROR, 0x00c7, ERROR, ERROR, ERROR, 0x0122, ERROR, ERROR, ERROR,
178 0x0136, 0x013b, ERROR, 0x0145, ERROR, ERROR, ERROR, 0x0156, 0x015e,
179 0x0162, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
180 ERROR, ERROR, ERROR, ERROR, ERROR, 0x00e7, ERROR, ERROR, ERROR, ERROR,
181 ERROR, ERROR, ERROR, 0x0137, 0x013c, ERROR, 0x0146, ERROR, ERROR, ERROR,
182 0x0157, 0x015f, 0x0163, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
183 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
184 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
185 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
186 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
187 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
188 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
189 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
190 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
191 ERROR, ERROR, ERROR, 0x0150, ERROR, ERROR, ERROR, ERROR, ERROR, 0x0170,
192 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
193 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
194 ERROR, ERROR, ERROR, ERROR, ERROR, 0x0151, ERROR, ERROR, ERROR, ERROR,
195 ERROR, 0x0171, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
196 ERROR, ERROR, ERROR, 0x0104, ERROR, ERROR, ERROR, 0x0118, ERROR, ERROR,
197 ERROR, 0x012e, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
198 ERROR, ERROR, ERROR, 0x0172, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
199 ERROR, ERROR, ERROR, ERROR, ERROR, 0x0105, ERROR, ERROR, ERROR, 0x0119,
200 ERROR, ERROR, ERROR, 0x012f, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
201 ERROR, ERROR, ERROR, ERROR, ERROR, 0x0173, ERROR, ERROR, ERROR, ERROR,
202 ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x010c,
203 0x010e, 0x011a, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x013d, ERROR,
204 0x0147, ERROR, ERROR, ERROR, 0x0158, 0x0160, 0x0164, ERROR, ERROR, ERROR,
205 ERROR, ERROR, 0x017d, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
206 ERROR, 0x010d, 0x010f, 0x011b, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
207 0x013e, ERROR, 0x0148, ERROR, ERROR, ERROR, 0x0159, 0x0161, 0x0165,
208 ERROR, ERROR, ERROR, ERROR, ERROR, 0x017e, ERROR, ERROR, ERROR, ERROR,
209 ERROR,
210 };
211
212 static const unsigned short iso6937_2byte_backward[] = {
213 65, 129, 193, 257, 513, 641, 707, 69, 133, 197, 517, 73, 137, 201, 521,
214 270, 79, 143, 207, 271, 527, 85, 149, 213, 533, 153, 97, 161, 225, 289,
215 545, 673, 739, 101, 165, 229, 549, 105, 169, 233, 553, 302, 111, 175,
216 239, 303, 559, 117, 181, 245, 565, 185, 569, 321, 353, 385, 417, 897,
217 929, 131, 163, 195, 227, 451, 483, 963, 995, 964, 996, 325, 357, 453,
218 485, 901, 933, 965, 997, 199, 231, 391, 423, 455, 487, 711, 200, 232,
219 265, 297, 329, 361, 905, 937, 457, 202, 234, 715, 747, 140, 172, 716,
220 748, 972, 1004, 142, 174, 718, 750, 974, 1006, 335, 367, 847, 879, 146,
221 178, 722, 754, 978, 1010, 147, 179, 211, 243, 723, 755, 979, 1011, 724,
222 756, 980, 1012, 277, 309, 341, 373, 405, 437, 661, 693, 853, 885, 917,
223 949, 215, 247, 217, 249, 537, 154, 186, 474, 506, 986, 1018, 167,
224 };
225
226 /* This returns ERROR if prefix is outside 0xC0-0xCF, letter is outside 0x40-0x7F, or the table has no code point for the pair. */
227 static long int iso6937_2byte_to_unicode(int prefix, int letter)
228 {
229 if (!(prefix >= 0xC0 && prefix < 0xD0 && letter >= 0x40 && letter < 0x80))
230 return ERROR;
231 return iso6937_2byte_forward[(prefix - 0xC0) * 0x40 + (letter - 0x40)];
232 }
233
234 /* This returns true if it filled in the output values. It binary-searches iso6937_2byte_backward for an entry mapping to cp. */
235 static bool iso6937_2byte_from_unicode(long int cp, int *prefix, int *letter)
236 {
237 int lo = -1, hi = lenof(iso6937_2byte_backward); /* exclusive bounds: neither lo nor hi is ever used as an index */
238
239 while (hi - lo >= 2) {
240 int mid = (hi + lo) / 2;
241 int midpos = iso6937_2byte_backward[mid]; /* an offset into iso6937_2byte_forward */
242 long int midcp = iso6937_2byte_forward[midpos];
243 if (cp == midcp) {
244 *prefix = 0xC0 + (midpos >> 6); /* undo offset = (prefix - 0xC0) * 0x40 + (letter - 0x40) */
245 *letter = 0x40 + (midpos & 0x3F);
246 return true;
247 } else if (cp < midcp) {
248 hi = mid;
249 } else {
250 lo = mid;
251 }
252 }
253 return false;
254 }
255
256 void read_iso6937(charset_spec const *charset, long int input_chr,
257 charset_state *state,
258 void (*emit)(void *ctx, long int output), void *emitctx)
259 {
260 const sbcs_data *sd = charset->data;
261
262 if (input_chr >= 0xC0 && input_chr < 0xD0) {
263 /*
264 * Input bytes in the C0-CF region of this encoding are
265 * 'combining characters', but not in the Unicode sense of
266 * mapping to separate Unicode code points. Instead, they're
267 * prefixes which modify a specific set of subsequent printing
268 * characters. Stash such a byte in the conversion state to
269 * use in the next call.
270 */
271 if (state->s0) {
272 emit(emitctx, ERROR); /* the previous prefix was erroneous */
273 }
274 state->s0 = input_chr;
275 } else {
276 if (state->s0) {
277 long int output = iso6937_2byte_to_unicode(state->s0, input_chr);
278 emit(emitctx, output);
279 state->s0 = 0;
280
281 /*
282 * If we've successfully emitted a character, we're done.
283 * Otherwise, we'll take the view that the ERROR we've
284 * emitted corresponded to _just_ the misplaced prefix
285 * byte, so we'll fall through to the emit() below which
286 * will output the unmodified followup byte too.
287 */
288 if (output != ERROR)
289 return;
290 }
291
292 emit(emitctx, sbcs_to_unicode(sd, input_chr));
293 }
294 }
295
296 bool write_iso6937(charset_spec const *charset, long int input_chr,
297 charset_state *state,
298 void (*emit)(void *ctx, long int output), void *emitctx)
299 {
300 const struct sbcs_data *sd = charset->data;
301 long int ret;
302 int prefix, letter;
303
304 UNUSEDARG(state);
305
306 if (input_chr == -1)
307 return true; /* stateless; no cleanup required */
308
309 if ((ret = sbcs_from_unicode(sd, input_chr)) != ERROR) { /* the single-byte table is tried first */
310 emit(emitctx, ret);
311 return true;
312 } else if (iso6937_2byte_from_unicode(input_chr, &prefix, &letter)) { /* otherwise try the two-byte form */
313 emit(emitctx, prefix);
314 emit(emitctx, letter);
315 return true;
316 } else {
317 return false; /* neither table can encode input_chr */
318 }
319 }
320
321 extern const sbcs_data sbcsdata_ISO6937, sbcsdata_ISO6937_EURO;
322
323 const charset_spec charset_CS_ISO6937 = {
324 CS_ISO6937, read_iso6937, write_iso6937, &sbcsdata_ISO6937
325 };
326 const charset_spec charset_CS_ISO6937_EURO = {
327 CS_ISO6937_EURO, read_iso6937, write_iso6937, &sbcsdata_ISO6937_EURO
328 };
329
330 #else /* ENUM_CHARSETS */
331
332 ENUM_CHARSET(CS_ISO6937)
333 ENUM_CHARSET(CS_ISO6937_EURO)
334
335 #endif /* ENUM_CHARSETS */
19511951 return jisx0208_forward[r][c];
19521952 }
19531953
1954 /* This one returns 1 on success, 0 if the code point doesn't exist. */
1955 int unicode_to_jisx0208(long int unicode, int *r, int *c)
1954 /* This one returns true on success, false if the code point doesn't exist. */
1955 bool unicode_to_jisx0208(long int unicode, int *r, int *c)
19561956 {
19571957 int rr, cc;
19581958 long int uu;
19721972 else {
19731973 *r = rr;
19741974 *c = cc;
1975 return 1;
1975 return true;
19761976 }
19771977 }
1978 return 0;
1978 return false;
19791979 }
19801980
19811981 #ifdef TESTMODE
18491849 return jisx0212_forward[r][c];
18501850 }
18511851
1852 /* This one returns 1 on success, 0 if the code point doesn't exist. */
1853 int unicode_to_jisx0212(long int unicode, int *r, int *c)
1852 /* This one returns true on success, false if the code point doesn't exist. */
1853 bool unicode_to_jisx0212(long int unicode, int *r, int *c)
18541854 {
18551855 int rr, cc;
18561856 long int uu;
18701870 else {
18711871 *r = rr;
18721872 *c = cc;
1873 return 1;
1873 return true;
18741874 }
18751875 }
1876 return 0;
1876 return false;
18771877 }
18781878
18791879 #ifdef TESTMODE
51685168 return cp949_forward[r][c];
51695169 }
51705170
5171 /* This one returns 1 on success, 0 if the code point doesn't exist. */
5172 int unicode_to_cp949(long int unicode, int *r, int *c)
5171 /* This one returns true on success, false if the code point doesn't exist. */
5172 bool unicode_to_cp949(long int unicode, int *r, int *c)
51735173 {
51745174 int rr, cc;
51755175 long int uu;
51895189 else {
51905190 *r = rr;
51915191 *c = cc;
5192 return 1;
5192 return true;
51935193 }
51945194 }
5195 return 0;
5195 return false;
51965196 }
51975197
51985198 /* Functions dealing with the KS X 1001 square subset */
52035203 return cp949_forward[r+0x21][c+0x61];
52045204 }
52055205
5206 /* This one returns 1 on success, 0 if the code point doesn't exist. */
5207 int unicode_to_ksx1001(long int unicode, int *r, int *c)
5206 /* This one returns true on success, false if the code point doesn't exist. */
5207 bool unicode_to_ksx1001(long int unicode, int *r, int *c)
52085208 {
52095209 int rr, cc;
52105210 if (!unicode_to_cp949(unicode, &rr, &cc))
5211 return 0;
5211 return false;
52125212 rr -= 0x21;
52135213 cc -= 0x61;
52145214 if (rr < 0 || rr >= 94 || cc < 0 || cc >= 94)
5215 return 0;
5215 return false;
52165216 *r = rr;
52175217 *c = cc;
5218 return 1;
5218 return true;
52195219 }
52205220
52215221 #ifdef TESTMODE
2323 static const struct {
2424 const char *name;
2525 int charset;
26 int return_in_enum; /* enumeration misses some charsets */
26 bool return_in_enum; /* enumeration misses some charsets */
2727 } localencs[] = {
28 { "<UNKNOWN>", CS_NONE, 0 },
29 { "ASCII", CS_ASCII, 1 },
30 { "BS 4730", CS_BS4730, 1 },
31 { "BS-4730", CS_BS4730, 0 },
32 { "BS4730", CS_BS4730, 0 },
33 { "ISO-8859-1", CS_ISO8859_1, 1 },
34 { "ISO-8859-1 with X11 line drawing", CS_ISO8859_1_X11, 0 },
35 { "ISO-8859-1-X11", CS_ISO8859_1_X11, 0 },
36 { "ISO8859-1-X11", CS_ISO8859_1_X11, 0 },
37 { "ISO-8859-2", CS_ISO8859_2, 1 },
38 { "ISO-8859-3", CS_ISO8859_3, 1 },
39 { "ISO-8859-4", CS_ISO8859_4, 1 },
40 { "ISO-8859-5", CS_ISO8859_5, 1 },
41 { "ISO-8859-6", CS_ISO8859_6, 1 },
42 { "ISO-8859-7", CS_ISO8859_7, 1 },
43 { "ISO-8859-8", CS_ISO8859_8, 1 },
44 { "ISO-8859-9", CS_ISO8859_9, 1 },
45 { "ISO-8859-10", CS_ISO8859_10, 1 },
46 { "ISO-8859-11", CS_ISO8859_11, 1 },
47 { "ISO-8859-13", CS_ISO8859_13, 1 },
48 { "ISO-8859-14", CS_ISO8859_14, 1 },
49 { "ISO-8859-15", CS_ISO8859_15, 1 },
50 { "ISO-8859-16", CS_ISO8859_16, 1 },
51 { "CP437", CS_CP437, 1 },
52 { "CP850", CS_CP850, 1 },
53 { "CP852", CS_CP852, 1 },
54 { "CP866", CS_CP866, 1 },
55 { "CP874", CS_CP874, 1 },
56 { "Win874", CS_CP874, 0 },
57 { "Win-874", CS_CP874, 0 },
58 { "CP1250", CS_CP1250, 1 },
59 { "Win1250", CS_CP1250, 0 },
60 { "CP1251", CS_CP1251, 1 },
61 { "Win1251", CS_CP1251, 0 },
62 { "CP1252", CS_CP1252, 1 },
63 { "Win1252", CS_CP1252, 0 },
64 { "CP1253", CS_CP1253, 1 },
65 { "Win1253", CS_CP1253, 0 },
66 { "CP1254", CS_CP1254, 1 },
67 { "Win1254", CS_CP1254, 0 },
68 { "CP1255", CS_CP1255, 1 },
69 { "Win1255", CS_CP1255, 0 },
70 { "CP1256", CS_CP1256, 1 },
71 { "Win1256", CS_CP1256, 0 },
72 { "CP1257", CS_CP1257, 1 },
73 { "Win1257", CS_CP1257, 0 },
74 { "CP1258", CS_CP1258, 1 },
75 { "Win1258", CS_CP1258, 0 },
76 { "KOI8-R", CS_KOI8_R, 1 },
77 { "KOI8-U", CS_KOI8_U, 1 },
78 { "KOI8-RU", CS_KOI8_RU, 1 },
79 { "JIS X 0201", CS_JISX0201, 1 },
80 { "JIS-X-0201", CS_JISX0201, 0 },
81 { "JIS_X_0201", CS_JISX0201, 0 },
82 { "JISX0201", CS_JISX0201, 0 },
83 { "Mac Roman", CS_MAC_ROMAN, 1 },
84 { "Mac-Roman", CS_MAC_ROMAN, 0 },
85 { "MacRoman", CS_MAC_ROMAN, 0 },
86 { "Mac Turkish", CS_MAC_TURKISH, 1 },
87 { "Mac-Turkish", CS_MAC_TURKISH, 0 },
88 { "MacTurkish", CS_MAC_TURKISH, 0 },
89 { "Mac Croatian", CS_MAC_CROATIAN, 1 },
90 { "Mac-Croatian", CS_MAC_CROATIAN, 0 },
91 { "MacCroatian", CS_MAC_CROATIAN, 0 },
92 { "Mac Iceland", CS_MAC_ICELAND, 1 },
93 { "Mac-Iceland", CS_MAC_ICELAND, 0 },
94 { "MacIceland", CS_MAC_ICELAND, 0 },
95 { "Mac Romanian", CS_MAC_ROMANIAN, 1 },
96 { "Mac-Romanian", CS_MAC_ROMANIAN, 0 },
97 { "MacRomanian", CS_MAC_ROMANIAN, 0 },
98 { "Mac Greek", CS_MAC_GREEK, 1 },
99 { "Mac-Greek", CS_MAC_GREEK, 0 },
100 { "MacGreek", CS_MAC_GREEK, 0 },
101 { "Mac Cyrillic", CS_MAC_CYRILLIC, 1 },
102 { "Mac-Cyrillic", CS_MAC_CYRILLIC, 0 },
103 { "MacCyrillic", CS_MAC_CYRILLIC, 0 },
104 { "Mac Thai", CS_MAC_THAI, 1 },
105 { "Mac-Thai", CS_MAC_THAI, 0 },
106 { "MacThai", CS_MAC_THAI, 0 },
107 { "Mac Centeuro", CS_MAC_CENTEURO, 1 },
108 { "Mac-Centeuro", CS_MAC_CENTEURO, 0 },
109 { "MacCenteuro", CS_MAC_CENTEURO, 0 },
110 { "Mac Symbol", CS_MAC_SYMBOL, 1 },
111 { "Mac-Symbol", CS_MAC_SYMBOL, 0 },
112 { "MacSymbol", CS_MAC_SYMBOL, 0 },
113 { "Mac Dingbats", CS_MAC_DINGBATS, 1 },
114 { "Mac-Dingbats", CS_MAC_DINGBATS, 0 },
115 { "MacDingbats", CS_MAC_DINGBATS, 0 },
116 { "Mac Roman (old)", CS_MAC_ROMAN_OLD, 0 },
117 { "Mac-Roman-old", CS_MAC_ROMAN_OLD, 0 },
118 { "MacRoman-old", CS_MAC_ROMAN_OLD, 0 },
119 { "Mac Croatian (old)", CS_MAC_CROATIAN_OLD, 0 },
120 { "Mac-Croatian-old", CS_MAC_CROATIAN_OLD, 0 },
121 { "MacCroatian-old", CS_MAC_CROATIAN_OLD, 0 },
122 { "Mac Iceland (old)", CS_MAC_ICELAND_OLD, 0 },
123 { "Mac-Iceland-old", CS_MAC_ICELAND_OLD, 0 },
124 { "MacIceland-old", CS_MAC_ICELAND_OLD, 0 },
125 { "Mac Romanian (old)", CS_MAC_ROMANIAN_OLD, 0 },
126 { "Mac-Romanian-old", CS_MAC_ROMANIAN_OLD, 0 },
127 { "MacRomanian-old", CS_MAC_ROMANIAN_OLD, 0 },
128 { "Mac Greek (old)", CS_MAC_GREEK_OLD, 0 },
129 { "Mac-Greek-old", CS_MAC_GREEK_OLD, 0 },
130 { "MacGreek-old", CS_MAC_GREEK_OLD, 0 },
131 { "Mac Cyrillic (old)", CS_MAC_CYRILLIC_OLD, 0 },
132 { "Mac-Cyrillic-old", CS_MAC_CYRILLIC_OLD, 0 },
133 { "MacCyrillic-old", CS_MAC_CYRILLIC_OLD, 0 },
134 { "Mac Ukraine", CS_MAC_UKRAINE, 1 },
135 { "Mac-Ukraine", CS_MAC_UKRAINE, 0 },
136 { "MacUkraine", CS_MAC_UKRAINE, 0 },
137 { "Mac VT100", CS_MAC_VT100, 1 },
138 { "Mac-VT100", CS_MAC_VT100, 0 },
139 { "MacVT100", CS_MAC_VT100, 0 },
140 { "Mac VT100 (old)", CS_MAC_VT100_OLD, 0 },
141 { "Mac-VT100-old", CS_MAC_VT100_OLD, 0 },
142 { "MacVT100-old", CS_MAC_VT100_OLD, 0 },
143 { "Mac Roman (Pirard encoding)", CS_MAC_PIRARD, 0 },
144 { "Mac Pirard", CS_MAC_PIRARD, 0 },
145 { "Mac-Pirard", CS_MAC_PIRARD, 0 },
146 { "MacPirard", CS_MAC_PIRARD, 0 },
147 { "VISCII", CS_VISCII, 1 },
148 { "HP ROMAN8", CS_HP_ROMAN8, 1 },
149 { "HP-ROMAN8", CS_HP_ROMAN8, 0 },
150 { "DEC MCS", CS_DEC_MCS, 1 },
151 { "DEC-MCS", CS_DEC_MCS, 1 },
152 { "DEC graphics", CS_DEC_GRAPHICS, 1 },
153 { "DEC-graphics", CS_DEC_GRAPHICS, 0 },
154 { "DECgraphics", CS_DEC_GRAPHICS, 0 },
155 { "UTF-8", CS_UTF8, 1 },
156 { "UTF-7", CS_UTF7, 1 },
157 { "UTF-7-conservative", CS_UTF7_CONSERVATIVE, 0 },
158 { "EUC-CN", CS_EUC_CN, 1 },
159 { "EUC-KR", CS_EUC_KR, 1 },
160 { "EUC-JP", CS_EUC_JP, 1 },
161 { "EUC-TW", CS_EUC_TW, 1 },
162 { "ISO-2022-JP", CS_ISO2022_JP, 1 },
163 { "ISO-2022-KR", CS_ISO2022_KR, 1 },
164 { "Big5", CS_BIG5, 1 },
165 { "Shift-JIS", CS_SHIFT_JIS, 1 },
166 { "HZ", CS_HZ, 1 },
167 { "UTF-16BE", CS_UTF16BE, 1 },
168 { "UTF-16LE", CS_UTF16LE, 1 },
169 { "UTF-16", CS_UTF16, 1 },
170 { "CP949", CS_CP949, 1 },
171 { "PDFDocEncoding", CS_PDF, 1 },
172 { "StandardEncoding", CS_PSSTD, 1 },
173 { "COMPOUND_TEXT", CS_CTEXT, 1 },
174 { "COMPOUND-TEXT", CS_CTEXT, 0 },
175 { "COMPOUND TEXT", CS_CTEXT, 0 },
176 { "COMPOUNDTEXT", CS_CTEXT, 0 },
177 { "CTEXT", CS_CTEXT, 0 },
178 { "ISO-2022", CS_ISO2022, 1 },
179 { "ISO2022", CS_ISO2022, 0 },
28 { "<UNKNOWN>", CS_NONE, false },
29 { "ASCII", CS_ASCII, true },
30 { "BS 4730", CS_BS4730, true },
31 { "BS-4730", CS_BS4730, false },
32 { "BS4730", CS_BS4730, false },
33 { "ISO-8859-1", CS_ISO8859_1, true },
34 { "ISO-8859-1 with X11 line drawing", CS_ISO8859_1_X11, false },
35 { "ISO-8859-1-X11", CS_ISO8859_1_X11, false },
36 { "ISO8859-1-X11", CS_ISO8859_1_X11, false },
37 { "ISO-8859-2", CS_ISO8859_2, true },
38 { "ISO-8859-3", CS_ISO8859_3, true },
39 { "ISO-8859-4", CS_ISO8859_4, true },
40 { "ISO-8859-5", CS_ISO8859_5, true },
41 { "ISO-8859-6", CS_ISO8859_6, true },
42 { "ISO-8859-7", CS_ISO8859_7, true },
43 { "ISO-8859-8", CS_ISO8859_8, true },
44 { "ISO-8859-9", CS_ISO8859_9, true },
45 { "ISO-8859-10", CS_ISO8859_10, true },
46 { "ISO-8859-11", CS_ISO8859_11, true },
47 { "ISO-8859-13", CS_ISO8859_13, true },
48 { "ISO-8859-14", CS_ISO8859_14, true },
49 { "ISO-8859-15", CS_ISO8859_15, true },
50 { "ISO-8859-16", CS_ISO8859_16, true },
51 { "CP437", CS_CP437, true },
52 { "CP850", CS_CP850, true },
53 { "CP852", CS_CP852, true },
54 { "CP866", CS_CP866, true },
55 { "CP874", CS_CP874, true },
56 { "Win874", CS_CP874, false },
57 { "Win-874", CS_CP874, false },
58 { "CP1250", CS_CP1250, true },
59 { "Win1250", CS_CP1250, false },
60 { "CP1251", CS_CP1251, true },
61 { "Win1251", CS_CP1251, false },
62 { "CP1252", CS_CP1252, true },
63 { "Win1252", CS_CP1252, false },
64 { "CP1253", CS_CP1253, true },
65 { "Win1253", CS_CP1253, false },
66 { "CP1254", CS_CP1254, true },
67 { "Win1254", CS_CP1254, false },
68 { "CP1255", CS_CP1255, true },
69 { "Win1255", CS_CP1255, false },
70 { "CP1256", CS_CP1256, true },
71 { "Win1256", CS_CP1256, false },
72 { "CP1257", CS_CP1257, true },
73 { "Win1257", CS_CP1257, false },
74 { "CP1258", CS_CP1258, true },
75 { "Win1258", CS_CP1258, false },
76 { "KOI8-R", CS_KOI8_R, true },
77 { "KOI8R", CS_KOI8_R, false },
78 { "KOI8-U", CS_KOI8_U, true },
79 { "KOI8U", CS_KOI8_U, false },
80 { "KOI8-RU", CS_KOI8_RU, true },
81 { "KOI8RU", CS_KOI8_RU, false },
82 { "JIS X 0201", CS_JISX0201, true },
83 { "JIS-X-0201", CS_JISX0201, false },
84 { "JIS_X_0201", CS_JISX0201, false },
85 { "JISX0201", CS_JISX0201, false },
86 { "Mac Roman", CS_MAC_ROMAN, true },
87 { "Mac-Roman", CS_MAC_ROMAN, false },
88 { "MacRoman", CS_MAC_ROMAN, false },
89 { "Mac Turkish", CS_MAC_TURKISH, true },
90 { "Mac-Turkish", CS_MAC_TURKISH, false },
91 { "MacTurkish", CS_MAC_TURKISH, false },
92 { "Mac Croatian", CS_MAC_CROATIAN, true },
93 { "Mac-Croatian", CS_MAC_CROATIAN, false },
94 { "MacCroatian", CS_MAC_CROATIAN, false },
95 { "Mac Iceland", CS_MAC_ICELAND, true },
96 { "Mac-Iceland", CS_MAC_ICELAND, false },
97 { "MacIceland", CS_MAC_ICELAND, false },
98 { "Mac Romanian", CS_MAC_ROMANIAN, true },
99 { "Mac-Romanian", CS_MAC_ROMANIAN, false },
100 { "MacRomanian", CS_MAC_ROMANIAN, false },
101 { "Mac Greek", CS_MAC_GREEK, true },
102 { "Mac-Greek", CS_MAC_GREEK, false },
103 { "MacGreek", CS_MAC_GREEK, false },
104 { "Mac Cyrillic", CS_MAC_CYRILLIC, true },
105 { "Mac-Cyrillic", CS_MAC_CYRILLIC, false },
106 { "MacCyrillic", CS_MAC_CYRILLIC, false },
107 { "Mac Thai", CS_MAC_THAI, true },
108 { "Mac-Thai", CS_MAC_THAI, false },
109 { "MacThai", CS_MAC_THAI, false },
110 { "Mac Centeuro", CS_MAC_CENTEURO, true },
111 { "Mac-Centeuro", CS_MAC_CENTEURO, false },
112 { "MacCenteuro", CS_MAC_CENTEURO, false },
113 { "Mac Symbol", CS_MAC_SYMBOL, true },
114 { "Mac-Symbol", CS_MAC_SYMBOL, false },
115 { "MacSymbol", CS_MAC_SYMBOL, false },
116 { "Mac Dingbats", CS_MAC_DINGBATS, true },
117 { "Mac-Dingbats", CS_MAC_DINGBATS, false },
118 { "MacDingbats", CS_MAC_DINGBATS, false },
119 { "Mac Roman (old)", CS_MAC_ROMAN_OLD, false },
120 { "Mac-Roman-old", CS_MAC_ROMAN_OLD, false },
121 { "MacRoman-old", CS_MAC_ROMAN_OLD, false },
122 { "Mac Croatian (old)", CS_MAC_CROATIAN_OLD, false },
123 { "Mac-Croatian-old", CS_MAC_CROATIAN_OLD, false },
124 { "MacCroatian-old", CS_MAC_CROATIAN_OLD, false },
125 { "Mac Iceland (old)", CS_MAC_ICELAND_OLD, false },
126 { "Mac-Iceland-old", CS_MAC_ICELAND_OLD, false },
127 { "MacIceland-old", CS_MAC_ICELAND_OLD, false },
128 { "Mac Romanian (old)", CS_MAC_ROMANIAN_OLD, false },
129 { "Mac-Romanian-old", CS_MAC_ROMANIAN_OLD, false },
130 { "MacRomanian-old", CS_MAC_ROMANIAN_OLD, false },
131 { "Mac Greek (old)", CS_MAC_GREEK_OLD, false },
132 { "Mac-Greek-old", CS_MAC_GREEK_OLD, false },
133 { "MacGreek-old", CS_MAC_GREEK_OLD, false },
134 { "Mac Cyrillic (old)", CS_MAC_CYRILLIC_OLD, false },
135 { "Mac-Cyrillic-old", CS_MAC_CYRILLIC_OLD, false },
136 { "MacCyrillic-old", CS_MAC_CYRILLIC_OLD, false },
137 { "Mac Ukraine", CS_MAC_UKRAINE, true },
138 { "Mac-Ukraine", CS_MAC_UKRAINE, false },
139 { "MacUkraine", CS_MAC_UKRAINE, false },
140 { "Mac VT100", CS_MAC_VT100, true },
141 { "Mac-VT100", CS_MAC_VT100, false },
142 { "MacVT100", CS_MAC_VT100, false },
143 { "Mac VT100 (old)", CS_MAC_VT100_OLD, false },
144 { "Mac-VT100-old", CS_MAC_VT100_OLD, false },
145 { "MacVT100-old", CS_MAC_VT100_OLD, false },
146 { "Mac Roman (Pirard encoding)", CS_MAC_PIRARD, false },
147 { "Mac Pirard", CS_MAC_PIRARD, false },
148 { "Mac-Pirard", CS_MAC_PIRARD, false },
149 { "MacPirard", CS_MAC_PIRARD, false },
150 { "VISCII", CS_VISCII, true },
151 { "HP ROMAN8", CS_HP_ROMAN8, true },
152 { "HP-ROMAN8", CS_HP_ROMAN8, false },
153 { "DEC MCS", CS_DEC_MCS, true },
154 { "DEC-MCS", CS_DEC_MCS, true },
155 { "DEC graphics", CS_DEC_GRAPHICS, true },
156 { "DEC-graphics", CS_DEC_GRAPHICS, false },
157 { "DECgraphics", CS_DEC_GRAPHICS, false },
158 { "UTF-8", CS_UTF8, true },
159 { "UTF8", CS_UTF8, false },
160 { "UTF-7", CS_UTF7, true },
161 { "UTF7", CS_UTF7, false },
162 { "UTF-7-conservative", CS_UTF7_CONSERVATIVE, false },
163 { "EUC-CN", CS_EUC_CN, true },
164 { "EUC-KR", CS_EUC_KR, true },
165 { "EUC-JP", CS_EUC_JP, true },
166 { "EUC-TW", CS_EUC_TW, true },
167 { "ISO-2022-JP", CS_ISO2022_JP, true },
168 { "ISO-2022-KR", CS_ISO2022_KR, true },
169 { "Big5", CS_BIG5, true },
170 { "Shift-JIS", CS_SHIFT_JIS, true },
171 { "HZ", CS_HZ, true },
172 { "UTF-16BE", CS_UTF16BE, true },
173 { "UTF16BE", CS_UTF16BE, false },
174 { "UTF-16LE", CS_UTF16LE, true },
175 { "UTF16LE", CS_UTF16LE, false },
176 { "UTF-16BE-NO-BOM", CS_UTF16BE_NO_BOM, true },
177 { "UTF-16BE-NOBOM", CS_UTF16BE_NO_BOM, false },
178 { "UTF16BENOBOM", CS_UTF16BE_NO_BOM, false },
179 { "UTF-16LE-NO-BOM", CS_UTF16LE_NO_BOM, true },
180 { "UTF-16LE-NOBOM", CS_UTF16LE_NO_BOM, false },
181 { "UTF16LENOBOM", CS_UTF16LE_NO_BOM, false },
182 { "UTF-16", CS_UTF16, true },
183 { "UTF16", CS_UTF16, false },
184 { "CP949", CS_CP949, true },
185 { "PDFDocEncoding", CS_PDF, true },
186 { "StandardEncoding", CS_PSSTD, true },
187 { "COMPOUND_TEXT", CS_CTEXT, true },
188 { "COMPOUND-TEXT", CS_CTEXT, false },
189 { "COMPOUND TEXT", CS_CTEXT, false },
190 { "COMPOUNDTEXT", CS_CTEXT, false },
191 { "CTEXT", CS_CTEXT, false },
192 { "ISO-2022", CS_ISO2022, true },
193 { "ISO2022", CS_ISO2022, false },
194 { "ISO-6937", CS_ISO6937, true },
195 { "ISO6937", CS_ISO6937, false },
196 { "ISO-6937 with euro sign", CS_ISO6937_EURO, true },
197 { "ISO-6937-euro", CS_ISO6937_EURO, false },
198 { "ISO6937-euro", CS_ISO6937_EURO, false },
199 { "ITS", CS_ITS, true },
200 { "SAIL", CS_SAIL, true },
201 { "WAITS", CS_SAIL, false },
180202 };
181203
182204 const char *charset_to_localenc(int charset)
5050 return ERROR;
5151 }
5252
53 int write_sbcs(charset_spec const *charset, long int input_chr,
54 charset_state *state,
55 void (*emit)(void *ctx, long int output), void *emitctx)
53 bool write_sbcs(charset_spec const *charset, long int input_chr,
54 charset_state *state,
55 void (*emit)(void *ctx, long int output), void *emitctx)
5656 {
5757 const struct sbcs_data *sd = charset->data;
5858 long int ret;
6060 UNUSEDARG(state);
6161
6262 if (input_chr == -1)
63 return TRUE; /* stateless; no cleanup required */
63 return true; /* stateless; no cleanup required */
6464
6565 ret = sbcs_from_unicode(sd, input_chr);
6666 if (ret == ERROR)
67 return FALSE;
67 return false;
6868
6969 emit(emitctx, ret);
70 return TRUE;
70 return true;
7171 }
14871487 2014 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
14881488 XXXX 00C6 XXXX 00AA XXXX XXXX XXXX XXXX 0141 00D8 0152 00BA XXXX XXXX XXXX XXXX
14891489 XXXX 00E6 XXXX XXXX XXXX 0131 XXXX XXXX 0142 00F8 0153 00DF XXXX XXXX XXXX XXXX
1490
1491 ISO/IEC 6937. Or rather, this version is extended to add the usual
1492 C0/C1 controls in the non-printing positions below 0xA0, on the
1493 expectation that text encoded in this character set may still need
1494 interleaving with the usual amenities such as newlines and terminal
1495 escapes.
1496
1497 Source: https://en.wikipedia.org/wiki/ISO/IEC_6937 and manual
1498 transcription.
1499
1500 ISO6937_EURO is a tiny modification to ISO/IEC 6937, used in DVB
1501 (digital broadcast TV) in the metadata and EPG streams. Defined by
1502 Annex A of the DVB standards document EN 300 468, it differs from
1503 ordinary ISO6937 only in the addition of the euro sign in the unused
1504 location 0xA4.
1505
1506 Only the translation tables for the single-byte encodings are stored
1507 here. The rest of the implementation of this pair of charsets is in
1508 custom code, and lives in iso6937.c.
1509
1510 tables ISO6937
1511 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F
1512 0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F
1513 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F
1514 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F
1515 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F
1516 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F
1517 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F
1518 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F
1519 0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 008A 008B 008C 008D 008E 008F
1520 0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 009A 009B 009C 009D 009E 009F
1521 00A0 00A1 00A2 00A3 XXXX 00A5 XXXX 00A7 00A4 2018 201C 00AB 2190 2191 2192 2193
1522 00B0 00B1 00B2 00B3 00D7 00B5 00B6 00B7 00F7 2019 201D 00BB 00BC 00BD 00BE 00BF
1523 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1524 2015 00B9 00AE 00A9 2122 266A 00AC 00A6 XXXX XXXX XXXX XXXX 215B 215C 215D 215E
1525 2126 00C6 0110 00AA 0126 XXXX 0132 013F 0141 00D8 0152 00BA 00DE 0166 014A 0149
1526 0138 00E6 0111 00F0 0127 0131 0133 0140 0142 00F8 0153 00DF 00FE 0167 014B 00AD
1527
1528 tables ISO6937_EURO
1529 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F
1530 0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F
1531 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F
1532 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F
1533 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F
1534 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F
1535 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F
1536 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F
1537 0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 008A 008B 008C 008D 008E 008F
1538 0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 009A 009B 009C 009D 009E 009F
1539 00A0 00A1 00A2 00A3 20AC 00A5 XXXX 00A7 00A4 2018 201C 00AB 2190 2191 2192 2193
1540 00B0 00B1 00B2 00B3 00D7 00B5 00B6 00B7 00F7 2019 201D 00BB 00BC 00BD 00BE 00BF
1541 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1542 2015 00B9 00AE 00A9 2122 266A 00AC 00A6 XXXX XXXX XXXX XXXX 215B 215C 215D 215E
1543 2126 00C6 0110 00AA 0126 XXXX 0132 013F 0141 00D8 0152 00BA 00DE 0166 014A 0149
1544 0138 00E6 0111 00F0 0127 0131 0133 0140 0142 00F8 0153 00DF 00FE 0167 014B 00AD
1545
1546 The ITS character set, standardised in the SUPDUP protocol (RFC
1547 734). Fills in the whole C0 control space and 7F with graphic
1548 characters, on the basis that SUPDUP has its own out-of-band way to
1549 do terminal control.
1550
1551 RFC 734 doesn't give exact Unicode code points for its extra
1552 characters (it couldn't have done so without time travel, of
1553 course!). I've tried to choose the best representation in each case.
1554 In particular, I've chosen position 0x16 to be U+2297 CIRCLED TIMES
1555 rather than U+24E7 CIRCLED LATIN SMALL LETTER X. The RFC says
1556 'circle-X', but since it also has 'circle-plus' at position 0x0D
1557 (which is unambiguously U+2295 CIRCLED PLUS), my feeling is that the
1558 notation in the RFC was not intended to be especially precise, and
1559 the likely uses of the two characters match, i.e. both are intended
1560 to be mathematical rather than literal.
1561
1562 charset CS_ITS
1563 00B7 2193 03B1 03B2 2227 00AC 03B5 03C0 03BB 03B3 03B4 2191 00B1 2295 221E 2202
1564 2282 2283 2229 222A 2200 2203 2297 2194 2190 2192 2260 25CA 2264 2265 2261 2228
1565 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002a 002b 002c 002d 002e 002f
1566 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003a 003b 003c 003d 003e 003f
1567 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004a 004b 004c 004d 004e 004f
1568 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005a 005b 005c 005d 005e 005f
1569 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006a 006b 006c 006d 006e 006f
1570 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007a 007b 007c 007d 007e 222b
1571 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1572 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1573 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1574 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1575 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1576 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1577 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1578 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1579
1580 The SAIL character set, used in the Stanford AI Lab's WAITS
1581 operating system (a modified version of ITS).
1582
1583 Source: https://www.saildart.org/allow/sail-charset-utf8.html plus
1584 some manual editing. The web page gives translations that supersede
1585 ASCII for 00-1F, 5E, 5F, 7B-7F. The rest I've filled in with their
1586 normal ASCII values, so in particular, CR, LF and tab still act as
1587 expected.
1588
1589 Also, that web page lists code point 0C as "form feed as FF symbol",
1590 but for some reason doesn't give the Unicode value U+240C for that.
1591 I've filled it in.
1592
1593 charset CS_SAIL
1594 0000 2193 03b1 03b2 2227 00ac 03b5 03c0 03bb 0009 000a 000b 240c 000d 221e 2202
1595 2282 2283 2229 222a 2200 2203 2297 2194 005f 2192 007e 2260 2264 2265 2261 2228
1596 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002a 002b 002c 002d 002e 002f
1597 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003a 003b 003c 003d 003e 003f
1598 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004a 004b 004c 004d 004e 004f
1599 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005a 005b 005c 005d 2191 2190
1600 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006a 006b 006c 006d 006e 006f
1601 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007a 007b 007c 2387 007d 2408
1602 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1603 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1604 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1605 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1606 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1607 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1608 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
1609 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX
6464 while (<INFH>) {
6565 chomp;
6666 y/\r\n//; # robustness in the face of strange line endings
67 if (/^charset (.*)$/) {
68 $charsetname = $1;
67 if (/^(charset|tables) (.*)$/) {
68 $tables_only = ($1 eq "tables");
69 $charsetname = $2;
6970 @vals = ();
7071 @sortpriority = map { 0 } 0..255;
7172 } elsif (/^sortpriority ([^-]*)-([^-]*) (.*)$/) {
7778 if (scalar @vals > 256) {
7879 die "$infile:$.: charset $charsetname has more than 256 values\n";
7980 } elsif (scalar @vals == 256) {
80 &outcharset($charsetname, \@vals, \@sortpriority)
81 &outcharset($charsetname, \@vals, \@sortpriority, $tables_only)
8182 if defined $outfile;
82 push @charsetnames, $charsetname;
83 push @charsetnames, $charsetname unless $tables_only;
8384 $charsetname = undef;
8485 @vals = ();
8586 @sortpriority = map { 0 } 0..255;
127128 close HEADERFH;
128129 }
129130
130 sub outcharset($$$) {
131 my ($name, $vals, $sortpriority) = @_;
131 sub outcharset($$$$) {
132 my ($name, $vals, $sortpriority, $tables_only) = @_;
132133 my ($prefix, $i, @sorted);
133134
134135 print "const sbcs_data sbcsdata_$name = {\n";
169170 }
170171 printf "\n },\n %d\n", $j;
171172 print "};\n";
172 print "const charset_spec charset_$name = {\n" .
173 " $name, read_sbcs, write_sbcs, &sbcsdata_$name\n};\n\n";
173 unless ($tables_only) {
174 print "const charset_spec charset_$name = {\n" .
175 " $name, read_sbcs, write_sbcs, &sbcsdata_$name\n};\n\n";
176 }
174177 }
7676 * charset_state.
7777 */
7878
79 static int write_sjis(charset_spec const *charset, long int input_chr,
80 charset_state *state,
81 void (*emit)(void *ctx, long int output), void *emitctx)
79 static bool write_sjis(charset_spec const *charset, long int input_chr,
80 charset_state *state,
81 void (*emit)(void *ctx, long int output), void *emitctx)
8282 {
8383 UNUSEDARG(charset);
8484 UNUSEDARG(state);
8585
8686 if (input_chr == -1)
87 return TRUE; /* stateless; no cleanup required */
87 return true; /* stateless; no cleanup required */
8888
8989 if (input_chr < 0x80 && input_chr != 0x5C && input_chr != 0x7E) {
9090 emit(emitctx, input_chr);
91 return TRUE;
91 return true;
9292 } else if (input_chr == 0xA5) {
9393 emit(emitctx, 0x5C);
94 return TRUE;
94 return true;
9595 } else if (input_chr == 0x203E) {
9696 emit(emitctx, 0x7E);
97 return TRUE;
97 return true;
9898 } else if (input_chr >= 0xFF61 && input_chr <= 0xFF9F) {
9999 emit(emitctx, input_chr - (0xFF61 - 0xA1));
100 return TRUE;
100 return true;
101101 } else {
102102 int r, c;
103103 if (unicode_to_jisx0208(input_chr, &r, &c)) {
109109 if (c >= 0x7F) c++;
110110 emit(emitctx, r);
111111 emit(emitctx, c);
112 return TRUE;
112 return true;
113113 } else {
114 return FALSE;
114 return false;
115115 }
116116 }
117117 }
55 #include "internal.h"
66
77 #define ENUM_CHARSET(x) extern charset_spec const charset_##x;
8 #include "enum.c"
8 #include "enum.h"
99 #undef ENUM_CHARSET
1010
1111 static charset_spec const *const cs_table[] = {
1212
1313 #define ENUM_CHARSET(x) &charset_##x,
14 #include "enum.c"
14 #include "enum.h"
1515 #undef ENUM_CHARSET
1616
1717 };
2727 return NULL;
2828 }
2929
30 int charset_exists(int charset)
30 bool charset_exists(int charset)
3131 {
3232 return charset_find_spec(charset) != NULL;
3333 }
3434
35 int charset_is_single_byte(int charset)
35 bool charset_is_single_byte(int charset)
3636 {
3737 charset_spec const *spec = charset_find_spec(charset);
3838 return spec && spec->read == read_sbcs;
5252 }
5353
5454 /*
55 * This function returns TRUE if the input charset is a vaguely
56 * sensible superset of ASCII. That is, it returns FALSE for 7-bit
55 * This function returns true if the input charset is a vaguely
56 * sensible superset of ASCII. That is, it returns false for 7-bit
5757 * encoding formats such as HZ and UTF-7.
5858 */
59 int charset_contains_ascii(int charset)
59 bool charset_contains_ascii(int charset)
6060 {
6161 return (charset != CS_HZ &&
6262 charset != CS_UTF7 &&
1010 int writtenlen;
1111 const wchar_t *errstr;
1212 int errlen;
13 int stopped;
13 bool stopped;
1414 };
1515
1616 static void unicode_emit(void *ctx, long int output)
4545 param->writtenlen++;
4646 }
4747 } else {
48 param->stopped = 1;
48 param->stopped = true;
4949 }
5050 }
5151
6363 param.errstr = errstr;
6464 param.errlen = errlen;
6565 param.writtenlen = 0;
66 param.stopped = 0;
66 param.stopped = false;
6767
6868 if (state)
6969 localstate = *state; /* structure copy */
88
99 struct utf16 {
1010 int s0; /* initial value of state->s0 */
11 bool output_bom;
1112 };
1213
1314 static void read_utf16(charset_spec const *charset, long int input_chr,
153154 }
154155 }
155156
156 static int write_utf16(charset_spec const *charset, long int input_chr,
157 charset_state *state,
158 void (*emit)(void *ctx, long int output),
159 void *emitctx)
157 static bool write_utf16(charset_spec const *charset, long int input_chr,
158 charset_state *state,
159 void (*emit)(void *ctx, long int output),
160 void *emitctx)
160161 {
161162 struct utf16 const *utf = (struct utf16 *)charset->data;
162163
167168 */
168169
169170 if (input_chr < 0)
170 return TRUE; /* no cleanup required */
171 return true; /* no cleanup required */
171172
172173 if ((input_chr >= 0xD800 && input_chr < 0xE000) ||
173174 input_chr >= 0x110000) {
174175 /*
175176 * We can't output surrogates, or anything above 0x10FFFF.
176177 */
177 return FALSE;
178 return false;
178179 }
179180
180181 if (!state->s0) {
181182 state->s0 = 1;
182 emithl(emit, emitctx, utf->s0, 0xFEFF);
183 if (utf->output_bom)
184 emithl(emit, emitctx, utf->s0, 0xFEFF);
183185 }
184186
185187 if (input_chr < 0x10000) {
190192 emithl(emit, emitctx, utf->s0, 0xD800 | ((input_chr >> 10) & 0x3FF));
191193 emithl(emit, emitctx, utf->s0, 0xDC00 | (input_chr & 0x3FF));
192194 }
193 return TRUE;
195 return true;
194196 }
195197
196 static const struct utf16 utf16_bigendian = { 0x20000 };
197 static const struct utf16 utf16_littleendian = { 0x10000 };
198 static const struct utf16 utf16_variable_endianness = { 0x30000 };
198 static const struct utf16 utf16_bigendian = { 0x20000, true };
199 static const struct utf16 utf16_littleendian = { 0x10000, true };
200 static const struct utf16 utf16_bigendian_no_bom = { 0x20000, false };
201 static const struct utf16 utf16_littleendian_no_bom = { 0x10000, false };
202 static const struct utf16 utf16_variable_endianness = { 0x30000, true };
199203
200204 const charset_spec charset_CS_UTF16BE = {
201205 CS_UTF16BE, read_utf16, write_utf16, &utf16_bigendian
202206 };
203207 const charset_spec charset_CS_UTF16LE = {
204208 CS_UTF16LE, read_utf16, write_utf16, &utf16_littleendian
209 };
210 const charset_spec charset_CS_UTF16BE_NO_BOM = {
211 CS_UTF16BE_NO_BOM, read_utf16, write_utf16, &utf16_bigendian_no_bom
212 };
213 const charset_spec charset_CS_UTF16LE_NO_BOM = {
214 CS_UTF16LE_NO_BOM, read_utf16, write_utf16, &utf16_littleendian_no_bom
205215 };
206216 const charset_spec charset_CS_UTF16 = {
207217 CS_UTF16, read_utf16, write_utf16, &utf16_variable_endianness
212222 ENUM_CHARSET(CS_UTF16)
213223 ENUM_CHARSET(CS_UTF16BE)
214224 ENUM_CHARSET(CS_UTF16LE)
225 ENUM_CHARSET(CS_UTF16BE_NO_BOM)
226 ENUM_CHARSET(CS_UTF16LE_NO_BOM)
215227
216228 #endif /* ENUM_CHARSETS */
164164 * which will directly encode Set O characters and the other of
165165 * which will cautiously base64 them.
166166 */
167 static int write_utf7(charset_spec const *charset, long int input_chr,
168 charset_state *state,
169 void (*emit)(void *ctx, long int output),
170 void *emitctx)
167 static bool write_utf7(charset_spec const *charset, long int input_chr,
168 charset_state *state,
169 void (*emit)(void *ctx, long int output),
170 void *emitctx)
171171 {
172172 unsigned long hws[2];
173173 int nhws;
184184 /*
185185 * We can't output surrogates, or anything above 0x10FFFF.
186186 */
187 return FALSE;
187 return false;
188188 }
189189
190190 /*
222222 emit(emitctx, input_chr);
223223 if (input_chr == '+')
224224 emit(emitctx, '-'); /* +- encodes + */
225 return TRUE;
225 return true;
226226 }
227227
228228 /*
237237 input_chr -= 0x10000;
238238 if (input_chr >= 0x100000) {
239239 /* Anything above 0x10FFFF is outside UTF-7 range. */
240 return FALSE;
240 return false;
241241 }
242242
243243 nhws = 2;
275275 emit(emitctx, base64_chars[out]);
276276 }
277277 }
278 return TRUE;
278 return true;
279279 }
280280
281281 const charset_spec charset_CS_UTF7 = {
198198 * charset_state.
199199 */
200200
201 int write_utf8(charset_spec const *charset, long int input_chr,
202 charset_state *state,
203 void (*emit)(void *ctx, long int output),
204 void *emitctx)
201 bool write_utf8(charset_spec const *charset, long int input_chr,
202 charset_state *state,
203 void (*emit)(void *ctx, long int output),
204 void *emitctx)
205205 {
206206 UNUSEDARG(charset);
207207 UNUSEDARG(state);
208208
209209 if (input_chr == -1)
210 return TRUE; /* stateless; no cleanup required */
210 return true; /* stateless; no cleanup required */
211211
212212 /*
213213 * Refuse to output any illegal code points.
214214 */
215215 if (input_chr == 0xFFFE || input_chr == 0xFFFF ||
216216 (input_chr >= 0xD800 && input_chr < 0xE000)) {
217 return FALSE;
217 return false;
218218 } else if (input_chr < 0x80) { /* one-byte character */
219219 emit(emitctx, input_chr);
220 return TRUE;
220 return true;
221221 } else if (input_chr < 0x800) { /* two-byte character */
222222 emit(emitctx, 0xC0 | (0x1F & (input_chr >> 6)));
223223 emit(emitctx, 0x80 | (0x3F & (input_chr )));
224 return TRUE;
224 return true;
225225 } else if (input_chr < 0x10000) { /* three-byte character */
226226 emit(emitctx, 0xE0 | (0x0F & (input_chr >> 12)));
227227 emit(emitctx, 0x80 | (0x3F & (input_chr >> 6)));
228228 emit(emitctx, 0x80 | (0x3F & (input_chr )));
229 return TRUE;
229 return true;
230230 } else if (input_chr < 0x200000) { /* four-byte character */
231231 emit(emitctx, 0xF0 | (0x07 & (input_chr >> 18)));
232232 emit(emitctx, 0x80 | (0x3F & (input_chr >> 12)));
233233 emit(emitctx, 0x80 | (0x3F & (input_chr >> 6)));
234234 emit(emitctx, 0x80 | (0x3F & (input_chr )));
235 return TRUE;
235 return true;
236236 } else if (input_chr < 0x4000000) {/* five-byte character */
237237 emit(emitctx, 0xF8 | (0x03 & (input_chr >> 24)));
238238 emit(emitctx, 0x80 | (0x3F & (input_chr >> 18)));
239239 emit(emitctx, 0x80 | (0x3F & (input_chr >> 12)));
240240 emit(emitctx, 0x80 | (0x3F & (input_chr >> 6)));
241241 emit(emitctx, 0x80 | (0x3F & (input_chr )));
242 return TRUE;
242 return true;
243243 } else { /* six-byte character */
244244 emit(emitctx, 0xFC | (0x01 & (input_chr >> 30)));
245245 emit(emitctx, 0x80 | (0x3F & (input_chr >> 24)));
247247 emit(emitctx, 0x80 | (0x3F & (input_chr >> 12)));
248248 emit(emitctx, 0x80 | (0x3F & (input_chr >> 6)));
249249 emit(emitctx, 0x80 | (0x3F & (input_chr )));
250 return TRUE;
250 return true;
251251 }
252252 }
253253
289289 }
290290 if (l != str[i]) {
291291 printf("%d: char %d came out as %08x, should be %08x\n",
292 line, i, str[i], l);
292 line, i, str[i], (unsigned)l);
293293 total_errs++;
294294 }
295295 }
330330 }
331331 if (l != str[i]) {
332332 printf("%d: char %d came out as %08x, should be %08x\n",
333 line, i, str[i], l);
333 line, i, str[i], (unsigned)l);
334334 total_errs++;
335335 }
336336 }
351351 {
352352 printf("read tests beginning\n");
353353 utf8_read_test(TESTSTR("\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"),
354 0x000003BA, /* GREEK SMALL LETTER KAPPA */
355 0x00001F79, /* GREEK SMALL LETTER OMICRON WITH OXIA */
356 0x000003C3, /* GREEK SMALL LETTER SIGMA */
357 0x000003BC, /* GREEK SMALL LETTER MU */
358 0x000003B5, /* GREEK SMALL LETTER EPSILON */
359 0, -1);
354 0x000003BAL, /* GREEK SMALL LETTER KAPPA */
355 0x00001F79L, /* GREEK SMALL LETTER OMICRON WITH OXIA */
356 0x000003C3L, /* GREEK SMALL LETTER SIGMA */
357 0x000003BCL, /* GREEK SMALL LETTER MU */
358 0x000003B5L, /* GREEK SMALL LETTER EPSILON */
359 0L, -1L);
360360 utf8_read_test(TESTSTR("\x00"),
361 0x00000000, /* <control> */
362 0, -1);
361 0x00000000L, /* <control> */
362 0L, -1L);
363363 utf8_read_test(TESTSTR("\xC2\x80"),
364 0x00000080, /* <control> */
365 0, -1);
364 0x00000080L, /* <control> */
365 0L, -1L);
366366 utf8_read_test(TESTSTR("\xE0\xA0\x80"),
367 0x00000800, /* <no name available> */
368 0, -1);
367 0x00000800L, /* <no name available> */
368 0L, -1L);
369369 utf8_read_test(TESTSTR("\xF0\x90\x80\x80"),
370 0x00010000, /* <no name available> */
371 0, -1);
370 0x00010000L, /* <no name available> */
371 0L, -1L);
372372 utf8_read_test(TESTSTR("\xF8\x88\x80\x80\x80"),
373 0x00200000, /* <no name available> */
374 0, -1);
373 0x00200000L, /* <no name available> */
374 0L, -1L);
375375 utf8_read_test(TESTSTR("\xFC\x84\x80\x80\x80\x80"),
376 0x04000000, /* <no name available> */
377 0, -1);
376 0x04000000L, /* <no name available> */
377 0L, -1L);
378378 utf8_read_test(TESTSTR("\x7F"),
379 0x0000007F, /* <control> */
380 0, -1);
379 0x0000007FL, /* <control> */
380 0L, -1L);
381381 utf8_read_test(TESTSTR("\xDF\xBF"),
382 0x000007FF, /* <no name available> */
383 0, -1);
382 0x000007FFL, /* <no name available> */
383 0L, -1L);
384384 utf8_read_test(TESTSTR("\xEF\xBF\xBD"),
385 0x0000FFFD, /* REPLACEMENT CHARACTER */
386 0, -1);
385 0x0000FFFDL, /* REPLACEMENT CHARACTER */
386 0L, -1L);
387387 utf8_read_test(TESTSTR("\xEF\xBF\xBF"),
388388 ERROR, /* <no name available> (invalid char) */
389 0, -1);
389 0L, -1L);
390390 utf8_read_test(TESTSTR("\xF7\xBF\xBF\xBF"),
391 0x001FFFFF, /* <no name available> */
392 0, -1);
391 0x001FFFFFL, /* <no name available> */
392 0L, -1L);
393393 utf8_read_test(TESTSTR("\xFB\xBF\xBF\xBF\xBF"),
394 0x03FFFFFF, /* <no name available> */
395 0, -1);
394 0x03FFFFFFL, /* <no name available> */
395 0L, -1L);
396396 utf8_read_test(TESTSTR("\xFD\xBF\xBF\xBF\xBF\xBF"),
397 0x7FFFFFFF, /* <no name available> */
398 0, -1);
397 0x7FFFFFFFL, /* <no name available> */
398 0L, -1L);
399399 utf8_read_test(TESTSTR("\xED\x9F\xBF"),
400 0x0000D7FF, /* <no name available> */
401 0, -1);
400 0x0000D7FFL, /* <no name available> */
401 0L, -1L);
402402 utf8_read_test(TESTSTR("\xEE\x80\x80"),
403 0x0000E000, /* <Private Use, First> */
404 0, -1);
403 0x0000E000L, /* <Private Use, First> */
404 0L, -1L);
405405 utf8_read_test(TESTSTR("\xEF\xBF\xBD"),
406 0x0000FFFD, /* REPLACEMENT CHARACTER */
407 0, -1);
406 0x0000FFFDL, /* REPLACEMENT CHARACTER */
407 0L, -1L);
408408 utf8_read_test(TESTSTR("\xF4\x8F\xBF\xBF"),
409 0x0010FFFF, /* <no name available> */
410 0, -1);
409 0x0010FFFFL, /* <no name available> */
410 0L, -1L);
411411 utf8_read_test(TESTSTR("\xF4\x90\x80\x80"),
412 0x00110000, /* <no name available> */
413 0, -1);
412 0x00110000L, /* <no name available> */
413 0L, -1L);
414414 utf8_read_test(TESTSTR("\x80"),
415415 ERROR, /* (unexpected continuation byte) */
416 0, -1);
416 0L, -1L);
417417 utf8_read_test(TESTSTR("\xBF"),
418418 ERROR, /* (unexpected continuation byte) */
419 0, -1);
419 0L, -1L);
420420 utf8_read_test(TESTSTR("\x80\xBF"),
421421 ERROR, /* (unexpected continuation byte) */
422422 ERROR, /* (unexpected continuation byte) */
423 0, -1);
423 0L, -1L);
424424 utf8_read_test(TESTSTR("\x80\xBF\x80"),
425425 ERROR, /* (unexpected continuation byte) */
426426 ERROR, /* (unexpected continuation byte) */
427427 ERROR, /* (unexpected continuation byte) */
428 0, -1);
428 0L, -1L);
429429 utf8_read_test(TESTSTR("\x80\xBF\x80\xBF"),
430430 ERROR, /* (unexpected continuation byte) */
431431 ERROR, /* (unexpected continuation byte) */
432432 ERROR, /* (unexpected continuation byte) */
433433 ERROR, /* (unexpected continuation byte) */
434 0, -1);
434 0L, -1L);
435435 utf8_read_test(TESTSTR("\x80\xBF\x80\xBF\x80"),
436436 ERROR, /* (unexpected continuation byte) */
437437 ERROR, /* (unexpected continuation byte) */
438438 ERROR, /* (unexpected continuation byte) */
439439 ERROR, /* (unexpected continuation byte) */
440440 ERROR, /* (unexpected continuation byte) */
441 0, -1);
441 0L, -1L);
442442 utf8_read_test(TESTSTR("\x80\xBF\x80\xBF\x80\xBF"),
443443 ERROR, /* (unexpected continuation byte) */
444444 ERROR, /* (unexpected continuation byte) */
446446 ERROR, /* (unexpected continuation byte) */
447447 ERROR, /* (unexpected continuation byte) */
448448 ERROR, /* (unexpected continuation byte) */
449 0, -1);
449 0L, -1L);
450450 utf8_read_test(TESTSTR("\x80\xBF\x80\xBF\x80\xBF\x80"),
451451 ERROR, /* (unexpected continuation byte) */
452452 ERROR, /* (unexpected continuation byte) */
455455 ERROR, /* (unexpected continuation byte) */
456456 ERROR, /* (unexpected continuation byte) */
457457 ERROR, /* (unexpected continuation byte) */
458 0, -1);
458 0L, -1L);
459459 utf8_read_test(TESTSTR("\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"),
460460 ERROR, /* (unexpected continuation byte) */
461461 ERROR, /* (unexpected continuation byte) */
521521 ERROR, /* (unexpected continuation byte) */
522522 ERROR, /* (unexpected continuation byte) */
523523 ERROR, /* (unexpected continuation byte) */
524 0, -1);
524 0L, -1L);
525525 utf8_read_test(TESTSTR("\xC0\x20\xC1\x20\xC2\x20\xC3\x20\xC4\x20\xC5\x20\xC6\x20\xC7\x20"),
526526 ERROR, /* (incomplete sequence) */
527 0x00000020, /* SPACE */
528 ERROR, /* (incomplete sequence) */
529 0x00000020, /* SPACE */
530 ERROR, /* (incomplete sequence) */
531 0x00000020, /* SPACE */
532 ERROR, /* (incomplete sequence) */
533 0x00000020, /* SPACE */
534 ERROR, /* (incomplete sequence) */
535 0x00000020, /* SPACE */
536 ERROR, /* (incomplete sequence) */
537 0x00000020, /* SPACE */
538 ERROR, /* (incomplete sequence) */
539 0x00000020, /* SPACE */
540 ERROR, /* (incomplete sequence) */
541 0x00000020, /* SPACE */
542 0, -1);
527 0x00000020L, /* SPACE */
528 ERROR, /* (incomplete sequence) */
529 0x00000020L, /* SPACE */
530 ERROR, /* (incomplete sequence) */
531 0x00000020L, /* SPACE */
532 ERROR, /* (incomplete sequence) */
533 0x00000020L, /* SPACE */
534 ERROR, /* (incomplete sequence) */
535 0x00000020L, /* SPACE */
536 ERROR, /* (incomplete sequence) */
537 0x00000020L, /* SPACE */
538 ERROR, /* (incomplete sequence) */
539 0x00000020L, /* SPACE */
540 ERROR, /* (incomplete sequence) */
541 0x00000020L, /* SPACE */
542 0L, -1L);
543543 utf8_read_test(TESTSTR("\xE0\x20\xE1\x20\xE2\x20\xE3\x20\xE4\x20\xE5\x20\xE6\x20\xE7\x20\xE8\x20\xE9\x20\xEA\x20\xEB\x20\xEC\x20\xED\x20\xEE\x20\xEF\x20"),
544544 ERROR, /* (incomplete sequence) */
545 0x00000020, /* SPACE */
546 ERROR, /* (incomplete sequence) */
547 0x00000020, /* SPACE */
548 ERROR, /* (incomplete sequence) */
549 0x00000020, /* SPACE */
550 ERROR, /* (incomplete sequence) */
551 0x00000020, /* SPACE */
552 ERROR, /* (incomplete sequence) */
553 0x00000020, /* SPACE */
554 ERROR, /* (incomplete sequence) */
555 0x00000020, /* SPACE */
556 ERROR, /* (incomplete sequence) */
557 0x00000020, /* SPACE */
558 ERROR, /* (incomplete sequence) */
559 0x00000020, /* SPACE */
560 ERROR, /* (incomplete sequence) */
561 0x00000020, /* SPACE */
562 ERROR, /* (incomplete sequence) */
563 0x00000020, /* SPACE */
564 ERROR, /* (incomplete sequence) */
565 0x00000020, /* SPACE */
566 ERROR, /* (incomplete sequence) */
567 0x00000020, /* SPACE */
568 ERROR, /* (incomplete sequence) */
569 0x00000020, /* SPACE */
570 ERROR, /* (incomplete sequence) */
571 0x00000020, /* SPACE */
572 ERROR, /* (incomplete sequence) */
573 0x00000020, /* SPACE */
574 ERROR, /* (incomplete sequence) */
575 0x00000020, /* SPACE */
576 0, -1);
545 0x00000020L, /* SPACE */
546 ERROR, /* (incomplete sequence) */
547 0x00000020L, /* SPACE */
548 ERROR, /* (incomplete sequence) */
549 0x00000020L, /* SPACE */
550 ERROR, /* (incomplete sequence) */
551 0x00000020L, /* SPACE */
552 ERROR, /* (incomplete sequence) */
553 0x00000020L, /* SPACE */
554 ERROR, /* (incomplete sequence) */
555 0x00000020L, /* SPACE */
556 ERROR, /* (incomplete sequence) */
557 0x00000020L, /* SPACE */
558 ERROR, /* (incomplete sequence) */
559 0x00000020L, /* SPACE */
560 ERROR, /* (incomplete sequence) */
561 0x00000020L, /* SPACE */
562 ERROR, /* (incomplete sequence) */
563 0x00000020L, /* SPACE */
564 ERROR, /* (incomplete sequence) */
565 0x00000020L, /* SPACE */
566 ERROR, /* (incomplete sequence) */
567 0x00000020L, /* SPACE */
568 ERROR, /* (incomplete sequence) */
569 0x00000020L, /* SPACE */
570 ERROR, /* (incomplete sequence) */
571 0x00000020L, /* SPACE */
572 ERROR, /* (incomplete sequence) */
573 0x00000020L, /* SPACE */
574 ERROR, /* (incomplete sequence) */
575 0x00000020L, /* SPACE */
576 0L, -1L);
577577 utf8_read_test(TESTSTR("\xF0\x20\xF1\x20\xF2\x20\xF3\x20\xF4\x20\xF5\x20\xF6\x20\xF7\x20"),
578578 ERROR, /* (incomplete sequence) */
579 0x00000020, /* SPACE */
580 ERROR, /* (incomplete sequence) */
581 0x00000020, /* SPACE */
582 ERROR, /* (incomplete sequence) */
583 0x00000020, /* SPACE */
584 ERROR, /* (incomplete sequence) */
585 0x00000020, /* SPACE */
586 ERROR, /* (incomplete sequence) */
587 0x00000020, /* SPACE */
588 ERROR, /* (incomplete sequence) */
589 0x00000020, /* SPACE */
590 ERROR, /* (incomplete sequence) */
591 0x00000020, /* SPACE */
592 ERROR, /* (incomplete sequence) */
593 0x00000020, /* SPACE */
594 0, -1);
579 0x00000020L, /* SPACE */
580 ERROR, /* (incomplete sequence) */
581 0x00000020L, /* SPACE */
582 ERROR, /* (incomplete sequence) */
583 0x00000020L, /* SPACE */
584 ERROR, /* (incomplete sequence) */
585 0x00000020L, /* SPACE */
586 ERROR, /* (incomplete sequence) */
587 0x00000020L, /* SPACE */
588 ERROR, /* (incomplete sequence) */
589 0x00000020L, /* SPACE */
590 ERROR, /* (incomplete sequence) */
591 0x00000020L, /* SPACE */
592 ERROR, /* (incomplete sequence) */
593 0x00000020L, /* SPACE */
594 0L, -1L);
595595 utf8_read_test(TESTSTR("\xF8\x20\xF9\x20\xFA\x20\xFB\x20"),
596596 ERROR, /* (incomplete sequence) */
597 0x00000020, /* SPACE */
598 ERROR, /* (incomplete sequence) */
599 0x00000020, /* SPACE */
600 ERROR, /* (incomplete sequence) */
601 0x00000020, /* SPACE */
602 ERROR, /* (incomplete sequence) */
603 0x00000020, /* SPACE */
604 0, -1);
597 0x00000020L, /* SPACE */
598 ERROR, /* (incomplete sequence) */
599 0x00000020L, /* SPACE */
600 ERROR, /* (incomplete sequence) */
601 0x00000020L, /* SPACE */
602 ERROR, /* (incomplete sequence) */
603 0x00000020L, /* SPACE */
604 0L, -1L);
605605 utf8_read_test(TESTSTR("\xFC\x20\xFD\x20"),
606606 ERROR, /* (incomplete sequence) */
607 0x00000020, /* SPACE */
608 ERROR, /* (incomplete sequence) */
609 0x00000020, /* SPACE */
610 0, -1);
607 0x00000020L, /* SPACE */
608 ERROR, /* (incomplete sequence) */
609 0x00000020L, /* SPACE */
610 0L, -1L);
611611 utf8_read_test(TESTSTR("\xC0"),
612612 ERROR, /* (incomplete sequence) */
613 0, -1);
613 0L, -1L);
614614 utf8_read_test(TESTSTR("\xE0\x80"),
615615 ERROR, /* (incomplete sequence) */
616 0, -1);
616 0L, -1L);
617617 utf8_read_test(TESTSTR("\xF0\x80\x80"),
618618 ERROR, /* (incomplete sequence) */
619 0, -1);
619 0L, -1L);
620620 utf8_read_test(TESTSTR("\xF8\x80\x80\x80"),
621621 ERROR, /* (incomplete sequence) */
622 0, -1);
622 0L, -1L);
623623 utf8_read_test(TESTSTR("\xFC\x80\x80\x80\x80"),
624624 ERROR, /* (incomplete sequence) */
625 0, -1);
625 0L, -1L);
626626 utf8_read_test(TESTSTR("\xDF"),
627627 ERROR, /* (incomplete sequence) */
628 0, -1);
628 0L, -1L);
629629 utf8_read_test(TESTSTR("\xEF\xBF"),
630630 ERROR, /* (incomplete sequence) */
631 0, -1);
631 0L, -1L);
632632 utf8_read_test(TESTSTR("\xF7\xBF\xBF"),
633633 ERROR, /* (incomplete sequence) */
634 0, -1);
634 0L, -1L);
635635 utf8_read_test(TESTSTR("\xFB\xBF\xBF\xBF"),
636636 ERROR, /* (incomplete sequence) */
637 0, -1);
637 0L, -1L);
638638 utf8_read_test(TESTSTR("\xFD\xBF\xBF\xBF\xBF"),
639639 ERROR, /* (incomplete sequence) */
640 0, -1);
640 0L, -1L);
641641 utf8_read_test(TESTSTR("\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF"),
642642 ERROR, /* (incomplete sequence) */
643643 ERROR, /* (incomplete sequence) */
649649 ERROR, /* (incomplete sequence) */
650650 ERROR, /* (incomplete sequence) */
651651 ERROR, /* (incomplete sequence) */
652 0, -1);
652 0L, -1L);
653653 utf8_read_test(TESTSTR("\xFE"),
654654 ERROR, /* (invalid UTF-8 byte) */
655 0, -1);
655 0L, -1L);
656656 utf8_read_test(TESTSTR("\xFF"),
657657 ERROR, /* (invalid UTF-8 byte) */
658 0, -1);
658 0L, -1L);
659659 utf8_read_test(TESTSTR("\xFE\xFE\xFF\xFF"),
660660 ERROR, /* (invalid UTF-8 byte) */
661661 ERROR, /* (invalid UTF-8 byte) */
662662 ERROR, /* (invalid UTF-8 byte) */
663663 ERROR, /* (invalid UTF-8 byte) */
664 0, -1);
664 0L, -1L);
665665 utf8_read_test(TESTSTR("\xC0\xAF"),
666666 ERROR, /* SOLIDUS (overlong form of 2F) */
667 0, -1);
667 0L, -1L);
668668 utf8_read_test(TESTSTR("\xE0\x80\xAF"),
669669 ERROR, /* SOLIDUS (overlong form of 2F) */
670 0, -1);
670 0L, -1L);
671671 utf8_read_test(TESTSTR("\xF0\x80\x80\xAF"),
672672 ERROR, /* SOLIDUS (overlong form of 2F) */
673 0, -1);
673 0L, -1L);
674674 utf8_read_test(TESTSTR("\xF8\x80\x80\x80\xAF"),
675675 ERROR, /* SOLIDUS (overlong form of 2F) */
676 0, -1);
676 0L, -1L);
677677 utf8_read_test(TESTSTR("\xFC\x80\x80\x80\x80\xAF"),
678678 ERROR, /* SOLIDUS (overlong form of 2F) */
679 0, -1);
679 0L, -1L);
680680 utf8_read_test(TESTSTR("\xC1\xBF"),
681681 ERROR, /* <control> (overlong form of 7F) */
682 0, -1);
682 0L, -1L);
683683 utf8_read_test(TESTSTR("\xE0\x9F\xBF"),
684684 ERROR, /* <no name available> (overlong form of DF BF) */
685 0, -1);
685 0L, -1L);
686686 utf8_read_test(TESTSTR("\xF0\x8F\xBF\xBF"),
687687 ERROR, /* <no name available> (overlong form of EF BF BF) (invalid char) */
688 0, -1);
688 0L, -1L);
689689 utf8_read_test(TESTSTR("\xF8\x87\xBF\xBF\xBF"),
690690 ERROR, /* <no name available> (overlong form of F7 BF BF BF) */
691 0, -1);
691 0L, -1L);
692692 utf8_read_test(TESTSTR("\xFC\x83\xBF\xBF\xBF\xBF"),
693693 ERROR, /* <no name available> (overlong form of FB BF BF BF BF) */
694 0, -1);
694 0L, -1L);
695695 utf8_read_test(TESTSTR("\xC0\x80"),
696696 ERROR, /* <control> (overlong form of 00) */
697 0, -1);
697 0L, -1L);
698698 utf8_read_test(TESTSTR("\xE0\x80\x80"),
699699 ERROR, /* <control> (overlong form of 00) */
700 0, -1);
700 0L, -1L);
701701 utf8_read_test(TESTSTR("\xF0\x80\x80\x80"),
702702 ERROR, /* <control> (overlong form of 00) */
703 0, -1);
703 0L, -1L);
704704 utf8_read_test(TESTSTR("\xF8\x80\x80\x80\x80"),
705705 ERROR, /* <control> (overlong form of 00) */
706 0, -1);
706 0L, -1L);
707707 utf8_read_test(TESTSTR("\xFC\x80\x80\x80\x80\x80"),
708708 ERROR, /* <control> (overlong form of 00) */
709 0, -1);
709 0L, -1L);
710710 utf8_read_test(TESTSTR("\xED\xA0\x80"),
711711 ERROR, /* <Non Private Use High Surrogate, First> (surrogate) */
712 0, -1);
712 0L, -1L);
713713 utf8_read_test(TESTSTR("\xED\xAD\xBF"),
714714 ERROR, /* <Non Private Use High Surrogate, Last> (surrogate) */
715 0, -1);
715 0L, -1L);
716716 utf8_read_test(TESTSTR("\xED\xAE\x80"),
717717 ERROR, /* <Private Use High Surrogate, First> (surrogate) */
718 0, -1);
718 0L, -1L);
719719 utf8_read_test(TESTSTR("\xED\xAF\xBF"),
720720 ERROR, /* <Private Use High Surrogate, Last> (surrogate) */
721 0, -1);
721 0L, -1L);
722722 utf8_read_test(TESTSTR("\xED\xB0\x80"),
723723 ERROR, /* <Low Surrogate, First> (surrogate) */
724 0, -1);
724 0L, -1L);
725725 utf8_read_test(TESTSTR("\xED\xBE\x80"),
726726 ERROR, /* <no name available> (surrogate) */
727 0, -1);
727 0L, -1L);
728728 utf8_read_test(TESTSTR("\xED\xBF\xBF"),
729729 ERROR, /* <Low Surrogate, Last> (surrogate) */
730 0, -1);
730 0L, -1L);
731731 utf8_read_test(TESTSTR("\xED\xA0\x80\xED\xB0\x80"),
732732 ERROR, /* <Non Private Use High Surrogate, First> (surrogate) */
733733 ERROR, /* <Low Surrogate, First> (surrogate) */
734 0, -1);
734 0L, -1L);
735735 utf8_read_test(TESTSTR("\xED\xA0\x80\xED\xBF\xBF"),
736736 ERROR, /* <Non Private Use High Surrogate, First> (surrogate) */
737737 ERROR, /* <Low Surrogate, Last> (surrogate) */
738 0, -1);
738 0L, -1L);
739739 utf8_read_test(TESTSTR("\xED\xAD\xBF\xED\xB0\x80"),
740740 ERROR, /* <Non Private Use High Surrogate, Last> (surrogate) */
741741 ERROR, /* <Low Surrogate, First> (surrogate) */
742 0, -1);
742 0L, -1L);
743743 utf8_read_test(TESTSTR("\xED\xAD\xBF\xED\xBF\xBF"),
744744 ERROR, /* <Non Private Use High Surrogate, Last> (surrogate) */
745745 ERROR, /* <Low Surrogate, Last> (surrogate) */
746 0, -1);
746 0L, -1L);
747747 utf8_read_test(TESTSTR("\xED\xAE\x80\xED\xB0\x80"),
748748 ERROR, /* <Private Use High Surrogate, First> (surrogate) */
749749 ERROR, /* <Low Surrogate, First> (surrogate) */
750 0, -1);
750 0L, -1L);
751751 utf8_read_test(TESTSTR("\xED\xAE\x80\xED\xBF\xBF"),
752752 ERROR, /* <Private Use High Surrogate, First> (surrogate) */
753753 ERROR, /* <Low Surrogate, Last> (surrogate) */
754 0, -1);
754 0L, -1L);
755755 utf8_read_test(TESTSTR("\xED\xAF\xBF\xED\xB0\x80"),
756756 ERROR, /* <Private Use High Surrogate, Last> (surrogate) */
757757 ERROR, /* <Low Surrogate, First> (surrogate) */
758 0, -1);
758 0L, -1L);
759759 utf8_read_test(TESTSTR("\xED\xAF\xBF\xED\xBF\xBF"),
760760 ERROR, /* <Private Use High Surrogate, Last> (surrogate) */
761761 ERROR, /* <Low Surrogate, Last> (surrogate) */
762 0, -1);
762 0L, -1L);
763763 utf8_read_test(TESTSTR("\xEF\xBF\xBE"),
764764 ERROR, /* <no name available> (invalid char) */
765 0, -1);
765 0L, -1L);
766766 utf8_read_test(TESTSTR("\xEF\xBF\xBF"),
767767 ERROR, /* <no name available> (invalid char) */
768 0, -1);
768 0L, -1L);
769769 printf("read tests completed\n");
770770 printf("write tests beginning\n");
771771 {
772772 const static long str[] =
773773 {0x03BAL, 0x1F79L, 0x03C3L, 0x03BCL, 0x03B5L, 0};
774774 utf8_write_test(TESTSTR(str),
775 0xCE, 0xBA,
776 0xE1, 0xBD, 0xB9,
777 0xCF, 0x83,
778 0xCE, 0xBC,
779 0xCE, 0xB5,
780 0, -1);
775 0xCEL, 0xBAL,
776 0xE1L, 0xBDL, 0xB9L,
777 0xCFL, 0x83L,
778 0xCEL, 0xBCL,
779 0xCEL, 0xB5L,
780 0L, -1L);
781781 }
782782 {
783783 const static long str[] = {0x0000L, 0};
784784 utf8_write_test(TESTSTR(str),
785 0x00,
786 0, -1);
785 0x00L,
786 0L, -1L);
787787 }
788788 {
789789 const static long str[] = {0x0080L, 0};
790790 utf8_write_test(TESTSTR(str),
791 0xC2, 0x80,
792 0, -1);
791 0xC2L, 0x80L,
792 0L, -1L);
793793 }
794794 {
795795 const static long str[] = {0x0800L, 0};
796796 utf8_write_test(TESTSTR(str),
797 0xE0, 0xA0, 0x80,
798 0, -1);
797 0xE0L, 0xA0L, 0x80L,
798 0L, -1L);
799799 }
800800 {
801801 const static long str[] = {0x00010000L, 0};
802802 utf8_write_test(TESTSTR(str),
803 0xF0, 0x90, 0x80, 0x80,
804 0, -1);
803 0xF0L, 0x90L, 0x80L, 0x80L,
804 0L, -1L);
805805 }
806806 {
807807 const static long str[] = {0x00200000L, 0};
808808 utf8_write_test(TESTSTR(str),
809 0xF8, 0x88, 0x80, 0x80, 0x80,
810 0, -1);
809 0xF8L, 0x88L, 0x80L, 0x80L, 0x80L,
810 0L, -1L);
811811 }
812812 {
813813 const static long str[] = {0x04000000L, 0};
814814 utf8_write_test(TESTSTR(str),
815 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80,
816 0, -1);
815 0xFCL, 0x84L, 0x80L, 0x80L, 0x80L, 0x80L,
816 0L, -1L);
817817 }
818818 {
819819 const static long str[] = {0x007FL, 0};
820820 utf8_write_test(TESTSTR(str),
821 0x7F,
822 0, -1);
821 0x7FL,
822 0L, -1L);
823823 }
824824 {
825825 const static long str[] = {0x07FFL, 0};
826826 utf8_write_test(TESTSTR(str),
827 0xDF, 0xBF,
828 0, -1);
827 0xDFL, 0xBFL,
828 0L, -1L);
829829 }
830830 {
831831 const static long str[] = {0xFFFDL, 0};
832832 utf8_write_test(TESTSTR(str),
833 0xEF, 0xBF, 0xBD,
834 0, -1);
833 0xEFL, 0xBFL, 0xBDL,
834 0L, -1L);
835835 }
836836 {
837837 const static long str[] = {0xFFFFL, 0};
838838 utf8_write_test(TESTSTR(str),
839839 ERROR,
840 0, -1);
840 0L, -1L);
841841 }
842842 {
843843 const static long str[] = {0x001FFFFFL, 0};
844844 utf8_write_test(TESTSTR(str),
845 0xF7, 0xBF, 0xBF, 0xBF,
846 0, -1);
845 0xF7L, 0xBFL, 0xBFL, 0xBFL,
846 0L, -1L);
847847 }
848848 {
849849 const static long str[] = {0x03FFFFFFL, 0};
850850 utf8_write_test(TESTSTR(str),
851 0xFB, 0xBF, 0xBF, 0xBF, 0xBF,
852 0, -1);
851 0xFBL, 0xBFL, 0xBFL, 0xBFL, 0xBFL,
852 0L, -1L);
853853 }
854854 {
855855 const static long str[] = {0x7FFFFFFFL, 0};
856856 utf8_write_test(TESTSTR(str),
857 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF,
858 0, -1);
857 0xFDL, 0xBFL, 0xBFL, 0xBFL, 0xBFL, 0xBFL,
858 0L, -1L);
859859 }
860860 {
861861 const static long str[] = {0xD7FFL, 0};
862862 utf8_write_test(TESTSTR(str),
863 0xED, 0x9F, 0xBF,
864 0, -1);
863 0xEDL, 0x9FL, 0xBFL,
864 0L, -1L);
865865 }
866866 {
867867 const static long str[] = {0xD800L, 0};
868868 utf8_write_test(TESTSTR(str),
869869 ERROR,
870 0, -1);
870 0L, -1L);
871871 }
872872 {
873873 const static long str[] = {0xD800L, 0xDC00L, 0};
874874 utf8_write_test(TESTSTR(str),
875875 ERROR,
876876 ERROR,
877 0, -1);
877 0L, -1L);
878878 }
879879 {
880880 const static long str[] = {0xDFFFL, 0};
881881 utf8_write_test(TESTSTR(str),
882882 ERROR,
883 0, -1);
883 0L, -1L);
884884 }
885885 {
886886 const static long str[] = {0xE000L, 0};
887887 utf8_write_test(TESTSTR(str),
888 0xEE, 0x80, 0x80,
889 0, -1);
888 0xEEL, 0x80L, 0x80L,
889 0L, -1L);
890890 }
891891 printf("write tests completed\n");
892892
1010 struct numberstate_Tag {
1111 int chapternum;
1212 int appendixnum;
13 int ischapter;
13 bool ischapter;
1414 int *sectionlevels;
1515 paragraph **currentsects;
1616 paragraph *lastsect;
2727 numberstate *ret = snew(numberstate);
2828 ret->chapternum = 0;
2929 ret->appendixnum = -1;
30 ret->ischapter = 1;
30 ret->ischapter = true;
3131 ret->oklevel = -1; /* not even in a chapter yet */
3232 ret->maxsectlevel = 32;
3333 ret->sectionlevels = snewn(ret->maxsectlevel, int);
5252 mnewword->type = word_Normal;
5353 mnewword->alt = NULL;
5454 mnewword->next = NULL;
55 mnewword->breaks = FALSE;
55 mnewword->breaks = false;
5656 mnewword->aux = 0;
5757 **wret = mnewword;
5858 *wret = &mnewword->next;
6464 mnewword->type = word_WhiteSpace;
6565 mnewword->alt = NULL;
6666 mnewword->next = NULL;
67 mnewword->breaks = FALSE;
67 mnewword->breaks = false;
6868 mnewword->aux = 0;
6969 **wret = mnewword;
7070 *wret = &mnewword->next;
128128 }
129129
130130 word *number_mktext(numberstate *state, paragraph *p, wchar_t *category,
131 int *prev, int *errflag) {
131 int *prev, bool *errflag, errorstate *es) {
132132 word *ret = NULL;
133133 word **ret2 = &ret;
134134 word **pret = &ret;
149149 dospace(&pret);
150150 ret2 = pret;
151151 donumber(&pret, state->chapternum);
152 state->ischapter = 1;
152 state->ischapter = true;
153153 state->oklevel = 0;
154154 level = -1;
155155 break;
157157 case para_Subsect:
158158 level = (p->type == para_Heading ? 0 : p->aux);
159159 if (level > state->oklevel) {
160 err_sectjump(&p->fpos);
161 *errflag = TRUE;
160 err_sectjump(es, &p->fpos);
161 *errflag = true;
162162 ret = NULL;
163163 break;
164164 }
193193 dospace(&pret);
194194 ret2 = pret;
195195 doanumber(&pret, state->appendixnum);
196 state->ischapter = 0;
196 state->ischapter = false;
197197 state->oklevel = 0;
198198 level = -1;
199199 break;
331331 int outlen, outsize;
332332 unsigned long outbits;
333333 int noutbits;
334 int firstblock;
334 bool firstblock;
335335 unsigned long *syms;
336336 int symstart, nsyms;
337337 int type;
338338 unsigned long checksum;
339339 unsigned long datasize;
340 int lastblock;
341 int finished;
340 bool lastblock;
341 bool finished;
342342 unsigned char static_len1[288], static_len2[30];
343343 int static_code1[288], static_code2[30];
344344 struct huftrees sht;
434434 int treesyms[286 + 30];
435435 int codelen[19];
436436 int i, ntreesrc, ntreesyms;
437 int dynamic, blklen;
437 bool dynamic;
438 int blklen;
438439 struct huftrees dht;
439440 const struct huftrees *ht;
440441 #ifdef STATISTICS
962963 out = snew(deflate_compress_ctx);
963964 out->type = type;
964965 out->outbits = out->noutbits = 0;
965 out->firstblock = TRUE;
966 out->firstblock = true;
966967 #ifdef STATISTICS
967968 out->bitcount = 0;
968969 #endif
972973
973974 out->checksum = (type == DEFLATE_TYPE_ZLIB ? 1 : 0);
974975 out->datasize = 0;
975 out->lastblock = FALSE;
976 out->finished = FALSE;
976 out->lastblock = false;
977 out->finished = false;
977978
978979 /*
979980 * Build the static Huffman tables now, so we'll have them
10591060 outbits(out, 0xFF02, 16); /* xflags, OS */
10601061 break;
10611062 }
1062 out->firstblock = FALSE;
1063 out->firstblock = false;
10631064 }
10641065
10651066 /*
10661067 * Feed our data to the LZ77 compression phase.
10671068 */
1068 lz77_compress(ectx, block, len, TRUE);
1069 lz77_compress(ectx, block, len, true);
10691070
10701071 /*
10711072 * Update checksums and counters.
11111112 /*
11121113 * Output a block with BFINAL set.
11131114 */
1114 out->lastblock = TRUE;
1115 out->lastblock = true;
11151116 flushblock(out);
11161117
11171118 /*
11421143 break;
11431144 }
11441145
1145 out->finished = TRUE;
1146 out->finished = true;
11461147 break;
11471148 }
11481149
13131314 CRC1, CRC2, ILEN1, ILEN2,
13141315 FINALSPIN
13151316 } state;
1316 int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len,
1317 lenrep, lastblock;
1317 int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len, lenrep;
1318 bool lastblock;
13181319 int uncomplen;
13191320 unsigned char lenlen[19];
13201321 unsigned char lengths[286 + 32];
13651366 dctx->nbits = 0;
13661367 dctx->winpos = 0;
13671368 dctx->type = type;
1368 dctx->lastblock = FALSE;
1369 dctx->lastblock = false;
13691370 dctx->checksum = (type == DEFLATE_TYPE_ZLIB ? 1 : 0);
13701371 dctx->bytesout = 0;
13711372 dctx->gzflags = dctx->gzextralen = 0;
16221623 goto finished; /* done all we can */
16231624 bfinal = dctx->bits & 1;
16241625 if (bfinal)
1625 dctx->lastblock = TRUE;
1626 dctx->lastblock = true;
16261627 EATBITS(1);
16271628 btype = dctx->bits & 3;
16281629 EATBITS(2);
20292030 int ret, err, outlen;
20302031 deflate_decompress_ctx *dhandle;
20312032 deflate_compress_ctx *chandle;
2032 int type = DEFLATE_TYPE_ZLIB, opts = TRUE;
2033 int compress = FALSE, decompress = FALSE;
2034 int got_arg = FALSE;
2033 int type = DEFLATE_TYPE_ZLIB;
2034 bool opts = true;
2035 bool compress = false, decompress = false;
2036 bool got_arg = false;
20352037 char *filename = NULL;
20362038 FILE *fp;
20372039
20382040 while (--argc) {
20392041 char *p = *++argv;
20402042
2041 got_arg = TRUE;
2043 got_arg = true;
20422044
20432045 if (p[0] == '-' && opts) {
20442046 if (!strcmp(p, "-b"))
20462048 else if (!strcmp(p, "-g"))
20472049 type = DEFLATE_TYPE_GZIP;
20482050 else if (!strcmp(p, "-c"))
2049 compress = TRUE;
2051 compress = true;
20502052 else if (!strcmp(p, "-d"))
2051 decompress = TRUE;
2053 decompress = true;
20522054 else if (!strcmp(p, "-a"))
2053 analyse_level++, decompress = TRUE;
2055 analyse_level++, decompress = true;
20542056 else if (!strcmp(p, "--"))
2055 opts = FALSE; /* next thing is filename */
2057 opts = false; /* next thing is filename */
20562058 else {
20572059 fprintf(stderr, "unknown command line option '%s'\n", p);
20582060 return 1;
21582160 unsigned char buf[65536], *outbuf, *outbuf2;
21592161 int ret, err, outlen, outlen2;
21602162 int dlen = 0, clen = 0;
2161 int opts = TRUE;
2163 int opts = true;
21622164
21632165 while (--argc) {
21642166 char *p = *++argv;
21652167
21662168 if (p[0] == '-' && opts) {
21672169 if (!strcmp(p, "--"))
2168 opts = FALSE; /* next thing is filename */
2170 opts = false; /* next thing is filename */
21692171 else {
21702172 fprintf(stderr, "unknown command line option '%s'\n", p);
21712173 return 1;
# Halibut is used to build its own documentation, so the documentation
# can only be built when the freshly built halibut binary can be run
# on the build machine, i.e. when we are not cross-compiling.

if(CMAKE_CROSSCOMPILING)
  message(WARNING "Not building the Halibut documentation in a cross-compile")
else()

  include(GNUInstallDirs)

  # Path of the halibut executable built by this project (resolved at
  # generate time).
  set(HALIBUT $<TARGET_FILE:halibut>)

  set(manual_sources
    ${CMAKE_CURRENT_SOURCE_DIR}/blurb.but
    ${CMAKE_CURRENT_SOURCE_DIR}/intro.but
    ${CMAKE_CURRENT_SOURCE_DIR}/running.but
    ${CMAKE_CURRENT_SOURCE_DIR}/input.but
    ${CMAKE_CURRENT_SOURCE_DIR}/output.but
    ${CMAKE_CURRENT_SOURCE_DIR}/licence.but
    ${CMAKE_CURRENT_SOURCE_DIR}/manpage.but
    ${CMAKE_CURRENT_SOURCE_DIR}/index.but
    )

  # Do the manual build in a subdirectory, to avoid the install
  # command making a CMakeFiles directory in the output location.
  set(manual_dir ${CMAKE_CURRENT_BINARY_DIR}/manual)
  file(MAKE_DIRECTORY ${manual_dir})

  # Files the manual build is expected to leave in ${manual_dir}. The
  # HTML back end may write further *.html files beyond index.html;
  # those are picked up by the install(DIRECTORY ...) rule below.
  set(manual_outputs
    ${manual_dir}/index.html
    ${manual_dir}/halibut.txt
    ${manual_dir}/halibut.info
    ${manual_dir}/halibut.ps
    ${manual_dir}/halibut.pdf
    ${manual_dir}/halibut.chm
    )

  # Generate the manual as a real build rule with declared outputs, so
  # that it is only regenerated when halibut itself or one of the .but
  # sources changes, rather than on every build.
  add_custom_command(
    OUTPUT ${manual_outputs}
    COMMAND
      ${HALIBUT}
      --html
      --text=halibut.txt
      --info=halibut.info
      --ps=halibut.ps
      --pdf=halibut.pdf
      --chm=halibut.chm
      ${manual_sources}
    WORKING_DIRECTORY ${manual_dir}
    DEPENDS halibut ${manual_sources}
    COMMENT "Building the Halibut manual"
    VERBATIM)

  # 'manual' stays available as a named target and is part of 'all'.
  add_custom_target(manual ALL DEPENDS ${manual_outputs})

  # The man page is generated straight into the current binary
  # directory, which is where the install rule below looks for it.
  set(manpage_output ${CMAKE_CURRENT_BINARY_DIR}/halibut.1)

  add_custom_command(
    OUTPUT ${manpage_output}
    COMMAND
      ${HALIBUT}
      --man=halibut.1
      ${CMAKE_CURRENT_SOURCE_DIR}/manpage.but
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    DEPENDS halibut ${CMAKE_CURRENT_SOURCE_DIR}/manpage.but
    COMMENT "Building the Halibut man page"
    VERBATIM)

  # 'manpage' stays available as a named target and is part of 'all'.
  add_custom_target(manpage ALL DEPENDS ${manpage_output})

  install(DIRECTORY ${manual_dir}/
    DESTINATION ${CMAKE_INSTALL_DOCDIR}
    FILES_MATCHING PATTERN "*.html")

  install(DIRECTORY ${manual_dir}/
    DESTINATION ${CMAKE_INSTALL_INFODIR}
    FILES_MATCHING PATTERN "*.info*")

  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/halibut.1
    DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)

endif()
+0
-24
doc/Makefile less more
0 mandir=$(prefix)/man
1 man1dir=$(mandir)/man1
2
3 CHAPTERS := $(SITE) blurb intro running input output licence manpage index
4
5 INPUTS = $(patsubst %,%.but,$(CHAPTERS))
6
7 HALIBUT = ../build/halibut
8
9 all: index.html halibut.1
10
11 index.html: $(INPUTS) $(HALIBUT)
12 $(HALIBUT) --text=halibut.txt --html --info=halibut.info \
13 --ps=halibut.ps --pdf=halibut.pdf --chm=halibut.chm $(INPUTS)
14
15 halibut.1: manpage.but
16 $(HALIBUT) --man=halibut.1 manpage.but
17
18 install:
19 mkdir -p $(man1dir)
20 $(INSTALL) -m 644 halibut.1 $(man1dir)/halibut.1
21
22 clean:
23 rm -f *.html *.txt *.hlp *.cnt *.1 *.info* *.ps *.pdf *.chm
00 \A{licence} Halibut Licence
11
2 Halibut is copyright (c) 1999-2017 Simon Tatham.
2 Halibut is copyright (c) 1999-2021 Simon Tatham.
33
44 Permission is hereby granted, free of charge, to any person
55 obtaining a copy of this software and associated documentation files
157157
158158 This man page isn't terribly complete.
159159
160 \versionid Halibut version 1.2
160 \versionid Halibut version 1.3
+176
-112
error.c less more
4141 exit(EXIT_FAILURE);
4242 }
4343
44 void err_optnoarg(const char *sp)
45 {
44 void err_optnoarg(errorstate *es, const char *sp)
45 {
46 es->fatal = true;
4647 do_error(NULL, "option `-%s' requires an argument", sp);
4748 }
4849
49 void err_nosuchopt(const char *sp)
50 {
50 void err_nosuchopt(errorstate *es, const char *sp)
51 {
52 es->fatal = true;
5153 do_error(NULL, "unrecognised option `-%s'", sp);
5254 }
5355
54 void err_cmdcharset(const char *sp)
55 {
56 void err_cmdcharset(errorstate *es, const char *sp)
57 {
58 es->fatal = true;
5659 do_error(NULL, "character set `%s' not recognised", sp);
5760 }
5861
59 void err_futileopt(const char *sp, const char *sp2)
62 void err_futileopt(errorstate *es, const char *sp, const char *sp2)
6063 {
6164 do_error(NULL, "warning: option `-%s' has no effect%s", sp, sp2);
6265 }
6366
64 void err_noinput(void)
65 {
67 void err_noinput(errorstate *es)
68 {
69 es->fatal = true;
6670 do_error(NULL, "no input files");
6771 }
6872
69 void err_cantopen(const char *sp)
70 {
73 void err_cantopen(errorstate *es, const char *sp)
74 {
75 es->fatal = true;
7176 do_error(NULL, "unable to open input file `%s'", sp);
7277 }
7378
74 void err_nodata(void)
75 {
79 void err_nodata(errorstate *es)
80 {
81 es->fatal = true;
7682 do_error(NULL, "no data in input files");
7783 }
7884
79 void err_brokencodepara(const filepos *fpos)
80 {
85 void err_zerochar(errorstate *es, const filepos *fpos)
86 {
87 es->fatal = true;
88 do_error(fpos, "the Unicode zero character is not permitted in input");
89 }
90
91 void err_brokencodepara(errorstate *es, const filepos *fpos)
92 {
93 es->fatal = true;
8194 do_error(fpos, "every line of a code paragraph should begin `\\c'");
8295 }
8396
84 void err_kwunclosed(const filepos *fpos)
85 {
97 void err_kwunclosed(errorstate *es, const filepos *fpos)
98 {
99 es->fatal = true;
86100 do_error(fpos, "expected `}' after paragraph keyword");
87101 }
88102
89 void err_kwexpected(const filepos *fpos)
90 {
103 void err_kwexpected(errorstate *es, const filepos *fpos)
104 {
105 es->fatal = true;
91106 do_error(fpos, "expected a paragraph keyword");
92107 }
93108
94 void err_kwillegal(const filepos *fpos)
95 {
109 void err_kwillegal(errorstate *es, const filepos *fpos)
110 {
111 es->fatal = true;
96112 do_error(fpos, "expected no paragraph keyword");
97113 }
98114
99 void err_kwtoomany(const filepos *fpos)
100 {
115 void err_kwtoomany(errorstate *es, const filepos *fpos)
116 {
117 es->fatal = true;
101118 do_error(fpos, "expected only one paragraph keyword");
102119 }
103120
104 void err_bodyillegal(const filepos *fpos)
105 {
121 void err_bodyillegal(errorstate *es, const filepos *fpos)
122 {
123 es->fatal = true;
106124 do_error(fpos, "expected no text after paragraph keyword");
107125 }
108126
109 void err_badparatype(const wchar_t *wsp, const filepos *fpos)
110 {
127 void err_badparatype(errorstate *es, const wchar_t *wsp, const filepos *fpos)
128 {
129 es->fatal = true;
111130 char *sp = utoa_locale_dup(wsp);
112131 do_error(fpos, "command `%s' unrecognised at start of paragraph", sp);
113132 sfree(sp);
114133 }
115134
116 void err_badmidcmd(const wchar_t *wsp, const filepos *fpos)
117 {
135 void err_badmidcmd(errorstate *es, const wchar_t *wsp, const filepos *fpos)
136 {
137 es->fatal = true;
118138 char *sp = utoa_locale_dup(wsp);
119139 do_error(fpos, "command `%s' unexpected in mid-paragraph", sp);
120140 sfree(sp);
121141 }
122142
123 void err_unexbrace(const filepos *fpos)
124 {
143 void err_unexbrace(errorstate *es, const filepos *fpos)
144 {
145 es->fatal = true;
125146 do_error(fpos, "brace character unexpected in mid-paragraph");
126147 }
127148
128 void err_explbr(const filepos *fpos)
129 {
149 void err_explbr(errorstate *es, const filepos *fpos)
150 {
151 es->fatal = true;
130152 do_error(fpos, "expected `{' after command");
131153 }
132154
133 void err_commenteof(const filepos *fpos)
134 {
155 void err_commenteof(errorstate *es, const filepos *fpos)
156 {
157 es->fatal = true;
135158 do_error(fpos, "end of file unexpected inside `\\#{...}' comment");
136159 }
137160
138 void err_kwexprbr(const filepos *fpos)
139 {
161 void err_kwexprbr(errorstate *es, const filepos *fpos)
162 {
163 es->fatal = true;
140164 do_error(fpos, "expected `}' after cross-reference");
141165 }
142166
143 void err_codequote(const filepos *fpos)
144 {
167 void err_codequote(errorstate *es, const filepos *fpos)
168 {
169 es->fatal = true;
145170 do_error(fpos, "unable to nest \\q{...} within \\c{...} or \\cw{...}");
146171 }
147172
148 void err_missingrbrace(const filepos *fpos)
149 {
173 void err_missingrbrace(errorstate *es, const filepos *fpos)
174 {
175 es->fatal = true;
150176 do_error(fpos, "unclosed braces at end of paragraph");
151177 }
152178
153 void err_missingrbrace2(const filepos *fpos)
154 {
179 void err_missingrbrace2(errorstate *es, const filepos *fpos)
180 {
181 es->fatal = true;
155182 do_error(fpos, "unclosed braces at end of input file");
156183 }
157184
158 void err_nestedstyles(const filepos *fpos)
159 {
185 void err_nestedstyles(errorstate *es, const filepos *fpos)
186 {
187 es->fatal = true;
160188 do_error(fpos, "unable to nest text styles");
161189 }
162190
163 void err_nestedindex(const filepos *fpos)
164 {
191 void err_nestedindex(errorstate *es, const filepos *fpos)
192 {
193 es->fatal = true;
165194 do_error(fpos, "unable to nest index markings");
166195 }
167196
168 void err_indexcase(const filepos *fpos, const wchar_t *wsp,
197 void err_indexcase(errorstate *es, const filepos *fpos, const wchar_t *wsp,
169198 const filepos *fpos2, const wchar_t *wsp2)
170199 {
171200 char *sp = utoa_locale_dup(wsp), *sp2 = utoa_locale_dup(wsp2);
176205 sfree(sp2);
177206 }
178207
179 void err_nosuchkw(const filepos *fpos, const wchar_t *wsp)
180 {
208 void err_nosuchkw(errorstate *es, const filepos *fpos, const wchar_t *wsp)
209 {
210 es->fatal = true;
181211 char *sp = utoa_locale_dup(wsp);
182212 do_error(fpos, "unable to resolve cross-reference to `%s'", sp);
183213 sfree(sp);
184214 }
185215
186 void err_multiBR(const filepos *fpos, const wchar_t *wsp)
187 {
216 void err_multiBR(errorstate *es, const filepos *fpos, const wchar_t *wsp)
217 {
218 es->fatal = true;
188219 char *sp = utoa_locale_dup(wsp);
189220 do_error(fpos, "multiple `\\BR' entries given for `%s'", sp);
190221 sfree(sp);
191222 }
192223
193 void err_nosuchidxtag(const filepos *fpos, const wchar_t *wsp)
194 {
224 void err_nosuchidxtag(errorstate *es, const filepos *fpos, const wchar_t *wsp)
225 {
226 es->fatal = true;
195227 char *sp = utoa_locale_dup(wsp);
196228 do_error(fpos, "`\\IM' on unknown index tag `%s'", sp);
197229 sfree(sp);
198230 }
199231
200 void err_cantopenw(const char *sp)
201 {
232 void err_cantopenw(errorstate *es, const char *sp)
233 {
234 es->fatal = true;
202235 do_error(NULL, "unable to open output file `%s'", sp);
203236 }
204237
205 void err_macroexists(const filepos *fpos, const wchar_t *wsp)
206 {
238 void err_macroexists(errorstate *es, const filepos *fpos, const wchar_t *wsp)
239 {
240 es->fatal = true;
207241 char *sp = utoa_locale_dup(wsp);
208242 do_error(fpos, "macro `%s' already defined", sp);
209243 sfree(sp);
210244 }
211245
212 void err_sectjump(const filepos *fpos)
213 {
246 void err_sectjump(errorstate *es, const filepos *fpos)
247 {
248 es->fatal = true;
214249 do_error(fpos, "expected higher heading levels before this one");
215250 }
216251
217 void err_winhelp_ctxclash(const filepos *fpos, const char *sp, const char *sp2)
218 {
252 void err_winhelp_ctxclash(errorstate *es, const filepos *fpos,
253 const char *sp, const char *sp2)
254 {
255 es->fatal = true;
219256 do_error(fpos, "Windows Help context id `%s' clashes with "
220257 "previously defined `%s'", sp, sp2);
221258 }
222259
223 void err_multikw(const filepos *fpos, const filepos *fpos2, const wchar_t *wsp)
224 {
260 void err_multikw(errorstate *es, const filepos *fpos, const filepos *fpos2,
261 const wchar_t *wsp)
262 {
263 es->fatal = true;
225264 char *sp = utoa_locale_dup(wsp);
226265 do_error(fpos, "paragraph keyword `%s' already defined at %s:%d",
227266 sp, fpos2->filename, fpos2->line);
228267 sfree(sp);
229268 }
230269
231 void err_misplacedlcont(const filepos *fpos)
232 {
270 void err_misplacedlcont(errorstate *es, const filepos *fpos)
271 {
272 es->fatal = true;
233273 do_error(fpos, "\\lcont is only expected after a list item");
234274 }
235275
236 void err_sectmarkerinblock(const filepos *fpos, const char *sp)
237 {
276 void err_sectmarkerinblock(errorstate *es, const filepos *fpos, const char *sp)
277 {
278 es->fatal = true;
238279 do_error(fpos, "section headings are not supported within \\%s", sp);
239280 }
240281
241 void err_cfginsufarg(const filepos *fpos, const char *sp, int i)
242 {
282 void err_cfginsufarg(errorstate *es, const filepos *fpos, const char *sp,
283 int i)
284 {
285 es->fatal = true;
243286 do_error(fpos, "\\cfg{%s} expects at least %d parameter%s",
244287 sp, i, (i==1)?"":"s");
245288 }
246289
247 void err_infonodechar(const filepos *fpos, char c) /* fpos might be NULL */
248 {
290 void err_infonodechar(errorstate *es, const filepos *fpos, char c)
291 /* fpos might be NULL */
292 {
293 es->fatal = true;
249294 do_error(fpos, "info output format does not support '%c' in"
250295 " node names; removing", c);
251296 }
252297
253 void err_text_codeline(const filepos *fpos, int i, int j)
298 void err_text_codeline(errorstate *es, const filepos *fpos, int i, int j)
254299 {
255300 do_error(fpos, "warning: code paragraph line is %d chars wide, wider"
256301 " than body width %d", i, j);
257302 }
258303
259 void err_htmlver(const filepos *fpos, const wchar_t *wsp)
260 {
304 void err_htmlver(errorstate *es, const filepos *fpos, const wchar_t *wsp)
305 {
306 es->fatal = true;
261307 char *sp = utoa_locale_dup(wsp);
262308 do_error(fpos, "unrecognised HTML version keyword `%s'", sp);
263309 sfree(sp);
264310 }
265311
266 void err_charset(const filepos *fpos, const wchar_t *wsp)
267 {
312 void err_charset(errorstate *es, const filepos *fpos, const wchar_t *wsp)
313 {
314 es->fatal = true;
268315 char *sp = utoa_locale_dup(wsp);
269316 do_error(fpos, "character set `%s' not recognised", sp);
270317 sfree(sp);
271318 }
272319
273 void err_nofont(const filepos *fpos, const wchar_t *wsp)
274 {
320 void err_nofont(errorstate *es, const filepos *fpos, const wchar_t *wsp)
321 {
322 es->fatal = true;
275323 char *sp = utoa_locale_dup(wsp);
276324 do_error(fpos, "font `%s' not recognised", sp);
277325 sfree(sp);
278326 }
279327
280 void err_afmeof(const filepos *fpos)
281 {
328 void err_afmeof(errorstate *es, const filepos *fpos)
329 {
330 es->fatal = true;
282331 do_error(fpos, "AFM file ended unexpectedly");
283332 }
284333
285 void err_afmkey(const filepos *fpos, const char *sp)
286 {
334 void err_afmkey(errorstate *es, const filepos *fpos, const char *sp)
335 {
336 es->fatal = true;
287337 do_error(fpos, "required AFM key '%s' missing", sp);
288338 }
289339
290 void err_afmvers(const filepos *fpos)
291 {
340 void err_afmvers(errorstate *es, const filepos *fpos)
341 {
342 es->fatal = true;
292343 do_error(fpos, "unsupported AFM version");
293344 }
294345
295 void err_afmval(const filepos *fpos, const char *sp, int i)
296 {
346 void err_afmval(errorstate *es, const filepos *fpos, const char *sp, int i)
347 {
348 es->fatal = true;
297349 if (i == 1)
298350 do_error(fpos, "AFM key '%s' requires a value", sp);
299351 else
300352 do_error(fpos, "AFM key '%s' requires %d values", sp, i);
301353 }
302354
303 void err_pfeof(const filepos *fpos)
304 {
355 void err_pfeof(errorstate *es, const filepos *fpos)
356 {
357 es->fatal = true;
305358 do_error(fpos, "Type 1 font file ended unexpectedly");
306359 }
307360
308 void err_pfhead(const filepos *fpos)
309 {
361 void err_pfhead(errorstate *es, const filepos *fpos)
362 {
363 es->fatal = true;
310364 do_error(fpos, "Type 1 font file header line invalid");
311365 }
312366
313 void err_pfbad(const filepos *fpos)
314 {
367 void err_pfbad(errorstate *es, const filepos *fpos)
368 {
369 es->fatal = true;
315370 do_error(fpos, "Type 1 font file invalid");
316371 }
317372
318 void err_pfnoafm(const filepos *fpos, const char *sp)
319 {
373 void err_pfnoafm(errorstate *es, const filepos *fpos, const char *sp)
374 {
375 es->fatal = true;
320376 do_error(fpos, "no metrics available for Type 1 font '%s'", sp);
321377 }
322378
323 void err_chmnames(void)
324 {
379 void err_chmnames(errorstate *es)
380 {
381 es->fatal = true;
325382 do_error(NULL, "only one of html-mshtmlhelp-chm and "
326383 "html-mshtmlhelp-hhp found");
327384 }
328385
329 void err_sfntnotable(const filepos *fpos, const char *sp)
330 {
386 void err_sfntnotable(errorstate *es, const filepos *fpos, const char *sp)
387 {
388 es->fatal = true;
331389 do_error(fpos, "font has no '%s' table", sp);
332390 }
333391
334 void err_sfntnopsname(const filepos *fpos)
335 {
392 void err_sfntnopsname(errorstate *es, const filepos *fpos)
393 {
394 es->fatal = true;
336395 do_error(fpos, "font has no PostScript name");
337396 }
338397
339 void err_sfntbadtable(const filepos *fpos, const char *sp)
340 {
398 void err_sfntbadtable(errorstate *es, const filepos *fpos, const char *sp)
399 {
400 es->fatal = true;
341401 do_error(fpos, "font has an invalid '%s' table", sp);
342402 }
343403
344 void err_sfntnounicmap(const filepos *fpos)
345 {
404 void err_sfntnounicmap(errorstate *es, const filepos *fpos)
405 {
406 es->fatal = true;
346407 do_error(fpos, "font has no UCS-2 character map");
347408 }
348409
349 void err_sfnttablevers(const filepos *fpos, const char *sp)
350 {
410 void err_sfnttablevers(errorstate *es, const filepos *fpos, const char *sp)
411 {
412 es->fatal = true;
351413 do_error(fpos, "font has an unsupported '%s' table version", sp);
352414 }
353415
354 void err_sfntbadhdr(const filepos *fpos)
355 {
416 void err_sfntbadhdr(errorstate *es, const filepos *fpos)
417 {
418 es->fatal = true;
356419 do_error(fpos, "font has an invalid header");
357420 }
358421
359 void err_sfntbadglyph(const filepos *fpos, unsigned wc)
422 void err_sfntbadglyph(errorstate *es, const filepos *fpos, unsigned wc)
360423 {
361424 do_error(fpos,
362425 "warning: character U+%04X references a non-existent glyph",
363426 wc);
364427 }
365428
366 void err_chm_badname(const filepos *fpos, const char *sp)
367 {
429 void err_chm_badname(errorstate *es, const filepos *fpos, const char *sp)
430 {
431 es->fatal = true;
368432 do_error(fpos, "CHM internal file name `%s' begins with"
369433 " a reserved character", sp);
370434 }
44 #include <wchar.h>
55 #include <time.h>
66 #include <string.h>
7 #include <stdbool.h>
8
9 #ifdef BOOLIFY
10 # include "boolify.h"
11 #endif
712
813 #include "charset.h"
914
1116 #define NORETURN __attribute__((__noreturn__))
1217 #else
1318 #define NORETURN /* nothing */
14 #endif
15
16 #ifndef TRUE
17 #define TRUE 1
18 #endif
19 #ifndef FALSE
20 #define FALSE 0
2119 #endif
2220
2321 /* For suppressing unused-parameter warnings */
3937 typedef struct indextag_Tag indextag;
4038 typedef struct indexentry_Tag indexentry;
4139 typedef struct macrostack_Tag macrostack;
40 typedef struct errorstate_Tag errorstate;
41 typedef struct psdata_Tag psdata;
4242
4343 /*
4444 * Data structure to hold a file name and index, a line and a
6161 int nfiles; /* how many in the list */
6262 FILE *currfp; /* the currently open one */
6363 int currindex; /* which one is that in the list */
64 int wantclose; /* does the current file want closing */
64 bool wantclose; /* does the current file want closing */
6565 pushback *pushback; /* pushed-back input characters */
6666 int npushback, pushbacksize;
6767 filepos pos;
68 int reportcols; /* report column numbers in errors */
68 bool reportcols; /* report column numbers in errors */
6969 macrostack *stack; /* macro expansions in force */
7070 int defcharset, charset; /* character sets for input files */
7171 charset_state csstate;
7272 wchar_t wc[16]; /* wide chars from input conversion */
7373 int nwc, wcpos; /* size of, and position in, wc[] */
7474 char *pushback_chars; /* used to save input-encoding data */
75 errorstate *es;
7576 };
7677
7778 /*
135136 word *next, *alt;
136137 int type;
137138 int aux;
138 int breaks; /* can a line break after it? */
139 bool breaks; /* can a line break after it? */
139140 wchar_t *text;
140141 filepos fpos;
141142
204205 /*
205206 * error.c
206207 */
208 struct errorstate_Tag {
209 bool fatal;
210 };
207211 /* out of memory */
208212 void fatalerr_nomemory(void) NORETURN;
209213 /* option `-%s' requires an argument */
210 void err_optnoarg(const char *sp);
214 void err_optnoarg(errorstate *es, const char *sp);
211215 /* unrecognised option `-%s' */
212 void err_nosuchopt(const char *sp);
216 void err_nosuchopt(errorstate *es, const char *sp);
213217 /* unrecognised charset %s (cmdline) */
214 void err_cmdcharset(const char *sp);
218 void err_cmdcharset(errorstate *es, const char *sp);
215219 /* futile option `-%s'%s */
216 void err_futileopt(const char *sp, const char *sp2);
220 void err_futileopt(errorstate *es, const char *sp, const char *sp2);
217221 /* no input files */
218 void err_noinput(void);
222 void err_noinput(errorstate *es);
219223 /* unable to open input file `%s' */
220 void err_cantopen(const char *sp);
224 void err_cantopen(errorstate *es, const char *sp);
221225 /* no data in input files */
222 void err_nodata(void);
226 void err_nodata(errorstate *es);
227 /* unexpected zero character in input file */
228 void err_zerochar(errorstate *es, const filepos *fpos);
223229 /* line in codepara didn't begin `\c' */
224 void err_brokencodepara(const filepos *fpos);
230 void err_brokencodepara(errorstate *es, const filepos *fpos);
225231 /* expected `}' after keyword */
226 void err_kwunclosed(const filepos *fpos);
232 void err_kwunclosed(errorstate *es, const filepos *fpos);
227233 /* paragraph type expects a keyword */
228 void err_kwexpected(const filepos *fpos);
234 void err_kwexpected(errorstate *es, const filepos *fpos);
229235 /* paragraph type expects a keyword */
230 void err_kwillegal(const filepos *fpos);
236 void err_kwillegal(errorstate *es, const filepos *fpos);
231237 /* paragraph type expects only 1 */
232 void err_kwtoomany(const filepos *fpos);
238 void err_kwtoomany(errorstate *es, const filepos *fpos);
233239 /* paragraph type expects only kws! */
234 void err_bodyillegal(const filepos *fpos);
240 void err_bodyillegal(errorstate *es, const filepos *fpos);
235241 /* invalid command at start of para */
236 void err_badparatype(const wchar_t *wsp, const filepos *fpos);
242 void err_badparatype(errorstate *es, const wchar_t *wsp, const filepos *fpos);
237243 /* invalid command in mid-para */
238 void err_badmidcmd(const wchar_t *wsp, const filepos *fpos);
244 void err_badmidcmd(errorstate *es, const wchar_t *wsp, const filepos *fpos);
239245 /* unexpected brace */
240 void err_unexbrace(const filepos *fpos);
246 void err_unexbrace(errorstate *es, const filepos *fpos);
241247 /* expected `{' after command */
242 void err_explbr(const filepos *fpos);
248 void err_explbr(errorstate *es, const filepos *fpos);
243249 /* EOF inside braced comment */
244 void err_commenteof(const filepos *fpos);
250 void err_commenteof(errorstate *es, const filepos *fpos);
245251 /* expected `}' after cross-ref */
246 void err_kwexprbr(const filepos *fpos);
252 void err_kwexprbr(errorstate *es, const filepos *fpos);
247253 /* \q within \c is not supported */
248 void err_codequote(const filepos *fpos);
254 void err_codequote(errorstate *es, const filepos *fpos);
249255 /* unclosed braces at end of para */
250 void err_missingrbrace(const filepos *fpos);
256 void err_missingrbrace(errorstate *es, const filepos *fpos);
251257 /* unclosed braces at end of file */
252 void err_missingrbrace2(const filepos *fpos);
258 void err_missingrbrace2(errorstate *es, const filepos *fpos);
253259 /* unable to nest text styles */
254 void err_nestedstyles(const filepos *fpos);
260 void err_nestedstyles(errorstate *es, const filepos *fpos);
255261 /* unable to nest `\i' thingys */
256 void err_nestedindex(const filepos *fpos);
262 void err_nestedindex(errorstate *es, const filepos *fpos);
257263 /* two \i differing only in case */
258 void err_indexcase(const filepos *fpos, const wchar_t *wsp,
264 void err_indexcase(errorstate *es, const filepos *fpos, const wchar_t *wsp,
259265 const filepos *fpos2, const wchar_t *wsp2);
260266 /* unresolved cross-reference */
261 void err_nosuchkw(const filepos *fpos, const wchar_t *wsp);
267 void err_nosuchkw(errorstate *es, const filepos *fpos, const wchar_t *wsp);
262268 /* multiple \BRs on same keyword */
263 void err_multiBR(const filepos *fpos, const wchar_t *wsp);
269 void err_multiBR(errorstate *es, const filepos *fpos, const wchar_t *wsp);
264270 /* \IM on unknown index tag (warning) */
265 void err_nosuchidxtag(const filepos *fpos, const wchar_t *wsp);
271 void err_nosuchidxtag(errorstate *es, const filepos *fpos,
272 const wchar_t *wsp);
266273 /* can't open output file for write */
267 void err_cantopenw(const char *sp);
274 void err_cantopenw(errorstate *es, const char *sp);
268275 /* this macro already exists */
269 void err_macroexists(const filepos *fpos, const wchar_t *wsp);
276 void err_macroexists(errorstate *es, const filepos *fpos, const wchar_t *wsp);
270277 /* jump a heading level, eg \C -> \S */
271 void err_sectjump(const filepos *fpos);
278 void err_sectjump(errorstate *es, const filepos *fpos);
272279 /* WinHelp context ID hash clash */
273 void err_winhelp_ctxclash(const filepos *fpos, const char *sp, const char *sp2);
280 void err_winhelp_ctxclash(errorstate *es, const filepos *fpos,
281 const char *sp, const char *sp2);
274282 /* keyword clash in sections */
275 void err_multikw(const filepos *fpos, const filepos *fpos2, const wchar_t *wsp);
283 void err_multikw(errorstate *es, const filepos *fpos, const filepos *fpos2,
284 const wchar_t *wsp);
276285 /* \lcont not after a list item */
277 void err_misplacedlcont(const filepos *fpos);
286 void err_misplacedlcont(errorstate *es, const filepos *fpos);
278287 /* section marker appeared in block */
279 void err_sectmarkerinblock(const filepos *fpos, const char *sp);
288 void err_sectmarkerinblock(errorstate *es, const filepos *fpos,
289 const char *sp);
280290 /* \cfg{%s} insufficient args (<%d) */
281 void err_cfginsufarg(const filepos *fpos, const char *sp, int i);
291 void err_cfginsufarg(errorstate *es, const filepos *fpos, const char *sp,
292 int i);
282293 /* colon/comma in node name in info */
283 void err_infonodechar(const filepos *fpos, char c) /* fpos might be NULL */;
294 void err_infonodechar(errorstate *es, const filepos *fpos, char c)
295 /* fpos might be NULL */;
284296 /* \c line too long in text backend */
285 void err_text_codeline(const filepos *fpos, int i, int j);
297 void err_text_codeline(errorstate *es, const filepos *fpos, int i, int j);
286298 /* unrecognised HTML version keyword */
287 void err_htmlver(const filepos *fpos, const wchar_t *wsp);
299 void err_htmlver(errorstate *es, const filepos *fpos, const wchar_t *wsp);
288300 /* unrecognised character set name */
289 void err_charset(const filepos *fpos, const wchar_t *wsp);
301 void err_charset(errorstate *es, const filepos *fpos, const wchar_t *wsp);
290302 /* unrecognised font name */
291 void err_nofont(const filepos *fpos, const wchar_t *wsp);
303 void err_nofont(errorstate *es, const filepos *fpos, const wchar_t *wsp);
292304 /* eof in AFM file */
293 void err_afmeof(const filepos *fpos);
305 void err_afmeof(errorstate *es, const filepos *fpos);
294306 /* missing expected keyword in AFM */
295 void err_afmkey(const filepos *fpos, const char *sp);
307 void err_afmkey(errorstate *es, const filepos *fpos, const char *sp);
296308 /* unsupported AFM version */
297 void err_afmvers(const filepos *fpos);
309 void err_afmvers(errorstate *es, const filepos *fpos);
298310 /* missing value(s) for AFM key */
299 void err_afmval(const filepos *fpos, const char *sp, int i);
311 void err_afmval(errorstate *es, const filepos *fpos, const char *sp, int i);
300312 /* eof in Type 1 font file */
301 void err_pfeof(const filepos *fpos);
313 void err_pfeof(errorstate *es, const filepos *fpos);
302314 /* bad Type 1 header line */
303 void err_pfhead(const filepos *fpos);
315 void err_pfhead(errorstate *es, const filepos *fpos);
304316 /* otherwise invalid Type 1 font */
305 void err_pfbad(const filepos *fpos);
317 void err_pfbad(errorstate *es, const filepos *fpos);
306318 /* Type 1 font but no AFM */
307 void err_pfnoafm(const filepos *fpos, const char *sp);
319 void err_pfnoafm(errorstate *es, const filepos *fpos, const char *sp);
308320 /* need both or neither of hhp+chm */
309 void err_chmnames(void);
321 void err_chmnames(errorstate *es);
310322 /* required sfnt table missing */
311 void err_sfntnotable(const filepos *fpos, const char *sp);
323 void err_sfntnotable(errorstate *es, const filepos *fpos, const char *sp);
312324 /* sfnt has no PostScript name */
313 void err_sfntnopsname(const filepos *fpos);
325 void err_sfntnopsname(errorstate *es, const filepos *fpos);
314326 /* sfnt table not valid */
315 void err_sfntbadtable(const filepos *fpos, const char *sp);
327 void err_sfntbadtable(errorstate *es, const filepos *fpos, const char *sp);
316328 /* sfnt has no UCS-2 cmap */
317 void err_sfntnounicmap(const filepos *fpos);
329 void err_sfntnounicmap(errorstate *es, const filepos *fpos);
318330 /* sfnt table version unknown */
319 void err_sfnttablevers(const filepos *fpos, const char *sp);
331 void err_sfnttablevers(errorstate *es, const filepos *fpos, const char *sp);
320332 /* sfnt has bad header */
321 void err_sfntbadhdr(const filepos *fpos);
333 void err_sfntbadhdr(errorstate *es, const filepos *fpos);
322334 /* sfnt cmap references bad glyph */
323 void err_sfntbadglyph(const filepos *fpos, unsigned wc);
335 void err_sfntbadglyph(errorstate *es, const filepos *fpos, unsigned wc);
324336 /* CHM internal file names can't start with # or $ */
325 void err_chm_badname(const filepos *fpos, const char *sp);
337 void err_chm_badname(errorstate *es, const filepos *fpos, const char *sp);
326338
327339 /*
328340 * malloc.c
368380 wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source);
369381 wchar_t *ustrncpy(wchar_t *dest, wchar_t const *source, int n);
370382 wchar_t utolower(wchar_t);
371 int uisalpha(wchar_t);
383 bool uisalpha(wchar_t);
372384 int ustrcmp(wchar_t *lhs, wchar_t *rhs);
373385 int ustricmp(wchar_t const *lhs, wchar_t const *rhs);
374386 int ustrnicmp(wchar_t const *lhs, wchar_t const *rhs, int maxlen);
375387 int utoi(wchar_t const *);
376388 double utof(wchar_t const *);
377 int utob(wchar_t const *);
378 int uisdigit(wchar_t);
389 bool utob(wchar_t const *);
390 bool uisdigit(wchar_t);
379391 wchar_t *ustrlow(wchar_t *s);
380392 wchar_t *ustrftime(const wchar_t *wfmt, const struct tm *timespec);
381 int cvt_ok(int charset, const wchar_t *s);
382 int charset_from_ustr(filepos *fpos, const wchar_t *name);
393 bool cvt_ok(int charset, const wchar_t *s);
394 int charset_from_ustr(filepos *fpos, const wchar_t *name, errorstate *);
383395
384396 /*
385397 * wcwidth.c
433445 void rdadds(rdstring *rs, wchar_t const *p);
434446 wchar_t *rdtrim(rdstring *rs);
435447 void rdaddc(rdstringc *rs, char c);
448 void rdaddc_rep(rdstringc *rs, char c, int repeat);
436449 void rdaddsc(rdstringc *rs, char const *p);
437450 void rdaddsn(rdstringc *rc, char const *p, int len);
438451 char *rdtrimc(rdstringc *rs);
454467 paragraph *cmdline_cfg_new(void);
455468 paragraph *cmdline_cfg_simple(char *string, ...);
456469
470 time_t current_time(void); /* use in place of time(NULL) */
471
457472 /*
458473 * input.c
459474 */
460 paragraph *read_input(input *in, indexdata *idx);
475 paragraph *read_input(input *in, indexdata *idx, psdata *psd);
461476
462477 /*
463478 * in_afm.c
464479 */
465 void read_afm_file(input *in);
480 void read_afm_file(input *in, psdata *psd);
466481
467482 /*
468483 * in_pf.c
469484 */
470 void read_pfa_file(input *in);
471 void read_pfb_file(input *in);
485 void read_pfa_file(input *in, psdata *psd);
486 void read_pfb_file(input *in, psdata *psd);
472487
473488 /*
474489 * in_sfnt.c
475490 */
476 void read_sfnt_file(input *in);
491 void read_sfnt_file(input *in, psdata *psd);
477492
478493 /*
479494 * keywords.c
493508 paragraph *para; /* the paragraph referenced */
494509 };
495510 keyword *kw_lookup(keywordlist *, wchar_t *);
496 keywordlist *get_keywords(paragraph *);
511 keywordlist *get_keywords(paragraph *, errorstate *);
497512 void free_keywords(keywordlist *);
498 void subst_keywords(paragraph *, keywordlist *);
513 void subst_keywords(paragraph *, keywordlist *, errorstate *);
499514
500515 /*
501516 * index.c
536551 void cleanup_index(indexdata *);
537552 /* index_merge takes responsibility for freeing arg 3 iff implicit; never
538553 * takes responsibility for arg 2 */
539 void index_merge(indexdata *, int is_explicit, wchar_t *, word *, filepos *);
554 void index_merge(indexdata *, bool is_explicit, wchar_t *, word *, filepos *,
555 errorstate *es);
540556 void build_index(indexdata *);
541557 void index_debug(indexdata *);
542558 indextag *index_findtag(indexdata *idx, wchar_t *name);
546562 */
547563 numberstate *number_init(void);
548564 void number_cfg(numberstate *, paragraph *);
549 word *number_mktext(numberstate *, paragraph *, wchar_t *, int *, int *);
565 word *number_mktext(numberstate *, paragraph *, wchar_t *, int *, bool *,
566 errorstate *es);
550567 void number_free(numberstate *);
551568
552569 /*
553570 * biblio.c
554571 */
555 void gen_citations(paragraph *, keywordlist *);
572 void gen_citations(paragraph *, keywordlist *, errorstate *);
556573
557574 /*
558575 * bk_text.c
559576 */
560 void text_backend(paragraph *, keywordlist *, indexdata *, void *);
577 void text_backend(paragraph *, keywordlist *, indexdata *, void *,
578 errorstate *);
561579 paragraph *text_config_filename(char *filename);
562580
563581 /*
564582 * bk_html.c
565583 */
566 void html_backend(paragraph *, keywordlist *, indexdata *, void *);
567 void chm_backend(paragraph *, keywordlist *, indexdata *, void *);
584 void html_backend(paragraph *, keywordlist *, indexdata *, void *,
585 errorstate *);
586 void chm_backend(paragraph *, keywordlist *, indexdata *, void *,
587 errorstate *);
568588 paragraph *html_config_filename(char *filename);
569589 paragraph *chm_config_filename(char *filename);
570590
571591 /*
572592 * bk_whlp.c
573593 */
574 void whlp_backend(paragraph *, keywordlist *, indexdata *, void *);
594 void whlp_backend(paragraph *, keywordlist *, indexdata *, void *,
595 errorstate *);
575596 paragraph *whlp_config_filename(char *filename);
576597
577598 /*
578599 * bk_man.c
579600 */
580 void man_backend(paragraph *, keywordlist *, indexdata *, void *);
601 void man_backend(paragraph *, keywordlist *, indexdata *, void *,
602 errorstate *);
581603 paragraph *man_config_filename(char *filename);
582604
583605 /*
584606 * bk_info.c
585607 */
586 void info_backend(paragraph *, keywordlist *, indexdata *, void *);
608 void info_backend(paragraph *, keywordlist *, indexdata *, void *,
609 errorstate *);
587610 paragraph *info_config_filename(char *filename);
588611
589612 /*
590613 * bk_paper.c
591614 */
592 void *paper_pre_backend(paragraph *, keywordlist *, indexdata *);
593 void listfonts(void);
615 void *paper_pre_backend(paragraph *, keywordlist *, indexdata *, psdata *,
616 errorstate *);
617 void listfonts(psdata *);
594618
595619 /*
596620 * bk_ps.c
597621 */
598 void ps_backend(paragraph *, keywordlist *, indexdata *, void *);
622 void ps_backend(paragraph *, keywordlist *, indexdata *, void *,
623 errorstate *);
599624 paragraph *ps_config_filename(char *filename);
600625
601626 /*
602627 * bk_pdf.c
603628 */
604 void pdf_backend(paragraph *, keywordlist *, indexdata *, void *);
629 void pdf_backend(paragraph *, keywordlist *, indexdata *, void *,
630 errorstate *);
605631 paragraph *pdf_config_filename(char *filename);
606632
607633 #endif
273273 * ----------------------------------
274274 * maxprob - nactivesyms
275275 *
276 * rounded up, of course. And we'll only even be trying
277 * this if
276 * rounded up, of course. And we'll only even be trying this if
277 * smallestfreq <= totalfreq / maxprob, which is precisely the
278 * condition under which the numerator of this fraction is
279 * positive.
280 *
281 * (As for the denominator, that could only be negative if there
282 * were more than F_{n+2} symbols overall, in which case it
283 * _wouldn't_ be possible to avoid having a symbol with
284 * probability at most 1/F_{n+2}. So that is a constraint on the
285 * input parameters to this function, which we enforce by
286 * assertion.)
278287 */
279288 num = totalfreq - smallestfreq * maxprob;
280289 denom = maxprob - nactivesyms;
290 assert(num > 0); /* this just restates the assert above */
291 assert(denom > 0); /* this is a constraint on the function parameters */
281292 adjust = (num + denom - 1) / denom;
282293
283294 /*
1212 in->pos.line++;
1313 c = getc(in->currfp);
1414 if (c == EOF) {
15 err_afmeof(&in->pos);
15 err_afmeof(in->es, &in->pos);
1616 return NULL;
1717 }
1818 line = snewn(len, char);
3838 return line;
3939 }
4040
41 static int afm_require_key(char *line, char const *expected, input *in) {
41 static bool afm_require_key(char *line, char const *expected, input *in) {
4242 char *key = strtok(line, " \t");
4343
4444 if (strcmp(key, expected) == 0)
45 return TRUE;
46 err_afmkey(&in->pos, expected);
47 return FALSE;
45 return true;
46 err_afmkey(in->es, &in->pos, expected);
47 return false;
4848 }
4949
50 void read_afm_file(input *in) {
50 void read_afm_file(input *in, psdata *psd) {
5151 char *line, *key, *val;
5252 font_info *fi;
5353 size_t i;
5454
5555 fi = snew(font_info);
5656 fi->name = NULL;
57 fi->widths = newtree234(width_cmp);
57 fi->widths = newtree234(width_cmp, NULL);
5858 fi->fontfile = NULL;
59 fi->kerns = newtree234(kern_cmp);
60 fi->ligs = newtree234(lig_cmp);
59 fi->kerns = newtree234(kern_cmp, NULL);
60 fi->ligs = newtree234(lig_cmp, NULL);
6161 fi->fontbbox[0] = fi->fontbbox[1] = fi->fontbbox[2] = fi->fontbbox[3] = 0;
6262 fi->capheight = fi->xheight = fi->ascent = fi->descent = 0;
6363 fi->stemh = fi->stemv = fi->italicangle = 0;
6868 if (!line || !afm_require_key(line, "StartFontMetrics", in))
6969 goto giveup;
7070 if (!(val = strtok(NULL, " \t"))) {
71 err_afmval(&in->pos, "StartFontMetrics", 1);
71 err_afmval(in->es, &in->pos, "StartFontMetrics", 1);
7272 goto giveup;
7373 }
7474 if (atof(val) >= 5.0) {
75 err_afmvers(&in->pos);
75 err_afmvers(in->es, &in->pos);
7676 goto giveup;
7777 }
7878 sfree(line);
8282 goto giveup;
8383 key = strtok(line, " \t");
8484 if (strcmp(key, "EndFontMetrics") == 0) {
85 fi->next = all_fonts;
86 all_fonts = fi;
85 fi->next = psd->all_fonts;
86 psd->all_fonts = fi;
8787 fclose(in->currfp);
8888 return;
8989 } else if (strcmp(key, "FontName") == 0) {
9090 if (!(val = strtok(NULL, " \t"))) {
91 err_afmval(&in->pos, key, 1);
91 err_afmval(in->es, &in->pos, key, 1);
9292 goto giveup;
9393 }
9494 fi->name = dupstr(val);
9696 int i;
9797 for (i = 0; i < 3; i++) {
9898 if (!(val = strtok(NULL, " \t"))) {
99 err_afmval(&in->pos, key, 4);
99 err_afmval(in->es, &in->pos, key, 4);
100100 goto giveup;
101101 }
102102 fi->fontbbox[i] = atof(val);
103103 }
104104 } else if (strcmp(key, "CapHeight") == 0) {
105105 if (!(val = strtok(NULL, " \t"))) {
106 err_afmval(&in->pos, key, 1);
106 err_afmval(in->es, &in->pos, key, 1);
107107 goto giveup;
108108 }
109109 fi->capheight = atof(val);
110110 } else if (strcmp(key, "XHeight") == 0) {
111111 if (!(val = strtok(NULL, " \t"))) {
112 err_afmval(&in->pos, key, 1);
112 err_afmval(in->es, &in->pos, key, 1);
113113 goto giveup;
114114 }
115115 fi->xheight = atof(val);
116116 } else if (strcmp(key, "Ascender") == 0) {
117117 if (!(val = strtok(NULL, " \t"))) {
118 err_afmval(&in->pos, key, 1);
118 err_afmval(in->es, &in->pos, key, 1);
119119 goto giveup;
120120 }
121121 fi->ascent = atof(val);
122122 } else if (strcmp(key, "Descender") == 0) {
123123 if (!(val = strtok(NULL, " \t"))) {
124 err_afmval(&in->pos, key, 1);
124 err_afmval(in->es, &in->pos, key, 1);
125125 goto giveup;
126126 }
127127 fi->descent = atof(val);
128128 } else if (strcmp(key, "CapHeight") == 0) {
129129 if (!(val = strtok(NULL, " \t"))) {
130 err_afmval(&in->pos, key, 1);
130 err_afmval(in->es, &in->pos, key, 1);
131131 goto giveup;
132132 }
133133 fi->capheight = atof(val);
134134 } else if (strcmp(key, "StdHW") == 0) {
135135 if (!(val = strtok(NULL, " \t"))) {
136 err_afmval(&in->pos, key, 1);
136 err_afmval(in->es, &in->pos, key, 1);
137137 goto giveup;
138138 }
139139 fi->stemh = atof(val);
140140 } else if (strcmp(key, "StdVW") == 0) {
141141 if (!(val = strtok(NULL, " \t"))) {
142 err_afmval(&in->pos, key, 1);
142 err_afmval(in->es, &in->pos, key, 1);
143143 goto giveup;
144144 }
145145 fi->stemv = atof(val);
146146 } else if (strcmp(key, "ItalicAngle") == 0) {
147147 if (!(val = strtok(NULL, " \t"))) {
148 err_afmval(&in->pos, key, 1);
148 err_afmval(in->es, &in->pos, key, 1);
149149 goto giveup;
150150 }
151151 fi->italicangle = atof(val);
152152 } else if (strcmp(key, "StartCharMetrics") == 0) {
153153 int nglyphs, i;
154154 if (!(val = strtok(NULL, " \t"))) {
155 err_afmval(&in->pos, key, 1);
155 err_afmval(in->es, &in->pos, key, 1);
156156 goto giveup;
157157 }
158158 nglyphs = atoi(val);
169169 if (strcmp(key, "WX") == 0 || strcmp(key, "W0X") == 0) {
170170 if (!(val = strtok(NULL, " \t")) ||
171171 !strcmp(val, ";")) {
172 err_afmval(&in->pos, key, 1);
172 err_afmval(in->es, &in->pos, key, 1);
173173 goto giveup;
174174 }
175175 width = atoi(val);
176176 } else if (strcmp(key, "N") == 0) {
177177 if (!(val = strtok(NULL, " \t")) ||
178178 !strcmp(val, ";")) {
179 err_afmval(&in->pos, key, 1);
180 goto giveup;
181 }
182 g = glyph_intern(val);
179 err_afmval(in->es, &in->pos, key, 1);
180 goto giveup;
181 }
182 g = glyph_intern(psd, val);
183183 } else if (strcmp(key, "L") == 0) {
184184 glyph succ, lig;
185185 if (!(val = strtok(NULL, " \t")) ||
186186 !strcmp(val, ";")) {
187 err_afmval(&in->pos, key, 1);
188 goto giveup;
189 }
190 succ = glyph_intern(val);
191 if (!(val = strtok(NULL, " \t")) ||
192 !strcmp(val, ";")) {
193 err_afmval(&in->pos, key, 1);
194 goto giveup;
195 }
196 lig = glyph_intern(val);
187 err_afmval(in->es, &in->pos, key, 1);
188 goto giveup;
189 }
190 succ = glyph_intern(psd, val);
191 if (!(val = strtok(NULL, " \t")) ||
192 !strcmp(val, ";")) {
193 err_afmval(in->es, &in->pos, key, 1);
194 goto giveup;
195 }
196 lig = glyph_intern(psd, val);
197197 if (g != NOGLYPH && succ != NOGLYPH &&
198198 lig != NOGLYPH) {
199199 ligature *l = snew(ligature);
229229 strcmp(key, "StartKernPairs0") == 0) {
230230 int nkerns, i;
231231 if (!(val = strtok(NULL, " \t"))) {
232 err_afmval(&in->pos, key, 1);
232 err_afmval(in->es, &in->pos, key, 1);
233233 goto giveup;
234234 }
235235 nkerns = atoi(val);
247247 nr = strtok(NULL, " \t");
248248 val = strtok(NULL, " \t");
249249 if (!val) {
250 err_afmval(&in->pos, key, 3);
250 err_afmval(in->es, &in->pos, key, 3);
251251 goto giveup;
252252 }
253 l = glyph_intern(nl);
254 r = glyph_intern(nr);
253 l = glyph_intern(psd, nl);
254 r = glyph_intern(psd, nr);
255255 if (l == -1 || r == -1) continue;
256256 kp = snew(kern_pair);
257257 kp->left = l;
4141 size_t offset;
4242 } pfstate;
4343
44 static void pf_identify(t1_font *tf);
44 static void pf_identify(t1_font *tf, psdata *, errorstate *);
4545
4646 static t1_data *load_pfb_file(FILE *fp, filepos *pos) {
4747 t1_data *head = NULL, *tail = NULL;
9494 return ret;
9595 }
9696
97 void read_pfa_file(input *in) {
97 void read_pfa_file(input *in, psdata *psd) {
9898 t1_font *tf = snew(t1_font);
9999
100100 tf->data = load_pfa_file(in->currfp, &in->pos);
101101 tf->pos = in->pos;
102102 tf->length1 = tf->length2 = 0;
103103 fclose(in->currfp);
104 pf_identify(tf);
105 }
106
107 void read_pfb_file(input *in) {
104 pf_identify(tf, psd, in->es);
105 }
106
107 void read_pfb_file(input *in, psdata *psd) {
108108 t1_font *tf = snew(t1_font);
109109
110110 tf->data = load_pfb_file(in->currfp, &in->pos);
111111 tf->pos = in->pos;
112112 tf->length1 = tf->length2 = 0;
113113 fclose(in->currfp);
114 pf_identify(tf);
114 pf_identify(tf, psd, in->es);
115115 }
116116 static char *pf_read_token(pfstate *);
117117
161161 return o + pf->offset;
162162 }
163163
164 static void pf_identify(t1_font *tf) {
164 static void pf_identify(t1_font *tf, psdata *psd, errorstate *es) {
165165 rdstringc rsc = { 0, 0, NULL };
166166 char *p;
167167 size_t len;
176176 c = pf_getc(pf);
177177 if (c == EOF) {
178178 sfree(rsc.text);
179 err_pfeof(&tf->pos);
179 err_pfeof(es, &tf->pos);
180180 return;
181181 }
182182 rdaddc(&rsc, c);
184184 p = rsc.text;
185185 if ((p = strchr(p, ':')) == NULL) {
186186 sfree(rsc.text);
187 err_pfhead(&tf->pos);
187 err_pfhead(es, &tf->pos);
188188 return;
189189 }
190190 p++;
195195 fontname[len] = 0;
196196 sfree(rsc.text);
197197
198 for (fi = all_fonts; fi; fi = fi->next) {
198 for (fi = psd->all_fonts; fi; fi = fi->next) {
199199 if (strcmp(fi->name, fontname) == 0) {
200200 fi->fontfile = tf;
201201 fi->filetype = TYPE1;
203203 return;
204204 }
205205 }
206 err_pfnoafm(&tf->pos, fontname);
206 err_pfnoafm(es, &tf->pos, fontname);
207207 sfree(fontname);
208208 }
209209
210210 /*
211211 * PostScript white space characters; PLRM3 table 3.1
212212 */
213 static int pf_isspace(int c) {
213 static bool pf_isspace(int c) {
214214 return c == 000 || c == 011 || c == 012 || c == 014 || c == 015 ||
215215 c == ' ';
216216 }
218218 /*
219219 * PostScript special characters; PLRM3 page 27
220220 */
221 static int pf_isspecial(int c) {
221 static bool pf_isspecial(int c) {
222222 return c == '(' || c == ')' || c == '<' || c == '>' || c == '[' ||
223223 c == ']' || c == '{' || c == '}' || c == '/' || c == '%';
224224 }
246246 }
247247 }
248248
249 static size_t pf_length1(t1_font *tf) {
249 static size_t pf_length1(t1_font *tf, errorstate *es) {
250250 size_t ret;
251251
252252 ret = pf_findtoken(tf, 0, "eexec");
253253 if (ret == (size_t)-1) {
254 err_pfeof(&tf->pos);
254 err_pfeof(es, &tf->pos);
255255 return 0;
256256 }
257257 return ret;
258258 }
259259
260 static size_t pf_length2(t1_font *tf) {
260 static size_t pf_length2(t1_font *tf, errorstate *es) {
261261 size_t ret;
262262
263263 if (tf->length1 == 0)
264 tf->length1 = pf_length1(tf);
264 tf->length1 = pf_length1(tf, es);
265265 ret = pf_findtoken(tf, tf->length1, "cleartomark");
266266 if (ret == (size_t)-1) {
267 err_pfeof(&tf->pos);
267 err_pfeof(es, &tf->pos);
268268 return 0;
269269 }
270270 return ret - 12 - tf->length1; /* backspace over "cleartomark\n" */
324324 char **bufp, size_t *lenp) {
325325 t1_data *td = tf->data;
326326 size_t blk, i;
327 int havenybble = 0;
327 bool havenybble = false;
328328 char *p, nybble;
329329
330330 while (td && off >= td->length) {
362362 /*
363363 * Return the initial, unencrypted, part of a font.
364364 */
365 void pf_part1(font_info *fi, char **bufp, size_t *lenp) {
365 void pf_part1(font_info *fi, char **bufp, size_t *lenp, errorstate *es) {
366366 t1_font *tf = fi->fontfile;
367367
368368 if (tf->length1 == 0)
369 tf->length1 = pf_length1(tf);
369 tf->length1 = pf_length1(tf, es);
370370 pf_getascii(tf, 0, tf->length1, bufp, lenp);
371371 }
372372
373373 /*
374374 * Return the middle, encrypted, part of a font.
375375 */
376 void pf_part2(font_info *fi, char **bufp, size_t *lenp) {
376 void pf_part2(font_info *fi, char **bufp, size_t *lenp, errorstate *es) {
377377 t1_font *tf = fi->fontfile;
378378
379379 if (tf->length2 == 0)
380 tf->length2 = pf_length2(tf);
380 tf->length2 = pf_length2(tf, es);
381381 pf_getbinary(tf, tf->length1, tf->length2, bufp, lenp);
382382 if (*lenp >= 256)
383383 *lenp -= 256;
8585 }
8686 #define d_end decode_end, 0, 0
8787
88 static void *decode(sfnt_decode *dec, void *src, void *end, void *dest) {
88 static void *decode(const sfnt_decode *dec, void *src, void *end, void *dest) {
8989 while (dec->decoder != decode_end) {
9090 if ((char *)src + dec->src_len > (char *)end) return NULL;
9191 dec->decoder(src, (char *)dest + dec->dest_offset);
9595 return src;
9696 }
9797
98 static void *decoden(sfnt_decode *dec, void *src, void *end, void *dest,
98 static void *decoden(const sfnt_decode *dec, void *src, void *end, void *dest,
9999 size_t size, size_t n) {
100100 while (n-- && src) {
101101 src = decode(dec, src, end, dest);
105105 }
106106
107107 /* Decoding specs for simple data types */
108 sfnt_decode uint16_decode[] = { { d_uint16, 0 }, { d_end } };
109 sfnt_decode int16_decode[] = { { d_int16, 0 }, { d_end } };
110 sfnt_decode uint32_decode[] = { { d_uint32, 0 }, { d_end } };
108 const sfnt_decode uint16_decode[] = { { d_uint16, 0 }, { d_end } };
109 const sfnt_decode int16_decode[] = { { d_int16, 0 }, { d_end } };
110 const sfnt_decode uint32_decode[] = { { d_uint32, 0 }, { d_end } };
111111
112112 /* Offset subdirectory -- the start of the file */
113113 typedef struct offsubdir_Tag offsubdir;
115115 unsigned scaler_type;
116116 unsigned numTables;
117117 };
118 sfnt_decode offsubdir_decode[] = {
118 const sfnt_decode offsubdir_decode[] = {
119119 { d_uint32, offsetof(offsubdir, scaler_type) },
120120 { d_uint16, offsetof(offsubdir, numTables) },
121121 { d_skip(6) },
144144 unsigned offset;
145145 unsigned length;
146146 };
147 sfnt_decode tabledir_decode[] = {
147 const sfnt_decode tabledir_decode[] = {
148148 { d_uint32, offsetof(tabledir, tag) },
149149 { d_uint32, offsetof(tabledir, checkSum) },
150150 { d_uint32, offsetof(tabledir, offset) },
159159 int sTypoAscender, sTypoDescender;
160160 int sxHeight, sCapHeight;
161161 };
162 sfnt_decode t_OS_2_v0_decode[] = {
162 const sfnt_decode t_OS_2_v0_decode[] = {
163163 { d_uint16, offsetof(t_OS_2, version) },
164164 { d_skip(66) }, /* xAvgCharWidth, usWeightClass, usWidthClass, fsType, */
165165 /* ySubscriptXSize, ySubscriptYSize, ySubscriptXOffset, */
169169 /* achVendID, fsSelection, usFirstCharIndex, usLastCharIndex */
170170 { d_end }
171171 };
172 sfnt_decode t_OS_2_v1_decode[] = {
172 const sfnt_decode t_OS_2_v1_decode[] = {
173173 { d_uint16, offsetof(t_OS_2, version) },
174174 { d_skip(66) }, /* xAvgCharWidth, usWeightClass, usWidthClass, fsType, */
175175 /* ySubscriptXSize, ySubscriptYSize, ySubscriptXOffset, */
183183 /* ulCodePageRange1, ulCodePageRange2 */
184184 { d_end }
185185 };
186 sfnt_decode t_OS_2_v2_decode[] = {
186 const sfnt_decode t_OS_2_v2_decode[] = {
187187 { d_uint16, offsetof(t_OS_2, version) },
188188 { d_skip(66) }, /* xAvgCharWidth, usWeightClass, usWidthClass, fsType, */
189189 /* ySubscriptXSize, ySubscriptYSize, ySubscriptXOffset, */
206206 struct t_cmap_Tag {
207207 unsigned numTables;
208208 };
209 sfnt_decode t_cmap_decode[] = {
209 const sfnt_decode t_cmap_decode[] = {
210210 { d_skip(2) },
211211 { d_uint16, offsetof(t_cmap, numTables) },
212212 { d_end }
217217 unsigned encodingID;
218218 unsigned offset;
219219 };
220 sfnt_decode encodingrec_decode[] = {
220 const sfnt_decode encodingrec_decode[] = {
221221 { d_uint16, offsetof(encodingrec, platformID) },
222222 { d_uint16, offsetof(encodingrec, encodingID) },
223223 { d_uint32, offsetof(encodingrec, offset) },
228228 unsigned length;
229229 unsigned segCountX2;
230230 };
231 sfnt_decode cmap4_decode[] = {
231 const sfnt_decode cmap4_decode[] = {
232232 { d_skip(2) }, /* format */
233233 { d_uint16, offsetof(cmap4, length) },
234234 { d_skip(2) }, /* language */
247247 int xMin, yMin, xMax, yMax;
248248 int indexToLocFormat;
249249 };
250 sfnt_decode t_head_decode[] = {
250 const sfnt_decode t_head_decode[] = {
251251 { d_uint32, offsetof(t_head, version) },
252252 { d_uint32, offsetof(t_head, fontRevision) },
253253 { d_skip(8) }, /* checkSumAdjustment, magicNumber, flags */
274274 int metricDataFormat;
275275 unsigned numOfLongHorMetrics;
276276 };
277 sfnt_decode t_hhea_decode[] = {
277 const sfnt_decode t_hhea_decode[] = {
278278 { d_uint32, offsetof(t_hhea, version) },
279279 { d_int16, offsetof(t_hhea, ascent) },
280280 { d_int16, offsetof(t_hhea, descent) },
286286 };
287287
288288 /* Horizontal Metrics ('hmtx') table */
289 sfnt_decode longhormetric_decode[] = {
289 const sfnt_decode longhormetric_decode[] = {
290290 { d_uint16, 0 },
291291 { d_skip(2) },
292292 { d_end }
298298 unsigned version;
299299 unsigned nTables;
300300 };
301 sfnt_decode t_kern_v0_decode[] = {
301 const sfnt_decode t_kern_v0_decode[] = {
302302 { d_uint16, offsetof(t_kern, version) },
303303 { d_uint16, offsetof(t_kern, nTables) },
304304 { d_end }
309309 unsigned length;
310310 unsigned coverage;
311311 };
312 sfnt_decode kern_v0_subhdr_decode[] = {
312 const sfnt_decode kern_v0_subhdr_decode[] = {
313313 { d_uint16, offsetof(kern_v0_subhdr, version) },
314314 { d_uint16, offsetof(kern_v0_subhdr, length) },
315315 { d_uint16, offsetof(kern_v0_subhdr, coverage) },
321321 #define KERN_V0_OVERRIDE 0x0008
322322 #define KERN_V0_FORMAT 0xff00
323323 #define KERN_V0_FORMAT_0 0x0000
324 sfnt_decode t_kern_v1_decode[] = {
324 const sfnt_decode t_kern_v1_decode[] = {
325325 { d_uint32, offsetof(t_kern, version) },
326326 { d_uint32, offsetof(t_kern, nTables) },
327327 { d_end }
331331 unsigned length;
332332 unsigned coverage;
333333 };
334 sfnt_decode kern_v1_subhdr_decode[] = {
334 const sfnt_decode kern_v1_subhdr_decode[] = {
335335 { d_uint32, offsetof(kern_v1_subhdr, length) },
336336 { d_uint16, offsetof(kern_v1_subhdr, coverage) },
337337 { d_skip(2) }, /* tupleIndex */
346346 struct kern_f0_Tag {
347347 unsigned nPairs;
348348 };
349 sfnt_decode kern_f0_decode[] = {
349 const sfnt_decode kern_f0_decode[] = {
350350 { d_uint16, offsetof(kern_f0, nPairs) },
351351 { d_skip(6) }, /* searchRange, entrySelector, rangeShift */
352352 { d_end }
357357 unsigned right;
358358 int value;
359359 };
360 sfnt_decode kern_f0_pair_decode[] = {
360 const sfnt_decode kern_f0_pair_decode[] = {
361361 { d_uint16, offsetof(kern_f0_pair, left) },
362362 { d_uint16, offsetof(kern_f0_pair, right) },
363363 { d_int16, offsetof(kern_f0_pair, value) },
370370 unsigned version;
371371 unsigned numGlyphs;
372372 };
373 sfnt_decode t_maxp_decode[] = {
373 const sfnt_decode t_maxp_decode[] = {
374374 { d_uint32, offsetof(t_maxp, version) },
375375 { d_uint16, offsetof(t_maxp, numGlyphs) },
376376 { d_end }
385385 unsigned stringOffset;
386386 namerecord *nameRecord;
387387 };
388 sfnt_decode t_name_decode[] = {
388 const sfnt_decode t_name_decode[] = {
389389 { d_uint16, offsetof(t_name, format) },
390390 { d_uint16, offsetof(t_name, count) },
391391 { d_uint16, offsetof(t_name, stringOffset) },
399399 unsigned length;
400400 unsigned offset;
401401 };
402 sfnt_decode namerecord_decode[] = {
402 const sfnt_decode namerecord_decode[] = {
403403 { d_uint16, offsetof(namerecord, platformID) },
404404 { d_uint16, offsetof(namerecord, encodingID) },
405405 { d_uint16, offsetof(namerecord, languageID) },
420420 unsigned minMemType42;
421421 unsigned maxMemType42;
422422 };
423 sfnt_decode t_post_decode[] = {
423 const sfnt_decode t_post_decode[] = {
424424 { d_uint32, offsetof(t_post, format) },
425425 { d_int32, offsetof(t_post, italicAngle) },
426426 { d_int16, offsetof(t_post, underlinePosition) },
451451 unsigned minmem, maxmem;
452452 };
453453
454 static int sfnt_findtable(sfnt *sf, unsigned tag,
455 void **startp, void **endp) {
454 static bool sfnt_findtable(sfnt *sf, unsigned tag,
455 void **startp, void **endp) {
456456 size_t i;
457457
458458 for (i = 0; i < sf->osd.numTables; i++) {
459459 if (sf->td[i].tag == tag) {
460460 *startp = (char *)sf->data + sf->td[i].offset;
461461 *endp = (char *)*startp + sf->td[i].length;
462 return TRUE;
462 return true;
463463 }
464464 }
465 return FALSE;
466 }
467
468 static char *sfnt_psname(font_info *fi) {
465 return false;
466 }
467
468 static char *sfnt_psname(font_info *fi, errorstate *es) {
469469 sfnt *sf = fi->fontfile;
470470 t_name name;
471471 void *ptr, *end;
474474 namerecord *nr;
475475
476476 if (!sfnt_findtable(sf, TAG_name, &ptr, &end)) {
477 err_sfntnotable(&sf->pos, "name");
477 err_sfntnotable(es, &sf->pos, "name");
478478 return NULL;
479479 }
480480 ptr = decode(t_name_decode, ptr, end, &name);
495495 }
496496 }
497497 }
498 err_sfntnopsname(&sf->pos);
498 err_sfntnopsname(es, &sf->pos);
499499 return NULL;
500500 }
501501
519519 }
520520
521521 /* Generate an name for a glyph that doesn't have one. */
522 static glyph genglyph(unsigned idx) {
523 char buf[11];
524 if (idx == 0) return glyph_intern(".notdef");
522 static glyph genglyph(psdata *psd, unsigned idx) {
523 char buf[64];
524 if (idx == 0) return glyph_intern(psd, ".notdef");
525525 sprintf(buf, "glyph%u", idx);
526 return glyph_intern(buf);
526 return glyph_intern(psd, buf);
527527 }
528528
529529 /*
532532 * TODO: cope better with duplicated glyph names (usually .notdef)
533533 * TODO: when presented with format 3.0, try to use 'CFF' if present.
534534 */
535 static void sfnt_mapglyphs(font_info *fi) {
535 static void sfnt_mapglyphs(font_info *fi, psdata *psd, errorstate *es) {
536536 sfnt *sf = fi->fontfile;
537537 t_post post;
538538 void *ptr, *end;
545545 if (sfnt_findtable(sf, TAG_post, &ptr, &end)) {
546546 ptr = decode(t_post_decode, ptr, end, &post);
547547 if (ptr == NULL) {
548 err_sfntbadtable(&sf->pos, "post");
548 err_sfntbadtable(es, &sf->pos, "post");
549549 goto noglyphs;
550550 }
551551
555555 switch (post.format) {
556556 case 0x00010000:
557557 if (sf->nglyphs != 258) {
558 err_sfntbadtable(&sf->pos, "post");
558 err_sfntbadtable(es, &sf->pos, "post");
559559 break;
560560 }
561561 sf->glyphsbyindex = (glyph *)tt_std_glyphs;
562562 break;
563563 case 0x00020000:
564564 if ((char *)ptr + 2 > (char *)end) {
565 err_sfntbadtable(&sf->pos, "post");
565 err_sfntbadtable(es, &sf->pos, "post");
566566 break;
567567 }
568568 ptr = (char *)ptr + 2;
569569 if ((char *)ptr + 2*sf->nglyphs > (char *)end) {
570 err_sfntbadtable(&sf->pos, "post");
570 err_sfntbadtable(es, &sf->pos, "post");
571571 break;
572572 }
573573 nextras = 0;
583583 memcpy(tmp, sptr + 1, *sptr);
584584 tmp[*sptr] = 0;
585585 assert(i < nextras);
586 extraglyphs[i++] = glyph_intern(tmp);
586 extraglyphs[i++] = glyph_intern(psd, tmp);
587587 }
588588 sf->glyphsbyindex = snewn(sf->nglyphs, glyph);
589589 for (i = 0; i < sf->nglyphs; i++) {
593593 else if (g < 258 + nextras)
594594 sf->glyphsbyindex[i] = extraglyphs[g - 258];
595595 else {
596 err_sfntbadtable(&sf->pos, "post");
597 sf->glyphsbyindex[i] = genglyph(i);
596 err_sfntbadtable(es, &sf->pos, "post");
597 sf->glyphsbyindex[i] = genglyph(psd, i);
598598 }
599599 }
600600 sfree(extraglyphs);
602602 case 0x00030000:
603603 break;
604604 default:
605 err_sfnttablevers(&sf->pos, "post");
605 err_sfnttablevers(es, &sf->pos, "post");
606606 break;
607607 }
608608 }
610610 if (!sf->glyphsbyindex) {
611611 sf->glyphsbyindex = snewn(sf->nglyphs, glyph);
612612 for (i = 0; i < sf->nglyphs; i++)
613 sf->glyphsbyindex[i] = genglyph(i);
613 sf->glyphsbyindex[i] = genglyph(psd, i);
614614 }
615615 /* Construct glyphsbyname */
616616 sf->glyphsbyname = snewn(sf->nglyphs, unsigned short);
632632 suflen = 4;
633633 for (i = 0; i < sf->nglyphs; i++) {
634634 char const *p;
635 p = strrchr(glyph_extern(sfnt_indextoglyph(sf, i)), '.');
635 p = strrchr(glyph_extern(psd, sfnt_indextoglyph(sf, i)), '.');
636636 if (p && !(p+1)[strspn(p+1, "0123456789")] && strlen(p+1) > suflen)
637637 suflen = strlen(p+1);
638638 }
642642 if (prev == (this = sfnt_indextoglyph(sf, sf->glyphsbyname[i]))) {
643643 char const *basename;
644644 char *buf;
645 basename = glyph_extern(this);
645 basename = glyph_extern(psd, this);
646646 buf = snewn(strlen(basename) + 2 + suflen, char);
647647 strcpy(buf, basename);
648648 sprintf(buf + strlen(basename), ".%0*hu", suflen,
649649 sf->glyphsbyname[i]);
650 sf->glyphsbyindex[sf->glyphsbyname[i]] = glyph_intern(buf);
650 sf->glyphsbyindex[sf->glyphsbyname[i]] = glyph_intern(psd, buf);
651651 sfree(buf);
652652 }
653653 prev = this;
675675 /*
676676 * Get data from 'hhea', 'hmtx', and 'OS/2' tables
677677 */
678 void sfnt_getmetrics(font_info *fi) {
678 void sfnt_getmetrics(font_info *fi, errorstate *es) {
679679 sfnt *sf = fi->fontfile;
680680 t_hhea hhea;
681681 t_OS_2 OS_2;
689689 fi->fontbbox[2] = sf->head.xMax * FUNITS_PER_PT / sf->head.unitsPerEm;
690690 fi->fontbbox[3] = sf->head.yMax * FUNITS_PER_PT / sf->head.unitsPerEm;
691691 if (!sfnt_findtable(sf, TAG_hhea, &ptr, &end)) {
692 err_sfntnotable(&sf->pos, "hhea");
692 err_sfntnotable(es, &sf->pos, "hhea");
693693 return;
694694 }
695695 if (decode(t_hhea_decode, ptr, end, &hhea) == NULL) {
696 err_sfntbadtable(&sf->pos, "hhea");
696 err_sfntbadtable(es, &sf->pos, "hhea");
697697 return;
698698 }
699699 if ((hhea.version & 0xffff0000) != 0x00010000) {
700 err_sfnttablevers(&sf->pos, "hhea");
700 err_sfnttablevers(es, &sf->pos, "hhea");
701701 return;
702702 }
703703 fi->ascent = hhea.ascent;
704704 fi->descent = hhea.descent;
705705 if (hhea.metricDataFormat != 0) {
706 err_sfnttablevers(&sf->pos, "hmtx");
706 err_sfnttablevers(es, &sf->pos, "hmtx");
707707 return;
708708 }
709709 if (!sfnt_findtable(sf, TAG_hmtx, &ptr, &end)) {
710 err_sfntnotable(&sf->pos, "hmtx");
710 err_sfntnotable(es, &sf->pos, "hmtx");
711711 return;
712712 }
713713 hmtx = snewn(hhea.numOfLongHorMetrics, unsigned);
714714 if (decoden(longhormetric_decode, ptr, end, hmtx, sizeof(*hmtx),
715715 hhea.numOfLongHorMetrics) == NULL) {
716 err_sfntbadtable(&sf->pos, "hmtx");
716 err_sfntbadtable(es, &sf->pos, "hmtx");
717717 return;
718718 }
719719 for (i = 0; i < sf->nglyphs; i++) {
742742 fi->descent = OS_2.sTypoDescender * FUNITS_PER_PT / sf->head.unitsPerEm;
743743 return;
744744 bados2:
745 err_sfntbadtable(&sf->pos, "OS/2");
745 err_sfntbadtable(es, &sf->pos, "OS/2");
746746 }
747747
748748 /*
754754 * pairs for horizontal kerning of horizontal text, and ignores
755755 * everything else.
756756 */
757 static void sfnt_getkern(font_info *fi) {
757 static void sfnt_getkern(font_info *fi, errorstate *es) {
758758 sfnt *sf = fi->fontfile;
759759 t_kern kern;
760760 unsigned version, i, j;
812812 }
813813 return;
814814 bad:
815 err_sfntbadtable(&sf->pos, "kern");
815 err_sfntbadtable(es, &sf->pos, "kern");
816816 return;
817817 }
818818
824824 * Unicode 1.1 with precomposed Hangul syllables. We only handle
825825 * format 4 of this table, since that seems to be the only one in use.
826826 */
827 void sfnt_getmap(font_info *fi) {
827 static void sfnt_getmap(font_info *fi, errorstate *es) {
828828 sfnt *sf = fi->fontfile;
829829 t_cmap cmap;
830830 encodingrec *esd;
836836 for (i = 0; i < lenof(fi->bmp); i++)
837837 fi->bmp[i] = 0xFFFF;
838838 if (!sfnt_findtable(sf, TAG_cmap, &ptr, &end)) {
839 err_sfntnotable(&sf->pos, "cmap");
839 err_sfntnotable(es, &sf->pos, "cmap");
840 return;
840841 }
841842 base = ptr;
842843 ptr = decode(t_cmap_decode, ptr, end, &cmap);
885886 idx = (k + idDelta[j]) & 0xffff;
886887 if (idx != 0) {
887888 if (idx > sf->nglyphs) {
888 err_sfntbadglyph(&sf->pos, k);
889 err_sfntbadglyph(es, &sf->pos, k);
889890 continue;
890891 }
891892 fi->bmp[k] = sfnt_indextoglyph(sf, idx);
896897 for (k = startCode[j]; k <= endCode[j]; k++) {
897898 if (startidx + k - startCode[j] >=
898899 nglyphindex) {
899 err_sfntbadglyph(&sf->pos, k);
900 err_sfntbadglyph(es, &sf->pos, k);
900901 continue;
901902 }
902903 idx = glyphIndexArray[startidx + k - startCode[j]];
903904 if (idx != 0) {
904905 idx = (idx + idDelta[j]) & 0xffff;
905906 if (idx > sf->nglyphs) {
906 err_sfntbadglyph(&sf->pos, k);
907 err_sfntbadglyph(es, &sf->pos, k);
907908 continue;
908909 }
909910 fi->bmp[k] = sfnt_indextoglyph(sf, idx);
916917 }
917918 }
918919 }
919 err_sfntnounicmap(&sf->pos);
920 err_sfntnounicmap(es, &sf->pos);
920921 return;
921922 bad:
922 err_sfntbadtable(&sf->pos, "cmap");
923 }
924
925 void read_sfnt_file(input *in) {
923 err_sfntbadtable(es, &sf->pos, "cmap");
924 }
925
926 void read_sfnt_file(input *in, psdata *psd) {
926927 sfnt *sf = snew(sfnt);
927928 size_t off = 0, got;
928929 FILE *fp = in->currfp;
931932 t_maxp maxp;
932933
933934 fi->name = NULL;
934 fi->widths = newtree234(width_cmp);
935 fi->kerns = newtree234(kern_cmp);
936 fi->ligs = newtree234(lig_cmp);
935 fi->widths = newtree234(width_cmp, NULL);
936 fi->kerns = newtree234(kern_cmp, NULL);
937 fi->ligs = newtree234(lig_cmp, NULL);
937938 fi->fontbbox[0] = fi->fontbbox[1] = fi->fontbbox[2] = fi->fontbbox[3] = 0;
938939 fi->capheight = fi->xheight = fi->ascent = fi->descent = 0;
939940 fi->stemh = fi->stemv = fi->italicangle = 0;
958959 sf->nglyphs = 0;
959960 ptr = decode(offsubdir_decode, sf->data, sf->end, &sf->osd);
960961 if (ptr == NULL) {
961 err_sfntbadhdr(&sf->pos);
962 err_sfntbadhdr(in->es, &sf->pos);
962963 return;
963964 }
964965 sf->td = snewn(sf->osd.numTables, tabledir);
965966 ptr = decoden(tabledir_decode, ptr, sf->end, sf->td, sizeof(*sf->td),
966967 sf->osd.numTables);
967968 if (ptr == NULL) {
968 err_sfntbadhdr(&sf->pos);
969 err_sfntbadhdr(in->es, &sf->pos);
969970 return;
970971 }
971972 if (!sfnt_findtable(sf, TAG_head, &ptr, &end)) {
972 err_sfntnotable(&sf->pos, "head");
973 err_sfntnotable(in->es, &sf->pos, "head");
973974 return;
974975 }
975976 if (decode(t_head_decode, ptr, end, &sf->head) == NULL) {
976 err_sfntbadtable(&sf->pos, "head");
977 err_sfntbadtable(in->es, &sf->pos, "head");
977978 return;
978979 }
979980 if ((sf->head.version & 0xffff0000) != 0x00010000) {
980 err_sfnttablevers(&sf->pos, "head");
981 err_sfnttablevers(in->es, &sf->pos, "head");
981982 return;
982983 }
983984 if (!sfnt_findtable(sf, TAG_maxp, &ptr, &end)) {
984 err_sfntnotable(&sf->pos, "maxp");
985 err_sfntnotable(in->es, &sf->pos, "maxp");
985986 return;
986987 }
987988 if (decode(t_maxp_decode, ptr, end, &maxp) == NULL) {
988 err_sfntbadtable(&sf->pos, "maxp");
989 err_sfntbadtable(in->es, &sf->pos, "maxp");
989990 return;
990991 }
991992 if (maxp.version < 0x00005000 || maxp.version > 0x0001ffff) {
992 err_sfnttablevers(&sf->pos, "maxp");
993 err_sfnttablevers(in->es, &sf->pos, "maxp");
993994 return;
994995 }
995996 sf->nglyphs = maxp.numGlyphs;
996 fi->name = sfnt_psname(fi);
997 fi->name = sfnt_psname(fi, in->es);
997998 if (fi->name == NULL) return;
998 sfnt_mapglyphs(fi);
999 sfnt_getmetrics(fi);
1000 sfnt_getkern(fi);
1001 sfnt_getmap(fi);
1002 fi->next = all_fonts;
1003 all_fonts = fi;
999 sfnt_mapglyphs(fi, psd, in->es);
1000 sfnt_getmetrics(fi, in->es);
1001 sfnt_getkern(fi, in->es);
1002 sfnt_getmap(fi, in->es);
1003 fi->next = psd->all_fonts;
1004 psd->all_fonts = fi;
10041005 }
10051006
10061007 static int sizecmp(const void *a, const void *b) {
10151016 * <http://partners.adobe.com/public/developer/en/font/5012.Type42_Spec.pdf>
10161017 */
10171018
1018 void sfnt_writeps(font_info const *fi, FILE *ofp) {
1019 void sfnt_writeps(font_info const *fi, FILE *ofp, psdata *psd, errorstate *es) {
10191020 unsigned i, j, lastbreak;
10201021 sfnt *sf = fi->fontfile;
10211022 size_t *breaks, glyfoff, glyflen;
10521053 fprintf(ofp, "0 1 %u{currentfile token pop exch def}bind for\n",
10531054 sf->nglyphs - 1);
10541055 for (i = 0; i < sf->nglyphs; i++)
1055 ps_token(ofp, &cc, "/%s", glyph_extern(sfnt_indextoglyph(sf, i)));
1056 ps_token(ofp, &cc, "/%s", glyph_extern(psd, sfnt_indextoglyph(sf, i)));
10561057 fprintf(ofp, "\nend readonly def\n");
10571058 fprintf(ofp, "/sfnts [<");
10581059 breaks = snewn(sf->osd.numTables + sf->nglyphs, size_t);
10601061 breaks[i] = sf->td[i].offset;
10611062 }
10621063 if (!sfnt_findtable(sf, TAG_glyf, &glyfptr, &glyfend)) {
1063 err_sfntnotable(&sf->pos, "glyf");
1064 err_sfntnotable(es, &sf->pos, "glyf");
10641065 return;
10651066 }
10661067 glyfoff = (char *)glyfptr - (char *)sf->data;
10671068 glyflen = (char *)glyfend - (char *)glyfptr;
10681069 if (!sfnt_findtable(sf, TAG_loca, &locaptr, &locaend)) {
1069 err_sfntnotable(&sf->pos, "loca");
1070 err_sfntnotable(es, &sf->pos, "loca");
10701071 return;
10711072 }
10721073 loca = snewn(sf->nglyphs, unsigned);
10991100 fprintf(ofp, "end /%s exch definefont\n", fi->name);
11001101 return;
11011102 badloca:
1102 err_sfntbadtable(&sf->pos, "loca");
1103 err_sfntbadtable(es, &sf->pos, "loca");
11031104 }
11041105
11051106 void sfnt_data(font_info *fi, char **bufp, size_t *lenp) {
55 #include <stdlib.h>
66 #include "halibut.h"
77
8 static int compare_tags(void *av, void *bv);
9 static int compare_entries(void *av, void *bv);
8 static int compare_tags(const void *av, const void *bv, void *cmpctx);
9 static int compare_entries(const void *av, const void *bv, void *cmpctx);
1010
1111 indexdata *make_index(void) {
1212 indexdata *ret = snew(indexdata);
13 ret->tags = newtree234(compare_tags);
14 ret->entries = newtree234(compare_entries);
13 ret->tags = newtree234(compare_tags, NULL);
14 ret->entries = newtree234(compare_entries, NULL);
1515 return ret;
1616 }
1717
2626 return ret;
2727 }
2828
29 static int compare_tags(void *av, void *bv) {
30 indextag *a = (indextag *)av, *b = (indextag *)bv;
29 static int compare_tags(const void *av, const void *bv, void *cmpctx) {
30 const indextag *a = (const indextag *)av, *b = (const indextag *)bv;
3131 return ustricmp(a->name, b->name);
3232 }
3333
34 static int compare_to_find_tag(void *av, void *bv) {
35 wchar_t *a = (wchar_t *)av;
36 indextag *b = (indextag *)bv;
34 static int compare_to_find_tag(const void *av, const void *bv, void *cmpctx) {
35 const wchar_t *a = (const wchar_t *)av;
36 const indextag *b = (const indextag *)bv;
3737 return ustricmp(a, b->name);
3838 }
3939
40 static int compare_entries(void *av, void *bv) {
40 static int compare_entries(const void *av, const void *bv, void *cmpctx) {
4141 indexentry *a = (indexentry *)av, *b = (indexentry *)bv;
4242 return compare_wordlists(a->text, b->text);
4343 }
4646 * Back-end utility: find the indextag with a given name.
4747 */
4848 indextag *index_findtag(indexdata *idx, wchar_t *name) {
49 return find234(idx->tags, name, compare_to_find_tag);
49 return findcmp234(idx->tags, name, compare_to_find_tag, NULL);
5050 }
5151
5252 /*
5757 * Guarantee on calling sequence: all implicit merges are given
5858 * before the explicit ones.
5959 */
60 void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text,
61 filepos *fpos) {
60 void index_merge(indexdata *idx, bool is_explicit, wchar_t *tags, word *text,
61 filepos *fpos, errorstate *es) {
6262 indextag *t, *existing;
6363
6464 /*
9898 * warn (and drop it, since it won't be referenced).
9999 */
100100 if (is_explicit) {
101 err_nosuchidxtag(fpos, tags);
101 err_nosuchidxtag(es, fpos, tags);
102102 continue;
103103 }
104104
122122 * see if the cases match.
123123 */
124124 if (ustrcmp(t->name, existing->name)) {
125 err_indexcase(fpos, t->name,
125 err_indexcase(es, fpos, t->name,
126126 &existing->implicit_fpos, existing->name);
127127 }
128128
213213 }
214214
215215 static void dbg_prtwordlist(int level, word *w);
216 static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text);
216 static void dbg_prtmerge(bool is_explicit, wchar_t *tag, word *text);
217217
218218 void index_debug(indexdata *i) {
219219 indextag *t;
239239 }
240240 }
241241
242 static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text) {
242 static void dbg_prtmerge(bool is_explicit, wchar_t *tag, word *text) {
243243 printf("\\IM: %splicit: \"", is_explicit ? "ex" : "im");
244244 for (; *tag; tag++)
245245 putchar(*tag);
+173
-130
input.c less more
3838 int ptr, npushback;
3939 filepos pos;
4040 };
41 static int macrocmp(void *av, void *bv) {
41 static int macrocmp(const void *av, const void *bv, void *cmpctx) {
4242 macro *a = (macro *)av, *b = (macro *)bv;
4343 return ustrcmp(a->name, b->name);
4444 }
4545 static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
46 filepos fpos) {
46 filepos fpos, errorstate *es) {
4747 macro *m = snew(macro);
4848 m->name = name;
4949 m->text = text;
5050 if (add234(macros, m) != m) {
51 err_macroexists(&fpos, name);
51 err_macroexists(es, &fpos, name);
5252 sfree(name);
5353 sfree(text);
5454 }
5555 }
56 static int macrolookup(tree234 *macros, input *in, wchar_t *name,
57 filepos *pos) {
56 static bool macrolookup(tree234 *macros, input *in, wchar_t *name,
57 filepos *pos) {
5858 macro m, *gotit;
5959 m.name = name;
60 gotit = find234(macros, &m, NULL);
60 gotit = find234(macros, &m);
6161 if (gotit) {
6262 macrostack *expansion = snew(macrostack);
6363 expansion->next = in->stack;
6666 expansion->ptr = 0;
6767 expansion->npushback = in->npushback;
6868 in->stack = expansion;
69 return TRUE;
69 return true;
7070 } else
71 return FALSE;
71 return false;
7272 }
7373 static void macrocleanup(tree234 *macros) {
7474 int ti;
8585 assert(cfg->type == para_Config);
8686
8787 if (!ustricmp(cfg->keyword, L"input-charset")) {
88 in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword));
88 in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword),
89 in->es);
8990 }
9091 }
9192
168169 NULL, 0);
169170 assert(p == buf+1 && inlen == 0);
170171
172 for (int i = 0; i < in->nwc; i++) {
173 if (in->wc[i] == 0) {
174 /* The zero Unicode character is never legal */
175 err_zerochar(in->es, pos);
176 return EOF;
177 }
178 }
179
171180 in->wcpos = 0;
172181 }
173182 }
174183
175 return in->wc[in->wcpos++];
184 wchar_t wc = in->wc[in->wcpos++];
185
186 return wc;
176187
177188 } else
178189 return EOF;
345356 /* We expect hex characters thereafter. */
346357 wchar_t *p = tok->text+1;
347358 int n = 0;
359 bool seen_a_char = false;
348360 while (*p && ishex(*p)) {
361 seen_a_char = true;
349362 n = 16 * n + fromhex(*p);
350363 p++;
351364 }
352 if (!*p) {
365 if (!*p && seen_a_char) {
353366 tok->cmd = c_u;
354367 tok->aux = n;
355368 return;
477490 * things other than whitespace, backslash, braces and
478491 * hyphen. A hyphen terminates the word but is returned as
479492 * part of it; everything else is pushed back for the next
480 * token. The `aux' field contains TRUE if the word ends in
493 * token. The `aux' field contains true if the word ends in
481494 * a hyphen.
482495 */
483 ret.aux = FALSE; /* assumed for now */
496 ret.aux = false; /* assumed for now */
484497 prevpos = 0;
485498 while (1) {
486499 if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
491504 rdadd(&rs, c);
492505 if (c == '-') {
493506 prevpos = rsc.pos;
494 ret.aux = TRUE;
507 ret.aux = true;
495508 break; /* hyphen terminates word */
496509 }
497510 }
518531 * telling code paragraphs from paragraphs which merely start with
519532 * code).
520533 */
521 int isbrace(input *in) {
534 bool isbrace(input *in) {
522535 int c;
523536 filepos cpos;
524537
566579 if (!hptrptr)
567580 return NULL;
568581 mnewword = snew(word);
582 newword.private_data = NULL; /* placate gcc warning */
569583 *mnewword = newword; /* structure copy */
570584 mnewword->next = NULL;
571585 **hptrptr = mnewword;
598612 tree234 *macros) {
599613 token t;
600614 paragraph par;
601 word wd, **whptr, **idximplicit;
615 word wd, **whptr, **idximplicit = NULL;
602616 wchar_t utext[2], *wdtext;
603617 int style, spcstyle;
604 int already;
605 int iswhite, seenwhite;
606 int type;
607 int prev_para_type;
618 bool already;
619 bool iswhite, seenwhite;
620 int prev_para_type = para_NotParaType;
608621 struct stack_item {
609622 enum {
610623 stack_nop = 0, /* do nothing (for error recovery) */
622635 stack parsestk;
623636 struct crossparaitem {
624637 int type; /* currently c_lcont, c_quote or -1 */
625 int seen_lcont, seen_quote;
638 bool seen_lcont, seen_quote;
626639 };
627640 stack crossparastk;
628 word *indexword, *uword, *iword;
641 word *indexword = NULL, *uword, *iword;
629642 word *idxwordlist;
630643 rdstring indexstr;
631 int index_downcase, index_visible, indexing;
644 bool index_downcase = false, index_visible = false, indexing;
632645 const rdstring nullrs = { 0, 0, NULL };
633646 wchar_t uchr;
634647
635 t.text = NULL;
636 t.origtext = NULL;
637 already = FALSE;
648 t = get_token(in);
649 already = true;
650
651 /*
652 * Ignore tok_white if it appears at the very start of the file.
653 *
654 * At the start of most paragraphs, tok_white is guaranteed not to
655 * appear, because get_token will have folded it into the
656 * preceding tok_eop (since a tok_eop is simply a sequence of
657 * whitespace containing at least two newlines).
658 *
659 * The one exception is if there isn't a preceding tok_eop, i.e.
660 * if the very first paragraph begins with something that lexes as
661 * a tok_white. Easiest way to get round that is to ignore it
662 * here, by unsetting the 'already' flag which will force a new
663 * token to be fetched below.
664 */
665 if (t.type == tok_white)
666 already = false;
638667
639668 crossparastk = stk_new();
640669
655684 if (!already) {
656685 dtor(t), t = get_token(in);
657686 }
658 already = FALSE;
687 already = false;
659688 } while (t.type == tok_eop);
660689 if (t.type == tok_eof)
661690 break;
671700 while (1) {
672701 dtor(t), t = get_codepar_token(in);
673702 wd.type = wtype;
674 wd.breaks = FALSE; /* shouldn't need this... */
703 wd.breaks = false; /* shouldn't need this... */
675704 wd.text = ustrdup(t.text);
676705 wd.alt = NULL;
706 wd.aux = 0;
677707 wd.fpos = t.pos;
678708 addword(wd, &whptr);
679709 dtor(t), t = get_token(in);
686716 if (t.type == tok_eop || t.type == tok_eof ||
687717 t.type == tok_rbrace) { /* might be } terminating \lcont */
688718 if (t.type == tok_rbrace)
689 already = TRUE;
719 already = true;
690720 break;
691721 } else if (t.type == tok_cmd && t.cmd == c_c) {
692722 wtype = word_WeakCode;
697727 wtype == word_WeakCode) {
698728 wtype = word_Strong;
699729 } else {
700 err_brokencodepara(&t.pos);
730 err_brokencodepara(in->es, &t.pos);
701731 prev_para_type = par.type;
702732 addpara(par, ret);
703733 while (t.type != tok_eop) /* error recovery: */
726756 */
727757 dtor(t), t = get_token(in);
728758 if (t.type != tok_lbrace) {
729 err_explbr(&t.pos);
759 err_explbr(in->es, &t.pos);
730760 continue;
731761 }
732762
738768 do {
739769 dtor(t), t = get_token(in);
740770 } while (t.type == tok_white);
741 already = TRUE;
771 already = true;
742772
743773 if (cmd == c_lcont) {
744774 /*
749779 */
750780 sitem = snew(struct crossparaitem);
751781 stop = (struct crossparaitem *)stk_top(crossparastk);
752 if (stop)
782 if (stop) {
753783 *sitem = *stop;
754 else
755 sitem->seen_quote = sitem->seen_lcont = 0;
784 } else {
785 sitem->seen_quote = false;
786 sitem->seen_lcont = false;
787 }
756788
757789 if (prev_para_type == para_Bullet ||
758790 prev_para_type == para_NumberedList ||
759791 prev_para_type == para_Description) {
760792 sitem->type = c_lcont;
761 sitem->seen_lcont = 1;
793 sitem->seen_lcont = true;
762794 par.type = para_LcontPush;
763795 prev_para_type = par.type;
764796 addpara(par, ret);
769801 * don't give a cascade error.
770802 */
771803 sitem->type = -1;
772 err_misplacedlcont(&t.pos);
804 err_misplacedlcont(in->es, &t.pos);
773805 }
774806 } else {
775807 /*
779811 */
780812 sitem = snew(struct crossparaitem);
781813 stop = (struct crossparaitem *)stk_top(crossparastk);
782 if (stop)
814 if (stop) {
783815 *sitem = *stop;
784 else
785 sitem->seen_quote = sitem->seen_lcont = 0;
816 } else {
817 sitem->seen_quote = false;
818 sitem->seen_lcont = false;
819 }
786820 sitem->type = c_quote;
787 sitem->seen_quote = 1;
821 sitem->seen_quote = true;
788822 par.type = para_QuotePush;
789823 prev_para_type = par.type;
790824 addpara(par, ret);
794828 } else if (t.type == tok_rbrace) {
795829 struct crossparaitem *sitem = stk_pop(crossparastk);
796830 if (!sitem)
797 err_unexbrace(&t.pos);
831 err_unexbrace(in->es, &t.pos);
798832 else {
799833 switch (sitem->type) {
800834 case c_lcont:
828862 par.type = para_Normal;
829863 if (t.type == tok_cmd) {
830864 int needkw;
831 int is_macro = FALSE;
865 bool is_macro = false;
832866
833867 par.fpos = t.pos;
834868 switch (t.cmd) {
836870 needkw = -1;
837871 break;
838872 case c__invalid:
839 err_badparatype(t.text, &t.pos);
873 err_badparatype(in->es, t.text, &t.pos);
840874 needkw = 4;
841875 break;
842876 case c__comment:
879913 case c_cfg: needkw = 8; par.type = para_Config;
880914 start_cmd = c_cfg; break;
881915 case c_copyright: needkw = 32; par.type = para_Copyright; break;
882 case c_define: is_macro = TRUE; needkw = 1; break;
916 case c_define: is_macro = true; needkw = 1; break;
883917 /* For \nocite the keyword is _everything_ */
884918 case c_nocite: needkw = 8; par.type = para_NoCite; break;
885919 case c_preamble: needkw = 32; par.type = para_Normal; break;
895929 par.type == para_UnnumberedChapter) {
896930 struct crossparaitem *sitem = stk_top(crossparastk);
897931 if (sitem && (sitem->seen_lcont || sitem->seen_quote)) {
898 err_sectmarkerinblock( &t.pos,
899 (sitem->seen_lcont ? "lcont" : "quote"));
932 err_sectmarkerinblock(
933 in->es, &t.pos,
934 (sitem->seen_lcont ? "lcont" : "quote"));
900935 }
901936 }
902937
948983 }
949984 }
950985 if (t.type != tok_rbrace) {
951 err_kwunclosed(&t.pos);
986 err_kwunclosed(in->es, &t.pos);
952987 continue;
953988 }
954989 rdadd(&rs, 0); /* add string terminator */
961996
962997 /* See whether we have the right number of keywords. */
963998 if ((needkw & 48) && nkeys > 0)
964 err_kwillegal(&fp);
999 err_kwillegal(in->es, &fp);
9651000 if ((needkw & 11) && nkeys == 0)
966 err_kwexpected(&fp);
1001 err_kwexpected(in->es, &fp);
9671002 if ((needkw & 5) && nkeys > 1)
968 err_kwtoomany(&fp);
1003 err_kwtoomany(in->es, &fp);
9691004
9701005 if (is_macro) {
9711006 /*
9841019 if (t.type == tok_eop || t.type == tok_eof)
9851020 break;
9861021 }
987 macrodef(macros, rs.text, macrotext.text, fp);
1022 macrodef(macros, rs.text, macrotext.text, fp, in->es);
9881023 continue; /* next paragraph */
9891024 }
9901025
9991034 if (t.type != tok_eop && t.type != tok_eof &&
10001035 (start_cmd == c__invalid ||
10011036 t.type != tok_cmd || t.cmd != start_cmd)) {
1002 err_bodyillegal(&t.pos);
1037 err_bodyillegal(in->es, &t.pos);
10031038 /* Error recovery: eat the rest of the paragraph */
10041039 while (t.type != tok_eop && t.type != tok_eof &&
10051040 (start_cmd == c__invalid ||
10071042 dtor(t), t = get_token(in);
10081043 }
10091044 if (t.type == tok_cmd)
1010 already = TRUE;/* inhibit get_token at top of loop */
1045 already = true;/* inhibit get_token at top of loop */
10111046 prev_para_type = par.type;
10121047 addpara(par, ret);
10131048
10391074 parsestk = stk_new();
10401075 style = word_Normal;
10411076 spcstyle = word_WhiteSpace;
1042 indexing = FALSE;
1043 seenwhite = TRUE;
1077 indexing = false;
1078 seenwhite = true;
10441079 while (t.type != tok_eop && t.type != tok_eof) {
1045 iswhite = FALSE;
1046 already = FALSE;
1080 iswhite = false;
1081 already = false;
10471082
10481083 /* Handle implicit paragraph breaks after \IM, \BR etc */
10491084 if (start_cmd != c__invalid &&
10501085 t.type == tok_cmd && t.cmd == start_cmd) {
1051 already = TRUE; /* inhibit get_token at top of loop */
1086 already = true; /* inhibit get_token at top of loop */
10521087 break;
10531088 }
10541089
10761111 wd.alt = NULL;
10771112 wd.aux = 0;
10781113 wd.fpos = t.pos;
1079 wd.breaks = FALSE;
1114 wd.breaks = false;
10801115
10811116 /*
10821117 * Inhibit use of whitespace if it's (probably the
10851120 */
10861121 if (start_cmd != c__invalid) {
10871122 dtor(t), t = get_token(in);
1088 already = TRUE;
1123 already = true;
10891124 if (t.type == tok_cmd && t.cmd == start_cmd)
10901125 break;
10911126 }
10961131 addword(wd, &whptr);
10971132 if (indexing)
10981133 addword(wd, &idximplicit);
1099 iswhite = TRUE;
1134 iswhite = true;
11001135 break;
11011136 case tok_word:
11021137 if (indexing)
11161151 }
11171152 break;
11181153 case tok_lbrace:
1119 err_unexbrace(&t.pos);
1154 err_unexbrace(in->es, &t.pos);
11201155 /* Error recovery: push nop */
11211156 sitem = snew(struct stack_item);
11221157 sitem->type = stack_nop;
11321167 * wants popping. Accordingly, we treat it here
11331168 * as an indication that the paragraph is over.
11341169 */
1135 already = TRUE;
1170 already = true;
11361171 goto finished_para;
11371172 } else {
11381173 if (sitem->type & stack_ualt) {
11441179 spcstyle = word_WhiteSpace;
11451180 }
11461181 if (sitem->type & stack_idx) {
1182 rdadds(&indexstr, L"");
11471183 indexword->text = ustrdup(indexstr.text);
11481184 if (index_downcase) {
11491185 word *w;
11551191 if (w->text)
11561192 ustrlow(w->text);
11571193 }
1158 indexing = FALSE;
1194 indexing = false;
11591195 rdadd(&indexstr, L'\0');
1160 index_merge(idx, FALSE, indexstr.text,
1161 idxwordlist, &sitem->fpos);
1196 index_merge(idx, false, indexstr.text,
1197 idxwordlist, &sitem->fpos, in->es);
11621198 sfree(indexstr.text);
11631199 }
11641200 if (sitem->type & stack_hyper) {
11671203 wd.alt = NULL;
11681204 wd.aux = 0;
11691205 wd.fpos = t.pos;
1170 wd.breaks = FALSE;
1206 wd.breaks = false;
11711207 if (!indexing || index_visible)
11721208 addword(wd, &whptr);
11731209 if (indexing)
11791215 wd.alt = NULL;
11801216 wd.aux = quote_Close;
11811217 wd.fpos = t.pos;
1182 wd.breaks = FALSE;
1218 wd.breaks = false;
11831219 if (!indexing || index_visible)
11841220 addword(wd, &whptr);
11851221 if (indexing) {
12021238 */
12031239 dtor(t), t = get_token(in);
12041240 if (t.type != tok_lbrace) {
1205 err_explbr(&t.pos);
1241 err_explbr(in->es, &t.pos);
12061242 } else {
12071243 int braces = 1;
12081244 while (braces > 0) {
12121248 else if (t.type == tok_rbrace)
12131249 braces--;
12141250 else if (t.type == tok_eof) {
1215 err_commenteof(&t.pos);
1251 err_commenteof(in->es, &t.pos);
12161252 break;
12171253 }
12181254 }
12191255 }
12201256 if (seenwhite) {
1221 already = TRUE;
1257 already = true;
12221258 dtor(t), t = get_token(in);
12231259 if (t.type == tok_white) {
1224 iswhite = TRUE;
1225 already = FALSE;
1260 iswhite = true;
1261 already = false;
12261262 }
12271263 }
12281264 break;
12291265 case c_q:
1230 case c_cq:
1231 type = t.cmd;
1266 case c_cq: {
1267 int type = t.cmd;
12321268 dtor(t), t = get_token(in);
12331269 if (t.type != tok_lbrace) {
1234 err_explbr(&t.pos);
1270 err_explbr(in->es, &t.pos);
12351271 } else {
12361272 /*
12371273 * Enforce that \q may not be used anywhere
12511287 wd.alt = NULL;
12521288 wd.aux = quote_Open;
12531289 wd.fpos = t.pos;
1254 wd.breaks = FALSE;
1290 wd.breaks = false;
12551291 if (!indexing || index_visible)
12561292 addword(wd, &whptr);
12571293 if (indexing) {
12601296 }
12611297 stype = stack_quote;
12621298 } else {
1263 err_codequote(&t.pos);
1299 err_codequote(in->es, &t.pos);
12641300 stype = stack_nop;
12651301 }
12661302 sitem = snew(struct stack_item);
12681304 sitem->type = stype;
12691305 if (type == c_cq) {
12701306 if (style != word_Normal) {
1271 err_nestedstyles(&t.pos);
1307 err_nestedstyles(in->es, &t.pos);
12721308 } else {
12731309 style = word_WeakCode;
12741310 spcstyle = tospacestyle(style);
12781314 stk_push(parsestk, sitem);
12791315 }
12801316 break;
1317 }
12811318 case c_K:
12821319 case c_k:
12831320 case c_W:
12881325 * brace. No nesting; no arguments.
12891326 */
12901327 wd.fpos = t.pos;
1291 wd.breaks = FALSE;
1328 wd.breaks = false;
12921329 if (t.cmd == c_K)
12931330 wd.type = word_UpperXref;
12941331 else if (t.cmd == c_k)
13001337 dtor(t), t = get_token(in);
13011338 if (t.type != tok_lbrace) {
13021339 if (wd.type == word_Normal) {
1303 time_t thetime = time(NULL);
1340 time_t thetime = current_time();
13041341 struct tm *broken = localtime(&thetime);
1305 already = TRUE;
1342 already = true;
13061343 wdtext = ustrftime(NULL, broken);
13071344 wd.type = style;
13081345 } else {
1309 err_explbr(&t.pos);
1346 err_explbr(in->es, &t.pos);
13101347 wdtext = NULL;
13111348 }
13121349 } else {
13191356 rdadds(&rs, t.text);
13201357 }
13211358 if (wd.type == word_Normal) {
1322 time_t thetime = time(NULL);
1359 time_t thetime = current_time();
13231360 struct tm *broken = localtime(&thetime);
13241361 wdtext = ustrftime(rs.text, broken);
13251362 wd.type = style;
13281365 }
13291366 sfree(rs.text);
13301367 if (t.type != tok_rbrace) {
1331 err_kwexprbr(&t.pos);
1368 err_kwexprbr(in->es, &t.pos);
13321369 }
13331370 }
13341371 wd.alt = NULL;
13581395 if (t.type == tok_cmd &&
13591396 (t.cmd == c_i || t.cmd == c_ii)) {
13601397 if (indexing) {
1361 err_nestedindex(&t.pos);
1398 err_nestedindex(in->es, &t.pos);
13621399 } else {
13631400 /* Add an index-reference word with no
13641401 * text as yet */
13661403 wd.text = NULL;
13671404 wd.alt = NULL;
13681405 wd.aux = 0;
1369 wd.breaks = FALSE;
1406 wd.breaks = false;
13701407 indexword = addword(wd, &whptr);
13711408 /* Set up a rdstring to read the
13721409 * index text */
13731410 indexstr = nullrs;
13741411 /* Flags so that we do the Right
13751412 * Things with text */
1376 index_visible = (type != c_I);
1377 index_downcase = (type == c_ii);
1378 indexing = TRUE;
1413 index_visible = (t.cmd != c_I);
1414 index_downcase = (t.cmd == c_ii);
1415 indexing = true;
13791416 idxwordlist = NULL;
13801417 idximplicit = &idxwordlist;
13811418
13901427 (t.cmd == c_e || t.cmd == c_s ||
13911428 t.cmd == c_c || t.cmd == c_cw)) {
13921429 if (style != word_Normal)
1393 err_nestedstyles(&t.pos);
1430 err_nestedstyles(in->es, &t.pos);
13941431 else {
13951432 style = (t.cmd == c_c ? word_Code :
13961433 t.cmd == c_cw ? word_WeakCode :
14021439 dtor(t), t = get_token(in);
14031440 }
14041441 if (t.type != tok_lbrace) {
1405 err_explbr(&t.pos);
1442 err_explbr(in->es, &t.pos);
14061443 sfree(sitem);
14071444 } else {
14081445 stk_push(parsestk, sitem);
14121449 case c_c:
14131450 case c_cw:
14141451 case c_e:
1415 case c_s:
1416 type = t.cmd;
1452 case c_s: {
1453 int type = t.cmd;
14171454 if (style != word_Normal) {
1418 err_nestedstyles(&t.pos);
1455 err_nestedstyles(in->es, &t.pos);
14191456 /* Error recovery: eat lbrace, push nop. */
14201457 dtor(t), t = get_token(in);
14211458 sitem = snew(struct stack_item);
14251462 }
14261463 dtor(t), t = get_token(in);
14271464 if (t.type != tok_lbrace) {
1428 err_explbr(&t.pos);
1465 err_explbr(in->es, &t.pos);
14291466 } else {
14301467 style = (type == c_c ? word_Code :
14311468 type == c_cw ? word_WeakCode :
14381475 stk_push(parsestk, sitem);
14391476 }
14401477 break;
1478 }
14411479 case c_i:
14421480 case c_ii:
1443 case c_I:
1444 type = t.cmd;
1481 case c_I: {
1482 int type = t.cmd;
14451483 if (indexing) {
1446 err_nestedindex(&t.pos);
1484 err_nestedindex(in->es, &t.pos);
14471485 /* Error recovery: eat lbrace, push nop. */
14481486 dtor(t), t = get_token(in);
14491487 sitem = snew(struct stack_item);
14631501 (t.cmd == c_e || t.cmd == c_s ||
14641502 t.cmd == c_c || t.cmd == c_cw)) {
14651503 if (style != word_Normal)
1466 err_nestedstyles(&t.pos);
1504 err_nestedstyles(in->es, &t.pos);
14671505 else {
14681506 style = (t.cmd == c_c ? word_Code :
14691507 t.cmd == c_cw ? word_WeakCode :
14761514 }
14771515 if (t.type != tok_lbrace) {
14781516 sfree(sitem);
1479 err_explbr(&t.pos);
1517 err_explbr(in->es, &t.pos);
14801518 } else {
14811519 /* Add an index-reference word with no text as yet */
14821520 wd.type = word_IndexRef;
14831521 wd.text = NULL;
14841522 wd.alt = NULL;
14851523 wd.aux = 0;
1486 wd.breaks = FALSE;
1524 wd.breaks = false;
14871525 indexword = addword(wd, &whptr);
14881526 /* Set up a rdstring to read the index text */
14891527 indexstr = nullrs;
14901528 /* Flags so that we do the Right Things with text */
14911529 index_visible = (type != c_I);
14921530 index_downcase = (type == c_ii);
1493 indexing = TRUE;
1531 indexing = true;
14941532 idxwordlist = NULL;
14951533 idximplicit = &idxwordlist;
14961534 /* Stack item to close the indexing on exit */
14971535 stk_push(parsestk, sitem);
14981536 }
14991537 break;
1538 }
15001539 case c_u:
15011540 uchr = t.aux;
1541 if (uchr == 0) {
1542 err_zerochar(in->es, &t.pos);
1543 break;
1544 }
15021545 utext[0] = uchr; utext[1] = 0;
15031546 wd.type = style;
1504 wd.breaks = FALSE;
1547 wd.breaks = false;
15051548 wd.alt = NULL;
15061549 wd.aux = 0;
15071550 wd.fpos = t.pos;
15341577 } else {
15351578 if (indexing)
15361579 rdadd(&indexstr, uchr);
1537 already = TRUE;
1580 already = true;
15381581 }
15391582 break;
15401583 default:
15411584 if (!macrolookup(macros, in, t.text, &t.pos))
1542 err_badmidcmd(t.text, &t.pos);
1585 err_badmidcmd(in->es, t.text, &t.pos);
15431586 break;
15441587 }
15451588 }
15521595 if (stk_top(parsestk)) {
15531596 while ((sitem = stk_pop(parsestk)))
15541597 sfree(sitem);
1555 err_missingrbrace(&t.pos);
1598 err_missingrbrace(in->es, &t.pos);
15561599 }
15571600 stk_free(parsestk);
15581601 prev_para_type = par.type;
15681611 addpara(par, ret);
15691612 }
15701613 if (t.type == tok_eof)
1571 already = TRUE;
1614 already = true;
15721615 }
15731616
15741617 if (stk_top(crossparastk)) {
15751618 void *p;
15761619
1577 err_missingrbrace2(&t.pos);
1620 err_missingrbrace2(in->es, &t.pos);
15781621 while ((p = stk_pop(crossparastk)))
15791622 sfree(p);
15801623 }
15881631 stk_free(crossparastk);
15891632 }
15901633
1591 struct {
1634 const struct {
15921635 char const *magic;
15931636 size_t nmagic;
1594 int binary;
1595 void (*reader)(input *);
1637 bool binary;
1638 void (*reader)(input *, psdata *);
15961639 } magics[] = {
1597 { "%!FontType1-", 12, FALSE, &read_pfa_file },
1598 { "%!PS-AdobeFont-", 15, FALSE, &read_pfa_file },
1599 { "\x80\x01", 2, TRUE, &read_pfb_file },
1600 { "StartFontMetrics", 16, FALSE, &read_afm_file },
1601 { "\x00\x01\x00\x00", 4, TRUE, &read_sfnt_file },
1602 { "true", 4, TRUE, &read_sfnt_file },
1640 { "%!FontType1-", 12, false, &read_pfa_file },
1641 { "%!PS-AdobeFont-", 15, false, &read_pfa_file },
1642 { "\x80\x01", 2, true, &read_pfb_file },
1643 { "StartFontMetrics", 16, false, &read_afm_file },
1644 { "\x00\x01\x00\x00", 4, true, &read_sfnt_file },
1645 { "true", 4, true, &read_sfnt_file },
16031646 };
16041647
1605 paragraph *read_input(input *in, indexdata *idx) {
1648 paragraph *read_input(input *in, indexdata *idx, psdata *psd) {
16061649 paragraph *head = NULL;
16071650 paragraph **hptr = &head;
16081651 tree234 *macros;
16091652 char mag[16];
16101653 size_t len, i;
1611 int binary;
1612 void (*reader)(input *);
1613
1614 macros = newtree234(macrocmp);
1654 bool binary;
1655 void (*reader)(input *, psdata *);
1656
1657 macros = newtree234(macrocmp, NULL);
16151658
16161659 while (in->currindex < in->nfiles) {
16171660 setpos(in, in->filenames[in->currindex]);
16221665
16231666 if (!in->filenames[in->currindex]) {
16241667 in->currfp = stdin;
1625 in->wantclose = FALSE; /* don't fclose stdin */
1668 in->wantclose = false; /* don't fclose stdin */
16261669 /*
16271670 * When reading standard input, we always expect to see
16281671 * an actual Halibut file and not any of the unusual
16361679 * looking at a text file type.
16371680 */
16381681 in->currfp = fopen(in->filenames[in->currindex], "rb");
1639 binary = FALSE; /* default to Halibut source, which is text */
1682 binary = false; /* default to Halibut source, which is text */
1683 reader = NULL;
16401684 if (in->currfp) {
1641 in->wantclose = TRUE;
1642 reader = NULL;
1685 in->wantclose = true;
16431686 len = fread(mag, 1, sizeof(mag), in->currfp);
16441687 for (i = 0; i < lenof(magics); i++) {
16451688 if (len >= magics[i].nmagic &&
16611704 if (reader == NULL) {
16621705 read_file(&hptr, in, idx, macros);
16631706 } else {
1664 (*reader)(in);
1707 (*reader)(in, psd);
16651708 }
16661709 } else {
1667 err_cantopen(in->filenames[in->currindex]);
1710 err_cantopen(in->es, in->filenames[in->currindex]);
16681711 }
16691712 in->currindex++;
16701713 }
66 #include <assert.h>
77 #include "halibut.h"
88
9 static int kwcmp(void *av, void *bv)
9 static int kwcmp(const void *av, const void *bv, void *cmpctx)
1010 {
1111 const keyword *a = (const keyword *)av;
1212 const keyword *b = (const keyword *)bv;
1313 return ustrcmp(a->key, b->key);
1414 }
1515
16 static int kwfind(void *av, void *bv)
16 static int kwfind(const void *av, const void *bv, void *cmpctx)
1717 {
1818 wchar_t *a = (wchar_t *)av;
1919 const keyword *b = (const keyword *)bv;
2121 }
2222
2323 keyword *kw_lookup(keywordlist *kl, wchar_t *str) {
24 return find234(kl->keys, str, kwfind);
24 return findcmp234(kl->keys, str, kwfind, NULL);
2525 }
2626
2727 /*
3030 * collation, last at the top (so that we can Heapsort them when we
3131 * finish).
3232 */
33 keywordlist *get_keywords(paragraph *source) {
34 int errors = FALSE;
33 keywordlist *get_keywords(paragraph *source, errorstate *es) {
34 bool errors = false;
3535 keywordlist *kl = snew(keywordlist);
3636 numberstate *n = number_init();
3737 int prevpara = para_NotParaType;
3939 number_cfg(n, source);
4040
4141 kl->size = 0;
42 kl->keys = newtree234(kwcmp);
42 kl->keys = newtree234(kwcmp, NULL);
4343 kl->nlooseends = kl->looseendssize = 0;
4444 kl->looseends = NULL;
4545 for (; source; source = source->next) {
6161 * This also sets up the `parent', `child' and `sibling'
6262 * links.
6363 */
64 source->kwtext = number_mktext(n, source, q, &prevpara, &errors);
64 source->kwtext = number_mktext(n, source, q, &prevpara, &errors, es);
6565
6666 if (p && *p) {
6767 if (source->kwtext || source->type == para_Biblio) {
7373 kw->para = source;
7474 ret = add234(kl->keys, kw);
7575 if (ret != kw) {
76 err_multikw(&source->fpos, &ret->para->fpos, p);
76 err_multikw(es, &source->fpos, &ret->para->fpos, p);
7777 sfree(kw);
7878 /* FIXME: what happens to kw->text? Does it leak? */
7979 }
112112 sfree(kl);
113113 }
114114
115 void subst_keywords(paragraph *source, keywordlist *kl) {
115 void subst_keywords(paragraph *source, keywordlist *kl, errorstate *es) {
116116 for (; source; source = source->next) {
117117 word *ptr;
118118 for (ptr = source->words; ptr; ptr = ptr->next) {
123123
124124 kw = kw_lookup(kl, ptr->text);
125125 if (!kw) {
126 err_nosuchkw(&ptr->fpos, ptr->text);
126 err_nosuchkw(es, &ptr->fpos, ptr->text);
127127 subst = NULL;
128128 } else
129129 subst = dup_word_list(kw->text);
138138 close->alt = NULL;
139139 close->type = word_XrefEnd;
140140 close->fpos = ptr->fpos;
141 close->breaks = FALSE;
141 close->breaks = false;
142142 close->aux = 0;
143143
144144 close->next = ptr->next;
44 #include <stdio.h>
55
66 static const char *const licencetext[] = {
7 "Halibut is copyright (c) 1999-2017 Simon Tatham.",
7 "Halibut is copyright (c) 1999-2021 Simon Tatham.",
88 "",
99 "Permission is hereby granted, free of charge, to any person",
1010 "obtaining a copy of this software and associated documentation files",
116116 #define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)%st->winsize] : data[k] )
117117
118118 void lz77_compress(struct LZ77Context *ctx,
119 const unsigned char *data, int len, int compress)
119 const unsigned char *data, int len, bool compress)
120120 {
121121 struct LZ77InternalContext *st = ctx->ictx;
122122 int i, hash, distance, off, nmatch, matchlen, advance;
143143 }
144144 st->npending -= i;
145145
146 defermatch.len = 0;
146 defermatch.distance = defermatch.len = 0;
147147 deferchr = '\0';
148148 while (len > 0) {
149149
2727 /*
2828 * Supply data to be compressed. Will update the private fields of
2929 * the LZ77Context, and will call literal() and match() to output.
30 * If `compress' is FALSE, it will never emit a match, but will
30 * If `compress' is false, it will never emit a match, but will
3131 * instead call literal() for everything.
3232 */
3333 void lz77_compress(struct LZ77Context *ctx,
34 const unsigned char *data, int len, int compress);
34 const unsigned char *data, int len, bool compress);
218218 lz77c.literal = lzx_literal;
219219 lz77c.match = lzx_match;
220220 lz77c.userdata = info;
221 lz77_compress(&lz77c, data, len, TRUE);
221 lz77_compress(&lz77c, data, len, true);
222222 lz77_cleanup(&lz77c);
223223 }
224224
396396 size_t data_size, resets_size;
397397 unsigned short bitbuffer;
398398 int nbits;
399 int first_block;
399 bool first_block;
400400 } LZXBitstream;
401401
402402 void lzx_write_bits(LZXBitstream *bs, int value, int bits)
562562 * the whole-file header.
563563 */
564564 lzx_addsym(&header[0], LST_RAWBITS_BASE + 1, 0);
565 bs->first_block = FALSE;
565 bs->first_block = false;
566566 }
567567 lzx_addsym(&header[0], LST_RAWBITS_BASE + 3, blocktype);
568568 lzx_addsym(&header[0], LST_RAWBITS_BASE + 24, blocksize);
634634 * block-boundary heuristics, but I don't really think it's
635635 * worth it.
636636 */
637 bs.first_block = TRUE; /* reset every time we reset the LZ state */
637 bs.first_block = true; /* reset every time we reset the LZ state */
638638 lzx_encode_block(buf.syms, buf.nsyms, thislen, &hufs, &bs);
639639
640640 sfree(buf.syms);
66 #include <stdio.h>
77 #include <stdlib.h>
88 #include "halibut.h"
9 #include "paper.h"
910
1011 static void dbg_prtsource(paragraph *sourceform);
1112 static void dbg_prtwordlist(int level, word *w);
1213 static void dbg_prtkws(keywordlist *kws);
1314
1415 static const struct pre_backend {
15 void *(*func)(paragraph *, keywordlist *, indexdata *);
16 void *(*func)(paragraph *, keywordlist *, indexdata *, psdata *,
17 errorstate *);
1618 int bitfield;
1719 } pre_backends[] = {
1820 {paper_pre_backend, 0x0001}
1921 };
2022
2123 static const struct backend {
22 char *name;
23 void (*func)(paragraph *, keywordlist *, indexdata *, void *);
24 const char *name;
25 void (*func)(paragraph *, keywordlist *, indexdata *, void *,
26 errorstate *);
2427 paragraph *(*filename)(char *filename);
2528 int bitfield, prebackend_bitfield;
2629 } backends[] = {
4043 int main(int argc, char **argv) {
4144 char **infiles;
4245 int nfiles;
43 int nogo;
44 int errs;
45 int reportcols;
46 int list_fonts;
46 bool nogo;
47 bool reportcols;
48 bool list_fonts;
4749 int input_charset;
48 int debug;
50 bool debug;
4951 int backendbits, prebackbits;
5052 int k, b;
5153 paragraph *cfg, *cfg_tail;
5254 void *pre_backend_data[16];
55 errorstate es[1];
5356
5457 /*
5558 * Use the specified locale everywhere. It'll be used for
6770 */
6871 infiles = snewn(argc, char *);
6972 nfiles = 0;
70 nogo = errs = FALSE;
71 reportcols = 0;
72 list_fonts = 0;
73 nogo = false;
74 reportcols = false;
75 list_fonts = false;
7376 input_charset = CS_ASCII;
74 debug = 0;
77 debug = false;
7578 backendbits = 0;
7679 cfg = cfg_tail = NULL;
80 es->fatal = false;
7781
7882 if (argc == 1) {
7983 usage();
128132 /* do nothing */;
129133 } else if (!strcmp(opt, "-input-charset")) {
130134 if (!val) {
131 errs = TRUE, err_optnoarg(opt);
135 err_optnoarg(es, opt);
132136 } else {
133137 int charset = charset_from_localenc(val);
134138 if (charset == CS_NONE) {
135 errs = TRUE, err_cmdcharset(val);
139 err_cmdcharset(es, val);
136140 } else {
137141 input_charset = charset;
138142 }
139143 }
140144 } else if (!strcmp(opt, "-help")) {
141145 help();
142 nogo = TRUE;
146 nogo = true;
143147 } else if (!strcmp(opt, "-version")) {
144148 showversion();
145 nogo = TRUE;
149 nogo = true;
146150 } else if (!strcmp(opt, "-licence") ||
147151 !strcmp(opt, "-license")) {
148152 licence();
149 nogo = TRUE;
153 nogo = true;
150154 } else if (!strcmp(opt, "-list-charsets")) {
151155 listcharsets();
152 nogo = TRUE;
156 nogo = true;
153157 } else if (!strcmp(opt, "-list-fonts")) {
154 list_fonts = TRUE;
158 list_fonts = true;
155159 } else if (!strcmp(opt, "-precise")) {
156 reportcols = 1;
160 reportcols = true;
157161 } else {
158 errs = TRUE, err_nosuchopt(opt);
162 err_nosuchopt(es, opt);
159163 }
160164 }
161165 p = NULL;
171175 switch (c) {
172176 case 'h':
173177 help();
174 nogo = TRUE;
178 nogo = true;
175179 break;
176180 case 'V':
177181 showversion();
178 nogo = TRUE;
182 nogo = true;
179183 break;
180184 case 'L':
181185 licence();
182 nogo = TRUE;
186 nogo = true;
183187 break;
184188 case 'P':
185 reportcols = 1;
189 reportcols = true;
186190 break;
187191 case 'd':
188 debug = TRUE;
192 debug = true;
189193 break;
190194 }
191195 break;
200204 char opt[2];
201205 opt[0] = c;
202206 opt[1] = '\0';
203 errs = TRUE, err_optnoarg(opt);
207 err_optnoarg(es, opt);
204208 }
205209 /*
206210 * Now c is the option and p is the parameter.
224228 *r = '\0';
225229 /* XXX ad-hoc diagnostic */
226230 if (!strcmp(s, "input-charset"))
227 err_futileopt("Cinput-charset",
231 err_futileopt(es, "Cinput-charset",
228232 "; use --input-charset");
229233 cmdline_cfg_add(para, s);
230234 r = s;
256260 char opt[2];
257261 opt[0] = c;
258262 opt[1] = '\0';
259 errs = TRUE, err_nosuchopt(opt);
263 err_nosuchopt(es, opt);
260264 }
261265 }
262266 }
271275 }
272276 }
273277
274 if (errs)
278 if (es->fatal)
275279 exit(EXIT_FAILURE);
276280 if (nogo)
277281 exit(EXIT_SUCCESS);
280284 * Do the work.
281285 */
282286 if (nfiles == 0 && !list_fonts) {
283 err_noinput();
287 err_noinput(es);
284288 usage();
285289 exit(EXIT_FAILURE);
286290 }
290294 paragraph *sourceform, *p;
291295 indexdata *idx;
292296 keywordlist *keywords;
297 psdata *psd;
293298
294299 in.filenames = infiles;
295300 in.nfiles = nfiles;
300305 in.reportcols = reportcols;
301306 in.stack = NULL;
302307 in.defcharset = input_charset;
308 in.es = es;
303309
304310 idx = make_index();
305
306 sourceform = read_input(&in, idx);
311 psd = psdata_new();
312
313 sourceform = read_input(&in, idx, psd);
307314 if (list_fonts) {
308 listfonts();
315 listfonts(psd);
309316 exit(EXIT_SUCCESS);
310317 }
311 if (!sourceform)
318 if (es->fatal)
312319 exit(EXIT_FAILURE);
320 assert(sourceform);
313321
314322 /*
315323 * Append the config directives acquired from the command
330338
331339 sfree(infiles);
332340
333 keywords = get_keywords(sourceform);
341 keywords = get_keywords(sourceform, es);
334342 if (!keywords)
335343 exit(EXIT_FAILURE);
336 gen_citations(sourceform, keywords);
337 subst_keywords(sourceform, keywords);
344 gen_citations(sourceform, keywords, es);
345 subst_keywords(sourceform, keywords, es);
338346
339347 for (p = sourceform; p; p = p->next)
340348 if (p->type == para_IM)
341 index_merge(idx, TRUE, p->keyword, p->words, &p->fpos);
349 index_merge(idx, true, p->keyword, p->words, &p->fpos, es);
342350
343351 build_index(idx);
344352
366374 * Select and run the pre-backends.
367375 */
368376 prebackbits = 0;
377 memset(pre_backend_data, 0, sizeof(pre_backend_data));
369378 for (k = 0; k < (int)lenof(backends); k++)
370379 if (backendbits == 0 || (backendbits & backends[k].bitfield))
371380 prebackbits |= backends[k].prebackend_bitfield;
373382 if (prebackbits & pre_backends[k].bitfield) {
374383 assert(k < (int)lenof(pre_backend_data));
375384 pre_backend_data[k] =
376 pre_backends[k].func(sourceform, keywords, idx);
385 pre_backends[k].func(sourceform, keywords, idx, psd, es);
377386 }
378387
379388 /*
394403 break;
395404 }
396405
397 backends[k].func(sourceform, keywords, idx, pbd);
406 backends[k].func(sourceform, keywords, idx, pbd, es);
398407 }
399408 }
400409
401410 free_para_list(sourceform);
402411 free_keywords(keywords);
403412 cleanup_index(idx);
404 }
413 psdata_free(psd);
414 }
415
416 if (es->fatal)
417 exit(EXIT_FAILURE);
405418
406419 return 0;
407420 }
1414
1515 $errors=0;
1616
17 while (<>) {
17 while (<<>>) {
1818 $in=$out="";
1919 ($file, $line, $call, $in, $out)=($1,$2,$3,"",$4)
2020 if /^(\S+) (\S+) (malloc|strdup)\(\S+\) returns (\S+)$/;
11 * misc.c: miscellaneous useful items
22 */
33
4 #include <assert.h>
45 #include <stdarg.h>
6 #include <stdlib.h>
7 #include <time.h>
58 #include "halibut.h"
69
710 char *adv(char *s) {
97100 rs->text = sresize(rs->text, rs->size, char);
98101 }
99102 memcpy(rs->text + rs->pos, p, len);
103 rs->pos += len;
104 rs->text[rs->pos] = 0;
105 }
106 void rdaddc_rep(rdstringc *rs, char c, int len) {
107 if (len <= 0) {
108 assert(len == 0);
109 return;
110 }
111 if (rs->pos >= rs->size - len) {
112 rs->size = rs->pos + len + 128;
113 rs->text = sresize(rs->text, rs->size, char);
114 }
115 memset(rs->text + rs->pos, c, len);
100116 rs->pos += len;
101117 rs->text[rs->pos] = 0;
102118 }
236252
237253 wp = NULL;
238254 for (w = words; w; w = w->next) {
239 int both;
255 bool both;
240256 if (!isvis(w->type))
241257 /* Invisible elements should not affect this calculation */
242258 continue;
579595
580596 return p;
581597 }
598
/*
 * Wrapper around the standard C time() function, which allows its
 * return value to be overridden by the environment variable
 * SOURCE_DATE_EPOCH, used to achieve reproducible builds by avoiding
 * baking different datestamps into repetitions of what ought to be
 * the same build.
 *
 * Returns the value of SOURCE_DATE_EPOCH parsed as a decimal integer
 * if that variable is set, and the real current time otherwise.
 */
time_t current_time(void)
{
    const char *epoch = getenv("SOURCE_DATE_EPOCH");

    /*
     * strtoll rather than atoll: the two agree on every input atoll
     * handles, but atoll has undefined behaviour when the value is out
     * of range, whereas strtoll is fully specified in that case.
     */
    if (epoch)
        return (time_t)strtoll(epoch, NULL, 10);

    return time(NULL);
}
4343 page_data *pages;
4444 outline_element *outline_elements;
4545 int n_outline_elements;
46 psdata *psd;
4647 };
4748
4849 /*
7677 * depend on the particular document. It gets generated when the font's
7778 * metrics are read in.
7879 */
79
80 font_info *all_fonts;
8180
8281 struct font_info_Tag {
8382 font_info *next;
276275 * the heights of the three fonts in the pdata) because it's
277276 * easier than looking it up repeatedly during page breaking.
278277 */
279 int page_break;
278 bool page_break;
280279 int space_before;
281280 int space_after;
282281 int line_height;
372371 para_data *pdata;
373372 };
374373
/*
 * Per-run PostScript glyph and font state, created by psdata_new()
 * and destroyed by psdata_free().
 */
374 struct psdata_Tag {
375 char **extraglyphs; /* names of non-standard glyphs, indexed by (glyph - EXTRAGLYPHSOFFSET) */
376 glyph nextglyph; /* id that the next newly interned non-standard glyph will receive */
377 tree234 *extrabyname; /* tree of allocated glyph ids for the non-standard glyphs, ordered by glyph name */
378 font_info *all_fonts; /* head of the list of known fonts, linked through font_info.next */
379 };
380
375381 /*
376382 * Functions exported from bk_paper.c
377383 */
378 int width_cmp(void *, void *); /* use when setting up widths */
379 int kern_cmp(void *, void *); /* use when setting up kern_pairs */
380 int lig_cmp(void *, void *); /* use when setting up ligatures */
384 int width_cmp(const void *, const void *, void *); /* use when setting up widths */
385 int kern_cmp(const void *, const void *, void *); /* use when setting up kern_pairs */
386 int lig_cmp(const void *, const void *, void *); /* use when setting up ligatures */
381387 int find_width(font_data *, glyph);
382388
383389 /*
384390 * Functions and data exported from psdata.c.
385391 */
386 glyph glyph_intern(char const *);
387 char const *glyph_extern(glyph);
392 psdata *psdata_new(void);
393 void psdata_free(psdata *);
394 glyph glyph_intern(psdata *, const char *);
395 char const *glyph_extern(psdata *, glyph);
388396 wchar_t ps_glyph_to_unicode(glyph);
389397 extern const char *const ps_std_glyphs[];
390398 extern glyph const tt_std_glyphs[];
391 void init_std_fonts(void);
399 void init_std_fonts(psdata *psd);
392400 const int *ps_std_font_widths(char const *fontname);
393401 const kern_pair *ps_std_font_kerns(char const *fontname);
394402
411419 /*
412420 * Backend functions exported by in_pf.c
413421 */
414 void pf_part1(font_info *fi, char **bufp, size_t *lenp);
415 void pf_part2(font_info *fi, char **bufp, size_t *lenp);
422 void pf_part1(font_info *fi, char **bufp, size_t *lenp, errorstate *es);
423 void pf_part2(font_info *fi, char **bufp, size_t *lenp, errorstate *es);
416424 void pf_writeps(font_info const *fi, FILE *ofp);
417425
418426 /*
422430 glyph sfnt_indextoglyph(sfnt *sf, unsigned idx);
423431 unsigned sfnt_glyphtoindex(sfnt *sf, glyph g);
424432 unsigned sfnt_nglyphs(sfnt *sf);
425 void sfnt_writeps(font_info const *fi, FILE *ofp);
433 void sfnt_writeps(font_info const *fi, FILE *ofp, psdata *psd, errorstate *es);
426434 void sfnt_data(font_info *fi, char **bufp, size_t *lenp);
427435
428436 #endif
11201120 "zretroflexhook", "zstroke", "zuhiragana", "zukatakana",
11211121 };
11221122
1123 char const **extraglyphs = NULL;
1124 glyph nextglyph = lenof(ps_glyphs_alphabetic);
1125 tree234 *extrabyname = NULL;
1126
1127 char const *glyph_extern(glyph glyph) {
1123 #define EXTRAGLYPHSOFFSET lenof(ps_glyphs_alphabetic)
1124
1125 const char *glyph_extern(psdata *psd, glyph glyph) {
11281126 if (glyph == NOGLYPH) return ".notdef";
1129 if (glyph < lenof(ps_glyphs_alphabetic))
1127 if (glyph < EXTRAGLYPHSOFFSET)
11301128 return ps_glyphs_alphabetic[glyph];
11311129 else
1132 return extraglyphs[glyph - lenof(ps_glyphs_alphabetic)];
1130 return psd->extraglyphs[glyph - EXTRAGLYPHSOFFSET];
11331131 }
11341132
1135 static int glyphcmp(void *a, void *b) {
1136 glyph ga = *(glyph *)a, gb = *(glyph *)b;
1137 return strcmp(glyph_extern(ga), glyph_extern(gb));
1133 static int glyphcmp(const void *a, const void *b, void *cmpctx) {
1134 psdata *psd = (psdata *)cmpctx;
1135 glyph ga = *(const glyph *)a, gb = *(const glyph *)b;
1136 return strcmp(glyph_extern(psd, ga), glyph_extern(psd, gb));
11381137 }
11391138
1140 static int glyphcmp_search(void *a, void *b) {
1141 glyph gb = *(glyph *)b;
1142 return strcmp(a, glyph_extern(gb));
1139 static int glyphcmp_search(const void *a, const void *b, void *cmpctx) {
1140 psdata *psd = (psdata *)cmpctx;
1141 glyph gb = *(const glyph *)b;
1142 return strcmp(a, glyph_extern(psd, gb));
11431143 }
11441144
1145 glyph glyph_intern(char const *glyphname) {
1145 psdata *psdata_new(void)
1146 {
1147 psdata *psd = snew(psdata);
1148 psd->extraglyphs = NULL;
1149 psd->nextglyph = EXTRAGLYPHSOFFSET;
1150 psd->extrabyname = newtree234(glyphcmp, NULL);
1151 psd->all_fonts = NULL;
1152 return psd;
1153 }
1154
1155 void psdata_free(psdata *psd)
1156 {
1157 glyph i, *gp;
1158 while ((gp = delpos234(psd->extrabyname, 0)) != NULL)
1159 sfree(gp);
1160 freetree234(psd->extrabyname);
1161 for (i = EXTRAGLYPHSOFFSET; i < psd->nextglyph; i++)
1162 sfree(psd->extraglyphs[i - EXTRAGLYPHSOFFSET]);
1163 sfree(psd->extraglyphs);
1164 while (psd->all_fonts) {
1165 font_info *fi = psd->all_fonts;
1166 glyph_width *w;
1167 psd->all_fonts = fi->next;
1168 while ((w = delpos234(fi->widths, 0)) != NULL)
1169 sfree(w);
1170 freetree234(fi->widths);
1171 freetree234(fi->kerns);
1172 freetree234(fi->ligs);
1173 sfree(fi);
1174 }
1175 sfree(psd);
1176 }
1177
1178 glyph glyph_intern(psdata *psd, const char *glyphname) {
11461179 int i, j, k, c;
11471180 glyph *gp;
11481181
11491182 i = -1;
1150 j = lenof(ps_glyphs_alphabetic);
1183 j = EXTRAGLYPHSOFFSET;
11511184 while (j-i > 1) {
11521185 k = (i + j) / 2;
11531186 c = strcmp(glyphname, ps_glyphs_alphabetic[k]);
11601193 i = k;
11611194 }
11621195 /* Non-standard glyph. We may need to add it to our tree. */
1163 if (extrabyname == NULL)
1164 extrabyname = newtree234(glyphcmp);
1165 gp = find234(extrabyname, (void *)glyphname, glyphcmp_search);
1196 gp = findcmp234(psd->extrabyname, (const void *)glyphname,
1197 glyphcmp_search, psd);
11661198 if (gp) {
11671199 k = *gp;
11681200 } else {
1169 extraglyphs = sresize(extraglyphs, nextglyph, char const *);
1170 k = nextglyph++;
1171 extraglyphs[k - lenof(ps_glyphs_alphabetic)] = dupstr(glyphname);
1201 psd->extraglyphs = sresize(psd->extraglyphs, psd->nextglyph, char *);
1202 k = psd->nextglyph++;
1203 psd->extraglyphs[k - EXTRAGLYPHSOFFSET] = dupstr(glyphname);
11721204 gp = snew(glyph);
11731205 *gp = k;
1174 add234(extrabyname, gp);
1206 add234(psd->extrabyname, gp);
11751207 }
11761208 return k;
11771209 }
18521884 perl -e '
18531885 open G, "glyphnames.txt" or die;
18541886 chomp(@g = <G>); %g = map(($_, $i++), @g);
1855 while(<>){chomp;print"$g{$_}, "}
1887 while(<<>>){chomp;print"$g{$_}, "}
18561888 print "NOGLYPH\n";' | fold -sw68 | sed 's/^/ /'
18571889
18581890 */
45414573 }},
45424574 };
45434575
4544 void init_std_fonts(void) {
4576 void init_std_fonts(psdata *psd) {
45454577 int i, j;
45464578 ligature const *lig;
45474579 kern_pair const *kern;
4548 static int done = FALSE;
4580 static bool done = false;
45494581
45504582 if (done) return;
45514583 for (i = 0; i < (int)lenof(ps_std_fonts); i++) {
45534585 fi->fontfile = NULL;
45544586 fi->name = ps_std_fonts[i].name;
45554587 fi->filetype = TYPE1; /* for purposes of making subset fonts */
4556 fi->widths = newtree234(width_cmp);
4588 fi->widths = newtree234(width_cmp, NULL);
45574589 for (j = 0; j < (int)lenof(fi->bmp); j++)
45584590 fi->bmp[j] = NOGLYPH;
45594591 for (j = 0; j < (int)lenof(ps_std_glyphs) - 1; j++) {
45604592 glyph_width *w = snew(glyph_width);
45614593 wchar_t ucs;
4562 w->glyph = glyph_intern(ps_std_glyphs[j]);
4594 w->glyph = glyph_intern(psd, ps_std_glyphs[j]);
45634595 w->width = ps_std_fonts[i].widths[j];
45644596 add234(fi->widths, w);
45654597 ucs = ps_glyph_to_unicode(w->glyph);
45664598 assert(ucs != 0xFFFF);
45674599 fi->bmp[ucs] = w->glyph;
45684600 }
4569 fi->kerns = newtree234(kern_cmp);
4601 fi->kerns = newtree234(kern_cmp, NULL);
45704602 for (kern = ps_std_fonts[i].kerns; kern->left != NOGLYPH; kern++)
45714603 add234(fi->kerns, (void *)kern);
4572 fi->ligs = newtree234(lig_cmp);
4604 fi->ligs = newtree234(lig_cmp, NULL);
45734605 for (lig = ps_std_fonts[i].ligs; lig->left != NOGLYPH; lig++)
45744606 add234(fi->ligs, (void *)lig);
4575 fi->next = all_fonts;
4576 all_fonts = fi;
4607 fi->next = psd->all_fonts;
4608 psd->all_fonts = fi;
45774609 }
4578 done = TRUE;
4610 done = true;
45794611 }
45804612
45814613 const int *ps_std_font_widths(char const *fontname)
+0
-29
release.sh less more
0 #!/bin/sh
1
2 # Make a Halibut release archive.
3
4 RELDIR="$1"
5 VERSION="$2"
6
7 linkmirror() {
8 (cd "$1"; find . -name CVS -prune -o -name .svn -prune -o \
9 -name build -prune -o -name reltmp -prune -o -type d -print) | \
10 while read dir; do mkdir -p "$2"/"$dir"; done
11 (cd "$1"; find . -name CVS -prune -o -name .svn -prune -o \
12 -name build -prune -o -name reltmp -prune -o \
13 -name '*.orig' -prune -o -name '*.rej' -prune -o \
14 -name '*.txt' -prune -o -name '*.html' -prune -o \
15 -name '*.1' -prune -o -name '.cvsignore' -prune -o \
16 -name '*.gz' -prune -o -name '.[^.]*' -prune -o \
17 -type f -print) | \
18 while read file; do ln -s "$1"/"$file" "$2"/"$file"; done
19 }
20
21 linkmirror $PWD reltmp/$RELDIR
22 if ! test -d charset; then
23 linkmirror $PWD/../charset reltmp/$RELDIR/charset
24 fi
25
26 tar chzvoCf reltmp $RELDIR.tar.gz $RELDIR
27
28 rm -rf reltmp
4646 struct tree234_Tag {
4747 node234 *root;
4848 cmpfn234 cmp;
49 void *cmpctx;
4950 };
5051
5152 struct node234_Tag {
5859 /*
5960 * Create a 2-3-4 tree.
6061 */
61 tree234 *newtree234(cmpfn234 cmp) {
62 tree234 *newtree234(cmpfn234 cmp, void *cmpctx) {
6263 tree234 *ret = snew(tree234);
6364 LOG(("created tree %p\n", ret));
6465 ret->root = NULL;
6566 ret->cmp = cmp;
67 ret->cmpctx = cmpctx;
6668 return ret;
6769 }
6870
360362 return NULL; /* error: index out of range */
361363 }
362364 } else {
363 if ((c = t->cmp(e, n->elems[0])) < 0)
365 if ((c = t->cmp(e, n->elems[0], t->cmpctx)) < 0)
364366 ki = 0;
365367 else if (c == 0)
366368 return n->elems[0]; /* already exists */
367 else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0)
369 else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1], t->cmpctx)) < 0)
368370 ki = 1;
369371 else if (c == 0)
370372 return n->elems[1]; /* already exists */
371 else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0)
373 else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2], t->cmpctx)) < 0)
372374 ki = 2;
373375 else if (c == 0)
374376 return n->elems[2]; /* already exists */
443445 * as NULL, in which case the compare function from the tree proper
444446 * will be used.
445447 */
446 void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp,
447 int relation, int *index) {
448 void *findcmprelpos234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx,
449 int relation, int *index) {
448450 node234 *n;
449451 void *ret;
450452 int c;
452454
453455 if (t->root == NULL)
454456 return NULL;
455
456 if (cmp == NULL)
457 cmp = t->cmp;
458457
459458 n = t->root;
460459 /*
476475 while (1) {
477476 for (kcount = 0; kcount < 4; kcount++) {
478477 if (kcount >= 3 || n->elems[kcount] == NULL ||
479 (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) {
478 (c = cmpret ? cmpret : cmp(e, n->elems[kcount], cmpctx)) < 0) {
480479 break;
481480 }
482481 if (n->kids[kcount]) idx += n->counts[kcount];
547546 if (ret && index) *index = idx;
548547 return ret;
549548 }
550 void *find234(tree234 *t, void *e, cmpfn234 cmp) {
551 return findrelpos234(t, e, cmp, REL234_EQ, NULL);
552 }
553 void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation) {
554 return findrelpos234(t, e, cmp, relation, NULL);
555 }
556 void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index) {
557 return findrelpos234(t, e, cmp, REL234_EQ, index);
549 void *findcmp234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx) {
550 return findcmprelpos234(t, e, cmp, cmpctx, REL234_EQ, NULL);
551 }
552 void *findcmprel234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx,
553 int relation) {
554 return findcmprelpos234(t, e, cmp, cmpctx, relation, NULL);
555 }
556 void *findcmppos234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx,
557 int *index) {
558 return findcmprelpos234(t, e, cmp, cmpctx, REL234_EQ, index);
559 }
560 void *find234(tree234 *t, const void *e) {
561 return findcmprelpos234(t, e, t->cmp, t->cmpctx, REL234_EQ, NULL);
562 }
563 void *findrel234(tree234 *t, const void *e, int relation) {
564 return findcmprelpos234(t, e, t->cmp, t->cmpctx, relation, NULL);
565 }
566 void *findpos234(tree234 *t, const void *e, int *index) {
567 return findcmprelpos234(t, e, t->cmp, t->cmpctx, REL234_EQ, index);
568 }
569 void *findrelpos234(tree234 *t, const void *e, int relation, int *index) {
570 return findcmprelpos234(t, e, t->cmp, t->cmpctx, relation, index);
558571 }
559572
560573 /*
10051018 }
10061019 void *del234(tree234 *t, void *e) {
10071020 int index;
1008 if (!findrelpos234(t, e, NULL, REL234_EQ, &index))
1021 if (!findrelpos234(t, e, REL234_EQ, &index))
10091022 return NULL; /* it wasn't in there anyway */
10101023 return delpos234_internal(t, index); /* it's there; delete it. */
10111024 }
11201133
11211134 if (t1->cmp) {
11221135 element = index234(t2, 0);
1123 element = findrelpos234(t1, element, NULL, REL234_GE, NULL);
1136 element = findrelpos234(t1, element, REL234_GE, NULL);
11241137 if (element)
11251138 return NULL;
11261139 }
11401153
11411154 if (t2->cmp) {
11421155 element = index234(t1, size1-1);
1143 element = findrelpos234(t2, element, NULL, REL234_LE, NULL);
1156 element = findrelpos234(t2, element, REL234_LE, NULL);
11441157 if (element)
11451158 return NULL;
11461159 }
11771190 t->root = NULL;
11781191 return ret;
11791192 }
1193 assert(n);
11801194
11811195 /*
11821196 * Search down the tree to find the split point.
13321346 * over to it until it is greater than minimum
13331347 * size.
13341348 */
1335 int undersized = (!sub->elems[0]);
1349 bool undersized = (!sub->elems[0]);
13361350 LOG((" child %d is %ssize\n", ki,
13371351 undersized ? "under" : "minimum-"));
13381352 LOG((" neighbour is %s\n",
13711385 t->root = halves[1];
13721386 return halves[0];
13731387 }
1374 tree234 *splitpos234(tree234 *t, int index, int before) {
1388 tree234 *splitpos234(tree234 *t, int index, bool before) {
13751389 tree234 *ret;
13761390 node234 *n;
13771391 int count;
13791393 count = countnode234(t->root);
13801394 if (index < 0 || index > count)
13811395 return NULL; /* error */
1382 ret = newtree234(t->cmp);
1396 ret = newtree234(t->cmp, NULL);
13831397 n = split234_internal(t, index);
13841398 if (before) {
13851399 /* We want to return the ones before the index. */
13941408 }
13951409 return ret;
13961410 }
1397 tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel) {
1411 tree234 *splitcmp234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx,
1412 int rel) {
13981413 int before;
13991414 int index;
14001415
14061421 } else {
14071422 before = 0;
14081423 }
1409 if (!findrelpos234(t, e, cmp, rel, &index))
1424 if (!findcmprelpos234(t, e, cmp, cmpctx, rel, &index))
14101425 index = 0;
14111426
14121427 return splitpos234(t, index+1, before);
1428 }
1429 tree234 *split234(tree234 *t, const void *e, int rel)
1430 {
1431 return splitcmp234(t, e, t->cmp, t->cmpctx, rel);
14131432 }
14141433
14151434 static node234 *copynode234(node234 *n, copyfn234 copyfn, void *copyfnstate) {
14381457 tree234 *copytree234(tree234 *t, copyfn234 copyfn, void *copyfnstate) {
14391458 tree234 *t2;
14401459
1441 t2 = newtree234(t->cmp);
1460 t2 = newtree234(t->cmp, t->cmpctx);
14421461 if (t->root) {
14431462 t2->root = copynode234(t->root, copyfn, copyfnstate);
14441463 t2->root->parent = NULL;
17061725 for (i = -1; i < nelems; i++) {
17071726 void *lower = (i == -1 ? lowbound : node->elems[i]);
17081727 void *higher = (i+1 == nelems ? highbound : node->elems[i+1]);
1709 if (lower && higher && cmp(lower, higher) >= 0) {
1728 if (lower && higher && cmp(lower, higher, cmpctx) >= 0) {
17101729 error("node %p: kid comparison [%d=%s,%d=%s] failed",
17111730 node, i, lower, i+1, higher);
17121731 }
18161835 realret = add234(tree, elem);
18171836
18181837 i = 0;
1819 while (i < arraylen && cmp(elem, array[i]) > 0)
1838 while (i < arraylen && cmp(elem, array[i], NULL) > 0)
18201839 i++;
1821 if (i < arraylen && !cmp(elem, array[i])) {
1840 if (i < arraylen && !cmp(elem, array[i], NULL)) {
18221841 void *retval = array[i]; /* expect that returned not elem */
18231842 if (realret != retval) {
18241843 error("add: retval was %p expected %p", realret, retval);
18621881 int i;
18631882
18641883 i = 0;
1865 while (i < arraylen && cmp(elem, array[i]) > 0)
1884 while (i < arraylen && cmp(elem, array[i], NULL) > 0)
18661885 i++;
1867 if (i >= arraylen || cmp(elem, array[i]) != 0)
1886 if (i >= arraylen || cmp(elem, array[i], NULL) != 0)
18681887 return; /* don't do it! */
18691888 delpostest(i);
18701889 }
18831902 return ((*seed) / 65536) % 32768;
18841903 }
18851904
1886 int mycmp(void *av, void *bv) {
1905 int mycmp(const void *av, const void *bv, void *cmpctx) {
18871906 char const *a = (char const *)av;
18881907 char const *b = (char const *)bv;
18891908 return strcmp(a, b);
21422161 tree2 = newtree234(mycmp);
21432162 tree3 = newtree234(mycmp);
21442163 tree4 = newtree234(mycmp);
2145 assert(mycmp(strings[0], strings[1]) < 0); /* just in case :-) */
2164 assert(mycmp(strings[0], strings[1], NULL) < 0); /* just in case :-) */
21462165 add234(tree2, strings[1]);
21472166 add234(tree4, strings[0]);
21482167 array[0] = strings[0];
2727 #ifndef TREE234_H
2828 #define TREE234_H
2929
30 #include <stdbool.h>
31
3032 /*
3133 * This typedef is opaque outside tree234.c itself.
3234 */
3335 typedef struct tree234_Tag tree234;
3436
35 typedef int (*cmpfn234)(void *, void *);
37 typedef int (*cmpfn234)(const void *av, const void *bv, void *cmpctx);
3638
3739 typedef void *(*copyfn234)(void *state, void *element);
3840
4143 * lookups by key will fail: you can only look things up by numeric
4244 * index, and you have to use addpos234() and delpos234().
4345 */
44 tree234 *newtree234(cmpfn234 cmp);
46 tree234 *newtree234(cmpfn234 cmp, void *cmpctx);
4547
4648 /*
4749 * Free a 2-3-4 tree (not including freeing the elements).
127129 enum {
128130 REL234_EQ, REL234_LT, REL234_LE, REL234_GT, REL234_GE
129131 };
130 void *find234(tree234 *t, void *e, cmpfn234 cmp);
131 void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation);
132 void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index);
133 void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp, int relation,
134 int *index);
132 void *find234(tree234 *t, const void *e);
133 void *findrel234(tree234 *t, const void *e, int relation);
134 void *findpos234(tree234 *t, const void *e, int *index);
135 void *findrelpos234(tree234 *t, const void *e, int relation, int *index);
136 void *findcmp234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx);
137 void *findcmprel234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx,
138 int relation);
139 void *findcmppos234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx,
140 int *index);
141 void *findcmprelpos234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx,
142 int relation, int *index);
135143
136144 /*
137145 * Delete an element e in a 2-3-4 tree. Does not free the element,
161169 /*
162170 * Split a tree234 into two valid tree234s.
163171 *
164 * splitpos234 splits at a given index. If `before' is TRUE, the
172 * splitpos234 splits at a given index. If `before' is true, the
165173 * items at and after that index are left in t and the ones before
166 * are returned; if `before' is FALSE, the items before that index
174 * are returned; if `before' is false, the items before that index
167175 * are left in t and the rest are returned.
168176 *
169177 * split234 splits at a given key. You can pass any of the
171179 * in the tree that satisfy the relation are returned; the
172180 * remainder are left.
173181 */
174 tree234 *splitpos234(tree234 *t, int index, int before);
175 tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel);
182 tree234 *splitpos234(tree234 *t, int index, bool before);
183 tree234 *split234(tree234 *t, const void *e, int rel);
184 tree234 *splitcmp234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx,
185 int rel);
176186
177187 /*
178188 * Join two tree234s together into a single one.
2020 }
2121
2222 static char *ustrtoa_internal(wchar_t const *s, char *outbuf, int size,
23 int charset, int careful) {
24 int len, ret, err;
23 int charset, bool careful) {
24 int len, ret;
25 bool err;
2526 charset_state state = CHARSET_INIT_STATE;
2627
2728 if (!s) {
3334 size--; /* leave room for terminating NUL */
3435 *outbuf = '\0';
3536 while (len > 0) {
36 err = 0;
37 err = false;
3738 ret = charset_from_unicode(&s, &len, outbuf, size, charset, &state,
3839 (careful ? &err : NULL));
3940 if (err)
5556 }
5657
5758 char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset) {
58 return ustrtoa_internal(s, outbuf, size, charset, FALSE);
59 return ustrtoa_internal(s, outbuf, size, charset, false);
5960 }
6061
6162 char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset) {
62 return ustrtoa_internal(s, outbuf, size, charset, TRUE);
63 return ustrtoa_internal(s, outbuf, size, charset, true);
6364 }
6465
6566 wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset) {
8687 return outbuf;
8788 }
8889
89 char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful)
90 char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, bool careful)
9091 {
9192 char *outbuf;
92 int outpos, outlen, len, ret, err;
93 int outpos, outlen, len, ret;
94 bool err;
9395 charset_state state = CHARSET_INIT_STATE;
9496
9597 if (!s) {
105107 outbuf[outpos] = '\0';
106108
107109 while (len > 0) {
108 err = 0;
110 err = false;
109111 ret = charset_from_unicode(&s, &len,
110112 outbuf + outpos, outlen - outpos - 1,
111113 charset, &state, (careful ? &err : NULL));
137139
138140 char *utoa_dup(wchar_t const *s, int charset)
139141 {
140 return utoa_internal_dup(s, charset, NULL, FALSE);
142 return utoa_internal_dup(s, charset, NULL, false);
141143 }
142144
143145 char *utoa_dup_len(wchar_t const *s, int charset, int *len)
144146 {
145 return utoa_internal_dup(s, charset, len, FALSE);
147 return utoa_internal_dup(s, charset, len, false);
146148 }
147149
148150 char *utoa_careful_dup(wchar_t const *s, int charset)
149151 {
150 return utoa_internal_dup(s, charset, NULL, TRUE);
152 return utoa_internal_dup(s, charset, NULL, true);
151153 }
152154
153155 wchar_t *ufroma_dup(char const *s, int charset) {
281283 #endif
282284 }
283285
284 int uisalpha(wchar_t c) {
286 bool uisalpha(wchar_t c) {
285287 #ifdef HAS_ISWALPHA
286288 return iswalpha(c);
287289 #else
350352 return ret;
351353 }
352354
353 int utob(wchar_t const *s) {
355 bool utob(wchar_t const *s) {
354356 if (!ustricmp(s, L"yes") || !ustricmp(s, L"y") ||
355357 !ustricmp(s, L"true") || !ustricmp(s, L"t"))
356 return TRUE;
357 return FALSE;
358 }
359
360 int uisdigit(wchar_t c) {
358 return true;
359 return false;
360 }
361
362 bool uisdigit(wchar_t c) {
361363 return c >= L'0' && c <= L'9';
362364 }
363365
445447 * Determine whether a Unicode string can be translated into a
446448 * given charset without any missing characters.
447449 */
448 int cvt_ok(int charset, const wchar_t *s)
450 bool cvt_ok(int charset, const wchar_t *s)
449451 {
450452 char buf[256];
451453 charset_state state = CHARSET_INIT_STATE;
452 int err, len = ustrlen(s);
453
454 err = 0;
454 bool err;
455 int len = ustrlen(s);
456
457 err = false;
455458 while (len > 0) {
456459 (void)charset_from_unicode(&s, &len, buf, lenof(buf),
457460 charset, &state, &err);
458461 if (err)
459 return FALSE;
460 }
461 return TRUE;
462 return false;
463 }
464 return true;
462465 }
463466
464467 /*
469472 * rely on always getting a valid charset id back from this
470473 * function.
471474 */
472 int charset_from_ustr(filepos *fpos, const wchar_t *name)
475 int charset_from_ustr(filepos *fpos, const wchar_t *name, errorstate *es)
473476 {
474477 char *csname;
475478 int charset;
479482
480483 if (charset == CS_NONE) {
481484 charset = CS_ASCII;
482 err_charset(fpos, name);
485 err_charset(es, fpos, name);
483486 }
484487
485488 sfree(csname);
00 /* Generated by automated build script */
1 #define VERSION "version 1.2"
1 #define VERSION "version 1.3"
2323 };
2424
2525 /* auxiliary function for binary search in interval table */
26 static int bisearch(wchar_t ucs, const struct interval *table, int max) {
26 static bool bisearch(wchar_t ucs, const struct interval *table, int max) {
2727 int min = 0;
2828 int mid;
2929
3030 if (ucs < table[0].first || ucs > table[max].last)
31 return 0;
31 return false;
3232 while (max >= min) {
3333 mid = (min + max) / 2;
3434 if (ucs > table[mid].last)
3636 else if (ucs < table[mid].first)
3737 max = mid - 1;
3838 else
39 return 1;
39 return true;
4040 }
4141
42 return 0;
42 return false;
4343 }
4444
4545 int mk_wcwidth(wchar_t ucs)
143143 wid = 0;
144144
145145 while (len > 0) {
146 int err;
146 bool err;
147147 wchar_t const *s_orig;
148148
149 err = 0;
149 err = false;
150150 s_orig = s;
151151 charset_from_unicode(&s, &len, buf, lenof(buf), charset, &state, &err);
152152 wid += wcswidth(s_orig, s - s_orig);
172172 return 0;
173173 }
174174
175 int chm_directory_entry_cmp(void *av, void *bv)
175 int chm_directory_entry_cmp(const void *av, const void *bv, void *cmpctx)
176176 {
177177 const struct chm_directory_entry
178178 *a = (const struct chm_directory_entry *)av,
180180 return strcmp_chm(a->filename, b->filename);
181181 }
182182
183 int chm_directory_entry_find(void *av, void *bv)
183 int chm_directory_entry_find(const void *av, const void *bv, void *cmpctx)
184184 {
185185 const char *a = (const char *)av;
186186 const struct chm_directory_entry
234234 PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size_field, rs->pos);
235235 PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size2_field, rs->pos);
236236
237 index = newtree234(NULL);
237 index = newtree234(NULL, NULL);
238238 curr_chunk = 0;
239239 depth = 1;
240240 /* Write out lowest-level PMGL chunks full of actual directory entries */
326326 PUT_32BIT_LSB_FIRST(chunk.text + chunk_endlen_field,
327327 chunksize - chunk.pos);
328328 PUT_16BIT_LSB_FIRST(reversed_quickref.text, n_entries);
329 while (chunk.pos + reversed_quickref.pos < chunksize)
330 rdaddc(&chunk, 0); /* zero-pad */
329 rdaddc_rep(&chunk, 0, chunksize - chunk.pos - reversed_quickref.pos);
331330 for (i = reversed_quickref.pos - 2; i >= 0; i -= 2)
332331 rdaddsn(&chunk, reversed_quickref.text+i, 2);
333332
345344 int index_index = 0;
346345
347346 prev_index = index;
348 index = newtree234(NULL);
347 index = newtree234(NULL, NULL);
349348 depth++;
350349
351350 while (index_index < count234(prev_index)) {
416415 PUT_32BIT_LSB_FIRST(chunk.text + chunk_endlen_field,
417416 chunksize - chunk.pos);
418417 PUT_16BIT_LSB_FIRST(reversed_quickref.text, n_entries);
419 while (chunk.pos + reversed_quickref.pos < chunksize)
420 rdaddc(&chunk, 0); /* zero-pad */
418 rdaddc_rep(&chunk, 0,
419 chunksize - chunk.pos - reversed_quickref.pos);
421420 for (i = reversed_quickref.pos - 2; i >= 0; i -= 2)
422421 rdaddsn(&chunk, reversed_quickref.text+i, 2);
423422
509508 int strtab_offset;
510509 };
511510
512 static int chm_stringtab_cmp(void *av, void *bv)
511 static int chm_stringtab_cmp(const void *av, const void *bv, void *cmpctx)
513512 {
514513 const struct chm_stringtab_entry
515514 *a = (const struct chm_stringtab_entry *)av,
518517 b->chm->stringsfile.text + b->strtab_offset);
519518 }
520519
521 static int chm_stringtab_find(void *av, void *bv)
520 static int chm_stringtab_find(const void *av, const void *bv, void *cmpctx)
522521 {
523522 const char *a = (const char *)av;
524523 const struct chm_stringtab_entry
534533 if (!string)
535534 return 0;
536535
537 if ((ent = (struct chm_stringtab_entry *)find234(
538 chm->stringtab, (void *)string, chm_stringtab_find)) == NULL) {
536 if ((ent = (struct chm_stringtab_entry *)findcmp234(
537 chm->stringtab, (void *)string, chm_stringtab_find, NULL)) ==
538 NULL) {
539539 ent = snew(struct chm_stringtab_entry);
540540 ent->chm = chm;
541541
542542 /* Pad to ensure the string doesn't cross a page boundary. */
543543 size = strlen(string) + 1; /* include the NUL terminator */
544544 assert(size < 0x1000); /* avoid really serious trouble */
545 while ((chm->stringsfile.pos ^ (chm->stringsfile.pos + size-1)) >> 12)
546 rdaddc(&chm->stringsfile, 0);
545 if ((chm->stringsfile.pos ^ (chm->stringsfile.pos + size-1)) >> 12)
546 rdaddc_rep(&chm->stringsfile, 0, 0xFFF & -chm->stringsfile.pos);
547547
548548 ent->strtab_offset = chm->stringsfile.pos;
549549 rdaddsc(&chm->stringsfile, string);
556556 struct chm *chm_new(void)
557557 {
558558 struct chm *chm = snew(struct chm);
559 chm->files = newtree234(chm_directory_entry_cmp);
560 chm->windows = newtree234(NULL);
561 chm->stringtab = newtree234(chm_stringtab_cmp);
559 chm->files = newtree234(chm_directory_entry_cmp, NULL);
560 chm->windows = newtree234(NULL, NULL);
561 chm->stringtab = newtree234(chm_stringtab_cmp, NULL);
562562 chm->content0 = empty_rdstringc;
563563 chm->content1 = empty_rdstringc;
564564 chm->outfile = empty_rdstringc;
640640 static struct chm_directory_entry *chm_find_file(
641641 struct chm *chm, const char *name)
642642 {
643 return find234(chm->files, (void *)name, chm_directory_entry_find);
643 return findcmp234(chm->files, (const void *)name,
644 chm_directory_entry_find, NULL);
644645 }
645646
646647 static char *add_leading_slash(const char *str)
748749 int topics_offset_to_update;
749750 };
750751
751 int chm_urltbl_entry_cmp(void *av, void *bv)
752 int chm_urltbl_entry_cmp(const void *av, const void *bv, void *cmpctx)
752753 {
753754 const struct chm_urltbl_entry
754755 *a = (const struct chm_urltbl_entry *)av,
894895
895896 {
896897 rdstringc winfile = {0, 0, NULL};
897 int i, j, s;
898 int i, s;
898899 struct chm_window *win;
899900
900901 RDADD_32BIT_LSB_FIRST(&winfile, count234(chm->windows));
949950 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* default nav pane = TOC */
950951 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane tabs at top */
951952 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* WM_NOTIFY id */
952 for (j = 0; j < 20; j++)
953 rdaddc(&winfile, 0); /* tab order block */
953 rdaddc_rep(&winfile, 0, 20); /* tab order block */
954954 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* history to keep */
955955 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button target */
956956 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button target */
975975 rdstringc topics = {0, 0, NULL};
976976 rdstringc urltbl = {0, 0, NULL};
977977 rdstringc urlstr = {0, 0, NULL};
978 int i, index, s, n_tocidx_3;
978 int index, s, n_tocidx_3;
979979 struct chm_directory_entry *contentsfile = NULL, *indexfile = NULL;
980980 tree234 *urltbl_pre;
981981 struct chm_urltbl_entry *urltbl_entry;
982982
983 urltbl_pre = newtree234(chm_urltbl_entry_cmp);
984
985 for (i = 0; i < 0x1000; i++)
986 rdaddc(&tocidx, 0);
983 urltbl_pre = newtree234(chm_urltbl_entry_cmp, NULL);
984
985 rdaddc_rep(&tocidx, 0, 0x1000);
987986
988987 /* Write a header of one zero byte at the start of #URLSTR.
989988 * chmspec says this doesn't always appear, and is unclear on
10781077 * plus a NUL-terminated copy of the target file name / URL. */
10791078 urlstr_size = 8 + strlen(sect->url) + 1;
10801079 assert(urlstr_size < 0x1000); /* must _fit_ in a page! */
1081 while ((urlstr.pos ^ (urlstr.pos + urlstr_size - 1)) >> 12)
1082 rdaddc(&urlstr, 0);
1080 if ((urlstr.pos ^ (urlstr.pos + urlstr_size - 1)) >> 12)
1081 rdaddc_rep(&urlstr, 0, 0xFFF & -urlstr.pos); /* zero-pad up to the next 4K page boundary, so the record starts a fresh page */
10831082
10841083 /*
10851084 * Save everything we know so far about the #URLTBL record
12281227 }
12291228
12301229 /* Align the current #TOCIDX offset to 16 bytes */
1231 while (tocidx.pos & 0xF)
1232 rdaddc(&tocidx, 0);
1230 rdaddc_rep(&tocidx, 0, 0xF & -tocidx.pos);
12331231
12341232 /* #TOCIDX header field pointing at start of type-3 records */
12351233 PUT_32BIT_LSB_FIRST(tocidx.text + 0x4, tocidx.pos);
12941292 RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */
12951293 RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no merge files */
12961294 RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
1297 while (sysfile.pos - idxhdr_start < 4096)
1298 rdaddc(&sysfile, 0);
1295 rdaddc_rep(&sysfile, 0, 4096 - (sysfile.pos - idxhdr_start));
12991296
13001297 chm_add_file_internal(chm, "/#IDXHDR", sysfile.text + idxhdr_start,
13011298 sysfile.pos - idxhdr_start,
13661363 int orig_decomp_size = chm->content1.pos;
13671364 size_t i;
13681365
1369 while (chm->content1.pos & 0x7FFF)
1370 rdaddc(&chm->content1, 0); /* pad to a realign-interval boundary */
1366 /* Pad to a realign-interval boundary */
1367 rdaddc_rep(&chm->content1, 0, 0x7FFF & -chm->content1.pos);
1368
13711369 ef = lzx(chm->content1.text, chm->content1.pos, 0x8000, 0x10000);
13721370 chm_add_file_internal(
13731371 chm, "::DataSpace/Storage/MSCompressed/Content",
214214
215215 /* The master index maps file names to help-file offsets. */
216216
217 static int filecmp(void *av, void *bv)
217 static int filecmp(const void *av, const void *bv, void *cmpctx)
218218 {
219219 const struct file *a = (const struct file *)av;
220220 const struct file *b = (const struct file *)bv;
240240
241241 /* The |CONTEXT internal file maps help context hashes to TOPICOFFSETs. */
242242
243 static int ctxcmp(void *av, void *bv)
243 static int ctxcmp(const void *av, const void *bv, void *cmpctx)
244244 {
245245 const context *a = (const context *)av;
246246 const context *b = (const context *)bv;
268268
269269 /* The |TTLBTREE internal file maps TOPICOFFSETs to title strings. */
270270
271 static int ttlcmp(void *av, void *bv)
271 static int ttlcmp(const void *av, const void *bv, void *cmpctx)
272272 {
273273 const context *a = (const context *)av;
274274 const context *b = (const context *)bv;
298298
299299 /* The |KWBTREE internal file maps index strings to TOPICOFFSETs. */
300300
301 static int idxcmp(void *av, void *bv)
301 static int idxcmp(const void *av, const void *bv, void *cmpctx)
302302 {
303303 const struct indexrec *a = (const struct indexrec *)av;
304304 const struct indexrec *b = (const struct indexrec *)bv;
336336 * is by the low 16 bits of the number (above that is flags).
337337 */
338338
339 static int tabcmp(void *av, void *bv)
339 static int tabcmp(const void *av, const void *bv, void *cmpctx)
340340 {
341341 const int *a = (const int *)av;
342342 const int *b = (const int *)bv;
348348 }
349349
350350 /* The internal `fontnames' B-tree stores strings. */
351 static int fontcmp(void *av, void *bv)
351 static int fontcmp(const void *av, const void *bv, void *cmpctx)
352352 {
353353 const char *a = (const char *)av;
354354 const char *b = (const char *)bv;
12401240 whlp_file_add_short(f, 0x36C); /* magic number */
12411241 whlp_file_add_short(f, 33); /* minor version: HCW 4.00 Win95+ */
12421242 whlp_file_add_short(f, 1); /* major version */
1243 whlp_file_add_long(f, time(NULL)); /* generation date */
1243 whlp_file_add_long(f, current_time()); /* generation date */
12441244 whlp_file_add_short(f, 0); /* flags=0 means no compression */
12451245
12461246 /*
13271327 */
13281328 for (i = 0; (fontname = index234(h->fontnames, i)) != NULL; i++) {
13291329 char data[32];
1330 memset(data, i, sizeof(data));
1331 strncpy(data, fontname, sizeof(data));
1330 size_t len = strlen(fontname);
1331 if (len > sizeof(data))
1332 len = sizeof(data);
1333 memset(data, 0, sizeof(data));
1334 memcpy(data, fontname, len);
13321335 whlp_file_add(f, data, sizeof(data));
13331336 }
13341337
13391342 int fontpos;
13401343 void *ret;
13411344
1342 ret = findpos234(h->fontnames, fontdesc->font, NULL, &fontpos);
1345 ret = findpos234(h->fontnames, fontdesc->font, &fontpos);
13431346 assert(ret != NULL);
13441347
13451348 whlp_file_add_char(f, fontdesc->rendition);
17101713 /*
17111714 * Internal B-trees.
17121715 */
1713 ret->files = newtree234(filecmp);
1714 ret->pre_contexts = newtree234(NULL);
1715 ret->contexts = newtree234(ctxcmp);
1716 ret->titles = newtree234(ttlcmp);
1717 ret->text = newtree234(NULL);
1718 ret->index = newtree234(idxcmp);
1719 ret->tabstops = newtree234(tabcmp);
1720 ret->fontnames = newtree234(fontcmp);
1721 ret->fontdescs = newtree234(NULL);
1716 ret->files = newtree234(filecmp, NULL);
1717 ret->pre_contexts = newtree234(NULL, NULL);
1718 ret->contexts = newtree234(ctxcmp, NULL);
1719 ret->titles = newtree234(ttlcmp, NULL);
1720 ret->text = newtree234(NULL, NULL);
1721 ret->index = newtree234(idxcmp, NULL);
1722 ret->tabstops = newtree234(tabcmp, NULL);
1723 ret->fontnames = newtree234(fontcmp, NULL);
1724 ret->fontdescs = newtree234(NULL, NULL);
17221725
17231726 /*
17241727 * Some standard files.
17461749 int filecount, offset, index, filelen;
17471750 struct file *file, *map, *md;
17481751 context *ctx;
1749 int has_index;
1752 bool has_index;
17501753
17511754 /*
17521755 * Lay out the topic section.