Import halibut_1.3.orig.tar.gz
Colin Watson
2 years ago
0 | /build.log | |
1 | /build.out | |
2 | /halibut | |
3 | /doc/*.html | |
4 | /doc/halibut.1 | |
5 | /doc/halibut.info* | |
6 | /doc/halibut.pdf | |
7 | /doc/halibut.ps | |
8 | /doc/halibut.txt | |
9 | /doc/halibut.chm | |
10 | *.o | |
11 | .deps | |
12 | /Makefile | |
13 | /Makefile.in | |
14 | /aclocal.m4 | |
15 | /autom4te.cache/ | |
16 | /compile | |
17 | /configure | |
18 | /depcomp | |
19 | /install-sh | |
20 | /missing | |
21 | /stamp-h1 | |
22 | /config.log | |
23 | /config.status | |
24 | /halibut.1 | |
25 | /halibut.chm | |
26 | /halibut.info* | |
27 | /halibut.pdf | |
28 | /halibut.ps | |
29 | /halibut.txt | |
30 | /*.html |
2 | 2 | |
3 | 3 | module halibut |
4 | 4 | |
5 | ifnexist halibut/charset checkout charset halibut/charset | |
6 | ||
5 | # Make up a version number. | |
7 | 6 | set Version $(!builddate).$(vcsid) |
8 | 7 | ifneq "$(RELEASE)" "" set Version $(RELEASE) |
9 | 8 | |
14 | 13 | in halibut do echo '/* Generated by automated build script */' > version.h |
15 | 14 | in halibut do echo '$#define VERSION "version $(Version)"' >> version.h |
16 | 15 | |
17 | set Rel | |
18 | ifneq "$(RELEASE)" "" set Rel RELEASE=$(RELEASE) | |
16 | # Make the source archive. | |
17 | in . do ln -s halibut halibut-$(Version) | |
18 | in . do tar chzvf halibut-$(Version).tar.gz halibut-$(Version) | |
19 | 19 | |
20 | set Basename halibut-$(Version) | |
20 | # Build the Windows binary, using clang-cl. | |
21 | in . do mkdir buildwin | |
22 | in buildwin do cmake ../halibut -DCMAKE_TOOLCHAIN_FILE=$(cmake_toolchain_clangcl64) -DCMAKE_BUILD_TYPE=Release -DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded -DCMAKE_C_FLAGS_RELEASE="/MT /O2" | |
23 | in buildwin do make -j$(nproc) VERBOSE=1 | |
24 | # Code-sign the Windows binary, if the local bob config provides a | |
25 | # script to do so. We assume here that the script accepts an -i option | |
26 | # to provide a 'more info' URL, and that it signs the file in place. | |
27 | ifneq "$(cross_winsigncode)" "" in buildwin do $(cross_winsigncode) -i https://www.chiark.greenend.org.uk/~sgtatham/halibut/ halibut.exe | |
21 | 28 | |
22 | # Make the source archive. | |
23 | in halibut do ./release.sh $(Basename) $(Version) | |
29 | # Do a full Unix build, which will also build the docs, and also | |
30 | # checks that it _does_ build. | |
31 | in . do mkdir buildunix | |
32 | in buildunix do cmake ../halibut | |
33 | in buildunix do make -j$(nproc) VERBOSE=1 | |
24 | 34 | |
25 | # Build a Windows binary of Halibut using clang-cl. | |
26 | in halibut with clangcl64 do make CC='clang --target=x86_64-pc-windows-msvc18.0.0 -D_CRT_SECURE_NO_WARNINGS' CC_LINK='lld-link -defaultlib:libcmt -out:$$@' EXE=.exe | |
27 | in halibut do mv build/halibut.exe . | |
28 | in halibut do rm -rf build | |
29 | delegate windows | |
30 | # Code-sign the Windows binary, if the local bob config provides | |
31 | # a script to do so. We assume here that the script accepts an -i | |
32 | # option to provide a 'more info' URL, and that it signs the file | |
33 | # in place. | |
34 | ifneq "$(winsigncode)" "" in halibut do $(winsigncode) -i http://www.chiark.greenend.org.uk/~sgtatham/halibut/ halibut.exe | |
35 | return halibut/halibut.exe | |
36 | enddelegate | |
37 | ||
38 | # Build a local binary of Halibut in order to build the docs. Make | |
39 | # sure to tag it with the supplied version number, so that the | |
40 | # release docs announce themselves as having been built with the | |
41 | # release Halibut (e.g. PDF's Producer property). | |
42 | in halibut do make $(Rel) | |
43 | ||
44 | # And now build the docs. | |
45 | in halibut/doc do make | |
46 | ||
47 | deliver halibut/*.tar.gz $@ | |
48 | deliver halibut/halibut.exe $@ | |
49 | deliver halibut/doc/halibut.pdf $@ | |
50 | deliver halibut/doc/halibut.txt $@ | |
51 | deliver halibut/doc/halibut.chm $@ | |
52 | deliver halibut/doc/*.html $@ | |
35 | deliver halibut-$(Version).tar.gz $@ | |
36 | deliver buildwin/halibut.exe $@ | |
37 | deliver buildunix/doc/manual/halibut.pdf $@ | |
38 | deliver buildunix/doc/manual/halibut.txt $@ | |
39 | deliver buildunix/doc/manual/halibut.chm $@ | |
40 | deliver buildunix/doc/manual/*.html $@ |
0 | cmake_minimum_required(VERSION 3.5) | |
1 | project(halibut LANGUAGES C) | |
2 | ||
3 | set(LIBCHARSET_LIBRARY_ONLY ON) | |
4 | include_directories(charset ${CMAKE_CURRENT_BINARY_DIR}/charset) | |
5 | add_subdirectory(charset) | |
6 | ||
7 | if(CMAKE_SYSTEM_NAME MATCHES "Windows") | |
8 | add_compile_definitions(_CRT_SECURE_NO_WARNINGS) | |
9 | endif() | |
10 | ||
11 | add_executable(halibut | |
12 | biblio.c | |
13 | bk_html.c | |
14 | bk_info.c | |
15 | bk_man.c | |
16 | bk_paper.c | |
17 | bk_pdf.c | |
18 | bk_ps.c | |
19 | bk_text.c | |
20 | bk_whlp.c | |
21 | contents.c | |
22 | deflate.c | |
23 | error.c | |
24 | help.c | |
25 | huffman.c | |
26 | in_afm.c | |
27 | in_pf.c | |
28 | in_sfnt.c | |
29 | index.c | |
30 | input.c | |
31 | keywords.c | |
32 | licence.c | |
33 | lz77.c | |
34 | lzx.c | |
35 | main.c | |
36 | malloc.c | |
37 | misc.c | |
38 | psdata.c | |
39 | tree234.c | |
40 | ustring.c | |
41 | version.c | |
42 | wcwidth.c | |
43 | winchm.c | |
44 | winhelp.c) | |
45 | target_link_libraries(halibut charset) | |
46 | ||
47 | if(CMAKE_VERSION VERSION_LESS 3.14) | |
48 | # CMake 3.13 and earlier required an explicit install destination. | |
49 | install(TARGETS halibut RUNTIME DESTINATION bin) | |
50 | else() | |
51 | # 3.14 and above selects a sensible default, which we should avoid | |
52 | # overriding here so that end users can override it using | |
53 | # CMAKE_INSTALL_BINDIR. | |
54 | install(TARGETS halibut) | |
55 | endif() | |
56 | ||
57 | add_subdirectory(doc) |
9 | 9 | Building Halibut |
10 | 10 | ---------------- |
11 | 11 | |
12 | If you have GNU make and gcc, you should simply be able to type | |
13 | `make'. The Makefile will generate a `build' subdirectory, and will | |
14 | put all the object files and binaries in there. | |
12 | Halibut is built using CMake <https://cmake.org/>. To compile in the | |
13 | simplest way (on any of Linux, Windows or Mac), run these commands in | |
14 | the source directory: | |
15 | 15 | |
16 | In a release archive, the Makefile will also check the source files | |
17 | against a list of MD5 checksums, and if they match it will | |
18 | automatically add the correct version number to the build. This is | |
19 | _not_ a secure measure intended to enforce that only approved | |
20 | Halibut sources are ever built into a binary with a given version | |
21 | number; it is merely a sanity check against heavily modified copies | |
22 | _accidentally_ confusing users expecting standard versions of | |
23 | Halibut. Distribution maintainers are entirely at liberty, if they | |
24 | choose, to modify Halibut source files as appropriate for their | |
25 | distribution and then have the resulting binary call itself by the | |
26 | original version number. If you run `make VERSION=x.y', the | |
27 | resulting Halibut binary will call itself version x.y irrespective | |
28 | of the md5sum manifest. (You may also need to do this if your build | |
29 | system does not have the md5sum program.) | |
16 | cmake . | |
17 | cmake --build . | |
30 | 18 | |
31 | Halibut unfortunately does not yet come with an autoconf-generated | |
32 | makefile, so if you do not have these utilities then you will have | |
33 | to do the build manually. Look in the master `Makefile' to find the | |
34 | list of source modules (they will be listed on the line starting | |
35 | `MODULES :=', and continued on lines starting `MODULES +='), compile | |
36 | those files with the C compiler of your choice, and link them | |
37 | together into a binary. In addition to the modules on that list, you | |
38 | will also need to compile `version.c', and if you wish your Halibut | |
39 | binary to identify itself with a version number then you will have | |
40 | to define the preprocessor symbol `VERSION' to the required version | |
41 | number string. On Unix this can be done with a command such as | |
42 | ||
43 | cc -c -DVERSION=\"0.9\" version.c | |
19 | (You'll also need to have a C compiler and some kind of build tool | |
20 | installed, such as gcc and make.) | |
44 | 21 | |
45 | 22 | Halibut's source files are intended to be almost entirely portable |
46 | 23 | ANSI C. If they fail to compile and run correctly on your compiler, |
47 | 24 | this might very well be considered a bug. |
48 | ||
49 | Building the Halibut manual | |
50 | --------------------------- | |
51 | ||
52 | Once you have built Halibut itself, you might well want to build its | |
53 | manual. If you're using GNU make, you can do this just by changing | |
54 | into the `doc' subdirectory and typing `make'. (This relies on the | |
55 | Halibut binary you built in the previous step being present in the | |
56 | `build' subdirectory.) | |
57 | ||
58 | Failing that, you will need to read the Makefile and run a manual | |
59 | Halibut command, of the form | |
60 | ||
61 | halibut --text=halibut.txt --html blurb.but intro.but [...] index.but | |
62 | ||
63 | (The precise list of .but files is given at the top of doc/Makefile.) | |
64 | ||
65 | This will build plain text documentation in `halibut.txt', and a set | |
66 | of HTML files (*.html). It will also build a short man page | |
67 | `halibut.1', although this is by no means a replacement for the full | |
68 | manual. | |
69 | 25 | |
70 | 26 | Installing Halibut |
71 | 27 | ------------------ |
0 | Halibut is copyright (c) 1999-2017 Simon Tatham. | |
0 | Halibut is copyright (c) 1999-2021 Simon Tatham. | |
1 | 1 | |
2 | 2 | Permission is hereby granted, free of charge, to any person |
3 | 3 | obtaining a copy of this software and associated documentation files |
0 | # Halibut master makefile | |
1 | ||
2 | # Currently depends on gcc, because: | |
3 | # - the dependency tracking uses -MD in order to avoid needing an | |
4 | # explicit `make depend' step | |
5 | # - the definition of CFLAGS includes the gcc-specific flag | |
6 | # `-Wall' | |
7 | # | |
8 | # Currently depends on GNU make, because: | |
9 | # - the Makefile uses GNU ifdef / ifndef commands and GNU make `%' | |
10 | # pattern rules | |
11 | # - we use .PHONY | |
12 | ||
13 | prefix=/usr/local | |
14 | exec_prefix=$(prefix) | |
15 | bindir=$(exec_prefix)/bin | |
16 | INSTALL=install -c | |
17 | ||
18 | .PHONY: all install clean spotless topclean release | |
19 | ||
20 | ifdef RELEASE | |
21 | ifndef VERSION | |
22 | VERSION := $(RELEASE) | |
23 | endif | |
24 | else | |
25 | CFLAGS += -g | |
26 | endif | |
27 | ||
28 | ifeq (x$(VERSION)y,xy) | |
29 | RELDIR := halibut | |
30 | else | |
31 | RELDIR := halibut-$(VERSION) | |
32 | endif | |
33 | ||
34 | # `make' from top level will build in directory `build' | |
35 | # `make BUILDDIR=foo' from top level will build in directory foo | |
36 | ifndef REALBUILD | |
37 | ifndef BUILDDIR | |
38 | ifdef TEST | |
39 | BUILDDIR := test | |
40 | else | |
41 | BUILDDIR := build | |
42 | endif | |
43 | endif | |
44 | ||
45 | all install: | |
46 | @test -d $(BUILDDIR) || mkdir $(BUILDDIR) | |
47 | @$(MAKE) -C $(BUILDDIR) -f ../Makefile $@ REALBUILD=yes | |
48 | ||
49 | spotless: topclean | |
50 | @test -d $(BUILDDIR) || mkdir $(BUILDDIR) | |
51 | @$(MAKE) -C $(BUILDDIR) -f ../Makefile spotless REALBUILD=yes | |
52 | ||
53 | clean: topclean | |
54 | @test -d $(BUILDDIR) || mkdir $(BUILDDIR) | |
55 | @$(MAKE) -C $(BUILDDIR) -f ../Makefile clean REALBUILD=yes | |
56 | ||
57 | # Remove Halibut output files in the source directory (may | |
58 | # have been created by running, for example, `build/halibut | |
59 | # inputs/test.but'). | |
60 | topclean: | |
61 | rm -f *.html output.* *.tar.gz | |
62 | ||
63 | # Make a release archive. | |
64 | release: release.sh | |
65 | ./release.sh $(RELDIR) $(VERSION) | |
66 | ||
67 | else | |
68 | ||
69 | # The `real' makefile part. | |
70 | ||
71 | CFLAGS += -Wall -W -ansi -pedantic | |
72 | ||
73 | ifdef TEST | |
74 | CFLAGS += -DLOGALLOC | |
75 | LIBS += -lefence | |
76 | endif | |
77 | ||
78 | EXE =# | |
79 | ||
80 | all: halibut$(EXE) | |
81 | ||
82 | SRC := ../ | |
83 | ||
84 | ifeq ($(shell test -d $(SRC)charset && echo yes),yes) | |
85 | LIBCHARSET_SRCDIR = $(SRC)charset/ | |
86 | else | |
87 | LIBCHARSET_SRCDIR = $(SRC)../charset/ | |
88 | endif | |
89 | LIBCHARSET_OBJDIR = ./# | |
90 | LIBCHARSET_OBJPFX = cs-# | |
91 | LIBCHARSET_GENPFX = charset-# | |
92 | MD = -MD | |
93 | CFLAGS += -I$(LIBCHARSET_SRCDIR) -I$(LIBCHARSET_OBJDIR) | |
94 | include $(LIBCHARSET_SRCDIR)Makefile | |
95 | CC_LINK = $(CC) -o $@ | |
96 | ||
97 | MODULES := main malloc ustring error help licence version misc tree234 | |
98 | MODULES += input in_afm in_pf in_sfnt keywords contents index biblio | |
99 | MODULES += bk_text bk_html bk_whlp bk_man bk_info bk_paper bk_ps bk_pdf | |
100 | MODULES += winhelp winchm deflate lzx lz77 huffman psdata wcwidth | |
101 | ||
102 | OBJECTS := $(addsuffix .o,$(MODULES)) $(LIBCHARSET_OBJS) | |
103 | DEPS := $(addsuffix .d,$(MODULES)) | |
104 | ||
105 | halibut$(EXE): $(OBJECTS) | |
106 | $(CC_LINK) $(LFLAGS) $(OBJECTS) $(LIBS) | |
107 | ||
108 | %.o: $(SRC)%.c | |
109 | $(CC) $(CFLAGS) -MD -c $< | |
110 | ||
111 | version.o: FORCE | |
112 | $(CC) $(VDEF) -MD -c $(SRC)version.c | |
113 | ||
114 | spotless:: clean | |
115 | rm -f *.d | |
116 | ||
117 | clean:: | |
118 | rm -f *.o halibut core | |
119 | ||
120 | install: | |
121 | mkdir -p $(prefix) $(bindir) | |
122 | $(INSTALL) -m 755 halibut $(bindir)/halibut | |
123 | $(MAKE) -C ../doc install prefix="$(prefix)" INSTALL="$(INSTALL)" | |
124 | ||
125 | FORCE: # phony target to force version.o to be rebuilt every time | |
126 | ||
127 | -include $(DEPS) | |
128 | ||
129 | endif |
19 | 19 | return ustrdup(p); |
20 | 20 | } |
21 | 21 | |
22 | static void cite_biblio(keywordlist *kl, wchar_t *key, filepos fpos) { | |
22 | static void cite_biblio(keywordlist *kl, wchar_t *key, filepos fpos, | |
23 | errorstate *es) { | |
23 | 24 | keyword *kw = kw_lookup(kl, key); |
24 | 25 | if (!kw) |
25 | err_nosuchkw(&fpos, key); | |
26 | err_nosuchkw(es, &fpos, key); | |
26 | 27 | else { |
27 | 28 | /* |
28 | 29 | * We've found a \k reference. If it's a |
43 | 44 | * entries are actually cited (or \nocite-ed). |
44 | 45 | */ |
45 | 46 | |
46 | void gen_citations(paragraph *source, keywordlist *kl) { | |
47 | void gen_citations(paragraph *source, keywordlist *kl, errorstate *es) { | |
47 | 48 | paragraph *para; |
48 | 49 | int bibnum = 0; |
49 | 50 | |
56 | 57 | if (para->type == para_BR) { |
57 | 58 | keyword *kw = kw_lookup(kl, para->keyword); |
58 | 59 | if (!kw) { |
59 | err_nosuchkw(&para->fpos, para->keyword); | |
60 | err_nosuchkw(es, &para->fpos, para->keyword); | |
60 | 61 | } else if (kw->text) { |
61 | err_multiBR(&para->fpos, para->keyword); | |
62 | err_multiBR(es, &para->fpos, para->keyword); | |
62 | 63 | } else { |
63 | 64 | kw->text = dup_word_list(para->words); |
64 | 65 | } |
65 | 66 | } else if (para->type == para_NoCite) { |
66 | 67 | wchar_t *wp = para->keyword; |
67 | 68 | while (*wp) { |
68 | cite_biblio(kl, wp, para->fpos); | |
69 | cite_biblio(kl, wp, para->fpos, es); | |
69 | 70 | wp = uadv(wp); |
70 | 71 | } |
71 | 72 | } |
76 | 77 | for (ptr = para->words; ptr; ptr = ptr->next) { |
77 | 78 | if (ptr->type == word_UpperXref || |
78 | 79 | ptr->type == word_LowerXref) |
79 | cite_biblio(kl, ptr->text, ptr->fpos); | |
80 | cite_biblio(kl, ptr->text, ptr->fpos, es); | |
80 | 81 | } |
81 | 82 | } |
82 | 83 | |
94 | 95 | word *wd = smalloc(sizeof(word)); |
95 | 96 | wd->text = gentext(++bibnum); |
96 | 97 | wd->type = word_Normal; |
97 | wd->breaks = FALSE; | |
98 | wd->breaks = false; | |
98 | 99 | wd->alt = NULL; |
99 | 100 | wd->next = NULL; |
100 | 101 | wd->aux = 0; |
37 | 37 | (p)->type == para_Title ? -1 : 0 ) |
38 | 38 | |
39 | 39 | typedef struct { |
40 | int number_at_all, just_numbers; | |
40 | bool number_at_all, just_numbers; | |
41 | 41 | wchar_t *number_suffix; |
42 | 42 | } sectlevel; |
43 | 43 | |
46 | 46 | sectlevel achapter, *asect; |
47 | 47 | int *contents_depths; /* 0=main, 1=chapter, 2=sect etc */ |
48 | 48 | int ncdepths; |
49 | int address_section, visible_version_id; | |
50 | int leaf_contains_contents, leaf_smallest_contents; | |
51 | int navlinks; | |
52 | int rellinks; | |
49 | bool address_section, visible_version_id; | |
50 | bool leaf_contains_contents; | |
51 | int leaf_smallest_contents; | |
52 | bool navlinks; | |
53 | bool rellinks; | |
53 | 54 | char *contents_filename; |
54 | 55 | char *index_filename; |
55 | 56 | char *template_filename; |
135 | 136 | typedef struct { |
136 | 137 | htmlsect *section; |
137 | 138 | char *fragment; |
138 | int generated, referenced; | |
139 | bool generated, referenced; | |
139 | 140 | } htmlindexref; |
140 | 141 | |
141 | 142 | typedef struct { |
148 | 149 | void (*write)(void *write_ctx, const char *data, int len); |
149 | 150 | int charset, restrict_charset; |
150 | 151 | charset_state cstate; |
152 | errorstate *es; | |
151 | 153 | int ver; |
152 | 154 | enum { |
153 | 155 | HO_NEUTRAL, HO_IN_TAG, HO_IN_EMPTY_TAG, HO_IN_TEXT |
190 | 192 | ho->write = ho_write_file; |
191 | 193 | ho->write_ctx = fp; |
192 | 194 | } else { |
193 | err_cantopenw(filename); | |
195 | err_cantopenw(ho->es, filename); | |
194 | 196 | ho->write = ho_write_ignore; /* saves conditionalising rest of code */ |
195 | 197 | } |
196 | 198 | } |
265 | 267 | #define HO_HACK_QUOTENOTHING 2 |
266 | 268 | #define HO_HACK_OMITQUOTES 4 |
267 | 269 | |
268 | static int html_fragment_compare(void *av, void *bv) | |
269 | { | |
270 | htmlfragment *a = (htmlfragment *)av; | |
271 | htmlfragment *b = (htmlfragment *)bv; | |
270 | static int html_fragment_compare(const void *av, const void *bv, void *cmpctx) | |
271 | { | |
272 | const htmlfragment *a = (const htmlfragment *)av; | |
273 | const htmlfragment *b = (const htmlfragment *)bv; | |
272 | 274 | int cmp; |
273 | 275 | |
274 | 276 | if ((cmp = strcmp(a->file->filename, b->file->filename)) != 0) |
277 | 279 | return strcmp(a->fragment, b->fragment); |
278 | 280 | } |
279 | 281 | |
280 | static int html_filename_compare(void *av, void *bv) | |
281 | { | |
282 | char *a = (char *)av; | |
283 | char *b = (char *)bv; | |
282 | static int html_filename_compare(const void *av, const void *bv, void *cmpctx) | |
283 | { | |
284 | const char *a = (const char *)av; | |
285 | const char *b = (const char *)bv; | |
284 | 286 | |
285 | 287 | return strcmp(a, b); |
286 | 288 | } |
312 | 314 | static void html_text_nbsp(htmloutput *ho, wchar_t const *str); |
313 | 315 | static void html_text_limit(htmloutput *ho, wchar_t const *str, int maxlen); |
314 | 316 | static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, |
315 | int maxlen, int quote_quotes, int nbsp); | |
317 | int maxlen, bool quote_quotes, bool nbsp); | |
316 | 318 | static void html_nl(htmloutput *ho); |
317 | 319 | static void html_raw(htmloutput *ho, char *text); |
318 | 320 | static void html_raw_as_attr(htmloutput *ho, char *text); |
332 | 334 | htmlconfig *cfg); |
333 | 335 | static void html_section_title(htmloutput *ho, htmlsect *s, |
334 | 336 | htmlfile *thisfile, keywordlist *keywords, |
335 | htmlconfig *cfg, int real); | |
336 | ||
337 | static htmlconfig html_configure(paragraph *source, int chm_mode) | |
337 | htmlconfig *cfg, bool real); | |
338 | ||
339 | static htmlconfig html_configure(paragraph *source, bool chm_mode, | |
340 | errorstate *es) | |
338 | 341 | { |
339 | 342 | htmlconfig ret; |
340 | 343 | paragraph *p; |
343 | 346 | * Defaults. |
344 | 347 | */ |
345 | 348 | ret.leaf_level = chm_mode ? -1 /* infinite */ : 2; |
346 | ret.achapter.just_numbers = FALSE; | |
347 | ret.achapter.number_at_all = TRUE; | |
349 | ret.achapter.just_numbers = false; | |
350 | ret.achapter.number_at_all = true; | |
348 | 351 | ret.achapter.number_suffix = L": "; |
349 | 352 | ret.nasect = 1; |
350 | 353 | ret.asect = snewn(ret.nasect, sectlevel); |
351 | ret.asect[0].just_numbers = TRUE; | |
352 | ret.asect[0].number_at_all = TRUE; | |
354 | ret.asect[0].just_numbers = true; | |
355 | ret.asect[0].number_at_all = true; | |
353 | 356 | ret.asect[0].number_suffix = L" "; |
354 | 357 | ret.ncdepths = 0; |
355 | 358 | ret.contents_depths = 0; |
356 | ret.visible_version_id = TRUE; | |
357 | ret.address_section = chm_mode ? FALSE : TRUE; | |
358 | ret.leaf_contains_contents = FALSE; | |
359 | ret.visible_version_id = true; | |
360 | ret.address_section = chm_mode ? false : true; | |
361 | ret.leaf_contains_contents = false; | |
359 | 362 | ret.leaf_smallest_contents = 4; |
360 | ret.navlinks = chm_mode ? FALSE : TRUE; | |
361 | ret.rellinks = TRUE; | |
363 | ret.navlinks = chm_mode ? false : true; | |
364 | ret.rellinks = true; | |
362 | 365 | ret.single_filename = dupstr("Manual.html"); |
363 | 366 | ret.contents_filename = dupstr("Contents.html"); |
364 | 367 | ret.index_filename = dupstr("IndexPage.html"); |
426 | 429 | for (p = source; p; p = p->next) { |
427 | 430 | if (p->type == para_Config) { |
428 | 431 | wchar_t *k = p->keyword; |
429 | int generic = FALSE; | |
432 | bool generic = false; | |
430 | 433 | |
431 | 434 | if (!chm_mode && !ustrnicmp(k, L"html-", 5)) { |
432 | 435 | k += 5; |
439 | 442 | /* In this mode, only accept directives that don't |
440 | 443 | * vary completely between the HTML and CHM output |
441 | 444 | * types. */ |
442 | generic = TRUE; | |
445 | generic = true; | |
443 | 446 | } else { |
444 | 447 | continue; |
445 | 448 | } |
446 | 449 | |
447 | 450 | if (!ustricmp(k, L"restrict-charset")) { |
448 | ret.restrict_charset = charset_from_ustr(&p->fpos, uadv(k)); | |
451 | ret.restrict_charset = charset_from_ustr( | |
452 | &p->fpos, uadv(k), es); | |
449 | 453 | } else if (!ustricmp(k, L"output-charset")) { |
450 | ret.output_charset = charset_from_ustr(&p->fpos, uadv(k)); | |
454 | ret.output_charset = charset_from_ustr( | |
455 | &p->fpos, uadv(k), es); | |
451 | 456 | } else if (!ustricmp(k, L"version")) { |
452 | 457 | wchar_t *vername = uadv(k); |
453 | 458 | static const struct { |
467 | 472 | break; |
468 | 473 | |
469 | 474 | if (i == lenof(versions)) |
470 | err_htmlver(&p->fpos, vername); | |
475 | err_htmlver(es, &p->fpos, vername); | |
471 | 476 | else |
472 | 477 | ret.htmlver = versions[i].ver; |
473 | 478 | } else if (!ustricmp(k, L"single-filename")) { |
500 | 505 | frag = adv(frag); |
501 | 506 | } |
502 | 507 | } else |
503 | err_cfginsufarg(&p->fpos, p->origkeyword, 1); | |
508 | err_cfginsufarg(es, &p->fpos, p->origkeyword, 1); | |
504 | 509 | } else if (!ustricmp(k, L"chapter-numeric")) { |
505 | 510 | ret.achapter.just_numbers = utob(uadv(k)); |
506 | 511 | } else if (!ustricmp(k, L"chapter-shownumber")) { |
674 | 679 | chmname = diskname; |
675 | 680 | |
676 | 681 | if (chmname[0] == '#' || chmname[0] == '$') |
677 | err_chm_badname(&p->fpos, chmname); | |
682 | err_chm_badname(es, &p->fpos, chmname); | |
678 | 683 | |
679 | 684 | if (ret.nchmextrafiles >= ret.chmextrafilesize) { |
680 | 685 | ret.chmextrafilesize = ret.nchmextrafiles * 5 / 4 + 32; |
705 | 710 | * turn both off. |
706 | 711 | */ |
707 | 712 | if (!ret.chm_filename ^ !ret.hhp_filename) { |
708 | err_chmnames(); | |
713 | err_chmnames(es); | |
709 | 714 | sfree(ret.chm_filename); ret.chm_filename = NULL; |
710 | 715 | sfree(ret.hhp_filename); ret.hhp_filename = NULL; |
711 | 716 | } |
756 | 761 | } |
757 | 762 | |
758 | 763 | static void html_backend_common(paragraph *sourceform, keywordlist *keywords, |
759 | indexdata *idx, int chm_mode) | |
764 | indexdata *idx, errorstate *es, bool chm_mode) | |
760 | 765 | { |
761 | 766 | paragraph *p; |
762 | 767 | htmlsect *topsect; |
764 | 769 | htmlfilelist files = { NULL, NULL, NULL, NULL, NULL, NULL }; |
765 | 770 | htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL }; |
766 | 771 | struct chm *chm = NULL; |
767 | int has_index, hhk_needed = FALSE; | |
768 | ||
769 | conf = html_configure(sourceform, chm_mode); | |
772 | bool has_index, hhk_needed = false; | |
773 | ||
774 | conf = html_configure(sourceform, chm_mode, es); | |
770 | 775 | |
771 | 776 | /* |
772 | 777 | * We're going to make heavy use of paragraphs' private data |
777 | 782 | for (p = sourceform; p; p = p->next) |
778 | 783 | p->private_data = NULL; |
779 | 784 | |
780 | files.frags = newtree234(html_fragment_compare); | |
781 | files.files = newtree234(html_filename_compare); | |
785 | files.frags = newtree234(html_fragment_compare, NULL); | |
786 | files.files = newtree234(html_filename_compare, NULL); | |
782 | 787 | |
783 | 788 | /* |
784 | 789 | * Start by figuring out into which file each piece of the |
978 | 983 | indextag *tag; |
979 | 984 | int i; |
980 | 985 | |
981 | hr->referenced = hr->generated = FALSE; | |
986 | hr->referenced = hr->generated = false; | |
982 | 987 | hr->section = lastsect; |
983 | 988 | { |
984 | 989 | char buf[40]; |
1036 | 1041 | |
1037 | 1042 | for (f = files.head; f; f = f->next) { |
1038 | 1043 | htmloutput ho; |
1039 | int displaying; | |
1044 | bool displaying; | |
1040 | 1045 | enum LISTTYPE { NOLIST, UL, OL, DL }; |
1041 | 1046 | enum ITEMTYPE { NOITEM, LI, DT, DD }; |
1042 | 1047 | struct stackelement { |
1058 | 1063 | ho.charset = conf.output_charset; |
1059 | 1064 | ho.restrict_charset = conf.restrict_charset; |
1060 | 1065 | ho.cstate = charset_init_state; |
1066 | ho.es = es; | |
1061 | 1067 | ho.ver = conf.htmlver; |
1062 | 1068 | ho.state = HO_NEUTRAL; |
1063 | 1069 | ho.contents_level = 0; |
1309 | 1315 | html_fragment(&ho, sects.head->fragments[i]); |
1310 | 1316 | } |
1311 | 1317 | |
1312 | html_section_title(&ho, sects.head, f, keywords, &conf, TRUE); | |
1318 | html_section_title(&ho, sects.head, f, keywords, &conf, true); | |
1313 | 1319 | |
1314 | 1320 | element_close(&ho, "h1"); |
1315 | 1321 | } |
1328 | 1334 | { |
1329 | 1335 | int ntoc = 0, tocsize = 0, tocstartidx = 0; |
1330 | 1336 | htmlsect **toc = NULL; |
1331 | int leaf = TRUE; | |
1337 | bool leaf = true; | |
1332 | 1338 | |
1333 | 1339 | for (s = sects.head; s; s = s->next) { |
1334 | 1340 | htmlsect *a, *ac; |
1350 | 1356 | } |
1351 | 1357 | |
1352 | 1358 | if (s->file != f && a != NULL) |
1353 | leaf = FALSE; | |
1359 | leaf = false; | |
1354 | 1360 | |
1355 | 1361 | if (a) { |
1356 | 1362 | if (adepth <= a->contents_depth) { |
1398 | 1404 | * Now go through the document and output some real |
1399 | 1405 | * text. |
1400 | 1406 | */ |
1401 | displaying = FALSE; | |
1407 | displaying = false; | |
1402 | 1408 | for (s = sects.head; s; s = s->next) { |
1403 | 1409 | if (s->file == f) { |
1404 | 1410 | /* |
1405 | 1411 | * This section belongs in this file. |
1406 | 1412 | * Display it. |
1407 | 1413 | */ |
1408 | displaying = TRUE; | |
1414 | displaying = true; | |
1409 | 1415 | } else { |
1410 | 1416 | /* |
1411 | 1417 | * Doesn't belong in this file, but it may be |
1416 | 1422 | htmlsect *a, *ac; |
1417 | 1423 | int depth, adepth; |
1418 | 1424 | |
1419 | displaying = FALSE; | |
1425 | displaying = false; | |
1420 | 1426 | |
1421 | 1427 | /* |
1422 | 1428 | * Search up from this section until we find |
1489 | 1495 | html_fragment(&ho, s->fragments[i]); |
1490 | 1496 | } |
1491 | 1497 | |
1492 | html_section_title(&ho, s, f, keywords, &conf, TRUE); | |
1498 | html_section_title(&ho, s, f, keywords, &conf, true); | |
1493 | 1499 | |
1494 | 1500 | element_close(&ho, htag); |
1495 | 1501 | } |
1721 | 1727 | |
1722 | 1728 | html_href(&ho, f, hr->section->file, |
1723 | 1729 | hr->fragment); |
1724 | hr->referenced = TRUE; | |
1730 | hr->referenced = true; | |
1725 | 1731 | if (p && p->kwtext) |
1726 | 1732 | html_words(&ho, p->kwtext, MARKUP|LINKS, |
1727 | 1733 | f, keywords, &conf); |
1754 | 1760 | /* |
1755 | 1761 | * Footer. |
1756 | 1762 | */ |
1757 | int done_version_ids = FALSE; | |
1763 | bool done_version_ids = false; | |
1758 | 1764 | |
1759 | 1765 | if (conf.address_section) |
1760 | 1766 | element_empty(&ho, "hr"); |
1763 | 1769 | html_raw(&ho, conf.body_end); |
1764 | 1770 | |
1765 | 1771 | if (conf.address_section) { |
1766 | int started = FALSE; | |
1772 | bool started = false; | |
1767 | 1773 | if (conf.htmlver == ISO_HTML) { |
1768 | 1774 | /* |
1769 | 1775 | * The ISO-HTML validator complains if |
1781 | 1787 | if (conf.addr_start) { |
1782 | 1788 | html_raw(&ho, conf.addr_start); |
1783 | 1789 | html_nl(&ho); |
1784 | started = TRUE; | |
1790 | started = true; | |
1785 | 1791 | } |
1786 | 1792 | if (conf.visible_version_id) { |
1787 | 1793 | for (p = sourceform; p; p = p->next) |
1793 | 1799 | html_words(&ho, p->words, NOTHING, |
1794 | 1800 | f, keywords, &conf); |
1795 | 1801 | html_text(&ho, conf.post_versionid); |
1796 | started = TRUE; | |
1802 | started = true; | |
1797 | 1803 | } |
1798 | done_version_ids = TRUE; | |
1804 | done_version_ids = true; | |
1799 | 1805 | } |
1800 | 1806 | if (conf.addr_end) { |
1801 | 1807 | if (started) |
1813 | 1819 | * visible, I think we still have a duty to put |
1814 | 1820 | * them in an HTML comment. |
1815 | 1821 | */ |
1816 | int started = FALSE; | |
1822 | bool started = false; | |
1817 | 1823 | for (p = sourceform; p; p = p->next) |
1818 | 1824 | if (p->type == para_VersionID) { |
1819 | 1825 | if (!started) { |
1820 | 1826 | html_raw(&ho, "<!-- version IDs:\n"); |
1821 | started = TRUE; | |
1827 | started = true; | |
1822 | 1828 | } |
1823 | 1829 | html_words(&ho, p->words, NOTHING, |
1824 | 1830 | f, keywords, &conf); |
1843 | 1849 | * if the index contains nothing. |
1844 | 1850 | */ |
1845 | 1851 | if (chm_mode || conf.hhk_filename) { |
1846 | int ok = FALSE; | |
1852 | bool ok = false; | |
1847 | 1853 | int i; |
1848 | 1854 | indexentry *entry; |
1849 | 1855 | |
1851 | 1857 | htmlindex *hi = (htmlindex *)entry->backend_data; |
1852 | 1858 | |
1853 | 1859 | if (hi->nrefs > 0) { |
1854 | ok = TRUE; /* found an index entry */ | |
1860 | ok = true; /* found an index entry */ | |
1855 | 1861 | break; |
1856 | 1862 | } |
1857 | 1863 | } |
1858 | 1864 | |
1859 | 1865 | if (ok) |
1860 | hhk_needed = TRUE; | |
1866 | hhk_needed = true; | |
1861 | 1867 | } |
1862 | 1868 | |
1863 | 1869 | /* |
1878 | 1884 | ho.charset = CS_CP1252; /* as far as I know, CHM is */ |
1879 | 1885 | ho.restrict_charset = CS_CP1252; /* hardwired to this charset */ |
1880 | 1886 | ho.cstate = charset_init_state; |
1887 | ho.es = es; | |
1881 | 1888 | ho.ver = HTML_4; /* *shrug* */ |
1882 | 1889 | ho.state = HO_NEUTRAL; |
1883 | 1890 | ho.contents_level = 0; |
1886 | 1893 | ho_setup_rdstringc(&ho, &rs); |
1887 | 1894 | |
1888 | 1895 | ho.hacklimit = 255; |
1889 | html_words(&ho, topsect->title->words, NOTHING, | |
1890 | NULL, keywords, &conf); | |
1896 | if (topsect->title) | |
1897 | html_words(&ho, topsect->title->words, NOTHING, | |
1898 | NULL, keywords, &conf); | |
1891 | 1899 | |
1892 | 1900 | rdaddc(&rs, '\0'); |
1893 | 1901 | chm_title(chm, rs.text); |
1929 | 1937 | ho.charset = CS_CP1252; |
1930 | 1938 | ho.restrict_charset = CS_CP1252; |
1931 | 1939 | ho.cstate = charset_init_state; |
1940 | ho.es = es; | |
1932 | 1941 | ho.ver = HTML_4; /* *shrug* */ |
1933 | 1942 | ho.state = HO_NEUTRAL; |
1934 | 1943 | ho.contents_level = 0; |
1972 | 1981 | |
1973 | 1982 | fp = fopen(fname, "rb"); |
1974 | 1983 | if (!fp) { |
1975 | err_cantopen(fname); | |
1984 | err_cantopen(es, fname); | |
1976 | 1985 | continue; |
1977 | 1986 | } |
1978 | 1987 | |
2002 | 2011 | ho.charset = CS_CP1252; /* as far as I know, HHP files are */ |
2003 | 2012 | ho.restrict_charset = CS_CP1252; /* hardwired to this charset */ |
2004 | 2013 | ho.cstate = charset_init_state; |
2014 | ho.es = es; | |
2005 | 2015 | ho.ver = HTML_4; /* *shrug* */ |
2006 | 2016 | ho.state = HO_NEUTRAL; |
2007 | 2017 | ho.contents_level = 0; |
2026 | 2036 | "Title="); |
2027 | 2037 | |
2028 | 2038 | ho.hacklimit = 255; |
2029 | html_words(&ho, topsect->title->words, NOTHING, | |
2030 | NULL, keywords, &conf); | |
2039 | if (topsect->title) | |
2040 | html_words(&ho, topsect->title->words, NOTHING, | |
2041 | NULL, keywords, &conf); | |
2031 | 2042 | |
2032 | 2043 | ho_string(&ho, "\n"); |
2033 | 2044 | |
2100 | 2111 | ho.charset = CS_CP1252; /* as far as I know, HHC files are */ |
2101 | 2112 | ho.restrict_charset = CS_CP1252; /* hardwired to this charset */ |
2102 | 2113 | ho.cstate = charset_init_state; |
2114 | ho.es = es; | |
2103 | 2115 | ho.ver = HTML_4; /* *shrug* */ |
2104 | 2116 | ho.state = HO_NEUTRAL; |
2105 | 2117 | ho.contents_level = 0; |
2126 | 2138 | /* |
2127 | 2139 | * For each HTML file, write out a contents entry. |
2128 | 2140 | */ |
2129 | int depth, leaf = TRUE; | |
2141 | int depth; | |
2142 | bool leaf = true; | |
2130 | 2143 | |
2131 | 2144 | /* |
2132 | 2145 | * Determine the depth of this file in the contents |
2159 | 2172 | if (leaf && s->file != f) { |
2160 | 2173 | for (a = s; a; a = a->parent) |
2161 | 2174 | if (a->file == f) { |
2162 | leaf = FALSE; | |
2175 | leaf = false; | |
2163 | 2176 | break; |
2164 | 2177 | } |
2165 | 2178 | } |
2217 | 2230 | ho.charset = CS_CP1252; /* as far as I know, HHK files are */ |
2218 | 2231 | ho.restrict_charset = CS_CP1252; /* hardwired to this charset */ |
2219 | 2232 | ho.cstate = charset_init_state; |
2233 | ho.es = es; | |
2220 | 2234 | ho.ver = HTML_4; /* *shrug* */ |
2221 | 2235 | ho.state = HO_NEUTRAL; |
2222 | 2236 | ho.contents_level = 0; |
2269 | 2283 | hr->section->file->temp = 1; |
2270 | 2284 | } |
2271 | 2285 | |
2272 | hr->referenced = TRUE; | |
2286 | hr->referenced = true; | |
2273 | 2287 | } |
2274 | 2288 | |
2275 | 2289 | ho_string(&ho, "</OBJECT>\n"); |
2300 | 2314 | |
2301 | 2315 | fp = fopen(conf.chm_filename, "wb"); |
2302 | 2316 | if (!fp) { |
2303 | err_cantopenw(conf.chm_filename); | |
2317 | err_cantopenw(es, conf.chm_filename); | |
2304 | 2318 | } else { |
2305 | 2319 | data = chm_build(chm, &len); |
2306 | 2320 | fwrite(data, 1, len, fp); |
2323 | 2337 | if (w->type == word_IndexRef) { |
2324 | 2338 | htmlindexref *hr = (htmlindexref *)w->private_data; |
2325 | 2339 | |
2326 | assert(!hr->referenced == !hr->generated); | |
2340 | assert(hr->referenced == hr->generated); | |
2327 | 2341 | } |
2328 | 2342 | } |
2329 | 2343 | |
2416 | 2430 | } |
2417 | 2431 | |
2418 | 2432 | void html_backend(paragraph *sourceform, keywordlist *keywords, |
2419 | indexdata *idx, void *unused) | |
2433 | indexdata *idx, void *unused, errorstate *es) | |
2420 | 2434 | { |
2421 | 2435 | IGNORE(unused); |
2422 | html_backend_common(sourceform, keywords, idx, FALSE); | |
2436 | html_backend_common(sourceform, keywords, idx, es, false); | |
2423 | 2437 | } |
2424 | 2438 | |
2425 | 2439 | void chm_backend(paragraph *sourceform, keywordlist *keywords, |
2426 | indexdata *idx, void *unused) | |
2440 | indexdata *idx, void *unused, errorstate *es) | |
2427 | 2441 | { |
2428 | 2442 | IGNORE(unused); |
2429 | html_backend_common(sourceform, keywords, idx, TRUE); | |
2443 | html_backend_common(sourceform, keywords, idx, es, true); | |
2430 | 2444 | } |
2431 | 2445 | |
2432 | 2446 | static void html_file_section(htmlconfig *cfg, htmlfilelist *files, |
2620 | 2634 | if (flags & INDEXENTS) { |
2621 | 2635 | htmlindexref *hr = (htmlindexref *)w->private_data; |
2622 | 2636 | html_fragment(ho, hr->fragment); |
2623 | hr->generated = TRUE; | |
2637 | hr->generated = true; | |
2624 | 2638 | } |
2625 | 2639 | break; |
2626 | 2640 | case word_Normal: |
2827 | 2841 | ho_string(ho, " "); |
2828 | 2842 | ho_string(ho, name); |
2829 | 2843 | ho_string(ho, "=\""); |
2830 | html_text_limit_internal(ho, value, 0, TRUE, FALSE); | |
2844 | html_text_limit_internal(ho, value, 0, true, false); | |
2831 | 2845 | html_charset_cleanup(ho); |
2832 | 2846 | ho_string(ho, "\""); |
2833 | 2847 | } |
2835 | 2849 | static void html_text(htmloutput *ho, wchar_t const *text) |
2836 | 2850 | { |
2837 | 2851 | return_mostly_to_neutral(ho); |
2838 | html_text_limit_internal(ho, text, 0, FALSE, FALSE); | |
2852 | html_text_limit_internal(ho, text, 0, false, false); | |
2839 | 2853 | } |
2840 | 2854 | |
2841 | 2855 | static void html_text_nbsp(htmloutput *ho, wchar_t const *text) |
2842 | 2856 | { |
2843 | 2857 | return_mostly_to_neutral(ho); |
2844 | html_text_limit_internal(ho, text, 0, FALSE, TRUE); | |
2858 | html_text_limit_internal(ho, text, 0, false, true); | |
2845 | 2859 | } |
2846 | 2860 | |
2847 | 2861 | static void html_text_limit(htmloutput *ho, wchar_t const *text, int maxlen) |
2848 | 2862 | { |
2849 | 2863 | return_mostly_to_neutral(ho); |
2850 | html_text_limit_internal(ho, text, maxlen, FALSE, FALSE); | |
2864 | html_text_limit_internal(ho, text, maxlen, false, false); | |
2851 | 2865 | } |
2852 | 2866 | |
2853 | 2867 | static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, |
2854 | int maxlen, int quote_quotes, int nbsp) | |
2868 | int maxlen, bool quote_quotes, bool nbsp) | |
2855 | 2869 | { |
2856 | 2870 | int textlen = ustrlen(text); |
2857 | 2871 | char outbuf[256]; |
2858 | int bytes, err; | |
2872 | int bytes; | |
2873 | bool err; | |
2859 | 2874 | |
2860 | 2875 | if (ho->hackflags & (HO_HACK_QUOTEQUOTES | HO_HACK_OMITQUOTES)) |
2861 | quote_quotes = TRUE; /* override the input value */ | |
2876 | quote_quotes = true; /* override the input value */ | |
2862 | 2877 | |
2863 | 2878 | if (maxlen > 0 && textlen > maxlen) |
2864 | 2879 | textlen = maxlen; |
2943 | 2958 | rdaddc(&rs, '#'); |
2944 | 2959 | rdaddsc(&rs, targetfrag); |
2945 | 2960 | } |
2961 | ||
2962 | /* If _neither_ of those conditions were true, we don't have a URL | |
2963 | * at all and will segfault when we pass url==NULL to element_attr. | |
2964 | * | |
2965 | * I think this can only occur as a knock-on effect from an input | |
2966 | * file error, but we still shouldn't crash, of course. */ | |
2967 | ||
2946 | 2968 | url = rs.text; |
2947 | 2969 | |
2948 | 2970 | element_open(ho, "a"); |
2949 | element_attr(ho, "href", url); | |
2950 | sfree(url); | |
2971 | if (url) { | |
2972 | element_attr(ho, "href", url); | |
2973 | sfree(url); | |
2974 | } | |
2951 | 2975 | } |
2952 | 2976 | |
2953 | 2977 | static void html_fragment(htmloutput *ho, char const *fragment) |
3129 | 3153 | |
3130 | 3154 | p = NULL; |
3131 | 3155 | |
3132 | while (find234(files->files, text, NULL)) { | |
3156 | while (find234(files->files, text)) { | |
3133 | 3157 | if (!p) { |
3134 | 3158 | len = strlen(text); |
3135 | 3159 | p = text; |
3182 | 3206 | |
3183 | 3207 | element_open(ho, "li"); |
3184 | 3208 | html_href(ho, thisfile, s->file, s->fragments[0]); |
3185 | html_section_title(ho, s, thisfile, keywords, cfg, FALSE); | |
3209 | html_section_title(ho, s, thisfile, keywords, cfg, false); | |
3186 | 3210 | element_close(ho, "a"); |
3187 | 3211 | /* <li> will be closed by a later invocation */ |
3188 | 3212 | } |
3189 | 3213 | |
3190 | 3214 | static void html_section_title(htmloutput *ho, htmlsect *s, htmlfile *thisfile, |
3191 | 3215 | keywordlist *keywords, htmlconfig *cfg, |
3192 | int real) | |
3216 | bool real) | |
3193 | 3217 | { |
3194 | 3218 | if (s->title) { |
3195 | 3219 | sectlevel *sl; |
79 | 79 | rdstringc output; |
80 | 80 | int charset; |
81 | 81 | charset_state state; |
82 | int wcmode; | |
82 | bool wcmode; | |
83 | 83 | } info_data; |
84 | #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE } | |
84 | #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, false } | |
85 | 85 | static const info_data empty_info_data = EMPTY_INFO_DATA; |
86 | 86 | |
87 | 87 | typedef struct node_tag node; |
88 | 88 | struct node_tag { |
89 | 89 | node *listnext; |
90 | 90 | node *up, *prev, *next, *lastchild; |
91 | int pos, started_menu, filenum; | |
91 | int pos, filenum; | |
92 | bool started_menu; | |
92 | 93 | char *name; |
93 | 94 | info_data text; |
94 | 95 | }; |
116 | 117 | static word *info_transform_wordlist(word *, keywordlist *); |
117 | 118 | static int info_check_index(word *, node *, indexdata *); |
118 | 119 | |
119 | static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *); | |
120 | static int info_rdaddwc(info_data *, word *, word *, bool, infoconfig *); | |
120 | 121 | |
121 | 122 | static node *info_node_new(char *name, int charset); |
122 | static char *info_node_name_for_para(paragraph *p, infoconfig *); | |
123 | static char *info_node_name_for_text(wchar_t *text, infoconfig *); | |
124 | ||
125 | static infoconfig info_configure(paragraph *source) { | |
123 | static char *info_node_name_for_para(paragraph *p, infoconfig *, | |
124 | errorstate *); | |
125 | static char *info_node_name_for_text(wchar_t *text, infoconfig *, | |
126 | errorstate *); | |
127 | ||
128 | static infoconfig info_configure(paragraph *source, errorstate *es) { | |
126 | 129 | infoconfig ret; |
127 | 130 | paragraph *p; |
128 | 131 | int n; |
185 | 188 | sfree(ret.filename); |
186 | 189 | ret.filename = dupstr(adv(p->origkeyword)); |
187 | 190 | } else if (!ustricmp(p->keyword, L"info-charset")) { |
188 | ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword)); | |
191 | ret.charset = charset_from_ustr( | |
192 | &p->fpos, uadv(p->keyword), es); | |
189 | 193 | } else if (!ustricmp(p->keyword, L"info-max-file-size")) { |
190 | 194 | ret.maxfilesize = utoi(uadv(p->keyword)); |
191 | 195 | } else if (!ustricmp(p->keyword, L"info-width")) { |
305 | 309 | } |
306 | 310 | |
307 | 311 | void info_backend(paragraph *sourceform, keywordlist *keywords, |
308 | indexdata *idx, void *unused) { | |
312 | indexdata *idx, void *unused, errorstate *es) { | |
309 | 313 | paragraph *p; |
310 | 314 | infoconfig conf; |
311 | 315 | word *prefix, *body, *wp; |
314 | 318 | int nesting, nestindent; |
315 | 319 | int indentb, indenta; |
316 | 320 | int filepos; |
317 | int has_index = FALSE; | |
321 | bool has_index = false; | |
318 | 322 | info_data intro_text = EMPTY_INFO_DATA; |
319 | 323 | node *topnode, *currnode; |
320 | 324 | word bullet; |
322 | 326 | |
323 | 327 | IGNORE(unused); |
324 | 328 | |
325 | conf = info_configure(sourceform); | |
329 | conf = info_configure(sourceform, es); | |
326 | 330 | |
327 | 331 | /* |
328 | 332 | * Go through and create a node for each section. |
342 | 346 | node *newnode, *upnode; |
343 | 347 | char *nodename; |
344 | 348 | |
345 | nodename = info_node_name_for_para(p, &conf); | |
349 | nodename = info_node_name_for_para(p, &conf, es); | |
346 | 350 | newnode = info_node_new(nodename, conf.charset); |
347 | 351 | sfree(nodename); |
348 | 352 | |
383 | 387 | ii->nnodes = ii->nodesize = 0; |
384 | 388 | ii->nodes = NULL; |
385 | 389 | |
386 | ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf); | |
390 | ii->length = info_rdaddwc(&id, entry->text, NULL, false, &conf); | |
387 | 391 | |
388 | 392 | ii->text = id.output.text; |
389 | 393 | |
416 | 420 | kw = *longname ? uadv(longname) : L""; |
417 | 421 | |
418 | 422 | if (!*longname) { |
419 | err_cfginsufarg(&p->fpos, p->origkeyword, 3); | |
423 | err_cfginsufarg(es, &p->fpos, p->origkeyword, 3); | |
420 | 424 | continue; |
421 | 425 | } |
422 | 426 | |
511 | 515 | |
512 | 516 | if (!currnode->up->started_menu) { |
513 | 517 | info_rdaddsc(&currnode->up->text, "* Menu:\n\n"); |
514 | currnode->up->started_menu = TRUE; | |
518 | currnode->up->started_menu = true; | |
515 | 519 | } |
516 | 520 | info_menu_item(&currnode->up->text, currnode, p, &conf); |
517 | 521 | |
600 | 604 | indexentry *entry; |
601 | 605 | char *nodename; |
602 | 606 | |
603 | nodename = info_node_name_for_text(conf.index_text, &conf); | |
607 | nodename = info_node_name_for_text(conf.index_text, &conf, es); | |
604 | 608 | newnode = info_node_new(nodename, conf.charset); |
605 | 609 | sfree(nodename); |
606 | 610 | |
705 | 709 | */ |
706 | 710 | fp = fopen(conf.filename, "w"); |
707 | 711 | if (!fp) { |
708 | err_cantopenw(conf.filename); | |
712 | err_cantopenw(es, conf.filename); | |
709 | 713 | return; |
710 | 714 | } |
711 | 715 | fputs(intro_text.output.text, fp); |
749 | 753 | sprintf(fname, "%s-%d", conf.filename, filenum); |
750 | 754 | fp = fopen(fname, "w"); |
751 | 755 | if (!fp) { |
752 | err_cantopenw(fname); | |
756 | err_cantopenw(es, fname); | |
753 | 757 | return; |
754 | 758 | } |
755 | 759 | sfree(fname); |
862 | 866 | return ret; |
863 | 867 | } |
864 | 868 | |
865 | static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs, | |
869 | static int info_rdaddwc(info_data *id, word *words, word *end, bool xrefs, | |
866 | 870 | infoconfig *cfg) { |
867 | 871 | int ret = 0; |
868 | 872 | |
906 | 910 | if (cvt_ok(id->charset, words->text) || !words->alt) |
907 | 911 | ret += info_rdadds(id, words->text); |
908 | 912 | else |
909 | ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg); | |
913 | ret += info_rdaddwc(id, words->alt, NULL, false, cfg); | |
910 | 914 | } else if (removeattr(words->type) == word_WhiteSpace) { |
911 | 915 | ret += info_rdadd(id, L' '); |
912 | 916 | } else if (removeattr(words->type) == word_Quote) { |
944 | 948 | return ret; |
945 | 949 | } |
946 | 950 | |
947 | static int info_width_internal(word *words, int xrefs, infoconfig *cfg); | |
948 | ||
949 | static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) { | |
951 | static int info_width_internal(word *words, bool xrefs, infoconfig *cfg); | |
952 | ||
953 | static int info_width_internal_list(word *words, bool xrefs, infoconfig *cfg) { | |
950 | 954 | int w = 0; |
951 | 955 | while (words) { |
952 | 956 | w += info_width_internal(words, xrefs, cfg); |
955 | 959 | return w; |
956 | 960 | } |
957 | 961 | |
958 | static int info_width_internal(word *words, int xrefs, infoconfig *cfg) { | |
962 | static int info_width_internal(word *words, bool xrefs, infoconfig *cfg) { | |
959 | 963 | int wid; |
960 | 964 | int attr; |
961 | 965 | |
1033 | 1037 | |
1034 | 1038 | static int info_width_noxrefs(void *ctx, word *words) |
1035 | 1039 | { |
1036 | return info_width_internal(words, FALSE, (infoconfig *)ctx); | |
1040 | return info_width_internal(words, false, (infoconfig *)ctx); | |
1037 | 1041 | } |
1038 | 1042 | static int info_width_xrefs(void *ctx, word *words) |
1039 | 1043 | { |
1040 | return info_width_internal(words, TRUE, (infoconfig *)ctx); | |
1044 | return info_width_internal(words, true, (infoconfig *)ctx); | |
1041 | 1045 | } |
1042 | 1046 | |
1043 | 1047 | static void info_heading(info_data *text, word *tprefix, |
1049 | 1053 | |
1050 | 1054 | length = 0; |
1051 | 1055 | if (tprefix) { |
1052 | length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg); | |
1056 | length += info_rdaddwc(text, tprefix, NULL, false, cfg); | |
1053 | 1057 | length += info_rdadds(text, cfg->sectsuffix); |
1054 | 1058 | } |
1055 | 1059 | |
1059 | 1063 | wrapping = wrap_para(words, firstlinewidth, wrapwidth, |
1060 | 1064 | info_width_noxrefs, cfg, 0); |
1061 | 1065 | for (p = wrapping; p; p = p->next) { |
1062 | length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg); | |
1066 | length += info_rdaddwc(text, p->begin, p->end, false, cfg); | |
1063 | 1067 | info_rdadd(text, L'\n'); |
1064 | 1068 | if (*align.underline) { |
1065 | 1069 | while (length > 0) { |
1099 | 1103 | if (prefix) { |
1100 | 1104 | for (i = 0; i < indent; i++) |
1101 | 1105 | info_rdadd(text, L' '); |
1102 | e = info_rdaddwc(text, prefix, NULL, FALSE, cfg); | |
1106 | e = info_rdaddwc(text, prefix, NULL, false, cfg); | |
1103 | 1107 | if (prefixextra) |
1104 | 1108 | e += info_rdadds(text, prefixextra); |
1105 | 1109 | /* If the prefix is too long, shorten the first line to fit. */ |
1121 | 1125 | for (p = wrapping; p; p = p->next) { |
1122 | 1126 | for (i = 0; i < e; i++) |
1123 | 1127 | info_rdadd(text, L' '); |
1124 | info_rdaddwc(text, p->begin, p->end, TRUE, cfg); | |
1128 | info_rdaddwc(text, p->begin, p->end, true, cfg); | |
1125 | 1129 | info_rdadd(text, L'\n'); |
1126 | 1130 | e = indent + extraindent; |
1127 | 1131 | } |
1149 | 1153 | |
1150 | 1154 | static void info_versionid(info_data *text, word *words, infoconfig *cfg) { |
1151 | 1155 | info_rdadd(text, L'['); |
1152 | info_rdaddwc(text, words, NULL, FALSE, cfg); | |
1156 | info_rdaddwc(text, words, NULL, false, cfg); | |
1153 | 1157 | info_rdadds(text, L"]\n"); |
1154 | 1158 | } |
1155 | 1159 | |
1162 | 1166 | n->text.charset = charset; |
1163 | 1167 | n->up = n->next = n->prev = n->lastchild = n->listnext = NULL; |
1164 | 1168 | n->name = dupstr(name); |
1165 | n->started_menu = FALSE; | |
1169 | n->started_menu = false; | |
1166 | 1170 | |
1167 | 1171 | return n; |
1168 | 1172 | } |
1169 | 1173 | |
1170 | static char *info_node_name_core(info_data *id, filepos *fpos) | |
1174 | static char *info_node_name_core(info_data *id, filepos *fpos, errorstate *es) | |
1171 | 1175 | { |
1172 | 1176 | char *p, *q; |
1173 | 1177 | |
1178 | 1182 | p = q = id->output.text; |
1179 | 1183 | while (*p) { |
1180 | 1184 | if (*p == ':' || *p == ',' || *p == '(' || *p == ')') { |
1181 | err_infonodechar(fpos, *p); | |
1185 | err_infonodechar(es, fpos, *p); | |
1182 | 1186 | } else { |
1183 | 1187 | *q++ = *p; |
1184 | 1188 | } |
1189 | 1193 | return id->output.text; |
1190 | 1194 | } |
1191 | 1195 | |
1192 | static char *info_node_name_for_para(paragraph *par, infoconfig *cfg) | |
1196 | static char *info_node_name_for_para(paragraph *par, infoconfig *cfg, | |
1197 | errorstate *es) | |
1193 | 1198 | { |
1194 | 1199 | info_data id = EMPTY_INFO_DATA; |
1195 | 1200 | |
1196 | 1201 | id.charset = cfg->charset; |
1197 | 1202 | info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words, |
1198 | NULL, FALSE, cfg); | |
1199 | info_rdaddsc(&id, NULL); | |
1200 | ||
1201 | return info_node_name_core(&id, &par->fpos); | |
1202 | } | |
1203 | ||
1204 | static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg) | |
1203 | NULL, false, cfg); | |
1204 | info_rdaddsc(&id, ""); | |
1205 | ||
1206 | return info_node_name_core(&id, &par->fpos, es); | |
1207 | } | |
1208 | ||
1209 | static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg, | |
1210 | errorstate *es) | |
1205 | 1211 | { |
1206 | 1212 | info_data id = EMPTY_INFO_DATA; |
1207 | 1213 | |
1209 | 1215 | info_rdadds(&id, text); |
1210 | 1216 | info_rdaddsc(&id, NULL); |
1211 | 1217 | |
1212 | return info_node_name_core(&id, NULL); | |
1218 | return info_node_name_core(&id, NULL, es); | |
1213 | 1219 | } |
1214 | 1220 | |
1215 | 1221 | static void info_menu_item(info_data *text, node *n, paragraph *p, |
1233 | 1239 | info_rdaddsc(text, "::"); |
1234 | 1240 | if (p) { |
1235 | 1241 | info_rdaddc(text, ' '); |
1236 | info_rdaddwc(text, p->words, NULL, FALSE, cfg); | |
1242 | info_rdaddwc(text, p->words, NULL, false, cfg); | |
1237 | 1243 | } |
1238 | 1244 | info_rdaddc(text, '\n'); |
1239 | 1245 | } |
1252 | 1258 | { |
1253 | 1259 | if (!d->wcmode) { |
1254 | 1260 | d->state = charset_init_state; |
1255 | d->wcmode = TRUE; | |
1261 | d->wcmode = true; | |
1256 | 1262 | } |
1257 | 1263 | |
1258 | 1264 | if (wcs) { |
1294 | 1300 | rdaddsc(&d->output, buf); |
1295 | 1301 | } |
1296 | 1302 | |
1297 | d->wcmode = FALSE; | |
1303 | d->wcmode = false; | |
1298 | 1304 | } |
1299 | 1305 | |
1300 | 1306 | if (cs) { |
8 | 8 | |
9 | 9 | typedef struct { |
10 | 10 | wchar_t *th; |
11 | int headnumbers; | |
11 | bool headnumbers; | |
12 | 12 | int mindepth; |
13 | 13 | char *filename; |
14 | 14 | int charset; |
16 | 16 | } manconfig; |
17 | 17 | |
18 | 18 | static void man_text(FILE *, word *, |
19 | int newline, int quote_props, manconfig *conf); | |
19 | bool newline, int quote_props, manconfig *conf); | |
20 | 20 | static void man_codepara(FILE *, word *, int charset); |
21 | static int man_convert(wchar_t const *s, int maxlen, | |
22 | char **result, int quote_props, | |
23 | int charset, charset_state *state); | |
21 | static bool man_convert(wchar_t const *s, int maxlen, | |
22 | char **result, int quote_props, | |
23 | int charset, charset_state *state); | |
24 | 24 | |
25 | 25 | /* |
26 | 26 | * My TROFF reference is "NROFF/TROFF User's Manual", Joseph |
98 | 98 | } |
99 | 99 | |
100 | 100 | /* |
101 | * Return TRUE if we can represent the whole of the given string either | |
102 | * in the output charset or as named characters; FALSE otherwise. | |
101 | * Return true if we can represent the whole of the given string either | |
102 | * in the output charset or as named characters; false otherwise. | |
103 | 103 | */ |
104 | static int troff_ok(int charset, wchar_t *string) { | |
104 | static bool troff_ok(int charset, wchar_t *string) { | |
105 | 105 | wchar_t test[2]; |
106 | 106 | while (*string) { |
107 | 107 | test[0] = *string; |
108 | 108 | test[1] = 0; |
109 | 109 | if (!cvt_ok(charset, test) && !troffchar(*string)) |
110 | return FALSE; | |
110 | return false; | |
111 | 111 | string++; |
112 | 112 | } |
113 | return TRUE; | |
114 | } | |
115 | ||
116 | static manconfig man_configure(paragraph *source) { | |
113 | return true; | |
114 | } | |
115 | ||
116 | static manconfig man_configure(paragraph *source, errorstate *es) { | |
117 | 117 | paragraph *p; |
118 | 118 | manconfig ret; |
119 | 119 | |
121 | 121 | * Defaults. |
122 | 122 | */ |
123 | 123 | ret.th = NULL; |
124 | ret.headnumbers = FALSE; | |
124 | ret.headnumbers = false; | |
125 | 125 | ret.mindepth = 0; |
126 | 126 | ret.filename = dupstr("output.1"); |
127 | 127 | ret.charset = CS_ASCII; |
160 | 160 | ret.th = snewn(ep - wp + 1, wchar_t); |
161 | 161 | memcpy(ret.th, wp, (ep - wp + 1) * sizeof(wchar_t)); |
162 | 162 | } else if (!ustricmp(p->keyword, L"man-charset")) { |
163 | ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword)); | |
163 | ret.charset = charset_from_ustr( | |
164 | &p->fpos, uadv(p->keyword), es); | |
164 | 165 | } else if (!ustricmp(p->keyword, L"man-headnumbers")) { |
165 | 166 | ret.headnumbers = utob(uadv(p->keyword)); |
166 | 167 | } else if (!ustricmp(p->keyword, L"man-mindepth")) { |
219 | 220 | #define QUOTE_LITERAL 4 /* defeat special meaning of `, ', - in troff */ |
220 | 221 | |
221 | 222 | void man_backend(paragraph *sourceform, keywordlist *keywords, |
222 | indexdata *idx, void *unused) { | |
223 | indexdata *idx, void *unused, errorstate *es) { | |
223 | 224 | paragraph *p; |
224 | 225 | FILE *fp; |
225 | 226 | manconfig conf; |
226 | int had_described_thing; | |
227 | bool had_described_thing; | |
227 | 228 | |
228 | 229 | IGNORE(unused); |
229 | 230 | IGNORE(keywords); |
230 | 231 | IGNORE(idx); |
231 | 232 | |
232 | conf = man_configure(sourceform); | |
233 | conf = man_configure(sourceform, es); | |
233 | 234 | |
234 | 235 | /* |
235 | 236 | * Open the output file. |
239 | 240 | else |
240 | 241 | fp = fopen(conf.filename, "w"); |
241 | 242 | if (!fp) { |
242 | err_cantopenw(conf.filename); | |
243 | err_cantopenw(es, conf.filename); | |
243 | 244 | return; |
244 | 245 | } |
245 | 246 | |
247 | 248 | for (p = sourceform; p; p = p->next) |
248 | 249 | if (p->type == para_VersionID) { |
249 | 250 | fprintf(fp, ".\\\" "); |
250 | man_text(fp, p->words, TRUE, 0, &conf); | |
251 | man_text(fp, p->words, true, 0, &conf); | |
251 | 252 | } |
252 | 253 | |
253 | 254 | /* Standard preamble */ |
272 | 273 | } |
273 | 274 | fputc('\n', fp); |
274 | 275 | |
275 | had_described_thing = FALSE; | |
276 | had_described_thing = false; | |
276 | 277 | #define cleanup_described_thing do { \ |
277 | 278 | if (had_described_thing) \ |
278 | 279 | fprintf(fp, "\n"); \ |
279 | had_described_thing = FALSE; \ | |
280 | had_described_thing = false; \ | |
280 | 281 | } while (0) |
281 | 282 | |
282 | 283 | for (p = sourceform; p; p = p->next) switch (p->type) { |
316 | 317 | else |
317 | 318 | fprintf(fp, ".SH \""); |
318 | 319 | if (conf.headnumbers && p->kwtext) { |
319 | man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf); | |
320 | man_text(fp, p->kwtext, false, QUOTE_QUOTES, &conf); | |
320 | 321 | fprintf(fp, " "); |
321 | 322 | } |
322 | man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf); | |
323 | man_text(fp, p->words, false, QUOTE_QUOTES, &conf); | |
323 | 324 | fprintf(fp, "\"\n"); |
324 | 325 | } |
325 | 326 | break; |
341 | 342 | case para_Copyright: |
342 | 343 | cleanup_described_thing; |
343 | 344 | fprintf(fp, ".PP\n"); |
344 | man_text(fp, p->words, TRUE, 0, &conf); | |
345 | man_text(fp, p->words, true, 0, &conf); | |
345 | 346 | break; |
346 | 347 | |
347 | 348 | /* |
362 | 363 | sfree(bullettext); |
363 | 364 | } else if (p->type == para_NumberedList) { |
364 | 365 | fprintf(fp, ".IP \""); |
365 | man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf); | |
366 | man_text(fp, p->kwtext, false, QUOTE_QUOTES, &conf); | |
366 | 367 | fprintf(fp, "\"\n"); |
367 | 368 | } else if (p->type == para_Description) { |
368 | 369 | if (had_described_thing) { |
380 | 381 | } |
381 | 382 | } else if (p->type == para_BiblioCited) { |
382 | 383 | fprintf(fp, ".IP \""); |
383 | man_text(fp, p->kwtext, FALSE, QUOTE_QUOTES, &conf); | |
384 | man_text(fp, p->kwtext, false, QUOTE_QUOTES, &conf); | |
384 | 385 | fprintf(fp, "\"\n"); |
385 | 386 | } |
386 | man_text(fp, p->words, TRUE, 0, &conf); | |
387 | had_described_thing = FALSE; | |
387 | man_text(fp, p->words, true, 0, &conf); | |
388 | had_described_thing = false; | |
388 | 389 | break; |
389 | 390 | |
390 | 391 | case para_DescribedThing: |
391 | 392 | cleanup_described_thing; |
392 | 393 | fprintf(fp, ".IP \""); |
393 | man_text(fp, p->words, FALSE, QUOTE_QUOTES, &conf); | |
394 | man_text(fp, p->words, false, QUOTE_QUOTES, &conf); | |
394 | 395 | fprintf(fp, "\"\n"); |
395 | had_described_thing = TRUE; | |
396 | had_described_thing = true; | |
396 | 397 | break; |
397 | 398 | |
398 | 399 | case para_Rule: |
447 | 448 | * |
448 | 449 | * This function also does escaping of groff special characters. |
449 | 450 | */ |
450 | static int man_convert(wchar_t const *s, int maxlen, | |
451 | char **result, int quote_props, | |
452 | int charset, charset_state *state) { | |
451 | static bool man_convert(wchar_t const *s, int maxlen, | |
452 | char **result, int quote_props, | |
453 | int charset, charset_state *state) { | |
453 | 454 | charset_state internal_state = CHARSET_INIT_STATE; |
454 | int slen, err; | |
455 | int slen; | |
455 | 456 | char *p = NULL, *q; |
456 | 457 | int plen = 0, psize = 0; |
457 | 458 | rdstringc out = {0, 0, NULL}; |
458 | int anyerr = 0; | |
459 | bool err, anyerr = false; | |
459 | 460 | |
460 | 461 | if (!state) |
461 | 462 | state = &internal_state; |
468 | 469 | psize = 384; |
469 | 470 | plen = 0; |
470 | 471 | p = snewn(psize, char); |
471 | err = 0; | |
472 | err = false; | |
472 | 473 | |
473 | 474 | while (slen > 0) { |
474 | 475 | int ret = charset_from_unicode(&s, &slen, p, psize, |
527 | 528 | if (err) { |
528 | 529 | char const *tr = troffchar(*s); |
529 | 530 | if (tr == NULL) |
530 | anyerr = TRUE; | |
531 | anyerr = true; | |
531 | 532 | else |
532 | 533 | rdaddsc(&out, tr); |
533 | 534 | s++; slen--; |
626 | 627 | |
627 | 628 | if (removeattr(text->type) == word_Normal) { |
628 | 629 | charset_state s2 = *state; |
629 | int len = ustrlen(text->text), hyphen = FALSE; | |
630 | int len = ustrlen(text->text); | |
631 | bool hyphen = false; | |
630 | 632 | |
631 | 633 | if (text->breaks && len > 0 && text->text[len - 1] == '-') { |
632 | 634 | len--; |
633 | hyphen = TRUE; | |
635 | hyphen = true; | |
634 | 636 | } |
635 | 637 | if (len == 0 || |
636 | 638 | man_convert(text->text, len, &c, quote_props, conf->charset, |
677 | 679 | return quote_props; |
678 | 680 | } |
679 | 681 | |
680 | static void man_text(FILE *fp, word *text, int newline, | |
682 | static void man_text(FILE *fp, word *text, bool newline, | |
681 | 683 | int quote_props, manconfig *conf) { |
682 | 684 | rdstringc t = { 0, 0, NULL }; |
683 | 685 | charset_state state = CHARSET_INIT_STATE; |
140 | 140 | /* Flags for render_string() */ |
141 | 141 | #define RS_NOLIG 1 |
142 | 142 | |
143 | static font_data *make_std_font(font_list *fontlist, char const *name); | |
143 | static font_data *make_std_font(font_list *fontlist, psdata *psd, | |
144 | const char *name); | |
144 | 145 | static void wrap_paragraph(para_data *pdata, word *words, |
145 | 146 | int w, int i1, int i2, paper_conf *conf); |
146 | 147 | static page_data *page_breaks(line_data *first, line_data *last, |
153 | 154 | static void render_para(para_data *pdata, paper_conf *conf, |
154 | 155 | keywordlist *keywords, indexdata *idx, |
155 | 156 | paragraph *index_placeholder, page_data *index_page); |
156 | static int string_width(font_data *font, wchar_t const *string, int *errs, | |
157 | static int string_width(font_data *font, wchar_t const *string, bool *errs, | |
157 | 158 | unsigned flags); |
158 | 159 | static int paper_width_simple(para_data *pdata, word *text, paper_conf *conf); |
159 | 160 | static para_data *code_paragraph(int indent, word *words, paper_conf *conf); |
173 | 174 | word *second); |
174 | 175 | static void fold_into_page(page_data *dest, page_data *src, int right_shift); |
175 | 176 | |
176 | static int fonts_ok(wchar_t *string, ...) | |
177 | static bool fonts_ok(wchar_t *string, ...) | |
177 | 178 | { |
178 | 179 | font_data *font; |
179 | 180 | va_list ap; |
180 | int ret = TRUE; | |
181 | bool ret = true; | |
181 | 182 | |
182 | 183 | va_start(ap, string); |
183 | 184 | while ( (font = va_arg(ap, font_data *)) != NULL) { |
184 | int errs; | |
185 | bool errs; | |
185 | 186 | (void) string_width(font, string, &errs, 0); |
186 | 187 | if (errs) { |
187 | ret = FALSE; | |
188 | ret = false; | |
188 | 189 | break; |
189 | 190 | } |
190 | 191 | } |
194 | 195 | } |
195 | 196 | |
196 | 197 | static void paper_cfg_fonts(font_data **fonts, font_list *fontlist, |
197 | wchar_t *wp, filepos *fpos) { | |
198 | wchar_t *wp, filepos *fpos, psdata *psd, | |
199 | errorstate *es) { | |
198 | 200 | font_data *f; |
199 | 201 | char *fn; |
200 | 202 | int i; |
201 | 203 | |
202 | 204 | for (i = 0; i < NFONTS && *wp; i++, wp = uadv(wp)) { |
203 | 205 | fn = utoa_dup(wp, CS_ASCII); |
204 | f = make_std_font(fontlist, fn); | |
206 | f = make_std_font(fontlist, psd, fn); | |
205 | 207 | if (f) |
206 | 208 | fonts[i] = f; |
207 | 209 | else |
208 | 210 | /* FIXME: proper error */ |
209 | err_nofont(fpos, wp); | |
210 | } | |
211 | } | |
212 | ||
213 | static paper_conf paper_configure(paragraph *source, font_list *fontlist) { | |
211 | err_nofont(es, fpos, wp); | |
212 | } | |
213 | } | |
214 | ||
215 | static paper_conf paper_configure(paragraph *source, font_list *fontlist, | |
216 | psdata *psd, errorstate *es) { | |
214 | 217 | paragraph *p; |
215 | 218 | paper_conf ret; |
216 | 219 | |
234 | 237 | ret.chapter_underline_thickness = 3 * UNITS_PER_PT; |
235 | 238 | ret.rule_thickness = 1 * UNITS_PER_PT; |
236 | 239 | ret.fbase.font_size = 12; |
237 | ret.fbase.fonts[FONT_NORMAL] = make_std_font(fontlist, "Times-Roman"); | |
238 | ret.fbase.fonts[FONT_EMPH] = make_std_font(fontlist, "Times-Italic"); | |
239 | ret.fbase.fonts[FONT_STRONG] = make_std_font(fontlist, "Times-Bold"); | |
240 | ret.fbase.fonts[FONT_CODE] = make_std_font(fontlist, "Courier"); | |
240 | ret.fbase.fonts[FONT_NORMAL] = | |
241 | make_std_font(fontlist, psd, "Times-Roman"); | |
242 | ret.fbase.fonts[FONT_EMPH] = | |
243 | make_std_font(fontlist, psd, "Times-Italic"); | |
244 | ret.fbase.fonts[FONT_STRONG] = | |
245 | make_std_font(fontlist, psd, "Times-Bold"); | |
246 | ret.fbase.fonts[FONT_CODE] = | |
247 | make_std_font(fontlist, psd, "Courier"); | |
241 | 248 | ret.fcode.font_size = 12; |
242 | ret.fcode.fonts[FONT_NORMAL] = make_std_font(fontlist, "Courier-Bold"); | |
243 | ret.fcode.fonts[FONT_EMPH] = make_std_font(fontlist, "Courier-Oblique"); | |
244 | ret.fcode.fonts[FONT_STRONG] = make_std_font(fontlist, "Courier-Bold"); | |
245 | ret.fcode.fonts[FONT_CODE] = make_std_font(fontlist, "Courier"); | |
249 | ret.fcode.fonts[FONT_NORMAL] = | |
250 | make_std_font(fontlist, psd, "Courier-Bold"); | |
251 | ret.fcode.fonts[FONT_EMPH] = | |
252 | make_std_font(fontlist, psd, "Courier-Oblique"); | |
253 | ret.fcode.fonts[FONT_STRONG] = | |
254 | make_std_font(fontlist, psd, "Courier-Bold"); | |
255 | ret.fcode.fonts[FONT_CODE] = | |
256 | make_std_font(fontlist, psd, "Courier"); | |
246 | 257 | ret.ftitle.font_size = 24; |
247 | ret.ftitle.fonts[FONT_NORMAL] = make_std_font(fontlist, "Helvetica-Bold"); | |
258 | ret.ftitle.fonts[FONT_NORMAL] = | |
259 | make_std_font(fontlist, psd, "Helvetica-Bold"); | |
248 | 260 | ret.ftitle.fonts[FONT_EMPH] = |
249 | make_std_font(fontlist, "Helvetica-BoldOblique"); | |
261 | make_std_font(fontlist, psd, "Helvetica-BoldOblique"); | |
250 | 262 | ret.ftitle.fonts[FONT_STRONG] = |
251 | make_std_font(fontlist, "Helvetica-Bold"); | |
252 | ret.ftitle.fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold"); | |
263 | make_std_font(fontlist, psd,"Helvetica-Bold"); | |
264 | ret.ftitle.fonts[FONT_CODE] = | |
265 | make_std_font(fontlist, psd, "Courier-Bold"); | |
253 | 266 | ret.fchapter.font_size = 20; |
254 | ret.fchapter.fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold"); | |
267 | ret.fchapter.fonts[FONT_NORMAL] = | |
268 | make_std_font(fontlist, psd, "Helvetica-Bold"); | |
255 | 269 | ret.fchapter.fonts[FONT_EMPH] = |
256 | make_std_font(fontlist, "Helvetica-BoldOblique"); | |
270 | make_std_font(fontlist, psd,"Helvetica-BoldOblique"); | |
257 | 271 | ret.fchapter.fonts[FONT_STRONG] = |
258 | make_std_font(fontlist, "Helvetica-Bold"); | |
259 | ret.fchapter.fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold"); | |
272 | make_std_font(fontlist, psd,"Helvetica-Bold"); | |
273 | ret.fchapter.fonts[FONT_CODE] = | |
274 | make_std_font(fontlist, psd, "Courier-Bold"); | |
260 | 275 | ret.nfsect = 3; |
261 | 276 | ret.fsect = snewn(ret.nfsect, font_cfg); |
262 | 277 | ret.fsect[0].font_size = 16; |
263 | ret.fsect[0].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold"); | |
278 | ret.fsect[0].fonts[FONT_NORMAL] = | |
279 | make_std_font(fontlist, psd, "Helvetica-Bold"); | |
264 | 280 | ret.fsect[0].fonts[FONT_EMPH] = |
265 | make_std_font(fontlist, "Helvetica-BoldOblique"); | |
281 | make_std_font(fontlist, psd,"Helvetica-BoldOblique"); | |
266 | 282 | ret.fsect[0].fonts[FONT_STRONG] = |
267 | make_std_font(fontlist, "Helvetica-Bold"); | |
268 | ret.fsect[0].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold"); | |
283 | make_std_font(fontlist, psd,"Helvetica-Bold"); | |
284 | ret.fsect[0].fonts[FONT_CODE] = | |
285 | make_std_font(fontlist, psd, "Courier-Bold"); | |
269 | 286 | ret.fsect[1].font_size = 14; |
270 | ret.fsect[1].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold"); | |
287 | ret.fsect[1].fonts[FONT_NORMAL] = | |
288 | make_std_font(fontlist, psd, "Helvetica-Bold"); | |
271 | 289 | ret.fsect[1].fonts[FONT_EMPH] = |
272 | make_std_font(fontlist, "Helvetica-BoldOblique"); | |
290 | make_std_font(fontlist, psd, "Helvetica-BoldOblique"); | |
273 | 291 | ret.fsect[1].fonts[FONT_STRONG] = |
274 | make_std_font(fontlist, "Helvetica-Bold"); | |
275 | ret.fsect[1].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold"); | |
292 | make_std_font(fontlist, psd, "Helvetica-Bold"); | |
293 | ret.fsect[1].fonts[FONT_CODE] = | |
294 | make_std_font(fontlist, psd, "Courier-Bold"); | |
276 | 295 | ret.fsect[2].font_size = 13; |
277 | ret.fsect[2].fonts[FONT_NORMAL]= make_std_font(fontlist, "Helvetica-Bold"); | |
296 | ret.fsect[2].fonts[FONT_NORMAL] = | |
297 | make_std_font(fontlist, psd, "Helvetica-Bold"); | |
278 | 298 | ret.fsect[2].fonts[FONT_EMPH] = |
279 | make_std_font(fontlist, "Helvetica-BoldOblique"); | |
299 | make_std_font(fontlist, psd, "Helvetica-BoldOblique"); | |
280 | 300 | ret.fsect[2].fonts[FONT_STRONG] = |
281 | make_std_font(fontlist, "Helvetica-Bold"); | |
282 | ret.fsect[2].fonts[FONT_CODE] = make_std_font(fontlist, "Courier-Bold"); | |
301 | make_std_font(fontlist, psd, "Helvetica-Bold"); | |
302 | ret.fsect[2].fonts[FONT_CODE] = | |
303 | make_std_font(fontlist, psd, "Courier-Bold"); | |
283 | 304 | ret.contents_indent_step = 24 * UNITS_PER_PT; |
284 | 305 | ret.contents_margin = 84 * UNITS_PER_PT; |
285 | 306 | ret.leader_separation = 12 * UNITS_PER_PT; |
399 | 420 | ret.pagenum_fontsize = utoi(uadv(p->keyword)); |
400 | 421 | } else if (!ustricmp(p->keyword, L"paper-base-fonts")) { |
401 | 422 | paper_cfg_fonts(ret.fbase.fonts, fontlist, uadv(p->keyword), |
402 | &p->fpos); | |
423 | &p->fpos, psd, es); | |
403 | 424 | } else if (!ustricmp(p->keyword, L"paper-code-font-size")) { |
404 | 425 | ret.fcode.font_size = utoi(uadv(p->keyword)); |
405 | 426 | } else if (!ustricmp(p->keyword, L"paper-code-fonts")) { |
406 | 427 | paper_cfg_fonts(ret.fcode.fonts, fontlist, uadv(p->keyword), |
407 | &p->fpos); | |
428 | &p->fpos, psd, es); | |
408 | 429 | } else if (!ustricmp(p->keyword, L"paper-title-font-size")) { |
409 | 430 | ret.ftitle.font_size = utoi(uadv(p->keyword)); |
410 | 431 | } else if (!ustricmp(p->keyword, L"paper-title-fonts")) { |
411 | 432 | paper_cfg_fonts(ret.ftitle.fonts, fontlist, uadv(p->keyword), |
412 | &p->fpos); | |
433 | &p->fpos, psd, es); | |
413 | 434 | } else if (!ustricmp(p->keyword, L"paper-chapter-font-size")) { |
414 | 435 | ret.fchapter.font_size = utoi(uadv(p->keyword)); |
415 | 436 | } else if (!ustricmp(p->keyword, L"paper-chapter-fonts")) { |
416 | 437 | paper_cfg_fonts(ret.fchapter.fonts, fontlist, uadv(p->keyword), |
417 | &p->fpos); | |
438 | &p->fpos, psd, es); | |
418 | 439 | } else if (!ustricmp(p->keyword, L"paper-section-font-size")) { |
419 | 440 | wchar_t *q = uadv(p->keyword); |
420 | 441 | int n = 0; |
444 | 465 | ret.fsect[i] = ret.fsect[ret.nfsect-1]; |
445 | 466 | ret.nfsect = n+1; |
446 | 467 | } |
447 | paper_cfg_fonts(ret.fsect[n].fonts, fontlist, q, &p->fpos); | |
468 | paper_cfg_fonts(ret.fsect[n].fonts, fontlist, q, &p->fpos, | |
469 | psd, es); | |
448 | 470 | } |
449 | 471 | } |
450 | 472 | } |
520 | 542 | } |
521 | 543 | |
522 | 544 | void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords, |
523 | indexdata *idx) { | |
545 | indexdata *idx, psdata *psd, errorstate *es) { | |
524 | 546 | paragraph *p; |
525 | 547 | document *doc; |
526 | int indent, used_contents; | |
548 | int indent; | |
549 | bool used_contents; | |
527 | 550 | para_data *pdata, *firstpara = NULL, *lastpara = NULL; |
528 | 551 | para_data *firstcont, *lastcont; |
529 | 552 | line_data *firstline, *lastline, *firstcontline, *lastcontline; |
530 | 553 | page_data *pages; |
531 | 554 | font_list *fontlist; |
532 | 555 | paper_conf *conf, ourconf; |
533 | int has_index; | |
556 | bool has_index; | |
534 | 557 | int pagenum; |
535 | 558 | paragraph index_placeholder_para; |
536 | page_data *first_index_page; | |
537 | ||
538 | init_std_fonts(); | |
559 | page_data *first_index_page = NULL; | |
560 | ||
561 | init_std_fonts(psd); | |
539 | 562 | fontlist = snew(font_list); |
540 | 563 | fontlist->head = fontlist->tail = NULL; |
541 | 564 | |
542 | ourconf = paper_configure(sourceform, fontlist); | |
565 | ourconf = paper_configure(sourceform, fontlist, psd, es); | |
543 | 566 | conf = &ourconf; |
544 | 567 | |
545 | 568 | /* |
550 | 573 | int i; |
551 | 574 | indexentry *entry; |
552 | 575 | |
553 | has_index = FALSE; | |
576 | has_index = false; | |
554 | 577 | |
555 | 578 | for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) { |
556 | 579 | paper_idx *pi = snew(paper_idx); |
557 | 580 | |
558 | has_index = TRUE; | |
581 | has_index = true; | |
559 | 582 | |
560 | 583 | pi->words = pi->lastword = NULL; |
561 | 584 | pi->lastpage = NULL; |
662 | 685 | * Do the main paragraph formatting. |
663 | 686 | */ |
664 | 687 | indent = 0; |
665 | used_contents = FALSE; | |
688 | used_contents = false; | |
666 | 689 | firstline = lastline = NULL; |
667 | 690 | for (p = sourceform; p; p = p->next) { |
668 | 691 | p->private_data = NULL; |
752 | 775 | * contents section in before it. |
753 | 776 | */ |
754 | 777 | if (!used_contents && pdata->outline_level > 0) { |
755 | used_contents = TRUE; | |
778 | used_contents = true; | |
756 | 779 | if (lastpara) |
757 | 780 | lastpara->next = firstcont; |
758 | 781 | else |
1055 | 1078 | doc->pages = pages; |
1056 | 1079 | doc->paper_width = conf->paper_width; |
1057 | 1080 | doc->paper_height = conf->paper_height; |
1081 | doc->psd = psd; | |
1058 | 1082 | |
1059 | 1083 | /* |
1060 | 1084 | * Collect the section heading paragraphs into a document |
1318 | 1342 | ptype == para_Chapter || |
1319 | 1343 | ptype == para_Appendix || |
1320 | 1344 | ptype == para_UnnumberedChapter) { |
1321 | pdata->first->page_break = TRUE; | |
1345 | pdata->first->page_break = true; | |
1322 | 1346 | pdata->first->space_before = conf->chapter_top_space; |
1323 | 1347 | pdata->last->space_after += |
1324 | 1348 | (conf->chapter_underline_depth + |
1345 | 1369 | ldata->space_after = conf->base_para_spacing / 2; |
1346 | 1370 | else |
1347 | 1371 | ldata->space_after = conf->base_leading / 2; |
1348 | ldata->page_break = FALSE; | |
1372 | ldata->page_break = false; | |
1349 | 1373 | } |
1350 | 1374 | } |
1351 | 1375 | |
1387 | 1411 | return sme; |
1388 | 1412 | } |
1389 | 1413 | |
1390 | static int new_sfmap_cmp(void *a, void *b) | |
1391 | { | |
1392 | glyph ga = *(glyph *)a; | |
1393 | subfont_map_entry *sb = b; | |
1414 | static int new_sfmap_cmp(const void *a, const void *b, void *cmpctx) | |
1415 | { | |
1416 | const glyph ga = *(const glyph *)a; | |
1417 | const subfont_map_entry *sb = b; | |
1394 | 1418 | glyph gb = sb->subfont->vector[sb->position]; |
1395 | 1419 | |
1396 | 1420 | if (ga < gb) return -1; |
1403 | 1427 | subfont_map_entry *sme; |
1404 | 1428 | int c; |
1405 | 1429 | |
1406 | sme = find234(font->subfont_map, &g, new_sfmap_cmp); | |
1430 | sme = findcmp234(font->subfont_map, &g, new_sfmap_cmp, NULL); | |
1407 | 1431 | if (sme) return sme; |
1408 | 1432 | |
1409 | 1433 | /* |
1419 | 1443 | return encode_glyph_at(g, u, font->latest_subfont, c); |
1420 | 1444 | } |
1421 | 1445 | |
1422 | static int sfmap_cmp(void *a, void *b) | |
1423 | { | |
1424 | subfont_map_entry *sa = a, *sb = b; | |
1446 | static int sfmap_cmp(const void *a, const void *b, void *cmpctx) | |
1447 | { | |
1448 | const subfont_map_entry *sa = a, *sb = b; | |
1425 | 1449 | glyph ga = sa->subfont->vector[sa->position]; |
1426 | 1450 | glyph gb = sb->subfont->vector[sb->position]; |
1427 | 1451 | |
1430 | 1454 | return 0; |
1431 | 1455 | } |
1432 | 1456 | |
1433 | int width_cmp(void *a, void *b) | |
1457 | int width_cmp(const void *a, const void *b, void *cmpctx) | |
1434 | 1458 | { |
1435 | 1459 | glyph_width const *wa = a, *wb = b; |
1436 | 1460 | |
1441 | 1465 | return 0; |
1442 | 1466 | } |
1443 | 1467 | |
1444 | int kern_cmp(void *a, void *b) | |
1468 | int kern_cmp(const void *a, const void *b, void *cmpctx) | |
1445 | 1469 | { |
1446 | 1470 | kern_pair const *ka = a, *kb = b; |
1447 | 1471 | |
1456 | 1480 | return 0; |
1457 | 1481 | } |
1458 | 1482 | |
1459 | int lig_cmp(void *a, void *b) | |
1483 | int lig_cmp(const void *a, const void *b, void *cmpctx) | |
1460 | 1484 | { |
1461 | 1485 | ligature const *la = a, *lb = b; |
1462 | 1486 | |
1475 | 1499 | return (u < 0 || u > 0xFFFF ? NOGLYPH : fi->bmp[u]); |
1476 | 1500 | } |
1477 | 1501 | |
1478 | void listfonts(void) { | |
1502 | void listfonts(psdata *psd) { | |
1479 | 1503 | font_info const *fi; |
1480 | 1504 | |
1481 | init_std_fonts(); | |
1482 | for (fi = all_fonts; fi; fi = fi->next) | |
1505 | init_std_fonts(psd); | |
1506 | for (fi = psd->all_fonts; fi; fi = fi->next) | |
1483 | 1507 | printf("%s\n", fi->name); |
1484 | 1508 | } |
1485 | 1509 | |
1486 | static font_data *make_std_font(font_list *fontlist, char const *name) | |
1510 | static font_data *make_std_font(font_list *fontlist, psdata *psd, | |
1511 | const char *name) | |
1487 | 1512 | { |
1488 | 1513 | font_info const *fi; |
1489 | 1514 | font_data *f; |
1494 | 1519 | if (strcmp(fe->font->info->name, name) == 0) |
1495 | 1520 | return fe->font; |
1496 | 1521 | |
1497 | for (fi = all_fonts; fi; fi = fi->next) | |
1522 | for (fi = psd->all_fonts; fi; fi = fi->next) | |
1498 | 1523 | if (strcmp(fi->name, name) == 0) break; |
1499 | 1524 | if (!fi) return NULL; |
1500 | 1525 | |
1502 | 1527 | |
1503 | 1528 | f->list = fontlist; |
1504 | 1529 | f->info = fi; |
1505 | f->subfont_map = newtree234(sfmap_cmp); | |
1530 | f->subfont_map = newtree234(sfmap_cmp, NULL); | |
1506 | 1531 | |
1507 | 1532 | /* |
1508 | 1533 | * Our first subfont will contain all of US-ASCII. This isn't |
1531 | 1556 | glyph_width const *w; |
1532 | 1557 | |
1533 | 1558 | wantw.glyph = index; |
1534 | w = find234(font->info->widths, &wantw, NULL); | |
1559 | w = find234(font->info->widths, &wantw); | |
1535 | 1560 | if (!w) return 0; |
1536 | 1561 | return w->width; |
1537 | 1562 | } |
1545 | 1570 | return 0; |
1546 | 1571 | wantkp.left = lindex; |
1547 | 1572 | wantkp.right = rindex; |
1548 | kp = find234(font->info->kerns, &wantkp, NULL); | |
1573 | kp = find234(font->info->kerns, &wantkp); | |
1549 | 1574 | if (kp == NULL) |
1550 | 1575 | return 0; |
1551 | 1576 | return kp->kern; |
1560 | 1585 | return NOGLYPH; |
1561 | 1586 | wantlig.left = lindex; |
1562 | 1587 | wantlig.right = rindex; |
1563 | lig = find234(font->info->ligs, &wantlig, NULL); | |
1588 | lig = find234(font->info->ligs, &wantlig); | |
1564 | 1589 | if (lig == NULL) |
1565 | 1590 | return NOGLYPH; |
1566 | 1591 | return lig->lig; |
1567 | 1592 | } |
1568 | 1593 | |
1569 | static int string_width(font_data *font, wchar_t const *string, int *errs, | |
1594 | static int string_width(font_data *font, wchar_t const *string, bool *errs, | |
1570 | 1595 | unsigned flags) |
1571 | 1596 | { |
1572 | 1597 | int width = 0; |
1573 | 1598 | int nindex, index, oindex, lindex; |
1574 | 1599 | |
1575 | 1600 | if (errs) |
1576 | *errs = 0; | |
1601 | *errs = false; | |
1577 | 1602 | |
1578 | 1603 | oindex = NOGLYPH; |
1579 | 1604 | index = utoglyph(font->info, *string); |
1582 | 1607 | |
1583 | 1608 | if (index == NOGLYPH) { |
1584 | 1609 | if (errs) |
1585 | *errs = 1; | |
1610 | *errs = true; | |
1586 | 1611 | } else { |
1587 | 1612 | if (!(flags & RS_NOLIG) && |
1588 | 1613 | (lindex = find_lig(font, index, nindex)) != NOGLYPH) { |
1618 | 1643 | static int paper_width_internal(void *vctx, word *word, int *nspaces) |
1619 | 1644 | { |
1620 | 1645 | struct paper_width_ctx *ctx = (struct paper_width_ctx *)vctx; |
1621 | int style, type, findex, width, errs; | |
1646 | int style, type, findex, width; | |
1647 | bool errs; | |
1622 | 1648 | wchar_t *str; |
1623 | 1649 | unsigned flags = 0; |
1624 | 1650 | |
2102 | 2128 | keywordlist *keywords, indexdata *idx, paper_conf *conf) |
2103 | 2129 | { |
2104 | 2130 | while (text && text != text_end) { |
2105 | int style, type, findex, errs; | |
2131 | int style, type, findex; | |
2132 | bool errs; | |
2106 | 2133 | wchar_t *str; |
2107 | 2134 | xref_dest dest; |
2108 | 2135 | unsigned flags = 0; |
2323 | 2350 | xr = NULL; |
2324 | 2351 | |
2325 | 2352 | { |
2326 | int extra_indent, shortfall, spaces; | |
2353 | int extra_indent = 0, shortfall = 0, spaces = 0; | |
2327 | 2354 | int just = ldata->pdata->justification; |
2328 | 2355 | |
2329 | 2356 | /* |
2337 | 2364 | case JUST: |
2338 | 2365 | shortfall = ldata->hshortfall; |
2339 | 2366 | spaces = ldata->nspaces; |
2340 | extra_indent = 0; | |
2341 | break; | |
2342 | case LEFT: | |
2343 | shortfall = spaces = extra_indent = 0; | |
2344 | 2367 | break; |
2345 | 2368 | case RIGHT: |
2346 | shortfall = spaces = 0; | |
2347 | 2369 | extra_indent = ldata->real_shortfall; |
2348 | 2370 | break; |
2349 | 2371 | } |
2382 | 2404 | cxref = NULL; |
2383 | 2405 | cxref_page = NULL; |
2384 | 2406 | |
2407 | assert(pdata->first); | |
2385 | 2408 | for (ldata = pdata->first; ldata; ldata = ldata->next) { |
2386 | 2409 | /* |
2387 | 2410 | * If this is a contents entry, we expect to have a single |
2560 | 2583 | w->text = snewn(t-start+1, wchar_t); |
2561 | 2584 | memcpy(w->text, start, (t-start) * sizeof(wchar_t)); |
2562 | 2585 | w->text[t-start] = '\0'; |
2563 | w->breaks = FALSE; | |
2586 | w->breaks = false; | |
2564 | 2587 | w->aux = 0; |
2565 | 2588 | |
2566 | 2589 | if (ltail) |
2734 | 2757 | ret->alt = NULL; |
2735 | 2758 | ret->type = word_Normal; |
2736 | 2759 | ret->text = ustrdup(text); |
2737 | ret->breaks = FALSE; | |
2760 | ret->breaks = false; | |
2738 | 2761 | ret->aux = 0; |
2739 | 2762 | return ret; |
2740 | 2763 | } |
2746 | 2769 | ret->alt = NULL; |
2747 | 2770 | ret->type = word_WhiteSpace; |
2748 | 2771 | ret->text = NULL; |
2749 | ret->breaks = TRUE; | |
2772 | ret->breaks = true; | |
2750 | 2773 | ret->aux = 0; |
2751 | 2774 | return ret; |
2752 | 2775 | } |
2758 | 2781 | ret->alt = NULL; |
2759 | 2782 | ret->type = word_PageXref; |
2760 | 2783 | ret->text = NULL; |
2761 | ret->breaks = FALSE; | |
2784 | ret->breaks = false; | |
2762 | 2785 | ret->aux = 0; |
2763 | 2786 | ret->private_data = page; |
2764 | 2787 | return ret; |
2771 | 2794 | ret->alt = NULL; |
2772 | 2795 | ret->type = word_XrefEnd; |
2773 | 2796 | ret->text = NULL; |
2774 | ret->breaks = FALSE; | |
2797 | ret->breaks = false; | |
2775 | 2798 | ret->aux = 0; |
2776 | 2799 | return ret; |
2777 | 2800 | } |
2779 | 2802 | static word *prepare_contents_title(word *first, wchar_t *separator, |
2780 | 2803 | word *second) |
2781 | 2804 | { |
2782 | word *ret; | |
2805 | word *ret = NULL; | |
2783 | 2806 | word **wptr, *w; |
2784 | 2807 | |
2785 | 2808 | wptr = &ret; |
34 | 34 | static void objref(object *o, object *dest); |
35 | 35 | static void objdest(object *o, page_data *p); |
36 | 36 | |
37 | static int is_std_font(char const *name); | |
37 | static bool is_std_font(char const *name); | |
38 | 38 | |
39 | 39 | static void make_pages_node(object *node, object *parent, page_data *first, |
40 | 40 | page_data *last, object *resources, |
41 | 41 | object *mediabox); |
42 | 42 | static int make_outline(object *parent, outline_element *start, int n, |
43 | int open); | |
43 | bool open); | |
44 | 44 | static int pdf_versionid(FILE *fp, word *words); |
45 | 45 | |
46 | 46 | void pdf_backend(paragraph *sourceform, keywordlist *keywords, |
47 | indexdata *idx, void *vdoc) { | |
47 | indexdata *idx, void *vdoc, errorstate *es) { | |
48 | 48 | document *doc = (document *)vdoc; |
49 | 49 | int font_index; |
50 | 50 | font_encoding *fe; |
130 | 130 | char fname[40]; |
131 | 131 | char buf[80]; |
132 | 132 | int i, prev; |
133 | object *font, *fontdesc; | |
133 | object *font, *fontdesc = NULL; | |
134 | 134 | int flags; |
135 | 135 | font_info const *fi = fe->font->info; |
136 | 136 | |
300 | 300 | objtext(cidfont, fe->font->info->name); |
301 | 301 | objtext(cidfont, "\n/CIDSystemInfo<</Registry(Adobe)" |
302 | 302 | "/Ordering(Identity)/Supplement 0>>\n"); |
303 | objtext(cidfont, "/FontDescriptor "); | |
304 | objref(cidfont, fontdesc); | |
303 | assert(fontdesc); /* TrueType fonts are never standard */ | |
304 | objtext(cidfont, "/FontDescriptor "); | |
305 | objref(cidfont, fontdesc); | |
305 | 306 | objtext(cidfont, "\n/W[0["); |
306 | 307 | for (i = 0; i < (int)sfnt_nglyphs(fe->font->info->fontfile); i++) { |
307 | 308 | char buf[20]; |
327 | 328 | objtext(font, buf); |
328 | 329 | } |
329 | 330 | objtext(font, i % 8 ? "/" : "\n/"); |
330 | objtext(font, glyph_extern(fe->vector[i])); | |
331 | objtext(font, glyph_extern(doc->psd, fe->vector[i])); | |
331 | 332 | prev = i; |
332 | 333 | } |
333 | 334 | |
368 | 369 | size_t len; |
369 | 370 | char *ffbuf; |
370 | 371 | |
371 | pf_part1((font_info *)fi, &ffbuf, &len); | |
372 | pf_part1((font_info *)fi, &ffbuf, &len, es); | |
372 | 373 | objstream_len(fontfile, ffbuf, len); |
373 | 374 | sfree(ffbuf); |
374 | 375 | sprintf(buf, "<<\n/Length1 %lu\n", (unsigned long)len); |
375 | 376 | objtext(fontfile, buf); |
376 | pf_part2((font_info *)fi, &ffbuf, &len); | |
377 | pf_part2((font_info *)fi, &ffbuf, &len, es); | |
377 | 378 | objstream_len(fontfile, ffbuf, len); |
378 | 379 | sfree(ffbuf); |
379 | 380 | sprintf(buf, "/Length2 %lu\n", (unsigned long)len); |
605 | 606 | |
606 | 607 | objtext(outlines, "<<\n/Type /Outlines\n"); |
607 | 608 | topcount = make_outline(outlines, doc->outline_elements, |
608 | doc->n_outline_elements, TRUE); | |
609 | doc->n_outline_elements, true); | |
609 | 610 | sprintf(buf, "/Count %d\n>>\n", topcount); |
610 | 611 | objtext(outlines, buf); |
611 | 612 | } |
671 | 672 | else |
672 | 673 | fp = fopen(filename, "wb"); |
673 | 674 | if (!fp) { |
674 | err_cantopenw(filename); | |
675 | err_cantopenw(es, filename); | |
675 | 676 | return; |
676 | 677 | } |
677 | 678 | |
783 | 784 | "Symbol", "ZapfDingbats" |
784 | 785 | }; |
785 | 786 | |
786 | static int is_std_font(char const *name) { | |
787 | static bool is_std_font(char const *name) { | |
787 | 788 | unsigned i; |
788 | 789 | for (i = 0; i < lenof(stdfonts); i++) |
789 | 790 | if (strcmp(name, stdfonts[i]) == 0) |
790 | return TRUE; | |
791 | return FALSE; | |
791 | return true; | |
792 | return false; | |
792 | 793 | } |
793 | 794 | |
794 | 795 | static void make_pages_node(object *node, object *parent, page_data *first, |
829 | 830 | for (i = 0; i < TREE_BRANCH; i++) { |
830 | 831 | int number = (i+1) * count / TREE_BRANCH - i * count / TREE_BRANCH; |
831 | 832 | thisfirst = page; |
833 | thislast = NULL; | |
832 | 834 | while (number--) { |
833 | 835 | thislast = page; |
834 | 836 | page = page->next; |
835 | 837 | } |
838 | assert(thislast); | |
836 | 839 | |
837 | 840 | if (thisfirst == thislast) { |
838 | 841 | objref(node, (object *)thisfirst->spare); |
912 | 915 | } |
913 | 916 | |
914 | 917 | static int make_outline(object *parent, outline_element *items, int n, |
915 | int open) | |
918 | bool open) | |
916 | 919 | { |
917 | 920 | int level, totalcount = 0; |
918 | 921 | outline_element *itemp; |
964 | 967 | |
965 | 968 | if (itemp > items) { |
966 | 969 | char buf[80]; |
967 | int count = make_outline(curr, items, itemp - items, FALSE); | |
970 | int count = make_outline(curr, items, itemp - items, false); | |
968 | 971 | if (!open) |
969 | 972 | count = -count; |
970 | 973 | else |
1020 | 1023 | case word_Quote: |
1021 | 1024 | text = dupstr("'"); |
1022 | 1025 | break; |
1026 | default: | |
1027 | continue; | |
1023 | 1028 | } |
1024 | 1029 | |
1025 | 1030 | fputs(text, fp); |
21 | 21 | } |
22 | 22 | |
23 | 23 | void ps_backend(paragraph *sourceform, keywordlist *keywords, |
24 | indexdata *idx, void *vdoc) { | |
24 | indexdata *idx, void *vdoc, errorstate *es) { | |
25 | 25 | document *doc = (document *)vdoc; |
26 | 26 | int font_index; |
27 | 27 | font_encoding *fe; |
52 | 52 | else |
53 | 53 | fp = fopen(filename, "w"); |
54 | 54 | if (!fp) { |
55 | err_cantopenw(filename); | |
55 | err_cantopenw(es, filename); | |
56 | 56 | return; |
57 | 57 | } |
58 | 58 | |
204 | 204 | if (fe->font->info->filetype == TYPE1) |
205 | 205 | pf_writeps(fe->font->info, fp); |
206 | 206 | else |
207 | sfnt_writeps(fe->font->info, fp); | |
207 | sfnt_writeps(fe->font->info, fp, doc->psd, es); | |
208 | 208 | fprintf(fp, "%%%%EndResource\n"); |
209 | 209 | } else { |
210 | 210 | fprintf(fp, "%%%%IncludeResource: font %s\n", |
228 | 228 | ps_token(fp, &cc, "{1 index /FID ne {def} {pop pop} ifelse} forall\n"); |
229 | 229 | ps_token(fp, &cc, "/Encoding [\n"); |
230 | 230 | for (i = 0; i < 256; i++) |
231 | ps_token(fp, &cc, "/%s", glyph_extern(fe->vector[i])); | |
231 | ps_token(fp, &cc, "/%s", glyph_extern(doc->psd, fe->vector[i])); | |
232 | 232 | ps_token(fp, &cc, "] def\n"); |
233 | 233 | ps_token(fp, &cc, "currentdict end\n"); |
234 | 234 | ps_token(fp, &cc, "/fontname-%s exch definefont /%s exch def\n", |
343 | 343 | case word_Quote: |
344 | 344 | text = dupstr("'"); |
345 | 345 | break; |
346 | default: | |
347 | continue; | |
346 | 348 | } |
347 | 349 | |
348 | 350 | if (cc + strlen(text) > PS_MAXWIDTH) |
9 | 9 | typedef enum { LEFT, LEFTPLUS, CENTRE } alignment; |
10 | 10 | typedef struct { |
11 | 11 | alignment align; |
12 | int number_at_all, just_numbers; | |
12 | bool number_at_all, just_numbers; | |
13 | 13 | wchar_t *underline; |
14 | 14 | wchar_t *number_suffix; |
15 | 15 | } alignstruct; |
20 | 20 | int width; |
21 | 21 | alignstruct atitle, achapter, *asect; |
22 | 22 | int nasect; |
23 | int include_version_id; | |
24 | int indent_preambles; | |
23 | bool include_version_id; | |
24 | bool indent_preambles; | |
25 | 25 | int charset; |
26 | 26 | word bullet; |
27 | 27 | wchar_t *lquote, *rquote, *rule; |
33 | 33 | FILE *fp; |
34 | 34 | int charset; |
35 | 35 | charset_state state; |
36 | errorstate *es; | |
36 | 37 | } textfile; |
37 | 38 | |
38 | 39 | static void text_heading(textfile *, word *, word *, word *, alignstruct, |
54 | 55 | return LEFT; |
55 | 56 | } |
56 | 57 | |
57 | static textconfig text_configure(paragraph *source) { | |
58 | static textconfig text_configure(paragraph *source, errorstate *es) { | |
58 | 59 | textconfig ret; |
59 | 60 | paragraph *p; |
60 | 61 | int n; |
65 | 66 | ret.bullet.next = NULL; |
66 | 67 | ret.bullet.alt = NULL; |
67 | 68 | ret.bullet.type = word_Normal; |
68 | ret.atitle.just_numbers = FALSE; /* ignored */ | |
69 | ret.atitle.number_at_all = TRUE; /* ignored */ | |
69 | ret.atitle.just_numbers = false; /* ignored */ | |
70 | ret.atitle.number_at_all = true; /* ignored */ | |
70 | 71 | |
71 | 72 | /* |
72 | 73 | * Defaults. |
79 | 80 | ret.atitle.align = CENTRE; |
80 | 81 | ret.atitle.underline = L"\x2550\0=\0\0"; |
81 | 82 | ret.achapter.align = LEFT; |
82 | ret.achapter.just_numbers = FALSE; | |
83 | ret.achapter.number_at_all = TRUE; | |
83 | ret.achapter.just_numbers = false; | |
84 | ret.achapter.number_at_all = true; | |
84 | 85 | ret.achapter.number_suffix = L": "; |
85 | 86 | ret.achapter.underline = L"\x203E\0-\0\0"; |
86 | 87 | ret.nasect = 1; |
87 | 88 | ret.asect = snewn(ret.nasect, alignstruct); |
88 | 89 | ret.asect[0].align = LEFTPLUS; |
89 | ret.asect[0].just_numbers = TRUE; | |
90 | ret.asect[0].number_at_all = TRUE; | |
90 | ret.asect[0].just_numbers = true; | |
91 | ret.asect[0].number_at_all = true; | |
91 | 92 | ret.asect[0].number_suffix = L" "; |
92 | 93 | ret.asect[0].underline = L"\0"; |
93 | ret.include_version_id = TRUE; | |
94 | ret.indent_preambles = FALSE; | |
94 | ret.include_version_id = true; | |
95 | ret.indent_preambles = false; | |
95 | 96 | ret.bullet.text = L"\x2022\0-\0\0"; |
96 | 97 | ret.rule = L"\x2500\0-\0\0"; |
97 | 98 | ret.filename = dupstr("output.txt"); |
130 | 131 | if (!ustricmp(p->keyword, L"text-indent")) { |
131 | 132 | ret.indent = utoi(uadv(p->keyword)); |
132 | 133 | } else if (!ustricmp(p->keyword, L"text-charset")) { |
133 | ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword)); | |
134 | ret.charset = charset_from_ustr( | |
135 | &p->fpos, uadv(p->keyword), es); | |
134 | 136 | } else if (!ustricmp(p->keyword, L"text-filename")) { |
135 | 137 | sfree(ret.filename); |
136 | 138 | ret.filename = dupstr(adv(p->origkeyword)); |
317 | 319 | } |
318 | 320 | |
319 | 321 | void text_backend(paragraph *sourceform, keywordlist *keywords, |
320 | indexdata *idx, void *unused) { | |
322 | indexdata *idx, void *unused, errorstate *es) { | |
321 | 323 | paragraph *p; |
322 | 324 | textconfig conf; |
323 | 325 | word *prefix, *body, *wp; |
331 | 333 | IGNORE(keywords); /* we don't happen to need this */ |
332 | 334 | IGNORE(idx); /* or this */ |
333 | 335 | |
334 | conf = text_configure(sourceform); | |
336 | conf = text_configure(sourceform, es); | |
335 | 337 | |
336 | 338 | /* |
337 | 339 | * Open the output file. |
341 | 343 | else |
342 | 344 | tf.fp = fopen(conf.filename, "w"); |
343 | 345 | if (!tf.fp) { |
344 | err_cantopenw(conf.filename); | |
346 | err_cantopenw(es, conf.filename); | |
345 | 347 | return; |
346 | 348 | } |
347 | 349 | tf.charset = conf.charset; |
350 | tf.es = es; | |
348 | 351 | tf.state = charset_init_state; |
349 | 352 | |
350 | 353 | /* Do the title */ |
781 | 784 | for (; text; text = text->next) if (text->type == word_WeakCode) { |
782 | 785 | int wid = ustrwid(text->text, tf->charset); |
783 | 786 | if (wid > width) |
784 | err_text_codeline(&text->fpos, wid, width); | |
787 | err_text_codeline(tf->es, &text->fpos, wid, width); | |
785 | 788 | text_output_many(tf, indent, L' '); |
786 | 789 | text_output(tf, text->text); |
787 | 790 | text_output(tf, L"\n"); |
49 | 49 | static void whlp_rdadds(rdstringc *rs, const wchar_t *text, whlpconf *conf, |
50 | 50 | charset_state *state); |
51 | 51 | static void whlp_mkparagraph(struct bk_whlp_state *state, |
52 | int font, word *text, int subsidiary, | |
52 | int font, word *text, bool subsidiary, | |
53 | 53 | whlpconf *conf); |
54 | 54 | static void whlp_navmenu(struct bk_whlp_state *state, paragraph *p, |
55 | 55 | whlpconf *conf); |
149 | 149 | } |
150 | 150 | |
151 | 151 | void whlp_backend(paragraph *sourceform, keywordlist *keywords, |
152 | indexdata *idx, void *unused) { | |
152 | indexdata *idx, void *unused, errorstate *es) { | |
153 | 153 | WHLP h; |
154 | 154 | char *cntname; |
155 | 155 | paragraph *p, *lastsect; |
158 | 158 | int i; |
159 | 159 | int nesting; |
160 | 160 | indexentry *ie; |
161 | int done_contents_topic = FALSE; | |
161 | bool done_contents_topic = false; | |
162 | 162 | whlpconf conf; |
163 | 163 | |
164 | 164 | IGNORE(unused); |
206 | 206 | { |
207 | 207 | int len = strlen(conf.filename); |
208 | 208 | if (len < 4 || conf.filename[len-4] != '.' || |
209 | tolower(conf.filename[len-3] != 'h') || | |
210 | tolower(conf.filename[len-2] != 'l') || | |
211 | tolower(conf.filename[len-1] != 'p')) { | |
209 | tolower(conf.filename[len-3]) != 'h' || | |
210 | tolower(conf.filename[len-2]) != 'l' || | |
211 | tolower(conf.filename[len-1]) != 'p') { | |
212 | 212 | char *newf; |
213 | 213 | newf = snewn(len + 5, char); |
214 | 214 | sprintf(newf, "%s.hlp", conf.filename); |
222 | 222 | |
223 | 223 | state.cntfp = fopen(cntname, "wb"); |
224 | 224 | if (!state.cntfp) { |
225 | err_cantopenw(cntname); | |
225 | err_cantopenw(es, cntname); | |
226 | 226 | return; |
227 | 227 | } |
228 | 228 | state.cnt_last_level = -1; state.cnt_workaround = 0; |
249 | 249 | p->private_data = whlp_register_topic(h, rs.text, &errstr); |
250 | 250 | if (!p->private_data) { |
251 | 251 | p->private_data = whlp_register_topic(h, NULL, NULL); |
252 | err_winhelp_ctxclash(&p->fpos, rs.text, errstr); | |
252 | err_winhelp_ctxclash(es, &p->fpos, rs.text, errstr); | |
253 | 253 | } |
254 | 254 | sfree(rs.text); |
255 | 255 | } |
341 | 341 | if (p->type == para_Title) { |
342 | 342 | whlp_begin_para(h, WHLP_PARA_NONSCROLL); |
343 | 343 | state.cstate = charset_init_state; |
344 | whlp_mkparagraph(&state, FONT_TITLE, p->words, FALSE, &conf); | |
344 | whlp_mkparagraph(&state, FONT_TITLE, p->words, false, &conf); | |
345 | 345 | whlp_wtext(&state, NULL); |
346 | 346 | whlp_end_para(h); |
347 | 347 | whlp_rdaddwc(&rs, p->words, &conf, NULL); |
430 | 430 | whlp_navmenu(&state, p, &conf); |
431 | 431 | } |
432 | 432 | |
433 | done_contents_topic = TRUE; | |
433 | done_contents_topic = true; | |
434 | 434 | } |
435 | 435 | |
436 | 436 | if (lastsect && lastsect->child) { |
512 | 512 | whlp_begin_para(h, WHLP_PARA_NONSCROLL); |
513 | 513 | state.cstate = charset_init_state; |
514 | 514 | if (p->kwtext) { |
515 | whlp_mkparagraph(&state, FONT_TITLE, p->kwtext, FALSE, &conf); | |
515 | whlp_mkparagraph(&state, FONT_TITLE, p->kwtext, false, &conf); | |
516 | 516 | whlp_set_font(h, FONT_TITLE); |
517 | 517 | whlp_wtext(&state, conf.sectsuffix); |
518 | 518 | } |
519 | whlp_mkparagraph(&state, FONT_TITLE, p->words, FALSE, &conf); | |
519 | whlp_mkparagraph(&state, FONT_TITLE, p->words, false, &conf); | |
520 | 520 | whlp_wtext(&state, NULL); |
521 | 521 | whlp_end_para(h); |
522 | 522 | |
562 | 562 | if (p->type == para_Bullet) { |
563 | 563 | whlp_wtext(&state, conf.bullet); |
564 | 564 | } else { |
565 | whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, FALSE, &conf); | |
565 | whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, false, &conf); | |
566 | 566 | whlp_wtext(&state, conf.listsuffix); |
567 | 567 | } |
568 | 568 | whlp_wtext(&state, NULL); |
576 | 576 | state.cstate = charset_init_state; |
577 | 577 | |
578 | 578 | if (p->type == para_BiblioCited) { |
579 | whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, FALSE, &conf); | |
579 | whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, false, &conf); | |
580 | 580 | whlp_wtext(&state, L" "); |
581 | 581 | } |
582 | 582 | |
583 | whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE, &conf); | |
583 | whlp_mkparagraph(&state, FONT_NORMAL, p->words, false, &conf); | |
584 | 584 | whlp_wtext(&state, NULL); |
585 | 585 | whlp_end_para(h); |
586 | 586 | break; |
686 | 686 | whlp_start_hyperlink(state->h, (WHLP_TOPIC)p->private_data); |
687 | 687 | state->cstate = charset_init_state; |
688 | 688 | if (p->kwtext) { |
689 | whlp_mkparagraph(state, FONT_NORMAL, p->kwtext, TRUE, conf); | |
689 | whlp_mkparagraph(state, FONT_NORMAL, p->kwtext, true, conf); | |
690 | 690 | whlp_set_font(state->h, FONT_NORMAL); |
691 | 691 | whlp_wtext(state, conf->sectsuffix); |
692 | 692 | } |
693 | whlp_mkparagraph(state, FONT_NORMAL, p->words, TRUE, conf); | |
693 | whlp_mkparagraph(state, FONT_NORMAL, p->words, true, conf); | |
694 | 694 | whlp_wtext(state, NULL); |
695 | 695 | whlp_end_hyperlink(state->h); |
696 | 696 | whlp_end_para(state->h); |
698 | 698 | } |
699 | 699 | |
700 | 700 | static void whlp_mkparagraph(struct bk_whlp_state *state, |
701 | int font, word *text, int subsidiary, | |
701 | int font, word *text, bool subsidiary, | |
702 | 702 | whlpconf *conf) { |
703 | 703 | keyword *kwl; |
704 | 704 | int deffont = font; |
788 | 788 | if (cvt_ok(conf->charset, text->text) || !text->alt) |
789 | 789 | whlp_wtext(state, text->text); |
790 | 790 | else |
791 | whlp_mkparagraph(state, deffont, text->alt, FALSE, conf); | |
791 | whlp_mkparagraph(state, deffont, text->alt, false, conf); | |
792 | 792 | } else if (removeattr(text->type) == word_WhiteSpace) { |
793 | 793 | whlp_wtext(state, L" "); |
794 | 794 | } else if (removeattr(text->type) == word_Quote) { |
0 | /*.o | |
1 | /sbcsdat.c | |
2 | /sbcsdat.h | |
3 | /convcs | |
4 | /cstable | |
5 | /confuse | |
6 | /csshow | |
7 | /libcharset.a | |
8 | .deps | |
9 | .ninja_deps | |
10 | .ninja_log | |
11 | /Makefile | |
12 | /Makefile.in | |
13 | /aclocal.m4 | |
14 | /autom4te.cache/ | |
15 | /compile | |
16 | /configure | |
17 | /depcomp | |
18 | /install-sh | |
19 | /missing | |
20 | /stamp-h1 | |
21 | /config.log | |
22 | /config.status | |
23 | /CMakeCache.txt | |
24 | /CMakeFiles | |
25 | /cmake_install.cmake | |
26 | /build.ninja | |
27 | /rules.ninja |
# CMake-based build system for libcharset.
#
# The master list of source files is kept in Makefile.am alongside this
# file. To avoid editing that list in more than one place, this script
# reads Makefile.am and extracts the <name>_SOURCES definitions from it
# using CMake's built-in file and string handling.

# The <min>...<max> form keeps 3.0 as the oldest supported release
# (releases before 3.12 ignore the "...<max>" suffix) while letting newer
# releases, which refuse a bare policy version below 3.5, still configure.
cmake_minimum_required(VERSION 3.0...3.10)
project(libcharset LANGUAGES C)

# Read Makefile.am, join backslash-continued lines, then collect every
# "NAME = VALUE" definition. The expansions are quoted so the file text
# is always passed as a single argument and is never split on ';'.
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/Makefile.am" MAKEFILE_AM)
string(REPLACE "\\\n" " " MAKEFILE_AM "${MAKEFILE_AM}")
string(REGEX MATCHALL "[^ \n]* = [^\n]*" MAKEFILE_AM_DEFS "${MAKEFILE_AM}")

# Turn each <name>_SOURCES definition into a target. Only the *.c entries
# are used as sources; libcharset_a becomes the static library 'charset',
# and every other name becomes an executable linked against it (unless
# LIBCHARSET_LIBRARY_ONLY is set). The generated sbcsdat.h is listed as a
# source of every target so that it is produced before compilation.
foreach(MAKEFILE_AM_DEF ${MAKEFILE_AM_DEFS})
  if("${MAKEFILE_AM_DEF}" MATCHES "^([a-z][^ ]*)_SOURCES = (.*)")
    set(TARGET "${CMAKE_MATCH_1}")
    string(REGEX MATCHALL "[^ ]*\\.c" SOURCES "${CMAKE_MATCH_2}")
    if("${TARGET}" STREQUAL "libcharset_a")
      add_library(charset STATIC ${SOURCES}
        "${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.h")
      target_include_directories(charset PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}")
    elseif(NOT LIBCHARSET_LIBRARY_ONLY)
      add_executable(${TARGET} ${SOURCES}
        "${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.h")
      target_include_directories(${TARGET} PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}")
      target_link_libraries(${TARGET} PRIVATE charset)
    endif()
  endif()
endforeach()

# Generate sbcsdat.c and sbcsdat.h in the build directory by running
# sbcsgen.pl over sbcs.dat. Inputs and outputs are spelled as absolute
# paths, and VERBATIM makes argument escaping platform-independent.
add_custom_command(
  OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.c"
  COMMAND perl "${CMAKE_CURRENT_SOURCE_DIR}/sbcsgen.pl"
          "${CMAKE_CURRENT_SOURCE_DIR}/sbcs.dat"
          "--source=${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.c"
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/sbcsgen.pl"
          "${CMAKE_CURRENT_SOURCE_DIR}/sbcs.dat"
  VERBATIM)
add_custom_command(
  OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.h"
  COMMAND perl "${CMAKE_CURRENT_SOURCE_DIR}/sbcsgen.pl"
          "${CMAKE_CURRENT_SOURCE_DIR}/sbcs.dat"
          "--header=${CMAKE_CURRENT_BINARY_DIR}/sbcsdat.h"
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/sbcsgen.pl"
          "${CMAKE_CURRENT_SOURCE_DIR}/sbcs.dat"
  VERBATIM)
0 | # -*- make -*- | |
1 | # | |
2 | # Makefile for libcharset. | |
3 | ||
4 | # This Makefile should be sufficient to build libcharset and its | |
5 | # demo application all on its own. However, it's also a valid | |
6 | # Makefile _fragment_ which can be linked in to another program | |
7 | # Makefile to allow libcharset to be built directly into its | |
8 | # binary. | |
9 | ||
10 | # To include this as part of another Makefile, you need to: | |
11 | # | |
12 | # - Define $(LIBCHARSET_SRCDIR) to be a directory prefix (i.e. | |
13 | # probably ending in a slash) which allows access to the | |
14 | # libcharset source files. | |
15 | # | |
16 | # - Define $(LIBCHARSET_OBJDIR) to be a directory prefix (i.e. | |
17 | # probably ending in a slash) which allows access to the | |
18 | # directory where the libcharset object files need to be put. | |
19 | # | |
20 | # - Define $(LIBCHARSET_OBJPFX) to be a filename prefix to be | |
21 | # applied to the libcharset object files (in case, for example, | |
22 | # the file names clash with those of the main application, and | |
23 | # you need to call them cs-*.o to resolve the clash). | |
24 | # | |
25 | # - Define $(LIBCHARSET_GENPFX) to be a prefix to be added to | |
26 | # targets such as `all' and `clean'. (Mostly the point of this | |
27 | # is to get those targets out of the way for the Makefile | |
28 | # fragment including us.) | |
29 | # | |
30 | # - If you need your compiler to use the -MD flag, define $(MD) to | |
31 | # be `-MD'. | |
32 | # | |
33 | # This Makefile fragment will then define rules for building each | |
34 | # object file, and will in turn define $(LIBCHARSET_OBJS) to be | |
35 | # what you need to add to your link line. | |
36 | ||
# Default target: the static library plus the four command-line programs.
$(LIBCHARSET_GENPFX)all: \
	$(LIBCHARSET_OBJDIR)libcharset.a \
	$(LIBCHARSET_OBJDIR)convcs \
	$(LIBCHARSET_OBJDIR)cstable \
	$(LIBCHARSET_OBJDIR)confuse \
	$(LIBCHARSET_OBJDIR)csshow

# Each program is compiled and linked in one step from its single source
# file and libcharset.a; $@ names the program being built.
$(LIBCHARSET_OBJDIR)convcs: $(LIBCHARSET_SRCDIR)convcs.c \
	$(LIBCHARSET_OBJDIR)libcharset.a
	$(CC) $(CFLAGS) -o $@ \
		$(LIBCHARSET_SRCDIR)convcs.c \
		$(LIBCHARSET_OBJDIR)libcharset.a

# cstable additionally waits for the generated sbcsdat.c and searches the
# object directory for headers.
$(LIBCHARSET_OBJDIR)cstable: $(LIBCHARSET_SRCDIR)cstable.c \
	$(LIBCHARSET_OBJDIR)libcharset.a \
	$(LIBCHARSET_OBJDIR)sbcsdat.c
	$(CC) $(CFLAGS) -I $(LIBCHARSET_OBJDIR). \
		-o $@ \
		$(LIBCHARSET_SRCDIR)cstable.c \
		$(LIBCHARSET_OBJDIR)libcharset.a

$(LIBCHARSET_OBJDIR)confuse: $(LIBCHARSET_SRCDIR)confuse.c \
	$(LIBCHARSET_OBJDIR)libcharset.a
	$(CC) $(CFLAGS) -o $@ \
		$(LIBCHARSET_SRCDIR)confuse.c \
		$(LIBCHARSET_OBJDIR)libcharset.a

$(LIBCHARSET_OBJDIR)csshow: $(LIBCHARSET_SRCDIR)csshow.c \
	$(LIBCHARSET_OBJDIR)libcharset.a
	$(CC) $(CFLAGS) -o $@ \
		$(LIBCHARSET_SRCDIR)csshow.c \
		$(LIBCHARSET_OBJDIR)libcharset.a
69 | ||
# Object files that make up libcharset.a. sbcsdat.o is built from a
# generated source file; its rule lives with the generator rules.
# iso6937.o is included so that this list agrees with the source list in
# Makefile.am, which names iso6937.c.
LIBCHARSET_OBJS = \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)big5enc.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)big5set.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)cns11643.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)cp949.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)emacsenc.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)euc.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)fromucs.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)gb2312.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)htmlcs.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)hz.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)iso2022.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)iso2022s.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)iso6937.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)istate.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)jisx0208.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)jisx0212.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)ksx1001.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)locale.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)localenc.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)macenc.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)mimeenc.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)sbcs.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)sbcsdat.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)shiftjis.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)slookup.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)superset.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)toucs.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf16.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf7.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf8.o \
	$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)xenc.o \
	# end of list

# Archive all the objects into the static library.
$(LIBCHARSET_OBJDIR)libcharset.a: $(LIBCHARSET_OBJS)
	ar rcs $@ $(LIBCHARSET_OBJS)

# One explicit rule per object. Each compiles a single source file from
# $(LIBCHARSET_SRCDIR) with that directory on the include path. Rules that
# list a generated sbcsdat file as a prerequisite also add
# $(LIBCHARSET_OBJDIR) to the include path.

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)big5enc.o: \
	$(LIBCHARSET_SRCDIR)big5enc.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)big5set.o: \
	$(LIBCHARSET_SRCDIR)big5set.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)cns11643.o: \
	$(LIBCHARSET_SRCDIR)cns11643.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)cp949.o: \
	$(LIBCHARSET_SRCDIR)cp949.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)emacsenc.o: \
	$(LIBCHARSET_SRCDIR)emacsenc.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)euc.o: \
	$(LIBCHARSET_SRCDIR)euc.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)fromucs.o: \
	$(LIBCHARSET_SRCDIR)fromucs.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)gb2312.o: \
	$(LIBCHARSET_SRCDIR)gb2312.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)htmlcs.o: \
	$(LIBCHARSET_SRCDIR)htmlcs.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)hz.o: \
	$(LIBCHARSET_SRCDIR)hz.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)iso2022.o: \
	$(LIBCHARSET_SRCDIR)iso2022.c \
	$(LIBCHARSET_OBJDIR)sbcsdat.h
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -I $(LIBCHARSET_OBJDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)iso2022s.o: \
	$(LIBCHARSET_SRCDIR)iso2022s.c \
	$(LIBCHARSET_OBJDIR)sbcsdat.h
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -I $(LIBCHARSET_OBJDIR). -c -o $@ $<

# NOTE(review): whether iso6937.c includes the generated sbcsdat.h is not
# visible from here. The extra prerequisite and include path mirror the
# iso2022 rules and are harmless if unneeded - confirm and trim.
$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)iso6937.o: \
	$(LIBCHARSET_SRCDIR)iso6937.c \
	$(LIBCHARSET_OBJDIR)sbcsdat.h
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -I $(LIBCHARSET_OBJDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)istate.o: \
	$(LIBCHARSET_SRCDIR)istate.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)jisx0208.o: \
	$(LIBCHARSET_SRCDIR)jisx0208.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)jisx0212.o: \
	$(LIBCHARSET_SRCDIR)jisx0212.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)ksx1001.o: \
	$(LIBCHARSET_SRCDIR)ksx1001.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)locale.o: \
	$(LIBCHARSET_SRCDIR)locale.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)localenc.o: \
	$(LIBCHARSET_SRCDIR)localenc.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)macenc.o: \
	$(LIBCHARSET_SRCDIR)macenc.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)mimeenc.o: \
	$(LIBCHARSET_SRCDIR)mimeenc.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)sbcs.o: \
	$(LIBCHARSET_SRCDIR)sbcs.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)shiftjis.o: \
	$(LIBCHARSET_SRCDIR)shiftjis.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)slookup.o: \
	$(LIBCHARSET_SRCDIR)slookup.c \
	$(LIBCHARSET_OBJDIR)sbcsdat.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -I $(LIBCHARSET_OBJDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)superset.o: \
	$(LIBCHARSET_SRCDIR)superset.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)toucs.o: \
	$(LIBCHARSET_SRCDIR)toucs.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf16.o: \
	$(LIBCHARSET_SRCDIR)utf16.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf7.o: \
	$(LIBCHARSET_SRCDIR)utf7.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)utf8.o: \
	$(LIBCHARSET_SRCDIR)utf8.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)xenc.o: \
	$(LIBCHARSET_SRCDIR)xenc.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<
224 | ||
# sbcsdat.o is the odd one out: its source file is produced by the build,
# so the source is looked for in the object directory rather than in
# $(LIBCHARSET_SRCDIR).

$(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)sbcsdat.o: \
	$(LIBCHARSET_OBJDIR)sbcsdat.c
	$(CC) $(CFLAGS) $(MD) -I $(LIBCHARSET_SRCDIR). -c -o $@ $<

# Both generated files come from running sbcsgen.pl over sbcs.dat; the
# --source / --header option tells the script which file to write, and
# $@ is the file being generated.

$(LIBCHARSET_OBJDIR)sbcsdat.c: \
	$(LIBCHARSET_SRCDIR)sbcs.dat \
	$(LIBCHARSET_SRCDIR)sbcsgen.pl
	perl $(LIBCHARSET_SRCDIR)sbcsgen.pl \
		$(LIBCHARSET_SRCDIR)sbcs.dat \
		--source=$@

$(LIBCHARSET_OBJDIR)sbcsdat.h: \
	$(LIBCHARSET_SRCDIR)sbcs.dat \
	$(LIBCHARSET_SRCDIR)sbcsgen.pl
	perl $(LIBCHARSET_SRCDIR)sbcsgen.pl \
		$(LIBCHARSET_SRCDIR)sbcs.dat \
		--header=$@
245 | ||
# Remove everything the build produces: the objects, the library, the two
# generated sbcsdat files, and all four programs built by the 'all' target.
$(LIBCHARSET_GENPFX)clean:
	rm -f $(LIBCHARSET_OBJDIR)$(LIBCHARSET_OBJPFX)*.o \
		$(LIBCHARSET_OBJDIR)libcharset.a \
		$(LIBCHARSET_OBJDIR)sbcsdat.c \
		$(LIBCHARSET_OBJDIR)sbcsdat.h \
		$(LIBCHARSET_OBJDIR)convcs \
		$(LIBCHARSET_OBJDIR)cstable \
		$(LIBCHARSET_OBJDIR)confuse \
		$(LIBCHARSET_OBJDIR)csshow
# Automake input for libcharset.
#
# CMakeLists.txt in this directory also reads this file and extracts the
# per-target source lists from it, splitting the file names on spaces
# only. Keep continuation lines indented with spaces, not tabs.

bin_PROGRAMS = convcs cstable csshow confuse
lib_LIBRARIES = libcharset.a

convcs_SOURCES = convcs.c
convcs_LDADD = libcharset.a

cstable_SOURCES = cstable.c
cstable_LDADD = libcharset.a

csshow_SOURCES = csshow.c
csshow_LDADD = libcharset.a

confuse_SOURCES = confuse.c
confuse_LDADD = libcharset.a

libcharset_a_SOURCES = charset.h internal.h sbcsdat.h big5enc.c \
    big5set.c cns11643.c cp949.c emacsenc.c enum.h euc.c fromucs.c \
    gb2312.c htmlcs.c hz.c iso2022.c iso2022s.c iso6937.c istate.c \
    jisx0208.c jisx0212.c ksx1001.c locale.c localenc.c macenc.c \
    mimeenc.c sbcs.c sbcsdat.c shiftjis.c slookup.c superset.c toucs.c \
    utf16.c utf7.c utf8.c xenc.c

# sbcsdat.c and sbcsdat.h are generated from sbcs.dat by sbcsgen.pl. The
# inputs are named explicitly via $(srcdir) instead of through the
# GNU-make-only $^ variable, so the recipes also work with other make
# implementations and in builds outside the source directory.
BUILT_SOURCES = sbcsdat.c sbcsdat.h
CLEANFILES = sbcsdat.c sbcsdat.h
sbcsdat.c: sbcsgen.pl sbcs.dat
	perl $(srcdir)/sbcsgen.pl $(srcdir)/sbcs.dat --source=sbcsdat.c
sbcsdat.h: sbcsgen.pl sbcs.dat
	perl $(srcdir)/sbcsgen.pl $(srcdir)/sbcs.dat --header=sbcsdat.h

# The generator script and its data file must ship in the tarball.
EXTRA_DIST = sbcsgen.pl sbcs.dat
56 | 56 | * charset_state. |
57 | 57 | */ |
58 | 58 | |
59 | static int write_big5(charset_spec const *charset, long int input_chr, | |
60 | charset_state *state, | |
61 | void (*emit)(void *ctx, long int output), void *emitctx) | |
59 | static bool write_big5(charset_spec const *charset, long int input_chr, | |
60 | charset_state *state, | |
61 | void (*emit)(void *ctx, long int output), void *emitctx) | |
62 | 62 | { |
63 | 63 | UNUSEDARG(charset); |
64 | 64 | UNUSEDARG(state); |
65 | 65 | |
66 | 66 | if (input_chr == -1) |
67 | return TRUE; /* stateless; no cleanup required */ | |
67 | return true; /* stateless; no cleanup required */ | |
68 | 68 | |
69 | 69 | if (input_chr < 0x80) { |
70 | 70 | emit(emitctx, input_chr); |
71 | return TRUE; | |
71 | return true; | |
72 | 72 | } else { |
73 | 73 | int r, c; |
74 | 74 | if (unicode_to_big5(input_chr, &r, &c)) { |
75 | 75 | emit(emitctx, r + 0xA1); |
76 | 76 | emit(emitctx, c + 0x40); |
77 | return TRUE; | |
77 | return true; | |
78 | 78 | } else { |
79 | return FALSE; | |
79 | return false; | |
80 | 80 | } |
81 | 81 | } |
82 | 82 | } |
3964 | 3964 | return big5_forward[r][c]; |
3965 | 3965 | } |
3966 | 3966 | |
3967 | /* This one returns 1 on success, 0 if the code point doesn't exist. */ | |
3968 | int unicode_to_big5(long int unicode, int *r, int *c) | |
3967 | /* This one returns true on success, false if the code point doesn't exist. */ | |
3968 | bool unicode_to_big5(long int unicode, int *r, int *c) | |
3969 | 3969 | { |
3970 | 3970 | int rr, cc; |
3971 | 3971 | long int uu; |
3985 | 3985 | else { |
3986 | 3986 | *r = rr; |
3987 | 3987 | *c = cc; |
3988 | return 1; | |
3988 | return true; | |
3989 | 3989 | } |
3990 | 3990 | } |
3991 | return 0; | |
3991 | return false; | |
3992 | 3992 | } |
3993 | 3993 | |
3994 | 3994 | #ifdef TESTMODE |
6 | 6 | #define charset_charset_h |
7 | 7 | |
8 | 8 | #include <stddef.h> |
9 | #include <stdbool.h> | |
9 | 10 | |
10 | 11 | /* |
11 | 12 | * Enumeration that lists all the multibyte or single-byte |
78 | 79 | CS_UTF16, |
79 | 80 | CS_UTF16BE, |
80 | 81 | CS_UTF16LE, |
82 | CS_UTF16BE_NO_BOM, | |
83 | CS_UTF16LE_NO_BOM, | |
81 | 84 | CS_EUC_JP, |
82 | 85 | CS_EUC_CN, |
83 | 86 | CS_EUC_KR, |
94 | 97 | CS_BS4730, |
95 | 98 | CS_DEC_GRAPHICS, |
96 | 99 | CS_EUC_TW, |
100 | CS_ISO6937, | |
101 | CS_ISO6937_EURO, | |
102 | CS_ITS, | |
103 | CS_SAIL, | |
97 | 104 | CS_LIMIT /* dummy value indicating extent of enum */ |
98 | 105 | } charset_t; |
99 | 106 | |
107 | 114 | * charset_state mystate = CHARSET_INIT_STATE; |
108 | 115 | */ |
109 | 116 | #define CHARSET_INIT_STATE { 0L, 0L } /* a suitable initialiser */ |
117 | ||
118 | #if defined __cplusplus | |
119 | extern "C" { | |
120 | #if 0 | |
121 | } | |
122 | #endif | |
123 | #endif | |
110 | 124 | |
111 | 125 | /* |
112 | 126 | * This external variable contains the same data, but is provided |
167 | 181 | * If `error' is non-NULL and a character is found which cannot be |
168 | 182 | * expressed in the output charset, conversion will terminate at |
169 | 183 | * that character (so `input' points to the offending character) |
170 | * and `*error' will be set to TRUE; if `error' is non-NULL and no | |
184 | * and `*error' will be set to true; if `error' is non-NULL and no | |
171 | 185 | * difficult characters are encountered, `*error' will be set to |
172 | * FALSE. If `error' is NULL, difficult characters will simply be | |
186 | * false. If `error' is NULL, difficult characters will simply be | |
173 | 187 | * ignored. |
174 | 188 | * |
175 | 189 | * If `input' is NULL, this routine will output the necessary bytes |
186 | 200 | |
187 | 201 | int charset_from_unicode(const wchar_t **input, int *inlen, |
188 | 202 | char *output, int outlen, |
189 | int charset, charset_state *state, int *error); | |
203 | int charset, charset_state *state, bool *error); | |
190 | 204 | |
191 | 205 | /* |
192 | 206 | * Convert X11 encoding names to and from our charset identifiers. |
233 | 247 | int charset_upgrade(int charset); |
234 | 248 | |
235 | 249 | /* |
236 | * This function returns TRUE if the input charset is a vaguely | |
237 | * sensible superset of ASCII. That is, it returns FALSE for 7-bit | |
250 | * This function returns true if the input charset is a vaguely | |
251 | * sensible superset of ASCII. That is, it returns false for 7-bit | |
238 | 252 | * encoding formats such as HZ and UTF-7. |
239 | 253 | */ |
240 | int charset_contains_ascii(int charset); | |
241 | ||
242 | /* | |
243 | * This function returns TRUE if the input charset is single-byte. | |
244 | */ | |
245 | int charset_is_single_byte(int charset); | |
254 | bool charset_contains_ascii(int charset); | |
255 | ||
256 | /* | |
257 | * This function returns true if the input charset is single-byte. | |
258 | */ | |
259 | bool charset_is_single_byte(int charset); | |
246 | 260 | |
247 | 261 | /* |
248 | 262 | * This function tries to deduce the CS_* identifier of the charset |
284 | 298 | * if (charset_exists(cs)) |
285 | 299 | * do_stuff_with(cs); |
286 | 300 | */ |
287 | int charset_exists(int charset); | |
301 | bool charset_exists(int charset); | |
302 | ||
303 | #if defined __cplusplus | |
304 | #if 0 | |
305 | { | |
306 | #endif | |
307 | } | |
308 | #endif | |
288 | 309 | |
289 | 310 | #endif /* charset_charset_h */ |
13019 | 13019 | return cns11643_forward((p*94+r)*94+c); |
13020 | 13020 | } |
13021 | 13021 | |
13022 | /* This one returns 1 on success, 0 if the code point doesn't exist. */ | |
13023 | int unicode_to_cns11643(long int unicode, int *p, int *r, int *c) | |
13022 | /* This one returns true on success, false if the code point doesn't exist. */ | |
13023 | bool unicode_to_cns11643(long int unicode, int *p, int *r, int *c) | |
13024 | 13024 | { |
13025 | 13025 | int index, pp, rr, cc; |
13026 | 13026 | long int uu; |
13044 | 13044 | *p = pp; |
13045 | 13045 | *r = rr; |
13046 | 13046 | *c = cc; |
13047 | return 1; | |
13047 | return true; | |
13048 | 13048 | } |
13049 | 13049 | } |
13050 | return 0; | |
13050 | return false; | |
13051 | 13051 | } |
13052 | 13052 | |
13053 | 13053 | #ifdef TESTMODE |
# autoconf input for libcharset.

# Package identity; charset.h is the sanity-check file proving that
# configure is being run against the right source tree. Auxiliary build
# scripts are kept in the top-level directory.
AC_INIT([libcharset], [NOVERSION], [anakin@pobox.com])
AC_CONFIG_SRCDIR([charset.h])
AC_CONFIG_AUX_DIR([.])

# 'foreign' strictness: do not insist on the GNU standard files.
AM_INIT_AUTOMAKE([foreign])

# Tools needed to compile, archive and install a C static library.
AC_PROG_CC
AC_PROG_RANLIB
AC_PROG_INSTALL

AC_LANG([C])

AC_CONFIG_FILES([Makefile])
AC_OUTPUT
38 | 38 | char *p = *++argv; |
39 | 39 | char *orig = p; |
40 | 40 | char *end; |
41 | int base = 16, semi_ok = 0; | |
41 | int base = 16; | |
42 | bool semi_ok = false; | |
42 | 43 | wchar_t ch; |
43 | 44 | |
44 | 45 | if ((p[0] == 'U' || p[0] == 'u') && |
52 | 53 | p++; |
53 | 54 | else |
54 | 55 | base = 10; |
55 | semi_ok = 1; | |
56 | } else if (mbtowc(&ch, p, strlen(p)) == strlen(p)) { | |
56 | semi_ok = true; | |
57 | } else if ((size_t)mbtowc(&ch, p, strlen(p)) == strlen(p)) { | |
57 | 58 | chars[nchars++] = ch; |
58 | 59 | continue; |
59 | 60 | } |
73 | 74 | for (i = 0; i < nchars; i++) { |
74 | 75 | wchar_t inbuf[1]; |
75 | 76 | const wchar_t *inptr; |
76 | int inlen, error, ret; | |
77 | int inlen, ret; | |
78 | bool error; | |
77 | 79 | |
78 | 80 | if (!charset_exists(cs)) { |
79 | 81 | encodings[i*CS_LIMIT+cs].len = 0; |
83 | 85 | inbuf[0] = chars[i]; |
84 | 86 | inptr = inbuf; |
85 | 87 | inlen = 1; |
86 | error = 0; | |
88 | error = false; | |
87 | 89 | ret = charset_from_unicode(&inptr, &inlen, |
88 | 90 | encodings[i*CS_LIMIT+cs].string, |
89 | 91 | MAXENCLEN, cs, NULL, &error); |
62 | 62 | fputs(helptext, fp); |
63 | 63 | } |
64 | 64 | |
/*
 * Test whether `argument' is the long option `optname', either bare
 * ("--name") or with an attached value ("--name=value").
 *
 * On a match, returns true and sets *val to the text after the '=', or
 * to NULL if there was no '='. On a mismatch, returns false and leaves
 * *val untouched.
 */
bool match_long_opt(const char *argument, const char *optname,
                    const char **val)
{
    const size_t namelen = strlen(optname);
    const size_t keylen = strcspn(argument, "=");   /* text before any '=' */

    if (keylen != namelen || memcmp(argument, optname, namelen) != 0)
        return false;

    /* argument[namelen] is now either '=' or the terminating NUL. */
    *val = (argument[namelen] != '\0') ? argument + namelen + 1 : NULL;
    return true;
}
78 | 79 | |
79 | 80 | static int srcset = CS_NONE; |
80 | 81 | static int dstset = CS_NONE; |
81 | static int html_mode = 0; | |
82 | static bool html_mode = false; | |
82 | 83 | static const wchar_t *replacement_cooked = NULL; |
83 | 84 | static int replacement_cooked_len = 0; |
84 | 85 | |
85 | 86 | int main(int argc, char **argv) |
86 | 87 | { |
87 | int doing_opts = 1; | |
88 | bool doing_opts = true; | |
88 | 89 | int localeset; |
89 | charset_state instate = CHARSET_INIT_STATE; | |
90 | charset_state outstate = CHARSET_INIT_STATE; | |
91 | char inbuf[256], outbuf[256]; | |
92 | wchar_t midbuf[256]; | |
93 | 90 | const char *replacement_raw = NULL; |
94 | 91 | const char *inptr; |
95 | const wchar_t *midptr; | |
96 | int rdlen, inlen, midlen, inret, midret; | |
92 | int inlen; | |
97 | 93 | const char *infilename = NULL; |
98 | 94 | |
99 | 95 | setlocale(LC_CTYPE, ""); |
104 | 100 | const char *v; |
105 | 101 | if (*p == '-' && p[1] && doing_opts) { |
106 | 102 | if (!strcmp(p, "--")) { |
107 | doing_opts = 0; | |
103 | doing_opts = false; | |
108 | 104 | } else if (match_long_opt(p, "--help", &v)) { |
109 | 105 | help(stdout); |
110 | 106 | return 0; |
120 | 116 | } |
121 | 117 | replacement_raw = v; |
122 | 118 | } else if (match_long_opt(p, "--html", &v)) { |
123 | html_mode = 1; | |
119 | html_mode = true; | |
124 | 120 | } else { |
125 | 121 | fprintf(stderr, "convcs: unrecognised option '%s'\n", p); |
126 | 122 | return 1; |
127 | 123 | } |
128 | 124 | } else { |
129 | int cs; | |
130 | ||
131 | 125 | if (srcset == CS_NONE) { |
132 | 126 | srcset = !strcmp(p, "-")? localeset : charset_from_localenc(p); |
133 | 127 | if (srcset == CS_NONE) { |
220 | 214 | if (html_srcset != CS_NONE) { |
221 | 215 | const char *output_cs_name = charset_to_mimeenc(dstset); |
222 | 216 | srcset = html_srcset; |
223 | assert(namepos + namelen <= rdret); | |
217 | assert(namepos + namelen <= (size_t)rdret); | |
224 | 218 | convert_got_data(inbuf, namepos); |
225 | 219 | convert_got_data(output_cs_name, strlen(output_cs_name)); |
226 | 220 | convert_got_data(inbuf + namepos + namelen, |
230 | 224 | } |
231 | 225 | } |
232 | 226 | |
233 | while (1) { | |
234 | if (!fgets(inbuf, sizeof(inbuf), infile)) | |
235 | break; /* EOF */ | |
236 | ||
237 | convert_got_data(inbuf, strlen(inbuf)); | |
227 | bool eof = false; | |
228 | while (!eof) { | |
229 | /* | |
230 | * Manual loop on getc which has the feature of fgets that we | |
231 | * stop if we see a newline (so that when convcs is run | |
232 | * interactively in a terminal it will deliver each translated | |
233 | * line promptly), but also has the feature of fread that it | |
234 | * provides the correct buffer length even in the face of NUL | |
235 | * bytes in the input. | |
236 | */ | |
237 | ||
238 | size_t nread = 0; | |
239 | while (nread < lenof(inbuf)) { | |
240 | int c = getc(infile); | |
241 | if (c == EOF) { | |
242 | eof = true; | |
243 | break; | |
244 | } | |
245 | inbuf[nread++] = c; | |
246 | if (c == '\n') | |
247 | break; | |
248 | } | |
249 | ||
250 | if (nread) | |
251 | convert_got_data(inbuf, nread); | |
238 | 252 | } |
239 | 253 | convert_done(); |
240 | 254 | return 0; |
263 | 277 | lenof(midbuf), srcset, |
264 | 278 | &instate, replacement_cooked, |
265 | 279 | replacement_cooked_len)) > 0) { |
266 | int error; | |
280 | bool error; | |
267 | 281 | |
268 | 282 | midlen = inret; |
269 | 283 | midptr = midbuf; |
276 | 290 | while ( (midret = charset_from_unicode(&midptr, &midlen, outbuf, |
277 | 291 | lenof(outbuf), dstset, |
278 | 292 | &outstate, &error)) > 0 || |
279 | error != 0) { | |
293 | error) { | |
280 | 294 | fwrite(outbuf, 1, midret, stdout); |
281 | 295 | if (error) { |
282 | 296 | const wchar_t *repl_ptr = replacement_cooked; |
56 | 56 | * charset_state. |
57 | 57 | */ |
58 | 58 | |
59 | static int write_cp949(charset_spec const *charset, long int input_chr, | |
60 | charset_state *state, | |
61 | void (*emit)(void *ctx, long int output), | |
62 | void *emitctx) | |
59 | static bool write_cp949(charset_spec const *charset, long int input_chr, | |
60 | charset_state *state, | |
61 | void (*emit)(void *ctx, long int output), | |
62 | void *emitctx) | |
63 | 63 | { |
64 | 64 | UNUSEDARG(charset); |
65 | 65 | UNUSEDARG(state); |
66 | 66 | |
67 | 67 | if (input_chr == -1) |
68 | return TRUE; /* stateless; no cleanup required */ | |
68 | return true; /* stateless; no cleanup required */ | |
69 | 69 | |
70 | 70 | if (input_chr < 0x80) { |
71 | 71 | emit(emitctx, input_chr); |
72 | return TRUE; | |
72 | return true; | |
73 | 73 | } else { |
74 | 74 | int r, c; |
75 | 75 | if (unicode_to_cp949(input_chr, &r, &c)) { |
76 | 76 | emit(emitctx, r + 0x80); |
77 | 77 | emit(emitctx, c + 0x40); |
78 | return TRUE; | |
78 | return true; | |
79 | 79 | } else { |
80 | return FALSE; | |
80 | return false; | |
81 | 81 | } |
82 | 82 | } |
83 | 83 | } |
11 | 11 | * terminal window, of course. |
12 | 12 | * |
13 | 13 | * Possible extra features: |
14 | * - configurable row len and table size. | |
14 | * - configurable row length. | |
15 | 15 | * - option to disambiguate the various classes of failure in the |
16 | 16 | * output, e.g. if terminfo gives us control sequences to change |
17 | 17 | * colours then we could colour the missing characters differently |
22 | 22 | * of undisplayability. (In particular, don't forget to turn off |
23 | 23 | * the early exit when nothing in the range is printable at |
24 | 24 | * all.) |
25 | * - ability to display sub-blocks of multibyte encodings such as | |
26 | * EUCs. But that would need some thought about how to sensibly | |
27 | * index those tables. | |
28 | 25 | */ |
29 | 26 | |
30 | #define _XOPEN_SOURCE 500 /* for wcwidth and snprintf */ | |
27 | /* | |
28 | * Feature macros I've found necessary to make the standard headers | |
29 | * declare wcwidth and snprintf (on various systems). | |
30 | */ | |
31 | #define _XOPEN_SOURCE 500 | |
32 | #define _C99_SOURCE | |
31 | 33 | |
32 | 34 | #include <assert.h> |
33 | 35 | #include <stdio.h> |
47 | 49 | #include "charset.h" |
48 | 50 | |
49 | 51 | static const char *helptext = |
50 | "usage: csshow ( CHARSET | BASE-UNICODE-VALUE )\n" | |
51 | " e.g.: csshow Win1252\n" | |
52 | " csshow U+2500\n" | |
52 | "usage: csshow CHARSET-NAME [ ENCODING-PREFIX-BYTE... ]\n" | |
53 | " e.g.: csshow Win1252 show a whole single-byte charset\n" | |
54 | " csshow Shift-JIS show all single-byte chars in a " | |
55 | "multibyte charset\n" | |
56 | " csshow Shift-JIS 9C show all chars encoded as 9C xx in " | |
57 | "Shift-JIS\n\n" | |
58 | " or: csshow BASE-UNICODE-VALUE [ +RANGE-LENGTH | END-UNICODE-VALUE ]\n" | |
59 | " e.g.: csshow U+2500 show 0x100 characters starting at U+2500 " | |
60 | "inclusive\n" | |
61 | " e.g.: csshow U+2500 +128 show a different number of characters\n" | |
62 | " e.g.: csshow U+2500 +0x80 same effect, but you can write the length " | |
63 | "in hex\n" | |
64 | " csshow U+2500 U+2580 or specify the (non-inclusive) range " | |
65 | "endpoint\n\n" | |
53 | 66 | " also: csshow --help display this help text\n" |
54 | 67 | ; |
55 | 68 | |
62 | 75 | BAD_CHAR_IN_SOURCE_CHARSET, |
63 | 76 | BAD_CHAR_IN_OUTPUT_CHARSET, |
64 | 77 | UNPRINTABLE_CHAR, |
78 | MULTIBYTE_INTRODUCER, | |
65 | 79 | FIRST_PRINTABLE_VALUE, |
66 | 80 | COMBINING_CHAR = FIRST_PRINTABLE_VALUE, |
67 | 81 | WIDE_PRINTABLE_CHAR, |
72 | 86 | char buf[7]; /* maximum even theoretical UTF-8 code length, plus NUL */ |
73 | 87 | }; |
74 | 88 | |
89 | struct buf { | |
90 | char *data; | |
91 | size_t size, len; | |
92 | }; | |
93 | static char *buf_add_space(struct buf *buf, size_t space) | |
94 | { | |
95 | char *toret; | |
96 | ||
97 | if (buf->size - buf->len < space) { | |
98 | buf->size = (buf->len + space) * 5 / 4 + 64; | |
99 | buf->data = realloc(buf->data, buf->size); | |
100 | if (!buf->data) { | |
101 | fprintf(stderr, "csshow: out of memory\n"); | |
102 | exit(1); | |
103 | } | |
104 | } | |
105 | ||
106 | toret = buf->data + buf->len; | |
107 | buf->len += space; | |
108 | return toret; | |
109 | } | |
110 | ||
111 | static enum Trans try_translate_from_source( | |
112 | const char *in, int inlen, int charset, wchar_t *wc_out) | |
113 | { | |
114 | const char *cp; | |
115 | int clen, ret0, ret1; | |
116 | ||
117 | cp = in; | |
118 | clen = inlen; | |
119 | ret1 = charset_to_unicode(&cp, &clen, wc_out, 1, charset, NULL, L"?", 1); | |
120 | ||
121 | cp = in; | |
122 | clen = inlen; | |
123 | ret0 = charset_to_unicode(&cp, &clen, wc_out, 1, charset, NULL, L"", 0); | |
124 | ||
125 | if (ret0 == 1 && ret1 == 1) { | |
126 | /* Successful translation into Unicode */ | |
127 | return NORMAL_PRINTABLE_CHAR; | |
128 | } else if (ret0 == 0 && ret1 == 0) { | |
129 | /* No output, even _with_ a replacement character | |
130 | * defined for bad chars, means the input | |
131 | * character has been absorbed into the charset | |
132 | * state but not _yet_ generated any output or | |
133 | * discovered an error. In other words, this is a | |
134 | * multibyte introducer. */ | |
135 | return MULTIBYTE_INTRODUCER; | |
136 | } else { | |
137 | return BAD_CHAR_IN_SOURCE_CHARSET; | |
138 | } | |
139 | } | |
140 | ||
75 | 141 | int main(int argc, char **argv) |
76 | 142 | { |
77 | int doing_opts = 1; | |
143 | bool doing_opts = true; | |
78 | 144 | int source_charset = CS_ASCII, output_charset = CS_NONE; |
79 | 145 | unsigned long base = 0, size = 0x100, rowlen = 0x10; |
146 | struct buf prefix = { NULL, 0, 0 }; | |
147 | enum ArgsState { | |
148 | AS_INITIAL, | |
149 | AS_UNICODE_ENDRANGE, | |
150 | AS_MBCS_PREFIX, | |
151 | AS_DONE | |
152 | } args_state = AS_INITIAL; | |
80 | 153 | |
81 | 154 | while (--argc > 0) { |
82 | 155 | const char *p = *++argv; |
83 | 156 | if (*p == '-' && doing_opts) { |
84 | 157 | if (!strcmp(p, "--")) { |
85 | doing_opts = 0; | |
158 | doing_opts = false; | |
86 | 159 | } else if (!strcmp(p, "--help")) { |
87 | 160 | help(stdout); |
88 | 161 | return 0; |
90 | 163 | fprintf(stderr, "csshow: unrecognised option '%s'\n", p); |
91 | 164 | return 1; |
92 | 165 | } |
93 | } else { | |
166 | } else if (args_state == AS_INITIAL) { | |
167 | /* | |
168 | * First argument can be a Unicode code point or a | |
169 | * single-byte charset name. | |
170 | */ | |
171 | ||
94 | 172 | int cs; |
95 | 173 | |
96 | 174 | if (toupper((unsigned char)p[0]) == 'U' && |
97 | 175 | (p[1] == '-' || p[1] == '+')) { |
98 | 176 | source_charset = CS_NONE; /* means just translate Unicode */ |
99 | 177 | base = strtoul(p+2, NULL, 16); |
178 | args_state = AS_UNICODE_ENDRANGE; | |
100 | 179 | } else if ((cs = charset_from_localenc(p)) != CS_NONE) { |
101 | if (!charset_is_single_byte(cs)) { | |
102 | fprintf(stderr, "csshow: cannot display multibyte" | |
103 | " charset %s\n", charset_to_localenc(cs)); | |
104 | return 1; | |
105 | } | |
106 | 180 | source_charset = cs; |
107 | 181 | base = 0; |
182 | args_state = AS_MBCS_PREFIX; | |
108 | 183 | } else { |
109 | 184 | fprintf(stderr, "csshow: unrecognised argument '%s'\n", p); |
110 | 185 | return 1; |
111 | 186 | } |
187 | } else if (args_state == AS_UNICODE_ENDRANGE) { | |
188 | /* | |
189 | * If the first argument was a Unicode code point, then | |
190 | * the next argument is taken to be an end point for the | |
191 | * range, so that you can print larger ranges than 256 | |
192 | * characters. | |
193 | */ | |
194 | ||
195 | if (toupper((unsigned char)p[0]) == 'U' && | |
196 | (p[1] == '-' || p[1] == '+')) { | |
197 | /* U+XXXX / U-XXXXXXXX specify the end code point of | |
198 | * the range. (Exclusive.) */ | |
199 | size = strtoul(p+2, NULL, 16) - base; | |
200 | } else if (p[0] == '+') { | |
201 | /* +NNNN specifies the size of the range. We use | |
202 | * strtoul in base 0 so that decimal or 0xHEX are both | |
203 | * accepted. */ | |
204 | size = strtoul(p+1, NULL, 0); | |
205 | } | |
206 | ||
207 | /* No further arguments expected. */ | |
208 | args_state = AS_DONE; | |
209 | ||
210 | } else if (args_state == AS_MBCS_PREFIX) { | |
211 | /* | |
212 | * If the first argument was a charset name, then further | |
213 | * arguments are taken to be hex byte values to accumulate | |
214 | * into an encoding prefix. This allows you to say, for | |
215 | * example, 'csshow Shift-JIS 89' to see the slice of the | |
216 | * Shift-JIS encoding consisting of characters whose first | |
217 | * encoding byte is 0x89, indexed by their second byte. | |
218 | */ | |
219 | ||
220 | *buf_add_space(&prefix, 1) = strtoul(p, NULL, 16); | |
221 | } else { | |
222 | fprintf(stderr, "csshow: extra argument '%s' unexpected\n", p); | |
223 | return 1; | |
112 | 224 | } |
113 | 225 | } |
114 | 226 | |
123 | 235 | struct translated_char *trans; |
124 | 236 | const char *rowheadfmt; |
125 | 237 | int rowheadwidth, colwidth; |
126 | int printed_a_line, skipped_a_line; | |
238 | bool printed_a_line, skipped_a_line; | |
127 | 239 | unsigned long i, j; |
240 | enum Trans transret; | |
241 | char *suffix_position = NULL; | |
242 | wchar_t wc; | |
243 | ||
244 | if (source_charset != CS_NONE) { | |
245 | /* | |
246 | * First, check that the prefix doesn't already form a | |
247 | * completed character or an error. | |
248 | */ | |
249 | transret = try_translate_from_source( | |
250 | prefix.data, prefix.len, source_charset, &wc); | |
251 | if (transret == BAD_CHAR_IN_SOURCE_CHARSET) { | |
252 | fprintf(stderr, "csshow: prefix sequence is not valid\n"); | |
253 | return 1; | |
254 | } else if (transret != MULTIBYTE_INTRODUCER) { | |
255 | fprintf(stderr, "csshow: prefix sequence generates output\n"); | |
256 | return 1; | |
257 | } | |
258 | ||
259 | /* | |
260 | * Make space in the prefix buffer to put each test byte on | |
261 | * the end. | |
262 | */ | |
263 | suffix_position = buf_add_space(&prefix, 1); | |
264 | } | |
128 | 265 | |
129 | 266 | trans = malloc(size * sizeof(struct translated_char)); |
130 | 267 | if (!trans) { |
138 | 275 | */ |
139 | 276 | for (i = 0; i < size; i++) { |
140 | 277 | unsigned long charcode = base + i; |
141 | wchar_t wc; | |
142 | 278 | |
143 | 279 | trans[i].buf[0] = '\0'; |
144 | 280 | |
145 | 281 | if (source_charset == CS_NONE) { |
146 | 282 | wc = charcode; |
147 | 283 | } else { |
148 | char c = charcode; | |
149 | const char *cp = &c; | |
150 | int clen = 1; | |
151 | int error = 0; | |
152 | ||
153 | int ret = charset_to_unicode( | |
154 | &cp, &clen, &wc, 1, source_charset, NULL, L"", 0); | |
155 | if (ret != 1) { | |
156 | trans[i].type = BAD_CHAR_IN_SOURCE_CHARSET; | |
284 | *suffix_position = charcode; | |
285 | transret = try_translate_from_source( | |
286 | prefix.data, prefix.len, source_charset, &wc); | |
287 | ||
288 | if (transret != NORMAL_PRINTABLE_CHAR) { | |
289 | trans[i].type = transret; | |
157 | 290 | continue; |
158 | 291 | } |
159 | 292 | } |
161 | 294 | { |
162 | 295 | const wchar_t *wcp = &wc; |
163 | 296 | int wclen = 1; |
164 | int error = 0; | |
297 | bool error = false; | |
165 | 298 | |
166 | 299 | int ret = charset_from_unicode( |
167 | 300 | &wcp, &wclen, trans[i].buf, sizeof(trans[i].buf) - 1, |
168 | 301 | output_charset, NULL, &error); |
169 | 302 | |
170 | assert(ret < sizeof(trans[i].buf)); | |
303 | assert(0 <= ret); | |
304 | assert((size_t)ret < sizeof(trans[i].buf)); | |
171 | 305 | trans[i].buf[ret] = '\0'; |
172 | 306 | |
173 | 307 | if (wclen != 0 || ret == 0 || error) { |
267 | 401 | printf("%-*X", colwidth, (unsigned)i); |
268 | 402 | printf("\n"); |
269 | 403 | |
270 | printed_a_line = skipped_a_line = 0; | |
404 | printed_a_line = false; | |
405 | skipped_a_line = false; | |
271 | 406 | |
272 | 407 | for (j = 0; j < size; j += rowlen) { |
273 | 408 | /* See if we're skipping this row completely. */ |
274 | int skip = 1; | |
409 | bool skip = true; | |
275 | 410 | for (i = 0; i < rowlen && j+i < size; i++) |
276 | 411 | if (trans[j+i].type >= FIRST_PRINTABLE_VALUE) |
277 | skip = 0; | |
412 | skip = false; | |
278 | 413 | if (skip) { |
279 | skipped_a_line = 1; | |
414 | skipped_a_line = true; | |
280 | 415 | continue; |
281 | 416 | } |
282 | 417 | |
288 | 423 | if (skipped_a_line && printed_a_line) { |
289 | 424 | printf("\n"); |
290 | 425 | } |
291 | skipped_a_line = 0; | |
292 | ||
293 | printed_a_line = 1; | |
426 | skipped_a_line = false; | |
427 | ||
428 | printed_a_line = true; | |
294 | 429 | printf(rowheadfmt, (unsigned)(base + j));; |
295 | 430 | for (i = 0; i < rowlen && j+i < size; i++) { |
296 | 431 | int chars_left = colwidth; |
14 | 14 | #include "sbcsdat.h" |
15 | 15 | |
16 | 16 | #define ENUM_CHARSET(x) extern charset_spec const charset_##x; |
17 | #include "enum.c" | |
17 | #include "enum.h" | |
18 | 18 | #undef ENUM_CHARSET |
19 | 19 | static charset_spec const *const cs_table[] = { |
20 | 20 | #define ENUM_CHARSET(x) &charset_##x, |
21 | #include "enum.c" | |
21 | #include "enum.h" | |
22 | 22 | #undef ENUM_CHARSET |
23 | 23 | }; |
24 | 24 | static const char *const cs_names[] = { |
25 | 25 | #define ENUM_CHARSET(x) #x, |
26 | #include "enum.c" | |
26 | #include "enum.h" | |
27 | 27 | #undef ENUM_CHARSET |
28 | 28 | }; |
29 | 29 | |
30 | 30 | int main(int argc, char **argv) |
31 | 31 | { |
32 | 32 | long int c; |
33 | int internal_names = FALSE; | |
34 | int verbose = FALSE; | |
33 | bool internal_names = false; | |
34 | bool verbose = false; | |
35 | 35 | |
36 | 36 | while (--argc) { |
37 | 37 | char *p = *++argv; |
38 | 38 | if (!strcmp(p, "-i")) |
39 | internal_names = TRUE; | |
39 | internal_names = true; | |
40 | 40 | else if (!strcmp(p, "-v")) |
41 | verbose = TRUE; | |
41 | verbose = true; | |
42 | 42 | } |
43 | 43 | |
44 | 44 | for (c = 0; c < 0x30000; c++) { |
45 | int i, plane, row, col, chr; | |
45 | int plane, row, col, chr; | |
46 | size_t i; | |
46 | 47 | char const *sep = ""; |
47 | 48 | |
48 | 49 | printf("U+%04x:", (unsigned)c); |
0 | /* | |
1 | * enum.c - enumerate all charsets defined by the library. | |
2 | * | |
3 | * This file maintains a list of every other source file which | |
4 | * contains ENUM_CHARSET definitions. It #includes each one with | |
5 | * ENUM_CHARSETS defined, which causes those source files to do | |
6 | * nothing at all except call the ENUM_CHARSET macro on each | |
7 | * charset they define. | |
8 | * | |
9 | * This file in turn is included from various other places, with | |
10 | * the ENUM_CHARSET macro defined to various different things. This | |
11 | * allows us to have multiple implementations of the master charset | |
12 | * lookup table (a static one and a dynamic one). | |
13 | */ | |
14 | ||
15 | #define ENUM_CHARSETS | |
16 | #include "sbcsdat.c" | |
17 | #include "utf8.c" | |
18 | #include "utf7.c" | |
19 | #include "utf16.c" | |
20 | #include "euc.c" | |
21 | #include "iso2022.c" | |
22 | #include "iso2022s.c" | |
23 | #include "big5enc.c" | |
24 | #include "shiftjis.c" | |
25 | #include "hz.c" | |
26 | #include "cp949.c" | |
27 | #undef ENUM_CHARSETS |
0 | /* | |
1 | * enum.h - enumerate all charsets defined by the library. | |
2 | * | |
3 | * This file maintains a list of every other source file which | |
4 | * contains ENUM_CHARSET definitions. It #includes each one with | |
5 | * ENUM_CHARSETS defined, which causes those source files to do | |
6 | * nothing at all except call the ENUM_CHARSET macro on each | |
7 | * charset they define. | |
8 | * | |
9 | * This file in turn is included from various other places, with | |
10 | * the ENUM_CHARSET macro defined to various different things. This | |
11 | * allows us to have multiple implementations of the master charset | |
12 | * lookup table (a static one and a dynamic one). | |
13 | */ | |
14 | ||
15 | #define ENUM_CHARSETS | |
16 | #include "sbcsdat.c" | |
17 | #include "utf8.c" | |
18 | #include "utf7.c" | |
19 | #include "utf16.c" | |
20 | #include "euc.c" | |
21 | #include "iso2022.c" | |
22 | #include "iso2022s.c" | |
23 | #include "big5enc.c" | |
24 | #include "shiftjis.c" | |
25 | #include "hz.c" | |
26 | #include "cp949.c" | |
27 | #include "iso6937.c" | |
28 | #undef ENUM_CHARSETS |
92 | 92 | * charset_state. |
93 | 93 | */ |
94 | 94 | |
95 | static int write_euc(charset_spec const *charset, long int input_chr, | |
96 | charset_state *state, | |
97 | void (*emit)(void *ctx, long int output), void *emitctx) | |
95 | static bool write_euc(charset_spec const *charset, long int input_chr, | |
96 | charset_state *state, | |
97 | void (*emit)(void *ctx, long int output), void *emitctx) | |
98 | 98 | { |
99 | 99 | struct euc const *euc = (struct euc *)charset->data; |
100 | 100 | unsigned long c; |
103 | 103 | UNUSEDARG(state); |
104 | 104 | |
105 | 105 | if (input_chr == -1) |
106 | return TRUE; /* stateless; no cleanup required */ | |
106 | return true; /* stateless; no cleanup required */ | |
107 | 107 | |
108 | 108 | /* ASCII is the easy bit, and is always the same. */ |
109 | 109 | if (input_chr < 0x80) { |
110 | 110 | emit(emitctx, input_chr); |
111 | return TRUE; | |
111 | return true; | |
112 | 112 | } |
113 | 113 | |
114 | 114 | c = euc->from_ucs(input_chr); |
115 | 115 | if (!c) { |
116 | return FALSE; | |
116 | return false; | |
117 | 117 | } |
118 | 118 | |
119 | 119 | cset = c >> 28; |
125 | 125 | |
126 | 126 | while (len--) |
127 | 127 | emit(emitctx, (c >> (8*len)) & 0xFF); |
128 | return TRUE; | |
128 | return true; | |
129 | 129 | } |
130 | 130 | |
131 | 131 | /* |
8 | 8 | char *output; |
9 | 9 | int outlen; |
10 | 10 | int writtenlen; |
11 | int stopped; | |
11 | bool stopped; | |
12 | 12 | }; |
13 | 13 | |
14 | 14 | static void charset_emit(void *ctx, long int output) |
22 | 22 | param->outlen--; |
23 | 23 | param->writtenlen++; |
24 | 24 | } else { |
25 | param->stopped = 1; | |
25 | param->stopped = true; | |
26 | 26 | } |
27 | 27 | } |
28 | 28 | |
29 | 29 | int charset_from_unicode(const wchar_t **input, int *inlen, |
30 | 30 | char *output, int outlen, |
31 | int charset, charset_state *state, int *error) | |
31 | int charset, charset_state *state, bool *error) | |
32 | 32 | { |
33 | 33 | charset_spec const *spec = charset_find_spec(charset); |
34 | 34 | charset_state localstate = CHARSET_INIT_STATE; |
43 | 43 | param.output = output; |
44 | 44 | param.outlen = outlen; |
45 | 45 | param.writtenlen = 0; |
46 | param.stopped = 0; | |
46 | param.stopped = false; | |
47 | 47 | |
48 | 48 | if (state) |
49 | 49 | localstate = *state; /* structure copy */ |
50 | 50 | if (error) |
51 | *error = FALSE; | |
51 | *error = false; | |
52 | 52 | |
53 | 53 | while (*inlen > 0) { |
54 | 54 | int lenbefore = param.writtenlen; |
55 | int ret; | |
55 | bool ret; | |
56 | 56 | |
57 | 57 | if (input) |
58 | 58 | ret = spec->write(spec, **input, &localstate, |
64 | 64 | * We have hit a difficult character, which the user |
65 | 65 | * wants to know about. Leave now. |
66 | 66 | */ |
67 | *error = TRUE; | |
67 | *error = true; | |
68 | 68 | return lenbefore; |
69 | 69 | } |
70 | 70 | if (param.stopped) { |
2019 | 2019 | return gb2312_forward[r][c]; |
2020 | 2020 | } |
2021 | 2021 | |
2022 | /* This one returns 1 on success, 0 if the code point doesn't exist. */ | |
2023 | int unicode_to_gb2312(long int unicode, int *r, int *c) | |
2022 | /* This one returns true on success, false if the code point doesn't exist. */ | |
2023 | bool unicode_to_gb2312(long int unicode, int *r, int *c) | |
2024 | 2024 | { |
2025 | 2025 | int rr, cc; |
2026 | 2026 | long int uu; |
2040 | 2040 | else { |
2041 | 2041 | *r = rr; |
2042 | 2042 | *c = cc; |
2043 | return 1; | |
2043 | return true; | |
2044 | 2044 | } |
2045 | 2045 | } |
2046 | return 0; | |
2046 | return false; | |
2047 | 2047 | } |
2048 | 2048 | |
2049 | 2049 | #ifdef TESTMODE |
89 | 89 | } |
90 | 90 | } |
91 | 91 | |
92 | static int write_hz(charset_spec const *charset, long int input_chr, | |
93 | charset_state *state, | |
94 | void (*emit)(void *ctx, long int output), void *emitctx) | |
92 | static bool write_hz(charset_spec const *charset, long int input_chr, | |
93 | charset_state *state, | |
94 | void (*emit)(void *ctx, long int output), void *emitctx) | |
95 | 95 | { |
96 | 96 | int desired_state, r, c; |
97 | 97 | |
106 | 106 | } else if (unicode_to_gb2312(input_chr, &r, &c)) { |
107 | 107 | desired_state = 1; |
108 | 108 | } else { |
109 | return FALSE; | |
109 | return false; | |
110 | 110 | } |
111 | 111 | |
112 | 112 | if (state->s0 != (unsigned)desired_state) { |
116 | 116 | } |
117 | 117 | |
118 | 118 | if (input_chr < 0) |
119 | return TRUE; /* special case: just reset state */ | |
119 | return true; /* special case: just reset state */ | |
120 | 120 | |
121 | 121 | if (state->s0) { |
122 | 122 | /* |
127 | 127 | } else { |
128 | 128 | emit(emitctx, c); |
129 | 129 | } |
130 | return TRUE; | |
130 | return true; | |
131 | 131 | } |
132 | 132 | |
133 | 133 | const charset_spec charset_CS_HZ = { |
4 | 4 | #ifndef charset_internal_h |
5 | 5 | #define charset_internal_h |
6 | 6 | |
7 | #include <stdbool.h> | |
8 | ||
7 | 9 | /* This invariably comes in handy */ |
8 | 10 | #define lenof(x) ( sizeof((x)) / sizeof(*(x)) ) |
9 | 11 | |
10 | 12 | /* This is an invalid Unicode value used to indicate an error. */ |
11 | 13 | #define ERROR 0xFFFFL /* Unicode value representing error */ |
12 | ||
13 | #undef TRUE | |
14 | #define TRUE 1 | |
15 | #undef FALSE | |
16 | #define FALSE 0 | |
17 | 14 | |
18 | 15 | typedef struct charset_spec charset_spec; |
19 | 16 | typedef struct sbcs_data sbcs_data; |
35 | 32 | * character set. The `emit' function expects to get byte |
36 | 33 | * values passed to it. |
37 | 34 | * |
38 | * A non-representable input character should cause a FALSE | |
35 | * A non-representable input character should cause a false | |
39 | 36 | * return, _before_ `emit' is called. Successful conversion |
40 | * causes a TRUE return. | |
37 | * causes a true return. | |
41 | 38 | * |
42 | 39 | * If `input_chr' is -1, this function must revert the encoding |
43 | 40 | * state to any default required at the end of a piece of |
44 | 41 | * encoded text. |
45 | 42 | */ |
46 | int (*write)(charset_spec const *charset, long int input_chr, | |
47 | charset_state *state, | |
48 | void (*emit)(void *ctx, long int output), void *emitctx); | |
43 | bool (*write)(charset_spec const *charset, long int input_chr, | |
44 | charset_state *state, | |
45 | void (*emit)(void *ctx, long int output), void *emitctx); | |
49 | 46 | void const *data; |
50 | 47 | }; |
51 | 48 | |
87 | 84 | void read_sbcs(charset_spec const *charset, long int input_chr, |
88 | 85 | charset_state *state, |
89 | 86 | void (*emit)(void *ctx, long int output), void *emitctx); |
90 | int write_sbcs(charset_spec const *charset, long int input_chr, | |
91 | charset_state *state, | |
92 | void (*emit)(void *ctx, long int output), void *emitctx); | |
87 | bool write_sbcs(charset_spec const *charset, long int input_chr, | |
88 | charset_state *state, | |
89 | void (*emit)(void *ctx, long int output), void *emitctx); | |
93 | 90 | long int sbcs_to_unicode(const struct sbcs_data *sd, long int input_chr); |
94 | 91 | long int sbcs_from_unicode(const struct sbcs_data *sd, long int input_chr); |
95 | 92 | |
96 | 93 | void read_utf8(charset_spec const *charset, long int input_chr, |
97 | 94 | charset_state *state, |
98 | 95 | void (*emit)(void *ctx, long int output), void *emitctx); |
99 | int write_utf8(charset_spec const *charset, long int input_chr, | |
100 | charset_state *state, | |
101 | void (*emit)(void *ctx, long int output), | |
102 | void *emitctx); | |
96 | bool write_utf8(charset_spec const *charset, long int input_chr, | |
97 | charset_state *state, | |
98 | void (*emit)(void *ctx, long int output), | |
99 | void *emitctx); | |
103 | 100 | |
104 | 101 | long int big5_to_unicode(int r, int c); |
105 | int unicode_to_big5(long int unicode, int *r, int *c); | |
102 | bool unicode_to_big5(long int unicode, int *r, int *c); | |
106 | 103 | long int cns11643_to_unicode(int p, int r, int c); |
107 | int unicode_to_cns11643(long int unicode, int *p, int *r, int *c); | |
104 | bool unicode_to_cns11643(long int unicode, int *p, int *r, int *c); | |
108 | 105 | long int cp949_to_unicode(int r, int c); |
109 | int unicode_to_cp949(long int unicode, int *r, int *c); | |
106 | bool unicode_to_cp949(long int unicode, int *r, int *c); | |
110 | 107 | long int ksx1001_to_unicode(int r, int c); |
111 | int unicode_to_ksx1001(long int unicode, int *r, int *c); | |
108 | bool unicode_to_ksx1001(long int unicode, int *r, int *c); | |
112 | 109 | long int gb2312_to_unicode(int r, int c); |
113 | int unicode_to_gb2312(long int unicode, int *r, int *c); | |
110 | bool unicode_to_gb2312(long int unicode, int *r, int *c); | |
114 | 111 | long int jisx0208_to_unicode(int r, int c); |
115 | int unicode_to_jisx0208(long int unicode, int *r, int *c); | |
112 | bool unicode_to_jisx0208(long int unicode, int *r, int *c); | |
116 | 113 | long int jisx0212_to_unicode(int r, int c); |
117 | int unicode_to_jisx0212(long int unicode, int *r, int *c); | |
114 | bool unicode_to_jisx0212(long int unicode, int *r, int *c); | |
118 | 115 | |
119 | 116 | /* |
120 | 117 | * Placate compiler warning about unused parameters, of which we |
122 | 119 | */ |
123 | 120 | #define UNUSEDARG(x) ( (x) = (x) ) |
124 | 121 | |
122 | #ifdef __GNUC__ | |
123 | #define DELIBERATE_FALLTHROUGH __attribute__ ((fallthrough)); | |
124 | #else | |
125 | #define DELIBERATE_FALLTHROUGH ((void)0) | |
126 | #endif | |
127 | ||
125 | 128 | #endif /* charset_internal_h */ |
36 | 36 | |
37 | 37 | static long int emacs_big5_1_to_unicode(int, int); |
38 | 38 | static long int emacs_big5_2_to_unicode(int, int); |
39 | static int unicode_to_emacs_big5(long int, int *, int *, int *); | |
39 | static bool unicode_to_emacs_big5(long int, int *, int *, int *); | |
40 | 40 | static long int cns11643_1_to_unicode(int, int); |
41 | 41 | static long int cns11643_2_to_unicode(int, int); |
42 | 42 | static long int cns11643_3_to_unicode(int, int); |
45 | 45 | static long int cns11643_6_to_unicode(int, int); |
46 | 46 | static long int cns11643_7_to_unicode(int, int); |
47 | 47 | static long int null_dbcs_to_unicode(int, int); |
48 | static int unicode_to_null_dbcs(long int, int *, int *); | |
49 | ||
50 | typedef int (*to_dbcs_t)(long int, int *, int *); | |
51 | typedef int (*to_dbcs_planar_t)(long int, int *, int *, int *); | |
48 | static bool unicode_to_null_dbcs(long int, int *, int *); | |
49 | ||
50 | typedef bool (*to_dbcs_t)(long int, int *, int *); | |
51 | typedef bool (*to_dbcs_planar_t)(long int, int *, int *, int *); | |
52 | 52 | |
53 | 53 | /* |
54 | 54 | * These macros cast between to_dbcs_planar_t and to_dbcs_t, in |
80 | 80 | * |
81 | 81 | * We are permitted to use ?:, however, and that works quite well |
82 | 82 | * since the actual result of the sizeof expression _is_ evaluable |
83 | * at compile time. So here's my final answer: | |
83 | * at compile time. So here's my final answer. | |
84 | * | |
85 | * (The double cast of each function pointer from its original type | |
86 | * through void (*)(void) to the final type is there to suppress the | |
87 | * warning that later versions of gcc will otherwise give about | |
88 | * casting between different function pointer types. Apparently gcc | |
89 | * accepts void (*)(void) as the canonical type you use when | |
90 | * _deliberately_ doing that, so going via that deals with the | |
91 | * warning.) | |
84 | 92 | */ |
85 | 93 | #define TYPECHECK(x,y) ( sizeof((x)) == sizeof((x)) ? (y) : (y) ) |
86 | #define DEPLANARISE(x) TYPECHECK((x) == (to_dbcs_planar_t)NULL, (to_dbcs_t)(x)) | |
87 | #define REPLANARISE(x) TYPECHECK((x) == (to_dbcs_t)NULL, (to_dbcs_planar_t)(x)) | |
94 | #define DEPLANARISE(x) TYPECHECK((x) == (to_dbcs_planar_t)NULL, \ | |
95 | (to_dbcs_t)(void (*)(void))(x)) | |
96 | #define REPLANARISE(x) TYPECHECK((x) == (to_dbcs_t)NULL, \ | |
97 | (to_dbcs_planar_t)(void (*)(void))(x)) | |
88 | 98 | |
89 | 99 | /* |
90 | 100 | * Values used in the `enable' field. Each of these identifies a |
204 | 214 | UNUSEDARG(c); |
205 | 215 | return ERROR; |
206 | 216 | } |
207 | static int unicode_to_null_dbcs(long int unicode, int *r, int *c) | |
217 | static bool unicode_to_null_dbcs(long int unicode, int *r, int *c) | |
208 | 218 | { |
209 | 219 | UNUSEDARG(unicode); |
210 | 220 | UNUSEDARG(r); |
211 | 221 | UNUSEDARG(c); |
212 | return 0; /* failed to convert anything */ | |
222 | return false; /* failed to convert anything */ | |
213 | 223 | } |
214 | 224 | |
215 | 225 | /* |
239 | 249 | return big5_to_unicode(r, c); |
240 | 250 | } |
241 | 251 | |
242 | static int unicode_to_emacs_big5(long int unicode, int *p, int *r, int *c) | |
252 | static bool unicode_to_emacs_big5(long int unicode, int *p, int *r, int *c) | |
243 | 253 | { |
244 | 254 | int rr, cc, s; |
245 | 255 | if (!unicode_to_big5(unicode, &rr, &cc)) |
246 | return 0; | |
256 | return false; | |
247 | 257 | if (cc >= 64) { |
248 | 258 | cc -= 34; |
249 | 259 | assert(cc >= 64); |
257 | 267 | } |
258 | 268 | *r = s / 94; |
259 | 269 | *c = s % 94; |
260 | return 1; | |
270 | return true; | |
261 | 271 | } |
262 | 272 | |
263 | 273 | /* Wrappers for cns11643_to_unicode() */ |
594 | 604 | break; |
595 | 605 | } |
596 | 606 | } else if ((input_chr & 0x80) || MODE < ESCSEQ) { |
597 | int is_gl = 0; | |
607 | bool is_gl = false; | |
598 | 608 | struct iso2022_subcharset const *subcs; |
599 | 609 | unsigned container; |
600 | 610 | long input_7bit; |
613 | 623 | container = (state->s1 >> 28) & 3; |
614 | 624 | else { /* GL */ |
615 | 625 | container = state->s1 >> 30; |
616 | is_gl = 1; | |
626 | is_gl = true; | |
617 | 627 | } |
618 | 628 | input_7bit = input_chr & ~0x80; |
619 | 629 | subcs = &iso2022_subcharsets[(state->s1 >> (container * 7)) & 0x7f]; |
763 | 773 | switch (i2) { |
764 | 774 | case 0: /* Obsolete version of GZDM4 */ |
765 | 775 | i2 = '('; |
776 | DELIBERATE_FALLTHROUGH; | |
766 | 777 | case '(': /* GZDM4 */ case ')': /* G1DM4 */ |
767 | 778 | case '*': /* G2DM4 */ case '+': /* G3DM4 */ |
768 | 779 | designate(state, i2 - '(', M4, 0, input_chr); |
775 | 786 | emit(emitctx, ERROR); |
776 | 787 | break; |
777 | 788 | } |
789 | break; | |
778 | 790 | case '%': /* DOCS */ |
779 | 791 | /* XXX What's a reasonable way to handle an unrecognised DOCS? */ |
780 | 792 | switch (i2) { |
805 | 817 | } |
806 | 818 | } |
807 | 819 | |
808 | static void oselect(charset_state *state, int i, int right, | |
820 | static void oselect(charset_state *state, int i, bool right, | |
809 | 821 | void (*emit)(void *ctx, long int output), |
810 | 822 | void *emitctx) |
811 | 823 | { |
982 | 994 | * exact output policy for compound text wants thinking about more |
983 | 995 | * carefully. |
984 | 996 | */ |
985 | static int write_iso2022(charset_spec const *charset, long int input_chr, | |
986 | charset_state *state, | |
987 | void (*emit)(void *ctx, long int output), | |
988 | void *emitctx) | |
997 | static bool write_iso2022(charset_spec const *charset, long int input_chr, | |
998 | charset_state *state, | |
999 | void (*emit)(void *ctx, long int output), | |
1000 | void *emitctx) | |
989 | 1001 | { |
990 | 1002 | int i; |
991 | 1003 | struct iso2022_subcharset const *subcs; |
1016 | 1028 | if (subcs->type == mode->ltype && |
1017 | 1029 | subcs->i == mode->li && |
1018 | 1030 | subcs->f == mode->lf) |
1019 | oselect(state, i, FALSE, NULL, NULL); | |
1031 | oselect(state, i, false, NULL, NULL); | |
1020 | 1032 | if (subcs->type == mode->rtype && |
1021 | 1033 | subcs->i == mode->ri && |
1022 | 1034 | subcs->f == mode->rf) |
1023 | oselect(state, i, TRUE, NULL, NULL); | |
1035 | oselect(state, i, true, NULL, NULL); | |
1024 | 1036 | } |
1025 | 1037 | } |
1026 | 1038 | |
1035 | 1047 | if (subcs->type == mode->ltype && |
1036 | 1048 | subcs->i == mode->li && |
1037 | 1049 | subcs->f == mode->lf) |
1038 | oselect(state, i, FALSE, emit, emitctx); | |
1050 | oselect(state, i, false, emit, emitctx); | |
1039 | 1051 | if (subcs->type == mode->rtype && |
1040 | 1052 | subcs->i == mode->ri && |
1041 | 1053 | subcs->f == mode->rf) |
1042 | oselect(state, i, TRUE, emit, emitctx); | |
1043 | } | |
1044 | return TRUE; | |
1054 | oselect(state, i, true, emit, emitctx); | |
1055 | } | |
1056 | return true; | |
1045 | 1057 | } |
1046 | 1058 | |
1047 | 1059 | /* |
1050 | 1062 | */ |
1051 | 1063 | if (input_chr <= 0x20 || (input_chr >= 0x7F && input_chr < 0xA0)) { |
1052 | 1064 | emit(emitctx, input_chr); |
1053 | return TRUE; | |
1065 | return true; | |
1054 | 1066 | } |
1055 | 1067 | |
1056 | 1068 | /* |
1102 | 1114 | } |
1103 | 1115 | |
1104 | 1116 | if ((unsigned)i < lenof(iso2022_subcharsets)) { |
1105 | int right; | |
1117 | bool right; | |
1106 | 1118 | |
1107 | 1119 | /* |
1108 | 1120 | * Our character is represented by c1 (and possibly also |
1152 | 1164 | } |
1153 | 1165 | } |
1154 | 1166 | |
1155 | return TRUE; | |
1167 | return true; | |
1156 | 1168 | } |
1157 | 1169 | |
1158 | 1170 | /* |
1167 | 1179 | |
1168 | 1180 | for (i = 0; (unsigned)i <= lenof(ctext_encodings); i++) { |
1169 | 1181 | charset_state substate; |
1170 | charset_spec const *subcs = ctext_encodings[i].subcs; | |
1171 | 1182 | |
1172 | 1183 | /* |
1173 | 1184 | * We assume that all character sets dealt with by DOCS |
1177 | 1188 | p = data; |
1178 | 1189 | |
1179 | 1190 | if ((unsigned)i < lenof(ctext_encodings)) { |
1191 | charset_spec const *subcs = ctext_encodings[i].subcs; | |
1180 | 1192 | if ((mode->enable_mask & (1 << ctext_encodings[i].enable)) && |
1181 | 1193 | subcs->write(subcs, input_chr, &substate, |
1182 | 1194 | write_to_pointer, &p)) { |
1194 | 1206 | |
1195 | 1207 | if (cs != -2) { |
1196 | 1208 | docs_char(state, emit, emitctx, cs, data, p - data); |
1197 | return TRUE; | |
1198 | } | |
1199 | } | |
1200 | ||
1201 | return FALSE; | |
1209 | return true; | |
1210 | } | |
1211 | } | |
1212 | ||
1213 | return false; | |
1202 | 1214 | } |
1203 | 1215 | |
1204 | 1216 | /* |
77 | 77 | /* |
78 | 78 | * Is this an 8-bit ISO 2022 subset? |
79 | 79 | */ |
80 | int eightbit; | |
80 | bool eightbit; | |
81 | 81 | |
82 | 82 | /* |
83 | 83 | * Function calls to do the actual translation. |
84 | 84 | */ |
85 | 85 | long int (*to_ucs)(int subcharset, unsigned long bytes); |
86 | int (*from_ucs)(long int ucs, int *subcharset, unsigned long *bytes); | |
86 | bool (*from_ucs)(long int ucs, int *subcharset, unsigned long *bytes); | |
87 | 87 | }; |
88 | 88 | |
89 | 89 | static void read_iso2022s(charset_spec const *charset, long int input_chr, |
325 | 325 | } |
326 | 326 | } |
327 | 327 | |
328 | static int write_iso2022s(charset_spec const *charset, long int input_chr, | |
329 | charset_state *state, | |
330 | void (*emit)(void *ctx, long int output), | |
331 | void *emitctx) | |
328 | static bool write_iso2022s(charset_spec const *charset, long int input_chr, | |
329 | charset_state *state, | |
330 | void (*emit)(void *ctx, long int output), | |
331 | void *emitctx) | |
332 | 332 | { |
333 | 333 | struct iso2022 const *iso = (struct iso2022 *)charset->data; |
334 | 334 | int subcharset, len, i, j, cont, topbit = 0; |
346 | 346 | * to go in. |
347 | 347 | */ |
348 | 348 | if (input_chr >= 0 && !iso->from_ucs(input_chr, &subcharset, &bytes)) |
349 | return FALSE; | |
349 | return false; | |
350 | 350 | |
351 | 351 | if (!(state->s1 & 0x80000000)) { |
352 | 352 | state->s1 = iso->s1; |
374 | 374 | } |
375 | 375 | } |
376 | 376 | |
377 | return TRUE; | |
377 | return true; | |
378 | 378 | } |
379 | 379 | |
380 | 380 | /* |
436 | 436 | while (len--) |
437 | 437 | emit(emitctx, ((bytes >> (8*len)) & 0xFF) | topbit); |
438 | 438 | |
439 | return TRUE; | |
439 | return true; | |
440 | 440 | } |
441 | 441 | |
442 | 442 | /* |
450 | 450 | return 0xA5; |
451 | 451 | else if (bytes == 0x7E) |
452 | 452 | return 0x203E; |
453 | /* else fall through to ASCII */ | |
453 | DELIBERATE_FALLTHROUGH; /* else fall through to ASCII */ | |
454 | 454 | case 0: return bytes; /* one-byte ASCII */ |
455 | 455 | /* (no break needed since all control paths have returned) */ |
456 | 456 | case 2: return jisx0208_to_unicode(((bytes >> 8) & 0xFF) - 0x21, |
458 | 458 | default: return ERROR; |
459 | 459 | } |
460 | 460 | } |
461 | static int iso2022jp_from_ucs(long int ucs, int *subcharset, | |
461 | static bool iso2022jp_from_ucs(long int ucs, int *subcharset, | |
462 | 462 | unsigned long *bytes) |
463 | 463 | { |
464 | 464 | int r, c; |
465 | 465 | if (ucs < 0x80) { |
466 | 466 | *subcharset = 0; |
467 | 467 | *bytes = ucs; |
468 | return 1; | |
468 | return true; | |
469 | 469 | } else if (ucs == 0xA5 || ucs == 0x203E) { |
470 | 470 | *subcharset = 1; |
471 | 471 | *bytes = (ucs == 0xA5 ? 0x5C : 0x7E); |
472 | return 1; | |
472 | return true; | |
473 | 473 | } else if (unicode_to_jisx0208(ucs, &r, &c)) { |
474 | 474 | *subcharset = 2; |
475 | 475 | *bytes = ((r+0x21) << 8) | (c+0x21); |
476 | return 1; | |
476 | return true; | |
477 | 477 | } else { |
478 | return 0; | |
478 | return false; | |
479 | 479 | } |
480 | 480 | } |
481 | 481 | static const struct iso2022_escape iso2022jp_escapes[] = { |
486 | 486 | }; |
487 | 487 | static const struct iso2022 iso2022jp = { |
488 | 488 | iso2022jp_escapes, lenof(iso2022jp_escapes), |
489 | "\1\1\2", "\3", 0x80000000, NULL, FALSE, | |
489 | "\1\1\2", "\3", 0x80000000, NULL, false, | |
490 | 490 | iso2022jp_to_ucs, iso2022jp_from_ucs |
491 | 491 | }; |
492 | 492 | const charset_spec charset_CS_ISO2022_JP = { |
505 | 505 | default: return ERROR; |
506 | 506 | } |
507 | 507 | } |
508 | static int iso2022kr_from_ucs(long int ucs, int *subcharset, | |
509 | unsigned long *bytes) | |
508 | static bool iso2022kr_from_ucs(long int ucs, int *subcharset, | |
509 | unsigned long *bytes) | |
510 | 510 | { |
511 | 511 | int r, c; |
512 | 512 | if (ucs < 0x80) { |
513 | 513 | *subcharset = 0; |
514 | 514 | *bytes = ucs; |
515 | return 1; | |
515 | return true; | |
516 | 516 | } else if (unicode_to_ksx1001(ucs, &r, &c)) { |
517 | 517 | *subcharset = 1; |
518 | 518 | *bytes = ((r+0x21) << 8) | (c+0x21); |
519 | return 1; | |
519 | return true; | |
520 | 520 | } else { |
521 | return 0; | |
521 | return false; | |
522 | 522 | } |
523 | 523 | } |
524 | 524 | static const struct iso2022_escape iso2022kr_escapes[] = { |
528 | 528 | }; |
529 | 529 | static const struct iso2022 iso2022kr = { |
530 | 530 | iso2022kr_escapes, lenof(iso2022kr_escapes), |
531 | "\1\2", "\2", 0x80000040, "\033$)C", FALSE, | |
531 | "\1\2", "\2", 0x80000040, "\033$)C", false, | |
532 | 532 | iso2022kr_to_ucs, iso2022kr_from_ucs |
533 | 533 | }; |
534 | 534 | const charset_spec charset_CS_ISO2022_KR = { |
0 | /* | |
1 | * iso6937.c - the _almost_ single-byte character set ISO/IEC 6937. | |
2 | * | |
3 | * Also, a tiny variation on it which adds the Euro sign at the | |
4 | * previously unused position 0xA4, used in DVB metadata. | |
5 | */ | |
6 | ||
7 | #ifndef ENUM_CHARSETS | |
8 | ||
9 | #include "charset.h" | |
10 | #include "internal.h" | |
11 | ||
12 | /* | |
13 | * ISO/IEC 6937 is a _mostly_ single-byte character set, except that | |
14 | * the 0xC0-0xCF range of bytes are introducer characters for two-byte | |
15 | * encodings of accented letters. | |
16 | * | |
17 | * You'd be forgiven for mistaking the bytes in the C0-CF range for | |
18 | * something more like combining characters, because the two-byte | |
19 | * encodings are organised in a very semantic way: each introducer | |
20 | * character corresponds to a specific diacritic mark, in the sense | |
21 | * that all the two-byte encodings beginning with that introducer byte | |
22 | * have an ASCII alphabetic character as their second byte and encode | |
23 | * that letter with the given diacritic. | |
24 | * | |
25 | * But it would be a mistake to consider this to have anything to do | |
26 | * with the Unicode combining characters for those diacritics, because | |
27 | * (a) the ISO 6937 diacritic bytes are _prefixes_, not combining | |
28 | * characters applied afterwards; (b) ISO 6937 specifies an exact list | |
29 | * of the permissible second bytes after each introducer; (c) the | |
30 | * right translation of one of these two-byte encodings is the single | |
31 | * Unicode code point for the accented letter, and not a separate pair | |
32 | * of (letter, combining character) code points. | |
33 | * | |
34 | * So this is better viewed as simply a multibyte _encoding_, just | |
35 | * with an unusually mnemonic organisation. | |
36 | * | |
37 | * Implementation strategy: the single-byte encodings for this charset | |
38 | * (or rather, this pair of very similar charsets) are handled by a | |
39 | * pair of mapping tables in sbcs.dat, only declared with the 'tables' | |
40 | * rather than 'charset' keyword so that sbcsgen.pl doesn't generate | |
41 | * the top-level charset_spec. So the read and write functions below | |
42 | * can call sbcs_to_unicode and sbcs_from_unicode on those tables just | |
43 | * like the ones in sbcs.c. | |
44 | * | |
45 | * The two-byte pairs are dealt with using the pair of mapping tables | |
46 | * below. These are generated by Perl from a minimal amount of | |
47 | * starting data that just gives each prefix character along with the | |
48 | * corresponding Unicode combining character and the list of letters | |
49 | * it's allowed to apply to; the Perl script runs over UnicodeData.txt | |
50 | * to achieve the translation of (letter, combining character) pairs | |
51 | * to precombined code points. | |
52 | */ | |
53 | ||
54 | /* | |
55 | ||
56 | perl -e ' | |
57 | while (<<>>) { | |
58 | chomp; @_ = split /;/,$_; @d = split / /,$_[5]; | |
59 | if (2 == @d) { | |
60 | ($p, $s, $c) = (hex $d[0], hex $d[1], hex $_[0]); | |
61 | $combine{$p,$s} = $c if $p && $s && $c; | |
62 | } | |
63 | } | |
64 | @forward = (" ERROR,") x 0x400; | |
65 | for $t ( [0xC1, 0x300, "AEIOUaeiou" ], | |
66 | [0xC2, 0x301, "ACEILNORSUYZacegilnorsuyz" ], | |
67 | [0xC3, 0x302, "ACEGHIJOSUWYaceghijosuwy" ], | |
68 | [0xC4, 0x303, "AINOUainou" ], | |
69 | [0xC5, 0x304, "AEIOUaeiou" ], | |
70 | [0xC6, 0x306, "AGUagu" ], | |
71 | [0xC7, 0x307, "CEGIZcegz" ], | |
72 | [0xC8, 0x308, "AEIOUYaeiouy" ], | |
73 | [0xCA, 0x30A, "AUau" ], | |
74 | [0xCB, 0x327, "CGKLNRSTcklnrst" ], | |
75 | [0xCD, 0x30B, "OUou" ], | |
76 | [0xCE, 0x328, "AEIUaeiu" ], | |
77 | [0xCF, 0x30C, "CDELNRSTZcdelnrstz" ] ) { | |
78 | ($prefix, $cc, $letters) = @$t; | |
79 | for $letter (unpack "C*", $letters) { | |
80 | $cp = $combine{$letter,$cc}; | |
81 | $offset = ($prefix - 0xC0) * 0x40 + ($letter - 0x40); | |
82 | $forward[$offset] = sprintf " 0x%04x,", $cp; | |
83 | push @backward, [$cp, (sprintf " %d,", $offset)]; | |
84 | } | |
85 | } | |
86 | @backward = map { $_->[1] } sort {$a->[0] <=> $b->[0]} @backward; | |
87 | print "static const unsigned short iso6937_2byte_forward[0x400] = {\n"; | |
88 | $line = " "; | |
89 | for $e (@forward, "sentinel" x 100) { | |
90 | if (length($line.$e) > 77) { print "$line\n"; $line = " "; } | |
91 | $line .= $e; | |
92 | } | |
93 | print "};\n\n"; | |
94 | $line = " "; | |
95 | print "static const unsigned short iso6937_2byte_backward[] = {\n"; | |
96 | for $e (@backward, "sentinel" x 100) { | |
97 | if (length($line.$e) > 77) { print "$line\n"; $line = " "; } | |
98 | $line .= $e; | |
99 | } | |
100 | print "};\n\n"; | |
101 | ' UnicodeData.txt | |
102 | ||
103 | */ | |
104 | ||
105 | static const unsigned short iso6937_2byte_forward[0x400] = { | |
106 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
107 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
108 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
109 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
110 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
111 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
112 | ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c0, ERROR, ERROR, ERROR, 0x00c8, | |
113 | ERROR, ERROR, ERROR, 0x00cc, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00d2, | |
114 | ERROR, ERROR, ERROR, ERROR, ERROR, 0x00d9, ERROR, ERROR, ERROR, ERROR, | |
115 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00e0, ERROR, ERROR, | |
116 | ERROR, 0x00e8, ERROR, ERROR, ERROR, 0x00ec, ERROR, ERROR, ERROR, ERROR, | |
117 | ERROR, 0x00f2, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00f9, ERROR, ERROR, | |
118 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c1, | |
119 | ERROR, 0x0106, ERROR, 0x00c9, ERROR, ERROR, ERROR, 0x00cd, ERROR, ERROR, | |
120 | 0x0139, ERROR, 0x0143, 0x00d3, ERROR, ERROR, 0x0154, 0x015a, ERROR, | |
121 | 0x00da, ERROR, ERROR, ERROR, 0x00dd, 0x0179, ERROR, ERROR, ERROR, ERROR, | |
122 | ERROR, ERROR, 0x00e1, ERROR, 0x0107, ERROR, 0x00e9, ERROR, 0x01f5, ERROR, | |
123 | 0x00ed, ERROR, ERROR, 0x013a, ERROR, 0x0144, 0x00f3, ERROR, ERROR, | |
124 | 0x0155, 0x015b, ERROR, 0x00fa, ERROR, ERROR, ERROR, 0x00fd, 0x017a, | |
125 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c2, ERROR, 0x0108, ERROR, | |
126 | 0x00ca, ERROR, 0x011c, 0x0124, 0x00ce, 0x0134, ERROR, ERROR, ERROR, | |
127 | ERROR, 0x00d4, ERROR, ERROR, ERROR, 0x015c, ERROR, 0x00db, ERROR, 0x0174, | |
128 | ERROR, 0x0176, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00e2, | |
129 | ERROR, 0x0109, ERROR, 0x00ea, ERROR, 0x011d, 0x0125, 0x00ee, 0x0135, | |
130 | ERROR, ERROR, ERROR, ERROR, 0x00f4, ERROR, ERROR, ERROR, 0x015d, ERROR, | |
131 | 0x00fb, ERROR, 0x0175, ERROR, 0x0177, ERROR, ERROR, ERROR, ERROR, ERROR, | |
132 | ERROR, ERROR, 0x00c3, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
133 | 0x0128, ERROR, ERROR, ERROR, ERROR, 0x00d1, 0x00d5, ERROR, ERROR, ERROR, | |
134 | ERROR, ERROR, 0x0168, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
135 | ERROR, ERROR, ERROR, ERROR, 0x00e3, ERROR, ERROR, ERROR, ERROR, ERROR, | |
136 | ERROR, ERROR, 0x0129, ERROR, ERROR, ERROR, ERROR, 0x00f1, 0x00f5, ERROR, | |
137 | ERROR, ERROR, ERROR, ERROR, 0x0169, ERROR, ERROR, ERROR, ERROR, ERROR, | |
138 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x0100, ERROR, ERROR, ERROR, | |
139 | 0x0112, ERROR, ERROR, ERROR, 0x012a, ERROR, ERROR, ERROR, ERROR, ERROR, | |
140 | 0x014c, ERROR, ERROR, ERROR, ERROR, ERROR, 0x016a, ERROR, ERROR, ERROR, | |
141 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x0101, ERROR, | |
142 | ERROR, ERROR, 0x0113, ERROR, ERROR, ERROR, 0x012b, ERROR, ERROR, ERROR, | |
143 | ERROR, ERROR, 0x014d, ERROR, ERROR, ERROR, ERROR, ERROR, 0x016b, ERROR, | |
144 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
145 | 0x0102, ERROR, ERROR, ERROR, ERROR, ERROR, 0x011e, ERROR, ERROR, ERROR, | |
146 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
147 | 0x016c, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
148 | ERROR, ERROR, 0x0103, ERROR, ERROR, ERROR, ERROR, ERROR, 0x011f, ERROR, | |
149 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
150 | ERROR, ERROR, 0x016d, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
151 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x010a, ERROR, 0x0116, ERROR, | |
152 | 0x0120, ERROR, 0x0130, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
153 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x017b, | |
154 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x010b, ERROR, | |
155 | 0x0117, ERROR, 0x0121, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
156 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
157 | ERROR, 0x017c, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c4, ERROR, | |
158 | ERROR, ERROR, 0x00cb, ERROR, ERROR, ERROR, 0x00cf, ERROR, ERROR, ERROR, | |
159 | ERROR, ERROR, 0x00d6, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00dc, ERROR, | |
160 | ERROR, ERROR, 0x0178, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
161 | 0x00e4, ERROR, ERROR, ERROR, 0x00eb, ERROR, ERROR, ERROR, 0x00ef, ERROR, | |
162 | ERROR, ERROR, ERROR, ERROR, 0x00f6, ERROR, ERROR, ERROR, ERROR, ERROR, | |
163 | 0x00fc, ERROR, ERROR, ERROR, 0x00ff, ERROR, ERROR, ERROR, ERROR, ERROR, | |
164 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
165 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
166 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
167 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
168 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
169 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
170 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00c5, ERROR, ERROR, ERROR, | |
171 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
172 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x016e, ERROR, ERROR, ERROR, | |
173 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x00e5, ERROR, | |
174 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
175 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x016f, ERROR, | |
176 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
177 | ERROR, ERROR, 0x00c7, ERROR, ERROR, ERROR, 0x0122, ERROR, ERROR, ERROR, | |
178 | 0x0136, 0x013b, ERROR, 0x0145, ERROR, ERROR, ERROR, 0x0156, 0x015e, | |
179 | 0x0162, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
180 | ERROR, ERROR, ERROR, ERROR, ERROR, 0x00e7, ERROR, ERROR, ERROR, ERROR, | |
181 | ERROR, ERROR, ERROR, 0x0137, 0x013c, ERROR, 0x0146, ERROR, ERROR, ERROR, | |
182 | 0x0157, 0x015f, 0x0163, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
183 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
184 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
185 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
186 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
187 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
188 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
189 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
190 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
191 | ERROR, ERROR, ERROR, 0x0150, ERROR, ERROR, ERROR, ERROR, ERROR, 0x0170, | |
192 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
193 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
194 | ERROR, ERROR, ERROR, ERROR, ERROR, 0x0151, ERROR, ERROR, ERROR, ERROR, | |
195 | ERROR, 0x0171, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
196 | ERROR, ERROR, ERROR, 0x0104, ERROR, ERROR, ERROR, 0x0118, ERROR, ERROR, | |
197 | ERROR, 0x012e, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
198 | ERROR, ERROR, ERROR, 0x0172, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
199 | ERROR, ERROR, ERROR, ERROR, ERROR, 0x0105, ERROR, ERROR, ERROR, 0x0119, | |
200 | ERROR, ERROR, ERROR, 0x012f, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
201 | ERROR, ERROR, ERROR, ERROR, ERROR, 0x0173, ERROR, ERROR, ERROR, ERROR, | |
202 | ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x010c, | |
203 | 0x010e, 0x011a, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, 0x013d, ERROR, | |
204 | 0x0147, ERROR, ERROR, ERROR, 0x0158, 0x0160, 0x0164, ERROR, ERROR, ERROR, | |
205 | ERROR, ERROR, 0x017d, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
206 | ERROR, 0x010d, 0x010f, 0x011b, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, | |
207 | 0x013e, ERROR, 0x0148, ERROR, ERROR, ERROR, 0x0159, 0x0161, 0x0165, | |
208 | ERROR, ERROR, ERROR, ERROR, ERROR, 0x017e, ERROR, ERROR, ERROR, ERROR, | |
209 | ERROR, | |
210 | }; | |
211 | ||
212 | static const unsigned short iso6937_2byte_backward[] = { | |
213 | 65, 129, 193, 257, 513, 641, 707, 69, 133, 197, 517, 73, 137, 201, 521, | |
214 | 270, 79, 143, 207, 271, 527, 85, 149, 213, 533, 153, 97, 161, 225, 289, | |
215 | 545, 673, 739, 101, 165, 229, 549, 105, 169, 233, 553, 302, 111, 175, | |
216 | 239, 303, 559, 117, 181, 245, 565, 185, 569, 321, 353, 385, 417, 897, | |
217 | 929, 131, 163, 195, 227, 451, 483, 963, 995, 964, 996, 325, 357, 453, | |
218 | 485, 901, 933, 965, 997, 199, 231, 391, 423, 455, 487, 711, 200, 232, | |
219 | 265, 297, 329, 361, 905, 937, 457, 202, 234, 715, 747, 140, 172, 716, | |
220 | 748, 972, 1004, 142, 174, 718, 750, 974, 1006, 335, 367, 847, 879, 146, | |
221 | 178, 722, 754, 978, 1010, 147, 179, 211, 243, 723, 755, 979, 1011, 724, | |
222 | 756, 980, 1012, 277, 309, 341, 373, 405, 437, 661, 693, 853, 885, 917, | |
223 | 949, 215, 247, 217, 249, 537, 154, 186, 474, 506, 986, 1018, 167, | |
224 | }; | |
225 | ||
226 | /* This returns ERROR if the code point doesn't exist. */ | |
227 | static long int iso6937_2byte_to_unicode(int prefix, int letter) | |
228 | { | |
229 | if (!(prefix >= 0xC0 && prefix < 0xD0 && letter >= 0x40 && letter < 0x80)) | |
230 | return ERROR; | |
231 | return iso6937_2byte_forward[(prefix - 0xC0) * 0x40 + (letter - 0x40)]; | |
232 | } | |
233 | ||
234 | /* This returns true if it filled in the output values */ | |
235 | static bool iso6937_2byte_from_unicode(long int cp, int *prefix, int *letter) | |
236 | { | |
237 | int lo = -1, hi = lenof(iso6937_2byte_backward); | |
238 | ||
239 | while (hi - lo >= 2) { | |
240 | int mid = (hi + lo) / 2; | |
241 | int midpos = iso6937_2byte_backward[mid]; | |
242 | long int midcp = iso6937_2byte_forward[midpos]; | |
243 | if (cp == midcp) { | |
244 | *prefix = 0xC0 + (midpos >> 6); | |
245 | *letter = 0x40 + (midpos & 0x3F); | |
246 | return true; | |
247 | } else if (cp < midcp) { | |
248 | hi = mid; | |
249 | } else { | |
250 | lo = mid; | |
251 | } | |
252 | } | |
253 | return false; | |
254 | } | |
255 | ||
256 | void read_iso6937(charset_spec const *charset, long int input_chr, | |
257 | charset_state *state, | |
258 | void (*emit)(void *ctx, long int output), void *emitctx) | |
259 | { | |
260 | const sbcs_data *sd = charset->data; | |
261 | ||
262 | if (input_chr >= 0xC0 && input_chr < 0xD0) { | |
263 | /* | |
264 | * Input bytes in the C0-CF region of this encoding are | |
265 | * 'combining characters', but not in the Unicode sense of | |
266 | * mapping to separate Unicode code points. Instead, they're | |
267 | * prefixes which modify a specific set of subsequent printing | |
268 | * characters. Stash such a byte in the conversion state to | |
269 | * use in the next call. | |
270 | */ | |
271 | if (state->s0) { | |
272 | emit(emitctx, ERROR); /* the previous prefix was erroneous */ | |
273 | } | |
274 | state->s0 = input_chr; | |
275 | } else { | |
276 | if (state->s0) { | |
277 | long int output = iso6937_2byte_to_unicode(state->s0, input_chr); | |
278 | emit(emitctx, output); | |
279 | state->s0 = 0; | |
280 | ||
281 | /* | |
282 | * If we've successfully emitted a character, we're done. | |
283 | * Otherwise, we'll take the view that the ERROR we've | |
284 | * emitted corresponded to _just_ the misplaced prefix | |
285 | * byte, so we'll fall through to the emit() below which | |
286 | * will output the unmodified followup byte too. | |
287 | */ | |
288 | if (output != ERROR) | |
289 | return; | |
290 | } | |
291 | ||
292 | emit(emitctx, sbcs_to_unicode(sd, input_chr)); | |
293 | } | |
294 | } | |
295 | ||
296 | bool write_iso6937(charset_spec const *charset, long int input_chr, | |
297 | charset_state *state, | |
298 | void (*emit)(void *ctx, long int output), void *emitctx) | |
299 | { | |
300 | const struct sbcs_data *sd = charset->data; | |
301 | long int ret; | |
302 | int prefix, letter; | |
303 | ||
304 | UNUSEDARG(state); | |
305 | ||
306 | if (input_chr == -1) | |
307 | return true; /* stateless; no cleanup required */ | |
308 | ||
309 | if ((ret = sbcs_from_unicode(sd, input_chr)) != ERROR) { | |
310 | emit(emitctx, ret); | |
311 | return true; | |
312 | } else if (iso6937_2byte_from_unicode(input_chr, &prefix, &letter)) { | |
313 | emit(emitctx, prefix); | |
314 | emit(emitctx, letter); | |
315 | return true; | |
316 | } else { | |
317 | return false; | |
318 | } | |
319 | } | |
320 | ||
321 | extern const sbcs_data sbcsdata_ISO6937, sbcsdata_ISO6937_EURO; | |
322 | ||
323 | const charset_spec charset_CS_ISO6937 = { | |
324 | CS_ISO6937, read_iso6937, write_iso6937, &sbcsdata_ISO6937 | |
325 | }; | |
326 | const charset_spec charset_CS_ISO6937_EURO = { | |
327 | CS_ISO6937_EURO, read_iso6937, write_iso6937, &sbcsdata_ISO6937_EURO | |
328 | }; | |
329 | ||
330 | #else /* ENUM_CHARSETS */ | |
331 | ||
332 | ENUM_CHARSET(CS_ISO6937) | |
333 | ENUM_CHARSET(CS_ISO6937_EURO) | |
334 | ||
335 | #endif /* ENUM_CHARSETS */ |
1951 | 1951 | return jisx0208_forward[r][c]; |
1952 | 1952 | } |
1953 | 1953 | |
1954 | /* This one returns 1 on success, 0 if the code point doesn't exist. */ | |
1955 | int unicode_to_jisx0208(long int unicode, int *r, int *c) | |
1954 | /* This one returns true on success, false if the code point doesn't exist. */ | |
1955 | bool unicode_to_jisx0208(long int unicode, int *r, int *c) | |
1956 | 1956 | { |
1957 | 1957 | int rr, cc; |
1958 | 1958 | long int uu; |
1972 | 1972 | else { |
1973 | 1973 | *r = rr; |
1974 | 1974 | *c = cc; |
1975 | return 1; | |
1975 | return true; | |
1976 | 1976 | } |
1977 | 1977 | } |
1978 | return 0; | |
1978 | return false; | |
1979 | 1979 | } |
1980 | 1980 | |
1981 | 1981 | #ifdef TESTMODE |
1849 | 1849 | return jisx0212_forward[r][c]; |
1850 | 1850 | } |
1851 | 1851 | |
1852 | /* This one returns 1 on success, 0 if the code point doesn't exist. */ | |
1853 | int unicode_to_jisx0212(long int unicode, int *r, int *c) | |
1852 | /* This one returns true on success, false if the code point doesn't exist. */ | |
1853 | bool unicode_to_jisx0212(long int unicode, int *r, int *c) | |
1854 | 1854 | { |
1855 | 1855 | int rr, cc; |
1856 | 1856 | long int uu; |
1870 | 1870 | else { |
1871 | 1871 | *r = rr; |
1872 | 1872 | *c = cc; |
1873 | return 1; | |
1873 | return true; | |
1874 | 1874 | } |
1875 | 1875 | } |
1876 | return 0; | |
1876 | return false; | |
1877 | 1877 | } |
1878 | 1878 | |
1879 | 1879 | #ifdef TESTMODE |
5168 | 5168 | return cp949_forward[r][c]; |
5169 | 5169 | } |
5170 | 5170 | |
5171 | /* This one returns 1 on success, 0 if the code point doesn't exist. */ | |
5172 | int unicode_to_cp949(long int unicode, int *r, int *c) | |
5171 | /* This one returns true on success, false if the code point doesn't exist. */ | |
5172 | bool unicode_to_cp949(long int unicode, int *r, int *c) | |
5173 | 5173 | { |
5174 | 5174 | int rr, cc; |
5175 | 5175 | long int uu; |
5189 | 5189 | else { |
5190 | 5190 | *r = rr; |
5191 | 5191 | *c = cc; |
5192 | return 1; | |
5192 | return true; | |
5193 | 5193 | } |
5194 | 5194 | } |
5195 | return 0; | |
5195 | return false; | |
5196 | 5196 | } |
5197 | 5197 | |
5198 | 5198 | /* Functions dealing with the KS X 1001 square subset */ |
5203 | 5203 | return cp949_forward[r+0x21][c+0x61]; |
5204 | 5204 | } |
5205 | 5205 | |
5206 | /* This one returns 1 on success, 0 if the code point doesn't exist. */ | |
5207 | int unicode_to_ksx1001(long int unicode, int *r, int *c) | |
5206 | /* This one returns true on success, false if the code point doesn't exist. */ | |
5207 | bool unicode_to_ksx1001(long int unicode, int *r, int *c) | |
5208 | 5208 | { |
5209 | 5209 | int rr, cc; |
5210 | 5210 | if (!unicode_to_cp949(unicode, &rr, &cc)) |
5211 | return 0; | |
5211 | return false; | |
5212 | 5212 | rr -= 0x21; |
5213 | 5213 | cc -= 0x61; |
5214 | 5214 | if (rr < 0 || rr >= 94 || cc < 0 || cc >= 94) |
5215 | return 0; | |
5215 | return false; | |
5216 | 5216 | *r = rr; |
5217 | 5217 | *c = cc; |
5218 | return 1; | |
5218 | return true; | |
5219 | 5219 | } |
5220 | 5220 | |
5221 | 5221 | #ifdef TESTMODE |
23 | 23 | static const struct { |
24 | 24 | const char *name; |
25 | 25 | int charset; |
26 | int return_in_enum; /* enumeration misses some charsets */ | |
26 | bool return_in_enum; /* enumeration misses some charsets */ | |
27 | 27 | } localencs[] = { |
28 | { "<UNKNOWN>", CS_NONE, 0 }, | |
29 | { "ASCII", CS_ASCII, 1 }, | |
30 | { "BS 4730", CS_BS4730, 1 }, | |
31 | { "BS-4730", CS_BS4730, 0 }, | |
32 | { "BS4730", CS_BS4730, 0 }, | |
33 | { "ISO-8859-1", CS_ISO8859_1, 1 }, | |
34 | { "ISO-8859-1 with X11 line drawing", CS_ISO8859_1_X11, 0 }, | |
35 | { "ISO-8859-1-X11", CS_ISO8859_1_X11, 0 }, | |
36 | { "ISO8859-1-X11", CS_ISO8859_1_X11, 0 }, | |
37 | { "ISO-8859-2", CS_ISO8859_2, 1 }, | |
38 | { "ISO-8859-3", CS_ISO8859_3, 1 }, | |
39 | { "ISO-8859-4", CS_ISO8859_4, 1 }, | |
40 | { "ISO-8859-5", CS_ISO8859_5, 1 }, | |
41 | { "ISO-8859-6", CS_ISO8859_6, 1 }, | |
42 | { "ISO-8859-7", CS_ISO8859_7, 1 }, | |
43 | { "ISO-8859-8", CS_ISO8859_8, 1 }, | |
44 | { "ISO-8859-9", CS_ISO8859_9, 1 }, | |
45 | { "ISO-8859-10", CS_ISO8859_10, 1 }, | |
46 | { "ISO-8859-11", CS_ISO8859_11, 1 }, | |
47 | { "ISO-8859-13", CS_ISO8859_13, 1 }, | |
48 | { "ISO-8859-14", CS_ISO8859_14, 1 }, | |
49 | { "ISO-8859-15", CS_ISO8859_15, 1 }, | |
50 | { "ISO-8859-16", CS_ISO8859_16, 1 }, | |
51 | { "CP437", CS_CP437, 1 }, | |
52 | { "CP850", CS_CP850, 1 }, | |
53 | { "CP852", CS_CP852, 1 }, | |
54 | { "CP866", CS_CP866, 1 }, | |
55 | { "CP874", CS_CP874, 1 }, | |
56 | { "Win874", CS_CP874, 0 }, | |
57 | { "Win-874", CS_CP874, 0 }, | |
58 | { "CP1250", CS_CP1250, 1 }, | |
59 | { "Win1250", CS_CP1250, 0 }, | |
60 | { "CP1251", CS_CP1251, 1 }, | |
61 | { "Win1251", CS_CP1251, 0 }, | |
62 | { "CP1252", CS_CP1252, 1 }, | |
63 | { "Win1252", CS_CP1252, 0 }, | |
64 | { "CP1253", CS_CP1253, 1 }, | |
65 | { "Win1253", CS_CP1253, 0 }, | |
66 | { "CP1254", CS_CP1254, 1 }, | |
67 | { "Win1254", CS_CP1254, 0 }, | |
68 | { "CP1255", CS_CP1255, 1 }, | |
69 | { "Win1255", CS_CP1255, 0 }, | |
70 | { "CP1256", CS_CP1256, 1 }, | |
71 | { "Win1256", CS_CP1256, 0 }, | |
72 | { "CP1257", CS_CP1257, 1 }, | |
73 | { "Win1257", CS_CP1257, 0 }, | |
74 | { "CP1258", CS_CP1258, 1 }, | |
75 | { "Win1258", CS_CP1258, 0 }, | |
76 | { "KOI8-R", CS_KOI8_R, 1 }, | |
77 | { "KOI8-U", CS_KOI8_U, 1 }, | |
78 | { "KOI8-RU", CS_KOI8_RU, 1 }, | |
79 | { "JIS X 0201", CS_JISX0201, 1 }, | |
80 | { "JIS-X-0201", CS_JISX0201, 0 }, | |
81 | { "JIS_X_0201", CS_JISX0201, 0 }, | |
82 | { "JISX0201", CS_JISX0201, 0 }, | |
83 | { "Mac Roman", CS_MAC_ROMAN, 1 }, | |
84 | { "Mac-Roman", CS_MAC_ROMAN, 0 }, | |
85 | { "MacRoman", CS_MAC_ROMAN, 0 }, | |
86 | { "Mac Turkish", CS_MAC_TURKISH, 1 }, | |
87 | { "Mac-Turkish", CS_MAC_TURKISH, 0 }, | |
88 | { "MacTurkish", CS_MAC_TURKISH, 0 }, | |
89 | { "Mac Croatian", CS_MAC_CROATIAN, 1 }, | |
90 | { "Mac-Croatian", CS_MAC_CROATIAN, 0 }, | |
91 | { "MacCroatian", CS_MAC_CROATIAN, 0 }, | |
92 | { "Mac Iceland", CS_MAC_ICELAND, 1 }, | |
93 | { "Mac-Iceland", CS_MAC_ICELAND, 0 }, | |
94 | { "MacIceland", CS_MAC_ICELAND, 0 }, | |
95 | { "Mac Romanian", CS_MAC_ROMANIAN, 1 }, | |
96 | { "Mac-Romanian", CS_MAC_ROMANIAN, 0 }, | |
97 | { "MacRomanian", CS_MAC_ROMANIAN, 0 }, | |
98 | { "Mac Greek", CS_MAC_GREEK, 1 }, | |
99 | { "Mac-Greek", CS_MAC_GREEK, 0 }, | |
100 | { "MacGreek", CS_MAC_GREEK, 0 }, | |
101 | { "Mac Cyrillic", CS_MAC_CYRILLIC, 1 }, | |
102 | { "Mac-Cyrillic", CS_MAC_CYRILLIC, 0 }, | |
103 | { "MacCyrillic", CS_MAC_CYRILLIC, 0 }, | |
104 | { "Mac Thai", CS_MAC_THAI, 1 }, | |
105 | { "Mac-Thai", CS_MAC_THAI, 0 }, | |
106 | { "MacThai", CS_MAC_THAI, 0 }, | |
107 | { "Mac Centeuro", CS_MAC_CENTEURO, 1 }, | |
108 | { "Mac-Centeuro", CS_MAC_CENTEURO, 0 }, | |
109 | { "MacCenteuro", CS_MAC_CENTEURO, 0 }, | |
110 | { "Mac Symbol", CS_MAC_SYMBOL, 1 }, | |
111 | { "Mac-Symbol", CS_MAC_SYMBOL, 0 }, | |
112 | { "MacSymbol", CS_MAC_SYMBOL, 0 }, | |
113 | { "Mac Dingbats", CS_MAC_DINGBATS, 1 }, | |
114 | { "Mac-Dingbats", CS_MAC_DINGBATS, 0 }, | |
115 | { "MacDingbats", CS_MAC_DINGBATS, 0 }, | |
116 | { "Mac Roman (old)", CS_MAC_ROMAN_OLD, 0 }, | |
117 | { "Mac-Roman-old", CS_MAC_ROMAN_OLD, 0 }, | |
118 | { "MacRoman-old", CS_MAC_ROMAN_OLD, 0 }, | |
119 | { "Mac Croatian (old)", CS_MAC_CROATIAN_OLD, 0 }, | |
120 | { "Mac-Croatian-old", CS_MAC_CROATIAN_OLD, 0 }, | |
121 | { "MacCroatian-old", CS_MAC_CROATIAN_OLD, 0 }, | |
122 | { "Mac Iceland (old)", CS_MAC_ICELAND_OLD, 0 }, | |
123 | { "Mac-Iceland-old", CS_MAC_ICELAND_OLD, 0 }, | |
124 | { "MacIceland-old", CS_MAC_ICELAND_OLD, 0 }, | |
125 | { "Mac Romanian (old)", CS_MAC_ROMANIAN_OLD, 0 }, | |
126 | { "Mac-Romanian-old", CS_MAC_ROMANIAN_OLD, 0 }, | |
127 | { "MacRomanian-old", CS_MAC_ROMANIAN_OLD, 0 }, | |
128 | { "Mac Greek (old)", CS_MAC_GREEK_OLD, 0 }, | |
129 | { "Mac-Greek-old", CS_MAC_GREEK_OLD, 0 }, | |
130 | { "MacGreek-old", CS_MAC_GREEK_OLD, 0 }, | |
131 | { "Mac Cyrillic (old)", CS_MAC_CYRILLIC_OLD, 0 }, | |
132 | { "Mac-Cyrillic-old", CS_MAC_CYRILLIC_OLD, 0 }, | |
133 | { "MacCyrillic-old", CS_MAC_CYRILLIC_OLD, 0 }, | |
134 | { "Mac Ukraine", CS_MAC_UKRAINE, 1 }, | |
135 | { "Mac-Ukraine", CS_MAC_UKRAINE, 0 }, | |
136 | { "MacUkraine", CS_MAC_UKRAINE, 0 }, | |
137 | { "Mac VT100", CS_MAC_VT100, 1 }, | |
138 | { "Mac-VT100", CS_MAC_VT100, 0 }, | |
139 | { "MacVT100", CS_MAC_VT100, 0 }, | |
140 | { "Mac VT100 (old)", CS_MAC_VT100_OLD, 0 }, | |
141 | { "Mac-VT100-old", CS_MAC_VT100_OLD, 0 }, | |
142 | { "MacVT100-old", CS_MAC_VT100_OLD, 0 }, | |
143 | { "Mac Roman (Pirard encoding)", CS_MAC_PIRARD, 0 }, | |
144 | { "Mac Pirard", CS_MAC_PIRARD, 0 }, | |
145 | { "Mac-Pirard", CS_MAC_PIRARD, 0 }, | |
146 | { "MacPirard", CS_MAC_PIRARD, 0 }, | |
147 | { "VISCII", CS_VISCII, 1 }, | |
148 | { "HP ROMAN8", CS_HP_ROMAN8, 1 }, | |
149 | { "HP-ROMAN8", CS_HP_ROMAN8, 0 }, | |
150 | { "DEC MCS", CS_DEC_MCS, 1 }, | |
151 | { "DEC-MCS", CS_DEC_MCS, 1 }, | |
152 | { "DEC graphics", CS_DEC_GRAPHICS, 1 }, | |
153 | { "DEC-graphics", CS_DEC_GRAPHICS, 0 }, | |
154 | { "DECgraphics", CS_DEC_GRAPHICS, 0 }, | |
155 | { "UTF-8", CS_UTF8, 1 }, | |
156 | { "UTF-7", CS_UTF7, 1 }, | |
157 | { "UTF-7-conservative", CS_UTF7_CONSERVATIVE, 0 }, | |
158 | { "EUC-CN", CS_EUC_CN, 1 }, | |
159 | { "EUC-KR", CS_EUC_KR, 1 }, | |
160 | { "EUC-JP", CS_EUC_JP, 1 }, | |
161 | { "EUC-TW", CS_EUC_TW, 1 }, | |
162 | { "ISO-2022-JP", CS_ISO2022_JP, 1 }, | |
163 | { "ISO-2022-KR", CS_ISO2022_KR, 1 }, | |
164 | { "Big5", CS_BIG5, 1 }, | |
165 | { "Shift-JIS", CS_SHIFT_JIS, 1 }, | |
166 | { "HZ", CS_HZ, 1 }, | |
167 | { "UTF-16BE", CS_UTF16BE, 1 }, | |
168 | { "UTF-16LE", CS_UTF16LE, 1 }, | |
169 | { "UTF-16", CS_UTF16, 1 }, | |
170 | { "CP949", CS_CP949, 1 }, | |
171 | { "PDFDocEncoding", CS_PDF, 1 }, | |
172 | { "StandardEncoding", CS_PSSTD, 1 }, | |
173 | { "COMPOUND_TEXT", CS_CTEXT, 1 }, | |
174 | { "COMPOUND-TEXT", CS_CTEXT, 0 }, | |
175 | { "COMPOUND TEXT", CS_CTEXT, 0 }, | |
176 | { "COMPOUNDTEXT", CS_CTEXT, 0 }, | |
177 | { "CTEXT", CS_CTEXT, 0 }, | |
178 | { "ISO-2022", CS_ISO2022, 1 }, | |
179 | { "ISO2022", CS_ISO2022, 0 }, | |
28 | { "<UNKNOWN>", CS_NONE, false }, | |
29 | { "ASCII", CS_ASCII, true }, | |
30 | { "BS 4730", CS_BS4730, true }, | |
31 | { "BS-4730", CS_BS4730, false }, | |
32 | { "BS4730", CS_BS4730, false }, | |
33 | { "ISO-8859-1", CS_ISO8859_1, true }, | |
34 | { "ISO-8859-1 with X11 line drawing", CS_ISO8859_1_X11, false }, | |
35 | { "ISO-8859-1-X11", CS_ISO8859_1_X11, false }, | |
36 | { "ISO8859-1-X11", CS_ISO8859_1_X11, false }, | |
37 | { "ISO-8859-2", CS_ISO8859_2, true }, | |
38 | { "ISO-8859-3", CS_ISO8859_3, true }, | |
39 | { "ISO-8859-4", CS_ISO8859_4, true }, | |
40 | { "ISO-8859-5", CS_ISO8859_5, true }, | |
41 | { "ISO-8859-6", CS_ISO8859_6, true }, | |
42 | { "ISO-8859-7", CS_ISO8859_7, true }, | |
43 | { "ISO-8859-8", CS_ISO8859_8, true }, | |
44 | { "ISO-8859-9", CS_ISO8859_9, true }, | |
45 | { "ISO-8859-10", CS_ISO8859_10, true }, | |
46 | { "ISO-8859-11", CS_ISO8859_11, true }, | |
47 | { "ISO-8859-13", CS_ISO8859_13, true }, | |
48 | { "ISO-8859-14", CS_ISO8859_14, true }, | |
49 | { "ISO-8859-15", CS_ISO8859_15, true }, | |
50 | { "ISO-8859-16", CS_ISO8859_16, true }, | |
51 | { "CP437", CS_CP437, true }, | |
52 | { "CP850", CS_CP850, true }, | |
53 | { "CP852", CS_CP852, true }, | |
54 | { "CP866", CS_CP866, true }, | |
55 | { "CP874", CS_CP874, true }, | |
56 | { "Win874", CS_CP874, false }, | |
57 | { "Win-874", CS_CP874, false }, | |
58 | { "CP1250", CS_CP1250, true }, | |
59 | { "Win1250", CS_CP1250, false }, | |
60 | { "CP1251", CS_CP1251, true }, | |
61 | { "Win1251", CS_CP1251, false }, | |
62 | { "CP1252", CS_CP1252, true }, | |
63 | { "Win1252", CS_CP1252, false }, | |
64 | { "CP1253", CS_CP1253, true }, | |
65 | { "Win1253", CS_CP1253, false }, | |
66 | { "CP1254", CS_CP1254, true }, | |
67 | { "Win1254", CS_CP1254, false }, | |
68 | { "CP1255", CS_CP1255, true }, | |
69 | { "Win1255", CS_CP1255, false }, | |
70 | { "CP1256", CS_CP1256, true }, | |
71 | { "Win1256", CS_CP1256, false }, | |
72 | { "CP1257", CS_CP1257, true }, | |
73 | { "Win1257", CS_CP1257, false }, | |
74 | { "CP1258", CS_CP1258, true }, | |
75 | { "Win1258", CS_CP1258, false }, | |
76 | { "KOI8-R", CS_KOI8_R, true }, | |
77 | { "KOI8R", CS_KOI8_R, false }, | |
78 | { "KOI8-U", CS_KOI8_U, true }, | |
79 | { "KOI8U", CS_KOI8_U, false }, | |
80 | { "KOI8-RU", CS_KOI8_RU, true }, | |
81 | { "KOI8RU", CS_KOI8_RU, false }, | |
82 | { "JIS X 0201", CS_JISX0201, true }, | |
83 | { "JIS-X-0201", CS_JISX0201, false }, | |
84 | { "JIS_X_0201", CS_JISX0201, false }, | |
85 | { "JISX0201", CS_JISX0201, false }, | |
86 | { "Mac Roman", CS_MAC_ROMAN, true }, | |
87 | { "Mac-Roman", CS_MAC_ROMAN, false }, | |
88 | { "MacRoman", CS_MAC_ROMAN, false }, | |
89 | { "Mac Turkish", CS_MAC_TURKISH, true }, | |
90 | { "Mac-Turkish", CS_MAC_TURKISH, false }, | |
91 | { "MacTurkish", CS_MAC_TURKISH, false }, | |
92 | { "Mac Croatian", CS_MAC_CROATIAN, true }, | |
93 | { "Mac-Croatian", CS_MAC_CROATIAN, false }, | |
94 | { "MacCroatian", CS_MAC_CROATIAN, false }, | |
95 | { "Mac Iceland", CS_MAC_ICELAND, true }, | |
96 | { "Mac-Iceland", CS_MAC_ICELAND, false }, | |
97 | { "MacIceland", CS_MAC_ICELAND, false }, | |
98 | { "Mac Romanian", CS_MAC_ROMANIAN, true }, | |
99 | { "Mac-Romanian", CS_MAC_ROMANIAN, false }, | |
100 | { "MacRomanian", CS_MAC_ROMANIAN, false }, | |
101 | { "Mac Greek", CS_MAC_GREEK, true }, | |
102 | { "Mac-Greek", CS_MAC_GREEK, false }, | |
103 | { "MacGreek", CS_MAC_GREEK, false }, | |
104 | { "Mac Cyrillic", CS_MAC_CYRILLIC, true }, | |
105 | { "Mac-Cyrillic", CS_MAC_CYRILLIC, false }, | |
106 | { "MacCyrillic", CS_MAC_CYRILLIC, false }, | |
107 | { "Mac Thai", CS_MAC_THAI, true }, | |
108 | { "Mac-Thai", CS_MAC_THAI, false }, | |
109 | { "MacThai", CS_MAC_THAI, false }, | |
110 | { "Mac Centeuro", CS_MAC_CENTEURO, true }, | |
111 | { "Mac-Centeuro", CS_MAC_CENTEURO, false }, | |
112 | { "MacCenteuro", CS_MAC_CENTEURO, false }, | |
113 | { "Mac Symbol", CS_MAC_SYMBOL, true }, | |
114 | { "Mac-Symbol", CS_MAC_SYMBOL, false }, | |
115 | { "MacSymbol", CS_MAC_SYMBOL, false }, | |
116 | { "Mac Dingbats", CS_MAC_DINGBATS, true }, | |
117 | { "Mac-Dingbats", CS_MAC_DINGBATS, false }, | |
118 | { "MacDingbats", CS_MAC_DINGBATS, false }, | |
119 | { "Mac Roman (old)", CS_MAC_ROMAN_OLD, false }, | |
120 | { "Mac-Roman-old", CS_MAC_ROMAN_OLD, false }, | |
121 | { "MacRoman-old", CS_MAC_ROMAN_OLD, false }, | |
122 | { "Mac Croatian (old)", CS_MAC_CROATIAN_OLD, false }, | |
123 | { "Mac-Croatian-old", CS_MAC_CROATIAN_OLD, false }, | |
124 | { "MacCroatian-old", CS_MAC_CROATIAN_OLD, false }, | |
125 | { "Mac Iceland (old)", CS_MAC_ICELAND_OLD, false }, | |
126 | { "Mac-Iceland-old", CS_MAC_ICELAND_OLD, false }, | |
127 | { "MacIceland-old", CS_MAC_ICELAND_OLD, false }, | |
128 | { "Mac Romanian (old)", CS_MAC_ROMANIAN_OLD, false }, | |
129 | { "Mac-Romanian-old", CS_MAC_ROMANIAN_OLD, false }, | |
130 | { "MacRomanian-old", CS_MAC_ROMANIAN_OLD, false }, | |
131 | { "Mac Greek (old)", CS_MAC_GREEK_OLD, false }, | |
132 | { "Mac-Greek-old", CS_MAC_GREEK_OLD, false }, | |
133 | { "MacGreek-old", CS_MAC_GREEK_OLD, false }, | |
134 | { "Mac Cyrillic (old)", CS_MAC_CYRILLIC_OLD, false }, | |
135 | { "Mac-Cyrillic-old", CS_MAC_CYRILLIC_OLD, false }, | |
136 | { "MacCyrillic-old", CS_MAC_CYRILLIC_OLD, false }, | |
137 | { "Mac Ukraine", CS_MAC_UKRAINE, true }, | |
138 | { "Mac-Ukraine", CS_MAC_UKRAINE, false }, | |
139 | { "MacUkraine", CS_MAC_UKRAINE, false }, | |
140 | { "Mac VT100", CS_MAC_VT100, true }, | |
141 | { "Mac-VT100", CS_MAC_VT100, false }, | |
142 | { "MacVT100", CS_MAC_VT100, false }, | |
143 | { "Mac VT100 (old)", CS_MAC_VT100_OLD, false }, | |
144 | { "Mac-VT100-old", CS_MAC_VT100_OLD, false }, | |
145 | { "MacVT100-old", CS_MAC_VT100_OLD, false }, | |
146 | { "Mac Roman (Pirard encoding)", CS_MAC_PIRARD, false }, | |
147 | { "Mac Pirard", CS_MAC_PIRARD, false }, | |
148 | { "Mac-Pirard", CS_MAC_PIRARD, false }, | |
149 | { "MacPirard", CS_MAC_PIRARD, false }, | |
150 | { "VISCII", CS_VISCII, true }, | |
151 | { "HP ROMAN8", CS_HP_ROMAN8, true }, | |
152 | { "HP-ROMAN8", CS_HP_ROMAN8, false }, | |
153 | { "DEC MCS", CS_DEC_MCS, true }, | |
154 | { "DEC-MCS", CS_DEC_MCS, true }, | |
155 | { "DEC graphics", CS_DEC_GRAPHICS, true }, | |
156 | { "DEC-graphics", CS_DEC_GRAPHICS, false }, | |
157 | { "DECgraphics", CS_DEC_GRAPHICS, false }, | |
158 | { "UTF-8", CS_UTF8, true }, | |
159 | { "UTF8", CS_UTF8, false }, | |
160 | { "UTF-7", CS_UTF7, true }, | |
161 | { "UTF7", CS_UTF7, false }, | |
162 | { "UTF-7-conservative", CS_UTF7_CONSERVATIVE, false }, | |
163 | { "EUC-CN", CS_EUC_CN, true }, | |
164 | { "EUC-KR", CS_EUC_KR, true }, | |
165 | { "EUC-JP", CS_EUC_JP, true }, | |
166 | { "EUC-TW", CS_EUC_TW, true }, | |
167 | { "ISO-2022-JP", CS_ISO2022_JP, true }, | |
168 | { "ISO-2022-KR", CS_ISO2022_KR, true }, | |
169 | { "Big5", CS_BIG5, true }, | |
170 | { "Shift-JIS", CS_SHIFT_JIS, true }, | |
171 | { "HZ", CS_HZ, true }, | |
172 | { "UTF-16BE", CS_UTF16BE, true }, | |
173 | { "UTF16BE", CS_UTF16BE, false }, | |
174 | { "UTF-16LE", CS_UTF16LE, true }, | |
175 | { "UTF16LE", CS_UTF16LE, false }, | |
176 | { "UTF-16BE-NO-BOM", CS_UTF16BE_NO_BOM, true }, | |
177 | { "UTF-16BE-NOBOM", CS_UTF16BE_NO_BOM, false }, | |
178 | { "UTF16BENOBOM", CS_UTF16BE_NO_BOM, false }, | |
179 | { "UTF-16LE-NO-BOM", CS_UTF16LE_NO_BOM, true }, | |
180 | { "UTF-16LE-NOBOM", CS_UTF16LE_NO_BOM, false }, | |
181 | { "UTF16LENOBOM", CS_UTF16LE_NO_BOM, false }, | |
182 | { "UTF-16", CS_UTF16, true }, | |
183 | { "UTF16", CS_UTF16, false }, | |
184 | { "CP949", CS_CP949, true }, | |
185 | { "PDFDocEncoding", CS_PDF, true }, | |
186 | { "StandardEncoding", CS_PSSTD, true }, | |
187 | { "COMPOUND_TEXT", CS_CTEXT, true }, | |
188 | { "COMPOUND-TEXT", CS_CTEXT, false }, | |
189 | { "COMPOUND TEXT", CS_CTEXT, false }, | |
190 | { "COMPOUNDTEXT", CS_CTEXT, false }, | |
191 | { "CTEXT", CS_CTEXT, false }, | |
192 | { "ISO-2022", CS_ISO2022, true }, | |
193 | { "ISO2022", CS_ISO2022, false }, | |
194 | { "ISO-6937", CS_ISO6937, true }, | |
195 | { "ISO6937", CS_ISO6937, false }, | |
196 | { "ISO-6937 with euro sign", CS_ISO6937_EURO, true }, | |
197 | { "ISO-6937-euro", CS_ISO6937_EURO, false }, | |
198 | { "ISO6937-euro", CS_ISO6937_EURO, false }, | |
199 | { "ITS", CS_ITS, true }, | |
200 | { "SAIL", CS_SAIL, true }, | |
201 | { "WAITS", CS_SAIL, false }, | |
180 | 202 | }; |
181 | 203 | |
182 | 204 | const char *charset_to_localenc(int charset) |
50 | 50 | return ERROR; |
51 | 51 | } |
52 | 52 | |
53 | int write_sbcs(charset_spec const *charset, long int input_chr, | |
54 | charset_state *state, | |
55 | void (*emit)(void *ctx, long int output), void *emitctx) | |
53 | bool write_sbcs(charset_spec const *charset, long int input_chr, | |
54 | charset_state *state, | |
55 | void (*emit)(void *ctx, long int output), void *emitctx) | |
56 | 56 | { |
57 | 57 | const struct sbcs_data *sd = charset->data; |
58 | 58 | long int ret; |
60 | 60 | UNUSEDARG(state); |
61 | 61 | |
62 | 62 | if (input_chr == -1) |
63 | return TRUE; /* stateless; no cleanup required */ | |
63 | return true; /* stateless; no cleanup required */ | |
64 | 64 | |
65 | 65 | ret = sbcs_from_unicode(sd, input_chr); |
66 | 66 | if (ret == ERROR) |
67 | return FALSE; | |
67 | return false; | |
68 | 68 | |
69 | 69 | emit(emitctx, ret); |
70 | return TRUE; | |
70 | return true; | |
71 | 71 | } |
1487 | 1487 | 2014 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX |
1488 | 1488 | XXXX 00C6 XXXX 00AA XXXX XXXX XXXX XXXX 0141 00D8 0152 00BA XXXX XXXX XXXX XXXX |
1489 | 1489 | XXXX 00E6 XXXX XXXX XXXX 0131 XXXX XXXX 0142 00F8 0153 00DF XXXX XXXX XXXX XXXX |
1490 | ||
1491 | ISO/IEC 6937. Or rather, this version is extended to add the usual | |
1492 | C0/C1 controls in the non-printing positions below 0xA0, on the | |
1493 | expectation that text encoded in this character set may still need | |
1494 | interleaving with the usual amenities such as newlines and terminal | |
1495 | escapes. | |
1496 | ||
1497 | Source: https://en.wikipedia.org/wiki/ISO/IEC_6937 and manual | |
1498 | transcription. | |
1499 | ||
1500 | ISO6937_EURO is a tiny modification to ISO/IEC 6937, used in DVB | |
1501 | (digital broadcast TV) in the metadata and EPG streams. Defined by | |
1502 | Annex A of the DVB standards document EN 300 468, it differs from | |
1503 | ordinary ISO6937 only in the addition of the euro sign in the unused | |
1504 | location 0xA4. | |
1505 | ||
1506 | Only the translation tables for the single-byte encodings are stored | |
1507 | here. The rest of the implementation of this pair of charsets is in | |
1508 | custom code, and lives in iso6937.c. | |
1509 | ||
1510 | tables ISO6937 | |
1511 | 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F | |
1512 | 0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F | |
1513 | 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F | |
1514 | 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F | |
1515 | 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F | |
1516 | 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F | |
1517 | 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F | |
1518 | 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F | |
1519 | 0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 008A 008B 008C 008D 008E 008F | |
1520 | 0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 009A 009B 009C 009D 009E 009F | |
1521 | 00A0 00A1 00A2 00A3 XXXX 00A5 XXXX 00A7 00A4 2018 201C 00AB 2190 2191 2192 2193 | |
1522 | 00B0 00B1 00B2 00B3 00D7 00B5 00B6 00B7 00F7 2019 201D 00BB 00BC 00BD 00BE 00BF | |
1523 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1524 | 2015 00B9 00AE 00A9 2122 266A 00AC 00A6 XXXX XXXX XXXX XXXX 215B 215C 215D 215E | |
1525 | 2126 00C6 0110 00AA 0126 XXXX 0132 013F 0141 00D8 0152 00BA 00DE 0166 014A 0149 | |
1526 | 0138 00E6 0111 00F0 0127 0131 0133 0140 0142 00F8 0153 00DF 00FE 0167 014B 00AD | |
1527 | ||
1528 | tables ISO6937_EURO | |
1529 | 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F | |
1530 | 0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F | |
1531 | 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F | |
1532 | 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F | |
1533 | 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F | |
1534 | 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F | |
1535 | 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F | |
1536 | 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F | |
1537 | 0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 008A 008B 008C 008D 008E 008F | |
1538 | 0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 009A 009B 009C 009D 009E 009F | |
1539 | 00A0 00A1 00A2 00A3 20AC 00A5 XXXX 00A7 00A4 2018 201C 00AB 2190 2191 2192 2193 | |
1540 | 00B0 00B1 00B2 00B3 00D7 00B5 00B6 00B7 00F7 2019 201D 00BB 00BC 00BD 00BE 00BF | |
1541 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1542 | 2015 00B9 00AE 00A9 2122 266A 00AC 00A6 XXXX XXXX XXXX XXXX 215B 215C 215D 215E | |
1543 | 2126 00C6 0110 00AA 0126 XXXX 0132 013F 0141 00D8 0152 00BA 00DE 0166 014A 0149 | |
1544 | 0138 00E6 0111 00F0 0127 0131 0133 0140 0142 00F8 0153 00DF 00FE 0167 014B 00AD | |
1545 | ||
1546 | The ITS character set, standardised in the SUPDUP protocol (RFC | |
1547 | 734). Fills in the whole C0 control space and 7F with graphic | |
1548 | characters, on the basis that SUPDUP has its own out-of-band way to | |
1549 | do terminal control. | |
1550 | ||
1551 | RFC 734 doesn't give exact Unicode code points for its extra | |
1552 | characters (it couldn't have done so without time travel, of | |
1553 | course!). I've tried to choose the best representation in each case. | |
1554 | In particular, I've chosen position 0x16 to be U+2297 CIRCLED TIMES | |
1555 | rather than U+24E7 CIRCLED LATIN SMALL LETTER X. The RFC says | |
1556 | 'circle-X', but since it also has 'circle-plus' at position 0x0D | |
1557 | (which is unambiguously U+2295 CIRCLED PLUS), my feeling is that the | |
1558 | notation in the RFC was not intended to be especially precise, and | |
1559 | the likely uses of the two characters match, i.e. both are intended | |
1560 | to be mathematical rather than literal. | |
1561 | ||
1562 | charset CS_ITS | |
1563 | 00B7 2193 03B1 03B2 2227 00AC 03B5 03C0 03BB 03B3 03B4 2191 00B1 2295 221E 2202 | |
1564 | 2282 2283 2229 222A 2200 2203 2297 2194 2190 2192 2260 25CA 2264 2265 2261 2228 | |
1565 | 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002a 002b 002c 002d 002e 002f | |
1566 | 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003a 003b 003c 003d 003e 003f | |
1567 | 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004a 004b 004c 004d 004e 004f | |
1568 | 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005a 005b 005c 005d 005e 005f | |
1569 | 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006a 006b 006c 006d 006e 006f | |
1570 | 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007a 007b 007c 007d 007e 222b | |
1571 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1572 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1573 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1574 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1575 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1576 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1577 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1578 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1579 | ||
1580 | The SAIL character set, used in the Stanford AI Lab's WAITS | |
1581 | operating system (a modified version of ITS). | |
1582 | ||
1583 | Source: https://www.saildart.org/allow/sail-charset-utf8.html plus | |
1584 | some manual editing. The web page gives translations that supersede | |
1585 | ASCII for 00-1F, 5E, 5F, 7B-7F. The rest I've filled in with their | |
1586 | normal ASCII values, so in particular, CR, LF and tab still act as | |
1587 | expected. | |
1588 | ||
1589 | Also, that web page lists code point 0C as "form feed as FF symbol", | |
1590 | but for some reason doesn't give the Unicode value U+240C for that. | |
1591 | I've filled it in. | |
1592 | ||
1593 | charset CS_SAIL | |
1594 | 0000 2193 03b1 03b2 2227 00ac 03b5 03c0 03bb 0009 000a 000b 240c 000d 221e 2202 | |
1595 | 2282 2283 2229 222a 2200 2203 2297 2194 005f 2192 007e 2260 2264 2265 2261 2228 | |
1596 | 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002a 002b 002c 002d 002e 002f | |
1597 | 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003a 003b 003c 003d 003e 003f | |
1598 | 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004a 004b 004c 004d 004e 004f | |
1599 | 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005a 005b 005c 005d 2191 2190 | |
1600 | 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006a 006b 006c 006d 006e 006f | |
1601 | 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007a 007b 007c 2387 007d 2408 | |
1602 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1603 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1604 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1605 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1606 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1607 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1608 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX | |
1609 | XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX |
64 | 64 | while (<INFH>) { |
65 | 65 | chomp; |
66 | 66 | y/\r\n//; # robustness in the face of strange line endings |
67 | if (/^charset (.*)$/) { | |
68 | $charsetname = $1; | |
67 | if (/^(charset|tables) (.*)$/) { | |
68 | $tables_only = ($1 eq "tables"); | |
69 | $charsetname = $2; | |
69 | 70 | @vals = (); |
70 | 71 | @sortpriority = map { 0 } 0..255; |
71 | 72 | } elsif (/^sortpriority ([^-]*)-([^-]*) (.*)$/) { |
77 | 78 | if (scalar @vals > 256) { |
78 | 79 | die "$infile:$.: charset $charsetname has more than 256 values\n"; |
79 | 80 | } elsif (scalar @vals == 256) { |
80 | &outcharset($charsetname, \@vals, \@sortpriority) | |
81 | &outcharset($charsetname, \@vals, \@sortpriority, $tables_only) | |
81 | 82 | if defined $outfile; |
82 | push @charsetnames, $charsetname; | |
83 | push @charsetnames, $charsetname unless $tables_only; | |
83 | 84 | $charsetname = undef; |
84 | 85 | @vals = (); |
85 | 86 | @sortpriority = map { 0 } 0..255; |
127 | 128 | close HEADERFH; |
128 | 129 | } |
129 | 130 | |
130 | sub outcharset($$$) { | |
131 | my ($name, $vals, $sortpriority) = @_; | |
131 | sub outcharset($$$$) { | |
132 | my ($name, $vals, $sortpriority, $tables_only) = @_; | |
132 | 133 | my ($prefix, $i, @sorted); |
133 | 134 | |
134 | 135 | print "const sbcs_data sbcsdata_$name = {\n"; |
169 | 170 | } |
170 | 171 | printf "\n },\n %d\n", $j; |
171 | 172 | print "};\n"; |
172 | print "const charset_spec charset_$name = {\n" . | |
173 | " $name, read_sbcs, write_sbcs, &sbcsdata_$name\n};\n\n"; | |
173 | unless ($tables_only) { | |
174 | print "const charset_spec charset_$name = {\n" . | |
175 | " $name, read_sbcs, write_sbcs, &sbcsdata_$name\n};\n\n"; | |
176 | } | |
174 | 177 | } |
76 | 76 | * charset_state. |
77 | 77 | */ |
78 | 78 | |
79 | static int write_sjis(charset_spec const *charset, long int input_chr, | |
80 | charset_state *state, | |
81 | void (*emit)(void *ctx, long int output), void *emitctx) | |
79 | static bool write_sjis(charset_spec const *charset, long int input_chr, | |
80 | charset_state *state, | |
81 | void (*emit)(void *ctx, long int output), void *emitctx) | |
82 | 82 | { |
83 | 83 | UNUSEDARG(charset); |
84 | 84 | UNUSEDARG(state); |
85 | 85 | |
86 | 86 | if (input_chr == -1) |
87 | return TRUE; /* stateless; no cleanup required */ | |
87 | return true; /* stateless; no cleanup required */ | |
88 | 88 | |
89 | 89 | if (input_chr < 0x80 && input_chr != 0x5C && input_chr != 0x7E) { |
90 | 90 | emit(emitctx, input_chr); |
91 | return TRUE; | |
91 | return true; | |
92 | 92 | } else if (input_chr == 0xA5) { |
93 | 93 | emit(emitctx, 0x5C); |
94 | return TRUE; | |
94 | return true; | |
95 | 95 | } else if (input_chr == 0x203E) { |
96 | 96 | emit(emitctx, 0x7E); |
97 | return TRUE; | |
97 | return true; | |
98 | 98 | } else if (input_chr >= 0xFF61 && input_chr <= 0xFF9F) { |
99 | 99 | emit(emitctx, input_chr - (0xFF61 - 0xA1)); |
100 | return TRUE; | |
100 | return true; | |
101 | 101 | } else { |
102 | 102 | int r, c; |
103 | 103 | if (unicode_to_jisx0208(input_chr, &r, &c)) { |
109 | 109 | if (c >= 0x7F) c++; |
110 | 110 | emit(emitctx, r); |
111 | 111 | emit(emitctx, c); |
112 | return TRUE; | |
112 | return true; | |
113 | 113 | } else { |
114 | return FALSE; | |
114 | return false; | |
115 | 115 | } |
116 | 116 | } |
117 | 117 | } |
5 | 5 | #include "internal.h" |
6 | 6 | |
7 | 7 | #define ENUM_CHARSET(x) extern charset_spec const charset_##x; |
8 | #include "enum.c" | |
8 | #include "enum.h" | |
9 | 9 | #undef ENUM_CHARSET |
10 | 10 | |
11 | 11 | static charset_spec const *const cs_table[] = { |
12 | 12 | |
13 | 13 | #define ENUM_CHARSET(x) &charset_##x, |
14 | #include "enum.c" | |
14 | #include "enum.h" | |
15 | 15 | #undef ENUM_CHARSET |
16 | 16 | |
17 | 17 | }; |
27 | 27 | return NULL; |
28 | 28 | } |
29 | 29 | |
30 | int charset_exists(int charset) | |
30 | bool charset_exists(int charset) | |
31 | 31 | { |
32 | 32 | return charset_find_spec(charset) != NULL; |
33 | 33 | } |
34 | 34 | |
35 | int charset_is_single_byte(int charset) | |
35 | bool charset_is_single_byte(int charset) | |
36 | 36 | { |
37 | 37 | charset_spec const *spec = charset_find_spec(charset); |
38 | 38 | return spec && spec->read == read_sbcs; |
52 | 52 | } |
53 | 53 | |
54 | 54 | /* |
55 | * This function returns TRUE if the input charset is a vaguely | |
56 | * sensible superset of ASCII. That is, it returns FALSE for 7-bit | |
55 | * This function returns true if the input charset is a vaguely | |
56 | * sensible superset of ASCII. That is, it returns false for 7-bit | |
57 | 57 | * encoding formats such as HZ and UTF-7. |
58 | 58 | */ |
59 | int charset_contains_ascii(int charset) | |
59 | bool charset_contains_ascii(int charset) | |
60 | 60 | { |
61 | 61 | return (charset != CS_HZ && |
62 | 62 | charset != CS_UTF7 && |
10 | 10 | int writtenlen; |
11 | 11 | const wchar_t *errstr; |
12 | 12 | int errlen; |
13 | int stopped; | |
13 | bool stopped; | |
14 | 14 | }; |
15 | 15 | |
16 | 16 | static void unicode_emit(void *ctx, long int output) |
45 | 45 | param->writtenlen++; |
46 | 46 | } |
47 | 47 | } else { |
48 | param->stopped = 1; | |
48 | param->stopped = true; | |
49 | 49 | } |
50 | 50 | } |
51 | 51 | |
63 | 63 | param.errstr = errstr; |
64 | 64 | param.errlen = errlen; |
65 | 65 | param.writtenlen = 0; |
66 | param.stopped = 0; | |
66 | param.stopped = false; | |
67 | 67 | |
68 | 68 | if (state) |
69 | 69 | localstate = *state; /* structure copy */ |
8 | 8 | |
9 | 9 | struct utf16 { |
10 | 10 | int s0; /* initial value of state->s0 */ |
11 | bool output_bom; | |
11 | 12 | }; |
12 | 13 | |
13 | 14 | static void read_utf16(charset_spec const *charset, long int input_chr, |
153 | 154 | } |
154 | 155 | } |
155 | 156 | |
156 | static int write_utf16(charset_spec const *charset, long int input_chr, | |
157 | charset_state *state, | |
158 | void (*emit)(void *ctx, long int output), | |
159 | void *emitctx) | |
157 | static bool write_utf16(charset_spec const *charset, long int input_chr, | |
158 | charset_state *state, | |
159 | void (*emit)(void *ctx, long int output), | |
160 | void *emitctx) | |
160 | 161 | { |
161 | 162 | struct utf16 const *utf = (struct utf16 *)charset->data; |
162 | 163 | |
167 | 168 | */ |
168 | 169 | |
169 | 170 | if (input_chr < 0) |
170 | return TRUE; /* no cleanup required */ | |
171 | return true; /* no cleanup required */ | |
171 | 172 | |
172 | 173 | if ((input_chr >= 0xD800 && input_chr < 0xE000) || |
173 | 174 | input_chr >= 0x110000) { |
174 | 175 | /* |
175 | 176 | * We can't output surrogates, or anything above 0x10FFFF. |
176 | 177 | */ |
177 | return FALSE; | |
178 | return false; | |
178 | 179 | } |
179 | 180 | |
180 | 181 | if (!state->s0) { |
181 | 182 | state->s0 = 1; |
182 | emithl(emit, emitctx, utf->s0, 0xFEFF); | |
183 | if (utf->output_bom) | |
184 | emithl(emit, emitctx, utf->s0, 0xFEFF); | |
183 | 185 | } |
184 | 186 | |
185 | 187 | if (input_chr < 0x10000) { |
190 | 192 | emithl(emit, emitctx, utf->s0, 0xD800 | ((input_chr >> 10) & 0x3FF)); |
191 | 193 | emithl(emit, emitctx, utf->s0, 0xDC00 | (input_chr & 0x3FF)); |
192 | 194 | } |
193 | return TRUE; | |
195 | return true; | |
194 | 196 | } |
195 | 197 | |
196 | static const struct utf16 utf16_bigendian = { 0x20000 }; | |
197 | static const struct utf16 utf16_littleendian = { 0x10000 }; | |
198 | static const struct utf16 utf16_variable_endianness = { 0x30000 }; | |
198 | static const struct utf16 utf16_bigendian = { 0x20000, true }; | |
199 | static const struct utf16 utf16_littleendian = { 0x10000, true }; | |
200 | static const struct utf16 utf16_bigendian_no_bom = { 0x20000, false }; | |
201 | static const struct utf16 utf16_littleendian_no_bom = { 0x10000, false }; | |
202 | static const struct utf16 utf16_variable_endianness = { 0x30000, true }; | |
199 | 203 | |
200 | 204 | const charset_spec charset_CS_UTF16BE = { |
201 | 205 | CS_UTF16BE, read_utf16, write_utf16, &utf16_bigendian |
202 | 206 | }; |
203 | 207 | const charset_spec charset_CS_UTF16LE = { |
204 | 208 | CS_UTF16LE, read_utf16, write_utf16, &utf16_littleendian |
209 | }; | |
210 | const charset_spec charset_CS_UTF16BE_NO_BOM = { | |
211 | CS_UTF16BE_NO_BOM, read_utf16, write_utf16, &utf16_bigendian_no_bom | |
212 | }; | |
213 | const charset_spec charset_CS_UTF16LE_NO_BOM = { | |
214 | CS_UTF16LE_NO_BOM, read_utf16, write_utf16, &utf16_littleendian_no_bom | |
205 | 215 | }; |
206 | 216 | const charset_spec charset_CS_UTF16 = { |
207 | 217 | CS_UTF16, read_utf16, write_utf16, &utf16_variable_endianness |
212 | 222 | ENUM_CHARSET(CS_UTF16) |
213 | 223 | ENUM_CHARSET(CS_UTF16BE) |
214 | 224 | ENUM_CHARSET(CS_UTF16LE) |
225 | ENUM_CHARSET(CS_UTF16BE_NO_BOM) | |
226 | ENUM_CHARSET(CS_UTF16LE_NO_BOM) | |
215 | 227 | |
216 | 228 | #endif /* ENUM_CHARSETS */ |
164 | 164 | * which will directly encode Set O characters and the other of |
165 | 165 | * which will cautiously base64 them. |
166 | 166 | */ |
167 | static int write_utf7(charset_spec const *charset, long int input_chr, | |
168 | charset_state *state, | |
169 | void (*emit)(void *ctx, long int output), | |
170 | void *emitctx) | |
167 | static bool write_utf7(charset_spec const *charset, long int input_chr, | |
168 | charset_state *state, | |
169 | void (*emit)(void *ctx, long int output), | |
170 | void *emitctx) | |
171 | 171 | { |
172 | 172 | unsigned long hws[2]; |
173 | 173 | int nhws; |
184 | 184 | /* |
185 | 185 | * We can't output surrogates, or anything above 0x10FFFF. |
186 | 186 | */ |
187 | return FALSE; | |
187 | return false; | |
188 | 188 | } |
189 | 189 | |
190 | 190 | /* |
222 | 222 | emit(emitctx, input_chr); |
223 | 223 | if (input_chr == '+') |
224 | 224 | emit(emitctx, '-'); /* +- encodes + */ |
225 | return TRUE; | |
225 | return true; | |
226 | 226 | } |
227 | 227 | |
228 | 228 | /* |
237 | 237 | input_chr -= 0x10000; |
238 | 238 | if (input_chr >= 0x100000) { |
239 | 239 | /* Anything above 0x10FFFF is outside UTF-7 range. */ |
240 | return FALSE; | |
240 | return false; | |
241 | 241 | } |
242 | 242 | |
243 | 243 | nhws = 2; |
275 | 275 | emit(emitctx, base64_chars[out]); |
276 | 276 | } |
277 | 277 | } |
278 | return TRUE; | |
278 | return true; | |
279 | 279 | } |
280 | 280 | |
281 | 281 | const charset_spec charset_CS_UTF7 = { |
198 | 198 | * charset_state. |
199 | 199 | */ |
200 | 200 | |
201 | int write_utf8(charset_spec const *charset, long int input_chr, | |
202 | charset_state *state, | |
203 | void (*emit)(void *ctx, long int output), | |
204 | void *emitctx) | |
201 | bool write_utf8(charset_spec const *charset, long int input_chr, | |
202 | charset_state *state, | |
203 | void (*emit)(void *ctx, long int output), | |
204 | void *emitctx) | |
205 | 205 | { |
206 | 206 | UNUSEDARG(charset); |
207 | 207 | UNUSEDARG(state); |
208 | 208 | |
209 | 209 | if (input_chr == -1) |
210 | return TRUE; /* stateless; no cleanup required */ | |
210 | return true; /* stateless; no cleanup required */ | |
211 | 211 | |
212 | 212 | /* |
213 | 213 | * Refuse to output any illegal code points. |
214 | 214 | */ |
215 | 215 | if (input_chr == 0xFFFE || input_chr == 0xFFFF || |
216 | 216 | (input_chr >= 0xD800 && input_chr < 0xE000)) { |
217 | return FALSE; | |
217 | return false; | |
218 | 218 | } else if (input_chr < 0x80) { /* one-byte character */ |
219 | 219 | emit(emitctx, input_chr); |
220 | return TRUE; | |
220 | return true; | |
221 | 221 | } else if (input_chr < 0x800) { /* two-byte character */ |
222 | 222 | emit(emitctx, 0xC0 | (0x1F & (input_chr >> 6))); |
223 | 223 | emit(emitctx, 0x80 | (0x3F & (input_chr ))); |
224 | return TRUE; | |
224 | return true; | |
225 | 225 | } else if (input_chr < 0x10000) { /* three-byte character */ |
226 | 226 | emit(emitctx, 0xE0 | (0x0F & (input_chr >> 12))); |
227 | 227 | emit(emitctx, 0x80 | (0x3F & (input_chr >> 6))); |
228 | 228 | emit(emitctx, 0x80 | (0x3F & (input_chr ))); |
229 | return TRUE; | |
229 | return true; | |
230 | 230 | } else if (input_chr < 0x200000) { /* four-byte character */ |
231 | 231 | emit(emitctx, 0xF0 | (0x07 & (input_chr >> 18))); |
232 | 232 | emit(emitctx, 0x80 | (0x3F & (input_chr >> 12))); |
233 | 233 | emit(emitctx, 0x80 | (0x3F & (input_chr >> 6))); |
234 | 234 | emit(emitctx, 0x80 | (0x3F & (input_chr ))); |
235 | return TRUE; | |
235 | return true; | |
236 | 236 | } else if (input_chr < 0x4000000) {/* five-byte character */ |
237 | 237 | emit(emitctx, 0xF8 | (0x03 & (input_chr >> 24))); |
238 | 238 | emit(emitctx, 0x80 | (0x3F & (input_chr >> 18))); |
239 | 239 | emit(emitctx, 0x80 | (0x3F & (input_chr >> 12))); |
240 | 240 | emit(emitctx, 0x80 | (0x3F & (input_chr >> 6))); |
241 | 241 | emit(emitctx, 0x80 | (0x3F & (input_chr ))); |
242 | return TRUE; | |
242 | return true; | |
243 | 243 | } else { /* six-byte character */ |
244 | 244 | emit(emitctx, 0xFC | (0x01 & (input_chr >> 30))); |
245 | 245 | emit(emitctx, 0x80 | (0x3F & (input_chr >> 24))); |
247 | 247 | emit(emitctx, 0x80 | (0x3F & (input_chr >> 12))); |
248 | 248 | emit(emitctx, 0x80 | (0x3F & (input_chr >> 6))); |
249 | 249 | emit(emitctx, 0x80 | (0x3F & (input_chr ))); |
250 | return TRUE; | |
250 | return true; | |
251 | 251 | } |
252 | 252 | } |
253 | 253 | |
289 | 289 | } |
290 | 290 | if (l != str[i]) { |
291 | 291 | printf("%d: char %d came out as %08x, should be %08x\n", |
292 | line, i, str[i], l); | |
292 | line, i, str[i], (unsigned)l); | |
293 | 293 | total_errs++; |
294 | 294 | } |
295 | 295 | } |
330 | 330 | } |
331 | 331 | if (l != str[i]) { |
332 | 332 | printf("%d: char %d came out as %08x, should be %08x\n", |
333 | line, i, str[i], l); | |
333 | line, i, str[i], (unsigned)l); | |
334 | 334 | total_errs++; |
335 | 335 | } |
336 | 336 | } |
351 | 351 | { |
352 | 352 | printf("read tests beginning\n"); |
353 | 353 | utf8_read_test(TESTSTR("\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"), |
354 | 0x000003BA, /* GREEK SMALL LETTER KAPPA */ | |
355 | 0x00001F79, /* GREEK SMALL LETTER OMICRON WITH OXIA */ | |
356 | 0x000003C3, /* GREEK SMALL LETTER SIGMA */ | |
357 | 0x000003BC, /* GREEK SMALL LETTER MU */ | |
358 | 0x000003B5, /* GREEK SMALL LETTER EPSILON */ | |
359 | 0, -1); | |
354 | 0x000003BAL, /* GREEK SMALL LETTER KAPPA */ | |
355 | 0x00001F79L, /* GREEK SMALL LETTER OMICRON WITH OXIA */ | |
356 | 0x000003C3L, /* GREEK SMALL LETTER SIGMA */ | |
357 | 0x000003BCL, /* GREEK SMALL LETTER MU */ | |
358 | 0x000003B5L, /* GREEK SMALL LETTER EPSILON */ | |
359 | 0L, -1L); | |
360 | 360 | utf8_read_test(TESTSTR("\x00"), |
361 | 0x00000000, /* <control> */ | |
362 | 0, -1); | |
361 | 0x00000000L, /* <control> */ | |
362 | 0L, -1L); | |
363 | 363 | utf8_read_test(TESTSTR("\xC2\x80"), |
364 | 0x00000080, /* <control> */ | |
365 | 0, -1); | |
364 | 0x00000080L, /* <control> */ | |
365 | 0L, -1L); | |
366 | 366 | utf8_read_test(TESTSTR("\xE0\xA0\x80"), |
367 | 0x00000800, /* <no name available> */ | |
368 | 0, -1); | |
367 | 0x00000800L, /* <no name available> */ | |
368 | 0L, -1L); | |
369 | 369 | utf8_read_test(TESTSTR("\xF0\x90\x80\x80"), |
370 | 0x00010000, /* <no name available> */ | |
371 | 0, -1); | |
370 | 0x00010000L, /* <no name available> */ | |
371 | 0L, -1L); | |
372 | 372 | utf8_read_test(TESTSTR("\xF8\x88\x80\x80\x80"), |
373 | 0x00200000, /* <no name available> */ | |
374 | 0, -1); | |
373 | 0x00200000L, /* <no name available> */ | |
374 | 0L, -1L); | |
375 | 375 | utf8_read_test(TESTSTR("\xFC\x84\x80\x80\x80\x80"), |
376 | 0x04000000, /* <no name available> */ | |
377 | 0, -1); | |
376 | 0x04000000L, /* <no name available> */ | |
377 | 0L, -1L); | |
378 | 378 | utf8_read_test(TESTSTR("\x7F"), |
379 | 0x0000007F, /* <control> */ | |
380 | 0, -1); | |
379 | 0x0000007FL, /* <control> */ | |
380 | 0L, -1L); | |
381 | 381 | utf8_read_test(TESTSTR("\xDF\xBF"), |
382 | 0x000007FF, /* <no name available> */ | |
383 | 0, -1); | |
382 | 0x000007FFL, /* <no name available> */ | |
383 | 0L, -1L); | |
384 | 384 | utf8_read_test(TESTSTR("\xEF\xBF\xBD"), |
385 | 0x0000FFFD, /* REPLACEMENT CHARACTER */ | |
386 | 0, -1); | |
385 | 0x0000FFFDL, /* REPLACEMENT CHARACTER */ | |
386 | 0L, -1L); | |
387 | 387 | utf8_read_test(TESTSTR("\xEF\xBF\xBF"), |
388 | 388 | ERROR, /* <no name available> (invalid char) */ |
389 | 0, -1); | |
389 | 0L, -1L); | |
390 | 390 | utf8_read_test(TESTSTR("\xF7\xBF\xBF\xBF"), |
391 | 0x001FFFFF, /* <no name available> */ | |
392 | 0, -1); | |
391 | 0x001FFFFFL, /* <no name available> */ | |
392 | 0L, -1L); | |
393 | 393 | utf8_read_test(TESTSTR("\xFB\xBF\xBF\xBF\xBF"), |
394 | 0x03FFFFFF, /* <no name available> */ | |
395 | 0, -1); | |
394 | 0x03FFFFFFL, /* <no name available> */ | |
395 | 0L, -1L); | |
396 | 396 | utf8_read_test(TESTSTR("\xFD\xBF\xBF\xBF\xBF\xBF"), |
397 | 0x7FFFFFFF, /* <no name available> */ | |
398 | 0, -1); | |
397 | 0x7FFFFFFFL, /* <no name available> */ | |
398 | 0L, -1L); | |
399 | 399 | utf8_read_test(TESTSTR("\xED\x9F\xBF"), |
400 | 0x0000D7FF, /* <no name available> */ | |
401 | 0, -1); | |
400 | 0x0000D7FFL, /* <no name available> */ | |
401 | 0L, -1L); | |
402 | 402 | utf8_read_test(TESTSTR("\xEE\x80\x80"), |
403 | 0x0000E000, /* <Private Use, First> */ | |
404 | 0, -1); | |
403 | 0x0000E000L, /* <Private Use, First> */ | |
404 | 0L, -1L); | |
405 | 405 | utf8_read_test(TESTSTR("\xEF\xBF\xBD"), |
406 | 0x0000FFFD, /* REPLACEMENT CHARACTER */ | |
407 | 0, -1); | |
406 | 0x0000FFFDL, /* REPLACEMENT CHARACTER */ | |
407 | 0L, -1L); | |
408 | 408 | utf8_read_test(TESTSTR("\xF4\x8F\xBF\xBF"), |
409 | 0x0010FFFF, /* <no name available> */ | |
410 | 0, -1); | |
409 | 0x0010FFFFL, /* <no name available> */ | |
410 | 0L, -1L); | |
411 | 411 | utf8_read_test(TESTSTR("\xF4\x90\x80\x80"), |
412 | 0x00110000, /* <no name available> */ | |
413 | 0, -1); | |
412 | 0x00110000L, /* <no name available> */ | |
413 | 0L, -1L); | |
414 | 414 | utf8_read_test(TESTSTR("\x80"), |
415 | 415 | ERROR, /* (unexpected continuation byte) */ |
416 | 0, -1); | |
416 | 0L, -1L); | |
417 | 417 | utf8_read_test(TESTSTR("\xBF"), |
418 | 418 | ERROR, /* (unexpected continuation byte) */ |
419 | 0, -1); | |
419 | 0L, -1L); | |
420 | 420 | utf8_read_test(TESTSTR("\x80\xBF"), |
421 | 421 | ERROR, /* (unexpected continuation byte) */ |
422 | 422 | ERROR, /* (unexpected continuation byte) */ |
423 | 0, -1); | |
423 | 0L, -1L); | |
424 | 424 | utf8_read_test(TESTSTR("\x80\xBF\x80"), |
425 | 425 | ERROR, /* (unexpected continuation byte) */ |
426 | 426 | ERROR, /* (unexpected continuation byte) */ |
427 | 427 | ERROR, /* (unexpected continuation byte) */ |
428 | 0, -1); | |
428 | 0L, -1L); | |
429 | 429 | utf8_read_test(TESTSTR("\x80\xBF\x80\xBF"), |
430 | 430 | ERROR, /* (unexpected continuation byte) */ |
431 | 431 | ERROR, /* (unexpected continuation byte) */ |
432 | 432 | ERROR, /* (unexpected continuation byte) */ |
433 | 433 | ERROR, /* (unexpected continuation byte) */ |
434 | 0, -1); | |
434 | 0L, -1L); | |
435 | 435 | utf8_read_test(TESTSTR("\x80\xBF\x80\xBF\x80"), |
436 | 436 | ERROR, /* (unexpected continuation byte) */ |
437 | 437 | ERROR, /* (unexpected continuation byte) */ |
438 | 438 | ERROR, /* (unexpected continuation byte) */ |
439 | 439 | ERROR, /* (unexpected continuation byte) */ |
440 | 440 | ERROR, /* (unexpected continuation byte) */ |
441 | 0, -1); | |
441 | 0L, -1L); | |
442 | 442 | utf8_read_test(TESTSTR("\x80\xBF\x80\xBF\x80\xBF"), |
443 | 443 | ERROR, /* (unexpected continuation byte) */ |
444 | 444 | ERROR, /* (unexpected continuation byte) */ |
446 | 446 | ERROR, /* (unexpected continuation byte) */ |
447 | 447 | ERROR, /* (unexpected continuation byte) */ |
448 | 448 | ERROR, /* (unexpected continuation byte) */ |
449 | 0, -1); | |
449 | 0L, -1L); | |
450 | 450 | utf8_read_test(TESTSTR("\x80\xBF\x80\xBF\x80\xBF\x80"), |
451 | 451 | ERROR, /* (unexpected continuation byte) */ |
452 | 452 | ERROR, /* (unexpected continuation byte) */ |
455 | 455 | ERROR, /* (unexpected continuation byte) */ |
456 | 456 | ERROR, /* (unexpected continuation byte) */ |
457 | 457 | ERROR, /* (unexpected continuation byte) */ |
458 | 0, -1); | |
458 | 0L, -1L); | |
459 | 459 | utf8_read_test(TESTSTR("\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"), |
460 | 460 | ERROR, /* (unexpected continuation byte) */ |
461 | 461 | ERROR, /* (unexpected continuation byte) */ |
521 | 521 | ERROR, /* (unexpected continuation byte) */ |
522 | 522 | ERROR, /* (unexpected continuation byte) */ |
523 | 523 | ERROR, /* (unexpected continuation byte) */ |
524 | 0, -1); | |
524 | 0L, -1L); | |
525 | 525 | utf8_read_test(TESTSTR("\xC0\x20\xC1\x20\xC2\x20\xC3\x20\xC4\x20\xC5\x20\xC6\x20\xC7\x20"), |
526 | 526 | ERROR, /* (incomplete sequence) */ |
527 | 0x00000020, /* SPACE */ | |
528 | ERROR, /* (incomplete sequence) */ | |
529 | 0x00000020, /* SPACE */ | |
530 | ERROR, /* (incomplete sequence) */ | |
531 | 0x00000020, /* SPACE */ | |
532 | ERROR, /* (incomplete sequence) */ | |
533 | 0x00000020, /* SPACE */ | |
534 | ERROR, /* (incomplete sequence) */ | |
535 | 0x00000020, /* SPACE */ | |
536 | ERROR, /* (incomplete sequence) */ | |
537 | 0x00000020, /* SPACE */ | |
538 | ERROR, /* (incomplete sequence) */ | |
539 | 0x00000020, /* SPACE */ | |
540 | ERROR, /* (incomplete sequence) */ | |
541 | 0x00000020, /* SPACE */ | |
542 | 0, -1); | |
527 | 0x00000020L, /* SPACE */ | |
528 | ERROR, /* (incomplete sequence) */ | |
529 | 0x00000020L, /* SPACE */ | |
530 | ERROR, /* (incomplete sequence) */ | |
531 | 0x00000020L, /* SPACE */ | |
532 | ERROR, /* (incomplete sequence) */ | |
533 | 0x00000020L, /* SPACE */ | |
534 | ERROR, /* (incomplete sequence) */ | |
535 | 0x00000020L, /* SPACE */ | |
536 | ERROR, /* (incomplete sequence) */ | |
537 | 0x00000020L, /* SPACE */ | |
538 | ERROR, /* (incomplete sequence) */ | |
539 | 0x00000020L, /* SPACE */ | |
540 | ERROR, /* (incomplete sequence) */ | |
541 | 0x00000020L, /* SPACE */ | |
542 | 0L, -1L); | |
543 | 543 | utf8_read_test(TESTSTR("\xE0\x20\xE1\x20\xE2\x20\xE3\x20\xE4\x20\xE5\x20\xE6\x20\xE7\x20\xE8\x20\xE9\x20\xEA\x20\xEB\x20\xEC\x20\xED\x20\xEE\x20\xEF\x20"), |
544 | 544 | ERROR, /* (incomplete sequence) */ |
545 | 0x00000020, /* SPACE */ | |
546 | ERROR, /* (incomplete sequence) */ | |
547 | 0x00000020, /* SPACE */ | |
548 | ERROR, /* (incomplete sequence) */ | |
549 | 0x00000020, /* SPACE */ | |
550 | ERROR, /* (incomplete sequence) */ | |
551 | 0x00000020, /* SPACE */ | |
552 | ERROR, /* (incomplete sequence) */ | |
553 | 0x00000020, /* SPACE */ | |
554 | ERROR, /* (incomplete sequence) */ | |
555 | 0x00000020, /* SPACE */ | |
556 | ERROR, /* (incomplete sequence) */ | |
557 | 0x00000020, /* SPACE */ | |
558 | ERROR, /* (incomplete sequence) */ | |
559 | 0x00000020, /* SPACE */ | |
560 | ERROR, /* (incomplete sequence) */ | |
561 | 0x00000020, /* SPACE */ | |
562 | ERROR, /* (incomplete sequence) */ | |
563 | 0x00000020, /* SPACE */ | |
564 | ERROR, /* (incomplete sequence) */ | |
565 | 0x00000020, /* SPACE */ | |
566 | ERROR, /* (incomplete sequence) */ | |
567 | 0x00000020, /* SPACE */ | |
568 | ERROR, /* (incomplete sequence) */ | |
569 | 0x00000020, /* SPACE */ | |
570 | ERROR, /* (incomplete sequence) */ | |
571 | 0x00000020, /* SPACE */ | |
572 | ERROR, /* (incomplete sequence) */ | |
573 | 0x00000020, /* SPACE */ | |
574 | ERROR, /* (incomplete sequence) */ | |
575 | 0x00000020, /* SPACE */ | |
576 | 0, -1); | |
545 | 0x00000020L, /* SPACE */ | |
546 | ERROR, /* (incomplete sequence) */ | |
547 | 0x00000020L, /* SPACE */ | |
548 | ERROR, /* (incomplete sequence) */ | |
549 | 0x00000020L, /* SPACE */ | |
550 | ERROR, /* (incomplete sequence) */ | |
551 | 0x00000020L, /* SPACE */ | |
552 | ERROR, /* (incomplete sequence) */ | |
553 | 0x00000020L, /* SPACE */ | |
554 | ERROR, /* (incomplete sequence) */ | |
555 | 0x00000020L, /* SPACE */ | |
556 | ERROR, /* (incomplete sequence) */ | |
557 | 0x00000020L, /* SPACE */ | |
558 | ERROR, /* (incomplete sequence) */ | |
559 | 0x00000020L, /* SPACE */ | |
560 | ERROR, /* (incomplete sequence) */ | |
561 | 0x00000020L, /* SPACE */ | |
562 | ERROR, /* (incomplete sequence) */ | |
563 | 0x00000020L, /* SPACE */ | |
564 | ERROR, /* (incomplete sequence) */ | |
565 | 0x00000020L, /* SPACE */ | |
566 | ERROR, /* (incomplete sequence) */ | |
567 | 0x00000020L, /* SPACE */ | |
568 | ERROR, /* (incomplete sequence) */ | |
569 | 0x00000020L, /* SPACE */ | |
570 | ERROR, /* (incomplete sequence) */ | |
571 | 0x00000020L, /* SPACE */ | |
572 | ERROR, /* (incomplete sequence) */ | |
573 | 0x00000020L, /* SPACE */ | |
574 | ERROR, /* (incomplete sequence) */ | |
575 | 0x00000020L, /* SPACE */ | |
576 | 0L, -1L); | |
577 | 577 | utf8_read_test(TESTSTR("\xF0\x20\xF1\x20\xF2\x20\xF3\x20\xF4\x20\xF5\x20\xF6\x20\xF7\x20"), |
578 | 578 | ERROR, /* (incomplete sequence) */ |
579 | 0x00000020, /* SPACE */ | |
580 | ERROR, /* (incomplete sequence) */ | |
581 | 0x00000020, /* SPACE */ | |
582 | ERROR, /* (incomplete sequence) */ | |
583 | 0x00000020, /* SPACE */ | |
584 | ERROR, /* (incomplete sequence) */ | |
585 | 0x00000020, /* SPACE */ | |
586 | ERROR, /* (incomplete sequence) */ | |
587 | 0x00000020, /* SPACE */ | |
588 | ERROR, /* (incomplete sequence) */ | |
589 | 0x00000020, /* SPACE */ | |
590 | ERROR, /* (incomplete sequence) */ | |
591 | 0x00000020, /* SPACE */ | |
592 | ERROR, /* (incomplete sequence) */ | |
593 | 0x00000020, /* SPACE */ | |
594 | 0, -1); | |
579 | 0x00000020L, /* SPACE */ | |
580 | ERROR, /* (incomplete sequence) */ | |
581 | 0x00000020L, /* SPACE */ | |
582 | ERROR, /* (incomplete sequence) */ | |
583 | 0x00000020L, /* SPACE */ | |
584 | ERROR, /* (incomplete sequence) */ | |
585 | 0x00000020L, /* SPACE */ | |
586 | ERROR, /* (incomplete sequence) */ | |
587 | 0x00000020L, /* SPACE */ | |
588 | ERROR, /* (incomplete sequence) */ | |
589 | 0x00000020L, /* SPACE */ | |
590 | ERROR, /* (incomplete sequence) */ | |
591 | 0x00000020L, /* SPACE */ | |
592 | ERROR, /* (incomplete sequence) */ | |
593 | 0x00000020L, /* SPACE */ | |
594 | 0L, -1L); | |
595 | 595 | utf8_read_test(TESTSTR("\xF8\x20\xF9\x20\xFA\x20\xFB\x20"), |
596 | 596 | ERROR, /* (incomplete sequence) */ |
597 | 0x00000020, /* SPACE */ | |
598 | ERROR, /* (incomplete sequence) */ | |
599 | 0x00000020, /* SPACE */ | |
600 | ERROR, /* (incomplete sequence) */ | |
601 | 0x00000020, /* SPACE */ | |
602 | ERROR, /* (incomplete sequence) */ | |
603 | 0x00000020, /* SPACE */ | |
604 | 0, -1); | |
597 | 0x00000020L, /* SPACE */ | |
598 | ERROR, /* (incomplete sequence) */ | |
599 | 0x00000020L, /* SPACE */ | |
600 | ERROR, /* (incomplete sequence) */ | |
601 | 0x00000020L, /* SPACE */ | |
602 | ERROR, /* (incomplete sequence) */ | |
603 | 0x00000020L, /* SPACE */ | |
604 | 0L, -1L); | |
605 | 605 | utf8_read_test(TESTSTR("\xFC\x20\xFD\x20"), |
606 | 606 | ERROR, /* (incomplete sequence) */ |
607 | 0x00000020, /* SPACE */ | |
608 | ERROR, /* (incomplete sequence) */ | |
609 | 0x00000020, /* SPACE */ | |
610 | 0, -1); | |
607 | 0x00000020L, /* SPACE */ | |
608 | ERROR, /* (incomplete sequence) */ | |
609 | 0x00000020L, /* SPACE */ | |
610 | 0L, -1L); | |
611 | 611 | utf8_read_test(TESTSTR("\xC0"), |
612 | 612 | ERROR, /* (incomplete sequence) */ |
613 | 0, -1); | |
613 | 0L, -1L); | |
614 | 614 | utf8_read_test(TESTSTR("\xE0\x80"), |
615 | 615 | ERROR, /* (incomplete sequence) */ |
616 | 0, -1); | |
616 | 0L, -1L); | |
617 | 617 | utf8_read_test(TESTSTR("\xF0\x80\x80"), |
618 | 618 | ERROR, /* (incomplete sequence) */ |
619 | 0, -1); | |
619 | 0L, -1L); | |
620 | 620 | utf8_read_test(TESTSTR("\xF8\x80\x80\x80"), |
621 | 621 | ERROR, /* (incomplete sequence) */ |
622 | 0, -1); | |
622 | 0L, -1L); | |
623 | 623 | utf8_read_test(TESTSTR("\xFC\x80\x80\x80\x80"), |
624 | 624 | ERROR, /* (incomplete sequence) */ |
625 | 0, -1); | |
625 | 0L, -1L); | |
626 | 626 | utf8_read_test(TESTSTR("\xDF"), |
627 | 627 | ERROR, /* (incomplete sequence) */ |
628 | 0, -1); | |
628 | 0L, -1L); | |
629 | 629 | utf8_read_test(TESTSTR("\xEF\xBF"), |
630 | 630 | ERROR, /* (incomplete sequence) */ |
631 | 0, -1); | |
631 | 0L, -1L); | |
632 | 632 | utf8_read_test(TESTSTR("\xF7\xBF\xBF"), |
633 | 633 | ERROR, /* (incomplete sequence) */ |
634 | 0, -1); | |
634 | 0L, -1L); | |
635 | 635 | utf8_read_test(TESTSTR("\xFB\xBF\xBF\xBF"), |
636 | 636 | ERROR, /* (incomplete sequence) */ |
637 | 0, -1); | |
637 | 0L, -1L); | |
638 | 638 | utf8_read_test(TESTSTR("\xFD\xBF\xBF\xBF\xBF"), |
639 | 639 | ERROR, /* (incomplete sequence) */ |
640 | 0, -1); | |
640 | 0L, -1L); | |
641 | 641 | utf8_read_test(TESTSTR("\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF"), |
642 | 642 | ERROR, /* (incomplete sequence) */ |
643 | 643 | ERROR, /* (incomplete sequence) */ |
649 | 649 | ERROR, /* (incomplete sequence) */ |
650 | 650 | ERROR, /* (incomplete sequence) */ |
651 | 651 | ERROR, /* (incomplete sequence) */ |
652 | 0, -1); | |
652 | 0L, -1L); | |
653 | 653 | utf8_read_test(TESTSTR("\xFE"), |
654 | 654 | ERROR, /* (invalid UTF-8 byte) */ |
655 | 0, -1); | |
655 | 0L, -1L); | |
656 | 656 | utf8_read_test(TESTSTR("\xFF"), |
657 | 657 | ERROR, /* (invalid UTF-8 byte) */ |
658 | 0, -1); | |
658 | 0L, -1L); | |
659 | 659 | utf8_read_test(TESTSTR("\xFE\xFE\xFF\xFF"), |
660 | 660 | ERROR, /* (invalid UTF-8 byte) */ |
661 | 661 | ERROR, /* (invalid UTF-8 byte) */ |
662 | 662 | ERROR, /* (invalid UTF-8 byte) */ |
663 | 663 | ERROR, /* (invalid UTF-8 byte) */ |
664 | 0, -1); | |
664 | 0L, -1L); | |
665 | 665 | utf8_read_test(TESTSTR("\xC0\xAF"), |
666 | 666 | ERROR, /* SOLIDUS (overlong form of 2F) */ |
667 | 0, -1); | |
667 | 0L, -1L); | |
668 | 668 | utf8_read_test(TESTSTR("\xE0\x80\xAF"), |
669 | 669 | ERROR, /* SOLIDUS (overlong form of 2F) */ |
670 | 0, -1); | |
670 | 0L, -1L); | |
671 | 671 | utf8_read_test(TESTSTR("\xF0\x80\x80\xAF"), |
672 | 672 | ERROR, /* SOLIDUS (overlong form of 2F) */ |
673 | 0, -1); | |
673 | 0L, -1L); | |
674 | 674 | utf8_read_test(TESTSTR("\xF8\x80\x80\x80\xAF"), |
675 | 675 | ERROR, /* SOLIDUS (overlong form of 2F) */ |
676 | 0, -1); | |
676 | 0L, -1L); | |
677 | 677 | utf8_read_test(TESTSTR("\xFC\x80\x80\x80\x80\xAF"), |
678 | 678 | ERROR, /* SOLIDUS (overlong form of 2F) */ |
679 | 0, -1); | |
679 | 0L, -1L); | |
680 | 680 | utf8_read_test(TESTSTR("\xC1\xBF"), |
681 | 681 | ERROR, /* <control> (overlong form of 7F) */ |
682 | 0, -1); | |
682 | 0L, -1L); | |
683 | 683 | utf8_read_test(TESTSTR("\xE0\x9F\xBF"), |
684 | 684 | ERROR, /* <no name available> (overlong form of DF BF) */ |
685 | 0, -1); | |
685 | 0L, -1L); | |
686 | 686 | utf8_read_test(TESTSTR("\xF0\x8F\xBF\xBF"), |
687 | 687 | ERROR, /* <no name available> (overlong form of EF BF BF) (invalid char) */ |
688 | 0, -1); | |
688 | 0L, -1L); | |
689 | 689 | utf8_read_test(TESTSTR("\xF8\x87\xBF\xBF\xBF"), |
690 | 690 | ERROR, /* <no name available> (overlong form of F7 BF BF BF) */ |
691 | 0, -1); | |
691 | 0L, -1L); | |
692 | 692 | utf8_read_test(TESTSTR("\xFC\x83\xBF\xBF\xBF\xBF"), |
693 | 693 | ERROR, /* <no name available> (overlong form of FB BF BF BF BF) */ |
694 | 0, -1); | |
694 | 0L, -1L); | |
695 | 695 | utf8_read_test(TESTSTR("\xC0\x80"), |
696 | 696 | ERROR, /* <control> (overlong form of 00) */ |
697 | 0, -1); | |
697 | 0L, -1L); | |
698 | 698 | utf8_read_test(TESTSTR("\xE0\x80\x80"), |
699 | 699 | ERROR, /* <control> (overlong form of 00) */ |
700 | 0, -1); | |
700 | 0L, -1L); | |
701 | 701 | utf8_read_test(TESTSTR("\xF0\x80\x80\x80"), |
702 | 702 | ERROR, /* <control> (overlong form of 00) */ |
703 | 0, -1); | |
703 | 0L, -1L); | |
704 | 704 | utf8_read_test(TESTSTR("\xF8\x80\x80\x80\x80"), |
705 | 705 | ERROR, /* <control> (overlong form of 00) */ |
706 | 0, -1); | |
706 | 0L, -1L); | |
707 | 707 | utf8_read_test(TESTSTR("\xFC\x80\x80\x80\x80\x80"), |
708 | 708 | ERROR, /* <control> (overlong form of 00) */ |
709 | 0, -1); | |
709 | 0L, -1L); | |
710 | 710 | utf8_read_test(TESTSTR("\xED\xA0\x80"), |
711 | 711 | ERROR, /* <Non Private Use High Surrogate, First> (surrogate) */ |
712 | 0, -1); | |
712 | 0L, -1L); | |
713 | 713 | utf8_read_test(TESTSTR("\xED\xAD\xBF"), |
714 | 714 | ERROR, /* <Non Private Use High Surrogate, Last> (surrogate) */ |
715 | 0, -1); | |
715 | 0L, -1L); | |
716 | 716 | utf8_read_test(TESTSTR("\xED\xAE\x80"), |
717 | 717 | ERROR, /* <Private Use High Surrogate, First> (surrogate) */ |
718 | 0, -1); | |
718 | 0L, -1L); | |
719 | 719 | utf8_read_test(TESTSTR("\xED\xAF\xBF"), |
720 | 720 | ERROR, /* <Private Use High Surrogate, Last> (surrogate) */ |
721 | 0, -1); | |
721 | 0L, -1L); | |
722 | 722 | utf8_read_test(TESTSTR("\xED\xB0\x80"), |
723 | 723 | ERROR, /* <Low Surrogate, First> (surrogate) */ |
724 | 0, -1); | |
724 | 0L, -1L); | |
725 | 725 | utf8_read_test(TESTSTR("\xED\xBE\x80"), |
726 | 726 | ERROR, /* <no name available> (surrogate) */ |
727 | 0, -1); | |
727 | 0L, -1L); | |
728 | 728 | utf8_read_test(TESTSTR("\xED\xBF\xBF"), |
729 | 729 | ERROR, /* <Low Surrogate, Last> (surrogate) */ |
730 | 0, -1); | |
730 | 0L, -1L); | |
731 | 731 | utf8_read_test(TESTSTR("\xED\xA0\x80\xED\xB0\x80"), |
732 | 732 | ERROR, /* <Non Private Use High Surrogate, First> (surrogate) */ |
733 | 733 | ERROR, /* <Low Surrogate, First> (surrogate) */ |
734 | 0, -1); | |
734 | 0L, -1L); | |
735 | 735 | utf8_read_test(TESTSTR("\xED\xA0\x80\xED\xBF\xBF"), |
736 | 736 | ERROR, /* <Non Private Use High Surrogate, First> (surrogate) */ |
737 | 737 | ERROR, /* <Low Surrogate, Last> (surrogate) */ |
738 | 0, -1); | |
738 | 0L, -1L); | |
739 | 739 | utf8_read_test(TESTSTR("\xED\xAD\xBF\xED\xB0\x80"), |
740 | 740 | ERROR, /* <Non Private Use High Surrogate, Last> (surrogate) */ |
741 | 741 | ERROR, /* <Low Surrogate, First> (surrogate) */ |
742 | 0, -1); | |
742 | 0L, -1L); | |
743 | 743 | utf8_read_test(TESTSTR("\xED\xAD\xBF\xED\xBF\xBF"), |
744 | 744 | ERROR, /* <Non Private Use High Surrogate, Last> (surrogate) */ |
745 | 745 | ERROR, /* <Low Surrogate, Last> (surrogate) */ |
746 | 0, -1); | |
746 | 0L, -1L); | |
747 | 747 | utf8_read_test(TESTSTR("\xED\xAE\x80\xED\xB0\x80"), |
748 | 748 | ERROR, /* <Private Use High Surrogate, First> (surrogate) */ |
749 | 749 | ERROR, /* <Low Surrogate, First> (surrogate) */ |
750 | 0, -1); | |
750 | 0L, -1L); | |
751 | 751 | utf8_read_test(TESTSTR("\xED\xAE\x80\xED\xBF\xBF"), |
752 | 752 | ERROR, /* <Private Use High Surrogate, First> (surrogate) */ |
753 | 753 | ERROR, /* <Low Surrogate, Last> (surrogate) */ |
754 | 0, -1); | |
754 | 0L, -1L); | |
755 | 755 | utf8_read_test(TESTSTR("\xED\xAF\xBF\xED\xB0\x80"), |
756 | 756 | ERROR, /* <Private Use High Surrogate, Last> (surrogate) */ |
757 | 757 | ERROR, /* <Low Surrogate, First> (surrogate) */ |
758 | 0, -1); | |
758 | 0L, -1L); | |
759 | 759 | utf8_read_test(TESTSTR("\xED\xAF\xBF\xED\xBF\xBF"), |
760 | 760 | ERROR, /* <Private Use High Surrogate, Last> (surrogate) */ |
761 | 761 | ERROR, /* <Low Surrogate, Last> (surrogate) */ |
762 | 0, -1); | |
762 | 0L, -1L); | |
763 | 763 | utf8_read_test(TESTSTR("\xEF\xBF\xBE"), |
764 | 764 | ERROR, /* <no name available> (invalid char) */ |
765 | 0, -1); | |
765 | 0L, -1L); | |
766 | 766 | utf8_read_test(TESTSTR("\xEF\xBF\xBF"), |
767 | 767 | ERROR, /* <no name available> (invalid char) */ |
768 | 0, -1); | |
768 | 0L, -1L); | |
769 | 769 | printf("read tests completed\n"); |
770 | 770 | printf("write tests beginning\n"); |
771 | 771 | { |
772 | 772 | const static long str[] = |
773 | 773 | {0x03BAL, 0x1F79L, 0x03C3L, 0x03BCL, 0x03B5L, 0}; |
774 | 774 | utf8_write_test(TESTSTR(str), |
775 | 0xCE, 0xBA, | |
776 | 0xE1, 0xBD, 0xB9, | |
777 | 0xCF, 0x83, | |
778 | 0xCE, 0xBC, | |
779 | 0xCE, 0xB5, | |
780 | 0, -1); | |
775 | 0xCEL, 0xBAL, | |
776 | 0xE1L, 0xBDL, 0xB9L, | |
777 | 0xCFL, 0x83L, | |
778 | 0xCEL, 0xBCL, | |
779 | 0xCEL, 0xB5L, | |
780 | 0L, -1L); | |
781 | 781 | } |
782 | 782 | { |
783 | 783 | const static long str[] = {0x0000L, 0}; |
784 | 784 | utf8_write_test(TESTSTR(str), |
785 | 0x00, | |
786 | 0, -1); | |
785 | 0x00L, | |
786 | 0L, -1L); | |
787 | 787 | } |
788 | 788 | { |
789 | 789 | const static long str[] = {0x0080L, 0}; |
790 | 790 | utf8_write_test(TESTSTR(str), |
791 | 0xC2, 0x80, | |
792 | 0, -1); | |
791 | 0xC2L, 0x80L, | |
792 | 0L, -1L); | |
793 | 793 | } |
794 | 794 | { |
795 | 795 | const static long str[] = {0x0800L, 0}; |
796 | 796 | utf8_write_test(TESTSTR(str), |
797 | 0xE0, 0xA0, 0x80, | |
798 | 0, -1); | |
797 | 0xE0L, 0xA0L, 0x80L, | |
798 | 0L, -1L); | |
799 | 799 | } |
800 | 800 | { |
801 | 801 | const static long str[] = {0x00010000L, 0}; |
802 | 802 | utf8_write_test(TESTSTR(str), |
803 | 0xF0, 0x90, 0x80, 0x80, | |
804 | 0, -1); | |
803 | 0xF0L, 0x90L, 0x80L, 0x80L, | |
804 | 0L, -1L); | |
805 | 805 | } |
806 | 806 | { |
807 | 807 | const static long str[] = {0x00200000L, 0}; |
808 | 808 | utf8_write_test(TESTSTR(str), |
809 | 0xF8, 0x88, 0x80, 0x80, 0x80, | |
810 | 0, -1); | |
809 | 0xF8L, 0x88L, 0x80L, 0x80L, 0x80L, | |
810 | 0L, -1L); | |
811 | 811 | } |
812 | 812 | { |
813 | 813 | const static long str[] = {0x04000000L, 0}; |
814 | 814 | utf8_write_test(TESTSTR(str), |
815 | 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80, | |
816 | 0, -1); | |
815 | 0xFCL, 0x84L, 0x80L, 0x80L, 0x80L, 0x80L, | |
816 | 0L, -1L); | |
817 | 817 | } |
818 | 818 | { |
819 | 819 | const static long str[] = {0x007FL, 0}; |
820 | 820 | utf8_write_test(TESTSTR(str), |
821 | 0x7F, | |
822 | 0, -1); | |
821 | 0x7FL, | |
822 | 0L, -1L); | |
823 | 823 | } |
824 | 824 | { |
825 | 825 | const static long str[] = {0x07FFL, 0}; |
826 | 826 | utf8_write_test(TESTSTR(str), |
827 | 0xDF, 0xBF, | |
828 | 0, -1); | |
827 | 0xDFL, 0xBFL, | |
828 | 0L, -1L); | |
829 | 829 | } |
830 | 830 | { |
831 | 831 | const static long str[] = {0xFFFDL, 0}; |
832 | 832 | utf8_write_test(TESTSTR(str), |
833 | 0xEF, 0xBF, 0xBD, | |
834 | 0, -1); | |
833 | 0xEFL, 0xBFL, 0xBDL, | |
834 | 0L, -1L); | |
835 | 835 | } |
836 | 836 | { |
837 | 837 | const static long str[] = {0xFFFFL, 0}; |
838 | 838 | utf8_write_test(TESTSTR(str), |
839 | 839 | ERROR, |
840 | 0, -1); | |
840 | 0L, -1L); | |
841 | 841 | } |
842 | 842 | { |
843 | 843 | const static long str[] = {0x001FFFFFL, 0}; |
844 | 844 | utf8_write_test(TESTSTR(str), |
845 | 0xF7, 0xBF, 0xBF, 0xBF, | |
846 | 0, -1); | |
845 | 0xF7L, 0xBFL, 0xBFL, 0xBFL, | |
846 | 0L, -1L); | |
847 | 847 | } |
848 | 848 | { |
849 | 849 | const static long str[] = {0x03FFFFFFL, 0}; |
850 | 850 | utf8_write_test(TESTSTR(str), |
851 | 0xFB, 0xBF, 0xBF, 0xBF, 0xBF, | |
852 | 0, -1); | |
851 | 0xFBL, 0xBFL, 0xBFL, 0xBFL, 0xBFL, | |
852 | 0L, -1L); | |
853 | 853 | } |
854 | 854 | { |
855 | 855 | const static long str[] = {0x7FFFFFFFL, 0}; |
856 | 856 | utf8_write_test(TESTSTR(str), |
857 | 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF, | |
858 | 0, -1); | |
857 | 0xFDL, 0xBFL, 0xBFL, 0xBFL, 0xBFL, 0xBFL, | |
858 | 0L, -1L); | |
859 | 859 | } |
860 | 860 | { |
861 | 861 | const static long str[] = {0xD7FFL, 0}; |
862 | 862 | utf8_write_test(TESTSTR(str), |
863 | 0xED, 0x9F, 0xBF, | |
864 | 0, -1); | |
863 | 0xEDL, 0x9FL, 0xBFL, | |
864 | 0L, -1L); | |
865 | 865 | } |
866 | 866 | { |
867 | 867 | const static long str[] = {0xD800L, 0}; |
868 | 868 | utf8_write_test(TESTSTR(str), |
869 | 869 | ERROR, |
870 | 0, -1); | |
870 | 0L, -1L); | |
871 | 871 | } |
872 | 872 | { |
873 | 873 | const static long str[] = {0xD800L, 0xDC00L, 0}; |
874 | 874 | utf8_write_test(TESTSTR(str), |
875 | 875 | ERROR, |
876 | 876 | ERROR, |
877 | 0, -1); | |
877 | 0L, -1L); | |
878 | 878 | } |
879 | 879 | { |
880 | 880 | const static long str[] = {0xDFFFL, 0}; |
881 | 881 | utf8_write_test(TESTSTR(str), |
882 | 882 | ERROR, |
883 | 0, -1); | |
883 | 0L, -1L); | |
884 | 884 | } |
885 | 885 | { |
886 | 886 | const static long str[] = {0xE000L, 0}; |
887 | 887 | utf8_write_test(TESTSTR(str), |
888 | 0xEE, 0x80, 0x80, | |
889 | 0, -1); | |
888 | 0xEEL, 0x80L, 0x80L, | |
889 | 0L, -1L); | |
890 | 890 | } |
891 | 891 | printf("write tests completed\n"); |
892 | 892 |
10 | 10 | struct numberstate_Tag { |
11 | 11 | int chapternum; |
12 | 12 | int appendixnum; |
13 | int ischapter; | |
13 | bool ischapter; | |
14 | 14 | int *sectionlevels; |
15 | 15 | paragraph **currentsects; |
16 | 16 | paragraph *lastsect; |
27 | 27 | numberstate *ret = snew(numberstate); |
28 | 28 | ret->chapternum = 0; |
29 | 29 | ret->appendixnum = -1; |
30 | ret->ischapter = 1; | |
30 | ret->ischapter = true; | |
31 | 31 | ret->oklevel = -1; /* not even in a chapter yet */ |
32 | 32 | ret->maxsectlevel = 32; |
33 | 33 | ret->sectionlevels = snewn(ret->maxsectlevel, int); |
52 | 52 | mnewword->type = word_Normal; |
53 | 53 | mnewword->alt = NULL; |
54 | 54 | mnewword->next = NULL; |
55 | mnewword->breaks = FALSE; | |
55 | mnewword->breaks = false; | |
56 | 56 | mnewword->aux = 0; |
57 | 57 | **wret = mnewword; |
58 | 58 | *wret = &mnewword->next; |
64 | 64 | mnewword->type = word_WhiteSpace; |
65 | 65 | mnewword->alt = NULL; |
66 | 66 | mnewword->next = NULL; |
67 | mnewword->breaks = FALSE; | |
67 | mnewword->breaks = false; | |
68 | 68 | mnewword->aux = 0; |
69 | 69 | **wret = mnewword; |
70 | 70 | *wret = &mnewword->next; |
128 | 128 | } |
129 | 129 | |
130 | 130 | word *number_mktext(numberstate *state, paragraph *p, wchar_t *category, |
131 | int *prev, int *errflag) { | |
131 | int *prev, bool *errflag, errorstate *es) { | |
132 | 132 | word *ret = NULL; |
133 | 133 | word **ret2 = &ret; |
134 | 134 | word **pret = &ret; |
149 | 149 | dospace(&pret); |
150 | 150 | ret2 = pret; |
151 | 151 | donumber(&pret, state->chapternum); |
152 | state->ischapter = 1; | |
152 | state->ischapter = true; | |
153 | 153 | state->oklevel = 0; |
154 | 154 | level = -1; |
155 | 155 | break; |
157 | 157 | case para_Subsect: |
158 | 158 | level = (p->type == para_Heading ? 0 : p->aux); |
159 | 159 | if (level > state->oklevel) { |
160 | err_sectjump(&p->fpos); | |
161 | *errflag = TRUE; | |
160 | err_sectjump(es, &p->fpos); | |
161 | *errflag = true; | |
162 | 162 | ret = NULL; |
163 | 163 | break; |
164 | 164 | } |
193 | 193 | dospace(&pret); |
194 | 194 | ret2 = pret; |
195 | 195 | doanumber(&pret, state->appendixnum); |
196 | state->ischapter = 0; | |
196 | state->ischapter = false; | |
197 | 197 | state->oklevel = 0; |
198 | 198 | level = -1; |
199 | 199 | break; |
331 | 331 | int outlen, outsize; |
332 | 332 | unsigned long outbits; |
333 | 333 | int noutbits; |
334 | int firstblock; | |
334 | bool firstblock; | |
335 | 335 | unsigned long *syms; |
336 | 336 | int symstart, nsyms; |
337 | 337 | int type; |
338 | 338 | unsigned long checksum; |
339 | 339 | unsigned long datasize; |
340 | int lastblock; | |
341 | int finished; | |
340 | bool lastblock; | |
341 | bool finished; | |
342 | 342 | unsigned char static_len1[288], static_len2[30]; |
343 | 343 | int static_code1[288], static_code2[30]; |
344 | 344 | struct huftrees sht; |
434 | 434 | int treesyms[286 + 30]; |
435 | 435 | int codelen[19]; |
436 | 436 | int i, ntreesrc, ntreesyms; |
437 | int dynamic, blklen; | |
437 | bool dynamic; | |
438 | int blklen; | |
438 | 439 | struct huftrees dht; |
439 | 440 | const struct huftrees *ht; |
440 | 441 | #ifdef STATISTICS |
962 | 963 | out = snew(deflate_compress_ctx); |
963 | 964 | out->type = type; |
964 | 965 | out->outbits = out->noutbits = 0; |
965 | out->firstblock = TRUE; | |
966 | out->firstblock = true; | |
966 | 967 | #ifdef STATISTICS |
967 | 968 | out->bitcount = 0; |
968 | 969 | #endif |
972 | 973 | |
973 | 974 | out->checksum = (type == DEFLATE_TYPE_ZLIB ? 1 : 0); |
974 | 975 | out->datasize = 0; |
975 | out->lastblock = FALSE; | |
976 | out->finished = FALSE; | |
976 | out->lastblock = false; | |
977 | out->finished = false; | |
977 | 978 | |
978 | 979 | /* |
979 | 980 | * Build the static Huffman tables now, so we'll have them |
1059 | 1060 | outbits(out, 0xFF02, 16); /* xflags, OS */ |
1060 | 1061 | break; |
1061 | 1062 | } |
1062 | out->firstblock = FALSE; | |
1063 | out->firstblock = false; | |
1063 | 1064 | } |
1064 | 1065 | |
1065 | 1066 | /* |
1066 | 1067 | * Feed our data to the LZ77 compression phase. |
1067 | 1068 | */ |
1068 | lz77_compress(ectx, block, len, TRUE); | |
1069 | lz77_compress(ectx, block, len, true); | |
1069 | 1070 | |
1070 | 1071 | /* |
1071 | 1072 | * Update checksums and counters. |
1111 | 1112 | /* |
1112 | 1113 | * Output a block with BFINAL set. |
1113 | 1114 | */ |
1114 | out->lastblock = TRUE; | |
1115 | out->lastblock = true; | |
1115 | 1116 | flushblock(out); |
1116 | 1117 | |
1117 | 1118 | /* |
1142 | 1143 | break; |
1143 | 1144 | } |
1144 | 1145 | |
1145 | out->finished = TRUE; | |
1146 | out->finished = true; | |
1146 | 1147 | break; |
1147 | 1148 | } |
1148 | 1149 | |
1313 | 1314 | CRC1, CRC2, ILEN1, ILEN2, |
1314 | 1315 | FINALSPIN |
1315 | 1316 | } state; |
1316 | int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len, | |
1317 | lenrep, lastblock; | |
1317 | int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len, lenrep; | |
1318 | bool lastblock; | |
1318 | 1319 | int uncomplen; |
1319 | 1320 | unsigned char lenlen[19]; |
1320 | 1321 | unsigned char lengths[286 + 32]; |
1365 | 1366 | dctx->nbits = 0; |
1366 | 1367 | dctx->winpos = 0; |
1367 | 1368 | dctx->type = type; |
1368 | dctx->lastblock = FALSE; | |
1369 | dctx->lastblock = false; | |
1369 | 1370 | dctx->checksum = (type == DEFLATE_TYPE_ZLIB ? 1 : 0); |
1370 | 1371 | dctx->bytesout = 0; |
1371 | 1372 | dctx->gzflags = dctx->gzextralen = 0; |
1622 | 1623 | goto finished; /* done all we can */ |
1623 | 1624 | bfinal = dctx->bits & 1; |
1624 | 1625 | if (bfinal) |
1625 | dctx->lastblock = TRUE; | |
1626 | dctx->lastblock = true; | |
1626 | 1627 | EATBITS(1); |
1627 | 1628 | btype = dctx->bits & 3; |
1628 | 1629 | EATBITS(2); |
2029 | 2030 | int ret, err, outlen; |
2030 | 2031 | deflate_decompress_ctx *dhandle; |
2031 | 2032 | deflate_compress_ctx *chandle; |
2032 | int type = DEFLATE_TYPE_ZLIB, opts = TRUE; | |
2033 | int compress = FALSE, decompress = FALSE; | |
2034 | int got_arg = FALSE; | |
2033 | int type = DEFLATE_TYPE_ZLIB; | |
2034 | bool opts = true; | |
2035 | bool compress = false, decompress = false; | |
2036 | bool got_arg = false; | |
2035 | 2037 | char *filename = NULL; |
2036 | 2038 | FILE *fp; |
2037 | 2039 | |
2038 | 2040 | while (--argc) { |
2039 | 2041 | char *p = *++argv; |
2040 | 2042 | |
2041 | got_arg = TRUE; | |
2043 | got_arg = true; | |
2042 | 2044 | |
2043 | 2045 | if (p[0] == '-' && opts) { |
2044 | 2046 | if (!strcmp(p, "-b")) |
2046 | 2048 | else if (!strcmp(p, "-g")) |
2047 | 2049 | type = DEFLATE_TYPE_GZIP; |
2048 | 2050 | else if (!strcmp(p, "-c")) |
2049 | compress = TRUE; | |
2051 | compress = true; | |
2050 | 2052 | else if (!strcmp(p, "-d")) |
2051 | decompress = TRUE; | |
2053 | decompress = true; | |
2052 | 2054 | else if (!strcmp(p, "-a")) |
2053 | analyse_level++, decompress = TRUE; | |
2055 | analyse_level++, decompress = true; | |
2054 | 2056 | else if (!strcmp(p, "--")) |
2055 | opts = FALSE; /* next thing is filename */ | |
2057 | opts = false; /* next thing is filename */ | |
2056 | 2058 | else { |
2057 | 2059 | fprintf(stderr, "unknown command line option '%s'\n", p); |
2058 | 2060 | return 1; |
2158 | 2160 | unsigned char buf[65536], *outbuf, *outbuf2; |
2159 | 2161 | int ret, err, outlen, outlen2; |
2160 | 2162 | int dlen = 0, clen = 0; |
2161 | int opts = TRUE; | |
2163 | int opts = true; | |
2162 | 2164 | |
2163 | 2165 | while (--argc) { |
2164 | 2166 | char *p = *++argv; |
2165 | 2167 | |
2166 | 2168 | if (p[0] == '-' && opts) { |
2167 | 2169 | if (!strcmp(p, "--")) |
2168 | opts = FALSE; /* next thing is filename */ | |
2170 | opts = false; /* next thing is filename */ | |
2169 | 2171 | else { |
2170 | 2172 | fprintf(stderr, "unknown command line option '%s'\n", p); |
2171 | 2173 | return 1; |
0 | # Halibut is used to build its own documentation. So we can only build | |
1 | # the documentation if we're not cross-compiling. | |
2 | ||
3 | if(CMAKE_CROSSCOMPILING) | |
4 | message(WARNING "Not building the Halibut documentation in a cross-compile") | |
5 | else() | |
6 | ||
7 | include(GNUInstallDirs) | |
8 | ||
9 | set(HALIBUT $<TARGET_FILE:halibut>) | |
10 | ||
11 | set(manual_sources | |
12 | ${CMAKE_CURRENT_SOURCE_DIR}/blurb.but | |
13 | ${CMAKE_CURRENT_SOURCE_DIR}/intro.but | |
14 | ${CMAKE_CURRENT_SOURCE_DIR}/running.but | |
15 | ${CMAKE_CURRENT_SOURCE_DIR}/input.but | |
16 | ${CMAKE_CURRENT_SOURCE_DIR}/output.but | |
17 | ${CMAKE_CURRENT_SOURCE_DIR}/licence.but | |
18 | ${CMAKE_CURRENT_SOURCE_DIR}/manpage.but | |
19 | ${CMAKE_CURRENT_SOURCE_DIR}/index.but | |
20 | ) | |
21 | ||
22 | # Do the manual build in a subdirectory, to avoid the install | |
23 | # command making a CMakeFiles directory in the output location. | |
24 | set(manual_dir ${CMAKE_CURRENT_BINARY_DIR}/manual) | |
25 | file(MAKE_DIRECTORY ${manual_dir}) | |
26 | add_custom_target(manual ALL | |
27 | BYPRODUCTS | |
28 | ${manual_dir}/index.html | |
29 | ${manual_dir}/halibut.txt | |
30 | ${manual_dir}/halibut.info | |
31 | ${manual_dir}/halibut.ps | |
32 | ${manual_dir}/halibut.pdf | |
33 | ${manual_dir}/halibut.chm | |
34 | COMMAND | |
35 | ${HALIBUT} | |
36 | --html | |
37 | --text=halibut.txt | |
38 | --info=halibut.info | |
39 | --ps=halibut.ps | |
40 | --pdf=halibut.pdf | |
41 | --chm=halibut.chm | |
42 | ${manual_sources} | |
43 | WORKING_DIRECTORY ${manual_dir} | |
44 | DEPENDS halibut ${manual_sources}) | |
45 | ||
46 | add_custom_target(manpage ALL | |
47 | BYPRODUCTS | |
48 | halibut.1 | |
49 | COMMAND | |
50 | ${HALIBUT} | |
51 | --man=halibut.1 | |
52 | ${CMAKE_CURRENT_SOURCE_DIR}/manpage.but | |
53 | DEPENDS halibut ${CMAKE_CURRENT_SOURCE_DIR}/manpage.but) | |
54 | ||
55 | install(DIRECTORY ${manual_dir}/ | |
56 | DESTINATION ${CMAKE_INSTALL_DOCDIR} | |
57 | FILES_MATCHING PATTERN "*.html") | |
58 | ||
59 | install(DIRECTORY ${manual_dir}/ | |
60 | DESTINATION ${CMAKE_INSTALL_INFODIR} | |
61 | FILES_MATCHING PATTERN "*.info*") | |
62 | ||
63 | install(FILES ${CMAKE_CURRENT_BINARY_DIR}/halibut.1 | |
64 | DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) | |
65 | ||
66 | endif() |
0 | mandir=$(prefix)/man | |
1 | man1dir=$(mandir)/man1 | |
2 | ||
3 | CHAPTERS := $(SITE) blurb intro running input output licence manpage index | |
4 | ||
5 | INPUTS = $(patsubst %,%.but,$(CHAPTERS)) | |
6 | ||
7 | HALIBUT = ../build/halibut | |
8 | ||
9 | all: index.html halibut.1 | |
10 | ||
11 | index.html: $(INPUTS) $(HALIBUT) | |
12 | $(HALIBUT) --text=halibut.txt --html --info=halibut.info \ | |
13 | --ps=halibut.ps --pdf=halibut.pdf --chm=halibut.chm $(INPUTS) | |
14 | ||
15 | halibut.1: manpage.but | |
16 | $(HALIBUT) --man=halibut.1 manpage.but | |
17 | ||
18 | install: | |
19 | mkdir -p $(man1dir) | |
20 | $(INSTALL) -m 644 halibut.1 $(man1dir)/halibut.1 | |
21 | ||
22 | clean: | |
23 | rm -f *.html *.txt *.hlp *.cnt *.1 *.info* *.ps *.pdf *.chm |
0 | 0 | \A{licence} Halibut Licence |
1 | 1 | |
2 | Halibut is copyright (c) 1999-2017 Simon Tatham. | |
2 | Halibut is copyright (c) 1999-2021 Simon Tatham. | |
3 | 3 | |
4 | 4 | Permission is hereby granted, free of charge, to any person |
5 | 5 | obtaining a copy of this software and associated documentation files |
157 | 157 | |
158 | 158 | This man page isn't terribly complete. |
159 | 159 | |
160 | \versionid Halibut version 1.2 | |
160 | \versionid Halibut version 1.3 |
41 | 41 | exit(EXIT_FAILURE); |
42 | 42 | } |
43 | 43 | |
44 | void err_optnoarg(const char *sp) | |
45 | { | |
44 | void err_optnoarg(errorstate *es, const char *sp) | |
45 | { | |
46 | es->fatal = true; | |
46 | 47 | do_error(NULL, "option `-%s' requires an argument", sp); |
47 | 48 | } |
48 | 49 | |
49 | void err_nosuchopt(const char *sp) | |
50 | { | |
50 | void err_nosuchopt(errorstate *es, const char *sp) | |
51 | { | |
52 | es->fatal = true; | |
51 | 53 | do_error(NULL, "unrecognised option `-%s'", sp); |
52 | 54 | } |
53 | 55 | |
54 | void err_cmdcharset(const char *sp) | |
55 | { | |
56 | void err_cmdcharset(errorstate *es, const char *sp) | |
57 | { | |
58 | es->fatal = true; | |
56 | 59 | do_error(NULL, "character set `%s' not recognised", sp); |
57 | 60 | } |
58 | 61 | |
59 | void err_futileopt(const char *sp, const char *sp2) | |
62 | void err_futileopt(errorstate *es, const char *sp, const char *sp2) | |
60 | 63 | { |
61 | 64 | do_error(NULL, "warning: option `-%s' has no effect%s", sp, sp2); |
62 | 65 | } |
63 | 66 | |
64 | void err_noinput(void) | |
65 | { | |
67 | void err_noinput(errorstate *es) | |
68 | { | |
69 | es->fatal = true; | |
66 | 70 | do_error(NULL, "no input files"); |
67 | 71 | } |
68 | 72 | |
69 | void err_cantopen(const char *sp) | |
70 | { | |
73 | void err_cantopen(errorstate *es, const char *sp) | |
74 | { | |
75 | es->fatal = true; | |
71 | 76 | do_error(NULL, "unable to open input file `%s'", sp); |
72 | 77 | } |
73 | 78 | |
74 | void err_nodata(void) | |
75 | { | |
79 | void err_nodata(errorstate *es) | |
80 | { | |
81 | es->fatal = true; | |
76 | 82 | do_error(NULL, "no data in input files"); |
77 | 83 | } |
78 | 84 | |
79 | void err_brokencodepara(const filepos *fpos) | |
80 | { | |
85 | void err_zerochar(errorstate *es, const filepos *fpos) | |
86 | { | |
87 | es->fatal = true; | |
88 | do_error(fpos, "the Unicode zero character is not permitted in input"); | |
89 | } | |
90 | ||
91 | void err_brokencodepara(errorstate *es, const filepos *fpos) | |
92 | { | |
93 | es->fatal = true; | |
81 | 94 | do_error(fpos, "every line of a code paragraph should begin `\\c'"); |
82 | 95 | } |
83 | 96 | |
84 | void err_kwunclosed(const filepos *fpos) | |
85 | { | |
97 | void err_kwunclosed(errorstate *es, const filepos *fpos) | |
98 | { | |
99 | es->fatal = true; | |
86 | 100 | do_error(fpos, "expected `}' after paragraph keyword"); |
87 | 101 | } |
88 | 102 | |
89 | void err_kwexpected(const filepos *fpos) | |
90 | { | |
103 | void err_kwexpected(errorstate *es, const filepos *fpos) | |
104 | { | |
105 | es->fatal = true; | |
91 | 106 | do_error(fpos, "expected a paragraph keyword"); |
92 | 107 | } |
93 | 108 | |
94 | void err_kwillegal(const filepos *fpos) | |
95 | { | |
109 | void err_kwillegal(errorstate *es, const filepos *fpos) | |
110 | { | |
111 | es->fatal = true; | |
96 | 112 | do_error(fpos, "expected no paragraph keyword"); |
97 | 113 | } |
98 | 114 | |
99 | void err_kwtoomany(const filepos *fpos) | |
100 | { | |
115 | void err_kwtoomany(errorstate *es, const filepos *fpos) | |
116 | { | |
117 | es->fatal = true; | |
101 | 118 | do_error(fpos, "expected only one paragraph keyword"); |
102 | 119 | } |
103 | 120 | |
104 | void err_bodyillegal(const filepos *fpos) | |
105 | { | |
121 | void err_bodyillegal(errorstate *es, const filepos *fpos) | |
122 | { | |
123 | es->fatal = true; | |
106 | 124 | do_error(fpos, "expected no text after paragraph keyword"); |
107 | 125 | } |
108 | 126 | |
109 | void err_badparatype(const wchar_t *wsp, const filepos *fpos) | |
110 | { | |
127 | void err_badparatype(errorstate *es, const wchar_t *wsp, const filepos *fpos) | |
128 | { | |
129 | es->fatal = true; | |
111 | 130 | char *sp = utoa_locale_dup(wsp); |
112 | 131 | do_error(fpos, "command `%s' unrecognised at start of paragraph", sp); |
113 | 132 | sfree(sp); |
114 | 133 | } |
115 | 134 | |
116 | void err_badmidcmd(const wchar_t *wsp, const filepos *fpos) | |
117 | { | |
135 | void err_badmidcmd(errorstate *es, const wchar_t *wsp, const filepos *fpos) | |
136 | { | |
137 | es->fatal = true; | |
118 | 138 | char *sp = utoa_locale_dup(wsp); |
119 | 139 | do_error(fpos, "command `%s' unexpected in mid-paragraph", sp); |
120 | 140 | sfree(sp); |
121 | 141 | } |
122 | 142 | |
123 | void err_unexbrace(const filepos *fpos) | |
124 | { | |
143 | void err_unexbrace(errorstate *es, const filepos *fpos) | |
144 | { | |
145 | es->fatal = true; | |
125 | 146 | do_error(fpos, "brace character unexpected in mid-paragraph"); |
126 | 147 | } |
127 | 148 | |
128 | void err_explbr(const filepos *fpos) | |
129 | { | |
149 | void err_explbr(errorstate *es, const filepos *fpos) | |
150 | { | |
151 | es->fatal = true; | |
130 | 152 | do_error(fpos, "expected `{' after command"); |
131 | 153 | } |
132 | 154 | |
133 | void err_commenteof(const filepos *fpos) | |
134 | { | |
155 | void err_commenteof(errorstate *es, const filepos *fpos) | |
156 | { | |
157 | es->fatal = true; | |
135 | 158 | do_error(fpos, "end of file unexpected inside `\\#{...}' comment"); |
136 | 159 | } |
137 | 160 | |
138 | void err_kwexprbr(const filepos *fpos) | |
139 | { | |
161 | void err_kwexprbr(errorstate *es, const filepos *fpos) | |
162 | { | |
163 | es->fatal = true; | |
140 | 164 | do_error(fpos, "expected `}' after cross-reference"); |
141 | 165 | } |
142 | 166 | |
143 | void err_codequote(const filepos *fpos) | |
144 | { | |
167 | void err_codequote(errorstate *es, const filepos *fpos) | |
168 | { | |
169 | es->fatal = true; | |
145 | 170 | do_error(fpos, "unable to nest \\q{...} within \\c{...} or \\cw{...}"); |
146 | 171 | } |
147 | 172 | |
148 | void err_missingrbrace(const filepos *fpos) | |
149 | { | |
173 | void err_missingrbrace(errorstate *es, const filepos *fpos) | |
174 | { | |
175 | es->fatal = true; | |
150 | 176 | do_error(fpos, "unclosed braces at end of paragraph"); |
151 | 177 | } |
152 | 178 | |
153 | void err_missingrbrace2(const filepos *fpos) | |
154 | { | |
179 | void err_missingrbrace2(errorstate *es, const filepos *fpos) | |
180 | { | |
181 | es->fatal = true; | |
155 | 182 | do_error(fpos, "unclosed braces at end of input file"); |
156 | 183 | } |
157 | 184 | |
158 | void err_nestedstyles(const filepos *fpos) | |
159 | { | |
185 | void err_nestedstyles(errorstate *es, const filepos *fpos) | |
186 | { | |
187 | es->fatal = true; | |
160 | 188 | do_error(fpos, "unable to nest text styles"); |
161 | 189 | } |
162 | 190 | |
163 | void err_nestedindex(const filepos *fpos) | |
164 | { | |
191 | void err_nestedindex(errorstate *es, const filepos *fpos) | |
192 | { | |
193 | es->fatal = true; | |
165 | 194 | do_error(fpos, "unable to nest index markings"); |
166 | 195 | } |
167 | 196 | |
168 | void err_indexcase(const filepos *fpos, const wchar_t *wsp, | |
197 | void err_indexcase(errorstate *es, const filepos *fpos, const wchar_t *wsp, | |
169 | 198 | const filepos *fpos2, const wchar_t *wsp2) |
170 | 199 | { |
171 | 200 | char *sp = utoa_locale_dup(wsp), *sp2 = utoa_locale_dup(wsp2); |
176 | 205 | sfree(sp2); |
177 | 206 | } |
178 | 207 | |
179 | void err_nosuchkw(const filepos *fpos, const wchar_t *wsp) | |
180 | { | |
208 | void err_nosuchkw(errorstate *es, const filepos *fpos, const wchar_t *wsp) | |
209 | { | |
210 | es->fatal = true; | |
181 | 211 | char *sp = utoa_locale_dup(wsp); |
182 | 212 | do_error(fpos, "unable to resolve cross-reference to `%s'", sp); |
183 | 213 | sfree(sp); |
184 | 214 | } |
185 | 215 | |
186 | void err_multiBR(const filepos *fpos, const wchar_t *wsp) | |
187 | { | |
216 | void err_multiBR(errorstate *es, const filepos *fpos, const wchar_t *wsp) | |
217 | { | |
218 | es->fatal = true; | |
188 | 219 | char *sp = utoa_locale_dup(wsp); |
189 | 220 | do_error(fpos, "multiple `\\BR' entries given for `%s'", sp); |
190 | 221 | sfree(sp); |
191 | 222 | } |
192 | 223 | |
193 | void err_nosuchidxtag(const filepos *fpos, const wchar_t *wsp) | |
194 | { | |
224 | void err_nosuchidxtag(errorstate *es, const filepos *fpos, const wchar_t *wsp) | |
225 | { | |
226 | es->fatal = true; | |
195 | 227 | char *sp = utoa_locale_dup(wsp); |
196 | 228 | do_error(fpos, "`\\IM' on unknown index tag `%s'", sp); |
197 | 229 | sfree(sp); |
198 | 230 | } |
199 | 231 | |
200 | void err_cantopenw(const char *sp) | |
201 | { | |
232 | void err_cantopenw(errorstate *es, const char *sp) | |
233 | { | |
234 | es->fatal = true; | |
202 | 235 | do_error(NULL, "unable to open output file `%s'", sp); |
203 | 236 | } |
204 | 237 | |
205 | void err_macroexists(const filepos *fpos, const wchar_t *wsp) | |
206 | { | |
238 | void err_macroexists(errorstate *es, const filepos *fpos, const wchar_t *wsp) | |
239 | { | |
240 | es->fatal = true; | |
207 | 241 | char *sp = utoa_locale_dup(wsp); |
208 | 242 | do_error(fpos, "macro `%s' already defined", sp); |
209 | 243 | sfree(sp); |
210 | 244 | } |
211 | 245 | |
212 | void err_sectjump(const filepos *fpos) | |
213 | { | |
246 | void err_sectjump(errorstate *es, const filepos *fpos) | |
247 | { | |
248 | es->fatal = true; | |
214 | 249 | do_error(fpos, "expected higher heading levels before this one"); |
215 | 250 | } |
216 | 251 | |
217 | void err_winhelp_ctxclash(const filepos *fpos, const char *sp, const char *sp2) | |
218 | { | |
252 | void err_winhelp_ctxclash(errorstate *es, const filepos *fpos, | |
253 | const char *sp, const char *sp2) | |
254 | { | |
255 | es->fatal = true; | |
219 | 256 | do_error(fpos, "Windows Help context id `%s' clashes with " |
220 | 257 | "previously defined `%s'", sp, sp2); |
221 | 258 | } |
222 | 259 | |
223 | void err_multikw(const filepos *fpos, const filepos *fpos2, const wchar_t *wsp) | |
224 | { | |
260 | void err_multikw(errorstate *es, const filepos *fpos, const filepos *fpos2, | |
261 | const wchar_t *wsp) | |
262 | { | |
263 | es->fatal = true; | |
225 | 264 | char *sp = utoa_locale_dup(wsp); |
226 | 265 | do_error(fpos, "paragraph keyword `%s' already defined at %s:%d", |
227 | 266 | sp, fpos2->filename, fpos2->line); |
228 | 267 | sfree(sp); |
229 | 268 | } |
230 | 269 | |
231 | void err_misplacedlcont(const filepos *fpos) | |
232 | { | |
270 | void err_misplacedlcont(errorstate *es, const filepos *fpos) | |
271 | { | |
272 | es->fatal = true; | |
233 | 273 | do_error(fpos, "\\lcont is only expected after a list item"); |
234 | 274 | } |
235 | 275 | |
236 | void err_sectmarkerinblock(const filepos *fpos, const char *sp) | |
237 | { | |
276 | void err_sectmarkerinblock(errorstate *es, const filepos *fpos, const char *sp) | |
277 | { | |
278 | es->fatal = true; | |
238 | 279 | do_error(fpos, "section headings are not supported within \\%s", sp); |
239 | 280 | } |
240 | 281 | |
241 | void err_cfginsufarg(const filepos *fpos, const char *sp, int i) | |
242 | { | |
282 | void err_cfginsufarg(errorstate *es, const filepos *fpos, const char *sp, | |
283 | int i) | |
284 | { | |
285 | es->fatal = true; | |
243 | 286 | do_error(fpos, "\\cfg{%s} expects at least %d parameter%s", |
244 | 287 | sp, i, (i==1)?"":"s"); |
245 | 288 | } |
246 | 289 | |
247 | void err_infonodechar(const filepos *fpos, char c) /* fpos might be NULL */ | |
248 | { | |
290 | void err_infonodechar(errorstate *es, const filepos *fpos, char c) | |
291 | /* fpos might be NULL */ | |
292 | { | |
293 | es->fatal = true; | |
249 | 294 | do_error(fpos, "info output format does not support '%c' in" |
250 | 295 | " node names; removing", c); |
251 | 296 | } |
252 | 297 | |
253 | void err_text_codeline(const filepos *fpos, int i, int j) | |
298 | void err_text_codeline(errorstate *es, const filepos *fpos, int i, int j) | |
254 | 299 | { |
255 | 300 | do_error(fpos, "warning: code paragraph line is %d chars wide, wider" |
256 | 301 | " than body width %d", i, j); |
257 | 302 | } |
258 | 303 | |
259 | void err_htmlver(const filepos *fpos, const wchar_t *wsp) | |
260 | { | |
304 | void err_htmlver(errorstate *es, const filepos *fpos, const wchar_t *wsp) | |
305 | { | |
306 | es->fatal = true; | |
261 | 307 | char *sp = utoa_locale_dup(wsp); |
262 | 308 | do_error(fpos, "unrecognised HTML version keyword `%s'", sp); |
263 | 309 | sfree(sp); |
264 | 310 | } |
265 | 311 | |
266 | void err_charset(const filepos *fpos, const wchar_t *wsp) | |
267 | { | |
312 | void err_charset(errorstate *es, const filepos *fpos, const wchar_t *wsp) | |
313 | { | |
314 | es->fatal = true; | |
268 | 315 | char *sp = utoa_locale_dup(wsp); |
269 | 316 | do_error(fpos, "character set `%s' not recognised", sp); |
270 | 317 | sfree(sp); |
271 | 318 | } |
272 | 319 | |
273 | void err_nofont(const filepos *fpos, const wchar_t *wsp) | |
274 | { | |
320 | void err_nofont(errorstate *es, const filepos *fpos, const wchar_t *wsp) | |
321 | { | |
322 | es->fatal = true; | |
275 | 323 | char *sp = utoa_locale_dup(wsp); |
276 | 324 | do_error(fpos, "font `%s' not recognised", sp); |
277 | 325 | sfree(sp); |
278 | 326 | } |
279 | 327 | |
280 | void err_afmeof(const filepos *fpos) | |
281 | { | |
328 | void err_afmeof(errorstate *es, const filepos *fpos) | |
329 | { | |
330 | es->fatal = true; | |
282 | 331 | do_error(fpos, "AFM file ended unexpectedly"); |
283 | 332 | } |
284 | 333 | |
285 | void err_afmkey(const filepos *fpos, const char *sp) | |
286 | { | |
334 | void err_afmkey(errorstate *es, const filepos *fpos, const char *sp) | |
335 | { | |
336 | es->fatal = true; | |
287 | 337 | do_error(fpos, "required AFM key '%s' missing", sp); |
288 | 338 | } |
289 | 339 | |
290 | void err_afmvers(const filepos *fpos) | |
291 | { | |
340 | void err_afmvers(errorstate *es, const filepos *fpos) | |
341 | { | |
342 | es->fatal = true; | |
292 | 343 | do_error(fpos, "unsupported AFM version"); |
293 | 344 | } |
294 | 345 | |
295 | void err_afmval(const filepos *fpos, const char *sp, int i) | |
296 | { | |
346 | void err_afmval(errorstate *es, const filepos *fpos, const char *sp, int i) | |
347 | { | |
348 | es->fatal = true; | |
297 | 349 | if (i == 1) |
298 | 350 | do_error(fpos, "AFM key '%s' requires a value", sp); |
299 | 351 | else |
300 | 352 | do_error(fpos, "AFM key '%s' requires %d values", sp, i); |
301 | 353 | } |
302 | 354 | |
303 | void err_pfeof(const filepos *fpos) | |
304 | { | |
355 | void err_pfeof(errorstate *es, const filepos *fpos) | |
356 | { | |
357 | es->fatal = true; | |
305 | 358 | do_error(fpos, "Type 1 font file ended unexpectedly"); |
306 | 359 | } |
307 | 360 | |
308 | void err_pfhead(const filepos *fpos) | |
309 | { | |
361 | void err_pfhead(errorstate *es, const filepos *fpos) | |
362 | { | |
363 | es->fatal = true; | |
310 | 364 | do_error(fpos, "Type 1 font file header line invalid"); |
311 | 365 | } |
312 | 366 | |
313 | void err_pfbad(const filepos *fpos) | |
314 | { | |
367 | void err_pfbad(errorstate *es, const filepos *fpos) | |
368 | { | |
369 | es->fatal = true; | |
315 | 370 | do_error(fpos, "Type 1 font file invalid"); |
316 | 371 | } |
317 | 372 | |
318 | void err_pfnoafm(const filepos *fpos, const char *sp) | |
319 | { | |
373 | void err_pfnoafm(errorstate *es, const filepos *fpos, const char *sp) | |
374 | { | |
375 | es->fatal = true; | |
320 | 376 | do_error(fpos, "no metrics available for Type 1 font '%s'", sp); |
321 | 377 | } |
322 | 378 | |
323 | void err_chmnames(void) | |
324 | { | |
379 | void err_chmnames(errorstate *es) | |
380 | { | |
381 | es->fatal = true; | |
325 | 382 | do_error(NULL, "only one of html-mshtmlhelp-chm and " |
326 | 383 | "html-mshtmlhelp-hhp found"); |
327 | 384 | } |
328 | 385 | |
329 | void err_sfntnotable(const filepos *fpos, const char *sp) | |
330 | { | |
386 | void err_sfntnotable(errorstate *es, const filepos *fpos, const char *sp) | |
387 | { | |
388 | es->fatal = true; | |
331 | 389 | do_error(fpos, "font has no '%s' table", sp); |
332 | 390 | } |
333 | 391 | |
334 | void err_sfntnopsname(const filepos *fpos) | |
335 | { | |
392 | void err_sfntnopsname(errorstate *es, const filepos *fpos) | |
393 | { | |
394 | es->fatal = true; | |
336 | 395 | do_error(fpos, "font has no PostScript name"); |
337 | 396 | } |
338 | 397 | |
339 | void err_sfntbadtable(const filepos *fpos, const char *sp) | |
340 | { | |
398 | void err_sfntbadtable(errorstate *es, const filepos *fpos, const char *sp) | |
399 | { | |
400 | es->fatal = true; | |
341 | 401 | do_error(fpos, "font has an invalid '%s' table", sp); |
342 | 402 | } |
343 | 403 | |
344 | void err_sfntnounicmap(const filepos *fpos) | |
345 | { | |
404 | void err_sfntnounicmap(errorstate *es, const filepos *fpos) | |
405 | { | |
406 | es->fatal = true; | |
346 | 407 | do_error(fpos, "font has no UCS-2 character map"); |
347 | 408 | } |
348 | 409 | |
349 | void err_sfnttablevers(const filepos *fpos, const char *sp) | |
350 | { | |
410 | void err_sfnttablevers(errorstate *es, const filepos *fpos, const char *sp) | |
411 | { | |
412 | es->fatal = true; | |
351 | 413 | do_error(fpos, "font has an unsupported '%s' table version", sp); |
352 | 414 | } |
353 | 415 | |
354 | void err_sfntbadhdr(const filepos *fpos) | |
355 | { | |
416 | void err_sfntbadhdr(errorstate *es, const filepos *fpos) | |
417 | { | |
418 | es->fatal = true; | |
356 | 419 | do_error(fpos, "font has an invalid header"); |
357 | 420 | } |
358 | 421 | |
359 | void err_sfntbadglyph(const filepos *fpos, unsigned wc) | |
422 | void err_sfntbadglyph(errorstate *es, const filepos *fpos, unsigned wc) | |
360 | 423 | { |
361 | 424 | do_error(fpos, |
362 | 425 | "warning: character U+%04X references a non-existent glyph", |
363 | 426 | wc); |
364 | 427 | } |
365 | 428 | |
366 | void err_chm_badname(const filepos *fpos, const char *sp) | |
367 | { | |
429 | void err_chm_badname(errorstate *es, const filepos *fpos, const char *sp) | |
430 | { | |
431 | es->fatal = true; | |
368 | 432 | do_error(fpos, "CHM internal file name `%s' begins with" |
369 | 433 | " a reserved character", sp); |
370 | 434 | } |
4 | 4 | #include <wchar.h> |
5 | 5 | #include <time.h> |
6 | 6 | #include <string.h> |
7 | #include <stdbool.h> | |
8 | ||
9 | #ifdef BOOLIFY | |
10 | # include "boolify.h" | |
11 | #endif | |
7 | 12 | |
8 | 13 | #include "charset.h" |
9 | 14 | |
11 | 16 | #define NORETURN __attribute__((__noreturn__)) |
12 | 17 | #else |
13 | 18 | #define NORETURN /* nothing */ |
14 | #endif | |
15 | ||
16 | #ifndef TRUE | |
17 | #define TRUE 1 | |
18 | #endif | |
19 | #ifndef FALSE | |
20 | #define FALSE 0 | |
21 | 19 | #endif |
22 | 20 | |
23 | 21 | /* For suppressing unused-parameter warnings */ |
39 | 37 | typedef struct indextag_Tag indextag; |
40 | 38 | typedef struct indexentry_Tag indexentry; |
41 | 39 | typedef struct macrostack_Tag macrostack; |
40 | typedef struct errorstate_Tag errorstate; | |
41 | typedef struct psdata_Tag psdata; | |
42 | 42 | |
43 | 43 | /* |
44 | 44 | * Data structure to hold a file name and index, a line and a |
61 | 61 | int nfiles; /* how many in the list */ |
62 | 62 | FILE *currfp; /* the currently open one */ |
63 | 63 | int currindex; /* which one is that in the list */ |
64 | int wantclose; /* does the current file want closing */ | |
64 | bool wantclose; /* does the current file want closing */ | |
65 | 65 | pushback *pushback; /* pushed-back input characters */ |
66 | 66 | int npushback, pushbacksize; |
67 | 67 | filepos pos; |
68 | int reportcols; /* report column numbers in errors */ | |
68 | bool reportcols; /* report column numbers in errors */ | |
69 | 69 | macrostack *stack; /* macro expansions in force */ |
70 | 70 | int defcharset, charset; /* character sets for input files */ |
71 | 71 | charset_state csstate; |
72 | 72 | wchar_t wc[16]; /* wide chars from input conversion */ |
73 | 73 | int nwc, wcpos; /* size of, and position in, wc[] */ |
74 | 74 | char *pushback_chars; /* used to save input-encoding data */ |
75 | errorstate *es; | |
75 | 76 | }; |
76 | 77 | |
77 | 78 | /* |
135 | 136 | word *next, *alt; |
136 | 137 | int type; |
137 | 138 | int aux; |
138 | int breaks; /* can a line break after it? */ | |
139 | bool breaks; /* can a line break after it? */ | |
139 | 140 | wchar_t *text; |
140 | 141 | filepos fpos; |
141 | 142 | |
204 | 205 | /* |
205 | 206 | * error.c |
206 | 207 | */ |
208 | struct errorstate_Tag { | |
209 | bool fatal; | |
210 | }; | |
207 | 211 | /* out of memory */ |
208 | 212 | void fatalerr_nomemory(void) NORETURN; |
209 | 213 | /* option `-%s' requires an argument */ |
210 | void err_optnoarg(const char *sp); | |
214 | void err_optnoarg(errorstate *es, const char *sp); | |
211 | 215 | /* unrecognised option `-%s' */ |
212 | void err_nosuchopt(const char *sp); | |
216 | void err_nosuchopt(errorstate *es, const char *sp); | |
213 | 217 | /* unrecognised charset %s (cmdline) */ |
214 | void err_cmdcharset(const char *sp); | |
218 | void err_cmdcharset(errorstate *es, const char *sp); | |
215 | 219 | /* futile option `-%s'%s */ |
216 | void err_futileopt(const char *sp, const char *sp2); | |
220 | void err_futileopt(errorstate *es, const char *sp, const char *sp2); | |
217 | 221 | /* no input files */ |
218 | void err_noinput(void); | |
222 | void err_noinput(errorstate *es); | |
219 | 223 | /* unable to open input file `%s' */ |
220 | void err_cantopen(const char *sp); | |
224 | void err_cantopen(errorstate *es, const char *sp); | |
221 | 225 | /* no data in input files */ |
222 | void err_nodata(void); | |
226 | void err_nodata(errorstate *es); | |
227 | /* unexpected zero character in input file */ | |
228 | void err_zerochar(errorstate *es, const filepos *fpos); | |
223 | 229 | /* line in codepara didn't begin `\c' */ |
224 | void err_brokencodepara(const filepos *fpos); | |
230 | void err_brokencodepara(errorstate *es, const filepos *fpos); | |
225 | 231 | /* expected `}' after keyword */ |
226 | void err_kwunclosed(const filepos *fpos); | |
232 | void err_kwunclosed(errorstate *es, const filepos *fpos); | |
227 | 233 | /* paragraph type expects a keyword */ |
228 | void err_kwexpected(const filepos *fpos); | |
234 | void err_kwexpected(errorstate *es, const filepos *fpos); | |
229 | 235 | /* paragraph type expects no keyword */ |
230 | void err_kwillegal(const filepos *fpos); | |
236 | void err_kwillegal(errorstate *es, const filepos *fpos); | |
231 | 237 | /* paragraph type expects only 1 */ |
232 | void err_kwtoomany(const filepos *fpos); | |
238 | void err_kwtoomany(errorstate *es, const filepos *fpos); | |
233 | 239 | /* paragraph type expects only kws! */ |
234 | void err_bodyillegal(const filepos *fpos); | |
240 | void err_bodyillegal(errorstate *es, const filepos *fpos); | |
235 | 241 | /* invalid command at start of para */ |
236 | void err_badparatype(const wchar_t *wsp, const filepos *fpos); | |
242 | void err_badparatype(errorstate *es, const wchar_t *wsp, const filepos *fpos); | |
237 | 243 | /* invalid command in mid-para */ |
238 | void err_badmidcmd(const wchar_t *wsp, const filepos *fpos); | |
244 | void err_badmidcmd(errorstate *es, const wchar_t *wsp, const filepos *fpos); | |
239 | 245 | /* unexpected brace */ |
240 | void err_unexbrace(const filepos *fpos); | |
246 | void err_unexbrace(errorstate *es, const filepos *fpos); | |
241 | 247 | /* expected `{' after command */ |
242 | void err_explbr(const filepos *fpos); | |
248 | void err_explbr(errorstate *es, const filepos *fpos); | |
243 | 249 | /* EOF inside braced comment */ |
244 | void err_commenteof(const filepos *fpos); | |
250 | void err_commenteof(errorstate *es, const filepos *fpos); | |
245 | 251 | /* expected `}' after cross-ref */ |
246 | void err_kwexprbr(const filepos *fpos); | |
252 | void err_kwexprbr(errorstate *es, const filepos *fpos); | |
247 | 253 | /* \q within \c is not supported */ |
248 | void err_codequote(const filepos *fpos); | |
254 | void err_codequote(errorstate *es, const filepos *fpos); | |
249 | 255 | /* unclosed braces at end of para */ |
250 | void err_missingrbrace(const filepos *fpos); | |
256 | void err_missingrbrace(errorstate *es, const filepos *fpos); | |
251 | 257 | /* unclosed braces at end of file */ |
252 | void err_missingrbrace2(const filepos *fpos); | |
258 | void err_missingrbrace2(errorstate *es, const filepos *fpos); | |
253 | 259 | /* unable to nest text styles */ |
254 | void err_nestedstyles(const filepos *fpos); | |
260 | void err_nestedstyles(errorstate *es, const filepos *fpos); | |
255 | 261 | /* unable to nest `\i' thingys */ |
256 | void err_nestedindex(const filepos *fpos); | |
262 | void err_nestedindex(errorstate *es, const filepos *fpos); | |
257 | 263 | /* two \i differing only in case */ |
258 | void err_indexcase(const filepos *fpos, const wchar_t *wsp, | |
264 | void err_indexcase(errorstate *es, const filepos *fpos, const wchar_t *wsp, | |
259 | 265 | const filepos *fpos2, const wchar_t *wsp2); |
260 | 266 | /* unresolved cross-reference */ |
261 | void err_nosuchkw(const filepos *fpos, const wchar_t *wsp); | |
267 | void err_nosuchkw(errorstate *es, const filepos *fpos, const wchar_t *wsp); | |
262 | 268 | /* multiple \BRs on same keyword */ |
263 | void err_multiBR(const filepos *fpos, const wchar_t *wsp); | |
269 | void err_multiBR(errorstate *es, const filepos *fpos, const wchar_t *wsp); | |
264 | 270 | /* \IM on unknown index tag */ |
265 | void err_nosuchidxtag(const filepos *fpos, const wchar_t *wsp); | |
271 | void err_nosuchidxtag(errorstate *es, const filepos *fpos, | |
272 | const wchar_t *wsp); | |
266 | 273 | /* can't open output file for write */ |
267 | void err_cantopenw(const char *sp); | |
274 | void err_cantopenw(errorstate *es, const char *sp); | |
268 | 275 | /* this macro already exists */ |
269 | void err_macroexists(const filepos *fpos, const wchar_t *wsp); | |
276 | void err_macroexists(errorstate *es, const filepos *fpos, const wchar_t *wsp); | |
270 | 277 | /* jump a heading level, eg \C -> \S */ |
271 | void err_sectjump(const filepos *fpos); | |
278 | void err_sectjump(errorstate *es, const filepos *fpos); | |
272 | 279 | /* WinHelp context ID hash clash */ |
273 | void err_winhelp_ctxclash(const filepos *fpos, const char *sp, const char *sp2); | |
280 | void err_winhelp_ctxclash(errorstate *es, const filepos *fpos, | |
281 | const char *sp, const char *sp2); | |
274 | 282 | /* keyword clash in sections */ |
275 | void err_multikw(const filepos *fpos, const filepos *fpos2, const wchar_t *wsp); | |
283 | void err_multikw(errorstate *es, const filepos *fpos, const filepos *fpos2, | |
284 | const wchar_t *wsp); | |
276 | 285 | /* \lcont not after a list item */ |
277 | void err_misplacedlcont(const filepos *fpos); | |
286 | void err_misplacedlcont(errorstate *es, const filepos *fpos); | |
278 | 287 | /* section marker appeared in block */ |
279 | void err_sectmarkerinblock(const filepos *fpos, const char *sp); | |
288 | void err_sectmarkerinblock(errorstate *es, const filepos *fpos, | |
289 | const char *sp); | |
280 | 290 | /* \cfg{%s} insufficient args (<%d) */ |
281 | void err_cfginsufarg(const filepos *fpos, const char *sp, int i); | |
291 | void err_cfginsufarg(errorstate *es, const filepos *fpos, const char *sp, | |
292 | int i); | |
282 | 293 | /* colon/comma in node name in info */ |
283 | void err_infonodechar(const filepos *fpos, char c) /* fpos might be NULL */; | |
294 | void err_infonodechar(errorstate *es, const filepos *fpos, char c) | |
295 | /* fpos might be NULL */; | |
284 | 296 | /* \c line too long in text backend */ |
285 | void err_text_codeline(const filepos *fpos, int i, int j); | |
297 | void err_text_codeline(errorstate *es, const filepos *fpos, int i, int j); | |
286 | 298 | /* unrecognised HTML version keyword */ |
287 | void err_htmlver(const filepos *fpos, const wchar_t *wsp); | |
299 | void err_htmlver(errorstate *es, const filepos *fpos, const wchar_t *wsp); | |
288 | 300 | /* unrecognised character set name */ |
289 | void err_charset(const filepos *fpos, const wchar_t *wsp); | |
301 | void err_charset(errorstate *es, const filepos *fpos, const wchar_t *wsp); | |
290 | 302 | /* unrecognised font name */ |
291 | void err_nofont(const filepos *fpos, const wchar_t *wsp); | |
303 | void err_nofont(errorstate *es, const filepos *fpos, const wchar_t *wsp); | |
292 | 304 | /* eof in AFM file */ |
293 | void err_afmeof(const filepos *fpos); | |
305 | void err_afmeof(errorstate *es, const filepos *fpos); | |
294 | 306 | /* missing expected keyword in AFM */ |
295 | void err_afmkey(const filepos *fpos, const char *sp); | |
307 | void err_afmkey(errorstate *es, const filepos *fpos, const char *sp); | |
296 | 308 | /* unsupported AFM version */ |
297 | void err_afmvers(const filepos *fpos); | |
309 | void err_afmvers(errorstate *es, const filepos *fpos); | |
298 | 310 | /* missing value(s) for AFM key */ |
299 | void err_afmval(const filepos *fpos, const char *sp, int i); | |
311 | void err_afmval(errorstate *es, const filepos *fpos, const char *sp, int i); | |
300 | 312 | /* eof in Type 1 font file */ |
301 | void err_pfeof(const filepos *fpos); | |
313 | void err_pfeof(errorstate *es, const filepos *fpos); | |
302 | 314 | /* bad Type 1 header line */ |
303 | void err_pfhead(const filepos *fpos); | |
315 | void err_pfhead(errorstate *es, const filepos *fpos); | |
304 | 316 | /* otherwise invalide Type 1 font */ |
305 | void err_pfbad(const filepos *fpos); | |
317 | void err_pfbad(errorstate *es, const filepos *fpos); | |
306 | 318 | /* Type 1 font but no AFM */ |
307 | void err_pfnoafm(const filepos *fpos, const char *sp); | |
319 | void err_pfnoafm(errorstate *es, const filepos *fpos, const char *sp); | |
308 | 320 | /* need both or neither of hhp+chm */ |
309 | void err_chmnames(void); | |
321 | void err_chmnames(errorstate *es); | |
310 | 322 | /* required sfnt table missing */ |
311 | void err_sfntnotable(const filepos *fpos, const char *sp); | |
323 | void err_sfntnotable(errorstate *es, const filepos *fpos, const char *sp); | |
312 | 324 | /* sfnt has no PostScript name */ |
313 | void err_sfntnopsname(const filepos *fpos); | |
325 | void err_sfntnopsname(errorstate *es, const filepos *fpos); | |
314 | 326 | /* sfnt table not valid */ |
315 | void err_sfntbadtable(const filepos *fpos, const char *sp); | |
327 | void err_sfntbadtable(errorstate *es, const filepos *fpos, const char *sp); | |
316 | 328 | /* sfnt has no UCS-2 cmap */ |
317 | void err_sfntnounicmap(const filepos *fpos); | |
329 | void err_sfntnounicmap(errorstate *es, const filepos *fpos); | |
318 | 330 | /* sfnt table version unknown */ |
319 | void err_sfnttablevers(const filepos *fpos, const char *sp); | |
331 | void err_sfnttablevers(errorstate *es, const filepos *fpos, const char *sp); | |
320 | 332 | /* sfnt has bad header */ |
321 | void err_sfntbadhdr(const filepos *fpos); | |
333 | void err_sfntbadhdr(errorstate *es, const filepos *fpos); | |
322 | 334 | /* sfnt cmap references bad glyph */ |
323 | void err_sfntbadglyph(const filepos *fpos, unsigned wc); | |
335 | void err_sfntbadglyph(errorstate *es, const filepos *fpos, unsigned wc); | |
324 | 336 | /* CHM internal file names can't start with # or $ */ |
325 | void err_chm_badname(const filepos *fpos, const char *sp); | |
337 | void err_chm_badname(errorstate *es, const filepos *fpos, const char *sp); | |
326 | 338 | |
327 | 339 | /* |
328 | 340 | * malloc.c |
368 | 380 | wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source); |
369 | 381 | wchar_t *ustrncpy(wchar_t *dest, wchar_t const *source, int n); |
370 | 382 | wchar_t utolower(wchar_t); |
371 | int uisalpha(wchar_t); | |
383 | bool uisalpha(wchar_t); | |
372 | 384 | int ustrcmp(wchar_t *lhs, wchar_t *rhs); |
373 | 385 | int ustricmp(wchar_t const *lhs, wchar_t const *rhs); |
374 | 386 | int ustrnicmp(wchar_t const *lhs, wchar_t const *rhs, int maxlen); |
375 | 387 | int utoi(wchar_t const *); |
376 | 388 | double utof(wchar_t const *); |
377 | int utob(wchar_t const *); | |
378 | int uisdigit(wchar_t); | |
389 | bool utob(wchar_t const *); | |
390 | bool uisdigit(wchar_t); | |
379 | 391 | wchar_t *ustrlow(wchar_t *s); |
380 | 392 | wchar_t *ustrftime(const wchar_t *wfmt, const struct tm *timespec); |
381 | int cvt_ok(int charset, const wchar_t *s); | |
382 | int charset_from_ustr(filepos *fpos, const wchar_t *name); | |
393 | bool cvt_ok(int charset, const wchar_t *s); | |
394 | int charset_from_ustr(filepos *fpos, const wchar_t *name, errorstate *); | |
383 | 395 | |
384 | 396 | /* |
385 | 397 | * wcwidth.c |
433 | 445 | void rdadds(rdstring *rs, wchar_t const *p); |
434 | 446 | wchar_t *rdtrim(rdstring *rs); |
435 | 447 | void rdaddc(rdstringc *rs, char c); |
448 | void rdaddc_rep(rdstringc *rs, char c, int repeat); | |
436 | 449 | void rdaddsc(rdstringc *rs, char const *p); |
437 | 450 | void rdaddsn(rdstringc *rc, char const *p, int len); |
438 | 451 | char *rdtrimc(rdstringc *rs); |
454 | 467 | paragraph *cmdline_cfg_new(void); |
455 | 468 | paragraph *cmdline_cfg_simple(char *string, ...); |
456 | 469 | |
470 | time_t current_time(void); /* use in place of time(NULL) */ | |
471 | ||
457 | 472 | /* |
458 | 473 | * input.c |
459 | 474 | */ |
460 | paragraph *read_input(input *in, indexdata *idx); | |
475 | paragraph *read_input(input *in, indexdata *idx, psdata *psd); | |
461 | 476 | |
462 | 477 | /* |
463 | 478 | * in_afm.c |
464 | 479 | */ |
465 | void read_afm_file(input *in); | |
480 | void read_afm_file(input *in, psdata *psd); | |
466 | 481 | |
467 | 482 | /* |
468 | 483 | * in_pf.c |
469 | 484 | */ |
470 | void read_pfa_file(input *in); | |
471 | void read_pfb_file(input *in); | |
485 | void read_pfa_file(input *in, psdata *psd); | |
486 | void read_pfb_file(input *in, psdata *psd); | |
472 | 487 | |
473 | 488 | /* |
474 | 489 | * in_sfnt.c |
475 | 490 | */ |
476 | void read_sfnt_file(input *in); | |
491 | void read_sfnt_file(input *in, psdata *psd); | |
477 | 492 | |
478 | 493 | /* |
479 | 494 | * keywords.c |
493 | 508 | paragraph *para; /* the paragraph referenced */ |
494 | 509 | }; |
495 | 510 | keyword *kw_lookup(keywordlist *, wchar_t *); |
496 | keywordlist *get_keywords(paragraph *); | |
511 | keywordlist *get_keywords(paragraph *, errorstate *); | |
497 | 512 | void free_keywords(keywordlist *); |
498 | void subst_keywords(paragraph *, keywordlist *); | |
513 | void subst_keywords(paragraph *, keywordlist *, errorstate *); | |
499 | 514 | |
500 | 515 | /* |
501 | 516 | * index.c |
536 | 551 | void cleanup_index(indexdata *); |
537 | 552 | /* index_merge takes responsibility for freeing arg 3 iff implicit; never |
538 | 553 | * takes responsibility for arg 2 */ |
539 | void index_merge(indexdata *, int is_explicit, wchar_t *, word *, filepos *); | |
554 | void index_merge(indexdata *, bool is_explicit, wchar_t *, word *, filepos *, | |
555 | errorstate *es); | |
540 | 556 | void build_index(indexdata *); |
541 | 557 | void index_debug(indexdata *); |
542 | 558 | indextag *index_findtag(indexdata *idx, wchar_t *name); |
546 | 562 | */ |
547 | 563 | numberstate *number_init(void); |
548 | 564 | void number_cfg(numberstate *, paragraph *); |
549 | word *number_mktext(numberstate *, paragraph *, wchar_t *, int *, int *); | |
565 | word *number_mktext(numberstate *, paragraph *, wchar_t *, int *, bool *, | |
566 | errorstate *es); | |
550 | 567 | void number_free(numberstate *); |
551 | 568 | |
552 | 569 | /* |
553 | 570 | * biblio.c |
554 | 571 | */ |
555 | void gen_citations(paragraph *, keywordlist *); | |
572 | void gen_citations(paragraph *, keywordlist *, errorstate *); | |
556 | 573 | |
557 | 574 | /* |
558 | 575 | * bk_text.c |
559 | 576 | */ |
560 | void text_backend(paragraph *, keywordlist *, indexdata *, void *); | |
577 | void text_backend(paragraph *, keywordlist *, indexdata *, void *, | |
578 | errorstate *); | |
561 | 579 | paragraph *text_config_filename(char *filename); |
562 | 580 | |
563 | 581 | /* |
564 | 582 | * bk_html.c |
565 | 583 | */ |
566 | void html_backend(paragraph *, keywordlist *, indexdata *, void *); | |
567 | void chm_backend(paragraph *, keywordlist *, indexdata *, void *); | |
584 | void html_backend(paragraph *, keywordlist *, indexdata *, void *, | |
585 | errorstate *); | |
586 | void chm_backend(paragraph *, keywordlist *, indexdata *, void *, | |
587 | errorstate *); | |
568 | 588 | paragraph *html_config_filename(char *filename); |
569 | 589 | paragraph *chm_config_filename(char *filename); |
570 | 590 | |
571 | 591 | /* |
572 | 592 | * bk_whlp.c |
573 | 593 | */ |
574 | void whlp_backend(paragraph *, keywordlist *, indexdata *, void *); | |
594 | void whlp_backend(paragraph *, keywordlist *, indexdata *, void *, | |
595 | errorstate *); | |
575 | 596 | paragraph *whlp_config_filename(char *filename); |
576 | 597 | |
577 | 598 | /* |
578 | 599 | * bk_man.c |
579 | 600 | */ |
580 | void man_backend(paragraph *, keywordlist *, indexdata *, void *); | |
601 | void man_backend(paragraph *, keywordlist *, indexdata *, void *, | |
602 | errorstate *); | |
581 | 603 | paragraph *man_config_filename(char *filename); |
582 | 604 | |
583 | 605 | /* |
584 | 606 | * bk_info.c |
585 | 607 | */ |
586 | void info_backend(paragraph *, keywordlist *, indexdata *, void *); | |
608 | void info_backend(paragraph *, keywordlist *, indexdata *, void *, | |
609 | errorstate *); | |
587 | 610 | paragraph *info_config_filename(char *filename); |
588 | 611 | |
589 | 612 | /* |
590 | 613 | * bk_paper.c |
591 | 614 | */ |
592 | void *paper_pre_backend(paragraph *, keywordlist *, indexdata *); | |
593 | void listfonts(void); | |
615 | void *paper_pre_backend(paragraph *, keywordlist *, indexdata *, psdata *, | |
616 | errorstate *); | |
617 | void listfonts(psdata *); | |
594 | 618 | |
595 | 619 | /* |
596 | 620 | * bk_ps.c |
597 | 621 | */ |
598 | void ps_backend(paragraph *, keywordlist *, indexdata *, void *); | |
622 | void ps_backend(paragraph *, keywordlist *, indexdata *, void *, | |
623 | errorstate *); | |
599 | 624 | paragraph *ps_config_filename(char *filename); |
600 | 625 | |
601 | 626 | /* |
602 | 627 | * bk_pdf.c |
603 | 628 | */ |
604 | void pdf_backend(paragraph *, keywordlist *, indexdata *, void *); | |
629 | void pdf_backend(paragraph *, keywordlist *, indexdata *, void *, | |
630 | errorstate *); | |
605 | 631 | paragraph *pdf_config_filename(char *filename); |
606 | 632 | |
607 | 633 | #endif |
273 | 273 | * ---------------------------------- |
274 | 274 | * maxprob - nactivesyms |
275 | 275 | * |
276 | * rounded up, of course. And we'll only even be trying | |
277 | * this if | |
276 | * rounded up, of course. And we'll only even be trying this if | |
277 | * smallestfreq <= totalfreq / maxprob, which is precisely the | |
278 | * condition under which the numerator of this fraction is | |
279 | * positive. | |
280 | * | |
281 | * (As for the denominator, that could only be negative if there | |
282 | * were more than F_{n+2} symbols overall, in which case it | |
283 | * _wouldn't_ be possible to avoid having a symbol with | |
284 | * probability at most 1/F_{n+2}. So that is a constraint on the | |
285 | * input parameters to this function, which we enforce by | |
286 | * assertion.) | |
278 | 287 | */ |
279 | 288 | num = totalfreq - smallestfreq * maxprob; |
280 | 289 | denom = maxprob - nactivesyms; |
290 | assert(num > 0); /* this just restates the assert above */ | |
291 | assert(denom > 0); /* this is a constraint on the function parameters */ | |
281 | 292 | adjust = (num + denom - 1) / denom; |
282 | 293 | |
283 | 294 | /* |
12 | 12 | in->pos.line++; |
13 | 13 | c = getc(in->currfp); |
14 | 14 | if (c == EOF) { |
15 | err_afmeof(&in->pos); | |
15 | err_afmeof(in->es, &in->pos); | |
16 | 16 | return NULL; |
17 | 17 | } |
18 | 18 | line = snewn(len, char); |
38 | 38 | return line; |
39 | 39 | } |
40 | 40 | |
41 | static int afm_require_key(char *line, char const *expected, input *in) { | |
41 | static bool afm_require_key(char *line, char const *expected, input *in) { | |
42 | 42 | char *key = strtok(line, " \t"); |
43 | 43 | |
44 | 44 | if (strcmp(key, expected) == 0) |
45 | return TRUE; | |
46 | err_afmkey(&in->pos, expected); | |
47 | return FALSE; | |
45 | return true; | |
46 | err_afmkey(in->es, &in->pos, expected); | |
47 | return false; | |
48 | 48 | } |
49 | 49 | |
50 | void read_afm_file(input *in) { | |
50 | void read_afm_file(input *in, psdata *psd) { | |
51 | 51 | char *line, *key, *val; |
52 | 52 | font_info *fi; |
53 | 53 | size_t i; |
54 | 54 | |
55 | 55 | fi = snew(font_info); |
56 | 56 | fi->name = NULL; |
57 | fi->widths = newtree234(width_cmp); | |
57 | fi->widths = newtree234(width_cmp, NULL); | |
58 | 58 | fi->fontfile = NULL; |
59 | fi->kerns = newtree234(kern_cmp); | |
60 | fi->ligs = newtree234(lig_cmp); | |
59 | fi->kerns = newtree234(kern_cmp, NULL); | |
60 | fi->ligs = newtree234(lig_cmp, NULL); | |
61 | 61 | fi->fontbbox[0] = fi->fontbbox[1] = fi->fontbbox[2] = fi->fontbbox[3] = 0; |
62 | 62 | fi->capheight = fi->xheight = fi->ascent = fi->descent = 0; |
63 | 63 | fi->stemh = fi->stemv = fi->italicangle = 0; |
68 | 68 | if (!line || !afm_require_key(line, "StartFontMetrics", in)) |
69 | 69 | goto giveup; |
70 | 70 | if (!(val = strtok(NULL, " \t"))) { |
71 | err_afmval(&in->pos, "StartFontMetrics", 1); | |
71 | err_afmval(in->es, &in->pos, "StartFontMetrics", 1); | |
72 | 72 | goto giveup; |
73 | 73 | } |
74 | 74 | if (atof(val) >= 5.0) { |
75 | err_afmvers(&in->pos); | |
75 | err_afmvers(in->es, &in->pos); | |
76 | 76 | goto giveup; |
77 | 77 | } |
78 | 78 | sfree(line); |
82 | 82 | goto giveup; |
83 | 83 | key = strtok(line, " \t"); |
84 | 84 | if (strcmp(key, "EndFontMetrics") == 0) { |
85 | fi->next = all_fonts; | |
86 | all_fonts = fi; | |
85 | fi->next = psd->all_fonts; | |
86 | psd->all_fonts = fi; | |
87 | 87 | fclose(in->currfp); |
88 | 88 | return; |
89 | 89 | } else if (strcmp(key, "FontName") == 0) { |
90 | 90 | if (!(val = strtok(NULL, " \t"))) { |
91 | err_afmval(&in->pos, key, 1); | |
91 | err_afmval(in->es, &in->pos, key, 1); | |
92 | 92 | goto giveup; |
93 | 93 | } |
94 | 94 | fi->name = dupstr(val); |
96 | 96 | int i; |
97 | 97 | for (i = 0; i < 3; i++) { |
98 | 98 | if (!(val = strtok(NULL, " \t"))) { |
99 | err_afmval(&in->pos, key, 4); | |
99 | err_afmval(in->es, &in->pos, key, 4); | |
100 | 100 | goto giveup; |
101 | 101 | } |
102 | 102 | fi->fontbbox[i] = atof(val); |
103 | 103 | } |
104 | 104 | } else if (strcmp(key, "CapHeight") == 0) { |
105 | 105 | if (!(val = strtok(NULL, " \t"))) { |
106 | err_afmval(&in->pos, key, 1); | |
106 | err_afmval(in->es, &in->pos, key, 1); | |
107 | 107 | goto giveup; |
108 | 108 | } |
109 | 109 | fi->capheight = atof(val); |
110 | 110 | } else if (strcmp(key, "XHeight") == 0) { |
111 | 111 | if (!(val = strtok(NULL, " \t"))) { |
112 | err_afmval(&in->pos, key, 1); | |
112 | err_afmval(in->es, &in->pos, key, 1); | |
113 | 113 | goto giveup; |
114 | 114 | } |
115 | 115 | fi->xheight = atof(val); |
116 | 116 | } else if (strcmp(key, "Ascender") == 0) { |
117 | 117 | if (!(val = strtok(NULL, " \t"))) { |
118 | err_afmval(&in->pos, key, 1); | |
118 | err_afmval(in->es, &in->pos, key, 1); | |
119 | 119 | goto giveup; |
120 | 120 | } |
121 | 121 | fi->ascent = atof(val); |
122 | 122 | } else if (strcmp(key, "Descender") == 0) { |
123 | 123 | if (!(val = strtok(NULL, " \t"))) { |
124 | err_afmval(&in->pos, key, 1); | |
124 | err_afmval(in->es, &in->pos, key, 1); | |
125 | 125 | goto giveup; |
126 | 126 | } |
127 | 127 | fi->descent = atof(val); |
128 | 128 | } else if (strcmp(key, "CapHeight") == 0) { |
129 | 129 | if (!(val = strtok(NULL, " \t"))) { |
130 | err_afmval(&in->pos, key, 1); | |
130 | err_afmval(in->es, &in->pos, key, 1); | |
131 | 131 | goto giveup; |
132 | 132 | } |
133 | 133 | fi->capheight = atof(val); |
134 | 134 | } else if (strcmp(key, "StdHW") == 0) { |
135 | 135 | if (!(val = strtok(NULL, " \t"))) { |
136 | err_afmval(&in->pos, key, 1); | |
136 | err_afmval(in->es, &in->pos, key, 1); | |
137 | 137 | goto giveup; |
138 | 138 | } |
139 | 139 | fi->stemh = atof(val); |
140 | 140 | } else if (strcmp(key, "StdVW") == 0) { |
141 | 141 | if (!(val = strtok(NULL, " \t"))) { |
142 | err_afmval(&in->pos, key, 1); | |
142 | err_afmval(in->es, &in->pos, key, 1); | |
143 | 143 | goto giveup; |
144 | 144 | } |
145 | 145 | fi->stemv = atof(val); |
146 | 146 | } else if (strcmp(key, "ItalicAngle") == 0) { |
147 | 147 | if (!(val = strtok(NULL, " \t"))) { |
148 | err_afmval(&in->pos, key, 1); | |
148 | err_afmval(in->es, &in->pos, key, 1); | |
149 | 149 | goto giveup; |
150 | 150 | } |
151 | 151 | fi->italicangle = atof(val); |
152 | 152 | } else if (strcmp(key, "StartCharMetrics") == 0) { |
153 | 153 | int nglyphs, i; |
154 | 154 | if (!(val = strtok(NULL, " \t"))) { |
155 | err_afmval(&in->pos, key, 1); | |
155 | err_afmval(in->es, &in->pos, key, 1); | |
156 | 156 | goto giveup; |
157 | 157 | } |
158 | 158 | nglyphs = atoi(val); |
169 | 169 | if (strcmp(key, "WX") == 0 || strcmp(key, "W0X") == 0) { |
170 | 170 | if (!(val = strtok(NULL, " \t")) || |
171 | 171 | !strcmp(val, ";")) { |
172 | err_afmval(&in->pos, key, 1); | |
172 | err_afmval(in->es, &in->pos, key, 1); | |
173 | 173 | goto giveup; |
174 | 174 | } |
175 | 175 | width = atoi(val); |
176 | 176 | } else if (strcmp(key, "N") == 0) { |
177 | 177 | if (!(val = strtok(NULL, " \t")) || |
178 | 178 | !strcmp(val, ";")) { |
179 | err_afmval(&in->pos, key, 1); | |
180 | goto giveup; | |
181 | } | |
182 | g = glyph_intern(val); | |
179 | err_afmval(in->es, &in->pos, key, 1); | |
180 | goto giveup; | |
181 | } | |
182 | g = glyph_intern(psd, val); | |
183 | 183 | } else if (strcmp(key, "L") == 0) { |
184 | 184 | glyph succ, lig; |
185 | 185 | if (!(val = strtok(NULL, " \t")) || |
186 | 186 | !strcmp(val, ";")) { |
187 | err_afmval(&in->pos, key, 1); | |
188 | goto giveup; | |
189 | } | |
190 | succ = glyph_intern(val); | |
191 | if (!(val = strtok(NULL, " \t")) || | |
192 | !strcmp(val, ";")) { | |
193 | err_afmval(&in->pos, key, 1); | |
194 | goto giveup; | |
195 | } | |
196 | lig = glyph_intern(val); | |
187 | err_afmval(in->es, &in->pos, key, 1); | |
188 | goto giveup; | |
189 | } | |
190 | succ = glyph_intern(psd, val); | |
191 | if (!(val = strtok(NULL, " \t")) || | |
192 | !strcmp(val, ";")) { | |
193 | err_afmval(in->es, &in->pos, key, 1); | |
194 | goto giveup; | |
195 | } | |
196 | lig = glyph_intern(psd, val); | |
197 | 197 | if (g != NOGLYPH && succ != NOGLYPH && |
198 | 198 | lig != NOGLYPH) { |
199 | 199 | ligature *l = snew(ligature); |
229 | 229 | strcmp(key, "StartKernPairs0") == 0) { |
230 | 230 | int nkerns, i; |
231 | 231 | if (!(val = strtok(NULL, " \t"))) { |
232 | err_afmval(&in->pos, key, 1); | |
232 | err_afmval(in->es, &in->pos, key, 1); | |
233 | 233 | goto giveup; |
234 | 234 | } |
235 | 235 | nkerns = atoi(val); |
247 | 247 | nr = strtok(NULL, " \t"); |
248 | 248 | val = strtok(NULL, " \t"); |
249 | 249 | if (!val) { |
250 | err_afmval(&in->pos, key, 3); | |
250 | err_afmval(in->es, &in->pos, key, 3); | |
251 | 251 | goto giveup; |
252 | 252 | } |
253 | l = glyph_intern(nl); | |
254 | r = glyph_intern(nr); | |
253 | l = glyph_intern(psd, nl); | |
254 | r = glyph_intern(psd, nr); | |
255 | 255 | if (l == -1 || r == -1) continue; |
256 | 256 | kp = snew(kern_pair); |
257 | 257 | kp->left = l; |
41 | 41 | size_t offset; |
42 | 42 | } pfstate; |
43 | 43 | |
44 | static void pf_identify(t1_font *tf); | |
44 | static void pf_identify(t1_font *tf, psdata *, errorstate *); | |
45 | 45 | |
46 | 46 | static t1_data *load_pfb_file(FILE *fp, filepos *pos) { |
47 | 47 | t1_data *head = NULL, *tail = NULL; |
94 | 94 | return ret; |
95 | 95 | } |
96 | 96 | |
97 | void read_pfa_file(input *in) { | |
97 | void read_pfa_file(input *in, psdata *psd) { | |
98 | 98 | t1_font *tf = snew(t1_font); |
99 | 99 | |
100 | 100 | tf->data = load_pfa_file(in->currfp, &in->pos); |
101 | 101 | tf->pos = in->pos; |
102 | 102 | tf->length1 = tf->length2 = 0; |
103 | 103 | fclose(in->currfp); |
104 | pf_identify(tf); | |
105 | } | |
106 | ||
107 | void read_pfb_file(input *in) { | |
104 | pf_identify(tf, psd, in->es); | |
105 | } | |
106 | ||
107 | void read_pfb_file(input *in, psdata *psd) { | |
108 | 108 | t1_font *tf = snew(t1_font); |
109 | 109 | |
110 | 110 | tf->data = load_pfb_file(in->currfp, &in->pos); |
111 | 111 | tf->pos = in->pos; |
112 | 112 | tf->length1 = tf->length2 = 0; |
113 | 113 | fclose(in->currfp); |
114 | pf_identify(tf); | |
114 | pf_identify(tf, psd, in->es); | |
115 | 115 | } |
116 | 116 | static char *pf_read_token(pfstate *); |
117 | 117 | |
161 | 161 | return o + pf->offset; |
162 | 162 | } |
163 | 163 | |
164 | static void pf_identify(t1_font *tf) { | |
164 | static void pf_identify(t1_font *tf, psdata *psd, errorstate *es) { | |
165 | 165 | rdstringc rsc = { 0, 0, NULL }; |
166 | 166 | char *p; |
167 | 167 | size_t len; |
176 | 176 | c = pf_getc(pf); |
177 | 177 | if (c == EOF) { |
178 | 178 | sfree(rsc.text); |
179 | err_pfeof(&tf->pos); | |
179 | err_pfeof(es, &tf->pos); | |
180 | 180 | return; |
181 | 181 | } |
182 | 182 | rdaddc(&rsc, c); |
184 | 184 | p = rsc.text; |
185 | 185 | if ((p = strchr(p, ':')) == NULL) { |
186 | 186 | sfree(rsc.text); |
187 | err_pfhead(&tf->pos); | |
187 | err_pfhead(es, &tf->pos); | |
188 | 188 | return; |
189 | 189 | } |
190 | 190 | p++; |
195 | 195 | fontname[len] = 0; |
196 | 196 | sfree(rsc.text); |
197 | 197 | |
198 | for (fi = all_fonts; fi; fi = fi->next) { | |
198 | for (fi = psd->all_fonts; fi; fi = fi->next) { | |
199 | 199 | if (strcmp(fi->name, fontname) == 0) { |
200 | 200 | fi->fontfile = tf; |
201 | 201 | fi->filetype = TYPE1; |
203 | 203 | return; |
204 | 204 | } |
205 | 205 | } |
206 | err_pfnoafm(&tf->pos, fontname); | |
206 | err_pfnoafm(es, &tf->pos, fontname); | |
207 | 207 | sfree(fontname); |
208 | 208 | } |
209 | 209 | |
210 | 210 | /* |
211 | 211 | * PostScript white space characters; PLRM3 table 3.1 |
212 | 212 | */ |
213 | static int pf_isspace(int c) { | |
213 | static bool pf_isspace(int c) { | |
214 | 214 | return c == 000 || c == 011 || c == 012 || c == 014 || c == 015 || |
215 | 215 | c == ' '; |
216 | 216 | } |
218 | 218 | /* |
219 | 219 | * PostScript special characters; PLRM3 page 27 |
220 | 220 | */ |
221 | static int pf_isspecial(int c) { | |
221 | static bool pf_isspecial(int c) { | |
222 | 222 | return c == '(' || c == ')' || c == '<' || c == '>' || c == '[' || |
223 | 223 | c == ']' || c == '{' || c == '}' || c == '/' || c == '%'; |
224 | 224 | } |
246 | 246 | } |
247 | 247 | } |
248 | 248 | |
249 | static size_t pf_length1(t1_font *tf) { | |
249 | static size_t pf_length1(t1_font *tf, errorstate *es) { | |
250 | 250 | size_t ret; |
251 | 251 | |
252 | 252 | ret = pf_findtoken(tf, 0, "eexec"); |
253 | 253 | if (ret == (size_t)-1) { |
254 | err_pfeof(&tf->pos); | |
254 | err_pfeof(es, &tf->pos); | |
255 | 255 | return 0; |
256 | 256 | } |
257 | 257 | return ret; |
258 | 258 | } |
259 | 259 | |
260 | static size_t pf_length2(t1_font *tf) { | |
260 | static size_t pf_length2(t1_font *tf, errorstate *es) { | |
261 | 261 | size_t ret; |
262 | 262 | |
263 | 263 | if (tf->length1 == 0) |
264 | tf->length1 = pf_length1(tf); | |
264 | tf->length1 = pf_length1(tf, es); | |
265 | 265 | ret = pf_findtoken(tf, tf->length1, "cleartomark"); |
266 | 266 | if (ret == (size_t)-1) { |
267 | err_pfeof(&tf->pos); | |
267 | err_pfeof(es, &tf->pos); | |
268 | 268 | return 0; |
269 | 269 | } |
270 | 270 | return ret - 12 - tf->length1; /* backspace over "cleartomark\n" */ |
324 | 324 | char **bufp, size_t *lenp) { |
325 | 325 | t1_data *td = tf->data; |
326 | 326 | size_t blk, i; |
327 | int havenybble = 0; | |
327 | bool havenybble = false; | |
328 | 328 | char *p, nybble; |
329 | 329 | |
330 | 330 | while (td && off >= td->length) { |
362 | 362 | /* |
363 | 363 | * Return the initial, unencrypted, part of a font. |
364 | 364 | */ |
365 | void pf_part1(font_info *fi, char **bufp, size_t *lenp) { | |
365 | void pf_part1(font_info *fi, char **bufp, size_t *lenp, errorstate *es) { | |
366 | 366 | t1_font *tf = fi->fontfile; |
367 | 367 | |
368 | 368 | if (tf->length1 == 0) |
369 | tf->length1 = pf_length1(tf); | |
369 | tf->length1 = pf_length1(tf, es); | |
370 | 370 | pf_getascii(tf, 0, tf->length1, bufp, lenp); |
371 | 371 | } |
372 | 372 | |
373 | 373 | /* |
374 | 374 | * Return the middle, encrypted, part of a font. |
375 | 375 | */ |
376 | void pf_part2(font_info *fi, char **bufp, size_t *lenp) { | |
376 | void pf_part2(font_info *fi, char **bufp, size_t *lenp, errorstate *es) { | |
377 | 377 | t1_font *tf = fi->fontfile; |
378 | 378 | |
379 | 379 | if (tf->length2 == 0) |
380 | tf->length2 = pf_length2(tf); | |
380 | tf->length2 = pf_length2(tf, es); | |
381 | 381 | pf_getbinary(tf, tf->length1, tf->length2, bufp, lenp); |
382 | 382 | if (*lenp >= 256) |
383 | 383 | *lenp -= 256; |
85 | 85 | } |
86 | 86 | #define d_end decode_end, 0, 0 |
87 | 87 | |
88 | static void *decode(sfnt_decode *dec, void *src, void *end, void *dest) { | |
88 | static void *decode(const sfnt_decode *dec, void *src, void *end, void *dest) { | |
89 | 89 | while (dec->decoder != decode_end) { |
90 | 90 | if ((char *)src + dec->src_len > (char *)end) return NULL; |
91 | 91 | dec->decoder(src, (char *)dest + dec->dest_offset); |
95 | 95 | return src; |
96 | 96 | } |
97 | 97 | |
98 | static void *decoden(sfnt_decode *dec, void *src, void *end, void *dest, | |
98 | static void *decoden(const sfnt_decode *dec, void *src, void *end, void *dest, | |
99 | 99 | size_t size, size_t n) { |
100 | 100 | while (n-- && src) { |
101 | 101 | src = decode(dec, src, end, dest); |
105 | 105 | } |
106 | 106 | |
107 | 107 | /* Decoding specs for simple data types */ |
108 | sfnt_decode uint16_decode[] = { { d_uint16, 0 }, { d_end } }; | |
109 | sfnt_decode int16_decode[] = { { d_int16, 0 }, { d_end } }; | |
110 | sfnt_decode uint32_decode[] = { { d_uint32, 0 }, { d_end } }; | |
108 | const sfnt_decode uint16_decode[] = { { d_uint16, 0 }, { d_end } }; | |
109 | const sfnt_decode int16_decode[] = { { d_int16, 0 }, { d_end } }; | |
110 | const sfnt_decode uint32_decode[] = { { d_uint32, 0 }, { d_end } }; | |
111 | 111 | |
112 | 112 | /* Offset subdirectory -- the start of the file */ |
113 | 113 | typedef struct offsubdir_Tag offsubdir; |
115 | 115 | unsigned scaler_type; |
116 | 116 | unsigned numTables; |
117 | 117 | }; |
118 | sfnt_decode offsubdir_decode[] = { | |
118 | const sfnt_decode offsubdir_decode[] = { | |
119 | 119 | { d_uint32, offsetof(offsubdir, scaler_type) }, |
120 | 120 | { d_uint16, offsetof(offsubdir, numTables) }, |
121 | 121 | { d_skip(6) }, |
144 | 144 | unsigned offset; |
145 | 145 | unsigned length; |
146 | 146 | }; |
147 | sfnt_decode tabledir_decode[] = { | |
147 | const sfnt_decode tabledir_decode[] = { | |
148 | 148 | { d_uint32, offsetof(tabledir, tag) }, |
149 | 149 | { d_uint32, offsetof(tabledir, checkSum) }, |
150 | 150 | { d_uint32, offsetof(tabledir, offset) }, |
159 | 159 | int sTypoAscender, sTypoDescender; |
160 | 160 | int sxHeight, sCapHeight; |
161 | 161 | }; |
162 | sfnt_decode t_OS_2_v0_decode[] = { | |
162 | const sfnt_decode t_OS_2_v0_decode[] = { | |
163 | 163 | { d_uint16, offsetof(t_OS_2, version) }, |
164 | 164 | { d_skip(66) }, /* xAvgCharWidth, usWeightClass, usWidthClass, fsType, */ |
165 | 165 | /* ySubscriptXSize, ySubscriptYSize, ySubscriptXOffset, */ |
169 | 169 | /* achVendID, fsSelection, usFirstCharIndex, usLastCharIndex */ |
170 | 170 | { d_end } |
171 | 171 | }; |
172 | sfnt_decode t_OS_2_v1_decode[] = { | |
172 | const sfnt_decode t_OS_2_v1_decode[] = { | |
173 | 173 | { d_uint16, offsetof(t_OS_2, version) }, |
174 | 174 | { d_skip(66) }, /* xAvgCharWidth, usWeightClass, usWidthClass, fsType, */ |
175 | 175 | /* ySubscriptXSize, ySubscriptYSize, ySubscriptXOffset, */ |
183 | 183 | /* ulCodePageRange1, ulCodePageRange2 */ |
184 | 184 | { d_end } |
185 | 185 | }; |
186 | sfnt_decode t_OS_2_v2_decode[] = { | |
186 | const sfnt_decode t_OS_2_v2_decode[] = { | |
187 | 187 | { d_uint16, offsetof(t_OS_2, version) }, |
188 | 188 | { d_skip(66) }, /* xAvgCharWidth, usWeightClass, usWidthClass, fsType, */ |
189 | 189 | /* ySubscriptXSize, ySubscriptYSize, ySubscriptXOffset, */ |
206 | 206 | struct t_cmap_Tag { |
207 | 207 | unsigned numTables; |
208 | 208 | }; |
209 | sfnt_decode t_cmap_decode[] = { | |
209 | const sfnt_decode t_cmap_decode[] = { | |
210 | 210 | { d_skip(2) }, |
211 | 211 | { d_uint16, offsetof(t_cmap, numTables) }, |
212 | 212 | { d_end } |
217 | 217 | unsigned encodingID; |
218 | 218 | unsigned offset; |
219 | 219 | }; |
220 | sfnt_decode encodingrec_decode[] = { | |
220 | const sfnt_decode encodingrec_decode[] = { | |
221 | 221 | { d_uint16, offsetof(encodingrec, platformID) }, |
222 | 222 | { d_uint16, offsetof(encodingrec, encodingID) }, |
223 | 223 | { d_uint32, offsetof(encodingrec, offset) }, |
228 | 228 | unsigned length; |
229 | 229 | unsigned segCountX2; |
230 | 230 | }; |
231 | sfnt_decode cmap4_decode[] = { | |
231 | const sfnt_decode cmap4_decode[] = { | |
232 | 232 | { d_skip(2) }, /* format */ |
233 | 233 | { d_uint16, offsetof(cmap4, length) }, |
234 | 234 | { d_skip(2) }, /* language */ |
247 | 247 | int xMin, yMin, xMax, yMax; |
248 | 248 | int indexToLocFormat; |
249 | 249 | }; |
250 | sfnt_decode t_head_decode[] = { | |
250 | const sfnt_decode t_head_decode[] = { | |
251 | 251 | { d_uint32, offsetof(t_head, version) }, |
252 | 252 | { d_uint32, offsetof(t_head, fontRevision) }, |
253 | 253 | { d_skip(8) }, /* checkSumAdjustment, magicNumber, flags */ |
274 | 274 | int metricDataFormat; |
275 | 275 | unsigned numOfLongHorMetrics; |
276 | 276 | }; |
277 | sfnt_decode t_hhea_decode[] = { | |
277 | const sfnt_decode t_hhea_decode[] = { | |
278 | 278 | { d_uint32, offsetof(t_hhea, version) }, |
279 | 279 | { d_int16, offsetof(t_hhea, ascent) }, |
280 | 280 | { d_int16, offsetof(t_hhea, descent) }, |
286 | 286 | }; |
287 | 287 | |
288 | 288 | /* Horizontal Metrics ('hmtx') table */ |
289 | sfnt_decode longhormetric_decode[] = { | |
289 | const sfnt_decode longhormetric_decode[] = { | |
290 | 290 | { d_uint16, 0 }, |
291 | 291 | { d_skip(2) }, |
292 | 292 | { d_end } |
298 | 298 | unsigned version; |
299 | 299 | unsigned nTables; |
300 | 300 | }; |
301 | sfnt_decode t_kern_v0_decode[] = { | |
301 | const sfnt_decode t_kern_v0_decode[] = { | |
302 | 302 | { d_uint16, offsetof(t_kern, version) }, |
303 | 303 | { d_uint16, offsetof(t_kern, nTables) }, |
304 | 304 | { d_end } |
309 | 309 | unsigned length; |
310 | 310 | unsigned coverage; |
311 | 311 | }; |
312 | sfnt_decode kern_v0_subhdr_decode[] = { | |
312 | const sfnt_decode kern_v0_subhdr_decode[] = { | |
313 | 313 | { d_uint16, offsetof(kern_v0_subhdr, version) }, |
314 | 314 | { d_uint16, offsetof(kern_v0_subhdr, length) }, |
315 | 315 | { d_uint16, offsetof(kern_v0_subhdr, coverage) }, |
321 | 321 | #define KERN_V0_OVERRIDE 0x0008 |
322 | 322 | #define KERN_V0_FORMAT 0xff00 |
323 | 323 | #define KERN_V0_FORMAT_0 0x0000 |
324 | sfnt_decode t_kern_v1_decode[] = { | |
324 | const sfnt_decode t_kern_v1_decode[] = { | |
325 | 325 | { d_uint32, offsetof(t_kern, version) }, |
326 | 326 | { d_uint32, offsetof(t_kern, nTables) }, |
327 | 327 | { d_end } |
331 | 331 | unsigned length; |
332 | 332 | unsigned coverage; |
333 | 333 | }; |
334 | sfnt_decode kern_v1_subhdr_decode[] = { | |
334 | const sfnt_decode kern_v1_subhdr_decode[] = { | |
335 | 335 | { d_uint32, offsetof(kern_v1_subhdr, length) }, |
336 | 336 | { d_uint16, offsetof(kern_v1_subhdr, coverage) }, |
337 | 337 | { d_skip(2) }, /* tupleIndex */ |
346 | 346 | struct kern_f0_Tag { |
347 | 347 | unsigned nPairs; |
348 | 348 | }; |
349 | sfnt_decode kern_f0_decode[] = { | |
349 | const sfnt_decode kern_f0_decode[] = { | |
350 | 350 | { d_uint16, offsetof(kern_f0, nPairs) }, |
351 | 351 | { d_skip(6) }, /* searchRange, entrySelector, rangeShift */ |
352 | 352 | { d_end } |
357 | 357 | unsigned right; |
358 | 358 | int value; |
359 | 359 | }; |
360 | sfnt_decode kern_f0_pair_decode[] = { | |
360 | const sfnt_decode kern_f0_pair_decode[] = { | |
361 | 361 | { d_uint16, offsetof(kern_f0_pair, left) }, |
362 | 362 | { d_uint16, offsetof(kern_f0_pair, right) }, |
363 | 363 | { d_int16, offsetof(kern_f0_pair, value) }, |
370 | 370 | unsigned version; |
371 | 371 | unsigned numGlyphs; |
372 | 372 | }; |
373 | sfnt_decode t_maxp_decode[] = { | |
373 | const sfnt_decode t_maxp_decode[] = { | |
374 | 374 | { d_uint32, offsetof(t_maxp, version) }, |
375 | 375 | { d_uint16, offsetof(t_maxp, numGlyphs) }, |
376 | 376 | { d_end } |
385 | 385 | unsigned stringOffset; |
386 | 386 | namerecord *nameRecord; |
387 | 387 | }; |
388 | sfnt_decode t_name_decode[] = { | |
388 | const sfnt_decode t_name_decode[] = { | |
389 | 389 | { d_uint16, offsetof(t_name, format) }, |
390 | 390 | { d_uint16, offsetof(t_name, count) }, |
391 | 391 | { d_uint16, offsetof(t_name, stringOffset) }, |
399 | 399 | unsigned length; |
400 | 400 | unsigned offset; |
401 | 401 | }; |
402 | sfnt_decode namerecord_decode[] = { | |
402 | const sfnt_decode namerecord_decode[] = { | |
403 | 403 | { d_uint16, offsetof(namerecord, platformID) }, |
404 | 404 | { d_uint16, offsetof(namerecord, encodingID) }, |
405 | 405 | { d_uint16, offsetof(namerecord, languageID) }, |
420 | 420 | unsigned minMemType42; |
421 | 421 | unsigned maxMemType42; |
422 | 422 | }; |
423 | sfnt_decode t_post_decode[] = { | |
423 | const sfnt_decode t_post_decode[] = { | |
424 | 424 | { d_uint32, offsetof(t_post, format) }, |
425 | 425 | { d_int32, offsetof(t_post, italicAngle) }, |
426 | 426 | { d_int16, offsetof(t_post, underlinePosition) }, |
451 | 451 | unsigned minmem, maxmem; |
452 | 452 | }; |
453 | 453 | |
454 | static int sfnt_findtable(sfnt *sf, unsigned tag, | |
455 | void **startp, void **endp) { | |
454 | static bool sfnt_findtable(sfnt *sf, unsigned tag, | |
455 | void **startp, void **endp) { | |
456 | 456 | size_t i; |
457 | 457 | |
458 | 458 | for (i = 0; i < sf->osd.numTables; i++) { |
459 | 459 | if (sf->td[i].tag == tag) { |
460 | 460 | *startp = (char *)sf->data + sf->td[i].offset; |
461 | 461 | *endp = (char *)*startp + sf->td[i].length; |
462 | return TRUE; | |
462 | return true; | |
463 | 463 | } |
464 | 464 | } |
465 | return FALSE; | |
466 | } | |
467 | ||
468 | static char *sfnt_psname(font_info *fi) { | |
465 | return false; | |
466 | } | |
467 | ||
468 | static char *sfnt_psname(font_info *fi, errorstate *es) { | |
469 | 469 | sfnt *sf = fi->fontfile; |
470 | 470 | t_name name; |
471 | 471 | void *ptr, *end; |
474 | 474 | namerecord *nr; |
475 | 475 | |
476 | 476 | if (!sfnt_findtable(sf, TAG_name, &ptr, &end)) { |
477 | err_sfntnotable(&sf->pos, "name"); | |
477 | err_sfntnotable(es, &sf->pos, "name"); | |
478 | 478 | return NULL; |
479 | 479 | } |
480 | 480 | ptr = decode(t_name_decode, ptr, end, &name); |
495 | 495 | } |
496 | 496 | } |
497 | 497 | } |
498 | err_sfntnopsname(&sf->pos); | |
498 | err_sfntnopsname(es, &sf->pos); | |
499 | 499 | return NULL; |
500 | 500 | } |
501 | 501 | |
519 | 519 | } |
520 | 520 | |
521 | 521 | /* Generate an name for a glyph that doesn't have one. */ |
522 | static glyph genglyph(unsigned idx) { | |
523 | char buf[11]; | |
524 | if (idx == 0) return glyph_intern(".notdef"); | |
522 | static glyph genglyph(psdata *psd, unsigned idx) { | |
523 | char buf[64]; | |
524 | if (idx == 0) return glyph_intern(psd, ".notdef"); | |
525 | 525 | sprintf(buf, "glyph%u", idx); |
526 | return glyph_intern(buf); | |
526 | return glyph_intern(psd, buf); | |
527 | 527 | } |
528 | 528 | |
529 | 529 | /* |
532 | 532 | * TODO: cope better with duplicated glyph names (usually .notdef) |
533 | 533 | * TODO: when presented with format 3.0, try to use 'CFF' if present. |
534 | 534 | */ |
535 | static void sfnt_mapglyphs(font_info *fi) { | |
535 | static void sfnt_mapglyphs(font_info *fi, psdata *psd, errorstate *es) { | |
536 | 536 | sfnt *sf = fi->fontfile; |
537 | 537 | t_post post; |
538 | 538 | void *ptr, *end; |
545 | 545 | if (sfnt_findtable(sf, TAG_post, &ptr, &end)) { |
546 | 546 | ptr = decode(t_post_decode, ptr, end, &post); |
547 | 547 | if (ptr == NULL) { |
548 | err_sfntbadtable(&sf->pos, "post"); | |
548 | err_sfntbadtable(es, &sf->pos, "post"); | |
549 | 549 | goto noglyphs; |
550 | 550 | } |
551 | 551 | |
555 | 555 | switch (post.format) { |
556 | 556 | case 0x00010000: |
557 | 557 | if (sf->nglyphs != 258) { |
558 | err_sfntbadtable(&sf->pos, "post"); | |
558 | err_sfntbadtable(es, &sf->pos, "post"); | |
559 | 559 | break; |
560 | 560 | } |
561 | 561 | sf->glyphsbyindex = (glyph *)tt_std_glyphs; |
562 | 562 | break; |
563 | 563 | case 0x00020000: |
564 | 564 | if ((char *)ptr + 2 > (char *)end) { |
565 | err_sfntbadtable(&sf->pos, "post"); | |
565 | err_sfntbadtable(es, &sf->pos, "post"); | |
566 | 566 | break; |
567 | 567 | } |
568 | 568 | ptr = (char *)ptr + 2; |
569 | 569 | if ((char *)ptr + 2*sf->nglyphs > (char *)end) { |
570 | err_sfntbadtable(&sf->pos, "post"); | |
570 | err_sfntbadtable(es, &sf->pos, "post"); | |
571 | 571 | break; |
572 | 572 | } |
573 | 573 | nextras = 0; |
583 | 583 | memcpy(tmp, sptr + 1, *sptr); |
584 | 584 | tmp[*sptr] = 0; |
585 | 585 | assert(i < nextras); |
586 | extraglyphs[i++] = glyph_intern(tmp); | |
586 | extraglyphs[i++] = glyph_intern(psd, tmp); | |
587 | 587 | } |
588 | 588 | sf->glyphsbyindex = snewn(sf->nglyphs, glyph); |
589 | 589 | for (i = 0; i < sf->nglyphs; i++) { |
593 | 593 | else if (g < 258 + nextras) |
594 | 594 | sf->glyphsbyindex[i] = extraglyphs[g - 258]; |
595 | 595 | else { |
596 | err_sfntbadtable(&sf->pos, "post"); | |
597 | sf->glyphsbyindex[i] = genglyph(i); | |
596 | err_sfntbadtable(es, &sf->pos, "post"); | |
597 | sf->glyphsbyindex[i] = genglyph(psd, i); | |
598 | 598 | } |
599 | 599 | } |
600 | 600 | sfree(extraglyphs); |
602 | 602 | case 0x00030000: |
603 | 603 | break; |
604 | 604 | default: |
605 | err_sfnttablevers(&sf->pos, "post"); | |
605 | err_sfnttablevers(es, &sf->pos, "post"); | |
606 | 606 | break; |
607 | 607 | } |
608 | 608 | } |
610 | 610 | if (!sf->glyphsbyindex) { |
611 | 611 | sf->glyphsbyindex = snewn(sf->nglyphs, glyph); |
612 | 612 | for (i = 0; i < sf->nglyphs; i++) |
613 | sf->glyphsbyindex[i] = genglyph(i); | |
613 | sf->glyphsbyindex[i] = genglyph(psd, i); | |
614 | 614 | } |
615 | 615 | /* Construct glyphsbyname */ |
616 | 616 | sf->glyphsbyname = snewn(sf->nglyphs, unsigned short); |
632 | 632 | suflen = 4; |
633 | 633 | for (i = 0; i < sf->nglyphs; i++) { |
634 | 634 | char const *p; |
635 | p = strrchr(glyph_extern(sfnt_indextoglyph(sf, i)), '.'); | |
635 | p = strrchr(glyph_extern(psd, sfnt_indextoglyph(sf, i)), '.'); | |
636 | 636 | if (p && !(p+1)[strspn(p+1, "0123456789")] && strlen(p+1) > suflen) |
637 | 637 | suflen = strlen(p+1); |
638 | 638 | } |
642 | 642 | if (prev == (this = sfnt_indextoglyph(sf, sf->glyphsbyname[i]))) { |
643 | 643 | char const *basename; |
644 | 644 | char *buf; |
645 | basename = glyph_extern(this); | |
645 | basename = glyph_extern(psd, this); | |
646 | 646 | buf = snewn(strlen(basename) + 2 + suflen, char); |
647 | 647 | strcpy(buf, basename); |
648 | 648 | sprintf(buf + strlen(basename), ".%0*hu", suflen, |
649 | 649 | sf->glyphsbyname[i]); |
650 | sf->glyphsbyindex[sf->glyphsbyname[i]] = glyph_intern(buf); | |
650 | sf->glyphsbyindex[sf->glyphsbyname[i]] = glyph_intern(psd, buf); | |
651 | 651 | sfree(buf); |
652 | 652 | } |
653 | 653 | prev = this; |
675 | 675 | /* |
676 | 676 | * Get data from 'hhea', 'hmtx', and 'OS/2' tables |
677 | 677 | */ |
678 | void sfnt_getmetrics(font_info *fi) { | |
678 | void sfnt_getmetrics(font_info *fi, errorstate *es) { | |
679 | 679 | sfnt *sf = fi->fontfile; |
680 | 680 | t_hhea hhea; |
681 | 681 | t_OS_2 OS_2; |
689 | 689 | fi->fontbbox[2] = sf->head.xMax * FUNITS_PER_PT / sf->head.unitsPerEm; |
690 | 690 | fi->fontbbox[3] = sf->head.yMax * FUNITS_PER_PT / sf->head.unitsPerEm; |
691 | 691 | if (!sfnt_findtable(sf, TAG_hhea, &ptr, &end)) { |
692 | err_sfntnotable(&sf->pos, "hhea"); | |
692 | err_sfntnotable(es, &sf->pos, "hhea"); | |
693 | 693 | return; |
694 | 694 | } |
695 | 695 | if (decode(t_hhea_decode, ptr, end, &hhea) == NULL) { |
696 | err_sfntbadtable(&sf->pos, "hhea"); | |
696 | err_sfntbadtable(es, &sf->pos, "hhea"); | |
697 | 697 | return; |
698 | 698 | } |
699 | 699 | if ((hhea.version & 0xffff0000) != 0x00010000) { |
700 | err_sfnttablevers(&sf->pos, "hhea"); | |
700 | err_sfnttablevers(es, &sf->pos, "hhea"); | |
701 | 701 | return; |
702 | 702 | } |
703 | 703 | fi->ascent = hhea.ascent; |
704 | 704 | fi->descent = hhea.descent; |
705 | 705 | if (hhea.metricDataFormat != 0) { |
706 | err_sfnttablevers(&sf->pos, "hmtx"); | |
706 | err_sfnttablevers(es, &sf->pos, "hmtx"); | |
707 | 707 | return; |
708 | 708 | } |
709 | 709 | if (!sfnt_findtable(sf, TAG_hmtx, &ptr, &end)) { |
710 | err_sfntnotable(&sf->pos, "hmtx"); | |
710 | err_sfntnotable(es, &sf->pos, "hmtx"); | |
711 | 711 | return; |
712 | 712 | } |
713 | 713 | hmtx = snewn(hhea.numOfLongHorMetrics, unsigned); |
714 | 714 | if (decoden(longhormetric_decode, ptr, end, hmtx, sizeof(*hmtx), |
715 | 715 | hhea.numOfLongHorMetrics) == NULL) { |
716 | err_sfntbadtable(&sf->pos, "hmtx"); | |
716 | err_sfntbadtable(es, &sf->pos, "hmtx"); | |
717 | 717 | return; |
718 | 718 | } |
719 | 719 | for (i = 0; i < sf->nglyphs; i++) { |
742 | 742 | fi->descent = OS_2.sTypoDescender * FUNITS_PER_PT / sf->head.unitsPerEm; |
743 | 743 | return; |
744 | 744 | bados2: |
745 | err_sfntbadtable(&sf->pos, "OS/2"); | |
745 | err_sfntbadtable(es, &sf->pos, "OS/2"); | |
746 | 746 | } |
747 | 747 | |
748 | 748 | /* |
754 | 754 | * pairs for horizontal kerning of horizontal text, and ignores |
755 | 755 | * everything else. |
756 | 756 | */ |
757 | static void sfnt_getkern(font_info *fi) { | |
757 | static void sfnt_getkern(font_info *fi, errorstate *es) { | |
758 | 758 | sfnt *sf = fi->fontfile; |
759 | 759 | t_kern kern; |
760 | 760 | unsigned version, i, j; |
812 | 812 | } |
813 | 813 | return; |
814 | 814 | bad: |
815 | err_sfntbadtable(&sf->pos, "kern"); | |
815 | err_sfntbadtable(es, &sf->pos, "kern"); | |
816 | 816 | return; |
817 | 817 | } |
818 | 818 | |
824 | 824 | * Unicode 1.1 with precomposed Hangul syllables. We only handle |
825 | 825 | * format 4 of this table, since that seems to be the only one in use. |
826 | 826 | */ |
827 | void sfnt_getmap(font_info *fi) { | |
827 | static void sfnt_getmap(font_info *fi, errorstate *es) { | |
828 | 828 | sfnt *sf = fi->fontfile; |
829 | 829 | t_cmap cmap; |
830 | 830 | encodingrec *esd; |
836 | 836 | for (i = 0; i < lenof(fi->bmp); i++) |
837 | 837 | fi->bmp[i] = 0xFFFF; |
838 | 838 | if (!sfnt_findtable(sf, TAG_cmap, &ptr, &end)) { |
839 | err_sfntnotable(&sf->pos, "cmap"); | |
839 | err_sfntnotable(es, &sf->pos, "cmap"); | |
840 | return; | |
840 | 841 | } |
841 | 842 | base = ptr; |
842 | 843 | ptr = decode(t_cmap_decode, ptr, end, &cmap); |
885 | 886 | idx = (k + idDelta[j]) & 0xffff; |
886 | 887 | if (idx != 0) { |
887 | 888 | if (idx > sf->nglyphs) { |
888 | err_sfntbadglyph(&sf->pos, k); | |
889 | err_sfntbadglyph(es, &sf->pos, k); | |
889 | 890 | continue; |
890 | 891 | } |
891 | 892 | fi->bmp[k] = sfnt_indextoglyph(sf, idx); |
896 | 897 | for (k = startCode[j]; k <= endCode[j]; k++) { |
897 | 898 | if (startidx + k - startCode[j] >= |
898 | 899 | nglyphindex) { |
899 | err_sfntbadglyph(&sf->pos, k); | |
900 | err_sfntbadglyph(es, &sf->pos, k); | |
900 | 901 | continue; |
901 | 902 | } |
902 | 903 | idx = glyphIndexArray[startidx + k - startCode[j]]; |
903 | 904 | if (idx != 0) { |
904 | 905 | idx = (idx + idDelta[j]) & 0xffff; |
905 | 906 | if (idx > sf->nglyphs) { |
906 | err_sfntbadglyph(&sf->pos, k); | |
907 | err_sfntbadglyph(es, &sf->pos, k); | |
907 | 908 | continue; |
908 | 909 | } |
909 | 910 | fi->bmp[k] = sfnt_indextoglyph(sf, idx); |
916 | 917 | } |
917 | 918 | } |
918 | 919 | } |
919 | err_sfntnounicmap(&sf->pos); | |
920 | err_sfntnounicmap(es, &sf->pos); | |
920 | 921 | return; |
921 | 922 | bad: |
922 | err_sfntbadtable(&sf->pos, "cmap"); | |
923 | } | |
924 | ||
925 | void read_sfnt_file(input *in) { | |
923 | err_sfntbadtable(es, &sf->pos, "cmap"); | |
924 | } | |
925 | ||
926 | void read_sfnt_file(input *in, psdata *psd) { | |
926 | 927 | sfnt *sf = snew(sfnt); |
927 | 928 | size_t off = 0, got; |
928 | 929 | FILE *fp = in->currfp; |
931 | 932 | t_maxp maxp; |
932 | 933 | |
933 | 934 | fi->name = NULL; |
934 | fi->widths = newtree234(width_cmp); | |
935 | fi->kerns = newtree234(kern_cmp); | |
936 | fi->ligs = newtree234(lig_cmp); | |
935 | fi->widths = newtree234(width_cmp, NULL); | |
936 | fi->kerns = newtree234(kern_cmp, NULL); | |
937 | fi->ligs = newtree234(lig_cmp, NULL); | |
937 | 938 | fi->fontbbox[0] = fi->fontbbox[1] = fi->fontbbox[2] = fi->fontbbox[3] = 0; |
938 | 939 | fi->capheight = fi->xheight = fi->ascent = fi->descent = 0; |
939 | 940 | fi->stemh = fi->stemv = fi->italicangle = 0; |
958 | 959 | sf->nglyphs = 0; |
959 | 960 | ptr = decode(offsubdir_decode, sf->data, sf->end, &sf->osd); |
960 | 961 | if (ptr == NULL) { |
961 | err_sfntbadhdr(&sf->pos); | |
962 | err_sfntbadhdr(in->es, &sf->pos); | |
962 | 963 | return; |
963 | 964 | } |
964 | 965 | sf->td = snewn(sf->osd.numTables, tabledir); |
965 | 966 | ptr = decoden(tabledir_decode, ptr, sf->end, sf->td, sizeof(*sf->td), |
966 | 967 | sf->osd.numTables); |
967 | 968 | if (ptr == NULL) { |
968 | err_sfntbadhdr(&sf->pos); | |
969 | err_sfntbadhdr(in->es, &sf->pos); | |
969 | 970 | return; |
970 | 971 | } |
971 | 972 | if (!sfnt_findtable(sf, TAG_head, &ptr, &end)) { |
972 | err_sfntnotable(&sf->pos, "head"); | |
973 | err_sfntnotable(in->es, &sf->pos, "head"); | |
973 | 974 | return; |
974 | 975 | } |
975 | 976 | if (decode(t_head_decode, ptr, end, &sf->head) == NULL) { |
976 | err_sfntbadtable(&sf->pos, "head"); | |
977 | err_sfntbadtable(in->es, &sf->pos, "head"); | |
977 | 978 | return; |
978 | 979 | } |
979 | 980 | if ((sf->head.version & 0xffff0000) != 0x00010000) { |
980 | err_sfnttablevers(&sf->pos, "head"); | |
981 | err_sfnttablevers(in->es, &sf->pos, "head"); | |
981 | 982 | return; |
982 | 983 | } |
983 | 984 | if (!sfnt_findtable(sf, TAG_maxp, &ptr, &end)) { |
984 | err_sfntnotable(&sf->pos, "maxp"); | |
985 | err_sfntnotable(in->es, &sf->pos, "maxp"); | |
985 | 986 | return; |
986 | 987 | } |
987 | 988 | if (decode(t_maxp_decode, ptr, end, &maxp) == NULL) { |
988 | err_sfntbadtable(&sf->pos, "maxp"); | |
989 | err_sfntbadtable(in->es, &sf->pos, "maxp"); | |
989 | 990 | return; |
990 | 991 | } |
991 | 992 | if (maxp.version < 0x00005000 || maxp.version > 0x0001ffff) { |
992 | err_sfnttablevers(&sf->pos, "maxp"); | |
993 | err_sfnttablevers(in->es, &sf->pos, "maxp"); | |
993 | 994 | return; |
994 | 995 | } |
995 | 996 | sf->nglyphs = maxp.numGlyphs; |
996 | fi->name = sfnt_psname(fi); | |
997 | fi->name = sfnt_psname(fi, in->es); | |
997 | 998 | if (fi->name == NULL) return; |
998 | sfnt_mapglyphs(fi); | |
999 | sfnt_getmetrics(fi); | |
1000 | sfnt_getkern(fi); | |
1001 | sfnt_getmap(fi); | |
1002 | fi->next = all_fonts; | |
1003 | all_fonts = fi; | |
999 | sfnt_mapglyphs(fi, psd, in->es); | |
1000 | sfnt_getmetrics(fi, in->es); | |
1001 | sfnt_getkern(fi, in->es); | |
1002 | sfnt_getmap(fi, in->es); | |
1003 | fi->next = psd->all_fonts; | |
1004 | psd->all_fonts = fi; | |
1004 | 1005 | } |
1005 | 1006 | |
1006 | 1007 | static int sizecmp(const void *a, const void *b) { |
1015 | 1016 | * <http://partners.adobe.com/public/developer/en/font/5012.Type42_Spec.pdf> |
1016 | 1017 | */ |
1017 | 1018 | |
1018 | void sfnt_writeps(font_info const *fi, FILE *ofp) { | |
1019 | void sfnt_writeps(font_info const *fi, FILE *ofp, psdata *psd, errorstate *es) { | |
1019 | 1020 | unsigned i, j, lastbreak; |
1020 | 1021 | sfnt *sf = fi->fontfile; |
1021 | 1022 | size_t *breaks, glyfoff, glyflen; |
1052 | 1053 | fprintf(ofp, "0 1 %u{currentfile token pop exch def}bind for\n", |
1053 | 1054 | sf->nglyphs - 1); |
1054 | 1055 | for (i = 0; i < sf->nglyphs; i++) |
1055 | ps_token(ofp, &cc, "/%s", glyph_extern(sfnt_indextoglyph(sf, i))); | |
1056 | ps_token(ofp, &cc, "/%s", glyph_extern(psd, sfnt_indextoglyph(sf, i))); | |
1056 | 1057 | fprintf(ofp, "\nend readonly def\n"); |
1057 | 1058 | fprintf(ofp, "/sfnts [<"); |
1058 | 1059 | breaks = snewn(sf->osd.numTables + sf->nglyphs, size_t); |
1060 | 1061 | breaks[i] = sf->td[i].offset; |
1061 | 1062 | } |
1062 | 1063 | if (!sfnt_findtable(sf, TAG_glyf, &glyfptr, &glyfend)) { |
1063 | err_sfntnotable(&sf->pos, "glyf"); | |
1064 | err_sfntnotable(es, &sf->pos, "glyf"); | |
1064 | 1065 | return; |
1065 | 1066 | } |
1066 | 1067 | glyfoff = (char *)glyfptr - (char *)sf->data; |
1067 | 1068 | glyflen = (char *)glyfend - (char *)glyfptr; |
1068 | 1069 | if (!sfnt_findtable(sf, TAG_loca, &locaptr, &locaend)) { |
1069 | err_sfntnotable(&sf->pos, "loca"); | |
1070 | err_sfntnotable(es, &sf->pos, "loca"); | |
1070 | 1071 | return; |
1071 | 1072 | } |
1072 | 1073 | loca = snewn(sf->nglyphs, unsigned); |
1099 | 1100 | fprintf(ofp, "end /%s exch definefont\n", fi->name); |
1100 | 1101 | return; |
1101 | 1102 | badloca: |
1102 | err_sfntbadtable(&sf->pos, "loca"); | |
1103 | err_sfntbadtable(es, &sf->pos, "loca"); | |
1103 | 1104 | } |
1104 | 1105 | |
1105 | 1106 | void sfnt_data(font_info *fi, char **bufp, size_t *lenp) { |
5 | 5 | #include <stdlib.h> |
6 | 6 | #include "halibut.h" |
7 | 7 | |
8 | static int compare_tags(void *av, void *bv); | |
9 | static int compare_entries(void *av, void *bv); | |
8 | static int compare_tags(const void *av, const void *bv, void *cmpctx); | |
9 | static int compare_entries(const void *av, const void *bv, void *cmpctx); | |
10 | 10 | |
11 | 11 | indexdata *make_index(void) { |
12 | 12 | indexdata *ret = snew(indexdata); |
13 | ret->tags = newtree234(compare_tags); | |
14 | ret->entries = newtree234(compare_entries); | |
13 | ret->tags = newtree234(compare_tags, NULL); | |
14 | ret->entries = newtree234(compare_entries, NULL); | |
15 | 15 | return ret; |
16 | 16 | } |
17 | 17 | |
26 | 26 | return ret; |
27 | 27 | } |
28 | 28 | |
29 | static int compare_tags(void *av, void *bv) { | |
30 | indextag *a = (indextag *)av, *b = (indextag *)bv; | |
29 | static int compare_tags(const void *av, const void *bv, void *cmpctx) { | |
30 | const indextag *a = (const indextag *)av, *b = (const indextag *)bv; | |
31 | 31 | return ustricmp(a->name, b->name); |
32 | 32 | } |
33 | 33 | |
34 | static int compare_to_find_tag(void *av, void *bv) { | |
35 | wchar_t *a = (wchar_t *)av; | |
36 | indextag *b = (indextag *)bv; | |
34 | static int compare_to_find_tag(const void *av, const void *bv, void *cmpctx) { | |
35 | const wchar_t *a = (const wchar_t *)av; | |
36 | const indextag *b = (const indextag *)bv; | |
37 | 37 | return ustricmp(a, b->name); |
38 | 38 | } |
39 | 39 | |
40 | static int compare_entries(void *av, void *bv) { | |
40 | static int compare_entries(const void *av, const void *bv, void *cmpctx) { | |
41 | 41 | indexentry *a = (indexentry *)av, *b = (indexentry *)bv; |
42 | 42 | return compare_wordlists(a->text, b->text); |
43 | 43 | } |
46 | 46 | * Back-end utility: find the indextag with a given name. |
47 | 47 | */ |
48 | 48 | indextag *index_findtag(indexdata *idx, wchar_t *name) { |
49 | return find234(idx->tags, name, compare_to_find_tag); | |
49 | return findcmp234(idx->tags, name, compare_to_find_tag, NULL); | |
50 | 50 | } |
51 | 51 | |
52 | 52 | /* |
57 | 57 | * Guarantee on calling sequence: all implicit merges are given |
58 | 58 | * before the explicit ones. |
59 | 59 | */ |
60 | void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text, | |
61 | filepos *fpos) { | |
60 | void index_merge(indexdata *idx, bool is_explicit, wchar_t *tags, word *text, | |
61 | filepos *fpos, errorstate *es) { | |
62 | 62 | indextag *t, *existing; |
63 | 63 | |
64 | 64 | /* |
98 | 98 | * warn (and drop it, since it won't be referenced). |
99 | 99 | */ |
100 | 100 | if (is_explicit) { |
101 | err_nosuchidxtag(fpos, tags); | |
101 | err_nosuchidxtag(es, fpos, tags); | |
102 | 102 | continue; |
103 | 103 | } |
104 | 104 | |
122 | 122 | * see if the cases match. |
123 | 123 | */ |
124 | 124 | if (ustrcmp(t->name, existing->name)) { |
125 | err_indexcase(fpos, t->name, | |
125 | err_indexcase(es, fpos, t->name, | |
126 | 126 | &existing->implicit_fpos, existing->name); |
127 | 127 | } |
128 | 128 | |
213 | 213 | } |
214 | 214 | |
215 | 215 | static void dbg_prtwordlist(int level, word *w); |
216 | static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text); | |
216 | static void dbg_prtmerge(bool is_explicit, wchar_t *tag, word *text); | |
217 | 217 | |
218 | 218 | void index_debug(indexdata *i) { |
219 | 219 | indextag *t; |
239 | 239 | } |
240 | 240 | } |
241 | 241 | |
242 | static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text) { | |
242 | static void dbg_prtmerge(bool is_explicit, wchar_t *tag, word *text) { | |
243 | 243 | printf("\\IM: %splicit: \"", is_explicit ? "ex" : "im"); |
244 | 244 | for (; *tag; tag++) |
245 | 245 | putchar(*tag); |
38 | 38 | int ptr, npushback; |
39 | 39 | filepos pos; |
40 | 40 | }; |
41 | static int macrocmp(void *av, void *bv) { | |
41 | static int macrocmp(const void *av, const void *bv, void *cmpctx) { | |
42 | 42 | macro *a = (macro *)av, *b = (macro *)bv; |
43 | 43 | return ustrcmp(a->name, b->name); |
44 | 44 | } |
45 | 45 | static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text, |
46 | filepos fpos) { | |
46 | filepos fpos, errorstate *es) { | |
47 | 47 | macro *m = snew(macro); |
48 | 48 | m->name = name; |
49 | 49 | m->text = text; |
50 | 50 | if (add234(macros, m) != m) { |
51 | err_macroexists(&fpos, name); | |
51 | err_macroexists(es, &fpos, name); | |
52 | 52 | sfree(name); |
53 | 53 | sfree(text); |
54 | 54 | } |
55 | 55 | } |
56 | static int macrolookup(tree234 *macros, input *in, wchar_t *name, | |
57 | filepos *pos) { | |
56 | static bool macrolookup(tree234 *macros, input *in, wchar_t *name, | |
57 | filepos *pos) { | |
58 | 58 | macro m, *gotit; |
59 | 59 | m.name = name; |
60 | gotit = find234(macros, &m, NULL); | |
60 | gotit = find234(macros, &m); | |
61 | 61 | if (gotit) { |
62 | 62 | macrostack *expansion = snew(macrostack); |
63 | 63 | expansion->next = in->stack; |
66 | 66 | expansion->ptr = 0; |
67 | 67 | expansion->npushback = in->npushback; |
68 | 68 | in->stack = expansion; |
69 | return TRUE; | |
69 | return true; | |
70 | 70 | } else |
71 | return FALSE; | |
71 | return false; | |
72 | 72 | } |
73 | 73 | static void macrocleanup(tree234 *macros) { |
74 | 74 | int ti; |
85 | 85 | assert(cfg->type == para_Config); |
86 | 86 | |
87 | 87 | if (!ustricmp(cfg->keyword, L"input-charset")) { |
88 | in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword)); | |
88 | in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword), | |
89 | in->es); | |
89 | 90 | } |
90 | 91 | } |
91 | 92 | |
168 | 169 | NULL, 0); |
169 | 170 | assert(p == buf+1 && inlen == 0); |
170 | 171 | |
172 | for (int i = 0; i < in->nwc; i++) { | |
173 | if (in->wc[i] == 0) { | |
174 | /* The zero Unicode character is never legal */ | |
175 | err_zerochar(in->es, pos); | |
176 | return EOF; | |
177 | } | |
178 | } | |
179 | ||
171 | 180 | in->wcpos = 0; |
172 | 181 | } |
173 | 182 | } |
174 | 183 | |
175 | return in->wc[in->wcpos++]; | |
184 | wchar_t wc = in->wc[in->wcpos++]; | |
185 | ||
186 | return wc; | |
176 | 187 | |
177 | 188 | } else |
178 | 189 | return EOF; |
345 | 356 | /* We expect hex characters thereafter. */ |
346 | 357 | wchar_t *p = tok->text+1; |
347 | 358 | int n = 0; |
359 | bool seen_a_char = false; | |
348 | 360 | while (*p && ishex(*p)) { |
361 | seen_a_char = true; | |
349 | 362 | n = 16 * n + fromhex(*p); |
350 | 363 | p++; |
351 | 364 | } |
352 | if (!*p) { | |
365 | if (!*p && seen_a_char) { | |
353 | 366 | tok->cmd = c_u; |
354 | 367 | tok->aux = n; |
355 | 368 | return; |
477 | 490 | * things other than whitespace, backslash, braces and |
478 | 491 | * hyphen. A hyphen terminates the word but is returned as |
479 | 492 | * part of it; everything else is pushed back for the next |
480 | * token. The `aux' field contains TRUE if the word ends in | |
493 | * token. The `aux' field contains true if the word ends in | |
481 | 494 | * a hyphen. |
482 | 495 | */ |
483 | ret.aux = FALSE; /* assumed for now */ | |
496 | ret.aux = false; /* assumed for now */ | |
484 | 497 | prevpos = 0; |
485 | 498 | while (1) { |
486 | 499 | if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) { |
491 | 504 | rdadd(&rs, c); |
492 | 505 | if (c == '-') { |
493 | 506 | prevpos = rsc.pos; |
494 | ret.aux = TRUE; | |
507 | ret.aux = true; | |
495 | 508 | break; /* hyphen terminates word */ |
496 | 509 | } |
497 | 510 | } |
518 | 531 | * telling code paragraphs from paragraphs which merely start with |
519 | 532 | * code). |
520 | 533 | */ |
521 | int isbrace(input *in) { | |
534 | bool isbrace(input *in) { | |
522 | 535 | int c; |
523 | 536 | filepos cpos; |
524 | 537 | |
566 | 579 | if (!hptrptr) |
567 | 580 | return NULL; |
568 | 581 | mnewword = snew(word); |
582 | newword.private_data = NULL; /* placate gcc warning */ | |
569 | 583 | *mnewword = newword; /* structure copy */ |
570 | 584 | mnewword->next = NULL; |
571 | 585 | **hptrptr = mnewword; |
598 | 612 | tree234 *macros) { |
599 | 613 | token t; |
600 | 614 | paragraph par; |
601 | word wd, **whptr, **idximplicit; | |
615 | word wd, **whptr, **idximplicit = NULL; | |
602 | 616 | wchar_t utext[2], *wdtext; |
603 | 617 | int style, spcstyle; |
604 | int already; | |
605 | int iswhite, seenwhite; | |
606 | int type; | |
607 | int prev_para_type; | |
618 | bool already; | |
619 | bool iswhite, seenwhite; | |
620 | int prev_para_type = para_NotParaType; | |
608 | 621 | struct stack_item { |
609 | 622 | enum { |
610 | 623 | stack_nop = 0, /* do nothing (for error recovery) */ |
622 | 635 | stack parsestk; |
623 | 636 | struct crossparaitem { |
624 | 637 | int type; /* currently c_lcont, c_quote or -1 */ |
625 | int seen_lcont, seen_quote; | |
638 | bool seen_lcont, seen_quote; | |
626 | 639 | }; |
627 | 640 | stack crossparastk; |
628 | word *indexword, *uword, *iword; | |
641 | word *indexword = NULL, *uword, *iword; | |
629 | 642 | word *idxwordlist; |
630 | 643 | rdstring indexstr; |
631 | int index_downcase, index_visible, indexing; | |
644 | bool index_downcase = false, index_visible = false, indexing; | |
632 | 645 | const rdstring nullrs = { 0, 0, NULL }; |
633 | 646 | wchar_t uchr; |
634 | 647 | |
635 | t.text = NULL; | |
636 | t.origtext = NULL; | |
637 | already = FALSE; | |
648 | t = get_token(in); | |
649 | already = true; | |
650 | ||
651 | /* | |
652 | * Ignore tok_white if it appears at the very start of the file. | |
653 | * | |
654 | * At the start of most paragraphs, tok_white is guaranteed not to | |
655 | * appear, because get_token will have folded it into the | |
656 | * preceding tok_eop (since a tok_eop is simply a sequence of | |
657 | * whitespace containing at least two newlines). | |
658 | * | |
659 | * The one exception is if there isn't a preceding tok_eop, i.e. | |
660 | * if the very first paragraph begins with something that lexes as | |
661 | * a tok_white. Easiest way to get round that is to ignore it | |
662 | * here, by unsetting the 'already' flag which will force a new | |
663 | * token to be fetched below. | |
664 | */ | |
665 | if (t.type == tok_white) | |
666 | already = false; | |
638 | 667 | |
639 | 668 | crossparastk = stk_new(); |
640 | 669 | |
655 | 684 | if (!already) { |
656 | 685 | dtor(t), t = get_token(in); |
657 | 686 | } |
658 | already = FALSE; | |
687 | already = false; | |
659 | 688 | } while (t.type == tok_eop); |
660 | 689 | if (t.type == tok_eof) |
661 | 690 | break; |
671 | 700 | while (1) { |
672 | 701 | dtor(t), t = get_codepar_token(in); |
673 | 702 | wd.type = wtype; |
674 | wd.breaks = FALSE; /* shouldn't need this... */ | |
703 | wd.breaks = false; /* shouldn't need this... */ | |
675 | 704 | wd.text = ustrdup(t.text); |
676 | 705 | wd.alt = NULL; |
706 | wd.aux = 0; | |
677 | 707 | wd.fpos = t.pos; |
678 | 708 | addword(wd, &whptr); |
679 | 709 | dtor(t), t = get_token(in); |
686 | 716 | if (t.type == tok_eop || t.type == tok_eof || |
687 | 717 | t.type == tok_rbrace) { /* might be } terminating \lcont */ |
688 | 718 | if (t.type == tok_rbrace) |
689 | already = TRUE; | |
719 | already = true; | |
690 | 720 | break; |
691 | 721 | } else if (t.type == tok_cmd && t.cmd == c_c) { |
692 | 722 | wtype = word_WeakCode; |
697 | 727 | wtype == word_WeakCode) { |
698 | 728 | wtype = word_Strong; |
699 | 729 | } else { |
700 | err_brokencodepara(&t.pos); | |
730 | err_brokencodepara(in->es, &t.pos); | |
701 | 731 | prev_para_type = par.type; |
702 | 732 | addpara(par, ret); |
703 | 733 | while (t.type != tok_eop) /* error recovery: */ |
726 | 756 | */ |
727 | 757 | dtor(t), t = get_token(in); |
728 | 758 | if (t.type != tok_lbrace) { |
729 | err_explbr(&t.pos); | |
759 | err_explbr(in->es, &t.pos); | |
730 | 760 | continue; |
731 | 761 | } |
732 | 762 | |
738 | 768 | do { |
739 | 769 | dtor(t), t = get_token(in); |
740 | 770 | } while (t.type == tok_white); |
741 | already = TRUE; | |
771 | already = true; | |
742 | 772 | |
743 | 773 | if (cmd == c_lcont) { |
744 | 774 | /* |
749 | 779 | */ |
750 | 780 | sitem = snew(struct crossparaitem); |
751 | 781 | stop = (struct crossparaitem *)stk_top(crossparastk); |
752 | if (stop) | |
782 | if (stop) { | |
753 | 783 | *sitem = *stop; |
754 | else | |
755 | sitem->seen_quote = sitem->seen_lcont = 0; | |
784 | } else { | |
785 | sitem->seen_quote = false; | |
786 | sitem->seen_lcont = false; | |
787 | } | |
756 | 788 | |
757 | 789 | if (prev_para_type == para_Bullet || |
758 | 790 | prev_para_type == para_NumberedList || |
759 | 791 | prev_para_type == para_Description) { |
760 | 792 | sitem->type = c_lcont; |
761 | sitem->seen_lcont = 1; | |
793 | sitem->seen_lcont = true; | |
762 | 794 | par.type = para_LcontPush; |
763 | 795 | prev_para_type = par.type; |
764 | 796 | addpara(par, ret); |
769 | 801 | * don't give a cascade error. |
770 | 802 | */ |
771 | 803 | sitem->type = -1; |
772 | err_misplacedlcont(&t.pos); | |
804 | err_misplacedlcont(in->es, &t.pos); | |
773 | 805 | } |
774 | 806 | } else { |
775 | 807 | /* |
779 | 811 | */ |
780 | 812 | sitem = snew(struct crossparaitem); |
781 | 813 | stop = (struct crossparaitem *)stk_top(crossparastk); |
782 | if (stop) | |
814 | if (stop) { | |
783 | 815 | *sitem = *stop; |
784 | else | |
785 | sitem->seen_quote = sitem->seen_lcont = 0; | |
816 | } else { | |
817 | sitem->seen_quote = false; | |
818 | sitem->seen_lcont = false; | |
819 | } | |
786 | 820 | sitem->type = c_quote; |
787 | sitem->seen_quote = 1; | |
821 | sitem->seen_quote = true; | |
788 | 822 | par.type = para_QuotePush; |
789 | 823 | prev_para_type = par.type; |
790 | 824 | addpara(par, ret); |
794 | 828 | } else if (t.type == tok_rbrace) { |
795 | 829 | struct crossparaitem *sitem = stk_pop(crossparastk); |
796 | 830 | if (!sitem) |
797 | err_unexbrace(&t.pos); | |
831 | err_unexbrace(in->es, &t.pos); | |
798 | 832 | else { |
799 | 833 | switch (sitem->type) { |
800 | 834 | case c_lcont: |
828 | 862 | par.type = para_Normal; |
829 | 863 | if (t.type == tok_cmd) { |
830 | 864 | int needkw; |
831 | int is_macro = FALSE; | |
865 | bool is_macro = false; | |
832 | 866 | |
833 | 867 | par.fpos = t.pos; |
834 | 868 | switch (t.cmd) { |
836 | 870 | needkw = -1; |
837 | 871 | break; |
838 | 872 | case c__invalid: |
839 | err_badparatype(t.text, &t.pos); | |
873 | err_badparatype(in->es, t.text, &t.pos); | |
840 | 874 | needkw = 4; |
841 | 875 | break; |
842 | 876 | case c__comment: |
879 | 913 | case c_cfg: needkw = 8; par.type = para_Config; |
880 | 914 | start_cmd = c_cfg; break; |
881 | 915 | case c_copyright: needkw = 32; par.type = para_Copyright; break; |
882 | case c_define: is_macro = TRUE; needkw = 1; break; | |
916 | case c_define: is_macro = true; needkw = 1; break; | |
883 | 917 | /* For \nocite the keyword is _everything_ */ |
884 | 918 | case c_nocite: needkw = 8; par.type = para_NoCite; break; |
885 | 919 | case c_preamble: needkw = 32; par.type = para_Normal; break; |
895 | 929 | par.type == para_UnnumberedChapter) { |
896 | 930 | struct crossparaitem *sitem = stk_top(crossparastk); |
897 | 931 | if (sitem && (sitem->seen_lcont || sitem->seen_quote)) { |
898 | err_sectmarkerinblock( &t.pos, | |
899 | (sitem->seen_lcont ? "lcont" : "quote")); | |
932 | err_sectmarkerinblock( | |
933 | in->es, &t.pos, | |
934 | (sitem->seen_lcont ? "lcont" : "quote")); | |
900 | 935 | } |
901 | 936 | } |
902 | 937 | |
948 | 983 | } |
949 | 984 | } |
950 | 985 | if (t.type != tok_rbrace) { |
951 | err_kwunclosed(&t.pos); | |
986 | err_kwunclosed(in->es, &t.pos); | |
952 | 987 | continue; |
953 | 988 | } |
954 | 989 | rdadd(&rs, 0); /* add string terminator */ |
961 | 996 | |
962 | 997 | /* See whether we have the right number of keywords. */ |
963 | 998 | if ((needkw & 48) && nkeys > 0) |
964 | err_kwillegal(&fp); | |
999 | err_kwillegal(in->es, &fp); | |
965 | 1000 | if ((needkw & 11) && nkeys == 0) |
966 | err_kwexpected(&fp); | |
1001 | err_kwexpected(in->es, &fp); | |
967 | 1002 | if ((needkw & 5) && nkeys > 1) |
968 | err_kwtoomany(&fp); | |
1003 | err_kwtoomany(in->es, &fp); | |
969 | 1004 | |
970 | 1005 | if (is_macro) { |
971 | 1006 | /* |
984 | 1019 | if (t.type == tok_eop || t.type == tok_eof) |
985 | 1020 | break; |
986 | 1021 | } |
987 | macrodef(macros, rs.text, macrotext.text, fp); | |
1022 | macrodef(macros, rs.text, macrotext.text, fp, in->es); | |
988 | 1023 | continue; /* next paragraph */ |
989 | 1024 | } |
990 | 1025 | |
999 | 1034 | if (t.type != tok_eop && t.type != tok_eof && |
1000 | 1035 | (start_cmd == c__invalid || |
1001 | 1036 | t.type != tok_cmd || t.cmd != start_cmd)) { |
1002 | err_bodyillegal(&t.pos); | |
1037 | err_bodyillegal(in->es, &t.pos); | |
1003 | 1038 | /* Error recovery: eat the rest of the paragraph */ |
1004 | 1039 | while (t.type != tok_eop && t.type != tok_eof && |
1005 | 1040 | (start_cmd == c__invalid || |
1007 | 1042 | dtor(t), t = get_token(in); |
1008 | 1043 | } |
1009 | 1044 | if (t.type == tok_cmd) |
1010 | already = TRUE;/* inhibit get_token at top of loop */ | |
1045 | already = true;/* inhibit get_token at top of loop */ | |
1011 | 1046 | prev_para_type = par.type; |
1012 | 1047 | addpara(par, ret); |
1013 | 1048 | |
1039 | 1074 | parsestk = stk_new(); |
1040 | 1075 | style = word_Normal; |
1041 | 1076 | spcstyle = word_WhiteSpace; |
1042 | indexing = FALSE; | |
1043 | seenwhite = TRUE; | |
1077 | indexing = false; | |
1078 | seenwhite = true; | |
1044 | 1079 | while (t.type != tok_eop && t.type != tok_eof) { |
1045 | iswhite = FALSE; | |
1046 | already = FALSE; | |
1080 | iswhite = false; | |
1081 | already = false; | |
1047 | 1082 | |
1048 | 1083 | /* Handle implicit paragraph breaks after \IM, \BR etc */ |
1049 | 1084 | if (start_cmd != c__invalid && |
1050 | 1085 | t.type == tok_cmd && t.cmd == start_cmd) { |
1051 | already = TRUE; /* inhibit get_token at top of loop */ | |
1086 | already = true; /* inhibit get_token at top of loop */ | |
1052 | 1087 | break; |
1053 | 1088 | } |
1054 | 1089 | |
1076 | 1111 | wd.alt = NULL; |
1077 | 1112 | wd.aux = 0; |
1078 | 1113 | wd.fpos = t.pos; |
1079 | wd.breaks = FALSE; | |
1114 | wd.breaks = false; | |
1080 | 1115 | |
1081 | 1116 | /* |
1082 | 1117 | * Inhibit use of whitespace if it's (probably the |
1085 | 1120 | */ |
1086 | 1121 | if (start_cmd != c__invalid) { |
1087 | 1122 | dtor(t), t = get_token(in); |
1088 | already = TRUE; | |
1123 | already = true; | |
1089 | 1124 | if (t.type == tok_cmd && t.cmd == start_cmd) |
1090 | 1125 | break; |
1091 | 1126 | } |
1096 | 1131 | addword(wd, &whptr); |
1097 | 1132 | if (indexing) |
1098 | 1133 | addword(wd, &idximplicit); |
1099 | iswhite = TRUE; | |
1134 | iswhite = true; | |
1100 | 1135 | break; |
1101 | 1136 | case tok_word: |
1102 | 1137 | if (indexing) |
1116 | 1151 | } |
1117 | 1152 | break; |
1118 | 1153 | case tok_lbrace: |
1119 | err_unexbrace(&t.pos); | |
1154 | err_unexbrace(in->es, &t.pos); | |
1120 | 1155 | /* Error recovery: push nop */ |
1121 | 1156 | sitem = snew(struct stack_item); |
1122 | 1157 | sitem->type = stack_nop; |
1132 | 1167 | * wants popping. Accordingly, we treat it here |
1133 | 1168 | * as an indication that the paragraph is over. |
1134 | 1169 | */ |
1135 | already = TRUE; | |
1170 | already = true; | |
1136 | 1171 | goto finished_para; |
1137 | 1172 | } else { |
1138 | 1173 | if (sitem->type & stack_ualt) { |
1144 | 1179 | spcstyle = word_WhiteSpace; |
1145 | 1180 | } |
1146 | 1181 | if (sitem->type & stack_idx) { |
1182 | rdadds(&indexstr, L""); | |
1147 | 1183 | indexword->text = ustrdup(indexstr.text); |
1148 | 1184 | if (index_downcase) { |
1149 | 1185 | word *w; |
1155 | 1191 | if (w->text) |
1156 | 1192 | ustrlow(w->text); |
1157 | 1193 | } |
1158 | indexing = FALSE; | |
1194 | indexing = false; | |
1159 | 1195 | rdadd(&indexstr, L'\0'); |
1160 | index_merge(idx, FALSE, indexstr.text, | |
1161 | idxwordlist, &sitem->fpos); | |
1196 | index_merge(idx, false, indexstr.text, | |
1197 | idxwordlist, &sitem->fpos, in->es); | |
1162 | 1198 | sfree(indexstr.text); |
1163 | 1199 | } |
1164 | 1200 | if (sitem->type & stack_hyper) { |
1167 | 1203 | wd.alt = NULL; |
1168 | 1204 | wd.aux = 0; |
1169 | 1205 | wd.fpos = t.pos; |
1170 | wd.breaks = FALSE; | |
1206 | wd.breaks = false; | |
1171 | 1207 | if (!indexing || index_visible) |
1172 | 1208 | addword(wd, &whptr); |
1173 | 1209 | if (indexing) |
1179 | 1215 | wd.alt = NULL; |
1180 | 1216 | wd.aux = quote_Close; |
1181 | 1217 | wd.fpos = t.pos; |
1182 | wd.breaks = FALSE; | |
1218 | wd.breaks = false; | |
1183 | 1219 | if (!indexing || index_visible) |
1184 | 1220 | addword(wd, &whptr); |
1185 | 1221 | if (indexing) { |
1202 | 1238 | */ |
1203 | 1239 | dtor(t), t = get_token(in); |
1204 | 1240 | if (t.type != tok_lbrace) { |
1205 | err_explbr(&t.pos); | |
1241 | err_explbr(in->es, &t.pos); | |
1206 | 1242 | } else { |
1207 | 1243 | int braces = 1; |
1208 | 1244 | while (braces > 0) { |
1212 | 1248 | else if (t.type == tok_rbrace) |
1213 | 1249 | braces--; |
1214 | 1250 | else if (t.type == tok_eof) { |
1215 | err_commenteof(&t.pos); | |
1251 | err_commenteof(in->es, &t.pos); | |
1216 | 1252 | break; |
1217 | 1253 | } |
1218 | 1254 | } |
1219 | 1255 | } |
1220 | 1256 | if (seenwhite) { |
1221 | already = TRUE; | |
1257 | already = true; | |
1222 | 1258 | dtor(t), t = get_token(in); |
1223 | 1259 | if (t.type == tok_white) { |
1224 | iswhite = TRUE; | |
1225 | already = FALSE; | |
1260 | iswhite = true; | |
1261 | already = false; | |
1226 | 1262 | } |
1227 | 1263 | } |
1228 | 1264 | break; |
1229 | 1265 | case c_q: |
1230 | case c_cq: | |
1231 | type = t.cmd; | |
1266 | case c_cq: { | |
1267 | int type = t.cmd; | |
1232 | 1268 | dtor(t), t = get_token(in); |
1233 | 1269 | if (t.type != tok_lbrace) { |
1234 | err_explbr(&t.pos); | |
1270 | err_explbr(in->es, &t.pos); | |
1235 | 1271 | } else { |
1236 | 1272 | /* |
1237 | 1273 | * Enforce that \q may not be used anywhere |
1251 | 1287 | wd.alt = NULL; |
1252 | 1288 | wd.aux = quote_Open; |
1253 | 1289 | wd.fpos = t.pos; |
1254 | wd.breaks = FALSE; | |
1290 | wd.breaks = false; | |
1255 | 1291 | if (!indexing || index_visible) |
1256 | 1292 | addword(wd, &whptr); |
1257 | 1293 | if (indexing) { |
1260 | 1296 | } |
1261 | 1297 | stype = stack_quote; |
1262 | 1298 | } else { |
1263 | err_codequote(&t.pos); | |
1299 | err_codequote(in->es, &t.pos); | |
1264 | 1300 | stype = stack_nop; |
1265 | 1301 | } |
1266 | 1302 | sitem = snew(struct stack_item); |
1268 | 1304 | sitem->type = stype; |
1269 | 1305 | if (type == c_cq) { |
1270 | 1306 | if (style != word_Normal) { |
1271 | err_nestedstyles(&t.pos); | |
1307 | err_nestedstyles(in->es, &t.pos); | |
1272 | 1308 | } else { |
1273 | 1309 | style = word_WeakCode; |
1274 | 1310 | spcstyle = tospacestyle(style); |
1278 | 1314 | stk_push(parsestk, sitem); |
1279 | 1315 | } |
1280 | 1316 | break; |
1317 | } | |
1281 | 1318 | case c_K: |
1282 | 1319 | case c_k: |
1283 | 1320 | case c_W: |
1288 | 1325 | * brace. No nesting; no arguments. |
1289 | 1326 | */ |
1290 | 1327 | wd.fpos = t.pos; |
1291 | wd.breaks = FALSE; | |
1328 | wd.breaks = false; | |
1292 | 1329 | if (t.cmd == c_K) |
1293 | 1330 | wd.type = word_UpperXref; |
1294 | 1331 | else if (t.cmd == c_k) |
1300 | 1337 | dtor(t), t = get_token(in); |
1301 | 1338 | if (t.type != tok_lbrace) { |
1302 | 1339 | if (wd.type == word_Normal) { |
1303 | time_t thetime = time(NULL); | |
1340 | time_t thetime = current_time(); | |
1304 | 1341 | struct tm *broken = localtime(&thetime); |
1305 | already = TRUE; | |
1342 | already = true; | |
1306 | 1343 | wdtext = ustrftime(NULL, broken); |
1307 | 1344 | wd.type = style; |
1308 | 1345 | } else { |
1309 | err_explbr(&t.pos); | |
1346 | err_explbr(in->es, &t.pos); | |
1310 | 1347 | wdtext = NULL; |
1311 | 1348 | } |
1312 | 1349 | } else { |
1319 | 1356 | rdadds(&rs, t.text); |
1320 | 1357 | } |
1321 | 1358 | if (wd.type == word_Normal) { |
1322 | time_t thetime = time(NULL); | |
1359 | time_t thetime = current_time(); | |
1323 | 1360 | struct tm *broken = localtime(&thetime); |
1324 | 1361 | wdtext = ustrftime(rs.text, broken); |
1325 | 1362 | wd.type = style; |
1328 | 1365 | } |
1329 | 1366 | sfree(rs.text); |
1330 | 1367 | if (t.type != tok_rbrace) { |
1331 | err_kwexprbr(&t.pos); | |
1368 | err_kwexprbr(in->es, &t.pos); | |
1332 | 1369 | } |
1333 | 1370 | } |
1334 | 1371 | wd.alt = NULL; |
1358 | 1395 | if (t.type == tok_cmd && |
1359 | 1396 | (t.cmd == c_i || t.cmd == c_ii)) { |
1360 | 1397 | if (indexing) { |
1361 | err_nestedindex(&t.pos); | |
1398 | err_nestedindex(in->es, &t.pos); | |
1362 | 1399 | } else { |
1363 | 1400 | /* Add an index-reference word with no |
1364 | 1401 | * text as yet */ |
1366 | 1403 | wd.text = NULL; |
1367 | 1404 | wd.alt = NULL; |
1368 | 1405 | wd.aux = 0; |
1369 | wd.breaks = FALSE; | |
1406 | wd.breaks = false; | |
1370 | 1407 | indexword = addword(wd, &whptr); |
1371 | 1408 | /* Set up a rdstring to read the |
1372 | 1409 | * index text */ |
1373 | 1410 | indexstr = nullrs; |
1374 | 1411 | /* Flags so that we do the Right |
1375 | 1412 | * Things with text */ |
1376 | index_visible = (type != c_I); | |
1377 | index_downcase = (type == c_ii); | |
1378 | indexing = TRUE; | |
1413 | index_visible = (t.cmd != c_I); | |
1414 | index_downcase = (t.cmd == c_ii); | |
1415 | indexing = true; | |
1379 | 1416 | idxwordlist = NULL; |
1380 | 1417 | idximplicit = &idxwordlist; |
1381 | 1418 | |
1390 | 1427 | (t.cmd == c_e || t.cmd == c_s || |
1391 | 1428 | t.cmd == c_c || t.cmd == c_cw)) { |
1392 | 1429 | if (style != word_Normal) |
1393 | err_nestedstyles(&t.pos); | |
1430 | err_nestedstyles(in->es, &t.pos); | |
1394 | 1431 | else { |
1395 | 1432 | style = (t.cmd == c_c ? word_Code : |
1396 | 1433 | t.cmd == c_cw ? word_WeakCode : |
1402 | 1439 | dtor(t), t = get_token(in); |
1403 | 1440 | } |
1404 | 1441 | if (t.type != tok_lbrace) { |
1405 | err_explbr(&t.pos); | |
1442 | err_explbr(in->es, &t.pos); | |
1406 | 1443 | sfree(sitem); |
1407 | 1444 | } else { |
1408 | 1445 | stk_push(parsestk, sitem); |
1412 | 1449 | case c_c: |
1413 | 1450 | case c_cw: |
1414 | 1451 | case c_e: |
1415 | case c_s: | |
1416 | type = t.cmd; | |
1452 | case c_s: { | |
1453 | int type = t.cmd; | |
1417 | 1454 | if (style != word_Normal) { |
1418 | err_nestedstyles(&t.pos); | |
1455 | err_nestedstyles(in->es, &t.pos); | |
1419 | 1456 | /* Error recovery: eat lbrace, push nop. */ |
1420 | 1457 | dtor(t), t = get_token(in); |
1421 | 1458 | sitem = snew(struct stack_item); |
1425 | 1462 | } |
1426 | 1463 | dtor(t), t = get_token(in); |
1427 | 1464 | if (t.type != tok_lbrace) { |
1428 | err_explbr(&t.pos); | |
1465 | err_explbr(in->es, &t.pos); | |
1429 | 1466 | } else { |
1430 | 1467 | style = (type == c_c ? word_Code : |
1431 | 1468 | type == c_cw ? word_WeakCode : |
1438 | 1475 | stk_push(parsestk, sitem); |
1439 | 1476 | } |
1440 | 1477 | break; |
1478 | } | |
1441 | 1479 | case c_i: |
1442 | 1480 | case c_ii: |
1443 | case c_I: | |
1444 | type = t.cmd; | |
1481 | case c_I: { | |
1482 | int type = t.cmd; | |
1445 | 1483 | if (indexing) { |
1446 | err_nestedindex(&t.pos); | |
1484 | err_nestedindex(in->es, &t.pos); | |
1447 | 1485 | /* Error recovery: eat lbrace, push nop. */ |
1448 | 1486 | dtor(t), t = get_token(in); |
1449 | 1487 | sitem = snew(struct stack_item); |
1463 | 1501 | (t.cmd == c_e || t.cmd == c_s || |
1464 | 1502 | t.cmd == c_c || t.cmd == c_cw)) { |
1465 | 1503 | if (style != word_Normal) |
1466 | err_nestedstyles(&t.pos); | |
1504 | err_nestedstyles(in->es, &t.pos); | |
1467 | 1505 | else { |
1468 | 1506 | style = (t.cmd == c_c ? word_Code : |
1469 | 1507 | t.cmd == c_cw ? word_WeakCode : |
1476 | 1514 | } |
1477 | 1515 | if (t.type != tok_lbrace) { |
1478 | 1516 | sfree(sitem); |
1479 | err_explbr(&t.pos); | |
1517 | err_explbr(in->es, &t.pos); | |
1480 | 1518 | } else { |
1481 | 1519 | /* Add an index-reference word with no text as yet */ |
1482 | 1520 | wd.type = word_IndexRef; |
1483 | 1521 | wd.text = NULL; |
1484 | 1522 | wd.alt = NULL; |
1485 | 1523 | wd.aux = 0; |
1486 | wd.breaks = FALSE; | |
1524 | wd.breaks = false; | |
1487 | 1525 | indexword = addword(wd, &whptr); |
1488 | 1526 | /* Set up a rdstring to read the index text */ |
1489 | 1527 | indexstr = nullrs; |
1490 | 1528 | /* Flags so that we do the Right Things with text */ |
1491 | 1529 | index_visible = (type != c_I); |
1492 | 1530 | index_downcase = (type == c_ii); |
1493 | indexing = TRUE; | |
1531 | indexing = true; | |
1494 | 1532 | idxwordlist = NULL; |
1495 | 1533 | idximplicit = &idxwordlist; |
1496 | 1534 | /* Stack item to close the indexing on exit */ |
1497 | 1535 | stk_push(parsestk, sitem); |
1498 | 1536 | } |
1499 | 1537 | break; |
1538 | } | |
1500 | 1539 | case c_u: |
1501 | 1540 | uchr = t.aux; |
1541 | if (uchr == 0) { | |
1542 | err_zerochar(in->es, &t.pos); | |
1543 | break; | |
1544 | } | |
1502 | 1545 | utext[0] = uchr; utext[1] = 0; |
1503 | 1546 | wd.type = style; |
1504 | wd.breaks = FALSE; | |
1547 | wd.breaks = false; | |
1505 | 1548 | wd.alt = NULL; |
1506 | 1549 | wd.aux = 0; |
1507 | 1550 | wd.fpos = t.pos; |
1534 | 1577 | } else { |
1535 | 1578 | if (indexing) |
1536 | 1579 | rdadd(&indexstr, uchr); |
1537 | already = TRUE; | |
1580 | already = true; | |
1538 | 1581 | } |
1539 | 1582 | break; |
1540 | 1583 | default: |
1541 | 1584 | if (!macrolookup(macros, in, t.text, &t.pos)) |
1542 | err_badmidcmd(t.text, &t.pos); | |
1585 | err_badmidcmd(in->es, t.text, &t.pos); | |
1543 | 1586 | break; |
1544 | 1587 | } |
1545 | 1588 | } |
1552 | 1595 | if (stk_top(parsestk)) { |
1553 | 1596 | while ((sitem = stk_pop(parsestk))) |
1554 | 1597 | sfree(sitem); |
1555 | err_missingrbrace(&t.pos); | |
1598 | err_missingrbrace(in->es, &t.pos); | |
1556 | 1599 | } |
1557 | 1600 | stk_free(parsestk); |
1558 | 1601 | prev_para_type = par.type; |
1568 | 1611 | addpara(par, ret); |
1569 | 1612 | } |
1570 | 1613 | if (t.type == tok_eof) |
1571 | already = TRUE; | |
1614 | already = true; | |
1572 | 1615 | } |
1573 | 1616 | |
1574 | 1617 | if (stk_top(crossparastk)) { |
1575 | 1618 | void *p; |
1576 | 1619 | |
1577 | err_missingrbrace2(&t.pos); | |
1620 | err_missingrbrace2(in->es, &t.pos); | |
1578 | 1621 | while ((p = stk_pop(crossparastk))) |
1579 | 1622 | sfree(p); |
1580 | 1623 | } |
1588 | 1631 | stk_free(crossparastk); |
1589 | 1632 | } |
1590 | 1633 | |
1591 | struct { | |
1634 | const struct { | |
1592 | 1635 | char const *magic; |
1593 | 1636 | size_t nmagic; |
1594 | int binary; | |
1595 | void (*reader)(input *); | |
1637 | bool binary; | |
1638 | void (*reader)(input *, psdata *); | |
1596 | 1639 | } magics[] = { |
1597 | { "%!FontType1-", 12, FALSE, &read_pfa_file }, | |
1598 | { "%!PS-AdobeFont-", 15, FALSE, &read_pfa_file }, | |
1599 | { "\x80\x01", 2, TRUE, &read_pfb_file }, | |
1600 | { "StartFontMetrics", 16, FALSE, &read_afm_file }, | |
1601 | { "\x00\x01\x00\x00", 4, TRUE, &read_sfnt_file }, | |
1602 | { "true", 4, TRUE, &read_sfnt_file }, | |
1640 | { "%!FontType1-", 12, false, &read_pfa_file }, | |
1641 | { "%!PS-AdobeFont-", 15, false, &read_pfa_file }, | |
1642 | { "\x80\x01", 2, true, &read_pfb_file }, | |
1643 | { "StartFontMetrics", 16, false, &read_afm_file }, | |
1644 | { "\x00\x01\x00\x00", 4, true, &read_sfnt_file }, | |
1645 | { "true", 4, true, &read_sfnt_file }, | |
1603 | 1646 | }; |
1604 | 1647 | |
1605 | paragraph *read_input(input *in, indexdata *idx) { | |
1648 | paragraph *read_input(input *in, indexdata *idx, psdata *psd) { | |
1606 | 1649 | paragraph *head = NULL; |
1607 | 1650 | paragraph **hptr = &head; |
1608 | 1651 | tree234 *macros; |
1609 | 1652 | char mag[16]; |
1610 | 1653 | size_t len, i; |
1611 | int binary; | |
1612 | void (*reader)(input *); | |
1613 | ||
1614 | macros = newtree234(macrocmp); | |
1654 | bool binary; | |
1655 | void (*reader)(input *, psdata *); | |
1656 | ||
1657 | macros = newtree234(macrocmp, NULL); | |
1615 | 1658 | |
1616 | 1659 | while (in->currindex < in->nfiles) { |
1617 | 1660 | setpos(in, in->filenames[in->currindex]); |
1622 | 1665 | |
1623 | 1666 | if (!in->filenames[in->currindex]) { |
1624 | 1667 | in->currfp = stdin; |
1625 | in->wantclose = FALSE; /* don't fclose stdin */ | |
1668 | in->wantclose = false; /* don't fclose stdin */ | |
1626 | 1669 | /* |
1627 | 1670 | * When reading standard input, we always expect to see |
1628 | 1671 | * an actual Halibut file and not any of the unusual |
1636 | 1679 | * looking at a text file type. |
1637 | 1680 | */ |
1638 | 1681 | in->currfp = fopen(in->filenames[in->currindex], "rb"); |
1639 | binary = FALSE; /* default to Halibut source, which is text */ | |
1682 | binary = false; /* default to Halibut source, which is text */ | |
1683 | reader = NULL; | |
1640 | 1684 | if (in->currfp) { |
1641 | in->wantclose = TRUE; | |
1642 | reader = NULL; | |
1685 | in->wantclose = true; | |
1643 | 1686 | len = fread(mag, 1, sizeof(mag), in->currfp); |
1644 | 1687 | for (i = 0; i < lenof(magics); i++) { |
1645 | 1688 | if (len >= magics[i].nmagic && |
1661 | 1704 | if (reader == NULL) { |
1662 | 1705 | read_file(&hptr, in, idx, macros); |
1663 | 1706 | } else { |
1664 | (*reader)(in); | |
1707 | (*reader)(in, psd); | |
1665 | 1708 | } |
1666 | 1709 | } else { |
1667 | err_cantopen(in->filenames[in->currindex]); | |
1710 | err_cantopen(in->es, in->filenames[in->currindex]); | |
1668 | 1711 | } |
1669 | 1712 | in->currindex++; |
1670 | 1713 | } |
6 | 6 | #include <assert.h> |
7 | 7 | #include "halibut.h" |
8 | 8 | |
9 | static int kwcmp(void *av, void *bv) | |
9 | static int kwcmp(const void *av, const void *bv, void *cmpctx) | |
10 | 10 | { |
11 | 11 | const keyword *a = (const keyword *)av; |
12 | 12 | const keyword *b = (const keyword *)bv; |
13 | 13 | return ustrcmp(a->key, b->key); |
14 | 14 | } |
15 | 15 | |
16 | static int kwfind(void *av, void *bv) | |
16 | static int kwfind(const void *av, const void *bv, void *cmpctx) | |
17 | 17 | { |
18 | 18 | wchar_t *a = (wchar_t *)av; |
19 | 19 | const keyword *b = (const keyword *)bv; |
21 | 21 | } |
22 | 22 | |
23 | 23 | keyword *kw_lookup(keywordlist *kl, wchar_t *str) { |
24 | return find234(kl->keys, str, kwfind); | |
24 | return findcmp234(kl->keys, str, kwfind, NULL); | |
25 | 25 | } |
26 | 26 | |
27 | 27 | /* |
30 | 30 | * collation, last at the top (so that we can Heapsort them when we |
31 | 31 | * finish). |
32 | 32 | */ |
33 | keywordlist *get_keywords(paragraph *source) { | |
34 | int errors = FALSE; | |
33 | keywordlist *get_keywords(paragraph *source, errorstate *es) { | |
34 | bool errors = false; | |
35 | 35 | keywordlist *kl = snew(keywordlist); |
36 | 36 | numberstate *n = number_init(); |
37 | 37 | int prevpara = para_NotParaType; |
39 | 39 | number_cfg(n, source); |
40 | 40 | |
41 | 41 | kl->size = 0; |
42 | kl->keys = newtree234(kwcmp); | |
42 | kl->keys = newtree234(kwcmp, NULL); | |
43 | 43 | kl->nlooseends = kl->looseendssize = 0; |
44 | 44 | kl->looseends = NULL; |
45 | 45 | for (; source; source = source->next) { |
61 | 61 | * This also sets up the `parent', `child' and `sibling' |
62 | 62 | * links. |
63 | 63 | */ |
64 | source->kwtext = number_mktext(n, source, q, &prevpara, &errors); | |
64 | source->kwtext = number_mktext(n, source, q, &prevpara, &errors, es); | |
65 | 65 | |
66 | 66 | if (p && *p) { |
67 | 67 | if (source->kwtext || source->type == para_Biblio) { |
73 | 73 | kw->para = source; |
74 | 74 | ret = add234(kl->keys, kw); |
75 | 75 | if (ret != kw) { |
76 | err_multikw(&source->fpos, &ret->para->fpos, p); | |
76 | err_multikw(es, &source->fpos, &ret->para->fpos, p); | |
77 | 77 | sfree(kw); |
78 | 78 | /* FIXME: what happens to kw->text? Does it leak? */ |
79 | 79 | } |
112 | 112 | sfree(kl); |
113 | 113 | } |
114 | 114 | |
115 | void subst_keywords(paragraph *source, keywordlist *kl) { | |
115 | void subst_keywords(paragraph *source, keywordlist *kl, errorstate *es) { | |
116 | 116 | for (; source; source = source->next) { |
117 | 117 | word *ptr; |
118 | 118 | for (ptr = source->words; ptr; ptr = ptr->next) { |
123 | 123 | |
124 | 124 | kw = kw_lookup(kl, ptr->text); |
125 | 125 | if (!kw) { |
126 | err_nosuchkw(&ptr->fpos, ptr->text); | |
126 | err_nosuchkw(es, &ptr->fpos, ptr->text); | |
127 | 127 | subst = NULL; |
128 | 128 | } else |
129 | 129 | subst = dup_word_list(kw->text); |
138 | 138 | close->alt = NULL; |
139 | 139 | close->type = word_XrefEnd; |
140 | 140 | close->fpos = ptr->fpos; |
141 | close->breaks = FALSE; | |
141 | close->breaks = false; | |
142 | 142 | close->aux = 0; |
143 | 143 | |
144 | 144 | close->next = ptr->next; |
4 | 4 | #include <stdio.h> |
5 | 5 | |
6 | 6 | static const char *const licencetext[] = { |
7 | "Halibut is copyright (c) 1999-2017 Simon Tatham.", | |
7 | "Halibut is copyright (c) 1999-2021 Simon Tatham.", | |
8 | 8 | "", |
9 | 9 | "Permission is hereby granted, free of charge, to any person", |
10 | 10 | "obtaining a copy of this software and associated documentation files", |
116 | 116 | #define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)%st->winsize] : data[k] ) |
117 | 117 | |
118 | 118 | void lz77_compress(struct LZ77Context *ctx, |
119 | const unsigned char *data, int len, int compress) | |
119 | const unsigned char *data, int len, bool compress) | |
120 | 120 | { |
121 | 121 | struct LZ77InternalContext *st = ctx->ictx; |
122 | 122 | int i, hash, distance, off, nmatch, matchlen, advance; |
143 | 143 | } |
144 | 144 | st->npending -= i; |
145 | 145 | |
146 | defermatch.len = 0; | |
146 | defermatch.distance = defermatch.len = 0; | |
147 | 147 | deferchr = '\0'; |
148 | 148 | while (len > 0) { |
149 | 149 |
27 | 27 | /* |
28 | 28 | * Supply data to be compressed. Will update the private fields of |
29 | 29 | * the LZ77Context, and will call literal() and match() to output. |
30 | * If `compress' is FALSE, it will never emit a match, but will | |
30 | * If `compress' is false, it will never emit a match, but will | |
31 | 31 | * instead call literal() for everything. |
32 | 32 | */ |
33 | 33 | void lz77_compress(struct LZ77Context *ctx, |
34 | const unsigned char *data, int len, int compress); | |
34 | const unsigned char *data, int len, bool compress); |
218 | 218 | lz77c.literal = lzx_literal; |
219 | 219 | lz77c.match = lzx_match; |
220 | 220 | lz77c.userdata = info; |
221 | lz77_compress(&lz77c, data, len, TRUE); | |
221 | lz77_compress(&lz77c, data, len, true); | |
222 | 222 | lz77_cleanup(&lz77c); |
223 | 223 | } |
224 | 224 | |
396 | 396 | size_t data_size, resets_size; |
397 | 397 | unsigned short bitbuffer; |
398 | 398 | int nbits; |
399 | int first_block; | |
399 | bool first_block; | |
400 | 400 | } LZXBitstream; |
401 | 401 | |
402 | 402 | void lzx_write_bits(LZXBitstream *bs, int value, int bits) |
562 | 562 | * the whole-file header. |
563 | 563 | */ |
564 | 564 | lzx_addsym(&header[0], LST_RAWBITS_BASE + 1, 0); |
565 | bs->first_block = FALSE; | |
565 | bs->first_block = false; | |
566 | 566 | } |
567 | 567 | lzx_addsym(&header[0], LST_RAWBITS_BASE + 3, blocktype); |
568 | 568 | lzx_addsym(&header[0], LST_RAWBITS_BASE + 24, blocksize); |
634 | 634 | * block-boundary heuristics, but I don't really think it's |
635 | 635 | * worth it. |
636 | 636 | */ |
637 | bs.first_block = TRUE; /* reset every time we reset the LZ state */ | |
637 | bs.first_block = true; /* reset every time we reset the LZ state */ | |
638 | 638 | lzx_encode_block(buf.syms, buf.nsyms, thislen, &hufs, &bs); |
639 | 639 | |
640 | 640 | sfree(buf.syms); |
6 | 6 | #include <stdio.h> |
7 | 7 | #include <stdlib.h> |
8 | 8 | #include "halibut.h" |
9 | #include "paper.h" | |
9 | 10 | |
10 | 11 | static void dbg_prtsource(paragraph *sourceform); |
11 | 12 | static void dbg_prtwordlist(int level, word *w); |
12 | 13 | static void dbg_prtkws(keywordlist *kws); |
13 | 14 | |
14 | 15 | static const struct pre_backend { |
15 | void *(*func)(paragraph *, keywordlist *, indexdata *); | |
16 | void *(*func)(paragraph *, keywordlist *, indexdata *, psdata *, | |
17 | errorstate *); | |
16 | 18 | int bitfield; |
17 | 19 | } pre_backends[] = { |
18 | 20 | {paper_pre_backend, 0x0001} |
19 | 21 | }; |
20 | 22 | |
21 | 23 | static const struct backend { |
22 | char *name; | |
23 | void (*func)(paragraph *, keywordlist *, indexdata *, void *); | |
24 | const char *name; | |
25 | void (*func)(paragraph *, keywordlist *, indexdata *, void *, | |
26 | errorstate *); | |
24 | 27 | paragraph *(*filename)(char *filename); |
25 | 28 | int bitfield, prebackend_bitfield; |
26 | 29 | } backends[] = { |
40 | 43 | int main(int argc, char **argv) { |
41 | 44 | char **infiles; |
42 | 45 | int nfiles; |
43 | int nogo; | |
44 | int errs; | |
45 | int reportcols; | |
46 | int list_fonts; | |
46 | bool nogo; | |
47 | bool reportcols; | |
48 | bool list_fonts; | |
47 | 49 | int input_charset; |
48 | int debug; | |
50 | bool debug; | |
49 | 51 | int backendbits, prebackbits; |
50 | 52 | int k, b; |
51 | 53 | paragraph *cfg, *cfg_tail; |
52 | 54 | void *pre_backend_data[16]; |
55 | errorstate es[1]; | |
53 | 56 | |
54 | 57 | /* |
55 | 58 | * Use the specified locale everywhere. It'll be used for |
67 | 70 | */ |
68 | 71 | infiles = snewn(argc, char *); |
69 | 72 | nfiles = 0; |
70 | nogo = errs = FALSE; | |
71 | reportcols = 0; | |
72 | list_fonts = 0; | |
73 | nogo = false; | |
74 | reportcols = false; | |
75 | list_fonts = false; | |
73 | 76 | input_charset = CS_ASCII; |
74 | debug = 0; | |
77 | debug = false; | |
75 | 78 | backendbits = 0; |
76 | 79 | cfg = cfg_tail = NULL; |
80 | es->fatal = false; | |
77 | 81 | |
78 | 82 | if (argc == 1) { |
79 | 83 | usage(); |
128 | 132 | /* do nothing */; |
129 | 133 | } else if (!strcmp(opt, "-input-charset")) { |
130 | 134 | if (!val) { |
131 | errs = TRUE, err_optnoarg(opt); | |
135 | err_optnoarg(es, opt); | |
132 | 136 | } else { |
133 | 137 | int charset = charset_from_localenc(val); |
134 | 138 | if (charset == CS_NONE) { |
135 | errs = TRUE, err_cmdcharset(val); | |
139 | err_cmdcharset(es, val); | |
136 | 140 | } else { |
137 | 141 | input_charset = charset; |
138 | 142 | } |
139 | 143 | } |
140 | 144 | } else if (!strcmp(opt, "-help")) { |
141 | 145 | help(); |
142 | nogo = TRUE; | |
146 | nogo = true; | |
143 | 147 | } else if (!strcmp(opt, "-version")) { |
144 | 148 | showversion(); |
145 | nogo = TRUE; | |
149 | nogo = true; | |
146 | 150 | } else if (!strcmp(opt, "-licence") || |
147 | 151 | !strcmp(opt, "-license")) { |
148 | 152 | licence(); |
149 | nogo = TRUE; | |
153 | nogo = true; | |
150 | 154 | } else if (!strcmp(opt, "-list-charsets")) { |
151 | 155 | listcharsets(); |
152 | nogo = TRUE; | |
156 | nogo = true; | |
153 | 157 | } else if (!strcmp(opt, "-list-fonts")) { |
154 | list_fonts = TRUE; | |
158 | list_fonts = true; | |
155 | 159 | } else if (!strcmp(opt, "-precise")) { |
156 | reportcols = 1; | |
160 | reportcols = true; | |
157 | 161 | } else { |
158 | errs = TRUE, err_nosuchopt(opt); | |
162 | err_nosuchopt(es, opt); | |
159 | 163 | } |
160 | 164 | } |
161 | 165 | p = NULL; |
171 | 175 | switch (c) { |
172 | 176 | case 'h': |
173 | 177 | help(); |
174 | nogo = TRUE; | |
178 | nogo = true; | |
175 | 179 | break; |
176 | 180 | case 'V': |
177 | 181 | showversion(); |
178 | nogo = TRUE; | |
182 | nogo = true; | |
179 | 183 | break; |
180 | 184 | case 'L': |
181 | 185 | licence(); |
182 | nogo = TRUE; | |
186 | nogo = true; | |
183 | 187 | break; |
184 | 188 | case 'P': |
185 | reportcols = 1; | |
189 | reportcols = true; | |
186 | 190 | break; |
187 | 191 | case 'd': |
188 | debug = TRUE; | |
192 | debug = true; | |
189 | 193 | break; |
190 | 194 | } |
191 | 195 | break; |
200 | 204 | char opt[2]; |
201 | 205 | opt[0] = c; |
202 | 206 | opt[1] = '\0'; |
203 | errs = TRUE, err_optnoarg(opt); | |
207 | err_optnoarg(es, opt); | |
204 | 208 | } |
205 | 209 | /* |
206 | 210 | * Now c is the option and p is the parameter. |
224 | 228 | *r = '\0'; |
225 | 229 | /* XXX ad-hoc diagnostic */ |
226 | 230 | if (!strcmp(s, "input-charset")) |
227 | err_futileopt("Cinput-charset", | |
231 | err_futileopt(es, "Cinput-charset", | |
228 | 232 | "; use --input-charset"); |
229 | 233 | cmdline_cfg_add(para, s); |
230 | 234 | r = s; |
256 | 260 | char opt[2]; |
257 | 261 | opt[0] = c; |
258 | 262 | opt[1] = '\0'; |
259 | errs = TRUE, err_nosuchopt(opt); | |
263 | err_nosuchopt(es, opt); | |
260 | 264 | } |
261 | 265 | } |
262 | 266 | } |
271 | 275 | } |
272 | 276 | } |
273 | 277 | |
274 | if (errs) | |
278 | if (es->fatal) | |
275 | 279 | exit(EXIT_FAILURE); |
276 | 280 | if (nogo) |
277 | 281 | exit(EXIT_SUCCESS); |
280 | 284 | * Do the work. |
281 | 285 | */ |
282 | 286 | if (nfiles == 0 && !list_fonts) { |
283 | err_noinput(); | |
287 | err_noinput(es); | |
284 | 288 | usage(); |
285 | 289 | exit(EXIT_FAILURE); |
286 | 290 | } |
290 | 294 | paragraph *sourceform, *p; |
291 | 295 | indexdata *idx; |
292 | 296 | keywordlist *keywords; |
297 | psdata *psd; | |
293 | 298 | |
294 | 299 | in.filenames = infiles; |
295 | 300 | in.nfiles = nfiles; |
300 | 305 | in.reportcols = reportcols; |
301 | 306 | in.stack = NULL; |
302 | 307 | in.defcharset = input_charset; |
308 | in.es = es; | |
303 | 309 | |
304 | 310 | idx = make_index(); |
305 | ||
306 | sourceform = read_input(&in, idx); | |
311 | psd = psdata_new(); | |
312 | ||
313 | sourceform = read_input(&in, idx, psd); | |
307 | 314 | if (list_fonts) { |
308 | listfonts(); | |
315 | listfonts(psd); | |
309 | 316 | exit(EXIT_SUCCESS); |
310 | 317 | } |
311 | if (!sourceform) | |
318 | if (es->fatal) | |
312 | 319 | exit(EXIT_FAILURE); |
320 | assert(sourceform); | |
313 | 321 | |
314 | 322 | /* |
315 | 323 | * Append the config directives acquired from the command |
330 | 338 | |
331 | 339 | sfree(infiles); |
332 | 340 | |
333 | keywords = get_keywords(sourceform); | |
341 | keywords = get_keywords(sourceform, es); | |
334 | 342 | if (!keywords) |
335 | 343 | exit(EXIT_FAILURE); |
336 | gen_citations(sourceform, keywords); | |
337 | subst_keywords(sourceform, keywords); | |
344 | gen_citations(sourceform, keywords, es); | |
345 | subst_keywords(sourceform, keywords, es); | |
338 | 346 | |
339 | 347 | for (p = sourceform; p; p = p->next) |
340 | 348 | if (p->type == para_IM) |
341 | index_merge(idx, TRUE, p->keyword, p->words, &p->fpos); | |
349 | index_merge(idx, true, p->keyword, p->words, &p->fpos, es); | |
342 | 350 | |
343 | 351 | build_index(idx); |
344 | 352 | |
366 | 374 | * Select and run the pre-backends. |
367 | 375 | */ |
368 | 376 | prebackbits = 0; |
377 | memset(pre_backend_data, 0, sizeof(pre_backend_data)); | |
369 | 378 | for (k = 0; k < (int)lenof(backends); k++) |
370 | 379 | if (backendbits == 0 || (backendbits & backends[k].bitfield)) |
371 | 380 | prebackbits |= backends[k].prebackend_bitfield; |
373 | 382 | if (prebackbits & pre_backends[k].bitfield) { |
374 | 383 | assert(k < (int)lenof(pre_backend_data)); |
375 | 384 | pre_backend_data[k] = |
376 | pre_backends[k].func(sourceform, keywords, idx); | |
385 | pre_backends[k].func(sourceform, keywords, idx, psd, es); | |
377 | 386 | } |
378 | 387 | |
379 | 388 | /* |
394 | 403 | break; |
395 | 404 | } |
396 | 405 | |
397 | backends[k].func(sourceform, keywords, idx, pbd); | |
406 | backends[k].func(sourceform, keywords, idx, pbd, es); | |
398 | 407 | } |
399 | 408 | } |
400 | 409 | |
401 | 410 | free_para_list(sourceform); |
402 | 411 | free_keywords(keywords); |
403 | 412 | cleanup_index(idx); |
404 | } | |
413 | psdata_free(psd); | |
414 | } | |
415 | ||
416 | if (es->fatal) | |
417 | exit(EXIT_FAILURE); | |
405 | 418 | |
406 | 419 | return 0; |
407 | 420 | } |
14 | 14 | |
15 | 15 | $errors=0; |
16 | 16 | |
17 | while (<>) { | |
17 | while (<<>>) { | |
18 | 18 | $in=$out=""; |
19 | 19 | ($file, $line, $call, $in, $out)=($1,$2,$3,"",$4) |
20 | 20 | if /^(\S+) (\S+) (malloc|strdup)\(\S+\) returns (\S+)$/; |
1 | 1 | * misc.c: miscellaneous useful items |
2 | 2 | */ |
3 | 3 | |
4 | #include <assert.h> | |
4 | 5 | #include <stdarg.h> |
6 | #include <stdlib.h> | |
7 | #include <time.h> | |
5 | 8 | #include "halibut.h" |
6 | 9 | |
7 | 10 | char *adv(char *s) { |
97 | 100 | rs->text = sresize(rs->text, rs->size, char); |
98 | 101 | } |
99 | 102 | memcpy(rs->text + rs->pos, p, len); |
103 | rs->pos += len; | |
104 | rs->text[rs->pos] = 0; | |
105 | } | |
106 | void rdaddc_rep(rdstringc *rs, char c, int len) { | |
107 | if (len <= 0) { | |
108 | assert(len == 0); | |
109 | return; | |
110 | } | |
111 | if (rs->pos >= rs->size - len) { | |
112 | rs->size = rs->pos + len + 128; | |
113 | rs->text = sresize(rs->text, rs->size, char); | |
114 | } | |
115 | memset(rs->text + rs->pos, c, len); | |
100 | 116 | rs->pos += len; |
101 | 117 | rs->text[rs->pos] = 0; |
102 | 118 | } |
236 | 252 | |
237 | 253 | wp = NULL; |
238 | 254 | for (w = words; w; w = w->next) { |
239 | int both; | |
255 | bool both; | |
240 | 256 | if (!isvis(w->type)) |
241 | 257 | /* Invisible elements should not affect this calculation */ |
242 | 258 | continue; |
579 | 595 | |
580 | 596 | return p; |
581 | 597 | } |
598 | ||
599 | /* | |
600 | * Wrapper around the standard C time() function, which allows its | |
601 | * return value to be overridden by the environment variable | |
602 | * SOURCE_DATE_EPOCH, used to achieve reproducible builds by avoiding | |
603 | * baking different datestamps into repetitions of what ought to be | |
604 | * the same build. | |
605 | */ | |
606 | time_t current_time(void) | |
607 | { | |
608 | const char *epoch = getenv("SOURCE_DATE_EPOCH"); | |
609 | if (epoch) | |
610 | return atoll(epoch); | |
611 | ||
612 | return time(NULL); | |
613 | } |
43 | 43 | page_data *pages; |
44 | 44 | outline_element *outline_elements; |
45 | 45 | int n_outline_elements; |
46 | psdata *psd; | |
46 | 47 | }; |
47 | 48 | |
48 | 49 | /* |
76 | 77 | * depend on the particular document. It gets generated when the font's |
77 | 78 | * metrics are read in. |
78 | 79 | */ |
79 | ||
80 | font_info *all_fonts; | |
81 | 80 | |
82 | 81 | struct font_info_Tag { |
83 | 82 | font_info *next; |
276 | 275 | * the heights of the three fonts in the pdata) because it's |
277 | 276 | * easier than looking it up repeatedly during page breaking. |
278 | 277 | */ |
279 | int page_break; | |
278 | bool page_break; | |
280 | 279 | int space_before; |
281 | 280 | int space_after; |
282 | 281 | int line_height; |
372 | 371 | para_data *pdata; |
373 | 372 | }; |
374 | 373 | |
374 | struct psdata_Tag { | |
375 | char **extraglyphs; | |
376 | glyph nextglyph; | |
377 | tree234 *extrabyname; | |
378 | font_info *all_fonts; | |
379 | }; | |
380 | ||
375 | 381 | /* |
376 | 382 | * Functions exported from bk_paper.c |
377 | 383 | */ |
378 | int width_cmp(void *, void *); /* use when setting up widths */ | |
379 | int kern_cmp(void *, void *); /* use when setting up kern_pairs */ | |
380 | int lig_cmp(void *, void *); /* use when setting up ligatures */ | |
384 | int width_cmp(const void *, const void *, void *); /* use when setting up widths */ | |
385 | int kern_cmp(const void *, const void *, void *); /* use when setting up kern_pairs */ | |
386 | int lig_cmp(const void *, const void *, void *); /* use when setting up ligatures */ | |
381 | 387 | int find_width(font_data *, glyph); |
382 | 388 | |
383 | 389 | /* |
384 | 390 | * Functions and data exported from psdata.c. |
385 | 391 | */ |
386 | glyph glyph_intern(char const *); | |
387 | char const *glyph_extern(glyph); | |
392 | psdata *psdata_new(void); | |
393 | void psdata_free(psdata *); | |
394 | glyph glyph_intern(psdata *, const char *); | |
395 | char const *glyph_extern(psdata *, glyph); | |
388 | 396 | wchar_t ps_glyph_to_unicode(glyph); |
389 | 397 | extern const char *const ps_std_glyphs[]; |
390 | 398 | extern glyph const tt_std_glyphs[]; |
391 | void init_std_fonts(void); | |
399 | void init_std_fonts(psdata *psd); | |
392 | 400 | const int *ps_std_font_widths(char const *fontname); |
393 | 401 | const kern_pair *ps_std_font_kerns(char const *fontname); |
394 | 402 | |
411 | 419 | /* |
412 | 420 | * Backend functions exported by in_pf.c |
413 | 421 | */ |
414 | void pf_part1(font_info *fi, char **bufp, size_t *lenp); | |
415 | void pf_part2(font_info *fi, char **bufp, size_t *lenp); | |
422 | void pf_part1(font_info *fi, char **bufp, size_t *lenp, errorstate *es); | |
423 | void pf_part2(font_info *fi, char **bufp, size_t *lenp, errorstate *es); | |
416 | 424 | void pf_writeps(font_info const *fi, FILE *ofp); |
417 | 425 | |
418 | 426 | /* |
422 | 430 | glyph sfnt_indextoglyph(sfnt *sf, unsigned idx); |
423 | 431 | unsigned sfnt_glyphtoindex(sfnt *sf, glyph g); |
424 | 432 | unsigned sfnt_nglyphs(sfnt *sf); |
425 | void sfnt_writeps(font_info const *fi, FILE *ofp); | |
433 | void sfnt_writeps(font_info const *fi, FILE *ofp, psdata *psd, errorstate *es); | |
426 | 434 | void sfnt_data(font_info *fi, char **bufp, size_t *lenp); |
427 | 435 | |
428 | 436 | #endif |
1120 | 1120 | "zretroflexhook", "zstroke", "zuhiragana", "zukatakana", |
1121 | 1121 | }; |
1122 | 1122 | |
1123 | char const **extraglyphs = NULL; | |
1124 | glyph nextglyph = lenof(ps_glyphs_alphabetic); | |
1125 | tree234 *extrabyname = NULL; | |
1126 | ||
1127 | char const *glyph_extern(glyph glyph) { | |
1123 | #define EXTRAGLYPHSOFFSET lenof(ps_glyphs_alphabetic) | |
1124 | ||
1125 | const char *glyph_extern(psdata *psd, glyph glyph) { | |
1128 | 1126 | if (glyph == NOGLYPH) return ".notdef"; |
1129 | if (glyph < lenof(ps_glyphs_alphabetic)) | |
1127 | if (glyph < EXTRAGLYPHSOFFSET) | |
1130 | 1128 | return ps_glyphs_alphabetic[glyph]; |
1131 | 1129 | else |
1132 | return extraglyphs[glyph - lenof(ps_glyphs_alphabetic)]; | |
1130 | return psd->extraglyphs[glyph - EXTRAGLYPHSOFFSET]; | |
1133 | 1131 | } |
1134 | 1132 | |
1135 | static int glyphcmp(void *a, void *b) { | |
1136 | glyph ga = *(glyph *)a, gb = *(glyph *)b; | |
1137 | return strcmp(glyph_extern(ga), glyph_extern(gb)); | |
1133 | static int glyphcmp(const void *a, const void *b, void *cmpctx) { | |
1134 | psdata *psd = (psdata *)cmpctx; | |
1135 | glyph ga = *(const glyph *)a, gb = *(const glyph *)b; | |
1136 | return strcmp(glyph_extern(psd, ga), glyph_extern(psd, gb)); | |
1138 | 1137 | } |
1139 | 1138 | |
1140 | static int glyphcmp_search(void *a, void *b) { | |
1141 | glyph gb = *(glyph *)b; | |
1142 | return strcmp(a, glyph_extern(gb)); | |
1139 | static int glyphcmp_search(const void *a, const void *b, void *cmpctx) { | |
1140 | psdata *psd = (psdata *)cmpctx; | |
1141 | glyph gb = *(const glyph *)b; | |
1142 | return strcmp(a, glyph_extern(psd, gb)); | |
1143 | 1143 | } |
1144 | 1144 | |
1145 | glyph glyph_intern(char const *glyphname) { | |
1145 | psdata *psdata_new(void) | |
1146 | { | |
1147 | psdata *psd = snew(psdata); | |
1148 | psd->extraglyphs = NULL; | |
1149 | psd->nextglyph = EXTRAGLYPHSOFFSET; | |
1150 | psd->extrabyname = newtree234(glyphcmp, NULL); | |
1151 | psd->all_fonts = NULL; | |
1152 | return psd; | |
1153 | } | |
1154 | ||
1155 | void psdata_free(psdata *psd) | |
1156 | { | |
1157 | glyph i, *gp; | |
1158 | while ((gp = delpos234(psd->extrabyname, 0)) != NULL) | |
1159 | sfree(gp); | |
1160 | freetree234(psd->extrabyname); | |
1161 | for (i = EXTRAGLYPHSOFFSET; i < psd->nextglyph; i++) | |
1162 | sfree(psd->extraglyphs[i - EXTRAGLYPHSOFFSET]); | |
1163 | sfree(psd->extraglyphs); | |
1164 | while (psd->all_fonts) { | |
1165 | font_info *fi = psd->all_fonts; | |
1166 | glyph_width *w; | |
1167 | psd->all_fonts = fi->next; | |
1168 | while ((w = delpos234(fi->widths, 0)) != NULL) | |
1169 | sfree(w); | |
1170 | freetree234(fi->widths); | |
1171 | freetree234(fi->kerns); | |
1172 | freetree234(fi->ligs); | |
1173 | sfree(fi); | |
1174 | } | |
1175 | sfree(psd); | |
1176 | } | |
1177 | ||
1178 | glyph glyph_intern(psdata *psd, const char *glyphname) { | |
1146 | 1179 | int i, j, k, c; |
1147 | 1180 | glyph *gp; |
1148 | 1181 | |
1149 | 1182 | i = -1; |
1150 | j = lenof(ps_glyphs_alphabetic); | |
1183 | j = EXTRAGLYPHSOFFSET; | |
1151 | 1184 | while (j-i > 1) { |
1152 | 1185 | k = (i + j) / 2; |
1153 | 1186 | c = strcmp(glyphname, ps_glyphs_alphabetic[k]); |
1160 | 1193 | i = k; |
1161 | 1194 | } |
1162 | 1195 | /* Non-standard glyph. We may need to add it to our tree. */ |
1163 | if (extrabyname == NULL) | |
1164 | extrabyname = newtree234(glyphcmp); | |
1165 | gp = find234(extrabyname, (void *)glyphname, glyphcmp_search); | |
1196 | gp = findcmp234(psd->extrabyname, (const void *)glyphname, | |
1197 | glyphcmp_search, psd); | |
1166 | 1198 | if (gp) { |
1167 | 1199 | k = *gp; |
1168 | 1200 | } else { |
1169 | extraglyphs = sresize(extraglyphs, nextglyph, char const *); | |
1170 | k = nextglyph++; | |
1171 | extraglyphs[k - lenof(ps_glyphs_alphabetic)] = dupstr(glyphname); | |
1201 | psd->extraglyphs = sresize(psd->extraglyphs, psd->nextglyph, char *); | |
1202 | k = psd->nextglyph++; | |
1203 | psd->extraglyphs[k - EXTRAGLYPHSOFFSET] = dupstr(glyphname); | |
1172 | 1204 | gp = snew(glyph); |
1173 | 1205 | *gp = k; |
1174 | add234(extrabyname, gp); | |
1206 | add234(psd->extrabyname, gp); | |
1175 | 1207 | } |
1176 | 1208 | return k; |
1177 | 1209 | } |
1852 | 1884 | perl -e ' |
1853 | 1885 | open G, "glyphnames.txt" or die; |
1854 | 1886 | chomp(@g = <G>); %g = map(($_, $i++), @g); |
1855 | while(<>){chomp;print"$g{$_}, "} | |
1887 | while(<<>>){chomp;print"$g{$_}, "} | |
1856 | 1888 | print "NOGLYPH\n";' | fold -sw68 | sed 's/^/ /' |
1857 | 1889 | |
1858 | 1890 | */ |
4541 | 4573 | }}, |
4542 | 4574 | }; |
4543 | 4575 | |
4544 | void init_std_fonts(void) { | |
4576 | void init_std_fonts(psdata *psd) { | |
4545 | 4577 | int i, j; |
4546 | 4578 | ligature const *lig; |
4547 | 4579 | kern_pair const *kern; |
4548 | static int done = FALSE; | |
4580 | static bool done = false; | |
4549 | 4581 | |
4550 | 4582 | if (done) return; |
4551 | 4583 | for (i = 0; i < (int)lenof(ps_std_fonts); i++) { |
4553 | 4585 | fi->fontfile = NULL; |
4554 | 4586 | fi->name = ps_std_fonts[i].name; |
4555 | 4587 | fi->filetype = TYPE1; /* for purposes of making subset fonts */ |
4556 | fi->widths = newtree234(width_cmp); | |
4588 | fi->widths = newtree234(width_cmp, NULL); | |
4557 | 4589 | for (j = 0; j < (int)lenof(fi->bmp); j++) |
4558 | 4590 | fi->bmp[j] = NOGLYPH; |
4559 | 4591 | for (j = 0; j < (int)lenof(ps_std_glyphs) - 1; j++) { |
4560 | 4592 | glyph_width *w = snew(glyph_width); |
4561 | 4593 | wchar_t ucs; |
4562 | w->glyph = glyph_intern(ps_std_glyphs[j]); | |
4594 | w->glyph = glyph_intern(psd, ps_std_glyphs[j]); | |
4563 | 4595 | w->width = ps_std_fonts[i].widths[j]; |
4564 | 4596 | add234(fi->widths, w); |
4565 | 4597 | ucs = ps_glyph_to_unicode(w->glyph); |
4566 | 4598 | assert(ucs != 0xFFFF); |
4567 | 4599 | fi->bmp[ucs] = w->glyph; |
4568 | 4600 | } |
4569 | fi->kerns = newtree234(kern_cmp); | |
4601 | fi->kerns = newtree234(kern_cmp, NULL); | |
4570 | 4602 | for (kern = ps_std_fonts[i].kerns; kern->left != NOGLYPH; kern++) |
4571 | 4603 | add234(fi->kerns, (void *)kern); |
4572 | fi->ligs = newtree234(lig_cmp); | |
4604 | fi->ligs = newtree234(lig_cmp, NULL); | |
4573 | 4605 | for (lig = ps_std_fonts[i].ligs; lig->left != NOGLYPH; lig++) |
4574 | 4606 | add234(fi->ligs, (void *)lig); |
4575 | fi->next = all_fonts; | |
4576 | all_fonts = fi; | |
4607 | fi->next = psd->all_fonts; | |
4608 | psd->all_fonts = fi; | |
4577 | 4609 | } |
4578 | done = TRUE; | |
4610 | done = true; | |
4579 | 4611 | } |
4580 | 4612 | |
4581 | 4613 | const int *ps_std_font_widths(char const *fontname) |
0 | #!/bin/sh | |
1 | ||
2 | # Make a Halibut release archive. | |
3 | ||
4 | RELDIR="$1" | |
5 | VERSION="$2" | |
6 | ||
7 | linkmirror() { | |
8 | (cd "$1"; find . -name CVS -prune -o -name .svn -prune -o \ | |
9 | -name build -prune -o -name reltmp -prune -o -type d -print) | \ | |
10 | while read dir; do mkdir -p "$2"/"$dir"; done | |
11 | (cd "$1"; find . -name CVS -prune -o -name .svn -prune -o \ | |
12 | -name build -prune -o -name reltmp -prune -o \ | |
13 | -name '*.orig' -prune -o -name '*.rej' -prune -o \ | |
14 | -name '*.txt' -prune -o -name '*.html' -prune -o \ | |
15 | -name '*.1' -prune -o -name '.cvsignore' -prune -o \ | |
16 | -name '*.gz' -prune -o -name '.[^.]*' -prune -o \ | |
17 | -type f -print) | \ | |
18 | while read file; do ln -s "$1"/"$file" "$2"/"$file"; done | |
19 | } | |
20 | ||
21 | linkmirror $PWD reltmp/$RELDIR | |
22 | if ! test -d charset; then | |
23 | linkmirror $PWD/../charset reltmp/$RELDIR/charset | |
24 | fi | |
25 | ||
26 | tar chzvoCf reltmp $RELDIR.tar.gz $RELDIR | |
27 | ||
28 | rm -rf reltmp |
46 | 46 | struct tree234_Tag { |
47 | 47 | node234 *root; |
48 | 48 | cmpfn234 cmp; |
49 | void *cmpctx; | |
49 | 50 | }; |
50 | 51 | |
51 | 52 | struct node234_Tag { |
58 | 59 | /* |
59 | 60 | * Create a 2-3-4 tree. |
60 | 61 | */ |
61 | tree234 *newtree234(cmpfn234 cmp) { | |
62 | tree234 *newtree234(cmpfn234 cmp, void *cmpctx) { | |
62 | 63 | tree234 *ret = snew(tree234); |
63 | 64 | LOG(("created tree %p\n", ret)); |
64 | 65 | ret->root = NULL; |
65 | 66 | ret->cmp = cmp; |
67 | ret->cmpctx = cmpctx; | |
66 | 68 | return ret; |
67 | 69 | } |
68 | 70 | |
360 | 362 | return NULL; /* error: index out of range */ |
361 | 363 | } |
362 | 364 | } else { |
363 | if ((c = t->cmp(e, n->elems[0])) < 0) | |
365 | if ((c = t->cmp(e, n->elems[0], t->cmpctx)) < 0) | |
364 | 366 | ki = 0; |
365 | 367 | else if (c == 0) |
366 | 368 | return n->elems[0]; /* already exists */ |
367 | else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0) | |
369 | else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1], t->cmpctx)) < 0) | |
368 | 370 | ki = 1; |
369 | 371 | else if (c == 0) |
370 | 372 | return n->elems[1]; /* already exists */ |
371 | else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0) | |
373 | else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2], t->cmpctx)) < 0) | |
372 | 374 | ki = 2; |
373 | 375 | else if (c == 0) |
374 | 376 | return n->elems[2]; /* already exists */ |
443 | 445 | * as NULL, in which case the compare function from the tree proper |
444 | 446 | * will be used. |
445 | 447 | */ |
446 | void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp, | |
447 | int relation, int *index) { | |
448 | void *findcmprelpos234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx, | |
449 | int relation, int *index) { | |
448 | 450 | node234 *n; |
449 | 451 | void *ret; |
450 | 452 | int c; |
452 | 454 | |
453 | 455 | if (t->root == NULL) |
454 | 456 | return NULL; |
455 | ||
456 | if (cmp == NULL) | |
457 | cmp = t->cmp; | |
458 | 457 | |
459 | 458 | n = t->root; |
460 | 459 | /* |
476 | 475 | while (1) { |
477 | 476 | for (kcount = 0; kcount < 4; kcount++) { |
478 | 477 | if (kcount >= 3 || n->elems[kcount] == NULL || |
479 | (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) { | |
478 | (c = cmpret ? cmpret : cmp(e, n->elems[kcount], cmpctx)) < 0) { | |
480 | 479 | break; |
481 | 480 | } |
482 | 481 | if (n->kids[kcount]) idx += n->counts[kcount]; |
547 | 546 | if (ret && index) *index = idx; |
548 | 547 | return ret; |
549 | 548 | } |
550 | void *find234(tree234 *t, void *e, cmpfn234 cmp) { | |
551 | return findrelpos234(t, e, cmp, REL234_EQ, NULL); | |
552 | } | |
553 | void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation) { | |
554 | return findrelpos234(t, e, cmp, relation, NULL); | |
555 | } | |
556 | void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index) { | |
557 | return findrelpos234(t, e, cmp, REL234_EQ, index); | |
549 | void *findcmp234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx) { | |
550 | return findcmprelpos234(t, e, cmp, cmpctx, REL234_EQ, NULL); | |
551 | } | |
552 | void *findcmprel234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx, | |
553 | int relation) { | |
554 | return findcmprelpos234(t, e, cmp, cmpctx, relation, NULL); | |
555 | } | |
556 | void *findcmppos234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx, | |
557 | int *index) { | |
558 | return findcmprelpos234(t, e, cmp, cmpctx, REL234_EQ, index); | |
559 | } | |
560 | void *find234(tree234 *t, const void *e) { | |
561 | return findcmprelpos234(t, e, t->cmp, t->cmpctx, REL234_EQ, NULL); | |
562 | } | |
563 | void *findrel234(tree234 *t, const void *e, int relation) { | |
564 | return findcmprelpos234(t, e, t->cmp, t->cmpctx, relation, NULL); | |
565 | } | |
566 | void *findpos234(tree234 *t, const void *e, int *index) { | |
567 | return findcmprelpos234(t, e, t->cmp, t->cmpctx, REL234_EQ, index); | |
568 | } | |
569 | void *findrelpos234(tree234 *t, const void *e, int relation, int *index) { | |
570 | return findcmprelpos234(t, e, t->cmp, t->cmpctx, relation, index); | |
558 | 571 | } |
559 | 572 | |
560 | 573 | /* |
1005 | 1018 | } |
1006 | 1019 | void *del234(tree234 *t, void *e) { |
1007 | 1020 | int index; |
1008 | if (!findrelpos234(t, e, NULL, REL234_EQ, &index)) | |
1021 | if (!findrelpos234(t, e, REL234_EQ, &index)) | |
1009 | 1022 | return NULL; /* it wasn't in there anyway */ |
1010 | 1023 | return delpos234_internal(t, index); /* it's there; delete it. */ |
1011 | 1024 | } |
1120 | 1133 | |
1121 | 1134 | if (t1->cmp) { |
1122 | 1135 | element = index234(t2, 0); |
1123 | element = findrelpos234(t1, element, NULL, REL234_GE, NULL); | |
1136 | element = findrelpos234(t1, element, REL234_GE, NULL); | |
1124 | 1137 | if (element) |
1125 | 1138 | return NULL; |
1126 | 1139 | } |
1140 | 1153 | |
1141 | 1154 | if (t2->cmp) { |
1142 | 1155 | element = index234(t1, size1-1); |
1143 | element = findrelpos234(t2, element, NULL, REL234_LE, NULL); | |
1156 | element = findrelpos234(t2, element, REL234_LE, NULL); | |
1144 | 1157 | if (element) |
1145 | 1158 | return NULL; |
1146 | 1159 | } |
1177 | 1190 | t->root = NULL; |
1178 | 1191 | return ret; |
1179 | 1192 | } |
1193 | assert(n); | |
1180 | 1194 | |
1181 | 1195 | /* |
1182 | 1196 | * Search down the tree to find the split point. |
1332 | 1346 | * over to it until it is greater than minimum |
1333 | 1347 | * size. |
1334 | 1348 | */ |
1335 | int undersized = (!sub->elems[0]); | |
1349 | bool undersized = (!sub->elems[0]); | |
1336 | 1350 | LOG((" child %d is %ssize\n", ki, |
1337 | 1351 | undersized ? "under" : "minimum-")); |
1338 | 1352 | LOG((" neighbour is %s\n", |
1371 | 1385 | t->root = halves[1]; |
1372 | 1386 | return halves[0]; |
1373 | 1387 | } |
1374 | tree234 *splitpos234(tree234 *t, int index, int before) { | |
1388 | tree234 *splitpos234(tree234 *t, int index, bool before) { | |
1375 | 1389 | tree234 *ret; |
1376 | 1390 | node234 *n; |
1377 | 1391 | int count; |
1379 | 1393 | count = countnode234(t->root); |
1380 | 1394 | if (index < 0 || index > count) |
1381 | 1395 | return NULL; /* error */ |
1382 | ret = newtree234(t->cmp); | |
1396 | ret = newtree234(t->cmp, NULL); | |
1383 | 1397 | n = split234_internal(t, index); |
1384 | 1398 | if (before) { |
1385 | 1399 | /* We want to return the ones before the index. */ |
1394 | 1408 | } |
1395 | 1409 | return ret; |
1396 | 1410 | } |
1397 | tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel) { | |
1411 | tree234 *splitcmp234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx, | |
1412 | int rel) { | |
1398 | 1413 | int before; |
1399 | 1414 | int index; |
1400 | 1415 | |
1406 | 1421 | } else { |
1407 | 1422 | before = 0; |
1408 | 1423 | } |
1409 | if (!findrelpos234(t, e, cmp, rel, &index)) | |
1424 | if (!findcmprelpos234(t, e, cmp, cmpctx, rel, &index)) | |
1410 | 1425 | index = 0; |
1411 | 1426 | |
1412 | 1427 | return splitpos234(t, index+1, before); |
1428 | } | |
1429 | tree234 *split234(tree234 *t, const void *e, int rel) | |
1430 | { | |
1431 | return splitcmp234(t, e, t->cmp, t->cmpctx, rel); | |
1413 | 1432 | } |
1414 | 1433 | |
1415 | 1434 | static node234 *copynode234(node234 *n, copyfn234 copyfn, void *copyfnstate) { |
1438 | 1457 | tree234 *copytree234(tree234 *t, copyfn234 copyfn, void *copyfnstate) { |
1439 | 1458 | tree234 *t2; |
1440 | 1459 | |
1441 | t2 = newtree234(t->cmp); | |
1460 | t2 = newtree234(t->cmp, t->cmpctx); | |
1442 | 1461 | if (t->root) { |
1443 | 1462 | t2->root = copynode234(t->root, copyfn, copyfnstate); |
1444 | 1463 | t2->root->parent = NULL; |
1706 | 1725 | for (i = -1; i < nelems; i++) { |
1707 | 1726 | void *lower = (i == -1 ? lowbound : node->elems[i]); |
1708 | 1727 | void *higher = (i+1 == nelems ? highbound : node->elems[i+1]); |
1709 | if (lower && higher && cmp(lower, higher) >= 0) { | |
1728 | if (lower && higher && cmp(lower, higher, cmpctx) >= 0) { | |
1710 | 1729 | error("node %p: kid comparison [%d=%s,%d=%s] failed", |
1711 | 1730 | node, i, lower, i+1, higher); |
1712 | 1731 | } |
1816 | 1835 | realret = add234(tree, elem); |
1817 | 1836 | |
1818 | 1837 | i = 0; |
1819 | while (i < arraylen && cmp(elem, array[i]) > 0) | |
1838 | while (i < arraylen && cmp(elem, array[i], NULL) > 0) | |
1820 | 1839 | i++; |
1821 | if (i < arraylen && !cmp(elem, array[i])) { | |
1840 | if (i < arraylen && !cmp(elem, array[i], NULL)) { | |
1822 | 1841 | void *retval = array[i]; /* expect that returned not elem */ |
1823 | 1842 | if (realret != retval) { |
1824 | 1843 | error("add: retval was %p expected %p", realret, retval); |
1862 | 1881 | int i; |
1863 | 1882 | |
1864 | 1883 | i = 0; |
1865 | while (i < arraylen && cmp(elem, array[i]) > 0) | |
1884 | while (i < arraylen && cmp(elem, array[i], NULL) > 0) | |
1866 | 1885 | i++; |
1867 | if (i >= arraylen || cmp(elem, array[i]) != 0) | |
1886 | if (i >= arraylen || cmp(elem, array[i], NULL) != 0) | |
1868 | 1887 | return; /* don't do it! */ |
1869 | 1888 | delpostest(i); |
1870 | 1889 | } |
1883 | 1902 | return ((*seed) / 65536) % 32768; |
1884 | 1903 | } |
1885 | 1904 | |
1886 | int mycmp(void *av, void *bv) { | |
1905 | int mycmp(const void *av, const void *bv, void *cmpctx) { | |
1887 | 1906 | char const *a = (char const *)av; |
1888 | 1907 | char const *b = (char const *)bv; |
1889 | 1908 | return strcmp(a, b); |
2142 | 2161 | tree2 = newtree234(mycmp); |
2143 | 2162 | tree3 = newtree234(mycmp); |
2144 | 2163 | tree4 = newtree234(mycmp); |
2145 | assert(mycmp(strings[0], strings[1]) < 0); /* just in case :-) */ | |
2164 | assert(mycmp(strings[0], strings[1], NULL) < 0); /* just in case :-) */ | |
2146 | 2165 | add234(tree2, strings[1]); |
2147 | 2166 | add234(tree4, strings[0]); |
2148 | 2167 | array[0] = strings[0]; |
27 | 27 | #ifndef TREE234_H |
28 | 28 | #define TREE234_H |
29 | 29 | |
30 | #include <stdbool.h> | |
31 | ||
30 | 32 | /* |
31 | 33 | * This typedef is opaque outside tree234.c itself. |
32 | 34 | */ |
33 | 35 | typedef struct tree234_Tag tree234; |
34 | 36 | |
35 | typedef int (*cmpfn234)(void *, void *); | |
37 | typedef int (*cmpfn234)(const void *av, const void *bv, void *cmpctx); | |
36 | 38 | |
37 | 39 | typedef void *(*copyfn234)(void *state, void *element); |
38 | 40 | |
41 | 43 | * lookups by key will fail: you can only look things up by numeric |
42 | 44 | * index, and you have to use addpos234() and delpos234(). |
43 | 45 | */ |
44 | tree234 *newtree234(cmpfn234 cmp); | |
46 | tree234 *newtree234(cmpfn234 cmp, void *cmpctx); | |
45 | 47 | |
46 | 48 | /* |
47 | 49 | * Free a 2-3-4 tree (not including freeing the elements). |
127 | 129 | enum { |
128 | 130 | REL234_EQ, REL234_LT, REL234_LE, REL234_GT, REL234_GE |
129 | 131 | }; |
130 | void *find234(tree234 *t, void *e, cmpfn234 cmp); | |
131 | void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation); | |
132 | void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index); | |
133 | void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp, int relation, | |
134 | int *index); | |
132 | void *find234(tree234 *t, const void *e); | |
133 | void *findrel234(tree234 *t, const void *e, int relation); | |
134 | void *findpos234(tree234 *t, const void *e, int *index); | |
135 | void *findrelpos234(tree234 *t, const void *e, int relation, int *index); | |
136 | void *findcmp234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx); | |
137 | void *findcmprel234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx, | |
138 | int relation); | |
139 | void *findcmppos234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx, | |
140 | int *index); | |
141 | void *findcmprelpos234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx, | |
142 | int relation, int *index); | |
135 | 143 | |
136 | 144 | /* |
137 | 145 | * Delete an element e in a 2-3-4 tree. Does not free the element, |
161 | 169 | /* |
162 | 170 | * Split a tree234 into two valid tree234s. |
163 | 171 | * |
164 | * splitpos234 splits at a given index. If `before' is TRUE, the | |
172 | * splitpos234 splits at a given index. If `before' is true, the | |
165 | 173 | * items at and after that index are left in t and the ones before |
166 | * are returned; if `before' is FALSE, the items before that index | |
174 | * are returned; if `before' is false, the items before that index | |
167 | 175 | * are left in t and the rest are returned. |
168 | 176 | * |
169 | 177 | * split234 splits at a given key. You can pass any of the |
171 | 179 | * in the tree that satisfy the relation are returned; the |
172 | 180 | * remainder are left. |
173 | 181 | */ |
174 | tree234 *splitpos234(tree234 *t, int index, int before); | |
175 | tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel); | |
182 | tree234 *splitpos234(tree234 *t, int index, bool before); | |
183 | tree234 *split234(tree234 *t, const void *e, int rel); | |
184 | tree234 *splitcmp234(tree234 *t, const void *e, cmpfn234 cmp, void *cmpctx, | |
185 | int rel); | |
176 | 186 | |
177 | 187 | /* |
178 | 188 | * Join two tree234s together into a single one. |
20 | 20 | } |
21 | 21 | |
22 | 22 | static char *ustrtoa_internal(wchar_t const *s, char *outbuf, int size, |
23 | int charset, int careful) { | |
24 | int len, ret, err; | |
23 | int charset, bool careful) { | |
24 | int len, ret; | |
25 | bool err; | |
25 | 26 | charset_state state = CHARSET_INIT_STATE; |
26 | 27 | |
27 | 28 | if (!s) { |
33 | 34 | size--; /* leave room for terminating NUL */ |
34 | 35 | *outbuf = '\0'; |
35 | 36 | while (len > 0) { |
36 | err = 0; | |
37 | err = false; | |
37 | 38 | ret = charset_from_unicode(&s, &len, outbuf, size, charset, &state, |
38 | 39 | (careful ? &err : NULL)); |
39 | 40 | if (err) |
55 | 56 | } |
56 | 57 | |
57 | 58 | char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset) { |
58 | return ustrtoa_internal(s, outbuf, size, charset, FALSE); | |
59 | return ustrtoa_internal(s, outbuf, size, charset, false); | |
59 | 60 | } |
60 | 61 | |
61 | 62 | char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset) { |
62 | return ustrtoa_internal(s, outbuf, size, charset, TRUE); | |
63 | return ustrtoa_internal(s, outbuf, size, charset, true); | |
63 | 64 | } |
64 | 65 | |
65 | 66 | wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset) { |
86 | 87 | return outbuf; |
87 | 88 | } |
88 | 89 | |
89 | char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful) | |
90 | char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, bool careful) | |
90 | 91 | { |
91 | 92 | char *outbuf; |
92 | int outpos, outlen, len, ret, err; | |
93 | int outpos, outlen, len, ret; | |
94 | bool err; | |
93 | 95 | charset_state state = CHARSET_INIT_STATE; |
94 | 96 | |
95 | 97 | if (!s) { |
105 | 107 | outbuf[outpos] = '\0'; |
106 | 108 | |
107 | 109 | while (len > 0) { |
108 | err = 0; | |
110 | err = false; | |
109 | 111 | ret = charset_from_unicode(&s, &len, |
110 | 112 | outbuf + outpos, outlen - outpos - 1, |
111 | 113 | charset, &state, (careful ? &err : NULL)); |
137 | 139 | |
138 | 140 | char *utoa_dup(wchar_t const *s, int charset) |
139 | 141 | { |
140 | return utoa_internal_dup(s, charset, NULL, FALSE); | |
142 | return utoa_internal_dup(s, charset, NULL, false); | |
141 | 143 | } |
142 | 144 | |
143 | 145 | char *utoa_dup_len(wchar_t const *s, int charset, int *len) |
144 | 146 | { |
145 | return utoa_internal_dup(s, charset, len, FALSE); | |
147 | return utoa_internal_dup(s, charset, len, false); | |
146 | 148 | } |
147 | 149 | |
148 | 150 | char *utoa_careful_dup(wchar_t const *s, int charset) |
149 | 151 | { |
150 | return utoa_internal_dup(s, charset, NULL, TRUE); | |
152 | return utoa_internal_dup(s, charset, NULL, true); | |
151 | 153 | } |
152 | 154 | |
153 | 155 | wchar_t *ufroma_dup(char const *s, int charset) { |
281 | 283 | #endif |
282 | 284 | } |
283 | 285 | |
284 | int uisalpha(wchar_t c) { | |
286 | bool uisalpha(wchar_t c) { | |
285 | 287 | #ifdef HAS_ISWALPHA |
286 | 288 | return iswalpha(c); |
287 | 289 | #else |
350 | 352 | return ret; |
351 | 353 | } |
352 | 354 | |
353 | int utob(wchar_t const *s) { | |
355 | bool utob(wchar_t const *s) { | |
354 | 356 | if (!ustricmp(s, L"yes") || !ustricmp(s, L"y") || |
355 | 357 | !ustricmp(s, L"true") || !ustricmp(s, L"t")) |
356 | return TRUE; | |
357 | return FALSE; | |
358 | } | |
359 | ||
360 | int uisdigit(wchar_t c) { | |
358 | return true; | |
359 | return false; | |
360 | } | |
361 | ||
362 | bool uisdigit(wchar_t c) { | |
361 | 363 | return c >= L'0' && c <= L'9'; |
362 | 364 | } |
363 | 365 | |
445 | 447 | * Determine whether a Unicode string can be translated into a |
446 | 448 | * given charset without any missing characters. |
447 | 449 | */ |
448 | int cvt_ok(int charset, const wchar_t *s) | |
450 | bool cvt_ok(int charset, const wchar_t *s) | |
449 | 451 | { |
450 | 452 | char buf[256]; |
451 | 453 | charset_state state = CHARSET_INIT_STATE; |
452 | int err, len = ustrlen(s); | |
453 | ||
454 | err = 0; | |
454 | bool err; | |
455 | int len = ustrlen(s); | |
456 | ||
457 | err = false; | |
455 | 458 | while (len > 0) { |
456 | 459 | (void)charset_from_unicode(&s, &len, buf, lenof(buf), |
457 | 460 | charset, &state, &err); |
458 | 461 | if (err) |
459 | return FALSE; | |
460 | } | |
461 | return TRUE; | |
462 | return false; | |
463 | } | |
464 | return true; | |
462 | 465 | } |
463 | 466 | |
464 | 467 | /* |
469 | 472 | * rely on always getting a valid charset id back from this |
470 | 473 | * function. |
471 | 474 | */ |
472 | int charset_from_ustr(filepos *fpos, const wchar_t *name) | |
475 | int charset_from_ustr(filepos *fpos, const wchar_t *name, errorstate *es) | |
473 | 476 | { |
474 | 477 | char *csname; |
475 | 478 | int charset; |
479 | 482 | |
480 | 483 | if (charset == CS_NONE) { |
481 | 484 | charset = CS_ASCII; |
482 | err_charset(fpos, name); | |
485 | err_charset(es, fpos, name); | |
483 | 486 | } |
484 | 487 | |
485 | 488 | sfree(csname); |
0 | 0 | /* Generated by automated build script */ |
1 | #define VERSION "version 1.2" | |
1 | #define VERSION "version 1.3" |
23 | 23 | }; |
24 | 24 | |
25 | 25 | /* auxiliary function for binary search in interval table */ |
26 | static int bisearch(wchar_t ucs, const struct interval *table, int max) { | |
26 | static bool bisearch(wchar_t ucs, const struct interval *table, int max) { | |
27 | 27 | int min = 0; |
28 | 28 | int mid; |
29 | 29 | |
30 | 30 | if (ucs < table[0].first || ucs > table[max].last) |
31 | return 0; | |
31 | return false; | |
32 | 32 | while (max >= min) { |
33 | 33 | mid = (min + max) / 2; |
34 | 34 | if (ucs > table[mid].last) |
36 | 36 | else if (ucs < table[mid].first) |
37 | 37 | max = mid - 1; |
38 | 38 | else |
39 | return 1; | |
39 | return true; | |
40 | 40 | } |
41 | 41 | |
42 | return 0; | |
42 | return false; | |
43 | 43 | } |
44 | 44 | |
45 | 45 | int mk_wcwidth(wchar_t ucs) |
143 | 143 | wid = 0; |
144 | 144 | |
145 | 145 | while (len > 0) { |
146 | int err; | |
146 | bool err; | |
147 | 147 | wchar_t const *s_orig; |
148 | 148 | |
149 | err = 0; | |
149 | err = false; | |
150 | 150 | s_orig = s; |
151 | 151 | charset_from_unicode(&s, &len, buf, lenof(buf), charset, &state, &err); |
152 | 152 | wid += wcswidth(s_orig, s - s_orig); |
172 | 172 | return 0; |
173 | 173 | } |
174 | 174 | |
175 | int chm_directory_entry_cmp(void *av, void *bv) | |
175 | int chm_directory_entry_cmp(const void *av, const void *bv, void *cmpctx) | |
176 | 176 | { |
177 | 177 | const struct chm_directory_entry |
178 | 178 | *a = (const struct chm_directory_entry *)av, |
180 | 180 | return strcmp_chm(a->filename, b->filename); |
181 | 181 | } |
182 | 182 | |
183 | int chm_directory_entry_find(void *av, void *bv) | |
183 | int chm_directory_entry_find(const void *av, const void *bv, void *cmpctx) | |
184 | 184 | { |
185 | 185 | const char *a = (const char *)av; |
186 | 186 | const struct chm_directory_entry |
234 | 234 | PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size_field, rs->pos); |
235 | 235 | PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size2_field, rs->pos); |
236 | 236 | |
237 | index = newtree234(NULL); | |
237 | index = newtree234(NULL, NULL); | |
238 | 238 | curr_chunk = 0; |
239 | 239 | depth = 1; |
240 | 240 | /* Write out lowest-level PMGL chunks full of actual directory entries */ |
326 | 326 | PUT_32BIT_LSB_FIRST(chunk.text + chunk_endlen_field, |
327 | 327 | chunksize - chunk.pos); |
328 | 328 | PUT_16BIT_LSB_FIRST(reversed_quickref.text, n_entries); |
329 | while (chunk.pos + reversed_quickref.pos < chunksize) | |
330 | rdaddc(&chunk, 0); /* zero-pad */ | |
329 | rdaddc_rep(&chunk, 0, chunksize - chunk.pos - reversed_quickref.pos); | |
331 | 330 | for (i = reversed_quickref.pos - 2; i >= 0; i -= 2) |
332 | 331 | rdaddsn(&chunk, reversed_quickref.text+i, 2); |
333 | 332 | |
345 | 344 | int index_index = 0; |
346 | 345 | |
347 | 346 | prev_index = index; |
348 | index = newtree234(NULL); | |
347 | index = newtree234(NULL, NULL); | |
349 | 348 | depth++; |
350 | 349 | |
351 | 350 | while (index_index < count234(prev_index)) { |
416 | 415 | PUT_32BIT_LSB_FIRST(chunk.text + chunk_endlen_field, |
417 | 416 | chunksize - chunk.pos); |
418 | 417 | PUT_16BIT_LSB_FIRST(reversed_quickref.text, n_entries); |
419 | while (chunk.pos + reversed_quickref.pos < chunksize) | |
420 | rdaddc(&chunk, 0); /* zero-pad */ | |
418 | rdaddc_rep(&chunk, 0, | |
419 | chunksize - chunk.pos - reversed_quickref.pos); | |
421 | 420 | for (i = reversed_quickref.pos - 2; i >= 0; i -= 2) |
422 | 421 | rdaddsn(&chunk, reversed_quickref.text+i, 2); |
423 | 422 | |
509 | 508 | int strtab_offset; |
510 | 509 | }; |
511 | 510 | |
512 | static int chm_stringtab_cmp(void *av, void *bv) | |
511 | static int chm_stringtab_cmp(const void *av, const void *bv, void *cmpctx) | |
513 | 512 | { |
514 | 513 | const struct chm_stringtab_entry |
515 | 514 | *a = (const struct chm_stringtab_entry *)av, |
518 | 517 | b->chm->stringsfile.text + b->strtab_offset); |
519 | 518 | } |
520 | 519 | |
521 | static int chm_stringtab_find(void *av, void *bv) | |
520 | static int chm_stringtab_find(const void *av, const void *bv, void *cmpctx) | |
522 | 521 | { |
523 | 522 | const char *a = (const char *)av; |
524 | 523 | const struct chm_stringtab_entry |
534 | 533 | if (!string) |
535 | 534 | return 0; |
536 | 535 | |
537 | if ((ent = (struct chm_stringtab_entry *)find234( | |
538 | chm->stringtab, (void *)string, chm_stringtab_find)) == NULL) { | |
536 | if ((ent = (struct chm_stringtab_entry *)findcmp234( | |
537 | chm->stringtab, (void *)string, chm_stringtab_find, NULL)) == | |
538 | NULL) { | |
539 | 539 | ent = snew(struct chm_stringtab_entry); |
540 | 540 | ent->chm = chm; |
541 | 541 | |
542 | 542 | /* Pad to ensure the string doesn't cross a page boundary. */ |
543 | 543 | size = strlen(string) + 1; /* include the NUL terminator */ |
544 | 544 | assert(size < 0x1000); /* avoid really serious trouble */ |
545 | while ((chm->stringsfile.pos ^ (chm->stringsfile.pos + size-1)) >> 12) | |
546 | rdaddc(&chm->stringsfile, 0); | |
545 | if ((chm->stringsfile.pos ^ (chm->stringsfile.pos + size-1)) >> 12) | |
546 | rdaddc_rep(&chm->stringsfile, 0, 0xFFF & -chm->stringsfile.pos); | |
547 | 547 | |
548 | 548 | ent->strtab_offset = chm->stringsfile.pos; |
549 | 549 | rdaddsc(&chm->stringsfile, string); |
556 | 556 | struct chm *chm_new(void) |
557 | 557 | { |
558 | 558 | struct chm *chm = snew(struct chm); |
559 | chm->files = newtree234(chm_directory_entry_cmp); | |
560 | chm->windows = newtree234(NULL); | |
561 | chm->stringtab = newtree234(chm_stringtab_cmp); | |
559 | chm->files = newtree234(chm_directory_entry_cmp, NULL); | |
560 | chm->windows = newtree234(NULL, NULL); | |
561 | chm->stringtab = newtree234(chm_stringtab_cmp, NULL); | |
562 | 562 | chm->content0 = empty_rdstringc; |
563 | 563 | chm->content1 = empty_rdstringc; |
564 | 564 | chm->outfile = empty_rdstringc; |
640 | 640 | static struct chm_directory_entry *chm_find_file( |
641 | 641 | struct chm *chm, const char *name) |
642 | 642 | { |
643 | return find234(chm->files, (void *)name, chm_directory_entry_find); | |
643 | return findcmp234(chm->files, (const void *)name, | |
644 | chm_directory_entry_find, NULL); | |
644 | 645 | } |
645 | 646 | |
646 | 647 | static char *add_leading_slash(const char *str) |
748 | 749 | int topics_offset_to_update; |
749 | 750 | }; |
750 | 751 | |
751 | int chm_urltbl_entry_cmp(void *av, void *bv) | |
752 | int chm_urltbl_entry_cmp(const void *av, const void *bv, void *cmpctx) | |
752 | 753 | { |
753 | 754 | const struct chm_urltbl_entry |
754 | 755 | *a = (const struct chm_urltbl_entry *)av, |
894 | 895 | |
895 | 896 | { |
896 | 897 | rdstringc winfile = {0, 0, NULL}; |
897 | int i, j, s; | |
898 | int i, s; | |
898 | 899 | struct chm_window *win; |
899 | 900 | |
900 | 901 | RDADD_32BIT_LSB_FIRST(&winfile, count234(chm->windows)); |
949 | 950 | RDADD_32BIT_LSB_FIRST(&winfile, 0); /* default nav pane = TOC */ |
950 | 951 | RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane tabs at top */ |
951 | 952 | RDADD_32BIT_LSB_FIRST(&winfile, 0); /* WM_NOTIFY id */ |
952 | for (j = 0; j < 20; j++) | |
953 | rdaddc(&winfile, 0); /* tab order block */ | |
953 | rdaddc_rep(&winfile, 0, 20); /* tab order block */ | |
954 | 954 | RDADD_32BIT_LSB_FIRST(&winfile, 0); /* history to keep */ |
955 | 955 | RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button target */ |
956 | 956 | RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button target */ |
975 | 975 | rdstringc topics = {0, 0, NULL}; |
976 | 976 | rdstringc urltbl = {0, 0, NULL}; |
977 | 977 | rdstringc urlstr = {0, 0, NULL}; |
978 | int i, index, s, n_tocidx_3; | |
978 | int index, s, n_tocidx_3; | |
979 | 979 | struct chm_directory_entry *contentsfile = NULL, *indexfile = NULL; |
980 | 980 | tree234 *urltbl_pre; |
981 | 981 | struct chm_urltbl_entry *urltbl_entry; |
982 | 982 | |
983 | urltbl_pre = newtree234(chm_urltbl_entry_cmp); | |
984 | ||
985 | for (i = 0; i < 0x1000; i++) | |
986 | rdaddc(&tocidx, 0); | |
983 | urltbl_pre = newtree234(chm_urltbl_entry_cmp, NULL); | |
984 | ||
985 | rdaddc_rep(&tocidx, 0, 0x1000); | |
987 | 986 | |
988 | 987 | /* Write a header of one zero byte at the start of #URLSTR. |
989 | 988 | * chmspec says this doesn't always appear, and is unclear on |
1078 | 1077 | * plus a NUL-terminated copy of the target file name / URL. */ |
1079 | 1078 | urlstr_size = 8 + strlen(sect->url) + 1; |
1080 | 1079 | assert(urlstr_size < 0x1000); /* must _fit_ in a page! */ |
1081 | while ((urlstr.pos ^ (urlstr.pos + urlstr_size - 1)) >> 12) | |
1082 | rdaddc(&urlstr, 0); | |
1080 | if ((urlstr.pos ^ (urlstr.pos + urlstr_size - 1)) >> 12) | |
1081 | rdaddc_rep(&urlstr, 0, 0xFFF & -urlstr_size); | |
1083 | 1082 | |
1084 | 1083 | /* |
1085 | 1084 | * Save everything we know so far about the #URLTBL record |
1228 | 1227 | } |
1229 | 1228 | |
1230 | 1229 | /* Align the current #TOCIDX offset to 16 bytes */ |
1231 | while (tocidx.pos & 0xF) | |
1232 | rdaddc(&tocidx, 0); | |
1230 | rdaddc_rep(&tocidx, 0, 0xF & -tocidx.pos); | |
1233 | 1231 | |
1234 | 1232 | /* #TOCIDX header field pointing at start of type-3 records */ |
1235 | 1233 | PUT_32BIT_LSB_FIRST(tocidx.text + 0x4, tocidx.pos); |
1294 | 1292 | RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */ |
1295 | 1293 | RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no merge files */ |
1296 | 1294 | RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */ |
1297 | while (sysfile.pos - idxhdr_start < 4096) | |
1298 | rdaddc(&sysfile, 0); | |
1295 | rdaddc_rep(&sysfile, 0, 4096 - (sysfile.pos - idxhdr_start)); | |
1299 | 1296 | |
1300 | 1297 | chm_add_file_internal(chm, "/#IDXHDR", sysfile.text + idxhdr_start, |
1301 | 1298 | sysfile.pos - idxhdr_start, |
1366 | 1363 | int orig_decomp_size = chm->content1.pos; |
1367 | 1364 | size_t i; |
1368 | 1365 | |
1369 | while (chm->content1.pos & 0x7FFF) | |
1370 | rdaddc(&chm->content1, 0); /* pad to a realign-interval boundary */ | |
1366 | /* Pad to a realign-interval boundary */ | |
1367 | rdaddc_rep(&chm->content1, 0, 0x7FFF & -chm->content1.pos); | |
1368 | ||
1371 | 1369 | ef = lzx(chm->content1.text, chm->content1.pos, 0x8000, 0x10000); |
1372 | 1370 | chm_add_file_internal( |
1373 | 1371 | chm, "::DataSpace/Storage/MSCompressed/Content", |
214 | 214 | |
215 | 215 | /* The master index maps file names to help-file offsets. */ |
216 | 216 | |
217 | static int filecmp(void *av, void *bv) | |
217 | static int filecmp(const void *av, const void *bv, void *cmpctx) | |
218 | 218 | { |
219 | 219 | const struct file *a = (const struct file *)av; |
220 | 220 | const struct file *b = (const struct file *)bv; |
240 | 240 | |
241 | 241 | /* The |CONTEXT internal file maps help context hashes to TOPICOFFSETs. */ |
242 | 242 | |
243 | static int ctxcmp(void *av, void *bv) | |
243 | static int ctxcmp(const void *av, const void *bv, void *cmpctx) | |
244 | 244 | { |
245 | 245 | const context *a = (const context *)av; |
246 | 246 | const context *b = (const context *)bv; |
268 | 268 | |
269 | 269 | /* The |TTLBTREE internal file maps TOPICOFFSETs to title strings. */ |
270 | 270 | |
271 | static int ttlcmp(void *av, void *bv) | |
271 | static int ttlcmp(const void *av, const void *bv, void *cmpctx) | |
272 | 272 | { |
273 | 273 | const context *a = (const context *)av; |
274 | 274 | const context *b = (const context *)bv; |
298 | 298 | |
299 | 299 | /* The |KWBTREE internal file maps index strings to TOPICOFFSETs. */ |
300 | 300 | |
301 | static int idxcmp(void *av, void *bv) | |
301 | static int idxcmp(const void *av, const void *bv, void *cmpctx) | |
302 | 302 | { |
303 | 303 | const struct indexrec *a = (const struct indexrec *)av; |
304 | 304 | const struct indexrec *b = (const struct indexrec *)bv; |
336 | 336 | * is by the low 16 bits of the number (above that is flags). |
337 | 337 | */ |
338 | 338 | |
339 | static int tabcmp(void *av, void *bv) | |
339 | static int tabcmp(const void *av, const void *bv, void *cmpctx) | |
340 | 340 | { |
341 | 341 | const int *a = (const int *)av; |
342 | 342 | const int *b = (const int *)bv; |
348 | 348 | } |
349 | 349 | |
350 | 350 | /* The internal `fontnames' B-tree stores strings. */ |
351 | static int fontcmp(void *av, void *bv) | |
351 | static int fontcmp(const void *av, const void *bv, void *cmpctx) | |
352 | 352 | { |
353 | 353 | const char *a = (const char *)av; |
354 | 354 | const char *b = (const char *)bv; |
1240 | 1240 | whlp_file_add_short(f, 0x36C); /* magic number */ |
1241 | 1241 | whlp_file_add_short(f, 33); /* minor version: HCW 4.00 Win95+ */ |
1242 | 1242 | whlp_file_add_short(f, 1); /* major version */ |
1243 | whlp_file_add_long(f, time(NULL)); /* generation date */ | |
1243 | whlp_file_add_long(f, current_time()); /* generation date */ | |
1244 | 1244 | whlp_file_add_short(f, 0); /* flags=0 means no compression */ |
1245 | 1245 | |
1246 | 1246 | /* |
1327 | 1327 | */ |
1328 | 1328 | for (i = 0; (fontname = index234(h->fontnames, i)) != NULL; i++) { |
1329 | 1329 | char data[32]; |
1330 | memset(data, i, sizeof(data)); | |
1331 | strncpy(data, fontname, sizeof(data)); | |
1330 | size_t len = strlen(fontname); | |
1331 | if (len > sizeof(data)) | |
1332 | len = sizeof(data); | |
1333 | memset(data, 0, sizeof(data)); | |
1334 | memcpy(data, fontname, len); | |
1332 | 1335 | whlp_file_add(f, data, sizeof(data)); |
1333 | 1336 | } |
1334 | 1337 | |
1339 | 1342 | int fontpos; |
1340 | 1343 | void *ret; |
1341 | 1344 | |
1342 | ret = findpos234(h->fontnames, fontdesc->font, NULL, &fontpos); | |
1345 | ret = findpos234(h->fontnames, fontdesc->font, &fontpos); | |
1343 | 1346 | assert(ret != NULL); |
1344 | 1347 | |
1345 | 1348 | whlp_file_add_char(f, fontdesc->rendition); |
1710 | 1713 | /* |
1711 | 1714 | * Internal B-trees. |
1712 | 1715 | */ |
1713 | ret->files = newtree234(filecmp); | |
1714 | ret->pre_contexts = newtree234(NULL); | |
1715 | ret->contexts = newtree234(ctxcmp); | |
1716 | ret->titles = newtree234(ttlcmp); | |
1717 | ret->text = newtree234(NULL); | |
1718 | ret->index = newtree234(idxcmp); | |
1719 | ret->tabstops = newtree234(tabcmp); | |
1720 | ret->fontnames = newtree234(fontcmp); | |
1721 | ret->fontdescs = newtree234(NULL); | |
1716 | ret->files = newtree234(filecmp, NULL); | |
1717 | ret->pre_contexts = newtree234(NULL, NULL); | |
1718 | ret->contexts = newtree234(ctxcmp, NULL); | |
1719 | ret->titles = newtree234(ttlcmp, NULL); | |
1720 | ret->text = newtree234(NULL, NULL); | |
1721 | ret->index = newtree234(idxcmp, NULL); | |
1722 | ret->tabstops = newtree234(tabcmp, NULL); | |
1723 | ret->fontnames = newtree234(fontcmp, NULL); | |
1724 | ret->fontdescs = newtree234(NULL, NULL); | |
1722 | 1725 | |
1723 | 1726 | /* |
1724 | 1727 | * Some standard files. |
1746 | 1749 | int filecount, offset, index, filelen; |
1747 | 1750 | struct file *file, *map, *md; |
1748 | 1751 | context *ctx; |
1749 | int has_index; | |
1752 | bool has_index; | |
1750 | 1753 | |
1751 | 1754 | /* |
1752 | 1755 | * Lay out the topic section. |