Imported Upstream version 2.5.4~svn140+ds1
Daniel Glassey
7 years ago
0 | AUTHORS | |
1 | ||
2 | TECkit is written by Jonathan Kew, SIL International. | |
3 | ||
4 | The Perl interface is by Martin Hosken, and the JNI interface is by Keith Stribley. | |
5 | Thanks to Ulrik Petersen for patches to improve portability, especially for MS VC++ | |
6 | and for 64-bit systems. Authors of other library code included with TECkit are as | |
7 | noted in the relevant source files. |
0 | INSTALL | |
1 | ||
2 | Note: If you checked out teckit from svn, or for any other reason don't have | |
3 | a file called "configure", you will need to initialize the directory using | |
4 | GNU autotools. Instructions for this are given further down. | |
5 | ||
6 | For typical Unix-like systems: | |
7 | ||
8 | ./configure | |
9 | make | |
10 | [sudo] make install | |
11 | ||
12 | Installs tools into /usr/local/bin, with libraries and header files in | |
13 | /usr/local/lib and /usr/local/include respectively. | |
14 | ||
15 | Typical configure options such as --prefix should work, though little testing | |
16 | has been done. | |
17 | ||
18 | The tools installed are: | |
19 | teckit_compile | |
20 | compiler to create binary mapping tables (.tec) from text files (.map) | |
21 | txtconv | |
22 | simple tool to apply a mapping to a plain-text file | |
23 | sfconv | |
24 | tool to apply mappings to a Standard Format file, as specified by | |
25 | a control file | |
26 | ||
27 | BUILDING for WINDOWS | |
28 | ||
29 | You will need to install MinGW. | |
30 | ||
31 | On Mac or Linux, you can use the package system as follows: | |
32 | ||
33 | gcc-mingw32 (Debian/Ubuntu) | |
34 | i386-mingw32-gcc (MacPorts) | |
35 | ||
36 | You may also need some other packages on Mac, such as updated autotools. | |
37 | ||
38 | On Windows, install MinGW+MSys. Probably the easiest way to do this is to | |
39 | download and run mingw-get-inst. Check the options for C++ and MSYS Basic. | |
40 | Once installed, you can open a Unix-like shell by running MinGW > MinGW Shell | |
41 | from the All Programs menu. File and directory names use forward slashes in MSys, | |
42 | and C:\ is represented as /c | |
43 | ||
44 | Using the cd command, navigate to the place where you checked out or unzipped | |
45 | the teckit sources. Note that if there are spaces in any of the parent directory | |
46 | names you may experience difficulty building. If so, move the source directory | |
47 | to a location that doesn't involve spaces (e.g. C:\src\teckit). | |
48 | ||
49 | If necessary, follow the GNU AUTOTOOLS instructions below, then run: | |
50 | ||
51 | ./build-windows-binaries.sh | |
52 | ||
53 | This will create executables and DLLs in teckit-windows-bin. Documentation, | |
54 | header files, sample tools, etc. can be added and the result zipped to produce | |
55 | a release. | |
56 | ||
57 | BUILDING a LINUX PACKAGE | |
58 | ||
59 | Run the following script: | |
60 | ||
61 | ./build-linux-package.sh | |
62 | ||
63 | The results will be in the teckit-linux subdirectory. | |
64 | ||
65 | BUILDING a MAC PACKAGE | |
66 | ||
67 | Run the following script: | |
68 | ||
69 | ./build-mac-binaries.sh | |
70 | ||
71 | This will create files in the teckit-mac subdirectory. To make the package itself: | |
72 | ||
73 | cd mac-installer | |
74 | ./create-pkg.sh | |
75 | ||
76 | The result will be a file called TECkit.dmg containing a single .pkg file. | |
77 | ||
78 | GNU AUTOTOOLS | |
79 | ||
80 | If you don't have a file called "configure" in the top-level teckit source | |
81 | directory, you will need to initialize the directory using GNU autotools. | |
82 | ||
83 | On Windows, install the autotools by running: | |
84 | ||
85 | mingw-get install mingw32-autotools | |
86 | ||
87 | On Mac or Linux, use the package system (MacPorts on Mac). | |
88 | ||
89 | Then, on all platforms, run: | |
90 | ||
91 | ./autogen.sh | |
92 | ||
93 | in the teckit source directory. This normally needs to be done only once, but | |
94 | if you update the directory from svn you may need to run it again. |
0 | ACLOCAL_AMFLAGS = -I m4 | |
1 | ||
2 | SUBDIRS = lib bin docs test | |
3 | ||
4 | pkgconfigdir = $(libdir)/pkgconfig | |
5 | pkgconfig_DATA = teckit.pc | |
6 | ||
7 | EXTRA_DIST = license/License_CPLv05.txt | |
8 | EXTRA_DIST += license/License_LGPLv21.txt | |
9 | EXTRA_DIST += license/LICENSING.txt | |
10 |
0 | # Makefile.in generated by automake 1.14.1 from Makefile.am. | |
1 | # @configure_input@ | |
2 | ||
3 | # Copyright (C) 1994-2013 Free Software Foundation, Inc. | |
4 | ||
5 | # This Makefile.in is free software; the Free Software Foundation | |
6 | # gives unlimited permission to copy and/or distribute it, | |
7 | # with or without modifications, as long as this notice is preserved. | |
8 | ||
9 | # This program is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY, to the extent permitted by law; without | |
11 | # even the implied warranty of MERCHANTABILITY or FITNESS FOR A | |
12 | # PARTICULAR PURPOSE. | |
13 | ||
14 | @SET_MAKE@ | |
15 | ||
16 | VPATH = @srcdir@ | |
17 | am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' | |
18 | am__make_running_with_option = \ | |
19 | case $${target_option-} in \ | |
20 | ?) ;; \ | |
21 | *) echo "am__make_running_with_option: internal error: invalid" \ | |
22 | "target option '$${target_option-}' specified" >&2; \ | |
23 | exit 1;; \ | |
24 | esac; \ | |
25 | has_opt=no; \ | |
26 | sane_makeflags=$$MAKEFLAGS; \ | |
27 | if $(am__is_gnu_make); then \ | |
28 | sane_makeflags=$$MFLAGS; \ | |
29 | else \ | |
30 | case $$MAKEFLAGS in \ | |
31 | *\\[\ \ ]*) \ | |
32 | bs=\\; \ | |
33 | sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | |
34 | | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ | |
35 | esac; \ | |
36 | fi; \ | |
37 | skip_next=no; \ | |
38 | strip_trailopt () \ | |
39 | { \ | |
40 | flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ | |
41 | }; \ | |
42 | for flg in $$sane_makeflags; do \ | |
43 | test $$skip_next = yes && { skip_next=no; continue; }; \ | |
44 | case $$flg in \ | |
45 | *=*|--*) continue;; \ | |
46 | -*I) strip_trailopt 'I'; skip_next=yes;; \ | |
47 | -*I?*) strip_trailopt 'I';; \ | |
48 | -*O) strip_trailopt 'O'; skip_next=yes;; \ | |
49 | -*O?*) strip_trailopt 'O';; \ | |
50 | -*l) strip_trailopt 'l'; skip_next=yes;; \ | |
51 | -*l?*) strip_trailopt 'l';; \ | |
52 | -[dEDm]) skip_next=yes;; \ | |
53 | -[JT]) skip_next=yes;; \ | |
54 | esac; \ | |
55 | case $$flg in \ | |
56 | *$$target_option*) has_opt=yes; break;; \ | |
57 | esac; \ | |
58 | done; \ | |
59 | test $$has_opt = yes | |
60 | am__make_dryrun = (target_option=n; $(am__make_running_with_option)) | |
61 | am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) | |
62 | pkgdatadir = $(datadir)/@PACKAGE@ | |
63 | pkgincludedir = $(includedir)/@PACKAGE@ | |
64 | pkglibdir = $(libdir)/@PACKAGE@ | |
65 | pkglibexecdir = $(libexecdir)/@PACKAGE@ | |
66 | am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd | |
67 | install_sh_DATA = $(install_sh) -c -m 644 | |
68 | install_sh_PROGRAM = $(install_sh) -c | |
69 | install_sh_SCRIPT = $(install_sh) -c | |
70 | INSTALL_HEADER = $(INSTALL_DATA) | |
71 | transform = $(program_transform_name) | |
72 | NORMAL_INSTALL = : | |
73 | PRE_INSTALL = : | |
74 | POST_INSTALL = : | |
75 | NORMAL_UNINSTALL = : | |
76 | PRE_UNINSTALL = : | |
77 | POST_UNINSTALL = : | |
78 | build_triplet = @build@ | |
79 | host_triplet = @host@ | |
80 | target_triplet = @target@ | |
81 | subdir = . | |
82 | DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ | |
83 | $(top_srcdir)/configure $(am__configure_deps) \ | |
84 | $(srcdir)/config.h.in $(srcdir)/installed-top.pc.in \ | |
85 | $(srcdir)/teckit.pc.in $(srcdir)/uninstalled-top.pc.in AUTHORS \ | |
86 | COPYING ChangeLog INSTALL NEWS README compile config.guess \ | |
87 | config.sub depcomp install-sh missing ltmain.sh | |
88 | ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 | |
89 | am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ | |
90 | $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ | |
91 | $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ | |
92 | $(top_srcdir)/zlib-1.2.3/withenable.ac \ | |
93 | $(top_srcdir)/zlib-1.2.3/zlib.ac $(top_srcdir)/configure.ac | |
94 | am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ | |
95 | $(ACLOCAL_M4) | |
96 | am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ | |
97 | configure.lineno config.status.lineno | |
98 | mkinstalldirs = $(install_sh) -d | |
99 | CONFIG_HEADER = config.h | |
100 | CONFIG_CLEAN_FILES = teckit.pc teckit-uninstalled.pc | |
101 | CONFIG_CLEAN_VPATH_FILES = | |
102 | AM_V_P = $(am__v_P_@AM_V@) | |
103 | am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) | |
104 | am__v_P_0 = false | |
105 | am__v_P_1 = : | |
106 | AM_V_GEN = $(am__v_GEN_@AM_V@) | |
107 | am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) | |
108 | am__v_GEN_0 = @echo " GEN " $@; | |
109 | am__v_GEN_1 = | |
110 | AM_V_at = $(am__v_at_@AM_V@) | |
111 | am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) | |
112 | am__v_at_0 = @ | |
113 | am__v_at_1 = | |
114 | SOURCES = | |
115 | DIST_SOURCES = | |
116 | RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ | |
117 | ctags-recursive dvi-recursive html-recursive info-recursive \ | |
118 | install-data-recursive install-dvi-recursive \ | |
119 | install-exec-recursive install-html-recursive \ | |
120 | install-info-recursive install-pdf-recursive \ | |
121 | install-ps-recursive install-recursive installcheck-recursive \ | |
122 | installdirs-recursive pdf-recursive ps-recursive \ | |
123 | tags-recursive uninstall-recursive | |
124 | am__can_run_installinfo = \ | |
125 | case $$AM_UPDATE_INFO_DIR in \ | |
126 | n|no|NO) false;; \ | |
127 | *) (install-info --version) >/dev/null 2>&1;; \ | |
128 | esac | |
129 | am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; | |
130 | am__vpath_adj = case $$p in \ | |
131 | $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ | |
132 | *) f=$$p;; \ | |
133 | esac; | |
134 | am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; | |
135 | am__install_max = 40 | |
136 | am__nobase_strip_setup = \ | |
137 | srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` | |
138 | am__nobase_strip = \ | |
139 | for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" | |
140 | am__nobase_list = $(am__nobase_strip_setup); \ | |
141 | for p in $$list; do echo "$$p $$p"; done | \ | |
142 | sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ | |
143 | $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ | |
144 | if (++n[$$2] == $(am__install_max)) \ | |
145 | { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ | |
146 | END { for (dir in files) print dir, files[dir] }' | |
147 | am__base_list = \ | |
148 | sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ | |
149 | sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | |
150 | am__uninstall_files_from_dir = { \ | |
151 | test -z "$$files" \ | |
152 | || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ | |
153 | || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ | |
154 | $(am__cd) "$$dir" && rm -f $$files; }; \ | |
155 | } | |
156 | am__installdirs = "$(DESTDIR)$(pkgconfigdir)" | |
157 | DATA = $(pkgconfig_DATA) | |
158 | RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ | |
159 | distclean-recursive maintainer-clean-recursive | |
160 | am__recursive_targets = \ | |
161 | $(RECURSIVE_TARGETS) \ | |
162 | $(RECURSIVE_CLEAN_TARGETS) \ | |
163 | $(am__extra_recursive_targets) | |
164 | AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ | |
165 | cscope distdir dist dist-all distcheck | |
166 | am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) \ | |
167 | $(LISP)config.h.in | |
168 | # Read a list of newline-separated strings from the standard input, | |
169 | # and print each of them once, without duplicates. Input order is | |
170 | # *not* preserved. | |
171 | am__uniquify_input = $(AWK) '\ | |
172 | BEGIN { nonempty = 0; } \ | |
173 | { items[$$0] = 1; nonempty = 1; } \ | |
174 | END { if (nonempty) { for (i in items) print i; }; } \ | |
175 | ' | |
176 | # Make sure the list of sources is unique. This is necessary because, | |
177 | # e.g., the same source file might be shared among _SOURCES variables | |
178 | # for different programs/libraries. | |
179 | am__define_uniq_tagged_files = \ | |
180 | list='$(am__tagged_files)'; \ | |
181 | unique=`for i in $$list; do \ | |
182 | if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ | |
183 | done | $(am__uniquify_input)` | |
184 | ETAGS = etags | |
185 | CTAGS = ctags | |
186 | CSCOPE = cscope | |
187 | DIST_SUBDIRS = $(SUBDIRS) | |
188 | DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) | |
189 | distdir = $(PACKAGE)-$(VERSION) | |
190 | top_distdir = $(distdir) | |
191 | am__remove_distdir = \ | |
192 | if test -d "$(distdir)"; then \ | |
193 | find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \ | |
194 | && rm -rf "$(distdir)" \ | |
195 | || { sleep 5 && rm -rf "$(distdir)"; }; \ | |
196 | else :; fi | |
197 | am__post_remove_distdir = $(am__remove_distdir) | |
198 | am__relativize = \ | |
199 | dir0=`pwd`; \ | |
200 | sed_first='s,^\([^/]*\)/.*$$,\1,'; \ | |
201 | sed_rest='s,^[^/]*/*,,'; \ | |
202 | sed_last='s,^.*/\([^/]*\)$$,\1,'; \ | |
203 | sed_butlast='s,/*[^/]*$$,,'; \ | |
204 | while test -n "$$dir1"; do \ | |
205 | first=`echo "$$dir1" | sed -e "$$sed_first"`; \ | |
206 | if test "$$first" != "."; then \ | |
207 | if test "$$first" = ".."; then \ | |
208 | dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ | |
209 | dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ | |
210 | else \ | |
211 | first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ | |
212 | if test "$$first2" = "$$first"; then \ | |
213 | dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ | |
214 | else \ | |
215 | dir2="../$$dir2"; \ | |
216 | fi; \ | |
217 | dir0="$$dir0"/"$$first"; \ | |
218 | fi; \ | |
219 | fi; \ | |
220 | dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ | |
221 | done; \ | |
222 | reldir="$$dir2" | |
223 | DIST_ARCHIVES = $(distdir).tar.gz | |
224 | GZIP_ENV = --best | |
225 | DIST_TARGETS = dist-gzip | |
226 | distuninstallcheck_listfiles = find . -type f -print | |
227 | am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ | |
228 | | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' | |
229 | distcleancheck_listfiles = find . -type f -print | |
230 | ACLOCAL = @ACLOCAL@ | |
231 | AMTAR = @AMTAR@ | |
232 | AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ | |
233 | AR = @AR@ | |
234 | AUTOCONF = @AUTOCONF@ | |
235 | AUTOHEADER = @AUTOHEADER@ | |
236 | AUTOMAKE = @AUTOMAKE@ | |
237 | AWK = @AWK@ | |
238 | CC = @CC@ | |
239 | CCDEPMODE = @CCDEPMODE@ | |
240 | CFLAGS = @CFLAGS@ | |
241 | CPP = @CPP@ | |
242 | CPPFLAGS = @CPPFLAGS@ | |
243 | CXX = @CXX@ | |
244 | CXXCPP = @CXXCPP@ | |
245 | CXXDEPMODE = @CXXDEPMODE@ | |
246 | CXXFLAGS = @CXXFLAGS@ | |
247 | CYGPATH_W = @CYGPATH_W@ | |
248 | DEFS = @DEFS@ | |
249 | DEPDIR = @DEPDIR@ | |
250 | DLLTOOL = @DLLTOOL@ | |
251 | DSYMUTIL = @DSYMUTIL@ | |
252 | DUMPBIN = @DUMPBIN@ | |
253 | ECHO_C = @ECHO_C@ | |
254 | ECHO_N = @ECHO_N@ | |
255 | ECHO_T = @ECHO_T@ | |
256 | EGREP = @EGREP@ | |
257 | EXEEXT = @EXEEXT@ | |
258 | FGREP = @FGREP@ | |
259 | GREP = @GREP@ | |
260 | INSTALL = @INSTALL@ | |
261 | INSTALL_DATA = @INSTALL_DATA@ | |
262 | INSTALL_PROGRAM = @INSTALL_PROGRAM@ | |
263 | INSTALL_SCRIPT = @INSTALL_SCRIPT@ | |
264 | INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ | |
265 | LD = @LD@ | |
266 | LDFLAGS = @LDFLAGS@ | |
267 | LDZLIB = @LDZLIB@ | |
268 | LIBOBJS = @LIBOBJS@ | |
269 | LIBS = @LIBS@ | |
270 | LIBTOOL = @LIBTOOL@ | |
271 | LIPO = @LIPO@ | |
272 | LN_S = @LN_S@ | |
273 | LTLIBOBJS = @LTLIBOBJS@ | |
274 | MAINT = @MAINT@ | |
275 | MAKEINFO = @MAKEINFO@ | |
276 | MANIFEST_TOOL = @MANIFEST_TOOL@ | |
277 | MKDIR_P = @MKDIR_P@ | |
278 | NM = @NM@ | |
279 | NMEDIT = @NMEDIT@ | |
280 | OBJDUMP = @OBJDUMP@ | |
281 | OBJEXT = @OBJEXT@ | |
282 | OTOOL = @OTOOL@ | |
283 | OTOOL64 = @OTOOL64@ | |
284 | PACKAGE = @PACKAGE@ | |
285 | PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ | |
286 | PACKAGE_NAME = @PACKAGE_NAME@ | |
287 | PACKAGE_STRING = @PACKAGE_STRING@ | |
288 | PACKAGE_TARNAME = @PACKAGE_TARNAME@ | |
289 | PACKAGE_URL = @PACKAGE_URL@ | |
290 | PACKAGE_VERSION = @PACKAGE_VERSION@ | |
291 | PATH_SEPARATOR = @PATH_SEPARATOR@ | |
292 | RANLIB = @RANLIB@ | |
293 | RC = @RC@ | |
294 | SED = @SED@ | |
295 | SET_MAKE = @SET_MAKE@ | |
296 | SHELL = @SHELL@ | |
297 | STRIP = @STRIP@ | |
298 | VERSION = @VERSION@ | |
299 | ZLIBCPPFLAGS = @ZLIBCPPFLAGS@ | |
300 | ZLIBDEP = @ZLIBDEP@ | |
301 | ZLIBDIR = @ZLIBDIR@ | |
302 | abs_builddir = @abs_builddir@ | |
303 | abs_srcdir = @abs_srcdir@ | |
304 | abs_top_builddir = @abs_top_builddir@ | |
305 | abs_top_srcdir = @abs_top_srcdir@ | |
306 | ac_ct_AR = @ac_ct_AR@ | |
307 | ac_ct_CC = @ac_ct_CC@ | |
308 | ac_ct_CXX = @ac_ct_CXX@ | |
309 | ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ | |
310 | am__include = @am__include@ | |
311 | am__leading_dot = @am__leading_dot@ | |
312 | am__quote = @am__quote@ | |
313 | am__tar = @am__tar@ | |
314 | am__untar = @am__untar@ | |
315 | bindir = @bindir@ | |
316 | build = @build@ | |
317 | build_alias = @build_alias@ | |
318 | build_cpu = @build_cpu@ | |
319 | build_os = @build_os@ | |
320 | build_vendor = @build_vendor@ | |
321 | builddir = @builddir@ | |
322 | datadir = @datadir@ | |
323 | datarootdir = @datarootdir@ | |
324 | docdir = @docdir@ | |
325 | dvidir = @dvidir@ | |
326 | exec_prefix = @exec_prefix@ | |
327 | expat_CFLAGS = @expat_CFLAGS@ | |
328 | expat_LIBS = @expat_LIBS@ | |
329 | host = @host@ | |
330 | host_alias = @host_alias@ | |
331 | host_cpu = @host_cpu@ | |
332 | host_os = @host_os@ | |
333 | host_vendor = @host_vendor@ | |
334 | htmldir = @htmldir@ | |
335 | includedir = @includedir@ | |
336 | infodir = @infodir@ | |
337 | install_sh = @install_sh@ | |
338 | libdir = @libdir@ | |
339 | libexecdir = @libexecdir@ | |
340 | localedir = @localedir@ | |
341 | localstatedir = @localstatedir@ | |
342 | mandir = @mandir@ | |
343 | mkdir_p = @mkdir_p@ | |
344 | oldincludedir = @oldincludedir@ | |
345 | pdfdir = @pdfdir@ | |
346 | prefix = @prefix@ | |
347 | program_transform_name = @program_transform_name@ | |
348 | psdir = @psdir@ | |
349 | sbindir = @sbindir@ | |
350 | sharedstatedir = @sharedstatedir@ | |
351 | srcdir = @srcdir@ | |
352 | sysconfdir = @sysconfdir@ | |
353 | target = @target@ | |
354 | target_alias = @target_alias@ | |
355 | target_cpu = @target_cpu@ | |
356 | target_os = @target_os@ | |
357 | target_vendor = @target_vendor@ | |
358 | top_build_prefix = @top_build_prefix@ | |
359 | top_builddir = @top_builddir@ | |
360 | top_srcdir = @top_srcdir@ | |
361 | ACLOCAL_AMFLAGS = -I m4 | |
362 | SUBDIRS = lib bin docs test | |
363 | pkgconfigdir = $(libdir)/pkgconfig | |
364 | pkgconfig_DATA = teckit.pc | |
365 | EXTRA_DIST = license/License_CPLv05.txt license/License_LGPLv21.txt \ | |
366 | license/LICENSING.txt | |
367 | all: config.h | |
368 | $(MAKE) $(AM_MAKEFLAGS) all-recursive | |
369 | ||
370 | .SUFFIXES: | |
371 | am--refresh: Makefile | |
372 | @: | |
373 | $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) | |
374 | @for dep in $?; do \ | |
375 | case '$(am__configure_deps)' in \ | |
376 | *$$dep*) \ | |
377 | echo ' cd $(srcdir) && $(AUTOMAKE) --foreign'; \ | |
378 | $(am__cd) $(srcdir) && $(AUTOMAKE) --foreign \ | |
379 | && exit 0; \ | |
380 | exit 1;; \ | |
381 | esac; \ | |
382 | done; \ | |
383 | echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \ | |
384 | $(am__cd) $(top_srcdir) && \ | |
385 | $(AUTOMAKE) --foreign Makefile | |
386 | .PRECIOUS: Makefile | |
387 | Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status | |
388 | @case '$?' in \ | |
389 | *config.status*) \ | |
390 | echo ' $(SHELL) ./config.status'; \ | |
391 | $(SHELL) ./config.status;; \ | |
392 | *) \ | |
393 | echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \ | |
394 | cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \ | |
395 | esac; | |
396 | ||
397 | $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) | |
398 | $(SHELL) ./config.status --recheck | |
399 | ||
400 | $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) | |
401 | $(am__cd) $(srcdir) && $(AUTOCONF) | |
402 | $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) | |
403 | $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) | |
404 | $(am__aclocal_m4_deps): | |
405 | ||
406 | config.h: stamp-h1 | |
407 | @test -f $@ || rm -f stamp-h1 | |
408 | @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1 | |
409 | ||
410 | stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status | |
411 | @rm -f stamp-h1 | |
412 | cd $(top_builddir) && $(SHELL) ./config.status config.h | |
413 | $(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) | |
414 | ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) | |
415 | rm -f stamp-h1 | |
416 | touch $@ | |
417 | ||
418 | distclean-hdr: | |
419 | -rm -f config.h stamp-h1 | |
420 | teckit.pc: $(top_builddir)/config.status $(srcdir)/installed-top.pc.in $(srcdir)/teckit.pc.in | |
421 | cd $(top_builddir) && $(SHELL) ./config.status $@ | |
422 | teckit-uninstalled.pc: $(top_builddir)/config.status $(srcdir)/uninstalled-top.pc.in $(srcdir)/teckit.pc.in | |
423 | cd $(top_builddir) && $(SHELL) ./config.status $@ | |
424 | ||
425 | mostlyclean-libtool: | |
426 | -rm -f *.lo | |
427 | ||
428 | clean-libtool: | |
429 | -rm -rf .libs _libs | |
430 | ||
431 | distclean-libtool: | |
432 | -rm -f libtool config.lt | |
433 | install-pkgconfigDATA: $(pkgconfig_DATA) | |
434 | @$(NORMAL_INSTALL) | |
435 | @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ | |
436 | if test -n "$$list"; then \ | |
437 | echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \ | |
438 | $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \ | |
439 | fi; \ | |
440 | for p in $$list; do \ | |
441 | if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ | |
442 | echo "$$d$$p"; \ | |
443 | done | $(am__base_list) | \ | |
444 | while read files; do \ | |
445 | echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \ | |
446 | $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \ | |
447 | done | |
448 | ||
449 | uninstall-pkgconfigDATA: | |
450 | @$(NORMAL_UNINSTALL) | |
451 | @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ | |
452 | files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ | |
453 | dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir) | |
454 | ||
455 | # This directory's subdirectories are mostly independent; you can cd | |
456 | # into them and run 'make' without going through this Makefile. | |
457 | # To change the values of 'make' variables: instead of editing Makefiles, | |
458 | # (1) if the variable is set in 'config.status', edit 'config.status' | |
459 | # (which will cause the Makefiles to be regenerated when you run 'make'); | |
460 | # (2) otherwise, pass the desired values on the 'make' command line. | |
461 | $(am__recursive_targets): | |
462 | @fail=; \ | |
463 | if $(am__make_keepgoing); then \ | |
464 | failcom='fail=yes'; \ | |
465 | else \ | |
466 | failcom='exit 1'; \ | |
467 | fi; \ | |
468 | dot_seen=no; \ | |
469 | target=`echo $@ | sed s/-recursive//`; \ | |
470 | case "$@" in \ | |
471 | distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ | |
472 | *) list='$(SUBDIRS)' ;; \ | |
473 | esac; \ | |
474 | for subdir in $$list; do \ | |
475 | echo "Making $$target in $$subdir"; \ | |
476 | if test "$$subdir" = "."; then \ | |
477 | dot_seen=yes; \ | |
478 | local_target="$$target-am"; \ | |
479 | else \ | |
480 | local_target="$$target"; \ | |
481 | fi; \ | |
482 | ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ | |
483 | || eval $$failcom; \ | |
484 | done; \ | |
485 | if test "$$dot_seen" = "no"; then \ | |
486 | $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ | |
487 | fi; test -z "$$fail" | |
488 | ||
489 | ID: $(am__tagged_files) | |
490 | $(am__define_uniq_tagged_files); mkid -fID $$unique | |
491 | tags: tags-recursive | |
492 | TAGS: tags | |
493 | ||
494 | tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) | |
495 | set x; \ | |
496 | here=`pwd`; \ | |
497 | if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ | |
498 | include_option=--etags-include; \ | |
499 | empty_fix=.; \ | |
500 | else \ | |
501 | include_option=--include; \ | |
502 | empty_fix=; \ | |
503 | fi; \ | |
504 | list='$(SUBDIRS)'; for subdir in $$list; do \ | |
505 | if test "$$subdir" = .; then :; else \ | |
506 | test ! -f $$subdir/TAGS || \ | |
507 | set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ | |
508 | fi; \ | |
509 | done; \ | |
510 | $(am__define_uniq_tagged_files); \ | |
511 | shift; \ | |
512 | if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ | |
513 | test -n "$$unique" || unique=$$empty_fix; \ | |
514 | if test $$# -gt 0; then \ | |
515 | $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ | |
516 | "$$@" $$unique; \ | |
517 | else \ | |
518 | $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ | |
519 | $$unique; \ | |
520 | fi; \ | |
521 | fi | |
522 | ctags: ctags-recursive | |
523 | ||
524 | CTAGS: ctags | |
525 | ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) | |
526 | $(am__define_uniq_tagged_files); \ | |
527 | test -z "$(CTAGS_ARGS)$$unique" \ | |
528 | || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ | |
529 | $$unique | |
530 | ||
531 | GTAGS: | |
532 | here=`$(am__cd) $(top_builddir) && pwd` \ | |
533 | && $(am__cd) $(top_srcdir) \ | |
534 | && gtags -i $(GTAGS_ARGS) "$$here" | |
535 | cscope: cscope.files | |
536 | test ! -s cscope.files \ | |
537 | || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) | |
538 | clean-cscope: | |
539 | -rm -f cscope.files | |
540 | cscope.files: clean-cscope cscopelist | |
541 | cscopelist: cscopelist-recursive | |
542 | ||
543 | cscopelist-am: $(am__tagged_files) | |
544 | list='$(am__tagged_files)'; \ | |
545 | case "$(srcdir)" in \ | |
546 | [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ | |
547 | *) sdir=$(subdir)/$(srcdir) ;; \ | |
548 | esac; \ | |
549 | for i in $$list; do \ | |
550 | if test -f "$$i"; then \ | |
551 | echo "$(subdir)/$$i"; \ | |
552 | else \ | |
553 | echo "$$sdir/$$i"; \ | |
554 | fi; \ | |
555 | done >> $(top_builddir)/cscope.files | |
556 | ||
557 | distclean-tags: | |
558 | -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags | |
559 | -rm -f cscope.out cscope.in.out cscope.po.out cscope.files | |
560 | ||
561 | distdir: $(DISTFILES) | |
562 | $(am__remove_distdir) | |
563 | test -d "$(distdir)" || mkdir "$(distdir)" | |
564 | @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ | |
565 | topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ | |
566 | list='$(DISTFILES)'; \ | |
567 | dist_files=`for file in $$list; do echo $$file; done | \ | |
568 | sed -e "s|^$$srcdirstrip/||;t" \ | |
569 | -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ | |
570 | case $$dist_files in \ | |
571 | */*) $(MKDIR_P) `echo "$$dist_files" | \ | |
572 | sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ | |
573 | sort -u` ;; \ | |
574 | esac; \ | |
575 | for file in $$dist_files; do \ | |
576 | if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ | |
577 | if test -d $$d/$$file; then \ | |
578 | dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ | |
579 | if test -d "$(distdir)/$$file"; then \ | |
580 | find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ | |
581 | fi; \ | |
582 | if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ | |
583 | cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ | |
584 | find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ | |
585 | fi; \ | |
586 | cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ | |
587 | else \ | |
588 | test -f "$(distdir)/$$file" \ | |
589 | || cp -p $$d/$$file "$(distdir)/$$file" \ | |
590 | || exit 1; \ | |
591 | fi; \ | |
592 | done | |
593 | @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ | |
594 | if test "$$subdir" = .; then :; else \ | |
595 | $(am__make_dryrun) \ | |
596 | || test -d "$(distdir)/$$subdir" \ | |
597 | || $(MKDIR_P) "$(distdir)/$$subdir" \ | |
598 | || exit 1; \ | |
599 | dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ | |
600 | $(am__relativize); \ | |
601 | new_distdir=$$reldir; \ | |
602 | dir1=$$subdir; dir2="$(top_distdir)"; \ | |
603 | $(am__relativize); \ | |
604 | new_top_distdir=$$reldir; \ | |
605 | echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ | |
606 | echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ | |
607 | ($(am__cd) $$subdir && \ | |
608 | $(MAKE) $(AM_MAKEFLAGS) \ | |
609 | top_distdir="$$new_top_distdir" \ | |
610 | distdir="$$new_distdir" \ | |
611 | am__remove_distdir=: \ | |
612 | am__skip_length_check=: \ | |
613 | am__skip_mode_fix=: \ | |
614 | distdir) \ | |
615 | || exit 1; \ | |
616 | fi; \ | |
617 | done | |
618 | -test -n "$(am__skip_mode_fix)" \ | |
619 | || find "$(distdir)" -type d ! -perm -755 \ | |
620 | -exec chmod u+rwx,go+rx {} \; -o \ | |
621 | ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ | |
622 | ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ | |
623 | ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ | |
624 | || chmod -R a+r "$(distdir)" | |
625 | dist-gzip: distdir | |
626 | tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz | |
627 | $(am__post_remove_distdir) | |
628 | ||
629 | dist-bzip2: distdir | |
630 | tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 | |
631 | $(am__post_remove_distdir) | |
632 | ||
633 | dist-lzip: distdir | |
634 | tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz | |
635 | $(am__post_remove_distdir) | |
636 | ||
637 | dist-xz: distdir | |
638 | tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz | |
639 | $(am__post_remove_distdir) | |
640 | ||
641 | dist-tarZ: distdir # .tar.Z archive made with legacy 'compress'; prints a deprecation warning | |
642 | @echo WARNING: "Support for distribution archives compressed with" \ | |
643 | "legacy program 'compress' is deprecated." >&2 | |
644 | @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 | |
645 | tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z | |
646 | $(am__post_remove_distdir) | |
647 | ||
648 | dist-shar: distdir # gzip-compressed shar archive (.shar.gz); prints a deprecation warning | |
649 | @echo WARNING: "Support for shar distribution archives is" \ | |
650 | "deprecated." >&2 | |
651 | @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 | |
652 | shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz | |
653 | $(am__post_remove_distdir) | |
654 | ||
655 | dist-zip: distdir | |
656 | -rm -f $(distdir).zip | |
657 | zip -rq $(distdir).zip $(distdir) | |
658 | $(am__post_remove_distdir) | |
659 | ||
660 | dist dist-all: | |
661 | $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' | |
662 | $(am__post_remove_distdir) | |
663 | ||
# distcheck: unpack the freshly built distribution archive and prove that it
# is self-contained.  The recipe
#   1. unpacks whichever archive type appears in $(DIST_ARCHIVES);
#   2. makes the unpacked tree read-only, except for new _build and _inst
#      subdirectories;
#   3. from _build, runs ../configure with --srcdir=.. and a prefix inside
#      _inst, then runs the default, dvi, check, install, installcheck and
#      uninstall targets, followed by distuninstallcheck on the prefix;
#   4. repeats install/uninstall/distuninstallcheck with DESTDIR pointing at
#      a temporary directory under $${TMPDIR-/tmp}, removing it afterwards;
#   5. runs "make dist" from the build tree, deletes the resulting
#      archives, and finishes with distcleancheck.
# Any failing step makes the recipe exit 1.  On success the archive names
# are echoed through a sed script that adds rows of '=' around the list.
distcheck: dist
	case '$(DIST_ARCHIVES)' in \
	*.tar.gz*) \
	  GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\
	*.tar.bz2*) \
	  bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
	*.tar.lz*) \
	  lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
	*.tar.xz*) \
	  xz -dc $(distdir).tar.xz | $(am__untar) ;;\
	*.tar.Z*) \
	  uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
	*.shar.gz*) \
	  GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\
	*.zip*) \
	  unzip $(distdir).zip ;;\
	esac
	chmod -R a-w $(distdir)
	chmod u+w $(distdir)
	mkdir $(distdir)/_build $(distdir)/_inst
	chmod a-w $(distdir)
	test -d $(distdir)/_build || exit 0; \
	dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
	  && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
	  && am__cwd=`pwd` \
	  && $(am__cd) $(distdir)/_build \
	  && ../configure \
	    $(AM_DISTCHECK_CONFIGURE_FLAGS) \
	    $(DISTCHECK_CONFIGURE_FLAGS) \
	    --srcdir=.. --prefix="$$dc_install_base" \
	  && $(MAKE) $(AM_MAKEFLAGS) \
	  && $(MAKE) $(AM_MAKEFLAGS) dvi \
	  && $(MAKE) $(AM_MAKEFLAGS) check \
	  && $(MAKE) $(AM_MAKEFLAGS) install \
	  && $(MAKE) $(AM_MAKEFLAGS) installcheck \
	  && $(MAKE) $(AM_MAKEFLAGS) uninstall \
	  && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
	        distuninstallcheck \
	  && chmod -R a-w "$$dc_install_base" \
	  && ({ \
	       (cd ../.. && umask 077 && mkdir "$$dc_destdir") \
	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
	            distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
	      } || { rm -rf "$$dc_destdir"; exit 1; }) \
	  && rm -rf "$$dc_destdir" \
	  && $(MAKE) $(AM_MAKEFLAGS) dist \
	  && rm -rf $(DIST_ARCHIVES) \
	  && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \
	  && cd "$$am__cwd" \
	  || exit 1
	$(am__post_remove_distdir)
	@(echo "$(distdir) archives ready for distribution: "; \
	  list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
	  sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
# distuninstallcheck: fail if "uninstall" left files behind.
# Requires $(distuninstallcheck_dir) to be non-empty and enterable.  The
# recipe counts the lines printed by $(am__distuninstallcheck_listfiles)
# there; a non-zero count prints the leftovers (via
# $(distuninstallcheck_listfiles)) on stderr and exits 1.  A hint about
# DESTDIR support is added when $(DESTDIR) is set.
distuninstallcheck:
	@test -n '$(distuninstallcheck_dir)' || { \
	  echo 'ERROR: trying to run $@ with an empty' \
	       '$$(distuninstallcheck_dir)' >&2; \
	  exit 1; \
	}; \
	$(am__cd) '$(distuninstallcheck_dir)' || { \
	  echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
	  exit 1; \
	}; \
	test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
	   || { echo "ERROR: files left after uninstall:" ; \
	        if test -n "$(DESTDIR)"; then \
	          echo " (check DESTDIR support)"; \
	        fi ; \
	        $(distuninstallcheck_listfiles) ; \
	        exit 1; } >&2
# distcleancheck: after distclean, fail if the build directory still holds
# files.  Refuses to run when $(srcdir) is "." (i.e. not a VPATH build).
# Leftovers are listed on stderr using $(distcleancheck_listfiles).
distcleancheck: distclean
	@if test '$(srcdir)' = . ; then \
	  echo "ERROR: distcleancheck can only run from a VPATH build" ; \
	  exit 1 ; \
	fi
	@test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
	  || { echo "ERROR: files left in build directory after distclean:" ; \
	       $(distcleancheck_listfiles) ; \
	       exit 1; } >&2
# Top-level entry points.  Most simply forward to the matching *-recursive
# target; all-am names what this directory itself must have built.
check-am: all-am
check: check-recursive
all-am: Makefile $(DATA) config.h
installdirs: installdirs-recursive
# Create the pkg-config install directory under $(DESTDIR) if its name is
# non-empty.
installdirs-am:
	for dir in "$(DESTDIR)$(pkgconfigdir)"; do \
	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
	done
install: install-recursive
install-exec: install-exec-recursive
install-data: install-data-recursive
uninstall: uninstall-recursive
761 | ||
# install-am: run this directory's exec and data install steps in a sub-make.
install-am: all-am
	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am

installcheck: installcheck-recursive
# install-strip: re-run "install" with the stripping install program and
# INSTALL_STRIP_FLAG=-s.  When $(STRIP) is non-empty it is additionally
# handed to the install program through STRIPPROG in INSTALL_PROGRAM_ENV.
install-strip:
	if test -z '$(STRIP)'; then \
	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
	      install; \
	else \
	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
	fi
# Generic clean hooks.  mostlyclean-generic and clean-generic have nothing
# to do in this directory.
mostlyclean-generic:

clean-generic:

# Remove configure-generated files.  The VPATH-only list is skipped when
# building in the source directory; errors from either command are ignored.
distclean-generic:
	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)

# Only prints a notice for maintainers; deletes nothing itself.
maintainer-clean-generic:
	@echo "This command is intended for maintainers to use"
	@echo "it deletes files that may require special tools to rebuild."
# clean / distclean entry points and the per-directory steps they chain.
clean: clean-recursive

clean-am: clean-generic clean-libtool mostlyclean-am

# After the recursive distclean, also drop the configure outputs listed in
# $(am__CONFIG_DISTCLEAN_FILES) and the generated Makefile (errors ignored).
distclean: distclean-recursive
	-rm -f $(am__CONFIG_DISTCLEAN_FILES)
	-rm -f Makefile
distclean-am: clean-am distclean-generic distclean-hdr \
	distclean-libtool distclean-tags
796 | ||
# Documentation-format and install/uninstall hooks.  Each public target
# forwards to its *-recursive counterpart; the *-am variants with no recipe
# and no prerequisites do nothing in this directory.
dvi: dvi-recursive

dvi-am:

html: html-recursive

html-am:

info: info-recursive

info-am:

# The only data installed from this directory is the pkg-config file.
install-data-am: install-pkgconfigDATA

install-dvi: install-dvi-recursive

install-dvi-am:

install-exec-am:

install-html: install-html-recursive

install-html-am:

install-info: install-info-recursive

install-info-am:

install-man:

install-pdf: install-pdf-recursive

install-pdf-am:

install-ps: install-ps-recursive

install-ps-am:

installcheck-am:

# Beyond distclean's removals, maintainer-clean also deletes the autom4te
# cache in $(top_srcdir).  All three rm commands ignore errors.
maintainer-clean: maintainer-clean-recursive
	-rm -f $(am__CONFIG_DISTCLEAN_FILES)
	-rm -rf $(top_srcdir)/autom4te.cache
	-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic

mostlyclean: mostlyclean-recursive

mostlyclean-am: mostlyclean-generic mostlyclean-libtool

pdf: pdf-recursive

pdf-am:

ps: ps-recursive

ps-am:

uninstall-am: uninstall-pkgconfigDATA
856 | ||
# Targets listed as prerequisites of the special target .MAKE.
.MAKE: $(am__recursive_targets) all install-am install-strip

# Targets that do not name real files.
.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \
	am--refresh check check-am clean clean-cscope clean-generic \
	clean-libtool cscope cscopelist-am ctags ctags-am dist \
	dist-all dist-bzip2 dist-gzip dist-lzip dist-shar dist-tarZ \
	dist-xz dist-zip distcheck distclean distclean-generic \
	distclean-hdr distclean-libtool distclean-tags distcleancheck \
	distdir distuninstallcheck dvi dvi-am html html-am info \
	info-am install install-am install-data install-data-am \
	install-dvi install-dvi-am install-exec install-exec-am \
	install-html install-html-am install-info install-info-am \
	install-man install-pdf install-pdf-am install-pkgconfigDATA \
	install-ps install-ps-am install-strip installcheck \
	installcheck-am installdirs installdirs-am maintainer-clean \
	maintainer-clean-generic mostlyclean mostlyclean-generic \
	mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \
	uninstall-am uninstall-pkgconfigDATA


# Stop GNU make versions in [3.59,3.63) from exporting every variable;
# doing so may exceed a system limit (at least on SysV).
.NOEXPORT:
0 | 2014-06-30 | |
1 | Version 2.5.4 | |
2 | Updated Unicode character names and normalization data to 7.0.0 | |
3 | Fixed data normalization bug | |
4 | Updated tests | |
5 | Improved Unicode version updating | |
6 | Fixed compiler warnings | |
7 | ||
8 | 2011-01-13 | |
9 | Updated Unicode character names and normalization data to 6.0.0 | |
10 | Updated copyright dates and contact details | |
11 | ||
12 | 2009-01-30 | |
13 | Fixed returning zero-length strings in Perl | |
14 | ||
15 | 2008-04-07 | |
16 | updated Unicode character names and normalization data to 5.1 | |
17 | now building Windows release with mingw32-gcc instead of CodeWarrior | |
18 | minor compiler bugfixes and code cleanup for portability | |
19 | ||
20 | 2006-03-16 | |
21 | updated Unicode character names and normalization data to 5.0 | |
22 | added license files and docs to subversion repository | |
23 | released new Windows binary package, supporting -x option in teckit_compile |
0 | README | |
1 | ||
2 | This is TECkit, a library for encoding conversion, usable through standalone | |
3 | tools or by linking with other software packages. | |
4 | ||
5 | ||
6 | See the docs folder for TECkit mapping language and conversion tool usage notes. | |
7 | ||
8 | ||
9 | The teckit_compile tool now supports a new option, not described in the PDF | |
10 | documentation: | |
11 | ||
12 | -x generate XML representation rather than compiled table | |
13 | ||
14 | This is primarily intended for use by the Reprise utility, and the XML format | |
15 | produced is subject to change according to the needs of that tool. |
0 | /* | |
1 | * Copyright 2001-2004 Unicode, Inc. | |
2 | * | |
3 | * Disclaimer | |
4 | * | |
5 | * This source code is provided as is by Unicode, Inc. No claims are | |
6 | * made as to fitness for any particular purpose. No warranties of any | |
7 | * kind are expressed or implied. The recipient agrees to determine | |
8 | * applicability of information provided. If this file has been | |
9 | * purchased on magnetic or optical media from Unicode, Inc., the | |
10 | * sole remedy for any claim will be exchange of defective media | |
11 | * within 90 days of receipt. | |
12 | * | |
13 | * Limitations on Rights to Redistribute This Code | |
14 | * | |
15 | * Unicode, Inc. hereby grants the right to freely use the information | |
16 | * supplied in this file in the creation of products supporting the | |
17 | * Unicode Standard, and to make copies of this file in any form | |
18 | * for internal or external distribution as long as this notice | |
19 | * remains attached. | |
20 | */ | |
21 | ||
22 | /* --------------------------------------------------------------------- | |
23 | ||
24 | Conversions between UTF32, UTF-16, and UTF-8. Source code file. | |
25 | Author: Mark E. Davis, 1994. | |
26 | Rev History: Rick McGowan, fixes & updates May 2001. | |
27 | Sept 2001: fixed const & error conditions per | |
28 | mods suggested by S. Parent & A. Lillich. | |
29 | June 2002: Tim Dodd added detection and handling of incomplete | |
30 | source sequences, enhanced error detection, added casts | |
31 | to eliminate compiler warnings. | |
32 | July 2003: slight mods to back out aggressive FFFE detection. | |
33 | Jan 2004: updated switches in from-UTF8 conversions. | |
34 | Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. | |
35 | ||
36 | See the header file "ConvertUTF.h" for complete documentation. | |
37 | ||
38 | ------------------------------------------------------------------------ */ | |
39 | ||
40 | ||
41 | #include "ConvertUTF.h" | |
42 | #ifdef CVTUTF_DEBUG | |
43 | #include <stdio.h> | |
44 | #endif | |
45 | ||
46 | static const int halfShift = 10; /* used for shifting by 10 bits */ | |
47 | ||
48 | static const UTF32 halfBase = 0x0010000UL; | |
49 | static const UTF32 halfMask = 0x3FFUL; | |
50 | ||
51 | #define UNI_SUR_HIGH_START (UTF32)0xD800 | |
52 | #define UNI_SUR_HIGH_END (UTF32)0xDBFF | |
53 | #define UNI_SUR_LOW_START (UTF32)0xDC00 | |
54 | #define UNI_SUR_LOW_END (UTF32)0xDFFF | |
55 | #define false 0 | |
56 | #define true 1 | |
57 | ||
58 | /* --------------------------------------------------------------------- */ | |
59 | ||
60 | ConversionResult ConvertUTF32toUTF16 ( | |
61 | const UTF32** sourceStart, const UTF32* sourceEnd, | |
62 | UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { | |
63 | ConversionResult result = conversionOK; | |
64 | const UTF32* source = *sourceStart; | |
65 | UTF16* target = *targetStart; | |
66 | while (source < sourceEnd) { | |
67 | UTF32 ch; | |
68 | if (target >= targetEnd) { | |
69 | result = targetExhausted; break; | |
70 | } | |
71 | ch = *source++; | |
72 | if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ | |
73 | /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */ | |
74 | if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { | |
75 | if (flags == strictConversion) { | |
76 | --source; /* return to the illegal value itself */ | |
77 | result = sourceIllegal; | |
78 | break; | |
79 | } else { | |
80 | *target++ = UNI_REPLACEMENT_CHAR; | |
81 | } | |
82 | } else { | |
83 | *target++ = (UTF16)ch; /* normal case */ | |
84 | } | |
85 | } else if (ch > UNI_MAX_LEGAL_UTF32) { | |
86 | if (flags == strictConversion) { | |
87 | result = sourceIllegal; | |
88 | } else { | |
89 | *target++ = UNI_REPLACEMENT_CHAR; | |
90 | } | |
91 | } else { | |
92 | /* target is a character in range 0xFFFF - 0x10FFFF. */ | |
93 | if (target + 1 >= targetEnd) { | |
94 | --source; /* Back up source pointer! */ | |
95 | result = targetExhausted; break; | |
96 | } | |
97 | ch -= halfBase; | |
98 | *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); | |
99 | *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); | |
100 | } | |
101 | } | |
102 | *sourceStart = source; | |
103 | *targetStart = target; | |
104 | return result; | |
105 | } | |
106 | ||
107 | /* --------------------------------------------------------------------- */ | |
108 | ||
109 | ConversionResult ConvertUTF16toUTF32 ( | |
110 | const UTF16** sourceStart, const UTF16* sourceEnd, | |
111 | UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { | |
112 | ConversionResult result = conversionOK; | |
113 | const UTF16* source = *sourceStart; | |
114 | UTF32* target = *targetStart; | |
115 | UTF32 ch, ch2; | |
116 | while (source < sourceEnd) { | |
117 | const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ | |
118 | ch = *source++; | |
119 | /* If we have a surrogate pair, convert to UTF32 first. */ | |
120 | if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { | |
121 | /* If the 16 bits following the high surrogate are in the source buffer... */ | |
122 | if (source < sourceEnd) { | |
123 | ch2 = *source; | |
124 | /* If it's a low surrogate, convert to UTF32. */ | |
125 | if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { | |
126 | ch = ((ch - UNI_SUR_HIGH_START) << halfShift) | |
127 | + (ch2 - UNI_SUR_LOW_START) + halfBase; | |
128 | ++source; | |
129 | } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ | |
130 | --source; /* return to the illegal value itself */ | |
131 | result = sourceIllegal; | |
132 | break; | |
133 | } | |
134 | } else { /* We don't have the 16 bits following the high surrogate. */ | |
135 | --source; /* return to the high surrogate */ | |
136 | result = sourceExhausted; | |
137 | break; | |
138 | } | |
139 | } else if (flags == strictConversion) { | |
140 | /* UTF-16 surrogate values are illegal in UTF-32 */ | |
141 | if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { | |
142 | --source; /* return to the illegal value itself */ | |
143 | result = sourceIllegal; | |
144 | break; | |
145 | } | |
146 | } | |
147 | if (target >= targetEnd) { | |
148 | source = oldSource; /* Back up source pointer! */ | |
149 | result = targetExhausted; break; | |
150 | } | |
151 | *target++ = ch; | |
152 | } | |
153 | *sourceStart = source; | |
154 | *targetStart = target; | |
155 | #ifdef CVTUTF_DEBUG | |
156 | if (result == sourceIllegal) { | |
157 | fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2); | |
158 | fflush(stderr); | |
159 | } | |
160 | #endif | |
161 | return result; | |
162 | } | |
163 | ||
164 | /* --------------------------------------------------------------------- */ | |
165 | ||
/*
 * trailingBytesForUTF8[b] is the number of bytes that are supposed to
 * follow a UTF-8 sequence whose first byte is b.
 * Note that *legal* UTF-8 never has 4 or 5 trailing bytes.  Those entries
 * are left as-is for anyone who wants to handle the longer forms that
 * earlier algorithms allowed.
 */
static const char trailingBytesForUTF8[256] = {
    /* 0x00 - 0xBF */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xC0 - 0xDF */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xE0 - 0xEF */
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    /* 0xF0 - 0xF7, 0xF8 - 0xFB, 0xFC - 0xFF */
    3,3,3,3,3,3,3,3, 4,4,4,4, 5,5,5,5
};
183 | ||
184 | /* | |
185 | * Magic values subtracted from a buffer value during UTF8 conversion. | |
186 | * This table contains as many values as there might be trailing bytes | |
187 | * in a UTF-8 sequence. | |
188 | */ | |
189 | static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, | |
190 | 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; | |
191 | ||
192 | /* | |
193 | * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed | |
194 | * into the first byte, depending on how many bytes follow. There are | |
195 | * as many entries in this table as there are UTF-8 sequence types. | |
196 | * (I.e., one byte sequence, two byte... etc.). Remember that sequencs | |
197 | * for *legal* UTF-8 will be 4 or fewer bytes total. | |
198 | */ | |
199 | static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; | |
200 | ||
201 | /* --------------------------------------------------------------------- */ | |
202 | ||
203 | /* The interface converts a whole buffer to avoid function-call overhead. | |
204 | * Constants have been gathered. Loops & conditionals have been removed as | |
205 | * much as possible for efficiency, in favor of drop-through switches. | |
206 | * (See "Note A" at the bottom of the file for equivalent code.) | |
207 | * If your compiler supports it, the "isLegalUTF8" call can be turned | |
208 | * into an inline function. | |
209 | */ | |
210 | ||
211 | /* --------------------------------------------------------------------- */ | |
212 | ||
213 | ConversionResult ConvertUTF16toUTF8 ( | |
214 | const UTF16** sourceStart, const UTF16* sourceEnd, | |
215 | UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { | |
216 | ConversionResult result = conversionOK; | |
217 | const UTF16* source = *sourceStart; | |
218 | UTF8* target = *targetStart; | |
219 | while (source < sourceEnd) { | |
220 | UTF32 ch; | |
221 | unsigned short bytesToWrite = 0; | |
222 | const UTF32 byteMask = 0xBF; | |
223 | const UTF32 byteMark = 0x80; | |
224 | const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ | |
225 | ch = *source++; | |
226 | /* If we have a surrogate pair, convert to UTF32 first. */ | |
227 | if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { | |
228 | /* If the 16 bits following the high surrogate are in the source buffer... */ | |
229 | if (source < sourceEnd) { | |
230 | UTF32 ch2 = *source; | |
231 | /* If it's a low surrogate, convert to UTF32. */ | |
232 | if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { | |
233 | ch = ((ch - UNI_SUR_HIGH_START) << halfShift) | |
234 | + (ch2 - UNI_SUR_LOW_START) + halfBase; | |
235 | ++source; | |
236 | } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ | |
237 | --source; /* return to the illegal value itself */ | |
238 | result = sourceIllegal; | |
239 | break; | |
240 | } | |
241 | } else { /* We don't have the 16 bits following the high surrogate. */ | |
242 | --source; /* return to the high surrogate */ | |
243 | result = sourceExhausted; | |
244 | break; | |
245 | } | |
246 | } else if (flags == strictConversion) { | |
247 | /* UTF-16 surrogate values are illegal in UTF-32 */ | |
248 | if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { | |
249 | --source; /* return to the illegal value itself */ | |
250 | result = sourceIllegal; | |
251 | break; | |
252 | } | |
253 | } | |
254 | /* Figure out how many bytes the result will require */ | |
255 | if (ch < (UTF32)0x80) { bytesToWrite = 1; | |
256 | } else if (ch < (UTF32)0x800) { bytesToWrite = 2; | |
257 | } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; | |
258 | } else if (ch < (UTF32)0x110000) { bytesToWrite = 4; | |
259 | } else { bytesToWrite = 3; | |
260 | ch = UNI_REPLACEMENT_CHAR; | |
261 | } | |
262 | ||
263 | target += bytesToWrite; | |
264 | if (target > targetEnd) { | |
265 | source = oldSource; /* Back up source pointer! */ | |
266 | target -= bytesToWrite; result = targetExhausted; break; | |
267 | } | |
268 | switch (bytesToWrite) { /* note: everything falls through. */ | |
269 | case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; | |
270 | case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; | |
271 | case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; | |
272 | case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]); | |
273 | } | |
274 | target += bytesToWrite; | |
275 | } | |
276 | *sourceStart = source; | |
277 | *targetStart = target; | |
278 | return result; | |
279 | } | |
280 | ||
281 | /* --------------------------------------------------------------------- */ | |
282 | ||
283 | /* | |
284 | * Utility routine to tell whether a sequence of bytes is legal UTF-8. | |
285 | * This must be called with the length pre-determined by the first byte. | |
286 | * If not calling this from ConvertUTF8to*, then the length can be set by: | |
287 | * length = trailingBytesForUTF8[*source]+1; | |
288 | * and the sequence is illegal right away if there aren't that many bytes | |
289 | * available. | |
290 | * If presented with a length > 4, this returns false. The Unicode | |
291 | * definition of UTF-8 goes up to 4-byte sequences. | |
292 | */ | |
293 | ||
294 | static Boolean isLegalUTF8(const UTF8 *source, int length) { | |
295 | UTF8 a; | |
296 | const UTF8 *srcptr = source+length; | |
297 | switch (length) { | |
298 | default: return false; | |
299 | /* Everything else falls through when "true"... */ | |
300 | case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; | |
301 | case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; | |
302 | case 2: if ((a = (*--srcptr)) > 0xBF) return false; | |
303 | ||
304 | switch (*source) { | |
305 | /* no fall-through in this inner switch */ | |
306 | case 0xE0: if (a < 0xA0) return false; break; | |
307 | case 0xED: if (a > 0x9F) return false; break; | |
308 | case 0xF0: if (a < 0x90) return false; break; | |
309 | case 0xF4: if (a > 0x8F) return false; break; | |
310 | default: if (a < 0x80) return false; | |
311 | } | |
312 | ||
313 | case 1: if (*source >= 0x80 && *source < 0xC2) return false; | |
314 | } | |
315 | if (*source > 0xF4) return false; | |
316 | return true; | |
317 | } | |
318 | ||
319 | /* --------------------------------------------------------------------- */ | |
320 | ||
321 | /* | |
322 | * Exported function to return whether a UTF-8 sequence is legal or not. | |
323 | * This is not used here; it's just exported. | |
324 | */ | |
325 | Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { | |
326 | int length = trailingBytesForUTF8[*source]+1; | |
327 | if (source+length > sourceEnd) { | |
328 | return false; | |
329 | } | |
330 | return isLegalUTF8(source, length); | |
331 | } | |
332 | ||
333 | /* --------------------------------------------------------------------- */ | |
334 | ||
335 | ConversionResult ConvertUTF8toUTF16 ( | |
336 | const UTF8** sourceStart, const UTF8* sourceEnd, | |
337 | UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { | |
338 | ConversionResult result = conversionOK; | |
339 | const UTF8* source = *sourceStart; | |
340 | UTF16* target = *targetStart; | |
341 | while (source < sourceEnd) { | |
342 | UTF32 ch = 0; | |
343 | unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; | |
344 | if (source + extraBytesToRead >= sourceEnd) { | |
345 | result = sourceExhausted; break; | |
346 | } | |
347 | /* Do this check whether lenient or strict */ | |
348 | if (! isLegalUTF8(source, extraBytesToRead+1)) { | |
349 | result = sourceIllegal; | |
350 | break; | |
351 | } | |
352 | /* | |
353 | * The cases all fall through. See "Note A" below. | |
354 | */ | |
355 | switch (extraBytesToRead) { | |
356 | case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ | |
357 | case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ | |
358 | case 3: ch += *source++; ch <<= 6; | |
359 | case 2: ch += *source++; ch <<= 6; | |
360 | case 1: ch += *source++; ch <<= 6; | |
361 | case 0: ch += *source++; | |
362 | } | |
363 | ch -= offsetsFromUTF8[extraBytesToRead]; | |
364 | ||
365 | if (target >= targetEnd) { | |
366 | source -= (extraBytesToRead+1); /* Back up source pointer! */ | |
367 | result = targetExhausted; break; | |
368 | } | |
369 | if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ | |
370 | /* UTF-16 surrogate values are illegal in UTF-32 */ | |
371 | if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { | |
372 | if (flags == strictConversion) { | |
373 | source -= (extraBytesToRead+1); /* return to the illegal value itself */ | |
374 | result = sourceIllegal; | |
375 | break; | |
376 | } else { | |
377 | *target++ = UNI_REPLACEMENT_CHAR; | |
378 | } | |
379 | } else { | |
380 | *target++ = (UTF16)ch; /* normal case */ | |
381 | } | |
382 | } else if (ch > UNI_MAX_UTF16) { | |
383 | if (flags == strictConversion) { | |
384 | result = sourceIllegal; | |
385 | source -= (extraBytesToRead+1); /* return to the start */ | |
386 | break; /* Bail out; shouldn't continue */ | |
387 | } else { | |
388 | *target++ = UNI_REPLACEMENT_CHAR; | |
389 | } | |
390 | } else { | |
391 | /* target is a character in range 0xFFFF - 0x10FFFF. */ | |
392 | if (target + 1 >= targetEnd) { | |
393 | source -= (extraBytesToRead+1); /* Back up source pointer! */ | |
394 | result = targetExhausted; break; | |
395 | } | |
396 | ch -= halfBase; | |
397 | *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); | |
398 | *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); | |
399 | } | |
400 | } | |
401 | *sourceStart = source; | |
402 | *targetStart = target; | |
403 | return result; | |
404 | } | |
405 | ||
406 | /* --------------------------------------------------------------------- */ | |
407 | ||
408 | ConversionResult ConvertUTF32toUTF8 ( | |
409 | const UTF32** sourceStart, const UTF32* sourceEnd, | |
410 | UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { | |
411 | ConversionResult result = conversionOK; | |
412 | const UTF32* source = *sourceStart; | |
413 | UTF8* target = *targetStart; | |
414 | while (source < sourceEnd) { | |
415 | UTF32 ch; | |
416 | unsigned short bytesToWrite = 0; | |
417 | const UTF32 byteMask = 0xBF; | |
418 | const UTF32 byteMark = 0x80; | |
419 | ch = *source++; | |
420 | if (flags == strictConversion ) { | |
421 | /* UTF-16 surrogate values are illegal in UTF-32 */ | |
422 | if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { | |
423 | --source; /* return to the illegal value itself */ | |
424 | result = sourceIllegal; | |
425 | break; | |
426 | } | |
427 | } | |
428 | /* | |
429 | * Figure out how many bytes the result will require. Turn any | |
430 | * illegally large UTF32 things (> Plane 17) into replacement chars. | |
431 | */ | |
432 | if (ch < (UTF32)0x80) { bytesToWrite = 1; | |
433 | } else if (ch < (UTF32)0x800) { bytesToWrite = 2; | |
434 | } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; | |
435 | } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4; | |
436 | } else { bytesToWrite = 3; | |
437 | ch = UNI_REPLACEMENT_CHAR; | |
438 | result = sourceIllegal; | |
439 | } | |
440 | ||
441 | target += bytesToWrite; | |
442 | if (target > targetEnd) { | |
443 | --source; /* Back up source pointer! */ | |
444 | target -= bytesToWrite; result = targetExhausted; break; | |
445 | } | |
446 | switch (bytesToWrite) { /* note: everything falls through. */ | |
447 | case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; | |
448 | case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; | |
449 | case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; | |
450 | case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]); | |
451 | } | |
452 | target += bytesToWrite; | |
453 | } | |
454 | *sourceStart = source; | |
455 | *targetStart = target; | |
456 | return result; | |
457 | } | |
458 | ||
459 | /* --------------------------------------------------------------------- */ | |
460 | ||
461 | ConversionResult ConvertUTF8toUTF32 ( | |
462 | const UTF8** sourceStart, const UTF8* sourceEnd, | |
463 | UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { | |
464 | ConversionResult result = conversionOK; | |
465 | const UTF8* source = *sourceStart; | |
466 | UTF32* target = *targetStart; | |
467 | while (source < sourceEnd) { | |
468 | UTF32 ch = 0; | |
469 | unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; | |
470 | if (source + extraBytesToRead >= sourceEnd) { | |
471 | result = sourceExhausted; break; | |
472 | } | |
473 | /* Do this check whether lenient or strict */ | |
474 | if (! isLegalUTF8(source, extraBytesToRead+1)) { | |
475 | result = sourceIllegal; | |
476 | break; | |
477 | } | |
478 | /* | |
479 | * The cases all fall through. See "Note A" below. | |
480 | */ | |
481 | switch (extraBytesToRead) { | |
482 | case 5: ch += *source++; ch <<= 6; | |
483 | case 4: ch += *source++; ch <<= 6; | |
484 | case 3: ch += *source++; ch <<= 6; | |
485 | case 2: ch += *source++; ch <<= 6; | |
486 | case 1: ch += *source++; ch <<= 6; | |
487 | case 0: ch += *source++; | |
488 | } | |
489 | ch -= offsetsFromUTF8[extraBytesToRead]; | |
490 | ||
491 | if (target >= targetEnd) { | |
492 | source -= (extraBytesToRead+1); /* Back up the source pointer! */ | |
493 | result = targetExhausted; break; | |
494 | } | |
495 | if (ch <= UNI_MAX_LEGAL_UTF32) { | |
496 | /* | |
497 | * UTF-16 surrogate values are illegal in UTF-32, and anything | |
498 | * over Plane 17 (> 0x10FFFF) is illegal. | |
499 | */ | |
500 | if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { | |
501 | if (flags == strictConversion) { | |
502 | source -= (extraBytesToRead+1); /* return to the illegal value itself */ | |
503 | result = sourceIllegal; | |
504 | break; | |
505 | } else { | |
506 | *target++ = UNI_REPLACEMENT_CHAR; | |
507 | } | |
508 | } else { | |
509 | *target++ = ch; | |
510 | } | |
511 | } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */ | |
512 | result = sourceIllegal; | |
513 | *target++ = UNI_REPLACEMENT_CHAR; | |
514 | } | |
515 | } | |
516 | *sourceStart = source; | |
517 | *targetStart = target; | |
518 | return result; | |
519 | } | |
520 | ||
521 | /* --------------------------------------------------------------------- | |
522 | ||
523 | Note A. | |
524 | The fall-through switches in UTF-8 reading code save a | |
525 | temp variable, some decrements & conditionals. The switches | |
526 | are equivalent to the following loop: | |
527 | { | |
528 | int tmpBytesToRead = extraBytesToRead+1; | |
529 | do { | |
530 | ch += *source++; | |
531 | --tmpBytesToRead; | |
532 | if (tmpBytesToRead) ch <<= 6; | |
533 | } while (tmpBytesToRead > 0); | |
534 | } | |
535 | In UTF-8 writing code, the switches on "bytesToWrite" are | |
536 | similarly unrolled loops. | |
537 | ||
538 | --------------------------------------------------------------------- */ |
0 | /* | |
1 | * Copyright 2001-2004 Unicode, Inc. | |
2 | * | |
3 | * Disclaimer | |
4 | * | |
5 | * This source code is provided as is by Unicode, Inc. No claims are | |
6 | * made as to fitness for any particular purpose. No warranties of any | |
7 | * kind are expressed or implied. The recipient agrees to determine | |
8 | * applicability of information provided. If this file has been | |
9 | * purchased on magnetic or optical media from Unicode, Inc., the | |
10 | * sole remedy for any claim will be exchange of defective media | |
11 | * within 90 days of receipt. | |
12 | * | |
13 | * Limitations on Rights to Redistribute This Code | |
14 | * | |
15 | * Unicode, Inc. hereby grants the right to freely use the information | |
16 | * supplied in this file in the creation of products supporting the | |
17 | * Unicode Standard, and to make copies of this file in any form | |
18 | * for internal or external distribution as long as this notice | |
19 | * remains attached. | |
20 | */ | |
21 | ||
22 | /* --------------------------------------------------------------------- | |
23 | ||
24 | Conversions between UTF32, UTF-16, and UTF-8. Header file. | |
25 | ||
26 | Several functions are included here, forming a complete set of | |
27 | conversions between the three formats. UTF-7 is not included | |
28 | here, but is handled in a separate source file. | |
29 | ||
30 | Each of these routines takes pointers to input buffers and output | |
31 | buffers. The input buffers are const. | |
32 | ||
33 | Each routine converts the text between *sourceStart and sourceEnd, | |
34 | putting the result into the buffer between *targetStart and | |
35 | targetEnd. Note: the end pointers are *after* the last item: e.g. | |
36 | *(sourceEnd - 1) is the last item. | |
37 | ||
38 | The return result indicates whether the conversion was successful, | |
39 | and if not, whether the problem was in the source or target buffers. | |
40 | (Only the first encountered problem is indicated.) | |
41 | ||
42 | After the conversion, *sourceStart and *targetStart are both | |
43 | updated to point to the end of last text successfully converted in | |
44 | the respective buffers. | |
45 | ||
46 | Input parameters: | |
47 | sourceStart - pointer to a pointer to the source buffer. | |
48 | The contents of this are modified on return so that | |
49 | it points at the next thing to be converted. | |
50 | targetStart - similarly, pointer to pointer to the target buffer. | |
51 | sourceEnd, targetEnd - respectively pointers to the ends of the | |
52 | two buffers, for overflow checking only. | |
53 | ||
54 | These conversion functions take a ConversionFlags argument. When this | |
55 | flag is set to strict, both irregular sequences and isolated surrogates | |
56 | will cause an error. When the flag is set to lenient, both irregular | |
57 | sequences and isolated surrogates are converted. | |
58 | ||
59 | Whether the flag is strict or lenient, all illegal sequences will cause | |
60 | an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>, | |
61 | or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code | |
62 | must check for illegal sequences. | |
63 | ||
64 | When the flag is set to lenient, characters over 0x10FFFF are converted | |
65 | to the replacement character; otherwise (when the flag is set to strict) | |
66 | they constitute an error. | |
67 | ||
68 | Output parameters: | |
69 | The value "sourceIllegal" is returned from some routines if the input | |
70 | sequence is malformed. When "sourceIllegal" is returned, the source | |
71 | value will point to the illegal value that caused the problem. E.g., | |
72 | in UTF-8 when a sequence is malformed, it points to the start of the | |
73 | malformed sequence. | |
74 | ||
75 | Author: Mark E. Davis, 1994. | |
76 | Rev History: Rick McGowan, fixes & updates May 2001. | |
77 | Fixes & updates, Sept 2001. | |
78 | ||
79 | ------------------------------------------------------------------------ */ | |
80 | ||
81 | /* --------------------------------------------------------------------- | |
82 | The following 4 definitions are compiler-specific. | |
83 | The C standard does not guarantee that wchar_t has at least | |
84 | 16 bits, so wchar_t is no less portable than unsigned short! | |
85 | All should be unsigned values to avoid sign extension during | |
86 | bit mask & shift operations. | |
87 | ------------------------------------------------------------------------ */ | |
88 | ||
/* Code-unit types used by the converters; all are unsigned so that
 * masking and shifting never sign-extend. */
typedef unsigned long	UTF32;		/* holds one UTF-32 code unit; at least 32 bits wide */
typedef unsigned short	UTF16;		/* holds one UTF-16 code unit; at least 16 bits wide */
typedef unsigned char	UTF8;		/* holds one UTF-8 code unit; typically 8 bits wide */
typedef unsigned char	Boolean;	/* truth value: 0 or 1 */
93 | ||
/*
 * Some fundamental constants.
 *
 * Each expansion is wrapped in its own parentheses so the macro behaves as a
 * single primary expression wherever it is used (for example as the operand
 * of sizeof, which would otherwise bind to the cast's type name alone).
 */
#define UNI_REPLACEMENT_CHAR	((UTF32)0x0000FFFD)	/* substituted for unconvertible input */
#define UNI_MAX_BMP		((UTF32)0x0000FFFF)	/* last code point of the BMP */
#define UNI_MAX_UTF16		((UTF32)0x0010FFFF)	/* largest value representable in UTF-16 */
#define UNI_MAX_UTF32		((UTF32)0x7FFFFFFF)	/* largest 31-bit UTF-32 value */
#define UNI_MAX_LEGAL_UTF32	((UTF32)0x0010FFFF)	/* largest legal code point */
100 | ||
/* Outcome reported by every Convert* routine. */
typedef enum {
	conversionOK    = 0,	/* conversion successful */
	sourceExhausted = 1,	/* partial character in source, but hit end */
	targetExhausted = 2,	/* insuff. room in target for conversion */
	sourceIllegal   = 3	/* source sequence is illegal/malformed */
} ConversionResult;
107 | ||
/* Strictness selector passed to every Convert* routine. */
typedef enum {
	strictConversion  = 0,
	lenientConversion = 1
} ConversionFlags;
112 | ||
113 | /* This is for C++ and does no harm in C */ | |
114 | #ifdef __cplusplus | |
115 | extern "C" { | |
116 | #endif | |
117 | ||
118 | ConversionResult ConvertUTF8toUTF16 ( | |
119 | const UTF8** sourceStart, const UTF8* sourceEnd, | |
120 | UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); | |
121 | ||
122 | ConversionResult ConvertUTF16toUTF8 ( | |
123 | const UTF16** sourceStart, const UTF16* sourceEnd, | |
124 | UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); | |
125 | ||
126 | ConversionResult ConvertUTF8toUTF32 ( | |
127 | const UTF8** sourceStart, const UTF8* sourceEnd, | |
128 | UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); | |
129 | ||
130 | ConversionResult ConvertUTF32toUTF8 ( | |
131 | const UTF32** sourceStart, const UTF32* sourceEnd, | |
132 | UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); | |
133 | ||
134 | ConversionResult ConvertUTF16toUTF32 ( | |
135 | const UTF16** sourceStart, const UTF16* sourceEnd, | |
136 | UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); | |
137 | ||
138 | ConversionResult ConvertUTF32toUTF16 ( | |
139 | const UTF32** sourceStart, const UTF32* sourceEnd, | |
140 | UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); | |
141 | ||
142 | Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); | |
143 | ||
144 | #ifdef __cplusplus | |
145 | } | |
146 | #endif | |
147 | ||
148 | /* --------------------------------------------------------------------- */ |
0 | #include <cstring> | |
1 | #include <cstdio> | |
2 | #include <cstdlib> | |
3 | ||
4 | #include <string> | |
5 | #include <fstream> | |
6 | #include <map> | |
7 | #include <vector> | |
8 | ||
9 | using namespace std; | |
10 | ||
11 | #include "TECkit_Engine.h" | |
12 | #include "ConvertUTF.h" | |
13 | #include "sfReader.h" | |
14 | ||
15 | #ifndef platformUTF16 | |
16 | #ifdef __MWERKS__ | |
17 | #if __dest_os==__win32_os | |
18 | #define platformUTF16 kForm_UTF16LE | |
19 | #else | |
20 | #define platformUTF16 kForm_UTF16BE | |
21 | #endif | |
22 | #endif | |
23 | #endif | |
24 | ||
25 | #ifndef platformUTF16 | |
26 | #ifdef __APPLE__ | |
27 | #include <TargetConditionals.h> | |
28 | #if TARGET_RT_BIG_ENDIAN | |
29 | #define platformUTF16 kForm_UTF16BE | |
30 | #else | |
31 | #define platformUTF16 kForm_UTF16LE | |
32 | #endif | |
33 | #endif | |
34 | #endif | |
35 | ||
36 | #ifndef platformUTF16 | |
37 | #include "config.h" | |
38 | #if WORDS_BIGENDIAN | |
39 | #define platformUTF16 kForm_UTF16BE | |
40 | #else | |
41 | #define platformUTF16 kForm_UTF16LE | |
42 | #endif | |
43 | #endif | |
44 | ||
45 | #if HAVE_LIBEXPAT | |
46 | #include <expat.h> | |
47 | #else | |
48 | #include "expat/xmlparse/xmlparse.h" | |
49 | #endif | |
50 | ||
51 | ||
52 | char* gMappingDirectory; | |
53 | ||
54 | typedef basic_string<UniChar> ustring; | |
55 | ||
56 | map<string,string> sfmMappings; | |
57 | map<string,string> inlineMappings; | |
58 | ||
59 | map<string,string>* mappings; | |
60 | ||
61 | string defaultMapping; | |
62 | string sfmMapping; | |
63 | string inlineMapping; | |
64 | ||
65 | ustring sfmCharsU; | |
66 | ustring inlineCharsU; | |
67 | long escapeCharU = 0x5c; | |
68 | long inlineEscapeCharU = -1; | |
69 | long startInlineU = -1; | |
70 | long endInlineU = -1; | |
71 | ||
72 | const char* defaultMarkerChars = "abcdefghijklmnopqrstuvwxyz_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; | |
73 | ||
74 | map<string,TECkit_Converter> converters; | |
75 | ||
76 | enum dir_enum { | |
77 | kDirection_Unspecified = 0, | |
78 | kDirection_8_U, | |
79 | kDirection_U_8 | |
80 | } direction = kDirection_Unspecified; | |
81 | ||
82 | int inForm = kForm_Unspecified; | |
83 | int outForm = kForm_Unspecified; | |
84 | ||
85 | static int sDepth; | |
86 | static int sError; | |
87 | ||
/* True when the two NUL-terminated strings have identical contents. */
static inline bool
streq(const char* s, const char* t)
{
	return !strcmp(s, t);
}
93 | ||
94 | static ustring | |
95 | Utf8ToString(const char* s) | |
96 | { | |
97 | int len = strlen(s); | |
98 | UniChar* buf = new UniChar[len]; | |
99 | const Byte* sourceStart = (Byte*)s; | |
100 | UniChar* targetStart = buf; | |
101 | int status = ConvertUTF8toUTF16(&sourceStart, sourceStart + len, &targetStart, targetStart + len, lenientConversion); | |
102 | if (status != conversionOK) { | |
103 | fprintf(stderr, "error %d converting UTF-8 to UTF-16\n", status); | |
104 | exit(1); | |
105 | } | |
106 | ustring ustr(buf, targetStart - buf); | |
107 | delete[] buf; | |
108 | return ustr; | |
109 | } | |
110 | ||
111 | static void | |
112 | startElement(void* /*userData*/, const XML_Char *name, const XML_Char **atts) | |
113 | { | |
114 | switch (sDepth) { | |
115 | case 0: | |
116 | if (streq(name, "sfConversion")) { | |
117 | while (*atts) { | |
118 | const XML_Char* n = *atts++; | |
119 | const XML_Char* v = *atts++; | |
120 | if (streq(n, "defaultMapping")) { | |
121 | defaultMapping = v; | |
122 | } | |
123 | else { | |
124 | fprintf(stderr, "unrecognized attribute \"%s\" for <sfConversion>\n", n); | |
125 | sError = 1; | |
126 | break; | |
127 | } | |
128 | } | |
129 | if (defaultMapping.length() == 0) { | |
130 | fprintf(stderr, "<sfConversion> requires \"defaultMapping\" attribute\n"); | |
131 | sError = 1; | |
132 | break; | |
133 | } | |
134 | } | |
135 | else { | |
136 | fprintf(stderr, "expected <sfConversion>, not <%s>, as top-level element in control file\n", name); | |
137 | sError = 1; | |
138 | break; | |
139 | } | |
140 | break; | |
141 | ||
142 | case 1: | |
143 | if (streq(name, "sfMarkers")) { | |
144 | while (*atts) { | |
145 | const XML_Char* n = *atts++; | |
146 | const XML_Char* v = *atts++; | |
147 | if (streq(n, "escape")) { | |
148 | ustring u = Utf8ToString(v); | |
149 | if (u.length() != 1) { | |
150 | fprintf(stderr, "\"escape\" attribute of <sfMarkers> must be a single character\n"); | |
151 | sError = 1; | |
152 | break; | |
153 | } | |
154 | escapeCharU = u[0]; | |
155 | } | |
156 | else if (streq(n, "chars")) { | |
157 | sfmCharsU = Utf8ToString(v); | |
158 | } | |
159 | else if (streq(n, "mapping")) { | |
160 | sfmMapping = v; | |
161 | } | |
162 | else { | |
163 | fprintf(stderr, "unrecognized attribute \"%s\" for <sfMarkers>\n", n); | |
164 | sError = 1; | |
165 | break; | |
166 | } | |
167 | } | |
168 | if (sfmMapping.length() == 0) | |
169 | sfmMapping = defaultMapping; | |
170 | if (sfmCharsU.length() == 0) { | |
171 | sfmCharsU.reserve(96); | |
172 | for (const char* cp = defaultMarkerChars; *cp; ++cp) | |
173 | sfmCharsU.append(1, *cp); | |
174 | } | |
175 | mappings = &sfmMappings; | |
176 | } | |
177 | else if (streq(name, "inlineMarkers")) { | |
178 | while (*atts) { | |
179 | const XML_Char* n = *atts++; | |
180 | const XML_Char* v = *atts++; | |
181 | if (streq(n, "escape")) { | |
182 | ustring u = Utf8ToString(v); | |
183 | if (u.length() != 1) { | |
184 | fprintf(stderr, "\"escape\" attribute of <inlineMarkers> must be a single character\n"); | |
185 | sError = 1; | |
186 | break; | |
187 | } | |
188 | inlineEscapeCharU = u[0]; | |
189 | } | |
190 | else if (streq(n, "start")) { | |
191 | ustring u = Utf8ToString(v); | |
192 | if (u.length() != 1) { | |
193 | fprintf(stderr, "\"start\" attribute of <inlineMarkers> must be a single character\n"); | |
194 | sError = 1; | |
195 | break; | |
196 | } | |
197 | startInlineU = u[0]; | |
198 | } | |
199 | else if (streq(n, "end")) { | |
200 | ustring u = Utf8ToString(v); | |
201 | if (u.length() != 1) { | |
202 | fprintf(stderr, "\"end\" attribute of <inlineMarkers> must be a single character\n"); | |
203 | sError = 1; | |
204 | break; | |
205 | } | |
206 | endInlineU = u[0]; | |
207 | } | |
208 | else if (streq(n, "chars")) { | |
209 | inlineCharsU = Utf8ToString(v); | |
210 | } | |
211 | else if (streq(n, "mapping")) { | |
212 | inlineMapping = v; | |
213 | } | |
214 | else { | |
215 | fprintf(stderr, "unrecognized attribute \"%s\" for <inlineMarkers>\n", n); | |
216 | sError = 1; | |
217 | break; | |
218 | } | |
219 | } | |
220 | if (inlineMapping.length() == 0) | |
221 | inlineMapping = defaultMapping; | |
222 | if (inlineCharsU.length() == 0) { | |
223 | inlineCharsU.reserve(96); | |
224 | for (const char* cp = defaultMarkerChars; *cp; ++cp) | |
225 | inlineCharsU.append(1, *cp); | |
226 | } | |
227 | mappings = &inlineMappings; | |
228 | } | |
229 | else { | |
230 | fprintf(stderr, "unrecognized element <%s> in <sfConversion>\n", name); | |
231 | sError = 1; | |
232 | break; | |
233 | } | |
234 | break; | |
235 | ||
236 | case 2: | |
237 | if (streq(name, "marker")) { | |
238 | string marker; | |
239 | string mapping; | |
240 | while (*atts) { | |
241 | const XML_Char* n = *atts++; | |
242 | const XML_Char* v = *atts++; | |
243 | if (streq(n, "name")) { | |
244 | marker = v; | |
245 | } | |
246 | else if (streq(n, "mapping")) { | |
247 | mapping = v; | |
248 | } | |
249 | else { | |
250 | fprintf(stderr, "unrecognized attribute \"%s\" for <marker>\n", n); | |
251 | sError = 1; | |
252 | break; | |
253 | } | |
254 | } | |
255 | if (marker.length() == 0 || mapping.length() == 0) { | |
256 | fprintf(stderr, "<marker> requires \"name\" and \"mapping\" attributes\n"); | |
257 | sError = 1; | |
258 | break; | |
259 | } | |
260 | if ((*mappings).find(marker) != (*mappings).end()) { | |
261 | fprintf(stderr, "duplicate entry for marker \"%s\"\n", marker.c_str()); | |
262 | sError = 1; | |
263 | break; | |
264 | } | |
265 | (*mappings)[marker] = mapping; | |
266 | } | |
267 | else { | |
268 | fprintf(stderr, "unrecognized element <%s>\n", name); | |
269 | sError = 1; | |
270 | break; | |
271 | } | |
272 | break; | |
273 | ||
274 | default: | |
275 | fprintf(stderr, "control file elements nested improperly at <%s>\n", name); | |
276 | sError = 1; | |
277 | break; | |
278 | } | |
279 | ||
280 | ++sDepth; | |
281 | } | |
282 | ||
283 | static void | |
284 | endElement(void* /*userData*/, const XML_Char */*name*/) | |
285 | { | |
286 | --sDepth; | |
287 | } | |
288 | ||
/* When true, process() starts Unicode output with a byte-order mark. */
bool	bom = false;
290 | ||
291 | static TECkit_Converter | |
292 | makeConverter(const string& mappingName, int direction) | |
293 | { | |
294 | string mapFileName; | |
295 | if (gMappingDirectory) | |
296 | mapFileName += gMappingDirectory; | |
297 | mapFileName += mappingName; | |
298 | mapFileName += ".tec"; | |
299 | ||
300 | FILE* mapFile = fopen(mapFileName.c_str(), "rb"); | |
301 | if (mapFile == 0) { | |
302 | fprintf(stderr, "unable to read mapping file for %s (file %s)\n", mappingName.c_str(), mapFileName.c_str()); | |
303 | exit(1); | |
304 | } | |
305 | ||
306 | fseek(mapFile, 0, SEEK_END); | |
307 | long fileSize = ftell(mapFile); | |
308 | fseek(mapFile, 0, SEEK_SET); | |
309 | ||
310 | unsigned char* buf = (unsigned char*)malloc(fileSize); | |
311 | if (buf == 0) { | |
312 | fprintf(stderr, "unable to read mapping file for %s (file %s)\n", mappingName.c_str(), mapFileName.c_str()); | |
313 | exit(1); | |
314 | } | |
315 | fread(buf, 1, fileSize, mapFile); | |
316 | fclose(mapFile); | |
317 | ||
318 | TECkit_Converter converter; | |
319 | TECkit_Status status = TECkit_CreateConverter(buf, fileSize, | |
320 | (direction == kDirection_8_U), | |
321 | (direction == kDirection_8_U) ? kForm_Bytes : platformUTF16, | |
322 | (direction == kDirection_8_U) ? outForm : kForm_Bytes, | |
323 | &converter); | |
324 | if (status != kStatus_NoError) { | |
325 | fprintf(stderr, "failed to create converter for %s (file %s)\n", mappingName.c_str(), mapFileName.c_str()); | |
326 | exit(1); | |
327 | } | |
328 | ||
329 | free(buf); | |
330 | ||
331 | return converter; | |
332 | } | |
333 | ||
334 | static bool | |
335 | read_control_file(const char* controlFile) | |
336 | { | |
337 | FILE* ctlFile = fopen(controlFile, "r"); | |
338 | if (ctlFile == 0) { | |
339 | fprintf(stderr, "unable to open control file %s\n", controlFile); | |
340 | return false; | |
341 | } | |
342 | ||
343 | char buf[BUFSIZ]; | |
344 | XML_Parser parser = XML_ParserCreate(0); | |
345 | int done; | |
346 | ||
347 | XML_SetElementHandler(parser, startElement, endElement); | |
348 | ||
349 | int status = 0; | |
350 | do { | |
351 | size_t len = fread(buf, 1, sizeof(buf), ctlFile); | |
352 | done = len < sizeof(buf); | |
353 | if (!XML_Parse(parser, buf, len, done)) { | |
354 | fprintf(stderr, "XML parse error: %s at line %lu\n", XML_ErrorString(XML_GetErrorCode(parser)), (unsigned long)XML_GetCurrentLineNumber(parser)); | |
355 | status = 1; | |
356 | } | |
357 | } while (!status && !done); | |
358 | ||
359 | fclose(ctlFile); | |
360 | ||
361 | XML_ParserFree(parser); | |
362 | ||
363 | if (status != 0) | |
364 | return false; | |
365 | ||
366 | converters[defaultMapping] = makeConverter(defaultMapping, direction); | |
367 | ||
368 | if (converters.find(sfmMapping) == converters.end()) | |
369 | converters[sfmMapping] = makeConverter(sfmMapping, direction); | |
370 | if (inlineMapping.length() > 0 && converters.find(inlineMapping) == converters.end()) | |
371 | converters[sfmMapping] = makeConverter(inlineMapping, direction); | |
372 | ||
373 | for (map<string,string>::const_iterator i = sfmMappings.begin(); i != sfmMappings.end(); ++i) { | |
374 | if (converters.find(i->second) == converters.end()) | |
375 | converters[i->second] = makeConverter(i->second, direction); | |
376 | } | |
377 | for (map<string,string>::const_iterator i = inlineMappings.begin(); i != inlineMappings.end(); ++i) { | |
378 | if (converters.find(i->second) == converters.end()) | |
379 | converters[i->second] = makeConverter(i->second, direction); | |
380 | } | |
381 | return true; | |
382 | } | |
383 | ||
384 | static void | |
385 | write_converted(const Byte* data, long nBytes, TECkit_Converter converter, FILE* outFile) | |
386 | { | |
387 | static Byte* convBuffer = 0; | |
388 | static UInt32 bufferSize = 0; | |
389 | ||
390 | UInt32 reqSpace = nBytes * 4 + 256; // probably plenty of space | |
391 | UInt32 sourceUsed, destUsed; | |
392 | int status; | |
393 | ||
394 | // do the conversion | |
395 | ||
396 | while (1) { | |
397 | if (bufferSize < reqSpace) { | |
398 | if (convBuffer != 0) | |
399 | delete[] convBuffer; | |
400 | bufferSize = reqSpace; | |
401 | convBuffer = new Byte[bufferSize]; | |
402 | } | |
403 | status = TECkit_ConvertBuffer( | |
404 | converter, | |
405 | const_cast<Byte*>(data), | |
406 | nBytes, | |
407 | &sourceUsed, | |
408 | convBuffer, | |
409 | bufferSize, | |
410 | &destUsed, | |
411 | true); | |
412 | if (status == kStatus_OutputBufferFull) { | |
413 | reqSpace *= 2; // output didn't fit, enlarge buffer and try again | |
414 | continue; | |
415 | } | |
416 | if (status != kStatus_NoError) { | |
417 | fprintf(stderr, "error %d in TECkit_Convert\n", status); | |
418 | exit(1); | |
419 | } | |
420 | UInt32 destUsed2; | |
421 | status = TECkit_Flush( | |
422 | converter, | |
423 | convBuffer + destUsed, | |
424 | bufferSize - destUsed, | |
425 | &destUsed2); | |
426 | if (status == kStatus_OutputBufferFull) { | |
427 | reqSpace *= 2; | |
428 | continue; | |
429 | } | |
430 | TECkit_ResetConverter(converter); | |
431 | if (status != kStatus_NoError) { | |
432 | fprintf(stderr, "error %d in TECkit_Flush\n", status); | |
433 | exit(1); | |
434 | } | |
435 | nBytes = destUsed + destUsed2; | |
436 | break; | |
437 | } | |
438 | ||
439 | fwrite(convBuffer, 1, nBytes, outFile); | |
440 | } | |
441 | ||
442 | static void | |
443 | convertMarker(const ustring& marker, TECkit_Converter converter, string& cnvMarker) | |
444 | { | |
445 | int status; | |
446 | if (cnvMarker.size() < marker.length() * 4) | |
447 | cnvMarker.resize(marker.length() * 4 + 32); | |
448 | while (1) { | |
449 | UInt32 sourceUsed, destUsed, destUsed2; | |
450 | status = TECkit_ConvertBuffer(converter, | |
451 | (Byte*)marker.data(), | |
452 | marker.size() * 2, | |
453 | &sourceUsed, | |
454 | (Byte*)cnvMarker.data(), | |
455 | cnvMarker.size(), | |
456 | &destUsed, | |
457 | true); | |
458 | if (status == kStatus_OutputBufferFull) { | |
459 | cnvMarker.resize(cnvMarker.size() * 2); | |
460 | continue; | |
461 | } | |
462 | status = TECkit_Flush(converter, | |
463 | (Byte*)cnvMarker.data() + destUsed, | |
464 | cnvMarker.size() - destUsed, | |
465 | &destUsed2); | |
466 | if (status == kStatus_OutputBufferFull) { | |
467 | cnvMarker.resize(cnvMarker.size() * 2); | |
468 | continue; | |
469 | } | |
470 | cnvMarker.resize(destUsed + destUsed2); | |
471 | TECkit_ResetConverter(converter); | |
472 | if (status != kStatus_NoError) { | |
473 | fprintf(stderr, "error %d converting SFM from Unicode\n", status); | |
474 | exit(1); | |
475 | } | |
476 | break; | |
477 | } | |
478 | } | |
479 | ||
480 | static long | |
481 | convertSingleChar(UniChar inChar, TECkit_Converter converter) | |
482 | { | |
483 | int status; | |
484 | Byte buf[32]; | |
485 | UInt32 sourceUsed, destUsed, destUsed2; | |
486 | ||
487 | status = TECkit_ConvertBuffer(converter, | |
488 | (Byte*)&inChar, | |
489 | 2, | |
490 | &sourceUsed, | |
491 | &buf[0], | |
492 | sizeof(buf), | |
493 | &destUsed, | |
494 | true); | |
495 | if (status == kStatus_OutputBufferFull || destUsed > 1) { | |
496 | fprintf(stderr, "marker characters must map to single byte values"); | |
497 | exit(1); | |
498 | } | |
499 | status = TECkit_Flush(converter, | |
500 | &buf[0] + destUsed, | |
501 | sizeof(buf) - destUsed, | |
502 | &destUsed2); | |
503 | if (status == kStatus_OutputBufferFull || destUsed + destUsed2 != 1) { | |
504 | fprintf(stderr, "marker characters must map to single byte values"); | |
505 | exit(1); | |
506 | } | |
507 | TECkit_ResetConverter(converter); | |
508 | ||
509 | if (status != kStatus_NoError) { | |
510 | fprintf(stderr, "error %d converting marker characters from Unicode\n", status); | |
511 | exit(1); | |
512 | } | |
513 | ||
514 | return buf[0]; | |
515 | } | |
516 | ||
517 | static void | |
518 | process(const char* inputFile, const char* outputFile) | |
519 | { | |
520 | TECkit_Converter defaultConverter = converters[defaultMapping]; | |
521 | TECkit_Converter sfmConverter = converters[sfmMapping]; | |
522 | TECkit_Converter inlineConverter = converters[inlineMapping]; | |
523 | ||
524 | FILE* outFile = fopen(outputFile, "wb"); | |
525 | if (!outFile) { | |
526 | fprintf(stderr, "unable to open output file %s\n", outputFile); | |
527 | exit(1); | |
528 | } | |
529 | ||
530 | FILE* inFile = fopen(inputFile, "rb"); | |
531 | if (!inFile) { | |
532 | fprintf(stderr, "unable to open input file %s\n", inputFile); | |
533 | exit(1); | |
534 | } | |
535 | ||
536 | if (direction == kDirection_8_U) { | |
537 | // *** Byte to Unicode conversion | |
538 | if (outForm == kForm_Unspecified) | |
539 | outForm = kForm_UTF8; | |
540 | ||
541 | if (bom) { | |
542 | if (outForm == kForm_UTF8) { | |
543 | Byte bom[] = "\xEF\xBB\xBF"; | |
544 | fwrite(bom, 3, 1, outFile); | |
545 | } | |
546 | else if (outForm == kForm_UTF16BE) { | |
547 | Byte bom[] = "\xFE\xFF"; | |
548 | fwrite(&bom, 2, 1, outFile); | |
549 | } | |
550 | else { | |
551 | Byte bom[] = "\xFF\xFE"; | |
552 | fwrite(&bom, 2, 1, outFile); | |
553 | } | |
554 | } | |
555 | ||
556 | sfReader<char> reader(inFile); | |
557 | ||
558 | TECkit_Converter markerMapping = makeConverter(sfmMapping, kDirection_U_8); | |
559 | ||
560 | reader.escapeChar = convertSingleChar(escapeCharU, markerMapping); | |
561 | for (size_t i = 0; i < sfmCharsU.length(); ++i) | |
562 | reader.sfmChars.append(1, convertSingleChar(sfmCharsU[i], markerMapping)); | |
563 | ||
564 | if (inlineEscapeCharU != -1) { | |
565 | if (inlineMapping != sfmMapping) { | |
566 | TECkit_DisposeConverter(markerMapping); | |
567 | markerMapping = makeConverter(inlineMapping, kDirection_U_8); | |
568 | } | |
569 | reader.inlineEscapeChar = convertSingleChar(inlineEscapeCharU, markerMapping); | |
570 | reader.startInline = convertSingleChar(startInlineU, markerMapping); | |
571 | reader.endInline = convertSingleChar(endInlineU, markerMapping); | |
572 | for (size_t i = 0; i < inlineCharsU.length(); ++i) | |
573 | reader.inlineChars.append(1, convertSingleChar(inlineCharsU[i], markerMapping)); | |
574 | } | |
575 | ||
576 | TECkit_DisposeConverter(markerMapping); | |
577 | ||
578 | vector<TECkit_Converter> converterStack; | |
579 | converterStack.assign(1, defaultConverter); | |
580 | int dataType; | |
581 | while ((dataType = reader.next(converterStack.size() > 1)) != END_OF_FILE) { | |
582 | map<string,string>::const_iterator i; | |
583 | switch (dataType) { | |
584 | case BODY_TEXT: | |
585 | write_converted((Byte*)reader.text.data(), reader.text.length(), converterStack.back(), outFile); | |
586 | break; | |
587 | ||
588 | case SFM: | |
589 | i = sfmMappings.find(reader.text); | |
590 | if (i == sfmMappings.end()) | |
591 | converterStack.assign(1, defaultConverter); | |
592 | else | |
593 | converterStack.assign(1, converters[i->second]); | |
594 | reader.text.insert(reader.text.begin(), reader.escapeChar); | |
595 | write_converted((Byte*)reader.text.data(), reader.text.length(), sfmConverter, outFile); | |
596 | break; | |
597 | ||
598 | case INLINE_MARKER: | |
599 | i = inlineMappings.find(reader.text); | |
600 | if (i == inlineMappings.end()) | |
601 | converterStack.assign(1, converterStack.back()); | |
602 | else | |
603 | converterStack.assign(1, converters[i->second]); | |
604 | reader.text.insert(reader.text.begin(), reader.escapeChar); | |
605 | write_converted((Byte*)reader.text.data(), reader.text.length(), inlineConverter, outFile); | |
606 | break; | |
607 | ||
608 | case INLINE_START: | |
609 | i = inlineMappings.find(reader.text); | |
610 | if (i == inlineMappings.end()) | |
611 | converterStack.push_back(converterStack.back()); | |
612 | else | |
613 | converterStack.push_back(converters[i->second]); | |
614 | reader.text.insert(reader.text.begin(), reader.inlineEscapeChar); | |
615 | reader.text.insert(reader.text.end(), reader.startInline); | |
616 | write_converted((Byte*)reader.text.data(), reader.text.length(), inlineConverter, outFile); | |
617 | break; | |
618 | ||
619 | case INLINE_END: | |
620 | reader.text.insert(reader.text.end(), reader.endInline); | |
621 | write_converted((Byte*)reader.text.data(), reader.text.length(), inlineConverter, outFile); | |
622 | converterStack.pop_back(); | |
623 | break; | |
624 | } | |
625 | } | |
626 | } | |
627 | else { | |
628 | // *** Unicode to Byte conversion | |
629 | Byte bom[3]; | |
630 | long pos = ftell(inFile); | |
631 | if (fread(bom, 3, 1, inFile)) { | |
632 | if (bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf) { | |
633 | inForm = kForm_UTF8; | |
634 | } | |
635 | else if (bom[0] == 0xfe && bom[1] == 0xff) { | |
636 | inForm = kForm_UTF16BE; | |
637 | fseek(inFile, pos + 2, SEEK_SET); | |
638 | } | |
639 | else if (bom[0] == 0xff && bom[1] == 0xfe) { | |
640 | inForm = kForm_UTF16LE; | |
641 | fseek(inFile, pos + 2, SEEK_SET); | |
642 | } | |
643 | else { | |
644 | if (inForm == kForm_Unspecified) { | |
645 | if (bom[0] == 0) | |
646 | inForm = kForm_UTF16BE; | |
647 | else if (bom[1] == 0) | |
648 | inForm = kForm_UTF16LE; | |
649 | else | |
650 | inForm = kForm_UTF8; | |
651 | } | |
652 | fseek(inFile, pos, SEEK_SET); | |
653 | } | |
654 | } | |
655 | else | |
656 | fseek(inFile, pos, SEEK_SET); | |
657 | ||
658 | sfReader<UniChar> reader(inFile, inForm); | |
659 | ||
660 | reader.escapeChar = escapeCharU; | |
661 | reader.sfmChars = sfmCharsU; | |
662 | ||
663 | if (inlineEscapeCharU != -1) { | |
664 | reader.inlineEscapeChar = inlineEscapeCharU; | |
665 | reader.startInline = startInlineU; | |
666 | reader.endInline = endInlineU; | |
667 | reader.inlineChars = inlineCharsU; | |
668 | } | |
669 | ||
670 | vector<TECkit_Converter> converterStack; | |
671 | converterStack.assign(1, defaultConverter); | |
672 | int dataType; | |
673 | while ((dataType = reader.next(converterStack.size() > 1)) != END_OF_FILE) { | |
674 | map<string,string>::const_iterator i; | |
675 | static string cnvMarker; | |
676 | switch (dataType) { | |
677 | case BODY_TEXT: | |
678 | write_converted((Byte*)reader.text.data(), reader.text.length() * 2, converterStack.back(), outFile); | |
679 | break; | |
680 | ||
681 | case SFM: | |
682 | convertMarker(reader.text, sfmConverter, cnvMarker); | |
683 | i = sfmMappings.find(cnvMarker); | |
684 | if (i == sfmMappings.end()) | |
685 | converterStack.assign(1, defaultConverter); | |
686 | else | |
687 | converterStack.assign(1, converters[i->second]); | |
688 | reader.text.insert(reader.text.begin(), reader.escapeChar); | |
689 | write_converted((Byte*)reader.text.data(), reader.text.length() * 2, sfmConverter, outFile); | |
690 | break; | |
691 | ||
692 | case INLINE_MARKER: | |
693 | convertMarker(reader.text, inlineConverter, cnvMarker); | |
694 | i = inlineMappings.find(cnvMarker); | |
695 | if (i == inlineMappings.end()) | |
696 | converterStack.assign(1, converterStack.back()); | |
697 | else | |
698 | converterStack.assign(1, converters[i->second]); | |
699 | reader.text.insert(reader.text.begin(), reader.inlineEscapeChar); | |
700 | write_converted((Byte*)reader.text.data(), reader.text.length() * 2, inlineConverter, outFile); | |
701 | break; | |
702 | ||
703 | case INLINE_START: | |
704 | convertMarker(reader.text, inlineConverter, cnvMarker); | |
705 | i = inlineMappings.find(cnvMarker); | |
706 | if (i == inlineMappings.end()) | |
707 | converterStack.push_back(converterStack.back()); | |
708 | else | |
709 | converterStack.push_back(converters[i->second]); | |
710 | reader.text.insert(reader.text.begin(), reader.inlineEscapeChar); | |
711 | reader.text.insert(reader.text.end(), reader.startInline); | |
712 | write_converted((Byte*)reader.text.data(), reader.text.length() * 2, inlineConverter, outFile); | |
713 | break; | |
714 | ||
715 | case INLINE_END: | |
716 | reader.text.insert(reader.text.end(), reader.endInline); | |
717 | write_converted((Byte*)reader.text.data(), reader.text.length() * 2, inlineConverter, outFile); | |
718 | converterStack.pop_back(); | |
719 | break; | |
720 | } | |
721 | } | |
722 | } | |
723 | ||
724 | fclose(inFile); | |
725 | fclose(outFile); | |
726 | } | |
727 | ||
728 | #ifdef __MWERKS__ | |
729 | #if (__dest_os == __mac_os) | |
730 | #include <console.h> | |
731 | #endif | |
732 | #endif | |
733 | ||
734 | int | |
735 | main( | |
736 | int argc, | |
737 | char** argv) | |
738 | { | |
739 | #ifdef __MWERKS__ | |
740 | #if (__dest_os == __mac_os) | |
741 | argc = ccommand(&argv); | |
742 | #endif | |
743 | #endif | |
744 | ||
745 | char* controlFile = 0; | |
746 | char* inputFile = 0; | |
747 | char* outputFile = 0; | |
748 | ||
749 | bool cmdLineErr = (argc < 2); | |
750 | ||
751 | char unicodeFormat = kForm_UTF8; | |
752 | ||
753 | int normForm = 0; | |
754 | ||
755 | while (--argc) { | |
756 | char *arg = *++argv; | |
757 | if (arg[0] == '-') { | |
758 | if (strlen(arg + 1) == 1) { | |
759 | switch (arg[1]) { | |
760 | case 'c': | |
761 | if (controlFile != 0) { | |
762 | fprintf(stderr, "repeated argument -c\n"); | |
763 | cmdLineErr = true; | |
764 | continue; | |
765 | } | |
766 | if (argc == 0) { | |
767 | fprintf(stderr, "missing file name after -c\n"); | |
768 | cmdLineErr = true; | |
769 | continue; | |
770 | } | |
771 | controlFile = *++argv; | |
772 | --argc; | |
773 | continue; | |
774 | case 'd': | |
775 | if (gMappingDirectory != 0) { | |
776 | fprintf(stderr, "repeated argument -d\n"); | |
777 | cmdLineErr = true; | |
778 | continue; | |
779 | } | |
780 | if (argc == 0) { | |
781 | fprintf(stderr, "missing directory path after -d\n"); | |
782 | cmdLineErr = true; | |
783 | continue; | |
784 | } | |
785 | gMappingDirectory = *++argv; | |
786 | --argc; | |
787 | continue; | |
788 | case 'i': | |
789 | if (inputFile != 0) { | |
790 | fprintf(stderr, "repeated argument -i\n"); | |
791 | cmdLineErr = true; | |
792 | continue; | |
793 | } | |
794 | if (argc == 0) { | |
795 | fprintf(stderr, "missing file name after -i\n"); | |
796 | cmdLineErr = true; | |
797 | continue; | |
798 | } | |
799 | inputFile = *++argv; | |
800 | --argc; | |
801 | continue; | |
802 | case 'o': | |
803 | if (outputFile != 0) { | |
804 | fprintf(stderr, "repeated argument -o\n"); | |
805 | cmdLineErr = true; | |
806 | continue; | |
807 | } | |
808 | if (argc == 0) { | |
809 | fprintf(stderr, "missing file name after -o\n"); | |
810 | cmdLineErr = true; | |
811 | continue; | |
812 | } | |
813 | outputFile = *++argv; | |
814 | --argc; | |
815 | continue; | |
816 | case 'h': | |
817 | cmdLineErr = true; // to get "usage" message | |
818 | continue; | |
819 | } | |
820 | } | |
821 | else if (strcmp(arg + 1, "utf8") == 0) | |
822 | unicodeFormat = kForm_UTF8; | |
823 | else if (strcmp(arg + 1, "be") == 0) | |
824 | unicodeFormat = kForm_UTF16BE; | |
825 | else if (strcmp(arg + 1, "le") == 0) | |
826 | unicodeFormat = kForm_UTF16LE; | |
827 | else if (strcmp(arg + 1, "bom") == 0) | |
828 | bom = true; | |
829 | else if (strcmp(arg + 1, "u8") == 0) | |
830 | direction = kDirection_U_8; | |
831 | else if (strcmp(arg + 1, "8u") == 0) | |
832 | direction = kDirection_8_U; | |
833 | else if (strcmp(arg + 1, "nfc") == 0) | |
834 | normForm = kForm_NFC; | |
835 | else if (strcmp(arg + 1, "nfd") == 0) | |
836 | normForm = kForm_NFD; | |
837 | else { | |
838 | fprintf(stderr, "Unknown option: %s\n", arg); | |
839 | cmdLineErr = true; | |
840 | } | |
841 | } | |
842 | else { | |
843 | cmdLineErr = true; | |
844 | } | |
845 | } | |
846 | ||
847 | if (cmdLineErr || direction == kDirection_Unspecified || unicodeFormat == kForm_Unspecified) { | |
848 | fprintf(stderr, "\ | |
849 | 8-bit to Unicode:\n\ | |
850 | SFconv -8u [-utf8|-be|-le] [-bom] -c ControlFile [-d MappingDirectory] -i InFile -o OutFile\n\ | |
851 | Unicode to 8-bit:\n\ | |
852 | SFconv -u8 [-utf8|-be|-le] -c ControlFile [-d MappingDirectory] -i InFile -o OutFile\n"); | |
853 | return 1; | |
854 | } | |
855 | ||
856 | if (direction == kDirection_8_U) { | |
857 | inForm = kForm_Bytes; | |
858 | outForm = unicodeFormat + normForm; | |
859 | } | |
860 | else { | |
861 | inForm = unicodeFormat; | |
862 | outForm = kForm_Bytes; | |
863 | } | |
864 | ||
865 | if (!read_control_file(controlFile)) | |
866 | exit(1); | |
867 | ||
868 | process(inputFile, outputFile); | |
869 | ||
870 | return 0; | |
871 | } |
0 | /* | |
1 | The contents of this file are subject to the Mozilla Public License | |
2 | Version 1.1 (the "License"); you may not use this file except in | |
3 | compliance with the License. You may obtain a copy of the License at | |
4 | http://www.mozilla.org/MPL/ | |
5 | ||
6 | Software distributed under the License is distributed on an "AS IS" | |
7 | basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the | |
8 | License for the specific language governing rights and limitations | |
9 | under the License. | |
10 | ||
11 | The Original Code is expat. | |
12 | ||
13 | The Initial Developer of the Original Code is James Clark. | |
14 | Portions created by James Clark are Copyright (C) 1998, 1999 | |
15 | James Clark. All Rights Reserved. | |
16 | ||
17 | Contributor(s): | |
18 | ||
19 | Alternatively, the contents of this file may be used under the terms | |
20 | of the GNU General Public License (the "GPL"), in which case the | |
21 | provisions of the GPL are applicable instead of those above. If you | |
22 | wish to allow use of your version of this file only under the terms of | |
23 | the GPL and not to allow others to use your version of this file under | |
24 | the MPL, indicate your decision by deleting the provisions above and | |
25 | replace them with the notice and other provisions required by the | |
26 | GPL. If you do not delete the provisions above, a recipient may use | |
27 | your version of this file under either the MPL or the GPL. | |
28 | */ | |
29 | ||
30 | #include "xmldef.h" | |
31 | ||
32 | #ifdef XML_UNICODE_WCHAR_T | |
33 | #ifndef XML_UNICODE | |
34 | #define XML_UNICODE | |
35 | #endif | |
36 | #endif | |
37 | ||
38 | #include "hashtable.h" | |
39 | ||
40 | #define INIT_SIZE 64 | |
41 | ||
42 | static | |
43 | int keyeq(KEY s1, KEY s2) | |
44 | { | |
45 | for (; *s1 == *s2; s1++, s2++) | |
46 | if (*s1 == 0) | |
47 | return 1; | |
48 | return 0; | |
49 | } | |
50 | ||
51 | static | |
52 | unsigned long hash(KEY s) | |
53 | { | |
54 | unsigned long h = 0; | |
55 | while (*s) | |
56 | h = (h << 5) + h + (unsigned char)*s++; | |
57 | return h; | |
58 | } | |
59 | ||
60 | NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize) | |
61 | { | |
62 | size_t i; | |
63 | if (table->size == 0) { | |
64 | if (!createSize) | |
65 | return 0; | |
66 | table->v = calloc(INIT_SIZE, sizeof(NAMED *)); | |
67 | if (!table->v) | |
68 | return 0; | |
69 | table->size = INIT_SIZE; | |
70 | table->usedLim = INIT_SIZE / 2; | |
71 | i = hash(name) & (table->size - 1); | |
72 | } | |
73 | else { | |
74 | unsigned long h = hash(name); | |
75 | for (i = h & (table->size - 1); | |
76 | table->v[i]; | |
77 | i == 0 ? i = table->size - 1 : --i) { | |
78 | if (keyeq(name, table->v[i]->name)) | |
79 | return table->v[i]; | |
80 | } | |
81 | if (!createSize) | |
82 | return 0; | |
83 | if (table->used == table->usedLim) { | |
84 | /* check for overflow */ | |
85 | size_t newSize = table->size * 2; | |
86 | NAMED **newV = calloc(newSize, sizeof(NAMED *)); | |
87 | if (!newV) | |
88 | return 0; | |
89 | for (i = 0; i < table->size; i++) | |
90 | if (table->v[i]) { | |
91 | size_t j; | |
92 | for (j = hash(table->v[i]->name) & (newSize - 1); | |
93 | newV[j]; | |
94 | j == 0 ? j = newSize - 1 : --j) | |
95 | ; | |
96 | newV[j] = table->v[i]; | |
97 | } | |
98 | free(table->v); | |
99 | table->v = newV; | |
100 | table->size = newSize; | |
101 | table->usedLim = newSize/2; | |
102 | for (i = h & (table->size - 1); | |
103 | table->v[i]; | |
104 | i == 0 ? i = table->size - 1 : --i) | |
105 | ; | |
106 | } | |
107 | } | |
108 | table->v[i] = calloc(1, createSize); | |
109 | if (!table->v[i]) | |
110 | return 0; | |
111 | table->v[i]->name = name; | |
112 | (table->used)++; | |
113 | return table->v[i]; | |
114 | } | |
115 | ||
116 | void hashTableDestroy(HASH_TABLE *table) | |
117 | { | |
118 | size_t i; | |
119 | for (i = 0; i < table->size; i++) { | |
120 | NAMED *p = table->v[i]; | |
121 | if (p) | |
122 | free(p); | |
123 | } | |
124 | free(table->v); | |
125 | } | |
126 | ||
127 | void hashTableInit(HASH_TABLE *p) | |
128 | { | |
129 | p->size = 0; | |
130 | p->usedLim = 0; | |
131 | p->used = 0; | |
132 | p->v = 0; | |
133 | } | |
134 | ||
135 | void hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) | |
136 | { | |
137 | iter->p = table->v; | |
138 | iter->end = iter->p + table->size; | |
139 | } | |
140 | ||
141 | NAMED *hashTableIterNext(HASH_TABLE_ITER *iter) | |
142 | { | |
143 | while (iter->p != iter->end) { | |
144 | NAMED *tem = *(iter->p)++; | |
145 | if (tem) | |
146 | return tem; | |
147 | } | |
148 | return 0; | |
149 | } | |
150 |
0 | /* | |
1 | The contents of this file are subject to the Mozilla Public License | |
2 | Version 1.1 (the "License"); you may not use this file except in | |
3 | compliance with the License. You may obtain a copy of the License at | |
4 | http://www.mozilla.org/MPL/ | |
5 | ||
6 | Software distributed under the License is distributed on an "AS IS" | |
7 | basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the | |
8 | License for the specific language governing rights and limitations | |
9 | under the License. | |
10 | ||
11 | The Original Code is expat. | |
12 | ||
13 | The Initial Developer of the Original Code is James Clark. | |
14 | Portions created by James Clark are Copyright (C) 1998, 1999 | |
15 | James Clark. All Rights Reserved. | |
16 | ||
17 | Contributor(s): | |
18 | ||
19 | Alternatively, the contents of this file may be used under the terms | |
20 | of the GNU General Public License (the "GPL"), in which case the | |
21 | provisions of the GPL are applicable instead of those above. If you | |
22 | wish to allow use of your version of this file only under the terms of | |
23 | the GPL and not to allow others to use your version of this file under | |
24 | the MPL, indicate your decision by deleting the provisions above and | |
25 | replace them with the notice and other provisions required by the | |
26 | GPL. If you do not delete the provisions above, a recipient may use | |
27 | your version of this file under either the MPL or the GPL. | |
28 | */ | |
29 | ||
30 | #include "xmldef.h" | |
31 | #include "xmlparse.h" | |
32 | ||
33 | #ifdef XML_UNICODE | |
34 | #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX | |
35 | #define XmlConvert XmlUtf16Convert | |
36 | #define XmlGetInternalEncoding XmlGetUtf16InternalEncoding | |
37 | #define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS | |
38 | #define XmlEncode XmlUtf16Encode | |
39 | #define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1)) | |
40 | typedef unsigned short ICHAR; | |
41 | #else | |
42 | #define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX | |
43 | #define XmlConvert XmlUtf8Convert | |
44 | #define XmlGetInternalEncoding XmlGetUtf8InternalEncoding | |
45 | #define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS | |
46 | #define XmlEncode XmlUtf8Encode | |
47 | #define MUST_CONVERT(enc, s) (!(enc)->isUtf8) | |
48 | typedef char ICHAR; | |
49 | #endif | |
50 | ||
51 | ||
52 | #ifndef XML_NS | |
53 | ||
54 | #define XmlInitEncodingNS XmlInitEncoding | |
55 | #define XmlInitUnknownEncodingNS XmlInitUnknownEncoding | |
56 | #undef XmlGetInternalEncodingNS | |
57 | #define XmlGetInternalEncodingNS XmlGetInternalEncoding | |
58 | #define XmlParseXmlDeclNS XmlParseXmlDecl | |
59 | ||
60 | #endif | |
61 | ||
62 | #ifdef XML_UNICODE_WCHAR_T | |
63 | #define XML_T(x) L ## x | |
64 | #else | |
65 | #define XML_T(x) x | |
66 | #endif | |
67 | ||
68 | /* Round up n to be a multiple of sz, where sz is a power of 2. */ | |
69 | #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) | |
70 | ||
71 | #include "xmltok.h" | |
72 | #include "xmlrole.h" | |
73 | #include "hashtable.h" | |
74 | ||
75 | #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ | |
76 | #define INIT_DATA_BUF_SIZE 1024 | |
77 | #define INIT_ATTS_SIZE 16 | |
78 | #define INIT_BLOCK_SIZE 1024 | |
79 | #define INIT_BUFFER_SIZE 1024 | |
80 | ||
81 | #define EXPAND_SPARE 24 | |
82 | ||
/* A binding record.  Each record points at a PREFIX and an attribute id,
   carries a uri string, and is linked to other bindings in two ways.
   NOTE(review): presumably one namespace prefix-to-URI binding; confirm
   against addBinding(). */
typedef struct binding {
  struct prefix *prefix;
  /* presumably the next binding on the same tag's list (see TAG.bindings)
     -- confirm */
  struct binding *nextTagBinding;
  /* presumably the binding of the same prefix that this one replaced
     -- confirm */
  struct binding *prevPrefixBinding;
  const struct attribute_id *attId;
  XML_Char *uri;
  int uriLen;
  /* presumably the allocated capacity of uri -- confirm */
  int uriAlloc;
} BINDING;
92 | ||
/* A named prefix together with a pointer to a BINDING.
   NOTE(review): presumably `binding` is the binding currently in effect
   for this prefix -- confirm. */
typedef struct prefix {
  const XML_Char *name;
  BINDING *binding;
} PREFIX;
97 | ||
/* The name of a tag as stored inside TAG.
   NOTE(review): presumably `str` is the full name, `localPart` points at
   the part after any namespace URI and `uriLen` is that URI's length --
   confirm against storeAtts(). */
typedef struct {
  const XML_Char *str;
  const XML_Char *localPart;
  int uriLen;
} TAG_NAME;
103 | ||
/* One tag record.  Records are chained through `parent`.
   NOTE(review): presumably one record per currently open element, kept on
   the parser's tag stack -- confirm. */
typedef struct tag {
  struct tag *parent;
  /* name as raw bytes, with its length; presumably in the document's own
     encoding -- confirm */
  const char *rawName;
  int rawNameLength;
  TAG_NAME name;
  /* presumably the start and end of a buffer owned by this tag -- confirm */
  char *buf;
  char *bufEnd;
  BINDING *bindings;
} TAG;
113 | ||
/* An entity record: its name, a text pointer with length, and system,
   base, public and notation identifiers.
   NOTE(review): presumably stored in the DTD's entity hash tables, which
   would require `name` to stay the first member -- confirm. */
typedef struct {
  const XML_Char *name;
  const XML_Char *textPtr;
  int textLen;
  const XML_Char *systemId;
  const XML_Char *base;
  const XML_Char *publicId;
  const XML_Char *notation;
  /* presumably non-zero while the entity is being expanded -- confirm */
  char open;
} ENTITY;
124 | ||
/* One storage block of a STRING_POOL; blocks are chained through `next`.
   NOTE(review): `s` is declared with a single element but presumably
   stands for `size` characters allocated past the struct -- confirm
   against poolGrow(). */
typedef struct block {
  struct block *next;
  int size;
  XML_Char s[1];
} BLOCK;
130 | ||
/* A pool in which strings are built up character by character.
   The pool* macros in this file operate on ptr, start and end. */
typedef struct {
  BLOCK *blocks;
  BLOCK *freeBlocks;
  /* poolAppendChar calls poolGrow when ptr reaches this position */
  const XML_Char *end;
  /* position at which the next character is written */
  XML_Char *ptr;
  /* first character of the string currently being built */
  XML_Char *start;
} STRING_POOL;
138 | ||
/* Identifies one attribute name.
   The XML_Char before the name is used to determine whether
   an attribute has been specified. */
typedef struct attribute_id {
  XML_Char *name;
  PREFIX *prefix;
  /* presumably set when the value may need tokenized normalization
     -- confirm */
  char maybeTokenized;
  /* presumably set when the attribute is a namespace declaration
     -- confirm */
  char xmlns;
} ATTRIBUTE_ID;
147 | ||
/* Pairs an attribute id with a value and a CDATA flag.
   NOTE(review): presumably one defaulted attribute of an element type
   (see ELEMENT_TYPE.defaultAtts) -- confirm. */
typedef struct {
  const ATTRIBUTE_ID *id;
  char isCdata;
  const XML_Char *value;
} DEFAULT_ATTRIBUTE;
153 | ||
/* A named element type with its prefix and an array of DEFAULT_ATTRIBUTE
   records.
   NOTE(review): presumably nDefaultAtts is the number of records in use
   and allocDefaultAtts the allocated capacity -- confirm against
   defineAttribute(). */
typedef struct {
  const XML_Char *name;
  PREFIX *prefix;
  int nDefaultAtts;
  int allocDefaultAtts;
  DEFAULT_ATTRIBUTE *defaultAtts;
} ELEMENT_TYPE;
161 | ||
/* Declaration data: hash tables for general entities, element types,
   attribute ids and prefixes, a string pool, two integer flags and a
   default PREFIX. */
typedef struct {
  HASH_TABLE generalEntities;
  HASH_TABLE elementTypes;
  HASH_TABLE attributeIds;
  HASH_TABLE prefixes;
  /* presumably holds the strings the tables above refer to -- confirm */
  STRING_POOL pool;
  int complete;
  int standalone;
#ifdef XML_DTD
  /* only present when the parser is built with XML_DTD */
  HASH_TABLE paramEntities;
#endif /* XML_DTD */
  PREFIX defaultPrefix;
} DTD;
175 | ||
/* A record for an internal entity, chained through `next`, holding two
   event pointers and the ENTITY concerned.
   NOTE(review): presumably one record per internal entity currently being
   expanded -- confirm. */
typedef struct open_internal_entity {
  const char *internalEventPtr;
  const char *internalEventEndPtr;
  struct open_internal_entity *next;
  ENTITY *entity;
} OPEN_INTERNAL_ENTITY;
182 | ||
/* Function type used to declare the *Processor functions in this file.
   A processor receives the parser, two byte pointers `start` and `end`,
   and an output pointer `endPtr`, and returns an XML_Error code.
   NOTE(review): presumably [start, end) is the input to parse and *endPtr
   receives where parsing stopped -- confirm. */
typedef enum XML_Error Processor(XML_Parser parser,
                                 const char *start,
                                 const char *end,
                                 const char **endPtr);
187 | ||
188 | static Processor prologProcessor; | |
189 | static Processor prologInitProcessor; | |
190 | static Processor contentProcessor; | |
191 | static Processor cdataSectionProcessor; | |
192 | #ifdef XML_DTD | |
193 | static Processor ignoreSectionProcessor; | |
194 | #endif /* XML_DTD */ | |
195 | static Processor epilogProcessor; | |
196 | static Processor errorProcessor; | |
197 | static Processor externalEntityInitProcessor; | |
198 | static Processor externalEntityInitProcessor2; | |
199 | static Processor externalEntityInitProcessor3; | |
200 | static Processor externalEntityContentProcessor; | |
201 | ||
202 | static enum XML_Error | |
203 | handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName); | |
204 | static enum XML_Error | |
205 | processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *); | |
206 | static enum XML_Error | |
207 | initializeEncoding(XML_Parser parser); | |
208 | static enum XML_Error | |
209 | doProlog(XML_Parser parser, const ENCODING *enc, const char *s, | |
210 | const char *end, int tok, const char *next, const char **nextPtr); | |
211 | static enum XML_Error | |
212 | processInternalParamEntity(XML_Parser parser, ENTITY *entity); | |
213 | static enum XML_Error | |
214 | doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, | |
215 | const char *start, const char *end, const char **endPtr); | |
216 | static enum XML_Error | |
217 | doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr); | |
218 | #ifdef XML_DTD | |
219 | static enum XML_Error | |
220 | doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr); | |
221 | #endif /* XML_DTD */ | |
222 | static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s, | |
223 | TAG_NAME *tagNamePtr, BINDING **bindingsPtr); | |
224 | static | |
225 | int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr); | |
226 | static int | |
227 | defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const XML_Char *dfltValue); | |
228 | static enum XML_Error | |
229 | storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *, | |
230 | STRING_POOL *); | |
231 | static enum XML_Error | |
232 | appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *, | |
233 | STRING_POOL *); | |
234 | static ATTRIBUTE_ID * | |
235 | getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); | |
236 | static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); | |
237 | static enum XML_Error | |
238 | storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); | |
239 | static int | |
240 | reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); | |
241 | static int | |
242 | reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); | |
243 | static void | |
244 | reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); | |
245 | ||
246 | static const XML_Char *getContext(XML_Parser parser); | |
247 | static int setContext(XML_Parser parser, const XML_Char *context); | |
248 | static void normalizePublicId(XML_Char *s); | |
249 | static int dtdInit(DTD *); | |
250 | static void dtdDestroy(DTD *); | |
251 | static int dtdCopy(DTD *newDtd, const DTD *oldDtd); | |
252 | static int copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *); | |
253 | #ifdef XML_DTD | |
254 | static void dtdSwap(DTD *, DTD *); | |
255 | #endif /* XML_DTD */ | |
256 | static void poolInit(STRING_POOL *); | |
257 | static void poolClear(STRING_POOL *); | |
258 | static void poolDestroy(STRING_POOL *); | |
259 | static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, | |
260 | const char *ptr, const char *end); | |
261 | static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, | |
262 | const char *ptr, const char *end); | |
263 | static int poolGrow(STRING_POOL *pool); | |
264 | static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s); | |
265 | static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); | |
266 | ||
/* Accessors for a STRING_POOL.  `pool` is a pointer expression; every use
   of a macro argument is parenthesized so that expressions such as `p + 1`
   expand correctly. */

/* First character of the string currently being built. */
#define poolStart(pool) ((pool)->start)
/* Position just past the last character written. */
#define poolEnd(pool) ((pool)->ptr)
/* Number of characters in the string currently being built. */
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the last character written. */
#define poolChop(pool) ((void)--((pool)->ptr))
/* The last character written. */
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Throw away the string being built by moving ptr back to start. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Begin a new string at the current write position. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c, calling poolGrow first when the write position has reached
   `end`.  Evaluates to 1 on success and to 0 when poolGrow returns 0. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
278 | ||
279 | typedef struct { | |
280 | /* The first member must be userData so that the XML_GetUserData macro works. */ | |
281 | void *m_userData; | |
282 | void *m_handlerArg; | |
283 | char *m_buffer; | |
284 | /* first character to be parsed */ | |
285 | const char *m_bufferPtr; | |
286 | /* past last character to be parsed */ | |
287 | char *m_bufferEnd; | |
288 | /* allocated end of buffer */ | |
289 | const char *m_bufferLim; | |
290 | long m_parseEndByteIndex; | |
291 | const char *m_parseEndPtr; | |
292 | XML_Char *m_dataBuf; | |
293 | XML_Char *m_dataBufEnd; | |
294 | XML_StartElementHandler m_startElementHandler; | |
295 | XML_EndElementHandler m_endElementHandler; | |
296 | XML_CharacterDataHandler m_characterDataHandler; | |
297 | XML_ProcessingInstructionHandler m_processingInstructionHandler; | |
298 | XML_CommentHandler m_commentHandler; | |
299 | XML_StartCdataSectionHandler m_startCdataSectionHandler; | |
300 | XML_EndCdataSectionHandler m_endCdataSectionHandler; | |
301 | XML_DefaultHandler m_defaultHandler; | |
302 | XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler; | |
303 | XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler; | |
304 | XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler; | |
305 | XML_NotationDeclHandler m_notationDeclHandler; | |
306 | XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler; | |
307 | XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler; | |
308 | XML_NotStandaloneHandler m_notStandaloneHandler; | |
309 | XML_ExternalEntityRefHandler m_externalEntityRefHandler; | |
310 | void *m_externalEntityRefHandlerArg; | |
311 | XML_UnknownEncodingHandler m_unknownEncodingHandler; | |
312 | const ENCODING *m_encoding; | |
313 | INIT_ENCODING m_initEncoding; | |
314 | const ENCODING *m_internalEncoding; | |
315 | const XML_Char *m_protocolEncodingName; | |
316 | int m_ns; | |
317 | void *m_unknownEncodingMem; | |
318 | void *m_unknownEncodingData; | |
319 | void *m_unknownEncodingHandlerData; | |
320 | void (*m_unknownEncodingRelease)(void *); | |
321 | PROLOG_STATE m_prologState; | |
322 | Processor *m_processor; | |
323 | enum XML_Error m_errorCode; | |
324 | const char *m_eventPtr; | |
325 | const char *m_eventEndPtr; | |
326 | const char *m_positionPtr; | |
327 | OPEN_INTERNAL_ENTITY *m_openInternalEntities; | |
328 | int m_defaultExpandInternalEntities; | |
329 | int m_tagLevel; | |
330 | ENTITY *m_declEntity; | |
331 | const XML_Char *m_declNotationName; | |
332 | const XML_Char *m_declNotationPublicId; | |
333 | ELEMENT_TYPE *m_declElementType; | |
334 | ATTRIBUTE_ID *m_declAttributeId; | |
335 | char m_declAttributeIsCdata; | |
336 | DTD m_dtd; | |
337 | const XML_Char *m_curBase; | |
338 | TAG *m_tagStack; | |
339 | TAG *m_freeTagList; | |
340 | BINDING *m_inheritedBindings; | |
341 | BINDING *m_freeBindingList; | |
342 | int m_attsSize; | |
343 | int m_nSpecifiedAtts; | |
344 | ATTRIBUTE *m_atts; | |
345 | POSITION m_position; | |
346 | STRING_POOL m_tempPool; | |
347 | STRING_POOL m_temp2Pool; | |
348 | char *m_groupConnector; | |
349 | unsigned m_groupSize; | |
350 | int m_hadExternalDoctype; | |
351 | XML_Char m_namespaceSeparator; | |
352 | #ifdef XML_DTD | |
353 | enum XML_ParamEntityParsing m_paramEntityParsing; | |
354 | XML_Parser m_parentParser; | |
355 | #endif |