Codebase list teckit / upstream/2.5.4_svn140+ds2
Imported Upstream version 2.5.4~svn140+ds2 Daniel Glassey 8 years ago
4 changed file(s) with 0 addition(s) and 6665 deletion(s). Raw diff Collapse all Expand all
+0
-151
SFconv/expat/xmlparse/hashtable.c less more
0 /*
1 The contents of this file are subject to the Mozilla Public License
2 Version 1.1 (the "License"); you may not use this file except in
3 csompliance with the License. You may obtain a copy of the License at
4 http://www.mozilla.org/MPL/
5
6 Software distributed under the License is distributed on an "AS IS"
7 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
8 License for the specific language governing rights and limitations
9 under the License.
10
11 The Original Code is expat.
12
13 The Initial Developer of the Original Code is James Clark.
14 Portions created by James Clark are Copyright (C) 1998, 1999
15 James Clark. All Rights Reserved.
16
17 Contributor(s):
18
19 Alternatively, the contents of this file may be used under the terms
20 of the GNU General Public License (the "GPL"), in which case the
21 provisions of the GPL are applicable instead of those above. If you
22 wish to allow use of your version of this file only under the terms of
23 the GPL and not to allow others to use your version of this file under
24 the MPL, indicate your decision by deleting the provisions above and
25 replace them with the notice and other provisions required by the
26 GPL. If you do not delete the provisions above, a recipient may use
27 your version of this file under either the MPL or the GPL.
28 */
29
30 #include "xmldef.h"
31
32 #ifdef XML_UNICODE_WCHAR_T
33 #ifndef XML_UNICODE
34 #define XML_UNICODE
35 #endif
36 #endif
37
38 #include "hashtable.h"
39
40 #define INIT_SIZE 64
41
42 static
43 int keyeq(KEY s1, KEY s2)
44 {
45 for (; *s1 == *s2; s1++, s2++)
46 if (*s1 == 0)
47 return 1;
48 return 0;
49 }
50
51 static
52 unsigned long hash(KEY s)
53 {
54 unsigned long h = 0;
55 while (*s)
56 h = (h << 5) + h + (unsigned char)*s++;
57 return h;
58 }
59
60 NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize)
61 {
62 size_t i;
63 if (table->size == 0) {
64 if (!createSize)
65 return 0;
66 table->v = calloc(INIT_SIZE, sizeof(NAMED *));
67 if (!table->v)
68 return 0;
69 table->size = INIT_SIZE;
70 table->usedLim = INIT_SIZE / 2;
71 i = hash(name) & (table->size - 1);
72 }
73 else {
74 unsigned long h = hash(name);
75 for (i = h & (table->size - 1);
76 table->v[i];
77 i == 0 ? i = table->size - 1 : --i) {
78 if (keyeq(name, table->v[i]->name))
79 return table->v[i];
80 }
81 if (!createSize)
82 return 0;
83 if (table->used == table->usedLim) {
84 /* check for overflow */
85 size_t newSize = table->size * 2;
86 NAMED **newV = calloc(newSize, sizeof(NAMED *));
87 if (!newV)
88 return 0;
89 for (i = 0; i < table->size; i++)
90 if (table->v[i]) {
91 size_t j;
92 for (j = hash(table->v[i]->name) & (newSize - 1);
93 newV[j];
94 j == 0 ? j = newSize - 1 : --j)
95 ;
96 newV[j] = table->v[i];
97 }
98 free(table->v);
99 table->v = newV;
100 table->size = newSize;
101 table->usedLim = newSize/2;
102 for (i = h & (table->size - 1);
103 table->v[i];
104 i == 0 ? i = table->size - 1 : --i)
105 ;
106 }
107 }
108 table->v[i] = calloc(1, createSize);
109 if (!table->v[i])
110 return 0;
111 table->v[i]->name = name;
112 (table->used)++;
113 return table->v[i];
114 }
115
116 void hashTableDestroy(HASH_TABLE *table)
117 {
118 size_t i;
119 for (i = 0; i < table->size; i++) {
120 NAMED *p = table->v[i];
121 if (p)
122 free(p);
123 }
124 free(table->v);
125 }
126
127 void hashTableInit(HASH_TABLE *p)
128 {
129 p->size = 0;
130 p->usedLim = 0;
131 p->used = 0;
132 p->v = 0;
133 }
134
135 void hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
136 {
137 iter->p = table->v;
138 iter->end = iter->p + table->size;
139 }
140
141 NAMED *hashTableIterNext(HASH_TABLE_ITER *iter)
142 {
143 while (iter->p != iter->end) {
144 NAMED *tem = *(iter->p)++;
145 if (tem)
146 return tem;
147 }
148 return 0;
149 }
150
+0
-3707
SFconv/expat/xmlparse/xmlparse.c less more
0 /*
1 The contents of this file are subject to the Mozilla Public License
2 Version 1.1 (the "License"); you may not use this file except in
3 compliance with the License. You may obtain a copy of the License at
4 http://www.mozilla.org/MPL/
5
6 Software distributed under the License is distributed on an "AS IS"
7 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
8 License for the specific language governing rights and limitations
9 under the License.
10
11 The Original Code is expat.
12
13 The Initial Developer of the Original Code is James Clark.
14 Portions created by James Clark are Copyright (C) 1998, 1999
15 James Clark. All Rights Reserved.
16
17 Contributor(s):
18
19 Alternatively, the contents of this file may be used under the terms
20 of the GNU General Public License (the "GPL"), in which case the
21 provisions of the GPL are applicable instead of those above. If you
22 wish to allow use of your version of this file only under the terms of
23 the GPL and not to allow others to use your version of this file under
24 the MPL, indicate your decision by deleting the provisions above and
25 replace them with the notice and other provisions required by the
26 GPL. If you do not delete the provisions above, a recipient may use
27 your version of this file under either the MPL or the GPL.
28 */
29
30 #include "xmldef.h"
31 #include "xmlparse.h"
32
33 #ifdef XML_UNICODE
34 #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
35 #define XmlConvert XmlUtf16Convert
36 #define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
37 #define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
38 #define XmlEncode XmlUtf16Encode
39 #define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1))
40 typedef unsigned short ICHAR;
41 #else
42 #define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
43 #define XmlConvert XmlUtf8Convert
44 #define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
45 #define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
46 #define XmlEncode XmlUtf8Encode
47 #define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
48 typedef char ICHAR;
49 #endif
50
51
52 #ifndef XML_NS
53
54 #define XmlInitEncodingNS XmlInitEncoding
55 #define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
56 #undef XmlGetInternalEncodingNS
57 #define XmlGetInternalEncodingNS XmlGetInternalEncoding
58 #define XmlParseXmlDeclNS XmlParseXmlDecl
59
60 #endif
61
62 #ifdef XML_UNICODE_WCHAR_T
63 #define XML_T(x) L ## x
64 #else
65 #define XML_T(x) x
66 #endif
67
68 /* Round up n to be a multiple of sz, where sz is a power of 2. */
69 #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
70
71 #include "xmltok.h"
72 #include "xmlrole.h"
73 #include "hashtable.h"
74
75 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
76 #define INIT_DATA_BUF_SIZE 1024
77 #define INIT_ATTS_SIZE 16
78 #define INIT_BLOCK_SIZE 1024
79 #define INIT_BUFFER_SIZE 1024
80
81 #define EXPAND_SPARE 24
82
83 typedef struct binding {
84 struct prefix *prefix;
85 struct binding *nextTagBinding;
86 struct binding *prevPrefixBinding;
87 const struct attribute_id *attId;
88 XML_Char *uri;
89 int uriLen;
90 int uriAlloc;
91 } BINDING;
92
93 typedef struct prefix {
94 const XML_Char *name;
95 BINDING *binding;
96 } PREFIX;
97
98 typedef struct {
99 const XML_Char *str;
100 const XML_Char *localPart;
101 int uriLen;
102 } TAG_NAME;
103
104 typedef struct tag {
105 struct tag *parent;
106 const char *rawName;
107 int rawNameLength;
108 TAG_NAME name;
109 char *buf;
110 char *bufEnd;
111 BINDING *bindings;
112 } TAG;
113
114 typedef struct {
115 const XML_Char *name;
116 const XML_Char *textPtr;
117 int textLen;
118 const XML_Char *systemId;
119 const XML_Char *base;
120 const XML_Char *publicId;
121 const XML_Char *notation;
122 char open;
123 } ENTITY;
124
125 typedef struct block {
126 struct block *next;
127 int size;
128 XML_Char s[1];
129 } BLOCK;
130
131 typedef struct {
132 BLOCK *blocks;
133 BLOCK *freeBlocks;
134 const XML_Char *end;
135 XML_Char *ptr;
136 XML_Char *start;
137 } STRING_POOL;
138
139 /* The XML_Char before the name is used to determine whether
140 an attribute has been specified. */
141 typedef struct attribute_id {
142 XML_Char *name;
143 PREFIX *prefix;
144 char maybeTokenized;
145 char xmlns;
146 } ATTRIBUTE_ID;
147
148 typedef struct {
149 const ATTRIBUTE_ID *id;
150 char isCdata;
151 const XML_Char *value;
152 } DEFAULT_ATTRIBUTE;
153
154 typedef struct {
155 const XML_Char *name;
156 PREFIX *prefix;
157 int nDefaultAtts;
158 int allocDefaultAtts;
159 DEFAULT_ATTRIBUTE *defaultAtts;
160 } ELEMENT_TYPE;
161
162 typedef struct {
163 HASH_TABLE generalEntities;
164 HASH_TABLE elementTypes;
165 HASH_TABLE attributeIds;
166 HASH_TABLE prefixes;
167 STRING_POOL pool;
168 int complete;
169 int standalone;
170 #ifdef XML_DTD
171 HASH_TABLE paramEntities;
172 #endif /* XML_DTD */
173 PREFIX defaultPrefix;
174 } DTD;
175
176 typedef struct open_internal_entity {
177 const char *internalEventPtr;
178 const char *internalEventEndPtr;
179 struct open_internal_entity *next;
180 ENTITY *entity;
181 } OPEN_INTERNAL_ENTITY;
182
183 typedef enum XML_Error Processor(XML_Parser parser,
184 const char *start,
185 const char *end,
186 const char **endPtr);
187
188 static Processor prologProcessor;
189 static Processor prologInitProcessor;
190 static Processor contentProcessor;
191 static Processor cdataSectionProcessor;
192 #ifdef XML_DTD
193 static Processor ignoreSectionProcessor;
194 #endif /* XML_DTD */
195 static Processor epilogProcessor;
196 static Processor errorProcessor;
197 static Processor externalEntityInitProcessor;
198 static Processor externalEntityInitProcessor2;
199 static Processor externalEntityInitProcessor3;
200 static Processor externalEntityContentProcessor;
201
202 static enum XML_Error
203 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
204 static enum XML_Error
205 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *);
206 static enum XML_Error
207 initializeEncoding(XML_Parser parser);
208 static enum XML_Error
209 doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
210 const char *end, int tok, const char *next, const char **nextPtr);
211 static enum XML_Error
212 processInternalParamEntity(XML_Parser parser, ENTITY *entity);
213 static enum XML_Error
214 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
215 const char *start, const char *end, const char **endPtr);
216 static enum XML_Error
217 doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
218 #ifdef XML_DTD
219 static enum XML_Error
220 doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
221 #endif /* XML_DTD */
222 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s,
223 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
224 static
225 int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr);
226 static int
227 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const XML_Char *dfltValue);
228 static enum XML_Error
229 storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
230 STRING_POOL *);
231 static enum XML_Error
232 appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
233 STRING_POOL *);
234 static ATTRIBUTE_ID *
235 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
236 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
237 static enum XML_Error
238 storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
239 static int
240 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
241 static int
242 reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
243 static void
244 reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
245
246 static const XML_Char *getContext(XML_Parser parser);
247 static int setContext(XML_Parser parser, const XML_Char *context);
248 static void normalizePublicId(XML_Char *s);
249 static int dtdInit(DTD *);
250 static void dtdDestroy(DTD *);
251 static int dtdCopy(DTD *newDtd, const DTD *oldDtd);
252 static int copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
253 #ifdef XML_DTD
254 static void dtdSwap(DTD *, DTD *);
255 #endif /* XML_DTD */
256 static void poolInit(STRING_POOL *);
257 static void poolClear(STRING_POOL *);
258 static void poolDestroy(STRING_POOL *);
259 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
260 const char *ptr, const char *end);
261 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
262 const char *ptr, const char *end);
263 static int poolGrow(STRING_POOL *pool);
264 static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s);
265 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
266
267 #define poolStart(pool) ((pool)->start)
268 #define poolEnd(pool) ((pool)->ptr)
269 #define poolLength(pool) ((pool)->ptr - (pool)->start)
270 #define poolChop(pool) ((void)--(pool->ptr))
271 #define poolLastChar(pool) (((pool)->ptr)[-1])
272 #define poolDiscard(pool) ((pool)->ptr = (pool)->start)
273 #define poolFinish(pool) ((pool)->start = (pool)->ptr)
274 #define poolAppendChar(pool, c) \
275 (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
276 ? 0 \
277 : ((*((pool)->ptr)++ = c), 1))
278
279 typedef struct {
280 /* The first member must be userData so that the XML_GetUserData macro works. */
281 void *m_userData;
282 void *m_handlerArg;
283 char *m_buffer;
284 /* first character to be parsed */
285 const char *m_bufferPtr;
286 /* past last character to be parsed */
287 char *m_bufferEnd;
288 /* allocated end of buffer */
289 const char *m_bufferLim;
290 long m_parseEndByteIndex;
291 const char *m_parseEndPtr;
292 XML_Char *m_dataBuf;
293 XML_Char *m_dataBufEnd;
294 XML_StartElementHandler m_startElementHandler;
295 XML_EndElementHandler m_endElementHandler;
296 XML_CharacterDataHandler m_characterDataHandler;
297 XML_ProcessingInstructionHandler m_processingInstructionHandler;
298 XML_CommentHandler m_commentHandler;
299 XML_StartCdataSectionHandler m_startCdataSectionHandler;
300 XML_EndCdataSectionHandler m_endCdataSectionHandler;
301 XML_DefaultHandler m_defaultHandler;
302 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
303 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
304 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
305 XML_NotationDeclHandler m_notationDeclHandler;
306 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
307 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
308 XML_NotStandaloneHandler m_notStandaloneHandler;
309 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
310 void *m_externalEntityRefHandlerArg;
311 XML_UnknownEncodingHandler m_unknownEncodingHandler;
312 const ENCODING *m_encoding;
313 INIT_ENCODING m_initEncoding;
314 const ENCODING *m_internalEncoding;
315 const XML_Char *m_protocolEncodingName;
316 int m_ns;
317 void *m_unknownEncodingMem;
318 void *m_unknownEncodingData;
319 void *m_unknownEncodingHandlerData;
320 void (*m_unknownEncodingRelease)(void *);
321 PROLOG_STATE m_prologState;
322 Processor *m_processor;
323 enum XML_Error m_errorCode;
324 const char *m_eventPtr;
325 const char *m_eventEndPtr;
326 const char *m_positionPtr;
327 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
328 int m_defaultExpandInternalEntities;
329 int m_tagLevel;
330 ENTITY *m_declEntity;
331 const XML_Char *m_declNotationName;
332 const XML_Char *m_declNotationPublicId;
333 ELEMENT_TYPE *m_declElementType;
334 ATTRIBUTE_ID *m_declAttributeId;
335 char m_declAttributeIsCdata;
336 DTD m_dtd;
337 const XML_Char *m_curBase;
338 TAG *m_tagStack;
339 TAG *m_freeTagList;
340 BINDING *m_inheritedBindings;
341 BINDING *m_freeBindingList;
342 int m_attsSize;
343 int m_nSpecifiedAtts;
344 ATTRIBUTE *m_atts;
345 POSITION m_position;
346 STRING_POOL m_tempPool;
347 STRING_POOL m_temp2Pool;
348 char *m_groupConnector;
349 unsigned m_groupSize;
350 int m_hadExternalDoctype;
351 XML_Char m_namespaceSeparator;
352 #ifdef XML_DTD
353 enum XML_ParamEntityParsing m_paramEntityParsing;
354 XML_Parser m_parentParser;
355 #endif
356 } Parser;
357
358 #define userData (((Parser *)parser)->m_userData)
359 #define handlerArg (((Parser *)parser)->m_handlerArg)
360 #define startElementHandler (((Parser *)parser)->m_startElementHandler)
361 #define endElementHandler (((Parser *)parser)->m_endElementHandler)
362 #define characterDataHandler (((Parser *)parser)->m_characterDataHandler)
363 #define processingInstructionHandler (((Parser *)parser)->m_processingInstructionHandler)
364 #define commentHandler (((Parser *)parser)->m_commentHandler)
365 #define startCdataSectionHandler (((Parser *)parser)->m_startCdataSectionHandler)
366 #define endCdataSectionHandler (((Parser *)parser)->m_endCdataSectionHandler)
367 #define defaultHandler (((Parser *)parser)->m_defaultHandler)
368 #define startDoctypeDeclHandler (((Parser *)parser)->m_startDoctypeDeclHandler)
369 #define endDoctypeDeclHandler (((Parser *)parser)->m_endDoctypeDeclHandler)
370 #define unparsedEntityDeclHandler (((Parser *)parser)->m_unparsedEntityDeclHandler)
371 #define notationDeclHandler (((Parser *)parser)->m_notationDeclHandler)
372 #define startNamespaceDeclHandler (((Parser *)parser)->m_startNamespaceDeclHandler)
373 #define endNamespaceDeclHandler (((Parser *)parser)->m_endNamespaceDeclHandler)
374 #define notStandaloneHandler (((Parser *)parser)->m_notStandaloneHandler)
375 #define externalEntityRefHandler (((Parser *)parser)->m_externalEntityRefHandler)
376 #define externalEntityRefHandlerArg (((Parser *)parser)->m_externalEntityRefHandlerArg)
377 #define unknownEncodingHandler (((Parser *)parser)->m_unknownEncodingHandler)
378 #define encoding (((Parser *)parser)->m_encoding)
379 #define initEncoding (((Parser *)parser)->m_initEncoding)
380 #define internalEncoding (((Parser *)parser)->m_internalEncoding)
381 #define unknownEncodingMem (((Parser *)parser)->m_unknownEncodingMem)
382 #define unknownEncodingData (((Parser *)parser)->m_unknownEncodingData)
383 #define unknownEncodingHandlerData \
384 (((Parser *)parser)->m_unknownEncodingHandlerData)
385 #define unknownEncodingRelease (((Parser *)parser)->m_unknownEncodingRelease)
386 #define protocolEncodingName (((Parser *)parser)->m_protocolEncodingName)
387 #define ns (((Parser *)parser)->m_ns)
388 #define prologState (((Parser *)parser)->m_prologState)
389 #define processor (((Parser *)parser)->m_processor)
390 #define errorCode (((Parser *)parser)->m_errorCode)
391 #define eventPtr (((Parser *)parser)->m_eventPtr)
392 #define eventEndPtr (((Parser *)parser)->m_eventEndPtr)
393 #define positionPtr (((Parser *)parser)->m_positionPtr)
394 #define position (((Parser *)parser)->m_position)
395 #define openInternalEntities (((Parser *)parser)->m_openInternalEntities)
396 #define defaultExpandInternalEntities (((Parser *)parser)->m_defaultExpandInternalEntities)
397 #define tagLevel (((Parser *)parser)->m_tagLevel)
398 #define buffer (((Parser *)parser)->m_buffer)
399 #define bufferPtr (((Parser *)parser)->m_bufferPtr)
400 #define bufferEnd (((Parser *)parser)->m_bufferEnd)
401 #define parseEndByteIndex (((Parser *)parser)->m_parseEndByteIndex)
402 #define parseEndPtr (((Parser *)parser)->m_parseEndPtr)
403 #define bufferLim (((Parser *)parser)->m_bufferLim)
404 #define dataBuf (((Parser *)parser)->m_dataBuf)
405 #define dataBufEnd (((Parser *)parser)->m_dataBufEnd)
406 #define dtd (((Parser *)parser)->m_dtd)
407 #define curBase (((Parser *)parser)->m_curBase)
408 #define declEntity (((Parser *)parser)->m_declEntity)
409 #define declNotationName (((Parser *)parser)->m_declNotationName)
410 #define declNotationPublicId (((Parser *)parser)->m_declNotationPublicId)
411 #define declElementType (((Parser *)parser)->m_declElementType)
412 #define declAttributeId (((Parser *)parser)->m_declAttributeId)
413 #define declAttributeIsCdata (((Parser *)parser)->m_declAttributeIsCdata)
414 #define freeTagList (((Parser *)parser)->m_freeTagList)
415 #define freeBindingList (((Parser *)parser)->m_freeBindingList)
416 #define inheritedBindings (((Parser *)parser)->m_inheritedBindings)
417 #define tagStack (((Parser *)parser)->m_tagStack)
418 #define atts (((Parser *)parser)->m_atts)
419 #define attsSize (((Parser *)parser)->m_attsSize)
420 #define nSpecifiedAtts (((Parser *)parser)->m_nSpecifiedAtts)
421 #define tempPool (((Parser *)parser)->m_tempPool)
422 #define temp2Pool (((Parser *)parser)->m_temp2Pool)
423 #define groupConnector (((Parser *)parser)->m_groupConnector)
424 #define groupSize (((Parser *)parser)->m_groupSize)
425 #define hadExternalDoctype (((Parser *)parser)->m_hadExternalDoctype)
426 #define namespaceSeparator (((Parser *)parser)->m_namespaceSeparator)
427 #ifdef XML_DTD
428 #define parentParser (((Parser *)parser)->m_parentParser)
429 #define paramEntityParsing (((Parser *)parser)->m_paramEntityParsing)
430 #endif /* XML_DTD */
431
432 #ifdef _MSC_VER
433 #ifdef _DEBUG
434 Parser *asParser(XML_Parser parser)
435 {
436 return parser;
437 }
438 #endif
439 #endif
440
441 XML_Parser XML_ParserCreate(const XML_Char *encodingName)
442 {
443 XML_Parser parser = malloc(sizeof(Parser));
444 if (!parser)
445 return parser;
446 processor = prologInitProcessor;
447 XmlPrologStateInit(&prologState);
448 userData = 0;
449 handlerArg = 0;
450 startElementHandler = 0;
451 endElementHandler = 0;
452 characterDataHandler = 0;
453 processingInstructionHandler = 0;
454 commentHandler = 0;
455 startCdataSectionHandler = 0;
456 endCdataSectionHandler = 0;
457 defaultHandler = 0;
458 startDoctypeDeclHandler = 0;
459 endDoctypeDeclHandler = 0;
460 unparsedEntityDeclHandler = 0;
461 notationDeclHandler = 0;
462 startNamespaceDeclHandler = 0;
463 endNamespaceDeclHandler = 0;
464 notStandaloneHandler = 0;
465 externalEntityRefHandler = 0;
466 externalEntityRefHandlerArg = parser;
467 unknownEncodingHandler = 0;
468 buffer = 0;
469 bufferPtr = 0;
470 bufferEnd = 0;
471 parseEndByteIndex = 0;
472 parseEndPtr = 0;
473 bufferLim = 0;
474 declElementType = 0;
475 declAttributeId = 0;
476 declEntity = 0;
477 declNotationName = 0;
478 declNotationPublicId = 0;
479 memset(&position, 0, sizeof(POSITION));
480 errorCode = XML_ERROR_NONE;
481 eventPtr = 0;
482 eventEndPtr = 0;
483 positionPtr = 0;
484 openInternalEntities = 0;
485 tagLevel = 0;
486 tagStack = 0;
487 freeTagList = 0;
488 freeBindingList = 0;
489 inheritedBindings = 0;
490 attsSize = INIT_ATTS_SIZE;
491 atts = malloc(attsSize * sizeof(ATTRIBUTE));
492 nSpecifiedAtts = 0;
493 dataBuf = malloc(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
494 groupSize = 0;
495 groupConnector = 0;
496 hadExternalDoctype = 0;
497 unknownEncodingMem = 0;
498 unknownEncodingRelease = 0;
499 unknownEncodingData = 0;
500 unknownEncodingHandlerData = 0;
501 namespaceSeparator = '!';
502 #ifdef XML_DTD
503 parentParser = 0;
504 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
505 #endif
506 ns = 0;
507 poolInit(&tempPool);
508 poolInit(&temp2Pool);
509 protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0;
510 curBase = 0;
511 if (!dtdInit(&dtd) || !atts || !dataBuf
512 || (encodingName && !protocolEncodingName)) {
513 XML_ParserFree(parser);
514 return 0;
515 }
516 dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
517 XmlInitEncoding(&initEncoding, &encoding, 0);
518 internalEncoding = XmlGetInternalEncoding();
519 return parser;
520 }
521
522 XML_Parser XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
523 {
524 static
525 const XML_Char implicitContext[] = {
526 XML_T('x'), XML_T('m'), XML_T('l'), XML_T('='),
527 XML_T('h'), XML_T('t'), XML_T('t'), XML_T('p'), XML_T(':'),
528 XML_T('/'), XML_T('/'), XML_T('w'), XML_T('w'), XML_T('w'),
529 XML_T('.'), XML_T('w'), XML_T('3'),
530 XML_T('.'), XML_T('o'), XML_T('r'), XML_T('g'),
531 XML_T('/'), XML_T('X'), XML_T('M'), XML_T('L'),
532 XML_T('/'), XML_T('1'), XML_T('9'), XML_T('9'), XML_T('8'),
533 XML_T('/'), XML_T('n'), XML_T('a'), XML_T('m'), XML_T('e'),
534 XML_T('s'), XML_T('p'), XML_T('a'), XML_T('c'), XML_T('e'),
535 XML_T('\0')
536 };
537
538 XML_Parser parser = XML_ParserCreate(encodingName);
539 if (parser) {
540 XmlInitEncodingNS(&initEncoding, &encoding, 0);
541 ns = 1;
542 internalEncoding = XmlGetInternalEncodingNS();
543 namespaceSeparator = nsSep;
544 }
545 if (!setContext(parser, implicitContext)) {
546 XML_ParserFree(parser);
547 return 0;
548 }
549 return parser;
550 }
551
552 int XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
553 {
554 if (!encodingName)
555 protocolEncodingName = 0;
556 else {
557 protocolEncodingName = poolCopyString(&tempPool, encodingName);
558 if (!protocolEncodingName)
559 return 0;
560 }
561 return 1;
562 }
563
564 XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
565 const XML_Char *context,
566 const XML_Char *encodingName)
567 {
568 XML_Parser parser = oldParser;
569 DTD *oldDtd = &dtd;
570 XML_StartElementHandler oldStartElementHandler = startElementHandler;
571 XML_EndElementHandler oldEndElementHandler = endElementHandler;
572 XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
573 XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler;
574 XML_CommentHandler oldCommentHandler = commentHandler;
575 XML_StartCdataSectionHandler oldStartCdataSectionHandler = startCdataSectionHandler;
576 XML_EndCdataSectionHandler oldEndCdataSectionHandler = endCdataSectionHandler;
577 XML_DefaultHandler oldDefaultHandler = defaultHandler;
578 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler = startNamespaceDeclHandler;
579 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler = endNamespaceDeclHandler;
580 XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler;
581 XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler;
582 XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler;
583 void *oldUserData = userData;
584 void *oldHandlerArg = handlerArg;
585 int oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
586 void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
587 #ifdef XML_DTD
588 int oldParamEntityParsing = paramEntityParsing;
589 #endif
590 parser = (ns
591 ? XML_ParserCreateNS(encodingName, namespaceSeparator)
592 : XML_ParserCreate(encodingName));
593 if (!parser)
594 return 0;
595 startElementHandler = oldStartElementHandler;
596 endElementHandler = oldEndElementHandler;
597 characterDataHandler = oldCharacterDataHandler;
598 processingInstructionHandler = oldProcessingInstructionHandler;
599 commentHandler = oldCommentHandler;
600 startCdataSectionHandler = oldStartCdataSectionHandler;
601 endCdataSectionHandler = oldEndCdataSectionHandler;
602 defaultHandler = oldDefaultHandler;
603 startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
604 endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
605 notStandaloneHandler = oldNotStandaloneHandler;
606 externalEntityRefHandler = oldExternalEntityRefHandler;
607 unknownEncodingHandler = oldUnknownEncodingHandler;
608 userData = oldUserData;
609 if (oldUserData == oldHandlerArg)
610 handlerArg = userData;
611 else
612 handlerArg = parser;
613 if (oldExternalEntityRefHandlerArg != oldParser)
614 externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
615 defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
616 #ifdef XML_DTD
617 paramEntityParsing = oldParamEntityParsing;
618 if (context) {
619 #endif /* XML_DTD */
620 if (!dtdCopy(&dtd, oldDtd) || !setContext(parser, context)) {
621 XML_ParserFree(parser);
622 return 0;
623 }
624 processor = externalEntityInitProcessor;
625 #ifdef XML_DTD
626 }
627 else {
628 dtdSwap(&dtd, oldDtd);
629 parentParser = oldParser;
630 XmlPrologStateInitExternalEntity(&prologState);
631 dtd.complete = 1;
632 hadExternalDoctype = 1;
633 }
634 #endif /* XML_DTD */
635 return parser;
636 }
637
638 static
639 void destroyBindings(BINDING *bindings)
640 {
641 for (;;) {
642 BINDING *b = bindings;
643 if (!b)
644 break;
645 bindings = b->nextTagBinding;
646 free(b->uri);
647 free(b);
648 }
649 }
650
651 void XML_ParserFree(XML_Parser parser)
652 {
653 for (;;) {
654 TAG *p;
655 if (tagStack == 0) {
656 if (freeTagList == 0)
657 break;
658 tagStack = freeTagList;
659 freeTagList = 0;
660 }
661 p = tagStack;
662 tagStack = tagStack->parent;
663 free(p->buf);
664 destroyBindings(p->bindings);
665 free(p);
666 }
667 destroyBindings(freeBindingList);
668 destroyBindings(inheritedBindings);
669 poolDestroy(&tempPool);
670 poolDestroy(&temp2Pool);
671 #ifdef XML_DTD
672 if (parentParser) {
673 if (hadExternalDoctype)
674 dtd.complete = 0;
675 dtdSwap(&dtd, &((Parser *)parentParser)->m_dtd);
676 }
677 #endif /* XML_DTD */
678 dtdDestroy(&dtd);
679 free((void *)atts);
680 free(groupConnector);
681 free(buffer);
682 free(dataBuf);
683 free(unknownEncodingMem);
684 if (unknownEncodingRelease)
685 unknownEncodingRelease(unknownEncodingData);
686 free(parser);
687 }
688
689 void XML_UseParserAsHandlerArg(XML_Parser parser)
690 {
691 handlerArg = parser;
692 }
693
694 void XML_SetUserData(XML_Parser parser, void *p)
695 {
696 if (handlerArg == userData)
697 handlerArg = userData = p;
698 else
699 userData = p;
700 }
701
702 int XML_SetBase(XML_Parser parser, const XML_Char *p)
703 {
704 if (p) {
705 p = poolCopyString(&dtd.pool, p);
706 if (!p)
707 return 0;
708 curBase = p;
709 }
710 else
711 curBase = 0;
712 return 1;
713 }
714
715 const XML_Char *XML_GetBase(XML_Parser parser)
716 {
717 return curBase;
718 }
719
720 int XML_GetSpecifiedAttributeCount(XML_Parser parser)
721 {
722 return nSpecifiedAtts;
723 }
724
725 void XML_SetElementHandler(XML_Parser parser,
726 XML_StartElementHandler start,
727 XML_EndElementHandler end)
728 {
729 startElementHandler = start;
730 endElementHandler = end;
731 }
732
733 void XML_SetCharacterDataHandler(XML_Parser parser,
734 XML_CharacterDataHandler handler)
735 {
736 characterDataHandler = handler;
737 }
738
739 void XML_SetProcessingInstructionHandler(XML_Parser parser,
740 XML_ProcessingInstructionHandler handler)
741 {
742 processingInstructionHandler = handler;
743 }
744
745 void XML_SetCommentHandler(XML_Parser parser,
746 XML_CommentHandler handler)
747 {
748 commentHandler = handler;
749 }
750
751 void XML_SetCdataSectionHandler(XML_Parser parser,
752 XML_StartCdataSectionHandler start,
753 XML_EndCdataSectionHandler end)
754 {
755 startCdataSectionHandler = start;
756 endCdataSectionHandler = end;
757 }
758
759 void XML_SetDefaultHandler(XML_Parser parser,
760 XML_DefaultHandler handler)
761 {
762 defaultHandler = handler;
763 defaultExpandInternalEntities = 0;
764 }
765
766 void XML_SetDefaultHandlerExpand(XML_Parser parser,
767 XML_DefaultHandler handler)
768 {
769 defaultHandler = handler;
770 defaultExpandInternalEntities = 1;
771 }
772
773 void XML_SetDoctypeDeclHandler(XML_Parser parser,
774 XML_StartDoctypeDeclHandler start,
775 XML_EndDoctypeDeclHandler end)
776 {
777 startDoctypeDeclHandler = start;
778 endDoctypeDeclHandler = end;
779 }
780
781 void XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
782 XML_UnparsedEntityDeclHandler handler)
783 {
784 unparsedEntityDeclHandler = handler;
785 }
786
787 void XML_SetNotationDeclHandler(XML_Parser parser,
788 XML_NotationDeclHandler handler)
789 {
790 notationDeclHandler = handler;
791 }
792
793 void XML_SetNamespaceDeclHandler(XML_Parser parser,
794 XML_StartNamespaceDeclHandler start,
795 XML_EndNamespaceDeclHandler end)
796 {
797 startNamespaceDeclHandler = start;
798 endNamespaceDeclHandler = end;
799 }
800
801 void XML_SetNotStandaloneHandler(XML_Parser parser,
802 XML_NotStandaloneHandler handler)
803 {
804 notStandaloneHandler = handler;
805 }
806
807 void XML_SetExternalEntityRefHandler(XML_Parser parser,
808 XML_ExternalEntityRefHandler handler)
809 {
810 externalEntityRefHandler = handler;
811 }
812
813 void XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
814 {
815 if (arg)
816 externalEntityRefHandlerArg = arg;
817 else
818 externalEntityRefHandlerArg = parser;
819 }
820
821 void XML_SetUnknownEncodingHandler(XML_Parser parser,
822 XML_UnknownEncodingHandler handler,
823 void *data)
824 {
825 unknownEncodingHandler = handler;
826 unknownEncodingHandlerData = data;
827 }
828
829 int XML_SetParamEntityParsing(XML_Parser parser,
830 enum XML_ParamEntityParsing parsing)
831 {
832 #ifdef XML_DTD
833 paramEntityParsing = parsing;
834 return 1;
835 #else
836 return parsing == XML_PARAM_ENTITY_PARSING_NEVER;
837 #endif
838 }
839
840 int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
841 {
842 if (len == 0) {
843 if (!isFinal)
844 return 1;
845 positionPtr = bufferPtr;
846 errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0);
847 if (errorCode == XML_ERROR_NONE)
848 return 1;
849 eventEndPtr = eventPtr;
850 processor = errorProcessor;
851 return 0;
852 }
853 else if (bufferPtr == bufferEnd) {
854 const char *end;
855 int nLeftOver;
856 parseEndByteIndex += len;
857 positionPtr = s;
858 if (isFinal) {
859 errorCode = processor(parser, s, parseEndPtr = s + len, 0);
860 if (errorCode == XML_ERROR_NONE)
861 return 1;
862 eventEndPtr = eventPtr;
863 processor = errorProcessor;
864 return 0;
865 }
866 errorCode = processor(parser, s, parseEndPtr = s + len, &end);
867 if (errorCode != XML_ERROR_NONE) {
868 eventEndPtr = eventPtr;
869 processor = errorProcessor;
870 return 0;
871 }
872 XmlUpdatePosition(encoding, positionPtr, end, &position);
873 nLeftOver = s + len - end;
874 if (nLeftOver) {
875 if (buffer == 0 || nLeftOver > bufferLim - buffer) {
876 /* FIXME avoid integer overflow */
877 buffer = buffer == 0 ? malloc(len * 2) : realloc(buffer, len * 2);
878 /* FIXME storage leak if realloc fails */
879 if (!buffer) {
880 errorCode = XML_ERROR_NO_MEMORY;
881 eventPtr = eventEndPtr = 0;
882 processor = errorProcessor;
883 return 0;
884 }
885 bufferLim = buffer + len * 2;
886 }
887 memcpy(buffer, end, nLeftOver);
888 bufferPtr = buffer;
889 bufferEnd = buffer + nLeftOver;
890 }
891 return 1;
892 }
893 else {
894 memcpy(XML_GetBuffer(parser, len), s, len);
895 return XML_ParseBuffer(parser, len, isFinal);
896 }
897 }
898
899 int XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
900 {
901 const char *start = bufferPtr;
902 positionPtr = start;
903 bufferEnd += len;
904 parseEndByteIndex += len;
905 errorCode = processor(parser, start, parseEndPtr = bufferEnd,
906 isFinal ? (const char **)0 : &bufferPtr);
907 if (errorCode == XML_ERROR_NONE) {
908 if (!isFinal)
909 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
910 return 1;
911 }
912 else {
913 eventEndPtr = eventPtr;
914 processor = errorProcessor;
915 return 0;
916 }
917 }
918
919 void *XML_GetBuffer(XML_Parser parser, int len)
920 {
921 if (len > bufferLim - bufferEnd) {
922 /* FIXME avoid integer overflow */
923 int neededSize = len + (bufferEnd - bufferPtr);
924 if (neededSize <= bufferLim - buffer) {
925 memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
926 bufferEnd = buffer + (bufferEnd - bufferPtr);
927 bufferPtr = buffer;
928 }
929 else {
930 char *newBuf;
931 int bufferSize = bufferLim - bufferPtr;
932 if (bufferSize == 0)
933 bufferSize = INIT_BUFFER_SIZE;
934 do {
935 bufferSize *= 2;
936 } while (bufferSize < neededSize);
937 newBuf = malloc(bufferSize);
938 if (newBuf == 0) {
939 errorCode = XML_ERROR_NO_MEMORY;
940 return 0;
941 }
942 bufferLim = newBuf + bufferSize;
943 if (bufferPtr) {
944 memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
945 free(buffer);
946 }
947 bufferEnd = newBuf + (bufferEnd - bufferPtr);
948 bufferPtr = buffer = newBuf;
949 }
950 }
951 return bufferEnd;
952 }
953
954 enum XML_Error XML_GetErrorCode(XML_Parser parser)
955 {
956 return errorCode;
957 }
958
959 long XML_GetCurrentByteIndex(XML_Parser parser)
960 {
961 if (eventPtr)
962 return parseEndByteIndex - (parseEndPtr - eventPtr);
963 return -1;
964 }
965
966 int XML_GetCurrentByteCount(XML_Parser parser)
967 {
968 if (eventEndPtr && eventPtr)
969 return eventEndPtr - eventPtr;
970 return 0;
971 }
972
973 int XML_GetCurrentLineNumber(XML_Parser parser)
974 {
975 if (eventPtr) {
976 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
977 positionPtr = eventPtr;
978 }
979 return position.lineNumber + 1;
980 }
981
982 int XML_GetCurrentColumnNumber(XML_Parser parser)
983 {
984 if (eventPtr) {
985 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
986 positionPtr = eventPtr;
987 }
988 return position.columnNumber;
989 }
990
991 void XML_DefaultCurrent(XML_Parser parser)
992 {
993 if (defaultHandler) {
994 if (openInternalEntities)
995 reportDefault(parser,
996 internalEncoding,
997 openInternalEntities->internalEventPtr,
998 openInternalEntities->internalEventEndPtr);
999 else
1000 reportDefault(parser, encoding, eventPtr, eventEndPtr);
1001 }
1002 }
1003
1004 const XML_LChar *XML_ErrorString(int code)
1005 {
1006 static const XML_LChar *message[] = {
1007 0,
1008 XML_T("out of memory"),
1009 XML_T("syntax error"),
1010 XML_T("no element found"),
1011 XML_T("not well-formed"),
1012 XML_T("unclosed token"),
1013 XML_T("unclosed token"),
1014 XML_T("mismatched tag"),
1015 XML_T("duplicate attribute"),
1016 XML_T("junk after document element"),
1017 XML_T("illegal parameter entity reference"),
1018 XML_T("undefined entity"),
1019 XML_T("recursive entity reference"),
1020 XML_T("asynchronous entity"),
1021 XML_T("reference to invalid character number"),
1022 XML_T("reference to binary entity"),
1023 XML_T("reference to external entity in attribute"),
1024 XML_T("xml processing instruction not at start of external entity"),
1025 XML_T("unknown encoding"),
1026 XML_T("encoding specified in XML declaration is incorrect"),
1027 XML_T("unclosed CDATA section"),
1028 XML_T("error in processing external entity reference"),
1029 XML_T("document is not standalone")
1030 };
1031 if (code > 0 && code < sizeof(message)/sizeof(message[0]))
1032 return message[code];
1033 return 0;
1034 }
1035
1036 static
1037 enum XML_Error contentProcessor(XML_Parser parser,
1038 const char *start,
1039 const char *end,
1040 const char **endPtr)
1041 {
1042 return doContent(parser, 0, encoding, start, end, endPtr);
1043 }
1044
1045 static
1046 enum XML_Error externalEntityInitProcessor(XML_Parser parser,
1047 const char *start,
1048 const char *end,
1049 const char **endPtr)
1050 {
1051 enum XML_Error result = initializeEncoding(parser);
1052 if (result != XML_ERROR_NONE)
1053 return result;
1054 processor = externalEntityInitProcessor2;
1055 return externalEntityInitProcessor2(parser, start, end, endPtr);
1056 }
1057
1058 static
1059 enum XML_Error externalEntityInitProcessor2(XML_Parser parser,
1060 const char *start,
1061 const char *end,
1062 const char **endPtr)
1063 {
1064 const char *next;
1065 int tok = XmlContentTok(encoding, start, end, &next);
1066 switch (tok) {
1067 case XML_TOK_BOM:
1068 start = next;
1069 break;
1070 case XML_TOK_PARTIAL:
1071 if (endPtr) {
1072 *endPtr = start;
1073 return XML_ERROR_NONE;
1074 }
1075 eventPtr = start;
1076 return XML_ERROR_UNCLOSED_TOKEN;
1077 case XML_TOK_PARTIAL_CHAR:
1078 if (endPtr) {
1079 *endPtr = start;
1080 return XML_ERROR_NONE;
1081 }
1082 eventPtr = start;
1083 return XML_ERROR_PARTIAL_CHAR;
1084 }
1085 processor = externalEntityInitProcessor3;
1086 return externalEntityInitProcessor3(parser, start, end, endPtr);
1087 }
1088
1089 static
1090 enum XML_Error externalEntityInitProcessor3(XML_Parser parser,
1091 const char *start,
1092 const char *end,
1093 const char **endPtr)
1094 {
1095 const char *next;
1096 int tok = XmlContentTok(encoding, start, end, &next);
1097 switch (tok) {
1098 case XML_TOK_XML_DECL:
1099 {
1100 enum XML_Error result = processXmlDecl(parser, 1, start, next);
1101 if (result != XML_ERROR_NONE)
1102 return result;
1103 start = next;
1104 }
1105 break;
1106 case XML_TOK_PARTIAL:
1107 if (endPtr) {
1108 *endPtr = start;
1109 return XML_ERROR_NONE;
1110 }
1111 eventPtr = start;
1112 return XML_ERROR_UNCLOSED_TOKEN;
1113 case XML_TOK_PARTIAL_CHAR:
1114 if (endPtr) {
1115 *endPtr = start;
1116 return XML_ERROR_NONE;
1117 }
1118 eventPtr = start;
1119 return XML_ERROR_PARTIAL_CHAR;
1120 }
1121 processor = externalEntityContentProcessor;
1122 tagLevel = 1;
1123 return doContent(parser, 1, encoding, start, end, endPtr);
1124 }
1125
1126 static
1127 enum XML_Error externalEntityContentProcessor(XML_Parser parser,
1128 const char *start,
1129 const char *end,
1130 const char **endPtr)
1131 {
1132 return doContent(parser, 1, encoding, start, end, endPtr);
1133 }
1134
1135 static enum XML_Error
1136 doContent(XML_Parser parser,
1137 int startTagLevel,
1138 const ENCODING *enc,
1139 const char *s,
1140 const char *end,
1141 const char **nextPtr)
1142 {
1143 const char **eventPP;
1144 const char **eventEndPP;
1145 if (enc == encoding) {
1146 eventPP = &eventPtr;
1147 eventEndPP = &eventEndPtr;
1148 }
1149 else {
1150 eventPP = &(openInternalEntities->internalEventPtr);
1151 eventEndPP = &(openInternalEntities->internalEventEndPtr);
1152 }
1153 *eventPP = s;
1154 for (;;) {
1155 const char *next = s; /* XmlContentTok doesn't always set the last arg */
1156 int tok = XmlContentTok(enc, s, end, &next);
1157 *eventEndPP = next;
1158 switch (tok) {
1159 case XML_TOK_TRAILING_CR:
1160 if (nextPtr) {
1161 *nextPtr = s;
1162 return XML_ERROR_NONE;
1163 }
1164 *eventEndPP = end;
1165 if (characterDataHandler) {
1166 XML_Char c = 0xA;
1167 characterDataHandler(handlerArg, &c, 1);
1168 }
1169 else if (defaultHandler)
1170 reportDefault(parser, enc, s, end);
1171 if (startTagLevel == 0)
1172 return XML_ERROR_NO_ELEMENTS;
1173 if (tagLevel != startTagLevel)
1174 return XML_ERROR_ASYNC_ENTITY;
1175 return XML_ERROR_NONE;
1176 case XML_TOK_NONE:
1177 if (nextPtr) {
1178 *nextPtr = s;
1179 return XML_ERROR_NONE;
1180 }
1181 if (startTagLevel > 0) {
1182 if (tagLevel != startTagLevel)
1183 return XML_ERROR_ASYNC_ENTITY;
1184 return XML_ERROR_NONE;
1185 }
1186 return XML_ERROR_NO_ELEMENTS;
1187 case XML_TOK_INVALID:
1188 *eventPP = next;
1189 return XML_ERROR_INVALID_TOKEN;
1190 case XML_TOK_PARTIAL:
1191 if (nextPtr) {
1192 *nextPtr = s;
1193 return XML_ERROR_NONE;
1194 }
1195 return XML_ERROR_UNCLOSED_TOKEN;
1196 case XML_TOK_PARTIAL_CHAR:
1197 if (nextPtr) {
1198 *nextPtr = s;
1199 return XML_ERROR_NONE;
1200 }
1201 return XML_ERROR_PARTIAL_CHAR;
1202 case XML_TOK_ENTITY_REF:
1203 {
1204 const XML_Char *name;
1205 ENTITY *entity;
1206 XML_Char ch = XmlPredefinedEntityName(enc,
1207 s + enc->minBytesPerChar,
1208 next - enc->minBytesPerChar);
1209 if (ch) {
1210 if (characterDataHandler)
1211 characterDataHandler(handlerArg, &ch, 1);
1212 else if (defaultHandler)
1213 reportDefault(parser, enc, s, next);
1214 break;
1215 }
1216 name = poolStoreString(&dtd.pool, enc,
1217 s + enc->minBytesPerChar,
1218 next - enc->minBytesPerChar);
1219 if (!name)
1220 return XML_ERROR_NO_MEMORY;
1221 entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
1222 poolDiscard(&dtd.pool);
1223 if (!entity) {
1224 if (dtd.complete || dtd.standalone)
1225 return XML_ERROR_UNDEFINED_ENTITY;
1226 if (defaultHandler)
1227 reportDefault(parser, enc, s, next);
1228 break;
1229 }
1230 if (entity->open)
1231 return XML_ERROR_RECURSIVE_ENTITY_REF;
1232 if (entity->notation)
1233 return XML_ERROR_BINARY_ENTITY_REF;
1234 if (entity) {
1235 if (entity->textPtr) {
1236 enum XML_Error result;
1237 OPEN_INTERNAL_ENTITY openEntity;
1238 if (defaultHandler && !defaultExpandInternalEntities) {
1239 reportDefault(parser, enc, s, next);
1240 break;
1241 }
1242 entity->open = 1;
1243 openEntity.next = openInternalEntities;
1244 openInternalEntities = &openEntity;
1245 openEntity.entity = entity;
1246 openEntity.internalEventPtr = 0;
1247 openEntity.internalEventEndPtr = 0;
1248 result = doContent(parser,
1249 tagLevel,
1250 internalEncoding,
1251 (char *)entity->textPtr,
1252 (char *)(entity->textPtr + entity->textLen),
1253 0);
1254 entity->open = 0;
1255 openInternalEntities = openEntity.next;
1256 if (result)
1257 return result;
1258 }
1259 else if (externalEntityRefHandler) {
1260 const XML_Char *context;
1261 entity->open = 1;
1262 context = getContext(parser);
1263 entity->open = 0;
1264 if (!context)
1265 return XML_ERROR_NO_MEMORY;
1266 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
1267 context,
1268 entity->base,
1269 entity->systemId,
1270 entity->publicId))
1271 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
1272 poolDiscard(&tempPool);
1273 }
1274 else if (defaultHandler)
1275 reportDefault(parser, enc, s, next);
1276 }
1277 break;
1278 }
1279 case XML_TOK_START_TAG_WITH_ATTS:
1280 if (!startElementHandler) {
1281 enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1282 if (result)
1283 return result;
1284 }
1285 /* fall through */
1286 case XML_TOK_START_TAG_NO_ATTS:
1287 {
1288 TAG *tag;
1289 if (freeTagList) {
1290 tag = freeTagList;
1291 freeTagList = freeTagList->parent;
1292 }
1293 else {
1294 tag = malloc(sizeof(TAG));
1295 if (!tag)
1296 return XML_ERROR_NO_MEMORY;
1297 tag->buf = malloc(INIT_TAG_BUF_SIZE);
1298 if (!tag->buf)
1299 return XML_ERROR_NO_MEMORY;
1300 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
1301 }
1302 tag->bindings = 0;
1303 tag->parent = tagStack;
1304 tagStack = tag;
1305 tag->name.localPart = 0;
1306 tag->rawName = s + enc->minBytesPerChar;
1307 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
1308 if (nextPtr) {
1309 /* Need to guarantee that:
1310 tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)) <= tag->bufEnd - sizeof(XML_Char) */
1311 if (tag->rawNameLength + (int)(sizeof(XML_Char) - 1) + (int)sizeof(XML_Char) > tag->bufEnd - tag->buf) {
1312 int bufSize = tag->rawNameLength * 4;
1313 bufSize = ROUND_UP(bufSize, sizeof(XML_Char));
1314 tag->buf = realloc(tag->buf, bufSize);
1315 if (!tag->buf)
1316 return XML_ERROR_NO_MEMORY;
1317 tag->bufEnd = tag->buf + bufSize;
1318 }
1319 memcpy(tag->buf, tag->rawName, tag->rawNameLength);
1320 tag->rawName = tag->buf;
1321 }
1322 ++tagLevel;
1323 if (startElementHandler) {
1324 enum XML_Error result;
1325 XML_Char *toPtr;
1326 for (;;) {
1327 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
1328 const char *fromPtr = tag->rawName;
1329 int bufSize;
1330 if (nextPtr)
1331 toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)));
1332 else
1333 toPtr = (XML_Char *)tag->buf;
1334 tag->name.str = toPtr;
1335 XmlConvert(enc,
1336 &fromPtr, rawNameEnd,
1337 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
1338 if (fromPtr == rawNameEnd)
1339 break;
1340 bufSize = (tag->bufEnd - tag->buf) << 1;
1341 tag->buf = realloc(tag->buf, bufSize);
1342 if (!tag->buf)
1343 return XML_ERROR_NO_MEMORY;
1344 tag->bufEnd = tag->buf + bufSize;
1345 if (nextPtr)
1346 tag->rawName = tag->buf;
1347 }
1348 *toPtr = XML_T('\0');
1349 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
1350 if (result)
1351 return result;
1352 startElementHandler(handlerArg, tag->name.str, (const XML_Char **)atts);
1353 poolClear(&tempPool);
1354 }
1355 else {
1356 tag->name.str = 0;
1357 if (defaultHandler)
1358 reportDefault(parser, enc, s, next);
1359 }
1360 break;
1361 }
1362 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
1363 if (!startElementHandler) {
1364 enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1365 if (result)
1366 return result;
1367 }
1368 /* fall through */
1369 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
1370 if (startElementHandler || endElementHandler) {
1371 const char *rawName = s + enc->minBytesPerChar;
1372 enum XML_Error result;
1373 BINDING *bindings = 0;
1374 TAG_NAME name;
1375 name.str = poolStoreString(&tempPool, enc, rawName,
1376 rawName + XmlNameLength(enc, rawName));
1377 if (!name.str)
1378 return XML_ERROR_NO_MEMORY;
1379 poolFinish(&tempPool);
1380 result = storeAtts(parser, enc, s, &name, &bindings);
1381 if (result)
1382 return result;
1383 poolFinish(&tempPool);
1384 if (startElementHandler)
1385 startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
1386 if (endElementHandler) {
1387 if (startElementHandler)
1388 *eventPP = *eventEndPP;
1389 endElementHandler(handlerArg, name.str);
1390 }
1391 poolClear(&tempPool);
1392 while (bindings) {
1393 BINDING *b = bindings;
1394 if (endNamespaceDeclHandler)
1395 endNamespaceDeclHandler(handlerArg, b->prefix->name);
1396 bindings = bindings->nextTagBinding;
1397 b->nextTagBinding = freeBindingList;
1398 freeBindingList = b;
1399 b->prefix->binding = b->prevPrefixBinding;
1400 }
1401 }
1402 else if (defaultHandler)
1403 reportDefault(parser, enc, s, next);
1404 if (tagLevel == 0)
1405 return epilogProcessor(parser, next, end, nextPtr);
1406 break;
1407 case XML_TOK_END_TAG:
1408 if (tagLevel == startTagLevel)
1409 return XML_ERROR_ASYNC_ENTITY;
1410 else {
1411 int len;
1412 const char *rawName;
1413 TAG *tag = tagStack;
1414 tagStack = tag->parent;
1415 tag->parent = freeTagList;
1416 freeTagList = tag;
1417 rawName = s + enc->minBytesPerChar*2;
1418 len = XmlNameLength(enc, rawName);
1419 if (len != tag->rawNameLength
1420 || memcmp(tag->rawName, rawName, len) != 0) {
1421 *eventPP = rawName;
1422 return XML_ERROR_TAG_MISMATCH;
1423 }
1424 --tagLevel;
1425 if (endElementHandler && tag->name.str) {
1426 if (tag->name.localPart) {
1427 XML_Char *to = (XML_Char *)tag->name.str + tag->name.uriLen;
1428 const XML_Char *from = tag->name.localPart;
1429 while ((*to++ = *from++) != 0)
1430 ;
1431 }
1432 endElementHandler(handlerArg, tag->name.str);
1433 }
1434 else if (defaultHandler)
1435 reportDefault(parser, enc, s, next);
1436 while (tag->bindings) {
1437 BINDING *b = tag->bindings;
1438 if (endNamespaceDeclHandler)
1439 endNamespaceDeclHandler(handlerArg, b->prefix->name);
1440 tag->bindings = tag->bindings->nextTagBinding;
1441 b->nextTagBinding = freeBindingList;
1442 freeBindingList = b;
1443 b->prefix->binding = b->prevPrefixBinding;
1444 }
1445 if (tagLevel == 0)
1446 return epilogProcessor(parser, next, end, nextPtr);
1447 }
1448 break;
1449 case XML_TOK_CHAR_REF:
1450 {
1451 int n = XmlCharRefNumber(enc, s);
1452 if (n < 0)
1453 return XML_ERROR_BAD_CHAR_REF;
1454 if (characterDataHandler) {
1455 XML_Char buf[XML_ENCODE_MAX];
1456 characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
1457 }
1458 else if (defaultHandler)
1459 reportDefault(parser, enc, s, next);
1460 }
1461 break;
1462 case XML_TOK_XML_DECL:
1463 return XML_ERROR_MISPLACED_XML_PI;
1464 case XML_TOK_DATA_NEWLINE:
1465 if (characterDataHandler) {
1466 XML_Char c = 0xA;
1467 characterDataHandler(handlerArg, &c, 1);
1468 }
1469 else if (defaultHandler)
1470 reportDefault(parser, enc, s, next);
1471 break;
1472 case XML_TOK_CDATA_SECT_OPEN:
1473 {
1474 enum XML_Error result;
1475 if (startCdataSectionHandler)
1476 startCdataSectionHandler(handlerArg);
1477 #if 0
1478 /* Suppose you doing a transformation on a document that involves
1479 changing only the character data. You set up a defaultHandler
1480 and a characterDataHandler. The defaultHandler simply copies
1481 characters through. The characterDataHandler does the transformation
1482 and writes the characters out escaping them as necessary. This case
1483 will fail to work if we leave out the following two lines (because &
1484 and < inside CDATA sections will be incorrectly escaped).
1485
1486 However, now we have a start/endCdataSectionHandler, so it seems
1487 easier to let the user deal with this. */
1488
1489 else if (characterDataHandler)
1490 characterDataHandler(handlerArg, dataBuf, 0);
1491 #endif
1492 else if (defaultHandler)
1493 reportDefault(parser, enc, s, next);
1494 result = doCdataSection(parser, enc, &next, end, nextPtr);
1495 if (!next) {
1496 processor = cdataSectionProcessor;
1497 return result;
1498 }
1499 }
1500 break;
1501 case XML_TOK_TRAILING_RSQB:
1502 if (nextPtr) {
1503 *nextPtr = s;
1504 return XML_ERROR_NONE;
1505 }
1506 if (characterDataHandler) {
1507 if (MUST_CONVERT(enc, s)) {
1508 ICHAR *dataPtr = (ICHAR *)dataBuf;
1509 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
1510 characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1511 }
1512 else
1513 characterDataHandler(handlerArg,
1514 (XML_Char *)s,
1515 (XML_Char *)end - (XML_Char *)s);
1516 }
1517 else if (defaultHandler)
1518 reportDefault(parser, enc, s, end);
1519 if (startTagLevel == 0) {
1520 *eventPP = end;
1521 return XML_ERROR_NO_ELEMENTS;
1522 }
1523 if (tagLevel != startTagLevel) {
1524 *eventPP = end;
1525 return XML_ERROR_ASYNC_ENTITY;
1526 }
1527 return XML_ERROR_NONE;
1528 case XML_TOK_DATA_CHARS:
1529 if (characterDataHandler) {
1530 if (MUST_CONVERT(enc, s)) {
1531 for (;;) {
1532 ICHAR *dataPtr = (ICHAR *)dataBuf;
1533 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1534 *eventEndPP = s;
1535 characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1536 if (s == next)
1537 break;
1538 *eventPP = s;
1539 }
1540 }
1541 else
1542 characterDataHandler(handlerArg,
1543 (XML_Char *)s,
1544 (XML_Char *)next - (XML_Char *)s);
1545 }
1546 else if (defaultHandler)
1547 reportDefault(parser, enc, s, next);
1548 break;
1549 case XML_TOK_PI:
1550 if (!reportProcessingInstruction(parser, enc, s, next))
1551 return XML_ERROR_NO_MEMORY;
1552 break;
1553 case XML_TOK_COMMENT:
1554 if (!reportComment(parser, enc, s, next))
1555 return XML_ERROR_NO_MEMORY;
1556 break;
1557 default:
1558 if (defaultHandler)
1559 reportDefault(parser, enc, s, next);
1560 break;
1561 }
1562 *eventPP = s = next;
1563 }
1564 /* not reached */
1565 }
1566
1567 /* If tagNamePtr is non-null, build a real list of attributes,
1568 otherwise just check the attributes for well-formedness. */
1569
1570 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
1571 const char *attStr, TAG_NAME *tagNamePtr,
1572 BINDING **bindingsPtr)
1573 {
1574 ELEMENT_TYPE *elementType = 0;
1575 int nDefaultAtts = 0;
1576 const XML_Char **appAtts;
1577 int attIndex = 0;
1578 int i;
1579 int n;
1580 int nPrefixes = 0;
1581 BINDING *binding;
1582 const XML_Char *localPart;
1583
1584 if (tagNamePtr) {
1585 elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, 0);
1586 if (!elementType) {
1587 tagNamePtr->str = poolCopyString(&dtd.pool, tagNamePtr->str);
1588 if (!tagNamePtr->str)
1589 return XML_ERROR_NO_MEMORY;
1590 elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, sizeof(ELEMENT_TYPE));
1591 if (!elementType)
1592 return XML_ERROR_NO_MEMORY;
1593 if (ns && !setElementTypePrefix(parser, elementType))
1594 return XML_ERROR_NO_MEMORY;
1595 }
1596 nDefaultAtts = elementType->nDefaultAtts;
1597 }
1598 n = XmlGetAttributes(enc, attStr, attsSize, atts);
1599 if (n + nDefaultAtts > attsSize) {
1600 int oldAttsSize = attsSize;
1601 attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
1602 atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE));
1603 if (!atts)
1604 return XML_ERROR_NO_MEMORY;
1605 if (n > oldAttsSize)
1606 XmlGetAttributes(enc, attStr, n, atts);
1607 }
1608 appAtts = (const XML_Char **)atts;
1609 for (i = 0; i < n; i++) {
1610 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name,
1611 atts[i].name
1612 + XmlNameLength(enc, atts[i].name));
1613 if (!attId)
1614 return XML_ERROR_NO_MEMORY;
1615 if ((attId->name)[-1]) {
1616 if (enc == encoding)
1617 eventPtr = atts[i].name;
1618 return XML_ERROR_DUPLICATE_ATTRIBUTE;
1619 }
1620 (attId->name)[-1] = 1;
1621 appAtts[attIndex++] = attId->name;
1622 if (!atts[i].normalized) {
1623 enum XML_Error result;
1624 int isCdata = 1;
1625
1626 if (attId->maybeTokenized) {
1627 int j;
1628 for (j = 0; j < nDefaultAtts; j++) {
1629 if (attId == elementType->defaultAtts[j].id) {
1630 isCdata = elementType->defaultAtts[j].isCdata;
1631 break;
1632 }
1633 }
1634 }
1635
1636 result = storeAttributeValue(parser, enc, isCdata,
1637 atts[i].valuePtr, atts[i].valueEnd,
1638 &tempPool);
1639 if (result)
1640 return result;
1641 if (tagNamePtr) {
1642 appAtts[attIndex] = poolStart(&tempPool);
1643 poolFinish(&tempPool);
1644 }
1645 else
1646 poolDiscard(&tempPool);
1647 }
1648 else if (tagNamePtr) {
1649 appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd);
1650 if (appAtts[attIndex] == 0)
1651 return XML_ERROR_NO_MEMORY;
1652 poolFinish(&tempPool);
1653 }
1654 if (attId->prefix && tagNamePtr) {
1655 if (attId->xmlns) {
1656 if (!addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr))
1657 return XML_ERROR_NO_MEMORY;
1658 --attIndex;
1659 }
1660 else {
1661 attIndex++;
1662 nPrefixes++;
1663 (attId->name)[-1] = 2;
1664 }
1665 }
1666 else
1667 attIndex++;
1668 }
1669 nSpecifiedAtts = attIndex;
1670 if (tagNamePtr) {
1671 int j;
1672 for (j = 0; j < nDefaultAtts; j++) {
1673 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j;
1674 if (!(da->id->name)[-1] && da->value) {
1675 if (da->id->prefix) {
1676 if (da->id->xmlns) {
1677 if (!addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr))
1678 return XML_ERROR_NO_MEMORY;
1679 }
1680 else {
1681 (da->id->name)[-1] = 2;
1682 nPrefixes++;
1683 appAtts[attIndex++] = da->id->name;
1684 appAtts[attIndex++] = da->value;
1685 }
1686 }
1687 else {
1688 (da->id->name)[-1] = 1;
1689 appAtts[attIndex++] = da->id->name;
1690 appAtts[attIndex++] = da->value;
1691 }
1692 }
1693 }
1694 appAtts[attIndex] = 0;
1695 }
1696 i = 0;
1697 if (nPrefixes) {
1698 for (; i < attIndex; i += 2) {
1699 if (appAtts[i][-1] == 2) {
1700 ATTRIBUTE_ID *id;
1701 ((XML_Char *)(appAtts[i]))[-1] = 0;
1702 id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, appAtts[i], 0);
1703 if (id->prefix->binding) {
1704 int j;
1705 const BINDING *b = id->prefix->binding;
1706 const XML_Char *s = appAtts[i];
1707 for (j = 0; j < b->uriLen; j++) {
1708 if (!poolAppendChar(&tempPool, b->uri[j]))
1709 return XML_ERROR_NO_MEMORY;
1710 }
1711 while (*s++ != ':')
1712 ;
1713 do {
1714 if (!poolAppendChar(&tempPool, *s))
1715 return XML_ERROR_NO_MEMORY;
1716 } while (*s++);
1717 appAtts[i] = poolStart(&tempPool);
1718 poolFinish(&tempPool);
1719 }
1720 if (!--nPrefixes)
1721 break;
1722 }
1723 else
1724 ((XML_Char *)(appAtts[i]))[-1] = 0;
1725 }
1726 }
1727 for (; i < attIndex; i += 2)
1728 ((XML_Char *)(appAtts[i]))[-1] = 0;
1729 if (!tagNamePtr)
1730 return XML_ERROR_NONE;
1731 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
1732 binding->attId->name[-1] = 0;
1733 if (elementType->prefix) {
1734 binding = elementType->prefix->binding;
1735 if (!binding)
1736 return XML_ERROR_NONE;
1737 localPart = tagNamePtr->str;
1738 while (*localPart++ != XML_T(':'))
1739 ;
1740 }
1741 else if (dtd.defaultPrefix.binding) {
1742 binding = dtd.defaultPrefix.binding;
1743 localPart = tagNamePtr->str;
1744 }
1745 else
1746 return XML_ERROR_NONE;
1747 tagNamePtr->localPart = localPart;
1748 tagNamePtr->uriLen = binding->uriLen;
1749 i = binding->uriLen;
1750 do {
1751 if (i == binding->uriAlloc) {
1752 binding->uri = realloc(binding->uri, binding->uriAlloc *= 2);
1753 if (!binding->uri)
1754 return XML_ERROR_NO_MEMORY;
1755 }
1756 binding->uri[i++] = *localPart;
1757 } while (*localPart++);
1758 tagNamePtr->str = binding->uri;
1759 return XML_ERROR_NONE;
1760 }
1761
1762 static
1763 int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr)
1764 {
1765 BINDING *b;
1766 int len;
1767 for (len = 0; uri[len]; len++)
1768 ;
1769 if (namespaceSeparator)
1770 len++;
1771 if (freeBindingList) {
1772 b = freeBindingList;
1773 if (len > b->uriAlloc) {
1774 b->uri = realloc(b->uri, len + EXPAND_SPARE);
1775 if (!b->uri)
1776 return 0;
1777 b->uriAlloc = len + EXPAND_SPARE;
1778 }
1779 freeBindingList = b->nextTagBinding;
1780 }
1781 else {
1782 b = malloc(sizeof(BINDING));
1783 if (!b)
1784 return 0;
1785 b->uri = malloc(sizeof(XML_Char) * len + EXPAND_SPARE);
1786 if (!b->uri) {
1787 free(b);
1788 return 0;
1789 }
1790 b->uriAlloc = len;
1791 }
1792 b->uriLen = len;
1793 memcpy(b->uri, uri, len * sizeof(XML_Char));
1794 if (namespaceSeparator)
1795 b->uri[len - 1] = namespaceSeparator;
1796 b->prefix = prefix;
1797 b->attId = attId;
1798 b->prevPrefixBinding = prefix->binding;
1799 if (*uri == XML_T('\0') && prefix == &dtd.defaultPrefix)
1800 prefix->binding = 0;
1801 else
1802 prefix->binding = b;
1803 b->nextTagBinding = *bindingsPtr;
1804 *bindingsPtr = b;
1805 if (startNamespaceDeclHandler)
1806 startNamespaceDeclHandler(handlerArg, prefix->name,
1807 prefix->binding ? uri : 0);
1808 return 1;
1809 }
1810
1811 /* The idea here is to avoid using stack for each CDATA section when
1812 the whole file is parsed with one call. */
1813
1814 static
1815 enum XML_Error cdataSectionProcessor(XML_Parser parser,
1816 const char *start,
1817 const char *end,
1818 const char **endPtr)
1819 {
1820 enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr);
1821 if (start) {
1822 processor = contentProcessor;
1823 return contentProcessor(parser, start, end, endPtr);
1824 }
1825 return result;
1826 }
1827
1828 /* startPtr gets set to non-null is the section is closed, and to null if
1829 the section is not yet closed. */
1830
1831 static
1832 enum XML_Error doCdataSection(XML_Parser parser,
1833 const ENCODING *enc,
1834 const char **startPtr,
1835 const char *end,
1836 const char **nextPtr)
1837 {
1838 const char *s = *startPtr;
1839 const char **eventPP;
1840 const char **eventEndPP;
1841 if (enc == encoding) {
1842 eventPP = &eventPtr;
1843 *eventPP = s;
1844 eventEndPP = &eventEndPtr;
1845 }
1846 else {
1847 eventPP = &(openInternalEntities->internalEventPtr);
1848 eventEndPP = &(openInternalEntities->internalEventEndPtr);
1849 }
1850 *eventPP = s;
1851 *startPtr = 0;
1852 for (;;) {
1853 const char *next;
1854 int tok = XmlCdataSectionTok(enc, s, end, &next);
1855 *eventEndPP = next;
1856 switch (tok) {
1857 case XML_TOK_CDATA_SECT_CLOSE:
1858 if (endCdataSectionHandler)
1859 endCdataSectionHandler(handlerArg);
1860 #if 0
1861 /* see comment under XML_TOK_CDATA_SECT_OPEN */
1862 else if (characterDataHandler)
1863 characterDataHandler(handlerArg, dataBuf, 0);
1864 #endif
1865 else if (defaultHandler)
1866 reportDefault(parser, enc, s, next);
1867 *startPtr = next;
1868 return XML_ERROR_NONE;
1869 case XML_TOK_DATA_NEWLINE:
1870 if (characterDataHandler) {
1871 XML_Char c = 0xA;
1872 characterDataHandler(handlerArg, &c, 1);
1873 }
1874 else if (defaultHandler)
1875 reportDefault(parser, enc, s, next);
1876 break;
1877 case XML_TOK_DATA_CHARS:
1878 if (characterDataHandler) {
1879 if (MUST_CONVERT(enc, s)) {
1880 for (;;) {
1881 ICHAR *dataPtr = (ICHAR *)dataBuf;
1882 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1883 *eventEndPP = next;
1884 characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1885 if (s == next)
1886 break;
1887 *eventPP = s;
1888 }
1889 }
1890 else
1891 characterDataHandler(handlerArg,
1892 (XML_Char *)s,
1893 (XML_Char *)next - (XML_Char *)s);
1894 }
1895 else if (defaultHandler)
1896 reportDefault(parser, enc, s, next);
1897 break;
1898 case XML_TOK_INVALID:
1899 *eventPP = next;
1900 return XML_ERROR_INVALID_TOKEN;
1901 case XML_TOK_PARTIAL_CHAR:
1902 if (nextPtr) {
1903 *nextPtr = s;
1904 return XML_ERROR_NONE;
1905 }
1906 return XML_ERROR_PARTIAL_CHAR;
1907 case XML_TOK_PARTIAL:
1908 case XML_TOK_NONE:
1909 if (nextPtr) {
1910 *nextPtr = s;
1911 return XML_ERROR_NONE;
1912 }
1913 return XML_ERROR_UNCLOSED_CDATA_SECTION;
1914 default:
1915 abort();
1916 }
1917 *eventPP = s = next;
1918 }
1919 /* not reached */
1920 }
1921
1922 #ifdef XML_DTD
1923
1924 /* The idea here is to avoid using stack for each IGNORE section when
1925 the whole file is parsed with one call. */
1926
1927 static
1928 enum XML_Error ignoreSectionProcessor(XML_Parser parser,
1929 const char *start,
1930 const char *end,
1931 const char **endPtr)
1932 {
1933 enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, endPtr);
1934 if (start) {
1935 processor = prologProcessor;
1936 return prologProcessor(parser, start, end, endPtr);
1937 }
1938 return result;
1939 }
1940
1941 /* startPtr gets set to non-null is the section is closed, and to null if
1942 the section is not yet closed. */
1943
1944 static
1945 enum XML_Error doIgnoreSection(XML_Parser parser,
1946 const ENCODING *enc,
1947 const char **startPtr,
1948 const char *end,
1949 const char **nextPtr)
1950 {
1951 const char *next;
1952 int tok;
1953 const char *s = *startPtr;
1954 const char **eventPP;
1955 const char **eventEndPP;
1956 if (enc == encoding) {
1957 eventPP = &eventPtr;
1958 *eventPP = s;
1959 eventEndPP = &eventEndPtr;
1960 }
1961 else {
1962 eventPP = &(openInternalEntities->internalEventPtr);
1963 eventEndPP = &(openInternalEntities->internalEventEndPtr);
1964 }
1965 *eventPP = s;
1966 *startPtr = 0;
1967 tok = XmlIgnoreSectionTok(enc, s, end, &next);
1968 *eventEndPP = next;
1969 switch (tok) {
1970 case XML_TOK_IGNORE_SECT:
1971 if (defaultHandler)
1972 reportDefault(parser, enc, s, next);
1973 *startPtr = next;
1974 return XML_ERROR_NONE;
1975 case XML_TOK_INVALID:
1976 *eventPP = next;
1977 return XML_ERROR_INVALID_TOKEN;
1978 case XML_TOK_PARTIAL_CHAR:
1979 if (nextPtr) {
1980 *nextPtr = s;
1981 return XML_ERROR_NONE;
1982 }
1983 return XML_ERROR_PARTIAL_CHAR;
1984 case XML_TOK_PARTIAL:
1985 case XML_TOK_NONE:
1986 if (nextPtr) {
1987 *nextPtr = s;
1988 return XML_ERROR_NONE;
1989 }
1990 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
1991 default:
1992 abort();
1993 }
1994 /* not reached */
1995 }
1996
1997 #endif /* XML_DTD */
1998
1999 static enum XML_Error
2000 initializeEncoding(XML_Parser parser)
2001 {
2002 const char *s;
2003 #ifdef XML_UNICODE
2004 char encodingBuf[128];
2005 if (!protocolEncodingName)
2006 s = 0;
2007 else {
2008 int i;
2009 for (i = 0; protocolEncodingName[i]; i++) {
2010 if (i == sizeof(encodingBuf) - 1
2011 || (protocolEncodingName[i] & ~0x7f) != 0) {
2012 encodingBuf[0] = '\0';
2013 break;
2014 }
2015 encodingBuf[i] = (char)protocolEncodingName[i];
2016 }
2017 encodingBuf[i] = '\0';
2018 s = encodingBuf;
2019 }
2020 #else
2021 s = protocolEncodingName;
2022 #endif
2023 if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
2024 return XML_ERROR_NONE;
2025 return handleUnknownEncoding(parser, protocolEncodingName);
2026 }
2027
2028 static enum XML_Error
2029 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
2030 const char *s, const char *next)
2031 {
2032 const char *encodingName = 0;
2033 const ENCODING *newEncoding = 0;
2034 const char *version;
2035 int standalone = -1;
2036 if (!(ns
2037 ? XmlParseXmlDeclNS
2038 : XmlParseXmlDecl)(isGeneralTextEntity,
2039 encoding,
2040 s,
2041 next,
2042 &eventPtr,
2043 &version,
2044 &encodingName,
2045 &newEncoding,
2046 &standalone))
2047 return XML_ERROR_SYNTAX;
2048 if (!isGeneralTextEntity && standalone == 1) {
2049 dtd.standalone = 1;
2050 #ifdef XML_DTD
2051 if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
2052 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
2053 #endif /* XML_DTD */
2054 }
2055 if (defaultHandler)
2056 reportDefault(parser, encoding, s, next);
2057 if (!protocolEncodingName) {
2058 if (newEncoding) {
2059 if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
2060 eventPtr = encodingName;
2061 return XML_ERROR_INCORRECT_ENCODING;
2062 }
2063 encoding = newEncoding;
2064 }
2065 else if (encodingName) {
2066 enum XML_Error result;
2067 const XML_Char *str = poolStoreString(&tempPool,
2068 encoding,
2069 encodingName,
2070 encodingName
2071 + XmlNameLength(encoding, encodingName));
2072 if (!str)
2073 return XML_ERROR_NO_MEMORY;
2074 result = handleUnknownEncoding(parser, str);
2075 poolDiscard(&tempPool);
2076 if (result == XML_ERROR_UNKNOWN_ENCODING)
2077 eventPtr = encodingName;
2078 return result;
2079 }
2080 }
2081 return XML_ERROR_NONE;
2082 }
2083
2084 static enum XML_Error
2085 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
2086 {
2087 if (unknownEncodingHandler) {
2088 XML_Encoding info;
2089 int i;
2090 for (i = 0; i < 256; i++)
2091 info.map[i] = -1;
2092 info.convert = 0;
2093 info.data = 0;
2094 info.release = 0;
2095 if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) {
2096 ENCODING *enc;
2097 unknownEncodingMem = malloc(XmlSizeOfUnknownEncoding());
2098 if (!unknownEncodingMem) {
2099 if (info.release)
2100 info.release(info.data);
2101 return XML_ERROR_NO_MEMORY;
2102 }
2103 enc = (ns
2104 ? XmlInitUnknownEncodingNS
2105 : XmlInitUnknownEncoding)(unknownEncodingMem,
2106 info.map,
2107 info.convert,
2108 info.data);
2109 if (enc) {
2110 unknownEncodingData = info.data;
2111 unknownEncodingRelease = info.release;
2112 encoding = enc;
2113 return XML_ERROR_NONE;
2114 }
2115 }
2116 if (info.release)
2117 info.release(info.data);
2118 }
2119 return XML_ERROR_UNKNOWN_ENCODING;
2120 }
2121
2122 static enum XML_Error
2123 prologInitProcessor(XML_Parser parser,
2124 const char *s,
2125 const char *end,
2126 const char **nextPtr)
2127 {
2128 enum XML_Error result = initializeEncoding(parser);
2129 if (result != XML_ERROR_NONE)
2130 return result;
2131 processor = prologProcessor;
2132 return prologProcessor(parser, s, end, nextPtr);
2133 }
2134
2135 static enum XML_Error
2136 prologProcessor(XML_Parser parser,
2137 const char *s,
2138 const char *end,
2139 const char **nextPtr)
2140 {
2141 const char *next;
2142 int tok = XmlPrologTok(encoding, s, end, &next);
2143 return doProlog(parser, encoding, s, end, tok, next, nextPtr);
2144 }
2145
2146 static enum XML_Error
2147 doProlog(XML_Parser parser,
2148 const ENCODING *enc,
2149 const char *s,
2150 const char *end,
2151 int tok,
2152 const char *next,
2153 const char **nextPtr)
2154 {
2155 #ifdef XML_DTD
2156 static const XML_Char externalSubsetName[] = { '#' , '\0' };
2157 #endif /* XML_DTD */
2158
2159 const char **eventPP;
2160 const char **eventEndPP;
2161 if (enc == encoding) {
2162 eventPP = &eventPtr;
2163 eventEndPP = &eventEndPtr;
2164 }
2165 else {
2166 eventPP = &(openInternalEntities->internalEventPtr);
2167 eventEndPP = &(openInternalEntities->internalEventEndPtr);
2168 }
2169 for (;;) {
2170 int role;
2171 *eventPP = s;
2172 *eventEndPP = next;
2173 if (tok <= 0) {
2174 if (nextPtr != 0 && tok != XML_TOK_INVALID) {
2175 *nextPtr = s;
2176 return XML_ERROR_NONE;
2177 }
2178 switch (tok) {
2179 case XML_TOK_INVALID:
2180 *eventPP = next;
2181 return XML_ERROR_INVALID_TOKEN;
2182 case XML_TOK_PARTIAL:
2183 return XML_ERROR_UNCLOSED_TOKEN;
2184 case XML_TOK_PARTIAL_CHAR:
2185 return XML_ERROR_PARTIAL_CHAR;
2186 case XML_TOK_NONE:
2187 #ifdef XML_DTD
2188 if (enc != encoding)
2189 return XML_ERROR_NONE;
2190 if (parentParser) {
2191 if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc)
2192 == XML_ROLE_ERROR)
2193 return XML_ERROR_SYNTAX;
2194 hadExternalDoctype = 0;
2195 return XML_ERROR_NONE;
2196 }
2197 #endif /* XML_DTD */
2198 return XML_ERROR_NO_ELEMENTS;
2199 default:
2200 tok = -tok;
2201 next = end;
2202 break;
2203 }
2204 }
2205 role = XmlTokenRole(&prologState, tok, s, next, enc);
2206 switch (role) {
2207 case XML_ROLE_XML_DECL:
2208 {
2209 enum XML_Error result = processXmlDecl(parser, 0, s, next);
2210 if (result != XML_ERROR_NONE)
2211 return result;
2212 enc = encoding;
2213 }
2214 break;
2215 case XML_ROLE_DOCTYPE_NAME:
2216 if (startDoctypeDeclHandler) {
2217 const XML_Char *name = poolStoreString(&tempPool, enc, s, next);
2218 if (!name)
2219 return XML_ERROR_NO_MEMORY;
2220 startDoctypeDeclHandler(handlerArg, name);
2221 poolClear(&tempPool);
2222 }
2223 break;
2224 #ifdef XML_DTD
2225 case XML_ROLE_TEXT_DECL:
2226 {
2227 enum XML_Error result = processXmlDecl(parser, 1, s, next);
2228 if (result != XML_ERROR_NONE)
2229 return result;
2230 enc = encoding;
2231 }
2232 break;
2233 #endif /* XML_DTD */
2234 case XML_ROLE_DOCTYPE_PUBLIC_ID:
2235 #ifdef XML_DTD
2236 declEntity = (ENTITY *)lookup(&dtd.paramEntities,
2237 externalSubsetName,
2238 sizeof(ENTITY));
2239 if (!declEntity)
2240 return XML_ERROR_NO_MEMORY;
2241 #endif /* XML_DTD */
2242 /* fall through */
2243 case XML_ROLE_ENTITY_PUBLIC_ID:
2244 if (!XmlIsPublicId(enc, s, next, eventPP))
2245 return XML_ERROR_SYNTAX;
2246 if (declEntity) {
2247 XML_Char *tem = poolStoreString(&dtd.pool,
2248 enc,
2249 s + enc->minBytesPerChar,
2250 next - enc->minBytesPerChar);
2251 if (!tem)
2252 return XML_ERROR_NO_MEMORY;
2253 normalizePublicId(tem);
2254 declEntity->publicId = tem;
2255 poolFinish(&dtd.pool);
2256 }
2257 break;
2258 case XML_ROLE_DOCTYPE_CLOSE:
2259 if (dtd.complete && hadExternalDoctype) {
2260 dtd.complete = 0;
2261 #ifdef XML_DTD
2262 if (paramEntityParsing && externalEntityRefHandler) {
2263 ENTITY *entity = (ENTITY *)lookup(&dtd.paramEntities,
2264 externalSubsetName,
2265 0);
2266 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
2267 0,
2268 entity->base,
2269 entity->systemId,
2270 entity->publicId))
2271 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2272 }
2273 #endif /* XML_DTD */
2274 if (!dtd.complete
2275 && !dtd.standalone
2276 && notStandaloneHandler
2277 && !notStandaloneHandler(handlerArg))
2278 return XML_ERROR_NOT_STANDALONE;
2279 }
2280 if (endDoctypeDeclHandler)
2281 endDoctypeDeclHandler(handlerArg);
2282 break;
2283 case XML_ROLE_INSTANCE_START:
2284 processor = contentProcessor;
2285 return contentProcessor(parser, s, end, nextPtr);
2286 case XML_ROLE_ATTLIST_ELEMENT_NAME:
2287 {
2288 const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next);
2289 if (!name)
2290 return XML_ERROR_NO_MEMORY;
2291 declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE));
2292 if (!declElementType)
2293 return XML_ERROR_NO_MEMORY;
2294 if (declElementType->name != name)
2295 poolDiscard(&dtd.pool);
2296 else {
2297 poolFinish(&dtd.pool);
2298 if (!setElementTypePrefix(parser, declElementType))
2299 return XML_ERROR_NO_MEMORY;
2300 }
2301 break;
2302 }
2303 case XML_ROLE_ATTRIBUTE_NAME:
2304 declAttributeId = getAttributeId(parser, enc, s, next);
2305 if (!declAttributeId)
2306 return XML_ERROR_NO_MEMORY;
2307 declAttributeIsCdata = 0;
2308 break;
2309 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
2310 declAttributeIsCdata = 1;
2311 break;
2312 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
2313 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
2314 if (dtd.complete
2315 && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0))
2316 return XML_ERROR_NO_MEMORY;
2317 break;
2318 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
2319 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
2320 {
2321 const XML_Char *attVal;
2322 enum XML_Error result
2323 = storeAttributeValue(parser, enc, declAttributeIsCdata,
2324 s + enc->minBytesPerChar,
2325 next - enc->minBytesPerChar,
2326 &dtd.pool);
2327 if (result)
2328 return result;
2329 attVal = poolStart(&dtd.pool);
2330 poolFinish(&dtd.pool);
2331 if (dtd.complete
2332 && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, attVal))
2333 return XML_ERROR_NO_MEMORY;
2334 break;
2335 }
2336 case XML_ROLE_ENTITY_VALUE:
2337 {
2338 enum XML_Error result = storeEntityValue(parser, enc,
2339 s + enc->minBytesPerChar,
2340 next - enc->minBytesPerChar);
2341 if (declEntity) {
2342 declEntity->textPtr = poolStart(&dtd.pool);
2343 declEntity->textLen = poolLength(&dtd.pool);
2344 poolFinish(&dtd.pool);
2345 }
2346 else
2347 poolDiscard(&dtd.pool);
2348 if (result != XML_ERROR_NONE)
2349 return result;
2350 }
2351 break;
2352 case XML_ROLE_DOCTYPE_SYSTEM_ID:
2353 if (!dtd.standalone
2354 #ifdef XML_DTD
2355 && !paramEntityParsing
2356 #endif /* XML_DTD */
2357 && notStandaloneHandler
2358 && !notStandaloneHandler(handlerArg))
2359 return XML_ERROR_NOT_STANDALONE;
2360 hadExternalDoctype = 1;
2361 #ifndef XML_DTD
2362 break;
2363 #else /* XML_DTD */
2364 if (!declEntity) {
2365 declEntity = (ENTITY *)lookup(&dtd.paramEntities,
2366 externalSubsetName,
2367 sizeof(ENTITY));
2368 if (!declEntity)
2369 return XML_ERROR_NO_MEMORY;
2370 }
2371 /* fall through */
2372 #endif /* XML_DTD */
2373 case XML_ROLE_ENTITY_SYSTEM_ID:
2374 if (declEntity) {
2375 declEntity->systemId = poolStoreString(&dtd.pool, enc,
2376 s + enc->minBytesPerChar,
2377 next - enc->minBytesPerChar);
2378 if (!declEntity->systemId)
2379 return XML_ERROR_NO_MEMORY;
2380 declEntity->base = curBase;
2381 poolFinish(&dtd.pool);
2382 }
2383 break;
2384 case XML_ROLE_ENTITY_NOTATION_NAME:
2385 if (declEntity) {
2386 declEntity->notation = poolStoreString(&dtd.pool, enc, s, next);
2387 if (!declEntity->notation)
2388 return XML_ERROR_NO_MEMORY;
2389 poolFinish(&dtd.pool);
2390 if (unparsedEntityDeclHandler) {
2391 *eventEndPP = s;
2392 unparsedEntityDeclHandler(handlerArg,
2393 declEntity->name,
2394 declEntity->base,
2395 declEntity->systemId,
2396 declEntity->publicId,
2397 declEntity->notation);
2398 }
2399
2400 }
2401 break;
2402 case XML_ROLE_GENERAL_ENTITY_NAME:
2403 {
2404 const XML_Char *name;
2405 if (XmlPredefinedEntityName(enc, s, next)) {
2406 declEntity = 0;
2407 break;
2408 }
2409 name = poolStoreString(&dtd.pool, enc, s, next);
2410 if (!name)
2411 return XML_ERROR_NO_MEMORY;
2412 if (dtd.complete) {
2413 declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY));
2414 if (!declEntity)
2415 return XML_ERROR_NO_MEMORY;
2416 if (declEntity->name != name) {
2417 poolDiscard(&dtd.pool);
2418 declEntity = 0;
2419 }
2420 else
2421 poolFinish(&dtd.pool);
2422 }
2423 else {
2424 poolDiscard(&dtd.pool);
2425 declEntity = 0;
2426 }
2427 }
2428 break;
2429 case XML_ROLE_PARAM_ENTITY_NAME:
2430 #ifdef XML_DTD
2431 if (dtd.complete) {
2432 const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next);
2433 if (!name)
2434 return XML_ERROR_NO_MEMORY;
2435 declEntity = (ENTITY *)lookup(&dtd.paramEntities, name, sizeof(ENTITY));
2436 if (!declEntity)
2437 return XML_ERROR_NO_MEMORY;
2438 if (declEntity->name != name) {
2439 poolDiscard(&dtd.pool);
2440 declEntity = 0;
2441 }
2442 else
2443 poolFinish(&dtd.pool);
2444 }
2445 #else /* not XML_DTD */
2446 declEntity = 0;
2447 #endif /* not XML_DTD */
2448 break;
2449 case XML_ROLE_NOTATION_NAME:
2450 declNotationPublicId = 0;
2451 declNotationName = 0;
2452 if (notationDeclHandler) {
2453 declNotationName = poolStoreString(&tempPool, enc, s, next);
2454 if (!declNotationName)
2455 return XML_ERROR_NO_MEMORY;
2456 poolFinish(&tempPool);
2457 }
2458 break;
2459 case XML_ROLE_NOTATION_PUBLIC_ID:
2460 if (!XmlIsPublicId(enc, s, next, eventPP))
2461 return XML_ERROR_SYNTAX;
2462 if (declNotationName) {
2463 XML_Char *tem = poolStoreString(&tempPool,
2464 enc,
2465 s + enc->minBytesPerChar,
2466 next - enc->minBytesPerChar);
2467 if (!tem)
2468 return XML_ERROR_NO_MEMORY;
2469 normalizePublicId(tem);
2470 declNotationPublicId = tem;
2471 poolFinish(&tempPool);
2472 }
2473 break;
2474 case XML_ROLE_NOTATION_SYSTEM_ID:
2475 if (declNotationName && notationDeclHandler) {
2476 const XML_Char *systemId
2477 = poolStoreString(&tempPool, enc,
2478 s + enc->minBytesPerChar,
2479 next - enc->minBytesPerChar);
2480 if (!systemId)
2481 return XML_ERROR_NO_MEMORY;
2482 *eventEndPP = s;
2483 notationDeclHandler(handlerArg,
2484 declNotationName,
2485 curBase,
2486 systemId,
2487 declNotationPublicId);
2488 }
2489 poolClear(&tempPool);
2490 break;
2491 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
2492 if (declNotationPublicId && notationDeclHandler) {
2493 *eventEndPP = s;
2494 notationDeclHandler(handlerArg,
2495 declNotationName,
2496 curBase,
2497 0,
2498 declNotationPublicId);
2499 }
2500 poolClear(&tempPool);
2501 break;
2502 case XML_ROLE_ERROR:
2503 switch (tok) {
2504 case XML_TOK_PARAM_ENTITY_REF:
2505 return XML_ERROR_PARAM_ENTITY_REF;
2506 case XML_TOK_XML_DECL:
2507 return XML_ERROR_MISPLACED_XML_PI;
2508 default:
2509 return XML_ERROR_SYNTAX;
2510 }
2511 #ifdef XML_DTD
2512 case XML_ROLE_IGNORE_SECT:
2513 {
2514 enum XML_Error result;
2515 if (defaultHandler)
2516 reportDefault(parser, enc, s, next);
2517 result = doIgnoreSection(parser, enc, &next, end, nextPtr);
2518 if (!next) {
2519 processor = ignoreSectionProcessor;
2520 return result;
2521 }
2522 }
2523 break;
2524 #endif /* XML_DTD */
2525 case XML_ROLE_GROUP_OPEN:
2526 if (prologState.level >= groupSize) {
2527 if (groupSize)
2528 groupConnector = realloc(groupConnector, groupSize *= 2);
2529 else
2530 groupConnector = malloc(groupSize = 32);
2531 if (!groupConnector)
2532 return XML_ERROR_NO_MEMORY;
2533 }
2534 groupConnector[prologState.level] = 0;
2535 break;
2536 case XML_ROLE_GROUP_SEQUENCE:
2537 if (groupConnector[prologState.level] == '|')
2538 return XML_ERROR_SYNTAX;
2539 groupConnector[prologState.level] = ',';
2540 break;
2541 case XML_ROLE_GROUP_CHOICE:
2542 if (groupConnector[prologState.level] == ',')
2543 return XML_ERROR_SYNTAX;
2544 groupConnector[prologState.level] = '|';
2545 break;
2546 case XML_ROLE_PARAM_ENTITY_REF:
2547 #ifdef XML_DTD
2548 case XML_ROLE_INNER_PARAM_ENTITY_REF:
2549 if (paramEntityParsing
2550 && (dtd.complete || role == XML_ROLE_INNER_PARAM_ENTITY_REF)) {
2551 const XML_Char *name;
2552 ENTITY *entity;
2553 name = poolStoreString(&dtd.pool, enc,
2554 s + enc->minBytesPerChar,
2555 next - enc->minBytesPerChar);
2556 if (!name)
2557 return XML_ERROR_NO_MEMORY;
2558 entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0);
2559 poolDiscard(&dtd.pool);
2560 if (!entity) {
2561 /* FIXME what to do if !dtd.complete? */
2562 return XML_ERROR_UNDEFINED_ENTITY;
2563 }
2564 if (entity->open)
2565 return XML_ERROR_RECURSIVE_ENTITY_REF;
2566 if (entity->textPtr) {
2567 enum XML_Error result;
2568 result = processInternalParamEntity(parser, entity);
2569 if (result != XML_ERROR_NONE)
2570 return result;
2571 break;
2572 }
2573 if (role == XML_ROLE_INNER_PARAM_ENTITY_REF)
2574 return XML_ERROR_PARAM_ENTITY_REF;
2575 if (externalEntityRefHandler) {
2576 dtd.complete = 0;
2577 entity->open = 1;
2578 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
2579 0,
2580 entity->base,
2581 entity->systemId,
2582 entity->publicId)) {
2583 entity->open = 0;
2584 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2585 }
2586 entity->open = 0;
2587 if (dtd.complete)
2588 break;
2589 }
2590 }
2591 #endif /* XML_DTD */
2592 if (!dtd.standalone
2593 && notStandaloneHandler
2594 && !notStandaloneHandler(handlerArg))
2595 return XML_ERROR_NOT_STANDALONE;
2596 dtd.complete = 0;
2597 if (defaultHandler)
2598 reportDefault(parser, enc, s, next);
2599 break;
2600 case XML_ROLE_NONE:
2601 switch (tok) {
2602 case XML_TOK_PI:
2603 if (!reportProcessingInstruction(parser, enc, s, next))
2604 return XML_ERROR_NO_MEMORY;
2605 break;
2606 case XML_TOK_COMMENT:
2607 if (!reportComment(parser, enc, s, next))
2608 return XML_ERROR_NO_MEMORY;
2609 break;
2610 }
2611 break;
2612 }
2613 if (defaultHandler) {
2614 switch (tok) {
2615 case XML_TOK_PI:
2616 case XML_TOK_COMMENT:
2617 case XML_TOK_BOM:
2618 case XML_TOK_XML_DECL:
2619 #ifdef XML_DTD
2620 case XML_TOK_IGNORE_SECT:
2621 #endif /* XML_DTD */
2622 case XML_TOK_PARAM_ENTITY_REF:
2623 break;
2624 default:
2625 if (role != XML_ROLE_IGNORE_SECT)
2626 reportDefault(parser, enc, s, next);
2627 }
2628 }
2629 s = next;
2630 tok = XmlPrologTok(enc, s, end, &next);
2631 }
2632 /* not reached */
2633 }
2634
2635 static
2636 enum XML_Error epilogProcessor(XML_Parser parser,
2637 const char *s,
2638 const char *end,
2639 const char **nextPtr)
2640 {
2641 processor = epilogProcessor;
2642 eventPtr = s;
2643 for (;;) {
2644 const char *next;
2645 int tok = XmlPrologTok(encoding, s, end, &next);
2646 eventEndPtr = next;
2647 switch (tok) {
2648 case -XML_TOK_PROLOG_S:
2649 if (defaultHandler) {
2650 eventEndPtr = end;
2651 reportDefault(parser, encoding, s, end);
2652 }
2653 /* fall through */
2654 case XML_TOK_NONE:
2655 if (nextPtr)
2656 *nextPtr = end;
2657 return XML_ERROR_NONE;
2658 case XML_TOK_PROLOG_S:
2659 if (defaultHandler)
2660 reportDefault(parser, encoding, s, next);
2661 break;
2662 case XML_TOK_PI:
2663 if (!reportProcessingInstruction(parser, encoding, s, next))
2664 return XML_ERROR_NO_MEMORY;
2665 break;
2666 case XML_TOK_COMMENT:
2667 if (!reportComment(parser, encoding, s, next))
2668 return XML_ERROR_NO_MEMORY;
2669 break;
2670 case XML_TOK_INVALID:
2671 eventPtr = next;
2672 return XML_ERROR_INVALID_TOKEN;
2673 case XML_TOK_PARTIAL:
2674 if (nextPtr) {
2675 *nextPtr = s;
2676 return XML_ERROR_NONE;
2677 }
2678 return XML_ERROR_UNCLOSED_TOKEN;
2679 case XML_TOK_PARTIAL_CHAR:
2680 if (nextPtr) {
2681 *nextPtr = s;
2682 return XML_ERROR_NONE;
2683 }
2684 return XML_ERROR_PARTIAL_CHAR;
2685 default:
2686 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
2687 }
2688 eventPtr = s = next;
2689 }
2690 }
2691
2692 #ifdef XML_DTD
2693
2694 static enum XML_Error
2695 processInternalParamEntity(XML_Parser parser, ENTITY *entity)
2696 {
2697 const char *s, *end, *next;
2698 int tok;
2699 enum XML_Error result;
2700 OPEN_INTERNAL_ENTITY openEntity;
2701 entity->open = 1;
2702 openEntity.next = openInternalEntities;
2703 openInternalEntities = &openEntity;
2704 openEntity.entity = entity;
2705 openEntity.internalEventPtr = 0;
2706 openEntity.internalEventEndPtr = 0;
2707 s = (char *)entity->textPtr;
2708 end = (char *)(entity->textPtr + entity->textLen);
2709 tok = XmlPrologTok(internalEncoding, s, end, &next);
2710 result = doProlog(parser, internalEncoding, s, end, tok, next, 0);
2711 entity->open = 0;
2712 openInternalEntities = openEntity.next;
2713 return result;
2714 }
2715
2716 #endif /* XML_DTD */
2717
2718 static
2719 enum XML_Error errorProcessor(XML_Parser parser,
2720 const char *s,
2721 const char *end,
2722 const char **nextPtr)
2723 {
2724 return errorCode;
2725 }
2726
2727 static enum XML_Error
2728 storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
2729 const char *ptr, const char *end,
2730 STRING_POOL *pool)
2731 {
2732 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
2733 if (result)
2734 return result;
2735 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
2736 poolChop(pool);
2737 if (!poolAppendChar(pool, XML_T('\0')))
2738 return XML_ERROR_NO_MEMORY;
2739 return XML_ERROR_NONE;
2740 }
2741
2742 static enum XML_Error
2743 appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
2744 const char *ptr, const char *end,
2745 STRING_POOL *pool)
2746 {
2747 for (;;) {
2748 const char *next;
2749 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
2750 switch (tok) {
2751 case XML_TOK_NONE:
2752 return XML_ERROR_NONE;
2753 case XML_TOK_INVALID:
2754 if (enc == encoding)
2755 eventPtr = next;
2756 return XML_ERROR_INVALID_TOKEN;
2757 case XML_TOK_PARTIAL:
2758 if (enc == encoding)
2759 eventPtr = ptr;
2760 return XML_ERROR_INVALID_TOKEN;
2761 case XML_TOK_CHAR_REF:
2762 {
2763 XML_Char buf[XML_ENCODE_MAX];
2764 int i;
2765 int n = XmlCharRefNumber(enc, ptr);
2766 if (n < 0) {
2767 if (enc == encoding)
2768 eventPtr = ptr;
2769 return XML_ERROR_BAD_CHAR_REF;
2770 }
2771 if (!isCdata
2772 && n == 0x20 /* space */
2773 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
2774 break;
2775 n = XmlEncode(n, (ICHAR *)buf);
2776 if (!n) {
2777 if (enc == encoding)
2778 eventPtr = ptr;
2779 return XML_ERROR_BAD_CHAR_REF;
2780 }
2781 for (i = 0; i < n; i++) {
2782 if (!poolAppendChar(pool, buf[i]))
2783 return XML_ERROR_NO_MEMORY;
2784 }
2785 }
2786 break;
2787 case XML_TOK_DATA_CHARS:
2788 if (!poolAppend(pool, enc, ptr, next))
2789 return XML_ERROR_NO_MEMORY;
2790 break;
2791 break;
2792 case XML_TOK_TRAILING_CR:
2793 next = ptr + enc->minBytesPerChar;
2794 /* fall through */
2795 case XML_TOK_ATTRIBUTE_VALUE_S:
2796 case XML_TOK_DATA_NEWLINE:
2797 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
2798 break;
2799 if (!poolAppendChar(pool, 0x20))
2800 return XML_ERROR_NO_MEMORY;
2801 break;
2802 case XML_TOK_ENTITY_REF:
2803 {
2804 const XML_Char *name;
2805 ENTITY *entity;
2806 XML_Char ch = XmlPredefinedEntityName(enc,
2807 ptr + enc->minBytesPerChar,
2808 next - enc->minBytesPerChar);
2809 if (ch) {
2810 if (!poolAppendChar(pool, ch))
2811 return XML_ERROR_NO_MEMORY;
2812 break;
2813 }
2814 name = poolStoreString(&temp2Pool, enc,
2815 ptr + enc->minBytesPerChar,
2816 next - enc->minBytesPerChar);
2817 if (!name)
2818 return XML_ERROR_NO_MEMORY;
2819 entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
2820 poolDiscard(&temp2Pool);
2821 if (!entity) {
2822 if (dtd.complete) {
2823 if (enc == encoding)
2824 eventPtr = ptr;
2825 return XML_ERROR_UNDEFINED_ENTITY;
2826 }
2827 }
2828 else if (entity->open) {
2829 if (enc == encoding)
2830 eventPtr = ptr;
2831 return XML_ERROR_RECURSIVE_ENTITY_REF;
2832 }
2833 else if (entity->notation) {
2834 if (enc == encoding)
2835 eventPtr = ptr;
2836 return XML_ERROR_BINARY_ENTITY_REF;
2837 }
2838 else if (!entity->textPtr) {
2839 if (enc == encoding)
2840 eventPtr = ptr;
2841 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
2842 }
2843 else {
2844 enum XML_Error result;
2845 const XML_Char *textEnd = entity->textPtr + entity->textLen;
2846 entity->open = 1;
2847 result = appendAttributeValue(parser, internalEncoding, isCdata, (char *)entity->textPtr, (char *)textEnd, pool);
2848 entity->open = 0;
2849 if (result)
2850 return result;
2851 }
2852 }
2853 break;
2854 default:
2855 abort();
2856 }
2857 ptr = next;
2858 }
2859 /* not reached */
2860 }
2861
2862 static
2863 enum XML_Error storeEntityValue(XML_Parser parser,
2864 const ENCODING *enc,
2865 const char *entityTextPtr,
2866 const char *entityTextEnd)
2867 {
2868 STRING_POOL *pool = &(dtd.pool);
2869 for (;;) {
2870 const char *next;
2871 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
2872 switch (tok) {
2873 case XML_TOK_PARAM_ENTITY_REF:
2874 #ifdef XML_DTD
2875 if (parentParser || enc != encoding) {
2876 enum XML_Error result;
2877 const XML_Char *name;
2878 ENTITY *entity;
2879 name = poolStoreString(&tempPool, enc,
2880 entityTextPtr + enc->minBytesPerChar,
2881 next - enc->minBytesPerChar);
2882 if (!name)
2883 return XML_ERROR_NO_MEMORY;
2884 entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0);
2885 poolDiscard(&tempPool);
2886 if (!entity) {
2887 if (enc == encoding)
2888 eventPtr = entityTextPtr;
2889 return XML_ERROR_UNDEFINED_ENTITY;
2890 }
2891 if (entity->open) {
2892 if (enc == encoding)
2893 eventPtr = entityTextPtr;
2894 return XML_ERROR_RECURSIVE_ENTITY_REF;
2895 }
2896 if (entity->systemId) {
2897 if (enc == encoding)
2898 eventPtr = entityTextPtr;
2899 return XML_ERROR_PARAM_ENTITY_REF;
2900 }
2901 entity->open = 1;
2902 result = storeEntityValue(parser,
2903 internalEncoding,
2904 (char *)entity->textPtr,
2905 (char *)(entity->textPtr + entity->textLen));
2906 entity->open = 0;
2907 if (result)
2908 return result;
2909 break;
2910 }
2911 #endif /* XML_DTD */
2912 eventPtr = entityTextPtr;
2913 return XML_ERROR_SYNTAX;
2914 case XML_TOK_NONE:
2915 return XML_ERROR_NONE;
2916 case XML_TOK_ENTITY_REF:
2917 case XML_TOK_DATA_CHARS:
2918 if (!poolAppend(pool, enc, entityTextPtr, next))
2919 return XML_ERROR_NO_MEMORY;
2920 break;
2921 case XML_TOK_TRAILING_CR:
2922 next = entityTextPtr + enc->minBytesPerChar;
2923 /* fall through */
2924 case XML_TOK_DATA_NEWLINE:
2925 if (pool->end == pool->ptr && !poolGrow(pool))
2926 return XML_ERROR_NO_MEMORY;
2927 *(pool->ptr)++ = 0xA;
2928 break;
2929 case XML_TOK_CHAR_REF:
2930 {
2931 XML_Char buf[XML_ENCODE_MAX];
2932 int i;
2933 int n = XmlCharRefNumber(enc, entityTextPtr);
2934 if (n < 0) {
2935 if (enc == encoding)
2936 eventPtr = entityTextPtr;
2937 return XML_ERROR_BAD_CHAR_REF;
2938 }
2939 n = XmlEncode(n, (ICHAR *)buf);
2940 if (!n) {
2941 if (enc == encoding)
2942 eventPtr = entityTextPtr;
2943 return XML_ERROR_BAD_CHAR_REF;
2944 }
2945 for (i = 0; i < n; i++) {
2946 if (pool->end == pool->ptr && !poolGrow(pool))
2947 return XML_ERROR_NO_MEMORY;
2948 *(pool->ptr)++ = buf[i];
2949 }
2950 }
2951 break;
2952 case XML_TOK_PARTIAL:
2953 if (enc == encoding)
2954 eventPtr = entityTextPtr;
2955 return XML_ERROR_INVALID_TOKEN;
2956 case XML_TOK_INVALID:
2957 if (enc == encoding)
2958 eventPtr = next;
2959 return XML_ERROR_INVALID_TOKEN;
2960 default:
2961 abort();
2962 }
2963 entityTextPtr = next;
2964 }
2965 /* not reached */
2966 }
2967
2968 static void
2969 normalizeLines(XML_Char *s)
2970 {
2971 XML_Char *p;
2972 for (;; s++) {
2973 if (*s == XML_T('\0'))
2974 return;
2975 if (*s == 0xD)
2976 break;
2977 }
2978 p = s;
2979 do {
2980 if (*s == 0xD) {
2981 *p++ = 0xA;
2982 if (*++s == 0xA)
2983 s++;
2984 }
2985 else
2986 *p++ = *s++;
2987 } while (*s);
2988 *p = XML_T('\0');
2989 }
2990
2991 static int
2992 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
2993 {
2994 const XML_Char *target;
2995 XML_Char *data;
2996 const char *tem;
2997 if (!processingInstructionHandler) {
2998 if (defaultHandler)
2999 reportDefault(parser, enc, start, end);
3000 return 1;
3001 }
3002 start += enc->minBytesPerChar * 2;
3003 tem = start + XmlNameLength(enc, start);
3004 target = poolStoreString(&tempPool, enc, start, tem);
3005 if (!target)
3006 return 0;
3007 poolFinish(&tempPool);
3008 data = poolStoreString(&tempPool, enc,
3009 XmlSkipS(enc, tem),
3010 end - enc->minBytesPerChar*2);
3011 if (!data)
3012 return 0;
3013 normalizeLines(data);
3014 processingInstructionHandler(handlerArg, target, data);
3015 poolClear(&tempPool);
3016 return 1;
3017 }
3018
3019 static int
3020 reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
3021 {
3022 XML_Char *data;
3023 if (!commentHandler) {
3024 if (defaultHandler)
3025 reportDefault(parser, enc, start, end);
3026 return 1;
3027 }
3028 data = poolStoreString(&tempPool,
3029 enc,
3030 start + enc->minBytesPerChar * 4,
3031 end - enc->minBytesPerChar * 3);
3032 if (!data)
3033 return 0;
3034 normalizeLines(data);
3035 commentHandler(handlerArg, data);
3036 poolClear(&tempPool);
3037 return 1;
3038 }
3039
3040 static void
3041 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end)
3042 {
3043 if (MUST_CONVERT(enc, s)) {
3044 const char **eventPP;
3045 const char **eventEndPP;
3046 if (enc == encoding) {
3047 eventPP = &eventPtr;
3048 eventEndPP = &eventEndPtr;
3049 }
3050 else {
3051 eventPP = &(openInternalEntities->internalEventPtr);
3052 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3053 }
3054 do {
3055 ICHAR *dataPtr = (ICHAR *)dataBuf;
3056 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
3057 *eventEndPP = s;
3058 defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
3059 *eventPP = s;
3060 } while (s != end);
3061 }
3062 else
3063 defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s);
3064 }
3065
3066
3067 static int
3068 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const XML_Char *value)
3069 {
3070 DEFAULT_ATTRIBUTE *att;
3071 if (value) {
3072 /* The handling of default attributes gets messed up if we have
3073 a default which duplicates a non-default. */
3074 int i;
3075 for (i = 0; i < type->nDefaultAtts; i++)
3076 if (attId == type->defaultAtts[i].id)
3077 return 1;
3078 }
3079 if (type->nDefaultAtts == type->allocDefaultAtts) {
3080 if (type->allocDefaultAtts == 0) {
3081 type->allocDefaultAtts = 8;
3082 type->defaultAtts = malloc(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
3083 }
3084 else {
3085 type->allocDefaultAtts *= 2;
3086 type->defaultAtts = realloc(type->defaultAtts,
3087 type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
3088 }
3089 if (!type->defaultAtts)
3090 return 0;
3091 }
3092 att = type->defaultAtts + type->nDefaultAtts;
3093 att->id = attId;
3094 att->value = value;
3095 att->isCdata = isCdata;
3096 if (!isCdata)
3097 attId->maybeTokenized = 1;
3098 type->nDefaultAtts += 1;
3099 return 1;
3100 }
3101
3102 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
3103 {
3104 const XML_Char *name;
3105 for (name = elementType->name; *name; name++) {
3106 if (*name == XML_T(':')) {
3107 PREFIX *prefix;
3108 const XML_Char *s;
3109 for (s = elementType->name; s != name; s++) {
3110 if (!poolAppendChar(&dtd.pool, *s))
3111 return 0;
3112 }
3113 if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3114 return 0;
3115 prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
3116 if (!prefix)
3117 return 0;
3118 if (prefix->name == poolStart(&dtd.pool))
3119 poolFinish(&dtd.pool);
3120 else
3121 poolDiscard(&dtd.pool);
3122 elementType->prefix = prefix;
3123
3124 }
3125 }
3126 return 1;
3127 }
3128
3129 static ATTRIBUTE_ID *
3130 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
3131 {
3132 ATTRIBUTE_ID *id;
3133 const XML_Char *name;
3134 if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3135 return 0;
3136 name = poolStoreString(&dtd.pool, enc, start, end);
3137 if (!name)
3138 return 0;
3139 ++name;
3140 id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID));
3141 if (!id)
3142 return 0;
3143 if (id->name != name)
3144 poolDiscard(&dtd.pool);
3145 else {
3146 poolFinish(&dtd.pool);
3147 if (!ns)
3148 ;
3149 else if (name[0] == 'x'
3150 && name[1] == 'm'
3151 && name[2] == 'l'
3152 && name[3] == 'n'
3153 && name[4] == 's'
3154 && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) {
3155 if (name[5] == '\0')
3156 id->prefix = &dtd.defaultPrefix;
3157 else
3158 id->prefix = (PREFIX *)lookup(&dtd.prefixes, name + 6, sizeof(PREFIX));
3159 id->xmlns = 1;
3160 }
3161 else {
3162 int i;
3163 for (i = 0; name[i]; i++) {
3164 if (name[i] == XML_T(':')) {
3165 int j;
3166 for (j = 0; j < i; j++) {
3167 if (!poolAppendChar(&dtd.pool, name[j]))
3168 return 0;
3169 }
3170 if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3171 return 0;
3172 id->prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
3173 if (id->prefix->name == poolStart(&dtd.pool))
3174 poolFinish(&dtd.pool);
3175 else
3176 poolDiscard(&dtd.pool);
3177 break;
3178 }
3179 }
3180 }
3181 }
3182 return id;
3183 }
3184
3185 #define CONTEXT_SEP XML_T('\f')
3186
3187 static
3188 const XML_Char *getContext(XML_Parser parser)
3189 {
3190 HASH_TABLE_ITER iter;
3191 int needSep = 0;
3192
3193 if (dtd.defaultPrefix.binding) {
3194 int i;
3195 int len;
3196 if (!poolAppendChar(&tempPool, XML_T('=')))
3197 return 0;
3198 len = dtd.defaultPrefix.binding->uriLen;
3199 if (namespaceSeparator != XML_T('\0'))
3200 len--;
3201 for (i = 0; i < len; i++)
3202 if (!poolAppendChar(&tempPool, dtd.defaultPrefix.binding->uri[i]))
3203 return 0;
3204 needSep = 1;
3205 }
3206
3207 hashTableIterInit(&iter, &(dtd.prefixes));
3208 for (;;) {
3209 int i;
3210 int len;
3211 const XML_Char *s;
3212 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
3213 if (!prefix)
3214 break;
3215 if (!prefix->binding)
3216 continue;
3217 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
3218 return 0;
3219 for (s = prefix->name; *s; s++)
3220 if (!poolAppendChar(&tempPool, *s))
3221 return 0;
3222 if (!poolAppendChar(&tempPool, XML_T('=')))
3223 return 0;
3224 len = prefix->binding->uriLen;
3225 if (namespaceSeparator != XML_T('\0'))
3226 len--;
3227 for (i = 0; i < len; i++)
3228 if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
3229 return 0;
3230 needSep = 1;
3231 }
3232
3233
3234 hashTableIterInit(&iter, &(dtd.generalEntities));
3235 for (;;) {
3236 const XML_Char *s;
3237 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
3238 if (!e)
3239 break;
3240 if (!e->open)
3241 continue;
3242 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
3243 return 0;
3244 for (s = e->name; *s; s++)
3245 if (!poolAppendChar(&tempPool, *s))
3246 return 0;
3247 needSep = 1;
3248 }
3249
3250 if (!poolAppendChar(&tempPool, XML_T('\0')))
3251 return 0;
3252 return tempPool.start;
3253 }
3254
3255 static
3256 int setContext(XML_Parser parser, const XML_Char *context)
3257 {
3258 const XML_Char *s = context;
3259
3260 while (*context != XML_T('\0')) {
3261 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
3262 ENTITY *e;
3263 if (!poolAppendChar(&tempPool, XML_T('\0')))
3264 return 0;
3265 e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0);
3266 if (e)
3267 e->open = 1;
3268 if (*s != XML_T('\0'))
3269 s++;
3270 context = s;
3271 poolDiscard(&tempPool);
3272 }
3273 else if (*s == '=') {
3274 PREFIX *prefix;
3275 if (poolLength(&tempPool) == 0)
3276 prefix = &dtd.defaultPrefix;
3277 else {
3278 if (!poolAppendChar(&tempPool, XML_T('\0')))
3279 return 0;
3280 prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&tempPool), sizeof(PREFIX));
3281 if (!prefix)
3282 return 0;
3283 if (prefix->name == poolStart(&tempPool))
3284 poolFinish(&tempPool);
3285 else
3286 poolDiscard(&tempPool);
3287 }
3288 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++)
3289 if (!poolAppendChar(&tempPool, *context))
3290 return 0;
3291 if (!poolAppendChar(&tempPool, XML_T('\0')))
3292 return 0;
3293 if (!addBinding(parser, prefix, 0, poolStart(&tempPool), &inheritedBindings))
3294 return 0;
3295 poolDiscard(&tempPool);
3296 if (*context != XML_T('\0'))
3297 ++context;
3298 s = context;
3299 }
3300 else {
3301 if (!poolAppendChar(&tempPool, *s))
3302 return 0;
3303 s++;
3304 }
3305 }
3306 return 1;
3307 }
3308
3309
3310 static
3311 void normalizePublicId(XML_Char *publicId)
3312 {
3313 XML_Char *p = publicId;
3314 XML_Char *s;
3315 for (s = publicId; *s; s++) {
3316 switch (*s) {
3317 case 0x20:
3318 case 0xD:
3319 case 0xA:
3320 if (p != publicId && p[-1] != 0x20)
3321 *p++ = 0x20;
3322 break;
3323 default:
3324 *p++ = *s;
3325 }
3326 }
3327 if (p != publicId && p[-1] == 0x20)
3328 --p;
3329 *p = XML_T('\0');
3330 }
3331
3332 static int dtdInit(DTD *p)
3333 {
3334 poolInit(&(p->pool));
3335 hashTableInit(&(p->generalEntities));
3336 hashTableInit(&(p->elementTypes));
3337 hashTableInit(&(p->attributeIds));
3338 hashTableInit(&(p->prefixes));
3339 p->complete = 1;
3340 p->standalone = 0;
3341 #ifdef XML_DTD
3342 hashTableInit(&(p->paramEntities));
3343 #endif /* XML_DTD */
3344 p->defaultPrefix.name = 0;
3345 p->defaultPrefix.binding = 0;
3346 return 1;
3347 }
3348
3349 #ifdef XML_DTD
3350
3351 static void dtdSwap(DTD *p1, DTD *p2)
3352 {
3353 DTD tem;
3354 memcpy(&tem, p1, sizeof(DTD));
3355 memcpy(p1, p2, sizeof(DTD));
3356 memcpy(p2, &tem, sizeof(DTD));
3357 }
3358
3359 #endif /* XML_DTD */
3360
3361 static void dtdDestroy(DTD *p)
3362 {
3363 HASH_TABLE_ITER iter;
3364 hashTableIterInit(&iter, &(p->elementTypes));
3365 for (;;) {
3366 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
3367 if (!e)
3368 break;
3369 if (e->allocDefaultAtts != 0)
3370 free(e->defaultAtts);
3371 }
3372 hashTableDestroy(&(p->generalEntities));
3373 #ifdef XML_DTD
3374 hashTableDestroy(&(p->paramEntities));
3375 #endif /* XML_DTD */
3376 hashTableDestroy(&(p->elementTypes));
3377 hashTableDestroy(&(p->attributeIds));
3378 hashTableDestroy(&(p->prefixes));
3379 poolDestroy(&(p->pool));
3380 }
3381
3382 /* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise.
3383 The new DTD has already been initialized. */
3384
3385 static int dtdCopy(DTD *newDtd, const DTD *oldDtd)
3386 {
3387 HASH_TABLE_ITER iter;
3388
3389 /* Copy the prefix table. */
3390
3391 hashTableIterInit(&iter, &(oldDtd->prefixes));
3392 for (;;) {
3393 const XML_Char *name;
3394 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
3395 if (!oldP)
3396 break;
3397 name = poolCopyString(&(newDtd->pool), oldP->name);
3398 if (!name)
3399 return 0;
3400 if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX)))
3401 return 0;
3402 }
3403
3404 hashTableIterInit(&iter, &(oldDtd->attributeIds));
3405
3406 /* Copy the attribute id table. */
3407
3408 for (;;) {
3409 ATTRIBUTE_ID *newA;
3410 const XML_Char *name;
3411 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
3412
3413 if (!oldA)
3414 break;
3415 /* Remember to allocate the scratch byte before the name. */
3416 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
3417 return 0;
3418 name = poolCopyString(&(newDtd->pool), oldA->name);
3419 if (!name)
3420 return 0;
3421 ++name;
3422 newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID));
3423 if (!newA)
3424 return 0;
3425 newA->maybeTokenized = oldA->maybeTokenized;
3426 if (oldA->prefix) {
3427 newA->xmlns = oldA->xmlns;
3428 if (oldA->prefix == &oldDtd->defaultPrefix)
3429 newA->prefix = &newDtd->defaultPrefix;
3430 else
3431 newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldA->prefix->name, 0);
3432 }
3433 }
3434
3435 /* Copy the element type table. */
3436
3437 hashTableIterInit(&iter, &(oldDtd->elementTypes));
3438
3439 for (;;) {
3440 int i;
3441 ELEMENT_TYPE *newE;
3442 const XML_Char *name;
3443 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
3444 if (!oldE)
3445 break;
3446 name = poolCopyString(&(newDtd->pool), oldE->name);
3447 if (!name)
3448 return 0;
3449 newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE));
3450 if (!newE)
3451 return 0;
3452 if (oldE->nDefaultAtts) {
3453 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
3454 if (!newE->defaultAtts)
3455 return 0;
3456 }
3457 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
3458 if (oldE->prefix)
3459 newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldE->prefix->name, 0);
3460 for (i = 0; i < newE->nDefaultAtts; i++) {
3461 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
3462 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
3463 if (oldE->defaultAtts[i].value) {
3464 newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
3465 if (!newE->defaultAtts[i].value)
3466 return 0;
3467 }
3468 else
3469 newE->defaultAtts[i].value = 0;
3470 }
3471 }
3472
3473 /* Copy the entity tables. */
3474 if (!copyEntityTable(&(newDtd->generalEntities),
3475 &(newDtd->pool),
3476 &(oldDtd->generalEntities)))
3477 return 0;
3478
3479 #ifdef XML_DTD
3480 if (!copyEntityTable(&(newDtd->paramEntities),
3481 &(newDtd->pool),
3482 &(oldDtd->paramEntities)))
3483 return 0;
3484 #endif /* XML_DTD */
3485
3486 newDtd->complete = oldDtd->complete;
3487 newDtd->standalone = oldDtd->standalone;
3488 return 1;
3489 }
3490
3491 static int copyEntityTable(HASH_TABLE *newTable,
3492 STRING_POOL *newPool,
3493 const HASH_TABLE *oldTable)
3494 {
3495 HASH_TABLE_ITER iter;
3496 const XML_Char *cachedOldBase = 0;
3497 const XML_Char *cachedNewBase = 0;
3498
3499 hashTableIterInit(&iter, oldTable);
3500
3501 for (;;) {
3502 ENTITY *newE;
3503 const XML_Char *name;
3504 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
3505 if (!oldE)
3506 break;
3507 name = poolCopyString(newPool, oldE->name);
3508 if (!name)
3509 return 0;
3510 newE = (ENTITY *)lookup(newTable, name, sizeof(ENTITY));
3511 if (!newE)
3512 return 0;
3513 if (oldE->systemId) {
3514 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
3515 if (!tem)
3516 return 0;
3517 newE->systemId = tem;
3518 if (oldE->base) {
3519 if (oldE->base == cachedOldBase)
3520 newE->base = cachedNewBase;
3521 else {
3522 cachedOldBase = oldE->base;
3523 tem = poolCopyString(newPool, cachedOldBase);
3524 if (!tem)
3525 return 0;
3526 cachedNewBase = newE->base = tem;
3527 }
3528 }
3529 }
3530 else {
3531 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
3532 if (!tem)
3533 return 0;
3534 newE->textPtr = tem;
3535 newE->textLen = oldE->textLen;
3536 }
3537 if (oldE->notation) {
3538 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
3539 if (!tem)
3540 return 0;
3541 newE->notation = tem;
3542 }
3543 }
3544 return 1;
3545 }
3546
3547 static
3548 void poolInit(STRING_POOL *pool)
3549 {
3550 pool->blocks = 0;
3551 pool->freeBlocks = 0;
3552 pool->start = 0;
3553 pool->ptr = 0;
3554 pool->end = 0;
3555 }
3556
3557 static
3558 void poolClear(STRING_POOL *pool)
3559 {
3560 if (!pool->freeBlocks)
3561 pool->freeBlocks = pool->blocks;
3562 else {
3563 BLOCK *p = pool->blocks;
3564 while (p) {
3565 BLOCK *tem = p->next;
3566 p->next = pool->freeBlocks;
3567 pool->freeBlocks = p;
3568 p = tem;
3569 }
3570 }
3571 pool->blocks = 0;
3572 pool->start = 0;
3573 pool->ptr = 0;
3574 pool->end = 0;
3575 }
3576
3577 static
3578 void poolDestroy(STRING_POOL *pool)
3579 {
3580 BLOCK *p = pool->blocks;
3581 while (p) {
3582 BLOCK *tem = p->next;
3583 free(p);
3584 p = tem;
3585 }
3586 pool->blocks = 0;
3587 p = pool->freeBlocks;
3588 while (p) {
3589 BLOCK *tem = p->next;
3590 free(p);
3591 p = tem;
3592 }
3593 pool->freeBlocks = 0;
3594 pool->ptr = 0;
3595 pool->start = 0;
3596 pool->end = 0;
3597 }
3598
3599 static
3600 XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
3601 const char *ptr, const char *end)
3602 {
3603 if (!pool->ptr && !poolGrow(pool))
3604 return 0;
3605 for (;;) {
3606 XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
3607 if (ptr == end)
3608 break;
3609 if (!poolGrow(pool))
3610 return 0;
3611 }
3612 return pool->start;
3613 }
3614
3615 static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s)
3616 {
3617 do {
3618 if (!poolAppendChar(pool, *s))
3619 return 0;
3620 } while (*s++);
3621 s = pool->start;
3622 poolFinish(pool);
3623 return s;
3624 }
3625
3626 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
3627 {
3628 if (!pool->ptr && !poolGrow(pool))
3629 return 0;
3630 for (; n > 0; --n, s++) {
3631 if (!poolAppendChar(pool, *s))
3632 return 0;
3633
3634 }
3635 s = pool->start;
3636 poolFinish(pool);
3637 return s;
3638 }
3639
3640 static
3641 XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
3642 const char *ptr, const char *end)
3643 {
3644 if (!poolAppend(pool, enc, ptr, end))
3645 return 0;
3646 if (pool->ptr == pool->end && !poolGrow(pool))
3647 return 0;
3648 *(pool->ptr)++ = 0;
3649 return pool->start;
3650 }
3651
3652 static
3653 int poolGrow(STRING_POOL *pool)
3654 {
3655 if (pool->freeBlocks) {
3656 if (pool->start == 0) {
3657 pool->blocks = pool->freeBlocks;
3658 pool->freeBlocks = pool->freeBlocks->next;
3659 pool->blocks->next = 0;
3660 pool->start = pool->blocks->s;
3661 pool->end = pool->start + pool->blocks->size;
3662 pool->ptr = pool->start;
3663 return 1;
3664 }
3665 if (pool->end - pool->start < pool->freeBlocks->size) {
3666 BLOCK *tem = pool->freeBlocks->next;
3667 pool->freeBlocks->next = pool->blocks;
3668 pool->blocks = pool->freeBlocks;
3669 pool->freeBlocks = tem;
3670 memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char));
3671 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
3672 pool->start = pool->blocks->s;
3673 pool->end = pool->start + pool->blocks->size;
3674 return 1;
3675 }
3676 }
3677 if (pool->blocks && pool->start == pool->blocks->s) {
3678 int blockSize = (pool->end - pool->start)*2;
3679 pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
3680 if (!pool->blocks)
3681 return 0;
3682 pool->blocks->size = blockSize;
3683 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
3684 pool->start = pool->blocks->s;
3685 pool->end = pool->start + blockSize;
3686 }
3687 else {
3688 BLOCK *tem;
3689 int blockSize = pool->end - pool->start;
3690 if (blockSize < INIT_BLOCK_SIZE)
3691 blockSize = INIT_BLOCK_SIZE;
3692 else
3693 blockSize *= 2;
3694 tem = malloc(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
3695 if (!tem)
3696 return 0;
3697 tem->size = blockSize;
3698 tem->next = pool->blocks;
3699 pool->blocks = tem;
3700 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
3701 pool->ptr = tem->s + (pool->ptr - pool->start);
3702 pool->start = tem->s;
3703 pool->end = tem->s + blockSize;
3704 }
3705 return 1;
3706 }
+0
-1265
SFconv/expat/xmltok/xmlrole.c less more
0 /*
1 The contents of this file are subject to the Mozilla Public License
2 Version 1.1 (the "License"); you may not use this file except in
3 compliance with the License. You may obtain a copy of the License at
4 http://www.mozilla.org/MPL/
5
6 Software distributed under the License is distributed on an "AS IS"
7 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
8 License for the specific language governing rights and limitations
9 under the License.
10
11 The Original Code is expat.
12
13 The Initial Developer of the Original Code is James Clark.
14 Portions created by James Clark are Copyright (C) 1998, 1999
15 James Clark. All Rights Reserved.
16
17 Contributor(s):
18
19 Alternatively, the contents of this file may be used under the terms
20 of the GNU General Public License (the "GPL"), in which case the
21 provisions of the GPL are applicable instead of those above. If you
22 wish to allow use of your version of this file only under the terms of
23 the GPL and not to allow others to use your version of this file under
24 the MPL, indicate your decision by deleting the provisions above and
25 replace them with the notice and other provisions required by the
26 GPL. If you do not delete the provisions above, a recipient may use
27 your version of this file under either the MPL or the GPL.
28 */
29
30 #include "xmldef.h"
31 #include "xmlrole.h"
32
33 /* Doesn't check:
34
35 that ,| are not mixed in a model group
36 content of literals
37
38 */
39
40 #ifndef MIN_BYTES_PER_CHAR
41 #define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
42 #endif
43
44 #ifdef XML_DTD
45 #define setTopLevel(state) \
46 ((state)->handler = ((state)->documentEntity \
47 ? internalSubset \
48 : externalSubset1))
49 #else /* not XML_DTD */
50 #define setTopLevel(state) ((state)->handler = internalSubset)
51 #endif /* not XML_DTD */
52
53 typedef int PROLOG_HANDLER(PROLOG_STATE *state,
54 int tok,
55 const char *ptr,
56 const char *end,
57 const ENCODING *enc);
58
59 static PROLOG_HANDLER
60 prolog0, prolog1, prolog2,
61 doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
62 internalSubset,
63 entity0, entity1, entity2, entity3, entity4, entity5, entity6,
64 entity7, entity8, entity9,
65 notation0, notation1, notation2, notation3, notation4,
66 attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
67 attlist7, attlist8, attlist9,
68 element0, element1, element2, element3, element4, element5, element6,
69 element7,
70 #ifdef XML_DTD
71 externalSubset0, externalSubset1,
72 condSect0, condSect1, condSect2,
73 #endif /* XML_DTD */
74 declClose,
75 error;
76
77 static
78 int common(PROLOG_STATE *state, int tok);
79
80 static
81 int prolog0(PROLOG_STATE *state,
82 int tok,
83 const char *ptr,
84 const char *end,
85 const ENCODING *enc)
86 {
87 switch (tok) {
88 case XML_TOK_PROLOG_S:
89 state->handler = prolog1;
90 return XML_ROLE_NONE;
91 case XML_TOK_XML_DECL:
92 state->handler = prolog1;
93 return XML_ROLE_XML_DECL;
94 case XML_TOK_PI:
95 state->handler = prolog1;
96 return XML_ROLE_NONE;
97 case XML_TOK_COMMENT:
98 state->handler = prolog1;
99 case XML_TOK_BOM:
100 return XML_ROLE_NONE;
101 case XML_TOK_DECL_OPEN:
102 if (!XmlNameMatchesAscii(enc,
103 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
104 end,
105 "DOCTYPE"))
106 break;
107 state->handler = doctype0;
108 return XML_ROLE_NONE;
109 case XML_TOK_INSTANCE_START:
110 state->handler = error;
111 return XML_ROLE_INSTANCE_START;
112 }
113 return common(state, tok);
114 }
115
116 static
117 int prolog1(PROLOG_STATE *state,
118 int tok,
119 const char *ptr,
120 const char *end,
121 const ENCODING *enc)
122 {
123 switch (tok) {
124 case XML_TOK_PROLOG_S:
125 return XML_ROLE_NONE;
126 case XML_TOK_PI:
127 case XML_TOK_COMMENT:
128 case XML_TOK_BOM:
129 return XML_ROLE_NONE;
130 case XML_TOK_DECL_OPEN:
131 if (!XmlNameMatchesAscii(enc,
132 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
133 end,
134 "DOCTYPE"))
135 break;
136 state->handler = doctype0;
137 return XML_ROLE_NONE;
138 case XML_TOK_INSTANCE_START:
139 state->handler = error;
140 return XML_ROLE_INSTANCE_START;
141 }
142 return common(state, tok);
143 }
144
145 static
146 int prolog2(PROLOG_STATE *state,
147 int tok,
148 const char *ptr,
149 const char *end,
150 const ENCODING *enc)
151 {
152 switch (tok) {
153 case XML_TOK_PROLOG_S:
154 return XML_ROLE_NONE;
155 case XML_TOK_PI:
156 case XML_TOK_COMMENT:
157 return XML_ROLE_NONE;
158 case XML_TOK_INSTANCE_START:
159 state->handler = error;
160 return XML_ROLE_INSTANCE_START;
161 }
162 return common(state, tok);
163 }
164
165 static
166 int doctype0(PROLOG_STATE *state,
167 int tok,
168 const char *ptr,
169 const char *end,
170 const ENCODING *enc)
171 {
172 switch (tok) {
173 case XML_TOK_PROLOG_S:
174 return XML_ROLE_NONE;
175 case XML_TOK_NAME:
176 case XML_TOK_PREFIXED_NAME:
177 state->handler = doctype1;
178 return XML_ROLE_DOCTYPE_NAME;
179 }
180 return common(state, tok);
181 }
182
183 static
184 int doctype1(PROLOG_STATE *state,
185 int tok,
186 const char *ptr,
187 const char *end,
188 const ENCODING *enc)
189 {
190 switch (tok) {
191 case XML_TOK_PROLOG_S:
192 return XML_ROLE_NONE;
193 case XML_TOK_OPEN_BRACKET:
194 state->handler = internalSubset;
195 return XML_ROLE_NONE;
196 case XML_TOK_DECL_CLOSE:
197 state->handler = prolog2;
198 return XML_ROLE_DOCTYPE_CLOSE;
199 case XML_TOK_NAME:
200 if (XmlNameMatchesAscii(enc, ptr, end, "SYSTEM")) {
201 state->handler = doctype3;
202 return XML_ROLE_NONE;
203 }
204 if (XmlNameMatchesAscii(enc, ptr, end, "PUBLIC")) {
205 state->handler = doctype2;
206 return XML_ROLE_NONE;
207 }
208 break;
209 }
210 return common(state, tok);
211 }
212
213 static
214 int doctype2(PROLOG_STATE *state,
215 int tok,
216 const char *ptr,
217 const char *end,
218 const ENCODING *enc)
219 {
220 switch (tok) {
221 case XML_TOK_PROLOG_S:
222 return XML_ROLE_NONE;
223 case XML_TOK_LITERAL:
224 state->handler = doctype3;
225 return XML_ROLE_DOCTYPE_PUBLIC_ID;
226 }
227 return common(state, tok);
228 }
229
230 static
231 int doctype3(PROLOG_STATE *state,
232 int tok,
233 const char *ptr,
234 const char *end,
235 const ENCODING *enc)
236 {
237 switch (tok) {
238 case XML_TOK_PROLOG_S:
239 return XML_ROLE_NONE;
240 case XML_TOK_LITERAL:
241 state->handler = doctype4;
242 return XML_ROLE_DOCTYPE_SYSTEM_ID;
243 }
244 return common(state, tok);
245 }
246
247 static
248 int doctype4(PROLOG_STATE *state,
249 int tok,
250 const char *ptr,
251 const char *end,
252 const ENCODING *enc)
253 {
254 switch (tok) {
255 case XML_TOK_PROLOG_S:
256 return XML_ROLE_NONE;
257 case XML_TOK_OPEN_BRACKET:
258 state->handler = internalSubset;
259 return XML_ROLE_NONE;
260 case XML_TOK_DECL_CLOSE:
261 state->handler = prolog2;
262 return XML_ROLE_DOCTYPE_CLOSE;
263 }
264 return common(state, tok);
265 }
266
267 static
268 int doctype5(PROLOG_STATE *state,
269 int tok,
270 const char *ptr,
271 const char *end,
272 const ENCODING *enc)
273 {
274 switch (tok) {
275 case XML_TOK_PROLOG_S:
276 return XML_ROLE_NONE;
277 case XML_TOK_DECL_CLOSE:
278 state->handler = prolog2;
279 return XML_ROLE_DOCTYPE_CLOSE;
280 }
281 return common(state, tok);
282 }
283
284 static
285 int internalSubset(PROLOG_STATE *state,
286 int tok,
287 const char *ptr,
288 const char *end,
289 const ENCODING *enc)
290 {
291 switch (tok) {
292 case XML_TOK_PROLOG_S:
293 return XML_ROLE_NONE;
294 case XML_TOK_DECL_OPEN:
295 if (XmlNameMatchesAscii(enc,
296 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
297 end,
298 "ENTITY")) {
299 state->handler = entity0;
300 return XML_ROLE_NONE;
301 }
302 if (XmlNameMatchesAscii(enc,
303 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
304 end,
305 "ATTLIST")) {
306 state->handler = attlist0;
307 return XML_ROLE_NONE;
308 }
309 if (XmlNameMatchesAscii(enc,
310 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
311 end,
312 "ELEMENT")) {
313 state->handler = element0;
314 return XML_ROLE_NONE;
315 }
316 if (XmlNameMatchesAscii(enc,
317 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
318 end,
319 "NOTATION")) {
320 state->handler = notation0;
321 return XML_ROLE_NONE;
322 }
323 break;
324 case XML_TOK_PI:
325 case XML_TOK_COMMENT:
326 return XML_ROLE_NONE;
327 case XML_TOK_PARAM_ENTITY_REF:
328 return XML_ROLE_PARAM_ENTITY_REF;
329 case XML_TOK_CLOSE_BRACKET:
330 state->handler = doctype5;
331 return XML_ROLE_NONE;
332 }
333 return common(state, tok);
334 }
335
336 #ifdef XML_DTD
337
338 static
339 int externalSubset0(PROLOG_STATE *state,
340 int tok,
341 const char *ptr,
342 const char *end,
343 const ENCODING *enc)
344 {
345 state->handler = externalSubset1;
346 if (tok == XML_TOK_XML_DECL)
347 return XML_ROLE_TEXT_DECL;
348 return externalSubset1(state, tok, ptr, end, enc);
349 }
350
351 static
352 int externalSubset1(PROLOG_STATE *state,
353 int tok,
354 const char *ptr,
355 const char *end,
356 const ENCODING *enc)
357 {
358 switch (tok) {
359 case XML_TOK_COND_SECT_OPEN:
360 state->handler = condSect0;
361 return XML_ROLE_NONE;
362 case XML_TOK_COND_SECT_CLOSE:
363 if (state->includeLevel == 0)
364 break;
365 state->includeLevel -= 1;
366 return XML_ROLE_NONE;
367 case XML_TOK_PROLOG_S:
368 return XML_ROLE_NONE;
369 case XML_TOK_CLOSE_BRACKET:
370 break;
371 case XML_TOK_NONE:
372 if (state->includeLevel)
373 break;
374 return XML_ROLE_NONE;
375 default:
376 return internalSubset(state, tok, ptr, end, enc);
377 }
378 return common(state, tok);
379 }
380
381 #endif /* XML_DTD */
382
383 static
384 int entity0(PROLOG_STATE *state,
385 int tok,
386 const char *ptr,
387 const char *end,
388 const ENCODING *enc)
389 {
390 switch (tok) {
391 case XML_TOK_PROLOG_S:
392 return XML_ROLE_NONE;
393 case XML_TOK_PERCENT:
394 state->handler = entity1;
395 return XML_ROLE_NONE;
396 case XML_TOK_NAME:
397 state->handler = entity2;
398 return XML_ROLE_GENERAL_ENTITY_NAME;
399 }
400 return common(state, tok);
401 }
402
403 static
404 int entity1(PROLOG_STATE *state,
405 int tok,
406 const char *ptr,
407 const char *end,
408 const ENCODING *enc)
409 {
410 switch (tok) {
411 case XML_TOK_PROLOG_S:
412 return XML_ROLE_NONE;
413 case XML_TOK_NAME:
414 state->handler = entity7;
415 return XML_ROLE_PARAM_ENTITY_NAME;
416 }
417 return common(state, tok);
418 }
419
420 static
421 int entity2(PROLOG_STATE *state,
422 int tok,
423 const char *ptr,
424 const char *end,
425 const ENCODING *enc)
426 {
427 switch (tok) {
428 case XML_TOK_PROLOG_S:
429 return XML_ROLE_NONE;
430 case XML_TOK_NAME:
431 if (XmlNameMatchesAscii(enc, ptr, end, "SYSTEM")) {
432 state->handler = entity4;
433 return XML_ROLE_NONE;
434 }
435 if (XmlNameMatchesAscii(enc, ptr, end, "PUBLIC")) {
436 state->handler = entity3;
437 return XML_ROLE_NONE;
438 }
439 break;
440 case XML_TOK_LITERAL:
441 state->handler = declClose;
442 return XML_ROLE_ENTITY_VALUE;
443 }
444 return common(state, tok);
445 }
446
447 static
448 int entity3(PROLOG_STATE *state,
449 int tok,
450 const char *ptr,
451 const char *end,
452 const ENCODING *enc)
453 {
454 switch (tok) {
455 case XML_TOK_PROLOG_S:
456 return XML_ROLE_NONE;
457 case XML_TOK_LITERAL:
458 state->handler = entity4;
459 return XML_ROLE_ENTITY_PUBLIC_ID;
460 }
461 return common(state, tok);
462 }
463
464
465 static
466 int entity4(PROLOG_STATE *state,
467 int tok,
468 const char *ptr,
469 const char *end,
470 const ENCODING *enc)
471 {
472 switch (tok) {
473 case XML_TOK_PROLOG_S:
474 return XML_ROLE_NONE;
475 case XML_TOK_LITERAL:
476 state->handler = entity5;
477 return XML_ROLE_ENTITY_SYSTEM_ID;
478 }
479 return common(state, tok);
480 }
481
482 static
483 int entity5(PROLOG_STATE *state,
484 int tok,
485 const char *ptr,
486 const char *end,
487 const ENCODING *enc)
488 {
489 switch (tok) {
490 case XML_TOK_PROLOG_S:
491 return XML_ROLE_NONE;
492 case XML_TOK_DECL_CLOSE:
493 setTopLevel(state);
494 return XML_ROLE_NONE;
495 case XML_TOK_NAME:
496 if (XmlNameMatchesAscii(enc, ptr, end, "NDATA")) {
497 state->handler = entity6;
498 return XML_ROLE_NONE;
499 }
500 break;
501 }
502 return common(state, tok);
503 }
504
505 static
506 int entity6(PROLOG_STATE *state,
507 int tok,
508 const char *ptr,
509 const char *end,
510 const ENCODING *enc)
511 {
512 switch (tok) {
513 case XML_TOK_PROLOG_S:
514 return XML_ROLE_NONE;
515 case XML_TOK_NAME:
516 state->handler = declClose;
517 return XML_ROLE_ENTITY_NOTATION_NAME;
518 }
519 return common(state, tok);
520 }
521
522 static
523 int entity7(PROLOG_STATE *state,
524 int tok,
525 const char *ptr,
526 const char *end,
527 const ENCODING *enc)
528 {
529 switch (tok) {
530 case XML_TOK_PROLOG_S:
531 return XML_ROLE_NONE;
532 case XML_TOK_NAME:
533 if (XmlNameMatchesAscii(enc, ptr, end, "SYSTEM")) {
534 state->handler = entity9;
535 return XML_ROLE_NONE;
536 }
537 if (XmlNameMatchesAscii(enc, ptr, end, "PUBLIC")) {
538 state->handler = entity8;
539 return XML_ROLE_NONE;
540 }
541 break;
542 case XML_TOK_LITERAL:
543 state->handler = declClose;
544 return XML_ROLE_ENTITY_VALUE;
545 }
546 return common(state, tok);
547 }
548
549 static
550 int entity8(PROLOG_STATE *state,
551 int tok,
552 const char *ptr,
553 const char *end,
554 const ENCODING *enc)
555 {
556 switch (tok) {
557 case XML_TOK_PROLOG_S:
558 return XML_ROLE_NONE;
559 case XML_TOK_LITERAL:
560 state->handler = entity9;
561 return XML_ROLE_ENTITY_PUBLIC_ID;
562 }
563 return common(state, tok);
564 }
565
566 static
567 int entity9(PROLOG_STATE *state,
568 int tok,
569 const char *ptr,
570 const char *end,
571 const ENCODING *enc)
572 {
573 switch (tok) {
574 case XML_TOK_PROLOG_S:
575 return XML_ROLE_NONE;
576 case XML_TOK_LITERAL:
577 state->handler = declClose;
578 return XML_ROLE_ENTITY_SYSTEM_ID;
579 }
580 return common(state, tok);
581 }
582
583 static
584 int notation0(PROLOG_STATE *state,
585 int tok,
586 const char *ptr,
587 const char *end,
588 const ENCODING *enc)
589 {
590 switch (tok) {
591 case XML_TOK_PROLOG_S:
592 return XML_ROLE_NONE;
593 case XML_TOK_NAME:
594 state->handler = notation1;
595 return XML_ROLE_NOTATION_NAME;
596 }
597 return common(state, tok);
598 }
599
600 static
601 int notation1(PROLOG_STATE *state,
602 int tok,
603 const char *ptr,
604 const char *end,
605 const ENCODING *enc)
606 {
607 switch (tok) {
608 case XML_TOK_PROLOG_S:
609 return XML_ROLE_NONE;
610 case XML_TOK_NAME:
611 if (XmlNameMatchesAscii(enc, ptr, end, "SYSTEM")) {
612 state->handler = notation3;
613 return XML_ROLE_NONE;
614 }
615 if (XmlNameMatchesAscii(enc, ptr, end, "PUBLIC")) {
616 state->handler = notation2;
617 return XML_ROLE_NONE;
618 }
619 break;
620 }
621 return common(state, tok);
622 }
623
624 static
625 int notation2(PROLOG_STATE *state,
626 int tok,
627 const char *ptr,
628 const char *end,
629 const ENCODING *enc)
630 {
631 switch (tok) {
632 case XML_TOK_PROLOG_S:
633 return XML_ROLE_NONE;
634 case XML_TOK_LITERAL:
635 state->handler = notation4;
636 return XML_ROLE_NOTATION_PUBLIC_ID;
637 }
638 return common(state, tok);
639 }
640
641 static
642 int notation3(PROLOG_STATE *state,
643 int tok,
644 const char *ptr,
645 const char *end,
646 const ENCODING *enc)
647 {
648 switch (tok) {
649 case XML_TOK_PROLOG_S:
650 return XML_ROLE_NONE;
651 case XML_TOK_LITERAL:
652 state->handler = declClose;
653 return XML_ROLE_NOTATION_SYSTEM_ID;
654 }
655 return common(state, tok);
656 }
657
658 static
659 int notation4(PROLOG_STATE *state,
660 int tok,
661 const char *ptr,
662 const char *end,
663 const ENCODING *enc)
664 {
665 switch (tok) {
666 case XML_TOK_PROLOG_S:
667 return XML_ROLE_NONE;
668 case XML_TOK_LITERAL:
669 state->handler = declClose;
670 return XML_ROLE_NOTATION_SYSTEM_ID;
671 case XML_TOK_DECL_CLOSE:
672 setTopLevel(state);
673 return XML_ROLE_NOTATION_NO_SYSTEM_ID;
674 }
675 return common(state, tok);
676 }
677
678 static
679 int attlist0(PROLOG_STATE *state,
680 int tok,
681 const char *ptr,
682 const char *end,
683 const ENCODING *enc)
684 {
685 switch (tok) {
686 case XML_TOK_PROLOG_S:
687 return XML_ROLE_NONE;
688 case XML_TOK_NAME:
689 case XML_TOK_PREFIXED_NAME:
690 state->handler = attlist1;
691 return XML_ROLE_ATTLIST_ELEMENT_NAME;
692 }
693 return common(state, tok);
694 }
695
696 static
697 int attlist1(PROLOG_STATE *state,
698 int tok,
699 const char *ptr,
700 const char *end,
701 const ENCODING *enc)
702 {
703 switch (tok) {
704 case XML_TOK_PROLOG_S:
705 return XML_ROLE_NONE;
706 case XML_TOK_DECL_CLOSE:
707 setTopLevel(state);
708 return XML_ROLE_NONE;
709 case XML_TOK_NAME:
710 case XML_TOK_PREFIXED_NAME:
711 state->handler = attlist2;
712 return XML_ROLE_ATTRIBUTE_NAME;
713 }
714 return common(state, tok);
715 }
716
717 static
718 int attlist2(PROLOG_STATE *state,
719 int tok,
720 const char *ptr,
721 const char *end,
722 const ENCODING *enc)
723 {
724 switch (tok) {
725 case XML_TOK_PROLOG_S:
726 return XML_ROLE_NONE;
727 case XML_TOK_NAME:
728 {
729 static const char *types[] = {
730 "CDATA",
731 "ID",
732 "IDREF",
733 "IDREFS",
734 "ENTITY",
735 "ENTITIES",
736 "NMTOKEN",
737 "NMTOKENS",
738 };
739 int i;
740 for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
741 if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
742 state->handler = attlist8;
743 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
744 }
745 }
746 if (XmlNameMatchesAscii(enc, ptr, end, "NOTATION")) {
747 state->handler = attlist5;
748 return XML_ROLE_NONE;
749 }
750 break;
751 case XML_TOK_OPEN_PAREN:
752 state->handler = attlist3;
753 return XML_ROLE_NONE;
754 }
755 return common(state, tok);
756 }
757
758 static
759 int attlist3(PROLOG_STATE *state,
760 int tok,
761 const char *ptr,
762 const char *end,
763 const ENCODING *enc)
764 {
765 switch (tok) {
766 case XML_TOK_PROLOG_S:
767 return XML_ROLE_NONE;
768 case XML_TOK_NMTOKEN:
769 case XML_TOK_NAME:
770 case XML_TOK_PREFIXED_NAME:
771 state->handler = attlist4;
772 return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
773 }
774 return common(state, tok);
775 }
776
777 static
778 int attlist4(PROLOG_STATE *state,
779 int tok,
780 const char *ptr,
781 const char *end,
782 const ENCODING *enc)
783 {
784 switch (tok) {
785 case XML_TOK_PROLOG_S:
786 return XML_ROLE_NONE;
787 case XML_TOK_CLOSE_PAREN:
788 state->handler = attlist8;
789 return XML_ROLE_NONE;
790 case XML_TOK_OR:
791 state->handler = attlist3;
792 return XML_ROLE_NONE;
793 }
794 return common(state, tok);
795 }
796
797 static
798 int attlist5(PROLOG_STATE *state,
799 int tok,
800 const char *ptr,
801 const char *end,
802 const ENCODING *enc)
803 {
804 switch (tok) {
805 case XML_TOK_PROLOG_S:
806 return XML_ROLE_NONE;
807 case XML_TOK_OPEN_PAREN:
808 state->handler = attlist6;
809 return XML_ROLE_NONE;
810 }
811 return common(state, tok);
812 }
813
814
815 static
816 int attlist6(PROLOG_STATE *state,
817 int tok,
818 const char *ptr,
819 const char *end,
820 const ENCODING *enc)
821 {
822 switch (tok) {
823 case XML_TOK_PROLOG_S:
824 return XML_ROLE_NONE;
825 case XML_TOK_NAME:
826 state->handler = attlist7;
827 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
828 }
829 return common(state, tok);
830 }
831
832 static
833 int attlist7(PROLOG_STATE *state,
834 int tok,
835 const char *ptr,
836 const char *end,
837 const ENCODING *enc)
838 {
839 switch (tok) {
840 case XML_TOK_PROLOG_S:
841 return XML_ROLE_NONE;
842 case XML_TOK_CLOSE_PAREN:
843 state->handler = attlist8;
844 return XML_ROLE_NONE;
845 case XML_TOK_OR:
846 state->handler = attlist6;
847 return XML_ROLE_NONE;
848 }
849 return common(state, tok);
850 }
851
852 /* default value */
853 static
854 int attlist8(PROLOG_STATE *state,
855 int tok,
856 const char *ptr,
857 const char *end,
858 const ENCODING *enc)
859 {
860 switch (tok) {
861 case XML_TOK_PROLOG_S:
862 return XML_ROLE_NONE;
863 case XML_TOK_POUND_NAME:
864 if (XmlNameMatchesAscii(enc,
865 ptr + MIN_BYTES_PER_CHAR(enc),
866 end,
867 "IMPLIED")) {
868 state->handler = attlist1;
869 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
870 }
871 if (XmlNameMatchesAscii(enc,
872 ptr + MIN_BYTES_PER_CHAR(enc),
873 end,
874 "REQUIRED")) {
875 state->handler = attlist1;
876 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
877 }
878 if (XmlNameMatchesAscii(enc,
879 ptr + MIN_BYTES_PER_CHAR(enc),
880 end,
881 "FIXED")) {
882 state->handler = attlist9;
883 return XML_ROLE_NONE;
884 }
885 break;
886 case XML_TOK_LITERAL:
887 state->handler = attlist1;
888 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
889 }
890 return common(state, tok);
891 }
892
893 static
894 int attlist9(PROLOG_STATE *state,
895 int tok,
896 const char *ptr,
897 const char *end,
898 const ENCODING *enc)
899 {
900 switch (tok) {
901 case XML_TOK_PROLOG_S:
902 return XML_ROLE_NONE;
903 case XML_TOK_LITERAL:
904 state->handler = attlist1;
905 return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
906 }
907 return common(state, tok);
908 }
909
910 static
911 int element0(PROLOG_STATE *state,
912 int tok,
913 const char *ptr,
914 const char *end,
915 const ENCODING *enc)
916 {
917 switch (tok) {
918 case XML_TOK_PROLOG_S:
919 return XML_ROLE_NONE;
920 case XML_TOK_NAME:
921 case XML_TOK_PREFIXED_NAME:
922 state->handler = element1;
923 return XML_ROLE_ELEMENT_NAME;
924 }
925 return common(state, tok);
926 }
927
928 static
929 int element1(PROLOG_STATE *state,
930 int tok,
931 const char *ptr,
932 const char *end,
933 const ENCODING *enc)
934 {
935 switch (tok) {
936 case XML_TOK_PROLOG_S:
937 return XML_ROLE_NONE;
938 case XML_TOK_NAME:
939 if (XmlNameMatchesAscii(enc, ptr, end, "EMPTY")) {
940 state->handler = declClose;
941 return XML_ROLE_CONTENT_EMPTY;
942 }
943 if (XmlNameMatchesAscii(enc, ptr, end, "ANY")) {
944 state->handler = declClose;
945 return XML_ROLE_CONTENT_ANY;
946 }
947 break;
948 case XML_TOK_OPEN_PAREN:
949 state->handler = element2;
950 state->level = 1;
951 return XML_ROLE_GROUP_OPEN;
952 }
953 return common(state, tok);
954 }
955
956 static
957 int element2(PROLOG_STATE *state,
958 int tok,
959 const char *ptr,
960 const char *end,
961 const ENCODING *enc)
962 {
963 switch (tok) {
964 case XML_TOK_PROLOG_S:
965 return XML_ROLE_NONE;
966 case XML_TOK_POUND_NAME:
967 if (XmlNameMatchesAscii(enc,
968 ptr + MIN_BYTES_PER_CHAR(enc),
969 end,
970 "PCDATA")) {
971 state->handler = element3;
972 return XML_ROLE_CONTENT_PCDATA;
973 }
974 break;
975 case XML_TOK_OPEN_PAREN:
976 state->level = 2;
977 state->handler = element6;
978 return XML_ROLE_GROUP_OPEN;
979 case XML_TOK_NAME:
980 case XML_TOK_PREFIXED_NAME:
981 state->handler = element7;
982 return XML_ROLE_CONTENT_ELEMENT;
983 case XML_TOK_NAME_QUESTION:
984 state->handler = element7;
985 return XML_ROLE_CONTENT_ELEMENT_OPT;
986 case XML_TOK_NAME_ASTERISK:
987 state->handler = element7;
988 return XML_ROLE_CONTENT_ELEMENT_REP;
989 case XML_TOK_NAME_PLUS:
990 state->handler = element7;
991 return XML_ROLE_CONTENT_ELEMENT_PLUS;
992 }
993 return common(state, tok);
994 }
995
996 static
997 int element3(PROLOG_STATE *state,
998 int tok,
999 const char *ptr,
1000 const char *end,
1001 const ENCODING *enc)
1002 {
1003 switch (tok) {
1004 case XML_TOK_PROLOG_S:
1005 return XML_ROLE_NONE;
1006 case XML_TOK_CLOSE_PAREN:
1007 case XML_TOK_CLOSE_PAREN_ASTERISK:
1008 state->handler = declClose;
1009 return XML_ROLE_GROUP_CLOSE_REP;
1010 case XML_TOK_OR:
1011 state->handler = element4;
1012 return XML_ROLE_NONE;
1013 }
1014 return common(state, tok);
1015 }
1016
1017 static
1018 int element4(PROLOG_STATE *state,
1019 int tok,
1020 const char *ptr,
1021 const char *end,
1022 const ENCODING *enc)
1023 {
1024 switch (tok) {
1025 case XML_TOK_PROLOG_S:
1026 return XML_ROLE_NONE;
1027 case XML_TOK_NAME:
1028 case XML_TOK_PREFIXED_NAME:
1029 state->handler = element5;
1030 return XML_ROLE_CONTENT_ELEMENT;
1031 }
1032 return common(state, tok);
1033 }
1034
1035 static
1036 int element5(PROLOG_STATE *state,
1037 int tok,
1038 const char *ptr,
1039 const char *end,
1040 const ENCODING *enc)
1041 {
1042 switch (tok) {
1043 case XML_TOK_PROLOG_S:
1044 return XML_ROLE_NONE;
1045 case XML_TOK_CLOSE_PAREN_ASTERISK:
1046 state->handler = declClose;
1047 return XML_ROLE_GROUP_CLOSE_REP;
1048 case XML_TOK_OR:
1049 state->handler = element4;
1050 return XML_ROLE_NONE;
1051 }
1052 return common(state, tok);
1053 }
1054
1055 static
1056 int element6(PROLOG_STATE *state,
1057 int tok,
1058 const char *ptr,
1059 const char *end,
1060 const ENCODING *enc)
1061 {
1062 switch (tok) {
1063 case XML_TOK_PROLOG_S:
1064 return XML_ROLE_NONE;
1065 case XML_TOK_OPEN_PAREN:
1066 state->level += 1;
1067 return XML_ROLE_GROUP_OPEN;
1068 case XML_TOK_NAME:
1069 case XML_TOK_PREFIXED_NAME:
1070 state->handler = element7;
1071 return XML_ROLE_CONTENT_ELEMENT;
1072 case XML_TOK_NAME_QUESTION:
1073 state->handler = element7;
1074 return XML_ROLE_CONTENT_ELEMENT_OPT;
1075 case XML_TOK_NAME_ASTERISK:
1076 state->handler = element7;
1077 return XML_ROLE_CONTENT_ELEMENT_REP;
1078 case XML_TOK_NAME_PLUS:
1079 state->handler = element7;
1080 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1081 }
1082 return common(state, tok);
1083 }
1084
1085 static
1086 int element7(PROLOG_STATE *state,
1087 int tok,
1088 const char *ptr,
1089 const char *end,
1090 const ENCODING *enc)
1091 {
1092 switch (tok) {
1093 case XML_TOK_PROLOG_S:
1094 return XML_ROLE_NONE;
1095 case XML_TOK_CLOSE_PAREN:
1096 state->level -= 1;
1097 if (state->level == 0)
1098 state->handler = declClose;
1099 return XML_ROLE_GROUP_CLOSE;
1100 case XML_TOK_CLOSE_PAREN_ASTERISK:
1101 state->level -= 1;
1102 if (state->level == 0)
1103 state->handler = declClose;
1104 return XML_ROLE_GROUP_CLOSE_REP;
1105 case XML_TOK_CLOSE_PAREN_QUESTION:
1106 state->level -= 1;
1107 if (state->level == 0)
1108 state->handler = declClose;
1109 return XML_ROLE_GROUP_CLOSE_OPT;
1110 case XML_TOK_CLOSE_PAREN_PLUS:
1111 state->level -= 1;
1112 if (state->level == 0)
1113 state->handler = declClose;
1114 return XML_ROLE_GROUP_CLOSE_PLUS;
1115 case XML_TOK_COMMA:
1116 state->handler = element6;
1117 return XML_ROLE_GROUP_SEQUENCE;
1118 case XML_TOK_OR:
1119 state->handler = element6;
1120 return XML_ROLE_GROUP_CHOICE;
1121 }
1122 return common(state, tok);
1123 }
1124
1125 #ifdef XML_DTD
1126
1127 static
1128 int condSect0(PROLOG_STATE *state,
1129 int tok,
1130 const char *ptr,
1131 const char *end,
1132 const ENCODING *enc)
1133 {
1134 switch (tok) {
1135 case XML_TOK_PROLOG_S:
1136 return XML_ROLE_NONE;
1137 case XML_TOK_NAME:
1138 if (XmlNameMatchesAscii(enc, ptr, end, "INCLUDE")) {
1139 state->handler = condSect1;
1140 return XML_ROLE_NONE;
1141 }
1142 if (XmlNameMatchesAscii(enc, ptr, end, "IGNORE")) {
1143 state->handler = condSect2;
1144 return XML_ROLE_NONE;
1145 }
1146 break;
1147 }
1148 return common(state, tok);
1149 }
1150
1151 static
1152 int condSect1(PROLOG_STATE *state,
1153 int tok,
1154 const char *ptr,
1155 const char *end,
1156 const ENCODING *enc)
1157 {
1158 switch (tok) {
1159 case XML_TOK_PROLOG_S:
1160 return XML_ROLE_NONE;
1161 case XML_TOK_OPEN_BRACKET:
1162 state->handler = externalSubset1;
1163 state->includeLevel += 1;
1164 return XML_ROLE_NONE;
1165 }
1166 return common(state, tok);
1167 }
1168
1169 static
1170 int condSect2(PROLOG_STATE *state,
1171 int tok,
1172 const char *ptr,
1173 const char *end,
1174 const ENCODING *enc)
1175 {
1176 switch (tok) {
1177 case XML_TOK_PROLOG_S:
1178 return XML_ROLE_NONE;
1179 case XML_TOK_OPEN_BRACKET:
1180 state->handler = externalSubset1;
1181 return XML_ROLE_IGNORE_SECT;
1182 }
1183 return common(state, tok);
1184 }
1185
1186 #endif /* XML_DTD */
1187
1188 static
1189 int declClose(PROLOG_STATE *state,
1190 int tok,
1191 const char *ptr,
1192 const char *end,
1193 const ENCODING *enc)
1194 {
1195 switch (tok) {
1196 case XML_TOK_PROLOG_S:
1197 return XML_ROLE_NONE;
1198 case XML_TOK_DECL_CLOSE:
1199 setTopLevel(state);
1200 return XML_ROLE_NONE;
1201 }
1202 return common(state, tok);
1203 }
1204
1205 #if 0
1206
1207 static
1208 int ignore(PROLOG_STATE *state,
1209 int tok,
1210 const char *ptr,
1211 const char *end,
1212 const ENCODING *enc)
1213 {
1214 switch (tok) {
1215 case XML_TOK_DECL_CLOSE:
1216 state->handler = internalSubset;
1217 return 0;
1218 default:
1219 return XML_ROLE_NONE;
1220 }
1221 return common(state, tok);
1222 }
1223 #endif
1224
1225 static
1226 int error(PROLOG_STATE *state,
1227 int tok,
1228 const char *ptr,
1229 const char *end,
1230 const ENCODING *enc)
1231 {
1232 return XML_ROLE_NONE;
1233 }
1234
1235 static
1236 int common(PROLOG_STATE *state, int tok)
1237 {
1238 #ifdef XML_DTD
1239 if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1240 return XML_ROLE_INNER_PARAM_ENTITY_REF;
1241 #endif
1242 state->handler = error;
1243 return XML_ROLE_ERROR;
1244 }
1245
1246 void XmlPrologStateInit(PROLOG_STATE *state)
1247 {
1248 state->handler = prolog0;
1249 #ifdef XML_DTD
1250 state->documentEntity = 1;
1251 state->includeLevel = 0;
1252 #endif /* XML_DTD */
1253 }
1254
1255 #ifdef XML_DTD
1256
1257 void XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1258 {
1259 state->handler = externalSubset0;
1260 state->documentEntity = 0;
1261 state->includeLevel = 0;
1262 }
1263
1264 #endif /* XML_DTD */
+0
-1542
SFconv/expat/xmltok/xmltok.c less more
0 /*
1 The contents of this file are subject to the Mozilla Public License
2 Version 1.1 (the "License"); you may not use this file except in
3 compliance with the License. You may obtain a copy of the License at
4 http://www.mozilla.org/MPL/
5
6 Software distributed under the License is distributed on an "AS IS"
7 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
8 License for the specific language governing rights and limitations
9 under the License.
10
11 The Original Code is expat.
12
13 The Initial Developer of the Original Code is James Clark.
14 Portions created by James Clark are Copyright (C) 1998, 1999
15 James Clark. All Rights Reserved.
16
17 Contributor(s):
18
19 Alternatively, the contents of this file may be used under the terms
20 of the GNU General Public License (the "GPL"), in which case the
21 provisions of the GPL are applicable instead of those above. If you
22 wish to allow use of your version of this file only under the terms of
23 the GPL and not to allow others to use your version of this file under
24 the MPL, indicate your decision by deleting the provisions above and
25 replace them with the notice and other provisions required by the
26 GPL. If you do not delete the provisions above, a recipient may use
27 your version of this file under either the MPL or the GPL.
28 */
29
30 #include "xmldef.h"
31 #include "xmltok.h"
32 #include "nametab.h"
33
34 #ifdef XML_DTD
35 #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
36 #else
37 #define IGNORE_SECTION_TOK_VTABLE /* as nothing */
38 #endif
39
40 #define VTABLE1 \
41 { PREFIX(prologTok), PREFIX(contentTok), \
42 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
43 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
44 PREFIX(sameName), \
45 PREFIX(nameMatchesAscii), \
46 PREFIX(nameLength), \
47 PREFIX(skipS), \
48 PREFIX(getAtts), \
49 PREFIX(charRefNumber), \
50 PREFIX(predefinedEntityName), \
51 PREFIX(updatePosition), \
52 PREFIX(isPublicId)
53
54 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
55
56 #define UCS2_GET_NAMING(pages, hi, lo) \
57 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
58
59 /* A 2 byte UTF-8 representation splits the characters 11 bits
60 between the bottom 5 and 6 bits of the bytes.
61 We need 8 bits to index into pages, 3 bits to add to that index and
62 5 bits to generate the mask. */
63 #define UTF8_GET_NAMING2(pages, byte) \
64 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
65 + ((((byte)[0]) & 3) << 1) \
66 + ((((byte)[1]) >> 5) & 1)] \
67 & (1 << (((byte)[1]) & 0x1F)))
68
69 /* A 3 byte UTF-8 representation splits the characters 16 bits
70 between the bottom 4, 6 and 6 bits of the bytes.
71 We need 8 bits to index into pages, 3 bits to add to that index and
72 5 bits to generate the mask. */
73 #define UTF8_GET_NAMING3(pages, byte) \
74 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
75 + ((((byte)[1]) >> 2) & 0xF)] \
76 << 3) \
77 + ((((byte)[1]) & 3) << 1) \
78 + ((((byte)[2]) >> 5) & 1)] \
79 & (1 << (((byte)[2]) & 0x1F)))
80
81 #define UTF8_GET_NAMING(pages, p, n) \
82 ((n) == 2 \
83 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
84 : ((n) == 3 \
85 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
86 : 0))
87
88 #define UTF8_INVALID3(p) \
89 ((*p) == 0xED \
90 ? (((p)[1] & 0x20) != 0) \
91 : ((*p) == 0xEF \
92 ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
93 : 0))
94
95 #define UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)
96
97 static
98 int isNever(const ENCODING *enc, const char *p)
99 {
100 return 0;
101 }
102
103 static
104 int utf8_isName2(const ENCODING *enc, const char *p)
105 {
106 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
107 }
108
109 static
110 int utf8_isName3(const ENCODING *enc, const char *p)
111 {
112 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
113 }
114
115 #define utf8_isName4 isNever
116
117 static
118 int utf8_isNmstrt2(const ENCODING *enc, const char *p)
119 {
120 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
121 }
122
123 static
124 int utf8_isNmstrt3(const ENCODING *enc, const char *p)
125 {
126 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
127 }
128
129 #define utf8_isNmstrt4 isNever
130
131 #define utf8_isInvalid2 isNever
132
133 static
134 int utf8_isInvalid3(const ENCODING *enc, const char *p)
135 {
136 return UTF8_INVALID3((const unsigned char *)p);
137 }
138
139 static
140 int utf8_isInvalid4(const ENCODING *enc, const char *p)
141 {
142 return UTF8_INVALID4((const unsigned char *)p);
143 }
144
145 struct normal_encoding {
146 ENCODING enc;
147 unsigned char type[256];
148 #ifdef XML_MIN_SIZE
149 int (*byteType)(const ENCODING *, const char *);
150 int (*isNameMin)(const ENCODING *, const char *);
151 int (*isNmstrtMin)(const ENCODING *, const char *);
152 int (*byteToAscii)(const ENCODING *, const char *);
153 int (*charMatches)(const ENCODING *, const char *, int);
154 #endif /* XML_MIN_SIZE */
155 int (*isName2)(const ENCODING *, const char *);
156 int (*isName3)(const ENCODING *, const char *);
157 int (*isName4)(const ENCODING *, const char *);
158 int (*isNmstrt2)(const ENCODING *, const char *);
159 int (*isNmstrt3)(const ENCODING *, const char *);
160 int (*isNmstrt4)(const ENCODING *, const char *);
161 int (*isInvalid2)(const ENCODING *, const char *);
162 int (*isInvalid3)(const ENCODING *, const char *);
163 int (*isInvalid4)(const ENCODING *, const char *);
164 };
165
166 #ifdef XML_MIN_SIZE
167
168 #define STANDARD_VTABLE(E) \
169 E ## byteType, \
170 E ## isNameMin, \
171 E ## isNmstrtMin, \
172 E ## byteToAscii, \
173 E ## charMatches,
174
175 #else
176
177 #define STANDARD_VTABLE(E) /* as nothing */
178
179 #endif
180
181 #define NORMAL_VTABLE(E) \
182 E ## isName2, \
183 E ## isName3, \
184 E ## isName4, \
185 E ## isNmstrt2, \
186 E ## isNmstrt3, \
187 E ## isNmstrt4, \
188 E ## isInvalid2, \
189 E ## isInvalid3, \
190 E ## isInvalid4
191
192 static int checkCharRefNumber(int);
193
194 #include "xmltok_impl.h"
195
196 #ifdef XML_MIN_SIZE
197 #define sb_isNameMin isNever
198 #define sb_isNmstrtMin isNever
199 #endif
200
201 #ifdef XML_MIN_SIZE
202 #define MINBPC(enc) ((enc)->minBytesPerChar)
203 #else
204 /* minimum bytes per character */
205 #define MINBPC(enc) 1
206 #endif
207
208 #define SB_BYTE_TYPE(enc, p) \
209 (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
210
211 #ifdef XML_MIN_SIZE
212 static
213 int sb_byteType(const ENCODING *enc, const char *p)
214 {
215 return SB_BYTE_TYPE(enc, p);
216 }
217 #define BYTE_TYPE(enc, p) \
218 (((const struct normal_encoding *)(enc))->byteType(enc, p))
219 #else
220 #define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
221 #endif
222
223 #ifdef XML_MIN_SIZE
224 #define BYTE_TO_ASCII(enc, p) \
225 (((const struct normal_encoding *)(enc))->byteToAscii(enc, p))
226 static
227 int sb_byteToAscii(const ENCODING *enc, const char *p)
228 {
229 return *p;
230 }
231 #else
232 #define BYTE_TO_ASCII(enc, p) (*p)
233 #endif
234
235 #define IS_NAME_CHAR(enc, p, n) \
236 (((const struct normal_encoding *)(enc))->isName ## n(enc, p))
237 #define IS_NMSTRT_CHAR(enc, p, n) \
238 (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p))
239 #define IS_INVALID_CHAR(enc, p, n) \
240 (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p))
241
242 #ifdef XML_MIN_SIZE
243 #define IS_NAME_CHAR_MINBPC(enc, p) \
244 (((const struct normal_encoding *)(enc))->isNameMin(enc, p))
245 #define IS_NMSTRT_CHAR_MINBPC(enc, p) \
246 (((const struct normal_encoding *)(enc))->isNmstrtMin(enc, p))
247 #else
248 #define IS_NAME_CHAR_MINBPC(enc, p) (0)
249 #define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
250 #endif
251
252 #ifdef XML_MIN_SIZE
253 #define CHAR_MATCHES(enc, p, c) \
254 (((const struct normal_encoding *)(enc))->charMatches(enc, p, c))
255 static
256 int sb_charMatches(const ENCODING *enc, const char *p, int c)
257 {
258 return *p == c;
259 }
260 #else
261 /* c is an ASCII character */
262 #define CHAR_MATCHES(enc, p, c) (*(p) == c)
263 #endif
264
265 #define PREFIX(ident) normal_ ## ident
266 #include "xmltok_impl.c"
267
268 #undef MINBPC
269 #undef BYTE_TYPE
270 #undef BYTE_TO_ASCII
271 #undef CHAR_MATCHES
272 #undef IS_NAME_CHAR
273 #undef IS_NAME_CHAR_MINBPC
274 #undef IS_NMSTRT_CHAR
275 #undef IS_NMSTRT_CHAR_MINBPC
276 #undef IS_INVALID_CHAR
277
278 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
279 UTF8_cval1 = 0x00,
280 UTF8_cval2 = 0xc0,
281 UTF8_cval3 = 0xe0,
282 UTF8_cval4 = 0xf0
283 };
284
285 static
286 void utf8_toUtf8(const ENCODING *enc,
287 const char **fromP, const char *fromLim,
288 char **toP, const char *toLim)
289 {
290 char *to;
291 const char *from;
292 if (fromLim - *fromP > toLim - *toP) {
293 /* Avoid copying partial characters. */
294 for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
295 if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
296 break;
297 }
298 for (to = *toP, from = *fromP; from != fromLim; from++, to++)
299 *to = *from;
300 *fromP = from;
301 *toP = to;
302 }
303
304 static
305 void utf8_toUtf16(const ENCODING *enc,
306 const char **fromP, const char *fromLim,
307 unsigned short **toP, const unsigned short *toLim)
308 {
309 unsigned short *to = *toP;
310 const char *from = *fromP;
311 while (from != fromLim && to != toLim) {
312 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
313 case BT_LEAD2:
314 *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f);
315 from += 2;
316 break;
317 case BT_LEAD3:
318 *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f);
319 from += 3;
320 break;
321 case BT_LEAD4:
322 {
323 unsigned long n;
324 if (to + 1 == toLim)
325 break;
326 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
327 n -= 0x10000;
328 to[0] = (unsigned short)((n >> 10) | 0xD800);
329 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
330 to += 2;
331 from += 4;
332 }
333 break;
334 default:
335 *to++ = *from++;
336 break;
337 }
338 }
339 *fromP = from;
340 *toP = to;
341 }
342
343 #ifdef XML_NS
344 static const struct normal_encoding utf8_encoding_ns = {
345 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
346 {
347 #include "asciitab.h"
348 #include "utf8tab.h"
349 },
350 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
351 };
352 #endif
353
354 static const struct normal_encoding utf8_encoding = {
355 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
356 {
357 #define BT_COLON BT_NMSTRT
358 #include "asciitab.h"
359 #undef BT_COLON
360 #include "utf8tab.h"
361 },
362 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
363 };
364
365 #ifdef XML_NS
366
367 static const struct normal_encoding internal_utf8_encoding_ns = {
368 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
369 {
370 #include "iasciitab.h"
371 #include "utf8tab.h"
372 },
373 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
374 };
375
376 #endif
377
378 static const struct normal_encoding internal_utf8_encoding = {
379 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
380 {
381 #define BT_COLON BT_NMSTRT
382 #include "iasciitab.h"
383 #undef BT_COLON
384 #include "utf8tab.h"
385 },
386 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
387 };
388
389 static
390 void latin1_toUtf8(const ENCODING *enc,
391 const char **fromP, const char *fromLim,
392 char **toP, const char *toLim)
393 {
394 for (;;) {
395 unsigned char c;
396 if (*fromP == fromLim)
397 break;
398 c = (unsigned char)**fromP;
399 if (c & 0x80) {
400 if (toLim - *toP < 2)
401 break;
402 *(*toP)++ = ((c >> 6) | UTF8_cval2);
403 *(*toP)++ = ((c & 0x3f) | 0x80);
404 (*fromP)++;
405 }
406 else {
407 if (*toP == toLim)
408 break;
409 *(*toP)++ = *(*fromP)++;
410 }
411 }
412 }
413
414 static
415 void latin1_toUtf16(const ENCODING *enc,
416 const char **fromP, const char *fromLim,
417 unsigned short **toP, const unsigned short *toLim)
418 {
419 while (*fromP != fromLim && *toP != toLim)
420 *(*toP)++ = (unsigned char)*(*fromP)++;
421 }
422
423 #ifdef XML_NS
424
425 static const struct normal_encoding latin1_encoding_ns = {
426 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
427 {
428 #include "asciitab.h"
429 #include "latin1tab.h"
430 },
431 STANDARD_VTABLE(sb_)
432 };
433
434 #endif
435
436 static const struct normal_encoding latin1_encoding = {
437 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
438 {
439 #define BT_COLON BT_NMSTRT
440 #include "asciitab.h"
441 #undef BT_COLON
442 #include "latin1tab.h"
443 },
444 STANDARD_VTABLE(sb_)
445 };
446
447 static
448 void ascii_toUtf8(const ENCODING *enc,
449 const char **fromP, const char *fromLim,
450 char **toP, const char *toLim)
451 {
452 while (*fromP != fromLim && *toP != toLim)
453 *(*toP)++ = *(*fromP)++;
454 }
455
456 #ifdef XML_NS
457
458 static const struct normal_encoding ascii_encoding_ns = {
459 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
460 {
461 #include "asciitab.h"
462 /* BT_NONXML == 0 */
463 },
464 STANDARD_VTABLE(sb_)
465 };
466
467 #endif
468
469 static const struct normal_encoding ascii_encoding = {
470 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
471 {
472 #define BT_COLON BT_NMSTRT
473 #include "asciitab.h"
474 #undef BT_COLON
475 /* BT_NONXML == 0 */
476 },
477 STANDARD_VTABLE(sb_)
478 };
479
480 static int unicode_byte_type(char hi, char lo)
481 {
482 switch ((unsigned char)hi) {
483 case 0xD8: case 0xD9: case 0xDA: case 0xDB:
484 return BT_LEAD4;
485 case 0xDC: case 0xDD: case 0xDE: case 0xDF:
486 return BT_TRAIL;
487 case 0xFF:
488 switch ((unsigned char)lo) {
489 case 0xFF:
490 case 0xFE:
491 return BT_NONXML;
492 }
493 break;
494 }
495 return BT_NONASCII;
496 }
497
498 #define DEFINE_UTF16_TO_UTF8(E) \
499 static \
500 void E ## toUtf8(const ENCODING *enc, \
501 const char **fromP, const char *fromLim, \
502 char **toP, const char *toLim) \
503 { \
504 const char *from; \
505 for (from = *fromP; from != fromLim; from += 2) { \
506 int plane; \
507 unsigned char lo2; \
508 unsigned char lo = GET_LO(from); \
509 unsigned char hi = GET_HI(from); \
510 switch (hi) { \
511 case 0: \
512 if (lo < 0x80) { \
513 if (*toP == toLim) { \
514 *fromP = from; \
515 return; \
516 } \
517 *(*toP)++ = lo; \
518 break; \
519 } \
520 /* fall through */ \
521 case 0x1: case 0x2: case 0x3: \
522 case 0x4: case 0x5: case 0x6: case 0x7: \
523 if (toLim - *toP < 2) { \
524 *fromP = from; \
525 return; \
526 } \
527 *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
528 *(*toP)++ = ((lo & 0x3f) | 0x80); \
529 break; \
530 default: \
531 if (toLim - *toP < 3) { \
532 *fromP = from; \
533 return; \
534 } \
535 /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
536 *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
537 *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
538 *(*toP)++ = ((lo & 0x3f) | 0x80); \
539 break; \
540 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
541 if (toLim - *toP < 4) { \
542 *fromP = from; \
543 return; \
544 } \
545 plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
546 *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
547 *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
548 from += 2; \
549 lo2 = GET_LO(from); \
550 *(*toP)++ = (((lo & 0x3) << 4) \
551 | ((GET_HI(from) & 0x3) << 2) \
552 | (lo2 >> 6) \
553 | 0x80); \
554 *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
555 break; \
556 } \
557 } \
558 *fromP = from; \
559 }
560
561 #define DEFINE_UTF16_TO_UTF16(E) \
562 static \
563 void E ## toUtf16(const ENCODING *enc, \
564 const char **fromP, const char *fromLim, \
565 unsigned short **toP, const unsigned short *toLim) \
566 { \
567 /* Avoid copying first half only of surrogate */ \
568 if (fromLim - *fromP > ((toLim - *toP) << 1) \
569 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
570 fromLim -= 2; \
571 for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
572 *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
573 }
574
575 #define SET2(ptr, ch) \
576 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
577 #define GET_LO(ptr) ((unsigned char)(ptr)[0])
578 #define GET_HI(ptr) ((unsigned char)(ptr)[1])
579
580 DEFINE_UTF16_TO_UTF8(little2_)
581 DEFINE_UTF16_TO_UTF16(little2_)
582
583 #undef SET2
584 #undef GET_LO
585 #undef GET_HI
586
587 #define SET2(ptr, ch) \
588 (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
589 #define GET_LO(ptr) ((unsigned char)(ptr)[1])
590 #define GET_HI(ptr) ((unsigned char)(ptr)[0])
591
592 DEFINE_UTF16_TO_UTF8(big2_)
593 DEFINE_UTF16_TO_UTF16(big2_)
594
595 #undef SET2
596 #undef GET_LO
597 #undef GET_HI
598
599 #define LITTLE2_BYTE_TYPE(enc, p) \
600 ((p)[1] == 0 \
601 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
602 : unicode_byte_type((p)[1], (p)[0]))
603 #define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
604 #define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c)
605 #define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \
606 UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])
607 #define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
608 UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0])
609
610 #ifdef XML_MIN_SIZE
611
612 static
613 int little2_byteType(const ENCODING *enc, const char *p)
614 {
615 return LITTLE2_BYTE_TYPE(enc, p);
616 }
617
618 static
619 int little2_byteToAscii(const ENCODING *enc, const char *p)
620 {
621 return LITTLE2_BYTE_TO_ASCII(enc, p);
622 }
623
624 static
625 int little2_charMatches(const ENCODING *enc, const char *p, int c)
626 {
627 return LITTLE2_CHAR_MATCHES(enc, p, c);
628 }
629
630 static
631 int little2_isNameMin(const ENCODING *enc, const char *p)
632 {
633 return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);
634 }
635
636 static
637 int little2_isNmstrtMin(const ENCODING *enc, const char *p)
638 {
639 return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);
640 }
641
642 #undef VTABLE
643 #define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
644
645 #else /* not XML_MIN_SIZE */
646
647 #undef PREFIX
648 #define PREFIX(ident) little2_ ## ident
649 #define MINBPC(enc) 2
650 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
651 #define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
652 #define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
653 #define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
654 #define IS_NAME_CHAR(enc, p, n) 0
655 #define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
656 #define IS_NMSTRT_CHAR(enc, p, n) (0)
657 #define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)
658
659 #include "xmltok_impl.c"
660
661 #undef MINBPC
662 #undef BYTE_TYPE
663 #undef BYTE_TO_ASCII
664 #undef CHAR_MATCHES
665 #undef IS_NAME_CHAR
666 #undef IS_NAME_CHAR_MINBPC
667 #undef IS_NMSTRT_CHAR
668 #undef IS_NMSTRT_CHAR_MINBPC
669 #undef IS_INVALID_CHAR
670
671 #endif /* not XML_MIN_SIZE */
672
673 #ifdef XML_NS
674
675 static const struct normal_encoding little2_encoding_ns = {
676 { VTABLE, 2, 0,
677 #if XML_BYTE_ORDER == 12
678 1
679 #else
680 0
681 #endif
682 },
683 {
684 #include "asciitab.h"
685 #include "latin1tab.h"
686 },
687 STANDARD_VTABLE(little2_)
688 };
689
690 #endif
691
692 static const struct normal_encoding little2_encoding = {
693 { VTABLE, 2, 0,
694 #if XML_BYTE_ORDER == 12
695 1
696 #else
697 0
698 #endif
699 },
700 {
701 #define BT_COLON BT_NMSTRT
702 #include "asciitab.h"
703 #undef BT_COLON
704 #include "latin1tab.h"
705 },
706 STANDARD_VTABLE(little2_)
707 };
708
709 #if XML_BYTE_ORDER != 21
710
711 #ifdef XML_NS
712
713 static const struct normal_encoding internal_little2_encoding_ns = {
714 { VTABLE, 2, 0, 1 },
715 {
716 #include "iasciitab.h"
717 #include "latin1tab.h"
718 },
719 STANDARD_VTABLE(little2_)
720 };
721
722 #endif
723
724 static const struct normal_encoding internal_little2_encoding = {
725 { VTABLE, 2, 0, 1 },
726 {
727 #define BT_COLON BT_NMSTRT
728 #include "iasciitab.h"
729 #undef BT_COLON
730 #include "latin1tab.h"
731 },
732 STANDARD_VTABLE(little2_)
733 };
734
735 #endif
736
737
738 #define BIG2_BYTE_TYPE(enc, p) \
739 ((p)[0] == 0 \
740 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
741 : unicode_byte_type((p)[0], (p)[1]))
742 #define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
743 #define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c)
744 #define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
745 UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])
746 #define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
747 UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1])
748
749 #ifdef XML_MIN_SIZE
750
751 static
752 int big2_byteType(const ENCODING *enc, const char *p)
753 {
754 return BIG2_BYTE_TYPE(enc, p);
755 }
756
757 static
758 int big2_byteToAscii(const ENCODING *enc, const char *p)
759 {
760 return BIG2_BYTE_TO_ASCII(enc, p);
761 }
762
763 static
764 int big2_charMatches(const ENCODING *enc, const char *p, int c)
765 {
766 return BIG2_CHAR_MATCHES(enc, p, c);
767 }
768
769 static
770 int big2_isNameMin(const ENCODING *enc, const char *p)
771 {
772 return BIG2_IS_NAME_CHAR_MINBPC(enc, p);
773 }
774
775 static
776 int big2_isNmstrtMin(const ENCODING *enc, const char *p)
777 {
778 return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);
779 }
780
781 #undef VTABLE
782 #define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
783
784 #else /* not XML_MIN_SIZE */
785
786 #undef PREFIX
787 #define PREFIX(ident) big2_ ## ident
788 #define MINBPC(enc) 2
789 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
790 #define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
791 #define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
792 #define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
793 #define IS_NAME_CHAR(enc, p, n) 0
794 #define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
795 #define IS_NMSTRT_CHAR(enc, p, n) (0)
796 #define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)
797
798 #include "xmltok_impl.c"
799
800 #undef MINBPC
801 #undef BYTE_TYPE
802 #undef BYTE_TO_ASCII
803 #undef CHAR_MATCHES
804 #undef IS_NAME_CHAR
805 #undef IS_NAME_CHAR_MINBPC
806 #undef IS_NMSTRT_CHAR
807 #undef IS_NMSTRT_CHAR_MINBPC
808 #undef IS_INVALID_CHAR
809
810 #endif /* not XML_MIN_SIZE */
811
812 #ifdef XML_NS
813
814 static const struct normal_encoding big2_encoding_ns = {
815 { VTABLE, 2, 0,
816 #if XML_BYTE_ORDER == 21
817 1
818 #else
819 0
820 #endif
821 },
822 {
823 #include "asciitab.h"
824 #include "latin1tab.h"
825 },
826 STANDARD_VTABLE(big2_)
827 };
828
829 #endif
830
831 static const struct normal_encoding big2_encoding = {
832 { VTABLE, 2, 0,
833 #if XML_BYTE_ORDER == 21
834 1
835 #else
836 0
837 #endif
838 },
839 {
840 #define BT_COLON BT_NMSTRT
841 #include "asciitab.h"
842 #undef BT_COLON
843 #include "latin1tab.h"
844 },
845 STANDARD_VTABLE(big2_)
846 };
847
848 #if XML_BYTE_ORDER != 12
849
850 #ifdef XML_NS
851
852 static const struct normal_encoding internal_big2_encoding_ns = {
853 { VTABLE, 2, 0, 1 },
854 {
855 #include "iasciitab.h"
856 #include "latin1tab.h"
857 },
858 STANDARD_VTABLE(big2_)
859 };
860
861 #endif
862
863 static const struct normal_encoding internal_big2_encoding = {
864 { VTABLE, 2, 0, 1 },
865 {
866 #define BT_COLON BT_NMSTRT
867 #include "iasciitab.h"
868 #undef BT_COLON
869 #include "latin1tab.h"
870 },
871 STANDARD_VTABLE(big2_)
872 };
873
874 #endif
875
876 #undef PREFIX
877
878 static
879 int streqci(const char *s1, const char *s2)
880 {
881 for (;;) {
882 char c1 = *s1++;
883 char c2 = *s2++;
884 if ('a' <= c1 && c1 <= 'z')
885 c1 += 'A' - 'a';
886 if ('a' <= c2 && c2 <= 'z')
887 c2 += 'A' - 'a';
888 if (c1 != c2)
889 return 0;
890 if (!c1)
891 break;
892 }
893 return 1;
894 }
895
896 static
897 void initUpdatePosition(const ENCODING *enc, const char *ptr,
898 const char *end, POSITION *pos)
899 {
900 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
901 }
902
903 static
904 int toAscii(const ENCODING *enc, const char *ptr, const char *end)
905 {
906 char buf[1];
907 char *p = buf;
908 XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
909 if (p == buf)
910 return -1;
911 else
912 return buf[0];
913 }
914
915 static
916 int isSpace(int c)
917 {
918 switch (c) {
919 case 0x20:
920 case 0xD:
921 case 0xA:
922 case 0x9:
923 return 1;
924 }
925 return 0;
926 }
927
928 /* Return 1 if there's just optional white space
929 or there's an S followed by name=val. */
930 static
931 int parsePseudoAttribute(const ENCODING *enc,
932 const char *ptr,
933 const char *end,
934 const char **namePtr,
935 const char **nameEndPtr,
936 const char **valPtr,
937 const char **nextTokPtr)
938 {
939 int c;
940 char open;
941 if (ptr == end) {
942 *namePtr = 0;
943 return 1;
944 }
945 if (!isSpace(toAscii(enc, ptr, end))) {
946 *nextTokPtr = ptr;
947 return 0;
948 }
949 do {
950 ptr += enc->minBytesPerChar;
951 } while (isSpace(toAscii(enc, ptr, end)));
952 if (ptr == end) {
953 *namePtr = 0;
954 return 1;
955 }
956 *namePtr = ptr;
957 for (;;) {
958 c = toAscii(enc, ptr, end);
959 if (c == -1) {
960 *nextTokPtr = ptr;
961 return 0;
962 }
963 if (c == '=') {
964 *nameEndPtr = ptr;
965 break;
966 }
967 if (isSpace(c)) {
968 *nameEndPtr = ptr;
969 do {
970 ptr += enc->minBytesPerChar;
971 } while (isSpace(c = toAscii(enc, ptr, end)));
972 if (c != '=') {
973 *nextTokPtr = ptr;
974 return 0;
975 }
976 break;
977 }
978 ptr += enc->minBytesPerChar;
979 }
980 if (ptr == *namePtr) {
981 *nextTokPtr = ptr;
982 return 0;
983 }
984 ptr += enc->minBytesPerChar;
985 c = toAscii(enc, ptr, end);
986 while (isSpace(c)) {
987 ptr += enc->minBytesPerChar;
988 c = toAscii(enc, ptr, end);
989 }
990 if (c != '"' && c != '\'') {
991 *nextTokPtr = ptr;
992 return 0;
993 }
994 open = c;
995 ptr += enc->minBytesPerChar;
996 *valPtr = ptr;
997 for (;; ptr += enc->minBytesPerChar) {
998 c = toAscii(enc, ptr, end);
999 if (c == open)
1000 break;
1001 if (!('a' <= c && c <= 'z')
1002 && !('A' <= c && c <= 'Z')
1003 && !('0' <= c && c <= '9')
1004 && c != '.'
1005 && c != '-'
1006 && c != '_') {
1007 *nextTokPtr = ptr;
1008 return 0;
1009 }
1010 }
1011 *nextTokPtr = ptr + enc->minBytesPerChar;
1012 return 1;
1013 }
1014
1015 static
1016 int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
1017 const char *,
1018 const char *),
1019 int isGeneralTextEntity,
1020 const ENCODING *enc,
1021 const char *ptr,
1022 const char *end,
1023 const char **badPtr,
1024 const char **versionPtr,
1025 const char **encodingName,
1026 const ENCODING **encoding,
1027 int *standalone)
1028 {
1029 const char *val = 0;
1030 const char *name = 0;
1031 const char *nameEnd = 0;
1032 ptr += 5 * enc->minBytesPerChar;
1033 end -= 2 * enc->minBytesPerChar;
1034 if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) || !name) {
1035 *badPtr = ptr;
1036 return 0;
1037 }
1038 if (!XmlNameMatchesAscii(enc, name, nameEnd, "version")) {
1039 if (!isGeneralTextEntity) {
1040 *badPtr = name;
1041 return 0;
1042 }
1043 }
1044 else {
1045 if (versionPtr)
1046 *versionPtr = val;
1047 if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
1048 *badPtr = ptr;
1049 return 0;
1050 }
1051 if (!name) {
1052 if (isGeneralTextEntity) {
1053 /* a TextDecl must have an EncodingDecl */
1054 *badPtr = ptr;
1055 return 0;
1056 }
1057 return 1;
1058 }
1059 }
1060 if (XmlNameMatchesAscii(enc, name, nameEnd, "encoding")) {
1061 int c = toAscii(enc, val, end);
1062 if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z')) {
1063 *badPtr = val;
1064 return 0;
1065 }
1066 if (encodingName)
1067 *encodingName = val;
1068 if (encoding)
1069 *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
1070 if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
1071 *badPtr = ptr;
1072 return 0;
1073 }
1074 if (!name)
1075 return 1;
1076 }
1077 if (!XmlNameMatchesAscii(enc, name, nameEnd, "standalone") || isGeneralTextEntity) {
1078 *badPtr = name;
1079 return 0;
1080 }
1081 if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, "yes")) {
1082 if (standalone)
1083 *standalone = 1;
1084 }
1085 else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, "no")) {
1086 if (standalone)
1087 *standalone = 0;
1088 }
1089 else {
1090 *badPtr = val;
1091 return 0;
1092 }
1093 while (isSpace(toAscii(enc, ptr, end)))
1094 ptr += enc->minBytesPerChar;
1095 if (ptr != end) {
1096 *badPtr = ptr;
1097 return 0;
1098 }
1099 return 1;
1100 }
1101
1102 static
1103 int checkCharRefNumber(int result)
1104 {
1105 switch (result >> 8) {
1106 case 0xD8: case 0xD9: case 0xDA: case 0xDB:
1107 case 0xDC: case 0xDD: case 0xDE: case 0xDF:
1108 return -1;
1109 case 0:
1110 if (latin1_encoding.type[result] == BT_NONXML)
1111 return -1;
1112 break;
1113 case 0xFF:
1114 if (result == 0xFFFE || result == 0xFFFF)
1115 return -1;
1116 break;
1117 }
1118 return result;
1119 }
1120
1121 int XmlUtf8Encode(int c, char *buf)
1122 {
1123 enum {
1124 /* minN is minimum legal resulting value for N byte sequence */
1125 min2 = 0x80,
1126 min3 = 0x800,
1127 min4 = 0x10000
1128 };
1129
1130 if (c < 0)
1131 return 0;
1132 if (c < min2) {
1133 buf[0] = (c | UTF8_cval1);
1134 return 1;
1135 }
1136 if (c < min3) {
1137 buf[0] = ((c >> 6) | UTF8_cval2);
1138 buf[1] = ((c & 0x3f) | 0x80);
1139 return 2;
1140 }
1141 if (c < min4) {
1142 buf[0] = ((c >> 12) | UTF8_cval3);
1143 buf[1] = (((c >> 6) & 0x3f) | 0x80);
1144 buf[2] = ((c & 0x3f) | 0x80);
1145 return 3;
1146 }
1147 if (c < 0x110000) {
1148 buf[0] = ((c >> 18) | UTF8_cval4);
1149 buf[1] = (((c >> 12) & 0x3f) | 0x80);
1150 buf[2] = (((c >> 6) & 0x3f) | 0x80);
1151 buf[3] = ((c & 0x3f) | 0x80);
1152 return 4;
1153 }
1154 return 0;
1155 }
1156
1157 int XmlUtf16Encode(int charNum, unsigned short *buf)
1158 {
1159 if (charNum < 0)
1160 return 0;
1161 if (charNum < 0x10000) {
1162 buf[0] = charNum;
1163 return 1;
1164 }
1165 if (charNum < 0x110000) {
1166 charNum -= 0x10000;
1167 buf[0] = (charNum >> 10) + 0xD800;
1168 buf[1] = (charNum & 0x3FF) + 0xDC00;
1169 return 2;
1170 }
1171 return 0;
1172 }
1173
1174 struct unknown_encoding {
1175 struct normal_encoding normal;
1176 int (*convert)(void *userData, const char *p);
1177 void *userData;
1178 unsigned short utf16[256];
1179 char utf8[256][4];
1180 };
1181
1182 int XmlSizeOfUnknownEncoding(void)
1183 {
1184 return sizeof(struct unknown_encoding);
1185 }
1186
1187 static
1188 int unknown_isName(const ENCODING *enc, const char *p)
1189 {
1190 int c = ((const struct unknown_encoding *)enc)
1191 ->convert(((const struct unknown_encoding *)enc)->userData, p);
1192 if (c & ~0xFFFF)
1193 return 0;
1194 return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
1195 }
1196
1197 static
1198 int unknown_isNmstrt(const ENCODING *enc, const char *p)
1199 {
1200 int c = ((const struct unknown_encoding *)enc)
1201 ->convert(((const struct unknown_encoding *)enc)->userData, p);
1202 if (c & ~0xFFFF)
1203 return 0;
1204 return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
1205 }
1206
1207 static
1208 int unknown_isInvalid(const ENCODING *enc, const char *p)
1209 {
1210 int c = ((const struct unknown_encoding *)enc)
1211 ->convert(((const struct unknown_encoding *)enc)->userData, p);
1212 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
1213 }
1214
1215 static
1216 void unknown_toUtf8(const ENCODING *enc,
1217 const char **fromP, const char *fromLim,
1218 char **toP, const char *toLim)
1219 {
1220 char buf[XML_UTF8_ENCODE_MAX];
1221 for (;;) {
1222 const char *utf8;
1223 int n;
1224 if (*fromP == fromLim)
1225 break;
1226 utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP];
1227 n = *utf8++;
1228 if (n == 0) {
1229 int c = ((const struct unknown_encoding *)enc)
1230 ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
1231 n = XmlUtf8Encode(c, buf);
1232 if (n > toLim - *toP)
1233 break;
1234 utf8 = buf;
1235 *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
1236 - (BT_LEAD2 - 2);
1237 }
1238 else {
1239 if (n > toLim - *toP)
1240 break;
1241 (*fromP)++;
1242 }
1243 do {
1244 *(*toP)++ = *utf8++;
1245 } while (--n != 0);
1246 }
1247 }
1248
1249 static
1250 void unknown_toUtf16(const ENCODING *enc,
1251 const char **fromP, const char *fromLim,
1252 unsigned short **toP, const unsigned short *toLim)
1253 {
1254 while (*fromP != fromLim && *toP != toLim) {
1255 unsigned short c
1256 = ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP];
1257 if (c == 0) {
1258 c = (unsigned short)((const struct unknown_encoding *)enc)
1259 ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
1260 *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
1261 - (BT_LEAD2 - 2);
1262 }
1263 else
1264 (*fromP)++;
1265 *(*toP)++ = c;
1266 }
1267 }
1268
1269 ENCODING *
1270 XmlInitUnknownEncoding(void *mem,
1271 int *table,
1272 int (*convert)(void *userData, const char *p),
1273 void *userData)
1274 {
1275 unsigned int i;
1276 struct unknown_encoding *e = mem;
1277 for (i = 0; i < sizeof(struct normal_encoding); i++)
1278 ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
1279 for (i = 0; i < 128; i++)
1280 if (latin1_encoding.type[i] != BT_OTHER
1281 && latin1_encoding.type[i] != BT_NONXML
1282 && table[i] != (int)i)
1283 return 0;
1284 for (i = 0; i < 256; i++) {
1285 int c = table[i];
1286 if (c == -1) {
1287 e->normal.type[i] = BT_MALFORM;
1288 /* This shouldn't really get used. */
1289 e->utf16[i] = 0xFFFF;
1290 e->utf8[i][0] = 1;
1291 e->utf8[i][1] = 0;
1292 }
1293 else if (c < 0) {
1294 if (c < -4)
1295 return 0;
1296 e->normal.type[i] = BT_LEAD2 - (c + 2);
1297 e->utf8[i][0] = 0;
1298 e->utf16[i] = 0;
1299 }
1300 else if (c < 0x80) {
1301 if (latin1_encoding.type[c] != BT_OTHER
1302 && latin1_encoding.type[c] != BT_NONXML
1303 && c != i)
1304 return 0;
1305 e->normal.type[i] = latin1_encoding.type[c];
1306 e->utf8[i][0] = 1;
1307 e->utf8[i][1] = (char)c;
1308 e->utf16[i] = c == 0 ? 0xFFFF : c;
1309 }
1310 else if (checkCharRefNumber(c) < 0) {
1311 e->normal.type[i] = BT_NONXML;
1312 /* This shouldn't really get used. */
1313 e->utf16[i] = 0xFFFF;
1314 e->utf8[i][0] = 1;
1315 e->utf8[i][1] = 0;
1316 }
1317 else {
1318 if (c > 0xFFFF)
1319 return 0;
1320 if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
1321 e->normal.type[i] = BT_NMSTRT;
1322 else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
1323 e->normal.type[i] = BT_NAME;
1324 else
1325 e->normal.type[i] = BT_OTHER;
1326 e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
1327 e->utf16[i] = c;
1328 }
1329 }
1330 e->userData = userData;
1331 e->convert = convert;
1332 if (convert) {
1333 e->normal.isName2 = unknown_isName;
1334 e->normal.isName3 = unknown_isName;
1335 e->normal.isName4 = unknown_isName;
1336 e->normal.isNmstrt2 = unknown_isNmstrt;
1337 e->normal.isNmstrt3 = unknown_isNmstrt;
1338 e->normal.isNmstrt4 = unknown_isNmstrt;
1339 e->normal.isInvalid2 = unknown_isInvalid;
1340 e->normal.isInvalid3 = unknown_isInvalid;
1341 e->normal.isInvalid4 = unknown_isInvalid;
1342 }
1343 e->normal.enc.utf8Convert = unknown_toUtf8;
1344 e->normal.enc.utf16Convert = unknown_toUtf16;
1345 return &(e->normal.enc);
1346 }
1347
1348 /* If this enumeration is changed, getEncodingIndex and encodings
1349 must also be changed. */
1350 enum {
1351 UNKNOWN_ENC = -1,
1352 ISO_8859_1_ENC = 0,
1353 US_ASCII_ENC,
1354 UTF_8_ENC,
1355 UTF_16_ENC,
1356 UTF_16BE_ENC,
1357 UTF_16LE_ENC,
1358 /* must match encodingNames up to here */
1359 NO_ENC
1360 };
1361
1362 static
1363 int getEncodingIndex(const char *name)
1364 {
1365 static const char *encodingNames[] = {
1366 "ISO-8859-1",
1367 "US-ASCII",
1368 "UTF-8",
1369 "UTF-16",
1370 "UTF-16BE"
1371 "UTF-16LE",
1372 };
1373 int i;
1374 if (name == 0)
1375 return NO_ENC;
1376 for (i = 0; i < sizeof(encodingNames)/sizeof(encodingNames[0]); i++)
1377 if (streqci(name, encodingNames[i]))
1378 return i;
1379 return UNKNOWN_ENC;
1380 }
1381
1382 /* For binary compatibility, we store the index of the encoding specified
1383 at initialization in the isUtf16 member. */
1384
1385 #define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
1386 #define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i)
1387
1388 /* This is what detects the encoding.
1389 encodingTable maps from encoding indices to encodings;
1390 INIT_ENC_INDEX(enc) is the index of the external (protocol) specified encoding;
1391 state is XML_CONTENT_STATE if we're parsing an external text entity,
1392 and XML_PROLOG_STATE otherwise.
1393 */
1394
1395
1396 static
1397 int initScan(const ENCODING **encodingTable,
1398 const INIT_ENCODING *enc,
1399 int state,
1400 const char *ptr,
1401 const char *end,
1402 const char **nextTokPtr)
1403 {
1404 const ENCODING **encPtr;
1405
1406 if (ptr == end)
1407 return XML_TOK_NONE;
1408 encPtr = enc->encPtr;
1409 if (ptr + 1 == end) {
1410 /* only a single byte available for auto-detection */
1411 #ifndef XML_DTD /* FIXME */
1412 /* a well-formed document entity must have more than one byte */
1413 if (state != XML_CONTENT_STATE)
1414 return XML_TOK_PARTIAL;
1415 #endif
1416 /* so we're parsing an external text entity... */
1417 /* if UTF-16 was externally specified, then we need at least 2 bytes */
1418 switch (INIT_ENC_INDEX(enc)) {
1419 case UTF_16_ENC:
1420 case UTF_16LE_ENC:
1421 case UTF_16BE_ENC:
1422 return XML_TOK_PARTIAL;
1423 }
1424 switch ((unsigned char)*ptr) {
1425 case 0xFE:
1426 case 0xFF:
1427 case 0xEF: /* possibly first byte of UTF-8 BOM */
1428 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
1429 && state == XML_CONTENT_STATE)
1430 break;
1431 /* fall through */
1432 case 0x00:
1433 case 0x3C:
1434 return XML_TOK_PARTIAL;
1435 }
1436 }
1437 else {
1438 switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
1439 case 0xFEFF:
1440 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
1441 && state == XML_CONTENT_STATE)
1442 break;
1443 *nextTokPtr = ptr + 2;
1444 *encPtr = encodingTable[UTF_16BE_ENC];
1445 return XML_TOK_BOM;
1446 /* 00 3C is handled in the default case */
1447 case 0x3C00:
1448 if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
1449 || INIT_ENC_INDEX(enc) == UTF_16_ENC)
1450 && state == XML_CONTENT_STATE)
1451 break;
1452 *encPtr = encodingTable[UTF_16LE_ENC];
1453 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1454 case 0xFFFE:
1455 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
1456 && state == XML_CONTENT_STATE)
1457 break;
1458 *nextTokPtr = ptr + 2;
1459 *encPtr = encodingTable[UTF_16LE_ENC];
1460 return XML_TOK_BOM;
1461 case 0xEFBB:
1462 /* Maybe a UTF-8 BOM (EF BB BF) */
1463 /* If there's an explicitly specified (external) encoding
1464 of ISO-8859-1 or some flavour of UTF-16
1465 and this is an external text entity,
1466 don't look for the BOM,
1467 because it might be a legal data. */
1468 if (state == XML_CONTENT_STATE) {
1469 int e = INIT_ENC_INDEX(enc);
1470 if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC || e == UTF_16_ENC)
1471 break;
1472 }
1473 if (ptr + 2 == end)
1474 return XML_TOK_PARTIAL;
1475 if ((unsigned char)ptr[2] == 0xBF) {
1476 *encPtr = encodingTable[UTF_8_ENC];
1477 return XML_TOK_BOM;
1478 }
1479 break;
1480 default:
1481 if (ptr[0] == '\0') {
1482 /* 0 isn't a legal data character. Furthermore a document entity can only
1483 start with ASCII characters. So the only way this can fail to be big-endian
1484 UTF-16 if it it's an external parsed general entity that's labelled as
1485 UTF-16LE. */
1486 if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
1487 break;
1488 *encPtr = encodingTable[UTF_16BE_ENC];
1489 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1490 }
1491 else if (ptr[1] == '\0') {
1492 /* We could recover here in the case:
1493 - parsing an external entity
1494 - second byte is 0
1495 - no externally specified encoding
1496 - no encoding declaration
1497 by assuming UTF-16LE. But we don't, because this would mean when
1498 presented just with a single byte, we couldn't reliably determine
1499 whether we needed further bytes. */
1500 if (state == XML_CONTENT_STATE)
1501 break;
1502 *encPtr = encodingTable[UTF_16LE_ENC];
1503 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1504 }
1505 break;
1506 }
1507 }
1508 *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
1509 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1510 }
1511
1512
1513 #define NS(x) x
1514 #define ns(x) x
1515 #include "xmltok_ns.c"
1516 #undef NS
1517 #undef ns
1518
1519 #ifdef XML_NS
1520
1521 #define NS(x) x ## NS
1522 #define ns(x) x ## _ns
1523
1524 #include "xmltok_ns.c"
1525
1526 #undef NS
1527 #undef ns
1528
1529 ENCODING *
1530 XmlInitUnknownEncodingNS(void *mem,
1531 int *table,
1532 int (*convert)(void *userData, const char *p),
1533 void *userData)
1534 {
1535 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
1536 if (enc)
1537 ((struct normal_encoding *)enc)->type[':'] = BT_COLON;
1538 return enc;
1539 }
1540
1541 #endif /* XML_NS */