Codebase list libhtml5parser-java / 8871785
Imported Upstream version 1.4+r1.3.1 Markus Koschany 8 years ago
16 changed file(s) with 336 addition(s) and 962 deletion(s). Raw diff Collapse all Expand all
33
44 /*
55 * Copyright (c) 2005, 2006, 2007 Henri Sivonen
6 * Copyright (c) 2007-2012 Mozilla Foundation
6 * Copyright (c) 2007-2011 Mozilla Foundation
77 * Portions of comments Copyright 2004-2007 Apple Computer, Inc., Mozilla
88 * Foundation, and Opera Software ASA.
99 *
0 <!--
1 * Copyright (c) 2007-2012 Mozilla Foundation
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a
4 * copy of this software and associated documentation files (the "Software"),
5 * to deal in the Software without restriction, including without limitation
6 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 * and/or sell copies of the Software, and to permit persons to whom the
8 * Software is furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
16 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
19 * DEALINGS IN THE SOFTWARE.
20 -->
210 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
221 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
232 <modelVersion>4.0.0</modelVersion>
243 <groupId>nu.validator.htmlparser</groupId>
254 <artifactId>htmlparser</artifactId>
26 <packaging>bundle</packaging>
27 <version>1.4</version>
5 <packaging>jar</packaging>
6 <version>1.3.1</version>
287 <name>htmlparser</name>
298 <url>http://about.validator.nu/htmlparser/</url>
30 <description>The Validator.nu HTML Parser is an implementation of the HTML5 parsing algorithm in Java for applications. The parser is designed to work as a drop-in replacement for the XML parser in applications that already support XHTML 1.x content with an XML parser and use SAX, DOM or XOM to interface with the parser.</description>
31 <!--
32 Usage notes for this POM:
33
34 To build without signing, run:
35 mvn clean source:jar javadoc:jar repository:bundle-create
36 (enter 0 <return> when prompted)
37
38 To build and sign, run:
39 mvn clean source:jar javadoc:jar package gpg:sign repository:bundle-create
40 (enter 0 <return> when prompted)
41
42 This POM file is used for creating the bundle for distribution via the
43 Maven Central Repository. It is not used as part of the normal development
44 process of the parser and the maintainer of the parser (Henri Sivonen)
45 isn't experienced in POM tweaking. If you need this POM to do something
46 that it currently does not do or do something better, you need to write
47 the changes you need yourself and contribute a patch via
48 http://bugzilla.validator.nu/
49 -->
9 <description>The Validator.nu HTML Parser is an implementation of the HTML5 parsing algorithm in Java for applications that do not run scripts. The parser is designed to work as a drop-in replacement for the XML parser in applications that already support XHTML 1.x content with an XML parser and use SAX, DOM or XOM to interface with the parser.</description>
5010 <developers>
5111 <developer>
5212 <id>hsivonen</id>
6828 </license>
6929 </licenses>
7030 <scm>
71 <connection>scm:hg:http://hg.mozilla.org/projects/htmlparser/</connection>
72 <url>http://hg.mozilla.org/projects/htmlparser/</url>
31 <connection>scm:svn:http://svn.versiondude.net/whattf/htmlparser/</connection>
7332 </scm>
7433 <build>
75 <sourceDirectory>${project.build.directory}/src</sourceDirectory>
34 <sourceDirectory>${basedir}/src</sourceDirectory>
7635 <testSourceDirectory>${basedir}/test-src</testSourceDirectory>
7736 <plugins>
7837 <plugin>
8443 </configuration>
8544 </plugin>
8645 <plugin>
87 <artifactId>maven-antrun-plugin</artifactId>
88 <version>1.7</version>
89 <dependencies>
90 <dependency>
91 <groupId>com.sun</groupId>
92 <artifactId>tools</artifactId>
93 <version>1.5.0</version>
94 <scope>system</scope>
95 <systemPath>${java.home}/../lib/tools.jar</systemPath>
96 </dependency>
97 </dependencies>
98 <executions>
99 <execution>
100 <id>intitialize-sources</id>
101 <phase>initialize</phase>
102 <goals>
103 <goal>run</goal>
104 </goals>
105 <configuration>
106 <target>
107 <delete dir="${project.build.sourceDirectory}"/>
108 <mkdir dir="${project.build.sourceDirectory}"/>
109 <copy todir="${project.build.sourceDirectory}">
110 <fileset dir="${basedir}/src"/>
111 </copy>
112 </target>
113 </configuration>
114 </execution>
115 <execution>
116 <id>tokenizer-hotspot-workaround</id>
117 <phase>process-sources</phase>
118 <goals>
119 <goal>run</goal>
120 </goals>
121 <configuration>
122 <target>
123 <property name="translator.sources" value="${basedir}/translator-src"/>
124 <property name="translator.classes" value="${project.build.directory}/translator-classes"/>
125 <mkdir dir="${translator.classes}"/>
126 <javac srcdir="${translator.sources}" includes="nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java" destdir="${translator.classes}" includeantruntime="false"/>
127 <java classname="nu.validator.htmlparser.generator.ApplyHotSpotWorkaround">
128 <classpath>
129 <pathelement location="${translator.classes}"/>
130 </classpath>
131 <arg value="${project.build.sourceDirectory}/nu/validator/htmlparser/impl/Tokenizer.java"/>
132 <arg value="${project.build.sourceDirectory}/nu/validator/htmlparser/impl/HotSpotWorkaround.txt"/>
133 </java>
134 </target>
135 </configuration>
136 </execution>
137 </executions>
138 </plugin>
139 <plugin>
14046 <groupId>org.apache.maven.plugins</groupId>
14147 <artifactId>maven-surefire-plugin</artifactId>
14248 <configuration>
14349 <skip>true</skip>
144 </configuration>
145 </plugin>
146 <plugin>
147 <groupId>org.apache.felix</groupId>
148 <artifactId>maven-bundle-plugin</artifactId>
149 <version>2.3.7</version>
150 <extensions>true</extensions>
151 <configuration>
152 <archive>
153 <addMavenDescriptor>false</addMavenDescriptor>
154 </archive>
155 <instructions>
156 <Bundle-Name>${project.name}</Bundle-Name>
157 <Bundle-SymbolicName>nu.validator.htmlparser</Bundle-SymbolicName>
158 <Bundle-Version>${project.version}</Bundle-Version>
159 <Bundle-RequiredExecutionEnvironment>J2SE-1.5</Bundle-RequiredExecutionEnvironment>
160 <_removeheaders>Built-By,Bnd-LastModified</_removeheaders>
161 </instructions>
16250 </configuration>
16351 </plugin>
16452 <plugin>
234122 <properties>
235123 <rpm.java.dir>/usr/share/java</rpm.java.dir>
236124 <rpm.javadoc.dir>/usr/share/javadoc</rpm.javadoc.dir>
237 <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
238125 </properties>
239126 </project>
350350 */
351351 @Override public void setErrorHandler(ErrorHandler errorHandler) {
352352 treeBuilder.setErrorHandler(errorHandler);
353 if (driver != null) {
354 driver.setErrorHandler(errorHandler);
355 }
353 driver.setErrorHandler(errorHandler);
356354 }
357355
358356 public void setTransitionHander(TransitionHandler handler) {
784784 public static final AttributeName MASK = new AttributeName(ALL_NO_NS, SAME_LOCAL("mask"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
785785 public static final AttributeName LINK = new AttributeName(ALL_NO_NS, SAME_LOCAL("link"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
786786 public static final AttributeName LANG = new AttributeName(LANG_NS, SAME_LOCAL("lang"), LANG_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
787 public static final AttributeName LOOP = new AttributeName(ALL_NO_NS, SAME_LOCAL("loop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
788787 public static final AttributeName LIST = new AttributeName(ALL_NO_NS, SAME_LOCAL("list"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
789788 public static final AttributeName TYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("type"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
790789 public static final AttributeName WHEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("when"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
929928 public static final AttributeName NOSHADE = new AttributeName(ALL_NO_NS, SAME_LOCAL("noshade"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
930929 public static final AttributeName MINSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("minsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
931930 public static final AttributeName MAXSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("maxsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
931 public static final AttributeName LOOPEND = new AttributeName(ALL_NO_NS, SAME_LOCAL("loopend"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
932932 public static final AttributeName LARGEOP = new AttributeName(ALL_NO_NS, SAME_LOCAL("largeop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
933933 public static final AttributeName UNICODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("unicode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
934934 public static final AttributeName TARGETX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("targetx", "targetX"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10261026 public static final AttributeName MASKUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("maskunits", "maskUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10271027 public static final AttributeName MAXLENGTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("maxlength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10281028 public static final AttributeName LINEBREAK = new AttributeName(ALL_NO_NS, SAME_LOCAL("linebreak"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
1029 public static final AttributeName LOOPSTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("loopstart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10291030 public static final AttributeName TRANSFORM = new AttributeName(ALL_NO_NS, SAME_LOCAL("transform"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10301031 public static final AttributeName V_HANGING = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-hanging"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10311032 public static final AttributeName VALUETYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("valuetype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
10321033 public static final AttributeName POINTSATZ = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsatz", "pointsAtZ"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10331034 public static final AttributeName POINTSATX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsatx", "pointsAtX"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10341035 public static final AttributeName POINTSATY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsaty", "pointsAtY"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
1036 public static final AttributeName PLAYCOUNT = new AttributeName(ALL_NO_NS, SAME_LOCAL("playcount"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10351037 public static final AttributeName SYMMETRIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("symmetric"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10361038 public static final AttributeName SCROLLING = new AttributeName(ALL_NO_NS, SAME_LOCAL("scrolling"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
10371039 public static final AttributeName REPEATDUR = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("repeatdur", "repeatDur"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10911093 public static final AttributeName CELLPADDING = new AttributeName(ALL_NO_NS, SAME_LOCAL("cellpadding"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10921094 public static final AttributeName CELLSPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("cellspacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10931095 public static final AttributeName COLUMNWIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnwidth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
1094 public static final AttributeName CROSSORIGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("crossorigin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10951096 public static final AttributeName COLUMNALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnalign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10961097 public static final AttributeName COLUMNLINES = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnlines"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
10971098 public static final AttributeName CONTEXTMENU = new AttributeName(ALL_NO_NS, SAME_LOCAL("contextmenu"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
13651366 MASK,
13661367 LINK,
13671368 LANG,
1368 LOOP,
13691369 LIST,
13701370 TYPE,
13711371 WHEN,
15101510 NOSHADE,
15111511 MINSIZE,
15121512 MAXSIZE,
1513 LOOPEND,
15131514 LARGEOP,
15141515 UNICODE,
15151516 TARGETX,
16071608 MASKUNITS,
16081609 MAXLENGTH,
16091610 LINEBREAK,
1611 LOOPSTART,
16101612 TRANSFORM,
16111613 V_HANGING,
16121614 VALUETYPE,
16131615 POINTSATZ,
16141616 POINTSATX,
16151617 POINTSATY,
1618 PLAYCOUNT,
16161619 SYMMETRIC,
16171620 SCROLLING,
16181621 REPEATDUR,
16721675 CELLPADDING,
16731676 CELLSPACING,
16741677 COLUMNWIDTH,
1675 CROSSORIGIN,
16761678 COLUMNALIGN,
16771679 COLUMNLINES,
16781680 CONTEXTMENU,
19471949 149809441,
19481950 150018784,
19491951 150445028,
1950 150813181,
19511952 150923321,
19521953 152528754,
19531954 152536216,
20922093 249533729,
20932094 250235623,
20942095 250269543,
2096 251083937,
20952097 251402351,
20962098 252339047,
20972099 253260911,
21892191 316797986,
21902192 317486755,
21912193 317794164,
2194 318721061,
21922195 320076137,
21932196 322657125,
21942197 322887778,
21952198 323506876,
21962199 323572412,
21972200 323605180,
2201 323938869,
21982202 325060058,
21992203 325320188,
22002204 325398738,
22542258 371448425,
22552259 371448430,
22562260 371545055,
2257 371593469,
22582261 371596922,
22592262 371758751,
22602263 371964792,
369369 // return "ANNOTATION_XML";
370370 // case TreeBuilder.FOREIGNOBJECT_OR_DESC:
371371 // return "FOREIGNOBJECT_OR_DESC";
372 // case TreeBuilder.MENUITEM:
373 // return "MENUITEM";
374372 // }
375373 // return null;
376374 // }
714712 public static final ElementName LOWLIMIT = new ElementName("lowlimit", "lowlimit", TreeBuilder.OTHER);
715713 public static final ElementName METADATA = new ElementName("metadata", "metadata", TreeBuilder.OTHER);
716714 public static final ElementName MENCLOSE = new ElementName("menclose", "menclose", TreeBuilder.OTHER);
717 public static final ElementName MENUITEM = new ElementName("menuitem", "menuitem", TreeBuilder.MENUITEM | SPECIAL);
718715 public static final ElementName MPHANTOM = new ElementName("mphantom", "mphantom", TreeBuilder.OTHER);
719716 public static final ElementName NOFRAMES = new ElementName("noframes", "noframes", TreeBuilder.NOFRAMES | SPECIAL);
720717 public static final ElementName NOSCRIPT = new ElementName("noscript", "noscript", TreeBuilder.NOSCRIPT | SPECIAL);
11071104 LOWLIMIT,
11081105 METADATA,
11091106 MENCLOSE,
1110 MENUITEM,
11111107 MPHANTOM,
11121108 NOFRAMES,
11131109 NOSCRIPT,
15011497 281683369,
15021498 282120228,
15031499 282250732,
1504 282498697,
15051500 282508942,
15061501 283743649,
15071502 283787570,
476476 }
477477
478478 @Override protected void errSlashNotFollowedByGt() throws SAXException {
479 err("A slash was not immediately followed by \u201C>\u201D.");
479 err("A slash was not immediate followed by \u201C>\u201D.");
480480 }
481481
482482 @Override protected void errHtml4XmlVoidSyntax() throws SAXException {
1919 * DEALINGS IN THE SOFTWARE.
2020 */
2121
22 /**
23 * compressed returnValue:
24 * int returnState = returnValue >> 33
25 * boolean breakOuterState = ((returnValue >> 32) & 0x1) != 0)
26 * int pos = returnValue & 0xFFFFFFFF // same as (int)returnValue
27 */
2822 @SuppressWarnings("unused") private long workAroundHotSpotHugeMethodLimit(
2923 int state, char c, int pos, @NoLength char[] buf,
3024 boolean reconsume, int returnState, int endPos) throws SAXException {
3529 long returnStateAndPos = workAroundHotSpotHugeMethodLimit(
3630 state, c, pos, buf, reconsume, returnState, endPos);
3731 pos = (int)returnStateAndPos; // 5.1.3 in the Java spec
38 returnState = (int)(returnStateAndPos >> 33);
32 returnState = (int)(returnStateAndPos >> 32);
3933 state = stateSave;
40 if ( (pos == endPos) || ( (((int)(returnStateAndPos >> 32)) & 0x1) != 0) ) {
34 if (pos == endPos) {
4135 break stateloop;
4236 }
4337 continue stateloop;
4438 // END HOTSPOT WORKAROUND
4539 default:
46 assert !reconsume : "Must not reconsume when returning from HotSpot workaround.";
47 stateSave = state;
48 return (((long)returnState) << 33) | pos;
40 break stateloop;
4941 }
5042 }
5143 assert !reconsume : "Must not reconsume when returning from HotSpot workaround.";
5244 stateSave = state;
53 return (((long)returnState) << 33) | (1L << 32) | pos ;
45 return (((long)returnState) << 32) | pos;
5446 }
577577 state = MetaScanner.ATTRIBUTE_NAME;
578578 continue stateloop;
579579 default:
580 contentIndex = Integer.MAX_VALUE;
581 charsetIndex = Integer.MAX_VALUE;
580 contentIndex = -1;
581 charsetIndex = -1;
582582 state = MetaScanner.ATTRIBUTE_NAME;
583583 continue stateloop;
584584 }
215215
216216 public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
217217
218 public static final int PROCESSING_INSTRUCTION = 73;
219
220 public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74;
221
222218 /**
223219 * Magic value for UTF-16 operations.
224220 */
507503 private int line;
508504
509505 private Interner interner;
510
511 // CPPONLY: private boolean viewingXmlSource;
512506
513507 // [NOCPP[
514508
536530 * @param tokenHandler
537531 * the handler for receiving tokens
538532 */
539 public Tokenizer(TokenHandler tokenHandler
540 // CPPONLY: , boolean viewingXmlSource
541 ) {
533 public Tokenizer(TokenHandler tokenHandler) {
542534 this.tokenHandler = tokenHandler;
543535 this.encodingDeclarationHandler = null;
544536 // [NOCPP[
552544 this.publicIdentifier = null;
553545 this.systemIdentifier = null;
554546 this.attributes = null;
555 // CPPONLY: this.viewingXmlSource = viewingXmlSource;
556547 }
557548
558549 public void setInterner(Interner interner) {
564555 this.publicId = newPublicId;
565556
566557 }
567
568 // CPPONLY: boolean isViewingXmlSource() {
569 // CPPONLY: return viewingXmlSource;
570 // CPPONLY: }
571558
572559 // [NOCPP[
573560
11301117 * switched to the PCDATA state.
11311118 */
11321119 maybeErrAttributesOnEndTag(attrs);
1133 // CPPONLY: if (!viewingXmlSource) {
11341120 tokenHandler.endTag(tagName);
1135 // CPPONLY: }
11361121 Portability.delete(attributes);
11371122 } else {
1138 // CPPONLY: if (viewingXmlSource) {
1139 // CPPONLY: Portability.delete(attributes);
1140 // CPPONLY: } else {
11411123 tokenHandler.startTag(tagName, attrs, selfClosing);
1142 // CPPONLY: }
11431124 }
11441125 tagName.release();
11451126 tagName = null;
12031184 attributes.addAttribute(attributeName, "", xmlnsPolicy);
12041185 }
12051186 } else {
1206 if (AttributeName.BORDER != attributeName) {
1207 err("Attribute value omitted for a non-boolean attribute. (HTML4-only error.)");
1208 attributes.addAttribute(attributeName, "", xmlnsPolicy);
1209 }
1187 err("Attribute value omitted for a non-boolean attribute. (HTML4-only error.)");
1188 attributes.addAttribute(attributeName, "", xmlnsPolicy);
12101189 }
12111190 } else {
12121191 if (AttributeName.SRC == attributeName
12401219 if (attributeName != null) {
12411220 String val = longStrBufToString(); // Ownership transferred to
12421221 // HtmlAttributes
1243 // CPPONLY: if (mViewSource) {
1244 // CPPONLY: mViewSource.MaybeLinkifyAttributeValue(attributeName, val);
1245 // CPPONLY: }
12461222 // [NOCPP[
12471223 if (!endTag && html4 && html4ModeCompatibleWithXhtml1Schemata
12481224 && attributeName.isCaseFolded()) {
13391315 * meaning. (The rest of the array is garbage and should not be
13401316 * examined.)
13411317 */
1342 // CPPONLY: if (mViewSource) {
1343 // CPPONLY: mViewSource.SetBuffer(buffer);
1344 // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1345 // CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1);
1346 // CPPONLY: } else {
1347 // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1348 // CPPONLY: }
1349 // [NOCPP[
13501318 pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState,
13511319 buffer.getEnd());
1352 // ]NOCPP]
13531320 if (pos == buffer.getEnd()) {
13541321 // exiting due to end of buffer
13551322 buffer.setStart(pos);
15541521 state = transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos);
15551522 continue stateloop;
15561523 case '?':
1557 // CPPONLY: if (viewingXmlSource) {
1558 // CPPONLY: state = transition(state,
1559 // CPPONLY: Tokenizer.PROCESSING_INSTRUCTION,
1560 // CPPONLY: reconsume,
1561 // CPPONLY: pos);
1562 // CPPONLY: continue stateloop;
1563 // CPPONLY: }
15641524 /*
15651525 * U+003F QUESTION MARK (?) Parse error.
15661526 */
16001560 * the data state.
16011561 */
16021562 cstart = pos;
1563 state = transition(state, Tokenizer.DATA, reconsume, pos);
16031564 reconsume = true;
1604 state = transition(state, Tokenizer.DATA, reconsume, pos);
16051565 continue stateloop;
16061566 }
16071567 }
19261886 * input character.
19271887 */
19281888 clearLongStrBuf();
1929 reconsume = true;
19301889 state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
19311890 noteUnquotedAttributeValue();
1891 reconsume = true;
19321892 continue stateloop;
19331893 case '\'':
19341894 /*
21062066 * Reconsume the character in the before
21072067 * attribute name state.
21082068 */
2069 state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
21092070 reconsume = true;
2110 state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
21112071 continue stateloop;
21122072 }
21132073 }
21452105 * Reconsume the character in the before attribute
21462106 * name state.
21472107 */
2108 state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
21482109 reconsume = true;
2149 state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
21502110 continue stateloop;
21512111 }
21522112 // XXX reorder point
23962356 default:
23972357 errBogusComment();
23982358 clearLongStrBuf();
2359 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
23992360 reconsume = true;
2400 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
24012361 continue stateloop;
24022362 }
24032363 }
24182378 // continue stateloop;
24192379 default:
24202380 errBogusComment();
2381 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
24212382 reconsume = true;
2422 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
24232383 continue stateloop;
24242384 }
24252385 }
27592719 appendLongStrBuf(c);
27602720 } else {
27612721 errBogusComment();
2722 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
27622723 reconsume = true;
2763 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
27642724 continue stateloop;
27652725 }
27662726 index++;
27672727 continue;
27682728 } else {
27692729 cstart = pos; // start coalescing
2730 state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
27702731 reconsume = true;
2771 state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
27722732 break; // FALL THROUGH continue stateloop;
27732733 }
27742734 }
28162776 tokenHandler.characters(Tokenizer.RSQB_RSQB, 0,
28172777 1);
28182778 cstart = pos;
2779 state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
28192780 reconsume = true;
2820 state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
28212781 continue stateloop;
28222782 }
28232783 }
28352795 default:
28362796 tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
28372797 cstart = pos;
2798 state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
28382799 reconsume = true;
2839 state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
28402800 continue stateloop;
28412801
28422802 }
29372897 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
29382898 cstart = pos;
29392899 }
2900 state = transition(state, returnState, reconsume, pos);
29402901 reconsume = true;
2941 state = transition(state, returnState, reconsume, pos);
29422902 continue stateloop;
29432903 case '#':
29442904 /*
29512911 default:
29522912 if (c == additional) {
29532913 emitOrAppendStrBuf(returnState);
2914 state = transition(state, returnState, reconsume, pos);
29542915 reconsume = true;
2955 state = transition(state, returnState, reconsume, pos);
29562916 continue stateloop;
29572917 }
29582918 if (c >= 'a' && c <= 'z') {
29702930 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
29712931 cstart = pos;
29722932 }
2933 state = transition(state, returnState, reconsume, pos);
29732934 reconsume = true;
2974 state = transition(state, returnState, reconsume, pos);
29752935 continue stateloop;
29762936 }
29772937 // Didn't fail yet
30443004 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
30453005 cstart = pos;
30463006 }
3007 state = transition(state, returnState, reconsume, pos);
30473008 reconsume = true;
3048 state = transition(state, returnState, reconsume, pos);
30493009 continue stateloop;
30503010 }
30513011 // Didn't fail yet
31083068 }
31093069 }
31103070
3111 if (c == ';') {
3112 // If we see a semicolon, there cannot be a
3113 // longer match. Break the loop. However, before
3114 // breaking, take the longest match so far as the
3115 // candidate, if we are just about to complete a
3116 // match.
3117 if (entCol + 1 == NamedCharacters.NAMES[lo].length()) {
3118 candidate = lo;
3119 strBufMark = strBufLen;
3120 }
3121 break outer;
3122 }
3123
31243071 if (hi < lo) {
31253072 break outer;
31263073 }
31383085 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
31393086 cstart = pos;
31403087 }
3088 state = transition(state, returnState, reconsume, pos);
31413089 reconsume = true;
3142 state = transition(state, returnState, reconsume, pos);
31433090 continue stateloop;
31443091 } else {
31453092 // c can't be CR, LF or nul if we got here
31833130 */
31843131 errNoNamedCharacterMatch();
31853132 appendStrBufToLongStrBuf();
3133 state = transition(state, returnState, reconsume, pos);
31863134 reconsume = true;
3187 state = transition(state, returnState, reconsume, pos);
31883135 continue stateloop;
31893136 }
31903137 }
32013148 * second column of the named character references
32023149 * table).
32033150 */
3204 // CPPONLY: completedNamedCharacterReference();
32053151 @Const @NoLength char[] val = NamedCharacters.VALUES[candidate];
32063152 if (
32073153 // [NOCPP[
32153161 }
32163162 // this is so complicated!
32173163 if (strBufMark < strBufLen) {
3164 // if (strBufOffset != -1) {
3165 // if ((returnState & (~1)) != 0) {
3166 // for (int i = strBufMark; i < strBufLen; i++) {
3167 // appendLongStrBuf(buf[strBufOffset + i]);
3168 // }
3169 // } else {
3170 // tokenHandler.characters(buf, strBufOffset
3171 // + strBufMark, strBufLen
3172 // - strBufMark);
3173 // }
3174 // } else {
32183175 if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
32193176 for (int i = strBufMark; i < strBufLen; i++) {
32203177 appendLongStrBuf(strBuf[i]);
32233180 tokenHandler.characters(strBuf, strBufMark,
32243181 strBufLen - strBufMark);
32253182 }
3226 }
3227 // Check if we broke out early with c being the last
3228 // character that matched as opposed to being the
3229 // first one that didn't match. In the case of an
3230 // early break, the next run on text should start
3231 // *after* the current character and the current
3232 // character shouldn't be reconsumed.
3233 boolean earlyBreak = (c == ';' && strBufMark == strBufLen);
3183 // }
3184 }
32343185 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
3235 cstart = earlyBreak ? pos + 1 : pos;
3236 }
3237 reconsume = !earlyBreak;
3186 cstart = pos;
3187 }
32383188 state = transition(state, returnState, reconsume, pos);
3189 reconsume = true;
32393190 continue stateloop;
32403191 /*
32413192 * If the markup contains I'm &notit; I tell you, the
32883239 * When it comes to interpreting the number,
32893240 * interpret it as a decimal number.
32903241 */
3242 state = transition(state, Tokenizer.DECIMAL_NRC_LOOP, reconsume, pos);
32913243 reconsume = true;
3292 state = transition(state, Tokenizer.DECIMAL_NRC_LOOP, reconsume, pos);
32933244 // FALL THROUGH continue stateloop;
32943245 }
32953246 // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
33553306 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
33563307 cstart = pos;
33573308 }
3309 state = transition(state, returnState, reconsume, pos);
33583310 reconsume = true;
3359 state = transition(state, returnState, reconsume, pos);
33603311 continue stateloop;
33613312 } else {
33623313 errCharRefLacksSemicolon();
33633314 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
33643315 cstart = pos;
33653316 }
3317 state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
33663318 reconsume = true;
3367 state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
33683319 // FALL THROUGH continue stateloop;
33693320 break decimalloop;
33703321 }
34453396 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
34463397 cstart = pos;
34473398 }
3399 state = transition(state, returnState, reconsume, pos);
34483400 reconsume = true;
3449 state = transition(state, returnState, reconsume, pos);
34503401 continue stateloop;
34513402 } else {
34523403 errCharRefLacksSemicolon();
34533404 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
34543405 cstart = pos;
34553406 }
3407 state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
34563408 reconsume = true;
3457 state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
34583409 continue stateloop;
34593410 }
34603411 }
36833634 * the data state.
36843635 */
36853636 cstart = pos;
3637 state = transition(state, returnState, reconsume, pos);
36863638 reconsume = true;
3687 state = transition(state, returnState, reconsume, pos);
36883639 continue stateloop;
36893640 }
36903641 }
37153666 0, 2);
37163667 emitStrBuf();
37173668 cstart = pos;
3669 state = transition(state, returnState, reconsume, pos);
37183670 reconsume = true;
3719 state = transition(state, returnState, reconsume, pos);
37203671 continue stateloop;
37213672 }
37223673 appendStrBuf(c);
39583909 * the data state.
39593910 */
39603911 cstart = pos;
3912 state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
39613913 reconsume = true;
3962 state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
39633914 continue stateloop;
39643915 }
39653916 }
39893940 * Anything else Reconsume the current input
39903941 * character in the script data state.
39913942 */
3943 state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
39923944 reconsume = true;
3993 state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
39943945 continue stateloop;
39953946 }
39963947 }
40193970 * Anything else Reconsume the current input
40203971 * character in the script data state.
40213972 */
3973 state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
40223974 reconsume = true;
4023 state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
40243975 continue stateloop;
40253976 }
40263977 }
45464497 appendLongStrBuf(c);
45474498 } else {
45484499 errBogusComment();
4500 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
45494501 reconsume = true;
4550 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
45514502 continue stateloop;
45524503 }
45534504 index++;
45544505 continue;
45554506 } else {
4507 state = transition(state, Tokenizer.DOCTYPE, reconsume, pos);
45564508 reconsume = true;
4557 state = transition(state, Tokenizer.DOCTYPE, reconsume, pos);
45584509 break markupdeclarationdoctypeloop;
45594510 // continue stateloop;
45604511 }
46024553 * Reconsume the current character in the before
46034554 * DOCTYPE name state.
46044555 */
4556 state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
46054557 reconsume = true;
4606 state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
46074558 break doctypeloop;
46084559 // continue stateloop;
46094560 }
48424793 if (folded != Tokenizer.UBLIC[index]) {
48434794 bogusDoctype();
48444795 // forceQuirks = true;
4796 state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
48454797 reconsume = true;
4846 state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
48474798 continue stateloop;
48484799 }
48494800 index++;
48504801 continue;
48514802 } else {
4803 state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos);
48524804 reconsume = true;
4853 state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos);
48544805 break doctypeublicloop;
48554806 // continue stateloop;
48564807 }
54385389 }
54395390 if (folded != Tokenizer.YSTEM[index]) {
54405391 bogusDoctype();
5392 state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
54415393 reconsume = true;
5442 state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
54435394 continue stateloop;
54445395 }
54455396 index++;
54465397 continue stateloop;
54475398 } else {
5399 state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos);
54485400 reconsume = true;
5449 state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos);
54505401 break doctypeystemloop;
54515402 // continue stateloop;
54525403 }
57505701 */
57515702 continue;
57525703 }
5753 }
5754 // XXX reorder point
5755 case PROCESSING_INSTRUCTION:
5756 processinginstructionloop: for (;;) {
5757 if (++pos == endPos) {
5758 break stateloop;
5759 }
5760 c = checkChar(buf, pos);
5761 switch (c) {
5762 case '?':
5763 state = transition(
5764 state,
5765 Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,
5766 reconsume, pos);
5767 break processinginstructionloop;
5768 // continue stateloop;
5769 default:
5770 continue;
5771 }
5772 }
5773 case PROCESSING_INSTRUCTION_QUESTION_MARK:
5774 if (++pos == endPos) {
5775 break stateloop;
5776 }
5777 c = checkChar(buf, pos);
5778 switch (c) {
5779 case '>':
5780 state = transition(state, Tokenizer.DATA,
5781 reconsume, pos);
5782 continue stateloop;
5783 default:
5784 state = transition(state,
5785 Tokenizer.PROCESSING_INSTRUCTION,
5786 reconsume, pos);
5787 continue stateloop;
57885704 }
57895705 // END HOTSPOT WORKAROUND
57905706 }
67676683 attributeName = other.attributeName.cloneAttributeName(interner);
67686684 }
67696685
6770 Portability.delete(attributes);
6686 if (attributes != null) {
6687 Portability.delete(attributes);
6688 }
67716689 if (other.attributes == null) {
67726690 attributes = null;
67736691 } else {
5454 import nu.validator.htmlparser.common.XmlViolationPolicy;
5555
5656 import org.xml.sax.ErrorHandler;
57 import org.xml.sax.Locator;
5857 import org.xml.sax.SAXException;
5958 import org.xml.sax.SAXParseException;
6059
197196 final static int FONT = 64;
198197
199198 final static int KEYGEN = 65;
200
201 final static int MENUITEM = 66;
202199
203200 // start insertion modes
204201
342339 // [NOCPP[
343340
344341 private static final @Local String HTML_LOCAL = "html";
345
342
346343 // ]NOCPP]
347344
348345 private int mode = INITIAL;
364361
365362 private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
366363
367 private LocatorImpl firstCommentLocation;
368
369364 // ]NOCPP]
370365
371366 private boolean scriptingEnabled = false;
480475 errorHandler.error(spe);
481476 }
482477
478 /**
479 * Reports a stray start tag.
480 * @param name the name of the stray tag
481 *
482 * @throws SAXException
483 */
484 private void errStrayStartTag(String name) throws SAXException {
485 err("Stray end tag \u201C" + name + "\u201D.");
486 }
487
488 /**
489 * Reports a stray end tag.
490 * @param name the name of the stray tag
491 *
492 * @throws SAXException
493 */
494 private void errStrayEndTag(String name) throws SAXException {
495 err("Stray end tag \u201C" + name + "\u201D.");
496 }
497
498 /**
499 * Reports a state when elements expected to be closed were not.
500 *
501 * @param eltPos the position of the start tag on the stack of the element
502 * being closed.
503 * @param name the name of the end tag
504 *
505 * @throws SAXException
506 */
507 private void errUnclosedElements(int eltPos, String name) throws SAXException {
508 errNoCheck("End tag \u201C" + name + "\u201D seen, but there were open elements.");
509 errListUnclosedStartTags(eltPos);
510 }
511
512 /**
513 * Reports a state when elements expected to be closed ahead of an implied
514 * end tag but were not.
515 *
516 * @param eltPos the position of the start tag on the stack of the element
517 * being closed.
518 * @param name the name of the end tag
519 *
520 * @throws SAXException
521 */
522 private void errUnclosedElementsImplied(int eltPos, String name) throws SAXException {
523 errNoCheck("End tag \u201C" + name + "\u201D implied, but there were open elements.");
524 errListUnclosedStartTags(eltPos);
525 }
526
527 /**
528 * Reports a state when elements expected to be closed ahead of an implied
529 * table cell close.
530 *
531 * @param eltPos the position of the start tag on the stack of the element
532 * being closed.
533 * @throws SAXException
534 */
535 private void errUnclosedElementsCell(int eltPos) throws SAXException {
536 errNoCheck("A table cell was implicitly closed, but there were open elements.");
537 errListUnclosedStartTags(eltPos);
538 }
539
483540 private void errListUnclosedStartTags(int eltPos) throws SAXException {
484541 if (currentPtr != -1) {
485542 for (int i = currentPtr; i > eltPos; i--) {
486543 reportUnclosedElementNameAndLocation(i);
487544 }
488545 }
546 }
547
548 /**
549 * Reports arriving at/near end of document with unclosed elements remaining.
550 *
551 * @param message
552 * the message
553 * @throws SAXException
554 */
555 private void errEndWithUnclosedElements(String message) throws SAXException {
556 if (errorHandler == null) {
557 return;
558 }
559 errNoCheck(message);
560 // just report all remaining unclosed elements
561 errListUnclosedStartTags(0);
489562 }
490563
491564 /**
520593 return;
521594 }
522595 SAXParseException spe = new SAXParseException(message, tokenizer);
523 errorHandler.warning(spe);
524 }
525
526 /**
527 * Reports a warning with an explicit locator
528 *
529 * @param message
530 * the message
531 * @throws SAXException
532 */
533 final void warn(String message, Locator locator) throws SAXException {
534 if (errorHandler == null) {
535 return;
536 }
537 SAXParseException spe = new SAXParseException(message, locator);
538596 errorHandler.warning(spe);
539597 }
540598
555613 html4 = false;
556614 idLocations.clear();
557615 wantingComments = wantsComments();
558 firstCommentLocation = null;
559616 // ]NOCPP]
560617 start(fragment);
561618 charBufferLen = 0;
595652 contextNode = null;
596653 } else {
597654 mode = INITIAL;
598 // If we are viewing XML source, put a foreign element permanently
599 // on the stack so that cdataSectionAllowed() returns true.
600 // CPPONLY: if (tokenizer.isViewingXmlSource()) {
601 // CPPONLY: T elt = createElement("http://www.w3.org/2000/svg",
602 // CPPONLY: "svg",
603 // CPPONLY: tokenizer.emptyAttributes());
604 // CPPONLY: StackNode<T> node = new StackNode<T>(ElementName.SVG,
605 // CPPONLY: "svg",
606 // CPPONLY: elt);
607 // CPPONLY: currentPtr++;
608 // CPPONLY: stack[currentPtr] = node;
609 // CPPONLY: }
610655 }
611656 }
612657
633678 // ]NOCPP]
634679 if (isQuirky(name, publicIdentifier,
635680 systemIdentifier, forceQuirks)) {
636 errQuirkyDoctype();
681 err("Quirky doctype. Expected \u201C<!DOCTYPE html>\u201D.");
637682 documentModeInternal(DocumentMode.QUIRKS_MODE,
638683 publicIdentifier, systemIdentifier,
639684 false);
640685 } else if (isAlmostStandards(publicIdentifier,
641686 systemIdentifier)) {
642 // [NOCPP[
643 if (firstCommentLocation != null) {
644 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
645 }
646 // ]NOCPP]
647 errAlmostStandardsDoctype();
687 err("Almost standards mode doctype. Expected \u201C<!DOCTYPE html>\u201D.");
648688 documentModeInternal(
649689 DocumentMode.ALMOST_STANDARDS_MODE,
650690 publicIdentifier, systemIdentifier,
651691 false);
652692 } else {
653693 // [NOCPP[
654 if (firstCommentLocation != null) {
655 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
656 }
657694 if ((Portability.literalEqualsString(
658695 "-//W3C//DTD HTML 4.0//EN",
659696 publicIdentifier) && (systemIdentifier == null || Portability.literalEqualsString(
700737 true);
701738 } else if (isAlmostStandards(publicIdentifier,
702739 systemIdentifier)) {
703 if (firstCommentLocation != null) {
704 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
705 }
706740 err("Almost standards mode doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
707741 documentModeInternal(
708742 DocumentMode.ALMOST_STANDARDS_MODE,
709743 publicIdentifier, systemIdentifier,
710744 true);
711745 } else {
712 if (firstCommentLocation != null) {
713 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
714 }
715746 if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
716747 if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) {
717748 warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
736767 true);
737768 } else if (isAlmostStandards(publicIdentifier,
738769 systemIdentifier)) {
739 if (firstCommentLocation != null) {
740 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
741 }
742770 if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)
743771 && systemIdentifier != null) {
744772 if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) {
752780 publicIdentifier, systemIdentifier,
753781 true);
754782 } else {
755 if (firstCommentLocation != null) {
756 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
757 }
758783 err("The doctype was not the HTML 4.01 Transitional doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
759784 documentModeInternal(
760785 DocumentMode.STANDARDS_MODE,
775800 html4);
776801 } else if (isAlmostStandards(publicIdentifier,
777802 systemIdentifier)) {
778 if (firstCommentLocation != null) {
779 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
780 }
781803 if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) {
782804 if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) {
783805 warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
790812 publicIdentifier, systemIdentifier,
791813 html4);
792814 } else {
793 if (firstCommentLocation != null) {
794 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
795 }
796815 if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
797816 if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) {
798817 warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
844863 /*
845864 * A DOCTYPE token Parse error.
846865 */
847 errStrayDoctype();
866 err("Stray doctype.");
848867 /*
849868 * Ignore the token.
850869 */
868887 throws SAXException {
869888 needToDropLF = false;
870889 // [NOCPP[
871 if (firstCommentLocation == null) {
872 firstCommentLocation = new LocatorImpl(tokenizer);
873 }
874890 if (!wantingComments) {
875891 return;
876892 }
917933 */
918934 public final void characters(@Const @NoLength char[] buf, int start, int length)
919935 throws SAXException {
920 // Note: Can't attach error messages to EOF in C++ yet
921
922 // CPPONLY: if (tokenizer.isViewingXmlSource()) {
923 // CPPONLY: return;
924 // CPPONLY: }
925936 if (needToDropLF) {
926937 needToDropLF = false;
927938 if (buf[start] == '\n') {
938949 case IN_BODY:
939950 case IN_CELL:
940951 case IN_CAPTION:
941 if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) {
952 if (!isInForeign()) {
942953 reconstructTheActiveFormattingElements();
943954 }
944955 // fall through
9971008 * Reconstruct the active formatting
9981009 * elements, if any.
9991010 */
1000 if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) {
1011 if (!isInForeign()) {
10011012 flushCharacters();
10021013 reconstructTheActiveFormattingElements();
10031014 }
10521063 err("Non-space characters found without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
10531064 break;
10541065 case HTML:
1055 // XXX figure out a way to report this in the Gecko View Source case
10561066 err("Non-space characters found without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
10571067 break;
10581068 case HTML401_STRICT:
11491159 * Parse error. Act as if an end tag with
11501160 * the tag name "noscript" had been seen
11511161 */
1152 errNonSpaceInNoscriptInHead();
1162 err("Non-space character inside \u201Cnoscript\u201D inside \u201Chead\u201D.");
11531163 flushCharacters();
11541164 pop();
11551165 mode = IN_HEAD;
11941204 * Reconstruct the active formatting
11951205 * elements, if any.
11961206 */
1197 if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) {
1207 if (!isInForeign()) {
11981208 flushCharacters();
11991209 reconstructTheActiveFormattingElements();
12001210 }
12221232 * current token.
12231233 */
12241234 if (currentPtr == 0) {
1225 errNonSpaceInColgroupInFragment();
1235 err("Non-space in \u201Ccolgroup\u201D when parsing fragment.");
12261236 start = i + 1;
12271237 continue;
12281238 }
12351245 case IN_SELECT_IN_TABLE:
12361246 break charactersloop;
12371247 case AFTER_BODY:
1238 errNonSpaceAfterBody();
1248 err("Non-space character after body.");
12391249 fatal();
12401250 mode = framesetOk ? FRAMESET_OK : IN_BODY;
12411251 i--;
12491259 /*
12501260 * Parse error.
12511261 */
1252 errNonSpaceInFrameset();
1262 err("Non-space in \u201Cframeset\u201D.");
12531263 /*
12541264 * Ignore the token.
12551265 */
12641274 /*
12651275 * Parse error.
12661276 */
1267 errNonSpaceAfterFrameset();
1277 err("Non-space after \u201Cframeset\u201D.");
12681278 /*
12691279 * Ignore the token.
12701280 */
12741284 /*
12751285 * Parse error.
12761286 */
1277 errNonSpaceInTrailer();
1287 err("Non-space character in page trailer.");
12781288 /*
12791289 * Switch back to the main mode and
12801290 * reprocess the token.
12831293 i--;
12841294 continue;
12851295 case AFTER_AFTER_FRAMESET:
1286 errNonSpaceInTrailer();
1296 /*
1297 * Parse error.
1298 */
1299 err("Non-space character in page trailer.");
12871300 /*
12881301 * Switch back to the main mode and
12891302 * reprocess the token.
13091322 return;
13101323 }
13111324 if (currentPtr >= 0) {
1312 if (isSpecialParentInForeign(stack[currentPtr])) {
1325 StackNode<T> stackNode = stack[currentPtr];
1326 if (stackNode.ns == "http://www.w3.org/1999/xhtml") {
1327 return;
1328 }
1329 if (stackNode.isHtmlIntegrationPoint()) {
1330 return;
1331 }
1332 if (stackNode.ns == "http://www.w3.org/1998/Math/MathML"
1333 && stackNode.getGroup() == MI_MO_MN_MS_MTEXT) {
13131334 return;
13141335 }
13151336 accumulateCharacters(REPLACEMENT_CHARACTER, 0, 1);
13181339
13191340 public final void eof() throws SAXException {
13201341 flushCharacters();
1321 // Note: Can't attach error messages to EOF in C++ yet
13221342 eofloop: for (;;) {
13231343 if (isInForeign()) {
1324 // [NOCPP[
13251344 err("End of file in a foreign namespace context.");
1326 // ]NOCPP]
13271345 break eofloop;
13281346 }
13291347 switch (mode) {
13811399 mode = IN_HEAD;
13821400 continue;
13831401 case IN_HEAD:
1384 // [NOCPP[
13851402 if (errorHandler != null && currentPtr > 1) {
1386 errEofWithUnclosedElements();
1387 }
1388 // ]NOCPP]
1403 errEndWithUnclosedElements("End of file seen and there were open elements.");
1404 }
13891405 while (currentPtr > 0) {
13901406 popOnEof();
13911407 }
13921408 mode = AFTER_HEAD;
13931409 continue;
13941410 case IN_HEAD_NOSCRIPT:
1395 // [NOCPP[
1396 errEofWithUnclosedElements();
1397 // ]NOCPP]
1411 errEndWithUnclosedElements("End of file seen and there were open elements.");
13981412 while (currentPtr > 1) {
13991413 popOnEof();
14001414 }
14301444 case HTML:
14311445 break;
14321446 default:
1433 errEofWithUnclosedElements();
1447 errEndWithUnclosedElements("End of file seen and there were open elements.");
14341448 break openelementloop;
14351449 }
14361450 }
14371451 // ]NOCPP]
14381452 break eofloop;
14391453 case TEXT:
1440 // [NOCPP[
14411454 if (errorHandler != null) {
14421455 errNoCheck("End of file seen when expecting text or an end tag.");
14431456 errListUnclosedStartTags(0);
14441457 }
1445 // ]NOCPP]
14461458 // XXX mark script as already executed
14471459 if (originalMode == AFTER_HEAD) {
14481460 popOnEof();
14561468 case IN_SELECT:
14571469 case IN_SELECT_IN_TABLE:
14581470 case IN_FRAMESET:
1459 // [NOCPP[
14601471 if (errorHandler != null && currentPtr > 0) {
1461 errEofWithUnclosedElements();
1462 }
1463 // ]NOCPP]
1472 errEndWithUnclosedElements("End of file seen and there were open elements.");
1473 }
14641474 break eofloop;
14651475 case AFTER_BODY:
14661476 case AFTER_FRAMESET:
15651575 case P:
15661576 case PRE_OR_LISTING:
15671577 case TABLE:
1568 errHtmlStartTagInForeignContext(name);
1578 err("HTML start tag \u201C"
1579 + name
1580 + "\u201D in a foreign namespace context.");
15691581 while (!isSpecialParentInForeign(stack[currentPtr])) {
15701582 pop();
15711583 }
15741586 if (attributes.contains(AttributeName.COLOR)
15751587 || attributes.contains(AttributeName.FACE)
15761588 || attributes.contains(AttributeName.SIZE)) {
1577 errHtmlStartTagInForeignContext(name);
1589 err("HTML start tag \u201C"
1590 + name
1591 + "\u201D in a foreign namespace context.");
15781592 while (!isSpecialParentInForeign(stack[currentPtr])) {
15791593 pop();
15801594 }
16221636 attributes = null; // CPP
16231637 break starttagloop;
16241638 case TD_OR_TH:
1625 errStartTagInTableBody(name);
1639 err("\u201C" + name
1640 + "\u201D start tag in table body.");
16261641 clearStackBackTo(findLastInTableScopeOrRootTbodyTheadTfoot());
16271642 appendToCurrentNodeAndPushElement(
16281643 ElementName.TR,
16651680 eltPos = findLastOrRoot(TreeBuilder.TR);
16661681 if (eltPos == 0) {
16671682 assert fragment;
1668 errNoTableRowToClose();
1683 err("No table row to close.");
16691684 break starttagloop;
16701685 }
16711686 clearStackBackTo(eltPos);
17191734 mode = IN_TABLE_BODY;
17201735 continue starttagloop;
17211736 case TABLE:
1722 errTableSeenWhileTableOpen();
1737 err("Start tag for \u201Ctable\u201D seen but the previous \u201Ctable\u201D is still open.");
17231738 eltPos = findLastInTableScope(name);
17241739 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
17251740 assert fragment;
17281743 generateImpliedEndTags();
17291744 // XXX is the next if dead code?
17301745 if (errorHandler != null && !isCurrent("table")) {
1731 errNoCheckUnclosedElementsOnStack();
1746 errNoCheck("Unclosed elements on stack.");
17321747 }
17331748 while (currentPtr >= eltPos) {
17341749 pop();
17731788 break starttagloop;
17741789 case FORM:
17751790 if (formPointer != null) {
1776 errFormWhenFormOpen();
1791 err("Saw a \u201Cform\u201D start tag, but there was already an active \u201Cform\u201D element. Nested forms are not allowed. Ignoring the tag.");
17771792 break starttagloop;
17781793 } else {
1779 errStartTagInTable(name);
1794 err("Start tag \u201Cform\u201D seen in \u201Ctable\u201D.");
17801795 appendVoidFormToCurrent(attributes);
17811796 attributes = null; // CPP
17821797 break starttagloop;
17831798 }
17841799 default:
1785 errStartTagInTable(name);
1800 err("Start tag \u201C" + name
1801 + "\u201D seen in \u201Ctable\u201D.");
17861802 // fall through to IN_BODY
17871803 break intableloop;
17881804 }
18021818 }
18031819 generateImpliedEndTags();
18041820 if (errorHandler != null && currentPtr != eltPos) {
1805 errNoCheckUnclosedElementsOnStack();
1821 errNoCheck("Unclosed elements on stack.");
18061822 }
18071823 while (currentPtr >= eltPos) {
18081824 pop();
18231839 case TD_OR_TH:
18241840 eltPos = findLastInTableScopeTdTh();
18251841 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
1826 errNoCellToClose();
1842 err("No cell to close.");
18271843 break starttagloop;
18281844 } else {
18291845 closeTheCell(eltPos);
18411857 errStrayStartTag(name);
18421858 break starttagloop;
18431859 } else {
1844 errFramesetStart();
1860 err("\u201Cframeset\u201D start tag seen.");
18451861 detachFromParent(stack[1].node);
18461862 while (currentPtr > 0) {
18471863 pop();
19121928 errStrayStartTag(name);
19131929 break starttagloop;
19141930 }
1915 errFooSeenWhenFooOpen(name);
1931 err("\u201Cbody\u201D start tag found but the \u201Cbody\u201D element is already open.");
19161932 framesetOk = false;
19171933 if (mode == FRAMESET_OK) {
19181934 mode = IN_BODY;
19341950 case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6:
19351951 implicitlyCloseP();
19361952 if (stack[currentPtr].getGroup() == H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6) {
1937 errHeadingWhenHeadingOpen();
1953 err("Heading cannot be a child of another heading.");
19381954 pop();
19391955 }
19401956 appendToCurrentNodeAndPushElementMayFoster(
19591975 break starttagloop;
19601976 case FORM:
19611977 if (formPointer != null) {
1962 errFormWhenFormOpen();
1978 err("Saw a \u201Cform\u201D start tag, but there was already an active \u201Cform\u201D element. Nested forms are not allowed. Ignoring the tag.");
19631979 break starttagloop;
19641980 } else {
19651981 implicitlyCloseP();
20102026 case A:
20112027 int activeAPos = findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker("a");
20122028 if (activeAPos != -1) {
2013 errFooSeenWhenFooOpen(name);
2029 err("An \u201Ca\u201D start tag seen with already an active \u201Ca\u201D element.");
20142030 StackNode<T> activeA = listOfActiveFormattingElements[activeAPos];
20152031 activeA.retain();
20162032 adoptionAgencyEndTag("a");
20392055 case NOBR:
20402056 reconstructTheActiveFormattingElements();
20412057 if (TreeBuilder.NOT_FOUND_ON_STACK != findLastInScope("nobr")) {
2042 errFooSeenWhenFooOpen(name);
2058 err("\u201Cnobr\u201D start tag seen when there was an open \u201Cnobr\u201D element in scope.");
20432059 adoptionAgencyEndTag("nobr");
20442060 reconstructTheActiveFormattingElements();
20452061 }
20512067 case BUTTON:
20522068 eltPos = findLastInScope(name);
20532069 if (eltPos != TreeBuilder.NOT_FOUND_ON_STACK) {
2054 errFooSeenWhenFooOpen(name);
2070 err("\u201Cbutton\u201D start tag seen when there was an open \u201Cbutton\u201D element in scope.");
2071
20552072 generateImpliedEndTags();
20562073 if (errorHandler != null
20572074 && !isCurrent(name)) {
21022119 case AREA_OR_WBR:
21032120 reconstructTheActiveFormattingElements();
21042121 // FALL THROUGH to PARAM_OR_SOURCE_OR_TRACK
2105 // CPPONLY: case MENUITEM:
21062122 case PARAM_OR_SOURCE_OR_TRACK:
21072123 appendVoidElementToCurrentMayFoster(
21082124 elementName,
21192135 attributes = null; // CPP
21202136 break starttagloop;
21212137 case IMAGE:
2122 errImage();
2138 err("Saw a start tag \u201Cimage\u201D.");
21232139 elementName = ElementName.IMG;
21242140 continue starttagloop;
21252141 case KEYGEN:
21322148 attributes = null; // CPP
21332149 break starttagloop;
21342150 case ISINDEX:
2135 errIsindex();
2151 err("\u201Cisindex\u201D seen.");
21362152 if (formPointer != null) {
21372153 break starttagloop;
21382154 }
22812297 attributes = null; // CPP
22822298 break starttagloop;
22832299 case RT_OR_RP:
2300 /*
2301 * If the stack of open elements has a ruby
2302 * element in scope, then generate implied end
2303 * tags. If the current node is not then a ruby
2304 * element, this is a parse error; pop all the
2305 * nodes from the current node up to the node
2306 * immediately before the bottommost ruby
2307 * element on the stack of open elements.
2308 *
2309 * Insert an HTML element for the token.
2310 */
22842311 eltPos = findLastInScope("ruby");
22852312 if (eltPos != NOT_FOUND_ON_STACK) {
22862313 generateImpliedEndTags();
22872314 }
22882315 if (eltPos != currentPtr) {
2289 if (eltPos != NOT_FOUND_ON_STACK) {
2290 errStartTagSeenWithoutRuby(name);
2291 } else {
2292 errUnclosedChildrenInRuby();
2316 if (errorHandler != null) {
2317 if (eltPos != NOT_FOUND_ON_STACK) {
2318
2319 errNoCheck("Start tag \u201C"
2320 + name
2321 + "\u201D seen without a \u201Cruby\u201D element being open.");
2322 } else {
2323 errNoCheck("Unclosed children in \u201Cruby\u201D.");
2324 }
2325 }
2326 while (currentPtr > eltPos) {
2327 pop();
22932328 }
22942329 }
22952330 appendToCurrentNodeAndPushElementMayFoster(
24272462 break starttagloop;
24282463 case HEAD:
24292464 /* Parse error. */
2430 errFooSeenWhenFooOpen(name);
2465 err("Start tag for \u201Chead\u201D seen when \u201Chead\u201D was already open.");
24312466 /* Ignore the token. */
24322467 break starttagloop;
24332468 default:
24742509 attributes = null; // CPP
24752510 break starttagloop;
24762511 case HEAD:
2477 errFooSeenWhenFooOpen(name);
2512 err("Start tag for \u201Chead\u201D seen when \u201Chead\u201D was already open.");
24782513 break starttagloop;
24792514 case NOSCRIPT:
2480 errFooSeenWhenFooOpen(name);
2515 err("Start tag for \u201Cnoscript\u201D seen when \u201Cnoscript\u201D was already open.");
24812516 break starttagloop;
24822517 default:
2483 errBadStartTagInHead(name);
2518 err("Bad start tag in \u201C" + name
2519 + "\u201D in \u201Chead\u201D.");
24842520 pop();
24852521 mode = IN_HEAD;
24862522 continue;
25042540 default:
25052541 if (currentPtr == 0) {
25062542 assert fragment;
2507 errGarbageInColgroup();
2543 err("Garbage in \u201Ccolgroup\u201D fragment.");
25082544 break starttagloop;
25092545 }
25102546 pop();
25182554 case TR:
25192555 case TD_OR_TH:
25202556 case TABLE:
2521 errStartTagWithSelectOpen(name);
2557 err("\u201C"
2558 + name
2559 + "\u201D start tag with \u201Cselect\u201D open.");
25222560 eltPos = findLastInTableScope("select");
25232561 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
25242562 assert fragment;
25632601 attributes = null; // CPP
25642602 break starttagloop;
25652603 case SELECT:
2566 errStartSelectWhereEndSelectExpected();
2604 err("\u201Cselect\u201D start tag where end tag expected.");
25672605 eltPos = findLastInTableScope(name);
25682606 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
25692607 assert fragment;
2570 errNoSelectInTableScope();
2608 err("No \u201Cselect\u201D in table scope.");
25712609 break starttagloop;
25722610 } else {
25732611 while (currentPtr >= eltPos) {
25792617 case INPUT:
25802618 case TEXTAREA:
25812619 case KEYGEN:
2582 errStartTagWithSelectOpen(name);
2620 err("\u201C"
2621 + name
2622 + "\u201D start tag seen in \u201Cselect\u201D.");
25832623 eltPos = findLastInTableScope("select");
25842624 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
25852625 assert fragment;
26732713 err("Start tag seen without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
26742714 break;
26752715 case HTML:
2676 // ]NOCPP]
2677 errStartTagWithoutDoctype();
2678 // [NOCPP[
2716 err("Start tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
26792717 break;
26802718 case HTML401_STRICT:
26812719 err("Start tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
28112849 attributes = null; // CPP
28122850 break starttagloop;
28132851 case BASE:
2814 errFooBetweenHeadAndBody(name);
2852 err("\u201Cbase\u201D element outside \u201Chead\u201D.");
28152853 pushHeadPointerOntoStack();
28162854 appendVoidElementToCurrentMayFoster(
28172855 elementName,
28212859 attributes = null; // CPP
28222860 break starttagloop;
28232861 case LINK_OR_BASEFONT_OR_BGSOUND:
2824 errFooBetweenHeadAndBody(name);
2862 err("\u201Clink\u201D element outside \u201Chead\u201D.");
28252863 pushHeadPointerOntoStack();
28262864 appendVoidElementToCurrentMayFoster(
28272865 elementName,
28312869 attributes = null; // CPP
28322870 break starttagloop;
28332871 case META:
2834 errFooBetweenHeadAndBody(name);
2872 err("\u201Cmeta\u201D element outside \u201Chead\u201D.");
28352873 checkMetaCharset(attributes);
28362874 pushHeadPointerOntoStack();
28372875 appendVoidElementToCurrentMayFoster(
28422880 attributes = null; // CPP
28432881 break starttagloop;
28442882 case SCRIPT:
2845 errFooBetweenHeadAndBody(name);
2883 err("\u201Cscript\u201D element between \u201Chead\u201D and \u201Cbody\u201D.");
28462884 pushHeadPointerOntoStack();
28472885 appendToCurrentNodeAndPushElement(
28482886 elementName,
28552893 break starttagloop;
28562894 case STYLE:
28572895 case NOFRAMES:
2858 errFooBetweenHeadAndBody(name);
2896 err("\u201C"
2897 + name
2898 + "\u201D element between \u201Chead\u201D and \u201Cbody\u201D.");
28592899 pushHeadPointerOntoStack();
28602900 appendToCurrentNodeAndPushElement(
28612901 elementName,
28672907 attributes = null; // CPP
28682908 break starttagloop;
28692909 case TITLE:
2870 errFooBetweenHeadAndBody(name);
2910 err("\u201Ctitle\u201D element outside \u201Chead\u201D.");
28712911 pushHeadPointerOntoStack();
28722912 appendToCurrentNodeAndPushElement(
28732913 elementName,
29302970 // fails
29312971 }
29322972 }
2933 if (selfClosing) {
2934 errSelfClosing();
2973 if (errorHandler != null && selfClosing) {
2974 errNoCheck("Self-closing syntax (\u201C/>\u201D) used on a non-void HTML element. Ignoring the slash and treating as a start tag.");
29352975 }
29362976 if (attributes != HtmlAttributes.EMPTY_ATTRIBUTES) {
29372977 Portability.delete(attributes);
31473187 @Local String name = elementName.name;
31483188 endtagloop: for (;;) {
31493189 if (isInForeign()) {
3150 if (stack[currentPtr].name != name) {
3151 errEndTagDidNotMatchCurrentOpenElement(name, stack[currentPtr].popName);
3190 if (errorHandler != null && stack[currentPtr].name != name) {
3191 errNoCheck("End tag \u201C"
3192 + name
3193 + "\u201D did not match the name of the current open element (\u201C"
3194 + stack[currentPtr].popName + "\u201D).");
31523195 }
31533196 eltPos = currentPtr;
31543197 for (;;) {
31703213 eltPos = findLastOrRoot(TreeBuilder.TR);
31713214 if (eltPos == 0) {
31723215 assert fragment;
3173 errNoTableRowToClose();
3216 err("No table row to close.");
31743217 break endtagloop;
31753218 }
31763219 clearStackBackTo(eltPos);
31813224 eltPos = findLastOrRoot(TreeBuilder.TR);
31823225 if (eltPos == 0) {
31833226 assert fragment;
3184 errNoTableRowToClose();
3227 err("No table row to close.");
31853228 break endtagloop;
31863229 }
31873230 clearStackBackTo(eltPos);
31963239 eltPos = findLastOrRoot(TreeBuilder.TR);
31973240 if (eltPos == 0) {
31983241 assert fragment;
3199 errNoTableRowToClose();
3242 err("No table row to close.");
32003243 break endtagloop;
32013244 }
32023245 clearStackBackTo(eltPos);
32953338 mode = IN_TABLE;
32963339 break endtagloop;
32973340 case TABLE:
3298 errTableClosedWhileCaptionOpen();
3341 err("\u201Ctable\u201D closed but \u201Ccaption\u201D was still open.");
32993342 eltPos = findLastInTableScope("caption");
33003343 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
33013344 break endtagloop;
33823425 case TBODY_OR_THEAD_OR_TFOOT:
33833426 break;
33843427 default:
3385 errEndWithUnclosedElements(name);
3428 errEndWithUnclosedElements("End tag for \u201Cbody\u201D seen but there were unclosed elements.");
33863429 break uncloseloop1;
33873430 }
33883431 }
34073450 case HTML:
34083451 break;
34093452 default:
3410 errEndWithUnclosedElements(name);
3453 errEndWithUnclosedElements("End tag for \u201Chtml\u201D seen but there were unclosed elements.");
34113454 break uncloseloop2;
34123455 }
34133456 }
34533496 case P:
34543497 eltPos = findLastInButtonScope("p");
34553498 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3456 errNoElementToCloseButEndTagSeen("p");
3499 err("No \u201Cp\u201D element in scope but a \u201Cp\u201D end tag seen.");
34573500 // XXX Can the 'in foreign' case happen anymore?
34583501 if (isInForeign()) {
3459 errHtmlStartTagInForeignContext(name);
3502 err("HTML start tag \u201C"
3503 + name
3504 + "\u201D in a foreign namespace context.");
34603505 while (stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") {
34613506 pop();
34623507 }
34783523 case LI:
34793524 eltPos = findLastInListScope(name);
34803525 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3481 errNoElementToCloseButEndTagSeen(name);
3526 err("No \u201Cli\u201D element in list scope but a \u201Cli\u201D end tag seen.");
34823527 } else {
34833528 generateImpliedEndTagsExceptFor(name);
34843529 if (errorHandler != null
34933538 case DD_OR_DT:
34943539 eltPos = findLastInScope(name);
34953540 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3496 errNoElementToCloseButEndTagSeen(name);
3541 err("No \u201C"
3542 + name
3543 + "\u201D element in scope but a \u201C"
3544 + name + "\u201D end tag seen.");
34973545 } else {
34983546 generateImpliedEndTagsExceptFor(name);
34993547 if (errorHandler != null
35363584 }
35373585 break endtagloop;
35383586 case BR:
3539 errEndTagBr();
3587 err("End tag \u201Cbr\u201D.");
35403588 if (isInForeign()) {
3541 errHtmlStartTagInForeignContext(name);
3589 err("HTML start tag \u201C"
3590 + name
3591 + "\u201D in a foreign namespace context.");
35423592 while (stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") {
35433593 pop();
35443594 }
35493599 HtmlAttributes.EMPTY_ATTRIBUTES);
35503600 break endtagloop;
35513601 case AREA_OR_WBR:
3552 // CPPONLY: case MENUITEM:
35533602 case PARAM_OR_SOURCE_OR_TRACK:
35543603 case EMBED_OR_IMG:
35553604 case IMAGE:
36113660 case COLGROUP:
36123661 if (currentPtr == 0) {
36133662 assert fragment;
3614 errGarbageInColgroup();
3663 err("Garbage in \u201Ccolgroup\u201D fragment.");
36153664 break endtagloop;
36163665 }
36173666 pop();
36233672 default:
36243673 if (currentPtr == 0) {
36253674 assert fragment;
3626 errGarbageInColgroup();
3675 err("Garbage in \u201Ccolgroup\u201D fragment.");
36273676 break endtagloop;
36283677 }
36293678 pop();
36373686 case TBODY_OR_THEAD_OR_TFOOT:
36383687 case TR:
36393688 case TD_OR_TH:
3640 errEndTagSeenWithSelectOpen(name);
3689 err("\u201C"
3690 + name
3691 + "\u201D end tag with \u201Cselect\u201D open.");
36413692 if (findLastInTableScope(name) != TreeBuilder.NOT_FOUND_ON_STACK) {
36423693 eltPos = findLastInTableScope("select");
36433694 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
37033754 break endtagloop;
37043755 }
37053756 default:
3706 errEndTagAfterBody();
3757 err("Saw an end tag after \u201Cbody\u201D had been closed.");
37073758 mode = framesetOk ? FRAMESET_OK : IN_BODY;
37083759 continue;
37093760 }
37433794 err("End tag seen without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
37443795 break;
37453796 case HTML:
3746 // ]NOCPP]
3747 errEndTagSeenWithoutDoctype();
3748 // [NOCPP[
3797 err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
37493798 break;
37503799 case HTML401_STRICT:
37513800 err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
43314380 formattingEltStackPos--;
43324381 }
43334382 if (formattingEltStackPos == -1) {
4334 errNoElementToCloseButEndTagSeen(name);
4383 err("No element \u201C" + name + "\u201D to close.");
43354384 removeFromListOfActiveFormattingElements(formattingEltListPos);
43364385 return true;
43374386 }
43384387 if (!inScope) {
4339 errNoElementToCloseButEndTagSeen(name);
4388 err("No element \u201C" + name + "\u201D to close.");
43404389 return true;
43414390 }
43424391 // stackPos now points to the formatting element and it is in scope
4343 if (formattingEltStackPos != currentPtr) {
4344 errEndTagViolatesNestingRules(name);
4392 if (errorHandler != null && formattingEltStackPos != currentPtr) {
4393 errNoCheck("End tag \u201C" + name + "\u201D violates nesting rules.");
43454394 }
43464395 int furthestBlockPos = formattingEltStackPos + 1;
43474396 while (furthestBlockPos <= currentPtr) {
47164765 case ALTER_INFOSET:
47174766 // fall through
47184767 case ALLOW:
4719 warn("Attribute \u201Cxmlns:xlink\u201D with a value other than \u201Chttp://www.w3.org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics.");
4768 warn("Attribute \u201Cxmlns:xlink\u201D with the value \u201Chttp://www.w3org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics.");
47204769 break;
47214770 case FATAL:
4722 fatal("Attribute \u201Cxmlns:xlink\u201D with a value other than \u201Chttp://www.w3.org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics.");
4771 fatal("Attribute \u201Cxmlns:xlink\u201D with the value \u201Chttp://www.w3org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics.");
47234772 break;
47244773 }
47254774 }
52755324 && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml";
52765325 }
52775326
5278 private boolean isInForeignButNotHtmlOrMathTextIntegrationPoint() {
5279 if (currentPtr < 0) {
5280 return false;
5281 }
5282 return !isSpecialParentInForeign(stack[currentPtr]);
5283 }
5284
52855327 /**
52865328 * The argument MUST be an interned string or <code>null</code>.
52875329 *
53655407 if (charBufferLen > 0) {
53665408 if ((mode == IN_TABLE || mode == IN_TABLE_BODY || mode == IN_ROW)
53675409 && charBufferContainsNonWhitespace()) {
5368 errNonSpaceInTable();
5410 err("Misplaced non-space characters insided a table.");
53695411 reconstructTheActiveFormattingElements();
53705412 if (!stack[currentPtr].isFosterParenting()) {
53715413 // reconstructing gave us a new current node
56725714 return currentPtr + 1;
56735715 }
56745716
5675 /**
5676 * Reports a stray start tag.
5677 * @param name the name of the stray tag
5678 *
5679 * @throws SAXException
5680 */
5681 private void errStrayStartTag(@Local String name) throws SAXException {
5682 err("Stray end tag \u201C" + name + "\u201D.");
5683 }
5684
5685 /**
5686 * Reports a stray end tag by passing a message that quotes the tag name to err().
5687 * @param name the name of the stray tag; interpolated into the message
5688 *
5689 * @throws SAXException as declared by this method
5690 */
5691 private void errStrayEndTag(@Local String name) throws SAXException {
5692 err("Stray end tag \u201C" + name + "\u201D.");
5693 }
5694
5695 /**
5696 * Reports that an end tag was seen while elements were still open, then
5697 * lists the unclosed start tags via errListUnclosedStartTags.
5698 * @param eltPos the position of the start tag on the stack of the element
5699 * being closed; forwarded unchanged to errListUnclosedStartTags.
5700 * @param name the name of the end tag
5701 *
5702 * @throws SAXException as declared by this method
5703 */
// NOTE(review): calls errNoCheck without testing errorHandler here;
// presumably callers perform that check first - confirm.
5704 private void errUnclosedElements(int eltPos, @Local String name) throws SAXException {
5705 errNoCheck("End tag \u201C" + name + "\u201D seen, but there were open elements.");
5706 errListUnclosedStartTags(eltPos);
5707 }
5708
5709 /**
5710 * Reports that an end tag was implied while elements were still open,
5711 * then lists the unclosed start tags via errListUnclosedStartTags.
5712 *
5713 * @param eltPos the position of the start tag on the stack of the element
5714 * being closed; forwarded unchanged to errListUnclosedStartTags.
5715 * @param name the name of the implied end tag
5716 *
5717 * @throws SAXException as declared by this method
5718 */
// NOTE(review): unlike errUnclosedElements, the name parameter carries no
// @Local annotation here - confirm whether that is intentional.
5719 private void errUnclosedElementsImplied(int eltPos, String name) throws SAXException {
5720 errNoCheck("End tag \u201C" + name + "\u201D implied, but there were open elements.");
5721 errListUnclosedStartTags(eltPos);
5722 }
5723
5724 /**
5725 * Reports that a table cell was implicitly closed while elements were
5726 * still open, then lists the unclosed start tags.
5727 *
5728 * @param eltPos the position of the start tag on the stack of the element
5729 * being closed; forwarded unchanged to errListUnclosedStartTags.
5730 * @throws SAXException as declared by this method
5731 */
5732 private void errUnclosedElementsCell(int eltPos) throws SAXException {
5733 errNoCheck("A table cell was implicitly closed, but there were open elements.");
5734 errListUnclosedStartTags(eltPos);
5735 }
5736
/** Reports the fixed "Stray doctype." error by delegating to err(). */
5737 private void errStrayDoctype() throws SAXException {
5738 err("Stray doctype.");
5739 }
5740
/** Reports an almost-standards-mode doctype via err(); the message names the expected doctype. */
5741 private void errAlmostStandardsDoctype() throws SAXException {
5742 err("Almost standards mode doctype. Expected \u201C<!DOCTYPE html>\u201D.");
5743 }
5744
/** Reports a quirky doctype via err(); the message names the expected doctype. */
5745 private void errQuirkyDoctype() throws SAXException {
5746 err("Quirky doctype. Expected \u201C<!DOCTYPE html>\u201D.");
5747 }
5748
/** Reports a non-space character in the page trailer via err(). */
5749 private void errNonSpaceInTrailer() throws SAXException {
5750 err("Non-space character in page trailer.");
5751 }
5752
/** Reports non-space content after a frameset via err(). */
5753 private void errNonSpaceAfterFrameset() throws SAXException {
5754 err("Non-space after \u201Cframeset\u201D.");
5755 }
5756
/** Reports non-space content inside a frameset via err(). */
5757 private void errNonSpaceInFrameset() throws SAXException {
5758 err("Non-space in \u201Cframeset\u201D.");
5759 }
5760
/** Reports a non-space character after body via err(). */
5761 private void errNonSpaceAfterBody() throws SAXException {
5762 err("Non-space character after body.");
5763 }
5764
/** Reports non-space content in a colgroup during fragment parsing via err(). */
5765 private void errNonSpaceInColgroupInFragment() throws SAXException {
5766 err("Non-space in \u201Ccolgroup\u201D when parsing fragment.");
5767 }
5768
/** Reports a non-space character inside noscript inside head via err(). */
5769 private void errNonSpaceInNoscriptInHead() throws SAXException {
5770 err("Non-space character inside \u201Cnoscript\u201D inside \u201Chead\u201D.");
5771 }
5772
/**
 * Reports that the named element appeared between head and body.
 * Returns without building the message when errorHandler is null.
 *
 * @param name the element name interpolated into the message
 */
5773 private void errFooBetweenHeadAndBody(@Local String name) throws SAXException {
5774 if (errorHandler == null) {
5775 return;
5776 }
5777 errNoCheck("\u201C" + name + "\u201D element between \u201Chead\u201D and \u201Cbody\u201D.");
5778 }
5779
/** Reports a start tag seen before any doctype via err(); the message names the expected doctype. */
5780 private void errStartTagWithoutDoctype() throws SAXException {
5781 err("Start tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
5782 }
5783
/** Reports that no select element is in table scope via err(). */
5784 private void errNoSelectInTableScope() throws SAXException {
5785 err("No \u201Cselect\u201D in table scope.");
5786 }
5787
/** Reports a select start tag where an end tag was expected via err(). */
5788 private void errStartSelectWhereEndSelectExpected() throws SAXException {
5789 err("\u201Cselect\u201D start tag where end tag expected.");
5790 }
5791
/**
 * Reports a start tag seen while a select element is open.
 * Returns without building the message when errorHandler is null.
 *
 * @param name the start tag name interpolated into the message
 */
5792 private void errStartTagWithSelectOpen(@Local String name)
5793 throws SAXException {
5794 if (errorHandler == null) {
5795 return;
5796 }
5797 errNoCheck("\u201C" + name
5798 + "\u201D start tag with \u201Cselect\u201D open.");
5799 }
5800
/**
 * Reports a bad start tag in head.
 * Returns without building the message when errorHandler is null.
 *
 * @param name the tag name interpolated into the message
 */
// NOTE(review): the emitted text reads "Bad start tag in <name> in head",
// i.e. the name follows the word "in" - confirm the wording is intended.
5801 private void errBadStartTagInHead(@Local String name) throws SAXException {
5802 if (errorHandler == null) {
5803 return;
5804 }
5805 errNoCheck("Bad start tag in \u201C" + name
5806 + "\u201D in \u201Chead\u201D.");
5807 }
5808
/** Reports that an "image" start tag was seen via err(). */
5809 private void errImage() throws SAXException {
5810 err("Saw a start tag \u201Cimage\u201D.");
5811 }
5812
/** Reports that "isindex" was seen via err(). */
5813 private void errIsindex() throws SAXException {
5814 err("\u201Cisindex\u201D seen.");
5815 }
5816
/**
 * Reports a start tag for an element type that is already open.
 * Returns without building the message when errorHandler is null.
 *
 * @param name the start tag name interpolated into the message
 */
5817 private void errFooSeenWhenFooOpen(@Local String name) throws SAXException {
5818 if (errorHandler == null) {
5819 return;
5820 }
5821 errNoCheck("An \u201C" + name + "\u201D start tag seen but an element of the same type was already open.");
5822 }
5823
/** Reports a heading nested in another heading via err(). */
5824 private void errHeadingWhenHeadingOpen() throws SAXException {
5825 err("Heading cannot be a child of another heading.");
5826 }
5827
/** Reports that a frameset start tag was seen via err(). */
5828 private void errFramesetStart() throws SAXException {
5829 err("\u201Cframeset\u201D start tag seen.");
5830 }
5831
/** Reports the fixed "No cell to close." error via err(). */
5832 private void errNoCellToClose() throws SAXException {
5833 err("No cell to close.");
5834 }
5835
/**
 * Reports a start tag seen in a table.
 * Returns without building the message when errorHandler is null.
 *
 * @param name the start tag name interpolated into the message
 */
5836 private void errStartTagInTable(@Local String name) throws SAXException {
5837 if (errorHandler == null) {
5838 return;
5839 }
5840 errNoCheck("Start tag \u201C" + name
5841 + "\u201D seen in \u201Ctable\u201D.");
5842 }
5843
/** Reports a form start tag seen while another form element is active via err(). */
5844 private void errFormWhenFormOpen() throws SAXException {
5845 err("Saw a \u201Cform\u201D start tag, but there was already an active \u201Cform\u201D element. Nested forms are not allowed. Ignoring the tag.");
5846 }
5847
/** Reports a table start tag seen while the previous table is still open via err(). */
5848 private void errTableSeenWhileTableOpen() throws SAXException {
5849 err("Start tag for \u201Ctable\u201D seen but the previous \u201Ctable\u201D is still open.");
5850 }
5851
/**
 * Reports a start tag seen in a table body.
 * Returns without building the message when errorHandler is null.
 *
 * @param name the start tag name interpolated into the message
 */
5852 private void errStartTagInTableBody(@Local String name) throws SAXException {
5853 if (errorHandler == null) {
5854 return;
5855 }
5856 errNoCheck("\u201C" + name + "\u201D start tag in table body.");
5857 }
5858
/** Reports an end tag seen before any doctype via err(); the message names the expected doctype. */
5859 private void errEndTagSeenWithoutDoctype() throws SAXException {
5860 err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
5861 }
5862
/** Reports an end tag seen after body had been closed via err(). */
5863 private void errEndTagAfterBody() throws SAXException {
5864 err("Saw an end tag after \u201Cbody\u201D had been closed.");
5865 }
5866
/**
 * Reports an end tag seen while a select element is open.
 * Returns without building the message when errorHandler is null.
 *
 * @param name the end tag name interpolated into the message
 */
5867 private void errEndTagSeenWithSelectOpen(@Local String name) throws SAXException {
5868 if (errorHandler == null) {
5869 return;
5870 }
5871 errNoCheck("\u201C" + name
5872 + "\u201D end tag with \u201Cselect\u201D open.");
5873 }
5874
/** Reports garbage in a colgroup fragment via err(). */
5875 private void errGarbageInColgroup() throws SAXException {
5876 err("Garbage in \u201Ccolgroup\u201D fragment.");
5877 }
5878
/** Reports that an end tag "br" was seen via err(). */
5879 private void errEndTagBr() throws SAXException {
5880 err("End tag \u201Cbr\u201D.");
5881 }
5882
/**
 * Reports an end tag for which no matching element is in scope.
 * Returns without building the message when errorHandler is null.
 *
 * @param name the tag name; it appears twice in the message, once as
 *        the missing element and once as the end tag seen
 */
5883 private void errNoElementToCloseButEndTagSeen(@Local String name)
5884 throws SAXException {
5885 if (errorHandler == null) {
5886 return;
5887 }
5888 errNoCheck("No \u201C" + name + "\u201D element in scope but a \u201C"
5889 + name + "\u201D end tag seen.");
5890 }
5891
/**
 * Reports an HTML start tag seen in a foreign namespace context.
 * Returns without building the message when errorHandler is null.
 *
 * @param name the start tag name interpolated into the message
 */
5892 private void errHtmlStartTagInForeignContext(@Local String name)
5893 throws SAXException {
5894 if (errorHandler == null) {
5895 return;
5896 }
5897 errNoCheck("HTML start tag \u201C" + name
5898 + "\u201D in a foreign namespace context.");
5899 }
5900
/** Reports that table was closed while caption was still open via err(). */
5901 private void errTableClosedWhileCaptionOpen() throws SAXException {
5902 err("\u201Ctable\u201D closed but \u201Ccaption\u201D was still open.");
5903 }
5904
/** Reports the fixed "No table row to close." error via err(). */
5905 private void errNoTableRowToClose() throws SAXException {
5906 err("No table row to close.");
5907 }
5908
5909 private void errNonSpaceInTable() throws SAXException {
5910 err("Misplaced non-space characters insided a table.");
5911 }
5912
/**
 * Reports unclosed children in a ruby element.
 * Returns without reporting when errorHandler is null.
 */
5913 private void errUnclosedChildrenInRuby() throws SAXException {
5914 if (errorHandler == null) {
5915 return;
5916 }
5917 errNoCheck("Unclosed children in \u201Cruby\u201D.");
5918 }
5919
/**
 * Reports a start tag seen without a ruby element being open.
 * Returns without building the message when errorHandler is null.
 *
 * @param name the start tag name interpolated into the message
 */
5920 private void errStartTagSeenWithoutRuby(@Local String name) throws SAXException {
5921 if (errorHandler == null) {
5922 return;
5923 }
5924 errNoCheck("Start tag \u201C"
5925 + name
5926 + "\u201D seen without a \u201Cruby\u201D element being open.");
5927 }
5928
/**
 * Reports self-closing syntax used on a non-void HTML element.
 * Returns without reporting when errorHandler is null.
 */
5929 private void errSelfClosing() throws SAXException {
5930 if (errorHandler == null) {
5931 return;
5932 }
5933 errNoCheck("Self-closing syntax (\u201C/>\u201D) used on a non-void HTML element. Ignoring the slash and treating as a start tag.");
5934 }
5935
/**
 * Reports "Unclosed elements on stack." through errNoCheck.
 * No errorHandler test is made here; presumably the caller has already
 * checked it, as the method name suggests - confirm against call sites.
 */
5936 private void errNoCheckUnclosedElementsOnStack() throws SAXException {
5937 errNoCheck("Unclosed elements on stack.");
5938 }
5939
/**
 * Reports an end tag whose name differs from the current open element.
 * Returns without building the message when errorHandler is null.
 *
 * @param name the end tag name interpolated into the message
 * @param currOpenName the name of the current open element, shown in
 *        parentheses in the message
 */
5940 private void errEndTagDidNotMatchCurrentOpenElement(@Local String name,
5941 @Local String currOpenName) throws SAXException {
5942 if (errorHandler == null) {
5943 return;
5944 }
5945 errNoCheck("End tag \u201C"
5946 + name
5947 + "\u201D did not match the name of the current open element (\u201C"
5948 + currOpenName + "\u201D).");
5949 }
5950
/**
 * Reports an end tag that violates nesting rules.
 * Returns without building the message when errorHandler is null.
 *
 * @param name the end tag name interpolated into the message
 */
5951 private void errEndTagViolatesNestingRules(@Local String name) throws SAXException {
5952 if (errorHandler == null) {
5953 return;
5954 }
5955 errNoCheck("End tag \u201C" + name + "\u201D violates nesting rules.");
5956 }
5957
/**
 * Reports end of file with elements still open, then lists the unclosed
 * start tags from stack position 0.
 * Returns without reporting when errorHandler is null.
 */
5958 private void errEofWithUnclosedElements() throws SAXException {
5959 if (errorHandler == null) {
5960 return;
5961 }
5962 errNoCheck("End of file seen and there were open elements.");
5963 // just report all remaining unclosed elements
5964 errListUnclosedStartTags(0);
5965 }
5966
5967 /**
5968 * Reports arriving at/near end of document with unclosed elements remaining.
5969 * Returns without reporting when errorHandler is null.
5970 * @param name
5971 * the name of the end tag that was seen; interpolated into the message
5972 * @throws SAXException as declared by this method
5973 */
5974 private void errEndWithUnclosedElements(@Local String name) throws SAXException {
5975 if (errorHandler == null) {
5976 return;
5977 }
5978 errNoCheck("End tag for \u201C"
5979 + name
5980 + "\u201D seen, but there were unclosed elements.");
5981 // just report all remaining unclosed elements
5982 errListUnclosedStartTags(0);
5983 }
59845717 }
5050 */
5151 private int end;
5252
53 //[NOCPP[
54
5553 /**
5654 * Constructor for wrapping an existing UTF-16 code unit array.
5755 *
6866 this.end = end;
6967 }
7068
71 // ]NOCPP]
72
7369 /**
7470 * Returns the start index.
7571 *
281281 } else {
282282 // The usual stuff. Want more bytes next time.
283283 shouldReadBytes = true;
284 // return -1 if zero
284285 int cPos = charBuffer.position();
285 if (cPos == 0) {
286 // No output. Read more bytes right away
287 break;
288 }
289 return cPos;
286 return cPos == 0 ? -1 : cPos;
290287 }
291288 } else {
292289 // The result is in error. No need to test.
154154 "xlink");
155155 WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
156156 "http://www.w3.org/2001/XMLSchema-instance", "xsi");
157 WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.w3.org/1999/xlink",
157 WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.w3org/1999/xlink",
158158 "xlink");
159159 }
160160
100100 "nsINameSpaceManager", "nsIContent", "nsIDocument",
101101 "nsTraceRefcnt", "jArray", "nsHtml5DocumentMode",
102102 "nsHtml5ArrayCopy", "nsHtml5Parser", "nsHtml5Atoms",
103 "nsHtml5TreeOperation", "nsHtml5PendingNotification",
104 "nsHtml5StateSnapshot", "nsHtml5StackNode",
105 "nsHtml5TreeOpExecutor", "nsHtml5StreamParser",
106 "nsAHtml5TreeBuilderState", "nsHtml5Highlighter",
107 "nsHtml5ViewSourceUtils" };
108
109 private static final String[] TOKENIZER_INCLUDES = { "prtypes", "nsIAtom",
110 "nsHtml5AtomTable", "nsString", "nsIContent", "nsTraceRefcnt",
111 "jArray", "nsHtml5DocumentMode", "nsHtml5ArrayCopy",
112 "nsHtml5NamedCharacters", "nsHtml5NamedCharactersAccel",
113 "nsHtml5Atoms", "nsAHtml5TreeBuilderState", "nsHtml5Macros",
114 "nsHtml5Highlighter", "nsHtml5TokenizerLoopPolicies" };
103 "nsHtml5ByteReadable", "nsHtml5TreeOperation",
104 "nsHtml5PendingNotification", "nsHtml5StateSnapshot",
105 "nsHtml5StackNode", "nsHtml5TreeOpExecutor", "nsHtml5StreamParser",
106 "nsAHtml5TreeBuilderState" };
115107
116108 private static final String[] INCLUDES = { "prtypes", "nsIAtom",
117109 "nsHtml5AtomTable", "nsString", "nsINameSpaceManager",
118 "nsIContent", "nsTraceRefcnt", "jArray", "nsHtml5ArrayCopy",
119 "nsAHtml5TreeBuilderState", "nsHtml5Atoms", "nsHtml5ByteReadable",
120 "nsIUnicodeDecoder", "nsHtml5Macros" };
110 "nsIContent", "nsIDocument", "nsTraceRefcnt", "jArray",
111 "nsHtml5DocumentMode", "nsHtml5ArrayCopy",
112 "nsHtml5NamedCharacters", "nsHtml5NamedCharactersAccel",
113 "nsHtml5Atoms", "nsHtml5ByteReadable", "nsIUnicodeDecoder",
114 "nsAHtml5TreeBuilderState", "nsHtml5Macros" };
121115
122116 private static final String[] OTHER_DECLATIONS = {};
123117
126120 private static final String[] NAMED_CHARACTERS_INCLUDES = { "prtypes",
127121 "jArray", "nscore", "nsDebug", "prlog", "nsMemory" };
128122
129 private static final String[] FORWARD_DECLARATIONS = {
130 "nsHtml5StreamParser" };
123 private static final String[] FORWARD_DECLARATIONS = { "nsHtml5StreamParser", };
131124
132125 private static final String[] CLASSES_THAT_NEED_SUPPLEMENT = {
133 "MetaScanner", "Tokenizer", "TreeBuilder", "UTF16Buffer", };
134
135 private static final String[] STATE_LOOP_POLICIES = {
136 "nsHtml5ViewSourcePolicy", "nsHtml5SilentPolicy" };
126 "MetaScanner", "TreeBuilder", "UTF16Buffer", };
137127
138128 private final Map<String, String> atomMap = new HashMap<String, String>();
139129
169159 }
170160
171161 public String booleanType() {
172 return "bool";
162 return "PRBool";
173163 }
174164
175165 public String byteType() {
209199 }
210200
211201 public String falseLiteral() {
212 return "false";
202 return "PR_FALSE";
213203 }
214204
215205 public String trueLiteral() {
216 return "true";
206 return "PR_TRUE";
217207 }
218208
219209 public String nullLiteral() {
308298 public String[] boilerplateIncludes(String javaClass) {
309299 if ("TreeBuilder".equals(javaClass)) {
310300 return TREE_BUILDER_INCLUDES;
311 } else if ("Tokenizer".equals(javaClass)) {
312 return TOKENIZER_INCLUDES;
313301 } else {
314302 return INCLUDES;
315303 }
402390 public String characterNameTypeDeclaration() {
403391 return "nsHtml5CharacterName";
404392 }
405
/**
 * Returns the C++ text "P::transition", which visitTransition prints
 * ahead of the argument list of a generated transition call.
 */
406 public String transition() {
407 return "P::transition";
408 }
409
/**
 * Returns the C++ text "P::reportErrors", printed as the condition of the
 * generated if-statement that wraps tokenizer error-reporting calls.
 */
410 public String tokenizerErrorCondition() {
411 return "P::reportErrors";
412 }
413
/**
 * Returns the C++ text "mViewSource", printed as the first argument of a
 * generated transition call.
 */
414 public String firstTransitionArg() {
415 return "mViewSource";
416 }
417
/**
 * Returns the C++ text printed in place of the Java name "errorHandler"
 * when the visitor encounters that name.
 */
418 public String errorHandler() {
419 return "NS_UNLIKELY(mViewSource)";
420 }
421
/**
 * Returns the C++ call text printed (followed by ";") in place of a
 * completedNamedCharacterReference statement while transitions are reported.
 */
422 public String completedCharacterReference() {
423 return "P::completedNamedCharacterReference(mViewSource)";
424 }
425
/**
 * Returns the STATE_LOOP_POLICIES array itself (not a copy). The visitor
 * prints its entries as template arguments on Tokenizer stateLoop calls.
 */
426 public String[] stateLoopPolicies() {
427 return STATE_LOOP_POLICIES;
428 }
429393 }
150150 }
151151 }
152152
/**
 * Appends arg to the buffer exactly as given and clears the indented flag.
 * Unlike print, makeIndent is never called here.
 *
 * @param arg the text to append
 */
153 public void printWithoutIndent(String arg) {
154 indented = false;
155 buf.append(arg);
156 }
157
158153 public void print(String arg) {
159154 if (!indented) {
160155 makeIndent();
182177 }
183178 }
184179
185 private boolean supportErrorReporting = true;
180 private boolean skipRestOfStatementsInBlock = false;
186181
182 private String currentTokenizerState = null;
183
/**
 * Tells whether the method being visited is stateLoop of the Tokenizer class.
 * The constants are the equals() receivers, so a null currentMethod or
 * javaClassName yields false rather than an exception.
 */
184 private boolean inTokenizerLoop() {
185 return "stateLoop".equals(currentMethod)
186 && "Tokenizer".equals(javaClassName);
187 }
188
187189 protected SourcePrinter printer = new SourcePrinter();
188190
189191 private SourcePrinter staticInitializerPrinter = new SourcePrinter();
219221 private boolean destructor;
220222
221223 protected boolean inStatic = false;
222
223 private boolean reportTransitions = false;
224
225 private int stateLoopCallCount = 0;
226224
227225 /**
228226 * @param cppTypes
254252 private void printMembers(List<BodyDeclaration> members,
255253 LocalSymbolTable arg) {
256254 for (BodyDeclaration member : members) {
257 if ("Tokenizer".equals(javaClassName)
258 && member instanceof MethodDeclaration
259 && "stateLoop".equals(((MethodDeclaration) member).getName())) {
260 reportTransitions = true;
261 }
262255 member.accept(this, arg);
263 reportTransitions = false;
264256 }
265257 }
266258
324316 printer.print(cppTypes.localForLiteral("html"));
325317 } else if ("documentModeHandler".equals(n.getName())) {
326318 printer.print("this");
327 } else if ("errorHandler".equals(n.getName())) {
328 printer.print(cppTypes.errorHandler());
329319 } else {
330320 String prefixedName = javaClassName + "." + n.getName();
331321 String constant = symbolTable.cppDefinesByJavaNames.get(prefixedName);
13431333 }
13441334 printTypeArgs(n.getTypeArgs(), arg);
13451335 printer.print(n.getName());
1346 if ("stateLoop".equals(n.getName())
1347 && "Tokenizer".equals(javaClassName)
1348 && cppTypes.stateLoopPolicies().length > 0) {
1349 printer.print("<");
1350 printer.print(cppTypes.stateLoopPolicies()[stateLoopCallCount]);
1351 printer.print(">");
1352 stateLoopCallCount++;
1353 }
13541336 printer.print("(");
13551337 if (n.getArgs() != null) {
13561338 for (Iterator<Expression> i = n.getArgs().iterator(); i.hasNext();) {
15531535
15541536 currentMethod = n.getName();
15551537
1556 destructor = "destructor".equals(currentMethod);
1538 destructor = "destructor".equals(n.getName());
15571539
15581540 // if (n.getJavaDoc() != null) {
15591541 // n.getJavaDoc().accept(this, arg);
15701552 printModifiers(n.getModifiers());
15711553 }
15721554
1573 if ("stateLoop".equals(currentMethod)
1574 && "Tokenizer".equals(javaClassName)
1575 && cppTypes.stateLoopPolicies().length > 0) {
1576 printer.print("template<class P>");
1577 if (inHeader()) {
1578 printer.print(" ");
1579 } else {
1580 printer.printLn();
1581 }
1582 }
1583
15841555 printTypeParameters(n.getTypeParameters(), arg);
15851556 if (n.getTypeParameters() != null) {
15861557 printer.print(" ");
17681739
17691740 public void visit(ExpressionStmt n, LocalSymbolTable arg) {
17701741 Expression e = n.getExpression();
1771 if (isCompletedCharacterReference(e)) {
1772 printer.print(cppTypes.completedCharacterReference());
1773 printer.print(";");
1742 if (isDroppedExpression(e)) {
17741743 return;
1775 }
1776 boolean needsCondition = isTokenizerErrorReportingExpression(e);
1777 if (!needsCondition && isDroppedExpression(e)) {
1778 return;
1779 }
1780 if (needsCondition) {
1781 printer.print("if (");
1782 printer.print(cppTypes.tokenizerErrorCondition());
1783 printer.printLn(") {");
1784 printer.indent();
17851744 }
17861745 e.accept(this, arg);
17871746 if (!inConstructorBody) {
17881747 printer.print(";");
17891748 }
1790 if (needsCondition) {
1791 printer.printLn();
1792 printer.unindent();
1793 printer.print("}");
1794 }
17951749 }
17961750
17971751 private void visitTransition(MethodCallExpr call, LocalSymbolTable arg) {
17981752 List<Expression> args = call.getArgs();
1799 if (reportTransitions) {
1800 printer.print(cppTypes.transition());
1801 printer.print("(");
1802 printer.print(cppTypes.firstTransitionArg());
1803 printer.print(", ");
1804 args.get(1).accept(this, arg);
1805 printer.print(", ");
1806 args.get(2).accept(this, arg);
1807 printer.print(", ");
1808 args.get(3).accept(this, arg);
1809 printer.print(")");
1810 } else {
1811 args.get(1).accept(this, arg);
1812 }
1813 }
1814
1815 private boolean isTokenizerErrorReportingExpression(Expression e) {
1816 if (!reportTransitions) {
1817 return false;
1818 }
1819 if (e instanceof MethodCallExpr) {
1820 MethodCallExpr methodCallExpr = (MethodCallExpr) e;
1821 String name = methodCallExpr.getName();
1822 if (supportErrorReporting && !name.startsWith("errHtml4")
1823 && ("stateLoop".equals(currentMethod))
1824 && (name.startsWith("err") || name.startsWith("maybeErr"))) {
1825 return true;
1826 }
1827 }
1828 return false;
1829 }
1830
1831 private boolean isCompletedCharacterReference(Expression e) {
1832 if (!reportTransitions) {
1833 return false;
1834 }
1835 if (e instanceof MethodCallExpr) {
1836 MethodCallExpr methodCallExpr = (MethodCallExpr) e;
1837 String name = methodCallExpr.getName();
1838 if (name.equals("completedNamedCharacterReference")) {
1839 return true;
1840 }
1841 }
1842 return false;
1843 }
1844
1753 args.get(1).accept(this, arg);
1754 }
1755
18451756 private boolean isDroppedExpression(Expression e) {
18461757 if (e instanceof MethodCallExpr) {
18471758 MethodCallExpr methodCallExpr = (MethodCallExpr) e;
18481759 String name = methodCallExpr.getName();
1849 if (name.startsWith("fatal") || name.startsWith("note")
1850 || name.startsWith("errHtml4") || name.startsWith("warn")
1851 || name.startsWith("maybeWarn")) {
1852 return true;
1853 }
1854 if (supportErrorReporting
1855 && ("stateLoop".equals(currentMethod) && !reportTransitions)
1856 && (name.startsWith("err") || name.startsWith("maybeErr"))) {
1857 return true;
1858 }
1859 if (name.equals("completedNamedCharacterReference")
1860 && !reportTransitions) {
1760 if (name.startsWith("fatal") || name.startsWith("err")
1761 || name.startsWith("warn") || name.startsWith("maybeErr")
1762 || name.startsWith("maybeWarn") || name.startsWith("note")) {
18611763 return true;
18621764 }
18631765 }
18811783
18821784 public void visit(SwitchEntryStmt n, LocalSymbolTable arg) {
18831785 if (n.getLabel() != null) {
1884 boolean isMenuitem = n.getLabel().toString().equals("MENUITEM");
1885 if (isMenuitem) {
1886 printer.printWithoutIndent("#ifdef ENABLE_VOID_MENUITEM\n");
1887 }
18881786 printer.print("case ");
18891787 n.getLabel().accept(this, arg);
18901788 printer.print(":");
1891 if (isMenuitem) {
1892 printer.printWithoutIndent("\n#endif");
1893 }
18941789 } else {
18951790 printer.print("default:");
18961791 }
21292024 }
21302025
21312026 private boolean isErrorOnlyBlock(Statement elseStmt) {
2132 if (supportErrorReporting) {
2133 return false;
2134 }
21352027 if (elseStmt instanceof BlockStmt) {
21362028 BlockStmt block = (BlockStmt) elseStmt;
21372029 List<Statement> statements = block.getStmts();
21572049 }
21582050
21592051 private boolean isErrorHandlerIf(Expression condition) {
2160 if (supportErrorReporting) {
2161 return false;
2162 }
21632052 return condition.toString().indexOf("errorHandler") != -1;
21642053 }
21652054
3030 import java.io.Writer;
3131 import java.util.regex.Matcher;
3232 import java.util.regex.Pattern;
33
34 import nu.validator.htmlparser.impl.Tokenizer;
3335
3436 /**
3537 * Applies a workaround that splits the <code>stateLoop</code> method in the