2 * @(#)ParserImpl.java 1.11 2000/08/16
6 package net.sourceforge.phpdt.tidy;
10 * HTML Parser implementation
12 * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
13 * See Tidy.java for the copyright notice.
14 * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
15 * HTML Tidy Release 4 Aug 2000</a>
17 * @author Dave Raggett <dsr@w3.org>
18 * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
19 * @version 1.0, 1999/05/22
20 * @version 1.0.1, 1999/05/29
21 * @version 1.1, 1999/06/18 Java Bean
22 * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
23 * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
24 * @version 1.4, 1999/09/04 DOM support
25 * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
26 * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
27 * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
28 * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
29 * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
30 * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
31 * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
34 public class ParserImpl {
36 //private static int SeenBodyEndTag; /* AQ: moved into lexer structure */
// Hands a freshly inserted node to the content parser registered on its tag.
// Visible steps, in order:
//   - clear lexer.insertspace when the tag is not CM_INLINE,
//   - clear lexer.waswhite when the tag is CM_EMPTY,
//   - test for a missing parser or a StartEndTag node,
//   - call node.tag.parser.parse(lexer, node, mode).
// NOTE(review): this listing omits brace-only and bare-statement lines, so the
// early exits that presumably follow the CM_EMPTY test and the
// parser == null / StartEndTag test are not visible here. Confirm against the
// complete file before relying on this description.
// NOTE(review): the terminator of the disabled original-code comment below is
// also missing from this listing.
38 private static void parseTag(Lexer lexer, Node node, short mode)
40 // Local fix by GLP 2000-12-21. Need to reset insertspace if this
41 // is both a non-inline and empty tag (base, link, meta, isindex, hr, area).
42 // Remove this code once the fix is made in Tidy.
44 /****** (Original code follows)
45 if ((node.tag.model & Dict.CM_EMPTY) != 0)
47 lexer.waswhite = false;
50 else if (!((node.tag.model & Dict.CM_INLINE) != 0))
51 lexer.insertspace = false;
// Replacement ordering: the insertspace reset now runs before the CM_EMPTY
// test, so it also applies to tags that are both non-inline and empty.
54 if (!((node.tag.model & Dict.CM_INLINE) != 0))
55 lexer.insertspace = false;
57 if ((node.tag.model & Dict.CM_EMPTY) != 0)
59 lexer.waswhite = false;
63 if (node.tag.parser == null || node.type == Node.StartEndTag)
66 node.tag.parser.parse(lexer, node, mode);
// Relocates a node that was found outside the head element into it.
//   lexer   - supplies the tag table (lexer.configuration.tt) and is passed to Report
//   element - the element in which the node was encountered; used as the
//             starting point of the walk up to the html element
//   node    - the misplaced node
// For start and start-end tags a TAG_NOT_ALLOWED_IN warning is issued, the
// html element is located by following parent links, its children are scanned
// for the head element, and the node is appended there. If the tag has a
// parser the node is then parsed with Lexer.IgnoreWhitespace.
// NOTE(review): the final DISCARDING_UNEXPECTED warning presumably sits in an
// else branch for other node types; the else line is not present in this listing.
69 private static void moveToHead(Lexer lexer, Node element, Node node)
72 TagTable tt = lexer.configuration.tt;
75 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
77 Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
// NOTE(review): no null check on element.parent is visible in this walk, so it
// relies on element always having an html ancestor.
79 while (element.tag != tt.tagHtml)
80 element = element.parent;
// element now refers to the html element; look for head among its children.
82 for (head = element.content; head != null; head = head.next)
84 if (head.tag == tt.tagHead)
86 Node.insertNodeAtEnd(head, node);
91 if (node.tag.parser != null)
92 parseTag(lexer, node, Lexer.IgnoreWhitespace);
96 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
// Parser for the children of the html element.
// Visible flow:
//   1. reset lexer.configuration.XmlTags and lexer.seenBodyEndTag;
//   2. read tokens with Lexer.IgnoreWhitespace looking for the head element
//      (lexer.inferredTag is called for head at two points in this phase),
//      then append head to html and parse it through getParseHead();
//   3. read further tokens, remembering the frameset and noframes nodes,
//      until a body node is found or inferred;
//   4. append that body node to html and parse it.
// NOTE(review): brace-only lines, the declarations of node and head, and bare
// control-flow statements are missing from this listing. Branch boundaries
// described in the comments below are inferred from the surviving comments
// and must be confirmed against the complete file.
100 public static class ParseHTML implements Parser {
102 public void parse( Lexer lexer, Node html, short mode )
105 Node frameset = null;
106 Node noframes = null;
108 lexer.configuration.XmlTags = false;
109 lexer.seenBodyEndTag = 0;
110 TagTable tt = lexer.configuration.tt;
// Phase 1: find or infer the head element.
114 node = lexer.getToken(Lexer.IgnoreWhitespace);
118 node = lexer.inferredTag("head");
122 if (node.tag == tt.tagHead)
125 if (node.tag == html.tag && node.type == Node.EndTag)
127 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
131 /* deal with comments etc. */
132 if (Node.insertMisc(html, node))
136 node = lexer.inferredTag("head");
141 Node.insertNodeAtEnd(html, head);
142 getParseHead().parse(lexer, head, mode);
// Phase 2: find body, frameset or noframes content following the head.
146 node = lexer.getToken(Lexer.IgnoreWhitespace);
150 if (frameset == null) /* create an empty body */
151 node = lexer.inferredTag("body");
156 /* robustly handle html tags */
157 if (node.tag == html.tag)
159 if (node.type != Node.StartTag && frameset == null)
160 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
165 /* deal with comments etc. */
166 if (Node.insertMisc(html, node))
169 /* if frameset document coerce <body> to <noframes> */
170 if (node.tag == tt.tagBody)
172 if (node.type != Node.StartTag)
174 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
178 if (frameset != null)
// A noframes element is created on demand inside the frameset and reported
// with INSERTING_TAG before its content is parsed.
182 if (noframes == null)
184 noframes = lexer.inferredTag("noframes");
185 Node.insertNodeAtEnd(frameset, noframes);
186 Report.warning(lexer, html, noframes, Report.INSERTING_TAG);
189 parseTag(lexer, noframes, mode);
193 break; /* to parse body */
196 /* flag an error if we see more than one frameset */
197 if (node.tag == tt.tagFrameset)
199 if (node.type != Node.StartTag)
201 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
205 if (frameset != null)
206 Report.error(lexer, html, node, Report.DUPLICATE_FRAMESET);
210 Node.insertNodeAtEnd(html, node);
211 parseTag(lexer, node, mode);
214 see if it includes a noframes element so
215 that we can merge subsequent noframes elements
// Scan the children of the parsed frameset for an existing noframes element.
218 for (node = frameset.content; node != null; node = node.next)
220 if (node.tag == tt.tagNoframes)
226 /* if not a frameset document coerce <noframes> to <body> */
227 if (node.tag == tt.tagNoframes)
229 if (node.type != Node.StartTag)
231 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
235 if (frameset == null)
237 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
238 node = lexer.inferredTag("body");
242 if (noframes == null)
245 Node.insertNodeAtEnd(frameset, noframes);
248 parseTag(lexer, noframes, mode);
// Start tags whose model includes CM_HEAD are relocated into the head element.
252 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
254 if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
256 moveToHead(lexer, html, node);
263 /* insert other content into noframes element */
265 if (frameset != null)
267 if (noframes == null)
269 noframes = lexer.inferredTag("noframes");
270 Node.insertNodeAtEnd(frameset, noframes);
273 Report.warning(lexer, html, node, Report.NOFRAMES_CONTENT);
275 parseTag(lexer, noframes, mode);
279 node = lexer.inferredTag("body");
283 /* node must be body */
285 Node.insertNodeAtEnd(html, node);
286 parseTag(lexer, node, mode);
// Parser for the children of the head element.
// Tokens are read with Lexer.IgnoreWhitespace until the stream ends or the
// end tag of head is seen. Visible handling per token:
//   - text nodes and tags without CM_HEAD are tested separately (the actions
//     taken for them are on lines missing from this listing),
//   - comments and similar nodes are attached through Node.insertMisc,
//   - doctype nodes go through Node.insertDocType,
//   - unknown tags are discarded with a DISCARDING_UNEXPECTED warning,
//   - start and start-end tags are appended to head and parsed with
//     Lexer.IgnoreWhitespace; title and base can raise TOO_MANY_ELEMENTS and
//     noscript raises TAG_NOT_ALLOWED_IN.
// NOTE(review): the counters that decide when TOO_MANY_ELEMENTS is raised, and
// the condition guarding the MISSING_TITLE_ELEMENT fallback at the end, are on
// lines missing from this listing.
291 public static class ParseHead implements Parser {
293 public void parse( Lexer lexer, Node head, short mode )
298 TagTable tt = lexer.configuration.tt;
302 node = lexer.getToken(Lexer.IgnoreWhitespace);
303 if (node == null) break;
304 if (node.tag == head.tag && node.type == Node.EndTag)
310 if (node.type == Node.TextNode)
316 /* deal with comments etc. */
317 if (Node.insertMisc(head, node))
320 if (node.type == Node.DocTypeTag)
322 Node.insertDocType(lexer, head, node);
326 /* discard unknown tags */
327 if (node.tag == null)
329 Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
// Tags whose model lacks CM_HEAD do not belong in head.
333 if (!((node.tag.model & Dict.CM_HEAD) != 0))
339 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
341 if (node.tag == tt.tagTitle)
346 Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
348 else if (node.tag == tt.tagBase)
353 Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
355 else if (node.tag == tt.tagNoscript)
356 Report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
358 Node.insertNodeAtEnd(head, node);
359 parseTag(lexer, node, Lexer.IgnoreWhitespace);
363 /* discard unexpected text nodes and end tags */
364 Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
// Fallback after the token loop: warn and append an inferred title element.
369 Report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT);
370 Node.insertNodeAtEnd(head, lexer.inferredTag( "title"));
// Parser for the content of the title element.
// Tokens are read with Lexer.MixedContent. Visible handling:
//   - the matching end tag trims trailing spaces of title,
//   - text nodes have leading space trimmed when they are the first child and
//     are appended to title (a start >= end emptiness test precedes the append),
//   - comments and similar nodes are attached through Node.insertMisc,
//   - unknown tags are discarded with a warning,
//   - any other token produces MISSING_ENDTAG_BEFORE and title is trimmed,
//   - if the token stream runs out, MISSING_ENDTAG_FOR is reported.
// NOTE(review): the pushback call named in the surviving comment and the exits
// from the loop are on lines missing from this listing.
376 public static class ParseTitle implements Parser {
378 public void parse( Lexer lexer, Node title, short mode )
384 node = lexer.getToken(Lexer.MixedContent);
385 if (node == null) break;
386 if (node.tag == title.tag && node.type == Node.EndTag)
389 Node.trimSpaces(lexer, title);
393 if (node.type == Node.TextNode)
395 /* only called for 1st child */
396 if (title.content == null)
397 Node.trimInitialSpace(lexer, title, node);
// start >= end means the text node has become empty after trimming.
399 if (node.start >= node.end)
404 Node.insertNodeAtEnd(title, node);
408 /* deal with comments etc. */
409 if (Node.insertMisc(title, node))
412 /* discard unknown tags */
413 if (node.tag == null)
415 Report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED);
419 /* pushback unexpected tokens */
420 Report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE);
422 Node.trimSpaces(lexer, title);
426 Report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR);
// Parser for script content: asks the lexer for the CDATA content of the
// script element and appends the resulting node to it.
// The four prose lines below are the remains of a block comment whose
// delimiters are missing from this listing; they record a known limitation of
// the CDATA handling.
// NOTE(review): any null check between getCDATA and the insert is not visible here.
431 public static class ParseScript implements Parser {
433 public void parse( Lexer lexer, Node script, short mode )
436 This isn't quite right for CDATA content as it recognises
437 tags within the content and parses them accordingly.
438 This will unfortunately screw up scripts which include
439 < + letter, < + !, < + ? or < + / + letter
444 node = lexer.getCDATA( script);
447 Node.insertNodeAtEnd(script, node);
// Parser for the content of the body element.
// The incoming mode is overwritten with Lexer.IgnoreWhitespace on entry and is
// switched to Lexer.MixedContent after text or inline content has been added.
// Visible handling per token:
//   - the body end tag trims body, sets lexer.seenBodyEndTag to 1 and resets mode,
//   - noframes, frame, frameset and html tags get dedicated tests,
//   - the first non-whitespace token after the body end tag raises CONTENT_AFTER_BODY,
//   - text is either wrapped in an inferred p (when configuration.EncloseBodyText
//     is set) or inserted directly, which clears the strict and HTML 2.0 version bits,
//   - tags that are neither CM_BLOCK nor CM_INLINE are diverted: body attributes
//     may be copied, head content is moved, and ul, dl or table containers are inferred,
//   - end tags for br and p are coerced to br start tags, other inline end tags
//     are popped from the inline stack,
//   - start tags are appended to body and parsed.
// NOTE(review): brace-only lines, local declarations and bare control-flow
// statements are missing from this listing, so exact branch boundaries must be
// confirmed against the complete file.
452 public static class ParseBody implements Parser {
454 public void parse( Lexer lexer, Node body, short mode )
457 boolean checkstack, iswhitenode;
459 mode = Lexer.IgnoreWhitespace;
461 TagTable tt = lexer.configuration.tt;
465 node = lexer.getToken(mode);
466 if (node == null) break;
467 if (node.tag == body.tag && node.type == Node.EndTag)
470 Node.trimSpaces(lexer, body);
471 lexer.seenBodyEndTag = 1;
472 mode = Lexer.IgnoreWhitespace;
474 if (body.parent.tag == tt.tagNoframes)
480 if (node.tag == tt.tagNoframes)
482 if (node.type == Node.StartTag)
484 Node.insertNodeAtEnd(body, node);
485 getParseBlock().parse(lexer, node, mode);
489 if (node.type == Node.EndTag &&
490 body.parent.tag == tt.tagNoframes)
492 Node.trimSpaces(lexer, body);
498 if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset)
499 && body.parent.tag == tt.tagNoframes)
501 Node.trimSpaces(lexer, body);
506 if (node.tag == tt.tagHtml)
508 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
509 Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
// Whitespace test: a text node of at most one character whose first byte is a space.
516 if (node.type == Node.TextNode &&
517 node.end <= node.start + 1 &&
518 node.textarray[node.start] == (byte)' ')
521 /* deal with comments etc. */
522 if (Node.insertMisc(body, node))
// seenBodyEndTag moves from 1 to 2 here, so this test cannot match again
// until the counter is reset to 1.
525 if (lexer.seenBodyEndTag == 1 && !iswhitenode)
527 ++lexer.seenBodyEndTag;
528 Report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
531 /* mixed content model permits text */
532 if (node.type == Node.TextNode)
534 if (iswhitenode && mode == Lexer.IgnoreWhitespace)
539 if (lexer.configuration.EncloseBodyText && !iswhitenode)
544 para = lexer.inferredTag("p");
545 Node.insertNodeAtEnd(body, para);
546 parseTag(lexer, para, mode);
547 mode = Lexer.MixedContent;
550 else /* strict doesn't allow text here */
551 lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
557 if (lexer.inlineDup( node) > 0)
561 Node.insertNodeAtEnd(body, node);
562 mode = Lexer.MixedContent;
566 if (node.type == Node.DocTypeTag)
568 Node.insertDocType(lexer, body, node);
571 /* discard unknown and PARAM tags */
572 if (node.tag == null || node.tag == tt.tagParam)
574 Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
579 Netscape allows LI and DD directly in BODY
580 We infer UL or DL respectively and use this
581 boolean to exclude block-level elements so as
582 to match Netscape's observed behaviour.
584 lexer.excludeBlocks = false;
586 if (!((node.tag.model & Dict.CM_BLOCK) != 0) &&
587 !((node.tag.model & Dict.CM_INLINE) != 0))
589 /* avoid this error message being issued twice */
590 if (!((node.tag.model & Dict.CM_HEAD) != 0))
591 Report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
593 if ((node.tag.model & Dict.CM_HTML) != 0)
595 /* copy body attributes if current body was inferred */
596 if (node.tag == tt.tagBody && body.implicit
597 && body.attributes == null)
// Ownership of the attribute list is transferred, not duplicated.
599 body.attributes = node.attributes;
600 node.attributes = null;
606 if ((node.tag.model & Dict.CM_HEAD) != 0)
608 moveToHead(lexer, body, node);
// Container inference: list items get ul, definition items get dl, and
// table parts get table. Each case sets lexer.excludeBlocks.
612 if ((node.tag.model & Dict.CM_LIST) != 0)
615 node = lexer.inferredTag( "ul");
616 Node.addClass(node, "noindent");
617 lexer.excludeBlocks = true;
619 else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
622 node = lexer.inferredTag( "dl");
623 lexer.excludeBlocks = true;
625 else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0)
628 node = lexer.inferredTag( "table");
629 lexer.excludeBlocks = true;
633 /* AQ: The following line is from the official C
634 version of tidy. It doesn't make sense to me
635 because the '!' operator has higher precedence
636 than the '&' operator. It seems to me that the
637 expression always evaluates to 0.
639 if (!node->tag->model & (CM_ROW | CM_FIELD))
641 AQ: 13Jan2000 fixed in C tidy
643 if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0))
649 /* ignore </td> </th> <option> etc. */
654 if (node.type == Node.EndTag)
656 if (node.tag == tt.tagBr)
657 node.type = Node.StartTag;
658 else if (node.tag == tt.tagP)
// A stray p end tag becomes two br elements: the coerced node is inserted
// here and a second, inferred br replaces node.
660 Node.coerceNode(lexer, node, tt.tagBr);
661 Node.insertNodeAtEnd(body, node);
662 node = lexer.inferredTag("br");
664 else if ((node.tag.model & Dict.CM_INLINE) != 0)
665 lexer.popInline(node);
668 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
670 if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0))
672 /* HTML4 strict doesn't allow inline content here */
673 /* but HTML2 does allow img elements as children of body */
674 if (node.tag == tt.tagImg)
675 lexer.versions &= ~Dict.VERS_HTML40_STRICT;
677 lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
679 if (checkstack && !node.implicit)
683 if (lexer.inlineDup( node) > 0)
687 mode = Lexer.MixedContent;
692 mode = Lexer.IgnoreWhitespace;
696 Report.warning(lexer, body, node, Report.INSERTING_TAG);
698 Node.insertNodeAtEnd(body, node);
699 parseTag(lexer, node, mode);
703 /* discard unexpected tags */
704 Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
// Parser for the content of a frameset element.
// Records frame usage in lexer.badAccess, then reads tokens with
// Lexer.IgnoreWhitespace. Visible handling:
//   - the matching end tag marks frameset as closed and trims it,
//   - comments and similar nodes are attached through Node.insertMisc,
//   - unknown tags are discarded with a warning,
//   - start tags carrying CM_HEAD are moved into the head element,
//   - a body tag is replaced by an inferred noframes node (INSERTING_TAG),
//   - start tags with CM_FRAMES are appended and parsed with Lexer.MixedContent
//     after lexer.excludeBlocks is cleared; start-end tags with CM_FRAMES are
//     only appended,
//   - anything else is discarded with a warning.
// MISSING_ENDTAG_FOR is reported when the token stream ends first.
// NOTE(review): loop exits and brace-only lines are missing from this listing.
710 public static class ParseFrameSet implements Parser {
712 public void parse( Lexer lexer, Node frameset, short mode )
715 TagTable tt = lexer.configuration.tt;
717 lexer.badAccess |= Report.USING_FRAMES;
721 node = lexer.getToken(Lexer.IgnoreWhitespace);
722 if (node == null) break;
723 if (node.tag == frameset.tag && node.type == Node.EndTag)
725 frameset.closed = true;
726 Node.trimSpaces(lexer, frameset);
730 /* deal with comments etc. */
731 if (Node.insertMisc(frameset, node))
734 if (node.tag == null)
736 Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
740 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
742 if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
744 moveToHead(lexer, frameset, node);
// A body tag inside a frameset is swapped for an inferred noframes node.
749 if (node.tag == tt.tagBody)
752 node = lexer.inferredTag("noframes");
753 Report.warning(lexer, frameset, node, Report.INSERTING_TAG);
756 if (node.type == Node.StartTag && (node.tag.model & Dict.CM_FRAMES) != 0)
758 Node.insertNodeAtEnd(frameset, node);
759 lexer.excludeBlocks = false;
760 parseTag(lexer, node, Lexer.MixedContent);
763 else if (node.type == Node.StartEndTag && (node.tag.model & Dict.CM_FRAMES) != 0)
765 Node.insertNodeAtEnd(frameset, node);
769 /* discard unexpected tags */
770 Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
773 Report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR);
// Parser for inline content; per the surviving comment it is also used for
// some block level elements such as H1 to H6.
// Visible set-up:
//   - elements with CM_EMPTY are tested first,
//   - an anchor without attributes is discarded with a warning,
//   - block elements and dt call lexer.inlineDup(null); inline elements other
//     than a and span are pushed on the inline stack,
//   - nobr and font usage is recorded in lexer.badLayout,
//   - mode becomes Lexer.MixedContent unless it is Lexer.Preformatted.
// Visible handling per token, in order: matching end tag (including the
// font / anchor swap), duplicate emphasis start tags, text, misc nodes, html
// tags, p inside dt or pre, unknown and param tags, end tags, heading end
// tags, anchor start tags, center / div / hr inside headings, hr inside dt,
// end tags of ancestors, block level tags, and finally inline start tags
// which are appended and parsed recursively.
// NOTE(review): brace-only lines, local declarations (node, parent, dd) and
// bare control-flow statements are missing from this listing, so branch
// boundaries described below must be confirmed against the complete file.
778 public static class ParseInline implements Parser {
780 public void parse( Lexer lexer, Node element, short mode )
783 TagTable tt = lexer.configuration.tt;
785 if ((element.tag.model & Dict.CM_EMPTY) != 0)
788 if (element.tag == tt.tagA)
790 if (element.attributes == null)
792 Report.warning(lexer, element.parent, element, Report.DISCARDING_UNEXPECTED);
793 Node.discardElement(element);
799 ParseInline is used for some block level elements like H1 to H6
800 For such elements we need to insert inline emphasis tags currently
801 on the inline stack. For Inline elements, we normally push them
802 onto the inline stack provided they aren't implicit or OBJECT/APPLET.
803 This test is carried out in PushInline and PopInline, see istack.c
804 We don't push A or SPAN to replicate current browser behavior
806 if (((element.tag.model & Dict.CM_BLOCK) != 0) || (element.tag == tt.tagDt))
807 lexer.inlineDup( null);
808 else if ((element.tag.model & Dict.CM_INLINE) != 0 &&
809 element.tag != tt.tagA && element.tag != tt.tagSpan)
810 lexer.pushInline( element);
812 if (element.tag == tt.tagNobr)
813 lexer.badLayout |= Report.USING_NOBR;
814 else if (element.tag == tt.tagFont)
815 lexer.badLayout |= Report.USING_FONT;
817 /* Inline elements may or may not be within a preformatted element */
818 if (mode != Lexer.Preformatted)
819 mode = Lexer.MixedContent;
823 node = lexer.getToken(mode);
824 if (node == null) break;
825 /* end tag for current element */
826 if (node.tag == element.tag && node.type == Node.EndTag)
828 if ((element.tag.model & Dict.CM_INLINE) != 0 &&
829 element.tag != tt.tagA)
830 lexer.popInline( node);
832 if (!((mode & Lexer.Preformatted) != 0))
833 Node.trimSpaces(lexer, element);
835 if a font element wraps an anchor and nothing else
836 then move the font element inside the anchor since
837 otherwise it won't alter the anchor text color
// content == last means the font element has exactly one child.
839 if (element.tag == tt.tagFont &&
840 element.content != null &&
841 element.content == element.last)
843 Node child = element.content;
845 if (child.tag == tt.tagA)
// Step 1: splice the anchor into the position the font element occupied.
847 child.parent = element.parent;
848 child.next = element.next;
849 child.prev = element.prev;
851 if (child.prev != null)
852 child.prev.next = child;
854 child.parent.content = child;
856 if (child.next != null)
857 child.next.prev = child;
859 child.parent.last = child;
// Step 2: make the font element the only child of the anchor and hand it the
// former children of the anchor, re-parenting each of them.
863 element.parent = child;
864 element.content = child.content;
865 element.last = child.last;
866 child.content = element;
867 child.last = element;
868 for (child = element.content; child != null; child = child.next)
869 child.parent = element;
872 element.closed = true;
873 Node.trimSpaces(lexer, element);
874 Node.trimEmptyElement(lexer, element);
878 /* <u>...<u> map 2nd <u> to </u> if 1st is explicit */
879 /* otherwise emphasis nesting is probably unintentional */
880 /* big and small have cumulative effect to leave them alone */
881 if (node.type == Node.StartTag
882 && node.tag == element.tag
883 && lexer.isPushed(node)
886 && node.tag != null && ((node.tag.model & Dict.CM_INLINE) != 0)
887 && node.tag != tt.tagA
888 && node.tag != tt.tagFont
889 && node.tag != tt.tagBig
890 && node.tag != tt.tagSmall)
// A repeated start tag with no attributes, seen after some content, is
// treated as the missing end tag.
892 if (element.content != null && node.attributes == null)
894 Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
895 node.type = Node.EndTag;
900 Report.warning(lexer, element, node, Report.NESTED_EMPHASIS);
903 if (node.type == Node.TextNode)
905 /* only called for 1st child */
906 if (element.content == null &&
907 !((mode & Lexer.Preformatted) != 0))
908 Node.trimSpaces(lexer, element);
910 if (node.start >= node.end)
915 Node.insertNodeAtEnd(element, node);
919 /* mixed content model so allow text */
920 if (Node.insertMisc(element, node))
923 /* deal with HTML tags */
924 if (node.tag == tt.tagHtml)
926 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
928 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
932 /* otherwise infer end of inline element */
934 if (!((mode & Lexer.Preformatted) != 0))
935 Node.trimSpaces(lexer, element);
936 Node.trimEmptyElement(lexer, element);
940 /* within <dt> or <pre> map <p> to <br> */
941 if (node.tag == tt.tagP &&
942 node.type == Node.StartTag &&
943 ((mode & Lexer.Preformatted) != 0 ||
944 element.tag == tt.tagDt ||
945 element.isDescendantOf(tt.tagDt)))
949 Node.trimSpaces(lexer, element);
950 Node.insertNodeAtEnd(element, node);
954 /* ignore unknown and PARAM tags */
955 if (node.tag == null || node.tag == tt.tagParam)
957 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
961 if (node.tag == tt.tagBr && node.type == Node.EndTag)
962 node.type = Node.StartTag;
964 if (node.type == Node.EndTag)
966 /* coerce </br> to <br> */
// NOTE(review): a br end tag was already turned into a start tag just above,
// so this br test looks unreachable in the visible code; confirm before removing.
967 if (node.tag == tt.tagBr)
968 node.type = Node.StartTag;
969 else if (node.tag == tt.tagP)
971 /* coerce unmatched </p> to <br><br> */
972 if (!element.isDescendantOf(tt.tagP))
974 Node.coerceNode(lexer, node, tt.tagBr);
975 Node.trimSpaces(lexer, element);
976 Node.insertNodeAtEnd(element, node);
977 node = lexer.inferredTag("br");
981 else if ((node.tag.model & Dict.CM_INLINE) != 0
982 && node.tag != tt.tagA
983 && !((node.tag.model & Dict.CM_OBJECT) != 0)
984 && (element.tag.model & Dict.CM_INLINE) != 0)
986 /* allow any inline end tag to end current element */
987 lexer.popInline( element);
989 if (element.tag != tt.tagA)
991 if (node.tag == tt.tagA && node.tag != element.tag)
993 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
998 Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
1001 if (!((mode & Lexer.Preformatted) != 0))
1002 Node.trimSpaces(lexer, element);
1003 Node.trimEmptyElement(lexer, element);
1007 /* if parent is <a> then discard unexpected inline end tag */
1008 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1010 } /* special case </tr> etc. for stuff moved in front of table */
// NOTE(review): the model != 0 test is implied by the CM_TABLE mask test that
// follows it.
1011 else if (lexer.exiled
1012 && node.tag.model != 0
1013 && (node.tag.model & Dict.CM_TABLE) != 0)
1016 Node.trimSpaces(lexer, element);
1017 Node.trimEmptyElement(lexer, element);
1022 /* allow any header tag to end current header */
1023 if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0)
1025 if (node.tag == element.tag)
1027 Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
1031 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1034 if (!((mode & Lexer.Preformatted) != 0))
1035 Node.trimSpaces(lexer, element);
1036 Node.trimEmptyElement(lexer, element);
1041 an <A> tag to ends any open <A> element
1042 but <A href=...> is mapped to </A><A href=...>
1044 if (node.tag == tt.tagA && !node.implicit && lexer.isPushed(node))
1046 /* coerce <a> to </a> unless it has some attributes */
1047 if (node.attributes == null)
1049 node.type = Node.EndTag;
1050 Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
1051 lexer.popInline( node);
1057 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1058 lexer.popInline( element);
1059 if (!((mode & Lexer.Preformatted) != 0))
1060 Node.trimSpaces(lexer, element);
1061 Node.trimEmptyElement(lexer, element);
1065 if ((element.tag.model & Dict.CM_HEADING) != 0)
1067 if (node.tag == tt.tagCenter ||
1068 node.tag == tt.tagDiv)
1070 if (node.type != Node.StartTag &&
1071 node.type != Node.StartEndTag)
1073 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1077 Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1079 /* insert center as parent if heading is empty */
1080 if (element.content == null)
1082 Node.insertNodeAsParent(element, node);
1086 /* split heading and make center parent of 2nd part */
1087 Node.insertNodeAfterElement(element, node);
1089 if (!((mode & Lexer.Preformatted) != 0))
1090 Node.trimSpaces(lexer, element);
// The clone gets an empty lexer span (start == end == lexsize) and becomes
// the element that receives the remaining content.
1092 element = lexer.cloneNode(element);
1093 element.start = lexer.lexsize;
1094 element.end = lexer.lexsize;
1095 Node.insertNodeAtEnd(node, element);
1099 if (node.tag == tt.tagHr)
1101 if (node.type != Node.StartTag &&
1102 node.type != Node.StartEndTag)
1104 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1108 Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1110 /* insert hr before heading if heading is empty */
1111 if (element.content == null)
1113 Node.insertNodeBeforeElement(element, node);
1117 /* split heading and insert hr before 2nd part */
1118 Node.insertNodeAfterElement(element, node);
1120 if (!((mode & Lexer.Preformatted) != 0))
1121 Node.trimSpaces(lexer, element);
1123 element = lexer.cloneNode(element);
1124 element.start = lexer.lexsize;
1125 element.end = lexer.lexsize;
1126 Node.insertNodeAfterElement(node, element);
1131 if (element.tag == tt.tagDt)
1133 if (node.tag == tt.tagHr)
1137 if (node.type != Node.StartTag &&
1138 node.type != Node.StartEndTag)
1140 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1144 Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
// The hr is wrapped in an inferred dd so that it stays valid inside the list.
1145 dd = lexer.inferredTag("dd");
1147 /* insert hr within dd before dt if dt is empty */
1148 if (element.content == null)
1150 Node.insertNodeBeforeElement(element, dd);
1151 Node.insertNodeAtEnd(dd, node);
1155 /* split dt and insert hr within dd before 2nd part */
1156 Node.insertNodeAfterElement(element, dd);
1157 Node.insertNodeAtEnd(dd, node);
1159 if (!((mode & Lexer.Preformatted) != 0))
1160 Node.trimSpaces(lexer, element);
1162 element = lexer.cloneNode(element);
1163 element.start = lexer.lexsize;
1164 element.end = lexer.lexsize;
1165 Node.insertNodeAfterElement(dd, element);
1172 if this is the end tag for an ancestor element
1173 then infer end tag for this element
1175 if (node.type == Node.EndTag)
1177 for (parent = element.parent;
1178 parent != null; parent = parent.parent)
1180 if (node.tag == parent.tag)
// NOTE(review): the second operand of the condition below is on a line
// missing from this listing.
1182 if (!((element.tag.model & Dict.CM_OPT) != 0) &&
1184 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1186 if (element.tag == tt.tagA)
1187 lexer.popInline(element);
1191 if (!((mode & Lexer.Preformatted) != 0))
1192 Node.trimSpaces(lexer, element);
1194 Node.trimEmptyElement(lexer, element);
1200 /* block level tags end this element */
1201 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1203 if (node.type != Node.StartTag)
1205 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1209 if (!((element.tag.model & Dict.CM_OPT) != 0))
1210 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1212 if ((node.tag.model & Dict.CM_HEAD) != 0 &&
1213 !((node.tag.model & Dict.CM_BLOCK) != 0))
1215 moveToHead(lexer, element, node);
1220 prevent anchors from propagating into block tags
1221 except for headings h1 to h6
1223 if (element.tag == tt.tagA)
1225 if (node.tag != null &&
1226 !((node.tag.model & Dict.CM_HEADING) != 0))
1227 lexer.popInline(element);
1228 else if (!(element.content != null))
1230 Node.discardElement(element);
1238 if (!((mode & Lexer.Preformatted) != 0))
1239 Node.trimSpaces(lexer, element);
1241 Node.trimEmptyElement(lexer, element);
1245 /* parse inline element */
1246 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1249 Report.warning(lexer, element, node, Report.INSERTING_TAG);
1251 /* trim white space before <br> */
1252 if (node.tag == tt.tagBr)
1253 Node.trimSpaces(lexer, element);
1255 Node.insertNodeAtEnd(element, node);
1256 parseTag(lexer, node, mode);
1260 /* discard unexpected tags */
1261 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
// Reached when the token stream ends before the element is closed.
1264 if (!((element.tag.model & Dict.CM_OPT) != 0))
1265 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
1267 Node.trimEmptyElement(lexer, element);
// Parser for list containers whose children are li elements.
// Elements with CM_EMPTY are tested first. lexer.insert is then set to -1 to
// defer implicit inline start tags, and tokens are read with
// Lexer.IgnoreWhitespace. Visible handling:
//   - the matching end tag coerces CM_OBSOLETE list types to ul and trims the
//     list if it is empty,
//   - comments and similar nodes are attached through Node.insertMisc,
//   - non-text nodes without a known tag are discarded with a warning,
//   - end tags: form and inline end tags are discarded (inline ones are also
//     popped from the inline stack), an end tag matching an ancestor closes
//     this list with MISSING_ENDTAG_BEFORE, anything else is discarded,
//   - content that is not li: block content closes the list when
//     lexer.excludeBlocks is set, otherwise an li with the style
//     list-style: none is inferred to hold it,
//   - the resulting li is appended and parsed with Lexer.IgnoreWhitespace.
// NOTE(review): brace-only lines, pushback calls and loop exits are missing
// from this listing.
1271 public static class ParseList implements Parser {
1273 public void parse( Lexer lexer, Node list, short mode )
1277 TagTable tt = lexer.configuration.tt;
1279 if ((list.tag.model & Dict.CM_EMPTY) != 0)
1282 lexer.insert = -1; /* defer implicit inline start tags */
1286 node = lexer.getToken(Lexer.IgnoreWhitespace);
1287 if (node == null) break;
1289 if (node.tag == list.tag && node.type == Node.EndTag)
1291 if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1292 Node.coerceNode(lexer, list, tt.tagUl);
1295 Node.trimEmptyElement(lexer, list);
1299 /* deal with comments etc. */
1300 if (Node.insertMisc(list, node))
1303 if (node.type != Node.TextNode && node.tag == null)
1305 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1310 if this is the end tag for an ancestor element
1311 then infer end tag for this element
1313 if (node.type == Node.EndTag)
1315 if (node.tag == tt.tagForm)
1318 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1322 if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0)
1324 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1325 lexer.popInline(node);
// Walk the ancestors of the list looking for the element this end tag closes.
1329 for (parent = list.parent;
1330 parent != null; parent = parent.parent)
1332 if (node.tag == parent.tag)
1334 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1337 if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1338 Node.coerceNode(lexer, list, tt.tagUl);
1340 Node.trimEmptyElement(lexer, list);
1345 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1349 if (node.tag != tt.tagLi)
1353 if (node.tag != null &&
1354 (node.tag.model & Dict.CM_BLOCK) != 0 &&
1355 lexer.excludeBlocks)
1357 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1358 Node.trimEmptyElement(lexer, list);
// Wrap stray content in an li that renders without a bullet.
1362 node = lexer.inferredTag("li");
1363 node.addAttribute("style", "list-style: none");
1364 Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1367 /* node should be <LI> */
1368 Node.insertNodeAtEnd(list, node);
1369 parseTag(lexer, node, Lexer.IgnoreWhitespace);
// Reached when the token stream ends before the list is closed.
1372 if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1373 Node.coerceNode(lexer, list, tt.tagUl);
1375 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
1376 Node.trimEmptyElement(lexer, list);
// Parser for definition lists (dl) whose children are dt and dd elements.
// Elements with CM_EMPTY are tested first. lexer.insert is then set to -1 to
// defer implicit inline start tags, and tokens are read with
// Lexer.IgnoreWhitespace. Visible handling:
//   - the matching end tag trims the list if it is empty,
//   - comments and similar nodes are attached through Node.insertMisc,
//   - text nodes cause a dt to be inferred (MISSING_STARTTAG),
//   - unknown tags are discarded with a warning,
//   - end tags: form end tags are discarded, an end tag matching an ancestor
//     closes this list with MISSING_ENDTAG_BEFORE,
//   - a center element splits the list: center is placed after the list (or
//     replaces it when the list is empty), parsed, and a new inferred dl
//     placed after center becomes the list being filled,
//   - other content that is not dt or dd either closes the list or is wrapped
//     in an inferred dd,
//   - the resulting dt or dd is appended and parsed with Lexer.IgnoreWhitespace.
// NOTE(review): brace-only lines, pushback calls and loop exits are missing
// from this listing.
1381 public static class ParseDefList implements Parser {
1383 public void parse( Lexer lexer, Node list, short mode )
1386 TagTable tt = lexer.configuration.tt;
1388 if ((list.tag.model & Dict.CM_EMPTY) != 0)
1391 lexer.insert = -1; /* defer implicit inline start tags */
1395 node = lexer.getToken(Lexer.IgnoreWhitespace);
1396 if (node == null) break;
1397 if (node.tag == list.tag && node.type == Node.EndTag)
1400 Node.trimEmptyElement(lexer, list);
1404 /* deal with comments etc. */
1405 if (Node.insertMisc(list, node))
1408 if (node.type == Node.TextNode)
1411 node = lexer.inferredTag( "dt");
1412 Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1415 if (node.tag == null)
1417 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1422 if this is the end tag for an ancestor element
1423 then infer end tag for this element
1425 if (node.type == Node.EndTag)
1427 if (node.tag == tt.tagForm)
1430 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1434 for (parent = list.parent;
1435 parent != null; parent = parent.parent)
1437 if (node.tag == parent.tag)
1439 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1442 Node.trimEmptyElement(lexer, list);
1448 /* center in a dt or a dl breaks the dl list in two */
1449 if (node.tag == tt.tagCenter)
1451 if (list.content != null)
1452 Node.insertNodeAfterElement(list, node);
1453 else /* trim empty dl list */
1455 Node.insertNodeBeforeElement(list, node);
1456 Node.discardElement(list);
1459 /* and parse contents of center */
1460 parseTag(lexer, node, mode);
1462 /* now create a new dl element */
// From here on the list parameter refers to the new dl, not the one passed in.
1463 list = lexer.inferredTag("dl");
1464 Node.insertNodeAfterElement(node, list);
1468 if (!(node.tag == tt.tagDt || node.tag == tt.tagDd))
// Tags that are neither block nor inline cannot be wrapped in a dd.
1472 if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
1474 Report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN);
1475 Node.trimEmptyElement(lexer, list);
1479 /* if DD appeared directly in BODY then exclude blocks */
1480 if (!((node.tag.model & Dict.CM_INLINE) != 0) && lexer.excludeBlocks)
1482 Node.trimEmptyElement(lexer, list);
1486 node = lexer.inferredTag( "dd");
1487 Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1490 if (node.type == Node.EndTag)
1492 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1496 /* node should be <DT> or <DD>*/
1497 Node.insertNodeAtEnd(list, node);
1498 parseTag(lexer, node, Lexer.IgnoreWhitespace);
// Reached when the token stream ends before the list is closed.
1501 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
1502 Node.trimEmptyElement(lexer, list);
/*
 * Parser for preformatted elements ("pre" is the element being filled).
 * Tokens are fetched in Lexer.Preformatted mode and appended to the
 * element until its own end tag arrives.
 *
 * What the visible code does along the way:
 *  - an element whose tag is flagged CM_OBSOLETE is coerced to tt.tagPre;
 *  - a P start tag is reported (USING_BR_INPLACE_OF), coerced to BR and
 *    appended; any other P token is reported as discarded;
 *  - a tag that is CM_HEAD but not CM_BLOCK is handed to moveToHead;
 *  - an end tag matching an ancestor is reported as MISSING_ENDTAG_BEFORE
 *    and the element is trimmed;
 *  - a non-inline start tag is reported as MISSING_ENDTAG_BEFORE, placed
 *    after the current element, and followed by a new inferred "pre".
 */
1507 public static class ParsePre implements Parser {
1509 public void parse( Lexer lexer, Node pre, short mode )
1512 TagTable tt = lexer.configuration.tt;
1514 if ((pre.tag.model & Dict.CM_EMPTY) != 0)
1517 if ((pre.tag.model & Dict.CM_OBSOLETE) != 0)
1518 Node.coerceNode(lexer, pre, tt.tagPre);
1520 lexer.inlineDup( null); /* tell lexer to insert inlines if needed */
/* token loop: a null token ends it (see the break below) */
1524 node = lexer.getToken(Lexer.Preformatted);
1525 if (node == null) break;
1526 if (node.tag == pre.tag && node.type == Node.EndTag)
1528 Node.trimSpaces(lexer, pre);
1530 Node.trimEmptyElement(lexer, pre);
1534 if (node.tag == tt.tagHtml)
1536 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1537 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1542 if (node.type == Node.TextNode)
1544 /* if first check for initial newline */
1545 if (pre.content == null)
1547 if (node.textarray[node.start] == (byte)'\n')
1550 if (node.start >= node.end)
1556 Node.insertNodeAtEnd(pre, node);
1560 /* deal with comments etc. */
1561 if (Node.insertMisc(pre, node))
1564 /* discard unknown and PARAM tags */
1565 if (node.tag == null || node.tag == tt.tagParam)
1567 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1571 if (node.tag == tt.tagP)
1573 if (node.type == Node.StartTag)
1575 Report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);
1577 /* trim white space before <p> in <pre>*/
1578 Node.trimSpaces(lexer, pre);
1580 /* coerce both <p> and </p> to <br> */
1581 Node.coerceNode(lexer, node, tt.tagBr);
1582 Node.insertNodeAtEnd(pre, node);
1586 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1591 if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0))
1593 moveToHead(lexer, pre, node);
1598 if this is the end tag for an ancestor element
1599 then infer end tag for this element
1601 if (node.type == Node.EndTag)
1603 if (node.tag == tt.tagForm)
1606 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1610 for (parent = pre.parent;
1611 parent != null; parent = parent.parent)
1613 if (node.tag == parent.tag)
1615 Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1618 Node.trimSpaces(lexer, pre);
1619 Node.trimEmptyElement(lexer, pre);
1625 /* what about head content, HEAD, BODY tags etc? */
1626 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1628 if (node.type != Node.StartTag)
1630 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1634 Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1635 lexer.excludeBlocks = true;
1637 /* check if we need to infer a container */
1638 if ((node.tag.model & Dict.CM_LIST) != 0)
1641 node = lexer.inferredTag( "ul");
1642 Node.addClass(node, "noindent");
1644 else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
1647 node = lexer.inferredTag( "dl");
1649 else if ((node.tag.model & Dict.CM_TABLE) != 0)
1652 node = lexer.inferredTag( "table");
/* put the block after the current pre, then carry on in a new inferred pre placed after the block */
1655 Node.insertNodeAfterElement(pre, node);
1656 pre = lexer.inferredTag( "pre");
1657 Node.insertNodeAfterElement(node, pre);
1658 parseTag(lexer, node, Lexer.IgnoreWhitespace);
1659 lexer.excludeBlocks = false;
1663 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1665 Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1670 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1672 /* trim white space before <br> */
1673 if (node.tag == tt.tagBr)
1674 Node.trimSpaces(lexer, pre);
1676 Node.insertNodeAtEnd(pre, node);
1677 parseTag(lexer, node, Lexer.Preformatted);
1681 /* discard unexpected tags */
1682 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1685 Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
1686 Node.trimEmptyElement(lexer, pre);
/*
 * Parser for block-level elements ("element" is the node being filled).
 * Child tokens are read and appended until the element's own end tag
 * (or an end tag matching element.was) is seen, or until a token is
 * met that implies the element has ended.
 *
 * What the visible code does:
 *  - a FORM that is a descendant of another FORM is reported as
 *    ILLEGAL_NESTING;
 *  - CM_OBJECT elements get their own inline-stack base
 *    (lexer.istackbase); the saved base is restored on the exits below;
 *  - the token mode starts as IgnoreWhitespace and switches to
 *    MixedContent after text or an inline element has been appended;
 *  - text is appended directly, or wrapped in an inferred "p" when
 *    configuration.EncloseBlockText is set and the text is not flagged
 *    by iswhitenode;
 *  - PARAM is accepted only inside CM_PARAM elements, AREA only in MAP;
 *  - non-inline start tags are checked against the table-cell,
 *    excludeBlocks and list/deflist/table rules, inferring "ul", "dl"
 *    or "table" containers where needed;
 *  - running out of tokens reports MISSING_ENDTAG_FOR unless the
 *    element's tag is CM_OPT.
 */
1691 public static class ParseBlock implements Parser {
1693 public void parse( Lexer lexer, Node element, short mode )
1695 element is node created by the lexer
1696 upon seeing the start tag, or by the
1697 parser when the start tag is inferred
1703 TagTable tt = lexer.configuration.tt;
1707 if ((element.tag.model & Dict.CM_EMPTY) != 0)
1710 if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm))
1711 Report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
1714 InlineDup() asks the lexer to insert inline emphasis tags
1715 currently pushed on the istack, but take care to avoid
1716 propagating inline emphasis inside OBJECT or APPLET.
1717 For these elements a fresh inline stack context is created
1718 and disposed of upon reaching the end of the element.
1719 They thus behave like table cells in this respect.
1721 if ((element.tag.model & Dict.CM_OBJECT) != 0)
1723 istackbase = lexer.istackbase;
1724 lexer.istackbase = lexer.istack.size();
1727 if (!((element.tag.model & Dict.CM_MIXED) != 0))
1728 lexer.inlineDup( null);
/* the mode passed in by the caller is overridden here */
1730 mode = Lexer.IgnoreWhitespace;
1734 node = lexer.getToken(mode /*Lexer.MixedContent*/);
1735 if (node == null) break;
1736 /* end tag for this element */
1737 if (node.type == Node.EndTag && node.tag != null &&
1738 (node.tag == element.tag || element.was == node.tag))
1741 if ((element.tag.model & Dict.CM_OBJECT) != 0)
1743 /* pop inline stack */
1744 while (lexer.istack.size() > lexer.istackbase)
1745 lexer.popInline( null);
1746 lexer.istackbase = istackbase;
1749 element.closed = true;
1750 Node.trimSpaces(lexer, element);
1751 Node.trimEmptyElement(lexer, element);
1755 if (node.tag == tt.tagHtml ||
1756 node.tag == tt.tagHead ||
1757 node.tag == tt.tagBody)
1759 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1760 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1765 if (node.type == Node.EndTag)
1767 if (node.tag == null)
1769 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1773 else if (node.tag == tt.tagBr)
1774 node.type = Node.StartTag;
1775 else if (node.tag == tt.tagP)
1777 Node.coerceNode(lexer, node, tt.tagBr);
1778 Node.insertNodeAtEnd(element, node);
1779 node = lexer.inferredTag("br");
1784 if this is the end tag for an ancestor element
1785 then infer end tag for this element
1787 for (parent = element.parent;
1788 parent != null; parent = parent.parent)
1790 if (node.tag == parent.tag)
1792 if (!((element.tag.model & Dict.CM_OPT) != 0))
1793 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1797 if ((element.tag.model & Dict.CM_OBJECT) != 0)
1799 /* pop inline stack */
1800 while (lexer.istack.size() > lexer.istackbase)
1801 lexer.popInline( null);
1802 lexer.istackbase = istackbase;
1805 Node.trimSpaces(lexer, element);
1806 Node.trimEmptyElement(lexer, element);
1810 /* special case </tr> etc. for stuff moved in front of table */
1812 && node.tag.model != 0
1813 && (node.tag.model & Dict.CM_TABLE) != 0)
1816 Node.trimSpaces(lexer, element);
1817 Node.trimEmptyElement(lexer, element);
1823 /* mixed content model permits text */
1824 if (node.type == Node.TextNode)
1826 boolean iswhitenode = false;
1828 if (node.type == Node.TextNode &&
1829 node.end <= node.start + 1 &&
1830 lexer.lexbuf[node.start] == (byte)' ')
1833 if (lexer.configuration.EncloseBlockText && !iswhitenode)
1836 node = lexer.inferredTag("p");
1837 Node.insertNodeAtEnd(element, node);
1838 parseTag(lexer, node, Lexer.MixedContent);
1846 if (!((element.tag.model & Dict.CM_MIXED) != 0))
1848 if (lexer.inlineDup( node) > 0)
1853 Node.insertNodeAtEnd(element, node);
1854 mode = Lexer.MixedContent;
1856 HTML4 strict doesn't allow mixed content for
1857 elements with %block; as their content model
1859 lexer.versions &= ~Dict.VERS_HTML40_STRICT;
1863 if (Node.insertMisc(element, node))
1866 /* allow PARAM elements? */
1867 if (node.tag == tt.tagParam)
1869 if (((element.tag.model & Dict.CM_PARAM) != 0) &&
1870 (node.type == Node.StartTag || node.type == Node.StartEndTag))
1872 Node.insertNodeAtEnd(element, node);
1876 /* otherwise discard it */
1877 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1881 /* allow AREA elements? */
1882 if (node.tag == tt.tagArea)
1884 if ((element.tag == tt.tagMap) &&
1885 (node.type == Node.StartTag || node.type == Node.StartEndTag))
1887 Node.insertNodeAtEnd(element, node);
1891 /* otherwise discard it */
1892 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1896 /* ignore unknown start/end tags */
1897 if (node.tag == null)
1899 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1904 Allow Dict.CM_INLINE elements here.
1906 Allow Dict.CM_BLOCK elements here unless
1907 lexer.excludeBlocks is yes.
1909 LI and DD are special cased.
1911 Otherwise infer end tag for this element.
1914 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1916 if (node.type != Node.StartTag && node.type != Node.StartEndTag)
1918 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1922 if (element.tag == tt.tagTd || element.tag == tt.tagTh)
1924 /* if parent is a table cell, avoid inferring the end of the cell */
1926 if ((node.tag.model & Dict.CM_HEAD) != 0)
1928 moveToHead(lexer, element, node);
1932 if ((node.tag.model & Dict.CM_LIST) != 0)
1935 node = lexer.inferredTag( "ul");
1936 Node.addClass(node, "noindent");
1937 lexer.excludeBlocks = true;
1939 else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
1942 node = lexer.inferredTag( "dl");
1943 lexer.excludeBlocks = true;
1946 /* infer end of current table cell */
1947 if (!((node.tag.model & Dict.CM_BLOCK) != 0))
1950 Node.trimSpaces(lexer, element);
1951 Node.trimEmptyElement(lexer, element);
1955 else if ((node.tag.model & Dict.CM_BLOCK) != 0)
1957 if (lexer.excludeBlocks)
1959 if (!((element.tag.model & Dict.CM_OPT) != 0))
1960 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
/* NOTE(review): unlike the other CM_OBJECT exits in this method, this one restores istackbase without first popping the inline stack - confirm that is intended */
1964 if ((element.tag.model & Dict.CM_OBJECT) != 0)
1965 lexer.istackbase = istackbase;
1967 Node.trimSpaces(lexer, element);
1968 Node.trimEmptyElement(lexer, element);
1972 else /* things like list items */
1974 if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
1975 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1977 if ((node.tag.model & Dict.CM_HEAD) != 0)
1979 moveToHead(lexer, element, node);
1985 if ((node.tag.model & Dict.CM_LIST) != 0)
1987 if (element.parent != null && element.parent.tag != null &&
1988 element.parent.tag.parser == getParseList())
1990 Node.trimSpaces(lexer, element);
1991 Node.trimEmptyElement(lexer, element);
1995 node = lexer.inferredTag("ul");
1996 Node.addClass(node, "noindent");
1998 else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
/* NOTE(review): element.parent is dereferenced here without the null check used in the CM_LIST branch above - confirm parent is always set on this path */
2000 if (element.parent.tag == tt.tagDl)
2002 Node.trimSpaces(lexer, element);
2003 Node.trimEmptyElement(lexer, element);
2007 node = lexer.inferredTag("dl");
2009 else if ((node.tag.model & Dict.CM_TABLE) != 0 ||
2010 (node.tag.model & Dict.CM_ROW) != 0)
2012 node = lexer.inferredTag("table");
2014 else if ((element.tag.model & Dict.CM_OBJECT) != 0)
2016 /* pop inline stack */
2017 while (lexer.istack.size() > lexer.istackbase)
2018 lexer.popInline( null);
2019 lexer.istackbase = istackbase;
2020 Node.trimSpaces(lexer, element);
2021 Node.trimEmptyElement(lexer, element);
2027 Node.trimSpaces(lexer, element);
2028 Node.trimEmptyElement(lexer, element);
2034 /* parse known element */
2035 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
2037 if ((node.tag.model & Dict.CM_INLINE) != 0)
2039 if (checkstack && !node.implicit)
2043 if (lexer.inlineDup( node) > 0)
2047 mode = Lexer.MixedContent;
2052 mode = Lexer.IgnoreWhitespace;
2055 /* trim white space before <br> */
2056 if (node.tag == tt.tagBr)
2057 Node.trimSpaces(lexer, element);
2059 Node.insertNodeAtEnd(element, node);
2062 Report.warning(lexer, element, node, Report.INSERTING_TAG);
2064 parseTag(lexer, node, Lexer.IgnoreWhitespace /*Lexer.MixedContent*/);
2068 /* discard unexpected tags */
2069 if (node.type == Node.EndTag)
2070 lexer.popInline( node); /* if inline end tag */
2072 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2075 if (!((element.tag.model & Dict.CM_OPT) != 0))
2076 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
2078 if ((element.tag.model & Dict.CM_OBJECT) != 0)
2080 /* pop inline stack */
2081 while (lexer.istack.size() > lexer.istackbase)
2082 lexer.popInline( null);
2083 lexer.istackbase = istackbase;
2086 Node.trimSpaces(lexer, element);
2087 Node.trimEmptyElement(lexer, element);
/*
 * Parser for the TABLE element. On entry the current lexer.istackbase
 * is saved and replaced by the present inline-stack size; the saved
 * value is written back on each exit path visible below.
 *
 * What the visible code does with each token (read in IgnoreWhitespace
 * mode):
 *  - the table's own end tag marks it closed;
 *  - TD, TH or a nested TABLE (non end tag) leads to an inferred "tr",
 *    reported as MISSING_STARTTAG;
 *  - text, block or inline content is inserted before the table and
 *    reported as TAG_NOT_ALLOWED_IN, with lexer.exiled set meanwhile;
 *  - CM_HEAD content is handed to moveToHead;
 *  - end tags for FORM and for CM_TABLE/CM_ROW tags are discarded; an
 *    end tag matching an ancestor is reported as MISSING_ENDTAG_BEFORE;
 *  - a tag that is not CM_TABLE is reported as TAG_NOT_ALLOWED_IN;
 *  - remaining start tags are appended to the table and parsed.
 */
2092 public static class ParseTableTag implements Parser {
2094 public void parse( Lexer lexer, Node table, short mode )
2098 TagTable tt = lexer.configuration.tt;
/* open a fresh inline-stack context for the table */
2101 istackbase = lexer.istackbase;
2102 lexer.istackbase = lexer.istack.size();
2106 node = lexer.getToken(Lexer.IgnoreWhitespace);
2107 if (node == null) break;
2108 if (node.tag == table.tag && node.type == Node.EndTag)
2110 lexer.istackbase = istackbase;
2111 table.closed = true;
2112 Node.trimEmptyElement(lexer, table);
2116 /* deal with comments etc. */
2117 if (Node.insertMisc(table, node))
2120 /* discard unknown tags */
2121 if (node.tag == null && node.type != Node.TextNode)
2123 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2127 /* if TD or TH or text or inline or block then infer <TR> */
2129 if (node.type != Node.EndTag)
2131 if (node.tag == tt.tagTd ||
2132 node.tag == tt.tagTh ||
2133 node.tag == tt.tagTable)
2136 node = lexer.inferredTag( "tr");
2137 Report.warning(lexer, table, node, Report.MISSING_STARTTAG);
2139 else if (node.type == Node.TextNode
2140 || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2142 Node.insertNodeBeforeElement(table, node);
2143 Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
2144 lexer.exiled = true;
2147 Line 2040 of parser.c (13 Jan 2000) reads as follows:
2148 if (!node->type == TextNode)
2149 This will always evaluate to false.
2150 This has been reported to Dave Raggett <dsr@w3.org>
2152 //Should be?: if (!(node.type == Node.TextNode))
2154 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2156 lexer.exiled = false;
2159 else if ((node.tag.model & Dict.CM_HEAD) != 0)
2161 moveToHead(lexer, table, node);
2167 if this is the end tag for an ancestor element
2168 then infer end tag for this element
2170 if (node.type == Node.EndTag)
2172 if (node.tag == tt.tagForm)
2175 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2179 if (node.tag != null && (node.tag.model & (Dict.CM_TABLE|Dict.CM_ROW)) != 0)
2181 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2185 for (parent = table.parent;
2186 parent != null; parent = parent.parent)
2188 if (node.tag == parent.tag)
2190 Report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE);
2192 lexer.istackbase = istackbase;
2193 Node.trimEmptyElement(lexer, table);
2199 if (!((node.tag.model & Dict.CM_TABLE) != 0))
2202 Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
2203 lexer.istackbase = istackbase;
2204 Node.trimEmptyElement(lexer, table);
2208 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
2210 Node.insertNodeAtEnd(table, node);;
2211 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2215 /* discard unexpected text nodes and end tags */
2216 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2219 Report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR);
2220 Node.trimEmptyElement(lexer, table);
2221 lexer.istackbase = istackbase;
/*
 * Parser for the COLGROUP element. Tokens are read in IgnoreWhitespace
 * mode. The colgroup's own end tag marks it closed. A FORM end tag,
 * unknown tags and stray end tags are reported as DISCARDING_UNEXPECTED.
 * The only children appended at the bottom of the loop are COL elements
 * (see the tt.tagCol test), each of which is then parsed via parseTag.
 */
2226 public static class ParseColGroup implements Parser {
2228 public void parse( Lexer lexer, Node colgroup, short mode )
2231 TagTable tt = lexer.configuration.tt;
2233 if ((colgroup.tag.model & Dict.CM_EMPTY) != 0)
2238 node = lexer.getToken(Lexer.IgnoreWhitespace);
2239 if (node == null) break;
2240 if (node.tag == colgroup.tag && node.type == Node.EndTag)
2242 colgroup.closed = true;
2247 if this is the end tag for an ancestor element
2248 then infer end tag for this element
2250 if (node.type == Node.EndTag)
2252 if (node.tag == tt.tagForm)
2255 Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2259 for (parent = colgroup.parent;
2260 parent != null; parent = parent.parent)
2263 if (node.tag == parent.tag)
2271 if (node.type == Node.TextNode)
2277 /* deal with comments etc. */
2278 if (Node.insertMisc(colgroup, node))
2281 /* discard unknown tags */
2282 if (node.tag == null)
2284 Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2288 if (node.tag != tt.tagCol)
2294 if (node.type == Node.EndTag)
2296 Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2300 /* node should be <COL> */
2301 Node.insertNodeAtEnd(colgroup, node);
2302 parseTag(lexer, node, Lexer.IgnoreWhitespace);
/*
 * Parser for row-group elements ("rowgroup" is the node being filled;
 * the CM_ROWGRP comment below names THEAD, TFOOT and TBODY).
 *
 * What the visible code does with each token (read in IgnoreWhitespace
 * mode):
 *  - the row group's own end tag marks it closed; a TABLE end tag also
 *    ends it;
 *  - TD or TH (non end tag) leads to an inferred "tr", reported as
 *    MISSING_STARTTAG;
 *  - text, block or inline content is moved in front of the table via
 *    Node.moveBeforeTable and reported as TAG_NOT_ALLOWED_IN; non-text
 *    nodes are parsed there with lexer.exiled set;
 *  - CM_HEAD content is reported and handed to moveToHead;
 *  - end tags for FORM, TR, TD and TH are discarded;
 *  - any tag other than TR reaching the bottom of the loop is replaced
 *    by an inferred "tr"; rows are appended and parsed.
 */
2308 public static class ParseRowGroup implements Parser {
2310 public void parse( Lexer lexer, Node rowgroup, short mode )
2313 TagTable tt = lexer.configuration.tt;
2315 if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0)
2320 node = lexer.getToken(Lexer.IgnoreWhitespace);
2321 if (node == null) break;
2322 if (node.tag == rowgroup.tag)
2324 if (node.type == Node.EndTag)
2326 rowgroup.closed = true;
2327 Node.trimEmptyElement(lexer, rowgroup);
2335 /* if </table> infer end tag */
2336 if (node.tag == tt.tagTable && node.type == Node.EndTag)
2339 Node.trimEmptyElement(lexer, rowgroup);
2343 /* deal with comments etc. */
2344 if (Node.insertMisc(rowgroup, node))
2347 /* discard unknown tags */
2348 if (node.tag == null && node.type != Node.TextNode)
2350 Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2355 if TD or TH then infer <TR>
2356 if text or inline or block move before table
2357 if head content move to head
2360 if (node.type != Node.EndTag)
2362 if (node.tag == tt.tagTd || node.tag == tt.tagTh)
2365 node = lexer.inferredTag("tr");
2366 Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
2368 else if (node.type == Node.TextNode
2369 || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2371 Node.moveBeforeTable(rowgroup, node, tt);
2372 Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
2373 lexer.exiled = true;
2375 if (node.type != Node.TextNode)
2376 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2378 lexer.exiled = false;
2381 else if ((node.tag.model & Dict.CM_HEAD) != 0)
2383 Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
2384 moveToHead(lexer, rowgroup, node);
2390 if this is the end tag for ancestor element
2391 then infer end tag for this element
2393 if (node.type == Node.EndTag)
2395 if (node.tag == tt.tagForm)
2398 Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2402 if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag == tt.tagTh)
2404 Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2408 for (parent = rowgroup.parent;
2409 parent != null; parent = parent.parent)
2411 if (node.tag == parent.tag)
2414 Node.trimEmptyElement(lexer, rowgroup);
2421 if THEAD, TFOOT or TBODY then implied end tag
2424 if ((node.tag.model & Dict.CM_ROWGRP) != 0)
2426 if (node.type != Node.EndTag)
2429 Node.trimEmptyElement(lexer, rowgroup);
2433 if (node.type == Node.EndTag)
2435 Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2439 if (!(node.tag == tt.tagTr))
2441 node = lexer.inferredTag( "tr");
2442 Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
2446 /* node should be <TR> */
2447 Node.insertNodeAtEnd(rowgroup, node);
2448 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2451 Node.trimEmptyElement(lexer, rowgroup);
/*
 * Parser for table rows ("row" is the node being filled).
 *
 * What the visible code does with each token (read in IgnoreWhitespace
 * mode):
 *  - a token carrying the row's own tag triggers Node.fixEmptyRow;
 *  - end tags for FORM, TD and TH are discarded; an end tag matching an
 *    ancestor trims the row;
 *  - unknown tags and nested TABLE tokens are discarded;
 *  - a CM_ROWGRP tag trims the row;
 *  - a FORM (non end tag) leads to an inferred "td", reported as
 *    MISSING_STARTTAG;
 *  - text, block or inline content is moved in front of the table and
 *    CM_HEAD content is handed to moveToHead, both reported as
 *    TAG_NOT_ALLOWED_IN;
 *  - cells are appended and parsed with lexer.excludeBlocks cleared;
 *    the previous excludeBlocks value is put back afterwards and the
 *    inline stack is popped down to lexer.istackbase.
 */
2456 public static class ParseRow implements Parser {
2458 public void parse( Lexer lexer, Node row, short mode )
2461 boolean exclude_state;
2462 TagTable tt = lexer.configuration.tt;
2464 if ((row.tag.model & Dict.CM_EMPTY) != 0)
2469 node = lexer.getToken(Lexer.IgnoreWhitespace);
2470 if (node == null) break;
2471 if (node.tag == row.tag)
2473 if (node.type == Node.EndTag)
2476 Node.fixEmptyRow(lexer, row);
2481 Node.fixEmptyRow(lexer, row);
2486 if this is the end tag for an ancestor element
2487 then infer end tag for this element
2489 if (node.type == Node.EndTag)
2491 if (node.tag == tt.tagForm)
2494 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2498 if (node.tag == tt.tagTd || node.tag == tt.tagTh)
2500 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2504 for (parent = row.parent;
2505 parent != null; parent = parent.parent)
2507 if (node.tag == parent.tag)
2510 Node.trimEmptyElement(lexer, row);
2516 /* deal with comments etc. */
2517 if (Node.insertMisc(row, node))
2520 /* discard unknown tags */
2521 if (node.tag == null && node.type != Node.TextNode)
2523 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2527 /* discard unexpected <table> element */
2528 if (node.tag == tt.tagTable)
2530 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2534 /* THEAD, TFOOT or TBODY */
2535 if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0)
2538 Node.trimEmptyElement(lexer, row);
2542 if (node.type == Node.EndTag)
2544 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2549 if text or inline or block move before table
2550 if head content move to head
2553 if (node.type != Node.EndTag)
2555 if (node.tag == tt.tagForm)
2558 node = lexer.inferredTag("td");
2559 Report.warning(lexer, row, node, Report.MISSING_STARTTAG);
2561 else if (node.type == Node.TextNode
2562 || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2564 Node.moveBeforeTable(row, node, tt);
2565 Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2566 lexer.exiled = true;
2568 if (node.type != Node.TextNode)
2569 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2571 lexer.exiled = false;
2574 else if ((node.tag.model & Dict.CM_HEAD) != 0)
2576 Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2577 moveToHead(lexer, row, node);
2582 if (!(node.tag == tt.tagTd || node.tag == tt.tagTh))
2584 Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2588 /* node should be <TD> or <TH> */
2589 Node.insertNodeAtEnd(row, node);
/* parse the cell with excludeBlocks cleared, then put the caller's setting back */
2590 exclude_state = lexer.excludeBlocks;
2591 lexer.excludeBlocks = false;
2592 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2593 lexer.excludeBlocks = exclude_state;
2595 /* pop inline stack */
2597 while (lexer.istack.size() > lexer.istackbase)
2598 lexer.popInline( null);
2601 Node.trimEmptyElement(lexer, row);
/*
 * Parser for the NOFRAMES element. Entering it sets the USING_NOFRAMES
 * bit in lexer.badAccess, and the caller's mode is replaced by
 * IgnoreWhitespace.
 *
 * What the visible code does with each token:
 *  - the element's own end tag marks it closed;
 *  - FRAME or FRAMESET is reported as MISSING_ENDTAG_BEFORE;
 *  - an HTML start tag is discarded;
 *  - a BODY start tag is appended and parsed;
 *  - text or any known tag leads to an inferred "body" (reported as
 *    INSERTING_TAG only when configuration.XmlOut is set), which is
 *    appended and parsed;
 *  - what remains is discarded;
 *  - running out of tokens reports MISSING_ENDTAG_FOR.
 */
2606 public static class ParseNoFrames implements Parser {
2608 public void parse( Lexer lexer, Node noframes, short mode )
2612 TagTable tt = lexer.configuration.tt;
2614 lexer.badAccess |= Report.USING_NOFRAMES;
2615 mode = Lexer.IgnoreWhitespace;
2620 node = lexer.getToken(mode);
2621 if (node == null) break;
2622 if (node.tag == noframes.tag && node.type == Node.EndTag)
2624 noframes.closed = true;
2625 Node.trimSpaces(lexer, noframes);
2629 if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset))
2631 Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_BEFORE);
2632 Node.trimSpaces(lexer, noframes);
2637 if (node.tag == tt.tagHtml)
2639 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
2640 Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
2645 /* deal with comments etc. */
2646 if (Node.insertMisc(noframes, node))
2649 if (node.tag == tt.tagBody && node.type == Node.StartTag)
2651 Node.insertNodeAtEnd(noframes, node);
2652 parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/);
2656 /* implicit body element inferred */
2657 if (node.type == Node.TextNode || node.tag != null)
2660 node = lexer.inferredTag("body");
2661 if (lexer.configuration.XmlOut)
2662 Report.warning(lexer, noframes, node, Report.INSERTING_TAG);
2663 Node.insertNodeAtEnd(noframes, node);
2664 parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/);
2667 /* discard unexpected end tags */
2668 Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
2671 Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_FOR);
/*
 * Parser for the SELECT element ("field"). Only OPTION, OPTGROUP and
 * SCRIPT start tags are appended and parsed; comments and the like go
 * through Node.insertMisc, and every other token is reported as
 * DISCARDING_UNEXPECTED. The element's own end tag marks it closed;
 * running out of tokens reports MISSING_ENDTAG_FOR.
 */
2676 public static class ParseSelect implements Parser {
2678 public void parse( Lexer lexer, Node field, short mode )
2681 TagTable tt = lexer.configuration.tt;
2683 lexer.insert = -1; /* defer implicit inline start tags */
2687 node = lexer.getToken(Lexer.IgnoreWhitespace);
2688 if (node == null) break;
2689 if (node.tag == field.tag && node.type == Node.EndTag)
2691 field.closed = true;
2692 Node.trimSpaces(lexer, field);
2696 /* deal with comments etc. */
2697 if (Node.insertMisc(field, node))
2700 if (node.type == Node.StartTag &&
2701 (node.tag == tt.tagOption ||
2702 node.tag == tt.tagOptgroup ||
2703 node.tag == tt.tagScript))
2705 Node.insertNodeAtEnd(field, node);
2706 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2710 /* discard unexpected tags */
2711 Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
2714 Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
/*
 * Parser for form fields whose content is text ("field"). A TEXTAREA
 * is read in Preformatted mode; otherwise the caller's mode is used.
 * Text nodes are appended, FONT tags are discarded, and any other tag
 * ends the field (reported as MISSING_ENDTAG_BEFORE unless the field's
 * tag is CM_OPT). Running out of tokens reports MISSING_ENDTAG_FOR
 * under the same CM_OPT condition.
 */
2719 public static class ParseText implements Parser {
2721 public void parse( Lexer lexer, Node field, short mode )
2724 TagTable tt = lexer.configuration.tt;
2726 lexer.insert = -1; /* defer implicit inline start tags */
2728 if (field.tag == tt.tagTextarea)
2729 mode = Lexer.Preformatted;
2733 node = lexer.getToken(mode);
2734 if (node == null) break;
2735 if (node.tag == field.tag && node.type == Node.EndTag)
2737 field.closed = true;
2738 Node.trimSpaces(lexer, field);
2742 /* deal with comments etc. */
2743 if (Node.insertMisc(field, node))
2746 if (node.type == Node.TextNode)
2748 /* only called for 1st child */
2749 if (field.content == null && !((mode & Lexer.Preformatted) != 0))
2750 Node.trimSpaces(lexer, field);
2752 if (node.start >= node.end)
2757 Node.insertNodeAtEnd(field, node);
2761 if (node.tag == tt.tagFont)
2763 Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
2767 /* terminate element on other tags */
2768 if (!((field.tag.model & Dict.CM_OPT) != 0))
2769 Report.warning(lexer, field, node, Report.MISSING_ENDTAG_BEFORE);
2772 Node.trimSpaces(lexer, field);
2776 if (!((field.tag.model & Dict.CM_OPT) != 0))
2777 Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
/*
 * Parser for the OPTGROUP element ("field"). OPTION and OPTGROUP start
 * tags are appended and parsed in MixedContent mode; a nested OPTGROUP
 * is additionally reported as CANT_BE_NESTED. Comments and the like go
 * through Node.insertMisc, and every other token is reported as
 * DISCARDING_UNEXPECTED. The element's own end tag marks it closed.
 */
2782 public static class ParseOptGroup implements Parser {
2784 public void parse( Lexer lexer, Node field, short mode )
2787 TagTable tt = lexer.configuration.tt;
2789 lexer.insert = -1; /* defer implicit inline start tags */
2793 node = lexer.getToken(Lexer.IgnoreWhitespace);
2794 if (node == null) break;
2795 if (node.tag == field.tag && node.type == Node.EndTag)
2797 field.closed = true;
2798 Node.trimSpaces(lexer, field);
2802 /* deal with comments etc. */
2803 if (Node.insertMisc(field, node))
2806 if (node.type == Node.StartTag &&
2807 (node.tag == tt.tagOption || node.tag == tt.tagOptgroup))
2809 if (node.tag == tt.tagOptgroup)
2810 Report.warning(lexer, field, node, Report.CANT_BE_NESTED);
2812 Node.insertNodeAtEnd(field, node);
2813 parseTag(lexer, node, Lexer.MixedContent);
2817 /* discard unexpected tags */
2818 Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
/*
 * Accessors for the shared Parser instances, one getter per private
 * static _parseXxx field of this class. The return statements visible
 * here hand back the field with the matching name.
 */
2824 public static Parser getParseHTML()
2829 public static Parser getParseHead()
2834 public static Parser getParseTitle()
2839 public static Parser getParseScript()
2841 return _parseScript;
2844 public static Parser getParseBody()
2849 public static Parser getParseFrameSet()
2851 return _parseFrameSet;
2854 public static Parser getParseInline()
2856 return _parseInline;
2859 public static Parser getParseList()
2864 public static Parser getParseDefList()
2866 return _parseDefList;
2869 public static Parser getParsePre()
2874 public static Parser getParseBlock()
2879 public static Parser getParseTableTag()
2881 return _parseTableTag;
2884 public static Parser getParseColGroup()
2886 return _parseColGroup;
2889 public static Parser getParseRowGroup()
2891 return _parseRowGroup;
2894 public static Parser getParseRow()
2899 public static Parser getParseNoFrames()
2901 return _parseNoFrames;
2904 public static Parser getParseSelect()
2906 return _parseSelect;
2909 public static Parser getParseText()
2914 public static Parser getParseOptGroup()
2916 return _parseOptGroup;
/*
 * One shared instance of each nested Parser implementation, created
 * when the class is initialised. Code in this file compares parsers by
 * identity against these instances (e.g. "parser == getParseList()").
 */
2920 private static Parser _parseHTML = new ParseHTML();
2921 private static Parser _parseHead = new ParseHead();
2922 private static Parser _parseTitle = new ParseTitle();
2923 private static Parser _parseScript = new ParseScript();
2924 private static Parser _parseBody = new ParseBody();
2925 private static Parser _parseFrameSet = new ParseFrameSet();
2926 private static Parser _parseInline = new ParseInline();
2927 private static Parser _parseList = new ParseList();
2928 private static Parser _parseDefList = new ParseDefList();
2929 private static Parser _parsePre = new ParsePre();
2930 private static Parser _parseBlock = new ParseBlock();
2931 private static Parser _parseTableTag = new ParseTableTag();
2932 private static Parser _parseColGroup = new ParseColGroup();
2933 private static Parser _parseRowGroup = new ParseRowGroup();
2934 private static Parser _parseRow = new ParseRow();
2935 private static Parser _parseNoFrames = new ParseNoFrames();
2936 private static Parser _parseSelect = new ParseSelect();
2937 private static Parser _parseText = new ParseText();
2938 private static Parser _parseOptGroup = new ParseOptGroup();
2941 HTML is the top level element
2943 public static Node parseDocument(Lexer lexer)
/*
 * Entry point for parsing an HTML document. A new node of type
 * RootNode is created as the document; tokens are then read in
 * IgnoreWhitespace mode:
 *  - comments and the like are attached through Node.insertMisc;
 *  - a DOCTYPE is appended while none has been recorded yet, otherwise
 *    it is reported as DISCARDING_UNEXPECTED;
 *  - stray end tags are reported as DISCARDING_UNEXPECTED;
 *  - anything that is not an HTML start tag leads to an inferred "html".
 * The html node is appended to the document and parsed with the parser
 * from getParseHTML(), passing 0 as the mode.
 */
2945 Node node, document, html;
2946 Node doctype = null;
2947 TagTable tt = lexer.configuration.tt;
2949 document = lexer.newNode();
2950 document.type = Node.RootNode;
2954 node = lexer.getToken(Lexer.IgnoreWhitespace);
2955 if (node == null) break;
2957 /* deal with comments etc. */
2958 if (Node.insertMisc(document, node))
2961 if (node.type == Node.DocTypeTag)
2963 if (doctype == null)
2965 Node.insertNodeAtEnd(document, node);
2969 Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED);
2973 if (node.type == Node.EndTag)
2975 Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); //TODO?
2979 if (node.type != Node.StartTag || node.tag != tt.tagHtml)
2982 html = lexer.inferredTag("html");
2987 Node.insertNodeAtEnd(document, html);
2988 getParseHTML().parse(lexer, html, (short)0); // TODO?
2996 * Indicates whether or not whitespace should be preserved for this element.
2997 * If an <code>xml:space</code> attribute is found, then if the attribute value is
2998 * <code>preserve</code>, returns <code>true</code>. For any other value, returns
2999 * <code>false</code>. If an <code>xml:space</code> attribute was <em>not</em>
3000 * found, then the following element names result in a return value of <code>true:
3001 * pre, script, style,</code> and <code>xsl:text</code>. Finally, if a
3002 * <code>TagTable</code> was passed in and the element appears as the "pre" element
3003 * in the <code>TagTable</code>, then <code>true</code> will be returned.
3004 * Otherwise, <code>false</code> is returned.
3005 * @param element The <code>Node</code> to test to see if whitespace should be
3007 * @param tt The <code>TagTable</code> to test for the <code>getNodePre()</code>
3008 * function. This may be <code>null</code>, in which case this test
3010 * @return <code>true</code> or <code>false</code>, as explained above.
3013 public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt)
3017 /* search attributes for xml:space */
3018 for (attribute = element.attributes; attribute != null; attribute = attribute.next)
3020 if (attribute.attribute.equals("xml:space"))
/* NOTE(review): attribute.value is dereferenced directly; confirm it can never be null for an xml:space attribute written without a value */
3022 if (attribute.value.equals("preserve"))
3029 /* kludge for html docs without explicit xml:space attribute */
/* element names are compared case-insensitively via Lexer.wstrcasecmp */
3030 if (Lexer.wstrcasecmp(element.element, "pre") == 0
3031 || Lexer.wstrcasecmp(element.element, "script") == 0
3032 || Lexer.wstrcasecmp(element.element, "style") == 0)
/* with a tag table supplied, also test whether the element is handled by the shared pre parser */
3035 if ( (tt != null) && (tt.findParser(element) == getParsePre()) )
3038 /* kludge for XSL docs */
3039 if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
3048 public static void parseXMLElement(Lexer lexer, Node element, short mode)
/*
 * Builds the content of one XML element, recursing into child
 * elements. The mode switches to Preformatted when
 * XMLPreserveWhiteSpace says so for this element. Tokens are read
 * until an end tag with the same element name is found (names are
 * compared with String.equals); any other end tag is reported through
 * Report.error as UNEXPECTED_ENDTAG. Start tags are parsed recursively
 * with the current mode, and tokens are appended to the element.
 * Afterwards, unless the mode is Preformatted, the first and last
 * children are checked for a leading or trailing space when they are
 * text nodes, and a text node left empty is discarded.
 */
3052 /* Jeff Young's kludge for XSL docs */
3054 if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
3057 /* if node is pre or has xml:space="preserve" then do so */
3059 if (XMLPreserveWhiteSpace(element, lexer.configuration.tt))
3060 mode = Lexer.Preformatted;
3064 node = lexer.getToken(mode);
3065 if (node == null) break;
3066 if (node.type == Node.EndTag && node.element.equals(element.element))
3068 element.closed = true;
3072 /* discard unexpected end tags */
3073 if (node.type == Node.EndTag)
3075 Report.error(lexer, element, node, Report.UNEXPECTED_ENDTAG);
3079 /* parse content on seeing start tag */
3080 if (node.type == Node.StartTag)
3081 parseXMLElement(lexer, node, mode);
3083 Node.insertNodeAtEnd(element, node);
3087 if first child is text then trim initial space and
3088 delete text node if it is empty.
3091 node = element.content;
3093 if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted)
3095 if (node.textarray[node.start] == (byte)' ')
3099 if (node.start >= node.end)
3100 Node.discardElement(node);
3105 if last child is text then trim final space and
3106 delete the text node if it is empty
3109 node = element.last;
3111 if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted)
3113 if (node.textarray[node.end - 1] == (byte)' ')
3117 if (node.start >= node.end)
3118 Node.discardElement(node);
3123 public static Node parseXMLDocument(Lexer lexer)
3125 Node node, document, doctype;
3127 document = lexer.newNode();
3128 document.type = Node.RootNode;
3130 lexer.configuration.XmlTags = true;
3134 node = lexer.getToken(Lexer.IgnoreWhitespace);
3135 if (node == null) break;
3136 /* discard unexpected end tags */
3137 if (node.type == Node.EndTag)
3139 Report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG);
3143 /* deal with comments etc. */
3144 if (Node.insertMisc(document, node))
3147 if (node.type == Node.DocTypeTag)
3149 if (doctype == null)
3151 Node.insertNodeAtEnd(document, node);
3155 Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO
3159 /* if start tag then parse element's content */
3160 if (node.type == Node.StartTag)
3162 Node.insertNodeAtEnd(document, node);
3163 parseXMLElement(lexer, node, Lexer.IgnoreWhitespace);
3168 if (false) { //#if 0
3169 /* discard the document type */
3170 node = document.findDocType();
3173 Node.discardElement(node);
3176 if (doctype != null && !lexer.checkDocTypeKeyWords(doctype))
3177 Report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
3179 /* ensure presence of initial <?XML version="1.0"?> */
3180 if (lexer.configuration.XmlPi)
3181 lexer.fixXMLPI(document);
3186 public static boolean isJavaScript(Node node)
3188 boolean result = false;
3191 if (node.attributes == null)
3194 for (attr = node.attributes; attr != null; attr = attr.next)
3196 if ( (Lexer.wstrcasecmp(attr.attribute, "language") == 0
3197 || Lexer.wstrcasecmp(attr.attribute, "type") == 0)
3198 && Lexer.wsubstr(attr.value, "javascript"))