initial version
[phpeclipse.git] / archive / net.sourceforge.phpeclipse.jtidy / src / net / sourceforge / phpdt / tidy / w3c / ParserImpl.java
1 /*
2  * @(#)ParserImpl.java   1.11 2000/08/16
3  *
4  */
5
6 package net.sourceforge.phpdt.tidy.w3c;
7
8 /**
9  *
10  * HTML Parser implementation
11  *
12  * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
13  * See Tidy.java for the copyright notice.
14  * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
15  * HTML Tidy Release 4 Aug 2000</a>
16  *
17  * @author  Dave Raggett <dsr@w3.org>
18  * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
19  * @version 1.0, 1999/05/22
20  * @version 1.0.1, 1999/05/29
21  * @version 1.1, 1999/06/18 Java Bean
22  * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
23  * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
24  * @version 1.4, 1999/09/04 DOM support
25  * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
26  * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
27  * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
28  * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
29  * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
30  * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
31  * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
32  */
33
34 public class ParserImpl {
35
36     //private static int SeenBodyEndTag;  /* AQ: moved into lexer structure */
37
38     private static void parseTag(Lexer lexer, Node node, short mode)
39     {
40         // Local fix by GLP 2000-12-21.  Need to reset insertspace if this 
41         // is both a non-inline and empty tag (base, link, meta, isindex, hr, area).
42         // Remove this code once the fix is made in Tidy.
43
44 /******  (Original code follows)
45         if ((node.tag.model & Dict.CM_EMPTY) != 0)
46         {
47             lexer.waswhite = false;
48             return;
49         }
50         else if (!((node.tag.model & Dict.CM_INLINE) != 0))
51             lexer.insertspace = false;
52 *******/
53
54         if (!((node.tag.model & Dict.CM_INLINE) != 0))
55             lexer.insertspace = false;
56
57         if ((node.tag.model & Dict.CM_EMPTY) != 0)
58         {
59             lexer.waswhite = false;
60             return;
61         }
62
63         if (node.tag.parser == null || node.type == Node.StartEndTag)
64             return;
65
66         node.tag.parser.parse(lexer, node, mode);
67     }
68
69     private static void moveToHead(Lexer lexer, Node element, Node node)
70     {
71         Node head;
72         TagTable tt = lexer.configuration.tt;
73
74
75         if (node.type == Node.StartTag || node.type == Node.StartEndTag)
76         {
77             Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
78
79             while (element.tag != tt.tagHtml)
80                 element = element.parent;
81
82             for (head = element.content; head != null; head = head.next)
83             {
84                 if (head.tag == tt.tagHead)
85                 {
86                     Node.insertNodeAtEnd(head, node);
87                     break;
88                 }
89             }
90
91             if (node.tag.parser != null)
92                 parseTag(lexer, node, Lexer.IgnoreWhitespace);
93         }
94         else
95         {
96             Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
97         }
98     }
99
100     public static class ParseHTML implements Parser {
101
102         public void parse( Lexer lexer, Node html, short mode )
103         {
104             Node node, head;
105             Node frameset = null;
106             Node noframes = null;
107
108             lexer.configuration.XmlTags = false;
109             lexer.seenBodyEndTag = 0;
110             TagTable tt = lexer.configuration.tt;
111
112             for (;;)
113             {
114                 node = lexer.getToken(Lexer.IgnoreWhitespace);
115
116                 if (node == null)
117                 {
118                     node = lexer.inferredTag("head");
119                     break;
120                 }
121
122                 if (node.tag == tt.tagHead)
123                     break;
124
125                 if (node.tag == html.tag && node.type == Node.EndTag)
126                 {
127                     Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
128                     continue;
129                 }
130
131                 /* deal with comments etc. */
132                 if (Node.insertMisc(html, node))
133                     continue;
134
135                 lexer.ungetToken();
136                 node = lexer.inferredTag("head");
137                 break;
138             }
139
140             head = node;
141             Node.insertNodeAtEnd(html, head);
142             getParseHead().parse(lexer, head, mode);
143
144             for (;;)
145             {
146                 node = lexer.getToken(Lexer.IgnoreWhitespace);
147
148                 if (node == null)
149                 {
150                     if (frameset == null) /* create an empty body */
151                         node = lexer.inferredTag("body");
152
153                     return;
154                 }
155
156                 /* robustly handle html tags */
157                 if (node.tag == html.tag)
158                 {
159                     if (node.type != Node.StartTag && frameset == null)
160                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
161
162                     continue;
163                 }
164
165                 /* deal with comments etc. */
166                 if (Node.insertMisc(html, node))
167                     continue;
168
169                 /* if frameset document coerce <body> to <noframes> */
170                 if (node.tag == tt.tagBody)
171                 {
172                     if (node.type != Node.StartTag)
173                     {
174                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
175                         continue;
176                     }
177
178                     if (frameset != null)
179                     {
180                         lexer.ungetToken();
181
182                         if (noframes == null)
183                         {
184                             noframes = lexer.inferredTag("noframes");
185                             Node.insertNodeAtEnd(frameset, noframes);
186                             Report.warning(lexer, html, noframes, Report.INSERTING_TAG);
187                         }
188
189                         parseTag(lexer, noframes, mode);
190                         continue;
191                     }
192
193                     break;  /* to parse body */
194                 }
195
196                 /* flag an error if we see more than one frameset */
197                 if (node.tag == tt.tagFrameset)
198                 {
199                     if (node.type != Node.StartTag)
200                     {
201                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
202                         continue;
203                     }
204
205                     if (frameset != null)
206                         Report.error(lexer, html, node, Report.DUPLICATE_FRAMESET);
207                     else
208                         frameset = node;
209
210                     Node.insertNodeAtEnd(html, node);
211                     parseTag(lexer, node, mode);
212
213                     /*
214                       see if it includes a noframes element so
215                       that we can merge subsequent noframes elements
216                     */
217
218                     for (node = frameset.content; node != null; node = node.next)
219                     {
220                         if (node.tag == tt.tagNoframes)
221                             noframes = node;
222                     }
223                     continue;
224                 }
225
226                 /* if not a frameset document coerce <noframes> to <body> */
227                 if (node.tag == tt.tagNoframes)
228                 {
229                     if (node.type != Node.StartTag)
230                     {
231                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
232                         continue;
233                     }
234
235                     if (frameset == null)
236                     {
237                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
238                         node = lexer.inferredTag("body");
239                         break;
240                     }
241
242                     if (noframes == null)
243                     {
244                         noframes = node;
245                         Node.insertNodeAtEnd(frameset, noframes);
246                     }
247
248                     parseTag(lexer, noframes, mode);
249                     continue;
250                 }
251
252                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
253                 {
254                     if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
255                     {
256                         moveToHead(lexer, html, node);
257                         continue;
258                     }
259                 }
260
261                 lexer.ungetToken();
262
263                 /* insert other content into noframes element */
264
265                 if (frameset != null)
266                 {
267                     if (noframes == null)
268                     {
269                         noframes = lexer.inferredTag("noframes");
270                         Node.insertNodeAtEnd(frameset, noframes);
271                     }
272                     else
273                         Report.warning(lexer, html, node, Report.NOFRAMES_CONTENT);
274
275                     parseTag(lexer, noframes, mode);
276                     continue;
277                 }
278
279                 node = lexer.inferredTag("body");
280                 break;
281             }
282
283             /* node must be body */
284
285             Node.insertNodeAtEnd(html, node);
286             parseTag(lexer, node, mode);
287         }
288
289     };
290
291     public static class ParseHead implements Parser {
292
293         public void parse( Lexer lexer, Node head, short mode )
294         {
295             Node node;
296             int HasTitle = 0;
297             int HasBase = 0;
298             TagTable tt = lexer.configuration.tt;
299
300             while (true)
301             {
302                 node = lexer.getToken(Lexer.IgnoreWhitespace);
303                 if (node == null) break;
304                 if (node.tag == head.tag && node.type == Node.EndTag)
305                 {
306                     head.closed = true;
307                     break;
308                 }
309
310                 if (node.type == Node.TextNode)
311                 {
312                     lexer.ungetToken();
313                     break;
314                 }
315
316                 /* deal with comments etc. */
317                 if (Node.insertMisc(head, node))
318                     continue;
319
320                 if (node.type == Node.DocTypeTag)
321                 {
322                     Node.insertDocType(lexer, head, node);
323                     continue;
324                 }
325
326                 /* discard unknown tags */
327                 if (node.tag == null)
328                 {
329                     Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
330                     continue;
331                 }
332         
333                 if (!((node.tag.model & Dict.CM_HEAD) != 0))
334                 {
335                     lexer.ungetToken();
336                     break;
337                 }
338
339                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
340                 {
341                     if (node.tag == tt.tagTitle)
342                     {
343                         ++HasTitle;
344
345                         if (HasTitle > 1)
346                             Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
347                     }
348                     else if (node.tag == tt.tagBase)
349                     {
350                         ++HasBase;
351
352                         if (HasBase > 1)
353                             Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
354                     }
355                     else if (node.tag == tt.tagNoscript)
356                         Report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
357
358                     Node.insertNodeAtEnd(head, node);
359                     parseTag(lexer, node, Lexer.IgnoreWhitespace);
360                     continue;
361                 }
362
363                 /* discard unexpected text nodes and end tags */
364                 Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
365             }
366
367             if (HasTitle == 0)
368             {
369                 Report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT);
370                 Node.insertNodeAtEnd(head, lexer.inferredTag( "title"));
371             }
372         }
373
374     };
375
376     public static class ParseTitle implements Parser {
377
378         public void parse( Lexer lexer, Node title, short mode )
379         {
380             Node node;
381
382             while (true)
383             {
384                 node = lexer.getToken(Lexer.MixedContent);
385                 if (node == null) break;
386                 if (node.tag == title.tag && node.type == Node.EndTag)
387                 {
388                     title.closed = true;
389                     Node.trimSpaces(lexer, title);
390                     return;
391                 }
392
393                 if (node.type == Node.TextNode)
394                 {
395                     /* only called for 1st child */
396                     if (title.content == null)
397                         Node.trimInitialSpace(lexer, title, node);
398
399                     if (node.start >= node.end)
400                     {
401                         continue;
402                     }
403
404                     Node.insertNodeAtEnd(title, node);
405                     continue;
406                 }
407
408                 /* deal with comments etc. */
409                 if (Node.insertMisc(title, node))
410                     continue;
411
412                 /* discard unknown tags */
413                 if (node.tag == null)
414                 {
415                     Report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED);
416                     continue;
417                 }
418
419                 /* pushback unexpected tokens */
420                 Report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE);
421                 lexer.ungetToken();
422                 Node.trimSpaces(lexer, title);
423                 return;
424             }
425
426             Report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR);
427         }
428
429     };
430
431     public static class ParseScript implements Parser {
432
433         public void parse( Lexer lexer, Node script, short mode )
434         {
435         /*
436           This isn't quite right for CDATA content as it recognises
437           tags within the content and parses them accordingly.
438           This will unfortunately screw up scripts which include
439           < + letter,  < + !, < + ?  or  < + / + letter
440         */
441
442             Node node;
443
444             node = lexer.getCDATA( script);
445
446             if (node != null)
447                 Node.insertNodeAtEnd(script, node);
448         }
449
450     };
451
452     public static class ParseBody implements Parser {
453
            /**
             * Parses the children of a body element.
             *
             * Tokens are read until the lexer returns null, until a token is
             * pushed back for an enclosing noframes/frameset, or until a tag
             * is met that is neither block nor inline and is not otherwise
             * handled below (that token is pushed back and the method
             * returns). The body end tag does not stop the loop unless the
             * body's parent is noframes; it only sets lexer.seenBodyEndTag so
             * that later non-whitespace content is reported once with
             * CONTENT_AFTER_BODY.
             *
             * @param lexer the lexer supplying tokens
             * @param body  the body element being filled
             * @param mode  overwritten on entry with Lexer.IgnoreWhitespace,
             *              so the caller's value is not used
             */
454         public void parse( Lexer lexer, Node body, short mode )
455         {
456             Node node;
                // checkstack: whether lexer.inlineDup should be tried for the
                // next text/inline token; iswhitenode: set per token below
457             boolean checkstack, iswhitenode;
458
459             mode = Lexer.IgnoreWhitespace;
460             checkstack = true;
461             TagTable tt = lexer.configuration.tt;
462
463             while (true)
464             {
465                 node = lexer.getToken(mode);
466                 if (node == null) break;
                    /* </body>: mark closed but keep scanning for trailing content */
467                 if (node.tag == body.tag && node.type == Node.EndTag)
468                 {
469                     body.closed = true;
470                     Node.trimSpaces(lexer, body);
471                     lexer.seenBodyEndTag = 1;
472                     mode = Lexer.IgnoreWhitespace;
473
474                     if (body.parent.tag == tt.tagNoframes)
475                         break;
476
477                     continue;
478                 }
479         
480                 if (node.tag == tt.tagNoframes)
481                 {
                        /* <noframes> inside body is parsed as a block element */
482                     if (node.type == Node.StartTag)
483                     {
484                         Node.insertNodeAtEnd(body, node);
485                         getParseBlock().parse(lexer, node, mode);
486                         continue;
487                     }
488
                        /* </noframes> closing the body's parent: hand it back */
489                     if (node.type == Node.EndTag &&
490                         body.parent.tag == tt.tagNoframes)
491                     {
492                         Node.trimSpaces(lexer, body);
493                         lexer.ungetToken();
494                         break;
495                     }
496                 }
497
                    /* frame/frameset ends a body nested in noframes; push it back */
498                 if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset)
499                     && body.parent.tag == tt.tagNoframes)
500                 {
501                     Node.trimSpaces(lexer, body);
502                     lexer.ungetToken();
503                     break;
504                 }
505         
                    /* html tags are dropped; only start forms are reported */
506                 if (node.tag == tt.tagHtml)
507                 {
508                     if (node.type == Node.StartTag || node.type == Node.StartEndTag)
509                         Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
510
511                     continue;
512                 }
513
514                 iswhitenode = false;
515
                    /* a text node of at most one character starting with a space */
                    // NOTE(review): textarray[node.start] is read even when
                    // node.end == node.start - confirm empty text nodes
                    // cannot reach this point.
516                 if (node.type == Node.TextNode &&
517                        node.end <= node.start + 1 &&
518                        node.textarray[node.start] == (byte)' ')
519                     iswhitenode = true;
520
521                 /* deal with comments etc. */
522                 if (Node.insertMisc(body, node))
523                     continue;
524
                    /* warn once about non-whitespace content following </body> */
525                 if (lexer.seenBodyEndTag == 1 && !iswhitenode)
526                 {
527                     ++lexer.seenBodyEndTag;
528                     Report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
529                 }
530
531                 /* mixed content model permits text */
532                 if (node.type == Node.TextNode)
533                 {
534                     if (iswhitenode && mode == Lexer.IgnoreWhitespace)
535                     {
536                         continue;
537                     }
538
                        /* EncloseBodyText: re-read the text inside an inferred <p> */
539                     if (lexer.configuration.EncloseBodyText && !iswhitenode)
540                     {
541                         Node para;
542                 
543                         lexer.ungetToken();
544                         para = lexer.inferredTag("p");
545                         Node.insertNodeAtEnd(body, para);
546                         parseTag(lexer, para, mode);
547                         mode = Lexer.MixedContent;
548                         continue;
549                     }
550                     else /* strict doesn't allow text here */
551                         lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
552
                        /* skip inserting this token when inlineDup reports > 0 */
553                     if (checkstack)
554                     {
555                         checkstack = false;
556
557                         if (lexer.inlineDup( node) > 0)
558                             continue;
559                     }
560
561                     Node.insertNodeAtEnd(body, node);
562                     mode = Lexer.MixedContent;
563                     continue;
564                 }
565
566                 if (node.type == Node.DocTypeTag)
567                 {
568                     Node.insertDocType(lexer, body, node);
569                     continue;
570                 }
571                 /* discard unknown  and PARAM tags */
                    /* (silently: the warning call below is commented out) */
572                 if (node.tag == null || node.tag == tt.tagParam)
573                 {
574                         //TODO: message Fix...
575                     //Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
576                     continue;
577                 }
578
579                 /*
580                   Netscape allows LI and DD directly in BODY
581                   We infer UL or DL respectively and use this
582                   boolean to exclude block-level elements so as
583                   to match Netscape's observed behaviour.
584                 */
585                 lexer.excludeBlocks = false;
586         
                    /* tags that are neither block-level nor inline */
587                 if (!((node.tag.model & Dict.CM_BLOCK) != 0) &&
588                     !((node.tag.model & Dict.CM_INLINE) != 0))
589                 {
590                     /* avoid this error message being issued twice */
591                     if (!((node.tag.model & Dict.CM_HEAD) != 0))
592                         Report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
593
                        /* CM_HTML tags are dropped here */
594                     if ((node.tag.model & Dict.CM_HTML) != 0)
595                     {
596                         /* copy body attributes if current body was inferred */
597                         if (node.tag == tt.tagBody && body.implicit 
598                                             && body.attributes == null)
599                         {
600                             body.attributes = node.attributes;
601                             node.attributes = null;
602                         }
603
604                         continue;
605                     }
606
607                     if ((node.tag.model & Dict.CM_HEAD) != 0)
608                     {
609                         moveToHead(lexer, body, node);
610                         continue;
611                     }
612
                        /*
                          For list items, definition-list items and table
                          parts the token is pushed back and node is replaced
                          by an inferred ul, dl or table, which is inserted
                          and parsed by the start-tag handling further down.
                        */
613                     if ((node.tag.model & Dict.CM_LIST) != 0)
614                     {
615                         lexer.ungetToken();
616                         node = lexer.inferredTag( "ul");
617                         Node.addClass(node, "noindent");
618                         lexer.excludeBlocks = true;
619                     }
620                     else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
621                     {
622                         lexer.ungetToken();
623                         node = lexer.inferredTag( "dl");
624                         lexer.excludeBlocks = true;
625                     }
626                     else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0)
627                     {
628                         lexer.ungetToken();
629                         node = lexer.inferredTag( "table");
630                         lexer.excludeBlocks = true;
631                     }
632                     else
633                     {
634                         /* AQ: The following line is from the official C
635                            version of tidy.  It doesn't make sense to me
636                            because the '!' operator has higher precedence
637                            than the '&' operator.  It seems to me that the
638                            expression always evaluates to 0.
639
640                            if (!node->tag->model & (CM_ROW | CM_FIELD))
641
642                            AQ: 13Jan2000 fixed in C tidy
643                         */
                            /* not a row/field tag: push back and leave the body parser */
644                         if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0))
645                         {
646                             lexer.ungetToken();
647                             return;
648                         }
649
650                         /* ignore </td> </th> <option> etc. */
651                         continue;
652                     }
653                 }
654
655                 if (node.type == Node.EndTag)
656                 {
                        /* </br> is turned into a <br> start tag */
657                     if (node.tag == tt.tagBr)
658                         node.type = Node.StartTag;
659                     else if (node.tag == tt.tagP)
660                     {
                            /*
                              a stray </p> is coerced to br and inserted; a
                              second, inferred br then goes through the
                              start-tag handling below
                            */
661                         Node.coerceNode(lexer, node, tt.tagBr);
662                         Node.insertNodeAtEnd(body, node);
663                         node = lexer.inferredTag("br");
664                     }
665                     else if ((node.tag.model & Dict.CM_INLINE) != 0)
666                         lexer.popInline(node);
667                 }
668
669                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
670                 {
671                     if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0))
672                     {
673                         /* HTML4 strict doesn't allow inline content here */
674                         /* but HTML2 does allow img elements as children of body */
675                         if (node.tag == tt.tagImg)
676                             lexer.versions &= ~Dict.VERS_HTML40_STRICT;
677                         else
678                             lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
679
680                         if (checkstack && !node.implicit)
681                         {
682                             checkstack = false;
683
684                             if (lexer.inlineDup( node) > 0)
685                                 continue;
686                         }
687
688                         mode = Lexer.MixedContent;
689                     }
690                     else
691                     {
                            /* any other element re-arms checkstack and ignores whitespace */
692                         checkstack = true;
693                         mode = Lexer.IgnoreWhitespace;
694                     }
695
696                     if (node.implicit)
697                         Report.warning(lexer, body, node, Report.INSERTING_TAG);
698
699                     Node.insertNodeAtEnd(body, node);
700                     parseTag(lexer, node, mode);
701                     continue;
702                 }
703
704                 /* discard unexpected tags */
705                 Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
706             }
707         }
708
709     };
710
711     public static class ParseFrameSet implements Parser {
712
713         public void parse( Lexer lexer, Node frameset, short mode )
714         {
715             Node node;
716             TagTable tt = lexer.configuration.tt;
717
718             lexer.badAccess |=  Report.USING_FRAMES;
719
720             while (true)
721             {
722                 node = lexer.getToken(Lexer.IgnoreWhitespace);
723                 if (node == null) break;
724                 if (node.tag == frameset.tag && node.type == Node.EndTag)
725                 {
726                     frameset.closed = true;
727                     Node.trimSpaces(lexer, frameset);
728                     return;
729                 }
730
731                 /* deal with comments etc. */
732                 if (Node.insertMisc(frameset, node))
733                     continue;
734
735                 if (node.tag == null)
736                 {
737                     Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
738                     continue; 
739                 }
740
741                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
742                 {
743                     if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
744                     {
745                         moveToHead(lexer, frameset, node);
746                         continue;
747                     }
748                 }
749
750                 if (node.tag == tt.tagBody)
751                 {
752                     lexer.ungetToken();
753                     node = lexer.inferredTag("noframes");
754                     Report.warning(lexer, frameset, node, Report.INSERTING_TAG);
755                 }
756
757                 if (node.type == Node.StartTag && (node.tag.model & Dict.CM_FRAMES) != 0)
758                 {
759                     Node.insertNodeAtEnd(frameset, node);
760                     lexer.excludeBlocks = false;
761                     parseTag(lexer, node, Lexer.MixedContent);
762                     continue;
763                 }
764                 else if (node.type == Node.StartEndTag && (node.tag.model & Dict.CM_FRAMES) != 0)
765                 {
766                     Node.insertNodeAtEnd(frameset, node);
767                     continue;
768                 }
769
770                 /* discard unexpected tags */
771                 Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
772             }
773
774             Report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR);
775         }
776
777     };
778
779     public static class ParseInline implements Parser {
780
781         public void parse( Lexer lexer, Node element, short mode )
782         {
783             Node node, parent;
784             TagTable tt = lexer.configuration.tt;
785
786             if ((element.tag.model & Dict.CM_EMPTY) != 0)
787                 return;
788
789             if (element.tag == tt.tagA)
790             {
791                 if (element.attributes == null)
792                 {
793                     Report.warning(lexer, element.parent, element, Report.DISCARDING_UNEXPECTED);
794                     Node.discardElement(element);
795                     return;
796                 }
797             }
798
799             /*
800              ParseInline is used for some block level elements like H1 to H6
801              For such elements we need to insert inline emphasis tags currently
802              on the inline stack. For Inline elements, we normally push them
803              onto the inline stack provided they aren't implicit or OBJECT/APPLET.
804              This test is carried out in PushInline and PopInline, see istack.c
805              We don't push A or SPAN to replicate current browser behavior
806             */
807             if (((element.tag.model & Dict.CM_BLOCK) != 0) || (element.tag == tt.tagDt))
808                 lexer.inlineDup( null);
809             else if ((element.tag.model & Dict.CM_INLINE) != 0 &&
810                         element.tag != tt.tagA && element.tag != tt.tagSpan)
811                 lexer.pushInline( element);
812
813             if (element.tag == tt.tagNobr)
814                 lexer.badLayout |= Report.USING_NOBR;
815             else if (element.tag == tt.tagFont)
816                 lexer.badLayout |= Report.USING_FONT;
817
818             /* Inline elements may or may not be within a preformatted element */
819             if (mode != Lexer.Preformatted)
820                 mode = Lexer.MixedContent;
821
822             while (true)
823             {
824                 node = lexer.getToken(mode);
825                 if (node == null) break;
826                 /* end tag for current element */
827                 if (node.tag == element.tag && node.type == Node.EndTag)
828                 {
829                     if ((element.tag.model & Dict.CM_INLINE) != 0 &&
830                         element.tag != tt.tagA)
831                         lexer.popInline( node);
832
833                     if (!((mode & Lexer.Preformatted) != 0))
834                         Node.trimSpaces(lexer, element);
835                     /*
836                      if a font element wraps an anchor and nothing else
837                      then move the font element inside the anchor since
838                      otherwise it won't alter the anchor text color
839                     */
840                     if (element.tag == tt.tagFont &&
841                         element.content != null &&
842                         element.content == element.last)
843                     {
844                         Node child = element.content;
845
846                         if (child.tag == tt.tagA)
847                         {
848                             child.parent = element.parent;
849                             child.next = element.next;
850                             child.prev = element.prev;
851
852                             if (child.prev != null)
853                                 child.prev.next = child;
854                             else
855                                 child.parent.content = child;
856
857                             if (child.next != null)
858                                 child.next.prev = child;
859                             else
860                                 child.parent.last = child;
861
862                             element.next = null;
863                             element.prev = null;
864                             element.parent = child;
865                             element.content = child.content;
866                             element.last = child.last;
867                             child.content = element;
868                             child.last = element;
869                             for (child = element.content; child != null; child = child.next)
870                                 child.parent = element;
871                         }
872                     }
873                     element.closed = true;
874                     Node.trimSpaces(lexer, element);
875                     Node.trimEmptyElement(lexer, element);
876                     return;
877                 }
878
879                 /* <u>...<u>  map 2nd <u> to </u> if 1st is explicit */
880                 /* otherwise emphasis nesting is probably unintentional */
881                 /* big and small have cumulative effect to leave them alone */
882                 if (node.type == Node.StartTag
883                         && node.tag == element.tag
884                         && lexer.isPushed(node)
885                         && !node.implicit
886                         && !element.implicit
887                         && node.tag != null && ((node.tag.model & Dict.CM_INLINE) != 0)
888                         && node.tag != tt.tagA
889                         && node.tag != tt.tagFont
890                         && node.tag != tt.tagBig
891                         && node.tag != tt.tagSmall)
892                 {
893                     if (element.content != null && node.attributes == null)
894                     {
895                         Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
896                         node.type = Node.EndTag;
897                         lexer.ungetToken();
898                         continue;
899                     }
900
901                     Report.warning(lexer, element, node, Report.NESTED_EMPHASIS);
902                 }
903
904                 if (node.type == Node.TextNode)
905                 {
906                     /* only called for 1st child */
907                     if (element.content == null &&
908                         !((mode & Lexer.Preformatted) != 0))
909                         Node.trimSpaces(lexer, element);
910
911                     if (node.start >= node.end)
912                     {
913                         continue;
914                     }
915
916                     Node.insertNodeAtEnd(element, node);
917                     continue;
918                 }
919
920                 /* mixed content model so allow text */
921                 if (Node.insertMisc(element, node))
922                     continue;
923
924                 /* deal with HTML tags */
925                 if (node.tag == tt.tagHtml)
926                 {
927                     if (node.type == Node.StartTag || node.type == Node.StartEndTag)
928                     {
929                         Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
930                         continue;
931                     }
932
933                     /* otherwise infer end of inline element */
934                     lexer.ungetToken();
935                     if (!((mode & Lexer.Preformatted) != 0))
936                         Node.trimSpaces(lexer, element);
937                     Node.trimEmptyElement(lexer, element);
938                     return;
939                 }
940
941                 /* within <dt> or <pre> map <p> to <br> */
942                 if (node.tag == tt.tagP &&
943                       node.type == Node.StartTag &&
944                       ((mode & Lexer.Preformatted) != 0 ||
945                        element.tag == tt.tagDt ||
946                       element.isDescendantOf(tt.tagDt)))
947                 {
948                     node.tag = tt.tagBr;
949                     node.element = "br";
950                     Node.trimSpaces(lexer, element);
951                     Node.insertNodeAtEnd(element, node);
952                     continue;
953                 }
954
955                 /* ignore unknown and PARAM tags */
956                 if (node.tag == null || node.tag == tt.tagParam)
957                 {
958                     Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
959                     continue;
960                 }
961
962                 if (node.tag == tt.tagBr && node.type == Node.EndTag)
963                     node.type = Node.StartTag;
964
965                 if (node.type == Node.EndTag)
966                 {
967                     /* coerce </br> to <br> */
968                     if (node.tag == tt.tagBr)
969                         node.type = Node.StartTag;
970                     else if (node.tag == tt.tagP)
971                     {
972                         /* coerce unmatched </p> to <br><br> */
973                         if (!element.isDescendantOf(tt.tagP))
974                         {
975                             Node.coerceNode(lexer, node, tt.tagBr);
976                             Node.trimSpaces(lexer, element);
977                             Node.insertNodeAtEnd(element, node);
978                             node = lexer.inferredTag("br");
979                             continue;
980                         }
981                     }
982                     else if ((node.tag.model & Dict.CM_INLINE) != 0
983                                 && node.tag != tt.tagA
984                                         && !((node.tag.model & Dict.CM_OBJECT) != 0)
985                                         && (element.tag.model & Dict.CM_INLINE) != 0)
986                     {
987                         /* allow any inline end tag to end current element */
988                         lexer.popInline( element);
989
990                         if (element.tag != tt.tagA)
991                         {
992                             if (node.tag == tt.tagA && node.tag != element.tag)
993                             {
994                                Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
995                                lexer.ungetToken();
996                             }
997                             else
998                             {
999                                 Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
1000                             }
1001
1002                             if (!((mode & Lexer.Preformatted) != 0))
1003                                 Node.trimSpaces(lexer, element);
1004                             Node.trimEmptyElement(lexer, element);
1005                             return;
1006                         }
1007
1008                         /* if parent is <a> then discard unexpected inline end tag */
1009                         Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1010                         continue;
1011                     }  /* special case </tr> etc. for stuff moved in front of table */
1012                     else if (lexer.exiled
1013                                 && node.tag.model != 0
1014                                 && (node.tag.model & Dict.CM_TABLE) != 0)
1015                     {
1016                         lexer.ungetToken();
1017                         Node.trimSpaces(lexer, element);
1018                         Node.trimEmptyElement(lexer, element);
1019                         return;
1020                     }
1021                 }
1022
1023                 /* allow any header tag to end current header */
1024                 if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0)
1025                 {
1026                     if (node.tag == element.tag)
1027                     {
1028                         Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
1029                     }
1030                     else
1031                     {
1032                         Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1033                         lexer.ungetToken();
1034                     }
1035                     if (!((mode & Lexer.Preformatted) != 0))
1036                         Node.trimSpaces(lexer, element);
1037                     Node.trimEmptyElement(lexer, element);
1038                     return;
1039                 }
1040
1041                 /*
1042                    an <A> tag to ends any open <A> element
1043                    but <A href=...> is mapped to </A><A href=...>
1044                 */
1045                 if (node.tag == tt.tagA && !node.implicit && lexer.isPushed(node))
1046                 {
1047                  /* coerce <a> to </a> unless it has some attributes */
1048                     if (node.attributes == null)
1049                     {
1050                         node.type = Node.EndTag;
1051                         Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
1052                         lexer.popInline( node);
1053                         lexer.ungetToken();
1054                         continue;
1055                     }
1056
1057                     lexer.ungetToken();
1058                     Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1059                     lexer.popInline( element);
1060                     if (!((mode & Lexer.Preformatted) != 0))
1061                         Node.trimSpaces(lexer, element);
1062                     Node.trimEmptyElement(lexer, element);
1063                     return;
1064                 }
1065
1066                 if ((element.tag.model & Dict.CM_HEADING) != 0)
1067                 {
1068                     if (node.tag == tt.tagCenter ||
1069                         node.tag == tt.tagDiv)
1070                     {
1071                         if (node.type != Node.StartTag &&
1072                             node.type != Node.StartEndTag)
1073                         {
1074                             Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1075                             continue;
1076                         }
1077
1078                         Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1079
1080                         /* insert center as parent if heading is empty */
1081                         if (element.content == null)
1082                         {
1083                             Node.insertNodeAsParent(element, node);
1084                             continue;
1085                         }
1086
1087                         /* split heading and make center parent of 2nd part */
1088                         Node.insertNodeAfterElement(element, node);
1089
1090                         if (!((mode & Lexer.Preformatted) != 0))
1091                             Node.trimSpaces(lexer, element);
1092
1093                         element = lexer.cloneNode(element);
1094                         element.start = lexer.lexsize;
1095                         element.end   = lexer.lexsize;
1096                         Node.insertNodeAtEnd(node, element);
1097                         continue;
1098                     }
1099
1100                     if (node.tag == tt.tagHr)
1101                     {
1102                         if (node.type != Node.StartTag &&
1103                             node.type != Node.StartEndTag)
1104                         {
1105                             Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1106                             continue;
1107                         }
1108
1109                         Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1110
1111                         /* insert hr before heading if heading is empty */
1112                         if (element.content == null)
1113                         {
1114                             Node.insertNodeBeforeElement(element, node);
1115                             continue;
1116                         }
1117
1118                         /* split heading and insert hr before 2nd part */
1119                         Node.insertNodeAfterElement(element, node);
1120
1121                         if (!((mode & Lexer.Preformatted) != 0))
1122                             Node.trimSpaces(lexer, element);
1123
1124                         element = lexer.cloneNode(element);
1125                         element.start = lexer.lexsize;
1126                         element.end   = lexer.lexsize;
1127                         Node.insertNodeAfterElement(node, element);
1128                         continue;
1129                     }
1130                 }
1131
1132                 if (element.tag == tt.tagDt)
1133                 {
1134                     if (node.tag == tt.tagHr)
1135                     {
1136                         Node dd;
1137
1138                         if (node.type != Node.StartTag &&
1139                             node.type != Node.StartEndTag)
1140                         {
1141                             Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1142                             continue;
1143                         }
1144
1145                         Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1146                         dd = lexer.inferredTag("dd");
1147
1148                         /* insert hr within dd before dt if dt is empty */
1149                         if (element.content == null)
1150                         {
1151                             Node.insertNodeBeforeElement(element, dd);
1152                             Node.insertNodeAtEnd(dd, node);
1153                             continue;
1154                         }
1155
1156                         /* split dt and insert hr within dd before 2nd part */
1157                         Node.insertNodeAfterElement(element, dd);
1158                         Node.insertNodeAtEnd(dd, node);
1159
1160                         if (!((mode & Lexer.Preformatted) != 0))
1161                             Node.trimSpaces(lexer, element);
1162
1163                         element = lexer.cloneNode(element);
1164                         element.start = lexer.lexsize;
1165                         element.end   = lexer.lexsize;
1166                         Node.insertNodeAfterElement(dd, element);
1167                         continue;
1168                     }
1169                 }
1170
1171
1172                 /* 
1173                   if this is the end tag for an ancestor element
1174                   then infer end tag for this element
1175                 */
1176                 if (node.type == Node.EndTag)
1177                 {
1178                     for (parent = element.parent;
1179                             parent != null; parent = parent.parent)
1180                     {
1181                         if (node.tag == parent.tag)
1182                         {
1183                             if (!((element.tag.model & Dict.CM_OPT) != 0) &&
1184                                 !element.implicit)
1185                                 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1186
1187                             if (element.tag == tt.tagA)
1188                                 lexer.popInline(element);
1189
1190                             lexer.ungetToken();
1191
1192                             if (!((mode & Lexer.Preformatted) != 0))
1193                                 Node.trimSpaces(lexer, element);
1194
1195                             Node.trimEmptyElement(lexer, element);
1196                             return;
1197                         }
1198                     }
1199                 }
1200
1201                 /* block level tags end this element */
1202                 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1203                 {
1204                     if (node.type != Node.StartTag)
1205                     {
1206                         Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1207                         continue;
1208                     }
1209
1210                     if (!((element.tag.model & Dict.CM_OPT) != 0))
1211                         Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1212
1213                     if ((node.tag.model & Dict.CM_HEAD) != 0 &&
1214                         !((node.tag.model & Dict.CM_BLOCK) != 0))
1215                     {
1216                         moveToHead(lexer, element, node);
1217                         continue;
1218                     }
1219
1220                     /*
1221                        prevent anchors from propagating into block tags
1222                        except for headings h1 to h6
1223                     */
1224                     if (element.tag == tt.tagA)
1225                     {
1226                         if (node.tag != null &&
1227                             !((node.tag.model & Dict.CM_HEADING) != 0))
1228                             lexer.popInline(element);
1229                         else if (!(element.content != null))
1230                         {
1231                             Node.discardElement(element);
1232                             lexer.ungetToken();
1233                             return;
1234                         }
1235                     }
1236
1237                     lexer.ungetToken();
1238
1239                     if (!((mode & Lexer.Preformatted) != 0))
1240                         Node.trimSpaces(lexer, element);
1241
1242                     Node.trimEmptyElement(lexer, element);
1243                     return;
1244                 }
1245
1246                 /* parse inline element */
1247                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1248                 {
1249                     if (node.implicit)
1250                         Report.warning(lexer, element, node, Report.INSERTING_TAG);
1251
1252                     /* trim white space before <br> */
1253                     if (node.tag == tt.tagBr)
1254                         Node.trimSpaces(lexer, element);
1255             
1256                     Node.insertNodeAtEnd(element, node);
1257                     parseTag(lexer, node, mode);
1258                     continue;
1259                 }
1260
1261                 /* discard unexpected tags */
1262                 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1263             }
1264
1265             if (!((element.tag.model & Dict.CM_OPT) != 0))
1266                 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
1267
1268             Node.trimEmptyElement(lexer, element);
1269         }
1270     };
1271
1272     public static class ParseList implements Parser {
1273
1274         public void parse( Lexer lexer, Node list, short mode )
1275         {
1276             Node node;
1277             Node parent;
1278             TagTable tt = lexer.configuration.tt;
1279
1280             if ((list.tag.model & Dict.CM_EMPTY) != 0)
1281                 return;
1282
1283             lexer.insert = -1;  /* defer implicit inline start tags */
1284
1285             while (true)
1286             {
1287                 node = lexer.getToken(Lexer.IgnoreWhitespace);
1288                 if (node == null) break;
1289
1290                 if (node.tag == list.tag && node.type == Node.EndTag)
1291                 {
1292                     if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1293                         Node.coerceNode(lexer, list, tt.tagUl);
1294
1295                     list.closed = true;
1296                     Node.trimEmptyElement(lexer, list);
1297                     return;
1298                 }
1299
1300                 /* deal with comments etc. */
1301                 if (Node.insertMisc(list, node))
1302                     continue;
1303
1304                 if (node.type != Node.TextNode && node.tag == null)
1305                 {
1306                     Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1307                     continue;
1308                 }
1309
1310                 /* 
1311                   if this is the end tag for an ancestor element
1312                   then infer end tag for this element
1313                 */
1314                 if (node.type == Node.EndTag)
1315                 {
1316                     if (node.tag == tt.tagForm)
1317                     {
1318                         lexer.badForm = 1;
1319                         Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1320                         continue;
1321                     }
1322
1323                     if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0)
1324                     {
1325                         Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1326                         lexer.popInline(node);
1327                         continue;
1328                     }
1329
1330                     for (parent = list.parent;
1331                             parent != null; parent = parent.parent)
1332                     {
1333                         if (node.tag == parent.tag)
1334                         {
1335                             Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1336                             lexer.ungetToken();
1337
1338                             if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1339                                 Node.coerceNode(lexer, list, tt.tagUl);
1340
1341                             Node.trimEmptyElement(lexer, list);
1342                             return;
1343                         }
1344                     }
1345
1346                     Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1347                     continue;
1348                 }
1349
1350                 if (node.tag != tt.tagLi)
1351                 {
1352                     lexer.ungetToken();
1353
1354                     if (node.tag != null &&
1355                         (node.tag.model & Dict.CM_BLOCK) != 0 &&
1356                         lexer.excludeBlocks)
1357                     {
1358                         Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1359                         Node.trimEmptyElement(lexer, list);
1360                         return;
1361                     }
1362
1363                     node = lexer.inferredTag("li");
1364                     node.addAttribute("style", "list-style: none");
1365                     Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1366                 }
1367
1368                 /* node should be <LI> */
1369                 Node.insertNodeAtEnd(list, node);
1370                 parseTag(lexer, node, Lexer.IgnoreWhitespace);
1371             }
1372
1373             if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1374                 Node.coerceNode(lexer, list, tt.tagUl);
1375
1376             Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
1377             Node.trimEmptyElement(lexer, list);
1378         }
1379
1380     };
1381
1382     public static class ParseDefList implements Parser {
1383
1384         public void parse( Lexer lexer, Node list, short mode )
1385         {
1386             Node node, parent;
1387             TagTable tt = lexer.configuration.tt;
1388
1389             if ((list.tag.model & Dict.CM_EMPTY) != 0)
1390                 return;
1391
1392             lexer.insert = -1;  /* defer implicit inline start tags */
1393
1394             while (true)
1395             {
1396                 node = lexer.getToken(Lexer.IgnoreWhitespace);
1397                 if (node == null) break;
1398                 if (node.tag == list.tag && node.type == Node.EndTag)
1399                 {
1400                     list.closed = true;
1401                     Node.trimEmptyElement(lexer, list);
1402                     return;
1403                 }
1404
1405                 /* deal with comments etc. */
1406                 if (Node.insertMisc(list, node))
1407                     continue;
1408
1409                 if (node.type == Node.TextNode)
1410                 {
1411                     lexer.ungetToken();
1412                     node = lexer.inferredTag( "dt");
1413                     Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1414                 }
1415
1416                 if (node.tag == null)
1417                 {
1418                     Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1419                     continue;
1420                 }
1421
1422                 /* 
1423                   if this is the end tag for an ancestor element
1424                   then infer end tag for this element
1425                 */
1426                 if (node.type == Node.EndTag)
1427                 {
1428                     if (node.tag == tt.tagForm)
1429                     {
1430                         lexer.badForm = 1;
1431                         Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1432                         continue;
1433                     }
1434
1435                     for (parent = list.parent;
1436                             parent != null; parent = parent.parent)
1437                     {
1438                         if (node.tag == parent.tag)
1439                         {
1440                             Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1441
1442                             lexer.ungetToken();
1443                             Node.trimEmptyElement(lexer, list);
1444                             return;
1445                         }
1446                     }
1447                 }
1448
1449                 /* center in a dt or a dl breaks the dl list in two */
1450                 if (node.tag == tt.tagCenter)
1451                 {
1452                     if (list.content != null)
1453                         Node.insertNodeAfterElement(list, node);
1454                     else /* trim empty dl list */
1455                     {
1456                         Node.insertNodeBeforeElement(list, node);
1457                         Node.discardElement(list);
1458                     }
1459
1460                     /* and parse contents of center */
1461                     parseTag(lexer, node, mode);
1462
1463                     /* now create a new dl element */
1464                     list = lexer.inferredTag("dl");
1465                     Node.insertNodeAfterElement(node, list);
1466                     continue;
1467                 }
1468
1469                 if (!(node.tag == tt.tagDt || node.tag == tt.tagDd))
1470                 {
1471                     lexer.ungetToken();
1472
1473                     if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
1474                     {
1475                         Report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN);
1476                         Node.trimEmptyElement(lexer, list);
1477                         return;
1478                     }
1479
1480                     /* if DD appeared directly in BODY then exclude blocks */
1481                     if (!((node.tag.model & Dict.CM_INLINE) != 0) && lexer.excludeBlocks)
1482                     {
1483                         Node.trimEmptyElement(lexer, list);
1484                         return;
1485                     }
1486
1487                     node = lexer.inferredTag( "dd");
1488                     Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1489                 }
1490
1491                 if (node.type == Node.EndTag)
1492                 {
1493                     Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1494                     continue;
1495                 }
1496         
1497                 /* node should be <DT> or <DD>*/
1498                 Node.insertNodeAtEnd(list, node);
1499                 parseTag(lexer, node, Lexer.IgnoreWhitespace);
1500             }
1501
1502             Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
1503             Node.trimEmptyElement(lexer, list);
1504         }
1505
1506     };
1507
1508     public static class ParsePre implements Parser {
1509
1510         public void parse( Lexer lexer, Node pre, short mode )
1511         {
1512             Node node, parent;
1513             TagTable tt = lexer.configuration.tt;
1514
1515             if ((pre.tag.model & Dict.CM_EMPTY) != 0)
1516                 return;
1517
1518             if ((pre.tag.model & Dict.CM_OBSOLETE) != 0)
1519                 Node.coerceNode(lexer, pre, tt.tagPre);
1520
1521             lexer.inlineDup( null); /* tell lexer to insert inlines if needed */
1522
1523             while (true)
1524             {
1525                 node = lexer.getToken(Lexer.Preformatted);
1526                 if (node == null) break;
1527                 if (node.tag == pre.tag && node.type == Node.EndTag)
1528                 {
1529                     Node.trimSpaces(lexer, pre);
1530                     pre.closed = true;
1531                     Node.trimEmptyElement(lexer, pre);
1532                     return;
1533                 }
1534
1535                 if (node.tag == tt.tagHtml)
1536                 {
1537                     if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1538                         Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1539
1540                     continue;
1541                 }
1542
1543                 if (node.type == Node.TextNode)
1544                 {
1545                     /* if first check for inital newline */
1546                     if (pre.content == null)
1547                     {
1548                         if (node.textarray[node.start] == (byte)'\n')
1549                             ++node.start;
1550
1551                         if (node.start >= node.end)
1552                         {
1553                             continue;
1554                         }
1555                     }
1556
1557                     Node.insertNodeAtEnd(pre, node);
1558                     continue;
1559                 }
1560
1561                 /* deal with comments etc. */
1562                 if (Node.insertMisc(pre, node))
1563                     continue;
1564
1565                 /* discard unknown  and PARAM tags */
1566                 if (node.tag == null || node.tag == tt.tagParam)
1567                 {
1568                     Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1569                     continue;
1570                 }
1571
1572                 if (node.tag == tt.tagP)
1573                 {
1574                     if (node.type == Node.StartTag)
1575                     {
1576                         Report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);
1577
1578                         /* trim white space before <p> in <pre>*/
1579                         Node.trimSpaces(lexer, pre);
1580             
1581                         /* coerce both <p> and </p> to <br> */
1582                         Node.coerceNode(lexer, node, tt.tagBr);
1583                         Node.insertNodeAtEnd(pre, node);
1584                     }
1585                     else
1586                     {
1587                         Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1588                     }
1589                     continue;
1590                 }
1591
1592                 if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0))
1593                 {
1594                     moveToHead(lexer, pre, node);
1595                     continue;
1596                 }
1597
1598                 /* 
1599                   if this is the end tag for an ancestor element
1600                   then infer end tag for this element
1601                 */
1602                 if (node.type == Node.EndTag)
1603                 {
1604                     if (node.tag == tt.tagForm)
1605                     {
1606                         lexer.badForm = 1;
1607                         Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1608                         continue;
1609                     }
1610
1611                     for (parent = pre.parent;
1612                             parent != null; parent = parent.parent)
1613                     {
1614                         if (node.tag == parent.tag)
1615                         {
1616                             Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1617
1618                             lexer.ungetToken();
1619                             Node.trimSpaces(lexer, pre);
1620                             Node.trimEmptyElement(lexer, pre);
1621                             return;
1622                         }
1623                     }
1624                 }
1625
1626                 /* what about head content, HEAD, BODY tags etc? */
1627                 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1628                 {
1629                     if (node.type != Node.StartTag)
1630                     {
1631                         Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1632                         continue;
1633                     }
1634  
1635                     Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1636                     lexer.excludeBlocks = true;
1637
1638                     /* check if we need to infer a container */
1639                     if ((node.tag.model & Dict.CM_LIST) != 0)
1640                     {
1641                         lexer.ungetToken();
1642                         node = lexer.inferredTag( "ul");
1643                         Node.addClass(node, "noindent");
1644                     }
1645                     else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
1646                     {
1647                         lexer.ungetToken();
1648                         node = lexer.inferredTag( "dl");
1649                     }
1650                     else if ((node.tag.model & Dict.CM_TABLE) != 0)
1651                     {
1652                         lexer.ungetToken();
1653                         node = lexer.inferredTag( "table");
1654                     }
1655
1656                     Node.insertNodeAfterElement(pre, node);
1657                     pre = lexer.inferredTag( "pre");
1658                     Node.insertNodeAfterElement(node, pre);
1659                     parseTag(lexer, node, Lexer.IgnoreWhitespace);
1660                     lexer.excludeBlocks = false;
1661                     continue;
1662                 }
1663                 /*
1664                 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1665                 {
1666                     Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1667                     lexer.ungetToken();
1668                     return;
1669                 }
1670                 */
1671                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1672                 {
1673                     /* trim white space before <br> */
1674                     if (node.tag == tt.tagBr)
1675                         Node.trimSpaces(lexer, pre);
1676             
1677                     Node.insertNodeAtEnd(pre, node);
1678                     parseTag(lexer, node, Lexer.Preformatted);
1679                     continue;
1680                 }
1681
1682                 /* discard unexpected tags */
1683                 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1684             }
1685
1686             Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
1687             Node.trimEmptyElement(lexer, pre);
1688         }
1689
1690     };
1691
1692     public static class ParseBlock implements Parser {
1693
             /**
              * Parses the content of a block-level element.  Tokens are read from
              * the lexer and are either appended to <code>element</code>,
              * discarded with a warning, or pushed back with ungetToken() so that
              * an enclosing parser sees them after this element's end has been
              * inferred.
              *
              * @param lexer   token source; its inline stack, its excludeBlocks and
              *                exiled flags and its configuration are consulted here
              * @param element the element being filled (see the note below the
              *                signature); nothing is parsed if its model has CM_EMPTY
              * @param mode    not used as passed in: it is reset to
              *                Lexer.IgnoreWhitespace before the first getToken() call
              */
1694         public void parse( Lexer lexer, Node element, short mode )
1695         /*
1696            element is node created by the lexer
1697            upon seeing the start tag, or by the
1698            parser when the start tag is inferred
1699         */
1700         {
1701             Node node, parent;
                 /* while set, the next text node or non-implicit inline start tag
                    triggers one lexer.inlineDup(node) call (see both uses below) */
1702             boolean checkstack;
                 /* value of lexer.istackbase on entry; only saved and restored
                    when the element's model has CM_OBJECT */
1703             int istackbase = 0;
1704             TagTable tt = lexer.configuration.tt;
1705
1706             checkstack = true;
1707
                 /* an empty element has no content to parse */
1708             if ((element.tag.model & Dict.CM_EMPTY) != 0)
1709                 return;
1710
                 /* a FORM nested in another FORM is reported, then parsed anyway */
1711             if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm))
1712                 Report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
1713
1714             /*
1715              InlineDup() asks the lexer to insert inline emphasis tags
1716              currently pushed on the istack, but take care to avoid
1717              propagating inline emphasis inside OBJECT or APPLET.
1718              For these elements a fresh inline stack context is created
1719              and disposed of upon reaching the end of the element.
1720              They thus behave like table cells in this respect.
1721             */
1722             if ((element.tag.model & Dict.CM_OBJECT) != 0)
1723             {
1724                 istackbase = lexer.istackbase;
1725                 lexer.istackbase = lexer.istack.size();
1726             }
1727
1728             if (!((element.tag.model & Dict.CM_MIXED) != 0))
1729                 lexer.inlineDup( null);
1730
                 /* start out ignoring whitespace; the mode becomes MixedContent
                    once text or an inline element has been added (see below) */
1731             mode = Lexer.IgnoreWhitespace;
1732
1733             while (true)
1734             {
1735                 node = lexer.getToken(mode /*Lexer.MixedContent*/);
                     /* no token returned (presumably end of input): fall out of
                        the loop to the missing-end-tag handling at the bottom */
1736                 if (node == null) break;
1737                 /* end tag for this element */
                     /* element.was is accepted as well -- presumably the tag the
                        element had before it was coerced; TODO confirm */
1738                 if (node.type == Node.EndTag && node.tag != null &&
1739                     (node.tag == element.tag || element.was == node.tag))
1740                 {
1741
1742                     if ((element.tag.model & Dict.CM_OBJECT) != 0)
1743                     {
1744                         /* pop inline stack */
1745                         while (lexer.istack.size() > lexer.istackbase)
1746                             lexer.popInline( null);
1747                         lexer.istackbase = istackbase;
1748                     }
1749
1750                     element.closed = true;
1751                     Node.trimSpaces(lexer, element);
1752                     Node.trimEmptyElement(lexer, element);
1753                     return;
1754                 }
1755
                     /* <html>, <head> and <body> tags are dropped here; only the
                        start-tag forms produce a warning */
1756                 if (node.tag == tt.tagHtml ||
1757                     node.tag == tt.tagHead ||
1758                     node.tag == tt.tagBody)
1759                 {
1760                     if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1761                         Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1762
1763                     continue;
1764                 }
1765
                     /* end tags that do not close this element */
1766                 if (node.type == Node.EndTag)
1767                 {
1768                     if (node.tag == null)
1769                     {
                             /* unknown end tag: discard */
1770                         Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1771
1772                         continue;
1773                     }
1774                     else if (node.tag == tt.tagBr)
1775                         node.type = Node.StartTag;  /* treat </br> as <br> */
1776                     else if (node.tag == tt.tagP)
1777                     {
                             /* </p> with no open <p>: the token is coerced to <br>
                                and appended; node is then replaced by a second,
                                inferred <br> which continues through the checks
                                below */
1778                         Node.coerceNode(lexer, node, tt.tagBr);
1779                         Node.insertNodeAtEnd(element, node);
1780                         node = lexer.inferredTag("br");
1781                     }
1782                     else
1783                     {
1784                         /* 
1785                           if this is the end tag for an ancestor element
1786                           then infer end tag for this element
1787                         */
1788                         for (parent = element.parent;
1789                                 parent != null; parent = parent.parent)
1790                         {
1791                             if (node.tag == parent.tag)
1792                             {
                                     /* elements with CM_OPT end silently */
1793                                 if (!((element.tag.model & Dict.CM_OPT) != 0))
1794                                     Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1795
                                     /* leave the end tag for the ancestor's parser */
1796                                 lexer.ungetToken();
1797
1798                                 if ((element.tag.model & Dict.CM_OBJECT) != 0)
1799                                 {
1800                                     /* pop inline stack */
1801                                     while (lexer.istack.size() > lexer.istackbase)
1802                                         lexer.popInline( null);
1803                                     lexer.istackbase = istackbase;
1804                                 }
1805
1806                                 Node.trimSpaces(lexer, element);
1807                                 Node.trimEmptyElement(lexer, element);
1808                                 return;
1809                             }
1810                         }
1811                         /* special case </tr> etc. for stuff moved in front of table */
                             /* lexer.exiled is set by ParseTableTag while it handles
                                such content; the end tag is pushed back and this
                                element is ended without a warning */
1812                         if (lexer.exiled
1813                                     && node.tag.model != 0
1814                                     && (node.tag.model & Dict.CM_TABLE) != 0)
1815                         {
1816                             lexer.ungetToken();
1817                             Node.trimSpaces(lexer, element);
1818                             Node.trimEmptyElement(lexer, element);
1819                             return;
1820                         }
1821                     }
1822                 }
1823
1824                 /* mixed content model permits text */
1825                 if (node.type == Node.TextNode)
1826                 {
                         /* true for a text node of at most one character whose
                            first character is a space */
1827                     boolean iswhitenode = false;
1828
1829                     if (node.type == Node.TextNode &&
1830                            node.end <= node.start + 1 &&
1831                            lexer.lexbuf[node.start] == (byte)' ')
1832                         iswhitenode = true;
1833
                         /* with EncloseBlockText configured, other text is pushed
                            back and re-read as the content of an inferred <p> */
1834                     if (lexer.configuration.EncloseBlockText && !iswhitenode)
1835                     {
1836                         lexer.ungetToken();
1837                         node = lexer.inferredTag("p");
1838                         Node.insertNodeAtEnd(element, node);
1839                         parseTag(lexer, node, Lexer.MixedContent);
1840                         continue;
1841                     }
1842
1843                     if (checkstack)
1844                     {
1845                         checkstack = false;
1846
1847                         if (!((element.tag.model & Dict.CM_MIXED) != 0))
1848                         {
                                 /* a positive result skips this token; presumably
                                    the lexer has queued inline tags and hands the
                                    text back afterwards -- TODO confirm */
1849                             if (lexer.inlineDup( node) > 0)
1850                                 continue;
1851                         }
1852                     }
1853
1854                     Node.insertNodeAtEnd(element, node);
1855                     mode = Lexer.MixedContent;
1856                     /*
1857                       HTML4 strict doesn't allow mixed content for
1858                       elements with %block; as their content model
1859                     */
1860                     lexer.versions &= ~Dict.VERS_HTML40_STRICT;
1861                     continue;
1862                 }
1863
                     /* let Node.insertMisc() take the node if it accepts it */
1864                 if (Node.insertMisc(element, node))
1865                     continue;
1866
1867                 /* allow PARAM elements? */
                     /* only as a start tag inside an element whose model has
                        CM_PARAM */
1868                 if (node.tag == tt.tagParam)
1869                 {
1870                     if (((element.tag.model & Dict.CM_PARAM) != 0) &&
1871                             (node.type == Node.StartTag || node.type == Node.StartEndTag))
1872                     {
1873                         Node.insertNodeAtEnd(element, node);
1874                         continue;
1875                     }
1876
1877                     /* otherwise discard it */
1878                     Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1879                     continue;
1880                 }
1881
1882                 /* allow AREA elements? */
                     /* only as a start tag directly inside MAP */
1883                 if (node.tag == tt.tagArea)
1884                 {
1885                     if ((element.tag == tt.tagMap) &&
1886                             (node.type == Node.StartTag || node.type == Node.StartEndTag))
1887                     {
1888                         Node.insertNodeAtEnd(element, node);
1889                         continue;
1890                     }
1891
1892                     /* otherwise discard it */
1893                     Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1894                     continue;
1895                 }
1896
1897                 /* ignore unknown start/end tags */
1898                 if (node.tag == null)
1899                 {
1900                     Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1901                     continue;
1902                 }
1903
1904                 /*
1905                   Allow Dict.CM_INLINE elements here.
1906
1907                   Allow Dict.CM_BLOCK elements here unless
1908                   lexer.excludeBlocks is yes.
1909
1910                   LI and DD are special cased.
1911
1912                   Otherwise infer end tag for this element.
1913                 */
1914
1915                 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1916                 {
                         /* a non-inline end tag that reached this point is dropped */
1917                     if (node.type != Node.StartTag && node.type != Node.StartEndTag)
1918                     {
1919                         Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1920                         continue;
1921                     }
1922
1923                     if (element.tag == tt.tagTd || element.tag == tt.tagTh)
1924                     {
1925                         /* if parent is a table cell, avoid inferring the end of the cell */
1926
1927                         if ((node.tag.model & Dict.CM_HEAD) != 0)
1928                         {
1929                             moveToHead(lexer, element, node);
1930                             continue;
1931                         }
1932
                             /* a stray list or definition-list item is pushed back
                                and an inferred <ul class="noindent"> or <dl> takes
                                its place as the node to insert; excludeBlocks is
                                switched on and not reset in this method */
1933                         if ((node.tag.model & Dict.CM_LIST) != 0)
1934                         {
1935                             lexer.ungetToken();
1936                             node = lexer.inferredTag( "ul");
1937                             Node.addClass(node, "noindent");
1938                             lexer.excludeBlocks = true;
1939                         }
1940                         else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
1941                         {
1942                             lexer.ungetToken();
1943                             node = lexer.inferredTag( "dl");
1944                             lexer.excludeBlocks = true;
1945                         }
1946
1947                         /* infer end of current table cell */
1948                         if (!((node.tag.model & Dict.CM_BLOCK) != 0))
1949                         {
1950                             lexer.ungetToken();
1951                             Node.trimSpaces(lexer, element);
1952                             Node.trimEmptyElement(lexer, element);
1953                             return;
1954                         }
1955                     }
1956                     else if ((node.tag.model & Dict.CM_BLOCK) != 0)
1957                     {
                             /* nested blocks are accepted unless excludeBlocks is
                                set, in which case this element ends here */
1958                         if (lexer.excludeBlocks)
1959                         {
1960                             if (!((element.tag.model & Dict.CM_OPT) != 0))
1961                                 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1962
1963                             lexer.ungetToken();
1964
                                 /* NOTE(review): unlike the other CM_OBJECT exits,
                                    this one restores istackbase without popping
                                    the inline stack first -- confirm intended */
1965                             if ((element.tag.model & Dict.CM_OBJECT) != 0)
1966                                 lexer.istackbase = istackbase;
1967
1968                             Node.trimSpaces(lexer, element);
1969                             Node.trimEmptyElement(lexer, element);
1970                             return;
1971                         }
1972                     }
1973                     else /* things like list items */
1974                     {
                             /* no warning for CM_OPT or implicit elements */
1975                         if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
1976                             Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1977
1978                         if ((node.tag.model & Dict.CM_HEAD) != 0)
1979                         {
1980                             moveToHead(lexer, element, node);
1981                             continue;
1982                         }
1983
                             /* every remaining path re-reads this token later:
                                either in an enclosing parser after returning, or
                                inside the container inferred below */
1984                         lexer.ungetToken();
1985
1986                         if ((node.tag.model & Dict.CM_LIST) != 0)
1987                         {
                                 /* already inside a list: end this element so the
                                    list's own parser receives the item */
1988                             if (element.parent != null && element.parent.tag != null &&
1989                                 element.parent.tag.parser == getParseList())
1990                             {
1991                                 Node.trimSpaces(lexer, element);
1992                                 Node.trimEmptyElement(lexer, element);
1993                                 return;
1994                             }
1995
1996                             node = lexer.inferredTag("ul");
1997                             Node.addClass(node, "noindent");
1998                         }
1999                         else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
2000                         {
                                 /* NOTE(review): element.parent is dereferenced here
                                    without the null guard used in the CM_LIST
                                    branch above -- confirm it cannot be null */
2001                             if (element.parent.tag == tt.tagDl)
2002                             {
2003                                 Node.trimSpaces(lexer, element);
2004                                 Node.trimEmptyElement(lexer, element);
2005                                 return;
2006                             }
2007
2008                             node = lexer.inferredTag("dl");
2009                         }
2010                         else if ((node.tag.model & Dict.CM_TABLE) != 0 ||
2011                                  (node.tag.model & Dict.CM_ROW) != 0)
2012                         {
2013                             node = lexer.inferredTag("table");
2014                         }
2015                         else if ((element.tag.model & Dict.CM_OBJECT) != 0)
2016                         {
2017                             /* pop inline stack */
2018                             while (lexer.istack.size() > lexer.istackbase)
2019                                 lexer.popInline( null);
2020                             lexer.istackbase = istackbase;
2021                             Node.trimSpaces(lexer, element);
2022                             Node.trimEmptyElement(lexer, element);
2023                             return;
2024
2025                         }
2026                         else
2027                         {
2028                             Node.trimSpaces(lexer, element);
2029                             Node.trimEmptyElement(lexer, element);
2030                             return;
2031                         }
2032                     }
2033                 }
2034
2035                 /* parse known element */
2036                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
2037                 {
2038                     if ((node.tag.model & Dict.CM_INLINE) != 0)
2039                     {
2040                         if (checkstack && !node.implicit)
2041                         {
2042                             checkstack = false;
2043
2044                             if (lexer.inlineDup( node) > 0)
2045                                 continue;
2046                         }
2047
2048                         mode = Lexer.MixedContent;
2049                     }
2050                     else
2051                     {
                             /* after a non-inline child, re-arm the inline stack
                                check and go back to ignoring whitespace */
2052                         checkstack = true;
2053                         mode = Lexer.IgnoreWhitespace;
2054                     }
2055
2056                     /* trim white space before <br> */
2057                     if (node.tag == tt.tagBr)
2058                         Node.trimSpaces(lexer, element);
2059
2060                     Node.insertNodeAtEnd(element, node);
2061             
                         /* elements created by inferredTag() above are reported */
2062                     if (node.implicit)
2063                         Report.warning(lexer, element, node, Report.INSERTING_TAG);
2064
                         /* the child is always parsed with IgnoreWhitespace; the
                            local mode only affects this loop's getToken() calls */
2065                     parseTag(lexer, node, Lexer.IgnoreWhitespace /*Lexer.MixedContent*/);
2066                     continue;
2067                 }
2068
2069                 /* discard unexpected tags */
2070                 if (node.type == Node.EndTag)
2071                     lexer.popInline( node);  /* if inline end tag */
2072
2073                 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2074             }
2075
                 /* reached only through the break above: the tokens ran out before
                    the element was closed (node is null at this point) */
2076             if (!((element.tag.model & Dict.CM_OPT) != 0))
2077                 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
2078
2079             if ((element.tag.model & Dict.CM_OBJECT) != 0)
2080             {
2081                 /* pop inline stack */
2082                 while (lexer.istack.size() > lexer.istackbase)
2083                     lexer.popInline( null);
2084                 lexer.istackbase = istackbase;
2085             }
2086
2087             Node.trimSpaces(lexer, element);
2088             Node.trimEmptyElement(lexer, element);
2089         }
2090
2091     };
2092
2093     public static class ParseTableTag implements Parser {
2094
2095         public void parse( Lexer lexer, Node table, short mode )
2096         {
2097             Node node, parent;
2098             int istackbase;
2099             TagTable tt = lexer.configuration.tt;
2100
2101             lexer.deferDup();
2102             istackbase = lexer.istackbase;
2103             lexer.istackbase = lexer.istack.size();
2104     
2105             while (true)
2106             {
2107                 node = lexer.getToken(Lexer.IgnoreWhitespace);
2108                 if (node == null) break;
2109                 if (node.tag == table.tag && node.type == Node.EndTag)
2110                 {
2111                     lexer.istackbase = istackbase;
2112                     table.closed = true;
2113                     Node.trimEmptyElement(lexer, table);
2114                     return;
2115                 }
2116
2117                 /* deal with comments etc. */
2118                 if (Node.insertMisc(table, node))
2119                     continue;
2120
2121                 /* discard unknown tags */
2122                 if (node.tag == null && node.type != Node.TextNode)
2123                 {
2124                     Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2125                     continue;
2126                 }
2127
2128                 /* if TD or TH or text or inline or block then infer <TR> */
2129
2130                 if (node.type != Node.EndTag)
2131                 {
2132                     if (node.tag == tt.tagTd || 
2133                         node.tag == tt.tagTh || 
2134                         node.tag == tt.tagTable)
2135                     {
2136                         lexer.ungetToken();
2137                         node = lexer.inferredTag( "tr");
2138                         Report.warning(lexer, table, node, Report.MISSING_STARTTAG);
2139                     }
2140                     else if (node.type == Node.TextNode
2141                                || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2142                     {
2143                         Node.insertNodeBeforeElement(table, node);
2144                         Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
2145                         lexer.exiled = true;
2146
2147                         /* AQ: TODO
2148                            Line 2040 of parser.c (13 Jan 2000) reads as follows:
2149                            if (!node->type == TextNode)
2150                            This will always evaluate to false.
2151                            This has been reported to Dave Raggett <dsr@w3.org>
2152                         */
2153                         //Should be?: if (!(node.type == Node.TextNode))
2154                         if (false)
2155                             parseTag(lexer, node, Lexer.IgnoreWhitespace);
2156
2157                         lexer.exiled = false;
2158                         continue;
2159                     }
2160                     else if ((node.tag.model & Dict.CM_HEAD) != 0)
2161                     {
2162                         moveToHead(lexer, table, node);
2163                         continue;
2164                     }
2165                 }
2166
2167                 /* 
2168                   if this is the end tag for an ancestor element
2169                   then infer end tag for this element
2170                 */
2171                 if (node.type == Node.EndTag)
2172                 {
2173                     if (node.tag == tt.tagForm)
2174                     {
2175                         lexer.badForm = 1;
2176                         Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2177                         continue;
2178                     }
2179
2180                     if (node.tag != null && (node.tag.model & (Dict.CM_TABLE|Dict.CM_ROW)) != 0)
2181                     {
2182                         Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2183                         continue;
2184                     }
2185
2186                     for (parent = table.parent;
2187                             parent != null; parent = parent.parent)
2188                     {
2189                         if (node.tag == parent.tag)
2190                         {
2191                             Report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE);
2192                             lexer.ungetToken();
2193                             lexer.istackbase = istackbase;
2194                             Node.trimEmptyElement(lexer, table);
2195                             return;
2196                         }
2197                     }
2198                 }
2199
2200                 if (!((node.tag.model & Dict.CM_TABLE) != 0))
2201                 {
2202                     lexer.ungetToken();
2203                     Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
2204                     lexer.istackbase = istackbase;
2205                     Node.trimEmptyElement(lexer, table);
2206                     return;
2207                 }
2208
2209                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
2210                 {
2211                     Node.insertNodeAtEnd(table, node);;
2212                     parseTag(lexer, node, Lexer.IgnoreWhitespace);
2213                     continue;
2214                 }
2215
2216                 /* discard unexpected text nodes and end tags */
2217                 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2218             }
2219
2220             Report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR);
2221             Node.trimEmptyElement(lexer, table);
2222             lexer.istackbase = istackbase;
2223         }
2224
2225     };
2226
2227     public static class ParseColGroup implements Parser {
2228
             /**
              * Parses the content of a COLGROUP element.  Only &lt;col&gt; start
              * tags (and whatever Node.insertMisc() accepts) become children.
              * Text, any other known tag and the end tag of an ancestor are
              * pushed back, ending the colgroup without a warning.
              *
              * @param lexer    token source
              * @param colgroup the element being filled; nothing is parsed if its
              *                 model has CM_EMPTY
              * @param mode     not referenced; tokens are always read with
              *                 Lexer.IgnoreWhitespace
              */
2229         public void parse( Lexer lexer, Node colgroup, short mode )
2230         {
2231             Node node, parent;
2232             TagTable tt = lexer.configuration.tt;
2233
2234             if ((colgroup.tag.model & Dict.CM_EMPTY) != 0)
2235                 return;
2236
2237             while (true)
2238             {
2239                 node = lexer.getToken(Lexer.IgnoreWhitespace);
                     /* no token returned: the method ends without reporting a
                        missing end tag */
2240                 if (node == null) break;
                     /* </colgroup>: done */
2241                 if (node.tag == colgroup.tag && node.type == Node.EndTag)
2242                 {
2243                     colgroup.closed = true;
2244                     return;
2245                 }
2246
2247                 /* 
2248                   if this is the end tag for an ancestor element
2249                   then infer end tag for this element
2250                 */
2251                 if (node.type == Node.EndTag)
2252                 {
                         /* a stray </form> is flagged on the lexer and dropped */
2253                     if (node.tag == tt.tagForm)
2254                     {
2255                         lexer.badForm = 1;
2256                         Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2257                         continue;
2258                     }
2259
                         /* NOTE(review): this scan runs before the unknown-tag
                            check further down, so for an unknown end tag node.tag
                            is null here and would compare equal to any ancestor
                            whose tag is also null -- confirm whether such an
                            ancestor can occur */
2260                     for (parent = colgroup.parent;
2261                             parent != null; parent = parent.parent)
2262                     {
2263
2264                         if (node.tag == parent.tag)
2265                         {
                                 /* leave the end tag for the ancestor's parser */
2266                             lexer.ungetToken();
2267                             return;
2268                         }
2269                     }
2270                 }
2271
                     /* text ends the colgroup; it is pushed back for the caller */
2272                 if (node.type == Node.TextNode)
2273                 {
2274                     lexer.ungetToken();
2275                     return;
2276                 }
2277
2278                 /* deal with comments etc. */
2279                 if (Node.insertMisc(colgroup, node))
2280                     continue;
2281
2282                 /* discard unknown tags */
2283                 if (node.tag == null)
2284                 {
2285                     Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2286                     continue;
2287                 }
2288
                     /* any known tag other than COL ends the colgroup */
2289                 if (node.tag != tt.tagCol)
2290                 {
2291                     lexer.ungetToken();
2292                     return;
2293                 }
2294
                     /* </col> is dropped */
2295                 if (node.type == Node.EndTag)
2296                 {
2297                     Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2298                     continue;
2299                 }
2300         
2301                 /* node should be <COL> */
2302                 Node.insertNodeAtEnd(colgroup, node);
2303                 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2304             }
2305         }
2306
2307     };
2308
2309     public static class ParseRowGroup implements Parser {
2310
2311         public void parse( Lexer lexer, Node rowgroup, short mode )
2312         {
2313             Node node, parent;
2314             TagTable tt = lexer.configuration.tt;
2315
2316             if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0)
2317                 return;
2318
2319             while (true)
2320             {
2321                 node = lexer.getToken(Lexer.IgnoreWhitespace);
2322                 if (node == null) break;
2323                 if (node.tag == rowgroup.tag)
2324                 {
2325                     if (node.type == Node.EndTag)
2326                     {
2327                         rowgroup.closed = true;
2328                         Node.trimEmptyElement(lexer, rowgroup);
2329                         return;
2330                     }
2331
2332                     lexer.ungetToken();
2333                     return;
2334                 }
2335
2336                 /* if </table> infer end tag */
2337                 if (node.tag == tt.tagTable && node.type == Node.EndTag)
2338                 {
2339                     lexer.ungetToken();
2340                     Node.trimEmptyElement(lexer, rowgroup);
2341                     return;
2342                 }
2343
2344                 /* deal with comments etc. */
2345                 if (Node.insertMisc(rowgroup, node))
2346                     continue;
2347
2348                 /* discard unknown tags */
2349                 if (node.tag == null && node.type != Node.TextNode)
2350                 {
2351                     Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2352                     continue;
2353                 }
2354
2355                 /*
2356                   if TD or TH then infer <TR>
2357                   if text or inline or block move before table
2358                   if head content move to head
2359                 */
2360
2361                 if (node.type != Node.EndTag)
2362                 {
2363                     if (node.tag == tt.tagTd || node.tag == tt.tagTh)
2364                     {
2365                         lexer.ungetToken();
2366                         node = lexer.inferredTag("tr");
2367                         Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
2368                     }
2369                     else if (node.type == Node.TextNode
2370                             || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2371                     {
2372                         Node.moveBeforeTable(rowgroup, node, tt);
2373                         Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
2374                         lexer.exiled = true;
2375
2376                         if (node.type != Node.TextNode)
2377                             parseTag(lexer, node, Lexer.IgnoreWhitespace);
2378
2379                         lexer.exiled = false;
2380                         continue;
2381                     }
2382                     else if ((node.tag.model & Dict.CM_HEAD) != 0)
2383                     {
2384                         Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
2385                         moveToHead(lexer, rowgroup, node);
2386                         continue;
2387                     }
2388                 }
2389
2390                 /* 
2391                   if this is the end tag for ancestor element
2392                   then infer end tag for this element
2393                 */
2394                 if (node.type == Node.EndTag)
2395                 {
2396                     if (node.tag == tt.tagForm)
2397                     {
2398                         lexer.badForm = 1;
2399                         Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2400                         continue;
2401                     }
2402
2403                     if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag == tt.tagTh)
2404                     {
2405                         Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2406                         continue;
2407                     }
2408
2409                     for (parent = rowgroup.parent;
2410                             parent != null; parent = parent.parent)
2411                     {
2412                         if (node.tag == parent.tag)
2413                         {
2414                             lexer.ungetToken();
2415                             Node.trimEmptyElement(lexer, rowgroup);
2416                             return;
2417                         }
2418                     }
2419                 }
2420
2421                 /*
2422                   if THEAD, TFOOT or TBODY then implied end tag
2423
2424                 */
2425                 if ((node.tag.model & Dict.CM_ROWGRP) != 0)
2426                 {
2427                     if (node.type != Node.EndTag)
2428                         lexer.ungetToken();
2429
2430                     Node.trimEmptyElement(lexer, rowgroup);
2431                     return;
2432                 }
2433
2434                 if (node.type == Node.EndTag)
2435                 {
2436                     Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2437                     continue;
2438                 }
2439         
2440                 if (!(node.tag == tt.tagTr))
2441                 {
2442                     node = lexer.inferredTag( "tr");
2443                     Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
2444                     lexer.ungetToken();
2445                 }
2446
2447                /* node should be <TR> */
2448                 Node.insertNodeAtEnd(rowgroup, node);
2449                 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2450             }
2451
2452             Node.trimEmptyElement(lexer, rowgroup);
2453         }
2454
2455     };
2456
2457     public static class ParseRow implements Parser {
2458
2459         public void parse( Lexer lexer, Node row, short mode )
2460         {
2461             Node node, parent;
2462             boolean exclude_state;
2463             TagTable tt = lexer.configuration.tt;
2464
2465             if ((row.tag.model & Dict.CM_EMPTY) != 0)
2466                 return;
2467
2468             while (true)
2469             {
2470                 node = lexer.getToken(Lexer.IgnoreWhitespace);
2471                 if (node == null) break;
2472                 if (node.tag == row.tag)
2473                 {
2474                     if (node.type == Node.EndTag)
2475                     {
2476                         row.closed = true;
2477                         Node.fixEmptyRow(lexer, row);
2478                         return;
2479                     }
2480
2481                     lexer.ungetToken();
2482                     Node.fixEmptyRow(lexer, row);
2483                     return;
2484                 }
2485
2486                 /* 
2487                   if this is the end tag for an ancestor element
2488                   then infer end tag for this element
2489                 */
2490                 if (node.type == Node.EndTag)
2491                 {
2492                     if (node.tag == tt.tagForm)
2493                     {
2494                         lexer.badForm = 1;
2495                         Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2496                         continue;
2497                     }
2498
2499                     if (node.tag == tt.tagTd || node.tag == tt.tagTh)
2500                     {
2501                         Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2502                         continue;
2503                     }
2504
2505                     for (parent = row.parent;
2506                             parent != null; parent = parent.parent)
2507                     {
2508                         if (node.tag == parent.tag)
2509                         {
2510                             lexer.ungetToken();
2511                             Node.trimEmptyElement(lexer, row);
2512                             return;
2513                         }
2514                     }
2515                 }
2516
2517                 /* deal with comments etc. */
2518                 if (Node.insertMisc(row, node))
2519                     continue;
2520
2521                 /* discard unknown tags */
2522                 if (node.tag == null && node.type != Node.TextNode)
2523                 {
2524                     Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2525                     continue;
2526                 }
2527
2528                 /* discard unexpected <table> element */
2529                 if (node.tag == tt.tagTable)
2530                 {
2531                     Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2532                     continue;
2533                 }
2534
2535                 /* THEAD, TFOOT or TBODY */
2536                 if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0)
2537                 {
2538                     lexer.ungetToken();
2539                     Node.trimEmptyElement(lexer, row);
2540                     return;
2541                 }
2542
2543                 if (node.type == Node.EndTag)
2544                 {
2545                     Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2546                     continue;
2547                 }
2548
2549                 /*
2550                   if text or inline or block move before table
2551                   if head content move to head
2552                 */
2553
2554                 if (node.type != Node.EndTag)
2555                 {
2556                     if (node.tag == tt.tagForm)
2557                     {
2558                         lexer.ungetToken();
2559                         node = lexer.inferredTag("td");
2560                         Report.warning(lexer, row, node, Report.MISSING_STARTTAG);
2561                     }
2562                     else if (node.type == Node.TextNode
2563                             || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2564                     {
2565                         Node.moveBeforeTable(row, node, tt);
2566                         Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2567                         lexer.exiled = true;
2568
2569                         if (node.type != Node.TextNode)
2570                             parseTag(lexer, node, Lexer.IgnoreWhitespace);
2571
2572                         lexer.exiled = false;
2573                         continue;
2574                     }
2575                     else if ((node.tag.model & Dict.CM_HEAD) != 0)
2576                     {
2577                         Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2578                         moveToHead(lexer, row, node);
2579                         continue;
2580                     }
2581                 }
2582
2583                 if (!(node.tag == tt.tagTd || node.tag == tt.tagTh))
2584                 {
2585                     Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2586                     continue;
2587                 }
2588         
2589                 /* node should be <TD> or <TH> */
2590                 Node.insertNodeAtEnd(row, node);
2591                 exclude_state = lexer.excludeBlocks;
2592                 lexer.excludeBlocks = false;
2593                 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2594                 lexer.excludeBlocks = exclude_state;
2595
2596                 /* pop inline stack */
2597
2598                 while (lexer.istack.size() > lexer.istackbase)
2599                     lexer.popInline( null);
2600             }
2601
2602             Node.trimEmptyElement(lexer, row);
2603         }
2604
2605     };
2606
2607     public static class ParseNoFrames implements Parser {
2608
2609         public void parse( Lexer lexer, Node noframes, short mode )
2610         {
2611             Node node;
2612             boolean checkstack;
2613             TagTable tt = lexer.configuration.tt;
2614
2615             lexer.badAccess |=  Report.USING_NOFRAMES;
2616             mode = Lexer.IgnoreWhitespace;
2617             checkstack = true;
2618
2619             while (true)
2620             {
2621                 node = lexer.getToken(mode);
2622                 if (node == null) break;
2623                 if (node.tag == noframes.tag && node.type == Node.EndTag)
2624                 {
2625                     noframes.closed = true;
2626                     Node.trimSpaces(lexer, noframes);
2627                     return;
2628                 }
2629
2630                 if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset))
2631                 {
2632                     Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_BEFORE);
2633                     Node.trimSpaces(lexer, noframes);
2634                     lexer.ungetToken();
2635                     return;
2636                 }
2637
2638                 if (node.tag == tt.tagHtml)
2639                 {
2640                     if (node.type == Node.StartTag || node.type == Node.StartEndTag)
2641                         Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
2642
2643                     continue;
2644                 }
2645
2646                 /* deal with comments etc. */
2647                 if (Node.insertMisc(noframes, node))
2648                     continue;
2649
2650                 if (node.tag == tt.tagBody && node.type == Node.StartTag)
2651                 {
2652                     Node.insertNodeAtEnd(noframes, node);
2653                     parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/);
2654                     continue;
2655                 }
2656
2657                 /* implicit body element inferred */
2658                 if (node.type == Node.TextNode || node.tag != null)
2659                 {
2660                     lexer.ungetToken();
2661                     node = lexer.inferredTag("body");
2662                     if (lexer.configuration.XmlOut)
2663                         Report.warning(lexer, noframes, node, Report.INSERTING_TAG);
2664                     Node.insertNodeAtEnd(noframes, node);
2665                     parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/);
2666                     continue;
2667                 }
2668                 /* discard unexpected end tags */
2669                 Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
2670             }
2671
2672             Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_FOR);
2673         }
2674
2675     };
2676
2677     public static class ParseSelect implements Parser {
2678
2679         public void parse( Lexer lexer, Node field, short mode )
2680         {
2681             Node node;
2682             TagTable tt = lexer.configuration.tt;
2683
2684             lexer.insert = -1;  /* defer implicit inline start tags */
2685
2686             while (true)
2687             {
2688                 node = lexer.getToken(Lexer.IgnoreWhitespace);
2689                 if (node == null) break;
2690                 if (node.tag == field.tag && node.type == Node.EndTag)
2691                 {
2692                     field.closed = true;
2693                     Node.trimSpaces(lexer, field);
2694                     return;
2695                 }
2696
2697                 /* deal with comments etc. */
2698                 if (Node.insertMisc(field, node))
2699                     continue;
2700
2701                 if (node.type == Node.StartTag && 
2702                      (node.tag == tt.tagOption ||
2703                       node.tag == tt.tagOptgroup ||
2704                       node.tag == tt.tagScript))
2705                 {
2706                     Node.insertNodeAtEnd(field, node);
2707                     parseTag(lexer, node, Lexer.IgnoreWhitespace);
2708                     continue;
2709                 }
2710
2711                 /* discard unexpected tags */
2712                 Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
2713             }
2714
2715             Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
2716         }
2717
2718     };
2719
2720     public static class ParseText implements Parser {
2721
2722         public void parse( Lexer lexer, Node field, short mode )
2723         {
2724             Node node;
2725             TagTable tt = lexer.configuration.tt;
2726
2727             lexer.insert = -1;  /* defer implicit inline start tags */
2728
2729             if (field.tag == tt.tagTextarea)
2730                 mode = Lexer.Preformatted;
2731
2732             while (true)
2733             {
2734                 node = lexer.getToken(mode);
2735                 if (node == null) break;
2736                 if (node.tag == field.tag && node.type == Node.EndTag)
2737                 {
2738                     field.closed = true;
2739                     Node.trimSpaces(lexer, field);
2740                     return;
2741                 }
2742
2743                 /* deal with comments etc. */
2744                 if (Node.insertMisc(field, node))
2745                     continue;
2746
2747                 if (node.type == Node.TextNode)
2748                 {
2749                     /* only called for 1st child */
2750                     if (field.content == null && !((mode & Lexer.Preformatted) != 0))
2751                         Node.trimSpaces(lexer, field);
2752
2753                     if (node.start >= node.end)
2754                     {
2755                         continue;
2756                     }
2757
2758                     Node.insertNodeAtEnd(field, node);
2759                     continue;
2760                 }
2761
2762                 if (node.tag == tt.tagFont)
2763                 {
2764                     Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
2765                     continue;
2766                 }
2767
2768                 /* terminate element on other tags */
2769                 if (!((field.tag.model & Dict.CM_OPT) != 0))
2770                         Report.warning(lexer, field, node, Report.MISSING_ENDTAG_BEFORE);
2771
2772                 lexer.ungetToken();
2773                 Node.trimSpaces(lexer, field);
2774                 return;
2775             }
2776
2777             if (!((field.tag.model & Dict.CM_OPT) != 0))
2778                 Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
2779         }
2780
2781     };
2782
2783     public static class ParseOptGroup implements Parser {
2784
2785         public void parse( Lexer lexer, Node field, short mode )
2786         {
2787             Node node;
2788             TagTable tt = lexer.configuration.tt;
2789
2790             lexer.insert = -1;  /* defer implicit inline start tags */
2791
2792             while (true)
2793             {
2794                 node = lexer.getToken(Lexer.IgnoreWhitespace);
2795                 if (node == null) break;
2796                 if (node.tag == field.tag && node.type == Node.EndTag)
2797                 {
2798                     field.closed = true;
2799                     Node.trimSpaces(lexer, field);
2800                     return;
2801                 }
2802
2803                 /* deal with comments etc. */
2804                 if (Node.insertMisc(field, node))
2805                     continue;
2806
2807                 if (node.type == Node.StartTag && 
2808                      (node.tag == tt.tagOption || node.tag == tt.tagOptgroup))
2809                 {
2810                     if (node.tag == tt.tagOptgroup)
2811                         Report.warning(lexer, field, node, Report.CANT_BE_NESTED);
2812
2813                     Node.insertNodeAtEnd(field, node);
2814                     parseTag(lexer, node, Lexer.MixedContent);
2815                     continue;
2816                 }
2817
2818                 /* discard unexpected tags */
2819                 Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
2820             }
2821         }
2822
2823     };
2824
2825     public static Parser getParseHTML()
2826     {
2827         return _parseHTML;
2828     }
2829
2830     public static Parser getParseHead()
2831     {
2832         return _parseHead;
2833     }
2834
2835     public static Parser getParseTitle()
2836     {
2837         return _parseTitle;
2838     }
2839
2840     public static Parser getParseScript()
2841     {
2842         return _parseScript;
2843     }
2844
2845     public static Parser getParseBody()
2846     {
2847         return _parseBody;
2848     }
2849
2850     public static Parser getParseFrameSet()
2851     {
2852         return _parseFrameSet;
2853     }
2854
2855     public static Parser getParseInline()
2856     {
2857         return _parseInline;
2858     }
2859
2860     public static Parser getParseList()
2861     {
2862         return _parseList;
2863     }
2864
2865     public static Parser getParseDefList()
2866     {
2867         return _parseDefList;
2868     }
2869
2870     public static Parser getParsePre()
2871     {
2872         return _parsePre;
2873     }
2874
2875     public static Parser getParseBlock()
2876     {
2877         return _parseBlock;
2878     }
2879
2880     public static Parser getParseTableTag()
2881     {
2882         return _parseTableTag;
2883     }
2884
2885     public static Parser getParseColGroup()
2886     {
2887         return _parseColGroup;
2888     }
2889
2890     public static Parser getParseRowGroup()
2891     {
2892         return _parseRowGroup;
2893     }
2894
2895     public static Parser getParseRow()
2896     {
2897         return _parseRow;
2898     }
2899
2900     public static Parser getParseNoFrames()
2901     {
2902         return _parseNoFrames;
2903     }
2904
2905     public static Parser getParseSelect()
2906     {
2907         return _parseSelect;
2908     }
2909
2910     public static Parser getParseText()
2911     {
2912         return _parseText;
2913     }
2914
2915     public static Parser getParseOptGroup()
2916     {
2917         return _parseOptGroup;
2918     }
2919
2920
2921     private static Parser _parseHTML = new ParseHTML();
2922     private static Parser _parseHead = new ParseHead();
2923     private static Parser _parseTitle = new ParseTitle();
2924     private static Parser _parseScript = new ParseScript();
2925     private static Parser _parseBody = new ParseBody();
2926     private static Parser _parseFrameSet = new ParseFrameSet();
2927     private static Parser _parseInline = new ParseInline();
2928     private static Parser _parseList = new ParseList();
2929     private static Parser _parseDefList = new ParseDefList();
2930     private static Parser _parsePre = new ParsePre();
2931     private static Parser _parseBlock = new ParseBlock();
2932     private static Parser _parseTableTag = new ParseTableTag();
2933     private static Parser _parseColGroup = new ParseColGroup();
2934     private static Parser _parseRowGroup = new ParseRowGroup();
2935     private static Parser _parseRow = new ParseRow();
2936     private static Parser _parseNoFrames = new ParseNoFrames();
2937     private static Parser _parseSelect = new ParseSelect();
2938     private static Parser _parseText = new ParseText();
2939     private static Parser _parseOptGroup = new ParseOptGroup();
2940
2941     /*
2942       HTML is the top level element
2943     */
2944     public static Node parseDocument(Lexer lexer)
2945     {
2946         Node node, document, html;
2947         Node doctype = null;
2948         TagTable tt = lexer.configuration.tt;
2949
2950         document = lexer.newNode();
2951         document.type = Node.RootNode;
2952
2953         while (true)
2954         {
2955             node = lexer.getToken(Lexer.IgnoreWhitespace);
2956             if (node == null) break;
2957
2958             /* deal with comments etc. */
2959             if (Node.insertMisc(document, node))
2960                 continue;
2961
2962             if (node.type == Node.DocTypeTag)
2963             {
2964                 if (doctype == null)
2965                 {
2966                     Node.insertNodeAtEnd(document, node);
2967                     doctype = node;
2968                 }
2969                 else
2970                     Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED);
2971                 continue;
2972             }
2973
2974             if (node.type == Node.EndTag)
2975             {
2976                 Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); //TODO?
2977                 continue;
2978             }
2979
2980             if (node.type != Node.StartTag || node.tag != tt.tagHtml)
2981             {
2982                 lexer.ungetToken();
2983                 html = lexer.inferredTag("html");
2984             }
2985             else
2986                 html = node;
2987
2988             Node.insertNodeAtEnd(document, html);
2989             getParseHTML().parse(lexer, html, (short)0); // TODO?
2990             break;
2991         }
2992
2993         return document;
2994     }
2995
2996     /**
2997      *  Indicates whether or not whitespace should be preserved for this element.
2998      *  If an <code>xml:space</code> attribute is found, then if the attribute value is
2999      *  <code>preserve</code>, returns <code>true</code>.  For any other value, returns
3000      *  <code>false</code>.  If an <code>xml:space</code> attribute was <em>not</em>
3001      *  found, then the following element names result in a return value of <code>true:
3002      *  pre, script, style,</code> and <code>xsl:text</code>.  Finally, if a
3003      *  <code>TagTable</code> was passed in and the element appears as the "pre" element
3004      *  in the <code>TagTable</code>, then <code>true</code> will be returned.
3005      *  Otherwise, <code>false</code> is returned.
3006      *  @param element The <code>Node</code> to test to see if whitespace should be
3007      *                 preserved.
3008      *  @param tt The <code>TagTable</code> to test for the <code>getNodePre()</code>
3009      *            function.  This may be <code>null</code>, in which case this test
3010      *            is bypassed.
3011      *  @return <code>true</code> or <code>false</code>, as explained above.
3012      */
3013
3014     public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt)
3015     {
3016         AttVal attribute;
3017
3018         /* search attributes for xml:space */
3019         for (attribute = element.attributes; attribute != null; attribute = attribute.next)
3020         {
3021             if (attribute.attribute.equals("xml:space"))
3022             {
3023                 if (attribute.value.equals("preserve"))
3024                     return true;
3025
3026                 return false;
3027             }
3028         }
3029
3030         /* kludge for html docs without explicit xml:space attribute */
3031         if (Lexer.wstrcasecmp(element.element, "pre") == 0
3032             || Lexer.wstrcasecmp(element.element, "script") == 0
3033             || Lexer.wstrcasecmp(element.element, "style") == 0)
3034             return true;
3035
3036         if ( (tt != null) && (tt.findParser(element) == getParsePre()) )
3037             return true;
3038
3039         /* kludge for XSL docs */
3040         if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
3041             return true;
3042
3043         return false;
3044     }
3045
3046     /*
3047       XML documents
3048     */
3049     public static void parseXMLElement(Lexer lexer, Node element, short mode)
3050     {
3051         Node node;
3052
3053         /* Jeff Young's kludge for XSL docs */
3054
3055         if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
3056             return;
3057
3058         /* if node is pre or has xml:space="preserve" then do so */
3059
3060         if (XMLPreserveWhiteSpace(element, lexer.configuration.tt))
3061             mode = Lexer.Preformatted;
3062
3063         while (true)
3064         {
3065             node = lexer.getToken(mode);
3066             if (node == null) break;
3067             if (node.type == Node.EndTag && node.element.equals(element.element))
3068             {
3069                 element.closed = true;
3070                 break;
3071             }
3072
3073             /* discard unexpected end tags */
3074             if (node.type == Node.EndTag)
3075             {
3076                 Report.error(lexer, element, node, Report.UNEXPECTED_ENDTAG);
3077                 continue;
3078             }
3079
3080             /* parse content on seeing start tag */
3081             if (node.type == Node.StartTag)
3082                 parseXMLElement(lexer, node, mode);
3083
3084             Node.insertNodeAtEnd(element, node);
3085         }
3086
3087         /*
3088          if first child is text then trim initial space and
3089          delete text node if it is empty.
3090         */
3091
3092         node = element.content;
3093
3094         if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted)
3095         {
3096             if (node.textarray[node.start] == (byte)' ')
3097             {
3098                 node.start++;
3099
3100                 if (node.start >= node.end)
3101                     Node.discardElement(node);
3102             }
3103         }
3104
3105         /*
3106          if last child is text then trim final space and
3107          delete the text node if it is empty
3108         */
3109
3110         node = element.last;
3111
3112         if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted)
3113         {
3114             if (node.textarray[node.end - 1] == (byte)' ')
3115             {
3116                 node.end--;
3117
3118                 if (node.start >= node.end)
3119                     Node.discardElement(node);
3120             }
3121         }
3122     }
3123
3124     public static Node parseXMLDocument(Lexer lexer)
3125     {
3126         Node node, document, doctype;
3127
3128         document = lexer.newNode();
3129         document.type = Node.RootNode;
3130         doctype = null;
3131         lexer.configuration.XmlTags = true;
3132
3133         while (true)
3134         {
3135             node = lexer.getToken(Lexer.IgnoreWhitespace);
3136             if (node == null) break;
3137             /* discard unexpected end tags */
3138             if (node.type == Node.EndTag)
3139             {
3140                 Report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG);
3141                 continue;
3142             }
3143
3144             /* deal with comments etc. */
3145             if (Node.insertMisc(document, node))
3146                 continue;
3147
3148             if (node.type == Node.DocTypeTag)
3149             {
3150                 if (doctype == null)
3151                 {
3152                     Node.insertNodeAtEnd(document, node);
3153                     doctype = node;
3154                 }
3155                 else
3156                     Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO
3157                 continue;
3158             }
3159
3160             /* if start tag then parse element's content */
3161             if (node.type == Node.StartTag)
3162             {
3163                 Node.insertNodeAtEnd(document, node);
3164                 parseXMLElement(lexer, node, Lexer.IgnoreWhitespace);
3165             }
3166
3167         }
3168
3169 if (false) { //#if 0
3170         /* discard the document type */
3171         node = document.findDocType();
3172
3173         if (node != null)
3174             Node.discardElement(node);
3175 } // #endif
3176
3177         if  (doctype != null && !lexer.checkDocTypeKeyWords(doctype))
3178                 Report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
3179
3180         /* ensure presence of initial <?XML version="1.0"?> */
3181         if (lexer.configuration.XmlPi)
3182             lexer.fixXMLPI(document);
3183
3184         return document;
3185     }
3186
3187     public static boolean isJavaScript(Node node)
3188     {
3189         boolean result = false;
3190         AttVal attr;
3191
3192         if (node.attributes == null)
3193             return true;
3194
3195         for (attr = node.attributes; attr != null; attr = attr.next)
3196         {
3197             if ( (Lexer.wstrcasecmp(attr.attribute, "language") == 0
3198                     || Lexer.wstrcasecmp(attr.attribute, "type") == 0)
3199                     && Lexer.wsubstr(attr.value, "javascript"))
3200                 result = true;
3201         }
3202
3203         return result;
3204     }
3205
3206 }