First scanner/parser copied from the JDT Java version
[phpeclipse.git] / net.sourceforge.phpeclipse / src / org / w3c / tidy / ParserImpl.java
1 /*
2  * @(#)ParserImpl.java   1.11 2000/08/16
3  *
4  */
5
6 package org.w3c.tidy;
7
8 /**
9  *
10  * HTML Parser implementation
11  *
12  * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
13  * See Tidy.java for the copyright notice.
14  * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
15  * HTML Tidy Release 4 Aug 2000</a>
16  *
17  * @author  Dave Raggett <dsr@w3.org>
18  * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
19  * @version 1.0, 1999/05/22
20  * @version 1.0.1, 1999/05/29
21  * @version 1.1, 1999/06/18 Java Bean
22  * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
23  * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
24  * @version 1.4, 1999/09/04 DOM support
25  * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
26  * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
27  * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
28  * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
29  * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
30  * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
31  * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
32  */
33
34 public class ParserImpl {
35
36     //private static int SeenBodyEndTag;  /* AQ: moved into lexer structure */
37
38     private static void parseTag(Lexer lexer, Node node, short mode)
39     {
40         // Local fix by GLP 2000-12-21.  Need to reset insertspace if this 
41         // is both a non-inline and empty tag (base, link, meta, isindex, hr, area).
42         // Remove this code once the fix is made in Tidy.
43
44 /******  (Original code follows)
45         if ((node.tag.model & Dict.CM_EMPTY) != 0)
46         {
47             lexer.waswhite = false;
48             return;
49         }
50         else if (!((node.tag.model & Dict.CM_INLINE) != 0))
51             lexer.insertspace = false;
52 *******/
53
54         if (!((node.tag.model & Dict.CM_INLINE) != 0))
55             lexer.insertspace = false;
56
57         if ((node.tag.model & Dict.CM_EMPTY) != 0)
58         {
59             lexer.waswhite = false;
60             return;
61         }
62
63         if (node.tag.parser == null || node.type == Node.StartEndTag)
64             return;
65
66         node.tag.parser.parse(lexer, node, mode);
67     }
68
69     private static void moveToHead(Lexer lexer, Node element, Node node)
70     {
71         Node head;
72         TagTable tt = lexer.configuration.tt;
73
74
75         if (node.type == Node.StartTag || node.type == Node.StartEndTag)
76         {
77             Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
78
79             while (element.tag != tt.tagHtml)
80                 element = element.parent;
81
82             for (head = element.content; head != null; head = head.next)
83             {
84                 if (head.tag == tt.tagHead)
85                 {
86                     Node.insertNodeAtEnd(head, node);
87                     break;
88                 }
89             }
90
91             if (node.tag.parser != null)
92                 parseTag(lexer, node, Lexer.IgnoreWhitespace);
93         }
94         else
95         {
96             Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
97         }
98     }
99
100     public static class ParseHTML implements Parser {
101
102         public void parse( Lexer lexer, Node html, short mode )
103         {
104             Node node, head;
105             Node frameset = null;
106             Node noframes = null;
107
108             lexer.configuration.XmlTags = false;
109             lexer.seenBodyEndTag = 0;
110             TagTable tt = lexer.configuration.tt;
111
112             for (;;)
113             {
114                 node = lexer.getToken(Lexer.IgnoreWhitespace);
115
116                 if (node == null)
117                 {
118                     node = lexer.inferredTag("head");
119                     break;
120                 }
121
122                 if (node.tag == tt.tagHead)
123                     break;
124
125                 if (node.tag == html.tag && node.type == Node.EndTag)
126                 {
127                     Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
128                     continue;
129                 }
130
131                 /* deal with comments etc. */
132                 if (Node.insertMisc(html, node))
133                     continue;
134
135                 lexer.ungetToken();
136                 node = lexer.inferredTag("head");
137                 break;
138             }
139
140             head = node;
141             Node.insertNodeAtEnd(html, head);
142             getParseHead().parse(lexer, head, mode);
143
144             for (;;)
145             {
146                 node = lexer.getToken(Lexer.IgnoreWhitespace);
147
148                 if (node == null)
149                 {
150                     if (frameset == null) /* create an empty body */
151                         node = lexer.inferredTag("body");
152
153                     return;
154                 }
155
156                 /* robustly handle html tags */
157                 if (node.tag == html.tag)
158                 {
159                     if (node.type != Node.StartTag && frameset == null)
160                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
161
162                     continue;
163                 }
164
165                 /* deal with comments etc. */
166                 if (Node.insertMisc(html, node))
167                     continue;
168
169                 /* if frameset document coerce <body> to <noframes> */
170                 if (node.tag == tt.tagBody)
171                 {
172                     if (node.type != Node.StartTag)
173                     {
174                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
175                         continue;
176                     }
177
178                     if (frameset != null)
179                     {
180                         lexer.ungetToken();
181
182                         if (noframes == null)
183                         {
184                             noframes = lexer.inferredTag("noframes");
185                             Node.insertNodeAtEnd(frameset, noframes);
186                             Report.warning(lexer, html, noframes, Report.INSERTING_TAG);
187                         }
188
189                         parseTag(lexer, noframes, mode);
190                         continue;
191                     }
192
193                     break;  /* to parse body */
194                 }
195
196                 /* flag an error if we see more than one frameset */
197                 if (node.tag == tt.tagFrameset)
198                 {
199                     if (node.type != Node.StartTag)
200                     {
201                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
202                         continue;
203                     }
204
205                     if (frameset != null)
206                         Report.error(lexer, html, node, Report.DUPLICATE_FRAMESET);
207                     else
208                         frameset = node;
209
210                     Node.insertNodeAtEnd(html, node);
211                     parseTag(lexer, node, mode);
212
213                     /*
214                       see if it includes a noframes element so
215                       that we can merge subsequent noframes elements
216                     */
217
218                     for (node = frameset.content; node != null; node = node.next)
219                     {
220                         if (node.tag == tt.tagNoframes)
221                             noframes = node;
222                     }
223                     continue;
224                 }
225
226                 /* if not a frameset document coerce <noframes> to <body> */
227                 if (node.tag == tt.tagNoframes)
228                 {
229                     if (node.type != Node.StartTag)
230                     {
231                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
232                         continue;
233                     }
234
235                     if (frameset == null)
236                     {
237                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
238                         node = lexer.inferredTag("body");
239                         break;
240                     }
241
242                     if (noframes == null)
243                     {
244                         noframes = node;
245                         Node.insertNodeAtEnd(frameset, noframes);
246                     }
247
248                     parseTag(lexer, noframes, mode);
249                     continue;
250                 }
251
252                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
253                 {
254                     if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
255                     {
256                         moveToHead(lexer, html, node);
257                         continue;
258                     }
259                 }
260
261                 lexer.ungetToken();
262
263                 /* insert other content into noframes element */
264
265                 if (frameset != null)
266                 {
267                     if (noframes == null)
268                     {
269                         noframes = lexer.inferredTag("noframes");
270                         Node.insertNodeAtEnd(frameset, noframes);
271                     }
272                     else
273                         Report.warning(lexer, html, node, Report.NOFRAMES_CONTENT);
274
275                     parseTag(lexer, noframes, mode);
276                     continue;
277                 }
278
279                 node = lexer.inferredTag("body");
280                 break;
281             }
282
283             /* node must be body */
284
285             Node.insertNodeAtEnd(html, node);
286             parseTag(lexer, node, mode);
287         }
288
289     };
290
291     public static class ParseHead implements Parser {
292
293         public void parse( Lexer lexer, Node head, short mode )
294         {
295             Node node;
296             int HasTitle = 0;
297             int HasBase = 0;
298             TagTable tt = lexer.configuration.tt;
299
300             while (true)
301             {
302                 node = lexer.getToken(Lexer.IgnoreWhitespace);
303                 if (node == null) break;
304                 if (node.tag == head.tag && node.type == Node.EndTag)
305                 {
306                     head.closed = true;
307                     break;
308                 }
309
310                 if (node.type == Node.TextNode)
311                 {
312                     lexer.ungetToken();
313                     break;
314                 }
315
316                 /* deal with comments etc. */
317                 if (Node.insertMisc(head, node))
318                     continue;
319
320                 if (node.type == Node.DocTypeTag)
321                 {
322                     Node.insertDocType(lexer, head, node);
323                     continue;
324                 }
325
326                 /* discard unknown tags */
327                 if (node.tag == null)
328                 {
329                     Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
330                     continue;
331                 }
332         
333                 if (!((node.tag.model & Dict.CM_HEAD) != 0))
334                 {
335                     lexer.ungetToken();
336                     break;
337                 }
338
339                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
340                 {
341                     if (node.tag == tt.tagTitle)
342                     {
343                         ++HasTitle;
344
345                         if (HasTitle > 1)
346                             Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
347                     }
348                     else if (node.tag == tt.tagBase)
349                     {
350                         ++HasBase;
351
352                         if (HasBase > 1)
353                             Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
354                     }
355                     else if (node.tag == tt.tagNoscript)
356                         Report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
357
358                     Node.insertNodeAtEnd(head, node);
359                     parseTag(lexer, node, Lexer.IgnoreWhitespace);
360                     continue;
361                 }
362
363                 /* discard unexpected text nodes and end tags */
364                 Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
365             }
366
367             if (HasTitle == 0)
368             {
369                 Report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT);
370                 Node.insertNodeAtEnd(head, lexer.inferredTag( "title"));
371             }
372         }
373
374     };
375
376     public static class ParseTitle implements Parser {
377
378         public void parse( Lexer lexer, Node title, short mode )
379         {
380             Node node;
381
382             while (true)
383             {
384                 node = lexer.getToken(Lexer.MixedContent);
385                 if (node == null) break;
386                 if (node.tag == title.tag && node.type == Node.EndTag)
387                 {
388                     title.closed = true;
389                     Node.trimSpaces(lexer, title);
390                     return;
391                 }
392
393                 if (node.type == Node.TextNode)
394                 {
395                     /* only called for 1st child */
396                     if (title.content == null)
397                         Node.trimInitialSpace(lexer, title, node);
398
399                     if (node.start >= node.end)
400                     {
401                         continue;
402                     }
403
404                     Node.insertNodeAtEnd(title, node);
405                     continue;
406                 }
407
408                 /* deal with comments etc. */
409                 if (Node.insertMisc(title, node))
410                     continue;
411
412                 /* discard unknown tags */
413                 if (node.tag == null)
414                 {
415                     Report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED);
416                     continue;
417                 }
418
419                 /* pushback unexpected tokens */
420                 Report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE);
421                 lexer.ungetToken();
422                 Node.trimSpaces(lexer, title);
423                 return;
424             }
425
426             Report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR);
427         }
428
429     };
430
431     public static class ParseScript implements Parser {
432
433         public void parse( Lexer lexer, Node script, short mode )
434         {
435         /*
436           This isn't quite right for CDATA content as it recognises
437           tags within the content and parses them accordingly.
438           This will unfortunately screw up scripts which include
439           < + letter,  < + !, < + ?  or  < + / + letter
440         */
441
442             Node node;
443
444             node = lexer.getCDATA( script);
445
446             if (node != null)
447                 Node.insertNodeAtEnd(script, node);
448         }
449
450     };
451
452     public static class ParseBody implements Parser {
453
454         public void parse( Lexer lexer, Node body, short mode )
455         {
456             Node node;
457             boolean checkstack, iswhitenode;
458
459             mode = Lexer.IgnoreWhitespace;
460             checkstack = true;
461             TagTable tt = lexer.configuration.tt;
462
463             while (true)
464             {
465                 node = lexer.getToken(mode);
466                 if (node == null) break;
467                 if (node.tag == body.tag && node.type == Node.EndTag)
468                 {
469                     body.closed = true;
470                     Node.trimSpaces(lexer, body);
471                     lexer.seenBodyEndTag = 1;
472                     mode = Lexer.IgnoreWhitespace;
473
474                     if (body.parent.tag == tt.tagNoframes)
475                         break;
476
477                     continue;
478                 }
479         
480                 if (node.tag == tt.tagNoframes)
481                 {
482                     if (node.type == Node.StartTag)
483                     {
484                         Node.insertNodeAtEnd(body, node);
485                         getParseBlock().parse(lexer, node, mode);
486                         continue;
487                     }
488
489                     if (node.type == Node.EndTag &&
490                         body.parent.tag == tt.tagNoframes)
491                     {
492                         Node.trimSpaces(lexer, body);
493                         lexer.ungetToken();
494                         break;
495                     }
496                 }
497
498                 if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset)
499                     && body.parent.tag == tt.tagNoframes)
500                 {
501                     Node.trimSpaces(lexer, body);
502                     lexer.ungetToken();
503                     break;
504                 }
505         
506                 if (node.tag == tt.tagHtml)
507                 {
508                     if (node.type == Node.StartTag || node.type == Node.StartEndTag)
509                         Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
510
511                     continue;
512                 }
513
514                 iswhitenode = false;
515
516                 if (node.type == Node.TextNode &&
517                        node.end <= node.start + 1 &&
518                        node.textarray[node.start] == (byte)' ')
519                     iswhitenode = true;
520
521                 /* deal with comments etc. */
522                 if (Node.insertMisc(body, node))
523                     continue;
524
525                 if (lexer.seenBodyEndTag == 1 && !iswhitenode)
526                 {
527                     ++lexer.seenBodyEndTag;
528                     Report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
529                 }
530
531                 /* mixed content model permits text */
532                 if (node.type == Node.TextNode)
533                 {
534                     if (iswhitenode && mode == Lexer.IgnoreWhitespace)
535                     {
536                         continue;
537                     }
538
539                     if (lexer.configuration.EncloseBodyText && !iswhitenode)
540                     {
541                         Node para;
542                 
543                         lexer.ungetToken();
544                         para = lexer.inferredTag("p");
545                         Node.insertNodeAtEnd(body, para);
546                         parseTag(lexer, para, mode);
547                         mode = Lexer.MixedContent;
548                         continue;
549                     }
550                     else /* strict doesn't allow text here */
551                         lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
552
553                     if (checkstack)
554                     {
555                         checkstack = false;
556
557                         if (lexer.inlineDup( node) > 0)
558                             continue;
559                     }
560
561                     Node.insertNodeAtEnd(body, node);
562                     mode = Lexer.MixedContent;
563                     continue;
564                 }
565
566                 if (node.type == Node.DocTypeTag)
567                 {
568                     Node.insertDocType(lexer, body, node);
569                     continue;
570                 }
571                 /* discard unknown  and PARAM tags */
572                 if (node.tag == null || node.tag == tt.tagParam)
573                 {
574                     Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
575                     continue;
576                 }
577
578                 /*
579                   Netscape allows LI and DD directly in BODY
580                   We infer UL or DL respectively and use this
581                   boolean to exclude block-level elements so as
582                   to match Netscape's observed behaviour.
583                 */
584                 lexer.excludeBlocks = false;
585         
586                 if (!((node.tag.model & Dict.CM_BLOCK) != 0) &&
587                     !((node.tag.model & Dict.CM_INLINE) != 0))
588                 {
589                     /* avoid this error message being issued twice */
590                     if (!((node.tag.model & Dict.CM_HEAD) != 0))
591                         Report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
592
593                     if ((node.tag.model & Dict.CM_HTML) != 0)
594                     {
595                         /* copy body attributes if current body was inferred */
596                         if (node.tag == tt.tagBody && body.implicit 
597                                             && body.attributes == null)
598                         {
599                             body.attributes = node.attributes;
600                             node.attributes = null;
601                         }
602
603                         continue;
604                     }
605
606                     if ((node.tag.model & Dict.CM_HEAD) != 0)
607                     {
608                         moveToHead(lexer, body, node);
609                         continue;
610                     }
611
612                     if ((node.tag.model & Dict.CM_LIST) != 0)
613                     {
614                         lexer.ungetToken();
615                         node = lexer.inferredTag( "ul");
616                         Node.addClass(node, "noindent");
617                         lexer.excludeBlocks = true;
618                     }
619                     else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
620                     {
621                         lexer.ungetToken();
622                         node = lexer.inferredTag( "dl");
623                         lexer.excludeBlocks = true;
624                     }
625                     else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0)
626                     {
627                         lexer.ungetToken();
628                         node = lexer.inferredTag( "table");
629                         lexer.excludeBlocks = true;
630                     }
631                     else
632                     {
633                         /* AQ: The following line is from the official C
634                            version of tidy.  It doesn't make sense to me
635                            because the '!' operator has higher precedence
636                            than the '&' operator.  It seems to me that the
637                            expression always evaluates to 0.
638
639                            if (!node->tag->model & (CM_ROW | CM_FIELD))
640
641                            AQ: 13Jan2000 fixed in C tidy
642                         */
643                         if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0))
644                         {
645                             lexer.ungetToken();
646                             return;
647                         }
648
649                         /* ignore </td> </th> <option> etc. */
650                         continue;
651                     }
652                 }
653
654                 if (node.type == Node.EndTag)
655                 {
656                     if (node.tag == tt.tagBr)
657                         node.type = Node.StartTag;
658                     else if (node.tag == tt.tagP)
659                     {
660                         Node.coerceNode(lexer, node, tt.tagBr);
661                         Node.insertNodeAtEnd(body, node);
662                         node = lexer.inferredTag("br");
663                     }
664                     else if ((node.tag.model & Dict.CM_INLINE) != 0)
665                         lexer.popInline(node);
666                 }
667
668                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
669                 {
670                     if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0))
671                     {
672                         /* HTML4 strict doesn't allow inline content here */
673                         /* but HTML2 does allow img elements as children of body */
674                         if (node.tag == tt.tagImg)
675                             lexer.versions &= ~Dict.VERS_HTML40_STRICT;
676                         else
677                             lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
678
679                         if (checkstack && !node.implicit)
680                         {
681                             checkstack = false;
682
683                             if (lexer.inlineDup( node) > 0)
684                                 continue;
685                         }
686
687                         mode = Lexer.MixedContent;
688                     }
689                     else
690                     {
691                         checkstack = true;
692                         mode = Lexer.IgnoreWhitespace;
693                     }
694
695                     if (node.implicit)
696                         Report.warning(lexer, body, node, Report.INSERTING_TAG);
697
698                     Node.insertNodeAtEnd(body, node);
699                     parseTag(lexer, node, mode);
700                     continue;
701                 }
702
703                 /* discard unexpected tags */
704                 Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
705             }
706         }
707
708     };
709
710     public static class ParseFrameSet implements Parser {
711
712         public void parse( Lexer lexer, Node frameset, short mode )
713         {
714             Node node;
715             TagTable tt = lexer.configuration.tt;
716
717             lexer.badAccess |=  Report.USING_FRAMES;
718
719             while (true)
720             {
721                 node = lexer.getToken(Lexer.IgnoreWhitespace);
722                 if (node == null) break;
723                 if (node.tag == frameset.tag && node.type == Node.EndTag)
724                 {
725                     frameset.closed = true;
726                     Node.trimSpaces(lexer, frameset);
727                     return;
728                 }
729
730                 /* deal with comments etc. */
731                 if (Node.insertMisc(frameset, node))
732                     continue;
733
734                 if (node.tag == null)
735                 {
736                     Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
737                     continue; 
738                 }
739
740                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
741                 {
742                     if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
743                     {
744                         moveToHead(lexer, frameset, node);
745                         continue;
746                     }
747                 }
748
749                 if (node.tag == tt.tagBody)
750                 {
751                     lexer.ungetToken();
752                     node = lexer.inferredTag("noframes");
753                     Report.warning(lexer, frameset, node, Report.INSERTING_TAG);
754                 }
755
756                 if (node.type == Node.StartTag && (node.tag.model & Dict.CM_FRAMES) != 0)
757                 {
758                     Node.insertNodeAtEnd(frameset, node);
759                     lexer.excludeBlocks = false;
760                     parseTag(lexer, node, Lexer.MixedContent);
761                     continue;
762                 }
763                 else if (node.type == Node.StartEndTag && (node.tag.model & Dict.CM_FRAMES) != 0)
764                 {
765                     Node.insertNodeAtEnd(frameset, node);
766                     continue;
767                 }
768
769                 /* discard unexpected tags */
770                 Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
771             }
772
773             Report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR);
774         }
775
776     };
777
778     public static class ParseInline implements Parser {
779
780         public void parse( Lexer lexer, Node element, short mode )
781         {
782             Node node, parent;
783             TagTable tt = lexer.configuration.tt;
784
785             if ((element.tag.model & Dict.CM_EMPTY) != 0)
786                 return;
787
788             if (element.tag == tt.tagA)
789             {
790                 if (element.attributes == null)
791                 {
792                     Report.warning(lexer, element.parent, element, Report.DISCARDING_UNEXPECTED);
793                     Node.discardElement(element);
794                     return;
795                 }
796             }
797
798             /*
799              ParseInline is used for some block level elements like H1 to H6
800              For such elements we need to insert inline emphasis tags currently
801              on the inline stack. For Inline elements, we normally push them
802              onto the inline stack provided they aren't implicit or OBJECT/APPLET.
803              This test is carried out in PushInline and PopInline, see istack.c
804              We don't push A or SPAN to replicate current browser behavior
805             */
806             if (((element.tag.model & Dict.CM_BLOCK) != 0) || (element.tag == tt.tagDt))
807                 lexer.inlineDup( null);
808             else if ((element.tag.model & Dict.CM_INLINE) != 0 &&
809                         element.tag != tt.tagA && element.tag != tt.tagSpan)
810                 lexer.pushInline( element);
811
812             if (element.tag == tt.tagNobr)
813                 lexer.badLayout |= Report.USING_NOBR;
814             else if (element.tag == tt.tagFont)
815                 lexer.badLayout |= Report.USING_FONT;
816
817             /* Inline elements may or may not be within a preformatted element */
818             if (mode != Lexer.Preformatted)
819                 mode = Lexer.MixedContent;
820
821             while (true)
822             {
823                 node = lexer.getToken(mode);
824                 if (node == null) break;
825                 /* end tag for current element */
826                 if (node.tag == element.tag && node.type == Node.EndTag)
827                 {
828                     if ((element.tag.model & Dict.CM_INLINE) != 0 &&
829                         element.tag != tt.tagA)
830                         lexer.popInline( node);
831
832                     if (!((mode & Lexer.Preformatted) != 0))
833                         Node.trimSpaces(lexer, element);
834                     /*
835                      if a font element wraps an anchor and nothing else
836                      then move the font element inside the anchor since
837                      otherwise it won't alter the anchor text color
838                     */
839                     if (element.tag == tt.tagFont &&
840                         element.content != null &&
841                         element.content == element.last)
842                     {
843                         Node child = element.content;
844
845                         if (child.tag == tt.tagA)
846                         {
847                             child.parent = element.parent;
848                             child.next = element.next;
849                             child.prev = element.prev;
850
851                             if (child.prev != null)
852                                 child.prev.next = child;
853                             else
854                                 child.parent.content = child;
855
856                             if (child.next != null)
857                                 child.next.prev = child;
858                             else
859                                 child.parent.last = child;
860
861                             element.next = null;
862                             element.prev = null;
863                             element.parent = child;
864                             element.content = child.content;
865                             element.last = child.last;
866                             child.content = element;
867                             child.last = element;
868                             for (child = element.content; child != null; child = child.next)
869                                 child.parent = element;
870                         }
871                     }
872                     element.closed = true;
873                     Node.trimSpaces(lexer, element);
874                     Node.trimEmptyElement(lexer, element);
875                     return;
876                 }
877
878                 /* <u>...<u>  map 2nd <u> to </u> if 1st is explicit */
879                 /* otherwise emphasis nesting is probably unintentional */
880                 /* big and small have cumulative effect to leave them alone */
881                 if (node.type == Node.StartTag
882                         && node.tag == element.tag
883                         && lexer.isPushed(node)
884                         && !node.implicit
885                         && !element.implicit
886                         && node.tag != null && ((node.tag.model & Dict.CM_INLINE) != 0)
887                         && node.tag != tt.tagA
888                         && node.tag != tt.tagFont
889                         && node.tag != tt.tagBig
890                         && node.tag != tt.tagSmall)
891                 {
892                     if (element.content != null && node.attributes == null)
893                     {
894                         Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
895                         node.type = Node.EndTag;
896                         lexer.ungetToken();
897                         continue;
898                     }
899
900                     Report.warning(lexer, element, node, Report.NESTED_EMPHASIS);
901                 }
902
903                 if (node.type == Node.TextNode)
904                 {
905                     /* only called for 1st child */
906                     if (element.content == null &&
907                         !((mode & Lexer.Preformatted) != 0))
908                         Node.trimSpaces(lexer, element);
909
910                     if (node.start >= node.end)
911                     {
912                         continue;
913                     }
914
915                     Node.insertNodeAtEnd(element, node);
916                     continue;
917                 }
918
919                 /* mixed content model so allow text */
920                 if (Node.insertMisc(element, node))
921                     continue;
922
923                 /* deal with HTML tags */
924                 if (node.tag == tt.tagHtml)
925                 {
926                     if (node.type == Node.StartTag || node.type == Node.StartEndTag)
927                     {
928                         Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
929                         continue;
930                     }
931
932                     /* otherwise infer end of inline element */
933                     lexer.ungetToken();
934                     if (!((mode & Lexer.Preformatted) != 0))
935                         Node.trimSpaces(lexer, element);
936                     Node.trimEmptyElement(lexer, element);
937                     return;
938                 }
939
940                 /* within <dt> or <pre> map <p> to <br> */
941                 if (node.tag == tt.tagP &&
942                       node.type == Node.StartTag &&
943                       ((mode & Lexer.Preformatted) != 0 ||
944                        element.tag == tt.tagDt ||
945                       element.isDescendantOf(tt.tagDt)))
946                 {
947                     node.tag = tt.tagBr;
948                     node.element = "br";
949                     Node.trimSpaces(lexer, element);
950                     Node.insertNodeAtEnd(element, node);
951                     continue;
952                 }
953
954                 /* ignore unknown and PARAM tags */
955                 if (node.tag == null || node.tag == tt.tagParam)
956                 {
957                     Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
958                     continue;
959                 }
960
961                 if (node.tag == tt.tagBr && node.type == Node.EndTag)
962                     node.type = Node.StartTag;
963
964                 if (node.type == Node.EndTag)
965                 {
966                     /* coerce </br> to <br> */
967                     if (node.tag == tt.tagBr)
968                         node.type = Node.StartTag;
969                     else if (node.tag == tt.tagP)
970                     {
971                         /* coerce unmatched </p> to <br><br> */
972                         if (!element.isDescendantOf(tt.tagP))
973                         {
974                             Node.coerceNode(lexer, node, tt.tagBr);
975                             Node.trimSpaces(lexer, element);
976                             Node.insertNodeAtEnd(element, node);
977                             node = lexer.inferredTag("br");
978                             continue;
979                         }
980                     }
981                     else if ((node.tag.model & Dict.CM_INLINE) != 0
982                                 && node.tag != tt.tagA
983                                         && !((node.tag.model & Dict.CM_OBJECT) != 0)
984                                         && (element.tag.model & Dict.CM_INLINE) != 0)
985                     {
986                         /* allow any inline end tag to end current element */
987                         lexer.popInline( element);
988
989                         if (element.tag != tt.tagA)
990                         {
991                             if (node.tag == tt.tagA && node.tag != element.tag)
992                             {
993                                Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
994                                lexer.ungetToken();
995                             }
996                             else
997                             {
998                                 Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
999                             }
1000
1001                             if (!((mode & Lexer.Preformatted) != 0))
1002                                 Node.trimSpaces(lexer, element);
1003                             Node.trimEmptyElement(lexer, element);
1004                             return;
1005                         }
1006
1007                         /* if parent is <a> then discard unexpected inline end tag */
1008                         Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1009                         continue;
1010                     }  /* special case </tr> etc. for stuff moved in front of table */
1011                     else if (lexer.exiled
1012                                 && node.tag.model != 0
1013                                 && (node.tag.model & Dict.CM_TABLE) != 0)
1014                     {
1015                         lexer.ungetToken();
1016                         Node.trimSpaces(lexer, element);
1017                         Node.trimEmptyElement(lexer, element);
1018                         return;
1019                     }
1020                 }
1021
1022                 /* allow any header tag to end current header */
1023                 if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0)
1024                 {
1025                     if (node.tag == element.tag)
1026                     {
1027                         Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
1028                     }
1029                     else
1030                     {
1031                         Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1032                         lexer.ungetToken();
1033                     }
1034                     if (!((mode & Lexer.Preformatted) != 0))
1035                         Node.trimSpaces(lexer, element);
1036                     Node.trimEmptyElement(lexer, element);
1037                     return;
1038                 }
1039
1040                 /*
1041                    an <A> tag to ends any open <A> element
1042                    but <A href=...> is mapped to </A><A href=...>
1043                 */
1044                 if (node.tag == tt.tagA && !node.implicit && lexer.isPushed(node))
1045                 {
1046                  /* coerce <a> to </a> unless it has some attributes */
1047                     if (node.attributes == null)
1048                     {
1049                         node.type = Node.EndTag;
1050                         Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
1051                         lexer.popInline( node);
1052                         lexer.ungetToken();
1053                         continue;
1054                     }
1055
1056                     lexer.ungetToken();
1057                     Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1058                     lexer.popInline( element);
1059                     if (!((mode & Lexer.Preformatted) != 0))
1060                         Node.trimSpaces(lexer, element);
1061                     Node.trimEmptyElement(lexer, element);
1062                     return;
1063                 }
1064
1065                 if ((element.tag.model & Dict.CM_HEADING) != 0)
1066                 {
1067                     if (node.tag == tt.tagCenter ||
1068                         node.tag == tt.tagDiv)
1069                     {
1070                         if (node.type != Node.StartTag &&
1071                             node.type != Node.StartEndTag)
1072                         {
1073                             Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1074                             continue;
1075                         }
1076
1077                         Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1078
1079                         /* insert center as parent if heading is empty */
1080                         if (element.content == null)
1081                         {
1082                             Node.insertNodeAsParent(element, node);
1083                             continue;
1084                         }
1085
1086                         /* split heading and make center parent of 2nd part */
1087                         Node.insertNodeAfterElement(element, node);
1088
1089                         if (!((mode & Lexer.Preformatted) != 0))
1090                             Node.trimSpaces(lexer, element);
1091
1092                         element = lexer.cloneNode(element);
1093                         element.start = lexer.lexsize;
1094                         element.end   = lexer.lexsize;
1095                         Node.insertNodeAtEnd(node, element);
1096                         continue;
1097                     }
1098
1099                     if (node.tag == tt.tagHr)
1100                     {
1101                         if (node.type != Node.StartTag &&
1102                             node.type != Node.StartEndTag)
1103                         {
1104                             Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1105                             continue;
1106                         }
1107
1108                         Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1109
1110                         /* insert hr before heading if heading is empty */
1111                         if (element.content == null)
1112                         {
1113                             Node.insertNodeBeforeElement(element, node);
1114                             continue;
1115                         }
1116
1117                         /* split heading and insert hr before 2nd part */
1118                         Node.insertNodeAfterElement(element, node);
1119
1120                         if (!((mode & Lexer.Preformatted) != 0))
1121                             Node.trimSpaces(lexer, element);
1122
1123                         element = lexer.cloneNode(element);
1124                         element.start = lexer.lexsize;
1125                         element.end   = lexer.lexsize;
1126                         Node.insertNodeAfterElement(node, element);
1127                         continue;
1128                     }
1129                 }
1130
1131                 if (element.tag == tt.tagDt)
1132                 {
1133                     if (node.tag == tt.tagHr)
1134                     {
1135                         Node dd;
1136
1137                         if (node.type != Node.StartTag &&
1138                             node.type != Node.StartEndTag)
1139                         {
1140                             Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1141                             continue;
1142                         }
1143
1144                         Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1145                         dd = lexer.inferredTag("dd");
1146
1147                         /* insert hr within dd before dt if dt is empty */
1148                         if (element.content == null)
1149                         {
1150                             Node.insertNodeBeforeElement(element, dd);
1151                             Node.insertNodeAtEnd(dd, node);
1152                             continue;
1153                         }
1154
1155                         /* split dt and insert hr within dd before 2nd part */
1156                         Node.insertNodeAfterElement(element, dd);
1157                         Node.insertNodeAtEnd(dd, node);
1158
1159                         if (!((mode & Lexer.Preformatted) != 0))
1160                             Node.trimSpaces(lexer, element);
1161
1162                         element = lexer.cloneNode(element);
1163                         element.start = lexer.lexsize;
1164                         element.end   = lexer.lexsize;
1165                         Node.insertNodeAfterElement(dd, element);
1166                         continue;
1167                     }
1168                 }
1169
1170
1171                 /* 
1172                   if this is the end tag for an ancestor element
1173                   then infer end tag for this element
1174                 */
1175                 if (node.type == Node.EndTag)
1176                 {
1177                     for (parent = element.parent;
1178                             parent != null; parent = parent.parent)
1179                     {
1180                         if (node.tag == parent.tag)
1181                         {
1182                             if (!((element.tag.model & Dict.CM_OPT) != 0) &&
1183                                 !element.implicit)
1184                                 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1185
1186                             if (element.tag == tt.tagA)
1187                                 lexer.popInline(element);
1188
1189                             lexer.ungetToken();
1190
1191                             if (!((mode & Lexer.Preformatted) != 0))
1192                                 Node.trimSpaces(lexer, element);
1193
1194                             Node.trimEmptyElement(lexer, element);
1195                             return;
1196                         }
1197                     }
1198                 }
1199
1200                 /* block level tags end this element */
1201                 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1202                 {
1203                     if (node.type != Node.StartTag)
1204                     {
1205                         Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1206                         continue;
1207                     }
1208
1209                     if (!((element.tag.model & Dict.CM_OPT) != 0))
1210                         Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1211
1212                     if ((node.tag.model & Dict.CM_HEAD) != 0 &&
1213                         !((node.tag.model & Dict.CM_BLOCK) != 0))
1214                     {
1215                         moveToHead(lexer, element, node);
1216                         continue;
1217                     }
1218
1219                     /*
1220                        prevent anchors from propagating into block tags
1221                        except for headings h1 to h6
1222                     */
1223                     if (element.tag == tt.tagA)
1224                     {
1225                         if (node.tag != null &&
1226                             !((node.tag.model & Dict.CM_HEADING) != 0))
1227                             lexer.popInline(element);
1228                         else if (!(element.content != null))
1229                         {
1230                             Node.discardElement(element);
1231                             lexer.ungetToken();
1232                             return;
1233                         }
1234                     }
1235
1236                     lexer.ungetToken();
1237
1238                     if (!((mode & Lexer.Preformatted) != 0))
1239                         Node.trimSpaces(lexer, element);
1240
1241                     Node.trimEmptyElement(lexer, element);
1242                     return;
1243                 }
1244
1245                 /* parse inline element */
1246                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1247                 {
1248                     if (node.implicit)
1249                         Report.warning(lexer, element, node, Report.INSERTING_TAG);
1250
1251                     /* trim white space before <br> */
1252                     if (node.tag == tt.tagBr)
1253                         Node.trimSpaces(lexer, element);
1254             
1255                     Node.insertNodeAtEnd(element, node);
1256                     parseTag(lexer, node, mode);
1257                     continue;
1258                 }
1259
1260                 /* discard unexpected tags */
1261                 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1262             }
1263
1264             if (!((element.tag.model & Dict.CM_OPT) != 0))
1265                 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
1266
1267             Node.trimEmptyElement(lexer, element);
1268         }
1269     };
1270
1271     public static class ParseList implements Parser {
1272
             /**
              * Parses the content of a list element, accepting only LI children.
              *
              * Tokens are read until the list's own end tag, the end tag of an
              * ancestor, or the end of input (getToken returning null). A token
              * that is not an LI is pushed back, and an inferred LI carrying
              * style="list-style: none" is inserted and parsed in its place.
              * A list whose tag model has CM_OBSOLETE set is coerced to UL on
              * every exit path except the early CM_EMPTY return and the
              * excludeBlocks return.
              *
              * @param lexer token source; its insert and badForm fields are
              *              written by this method
              * @param list  the list element whose content is being parsed
              * @param mode  not read by this method; tokens are always fetched
              *              with Lexer.IgnoreWhitespace
              */
1273         public void parse( Lexer lexer, Node list, short mode )
1274         {
1275             Node node;
1276             Node parent;
1277             TagTable tt = lexer.configuration.tt;
1278
                 /* an element whose model is CM_EMPTY has no content to parse */
1279             if ((list.tag.model & Dict.CM_EMPTY) != 0)
1280                 return;
1281
1282             lexer.insert = -1;  /* defer implicit inline start tags */
1283
1284             while (true)
1285             {
1286                 node = lexer.getToken(Lexer.IgnoreWhitespace);
1287                 if (node == null) break;
1288
                     /* explicit end tag for this list: mark it closed and stop */
1289                 if (node.tag == list.tag && node.type == Node.EndTag)
1290                 {
1291                     if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1292                         Node.coerceNode(lexer, list, tt.tagUl);
1293
1294                     list.closed = true;
1295                     Node.trimEmptyElement(lexer, list);
1296                     return;
1297                 }
1298
1299                 /* deal with comments etc. */
1300                 if (Node.insertMisc(list, node))
1301                     continue;
1302
                     /* a non-text token without a known tag is dropped with a warning */
1303                 if (node.type != Node.TextNode && node.tag == null)
1304                 {
1305                     Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1306                     continue;
1307                 }
1308
1309                 /* 
1310                   if this is the end tag for an ancestor element
1311                   then infer end tag for this element
1312                 */
1313                 if (node.type == Node.EndTag)
1314                 {
                         /* a stray </form> is recorded in lexer.badForm and discarded */
1315                     if (node.tag == tt.tagForm)
1316                     {
1317                         lexer.badForm = 1;
1318                         Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1319                         continue;
1320                     }
1321
                         /* an inline end tag is discarded, then handed to popInline */
1322                     if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0)
1323                     {
1324                         Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1325                         lexer.popInline(node);
1326                         continue;
1327                     }
1328
                         /* walk up the ancestors looking for the element this tag closes */
1329                     for (parent = list.parent;
1330                             parent != null; parent = parent.parent)
1331                     {
1332                         if (node.tag == parent.tag)
1333                         {
                                 /* push the token back so the ancestor's parser sees it */
1334                             Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1335                             lexer.ungetToken();
1336
1337                             if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1338                                 Node.coerceNode(lexer, list, tt.tagUl);
1339
1340                             Node.trimEmptyElement(lexer, list);
1341                             return;
1342                         }
1343                     }
1344
                         /* end tag matches no ancestor: discard it */
1345                     Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1346                     continue;
1347                 }
1348
                     /*
                       Anything other than an LI token is pushed back. Text nodes can
                       reach this point as well, because the null-tag discard above
                       exempts them.
                     */
1349                 if (node.tag != tt.tagLi)
1350                 {
1351                     lexer.ungetToken();
1352
                         /* with excludeBlocks set, a block-level tag ends the list (no UL coercion here) */
1353                     if (node.tag != null &&
1354                         (node.tag.model & Dict.CM_BLOCK) != 0 &&
1355                         lexer.excludeBlocks)
1356                     {
1357                         Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1358                         Node.trimEmptyElement(lexer, list);
1359                         return;
1360                     }
1361
                         /* otherwise continue with an inferred LI in place of the pushed-back token */
1362                     node = lexer.inferredTag("li");
1363                     node.addAttribute("style", "list-style: none");
1364                     Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1365                 }
1366
1367                 /* node should be <LI> */
1368                 Node.insertNodeAtEnd(list, node);
1369                 parseTag(lexer, node, Lexer.IgnoreWhitespace);
1370             }
1371
                 /*
                   Reached only through the break above, so node is null here:
                   the input ended before the list's end tag was seen.
                 */
1372             if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1373                 Node.coerceNode(lexer, list, tt.tagUl);
1374
1375             Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
1376             Node.trimEmptyElement(lexer, list);
1377         }
1378
1379     };
1380
1381     public static class ParseDefList implements Parser {
1382
             /**
              * Parses the content of a definition list, accepting DT and DD children.
              *
              * Tokens are read until the list's own end tag, the end tag of an
              * ancestor, or the end of input (getToken returning null). Text is
              * pushed back and replaced by an inferred DT; other block or inline
              * content is pushed back and replaced by an inferred DD. A CENTER
              * element splits the list: it is placed next to the current list,
              * parsed, and followed by a newly inferred DL that receives the
              * remaining items.
              *
              * @param lexer token source; its insert and badForm fields are
              *              written by this method
              * @param list  the DL element being parsed; reassigned locally when
              *              a CENTER element splits the list
              * @param mode  only passed on to parseTag for a CENTER element;
              *              tokens are always fetched with Lexer.IgnoreWhitespace
              */
1383         public void parse( Lexer lexer, Node list, short mode )
1384         {
1385             Node node, parent;
1386             TagTable tt = lexer.configuration.tt;
1387
                 /* an element whose model is CM_EMPTY has no content to parse */
1388             if ((list.tag.model & Dict.CM_EMPTY) != 0)
1389                 return;
1390
1391             lexer.insert = -1;  /* defer implicit inline start tags */
1392
1393             while (true)
1394             {
1395                 node = lexer.getToken(Lexer.IgnoreWhitespace);
1396                 if (node == null) break;
                     /* explicit end tag for this list: mark it closed and stop */
1397                 if (node.tag == list.tag && node.type == Node.EndTag)
1398                 {
1399                     list.closed = true;
1400                     Node.trimEmptyElement(lexer, list);
1401                     return;
1402                 }
1403
1404                 /* deal with comments etc. */
1405                 if (Node.insertMisc(list, node))
1406                     continue;
1407
                     /* bare text: push it back and continue with an inferred DT instead */
1408                 if (node.type == Node.TextNode)
1409                 {
1410                     lexer.ungetToken();
1411                     node = lexer.inferredTag( "dt");
1412                     Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1413                 }
1414
                     /* a token without a known tag is dropped with a warning */
1415                 if (node.tag == null)
1416                 {
1417                     Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1418                     continue;
1419                 }
1420
1421                 /* 
1422                   if this is the end tag for an ancestor element
1423                   then infer end tag for this element
1424                 */
1425                 if (node.type == Node.EndTag)
1426                 {
                         /* a stray </form> is recorded in lexer.badForm and discarded */
1427                     if (node.tag == tt.tagForm)
1428                     {
1429                         lexer.badForm = 1;
1430                         Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1431                         continue;
1432                     }
1433
                         /* walk up the ancestors looking for the element this tag closes */
1434                     for (parent = list.parent;
1435                             parent != null; parent = parent.parent)
1436                     {
1437                         if (node.tag == parent.tag)
1438                         {
1439                             Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1440
                                 /* push the token back so the ancestor's parser sees it */
1441                             lexer.ungetToken();
1442                             Node.trimEmptyElement(lexer, list);
1443                             return;
1444                         }
1445                     }
                         /* an end tag matching no ancestor falls through to the checks below */
1446                 }
1447
1448                 /* center in a dt or a dl breaks the dl list in two */
1449                 if (node.tag == tt.tagCenter)
1450                 {
1451                     if (list.content != null)
1452                         Node.insertNodeAfterElement(list, node);
1453                     else /* trim empty dl list */
1454                     {
                             /*
                               NOTE(review): the original list element is discarded here
                               while the caller still holds a reference to it; confirm
                               this is safe for the caller.
                             */
1455                         Node.insertNodeBeforeElement(list, node);
1456                         Node.discardElement(list);
1457                     }
1458
1459                     /* and parse contents of center */
1460                     parseTag(lexer, node, mode);
1461
1462                     /* now create a new dl element */
                         /* from here on "list" refers to the new DL, not the caller's node */
1463                     list = lexer.inferredTag("dl");
1464                     Node.insertNodeAfterElement(node, list);
1465                     continue;
1466                 }
1467
                     /* anything other than DT or DD is pushed back for re-reading */
1468                 if (!(node.tag == tt.tagDt || node.tag == tt.tagDd))
1469                 {
1470                     lexer.ungetToken();
1471
                         /* a tag that is neither block nor inline ends the list */
1472                     if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
1473                     {
1474                         Report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN);
1475                         Node.trimEmptyElement(lexer, list);
1476                         return;
1477                     }
1478
1479                     /* if DD appeared directly in BODY then exclude blocks */
1480                     if (!((node.tag.model & Dict.CM_INLINE) != 0) && lexer.excludeBlocks)
1481                     {
1482                         Node.trimEmptyElement(lexer, list);
1483                         return;
1484                     }
1485
                         /* otherwise continue with an inferred DD in place of the pushed-back token */
1486                     node = lexer.inferredTag( "dd");
1487                     Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1488                 }
1489
                     /*
                       Any end tag still held in node is discarded. The branch above
                       replaces every non-DT/DD token, so presumably only a stray
                       </dt> or </dd> gets here -- TODO confirm that inferredTag
                       never yields an end tag.
                     */
1490                 if (node.type == Node.EndTag)
1491                 {
1492                     Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1493                     continue;
1494                 }
1495         
1496                 /* node should be <DT> or <DD>*/
1497                 Node.insertNodeAtEnd(list, node);
1498                 parseTag(lexer, node, Lexer.IgnoreWhitespace);
1499             }
1500
                 /*
                   Reached only through the break above, so node is null here:
                   the input ended before the list's end tag was seen.
                 */
1501             Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
1502             Node.trimEmptyElement(lexer, list);
1503         }
1504
1505     };
1506
1507     public static class ParsePre implements Parser {
1508
1509         public void parse( Lexer lexer, Node pre, short mode )
1510         {
1511             Node node, parent;
1512             TagTable tt = lexer.configuration.tt;
1513
1514             if ((pre.tag.model & Dict.CM_EMPTY) != 0)
1515                 return;
1516
1517             if ((pre.tag.model & Dict.CM_OBSOLETE) != 0)
1518                 Node.coerceNode(lexer, pre, tt.tagPre);
1519
1520             lexer.inlineDup( null); /* tell lexer to insert inlines if needed */
1521
1522             while (true)
1523             {
1524                 node = lexer.getToken(Lexer.Preformatted);
1525                 if (node == null) break;
1526                 if (node.tag == pre.tag && node.type == Node.EndTag)
1527                 {
1528                     Node.trimSpaces(lexer, pre);
1529                     pre.closed = true;
1530                     Node.trimEmptyElement(lexer, pre);
1531                     return;
1532                 }
1533
1534                 if (node.tag == tt.tagHtml)
1535                 {
1536                     if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1537                         Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1538
1539                     continue;
1540                 }
1541
1542                 if (node.type == Node.TextNode)
1543                 {
1544                     /* if first check for inital newline */
1545                     if (pre.content == null)
1546                     {
1547                         if (node.textarray[node.start] == (byte)'\n')
1548                             ++node.start;
1549
1550                         if (node.start >= node.end)
1551                         {
1552                             continue;
1553                         }
1554                     }
1555
1556                     Node.insertNodeAtEnd(pre, node);
1557                     continue;
1558                 }
1559
1560                 /* deal with comments etc. */
1561                 if (Node.insertMisc(pre, node))
1562                     continue;
1563
1564                 /* discard unknown  and PARAM tags */
1565                 if (node.tag == null || node.tag == tt.tagParam)
1566                 {
1567                     Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1568                     continue;
1569                 }
1570
1571                 if (node.tag == tt.tagP)
1572                 {
1573                     if (node.type == Node.StartTag)
1574                     {
1575                         Report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);
1576
1577                         /* trim white space before <p> in <pre>*/
1578                         Node.trimSpaces(lexer, pre);
1579             
1580                         /* coerce both <p> and </p> to <br> */
1581                         Node.coerceNode(lexer, node, tt.tagBr);
1582                         Node.insertNodeAtEnd(pre, node);
1583                     }
1584                     else
1585                     {
1586                         Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1587                     }
1588                     continue;
1589                 }
1590
1591                 if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0))
1592                 {
1593                     moveToHead(lexer, pre, node);
1594                     continue;
1595                 }
1596
1597                 /* 
1598                   if this is the end tag for an ancestor element
1599                   then infer end tag for this element
1600                 */
1601                 if (node.type == Node.EndTag)
1602                 {
1603                     if (node.tag == tt.tagForm)
1604                     {
1605                         lexer.badForm = 1;
1606                         Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1607                         continue;
1608                     }
1609
1610                     for (parent = pre.parent;
1611                             parent != null; parent = parent.parent)
1612                     {
1613                         if (node.tag == parent.tag)
1614                         {
1615                             Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1616
1617                             lexer.ungetToken();
1618                             Node.trimSpaces(lexer, pre);
1619                             Node.trimEmptyElement(lexer, pre);
1620                             return;
1621                         }
1622                     }
1623                 }
1624
1625                 /* what about head content, HEAD, BODY tags etc? */
1626                 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1627                 {
1628                     if (node.type != Node.StartTag)
1629                     {
1630                         Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1631                         continue;
1632                     }
1633  
1634                     Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1635                     lexer.excludeBlocks = true;
1636
1637                     /* check if we need to infer a container */
1638                     if ((node.tag.model & Dict.CM_LIST) != 0)
1639                     {
1640                         lexer.ungetToken();
1641                         node = lexer.inferredTag( "ul");
1642                         Node.addClass(node, "noindent");
1643                     }
1644                     else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
1645                     {
1646                         lexer.ungetToken();
1647                         node = lexer.inferredTag( "dl");
1648                     }
1649                     else if ((node.tag.model & Dict.CM_TABLE) != 0)
1650                     {
1651                         lexer.ungetToken();
1652                         node = lexer.inferredTag( "table");
1653                     }
1654
1655                     Node.insertNodeAfterElement(pre, node);
1656                     pre = lexer.inferredTag( "pre");
1657                     Node.insertNodeAfterElement(node, pre);
1658                     parseTag(lexer, node, Lexer.IgnoreWhitespace);
1659                     lexer.excludeBlocks = false;
1660                     continue;
1661                 }
1662                 /*
1663                 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1664                 {
1665                     Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1666                     lexer.ungetToken();
1667                     return;
1668                 }
1669                 */
1670                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1671                 {
1672                     /* trim white space before <br> */
1673                     if (node.tag == tt.tagBr)
1674                         Node.trimSpaces(lexer, pre);
1675             
1676                     Node.insertNodeAtEnd(pre, node);
1677                     parseTag(lexer, node, Lexer.Preformatted);
1678                     continue;
1679                 }
1680
1681                 /* discard unexpected tags */
1682                 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1683             }
1684
1685             Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
1686             Node.trimEmptyElement(lexer, pre);
1687         }
1688
1689     };
1690
1691     public static class ParseBlock implements Parser {
             /*
               Parser for an element with block content.  Tokens are pulled
               from the lexer one at a time and either appended to the
               element, discarded with a warning, moved to the head, or
               pushed back (ungetToken) when they show that the element
               has ended without an explicit end tag.
             */
1692
1693         public void parse( Lexer lexer, Node element, short mode )
1694         /*
1695            element is node created by the lexer
1696            upon seeing the start tag, or by the
1697            parser when the start tag is inferred

                the mode value passed in is never read: it is
                overwritten with Lexer.IgnoreWhitespace before the
                first token is fetched
1698         */
1699         {
1700             Node node, parent;
1701             boolean checkstack;
                 /* caller's lexer.istackbase, saved only for CM_OBJECT elements */
1702             int istackbase = 0;
1703             TagTable tt = lexer.configuration.tt;
1704
                 /*
                   while checkstack is true the next text node or explicit
                   inline start tag is first offered to lexer.inlineDup()
                 */
1705             checkstack = true;
1706
                 /* empty elements have no content to parse */
1707             if ((element.tag.model & Dict.CM_EMPTY) != 0)
1708                 return;
1709
1710             if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm))
1711                 Report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
1712
1713             /*
1714              InlineDup() asks the lexer to insert inline emphasis tags
1715              currently pushed on the istack, but take care to avoid
1716              propagating inline emphasis inside OBJECT or APPLET.
1717              For these elements a fresh inline stack context is created
1718              and disposed of upon reaching the end of the element.
1719              They thus behave like table cells in this respect.
1720             */
1721             if ((element.tag.model & Dict.CM_OBJECT) != 0)
1722             {
1723                 istackbase = lexer.istackbase;
1724                 lexer.istackbase = lexer.istack.size();
1725             }
1726
1727             if (!((element.tag.model & Dict.CM_MIXED) != 0))
1728                 lexer.inlineDup( null);
1729
                 /*
                   from here on mode is a local: it selects the lexer mode
                   for the next getToken() call only
                 */
1730             mode = Lexer.IgnoreWhitespace;
1731
1732             while (true)
1733             {
1734                 node = lexer.getToken(mode /*Lexer.MixedContent*/);
                     /* null token: end of input, handled after the loop */
1735                 if (node == null) break;
1736                 /* end tag for this element */
1737                 if (node.type == Node.EndTag && node.tag != null &&
1738                     (node.tag == element.tag || element.was == node.tag))
1739                 {
1740
1741                     if ((element.tag.model & Dict.CM_OBJECT) != 0)
1742                     {
1743                         /* pop inline stack */
1744                         while (lexer.istack.size() > lexer.istackbase)
1745                             lexer.popInline( null);
1746                         lexer.istackbase = istackbase;
1747                     }
1748
1749                     element.closed = true;
1750                     Node.trimSpaces(lexer, element);
1751                     Node.trimEmptyElement(lexer, element);
1752                     return;
1753                 }
1754
                     /*
                       html, head and body tokens are dropped here; only the
                       start-tag forms produce a warning
                     */
1755                 if (node.tag == tt.tagHtml ||
1756                     node.tag == tt.tagHead ||
1757                     node.tag == tt.tagBody)
1758                 {
1759                     if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1760                         Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1761
1762                     continue;
1763                 }
1764
1765                 if (node.type == Node.EndTag)
1766                 {
                         /* end tag of an unknown element */
1767                     if (node.tag == null)
1768                     {
1769                         Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1770
1771                         continue;
1772                     }
                         /* </br> is turned into a start tag and handled as <br> below */
1773                     else if (node.tag == tt.tagBr)
1774                         node.type = Node.StartTag;
                         /*
                           </p>: the end tag itself is coerced to <br> and appended,
                           then node is replaced by an inferred <br> which carries
                           on through the rest of the loop body
                         */
1775                     else if (node.tag == tt.tagP)
1776                     {
1777                         Node.coerceNode(lexer, node, tt.tagBr);
1778                         Node.insertNodeAtEnd(element, node);
1779                         node = lexer.inferredTag("br");
1780                     }
1781                     else
1782                     {
1783                         /* 
1784                           if this is the end tag for an ancestor element
1785                           then infer end tag for this element
1786                         */
1787                         for (parent = element.parent;
1788                                 parent != null; parent = parent.parent)
1789                         {
1790                             if (node.tag == parent.tag)
1791                             {
1792                                 if (!((element.tag.model & Dict.CM_OPT) != 0))
1793                                     Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1794
                                     /* leave the end tag for the ancestor's parser */
1795                                 lexer.ungetToken();
1796
1797                                 if ((element.tag.model & Dict.CM_OBJECT) != 0)
1798                                 {
1799                                     /* pop inline stack */
1800                                     while (lexer.istack.size() > lexer.istackbase)
1801                                         lexer.popInline( null);
1802                                     lexer.istackbase = istackbase;
1803                                 }
1804
1805                                 Node.trimSpaces(lexer, element);
1806                                 Node.trimEmptyElement(lexer, element);
1807                                 return;
1808                             }
1809                         }
1810                         /* special case </tr> etc. for stuff moved in front of table */
1811                         if (lexer.exiled
1812                                     && node.tag.model != 0
1813                                     && (node.tag.model & Dict.CM_TABLE) != 0)
1814                         {
1815                             lexer.ungetToken();
1816                             Node.trimSpaces(lexer, element);
1817                             Node.trimEmptyElement(lexer, element);
1818                             return;
1819                         }
1820                     }
1821                 }
1822
1823                 /* mixed content model permits text */
1824                 if (node.type == Node.TextNode)
1825                 {
1826                     boolean iswhitenode = false;
1827
                         /*
                           a text node of at most one character whose first
                           byte in lexer.lexbuf is a space.
                           NOTE(review): node.type is already known to be
                           TextNode here, so the first clause is redundant.
                         */
1828                     if (node.type == Node.TextNode &&
1829                            node.end <= node.start + 1 &&
1830                            lexer.lexbuf[node.start] == (byte)' ')
1831                         iswhitenode = true;
1832
                         /*
                           EncloseBlockText: push the text back, append an
                           inferred <p> and hand that <p> to parseTag
                         */
1833                     if (lexer.configuration.EncloseBlockText && !iswhitenode)
1834                     {
1835                         lexer.ungetToken();
1836                         node = lexer.inferredTag("p");
1837                         Node.insertNodeAtEnd(element, node);
1838                         parseTag(lexer, node, Lexer.MixedContent);
1839                         continue;
1840                     }
1841
1842                     if (checkstack)
1843                     {
1844                         checkstack = false;
1845
1846                         if (!((element.tag.model & Dict.CM_MIXED) != 0))
1847                         {
                                 /*
                                   a positive result means this text node is not
                                   appended on this pass (presumably the lexer
                                   delivers it again after the duplicated inline
                                   tags -- confirm in Lexer.inlineDup)
                                 */
1848                             if (lexer.inlineDup( node) > 0)
1849                                 continue;
1850                         }
1851                     }
1852
1853                     Node.insertNodeAtEnd(element, node);
1854                     mode = Lexer.MixedContent;
1855                     /*
1856                       HTML4 strict doesn't allow mixed content for
1857                       elements with %block; as their content model
1858                     */
1859                     lexer.versions &= ~Dict.VERS_HTML40_STRICT;
1860                     continue;
1861                 }
1862
                     /* insertMisc returns true when it has taken care of the node */
1863                 if (Node.insertMisc(element, node))
1864                     continue;
1865
1866                 /* allow PARAM elements? */
1867                 if (node.tag == tt.tagParam)
1868                 {
1869                     if (((element.tag.model & Dict.CM_PARAM) != 0) &&
1870                             (node.type == Node.StartTag || node.type == Node.StartEndTag))
1871                     {
1872                         Node.insertNodeAtEnd(element, node);
1873                         continue;
1874                     }
1875
1876                     /* otherwise discard it */
1877                     Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1878                     continue;
1879                 }
1880
1881                 /* allow AREA elements? */
1882                 if (node.tag == tt.tagArea)
1883                 {
1884                     if ((element.tag == tt.tagMap) &&
1885                             (node.type == Node.StartTag || node.type == Node.StartEndTag))
1886                     {
1887                         Node.insertNodeAtEnd(element, node);
1888                         continue;
1889                     }
1890
1891                     /* otherwise discard it */
1892                     Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1893                     continue;
1894                 }
1895
1896                 /* ignore unknown start/end tags */
1897                 if (node.tag == null)
1898                 {
1899                     Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1900                     continue;
1901                 }
1902
1903                 /*
1904                   Allow Dict.CM_INLINE elements here.
1905
1906                   Allow Dict.CM_BLOCK elements here unless
1907                   lexer.excludeBlocks is yes.
1908
1909                   LI and DD are special cased.
1910
1911                   Otherwise infer end tag for this element.
1912                 */
1913
1914                 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1915                 {
                         /* a non-inline token that is not a start tag is dropped */
1916                     if (node.type != Node.StartTag && node.type != Node.StartEndTag)
1917                     {
1918                         Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1919                         continue;
1920                     }
1921
1922                     if (element.tag == tt.tagTd || element.tag == tt.tagTh)
1923                     {
1924                         /* if parent is a table cell, avoid inferring the end of the cell */
1925
1926                         if ((node.tag.model & Dict.CM_HEAD) != 0)
1927                         {
1928                             moveToHead(lexer, element, node);
1929                             continue;
1930                         }
1931
                             /*
                               list / definition-list items: push the item back and
                               make an inferred <ul> / <dl> the current node instead
                             */
1932                         if ((node.tag.model & Dict.CM_LIST) != 0)
1933                         {
1934                             lexer.ungetToken();
1935                             node = lexer.inferredTag( "ul");
1936                             Node.addClass(node, "noindent");
1937                             lexer.excludeBlocks = true;
1938                         }
1939                         else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
1940                         {
1941                             lexer.ungetToken();
1942                             node = lexer.inferredTag( "dl");
1943                             lexer.excludeBlocks = true;
1944                         }
1945
1946                         /* infer end of current table cell */
1947                         if (!((node.tag.model & Dict.CM_BLOCK) != 0))
1948                         {
1949                             lexer.ungetToken();
1950                             Node.trimSpaces(lexer, element);
1951                             Node.trimEmptyElement(lexer, element);
1952                             return;
1953                         }
1954                     }
1955                     else if ((node.tag.model & Dict.CM_BLOCK) != 0)
1956                     {
                             /* nested blocks are not allowed: end this element here */
1957                         if (lexer.excludeBlocks)
1958                         {
1959                             if (!((element.tag.model & Dict.CM_OPT) != 0))
1960                                 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1961
1962                             lexer.ungetToken();
1963
                                 /*
                                   NOTE(review): unlike the other CM_OBJECT exits in
                                   this method, this one restores istackbase without
                                   the popInline loop -- confirm this is intended
                                 */
1964                             if ((element.tag.model & Dict.CM_OBJECT) != 0)
1965                                 lexer.istackbase = istackbase;
1966
1967                             Node.trimSpaces(lexer, element);
1968                             Node.trimEmptyElement(lexer, element);
1969                             return;
1970                         }
1971                     }
1972                     else /* things like list items */
1973                     {
1974                         if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
1975                             Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1976
1977                         if ((node.tag.model & Dict.CM_HEAD) != 0)
1978                         {
1979                             moveToHead(lexer, element, node);
1980                             continue;
1981                         }
1982
                             /*
                               the token is pushed back in every remaining case:
                               either this element ends here (return), or an
                               inferred ul / dl / table replaces node and continues
                               to the code after this if-block
                             */
1983                         lexer.ungetToken();
1984
1985                         if ((node.tag.model & Dict.CM_LIST) != 0)
1986                         {
                                 /* already inside a list: just end this element */
1987                             if (element.parent != null && element.parent.tag != null &&
1988                                 element.parent.tag.parser == getParseList())
1989                             {
1990                                 Node.trimSpaces(lexer, element);
1991                                 Node.trimEmptyElement(lexer, element);
1992                                 return;
1993                             }
1994
1995                             node = lexer.inferredTag("ul");
1996                             Node.addClass(node, "noindent");
1997                         }
1998                         else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
1999                         {
                                 /*
                                   NOTE(review): element.parent is dereferenced here
                                   without the null check used in the CM_LIST branch
                                   above -- confirm element always has a parent
                                 */
2000                             if (element.parent.tag == tt.tagDl)
2001                             {
2002                                 Node.trimSpaces(lexer, element);
2003                                 Node.trimEmptyElement(lexer, element);
2004                                 return;
2005                             }
2006
2007                             node = lexer.inferredTag("dl");
2008                         }
2009                         else if ((node.tag.model & Dict.CM_TABLE) != 0 ||
2010                                  (node.tag.model & Dict.CM_ROW) != 0)
2011                         {
2012                             node = lexer.inferredTag("table");
2013                         }
2014                         else if ((element.tag.model & Dict.CM_OBJECT) != 0)
2015                         {
2016                             /* pop inline stack */
2017                             while (lexer.istack.size() > lexer.istackbase)
2018                                 lexer.popInline( null);
2019                             lexer.istackbase = istackbase;
2020                             Node.trimSpaces(lexer, element);
2021                             Node.trimEmptyElement(lexer, element);
2022                             return;
2023
2024                         }
2025                         else
2026                         {
2027                             Node.trimSpaces(lexer, element);
2028                             Node.trimEmptyElement(lexer, element);
2029                             return;
2030                         }
2031                     }
2032                 }
2033
2034                 /* parse known element */
2035                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
2036                 {
2037                     if ((node.tag.model & Dict.CM_INLINE) != 0)
2038                     {
2039                         if (checkstack && !node.implicit)
2040                         {
2041                             checkstack = false;
2042
2043                             if (lexer.inlineDup( node) > 0)
2044                                 continue;
2045                         }
2046
2047                         mode = Lexer.MixedContent;
2048                     }
2049                     else
2050                     {
                             /* after a non-inline child the inline stack is checked again */
2051                         checkstack = true;
2052                         mode = Lexer.IgnoreWhitespace;
2053                     }
2054
2055                     /* trim white space before <br> */
2056                     if (node.tag == tt.tagBr)
2057                         Node.trimSpaces(lexer, element);
2058
2059                     Node.insertNodeAtEnd(element, node);
2060             
2061                     if (node.implicit)
2062                         Report.warning(lexer, element, node, Report.INSERTING_TAG);
2063
                         /*
                           the child is always parsed with IgnoreWhitespace; the
                           mode chosen above only affects the next getToken()
                           of this loop
                         */
2064                     parseTag(lexer, node, Lexer.IgnoreWhitespace /*Lexer.MixedContent*/);
2065                     continue;
2066                 }
2067
2068                 /* discard unexpected tags */
2069                 if (node.type == Node.EndTag)
2070                     lexer.popInline( node);  /* if inline end tag */
2071
2072                 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2073             }
2074
                 /*
                   reached only through the break above, i.e. the input ended
                   before the end tag; node is null in the warning call below
                 */
2075             if (!((element.tag.model & Dict.CM_OPT) != 0))
2076                 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
2077
2078             if ((element.tag.model & Dict.CM_OBJECT) != 0)
2079             {
2080                 /* pop inline stack */
2081                 while (lexer.istack.size() > lexer.istackbase)
2082                     lexer.popInline( null);
2083                 lexer.istackbase = istackbase;
2084             }
2085
2086             Node.trimSpaces(lexer, element);
2087             Node.trimEmptyElement(lexer, element);
2088         }
2089
2090     };
2091
2092     public static class ParseTableTag implements Parser {
             /*
               Parser for the content of a table element.  Table-model
               children are appended and parsed, a <tr> is inferred for
               stray cells, and text or block/inline content is moved in
               front of the table.
             */
2093
             /*
               table is the element whose content is parsed.
               The mode argument is not used by this method; every token
               is read with Lexer.IgnoreWhitespace.
             */
2094         public void parse( Lexer lexer, Node table, short mode )
2095         {
2096             Node node, parent;
2097             int istackbase;
2098             TagTable tt = lexer.configuration.tt;
2099
                 /*
                   deferDup() is defined in Lexer (not visible here).
                   The caller's istackbase is saved and replaced by the
                   current inline stack size; every exit from this method
                   puts the saved value back.
                 */
2100             lexer.deferDup();
2101             istackbase = lexer.istackbase;
2102             lexer.istackbase = lexer.istack.size();
2103     
2104             while (true)
2105             {
2106                 node = lexer.getToken(Lexer.IgnoreWhitespace);
                     /* null token: end of input, handled after the loop */
2107                 if (node == null) break;
                     /* end tag of this table */
2108                 if (node.tag == table.tag && node.type == Node.EndTag)
2109                 {
2110                     lexer.istackbase = istackbase;
2111                     table.closed = true;
2112                     Node.trimEmptyElement(lexer, table);
2113                     return;
2114                 }
2115
2116                 /* deal with comments etc. */
2117                 if (Node.insertMisc(table, node))
2118                     continue;
2119
2120                 /* discard unknown tags */
2121                 if (node.tag == null && node.type != Node.TextNode)
2122                 {
2123                     Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2124                     continue;
2125                 }
2126
2127                 /* if TD or TH or text or inline or block then infer <TR> */
2128
2129                 if (node.type != Node.EndTag)
2130                 {
                         /*
                           a cell or a nested table directly inside the table:
                           push it back and continue with an inferred <tr>
                         */
2131                     if (node.tag == tt.tagTd || 
2132                         node.tag == tt.tagTh || 
2133                         node.tag == tt.tagTable)
2134                     {
2135                         lexer.ungetToken();
2136                         node = lexer.inferredTag( "tr");
2137                         Report.warning(lexer, table, node, Report.MISSING_STARTTAG);
2138                     }
                         /*
                           text, block or inline content: the node is inserted
                           before the table.  lexer.exiled is raised and lowered
                           around a parseTag call that is disabled by if (false),
                           so the moved node's own content is not parsed here.
                         */
2139                     else if (node.type == Node.TextNode
2140                                || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2141                     {
2142                         Node.insertNodeBeforeElement(table, node);
2143                         Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
2144                         lexer.exiled = true;
2145
2146                         /* AQ: TODO
2147                            Line 2040 of parser.c (13 Jan 2000) reads as follows:
2148                            if (!node->type == TextNode)
2149                            This will always evaluate to false.
2150                            This has been reported to Dave Raggett <dsr@w3.org>
2151                         */
2152                         //Should be?: if (!(node.type == Node.TextNode))
2153                         if (false)
2154                             parseTag(lexer, node, Lexer.IgnoreWhitespace);
2155
2156                         lexer.exiled = false;
2157                         continue;
2158                     }
2159                     else if ((node.tag.model & Dict.CM_HEAD) != 0)
2160                     {
2161                         moveToHead(lexer, table, node);
2162                         continue;
2163                     }
2164                 }
2165
2166                 /* 
2167                   if this is the end tag for an ancestor element
2168                   then infer end tag for this element
2169                 */
2170                 if (node.type == Node.EndTag)
2171                 {
2172                     if (node.tag == tt.tagForm)
2173                     {
2174                         lexer.badForm = 1;
2175                         Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2176                         continue;
2177                     }
2178
                         /* stray end tags of table or row elements are dropped */
2179                     if (node.tag != null && (node.tag.model & (Dict.CM_TABLE|Dict.CM_ROW)) != 0)
2180                     {
2181                         Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2182                         continue;
2183                     }
2184
2185                     for (parent = table.parent;
2186                             parent != null; parent = parent.parent)
2187                     {
2188                         if (node.tag == parent.tag)
2189                         {
2190                             Report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE);
                                 /* leave the end tag for the ancestor's parser */
2191                             lexer.ungetToken();
2192                             lexer.istackbase = istackbase;
2193                             Node.trimEmptyElement(lexer, table);
2194                             return;
2195                         }
2196                     }
2197                 }
2198
                     /*
                       anything that is not part of the table model ends the
                       table: the token is pushed back for the caller
                     */
2199                 if (!((node.tag.model & Dict.CM_TABLE) != 0))
2200                 {
2201                     lexer.ungetToken();
2202                     Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
2203                     lexer.istackbase = istackbase;
2204                     Node.trimEmptyElement(lexer, table);
2205                     return;
2206                 }
2207
2208                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
2209                 {
                         /* the second ';' below is an empty statement */
2210                     Node.insertNodeAtEnd(table, node);;
2211                     parseTag(lexer, node, Lexer.IgnoreWhitespace);
2212                     continue;
2213                 }
2214
2215                 /* discard unexpected text nodes and end tags */
2216                 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2217             }
2218
                 /*
                   reached only through the break above, i.e. the input ended
                   before </table>; node is null in the warning call below
                 */
2219             Report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR);
2220             Node.trimEmptyElement(lexer, table);
2221             lexer.istackbase = istackbase;
2222         }
2223
2224     };
2225
2226     public static class ParseColGroup implements Parser {
             /*
               Parser for the content of a colgroup element: only <col>
               start tags (plus whatever Node.insertMisc accepts) are
               appended.  Text and any other known tag end the colgroup.
             */
2227
             /*
               colgroup is the element whose content is parsed.
               The mode argument is not used by this method; every token
               is read with Lexer.IgnoreWhitespace.
             */
2228         public void parse( Lexer lexer, Node colgroup, short mode )
2229         {
2230             Node node, parent;
2231             TagTable tt = lexer.configuration.tt;
2232
                 /* empty elements have no content to parse */
2233             if ((colgroup.tag.model & Dict.CM_EMPTY) != 0)
2234                 return;
2235
2236             while (true)
2237             {
2238                 node = lexer.getToken(Lexer.IgnoreWhitespace);
                     /* end of input: the method returns without a warning */
2239                 if (node == null) break;
                     /* explicit end tag of this colgroup */
2240                 if (node.tag == colgroup.tag && node.type == Node.EndTag)
2241                 {
2242                     colgroup.closed = true;
2243                     return;
2244                 }
2245
2246                 /* 
2247                   if this is the end tag for an ancestor element
2248                   then infer end tag for this element

                       NOTE(review): this scan runs before the null-tag check
                       further down, so an end tag with node.tag == null is
                       compared with the ancestors' tags as well -- confirm
                       that no ancestor can have a null tag
2249                 */
2250                 if (node.type == Node.EndTag)
2251                 {
2252                     if (node.tag == tt.tagForm)
2253                     {
2254                         lexer.badForm = 1;
2255                         Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2256                         continue;
2257                     }
2258
2259                     for (parent = colgroup.parent;
2260                             parent != null; parent = parent.parent)
2261                     {
2262
2263                         if (node.tag == parent.tag)
2264                         {
                                 /* pushed back for the ancestor; no warning is issued */
2265                             lexer.ungetToken();
2266                             return;
2267                         }
2268                     }
2269                 }
2270
                     /* text ends the colgroup: push it back for the caller */
2271                 if (node.type == Node.TextNode)
2272                 {
2273                     lexer.ungetToken();
2274                     return;
2275                 }
2276
2277                 /* deal with comments etc. */
2278                 if (Node.insertMisc(colgroup, node))
2279                     continue;
2280
2281                 /* discard unknown tags */
2282                 if (node.tag == null)
2283                 {
2284                     Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2285                     continue;
2286                 }
2287
                     /* any known tag other than col ends the colgroup */
2288                 if (node.tag != tt.tagCol)
2289                 {
2290                     lexer.ungetToken();
2291                     return;
2292                 }
2293
                     /* a </col> end tag is dropped */
2294                 if (node.type == Node.EndTag)
2295                 {
2296                     Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2297                     continue;
2298                 }
2299         
2300                 /* node should be <COL> */
2301                 Node.insertNodeAtEnd(colgroup, node);
2302                 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2303             }
2304         }
2305
2306     };
2307
2308     public static class ParseRowGroup implements Parser {
2309
2310         public void parse( Lexer lexer, Node rowgroup, short mode )
2311         {
2312             Node node, parent;
2313             TagTable tt = lexer.configuration.tt;
2314
2315             if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0)
2316                 return;
2317
2318             while (true)
2319             {
2320                 node = lexer.getToken(Lexer.IgnoreWhitespace);
2321                 if (node == null) break;
2322                 if (node.tag == rowgroup.tag)
2323                 {
2324                     if (node.type == Node.EndTag)
2325                     {
2326                         rowgroup.closed = true;
2327                         Node.trimEmptyElement(lexer, rowgroup);
2328                         return;
2329                     }
2330
2331                     lexer.ungetToken();
2332                     return;
2333                 }
2334
2335                 /* if </table> infer end tag */
2336                 if (node.tag == tt.tagTable && node.type == Node.EndTag)
2337                 {
2338                     lexer.ungetToken();
2339                     Node.trimEmptyElement(lexer, rowgroup);
2340                     return;
2341                 }
2342
2343                 /* deal with comments etc. */
2344                 if (Node.insertMisc(rowgroup, node))
2345                     continue;
2346
2347                 /* discard unknown tags */
2348                 if (node.tag == null && node.type != Node.TextNode)
2349                 {
2350                     Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2351                     continue;
2352                 }
2353
2354                 /*
2355                   if TD or TH then infer <TR>
2356                   if text or inline or block move before table
2357                   if head content move to head
2358                 */
2359
2360                 if (node.type != Node.EndTag)
2361                 {
2362                     if (node.tag == tt.tagTd || node.tag == tt.tagTh)
2363                     {
2364                         lexer.ungetToken();
2365                         node = lexer.inferredTag("tr");
2366                         Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
2367                     }
2368                     else if (node.type == Node.TextNode
2369                             || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2370                     {
2371                         Node.moveBeforeTable(rowgroup, node, tt);
2372                         Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
2373                         lexer.exiled = true;
2374
2375                         if (node.type != Node.TextNode)
2376                             parseTag(lexer, node, Lexer.IgnoreWhitespace);
2377
2378                         lexer.exiled = false;
2379                         continue;
2380                     }
2381                     else if ((node.tag.model & Dict.CM_HEAD) != 0)
2382                     {
2383                         Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
2384                         moveToHead(lexer, rowgroup, node);
2385                         continue;
2386                     }
2387                 }
2388
2389                 /* 
2390                   if this is the end tag for ancestor element
2391                   then infer end tag for this element
2392                 */
2393                 if (node.type == Node.EndTag)
2394                 {
2395                     if (node.tag == tt.tagForm)
2396                     {
2397                         lexer.badForm = 1;
2398                         Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2399                         continue;
2400                     }
2401
2402                     if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag == tt.tagTh)
2403                     {
2404                         Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2405                         continue;
2406                     }
2407
2408                     for (parent = rowgroup.parent;
2409                             parent != null; parent = parent.parent)
2410                     {
2411                         if (node.tag == parent.tag)
2412                         {
2413                             lexer.ungetToken();
2414                             Node.trimEmptyElement(lexer, rowgroup);
2415                             return;
2416                         }
2417                     }
2418                 }
2419
2420                 /*
2421                   if THEAD, TFOOT or TBODY then implied end tag
2422
2423                 */
2424                 if ((node.tag.model & Dict.CM_ROWGRP) != 0)
2425                 {
2426                     if (node.type != Node.EndTag)
2427                         lexer.ungetToken();
2428
2429                     Node.trimEmptyElement(lexer, rowgroup);
2430                     return;
2431                 }
2432
2433                 if (node.type == Node.EndTag)
2434                 {
2435                     Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2436                     continue;
2437                 }
2438         
2439                 if (!(node.tag == tt.tagTr))
2440                 {
2441                     node = lexer.inferredTag( "tr");
2442                     Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
2443                     lexer.ungetToken();
2444                 }
2445
2446                /* node should be <TR> */
2447                 Node.insertNodeAtEnd(rowgroup, node);
2448                 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2449             }
2450
2451             Node.trimEmptyElement(lexer, rowgroup);
2452         }
2453
2454     };
2455
2456     public static class ParseRow implements Parser {
2457
2458         public void parse( Lexer lexer, Node row, short mode )
2459         {
2460             Node node, parent;
2461             boolean exclude_state;
2462             TagTable tt = lexer.configuration.tt;
2463
2464             if ((row.tag.model & Dict.CM_EMPTY) != 0)
2465                 return;
2466
2467             while (true)
2468             {
2469                 node = lexer.getToken(Lexer.IgnoreWhitespace);
2470                 if (node == null) break;
2471                 if (node.tag == row.tag)
2472                 {
2473                     if (node.type == Node.EndTag)
2474                     {
2475                         row.closed = true;
2476                         Node.fixEmptyRow(lexer, row);
2477                         return;
2478                     }
2479
2480                     lexer.ungetToken();
2481                     Node.fixEmptyRow(lexer, row);
2482                     return;
2483                 }
2484
2485                 /* 
2486                   if this is the end tag for an ancestor element
2487                   then infer end tag for this element
2488                 */
2489                 if (node.type == Node.EndTag)
2490                 {
2491                     if (node.tag == tt.tagForm)
2492                     {
2493                         lexer.badForm = 1;
2494                         Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2495                         continue;
2496                     }
2497
2498                     if (node.tag == tt.tagTd || node.tag == tt.tagTh)
2499                     {
2500                         Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2501                         continue;
2502                     }
2503
2504                     for (parent = row.parent;
2505                             parent != null; parent = parent.parent)
2506                     {
2507                         if (node.tag == parent.tag)
2508                         {
2509                             lexer.ungetToken();
2510                             Node.trimEmptyElement(lexer, row);
2511                             return;
2512                         }
2513                     }
2514                 }
2515
2516                 /* deal with comments etc. */
2517                 if (Node.insertMisc(row, node))
2518                     continue;
2519
2520                 /* discard unknown tags */
2521                 if (node.tag == null && node.type != Node.TextNode)
2522                 {
2523                     Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2524                     continue;
2525                 }
2526
2527                 /* discard unexpected <table> element */
2528                 if (node.tag == tt.tagTable)
2529                 {
2530                     Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2531                     continue;
2532                 }
2533
2534                 /* THEAD, TFOOT or TBODY */
2535                 if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0)
2536                 {
2537                     lexer.ungetToken();
2538                     Node.trimEmptyElement(lexer, row);
2539                     return;
2540                 }
2541
2542                 if (node.type == Node.EndTag)
2543                 {
2544                     Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2545                     continue;
2546                 }
2547
2548                 /*
2549                   if text or inline or block move before table
2550                   if head content move to head
2551                 */
2552
2553                 if (node.type != Node.EndTag)
2554                 {
2555                     if (node.tag == tt.tagForm)
2556                     {
2557                         lexer.ungetToken();
2558                         node = lexer.inferredTag("td");
2559                         Report.warning(lexer, row, node, Report.MISSING_STARTTAG);
2560                     }
2561                     else if (node.type == Node.TextNode
2562                             || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2563                     {
2564                         Node.moveBeforeTable(row, node, tt);
2565                         Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2566                         lexer.exiled = true;
2567
2568                         if (node.type != Node.TextNode)
2569                             parseTag(lexer, node, Lexer.IgnoreWhitespace);
2570
2571                         lexer.exiled = false;
2572                         continue;
2573                     }
2574                     else if ((node.tag.model & Dict.CM_HEAD) != 0)
2575                     {
2576                         Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2577                         moveToHead(lexer, row, node);
2578                         continue;
2579                     }
2580                 }
2581
2582                 if (!(node.tag == tt.tagTd || node.tag == tt.tagTh))
2583                 {
2584                     Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2585                     continue;
2586                 }
2587         
2588                 /* node should be <TD> or <TH> */
2589                 Node.insertNodeAtEnd(row, node);
2590                 exclude_state = lexer.excludeBlocks;
2591                 lexer.excludeBlocks = false;
2592                 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2593                 lexer.excludeBlocks = exclude_state;
2594
2595                 /* pop inline stack */
2596
2597                 while (lexer.istack.size() > lexer.istackbase)
2598                     lexer.popInline( null);
2599             }
2600
2601             Node.trimEmptyElement(lexer, row);
2602         }
2603
2604     };
2605
2606     public static class ParseNoFrames implements Parser {
2607
2608         public void parse( Lexer lexer, Node noframes, short mode )
2609         {
2610             Node node;
2611             boolean checkstack;
2612             TagTable tt = lexer.configuration.tt;
2613
2614             lexer.badAccess |=  Report.USING_NOFRAMES;
2615             mode = Lexer.IgnoreWhitespace;
2616             checkstack = true;
2617
2618             while (true)
2619             {
2620                 node = lexer.getToken(mode);
2621                 if (node == null) break;
2622                 if (node.tag == noframes.tag && node.type == Node.EndTag)
2623                 {
2624                     noframes.closed = true;
2625                     Node.trimSpaces(lexer, noframes);
2626                     return;
2627                 }
2628
2629                 if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset))
2630                 {
2631                     Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_BEFORE);
2632                     Node.trimSpaces(lexer, noframes);
2633                     lexer.ungetToken();
2634                     return;
2635                 }
2636
2637                 if (node.tag == tt.tagHtml)
2638                 {
2639                     if (node.type == Node.StartTag || node.type == Node.StartEndTag)
2640                         Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
2641
2642                     continue;
2643                 }
2644
2645                 /* deal with comments etc. */
2646                 if (Node.insertMisc(noframes, node))
2647                     continue;
2648
2649                 if (node.tag == tt.tagBody && node.type == Node.StartTag)
2650                 {
2651                     Node.insertNodeAtEnd(noframes, node);
2652                     parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/);
2653                     continue;
2654                 }
2655
2656                 /* implicit body element inferred */
2657                 if (node.type == Node.TextNode || node.tag != null)
2658                 {
2659                     lexer.ungetToken();
2660                     node = lexer.inferredTag("body");
2661                     if (lexer.configuration.XmlOut)
2662                         Report.warning(lexer, noframes, node, Report.INSERTING_TAG);
2663                     Node.insertNodeAtEnd(noframes, node);
2664                     parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/);
2665                     continue;
2666                 }
2667                 /* discard unexpected end tags */
2668                 Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
2669             }
2670
2671             Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_FOR);
2672         }
2673
2674     };
2675
2676     public static class ParseSelect implements Parser {
2677
2678         public void parse( Lexer lexer, Node field, short mode )
2679         {
2680             Node node;
2681             TagTable tt = lexer.configuration.tt;
2682
2683             lexer.insert = -1;  /* defer implicit inline start tags */
2684
2685             while (true)
2686             {
2687                 node = lexer.getToken(Lexer.IgnoreWhitespace);
2688                 if (node == null) break;
2689                 if (node.tag == field.tag && node.type == Node.EndTag)
2690                 {
2691                     field.closed = true;
2692                     Node.trimSpaces(lexer, field);
2693                     return;
2694                 }
2695
2696                 /* deal with comments etc. */
2697                 if (Node.insertMisc(field, node))
2698                     continue;
2699
2700                 if (node.type == Node.StartTag && 
2701                      (node.tag == tt.tagOption ||
2702                       node.tag == tt.tagOptgroup ||
2703                       node.tag == tt.tagScript))
2704                 {
2705                     Node.insertNodeAtEnd(field, node);
2706                     parseTag(lexer, node, Lexer.IgnoreWhitespace);
2707                     continue;
2708                 }
2709
2710                 /* discard unexpected tags */
2711                 Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
2712             }
2713
2714             Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
2715         }
2716
2717     };
2718
2719     public static class ParseText implements Parser {
2720
2721         public void parse( Lexer lexer, Node field, short mode )
2722         {
2723             Node node;
2724             TagTable tt = lexer.configuration.tt;
2725
2726             lexer.insert = -1;  /* defer implicit inline start tags */
2727
2728             if (field.tag == tt.tagTextarea)
2729                 mode = Lexer.Preformatted;
2730
2731             while (true)
2732             {
2733                 node = lexer.getToken(mode);
2734                 if (node == null) break;
2735                 if (node.tag == field.tag && node.type == Node.EndTag)
2736                 {
2737                     field.closed = true;
2738                     Node.trimSpaces(lexer, field);
2739                     return;
2740                 }
2741
2742                 /* deal with comments etc. */
2743                 if (Node.insertMisc(field, node))
2744                     continue;
2745
2746                 if (node.type == Node.TextNode)
2747                 {
2748                     /* only called for 1st child */
2749                     if (field.content == null && !((mode & Lexer.Preformatted) != 0))
2750                         Node.trimSpaces(lexer, field);
2751
2752                     if (node.start >= node.end)
2753                     {
2754                         continue;
2755                     }
2756
2757                     Node.insertNodeAtEnd(field, node);
2758                     continue;
2759                 }
2760
2761                 if (node.tag == tt.tagFont)
2762                 {
2763                     Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
2764                     continue;
2765                 }
2766
2767                 /* terminate element on other tags */
2768                 if (!((field.tag.model & Dict.CM_OPT) != 0))
2769                         Report.warning(lexer, field, node, Report.MISSING_ENDTAG_BEFORE);
2770
2771                 lexer.ungetToken();
2772                 Node.trimSpaces(lexer, field);
2773                 return;
2774             }
2775
2776             if (!((field.tag.model & Dict.CM_OPT) != 0))
2777                 Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
2778         }
2779
2780     };
2781
2782     public static class ParseOptGroup implements Parser {
2783
2784         public void parse( Lexer lexer, Node field, short mode )
2785         {
2786             Node node;
2787             TagTable tt = lexer.configuration.tt;
2788
2789             lexer.insert = -1;  /* defer implicit inline start tags */
2790
2791             while (true)
2792             {
2793                 node = lexer.getToken(Lexer.IgnoreWhitespace);
2794                 if (node == null) break;
2795                 if (node.tag == field.tag && node.type == Node.EndTag)
2796                 {
2797                     field.closed = true;
2798                     Node.trimSpaces(lexer, field);
2799                     return;
2800                 }
2801
2802                 /* deal with comments etc. */
2803                 if (Node.insertMisc(field, node))
2804                     continue;
2805
2806                 if (node.type == Node.StartTag && 
2807                      (node.tag == tt.tagOption || node.tag == tt.tagOptgroup))
2808                 {
2809                     if (node.tag == tt.tagOptgroup)
2810                         Report.warning(lexer, field, node, Report.CANT_BE_NESTED);
2811
2812                     Node.insertNodeAtEnd(field, node);
2813                     parseTag(lexer, node, Lexer.MixedContent);
2814                     continue;
2815                 }
2816
2817                 /* discard unexpected tags */
2818                 Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
2819             }
2820         }
2821
2822     };
2823
2824     public static Parser getParseHTML()
2825     {
2826         return _parseHTML;
2827     }
2828
2829     public static Parser getParseHead()
2830     {
2831         return _parseHead;
2832     }
2833
2834     public static Parser getParseTitle()
2835     {
2836         return _parseTitle;
2837     }
2838
2839     public static Parser getParseScript()
2840     {
2841         return _parseScript;
2842     }
2843
2844     public static Parser getParseBody()
2845     {
2846         return _parseBody;
2847     }
2848
2849     public static Parser getParseFrameSet()
2850     {
2851         return _parseFrameSet;
2852     }
2853
2854     public static Parser getParseInline()
2855     {
2856         return _parseInline;
2857     }
2858
2859     public static Parser getParseList()
2860     {
2861         return _parseList;
2862     }
2863
2864     public static Parser getParseDefList()
2865     {
2866         return _parseDefList;
2867     }
2868
2869     public static Parser getParsePre()
2870     {
2871         return _parsePre;
2872     }
2873
2874     public static Parser getParseBlock()
2875     {
2876         return _parseBlock;
2877     }
2878
2879     public static Parser getParseTableTag()
2880     {
2881         return _parseTableTag;
2882     }
2883
2884     public static Parser getParseColGroup()
2885     {
2886         return _parseColGroup;
2887     }
2888
2889     public static Parser getParseRowGroup()
2890     {
2891         return _parseRowGroup;
2892     }
2893
2894     public static Parser getParseRow()
2895     {
2896         return _parseRow;
2897     }
2898
2899     public static Parser getParseNoFrames()
2900     {
2901         return _parseNoFrames;
2902     }
2903
2904     public static Parser getParseSelect()
2905     {
2906         return _parseSelect;
2907     }
2908
2909     public static Parser getParseText()
2910     {
2911         return _parseText;
2912     }
2913
2914     public static Parser getParseOptGroup()
2915     {
2916         return _parseOptGroup;
2917     }
2918
2919
2920     private static Parser _parseHTML = new ParseHTML();
2921     private static Parser _parseHead = new ParseHead();
2922     private static Parser _parseTitle = new ParseTitle();
2923     private static Parser _parseScript = new ParseScript();
2924     private static Parser _parseBody = new ParseBody();
2925     private static Parser _parseFrameSet = new ParseFrameSet();
2926     private static Parser _parseInline = new ParseInline();
2927     private static Parser _parseList = new ParseList();
2928     private static Parser _parseDefList = new ParseDefList();
2929     private static Parser _parsePre = new ParsePre();
2930     private static Parser _parseBlock = new ParseBlock();
2931     private static Parser _parseTableTag = new ParseTableTag();
2932     private static Parser _parseColGroup = new ParseColGroup();
2933     private static Parser _parseRowGroup = new ParseRowGroup();
2934     private static Parser _parseRow = new ParseRow();
2935     private static Parser _parseNoFrames = new ParseNoFrames();
2936     private static Parser _parseSelect = new ParseSelect();
2937     private static Parser _parseText = new ParseText();
2938     private static Parser _parseOptGroup = new ParseOptGroup();
2939
2940     /*
2941       HTML is the top level element
2942     */
2943     public static Node parseDocument(Lexer lexer)
2944     {
2945         Node node, document, html;
2946         Node doctype = null;
2947         TagTable tt = lexer.configuration.tt;
2948
2949         document = lexer.newNode();
2950         document.type = Node.RootNode;
2951
2952         while (true)
2953         {
2954             node = lexer.getToken(Lexer.IgnoreWhitespace);
2955             if (node == null) break;
2956
2957             /* deal with comments etc. */
2958             if (Node.insertMisc(document, node))
2959                 continue;
2960
2961             if (node.type == Node.DocTypeTag)
2962             {
2963                 if (doctype == null)
2964                 {
2965                     Node.insertNodeAtEnd(document, node);
2966                     doctype = node;
2967                 }
2968                 else
2969                     Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED);
2970                 continue;
2971             }
2972
2973             if (node.type == Node.EndTag)
2974             {
2975                 Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); //TODO?
2976                 continue;
2977             }
2978
2979             if (node.type != Node.StartTag || node.tag != tt.tagHtml)
2980             {
2981                 lexer.ungetToken();
2982                 html = lexer.inferredTag("html");
2983             }
2984             else
2985                 html = node;
2986
2987             Node.insertNodeAtEnd(document, html);
2988             getParseHTML().parse(lexer, html, (short)0); // TODO?
2989             break;
2990         }
2991
2992         return document;
2993     }
2994
2995     /**
2996      *  Indicates whether or not whitespace should be preserved for this element.
2997      *  If an <code>xml:space</code> attribute is found, then if the attribute value is
2998      *  <code>preserve</code>, returns <code>true</code>.  For any other value, returns
2999      *  <code>false</code>.  If an <code>xml:space</code> attribute was <em>not</em>
3000      *  found, then the following element names result in a return value of <code>true:
3001      *  pre, script, style,</code> and <code>xsl:text</code>.  Finally, if a
3002      *  <code>TagTable</code> was passed in and the element appears as the "pre" element
3003      *  in the <code>TagTable</code>, then <code>true</code> will be returned.
3004      *  Otherwise, <code>false</code> is returned.
3005      *  @param element The <code>Node</code> to test to see if whitespace should be
3006      *                 preserved.
3007      *  @param tt The <code>TagTable</code> to test for the <code>getNodePre()</code>
3008      *            function.  This may be <code>null</code>, in which case this test
3009      *            is bypassed.
3010      *  @return <code>true</code> or <code>false</code>, as explained above.
3011      */
3012
3013     public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt)
3014     {
3015         AttVal attribute;
3016
3017         /* search attributes for xml:space */
3018         for (attribute = element.attributes; attribute != null; attribute = attribute.next)
3019         {
3020             if (attribute.attribute.equals("xml:space"))
3021             {
3022                 if (attribute.value.equals("preserve"))
3023                     return true;
3024
3025                 return false;
3026             }
3027         }
3028
3029         /* kludge for html docs without explicit xml:space attribute */
3030         if (Lexer.wstrcasecmp(element.element, "pre") == 0
3031             || Lexer.wstrcasecmp(element.element, "script") == 0
3032             || Lexer.wstrcasecmp(element.element, "style") == 0)
3033             return true;
3034
3035         if ( (tt != null) && (tt.findParser(element) == getParsePre()) )
3036             return true;
3037
3038         /* kludge for XSL docs */
3039         if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
3040             return true;
3041
3042         return false;
3043     }
3044
3045     /*
3046       XML documents
3047     */
3048     public static void parseXMLElement(Lexer lexer, Node element, short mode)
3049     {
3050         Node node;
3051
3052         /* Jeff Young's kludge for XSL docs */
3053
3054         if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
3055             return;
3056
3057         /* if node is pre or has xml:space="preserve" then do so */
3058
3059         if (XMLPreserveWhiteSpace(element, lexer.configuration.tt))
3060             mode = Lexer.Preformatted;
3061
3062         while (true)
3063         {
3064             node = lexer.getToken(mode);
3065             if (node == null) break;
3066             if (node.type == Node.EndTag && node.element.equals(element.element))
3067             {
3068                 element.closed = true;
3069                 break;
3070             }
3071
3072             /* discard unexpected end tags */
3073             if (node.type == Node.EndTag)
3074             {
3075                 Report.error(lexer, element, node, Report.UNEXPECTED_ENDTAG);
3076                 continue;
3077             }
3078
3079             /* parse content on seeing start tag */
3080             if (node.type == Node.StartTag)
3081                 parseXMLElement(lexer, node, mode);
3082
3083             Node.insertNodeAtEnd(element, node);
3084         }
3085
3086         /*
3087          if first child is text then trim initial space and
3088          delete text node if it is empty.
3089         */
3090
3091         node = element.content;
3092
3093         if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted)
3094         {
3095             if (node.textarray[node.start] == (byte)' ')
3096             {
3097                 node.start++;
3098
3099                 if (node.start >= node.end)
3100                     Node.discardElement(node);
3101             }
3102         }
3103
3104         /*
3105          if last child is text then trim final space and
3106          delete the text node if it is empty
3107         */
3108
3109         node = element.last;
3110
3111         if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted)
3112         {
3113             if (node.textarray[node.end - 1] == (byte)' ')
3114             {
3115                 node.end--;
3116
3117                 if (node.start >= node.end)
3118                     Node.discardElement(node);
3119             }
3120         }
3121     }
3122
3123     public static Node parseXMLDocument(Lexer lexer)
3124     {
3125         Node node, document, doctype;
3126
3127         document = lexer.newNode();
3128         document.type = Node.RootNode;
3129         doctype = null;
3130         lexer.configuration.XmlTags = true;
3131
3132         while (true)
3133         {
3134             node = lexer.getToken(Lexer.IgnoreWhitespace);
3135             if (node == null) break;
3136             /* discard unexpected end tags */
3137             if (node.type == Node.EndTag)
3138             {
3139                 Report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG);
3140                 continue;
3141             }
3142
3143             /* deal with comments etc. */
3144             if (Node.insertMisc(document, node))
3145                 continue;
3146
3147             if (node.type == Node.DocTypeTag)
3148             {
3149                 if (doctype == null)
3150                 {
3151                     Node.insertNodeAtEnd(document, node);
3152                     doctype = node;
3153                 }
3154                 else
3155                     Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO
3156                 continue;
3157             }
3158
3159             /* if start tag then parse element's content */
3160             if (node.type == Node.StartTag)
3161             {
3162                 Node.insertNodeAtEnd(document, node);
3163                 parseXMLElement(lexer, node, Lexer.IgnoreWhitespace);
3164             }
3165
3166         }
3167
3168 if (false) { //#if 0
3169         /* discard the document type */
3170         node = document.findDocType();
3171
3172         if (node != null)
3173             Node.discardElement(node);
3174 } // #endif
3175
3176         if  (doctype != null && !lexer.checkDocTypeKeyWords(doctype))
3177                 Report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
3178
3179         /* ensure presence of initial <?XML version="1.0"?> */
3180         if (lexer.configuration.XmlPi)
3181             lexer.fixXMLPI(document);
3182
3183         return document;
3184     }
3185
3186     public static boolean isJavaScript(Node node)
3187     {
3188         boolean result = false;
3189         AttVal attr;
3190
3191         if (node.attributes == null)
3192             return true;
3193
3194         for (attr = node.attributes; attr != null; attr = attr.next)
3195         {
3196             if ( (Lexer.wstrcasecmp(attr.attribute, "language") == 0
3197                     || Lexer.wstrcasecmp(attr.attribute, "type") == 0)
3198                     && Lexer.wsubstr(attr.value, "javascript"))
3199                 result = true;
3200         }
3201
3202         return result;
3203     }
3204
3205 }