*/
Node.insertNodeAtEnd(list, node);
parseTag(lexer, node, Lexer.IgnoreWhitespace);
}
Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
Node.trimEmptyElement(lexer, list);
}
};
/**
 * Parser for preformatted elements. Tokens are read in
 * {@code Lexer.Preformatted} mode and the permitted content is appended
 * to the element being parsed.
 */
public static class ParsePre implements Parser {
/**
 * Parses the content of {@code pre} until its own end tag, the end tag of
 * an ancestor, or the end of input is reached.
 *
 * @param lexer token source; warnings raised while parsing are reported against it
 * @param pre the element whose content is parsed
 * @param mode not read here; tokens are always requested in {@code Lexer.Preformatted} mode
 */
public void parse( Lexer lexer, Node pre, short mode )
{
Node node, parent;
TagTable tt = lexer.configuration.tt;
/* elements with an empty content model have nothing to parse */
if ((pre.tag.model & Dict.CM_EMPTY) != 0)
return;
/* obsolete elements handled by this parser are coerced to tt.tagPre */
if ((pre.tag.model & Dict.CM_OBSOLETE) != 0)
Node.coerceNode(lexer, pre, tt.tagPre);
lexer.inlineDup( null); /* tell lexer to insert inlines if needed */
while (true)
{
node = lexer.getToken(Lexer.Preformatted);
if (node == null) break;
/* matching end tag: close the element and finish */
if (node.tag == pre.tag && node.type == Node.EndTag)
{
Node.trimSpaces(lexer, pre);
pre.closed = true;
Node.trimEmptyElement(lexer, pre);
return;
}
/* html tags are dropped; only start tags produce a warning */
if (node.tag == tt.tagHtml)
{
if (node.type == Node.StartTag || node.type == Node.StartEndTag)
Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
continue;
}
if (node.type == Node.TextNode)
{
/* if this is the first child, check for an initial newline and skip it */
if (pre.content == null)
{
if (node.textarray[node.start] == (byte)'\n')
++node.start;
/* nothing left after skipping the newline: drop the text node */
if (node.start >= node.end)
{
continue;
}
}
Node.insertNodeAtEnd(pre, node);
continue;
}
/* deal with comments etc. */
if (Node.insertMisc(pre, node))
continue;
/* discard unknown and PARAM tags */
if (node.tag == null || node.tag == tt.tagParam)
{
Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
continue;
}
if (node.tag == tt.tagP)
{
if (node.type == Node.StartTag)
{
Report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);
/* trim white space before <p> in <pre> */
Node.trimSpaces(lexer, pre);
/* turn the <p> start tag into a <br> and keep it as content */
Node.coerceNode(lexer, node, tt.tagBr);
Node.insertNodeAtEnd(pre, node);
}
else
{
/* any other form of the P tag (e.g. its end tag) is discarded */
Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
}
continue;
}
/* head-only content (CM_HEAD without CM_BLOCK) is moved to the head */
if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0))
{
moveToHead(lexer, pre, node);
continue;
}
/*
if this is the end tag for an ancestor element
then infer end tag for this element
*/
if (node.type == Node.EndTag)
{
/* a stray form end tag is recorded on the lexer and discarded */
if (node.tag == tt.tagForm)
{
lexer.badForm = 1;
Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
continue;
}
for (parent = pre.parent;
parent != null; parent = parent.parent)
{
if (node.tag == parent.tag)
{
Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
/* push the token back so the ancestor's parser sees its end tag */
lexer.ungetToken();
Node.trimSpaces(lexer, pre);
Node.trimEmptyElement(lexer, pre);
return;
}
}
}
/* what about head content, HEAD, BODY tags etc? */
if (!((node.tag.model & Dict.CM_INLINE) != 0))
{
/* only a plain start tag of a non-inline element is handled below */
if (node.type != Node.StartTag)
{
Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
continue;
}
Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
lexer.excludeBlocks = true;
/* check if we need to infer a container */
if ((node.tag.model & Dict.CM_LIST) != 0)
{
lexer.ungetToken();
node = lexer.inferredTag( "ul");
Node.addClass(node, "noindent");
}
else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
{
lexer.ungetToken();
node = lexer.inferredTag( "dl");
}
else if ((node.tag.model & Dict.CM_TABLE) != 0)
{
lexer.ungetToken();
node = lexer.inferredTag( "table");
}
/*
the non-inline node is placed after the current pre, a fresh inferred
pre is placed after that node, and the loop carries on filling the
new pre once the node's own content has been parsed
*/
Node.insertNodeAfterElement(pre, node);
pre = lexer.inferredTag( "pre");
Node.insertNodeAfterElement(node, pre);
parseTag(lexer, node, Lexer.IgnoreWhitespace);
lexer.excludeBlocks = false;
continue;
}
/*
disabled alternative kept from the original source:
if (!((node.tag.model & Dict.CM_INLINE) != 0))
{
Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
lexer.ungetToken();
return;
}
*/
if (node.type == Node.StartTag || node.type == Node.StartEndTag)
{
/* trim white space before <br> */
if (node.tag == tt.tagBr)
Node.trimSpaces(lexer, pre);
Node.insertNodeAtEnd(pre, node);
parseTag(lexer, node, Lexer.Preformatted);
continue;
}
/* discard unexpected tags */
Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
}
/* end of input reached without an end tag; node is null at this point */
Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
Node.trimEmptyElement(lexer, pre);
}
};
/**
 * Parser for elements with block content. Handles mixed text/inline/block
 * content, inferring missing end tags and containers where needed.
 */
public static class ParseBlock implements Parser {
/**
 * Parses the content of {@code element} until its end tag, an inferred end,
 * or the end of input.
 *
 * @param lexer token source; its inline stack and flags are read and updated
 * @param element the element whose content is parsed
 * @param mode overwritten with {@code Lexer.IgnoreWhitespace} before the first
 *        token is read; afterwards tracks whether text or inline content was last seen
 */
public void parse( Lexer lexer, Node element, short mode )
/*
element is node created by the lexer
upon seeing the start tag, or by the
parser when the start tag is inferred
*/
{
Node node, parent;
boolean checkstack;
int istackbase = 0;
TagTable tt = lexer.configuration.tt;
checkstack = true;
if ((element.tag.model & Dict.CM_EMPTY) != 0)
return;
/* a form nested in another form is reported but still parsed */
if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm))
Report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
/*
InlineDup() asks the lexer to insert inline emphasis tags
currently pushed on the istack, but take care to avoid
propagating inline emphasis inside OBJECT or APPLET.
For these elements a fresh inline stack context is created
and disposed of upon reaching the end of the element.
They thus behave like table cells in this respect.
*/
if ((element.tag.model & Dict.CM_OBJECT) != 0)
{
/* remember the previous base so it can be restored on exit */
istackbase = lexer.istackbase;
lexer.istackbase = lexer.istack.size();
}
if (!((element.tag.model & Dict.CM_MIXED) != 0))
lexer.inlineDup( null);
/* the caller-supplied mode is discarded here */
mode = Lexer.IgnoreWhitespace;
while (true)
{
node = lexer.getToken(mode /*Lexer.MixedContent*/);
if (node == null) break;
/* end tag for this element (or for the tag recorded in element.was) */
if (node.type == Node.EndTag && node.tag != null &&
(node.tag == element.tag || element.was == node.tag))
{
if ((element.tag.model & Dict.CM_OBJECT) != 0)
{
/* pop inline stack */
while (lexer.istack.size() > lexer.istackbase)
lexer.popInline( null);
lexer.istackbase = istackbase;
}
element.closed = true;
Node.trimSpaces(lexer, element);
Node.trimEmptyElement(lexer, element);
return;
}
/* html, head and body tags are dropped; only start tags are reported */
if (node.tag == tt.tagHtml ||
node.tag == tt.tagHead ||
node.tag == tt.tagBody)
{
if (node.type == Node.StartTag || node.type == Node.StartEndTag)
Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
continue;
}
if (node.type == Node.EndTag)
{
/* end tag of an unknown element */
if (node.tag == null)
{
Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
continue;
}
/* a BR end tag is treated as a BR start tag */
else if (node.tag == tt.tagBr)
node.type = Node.StartTag;
else if (node.tag == tt.tagP)
{
/*
a stray P end tag is coerced to BR and inserted; a second,
inferred BR then becomes the current node and is handled by
the code further down
*/
Node.coerceNode(lexer, node, tt.tagBr);
Node.insertNodeAtEnd(element, node);
node = lexer.inferredTag("br");
}
else
{
/*
if this is the end tag for an ancestor element
then infer end tag for this element
*/
for (parent = element.parent;
parent != null; parent = parent.parent)
{
if (node.tag == parent.tag)
{
if (!((element.tag.model & Dict.CM_OPT) != 0))
Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
lexer.ungetToken();
if ((element.tag.model & Dict.CM_OBJECT) != 0)
{
/* pop inline stack */
while (lexer.istack.size() > lexer.istackbase)
lexer.popInline( null);
lexer.istackbase = istackbase;
}
Node.trimSpaces(lexer, element);
Node.trimEmptyElement(lexer, element);
return;
}
}
/*
special case: end tag with a table content model (e.g. </tr>)
seen while lexer.exiled is set, i.e. for stuff moved in front of table
*/
if (lexer.exiled
&& node.tag.model != 0
&& (node.tag.model & Dict.CM_TABLE) != 0)
{
lexer.ungetToken();
Node.trimSpaces(lexer, element);
Node.trimEmptyElement(lexer, element);
return;
}
}
}
/* mixed content model permits text */
if (node.type == Node.TextNode)
{
/* a text node of at most one character that starts with a space */
boolean iswhitenode = false;
if (node.type == Node.TextNode &&
node.end <= node.start + 1 &&
lexer.lexbuf[node.start] == (byte)' ')
iswhitenode = true;
/* with EncloseBlockText set, non-blank text is wrapped in an inferred P */
if (lexer.configuration.EncloseBlockText && !iswhitenode)
{
lexer.ungetToken();
node = lexer.inferredTag("p");
Node.insertNodeAtEnd(element, node);
parseTag(lexer, node, Lexer.MixedContent);
continue;
}
if (checkstack)
{
checkstack = false;
if (!((element.tag.model & Dict.CM_MIXED) != 0))
{
/* a positive result skips the insertion of this token for now */
if (lexer.inlineDup( node) > 0)
continue;
}
}
Node.insertNodeAtEnd(element, node);
mode = Lexer.MixedContent;
/*
HTML4 strict doesn't allow mixed content for
elements with %block; as their content model
*/
lexer.versions &= ~Dict.VERS_HTML40_STRICT;
continue;
}
/* deal with comments etc. */
if (Node.insertMisc(element, node))
continue;
/* allow PARAM elements? */
if (node.tag == tt.tagParam)
{
if (((element.tag.model & Dict.CM_PARAM) != 0) &&
(node.type == Node.StartTag || node.type == Node.StartEndTag))
{
Node.insertNodeAtEnd(element, node);
continue;
}
/* otherwise discard it */
Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
continue;
}
/* allow AREA elements? */
if (node.tag == tt.tagArea)
{
if ((element.tag == tt.tagMap) &&
(node.type == Node.StartTag || node.type == Node.StartEndTag))
{
Node.insertNodeAtEnd(element, node);
continue;
}
/* otherwise discard it */
Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
continue;
}
/* ignore unknown start/end tags */
if (node.tag == null)
{
Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
continue;
}
/*
Allow Dict.CM_INLINE elements here.
Allow Dict.CM_BLOCK elements here unless
lexer.excludeBlocks is yes.
LI and DD are special cased.
Otherwise infer end tag for this element.
*/
if (!((node.tag.model & Dict.CM_INLINE) != 0))
{
if (node.type != Node.StartTag && node.type != Node.StartEndTag)
{
Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
continue;
}
if (element.tag == tt.tagTd || element.tag == tt.tagTh)
{
/* if parent is a table cell, avoid inferring the end of the cell */
if ((node.tag.model & Dict.CM_HEAD) != 0)
{
moveToHead(lexer, element, node);
continue;
}
if ((node.tag.model & Dict.CM_LIST) != 0)
{
lexer.ungetToken();
node = lexer.inferredTag( "ul");
Node.addClass(node, "noindent");
lexer.excludeBlocks = true;
}
else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
{
lexer.ungetToken();
node = lexer.inferredTag( "dl");
lexer.excludeBlocks = true;
}
/* infer end of current table cell */
if (!((node.tag.model & Dict.CM_BLOCK) != 0))
{
lexer.ungetToken();
Node.trimSpaces(lexer, element);
Node.trimEmptyElement(lexer, element);
return;
}
}
else if ((node.tag.model & Dict.CM_BLOCK) != 0)
{
if (lexer.excludeBlocks)
{
if (!((element.tag.model & Dict.CM_OPT) != 0))
Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
lexer.ungetToken();
/* NOTE(review): unlike the other exits, the inline stack is not popped here, only the base is restored - confirm intended */
if ((element.tag.model & Dict.CM_OBJECT) != 0)
lexer.istackbase = istackbase;
Node.trimSpaces(lexer, element);
Node.trimEmptyElement(lexer, element);
return;
}
}
else /* things like list items */
{
if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
if ((node.tag.model & Dict.CM_HEAD) != 0)
{
moveToHead(lexer, element, node);
continue;
}
lexer.ungetToken();
if ((node.tag.model & Dict.CM_LIST) != 0)
{
/* inside a list the pushed-back item simply ends this element */
if (element.parent != null && element.parent.tag != null &&
element.parent.tag.parser == getParseList())
{
Node.trimSpaces(lexer, element);
Node.trimEmptyElement(lexer, element);
return;
}
node = lexer.inferredTag("ul");
Node.addClass(node, "noindent");
}
else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
{
/* NOTE(review): element.parent is dereferenced without the null check used in the list branch above */
if (element.parent.tag == tt.tagDl)
{
Node.trimSpaces(lexer, element);
Node.trimEmptyElement(lexer, element);
return;
}
node = lexer.inferredTag("dl");
}
else if ((node.tag.model & Dict.CM_TABLE) != 0 ||
(node.tag.model & Dict.CM_ROW) != 0)
{
node = lexer.inferredTag("table");
}
else if ((element.tag.model & Dict.CM_OBJECT) != 0)
{
/* pop inline stack */
while (lexer.istack.size() > lexer.istackbase)
lexer.popInline( null);
lexer.istackbase = istackbase;
Node.trimSpaces(lexer, element);
Node.trimEmptyElement(lexer, element);
return;
}
else
{
Node.trimSpaces(lexer, element);
Node.trimEmptyElement(lexer, element);
return;
}
}
}
/* parse known element */
if (node.type == Node.StartTag || node.type == Node.StartEndTag)
{
if ((node.tag.model & Dict.CM_INLINE) != 0)
{
if (checkstack && !node.implicit)
{
checkstack = false;
if (lexer.inlineDup( node) > 0)
continue;
}
mode = Lexer.MixedContent;
}
else
{
/* after a non-inline child the inline stack is checked again */
checkstack = true;
mode = Lexer.IgnoreWhitespace;
}
/* trim white space before <br> */
if (node.tag == tt.tagBr)
Node.trimSpaces(lexer, element);
Node.insertNodeAtEnd(element, node);
if (node.implicit)
Report.warning(lexer, element, node, Report.INSERTING_TAG);
/* the child is always parsed starting in IgnoreWhitespace mode */
parseTag(lexer, node, Lexer.IgnoreWhitespace /*Lexer.MixedContent*/);
continue;
}
/* discard unexpected tags */
if (node.type == Node.EndTag)
lexer.popInline( node); /* if inline end tag */
Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
}
/* end of input: node is null here; optional end tags are not reported */
if (!((element.tag.model & Dict.CM_OPT) != 0))
Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
if ((element.tag.model & Dict.CM_OBJECT) != 0)
{
/* pop inline stack */
while (lexer.istack.size() > lexer.istackbase)
lexer.popInline( null);
lexer.istackbase = istackbase;
}
Node.trimSpaces(lexer, element);
Node.trimEmptyElement(lexer, element);
}
};
/**
 * Parser for the content of a table element. Rows, row groups and other
 * table-model children are appended to the table; text, inline and block
 * content is moved in front of the table; head content is moved to the head.
 */
public static class ParseTableTag implements Parser {
    /**
     * Parses the content of {@code table} until its end tag, an inferred end,
     * or the end of input. The lexer's inline stack base is moved to the top
     * of the stack for the duration of the call and restored on every exit.
     *
     * @param lexer token source; its inline stack base and {@code exiled} flag are updated
     * @param table the table element whose content is parsed
     * @param mode not read by this parser; tokens are requested in {@code Lexer.IgnoreWhitespace} mode
     */
    public void parse(Lexer lexer, Node table, short mode) {
        Node node, parent;
        int istackbase;
        TagTable tt = lexer.configuration.tt;

        lexer.deferDup();
        istackbase = lexer.istackbase;
        lexer.istackbase = lexer.istack.size();

        while (true) {
            node = lexer.getToken(Lexer.IgnoreWhitespace);
            if (node == null) {
                break;
            }

            /* matching end tag: close the table */
            if (node.tag == table.tag && node.type == Node.EndTag) {
                lexer.istackbase = istackbase;
                table.closed = true;
                Node.trimEmptyElement(lexer, table);
                return;
            }

            /* deal with comments etc. */
            if (Node.insertMisc(table, node)) {
                continue;
            }

            /* discard unknown tags */
            if (node.tag == null && node.type != Node.TextNode) {
                Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
                continue;
            }

            /* if TD or TH or text or inline or block then infer <TR> */
            if (node.type != Node.EndTag) {
                if (node.tag == tt.tagTd
                        || node.tag == tt.tagTh
                        || node.tag == tt.tagTable) {
                    lexer.ungetToken();
                    node = lexer.inferredTag("tr");
                    Report.warning(lexer, table, node, Report.MISSING_STARTTAG);
                } else if (node.type == Node.TextNode
                        || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) {
                    Node.insertNodeBeforeElement(table, node);
                    Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
                    lexer.exiled = true;

                    /*
                     * The C original read "if (!node->type == TextNode)", which is
                     * always false, and was translated here as "if (false)". The
                     * intended test is "not a text node", matching what ParseRowGroup
                     * and ParseRow do for exiled content: an element moved in front
                     * of the table must still have its own content parsed.
                     */
                    if (node.type != Node.TextNode) {
                        parseTag(lexer, node, Lexer.IgnoreWhitespace);
                    }

                    lexer.exiled = false;
                    continue;
                } else if ((node.tag.model & Dict.CM_HEAD) != 0) {
                    moveToHead(lexer, table, node);
                    continue;
                }
            }

            /*
             * if this is the end tag for an ancestor element
             * then infer end tag for this element
             */
            if (node.type == Node.EndTag) {
                /* a stray form end tag is recorded on the lexer and discarded */
                if (node.tag == tt.tagForm) {
                    lexer.badForm = 1;
                    Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                /* stray end tags of table or row content are discarded */
                if (node.tag != null && (node.tag.model & (Dict.CM_TABLE | Dict.CM_ROW)) != 0) {
                    Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                for (parent = table.parent; parent != null; parent = parent.parent) {
                    if (node.tag == parent.tag) {
                        Report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE);
                        /* push the token back so the ancestor's parser sees its end tag */
                        lexer.ungetToken();
                        lexer.istackbase = istackbase;
                        Node.trimEmptyElement(lexer, table);
                        return;
                    }
                }
            }

            /* anything without a table content model ends the table */
            if ((node.tag.model & Dict.CM_TABLE) == 0) {
                lexer.ungetToken();
                Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
                lexer.istackbase = istackbase;
                Node.trimEmptyElement(lexer, table);
                return;
            }

            if (node.type == Node.StartTag || node.type == Node.StartEndTag) {
                Node.insertNodeAtEnd(table, node);
                parseTag(lexer, node, Lexer.IgnoreWhitespace);
                continue;
            }

            /* discard unexpected text nodes and end tags */
            Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
        }

        /* end of input reached without an end tag; node is null at this point */
        Report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR);
        Node.trimEmptyElement(lexer, table);
        lexer.istackbase = istackbase;
    }
}
/**
 * Parser for column group content: only COL children (plus comments and
 * similar miscellaneous nodes) are accepted; anything else ends the group.
 */
public static class ParseColGroup implements Parser {
    /**
     * Collects the COL children of {@code colgroup}.
     *
     * @param lexer token source; warnings are reported against it
     * @param colgroup the element whose content is parsed
     * @param mode not read by this parser
     */
    public void parse(Lexer lexer, Node colgroup, short mode) {
        Node token;
        Node ancestor;
        TagTable tags = lexer.configuration.tt;

        if ((colgroup.tag.model & Dict.CM_EMPTY) != 0) {
            return;
        }

        while ((token = lexer.getToken(Lexer.IgnoreWhitespace)) != null) {
            /* the group's own end tag closes it */
            if (token.type == Node.EndTag && token.tag == colgroup.tag) {
                colgroup.closed = true;
                return;
            }

            if (token.type == Node.EndTag) {
                /* stray form end tag: flag it on the lexer and drop it */
                if (token.tag == tags.tagForm) {
                    lexer.badForm = 1;
                    Report.warning(lexer, colgroup, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                /* an ancestor's end tag implicitly ends this group */
                ancestor = colgroup.parent;
                while (ancestor != null) {
                    if (ancestor.tag == token.tag) {
                        lexer.ungetToken();
                        return;
                    }
                    ancestor = ancestor.parent;
                }
            }

            /* text is handed back to the caller */
            if (token.type == Node.TextNode) {
                lexer.ungetToken();
                return;
            }

            /* comments and the like are kept */
            if (Node.insertMisc(colgroup, token)) {
                continue;
            }

            /* unknown tags are dropped */
            if (token.tag == null) {
                Report.warning(lexer, colgroup, token, Report.DISCARDING_UNEXPECTED);
                continue;
            }

            /* any tag other than COL is handed back to the caller */
            if (token.tag != tags.tagCol) {
                lexer.ungetToken();
                return;
            }

            /* a COL end tag is dropped */
            if (token.type == Node.EndTag) {
                Report.warning(lexer, colgroup, token, Report.DISCARDING_UNEXPECTED);
                continue;
            }

            /* token is a COL element at this point */
            Node.insertNodeAtEnd(colgroup, token);
            parseTag(lexer, token, Lexer.IgnoreWhitespace);
        }
    }
}
/**
 * Parser for table row groups (see the CM_ROWGRP check below): accepts rows,
 * infers a missing TR, and relocates content that is not allowed here.
 */
public static class ParseRowGroup implements Parser {
/**
 * Parses the content of {@code rowgroup} until its end is found or inferred.
 *
 * @param lexer token source; its {@code exiled} and {@code badForm} fields may be updated
 * @param rowgroup the element whose content is parsed
 * @param mode not read by this parser; tokens are requested in {@code Lexer.IgnoreWhitespace} mode
 */
public void parse( Lexer lexer, Node rowgroup, short mode )
{
Node node, parent;
TagTable tt = lexer.configuration.tt;
if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0)
return;
while (true)
{
node = lexer.getToken(Lexer.IgnoreWhitespace);
if (node == null) break;
if (node.tag == rowgroup.tag)
{
if (node.type == Node.EndTag)
{
rowgroup.closed = true;
Node.trimEmptyElement(lexer, rowgroup);
return;
}
/* another tag of the same kind that is not an end tag: hand it back and stop */
lexer.ungetToken();
return;
}
/* if </table> infer end tag */
if (node.tag == tt.tagTable && node.type == Node.EndTag)
{
lexer.ungetToken();
Node.trimEmptyElement(lexer, rowgroup);
return;
}
/* deal with comments etc. */
if (Node.insertMisc(rowgroup, node))
continue;
/* discard unknown tags */
if (node.tag == null && node.type != Node.TextNode)
{
Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
continue;
}
/*
if TD or TH then infer <TR>
if text or inline or block move before table
if head content move to head
*/
if (node.type != Node.EndTag)
{
if (node.tag == tt.tagTd || node.tag == tt.tagTh)
{
lexer.ungetToken();
node = lexer.inferredTag("tr");
Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
}
else if (node.type == Node.TextNode
|| (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
{
Node.moveBeforeTable(rowgroup, node, tt);
Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
/* lexer.exiled is set only while the moved element's content is parsed */
lexer.exiled = true;
if (node.type != Node.TextNode)
parseTag(lexer, node, Lexer.IgnoreWhitespace);
lexer.exiled = false;
continue;
}
else if ((node.tag.model & Dict.CM_HEAD) != 0)
{
Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
moveToHead(lexer, rowgroup, node);
continue;
}
}
/*
if this is the end tag for ancestor element
then infer end tag for this element
*/
if (node.type == Node.EndTag)
{
/* a stray form end tag is recorded on the lexer and discarded */
if (node.tag == tt.tagForm)
{
lexer.badForm = 1;
Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
continue;
}
/* stray TR, TD and TH end tags are discarded */
if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag == tt.tagTh)
{
Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
continue;
}
for (parent = rowgroup.parent;
parent != null; parent = parent.parent)
{
if (node.tag == parent.tag)
{
lexer.ungetToken();
Node.trimEmptyElement(lexer, rowgroup);
return;
}
}
}
/*
if THEAD, TFOOT or TBODY then implied end tag
*/
if ((node.tag.model & Dict.CM_ROWGRP) != 0)
{
/* an end tag of another row group is consumed; anything else is handed back */
if (node.type != Node.EndTag)
lexer.ungetToken();
Node.trimEmptyElement(lexer, rowgroup);
return;
}
if (node.type == Node.EndTag)
{
Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
continue;
}
/* anything that is not a TR gets an inferred TR; the token is handed back */
if (!(node.tag == tt.tagTr))
{
node = lexer.inferredTag( "tr");
Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
lexer.ungetToken();
}
/* node should be <TR> */
Node.insertNodeAtEnd(rowgroup, node);
parseTag(lexer, node, Lexer.IgnoreWhitespace);
}
/* end of input */
Node.trimEmptyElement(lexer, rowgroup);
}
};
/**
 * Parser for table rows: accepts TD and TH cells, infers a TD around a FORM,
 * and relocates content that is not allowed directly inside a row.
 */
public static class ParseRow implements Parser {
/**
 * Parses the content of {@code row} until its end is found or inferred.
 *
 * @param lexer token source; its {@code excludeBlocks}, {@code exiled} and
 *        {@code badForm} fields and its inline stack may be updated
 * @param row the element whose content is parsed
 * @param mode not read by this parser; tokens are requested in {@code Lexer.IgnoreWhitespace} mode
 */
public void parse( Lexer lexer, Node row, short mode )
{
Node node, parent;
boolean exclude_state;
TagTable tt = lexer.configuration.tt;
if ((row.tag.model & Dict.CM_EMPTY) != 0)
return;
while (true)
{
node = lexer.getToken(Lexer.IgnoreWhitespace);
if (node == null) break;
if (node.tag == row.tag)
{
if (node.type == Node.EndTag)
{
row.closed = true;
Node.fixEmptyRow(lexer, row);
return;
}
/* another tag of the same kind that is not an end tag: hand it back and stop */
lexer.ungetToken();
Node.fixEmptyRow(lexer, row);
return;
}
/*
if this is the end tag for an ancestor element
then infer end tag for this element
*/
if (node.type == Node.EndTag)
{
/* a stray form end tag is recorded on the lexer and discarded */
if (node.tag == tt.tagForm)
{
lexer.badForm = 1;
Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
continue;
}
/* stray TD and TH end tags are discarded */
if (node.tag == tt.tagTd || node.tag == tt.tagTh)
{
Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
continue;
}
for (parent = row.parent;
parent != null; parent = parent.parent)
{
if (node.tag == parent.tag)
{
lexer.ungetToken();
Node.trimEmptyElement(lexer, row);
return;
}
}
}
/* deal with comments etc. */
if (Node.insertMisc(row, node))
continue;
/* discard unknown tags */
if (node.tag == null && node.type != Node.TextNode)
{
Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
continue;
}
/* discard unexpected <table> element */
if (node.tag == tt.tagTable)
{
Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
continue;
}
/* THEAD, TFOOT or TBODY */
if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0)
{
lexer.ungetToken();
Node.trimEmptyElement(lexer, row);
return;
}
if (node.type == Node.EndTag)
{
Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
continue;
}
/*
if text or inline or block move before table
if head content move to head
(end tags were already discarded above, so this condition always holds)
*/
if (node.type != Node.EndTag)
{
/* a FORM is wrapped in an inferred TD */
if (node.tag == tt.tagForm)
{
lexer.ungetToken();
node = lexer.inferredTag("td");
Report.warning(lexer, row, node, Report.MISSING_STARTTAG);
}
else if (node.type == Node.TextNode
|| (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
{
Node.moveBeforeTable(row, node, tt);
Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
/* lexer.exiled is set only while the moved element's content is parsed */
lexer.exiled = true;
if (node.type != Node.TextNode)
parseTag(lexer, node, Lexer.IgnoreWhitespace);
lexer.exiled = false;
continue;
}
else if ((node.tag.model & Dict.CM_HEAD) != 0)
{
Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
moveToHead(lexer, row, node);
continue;
}
}
if (!(node.tag == tt.tagTd || node.tag == tt.tagTh))
{
Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
continue;
}
/* node should be <TD> or <TH> */
Node.insertNodeAtEnd(row, node);
/* blocks are allowed inside the cell; the previous setting is restored afterwards */
exclude_state = lexer.excludeBlocks;
lexer.excludeBlocks = false;
parseTag(lexer, node, Lexer.IgnoreWhitespace);
lexer.excludeBlocks = exclude_state;
/* pop inline stack */
while (lexer.istack.size() > lexer.istackbase)
lexer.popInline( null);
}
/* end of input */
Node.trimEmptyElement(lexer, row);
}
};
/**
 * Parser for NOFRAMES content: the content is wrapped in a BODY element,
 * which is inferred when the document does not supply one.
 */
public static class ParseNoFrames implements Parser {
    /**
     * Parses the content of {@code noframes}.
     *
     * @param lexer token source; {@code Report.USING_NOFRAMES} is recorded in its {@code badAccess} flags
     * @param noframes the element whose content is parsed
     * @param mode ignored; tokens are always requested in {@code Lexer.IgnoreWhitespace} mode
     */
    public void parse(Lexer lexer, Node noframes, short mode) {
        Node token;
        TagTable tags = lexer.configuration.tt;

        lexer.badAccess |= Report.USING_NOFRAMES;

        while ((token = lexer.getToken(Lexer.IgnoreWhitespace)) != null) {
            /* own end tag closes the element */
            if (token.type == Node.EndTag && token.tag == noframes.tag) {
                noframes.closed = true;
                Node.trimSpaces(lexer, noframes);
                return;
            }

            /* a FRAME or FRAMESET tag implicitly ends the element */
            if (token.tag == tags.tagFrame || token.tag == tags.tagFrameset) {
                Report.warning(lexer, noframes, token, Report.MISSING_ENDTAG_BEFORE);
                Node.trimSpaces(lexer, noframes);
                lexer.ungetToken();
                return;
            }

            /* html tags are dropped; only start tags are reported */
            if (token.tag == tags.tagHtml) {
                boolean isStart = token.type == Node.StartTag || token.type == Node.StartEndTag;
                if (isStart) {
                    Report.warning(lexer, noframes, token, Report.DISCARDING_UNEXPECTED);
                }
                continue;
            }

            /* comments and the like are kept */
            if (Node.insertMisc(noframes, token)) {
                continue;
            }

            /* an explicit body start tag */
            if (token.type == Node.StartTag && token.tag == tags.tagBody) {
                Node.insertNodeAtEnd(noframes, token);
                parseTag(lexer, token, Lexer.IgnoreWhitespace /*MixedContent*/);
                continue;
            }

            /* unknown non-text tokens (such as unexpected end tags) are dropped */
            if (token.type != Node.TextNode && token.tag == null) {
                Report.warning(lexer, noframes, token, Report.DISCARDING_UNEXPECTED);
                continue;
            }

            /* text or a known tag: infer the enclosing body element */
            lexer.ungetToken();
            token = lexer.inferredTag("body");
            if (lexer.configuration.XmlOut) {
                Report.warning(lexer, noframes, token, Report.INSERTING_TAG);
            }
            Node.insertNodeAtEnd(noframes, token);
            parseTag(lexer, token, Lexer.IgnoreWhitespace /*MixedContent*/);
        }

        /* end of input reached without an end tag; token is null at this point */
        Report.warning(lexer, noframes, token, Report.MISSING_ENDTAG_FOR);
    }
}
/**
 * Parser for SELECT-like fields: accepts OPTION, OPTGROUP and SCRIPT start
 * tags as children and discards everything else.
 */
public static class ParseSelect implements Parser {
    /**
     * Parses the content of {@code field}.
     *
     * @param lexer token source; its {@code insert} field is set to -1
     * @param field the element whose content is parsed
     * @param mode not read by this parser
     */
    public void parse(Lexer lexer, Node field, short mode) {
        Node token;
        TagTable tags = lexer.configuration.tt;

        lexer.insert = -1; /* defer implicit inline start tags */

        while ((token = lexer.getToken(Lexer.IgnoreWhitespace)) != null) {
            /* own end tag closes the field */
            if (token.type == Node.EndTag && token.tag == field.tag) {
                field.closed = true;
                Node.trimSpaces(lexer, field);
                return;
            }

            /* comments and the like are kept */
            if (Node.insertMisc(field, token)) {
                continue;
            }

            boolean accepted = false;
            if (token.type == Node.StartTag) {
                accepted = token.tag == tags.tagOption
                        || token.tag == tags.tagOptgroup
                        || token.tag == tags.tagScript;
            }

            if (!accepted) {
                /* discard unexpected tags */
                Report.warning(lexer, field, token, Report.DISCARDING_UNEXPECTED);
                continue;
            }

            Node.insertNodeAtEnd(field, token);
            parseTag(lexer, token, Lexer.IgnoreWhitespace);
        }

        /* end of input reached without an end tag; token is null at this point */
        Report.warning(lexer, field, token, Report.MISSING_ENDTAG_FOR);
    }
}
/**
 * Parser for fields that hold text only. Any tag other than the field's own
 * end tag ends the field, except FONT tags, which are discarded.
 */
public static class ParseText implements Parser {
    /**
     * Parses the text content of {@code field}.
     *
     * @param lexer token source; its {@code insert} field is set to -1
     * @param field the element whose content is parsed
     * @param mode lexer mode used to read tokens; replaced by
     *        {@code Lexer.Preformatted} when the field is a TEXTAREA
     */
    public void parse(Lexer lexer, Node field, short mode) {
        Node token;
        TagTable tags = lexer.configuration.tt;

        lexer.insert = -1; /* defer implicit inline start tags */

        short tokenMode = mode;
        if (field.tag == tags.tagTextarea) {
            tokenMode = Lexer.Preformatted;
        }

        while ((token = lexer.getToken(tokenMode)) != null) {
            /* own end tag closes the field */
            if (token.type == Node.EndTag && token.tag == field.tag) {
                field.closed = true;
                Node.trimSpaces(lexer, field);
                return;
            }

            /* comments and the like are kept */
            if (Node.insertMisc(field, token)) {
                continue;
            }

            if (token.type == Node.TextNode) {
                /* only called for 1st child */
                if (field.content == null && (tokenMode & Lexer.Preformatted) == 0) {
                    Node.trimSpaces(lexer, field);
                }
                /* empty text nodes are dropped */
                if (token.start < token.end) {
                    Node.insertNodeAtEnd(field, token);
                }
                continue;
            }

            if (token.tag == tags.tagFont) {
                Report.warning(lexer, field, token, Report.DISCARDING_UNEXPECTED);
                continue;
            }

            /* terminate element on other tags */
            if ((field.tag.model & Dict.CM_OPT) == 0) {
                Report.warning(lexer, field, token, Report.MISSING_ENDTAG_BEFORE);
            }
            lexer.ungetToken();
            Node.trimSpaces(lexer, field);
            return;
        }

        /* end of input; token is null here and optional end tags are not reported */
        if ((field.tag.model & Dict.CM_OPT) == 0) {
            Report.warning(lexer, field, token, Report.MISSING_ENDTAG_FOR);
        }
    }
}
/**
 * Parser for OPTGROUP content: accepts OPTION start tags, accepts nested
 * OPTGROUP start tags with a warning, and discards everything else.
 */
public static class ParseOptGroup implements Parser {
    /**
     * Parses the content of {@code field}.
     *
     * @param lexer token source; its {@code insert} field is set to -1
     * @param field the element whose content is parsed
     * @param mode not read by this parser
     */
    public void parse(Lexer lexer, Node field, short mode) {
        Node token;
        TagTable tags = lexer.configuration.tt;

        lexer.insert = -1; /* defer implicit inline start tags */

        while ((token = lexer.getToken(Lexer.IgnoreWhitespace)) != null) {
            /* own end tag closes the group */
            if (token.type == Node.EndTag && token.tag == field.tag) {
                field.closed = true;
                Node.trimSpaces(lexer, field);
                return;
            }

            /* comments and the like are kept */
            if (Node.insertMisc(field, token)) {
                continue;
            }

            boolean isStart = token.type == Node.StartTag;
            boolean isNestedGroup = isStart && token.tag == tags.tagOptgroup;
            boolean isOption = isStart && token.tag == tags.tagOption;

            if (!isOption && !isNestedGroup) {
                /* discard unexpected tags */
                Report.warning(lexer, field, token, Report.DISCARDING_UNEXPECTED);
                continue;
            }

            if (isNestedGroup) {
                Report.warning(lexer, field, token, Report.CANT_BE_NESTED);
            }
            Node.insertNodeAtEnd(field, token);
            parseTag(lexer, token, Lexer.MixedContent);
        }
    }
}
/**
 * Returns the shared {@code ParseHTML} parser instance.
 *
 * @return the parser held in {@code _parseHTML}
 */
public static Parser getParseHTML() {
    return _parseHTML;
}
/**
 * Returns the shared {@code ParseHead} parser instance.
 *
 * @return the parser held in {@code _parseHead}
 */
public static Parser getParseHead() {
    return _parseHead;
}
/**
 * Returns the shared {@code ParseTitle} parser instance.
 *
 * @return the parser held in {@code _parseTitle}
 */
public static Parser getParseTitle() {
    return _parseTitle;
}
/**
 * Returns the shared {@code ParseScript} parser instance.
 *
 * @return the parser held in {@code _parseScript}
 */
public static Parser getParseScript() {
    return _parseScript;
}
/**
 * Returns the shared {@code ParseBody} parser instance.
 *
 * @return the parser held in {@code _parseBody}
 */
public static Parser getParseBody() {
    return _parseBody;
}
/**
 * Returns the shared {@code ParseFrameSet} parser instance.
 *
 * @return the parser held in {@code _parseFrameSet}
 */
public static Parser getParseFrameSet() {
    return _parseFrameSet;
}
/**
 * Returns the shared {@code ParseInline} parser instance.
 *
 * @return the parser held in {@code _parseInline}
 */
public static Parser getParseInline() {
    return _parseInline;
}
/**
 * Returns the shared {@code ParseList} parser instance.
 *
 * @return the parser held in {@code _parseList}
 */
public static Parser getParseList() {
    return _parseList;
}
/**
 * Returns the shared {@code ParseDefList} parser instance.
 *
 * @return the parser held in {@code _parseDefList}
 */
public static Parser getParseDefList() {
    return _parseDefList;
}
/**
 * Returns the shared {@code ParsePre} parser instance.
 *
 * @return the parser held in {@code _parsePre}
 */
public static Parser getParsePre() {
    return _parsePre;
}
/**
 * Returns the shared {@code ParseBlock} parser instance.
 *
 * @return the parser held in {@code _parseBlock}
 */
public static Parser getParseBlock() {
    return _parseBlock;
}
/**
 * Returns the shared {@code ParseTableTag} parser instance.
 *
 * @return the parser held in {@code _parseTableTag}
 */
public static Parser getParseTableTag() {
    return _parseTableTag;
}
/**
 * Returns the shared {@code ParseColGroup} parser instance.
 *
 * @return the parser held in {@code _parseColGroup}
 */
public static Parser getParseColGroup() {
    return _parseColGroup;
}
/**
 * Returns the shared {@code ParseRowGroup} parser instance.
 *
 * @return the parser held in {@code _parseRowGroup}
 */
public static Parser getParseRowGroup() {
    return _parseRowGroup;
}
/**
 * Returns the shared {@code ParseRow} parser instance.
 *
 * @return the parser held in {@code _parseRow}
 */
public static Parser getParseRow() {
    return _parseRow;
}
/**
 * Returns the shared {@code ParseNoFrames} parser instance.
 *
 * @return the parser held in {@code _parseNoFrames}
 */
public static Parser getParseNoFrames() {
    return _parseNoFrames;
}
/**
 * Returns the shared {@code ParseSelect} parser instance.
 *
 * @return the parser held in {@code _parseSelect}
 */
public static Parser getParseSelect() {
    return _parseSelect;
}
/**
 * Returns the shared {@code ParseText} parser instance.
 *
 * @return the parser held in {@code _parseText}
 */
public static Parser getParseText() {
    return _parseText;
}
/**
 * Returns the shared {@code ParseOptGroup} parser instance.
 *
 * @return the parser held in {@code _parseOptGroup}
 */
public static Parser getParseOptGroup() {
    return _parseOptGroup;
}
/*
 * Shared parser instances, one per parser class, handed out by the
 * getParse*() accessors. They are created once when the class is
 * initialised and never replaced, hence final.
 */
private static final Parser _parseHTML = new ParseHTML();
private static final Parser _parseHead = new ParseHead();
private static final Parser _parseTitle = new ParseTitle();
private static final Parser _parseScript = new ParseScript();
private static final Parser _parseBody = new ParseBody();
private static final Parser _parseFrameSet = new ParseFrameSet();
private static final Parser _parseInline = new ParseInline();
private static final Parser _parseList = new ParseList();
private static final Parser _parseDefList = new ParseDefList();
private static final Parser _parsePre = new ParsePre();
private static final Parser _parseBlock = new ParseBlock();
private static final Parser _parseTableTag = new ParseTableTag();
private static final Parser _parseColGroup = new ParseColGroup();
private static final Parser _parseRowGroup = new ParseRowGroup();
private static final Parser _parseRow = new ParseRow();
private static final Parser _parseNoFrames = new ParseNoFrames();
private static final Parser _parseSelect = new ParseSelect();
private static final Parser _parseText = new ParseText();
private static final Parser _parseOptGroup = new ParseOptGroup();
/*
HTML is the top level element
*/
/**
 * Parses an HTML document. Leading comments and similar nodes and the first
 * doctype are attached to a new root node; the first other non-end-tag token
 * starts the html element (inferred when it is not an html start tag), whose
 * content is parsed by the parser returned from {@code getParseHTML()}.
 *
 * @param lexer token source; warnings are reported against it
 * @return the new root node
 */
public static Node parseDocument(Lexer lexer) {
    Node seenDoctype = null;
    TagTable tags = lexer.configuration.tt;

    Node root = lexer.newNode();
    root.type = Node.RootNode;

    Node token;
    while ((token = lexer.getToken(Lexer.IgnoreWhitespace)) != null) {
        /* comments and the like are kept */
        if (Node.insertMisc(root, token)) {
            continue;
        }

        /* only the first doctype is kept */
        if (token.type == Node.DocTypeTag) {
            if (seenDoctype != null) {
                Report.warning(lexer, root, token, Report.DISCARDING_UNEXPECTED);
            } else {
                Node.insertNodeAtEnd(root, token);
                seenDoctype = token;
            }
            continue;
        }

        /* end tags before the html element are dropped */
        if (token.type == Node.EndTag) {
            Report.warning(lexer, root, token, Report.DISCARDING_UNEXPECTED); //TODO?
            continue;
        }

        Node htmlElement;
        if (token.type == Node.StartTag && token.tag == tags.tagHtml) {
            htmlElement = token;
        } else {
            /* hand the token back and start from an inferred html element */
            lexer.ungetToken();
            htmlElement = lexer.inferredTag("html");
        }

        Node.insertNodeAtEnd(root, htmlElement);
        getParseHTML().parse(lexer, htmlElement, (short)0); // TODO?
        break;
    }

    return root;
}
/**
 * Indicates whether or not whitespace should be preserved for this element.
 * If an {@code xml:space} attribute is found, then if the attribute value is
 * {@code preserve}, returns {@code true}. For any other value (including a
 * missing value), returns {@code false}. If an {@code xml:space} attribute
 * was not found, then the following element names result in a return value
 * of {@code true}: {@code pre}, {@code script}, {@code style}, and
 * {@code xsl:text}. Finally, if a {@code TagTable} was passed in and its
 * {@code findParser} returns the pre parser for the element, then
 * {@code true} will be returned. Otherwise, {@code false} is returned.
 *
 * @param element The {@code Node} to test to see if whitespace should be
 *        preserved.
 * @param tt The {@code TagTable} used for the parser lookup. This may be
 *        {@code null}, in which case this test is bypassed.
 * @return {@code true} or {@code false}, as explained above.
 */
public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt) {
    /* search attributes for xml:space */
    for (AttVal attribute = element.attributes; attribute != null; attribute = attribute.next) {
        /*
         * Constant-first comparisons: an attribute entry without a name or
         * without a value must not raise a NullPointerException here.
         */
        if ("xml:space".equals(attribute.attribute)) {
            return "preserve".equals(attribute.value);
        }
    }

    /* kludge for html docs without explicit xml:space attribute */
    if (Lexer.wstrcasecmp(element.element, "pre") == 0
            || Lexer.wstrcasecmp(element.element, "script") == 0
            || Lexer.wstrcasecmp(element.element, "style") == 0) {
        return true;
    }

    if ((tt != null) && (tt.findParser(element) == getParsePre())) {
        return true;
    }

    /* kludge for XSL docs */
    if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0) {
        return true;
    }

    return false;
}
/*
XML documents
*/
/**
 * Parses the content of an XML element, recursing into child elements, and
 * then trims one leading and one trailing space from its text content unless
 * the element is being read in {@code Lexer.Preformatted} mode.
 *
 * @param lexer token source; errors are reported against it
 * @param element the element whose content is parsed
 * @param mode lexer mode for reading tokens; replaced by
 *        {@code Lexer.Preformatted} when {@code XMLPreserveWhiteSpace} says so
 */
public static void parseXMLElement(Lexer lexer, Node element, short mode) {
    /* Jeff Young's kludge for XSL docs */
    if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0) {
        return;
    }

    /* if node is pre or has xml:space="preserve" then do so */
    if (XMLPreserveWhiteSpace(element, lexer.configuration.tt)) {
        mode = Lexer.Preformatted;
    }

    Node token;
    while ((token = lexer.getToken(mode)) != null) {
        if (token.type == Node.EndTag) {
            if (token.element.equals(element.element)) {
                element.closed = true;
                break;
            }
            /* discard unexpected end tags */
            Report.error(lexer, element, token, Report.UNEXPECTED_ENDTAG);
            continue;
        }

        /* parse content on seeing start tag */
        if (token.type == Node.StartTag) {
            parseXMLElement(lexer, token, mode);
        }

        Node.insertNodeAtEnd(element, token);
    }

    boolean trimming = mode != Lexer.Preformatted;

    /*
     * if first child is text then trim initial space and
     * delete text node if it is empty.
     */
    Node first = element.content;
    if (first != null && first.type == Node.TextNode && trimming
            && first.textarray[first.start] == (byte)' ') {
        first.start++;
        if (first.start >= first.end) {
            Node.discardElement(first);
        }
    }

    /*
     * if last child is text then trim final space and
     * delete the text node if it is empty
     * (read only now, after the first child may have been discarded)
     */
    Node last = element.last;
    if (last != null && last.type == Node.TextNode && trimming
            && last.textarray[last.end - 1] == (byte)' ') {
        last.end--;
        if (last.start >= last.end) {
            Node.discardElement(last);
        }
    }
}
/**
 * Parses an XML document into a new root node. Sets the configuration's
 * {@code XmlTags} option, keeps the first doctype, and parses every
 * top-level start tag with {@code parseXMLElement}.
 *
 * @param lexer token source; warnings are reported against it
 * @return the new root node
 */
public static Node parseXMLDocument(Lexer lexer) {
    Node root = lexer.newNode();
    root.type = Node.RootNode;
    Node seenDoctype = null;
    lexer.configuration.XmlTags = true;

    Node token;
    while ((token = lexer.getToken(Lexer.IgnoreWhitespace)) != null) {
        /* discard unexpected end tags */
        if (token.type == Node.EndTag) {
            Report.warning(lexer, null, token, Report.UNEXPECTED_ENDTAG);
            continue;
        }

        /* comments and the like are kept */
        if (Node.insertMisc(root, token)) {
            continue;
        }

        /* only the first doctype is kept */
        if (token.type == Node.DocTypeTag) {
            if (seenDoctype != null) {
                Report.warning(lexer, root, token, Report.DISCARDING_UNEXPECTED); // TODO
            } else {
                Node.insertNodeAtEnd(root, token);
                seenDoctype = token;
            }
            continue;
        }

        /* if start tag then parse element's content */
        if (token.type == Node.StartTag) {
            Node.insertNodeAtEnd(root, token);
            parseXMLElement(lexer, token, Lexer.IgnoreWhitespace);
        }
    }

    /*
     * The document type is deliberately kept: the original source carried a
     * permanently disabled block here that looked it up and discarded it.
     */

    if (seenDoctype != null && !lexer.checkDocTypeKeyWords(seenDoctype)) {
        Report.warning(lexer, seenDoctype, null, Report.DTYPE_NOT_UPPER_CASE);
    }

    /* with the XmlPi option set, let the lexer fix up the document's XML processing instruction */
    if (lexer.configuration.XmlPi) {
        lexer.fixXMLPI(root);
    }

    return root;
}
/**
 * Tells whether a node is to be treated as JavaScript. A node without any
 * attributes counts as JavaScript; otherwise some {@code language} or
 * {@code type} attribute must have a value for which
 * {@code Lexer.wsubstr(value, "javascript")} holds.
 *
 * @param node the node whose attributes are examined
 * @return {@code true} if the node has no attributes or a matching attribute
 */
public static boolean isJavaScript(Node node) {
    if (node.attributes == null) {
        return true;
    }

    boolean found = false;
    AttVal current = node.attributes;
    while (current != null) {
        boolean namesScriptKind = Lexer.wstrcasecmp(current.attribute, "language") == 0
                || Lexer.wstrcasecmp(current.attribute, "type") == 0;
        if (namesScriptKind && Lexer.wsubstr(current.value, "javascript")) {
            found = true;
        }
        current = current.next;
    }
    return found;
}
}
|