From 1b3a3131074cddf48bb899f9214dfcb8e52aa3c4 Mon Sep 17 00:00:00 2001 From: khartlage Date: Fri, 17 Jan 2003 21:06:02 +0000 Subject: [PATCH 1/1] misc changes --- net.sourceforge.phpeclipse/TODO | 4 - .../sourceforge/phpeclipse/PHPParserTestCase.java | 2 + .../internal/corext/template/default-templates.xml | 13 +- .../phpeclipse/actions/HTMLParserAction.java | 4 +- .../phpeclipse/phpeditor/phpparser/PHPParser.java | 10 + .../src/org/w3c/tidy/AttVal.java | 182 -- .../src/org/w3c/tidy/AttrCheck.java | 38 - .../src/org/w3c/tidy/AttrCheckImpl.java | 189 -- .../src/org/w3c/tidy/Attribute.java | 65 - .../src/org/w3c/tidy/AttributeTable.java | 317 -- .../src/org/w3c/tidy/CheckAttribs.java | 39 - .../src/org/w3c/tidy/CheckAttribsImpl.java | 403 --- .../src/org/w3c/tidy/Clean.java | 1779 ----------- .../src/org/w3c/tidy/Configuration.java | 600 ---- .../src/org/w3c/tidy/DOMAttrImpl.java | 190 -- .../src/org/w3c/tidy/DOMAttrMapImpl.java | 138 - .../src/org/w3c/tidy/DOMCDATASectionImpl.java | 51 - .../src/org/w3c/tidy/DOMCharacterDataImpl.java | 143 - .../src/org/w3c/tidy/DOMCommentImpl.java | 55 - .../src/org/w3c/tidy/DOMDocumentImpl.java | 261 -- .../src/org/w3c/tidy/DOMDocumentTypeImpl.java | 107 - .../src/org/w3c/tidy/DOMElementImpl.java | 307 -- .../src/org/w3c/tidy/DOMExceptionImpl.java | 37 - .../src/org/w3c/tidy/DOMNodeImpl.java | 488 --- .../src/org/w3c/tidy/DOMNodeListByTagNameImpl.java | 99 - .../src/org/w3c/tidy/DOMNodeListImpl.java | 75 - .../org/w3c/tidy/DOMProcessingInstructionImpl.java | 74 - .../src/org/w3c/tidy/DOMTextImpl.java | 65 - .../src/org/w3c/tidy/Dict.java | 110 - .../src/org/w3c/tidy/Entity.java | 51 - .../src/org/w3c/tidy/EntityTable.java | 386 --- .../src/org/w3c/tidy/IStack.java | 65 - .../src/org/w3c/tidy/Lexer.java | 3134 ------------------- .../src/org/w3c/tidy/MutableBoolean.java | 38 - .../src/org/w3c/tidy/MutableInteger.java | 38 - .../src/org/w3c/tidy/MutableObject.java | 58 - .../src/org/w3c/tidy/Node.java | 917 ------ 
.../src/org/w3c/tidy/Out.java | 49 - .../src/org/w3c/tidy/OutImpl.java | 148 - .../src/org/w3c/tidy/PPrint.java | 1845 ----------- .../src/org/w3c/tidy/Parser.java | 39 - .../src/org/w3c/tidy/ParserImpl.java | 3205 -------------------- .../src/org/w3c/tidy/Report.java | 1130 ------- .../src/org/w3c/tidy/StreamIn.java | 81 - .../src/org/w3c/tidy/StreamInImpl.java | 367 --- .../src/org/w3c/tidy/Style.java | 58 - .../src/org/w3c/tidy/StyleProp.java | 57 - .../src/org/w3c/tidy/TagTable.java | 389 --- .../src/org/w3c/tidy/Tidy.java | 1424 --------- .../src/org/w3c/tidy/TidyBeanInfo.java | 39 - .../src/org/w3c/tidy/TidyMessages.properties | 194 -- .../src/org/w3c/tidy/config.txt | 20 - net.sourceforge.phpeclipse/templates.xml | 65 - 53 files changed, 26 insertions(+), 19616 deletions(-) delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/AttVal.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/AttrCheck.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/AttrCheckImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/Attribute.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/AttributeTable.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/CheckAttribs.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/CheckAttribsImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/Clean.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/Configuration.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrMapImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCDATASectionImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCharacterDataImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCommentImpl.java delete mode 100644 
net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentTypeImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMElementImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMExceptionImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListByTagNameImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMProcessingInstructionImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMTextImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/Dict.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/Entity.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/EntityTable.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/IStack.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/Lexer.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableBoolean.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableInteger.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableObject.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/Node.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/Out.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/OutImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/PPrint.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/Parser.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/ParserImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/Report.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/StreamIn.java delete mode 
100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/StreamInImpl.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/Style.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/StyleProp.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/TagTable.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/Tidy.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/TidyBeanInfo.java delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/TidyMessages.properties delete mode 100644 net.sourceforge.phpeclipse/src/org/w3c/tidy/config.txt delete mode 100644 net.sourceforge.phpeclipse/templates.xml diff --git a/net.sourceforge.phpeclipse/TODO b/net.sourceforge.phpeclipse/TODO index 1e91331..cf9cc4b 100644 --- a/net.sourceforge.phpeclipse/TODO +++ b/net.sourceforge.phpeclipse/TODO @@ -1,7 +1,3 @@ net.sourceforge.phpeclipse - todo: -- synthax highlighting for php-files -- php project wizard -- update httpd context path (for project) -- html highlighting - debug for php \ No newline at end of file diff --git a/net.sourceforge.phpeclipse/src/junit/sourceforge/phpeclipse/PHPParserTestCase.java b/net.sourceforge.phpeclipse/src/junit/sourceforge/phpeclipse/PHPParserTestCase.java index 5675731..bd8ad1a 100644 --- a/net.sourceforge.phpeclipse/src/junit/sourceforge/phpeclipse/PHPParserTestCase.java +++ b/net.sourceforge.phpeclipse/src/junit/sourceforge/phpeclipse/PHPParserTestCase.java @@ -28,12 +28,14 @@ public class PHPParserTestCase extends TestCase { * Test the PHP Parser with different PHP snippets */ public void testPHPParser() { + checkHTML("\n\n\n\n "); checkHTML(""); checkHTML(""); checkHTML(" foo "); checkHTML(" "); + checkPHP("$add = 'a'.$i;$val = $$add;"); checkPHP("($a==\"b\") || (c($this->x)==\"d\");"); checkPHP("(substr($this->file, 0, 2) == \"MM\");"); checkPHP("(substr($this->file, 0, 2) == \"MM\") || substr($this->file, 0, 2) == \"II\";"); diff --git 
a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/corext/template/default-templates.xml b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/corext/template/default-templates.xml index 5be15b7..287f71a 100644 --- a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/corext/template/default-templates.xml +++ b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/corext/template/default-templates.xml @@ -135,7 +135,18 @@ <?xml-stylesheet type="text/xsl" href="${url}"> ${cursor} - + + + diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpeclipse/actions/HTMLParserAction.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpeclipse/actions/HTMLParserAction.java index d55087d..9406623 100644 --- a/net.sourceforge.phpeclipse/src/net/sourceforge/phpeclipse/actions/HTMLParserAction.java +++ b/net.sourceforge.phpeclipse/src/net/sourceforge/phpeclipse/actions/HTMLParserAction.java @@ -24,8 +24,8 @@ import org.eclipse.jface.viewers.ISelectionProvider; import org.eclipse.jface.viewers.StructuredSelection; import org.eclipse.ui.IObjectActionDelegate; import org.eclipse.ui.IWorkbenchPart; -import org.w3c.tidy.Configuration; -import org.w3c.tidy.Tidy; +import net.sourceforge.phpdt.tidy.Configuration; +import net.sourceforge.phpdt.tidy.Tidy; public class HTMLParserAction implements IObjectActionDelegate { diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpeclipse/phpeditor/phpparser/PHPParser.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpeclipse/phpeditor/phpparser/PHPParser.java index 9ccac8e..5c79a17 100644 --- a/net.sourceforge.phpeclipse/src/net/sourceforge/phpeclipse/phpeditor/phpparser/PHPParser.java +++ b/net.sourceforge.phpeclipse/src/net/sourceforge/phpeclipse/phpeditor/phpparser/PHPParser.java @@ -1334,10 +1334,15 @@ public class PHPParser extends PHPKeywords { ident.append(ch); if (ch == '$') { + getChar(); + // attention recursive call: + getIdentifier(); token = TT_VARIABLE; + return; } 
else { token = TT_IDENTIFIER; } + getChar(); while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || (ch == '_')) { ident.append(ch); @@ -1346,6 +1351,8 @@ public class PHPParser extends PHPKeywords { identifier = ident.toString(); chIndx--; + // determine if this identitfer is a keyword + // @todo improve this in future version Integer i = (Integer) keywordMap.get(identifier.toLowerCase()); if (i != null) { token = i.intValue(); @@ -1790,6 +1797,9 @@ public class PHPParser extends PHPKeywords { getNextToken(); } else if (token == TT_function) { getNextToken(); + if (token == TT_AMPERSAND) { + getNextToken(); + } if (token == TT_IDENTIFIER) { outlineInfo.addVariable(identifier); current.add(new PHPFunctionDeclaration(current, identifier, chIndx - identifier.length())); diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttVal.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttVal.java deleted file mode 100644 index 7de977a..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttVal.java +++ /dev/null @@ -1,182 +0,0 @@ -/* - * @(#)AttVal.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Attribute/Value linked list node - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class AttVal extends Object implements Cloneable { - - public AttVal next; - public Attribute dict; - public Node asp; - public Node php; - public int delim; - public String attribute; - public String value; - - public AttVal() - { - this.next = null; - this.dict = null; - this.asp = null; - this.php = null; - this.delim = 0; - this.attribute = null; - this.value = null; - } - - public AttVal(AttVal next, Attribute dict, int delim, - String attribute, String value) - { - this.next = next; - this.dict = dict; - this.asp = null; - this.php = null; - this.delim = delim; - this.attribute = attribute; - this.value = value; - } - - public AttVal(AttVal next, Attribute dict, Node asp, Node php, - int delim, String attribute, String value) - { - this.next = next; - this.dict = dict; - this.asp = asp; - this.php = php; - this.delim = delim; - this.attribute = attribute; - this.value = value; - } - - protected Object clone() - { - AttVal av = new AttVal(); - if (next != null) { - av.next = (AttVal)next.clone(); - } - if (attribute != null) - av.attribute = attribute; - if (value != null) - av.value = value; - av.delim = delim; - if (asp != null) { - av.asp = (Node)asp.clone(); - } - if (php != null) { - av.php = 
(Node)php.clone(); - } - av.dict = - AttributeTable.getDefaultAttributeTable().findAttribute(this); - return av; - } - - public boolean isBoolAttribute() - { - Attribute attribute = this.dict; - if ( attribute != null ) { - if (attribute.attrchk == AttrCheckImpl.getCheckBool() ) { - return true; - } - } - - return false; - } - - /* ignore unknown attributes for proprietary elements */ - public Attribute checkAttribute( Lexer lexer, Node node ) - { - TagTable tt = lexer.configuration.tt; - - if (this.asp == null && this.php == null) - this.checkUniqueAttribute(lexer, node); - - Attribute attribute = this.dict; - if ( attribute != null ) { - /* title is vers 2.0 for A and LINK otherwise vers 4.0 */ - if (attribute == AttributeTable.attrTitle && - (node.tag == tt.tagA || node.tag == tt.tagLink)) - lexer.versions &= Dict.VERS_ALL; - else if ((attribute.versions & Dict.VERS_XML) != 0) - { - if (!(lexer.configuration.XmlTags || lexer.configuration.XmlOut)) - Report.attrError(lexer, node, this.attribute, Report.XML_ATTRIBUTE_VALUE); - } - else - lexer.versions &= attribute.versions; - - if (attribute.attrchk != null) - attribute.attrchk.check(lexer, node, this); - } - else if (!lexer.configuration.XmlTags && !(node.tag == null) && this.asp == null && - !(node.tag != null && ((node.tag.versions & Dict.VERS_PROPRIETARY) != 0))) - Report.attrError(lexer, node, this.attribute, Report.UNKNOWN_ATTRIBUTE); - - return attribute; - } - - /* - the same attribute name can't be used - more than once in each element - */ - public void checkUniqueAttribute(Lexer lexer, Node node) - { - AttVal attr; - int count = 0; - - for (attr = this.next; attr != null; attr = attr.next) - { - if (this.attribute != null && - attr.attribute != null && - attr.asp == null && - attr.php == null && - Lexer.wstrcasecmp(this.attribute, attr.attribute) == 0) - ++count; - } - - if (count > 0) - Report.attrError(lexer, node, this.attribute, Report.REPEATED_ATTRIBUTE); - } - - /* --------------------- DOM 
---------------------------- */ - - protected org.w3c.dom.Attr adapter = null; - - protected org.w3c.dom.Attr getAdapter() - { - if (adapter == null) - { - adapter = new DOMAttrImpl(this); - } - return adapter; - } - /* --------------------- END DOM ------------------------ */ - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttrCheck.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttrCheck.java deleted file mode 100644 index 967ea32..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttrCheck.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * @(#)AttrCheck.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Check attribute values - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public interface AttrCheck { - - public void check( Lexer lexer, Node node, AttVal attval); - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttrCheckImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttrCheckImpl.java deleted file mode 100644 index 4c9bc92..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttrCheckImpl.java +++ /dev/null @@ -1,189 +0,0 @@ -/* - * @(#)AttrCheckImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; 
- -/** - * - * Check attribute values implementations - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class AttrCheckImpl { - - public static class CheckUrl implements AttrCheck { - - public void check( Lexer lexer, Node node, AttVal attval) - { - if (attval.value == null) - Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE); - else if (lexer.configuration.FixBackslash) - { - attval.value = attval.value.replace('\\','/'); - } - } - - }; - - public static class CheckScript implements AttrCheck { - - public void check( Lexer lexer, Node node, AttVal attval) - { - } - - }; - - public static class CheckAlign implements AttrCheck { - - public void check( Lexer lexer, Node node, AttVal attval) - { - String value; - - /* IMG, OBJECT, APPLET and EMBED use align for vertical position */ - if (node.tag != null && ((node.tag.model & Dict.CM_IMG) != 0)) - { - getCheckValign().check(lexer, node, attval); - return; - } - - value = attval.value; - - if (value == null) - Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE); - else if (! 
(Lexer.wstrcasecmp(value, "left") == 0 || - Lexer.wstrcasecmp(value, "center") == 0 || - Lexer.wstrcasecmp(value, "right") == 0 || - Lexer.wstrcasecmp(value, "justify") == 0)) - Report.attrError(lexer, node, attval.value, Report.BAD_ATTRIBUTE_VALUE); - } - - }; - - public static class CheckValign implements AttrCheck { - - public void check( Lexer lexer, Node node, AttVal attval) - { - String value; - - value = attval.value; - - if (value == null) - Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE); - else if (Lexer.wstrcasecmp(value, "top") == 0 || - Lexer.wstrcasecmp(value, "middle") == 0 || - Lexer.wstrcasecmp(value, "bottom") == 0 || - Lexer.wstrcasecmp(value, "baseline") == 0) - { - /* all is fine */ - } - else if (Lexer.wstrcasecmp(value, "left") == 0 || - Lexer.wstrcasecmp(value, "right") == 0) - { - if (!(node.tag != null && ((node.tag.model & Dict.CM_IMG) != 0))) - Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE); - } - else if (Lexer.wstrcasecmp(value, "texttop") == 0 || - Lexer.wstrcasecmp(value, "absmiddle") == 0 || - Lexer.wstrcasecmp(value, "absbottom") == 0 || - Lexer.wstrcasecmp(value, "textbottom") == 0) - { - lexer.versions &= Dict.VERS_PROPRIETARY; - Report.attrError(lexer, node, value, Report.PROPRIETARY_ATTR_VALUE); - } - else - Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE); - } - - }; - - public static class CheckBool implements AttrCheck { - - public void check( Lexer lexer, Node node, AttVal attval) - { - } - - }; - - public static class CheckId implements AttrCheck { - - public void check( Lexer lexer, Node node, AttVal attval) - { - } - - }; - - public static class CheckName implements AttrCheck { - - public void check( Lexer lexer, Node node, AttVal attval) - { - } - - }; - - public static AttrCheck getCheckUrl() - { - return _checkUrl; - } - - public static AttrCheck getCheckScript() - { - return _checkScript; - } - - public static AttrCheck getCheckAlign() - { - return 
_checkAlign; - } - - public static AttrCheck getCheckValign() - { - return _checkValign; - } - - public static AttrCheck getCheckBool() - { - return _checkBool; - } - - public static AttrCheck getCheckId() - { - return _checkId; - } - - public static AttrCheck getCheckName() - { - return _checkName; - } - - - private static AttrCheck _checkUrl = new CheckUrl(); - private static AttrCheck _checkScript = new CheckScript(); - private static AttrCheck _checkAlign = new CheckAlign(); - private static AttrCheck _checkValign = new CheckValign(); - private static AttrCheck _checkBool = new CheckBool(); - private static AttrCheck _checkId = new CheckId(); - private static AttrCheck _checkName = new CheckName(); - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Attribute.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Attribute.java deleted file mode 100644 index e42cf49..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Attribute.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * @(#)Attribute.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * HTML attribute - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class Attribute { - - public Attribute( String name, - boolean nowrap, - short versions, - AttrCheck attrchk ) - { - this.name = name; - this.nowrap = nowrap; - this.literal = false; - this.versions = versions; - this.attrchk = attrchk; - } - - public Attribute( String name, - short versions, - AttrCheck attrchk ) - { - this.name = name; - this.nowrap = false; - this.literal = false; - this.versions = versions; - this.attrchk = attrchk; - } - - public String name; - public boolean nowrap; - public boolean literal; - public short versions; - public AttrCheck attrchk; - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttributeTable.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttributeTable.java deleted file mode 100644 index 263f0f5..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttributeTable.java +++ /dev/null @@ -1,317 +0,0 @@ -/* - * @(#)AttributeTable.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -import java.util.Hashtable; -import java.util.Enumeration; - -/** - * - * HTML attribute hash table - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class AttributeTable { - - public AttributeTable() - { - } - - public Attribute lookup( String name ) - { - return (Attribute)attributeHashtable.get( name ); - } - - public Attribute install( Attribute attr ) - { - return (Attribute)attributeHashtable.put( attr.name, attr ); - } - - /* public method for finding attribute definition by name */ - public Attribute findAttribute( AttVal attval ) - { - Attribute np; - - if ( attval.attribute != null ) { - np = lookup( attval.attribute ); - return np; - } - - return null; - } - - public boolean isUrl( String attrname ) - { - Attribute np; - - np = lookup( attrname ); - return ( np != null && np.attrchk == AttrCheckImpl.getCheckUrl() ); - } - - public boolean isScript( String attrname ) - { - Attribute np; - - np = lookup( attrname ); - return ( np != null && np.attrchk == AttrCheckImpl.getCheckScript() ); - } - - public boolean isLiteralAttribute( String attrname ) - { - Attribute np; - - np = lookup( attrname ); - return ( np != null && np.literal ); - } - - /* - Henry Zrepa reports that some folk are - using embed with script attributes where - newlines are signficant. These need to be - declared and handled specially! 
- */ - public void declareLiteralAttrib(String name) - { - Attribute attrib = lookup(name); - - if (attrib == null) - attrib = install(new Attribute(name, Dict.VERS_PROPRIETARY, null)); - - attrib.literal = true; - } - - private Hashtable attributeHashtable = new Hashtable(); - - private static AttributeTable defaultAttributeTable = null; - - private static Attribute[] attrs = { - - new Attribute( "abbr", Dict.VERS_HTML40, null ), - new Attribute( "accept-charset", Dict.VERS_HTML40, null ), - new Attribute( "accept", Dict.VERS_ALL, null ), - new Attribute( "accesskey", Dict.VERS_HTML40, null ), - new Attribute( "action", Dict.VERS_ALL, AttrCheckImpl.getCheckUrl() ), - new Attribute( "add_date", Dict.VERS_NETSCAPE, null ), /* A */ - new Attribute( "align", Dict.VERS_ALL, AttrCheckImpl.getCheckAlign() ), /* set varies with element */ - new Attribute( "alink", Dict.VERS_LOOSE, null ), - new Attribute( "alt", Dict.VERS_ALL, null ), - new Attribute( "archive", Dict.VERS_HTML40, null ), /* space or comma separated list */ - new Attribute( "axis", Dict.VERS_HTML40, null ), - new Attribute( "background", Dict.VERS_LOOSE, AttrCheckImpl.getCheckUrl() ), - new Attribute( "bgcolor", Dict.VERS_LOOSE, null ), - new Attribute( "bgproperties", Dict.VERS_PROPRIETARY, null ), /* BODY "fixed" fixes background */ - new Attribute( "border", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* like LENGTH + "border" */ - new Attribute( "bordercolor", Dict.VERS_MICROSOFT, null ), /* used on TABLE */ - new Attribute( "bottommargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */ - new Attribute( "cellpadding", Dict.VERS_FROM32, null ), /* % or pixel values */ - new Attribute( "cellspacing", Dict.VERS_FROM32, null ), - new Attribute( "char", Dict.VERS_HTML40, null ), - new Attribute( "charoff", Dict.VERS_HTML40, null ), - new Attribute( "charset", Dict.VERS_HTML40, null ), - new Attribute( "checked", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* i.e. 
"checked" or absent */ - new Attribute( "cite", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), - new Attribute( "class", Dict.VERS_HTML40, null ), - new Attribute( "classid", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), - new Attribute( "clear", Dict.VERS_LOOSE, null ), /* BR: left, right, all */ - new Attribute( "code", Dict.VERS_LOOSE, null ), /* APPLET */ - new Attribute( "codebase", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* OBJECT */ - new Attribute( "codetype", Dict.VERS_HTML40, null ), /* OBJECT */ - new Attribute( "color", Dict.VERS_LOOSE, null ), /* BASEFONT, FONT */ - new Attribute( "cols", Dict.VERS_IFRAMES, null ), /* TABLE & FRAMESET */ - new Attribute( "colspan", Dict.VERS_FROM32, null ), - new Attribute( "compact", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* lists */ - new Attribute( "content", Dict.VERS_ALL, null ), /* META */ - new Attribute( "coords", Dict.VERS_FROM32, null ), /* AREA, A */ - new Attribute( "data", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* OBJECT */ - new Attribute( "datafld", Dict.VERS_MICROSOFT, null ), /* used on DIV, IMG */ - new Attribute( "dataformatas", Dict.VERS_MICROSOFT, null ), /* used on DIV, IMG */ - new Attribute( "datapagesize", Dict.VERS_MICROSOFT, null ), /* used on DIV, IMG */ - new Attribute( "datasrc", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckUrl() ), /* used on TABLE */ - new Attribute( "datetime", Dict.VERS_HTML40, null ), /* INS, DEL */ - new Attribute( "declare", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* OBJECT */ - new Attribute( "defer", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* SCRIPT */ - new Attribute( "dir", Dict.VERS_HTML40, null ), /* ltr or rtl */ - new Attribute( "disabled", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* form fields */ - new Attribute( "enctype", Dict.VERS_ALL, null ), /* FORM */ - new Attribute( "face", Dict.VERS_LOOSE, null ), /* BASEFONT, FONT */ - new Attribute( "for", Dict.VERS_HTML40, null ), /* LABEL */ - new 
Attribute( "frame", Dict.VERS_HTML40, null ), /* TABLE */ - new Attribute( "frameborder", Dict.VERS_FRAMES, null ), /* 0 or 1 */ - new Attribute( "framespacing", Dict.VERS_PROPRIETARY, null ), /* pixel value */ - new Attribute( "gridx", Dict.VERS_PROPRIETARY, null ), /* TABLE Adobe golive*/ - new Attribute( "gridy", Dict.VERS_PROPRIETARY, null ), /* TABLE Adobe golive */ - new Attribute( "headers", Dict.VERS_HTML40, null ), /* table cells */ - new Attribute( "height", Dict.VERS_ALL, null ), /* pixels only for TH/TD */ - new Attribute( "href", Dict.VERS_ALL, AttrCheckImpl.getCheckUrl() ), /* A, AREA, LINK and BASE */ - new Attribute( "hreflang", Dict.VERS_HTML40, null ), /* A, LINK */ - new Attribute( "hspace", Dict.VERS_ALL, null ), /* APPLET, IMG, OBJECT */ - new Attribute( "http-equiv", Dict.VERS_ALL, null ), /* META */ - new Attribute( "id", Dict.VERS_HTML40, AttrCheckImpl.getCheckId() ), - new Attribute( "ismap", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* IMG */ - new Attribute( "label", Dict.VERS_HTML40, null ), /* OPT, OPTGROUP */ - new Attribute( "lang", Dict.VERS_HTML40, null ), - new Attribute( "language", Dict.VERS_LOOSE, null ), /* SCRIPT */ - new Attribute( "last_modified", Dict.VERS_NETSCAPE, null ), /* A */ - new Attribute( "last_visit", Dict.VERS_NETSCAPE, null ), /* A */ - new Attribute( "leftmargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */ - new Attribute( "link", Dict.VERS_LOOSE, null ), /* BODY */ - new Attribute( "longdesc", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* IMG */ - new Attribute( "lowsrc", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckUrl() ), /* IMG */ - new Attribute( "marginheight", Dict.VERS_IFRAMES, null ), /* FRAME, IFRAME, BODY */ - new Attribute( "marginwidth", Dict.VERS_IFRAMES, null ), /* ditto */ - new Attribute( "maxlength", Dict.VERS_ALL, null ), /* INPUT */ - new Attribute( "media", Dict.VERS_HTML40, null ), /* STYLE, LINK */ - new Attribute( "method", Dict.VERS_ALL, null ), /* FORM: get or post */ 
- new Attribute( "multiple", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* SELECT */ - new Attribute( "name", Dict.VERS_ALL, AttrCheckImpl.getCheckName() ), - new Attribute( "nohref", Dict.VERS_FROM32, AttrCheckImpl.getCheckBool() ), /* AREA */ - new Attribute( "noresize", Dict.VERS_FRAMES, AttrCheckImpl.getCheckBool() ), /* FRAME */ - new Attribute( "noshade", Dict.VERS_LOOSE, AttrCheckImpl.getCheckBool() ), /* HR */ - new Attribute( "nowrap", Dict.VERS_LOOSE, AttrCheckImpl.getCheckBool() ), /* table cells */ - new Attribute( "object", Dict.VERS_HTML40_LOOSE, null ), /* APPLET */ - new Attribute( "onblur", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onchange", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onclick", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "ondblclick", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onkeydown", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onkeypress", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onkeyup", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onload", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onmousedown", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onmousemove", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onmouseout", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onmouseover", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onmouseup", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onsubmit", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onreset", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - 
new Attribute( "onselect", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onunload", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ - new Attribute( "onafterupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */ - new Attribute( "onbeforeupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */ - new Attribute( "onerrorupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */ - new Attribute( "onrowenter", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */ - new Attribute( "onrowexit", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */ - new Attribute( "onbeforeunload", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */ - new Attribute( "ondatasetchanged", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* object, applet */ - new Attribute( "ondataavailable", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* object, applet */ - new Attribute( "ondatasetcomplete",Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* object, applet */ - new Attribute( "profile", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* HEAD */ - new Attribute( "prompt", Dict.VERS_LOOSE, null ), /* ISINDEX */ - new Attribute( "readonly", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* form fields */ - new Attribute( "rel", Dict.VERS_ALL, null ), /* A, LINK */ - new Attribute( "rev", Dict.VERS_ALL, null ), /* A, LINK */ - new Attribute( "rightmargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */ - new Attribute( "rows", Dict.VERS_ALL, null ), /* TEXTAREA */ - new Attribute( "rowspan", Dict.VERS_ALL, null ), /* table cells */ - new Attribute( "rules", Dict.VERS_HTML40, null ), /* TABLE */ - new Attribute( "scheme", Dict.VERS_HTML40, null ), /* META */ - new Attribute( "scope", Dict.VERS_HTML40, null ), /* table cells */ - new Attribute( "scrolling", Dict.VERS_IFRAMES, null ), 
/* yes, no or auto */ - new Attribute( "selected", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* OPTION */ - new Attribute( "shape", Dict.VERS_FROM32, null ), /* AREA, A */ - new Attribute( "showgrid", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckBool() ), /* TABLE Adobe golive */ - new Attribute( "showgridx", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckBool() ), /* TABLE Adobe golive*/ - new Attribute( "showgridy", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckBool() ), /* TABLE Adobe golive*/ - new Attribute( "size", Dict.VERS_LOOSE, null ), /* HR, FONT, BASEFONT, SELECT */ - new Attribute( "span", Dict.VERS_HTML40, null ), /* COL, COLGROUP */ - new Attribute( "src", (short)(Dict.VERS_ALL | Dict.VERS_FRAMES), AttrCheckImpl.getCheckUrl() ), /* IMG, FRAME, IFRAME */ - new Attribute( "standby", Dict.VERS_HTML40, null ), /* OBJECT */ - new Attribute( "start", Dict.VERS_ALL, null ), /* OL */ - new Attribute( "style", Dict.VERS_HTML40, null ), - new Attribute( "summary", Dict.VERS_HTML40, null ), /* TABLE */ - new Attribute( "tabindex", Dict.VERS_HTML40, null ), /* fields, OBJECT and A */ - new Attribute( "target", Dict.VERS_HTML40, null ), /* names a frame/window */ - new Attribute( "text", Dict.VERS_LOOSE, null ), /* BODY */ - new Attribute( "title", Dict.VERS_HTML40, null ), /* text tool tip */ - new Attribute( "topmargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */ - new Attribute( "type", Dict.VERS_FROM32, null ), /* also used by SPACER */ - new Attribute( "usemap", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* things with images */ - new Attribute( "valign", Dict.VERS_FROM32, AttrCheckImpl.getCheckValign() ), - new Attribute( "value", Dict.VERS_ALL, null ), /* OPTION, PARAM */ - new Attribute( "valuetype", Dict.VERS_HTML40, null ), /* PARAM: data, ref, object */ - new Attribute( "version", Dict.VERS_ALL, null ), /* HTML */ - new Attribute( "vlink", Dict.VERS_LOOSE, null ), /* BODY */ - new Attribute( "vspace", Dict.VERS_LOOSE, null ), /* IMG, OBJECT, 
APPLET */ - new Attribute( "width", Dict.VERS_ALL, null ), /* pixels only for TD/TH */ - new Attribute( "wrap", Dict.VERS_NETSCAPE, null ), /* textarea */ - new Attribute( "xml:lang", Dict.VERS_XML, null ), /* XML language */ - new Attribute( "xmlns", Dict.VERS_ALL, null ), /* name space */ - - }; - - public static Attribute attrHref = null; - public static Attribute attrSrc = null; - public static Attribute attrId = null; - public static Attribute attrName = null; - public static Attribute attrSummary = null; - public static Attribute attrAlt = null; - public static Attribute attrLongdesc = null; - public static Attribute attrUsemap = null; - public static Attribute attrIsmap = null; - public static Attribute attrLanguage = null; - public static Attribute attrType = null; - public static Attribute attrTitle = null; - public static Attribute attrXmlns = null; - public static Attribute attrValue = null; - public static Attribute attrContent = null; - public static Attribute attrDatafld = null; - public static Attribute attrWidth = null; - public static Attribute attrHeight = null; - - public static AttributeTable getDefaultAttributeTable() - { - if ( defaultAttributeTable == null ) { - defaultAttributeTable = new AttributeTable(); - for ( int i = 0; i < attrs.length; i++ ) { - defaultAttributeTable.install( attrs[i] ); - } - attrHref = defaultAttributeTable.lookup("href"); - attrSrc = defaultAttributeTable.lookup("src"); - attrId = defaultAttributeTable.lookup("id"); - attrName = defaultAttributeTable.lookup("name"); - attrSummary = defaultAttributeTable.lookup("summary"); - attrAlt = defaultAttributeTable.lookup("alt"); - attrLongdesc = defaultAttributeTable.lookup("longdesc"); - attrUsemap = defaultAttributeTable.lookup("usemap"); - attrIsmap = defaultAttributeTable.lookup("ismap"); - attrLanguage = defaultAttributeTable.lookup("language"); - attrType = defaultAttributeTable.lookup("type"); - attrTitle = defaultAttributeTable.lookup("title"); - attrXmlns = 
defaultAttributeTable.lookup("xmlns"); - attrValue = defaultAttributeTable.lookup("value"); - attrContent = defaultAttributeTable.lookup("content"); - attrDatafld = defaultAttributeTable.lookup("datafld");; - attrWidth = defaultAttributeTable.lookup("width");; - attrHeight = defaultAttributeTable.lookup("height");; - - attrAlt.nowrap = true; - attrValue.nowrap = true; - attrContent.nowrap = true; - } - return defaultAttributeTable; - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/CheckAttribs.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/CheckAttribs.java deleted file mode 100644 index 62af24f..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/CheckAttribs.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * @(#)CheckAttribs.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Check HTML attributes - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public interface CheckAttribs { - - public void check( Lexer lexer, Node node ); - -} - diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/CheckAttribsImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/CheckAttribsImpl.java deleted file mode 100644 index 11ccf04..0000000 --- 
a/net.sourceforge.phpeclipse/src/org/w3c/tidy/CheckAttribsImpl.java +++ /dev/null @@ -1,403 +0,0 @@ -/* - * @(#)CheckAttribsImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Check HTML attributes implementation - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class CheckAttribsImpl { - - public static class CheckHTML implements CheckAttribs { - - public void check( Lexer lexer, Node node ) - { - AttVal attval; - Attribute attribute; - - node.checkUniqueAttributes(lexer); - - for (attval = node.attributes; attval != null; attval = attval.next) - { - attribute = attval.checkAttribute(lexer, node ); - - if (attribute == AttributeTable.attrXmlns) - lexer.isvoyager = true; - } - } - - }; - - public static class CheckSCRIPT implements CheckAttribs { - - public void check( Lexer lexer, Node node ) - { - Attribute attribute; - AttVal lang, type; - - node.checkUniqueAttributes(lexer); - - lang = node.getAttrByName("language"); - type = node.getAttrByName("type"); - - if (type == null) - { - Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE); - - /* check for javascript */ - - if (lang != null) - { - String str = lang.value; - if 
(str.length() > 10) - str = str.substring(0, 10); - if ( (Lexer.wstrcasecmp(str, "javascript") == 0) || - (Lexer.wstrcasecmp(str, "jscript") == 0) ) - { - node.addAttribute("type", "text/javascript"); - } - } - else - node.addAttribute("type", "text/javascript"); - } - } - - }; - - public static class CheckTABLE implements CheckAttribs { - - public void check( Lexer lexer, Node node ) - { - AttVal attval; - Attribute attribute; - boolean hasSummary = false; - - node.checkUniqueAttributes(lexer); - - for (attval = node.attributes; attval != null; attval = attval.next) - { - attribute = attval.checkAttribute(lexer, node); - - if (attribute == AttributeTable.attrSummary) - hasSummary = true; - } - - /* suppress warning for missing summary for HTML 2.0 and HTML 3.2 */ - if (!hasSummary && lexer.doctype != Dict.VERS_HTML20 && lexer.doctype != Dict.VERS_HTML32) - { - lexer.badAccess |= Report.MISSING_SUMMARY; - Report.attrError(lexer, node, "summary", Report.MISSING_ATTRIBUTE); - } - - /* convert to
*/ - if (lexer.configuration.XmlOut) - { - attval = node.getAttrByName("border"); - if (attval != null) - { - if (attval.value == null) - attval.value = "1"; - } - } - } - - }; - - public static class CheckCaption implements CheckAttribs { - - public void check( Lexer lexer, Node node ) - { - AttVal attval; - String value = null; - - node.checkUniqueAttributes(lexer); - - for (attval = node.attributes; attval != null; attval = attval.next) - { - if ( Lexer.wstrcasecmp(attval.attribute, "align") == 0 ) - { - value = attval.value; - break; - } - } - - if (value != null) - { - if (Lexer.wstrcasecmp(value, "left") == 0 || Lexer.wstrcasecmp(value, "right") == 0) - lexer.versions &= (short)(Dict.VERS_HTML40_LOOSE|Dict.VERS_FRAMES); - else if (Lexer.wstrcasecmp(value, "top") == 0 || Lexer.wstrcasecmp(value, "bottom") == 0) - lexer.versions &= Dict.VERS_FROM32; - else - Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE); - } - } - - }; - - public static class CheckHR implements CheckAttribs { - - public void check( Lexer lexer, Node node ) - { - if (node.getAttrByName("src") != null) - Report.attrError(lexer, node, "src", Report.PROPRIETARY_ATTR_VALUE); - } - }; - - public static class CheckIMG implements CheckAttribs { - - public void check( Lexer lexer, Node node ) - { - AttVal attval; - Attribute attribute; - boolean hasAlt = false; - boolean hasSrc = false; - boolean hasUseMap = false; - boolean hasIsMap = false; - boolean hasDataFld = false; - - node.checkUniqueAttributes(lexer); - - for (attval = node.attributes; attval != null; attval = attval.next) - { - attribute = attval.checkAttribute( lexer, node ); - - if (attribute == AttributeTable.attrAlt) - hasAlt = true; - else if (attribute == AttributeTable.attrSrc) - hasSrc = true; - else if (attribute == AttributeTable.attrUsemap) - hasUseMap = true; - else if (attribute == AttributeTable.attrIsmap) - hasIsMap = true; - else if (attribute == AttributeTable.attrDatafld) - hasDataFld = true; - else if 
(attribute == AttributeTable.attrWidth || - attribute == AttributeTable.attrHeight) - lexer.versions &= ~Dict.VERS_HTML20; - } - - if (!hasAlt) - { - lexer.badAccess |= Report.MISSING_IMAGE_ALT; - Report.attrError(lexer, node, "alt", Report.MISSING_ATTRIBUTE); - if (lexer.configuration.altText != null) - node.addAttribute("alt", lexer.configuration.altText); - } - - if (!hasSrc && !hasDataFld) - Report.attrError(lexer, node, "src", Report.MISSING_ATTRIBUTE); - - if (hasIsMap && !hasUseMap) - Report.attrError(lexer, node, "ismap", Report.MISSING_IMAGEMAP); - } - - }; - - public static class CheckAREA implements CheckAttribs { - - public void check( Lexer lexer, Node node ) - { - AttVal attval; - Attribute attribute; - boolean hasAlt = false; - boolean hasHref = false; - - node.checkUniqueAttributes(lexer); - - for (attval = node.attributes; attval != null; attval = attval.next) - { - attribute = attval.checkAttribute( lexer, node ); - - if (attribute == AttributeTable.attrAlt) - hasAlt = true; - else if (attribute == AttributeTable.attrHref) - hasHref = true; - } - - if (!hasAlt) - { - lexer.badAccess |= Report.MISSING_LINK_ALT; - Report.attrError(lexer, node, "alt", Report.MISSING_ATTRIBUTE); - } - if (!hasHref) - Report.attrError(lexer, node, "href", Report.MISSING_ATTRIBUTE); - } - - }; - - public static class CheckAnchor implements CheckAttribs { - - public void check( Lexer lexer, Node node ) - { - node.checkUniqueAttributes(lexer); - - lexer.fixId(node); - } - }; - - public static class CheckMap implements CheckAttribs { - - public void check( Lexer lexer, Node node ) - { - node.checkUniqueAttributes(lexer); - - lexer.fixId(node); - } - } - - public static class CheckSTYLE implements CheckAttribs { - - public void check( Lexer lexer, Node node ) - { - AttVal type = node.getAttrByName("type"); - - node.checkUniqueAttributes(lexer); - - if (type == null) - { - Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE); - - node.addAttribute("type", 
"text/css"); - } - } - } - - public static class CheckTableCell implements CheckAttribs { - - public void check( Lexer lexer, Node node ) - { - node.checkUniqueAttributes(lexer); - - /* - HTML4 strict doesn't allow mixed content for - elements with %block; as their content model - */ - if (node.getAttrByName("width") != null || node.getAttrByName("height") != null) - lexer.versions &= ~Dict.VERS_HTML40_STRICT; - } - } - - /* add missing type attribute when appropriate */ - public static class CheckLINK implements CheckAttribs { - - public void check( Lexer lexer, Node node ) - { - AttVal rel = node.getAttrByName("rel"); - - node.checkUniqueAttributes(lexer); - - if (rel != null && rel.value != null && - rel.value.equals("stylesheet")) - { - AttVal type = node.getAttrByName("type"); - - if (type == null) - { - Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE); - - node.addAttribute("type", "text/css"); - } - } - } - } - - public static CheckAttribs getCheckHTML() - { - return _checkHTML; - } - - public static CheckAttribs getCheckSCRIPT() - { - return _checkSCRIPT; - } - - public static CheckAttribs getCheckTABLE() - { - return _checkTABLE; - } - - public static CheckAttribs getCheckCaption() - { - return _checkCaption; - } - - public static CheckAttribs getCheckIMG() - { - return _checkIMG; - } - - public static CheckAttribs getCheckAREA() - { - return _checkAREA; - } - - public static CheckAttribs getCheckAnchor() - { - return _checkAnchor; - } - - public static CheckAttribs getCheckMap() - { - return _checkMap; - } - - public static CheckAttribs getCheckSTYLE() - { - return _checkStyle; - } - - public static CheckAttribs getCheckTableCell() - { - return _checkTableCell; - } - - public static CheckAttribs getCheckLINK() - { - return _checkLINK; - } - - public static CheckAttribs getCheckHR() - { - return _checkHR; - } - - - private static CheckAttribs _checkHTML = new CheckHTML(); - private static CheckAttribs _checkSCRIPT = new CheckSCRIPT(); - 
private static CheckAttribs _checkTABLE = new CheckTABLE(); - private static CheckAttribs _checkCaption = new CheckCaption(); - private static CheckAttribs _checkIMG = new CheckIMG(); - private static CheckAttribs _checkAREA = new CheckAREA(); - private static CheckAttribs _checkAnchor = new CheckAnchor(); - private static CheckAttribs _checkMap = new CheckMap(); - private static CheckAttribs _checkStyle = new CheckSTYLE(); - private static CheckAttribs _checkTableCell = new CheckTableCell(); - private static CheckAttribs _checkLINK = new CheckLINK(); - private static CheckAttribs _checkHR = new CheckHR(); - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Clean.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Clean.java deleted file mode 100644 index 458c84e..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Clean.java +++ /dev/null @@ -1,1779 +0,0 @@ -/* - * @(#)Clean.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Clean up misuse of presentation markup - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -/* - Filters from other formats such as Microsoft Word - often make excessive use of presentation markup such - as font tags, B, I, and the align attribute. By applying - a set of production rules, it is straight forward to - transform this to use CSS. - - Some rules replace some of the children of an element by - style properties on the element, e.g. - -

...

->

...

- - Such rules are applied to the element's content and then - to the element itself until none of the rules more apply. - Having applied all the rules to an element, it will have - a style attribute with one or more properties. - - Other rules strip the element they apply to, replacing - it by style properties on the contents, e.g. - -
  • ...

  • ->

    ... - - These rules are applied to an element before processing - its content and replace the current element by the first - element in the exposed content. - - After applying both sets of rules, you can replace the - style attribute by a class value and style rule in the - document head. To support this, an association of styles - and class names is built. - - A naive approach is to rely on string matching to test - when two property lists are the same. A better approach - would be to first sort the properties before matching. -*/ - -public class Clean { - - private int classNum = 1; - - private TagTable tt; - - public Clean(TagTable tt) - { - this.tt = tt; - } - - private StyleProp insertProperty(StyleProp props, String name, - String value) - { - StyleProp first, prev, prop; - int cmp; - - prev = null; - first = props; - - while (props != null) - { - cmp = props.name.compareTo(name); - - if (cmp == 0) - { - /* this property is already defined, ignore new value */ - return first; - } - - if (cmp > 0) // props.name > name - { - /* insert before this */ - - prop = new StyleProp(name, value, props); - - if (prev != null) - prev.next = prop; - else - first = prop; - - return first; - } - - prev = props; - props = props.next; - } - - prop = new StyleProp(name, value); - - if (prev != null) - prev.next = prop; - else - first = prop; - - return first; - } - - /* - Create sorted linked list of properties from style string - It temporarily places nulls in place of ':' and ';' to - delimit the strings for the property name and value. - Some systems don't allow you to null literal strings, - so to avoid this, a copy is made first. 
- */ - private StyleProp createProps(StyleProp prop, String style) - { - int name_end; - int value_end; - int value_start = 0; - int name_start = 0; - boolean more; - - name_start = 0; - while (name_start < style.length()) - { - while (name_start < style.length() && - style.charAt(name_start) == ' ') - ++name_start; - - name_end = name_start; - - while (name_end < style.length()) - { - if (style.charAt(name_end) == ':') - { - value_start = name_end + 1; - break; - } - - ++name_end; - } - - if (name_end >= style.length() || style.charAt(name_end) != ':') - break; - - while (value_start < style.length() && - style.charAt(value_start) == ' ') - ++value_start; - - value_end = value_start; - more = false; - - while (value_end < style.length()) - { - if (style.charAt(value_end) == ';') - { - more = true; - break; - } - - ++value_end; - } - - prop = insertProperty(prop, - style.substring(name_start, name_end), - style.substring(value_start, value_end)); - - if (more) - { - name_start = value_end + 1; - continue; - } - - break; - } - - return prop; - } - - private String createPropString(StyleProp props) - { - String style = ""; - int len; - StyleProp prop; - - /* compute length */ - - for (len = 0, prop = props; prop != null; prop = prop.next) - { - len += prop.name.length() + 2; - len += prop.value.length() + 2; - } - - for (prop = props; prop != null; prop = prop.next) - { - style = style.concat(prop.name); - style = style.concat(": "); - - style = style.concat(prop.value); - - if (prop.next == null) - break; - - style = style.concat("; "); - } - - return style; - } - - /* - create string with merged properties - */ - private String addProperty(String style, String property) - { - StyleProp prop; - - prop = createProps(null, style); - prop = createProps(prop, property); - style = createPropString(prop); - return style; - } - - private String gensymClass(String tag) - { - String str; - - str = "c" + classNum; - classNum++; - return str; - } - - private String 
findStyle(Lexer lexer, String tag, String properties) - { - Style style; - - for (style = lexer.styles; style != null; style=style.next) - { - if (style.tag.equals(tag) && - style.properties.equals(properties)) - return style.tagClass; - } - - style = new Style(tag, gensymClass(tag), properties, lexer.styles); - lexer.styles = style; - return style.tagClass; - } - - /* - Find style attribute in node, and replace it - by corresponding class attribute. Search for - class in style dictionary otherwise gensym - new class and add to dictionary. - - Assumes that node doesn't have a class attribute - */ - private void style2Rule(Lexer lexer, Node node) - { - AttVal styleattr, classattr; - String classname; - - styleattr = node.getAttrByName("style"); - - if (styleattr != null) - { - classname = findStyle(lexer, node.element, styleattr.value); - classattr = node.getAttrByName("class"); - - /* - if there already is a class attribute - then append class name after a space - */ - if (classattr != null) - { - classattr.value = classattr.value + " " + classname; - node.removeAttribute(styleattr); - } - else /* reuse style attribute for class attribute */ - { - styleattr.attribute = "class"; - styleattr.value = classname; - } - } - } - - private void addColorRule(Lexer lexer, String selector, String color) - { - if (color != null) - { - lexer.addStringLiteral(selector); - lexer.addStringLiteral(" { color: "); - lexer.addStringLiteral(color); - lexer.addStringLiteral(" }\n"); - } - } - - /* - move presentation attribs from body to style element - - background="foo" -> body { background-image: url(foo) } - bgcolor="foo" -> body { background-color: foo } - text="foo" -> body { color: foo } - link="foo" -> :link { color: foo } - vlink="foo" -> :visited { color: foo } - alink="foo" -> :active { color: foo } - */ - private void cleanBodyAttrs(Lexer lexer, Node body) - { - AttVal attr; - String bgurl = null; - String bgcolor = null; - String color = null; - - attr = 
body.getAttrByName("background"); - - if (attr != null) - { - bgurl = attr.value; - attr.value = null; - body.removeAttribute(attr); - } - - attr = body.getAttrByName("bgcolor"); - - if (attr != null) - { - bgcolor = attr.value; - attr.value = null; - body.removeAttribute(attr); - } - - attr = body.getAttrByName("text"); - - if (attr != null) - { - color = attr.value; - attr.value = null; - body.removeAttribute(attr); - } - - if (bgurl != null || bgcolor != null || color != null) - { - lexer.addStringLiteral(" body {\n"); - - if (bgurl != null) - { - lexer.addStringLiteral(" background-image: url("); - lexer.addStringLiteral(bgurl); - lexer.addStringLiteral(");\n"); - } - - if (bgcolor != null) - { - lexer.addStringLiteral(" background-color: "); - lexer.addStringLiteral(bgcolor); - lexer.addStringLiteral(";\n"); - } - - if (color != null) - { - lexer.addStringLiteral(" color: "); - lexer.addStringLiteral(color); - lexer.addStringLiteral(";\n"); - } - - lexer.addStringLiteral(" }\n"); - } - - attr = body.getAttrByName("link"); - - if (attr != null) - { - addColorRule(lexer, " :link", attr.value); - body.removeAttribute(attr); - } - - attr = body.getAttrByName("vlink"); - - if (attr != null) - { - addColorRule(lexer, " :visited", attr.value); - body.removeAttribute(attr); - } - - attr = body.getAttrByName("alink"); - - if (attr != null) - { - addColorRule(lexer, " :active", attr.value); - body.removeAttribute(attr); - } - } - - private boolean niceBody(Lexer lexer, Node doc) - { - Node body = doc.findBody(lexer.configuration.tt); - - if (body != null) - { - if ( - body.getAttrByName("background") != null || - body.getAttrByName("bgcolor") != null || - body.getAttrByName("text") != null || - body.getAttrByName("link") != null || - body.getAttrByName("vlink") != null || - body.getAttrByName("alink") != null - ) - { - lexer.badLayout |= Report.USING_BODY; - return false; - } - } - - return true; - } - - /* create style element using rules from dictionary */ - private 
void createStyleElement(Lexer lexer, Node doc) - { - Node node, head, body; - Style style; - AttVal av; - - if (lexer.styles == null && niceBody(lexer, doc)) - return; - - node = lexer.newNode(Node.StartTag, null, 0, 0, "style"); - node.implicit = true; - - /* insert type attribute */ - av = new AttVal(null, null, '"', "type", "text/css"); - av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); - node.attributes = av; - - body = doc.findBody(lexer.configuration.tt); - - lexer.txtstart = lexer.lexsize; - - if (body != null) - cleanBodyAttrs(lexer, body); - - for (style = lexer.styles; style != null; style = style.next) - { - lexer.addCharToLexer(' '); - lexer.addStringLiteral(style.tag); - lexer.addCharToLexer('.'); - lexer.addStringLiteral(style.tagClass); - lexer.addCharToLexer(' '); - lexer.addCharToLexer('{'); - lexer.addStringLiteral(style.properties); - lexer.addCharToLexer('}'); - lexer.addCharToLexer('\n'); - } - - lexer.txtend = lexer.lexsize; - - Node.insertNodeAtEnd(node, - lexer.newNode(Node.TextNode, - lexer.lexbuf, - lexer.txtstart, - lexer.txtend)); - - /* - now insert style element into document head - - doc is root node. 
search its children for html node - the head node should be first child of html node - */ - - head = doc.findHEAD(lexer.configuration.tt); - - if (head != null) - Node.insertNodeAtEnd(head, node); - } - - /* ensure bidirectional links are consistent */ - private void fixNodeLinks(Node node) - { - Node child; - - if (node.prev != null) - node.prev.next = node; - else - node.parent.content = node; - - if (node.next != null) - node.next.prev = node; - else - node.parent.last = node; - - for (child = node.content; child != null; child = child.next) - child.parent = node; - } - - /* - used to strip child of node when - the node has one and only one child - */ - private void stripOnlyChild(Node node) - { - Node child; - - child = node.content; - node.content = child.content; - node.last = child.last; - child.content = null; - - for (child = node.content; child != null; child = child.next) - child.parent = node; - } - - /* used to strip font start and end tags */ - private void discardContainer(Node element, MutableObject pnode) - { - Node node; - Node parent = element.parent; - - if (element.content != null) - { - element.last.next = element.next; - - if (element.next != null) - { - element.next.prev = element.last; - element.last.next = element.next; - } - else - parent.last = element.last; - - if (element.prev != null) - { - element.content.prev = element.prev; - element.prev.next = element.content; - } - else - parent.content = element.content; - - for (node = element.content; node != null; node = node.next) - node.parent = parent; - - pnode.setObject(element.content); - } - else - { - if (element.next != null) - element.next.prev = element.prev; - else - parent.last = element.prev; - - if (element.prev != null) - element.prev.next = element.next; - else - parent.content = element.next; - - pnode.setObject(element.next); - } - - element.next = null; - element.content = null; - } - - /* - Add style property to element, creating style - attribute as needed and adding ; 
delimiter - */ - private void addStyleProperty(Node node, String property) - { - AttVal av; - - for (av = node.attributes; av != null; av = av.next) - { - if (av.attribute.equals("style")) - break; - } - - /* if style attribute already exists then insert property */ - - if (av != null) - { - String s; - - s = addProperty(av.value, property); - av.value = s; - } - else /* else create new style attribute */ - { - av = new AttVal(node.attributes, null, '"', "style", property); - av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); - node.attributes = av; - } - } - - /* - Create new string that consists of the - combined style properties in s1 and s2 - - To merge property lists, we build a linked - list of property/values and insert properties - into the list in order, merging values for - the same property name. - */ - private String mergeProperties(String s1, String s2) - { - String s; - StyleProp prop; - - prop = createProps(null, s1); - prop = createProps(prop, s2); - s = createPropString(prop); - return s; - } - - private void mergeStyles(Node node, Node child) - { - AttVal av; - String s1, s2, style; - - for (s2 = null, av = child.attributes; av != null; av = av.next) - { - if (av.attribute.equals("style")) - { - s2 = av.value; - break; - } - } - - for (s1 = null, av = node.attributes; av != null; av = av.next) - { - if (av.attribute.equals("style")) - { - s1 = av.value; - break; - } - } - - if (s1 != null) - { - if (s2 != null) /* merge styles from both */ - { - style = mergeProperties(s1, s2); - av.value = style; - } - } - else if (s2 != null) /* copy style of child */ - { - av = new AttVal(node.attributes, null, '"', "style", s2); - av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); - node.attributes = av; - } - } - - private String fontSize2Name(String size) - { - /* - String[] sizes = - { - "50%", - "60%", - "80%", - null, - "120%", - "150%", - "200%" - }; - */ - - String[] sizes = - { - "60%", - "70%", - "80%", - null, 
- "120%", - "150%", - "200%" - }; - String buf; - - if (size.length() > 0 && - '0' <= size.charAt(0) && size.charAt(0) <= '6') - { - int n = size.charAt(0) - '0'; - return sizes[n]; - } - - if (size.length() > 0 && size.charAt(0) == '-') - { - if (size.length() > 1 && - '0' <= size.charAt(1) && size.charAt(1) <= '6') - { - int n = size.charAt(1) - '0'; - double x; - - for (x = 1.0; n > 0; --n) - x *= 0.8; - - x *= 100.0; - buf = "" + (int)x + "%"; - - return buf; - } - - return "smaller"; /*"70%"; */ - } - - if (size.length() > 1 && - '0' <= size.charAt(1) && size.charAt(1) <= '6') - { - int n = size.charAt(1) - '0'; - double x; - - for (x = 1.0; n > 0; --n) - x *= 1.2; - - x *= 100.0; - buf = "" + (int)x + "%"; - - return buf; - } - - return "larger"; /* "140%" */ - } - - private void addFontFace(Node node, String face) - { - addStyleProperty(node, "font-family: " + face); - } - - private void addFontSize(Node node, String size) - { - String value; - - if (size.equals("6") && node.tag == tt.tagP) - { - node.element = "h1"; - tt.findTag(node); - return; - } - - if (size.equals("5") && node.tag == tt.tagP) - { - node.element = "h2"; - tt.findTag(node); - return; - } - - if (size.equals("4") && node.tag == tt.tagP) - { - node.element = "h3"; - tt.findTag(node); - return; - } - - value = fontSize2Name(size); - - if (value != null) - { - addStyleProperty(node, "font-size: " + value); - } - } - - private void addFontColor(Node node, String color) - { - addStyleProperty(node, "color: " + color); - } - - private void addAlign(Node node, String align) - { - /* force alignment value to lower case */ - addStyleProperty(node, "text-align: " + align.toLowerCase()); - } - - /* - add style properties to node corresponding to - the font face, size and color attributes - */ - private void addFontStyles(Node node, AttVal av) - { - while (av != null) - { - if (av.attribute.equals("face")) - addFontFace(node, av.value); - else if (av.attribute.equals("size")) - addFontSize(node, 
av.value); - else if (av.attribute.equals("color")) - addFontColor(node, av.value); - - av = av.next; - } - } - - /* - Symptom:

    <p align=center> - Action: <p style="text-align: center">
    - */ - private void textAlign(Lexer lexer, Node node) - { - AttVal av, prev; - - prev = null; - - for (av = node.attributes; av != null; av = av.next) - { - if (av.attribute.equals("align")) - { - if (prev != null) - prev.next = av.next; - else - node.attributes = av.next; - - if (av.value != null) - { - addAlign(node, av.value); - } - - break; - } - - prev = av; - } - } - - /* - The clean up rules use the pnode argument to return the - next node when the orignal node has been deleted - */ - - /* - Symptom:

    <dir> <li> where <li> is only child - Action: coerce <dir> <li> to <div>
    with indent. - */ - - private boolean dir2Div(Lexer lexer, Node node, MutableObject pnode) - { - Node child; - - if (node.tag == tt.tagDir || - node.tag == tt.tagUl || - node.tag == tt.tagOl) - { - child = node.content; - - if (child == null) - return false; - - /* check child has no peers */ - - if (child.next != null) - return false; - - if (child.tag != tt.tagLi) - return false; - - if (!child.implicit) - return false; - - /* coerce dir to div */ - - node.tag = tt.tagDiv; - node.element = "div"; - addStyleProperty(node, "margin-left: 2em"); - stripOnlyChild(node); - return true; - -//#if 0 - //Node content; - //Node last; - //content = child.content; - //last = child.last; - //child.content = null; - - /* adjust parent and set margin on contents of
  • */ - - //for (child = content; child != null; child = child.next) - //{ - // child.parent = node.parent; - // addStyleProperty(child, "margin-left: 1em"); - //} - - /* hook first/last into sequence */ - - //if (content != null) - //{ - // content.prev = node.prev; - // last.next = node.next; - // fixNodeLinks(content); - // fixNodeLinks(last); - //} - - //node.next = null; - - /* ensure that new node is cleaned */ - //pnode.setObject(cleanNode(lexer, content)); - //return true; -//#endif - } - - return false; - } - - /* - Symptom:
    <center> - Action: replace <center> by <div style="text-align: center">
    - */ - - private boolean center2Div(Lexer lexer, Node node, MutableObject pnode) - { - if (node.tag == tt.tagCenter) - { - if (lexer.configuration.DropFontTags) - { - if (node.content != null) - { - Node last = node.last; - Node parent = node.parent; - - discardContainer(node, pnode); - - node = lexer.inferredTag("br"); - - if (last.next != null) - last.next.prev = node; - - node.next = last.next; - last.next = node; - node.prev = last; - - if (parent.last == last) - parent.last = node; - - node.parent = parent; - } - else - { - Node prev = node.prev; - Node next = node.next; - Node parent = node.parent; - discardContainer(node, pnode); - - node = lexer.inferredTag("br"); - node.next = next; - node.prev = prev; - node.parent = parent; - - if (next != null) - next.prev = node; - else - parent.last = node; - - if (prev != null) - prev.next = node; - else - parent.content = node; - } - - return true; - } - node.tag = tt.tagDiv; - node.element = "div"; - addStyleProperty(node, "text-align: center"); - return true; - } - - return false; - } - - /* - Symptom
    <div><div>...</div></div> - Action: merge the two divs - - This is useful after nested <dir>s used by Word - for indenting have been converted to <div>
    s - */ - private boolean mergeDivs(Lexer lexer, Node node, MutableObject pnode) - { - Node child; - - if (node.tag != tt.tagDiv) - return false; - - child = node.content; - - if (child == null) - return false; - - if (child.tag != tt.tagDiv) - return false; - - if (child.next != null) - return false; - - mergeStyles(node, child); - stripOnlyChild(node); - return true; - } - - /* - Symptom:
    <ul><li><ul>...</ul></li></ul>
    - Action: discard outer list - */ - - private boolean nestedList(Lexer lexer, Node node, MutableObject pnode) - { - Node child, list; - - if (node.tag == tt.tagUl || node.tag == tt.tagOl) - { - child = node.content; - - if (child == null) - return false; - - /* check child has no peers */ - - if (child.next != null) - return false; - - list = child.content; - - if (list == null) - return false; - - if (list.tag != node.tag) - return false; - - pnode.setObject(node.next); - - /* move inner list node into position of outer node */ - list.prev = node.prev; - list.next = node.next; - list.parent = node.parent; - fixNodeLinks(list); - - /* get rid of outer ul and its li */ - child.content = null; - node.content = null; - node.next = null; - - /* - If prev node was a list the chances are this node - should be appended to that list. Word has no way of - recognizing nested lists and just uses indents - */ - - if (list.prev != null) - { - node = list; - list = node.prev; - - if (list.tag == tt.tagUl || list.tag == tt.tagOl) - { - list.next = node.next; - - if (list.next != null) - list.next.prev = list; - - child = list.last; /*
  • */ - - node.parent = child; - node.next = null; - node.prev = child.last; - fixNodeLinks(node); - } - } - - cleanNode(lexer, node); - return true; - } - - return false; - } - - /* - Symptom: the only child of a block-level element is a - presentation element such as B, I or FONT - - Action: add style "font-weight: bold" to the block and - strip the element, leaving its children. - - example: - -

    - Draft Recommended Practice -

    - - becomes: - -

    - Draft Recommended Practice -

    - - This code also replaces the align attribute by a style attribute. - However, to avoid CSS problems with Navigator 4, this isn't done - for the elements: caption, tr and table - */ - private boolean blockStyle(Lexer lexer, Node node, MutableObject pnode) - { - Node child; - - if ((node.tag.model & (Dict.CM_BLOCK | Dict.CM_LIST | Dict.CM_DEFLIST | Dict.CM_TABLE)) != 0) - { - if (node.tag != tt.tagTable - && node.tag != tt.tagTr - && node.tag != tt.tagLi) - { - /* check for align attribute */ - if (node.tag != tt.tagCaption) - textAlign(lexer, node); - - child = node.content; - - if (child == null) - return false; - - /* check child has no peers */ - - if (child.next != null) - return false; - - if (child.tag == tt.tagB) - { - mergeStyles(node, child); - addStyleProperty(node, "font-weight: bold"); - stripOnlyChild(node); - return true; - } - - if (child.tag == tt.tagI) - { - mergeStyles(node, child); - addStyleProperty(node, "font-style: italic"); - stripOnlyChild(node); - return true; - } - - if (child.tag == tt.tagFont) - { - mergeStyles(node, child); - addFontStyles(node, child.attributes); - stripOnlyChild(node); - return true; - } - } - } - - return false; - } - - /* the only child of table cell or an inline element such as em */ - private boolean inlineStyle(Lexer lexer, Node node, MutableObject pnode) - { - Node child; - - if (node.tag != tt.tagFont && (node.tag.model & (Dict.CM_INLINE|Dict.CM_ROW)) != 0) - { - child = node.content; - - if (child == null) - return false; - - /* check child has no peers */ - - if (child.next != null) - return false; - - if (child.tag == tt.tagB && lexer.configuration.LogicalEmphasis) - { - mergeStyles(node, child); - addStyleProperty(node, "font-weight: bold"); - stripOnlyChild(node); - return true; - } - - if (child.tag == tt.tagI && lexer.configuration.LogicalEmphasis) - { - mergeStyles(node, child); - addStyleProperty(node, "font-style: italic"); - stripOnlyChild(node); - return true; - } - - if (child.tag == 
tt.tagFont) - { - mergeStyles(node, child); - addFontStyles(node, child.attributes); - stripOnlyChild(node); - return true; - } - } - - return false; - } - - /* - Replace font elements by span elements, deleting - the font element's attributes and replacing them - by a single style attribute. - */ - private boolean font2Span(Lexer lexer, Node node, MutableObject pnode) - { - AttVal av, style, next; - - if (node.tag == tt.tagFont) - { - if (lexer.configuration.DropFontTags) - { - discardContainer(node, pnode); - return false; - } - - /* if FONT is only child of parent element then leave alone */ - if (node.parent.content == node - && node.next == null) - return false; - - addFontStyles(node, node.attributes); - - /* extract style attribute and free the rest */ - av = node.attributes; - style = null; - - while (av != null) - { - next = av.next; - - if (av.attribute.equals("style")) - { - av.next = null; - style = av; - } - - av = next; - } - - node.attributes = style; - - node.tag = tt.tagSpan; - node.element = "span"; - - return true; - } - - return false; - } - - /* - Applies all matching rules to a node. 
- */ - private Node cleanNode(Lexer lexer, Node node) - { - Node next = null; - MutableObject o = new MutableObject(); - boolean b = false; - - for (next = node; node.isElement(); node = next) - { - o.setObject(next); - - b = dir2Div(lexer, node, o); - next = (Node)o.getObject(); - if (b) - continue; - - b = nestedList(lexer, node, o); - next = (Node)o.getObject(); - if (b) - continue; - - b = center2Div(lexer, node, o); - next = (Node)o.getObject(); - if (b) - continue; - - b = mergeDivs(lexer, node, o); - next = (Node)o.getObject(); - if (b) - continue; - - b = blockStyle(lexer, node, o); - next = (Node)o.getObject(); - if (b) - continue; - - b = inlineStyle(lexer, node, o); - next = (Node)o.getObject(); - if (b) - continue; - - b = font2Span(lexer, node, o); - next = (Node)o.getObject(); - if (b) - continue; - - break; - } - - return next; - } - - private Node createStyleProperties(Lexer lexer, Node node) - { - Node child; - - if (node.content != null) - { - for (child = node.content; child != null; child = child.next) - { - child = createStyleProperties(lexer, child); - } - } - - return cleanNode(lexer, node); - } - - private void defineStyleRules(Lexer lexer, Node node) - { - Node child; - - if (node.content != null) - { - for (child = node.content; - child != null; child = child.next) - { - defineStyleRules(lexer, child); - } - } - - style2Rule(lexer, node); - } - - public void cleanTree(Lexer lexer, Node doc) - { - doc = createStyleProperties(lexer, doc); - - if (!lexer.configuration.MakeClean) - { - defineStyleRules(lexer, doc); - createStyleElement(lexer, doc); - } - } - - /* simplifies ... ... etc. 
*/ - public void nestedEmphasis(Node node) - { - MutableObject o = new MutableObject(); - Node next; - - while (node != null) - { - next = node.next; - - if ((node.tag == tt.tagB || node.tag == tt.tagI) - && node.parent != null && node.parent.tag == node.tag) - { - /* strip redundant inner element */ - o.setObject(next); - discardContainer(node, o); - next = (Node)o.getObject(); - node = next; - continue; - } - - if (node.content != null) - nestedEmphasis(node.content); - - node = next; - } - } - - /* replace i by em and b by strong */ - public void emFromI(Node node) - { - while (node != null) - { - if (node.tag == tt.tagI) - { - node.element = tt.tagEm.name; - node.tag = tt.tagEm; - } - else if (node.tag == tt.tagB) - { - node.element = tt.tagStrong.name; - node.tag = tt.tagStrong; - } - - if (node.content != null) - emFromI(node.content); - - node = node.next; - } - } - - /* - Some people use dir or ul without an li - to indent the content. The pattern to - look for is a list with a single implicit - li. This is recursively replaced by an - implicit blockquote. 
- */ - public void list2BQ(Node node) - { - while (node != null) - { - if (node.content != null) - list2BQ(node.content); - - if (node.tag != null && node.tag.parser == ParserImpl.getParseList() && - node.hasOneChild() && node.content.implicit) - { - stripOnlyChild(node); - node.element = tt.tagBlockquote.name; - node.tag = tt.tagBlockquote; - node.implicit = true; - } - - node = node.next; - } - } - - /* - Replace implicit blockquote by div with an indent - taking care to reduce nested blockquotes to a single - div with the indent set to match the nesting depth - */ - public void bQ2Div(Node node) - { - int indent; - String indent_buf; - - while (node != null) - { - if (node.tag == tt.tagBlockquote && node.implicit) - { - indent = 1; - - while(node.hasOneChild() && - node.content.tag == tt.tagBlockquote && - node.implicit) - { - ++indent; - stripOnlyChild(node); - } - - if (node.content != null) - bQ2Div(node.content); - - indent_buf = "margin-left: " + - (new Integer(2*indent)).toString() + "em"; - - node.element = tt.tagDiv.name; - node.tag = tt.tagDiv; - node.addAttribute("style", indent_buf); - } - else if (node.content != null) - bQ2Div(node.content); - - - node = node.next; - } - } - - /* node is prune up to */ - public Node pruneSection(Lexer lexer, Node node) - { - for (;;) - { - /* discard node and returns next */ - node = Node.discardElement(node); - - if (node == null) - return null; - - if (node.type == Node.SectionTag) - { - if ((Lexer.getString(node.textarray, node.start, 2)).equals("if")) - { - node = pruneSection(lexer, node); - continue; - } - - if ((Lexer.getString(node.textarray, node.start, 5)).equals("endif")) - { - node = Node.discardElement(node); - break; - } - } - } - - return node; - } - - public void dropSections(Lexer lexer, Node node) - { - while (node != null) - { - if (node.type == Node.SectionTag) - { - /* prune up to matching endif */ - if ((Lexer.getString(node.textarray, node.start, 2)).equals("if")) - { - node = 
pruneSection(lexer, node); - continue; - } - - /* discard others as well */ - node = Node.discardElement(node); - continue; - } - - if (node.content != null) - dropSections(lexer, node.content); - - node = node.next; - } - } - - public void purgeAttributes(Node node) - { - AttVal attr = node.attributes; - AttVal next = null; - AttVal prev = null; - - while (attr != null) - { - next = attr.next; - - /* special check for class="Code" denoting pre text */ - if (attr.attribute != null && - attr.value != null && - attr.attribute.equals("class") && - attr.value.equals("Code")) - { - prev = attr; - } - else if (attr.attribute != null && - (attr.attribute.equals("class") || - attr.attribute.equals("style") || - attr.attribute.equals("lang") || - attr.attribute.startsWith("x:") || - ((attr.attribute.equals("height") || attr.attribute.equals("width")) && - (node.tag == tt.tagTd || node.tag == tt.tagTr || node.tag == tt.tagTh)))) - { - if (prev != null) - prev.next = next; - else - node.attributes = next; - - } - else - prev = attr; - - attr = next; - } - } - - /* Word2000 uses span excessively, so we strip span out */ - public Node stripSpan(Lexer lexer, Node span) - { - Node node; - Node prev = null; - Node content; - - /* - deal with span elements that have content - by splicing the content in place of the span - after having processed it - */ - - cleanWord2000(lexer, span.content); - content = span.content; - - if (span.prev != null) - prev = span.prev; - else if (content != null) - { - node = content; - content = content.next; - Node.removeNode(node); - Node.insertNodeBeforeElement(span, node); - prev = node; - } - - while (content != null) - { - node = content; - content = content.next; - Node.removeNode(node); - Node.insertNodeAfterElement(prev, node); - prev = node; - } - - if (span.next == null) - span.parent.last = prev; - - node = span.next; - span.content = null; - Node.discardElement(span); - return node; - } - - /* map non-breaking spaces to regular spaces */ - 
private void normalizeSpaces(Lexer lexer, Node node) - { - while (node != null) - { - if (node.content != null) - normalizeSpaces(lexer, node.content); - - if (node.type == Node.TextNode) - { - int i; - MutableInteger c = new MutableInteger(); - int p = node.start; - - for (i = node.start; i < node.end; ++i) - { - c.value = (int)node.textarray[i]; - - /* look for UTF-8 multibyte character */ - if (c.value > 0x7F) - i += PPrint.getUTF8(node.textarray, i, c); - - if (c.value == 160) - c.value = ' '; - - p = PPrint.putUTF8(node.textarray, p, c.value); - } - } - - node = node.next; - } - } - - /* - This is a major clean up to strip out all the extra stuff you get - when you save as web page from Word 2000. It doesn't yet know what - to do with VML tags, but these will appear as errors unless you - declare them as new tags, such as o:p which needs to be declared - as inline. - */ - public void cleanWord2000(Lexer lexer, Node node) - { - /* used to a list from a sequence of bulletted p's */ - Node list = null; - - while (node != null) - { - /* discard Word's style verbiage */ - if (node.tag == tt.tagStyle || - node.tag == tt.tagMeta || - node.type == Node.CommentTag) - { - node = Node.discardElement(node); - continue; - } - - /* strip out all span tags Word scatters so liberally! 
*/ - if (node.tag == tt.tagSpan) - { - node = stripSpan(lexer, node); - continue; - } - - /* get rid of Word's xmlns attributes */ - if (node.tag == tt.tagHtml) - { - /* check that it's a Word 2000 document */ - if (node.getAttrByName("xmlns:o") == null) - return; - } - - if (node.tag == tt.tagLink) - { - AttVal attr = node.getAttrByName("rel"); - - if (attr != null && attr.value != null && - attr.value.equals("File-List")) - { - node = Node.discardElement(node); - continue; - } - } - - /* discard empty paragraphs */ - if (node.content == null && node.tag == tt.tagP) - { - node = Node.discardElement(node); - continue; - } - - if (node.tag == tt.tagP) - { - AttVal attr = node.getAttrByName("class"); - - /* map sequence of

    <p class="MsoListBullet"> to <ul>...</ul>
    */ - if (attr != null && attr.value != null && - attr.value.equals("MsoListBullet")) - { - Node.coerceNode(lexer, node, tt.tagLi); - - if (list == null || list.tag != tt.tagUl) - { - list = lexer.inferredTag("ul"); - Node.insertNodeBeforeElement(node, list); - } - - purgeAttributes(node); - - if (node.content != null) - cleanWord2000(lexer, node.content); - - /* remove node and append to contents of list */ - Node.removeNode(node); - Node.insertNodeAtEnd(list, node); - node = list.next; - } - /* map sequence of

    <p class="Code"> to <pre>...</pre>
    */ - else if (attr != null && attr.value != null && - attr.value.equals("Code")) - { - Node br = lexer.newLineNode(); - normalizeSpaces(lexer, node); - - if (list == null || list.tag != tt.tagPre) - { - list = lexer.inferredTag("pre"); - Node.insertNodeBeforeElement(node, list); - } - - /* remove node and append to contents of list */ - Node.removeNode(node); - Node.insertNodeAtEnd(list, node); - stripSpan(lexer, node); - Node.insertNodeAtEnd(list, br); - node = list.next; - } - else - list = null; - } - else - list = null; - - /* strip out style and class attributes */ - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - purgeAttributes(node); - - if (node.content != null) - cleanWord2000(lexer, node.content); - - node = node.next; - } - } - - public boolean isWord2000(Node root, TagTable tt) - { - Node html = root.findHTML(tt); - - return (html != null && html.getAttrByName("xmlns:o") != null); - } -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Configuration.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Configuration.java deleted file mode 100644 index b2d8dfd..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Configuration.java +++ /dev/null @@ -1,600 +0,0 @@ -/* - * @(#)Configuration.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Read configuration file and manage configuration properties. - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -/* - Configuration files associate a property name with a value. - The format is that of a Java .properties file. -*/ - -import java.util.Enumeration; -import java.util.Properties; -import java.util.StringTokenizer; -import java.io.FileInputStream; -import java.io.IOException; - -public class Configuration implements java.io.Serializable { - - /* character encodings */ - public static final int RAW = 0; - public static final int ASCII = 1; - public static final int LATIN1 = 2; - public static final int UTF8 = 3; - public static final int ISO2022 = 4; - public static final int MACROMAN = 5; - - /* mode controlling treatment of doctype */ - public static final int DOCTYPE_OMIT = 0; - public static final int DOCTYPE_AUTO = 1; - public static final int DOCTYPE_STRICT= 2; - public static final int DOCTYPE_LOOSE = 3; - public static final int DOCTYPE_USER = 4; - - protected int spaces = 2; /* default indentation */ - protected int wraplen = 68; /* default wrap margin */ - protected int CharEncoding = ASCII; - protected int tabsize = 4; - - protected int docTypeMode = DOCTYPE_AUTO; /* see doctype property */ - protected String altText = null; /* default text for alt attribute */ - protected String slidestyle = null; /* style 
sheet for slides */ - protected String docTypeStr = null; /* user specified doctype */ - protected String errfile = null; /* file name to write errors to */ - protected boolean writeback = false; /* if true then output tidied markup */ - - protected boolean OnlyErrors = false; /* if true normal output is suppressed */ - protected boolean ShowWarnings = true; /* however errors are always shown */ - protected boolean Quiet = false; /* no 'Parsing X', guessed DTD or summary */ - protected boolean IndentContent = false; /* indent content of appropriate tags */ - protected boolean SmartIndent = false; /* does text/block level content effect indentation */ - protected boolean HideEndTags = false; /* suppress optional end tags */ - protected boolean XmlTags = false; /* treat input as XML */ - protected boolean XmlOut = false; /* create output as XML */ - protected boolean xHTML = false; /* output extensible HTML */ - protected boolean XmlPi = false; /* add for XML docs */ - protected boolean RawOut = false; /* avoid mapping values > 127 to entities */ - protected boolean UpperCaseTags = false; /* output tags in upper not lower case */ - protected boolean UpperCaseAttrs = false; /* output attributes in upper not lower case */ - protected boolean MakeClean = false; /* remove presentational clutter */ - protected boolean LogicalEmphasis = false; /* replace i by em and b by strong */ - protected boolean DropFontTags = false; /* discard presentation tags */ - protected boolean DropEmptyParas = true; /* discard empty p elements */ - protected boolean FixComments = true; /* fix comments with adjacent hyphens */ - protected boolean BreakBeforeBR = false; /* o/p newline before
    or not? */ - protected boolean BurstSlides = false; /* create slides on each h2 element */ - protected boolean NumEntities = false; /* use numeric entities */ - protected boolean QuoteMarks = false; /* output " marks as " */ - protected boolean QuoteNbsp = true; /* output non-breaking space as entity */ - protected boolean QuoteAmpersand = true; /* output naked ampersand as & */ - protected boolean WrapAttVals = false; /* wrap within attribute values */ - protected boolean WrapScriptlets = false; /* wrap within JavaScript string literals */ - protected boolean WrapSection = true; /* wrap within section tags */ - protected boolean WrapAsp = true; /* wrap within ASP pseudo elements */ - protected boolean WrapJste = true; /* wrap within JSTE pseudo elements */ - protected boolean WrapPhp = true; /* wrap within PHP pseudo elements */ - protected boolean FixBackslash = true; /* fix URLs by replacing \ with / */ - protected boolean IndentAttributes = false; /* newline+indent before each attribute */ - protected boolean XmlPIs = false; /* if set to yes PIs must end with ?> */ - protected boolean XmlSpace = false; /* if set to yes adds xml:space attr as needed */ - protected boolean EncloseBodyText = false; /* if yes text at body is wrapped in

    <p>'s */ - protected boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in

    's */ - protected boolean KeepFileTimes = true; /* if yes last modied time is preserved */ - protected boolean Word2000 = false; /* draconian cleaning for Word2000 */ - protected boolean TidyMark = true; /* add meta element indicating tidied doc */ - protected boolean Emacs = false; /* if true format error output for GNU Emacs */ - protected boolean LiteralAttribs = false; /* if true attributes may use newlines */ - - protected TagTable tt; /* TagTable associated with this Configuration */ - - private transient Properties _properties = new Properties(); - - public Configuration() - { - } - - public void addProps( Properties p ) - { - Enumeration enum = p.propertyNames(); - while (enum.hasMoreElements()) - { - String key = (String) enum.nextElement(); - String value = p.getProperty(key); - _properties.put(key, value); - } - parseProps(); - } - - public void parseFile( String filename ) - { - try - { - _properties.load( new FileInputStream( filename ) ); - } - catch (IOException e) - { - System.err.println(filename + e.toString()); - return; - } - parseProps(); - } - - private void parseProps() - { - String value; - - value = _properties.getProperty("indent-spaces"); - if (value != null) - spaces = parseInt(value, "indent-spaces"); - - value = _properties.getProperty("wrap"); - if (value != null) - wraplen = parseInt(value, "wrap"); - - value = _properties.getProperty("wrap-attributes"); - if (value != null) - WrapAttVals = parseBool(value, "wrap-attributes"); - - value = _properties.getProperty("wrap-script-literals"); - if (value != null) - WrapScriptlets = parseBool(value, "wrap-script-literals"); - - value = _properties.getProperty("wrap-sections"); - if (value != null) - WrapSection = parseBool(value, "wrap-sections"); - - value = _properties.getProperty("wrap-asp"); - if (value != null) - WrapAsp = parseBool(value, "wrap-asp"); - - value = _properties.getProperty("wrap-jste"); - if (value != null) - WrapJste = parseBool(value, "wrap-jste"); - - value = 
_properties.getProperty("wrap-php"); - if (value != null) - WrapPhp = parseBool(value, "wrap-php"); - - value = _properties.getProperty("literal-attributes"); - if (value != null) - LiteralAttribs = parseBool(value, "literal-attributes"); - - value = _properties.getProperty("tab-size"); - if (value != null) - tabsize = parseInt(value, "tab-size"); - - value = _properties.getProperty("markup"); - if (value != null) - OnlyErrors = parseInvBool(value, "markup"); - - value = _properties.getProperty("quiet"); - if (value != null) - Quiet = parseBool(value, "quiet"); - - value = _properties.getProperty("tidy-mark"); - if (value != null) - TidyMark = parseBool(value, "tidy-mark"); - - value = _properties.getProperty("indent"); - if (value != null) - IndentContent = parseIndent(value, "indent"); - - value = _properties.getProperty("indent-attributes"); - if (value != null) - IndentAttributes = parseBool(value, "ident-attributes"); - - value = _properties.getProperty("hide-endtags"); - if (value != null) - HideEndTags = parseBool(value, "hide-endtags"); - - value = _properties.getProperty("input-xml"); - if (value != null) - XmlTags = parseBool(value, "input-xml"); - - value = _properties.getProperty("output-xml"); - if (value != null) - XmlOut = parseBool(value, "output-xml"); - - value = _properties.getProperty("output-xhtml"); - if (value != null) - xHTML = parseBool(value, "output-xhtml"); - - value = _properties.getProperty("add-xml-pi"); - if (value != null) - XmlPi = parseBool(value, "add-xml-pi"); - - value = _properties.getProperty("add-xml-decl"); - if (value != null) - XmlPi = parseBool(value, "add-xml-decl"); - - value = _properties.getProperty("assume-xml-procins"); - if (value != null) - XmlPIs = parseBool(value, "assume-xml-procins"); - - value = _properties.getProperty("raw"); - if (value != null) - RawOut = parseBool(value, "raw"); - - value = _properties.getProperty("uppercase-tags"); - if (value != null) - UpperCaseTags = parseBool(value, 
"uppercase-tags"); - - value = _properties.getProperty("uppercase-attributes"); - if (value != null) - UpperCaseAttrs = parseBool(value, "uppercase-attributes"); - - value = _properties.getProperty("clean"); - if (value != null) - MakeClean = parseBool(value, "clean"); - - value = _properties.getProperty("logical-emphasis"); - if (value != null) - LogicalEmphasis = parseBool(value, "logical-emphasis"); - - value = _properties.getProperty("word-2000"); - if (value != null) - Word2000 = parseBool(value, "word-2000"); - - value = _properties.getProperty("drop-empty-paras"); - if (value != null) - DropEmptyParas = parseBool(value, "drop-empty-paras"); - - value = _properties.getProperty("drop-font-tags"); - if (value != null) - DropFontTags = parseBool(value, "drop-font-tags"); - - value = _properties.getProperty("enclose-text"); - if (value != null) - EncloseBodyText = parseBool(value, "enclose-text"); - - value = _properties.getProperty("enclose-block-text"); - if (value != null) - EncloseBlockText = parseBool(value, "enclose-block-text"); - - value = _properties.getProperty("alt-text"); - if (value != null) - altText = value; - - value = _properties.getProperty("add-xml-space"); - if (value != null) - XmlSpace = parseBool(value, "add-xml-space"); - - value = _properties.getProperty("fix-bad-comments"); - if (value != null) - FixComments = parseBool(value, "fix-bad-comments"); - - value = _properties.getProperty("split"); - if (value != null) - BurstSlides = parseBool(value, "split"); - - value = _properties.getProperty("break-before-br"); - if (value != null) - BreakBeforeBR = parseBool(value, "break-before-br"); - - value = _properties.getProperty("numeric-entities"); - if (value != null) - NumEntities = parseBool(value, "numeric-entities"); - - value = _properties.getProperty("quote-marks"); - if (value != null) - QuoteMarks = parseBool(value, "quote-marks"); - - value = _properties.getProperty("quote-nbsp"); - if (value != null) - QuoteNbsp = parseBool(value, 
"quote-nbsp"); - - value = _properties.getProperty("quote-ampersand"); - if (value != null) - QuoteAmpersand = parseBool(value, "quote-ampersand"); - - value = _properties.getProperty("write-back"); - if (value != null) - writeback = parseBool(value, "write-back"); - - value = _properties.getProperty("keep-time"); - if (value != null) - KeepFileTimes = parseBool(value, "keep-time"); - - value = _properties.getProperty("show-warnings"); - if (value != null) - ShowWarnings = parseBool(value, "show-warnings"); - - value = _properties.getProperty("error-file"); - if (value != null) - errfile = parseName(value, "error-file"); - - value = _properties.getProperty("slide-style"); - if (value != null) - slidestyle = parseName(value, "slide-style"); - - value = _properties.getProperty("new-inline-tags"); - if (value != null) - parseInlineTagNames(value, "new-inline-tags"); - - value = _properties.getProperty("new-blocklevel-tags"); - if (value != null) - parseBlockTagNames(value, "new-blocklevel-tags"); - - value = _properties.getProperty("new-empty-tags"); - if (value != null) - parseEmptyTagNames(value, "new-empty-tags"); - - value = _properties.getProperty("new-pre-tags"); - if (value != null) - parsePreTagNames(value, "new-pre-tags"); - - value = _properties.getProperty("char-encoding"); - if (value != null) - CharEncoding = parseCharEncoding(value, "char-encoding"); - - value = _properties.getProperty("doctype"); - if (value != null) - docTypeStr = parseDocType(value, "doctype"); - - value = _properties.getProperty("fix-backslash"); - if (value != null) - FixBackslash = parseBool(value, "fix-backslash"); - - value = _properties.getProperty("gnu-emacs"); - if (value != null) - Emacs = parseBool(value, "gnu-emacs"); - } - - /* ensure that config is self consistent */ - public void adjust() - { - if (EncloseBlockText) - EncloseBodyText = true; - - /* avoid the need to set IndentContent when SmartIndent is set */ - - if (SmartIndent) - IndentContent = true; - - /* disable 
wrapping */ - if (wraplen == 0) - wraplen = 0x7FFFFFFF; - - /* Word 2000 needs o:p to be declared as inline */ - if (Word2000) - { - tt.defineInlineTag("o:p"); - } - - /* XHTML is written in lower case */ - if (xHTML) - { - XmlOut = true; - UpperCaseTags = false; - UpperCaseAttrs = false; - } - - /* if XML in, then XML out */ - if (XmlTags) - { - XmlOut = true; - XmlPIs = true; - } - - /* XML requires end tags */ - if (XmlOut) - { - QuoteAmpersand = true; - HideEndTags = false; - } - } - - private static int parseInt( String s, String option ) - { - int i = 0; - try { - i = Integer.parseInt( s ); - } - catch ( NumberFormatException e ) { - Report.badArgument(option); - i = -1; - } - return i; - } - - private static boolean parseBool( String s, String option ) - { - boolean b = false; - if ( s != null && s.length() > 0 ) { - char c = s.charAt(0); - if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y') || (c == '1')) - b = true; - else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n') || (c == '0')) - b = false; - else - Report.badArgument(option); - } - return b; - } - - private static boolean parseInvBool( String s, String option ) - { - boolean b = false; - if ( s != null && s.length() > 0 ) { - char c = s.charAt(0); - if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y')) - b = true; - else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n')) - b = false; - else - Report.badArgument(option); - } - return !b; - } - - private static String parseName( String s, String option ) - { - StringTokenizer t = new StringTokenizer( s ); - String rs = null; - if ( t.countTokens() >= 1 ) - rs = t.nextToken(); - else - Report.badArgument(option); - return rs; - } - - private static int parseCharEncoding( String s, String option ) - { - int result = ASCII; - - if (Lexer.wstrcasecmp(s, "ascii") == 0) - result = ASCII; - else if (Lexer.wstrcasecmp(s, "latin1") == 0) - result = LATIN1; - else if (Lexer.wstrcasecmp(s, "raw") == 0) - result = RAW; - else if 
(Lexer.wstrcasecmp(s, "utf8") == 0) - result = UTF8; - else if (Lexer.wstrcasecmp(s, "iso2022") == 0) - result = ISO2022; - else if (Lexer.wstrcasecmp(s, "mac") == 0) - result = MACROMAN; - else - Report.badArgument(option); - - return result; - } - - /* slight hack to avoid changes to pprint.c */ - private boolean parseIndent( String s, String option ) - { - boolean b = IndentContent; - - if (Lexer.wstrcasecmp(s, "yes") == 0) - { - b = true; - SmartIndent = false; - } - else if (Lexer.wstrcasecmp(s, "true") == 0) - { - b = true; - SmartIndent = false; - } - else if (Lexer.wstrcasecmp(s, "no") == 0) - { - b = false; - SmartIndent = false; - } - else if (Lexer.wstrcasecmp(s, "false") == 0) - { - b = false; - SmartIndent = false; - } - else if (Lexer.wstrcasecmp(s, "auto") == 0) - { - b = true; - SmartIndent = true; - } - else - Report.badArgument(option); - return b; - } - - private void parseInlineTagNames( String s, String option ) - { - StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); - while ( t.hasMoreTokens() ) { - tt.defineInlineTag( t.nextToken() ); - } - } - - private void parseBlockTagNames( String s, String option ) - { - StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); - while ( t.hasMoreTokens() ) { - tt.defineBlockTag( t.nextToken() ); - } - } - - private void parseEmptyTagNames( String s, String option ) - { - StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); - while ( t.hasMoreTokens() ) { - tt.defineEmptyTag( t.nextToken() ); - } - } - - private void parsePreTagNames( String s, String option ) - { - StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); - while ( t.hasMoreTokens() ) { - tt.definePreTag( t.nextToken() ); - } - } - - /* - doctype: omit | auto | strict | loose | - - where the fpi is a string similar to - - "-//ACME//DTD HTML 3.14159//EN" - */ - protected String parseDocType( String s, String option ) - { - s = s.trim(); - - /* "-//ACME//DTD HTML 3.14159//EN" or similar */ - - if (s.startsWith("\"")) - { - 
docTypeMode = DOCTYPE_USER; - return s; - } - - /* read first word */ - String word = ""; - StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); - if (t.hasMoreTokens()) - word = t.nextToken(); - - if (Lexer.wstrcasecmp(word, "omit") == 0) - docTypeMode = DOCTYPE_OMIT; - else if (Lexer.wstrcasecmp(word, "strict") == 0) - docTypeMode = DOCTYPE_STRICT; - else if (Lexer.wstrcasecmp(word, "loose") == 0 || - Lexer.wstrcasecmp(word, "transitional") == 0) - docTypeMode = DOCTYPE_LOOSE; - else if (Lexer.wstrcasecmp(word, "auto") == 0) - docTypeMode = DOCTYPE_AUTO; - else - { - docTypeMode = DOCTYPE_AUTO; - Report.badArgument(option); - } - return null; - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrImpl.java deleted file mode 100644 index ebc8386..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrImpl.java +++ /dev/null @@ -1,190 +0,0 @@ -/* - * @(#)DOMAttrImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -import org.w3c.dom.DOMException; - -/** - * - * DOMAttrImpl - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.4, 1999/09/04 DOM Support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class DOMAttrImpl extends DOMNodeImpl implements org.w3c.dom.Attr { - - protected AttVal avAdaptee; - - protected DOMAttrImpl(AttVal adaptee) - { - super(null); // must override all methods of DOMNodeImpl - this.avAdaptee = adaptee; - } - - - /* --------------------- DOM ---------------------------- */ - - public String getNodeValue() throws DOMException - { - return getValue(); - } - - public void setNodeValue(String nodeValue) throws DOMException - { - setValue(nodeValue); - } - - public String getNodeName() - { - return getName(); - } - - public short getNodeType() - { - return org.w3c.dom.Node.ATTRIBUTE_NODE; - } - - public org.w3c.dom.Node getParentNode() - { - return null; - } - - public org.w3c.dom.NodeList getChildNodes() - { - // NOT SUPPORTED - return null; - } - - public org.w3c.dom.Node getFirstChild() - { - // NOT SUPPORTED - return null; - } - - public org.w3c.dom.Node getLastChild() - { - // NOT SUPPORTED - return null; - } - - public org.w3c.dom.Node getPreviousSibling() - { - return null; - } - - public org.w3c.dom.Node getNextSibling() - { - return null; - } - - public org.w3c.dom.NamedNodeMap getAttributes() - { - return null; - } - - public org.w3c.dom.Document getOwnerDocument() - { - return null; - } - - public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild, - org.w3c.dom.Node refChild) - throws DOMException - { - throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, - 
"Not supported"); - } - - public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild, - org.w3c.dom.Node oldChild) - throws DOMException - { - throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, - "Not supported"); - } - - public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild) - throws DOMException - { - throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, - "Not supported"); - } - - public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild) - throws DOMException - { - throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, - "Not supported"); - } - - public boolean hasChildNodes() - { - return false; - } - - public org.w3c.dom.Node cloneNode(boolean deep) - { - return null; - } - - /** - * @see org.w3c.dom.Attr#getName - */ - public String getName() - { - return avAdaptee.attribute; - } - - /** - * @see org.w3c.dom.Attr#getSpecified - */ - public boolean getSpecified() - { - return true; - } - - /** - * Returns value of this attribute. If this attribute has a null value, - * then the attribute name is returned instead. - * Thanks to Brett Knights for this fix. - * @see org.w3c.dom.Attr#getValue - * - */ - public String getValue() - { - return (avAdaptee.value == null) ? avAdaptee.attribute : avAdaptee.value ; - } - - /** - * @see org.w3c.dom.Attr#setValue - */ - public void setValue(String value) - { - avAdaptee.value = value; - } - - /** - * DOM2 - not implemented. 
- */ - public org.w3c.dom.Element getOwnerElement() { - return null; - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrMapImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrMapImpl.java deleted file mode 100644 index 2b1ca08..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrMapImpl.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * @(#)DOMAttrMapImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -import org.w3c.dom.DOMException; - -/** - * - * DOMAttrMapImpl - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class DOMAttrMapImpl implements org.w3c.dom.NamedNodeMap { - - private AttVal first = null; - - protected DOMAttrMapImpl(AttVal first) - { - this.first = first; - } - - /** - * @see org.w3c.dom.NamedNodeMap#getNamedItem - */ - public org.w3c.dom.Node getNamedItem(String name) - { - AttVal att = this.first; - while (att != null) { - if (att.attribute.equals(name)) break; - att = att.next; - } - if (att != null) - return att.getAdapter(); - else - return null; - } - - /** - * @see org.w3c.dom.NamedNodeMap#setNamedItem - */ - public org.w3c.dom.Node setNamedItem(org.w3c.dom.Node arg) - throws DOMException - { - // NOT SUPPORTED - return null; - } - - /** - * @see org.w3c.dom.NamedNodeMap#removeNamedItem - */ - public org.w3c.dom.Node removeNamedItem(String name) - throws DOMException - { - // NOT SUPPORTED - 
return null; - } - - /** - * @see org.w3c.dom.NamedNodeMap#item - */ - public org.w3c.dom.Node item(int index) - { - int i = 0; - AttVal att = this.first; - while (att != null) { - if (i >= index) break; - i++; - att = att.next; - } - if (att != null) - return att.getAdapter(); - else - return null; - } - - /** - * @see org.w3c.dom.NamedNodeMap#getLength - */ - public int getLength() - { - int len = 0; - AttVal att = this.first; - while (att != null) { - len++; - att = att.next; - } - return len; - } - - /** - * DOM2 - not implemented. - */ - public org.w3c.dom.Node getNamedItemNS(String namespaceURI, - String localName) - { - return null; - } - - /** - * DOM2 - not implemented. - * @exception org.w3c.dom.DOMException - */ - public org.w3c.dom.Node setNamedItemNS(org.w3c.dom.Node arg) - throws org.w3c.dom.DOMException - { - return null; - } - - /** - * DOM2 - not implemented. - * @exception org.w3c.dom.DOMException - */ - public org.w3c.dom.Node removeNamedItemNS(String namespaceURI, - String localName) - throws org.w3c.dom.DOMException - { - return null; - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCDATASectionImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCDATASectionImpl.java deleted file mode 100644 index 5e150db..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCDATASectionImpl.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * @(#)DOMCDATASectionImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -import org.w3c.dom.DOMException; - -/** - * - * DOMCDATASectionImpl - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @author Gary L Peskin - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class DOMCDATASectionImpl extends DOMTextImpl - implements org.w3c.dom.CDATASection { - - protected DOMCDATASectionImpl(Node adaptee) - { - super(adaptee); - } - - - /* --------------------- DOM ---------------------------- */ - - /** - * @see org.w3c.dom.Node#getNodeName - */ - public String getNodeName() - { - return "#cdata-section"; - } - - /** - * @see org.w3c.dom.Node#getNodeType - */ - public short getNodeType() - { - return org.w3c.dom.Node.CDATA_SECTION_NODE; - } -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCharacterDataImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCharacterDataImpl.java deleted file mode 100644 index 37245fb..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCharacterDataImpl.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * @(#)DOMCharacterDataImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -import org.w3c.dom.DOMException; - -/** - * - * DOMCharacterDataImpl - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class DOMCharacterDataImpl extends DOMNodeImpl - implements org.w3c.dom.CharacterData { - - protected DOMCharacterDataImpl(Node adaptee) - { - super(adaptee); - } - - - /* --------------------- DOM ---------------------------- */ - - /** - * @see org.w3c.dom.CharacterData#getData - */ - public String getData() throws DOMException - { - return getNodeValue(); - } - - /** - * @see org.w3c.dom.CharacterData#setData - */ - public void setData(String data) throws DOMException - { - // NOT SUPPORTED - throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, - "Not supported"); - } - - /** - * @see org.w3c.dom.CharacterData#getLength - */ - public int getLength() - { - int len = 0; - if (adaptee.textarray != null && adaptee.start < adaptee.end) - len = adaptee.end - adaptee.start; - return len; - } - - /** - * @see org.w3c.dom.CharacterData#substringData - */ - public String substringData(int offset, - int count) throws DOMException - { - int len; - String value = null; - if (count < 0) - { - throw new DOMExceptionImpl(DOMException.INDEX_SIZE_ERR, - "Invalid length"); - } - if (adaptee.textarray != null && adaptee.start < adaptee.end) - { - if (adaptee.start + offset >= adaptee.end) - { - throw new DOMExceptionImpl(DOMException.INDEX_SIZE_ERR, - "Invalid offset"); - } - len = count; - if (adaptee.start + offset + len - 1 >= adaptee.end) - len = adaptee.end - adaptee.start - offset; - - value = Lexer.getString(adaptee.textarray, - adaptee.start + offset, - len); - } - return value; - } - - /** - * @see org.w3c.dom.CharacterData#appendData - */ - public void 
appendData(String arg) throws DOMException - { - // NOT SUPPORTED - throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, - "Not supported"); - } - - /** - * @see org.w3c.dom.CharacterData#insertData - */ - public void insertData(int offset, - String arg) throws DOMException - { - // NOT SUPPORTED - throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, - "Not supported"); - } - - /** - * @see org.w3c.dom.CharacterData#deleteData - */ - public void deleteData(int offset, - int count) throws DOMException - { - // NOT SUPPORTED - throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, - "Not supported"); - } - - /** - * @see org.w3c.dom.CharacterData#replaceData - */ - public void replaceData(int offset, - int count, - String arg) throws DOMException - { - // NOT SUPPORTED - throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, - "Not supported"); - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCommentImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCommentImpl.java deleted file mode 100644 index 2491714..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCommentImpl.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * @(#)DOMCommentImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -import org.w3c.dom.DOMException; - -/** - * - * DOMCommentImpl - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class DOMCommentImpl extends DOMCharacterDataImpl - implements org.w3c.dom.Comment { - - protected DOMCommentImpl(Node adaptee) - { - super(adaptee); - } - - - /* --------------------- DOM ---------------------------- */ - - /** - * @see org.w3c.dom.Node#getNodeName - */ - public String getNodeName() - { - return "#comment"; - } - - /** - * @see org.w3c.dom.Node#getNodeType - */ - public short getNodeType() - { - return org.w3c.dom.Node.COMMENT_NODE; - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentImpl.java deleted file mode 100644 index 52f4f73..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentImpl.java +++ /dev/null @@ -1,261 +0,0 @@ -/* - * @(#)DOMDocumentImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -import org.w3c.dom.DOMException; - -/** - * - * DOMDocumentImpl - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.4, 1999/09/04 DOM Support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class DOMDocumentImpl extends DOMNodeImpl implements org.w3c.dom.Document { - - private TagTable tt; // a DOM Document has its own TagTable. - - protected DOMDocumentImpl(Node adaptee) - { - super(adaptee); - tt = new TagTable(); - } - - public void setTagTable(TagTable tt) - { - this.tt = tt; - } - - /* --------------------- DOM ---------------------------- */ - - /** - * @see org.w3c.dom.Node#getNodeName - */ - public String getNodeName() - { - return "#document"; - } - - /** - * @see org.w3c.dom.Node#getNodeType - */ - public short getNodeType() - { - return org.w3c.dom.Node.DOCUMENT_NODE; - } - - /** - * @see org.w3c.dom.Document#getDoctype - */ - public org.w3c.dom.DocumentType getDoctype() - { - Node node = adaptee.content; - while (node != null) { - if (node.type == Node.DocTypeTag) break; - node = node.next; - } - if (node != null) - return (org.w3c.dom.DocumentType)node.getAdapter(); - else - return null; - } - - /** - * @see org.w3c.dom.Document#getImplementation - */ - public org.w3c.dom.DOMImplementation getImplementation() - { - // NOT SUPPORTED - return null; - } - - /** - * @see org.w3c.dom.Document#getDocumentElement - */ - public org.w3c.dom.Element getDocumentElement() - { - Node node = adaptee.content; - while (node != null) { - if (node.type == Node.StartTag || - node.type == Node.StartEndTag) break; - node = node.next; - } - if (node != null) - return (org.w3c.dom.Element)node.getAdapter(); - else 
- return null; - } - - /** - * @see org.w3c.dom.Document#createElement - */ - public org.w3c.dom.Element createElement(String tagName) - throws DOMException - { - Node node = new Node(Node.StartEndTag, null, 0, 0, tagName, tt); - if (node != null) { - if (node.tag == null) // Fix Bug 121206 - node.tag = tt.xmlTags; - return (org.w3c.dom.Element)node.getAdapter(); - } - else - return null; - } - - /** - * @see org.w3c.dom.Document#createDocumentFragment - */ - public org.w3c.dom.DocumentFragment createDocumentFragment() - { - // NOT SUPPORTED - return null; - } - - /** - * @see org.w3c.dom.Document#createTextNode - */ - public org.w3c.dom.Text createTextNode(String data) - { - byte[] textarray = Lexer.getBytes(data); - Node node = new Node(Node.TextNode, textarray, 0, textarray.length); - if (node != null) - return (org.w3c.dom.Text)node.getAdapter(); - else - return null; - } - - /** - * @see org.w3c.dom.Document#createComment - */ - public org.w3c.dom.Comment createComment(String data) - { - byte[] textarray = Lexer.getBytes(data); - Node node = new Node(Node.CommentTag, textarray, 0, textarray.length); - if (node != null) - return (org.w3c.dom.Comment)node.getAdapter(); - else - return null; - } - - /** - * @see org.w3c.dom.Document#createCDATASection - */ - public org.w3c.dom.CDATASection createCDATASection(String data) - throws DOMException - { - // NOT SUPPORTED - return null; - } - - /** - * @see org.w3c.dom.Document#createProcessingInstruction - */ - public org.w3c.dom.ProcessingInstruction createProcessingInstruction(String target, - String data) - throws DOMException - { - throw new DOMExceptionImpl(DOMException.NOT_SUPPORTED_ERR, - "HTML document"); - } - - /** - * @see org.w3c.dom.Document#createAttribute - */ - public org.w3c.dom.Attr createAttribute(String name) - throws DOMException - { - AttVal av = new AttVal(null, null, (int)'"', name, null); - if (av != null) { - av.dict = - AttributeTable.getDefaultAttributeTable().findAttribute(av); - return 
(org.w3c.dom.Attr)av.getAdapter(); - } else { - return null; - } - } - - /** - * @see org.w3c.dom.Document#createEntityReference - */ - public org.w3c.dom.EntityReference createEntityReference(String name) - throws DOMException - { - // NOT SUPPORTED - return null; - } - - /** - * @see org.w3c.dom.Document#getElementsByTagName - */ - public org.w3c.dom.NodeList getElementsByTagName(String tagname) - { - return new DOMNodeListByTagNameImpl(this.adaptee, tagname); - } - - /** - * DOM2 - not implemented. - * @exception org.w3c.dom.DOMException - */ - public org.w3c.dom.Node importNode(org.w3c.dom.Node importedNode, boolean deep) - throws org.w3c.dom.DOMException - { - return null; - } - - /** - * DOM2 - not implemented. - * @exception org.w3c.dom.DOMException - */ - public org.w3c.dom.Attr createAttributeNS(String namespaceURI, - String qualifiedName) - throws org.w3c.dom.DOMException - { - return null; - } - - /** - * DOM2 - not implemented. - * @exception org.w3c.dom.DOMException - */ - public org.w3c.dom.Element createElementNS(String namespaceURI, - String qualifiedName) - throws org.w3c.dom.DOMException - { - return null; - } - - /** - * DOM2 - not implemented. - */ - public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI, - String localName) - { - return null; - } - - /** - * DOM2 - not implemented. 
- */ - public org.w3c.dom.Element getElementById(String elementId) - { - return null; - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentTypeImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentTypeImpl.java deleted file mode 100644 index 3e9fb8f..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentTypeImpl.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * @(#)DOMDocumentTypeImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -import org.w3c.dom.DOMException; - -/** - * - * DOMDocumentTypeImpl - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class DOMDocumentTypeImpl extends DOMNodeImpl - implements org.w3c.dom.DocumentType { - - protected DOMDocumentTypeImpl(Node adaptee) - { - super(adaptee); - } - - - /* --------------------- DOM ---------------------------- */ - - /** - * @see org.w3c.dom.Node#getNodeType - */ - public short getNodeType() - { - return org.w3c.dom.Node.DOCUMENT_TYPE_NODE; - } - - /** - * @see org.w3c.dom.Node#getNodeName - */ - public String getNodeName() - { - return getName(); - } - - /** - * @see org.w3c.dom.DocumentType#getName - */ - public String getName() - { - String value = null; - if (adaptee.type == Node.DocTypeTag) - { - - if (adaptee.textarray != null && adaptee.start < adaptee.end) - { - value = Lexer.getString(adaptee.textarray, - adaptee.start, - adaptee.end - adaptee.start); - } - } - return value; - } - - public org.w3c.dom.NamedNodeMap getEntities() - { - // NOT SUPPORTED - return null; - } - - public 
org.w3c.dom.NamedNodeMap getNotations() - { - // NOT SUPPORTED - return null; - } - - /** - * DOM2 - not implemented. - */ - public String getPublicId() { - return null; - } - - /** - * DOM2 - not implemented. - */ - public String getSystemId() { - return null; - } - - /** - * DOM2 - not implemented. - */ - public String getInternalSubset() { - return null; - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMElementImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMElementImpl.java deleted file mode 100644 index f9a367f..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMElementImpl.java +++ /dev/null @@ -1,307 +0,0 @@ -/* - * @(#)DOMElementImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -import org.w3c.dom.DOMException; - -/** - * - * DOMElementImpl - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.4, 1999/09/04 DOM Support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class DOMElementImpl extends DOMNodeImpl - implements org.w3c.dom.Element { - - protected DOMElementImpl(Node adaptee) - { - super(adaptee); - } - - - /* --------------------- DOM ---------------------------- */ - - /** - * @see org.w3c.dom.Node#getNodeType - */ - public short getNodeType() - { - return org.w3c.dom.Node.ELEMENT_NODE; - } - - /** - * @see org.w3c.dom.Element#getTagName - */ - public String getTagName() - { - return super.getNodeName(); - } - - /** - * @see org.w3c.dom.Element#getAttribute - */ - public String 
getAttribute(String name) - { - if (this.adaptee == null) - return null; - - AttVal att = this.adaptee.attributes; - while (att != null) { - if (att.attribute.equals(name)) break; - att = att.next; - } - if (att != null) - return att.value; - else - return ""; - } - - /** - * @see org.w3c.dom.Element#setAttribute - */ - public void setAttribute(String name, - String value) - throws DOMException - { - if (this.adaptee == null) - return; - - AttVal att = this.adaptee.attributes; - while (att != null) { - if (att.attribute.equals(name)) break; - att = att.next; - } - if (att != null) { - att.value = value; - } else { - att = new AttVal(null, null, (int)'"', name, value); - att.dict = - AttributeTable.getDefaultAttributeTable().findAttribute(att); - if (this.adaptee.attributes == null) { - this.adaptee.attributes = att; - } else { - att.next = this.adaptee.attributes; - this.adaptee.attributes = att; - } - } - } - - /** - * @see org.w3c.dom.Element#removeAttribute - */ - public void removeAttribute(String name) - throws DOMException - { - if (this.adaptee == null) - return; - - AttVal att = this.adaptee.attributes; - AttVal pre = null; - while (att != null) { - if (att.attribute.equals(name)) break; - pre = att; - att = att.next; - } - if (att != null) { - if (pre == null) { - this.adaptee.attributes = att.next; - } else { - pre.next = att.next; - } - } - } - - /** - * @see org.w3c.dom.Element#getAttributeNode - */ - public org.w3c.dom.Attr getAttributeNode(String name) - { - if (this.adaptee == null) - return null; - - AttVal att = this.adaptee.attributes; - while (att != null) { - if (att.attribute.equals(name)) break; - att = att.next; - } - if (att != null) - return att.getAdapter(); - else - return null; - } - - /** - * @see org.w3c.dom.Element#setAttributeNode - */ - public org.w3c.dom.Attr setAttributeNode(org.w3c.dom.Attr newAttr) - throws DOMException - { - if (newAttr == null) - return null; - if (!(newAttr instanceof DOMAttrImpl)) { - throw new 
DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR, - "newAttr not instanceof DOMAttrImpl"); - } - - DOMAttrImpl newatt = (DOMAttrImpl)newAttr; - String name = newatt.avAdaptee.attribute; - org.w3c.dom.Attr result = null; - - AttVal att = this.adaptee.attributes; - while (att != null) { - if (att.attribute.equals(name)) break; - att = att.next; - } - if (att != null) { - result = att.getAdapter(); - att.adapter = newAttr; - } else { - if (this.adaptee.attributes == null) { - this.adaptee.attributes = newatt.avAdaptee; - } else { - newatt.avAdaptee.next = this.adaptee.attributes; - this.adaptee.attributes = newatt.avAdaptee; - } - } - return result; - } - - /** - * @see org.w3c.dom.Element#removeAttributeNode - */ - public org.w3c.dom.Attr removeAttributeNode(org.w3c.dom.Attr oldAttr) - throws DOMException - { - if (oldAttr == null) - return null; - - org.w3c.dom.Attr result = null; - AttVal att = this.adaptee.attributes; - AttVal pre = null; - while (att != null) { - if (att.getAdapter() == oldAttr) break; - pre = att; - att = att.next; - } - if (att != null) { - if (pre == null) { - this.adaptee.attributes = att.next; - } else { - pre.next = att.next; - } - result = oldAttr; - } else { - throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, - "oldAttr not found"); - } - return result; - } - - /** - * @see org.w3c.dom.Element#getElementsByTagName - */ - public org.w3c.dom.NodeList getElementsByTagName(String name) - { - return new DOMNodeListByTagNameImpl(this.adaptee, name); - } - - /** - * @see org.w3c.dom.Element#normalize - */ - public void normalize() - { - // NOT SUPPORTED - } - - /** - * DOM2 - not implemented. - */ - public String getAttributeNS(String namespaceURI, String localName) - { - return null; - } - - /** - * DOM2 - not implemented. - * @exception org.w3c.dom.DOMException - */ - public void setAttributeNS(String namespaceURI, - String qualifiedName, - String value) - throws org.w3c.dom.DOMException - { - } - - /** - * DOM2 - not implemented. 
- * @exception org.w3c.dom.DOMException - */ - public void removeAttributeNS(String namespaceURI, String localName) - throws org.w3c.dom.DOMException - { - } - - /** - * DOM2 - not implemented. - */ - public org.w3c.dom.Attr getAttributeNodeNS(String namespaceURI, - String localName) - { - return null; - } - - /** - * DOM2 - not implemented. - * @exception org.w3c.dom.DOMException - */ - public org.w3c.dom.Attr setAttributeNodeNS(org.w3c.dom.Attr newAttr) - throws org.w3c.dom.DOMException - { - return null; - } - - /** - * DOM2 - not implemented. - */ - public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI, - String localName) - { - return null; - } - - /** - * DOM2 - not implemented. - */ - public boolean hasAttribute(String name) - { - return false; - } - - /** - * DOM2 - not implemented. - */ - public boolean hasAttributeNS(String namespaceURI, - String localName) - { - return false; - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMExceptionImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMExceptionImpl.java deleted file mode 100644 index 75c5337..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMExceptionImpl.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * @(#)DOMExceptionImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -import org.w3c.dom.DOMException; - -/** - * - * DOMExceptionImpl - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.4, 1999/09/04 DOM Support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class DOMExceptionImpl extends org.w3c.dom.DOMException { - - public DOMExceptionImpl(short code, String message) { - super(code, message); - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeImpl.java deleted file mode 100644 index d0b14e2..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeImpl.java +++ /dev/null @@ -1,488 +0,0 @@ -/* - * @(#)DOMNodeImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -import org.w3c.dom.DOMException; - -/** - * - * DOMNodeImpl - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.4, 1999/09/04 DOM Support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class DOMNodeImpl implements org.w3c.dom.Node { - - protected Node adaptee; - - protected DOMNodeImpl(Node adaptee) - { - this.adaptee = adaptee; - } - - - /* --------------------- DOM ---------------------------- */ - - /** - * @see org.w3c.dom.Node#getNodeValue - */ - public String getNodeValue() throws DOMException - { - String value = ""; //BAK 10/10/2000 replaced null - if (adaptee.type == Node.TextNode || - adaptee.type == Node.CDATATag || - adaptee.type == Node.CommentTag || - adaptee.type == Node.ProcInsTag) - { - - if (adaptee.textarray != null && adaptee.start < adaptee.end) - { - value = Lexer.getString(adaptee.textarray, - adaptee.start, - adaptee.end - adaptee.start); - } - } - return value; - } - - /** - * @see org.w3c.dom.Node#setNodeValue - */ - public void setNodeValue(String nodeValue) throws DOMException - { - if (adaptee.type == Node.TextNode || - adaptee.type == Node.CDATATag || - adaptee.type == Node.CommentTag || - adaptee.type == Node.ProcInsTag) - { - byte[] textarray = Lexer.getBytes(nodeValue); - adaptee.textarray = textarray; - adaptee.start = 0; - adaptee.end = textarray.length; - } - } - - /** - * @see org.w3c.dom.Node#getNodeName - */ - public String getNodeName() - { - return adaptee.element; - } - - /** - * @see org.w3c.dom.Node#getNodeType - */ - public short getNodeType() - { - short result = -1; - switch (adaptee.type) { - case Node.RootNode: - result = 
org.w3c.dom.Node.DOCUMENT_NODE; - break; - case Node.DocTypeTag: - result = org.w3c.dom.Node.DOCUMENT_TYPE_NODE; - break; - case Node.CommentTag: - result = org.w3c.dom.Node.COMMENT_NODE; - break; - case Node.ProcInsTag: - result = org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE; - break; - case Node.TextNode: - result = org.w3c.dom.Node.TEXT_NODE; - break; - case Node.CDATATag: - result = org.w3c.dom.Node.CDATA_SECTION_NODE; - break; - case Node.StartTag: - case Node.StartEndTag: - result = org.w3c.dom.Node.ELEMENT_NODE; - break; - } - return result; - } - - /** - * @see org.w3c.dom.Node#getParentNode - */ - public org.w3c.dom.Node getParentNode() - { - if (adaptee.parent != null) - return adaptee.parent.getAdapter(); - else - return null; - } - - /** - * @see org.w3c.dom.Node#getChildNodes - */ - public org.w3c.dom.NodeList getChildNodes() - { - return new DOMNodeListImpl(adaptee); - } - - /** - * @see org.w3c.dom.Node#getFirstChild - */ - public org.w3c.dom.Node getFirstChild() - { - if (adaptee.content != null) - return adaptee.content.getAdapter(); - else - return null; - } - - /** - * @see org.w3c.dom.Node#getLastChild - */ - public org.w3c.dom.Node getLastChild() - { - if (adaptee.last != null) - return adaptee.last.getAdapter(); - else - return null; - } - - /** - * @see org.w3c.dom.Node#getPreviousSibling - */ - public org.w3c.dom.Node getPreviousSibling() - { - if (adaptee.prev != null) - return adaptee.prev.getAdapter(); - else - return null; - } - - /** - * @see org.w3c.dom.Node#getNextSibling - */ - public org.w3c.dom.Node getNextSibling() - { - if (adaptee.next != null) - return adaptee.next.getAdapter(); - else - return null; - } - - /** - * @see org.w3c.dom.Node#getAttributes - */ - public org.w3c.dom.NamedNodeMap getAttributes() - { - return new DOMAttrMapImpl(adaptee.attributes); - } - - /** - * @see org.w3c.dom.Node#getOwnerDocument - */ - public org.w3c.dom.Document getOwnerDocument() - { - Node node; - - node = this.adaptee; - if (node != null && 
node.type == Node.RootNode) - return null; - - for (node = this.adaptee; - node != null && node.type != Node.RootNode; node = node.parent); - - if (node != null) - return (org.w3c.dom.Document)node.getAdapter(); - else - return null; - } - - /** - * @see org.w3c.dom.Node#insertBefore - */ - public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild, - org.w3c.dom.Node refChild) - throws DOMException - { - // TODO - handle newChild already in tree - - if (newChild == null) - return null; - if (!(newChild instanceof DOMNodeImpl)) { - throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR, - "newChild not instanceof DOMNodeImpl"); - } - DOMNodeImpl newCh = (DOMNodeImpl)newChild; - - if (this.adaptee.type == Node.RootNode) { - if (newCh.adaptee.type != Node.DocTypeTag && - newCh.adaptee.type != Node.ProcInsTag) { - throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, - "newChild cannot be a child of this node"); - } - } else if (this.adaptee.type == Node.StartTag) { - if (newCh.adaptee.type != Node.StartTag && - newCh.adaptee.type != Node.StartEndTag && - newCh.adaptee.type != Node.CommentTag && - newCh.adaptee.type != Node.TextNode && - newCh.adaptee.type != Node.CDATATag) { - throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, - "newChild cannot be a child of this node"); - } - } - if (refChild == null) { - Node.insertNodeAtEnd(this.adaptee, newCh.adaptee); - if (this.adaptee.type == Node.StartEndTag) { - this.adaptee.setType(Node.StartTag); - } - } else { - Node ref = this.adaptee.content; - while (ref != null) { - if (ref.getAdapter() == refChild) break; - ref = ref.next; - } - if (ref == null) { - throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, - "refChild not found"); - } - Node.insertNodeBeforeElement(ref, newCh.adaptee); - } - return newChild; - } - - /** - * @see org.w3c.dom.Node#replaceChild - */ - public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild, - org.w3c.dom.Node oldChild) - throws DOMException - { - // 
TODO - handle newChild already in tree - - if (newChild == null) - return null; - if (!(newChild instanceof DOMNodeImpl)) { - throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR, - "newChild not instanceof DOMNodeImpl"); - } - DOMNodeImpl newCh = (DOMNodeImpl)newChild; - - if (this.adaptee.type == Node.RootNode) { - if (newCh.adaptee.type != Node.DocTypeTag && - newCh.adaptee.type != Node.ProcInsTag) { - throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, - "newChild cannot be a child of this node"); - } - } else if (this.adaptee.type == Node.StartTag) { - if (newCh.adaptee.type != Node.StartTag && - newCh.adaptee.type != Node.StartEndTag && - newCh.adaptee.type != Node.CommentTag && - newCh.adaptee.type != Node.TextNode && - newCh.adaptee.type != Node.CDATATag) { - throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, - "newChild cannot be a child of this node"); - } - } - if (oldChild == null) { - throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, - "oldChild not found"); - } else { - Node n; - Node ref = this.adaptee.content; - while (ref != null) { - if (ref.getAdapter() == oldChild) break; - ref = ref.next; - } - if (ref == null) { - throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, - "oldChild not found"); - } - newCh.adaptee.next = ref.next; - newCh.adaptee.prev = ref.prev; - newCh.adaptee.last = ref.last; - newCh.adaptee.parent = ref.parent; - newCh.adaptee.content = ref.content; - if (ref.parent != null) { - if (ref.parent.content == ref) - ref.parent.content = newCh.adaptee; - if (ref.parent.last == ref) - ref.parent.last = newCh.adaptee; - } - if (ref.prev != null) { - ref.prev.next = newCh.adaptee; - } - if (ref.next != null) { - ref.next.prev = newCh.adaptee; - } - for (n = ref.content; n != null; n = n.next) { - if (n.parent == ref) - n.parent = newCh.adaptee; - } - } - return oldChild; - } - - /** - * @see org.w3c.dom.Node#removeChild - */ - public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild) - throws 
DOMException - { - if (oldChild == null) - return null; - - Node ref = this.adaptee.content; - while (ref != null) { - if (ref.getAdapter() == oldChild) break; - ref = ref.next; - } - if (ref == null) { - throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, - "refChild not found"); - } - Node.discardElement(ref); - - if (this.adaptee.content == null - && this.adaptee.type == Node.StartTag) { - this.adaptee.setType(Node.StartEndTag); - } - - return oldChild; - } - - /** - * @see org.w3c.dom.Node#appendChild - */ - public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild) - throws DOMException - { - // TODO - handle newChild already in tree - - if (newChild == null) - return null; - if (!(newChild instanceof DOMNodeImpl)) { - throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR, - "newChild not instanceof DOMNodeImpl"); - } - DOMNodeImpl newCh = (DOMNodeImpl)newChild; - - if (this.adaptee.type == Node.RootNode) { - if (newCh.adaptee.type != Node.DocTypeTag && - newCh.adaptee.type != Node.ProcInsTag) { - throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, - "newChild cannot be a child of this node"); - } - } else if (this.adaptee.type == Node.StartTag) { - if (newCh.adaptee.type != Node.StartTag && - newCh.adaptee.type != Node.StartEndTag && - newCh.adaptee.type != Node.CommentTag && - newCh.adaptee.type != Node.TextNode && - newCh.adaptee.type != Node.CDATATag) { - throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, - "newChild cannot be a child of this node"); - } - } - Node.insertNodeAtEnd(this.adaptee, newCh.adaptee); - - if (this.adaptee.type == Node.StartEndTag) { - this.adaptee.setType(Node.StartTag); - } - - return newChild; - } - - /** - * @see org.w3c.dom.Node#hasChildNodes - */ - public boolean hasChildNodes() - { - return (adaptee.content != null); - } - - /** - * @see org.w3c.dom.Node#cloneNode - */ - public org.w3c.dom.Node cloneNode(boolean deep) - { - Node node = adaptee.cloneNode(deep); - node.parent = null; - return 
node.getAdapter(); - } - - /** - * DOM2 - not implemented. - */ - public void normalize() - { - } - - /** - * DOM2 - not implemented. - */ - public boolean supports(String feature, String version) - { - return isSupported(feature, version); - } - - /** - * DOM2 - not implemented. - */ - public String getNamespaceURI() - { - return null; - } - - /** - * DOM2 - not implemented. - */ - public String getPrefix() - { - return null; - } - - /** - * DOM2 - not implemented. - */ - public void setPrefix(String prefix) - throws DOMException - { - } - - /** - * DOM2 - not implemented. - */ - public String getLocalName() - { - return null; - } - - /** - * DOM2 - not implemented. - */ - public boolean isSupported(String feature,String version) { - return false; - } - - /** - * DOM2 - @see org.w3c.dom.Node#hasAttributes - * contributed by dlp@users.sourceforge.net - */ - public boolean hasAttributes() - { - return adaptee.attributes != null; - } -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListByTagNameImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListByTagNameImpl.java deleted file mode 100644 index d69feb3..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListByTagNameImpl.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * @(#)DOMNodeListByTagNameImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * DOMNodeListByTagNameImpl - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -/** - *

    The items in the NodeList are accessible via an integral - * index, starting from 0. - * - */ -public class DOMNodeListByTagNameImpl implements org.w3c.dom.NodeList { - - private Node first = null; - private String tagName = "*"; - private int currIndex = 0; - private int maxIndex = 0; - private Node currNode = null; - - protected DOMNodeListByTagNameImpl(Node first, String tagName) - { - this.first = first; - this.tagName = tagName; - } - - /** - * @see org.w3c.dom.NodeList#item - */ - public org.w3c.dom.Node item(int index) - { - currIndex = 0; - maxIndex = index; - preTraverse(first); - - if (currIndex > maxIndex && currNode != null) - return currNode.getAdapter(); - else - return null; - } - - /** - * @see org.w3c.dom.NodeList#getLength - */ - public int getLength() - { - currIndex = 0; - maxIndex = Integer.MAX_VALUE; - preTraverse(first); - return currIndex; - } - - protected void preTraverse(Node node) - { - if (node == null) - return; - - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - { - if (currIndex <= maxIndex && - (tagName.equals("*") || tagName.equals(node.element))) - { - currIndex += 1; - currNode = node; - } - } - if (currIndex > maxIndex) - return; - - node = node.content; - while (node != null) - { - preTraverse(node); - node = node.next; - } - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListImpl.java deleted file mode 100644 index 726f007..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListImpl.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * @(#)DOMNodeListImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * DOMNodeListImpl - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -/** - *

    The items in the NodeList are accessible via an integral - * index, starting from 0. - * - */ -public class DOMNodeListImpl implements org.w3c.dom.NodeList { - - private Node parent = null; - - protected DOMNodeListImpl(Node parent) - { - this.parent = parent; - } - - /** - * @see org.w3c.dom.NodeList#item - */ - public org.w3c.dom.Node item(int index) - { - int i = 0; - Node node = parent.content; - while (node != null) { - if (i >= index) break; - i++; - node = node.next; - } - if (node != null) - return node.getAdapter(); - else - return null; - } - - /** - * @see org.w3c.dom.NodeList#getLength - */ - public int getLength() - { - int len = 0; - Node node = parent.content; - while (node != null) { - len++; - node = node.next; - } - return len; - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMProcessingInstructionImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMProcessingInstructionImpl.java deleted file mode 100644 index 1eefeca..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMProcessingInstructionImpl.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * @(#)DOMProcessingInstructionImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -import org.w3c.dom.DOMException; - -/** - * - * DOMProcessingInstructionImpl - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class DOMProcessingInstructionImpl extends DOMNodeImpl - implements org.w3c.dom.ProcessingInstruction { - - protected DOMProcessingInstructionImpl(Node adaptee) - { - super(adaptee); - } - - - /* --------------------- DOM ---------------------------- */ - - /** - * @see org.w3c.dom.Node#getNodeType - */ - public short getNodeType() - { - return org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE; - } - - /** - * @see org.w3c.dom.ProcessingInstruction#getTarget - */ - public String getTarget() - { - // TODO - return null; - } - - /** - * @see org.w3c.dom.ProcessingInstruction#getData - */ - public String getData() - { - return getNodeValue(); - } - - /** - * @see org.w3c.dom.ProcessingInstruction#setData - */ - public void setData(String data) throws DOMException - { - // NOT SUPPORTED - throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, - "Not supported"); - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMTextImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMTextImpl.java deleted file mode 100644 index 06ec997..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMTextImpl.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * @(#)DOMTextImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -import org.w3c.dom.DOMException; - -/** - * - * DOMTextImpl - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class DOMTextImpl extends DOMCharacterDataImpl - implements org.w3c.dom.Text { - - protected DOMTextImpl(Node adaptee) - { - super(adaptee); - } - - - /* --------------------- DOM ---------------------------- */ - - /** - * @see org.w3c.dom.Node#getNodeName - */ - public String getNodeName() - { - return "#text"; - } - - /** - * @see org.w3c.dom.Node#getNodeType - */ - public short getNodeType() - { - return org.w3c.dom.Node.TEXT_NODE; - } - - /** - * @see org.w3c.dom.Text#splitText - */ - public org.w3c.dom.Text splitText(int offset) throws DOMException - { - // NOT SUPPORTED - throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, - "Not supported"); - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Dict.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Dict.java deleted file mode 100644 index 4d05767..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Dict.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * @(#)Dict.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Tag dictionary node - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class Dict { - - /* content model shortcut encoding */ - - public static final int CM_UNKNOWN = 0; - public static final int CM_EMPTY = (1 << 0); - public static final int CM_HTML = (1 << 1); - public static final int CM_HEAD = (1 << 2); - public static final int CM_BLOCK = (1 << 3); - public static final int CM_INLINE = (1 << 4); - public static final int CM_LIST = (1 << 5); - public static final int CM_DEFLIST = (1 << 6); - public static final int CM_TABLE = (1 << 7); - public static final int CM_ROWGRP = (1 << 8); - public static final int CM_ROW = (1 << 9); - public static final int CM_FIELD = (1 << 10); - public static final int CM_OBJECT = (1 << 11); - public static final int CM_PARAM = (1 << 12); - public static final int CM_FRAMES = (1 << 13); - public static final int CM_HEADING = (1 << 14); - public static final int CM_OPT = (1 << 15); - public static final int CM_IMG = (1 << 16); - public static final int CM_MIXED = (1 << 17); - public static final int CM_NO_INDENT = (1 << 18); - public static final int CM_OBSOLETE = (1 << 19); - public static final int CM_NEW = (1 << 20); - public static final int CM_OMITST = (1 << 21); - - /* - - If the document uses just HTML 2.0 tags and attributes described it as 
HTML 2.0 - Similarly for HTML 3.2 and the 3 flavors of HTML 4.0. If there are proprietary - tags and attributes then describe it as HTML Proprietary. If it includes the - xml-lang or xmlns attributes but is otherwise HTML 2.0, 3.2 or 4.0 then describe - it as one of the flavors of Voyager (strict, loose or frameset). - */ - - public static final short VERS_UNKNOWN = 0; - - public static final short VERS_HTML20 = 1; - public static final short VERS_HTML32 = 2; - public static final short VERS_HTML40_STRICT = 4; - public static final short VERS_HTML40_LOOSE = 8; - public static final short VERS_FRAMES = 16; - public static final short VERS_XML = 32; - - public static final short VERS_NETSCAPE = 64; - public static final short VERS_MICROSOFT = 128; - public static final short VERS_SUN = 256; - - public static final short VERS_MALFORMED = 512; - - public static final short VERS_ALL = (VERS_HTML20|VERS_HTML32|VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES); - public static final short VERS_HTML40 = (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES); - public static final short VERS_LOOSE = (VERS_HTML32|VERS_HTML40_LOOSE|VERS_FRAMES); - public static final short VERS_IFRAMES = (VERS_HTML40_LOOSE|VERS_FRAMES); - public static final short VERS_FROM32 = (VERS_HTML40_STRICT|VERS_LOOSE); - public static final short VERS_PROPRIETARY = (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN); - - public static final short VERS_EVERYTHING = (VERS_ALL|VERS_PROPRIETARY); - - public Dict( String name, short versions, int model, - Parser parser, CheckAttribs chkattrs ) - { - this.name = name; - this.versions = versions; - this.model = model; - this.parser = parser; - this.chkattrs = chkattrs; - } - - public String name; - public short versions; - public int model; - public Parser parser; - public CheckAttribs chkattrs; -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Entity.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Entity.java deleted file mode 100644 index fea6e51..0000000 --- 
a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Entity.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * @(#)Entity.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * HTML ISO entity - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class Entity { - - public Entity( String name, short code ) - { - this.name = name; - this.code = code; - } - - public Entity( String name, int code ) - { - this.name = name; - this.code = (short)code; - } - - public String name; - public short code; - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/EntityTable.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/EntityTable.java deleted file mode 100644 index aeec74d..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/EntityTable.java +++ /dev/null @@ -1,386 +0,0 @@ -/* - * @(#)EntityTable.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Entity hash table - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -import java.util.Hashtable; -import java.util.Enumeration; - -public class EntityTable { - - public EntityTable() - { - } - - public Entity lookup( String name ) - { - return (Entity)entityHashtable.get( name ); - } - - public Entity install( String name, short code ) - { - Entity ent = lookup( name ); - if ( ent == null ) { - ent = new Entity( name, code ); - entityHashtable.put( name, ent ); - } else { - ent.code = code; - } - return ent; - } - - public Entity install( Entity ent ) - { - return (Entity)entityHashtable.put( ent.name, ent ); - } - - /* entity starting with "&" returns zero on error */ - public short entityCode( String name ) - { - int c; - - if (name.length() <= 1) - return 0; - - /* numeric entitity: name = "&#" followed by number */ - if ( name.charAt(1) == '#' ) { - c = 0; /* zero on missing/bad number */ - - /* 'x' prefix denotes hexadecimal number format */ - try { - if (name.length() >= 4 && name.charAt(2) == 'x') { - c = Integer.parseInt( name.substring(3), 16 ); - } else if (name.length() >= 3) { - c = Integer.parseInt( name.substring(2) ); - } - } - catch ( NumberFormatException e ) {} - - return (short)c; - } - - /* Named entity: name ="&" followed by a name */ - Entity ent = lookup( 
name.substring(1) ); - if ( ent != null ) { - return ent.code; - } - - return 0; /* zero signifies unknown entity name */ - } - - public String entityName( short code ) - { - String name = null; - Entity ent; - Enumeration en = entityHashtable.elements(); - while ( en.hasMoreElements() ) { - ent = (Entity)en.nextElement(); - if ( ent.code == code ) { - name = ent.name; - break; - } - } - return name; - } - - private Hashtable entityHashtable = new Hashtable(); - - private static EntityTable defaultEntityTable = null; - - private static Entity[] entities = { - - new Entity( "nbsp", 160 ), - new Entity( "iexcl", 161 ), - new Entity( "cent", 162 ), - new Entity( "pound", 163 ), - new Entity( "curren", 164 ), - new Entity( "yen", 165 ), - new Entity( "brvbar", 166 ), - new Entity( "sect", 167 ), - new Entity( "uml", 168 ), - new Entity( "copy", 169 ), - new Entity( "ordf", 170 ), - new Entity( "laquo", 171 ), - new Entity( "not", 172 ), - new Entity( "shy", 173 ), - new Entity( "reg", 174 ), - new Entity( "macr", 175 ), - new Entity( "deg", 176 ), - new Entity( "plusmn", 177 ), - new Entity( "sup2", 178 ), - new Entity( "sup3", 179 ), - new Entity( "acute", 180 ), - new Entity( "micro", 181 ), - new Entity( "para", 182 ), - new Entity( "middot", 183 ), - new Entity( "cedil", 184 ), - new Entity( "sup1", 185 ), - new Entity( "ordm", 186 ), - new Entity( "raquo", 187 ), - new Entity( "frac14", 188 ), - new Entity( "frac12", 189 ), - new Entity( "frac34", 190 ), - new Entity( "iquest", 191 ), - new Entity( "Agrave", 192 ), - new Entity( "Aacute", 193 ), - new Entity( "Acirc", 194 ), - new Entity( "Atilde", 195 ), - new Entity( "Auml", 196 ), - new Entity( "Aring", 197 ), - new Entity( "AElig", 198 ), - new Entity( "Ccedil", 199 ), - new Entity( "Egrave", 200 ), - new Entity( "Eacute", 201 ), - new Entity( "Ecirc", 202 ), - new Entity( "Euml", 203 ), - new Entity( "Igrave", 204 ), - new Entity( "Iacute", 205 ), - new Entity( "Icirc", 206 ), - new Entity( "Iuml", 207 ), - 
new Entity( "ETH", 208 ), - new Entity( "Ntilde", 209 ), - new Entity( "Ograve", 210 ), - new Entity( "Oacute", 211 ), - new Entity( "Ocirc", 212 ), - new Entity( "Otilde", 213 ), - new Entity( "Ouml", 214 ), - new Entity( "times", 215 ), - new Entity( "Oslash", 216 ), - new Entity( "Ugrave", 217 ), - new Entity( "Uacute", 218 ), - new Entity( "Ucirc", 219 ), - new Entity( "Uuml", 220 ), - new Entity( "Yacute", 221 ), - new Entity( "THORN", 222 ), - new Entity( "szlig", 223 ), - new Entity( "agrave", 224 ), - new Entity( "aacute", 225 ), - new Entity( "acirc", 226 ), - new Entity( "atilde", 227 ), - new Entity( "auml", 228 ), - new Entity( "aring", 229 ), - new Entity( "aelig", 230 ), - new Entity( "ccedil", 231 ), - new Entity( "egrave", 232 ), - new Entity( "eacute", 233 ), - new Entity( "ecirc", 234 ), - new Entity( "euml", 235 ), - new Entity( "igrave", 236 ), - new Entity( "iacute", 237 ), - new Entity( "icirc", 238 ), - new Entity( "iuml", 239 ), - new Entity( "eth", 240 ), - new Entity( "ntilde", 241 ), - new Entity( "ograve", 242 ), - new Entity( "oacute", 243 ), - new Entity( "ocirc", 244 ), - new Entity( "otilde", 245 ), - new Entity( "ouml", 246 ), - new Entity( "divide", 247 ), - new Entity( "oslash", 248 ), - new Entity( "ugrave", 249 ), - new Entity( "uacute", 250 ), - new Entity( "ucirc", 251 ), - new Entity( "uuml", 252 ), - new Entity( "yacute", 253 ), - new Entity( "thorn", 254 ), - new Entity( "yuml", 255 ), - new Entity( "fnof", 402 ), - new Entity( "Alpha", 913 ), - new Entity( "Beta", 914 ), - new Entity( "Gamma", 915 ), - new Entity( "Delta", 916 ), - new Entity( "Epsilon", 917 ), - new Entity( "Zeta", 918 ), - new Entity( "Eta", 919 ), - new Entity( "Theta", 920 ), - new Entity( "Iota", 921 ), - new Entity( "Kappa", 922 ), - new Entity( "Lambda", 923 ), - new Entity( "Mu", 924 ), - new Entity( "Nu", 925 ), - new Entity( "Xi", 926 ), - new Entity( "Omicron", 927 ), - new Entity( "Pi", 928 ), - new Entity( "Rho", 929 ), - new Entity( "Sigma", 
931 ), - new Entity( "Tau", 932 ), - new Entity( "Upsilon", 933 ), - new Entity( "Phi", 934 ), - new Entity( "Chi", 935 ), - new Entity( "Psi", 936 ), - new Entity( "Omega", 937 ), - new Entity( "alpha", 945 ), - new Entity( "beta", 946 ), - new Entity( "gamma", 947 ), - new Entity( "delta", 948 ), - new Entity( "epsilon", 949 ), - new Entity( "zeta", 950 ), - new Entity( "eta", 951 ), - new Entity( "theta", 952 ), - new Entity( "iota", 953 ), - new Entity( "kappa", 954 ), - new Entity( "lambda", 955 ), - new Entity( "mu", 956 ), - new Entity( "nu", 957 ), - new Entity( "xi", 958 ), - new Entity( "omicron", 959 ), - new Entity( "pi", 960 ), - new Entity( "rho", 961 ), - new Entity( "sigmaf", 962 ), - new Entity( "sigma", 963 ), - new Entity( "tau", 964 ), - new Entity( "upsilon", 965 ), - new Entity( "phi", 966 ), - new Entity( "chi", 967 ), - new Entity( "psi", 968 ), - new Entity( "omega", 969 ), - new Entity( "thetasym", 977 ), - new Entity( "upsih", 978 ), - new Entity( "piv", 982 ), - new Entity( "bull", 8226 ), - new Entity( "hellip", 8230 ), - new Entity( "prime", 8242 ), - new Entity( "Prime", 8243 ), - new Entity( "oline", 8254 ), - new Entity( "frasl", 8260 ), - new Entity( "weierp", 8472 ), - new Entity( "image", 8465 ), - new Entity( "real", 8476 ), - new Entity( "trade", 8482 ), - new Entity( "alefsym", 8501 ), - new Entity( "larr", 8592 ), - new Entity( "uarr", 8593 ), - new Entity( "rarr", 8594 ), - new Entity( "darr", 8595 ), - new Entity( "harr", 8596 ), - new Entity( "crarr", 8629 ), - new Entity( "lArr", 8656 ), - new Entity( "uArr", 8657 ), - new Entity( "rArr", 8658 ), - new Entity( "dArr", 8659 ), - new Entity( "hArr", 8660 ), - new Entity( "forall", 8704 ), - new Entity( "part", 8706 ), - new Entity( "exist", 8707 ), - new Entity( "empty", 8709 ), - new Entity( "nabla", 8711 ), - new Entity( "isin", 8712 ), - new Entity( "notin", 8713 ), - new Entity( "ni", 8715 ), - new Entity( "prod", 8719 ), - new Entity( "sum", 8721 ), - new Entity( 
"minus", 8722 ), - new Entity( "lowast", 8727 ), - new Entity( "radic", 8730 ), - new Entity( "prop", 8733 ), - new Entity( "infin", 8734 ), - new Entity( "ang", 8736 ), - new Entity( "and", 8743 ), - new Entity( "or", 8744 ), - new Entity( "cap", 8745 ), - new Entity( "cup", 8746 ), - new Entity( "int", 8747 ), - new Entity( "there4", 8756 ), - new Entity( "sim", 8764 ), - new Entity( "cong", 8773 ), - new Entity( "asymp", 8776 ), - new Entity( "ne", 8800 ), - new Entity( "equiv", 8801 ), - new Entity( "le", 8804 ), - new Entity( "ge", 8805 ), - new Entity( "sub", 8834 ), - new Entity( "sup", 8835 ), - new Entity( "nsub", 8836 ), - new Entity( "sube", 8838 ), - new Entity( "supe", 8839 ), - new Entity( "oplus", 8853 ), - new Entity( "otimes", 8855 ), - new Entity( "perp", 8869 ), - new Entity( "sdot", 8901 ), - new Entity( "lceil", 8968 ), - new Entity( "rceil", 8969 ), - new Entity( "lfloor", 8970 ), - new Entity( "rfloor", 8971 ), - new Entity( "lang", 9001 ), - new Entity( "rang", 9002 ), - new Entity( "loz", 9674 ), - new Entity( "spades", 9824 ), - new Entity( "clubs", 9827 ), - new Entity( "hearts", 9829 ), - new Entity( "diams", 9830 ), - new Entity( "quot", 34 ), - new Entity( "amp", 38 ), - new Entity( "lt", 60 ), - new Entity( "gt", 62 ), - new Entity( "OElig", 338 ), - new Entity( "oelig", 339 ), - new Entity( "Scaron", 352 ), - new Entity( "scaron", 353 ), - new Entity( "Yuml", 376 ), - new Entity( "circ", 710 ), - new Entity( "tilde", 732 ), - new Entity( "ensp", 8194 ), - new Entity( "emsp", 8195 ), - new Entity( "thinsp", 8201 ), - new Entity( "zwnj", 8204 ), - new Entity( "zwj", 8205 ), - new Entity( "lrm", 8206 ), - new Entity( "rlm", 8207 ), - new Entity( "ndash", 8211 ), - new Entity( "mdash", 8212 ), - new Entity( "lsquo", 8216 ), - new Entity( "rsquo", 8217 ), - new Entity( "sbquo", 8218 ), - new Entity( "ldquo", 8220 ), - new Entity( "rdquo", 8221 ), - new Entity( "bdquo", 8222 ), - new Entity( "dagger", 8224 ), - new Entity( "Dagger", 8225 
), - new Entity( "permil", 8240 ), - new Entity( "lsaquo", 8249 ), - new Entity( "rsaquo", 8250 ), - new Entity( "euro", 8364 ) - - }; - - public static EntityTable getDefaultEntityTable() - { - if ( defaultEntityTable == null ) { - defaultEntityTable = new EntityTable(); - for ( int i = 0; i < entities.length; i++ ) { - defaultEntityTable.install( entities[i] ); - } - } - return defaultEntityTable; - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/IStack.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/IStack.java deleted file mode 100644 index 8561a43..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/IStack.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * @(#)IStack.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Inline stack node - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class IStack { - - /* - Mosaic handles inlines via a separate stack from other elements - We duplicate this to recover from inline markup errors such as: - - italic text -

    more italic text normal text - - which for compatibility with Mosaic is mapped to: - - italic text -

    more italic text normal text - - Note that any inline end tag pop's the effect of the current - inline start tag, so that pop's in the above example. - */ - - public IStack next; - public Dict tag; /* tag's dictionary definition */ - public String element; /* name (null for text nodes) */ - public AttVal attributes; - - public IStack() - { - next = null; - tag = null; - element = null; - attributes = null; - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Lexer.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Lexer.java deleted file mode 100644 index 1ed3db6..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Lexer.java +++ /dev/null @@ -1,3134 +0,0 @@ -/* - * @(#)Lexer.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Lexer for html parser - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -/* - Given a file stream fp it returns a sequence of tokens. - - GetToken(fp) gets the next token - UngetToken(fp) provides one level undo - - The tags include an attribute list: - - - linked list of attribute/value nodes - - each node has 2 null-terminated strings. 
- - entities are replaced in attribute values - - white space is compacted if not in preformatted mode - If not in preformatted mode then leading white space - is discarded and subsequent white space sequences - compacted to single space chars. - - If XmlTags is no then Tag names are folded to upper - case and attribute names to lower case. - - Not yet done: - - Doctype subset and marked sections -*/ - -import java.io.PrintWriter; -import java.util.Stack; -import java.util.Vector; - -import org.eclipse.core.resources.IFile; -import sun.security.krb5.internal.av; - -public class Lexer { - - private IFile iFile; - public StreamIn in; /* file stream */ - public PrintWriter errout; /* error output stream */ - public short badAccess; /* for accessibility errors */ - public short badLayout; /* for bad style errors */ - public short badChars; /* for bad char encodings */ - public short badForm; /* for mismatched/mispositioned form tags */ - public short warnings; /* count of warnings in this document */ - public short errors; /* count of errors */ - public int lines; /* lines seen */ - public int columns; /* at start of current token */ - public boolean waswhite; /* used to collapse contiguous white space */ - public boolean pushed; /* true after token has been pushed back */ - public boolean insertspace; /* when space is moved after end tag */ - public boolean excludeBlocks; /* Netscape compatibility */ - public boolean exiled; /* true if moved out of table */ - public boolean isvoyager; /* true if xmlns attribute on html element */ - public short versions; /* bit vector of HTML versions */ - public int doctype; /* version as given by doctype (if any) */ - public boolean badDoctype; /* e.g. 
if html or PUBLIC is missing */ - public int txtstart; /* start of current node */ - public int txtend; /* end of current node */ - public short state; /* state of lexer's finite state machine */ - public Node token; - - /* - lexer character buffer - - parse tree nodes span onto this buffer - which contains the concatenated text - contents of all of the elements. - - lexsize must be reset for each file. - */ - public byte[] lexbuf; /* byte buffer of UTF-8 chars */ - public int lexlength; /* allocated */ - public int lexsize; /* used */ - - /* Inline stack for compatibility with Mosaic */ - public Node inode; /* for deferring text node */ - public int insert; /* for inferring inline tags */ - public Stack istack; - public int istackbase; /* start of frame */ - - public Style styles; /* used for cleaning up presentation markup */ - - public Configuration configuration; - protected int seenBodyEndTag; /* used by parser */ - private Vector nodeList; - - public Lexer(IFile iFile, StreamIn in, Configuration configuration) - { - this.iFile = iFile; - this.in = in; - this.lines = 1; - this.columns = 1; - this.state = LEX_CONTENT; - this.badAccess = 0; - this.badLayout = 0; - this.badChars = 0; - this.badForm = 0; - this.warnings = 0; - this.errors = 0; - this.waswhite = false; - this.pushed = false; - this.insertspace = false; - this.exiled = false; - this.isvoyager = false; - this.versions = Dict.VERS_EVERYTHING; - this.doctype = Dict.VERS_UNKNOWN; - this.badDoctype = false; - this.txtstart = 0; - this.txtend = 0; - this.token = null; - this.lexbuf = null; - this.lexlength = 0; - this.lexsize = 0; - this.inode = null; - this.insert = -1; - this.istack = new Stack(); - this.istackbase = 0; - this.styles = null; - this.configuration = configuration; - this.seenBodyEndTag = 0; - this.nodeList = new Vector(); - } - - public IFile getIFile() { - return iFile; - } - - public Node newNode() - { - Node node = new Node(); - nodeList.addElement(node); - return node; - } - - public 
Node newNode(short type, byte[] textarray, int start, int end) - { - Node node = new Node(type, textarray, start, end); - nodeList.addElement(node); - return node; - } - - public Node newNode(short type, byte[] textarray, int start, int end, String element) - { - Node node = new Node(type, textarray, start, end, element, configuration.tt); - nodeList.addElement(node); - return node; - } - - public Node cloneNode(Node node) - { - Node cnode = (Node)node.clone(); - nodeList.addElement(cnode); - for (AttVal att = cnode.attributes; att != null; att = att.next) { - if (att.asp != null) - nodeList.addElement(att.asp); - if (att.php != null) - nodeList.addElement(att.php); - } - return cnode; - } - - public AttVal cloneAttributes(AttVal attrs) - { - AttVal cattrs = (AttVal)attrs.clone(); - for (AttVal att = cattrs; att != null; att = att.next) { - if (att.asp != null) - nodeList.addElement(att.asp); - if (att.php != null) - nodeList.addElement(att.php); - } - return cattrs; - } - - protected void updateNodeTextArrays(byte[] oldtextarray, byte[] newtextarray) - { - Node node; - for (int i = 0; i < nodeList.size(); i++) { - node = (Node)(nodeList.elementAt(i)); - if (node.textarray == oldtextarray) - node.textarray = newtextarray; - } - } - - /* used for creating preformatted text from Word2000 */ - public Node newLineNode() - { - Node node = newNode(); - - node.textarray = this.lexbuf; - node.start = this.lexsize; - addCharToLexer((int)'\n'); - node.end = this.lexsize; - return node; - } - - // Should always be able convert to/from UTF-8, so encoding exceptions are - // converted to an Error to avoid adding throws declarations in - // lots of methods. 
- - public static byte[] getBytes(String str) { - try { - return str.getBytes("UTF8"); - } catch (java.io.UnsupportedEncodingException e) { - throw new Error("string to UTF-8 conversion failed: " + e.getMessage()); - } - } - - public static String getString(byte[] bytes, int offset, int length) { - try { - return new String(bytes, offset, length, "UTF8"); - } catch (java.io.UnsupportedEncodingException e) { - throw new Error("UTF-8 to string conversion failed: " + e.getMessage()); - } - } - - public boolean endOfInput() - { - return this.in.isEndOfStream(); - } - - public void addByte(int c) - { - if (this.lexsize + 1 >= this.lexlength) - { - while (this.lexsize + 1 >= this.lexlength) - { - if (this.lexlength == 0) - this.lexlength = 8192; - else - this.lexlength = this.lexlength * 2; - } - - byte[] temp = this.lexbuf; - this.lexbuf = new byte[ this.lexlength ]; - if (temp != null) - { - System.arraycopy( temp, 0, this.lexbuf, 0, temp.length ); - updateNodeTextArrays(temp, this.lexbuf); - } - } - - this.lexbuf[this.lexsize++] = (byte)c; - this.lexbuf[this.lexsize] = (byte)'\0'; /* debug */ - } - - public void changeChar(byte c) - { - if (this.lexsize > 0) - { - this.lexbuf[this.lexsize-1] = c; - } - } - - /* store char c as UTF-8 encoded byte stream */ - public void addCharToLexer(int c) - { - if (c < 128) - addByte(c); - else if (c <= 0x7FF) - { - addByte(0xC0 | (c >> 6)); - addByte(0x80 | (c & 0x3F)); - } - else if (c <= 0xFFFF) - { - addByte(0xE0 | (c >> 12)); - addByte(0x80 | ((c >> 6) & 0x3F)); - addByte(0x80 | (c & 0x3F)); - } - else if (c <= 0x1FFFFF) - { - addByte(0xF0 | (c >> 18)); - addByte(0x80 | ((c >> 12) & 0x3F)); - addByte(0x80 | ((c >> 6) & 0x3F)); - addByte(0x80 | (c & 0x3F)); - } - else - { - addByte(0xF8 | (c >> 24)); - addByte(0x80 | ((c >> 18) & 0x3F)); - addByte(0x80 | ((c >> 12) & 0x3F)); - addByte(0x80 | ((c >> 6) & 0x3F)); - addByte(0x80 | (c & 0x3F)); - } - } - - public void addStringToLexer(String str) - { - for ( int i = 0; i < 
str.length(); i++ ) { - addCharToLexer( (int)str.charAt(i) ); - } - } - - /* - No longer attempts to insert missing ';' for unknown - enitities unless one was present already, since this - gives unexpected results. - - For example: - was tidied to: - rather than: - - My thanks for Maurice Buxton for spotting this. - */ - public void parseEntity(short mode) - { - short map; - int start; - boolean first = true; - boolean semicolon = false; - boolean numeric = false; - int c, ch, startcol; - String str; - - start = this.lexsize - 1; /* to start at "&" */ - startcol = this.in.curcol - 1; - - while (true) - { - c = this.in.readChar(); - if (c == StreamIn.EndOfStream) break; - if (c == ';') - { - semicolon = true; - break; - } - - if (first && c == '#') - { - addCharToLexer(c); - first = false; - numeric = true; - continue; - } - - first = false; - map = MAP((char)c); - - /* AQ: Added flag for numeric entities so that numeric entities - with missing semi-colons are recognized. - Eg. "rep..." is recognized as "rep" - */ - if (numeric && ((c == 'x') || ((map & DIGIT) != 0))) - { - addCharToLexer(c); - continue; - } - if (!numeric && ((map & NAMECHAR) != 0)) - { - addCharToLexer(c); - continue; - } - - /* otherwise put it back */ - - this.in.ungetChar(c); - break; - } - - str = getString( this.lexbuf, start, this.lexsize - start ); - ch = EntityTable.getDefaultEntityTable().entityCode( str ); - - /* deal with unrecognized entities */ - if (ch <= 0) - { - /* set error position just before offending chararcter */ - this.lines = this.in.curline; - this.columns = startcol; - - if (this.lexsize > start +1 ) - { - Report.entityError(this, Report.UNKNOWN_ENTITY, str, ch); - - if (semicolon) - addCharToLexer(';'); - } - else /* naked & */ - { - Report.entityError(this, Report.UNESCAPED_AMPERSAND, str, ch); - } - } - else - { - if (c != ';') /* issue warning if not terminated by ';' */ - { - /* set error position just before offending chararcter */ - this.lines = this.in.curline; - 
this.columns = startcol; - Report.entityError(this, Report.MISSING_SEMICOLON, str, c); - } - - this.lexsize = start; - - if (ch == 160 && (mode & Preformatted) != 0) - ch = ' '; - - addCharToLexer(ch); - - if (ch == '&' && !this.configuration.QuoteAmpersand) - { - addCharToLexer('a'); - addCharToLexer('m'); - addCharToLexer('p'); - addCharToLexer(';'); - } - } - } - - public char parseTagName() - { - short map; - int c; - - /* fold case of first char in buffer */ - - c = this.lexbuf[this.txtstart]; - map = MAP((char)c); - - if (!this.configuration.XmlTags && (map & UPPERCASE) != 0) - { - c += (int)((int)'a' - (int)'A'); - this.lexbuf[this.txtstart] = (byte)c; - } - - while (true) - { - c = this.in.readChar(); - if (c == StreamIn.EndOfStream) break; - map = MAP((char)c); - - if ((map & NAMECHAR) == 0) - break; - - /* fold case of subsequent chars */ - - if (!this.configuration.XmlTags && (map & UPPERCASE) != 0) - c += (int)((int)'a' - (int)'A'); - - addCharToLexer(c); - } - - this.txtend = this.lexsize; - return (char)c; - } - - public void addStringLiteral(String str) - { - for ( int i = 0; i < str.length(); i++ ) { - addCharToLexer( (int)str.charAt(i) ); - } - } - - /* choose what version to use for new doctype */ - public short HTMLVersion() - { - short versions; - - versions = this.versions; - - if ((versions & Dict.VERS_HTML20) != 0) - return Dict.VERS_HTML20; - - if ((versions & Dict.VERS_HTML32) != 0) - return Dict.VERS_HTML32; - - if ((versions & Dict.VERS_HTML40_STRICT) != 0) - return Dict.VERS_HTML40_STRICT; - - if ((versions & Dict.VERS_HTML40_LOOSE) != 0) - return Dict.VERS_HTML40_LOOSE; - - if ((versions & Dict.VERS_FRAMES) != 0) - return Dict.VERS_FRAMES; - - return Dict.VERS_UNKNOWN; - } - - public String HTMLVersionName() - { - short guessed; - int j; - - guessed = apparentVersion(); - - for (j = 0; j < W3CVersion.length; ++j) - { - if (guessed == W3CVersion[j].code) - { - if (this.isvoyager) - return W3CVersion[j].voyagerName; - - return 
W3CVersion[j].name; - } - } - - return null; - } - - /* add meta element for Tidy */ - public boolean addGenerator(Node root) - { - AttVal attval; - Node node; - Node head = root.findHEAD(configuration.tt); - - if (head != null) - { - for (node = head.content; node != null; node = node.next) - { - if (node.tag == configuration.tt.tagMeta) - { - attval = node.getAttrByName("name"); - - if (attval != null && attval.value != null && - Lexer.wstrcasecmp(attval.value, "generator") == 0) - { - attval = node.getAttrByName("content"); - - if (attval != null && attval.value != null && - attval.value.length() >= 9 && - Lexer.wstrcasecmp(attval.value.substring(0, 9), "HTML Tidy") == 0) - { - return false; - } - } - } - } - - node = this.inferredTag("meta"); - node.addAttribute("content", "HTML Tidy, see www.w3.org"); - node.addAttribute("name", "generator"); - Node.insertNodeAtStart(head, node); - return true; - } - - return false; - } - - /* return true if substring s is in p and isn't all in upper case */ - /* this is used to check the case of SYSTEM, PUBLIC, DTD and EN */ - /* len is how many chars to check in p */ - private static boolean findBadSubString(String s, String p, int len) - { - int n = s.length(); - int i = 0; - String ps; - - while (n < len) - { - ps = p.substring(i, i + n); - if (wstrcasecmp(s, ps) == 0) - return (!ps.equals(s.substring(0, n))); - - ++i; - --len; - } - - return false; - } - - public boolean checkDocTypeKeyWords(Node doctype) - { - int len = doctype.end - doctype.start; - String s = getString(this.lexbuf, doctype.start, len); - - return !( - findBadSubString("SYSTEM", s, len) || - findBadSubString("PUBLIC", s, len) || - findBadSubString("//DTD", s, len) || - findBadSubString("//W3C", s, len) || - findBadSubString("//EN", s, len) - ); - } - - /* examine to identify version */ - public short findGivenVersion(Node doctype) - { - String p, s; - int i, j; - int len; - String str1; - String str2; - - /* if root tag for doctype isn't html give up 
now */ - str1 = getString(this.lexbuf, doctype.start, 5); - if (wstrcasecmp(str1, "html ") != 0) - return 0; - - if (!checkDocTypeKeyWords(doctype)) - Report.warning(this, doctype, null, Report.DTYPE_NOT_UPPER_CASE); - - /* give up if all we are given is the system id for the doctype */ - str1 = getString(this.lexbuf, doctype.start + 5, 7); - if (wstrcasecmp(str1, "SYSTEM ") == 0) - { - /* but at least ensure the case is correct */ - if (!str1.substring(0, 6).equals("SYSTEM")) - System.arraycopy( getBytes("SYSTEM"), 0, - this.lexbuf, doctype.start + 5, 6 ); - return 0; /* unrecognized */ - } - - if (wstrcasecmp(str1, "PUBLIC ") == 0) - { - if (!str1.substring(0, 6).equals("PUBLIC")) - System.arraycopy( getBytes("PUBLIC "), 0, - this.lexbuf, doctype.start + 5, 6 ); - } - else - this.badDoctype = true; - - for (i = doctype.start; i < doctype.end; ++i) - { - if (this.lexbuf[i] == (byte)'"') - { - str1 = getString( this.lexbuf, i + 1, 12 ); - str2 = getString( this.lexbuf, i + 1, 13 ); - if (str1.equals("-//W3C//DTD ")) - { - /* compute length of identifier e.g. "HTML 4.0 Transitional" */ - for (j = i + 13; j < doctype.end && this.lexbuf[j] != (byte)'/'; ++j); - len = j - i - 13; - p = getString( this.lexbuf, i + 13, len ); - - for (j = 1; j < W3CVersion.length; ++j) - { - s = W3CVersion[j].name; - if (len == s.length() && s.equals(p)) - return W3CVersion[j].code; - } - - /* else unrecognized version */ - } - else if (str2.equals("-//IETF//DTD ")) - { - /* compute length of identifier e.g. 
"HTML 2.0" */ - for (j = i + 14; j < doctype.end && this.lexbuf[j] != (byte)'/'; ++j); - len = j - i - 14; - - p = getString( this.lexbuf, i + 14, len ); - s = W3CVersion[0].name; - if (len == s.length() && s.equals(p)) - return W3CVersion[0].code; - - /* else unrecognized version */ - } - break; - } - } - - return 0; - } - - public void fixHTMLNameSpace(Node root, String profile) - { - Node node; - AttVal prev, attr; - - for (node = root.content; - node != null && node.tag != configuration.tt.tagHtml; node = node.next); - - if (node != null) - { - prev = null; - - for (attr = node.attributes; attr != null; attr = attr.next) - { - if (attr.attribute.equals("xmlns")) - break; - - prev = attr; - } - - if (attr != null) - { - if (!attr.value.equals(profile)) - { - Report.warning(this, node, null, Report.INCONSISTENT_NAMESPACE); - attr.value = profile; - } - } - else - { - attr = new AttVal( node.attributes, null, (int)'"', - "xmlns", profile ); - attr.dict = - AttributeTable.getDefaultAttributeTable().findAttribute( attr ); - node.attributes = attr; - } - } - } - - public boolean setXHTMLDocType(Node root) - { - String fpi = " "; - String sysid = ""; - String namespace = XHTML_NAMESPACE; - Node doctype; - - doctype = root.findDocType(); - - if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT) - { - if (doctype != null) - Node.discardElement(doctype); - return true; - } - - if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO) - { - /* see what flavor of XHTML this document matches */ - if ((this.versions & Dict.VERS_HTML40_STRICT) != 0) - { /* use XHTML strict */ - fpi = "-//W3C//DTD XHTML 1.0 Strict//EN"; - sysid = voyager_strict; - } - else if ((this.versions & Dict.VERS_LOOSE) != 0) - { - fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN"; - sysid = voyager_loose; - } - else if ((this.versions & Dict.VERS_FRAMES) != 0) - { /* use XHTML frames */ - fpi = "-//W3C//DTD XHTML 1.0 Frameset//EN"; - sysid = voyager_frameset; - } - else /* lets assume XHTML 
transitional */ - { - fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN"; - sysid = voyager_loose; - } - } - else if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT) - { - fpi = "-//W3C//DTD XHTML 1.0 Strict//EN"; - sysid = voyager_strict; - } - else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE) - { - fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN"; - sysid = voyager_loose; - } - - fixHTMLNameSpace(root, namespace); - - if (doctype == null) - { - doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0); - doctype.next = root.content; - doctype.parent = root; - doctype.prev = null; - root.content = doctype; - } - - if (configuration.docTypeMode == Configuration.DOCTYPE_USER && - configuration.docTypeStr != null) - { - fpi = configuration.docTypeStr; - sysid = ""; - } - - this.txtstart = this.lexsize; - this.txtend = this.lexsize; - - /* add public identifier */ - addStringLiteral("html PUBLIC "); - - /* check if the fpi is quoted or not */ - if (fpi.charAt(0) == '"') - addStringLiteral(fpi); - else - { - addStringLiteral("\""); - addStringLiteral(fpi); - addStringLiteral("\""); - } - - if (sysid.length() + 6 >= this.configuration.wraplen) - addStringLiteral("\n\""); - else - addStringLiteral("\n \""); - - /* add system identifier */ - addStringLiteral(sysid); - addStringLiteral("\""); - - this.txtend = this.lexsize; - - doctype.start = this.txtstart; - doctype.end = this.txtend; - - return false; - } - - public short apparentVersion() - { - switch (this.doctype) - { - case Dict.VERS_UNKNOWN: - return HTMLVersion(); - - case Dict.VERS_HTML20: - if ((this.versions & Dict.VERS_HTML20) != 0) - return Dict.VERS_HTML20; - - break; - - case Dict.VERS_HTML32: - if ((this.versions & Dict.VERS_HTML32) != 0) - return Dict.VERS_HTML32; - - break; /* to replace old version by new */ - - case Dict.VERS_HTML40_STRICT: - if ((this.versions & Dict.VERS_HTML40_STRICT) != 0) - return Dict.VERS_HTML40_STRICT; - - break; - - case Dict.VERS_HTML40_LOOSE: - if 
((this.versions & Dict.VERS_HTML40_LOOSE) != 0) - return Dict.VERS_HTML40_LOOSE; - - break; /* to replace old version by new */ - - case Dict.VERS_FRAMES: - if ((this.versions & Dict.VERS_FRAMES) != 0) - return Dict.VERS_FRAMES; - - break; - } - - Report.warning(this, null, null, Report.INCONSISTENT_VERSION); - return this.HTMLVersion(); - } - - /* fixup doctype if missing */ - public boolean fixDocType(Node root) - { - Node doctype; - int guessed = Dict.VERS_HTML40_STRICT, i; - - if (this.badDoctype) - Report.warning(this, null, null, Report.MALFORMED_DOCTYPE); - - if (configuration.XmlOut) - return true; - - doctype = root.findDocType(); - - if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT) - { - if (doctype != null) - Node.discardElement(doctype); - return true; - } - - if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT) - { - Node.discardElement(doctype); - doctype = null; - guessed = Dict.VERS_HTML40_STRICT; - } - else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE) - { - Node.discardElement(doctype); - doctype = null; - guessed = Dict.VERS_HTML40_LOOSE; - } - else if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO) - { - if (doctype != null) - { - if (this.doctype == Dict.VERS_UNKNOWN) - return false; - - switch (this.doctype) - { - case Dict.VERS_UNKNOWN: - return false; - - case Dict.VERS_HTML20: - if ((this.versions & Dict.VERS_HTML20) != 0) - return true; - - break; /* to replace old version by new */ - - case Dict.VERS_HTML32: - if ((this.versions & Dict.VERS_HTML32) != 0) - return true; - - break; /* to replace old version by new */ - - case Dict.VERS_HTML40_STRICT: - if ((this.versions & Dict.VERS_HTML40_STRICT) != 0) - return true; - - break; /* to replace old version by new */ - - case Dict.VERS_HTML40_LOOSE: - if ((this.versions & Dict.VERS_HTML40_LOOSE) != 0) - return true; - - break; /* to replace old version by new */ - - case Dict.VERS_FRAMES: - if ((this.versions & Dict.VERS_FRAMES) != 0) - return 
true; - - break; /* to replace old version by new */ - } - - /* INCONSISTENT_VERSION warning is now issued by ApparentVersion() */ - } - - /* choose new doctype */ - guessed = HTMLVersion(); - } - - if (guessed == Dict.VERS_UNKNOWN) - return false; - - /* for XML use the Voyager system identifier */ - if (this.configuration.XmlOut || this.configuration.XmlTags || this.isvoyager) - { - if (doctype != null) - Node.discardElement(doctype); - - for (i = 0; i < W3CVersion.length; ++i) - { - if (guessed == W3CVersion[i].code) - { - fixHTMLNameSpace(root, W3CVersion[i].profile); - break; - } - } - - return true; - } - - if (doctype == null) - { - doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0); - doctype.next = root.content; - doctype.parent = root; - doctype.prev = null; - root.content = doctype; - } - - this.txtstart = this.lexsize; - this.txtend = this.lexsize; - - /* use the appropriate public identifier */ - addStringLiteral("html PUBLIC "); - - if (configuration.docTypeMode == Configuration.DOCTYPE_USER && - configuration.docTypeStr != null) - addStringLiteral(configuration.docTypeStr); - else if (guessed == Dict.VERS_HTML20) - addStringLiteral("\"-//IETF//DTD HTML 2.0//EN\""); - else - { - addStringLiteral("\"-//W3C//DTD "); - - for (i = 0; i < W3CVersion.length; ++i) - { - if (guessed == W3CVersion[i].code) - { - addStringLiteral(W3CVersion[i].name); - break; - } - } - - addStringLiteral("//EN\""); - } - - this.txtend = this.lexsize; - - doctype.start = this.txtstart; - doctype.end = this.txtend; - - return true; - } - - /* ensure XML document starts with */ - public boolean fixXMLPI(Node root) - { - Node xml; - int s; - - if( root.content != null && root.content.type == Node.ProcInsTag) - { - s = root.content.start; - - if (this.lexbuf[s] == (byte)'x' && - this.lexbuf[s+1] == (byte)'m' && - this.lexbuf[s+2] == (byte)'l') - return true; - } - - xml = newNode(Node.ProcInsTag, this.lexbuf, 0, 0); - xml.next = root.content; - - if (root.content != null) - { - 
root.content.prev = xml; - xml.next = root.content; - } - - root.content = xml; - - this.txtstart = this.lexsize; - this.txtend = this.lexsize; - addStringLiteral("xml version=\"1.0\""); - if (this.configuration.CharEncoding == Configuration.LATIN1) - addStringLiteral(" encoding=\"ISO-8859-1\""); - this.txtend = this.lexsize; - - xml.start = this.txtstart; - xml.end = this.txtend; - return false; - } - - public Node inferredTag(String name) - { - Node node; - - node = newNode(Node.StartTag, - this.lexbuf, - this.txtstart, - this.txtend, - name); - node.implicit = true; - return node; - } - - public static boolean expectsContent(Node node) - { - if (node.type != Node.StartTag) - return false; - - /* unknown element? */ - if (node.tag == null) - return true; - - if ((node.tag.model & Dict.CM_EMPTY) != 0) - return false; - - return true; - } - - /* - create a text node for the contents of - a CDATA element like style or script - which ends with for some foo. - */ - public Node getCDATA(Node container) - { - int c, lastc, start, len, i; - String str; - boolean endtag = false; - - this.lines = this.in.curline; - this.columns = this.in.curcol; - this.waswhite = false; - this.txtstart = this.lexsize; - this.txtend = this.lexsize; - - lastc = (int)'\0'; - start = -1; - - while (true) - { - c = this.in.readChar(); - if (c == StreamIn.EndOfStream) break; - /* treat \r\n as \n and \r as \n */ - - if (c == (int)'/' && lastc == (int)'<') - { - if (endtag) - { - this.lines = this.in.curline; - this.columns = this.in.curcol - 3; - - Report.warning(this, null, null, Report.BAD_CDATA_CONTENT); - } - - start = this.lexsize + 1; /* to first letter */ - endtag = true; - } - else if (c == (int)'>' && start >= 0) - { - len = this.lexsize - start; - if (len == container.element.length()) - { - str = getString( this.lexbuf, start, len ); - if (Lexer.wstrcasecmp(str, container.element) == 0) - { - this.txtend = start - 2; - break; - } - } - - this.lines = this.in.curline; - this.columns = 
this.in.curcol - 3; - - Report.warning(this, null, null, Report.BAD_CDATA_CONTENT); - - /* if javascript insert backslash before / */ - - if (ParserImpl.isJavaScript(container)) - { - for (i = this.lexsize; i > start-1; --i) - this.lexbuf[i] = this.lexbuf[i-1]; - - this.lexbuf[start-1] = (byte)'\\'; - this.lexsize++; - } - - start = -1; - } - else if (c == (int)'\r') - { - c = this.in.readChar(); - - if (c != (int)'\n') - this.in.ungetChar(c); - - c = (int)'\n'; - } - - addCharToLexer((int)c); - this.txtend = this.lexsize; - lastc = c; - } - - if (c == StreamIn.EndOfStream) - Report.warning(this, container, null, Report.MISSING_ENDTAG_FOR); - - if (this.txtend > this.txtstart) - { - this.token = newNode(Node.TextNode, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - } - - return null; - } - - public void ungetToken() - { - this.pushed = true; - } - - public static final short IgnoreWhitespace = 0; - public static final short MixedContent = 1; - public static final short Preformatted = 2; - public static final short IgnoreMarkup = 3; - - /* - modes for GetToken() - - MixedContent -- for elements which don't accept PCDATA - Preformatted -- white space preserved as is - IgnoreMarkup -- for CDATA elements such as script, style - */ - - public Node getToken(short mode) - { - short map; - int c = 0; - int lastc; - int badcomment = 0; - MutableBoolean isempty = new MutableBoolean(); - AttVal attributes; - - if (this.pushed) - { - /* duplicate inlines in preference to pushed text nodes when appropriate */ - if (this.token.type != Node.TextNode || - (this.insert == -1 && this.inode == null)) - { - this.pushed = false; - return this.token; - } - } - - /* at start of block elements, unclosed inline - elements are inserted into the token stream */ - - if (this.insert != -1 || this.inode != null) - return insertedToken(); - - this.lines = this.in.curline; - this.columns = this.in.curcol; - this.waswhite = false; - - this.txtstart = this.lexsize; - 
this.txtend = this.lexsize; - - while (true) - { - c = this.in.readChar(); - if (c == StreamIn.EndOfStream) break; - if (this.insertspace && mode != IgnoreWhitespace) - { - addCharToLexer(' '); - this.waswhite = true; - this.insertspace = false; - } - - /* treat \r\n as \n and \r as \n */ - - if (c == '\r') - { - c = this.in.readChar(); - - if (c != '\n') - this.in.ungetChar(c); - - c = '\n'; - } - - addCharToLexer(c); - - switch (this.state) - { - case LEX_CONTENT: /* element content */ - map = MAP((char)c); - - /* - Discard white space if appropriate. Its cheaper - to do this here rather than in parser methods - for elements that don't have mixed content. - */ - if (((map & WHITE) != 0) && (mode == IgnoreWhitespace) - && this.lexsize == this.txtstart + 1) - { - --this.lexsize; - this.waswhite = false; - this.lines = this.in.curline; - this.columns = this.in.curcol; - continue; - } - - if (c == '<') - { - this.state = LEX_GT; - continue; - } - - if ((map & WHITE) != 0) - { - /* was previous char white? 
*/ - if (this.waswhite) - { - if (mode != Preformatted && mode != IgnoreMarkup) - { - --this.lexsize; - this.lines = this.in.curline; - this.columns = this.in.curcol; - } - } - else /* prev char wasn't white */ - { - this.waswhite = true; - lastc = c; - - if (mode != Preformatted && mode != IgnoreMarkup && c != ' ') - changeChar((byte)' '); - } - - continue; - } - else if (c == '&' && mode != IgnoreMarkup) - parseEntity(mode); - - /* this is needed to avoid trimming trailing whitespace */ - if (mode == IgnoreWhitespace) - mode = MixedContent; - - this.waswhite = false; - continue; - - case LEX_GT: /* < */ - - /* check for endtag */ - if (c == '/') - { - c = this.in.readChar(); - if (c == StreamIn.EndOfStream) - { - this.in.ungetChar(c); - continue; - } - - addCharToLexer(c); - map = MAP((char)c); - - if ((map & LETTER) != 0) - { - this.lexsize -= 3; - this.txtend = this.lexsize; - this.in.ungetChar(c); - this.state = LEX_ENDTAG; - this.lexbuf[this.lexsize] = (byte)'\0'; /* debug */ - this.in.curcol -= 2; - - /* if some text before the this.txtstart) - { - /* trim space char before end tag */ - if (mode == IgnoreWhitespace && this.lexbuf[this.lexsize - 1] == (byte)' ') - { - this.lexsize -= 1; - this.txtend = this.lexsize; - } - - this.token = newNode(Node.TextNode, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - } - - continue; /* no text so keep going */ - } - - /* otherwise treat as CDATA */ - this.waswhite = false; - this.state = LEX_CONTENT; - continue; - } - - if (mode == IgnoreMarkup) - { - /* otherwise treat as CDATA */ - this.waswhite = false; - this.state = LEX_CONTENT; - continue; - } - - /* - look out for comments, doctype or marked sections - this isn't quite right, but its getting there ... 
- */ - if (c == '!') - { - c = this.in.readChar(); - - if (c == '-') - { - c = this.in.readChar(); - - if (c == '-') - { - this.state = LEX_COMMENT; /* comment */ - this.lexsize -= 2; - this.txtend = this.lexsize; - - /* if some text before < return it now */ - if (this.txtend > this.txtstart) - { - this.token = newNode(Node.TextNode, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - } - - this.txtstart = this.lexsize; - continue; - } - - Report.warning(this, null, null, Report.MALFORMED_COMMENT); - } - else if (c == 'd' || c == 'D') - { - this.state = LEX_DOCTYPE; /* doctype */ - this.lexsize -= 2; - this.txtend = this.lexsize; - mode = IgnoreWhitespace; - - /* skip until white space or '>' */ - - for (;;) - { - c = this.in.readChar(); - - if (c == StreamIn.EndOfStream || c == '>') - { - this.in.ungetChar(c); - break; - } - - map = MAP((char)c); - - if ((map & WHITE) == 0) - continue; - - /* and skip to end of whitespace */ - - for (;;) - { - c = this.in.readChar(); - - if (c == StreamIn.EndOfStream || c == '>') - { - this.in.ungetChar(c); - break; - } - - map = MAP((char)c); - - if ((map & WHITE) != 0) - continue; - - this.in.ungetChar(c); - break; - } - - break; - } - - /* if some text before < return it now */ - if (this.txtend > this.txtstart) - { - this.token = newNode(Node.TextNode, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - } - - this.txtstart = this.lexsize; - continue; - } - else if (c == '[') - { - /* Word 2000 embeds ... 
sequences */ - this.lexsize -= 2; - this.state = LEX_SECTION; - this.txtend = this.lexsize; - - /* if some text before < return it now */ - if (this.txtend > this.txtstart) - { - this.token = newNode(Node.TextNode, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - } - - this.txtstart = this.lexsize; - continue; - } - - /* otherwise swallow chars up to and including next '>' */ - while (true) - { - c = this.in.readChar(); - if (c == '>') break; - if (c == -1) - { - this.in.ungetChar(c); - break; - } - } - - this.lexsize -= 2; - this.lexbuf[this.lexsize] = (byte)'\0'; - this.state = LEX_CONTENT; - continue; - } - - /* - processing instructions - */ - - if (c == '?') - { - this.lexsize -= 2; - this.state = LEX_PROCINSTR; - this.txtend = this.lexsize; - - /* if some text before < return it now */ - if (this.txtend > this.txtstart) - { - this.token = newNode(Node.TextNode, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - } - - this.txtstart = this.lexsize; - continue; - } - - /* Microsoft ASP's e.g. <% ... server-code ... %> */ - if (c == '%') - { - this.lexsize -= 2; - this.state = LEX_ASP; - this.txtend = this.lexsize; - - /* if some text before < return it now */ - if (this.txtend > this.txtstart) - { - this.token = newNode(Node.TextNode, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - } - - this.txtstart = this.lexsize; - continue; - } - - /* Netscapes JSTE e.g. <# ... server-code ... 
#> */ - if (c == '#') - { - this.lexsize -= 2; - this.state = LEX_JSTE; - this.txtend = this.lexsize; - - /* if some text before < return it now */ - if (this.txtend > this.txtstart) - { - this.token = newNode(Node.TextNode, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - } - - this.txtstart = this.lexsize; - continue; - } - - map = MAP((char)c); - - /* check for start tag */ - if ((map & LETTER) != 0) - { - this.in.ungetChar(c); /* push back letter */ - this.lexsize -= 2; /* discard "<" + letter */ - this.txtend = this.lexsize; - this.state = LEX_STARTTAG; /* ready to read tag name */ - - /* if some text before < return it now */ - if (this.txtend > this.txtstart) - { - this.token = newNode(Node.TextNode, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - } - - continue; /* no text so keep going */ - } - - /* otherwise treat as CDATA */ - this.state = LEX_CONTENT; - this.waswhite = false; - continue; - - case LEX_ENDTAG: /* ' */ - while (c != '>') - { - c = this.in.readChar(); - - if (c == StreamIn.EndOfStream) - break; - } - - if (c == StreamIn.EndOfStream) - { - this.in.ungetChar(c); - continue; - } - - this.state = LEX_CONTENT; - this.waswhite = false; - return this.token; /* the endtag token */ - - case LEX_STARTTAG: /* first letter of tagname */ - this.txtstart = this.lexsize - 1; /* set txtstart to first letter */ - c = parseTagName(); - isempty.value = false; - attributes = null; - this.token = newNode((isempty.value ? 
Node.StartEndTag : Node.StartTag), - this.lexbuf, - this.txtstart, - this.txtend, - getString(this.lexbuf, - this.txtstart, - this.txtend - this.txtstart)); - - /* parse attributes, consuming closing ">" */ - if (c != '>') - { - if (c == '/') - this.in.ungetChar(c); - - attributes = parseAttrs(isempty); - } - - if (isempty.value) - this.token.type = Node.StartEndTag; - - this.token.attributes = attributes; - this.lexsize = this.txtstart; - this.txtend = this.txtstart; - - /* swallow newline following start tag */ - /* special check needed for CRLF sequence */ - /* this doesn't apply to empty elements */ - - if (expectsContent(this.token) || - this.token.tag == configuration.tt.tagBr) - { - - c = this.in.readChar(); - - if (c == '\r') - { - c = this.in.readChar(); - - if (c != '\n') - this.in.ungetChar(c); - } - else if (c != '\n' && c != '\f') - this.in.ungetChar(c); - - this.waswhite = true; /* to swallow leading whitespace */ - } - else - this.waswhite = false; - - this.state = LEX_CONTENT; - - if (this.token.tag == null) - Report.error(this, null, this.token, Report.UNKNOWN_ELEMENT); - else if (!this.configuration.XmlTags) - { - this.versions &= this.token.tag.versions; - - if ((this.token.tag.versions & Dict.VERS_PROPRIETARY) != 0) - { - if (!this.configuration.MakeClean && (this.token.tag == configuration.tt.tagNobr || - this.token.tag == configuration.tt.tagWbr)) - Report.warning(this, null, this.token, Report.PROPRIETARY_ELEMENT); - } - - if (this.token.tag.chkattrs != null) - { - this.token.checkUniqueAttributes(this); - this.token.tag.chkattrs.check(this, this.token); - } - else - this.token.checkAttributes(this); - } - - return this.token; /* return start tag */ - - case LEX_COMMENT: /* seen */ - - if (c != '-') - continue; - - c = this.in.readChar(); - addCharToLexer(c); - - if (c != '-') - continue; - - end_comment: while (true) { - c = this.in.readChar(); - - if (c == '>') - { - if (badcomment != 0) - Report.warning(this, null, null, 
Report.MALFORMED_COMMENT); - - this.txtend = this.lexsize - 2; // AQ 8Jul2000 - this.lexbuf[this.lexsize] = (byte)'\0'; - this.state = LEX_CONTENT; - this.waswhite = false; - this.token = newNode(Node.CommentTag, - this.lexbuf, - this.txtstart, - this.txtend); - - /* now look for a line break */ - - c = this.in.readChar(); - - if (c == '\r') - { - c = this.in.readChar(); - - if (c != '\n') - this.token.linebreak = true; - } - - if (c == '\n') - this.token.linebreak = true; - else - this.in.ungetChar(c); - - return this.token; - } - - /* note position of first such error in the comment */ - if (badcomment == 0) - { - this.lines = this.in.curline; - this.columns = this.in.curcol - 3; - } - - badcomment++; - if (this.configuration.FixComments) - this.lexbuf[this.lexsize - 2] = (byte)'='; - - addCharToLexer(c); - - /* if '-' then look for '>' to end the comment */ - if (c != '-') - break end_comment; - - } - /* otherwise continue to look for --> */ - this.lexbuf[this.lexsize - 2] = (byte)'='; - continue; - - case LEX_DOCTYPE: /* seen ' munging whitespace */ - map = MAP((char)c); - - if ((map & WHITE) != 0) - { - if (this.waswhite) - this.lexsize -= 1; - - this.waswhite = true; - } - else - this.waswhite = false; - - if (c != '>') - continue; - - this.lexsize -= 1; - this.txtend = this.lexsize; - this.lexbuf[this.lexsize] = (byte)'\0'; - this.state = LEX_CONTENT; - this.waswhite = false; - this.token = newNode(Node.DocTypeTag, - this.lexbuf, - this.txtstart, - this.txtend); - /* make a note of the version named by the doctype */ - this.doctype = findGivenVersion(this.token); - return this.token; - - case LEX_PROCINSTR: /* seen ' */ - /* check for PHP preprocessor instructions */ - - if (this.lexsize - this.txtstart == 3) - { - if ((getString(this.lexbuf, this.txtstart, 3)).equals("php")) - { - this.state = LEX_PHP; - continue; - } - } - - if (this.configuration.XmlPIs) /* insist on ?> as terminator */ - { - if (c != '?') - continue; - - /* now look for '>' */ - c = 
this.in.readChar(); - - if (c == StreamIn.EndOfStream) - { - Report.warning(this, null, null, Report.UNEXPECTED_END_OF_FILE); - this.in.ungetChar(c); - continue; - } - - addCharToLexer(c); - } - - if (c != '>') - continue; - - this.lexsize -= 1; - this.txtend = this.lexsize; - this.lexbuf[this.lexsize] = (byte)'\0'; - this.state = LEX_CONTENT; - this.waswhite = false; - this.token = newNode(Node.ProcInsTag, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - - case LEX_ASP: /* seen <% so look for "%>" */ - if (c != '%') - continue; - - /* now look for '>' */ - c = this.in.readChar(); - - - if (c != '>') - { - this.in.ungetChar(c); - continue; - } - - this.lexsize -= 1; - this.txtend = this.lexsize; - this.lexbuf[this.lexsize] = (byte)'\0'; - this.state = LEX_CONTENT; - this.waswhite = false; - this.token = newNode(Node.AspTag, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - - case LEX_JSTE: /* seen <# so look for "#>" */ - if (c != '#') - continue; - - /* now look for '>' */ - c = this.in.readChar(); - - - if (c != '>') - { - this.in.ungetChar(c); - continue; - } - - this.lexsize -= 1; - this.txtend = this.lexsize; - this.lexbuf[this.lexsize] = (byte)'\0'; - this.state = LEX_CONTENT; - this.waswhite = false; - this.token = newNode(Node.JsteTag, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - - case LEX_PHP: /* seen "" */ - if (c != '?') - continue; - - /* now look for '>' */ - c = this.in.readChar(); - - if (c != '>') - { - this.in.ungetChar(c); - continue; - } - - this.lexsize -= 1; - this.txtend = this.lexsize; - this.lexbuf[this.lexsize] = (byte)'\0'; - this.state = LEX_CONTENT; - this.waswhite = false; - this.token = newNode(Node.PhpTag, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - - case LEX_SECTION: /* seen "" */ - if (c == '[') - { - if (this.lexsize == (this.txtstart + 6) && - (getString(this.lexbuf, this.txtstart, 6)).equals("CDATA[")) - { - this.state = LEX_CDATA; - 
this.lexsize -= 6; - continue; - } - } - - if (c != ']') - continue; - - /* now look for '>' */ - c = this.in.readChar(); - - if (c != '>') - { - this.in.ungetChar(c); - continue; - } - - this.lexsize -= 1; - this.txtend = this.lexsize; - this.lexbuf[this.lexsize] = (byte)'\0'; - this.state = LEX_CONTENT; - this.waswhite = false; - this.token = newNode(Node.SectionTag, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - - case LEX_CDATA: /* seen "" */ - if (c != ']') - continue; - - /* now look for ']' */ - c = this.in.readChar(); - - if (c != ']') - { - this.in.ungetChar(c); - continue; - } - - /* now look for '>' */ - c = this.in.readChar(); - - if (c != '>') - { - this.in.ungetChar(c); - continue; - } - - this.lexsize -= 1; - this.txtend = this.lexsize; - this.lexbuf[this.lexsize] = (byte)'\0'; - this.state = LEX_CONTENT; - this.waswhite = false; - this.token = newNode(Node.CDATATag, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - } - } - - if (this.state == LEX_CONTENT) /* text string */ - { - this.txtend = this.lexsize; - - if (this.txtend > this.txtstart) - { - this.in.ungetChar(c); - - if (this.lexbuf[this.lexsize - 1] == (byte)' ') - { - this.lexsize -= 1; - this.txtend = this.lexsize; - } - - this.token = newNode(Node.TextNode, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - } - } - else if (this.state == LEX_COMMENT) /* comment */ - { - if (c == StreamIn.EndOfStream) - Report.warning(this, null, null, Report.MALFORMED_COMMENT); - - this.txtend = this.lexsize; - this.lexbuf[this.lexsize] = (byte)'\0'; - this.state = LEX_CONTENT; - this.waswhite = false; - this.token = newNode(Node.CommentTag, - this.lexbuf, - this.txtstart, - this.txtend); - return this.token; - } - - return null; - } - - /* - parser for ASP within start tags - - Some people use ASP for to customize attributes - Tidy isn't really well suited to dealing with ASP - This is a workaround for attributes, but won't - deal with the 
case where the ASP is used to tailor - the attribute value. Here is an example of a work - around for using ASP in attribute values: - - href="<%=rsSchool.Fields("ID").Value%>" - - where the ASP that generates the attribute value - is masked from Tidy by the quotemarks. - - */ - - public Node parseAsp() - { - int c; - Node asp = null; - - this.txtstart = this.lexsize; - - for (;;) - { - c = this.in.readChar(); - addCharToLexer(c); - - - if (c != '%') - continue; - - c = this.in.readChar(); - addCharToLexer(c); - - if (c == '>') - break; - } - - this.lexsize -= 2; - this.txtend = this.lexsize; - - if (this.txtend > this.txtstart) - asp = newNode(Node.AspTag, - this.lexbuf, - this.txtstart, - this.txtend); - - this.txtstart = this.txtend; - return asp; - } - - /* - PHP is like ASP but is based upon XML - processing instructions, e.g. - */ - public Node parsePhp() - { - int c; - Node php = null; - - this.txtstart = this.lexsize; - - for (;;) - { - c = this.in.readChar(); - addCharToLexer(c); - - - if (c != '?') - continue; - - c = this.in.readChar(); - addCharToLexer(c); - - if (c == '>') - break; - } - - this.lexsize -= 2; - this.txtend = this.lexsize; - - if (this.txtend > this.txtstart) - php = newNode(Node.PhpTag, - this.lexbuf, - this.txtstart, - this.txtend); - - this.txtstart = this.txtend; - return php; - } - - /* consumes the '>' terminating start tags */ - public String parseAttribute(MutableBoolean isempty, MutableObject asp, - MutableObject php) - { - int start = 0; - // int len = 0; Removed by BUGFIX for 126265 - short map; - String attr; - int c = 0; - - asp.setObject(null); /* clear asp pointer */ - php.setObject(null); /* clear php pointer */ - /* skip white space before the attribute */ - - for (;;) - { - c = this.in.readChar(); - - if (c == '/') - { - c = this.in.readChar(); - - if (c == '>') - { - isempty.value = true; - return null; - } - - this.in.ungetChar(c); - c = '/'; - break; - } - - if (c == '>') - return null; - - if (c =='<') - { - c = 
this.in.readChar(); - - if (c == '%') - { - asp.setObject(parseAsp()); - return null; - } - else if (c == '?') - { - php.setObject(parsePhp()); - return null; - } - - this.in.ungetChar(c); - Report.attrError(this, this.token, null, Report.UNEXPECTED_GT); - return null; - } - - if (c == '"' || c == '\'') - { - Report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK); - continue; - } - - if (c == StreamIn.EndOfStream) - { - Report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE); - this.in.ungetChar(c); - return null; - } - - map = MAP((char)c); - - if ((map & WHITE) == 0) - break; - } - - start = this.lexsize; - - for (;;) - { - /* but push back '=' for parseValue() */ - if (c == '=' || c == '>') - { - this.in.ungetChar(c); - break; - } - - if (c == '<' || c == StreamIn.EndOfStream) - { - this.in.ungetChar(c); - break; - } - - map = MAP((char)c); - - if ((map & WHITE) != 0) - break; - - /* what should be done about non-namechar characters? */ - /* currently these are incorporated into the attr name */ - - if (!this.configuration.XmlTags && (map & UPPERCASE) != 0) - c += (int)('a' - 'A'); - - // ++len; Removed by BUGFIX for 126265 - addCharToLexer(c); - - c = this.in.readChar(); - } - - // Following line added by GLP to fix BUG 126265. This is a temporary comment - // and should be removed when Tidy is fixed. - int len = this.lexsize - start; - attr = (len > 0 ? getString(this.lexbuf, start, len) : null); - this.lexsize = start; - - return attr; - } - - /* - invoked when < is seen in place of attribute value - but terminates on whitespace if not ASP, PHP or Tango - this routine recognizes ' and " quoted strings - */ - public int parseServerInstruction() - { - int c, map, delim = '"'; - boolean isrule = false; - - c = this.in.readChar(); - addCharToLexer(c); - - /* check for ASP, PHP or Tango */ - if (c == '%' || c == '?' 
|| c == '@') - isrule = true; - - for (;;) - { - c = this.in.readChar(); - - if (c == StreamIn.EndOfStream) - break; - - if (c == '>') - { - if (isrule) - addCharToLexer(c); - else - this.in.ungetChar(c); - - break; - } - - /* if not recognized as ASP, PHP or Tango */ - /* then also finish value on whitespace */ - if (!isrule) - { - map = MAP((char)c); - - if ((map & WHITE) != 0) - break; - } - - addCharToLexer(c); - - if (c == '"') - { - do - { - c = this.in.readChar(); - addCharToLexer(c); - } - while (c != '"'); - delim = '\''; - continue; - } - - if (c == '\'') - { - do - { - c = this.in.readChar(); - addCharToLexer(c); - } - while (c != '\''); - } - } - - return delim; - } - - /* values start with "=" or " = " etc. */ - /* doesn't consume the ">" at end of start tag */ - - public String parseValue(String name, boolean foldCase, - MutableBoolean isempty, MutableInteger pdelim) - { - int len = 0; - int start; - short map; - boolean seen_gt = false; - boolean munge = true; - int c = 0; - int lastc, delim, quotewarning; - String value; - - delim = 0; - pdelim.value = (int)'"'; - - /* - Henry Zrepa reports that some folk are using the - embed element with script attributes where newlines - are significant and must be preserved - */ - if (configuration.LiteralAttribs) - munge = false; - - /* skip white space before the '=' */ - - for (;;) - { - c = this.in.readChar(); - - if (c == StreamIn.EndOfStream) - { - this.in.ungetChar(c); - break; - } - - map = MAP((char)c); - - if ((map & WHITE) == 0) - break; - } - - /* - c should be '=' if there is a value - other legal possibilities are white - space, '/' and '>' - */ - - if (c != '=') - { - this.in.ungetChar(c); - return null; - } - - /* skip white space after '=' */ - - for (;;) - { - c = this.in.readChar(); - - if (c == StreamIn.EndOfStream) - { - this.in.ungetChar(c); - break; - } - - map = MAP((char)c); - - if ((map & WHITE) == 0) - break; - } - - /* check for quote marks */ - - if (c == '"' || c == '\'') - delim = 
c; - else if (c == '<') - { - start = this.lexsize; - addCharToLexer(c); - pdelim.value = parseServerInstruction(); - len = this.lexsize - start; - this.lexsize = start; - return (len > 0 ? getString(this.lexbuf, start, len) : null); - } - else - this.in.ungetChar(c); - - /* - and read the value string - check for quote mark if needed - */ - - quotewarning = 0; - start = this.lexsize; - c = '\0'; - - for (;;) - { - lastc = c; /* track last character */ - c = this.in.readChar(); - - if (c == StreamIn.EndOfStream) - { - Report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE); - this.in.ungetChar(c); - break; - } - - if (delim == (char)0) - { - if (c == '>') - { - this.in.ungetChar(c); - break; - } - - if (c == '"' || c == '\'') - { - Report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK); - break; - } - - if (c == '<') - { - /* this.in.ungetChar(c); */ - Report.attrError(this, this.token, null, Report.UNEXPECTED_GT); - /* break; */ - } - - /* - For cases like
    need to avoid treating /> as - part of the attribute value, however care is needed to avoid - so treating
    in this way, which - would map the tag to - */ - if (c == '/') - { - /* peek ahead in case of /> */ - c = this.in.readChar(); - - if (c == '>' && - !AttributeTable.getDefaultAttributeTable().isUrl(name)) - { - isempty.value = true; - this.in.ungetChar(c); - break; - } - - /* unget peeked char */ - this.in.ungetChar(c); - c = '/'; - } - } - else /* delim is '\'' or '"' */ - { - if (c == delim) - break; - - /* treat CRLF, CR and LF as single line break */ - - if (c == '\r') - { - c = this.in.readChar(); - if (c != '\n') - this.in.ungetChar(c); - - c = '\n'; - } - - if (c == '\n' || c == '<' || c == '>') - ++quotewarning; - - if (c == '>') - seen_gt = true; - } - - if (c == '&') - { - addCharToLexer(c); - parseEntity((short)0); - continue; - } - - /* - kludge for JavaScript attribute values - with line continuations in string literals - */ - if (c == '\\') - { - c = this.in.readChar(); - - if (c != '\n') - { - this.in.ungetChar(c); - c = '\\'; - } - } - - map = MAP((char)c); - - if ((map & WHITE) != 0) - { - if (delim == (char)0) - break; - - if (munge) - { - c = ' '; - - if (lastc == ' ') - continue; - } - } - else if (foldCase && (map & UPPERCASE) != 0) - c += (int)('a' - 'A'); - - addCharToLexer(c); - } - - if (quotewarning > 10 && seen_gt && munge) - { - /* - there is almost certainly a missing trailling quote mark - as we have see too many newlines, < or > characters. 
- - an exception is made for Javascript attributes and the - javascript URL scheme which may legitimately include < and > - */ - if (!AttributeTable.getDefaultAttributeTable().isScript(name) && - !(AttributeTable.getDefaultAttributeTable().isUrl(name) && - (getString(this.lexbuf, start, 11)).equals("javascript:"))) - Report.error(this, null, null, Report.SUSPECTED_MISSING_QUOTE); - } - - len = this.lexsize - start; - this.lexsize = start; - - if (len > 0 || delim != 0) - value = getString(this.lexbuf, start, len); - else - value = null; - - /* note delimiter if given */ - if (delim != 0) - pdelim.value = delim; - else - pdelim.value = (int)'"'; - - return value; - } - - /* attr must be non-null */ - public static boolean isValidAttrName(String attr) - { - short map; - char c; - int i; - - /* first character should be a letter */ - c = attr.charAt(0); - map = MAP(c); - - if (!((map & LETTER) != 0)) - return false; - - /* remaining characters should be namechars */ - for( i = 1; i < attr.length(); i++) - { - c = attr.charAt(i); - map = MAP(c); - - if((map & NAMECHAR) != 0) - continue; - - return false; - } - - return true; - } - - /* swallows closing '>' */ - - public AttVal parseAttrs(MutableBoolean isempty) - { - AttVal av, list; - String attribute, value; - MutableInteger delim = new MutableInteger(); - MutableObject asp = new MutableObject(); - MutableObject php = new MutableObject(); - - list = null; - - for (; !endOfInput();) - { - attribute = parseAttribute(isempty, asp, php); - - if (attribute == null) - { - /* check if attributes are created by ASP markup */ - if (asp.getObject() != null) - { - av = new AttVal(list, null, (Node)asp.getObject(), null, - '\0', null, null ); - list = av; - continue; - } - - /* check if attributes are created by PHP markup */ - if (php.getObject() != null) - { - av = new AttVal(list, null, null, (Node)php.getObject(), - '\0', null, null ); - list = av; - continue; - } - - break; - } - - value = parseValue(attribute, false, 
isempty, delim); - - if (attribute != null && isValidAttrName(attribute)) - { - av = new AttVal( list, null, null, null, - delim.value, attribute, value ); - av.dict = - AttributeTable.getDefaultAttributeTable().findAttribute(av); - list = av; - } - else - { - av = new AttVal( null, null, null, null, - 0, attribute, value ); - Report.attrError(this, this.token, value, Report.BAD_ATTRIBUTE_VALUE); - } - } - - return list; - } - - /* - push a copy of an inline node onto stack - but don't push if implicit or OBJECT or APPLET - (implicit tags are ones generated from the istack) - - One issue arises with pushing inlines when - the tag is already pushed. For instance: - -

    text -

    more text - - Shouldn't be mapped to - -

    text

    -

    more text - */ - public void pushInline( Node node ) - { - IStack is; - - if (node.implicit) - return; - - if (node.tag == null) - return; - - if ((node.tag.model & Dict.CM_INLINE) == 0 ) - return; - - if ((node.tag.model & Dict.CM_OBJECT) != 0) - return; - - if (node.tag != configuration.tt.tagFont && isPushed(node)) - return; - - // make sure there is enough space for the stack - is = new IStack(); - is.tag = node.tag; - is.element = node.element; - if (node.attributes != null) - is.attributes = cloneAttributes(node.attributes); - this.istack.push( is ); - } - - /* pop inline stack */ - public void popInline( Node node ) - { - AttVal av; - IStack is; - - if (node != null) { - - if (node.tag == null) - return; - - if ((node.tag.model & Dict.CM_INLINE) == 0) - return; - - if ((node.tag.model & Dict.CM_OBJECT) != 0) - return; - - // if node is then pop until we find an - if (node.tag == configuration.tt.tagA) { - - while (this.istack.size() > 0) { - is = (IStack)this.istack.pop(); - if (is.tag == configuration.tt.tagA) { - break; - } - } - - if (this.insert >= this.istack.size()) - this.insert = -1; - return; - } - } - - if (this.istack.size() > 0) { - is = (IStack)this.istack.pop(); - if (this.insert >= this.istack.size()) - this.insert = -1; - } - } - - public boolean isPushed( Node node ) - { - int i; - IStack is; - - for (i = this.istack.size() - 1; i >= 0; --i) { - is = (IStack)this.istack.elementAt(i); - if (is.tag == node.tag) - return true; - } - - return false; - } - - /* - This has the effect of inserting "missing" inline - elements around the contents of blocklevel elements - such as P, TD, TH, DIV, PRE etc. This procedure is - called at the start of ParseBlock. when the inline - stack is not empty, as will be the case in: - -

    italic heading

    - - which is then treated as equivalent to - -

    italic heading

    - - This is implemented by setting the lexer into a mode - where it gets tokens from the inline stack rather than - from the input stream. - */ - public int inlineDup( Node node ) - { - int n; - - n = this.istack.size() - this.istackbase; - if ( n > 0 ) { - this.insert = this.istackbase; - this.inode = node; - } - - return n; - } - - public Node insertedToken() - { - Node node; - IStack is; - int n; - - // this will only be null if inode != null - if (this.insert == -1) { - node = this.inode; - this.inode = null; - return node; - } - - // is this is the "latest" node then update - // the position, otherwise use current values - - if (this.inode == null) { - this.lines = this.in.curline; - this.columns = this.in.curcol; - } - - node = newNode(Node.StartTag, - this.lexbuf, - this.txtstart, - this.txtend); // GLP: Bugfix 126261. Remove when this change - // is fixed in istack.c in the original Tidy - node.implicit = true; - is = (IStack)this.istack.elementAt( this.insert ); - node.element = is.element; - node.tag = is.tag; - if (is.attributes != null) - node.attributes = cloneAttributes(is.attributes); - - // advance lexer to next item on the stack - n = this.insert; - - // and recover state if we have reached the end - if (++n < this.istack.size() ) { - this.insert = n; - } else { - this.insert = -1; - } - - return node; - } - - /* AQ: Try this for speed optimization */ - public static int wstrcasecmp(String s1, String s2) - { - return (s1.equalsIgnoreCase(s2) ? 0 : 1); - } - - public static int wstrcaselexcmp(String s1, String s2) - { - char c; - int i = 0; - - while ( i < s1.length() && i < s2.length() ) { - c = s1.charAt(i); - if ( toLower(c) != toLower( s2.charAt(i) ) ) { - break; - } - i += 1; - } - if ( i == s1.length() && i == s2.length() ) { - return 0; - } else if ( i == s1.length() ) { - return -1; - } else if ( i == s2.length() ) { - return 1; - } else { - return ( s1.charAt(i) > s2.charAt(i) ? 
1 : -1 ); - } - } - - public static boolean wsubstr(String s1, String s2) - { - int i; - int len1 = s1.length(); - int len2 = s2.length(); - - for (i = 0; i <= len1 - len2; ++i) - { - if (s2.equalsIgnoreCase(s1.substring(i))) - return true; - } - - return false; - } - - public boolean canPrune(Node element) - { - if (element.type == Node.TextNode) - return true; - - if (element.content != null) - return false; - - if (element.tag == configuration.tt.tagA && element.attributes != null) - return false; - - if (element.tag == configuration.tt.tagP && !this.configuration.DropEmptyParas) - return false; - - if (element.tag == null) - return false; - - if ((element.tag.model & Dict.CM_ROW) != 0) - return false; - - if (element.tag == configuration.tt.tagApplet) - return false; - - if (element.tag == configuration.tt.tagObject) - return false; - - if (element.attributes != null && - (element.getAttrByName("id") != null || - element.getAttrByName("name") != null) ) - return false; - - return true; - } - - /* duplicate name attribute as an id */ - public void fixId(Node node) - { - AttVal name = node.getAttrByName("name"); - AttVal id = node.getAttrByName("id"); - - if (name != null) - { - if (id != null) - { - if (!id.value.equals(name.value)) - Report.attrError(this, node, "name", Report.ID_NAME_MISMATCH); - } - else if (this.configuration.XmlOut) - node.addAttribute("id", name.value); - } - } - - /* - defer duplicates when entering a table or other - element where the inlines shouldn't be duplicated - */ - public void deferDup() - { - this.insert = -1; - this.inode = null; - } - - /* Private methods and fields */ - - /* lexer char types */ - private static final short DIGIT = 1; - private static final short LETTER = 2; - private static final short NAMECHAR = 4; - private static final short WHITE = 8; - private static final short NEWLINE = 16; - private static final short LOWERCASE = 32; - private static final short UPPERCASE = 64; - - /* lexer GetToken states */ - - 
private static final short LEX_CONTENT = 0; - private static final short LEX_GT = 1; - private static final short LEX_ENDTAG = 2; - private static final short LEX_STARTTAG = 3; - private static final short LEX_COMMENT = 4; - private static final short LEX_DOCTYPE = 5; - private static final short LEX_PROCINSTR = 6; - private static final short LEX_ENDCOMMENT = 7; - private static final short LEX_CDATA = 8; - private static final short LEX_SECTION = 9; - private static final short LEX_ASP = 10; - private static final short LEX_JSTE = 11; - private static final short LEX_PHP = 12; - - /* used to classify chars for lexical purposes */ - private static short[] lexmap = new short[128]; - - private static void mapStr(String str, short code) - { - int j; - - for ( int i = 0; i < str.length(); i++ ) { - j = (int)str.charAt(i); - lexmap[j] |= code; - } - } - - static { - mapStr("\r\n\f", (short)(NEWLINE|WHITE)); - mapStr(" \t", WHITE); - mapStr("-.:_", NAMECHAR); - mapStr("0123456789", (short)(DIGIT|NAMECHAR)); - mapStr("abcdefghijklmnopqrstuvwxyz", (short)(LOWERCASE|LETTER|NAMECHAR)); - mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short)(UPPERCASE|LETTER|NAMECHAR)); - } - - private static short MAP( char c ) - { - return ((int)c < 128 ? 
lexmap[(int)c] : 0); - } - - private static boolean isWhite(char c) - { - short m = MAP(c); - - return (m & WHITE) != 0; - } - - private static boolean isDigit(char c) - { - short m; - - m = MAP(c); - - return (m & DIGIT) != 0; - } - - private static boolean isLetter(char c) - { - short m; - - m = MAP(c); - - return (m & LETTER) != 0; - } - - private static char toLower(char c) - { - short m = MAP(c); - - if ((m & UPPERCASE) != 0) - c = (char)( (int)c + (int)'a' - (int)'A' ); - - return c; - } - - private static char toUpper(char c) - { - short m = MAP(c); - - if ((m & LOWERCASE) != 0) - c = (char)( (int)c + (int)'A' - (int)'a' ); - - return c; - } - - public static char foldCase(char c, boolean tocaps, boolean xmlTags) - { - short m; - - if (!xmlTags) - { - m = MAP(c); - - if (tocaps) - { - if ((m & LOWERCASE) != 0) - c = (char)( (int)c + (int)'A' - (int)'a' ); - } - else /* force to lower case */ - { - if ((m & UPPERCASE) != 0) - c = (char)( (int)c + (int)'a' - (int)'A' ); - } - } - - return c; - } - - - private static class W3CVersionInfo - { - String name; - String voyagerName; - String profile; - short code; - - public W3CVersionInfo( String name, - String voyagerName, - String profile, - short code ) - { - this.name = name; - this.voyagerName = voyagerName; - this.profile = profile; - this.code = code; - } - } - - /* the 3 URIs for the XHTML 1.0 DTDs */ - private static final String voyager_loose = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"; - private static final String voyager_strict = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; - private static final String voyager_frameset = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"; - - private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"; - - private static Lexer.W3CVersionInfo[] W3CVersion = - { - new W3CVersionInfo("HTML 4.01", - "XHTML 1.0 Strict", - voyager_strict, - Dict.VERS_HTML40_STRICT), - new W3CVersionInfo("HTML 4.01 Transitional", - "XHTML 1.0 
Transitional", - voyager_loose, - Dict.VERS_HTML40_LOOSE), - new W3CVersionInfo("HTML 4.01 Frameset", - "XHTML 1.0 Frameset", - voyager_frameset, - Dict.VERS_FRAMES), - new W3CVersionInfo("HTML 4.0", - "XHTML 1.0 Strict", - voyager_strict, - Dict.VERS_HTML40_STRICT), - new W3CVersionInfo("HTML 4.0 Transitional", - "XHTML 1.0 Transitional", - voyager_loose, - Dict.VERS_HTML40_LOOSE), - new W3CVersionInfo("HTML 4.0 Frameset", - "XHTML 1.0 Frameset", - voyager_frameset, - Dict.VERS_FRAMES), - new W3CVersionInfo("HTML 3.2", - "XHTML 1.0 Transitional", - voyager_loose, - Dict.VERS_HTML32), - new W3CVersionInfo("HTML 2.0", - "XHTML 1.0 Strict", - voyager_strict, - Dict.VERS_HTML20) - }; - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableBoolean.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableBoolean.java deleted file mode 100644 index de0e64e..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableBoolean.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * @(#)MutableBoolean.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Mutable Boolean - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from
    - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class MutableBoolean { - - public boolean value; - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableInteger.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableInteger.java deleted file mode 100644 index 00ef347..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableInteger.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * @(#)MutableInteger.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Mutable Integer - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class MutableInteger { - - public int value; - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableObject.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableObject.java deleted file mode 100644 index a66fa73..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableObject.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * @(#)MutableObject.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Mutable Object - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class MutableObject { - - public MutableObject() - { - this(null); - } - - public MutableObject(Object o) - { - this.value = o; - } - - public void setObject(Object o) - { - value = o; - } - - public Object getObject() - { - return value; - } - - private Object value; - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Node.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Node.java deleted file mode 100644 index e502702..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Node.java +++ /dev/null @@ -1,917 +0,0 @@ -/* - * @(#)Node.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Node - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -/* - Used for elements and text nodes - element name is null for text nodes - start and end are offsets into lexbuf - which contains the textual content of - all elements in the parse tree. - - parent and content allow traversal - of the parse tree in any direction. - attributes are represented as a linked - list of AttVal nodes which hold the - strings for attribute/value pairs. 
-*/ - -public class Node { - - public static final short RootNode = 0; - public static final short DocTypeTag = 1; - public static final short CommentTag = 2; - public static final short ProcInsTag = 3; - public static final short TextNode = 4; - public static final short StartTag = 5; - public static final short EndTag = 6; - public static final short StartEndTag = 7; - public static final short CDATATag = 8; - public static final short SectionTag = 9; - public static final short AspTag = 10; - public static final short JsteTag = 11; - public static final short PhpTag = 12; - - protected Node parent; - protected Node prev; - protected Node next; - protected Node last; - protected int start; /* start of span onto text array */ - protected int end; /* end of span onto text array */ - protected byte[] textarray; /* the text array */ - protected short type; /* TextNode, StartTag, EndTag etc. */ - protected boolean closed; /* true if closed by explicit end tag */ - protected boolean implicit; /* true if inferred */ - protected boolean linebreak; /* true if followed by a line break */ - protected Dict was; /* old tag when it was changed */ - protected Dict tag; /* tag's dictionary definition */ - protected String element; /* name (null for text nodes) */ - protected AttVal attributes; - protected Node content; - - public Node() - { - this(TextNode, null, 0, 0); - } - - public Node(short type, byte[] textarray, int start, int end) - { - this.parent = null; - this.prev = null; - this.next = null; - this.last = null; - this.start = start; - this.end = end; - this.textarray = textarray; - this.type = type; - this.closed = false; - this.implicit = false; - this.linebreak = false; - this.was = null; - this.tag = null; - this.element = null; - this.attributes = null; - this.content = null; - } - - public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt) - { - this.parent = null; - this.prev = null; - this.next = null; - this.last = null; - 
this.start = start; - this.end = end; - this.textarray = textarray; - this.type = type; - this.closed = false; - this.implicit = false; - this.linebreak = false; - this.was = null; - this.tag = null; - this.element = element; - this.attributes = null; - this.content = null; - if (type == StartTag || type == StartEndTag || type == EndTag) - tt.findTag(this); - } - - /* used to clone heading nodes when split by an
    */ - protected Object clone() - { - Node node = new Node(); - - node.parent = this.parent; - if (this.textarray != null) - { - node.textarray = new byte[this.end - this.start]; - node.start = 0; - node.end = this.end - this.start; - if (node.end > 0) - System.arraycopy(this.textarray, this.start, - node.textarray, node.start, node.end); - } - node.type = this.type; - node.closed = this.closed; - node.implicit = this.implicit; - node.linebreak = this.linebreak; - node.was = this.was; - node.tag = this.tag; - if (this.element != null) - node.element = this.element; - if (this.attributes != null) - node.attributes = (AttVal)this.attributes.clone(); - return node; - } - - public AttVal getAttrByName(String name) - { - AttVal attr; - - for (attr = this.attributes; attr != null; attr = attr.next) - { - if (name != null && - attr.attribute != null && - attr.attribute.equals(name)) - break; - } - - return attr; - } - - /* default method for checking an element's attributes */ - public void checkAttributes( Lexer lexer ) - { - AttVal attval; - - for (attval = this.attributes; attval != null; attval = attval.next) - attval.checkAttribute( lexer, this ); - } - - public void checkUniqueAttributes(Lexer lexer) - { - AttVal attval; - - for (attval = this.attributes; attval != null; attval = attval.next) { - if (attval.asp == null && attval.php == null) - attval.checkUniqueAttribute(lexer, this); - } - } - - public void addAttribute(String name, String value) - { - AttVal av = new AttVal(null, null, null, null, - '"', name, value); - av.dict = - AttributeTable.getDefaultAttributeTable().findAttribute(av); - - if (this.attributes == null) - this.attributes = av; - else /* append to end of attributes */ - { - AttVal here = this.attributes; - - while (here.next != null) - here = here.next; - - here.next = av; - } - } - - /* remove attribute from node then free it */ - public void removeAttribute(AttVal attr) - { - AttVal av; - AttVal prev = null; - AttVal next; - - for (av = 
this.attributes; av != null; av = next) - { - next = av.next; - - if (av == attr) - { - if (prev != null) - prev.next = next; - else - this.attributes = next; - } - else - prev = av; - } - } - - /* find doctype element */ - public Node findDocType() - { - Node node; - - for (node = this.content; - node != null && node.type != DocTypeTag; node = node.next); - - return node; - } - - public void discardDocType() - { - Node node; - - node = findDocType(); - if (node != null) - { - if (node.prev != null) - node.prev.next = node.next; - else - node.parent.content = node.next; - - if (node.next != null) - node.next.prev = node.prev; - - node.next = null; - } - } - - /* remove node from markup tree and discard it */ - public static Node discardElement(Node element) - { - Node next = null; - - if (element != null) - { - next = element.next; - removeNode(element); - } - - return next; - } - - /* insert node into markup tree */ - public static void insertNodeAtStart(Node element, Node node) - { - node.parent = element; - - if (element.content == null) - element.last = node; - else - element.content.prev = node; // AQ added 13 Apr 2000 - - node.next = element.content; - node.prev = null; - element.content = node; - } - - /* insert node into markup tree */ - public static void insertNodeAtEnd(Node element, Node node) - { - node.parent = element; - node.prev = element.last; - - if (element.last != null) - element.last.next = node; - else - element.content = node; - - element.last = node; - } - - /* - insert node into markup tree in pace of element - which is moved to become the child of the node - */ - public static void insertNodeAsParent(Node element, Node node) - { - node.content = element; - node.last = element; - node.parent = element.parent; - element.parent = node; - - if (node.parent.content == element) - node.parent.content = node; - - if (node.parent.last == element) - node.parent.last = node; - - node.prev = element.prev; - element.prev = null; - - if (node.prev != 
null) - node.prev.next = node; - - node.next = element.next; - element.next = null; - - if (node.next != null) - node.next.prev = node; - } - - /* insert node into markup tree before element */ - public static void insertNodeBeforeElement(Node element, Node node) - { - Node parent; - - parent = element.parent; - node.parent = parent; - node.next = element; - node.prev = element.prev; - element.prev = node; - - if (node.prev != null) - node.prev.next = node; - - if (parent.content == element) - parent.content = node; - } - - /* insert node into markup tree after element */ - public static void insertNodeAfterElement(Node element, Node node) - { - Node parent; - - parent = element.parent; - node.parent = parent; - - // AQ - 13Jan2000 fix for parent == null - if (parent != null && parent.last == element) - parent.last = node; - else - { - node.next = element.next; - // AQ - 13Jan2000 fix for node.next == null - if (node.next != null) - node.next.prev = node; - } - - element.next = node; - node.prev = element; - } - - public static void trimEmptyElement(Lexer lexer, Node element) - { - TagTable tt = lexer.configuration.tt; - - if (lexer.canPrune(element)) - { - if (element.type != TextNode) - Report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT); - - discardElement(element); - } - else if (element.tag == tt.tagP && element.content == null) - { - /* replace

    by

    to preserve formatting */ - Node node = lexer.inferredTag("br"); - Node.coerceNode(lexer, element, tt.tagBr); - Node.insertNodeAfterElement(element, node); - } - } - - /* - This maps - hello world - to - hello world - - If last child of element is a text node - then trim trailing white space character - moving it to after element's end tag. - */ - public static void trimTrailingSpace(Lexer lexer, Node element, Node last) - { - byte c; - TagTable tt = lexer.configuration.tt; - - if (last != null && last.type == Node.TextNode && - last.end > last.start) - { - c = lexer.lexbuf[last.end - 1]; - - if (c == 160 || c == (byte)' ') - { - /* take care with
  • */ - if (element.tag == tt.tagTd || - element.tag == tt.tagTh) - { - if (last.end > last.start + 1) - last.end -= 1; - } - else - { - last.end -= 1; - - if (((element.tag.model & Dict.CM_INLINE) != 0) && - !((element.tag.model & Dict.CM_FIELD) != 0)) - lexer.insertspace = true; - - /* if empty string then delete from parse tree */ - if (last.start == last.end) - trimEmptyElement(lexer, last); - } - } - } - } - - /* - This maps -

    hello world - to -

    hello world - - Trims initial space, by moving it before the - start tag, or if this element is the first in - parent's content, then by discarding the space - */ - public static void trimInitialSpace(Lexer lexer, Node element, Node text) - { - Node prev, node; - - // GLP: Local fix to Bug 119789. Remove this comment when parser.c is updated. - // 31-Oct-00. - if (text.type == TextNode && text.textarray[text.start] == (byte)' ' - && (text.start < text.end)) - { - if (((element.tag.model & Dict.CM_INLINE) != 0) && - !((element.tag.model & Dict.CM_FIELD) != 0) && - element.parent.content != element) - { - prev = element.prev; - - if (prev != null && prev.type == TextNode) - { - if (prev.textarray[prev.end - 1] != (byte)' ') - prev.textarray[prev.end++] = (byte)' '; - - ++element.start; - } - else /* create new node */ - { - node = lexer.newNode(); - // Local fix for bug 228486 (GLP). This handles the case - // where we need to create a preceeding text node but there are - // no "slots" in textarray that we can steal from the current - // element. Therefore, we create a new textarray containing - // just the blank. When Tidy is fixed, this should be removed. - if (element.start >= element.end) - { - node.start = 0; - node.end = 1; - node.textarray = new byte[1]; - } - else - { - node.start = element.start++; - node.end = element.start; - node.textarray = element.textarray; - } - node.textarray[node.start] = (byte)' '; - node.prev = prev; - if (prev != null) - prev.next = node; - node.next = element; - element.prev = node; - node.parent = element.parent; - } - } - - /* discard the space in current node */ - ++text.start; - } - } - - /* - Move initial and trailing space out. 
- This routine maps: - - hello world - to - hello world - and - hello world - to - hello world - */ - public static void trimSpaces(Lexer lexer, Node element) - { - Node text = element.content; - TagTable tt = lexer.configuration.tt; - - if (text != null && text.type == Node.TextNode && - element.tag != tt.tagPre) - trimInitialSpace(lexer, element, text); - - text = element.last; - - if (text != null && text.type == Node.TextNode) - trimTrailingSpace(lexer, element, text); - } - - public boolean isDescendantOf(Dict tag) - { - Node parent; - - for (parent = this.parent; - parent != null; parent = parent.parent) - { - if (parent.tag == tag) - return true; - } - - return false; - } - - /* - the doctype has been found after other tags, - and needs moving to before the html element - */ - public static void insertDocType(Lexer lexer, Node element, Node doctype) - { - TagTable tt = lexer.configuration.tt; - - Report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS); - - while (element.tag != tt.tagHtml) - element = element.parent; - - insertNodeBeforeElement(element, doctype); - } - - public Node findBody(TagTable tt) - { - Node node; - - node = this.content; - - while (node != null && node.tag != tt.tagHtml) - node = node.next; - - if (node == null) - return null; - - node = node.content; - - while (node != null && node.tag != tt.tagBody) - node = node.next; - - return node; - } - - public boolean isElement() - { - return (this.type == StartTag || this.type == StartEndTag ? true : false); - } - - /* - unexpected content in table row is moved to just before - the table in accordance with Netscape and IE. This code - assumes that node hasn't been inserted into the row. 
- */ - public static void moveBeforeTable(Node row, Node node, TagTable tt) - { - Node table; - - /* first find the table element */ - for (table = row.parent; table != null; table = table.parent) - { - if (table.tag == tt.tagTable) - { - if (table.parent.content == table) - table.parent.content = node; - - node.prev = table.prev; - node.next = table; - table.prev = node; - node.parent = table.parent; - - if (node.prev != null) - node.prev.next = node; - - break; - } - } - } - - /* - if a table row is empty then insert an empty cell - this practice is consistent with browser behavior - and avoids potential problems with row spanning cells - */ - public static void fixEmptyRow(Lexer lexer, Node row) - { - Node cell; - - if (row.content == null) - { - cell = lexer.inferredTag("td"); - insertNodeAtEnd(row, cell); - Report.warning(lexer, row, cell, Report.MISSING_STARTTAG); - } - } - - public static void coerceNode(Lexer lexer, Node node, Dict tag) - { - Node tmp = lexer.inferredTag(tag.name); - Report.warning(lexer, node, tmp, Report.OBSOLETE_ELEMENT); - node.was = node.tag; - node.tag = tag; - node.type = StartTag; - node.implicit = true; - node.element = tag.name; - } - - /* extract a node and its children from a markup tree */ - public static void removeNode(Node node) - { - if (node.prev != null) - node.prev.next = node.next; - - if (node.next != null) - node.next.prev = node.prev; - - if (node.parent != null) - { - if (node.parent.content == node) - node.parent.content = node.next; - - if (node.parent.last == node) - node.parent.last = node.prev; - } - - node.parent = node.prev = node.next = null; - } - - public static boolean insertMisc(Node element, Node node) - { - if (node.type == CommentTag || - node.type == ProcInsTag || - node.type == CDATATag || - node.type == SectionTag || - node.type == AspTag || - node.type == JsteTag || - node.type == PhpTag) - { - insertNodeAtEnd(element, node); - return true; - } - - return false; - } - - /* - used to determine how 
attributes - without values should be printed - this was introduced to deal with - user defined tags e.g. Cold Fusion - */ - public static boolean isNewNode(Node node) - { - if (node != null && node.tag != null) - { - return ((node.tag.model & Dict.CM_NEW) != 0); - } - - return true; - } - - public boolean hasOneChild() - { - return (this.content != null && this.content.next == null); - } - - /* find html element */ - public Node findHTML(TagTable tt) - { - Node node; - - for (node = this.content; - node != null && node.tag != tt.tagHtml; node = node.next); - - return node; - } - - public Node findHEAD(TagTable tt) - { - Node node; - - node = this.findHTML(tt); - - if (node != null) - { - for (node = node.content; - node != null && node.tag != tt.tagHead; - node = node.next); - } - - return node; - } - - public boolean checkNodeIntegrity() - { - Node child; - boolean found = false; - - if (this.prev != null) - { - if (this.prev.next != this) - return false; - } - - if (this.next != null) - { - if (this.next.prev != this) - return false; - } - - if (this.parent != null) - { - if (this.prev == null && this.parent.content != this) - return false; - - if (this.next == null && this.parent.last != this) - return false; - - for (child = this.parent.content; child != null; child = child.next) - if (child == this) - { - found = true; - break; - } - - if (!found) - return false; - } - - for (child = this.content; child != null; child = child.next) - if (!child.checkNodeIntegrity()) - return false; - - return true; - } - - /* - Add class="foo" to node - */ - public static void addClass(Node node, String classname) - { - AttVal classattr = node.getAttrByName("class"); - - /* - if there already is a class attribute - then append class name after a space - */ - if (classattr != null) - { - classattr.value = classattr.value + " " + classname; - } - else /* create new class attribute */ - node.addAttribute("class", classname); - } - - /* --------------------- DEBUG 
-------------------------- */ - - private static final String[] nodeTypeString = - { - "RootNode", - "DocTypeTag", - "CommentTag", - "ProcInsTag", - "TextNode", - "StartTag", - "EndTag", - "StartEndTag", - "SectionTag", - "AspTag", - "PhpTag" - }; - - public String toString() - { - String s = ""; - Node n = this; - - while (n != null) { - s += "[Node type="; - s += nodeTypeString[n.type]; - s += ",element="; - if (n.element != null) - s += n.element; - else - s += "null"; - if (n.type == TextNode || - n.type == CommentTag || - n.type == ProcInsTag) { - s += ",text="; - if (n.textarray != null && n.start <= n.end) { - s += "\""; - s += Lexer.getString(n.textarray, n.start, n.end - n.start); - s += "\""; - } else { - s += "null"; - } - } - s += ",content="; - if (n.content != null) - s += n.content.toString(); - else - s += "null"; - s += "]"; - if (n.next != null) - s += ","; - n = n.next; - } - return s; - } - /* --------------------- END DEBUG ---------------------- */ - - - /* --------------------- DOM ---------------------------- */ - - protected org.w3c.dom.Node adapter = null; - - protected org.w3c.dom.Node getAdapter() - { - if (adapter == null) - { - switch (this.type) - { - case RootNode: - adapter = new DOMDocumentImpl(this); - break; - case StartTag: - case StartEndTag: - adapter = new DOMElementImpl(this); - break; - case DocTypeTag: - adapter = new DOMDocumentTypeImpl(this); - break; - case CommentTag: - adapter = new DOMCommentImpl(this); - break; - case TextNode: - adapter = new DOMTextImpl(this); - break; - case CDATATag: - adapter = new DOMCDATASectionImpl(this); - break; - case ProcInsTag: - adapter = new DOMProcessingInstructionImpl(this); - break; - default: - adapter = new DOMNodeImpl(this); - } - } - return adapter; - } - - protected Node cloneNode(boolean deep) - { - Node node = (Node)this.clone(); - if (deep) - { - Node child; - Node newChild; - for (child = this.content; child != null; child = child.next) - { - newChild = 
child.cloneNode(deep); - insertNodeAtEnd(node, newChild); - } - } - return node; - } - - - protected void setType(short newType) - { - this.type = newType; - } - - /* --------------------- END DOM ------------------------ */ - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Out.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Out.java deleted file mode 100644 index ac6916d..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Out.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * @(#)Out.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Output Stream - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -import java.io.OutputStream; - -public abstract class Out -{ - public int encoding; - public int state; /* for ISO 2022 */ - public OutputStream out; - - public abstract void outc(int c); - - public abstract void outc(byte c); - - public abstract void newline(); - -}; - diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/OutImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/OutImpl.java deleted file mode 100644 index 1701502..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/OutImpl.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * 
@(#)OutImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Output Stream Implementation - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -import java.io.IOException; - -public class OutImpl extends Out -{ - - public OutImpl() - { - this.out = null; - } - - public void outc(byte c) { - outc(((int)c) & 0xFF); // Convert to unsigned. - } - - /* For mac users, should we map Unicode back to MacRoman? 
*/ - public void outc(int c) - { - int ch; - - try { - if (this.encoding == Configuration.UTF8) - { - if (c < 128) - this.out.write(c); - else if (c <= 0x7FF) - { - ch = (0xC0 | (c >> 6)); this.out.write(ch); - ch = (0x80 | (c & 0x3F)); this.out.write(ch); - } - else if (c <= 0xFFFF) - { - ch = (0xE0 | (c >> 12)); this.out.write(ch); - ch = (0x80 | ((c >> 6) & 0x3F)); this.out.write(ch); - ch = (0x80 | (c & 0x3F)); this.out.write(ch); - } - else if (c <= 0x1FFFFF) - { - ch = (0xF0 | (c >> 18)); this.out.write(ch); - ch = (0x80 | ((c >> 12) & 0x3F)); this.out.write(ch); - ch = (0x80 | ((c >> 6) & 0x3F)); this.out.write(ch); - ch = (0x80 | (c & 0x3F)); this.out.write(ch); - } - else - { - ch = (0xF8 | (c >> 24)); this.out.write(ch); - ch = (0x80 | ((c >> 18) & 0x3F)); this.out.write(ch); - ch = (0x80 | ((c >> 12) & 0x3F)); this.out.write(ch); - ch = (0x80 | ((c >> 6) & 0x3F)); this.out.write(ch); - ch = (0x80 | (c & 0x3F)); this.out.write(ch); - } - } - else if (this.encoding == Configuration.ISO2022) - { - if (c == 0x1b) /* ESC */ - this.state = StreamIn.FSM_ESC; - else - { - switch (this.state) - { - case StreamIn.FSM_ESC: - if (c == '$') - this.state = StreamIn.FSM_ESCD; - else if (c == '(') - this.state = StreamIn.FSM_ESCP; - else - this.state = StreamIn.FSM_ASCII; - break; - - case StreamIn.FSM_ESCD: - if (c == '(') - this.state = StreamIn.FSM_ESCDP; - else - this.state = StreamIn.FSM_NONASCII; - break; - - case StreamIn.FSM_ESCDP: - this.state = StreamIn.FSM_NONASCII; - break; - - case StreamIn.FSM_ESCP: - this.state = StreamIn.FSM_ASCII; - break; - - case StreamIn.FSM_NONASCII: - c &= 0x7F; - break; - } - } - - this.out.write(c); - } - else - this.out.write(c); - } - catch (IOException e) { - System.err.println("OutImpl.outc: " + e.toString()); - } - } - - public void newline() - { - try { - this.out.write(nlBytes); - this.out.flush(); - } - catch (IOException e) { - System.err.println("OutImpl.newline: " + e.toString()); - } - } - - private static final 
byte[] nlBytes = - (System.getProperty("line.separator")).getBytes(); - -}; - diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/PPrint.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/PPrint.java deleted file mode 100644 index aba3656..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/PPrint.java +++ /dev/null @@ -1,1845 +0,0 @@ -/* - * @(#)PPrint.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Pretty print parse tree - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -/* - Block-level and unknown elements are printed on - new lines and their contents indented 2 spaces - - Inline elements are printed inline. 
- - Inline content is wrapped on spaces (except in - attribute values or preformatted text, after - start tags and before end tags -*/ - -import java.io.FileOutputStream; -import java.io.File; - -import java.io.IOException; -import java.io.FileNotFoundException; - -public class PPrint { - - /* page transition effects */ - - public static final short EFFECT_BLEND = -1; - public static final short EFFECT_BOX_IN = 0; - public static final short EFFECT_BOX_OUT = 1; - public static final short EFFECT_CIRCLE_IN = 2; - public static final short EFFECT_CIRCLE_OUT = 3; - public static final short EFFECT_WIPE_UP = 4; - public static final short EFFECT_WIPE_DOWN = 5; - public static final short EFFECT_WIPE_RIGHT = 6; - public static final short EFFECT_WIPE_LEFT = 7; - public static final short EFFECT_VERT_BLINDS = 8; - public static final short EFFECT_HORZ_BLINDS = 9; - public static final short EFFECT_CHK_ACROSS = 10; - public static final short EFFECT_CHK_DOWN = 11; - public static final short EFFECT_RND_DISSOLVE = 12; - public static final short EFFECT_SPLIT_VIRT_IN = 13; - public static final short EFFECT_SPLIT_VIRT_OUT = 14; - public static final short EFFECT_SPLIT_HORZ_IN = 15; - public static final short EFFECT_SPLIT_HORZ_OUT = 16; - public static final short EFFECT_STRIPS_LEFT_DOWN = 17; - public static final short EFFECT_STRIPS_LEFT_UP = 18; - public static final short EFFECT_STRIPS_RIGHT_DOWN = 19; - public static final short EFFECT_STRIPS_RIGHT_UP = 20; - public static final short EFFECT_RND_BARS_HORZ = 21; - public static final short EFFECT_RND_BARS_VERT = 22; - public static final short EFFECT_RANDOM = 23; - - private static final short NORMAL = 0; - private static final short PREFORMATTED = 1; - private static final short COMMENT = 2; - private static final short ATTRIBVALUE = 4; - private static final short NOWRAP = 8; - private static final short CDATA = 16; - - private int[] linebuf = null; - private int lbufsize = 0; - private int linelen = 0; - private int 
wraphere = 0; - private boolean inAttVal = false; - private boolean InString = false; - - private int slide = 0; - private int count = 0; - private Node slidecontent = null; - - private Configuration configuration; - - public PPrint(Configuration configuration) - { - this.configuration = configuration; - } - - /* - 1010 A - 1011 B - 1100 C - 1101 D - 1110 E - 1111 F - */ - - /* return one less that the number of bytes used by UTF-8 char */ - /* str points to 1st byte, *ch initialized to 1st byte */ - public static int getUTF8(byte[] str, int start, MutableInteger ch) - { - int c, n, i, bytes; - - c = ((int)str[start]) & 0xFF; // Convert to unsigned. - - if ((c & 0xE0) == 0xC0) /* 110X XXXX two bytes */ - { - n = c & 31; - bytes = 2; - } - else if ((c & 0xF0) == 0xE0) /* 1110 XXXX three bytes */ - { - n = c & 15; - bytes = 3; - } - else if ((c & 0xF8) == 0xF0) /* 1111 0XXX four bytes */ - { - n = c & 7; - bytes = 4; - } - else if ((c & 0xFC) == 0xF8) /* 1111 10XX five bytes */ - { - n = c & 3; - bytes = 5; - } - else if ((c & 0xFE) == 0xFC) /* 1111 110X six bytes */ - - { - n = c & 1; - bytes = 6; - } - else /* 0XXX XXXX one byte */ - { - ch.value = c; - return 0; - } - - /* successor bytes should have the form 10XX XXXX */ - for (i = 1; i < bytes; ++i) - { - c = ((int)str[start + i]) & 0xFF; // Convert to unsigned. 
- n = (n << 6) | (c & 0x3F); - } - - ch.value = n; - return bytes - 1; - } - - /* store char c as UTF-8 encoded byte stream */ - public static int putUTF8(byte[] buf, int start, int c) - { - if (c < 128) - buf[start++] = (byte)c; - else if (c <= 0x7FF) - { - buf[start++] = (byte)(0xC0 | (c >> 6)); - buf[start++] = (byte)(0x80 | (c & 0x3F)); - } - else if (c <= 0xFFFF) - { - buf[start++] = (byte)(0xE0 | (c >> 12)); - buf[start++] = (byte)(0x80 | ((c >> 6) & 0x3F)); - buf[start++] = (byte)(0x80 | (c & 0x3F)); - } - else if (c <= 0x1FFFFF) - { - buf[start++] = (byte)(0xF0 | (c >> 18)); - buf[start++] = (byte)(0x80 | ((c >> 12) & 0x3F)); - buf[start++] = (byte)(0x80 | ((c >> 6) & 0x3F)); - buf[start++] = (byte)(0x80 | (c & 0x3F)); - } - else - { - buf[start++] = (byte)(0xF8 | (c >> 24)); - buf[start++] = (byte)(0x80 | ((c >> 18) & 0x3F)); - buf[start++] = (byte)(0x80 | ((c >> 12) & 0x3F)); - buf[start++] = (byte)(0x80 | ((c >> 6) & 0x3F)); - buf[start++] = (byte)(0x80 | (c & 0x3F)); - } - - return start; - } - - private void addC(int c, int index) - { - if (index + 1 >= lbufsize) - { - while (index + 1 >= lbufsize) - { - if (lbufsize == 0) - lbufsize = 256; - else - lbufsize = lbufsize * 2; - } - - int[] temp = new int[ lbufsize ]; - if (linebuf != null) - System.arraycopy(linebuf, 0, temp, 0, index); - linebuf = temp; - } - - linebuf[index] = c; - } - - private void wrapLine(Out fout, int indent) - { - int i, p, q; - - if (wraphere == 0) - return; - - for (i = 0; i < indent; ++i) - fout.outc((int)' '); - - for (i = 0; i < wraphere; ++i) - fout.outc(linebuf[i]); - - if (InString) - { - fout.outc((int)' '); - fout.outc((int)'\\'); - } - - fout.newline(); - - if (linelen > wraphere) - { - p = 0; - - if (linebuf[wraphere] == ' ') - ++wraphere; - - q = wraphere; - addC('\0', linelen); - - while (true) - { - linebuf[p] = linebuf[q]; - if (linebuf[q] == 0) break; - p++; - q++; - } - linelen -= wraphere; - } - else - linelen = 0; - - wraphere = 0; - } - - private void 
wrapAttrVal(Out fout, int indent, boolean inString) - { - int i, p, q; - - for (i = 0; i < indent; ++i) - fout.outc((int)' '); - - for (i = 0; i < wraphere; ++i) - fout.outc(linebuf[i]); - - fout.outc((int)' '); - - if (inString) - fout.outc((int)'\\'); - - fout.newline(); - - if (linelen > wraphere) - { - p = 0; - - if (linebuf[wraphere] == ' ') - ++wraphere; - - q = wraphere; - addC('\0', linelen); - - while (true) - { - linebuf[p] = linebuf[q]; - if (linebuf[q] == 0) break; - p++; - q++; - } - linelen -= wraphere; - } - else - linelen = 0; - - wraphere = 0; - } - - public void flushLine(Out fout, int indent) - { - int i; - - if (linelen > 0) - { - if (indent + linelen >= this.configuration.wraplen) - wrapLine(fout, indent); - - if (!inAttVal || this.configuration.IndentAttributes) - { - for (i = 0; i < indent; ++i) - fout.outc((int)' '); - } - - for (i = 0; i < linelen; ++i) - fout.outc(linebuf[i]); - } - - fout.newline(); - linelen = 0; - wraphere = 0; - inAttVal = false; - } - - public void condFlushLine(Out fout, int indent) - { - int i; - - if (linelen > 0) - { - if (indent + linelen >= this.configuration.wraplen) - wrapLine(fout, indent); - - if (!inAttVal || this.configuration.IndentAttributes) - { - for (i = 0; i < indent; ++i) - fout.outc((int)' '); - } - - for (i = 0; i < linelen; ++i) - fout.outc(linebuf[i]); - - fout.newline(); - linelen = 0; - wraphere = 0; - inAttVal = false; - } - } - - private void printChar(int c, short mode) - { - String entity; - - if (c == ' ' && !((mode & (PREFORMATTED | COMMENT | ATTRIBVALUE)) != 0)) - { - /* coerce a space character to a non-breaking space */ - if ((mode & NOWRAP) != 0) - { - /* by default XML doesn't define   */ - if (this.configuration.NumEntities || this.configuration.XmlTags) - { - addC('&', linelen++); - addC('#', linelen++); - addC('1', linelen++); - addC('6', linelen++); - addC('0', linelen++); - addC(';', linelen++); - } - else /* otherwise use named entity */ - { - addC('&', linelen++); - addC('n', 
linelen++); - addC('b', linelen++); - addC('s', linelen++); - addC('p', linelen++); - addC(';', linelen++); - } - return; - } - else - wraphere = linelen; - } - - /* comment characters are passed raw */ - if ((mode & COMMENT) != 0) - { - addC(c, linelen++); - return; - } - - /* except in CDATA map < to < etc. */ - if (! ((mode & CDATA) != 0) ) - { - if (c == '<') - { - addC('&', linelen++); - addC('l', linelen++); - addC('t', linelen++); - addC(';', linelen++); - return; - } - - if (c == '>') - { - addC('&', linelen++); - addC('g', linelen++); - addC('t', linelen++); - addC(';', linelen++); - return; - } - - /* - naked '&' chars can be left alone or - quoted as & The latter is required - for XML where naked '&' are illegal. - */ - if (c == '&' && this.configuration.QuoteAmpersand) - { - addC('&', linelen++); - addC('a', linelen++); - addC('m', linelen++); - addC('p', linelen++); - addC(';', linelen++); - return; - } - - if (c == '"' && this.configuration.QuoteMarks) - { - addC('&', linelen++); - addC('q', linelen++); - addC('u', linelen++); - addC('o', linelen++); - addC('t', linelen++); - addC(';', linelen++); - return; - } - - if (c == '\'' && this.configuration.QuoteMarks) - { - addC('&', linelen++); - addC('#', linelen++); - addC('3', linelen++); - addC('9', linelen++); - addC(';', linelen++); - return; - } - - if (c == 160 && this.configuration.CharEncoding != Configuration.RAW) - { - if (this.configuration.QuoteNbsp) - { - addC('&', linelen++); - - if (this.configuration.NumEntities) - { - addC('#', linelen++); - addC('1', linelen++); - addC('6', linelen++); - addC('0', linelen++); - } - else - { - addC('n', linelen++); - addC('b', linelen++); - addC('s', linelen++); - addC('p', linelen++); - } - - addC(';', linelen++); - } - else - addC(c, linelen++); - - return; - } - } - - /* otherwise ISO 2022 characters are passed raw */ - if (this.configuration.CharEncoding == Configuration.ISO2022 || - this.configuration.CharEncoding == Configuration.RAW) - { - addC(c, 
linelen++); - return; - } - - /* if preformatted text, map   to space */ - if (c == 160 && ((mode & PREFORMATTED) != 0)) - { - addC(' ', linelen++); - return; - } - - /* - Filters from Word and PowerPoint often use smart - quotes resulting in character codes between 128 - and 159. Unfortunately, the corresponding HTML 4.0 - entities for these are not widely supported. The - following converts dashes and quotation marks to - the nearest ASCII equivalent. My thanks to - Andrzej Novosiolov for his help with this code. - */ - - if (this.configuration.MakeClean) - { - if (c >= 0x2013 && c <= 0x201E) - { - switch (c) { - case 0x2013: - case 0x2014: - c = '-'; - break; - case 0x2018: - case 0x2019: - case 0x201A: - c = '\''; - break; - case 0x201C: - case 0x201D: - case 0x201E: - c = '"'; - break; - } - } - } - - /* don't map latin-1 chars to entities */ - if (this.configuration.CharEncoding == Configuration.LATIN1) - { - if (c > 255) /* multi byte chars */ - { - if (!this.configuration.NumEntities) - { - entity = EntityTable.getDefaultEntityTable().entityName((short)c); - if (entity != null) - entity = "&" + entity + ";"; - else - entity = "&#" + c + ";"; - } - else - entity = "&#" + c + ";"; - - for (int i = 0; i < entity.length(); i++) - addC((int)entity.charAt(i), linelen++); - - return; - } - - if (c > 126 && c < 160) - { - entity = "&#" + c + ";"; - - for (int i = 0; i < entity.length(); i++) - addC((int)entity.charAt(i), linelen++); - - return; - } - - addC(c, linelen++); - return; - } - - /* don't map utf8 chars to entities */ - if (this.configuration.CharEncoding == Configuration.UTF8) - { - addC(c, linelen++); - return; - } - - /* use numeric entities only for XML */ - if (this.configuration.XmlTags) - { - /* if ASCII use numeric entities for chars > 127 */ - if (c > 127 && this.configuration.CharEncoding == Configuration.ASCII) - { - entity = "&#" + c + ";"; - - for (int i = 0; i < entity.length(); i++) - addC((int)entity.charAt(i), linelen++); - - return; - } 
- - /* otherwise output char raw */ - addC(c, linelen++); - return; - } - - /* default treatment for ASCII */ - if (c > 126 || (c < ' ' && c != '\t')) - { - if (!this.configuration.NumEntities) - { - entity = EntityTable.getDefaultEntityTable().entityName((short)c); - if (entity != null) - entity = "&" + entity + ";"; - else - entity = "&#" + c + ";"; - } - else - entity = "&#" + c + ";"; - - for (int i = 0; i < entity.length(); i++) - addC((int)entity.charAt(i), linelen++); - - return; - } - - addC(c, linelen++); - } - - /* - The line buffer is uint not char so we can - hold Unicode values unencoded. The translation - to UTF-8 is deferred to the outc routine called - to flush the line buffer. - */ - private void printText(Out fout, short mode, int indent, - byte[] textarray, int start, int end) - { - int i, c; - MutableInteger ci = new MutableInteger(); - - for (i = start; i < end; ++i) - { - if (indent + linelen >= this.configuration.wraplen) - wrapLine(fout, indent); - - c = ((int)textarray[i]) & 0xFF; // Convert to unsigned. - - /* look for UTF-8 multibyte character */ - if (c > 0x7F) - { - i += getUTF8(textarray, i, ci); - c = ci.value; - } - - if (c == '\n') - { - flushLine(fout, indent); - continue; - } - - printChar(c, mode); - } - } - - private void printString(Out fout, int indent, String str) - { - for (int i = 0; i < str.length(); i++ ) - addC((int)str.charAt(i), linelen++); - } - - private void printAttrValue(Out fout, int indent, String value, int delim, boolean wrappable) - { - int c; - MutableInteger ci = new MutableInteger(); - boolean wasinstring = false; - byte[] valueChars = null; - int i; - short mode = (wrappable ? 
(short)(NORMAL | ATTRIBVALUE) : - (short)(PREFORMATTED | ATTRIBVALUE)); - - if (value != null) - { - valueChars = Lexer.getBytes(value); - } - - /* look for ASP, Tango or PHP instructions for computed attribute value */ - if (valueChars != null && valueChars.length >= 5 && valueChars[0] == '<') - { - if (valueChars[1] == '%' || valueChars[1] == '@'|| - (new String(valueChars, 0, 5)).equals("= this.configuration.wraplen) - wrapLine(fout, indent); - - if (indent + linelen < this.configuration.wraplen) - wraphere = linelen; - else - condFlushLine(fout, indent); - } - - addC(delim, linelen++); - - if (value != null) - { - InString = false; - - i = 0; - while (i < valueChars.length) - { - c = ((int)valueChars[i]) & 0xFF; // Convert to unsigned. - - if (wrappable && c == ' ' && indent + linelen < this.configuration.wraplen) - { - wraphere = linelen; - wasinstring = InString; - } - - if (wrappable && wraphere > 0 && indent + linelen >= this.configuration.wraplen) - wrapAttrVal(fout, indent, wasinstring); - - if (c == delim) - { - String entity; - - entity = (c == '"' ? 
""" : "'"); - - for (int j = 0; j < entity.length(); j++ ) - addC(entity.charAt(j), linelen++); - - ++i; - continue; - } - else if (c == '"') - { - if (this.configuration.QuoteMarks) - { - addC('&', linelen++); - addC('q', linelen++); - addC('u', linelen++); - addC('o', linelen++); - addC('t', linelen++); - addC(';', linelen++); - } - else - addC('"', linelen++); - - if (delim == '\'') - InString = !InString; - - ++i; - continue; - } - else if (c == '\'') - { - if (this.configuration.QuoteMarks) - { - addC('&', linelen++); - addC('#', linelen++); - addC('3', linelen++); - addC('9', linelen++); - addC(';', linelen++); - } - else - addC('\'', linelen++); - - if (delim == '"') - InString = !InString; - - ++i; - continue; - } - - /* look for UTF-8 multibyte character */ - if (c > 0x7F) - { - i += getUTF8(valueChars, i, ci); - c = ci.value; - } - - ++i; - - if (c == '\n') - { - flushLine(fout, indent); - continue; - } - - printChar(c, mode); - } - } - - InString = false; - addC(delim, linelen++); - } - - private void printAttribute(Out fout, int indent, Node node, AttVal attr) - { - String name; - boolean wrappable = false; - - if (this.configuration.IndentAttributes) - { - flushLine(fout, indent); - indent += this.configuration.spaces; - } - - name = attr.attribute; - - if (indent + linelen >= this.configuration.wraplen) - wrapLine(fout, indent); - - if (!this.configuration.XmlTags && !this.configuration.XmlOut && attr.dict != null) - { - if (AttributeTable.getDefaultAttributeTable().isScript(name)) - wrappable = this.configuration.WrapScriptlets; - else if (!attr.dict.nowrap && this.configuration.WrapAttVals) - wrappable = true; - } - - if (indent + linelen < this.configuration.wraplen) - { - wraphere = linelen; - addC(' ', linelen++); - } - else - { - condFlushLine(fout, indent); - addC(' ', linelen++); - } - - for (int i = 0; i < name.length(); i++ ) - addC((int)Lexer.foldCase(name.charAt(i), - this.configuration.UpperCaseAttrs, - this.configuration.XmlTags), - 
linelen++); - - if (indent + linelen >= this.configuration.wraplen) - wrapLine(fout, indent); - - if (attr.value == null) - { - if (this.configuration.XmlTags || this.configuration.XmlOut) - printAttrValue(fout, indent, attr.attribute, attr.delim, true); - else if (!attr.isBoolAttribute() && !Node.isNewNode(node)) - printAttrValue(fout, indent, "", attr.delim, true); - else if (indent + linelen < this.configuration.wraplen) - wraphere = linelen; - - } - else - printAttrValue(fout, indent, attr.value, attr.delim, wrappable); - } - - private void printAttrs(Out fout, int indent, - Node node, AttVal attr) - { - if (attr != null) - { - if (attr.next != null) - printAttrs(fout, indent, node, attr.next); - - if (attr.attribute != null) - printAttribute(fout, indent, node, attr); - else if (attr.asp != null) - { - addC(' ', linelen++); - printAsp(fout, indent, attr.asp); - } - else if (attr.php != null) - { - addC(' ', linelen++); - printPhp(fout, indent, attr.php); - } - } - - /* add xml:space attribute to pre and other elements */ - if (configuration.XmlOut && - configuration.XmlSpace && - ParserImpl.XMLPreserveWhiteSpace(node, configuration.tt) && - node.getAttrByName("xml:space") == null) - printString(fout, indent, " xml:space=\"preserve\""); - } - - /* - Line can be wrapped immediately after inline start tag provided - if follows a text node ending in a space, or it parent is an - inline element that that rule applies to. This behaviour was - reverse engineered from Netscape 3.0 - */ - private static boolean afterSpace(Node node) - { - Node prev; - int c; - - if (node == null || node.tag == null || !((node.tag.model & Dict.CM_INLINE) != 0)) - return true; - - prev = node.prev; - - if (prev != null) - { - if (prev.type == Node.TextNode && prev.end > prev.start) - { - c = ((int)prev.textarray[prev.end - 1]) & 0xFF; // Convert to unsigned. 
- - if (c == 160 || c == ' ' || c == '\n') - return true; - } - - return false; - } - - return afterSpace(node.parent); - } - - private void printTag(Lexer lexer, Out fout, short mode, int indent, Node node) - { - char c; - String p; - TagTable tt = this.configuration.tt; - - addC('<', linelen++); - - if (node.type == Node.EndTag) - addC('/', linelen++); - - p = node.element; - for (int i = 0; i < p.length(); i++ ) - addC((int)Lexer.foldCase(p.charAt(i), - this.configuration.UpperCaseTags, - this.configuration.XmlTags), - linelen++); - - printAttrs(fout, indent, node, node.attributes); - - if ((this.configuration.XmlOut || lexer != null && lexer.isvoyager) && - (node.type == Node.StartEndTag || (node.tag.model & Dict.CM_EMPTY) != 0)) - { - addC(' ', linelen++); /* compatibility hack */ - addC('/', linelen++); - } - - addC('>', linelen++);; - - if (node.type != Node.StartEndTag && !((mode & PREFORMATTED) != 0)) - { - if (indent + linelen >= this.configuration.wraplen) - wrapLine(fout, indent); - - if (indent + linelen < this.configuration.wraplen) - { - /* - wrap after start tag if is
    or if it's not - inline or it is an empty tag followed by - */ - if (afterSpace(node)) - { - if (!((mode & NOWRAP) != 0) && - (!((node.tag.model & Dict.CM_INLINE) != 0) || - (node.tag == tt.tagBr) || - (((node.tag.model & Dict.CM_EMPTY) != 0) && - node.next == null && - node.parent.tag == tt.tagA))) - { - wraphere = linelen; - } - } - } - else - condFlushLine(fout, indent); - } - } - - private void printEndTag(Out fout, short mode, int indent, Node node) - { - char c; - String p; - - /* - Netscape ignores SGML standard by not ignoring a - line break before or etc. To avoid rendering - this as an underlined space, I disable line wrapping - before inline end tags by the #if 0 ... #endif - */ -if (false) { - if (indent + linelen < this.configuration.wraplen && !((mode & NOWRAP) != 0)) - wraphere = linelen; -} - - addC('<', linelen++); - addC('/', linelen++); - - p = node.element; - for (int i = 0; i < p.length(); i++ ) - addC((int)Lexer.foldCase(p.charAt(i), - this.configuration.UpperCaseTags, - this.configuration.XmlTags), - linelen++); - - addC('>', linelen++); - } - - private void printComment(Out fout, int indent, Node node) - { - if (indent + linelen < this.configuration.wraplen) - wraphere = linelen; - - addC('<', linelen++); - addC('!', linelen++); - addC('-', linelen++); - addC('-', linelen++); -if (false) { - if (linelen < this.configuration.wraplen) - wraphere = linelen; -} - printText(fout, COMMENT, indent, - node.textarray, node.start, node.end); -if (false) { - if (indent + linelen < this.configuration.wraplen) - wraphere = linelen; -} - // See Lexer.java: AQ 8Jul2000 - addC('-', linelen++); - addC('-', linelen++); - addC('>', linelen++); - - if (node.linebreak) - flushLine(fout, indent); - } - - private void printDocType(Out fout, int indent, Node node) - { - boolean q = this.configuration.QuoteMarks; - - this.configuration.QuoteMarks = false; - - if (indent + linelen < this.configuration.wraplen) - wraphere = linelen; - - condFlushLine(fout, 
indent); - - addC('<', linelen++); - addC('!', linelen++); - addC('D', linelen++); - addC('O', linelen++); - addC('C', linelen++); - addC('T', linelen++); - addC('Y', linelen++); - addC('P', linelen++); - addC('E', linelen++); - addC(' ', linelen++); - - if (indent + linelen < this.configuration.wraplen) - wraphere = linelen; - - printText(fout, (short)0, indent, - node.textarray, node.start, node.end); - - if (linelen < this.configuration.wraplen) - wraphere = linelen; - - addC('>', linelen++); - this.configuration.QuoteMarks = q; - condFlushLine(fout, indent); - } - - private void printPI(Out fout, int indent, Node node) - { - if (indent + linelen < this.configuration.wraplen) - wraphere = linelen; - - addC('<', linelen++); - addC('?', linelen++); - - /* set CDATA to pass < and > unescaped */ - printText(fout, CDATA, indent, - node.textarray, node.start, node.end); - - if (node.textarray[node.end - 1] != (byte)'?') - addC('?', linelen++); - - addC('>', linelen++); - condFlushLine(fout, indent); - } - - /* note ASP and JSTE share <% ... %> syntax */ - private void printAsp(Out fout, int indent, Node node) - { - int savewraplen = this.configuration.wraplen; - - /* disable wrapping if so requested */ - - if (!this.configuration.WrapAsp || !this.configuration.WrapJste) - this.configuration.wraplen = 0xFFFFFF; /* a very large number */ -if (false) { //#if 0 - if (indent + linelen < this.configuration.wraplen) - wraphere = linelen; -} //#endif - - addC('<', linelen++); - addC('%', linelen++); - - printText(fout, (this.configuration.WrapAsp ? CDATA : COMMENT), indent, - node.textarray, node.start, node.end); - - addC('%', linelen++); - addC('>', linelen++); - /* condFlushLine(fout, indent); */ - this.configuration.wraplen = savewraplen; - } - - /* JSTE also supports <# ... 
#> syntax */ - private void printJste(Out fout, int indent, Node node) - { - int savewraplen = this.configuration.wraplen; - - /* disable wrapping if so requested */ - - if (!this.configuration.WrapJste) - this.configuration.wraplen = 0xFFFFFF; /* a very large number */ - - addC('<', linelen++); - addC('#', linelen++); - - printText(fout, (this.configuration.WrapJste ? CDATA : COMMENT), indent, - node.textarray, node.start, node.end); - - addC('#', linelen++); - addC('>', linelen++); - /* condFlushLine(fout, indent); */ - this.configuration.wraplen = savewraplen; - } - - /* PHP is based on XML processing instructions */ - private void printPhp(Out fout, int indent, Node node) - { - int savewraplen = this.configuration.wraplen; - - /* disable wrapping if so requested */ - - if (!this.configuration.WrapPhp) - this.configuration.wraplen = 0xFFFFFF; /* a very large number */ - -if (false) { //#if 0 - if (indent + linelen < this.configuration.wraplen) - wraphere = linelen; -} //#endif - addC('<', linelen++); - addC('?', linelen++); - - printText(fout, (this.configuration.WrapPhp ? 
CDATA : COMMENT), indent, - node.textarray, node.start, node.end); - - addC('?', linelen++); - addC('>', linelen++); - /* PCondFlushLine(fout, indent); */ - this.configuration.wraplen = savewraplen; - } - - private void printCDATA(Out fout, int indent, Node node) - { - int savewraplen = this.configuration.wraplen; - - condFlushLine(fout, indent); - - /* disable wrapping */ - - this.configuration.wraplen = 0xFFFFFF; /* a very large number */ - - addC('<', linelen++); - addC('!', linelen++); - addC('[', linelen++); - addC('C', linelen++); - addC('D', linelen++); - addC('A', linelen++); - addC('T', linelen++); - addC('A', linelen++); - addC('[', linelen++); - - printText(fout, COMMENT, indent, - node.textarray, node.start, node.end); - - addC(']', linelen++); - addC(']', linelen++); - addC('>', linelen++); - condFlushLine(fout, indent); - this.configuration.wraplen = savewraplen; - } - - private void printSection(Out fout, int indent, Node node) - { - int savewraplen = this.configuration.wraplen; - - /* disable wrapping if so requested */ - - if (!this.configuration.WrapSection) - this.configuration.wraplen = 0xFFFFFF; /* a very large number */ - -if (false) { //#if 0 - if (indent + linelen < this.configuration.wraplen) - wraphere = linelen; -} //#endif - addC('<', linelen++); - addC('!', linelen++); - addC('[', linelen++); - - printText(fout, (this.configuration.WrapSection ? 
CDATA : COMMENT), indent, - node.textarray, node.start, node.end); - - addC(']', linelen++); - addC('>', linelen++); - /* PCondFlushLine(fout, indent); */ - this.configuration.wraplen = savewraplen; - } - - private boolean shouldIndent(Node node) - { - TagTable tt = this.configuration.tt; - - if (!this.configuration.IndentContent) - return false; - - if (this.configuration.SmartIndent) - { - if (node.content != null && ((node.tag.model & Dict.CM_NO_INDENT) != 0)) - { - for (node = node.content; node != null; node = node.next) - if (node.tag != null && (node.tag.model & Dict.CM_BLOCK) != 0) - return true; - - return false; - } - - if ((node.tag.model & Dict.CM_HEADING) != 0) - return false; - - if (node.tag == tt.tagP) - return false; - - if (node.tag == tt.tagTitle) - return false; - } - - if ((node.tag.model & (Dict.CM_FIELD | Dict.CM_OBJECT)) != 0) - return true; - - if (node.tag == tt.tagMap) - return true; - - return !((node.tag.model & Dict.CM_INLINE) != 0); - } - - public void printTree(Out fout, short mode, int indent, - Lexer lexer, Node node) - { - Node content, last; - TagTable tt = this.configuration.tt; - - if (node == null) - return; - - if (node.type == Node.TextNode) - printText(fout, mode, indent, - node.textarray, node.start, node.end); - else if (node.type == Node.CommentTag) - { - printComment(fout, indent, node); - } - else if (node.type == Node.RootNode) - { - for (content = node.content; - content != null; - content = content.next) - printTree(fout, mode, indent, lexer, content); - } - else if (node.type == Node.DocTypeTag) - printDocType(fout, indent, node); - else if (node.type == Node.ProcInsTag) - printPI(fout, indent, node); - else if (node.type == Node.CDATATag) - printCDATA(fout, indent, node); - else if (node.type == Node.SectionTag) - printSection(fout, indent, node); - else if (node.type == Node.AspTag) - printAsp(fout, indent, node); - else if (node.type == Node.JsteTag) - printJste(fout, indent, node); - else if (node.type == 
Node.PhpTag) - printPhp(fout, indent, node); - else if ((node.tag.model & Dict.CM_EMPTY) != 0 || node.type == Node.StartEndTag) - { - if (!((node.tag.model & Dict.CM_INLINE) != 0)) - condFlushLine(fout, indent); - - if (node.tag == tt.tagBr && node.prev != null && - node.prev.tag != tt.tagBr && this.configuration.BreakBeforeBR) - flushLine(fout, indent); - - if (this.configuration.MakeClean && node.tag == tt.tagWbr) - printString(fout, indent, " "); - else - printTag(lexer, fout, mode, indent, node); - - if (node.tag == tt.tagParam || node.tag == tt.tagArea) - condFlushLine(fout, indent); - else if (node.tag == tt.tagBr || node.tag == tt.tagHr) - flushLine(fout, indent); - } - else /* some kind of container element */ - { - if (node.tag != null && node.tag.parser == ParserImpl.getParsePre()) - { - condFlushLine(fout, indent); - - indent = 0; - condFlushLine(fout, indent); - printTag(lexer, fout, mode, indent, node); - flushLine(fout, indent); - - for (content = node.content; - content != null; - content = content.next) - printTree(fout, (short)(mode | PREFORMATTED | NOWRAP), indent, lexer, content); - - condFlushLine(fout, indent); - printEndTag(fout, mode, indent, node); - flushLine(fout, indent); - - if (this.configuration.IndentContent == false && node.next != null) - flushLine(fout, indent); - } - else if (node.tag == tt.tagStyle || node.tag == tt.tagScript) - { - condFlushLine(fout, indent); - - indent = 0; - condFlushLine(fout, indent); - printTag(lexer, fout, mode, indent, node); - flushLine(fout, indent); - - for (content = node.content; - content != null; - content = content.next) - printTree(fout, (short)(mode | PREFORMATTED | NOWRAP |CDATA), indent, lexer, content); - - condFlushLine(fout, indent); - printEndTag(fout, mode, indent, node); - flushLine(fout, indent); - - if (this.configuration.IndentContent == false && node.next != null) - flushLine(fout, indent); - } - else if ((node.tag.model & Dict.CM_INLINE) != 0) - { - if 
(this.configuration.MakeClean) - { - /* discards and tags */ - if (node.tag == tt.tagFont) - { - for (content = node.content; - content != null; - content = content.next) - printTree(fout, mode, indent, lexer, content); - return; - } - - /* replace ... by   or   etc. */ - if (node.tag == tt.tagNobr) - { - for (content = node.content; - content != null; - content = content.next) - printTree(fout, (short)(mode|NOWRAP), indent, lexer, content); - return; - } - } - - /* otherwise a normal inline element */ - - printTag(lexer, fout, mode, indent, node); - - /* indent content for SELECT, TEXTAREA, MAP, OBJECT and APPLET */ - - if (shouldIndent(node)) - { - condFlushLine(fout, indent); - indent += this.configuration.spaces; - - for (content = node.content; - content != null; - content = content.next) - printTree(fout, mode, indent, lexer, content); - - condFlushLine(fout, indent); - indent -= this.configuration.spaces; - condFlushLine(fout, indent); - } - else - { - - for (content = node.content; - content != null; - content = content.next) - printTree(fout, mode, indent, lexer, content); - } - - printEndTag(fout, mode, indent, node); - } - else /* other tags */ - { - condFlushLine(fout, indent); - - if (this.configuration.SmartIndent && node.prev != null) - flushLine(fout, indent); - - if (this.configuration.HideEndTags == false || - !(node.tag != null && ((node.tag.model & Dict.CM_OMITST) != 0))) - { - printTag(lexer, fout, mode, indent, node); - - if (shouldIndent(node)) - condFlushLine(fout, indent); - else if ((node.tag.model & Dict.CM_HTML) != 0 || - node.tag == tt.tagNoframes || - ((node.tag.model & Dict.CM_HEAD) != 0 && - !(node.tag == tt.tagTitle))) - flushLine(fout, indent); - } - - if (node.tag == tt.tagBody && this.configuration.BurstSlides) - printSlide(fout, mode, (this.configuration.IndentContent ? 
indent+this.configuration.spaces : indent), lexer); - else - { - last = null; - - for (content = node.content; - content != null; content = content.next) - { - /* kludge for naked text before block level tag */ - if (last != null && !this.configuration.IndentContent && last.type == Node.TextNode && - content.tag != null && (content.tag.model & Dict.CM_BLOCK) != 0) - { - flushLine(fout, indent); - flushLine(fout, indent); - } - - printTree(fout, mode, - (shouldIndent(node) ? indent+this.configuration.spaces : indent), lexer, content); - - last = content; - } - } - - /* don't flush line for td and th */ - if (shouldIndent(node) || - (((node.tag.model & Dict.CM_HTML) != 0 || node.tag == tt.tagNoframes || - ((node.tag.model & Dict.CM_HEAD) != 0 && !(node.tag == tt.tagTitle))) - && this.configuration.HideEndTags == false)) - { - condFlushLine(fout, (this.configuration.IndentContent ? indent+this.configuration.spaces : indent)); - - if (this.configuration.HideEndTags == false || !((node.tag.model & Dict.CM_OPT) != 0)) - { - printEndTag(fout, mode, indent, node); - flushLine(fout, indent); - } - } - else - { - if (this.configuration.HideEndTags == false || !((node.tag.model & Dict.CM_OPT) != 0)) - printEndTag(fout, mode, indent, node); - - flushLine(fout, indent); - } - - if (this.configuration.IndentContent == false && - node.next != null && - this.configuration.HideEndTags == false && - (node.tag.model & (Dict.CM_BLOCK|Dict.CM_LIST|Dict.CM_DEFLIST|Dict.CM_TABLE)) != 0) - { - flushLine(fout, indent); - } - } - } - } - - public void printXMLTree(Out fout, short mode, int indent, - Lexer lexer, Node node) - { - TagTable tt = this.configuration.tt; - - if (node == null) - return; - - if (node.type == Node.TextNode) - { - printText(fout, mode, indent, - node.textarray, node.start, node.end); - } - else if (node.type == Node.CommentTag) - { - condFlushLine(fout, indent); - printComment(fout, 0, node); - condFlushLine(fout, 0); - } - else if (node.type == Node.RootNode) - { - 
Node content; - - for (content = node.content; - content != null; - content = content.next) - printXMLTree(fout, mode, indent, lexer, content); - } - else if (node.type == Node.DocTypeTag) - printDocType(fout, indent, node); - else if (node.type == Node.ProcInsTag) - printPI(fout, indent, node); - else if (node.type == Node.SectionTag) - printSection(fout, indent, node); - else if (node.type == Node.AspTag) - printAsp(fout, indent, node); - else if (node.type == Node.JsteTag) - printJste(fout, indent, node); - else if (node.type == Node.PhpTag) - printPhp(fout, indent, node); - else if ((node.tag.model & Dict.CM_EMPTY) != 0 || node.type == Node.StartEndTag) - { - condFlushLine(fout, indent); - printTag(lexer, fout, mode, indent, node); - flushLine(fout, indent); - - if (node.next != null) - flushLine(fout, indent); - } - else /* some kind of container element */ - { - Node content; - boolean mixed = false; - int cindent; - - for (content = node.content; content != null; content = content.next) - { - if (content.type == Node.TextNode) - { - mixed = true; - break; - } - } - - condFlushLine(fout, indent); - - if (ParserImpl.XMLPreserveWhiteSpace(node, tt)) - { - indent = 0; - cindent = 0; - mixed = false; - } - else if (mixed) - cindent = indent; - else - cindent = indent + this.configuration.spaces; - - printTag(lexer, fout, mode, indent, node); - - if (!mixed) - flushLine(fout, indent); - - for (content = node.content; - content != null; - content = content.next) - printXMLTree(fout, mode, cindent, lexer, content); - - if (!mixed) - condFlushLine(fout, cindent); - printEndTag(fout, mode, indent, node); - condFlushLine(fout, indent); - - if (node.next != null) - flushLine(fout, indent); - } - } - - - /* split parse tree by h2 elements and output to separate files */ - - /* counts number of h2 children belonging to node */ - public int countSlides(Node node) - { - int n = 1; - TagTable tt = this.configuration.tt; - - for (node = node.content; node != null; node = 
node.next) - if (node.tag == tt.tagH2) - ++n; - - return n; - } - - /* - inserts a space gif called "dot.gif" to ensure - that the slide is at least n pixels high - */ - private void printVertSpacer(Out fout, int indent) - { - condFlushLine(fout, indent); - printString(fout, indent , - ""); - condFlushLine(fout, indent); - } - - private void printNavBar(Out fout, int indent) - { - String buf; - - condFlushLine(fout, indent); - printString(fout, indent , "

    "); - - if (slide > 1) - { - buf = "previous | "; - printString(fout, indent , buf); - condFlushLine(fout, indent); - - if (slide < count) - printString(fout, indent , "start | "); - else - printString(fout, indent , "start"); - - condFlushLine(fout, indent); - } - - if (slide < count) - { - buf = "next"; - printString(fout, indent , buf); - } - - printString(fout, indent , "
    "); - condFlushLine(fout, indent); - } - - /* - Called from printTree to print the content of a slide from - the node slidecontent. On return slidecontent points to the - node starting the next slide or null. The variables slide - and count are used to customise the navigation bar. - */ - public void printSlide(Out fout, short mode, int indent, Lexer lexer) - { - Node content, last; - TagTable tt = this.configuration.tt; - - /* insert div for onclick handler */ - String s; - s = "
    "; - printString(fout, indent, s); - condFlushLine(fout, indent); - - /* first print the h2 element and navbar */ - if (slidecontent.tag == tt.tagH2) - { - printNavBar(fout, indent); - - /* now print an hr after h2 */ - - addC('<', linelen++); - - - addC((int)Lexer.foldCase('h', - this.configuration.UpperCaseTags, - this.configuration.XmlTags), - linelen++); - addC((int)Lexer.foldCase('r', - this.configuration.UpperCaseTags, - this.configuration.XmlTags), - linelen++); - - if (this.configuration.XmlOut == true) - printString(fout, indent , " />"); - else - addC('>', linelen++); - - - if (this.configuration.IndentContent == true) - condFlushLine(fout, indent); - - /* PrintVertSpacer(fout, indent); */ - - /*condFlushLine(fout, indent); */ - - /* print the h2 element */ - printTree(fout, mode, - (this.configuration.IndentContent ? indent+this.configuration.spaces : indent), lexer, slidecontent); - - slidecontent = slidecontent.next; - } - - /* now continue until we reach the next h2 */ - - last = null; - content = slidecontent; - - for (; content != null; content = content.next) - { - if (content.tag == tt.tagH2) - break; - - /* kludge for naked text before block level tag */ - if (last != null && !this.configuration.IndentContent && last.type == Node.TextNode && - content.tag != null && (content.tag.model & Dict.CM_BLOCK) != 0) - { - flushLine(fout, indent); - flushLine(fout, indent); - } - - printTree(fout, mode, - (this.configuration.IndentContent ? indent+this.configuration.spaces : indent), lexer, content); - - last = content; - } - - slidecontent = content; - - /* now print epilog */ - - condFlushLine(fout, indent); - - printString(fout, indent , "
    "); - condFlushLine(fout, indent); - - addC('<', linelen++); - - - addC((int)Lexer.foldCase('h', - this.configuration.UpperCaseTags, - this.configuration.XmlTags), - linelen++); - addC((int)Lexer.foldCase('r', - this.configuration.UpperCaseTags, - this.configuration.XmlTags), - linelen++); - - if (this.configuration.XmlOut == true) - printString(fout, indent , " />"); - else - addC('>', linelen++); - - - if (this.configuration.IndentContent == true) - condFlushLine(fout, indent); - - printNavBar(fout, indent); - - /* end tag for div */ - printString(fout, indent, "
    "); - condFlushLine(fout, indent); - } - - - /* - Add meta element for page transition effect, this works on IE but not NS - */ - - public void addTransitionEffect(Lexer lexer, Node root, short effect, double duration) - { - Node head = root.findHEAD(lexer.configuration.tt); - String transition; - - if (0 <= effect && effect <= 23) - transition = "revealTrans(Duration=" + - (new Double(duration)).toString() + - ",Transition=" + effect + ")"; - else - transition = "blendTrans(Duration=" + - (new Double(duration)).toString() + ")"; - - if (head != null) - { - Node meta = lexer.inferredTag("meta"); - meta.addAttribute("http-equiv", "Page-Enter"); - meta.addAttribute("content", transition); - Node.insertNodeAtStart(head, meta); - } - } - - public void createSlides(Lexer lexer, Node root) - { - Node body; - String buf; - Out out = new OutImpl(); - - body = root.findBody(lexer.configuration.tt); - count = countSlides(body); - slidecontent = body.content; - addTransitionEffect(lexer, root, EFFECT_BLEND, 3.0); - - for (slide = 1; slide <= count; ++slide) - { - buf = "slide" + slide + ".html"; - out.state = StreamIn.FSM_ASCII; - out.encoding = this.configuration.CharEncoding; - - try - { - out.out = new FileOutputStream(buf); - printTree(out, (short)0, 0, lexer, root); - flushLine(out, 0); - out.out.close(); - } - catch (IOException e) - { - System.err.println(buf + e.toString() ); - } - } - - /* - delete superfluous slides by deleting slideN.html - for N = count+1, count+2, etc. until no such file - is found. 
- */ - - for (;;) - { - buf = "slide" + slide + "html"; - - if (!(new File(buf)).delete()) - break; - - ++slide; - } - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Parser.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Parser.java deleted file mode 100644 index b3851ea..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Parser.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * @(#)Parser.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * HTML Parser - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public interface Parser { - - public void parse( Lexer lexer, Node node, short mode ); - -} - diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/ParserImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/ParserImpl.java deleted file mode 100644 index 044cfa9..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/ParserImpl.java +++ /dev/null @@ -1,3205 +0,0 @@ -/* - * @(#)ParserImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * HTML Parser implementation - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from - * HTML Tidy Release 4 Aug 2000 - * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class ParserImpl { - - //private static int SeenBodyEndTag; /* AQ: moved into lexer structure */ - - private static void parseTag(Lexer lexer, Node node, short mode) - { - // Local fix by GLP 2000-12-21. Need to reset insertspace if this - // is both a non-inline and empty tag (base, link, meta, isindex, hr, area). - // Remove this code once the fix is made in Tidy. 
- -/****** (Original code follows) - if ((node.tag.model & Dict.CM_EMPTY) != 0) - { - lexer.waswhite = false; - return; - } - else if (!((node.tag.model & Dict.CM_INLINE) != 0)) - lexer.insertspace = false; -*******/ - - if (!((node.tag.model & Dict.CM_INLINE) != 0)) - lexer.insertspace = false; - - if ((node.tag.model & Dict.CM_EMPTY) != 0) - { - lexer.waswhite = false; - return; - } - - if (node.tag.parser == null || node.type == Node.StartEndTag) - return; - - node.tag.parser.parse(lexer, node, mode); - } - - private static void moveToHead(Lexer lexer, Node element, Node node) - { - Node head; - TagTable tt = lexer.configuration.tt; - - - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - { - Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN); - - while (element.tag != tt.tagHtml) - element = element.parent; - - for (head = element.content; head != null; head = head.next) - { - if (head.tag == tt.tagHead) - { - Node.insertNodeAtEnd(head, node); - break; - } - } - - if (node.tag.parser != null) - parseTag(lexer, node, Lexer.IgnoreWhitespace); - } - else - { - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - } - } - - public static class ParseHTML implements Parser { - - public void parse( Lexer lexer, Node html, short mode ) - { - Node node, head; - Node frameset = null; - Node noframes = null; - - lexer.configuration.XmlTags = false; - lexer.seenBodyEndTag = 0; - TagTable tt = lexer.configuration.tt; - - for (;;) - { - node = lexer.getToken(Lexer.IgnoreWhitespace); - - if (node == null) - { - node = lexer.inferredTag("head"); - break; - } - - if (node.tag == tt.tagHead) - break; - - if (node.tag == html.tag && node.type == Node.EndTag) - { - Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* deal with comments etc. 
*/ - if (Node.insertMisc(html, node)) - continue; - - lexer.ungetToken(); - node = lexer.inferredTag("head"); - break; - } - - head = node; - Node.insertNodeAtEnd(html, head); - getParseHead().parse(lexer, head, mode); - - for (;;) - { - node = lexer.getToken(Lexer.IgnoreWhitespace); - - if (node == null) - { - if (frameset == null) /* create an empty body */ - node = lexer.inferredTag("body"); - - return; - } - - /* robustly handle html tags */ - if (node.tag == html.tag) - { - if (node.type != Node.StartTag && frameset == null) - Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); - - continue; - } - - /* deal with comments etc. */ - if (Node.insertMisc(html, node)) - continue; - - /* if frameset document coerce to */ - if (node.tag == tt.tagBody) - { - if (node.type != Node.StartTag) - { - Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (frameset != null) - { - lexer.ungetToken(); - - if (noframes == null) - { - noframes = lexer.inferredTag("noframes"); - Node.insertNodeAtEnd(frameset, noframes); - Report.warning(lexer, html, noframes, Report.INSERTING_TAG); - } - - parseTag(lexer, noframes, mode); - continue; - } - - break; /* to parse body */ - } - - /* flag an error if we see more than one frameset */ - if (node.tag == tt.tagFrameset) - { - if (node.type != Node.StartTag) - { - Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (frameset != null) - Report.error(lexer, html, node, Report.DUPLICATE_FRAMESET); - else - frameset = node; - - Node.insertNodeAtEnd(html, node); - parseTag(lexer, node, mode); - - /* - see if it includes a noframes element so - that we can merge subsequent noframes elements - */ - - for (node = frameset.content; node != null; node = node.next) - { - if (node.tag == tt.tagNoframes) - noframes = node; - } - continue; - } - - /* if not a frameset document coerce <noframes> to <body> */ - if (node.tag == tt.tagNoframes) - { - if (node.type != 
Node.StartTag) - { - Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (frameset == null) - { - Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); - node = lexer.inferredTag("body"); - break; - } - - if (noframes == null) - { - noframes = node; - Node.insertNodeAtEnd(frameset, noframes); - } - - parseTag(lexer, noframes, mode); - continue; - } - - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - { - if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0) - { - moveToHead(lexer, html, node); - continue; - } - } - - lexer.ungetToken(); - - /* insert other content into noframes element */ - - if (frameset != null) - { - if (noframes == null) - { - noframes = lexer.inferredTag("noframes"); - Node.insertNodeAtEnd(frameset, noframes); - } - else - Report.warning(lexer, html, node, Report.NOFRAMES_CONTENT); - - parseTag(lexer, noframes, mode); - continue; - } - - node = lexer.inferredTag("body"); - break; - } - - /* node must be body */ - - Node.insertNodeAtEnd(html, node); - parseTag(lexer, node, mode); - } - - }; - - public static class ParseHead implements Parser { - - public void parse( Lexer lexer, Node head, short mode ) - { - Node node; - int HasTitle = 0; - int HasBase = 0; - TagTable tt = lexer.configuration.tt; - - while (true) - { - node = lexer.getToken(Lexer.IgnoreWhitespace); - if (node == null) break; - if (node.tag == head.tag && node.type == Node.EndTag) - { - head.closed = true; - break; - } - - if (node.type == Node.TextNode) - { - lexer.ungetToken(); - break; - } - - /* deal with comments etc. 
*/ - if (Node.insertMisc(head, node)) - continue; - - if (node.type == Node.DocTypeTag) - { - Node.insertDocType(lexer, head, node); - continue; - } - - /* discard unknown tags */ - if (node.tag == null) - { - Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (!((node.tag.model & Dict.CM_HEAD) != 0)) - { - lexer.ungetToken(); - break; - } - - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - { - if (node.tag == tt.tagTitle) - { - ++HasTitle; - - if (HasTitle > 1) - Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS); - } - else if (node.tag == tt.tagBase) - { - ++HasBase; - - if (HasBase > 1) - Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS); - } - else if (node.tag == tt.tagNoscript) - Report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN); - - Node.insertNodeAtEnd(head, node); - parseTag(lexer, node, Lexer.IgnoreWhitespace); - continue; - } - - /* discard unexpected text nodes and end tags */ - Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED); - } - - if (HasTitle == 0) - { - Report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT); - Node.insertNodeAtEnd(head, lexer.inferredTag( "title")); - } - } - - }; - - public static class ParseTitle implements Parser { - - public void parse( Lexer lexer, Node title, short mode ) - { - Node node; - - while (true) - { - node = lexer.getToken(Lexer.MixedContent); - if (node == null) break; - if (node.tag == title.tag && node.type == Node.EndTag) - { - title.closed = true; - Node.trimSpaces(lexer, title); - return; - } - - if (node.type == Node.TextNode) - { - /* only called for 1st child */ - if (title.content == null) - Node.trimInitialSpace(lexer, title, node); - - if (node.start >= node.end) - { - continue; - } - - Node.insertNodeAtEnd(title, node); - continue; - } - - /* deal with comments etc. 
*/ - if (Node.insertMisc(title, node)) - continue; - - /* discard unknown tags */ - if (node.tag == null) - { - Report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* pushback unexpected tokens */ - Report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE); - lexer.ungetToken(); - Node.trimSpaces(lexer, title); - return; - } - - Report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR); - } - - }; - - public static class ParseScript implements Parser { - - public void parse( Lexer lexer, Node script, short mode ) - { - /* - This isn't quite right for CDATA content as it recognises - tags within the content and parses them accordingly. - This will unfortunately screw up scripts which include - < + letter, < + !, < + ? or < + / + letter - */ - - Node node; - - node = lexer.getCDATA( script); - - if (node != null) - Node.insertNodeAtEnd(script, node); - } - - }; - - public static class ParseBody implements Parser { - - public void parse( Lexer lexer, Node body, short mode ) - { - Node node; - boolean checkstack, iswhitenode; - - mode = Lexer.IgnoreWhitespace; - checkstack = true; - TagTable tt = lexer.configuration.tt; - - while (true) - { - node = lexer.getToken(mode); - if (node == null) break; - if (node.tag == body.tag && node.type == Node.EndTag) - { - body.closed = true; - Node.trimSpaces(lexer, body); - lexer.seenBodyEndTag = 1; - mode = Lexer.IgnoreWhitespace; - - if (body.parent.tag == tt.tagNoframes) - break; - - continue; - } - - if (node.tag == tt.tagNoframes) - { - if (node.type == Node.StartTag) - { - Node.insertNodeAtEnd(body, node); - getParseBlock().parse(lexer, node, mode); - continue; - } - - if (node.type == Node.EndTag && - body.parent.tag == tt.tagNoframes) - { - Node.trimSpaces(lexer, body); - lexer.ungetToken(); - break; - } - } - - if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset) - && body.parent.tag == tt.tagNoframes) - { - Node.trimSpaces(lexer, body); - lexer.ungetToken(); - break; - } 
- - if (node.tag == tt.tagHtml) - { - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED); - - continue; - } - - iswhitenode = false; - - if (node.type == Node.TextNode && - node.end <= node.start + 1 && - node.textarray[node.start] == (byte)' ') - iswhitenode = true; - - /* deal with comments etc. */ - if (Node.insertMisc(body, node)) - continue; - - if (lexer.seenBodyEndTag == 1 && !iswhitenode) - { - ++lexer.seenBodyEndTag; - Report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY); - } - - /* mixed content model permits text */ - if (node.type == Node.TextNode) - { - if (iswhitenode && mode == Lexer.IgnoreWhitespace) - { - continue; - } - - if (lexer.configuration.EncloseBodyText && !iswhitenode) - { - Node para; - - lexer.ungetToken(); - para = lexer.inferredTag("p"); - Node.insertNodeAtEnd(body, para); - parseTag(lexer, para, mode); - mode = Lexer.MixedContent; - continue; - } - else /* strict doesn't allow text here */ - lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20); - - if (checkstack) - { - checkstack = false; - - if (lexer.inlineDup( node) > 0) - continue; - } - - Node.insertNodeAtEnd(body, node); - mode = Lexer.MixedContent; - continue; - } - - if (node.type == Node.DocTypeTag) - { - Node.insertDocType(lexer, body, node); - continue; - } - /* discard unknown and PARAM tags */ - if (node.tag == null || node.tag == tt.tagParam) - { - Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* - Netscape allows LI and DD directly in BODY - We infer UL or DL respectively and use this - boolean to exclude block-level elements so as - to match Netscape's observed behaviour. 
- */ - lexer.excludeBlocks = false; - - if (!((node.tag.model & Dict.CM_BLOCK) != 0) && - !((node.tag.model & Dict.CM_INLINE) != 0)) - { - /* avoid this error message being issued twice */ - if (!((node.tag.model & Dict.CM_HEAD) != 0)) - Report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN); - - if ((node.tag.model & Dict.CM_HTML) != 0) - { - /* copy body attributes if current body was inferred */ - if (node.tag == tt.tagBody && body.implicit - && body.attributes == null) - { - body.attributes = node.attributes; - node.attributes = null; - } - - continue; - } - - if ((node.tag.model & Dict.CM_HEAD) != 0) - { - moveToHead(lexer, body, node); - continue; - } - - if ((node.tag.model & Dict.CM_LIST) != 0) - { - lexer.ungetToken(); - node = lexer.inferredTag( "ul"); - Node.addClass(node, "noindent"); - lexer.excludeBlocks = true; - } - else if ((node.tag.model & Dict.CM_DEFLIST) != 0) - { - lexer.ungetToken(); - node = lexer.inferredTag( "dl"); - lexer.excludeBlocks = true; - } - else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0) - { - lexer.ungetToken(); - node = lexer.inferredTag( "table"); - lexer.excludeBlocks = true; - } - else - { - /* AQ: The following line is from the official C - version of tidy. It doesn't make sense to me - because the '!' operator has higher precedence - than the '&' operator. It seems to me that the - expression always evaluates to 0. - - if (!node->tag->model & (CM_ROW | CM_FIELD)) - - AQ: 13Jan2000 fixed in C tidy - */ - if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0)) - { - lexer.ungetToken(); - return; - } - - /* ignore </td> </th> <option> etc. 
*/ - continue; - } - } - - if (node.type == Node.EndTag) - { - if (node.tag == tt.tagBr) - node.type = Node.StartTag; - else if (node.tag == tt.tagP) - { - Node.coerceNode(lexer, node, tt.tagBr); - Node.insertNodeAtEnd(body, node); - node = lexer.inferredTag("br"); - } - else if ((node.tag.model & Dict.CM_INLINE) != 0) - lexer.popInline(node); - } - - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - { - if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0)) - { - /* HTML4 strict doesn't allow inline content here */ - /* but HTML2 does allow img elements as children of body */ - if (node.tag == tt.tagImg) - lexer.versions &= ~Dict.VERS_HTML40_STRICT; - else - lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20); - - if (checkstack && !node.implicit) - { - checkstack = false; - - if (lexer.inlineDup( node) > 0) - continue; - } - - mode = Lexer.MixedContent; - } - else - { - checkstack = true; - mode = Lexer.IgnoreWhitespace; - } - - if (node.implicit) - Report.warning(lexer, body, node, Report.INSERTING_TAG); - - Node.insertNodeAtEnd(body, node); - parseTag(lexer, node, mode); - continue; - } - - /* discard unexpected tags */ - Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED); - } - } - - }; - - public static class ParseFrameSet implements Parser { - - public void parse( Lexer lexer, Node frameset, short mode ) - { - Node node; - TagTable tt = lexer.configuration.tt; - - lexer.badAccess |= Report.USING_FRAMES; - - while (true) - { - node = lexer.getToken(Lexer.IgnoreWhitespace); - if (node == null) break; - if (node.tag == frameset.tag && node.type == Node.EndTag) - { - frameset.closed = true; - Node.trimSpaces(lexer, frameset); - return; - } - - /* deal with comments etc. 
*/ - if (Node.insertMisc(frameset, node)) - continue; - - if (node.tag == null) - { - Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - { - if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0) - { - moveToHead(lexer, frameset, node); - continue; - } - } - - if (node.tag == tt.tagBody) - { - lexer.ungetToken(); - node = lexer.inferredTag("noframes"); - Report.warning(lexer, frameset, node, Report.INSERTING_TAG); - } - - if (node.type == Node.StartTag && (node.tag.model & Dict.CM_FRAMES) != 0) - { - Node.insertNodeAtEnd(frameset, node); - lexer.excludeBlocks = false; - parseTag(lexer, node, Lexer.MixedContent); - continue; - } - else if (node.type == Node.StartEndTag && (node.tag.model & Dict.CM_FRAMES) != 0) - { - Node.insertNodeAtEnd(frameset, node); - continue; - } - - /* discard unexpected tags */ - Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED); - } - - Report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR); - } - - }; - - public static class ParseInline implements Parser { - - public void parse( Lexer lexer, Node element, short mode ) - { - Node node, parent; - TagTable tt = lexer.configuration.tt; - - if ((element.tag.model & Dict.CM_EMPTY) != 0) - return; - - if (element.tag == tt.tagA) - { - if (element.attributes == null) - { - Report.warning(lexer, element.parent, element, Report.DISCARDING_UNEXPECTED); - Node.discardElement(element); - return; - } - } - - /* - ParseInline is used for some block level elements like H1 to H6 - For such elements we need to insert inline emphasis tags currently - on the inline stack. For Inline elements, we normally push them - onto the inline stack provided they aren't implicit or OBJECT/APPLET. 
- This test is carried out in PushInline and PopInline, see istack.c - We don't push A or SPAN to replicate current browser behavior - */ - if (((element.tag.model & Dict.CM_BLOCK) != 0) || (element.tag == tt.tagDt)) - lexer.inlineDup( null); - else if ((element.tag.model & Dict.CM_INLINE) != 0 && - element.tag != tt.tagA && element.tag != tt.tagSpan) - lexer.pushInline( element); - - if (element.tag == tt.tagNobr) - lexer.badLayout |= Report.USING_NOBR; - else if (element.tag == tt.tagFont) - lexer.badLayout |= Report.USING_FONT; - - /* Inline elements may or may not be within a preformatted element */ - if (mode != Lexer.Preformatted) - mode = Lexer.MixedContent; - - while (true) - { - node = lexer.getToken(mode); - if (node == null) break; - /* end tag for current element */ - if (node.tag == element.tag && node.type == Node.EndTag) - { - if ((element.tag.model & Dict.CM_INLINE) != 0 && - element.tag != tt.tagA) - lexer.popInline( node); - - if (!((mode & Lexer.Preformatted) != 0)) - Node.trimSpaces(lexer, element); - /* - if a font element wraps an anchor and nothing else - then move the font element inside the anchor since - otherwise it won't alter the anchor text color - */ - if (element.tag == tt.tagFont && - element.content != null && - element.content == element.last) - { - Node child = element.content; - - if (child.tag == tt.tagA) - { - child.parent = element.parent; - child.next = element.next; - child.prev = element.prev; - - if (child.prev != null) - child.prev.next = child; - else - child.parent.content = child; - - if (child.next != null) - child.next.prev = child; - else - child.parent.last = child; - - element.next = null; - element.prev = null; - element.parent = child; - element.content = child.content; - element.last = child.last; - child.content = element; - child.last = element; - for (child = element.content; child != null; child = child.next) - child.parent = element; - } - } - element.closed = true; - Node.trimSpaces(lexer, element); - 
Node.trimEmptyElement(lexer, element); - return; - } - - /* <u>...<u> map 2nd <u> to </u> if 1st is explicit */ - /* otherwise emphasis nesting is probably unintentional */ - /* big and small have cumulative effect to leave them alone */ - if (node.type == Node.StartTag - && node.tag == element.tag - && lexer.isPushed(node) - && !node.implicit - && !element.implicit - && node.tag != null && ((node.tag.model & Dict.CM_INLINE) != 0) - && node.tag != tt.tagA - && node.tag != tt.tagFont - && node.tag != tt.tagBig - && node.tag != tt.tagSmall) - { - if (element.content != null && node.attributes == null) - { - Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG); - node.type = Node.EndTag; - lexer.ungetToken(); - continue; - } - - Report.warning(lexer, element, node, Report.NESTED_EMPHASIS); - } - - if (node.type == Node.TextNode) - { - /* only called for 1st child */ - if (element.content == null && - !((mode & Lexer.Preformatted) != 0)) - Node.trimSpaces(lexer, element); - - if (node.start >= node.end) - { - continue; - } - - Node.insertNodeAtEnd(element, node); - continue; - } - - /* mixed content model so allow text */ - if (Node.insertMisc(element, node)) - continue; - - /* deal with HTML tags */ - if (node.tag == tt.tagHtml) - { - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - { - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* otherwise infer end of inline element */ - lexer.ungetToken(); - if (!((mode & Lexer.Preformatted) != 0)) - Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - return; - } - - /* within <dt> or <pre> map <p> to <br> */ - if (node.tag == tt.tagP && - node.type == Node.StartTag && - ((mode & Lexer.Preformatted) != 0 || - element.tag == tt.tagDt || - element.isDescendantOf(tt.tagDt))) - { - node.tag = tt.tagBr; - node.element = "br"; - Node.trimSpaces(lexer, element); - Node.insertNodeAtEnd(element, node); - continue; - } - - /* ignore unknown and 
PARAM tags */ - if (node.tag == null || node.tag == tt.tagParam) - { - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (node.tag == tt.tagBr && node.type == Node.EndTag) - node.type = Node.StartTag; - - if (node.type == Node.EndTag) - { - /* coerce </br> to <br> */ - if (node.tag == tt.tagBr) - node.type = Node.StartTag; - else if (node.tag == tt.tagP) - { - /* coerce unmatched </p> to <br><br> */ - if (!element.isDescendantOf(tt.tagP)) - { - Node.coerceNode(lexer, node, tt.tagBr); - Node.trimSpaces(lexer, element); - Node.insertNodeAtEnd(element, node); - node = lexer.inferredTag("br"); - continue; - } - } - else if ((node.tag.model & Dict.CM_INLINE) != 0 - && node.tag != tt.tagA - && !((node.tag.model & Dict.CM_OBJECT) != 0) - && (element.tag.model & Dict.CM_INLINE) != 0) - { - /* allow any inline end tag to end current element */ - lexer.popInline( element); - - if (element.tag != tt.tagA) - { - if (node.tag == tt.tagA && node.tag != element.tag) - { - Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); - lexer.ungetToken(); - } - else - { - Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG); - } - - if (!((mode & Lexer.Preformatted) != 0)) - Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - return; - } - - /* if parent is <a> then discard unexpected inline end tag */ - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - continue; - } /* special case </tr> etc. 
for stuff moved in front of table */ - else if (lexer.exiled - && node.tag.model != 0 - && (node.tag.model & Dict.CM_TABLE) != 0) - { - lexer.ungetToken(); - Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - return; - } - } - - /* allow any header tag to end current header */ - if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0) - { - if (node.tag == element.tag) - { - Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG); - } - else - { - Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); - lexer.ungetToken(); - } - if (!((mode & Lexer.Preformatted) != 0)) - Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - return; - } - - /* - an <A> tag to ends any open <A> element - but <A href=...> is mapped to </A><A href=...> - */ - if (node.tag == tt.tagA && !node.implicit && lexer.isPushed(node)) - { - /* coerce <a> to </a> unless it has some attributes */ - if (node.attributes == null) - { - node.type = Node.EndTag; - Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG); - lexer.popInline( node); - lexer.ungetToken(); - continue; - } - - lexer.ungetToken(); - Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); - lexer.popInline( element); - if (!((mode & Lexer.Preformatted) != 0)) - Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - return; - } - - if ((element.tag.model & Dict.CM_HEADING) != 0) - { - if (node.tag == tt.tagCenter || - node.tag == tt.tagDiv) - { - if (node.type != Node.StartTag && - node.type != Node.StartEndTag) - { - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN); - - /* insert center as parent if heading is empty */ - if (element.content == null) - { - Node.insertNodeAsParent(element, node); - continue; - } - - /* split heading and make center parent of 2nd part */ - 
Node.insertNodeAfterElement(element, node); - - if (!((mode & Lexer.Preformatted) != 0)) - Node.trimSpaces(lexer, element); - - element = lexer.cloneNode(element); - element.start = lexer.lexsize; - element.end = lexer.lexsize; - Node.insertNodeAtEnd(node, element); - continue; - } - - if (node.tag == tt.tagHr) - { - if (node.type != Node.StartTag && - node.type != Node.StartEndTag) - { - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN); - - /* insert hr before heading if heading is empty */ - if (element.content == null) - { - Node.insertNodeBeforeElement(element, node); - continue; - } - - /* split heading and insert hr before 2nd part */ - Node.insertNodeAfterElement(element, node); - - if (!((mode & Lexer.Preformatted) != 0)) - Node.trimSpaces(lexer, element); - - element = lexer.cloneNode(element); - element.start = lexer.lexsize; - element.end = lexer.lexsize; - Node.insertNodeAfterElement(node, element); - continue; - } - } - - if (element.tag == tt.tagDt) - { - if (node.tag == tt.tagHr) - { - Node dd; - - if (node.type != Node.StartTag && - node.type != Node.StartEndTag) - { - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN); - dd = lexer.inferredTag("dd"); - - /* insert hr within dd before dt if dt is empty */ - if (element.content == null) - { - Node.insertNodeBeforeElement(element, dd); - Node.insertNodeAtEnd(dd, node); - continue; - } - - /* split dt and insert hr within dd before 2nd part */ - Node.insertNodeAfterElement(element, dd); - Node.insertNodeAtEnd(dd, node); - - if (!((mode & Lexer.Preformatted) != 0)) - Node.trimSpaces(lexer, element); - - element = lexer.cloneNode(element); - element.start = lexer.lexsize; - element.end = lexer.lexsize; - Node.insertNodeAfterElement(dd, element); - continue; - } - } - - - /* - if this is the end tag for 
an ancestor element - then infer end tag for this element - */ - if (node.type == Node.EndTag) - { - for (parent = element.parent; - parent != null; parent = parent.parent) - { - if (node.tag == parent.tag) - { - if (!((element.tag.model & Dict.CM_OPT) != 0) && - !element.implicit) - Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); - - if (element.tag == tt.tagA) - lexer.popInline(element); - - lexer.ungetToken(); - - if (!((mode & Lexer.Preformatted) != 0)) - Node.trimSpaces(lexer, element); - - Node.trimEmptyElement(lexer, element); - return; - } - } - } - - /* block level tags end this element */ - if (!((node.tag.model & Dict.CM_INLINE) != 0)) - { - if (node.type != Node.StartTag) - { - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (!((element.tag.model & Dict.CM_OPT) != 0)) - Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); - - if ((node.tag.model & Dict.CM_HEAD) != 0 && - !((node.tag.model & Dict.CM_BLOCK) != 0)) - { - moveToHead(lexer, element, node); - continue; - } - - /* - prevent anchors from propagating into block tags - except for headings h1 to h6 - */ - if (element.tag == tt.tagA) - { - if (node.tag != null && - !((node.tag.model & Dict.CM_HEADING) != 0)) - lexer.popInline(element); - else if (!(element.content != null)) - { - Node.discardElement(element); - lexer.ungetToken(); - return; - } - } - - lexer.ungetToken(); - - if (!((mode & Lexer.Preformatted) != 0)) - Node.trimSpaces(lexer, element); - - Node.trimEmptyElement(lexer, element); - return; - } - - /* parse inline element */ - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - { - if (node.implicit) - Report.warning(lexer, element, node, Report.INSERTING_TAG); - - /* trim white space before <br> */ - if (node.tag == tt.tagBr) - Node.trimSpaces(lexer, element); - - Node.insertNodeAtEnd(element, node); - parseTag(lexer, node, mode); - continue; - } - - /* discard unexpected tags */ - 
Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - } - - if (!((element.tag.model & Dict.CM_OPT) != 0)) - Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR); - - Node.trimEmptyElement(lexer, element); - } - }; - - public static class ParseList implements Parser { - - public void parse( Lexer lexer, Node list, short mode ) - { - Node node; - Node parent; - TagTable tt = lexer.configuration.tt; - - if ((list.tag.model & Dict.CM_EMPTY) != 0) - return; - - lexer.insert = -1; /* defer implicit inline start tags */ - - while (true) - { - node = lexer.getToken(Lexer.IgnoreWhitespace); - if (node == null) break; - - if (node.tag == list.tag && node.type == Node.EndTag) - { - if ((list.tag.model & Dict.CM_OBSOLETE) != 0) - Node.coerceNode(lexer, list, tt.tagUl); - - list.closed = true; - Node.trimEmptyElement(lexer, list); - return; - } - - /* deal with comments etc. */ - if (Node.insertMisc(list, node)) - continue; - - if (node.type != Node.TextNode && node.tag == null) - { - Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node.type == Node.EndTag) - { - if (node.tag == tt.tagForm) - { - lexer.badForm = 1; - Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0) - { - Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); - lexer.popInline(node); - continue; - } - - for (parent = list.parent; - parent != null; parent = parent.parent) - { - if (node.tag == parent.tag) - { - Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE); - lexer.ungetToken(); - - if ((list.tag.model & Dict.CM_OBSOLETE) != 0) - Node.coerceNode(lexer, list, tt.tagUl); - - Node.trimEmptyElement(lexer, list); - return; - } - } - - Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if 
(node.tag != tt.tagLi) - { - lexer.ungetToken(); - - if (node.tag != null && - (node.tag.model & Dict.CM_BLOCK) != 0 && - lexer.excludeBlocks) - { - Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE); - Node.trimEmptyElement(lexer, list); - return; - } - - node = lexer.inferredTag("li"); - node.addAttribute("style", "list-style: none"); - Report.warning(lexer, list, node, Report.MISSING_STARTTAG); - } - - /* node should be <LI> */ - Node.insertNodeAtEnd(list, node); - parseTag(lexer, node, Lexer.IgnoreWhitespace); - } - - if ((list.tag.model & Dict.CM_OBSOLETE) != 0) - Node.coerceNode(lexer, list, tt.tagUl); - - Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR); - Node.trimEmptyElement(lexer, list); - } - - }; - - public static class ParseDefList implements Parser { - - public void parse( Lexer lexer, Node list, short mode ) - { - Node node, parent; - TagTable tt = lexer.configuration.tt; - - if ((list.tag.model & Dict.CM_EMPTY) != 0) - return; - - lexer.insert = -1; /* defer implicit inline start tags */ - - while (true) - { - node = lexer.getToken(Lexer.IgnoreWhitespace); - if (node == null) break; - if (node.tag == list.tag && node.type == Node.EndTag) - { - list.closed = true; - Node.trimEmptyElement(lexer, list); - return; - } - - /* deal with comments etc. 
*/ - if (Node.insertMisc(list, node)) - continue; - - if (node.type == Node.TextNode) - { - lexer.ungetToken(); - node = lexer.inferredTag( "dt"); - Report.warning(lexer, list, node, Report.MISSING_STARTTAG); - } - - if (node.tag == null) - { - Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node.type == Node.EndTag) - { - if (node.tag == tt.tagForm) - { - lexer.badForm = 1; - Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - for (parent = list.parent; - parent != null; parent = parent.parent) - { - if (node.tag == parent.tag) - { - Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE); - - lexer.ungetToken(); - Node.trimEmptyElement(lexer, list); - return; - } - } - } - - /* center in a dt or a dl breaks the dl list in two */ - if (node.tag == tt.tagCenter) - { - if (list.content != null) - Node.insertNodeAfterElement(list, node); - else /* trim empty dl list */ - { - Node.insertNodeBeforeElement(list, node); - Node.discardElement(list); - } - - /* and parse contents of center */ - parseTag(lexer, node, mode); - - /* now create a new dl element */ - list = lexer.inferredTag("dl"); - Node.insertNodeAfterElement(node, list); - continue; - } - - if (!(node.tag == tt.tagDt || node.tag == tt.tagDd)) - { - lexer.ungetToken(); - - if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)) - { - Report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN); - Node.trimEmptyElement(lexer, list); - return; - } - - /* if DD appeared directly in BODY then exclude blocks */ - if (!((node.tag.model & Dict.CM_INLINE) != 0) && lexer.excludeBlocks) - { - Node.trimEmptyElement(lexer, list); - return; - } - - node = lexer.inferredTag( "dd"); - Report.warning(lexer, list, node, Report.MISSING_STARTTAG); - } - - if (node.type == Node.EndTag) - { - Report.warning(lexer, list, node, 
Report.DISCARDING_UNEXPECTED); - continue; - } - - /* node should be <DT> or <DD>*/ - Node.insertNodeAtEnd(list, node); - parseTag(lexer, node, Lexer.IgnoreWhitespace); - } - - Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR); - Node.trimEmptyElement(lexer, list); - } - - }; - - public static class ParsePre implements Parser { - - public void parse( Lexer lexer, Node pre, short mode ) - { - Node node, parent; - TagTable tt = lexer.configuration.tt; - - if ((pre.tag.model & Dict.CM_EMPTY) != 0) - return; - - if ((pre.tag.model & Dict.CM_OBSOLETE) != 0) - Node.coerceNode(lexer, pre, tt.tagPre); - - lexer.inlineDup( null); /* tell lexer to insert inlines if needed */ - - while (true) - { - node = lexer.getToken(Lexer.Preformatted); - if (node == null) break; - if (node.tag == pre.tag && node.type == Node.EndTag) - { - Node.trimSpaces(lexer, pre); - pre.closed = true; - Node.trimEmptyElement(lexer, pre); - return; - } - - if (node.tag == tt.tagHtml) - { - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); - - continue; - } - - if (node.type == Node.TextNode) - { - /* if first check for inital newline */ - if (pre.content == null) - { - if (node.textarray[node.start] == (byte)'\n') - ++node.start; - - if (node.start >= node.end) - { - continue; - } - } - - Node.insertNodeAtEnd(pre, node); - continue; - } - - /* deal with comments etc. 
*/ - if (Node.insertMisc(pre, node)) - continue; - - /* discard unknown and PARAM tags */ - if (node.tag == null || node.tag == tt.tagParam) - { - Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (node.tag == tt.tagP) - { - if (node.type == Node.StartTag) - { - Report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF); - - /* trim white space before <p> in <pre>*/ - Node.trimSpaces(lexer, pre); - - /* coerce both <p> and </p> to <br> */ - Node.coerceNode(lexer, node, tt.tagBr); - Node.insertNodeAtEnd(pre, node); - } - else - { - Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); - } - continue; - } - - if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0)) - { - moveToHead(lexer, pre, node); - continue; - } - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node.type == Node.EndTag) - { - if (node.tag == tt.tagForm) - { - lexer.badForm = 1; - Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - for (parent = pre.parent; - parent != null; parent = parent.parent) - { - if (node.tag == parent.tag) - { - Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE); - - lexer.ungetToken(); - Node.trimSpaces(lexer, pre); - Node.trimEmptyElement(lexer, pre); - return; - } - } - } - - /* what about head content, HEAD, BODY tags etc? 
*/ - if (!((node.tag.model & Dict.CM_INLINE) != 0)) - { - if (node.type != Node.StartTag) - { - Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE); - lexer.excludeBlocks = true; - - /* check if we need to infer a container */ - if ((node.tag.model & Dict.CM_LIST) != 0) - { - lexer.ungetToken(); - node = lexer.inferredTag( "ul"); - Node.addClass(node, "noindent"); - } - else if ((node.tag.model & Dict.CM_DEFLIST) != 0) - { - lexer.ungetToken(); - node = lexer.inferredTag( "dl"); - } - else if ((node.tag.model & Dict.CM_TABLE) != 0) - { - lexer.ungetToken(); - node = lexer.inferredTag( "table"); - } - - Node.insertNodeAfterElement(pre, node); - pre = lexer.inferredTag( "pre"); - Node.insertNodeAfterElement(node, pre); - parseTag(lexer, node, Lexer.IgnoreWhitespace); - lexer.excludeBlocks = false; - continue; - } - /* - if (!((node.tag.model & Dict.CM_INLINE) != 0)) - { - Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE); - lexer.ungetToken(); - return; - } - */ - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - { - /* trim white space before <br> */ - if (node.tag == tt.tagBr) - Node.trimSpaces(lexer, pre); - - Node.insertNodeAtEnd(pre, node); - parseTag(lexer, node, Lexer.Preformatted); - continue; - } - - /* discard unexpected tags */ - Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); - } - - Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR); - Node.trimEmptyElement(lexer, pre); - } - - }; - - public static class ParseBlock implements Parser { - - public void parse( Lexer lexer, Node element, short mode ) - /* - element is node created by the lexer - upon seeing the start tag, or by the - parser when the start tag is inferred - */ - { - Node node, parent; - boolean checkstack; - int istackbase = 0; - TagTable tt = lexer.configuration.tt; - - checkstack = true; - - if ((element.tag.model & Dict.CM_EMPTY) != 0) - 
return; - - if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm)) - Report.warning(lexer, element, null, Report.ILLEGAL_NESTING); - - /* - InlineDup() asks the lexer to insert inline emphasis tags - currently pushed on the istack, but take care to avoid - propagating inline emphasis inside OBJECT or APPLET. - For these elements a fresh inline stack context is created - and disposed of upon reaching the end of the element. - They thus behave like table cells in this respect. - */ - if ((element.tag.model & Dict.CM_OBJECT) != 0) - { - istackbase = lexer.istackbase; - lexer.istackbase = lexer.istack.size(); - } - - if (!((element.tag.model & Dict.CM_MIXED) != 0)) - lexer.inlineDup( null); - - mode = Lexer.IgnoreWhitespace; - - while (true) - { - node = lexer.getToken(mode /*Lexer.MixedContent*/); - if (node == null) break; - /* end tag for this element */ - if (node.type == Node.EndTag && node.tag != null && - (node.tag == element.tag || element.was == node.tag)) - { - - if ((element.tag.model & Dict.CM_OBJECT) != 0) - { - /* pop inline stack */ - while (lexer.istack.size() > lexer.istackbase) - lexer.popInline( null); - lexer.istackbase = istackbase; - } - - element.closed = true; - Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - return; - } - - if (node.tag == tt.tagHtml || - node.tag == tt.tagHead || - node.tag == tt.tagBody) - { - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - - continue; - } - - if (node.type == Node.EndTag) - { - if (node.tag == null) - { - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - - continue; - } - else if (node.tag == tt.tagBr) - node.type = Node.StartTag; - else if (node.tag == tt.tagP) - { - Node.coerceNode(lexer, node, tt.tagBr); - Node.insertNodeAtEnd(element, node); - node = lexer.inferredTag("br"); - } - else - { - /* - if this is the end tag for an ancestor element - then infer 
end tag for this element - */ - for (parent = element.parent; - parent != null; parent = parent.parent) - { - if (node.tag == parent.tag) - { - if (!((element.tag.model & Dict.CM_OPT) != 0)) - Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); - - lexer.ungetToken(); - - if ((element.tag.model & Dict.CM_OBJECT) != 0) - { - /* pop inline stack */ - while (lexer.istack.size() > lexer.istackbase) - lexer.popInline( null); - lexer.istackbase = istackbase; - } - - Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - return; - } - } - /* special case </tr> etc. for stuff moved in front of table */ - if (lexer.exiled - && node.tag.model != 0 - && (node.tag.model & Dict.CM_TABLE) != 0) - { - lexer.ungetToken(); - Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - return; - } - } - } - - /* mixed content model permits text */ - if (node.type == Node.TextNode) - { - boolean iswhitenode = false; - - if (node.type == Node.TextNode && - node.end <= node.start + 1 && - lexer.lexbuf[node.start] == (byte)' ') - iswhitenode = true; - - if (lexer.configuration.EncloseBlockText && !iswhitenode) - { - lexer.ungetToken(); - node = lexer.inferredTag("p"); - Node.insertNodeAtEnd(element, node); - parseTag(lexer, node, Lexer.MixedContent); - continue; - } - - if (checkstack) - { - checkstack = false; - - if (!((element.tag.model & Dict.CM_MIXED) != 0)) - { - if (lexer.inlineDup( node) > 0) - continue; - } - } - - Node.insertNodeAtEnd(element, node); - mode = Lexer.MixedContent; - /* - HTML4 strict doesn't allow mixed content for - elements with %block; as their content model - */ - lexer.versions &= ~Dict.VERS_HTML40_STRICT; - continue; - } - - if (Node.insertMisc(element, node)) - continue; - - /* allow PARAM elements? 
*/ - if (node.tag == tt.tagParam) - { - if (((element.tag.model & Dict.CM_PARAM) != 0) && - (node.type == Node.StartTag || node.type == Node.StartEndTag)) - { - Node.insertNodeAtEnd(element, node); - continue; - } - - /* otherwise discard it */ - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* allow AREA elements? */ - if (node.tag == tt.tagArea) - { - if ((element.tag == tt.tagMap) && - (node.type == Node.StartTag || node.type == Node.StartEndTag)) - { - Node.insertNodeAtEnd(element, node); - continue; - } - - /* otherwise discard it */ - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* ignore unknown start/end tags */ - if (node.tag == null) - { - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* - Allow Dict.CM_INLINE elements here. - - Allow Dict.CM_BLOCK elements here unless - lexer.excludeBlocks is yes. - - LI and DD are special cased. - - Otherwise infer end tag for this element. 
- */ - - if (!((node.tag.model & Dict.CM_INLINE) != 0)) - { - if (node.type != Node.StartTag && node.type != Node.StartEndTag) - { - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (element.tag == tt.tagTd || element.tag == tt.tagTh) - { - /* if parent is a table cell, avoid inferring the end of the cell */ - - if ((node.tag.model & Dict.CM_HEAD) != 0) - { - moveToHead(lexer, element, node); - continue; - } - - if ((node.tag.model & Dict.CM_LIST) != 0) - { - lexer.ungetToken(); - node = lexer.inferredTag( "ul"); - Node.addClass(node, "noindent"); - lexer.excludeBlocks = true; - } - else if ((node.tag.model & Dict.CM_DEFLIST) != 0) - { - lexer.ungetToken(); - node = lexer.inferredTag( "dl"); - lexer.excludeBlocks = true; - } - - /* infer end of current table cell */ - if (!((node.tag.model & Dict.CM_BLOCK) != 0)) - { - lexer.ungetToken(); - Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - return; - } - } - else if ((node.tag.model & Dict.CM_BLOCK) != 0) - { - if (lexer.excludeBlocks) - { - if (!((element.tag.model & Dict.CM_OPT) != 0)) - Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); - - lexer.ungetToken(); - - if ((element.tag.model & Dict.CM_OBJECT) != 0) - lexer.istackbase = istackbase; - - Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - return; - } - } - else /* things like list items */ - { - if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit) - Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); - - if ((node.tag.model & Dict.CM_HEAD) != 0) - { - moveToHead(lexer, element, node); - continue; - } - - lexer.ungetToken(); - - if ((node.tag.model & Dict.CM_LIST) != 0) - { - if (element.parent != null && element.parent.tag != null && - element.parent.tag.parser == getParseList()) - { - Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - return; - } - - node = lexer.inferredTag("ul"); - 
Node.addClass(node, "noindent"); - } - else if ((node.tag.model & Dict.CM_DEFLIST) != 0) - { - if (element.parent.tag == tt.tagDl) - { - Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - return; - } - - node = lexer.inferredTag("dl"); - } - else if ((node.tag.model & Dict.CM_TABLE) != 0 || - (node.tag.model & Dict.CM_ROW) != 0) - { - node = lexer.inferredTag("table"); - } - else if ((element.tag.model & Dict.CM_OBJECT) != 0) - { - /* pop inline stack */ - while (lexer.istack.size() > lexer.istackbase) - lexer.popInline( null); - lexer.istackbase = istackbase; - Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - return; - - } - else - { - Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - return; - } - } - } - - /* parse known element */ - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - { - if ((node.tag.model & Dict.CM_INLINE) != 0) - { - if (checkstack && !node.implicit) - { - checkstack = false; - - if (lexer.inlineDup( node) > 0) - continue; - } - - mode = Lexer.MixedContent; - } - else - { - checkstack = true; - mode = Lexer.IgnoreWhitespace; - } - - /* trim white space before <br> */ - if (node.tag == tt.tagBr) - Node.trimSpaces(lexer, element); - - Node.insertNodeAtEnd(element, node); - - if (node.implicit) - Report.warning(lexer, element, node, Report.INSERTING_TAG); - - parseTag(lexer, node, Lexer.IgnoreWhitespace /*Lexer.MixedContent*/); - continue; - } - - /* discard unexpected tags */ - if (node.type == Node.EndTag) - lexer.popInline( node); /* if inline end tag */ - - Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); - } - - if (!((element.tag.model & Dict.CM_OPT) != 0)) - Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR); - - if ((element.tag.model & Dict.CM_OBJECT) != 0) - { - /* pop inline stack */ - while (lexer.istack.size() > lexer.istackbase) - lexer.popInline( null); - lexer.istackbase = istackbase; - } - - 
Node.trimSpaces(lexer, element); - Node.trimEmptyElement(lexer, element); - } - - }; - - public static class ParseTableTag implements Parser { - - public void parse( Lexer lexer, Node table, short mode ) - { - Node node, parent; - int istackbase; - TagTable tt = lexer.configuration.tt; - - lexer.deferDup(); - istackbase = lexer.istackbase; - lexer.istackbase = lexer.istack.size(); - - while (true) - { - node = lexer.getToken(Lexer.IgnoreWhitespace); - if (node == null) break; - if (node.tag == table.tag && node.type == Node.EndTag) - { - lexer.istackbase = istackbase; - table.closed = true; - Node.trimEmptyElement(lexer, table); - return; - } - - /* deal with comments etc. */ - if (Node.insertMisc(table, node)) - continue; - - /* discard unknown tags */ - if (node.tag == null && node.type != Node.TextNode) - { - Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* if TD or TH or text or inline or block then infer <TR> */ - - if (node.type != Node.EndTag) - { - if (node.tag == tt.tagTd || - node.tag == tt.tagTh || - node.tag == tt.tagTable) - { - lexer.ungetToken(); - node = lexer.inferredTag( "tr"); - Report.warning(lexer, table, node, Report.MISSING_STARTTAG); - } - else if (node.type == Node.TextNode - || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) - { - Node.insertNodeBeforeElement(table, node); - Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN); - lexer.exiled = true; - - /* AQ: TODO - Line 2040 of parser.c (13 Jan 2000) reads as follows: - if (!node->type == TextNode) - This will always evaluate to false. 
- This has been reported to Dave Raggett <dsr@w3.org> - */ - //Should be?: if (!(node.type == Node.TextNode)) - if (false) - parseTag(lexer, node, Lexer.IgnoreWhitespace); - - lexer.exiled = false; - continue; - } - else if ((node.tag.model & Dict.CM_HEAD) != 0) - { - moveToHead(lexer, table, node); - continue; - } - } - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node.type == Node.EndTag) - { - if (node.tag == tt.tagForm) - { - lexer.badForm = 1; - Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (node.tag != null && (node.tag.model & (Dict.CM_TABLE|Dict.CM_ROW)) != 0) - { - Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - for (parent = table.parent; - parent != null; parent = parent.parent) - { - if (node.tag == parent.tag) - { - Report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE); - lexer.ungetToken(); - lexer.istackbase = istackbase; - Node.trimEmptyElement(lexer, table); - return; - } - } - } - - if (!((node.tag.model & Dict.CM_TABLE) != 0)) - { - lexer.ungetToken(); - Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN); - lexer.istackbase = istackbase; - Node.trimEmptyElement(lexer, table); - return; - } - - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - { - Node.insertNodeAtEnd(table, node);; - parseTag(lexer, node, Lexer.IgnoreWhitespace); - continue; - } - - /* discard unexpected text nodes and end tags */ - Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); - } - - Report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR); - Node.trimEmptyElement(lexer, table); - lexer.istackbase = istackbase; - } - - }; - - public static class ParseColGroup implements Parser { - - public void parse( Lexer lexer, Node colgroup, short mode ) - { - Node node, parent; - TagTable tt = lexer.configuration.tt; - - if ((colgroup.tag.model & Dict.CM_EMPTY) != 0) - return; - - while 
(true) - { - node = lexer.getToken(Lexer.IgnoreWhitespace); - if (node == null) break; - if (node.tag == colgroup.tag && node.type == Node.EndTag) - { - colgroup.closed = true; - return; - } - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node.type == Node.EndTag) - { - if (node.tag == tt.tagForm) - { - lexer.badForm = 1; - Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - for (parent = colgroup.parent; - parent != null; parent = parent.parent) - { - - if (node.tag == parent.tag) - { - lexer.ungetToken(); - return; - } - } - } - - if (node.type == Node.TextNode) - { - lexer.ungetToken(); - return; - } - - /* deal with comments etc. */ - if (Node.insertMisc(colgroup, node)) - continue; - - /* discard unknown tags */ - if (node.tag == null) - { - Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (node.tag != tt.tagCol) - { - lexer.ungetToken(); - return; - } - - if (node.type == Node.EndTag) - { - Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* node should be <COL> */ - Node.insertNodeAtEnd(colgroup, node); - parseTag(lexer, node, Lexer.IgnoreWhitespace); - } - } - - }; - - public static class ParseRowGroup implements Parser { - - public void parse( Lexer lexer, Node rowgroup, short mode ) - { - Node node, parent; - TagTable tt = lexer.configuration.tt; - - if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0) - return; - - while (true) - { - node = lexer.getToken(Lexer.IgnoreWhitespace); - if (node == null) break; - if (node.tag == rowgroup.tag) - { - if (node.type == Node.EndTag) - { - rowgroup.closed = true; - Node.trimEmptyElement(lexer, rowgroup); - return; - } - - lexer.ungetToken(); - return; - } - - /* if </table> infer end tag */ - if (node.tag == tt.tagTable && node.type == Node.EndTag) - { - lexer.ungetToken(); - Node.trimEmptyElement(lexer, rowgroup); - return; - } - - /* deal with 
comments etc. */ - if (Node.insertMisc(rowgroup, node)) - continue; - - /* discard unknown tags */ - if (node.tag == null && node.type != Node.TextNode) - { - Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* - if TD or TH then infer <TR> - if text or inline or block move before table - if head content move to head - */ - - if (node.type != Node.EndTag) - { - if (node.tag == tt.tagTd || node.tag == tt.tagTh) - { - lexer.ungetToken(); - node = lexer.inferredTag("tr"); - Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG); - } - else if (node.type == Node.TextNode - || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) - { - Node.moveBeforeTable(rowgroup, node, tt); - Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN); - lexer.exiled = true; - - if (node.type != Node.TextNode) - parseTag(lexer, node, Lexer.IgnoreWhitespace); - - lexer.exiled = false; - continue; - } - else if ((node.tag.model & Dict.CM_HEAD) != 0) - { - Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN); - moveToHead(lexer, rowgroup, node); - continue; - } - } - - /* - if this is the end tag for ancestor element - then infer end tag for this element - */ - if (node.type == Node.EndTag) - { - if (node.tag == tt.tagForm) - { - lexer.badForm = 1; - Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag == tt.tagTh) - { - Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - for (parent = rowgroup.parent; - parent != null; parent = parent.parent) - { - if (node.tag == parent.tag) - { - lexer.ungetToken(); - Node.trimEmptyElement(lexer, rowgroup); - return; - } - } - } - - /* - if THEAD, TFOOT or TBODY then implied end tag - - */ - if ((node.tag.model & Dict.CM_ROWGRP) != 0) - { - if (node.type != Node.EndTag) - lexer.ungetToken(); - - Node.trimEmptyElement(lexer, rowgroup); - return; - } - 
- if (node.type == Node.EndTag) - { - Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (!(node.tag == tt.tagTr)) - { - node = lexer.inferredTag( "tr"); - Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG); - lexer.ungetToken(); - } - - /* node should be <TR> */ - Node.insertNodeAtEnd(rowgroup, node); - parseTag(lexer, node, Lexer.IgnoreWhitespace); - } - - Node.trimEmptyElement(lexer, rowgroup); - } - - }; - - public static class ParseRow implements Parser { - - public void parse( Lexer lexer, Node row, short mode ) - { - Node node, parent; - boolean exclude_state; - TagTable tt = lexer.configuration.tt; - - if ((row.tag.model & Dict.CM_EMPTY) != 0) - return; - - while (true) - { - node = lexer.getToken(Lexer.IgnoreWhitespace); - if (node == null) break; - if (node.tag == row.tag) - { - if (node.type == Node.EndTag) - { - row.closed = true; - Node.fixEmptyRow(lexer, row); - return; - } - - lexer.ungetToken(); - Node.fixEmptyRow(lexer, row); - return; - } - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node.type == Node.EndTag) - { - if (node.tag == tt.tagForm) - { - lexer.badForm = 1; - Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (node.tag == tt.tagTd || node.tag == tt.tagTh) - { - Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - for (parent = row.parent; - parent != null; parent = parent.parent) - { - if (node.tag == parent.tag) - { - lexer.ungetToken(); - Node.trimEmptyElement(lexer, row); - return; - } - } - } - - /* deal with comments etc. 
*/ - if (Node.insertMisc(row, node)) - continue; - - /* discard unknown tags */ - if (node.tag == null && node.type != Node.TextNode) - { - Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* discard unexpected <table> element */ - if (node.tag == tt.tagTable) - { - Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* THEAD, TFOOT or TBODY */ - if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0) - { - lexer.ungetToken(); - Node.trimEmptyElement(lexer, row); - return; - } - - if (node.type == Node.EndTag) - { - Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* - if text or inline or block move before table - if head content move to head - */ - - if (node.type != Node.EndTag) - { - if (node.tag == tt.tagForm) - { - lexer.ungetToken(); - node = lexer.inferredTag("td"); - Report.warning(lexer, row, node, Report.MISSING_STARTTAG); - } - else if (node.type == Node.TextNode - || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) - { - Node.moveBeforeTable(row, node, tt); - Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN); - lexer.exiled = true; - - if (node.type != Node.TextNode) - parseTag(lexer, node, Lexer.IgnoreWhitespace); - - lexer.exiled = false; - continue; - } - else if ((node.tag.model & Dict.CM_HEAD) != 0) - { - Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN); - moveToHead(lexer, row, node); - continue; - } - } - - if (!(node.tag == tt.tagTd || node.tag == tt.tagTh)) - { - Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN); - continue; - } - - /* node should be <TD> or <TH> */ - Node.insertNodeAtEnd(row, node); - exclude_state = lexer.excludeBlocks; - lexer.excludeBlocks = false; - parseTag(lexer, node, Lexer.IgnoreWhitespace); - lexer.excludeBlocks = exclude_state; - - /* pop inline stack */ - - while (lexer.istack.size() > lexer.istackbase) - lexer.popInline( null); - } - - Node.trimEmptyElement(lexer, 
row); - } - - }; - - public static class ParseNoFrames implements Parser { - - public void parse( Lexer lexer, Node noframes, short mode ) - { - Node node; - boolean checkstack; - TagTable tt = lexer.configuration.tt; - - lexer.badAccess |= Report.USING_NOFRAMES; - mode = Lexer.IgnoreWhitespace; - checkstack = true; - - while (true) - { - node = lexer.getToken(mode); - if (node == null) break; - if (node.tag == noframes.tag && node.type == Node.EndTag) - { - noframes.closed = true; - Node.trimSpaces(lexer, noframes); - return; - } - - if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset)) - { - Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_BEFORE); - Node.trimSpaces(lexer, noframes); - lexer.ungetToken(); - return; - } - - if (node.tag == tt.tagHtml) - { - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED); - - continue; - } - - /* deal with comments etc. */ - if (Node.insertMisc(noframes, node)) - continue; - - if (node.tag == tt.tagBody && node.type == Node.StartTag) - { - Node.insertNodeAtEnd(noframes, node); - parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/); - continue; - } - - /* implicit body element inferred */ - if (node.type == Node.TextNode || node.tag != null) - { - lexer.ungetToken(); - node = lexer.inferredTag("body"); - if (lexer.configuration.XmlOut) - Report.warning(lexer, noframes, node, Report.INSERTING_TAG); - Node.insertNodeAtEnd(noframes, node); - parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/); - continue; - } - /* discard unexpected end tags */ - Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED); - } - - Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_FOR); - } - - }; - - public static class ParseSelect implements Parser { - - public void parse( Lexer lexer, Node field, short mode ) - { - Node node; - TagTable tt = lexer.configuration.tt; - - lexer.insert = -1; /* defer implicit 
inline start tags */ - - while (true) - { - node = lexer.getToken(Lexer.IgnoreWhitespace); - if (node == null) break; - if (node.tag == field.tag && node.type == Node.EndTag) - { - field.closed = true; - Node.trimSpaces(lexer, field); - return; - } - - /* deal with comments etc. */ - if (Node.insertMisc(field, node)) - continue; - - if (node.type == Node.StartTag && - (node.tag == tt.tagOption || - node.tag == tt.tagOptgroup || - node.tag == tt.tagScript)) - { - Node.insertNodeAtEnd(field, node); - parseTag(lexer, node, Lexer.IgnoreWhitespace); - continue; - } - - /* discard unexpected tags */ - Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED); - } - - Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR); - } - - }; - - public static class ParseText implements Parser { - - public void parse( Lexer lexer, Node field, short mode ) - { - Node node; - TagTable tt = lexer.configuration.tt; - - lexer.insert = -1; /* defer implicit inline start tags */ - - if (field.tag == tt.tagTextarea) - mode = Lexer.Preformatted; - - while (true) - { - node = lexer.getToken(mode); - if (node == null) break; - if (node.tag == field.tag && node.type == Node.EndTag) - { - field.closed = true; - Node.trimSpaces(lexer, field); - return; - } - - /* deal with comments etc. 
*/ - if (Node.insertMisc(field, node)) - continue; - - if (node.type == Node.TextNode) - { - /* only called for 1st child */ - if (field.content == null && !((mode & Lexer.Preformatted) != 0)) - Node.trimSpaces(lexer, field); - - if (node.start >= node.end) - { - continue; - } - - Node.insertNodeAtEnd(field, node); - continue; - } - - if (node.tag == tt.tagFont) - { - Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - /* terminate element on other tags */ - if (!((field.tag.model & Dict.CM_OPT) != 0)) - Report.warning(lexer, field, node, Report.MISSING_ENDTAG_BEFORE); - - lexer.ungetToken(); - Node.trimSpaces(lexer, field); - return; - } - - if (!((field.tag.model & Dict.CM_OPT) != 0)) - Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR); - } - - }; - - public static class ParseOptGroup implements Parser { - - public void parse( Lexer lexer, Node field, short mode ) - { - Node node; - TagTable tt = lexer.configuration.tt; - - lexer.insert = -1; /* defer implicit inline start tags */ - - while (true) - { - node = lexer.getToken(Lexer.IgnoreWhitespace); - if (node == null) break; - if (node.tag == field.tag && node.type == Node.EndTag) - { - field.closed = true; - Node.trimSpaces(lexer, field); - return; - } - - /* deal with comments etc. 
*/ - if (Node.insertMisc(field, node)) - continue; - - if (node.type == Node.StartTag && - (node.tag == tt.tagOption || node.tag == tt.tagOptgroup)) - { - if (node.tag == tt.tagOptgroup) - Report.warning(lexer, field, node, Report.CANT_BE_NESTED); - - Node.insertNodeAtEnd(field, node); - parseTag(lexer, node, Lexer.MixedContent); - continue; - } - - /* discard unexpected tags */ - Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED); - } - } - - }; - - public static Parser getParseHTML() - { - return _parseHTML; - } - - public static Parser getParseHead() - { - return _parseHead; - } - - public static Parser getParseTitle() - { - return _parseTitle; - } - - public static Parser getParseScript() - { - return _parseScript; - } - - public static Parser getParseBody() - { - return _parseBody; - } - - public static Parser getParseFrameSet() - { - return _parseFrameSet; - } - - public static Parser getParseInline() - { - return _parseInline; - } - - public static Parser getParseList() - { - return _parseList; - } - - public static Parser getParseDefList() - { - return _parseDefList; - } - - public static Parser getParsePre() - { - return _parsePre; - } - - public static Parser getParseBlock() - { - return _parseBlock; - } - - public static Parser getParseTableTag() - { - return _parseTableTag; - } - - public static Parser getParseColGroup() - { - return _parseColGroup; - } - - public static Parser getParseRowGroup() - { - return _parseRowGroup; - } - - public static Parser getParseRow() - { - return _parseRow; - } - - public static Parser getParseNoFrames() - { - return _parseNoFrames; - } - - public static Parser getParseSelect() - { - return _parseSelect; - } - - public static Parser getParseText() - { - return _parseText; - } - - public static Parser getParseOptGroup() - { - return _parseOptGroup; - } - - - private static Parser _parseHTML = new ParseHTML(); - private static Parser _parseHead = new ParseHead(); - private static Parser _parseTitle = new 
ParseTitle(); - private static Parser _parseScript = new ParseScript(); - private static Parser _parseBody = new ParseBody(); - private static Parser _parseFrameSet = new ParseFrameSet(); - private static Parser _parseInline = new ParseInline(); - private static Parser _parseList = new ParseList(); - private static Parser _parseDefList = new ParseDefList(); - private static Parser _parsePre = new ParsePre(); - private static Parser _parseBlock = new ParseBlock(); - private static Parser _parseTableTag = new ParseTableTag(); - private static Parser _parseColGroup = new ParseColGroup(); - private static Parser _parseRowGroup = new ParseRowGroup(); - private static Parser _parseRow = new ParseRow(); - private static Parser _parseNoFrames = new ParseNoFrames(); - private static Parser _parseSelect = new ParseSelect(); - private static Parser _parseText = new ParseText(); - private static Parser _parseOptGroup = new ParseOptGroup(); - - /* - HTML is the top level element - */ - public static Node parseDocument(Lexer lexer) - { - Node node, document, html; - Node doctype = null; - TagTable tt = lexer.configuration.tt; - - document = lexer.newNode(); - document.type = Node.RootNode; - - while (true) - { - node = lexer.getToken(Lexer.IgnoreWhitespace); - if (node == null) break; - - /* deal with comments etc. */ - if (Node.insertMisc(document, node)) - continue; - - if (node.type == Node.DocTypeTag) - { - if (doctype == null) - { - Node.insertNodeAtEnd(document, node); - doctype = node; - } - else - Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); - continue; - } - - if (node.type == Node.EndTag) - { - Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); //TODO? - continue; - } - - if (node.type != Node.StartTag || node.tag != tt.tagHtml) - { - lexer.ungetToken(); - html = lexer.inferredTag("html"); - } - else - html = node; - - Node.insertNodeAtEnd(document, html); - getParseHTML().parse(lexer, html, (short)0); // TODO? 
- break; - } - - return document; - } - - /** - * Indicates whether or not whitespace should be preserved for this element. - * If an <code>xml:space</code> attribute is found, then if the attribute value is - * <code>preserve</code>, returns <code>true</code>. For any other value, returns - * <code>false</code>. If an <code>xml:space</code> attribute was <em>not</em> - * found, then the following element names result in a return value of <code>true: - * pre, script, style,</code> and <code>xsl:text</code>. Finally, if a - * <code>TagTable</code> was passed in and the element appears as the "pre" element - * in the <code>TagTable</code>, then <code>true</code> will be returned. - * Otherwise, <code>false</code> is returned. - * @param element The <code>Node</code> to test to see if whitespace should be - * preserved. - * @param tt The <code>TagTable</code> to test for the <code>getNodePre()</code> - * function. This may be <code>null</code>, in which case this test - * is bypassed. - * @return <code>true</code> or <code>false</code>, as explained above. 
- */ - - public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt) - { - AttVal attribute; - - /* search attributes for xml:space */ - for (attribute = element.attributes; attribute != null; attribute = attribute.next) - { - if (attribute.attribute.equals("xml:space")) - { - if (attribute.value.equals("preserve")) - return true; - - return false; - } - } - - /* kludge for html docs without explicit xml:space attribute */ - if (Lexer.wstrcasecmp(element.element, "pre") == 0 - || Lexer.wstrcasecmp(element.element, "script") == 0 - || Lexer.wstrcasecmp(element.element, "style") == 0) - return true; - - if ( (tt != null) && (tt.findParser(element) == getParsePre()) ) - return true; - - /* kludge for XSL docs */ - if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0) - return true; - - return false; - } - - /* - XML documents - */ - public static void parseXMLElement(Lexer lexer, Node element, short mode) - { - Node node; - - /* Jeff Young's kludge for XSL docs */ - - if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0) - return; - - /* if node is pre or has xml:space="preserve" then do so */ - - if (XMLPreserveWhiteSpace(element, lexer.configuration.tt)) - mode = Lexer.Preformatted; - - while (true) - { - node = lexer.getToken(mode); - if (node == null) break; - if (node.type == Node.EndTag && node.element.equals(element.element)) - { - element.closed = true; - break; - } - - /* discard unexpected end tags */ - if (node.type == Node.EndTag) - { - Report.error(lexer, element, node, Report.UNEXPECTED_ENDTAG); - continue; - } - - /* parse content on seeing start tag */ - if (node.type == Node.StartTag) - parseXMLElement(lexer, node, mode); - - Node.insertNodeAtEnd(element, node); - } - - /* - if first child is text then trim initial space and - delete text node if it is empty. 
- */ - - node = element.content; - - if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted) - { - if (node.textarray[node.start] == (byte)' ') - { - node.start++; - - if (node.start >= node.end) - Node.discardElement(node); - } - } - - /* - if last child is text then trim final space and - delete the text node if it is empty - */ - - node = element.last; - - if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted) - { - if (node.textarray[node.end - 1] == (byte)' ') - { - node.end--; - - if (node.start >= node.end) - Node.discardElement(node); - } - } - } - - public static Node parseXMLDocument(Lexer lexer) - { - Node node, document, doctype; - - document = lexer.newNode(); - document.type = Node.RootNode; - doctype = null; - lexer.configuration.XmlTags = true; - - while (true) - { - node = lexer.getToken(Lexer.IgnoreWhitespace); - if (node == null) break; - /* discard unexpected end tags */ - if (node.type == Node.EndTag) - { - Report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG); - continue; - } - - /* deal with comments etc. 
*/ - if (Node.insertMisc(document, node)) - continue; - - if (node.type == Node.DocTypeTag) - { - if (doctype == null) - { - Node.insertNodeAtEnd(document, node); - doctype = node; - } - else - Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO - continue; - } - - /* if start tag then parse element's content */ - if (node.type == Node.StartTag) - { - Node.insertNodeAtEnd(document, node); - parseXMLElement(lexer, node, Lexer.IgnoreWhitespace); - } - - } - -if (false) { //#if 0 - /* discard the document type */ - node = document.findDocType(); - - if (node != null) - Node.discardElement(node); -} // #endif - - if (doctype != null && !lexer.checkDocTypeKeyWords(doctype)) - Report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE); - - /* ensure presence of initial <?XML version="1.0"?> */ - if (lexer.configuration.XmlPi) - lexer.fixXMLPI(document); - - return document; - } - - public static boolean isJavaScript(Node node) - { - boolean result = false; - AttVal attr; - - if (node.attributes == null) - return true; - - for (attr = node.attributes; attr != null; attr = attr.next) - { - if ( (Lexer.wstrcasecmp(attr.attribute, "language") == 0 - || Lexer.wstrcasecmp(attr.attribute, "type") == 0) - && Lexer.wsubstr(attr.value, "javascript")) - result = true; - } - - return result; - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Report.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Report.java deleted file mode 100644 index f58e5d2..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Report.java +++ /dev/null @@ -1,1130 +0,0 @@ -/* - * @(#)Report.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Error/informational message reporter. - * - * You should only need to edit the file TidyMessages.properties - * to localize HTML tidy. 
- * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * Derived from <a href="http://www.w3.org/People/Raggett/tidy"> - * HTML Tidy Release 4 Aug 2000</a> - * - * @author Dave Raggett <dsr@w3.org> - * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -import java.io.PrintWriter; -import java.text.MessageFormat; -import java.util.Hashtable; -import java.util.MissingResourceException; -import java.util.ResourceBundle; - -import org.eclipse.core.resources.IMarker; -import org.eclipse.core.runtime.CoreException; -import org.eclipse.ui.texteditor.MarkerUtilities; - -public class Report { - - /* used to point to Web Accessibility Guidelines */ - public static final String ACCESS_URL = "http://www.w3.org/WAI/GL"; - - public static final String RELEASE_DATE = "4th August 2000"; - - public static String currentFile; /* sasdjb 01May00 for GNU Emacs error parsing */ - - /* error codes for entities */ - - public static final short MISSING_SEMICOLON = 1; - public static final short UNKNOWN_ENTITY = 2; - public static final short UNESCAPED_AMPERSAND = 3; - - /* error codes for element messages */ - - public static final short MISSING_ENDTAG_FOR = 1; - public static final short MISSING_ENDTAG_BEFORE = 2; - public static final short DISCARDING_UNEXPECTED = 3; - public static final short NESTED_EMPHASIS = 4; - public static final short 
NON_MATCHING_ENDTAG = 5; - public static final short TAG_NOT_ALLOWED_IN = 6; - public static final short MISSING_STARTTAG = 7; - public static final short UNEXPECTED_ENDTAG = 8; - public static final short USING_BR_INPLACE_OF = 9; - public static final short INSERTING_TAG = 10; - public static final short SUSPECTED_MISSING_QUOTE = 11; - public static final short MISSING_TITLE_ELEMENT = 12; - public static final short DUPLICATE_FRAMESET = 13; - public static final short CANT_BE_NESTED = 14; - public static final short OBSOLETE_ELEMENT = 15; - public static final short PROPRIETARY_ELEMENT = 16; - public static final short UNKNOWN_ELEMENT = 17; - public static final short TRIM_EMPTY_ELEMENT = 18; - public static final short COERCE_TO_ENDTAG = 19; - public static final short ILLEGAL_NESTING = 20; - public static final short NOFRAMES_CONTENT = 21; - public static final short CONTENT_AFTER_BODY = 22; - public static final short INCONSISTENT_VERSION = 23; - public static final short MALFORMED_COMMENT = 24; - public static final short BAD_COMMENT_CHARS = 25; - public static final short BAD_XML_COMMENT = 26; - public static final short BAD_CDATA_CONTENT = 27; - public static final short INCONSISTENT_NAMESPACE = 28; - public static final short DOCTYPE_AFTER_TAGS = 29; - public static final short MALFORMED_DOCTYPE = 30; - public static final short UNEXPECTED_END_OF_FILE = 31; - public static final short DTYPE_NOT_UPPER_CASE = 32; - public static final short TOO_MANY_ELEMENTS = 33; - - /* error codes used for attribute messages */ - - public static final short UNKNOWN_ATTRIBUTE = 1; - public static final short MISSING_ATTRIBUTE = 2; - public static final short MISSING_ATTR_VALUE = 3; - public static final short BAD_ATTRIBUTE_VALUE = 4; - public static final short UNEXPECTED_GT = 5; - public static final short PROPRIETARY_ATTR_VALUE = 6; - public static final short REPEATED_ATTRIBUTE = 7; - public static final short MISSING_IMAGEMAP = 8; - public static final short 
XML_ATTRIBUTE_VALUE = 9; - public static final short UNEXPECTED_QUOTEMARK = 10; - public static final short ID_NAME_MISMATCH = 11; - - /* accessibility flaws */ - - public static final short MISSING_IMAGE_ALT = 1; - public static final short MISSING_LINK_ALT = 2; - public static final short MISSING_SUMMARY = 4; - public static final short MISSING_IMAGE_MAP = 8; - public static final short USING_FRAMES = 16; - public static final short USING_NOFRAMES = 32; - - /* presentation flaws */ - - public static final short USING_SPACER = 1; - public static final short USING_LAYER = 2; - public static final short USING_NOBR = 4; - public static final short USING_FONT = 8; - public static final short USING_BODY = 16; - - /* character encoding errors */ - public static final short WINDOWS_CHARS = 1; - public static final short NON_ASCII = 2; - public static final short FOUND_UTF16 = 4; - - private static short optionerrors; - - private static ResourceBundle res = null; - - static { - try { - res = ResourceBundle.getBundle("org/w3c/tidy/TidyMessages"); - } catch (MissingResourceException e) { - throw new Error(e.toString()); - } - } - - public static void tidyPrint(PrintWriter p, String msg) { - p.print(msg); - } - - public static void tidyPrintln(PrintWriter p, String msg) { - p.println(msg); - } - - public static void tidyPrintln(PrintWriter p) { - p.println(); - } - - public static void showVersion(PrintWriter p) { - tidyPrintln(p, "Java HTML Tidy release date: " + RELEASE_DATE); - tidyPrintln(p, "See http://www.w3.org/People/Raggett for details"); - } - - public static void tag(Lexer lexer, Node tag) { - if (tag != null) { - if (tag.type == Node.StartTag) - tidyPrint(lexer.errout, "<" + tag.element + ">"); - else if (tag.type == Node.EndTag) - tidyPrint(lexer.errout, "</" + tag.element + ">"); - else if (tag.type == Node.DocTypeTag) - tidyPrint(lexer.errout, "<!DOCTYPE>"); - else if (tag.type == Node.TextNode) - tidyPrint(lexer.errout, "plain text"); - else - 
tidyPrint(lexer.errout, tag.element); - } - } - - public static void tag(StringBuffer errorMessage, Lexer lexer, Node tag) { - if (tag != null) { - if (tag.type == Node.StartTag) { - tidyPrint(lexer.errout, "<" + tag.element + ">"); - errorMessage.append("<" + tag.element + ">"); - } else if (tag.type == Node.EndTag) { - tidyPrint(lexer.errout, "</" + tag.element + ">"); - errorMessage.append("</" + tag.element + ">"); - } else if (tag.type == Node.DocTypeTag) { - tidyPrint(lexer.errout, "<!DOCTYPE>"); - errorMessage.append("</" + tag.element + ">"); - } else if (tag.type == Node.TextNode) { - tidyPrint(lexer.errout, "plain text"); - errorMessage.append("plain text"); - } else { - tidyPrint(lexer.errout, tag.element); - errorMessage.append(tag.element); - } - } - } - - /* lexer is not defined when this is called */ - public static void unknownOption(String option) { - optionerrors++; - try { - System.err.println(MessageFormat.format(res.getString("unknown_option"), new Object[] { option })); - } catch (MissingResourceException e) { - System.err.println(e.toString()); - } - } - - /* lexer is not defined when this is called */ - public static void badArgument(String option) { - optionerrors++; - try { - System.err.println(MessageFormat.format(res.getString("bad_argument"), new Object[] { option })); - } catch (MissingResourceException e) { - System.err.println(e.toString()); - } - } - - public static void position(Lexer lexer) { - try { - /* Change formatting to be parsable by GNU Emacs */ - if (lexer.configuration.Emacs) { - tidyPrint( - lexer.errout, - MessageFormat.format( - res.getString("emacs_format"), - new Object[] { currentFile, new Integer(lexer.lines), new Integer(lexer.columns)})); - tidyPrint(lexer.errout, " "); - } else /* traditional format */ { - tidyPrint( - lexer.errout, - MessageFormat.format(res.getString("line_column"), new Object[] { new Integer(lexer.lines), new Integer(lexer.columns)})); - } - } catch (MissingResourceException e) { - 
lexer.errout.println(e.toString()); - } - } - - public static void encodingError(Lexer lexer, short code, int c) { - lexer.warnings++; - - if (lexer.configuration.ShowWarnings) { - position(lexer); - - if (code == WINDOWS_CHARS) { - lexer.badChars |= WINDOWS_CHARS; - try { - Hashtable attributes = new Hashtable(); - StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": "); - MarkerUtilities.setLineNumber(attributes, lexer.lines); - tidyPrint(lexer.errout, MessageFormat.format(res.getString("illegal_char"), new Object[] { new Integer(c)})); - errorMessage.append(MessageFormat.format(res.getString("illegal_char"), new Object[] { new Integer(c)})); - attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR)); - // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING)); - // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO)); - try { - MarkerUtilities.setMessage(attributes, errorMessage.toString()); - MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM); - } catch (CoreException e) { - } - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - - tidyPrintln(lexer.errout); - } - } - - public static void entityError(Lexer lexer, short code, String entity, int c) { - lexer.warnings++; - - if (lexer.configuration.ShowWarnings) { - position(lexer); - Hashtable attributes = new Hashtable(); - StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": "); - MarkerUtilities.setLineNumber(attributes, lexer.lines); - - if (code == MISSING_SEMICOLON) { - try { - tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_semicolon"), new Object[] { entity })); - errorMessage.append(MessageFormat.format(res.getString("missing_semicolon"), new Object[] { entity })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == UNKNOWN_ENTITY) { - try { - tidyPrint(lexer.errout, 
MessageFormat.format(res.getString("unknown_entity"), new Object[] { entity })); - errorMessage.append(MessageFormat.format(res.getString("unknown_entity"), new Object[] { entity })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == UNESCAPED_AMPERSAND) { - try { - tidyPrint(lexer.errout, res.getString("unescaped_ampersand")); - errorMessage.append(res.getString("unescaped_ampersand")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR)); - // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING)); - // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO)); - try { - MarkerUtilities.setMessage(attributes, errorMessage.toString()); - MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM); - } catch (CoreException e) { - } - tidyPrintln(lexer.errout); - } - } - - public static void attrError(Lexer lexer, Node node, String attr, short code) { - lexer.warnings++; - - /* keep quiet after 6 errors */ - if (lexer.errors > 6) - return; - - Hashtable attributes = new Hashtable(); - StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": "); - - if (lexer.configuration.ShowWarnings) { - /* on end of file adjust reported position to end of input */ - if (code == UNEXPECTED_END_OF_FILE) { - lexer.lines = lexer.in.curline; - lexer.columns = lexer.in.curcol; - } - - position(lexer); - - MarkerUtilities.setLineNumber(attributes, lexer.lines); - - if (code == UNKNOWN_ATTRIBUTE) { - try { - tidyPrint(lexer.errout, MessageFormat.format(res.getString("unknown_attribute"), new Object[] { attr })); - errorMessage.append(MessageFormat.format(res.getString("unknown_attribute"), new Object[] { attr })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == MISSING_ATTRIBUTE) { - try { - 
tidyPrint(lexer.errout, res.getString("warning")); - errorMessage.append(res.getString("warning")); - tag(errorMessage, lexer, node); - tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_attribute"), new Object[] { attr })); - errorMessage.append(MessageFormat.format(res.getString("missing_attribute"), new Object[] { attr })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == MISSING_ATTR_VALUE) { - try { - tidyPrint(lexer.errout, res.getString("warning")); - errorMessage.append(res.getString("warning")); - tag(errorMessage, lexer, node); - tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_attr_value"), new Object[] { attr })); - errorMessage.append(MessageFormat.format(res.getString("missing_attr_value"), new Object[] { attr })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == MISSING_IMAGEMAP) { - try { - tidyPrint(lexer.errout, res.getString("warning")); - errorMessage.append(res.getString("warning")); - tag(errorMessage, lexer, node); - tidyPrint(lexer.errout, res.getString("missing_imagemap")); - errorMessage.append(res.getString("missing_imagemap")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - lexer.badAccess |= MISSING_IMAGE_MAP; - } else if (code == BAD_ATTRIBUTE_VALUE) { - try { - tidyPrint(lexer.errout, res.getString("warning")); - errorMessage.append(res.getString("warning")); - tag(lexer, node); - tidyPrint(lexer.errout, MessageFormat.format(res.getString("bad_attribute_value"), new Object[] { attr })); - errorMessage.append(MessageFormat.format(res.getString("bad_attribute_value"), new Object[] { attr })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == XML_ATTRIBUTE_VALUE) { - try { - tidyPrint(lexer.errout, res.getString("warning")); - errorMessage.append(res.getString("warning")); - tag(errorMessage, lexer, 
node); - tidyPrint(lexer.errout, MessageFormat.format(res.getString("xml_attribute_value"), new Object[] { attr })); - errorMessage.append(MessageFormat.format(res.getString("xml_attribute_value"), new Object[] { attr })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == UNEXPECTED_GT) { - try { - tidyPrint(lexer.errout, res.getString("error")); - errorMessage.append(res.getString("error")); - tag(errorMessage, lexer, node); - tidyPrint(lexer.errout, res.getString("unexpected_gt")); - errorMessage.append(res.getString("unexpected_gt")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - lexer.errors++; - ; - } else if (code == UNEXPECTED_QUOTEMARK) { - try { - tidyPrint(lexer.errout, res.getString("warning")); - errorMessage.append(res.getString("warning")); - tag(errorMessage, lexer, node); - tidyPrint(lexer.errout, res.getString("unexpected_quotemark")); - errorMessage.append(res.getString("unexpected_quotemark")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == REPEATED_ATTRIBUTE) { - try { - tidyPrint(lexer.errout, res.getString("warning")); - errorMessage.append(res.getString("warning")); - tag(errorMessage, lexer, node); - tidyPrint(lexer.errout, res.getString("repeated_attribute")); - errorMessage.append(res.getString("repeated_attribute")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == PROPRIETARY_ATTR_VALUE) { - try { - tidyPrint(lexer.errout, res.getString("warning")); - errorMessage.append(res.getString("warning")); - tag(errorMessage, lexer, node); - tidyPrint(lexer.errout, MessageFormat.format(res.getString("proprietary_attr_value"), new Object[] { attr })); - errorMessage.append(MessageFormat.format(res.getString("proprietary_attr_value"), new Object[] { attr })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } 
else if (code == UNEXPECTED_END_OF_FILE) { - try { - tidyPrint(lexer.errout, res.getString("unexpected_end_of_file")); - errorMessage.append(res.getString("unexpected_end_of_file")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == ID_NAME_MISMATCH) { - try { - tidyPrint(lexer.errout, res.getString("warning")); - errorMessage.append(res.getString("warning")); - tag(errorMessage, lexer, node); - tidyPrint(lexer.errout, res.getString("id_name_mismatch")); - errorMessage.append(res.getString("id_name_mismatch")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - - // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR)); - attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING)); - // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO)); - try { - MarkerUtilities.setMessage(attributes, errorMessage.toString()); - MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM); - } catch (CoreException e) { - } - tidyPrintln(lexer.errout); - } else if (code == UNEXPECTED_GT) { - position(lexer); - MarkerUtilities.setLineNumber(attributes, lexer.lines); - try { - tidyPrint(lexer.errout, res.getString("error")); - errorMessage.append(res.getString("error")); - tag(errorMessage, lexer, node); - tidyPrint(lexer.errout, res.getString("unexpected_gt")); - errorMessage.append(res.getString("unexpected_gt")); - attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR)); - // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING)); - // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO)); - try { - MarkerUtilities.setMessage(attributes, errorMessage.toString()); - MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM); - } catch (CoreException e) { - } - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - tidyPrintln(lexer.errout); - 
lexer.errors++; - ; - } - - } - - public static void warning(Lexer lexer, Node element, Node node, short code) { - - TagTable tt = lexer.configuration.tt; - - lexer.warnings++; - - /* keep quiet after 6 errors */ - if (lexer.errors > 6) - return; - - if (lexer.configuration.ShowWarnings) { - - /* on end of file adjust reported position to end of input */ - if (code == UNEXPECTED_END_OF_FILE) { - lexer.lines = lexer.in.curline; - lexer.columns = lexer.in.curcol; - } - - position(lexer); - Hashtable attributes = new Hashtable(); - StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": "); - - MarkerUtilities.setLineNumber(attributes, lexer.lines); - - if (code == MISSING_ENDTAG_FOR) { - try { - tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_endtag_for"), new Object[] { element.element })); - errorMessage.append(MessageFormat.format(res.getString("missing_endtag_for"), new Object[] { element.element })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == MISSING_ENDTAG_BEFORE) { - try { - tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_endtag_before"), new Object[] { element.element })); - errorMessage.append(MessageFormat.format(res.getString("missing_endtag_before"), new Object[] { element.element })); - - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - tag(errorMessage, lexer, node); - } else if (code == DISCARDING_UNEXPECTED) { - try { - tidyPrint(lexer.errout, res.getString("discarding_unexpected")); - errorMessage.append(res.getString("discarding_unexpected")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - tag(errorMessage, lexer, node); - } else if (code == NESTED_EMPHASIS) { - try { - tidyPrint(lexer.errout, res.getString("nested_emphasis")); - errorMessage.append(res.getString("nested_emphasis")); - } catch (MissingResourceException e) { - 
lexer.errout.println(e.toString()); - } - tag(errorMessage, lexer, node); - } else if (code == COERCE_TO_ENDTAG) { - try { - tidyPrint(lexer.errout, MessageFormat.format(res.getString("coerce_to_endtag"), new Object[] { element.element })); - errorMessage.append(MessageFormat.format(res.getString("coerce_to_endtag"), new Object[] { element.element })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == NON_MATCHING_ENDTAG) { - try { - tidyPrint(lexer.errout, res.getString("non_matching_endtag_1")); - errorMessage.append(res.getString("non_matching_endtag_1")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - tag(errorMessage, lexer, node); - try { - tidyPrint(lexer.errout, MessageFormat.format(res.getString("non_matching_endtag_2"), new Object[] { element.element })); - errorMessage.append(MessageFormat.format(res.getString("non_matching_endtag_2"), new Object[] { element.element })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == TAG_NOT_ALLOWED_IN) { - try { - tidyPrint(lexer.errout, res.getString("warning")); - errorMessage.append(res.getString("warning")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - tag(errorMessage, lexer, node); - try { - tidyPrint(lexer.errout, MessageFormat.format(res.getString("tag_not_allowed_in"), new Object[] { element.element })); - errorMessage.append(MessageFormat.format(res.getString("tag_not_allowed_in"), new Object[] { element.element })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == DOCTYPE_AFTER_TAGS) { - try { - tidyPrint(lexer.errout, res.getString("doctype_after_tags")); - errorMessage.append(res.getString("doctype_after_tags")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == MISSING_STARTTAG) { - try { - tidyPrint(lexer.errout, 
MessageFormat.format(res.getString("missing_starttag"), new Object[] { node.element })); - errorMessage.append(MessageFormat.format(res.getString("missing_starttag"), new Object[] { node.element })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == UNEXPECTED_ENDTAG) { - try { - tidyPrint(lexer.errout, MessageFormat.format(res.getString("unexpected_endtag"), new Object[] { node.element })); - if (element != null) - tidyPrint( - lexer.errout, - MessageFormat.format(res.getString("unexpected_endtag_suffix"), new Object[] { element.element })); - errorMessage.append(MessageFormat.format(res.getString("unexpected_endtag"), new Object[] { node.element })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == TOO_MANY_ELEMENTS) { - try { - tidyPrint(lexer.errout, MessageFormat.format(res.getString("too_many_elements"), new Object[] { node.element })); - if (element != null) - tidyPrint( - lexer.errout, - MessageFormat.format(res.getString("too_many_elements_suffix"), new Object[] { element.element })); - errorMessage.append(MessageFormat.format(res.getString("too_many_elements"), new Object[] { node.element })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == USING_BR_INPLACE_OF) { - try { - tidyPrint(lexer.errout, res.getString("using_br_inplace_of")); - errorMessage.append(res.getString("using_br_inplace_of")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - tag(errorMessage, lexer, node); - } else if (code == INSERTING_TAG) { - try { - tidyPrint(lexer.errout, MessageFormat.format(res.getString("inserting_tag"), new Object[] { node.element })); - errorMessage.append(MessageFormat.format(res.getString("inserting_tag"), new Object[] { node.element })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == CANT_BE_NESTED) { - 
try { - tidyPrint(lexer.errout, res.getString("warning")); - errorMessage.append(res.getString("warning")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - tag(errorMessage, lexer, node); - try { - tidyPrint(lexer.errout, res.getString("cant_be_nested")); - errorMessage.append(res.getString("cant_be_nested")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == PROPRIETARY_ELEMENT) { - try { - tidyPrint(lexer.errout, res.getString("warning")); - errorMessage.append(res.getString("warning")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - tag(errorMessage, lexer, node); - try { - tidyPrint(lexer.errout, res.getString("proprietary_element")); - errorMessage.append(res.getString("proprietary_element")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - - if (node.tag == tt.tagLayer) - lexer.badLayout |= USING_LAYER; - else if (node.tag == tt.tagSpacer) - lexer.badLayout |= USING_SPACER; - else if (node.tag == tt.tagNobr) - lexer.badLayout |= USING_NOBR; - } else if (code == OBSOLETE_ELEMENT) { - try { - if (element.tag != null && (element.tag.model & Dict.CM_OBSOLETE) != 0) { - tidyPrint(lexer.errout, res.getString("obsolete_element")); - errorMessage.append(res.getString("obsolete_element")); - } else { - tidyPrint(lexer.errout, res.getString("replacing_element")); - errorMessage.append(res.getString("replacing_element")); - } - - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - tag(errorMessage, lexer, element); - try { - tidyPrint(lexer.errout, res.getString("by")); - errorMessage.append(res.getString("by")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - tag(errorMessage, lexer, node); - } else if (code == TRIM_EMPTY_ELEMENT) { - try { - tidyPrint(lexer.errout, res.getString("trim_empty_element")); - 
errorMessage.append(res.getString("trim_empty_element")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - tag(errorMessage, lexer, element); - } else if (code == MISSING_TITLE_ELEMENT) { - try { - tidyPrint(lexer.errout, res.getString("missing_title_element")); - errorMessage.append(res.getString("missing_title_element")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == ILLEGAL_NESTING) { - try { - tidyPrint(lexer.errout, res.getString("warning")); - errorMessage.append(res.getString("warning")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - tag(errorMessage, lexer, element); - try { - tidyPrint(lexer.errout, res.getString("illegal_nesting")); - errorMessage.append(res.getString("illegal_nesting")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == NOFRAMES_CONTENT) { - try { - tidyPrint(lexer.errout, res.getString("warning")); - errorMessage.append(res.getString("warning")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - tag(errorMessage, lexer, node); - try { - tidyPrint(lexer.errout, res.getString("noframes_content")); - errorMessage.append(res.getString("noframes_content")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == INCONSISTENT_VERSION) { - try { - tidyPrint(lexer.errout, res.getString("inconsistent_version")); - errorMessage.append(res.getString("inconsistent_version")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == MALFORMED_DOCTYPE) { - try { - tidyPrint(lexer.errout, res.getString("malformed_doctype")); - errorMessage.append(res.getString("malformed_doctype")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == CONTENT_AFTER_BODY) { - try { - tidyPrint(lexer.errout, 
res.getString("content_after_body")); - errorMessage.append(res.getString("content_after_body")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == MALFORMED_COMMENT) { - try { - tidyPrint(lexer.errout, res.getString("malformed_comment")); - errorMessage.append(res.getString("malformed_comment")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == BAD_COMMENT_CHARS) { - try { - tidyPrint(lexer.errout, res.getString("bad_comment_chars")); - errorMessage.append(res.getString("bad_comment_chars")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == BAD_XML_COMMENT) { - try { - tidyPrint(lexer.errout, res.getString("bad_xml_comment")); - errorMessage.append(res.getString("bad_xml_comment")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == BAD_CDATA_CONTENT) { - try { - tidyPrint(lexer.errout, res.getString("bad_cdata_content")); - errorMessage.append(res.getString("bad_cdata_content")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == INCONSISTENT_NAMESPACE) { - try { - tidyPrint(lexer.errout, res.getString("inconsistent_namespace")); - errorMessage.append(res.getString("inconsistent_namespace")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == DTYPE_NOT_UPPER_CASE) { - try { - tidyPrint(lexer.errout, res.getString("dtype_not_upper_case")); - errorMessage.append(res.getString("dtype_not_upper_case")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == UNEXPECTED_END_OF_FILE) { - try { - tidyPrint(lexer.errout, res.getString("unexpected_end_of_file")); - errorMessage.append(res.getString("unexpected_end_of_file")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } 
- tag(errorMessage, lexer, element); - } - // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR)); - attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING)); - // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO)); - try { - MarkerUtilities.setMessage(attributes, errorMessage.toString()); - MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM); - } catch (CoreException e) { - } - tidyPrintln(lexer.errout); - } - } - - public static void error(Lexer lexer, Node element, Node node, short code) { - lexer.warnings++; - - /* keep quiet after 6 errors */ - if (lexer.errors > 6) - return; - - lexer.errors++; - - position(lexer); - - Hashtable attributes = new Hashtable(); - StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": "); - - MarkerUtilities.setLineNumber(attributes, lexer.lines); - - if (code == SUSPECTED_MISSING_QUOTE) { - try { - tidyPrint(lexer.errout, res.getString("suspected_missing_quote")); - errorMessage.append(res.getString("suspected_missing_quote")); - - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == DUPLICATE_FRAMESET) { - try { - tidyPrint(lexer.errout, res.getString("duplicate_frameset")); - errorMessage.append(res.getString("duplicate_frameset")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == UNKNOWN_ELEMENT) { - try { - tidyPrint(lexer.errout, res.getString("error")); - errorMessage.append(res.getString("error")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - tag(errorMessage, lexer, node); - try { - tidyPrint(lexer.errout, res.getString("unknown_element")); - errorMessage.append(res.getString("unknown_element")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } else if (code == UNEXPECTED_ENDTAG) { - try { - tidyPrint(lexer.errout, 
MessageFormat.format(res.getString("unexpected_endtag"), new Object[] { node.element })); - errorMessage.append(MessageFormat.format(res.getString("unexpected_endtag"), new Object[] { node.element })); - if (element != null) { - tidyPrint( - lexer.errout, - MessageFormat.format(res.getString("unexpected_endtag_suffix"), new Object[] { element.element })); - errorMessage.append(MessageFormat.format(res.getString("unexpected_endtag_suffix"), new Object[] { element.element })); - } - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR)); - // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING)); - // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO)); - try { - MarkerUtilities.setMessage(attributes, errorMessage.toString()); - MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM); - } catch (CoreException e) { - } - tidyPrintln(lexer.errout); - } - - public static void errorSummary(Lexer lexer) { - /* adjust badAccess to that its null if frames are ok */ - if ((lexer.badAccess & (USING_FRAMES | USING_NOFRAMES)) != 0) { - if (!(((lexer.badAccess & USING_FRAMES) != 0) && ((lexer.badAccess & USING_NOFRAMES) == 0))) - lexer.badAccess &= ~(USING_FRAMES | USING_NOFRAMES); - } - - if (lexer.badChars != 0) { - if ((lexer.badChars & WINDOWS_CHARS) != 0) { - try { - tidyPrint(lexer.errout, res.getString("badchars_summary")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - } - - if (lexer.badForm != 0) { - try { - tidyPrint(lexer.errout, res.getString("badform_summary")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - - if (lexer.badAccess != 0) { - if ((lexer.badAccess & MISSING_SUMMARY) != 0) { - try { - tidyPrint(lexer.errout, res.getString("badaccess_missing_summary")); - } catch (MissingResourceException e) { - 
lexer.errout.println(e.toString()); - } - } - - if ((lexer.badAccess & MISSING_IMAGE_ALT) != 0) { - try { - tidyPrint(lexer.errout, res.getString("badaccess_missing_image_alt")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - - if ((lexer.badAccess & MISSING_IMAGE_MAP) != 0) { - try { - tidyPrint(lexer.errout, res.getString("badaccess_missing_image_map")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - - if ((lexer.badAccess & MISSING_LINK_ALT) != 0) { - try { - tidyPrint(lexer.errout, res.getString("badaccess_missing_link_alt")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - - if (((lexer.badAccess & USING_FRAMES) != 0) && ((lexer.badAccess & USING_NOFRAMES) == 0)) { - try { - tidyPrint(lexer.errout, res.getString("badaccess_frames")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - - try { - tidyPrint(lexer.errout, MessageFormat.format(res.getString("badaccess_summary"), new Object[] { ACCESS_URL })); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - - if (lexer.badLayout != 0) { - if ((lexer.badLayout & USING_LAYER) != 0) { - try { - tidyPrint(lexer.errout, res.getString("badlayout_using_layer")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - - if ((lexer.badLayout & USING_SPACER) != 0) { - try { - tidyPrint(lexer.errout, res.getString("badlayout_using_spacer")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - - if ((lexer.badLayout & USING_FONT) != 0) { - try { - tidyPrint(lexer.errout, res.getString("badlayout_using_font")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - - if ((lexer.badLayout & USING_NOBR) != 0) { - try { - tidyPrint(lexer.errout, res.getString("badlayout_using_nobr")); - } catch (MissingResourceException e) { - 
lexer.errout.println(e.toString()); - } - } - - if ((lexer.badLayout & USING_BODY) != 0) { - try { - tidyPrint(lexer.errout, res.getString("badlayout_using_body")); - } catch (MissingResourceException e) { - lexer.errout.println(e.toString()); - } - } - } - } - - public static void unknownOption(PrintWriter errout, char c) { - try { - tidyPrintln( - errout, - MessageFormat.format(res.getString("unrecognized_option"), new Object[] { new String(new char[] { c }) - })); - } catch (MissingResourceException e) { - errout.println(e.toString()); - } - } - - public static void unknownFile(PrintWriter errout, String program, String file) { - try { - tidyPrintln(errout, MessageFormat.format(res.getString("unknown_file"), new Object[] { program, file })); - } catch (MissingResourceException e) { - errout.println(e.toString()); - } - } - - public static void needsAuthorIntervention(PrintWriter errout) { - try { - tidyPrintln(errout, res.getString("needs_author_intervention")); - } catch (MissingResourceException e) { - errout.println(e.toString()); - } - } - - public static void missingBody(PrintWriter errout) { - try { - tidyPrintln(errout, res.getString("missing_body")); - } catch (MissingResourceException e) { - errout.println(e.toString()); - } - } - - public static void reportNumberOfSlides(PrintWriter errout, int count) { - try { - tidyPrintln(errout, MessageFormat.format(res.getString("slides_found"), new Object[] { new Integer(count)})); - } catch (MissingResourceException e) { - errout.println(e.toString()); - } - } - - public static void generalInfo(PrintWriter errout) { - try { - tidyPrintln(errout, res.getString("general_info")); - } catch (MissingResourceException e) { - errout.println(e.toString()); - } - } - - public static void helloMessage(PrintWriter errout, String date, String filename) { - currentFile = filename; /* for use with Gnu Emacs */ - - try { - tidyPrintln(errout, MessageFormat.format(res.getString("hello_message"), new Object[] { date, filename 
})); - } catch (MissingResourceException e) { - errout.println(e.toString()); - } - } - - public static void reportVersion(PrintWriter errout, Lexer lexer, String filename, Node doctype) { - int i, c; - int state = 0; - String vers = lexer.HTMLVersionName(); - MutableInteger cc = new MutableInteger(); - - try { - if (doctype != null) { - tidyPrint(errout, MessageFormat.format(res.getString("doctype_given"), new Object[] { filename })); - - for (i = doctype.start; i < doctype.end; ++i) { - c = (int) doctype.textarray[i]; - - /* look for UTF-8 multibyte character */ - if (c < 0) { - i += PPrint.getUTF8(doctype.textarray, i, cc); - c = cc.value; - } - - if (c == (char) '"') - ++state; - else if (state == 1) - errout.print((char) c); - } - - errout.print('"'); - } - - tidyPrintln( - errout, - MessageFormat.format( - res.getString("report_version"), - new Object[] { filename, (vers != null ? vers : "HTML proprietary")})); - } catch (MissingResourceException e) { - errout.println(e.toString()); - } - } - - public static void reportNumWarnings(PrintWriter errout, Lexer lexer) { - if (lexer.warnings > 0) { - try { - tidyPrintln(errout, MessageFormat.format(res.getString("num_warnings"), new Object[] { new Integer(lexer.warnings)})); - } catch (MissingResourceException e) { - errout.println(e.toString()); - } - } else { - try { - tidyPrintln(errout, res.getString("no_warnings")); - } catch (MissingResourceException e) { - errout.println(e.toString()); - } - } - } - - public static void helpText(PrintWriter out, String prog) { - try { - tidyPrintln(out, MessageFormat.format(res.getString("help_text"), new Object[] { prog, RELEASE_DATE })); - } catch (MissingResourceException e) { - out.println(e.toString()); - } - } - - public static void badTree(PrintWriter errout) { - try { - tidyPrintln(errout, res.getString("bad_tree")); - } catch (MissingResourceException e) { - errout.println(e.toString()); - } - } - -} diff --git 
a/net.sourceforge.phpeclipse/src/org/w3c/tidy/StreamIn.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/StreamIn.java deleted file mode 100644 index e2b83a7..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/StreamIn.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * @(#)StreamIn.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Input Stream - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * Derived from <a href="http://www.w3.org/People/Raggett/tidy"> - * HTML Tidy Release 4 Aug 2000</a> - * - * @author Dave Raggett <dsr@w3.org> - * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -import java.io.InputStream; - -public abstract class StreamIn { - - public static final int EndOfStream = -1; // EOF - - /* states for ISO 2022 - - A document in ISO-2022 based encoding uses some ESC sequences called - "designator" to switch character sets. The designators defined and - used in ISO-2022-JP are: - - "ESC" + "(" + ? for ISO646 variants - - "ESC" + "$" + ? and - "ESC" + "$" + "(" + ? 
for multibyte character sets - */ - - public static final int FSM_ASCII = 0; - public static final int FSM_ESC = 1; - public static final int FSM_ESCD = 2; - public static final int FSM_ESCDP = 3; - public static final int FSM_ESCP = 4; - public static final int FSM_NONASCII = 5; - - /* non-raw input is cleaned up*/ - public int state; /* FSM for ISO2022 */ - public boolean pushed; - public int c; - public int tabs; - public int tabsize; - public int lastcol; - public int curcol; - public int curline; - public int encoding; - public InputStream stream; - public boolean endOfStream; - public Object lexer; /* needed for error reporting */ - - /* read char from stream */ - public abstract int readCharFromStream(); - - public abstract int readChar(); - - public abstract void ungetChar(int c); - - public abstract boolean isEndOfStream(); - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/StreamInImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/StreamInImpl.java deleted file mode 100644 index 5c12c8a..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/StreamInImpl.java +++ /dev/null @@ -1,367 +0,0 @@ -/* - * @(#)StreamInImpl.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Input Stream Implementation - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * Derived from <a href="http://www.w3.org/People/Raggett/tidy"> - * HTML Tidy Release 4 Aug 2000</a> - * - * @author Dave Raggett <dsr@w3.org> - * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * 
@version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -import java.io.InputStream; -import java.io.IOException; - -public class StreamInImpl extends StreamIn { - - /* Mapping for Windows Western character set (128-159) to Unicode */ - private static int[] Win2Unicode = - { - 0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, - 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000, - 0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178 - }; - - /* - John Love-Jensen contributed this table for mapping MacRoman - character set to Unicode - */ - - private static int[] Mac2Unicode = - { - - 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, - - 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, - 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, - - 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, - 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, - - 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, - 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, - - 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, - 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, - - 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, - 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, - - 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, - 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, - - 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, - 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, - /* x7F = DEL */ - 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, - 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 
0x00E7, 0x00E9, 0x00E8, - - 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3, - 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, - - 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, - 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8, - - 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211, - 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, - - 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, - 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153, - - 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA, - 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, - - 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, - 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4, - /* xF0 = Apple Logo */ - 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, - 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7 - }; - - public StreamInImpl(InputStream stream, int encoding, int tabsize) - { - this.stream = stream; - this.pushed = false; - this.c = (int)'\0'; - this.tabs = 0; - this.tabsize = tabsize; - this.curline = 1; - this.curcol = 1; - this.encoding = encoding; - this.state = FSM_ASCII; - this.endOfStream = false; - } - - /* read char from stream */ - public int readCharFromStream() - { - int n, c, i, count; - - try { - c = this.stream.read(); - - if (c == EndOfStream) { - this.endOfStream = true; - return c; - } - - /* - A document in ISO-2022 based encoding uses some ESC sequences - called "designator" to switch character sets. The designators - defined and used in ISO-2022-JP are: - - "ESC" + "(" + ? for ISO646 variants - - "ESC" + "$" + ? and - "ESC" + "$" + "(" + ? for multibyte character sets - - Where ? stands for a single character used to indicate the - character set for multibyte characters. 
- - Tidy handles this by preserving the escape sequence and - setting the top bit of each byte for non-ascii chars. This - bit is then cleared on output. The input stream keeps track - of the state to determine when to set/clear the bit. - */ - - if (this.encoding == Configuration.ISO2022) - { - if (c == 0x1b) /* ESC */ - { - this.state = FSM_ESC; - return c; - } - - switch (this.state) - { - case FSM_ESC: - if (c == '$') - this.state = FSM_ESCD; - else if (c == '(') - this.state = FSM_ESCP; - else - this.state = FSM_ASCII; - break; - - case FSM_ESCD: - if (c == '(') - this.state = FSM_ESCDP; - else - this.state = FSM_NONASCII; - break; - - case FSM_ESCDP: - this.state = FSM_NONASCII; - break; - - case FSM_ESCP: - this.state = FSM_ASCII; - break; - - case FSM_NONASCII: - c |= 0x80; - break; - } - - return c; - } - - if (this.encoding != Configuration.UTF8) - return c; - - /* deal with UTF-8 encoded char */ - - if ((c & 0xE0) == 0xC0) /* 110X XXXX two bytes */ - { - n = c & 31; - count = 1; - } - else if ((c & 0xF0) == 0xE0) /* 1110 XXXX three bytes */ - { - n = c & 15; - count = 2; - } - else if ((c & 0xF8) == 0xF0) /* 1111 0XXX four bytes */ - { - n = c & 7; - count = 3; - } - else if ((c & 0xFC) == 0xF8) /* 1111 10XX five bytes */ - { - n = c & 3; - count = 4; - } - else if ((c & 0xFE) == 0xFC) /* 1111 110X six bytes */ - { - n = c & 1; - count = 5; - } - else /* 0XXX XXXX one byte */ - return c; - - /* successor bytes should have the form 10XX XXXX */ - for (i = 1; i <= count; ++i) - { - c = this.stream.read(); - - if (c == EndOfStream) { - this.endOfStream = true; - return c; - } - - n = (n << 6) | (c & 0x3F); - } - } - catch (IOException e) { - System.err.println("StreamInImpl.readCharFromStream: " + e.toString()); - n = EndOfStream; - } - - return n; - } - - public int readChar() - { - int c; - - if (this.pushed) - { - this.pushed = false; - c = this.c; - - if (c == '\n') - { - this.curcol = 1; - this.curline++; - return c; - } - - this.curcol++; - return c; 
- } - - this.lastcol = this.curcol; - - if (this.tabs > 0) - { - this.curcol++; - this.tabs--; - return ' '; - } - - for (;;) - { - c = readCharFromStream(); - - if (c < 0) - return EndOfStream; - - if (c == '\n') - { - this.curcol = 1; - this.curline++; - break; - } - - if (c == '\r') - { - c = readCharFromStream(); - if (c != '\n') - { - ungetChar(c); - c = '\n'; - } - this.curcol = 1; - this.curline++; - break; - } - - if (c == '\t') - { - this.tabs = this.tabsize - ((this.curcol - 1) % this.tabsize) - 1; - this.curcol++; - c = ' '; - break; - } - - /* strip control characters, except for Esc */ - - if (c == '\033') - break; - - if (0 < c && c < 32) - continue; - - /* watch out for IS02022 */ - - if (this.encoding == Configuration.RAW || - this.encoding == Configuration.ISO2022) - { - this.curcol++; - break; - } - - if (this.encoding == Configuration.MACROMAN) - c = Mac2Unicode[c]; - - /* produced e.g. as a side-effect of smart quotes in Word */ - - if (127 < c && c < 160) - { - Report.encodingError((Lexer)this.lexer, Report.WINDOWS_CHARS, c); - - c = Win2Unicode[c - 128]; - - if (c == 0) - continue; - } - - this.curcol++; - break; - } - - return c; - } - - public void ungetChar(int c) - { - this.pushed = true; - this.c = c; - - if (c == '\n') - { - --this.curline; - } - - this.curcol = this.lastcol; - } - - public boolean isEndOfStream() - { - return this.endOfStream; - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Style.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Style.java deleted file mode 100644 index 061e332..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Style.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * @(#)Style.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Linked list of class names and styles - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * Derived from <a href="http://www.w3.org/People/Raggett/tidy"> - * HTML Tidy Release 4 Aug 2000</a> - * - * @author Dave Raggett <dsr@w3.org> - * 
@author Andy Quick <ac.quick@sympatico.ca> (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class Style { - - public Style(String tag, String tagClass, String properties, Style next) - { - this.tag = tag; - this.tagClass = tagClass; - this.properties = properties; - this.next = next; - } - - public Style(String tag, String tagClass, String properties) - { - this(tag, tagClass, properties, null); - } - - public Style() - { - this(null, null, null, null); - } - - public String tag; - public String tagClass; - public String properties; - public Style next; - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/StyleProp.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/StyleProp.java deleted file mode 100644 index 773e414..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/StyleProp.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * @(#)StyleProp.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Linked list of style properties - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy"> - * HTML Tidy Release 4 Aug 2000</a> - * - * @author Dave Raggett <dsr@w3.org> - * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -public class StyleProp { - - public StyleProp(String name, String value, StyleProp next) - { - this.name = name; - this.value = value; - this.next = next; - } - - public StyleProp(String name, String value) - { - this(name, value, null); - } - - public StyleProp() - { - this(null, null, null); - } - - public String name; - public String value; - public StyleProp next; - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/TagTable.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/TagTable.java deleted file mode 100644 index dd81b1d..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/TagTable.java +++ /dev/null @@ -1,389 +0,0 @@ -/* - * @(#)TagTable.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * Tag dictionary node hash table - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. 
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy"> - * HTML Tidy Release 4 Aug 2000</a> - * - * @author Dave Raggett <dsr@w3.org> - * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - * Modified from a Singleton to a non-Singleton. - */ - -import java.util.Hashtable; -import java.util.Enumeration; - -public class TagTable { - - private Configuration configuration = null; - - public TagTable() - { - for ( int i = 0; i < tags.length; i++ ) { - install( tags[i] ); - } - tagHtml = lookup("html"); - tagHead = lookup("head"); - tagBody = lookup("body"); - tagFrameset = lookup("frameset"); - tagFrame = lookup("frame"); - tagNoframes = lookup("noframes"); - tagMeta = lookup("meta"); - tagTitle = lookup("title"); - tagBase = lookup("base"); - tagHr = lookup("hr"); - tagPre = lookup("pre"); - tagListing = lookup("listing"); - tagH1 = lookup("h1"); - tagH2 = lookup("h2"); - tagP = lookup("p"); - tagUl = lookup("ul"); - tagOl = lookup("ol"); - tagDir = lookup("dir"); - tagLi = lookup("li"); - tagDt = lookup("dt"); - tagDd = lookup("dd"); - tagDl = lookup("dl"); - tagTd = lookup("td"); - tagTh = lookup("th"); - tagTr = lookup("tr"); - tagCol = lookup("col"); - tagBr = lookup("br"); - tagA = lookup("a"); - tagLink = lookup("link"); - tagB = lookup("b"); - tagI = lookup("i"); - tagStrong = lookup("strong"); - tagEm = lookup("em"); - 
tagBig = lookup("big"); - tagSmall = lookup("small"); - tagParam = lookup("param"); - tagOption = lookup("option"); - tagOptgroup = lookup("optgroup"); - tagImg = lookup("img"); - tagMap = lookup("map"); - tagArea = lookup("area"); - tagNobr = lookup("nobr"); - tagWbr = lookup("wbr"); - tagFont = lookup("font"); - tagSpacer = lookup("spacer"); - tagLayer = lookup("layer"); - tagCenter = lookup("center"); - tagStyle = lookup("style"); - tagScript = lookup("script"); - tagNoscript = lookup("noscript"); - tagTable = lookup("table"); - tagCaption = lookup("caption"); - tagForm = lookup("form"); - tagTextarea = lookup("textarea"); - tagBlockquote = lookup("blockquote"); - tagApplet = lookup("applet"); - tagObject = lookup("object"); - tagDiv = lookup("div"); - tagSpan = lookup("span"); - } - - public void setConfiguration(Configuration configuration) - { - this.configuration = configuration; - } - - public Dict lookup( String name ) - { - return (Dict)tagHashtable.get( name ); - } - - public Dict install( Dict dict ) - { - Dict d = (Dict)tagHashtable.get(dict.name); - if (d != null) - { - d.versions = dict.versions; - d.model |= dict.model; - d.parser = dict.parser; - d.chkattrs = dict.chkattrs; - return d; - } - else - { - tagHashtable.put(dict.name, dict); - return dict; - } - } - - /* public interface for finding tag by name */ - public boolean findTag( Node node ) - { - Dict np; - - if ( configuration != null && configuration.XmlTags ) { - node.tag = xmlTags; - return true; - } - - if ( node.element != null ) { - np = lookup( node.element ); - if ( np != null ) { - node.tag = np; - return true; - } - } - - return false; - } - - public Parser findParser(Node node) - { - Dict np; - - if (node.element != null) { - np = lookup(node.element); - if (np != null) { - return np.parser; - } - } - - return null; - } - - private Hashtable tagHashtable = new Hashtable(); - - private static Dict[] tags = { - - new Dict( "html", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), 
(Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST), ParserImpl.getParseHTML(), CheckAttribsImpl.getCheckHTML() ), - - new Dict( "head", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), (Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST), ParserImpl.getParseHead(), null ), - - new Dict( "title", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), Dict.CM_HEAD, ParserImpl.getParseTitle(), null ), - new Dict( "base", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), (Dict.CM_HEAD|Dict.CM_EMPTY), null, null ), - new Dict( "link", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), (Dict.CM_HEAD|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckLINK() ), - new Dict( "meta", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), (Dict.CM_HEAD|Dict.CM_EMPTY), null, null ), - new Dict( "style", (short)(Dict.VERS_FROM32|Dict.VERS_FRAMES), Dict.CM_HEAD, ParserImpl.getParseScript(), CheckAttribsImpl.getCheckSTYLE() ), - new Dict( "script", (short)(Dict.VERS_FROM32|Dict.VERS_FRAMES), (Dict.CM_HEAD|Dict.CM_MIXED|Dict.CM_BLOCK|Dict.CM_INLINE), ParserImpl.getParseScript(), CheckAttribsImpl.getCheckSCRIPT() ), - new Dict( "server", Dict.VERS_NETSCAPE, (Dict.CM_HEAD|Dict.CM_MIXED|Dict.CM_BLOCK|Dict.CM_INLINE), ParserImpl.getParseScript(), null ), - - new Dict( "body", Dict.VERS_ALL, (Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST), ParserImpl.getParseBody(), null ), - new Dict( "frameset", Dict.VERS_FRAMES, (Dict.CM_HTML|Dict.CM_FRAMES), ParserImpl.getParseFrameSet(), null ), - - new Dict( "p", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_OPT), ParserImpl.getParseInline(), null ), - new Dict( "h1", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ), - new Dict( "h2", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ), - new Dict( "h3", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ), - new Dict( "h4", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ), - new Dict( "h5", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), 
ParserImpl.getParseInline(), null ), - new Dict( "h6", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ), - new Dict( "ul", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseList(), null ), - new Dict( "ol", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseList(), null ), - new Dict( "dl", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseDefList(), null ), - new Dict( "dir", Dict.VERS_LOOSE, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParseList(), null ), - new Dict( "menu", Dict.VERS_LOOSE, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParseList(), null ), - new Dict( "pre", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParsePre(), null ), - new Dict( "listing", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParsePre(), null ), - new Dict( "xmp", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParsePre(), null ), - new Dict( "plaintext", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParsePre(), null ), - new Dict( "address", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ), - new Dict( "blockquote", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ), - new Dict( "form", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ), - new Dict( "isindex", Dict.VERS_LOOSE, (Dict.CM_BLOCK|Dict.CM_EMPTY), null, null ), - new Dict( "fieldset", Dict.VERS_HTML40, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ), - new Dict( "table", Dict.VERS_FROM32, Dict.CM_BLOCK, ParserImpl.getParseTableTag(), CheckAttribsImpl.getCheckTABLE() ), - new Dict( "hr", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckHR() ), - new Dict( "div", Dict.VERS_FROM32, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ), - new Dict( "multicol", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ), - new Dict( "nosave", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ), - new Dict( "layer", Dict.VERS_NETSCAPE, 
Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ), - new Dict( "ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "nolayer", Dict.VERS_NETSCAPE, (Dict.CM_BLOCK|Dict.CM_INLINE|Dict.CM_MIXED), ParserImpl.getParseBlock(), null ), - new Dict( "align", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ), - new Dict( "center", Dict.VERS_LOOSE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ), - new Dict( "ins", Dict.VERS_HTML40, (Dict.CM_INLINE|Dict.CM_BLOCK|Dict.CM_MIXED), ParserImpl.getParseInline(), null ), - new Dict( "del", Dict.VERS_HTML40, (Dict.CM_INLINE|Dict.CM_BLOCK|Dict.CM_MIXED), ParserImpl.getParseInline(), null ), - - new Dict( "li", Dict.VERS_ALL, (Dict.CM_LIST|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), null ), - new Dict( "dt", Dict.VERS_ALL, (Dict.CM_DEFLIST|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseInline(), null ), - new Dict( "dd", Dict.VERS_ALL, (Dict.CM_DEFLIST|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), null ), - - new Dict( "caption", Dict.VERS_FROM32, Dict.CM_TABLE, ParserImpl.getParseInline(), CheckAttribsImpl.getCheckCaption() ), - new Dict( "colgroup", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_OPT), ParserImpl.getParseColGroup(), null ), - new Dict( "col", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_EMPTY), null, null ), - new Dict( "thead", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(), null ), - new Dict( "tfoot", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(), null ), - new Dict( "tbody", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(), null ), - new Dict( "tr", Dict.VERS_FROM32, (Dict.CM_TABLE|Dict.CM_OPT), ParserImpl.getParseRow(), null ), - new Dict( "td", Dict.VERS_FROM32, (Dict.CM_ROW|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), CheckAttribsImpl.getCheckTableCell() ), - new Dict( "th", 
Dict.VERS_FROM32, (Dict.CM_ROW|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), CheckAttribsImpl.getCheckTableCell() ), - - new Dict( "q", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "a", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), CheckAttribsImpl.getCheckAnchor() ), - new Dict( "br", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ), - new Dict( "img", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckIMG() ), - new Dict( "object", Dict.VERS_HTML40, (Dict.CM_OBJECT|Dict.CM_HEAD|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ), - new Dict( "applet", Dict.VERS_LOOSE, (Dict.CM_OBJECT|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ), - new Dict( "servlet", Dict.VERS_SUN, (Dict.CM_OBJECT|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ), - new Dict( "param", Dict.VERS_FROM32, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ), - new Dict( "embed", Dict.VERS_NETSCAPE, (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null, null ), - new Dict( "noembed", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "iframe", Dict.VERS_HTML40_LOOSE, Dict.CM_INLINE, ParserImpl.getParseBlock(), null ), - new Dict( "frame", Dict.VERS_FRAMES, (Dict.CM_FRAMES|Dict.CM_EMPTY), null, null ), - new Dict( "noframes", Dict.VERS_IFRAMES, (Dict.CM_BLOCK|Dict.CM_FRAMES), ParserImpl.getParseNoFrames(), null ), - new Dict( "noscript", (short)(Dict.VERS_FRAMES|Dict.VERS_HTML40), (Dict.CM_BLOCK|Dict.CM_INLINE|Dict.CM_MIXED), ParserImpl.getParseBlock(), null ), - new Dict( "b", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "i", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "u", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "tt", Dict.VERS_ALL, Dict.CM_INLINE, 
ParserImpl.getParseInline(), null ), - new Dict( "s", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "strike", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "big", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "small", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "sub", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "sup", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "em", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "strong", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "dfn", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "code", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "samp", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "kbd", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "var", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "cite", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "abbr", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "acronym", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "span", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "blink", Dict.VERS_PROPRIETARY, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "nobr", Dict.VERS_PROPRIETARY, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "wbr", Dict.VERS_PROPRIETARY, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ), - new Dict( "marquee", Dict.VERS_MICROSOFT, (Dict.CM_INLINE|Dict.CM_OPT), ParserImpl.getParseInline(), null ), - new Dict( "bgsound", Dict.VERS_MICROSOFT, 
(Dict.CM_HEAD|Dict.CM_EMPTY), null, null ), - new Dict( "comment", Dict.VERS_MICROSOFT, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "spacer", Dict.VERS_NETSCAPE, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ), - new Dict( "keygen", Dict.VERS_NETSCAPE, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ), - new Dict( "nolayer", Dict.VERS_NETSCAPE, (Dict.CM_BLOCK|Dict.CM_INLINE|Dict.CM_MIXED), ParserImpl.getParseBlock(), null ), - new Dict( "ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "map", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseBlock(), CheckAttribsImpl.getCheckMap() ), - new Dict( "area", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckAREA() ), - new Dict( "input", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null, null ), - new Dict( "select", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_FIELD), ParserImpl.getParseSelect(), null ), - new Dict( "option", Dict.VERS_ALL, (Dict.CM_FIELD|Dict.CM_OPT), ParserImpl.getParseText(), null ), - new Dict( "optgroup", Dict.VERS_HTML40, (Dict.CM_FIELD|Dict.CM_OPT), ParserImpl.getParseOptGroup(), null ), - new Dict( "textarea", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_FIELD), ParserImpl.getParseText(), null ), - new Dict( "label", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "legend", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "button", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "basefont", Dict.VERS_LOOSE, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ), - new Dict( "font", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - new Dict( "bdo", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ), - - }; - - /* create dummy entry for all xml tags */ - public Dict xmlTags = new Dict( null, Dict.VERS_ALL, Dict.CM_BLOCK, null, null ); - - public Dict tagHtml = null; - public 
Dict tagHead = null; - public Dict tagBody = null; - public Dict tagFrameset = null; - public Dict tagFrame = null; - public Dict tagNoframes = null; - public Dict tagMeta = null; - public Dict tagTitle = null; - public Dict tagBase = null; - public Dict tagHr = null; - public Dict tagPre = null; - public Dict tagListing = null; - public Dict tagH1 = null; - public Dict tagH2 = null; - public Dict tagP = null; - public Dict tagUl = null; - public Dict tagOl = null; - public Dict tagDir = null; - public Dict tagLi = null; - public Dict tagDt = null; - public Dict tagDd = null; - public Dict tagDl = null; - public Dict tagTd = null; - public Dict tagTh = null; - public Dict tagTr = null; - public Dict tagCol = null; - public Dict tagBr = null; - public Dict tagA = null; - public Dict tagLink = null; - public Dict tagB = null; - public Dict tagI = null; - public Dict tagStrong = null; - public Dict tagEm = null; - public Dict tagBig = null; - public Dict tagSmall = null; - public Dict tagParam = null; - public Dict tagOption = null; - public Dict tagOptgroup = null; - public Dict tagImg = null; - public Dict tagMap = null; - public Dict tagArea = null; - public Dict tagNobr = null; - public Dict tagWbr = null; - public Dict tagFont = null; - public Dict tagSpacer = null; - public Dict tagLayer = null; - public Dict tagCenter = null; - public Dict tagStyle = null; - public Dict tagScript = null; - public Dict tagNoscript = null; - public Dict tagTable = null; - public Dict tagCaption = null; - public Dict tagForm = null; - public Dict tagTextarea = null; - public Dict tagBlockquote = null; - public Dict tagApplet = null; - public Dict tagObject = null; - public Dict tagDiv = null; - public Dict tagSpan = null; - - public void defineInlineTag( String name ) - { - install( new Dict( name, Dict.VERS_PROPRIETARY, - (Dict.CM_INLINE|Dict.CM_NO_INDENT|Dict.CM_NEW), - ParserImpl.getParseBlock(), null ) ); - } - - public void defineBlockTag( String name ) - { - install( new 
Dict( name, Dict.VERS_PROPRIETARY, - (Dict.CM_BLOCK|Dict.CM_NO_INDENT|Dict.CM_NEW), - ParserImpl.getParseBlock(), null ) ); - } - - public void defineEmptyTag(String name) - { - install(new Dict(name, Dict.VERS_PROPRIETARY, - (Dict.CM_EMPTY|Dict.CM_NO_INDENT|Dict.CM_NEW), - ParserImpl.getParseBlock(), null)); - } - - public void definePreTag(String name) - { - install(new Dict(name, Dict.VERS_PROPRIETARY, - (Dict.CM_BLOCK|Dict.CM_NO_INDENT|Dict.CM_NEW), - ParserImpl.getParsePre(), null)); - } -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Tidy.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Tidy.java deleted file mode 100644 index c97dda4..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Tidy.java +++ /dev/null @@ -1,1424 +0,0 @@ -/* - * @(#)Tidy.java 1.11 2000/08/16 - * - */ - -/* - HTML parser and pretty printer - - Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts - Institute of Technology, Institut National de Recherche en - Informatique et en Automatique, Keio University). All Rights - Reserved. - - Contributing Author(s): - - Dave Raggett <dsr@w3.org> - Andy Quick <ac.quick@sympatico.ca> (translation to Java) - - The contributing author(s) would like to thank all those who - helped with testing, bug fixes, and patience. This wouldn't - have been possible without all of you. - - COPYRIGHT NOTICE: - - This software and documentation is provided "as is," and - the copyright holders and contributing author(s) make no - representations or warranties, express or implied, including - but not limited to, warranties of merchantability or fitness - for any particular purpose or that the use of the software or - documentation will not infringe any third party patents, - copyrights, trademarks or other rights. 
- - The copyright holders and contributing author(s) will not be - liable for any direct, indirect, special or consequential damages - arising out of any use of the software or documentation, even if - advised of the possibility of such damage. - - Permission is hereby granted to use, copy, modify, and distribute - this source code, or portions hereof, documentation and executables, - for any purpose, without fee, subject to the following restrictions: - - 1. The origin of this source code must not be misrepresented. - 2. Altered versions must be plainly marked as such and must - not be misrepresented as being the original source. - 3. This Copyright notice may not be removed or altered from any - source or altered source distribution. - - The copyright holders and contributing author(s) specifically - permit, without fee, and encourage the use of this source code - as a component for supporting the Hypertext Markup Language in - commercial products. If you use this source code in a product, - acknowledgment is not required but would be appreciated. -*/ - -package org.w3c.tidy; - -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.PrintWriter; -import java.util.Properties; - -import org.eclipse.core.resources.IFile; -import org.eclipse.core.resources.IMarker; -import org.eclipse.core.runtime.CoreException; - -/** - * - * <p>HTML parser and pretty printer</p> - * - * <p> - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from <a href="http://www.w3.org/People/Raggett/tidy"> - * HTML Tidy Release 4 Aug 2000</a> - * </p> - * - * <p> - * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts - * Institute of Technology, Institut National de Recherche en - * Informatique et en Automatique, Keio University). 
All Rights - * Reserved. - * </p> - * - * <p> - * Contributing Author(s):<br> - * <a href="mailto:dsr@w3.org">Dave Raggett</a><br> - * <a href="mailto:ac.quick@sympatico.ca">Andy Quick</a> (translation to Java) - * </p> - * - * <p> - * The contributing author(s) would like to thank all those who - * helped with testing, bug fixes, and patience. This wouldn't - * have been possible without all of you. - * </p> - * - * <p> - * COPYRIGHT NOTICE:<br> - * - * This software and documentation is provided "as is," and - * the copyright holders and contributing author(s) make no - * representations or warranties, express or implied, including - * but not limited to, warranties of merchantability or fitness - * for any particular purpose or that the use of the software or - * documentation will not infringe any third party patents, - * copyrights, trademarks or other rights. - * </p> - * - * <p> - * The copyright holders and contributing author(s) will not be - * liable for any direct, indirect, special or consequential damages - * arising out of any use of the software or documentation, even if - * advised of the possibility of such damage. - * </p> - * - * <p> - * Permission is hereby granted to use, copy, modify, and distribute - * this source code, or portions hereof, documentation and executables, - * for any purpose, without fee, subject to the following restrictions: - * </p> - * - * <p> - * <ol> - * <li>The origin of this source code must not be misrepresented.</li> - * <li>Altered versions must be plainly marked as such and must - * not be misrepresented as being the original source.</li> - * <li>This Copyright notice may not be removed or altered from any - * source or altered source distribution.</li> - * </ol> - * </p> - * - * <p> - * The copyright holders and contributing author(s) specifically - * permit, without fee, and encourage the use of this source code - * as a component for supporting the Hypertext Markup Language in - * commercial products. 
If you use this source code in a product, - * acknowledgment is not required but would be appreciated. - * </p> - * - * @author Dave Raggett <dsr@w3.org> - * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - * - */ - -public class Tidy implements java.io.Serializable { - - static final long serialVersionUID = -2794371560623987718L; - - private boolean initialized = false; - private PrintWriter errout = null; /* error output stream */ - private PrintWriter stderr = null; - private Configuration configuration = null; - private String inputStreamName = "InputStream"; - private int parseErrors = 0; - private int parseWarnings = 0; - - public Tidy() { - init(); - } - - public Configuration getConfiguration() { - return configuration; - } - - public PrintWriter getStderr() { - return stderr; - } - - /** - * ParseErrors - the number of errors that occurred in the most - * recent parse operation - */ - - public int getParseErrors() { - return parseErrors; - } - - /** - * ParseWarnings - the number of warnings that occurred in the most - * recent parse operation - */ - - public int getParseWarnings() { - return parseWarnings; - } - - /** - * Errout - the error output stream - */ - - public PrintWriter getErrout() { - return errout; - } - - public void setErrout(PrintWriter errout) { - this.errout = errout; - } - - /** - * Spaces - default 
indentation - * @see org.w3c.tidy.Configuration#spaces - */ - - public void setSpaces(int spaces) { - configuration.spaces = spaces; - } - - public int getSpaces() { - return configuration.spaces; - } - - /** - * Wraplen - default wrap margin - * @see org.w3c.tidy.Configuration#wraplen - */ - - public void setWraplen(int wraplen) { - configuration.wraplen = wraplen; - } - - public int getWraplen() { - return configuration.wraplen; - } - - /** - * CharEncoding - * @see org.w3c.tidy.Configuration#CharEncoding - */ - - public void setCharEncoding(int charencoding) { - configuration.CharEncoding = charencoding; - } - - public int getCharEncoding() { - return configuration.CharEncoding; - } - - /** - * Tabsize - * @see org.w3c.tidy.Configuration#tabsize - */ - - public void setTabsize(int tabsize) { - configuration.tabsize = tabsize; - } - - public int getTabsize() { - return configuration.tabsize; - } - - /** - * Errfile - file name to write errors to - * @see org.w3c.tidy.Configuration#errfile - */ - - public void setErrfile(String errfile) { - configuration.errfile = errfile; - } - - public String getErrfile() { - return configuration.errfile; - } - - /** - * Writeback - if true then output tidied markup - * NOTE: this property is ignored when parsing from an InputStream. 
- * @see org.w3c.tidy.Configuration#writeback - */ - - public void setWriteback(boolean writeback) { - configuration.writeback = writeback; - } - - public boolean getWriteback() { - return configuration.writeback; - } - - /** - * OnlyErrors - if true normal output is suppressed - * @see org.w3c.tidy.Configuration#OnlyErrors - */ - - public void setOnlyErrors(boolean OnlyErrors) { - configuration.OnlyErrors = OnlyErrors; - } - - public boolean getOnlyErrors() { - return configuration.OnlyErrors; - } - - /** - * ShowWarnings - however errors are always shown - * @see org.w3c.tidy.Configuration#ShowWarnings - */ - - public void setShowWarnings(boolean ShowWarnings) { - configuration.ShowWarnings = ShowWarnings; - } - - public boolean getShowWarnings() { - return configuration.ShowWarnings; - } - - /** - * Quiet - no 'Parsing X', guessed DTD or summary - * @see org.w3c.tidy.Configuration#Quiet - */ - - public void setQuiet(boolean Quiet) { - configuration.Quiet = Quiet; - } - - public boolean getQuiet() { - return configuration.Quiet; - } - - /** - * IndentContent - indent content of appropriate tags - * @see org.w3c.tidy.Configuration#IndentContent - */ - - public void setIndentContent(boolean IndentContent) { - configuration.IndentContent = IndentContent; - } - - public boolean getIndentContent() { - return configuration.IndentContent; - } - - /** - * SmartIndent - does text/block level content effect indentation - * @see org.w3c.tidy.Configuration#SmartIndent - */ - - public void setSmartIndent(boolean SmartIndent) { - configuration.SmartIndent = SmartIndent; - } - - public boolean getSmartIndent() { - return configuration.SmartIndent; - } - - /** - * HideEndTags - suppress optional end tags - * @see org.w3c.tidy.Configuration#HideEndTags - */ - - public void setHideEndTags(boolean HideEndTags) { - configuration.HideEndTags = HideEndTags; - } - - public boolean getHideEndTags() { - return configuration.HideEndTags; - } - - /** - * XmlTags - treat input as XML - * 
@see org.w3c.tidy.Configuration#XmlTags - */ - - public void setXmlTags(boolean XmlTags) { - configuration.XmlTags = XmlTags; - } - - public boolean getXmlTags() { - return configuration.XmlTags; - } - - /** - * XmlOut - create output as XML - * @see org.w3c.tidy.Configuration#XmlOut - */ - - public void setXmlOut(boolean XmlOut) { - configuration.XmlOut = XmlOut; - } - - public boolean getXmlOut() { - return configuration.XmlOut; - } - - /** - * XHTML - output extensible HTML - * @see org.w3c.tidy.Configuration#xHTML - */ - - public void setXHTML(boolean xHTML) { - configuration.xHTML = xHTML; - } - - public boolean getXHTML() { - return configuration.xHTML; - } - - /** - * RawOut - avoid mapping values > 127 to entities - * @see org.w3c.tidy.Configuration#RawOut - */ - - public void setRawOut(boolean RawOut) { - configuration.RawOut = RawOut; - } - - public boolean getRawOut() { - return configuration.RawOut; - } - - /** - * UpperCaseTags - output tags in upper not lower case - * @see org.w3c.tidy.Configuration#UpperCaseTags - */ - - public void setUpperCaseTags(boolean UpperCaseTags) { - configuration.UpperCaseTags = UpperCaseTags; - } - - public boolean getUpperCaseTags() { - return configuration.UpperCaseTags; - } - - /** - * UpperCaseAttrs - output attributes in upper not lower case - * @see org.w3c.tidy.Configuration#UpperCaseAttrs - */ - - public void setUpperCaseAttrs(boolean UpperCaseAttrs) { - configuration.UpperCaseAttrs = UpperCaseAttrs; - } - - public boolean getUpperCaseAttrs() { - return configuration.UpperCaseAttrs; - } - - /** - * MakeClean - remove presentational clutter - * @see org.w3c.tidy.Configuration#MakeClean - */ - - public void setMakeClean(boolean MakeClean) { - configuration.MakeClean = MakeClean; - } - - public boolean getMakeClean() { - return configuration.MakeClean; - } - - /** - * BreakBeforeBR - o/p newline before &lt;br&gt; or not? 
- * @see org.w3c.tidy.Configuration#BreakBeforeBR - */ - - public void setBreakBeforeBR(boolean BreakBeforeBR) { - configuration.BreakBeforeBR = BreakBeforeBR; - } - - public boolean getBreakBeforeBR() { - return configuration.BreakBeforeBR; - } - - /** - * BurstSlides - create slides on each h2 element - * @see org.w3c.tidy.Configuration#BurstSlides - */ - - public void setBurstSlides(boolean BurstSlides) { - configuration.BurstSlides = BurstSlides; - } - - public boolean getBurstSlides() { - return configuration.BurstSlides; - } - - /** - * NumEntities - use numeric entities - * @see org.w3c.tidy.Configuration#NumEntities - */ - - public void setNumEntities(boolean NumEntities) { - configuration.NumEntities = NumEntities; - } - - public boolean getNumEntities() { - return configuration.NumEntities; - } - - /** - * QuoteMarks - output " marks as &amp;quot; - * @see org.w3c.tidy.Configuration#QuoteMarks - */ - - public void setQuoteMarks(boolean QuoteMarks) { - configuration.QuoteMarks = QuoteMarks; - } - - public boolean getQuoteMarks() { - return configuration.QuoteMarks; - } - - /** - * QuoteNbsp - output non-breaking space as entity - * @see org.w3c.tidy.Configuration#QuoteNbsp - */ - - public void setQuoteNbsp(boolean QuoteNbsp) { - configuration.QuoteNbsp = QuoteNbsp; - } - - public boolean getQuoteNbsp() { - return configuration.QuoteNbsp; - } - - /** - * QuoteAmpersand - output naked ampersand as &amp; - * @see org.w3c.tidy.Configuration#QuoteAmpersand - */ - - public void setQuoteAmpersand(boolean QuoteAmpersand) { - configuration.QuoteAmpersand = QuoteAmpersand; - } - - public boolean getQuoteAmpersand() { - return configuration.QuoteAmpersand; - } - - /** - * WrapAttVals - wrap within attribute values - * @see org.w3c.tidy.Configuration#WrapAttVals - */ - - public void setWrapAttVals(boolean WrapAttVals) { - configuration.WrapAttVals = WrapAttVals; - } - - public boolean getWrapAttVals() { - return configuration.WrapAttVals; - } - - /** - * 
WrapScriptlets - wrap within JavaScript string literals - * @see org.w3c.tidy.Configuration#WrapScriptlets - */ - - public void setWrapScriptlets(boolean WrapScriptlets) { - configuration.WrapScriptlets = WrapScriptlets; - } - - public boolean getWrapScriptlets() { - return configuration.WrapScriptlets; - } - - /** - * WrapSection - wrap within &lt;![ ... ]&gt; section tags - * @see org.w3c.tidy.Configuration#WrapSection - */ - - public void setWrapSection(boolean WrapSection) { - configuration.WrapSection = WrapSection; - } - - public boolean getWrapSection() { - return configuration.WrapSection; - } - - /** - * AltText - default text for alt attribute - * @see org.w3c.tidy.Configuration#altText - */ - - public void setAltText(String altText) { - configuration.altText = altText; - } - - public String getAltText() { - return configuration.altText; - } - - /** - * Slidestyle - style sheet for slides - * @see org.w3c.tidy.Configuration#slidestyle - */ - - public void setSlidestyle(String slidestyle) { - configuration.slidestyle = slidestyle; - } - - public String getSlidestyle() { - return configuration.slidestyle; - } - - /** - * XmlPi - add &lt;?xml?&gt; for XML docs - * @see org.w3c.tidy.Configuration#XmlPi - */ - - public void setXmlPi(boolean XmlPi) { - configuration.XmlPi = XmlPi; - } - - public boolean getXmlPi() { - return configuration.XmlPi; - } - - /** - * DropFontTags - discard presentation tags - * @see org.w3c.tidy.Configuration#DropFontTags - */ - - public void setDropFontTags(boolean DropFontTags) { - configuration.DropFontTags = DropFontTags; - } - - public boolean getDropFontTags() { - return configuration.DropFontTags; - } - - /** - * DropEmptyParas - discard empty p elements - * @see org.w3c.tidy.Configuration#DropEmptyParas - */ - - public void setDropEmptyParas(boolean DropEmptyParas) { - configuration.DropEmptyParas = DropEmptyParas; - } - - public boolean getDropEmptyParas() { - return configuration.DropEmptyParas; - } - - /** - * FixComments 
- fix comments with adjacent hyphens - * @see org.w3c.tidy.Configuration#FixComments - */ - - public void setFixComments(boolean FixComments) { - configuration.FixComments = FixComments; - } - - public boolean getFixComments() { - return configuration.FixComments; - } - - /** - * WrapAsp - wrap within ASP pseudo elements - * @see org.w3c.tidy.Configuration#WrapAsp - */ - - public void setWrapAsp(boolean WrapAsp) { - configuration.WrapAsp = WrapAsp; - } - - public boolean getWrapAsp() { - return configuration.WrapAsp; - } - - /** - * WrapJste - wrap within JSTE pseudo elements - * @see org.w3c.tidy.Configuration#WrapJste - */ - - public void setWrapJste(boolean WrapJste) { - configuration.WrapJste = WrapJste; - } - - public boolean getWrapJste() { - return configuration.WrapJste; - } - - /** - * WrapPhp - wrap within PHP pseudo elements - * @see org.w3c.tidy.Configuration#WrapPhp - */ - - public void setWrapPhp(boolean WrapPhp) { - configuration.WrapPhp = WrapPhp; - } - - public boolean getWrapPhp() { - return configuration.WrapPhp; - } - - /** - * FixBackslash - fix URLs by replacing \ with / - * @see org.w3c.tidy.Configuration#FixBackslash - */ - - public void setFixBackslash(boolean FixBackslash) { - configuration.FixBackslash = FixBackslash; - } - - public boolean getFixBackslash() { - return configuration.FixBackslash; - } - - /** - * IndentAttributes - newline+indent before each attribute - * @see org.w3c.tidy.Configuration#IndentAttributes - */ - - public void setIndentAttributes(boolean IndentAttributes) { - configuration.IndentAttributes = IndentAttributes; - } - - public boolean getIndentAttributes() { - return configuration.IndentAttributes; - } - - /** - * DocType - user specified doctype - * omit | auto | strict | loose | <i>fpi</i> - * where the <i>fpi</i> is a string similar to - * &quot;-//ACME//DTD HTML 3.14159//EN&quot; - * Note: for <i>fpi</i> include the double-quotes in the string. 
- * @see org.w3c.tidy.Configuration#docTypeStr - * @see org.w3c.tidy.Configuration#docTypeMode - */ - - public void setDocType(String doctype) { - if (doctype != null) - configuration.docTypeStr = configuration.parseDocType(doctype, "doctype"); - } - - public String getDocType() { - String result = null; - switch (configuration.docTypeMode) { - case Configuration.DOCTYPE_OMIT : - result = "omit"; - break; - case Configuration.DOCTYPE_AUTO : - result = "auto"; - break; - case Configuration.DOCTYPE_STRICT : - result = "strict"; - break; - case Configuration.DOCTYPE_LOOSE : - result = "loose"; - break; - case Configuration.DOCTYPE_USER : - result = configuration.docTypeStr; - break; - } - return result; - } - - /** - * LogicalEmphasis - replace i by em and b by strong - * @see org.w3c.tidy.Configuration#LogicalEmphasis - */ - - public void setLogicalEmphasis(boolean LogicalEmphasis) { - configuration.LogicalEmphasis = LogicalEmphasis; - } - - public boolean getLogicalEmphasis() { - return configuration.LogicalEmphasis; - } - - /** - * XmlPIs - if set to true PIs must end with ?> - * @see org.w3c.tidy.Configuration#XmlPIs - */ - - public void setXmlPIs(boolean XmlPIs) { - configuration.XmlPIs = XmlPIs; - } - - public boolean getXmlPIs() { - return configuration.XmlPIs; - } - - /** - * EncloseText - if true text at body is wrapped in &lt;p&gt;'s - * @see org.w3c.tidy.Configuration#EncloseBodyText - */ - - public void setEncloseText(boolean EncloseText) { - configuration.EncloseBodyText = EncloseText; - } - - public boolean getEncloseText() { - return configuration.EncloseBodyText; - } - - /** - * EncloseBlockText - if true text in blocks is wrapped in &lt;p&gt;'s - * @see org.w3c.tidy.Configuration#EncloseBlockText - */ - - public void setEncloseBlockText(boolean EncloseBlockText) { - configuration.EncloseBlockText = EncloseBlockText; - } - - public boolean getEncloseBlockText() { - return configuration.EncloseBlockText; - } - - /** - * KeepFileTimes - if true last 
modified time is preserved<br> - * <b>this is NOT supported at this time.</b> - * @see org.w3c.tidy.Configuration#KeepFileTimes - */ - - public void setKeepFileTimes(boolean KeepFileTimes) { - configuration.KeepFileTimes = KeepFileTimes; - } - - public boolean getKeepFileTimes() { - return configuration.KeepFileTimes; - } - - /** - * Word2000 - draconian cleaning for Word2000 - * @see org.w3c.tidy.Configuration#Word2000 - */ - - public void setWord2000(boolean Word2000) { - configuration.Word2000 = Word2000; - } - - public boolean getWord2000() { - return configuration.Word2000; - } - - /** - * TidyMark - add meta element indicating tidied doc - * @see org.w3c.tidy.Configuration#TidyMark - */ - - public void setTidyMark(boolean TidyMark) { - configuration.TidyMark = TidyMark; - } - - public boolean getTidyMark() { - return configuration.TidyMark; - } - - /** - * XmlSpace - if set to yes adds xml:space attr as needed - * @see org.w3c.tidy.Configuration#XmlSpace - */ - - public void setXmlSpace(boolean XmlSpace) { - configuration.XmlSpace = XmlSpace; - } - - public boolean getXmlSpace() { - return configuration.XmlSpace; - } - - /** - * Emacs - if true format error output for GNU Emacs - * @see org.w3c.tidy.Configuration#Emacs - */ - - public void setEmacs(boolean Emacs) { - configuration.Emacs = Emacs; - } - - public boolean getEmacs() { - return configuration.Emacs; - } - - /** - * LiteralAttribs - if true attributes may use newlines - * @see org.w3c.tidy.Configuration#LiteralAttribs - */ - - public void setLiteralAttribs(boolean LiteralAttribs) { - configuration.LiteralAttribs = LiteralAttribs; - } - - public boolean getLiteralAttribs() { - return configuration.LiteralAttribs; - } - - /** - * InputStreamName - the name of the input stream (printed in the - * header information). 
- */ - public void setInputStreamName(String name) { - if (name != null) - inputStreamName = name; - } - - public String getInputStreamName() { - return inputStreamName; - } - - /** - * Sets the configuration from a configuration file. - */ - - public void setConfigurationFromFile(String filename) { - configuration.parseFile(filename); - } - - /** - * Sets the configuration from a properties object. - */ - - public void setConfigurationFromProps(Properties props) { - configuration.addProps(props); - } - - /** - * first time initialization which should - * precede reading the command line - */ - - private void init() { - configuration = new Configuration(); - if (configuration == null) - return; - - AttributeTable at = AttributeTable.getDefaultAttributeTable(); - if (at == null) - return; - TagTable tt = new TagTable(); - if (tt == null) - return; - tt.setConfiguration(configuration); - configuration.tt = tt; - EntityTable et = EntityTable.getDefaultEntityTable(); - if (et == null) - return; - - /* Unnecessary - same initial values in Configuration - Configuration.XmlTags = false; - Configuration.XmlOut = false; - Configuration.HideEndTags = false; - Configuration.UpperCaseTags = false; - Configuration.MakeClean = false; - Configuration.writeback = false; - Configuration.OnlyErrors = false; - */ - - configuration.errfile = null; - stderr = new PrintWriter(System.err, true); - errout = stderr; - initialized = true; - } - - /** - * Parses InputStream in and returns the root Node. - * If out is non-null, pretty prints to OutputStream out. - */ - - public Node parse(IFile iFile, InputStream in, OutputStream out) { - Node document = null; - - try { - iFile.deleteMarkers(IMarker.PROBLEM, false, 0); - document = parse(iFile, in, null, out); - } catch (CoreException e) { - } catch (FileNotFoundException fnfe) { - } catch (IOException e) { - } - - return document; - } - - /** - * Internal routine that actually does the parsing. 
The caller - * can pass either an InputStream or file name. If both are passed, - * the file name is preferred. - */ - - private Node parse(IFile iFile, InputStream in, String file, OutputStream out) throws FileNotFoundException, IOException { - Lexer lexer; - Node document = null; - Node doctype; - Out o = new OutImpl(); /* normal output stream */ - PPrint pprint; - - if (!initialized) - return null; - - if (errout == null) - return null; - - parseErrors = 0; - parseWarnings = 0; - - /* ensure config is self-consistent */ - configuration.adjust(); - - if (file != null) { - in = new FileInputStream(file); - inputStreamName = file; - } else if (in == null) { - in = System.in; - inputStreamName = "stdin"; - } - - if (in != null) { - lexer = new Lexer(iFile,new StreamInImpl(in, configuration.CharEncoding, configuration.tabsize), configuration); - lexer.errout = errout; - - /* - store pointer to lexer in input stream - to allow character encoding errors to be - reported - */ - lexer.in.lexer = lexer; - - /* Tidy doesn't alter the doctype for generic XML docs */ - if (configuration.XmlTags) - document = ParserImpl.parseXMLDocument(lexer); - else { - lexer.warnings = 0; - if (!configuration.Quiet) - Report.helloMessage(errout, Report.RELEASE_DATE, inputStreamName); - - document = ParserImpl.parseDocument(lexer); - - if (!document.checkNodeIntegrity()) { - Report.badTree(errout); - return null; - } - - Clean cleaner = new Clean(configuration.tt); - - /* simplifies <b><b> ... </b> ...</b> etc. */ - cleaner.nestedEmphasis(document); - - /* cleans up <dir>indented text</dir> etc. */ - cleaner.list2BQ(document); - cleaner.bQ2Div(document); - - /* replaces i by em and b by strong */ - if (configuration.LogicalEmphasis) - cleaner.emFromI(document); - - if (configuration.Word2000 && cleaner.isWord2000(document, configuration.tt)) { - /* prune Word2000's <![if ...]> ... 
<![endif]> */ - cleaner.dropSections(lexer, document); - - /* drop style & class attributes and empty p, span elements */ - cleaner.cleanWord2000(lexer, document); - } - - /* replaces presentational markup by style rules */ - if (configuration.MakeClean || configuration.DropFontTags) - cleaner.cleanTree(lexer, document); - - if (!document.checkNodeIntegrity()) { - Report.badTree(errout); - return null; - } - doctype = document.findDocType(); - if (document.content != null) { - if (configuration.xHTML) - lexer.setXHTMLDocType(document); - else - lexer.fixDocType(document); - - if (configuration.TidyMark) - lexer.addGenerator(document); - } - - /* ensure presence of initial <?XML version="1.0"?> */ - if (configuration.XmlOut && configuration.XmlPi) - lexer.fixXMLPI(document); - - if (!configuration.Quiet && document.content != null) { - Report.reportVersion(errout, lexer, inputStreamName, doctype); - Report.reportNumWarnings(errout, lexer); - } - } - - parseWarnings = lexer.warnings; - parseErrors = lexer.errors; - - // Try to close the InputStream but only if if we created it. 
- - if ((file != null) && (in != System.in)) { - try { - in.close(); - } catch (IOException e) { - } - } - - if (lexer.errors > 0) - Report.needsAuthorIntervention(errout); - - o.state = StreamIn.FSM_ASCII; - o.encoding = configuration.CharEncoding; - - if (!configuration.OnlyErrors && lexer.errors == 0) { - if (configuration.BurstSlides) { - Node body; - - body = null; - /* - remove doctype to avoid potential clash with - markup introduced when bursting into slides - */ - /* discard the document type */ - doctype = document.findDocType(); - - if (doctype != null) - Node.discardElement(doctype); - - /* slides use transitional features */ - lexer.versions |= Dict.VERS_HTML40_LOOSE; - - /* and patch up doctype to match */ - if (configuration.xHTML) - lexer.setXHTMLDocType(document); - else - lexer.fixDocType(document); - - /* find the body element which may be implicit */ - body = document.findBody(configuration.tt); - - if (body != null) { - pprint = new PPrint(configuration); - Report.reportNumberOfSlides(errout, pprint.countSlides(body)); - pprint.createSlides(lexer, document); - } else - Report.missingBody(errout); - } else if (configuration.writeback && (file != null)) { - try { - pprint = new PPrint(configuration); - o.out = new FileOutputStream(file); - - if (configuration.XmlTags) - pprint.printXMLTree(o, (short) 0, 0, lexer, document); - else - pprint.printTree(o, (short) 0, 0, lexer, document); - - pprint.flushLine(o, 0); - o.out.close(); - } catch (IOException e) { - errout.println(file + e.toString()); - } - } else if (out != null) { - pprint = new PPrint(configuration); - o.out = out; - - if (configuration.XmlTags) - pprint.printXMLTree(o, (short) 0, 0, lexer, document); - else - pprint.printTree(o, (short) 0, 0, lexer, document); - - pprint.flushLine(o, 0); - } - - } - - Report.errorSummary(lexer); - } - return document; - } - - /** - * Parses InputStream in and returns a DOM Document node. - * If out is non-null, pretty prints to OutputStream out. 
- */ - - public org.w3c.dom.Document parseDOM(IFile file, InputStream in, OutputStream out) { - Node document = parse(file, in, out); - if (document != null) - return (org.w3c.dom.Document) document.getAdapter(); - else - return null; - } - - /** - * Creates an empty DOM Document. - */ - - public static org.w3c.dom.Document createEmptyDocument() { - Node document = new Node(Node.RootNode, new byte[0], 0, 0); - Node node = new Node(Node.StartTag, new byte[0], 0, 0, "html", new TagTable()); - if (document != null && node != null) { - Node.insertNodeAtStart(document, node); - return (org.w3c.dom.Document) document.getAdapter(); - } else { - return null; - } - } - - /** - * Pretty-prints a DOM Document. - */ - - public void pprint(org.w3c.dom.Document doc, OutputStream out) { - Out o = new OutImpl(); - PPrint pprint; - Node document; - - if (!(doc instanceof DOMDocumentImpl)) { - return; - } - document = ((DOMDocumentImpl) doc).adaptee; - - o.state = StreamIn.FSM_ASCII; - o.encoding = configuration.CharEncoding; - - if (out != null) { - pprint = new PPrint(configuration); - o.out = out; - - if (configuration.XmlTags) - pprint.printXMLTree(o, (short) 0, 0, null, document); - else - pprint.printTree(o, (short) 0, 0, null, document); - - pprint.flushLine(o, 0); - } - } - - /** - * Command line interface to parser and pretty printer. 
- */ - - public static void main(String[] argv) { - int totalerrors = 0; - int totalwarnings = 0; - String file; - InputStream in; - String prog = "Tidy"; - Node document; - Node doctype; - Lexer lexer; - String s; - Out out = new OutImpl(); /* normal output stream */ - PPrint pprint; - int argc = argv.length + 1; - int argIndex = 0; - Tidy tidy; - Configuration configuration; - String arg; - String current_errorfile = "stderr"; - - tidy = new Tidy(); - configuration = tidy.getConfiguration(); - - /* read command line */ - - while (argc > 0) { - if (argc > 1 && argv[argIndex].startsWith("-")) { - /* support -foo and --foo */ - arg = argv[argIndex].substring(1); - - if (arg.length() > 0 && arg.charAt(0) == '-') - arg = arg.substring(1); - - if (arg.equals("xml")) - configuration.XmlTags = true; - else if (arg.equals("asxml") || arg.equals("asxhtml")) - configuration.xHTML = true; - else if (arg.equals("indent")) { - configuration.IndentContent = true; - configuration.SmartIndent = true; - } else if (arg.equals("omit")) - configuration.HideEndTags = true; - else if (arg.equals("upper")) - configuration.UpperCaseTags = true; - else if (arg.equals("clean")) - configuration.MakeClean = true; - else if (arg.equals("raw")) - configuration.CharEncoding = Configuration.RAW; - else if (arg.equals("ascii")) - configuration.CharEncoding = Configuration.ASCII; - else if (arg.equals("latin1")) - configuration.CharEncoding = Configuration.LATIN1; - else if (arg.equals("utf8")) - configuration.CharEncoding = Configuration.UTF8; - else if (arg.equals("iso2022")) - configuration.CharEncoding = Configuration.ISO2022; - else if (arg.equals("mac")) - configuration.CharEncoding = Configuration.MACROMAN; - else if (arg.equals("numeric")) - configuration.NumEntities = true; - else if (arg.equals("modify")) - configuration.writeback = true; - else if (arg.equals("change")) /* obsolete */ - configuration.writeback = true; - else if (arg.equals("update")) /* obsolete */ - 
configuration.writeback = true; - else if (arg.equals("errors")) - configuration.OnlyErrors = true; - else if (arg.equals("quiet")) - configuration.Quiet = true; - else if (arg.equals("slides")) - configuration.BurstSlides = true; - else if (arg.equals("help") || argv[argIndex].charAt(1) == '?' || argv[argIndex].charAt(1) == 'h') { - Report.helpText(new PrintWriter(System.out, true), prog); - System.exit(1); - } else if (arg.equals("config")) { - if (argc >= 3) { - configuration.parseFile(argv[argIndex + 1]); - --argc; - ++argIndex; - } - } else if (argv[argIndex].equals("-file") || argv[argIndex].equals("--file") || argv[argIndex].equals("-f")) { - if (argc >= 3) { - configuration.errfile = argv[argIndex + 1]; - --argc; - ++argIndex; - } - } else if (argv[argIndex].equals("-wrap") || argv[argIndex].equals("--wrap") || argv[argIndex].equals("-w")) { - if (argc >= 3) { - configuration.wraplen = Integer.parseInt(argv[argIndex + 1]); - --argc; - ++argIndex; - } - } else if (argv[argIndex].equals("-version") || argv[argIndex].equals("--version") || argv[argIndex].equals("-v")) { - Report.showVersion(tidy.getErrout()); - System.exit(0); - } else { - s = argv[argIndex]; - - for (int i = 1; i < s.length(); i++) { - if (s.charAt(i) == 'i') { - configuration.IndentContent = true; - configuration.SmartIndent = true; - } else if (s.charAt(i) == 'o') - configuration.HideEndTags = true; - else if (s.charAt(i) == 'u') - configuration.UpperCaseTags = true; - else if (s.charAt(i) == 'c') - configuration.MakeClean = true; - else if (s.charAt(i) == 'n') - configuration.NumEntities = true; - else if (s.charAt(i) == 'm') - configuration.writeback = true; - else if (s.charAt(i) == 'e') - configuration.OnlyErrors = true; - else if (s.charAt(i) == 'q') - configuration.Quiet = true; - else - Report.unknownOption(tidy.getErrout(), s.charAt(i)); - } - } - - --argc; - ++argIndex; - continue; - } - - /* ensure config is self-consistent */ - configuration.adjust(); - - /* user specified error 
file */ - if (configuration.errfile != null) { - /* is it same as the currently opened file? */ - if (!configuration.errfile.equals(current_errorfile)) { - /* no so close previous error file */ - - if (tidy.getErrout() != tidy.getStderr()) - tidy.getErrout().close(); - - /* and try to open the new error file */ - try { - tidy.setErrout(new PrintWriter(new FileWriter(configuration.errfile), true)); - current_errorfile = configuration.errfile; - } catch (IOException e) { - /* can't be opened so fall back to stderr */ - current_errorfile = "stderr"; - tidy.setErrout(tidy.getStderr()); - } - } - } - - if (argc > 1) { - file = argv[argIndex]; - } else { - file = "stdin"; - } - - try { - document = tidy.parse(null, null, file, System.out); - totalwarnings += tidy.parseWarnings; - totalerrors += tidy.parseErrors; - } catch (FileNotFoundException fnfe) { - Report.unknownFile(tidy.getErrout(), prog, file); - } catch (IOException ioe) { - Report.unknownFile(tidy.getErrout(), prog, file); - } - - --argc; - ++argIndex; - - if (argc <= 1) - break; - } - - if (totalerrors + totalwarnings > 0) - Report.generalInfo(tidy.getErrout()); - - if (tidy.getErrout() != tidy.getStderr()) - tidy.getErrout().close(); - - /* return status can be used by scripts */ - - if (totalerrors > 0) - System.exit(2); - - if (totalwarnings > 0) - System.exit(1); - - /* 0 signifies all is ok */ - System.exit(0); - } -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/TidyBeanInfo.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/TidyBeanInfo.java deleted file mode 100644 index 2acf2c2..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/TidyBeanInfo.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * @(#)TidyBeanInfo.java 1.11 2000/08/16 - * - */ - -package org.w3c.tidy; - -/** - * - * BeanInfo for Tidy - * - * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * Derived from <a href="http://www.w3.org/People/Raggett/tidy"> - * HTML Tidy Release 4 Aug 2000</a> - * - * @author Dave Raggett 
<dsr@w3.org> - * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java) - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - */ - -import java.beans.SimpleBeanInfo; -import java.awt.Image; - -public class TidyBeanInfo extends SimpleBeanInfo { - - public Image getIcon(int kind) - { - return loadImage("tidy.gif"); - } - -} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/TidyMessages.properties b/net.sourceforge.phpeclipse/src/org/w3c/tidy/TidyMessages.properties deleted file mode 100644 index 323a7ee..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/TidyMessages.properties +++ /dev/null @@ -1,194 +0,0 @@ -error=Error: -warning=Warning: -line_column=line {0,number} column {1,number} - -emacs_format={0}:{1,number}:{2,number}: -illegal_char=Warning: replacing illegal character code {0,number} -missing_semicolon=Warning: entity "{0}" doesn''t end in '';'' -unknown_entity=Warning: unescaped & or unknown entity "{0}" -unescaped_ampersand=Warning: unescaped & which should be written as &amp; -unknown_attribute=Warning: unknown attribute "{0}" -missing_attribute=\ lacks "{0}" attribute -missing_attr_value=\ attribute "{0}" lacks value -missing_imagemap=\ should use client-side image map -bad_attribute_value=\ unknown attribute value "{0}" -xml_attribute_value=\ has XML attribute "{0}" -unexpected_gt=\ missing '>' for end of tag -unexpected_quotemark=\ unexpected or duplicate quote mark -repeated_attribute=\ repeated attribute -nested_emphasis=\ nested emphasis -coerce_to_endtag=\ <{0}> is probably intended as </{0}> 
-proprietary_attr_value=\ proprietary attribute value "{0}" -missing_endtag_for=Warning: missing </{0}> -missing_endtag_before=Warning: missing </{0}> before -discarding_unexpected=Warning: discarding unexpected -forced_end_anchor=Warning: <a> is probably intended as </a> -non_matching_endtag_1=Warning: replacing unexpected -non_matching_endtag_2=\ by </{0}> -tag_not_allowed_in=\ isn''t allowed in <{0}> elements -doctype_after_tags=Warning: <!DOCTYPE> isn't allowed after elements -missing_starttag=Warning: missing <{0}> -unexpected_endtag=Warning: unexpected </{0}> -unexpected_endtag_suffix=\ in <{0}> -too_many_elements=Warning: too many {0} elements -too_many_elements_suffix=\ in <{0}> -using_br_inplace_of=Warning: using <br> in place of -inserting_tag=Warning: inserting implicit <{0}> -cant_be_nested=\ can't be nested -proprietary_element=\ is not approved by W3C -obsolete_element=Warning: replacing obsolete element -replacing_element=Warning: replacing element -by=\ by -trim_empty_element=Warning: trimming empty -missing_title_element=Warning: inserting missing 'title' element -illegal_nesting=\ shouldn't be nested -noframes_content=\ not inside 'noframes' element -inconsistent_version=Warning: html doctype doesn't match content -content_after_body=Warning: content occurs after end of body -malformed_comment=Warning: adjacent hyphens within comment -bad_comment_chars=Warning: expecting -- or > -bad_xml_comment=Warning: XML comments can't contain -- -bad_cdata_content=Warning: '<' + '/' + letter not allowed here -inconsistent_namespace=Warning: html namespace doesn't match content -suspected_missing_quote=Error: missing quotemark for attribute value -duplicate_frameset=Error: repeated FRAMESET element -unknown_element=\ is not recognized! 
-dtype_not_upper_case=Warning: SYSTEM, PUBLIC, W3C, DTD, EN must be upper case -unexpected_end_of_file=Warning: end of file while parsing attributes -malformed_doctype=Warning: expected "html PUBLIC" or "html SYSTEM" -id_name_mismatch=\ id and name attribute value mismatch - -badchars_summary=Characters codes for the Microsoft Windows fonts in the range\n\ -128 - 159 may not be recognized on other platforms. You are\n\ -instead recommended to use named entities, e.g. &trade; rather\n\ -than Windows character code 153 (0x2122 in Unicode). Note that\n\ -as of February 1998 few browsers support the new entities."\n\n - -badform_summary=You may need to move one or both of the <form> and </form>\n\ -tags. HTML elements should be properly nested and form elements\n\ -are no exception. For instance you should not place the <form>\n\ -in one table cell and the </form> in another. If the <form> is\n\ -placed before a table, the </form> cannot be placed inside the\n\ -table! Note that one form can't be nested inside another!\n\n - -badaccess_missing_summary=The table summary attribute should be used to describe\n\ -the table structure. It is very helpful for people using\n\ -non-visual browsers. The scope and headers attributes for\n\ -table cells are useful for specifying which headers apply\n\ -to each table cell, enabling non-visual browsers to provide\n\ -a meaningful context for each cell.\n\n - -badaccess_missing_image_alt=The alt attribute should be used to give a short description\n\ -of an image; longer descriptions should be given with the\n\ -longdesc attribute which takes a URL linked to the description.\n\ -These measures are needed for people using non-graphical browsers.\n\n - -badaccess_missing_image_map=Use client-side image maps in preference to server-side image\n\ -maps as the latter are inaccessible to people using non-\n\ -graphical browsers. 
In addition, client-side maps are easier\n\ -to set up and provide immediate feedback to users.\n\n - -badaccess_missing_link_alt=For hypertext links defined using a client-side image map, you\n\ -need to use the alt attribute to provide a textual description\n\ -of the link for people using non-graphical browsers.\n\n - -badaccess_frames=Pages designed using frames presents problems for\n\ -people who are either blind or using a browser that\n\ -doesn't support frames. A frames-based page should always\n\ -include an alternative layout inside a NOFRAMES element.\n\n - -badaccess_summary=For further advice on how to make your pages accessible\n\ -see "{0}". You may also want to try\n\ -"http://www.cast.org/bobby/" which is a free Web-based\n\ -service for checking URLs for accessibility.\n\n - -badlayout_using_layer=The Cascading Style Sheets (CSS) Positioning mechanism\n\ -is recommended in preference to the proprietary <LAYER>\n\ -element due to limited vendor support for LAYER.\n\n - -badlayout_using_spacer=You are recommended to use CSS for controlling white\n\ -space (e.g. for indentation, margins and line spacing).\n\ -The proprietary <SPACER> element has limited vendor support.\n\n - -badlayout_using_font=You are recommended to use CSS to specify the font and\n\ -properties such as its size and color. 
This will reduce\n\ -the size of HTML files and make them easier maintain\n\ -compared with using <FONT> elements.\n\n - -badlayout_using_nobr=You are recommended to use CSS to control line wrapping.\n\ -Use \"white-space: nowrap\" to inhibit wrapping in place\n\ -of inserting <NOBR>...</NOBR> into the markup.\n\n - -badlayout_using_body=You are recommended to use CSS to specify page and link colors\n\n - -unrecognized_option=unrecognized option -{0} use -help to list options -unknown_file={0}: can''t open file "{1}" -unknown_option=Warning - unknown option: {0} -bad_argument=Warning - missing or malformed argument for option: {0} - -needs_author_intervention=This document has errors that must be fixed before\n\ -using HTML Tidy to generate a tidied up version.\n\n - -missing_body=Can't create slides - document is missing a body element. -slides_found={0,number} Slides found - -general_info=HTML & CSS specifications are available from http://www.w3.org/\n\ -To learn more about Tidy see http://www.w3.org/People/Raggett/tidy/\n\ -Please send bug reports to Dave Raggett care of <html-tidy@w3.org>\n\ -Lobby your company to join W3C, see http://www.w3.org/Consortium\n - -hello_message=\nTidy (vers {0}) Parsing "{1}" - -report_version=\n{0}: Document content looks like {1} - -doctype_given=\n{0}: Doctype given is " - -num_warnings={0,number} warnings/errors were found!\n -no_warnings=no warnings or errors were found\n - -help_text={0}: file1 file2 ...\n\ -Utility to clean up & pretty print html files\n\ -see http://www.w3.org/People/Raggett/tidy/\n\ -options for tidy released on {1}\n\ -\n\ -Processing directives\n\ ---------------------\n\ - -indent or -i indent element content\n\ - -omit or -o omit optional endtags\n\ - -wrap 72 wrap text at column 72 (default is 68)\n\ - -upper or -u force tags to upper case (default is lower)\n\ - -clean or -c replace font, nobr & center tags by CSS\n\ - -numeric or -n output numeric rather than named entities\n\ - -errors or -e only 
show errors\n\ - -quiet or -q suppress nonessential output\n\ - -xml use this when input is wellformed xml\n\ - -asxml to convert html to wellformed xml\n\ - -slides to burst into slides on h2 elements\n\ -\n\ -Character encodings\n\ -------------------\n\ - -raw leave chars > 128 unchanged upon output\n\ - -ascii use ASCII for output, Latin-1 for input\n\ - -latin1 use Latin-1 for both input and output\n\ - -iso2022 use ISO2022 for both input and output\n\ - -utf8 use UTF-8 for both input and output\n\ - -mac use the Apple MacRoman character set\n\ -\n\ -File manipulation\n\ ----------------\n\ - -config <file> set options from config file\n\ - -f <file> write errors to named <file>\n\ - -modify or -m to modify original files\n\ -\n\ -Miscellaneous\n\ -------------\n\ - -version or -v show version\n\ - -help or -h list command line options\n\ -You can also use --blah for any config file option blah\n\ -\n\ -Input/Output default to stdin/stdout respectively\n\ -Single letter options apart from -f may be combined\n\ -as in: tidy -f errs.txt -imu foo.html\n\ -For further info on HTML see http://www.w3.org/MarkUp\n\ -\n - -bad_tree=\nPanic - tree has lost its integrity\n diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/config.txt b/net.sourceforge.phpeclipse/src/org/w3c/tidy/config.txt deleted file mode 100644 index b1de207..0000000 --- a/net.sourceforge.phpeclipse/src/org/w3c/tidy/config.txt +++ /dev/null @@ -1,20 +0,0 @@ -# sample config file for Java HTML tidy - -indent=auto -indent-spaces=2 -wrap=72 -markup=yes -clean=yes -output-xml=no -input-xml=no -show-warnings=yes -numeric-entities=yes -quote-marks=yes -quote-nbsp=yes -quote-ampersand=no -break-before-br=no -uppercase-tags=yes -uppercase-attributes=yes -smart-indent=no -output-xhtml=yes -char-encoding=latin1 diff --git a/net.sourceforge.phpeclipse/templates.xml b/net.sourceforge.phpeclipse/templates.xml deleted file mode 100644 index d635aae..0000000 --- a/net.sourceforge.phpeclipse/templates.xml +++ 
/dev/null @@ -1,65 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<templates> -<template name="class" description="class template with constructor" context="php" enabled="true">class ${class_name} { - function ${class_name}() { - ${cursor} - } -}</template> -<template name="class" description="class with attribute" context="php" enabled="true">class ${class_name} { - var $$${attribute}; - function ${class_name}() { - ${cursor} - } - - function set_${attribute}( $$${attr} ) { - $$this->${attribute} = $$${attr}; - } - - function get_${attribute}() { - return $$this->${attribute}; - } -}</template> -<template name="for" description="iterate over array" context="php" enabled="true">for ($$${index} = 0; $$${index} &lt; sizeof($$${array}); $$${index}++) { - ${cursor} -}</template> -<template name="for" description="iterate over array w/ temporary variable" context="php" enabled="true">for ($$${index} = 0; $$${index} &lt; sizeof($$${array}); $$${index}++) { - $$${array_element} = $$${array}[$$${index}]; - ${cursor} -}</template> -<template name="function" description="function template" context="php" enabled="true">function ${function_name} () { - ${cursor} -}</template> -<template name="function" description="function template with return" context="php" enabled="true">function ${function_name} () { - return (${cursor}); -}</template> -<template name="while" description="while iteration" context="php" enabled="true">while (${condition}) { - ${cursor} -}</template> -<template name="switch" description="switch case statement" context="php" enabled="true">switch (${key}) { - case ${value}: - ${cursor} - break; - - default: - break; -}</template><template name="if" description="if statement" context="php" enabled="true">if (${condition}) { - ${cursor} -}</template><template name="ifelse" description="if else statement" context="php" enabled="true">if (${condition}) { - ${cursor} -} else { - -}</template><template name="elseif" description="else if block" context="php" 
enabled="true">elseif (${condition}) { - ${cursor} -}</template><template name="else" description="else block" context="php" enabled="true">else { - ${cursor} -}</template> -<template name="filecomment" description="file comment used by the class and interface wizards" context="php" enabled="true">/** - * Created on ${date} by ${user} - * - */</template><template name="functioncomment" description="function comment" context="php" enabled="true">/** - * @author ${user} - * - */</template> - <template name="echo" description="echo a string" context="php" enabled="true">echo "${string}"; - ${cursor}</template> - </templates> \ No newline at end of file -- 1.7.1