From: khartlage Date: Sun, 12 Jan 2003 18:04:51 +0000 (+0000) Subject: JTidy integration X-Git-Url: http://git.phpeclipse.com JTidy integration --- diff --git a/net.sourceforge.phpeclipse/plugin.xml b/net.sourceforge.phpeclipse/plugin.xml index 0967f6b..7783dea 100644 --- a/net.sourceforge.phpeclipse/plugin.xml +++ b/net.sourceforge.phpeclipse/plugin.xml @@ -315,6 +315,39 @@ point="org.eclipse.ui.popupMenus"> + + + + + + + + + + + + selected object in the view + Object obj = iterator.next(); + + // is it a resource + if (obj instanceof IResource) { + IResource resource = (IResource) obj; + + // check if it's a file resource + switch (resource.getType()) { + + case IResource.FILE : + // single file: + IFile file = (IFile) resource; + InputStream in; + try { + in = file.getContents(); + tidy.parse(file, in, null); + } catch (CoreException e) { + } + } + } + } + } + + /** + * @see IActionDelegate#selectionChanged(IAction, ISelection) + */ + public void selectionChanged(IAction action, ISelection selection) { + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttVal.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttVal.java new file mode 100644 index 0000000..7de977a --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttVal.java @@ -0,0 +1,182 @@ +/* + * @(#)AttVal.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Attribute/Value linked list node + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class AttVal extends Object implements Cloneable { + + public AttVal next; + public Attribute dict; + public Node asp; + public Node php; + public int delim; + public String attribute; + public String value; + + public AttVal() + { + this.next = null; + this.dict = null; + this.asp = null; + this.php = null; + this.delim = 0; + this.attribute = null; + this.value = null; + } + + public AttVal(AttVal next, Attribute dict, int delim, + String attribute, String value) + { + this.next = next; + this.dict = dict; + this.asp = null; + this.php = null; + this.delim = delim; + this.attribute = attribute; + this.value = value; + } + + public AttVal(AttVal next, Attribute dict, Node asp, Node php, + int delim, String attribute, String value) + { + this.next = next; + this.dict = dict; + this.asp = asp; + this.php = php; + this.delim = delim; + this.attribute = attribute; + this.value = value; + } + + protected Object clone() + { + AttVal av = new AttVal(); + if (next != null) { + av.next = (AttVal)next.clone(); + } + if (attribute != null) + av.attribute = attribute; + if (value != null) + av.value = value; + av.delim = delim; + if (asp != null) { + av.asp = (Node)asp.clone(); + } + if (php != null) { + av.php = 
(Node)php.clone(); + } + av.dict = + AttributeTable.getDefaultAttributeTable().findAttribute(this); + return av; + } + + public boolean isBoolAttribute() + { + Attribute attribute = this.dict; + if ( attribute != null ) { + if (attribute.attrchk == AttrCheckImpl.getCheckBool() ) { + return true; + } + } + + return false; + } + + /* ignore unknown attributes for proprietary elements */ + public Attribute checkAttribute( Lexer lexer, Node node ) + { + TagTable tt = lexer.configuration.tt; + + if (this.asp == null && this.php == null) + this.checkUniqueAttribute(lexer, node); + + Attribute attribute = this.dict; + if ( attribute != null ) { + /* title is vers 2.0 for A and LINK otherwise vers 4.0 */ + if (attribute == AttributeTable.attrTitle && + (node.tag == tt.tagA || node.tag == tt.tagLink)) + lexer.versions &= Dict.VERS_ALL; + else if ((attribute.versions & Dict.VERS_XML) != 0) + { + if (!(lexer.configuration.XmlTags || lexer.configuration.XmlOut)) + Report.attrError(lexer, node, this.attribute, Report.XML_ATTRIBUTE_VALUE); + } + else + lexer.versions &= attribute.versions; + + if (attribute.attrchk != null) + attribute.attrchk.check(lexer, node, this); + } + else if (!lexer.configuration.XmlTags && !(node.tag == null) && this.asp == null && + !(node.tag != null && ((node.tag.versions & Dict.VERS_PROPRIETARY) != 0))) + Report.attrError(lexer, node, this.attribute, Report.UNKNOWN_ATTRIBUTE); + + return attribute; + } + + /* + the same attribute name can't be used + more than once in each element + */ + public void checkUniqueAttribute(Lexer lexer, Node node) + { + AttVal attr; + int count = 0; + + for (attr = this.next; attr != null; attr = attr.next) + { + if (this.attribute != null && + attr.attribute != null && + attr.asp == null && + attr.php == null && + Lexer.wstrcasecmp(this.attribute, attr.attribute) == 0) + ++count; + } + + if (count > 0) + Report.attrError(lexer, node, this.attribute, Report.REPEATED_ATTRIBUTE); + } + + /* --------------------- DOM 
---------------------------- */ + + protected org.w3c.dom.Attr adapter = null; + + protected org.w3c.dom.Attr getAdapter() + { + if (adapter == null) + { + adapter = new DOMAttrImpl(this); + } + return adapter; + } + /* --------------------- END DOM ------------------------ */ + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttrCheck.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttrCheck.java new file mode 100644 index 0000000..967ea32 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttrCheck.java @@ -0,0 +1,38 @@ +/* + * @(#)AttrCheck.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Check attribute values + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public interface AttrCheck { + + public void check( Lexer lexer, Node node, AttVal attval); + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttrCheckImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttrCheckImpl.java new file mode 100644 index 0000000..4c9bc92 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttrCheckImpl.java @@ -0,0 +1,189 @@ +/* + * @(#)AttrCheckImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** 
+ * + * Check attribute values implementations + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class AttrCheckImpl { + + public static class CheckUrl implements AttrCheck { + + public void check( Lexer lexer, Node node, AttVal attval) + { + if (attval.value == null) + Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE); + else if (lexer.configuration.FixBackslash) + { + attval.value = attval.value.replace('\\','/'); + } + } + + }; + + public static class CheckScript implements AttrCheck { + + public void check( Lexer lexer, Node node, AttVal attval) + { + } + + }; + + public static class CheckAlign implements AttrCheck { + + public void check( Lexer lexer, Node node, AttVal attval) + { + String value; + + /* IMG, OBJECT, APPLET and EMBED use align for vertical position */ + if (node.tag != null && ((node.tag.model & Dict.CM_IMG) != 0)) + { + getCheckValign().check(lexer, node, attval); + return; + } + + value = attval.value; + + if (value == null) + Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE); + else if (! 
(Lexer.wstrcasecmp(value, "left") == 0 || + Lexer.wstrcasecmp(value, "center") == 0 || + Lexer.wstrcasecmp(value, "right") == 0 || + Lexer.wstrcasecmp(value, "justify") == 0)) + Report.attrError(lexer, node, attval.value, Report.BAD_ATTRIBUTE_VALUE); + } + + }; + + public static class CheckValign implements AttrCheck { + + public void check( Lexer lexer, Node node, AttVal attval) + { + String value; + + value = attval.value; + + if (value == null) + Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE); + else if (Lexer.wstrcasecmp(value, "top") == 0 || + Lexer.wstrcasecmp(value, "middle") == 0 || + Lexer.wstrcasecmp(value, "bottom") == 0 || + Lexer.wstrcasecmp(value, "baseline") == 0) + { + /* all is fine */ + } + else if (Lexer.wstrcasecmp(value, "left") == 0 || + Lexer.wstrcasecmp(value, "right") == 0) + { + if (!(node.tag != null && ((node.tag.model & Dict.CM_IMG) != 0))) + Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE); + } + else if (Lexer.wstrcasecmp(value, "texttop") == 0 || + Lexer.wstrcasecmp(value, "absmiddle") == 0 || + Lexer.wstrcasecmp(value, "absbottom") == 0 || + Lexer.wstrcasecmp(value, "textbottom") == 0) + { + lexer.versions &= Dict.VERS_PROPRIETARY; + Report.attrError(lexer, node, value, Report.PROPRIETARY_ATTR_VALUE); + } + else + Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE); + } + + }; + + public static class CheckBool implements AttrCheck { + + public void check( Lexer lexer, Node node, AttVal attval) + { + } + + }; + + public static class CheckId implements AttrCheck { + + public void check( Lexer lexer, Node node, AttVal attval) + { + } + + }; + + public static class CheckName implements AttrCheck { + + public void check( Lexer lexer, Node node, AttVal attval) + { + } + + }; + + public static AttrCheck getCheckUrl() + { + return _checkUrl; + } + + public static AttrCheck getCheckScript() + { + return _checkScript; + } + + public static AttrCheck getCheckAlign() + { + return 
_checkAlign; + } + + public static AttrCheck getCheckValign() + { + return _checkValign; + } + + public static AttrCheck getCheckBool() + { + return _checkBool; + } + + public static AttrCheck getCheckId() + { + return _checkId; + } + + public static AttrCheck getCheckName() + { + return _checkName; + } + + + private static AttrCheck _checkUrl = new CheckUrl(); + private static AttrCheck _checkScript = new CheckScript(); + private static AttrCheck _checkAlign = new CheckAlign(); + private static AttrCheck _checkValign = new CheckValign(); + private static AttrCheck _checkBool = new CheckBool(); + private static AttrCheck _checkId = new CheckId(); + private static AttrCheck _checkName = new CheckName(); + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Attribute.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Attribute.java new file mode 100644 index 0000000..e42cf49 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Attribute.java @@ -0,0 +1,65 @@ +/* + * @(#)Attribute.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * HTML attribute + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class Attribute { + + public Attribute( String name, + boolean nowrap, + short versions, + AttrCheck attrchk ) + { + this.name = name; + this.nowrap = nowrap; + this.literal = false; + this.versions = versions; + this.attrchk = attrchk; + } + + public Attribute( String name, + short versions, + AttrCheck attrchk ) + { + this.name = name; + this.nowrap = false; + this.literal = false; + this.versions = versions; + this.attrchk = attrchk; + } + + public String name; + public boolean nowrap; + public boolean literal; + public short versions; + public AttrCheck attrchk; + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttributeTable.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttributeTable.java new file mode 100644 index 0000000..263f0f5 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/AttributeTable.java @@ -0,0 +1,317 @@ +/* + * @(#)AttributeTable.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import java.util.Hashtable; +import java.util.Enumeration; + +/** + * + * HTML attribute hash table + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class AttributeTable { + + public AttributeTable() + { + } + + public Attribute lookup( String name ) + { + return (Attribute)attributeHashtable.get( name ); + } + + public Attribute install( Attribute attr ) + { + return (Attribute)attributeHashtable.put( attr.name, attr ); + } + + /* public method for finding attribute definition by name */ + public Attribute findAttribute( AttVal attval ) + { + Attribute np; + + if ( attval.attribute != null ) { + np = lookup( attval.attribute ); + return np; + } + + return null; + } + + public boolean isUrl( String attrname ) + { + Attribute np; + + np = lookup( attrname ); + return ( np != null && np.attrchk == AttrCheckImpl.getCheckUrl() ); + } + + public boolean isScript( String attrname ) + { + Attribute np; + + np = lookup( attrname ); + return ( np != null && np.attrchk == AttrCheckImpl.getCheckScript() ); + } + + public boolean isLiteralAttribute( String attrname ) + { + Attribute np; + + np = lookup( attrname ); + return ( np != null && np.literal ); + } + + /* + Henry Zrepa reports that some folk are + using embed with script attributes where + newlines are signficant. These need to be + declared and handled specially! 
+ */ + public void declareLiteralAttrib(String name) + { + Attribute attrib = lookup(name); + + if (attrib == null) + attrib = install(new Attribute(name, Dict.VERS_PROPRIETARY, null)); + + attrib.literal = true; + } + + private Hashtable attributeHashtable = new Hashtable(); + + private static AttributeTable defaultAttributeTable = null; + + private static Attribute[] attrs = { + + new Attribute( "abbr", Dict.VERS_HTML40, null ), + new Attribute( "accept-charset", Dict.VERS_HTML40, null ), + new Attribute( "accept", Dict.VERS_ALL, null ), + new Attribute( "accesskey", Dict.VERS_HTML40, null ), + new Attribute( "action", Dict.VERS_ALL, AttrCheckImpl.getCheckUrl() ), + new Attribute( "add_date", Dict.VERS_NETSCAPE, null ), /* A */ + new Attribute( "align", Dict.VERS_ALL, AttrCheckImpl.getCheckAlign() ), /* set varies with element */ + new Attribute( "alink", Dict.VERS_LOOSE, null ), + new Attribute( "alt", Dict.VERS_ALL, null ), + new Attribute( "archive", Dict.VERS_HTML40, null ), /* space or comma separated list */ + new Attribute( "axis", Dict.VERS_HTML40, null ), + new Attribute( "background", Dict.VERS_LOOSE, AttrCheckImpl.getCheckUrl() ), + new Attribute( "bgcolor", Dict.VERS_LOOSE, null ), + new Attribute( "bgproperties", Dict.VERS_PROPRIETARY, null ), /* BODY "fixed" fixes background */ + new Attribute( "border", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* like LENGTH + "border" */ + new Attribute( "bordercolor", Dict.VERS_MICROSOFT, null ), /* used on TABLE */ + new Attribute( "bottommargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */ + new Attribute( "cellpadding", Dict.VERS_FROM32, null ), /* % or pixel values */ + new Attribute( "cellspacing", Dict.VERS_FROM32, null ), + new Attribute( "char", Dict.VERS_HTML40, null ), + new Attribute( "charoff", Dict.VERS_HTML40, null ), + new Attribute( "charset", Dict.VERS_HTML40, null ), + new Attribute( "checked", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* i.e. 
"checked" or absent */ + new Attribute( "cite", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), + new Attribute( "class", Dict.VERS_HTML40, null ), + new Attribute( "classid", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), + new Attribute( "clear", Dict.VERS_LOOSE, null ), /* BR: left, right, all */ + new Attribute( "code", Dict.VERS_LOOSE, null ), /* APPLET */ + new Attribute( "codebase", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* OBJECT */ + new Attribute( "codetype", Dict.VERS_HTML40, null ), /* OBJECT */ + new Attribute( "color", Dict.VERS_LOOSE, null ), /* BASEFONT, FONT */ + new Attribute( "cols", Dict.VERS_IFRAMES, null ), /* TABLE & FRAMESET */ + new Attribute( "colspan", Dict.VERS_FROM32, null ), + new Attribute( "compact", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* lists */ + new Attribute( "content", Dict.VERS_ALL, null ), /* META */ + new Attribute( "coords", Dict.VERS_FROM32, null ), /* AREA, A */ + new Attribute( "data", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* OBJECT */ + new Attribute( "datafld", Dict.VERS_MICROSOFT, null ), /* used on DIV, IMG */ + new Attribute( "dataformatas", Dict.VERS_MICROSOFT, null ), /* used on DIV, IMG */ + new Attribute( "datapagesize", Dict.VERS_MICROSOFT, null ), /* used on DIV, IMG */ + new Attribute( "datasrc", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckUrl() ), /* used on TABLE */ + new Attribute( "datetime", Dict.VERS_HTML40, null ), /* INS, DEL */ + new Attribute( "declare", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* OBJECT */ + new Attribute( "defer", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* SCRIPT */ + new Attribute( "dir", Dict.VERS_HTML40, null ), /* ltr or rtl */ + new Attribute( "disabled", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* form fields */ + new Attribute( "enctype", Dict.VERS_ALL, null ), /* FORM */ + new Attribute( "face", Dict.VERS_LOOSE, null ), /* BASEFONT, FONT */ + new Attribute( "for", Dict.VERS_HTML40, null ), /* LABEL */ + new 
Attribute( "frame", Dict.VERS_HTML40, null ), /* TABLE */ + new Attribute( "frameborder", Dict.VERS_FRAMES, null ), /* 0 or 1 */ + new Attribute( "framespacing", Dict.VERS_PROPRIETARY, null ), /* pixel value */ + new Attribute( "gridx", Dict.VERS_PROPRIETARY, null ), /* TABLE Adobe golive*/ + new Attribute( "gridy", Dict.VERS_PROPRIETARY, null ), /* TABLE Adobe golive */ + new Attribute( "headers", Dict.VERS_HTML40, null ), /* table cells */ + new Attribute( "height", Dict.VERS_ALL, null ), /* pixels only for TH/TD */ + new Attribute( "href", Dict.VERS_ALL, AttrCheckImpl.getCheckUrl() ), /* A, AREA, LINK and BASE */ + new Attribute( "hreflang", Dict.VERS_HTML40, null ), /* A, LINK */ + new Attribute( "hspace", Dict.VERS_ALL, null ), /* APPLET, IMG, OBJECT */ + new Attribute( "http-equiv", Dict.VERS_ALL, null ), /* META */ + new Attribute( "id", Dict.VERS_HTML40, AttrCheckImpl.getCheckId() ), + new Attribute( "ismap", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* IMG */ + new Attribute( "label", Dict.VERS_HTML40, null ), /* OPT, OPTGROUP */ + new Attribute( "lang", Dict.VERS_HTML40, null ), + new Attribute( "language", Dict.VERS_LOOSE, null ), /* SCRIPT */ + new Attribute( "last_modified", Dict.VERS_NETSCAPE, null ), /* A */ + new Attribute( "last_visit", Dict.VERS_NETSCAPE, null ), /* A */ + new Attribute( "leftmargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */ + new Attribute( "link", Dict.VERS_LOOSE, null ), /* BODY */ + new Attribute( "longdesc", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* IMG */ + new Attribute( "lowsrc", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckUrl() ), /* IMG */ + new Attribute( "marginheight", Dict.VERS_IFRAMES, null ), /* FRAME, IFRAME, BODY */ + new Attribute( "marginwidth", Dict.VERS_IFRAMES, null ), /* ditto */ + new Attribute( "maxlength", Dict.VERS_ALL, null ), /* INPUT */ + new Attribute( "media", Dict.VERS_HTML40, null ), /* STYLE, LINK */ + new Attribute( "method", Dict.VERS_ALL, null ), /* FORM: get or post */ 
+ new Attribute( "multiple", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* SELECT */ + new Attribute( "name", Dict.VERS_ALL, AttrCheckImpl.getCheckName() ), + new Attribute( "nohref", Dict.VERS_FROM32, AttrCheckImpl.getCheckBool() ), /* AREA */ + new Attribute( "noresize", Dict.VERS_FRAMES, AttrCheckImpl.getCheckBool() ), /* FRAME */ + new Attribute( "noshade", Dict.VERS_LOOSE, AttrCheckImpl.getCheckBool() ), /* HR */ + new Attribute( "nowrap", Dict.VERS_LOOSE, AttrCheckImpl.getCheckBool() ), /* table cells */ + new Attribute( "object", Dict.VERS_HTML40_LOOSE, null ), /* APPLET */ + new Attribute( "onblur", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onchange", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onclick", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "ondblclick", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onkeydown", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onkeypress", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onkeyup", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onload", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onmousedown", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onmousemove", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onmouseout", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onmouseover", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onmouseup", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onsubmit", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onreset", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + 
new Attribute( "onselect", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onunload", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */ + new Attribute( "onafterupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */ + new Attribute( "onbeforeupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */ + new Attribute( "onerrorupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */ + new Attribute( "onrowenter", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */ + new Attribute( "onrowexit", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */ + new Attribute( "onbeforeunload", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */ + new Attribute( "ondatasetchanged", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* object, applet */ + new Attribute( "ondataavailable", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* object, applet */ + new Attribute( "ondatasetcomplete",Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* object, applet */ + new Attribute( "profile", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* HEAD */ + new Attribute( "prompt", Dict.VERS_LOOSE, null ), /* ISINDEX */ + new Attribute( "readonly", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* form fields */ + new Attribute( "rel", Dict.VERS_ALL, null ), /* A, LINK */ + new Attribute( "rev", Dict.VERS_ALL, null ), /* A, LINK */ + new Attribute( "rightmargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */ + new Attribute( "rows", Dict.VERS_ALL, null ), /* TEXTAREA */ + new Attribute( "rowspan", Dict.VERS_ALL, null ), /* table cells */ + new Attribute( "rules", Dict.VERS_HTML40, null ), /* TABLE */ + new Attribute( "scheme", Dict.VERS_HTML40, null ), /* META */ + new Attribute( "scope", Dict.VERS_HTML40, null ), /* table cells */ + new Attribute( "scrolling", Dict.VERS_IFRAMES, null ), 
/* yes, no or auto */ + new Attribute( "selected", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* OPTION */ + new Attribute( "shape", Dict.VERS_FROM32, null ), /* AREA, A */ + new Attribute( "showgrid", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckBool() ), /* TABLE Adobe golive */ + new Attribute( "showgridx", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckBool() ), /* TABLE Adobe golive*/ + new Attribute( "showgridy", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckBool() ), /* TABLE Adobe golive*/ + new Attribute( "size", Dict.VERS_LOOSE, null ), /* HR, FONT, BASEFONT, SELECT */ + new Attribute( "span", Dict.VERS_HTML40, null ), /* COL, COLGROUP */ + new Attribute( "src", (short)(Dict.VERS_ALL | Dict.VERS_FRAMES), AttrCheckImpl.getCheckUrl() ), /* IMG, FRAME, IFRAME */ + new Attribute( "standby", Dict.VERS_HTML40, null ), /* OBJECT */ + new Attribute( "start", Dict.VERS_ALL, null ), /* OL */ + new Attribute( "style", Dict.VERS_HTML40, null ), + new Attribute( "summary", Dict.VERS_HTML40, null ), /* TABLE */ + new Attribute( "tabindex", Dict.VERS_HTML40, null ), /* fields, OBJECT and A */ + new Attribute( "target", Dict.VERS_HTML40, null ), /* names a frame/window */ + new Attribute( "text", Dict.VERS_LOOSE, null ), /* BODY */ + new Attribute( "title", Dict.VERS_HTML40, null ), /* text tool tip */ + new Attribute( "topmargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */ + new Attribute( "type", Dict.VERS_FROM32, null ), /* also used by SPACER */ + new Attribute( "usemap", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* things with images */ + new Attribute( "valign", Dict.VERS_FROM32, AttrCheckImpl.getCheckValign() ), + new Attribute( "value", Dict.VERS_ALL, null ), /* OPTION, PARAM */ + new Attribute( "valuetype", Dict.VERS_HTML40, null ), /* PARAM: data, ref, object */ + new Attribute( "version", Dict.VERS_ALL, null ), /* HTML */ + new Attribute( "vlink", Dict.VERS_LOOSE, null ), /* BODY */ + new Attribute( "vspace", Dict.VERS_LOOSE, null ), /* IMG, OBJECT, 
APPLET */ + new Attribute( "width", Dict.VERS_ALL, null ), /* pixels only for TD/TH */ + new Attribute( "wrap", Dict.VERS_NETSCAPE, null ), /* textarea */ + new Attribute( "xml:lang", Dict.VERS_XML, null ), /* XML language */ + new Attribute( "xmlns", Dict.VERS_ALL, null ), /* name space */ + + }; + + public static Attribute attrHref = null; + public static Attribute attrSrc = null; + public static Attribute attrId = null; + public static Attribute attrName = null; + public static Attribute attrSummary = null; + public static Attribute attrAlt = null; + public static Attribute attrLongdesc = null; + public static Attribute attrUsemap = null; + public static Attribute attrIsmap = null; + public static Attribute attrLanguage = null; + public static Attribute attrType = null; + public static Attribute attrTitle = null; + public static Attribute attrXmlns = null; + public static Attribute attrValue = null; + public static Attribute attrContent = null; + public static Attribute attrDatafld = null; + public static Attribute attrWidth = null; + public static Attribute attrHeight = null; + + public static AttributeTable getDefaultAttributeTable() + { + if ( defaultAttributeTable == null ) { + defaultAttributeTable = new AttributeTable(); + for ( int i = 0; i < attrs.length; i++ ) { + defaultAttributeTable.install( attrs[i] ); + } + attrHref = defaultAttributeTable.lookup("href"); + attrSrc = defaultAttributeTable.lookup("src"); + attrId = defaultAttributeTable.lookup("id"); + attrName = defaultAttributeTable.lookup("name"); + attrSummary = defaultAttributeTable.lookup("summary"); + attrAlt = defaultAttributeTable.lookup("alt"); + attrLongdesc = defaultAttributeTable.lookup("longdesc"); + attrUsemap = defaultAttributeTable.lookup("usemap"); + attrIsmap = defaultAttributeTable.lookup("ismap"); + attrLanguage = defaultAttributeTable.lookup("language"); + attrType = defaultAttributeTable.lookup("type"); + attrTitle = defaultAttributeTable.lookup("title"); + attrXmlns = 
defaultAttributeTable.lookup("xmlns"); + attrValue = defaultAttributeTable.lookup("value"); + attrContent = defaultAttributeTable.lookup("content"); + attrDatafld = defaultAttributeTable.lookup("datafld");; + attrWidth = defaultAttributeTable.lookup("width");; + attrHeight = defaultAttributeTable.lookup("height");; + + attrAlt.nowrap = true; + attrValue.nowrap = true; + attrContent.nowrap = true; + } + return defaultAttributeTable; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/CheckAttribs.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/CheckAttribs.java new file mode 100644 index 0000000..62af24f --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/CheckAttribs.java @@ -0,0 +1,39 @@ +/* + * @(#)CheckAttribs.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Check HTML attributes + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public interface CheckAttribs { + + public void check( Lexer lexer, Node node ); + +} + diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/CheckAttribsImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/CheckAttribsImpl.java new file mode 100644 index 0000000..11ccf04 --- /dev/null +++ 
b/net.sourceforge.phpeclipse/src/org/w3c/tidy/CheckAttribsImpl.java @@ -0,0 +1,403 @@ +/* + * @(#)CheckAttribsImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Check HTML attributes implementation + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class CheckAttribsImpl { + + public static class CheckHTML implements CheckAttribs { + + public void check( Lexer lexer, Node node ) + { + AttVal attval; + Attribute attribute; + + node.checkUniqueAttributes(lexer); + + for (attval = node.attributes; attval != null; attval = attval.next) + { + attribute = attval.checkAttribute(lexer, node ); + + if (attribute == AttributeTable.attrXmlns) + lexer.isvoyager = true; + } + } + + }; + + public static class CheckSCRIPT implements CheckAttribs { + + public void check( Lexer lexer, Node node ) + { + Attribute attribute; + AttVal lang, type; + + node.checkUniqueAttributes(lexer); + + lang = node.getAttrByName("language"); + type = node.getAttrByName("type"); + + if (type == null) + { + Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE); + + /* check for javascript */ + + if (lang != null) + { + String str = lang.value; + if (str.length() > 10) + 
str = str.substring(0, 10); + if ( (Lexer.wstrcasecmp(str, "javascript") == 0) || + (Lexer.wstrcasecmp(str, "jscript") == 0) ) + { + node.addAttribute("type", "text/javascript"); + } + } + else + node.addAttribute("type", "text/javascript"); + } + } + + }; + + public static class CheckTABLE implements CheckAttribs { + + public void check( Lexer lexer, Node node ) + { + AttVal attval; + Attribute attribute; + boolean hasSummary = false; + + node.checkUniqueAttributes(lexer); + + for (attval = node.attributes; attval != null; attval = attval.next) + { + attribute = attval.checkAttribute(lexer, node); + + if (attribute == AttributeTable.attrSummary) + hasSummary = true; + } + + /* suppress warning for missing summary for HTML 2.0 and HTML 3.2 */ + if (!hasSummary && lexer.doctype != Dict.VERS_HTML20 && lexer.doctype != Dict.VERS_HTML32) + { + lexer.badAccess |= Report.MISSING_SUMMARY; + Report.attrError(lexer, node, "summary", Report.MISSING_ATTRIBUTE); + } + + /* convert to
*/ + if (lexer.configuration.XmlOut) + { + attval = node.getAttrByName("border"); + if (attval != null) + { + if (attval.value == null) + attval.value = "1"; + } + } + } + + }; + + public static class CheckCaption implements CheckAttribs { + + public void check( Lexer lexer, Node node ) + { + AttVal attval; + String value = null; + + node.checkUniqueAttributes(lexer); + + for (attval = node.attributes; attval != null; attval = attval.next) + { + if ( Lexer.wstrcasecmp(attval.attribute, "align") == 0 ) + { + value = attval.value; + break; + } + } + + if (value != null) + { + if (Lexer.wstrcasecmp(value, "left") == 0 || Lexer.wstrcasecmp(value, "right") == 0) + lexer.versions &= (short)(Dict.VERS_HTML40_LOOSE|Dict.VERS_FRAMES); + else if (Lexer.wstrcasecmp(value, "top") == 0 || Lexer.wstrcasecmp(value, "bottom") == 0) + lexer.versions &= Dict.VERS_FROM32; + else + Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE); + } + } + + }; + + public static class CheckHR implements CheckAttribs { + + public void check( Lexer lexer, Node node ) + { + if (node.getAttrByName("src") != null) + Report.attrError(lexer, node, "src", Report.PROPRIETARY_ATTR_VALUE); + } + }; + + public static class CheckIMG implements CheckAttribs { + + public void check( Lexer lexer, Node node ) + { + AttVal attval; + Attribute attribute; + boolean hasAlt = false; + boolean hasSrc = false; + boolean hasUseMap = false; + boolean hasIsMap = false; + boolean hasDataFld = false; + + node.checkUniqueAttributes(lexer); + + for (attval = node.attributes; attval != null; attval = attval.next) + { + attribute = attval.checkAttribute( lexer, node ); + + if (attribute == AttributeTable.attrAlt) + hasAlt = true; + else if (attribute == AttributeTable.attrSrc) + hasSrc = true; + else if (attribute == AttributeTable.attrUsemap) + hasUseMap = true; + else if (attribute == AttributeTable.attrIsmap) + hasIsMap = true; + else if (attribute == AttributeTable.attrDatafld) + hasDataFld = true; + else if 
(attribute == AttributeTable.attrWidth || + attribute == AttributeTable.attrHeight) + lexer.versions &= ~Dict.VERS_HTML20; + } + + if (!hasAlt) + { + lexer.badAccess |= Report.MISSING_IMAGE_ALT; + Report.attrError(lexer, node, "alt", Report.MISSING_ATTRIBUTE); + if (lexer.configuration.altText != null) + node.addAttribute("alt", lexer.configuration.altText); + } + + if (!hasSrc && !hasDataFld) + Report.attrError(lexer, node, "src", Report.MISSING_ATTRIBUTE); + + if (hasIsMap && !hasUseMap) + Report.attrError(lexer, node, "ismap", Report.MISSING_IMAGEMAP); + } + + }; + + public static class CheckAREA implements CheckAttribs { + + public void check( Lexer lexer, Node node ) + { + AttVal attval; + Attribute attribute; + boolean hasAlt = false; + boolean hasHref = false; + + node.checkUniqueAttributes(lexer); + + for (attval = node.attributes; attval != null; attval = attval.next) + { + attribute = attval.checkAttribute( lexer, node ); + + if (attribute == AttributeTable.attrAlt) + hasAlt = true; + else if (attribute == AttributeTable.attrHref) + hasHref = true; + } + + if (!hasAlt) + { + lexer.badAccess |= Report.MISSING_LINK_ALT; + Report.attrError(lexer, node, "alt", Report.MISSING_ATTRIBUTE); + } + if (!hasHref) + Report.attrError(lexer, node, "href", Report.MISSING_ATTRIBUTE); + } + + }; + + public static class CheckAnchor implements CheckAttribs { + + public void check( Lexer lexer, Node node ) + { + node.checkUniqueAttributes(lexer); + + lexer.fixId(node); + } + }; + + public static class CheckMap implements CheckAttribs { + + public void check( Lexer lexer, Node node ) + { + node.checkUniqueAttributes(lexer); + + lexer.fixId(node); + } + } + + public static class CheckSTYLE implements CheckAttribs { + + public void check( Lexer lexer, Node node ) + { + AttVal type = node.getAttrByName("type"); + + node.checkUniqueAttributes(lexer); + + if (type == null) + { + Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE); + + node.addAttribute("type", 
"text/css"); + } + } + } + + public static class CheckTableCell implements CheckAttribs { + + public void check( Lexer lexer, Node node ) + { + node.checkUniqueAttributes(lexer); + + /* + HTML4 strict doesn't allow mixed content for + elements with %block; as their content model + */ + if (node.getAttrByName("width") != null || node.getAttrByName("height") != null) + lexer.versions &= ~Dict.VERS_HTML40_STRICT; + } + } + + /* add missing type attribute when appropriate */ + public static class CheckLINK implements CheckAttribs { + + public void check( Lexer lexer, Node node ) + { + AttVal rel = node.getAttrByName("rel"); + + node.checkUniqueAttributes(lexer); + + if (rel != null && rel.value != null && + rel.value.equals("stylesheet")) + { + AttVal type = node.getAttrByName("type"); + + if (type == null) + { + Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE); + + node.addAttribute("type", "text/css"); + } + } + } + } + + public static CheckAttribs getCheckHTML() + { + return _checkHTML; + } + + public static CheckAttribs getCheckSCRIPT() + { + return _checkSCRIPT; + } + + public static CheckAttribs getCheckTABLE() + { + return _checkTABLE; + } + + public static CheckAttribs getCheckCaption() + { + return _checkCaption; + } + + public static CheckAttribs getCheckIMG() + { + return _checkIMG; + } + + public static CheckAttribs getCheckAREA() + { + return _checkAREA; + } + + public static CheckAttribs getCheckAnchor() + { + return _checkAnchor; + } + + public static CheckAttribs getCheckMap() + { + return _checkMap; + } + + public static CheckAttribs getCheckSTYLE() + { + return _checkStyle; + } + + public static CheckAttribs getCheckTableCell() + { + return _checkTableCell; + } + + public static CheckAttribs getCheckLINK() + { + return _checkLINK; + } + + public static CheckAttribs getCheckHR() + { + return _checkHR; + } + + + private static CheckAttribs _checkHTML = new CheckHTML(); + private static CheckAttribs _checkSCRIPT = new CheckSCRIPT(); + 
private static CheckAttribs _checkTABLE = new CheckTABLE(); + private static CheckAttribs _checkCaption = new CheckCaption(); + private static CheckAttribs _checkIMG = new CheckIMG(); + private static CheckAttribs _checkAREA = new CheckAREA(); + private static CheckAttribs _checkAnchor = new CheckAnchor(); + private static CheckAttribs _checkMap = new CheckMap(); + private static CheckAttribs _checkStyle = new CheckSTYLE(); + private static CheckAttribs _checkTableCell = new CheckTableCell(); + private static CheckAttribs _checkLINK = new CheckLINK(); + private static CheckAttribs _checkHR = new CheckHR(); + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Clean.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Clean.java new file mode 100644 index 0000000..458c84e --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Clean.java @@ -0,0 +1,1779 @@ +/* + * @(#)Clean.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Clean up misuse of presentation markup + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +/* + Filters from other formats such as Microsoft Word + often make excessive use of presentation markup such + as font tags, B, I, and the align attribute. By applying + a set of production rules, it is straight forward to + transform this to use CSS. + + Some rules replace some of the children of an element by + style properties on the element, e.g. + +

    <p><b>...</b></p> -> <p style="font-weight: bold">...</p>

+ + Such rules are applied to the element's content and then + to the element itself until none of the rules more apply. + Having applied all the rules to an element, it will have + a style attribute with one or more properties. + + Other rules strip the element they apply to, replacing + it by style properties on the contents, e.g. + +
    <dir><li><p>...</li></dir> -> <p style="margin-left 1em">
    ... + + These rules are applied to an element before processing + its content and replace the current element by the first + element in the exposed content. + + After applying both sets of rules, you can replace the + style attribute by a class value and style rule in the + document head. To support this, an association of styles + and class names is built. + + A naive approach is to rely on string matching to test + when two property lists are the same. A better approach + would be to first sort the properties before matching. +*/ + +public class Clean { + + private int classNum = 1; + + private TagTable tt; + + public Clean(TagTable tt) + { + this.tt = tt; + } + + private StyleProp insertProperty(StyleProp props, String name, + String value) + { + StyleProp first, prev, prop; + int cmp; + + prev = null; + first = props; + + while (props != null) + { + cmp = props.name.compareTo(name); + + if (cmp == 0) + { + /* this property is already defined, ignore new value */ + return first; + } + + if (cmp > 0) // props.name > name + { + /* insert before this */ + + prop = new StyleProp(name, value, props); + + if (prev != null) + prev.next = prop; + else + first = prop; + + return first; + } + + prev = props; + props = props.next; + } + + prop = new StyleProp(name, value); + + if (prev != null) + prev.next = prop; + else + first = prop; + + return first; + } + + /* + Create sorted linked list of properties from style string + It temporarily places nulls in place of ':' and ';' to + delimit the strings for the property name and value. + Some systems don't allow you to null literal strings, + so to avoid this, a copy is made first. 
+ */ + private StyleProp createProps(StyleProp prop, String style) + { + int name_end; + int value_end; + int value_start = 0; + int name_start = 0; + boolean more; + + name_start = 0; + while (name_start < style.length()) + { + while (name_start < style.length() && + style.charAt(name_start) == ' ') + ++name_start; + + name_end = name_start; + + while (name_end < style.length()) + { + if (style.charAt(name_end) == ':') + { + value_start = name_end + 1; + break; + } + + ++name_end; + } + + if (name_end >= style.length() || style.charAt(name_end) != ':') + break; + + while (value_start < style.length() && + style.charAt(value_start) == ' ') + ++value_start; + + value_end = value_start; + more = false; + + while (value_end < style.length()) + { + if (style.charAt(value_end) == ';') + { + more = true; + break; + } + + ++value_end; + } + + prop = insertProperty(prop, + style.substring(name_start, name_end), + style.substring(value_start, value_end)); + + if (more) + { + name_start = value_end + 1; + continue; + } + + break; + } + + return prop; + } + + private String createPropString(StyleProp props) + { + String style = ""; + int len; + StyleProp prop; + + /* compute length */ + + for (len = 0, prop = props; prop != null; prop = prop.next) + { + len += prop.name.length() + 2; + len += prop.value.length() + 2; + } + + for (prop = props; prop != null; prop = prop.next) + { + style = style.concat(prop.name); + style = style.concat(": "); + + style = style.concat(prop.value); + + if (prop.next == null) + break; + + style = style.concat("; "); + } + + return style; + } + + /* + create string with merged properties + */ + private String addProperty(String style, String property) + { + StyleProp prop; + + prop = createProps(null, style); + prop = createProps(prop, property); + style = createPropString(prop); + return style; + } + + private String gensymClass(String tag) + { + String str; + + str = "c" + classNum; + classNum++; + return str; + } + + private String 
findStyle(Lexer lexer, String tag, String properties) + { + Style style; + + for (style = lexer.styles; style != null; style=style.next) + { + if (style.tag.equals(tag) && + style.properties.equals(properties)) + return style.tagClass; + } + + style = new Style(tag, gensymClass(tag), properties, lexer.styles); + lexer.styles = style; + return style.tagClass; + } + + /* + Find style attribute in node, and replace it + by corresponding class attribute. Search for + class in style dictionary otherwise gensym + new class and add to dictionary. + + Assumes that node doesn't have a class attribute + */ + private void style2Rule(Lexer lexer, Node node) + { + AttVal styleattr, classattr; + String classname; + + styleattr = node.getAttrByName("style"); + + if (styleattr != null) + { + classname = findStyle(lexer, node.element, styleattr.value); + classattr = node.getAttrByName("class"); + + /* + if there already is a class attribute + then append class name after a space + */ + if (classattr != null) + { + classattr.value = classattr.value + " " + classname; + node.removeAttribute(styleattr); + } + else /* reuse style attribute for class attribute */ + { + styleattr.attribute = "class"; + styleattr.value = classname; + } + } + } + + private void addColorRule(Lexer lexer, String selector, String color) + { + if (color != null) + { + lexer.addStringLiteral(selector); + lexer.addStringLiteral(" { color: "); + lexer.addStringLiteral(color); + lexer.addStringLiteral(" }\n"); + } + } + + /* + move presentation attribs from body to style element + + background="foo" -> body { background-image: url(foo) } + bgcolor="foo" -> body { background-color: foo } + text="foo" -> body { color: foo } + link="foo" -> :link { color: foo } + vlink="foo" -> :visited { color: foo } + alink="foo" -> :active { color: foo } + */ + private void cleanBodyAttrs(Lexer lexer, Node body) + { + AttVal attr; + String bgurl = null; + String bgcolor = null; + String color = null; + + attr = 
body.getAttrByName("background"); + + if (attr != null) + { + bgurl = attr.value; + attr.value = null; + body.removeAttribute(attr); + } + + attr = body.getAttrByName("bgcolor"); + + if (attr != null) + { + bgcolor = attr.value; + attr.value = null; + body.removeAttribute(attr); + } + + attr = body.getAttrByName("text"); + + if (attr != null) + { + color = attr.value; + attr.value = null; + body.removeAttribute(attr); + } + + if (bgurl != null || bgcolor != null || color != null) + { + lexer.addStringLiteral(" body {\n"); + + if (bgurl != null) + { + lexer.addStringLiteral(" background-image: url("); + lexer.addStringLiteral(bgurl); + lexer.addStringLiteral(");\n"); + } + + if (bgcolor != null) + { + lexer.addStringLiteral(" background-color: "); + lexer.addStringLiteral(bgcolor); + lexer.addStringLiteral(";\n"); + } + + if (color != null) + { + lexer.addStringLiteral(" color: "); + lexer.addStringLiteral(color); + lexer.addStringLiteral(";\n"); + } + + lexer.addStringLiteral(" }\n"); + } + + attr = body.getAttrByName("link"); + + if (attr != null) + { + addColorRule(lexer, " :link", attr.value); + body.removeAttribute(attr); + } + + attr = body.getAttrByName("vlink"); + + if (attr != null) + { + addColorRule(lexer, " :visited", attr.value); + body.removeAttribute(attr); + } + + attr = body.getAttrByName("alink"); + + if (attr != null) + { + addColorRule(lexer, " :active", attr.value); + body.removeAttribute(attr); + } + } + + private boolean niceBody(Lexer lexer, Node doc) + { + Node body = doc.findBody(lexer.configuration.tt); + + if (body != null) + { + if ( + body.getAttrByName("background") != null || + body.getAttrByName("bgcolor") != null || + body.getAttrByName("text") != null || + body.getAttrByName("link") != null || + body.getAttrByName("vlink") != null || + body.getAttrByName("alink") != null + ) + { + lexer.badLayout |= Report.USING_BODY; + return false; + } + } + + return true; + } + + /* create style element using rules from dictionary */ + private 
void createStyleElement(Lexer lexer, Node doc) + { + Node node, head, body; + Style style; + AttVal av; + + if (lexer.styles == null && niceBody(lexer, doc)) + return; + + node = lexer.newNode(Node.StartTag, null, 0, 0, "style"); + node.implicit = true; + + /* insert type attribute */ + av = new AttVal(null, null, '"', "type", "text/css"); + av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); + node.attributes = av; + + body = doc.findBody(lexer.configuration.tt); + + lexer.txtstart = lexer.lexsize; + + if (body != null) + cleanBodyAttrs(lexer, body); + + for (style = lexer.styles; style != null; style = style.next) + { + lexer.addCharToLexer(' '); + lexer.addStringLiteral(style.tag); + lexer.addCharToLexer('.'); + lexer.addStringLiteral(style.tagClass); + lexer.addCharToLexer(' '); + lexer.addCharToLexer('{'); + lexer.addStringLiteral(style.properties); + lexer.addCharToLexer('}'); + lexer.addCharToLexer('\n'); + } + + lexer.txtend = lexer.lexsize; + + Node.insertNodeAtEnd(node, + lexer.newNode(Node.TextNode, + lexer.lexbuf, + lexer.txtstart, + lexer.txtend)); + + /* + now insert style element into document head + + doc is root node. 
search its children for html node + the head node should be first child of html node + */ + + head = doc.findHEAD(lexer.configuration.tt); + + if (head != null) + Node.insertNodeAtEnd(head, node); + } + + /* ensure bidirectional links are consistent */ + private void fixNodeLinks(Node node) + { + Node child; + + if (node.prev != null) + node.prev.next = node; + else + node.parent.content = node; + + if (node.next != null) + node.next.prev = node; + else + node.parent.last = node; + + for (child = node.content; child != null; child = child.next) + child.parent = node; + } + + /* + used to strip child of node when + the node has one and only one child + */ + private void stripOnlyChild(Node node) + { + Node child; + + child = node.content; + node.content = child.content; + node.last = child.last; + child.content = null; + + for (child = node.content; child != null; child = child.next) + child.parent = node; + } + + /* used to strip font start and end tags */ + private void discardContainer(Node element, MutableObject pnode) + { + Node node; + Node parent = element.parent; + + if (element.content != null) + { + element.last.next = element.next; + + if (element.next != null) + { + element.next.prev = element.last; + element.last.next = element.next; + } + else + parent.last = element.last; + + if (element.prev != null) + { + element.content.prev = element.prev; + element.prev.next = element.content; + } + else + parent.content = element.content; + + for (node = element.content; node != null; node = node.next) + node.parent = parent; + + pnode.setObject(element.content); + } + else + { + if (element.next != null) + element.next.prev = element.prev; + else + parent.last = element.prev; + + if (element.prev != null) + element.prev.next = element.next; + else + parent.content = element.next; + + pnode.setObject(element.next); + } + + element.next = null; + element.content = null; + } + + /* + Add style property to element, creating style + attribute as needed and adding ; 
delimiter + */ + private void addStyleProperty(Node node, String property) + { + AttVal av; + + for (av = node.attributes; av != null; av = av.next) + { + if (av.attribute.equals("style")) + break; + } + + /* if style attribute already exists then insert property */ + + if (av != null) + { + String s; + + s = addProperty(av.value, property); + av.value = s; + } + else /* else create new style attribute */ + { + av = new AttVal(node.attributes, null, '"', "style", property); + av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); + node.attributes = av; + } + } + + /* + Create new string that consists of the + combined style properties in s1 and s2 + + To merge property lists, we build a linked + list of property/values and insert properties + into the list in order, merging values for + the same property name. + */ + private String mergeProperties(String s1, String s2) + { + String s; + StyleProp prop; + + prop = createProps(null, s1); + prop = createProps(prop, s2); + s = createPropString(prop); + return s; + } + + private void mergeStyles(Node node, Node child) + { + AttVal av; + String s1, s2, style; + + for (s2 = null, av = child.attributes; av != null; av = av.next) + { + if (av.attribute.equals("style")) + { + s2 = av.value; + break; + } + } + + for (s1 = null, av = node.attributes; av != null; av = av.next) + { + if (av.attribute.equals("style")) + { + s1 = av.value; + break; + } + } + + if (s1 != null) + { + if (s2 != null) /* merge styles from both */ + { + style = mergeProperties(s1, s2); + av.value = style; + } + } + else if (s2 != null) /* copy style of child */ + { + av = new AttVal(node.attributes, null, '"', "style", s2); + av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); + node.attributes = av; + } + } + + private String fontSize2Name(String size) + { + /* + String[] sizes = + { + "50%", + "60%", + "80%", + null, + "120%", + "150%", + "200%" + }; + */ + + String[] sizes = + { + "60%", + "70%", + "80%", + null, 
+ "120%", + "150%", + "200%" + }; + String buf; + + if (size.length() > 0 && + '0' <= size.charAt(0) && size.charAt(0) <= '6') + { + int n = size.charAt(0) - '0'; + return sizes[n]; + } + + if (size.length() > 0 && size.charAt(0) == '-') + { + if (size.length() > 1 && + '0' <= size.charAt(1) && size.charAt(1) <= '6') + { + int n = size.charAt(1) - '0'; + double x; + + for (x = 1.0; n > 0; --n) + x *= 0.8; + + x *= 100.0; + buf = "" + (int)x + "%"; + + return buf; + } + + return "smaller"; /*"70%"; */ + } + + if (size.length() > 1 && + '0' <= size.charAt(1) && size.charAt(1) <= '6') + { + int n = size.charAt(1) - '0'; + double x; + + for (x = 1.0; n > 0; --n) + x *= 1.2; + + x *= 100.0; + buf = "" + (int)x + "%"; + + return buf; + } + + return "larger"; /* "140%" */ + } + + private void addFontFace(Node node, String face) + { + addStyleProperty(node, "font-family: " + face); + } + + private void addFontSize(Node node, String size) + { + String value; + + if (size.equals("6") && node.tag == tt.tagP) + { + node.element = "h1"; + tt.findTag(node); + return; + } + + if (size.equals("5") && node.tag == tt.tagP) + { + node.element = "h2"; + tt.findTag(node); + return; + } + + if (size.equals("4") && node.tag == tt.tagP) + { + node.element = "h3"; + tt.findTag(node); + return; + } + + value = fontSize2Name(size); + + if (value != null) + { + addStyleProperty(node, "font-size: " + value); + } + } + + private void addFontColor(Node node, String color) + { + addStyleProperty(node, "color: " + color); + } + + private void addAlign(Node node, String align) + { + /* force alignment value to lower case */ + addStyleProperty(node, "text-align: " + align.toLowerCase()); + } + + /* + add style properties to node corresponding to + the font face, size and color attributes + */ + private void addFontStyles(Node node, AttVal av) + { + while (av != null) + { + if (av.attribute.equals("face")) + addFontFace(node, av.value); + else if (av.attribute.equals("size")) + addFontSize(node, 
av.value); + else if (av.attribute.equals("color")) + addFontColor(node, av.value); + + av = av.next; + } + } + + /* + Symptom:

    <p align=center> + Action: <p style="text-align: center">
    + */ + private void textAlign(Lexer lexer, Node node) + { + AttVal av, prev; + + prev = null; + + for (av = node.attributes; av != null; av = av.next) + { + if (av.attribute.equals("align")) + { + if (prev != null) + prev.next = av.next; + else + node.attributes = av.next; + + if (av.value != null) + { + addAlign(node, av.value); + } + + break; + } + + prev = av; + } + } + + /* + The clean up rules use the pnode argument to return the + next node when the orignal node has been deleted + */ + + /* + Symptom:

    <dir> <li> where <li> is only child + Action: coerce <dir> <li> to <div>
    with indent. + */ + + private boolean dir2Div(Lexer lexer, Node node, MutableObject pnode) + { + Node child; + + if (node.tag == tt.tagDir || + node.tag == tt.tagUl || + node.tag == tt.tagOl) + { + child = node.content; + + if (child == null) + return false; + + /* check child has no peers */ + + if (child.next != null) + return false; + + if (child.tag != tt.tagLi) + return false; + + if (!child.implicit) + return false; + + /* coerce dir to div */ + + node.tag = tt.tagDiv; + node.element = "div"; + addStyleProperty(node, "margin-left: 2em"); + stripOnlyChild(node); + return true; + +//#if 0 + //Node content; + //Node last; + //content = child.content; + //last = child.last; + //child.content = null; + + /* adjust parent and set margin on contents of
  • */ + + //for (child = content; child != null; child = child.next) + //{ + // child.parent = node.parent; + // addStyleProperty(child, "margin-left: 1em"); + //} + + /* hook first/last into sequence */ + + //if (content != null) + //{ + // content.prev = node.prev; + // last.next = node.next; + // fixNodeLinks(content); + // fixNodeLinks(last); + //} + + //node.next = null; + + /* ensure that new node is cleaned */ + //pnode.setObject(cleanNode(lexer, content)); + //return true; +//#endif + } + + return false; + } + + /* + Symptom:
    <center> + Action: replace <center> by <div style="text-align: center">
    + */ + + private boolean center2Div(Lexer lexer, Node node, MutableObject pnode) + { + if (node.tag == tt.tagCenter) + { + if (lexer.configuration.DropFontTags) + { + if (node.content != null) + { + Node last = node.last; + Node parent = node.parent; + + discardContainer(node, pnode); + + node = lexer.inferredTag("br"); + + if (last.next != null) + last.next.prev = node; + + node.next = last.next; + last.next = node; + node.prev = last; + + if (parent.last == last) + parent.last = node; + + node.parent = parent; + } + else + { + Node prev = node.prev; + Node next = node.next; + Node parent = node.parent; + discardContainer(node, pnode); + + node = lexer.inferredTag("br"); + node.next = next; + node.prev = prev; + node.parent = parent; + + if (next != null) + next.prev = node; + else + parent.last = node; + + if (prev != null) + prev.next = node; + else + parent.content = node; + } + + return true; + } + node.tag = tt.tagDiv; + node.element = "div"; + addStyleProperty(node, "text-align: center"); + return true; + } + + return false; + } + + /* + Symptom
    <div><div>...</div></div> + Action: merge the two divs + + This is useful after nested <dir>s used by Word + for indenting have been converted to <div>
    s + */ + private boolean mergeDivs(Lexer lexer, Node node, MutableObject pnode) + { + Node child; + + if (node.tag != tt.tagDiv) + return false; + + child = node.content; + + if (child == null) + return false; + + if (child.tag != tt.tagDiv) + return false; + + if (child.next != null) + return false; + + mergeStyles(node, child); + stripOnlyChild(node); + return true; + } + + /* + Symptom:
    <ul><li><ul>...</ul></li></ul>
    + Action: discard outer list + */ + + private boolean nestedList(Lexer lexer, Node node, MutableObject pnode) + { + Node child, list; + + if (node.tag == tt.tagUl || node.tag == tt.tagOl) + { + child = node.content; + + if (child == null) + return false; + + /* check child has no peers */ + + if (child.next != null) + return false; + + list = child.content; + + if (list == null) + return false; + + if (list.tag != node.tag) + return false; + + pnode.setObject(node.next); + + /* move inner list node into position of outer node */ + list.prev = node.prev; + list.next = node.next; + list.parent = node.parent; + fixNodeLinks(list); + + /* get rid of outer ul and its li */ + child.content = null; + node.content = null; + node.next = null; + + /* + If prev node was a list the chances are this node + should be appended to that list. Word has no way of + recognizing nested lists and just uses indents + */ + + if (list.prev != null) + { + node = list; + list = node.prev; + + if (list.tag == tt.tagUl || list.tag == tt.tagOl) + { + list.next = node.next; + + if (list.next != null) + list.next.prev = list; + + child = list.last; /*
  • */ + + node.parent = child; + node.next = null; + node.prev = child.last; + fixNodeLinks(node); + } + } + + cleanNode(lexer, node); + return true; + } + + return false; + } + + /* + Symptom: the only child of a block-level element is a + presentation element such as B, I or FONT + + Action: add style "font-weight: bold" to the block and + strip the element, leaving its children. + + example: + +

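    <p> + <b><font face="Arial" size="6">Draft Recommended Practice</font></b> + </p> + + becomes: + + <p style="font-weight: bold; font-family: Arial; font-size: 6"> + Draft Recommended Practice + </p>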

    + + This code also replaces the align attribute by a style attribute. + However, to avoid CSS problems with Navigator 4, this isn't done + for the elements: caption, tr and table + */ + private boolean blockStyle(Lexer lexer, Node node, MutableObject pnode) + { + Node child; + + if ((node.tag.model & (Dict.CM_BLOCK | Dict.CM_LIST | Dict.CM_DEFLIST | Dict.CM_TABLE)) != 0) + { + if (node.tag != tt.tagTable + && node.tag != tt.tagTr + && node.tag != tt.tagLi) + { + /* check for align attribute */ + if (node.tag != tt.tagCaption) + textAlign(lexer, node); + + child = node.content; + + if (child == null) + return false; + + /* check child has no peers */ + + if (child.next != null) + return false; + + if (child.tag == tt.tagB) + { + mergeStyles(node, child); + addStyleProperty(node, "font-weight: bold"); + stripOnlyChild(node); + return true; + } + + if (child.tag == tt.tagI) + { + mergeStyles(node, child); + addStyleProperty(node, "font-style: italic"); + stripOnlyChild(node); + return true; + } + + if (child.tag == tt.tagFont) + { + mergeStyles(node, child); + addFontStyles(node, child.attributes); + stripOnlyChild(node); + return true; + } + } + } + + return false; + } + + /* the only child of table cell or an inline element such as em */ + private boolean inlineStyle(Lexer lexer, Node node, MutableObject pnode) + { + Node child; + + if (node.tag != tt.tagFont && (node.tag.model & (Dict.CM_INLINE|Dict.CM_ROW)) != 0) + { + child = node.content; + + if (child == null) + return false; + + /* check child has no peers */ + + if (child.next != null) + return false; + + if (child.tag == tt.tagB && lexer.configuration.LogicalEmphasis) + { + mergeStyles(node, child); + addStyleProperty(node, "font-weight: bold"); + stripOnlyChild(node); + return true; + } + + if (child.tag == tt.tagI && lexer.configuration.LogicalEmphasis) + { + mergeStyles(node, child); + addStyleProperty(node, "font-style: italic"); + stripOnlyChild(node); + return true; + } + + if (child.tag == 
tt.tagFont) + { + mergeStyles(node, child); + addFontStyles(node, child.attributes); + stripOnlyChild(node); + return true; + } + } + + return false; + } + + /* + Replace font elements by span elements, deleting + the font element's attributes and replacing them + by a single style attribute. + */ + private boolean font2Span(Lexer lexer, Node node, MutableObject pnode) + { + AttVal av, style, next; + + if (node.tag == tt.tagFont) + { + if (lexer.configuration.DropFontTags) + { + discardContainer(node, pnode); + return false; + } + + /* if FONT is only child of parent element then leave alone */ + if (node.parent.content == node + && node.next == null) + return false; + + addFontStyles(node, node.attributes); + + /* extract style attribute and free the rest */ + av = node.attributes; + style = null; + + while (av != null) + { + next = av.next; + + if (av.attribute.equals("style")) + { + av.next = null; + style = av; + } + + av = next; + } + + node.attributes = style; + + node.tag = tt.tagSpan; + node.element = "span"; + + return true; + } + + return false; + } + + /* + Applies all matching rules to a node. 
+ */ + private Node cleanNode(Lexer lexer, Node node) + { + Node next = null; + MutableObject o = new MutableObject(); + boolean b = false; + + for (next = node; node.isElement(); node = next) + { + o.setObject(next); + + b = dir2Div(lexer, node, o); + next = (Node)o.getObject(); + if (b) + continue; + + b = nestedList(lexer, node, o); + next = (Node)o.getObject(); + if (b) + continue; + + b = center2Div(lexer, node, o); + next = (Node)o.getObject(); + if (b) + continue; + + b = mergeDivs(lexer, node, o); + next = (Node)o.getObject(); + if (b) + continue; + + b = blockStyle(lexer, node, o); + next = (Node)o.getObject(); + if (b) + continue; + + b = inlineStyle(lexer, node, o); + next = (Node)o.getObject(); + if (b) + continue; + + b = font2Span(lexer, node, o); + next = (Node)o.getObject(); + if (b) + continue; + + break; + } + + return next; + } + + private Node createStyleProperties(Lexer lexer, Node node) + { + Node child; + + if (node.content != null) + { + for (child = node.content; child != null; child = child.next) + { + child = createStyleProperties(lexer, child); + } + } + + return cleanNode(lexer, node); + } + + private void defineStyleRules(Lexer lexer, Node node) + { + Node child; + + if (node.content != null) + { + for (child = node.content; + child != null; child = child.next) + { + defineStyleRules(lexer, child); + } + } + + style2Rule(lexer, node); + } + + public void cleanTree(Lexer lexer, Node doc) + { + doc = createStyleProperties(lexer, doc); + + if (!lexer.configuration.MakeClean) + { + defineStyleRules(lexer, doc); + createStyleElement(lexer, doc); + } + } + + /* simplifies ... ... etc. 
*/ + public void nestedEmphasis(Node node) + { + MutableObject o = new MutableObject(); + Node next; + + while (node != null) + { + next = node.next; + + if ((node.tag == tt.tagB || node.tag == tt.tagI) + && node.parent != null && node.parent.tag == node.tag) + { + /* strip redundant inner element */ + o.setObject(next); + discardContainer(node, o); + next = (Node)o.getObject(); + node = next; + continue; + } + + if (node.content != null) + nestedEmphasis(node.content); + + node = next; + } + } + + /* replace i by em and b by strong */ + public void emFromI(Node node) + { + while (node != null) + { + if (node.tag == tt.tagI) + { + node.element = tt.tagEm.name; + node.tag = tt.tagEm; + } + else if (node.tag == tt.tagB) + { + node.element = tt.tagStrong.name; + node.tag = tt.tagStrong; + } + + if (node.content != null) + emFromI(node.content); + + node = node.next; + } + } + + /* + Some people use dir or ul without an li + to indent the content. The pattern to + look for is a list with a single implicit + li. This is recursively replaced by an + implicit blockquote. 
+ */ + public void list2BQ(Node node) + { + while (node != null) + { + if (node.content != null) + list2BQ(node.content); + + if (node.tag != null && node.tag.parser == ParserImpl.getParseList() && + node.hasOneChild() && node.content.implicit) + { + stripOnlyChild(node); + node.element = tt.tagBlockquote.name; + node.tag = tt.tagBlockquote; + node.implicit = true; + } + + node = node.next; + } + } + + /* + Replace implicit blockquote by div with an indent + taking care to reduce nested blockquotes to a single + div with the indent set to match the nesting depth + */ + public void bQ2Div(Node node) + { + int indent; + String indent_buf; + + while (node != null) + { + if (node.tag == tt.tagBlockquote && node.implicit) + { + indent = 1; + + while(node.hasOneChild() && + node.content.tag == tt.tagBlockquote && + node.implicit) + { + ++indent; + stripOnlyChild(node); + } + + if (node.content != null) + bQ2Div(node.content); + + indent_buf = "margin-left: " + + (new Integer(2*indent)).toString() + "em"; + + node.element = tt.tagDiv.name; + node.tag = tt.tagDiv; + node.addAttribute("style", indent_buf); + } + else if (node.content != null) + bQ2Div(node.content); + + + node = node.next; + } + } + + /* node is prune up to */ + public Node pruneSection(Lexer lexer, Node node) + { + for (;;) + { + /* discard node and returns next */ + node = Node.discardElement(node); + + if (node == null) + return null; + + if (node.type == Node.SectionTag) + { + if ((Lexer.getString(node.textarray, node.start, 2)).equals("if")) + { + node = pruneSection(lexer, node); + continue; + } + + if ((Lexer.getString(node.textarray, node.start, 5)).equals("endif")) + { + node = Node.discardElement(node); + break; + } + } + } + + return node; + } + + public void dropSections(Lexer lexer, Node node) + { + while (node != null) + { + if (node.type == Node.SectionTag) + { + /* prune up to matching endif */ + if ((Lexer.getString(node.textarray, node.start, 2)).equals("if")) + { + node = 
pruneSection(lexer, node); + continue; + } + + /* discard others as well */ + node = Node.discardElement(node); + continue; + } + + if (node.content != null) + dropSections(lexer, node.content); + + node = node.next; + } + } + + public void purgeAttributes(Node node) + { + AttVal attr = node.attributes; + AttVal next = null; + AttVal prev = null; + + while (attr != null) + { + next = attr.next; + + /* special check for class="Code" denoting pre text */ + if (attr.attribute != null && + attr.value != null && + attr.attribute.equals("class") && + attr.value.equals("Code")) + { + prev = attr; + } + else if (attr.attribute != null && + (attr.attribute.equals("class") || + attr.attribute.equals("style") || + attr.attribute.equals("lang") || + attr.attribute.startsWith("x:") || + ((attr.attribute.equals("height") || attr.attribute.equals("width")) && + (node.tag == tt.tagTd || node.tag == tt.tagTr || node.tag == tt.tagTh)))) + { + if (prev != null) + prev.next = next; + else + node.attributes = next; + + } + else + prev = attr; + + attr = next; + } + } + + /* Word2000 uses span excessively, so we strip span out */ + public Node stripSpan(Lexer lexer, Node span) + { + Node node; + Node prev = null; + Node content; + + /* + deal with span elements that have content + by splicing the content in place of the span + after having processed it + */ + + cleanWord2000(lexer, span.content); + content = span.content; + + if (span.prev != null) + prev = span.prev; + else if (content != null) + { + node = content; + content = content.next; + Node.removeNode(node); + Node.insertNodeBeforeElement(span, node); + prev = node; + } + + while (content != null) + { + node = content; + content = content.next; + Node.removeNode(node); + Node.insertNodeAfterElement(prev, node); + prev = node; + } + + if (span.next == null) + span.parent.last = prev; + + node = span.next; + span.content = null; + Node.discardElement(span); + return node; + } + + /* map non-breaking spaces to regular spaces */ + 
private void normalizeSpaces(Lexer lexer, Node node) + { + while (node != null) + { + if (node.content != null) + normalizeSpaces(lexer, node.content); + + if (node.type == Node.TextNode) + { + int i; + MutableInteger c = new MutableInteger(); + int p = node.start; + + for (i = node.start; i < node.end; ++i) + { + c.value = (int)node.textarray[i]; + + /* look for UTF-8 multibyte character */ + if (c.value > 0x7F) + i += PPrint.getUTF8(node.textarray, i, c); + + if (c.value == 160) + c.value = ' '; + + p = PPrint.putUTF8(node.textarray, p, c.value); + } + } + + node = node.next; + } + } + + /* + This is a major clean up to strip out all the extra stuff you get + when you save as web page from Word 2000. It doesn't yet know what + to do with VML tags, but these will appear as errors unless you + declare them as new tags, such as o:p which needs to be declared + as inline. + */ + public void cleanWord2000(Lexer lexer, Node node) + { + /* used to a list from a sequence of bulletted p's */ + Node list = null; + + while (node != null) + { + /* discard Word's style verbiage */ + if (node.tag == tt.tagStyle || + node.tag == tt.tagMeta || + node.type == Node.CommentTag) + { + node = Node.discardElement(node); + continue; + } + + /* strip out all span tags Word scatters so liberally! 
*/ + if (node.tag == tt.tagSpan) + { + node = stripSpan(lexer, node); + continue; + } + + /* get rid of Word's xmlns attributes */ + if (node.tag == tt.tagHtml) + { + /* check that it's a Word 2000 document */ + if (node.getAttrByName("xmlns:o") == null) + return; + } + + if (node.tag == tt.tagLink) + { + AttVal attr = node.getAttrByName("rel"); + + if (attr != null && attr.value != null && + attr.value.equals("File-List")) + { + node = Node.discardElement(node); + continue; + } + } + + /* discard empty paragraphs */ + if (node.content == null && node.tag == tt.tagP) + { + node = Node.discardElement(node); + continue; + } + + if (node.tag == tt.tagP) + { + AttVal attr = node.getAttrByName("class"); + + /* map sequence of

    <p class="MsoListBullet"> to <ul>...</ul>
    */ + if (attr != null && attr.value != null && + attr.value.equals("MsoListBullet")) + { + Node.coerceNode(lexer, node, tt.tagLi); + + if (list == null || list.tag != tt.tagUl) + { + list = lexer.inferredTag("ul"); + Node.insertNodeBeforeElement(node, list); + } + + purgeAttributes(node); + + if (node.content != null) + cleanWord2000(lexer, node.content); + + /* remove node and append to contents of list */ + Node.removeNode(node); + Node.insertNodeAtEnd(list, node); + node = list.next; + } + /* map sequence of

    <p class="Code"> to <pre>...</pre>
    */ + else if (attr != null && attr.value != null && + attr.value.equals("Code")) + { + Node br = lexer.newLineNode(); + normalizeSpaces(lexer, node); + + if (list == null || list.tag != tt.tagPre) + { + list = lexer.inferredTag("pre"); + Node.insertNodeBeforeElement(node, list); + } + + /* remove node and append to contents of list */ + Node.removeNode(node); + Node.insertNodeAtEnd(list, node); + stripSpan(lexer, node); + Node.insertNodeAtEnd(list, br); + node = list.next; + } + else + list = null; + } + else + list = null; + + /* strip out style and class attributes */ + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + purgeAttributes(node); + + if (node.content != null) + cleanWord2000(lexer, node.content); + + node = node.next; + } + } + + public boolean isWord2000(Node root, TagTable tt) + { + Node html = root.findHTML(tt); + + return (html != null && html.getAttrByName("xmlns:o") != null); + } +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Configuration.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Configuration.java new file mode 100644 index 0000000..b2d8dfd --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Configuration.java @@ -0,0 +1,600 @@ +/* + * @(#)Configuration.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Read configuration file and manage configuration properties. + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +/* + Configuration files associate a property name with a value. + The format is that of a Java .properties file. +*/ + +import java.util.Enumeration; +import java.util.Properties; +import java.util.StringTokenizer; +import java.io.FileInputStream; +import java.io.IOException; + +public class Configuration implements java.io.Serializable { + + /* character encodings */ + public static final int RAW = 0; + public static final int ASCII = 1; + public static final int LATIN1 = 2; + public static final int UTF8 = 3; + public static final int ISO2022 = 4; + public static final int MACROMAN = 5; + + /* mode controlling treatment of doctype */ + public static final int DOCTYPE_OMIT = 0; + public static final int DOCTYPE_AUTO = 1; + public static final int DOCTYPE_STRICT= 2; + public static final int DOCTYPE_LOOSE = 3; + public static final int DOCTYPE_USER = 4; + + protected int spaces = 2; /* default indentation */ + protected int wraplen = 68; /* default wrap margin */ + protected int CharEncoding = ASCII; + protected int tabsize = 4; + + protected int docTypeMode = DOCTYPE_AUTO; /* see doctype property */ + protected String altText = null; /* default text for alt attribute */ + protected String slidestyle = null; /* style 
sheet for slides */ + protected String docTypeStr = null; /* user specified doctype */ + protected String errfile = null; /* file name to write errors to */ + protected boolean writeback = false; /* if true then output tidied markup */ + + protected boolean OnlyErrors = false; /* if true normal output is suppressed */ + protected boolean ShowWarnings = true; /* however errors are always shown */ + protected boolean Quiet = false; /* no 'Parsing X', guessed DTD or summary */ + protected boolean IndentContent = false; /* indent content of appropriate tags */ + protected boolean SmartIndent = false; /* does text/block level content effect indentation */ + protected boolean HideEndTags = false; /* suppress optional end tags */ + protected boolean XmlTags = false; /* treat input as XML */ + protected boolean XmlOut = false; /* create output as XML */ + protected boolean xHTML = false; /* output extensible HTML */ + protected boolean XmlPi = false; /* add for XML docs */ + protected boolean RawOut = false; /* avoid mapping values > 127 to entities */ + protected boolean UpperCaseTags = false; /* output tags in upper not lower case */ + protected boolean UpperCaseAttrs = false; /* output attributes in upper not lower case */ + protected boolean MakeClean = false; /* remove presentational clutter */ + protected boolean LogicalEmphasis = false; /* replace i by em and b by strong */ + protected boolean DropFontTags = false; /* discard presentation tags */ + protected boolean DropEmptyParas = true; /* discard empty p elements */ + protected boolean FixComments = true; /* fix comments with adjacent hyphens */ + protected boolean BreakBeforeBR = false; /* o/p newline before
    or not? */ + protected boolean BurstSlides = false; /* create slides on each h2 element */ + protected boolean NumEntities = false; /* use numeric entities */ + protected boolean QuoteMarks = false; /* output " marks as " */ + protected boolean QuoteNbsp = true; /* output non-breaking space as entity */ + protected boolean QuoteAmpersand = true; /* output naked ampersand as & */ + protected boolean WrapAttVals = false; /* wrap within attribute values */ + protected boolean WrapScriptlets = false; /* wrap within JavaScript string literals */ + protected boolean WrapSection = true; /* wrap within section tags */ + protected boolean WrapAsp = true; /* wrap within ASP pseudo elements */ + protected boolean WrapJste = true; /* wrap within JSTE pseudo elements */ + protected boolean WrapPhp = true; /* wrap within PHP pseudo elements */ + protected boolean FixBackslash = true; /* fix URLs by replacing \ with / */ + protected boolean IndentAttributes = false; /* newline+indent before each attribute */ + protected boolean XmlPIs = false; /* if set to yes PIs must end with ?> */ + protected boolean XmlSpace = false; /* if set to yes adds xml:space attr as needed */ + protected boolean EncloseBodyText = false; /* if yes text at body is wrapped in

    's */ + protected boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in

    's */ + protected boolean KeepFileTimes = true; /* if yes last modied time is preserved */ + protected boolean Word2000 = false; /* draconian cleaning for Word2000 */ + protected boolean TidyMark = true; /* add meta element indicating tidied doc */ + protected boolean Emacs = false; /* if true format error output for GNU Emacs */ + protected boolean LiteralAttribs = false; /* if true attributes may use newlines */ + + protected TagTable tt; /* TagTable associated with this Configuration */ + + private transient Properties _properties = new Properties(); + + public Configuration() + { + } + + public void addProps( Properties p ) + { + Enumeration enum = p.propertyNames(); + while (enum.hasMoreElements()) + { + String key = (String) enum.nextElement(); + String value = p.getProperty(key); + _properties.put(key, value); + } + parseProps(); + } + + public void parseFile( String filename ) + { + try + { + _properties.load( new FileInputStream( filename ) ); + } + catch (IOException e) + { + System.err.println(filename + e.toString()); + return; + } + parseProps(); + } + + private void parseProps() + { + String value; + + value = _properties.getProperty("indent-spaces"); + if (value != null) + spaces = parseInt(value, "indent-spaces"); + + value = _properties.getProperty("wrap"); + if (value != null) + wraplen = parseInt(value, "wrap"); + + value = _properties.getProperty("wrap-attributes"); + if (value != null) + WrapAttVals = parseBool(value, "wrap-attributes"); + + value = _properties.getProperty("wrap-script-literals"); + if (value != null) + WrapScriptlets = parseBool(value, "wrap-script-literals"); + + value = _properties.getProperty("wrap-sections"); + if (value != null) + WrapSection = parseBool(value, "wrap-sections"); + + value = _properties.getProperty("wrap-asp"); + if (value != null) + WrapAsp = parseBool(value, "wrap-asp"); + + value = _properties.getProperty("wrap-jste"); + if (value != null) + WrapJste = parseBool(value, "wrap-jste"); + + value = 
_properties.getProperty("wrap-php"); + if (value != null) + WrapPhp = parseBool(value, "wrap-php"); + + value = _properties.getProperty("literal-attributes"); + if (value != null) + LiteralAttribs = parseBool(value, "literal-attributes"); + + value = _properties.getProperty("tab-size"); + if (value != null) + tabsize = parseInt(value, "tab-size"); + + value = _properties.getProperty("markup"); + if (value != null) + OnlyErrors = parseInvBool(value, "markup"); + + value = _properties.getProperty("quiet"); + if (value != null) + Quiet = parseBool(value, "quiet"); + + value = _properties.getProperty("tidy-mark"); + if (value != null) + TidyMark = parseBool(value, "tidy-mark"); + + value = _properties.getProperty("indent"); + if (value != null) + IndentContent = parseIndent(value, "indent"); + + value = _properties.getProperty("indent-attributes"); + if (value != null) + IndentAttributes = parseBool(value, "ident-attributes"); + + value = _properties.getProperty("hide-endtags"); + if (value != null) + HideEndTags = parseBool(value, "hide-endtags"); + + value = _properties.getProperty("input-xml"); + if (value != null) + XmlTags = parseBool(value, "input-xml"); + + value = _properties.getProperty("output-xml"); + if (value != null) + XmlOut = parseBool(value, "output-xml"); + + value = _properties.getProperty("output-xhtml"); + if (value != null) + xHTML = parseBool(value, "output-xhtml"); + + value = _properties.getProperty("add-xml-pi"); + if (value != null) + XmlPi = parseBool(value, "add-xml-pi"); + + value = _properties.getProperty("add-xml-decl"); + if (value != null) + XmlPi = parseBool(value, "add-xml-decl"); + + value = _properties.getProperty("assume-xml-procins"); + if (value != null) + XmlPIs = parseBool(value, "assume-xml-procins"); + + value = _properties.getProperty("raw"); + if (value != null) + RawOut = parseBool(value, "raw"); + + value = _properties.getProperty("uppercase-tags"); + if (value != null) + UpperCaseTags = parseBool(value, 
"uppercase-tags"); + + value = _properties.getProperty("uppercase-attributes"); + if (value != null) + UpperCaseAttrs = parseBool(value, "uppercase-attributes"); + + value = _properties.getProperty("clean"); + if (value != null) + MakeClean = parseBool(value, "clean"); + + value = _properties.getProperty("logical-emphasis"); + if (value != null) + LogicalEmphasis = parseBool(value, "logical-emphasis"); + + value = _properties.getProperty("word-2000"); + if (value != null) + Word2000 = parseBool(value, "word-2000"); + + value = _properties.getProperty("drop-empty-paras"); + if (value != null) + DropEmptyParas = parseBool(value, "drop-empty-paras"); + + value = _properties.getProperty("drop-font-tags"); + if (value != null) + DropFontTags = parseBool(value, "drop-font-tags"); + + value = _properties.getProperty("enclose-text"); + if (value != null) + EncloseBodyText = parseBool(value, "enclose-text"); + + value = _properties.getProperty("enclose-block-text"); + if (value != null) + EncloseBlockText = parseBool(value, "enclose-block-text"); + + value = _properties.getProperty("alt-text"); + if (value != null) + altText = value; + + value = _properties.getProperty("add-xml-space"); + if (value != null) + XmlSpace = parseBool(value, "add-xml-space"); + + value = _properties.getProperty("fix-bad-comments"); + if (value != null) + FixComments = parseBool(value, "fix-bad-comments"); + + value = _properties.getProperty("split"); + if (value != null) + BurstSlides = parseBool(value, "split"); + + value = _properties.getProperty("break-before-br"); + if (value != null) + BreakBeforeBR = parseBool(value, "break-before-br"); + + value = _properties.getProperty("numeric-entities"); + if (value != null) + NumEntities = parseBool(value, "numeric-entities"); + + value = _properties.getProperty("quote-marks"); + if (value != null) + QuoteMarks = parseBool(value, "quote-marks"); + + value = _properties.getProperty("quote-nbsp"); + if (value != null) + QuoteNbsp = parseBool(value, 
"quote-nbsp"); + + value = _properties.getProperty("quote-ampersand"); + if (value != null) + QuoteAmpersand = parseBool(value, "quote-ampersand"); + + value = _properties.getProperty("write-back"); + if (value != null) + writeback = parseBool(value, "write-back"); + + value = _properties.getProperty("keep-time"); + if (value != null) + KeepFileTimes = parseBool(value, "keep-time"); + + value = _properties.getProperty("show-warnings"); + if (value != null) + ShowWarnings = parseBool(value, "show-warnings"); + + value = _properties.getProperty("error-file"); + if (value != null) + errfile = parseName(value, "error-file"); + + value = _properties.getProperty("slide-style"); + if (value != null) + slidestyle = parseName(value, "slide-style"); + + value = _properties.getProperty("new-inline-tags"); + if (value != null) + parseInlineTagNames(value, "new-inline-tags"); + + value = _properties.getProperty("new-blocklevel-tags"); + if (value != null) + parseBlockTagNames(value, "new-blocklevel-tags"); + + value = _properties.getProperty("new-empty-tags"); + if (value != null) + parseEmptyTagNames(value, "new-empty-tags"); + + value = _properties.getProperty("new-pre-tags"); + if (value != null) + parsePreTagNames(value, "new-pre-tags"); + + value = _properties.getProperty("char-encoding"); + if (value != null) + CharEncoding = parseCharEncoding(value, "char-encoding"); + + value = _properties.getProperty("doctype"); + if (value != null) + docTypeStr = parseDocType(value, "doctype"); + + value = _properties.getProperty("fix-backslash"); + if (value != null) + FixBackslash = parseBool(value, "fix-backslash"); + + value = _properties.getProperty("gnu-emacs"); + if (value != null) + Emacs = parseBool(value, "gnu-emacs"); + } + + /* ensure that config is self consistent */ + public void adjust() + { + if (EncloseBlockText) + EncloseBodyText = true; + + /* avoid the need to set IndentContent when SmartIndent is set */ + + if (SmartIndent) + IndentContent = true; + + /* disable 
wrapping */ + if (wraplen == 0) + wraplen = 0x7FFFFFFF; + + /* Word 2000 needs o:p to be declared as inline */ + if (Word2000) + { + tt.defineInlineTag("o:p"); + } + + /* XHTML is written in lower case */ + if (xHTML) + { + XmlOut = true; + UpperCaseTags = false; + UpperCaseAttrs = false; + } + + /* if XML in, then XML out */ + if (XmlTags) + { + XmlOut = true; + XmlPIs = true; + } + + /* XML requires end tags */ + if (XmlOut) + { + QuoteAmpersand = true; + HideEndTags = false; + } + } + + private static int parseInt( String s, String option ) + { + int i = 0; + try { + i = Integer.parseInt( s ); + } + catch ( NumberFormatException e ) { + Report.badArgument(option); + i = -1; + } + return i; + } + + private static boolean parseBool( String s, String option ) + { + boolean b = false; + if ( s != null && s.length() > 0 ) { + char c = s.charAt(0); + if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y') || (c == '1')) + b = true; + else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n') || (c == '0')) + b = false; + else + Report.badArgument(option); + } + return b; + } + + private static boolean parseInvBool( String s, String option ) + { + boolean b = false; + if ( s != null && s.length() > 0 ) { + char c = s.charAt(0); + if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y')) + b = true; + else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n')) + b = false; + else + Report.badArgument(option); + } + return !b; + } + + private static String parseName( String s, String option ) + { + StringTokenizer t = new StringTokenizer( s ); + String rs = null; + if ( t.countTokens() >= 1 ) + rs = t.nextToken(); + else + Report.badArgument(option); + return rs; + } + + private static int parseCharEncoding( String s, String option ) + { + int result = ASCII; + + if (Lexer.wstrcasecmp(s, "ascii") == 0) + result = ASCII; + else if (Lexer.wstrcasecmp(s, "latin1") == 0) + result = LATIN1; + else if (Lexer.wstrcasecmp(s, "raw") == 0) + result = RAW; + else if 
(Lexer.wstrcasecmp(s, "utf8") == 0) + result = UTF8; + else if (Lexer.wstrcasecmp(s, "iso2022") == 0) + result = ISO2022; + else if (Lexer.wstrcasecmp(s, "mac") == 0) + result = MACROMAN; + else + Report.badArgument(option); + + return result; + } + + /* slight hack to avoid changes to pprint.c */ + private boolean parseIndent( String s, String option ) + { + boolean b = IndentContent; + + if (Lexer.wstrcasecmp(s, "yes") == 0) + { + b = true; + SmartIndent = false; + } + else if (Lexer.wstrcasecmp(s, "true") == 0) + { + b = true; + SmartIndent = false; + } + else if (Lexer.wstrcasecmp(s, "no") == 0) + { + b = false; + SmartIndent = false; + } + else if (Lexer.wstrcasecmp(s, "false") == 0) + { + b = false; + SmartIndent = false; + } + else if (Lexer.wstrcasecmp(s, "auto") == 0) + { + b = true; + SmartIndent = true; + } + else + Report.badArgument(option); + return b; + } + + private void parseInlineTagNames( String s, String option ) + { + StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); + while ( t.hasMoreTokens() ) { + tt.defineInlineTag( t.nextToken() ); + } + } + + private void parseBlockTagNames( String s, String option ) + { + StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); + while ( t.hasMoreTokens() ) { + tt.defineBlockTag( t.nextToken() ); + } + } + + private void parseEmptyTagNames( String s, String option ) + { + StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); + while ( t.hasMoreTokens() ) { + tt.defineEmptyTag( t.nextToken() ); + } + } + + private void parsePreTagNames( String s, String option ) + { + StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); + while ( t.hasMoreTokens() ) { + tt.definePreTag( t.nextToken() ); + } + } + + /* + doctype: omit | auto | strict | loose | + + where the fpi is a string similar to + + "-//ACME//DTD HTML 3.14159//EN" + */ + protected String parseDocType( String s, String option ) + { + s = s.trim(); + + /* "-//ACME//DTD HTML 3.14159//EN" or similar */ + + if (s.startsWith("\"")) + { + 
docTypeMode = DOCTYPE_USER; + return s; + } + + /* read first word */ + String word = ""; + StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); + if (t.hasMoreTokens()) + word = t.nextToken(); + + if (Lexer.wstrcasecmp(word, "omit") == 0) + docTypeMode = DOCTYPE_OMIT; + else if (Lexer.wstrcasecmp(word, "strict") == 0) + docTypeMode = DOCTYPE_STRICT; + else if (Lexer.wstrcasecmp(word, "loose") == 0 || + Lexer.wstrcasecmp(word, "transitional") == 0) + docTypeMode = DOCTYPE_LOOSE; + else if (Lexer.wstrcasecmp(word, "auto") == 0) + docTypeMode = DOCTYPE_AUTO; + else + { + docTypeMode = DOCTYPE_AUTO; + Report.badArgument(option); + } + return null; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrImpl.java new file mode 100644 index 0000000..ebc8386 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrImpl.java @@ -0,0 +1,190 @@ +/* + * @(#)DOMAttrImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMAttrImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM Support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMAttrImpl extends DOMNodeImpl implements org.w3c.dom.Attr { + + protected AttVal avAdaptee; + + protected DOMAttrImpl(AttVal adaptee) + { + super(null); // must override all methods of DOMNodeImpl + this.avAdaptee = adaptee; + } + + + /* --------------------- DOM ---------------------------- */ + + public String getNodeValue() throws DOMException + { + return getValue(); + } + + public void setNodeValue(String nodeValue) throws DOMException + { + setValue(nodeValue); + } + + public String getNodeName() + { + return getName(); + } + + public short getNodeType() + { + return org.w3c.dom.Node.ATTRIBUTE_NODE; + } + + public org.w3c.dom.Node getParentNode() + { + return null; + } + + public org.w3c.dom.NodeList getChildNodes() + { + // NOT SUPPORTED + return null; + } + + public org.w3c.dom.Node getFirstChild() + { + // NOT SUPPORTED + return null; + } + + public org.w3c.dom.Node getLastChild() + { + // NOT SUPPORTED + return null; + } + + public org.w3c.dom.Node getPreviousSibling() + { + return null; + } + + public org.w3c.dom.Node getNextSibling() + { + return null; + } + + public org.w3c.dom.NamedNodeMap getAttributes() + { + return null; + } + + public org.w3c.dom.Document getOwnerDocument() + { + return null; + } + + public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild, + org.w3c.dom.Node refChild) + throws DOMException + { + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + 
"Not supported"); + } + + public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild, + org.w3c.dom.Node oldChild) + throws DOMException + { + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + + public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild) + throws DOMException + { + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + + public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild) + throws DOMException + { + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + + public boolean hasChildNodes() + { + return false; + } + + public org.w3c.dom.Node cloneNode(boolean deep) + { + return null; + } + + /** + * @see org.w3c.dom.Attr#getName + */ + public String getName() + { + return avAdaptee.attribute; + } + + /** + * @see org.w3c.dom.Attr#getSpecified + */ + public boolean getSpecified() + { + return true; + } + + /** + * Returns value of this attribute. If this attribute has a null value, + * then the attribute name is returned instead. + * Thanks to Brett Knights for this fix. + * @see org.w3c.dom.Attr#getValue + * + */ + public String getValue() + { + return (avAdaptee.value == null) ? avAdaptee.attribute : avAdaptee.value ; + } + + /** + * @see org.w3c.dom.Attr#setValue + */ + public void setValue(String value) + { + avAdaptee.value = value; + } + + /** + * DOM2 - not implemented. 
+ */ + public org.w3c.dom.Element getOwnerElement() { + return null; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrMapImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrMapImpl.java new file mode 100644 index 0000000..2b1ca08 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrMapImpl.java @@ -0,0 +1,138 @@ +/* + * @(#)DOMAttrMapImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMAttrMapImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMAttrMapImpl implements org.w3c.dom.NamedNodeMap { + + private AttVal first = null; + + protected DOMAttrMapImpl(AttVal first) + { + this.first = first; + } + + /** + * @see org.w3c.dom.NamedNodeMap#getNamedItem + */ + public org.w3c.dom.Node getNamedItem(String name) + { + AttVal att = this.first; + while (att != null) { + if (att.attribute.equals(name)) break; + att = att.next; + } + if (att != null) + return att.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.NamedNodeMap#setNamedItem + */ + public org.w3c.dom.Node setNamedItem(org.w3c.dom.Node arg) + throws DOMException + { + // NOT SUPPORTED + return null; + } + + /** + * @see org.w3c.dom.NamedNodeMap#removeNamedItem + */ + public org.w3c.dom.Node removeNamedItem(String name) + throws DOMException + { + // NOT SUPPORTED + return 
null; + } + + /** + * @see org.w3c.dom.NamedNodeMap#item + */ + public org.w3c.dom.Node item(int index) + { + int i = 0; + AttVal att = this.first; + while (att != null) { + if (i >= index) break; + i++; + att = att.next; + } + if (att != null) + return att.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.NamedNodeMap#getLength + */ + public int getLength() + { + int len = 0; + AttVal att = this.first; + while (att != null) { + len++; + att = att.next; + } + return len; + } + + /** + * DOM2 - not implemented. + */ + public org.w3c.dom.Node getNamedItemNS(String namespaceURI, + String localName) + { + return null; + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public org.w3c.dom.Node setNamedItemNS(org.w3c.dom.Node arg) + throws org.w3c.dom.DOMException + { + return null; + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public org.w3c.dom.Node removeNamedItemNS(String namespaceURI, + String localName) + throws org.w3c.dom.DOMException + { + return null; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCDATASectionImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCDATASectionImpl.java new file mode 100644 index 0000000..5e150db --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCDATASectionImpl.java @@ -0,0 +1,51 @@ +/* + * @(#)DOMCDATASectionImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMCDATASectionImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @author Gary L Peskin + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMCDATASectionImpl extends DOMTextImpl + implements org.w3c.dom.CDATASection { + + protected DOMCDATASectionImpl(Node adaptee) + { + super(adaptee); + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeName + */ + public String getNodeName() + { + return "#cdata-section"; + } + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + return org.w3c.dom.Node.CDATA_SECTION_NODE; + } +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCharacterDataImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCharacterDataImpl.java new file mode 100644 index 0000000..37245fb --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCharacterDataImpl.java @@ -0,0 +1,143 @@ +/* + * @(#)DOMCharacterDataImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMCharacterDataImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMCharacterDataImpl extends DOMNodeImpl + implements org.w3c.dom.CharacterData { + + protected DOMCharacterDataImpl(Node adaptee) + { + super(adaptee); + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.CharacterData#getData + */ + public String getData() throws DOMException + { + return getNodeValue(); + } + + /** + * @see org.w3c.dom.CharacterData#setData + */ + public void setData(String data) throws DOMException + { + // NOT SUPPORTED + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + + /** + * @see org.w3c.dom.CharacterData#getLength + */ + public int getLength() + { + int len = 0; + if (adaptee.textarray != null && adaptee.start < adaptee.end) + len = adaptee.end - adaptee.start; + return len; + } + + /** + * @see org.w3c.dom.CharacterData#substringData + */ + public String substringData(int offset, + int count) throws DOMException + { + int len; + String value = null; + if (count < 0) + { + throw new DOMExceptionImpl(DOMException.INDEX_SIZE_ERR, + "Invalid length"); + } + if (adaptee.textarray != null && adaptee.start < adaptee.end) + { + if (adaptee.start + offset >= adaptee.end) + { + throw new DOMExceptionImpl(DOMException.INDEX_SIZE_ERR, + "Invalid offset"); + } + len = count; + if (adaptee.start + offset + len - 1 >= adaptee.end) + len = adaptee.end - adaptee.start - offset; + + value = Lexer.getString(adaptee.textarray, + adaptee.start + offset, + len); + } + return value; + } + + /** + * @see org.w3c.dom.CharacterData#appendData + */ + public void 
appendData(String arg) throws DOMException + { + // NOT SUPPORTED + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + + /** + * @see org.w3c.dom.CharacterData#insertData + */ + public void insertData(int offset, + String arg) throws DOMException + { + // NOT SUPPORTED + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + + /** + * @see org.w3c.dom.CharacterData#deleteData + */ + public void deleteData(int offset, + int count) throws DOMException + { + // NOT SUPPORTED + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + + /** + * @see org.w3c.dom.CharacterData#replaceData + */ + public void replaceData(int offset, + int count, + String arg) throws DOMException + { + // NOT SUPPORTED + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCommentImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCommentImpl.java new file mode 100644 index 0000000..2491714 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCommentImpl.java @@ -0,0 +1,55 @@ +/* + * @(#)DOMCommentImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMCommentImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMCommentImpl extends DOMCharacterDataImpl + implements org.w3c.dom.Comment { + + protected DOMCommentImpl(Node adaptee) + { + super(adaptee); + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeName + */ + public String getNodeName() + { + return "#comment"; + } + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + return org.w3c.dom.Node.COMMENT_NODE; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentImpl.java new file mode 100644 index 0000000..52f4f73 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentImpl.java @@ -0,0 +1,261 @@ +/* + * @(#)DOMDocumentImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMDocumentImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM Support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMDocumentImpl extends DOMNodeImpl implements org.w3c.dom.Document { + + private TagTable tt; // a DOM Document has its own TagTable. + + protected DOMDocumentImpl(Node adaptee) + { + super(adaptee); + tt = new TagTable(); + } + + public void setTagTable(TagTable tt) + { + this.tt = tt; + } + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeName + */ + public String getNodeName() + { + return "#document"; + } + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + return org.w3c.dom.Node.DOCUMENT_NODE; + } + + /** + * @see org.w3c.dom.Document#getDoctype + */ + public org.w3c.dom.DocumentType getDoctype() + { + Node node = adaptee.content; + while (node != null) { + if (node.type == Node.DocTypeTag) break; + node = node.next; + } + if (node != null) + return (org.w3c.dom.DocumentType)node.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Document#getImplementation + */ + public org.w3c.dom.DOMImplementation getImplementation() + { + // NOT SUPPORTED + return null; + } + + /** + * @see org.w3c.dom.Document#getDocumentElement + */ + public org.w3c.dom.Element getDocumentElement() + { + Node node = adaptee.content; + while (node != null) { + if (node.type == Node.StartTag || + node.type == Node.StartEndTag) break; + node = node.next; + } + if (node != null) + return (org.w3c.dom.Element)node.getAdapter(); + else 
+ return null; + } + + /** + * @see org.w3c.dom.Document#createElement + */ + public org.w3c.dom.Element createElement(String tagName) + throws DOMException + { + Node node = new Node(Node.StartEndTag, null, 0, 0, tagName, tt); + if (node != null) { + if (node.tag == null) // Fix Bug 121206 + node.tag = tt.xmlTags; + return (org.w3c.dom.Element)node.getAdapter(); + } + else + return null; + } + + /** + * @see org.w3c.dom.Document#createDocumentFragment + */ + public org.w3c.dom.DocumentFragment createDocumentFragment() + { + // NOT SUPPORTED + return null; + } + + /** + * @see org.w3c.dom.Document#createTextNode + */ + public org.w3c.dom.Text createTextNode(String data) + { + byte[] textarray = Lexer.getBytes(data); + Node node = new Node(Node.TextNode, textarray, 0, textarray.length); + if (node != null) + return (org.w3c.dom.Text)node.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Document#createComment + */ + public org.w3c.dom.Comment createComment(String data) + { + byte[] textarray = Lexer.getBytes(data); + Node node = new Node(Node.CommentTag, textarray, 0, textarray.length); + if (node != null) + return (org.w3c.dom.Comment)node.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Document#createCDATASection + */ + public org.w3c.dom.CDATASection createCDATASection(String data) + throws DOMException + { + // NOT SUPPORTED + return null; + } + + /** + * @see org.w3c.dom.Document#createProcessingInstruction + */ + public org.w3c.dom.ProcessingInstruction createProcessingInstruction(String target, + String data) + throws DOMException + { + throw new DOMExceptionImpl(DOMException.NOT_SUPPORTED_ERR, + "HTML document"); + } + + /** + * @see org.w3c.dom.Document#createAttribute + */ + public org.w3c.dom.Attr createAttribute(String name) + throws DOMException + { + AttVal av = new AttVal(null, null, (int)'"', name, null); + if (av != null) { + av.dict = + AttributeTable.getDefaultAttributeTable().findAttribute(av); + return 
(org.w3c.dom.Attr)av.getAdapter(); + } else { + return null; + } + } + + /** + * @see org.w3c.dom.Document#createEntityReference + */ + public org.w3c.dom.EntityReference createEntityReference(String name) + throws DOMException + { + // NOT SUPPORTED + return null; + } + + /** + * @see org.w3c.dom.Document#getElementsByTagName + */ + public org.w3c.dom.NodeList getElementsByTagName(String tagname) + { + return new DOMNodeListByTagNameImpl(this.adaptee, tagname); + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public org.w3c.dom.Node importNode(org.w3c.dom.Node importedNode, boolean deep) + throws org.w3c.dom.DOMException + { + return null; + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public org.w3c.dom.Attr createAttributeNS(String namespaceURI, + String qualifiedName) + throws org.w3c.dom.DOMException + { + return null; + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public org.w3c.dom.Element createElementNS(String namespaceURI, + String qualifiedName) + throws org.w3c.dom.DOMException + { + return null; + } + + /** + * DOM2 - not implemented. + */ + public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI, + String localName) + { + return null; + } + + /** + * DOM2 - not implemented. + */ + public org.w3c.dom.Element getElementById(String elementId) + { + return null; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentTypeImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentTypeImpl.java new file mode 100644 index 0000000..3e9fb8f --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentTypeImpl.java @@ -0,0 +1,107 @@ +/* + * @(#)DOMDocumentTypeImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMDocumentTypeImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMDocumentTypeImpl extends DOMNodeImpl + implements org.w3c.dom.DocumentType { + + protected DOMDocumentTypeImpl(Node adaptee) + { + super(adaptee); + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + return org.w3c.dom.Node.DOCUMENT_TYPE_NODE; + } + + /** + * @see org.w3c.dom.Node#getNodeName + */ + public String getNodeName() + { + return getName(); + } + + /** + * @see org.w3c.dom.DocumentType#getName + */ + public String getName() + { + String value = null; + if (adaptee.type == Node.DocTypeTag) + { + + if (adaptee.textarray != null && adaptee.start < adaptee.end) + { + value = Lexer.getString(adaptee.textarray, + adaptee.start, + adaptee.end - adaptee.start); + } + } + return value; + } + + public org.w3c.dom.NamedNodeMap getEntities() + { + // NOT SUPPORTED + return null; + } + + public org.w3c.dom.NamedNodeMap getNotations() + { + // NOT SUPPORTED + return null; + } + + /** + * DOM2 - not implemented. + */ + public String getPublicId() { + return null; + } + + /** + * DOM2 - not implemented. + */ + public String getSystemId() { + return null; + } + + /** + * DOM2 - not implemented. 
+ */ + public String getInternalSubset() { + return null; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMElementImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMElementImpl.java new file mode 100644 index 0000000..f9a367f --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMElementImpl.java @@ -0,0 +1,307 @@ +/* + * @(#)DOMElementImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMElementImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM Support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMElementImpl extends DOMNodeImpl + implements org.w3c.dom.Element { + + protected DOMElementImpl(Node adaptee) + { + super(adaptee); + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + return org.w3c.dom.Node.ELEMENT_NODE; + } + + /** + * @see org.w3c.dom.Element#getTagName + */ + public String getTagName() + { + return super.getNodeName(); + } + + /** + * @see org.w3c.dom.Element#getAttribute + */ + public String getAttribute(String name) + { + if (this.adaptee == null) + return null; + + AttVal att = this.adaptee.attributes; + while (att != null) { + if (att.attribute.equals(name)) break; + att = att.next; + } + if (att != null) + return att.value; + else + return ""; + } + + /** + * @see 
org.w3c.dom.Element#setAttribute + */ + public void setAttribute(String name, + String value) + throws DOMException + { + if (this.adaptee == null) + return; + + AttVal att = this.adaptee.attributes; + while (att != null) { + if (att.attribute.equals(name)) break; + att = att.next; + } + if (att != null) { + att.value = value; + } else { + att = new AttVal(null, null, (int)'"', name, value); + att.dict = + AttributeTable.getDefaultAttributeTable().findAttribute(att); + if (this.adaptee.attributes == null) { + this.adaptee.attributes = att; + } else { + att.next = this.adaptee.attributes; + this.adaptee.attributes = att; + } + } + } + + /** + * @see org.w3c.dom.Element#removeAttribute + */ + public void removeAttribute(String name) + throws DOMException + { + if (this.adaptee == null) + return; + + AttVal att = this.adaptee.attributes; + AttVal pre = null; + while (att != null) { + if (att.attribute.equals(name)) break; + pre = att; + att = att.next; + } + if (att != null) { + if (pre == null) { + this.adaptee.attributes = att.next; + } else { + pre.next = att.next; + } + } + } + + /** + * @see org.w3c.dom.Element#getAttributeNode + */ + public org.w3c.dom.Attr getAttributeNode(String name) + { + if (this.adaptee == null) + return null; + + AttVal att = this.adaptee.attributes; + while (att != null) { + if (att.attribute.equals(name)) break; + att = att.next; + } + if (att != null) + return att.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Element#setAttributeNode + */ + public org.w3c.dom.Attr setAttributeNode(org.w3c.dom.Attr newAttr) + throws DOMException + { + if (newAttr == null) + return null; + if (!(newAttr instanceof DOMAttrImpl)) { + throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR, + "newAttr not instanceof DOMAttrImpl"); + } + + DOMAttrImpl newatt = (DOMAttrImpl)newAttr; + String name = newatt.avAdaptee.attribute; + org.w3c.dom.Attr result = null; + + AttVal att = this.adaptee.attributes; + while (att != null) { + if 
(att.attribute.equals(name)) break; + att = att.next; + } + if (att != null) { + result = att.getAdapter(); + att.adapter = newAttr; + } else { + if (this.adaptee.attributes == null) { + this.adaptee.attributes = newatt.avAdaptee; + } else { + newatt.avAdaptee.next = this.adaptee.attributes; + this.adaptee.attributes = newatt.avAdaptee; + } + } + return result; + } + + /** + * @see org.w3c.dom.Element#removeAttributeNode + */ + public org.w3c.dom.Attr removeAttributeNode(org.w3c.dom.Attr oldAttr) + throws DOMException + { + if (oldAttr == null) + return null; + + org.w3c.dom.Attr result = null; + AttVal att = this.adaptee.attributes; + AttVal pre = null; + while (att != null) { + if (att.getAdapter() == oldAttr) break; + pre = att; + att = att.next; + } + if (att != null) { + if (pre == null) { + this.adaptee.attributes = att.next; + } else { + pre.next = att.next; + } + result = oldAttr; + } else { + throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, + "oldAttr not found"); + } + return result; + } + + /** + * @see org.w3c.dom.Element#getElementsByTagName + */ + public org.w3c.dom.NodeList getElementsByTagName(String name) + { + return new DOMNodeListByTagNameImpl(this.adaptee, name); + } + + /** + * @see org.w3c.dom.Element#normalize + */ + public void normalize() + { + // NOT SUPPORTED + } + + /** + * DOM2 - not implemented. + */ + public String getAttributeNS(String namespaceURI, String localName) + { + return null; + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public void setAttributeNS(String namespaceURI, + String qualifiedName, + String value) + throws org.w3c.dom.DOMException + { + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public void removeAttributeNS(String namespaceURI, String localName) + throws org.w3c.dom.DOMException + { + } + + /** + * DOM2 - not implemented. 
+ */ + public org.w3c.dom.Attr getAttributeNodeNS(String namespaceURI, + String localName) + { + return null; + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public org.w3c.dom.Attr setAttributeNodeNS(org.w3c.dom.Attr newAttr) + throws org.w3c.dom.DOMException + { + return null; + } + + /** + * DOM2 - not implemented. + */ + public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI, + String localName) + { + return null; + } + + /** + * DOM2 - not implemented. + */ + public boolean hasAttribute(String name) + { + return false; + } + + /** + * DOM2 - not implemented. + */ + public boolean hasAttributeNS(String namespaceURI, + String localName) + { + return false; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMExceptionImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMExceptionImpl.java new file mode 100644 index 0000000..75c5337 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMExceptionImpl.java @@ -0,0 +1,37 @@ +/* + * @(#)DOMExceptionImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMExceptionImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM Support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMExceptionImpl extends org.w3c.dom.DOMException { + + public DOMExceptionImpl(short code, String message) { + super(code, message); + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeImpl.java new file mode 100644 index 0000000..d0b14e2 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeImpl.java @@ -0,0 +1,488 @@ +/* + * @(#)DOMNodeImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMNodeImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM Support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMNodeImpl implements org.w3c.dom.Node { + + protected Node adaptee; + + protected DOMNodeImpl(Node adaptee) + { + this.adaptee = adaptee; + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeValue + */ + public String getNodeValue() throws DOMException + { + String value = ""; //BAK 10/10/2000 replaced null + if (adaptee.type == Node.TextNode || + adaptee.type == Node.CDATATag || + adaptee.type == Node.CommentTag || + adaptee.type == Node.ProcInsTag) + { + + if (adaptee.textarray != null && adaptee.start < adaptee.end) + { + value = Lexer.getString(adaptee.textarray, + adaptee.start, + adaptee.end - adaptee.start); + } + } + return value; + } + + /** + * @see org.w3c.dom.Node#setNodeValue + */ + public void setNodeValue(String nodeValue) throws DOMException + { + if (adaptee.type == Node.TextNode || + adaptee.type == Node.CDATATag || + adaptee.type == Node.CommentTag || + adaptee.type == Node.ProcInsTag) + { + byte[] textarray = Lexer.getBytes(nodeValue); + adaptee.textarray = textarray; + adaptee.start = 0; + adaptee.end = textarray.length; + } + } + + /** + * @see org.w3c.dom.Node#getNodeName + */ + public String getNodeName() + { + return adaptee.element; + } + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + short result = -1; + switch (adaptee.type) { + case Node.RootNode: + result = 
org.w3c.dom.Node.DOCUMENT_NODE; + break; + case Node.DocTypeTag: + result = org.w3c.dom.Node.DOCUMENT_TYPE_NODE; + break; + case Node.CommentTag: + result = org.w3c.dom.Node.COMMENT_NODE; + break; + case Node.ProcInsTag: + result = org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE; + break; + case Node.TextNode: + result = org.w3c.dom.Node.TEXT_NODE; + break; + case Node.CDATATag: + result = org.w3c.dom.Node.CDATA_SECTION_NODE; + break; + case Node.StartTag: + case Node.StartEndTag: + result = org.w3c.dom.Node.ELEMENT_NODE; + break; + } + return result; + } + + /** + * @see org.w3c.dom.Node#getParentNode + */ + public org.w3c.dom.Node getParentNode() + { + if (adaptee.parent != null) + return adaptee.parent.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Node#getChildNodes + */ + public org.w3c.dom.NodeList getChildNodes() + { + return new DOMNodeListImpl(adaptee); + } + + /** + * @see org.w3c.dom.Node#getFirstChild + */ + public org.w3c.dom.Node getFirstChild() + { + if (adaptee.content != null) + return adaptee.content.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Node#getLastChild + */ + public org.w3c.dom.Node getLastChild() + { + if (adaptee.last != null) + return adaptee.last.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Node#getPreviousSibling + */ + public org.w3c.dom.Node getPreviousSibling() + { + if (adaptee.prev != null) + return adaptee.prev.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Node#getNextSibling + */ + public org.w3c.dom.Node getNextSibling() + { + if (adaptee.next != null) + return adaptee.next.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Node#getAttributes + */ + public org.w3c.dom.NamedNodeMap getAttributes() + { + return new DOMAttrMapImpl(adaptee.attributes); + } + + /** + * @see org.w3c.dom.Node#getOwnerDocument + */ + public org.w3c.dom.Document getOwnerDocument() + { + Node node; + + node = this.adaptee; + if (node != null && 
node.type == Node.RootNode) + return null; + + for (node = this.adaptee; + node != null && node.type != Node.RootNode; node = node.parent); + + if (node != null) + return (org.w3c.dom.Document)node.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Node#insertBefore + */ + public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild, + org.w3c.dom.Node refChild) + throws DOMException + { + // TODO - handle newChild already in tree + + if (newChild == null) + return null; + if (!(newChild instanceof DOMNodeImpl)) { + throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR, + "newChild not instanceof DOMNodeImpl"); + } + DOMNodeImpl newCh = (DOMNodeImpl)newChild; + + if (this.adaptee.type == Node.RootNode) { + if (newCh.adaptee.type != Node.DocTypeTag && + newCh.adaptee.type != Node.ProcInsTag) { + throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, + "newChild cannot be a child of this node"); + } + } else if (this.adaptee.type == Node.StartTag) { + if (newCh.adaptee.type != Node.StartTag && + newCh.adaptee.type != Node.StartEndTag && + newCh.adaptee.type != Node.CommentTag && + newCh.adaptee.type != Node.TextNode && + newCh.adaptee.type != Node.CDATATag) { + throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, + "newChild cannot be a child of this node"); + } + } + if (refChild == null) { + Node.insertNodeAtEnd(this.adaptee, newCh.adaptee); + if (this.adaptee.type == Node.StartEndTag) { + this.adaptee.setType(Node.StartTag); + } + } else { + Node ref = this.adaptee.content; + while (ref != null) { + if (ref.getAdapter() == refChild) break; + ref = ref.next; + } + if (ref == null) { + throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, + "refChild not found"); + } + Node.insertNodeBeforeElement(ref, newCh.adaptee); + } + return newChild; + } + + /** + * @see org.w3c.dom.Node#replaceChild + */ + public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild, + org.w3c.dom.Node oldChild) + throws DOMException + { + // 
TODO - handle newChild already in tree + + if (newChild == null) + return null; + if (!(newChild instanceof DOMNodeImpl)) { + throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR, + "newChild not instanceof DOMNodeImpl"); + } + DOMNodeImpl newCh = (DOMNodeImpl)newChild; + + if (this.adaptee.type == Node.RootNode) { + if (newCh.adaptee.type != Node.DocTypeTag && + newCh.adaptee.type != Node.ProcInsTag) { + throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, + "newChild cannot be a child of this node"); + } + } else if (this.adaptee.type == Node.StartTag) { + if (newCh.adaptee.type != Node.StartTag && + newCh.adaptee.type != Node.StartEndTag && + newCh.adaptee.type != Node.CommentTag && + newCh.adaptee.type != Node.TextNode && + newCh.adaptee.type != Node.CDATATag) { + throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, + "newChild cannot be a child of this node"); + } + } + if (oldChild == null) { + throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, + "oldChild not found"); + } else { + Node n; + Node ref = this.adaptee.content; + while (ref != null) { + if (ref.getAdapter() == oldChild) break; + ref = ref.next; + } + if (ref == null) { + throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, + "oldChild not found"); + } + newCh.adaptee.next = ref.next; + newCh.adaptee.prev = ref.prev; + newCh.adaptee.last = ref.last; + newCh.adaptee.parent = ref.parent; + newCh.adaptee.content = ref.content; + if (ref.parent != null) { + if (ref.parent.content == ref) + ref.parent.content = newCh.adaptee; + if (ref.parent.last == ref) + ref.parent.last = newCh.adaptee; + } + if (ref.prev != null) { + ref.prev.next = newCh.adaptee; + } + if (ref.next != null) { + ref.next.prev = newCh.adaptee; + } + for (n = ref.content; n != null; n = n.next) { + if (n.parent == ref) + n.parent = newCh.adaptee; + } + } + return oldChild; + } + + /** + * @see org.w3c.dom.Node#removeChild + */ + public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild) + throws 
DOMException + { + if (oldChild == null) + return null; + + Node ref = this.adaptee.content; + while (ref != null) { + if (ref.getAdapter() == oldChild) break; + ref = ref.next; + } + if (ref == null) { + throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, + "refChild not found"); + } + Node.discardElement(ref); + + if (this.adaptee.content == null + && this.adaptee.type == Node.StartTag) { + this.adaptee.setType(Node.StartEndTag); + } + + return oldChild; + } + + /** + * @see org.w3c.dom.Node#appendChild + */ + public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild) + throws DOMException + { + // TODO - handle newChild already in tree + + if (newChild == null) + return null; + if (!(newChild instanceof DOMNodeImpl)) { + throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR, + "newChild not instanceof DOMNodeImpl"); + } + DOMNodeImpl newCh = (DOMNodeImpl)newChild; + + if (this.adaptee.type == Node.RootNode) { + if (newCh.adaptee.type != Node.DocTypeTag && + newCh.adaptee.type != Node.ProcInsTag) { + throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, + "newChild cannot be a child of this node"); + } + } else if (this.adaptee.type == Node.StartTag) { + if (newCh.adaptee.type != Node.StartTag && + newCh.adaptee.type != Node.StartEndTag && + newCh.adaptee.type != Node.CommentTag && + newCh.adaptee.type != Node.TextNode && + newCh.adaptee.type != Node.CDATATag) { + throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, + "newChild cannot be a child of this node"); + } + } + Node.insertNodeAtEnd(this.adaptee, newCh.adaptee); + + if (this.adaptee.type == Node.StartEndTag) { + this.adaptee.setType(Node.StartTag); + } + + return newChild; + } + + /** + * @see org.w3c.dom.Node#hasChildNodes + */ + public boolean hasChildNodes() + { + return (adaptee.content != null); + } + + /** + * @see org.w3c.dom.Node#cloneNode + */ + public org.w3c.dom.Node cloneNode(boolean deep) + { + Node node = adaptee.cloneNode(deep); + node.parent = null; + return 
node.getAdapter(); + } + + /** + * DOM2 - not implemented. + */ + public void normalize() + { + } + + /** + * DOM2 - not implemented. + */ + public boolean supports(String feature, String version) + { + return isSupported(feature, version); + } + + /** + * DOM2 - not implemented. + */ + public String getNamespaceURI() + { + return null; + } + + /** + * DOM2 - not implemented. + */ + public String getPrefix() + { + return null; + } + + /** + * DOM2 - not implemented. + */ + public void setPrefix(String prefix) + throws DOMException + { + } + + /** + * DOM2 - not implemented. + */ + public String getLocalName() + { + return null; + } + + /** + * DOM2 - not implemented. + */ + public boolean isSupported(String feature,String version) { + return false; + } + + /** + * DOM2 - @see org.w3c.dom.Node#hasAttributes + * contributed by dlp@users.sourceforge.net + */ + public boolean hasAttributes() + { + return adaptee.attributes != null; + } +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListByTagNameImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListByTagNameImpl.java new file mode 100644 index 0000000..d69feb3 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListByTagNameImpl.java @@ -0,0 +1,99 @@ +/* + * @(#)DOMNodeListByTagNameImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * DOMNodeListByTagNameImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +/** + *

    The items in the NodeList are accessible via an integral + * index, starting from 0. + * + */ +public class DOMNodeListByTagNameImpl implements org.w3c.dom.NodeList { + + private Node first = null; + private String tagName = "*"; + private int currIndex = 0; + private int maxIndex = 0; + private Node currNode = null; + + protected DOMNodeListByTagNameImpl(Node first, String tagName) + { + this.first = first; + this.tagName = tagName; + } + + /** + * @see org.w3c.dom.NodeList#item + */ + public org.w3c.dom.Node item(int index) + { + currIndex = 0; + maxIndex = index; + preTraverse(first); + + if (currIndex > maxIndex && currNode != null) + return currNode.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.NodeList#getLength + */ + public int getLength() + { + currIndex = 0; + maxIndex = Integer.MAX_VALUE; + preTraverse(first); + return currIndex; + } + + protected void preTraverse(Node node) + { + if (node == null) + return; + + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + { + if (currIndex <= maxIndex && + (tagName.equals("*") || tagName.equals(node.element))) + { + currIndex += 1; + currNode = node; + } + } + if (currIndex > maxIndex) + return; + + node = node.content; + while (node != null) + { + preTraverse(node); + node = node.next; + } + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListImpl.java new file mode 100644 index 0000000..726f007 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListImpl.java @@ -0,0 +1,75 @@ +/* + * @(#)DOMNodeListImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * DOMNodeListImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +/** + *

    The items in the NodeList are accessible via an integral + * index, starting from 0. + * + */ +public class DOMNodeListImpl implements org.w3c.dom.NodeList { + + private Node parent = null; + + protected DOMNodeListImpl(Node parent) + { + this.parent = parent; + } + + /** + * @see org.w3c.dom.NodeList#item + */ + public org.w3c.dom.Node item(int index) + { + int i = 0; + Node node = parent.content; + while (node != null) { + if (i >= index) break; + i++; + node = node.next; + } + if (node != null) + return node.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.NodeList#getLength + */ + public int getLength() + { + int len = 0; + Node node = parent.content; + while (node != null) { + len++; + node = node.next; + } + return len; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMProcessingInstructionImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMProcessingInstructionImpl.java new file mode 100644 index 0000000..1eefeca --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMProcessingInstructionImpl.java @@ -0,0 +1,74 @@ +/* + * @(#)DOMProcessingInstructionImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMProcessingInstructionImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMProcessingInstructionImpl extends DOMNodeImpl + implements org.w3c.dom.ProcessingInstruction { + + protected DOMProcessingInstructionImpl(Node adaptee) + { + super(adaptee); + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + return org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE; + } + + /** + * @see org.w3c.dom.ProcessingInstruction#getTarget + */ + public String getTarget() + { + // TODO + return null; + } + + /** + * @see org.w3c.dom.ProcessingInstruction#getData + */ + public String getData() + { + return getNodeValue(); + } + + /** + * @see org.w3c.dom.ProcessingInstruction#setData + */ + public void setData(String data) throws DOMException + { + // NOT SUPPORTED + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMTextImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMTextImpl.java new file mode 100644 index 0000000..06ec997 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMTextImpl.java @@ -0,0 +1,65 @@ +/* + * @(#)DOMTextImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMTextImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMTextImpl extends DOMCharacterDataImpl + implements org.w3c.dom.Text { + + protected DOMTextImpl(Node adaptee) + { + super(adaptee); + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeName + */ + public String getNodeName() + { + return "#text"; + } + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + return org.w3c.dom.Node.TEXT_NODE; + } + + /** + * @see org.w3c.dom.Text#splitText + */ + public org.w3c.dom.Text splitText(int offset) throws DOMException + { + // NOT SUPPORTED + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Dict.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Dict.java new file mode 100644 index 0000000..4d05767 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Dict.java @@ -0,0 +1,110 @@ +/* + * @(#)Dict.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Tag dictionary node + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class Dict { + + /* content model shortcut encoding */ + + public static final int CM_UNKNOWN = 0; + public static final int CM_EMPTY = (1 << 0); + public static final int CM_HTML = (1 << 1); + public static final int CM_HEAD = (1 << 2); + public static final int CM_BLOCK = (1 << 3); + public static final int CM_INLINE = (1 << 4); + public static final int CM_LIST = (1 << 5); + public static final int CM_DEFLIST = (1 << 6); + public static final int CM_TABLE = (1 << 7); + public static final int CM_ROWGRP = (1 << 8); + public static final int CM_ROW = (1 << 9); + public static final int CM_FIELD = (1 << 10); + public static final int CM_OBJECT = (1 << 11); + public static final int CM_PARAM = (1 << 12); + public static final int CM_FRAMES = (1 << 13); + public static final int CM_HEADING = (1 << 14); + public static final int CM_OPT = (1 << 15); + public static final int CM_IMG = (1 << 16); + public static final int CM_MIXED = (1 << 17); + public static final int CM_NO_INDENT = (1 << 18); + public static final int CM_OBSOLETE = (1 << 19); + public static final int CM_NEW = (1 << 20); + public static final int CM_OMITST = (1 << 21); + + /* + + If the document uses just HTML 2.0 tags and attributes described it as 
HTML 2.0 + Similarly for HTML 3.2 and the 3 flavors of HTML 4.0. If there are proprietary + tags and attributes then describe it as HTML Proprietary. If it includes the + xml-lang or xmlns attributes but is otherwise HTML 2.0, 3.2 or 4.0 then describe + it as one of the flavors of Voyager (strict, loose or frameset). + */ + + public static final short VERS_UNKNOWN = 0; + + public static final short VERS_HTML20 = 1; + public static final short VERS_HTML32 = 2; + public static final short VERS_HTML40_STRICT = 4; + public static final short VERS_HTML40_LOOSE = 8; + public static final short VERS_FRAMES = 16; + public static final short VERS_XML = 32; + + public static final short VERS_NETSCAPE = 64; + public static final short VERS_MICROSOFT = 128; + public static final short VERS_SUN = 256; + + public static final short VERS_MALFORMED = 512; + + public static final short VERS_ALL = (VERS_HTML20|VERS_HTML32|VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES); + public static final short VERS_HTML40 = (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES); + public static final short VERS_LOOSE = (VERS_HTML32|VERS_HTML40_LOOSE|VERS_FRAMES); + public static final short VERS_IFRAMES = (VERS_HTML40_LOOSE|VERS_FRAMES); + public static final short VERS_FROM32 = (VERS_HTML40_STRICT|VERS_LOOSE); + public static final short VERS_PROPRIETARY = (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN); + + public static final short VERS_EVERYTHING = (VERS_ALL|VERS_PROPRIETARY); + + public Dict( String name, short versions, int model, + Parser parser, CheckAttribs chkattrs ) + { + this.name = name; + this.versions = versions; + this.model = model; + this.parser = parser; + this.chkattrs = chkattrs; + } + + public String name; + public short versions; + public int model; + public Parser parser; + public CheckAttribs chkattrs; +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Entity.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Entity.java new file mode 100644 index 0000000..fea6e51 --- 
/dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Entity.java @@ -0,0 +1,51 @@ +/* + * @(#)Entity.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * HTML ISO entity + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class Entity { + + public Entity( String name, short code ) + { + this.name = name; + this.code = code; + } + + public Entity( String name, int code ) + { + this.name = name; + this.code = (short)code; + } + + public String name; + public short code; + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/EntityTable.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/EntityTable.java new file mode 100644 index 0000000..aeec74d --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/EntityTable.java @@ -0,0 +1,386 @@ +/* + * @(#)EntityTable.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Entity hash table + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +import java.util.Hashtable; +import java.util.Enumeration; + +public class EntityTable { + + public EntityTable() + { + } + + public Entity lookup( String name ) + { + return (Entity)entityHashtable.get( name ); + } + + public Entity install( String name, short code ) + { + Entity ent = lookup( name ); + if ( ent == null ) { + ent = new Entity( name, code ); + entityHashtable.put( name, ent ); + } else { + ent.code = code; + } + return ent; + } + + public Entity install( Entity ent ) + { + return (Entity)entityHashtable.put( ent.name, ent ); + } + + /* entity starting with "&" returns zero on error */ + public short entityCode( String name ) + { + int c; + + if (name.length() <= 1) + return 0; + + /* numeric entitity: name = "&#" followed by number */ + if ( name.charAt(1) == '#' ) { + c = 0; /* zero on missing/bad number */ + + /* 'x' prefix denotes hexadecimal number format */ + try { + if (name.length() >= 4 && name.charAt(2) == 'x') { + c = Integer.parseInt( name.substring(3), 16 ); + } else if (name.length() >= 3) { + c = Integer.parseInt( name.substring(2) ); + } + } + catch ( NumberFormatException e ) {} + + return (short)c; + } + + /* Named entity: name ="&" followed by a name */ + Entity ent = lookup( 
name.substring(1) ); + if ( ent != null ) { + return ent.code; + } + + return 0; /* zero signifies unknown entity name */ + } + + public String entityName( short code ) + { + String name = null; + Entity ent; + Enumeration en = entityHashtable.elements(); + while ( en.hasMoreElements() ) { + ent = (Entity)en.nextElement(); + if ( ent.code == code ) { + name = ent.name; + break; + } + } + return name; + } + + private Hashtable entityHashtable = new Hashtable(); + + private static EntityTable defaultEntityTable = null; + + private static Entity[] entities = { + + new Entity( "nbsp", 160 ), + new Entity( "iexcl", 161 ), + new Entity( "cent", 162 ), + new Entity( "pound", 163 ), + new Entity( "curren", 164 ), + new Entity( "yen", 165 ), + new Entity( "brvbar", 166 ), + new Entity( "sect", 167 ), + new Entity( "uml", 168 ), + new Entity( "copy", 169 ), + new Entity( "ordf", 170 ), + new Entity( "laquo", 171 ), + new Entity( "not", 172 ), + new Entity( "shy", 173 ), + new Entity( "reg", 174 ), + new Entity( "macr", 175 ), + new Entity( "deg", 176 ), + new Entity( "plusmn", 177 ), + new Entity( "sup2", 178 ), + new Entity( "sup3", 179 ), + new Entity( "acute", 180 ), + new Entity( "micro", 181 ), + new Entity( "para", 182 ), + new Entity( "middot", 183 ), + new Entity( "cedil", 184 ), + new Entity( "sup1", 185 ), + new Entity( "ordm", 186 ), + new Entity( "raquo", 187 ), + new Entity( "frac14", 188 ), + new Entity( "frac12", 189 ), + new Entity( "frac34", 190 ), + new Entity( "iquest", 191 ), + new Entity( "Agrave", 192 ), + new Entity( "Aacute", 193 ), + new Entity( "Acirc", 194 ), + new Entity( "Atilde", 195 ), + new Entity( "Auml", 196 ), + new Entity( "Aring", 197 ), + new Entity( "AElig", 198 ), + new Entity( "Ccedil", 199 ), + new Entity( "Egrave", 200 ), + new Entity( "Eacute", 201 ), + new Entity( "Ecirc", 202 ), + new Entity( "Euml", 203 ), + new Entity( "Igrave", 204 ), + new Entity( "Iacute", 205 ), + new Entity( "Icirc", 206 ), + new Entity( "Iuml", 207 ), + 
new Entity( "ETH", 208 ), + new Entity( "Ntilde", 209 ), + new Entity( "Ograve", 210 ), + new Entity( "Oacute", 211 ), + new Entity( "Ocirc", 212 ), + new Entity( "Otilde", 213 ), + new Entity( "Ouml", 214 ), + new Entity( "times", 215 ), + new Entity( "Oslash", 216 ), + new Entity( "Ugrave", 217 ), + new Entity( "Uacute", 218 ), + new Entity( "Ucirc", 219 ), + new Entity( "Uuml", 220 ), + new Entity( "Yacute", 221 ), + new Entity( "THORN", 222 ), + new Entity( "szlig", 223 ), + new Entity( "agrave", 224 ), + new Entity( "aacute", 225 ), + new Entity( "acirc", 226 ), + new Entity( "atilde", 227 ), + new Entity( "auml", 228 ), + new Entity( "aring", 229 ), + new Entity( "aelig", 230 ), + new Entity( "ccedil", 231 ), + new Entity( "egrave", 232 ), + new Entity( "eacute", 233 ), + new Entity( "ecirc", 234 ), + new Entity( "euml", 235 ), + new Entity( "igrave", 236 ), + new Entity( "iacute", 237 ), + new Entity( "icirc", 238 ), + new Entity( "iuml", 239 ), + new Entity( "eth", 240 ), + new Entity( "ntilde", 241 ), + new Entity( "ograve", 242 ), + new Entity( "oacute", 243 ), + new Entity( "ocirc", 244 ), + new Entity( "otilde", 245 ), + new Entity( "ouml", 246 ), + new Entity( "divide", 247 ), + new Entity( "oslash", 248 ), + new Entity( "ugrave", 249 ), + new Entity( "uacute", 250 ), + new Entity( "ucirc", 251 ), + new Entity( "uuml", 252 ), + new Entity( "yacute", 253 ), + new Entity( "thorn", 254 ), + new Entity( "yuml", 255 ), + new Entity( "fnof", 402 ), + new Entity( "Alpha", 913 ), + new Entity( "Beta", 914 ), + new Entity( "Gamma", 915 ), + new Entity( "Delta", 916 ), + new Entity( "Epsilon", 917 ), + new Entity( "Zeta", 918 ), + new Entity( "Eta", 919 ), + new Entity( "Theta", 920 ), + new Entity( "Iota", 921 ), + new Entity( "Kappa", 922 ), + new Entity( "Lambda", 923 ), + new Entity( "Mu", 924 ), + new Entity( "Nu", 925 ), + new Entity( "Xi", 926 ), + new Entity( "Omicron", 927 ), + new Entity( "Pi", 928 ), + new Entity( "Rho", 929 ), + new Entity( "Sigma", 
931 ), + new Entity( "Tau", 932 ), + new Entity( "Upsilon", 933 ), + new Entity( "Phi", 934 ), + new Entity( "Chi", 935 ), + new Entity( "Psi", 936 ), + new Entity( "Omega", 937 ), + new Entity( "alpha", 945 ), + new Entity( "beta", 946 ), + new Entity( "gamma", 947 ), + new Entity( "delta", 948 ), + new Entity( "epsilon", 949 ), + new Entity( "zeta", 950 ), + new Entity( "eta", 951 ), + new Entity( "theta", 952 ), + new Entity( "iota", 953 ), + new Entity( "kappa", 954 ), + new Entity( "lambda", 955 ), + new Entity( "mu", 956 ), + new Entity( "nu", 957 ), + new Entity( "xi", 958 ), + new Entity( "omicron", 959 ), + new Entity( "pi", 960 ), + new Entity( "rho", 961 ), + new Entity( "sigmaf", 962 ), + new Entity( "sigma", 963 ), + new Entity( "tau", 964 ), + new Entity( "upsilon", 965 ), + new Entity( "phi", 966 ), + new Entity( "chi", 967 ), + new Entity( "psi", 968 ), + new Entity( "omega", 969 ), + new Entity( "thetasym", 977 ), + new Entity( "upsih", 978 ), + new Entity( "piv", 982 ), + new Entity( "bull", 8226 ), + new Entity( "hellip", 8230 ), + new Entity( "prime", 8242 ), + new Entity( "Prime", 8243 ), + new Entity( "oline", 8254 ), + new Entity( "frasl", 8260 ), + new Entity( "weierp", 8472 ), + new Entity( "image", 8465 ), + new Entity( "real", 8476 ), + new Entity( "trade", 8482 ), + new Entity( "alefsym", 8501 ), + new Entity( "larr", 8592 ), + new Entity( "uarr", 8593 ), + new Entity( "rarr", 8594 ), + new Entity( "darr", 8595 ), + new Entity( "harr", 8596 ), + new Entity( "crarr", 8629 ), + new Entity( "lArr", 8656 ), + new Entity( "uArr", 8657 ), + new Entity( "rArr", 8658 ), + new Entity( "dArr", 8659 ), + new Entity( "hArr", 8660 ), + new Entity( "forall", 8704 ), + new Entity( "part", 8706 ), + new Entity( "exist", 8707 ), + new Entity( "empty", 8709 ), + new Entity( "nabla", 8711 ), + new Entity( "isin", 8712 ), + new Entity( "notin", 8713 ), + new Entity( "ni", 8715 ), + new Entity( "prod", 8719 ), + new Entity( "sum", 8721 ), + new Entity( 
"minus", 8722 ), + new Entity( "lowast", 8727 ), + new Entity( "radic", 8730 ), + new Entity( "prop", 8733 ), + new Entity( "infin", 8734 ), + new Entity( "ang", 8736 ), + new Entity( "and", 8743 ), + new Entity( "or", 8744 ), + new Entity( "cap", 8745 ), + new Entity( "cup", 8746 ), + new Entity( "int", 8747 ), + new Entity( "there4", 8756 ), + new Entity( "sim", 8764 ), + new Entity( "cong", 8773 ), + new Entity( "asymp", 8776 ), + new Entity( "ne", 8800 ), + new Entity( "equiv", 8801 ), + new Entity( "le", 8804 ), + new Entity( "ge", 8805 ), + new Entity( "sub", 8834 ), + new Entity( "sup", 8835 ), + new Entity( "nsub", 8836 ), + new Entity( "sube", 8838 ), + new Entity( "supe", 8839 ), + new Entity( "oplus", 8853 ), + new Entity( "otimes", 8855 ), + new Entity( "perp", 8869 ), + new Entity( "sdot", 8901 ), + new Entity( "lceil", 8968 ), + new Entity( "rceil", 8969 ), + new Entity( "lfloor", 8970 ), + new Entity( "rfloor", 8971 ), + new Entity( "lang", 9001 ), + new Entity( "rang", 9002 ), + new Entity( "loz", 9674 ), + new Entity( "spades", 9824 ), + new Entity( "clubs", 9827 ), + new Entity( "hearts", 9829 ), + new Entity( "diams", 9830 ), + new Entity( "quot", 34 ), + new Entity( "amp", 38 ), + new Entity( "lt", 60 ), + new Entity( "gt", 62 ), + new Entity( "OElig", 338 ), + new Entity( "oelig", 339 ), + new Entity( "Scaron", 352 ), + new Entity( "scaron", 353 ), + new Entity( "Yuml", 376 ), + new Entity( "circ", 710 ), + new Entity( "tilde", 732 ), + new Entity( "ensp", 8194 ), + new Entity( "emsp", 8195 ), + new Entity( "thinsp", 8201 ), + new Entity( "zwnj", 8204 ), + new Entity( "zwj", 8205 ), + new Entity( "lrm", 8206 ), + new Entity( "rlm", 8207 ), + new Entity( "ndash", 8211 ), + new Entity( "mdash", 8212 ), + new Entity( "lsquo", 8216 ), + new Entity( "rsquo", 8217 ), + new Entity( "sbquo", 8218 ), + new Entity( "ldquo", 8220 ), + new Entity( "rdquo", 8221 ), + new Entity( "bdquo", 8222 ), + new Entity( "dagger", 8224 ), + new Entity( "Dagger", 8225 
), + new Entity( "permil", 8240 ), + new Entity( "lsaquo", 8249 ), + new Entity( "rsaquo", 8250 ), + new Entity( "euro", 8364 ) + + }; + + public static EntityTable getDefaultEntityTable() + { + if ( defaultEntityTable == null ) { + defaultEntityTable = new EntityTable(); + for ( int i = 0; i < entities.length; i++ ) { + defaultEntityTable.install( entities[i] ); + } + } + return defaultEntityTable; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/IStack.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/IStack.java new file mode 100644 index 0000000..8561a43 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/IStack.java @@ -0,0 +1,65 @@ +/* + * @(#)IStack.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Inline stack node + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class IStack { + + /* + Mosaic handles inlines via a separate stack from other elements + We duplicate this to recover from inline markup errors such as: + + italic text +

    more italic text normal text + + which for compatibility with Mosaic is mapped to: + + italic text +

    more italic text normal text + + Note that any inline end tag pop's the effect of the current + inline start tag, so that pop's in the above example. + */ + + public IStack next; + public Dict tag; /* tag's dictionary definition */ + public String element; /* name (null for text nodes) */ + public AttVal attributes; + + public IStack() + { + next = null; + tag = null; + element = null; + attributes = null; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Lexer.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Lexer.java new file mode 100644 index 0000000..1ed3db6 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Lexer.java @@ -0,0 +1,3134 @@ +/* + * @(#)Lexer.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Lexer for html parser + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +/* + Given a file stream fp it returns a sequence of tokens. + + GetToken(fp) gets the next token + UngetToken(fp) provides one level undo + + The tags include an attribute list: + + - linked list of attribute/value nodes + - each node has 2 null-terminated strings. 
+ - entities are replaced in attribute values + + white space is compacted if not in preformatted mode + If not in preformatted mode then leading white space + is discarded and subsequent white space sequences + compacted to single space chars. + + If XmlTags is no then Tag names are folded to upper + case and attribute names to lower case. + + Not yet done: + - Doctype subset and marked sections +*/ + +import java.io.PrintWriter; +import java.util.Stack; +import java.util.Vector; + +import org.eclipse.core.resources.IFile; +import sun.security.krb5.internal.av; + +public class Lexer { + + private IFile iFile; + public StreamIn in; /* file stream */ + public PrintWriter errout; /* error output stream */ + public short badAccess; /* for accessibility errors */ + public short badLayout; /* for bad style errors */ + public short badChars; /* for bad char encodings */ + public short badForm; /* for mismatched/mispositioned form tags */ + public short warnings; /* count of warnings in this document */ + public short errors; /* count of errors */ + public int lines; /* lines seen */ + public int columns; /* at start of current token */ + public boolean waswhite; /* used to collapse contiguous white space */ + public boolean pushed; /* true after token has been pushed back */ + public boolean insertspace; /* when space is moved after end tag */ + public boolean excludeBlocks; /* Netscape compatibility */ + public boolean exiled; /* true if moved out of table */ + public boolean isvoyager; /* true if xmlns attribute on html element */ + public short versions; /* bit vector of HTML versions */ + public int doctype; /* version as given by doctype (if any) */ + public boolean badDoctype; /* e.g. 
if html or PUBLIC is missing */ + public int txtstart; /* start of current node */ + public int txtend; /* end of current node */ + public short state; /* state of lexer's finite state machine */ + public Node token; + + /* + lexer character buffer + + parse tree nodes span onto this buffer + which contains the concatenated text + contents of all of the elements. + + lexsize must be reset for each file. + */ + public byte[] lexbuf; /* byte buffer of UTF-8 chars */ + public int lexlength; /* allocated */ + public int lexsize; /* used */ + + /* Inline stack for compatibility with Mosaic */ + public Node inode; /* for deferring text node */ + public int insert; /* for inferring inline tags */ + public Stack istack; + public int istackbase; /* start of frame */ + + public Style styles; /* used for cleaning up presentation markup */ + + public Configuration configuration; + protected int seenBodyEndTag; /* used by parser */ + private Vector nodeList; + + public Lexer(IFile iFile, StreamIn in, Configuration configuration) + { + this.iFile = iFile; + this.in = in; + this.lines = 1; + this.columns = 1; + this.state = LEX_CONTENT; + this.badAccess = 0; + this.badLayout = 0; + this.badChars = 0; + this.badForm = 0; + this.warnings = 0; + this.errors = 0; + this.waswhite = false; + this.pushed = false; + this.insertspace = false; + this.exiled = false; + this.isvoyager = false; + this.versions = Dict.VERS_EVERYTHING; + this.doctype = Dict.VERS_UNKNOWN; + this.badDoctype = false; + this.txtstart = 0; + this.txtend = 0; + this.token = null; + this.lexbuf = null; + this.lexlength = 0; + this.lexsize = 0; + this.inode = null; + this.insert = -1; + this.istack = new Stack(); + this.istackbase = 0; + this.styles = null; + this.configuration = configuration; + this.seenBodyEndTag = 0; + this.nodeList = new Vector(); + } + + public IFile getIFile() { + return iFile; + } + + public Node newNode() + { + Node node = new Node(); + nodeList.addElement(node); + return node; + } + + public 
Node newNode(short type, byte[] textarray, int start, int end) + { + Node node = new Node(type, textarray, start, end); + nodeList.addElement(node); + return node; + } + + public Node newNode(short type, byte[] textarray, int start, int end, String element) + { + Node node = new Node(type, textarray, start, end, element, configuration.tt); + nodeList.addElement(node); + return node; + } + + public Node cloneNode(Node node) + { + Node cnode = (Node)node.clone(); + nodeList.addElement(cnode); + for (AttVal att = cnode.attributes; att != null; att = att.next) { + if (att.asp != null) + nodeList.addElement(att.asp); + if (att.php != null) + nodeList.addElement(att.php); + } + return cnode; + } + + public AttVal cloneAttributes(AttVal attrs) + { + AttVal cattrs = (AttVal)attrs.clone(); + for (AttVal att = cattrs; att != null; att = att.next) { + if (att.asp != null) + nodeList.addElement(att.asp); + if (att.php != null) + nodeList.addElement(att.php); + } + return cattrs; + } + + protected void updateNodeTextArrays(byte[] oldtextarray, byte[] newtextarray) + { + Node node; + for (int i = 0; i < nodeList.size(); i++) { + node = (Node)(nodeList.elementAt(i)); + if (node.textarray == oldtextarray) + node.textarray = newtextarray; + } + } + + /* used for creating preformatted text from Word2000 */ + public Node newLineNode() + { + Node node = newNode(); + + node.textarray = this.lexbuf; + node.start = this.lexsize; + addCharToLexer((int)'\n'); + node.end = this.lexsize; + return node; + } + + // Should always be able convert to/from UTF-8, so encoding exceptions are + // converted to an Error to avoid adding throws declarations in + // lots of methods. 
+ + public static byte[] getBytes(String str) { + try { + return str.getBytes("UTF8"); + } catch (java.io.UnsupportedEncodingException e) { + throw new Error("string to UTF-8 conversion failed: " + e.getMessage()); + } + } + + public static String getString(byte[] bytes, int offset, int length) { + try { + return new String(bytes, offset, length, "UTF8"); + } catch (java.io.UnsupportedEncodingException e) { + throw new Error("UTF-8 to string conversion failed: " + e.getMessage()); + } + } + + public boolean endOfInput() + { + return this.in.isEndOfStream(); + } + + public void addByte(int c) + { + if (this.lexsize + 1 >= this.lexlength) + { + while (this.lexsize + 1 >= this.lexlength) + { + if (this.lexlength == 0) + this.lexlength = 8192; + else + this.lexlength = this.lexlength * 2; + } + + byte[] temp = this.lexbuf; + this.lexbuf = new byte[ this.lexlength ]; + if (temp != null) + { + System.arraycopy( temp, 0, this.lexbuf, 0, temp.length ); + updateNodeTextArrays(temp, this.lexbuf); + } + } + + this.lexbuf[this.lexsize++] = (byte)c; + this.lexbuf[this.lexsize] = (byte)'\0'; /* debug */ + } + + public void changeChar(byte c) + { + if (this.lexsize > 0) + { + this.lexbuf[this.lexsize-1] = c; + } + } + + /* store char c as UTF-8 encoded byte stream */ + public void addCharToLexer(int c) + { + if (c < 128) + addByte(c); + else if (c <= 0x7FF) + { + addByte(0xC0 | (c >> 6)); + addByte(0x80 | (c & 0x3F)); + } + else if (c <= 0xFFFF) + { + addByte(0xE0 | (c >> 12)); + addByte(0x80 | ((c >> 6) & 0x3F)); + addByte(0x80 | (c & 0x3F)); + } + else if (c <= 0x1FFFFF) + { + addByte(0xF0 | (c >> 18)); + addByte(0x80 | ((c >> 12) & 0x3F)); + addByte(0x80 | ((c >> 6) & 0x3F)); + addByte(0x80 | (c & 0x3F)); + } + else + { + addByte(0xF8 | (c >> 24)); + addByte(0x80 | ((c >> 18) & 0x3F)); + addByte(0x80 | ((c >> 12) & 0x3F)); + addByte(0x80 | ((c >> 6) & 0x3F)); + addByte(0x80 | (c & 0x3F)); + } + } + + public void addStringToLexer(String str) + { + for ( int i = 0; i < 
str.length(); i++ ) { + addCharToLexer( (int)str.charAt(i) ); + } + } + + /* + No longer attempts to insert missing ';' for unknown + enitities unless one was present already, since this + gives unexpected results. + + For example: + was tidied to: + rather than: + + My thanks for Maurice Buxton for spotting this. + */ + public void parseEntity(short mode) + { + short map; + int start; + boolean first = true; + boolean semicolon = false; + boolean numeric = false; + int c, ch, startcol; + String str; + + start = this.lexsize - 1; /* to start at "&" */ + startcol = this.in.curcol - 1; + + while (true) + { + c = this.in.readChar(); + if (c == StreamIn.EndOfStream) break; + if (c == ';') + { + semicolon = true; + break; + } + + if (first && c == '#') + { + addCharToLexer(c); + first = false; + numeric = true; + continue; + } + + first = false; + map = MAP((char)c); + + /* AQ: Added flag for numeric entities so that numeric entities + with missing semi-colons are recognized. + Eg. "rep..." is recognized as "rep" + */ + if (numeric && ((c == 'x') || ((map & DIGIT) != 0))) + { + addCharToLexer(c); + continue; + } + if (!numeric && ((map & NAMECHAR) != 0)) + { + addCharToLexer(c); + continue; + } + + /* otherwise put it back */ + + this.in.ungetChar(c); + break; + } + + str = getString( this.lexbuf, start, this.lexsize - start ); + ch = EntityTable.getDefaultEntityTable().entityCode( str ); + + /* deal with unrecognized entities */ + if (ch <= 0) + { + /* set error position just before offending chararcter */ + this.lines = this.in.curline; + this.columns = startcol; + + if (this.lexsize > start +1 ) + { + Report.entityError(this, Report.UNKNOWN_ENTITY, str, ch); + + if (semicolon) + addCharToLexer(';'); + } + else /* naked & */ + { + Report.entityError(this, Report.UNESCAPED_AMPERSAND, str, ch); + } + } + else + { + if (c != ';') /* issue warning if not terminated by ';' */ + { + /* set error position just before offending chararcter */ + this.lines = this.in.curline; + 
this.columns = startcol; + Report.entityError(this, Report.MISSING_SEMICOLON, str, c); + } + + this.lexsize = start; + + if (ch == 160 && (mode & Preformatted) != 0) + ch = ' '; + + addCharToLexer(ch); + + if (ch == '&' && !this.configuration.QuoteAmpersand) + { + addCharToLexer('a'); + addCharToLexer('m'); + addCharToLexer('p'); + addCharToLexer(';'); + } + } + } + + public char parseTagName() + { + short map; + int c; + + /* fold case of first char in buffer */ + + c = this.lexbuf[this.txtstart]; + map = MAP((char)c); + + if (!this.configuration.XmlTags && (map & UPPERCASE) != 0) + { + c += (int)((int)'a' - (int)'A'); + this.lexbuf[this.txtstart] = (byte)c; + } + + while (true) + { + c = this.in.readChar(); + if (c == StreamIn.EndOfStream) break; + map = MAP((char)c); + + if ((map & NAMECHAR) == 0) + break; + + /* fold case of subsequent chars */ + + if (!this.configuration.XmlTags && (map & UPPERCASE) != 0) + c += (int)((int)'a' - (int)'A'); + + addCharToLexer(c); + } + + this.txtend = this.lexsize; + return (char)c; + } + + public void addStringLiteral(String str) + { + for ( int i = 0; i < str.length(); i++ ) { + addCharToLexer( (int)str.charAt(i) ); + } + } + + /* choose what version to use for new doctype */ + public short HTMLVersion() + { + short versions; + + versions = this.versions; + + if ((versions & Dict.VERS_HTML20) != 0) + return Dict.VERS_HTML20; + + if ((versions & Dict.VERS_HTML32) != 0) + return Dict.VERS_HTML32; + + if ((versions & Dict.VERS_HTML40_STRICT) != 0) + return Dict.VERS_HTML40_STRICT; + + if ((versions & Dict.VERS_HTML40_LOOSE) != 0) + return Dict.VERS_HTML40_LOOSE; + + if ((versions & Dict.VERS_FRAMES) != 0) + return Dict.VERS_FRAMES; + + return Dict.VERS_UNKNOWN; + } + + public String HTMLVersionName() + { + short guessed; + int j; + + guessed = apparentVersion(); + + for (j = 0; j < W3CVersion.length; ++j) + { + if (guessed == W3CVersion[j].code) + { + if (this.isvoyager) + return W3CVersion[j].voyagerName; + + return 
W3CVersion[j].name; + } + } + + return null; + } + + /* add meta element for Tidy */ + public boolean addGenerator(Node root) + { + AttVal attval; + Node node; + Node head = root.findHEAD(configuration.tt); + + if (head != null) + { + for (node = head.content; node != null; node = node.next) + { + if (node.tag == configuration.tt.tagMeta) + { + attval = node.getAttrByName("name"); + + if (attval != null && attval.value != null && + Lexer.wstrcasecmp(attval.value, "generator") == 0) + { + attval = node.getAttrByName("content"); + + if (attval != null && attval.value != null && + attval.value.length() >= 9 && + Lexer.wstrcasecmp(attval.value.substring(0, 9), "HTML Tidy") == 0) + { + return false; + } + } + } + } + + node = this.inferredTag("meta"); + node.addAttribute("content", "HTML Tidy, see www.w3.org"); + node.addAttribute("name", "generator"); + Node.insertNodeAtStart(head, node); + return true; + } + + return false; + } + + /* return true if substring s is in p and isn't all in upper case */ + /* this is used to check the case of SYSTEM, PUBLIC, DTD and EN */ + /* len is how many chars to check in p */ + private static boolean findBadSubString(String s, String p, int len) + { + int n = s.length(); + int i = 0; + String ps; + + while (n < len) + { + ps = p.substring(i, i + n); + if (wstrcasecmp(s, ps) == 0) + return (!ps.equals(s.substring(0, n))); + + ++i; + --len; + } + + return false; + } + + public boolean checkDocTypeKeyWords(Node doctype) + { + int len = doctype.end - doctype.start; + String s = getString(this.lexbuf, doctype.start, len); + + return !( + findBadSubString("SYSTEM", s, len) || + findBadSubString("PUBLIC", s, len) || + findBadSubString("//DTD", s, len) || + findBadSubString("//W3C", s, len) || + findBadSubString("//EN", s, len) + ); + } + + /* examine to identify version */ + public short findGivenVersion(Node doctype) + { + String p, s; + int i, j; + int len; + String str1; + String str2; + + /* if root tag for doctype isn't html give up 
now */ + str1 = getString(this.lexbuf, doctype.start, 5); + if (wstrcasecmp(str1, "html ") != 0) + return 0; + + if (!checkDocTypeKeyWords(doctype)) + Report.warning(this, doctype, null, Report.DTYPE_NOT_UPPER_CASE); + + /* give up if all we are given is the system id for the doctype */ + str1 = getString(this.lexbuf, doctype.start + 5, 7); + if (wstrcasecmp(str1, "SYSTEM ") == 0) + { + /* but at least ensure the case is correct */ + if (!str1.substring(0, 6).equals("SYSTEM")) + System.arraycopy( getBytes("SYSTEM"), 0, + this.lexbuf, doctype.start + 5, 6 ); + return 0; /* unrecognized */ + } + + if (wstrcasecmp(str1, "PUBLIC ") == 0) + { + if (!str1.substring(0, 6).equals("PUBLIC")) + System.arraycopy( getBytes("PUBLIC "), 0, + this.lexbuf, doctype.start + 5, 6 ); + } + else + this.badDoctype = true; + + for (i = doctype.start; i < doctype.end; ++i) + { + if (this.lexbuf[i] == (byte)'"') + { + str1 = getString( this.lexbuf, i + 1, 12 ); + str2 = getString( this.lexbuf, i + 1, 13 ); + if (str1.equals("-//W3C//DTD ")) + { + /* compute length of identifier e.g. "HTML 4.0 Transitional" */ + for (j = i + 13; j < doctype.end && this.lexbuf[j] != (byte)'/'; ++j); + len = j - i - 13; + p = getString( this.lexbuf, i + 13, len ); + + for (j = 1; j < W3CVersion.length; ++j) + { + s = W3CVersion[j].name; + if (len == s.length() && s.equals(p)) + return W3CVersion[j].code; + } + + /* else unrecognized version */ + } + else if (str2.equals("-//IETF//DTD ")) + { + /* compute length of identifier e.g. 
"HTML 2.0" */ + for (j = i + 14; j < doctype.end && this.lexbuf[j] != (byte)'/'; ++j); + len = j - i - 14; + + p = getString( this.lexbuf, i + 14, len ); + s = W3CVersion[0].name; + if (len == s.length() && s.equals(p)) + return W3CVersion[0].code; + + /* else unrecognized version */ + } + break; + } + } + + return 0; + } + + public void fixHTMLNameSpace(Node root, String profile) + { + Node node; + AttVal prev, attr; + + for (node = root.content; + node != null && node.tag != configuration.tt.tagHtml; node = node.next); + + if (node != null) + { + prev = null; + + for (attr = node.attributes; attr != null; attr = attr.next) + { + if (attr.attribute.equals("xmlns")) + break; + + prev = attr; + } + + if (attr != null) + { + if (!attr.value.equals(profile)) + { + Report.warning(this, node, null, Report.INCONSISTENT_NAMESPACE); + attr.value = profile; + } + } + else + { + attr = new AttVal( node.attributes, null, (int)'"', + "xmlns", profile ); + attr.dict = + AttributeTable.getDefaultAttributeTable().findAttribute( attr ); + node.attributes = attr; + } + } + } + + public boolean setXHTMLDocType(Node root) + { + String fpi = " "; + String sysid = ""; + String namespace = XHTML_NAMESPACE; + Node doctype; + + doctype = root.findDocType(); + + if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT) + { + if (doctype != null) + Node.discardElement(doctype); + return true; + } + + if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO) + { + /* see what flavor of XHTML this document matches */ + if ((this.versions & Dict.VERS_HTML40_STRICT) != 0) + { /* use XHTML strict */ + fpi = "-//W3C//DTD XHTML 1.0 Strict//EN"; + sysid = voyager_strict; + } + else if ((this.versions & Dict.VERS_LOOSE) != 0) + { + fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN"; + sysid = voyager_loose; + } + else if ((this.versions & Dict.VERS_FRAMES) != 0) + { /* use XHTML frames */ + fpi = "-//W3C//DTD XHTML 1.0 Frameset//EN"; + sysid = voyager_frameset; + } + else /* lets assume XHTML 
transitional */ + { + fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN"; + sysid = voyager_loose; + } + } + else if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT) + { + fpi = "-//W3C//DTD XHTML 1.0 Strict//EN"; + sysid = voyager_strict; + } + else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE) + { + fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN"; + sysid = voyager_loose; + } + + fixHTMLNameSpace(root, namespace); + + if (doctype == null) + { + doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0); + doctype.next = root.content; + doctype.parent = root; + doctype.prev = null; + root.content = doctype; + } + + if (configuration.docTypeMode == Configuration.DOCTYPE_USER && + configuration.docTypeStr != null) + { + fpi = configuration.docTypeStr; + sysid = ""; + } + + this.txtstart = this.lexsize; + this.txtend = this.lexsize; + + /* add public identifier */ + addStringLiteral("html PUBLIC "); + + /* check if the fpi is quoted or not */ + if (fpi.charAt(0) == '"') + addStringLiteral(fpi); + else + { + addStringLiteral("\""); + addStringLiteral(fpi); + addStringLiteral("\""); + } + + if (sysid.length() + 6 >= this.configuration.wraplen) + addStringLiteral("\n\""); + else + addStringLiteral("\n \""); + + /* add system identifier */ + addStringLiteral(sysid); + addStringLiteral("\""); + + this.txtend = this.lexsize; + + doctype.start = this.txtstart; + doctype.end = this.txtend; + + return false; + } + + public short apparentVersion() + { + switch (this.doctype) + { + case Dict.VERS_UNKNOWN: + return HTMLVersion(); + + case Dict.VERS_HTML20: + if ((this.versions & Dict.VERS_HTML20) != 0) + return Dict.VERS_HTML20; + + break; + + case Dict.VERS_HTML32: + if ((this.versions & Dict.VERS_HTML32) != 0) + return Dict.VERS_HTML32; + + break; /* to replace old version by new */ + + case Dict.VERS_HTML40_STRICT: + if ((this.versions & Dict.VERS_HTML40_STRICT) != 0) + return Dict.VERS_HTML40_STRICT; + + break; + + case Dict.VERS_HTML40_LOOSE: + if 
((this.versions & Dict.VERS_HTML40_LOOSE) != 0) + return Dict.VERS_HTML40_LOOSE; + + break; /* to replace old version by new */ + + case Dict.VERS_FRAMES: + if ((this.versions & Dict.VERS_FRAMES) != 0) + return Dict.VERS_FRAMES; + + break; + } + + Report.warning(this, null, null, Report.INCONSISTENT_VERSION); + return this.HTMLVersion(); + } + + /* fixup doctype if missing */ + public boolean fixDocType(Node root) + { + Node doctype; + int guessed = Dict.VERS_HTML40_STRICT, i; + + if (this.badDoctype) + Report.warning(this, null, null, Report.MALFORMED_DOCTYPE); + + if (configuration.XmlOut) + return true; + + doctype = root.findDocType(); + + if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT) + { + if (doctype != null) + Node.discardElement(doctype); + return true; + } + + if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT) + { + Node.discardElement(doctype); + doctype = null; + guessed = Dict.VERS_HTML40_STRICT; + } + else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE) + { + Node.discardElement(doctype); + doctype = null; + guessed = Dict.VERS_HTML40_LOOSE; + } + else if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO) + { + if (doctype != null) + { + if (this.doctype == Dict.VERS_UNKNOWN) + return false; + + switch (this.doctype) + { + case Dict.VERS_UNKNOWN: + return false; + + case Dict.VERS_HTML20: + if ((this.versions & Dict.VERS_HTML20) != 0) + return true; + + break; /* to replace old version by new */ + + case Dict.VERS_HTML32: + if ((this.versions & Dict.VERS_HTML32) != 0) + return true; + + break; /* to replace old version by new */ + + case Dict.VERS_HTML40_STRICT: + if ((this.versions & Dict.VERS_HTML40_STRICT) != 0) + return true; + + break; /* to replace old version by new */ + + case Dict.VERS_HTML40_LOOSE: + if ((this.versions & Dict.VERS_HTML40_LOOSE) != 0) + return true; + + break; /* to replace old version by new */ + + case Dict.VERS_FRAMES: + if ((this.versions & Dict.VERS_FRAMES) != 0) + return 
true; + + break; /* to replace old version by new */ + } + + /* INCONSISTENT_VERSION warning is now issued by ApparentVersion() */ + } + + /* choose new doctype */ + guessed = HTMLVersion(); + } + + if (guessed == Dict.VERS_UNKNOWN) + return false; + + /* for XML use the Voyager system identifier */ + if (this.configuration.XmlOut || this.configuration.XmlTags || this.isvoyager) + { + if (doctype != null) + Node.discardElement(doctype); + + for (i = 0; i < W3CVersion.length; ++i) + { + if (guessed == W3CVersion[i].code) + { + fixHTMLNameSpace(root, W3CVersion[i].profile); + break; + } + } + + return true; + } + + if (doctype == null) + { + doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0); + doctype.next = root.content; + doctype.parent = root; + doctype.prev = null; + root.content = doctype; + } + + this.txtstart = this.lexsize; + this.txtend = this.lexsize; + + /* use the appropriate public identifier */ + addStringLiteral("html PUBLIC "); + + if (configuration.docTypeMode == Configuration.DOCTYPE_USER && + configuration.docTypeStr != null) + addStringLiteral(configuration.docTypeStr); + else if (guessed == Dict.VERS_HTML20) + addStringLiteral("\"-//IETF//DTD HTML 2.0//EN\""); + else + { + addStringLiteral("\"-//W3C//DTD "); + + for (i = 0; i < W3CVersion.length; ++i) + { + if (guessed == W3CVersion[i].code) + { + addStringLiteral(W3CVersion[i].name); + break; + } + } + + addStringLiteral("//EN\""); + } + + this.txtend = this.lexsize; + + doctype.start = this.txtstart; + doctype.end = this.txtend; + + return true; + } + + /* ensure XML document starts with */ + public boolean fixXMLPI(Node root) + { + Node xml; + int s; + + if( root.content != null && root.content.type == Node.ProcInsTag) + { + s = root.content.start; + + if (this.lexbuf[s] == (byte)'x' && + this.lexbuf[s+1] == (byte)'m' && + this.lexbuf[s+2] == (byte)'l') + return true; + } + + xml = newNode(Node.ProcInsTag, this.lexbuf, 0, 0); + xml.next = root.content; + + if (root.content != null) + { + 
root.content.prev = xml; + xml.next = root.content; + } + + root.content = xml; + + this.txtstart = this.lexsize; + this.txtend = this.lexsize; + addStringLiteral("xml version=\"1.0\""); + if (this.configuration.CharEncoding == Configuration.LATIN1) + addStringLiteral(" encoding=\"ISO-8859-1\""); + this.txtend = this.lexsize; + + xml.start = this.txtstart; + xml.end = this.txtend; + return false; + } + + public Node inferredTag(String name) + { + Node node; + + node = newNode(Node.StartTag, + this.lexbuf, + this.txtstart, + this.txtend, + name); + node.implicit = true; + return node; + } + + public static boolean expectsContent(Node node) + { + if (node.type != Node.StartTag) + return false; + + /* unknown element? */ + if (node.tag == null) + return true; + + if ((node.tag.model & Dict.CM_EMPTY) != 0) + return false; + + return true; + } + + /* + create a text node for the contents of + a CDATA element like style or script + which ends with for some foo. + */ + public Node getCDATA(Node container) + { + int c, lastc, start, len, i; + String str; + boolean endtag = false; + + this.lines = this.in.curline; + this.columns = this.in.curcol; + this.waswhite = false; + this.txtstart = this.lexsize; + this.txtend = this.lexsize; + + lastc = (int)'\0'; + start = -1; + + while (true) + { + c = this.in.readChar(); + if (c == StreamIn.EndOfStream) break; + /* treat \r\n as \n and \r as \n */ + + if (c == (int)'/' && lastc == (int)'<') + { + if (endtag) + { + this.lines = this.in.curline; + this.columns = this.in.curcol - 3; + + Report.warning(this, null, null, Report.BAD_CDATA_CONTENT); + } + + start = this.lexsize + 1; /* to first letter */ + endtag = true; + } + else if (c == (int)'>' && start >= 0) + { + len = this.lexsize - start; + if (len == container.element.length()) + { + str = getString( this.lexbuf, start, len ); + if (Lexer.wstrcasecmp(str, container.element) == 0) + { + this.txtend = start - 2; + break; + } + } + + this.lines = this.in.curline; + this.columns = 
this.in.curcol - 3; + + Report.warning(this, null, null, Report.BAD_CDATA_CONTENT); + + /* if javascript insert backslash before / */ + + if (ParserImpl.isJavaScript(container)) + { + for (i = this.lexsize; i > start-1; --i) + this.lexbuf[i] = this.lexbuf[i-1]; + + this.lexbuf[start-1] = (byte)'\\'; + this.lexsize++; + } + + start = -1; + } + else if (c == (int)'\r') + { + c = this.in.readChar(); + + if (c != (int)'\n') + this.in.ungetChar(c); + + c = (int)'\n'; + } + + addCharToLexer((int)c); + this.txtend = this.lexsize; + lastc = c; + } + + if (c == StreamIn.EndOfStream) + Report.warning(this, container, null, Report.MISSING_ENDTAG_FOR); + + if (this.txtend > this.txtstart) + { + this.token = newNode(Node.TextNode, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + } + + return null; + } + + public void ungetToken() + { + this.pushed = true; + } + + public static final short IgnoreWhitespace = 0; + public static final short MixedContent = 1; + public static final short Preformatted = 2; + public static final short IgnoreMarkup = 3; + + /* + modes for GetToken() + + MixedContent -- for elements which don't accept PCDATA + Preformatted -- white space preserved as is + IgnoreMarkup -- for CDATA elements such as script, style + */ + + public Node getToken(short mode) + { + short map; + int c = 0; + int lastc; + int badcomment = 0; + MutableBoolean isempty = new MutableBoolean(); + AttVal attributes; + + if (this.pushed) + { + /* duplicate inlines in preference to pushed text nodes when appropriate */ + if (this.token.type != Node.TextNode || + (this.insert == -1 && this.inode == null)) + { + this.pushed = false; + return this.token; + } + } + + /* at start of block elements, unclosed inline + elements are inserted into the token stream */ + + if (this.insert != -1 || this.inode != null) + return insertedToken(); + + this.lines = this.in.curline; + this.columns = this.in.curcol; + this.waswhite = false; + + this.txtstart = this.lexsize; + 
this.txtend = this.lexsize; + + while (true) + { + c = this.in.readChar(); + if (c == StreamIn.EndOfStream) break; + if (this.insertspace && mode != IgnoreWhitespace) + { + addCharToLexer(' '); + this.waswhite = true; + this.insertspace = false; + } + + /* treat \r\n as \n and \r as \n */ + + if (c == '\r') + { + c = this.in.readChar(); + + if (c != '\n') + this.in.ungetChar(c); + + c = '\n'; + } + + addCharToLexer(c); + + switch (this.state) + { + case LEX_CONTENT: /* element content */ + map = MAP((char)c); + + /* + Discard white space if appropriate. Its cheaper + to do this here rather than in parser methods + for elements that don't have mixed content. + */ + if (((map & WHITE) != 0) && (mode == IgnoreWhitespace) + && this.lexsize == this.txtstart + 1) + { + --this.lexsize; + this.waswhite = false; + this.lines = this.in.curline; + this.columns = this.in.curcol; + continue; + } + + if (c == '<') + { + this.state = LEX_GT; + continue; + } + + if ((map & WHITE) != 0) + { + /* was previous char white? 
*/ + if (this.waswhite) + { + if (mode != Preformatted && mode != IgnoreMarkup) + { + --this.lexsize; + this.lines = this.in.curline; + this.columns = this.in.curcol; + } + } + else /* prev char wasn't white */ + { + this.waswhite = true; + lastc = c; + + if (mode != Preformatted && mode != IgnoreMarkup && c != ' ') + changeChar((byte)' '); + } + + continue; + } + else if (c == '&' && mode != IgnoreMarkup) + parseEntity(mode); + + /* this is needed to avoid trimming trailing whitespace */ + if (mode == IgnoreWhitespace) + mode = MixedContent; + + this.waswhite = false; + continue; + + case LEX_GT: /* < */ + + /* check for endtag */ + if (c == '/') + { + c = this.in.readChar(); + if (c == StreamIn.EndOfStream) + { + this.in.ungetChar(c); + continue; + } + + addCharToLexer(c); + map = MAP((char)c); + + if ((map & LETTER) != 0) + { + this.lexsize -= 3; + this.txtend = this.lexsize; + this.in.ungetChar(c); + this.state = LEX_ENDTAG; + this.lexbuf[this.lexsize] = (byte)'\0'; /* debug */ + this.in.curcol -= 2; + + /* if some text before the this.txtstart) + { + /* trim space char before end tag */ + if (mode == IgnoreWhitespace && this.lexbuf[this.lexsize - 1] == (byte)' ') + { + this.lexsize -= 1; + this.txtend = this.lexsize; + } + + this.token = newNode(Node.TextNode, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + } + + continue; /* no text so keep going */ + } + + /* otherwise treat as CDATA */ + this.waswhite = false; + this.state = LEX_CONTENT; + continue; + } + + if (mode == IgnoreMarkup) + { + /* otherwise treat as CDATA */ + this.waswhite = false; + this.state = LEX_CONTENT; + continue; + } + + /* + look out for comments, doctype or marked sections + this isn't quite right, but its getting there ... 
+ */ + if (c == '!') + { + c = this.in.readChar(); + + if (c == '-') + { + c = this.in.readChar(); + + if (c == '-') + { + this.state = LEX_COMMENT; /* comment */ + this.lexsize -= 2; + this.txtend = this.lexsize; + + /* if some text before < return it now */ + if (this.txtend > this.txtstart) + { + this.token = newNode(Node.TextNode, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + } + + this.txtstart = this.lexsize; + continue; + } + + Report.warning(this, null, null, Report.MALFORMED_COMMENT); + } + else if (c == 'd' || c == 'D') + { + this.state = LEX_DOCTYPE; /* doctype */ + this.lexsize -= 2; + this.txtend = this.lexsize; + mode = IgnoreWhitespace; + + /* skip until white space or '>' */ + + for (;;) + { + c = this.in.readChar(); + + if (c == StreamIn.EndOfStream || c == '>') + { + this.in.ungetChar(c); + break; + } + + map = MAP((char)c); + + if ((map & WHITE) == 0) + continue; + + /* and skip to end of whitespace */ + + for (;;) + { + c = this.in.readChar(); + + if (c == StreamIn.EndOfStream || c == '>') + { + this.in.ungetChar(c); + break; + } + + map = MAP((char)c); + + if ((map & WHITE) != 0) + continue; + + this.in.ungetChar(c); + break; + } + + break; + } + + /* if some text before < return it now */ + if (this.txtend > this.txtstart) + { + this.token = newNode(Node.TextNode, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + } + + this.txtstart = this.lexsize; + continue; + } + else if (c == '[') + { + /* Word 2000 embeds ... 
sequences */ + this.lexsize -= 2; + this.state = LEX_SECTION; + this.txtend = this.lexsize; + + /* if some text before < return it now */ + if (this.txtend > this.txtstart) + { + this.token = newNode(Node.TextNode, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + } + + this.txtstart = this.lexsize; + continue; + } + + /* otherwise swallow chars up to and including next '>' */ + while (true) + { + c = this.in.readChar(); + if (c == '>') break; + if (c == -1) + { + this.in.ungetChar(c); + break; + } + } + + this.lexsize -= 2; + this.lexbuf[this.lexsize] = (byte)'\0'; + this.state = LEX_CONTENT; + continue; + } + + /* + processing instructions + */ + + if (c == '?') + { + this.lexsize -= 2; + this.state = LEX_PROCINSTR; + this.txtend = this.lexsize; + + /* if some text before < return it now */ + if (this.txtend > this.txtstart) + { + this.token = newNode(Node.TextNode, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + } + + this.txtstart = this.lexsize; + continue; + } + + /* Microsoft ASP's e.g. <% ... server-code ... %> */ + if (c == '%') + { + this.lexsize -= 2; + this.state = LEX_ASP; + this.txtend = this.lexsize; + + /* if some text before < return it now */ + if (this.txtend > this.txtstart) + { + this.token = newNode(Node.TextNode, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + } + + this.txtstart = this.lexsize; + continue; + } + + /* Netscapes JSTE e.g. <# ... server-code ... 
#> */ + if (c == '#') + { + this.lexsize -= 2; + this.state = LEX_JSTE; + this.txtend = this.lexsize; + + /* if some text before < return it now */ + if (this.txtend > this.txtstart) + { + this.token = newNode(Node.TextNode, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + } + + this.txtstart = this.lexsize; + continue; + } + + map = MAP((char)c); + + /* check for start tag */ + if ((map & LETTER) != 0) + { + this.in.ungetChar(c); /* push back letter */ + this.lexsize -= 2; /* discard "<" + letter */ + this.txtend = this.lexsize; + this.state = LEX_STARTTAG; /* ready to read tag name */ + + /* if some text before < return it now */ + if (this.txtend > this.txtstart) + { + this.token = newNode(Node.TextNode, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + } + + continue; /* no text so keep going */ + } + + /* otherwise treat as CDATA */ + this.state = LEX_CONTENT; + this.waswhite = false; + continue; + + case LEX_ENDTAG: /* ' */ + while (c != '>') + { + c = this.in.readChar(); + + if (c == StreamIn.EndOfStream) + break; + } + + if (c == StreamIn.EndOfStream) + { + this.in.ungetChar(c); + continue; + } + + this.state = LEX_CONTENT; + this.waswhite = false; + return this.token; /* the endtag token */ + + case LEX_STARTTAG: /* first letter of tagname */ + this.txtstart = this.lexsize - 1; /* set txtstart to first letter */ + c = parseTagName(); + isempty.value = false; + attributes = null; + this.token = newNode((isempty.value ? 
Node.StartEndTag : Node.StartTag), + this.lexbuf, + this.txtstart, + this.txtend, + getString(this.lexbuf, + this.txtstart, + this.txtend - this.txtstart)); + + /* parse attributes, consuming closing ">" */ + if (c != '>') + { + if (c == '/') + this.in.ungetChar(c); + + attributes = parseAttrs(isempty); + } + + if (isempty.value) + this.token.type = Node.StartEndTag; + + this.token.attributes = attributes; + this.lexsize = this.txtstart; + this.txtend = this.txtstart; + + /* swallow newline following start tag */ + /* special check needed for CRLF sequence */ + /* this doesn't apply to empty elements */ + + if (expectsContent(this.token) || + this.token.tag == configuration.tt.tagBr) + { + + c = this.in.readChar(); + + if (c == '\r') + { + c = this.in.readChar(); + + if (c != '\n') + this.in.ungetChar(c); + } + else if (c != '\n' && c != '\f') + this.in.ungetChar(c); + + this.waswhite = true; /* to swallow leading whitespace */ + } + else + this.waswhite = false; + + this.state = LEX_CONTENT; + + if (this.token.tag == null) + Report.error(this, null, this.token, Report.UNKNOWN_ELEMENT); + else if (!this.configuration.XmlTags) + { + this.versions &= this.token.tag.versions; + + if ((this.token.tag.versions & Dict.VERS_PROPRIETARY) != 0) + { + if (!this.configuration.MakeClean && (this.token.tag == configuration.tt.tagNobr || + this.token.tag == configuration.tt.tagWbr)) + Report.warning(this, null, this.token, Report.PROPRIETARY_ELEMENT); + } + + if (this.token.tag.chkattrs != null) + { + this.token.checkUniqueAttributes(this); + this.token.tag.chkattrs.check(this, this.token); + } + else + this.token.checkAttributes(this); + } + + return this.token; /* return start tag */ + + case LEX_COMMENT: /* seen */ + + if (c != '-') + continue; + + c = this.in.readChar(); + addCharToLexer(c); + + if (c != '-') + continue; + + end_comment: while (true) { + c = this.in.readChar(); + + if (c == '>') + { + if (badcomment != 0) + Report.warning(this, null, null, 
Report.MALFORMED_COMMENT); + + this.txtend = this.lexsize - 2; // AQ 8Jul2000 + this.lexbuf[this.lexsize] = (byte)'\0'; + this.state = LEX_CONTENT; + this.waswhite = false; + this.token = newNode(Node.CommentTag, + this.lexbuf, + this.txtstart, + this.txtend); + + /* now look for a line break */ + + c = this.in.readChar(); + + if (c == '\r') + { + c = this.in.readChar(); + + if (c != '\n') + this.token.linebreak = true; + } + + if (c == '\n') + this.token.linebreak = true; + else + this.in.ungetChar(c); + + return this.token; + } + + /* note position of first such error in the comment */ + if (badcomment == 0) + { + this.lines = this.in.curline; + this.columns = this.in.curcol - 3; + } + + badcomment++; + if (this.configuration.FixComments) + this.lexbuf[this.lexsize - 2] = (byte)'='; + + addCharToLexer(c); + + /* if '-' then look for '>' to end the comment */ + if (c != '-') + break end_comment; + + } + /* otherwise continue to look for --> */ + this.lexbuf[this.lexsize - 2] = (byte)'='; + continue; + + case LEX_DOCTYPE: /* seen ' munging whitespace */ + map = MAP((char)c); + + if ((map & WHITE) != 0) + { + if (this.waswhite) + this.lexsize -= 1; + + this.waswhite = true; + } + else + this.waswhite = false; + + if (c != '>') + continue; + + this.lexsize -= 1; + this.txtend = this.lexsize; + this.lexbuf[this.lexsize] = (byte)'\0'; + this.state = LEX_CONTENT; + this.waswhite = false; + this.token = newNode(Node.DocTypeTag, + this.lexbuf, + this.txtstart, + this.txtend); + /* make a note of the version named by the doctype */ + this.doctype = findGivenVersion(this.token); + return this.token; + + case LEX_PROCINSTR: /* seen ' */ + /* check for PHP preprocessor instructions */ + + if (this.lexsize - this.txtstart == 3) + { + if ((getString(this.lexbuf, this.txtstart, 3)).equals("php")) + { + this.state = LEX_PHP; + continue; + } + } + + if (this.configuration.XmlPIs) /* insist on ?> as terminator */ + { + if (c != '?') + continue; + + /* now look for '>' */ + c = 
this.in.readChar(); + + if (c == StreamIn.EndOfStream) + { + Report.warning(this, null, null, Report.UNEXPECTED_END_OF_FILE); + this.in.ungetChar(c); + continue; + } + + addCharToLexer(c); + } + + if (c != '>') + continue; + + this.lexsize -= 1; + this.txtend = this.lexsize; + this.lexbuf[this.lexsize] = (byte)'\0'; + this.state = LEX_CONTENT; + this.waswhite = false; + this.token = newNode(Node.ProcInsTag, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + + case LEX_ASP: /* seen <% so look for "%>" */ + if (c != '%') + continue; + + /* now look for '>' */ + c = this.in.readChar(); + + + if (c != '>') + { + this.in.ungetChar(c); + continue; + } + + this.lexsize -= 1; + this.txtend = this.lexsize; + this.lexbuf[this.lexsize] = (byte)'\0'; + this.state = LEX_CONTENT; + this.waswhite = false; + this.token = newNode(Node.AspTag, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + + case LEX_JSTE: /* seen <# so look for "#>" */ + if (c != '#') + continue; + + /* now look for '>' */ + c = this.in.readChar(); + + + if (c != '>') + { + this.in.ungetChar(c); + continue; + } + + this.lexsize -= 1; + this.txtend = this.lexsize; + this.lexbuf[this.lexsize] = (byte)'\0'; + this.state = LEX_CONTENT; + this.waswhite = false; + this.token = newNode(Node.JsteTag, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + + case LEX_PHP: /* seen "" */ + if (c != '?') + continue; + + /* now look for '>' */ + c = this.in.readChar(); + + if (c != '>') + { + this.in.ungetChar(c); + continue; + } + + this.lexsize -= 1; + this.txtend = this.lexsize; + this.lexbuf[this.lexsize] = (byte)'\0'; + this.state = LEX_CONTENT; + this.waswhite = false; + this.token = newNode(Node.PhpTag, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + + case LEX_SECTION: /* seen "" */ + if (c == '[') + { + if (this.lexsize == (this.txtstart + 6) && + (getString(this.lexbuf, this.txtstart, 6)).equals("CDATA[")) + { + this.state = LEX_CDATA; + 
this.lexsize -= 6; + continue; + } + } + + if (c != ']') + continue; + + /* now look for '>' */ + c = this.in.readChar(); + + if (c != '>') + { + this.in.ungetChar(c); + continue; + } + + this.lexsize -= 1; + this.txtend = this.lexsize; + this.lexbuf[this.lexsize] = (byte)'\0'; + this.state = LEX_CONTENT; + this.waswhite = false; + this.token = newNode(Node.SectionTag, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + + case LEX_CDATA: /* seen "" */ + if (c != ']') + continue; + + /* now look for ']' */ + c = this.in.readChar(); + + if (c != ']') + { + this.in.ungetChar(c); + continue; + } + + /* now look for '>' */ + c = this.in.readChar(); + + if (c != '>') + { + this.in.ungetChar(c); + continue; + } + + this.lexsize -= 1; + this.txtend = this.lexsize; + this.lexbuf[this.lexsize] = (byte)'\0'; + this.state = LEX_CONTENT; + this.waswhite = false; + this.token = newNode(Node.CDATATag, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + } + } + + if (this.state == LEX_CONTENT) /* text string */ + { + this.txtend = this.lexsize; + + if (this.txtend > this.txtstart) + { + this.in.ungetChar(c); + + if (this.lexbuf[this.lexsize - 1] == (byte)' ') + { + this.lexsize -= 1; + this.txtend = this.lexsize; + } + + this.token = newNode(Node.TextNode, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + } + } + else if (this.state == LEX_COMMENT) /* comment */ + { + if (c == StreamIn.EndOfStream) + Report.warning(this, null, null, Report.MALFORMED_COMMENT); + + this.txtend = this.lexsize; + this.lexbuf[this.lexsize] = (byte)'\0'; + this.state = LEX_CONTENT; + this.waswhite = false; + this.token = newNode(Node.CommentTag, + this.lexbuf, + this.txtstart, + this.txtend); + return this.token; + } + + return null; + } + + /* + parser for ASP within start tags + + Some people use ASP for to customize attributes + Tidy isn't really well suited to dealing with ASP + This is a workaround for attributes, but won't + deal with the 
case where the ASP is used to tailor + the attribute value. Here is an example of a work + around for using ASP in attribute values: + + href="<%=rsSchool.Fields("ID").Value%>" + + where the ASP that generates the attribute value + is masked from Tidy by the quotemarks. + + */ + + public Node parseAsp() + { + int c; + Node asp = null; + + this.txtstart = this.lexsize; + + for (;;) + { + c = this.in.readChar(); + addCharToLexer(c); + + + if (c != '%') + continue; + + c = this.in.readChar(); + addCharToLexer(c); + + if (c == '>') + break; + } + + this.lexsize -= 2; + this.txtend = this.lexsize; + + if (this.txtend > this.txtstart) + asp = newNode(Node.AspTag, + this.lexbuf, + this.txtstart, + this.txtend); + + this.txtstart = this.txtend; + return asp; + } + + /* + PHP is like ASP but is based upon XML + processing instructions, e.g. + */ + public Node parsePhp() + { + int c; + Node php = null; + + this.txtstart = this.lexsize; + + for (;;) + { + c = this.in.readChar(); + addCharToLexer(c); + + + if (c != '?') + continue; + + c = this.in.readChar(); + addCharToLexer(c); + + if (c == '>') + break; + } + + this.lexsize -= 2; + this.txtend = this.lexsize; + + if (this.txtend > this.txtstart) + php = newNode(Node.PhpTag, + this.lexbuf, + this.txtstart, + this.txtend); + + this.txtstart = this.txtend; + return php; + } + + /* consumes the '>' terminating start tags */ + public String parseAttribute(MutableBoolean isempty, MutableObject asp, + MutableObject php) + { + int start = 0; + // int len = 0; Removed by BUGFIX for 126265 + short map; + String attr; + int c = 0; + + asp.setObject(null); /* clear asp pointer */ + php.setObject(null); /* clear php pointer */ + /* skip white space before the attribute */ + + for (;;) + { + c = this.in.readChar(); + + if (c == '/') + { + c = this.in.readChar(); + + if (c == '>') + { + isempty.value = true; + return null; + } + + this.in.ungetChar(c); + c = '/'; + break; + } + + if (c == '>') + return null; + + if (c =='<') + { + c = 
this.in.readChar(); + + if (c == '%') + { + asp.setObject(parseAsp()); + return null; + } + else if (c == '?') + { + php.setObject(parsePhp()); + return null; + } + + this.in.ungetChar(c); + Report.attrError(this, this.token, null, Report.UNEXPECTED_GT); + return null; + } + + if (c == '"' || c == '\'') + { + Report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK); + continue; + } + + if (c == StreamIn.EndOfStream) + { + Report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE); + this.in.ungetChar(c); + return null; + } + + map = MAP((char)c); + + if ((map & WHITE) == 0) + break; + } + + start = this.lexsize; + + for (;;) + { + /* but push back '=' for parseValue() */ + if (c == '=' || c == '>') + { + this.in.ungetChar(c); + break; + } + + if (c == '<' || c == StreamIn.EndOfStream) + { + this.in.ungetChar(c); + break; + } + + map = MAP((char)c); + + if ((map & WHITE) != 0) + break; + + /* what should be done about non-namechar characters? */ + /* currently these are incorporated into the attr name */ + + if (!this.configuration.XmlTags && (map & UPPERCASE) != 0) + c += (int)('a' - 'A'); + + // ++len; Removed by BUGFIX for 126265 + addCharToLexer(c); + + c = this.in.readChar(); + } + + // Following line added by GLP to fix BUG 126265. This is a temporary comment + // and should be removed when Tidy is fixed. + int len = this.lexsize - start; + attr = (len > 0 ? getString(this.lexbuf, start, len) : null); + this.lexsize = start; + + return attr; + } + + /* + invoked when < is seen in place of attribute value + but terminates on whitespace if not ASP, PHP or Tango + this routine recognizes ' and " quoted strings + */ + public int parseServerInstruction() + { + int c, map, delim = '"'; + boolean isrule = false; + + c = this.in.readChar(); + addCharToLexer(c); + + /* check for ASP, PHP or Tango */ + if (c == '%' || c == '?' 
|| c == '@') + isrule = true; + + for (;;) + { + c = this.in.readChar(); + + if (c == StreamIn.EndOfStream) + break; + + if (c == '>') + { + if (isrule) + addCharToLexer(c); + else + this.in.ungetChar(c); + + break; + } + + /* if not recognized as ASP, PHP or Tango */ + /* then also finish value on whitespace */ + if (!isrule) + { + map = MAP((char)c); + + if ((map & WHITE) != 0) + break; + } + + addCharToLexer(c); + + if (c == '"') + { + do + { + c = this.in.readChar(); + addCharToLexer(c); + } + while (c != '"'); + delim = '\''; + continue; + } + + if (c == '\'') + { + do + { + c = this.in.readChar(); + addCharToLexer(c); + } + while (c != '\''); + } + } + + return delim; + } + + /* values start with "=" or " = " etc. */ + /* doesn't consume the ">" at end of start tag */ + + public String parseValue(String name, boolean foldCase, + MutableBoolean isempty, MutableInteger pdelim) + { + int len = 0; + int start; + short map; + boolean seen_gt = false; + boolean munge = true; + int c = 0; + int lastc, delim, quotewarning; + String value; + + delim = 0; + pdelim.value = (int)'"'; + + /* + Henry Zrepa reports that some folk are using the + embed element with script attributes where newlines + are significant and must be preserved + */ + if (configuration.LiteralAttribs) + munge = false; + + /* skip white space before the '=' */ + + for (;;) + { + c = this.in.readChar(); + + if (c == StreamIn.EndOfStream) + { + this.in.ungetChar(c); + break; + } + + map = MAP((char)c); + + if ((map & WHITE) == 0) + break; + } + + /* + c should be '=' if there is a value + other legal possibilities are white + space, '/' and '>' + */ + + if (c != '=') + { + this.in.ungetChar(c); + return null; + } + + /* skip white space after '=' */ + + for (;;) + { + c = this.in.readChar(); + + if (c == StreamIn.EndOfStream) + { + this.in.ungetChar(c); + break; + } + + map = MAP((char)c); + + if ((map & WHITE) == 0) + break; + } + + /* check for quote marks */ + + if (c == '"' || c == '\'') + delim = 
c; + else if (c == '<') + { + start = this.lexsize; + addCharToLexer(c); + pdelim.value = parseServerInstruction(); + len = this.lexsize - start; + this.lexsize = start; + return (len > 0 ? getString(this.lexbuf, start, len) : null); + } + else + this.in.ungetChar(c); + + /* + and read the value string + check for quote mark if needed + */ + + quotewarning = 0; + start = this.lexsize; + c = '\0'; + + for (;;) + { + lastc = c; /* track last character */ + c = this.in.readChar(); + + if (c == StreamIn.EndOfStream) + { + Report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE); + this.in.ungetChar(c); + break; + } + + if (delim == (char)0) + { + if (c == '>') + { + this.in.ungetChar(c); + break; + } + + if (c == '"' || c == '\'') + { + Report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK); + break; + } + + if (c == '<') + { + /* this.in.ungetChar(c); */ + Report.attrError(this, this.token, null, Report.UNEXPECTED_GT); + /* break; */ + } + + /* + For cases like
    need to avoid treating /> as + part of the attribute value, however care is needed to avoid + so treating
    in this way, which + would map the tag to + */ + if (c == '/') + { + /* peek ahead in case of /> */ + c = this.in.readChar(); + + if (c == '>' && + !AttributeTable.getDefaultAttributeTable().isUrl(name)) + { + isempty.value = true; + this.in.ungetChar(c); + break; + } + + /* unget peeked char */ + this.in.ungetChar(c); + c = '/'; + } + } + else /* delim is '\'' or '"' */ + { + if (c == delim) + break; + + /* treat CRLF, CR and LF as single line break */ + + if (c == '\r') + { + c = this.in.readChar(); + if (c != '\n') + this.in.ungetChar(c); + + c = '\n'; + } + + if (c == '\n' || c == '<' || c == '>') + ++quotewarning; + + if (c == '>') + seen_gt = true; + } + + if (c == '&') + { + addCharToLexer(c); + parseEntity((short)0); + continue; + } + + /* + kludge for JavaScript attribute values + with line continuations in string literals + */ + if (c == '\\') + { + c = this.in.readChar(); + + if (c != '\n') + { + this.in.ungetChar(c); + c = '\\'; + } + } + + map = MAP((char)c); + + if ((map & WHITE) != 0) + { + if (delim == (char)0) + break; + + if (munge) + { + c = ' '; + + if (lastc == ' ') + continue; + } + } + else if (foldCase && (map & UPPERCASE) != 0) + c += (int)('a' - 'A'); + + addCharToLexer(c); + } + + if (quotewarning > 10 && seen_gt && munge) + { + /* + there is almost certainly a missing trailling quote mark + as we have see too many newlines, < or > characters. 
+ + an exception is made for Javascript attributes and the + javascript URL scheme which may legitimately include < and > + */ + if (!AttributeTable.getDefaultAttributeTable().isScript(name) && + !(AttributeTable.getDefaultAttributeTable().isUrl(name) && + (getString(this.lexbuf, start, 11)).equals("javascript:"))) + Report.error(this, null, null, Report.SUSPECTED_MISSING_QUOTE); + } + + len = this.lexsize - start; + this.lexsize = start; + + if (len > 0 || delim != 0) + value = getString(this.lexbuf, start, len); + else + value = null; + + /* note delimiter if given */ + if (delim != 0) + pdelim.value = delim; + else + pdelim.value = (int)'"'; + + return value; + } + + /* attr must be non-null */ + public static boolean isValidAttrName(String attr) + { + short map; + char c; + int i; + + /* first character should be a letter */ + c = attr.charAt(0); + map = MAP(c); + + if (!((map & LETTER) != 0)) + return false; + + /* remaining characters should be namechars */ + for( i = 1; i < attr.length(); i++) + { + c = attr.charAt(i); + map = MAP(c); + + if((map & NAMECHAR) != 0) + continue; + + return false; + } + + return true; + } + + /* swallows closing '>' */ + + public AttVal parseAttrs(MutableBoolean isempty) + { + AttVal av, list; + String attribute, value; + MutableInteger delim = new MutableInteger(); + MutableObject asp = new MutableObject(); + MutableObject php = new MutableObject(); + + list = null; + + for (; !endOfInput();) + { + attribute = parseAttribute(isempty, asp, php); + + if (attribute == null) + { + /* check if attributes are created by ASP markup */ + if (asp.getObject() != null) + { + av = new AttVal(list, null, (Node)asp.getObject(), null, + '\0', null, null ); + list = av; + continue; + } + + /* check if attributes are created by PHP markup */ + if (php.getObject() != null) + { + av = new AttVal(list, null, null, (Node)php.getObject(), + '\0', null, null ); + list = av; + continue; + } + + break; + } + + value = parseValue(attribute, false, 
isempty, delim); + + if (attribute != null && isValidAttrName(attribute)) + { + av = new AttVal( list, null, null, null, + delim.value, attribute, value ); + av.dict = + AttributeTable.getDefaultAttributeTable().findAttribute(av); + list = av; + } + else + { + av = new AttVal( null, null, null, null, + 0, attribute, value ); + Report.attrError(this, this.token, value, Report.BAD_ATTRIBUTE_VALUE); + } + } + + return list; + } + + /* + push a copy of an inline node onto stack + but don't push if implicit or OBJECT or APPLET + (implicit tags are ones generated from the istack) + + One issue arises with pushing inlines when + the tag is already pushed. For instance: + +

    text +

    more text + + Shouldn't be mapped to + +

    text

    +

    more text + */ + public void pushInline( Node node ) + { + IStack is; + + if (node.implicit) + return; + + if (node.tag == null) + return; + + if ((node.tag.model & Dict.CM_INLINE) == 0 ) + return; + + if ((node.tag.model & Dict.CM_OBJECT) != 0) + return; + + if (node.tag != configuration.tt.tagFont && isPushed(node)) + return; + + // make sure there is enough space for the stack + is = new IStack(); + is.tag = node.tag; + is.element = node.element; + if (node.attributes != null) + is.attributes = cloneAttributes(node.attributes); + this.istack.push( is ); + } + + /* pop inline stack */ + public void popInline( Node node ) + { + AttVal av; + IStack is; + + if (node != null) { + + if (node.tag == null) + return; + + if ((node.tag.model & Dict.CM_INLINE) == 0) + return; + + if ((node.tag.model & Dict.CM_OBJECT) != 0) + return; + + // if node is then pop until we find an + if (node.tag == configuration.tt.tagA) { + + while (this.istack.size() > 0) { + is = (IStack)this.istack.pop(); + if (is.tag == configuration.tt.tagA) { + break; + } + } + + if (this.insert >= this.istack.size()) + this.insert = -1; + return; + } + } + + if (this.istack.size() > 0) { + is = (IStack)this.istack.pop(); + if (this.insert >= this.istack.size()) + this.insert = -1; + } + } + + public boolean isPushed( Node node ) + { + int i; + IStack is; + + for (i = this.istack.size() - 1; i >= 0; --i) { + is = (IStack)this.istack.elementAt(i); + if (is.tag == node.tag) + return true; + } + + return false; + } + + /* + This has the effect of inserting "missing" inline + elements around the contents of blocklevel elements + such as P, TD, TH, DIV, PRE etc. This procedure is + called at the start of ParseBlock. when the inline + stack is not empty, as will be the case in: + +

    italic heading

    + + which is then treated as equivalent to + +

    italic heading

    + + This is implemented by setting the lexer into a mode + where it gets tokens from the inline stack rather than + from the input stream. + */ + public int inlineDup( Node node ) + { + int n; + + n = this.istack.size() - this.istackbase; + if ( n > 0 ) { + this.insert = this.istackbase; + this.inode = node; + } + + return n; + } + + public Node insertedToken() + { + Node node; + IStack is; + int n; + + // this will only be null if inode != null + if (this.insert == -1) { + node = this.inode; + this.inode = null; + return node; + } + + // is this is the "latest" node then update + // the position, otherwise use current values + + if (this.inode == null) { + this.lines = this.in.curline; + this.columns = this.in.curcol; + } + + node = newNode(Node.StartTag, + this.lexbuf, + this.txtstart, + this.txtend); // GLP: Bugfix 126261. Remove when this change + // is fixed in istack.c in the original Tidy + node.implicit = true; + is = (IStack)this.istack.elementAt( this.insert ); + node.element = is.element; + node.tag = is.tag; + if (is.attributes != null) + node.attributes = cloneAttributes(is.attributes); + + // advance lexer to next item on the stack + n = this.insert; + + // and recover state if we have reached the end + if (++n < this.istack.size() ) { + this.insert = n; + } else { + this.insert = -1; + } + + return node; + } + + /* AQ: Try this for speed optimization */ + public static int wstrcasecmp(String s1, String s2) + { + return (s1.equalsIgnoreCase(s2) ? 0 : 1); + } + + public static int wstrcaselexcmp(String s1, String s2) + { + char c; + int i = 0; + + while ( i < s1.length() && i < s2.length() ) { + c = s1.charAt(i); + if ( toLower(c) != toLower( s2.charAt(i) ) ) { + break; + } + i += 1; + } + if ( i == s1.length() && i == s2.length() ) { + return 0; + } else if ( i == s1.length() ) { + return -1; + } else if ( i == s2.length() ) { + return 1; + } else { + return ( s1.charAt(i) > s2.charAt(i) ? 
1 : -1 ); + } + } + + public static boolean wsubstr(String s1, String s2) + { + int i; + int len1 = s1.length(); + int len2 = s2.length(); + + for (i = 0; i <= len1 - len2; ++i) + { + if (s2.equalsIgnoreCase(s1.substring(i))) + return true; + } + + return false; + } + + public boolean canPrune(Node element) + { + if (element.type == Node.TextNode) + return true; + + if (element.content != null) + return false; + + if (element.tag == configuration.tt.tagA && element.attributes != null) + return false; + + if (element.tag == configuration.tt.tagP && !this.configuration.DropEmptyParas) + return false; + + if (element.tag == null) + return false; + + if ((element.tag.model & Dict.CM_ROW) != 0) + return false; + + if (element.tag == configuration.tt.tagApplet) + return false; + + if (element.tag == configuration.tt.tagObject) + return false; + + if (element.attributes != null && + (element.getAttrByName("id") != null || + element.getAttrByName("name") != null) ) + return false; + + return true; + } + + /* duplicate name attribute as an id */ + public void fixId(Node node) + { + AttVal name = node.getAttrByName("name"); + AttVal id = node.getAttrByName("id"); + + if (name != null) + { + if (id != null) + { + if (!id.value.equals(name.value)) + Report.attrError(this, node, "name", Report.ID_NAME_MISMATCH); + } + else if (this.configuration.XmlOut) + node.addAttribute("id", name.value); + } + } + + /* + defer duplicates when entering a table or other + element where the inlines shouldn't be duplicated + */ + public void deferDup() + { + this.insert = -1; + this.inode = null; + } + + /* Private methods and fields */ + + /* lexer char types */ + private static final short DIGIT = 1; + private static final short LETTER = 2; + private static final short NAMECHAR = 4; + private static final short WHITE = 8; + private static final short NEWLINE = 16; + private static final short LOWERCASE = 32; + private static final short UPPERCASE = 64; + + /* lexer GetToken states */ + + 
private static final short LEX_CONTENT = 0; + private static final short LEX_GT = 1; + private static final short LEX_ENDTAG = 2; + private static final short LEX_STARTTAG = 3; + private static final short LEX_COMMENT = 4; + private static final short LEX_DOCTYPE = 5; + private static final short LEX_PROCINSTR = 6; + private static final short LEX_ENDCOMMENT = 7; + private static final short LEX_CDATA = 8; + private static final short LEX_SECTION = 9; + private static final short LEX_ASP = 10; + private static final short LEX_JSTE = 11; + private static final short LEX_PHP = 12; + + /* used to classify chars for lexical purposes */ + private static short[] lexmap = new short[128]; + + private static void mapStr(String str, short code) + { + int j; + + for ( int i = 0; i < str.length(); i++ ) { + j = (int)str.charAt(i); + lexmap[j] |= code; + } + } + + static { + mapStr("\r\n\f", (short)(NEWLINE|WHITE)); + mapStr(" \t", WHITE); + mapStr("-.:_", NAMECHAR); + mapStr("0123456789", (short)(DIGIT|NAMECHAR)); + mapStr("abcdefghijklmnopqrstuvwxyz", (short)(LOWERCASE|LETTER|NAMECHAR)); + mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short)(UPPERCASE|LETTER|NAMECHAR)); + } + + private static short MAP( char c ) + { + return ((int)c < 128 ? 
lexmap[(int)c] : 0); + } + + private static boolean isWhite(char c) + { + short m = MAP(c); + + return (m & WHITE) != 0; + } + + private static boolean isDigit(char c) + { + short m; + + m = MAP(c); + + return (m & DIGIT) != 0; + } + + private static boolean isLetter(char c) + { + short m; + + m = MAP(c); + + return (m & LETTER) != 0; + } + + private static char toLower(char c) + { + short m = MAP(c); + + if ((m & UPPERCASE) != 0) + c = (char)( (int)c + (int)'a' - (int)'A' ); + + return c; + } + + private static char toUpper(char c) + { + short m = MAP(c); + + if ((m & LOWERCASE) != 0) + c = (char)( (int)c + (int)'A' - (int)'a' ); + + return c; + } + + public static char foldCase(char c, boolean tocaps, boolean xmlTags) + { + short m; + + if (!xmlTags) + { + m = MAP(c); + + if (tocaps) + { + if ((m & LOWERCASE) != 0) + c = (char)( (int)c + (int)'A' - (int)'a' ); + } + else /* force to lower case */ + { + if ((m & UPPERCASE) != 0) + c = (char)( (int)c + (int)'a' - (int)'A' ); + } + } + + return c; + } + + + private static class W3CVersionInfo + { + String name; + String voyagerName; + String profile; + short code; + + public W3CVersionInfo( String name, + String voyagerName, + String profile, + short code ) + { + this.name = name; + this.voyagerName = voyagerName; + this.profile = profile; + this.code = code; + } + } + + /* the 3 URIs for the XHTML 1.0 DTDs */ + private static final String voyager_loose = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"; + private static final String voyager_strict = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; + private static final String voyager_frameset = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"; + + private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"; + + private static Lexer.W3CVersionInfo[] W3CVersion = + { + new W3CVersionInfo("HTML 4.01", + "XHTML 1.0 Strict", + voyager_strict, + Dict.VERS_HTML40_STRICT), + new W3CVersionInfo("HTML 4.01 Transitional", + "XHTML 1.0 
Transitional", + voyager_loose, + Dict.VERS_HTML40_LOOSE), + new W3CVersionInfo("HTML 4.01 Frameset", + "XHTML 1.0 Frameset", + voyager_frameset, + Dict.VERS_FRAMES), + new W3CVersionInfo("HTML 4.0", + "XHTML 1.0 Strict", + voyager_strict, + Dict.VERS_HTML40_STRICT), + new W3CVersionInfo("HTML 4.0 Transitional", + "XHTML 1.0 Transitional", + voyager_loose, + Dict.VERS_HTML40_LOOSE), + new W3CVersionInfo("HTML 4.0 Frameset", + "XHTML 1.0 Frameset", + voyager_frameset, + Dict.VERS_FRAMES), + new W3CVersionInfo("HTML 3.2", + "XHTML 1.0 Transitional", + voyager_loose, + Dict.VERS_HTML32), + new W3CVersionInfo("HTML 2.0", + "XHTML 1.0 Strict", + voyager_strict, + Dict.VERS_HTML20) + }; + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableBoolean.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableBoolean.java new file mode 100644 index 0000000..de0e64e --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableBoolean.java @@ -0,0 +1,38 @@ +/* + * @(#)MutableBoolean.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Mutable Boolean + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from
    + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class MutableBoolean { + + public boolean value; + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableInteger.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableInteger.java new file mode 100644 index 0000000..00ef347 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableInteger.java @@ -0,0 +1,38 @@ +/* + * @(#)MutableInteger.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Mutable Integer + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class MutableInteger { + + public int value; + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableObject.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableObject.java new file mode 100644 index 0000000..a66fa73 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableObject.java @@ -0,0 +1,58 @@ +/* + * @(#)MutableObject.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Mutable Object + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class MutableObject { + + public MutableObject() + { + this(null); + } + + public MutableObject(Object o) + { + this.value = o; + } + + public void setObject(Object o) + { + value = o; + } + + public Object getObject() + { + return value; + } + + private Object value; + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Node.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Node.java new file mode 100644 index 0000000..e502702 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Node.java @@ -0,0 +1,917 @@ +/* + * @(#)Node.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Node + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +/* + Used for elements and text nodes + element name is null for text nodes + start and end are offsets into lexbuf + which contains the textual content of + all elements in the parse tree. + + parent and content allow traversal + of the parse tree in any direction. + attributes are represented as a linked + list of AttVal nodes which hold the + strings for attribute/value pairs. 
+*/ + +public class Node { + + public static final short RootNode = 0; + public static final short DocTypeTag = 1; + public static final short CommentTag = 2; + public static final short ProcInsTag = 3; + public static final short TextNode = 4; + public static final short StartTag = 5; + public static final short EndTag = 6; + public static final short StartEndTag = 7; + public static final short CDATATag = 8; + public static final short SectionTag = 9; + public static final short AspTag = 10; + public static final short JsteTag = 11; + public static final short PhpTag = 12; + + protected Node parent; + protected Node prev; + protected Node next; + protected Node last; + protected int start; /* start of span onto text array */ + protected int end; /* end of span onto text array */ + protected byte[] textarray; /* the text array */ + protected short type; /* TextNode, StartTag, EndTag etc. */ + protected boolean closed; /* true if closed by explicit end tag */ + protected boolean implicit; /* true if inferred */ + protected boolean linebreak; /* true if followed by a line break */ + protected Dict was; /* old tag when it was changed */ + protected Dict tag; /* tag's dictionary definition */ + protected String element; /* name (null for text nodes) */ + protected AttVal attributes; + protected Node content; + + public Node() + { + this(TextNode, null, 0, 0); + } + + public Node(short type, byte[] textarray, int start, int end) + { + this.parent = null; + this.prev = null; + this.next = null; + this.last = null; + this.start = start; + this.end = end; + this.textarray = textarray; + this.type = type; + this.closed = false; + this.implicit = false; + this.linebreak = false; + this.was = null; + this.tag = null; + this.element = null; + this.attributes = null; + this.content = null; + } + + public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt) + { + this.parent = null; + this.prev = null; + this.next = null; + this.last = null; + 
this.start = start; + this.end = end; + this.textarray = textarray; + this.type = type; + this.closed = false; + this.implicit = false; + this.linebreak = false; + this.was = null; + this.tag = null; + this.element = element; + this.attributes = null; + this.content = null; + if (type == StartTag || type == StartEndTag || type == EndTag) + tt.findTag(this); + } + + /* used to clone heading nodes when split by an
    */ + protected Object clone() + { + Node node = new Node(); + + node.parent = this.parent; + if (this.textarray != null) + { + node.textarray = new byte[this.end - this.start]; + node.start = 0; + node.end = this.end - this.start; + if (node.end > 0) + System.arraycopy(this.textarray, this.start, + node.textarray, node.start, node.end); + } + node.type = this.type; + node.closed = this.closed; + node.implicit = this.implicit; + node.linebreak = this.linebreak; + node.was = this.was; + node.tag = this.tag; + if (this.element != null) + node.element = this.element; + if (this.attributes != null) + node.attributes = (AttVal)this.attributes.clone(); + return node; + } + + public AttVal getAttrByName(String name) + { + AttVal attr; + + for (attr = this.attributes; attr != null; attr = attr.next) + { + if (name != null && + attr.attribute != null && + attr.attribute.equals(name)) + break; + } + + return attr; + } + + /* default method for checking an element's attributes */ + public void checkAttributes( Lexer lexer ) + { + AttVal attval; + + for (attval = this.attributes; attval != null; attval = attval.next) + attval.checkAttribute( lexer, this ); + } + + public void checkUniqueAttributes(Lexer lexer) + { + AttVal attval; + + for (attval = this.attributes; attval != null; attval = attval.next) { + if (attval.asp == null && attval.php == null) + attval.checkUniqueAttribute(lexer, this); + } + } + + public void addAttribute(String name, String value) + { + AttVal av = new AttVal(null, null, null, null, + '"', name, value); + av.dict = + AttributeTable.getDefaultAttributeTable().findAttribute(av); + + if (this.attributes == null) + this.attributes = av; + else /* append to end of attributes */ + { + AttVal here = this.attributes; + + while (here.next != null) + here = here.next; + + here.next = av; + } + } + + /* remove attribute from node then free it */ + public void removeAttribute(AttVal attr) + { + AttVal av; + AttVal prev = null; + AttVal next; + + for (av = 
this.attributes; av != null; av = next) + { + next = av.next; + + if (av == attr) + { + if (prev != null) + prev.next = next; + else + this.attributes = next; + } + else + prev = av; + } + } + + /* find doctype element */ + public Node findDocType() + { + Node node; + + for (node = this.content; + node != null && node.type != DocTypeTag; node = node.next); + + return node; + } + + public void discardDocType() + { + Node node; + + node = findDocType(); + if (node != null) + { + if (node.prev != null) + node.prev.next = node.next; + else + node.parent.content = node.next; + + if (node.next != null) + node.next.prev = node.prev; + + node.next = null; + } + } + + /* remove node from markup tree and discard it */ + public static Node discardElement(Node element) + { + Node next = null; + + if (element != null) + { + next = element.next; + removeNode(element); + } + + return next; + } + + /* insert node into markup tree */ + public static void insertNodeAtStart(Node element, Node node) + { + node.parent = element; + + if (element.content == null) + element.last = node; + else + element.content.prev = node; // AQ added 13 Apr 2000 + + node.next = element.content; + node.prev = null; + element.content = node; + } + + /* insert node into markup tree */ + public static void insertNodeAtEnd(Node element, Node node) + { + node.parent = element; + node.prev = element.last; + + if (element.last != null) + element.last.next = node; + else + element.content = node; + + element.last = node; + } + + /* + insert node into markup tree in pace of element + which is moved to become the child of the node + */ + public static void insertNodeAsParent(Node element, Node node) + { + node.content = element; + node.last = element; + node.parent = element.parent; + element.parent = node; + + if (node.parent.content == element) + node.parent.content = node; + + if (node.parent.last == element) + node.parent.last = node; + + node.prev = element.prev; + element.prev = null; + + if (node.prev != 
null) + node.prev.next = node; + + node.next = element.next; + element.next = null; + + if (node.next != null) + node.next.prev = node; + } + + /* insert node into markup tree before element */ + public static void insertNodeBeforeElement(Node element, Node node) + { + Node parent; + + parent = element.parent; + node.parent = parent; + node.next = element; + node.prev = element.prev; + element.prev = node; + + if (node.prev != null) + node.prev.next = node; + + if (parent.content == element) + parent.content = node; + } + + /* insert node into markup tree after element */ + public static void insertNodeAfterElement(Node element, Node node) + { + Node parent; + + parent = element.parent; + node.parent = parent; + + // AQ - 13Jan2000 fix for parent == null + if (parent != null && parent.last == element) + parent.last = node; + else + { + node.next = element.next; + // AQ - 13Jan2000 fix for node.next == null + if (node.next != null) + node.next.prev = node; + } + + element.next = node; + node.prev = element; + } + + public static void trimEmptyElement(Lexer lexer, Node element) + { + TagTable tt = lexer.configuration.tt; + + if (lexer.canPrune(element)) + { + if (element.type != TextNode) + Report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT); + + discardElement(element); + } + else if (element.tag == tt.tagP && element.content == null) + { + /* replace

    by

    to preserve formatting */ + Node node = lexer.inferredTag("br"); + Node.coerceNode(lexer, element, tt.tagBr); + Node.insertNodeAfterElement(element, node); + } + } + + /* + This maps + hello world + to + hello world + + If last child of element is a text node + then trim trailing white space character + moving it to after element's end tag. + */ + public static void trimTrailingSpace(Lexer lexer, Node element, Node last) + { + byte c; + TagTable tt = lexer.configuration.tt; + + if (last != null && last.type == Node.TextNode && + last.end > last.start) + { + c = lexer.lexbuf[last.end - 1]; + + if (c == 160 || c == (byte)' ') + { + /* take care with
  • */ + if (element.tag == tt.tagTd || + element.tag == tt.tagTh) + { + if (last.end > last.start + 1) + last.end -= 1; + } + else + { + last.end -= 1; + + if (((element.tag.model & Dict.CM_INLINE) != 0) && + !((element.tag.model & Dict.CM_FIELD) != 0)) + lexer.insertspace = true; + + /* if empty string then delete from parse tree */ + if (last.start == last.end) + trimEmptyElement(lexer, last); + } + } + } + } + + /* + This maps +

    hello world + to +

    hello world + + Trims initial space, by moving it before the + start tag, or if this element is the first in + parent's content, then by discarding the space + */ + public static void trimInitialSpace(Lexer lexer, Node element, Node text) + { + Node prev, node; + + // GLP: Local fix to Bug 119789. Remove this comment when parser.c is updated. + // 31-Oct-00. + if (text.type == TextNode && text.textarray[text.start] == (byte)' ' + && (text.start < text.end)) + { + if (((element.tag.model & Dict.CM_INLINE) != 0) && + !((element.tag.model & Dict.CM_FIELD) != 0) && + element.parent.content != element) + { + prev = element.prev; + + if (prev != null && prev.type == TextNode) + { + if (prev.textarray[prev.end - 1] != (byte)' ') + prev.textarray[prev.end++] = (byte)' '; + + ++element.start; + } + else /* create new node */ + { + node = lexer.newNode(); + // Local fix for bug 228486 (GLP). This handles the case + // where we need to create a preceeding text node but there are + // no "slots" in textarray that we can steal from the current + // element. Therefore, we create a new textarray containing + // just the blank. When Tidy is fixed, this should be removed. + if (element.start >= element.end) + { + node.start = 0; + node.end = 1; + node.textarray = new byte[1]; + } + else + { + node.start = element.start++; + node.end = element.start; + node.textarray = element.textarray; + } + node.textarray[node.start] = (byte)' '; + node.prev = prev; + if (prev != null) + prev.next = node; + node.next = element; + element.prev = node; + node.parent = element.parent; + } + } + + /* discard the space in current node */ + ++text.start; + } + } + + /* + Move initial and trailing space out. 
+ This routine maps: + + hello world + to + hello world + and + hello world + to + hello world + */ + public static void trimSpaces(Lexer lexer, Node element) + { + Node text = element.content; + TagTable tt = lexer.configuration.tt; + + if (text != null && text.type == Node.TextNode && + element.tag != tt.tagPre) + trimInitialSpace(lexer, element, text); + + text = element.last; + + if (text != null && text.type == Node.TextNode) + trimTrailingSpace(lexer, element, text); + } + + public boolean isDescendantOf(Dict tag) + { + Node parent; + + for (parent = this.parent; + parent != null; parent = parent.parent) + { + if (parent.tag == tag) + return true; + } + + return false; + } + + /* + the doctype has been found after other tags, + and needs moving to before the html element + */ + public static void insertDocType(Lexer lexer, Node element, Node doctype) + { + TagTable tt = lexer.configuration.tt; + + Report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS); + + while (element.tag != tt.tagHtml) + element = element.parent; + + insertNodeBeforeElement(element, doctype); + } + + public Node findBody(TagTable tt) + { + Node node; + + node = this.content; + + while (node != null && node.tag != tt.tagHtml) + node = node.next; + + if (node == null) + return null; + + node = node.content; + + while (node != null && node.tag != tt.tagBody) + node = node.next; + + return node; + } + + public boolean isElement() + { + return (this.type == StartTag || this.type == StartEndTag ? true : false); + } + + /* + unexpected content in table row is moved to just before + the table in accordance with Netscape and IE. This code + assumes that node hasn't been inserted into the row. 
+ */ + public static void moveBeforeTable(Node row, Node node, TagTable tt) + { + Node table; + + /* first find the table element */ + for (table = row.parent; table != null; table = table.parent) + { + if (table.tag == tt.tagTable) + { + if (table.parent.content == table) + table.parent.content = node; + + node.prev = table.prev; + node.next = table; + table.prev = node; + node.parent = table.parent; + + if (node.prev != null) + node.prev.next = node; + + break; + } + } + } + + /* + if a table row is empty then insert an empty cell + this practice is consistent with browser behavior + and avoids potential problems with row spanning cells + */ + public static void fixEmptyRow(Lexer lexer, Node row) + { + Node cell; + + if (row.content == null) + { + cell = lexer.inferredTag("td"); + insertNodeAtEnd(row, cell); + Report.warning(lexer, row, cell, Report.MISSING_STARTTAG); + } + } + + public static void coerceNode(Lexer lexer, Node node, Dict tag) + { + Node tmp = lexer.inferredTag(tag.name); + Report.warning(lexer, node, tmp, Report.OBSOLETE_ELEMENT); + node.was = node.tag; + node.tag = tag; + node.type = StartTag; + node.implicit = true; + node.element = tag.name; + } + + /* extract a node and its children from a markup tree */ + public static void removeNode(Node node) + { + if (node.prev != null) + node.prev.next = node.next; + + if (node.next != null) + node.next.prev = node.prev; + + if (node.parent != null) + { + if (node.parent.content == node) + node.parent.content = node.next; + + if (node.parent.last == node) + node.parent.last = node.prev; + } + + node.parent = node.prev = node.next = null; + } + + public static boolean insertMisc(Node element, Node node) + { + if (node.type == CommentTag || + node.type == ProcInsTag || + node.type == CDATATag || + node.type == SectionTag || + node.type == AspTag || + node.type == JsteTag || + node.type == PhpTag) + { + insertNodeAtEnd(element, node); + return true; + } + + return false; + } + + /* + used to determine how 
attributes + without values should be printed + this was introduced to deal with + user defined tags e.g. Cold Fusion + */ + public static boolean isNewNode(Node node) + { + if (node != null && node.tag != null) + { + return ((node.tag.model & Dict.CM_NEW) != 0); + } + + return true; + } + + public boolean hasOneChild() + { + return (this.content != null && this.content.next == null); + } + + /* find html element */ + public Node findHTML(TagTable tt) + { + Node node; + + for (node = this.content; + node != null && node.tag != tt.tagHtml; node = node.next); + + return node; + } + + public Node findHEAD(TagTable tt) + { + Node node; + + node = this.findHTML(tt); + + if (node != null) + { + for (node = node.content; + node != null && node.tag != tt.tagHead; + node = node.next); + } + + return node; + } + + public boolean checkNodeIntegrity() + { + Node child; + boolean found = false; + + if (this.prev != null) + { + if (this.prev.next != this) + return false; + } + + if (this.next != null) + { + if (this.next.prev != this) + return false; + } + + if (this.parent != null) + { + if (this.prev == null && this.parent.content != this) + return false; + + if (this.next == null && this.parent.last != this) + return false; + + for (child = this.parent.content; child != null; child = child.next) + if (child == this) + { + found = true; + break; + } + + if (!found) + return false; + } + + for (child = this.content; child != null; child = child.next) + if (!child.checkNodeIntegrity()) + return false; + + return true; + } + + /* + Add class="foo" to node + */ + public static void addClass(Node node, String classname) + { + AttVal classattr = node.getAttrByName("class"); + + /* + if there already is a class attribute + then append class name after a space + */ + if (classattr != null) + { + classattr.value = classattr.value + " " + classname; + } + else /* create new class attribute */ + node.addAttribute("class", classname); + } + + /* --------------------- DEBUG 
-------------------------- */ + + private static final String[] nodeTypeString = + { + "RootNode", + "DocTypeTag", + "CommentTag", + "ProcInsTag", + "TextNode", + "StartTag", + "EndTag", + "StartEndTag", + "SectionTag", + "AspTag", + "PhpTag" + }; + + public String toString() + { + String s = ""; + Node n = this; + + while (n != null) { + s += "[Node type="; + s += nodeTypeString[n.type]; + s += ",element="; + if (n.element != null) + s += n.element; + else + s += "null"; + if (n.type == TextNode || + n.type == CommentTag || + n.type == ProcInsTag) { + s += ",text="; + if (n.textarray != null && n.start <= n.end) { + s += "\""; + s += Lexer.getString(n.textarray, n.start, n.end - n.start); + s += "\""; + } else { + s += "null"; + } + } + s += ",content="; + if (n.content != null) + s += n.content.toString(); + else + s += "null"; + s += "]"; + if (n.next != null) + s += ","; + n = n.next; + } + return s; + } + /* --------------------- END DEBUG ---------------------- */ + + + /* --------------------- DOM ---------------------------- */ + + protected org.w3c.dom.Node adapter = null; + + protected org.w3c.dom.Node getAdapter() + { + if (adapter == null) + { + switch (this.type) + { + case RootNode: + adapter = new DOMDocumentImpl(this); + break; + case StartTag: + case StartEndTag: + adapter = new DOMElementImpl(this); + break; + case DocTypeTag: + adapter = new DOMDocumentTypeImpl(this); + break; + case CommentTag: + adapter = new DOMCommentImpl(this); + break; + case TextNode: + adapter = new DOMTextImpl(this); + break; + case CDATATag: + adapter = new DOMCDATASectionImpl(this); + break; + case ProcInsTag: + adapter = new DOMProcessingInstructionImpl(this); + break; + default: + adapter = new DOMNodeImpl(this); + } + } + return adapter; + } + + protected Node cloneNode(boolean deep) + { + Node node = (Node)this.clone(); + if (deep) + { + Node child; + Node newChild; + for (child = this.content; child != null; child = child.next) + { + newChild = 
child.cloneNode(deep); + insertNodeAtEnd(node, newChild); + } + } + return node; + } + + + protected void setType(short newType) + { + this.type = newType; + } + + /* --------------------- END DOM ------------------------ */ + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Out.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Out.java new file mode 100644 index 0000000..ac6916d --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Out.java @@ -0,0 +1,49 @@ +/* + * @(#)Out.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Output Stream + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +import java.io.OutputStream; + +public abstract class Out +{ + public int encoding; + public int state; /* for ISO 2022 */ + public OutputStream out; + + public abstract void outc(int c); + + public abstract void outc(byte c); + + public abstract void newline(); + +}; + diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/OutImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/OutImpl.java new file mode 100644 index 0000000..1701502 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/OutImpl.java @@ -0,0 +1,148 @@ +/* + * 
@(#)OutImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Output Stream Implementation + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +import java.io.IOException; + +public class OutImpl extends Out +{ + + public OutImpl() + { + this.out = null; + } + + public void outc(byte c) { + outc(((int)c) & 0xFF); // Convert to unsigned. + } + + /* For mac users, should we map Unicode back to MacRoman? 
*/ + public void outc(int c) + { + int ch; + + try { + if (this.encoding == Configuration.UTF8) + { + if (c < 128) + this.out.write(c); + else if (c <= 0x7FF) + { + ch = (0xC0 | (c >> 6)); this.out.write(ch); + ch = (0x80 | (c & 0x3F)); this.out.write(ch); + } + else if (c <= 0xFFFF) + { + ch = (0xE0 | (c >> 12)); this.out.write(ch); + ch = (0x80 | ((c >> 6) & 0x3F)); this.out.write(ch); + ch = (0x80 | (c & 0x3F)); this.out.write(ch); + } + else if (c <= 0x1FFFFF) + { + ch = (0xF0 | (c >> 18)); this.out.write(ch); + ch = (0x80 | ((c >> 12) & 0x3F)); this.out.write(ch); + ch = (0x80 | ((c >> 6) & 0x3F)); this.out.write(ch); + ch = (0x80 | (c & 0x3F)); this.out.write(ch); + } + else + { + ch = (0xF8 | (c >> 24)); this.out.write(ch); + ch = (0x80 | ((c >> 18) & 0x3F)); this.out.write(ch); + ch = (0x80 | ((c >> 12) & 0x3F)); this.out.write(ch); + ch = (0x80 | ((c >> 6) & 0x3F)); this.out.write(ch); + ch = (0x80 | (c & 0x3F)); this.out.write(ch); + } + } + else if (this.encoding == Configuration.ISO2022) + { + if (c == 0x1b) /* ESC */ + this.state = StreamIn.FSM_ESC; + else + { + switch (this.state) + { + case StreamIn.FSM_ESC: + if (c == '$') + this.state = StreamIn.FSM_ESCD; + else if (c == '(') + this.state = StreamIn.FSM_ESCP; + else + this.state = StreamIn.FSM_ASCII; + break; + + case StreamIn.FSM_ESCD: + if (c == '(') + this.state = StreamIn.FSM_ESCDP; + else + this.state = StreamIn.FSM_NONASCII; + break; + + case StreamIn.FSM_ESCDP: + this.state = StreamIn.FSM_NONASCII; + break; + + case StreamIn.FSM_ESCP: + this.state = StreamIn.FSM_ASCII; + break; + + case StreamIn.FSM_NONASCII: + c &= 0x7F; + break; + } + } + + this.out.write(c); + } + else + this.out.write(c); + } + catch (IOException e) { + System.err.println("OutImpl.outc: " + e.toString()); + } + } + + public void newline() + { + try { + this.out.write(nlBytes); + this.out.flush(); + } + catch (IOException e) { + System.err.println("OutImpl.newline: " + e.toString()); + } + } + + private static final 
byte[] nlBytes = + (System.getProperty("line.separator")).getBytes(); + +}; + diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/PPrint.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/PPrint.java new file mode 100644 index 0000000..aba3656 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/PPrint.java @@ -0,0 +1,1845 @@ +/* + * @(#)PPrint.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Pretty print parse tree + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +/* + Block-level and unknown elements are printed on + new lines and their contents indented 2 spaces + + Inline elements are printed inline. 
+ + Inline content is wrapped on spaces (except in + attribute values or preformatted text, after + start tags and before end tags +*/ + +import java.io.FileOutputStream; +import java.io.File; + +import java.io.IOException; +import java.io.FileNotFoundException; + +public class PPrint { + + /* page transition effects */ + + public static final short EFFECT_BLEND = -1; + public static final short EFFECT_BOX_IN = 0; + public static final short EFFECT_BOX_OUT = 1; + public static final short EFFECT_CIRCLE_IN = 2; + public static final short EFFECT_CIRCLE_OUT = 3; + public static final short EFFECT_WIPE_UP = 4; + public static final short EFFECT_WIPE_DOWN = 5; + public static final short EFFECT_WIPE_RIGHT = 6; + public static final short EFFECT_WIPE_LEFT = 7; + public static final short EFFECT_VERT_BLINDS = 8; + public static final short EFFECT_HORZ_BLINDS = 9; + public static final short EFFECT_CHK_ACROSS = 10; + public static final short EFFECT_CHK_DOWN = 11; + public static final short EFFECT_RND_DISSOLVE = 12; + public static final short EFFECT_SPLIT_VIRT_IN = 13; + public static final short EFFECT_SPLIT_VIRT_OUT = 14; + public static final short EFFECT_SPLIT_HORZ_IN = 15; + public static final short EFFECT_SPLIT_HORZ_OUT = 16; + public static final short EFFECT_STRIPS_LEFT_DOWN = 17; + public static final short EFFECT_STRIPS_LEFT_UP = 18; + public static final short EFFECT_STRIPS_RIGHT_DOWN = 19; + public static final short EFFECT_STRIPS_RIGHT_UP = 20; + public static final short EFFECT_RND_BARS_HORZ = 21; + public static final short EFFECT_RND_BARS_VERT = 22; + public static final short EFFECT_RANDOM = 23; + + private static final short NORMAL = 0; + private static final short PREFORMATTED = 1; + private static final short COMMENT = 2; + private static final short ATTRIBVALUE = 4; + private static final short NOWRAP = 8; + private static final short CDATA = 16; + + private int[] linebuf = null; + private int lbufsize = 0; + private int linelen = 0; + private int 
wraphere = 0; + private boolean inAttVal = false; + private boolean InString = false; + + private int slide = 0; + private int count = 0; + private Node slidecontent = null; + + private Configuration configuration; + + public PPrint(Configuration configuration) + { + this.configuration = configuration; + } + + /* + 1010 A + 1011 B + 1100 C + 1101 D + 1110 E + 1111 F + */ + + /* return one less that the number of bytes used by UTF-8 char */ + /* str points to 1st byte, *ch initialized to 1st byte */ + public static int getUTF8(byte[] str, int start, MutableInteger ch) + { + int c, n, i, bytes; + + c = ((int)str[start]) & 0xFF; // Convert to unsigned. + + if ((c & 0xE0) == 0xC0) /* 110X XXXX two bytes */ + { + n = c & 31; + bytes = 2; + } + else if ((c & 0xF0) == 0xE0) /* 1110 XXXX three bytes */ + { + n = c & 15; + bytes = 3; + } + else if ((c & 0xF8) == 0xF0) /* 1111 0XXX four bytes */ + { + n = c & 7; + bytes = 4; + } + else if ((c & 0xFC) == 0xF8) /* 1111 10XX five bytes */ + { + n = c & 3; + bytes = 5; + } + else if ((c & 0xFE) == 0xFC) /* 1111 110X six bytes */ + + { + n = c & 1; + bytes = 6; + } + else /* 0XXX XXXX one byte */ + { + ch.value = c; + return 0; + } + + /* successor bytes should have the form 10XX XXXX */ + for (i = 1; i < bytes; ++i) + { + c = ((int)str[start + i]) & 0xFF; // Convert to unsigned. 
+ n = (n << 6) | (c & 0x3F); + } + + ch.value = n; + return bytes - 1; + } + + /* store char c as UTF-8 encoded byte stream */ + public static int putUTF8(byte[] buf, int start, int c) + { + if (c < 128) + buf[start++] = (byte)c; + else if (c <= 0x7FF) + { + buf[start++] = (byte)(0xC0 | (c >> 6)); + buf[start++] = (byte)(0x80 | (c & 0x3F)); + } + else if (c <= 0xFFFF) + { + buf[start++] = (byte)(0xE0 | (c >> 12)); + buf[start++] = (byte)(0x80 | ((c >> 6) & 0x3F)); + buf[start++] = (byte)(0x80 | (c & 0x3F)); + } + else if (c <= 0x1FFFFF) + { + buf[start++] = (byte)(0xF0 | (c >> 18)); + buf[start++] = (byte)(0x80 | ((c >> 12) & 0x3F)); + buf[start++] = (byte)(0x80 | ((c >> 6) & 0x3F)); + buf[start++] = (byte)(0x80 | (c & 0x3F)); + } + else + { + buf[start++] = (byte)(0xF8 | (c >> 24)); + buf[start++] = (byte)(0x80 | ((c >> 18) & 0x3F)); + buf[start++] = (byte)(0x80 | ((c >> 12) & 0x3F)); + buf[start++] = (byte)(0x80 | ((c >> 6) & 0x3F)); + buf[start++] = (byte)(0x80 | (c & 0x3F)); + } + + return start; + } + + private void addC(int c, int index) + { + if (index + 1 >= lbufsize) + { + while (index + 1 >= lbufsize) + { + if (lbufsize == 0) + lbufsize = 256; + else + lbufsize = lbufsize * 2; + } + + int[] temp = new int[ lbufsize ]; + if (linebuf != null) + System.arraycopy(linebuf, 0, temp, 0, index); + linebuf = temp; + } + + linebuf[index] = c; + } + + private void wrapLine(Out fout, int indent) + { + int i, p, q; + + if (wraphere == 0) + return; + + for (i = 0; i < indent; ++i) + fout.outc((int)' '); + + for (i = 0; i < wraphere; ++i) + fout.outc(linebuf[i]); + + if (InString) + { + fout.outc((int)' '); + fout.outc((int)'\\'); + } + + fout.newline(); + + if (linelen > wraphere) + { + p = 0; + + if (linebuf[wraphere] == ' ') + ++wraphere; + + q = wraphere; + addC('\0', linelen); + + while (true) + { + linebuf[p] = linebuf[q]; + if (linebuf[q] == 0) break; + p++; + q++; + } + linelen -= wraphere; + } + else + linelen = 0; + + wraphere = 0; + } + + private void 
wrapAttrVal(Out fout, int indent, boolean inString) + { + int i, p, q; + + for (i = 0; i < indent; ++i) + fout.outc((int)' '); + + for (i = 0; i < wraphere; ++i) + fout.outc(linebuf[i]); + + fout.outc((int)' '); + + if (inString) + fout.outc((int)'\\'); + + fout.newline(); + + if (linelen > wraphere) + { + p = 0; + + if (linebuf[wraphere] == ' ') + ++wraphere; + + q = wraphere; + addC('\0', linelen); + + while (true) + { + linebuf[p] = linebuf[q]; + if (linebuf[q] == 0) break; + p++; + q++; + } + linelen -= wraphere; + } + else + linelen = 0; + + wraphere = 0; + } + + /* Write out the buffered line (wrapping it first once indent + linelen reaches wraplen), then always emit a newline and reset linelen, wraphere and inAttVal. */ + public void flushLine(Out fout, int indent) + { + int i; + + if (linelen > 0) + { + if (indent + linelen >= this.configuration.wraplen) + wrapLine(fout, indent); + + if (!inAttVal || this.configuration.IndentAttributes) + { + for (i = 0; i < indent; ++i) + fout.outc((int)' '); + } + + for (i = 0; i < linelen; ++i) + fout.outc(linebuf[i]); + } + + fout.newline(); + linelen = 0; + wraphere = 0; + inAttVal = false; + } + + /* Same as flushLine, except that nothing is written (not even the newline) and no state is reset when the line buffer is empty. */ + public void condFlushLine(Out fout, int indent) + { + int i; + + if (linelen > 0) + { + if (indent + linelen >= this.configuration.wraplen) + wrapLine(fout, indent); + + if (!inAttVal || this.configuration.IndentAttributes) + { + for (i = 0; i < indent; ++i) + fout.outc((int)' '); + } + + for (i = 0; i < linelen; ++i) + fout.outc(linebuf[i]); + + fout.newline(); + linelen = 0; + wraphere = 0; + inAttVal = false; + } + } + + /* Append character c to the line buffer, substituting an entity reference where the mode flags (PREFORMATTED, COMMENT, ATTRIBVALUE, NOWRAP, CDATA) and the configuration call for one. Nothing is written to the output here; the buffer is emitted by flushLine, condFlushLine and wrapLine. */ + private void printChar(int c, short mode) + { + String entity; + + if (c == ' ' && !((mode & (PREFORMATTED | COMMENT | ATTRIBVALUE)) != 0)) + { + /* coerce a space character to a non-breaking space */ + if ((mode & NOWRAP) != 0) + { + /* by default XML doesn't define &nbsp; */ + if (this.configuration.NumEntities || this.configuration.XmlTags) + { + addC('&', linelen++); + addC('#', linelen++); + addC('1', linelen++); + addC('6', linelen++); + addC('0', linelen++); + addC(';', linelen++); + } + else /* otherwise use named entity */ + { + addC('&', linelen++); + addC('n', 
linelen++); + addC('b', linelen++); + addC('s', linelen++); + addC('p', linelen++); + addC(';', linelen++); + } + return; + } + else + wraphere = linelen; /* an ordinary space is a permitted wrap point */ + } + + /* comment characters are passed raw */ + if ((mode & COMMENT) != 0) + { + addC(c, linelen++); + return; + } + + /* except in CDATA map < to &lt; etc. */ + if (! ((mode & CDATA) != 0) ) + { + if (c == '<') + { + addC('&', linelen++); + addC('l', linelen++); + addC('t', linelen++); + addC(';', linelen++); + return; + } + + if (c == '>') + { + addC('&', linelen++); + addC('g', linelen++); + addC('t', linelen++); + addC(';', linelen++); + return; + } + + /* + naked '&' chars can be left alone or + quoted as &amp; The latter is required + for XML where naked '&' are illegal. + */ + if (c == '&' && this.configuration.QuoteAmpersand) + { + addC('&', linelen++); + addC('a', linelen++); + addC('m', linelen++); + addC('p', linelen++); + addC(';', linelen++); + return; + } + + if (c == '"' && this.configuration.QuoteMarks) + { + addC('&', linelen++); + addC('q', linelen++); + addC('u', linelen++); + addC('o', linelen++); + addC('t', linelen++); + addC(';', linelen++); + return; + } + + if (c == '\'' && this.configuration.QuoteMarks) + { + addC('&', linelen++); + addC('#', linelen++); + addC('3', linelen++); + addC('9', linelen++); + addC(';', linelen++); + return; + } + + /* 160 is the non-breaking space: written as an entity only when QuoteNbsp is set */ + if (c == 160 && this.configuration.CharEncoding != Configuration.RAW) + { + if (this.configuration.QuoteNbsp) + { + addC('&', linelen++); + + if (this.configuration.NumEntities) + { + addC('#', linelen++); + addC('1', linelen++); + addC('6', linelen++); + addC('0', linelen++); + } + else + { + addC('n', linelen++); + addC('b', linelen++); + addC('s', linelen++); + addC('p', linelen++); + } + + addC(';', linelen++); + } + else + addC(c, linelen++); + + return; + } + } + + /* otherwise ISO 2022 characters are passed raw */ + if (this.configuration.CharEncoding == Configuration.ISO2022 || + this.configuration.CharEncoding == Configuration.RAW) + { + addC(c, 
linelen++); + return; + } + + /* if preformatted text, map &nbsp; to space */ + if (c == 160 && ((mode & PREFORMATTED) != 0)) + { + addC(' ', linelen++); + return; + } + + /* + Filters from Word and PowerPoint often use smart + quotes resulting in character codes between 128 + and 159. Unfortunately, the corresponding HTML 4.0 + entities for these are not widely supported. The + following converts dashes and quotation marks to + the nearest ASCII equivalent. My thanks to + Andrzej Novosiolov for his help with this code. + */ + /* NOTE(review): the range actually tested below is 0x2013-0x201E, not 128-159 as the comment above says. */ + + if (this.configuration.MakeClean) + { + if (c >= 0x2013 && c <= 0x201E) + { + switch (c) { + case 0x2013: + case 0x2014: + c = '-'; + break; + case 0x2018: + case 0x2019: + case 0x201A: + c = '\''; + break; + case 0x201C: + case 0x201D: + case 0x201E: + c = '"'; + break; + } + } + } + + /* don't map latin-1 chars to entities */ + if (this.configuration.CharEncoding == Configuration.LATIN1) + { + if (c > 255) /* multi byte chars */ + { + if (!this.configuration.NumEntities) + { + entity = EntityTable.getDefaultEntityTable().entityName((short)c); + if (entity != null) + entity = "&" + entity + ";"; + else + entity = "&#" + c + ";"; + } + else + entity = "&#" + c + ";"; + + for (int i = 0; i < entity.length(); i++) + addC((int)entity.charAt(i), linelen++); + + return; + } + + if (c > 126 && c < 160) + { + entity = "&#" + c + ";"; + + for (int i = 0; i < entity.length(); i++) + addC((int)entity.charAt(i), linelen++); + + return; + } + + addC(c, linelen++); + return; + } + + /* don't map utf8 chars to entities */ + if (this.configuration.CharEncoding == Configuration.UTF8) + { + addC(c, linelen++); + return; + } + + /* use numeric entities only for XML */ + if (this.configuration.XmlTags) + { + /* if ASCII use numeric entities for chars > 127 */ + if (c > 127 && this.configuration.CharEncoding == Configuration.ASCII) + { + entity = "&#" + c + ";"; + + for (int i = 0; i < entity.length(); i++) + addC((int)entity.charAt(i), linelen++); + + return; + } 
+ + /* otherwise output char raw */ + addC(c, linelen++); + return; + } + + /* default treatment for ASCII */ + if (c > 126 || (c < ' ' && c != '\t')) + { + if (!this.configuration.NumEntities) + { + entity = EntityTable.getDefaultEntityTable().entityName((short)c); + if (entity != null) + entity = "&" + entity + ";"; + else + entity = "&#" + c + ";"; + } + else + entity = "&#" + c + ";"; + + for (int i = 0; i < entity.length(); i++) + addC((int)entity.charAt(i), linelen++); + + return; + } + + addC(c, linelen++); + } + + /* + The line buffer is uint not char so we can + hold Unicode values unencoded. The translation + to UTF-8 is deferred to the outc routine called + to flush the line buffer. + */ + private void printText(Out fout, short mode, int indent, + byte[] textarray, int start, int end) + { + int i, c; + MutableInteger ci = new MutableInteger(); + + for (i = start; i < end; ++i) + { + if (indent + linelen >= this.configuration.wraplen) + wrapLine(fout, indent); + + c = ((int)textarray[i]) & 0xFF; // Convert to unsigned. + + /* look for UTF-8 multibyte character */ + if (c > 0x7F) + { + i += getUTF8(textarray, i, ci); + c = ci.value; + } + + if (c == '\n') + { + flushLine(fout, indent); + continue; + } + + printChar(c, mode); + } + } + + private void printString(Out fout, int indent, String str) + { + for (int i = 0; i < str.length(); i++ ) + addC((int)str.charAt(i), linelen++); + } + + private void printAttrValue(Out fout, int indent, String value, int delim, boolean wrappable) + { + int c; + MutableInteger ci = new MutableInteger(); + boolean wasinstring = false; + byte[] valueChars = null; + int i; + short mode = (wrappable ? 
(short)(NORMAL | ATTRIBVALUE) : + (short)(PREFORMATTED | ATTRIBVALUE)); + + if (value != null) + { + valueChars = Lexer.getBytes(value); + } + + /* look for ASP, Tango or PHP instructions for computed attribute value */ + if (valueChars != null && valueChars.length >= 5 && valueChars[0] == '<') + { + if (valueChars[1] == '%' || valueChars[1] == '@'|| + (new String(valueChars, 0, 5)).equals("= this.configuration.wraplen) + wrapLine(fout, indent); + + if (indent + linelen < this.configuration.wraplen) + wraphere = linelen; + else + condFlushLine(fout, indent); + } + + addC(delim, linelen++); + + if (value != null) + { + InString = false; + + i = 0; + while (i < valueChars.length) + { + c = ((int)valueChars[i]) & 0xFF; // Convert to unsigned. + + if (wrappable && c == ' ' && indent + linelen < this.configuration.wraplen) + { + wraphere = linelen; + wasinstring = InString; + } + + if (wrappable && wraphere > 0 && indent + linelen >= this.configuration.wraplen) + wrapAttrVal(fout, indent, wasinstring); + + if (c == delim) + { + String entity; + + entity = (c == '"' ? 
""" : "'"); + + for (int j = 0; j < entity.length(); j++ ) + addC(entity.charAt(j), linelen++); + + ++i; + continue; + } + else if (c == '"') + { + if (this.configuration.QuoteMarks) + { + addC('&', linelen++); + addC('q', linelen++); + addC('u', linelen++); + addC('o', linelen++); + addC('t', linelen++); + addC(';', linelen++); + } + else + addC('"', linelen++); + + if (delim == '\'') + InString = !InString; + + ++i; + continue; + } + else if (c == '\'') + { + if (this.configuration.QuoteMarks) + { + addC('&', linelen++); + addC('#', linelen++); + addC('3', linelen++); + addC('9', linelen++); + addC(';', linelen++); + } + else + addC('\'', linelen++); + + if (delim == '"') + InString = !InString; + + ++i; + continue; + } + + /* look for UTF-8 multibyte character */ + if (c > 0x7F) + { + i += getUTF8(valueChars, i, ci); + c = ci.value; + } + + ++i; + + if (c == '\n') + { + flushLine(fout, indent); + continue; + } + + printChar(c, mode); + } + } + + InString = false; + addC(delim, linelen++); + } + + private void printAttribute(Out fout, int indent, Node node, AttVal attr) + { + String name; + boolean wrappable = false; + + if (this.configuration.IndentAttributes) + { + flushLine(fout, indent); + indent += this.configuration.spaces; + } + + name = attr.attribute; + + if (indent + linelen >= this.configuration.wraplen) + wrapLine(fout, indent); + + if (!this.configuration.XmlTags && !this.configuration.XmlOut && attr.dict != null) + { + if (AttributeTable.getDefaultAttributeTable().isScript(name)) + wrappable = this.configuration.WrapScriptlets; + else if (!attr.dict.nowrap && this.configuration.WrapAttVals) + wrappable = true; + } + + if (indent + linelen < this.configuration.wraplen) + { + wraphere = linelen; + addC(' ', linelen++); + } + else + { + condFlushLine(fout, indent); + addC(' ', linelen++); + } + + for (int i = 0; i < name.length(); i++ ) + addC((int)Lexer.foldCase(name.charAt(i), + this.configuration.UpperCaseAttrs, + this.configuration.XmlTags), + 
linelen++); + + if (indent + linelen >= this.configuration.wraplen) + wrapLine(fout, indent); + + if (attr.value == null) + { + if (this.configuration.XmlTags || this.configuration.XmlOut) + printAttrValue(fout, indent, attr.attribute, attr.delim, true); + else if (!attr.isBoolAttribute() && !Node.isNewNode(node)) + printAttrValue(fout, indent, "", attr.delim, true); + else if (indent + linelen < this.configuration.wraplen) + wraphere = linelen; + + } + else + printAttrValue(fout, indent, attr.value, attr.delim, wrappable); + } + + private void printAttrs(Out fout, int indent, + Node node, AttVal attr) + { + if (attr != null) + { + if (attr.next != null) + printAttrs(fout, indent, node, attr.next); + + if (attr.attribute != null) + printAttribute(fout, indent, node, attr); + else if (attr.asp != null) + { + addC(' ', linelen++); + printAsp(fout, indent, attr.asp); + } + else if (attr.php != null) + { + addC(' ', linelen++); + printPhp(fout, indent, attr.php); + } + } + + /* add xml:space attribute to pre and other elements */ + if (configuration.XmlOut && + configuration.XmlSpace && + ParserImpl.XMLPreserveWhiteSpace(node, configuration.tt) && + node.getAttrByName("xml:space") == null) + printString(fout, indent, " xml:space=\"preserve\""); + } + + /* + Line can be wrapped immediately after inline start tag provided + if follows a text node ending in a space, or it parent is an + inline element that that rule applies to. This behaviour was + reverse engineered from Netscape 3.0 + */ + private static boolean afterSpace(Node node) + { + Node prev; + int c; + + if (node == null || node.tag == null || !((node.tag.model & Dict.CM_INLINE) != 0)) + return true; + + prev = node.prev; + + if (prev != null) + { + if (prev.type == Node.TextNode && prev.end > prev.start) + { + c = ((int)prev.textarray[prev.end - 1]) & 0xFF; // Convert to unsigned. 
 + + if (c == 160 || c == ' ' || c == '\n') + return true; + } + + return false; + } + + return afterSpace(node.parent); + } + + /* Buffer the tag for node: the opening angle bracket, a slash for end tags, the case-folded element name, the attributes, and a trailing " /" for empty elements when XmlOut is set or the lexer reports isvoyager. May record a wrap point after the tag. */ + private void printTag(Lexer lexer, Out fout, short mode, int indent, Node node) + { + char c; + String p; + TagTable tt = this.configuration.tt; + + addC('<', linelen++); + + if (node.type == Node.EndTag) + addC('/', linelen++); + + p = node.element; + for (int i = 0; i < p.length(); i++ ) + addC((int)Lexer.foldCase(p.charAt(i), + this.configuration.UpperCaseTags, + this.configuration.XmlTags), + linelen++); + + printAttrs(fout, indent, node, node.attributes); + + if ((this.configuration.XmlOut || lexer != null && lexer.isvoyager) && + (node.type == Node.StartEndTag || (node.tag.model & Dict.CM_EMPTY) != 0)) + { + addC(' ', linelen++); /* compatibility hack */ + addC('/', linelen++); + } + + addC('>', linelen++);; + + if (node.type != Node.StartEndTag && !((mode & PREFORMATTED) != 0)) + { + if (indent + linelen >= this.configuration.wraplen) + wrapLine(fout, indent); + + if (indent + linelen < this.configuration.wraplen) + { + /* + wrap after start tag if is <br/>
 or if it's not + inline or it is an empty tag followed by </a> + */ + if (afterSpace(node)) + { + if (!((mode & NOWRAP) != 0) && + (!((node.tag.model & Dict.CM_INLINE) != 0) || + (node.tag == tt.tagBr) || + (((node.tag.model & Dict.CM_EMPTY) != 0) && + node.next == null && + node.parent.tag == tt.tagA))) + { + wraphere = linelen; + } + } + } + else + condFlushLine(fout, indent); + } + } + + /* Buffer the end tag for node using the case-folded element name; no wrap point is recorded (see below). */ + private void printEndTag(Out fout, short mode, int indent, Node node) + { + char c; + String p; + + /* + Netscape ignores SGML standard by not ignoring a + line break before </A> or </U> etc. To avoid rendering + this as an underlined space, I disable line wrapping + before inline end tags by the #if 0 ... #endif + */ +if (false) { + if (indent + linelen < this.configuration.wraplen && !((mode & NOWRAP) != 0)) + wraphere = linelen; +} + + addC('<', linelen++); + addC('/', linelen++); + + p = node.element; + for (int i = 0; i < p.length(); i++ ) + addC((int)Lexer.foldCase(p.charAt(i), + this.configuration.UpperCaseTags, + this.configuration.XmlTags), + linelen++); + + addC('>', linelen++); + } + + /* Buffer an SGML comment: the opening marker, the node text passed through in COMMENT mode (unescaped), and the closing marker; the line is flushed afterwards when node.linebreak is set. */ + private void printComment(Out fout, int indent, Node node) + { + if (indent + linelen < this.configuration.wraplen) + wraphere = linelen; + + addC('<', linelen++); + addC('!', linelen++); + addC('-', linelen++); + addC('-', linelen++); +if (false) { + if (linelen < this.configuration.wraplen) + wraphere = linelen; +} + printText(fout, COMMENT, indent, + node.textarray, node.start, node.end); +if (false) { + if (indent + linelen < this.configuration.wraplen) + wraphere = linelen; +} + // See Lexer.java: AQ 8Jul2000 + addC('-', linelen++); + addC('-', linelen++); + addC('>', linelen++); + + if (node.linebreak) + flushLine(fout, indent); + } + + private void printDocType(Out fout, int indent, Node node) + { + boolean q = this.configuration.QuoteMarks; + + this.configuration.QuoteMarks = false; + + if (indent + linelen < this.configuration.wraplen) + wraphere = linelen; + + condFlushLine(fout, 
indent); + + addC('<', linelen++); + addC('!', linelen++); + addC('D', linelen++); + addC('O', linelen++); + addC('C', linelen++); + addC('T', linelen++); + addC('Y', linelen++); + addC('P', linelen++); + addC('E', linelen++); + addC(' ', linelen++); + + if (indent + linelen < this.configuration.wraplen) + wraphere = linelen; + + printText(fout, (short)0, indent, + node.textarray, node.start, node.end); + + if (linelen < this.configuration.wraplen) + wraphere = linelen; + + addC('>', linelen++); + this.configuration.QuoteMarks = q; + condFlushLine(fout, indent); + } + + private void printPI(Out fout, int indent, Node node) + { + if (indent + linelen < this.configuration.wraplen) + wraphere = linelen; + + addC('<', linelen++); + addC('?', linelen++); + + /* set CDATA to pass < and > unescaped */ + printText(fout, CDATA, indent, + node.textarray, node.start, node.end); + + if (node.textarray[node.end - 1] != (byte)'?') + addC('?', linelen++); + + addC('>', linelen++); + condFlushLine(fout, indent); + } + + /* note ASP and JSTE share <% ... %> syntax */ + private void printAsp(Out fout, int indent, Node node) + { + int savewraplen = this.configuration.wraplen; + + /* disable wrapping if so requested */ + + if (!this.configuration.WrapAsp || !this.configuration.WrapJste) + this.configuration.wraplen = 0xFFFFFF; /* a very large number */ +if (false) { //#if 0 + if (indent + linelen < this.configuration.wraplen) + wraphere = linelen; +} //#endif + + addC('<', linelen++); + addC('%', linelen++); + + printText(fout, (this.configuration.WrapAsp ? CDATA : COMMENT), indent, + node.textarray, node.start, node.end); + + addC('%', linelen++); + addC('>', linelen++); + /* condFlushLine(fout, indent); */ + this.configuration.wraplen = savewraplen; + } + + /* JSTE also supports <# ... 
#> syntax */ + private void printJste(Out fout, int indent, Node node) + { + int savewraplen = this.configuration.wraplen; + + /* disable wrapping if so requested */ + + if (!this.configuration.WrapJste) + this.configuration.wraplen = 0xFFFFFF; /* a very large number */ + + addC('<', linelen++); + addC('#', linelen++); + + printText(fout, (this.configuration.WrapJste ? CDATA : COMMENT), indent, + node.textarray, node.start, node.end); + + addC('#', linelen++); + addC('>', linelen++); + /* condFlushLine(fout, indent); */ + this.configuration.wraplen = savewraplen; + } + + /* PHP is based on XML processing instructions */ + private void printPhp(Out fout, int indent, Node node) + { + int savewraplen = this.configuration.wraplen; + + /* disable wrapping if so requested */ + + if (!this.configuration.WrapPhp) + this.configuration.wraplen = 0xFFFFFF; /* a very large number */ + +if (false) { //#if 0 + if (indent + linelen < this.configuration.wraplen) + wraphere = linelen; +} //#endif + addC('<', linelen++); + addC('?', linelen++); + + printText(fout, (this.configuration.WrapPhp ? 
CDATA : COMMENT), indent, + node.textarray, node.start, node.end); + + addC('?', linelen++); + addC('>', linelen++); + /* PCondFlushLine(fout, indent); */ + this.configuration.wraplen = savewraplen; + } + + private void printCDATA(Out fout, int indent, Node node) + { + int savewraplen = this.configuration.wraplen; + + condFlushLine(fout, indent); + + /* disable wrapping */ + + this.configuration.wraplen = 0xFFFFFF; /* a very large number */ + + addC('<', linelen++); + addC('!', linelen++); + addC('[', linelen++); + addC('C', linelen++); + addC('D', linelen++); + addC('A', linelen++); + addC('T', linelen++); + addC('A', linelen++); + addC('[', linelen++); + + printText(fout, COMMENT, indent, + node.textarray, node.start, node.end); + + addC(']', linelen++); + addC(']', linelen++); + addC('>', linelen++); + condFlushLine(fout, indent); + this.configuration.wraplen = savewraplen; + } + + private void printSection(Out fout, int indent, Node node) + { + int savewraplen = this.configuration.wraplen; + + /* disable wrapping if so requested */ + + if (!this.configuration.WrapSection) + this.configuration.wraplen = 0xFFFFFF; /* a very large number */ + +if (false) { //#if 0 + if (indent + linelen < this.configuration.wraplen) + wraphere = linelen; +} //#endif + addC('<', linelen++); + addC('!', linelen++); + addC('[', linelen++); + + printText(fout, (this.configuration.WrapSection ? 
CDATA : COMMENT), indent, + node.textarray, node.start, node.end); + + addC(']', linelen++); + addC('>', linelen++); + /* PCondFlushLine(fout, indent); */ + this.configuration.wraplen = savewraplen; + } + + private boolean shouldIndent(Node node) + { + TagTable tt = this.configuration.tt; + + if (!this.configuration.IndentContent) + return false; + + if (this.configuration.SmartIndent) + { + if (node.content != null && ((node.tag.model & Dict.CM_NO_INDENT) != 0)) + { + for (node = node.content; node != null; node = node.next) + if (node.tag != null && (node.tag.model & Dict.CM_BLOCK) != 0) + return true; + + return false; + } + + if ((node.tag.model & Dict.CM_HEADING) != 0) + return false; + + if (node.tag == tt.tagP) + return false; + + if (node.tag == tt.tagTitle) + return false; + } + + if ((node.tag.model & (Dict.CM_FIELD | Dict.CM_OBJECT)) != 0) + return true; + + if (node.tag == tt.tagMap) + return true; + + return !((node.tag.model & Dict.CM_INLINE) != 0); + } + + public void printTree(Out fout, short mode, int indent, + Lexer lexer, Node node) + { + Node content, last; + TagTable tt = this.configuration.tt; + + if (node == null) + return; + + if (node.type == Node.TextNode) + printText(fout, mode, indent, + node.textarray, node.start, node.end); + else if (node.type == Node.CommentTag) + { + printComment(fout, indent, node); + } + else if (node.type == Node.RootNode) + { + for (content = node.content; + content != null; + content = content.next) + printTree(fout, mode, indent, lexer, content); + } + else if (node.type == Node.DocTypeTag) + printDocType(fout, indent, node); + else if (node.type == Node.ProcInsTag) + printPI(fout, indent, node); + else if (node.type == Node.CDATATag) + printCDATA(fout, indent, node); + else if (node.type == Node.SectionTag) + printSection(fout, indent, node); + else if (node.type == Node.AspTag) + printAsp(fout, indent, node); + else if (node.type == Node.JsteTag) + printJste(fout, indent, node); + else if (node.type == 
Node.PhpTag) + printPhp(fout, indent, node); + else if ((node.tag.model & Dict.CM_EMPTY) != 0 || node.type == Node.StartEndTag) + { + if (!((node.tag.model & Dict.CM_INLINE) != 0)) + condFlushLine(fout, indent); + + if (node.tag == tt.tagBr && node.prev != null && + node.prev.tag != tt.tagBr && this.configuration.BreakBeforeBR) + flushLine(fout, indent); + + if (this.configuration.MakeClean && node.tag == tt.tagWbr) + printString(fout, indent, " "); + else + printTag(lexer, fout, mode, indent, node); + + if (node.tag == tt.tagParam || node.tag == tt.tagArea) + condFlushLine(fout, indent); + else if (node.tag == tt.tagBr || node.tag == tt.tagHr) + flushLine(fout, indent); + } + else /* some kind of container element */ + { + if (node.tag != null && node.tag.parser == ParserImpl.getParsePre()) + { + condFlushLine(fout, indent); + + indent = 0; + condFlushLine(fout, indent); + printTag(lexer, fout, mode, indent, node); + flushLine(fout, indent); + + for (content = node.content; + content != null; + content = content.next) + printTree(fout, (short)(mode | PREFORMATTED | NOWRAP), indent, lexer, content); + + condFlushLine(fout, indent); + printEndTag(fout, mode, indent, node); + flushLine(fout, indent); + + if (this.configuration.IndentContent == false && node.next != null) + flushLine(fout, indent); + } + else if (node.tag == tt.tagStyle || node.tag == tt.tagScript) + { + condFlushLine(fout, indent); + + indent = 0; + condFlushLine(fout, indent); + printTag(lexer, fout, mode, indent, node); + flushLine(fout, indent); + + for (content = node.content; + content != null; + content = content.next) + printTree(fout, (short)(mode | PREFORMATTED | NOWRAP |CDATA), indent, lexer, content); + + condFlushLine(fout, indent); + printEndTag(fout, mode, indent, node); + flushLine(fout, indent); + + if (this.configuration.IndentContent == false && node.next != null) + flushLine(fout, indent); + } + else if ((node.tag.model & Dict.CM_INLINE) != 0) + { + if 
(this.configuration.MakeClean) + { + /* discards and tags */ + if (node.tag == tt.tagFont) + { + for (content = node.content; + content != null; + content = content.next) + printTree(fout, mode, indent, lexer, content); + return; + } + + /* replace ... by   or   etc. */ + if (node.tag == tt.tagNobr) + { + for (content = node.content; + content != null; + content = content.next) + printTree(fout, (short)(mode|NOWRAP), indent, lexer, content); + return; + } + } + + /* otherwise a normal inline element */ + + printTag(lexer, fout, mode, indent, node); + + /* indent content for SELECT, TEXTAREA, MAP, OBJECT and APPLET */ + + if (shouldIndent(node)) + { + condFlushLine(fout, indent); + indent += this.configuration.spaces; + + for (content = node.content; + content != null; + content = content.next) + printTree(fout, mode, indent, lexer, content); + + condFlushLine(fout, indent); + indent -= this.configuration.spaces; + condFlushLine(fout, indent); + } + else + { + + for (content = node.content; + content != null; + content = content.next) + printTree(fout, mode, indent, lexer, content); + } + + printEndTag(fout, mode, indent, node); + } + else /* other tags */ + { + condFlushLine(fout, indent); + + if (this.configuration.SmartIndent && node.prev != null) + flushLine(fout, indent); + + if (this.configuration.HideEndTags == false || + !(node.tag != null && ((node.tag.model & Dict.CM_OMITST) != 0))) + { + printTag(lexer, fout, mode, indent, node); + + if (shouldIndent(node)) + condFlushLine(fout, indent); + else if ((node.tag.model & Dict.CM_HTML) != 0 || + node.tag == tt.tagNoframes || + ((node.tag.model & Dict.CM_HEAD) != 0 && + !(node.tag == tt.tagTitle))) + flushLine(fout, indent); + } + + if (node.tag == tt.tagBody && this.configuration.BurstSlides) + printSlide(fout, mode, (this.configuration.IndentContent ? 
indent+this.configuration.spaces : indent), lexer); + else + { + last = null; + + for (content = node.content; + content != null; content = content.next) + { + /* kludge for naked text before block level tag */ + if (last != null && !this.configuration.IndentContent && last.type == Node.TextNode && + content.tag != null && (content.tag.model & Dict.CM_BLOCK) != 0) + { + flushLine(fout, indent); + flushLine(fout, indent); + } + + printTree(fout, mode, + (shouldIndent(node) ? indent+this.configuration.spaces : indent), lexer, content); + + last = content; + } + } + + /* don't flush line for td and th */ + if (shouldIndent(node) || + (((node.tag.model & Dict.CM_HTML) != 0 || node.tag == tt.tagNoframes || + ((node.tag.model & Dict.CM_HEAD) != 0 && !(node.tag == tt.tagTitle))) + && this.configuration.HideEndTags == false)) + { + condFlushLine(fout, (this.configuration.IndentContent ? indent+this.configuration.spaces : indent)); + + if (this.configuration.HideEndTags == false || !((node.tag.model & Dict.CM_OPT) != 0)) + { + printEndTag(fout, mode, indent, node); + flushLine(fout, indent); + } + } + else + { + if (this.configuration.HideEndTags == false || !((node.tag.model & Dict.CM_OPT) != 0)) + printEndTag(fout, mode, indent, node); + + flushLine(fout, indent); + } + + if (this.configuration.IndentContent == false && + node.next != null && + this.configuration.HideEndTags == false && + (node.tag.model & (Dict.CM_BLOCK|Dict.CM_LIST|Dict.CM_DEFLIST|Dict.CM_TABLE)) != 0) + { + flushLine(fout, indent); + } + } + } + } + + public void printXMLTree(Out fout, short mode, int indent, + Lexer lexer, Node node) + { + TagTable tt = this.configuration.tt; + + if (node == null) + return; + + if (node.type == Node.TextNode) + { + printText(fout, mode, indent, + node.textarray, node.start, node.end); + } + else if (node.type == Node.CommentTag) + { + condFlushLine(fout, indent); + printComment(fout, 0, node); + condFlushLine(fout, 0); + } + else if (node.type == Node.RootNode) + { + 
Node content; + + for (content = node.content; + content != null; + content = content.next) + printXMLTree(fout, mode, indent, lexer, content); + } + else if (node.type == Node.DocTypeTag) + printDocType(fout, indent, node); + else if (node.type == Node.ProcInsTag) + printPI(fout, indent, node); + /* handle CDATA sections the same way printTree does, instead of letting them fall through to the node.tag checks below */ + else if (node.type == Node.CDATATag) + printCDATA(fout, indent, node); + else if (node.type == Node.SectionTag) + printSection(fout, indent, node); + else if (node.type == Node.AspTag) + printAsp(fout, indent, node); + else if (node.type == Node.JsteTag) + printJste(fout, indent, node); + else if (node.type == Node.PhpTag) + printPhp(fout, indent, node); + else if ((node.tag.model & Dict.CM_EMPTY) != 0 || node.type == Node.StartEndTag) + { + condFlushLine(fout, indent); + printTag(lexer, fout, mode, indent, node); + flushLine(fout, indent); + + if (node.next != null) + flushLine(fout, indent); + } + else /* some kind of container element */ + { + Node content; + boolean mixed = false; + int cindent; + + for (content = node.content; content != null; content = content.next) + { + if (content.type == Node.TextNode) + { + mixed = true; + break; + } + } + + condFlushLine(fout, indent); + + if (ParserImpl.XMLPreserveWhiteSpace(node, tt)) + { + indent = 0; + cindent = 0; + mixed = false; + } + else if (mixed) + cindent = indent; + else + cindent = indent + this.configuration.spaces; + + printTag(lexer, fout, mode, indent, node); + + if (!mixed) + flushLine(fout, indent); + + for (content = node.content; + content != null; + content = content.next) + printXMLTree(fout, mode, cindent, lexer, content); + + if (!mixed) + condFlushLine(fout, cindent); + printEndTag(fout, mode, indent, node); + condFlushLine(fout, indent); + + if (node.next != null) + flushLine(fout, indent); + } + } + + + /* split parse tree by h2 elements and output to separate files */ + + /* counts number of h2 children belonging to node */ + public int countSlides(Node node) + { + int n = 1; + TagTable tt = this.configuration.tt; + + for (node = node.content; node != null; node = 
node.next) + if (node.tag == tt.tagH2) + ++n; + + return n; + } + + /* + inserts a space gif called "dot.gif" to ensure + that the slide is at least n pixels high + */ + private void printVertSpacer(Out fout, int indent) + { + condFlushLine(fout, indent); + printString(fout, indent , + ""); + condFlushLine(fout, indent); + } + + private void printNavBar(Out fout, int indent) + { + String buf; + + condFlushLine(fout, indent); + printString(fout, indent , "

    "); + + if (slide > 1) + { + buf = "previous | "; + printString(fout, indent , buf); + condFlushLine(fout, indent); + + if (slide < count) + printString(fout, indent , "start | "); + else + printString(fout, indent , "start"); + + condFlushLine(fout, indent); + } + + if (slide < count) + { + buf = "next"; + printString(fout, indent , buf); + } + + printString(fout, indent , "
    "); + condFlushLine(fout, indent); + } + + /* + Called from printTree to print the content of a slide from + the node slidecontent. On return slidecontent points to the + node starting the next slide or null. The variables slide + and count are used to customise the navigation bar. + */ + public void printSlide(Out fout, short mode, int indent, Lexer lexer) + { + Node content, last; + TagTable tt = this.configuration.tt; + + /* insert div for onclick handler */ + String s; + s = "
    "; + printString(fout, indent, s); + condFlushLine(fout, indent); + + /* first print the h2 element and navbar */ + if (slidecontent.tag == tt.tagH2) + { + printNavBar(fout, indent); + + /* now print an hr after h2 */ + + addC('<', linelen++); + + + addC((int)Lexer.foldCase('h', + this.configuration.UpperCaseTags, + this.configuration.XmlTags), + linelen++); + addC((int)Lexer.foldCase('r', + this.configuration.UpperCaseTags, + this.configuration.XmlTags), + linelen++); + + if (this.configuration.XmlOut == true) + printString(fout, indent , " />"); + else + addC('>', linelen++); + + + if (this.configuration.IndentContent == true) + condFlushLine(fout, indent); + + /* PrintVertSpacer(fout, indent); */ + + /*condFlushLine(fout, indent); */ + + /* print the h2 element */ + printTree(fout, mode, + (this.configuration.IndentContent ? indent+this.configuration.spaces : indent), lexer, slidecontent); + + slidecontent = slidecontent.next; + } + + /* now continue until we reach the next h2 */ + + last = null; + content = slidecontent; + + for (; content != null; content = content.next) + { + if (content.tag == tt.tagH2) + break; + + /* kludge for naked text before block level tag */ + if (last != null && !this.configuration.IndentContent && last.type == Node.TextNode && + content.tag != null && (content.tag.model & Dict.CM_BLOCK) != 0) + { + flushLine(fout, indent); + flushLine(fout, indent); + } + + printTree(fout, mode, + (this.configuration.IndentContent ? indent+this.configuration.spaces : indent), lexer, content); + + last = content; + } + + slidecontent = content; + + /* now print epilog */ + + condFlushLine(fout, indent); + + printString(fout, indent , "
    "); + condFlushLine(fout, indent); + + addC('<', linelen++); + + + addC((int)Lexer.foldCase('h', + this.configuration.UpperCaseTags, + this.configuration.XmlTags), + linelen++); + addC((int)Lexer.foldCase('r', + this.configuration.UpperCaseTags, + this.configuration.XmlTags), + linelen++); + + if (this.configuration.XmlOut == true) + printString(fout, indent , " />"); + else + addC('>', linelen++); + + + if (this.configuration.IndentContent == true) + condFlushLine(fout, indent); + + printNavBar(fout, indent); + + /* end tag for div */ + printString(fout, indent, "
    "); + condFlushLine(fout, indent); + } + + + /* + Add meta element for page transition effect, this works on IE but not NS + */ + + public void addTransitionEffect(Lexer lexer, Node root, short effect, double duration) + { + Node head = root.findHEAD(lexer.configuration.tt); + String transition; + + if (0 <= effect && effect <= 23) + transition = "revealTrans(Duration=" + + (new Double(duration)).toString() + + ",Transition=" + effect + ")"; + else + transition = "blendTrans(Duration=" + + (new Double(duration)).toString() + ")"; + + if (head != null) + { + Node meta = lexer.inferredTag("meta"); + meta.addAttribute("http-equiv", "Page-Enter"); + meta.addAttribute("content", transition); + Node.insertNodeAtStart(head, meta); + } + } + + public void createSlides(Lexer lexer, Node root) + { + Node body; + String buf; + Out out = new OutImpl(); + + body = root.findBody(lexer.configuration.tt); + count = countSlides(body); + slidecontent = body.content; + addTransitionEffect(lexer, root, EFFECT_BLEND, 3.0); + + for (slide = 1; slide <= count; ++slide) + { + buf = "slide" + slide + ".html"; + out.state = StreamIn.FSM_ASCII; + out.encoding = this.configuration.CharEncoding; + + try + { + out.out = new FileOutputStream(buf); + printTree(out, (short)0, 0, lexer, root); + flushLine(out, 0); + out.out.close(); + } + catch (IOException e) + { + System.err.println(buf + e.toString() ); + } + } + + /* + delete superfluous slides by deleting slideN.html + for N = count+1, count+2, etc. until no such file + is found. 
+ */ + + for (;;) + { + /* the name must match the "slide" + slide + ".html" files written by the loop above, otherwise delete() never finds a file and no stale slide is removed */ + buf = "slide" + slide + ".html"; + + if (!(new File(buf)).delete()) + break; + + ++slide; + } + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Parser.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Parser.java new file mode 100644 index 0000000..b3851ea --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Parser.java @@ -0,0 +1,39 @@ +/* + * @(#)Parser.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * HTML Parser + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public interface Parser { + + public void parse( Lexer lexer, Node node, short mode ); + +} + diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/ParserImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/ParserImpl.java new file mode 100644 index 0000000..044cfa9 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/ParserImpl.java @@ -0,0 +1,3205 @@ +/* + * @(#)ParserImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * HTML Parser implementation + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class ParserImpl { + + //private static int SeenBodyEndTag; /* AQ: moved into lexer structure */ + + private static void parseTag(Lexer lexer, Node node, short mode) + { + // Local fix by GLP 2000-12-21. Need to reset insertspace if this + // is both a non-inline and empty tag (base, link, meta, isindex, hr, area). + // Remove this code once the fix is made in Tidy. 
+ +/****** (Original code follows) + if ((node.tag.model & Dict.CM_EMPTY) != 0) + { + lexer.waswhite = false; + return; + } + else if (!((node.tag.model & Dict.CM_INLINE) != 0)) + lexer.insertspace = false; +*******/ + + if (!((node.tag.model & Dict.CM_INLINE) != 0)) + lexer.insertspace = false; + + if ((node.tag.model & Dict.CM_EMPTY) != 0) + { + lexer.waswhite = false; + return; + } + + if (node.tag.parser == null || node.type == Node.StartEndTag) + return; + + node.tag.parser.parse(lexer, node, mode); + } + + private static void moveToHead(Lexer lexer, Node element, Node node) + { + Node head; + TagTable tt = lexer.configuration.tt; + + + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + { + Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN); + + while (element.tag != tt.tagHtml) + element = element.parent; + + for (head = element.content; head != null; head = head.next) + { + if (head.tag == tt.tagHead) + { + Node.insertNodeAtEnd(head, node); + break; + } + } + + if (node.tag.parser != null) + parseTag(lexer, node, Lexer.IgnoreWhitespace); + } + else + { + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + } + } + + public static class ParseHTML implements Parser { + + public void parse( Lexer lexer, Node html, short mode ) + { + Node node, head; + Node frameset = null; + Node noframes = null; + + lexer.configuration.XmlTags = false; + lexer.seenBodyEndTag = 0; + TagTable tt = lexer.configuration.tt; + + for (;;) + { + node = lexer.getToken(Lexer.IgnoreWhitespace); + + if (node == null) + { + node = lexer.inferredTag("head"); + break; + } + + if (node.tag == tt.tagHead) + break; + + if (node.tag == html.tag && node.type == Node.EndTag) + { + Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* deal with comments etc. 
*/ + if (Node.insertMisc(html, node)) + continue; + + lexer.ungetToken(); + node = lexer.inferredTag("head"); + break; + } + + head = node; + Node.insertNodeAtEnd(html, head); + getParseHead().parse(lexer, head, mode); + + for (;;) + { + node = lexer.getToken(Lexer.IgnoreWhitespace); + + if (node == null) + { + if (frameset == null) /* create an empty body */ + node = lexer.inferredTag("body"); + + return; + } + + /* robustly handle html tags */ + if (node.tag == html.tag) + { + if (node.type != Node.StartTag && frameset == null) + Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); + + continue; + } + + /* deal with comments etc. */ + if (Node.insertMisc(html, node)) + continue; + + /* if frameset document coerce to */ + if (node.tag == tt.tagBody) + { + if (node.type != Node.StartTag) + { + Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + if (frameset != null) + { + lexer.ungetToken(); + + if (noframes == null) + { + noframes = lexer.inferredTag("noframes"); + Node.insertNodeAtEnd(frameset, noframes); + Report.warning(lexer, html, noframes, Report.INSERTING_TAG); + } + + parseTag(lexer, noframes, mode); + continue; + } + + break; /* to parse body */ + } + + /* flag an error if we see more than one frameset */ + if (node.tag == tt.tagFrameset) + { + if (node.type != Node.StartTag) + { + Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + if (frameset != null) + Report.error(lexer, html, node, Report.DUPLICATE_FRAMESET); + else + frameset = node; + + Node.insertNodeAtEnd(html, node); + parseTag(lexer, node, mode); + + /* + see if it includes a noframes element so + that we can merge subsequent noframes elements + */ + + for (node = frameset.content; node != null; node = node.next) + { + if (node.tag == tt.tagNoframes) + noframes = node; + } + continue; + } + + /* if not a frameset document coerce <noframes> to <body> */ + if (node.tag == tt.tagNoframes) + { + if (node.type != 
Node.StartTag) + { + Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + if (frameset == null) + { + Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); + node = lexer.inferredTag("body"); + break; + } + + if (noframes == null) + { + noframes = node; + Node.insertNodeAtEnd(frameset, noframes); + } + + parseTag(lexer, noframes, mode); + continue; + } + + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + { + if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0) + { + moveToHead(lexer, html, node); + continue; + } + } + + lexer.ungetToken(); + + /* insert other content into noframes element */ + + if (frameset != null) + { + if (noframes == null) + { + noframes = lexer.inferredTag("noframes"); + Node.insertNodeAtEnd(frameset, noframes); + } + else + Report.warning(lexer, html, node, Report.NOFRAMES_CONTENT); + + parseTag(lexer, noframes, mode); + continue; + } + + node = lexer.inferredTag("body"); + break; + } + + /* node must be body */ + + Node.insertNodeAtEnd(html, node); + parseTag(lexer, node, mode); + } + + }; + + public static class ParseHead implements Parser { + + public void parse( Lexer lexer, Node head, short mode ) + { + Node node; + int HasTitle = 0; + int HasBase = 0; + TagTable tt = lexer.configuration.tt; + + while (true) + { + node = lexer.getToken(Lexer.IgnoreWhitespace); + if (node == null) break; + if (node.tag == head.tag && node.type == Node.EndTag) + { + head.closed = true; + break; + } + + if (node.type == Node.TextNode) + { + lexer.ungetToken(); + break; + } + + /* deal with comments etc. 
*/ + if (Node.insertMisc(head, node)) + continue; + + if (node.type == Node.DocTypeTag) + { + Node.insertDocType(lexer, head, node); + continue; + } + + /* discard unknown tags */ + if (node.tag == null) + { + Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + if (!((node.tag.model & Dict.CM_HEAD) != 0)) + { + lexer.ungetToken(); + break; + } + + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + { + if (node.tag == tt.tagTitle) + { + ++HasTitle; + + if (HasTitle > 1) + Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS); + } + else if (node.tag == tt.tagBase) + { + ++HasBase; + + if (HasBase > 1) + Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS); + } + else if (node.tag == tt.tagNoscript) + Report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN); + + Node.insertNodeAtEnd(head, node); + parseTag(lexer, node, Lexer.IgnoreWhitespace); + continue; + } + + /* discard unexpected text nodes and end tags */ + Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED); + } + + if (HasTitle == 0) + { + Report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT); + Node.insertNodeAtEnd(head, lexer.inferredTag( "title")); + } + } + + }; + + public static class ParseTitle implements Parser { + + public void parse( Lexer lexer, Node title, short mode ) + { + Node node; + + while (true) + { + node = lexer.getToken(Lexer.MixedContent); + if (node == null) break; + if (node.tag == title.tag && node.type == Node.EndTag) + { + title.closed = true; + Node.trimSpaces(lexer, title); + return; + } + + if (node.type == Node.TextNode) + { + /* only called for 1st child */ + if (title.content == null) + Node.trimInitialSpace(lexer, title, node); + + if (node.start >= node.end) + { + continue; + } + + Node.insertNodeAtEnd(title, node); + continue; + } + + /* deal with comments etc. 
*/ + if (Node.insertMisc(title, node)) + continue; + + /* discard unknown tags */ + if (node.tag == null) + { + Report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* pushback unexpected tokens */ + Report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE); + lexer.ungetToken(); + Node.trimSpaces(lexer, title); + return; + } + + Report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR); + } + + }; + + public static class ParseScript implements Parser { + + public void parse( Lexer lexer, Node script, short mode ) + { + /* + This isn't quite right for CDATA content as it recognises + tags within the content and parses them accordingly. + This will unfortunately screw up scripts which include + < + letter, < + !, < + ? or < + / + letter + */ + + Node node; + + node = lexer.getCDATA( script); + + if (node != null) + Node.insertNodeAtEnd(script, node); + } + + }; + + public static class ParseBody implements Parser { + + public void parse( Lexer lexer, Node body, short mode ) + { + Node node; + boolean checkstack, iswhitenode; + + mode = Lexer.IgnoreWhitespace; + checkstack = true; + TagTable tt = lexer.configuration.tt; + + while (true) + { + node = lexer.getToken(mode); + if (node == null) break; + if (node.tag == body.tag && node.type == Node.EndTag) + { + body.closed = true; + Node.trimSpaces(lexer, body); + lexer.seenBodyEndTag = 1; + mode = Lexer.IgnoreWhitespace; + + if (body.parent.tag == tt.tagNoframes) + break; + + continue; + } + + if (node.tag == tt.tagNoframes) + { + if (node.type == Node.StartTag) + { + Node.insertNodeAtEnd(body, node); + getParseBlock().parse(lexer, node, mode); + continue; + } + + if (node.type == Node.EndTag && + body.parent.tag == tt.tagNoframes) + { + Node.trimSpaces(lexer, body); + lexer.ungetToken(); + break; + } + } + + if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset) + && body.parent.tag == tt.tagNoframes) + { + Node.trimSpaces(lexer, body); + lexer.ungetToken(); + break; + } 
+ + if (node.tag == tt.tagHtml) + { + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED); + + continue; + } + + iswhitenode = false; + + if (node.type == Node.TextNode && + node.end <= node.start + 1 && + node.textarray[node.start] == (byte)' ') + iswhitenode = true; + + /* deal with comments etc. */ + if (Node.insertMisc(body, node)) + continue; + + if (lexer.seenBodyEndTag == 1 && !iswhitenode) + { + ++lexer.seenBodyEndTag; + Report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY); + } + + /* mixed content model permits text */ + if (node.type == Node.TextNode) + { + if (iswhitenode && mode == Lexer.IgnoreWhitespace) + { + continue; + } + + if (lexer.configuration.EncloseBodyText && !iswhitenode) + { + Node para; + + lexer.ungetToken(); + para = lexer.inferredTag("p"); + Node.insertNodeAtEnd(body, para); + parseTag(lexer, para, mode); + mode = Lexer.MixedContent; + continue; + } + else /* strict doesn't allow text here */ + lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20); + + if (checkstack) + { + checkstack = false; + + if (lexer.inlineDup( node) > 0) + continue; + } + + Node.insertNodeAtEnd(body, node); + mode = Lexer.MixedContent; + continue; + } + + if (node.type == Node.DocTypeTag) + { + Node.insertDocType(lexer, body, node); + continue; + } + /* discard unknown and PARAM tags */ + if (node.tag == null || node.tag == tt.tagParam) + { + Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* + Netscape allows LI and DD directly in BODY + We infer UL or DL respectively and use this + boolean to exclude block-level elements so as + to match Netscape's observed behaviour. 
+ */ + lexer.excludeBlocks = false; + + if (!((node.tag.model & Dict.CM_BLOCK) != 0) && + !((node.tag.model & Dict.CM_INLINE) != 0)) + { + /* avoid this error message being issued twice */ + if (!((node.tag.model & Dict.CM_HEAD) != 0)) + Report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN); + + if ((node.tag.model & Dict.CM_HTML) != 0) + { + /* copy body attributes if current body was inferred */ + if (node.tag == tt.tagBody && body.implicit + && body.attributes == null) + { + body.attributes = node.attributes; + node.attributes = null; + } + + continue; + } + + if ((node.tag.model & Dict.CM_HEAD) != 0) + { + moveToHead(lexer, body, node); + continue; + } + + if ((node.tag.model & Dict.CM_LIST) != 0) + { + lexer.ungetToken(); + node = lexer.inferredTag( "ul"); + Node.addClass(node, "noindent"); + lexer.excludeBlocks = true; + } + else if ((node.tag.model & Dict.CM_DEFLIST) != 0) + { + lexer.ungetToken(); + node = lexer.inferredTag( "dl"); + lexer.excludeBlocks = true; + } + else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0) + { + lexer.ungetToken(); + node = lexer.inferredTag( "table"); + lexer.excludeBlocks = true; + } + else + { + /* AQ: The following line is from the official C + version of tidy. It doesn't make sense to me + because the '!' operator has higher precedence + than the '&' operator. It seems to me that the + expression always evaluates to 0. + + if (!node->tag->model & (CM_ROW | CM_FIELD)) + + AQ: 13Jan2000 fixed in C tidy + */ + if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0)) + { + lexer.ungetToken(); + return; + } + + /* ignore </td> </th> <option> etc. 
*/ + continue; + } + } + + if (node.type == Node.EndTag) + { + if (node.tag == tt.tagBr) + node.type = Node.StartTag; + else if (node.tag == tt.tagP) + { + Node.coerceNode(lexer, node, tt.tagBr); + Node.insertNodeAtEnd(body, node); + node = lexer.inferredTag("br"); + } + else if ((node.tag.model & Dict.CM_INLINE) != 0) + lexer.popInline(node); + } + + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + { + if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0)) + { + /* HTML4 strict doesn't allow inline content here */ + /* but HTML2 does allow img elements as children of body */ + if (node.tag == tt.tagImg) + lexer.versions &= ~Dict.VERS_HTML40_STRICT; + else + lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20); + + if (checkstack && !node.implicit) + { + checkstack = false; + + if (lexer.inlineDup( node) > 0) + continue; + } + + mode = Lexer.MixedContent; + } + else + { + checkstack = true; + mode = Lexer.IgnoreWhitespace; + } + + if (node.implicit) + Report.warning(lexer, body, node, Report.INSERTING_TAG); + + Node.insertNodeAtEnd(body, node); + parseTag(lexer, node, mode); + continue; + } + + /* discard unexpected tags */ + Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED); + } + } + + }; + + public static class ParseFrameSet implements Parser { + + public void parse( Lexer lexer, Node frameset, short mode ) + { + Node node; + TagTable tt = lexer.configuration.tt; + + lexer.badAccess |= Report.USING_FRAMES; + + while (true) + { + node = lexer.getToken(Lexer.IgnoreWhitespace); + if (node == null) break; + if (node.tag == frameset.tag && node.type == Node.EndTag) + { + frameset.closed = true; + Node.trimSpaces(lexer, frameset); + return; + } + + /* deal with comments etc. 
*/ + if (Node.insertMisc(frameset, node)) + continue; + + if (node.tag == null) + { + Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + { + if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0) + { + moveToHead(lexer, frameset, node); + continue; + } + } + + if (node.tag == tt.tagBody) + { + lexer.ungetToken(); + node = lexer.inferredTag("noframes"); + Report.warning(lexer, frameset, node, Report.INSERTING_TAG); + } + + if (node.type == Node.StartTag && (node.tag.model & Dict.CM_FRAMES) != 0) + { + Node.insertNodeAtEnd(frameset, node); + lexer.excludeBlocks = false; + parseTag(lexer, node, Lexer.MixedContent); + continue; + } + else if (node.type == Node.StartEndTag && (node.tag.model & Dict.CM_FRAMES) != 0) + { + Node.insertNodeAtEnd(frameset, node); + continue; + } + + /* discard unexpected tags */ + Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED); + } + + Report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR); + } + + }; + + public static class ParseInline implements Parser { + + public void parse( Lexer lexer, Node element, short mode ) + { + Node node, parent; + TagTable tt = lexer.configuration.tt; + + if ((element.tag.model & Dict.CM_EMPTY) != 0) + return; + + if (element.tag == tt.tagA) + { + if (element.attributes == null) + { + Report.warning(lexer, element.parent, element, Report.DISCARDING_UNEXPECTED); + Node.discardElement(element); + return; + } + } + + /* + ParseInline is used for some block level elements like H1 to H6 + For such elements we need to insert inline emphasis tags currently + on the inline stack. For Inline elements, we normally push them + onto the inline stack provided they aren't implicit or OBJECT/APPLET. 
+ This test is carried out in PushInline and PopInline, see istack.c + We don't push A or SPAN to replicate current browser behavior + */ + if (((element.tag.model & Dict.CM_BLOCK) != 0) || (element.tag == tt.tagDt)) + lexer.inlineDup( null); + else if ((element.tag.model & Dict.CM_INLINE) != 0 && + element.tag != tt.tagA && element.tag != tt.tagSpan) + lexer.pushInline( element); + + if (element.tag == tt.tagNobr) + lexer.badLayout |= Report.USING_NOBR; + else if (element.tag == tt.tagFont) + lexer.badLayout |= Report.USING_FONT; + + /* Inline elements may or may not be within a preformatted element */ + if (mode != Lexer.Preformatted) + mode = Lexer.MixedContent; + + while (true) + { + node = lexer.getToken(mode); + if (node == null) break; + /* end tag for current element */ + if (node.tag == element.tag && node.type == Node.EndTag) + { + if ((element.tag.model & Dict.CM_INLINE) != 0 && + element.tag != tt.tagA) + lexer.popInline( node); + + if (!((mode & Lexer.Preformatted) != 0)) + Node.trimSpaces(lexer, element); + /* + if a font element wraps an anchor and nothing else + then move the font element inside the anchor since + otherwise it won't alter the anchor text color + */ + if (element.tag == tt.tagFont && + element.content != null && + element.content == element.last) + { + Node child = element.content; + + if (child.tag == tt.tagA) + { + child.parent = element.parent; + child.next = element.next; + child.prev = element.prev; + + if (child.prev != null) + child.prev.next = child; + else + child.parent.content = child; + + if (child.next != null) + child.next.prev = child; + else + child.parent.last = child; + + element.next = null; + element.prev = null; + element.parent = child; + element.content = child.content; + element.last = child.last; + child.content = element; + child.last = element; + for (child = element.content; child != null; child = child.next) + child.parent = element; + } + } + element.closed = true; + Node.trimSpaces(lexer, element); + 
Node.trimEmptyElement(lexer, element); + return; + } + + /* <u>...<u> map 2nd <u> to </u> if 1st is explicit */ + /* otherwise emphasis nesting is probably unintentional */ + /* big and small have cumulative effect to leave them alone */ + if (node.type == Node.StartTag + && node.tag == element.tag + && lexer.isPushed(node) + && !node.implicit + && !element.implicit + && node.tag != null && ((node.tag.model & Dict.CM_INLINE) != 0) + && node.tag != tt.tagA + && node.tag != tt.tagFont + && node.tag != tt.tagBig + && node.tag != tt.tagSmall) + { + if (element.content != null && node.attributes == null) + { + Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG); + node.type = Node.EndTag; + lexer.ungetToken(); + continue; + } + + Report.warning(lexer, element, node, Report.NESTED_EMPHASIS); + } + + if (node.type == Node.TextNode) + { + /* only called for 1st child */ + if (element.content == null && + !((mode & Lexer.Preformatted) != 0)) + Node.trimSpaces(lexer, element); + + if (node.start >= node.end) + { + continue; + } + + Node.insertNodeAtEnd(element, node); + continue; + } + + /* mixed content model so allow text */ + if (Node.insertMisc(element, node)) + continue; + + /* deal with HTML tags */ + if (node.tag == tt.tagHtml) + { + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + { + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* otherwise infer end of inline element */ + lexer.ungetToken(); + if (!((mode & Lexer.Preformatted) != 0)) + Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + return; + } + + /* within <dt> or <pre> map <p> to <br> */ + if (node.tag == tt.tagP && + node.type == Node.StartTag && + ((mode & Lexer.Preformatted) != 0 || + element.tag == tt.tagDt || + element.isDescendantOf(tt.tagDt))) + { + node.tag = tt.tagBr; + node.element = "br"; + Node.trimSpaces(lexer, element); + Node.insertNodeAtEnd(element, node); + continue; + } + + /* ignore unknown and 
PARAM tags */ + if (node.tag == null || node.tag == tt.tagParam) + { + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + if (node.tag == tt.tagBr && node.type == Node.EndTag) + node.type = Node.StartTag; + + if (node.type == Node.EndTag) + { + /* coerce </br> to <br> */ + if (node.tag == tt.tagBr) + node.type = Node.StartTag; + else if (node.tag == tt.tagP) + { + /* coerce unmatched </p> to <br><br> */ + if (!element.isDescendantOf(tt.tagP)) + { + Node.coerceNode(lexer, node, tt.tagBr); + Node.trimSpaces(lexer, element); + Node.insertNodeAtEnd(element, node); + node = lexer.inferredTag("br"); + continue; + } + } + else if ((node.tag.model & Dict.CM_INLINE) != 0 + && node.tag != tt.tagA + && !((node.tag.model & Dict.CM_OBJECT) != 0) + && (element.tag.model & Dict.CM_INLINE) != 0) + { + /* allow any inline end tag to end current element */ + lexer.popInline( element); + + if (element.tag != tt.tagA) + { + if (node.tag == tt.tagA && node.tag != element.tag) + { + Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); + lexer.ungetToken(); + } + else + { + Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG); + } + + if (!((mode & Lexer.Preformatted) != 0)) + Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + return; + } + + /* if parent is <a> then discard unexpected inline end tag */ + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + continue; + } /* special case </tr> etc. 
for stuff moved in front of table */ + else if (lexer.exiled + && node.tag.model != 0 + && (node.tag.model & Dict.CM_TABLE) != 0) + { + lexer.ungetToken(); + Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + return; + } + } + + /* allow any header tag to end current header */ + if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0) + { + if (node.tag == element.tag) + { + Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG); + } + else + { + Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); + lexer.ungetToken(); + } + if (!((mode & Lexer.Preformatted) != 0)) + Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + return; + } + + /* + an <A> tag to ends any open <A> element + but <A href=...> is mapped to </A><A href=...> + */ + if (node.tag == tt.tagA && !node.implicit && lexer.isPushed(node)) + { + /* coerce <a> to </a> unless it has some attributes */ + if (node.attributes == null) + { + node.type = Node.EndTag; + Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG); + lexer.popInline( node); + lexer.ungetToken(); + continue; + } + + lexer.ungetToken(); + Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); + lexer.popInline( element); + if (!((mode & Lexer.Preformatted) != 0)) + Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + return; + } + + if ((element.tag.model & Dict.CM_HEADING) != 0) + { + if (node.tag == tt.tagCenter || + node.tag == tt.tagDiv) + { + if (node.type != Node.StartTag && + node.type != Node.StartEndTag) + { + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN); + + /* insert center as parent if heading is empty */ + if (element.content == null) + { + Node.insertNodeAsParent(element, node); + continue; + } + + /* split heading and make center parent of 2nd part */ + 
Node.insertNodeAfterElement(element, node); + + if (!((mode & Lexer.Preformatted) != 0)) + Node.trimSpaces(lexer, element); + + element = lexer.cloneNode(element); + element.start = lexer.lexsize; + element.end = lexer.lexsize; + Node.insertNodeAtEnd(node, element); + continue; + } + + if (node.tag == tt.tagHr) + { + if (node.type != Node.StartTag && + node.type != Node.StartEndTag) + { + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN); + + /* insert hr before heading if heading is empty */ + if (element.content == null) + { + Node.insertNodeBeforeElement(element, node); + continue; + } + + /* split heading and insert hr before 2nd part */ + Node.insertNodeAfterElement(element, node); + + if (!((mode & Lexer.Preformatted) != 0)) + Node.trimSpaces(lexer, element); + + element = lexer.cloneNode(element); + element.start = lexer.lexsize; + element.end = lexer.lexsize; + Node.insertNodeAfterElement(node, element); + continue; + } + } + + if (element.tag == tt.tagDt) + { + if (node.tag == tt.tagHr) + { + Node dd; + + if (node.type != Node.StartTag && + node.type != Node.StartEndTag) + { + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN); + dd = lexer.inferredTag("dd"); + + /* insert hr within dd before dt if dt is empty */ + if (element.content == null) + { + Node.insertNodeBeforeElement(element, dd); + Node.insertNodeAtEnd(dd, node); + continue; + } + + /* split dt and insert hr within dd before 2nd part */ + Node.insertNodeAfterElement(element, dd); + Node.insertNodeAtEnd(dd, node); + + if (!((mode & Lexer.Preformatted) != 0)) + Node.trimSpaces(lexer, element); + + element = lexer.cloneNode(element); + element.start = lexer.lexsize; + element.end = lexer.lexsize; + Node.insertNodeAfterElement(dd, element); + continue; + } + } + + + /* + if this is the end tag for 
an ancestor element + then infer end tag for this element + */ + if (node.type == Node.EndTag) + { + for (parent = element.parent; + parent != null; parent = parent.parent) + { + if (node.tag == parent.tag) + { + if (!((element.tag.model & Dict.CM_OPT) != 0) && + !element.implicit) + Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); + + if (element.tag == tt.tagA) + lexer.popInline(element); + + lexer.ungetToken(); + + if (!((mode & Lexer.Preformatted) != 0)) + Node.trimSpaces(lexer, element); + + Node.trimEmptyElement(lexer, element); + return; + } + } + } + + /* block level tags end this element */ + if (!((node.tag.model & Dict.CM_INLINE) != 0)) + { + if (node.type != Node.StartTag) + { + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + if (!((element.tag.model & Dict.CM_OPT) != 0)) + Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); + + if ((node.tag.model & Dict.CM_HEAD) != 0 && + !((node.tag.model & Dict.CM_BLOCK) != 0)) + { + moveToHead(lexer, element, node); + continue; + } + + /* + prevent anchors from propagating into block tags + except for headings h1 to h6 + */ + if (element.tag == tt.tagA) + { + if (node.tag != null && + !((node.tag.model & Dict.CM_HEADING) != 0)) + lexer.popInline(element); + else if (!(element.content != null)) + { + Node.discardElement(element); + lexer.ungetToken(); + return; + } + } + + lexer.ungetToken(); + + if (!((mode & Lexer.Preformatted) != 0)) + Node.trimSpaces(lexer, element); + + Node.trimEmptyElement(lexer, element); + return; + } + + /* parse inline element */ + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + { + if (node.implicit) + Report.warning(lexer, element, node, Report.INSERTING_TAG); + + /* trim white space before <br> */ + if (node.tag == tt.tagBr) + Node.trimSpaces(lexer, element); + + Node.insertNodeAtEnd(element, node); + parseTag(lexer, node, mode); + continue; + } + + /* discard unexpected tags */ + 
Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + } + + if (!((element.tag.model & Dict.CM_OPT) != 0)) + Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR); + + Node.trimEmptyElement(lexer, element); + } + }; + + /** Parser for list elements whose children are LI elements. Lists whose tag model has CM_OBSOLETE are coerced to UL on the end-tag, ancestor-end-tag and end-of-input exits. */ public static class ParseList implements Parser { + + /** Reads tokens until the end tag of list, an end tag of an ancestor, or the end of input, appending each child to list. The mode argument is not read by this parser. */ public void parse( Lexer lexer, Node list, short mode ) + { + Node node; + Node parent; + TagTable tt = lexer.configuration.tt; + + if ((list.tag.model & Dict.CM_EMPTY) != 0) + return; + + lexer.insert = -1; /* defer implicit inline start tags */ + + while (true) + { + node = lexer.getToken(Lexer.IgnoreWhitespace); + if (node == null) break; + + if (node.tag == list.tag && node.type == Node.EndTag) + { + if ((list.tag.model & Dict.CM_OBSOLETE) != 0) + Node.coerceNode(lexer, list, tt.tagUl); + + list.closed = true; + Node.trimEmptyElement(lexer, list); + return; + } + + /* deal with comments etc. */ + if (Node.insertMisc(list, node)) + continue; + + /* discard tokens with an unknown tag; text nodes are let through */ if (node.type != Node.TextNode && node.tag == null) + { + Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* + if this is the end tag for an ancestor element + then infer end tag for this element + */ + if (node.type == Node.EndTag) + { + /* a form end tag is discarded and recorded in lexer.badForm */ if (node.tag == tt.tagForm) + { + lexer.badForm = 1; + Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* an inline end tag is discarded and passed to lexer.popInline */ if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0) + { + Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); + lexer.popInline(node); + continue; + } + + for (parent = list.parent; + parent != null; parent = parent.parent) + { + if (node.tag == parent.tag) + { + Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE); + lexer.ungetToken(); + + if ((list.tag.model & Dict.CM_OBSOLETE) != 0) + Node.coerceNode(lexer, list, tt.tagUl); + + Node.trimEmptyElement(lexer, list); + return; + } + } + + /* end tag matches no ancestor: discard it */ Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* anything other than li is pushed back; a block tag while lexer.excludeBlocks is set ends the list, otherwise an inferred li with style "list-style: none" takes its place */ if 
(node.tag != tt.tagLi) + { + lexer.ungetToken(); + + if (node.tag != null && + (node.tag.model & Dict.CM_BLOCK) != 0 && + lexer.excludeBlocks) + { + Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE); + Node.trimEmptyElement(lexer, list); + return; + } + + node = lexer.inferredTag("li"); + node.addAttribute("style", "list-style: none"); + Report.warning(lexer, list, node, Report.MISSING_STARTTAG); + } + + /* node should be <LI> */ + Node.insertNodeAtEnd(list, node); + parseTag(lexer, node, Lexer.IgnoreWhitespace); + } + + /* reached only after getToken returned null, so node is null in the warning below */ if ((list.tag.model & Dict.CM_OBSOLETE) != 0) + Node.coerceNode(lexer, list, tt.tagUl); + + Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR); + Node.trimEmptyElement(lexer, list); + } + + }; + + public static class ParseDefList implements Parser { + + public void parse( Lexer lexer, Node list, short mode ) + { + Node node, parent; + TagTable tt = lexer.configuration.tt; + + if ((list.tag.model & Dict.CM_EMPTY) != 0) + return; + + lexer.insert = -1; /* defer implicit inline start tags */ + + while (true) + { + node = lexer.getToken(Lexer.IgnoreWhitespace); + if (node == null) break; + if (node.tag == list.tag && node.type == Node.EndTag) + { + list.closed = true; + Node.trimEmptyElement(lexer, list); + return; + } + + /* deal with comments etc. 
*/ + if (Node.insertMisc(list, node)) + continue; + + if (node.type == Node.TextNode) + { + lexer.ungetToken(); + node = lexer.inferredTag( "dt"); + Report.warning(lexer, list, node, Report.MISSING_STARTTAG); + } + + if (node.tag == null) + { + Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* + if this is the end tag for an ancestor element + then infer end tag for this element + */ + if (node.type == Node.EndTag) + { + if (node.tag == tt.tagForm) + { + lexer.badForm = 1; + Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + for (parent = list.parent; + parent != null; parent = parent.parent) + { + if (node.tag == parent.tag) + { + Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE); + + lexer.ungetToken(); + Node.trimEmptyElement(lexer, list); + return; + } + } + } + + /* center in a dt or a dl breaks the dl list in two */ + if (node.tag == tt.tagCenter) + { + if (list.content != null) + Node.insertNodeAfterElement(list, node); + else /* trim empty dl list */ + { + Node.insertNodeBeforeElement(list, node); + Node.discardElement(list); + } + + /* and parse contents of center */ + parseTag(lexer, node, mode); + + /* now create a new dl element */ + list = lexer.inferredTag("dl"); + Node.insertNodeAfterElement(node, list); + continue; + } + + if (!(node.tag == tt.tagDt || node.tag == tt.tagDd)) + { + lexer.ungetToken(); + + if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)) + { + Report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN); + Node.trimEmptyElement(lexer, list); + return; + } + + /* if DD appeared directly in BODY then exclude blocks */ + if (!((node.tag.model & Dict.CM_INLINE) != 0) && lexer.excludeBlocks) + { + Node.trimEmptyElement(lexer, list); + return; + } + + node = lexer.inferredTag( "dd"); + Report.warning(lexer, list, node, Report.MISSING_STARTTAG); + } + + if (node.type == Node.EndTag) + { + Report.warning(lexer, list, node, 
Report.DISCARDING_UNEXPECTED); + continue; + } + + /* node should be <DT> or <DD>*/ + Node.insertNodeAtEnd(list, node); + parseTag(lexer, node, Lexer.IgnoreWhitespace); + } + + Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR); + Node.trimEmptyElement(lexer, list); + } + + }; + + /** Parser for preformatted elements. Tokens are read in Lexer.Preformatted mode; an element whose tag model has CM_OBSOLETE is first coerced to PRE. */ public static class ParsePre implements Parser { + + /** Reads tokens until the end tag of pre, an end tag of an ancestor, or the end of input. Non-inline start tags are moved out after pre and parsing continues in a newly inferred pre. The mode argument is not read by this parser. */ public void parse( Lexer lexer, Node pre, short mode ) + { + Node node, parent; + TagTable tt = lexer.configuration.tt; + + if ((pre.tag.model & Dict.CM_EMPTY) != 0) + return; + + if ((pre.tag.model & Dict.CM_OBSOLETE) != 0) + Node.coerceNode(lexer, pre, tt.tagPre); + + lexer.inlineDup( null); /* tell lexer to insert inlines if needed */ + + while (true) + { + node = lexer.getToken(Lexer.Preformatted); + if (node == null) break; + if (node.tag == pre.tag && node.type == Node.EndTag) + { + Node.trimSpaces(lexer, pre); + pre.closed = true; + Node.trimEmptyElement(lexer, pre); + return; + } + + /* html tags are skipped; only start tags are reported */ if (node.tag == tt.tagHtml) + { + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); + + continue; + } + + if (node.type == Node.TextNode) + { + /* if this is the first child, skip an initial newline and drop the node if nothing is left */ + if (pre.content == null) + { + if (node.textarray[node.start] == (byte)'\n') + ++node.start; + + if (node.start >= node.end) + { + continue; + } + } + + Node.insertNodeAtEnd(pre, node); + continue; + } + + /* deal with comments etc. 
*/ + if (Node.insertMisc(pre, node)) + continue; + + /* discard unknown and PARAM tags */ + if (node.tag == null || node.tag == tt.tagParam) + { + Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + if (node.tag == tt.tagP) + { + if (node.type == Node.StartTag) + { + Report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF); + + /* trim white space before <p> in <pre>*/ + Node.trimSpaces(lexer, pre); + + /* coerce the <p> start tag to <br>; any other <p> token, such as </p>, is discarded in the else branch */ + Node.coerceNode(lexer, node, tt.tagBr); + Node.insertNodeAtEnd(pre, node); + } + else + { + Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); + } + continue; + } + + /* tags with CM_HEAD but without CM_BLOCK are handed to moveToHead */ if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0)) + { + moveToHead(lexer, pre, node); + continue; + } + + /* + if this is the end tag for an ancestor element + then infer end tag for this element + */ + if (node.type == Node.EndTag) + { + if (node.tag == tt.tagForm) + { + lexer.badForm = 1; + Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + for (parent = pre.parent; + parent != null; parent = parent.parent) + { + if (node.tag == parent.tag) + { + Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE); + + lexer.ungetToken(); + Node.trimSpaces(lexer, pre); + Node.trimEmptyElement(lexer, pre); + return; + } + } + } + + /* what about head content, HEAD, BODY tags etc? 
*/ + if (!((node.tag.model & Dict.CM_INLINE) != 0)) + { + if (node.type != Node.StartTag) + { + Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE); + lexer.excludeBlocks = true; + + /* check if we need to infer a container */ + if ((node.tag.model & Dict.CM_LIST) != 0) + { + lexer.ungetToken(); + node = lexer.inferredTag( "ul"); + Node.addClass(node, "noindent"); + } + else if ((node.tag.model & Dict.CM_DEFLIST) != 0) + { + lexer.ungetToken(); + node = lexer.inferredTag( "dl"); + } + else if ((node.tag.model & Dict.CM_TABLE) != 0) + { + lexer.ungetToken(); + node = lexer.inferredTag( "table"); + } + + /* split: node is placed after the current pre, a new inferred pre is placed after node, and the local pre now refers to that new element for the rest of the loop */ Node.insertNodeAfterElement(pre, node); + pre = lexer.inferredTag( "pre"); + Node.insertNodeAfterElement(node, pre); + parseTag(lexer, node, Lexer.IgnoreWhitespace); + lexer.excludeBlocks = false; + continue; + } + /* + if (!((node.tag.model & Dict.CM_INLINE) != 0)) + { + Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE); + lexer.ungetToken(); + return; + } + */ + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + { + /* trim white space before <br> */ + if (node.tag == tt.tagBr) + Node.trimSpaces(lexer, pre); + + Node.insertNodeAtEnd(pre, node); + parseTag(lexer, node, Lexer.Preformatted); + continue; + } + + /* discard unexpected tags */ + Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); + } + + /* reached only after getToken returned null, so node is null in this warning */ Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR); + Node.trimEmptyElement(lexer, pre); + } + + }; + + public static class ParseBlock implements Parser { + + public void parse( Lexer lexer, Node element, short mode ) + /* + element is node created by the lexer + upon seeing the start tag, or by the + parser when the start tag is inferred + */ + { + Node node, parent; + boolean checkstack; + int istackbase = 0; + TagTable tt = lexer.configuration.tt; + + checkstack = true; + + if ((element.tag.model & Dict.CM_EMPTY) != 0) + 
return; + + if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm)) + Report.warning(lexer, element, null, Report.ILLEGAL_NESTING); + + /* + InlineDup() asks the lexer to insert inline emphasis tags + currently pushed on the istack, but take care to avoid + propagating inline emphasis inside OBJECT or APPLET. + For these elements a fresh inline stack context is created + and disposed of upon reaching the end of the element. + They thus behave like table cells in this respect. + */ + if ((element.tag.model & Dict.CM_OBJECT) != 0) + { + istackbase = lexer.istackbase; + lexer.istackbase = lexer.istack.size(); + } + + if (!((element.tag.model & Dict.CM_MIXED) != 0)) + lexer.inlineDup( null); + + mode = Lexer.IgnoreWhitespace; + + while (true) + { + node = lexer.getToken(mode /*Lexer.MixedContent*/); + if (node == null) break; + /* end tag for this element */ + if (node.type == Node.EndTag && node.tag != null && + (node.tag == element.tag || element.was == node.tag)) + { + + if ((element.tag.model & Dict.CM_OBJECT) != 0) + { + /* pop inline stack */ + while (lexer.istack.size() > lexer.istackbase) + lexer.popInline( null); + lexer.istackbase = istackbase; + } + + element.closed = true; + Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + return; + } + + if (node.tag == tt.tagHtml || + node.tag == tt.tagHead || + node.tag == tt.tagBody) + { + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + + continue; + } + + if (node.type == Node.EndTag) + { + if (node.tag == null) + { + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + + continue; + } + else if (node.tag == tt.tagBr) + node.type = Node.StartTag; + else if (node.tag == tt.tagP) + { + Node.coerceNode(lexer, node, tt.tagBr); + Node.insertNodeAtEnd(element, node); + node = lexer.inferredTag("br"); + } + else + { + /* + if this is the end tag for an ancestor element + then infer 
end tag for this element + */ + for (parent = element.parent; + parent != null; parent = parent.parent) + { + if (node.tag == parent.tag) + { + if (!((element.tag.model & Dict.CM_OPT) != 0)) + Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); + + lexer.ungetToken(); + + if ((element.tag.model & Dict.CM_OBJECT) != 0) + { + /* pop inline stack */ + while (lexer.istack.size() > lexer.istackbase) + lexer.popInline( null); + lexer.istackbase = istackbase; + } + + Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + return; + } + } + /* special case </tr> etc. for stuff moved in front of table */ + if (lexer.exiled + && node.tag.model != 0 + && (node.tag.model & Dict.CM_TABLE) != 0) + { + lexer.ungetToken(); + Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + return; + } + } + } + + /* mixed content model permits text */ + if (node.type == Node.TextNode) + { + boolean iswhitenode = false; + + if (node.type == Node.TextNode && + node.end <= node.start + 1 && + lexer.lexbuf[node.start] == (byte)' ') + iswhitenode = true; + + if (lexer.configuration.EncloseBlockText && !iswhitenode) + { + lexer.ungetToken(); + node = lexer.inferredTag("p"); + Node.insertNodeAtEnd(element, node); + parseTag(lexer, node, Lexer.MixedContent); + continue; + } + + if (checkstack) + { + checkstack = false; + + if (!((element.tag.model & Dict.CM_MIXED) != 0)) + { + if (lexer.inlineDup( node) > 0) + continue; + } + } + + Node.insertNodeAtEnd(element, node); + mode = Lexer.MixedContent; + /* + HTML4 strict doesn't allow mixed content for + elements with %block; as their content model + */ + lexer.versions &= ~Dict.VERS_HTML40_STRICT; + continue; + } + + if (Node.insertMisc(element, node)) + continue; + + /* allow PARAM elements? 
*/ + if (node.tag == tt.tagParam) + { + if (((element.tag.model & Dict.CM_PARAM) != 0) && + (node.type == Node.StartTag || node.type == Node.StartEndTag)) + { + Node.insertNodeAtEnd(element, node); + continue; + } + + /* otherwise discard it */ + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* allow AREA elements? */ + if (node.tag == tt.tagArea) + { + if ((element.tag == tt.tagMap) && + (node.type == Node.StartTag || node.type == Node.StartEndTag)) + { + Node.insertNodeAtEnd(element, node); + continue; + } + + /* otherwise discard it */ + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* ignore unknown start/end tags */ + if (node.tag == null) + { + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* + Allow Dict.CM_INLINE elements here. + + Allow Dict.CM_BLOCK elements here unless + lexer.excludeBlocks is yes. + + LI and DD are special cased. + + Otherwise infer end tag for this element. 
+ */ + + if (!((node.tag.model & Dict.CM_INLINE) != 0)) + { + if (node.type != Node.StartTag && node.type != Node.StartEndTag) + { + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + if (element.tag == tt.tagTd || element.tag == tt.tagTh) + { + /* if parent is a table cell, avoid inferring the end of the cell */ + + if ((node.tag.model & Dict.CM_HEAD) != 0) + { + moveToHead(lexer, element, node); + continue; + } + + if ((node.tag.model & Dict.CM_LIST) != 0) + { + lexer.ungetToken(); + node = lexer.inferredTag( "ul"); + Node.addClass(node, "noindent"); + lexer.excludeBlocks = true; + } + else if ((node.tag.model & Dict.CM_DEFLIST) != 0) + { + lexer.ungetToken(); + node = lexer.inferredTag( "dl"); + lexer.excludeBlocks = true; + } + + /* infer end of current table cell */ + if (!((node.tag.model & Dict.CM_BLOCK) != 0)) + { + lexer.ungetToken(); + Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + return; + } + } + else if ((node.tag.model & Dict.CM_BLOCK) != 0) + { + if (lexer.excludeBlocks) + { + if (!((element.tag.model & Dict.CM_OPT) != 0)) + Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); + + lexer.ungetToken(); + + if ((element.tag.model & Dict.CM_OBJECT) != 0) + lexer.istackbase = istackbase; + + Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + return; + } + } + else /* things like list items */ + { + if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit) + Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); + + if ((node.tag.model & Dict.CM_HEAD) != 0) + { + moveToHead(lexer, element, node); + continue; + } + + lexer.ungetToken(); + + if ((node.tag.model & Dict.CM_LIST) != 0) + { + if (element.parent != null && element.parent.tag != null && + element.parent.tag.parser == getParseList()) + { + Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + return; + } + + node = lexer.inferredTag("ul"); + 
Node.addClass(node, "noindent"); + } + else if ((node.tag.model & Dict.CM_DEFLIST) != 0) + { + if (element.parent.tag == tt.tagDl) + { + Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + return; + } + + node = lexer.inferredTag("dl"); + } + else if ((node.tag.model & Dict.CM_TABLE) != 0 || + (node.tag.model & Dict.CM_ROW) != 0) + { + node = lexer.inferredTag("table"); + } + else if ((element.tag.model & Dict.CM_OBJECT) != 0) + { + /* pop inline stack */ + while (lexer.istack.size() > lexer.istackbase) + lexer.popInline( null); + lexer.istackbase = istackbase; + Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + return; + + } + else + { + Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + return; + } + } + } + + /* parse known element */ + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + { + if ((node.tag.model & Dict.CM_INLINE) != 0) + { + if (checkstack && !node.implicit) + { + checkstack = false; + + if (lexer.inlineDup( node) > 0) + continue; + } + + mode = Lexer.MixedContent; + } + else + { + checkstack = true; + mode = Lexer.IgnoreWhitespace; + } + + /* trim white space before <br> */ + if (node.tag == tt.tagBr) + Node.trimSpaces(lexer, element); + + Node.insertNodeAtEnd(element, node); + + if (node.implicit) + Report.warning(lexer, element, node, Report.INSERTING_TAG); + + parseTag(lexer, node, Lexer.IgnoreWhitespace /*Lexer.MixedContent*/); + continue; + } + + /* discard unexpected tags */ + if (node.type == Node.EndTag) + lexer.popInline( node); /* if inline end tag */ + + Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); + } + + if (!((element.tag.model & Dict.CM_OPT) != 0)) + Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR); + + if ((element.tag.model & Dict.CM_OBJECT) != 0) + { + /* pop inline stack */ + while (lexer.istack.size() > lexer.istackbase) + lexer.popInline( null); + lexer.istackbase = istackbase; + } + + 
Node.trimSpaces(lexer, element); + Node.trimEmptyElement(lexer, element); + } + + }; + + /** Parser for the content of a table element. */ public static class ParseTableTag implements Parser { + + /** Reads tokens until the end tag of table, an end tag of an ancestor, a tag whose model lacks CM_TABLE, or the end of input. lexer.istackbase is saved on entry and restored on every exit path. The mode argument is not read by this parser. */ public void parse( Lexer lexer, Node table, short mode ) + { + Node node, parent; + int istackbase; + TagTable tt = lexer.configuration.tt; + + lexer.deferDup(); + /* remember the current inline stack base and move the base to the current stack size */ istackbase = lexer.istackbase; + lexer.istackbase = lexer.istack.size(); + + while (true) + { + node = lexer.getToken(Lexer.IgnoreWhitespace); + if (node == null) break; + if (node.tag == table.tag && node.type == Node.EndTag) + { + lexer.istackbase = istackbase; + table.closed = true; + Node.trimEmptyElement(lexer, table); + return; + } + + /* deal with comments etc. */ + if (Node.insertMisc(table, node)) + continue; + + /* discard unknown tags */ + if (node.tag == null && node.type != Node.TextNode) + { + Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* if TD or TH or text or inline or block then infer <TR> */ + + if (node.type != Node.EndTag) + { + /* td, th and nested table tokens are pushed back and an inferred tr takes their place */ if (node.tag == tt.tagTd || + node.tag == tt.tagTh || + node.tag == tt.tagTable) + { + lexer.ungetToken(); + node = lexer.inferredTag( "tr"); + Report.warning(lexer, table, node, Report.MISSING_STARTTAG); + } + else if (node.type == Node.TextNode + || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) + { + /* text, block and inline content is inserted before the table element */ Node.insertNodeBeforeElement(table, node); + Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN); + lexer.exiled = true; + + /* AQ: TODO + Line 2040 of parser.c (13 Jan 2000) reads as follows: + if (!node->type == TextNode) + This will always evaluate to false. 
+ This has been reported to Dave Raggett <dsr@w3.org> + */ + //Should be?: if (!(node.type == Node.TextNode)) + /* never executed: the constant false condition mirrors the always-false C test described above */ if (false) + parseTag(lexer, node, Lexer.IgnoreWhitespace); + + lexer.exiled = false; + continue; + } + else if ((node.tag.model & Dict.CM_HEAD) != 0) + { + moveToHead(lexer, table, node); + continue; + } + } + + /* + if this is the end tag for an ancestor element + then infer end tag for this element + */ + if (node.type == Node.EndTag) + { + if (node.tag == tt.tagForm) + { + lexer.badForm = 1; + Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* end tags whose model has CM_TABLE or CM_ROW are discarded */ if (node.tag != null && (node.tag.model & (Dict.CM_TABLE|Dict.CM_ROW)) != 0) + { + Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + for (parent = table.parent; + parent != null; parent = parent.parent) + { + if (node.tag == parent.tag) + { + Report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE); + lexer.ungetToken(); + lexer.istackbase = istackbase; + Node.trimEmptyElement(lexer, table); + return; + } + } + } + + /* a tag whose model lacks CM_TABLE ends the table; the token is pushed back */ if (!((node.tag.model & Dict.CM_TABLE) != 0)) + { + lexer.ungetToken(); + Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN); + lexer.istackbase = istackbase; + Node.trimEmptyElement(lexer, table); + return; + } + + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + { + Node.insertNodeAtEnd(table, node);; + parseTag(lexer, node, Lexer.IgnoreWhitespace); + continue; + } + + /* discard unexpected text nodes and end tags */ + Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); + } + + /* reached only after getToken returned null, so node is null in this warning */ Report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR); + Node.trimEmptyElement(lexer, table); + lexer.istackbase = istackbase; + } + + }; + + public static class ParseColGroup implements Parser { + + public void parse( Lexer lexer, Node colgroup, short mode ) + { + Node node, parent; + TagTable tt = lexer.configuration.tt; + + if ((colgroup.tag.model & Dict.CM_EMPTY) != 0) + return; + + while 
(true) + { + node = lexer.getToken(Lexer.IgnoreWhitespace); + if (node == null) break; + if (node.tag == colgroup.tag && node.type == Node.EndTag) + { + colgroup.closed = true; + return; + } + + /* + if this is the end tag for an ancestor element + then infer end tag for this element + */ + if (node.type == Node.EndTag) + { + if (node.tag == tt.tagForm) + { + lexer.badForm = 1; + Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + for (parent = colgroup.parent; + parent != null; parent = parent.parent) + { + + if (node.tag == parent.tag) + { + lexer.ungetToken(); + return; + } + } + } + + if (node.type == Node.TextNode) + { + lexer.ungetToken(); + return; + } + + /* deal with comments etc. */ + if (Node.insertMisc(colgroup, node)) + continue; + + /* discard unknown tags */ + if (node.tag == null) + { + Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + if (node.tag != tt.tagCol) + { + lexer.ungetToken(); + return; + } + + if (node.type == Node.EndTag) + { + Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* node should be <COL> */ + Node.insertNodeAtEnd(colgroup, node); + parseTag(lexer, node, Lexer.IgnoreWhitespace); + } + } + + }; + + public static class ParseRowGroup implements Parser { + + public void parse( Lexer lexer, Node rowgroup, short mode ) + { + Node node, parent; + TagTable tt = lexer.configuration.tt; + + if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0) + return; + + while (true) + { + node = lexer.getToken(Lexer.IgnoreWhitespace); + if (node == null) break; + if (node.tag == rowgroup.tag) + { + if (node.type == Node.EndTag) + { + rowgroup.closed = true; + Node.trimEmptyElement(lexer, rowgroup); + return; + } + + lexer.ungetToken(); + return; + } + + /* if </table> infer end tag */ + if (node.tag == tt.tagTable && node.type == Node.EndTag) + { + lexer.ungetToken(); + Node.trimEmptyElement(lexer, rowgroup); + return; + } + + /* deal with 
comments etc. */ + if (Node.insertMisc(rowgroup, node)) + continue; + + /* discard unknown tags */ + if (node.tag == null && node.type != Node.TextNode) + { + Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* + if TD or TH then infer <TR> + if text or inline or block move before table + if head content move to head + */ + + if (node.type != Node.EndTag) + { + if (node.tag == tt.tagTd || node.tag == tt.tagTh) + { + lexer.ungetToken(); + node = lexer.inferredTag("tr"); + Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG); + } + else if (node.type == Node.TextNode + || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) + { + Node.moveBeforeTable(rowgroup, node, tt); + Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN); + lexer.exiled = true; + + if (node.type != Node.TextNode) + parseTag(lexer, node, Lexer.IgnoreWhitespace); + + lexer.exiled = false; + continue; + } + else if ((node.tag.model & Dict.CM_HEAD) != 0) + { + Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN); + moveToHead(lexer, rowgroup, node); + continue; + } + } + + /* + if this is the end tag for ancestor element + then infer end tag for this element + */ + if (node.type == Node.EndTag) + { + if (node.tag == tt.tagForm) + { + lexer.badForm = 1; + Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag == tt.tagTh) + { + Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + for (parent = rowgroup.parent; + parent != null; parent = parent.parent) + { + if (node.tag == parent.tag) + { + lexer.ungetToken(); + Node.trimEmptyElement(lexer, rowgroup); + return; + } + } + } + + /* + if THEAD, TFOOT or TBODY then implied end tag + + */ + if ((node.tag.model & Dict.CM_ROWGRP) != 0) + { + if (node.type != Node.EndTag) + lexer.ungetToken(); + + Node.trimEmptyElement(lexer, rowgroup); + return; + } + 
+ if (node.type == Node.EndTag) + { + Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + if (!(node.tag == tt.tagTr)) + { + node = lexer.inferredTag( "tr"); + Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG); + lexer.ungetToken(); + } + + /* node should be <TR> */ + Node.insertNodeAtEnd(rowgroup, node); + parseTag(lexer, node, Lexer.IgnoreWhitespace); + } + + Node.trimEmptyElement(lexer, rowgroup); + } + + }; + + public static class ParseRow implements Parser { + + public void parse( Lexer lexer, Node row, short mode ) + { + Node node, parent; + boolean exclude_state; + TagTable tt = lexer.configuration.tt; + + if ((row.tag.model & Dict.CM_EMPTY) != 0) + return; + + while (true) + { + node = lexer.getToken(Lexer.IgnoreWhitespace); + if (node == null) break; + if (node.tag == row.tag) + { + if (node.type == Node.EndTag) + { + row.closed = true; + Node.fixEmptyRow(lexer, row); + return; + } + + lexer.ungetToken(); + Node.fixEmptyRow(lexer, row); + return; + } + + /* + if this is the end tag for an ancestor element + then infer end tag for this element + */ + if (node.type == Node.EndTag) + { + if (node.tag == tt.tagForm) + { + lexer.badForm = 1; + Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + if (node.tag == tt.tagTd || node.tag == tt.tagTh) + { + Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + for (parent = row.parent; + parent != null; parent = parent.parent) + { + if (node.tag == parent.tag) + { + lexer.ungetToken(); + Node.trimEmptyElement(lexer, row); + return; + } + } + } + + /* deal with comments etc. 
*/ + if (Node.insertMisc(row, node)) + continue; + + /* discard unknown tags */ + if (node.tag == null && node.type != Node.TextNode) + { + Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* discard unexpected <table> element */ + if (node.tag == tt.tagTable) + { + Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* THEAD, TFOOT or TBODY */ + if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0) + { + lexer.ungetToken(); + Node.trimEmptyElement(lexer, row); + return; + } + + if (node.type == Node.EndTag) + { + Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* + if text or inline or block move before table + if head content move to head + */ + + if (node.type != Node.EndTag) + { + if (node.tag == tt.tagForm) + { + lexer.ungetToken(); + node = lexer.inferredTag("td"); + Report.warning(lexer, row, node, Report.MISSING_STARTTAG); + } + else if (node.type == Node.TextNode + || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) + { + Node.moveBeforeTable(row, node, tt); + Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN); + lexer.exiled = true; + + if (node.type != Node.TextNode) + parseTag(lexer, node, Lexer.IgnoreWhitespace); + + lexer.exiled = false; + continue; + } + else if ((node.tag.model & Dict.CM_HEAD) != 0) + { + Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN); + moveToHead(lexer, row, node); + continue; + } + } + + if (!(node.tag == tt.tagTd || node.tag == tt.tagTh)) + { + Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN); + continue; + } + + /* node should be <TD> or <TH> */ + Node.insertNodeAtEnd(row, node); + exclude_state = lexer.excludeBlocks; + lexer.excludeBlocks = false; + parseTag(lexer, node, Lexer.IgnoreWhitespace); + lexer.excludeBlocks = exclude_state; + + /* pop inline stack */ + + while (lexer.istack.size() > lexer.istackbase) + lexer.popInline( null); + } + + Node.trimEmptyElement(lexer, 
row); + } + + }; + + public static class ParseNoFrames implements Parser { + + public void parse( Lexer lexer, Node noframes, short mode ) + { + Node node; + boolean checkstack; + TagTable tt = lexer.configuration.tt; + + lexer.badAccess |= Report.USING_NOFRAMES; + mode = Lexer.IgnoreWhitespace; + checkstack = true; + + while (true) + { + node = lexer.getToken(mode); + if (node == null) break; + if (node.tag == noframes.tag && node.type == Node.EndTag) + { + noframes.closed = true; + Node.trimSpaces(lexer, noframes); + return; + } + + if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset)) + { + Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_BEFORE); + Node.trimSpaces(lexer, noframes); + lexer.ungetToken(); + return; + } + + if (node.tag == tt.tagHtml) + { + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED); + + continue; + } + + /* deal with comments etc. */ + if (Node.insertMisc(noframes, node)) + continue; + + if (node.tag == tt.tagBody && node.type == Node.StartTag) + { + Node.insertNodeAtEnd(noframes, node); + parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/); + continue; + } + + /* implicit body element inferred */ + if (node.type == Node.TextNode || node.tag != null) + { + lexer.ungetToken(); + node = lexer.inferredTag("body"); + if (lexer.configuration.XmlOut) + Report.warning(lexer, noframes, node, Report.INSERTING_TAG); + Node.insertNodeAtEnd(noframes, node); + parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/); + continue; + } + /* discard unexpected end tags */ + Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED); + } + + Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_FOR); + } + + }; + + public static class ParseSelect implements Parser { + + public void parse( Lexer lexer, Node field, short mode ) + { + Node node; + TagTable tt = lexer.configuration.tt; + + lexer.insert = -1; /* defer implicit 
inline start tags */ + + while (true) + { + node = lexer.getToken(Lexer.IgnoreWhitespace); + if (node == null) break; + if (node.tag == field.tag && node.type == Node.EndTag) + { + field.closed = true; + Node.trimSpaces(lexer, field); + return; + } + + /* deal with comments etc. */ + if (Node.insertMisc(field, node)) + continue; + + if (node.type == Node.StartTag && + (node.tag == tt.tagOption || + node.tag == tt.tagOptgroup || + node.tag == tt.tagScript)) + { + Node.insertNodeAtEnd(field, node); + parseTag(lexer, node, Lexer.IgnoreWhitespace); + continue; + } + + /* discard unexpected tags */ + Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED); + } + + Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR); + } + + }; + + public static class ParseText implements Parser { + + public void parse( Lexer lexer, Node field, short mode ) + { + Node node; + TagTable tt = lexer.configuration.tt; + + lexer.insert = -1; /* defer implicit inline start tags */ + + if (field.tag == tt.tagTextarea) + mode = Lexer.Preformatted; + + while (true) + { + node = lexer.getToken(mode); + if (node == null) break; + if (node.tag == field.tag && node.type == Node.EndTag) + { + field.closed = true; + Node.trimSpaces(lexer, field); + return; + } + + /* deal with comments etc. 
*/ + if (Node.insertMisc(field, node)) + continue; + + if (node.type == Node.TextNode) + { + /* only called for 1st child */ + if (field.content == null && !((mode & Lexer.Preformatted) != 0)) + Node.trimSpaces(lexer, field); + + if (node.start >= node.end) + { + continue; + } + + Node.insertNodeAtEnd(field, node); + continue; + } + + if (node.tag == tt.tagFont) + { + Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + /* terminate element on other tags */ + if (!((field.tag.model & Dict.CM_OPT) != 0)) + Report.warning(lexer, field, node, Report.MISSING_ENDTAG_BEFORE); + + lexer.ungetToken(); + Node.trimSpaces(lexer, field); + return; + } + + if (!((field.tag.model & Dict.CM_OPT) != 0)) + Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR); + } + + }; + + public static class ParseOptGroup implements Parser { + + public void parse( Lexer lexer, Node field, short mode ) + { + Node node; + TagTable tt = lexer.configuration.tt; + + lexer.insert = -1; /* defer implicit inline start tags */ + + while (true) + { + node = lexer.getToken(Lexer.IgnoreWhitespace); + if (node == null) break; + if (node.tag == field.tag && node.type == Node.EndTag) + { + field.closed = true; + Node.trimSpaces(lexer, field); + return; + } + + /* deal with comments etc. 
*/ + if (Node.insertMisc(field, node)) + continue; + + if (node.type == Node.StartTag && + (node.tag == tt.tagOption || node.tag == tt.tagOptgroup)) + { + if (node.tag == tt.tagOptgroup) + Report.warning(lexer, field, node, Report.CANT_BE_NESTED); + + Node.insertNodeAtEnd(field, node); + parseTag(lexer, node, Lexer.MixedContent); + continue; + } + + /* discard unexpected tags */ + Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED); + } + } + + }; + + public static Parser getParseHTML() + { + return _parseHTML; + } + + public static Parser getParseHead() + { + return _parseHead; + } + + public static Parser getParseTitle() + { + return _parseTitle; + } + + public static Parser getParseScript() + { + return _parseScript; + } + + public static Parser getParseBody() + { + return _parseBody; + } + + public static Parser getParseFrameSet() + { + return _parseFrameSet; + } + + public static Parser getParseInline() + { + return _parseInline; + } + + public static Parser getParseList() + { + return _parseList; + } + + public static Parser getParseDefList() + { + return _parseDefList; + } + + public static Parser getParsePre() + { + return _parsePre; + } + + public static Parser getParseBlock() + { + return _parseBlock; + } + + public static Parser getParseTableTag() + { + return _parseTableTag; + } + + public static Parser getParseColGroup() + { + return _parseColGroup; + } + + public static Parser getParseRowGroup() + { + return _parseRowGroup; + } + + public static Parser getParseRow() + { + return _parseRow; + } + + public static Parser getParseNoFrames() + { + return _parseNoFrames; + } + + public static Parser getParseSelect() + { + return _parseSelect; + } + + public static Parser getParseText() + { + return _parseText; + } + + public static Parser getParseOptGroup() + { + return _parseOptGroup; + } + + + private static Parser _parseHTML = new ParseHTML(); + private static Parser _parseHead = new ParseHead(); + private static Parser _parseTitle = new 
ParseTitle(); + private static Parser _parseScript = new ParseScript(); + private static Parser _parseBody = new ParseBody(); + private static Parser _parseFrameSet = new ParseFrameSet(); + private static Parser _parseInline = new ParseInline(); + private static Parser _parseList = new ParseList(); + private static Parser _parseDefList = new ParseDefList(); + private static Parser _parsePre = new ParsePre(); + private static Parser _parseBlock = new ParseBlock(); + private static Parser _parseTableTag = new ParseTableTag(); + private static Parser _parseColGroup = new ParseColGroup(); + private static Parser _parseRowGroup = new ParseRowGroup(); + private static Parser _parseRow = new ParseRow(); + private static Parser _parseNoFrames = new ParseNoFrames(); + private static Parser _parseSelect = new ParseSelect(); + private static Parser _parseText = new ParseText(); + private static Parser _parseOptGroup = new ParseOptGroup(); + + /* + HTML is the top level element + */ + public static Node parseDocument(Lexer lexer) + { + Node node, document, html; + Node doctype = null; + TagTable tt = lexer.configuration.tt; + + document = lexer.newNode(); + document.type = Node.RootNode; + + while (true) + { + node = lexer.getToken(Lexer.IgnoreWhitespace); + if (node == null) break; + + /* deal with comments etc. */ + if (Node.insertMisc(document, node)) + continue; + + if (node.type == Node.DocTypeTag) + { + if (doctype == null) + { + Node.insertNodeAtEnd(document, node); + doctype = node; + } + else + Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); + continue; + } + + if (node.type == Node.EndTag) + { + Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); //TODO? + continue; + } + + if (node.type != Node.StartTag || node.tag != tt.tagHtml) + { + lexer.ungetToken(); + html = lexer.inferredTag("html"); + } + else + html = node; + + Node.insertNodeAtEnd(document, html); + getParseHTML().parse(lexer, html, (short)0); // TODO? 
+ break; + } + + return document; + } + + /** + * Indicates whether or not whitespace should be preserved for this element. + * If an <code>xml:space</code> attribute is found, then if the attribute value is + * <code>preserve</code>, returns <code>true</code>. For any other value, returns + * <code>false</code>. If an <code>xml:space</code> attribute was <em>not</em> + * found, then the following element names result in a return value of <code>true: + * pre, script, style,</code> and <code>xsl:text</code>. Finally, if a + * <code>TagTable</code> was passed in and the element appears as the "pre" element + * in the <code>TagTable</code>, then <code>true</code> will be returned. + * Otherwise, <code>false</code> is returned. + * @param element The <code>Node</code> to test to see if whitespace should be + * preserved. + * @param tt The <code>TagTable</code> to test for the <code>getNodePre()</code> + * function. This may be <code>null</code>, in which case this test + * is bypassed. + * @return <code>true</code> or <code>false</code>, as explained above. 
+ */ + + public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt) + { + AttVal attribute; + + /* search attributes for xml:space */ + for (attribute = element.attributes; attribute != null; attribute = attribute.next) + { + if (attribute.attribute.equals("xml:space")) + { + if (attribute.value.equals("preserve")) + return true; + + return false; + } + } + + /* kludge for html docs without explicit xml:space attribute */ + if (Lexer.wstrcasecmp(element.element, "pre") == 0 + || Lexer.wstrcasecmp(element.element, "script") == 0 + || Lexer.wstrcasecmp(element.element, "style") == 0) + return true; + + if ( (tt != null) && (tt.findParser(element) == getParsePre()) ) + return true; + + /* kludge for XSL docs */ + if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0) + return true; + + return false; + } + + /* + XML documents + */ + public static void parseXMLElement(Lexer lexer, Node element, short mode) + { + Node node; + + /* Jeff Young's kludge for XSL docs */ + + if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0) + return; + + /* if node is pre or has xml:space="preserve" then do so */ + + if (XMLPreserveWhiteSpace(element, lexer.configuration.tt)) + mode = Lexer.Preformatted; + + while (true) + { + node = lexer.getToken(mode); + if (node == null) break; + if (node.type == Node.EndTag && node.element.equals(element.element)) + { + element.closed = true; + break; + } + + /* discard unexpected end tags */ + if (node.type == Node.EndTag) + { + Report.error(lexer, element, node, Report.UNEXPECTED_ENDTAG); + continue; + } + + /* parse content on seeing start tag */ + if (node.type == Node.StartTag) + parseXMLElement(lexer, node, mode); + + Node.insertNodeAtEnd(element, node); + } + + /* + if first child is text then trim initial space and + delete text node if it is empty. 
+ */ + + node = element.content; + + if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted) + { + if (node.textarray[node.start] == (byte)' ') + { + node.start++; + + if (node.start >= node.end) + Node.discardElement(node); + } + } + + /* + if last child is text then trim final space and + delete the text node if it is empty + */ + + node = element.last; + + if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted) + { + if (node.textarray[node.end - 1] == (byte)' ') + { + node.end--; + + if (node.start >= node.end) + Node.discardElement(node); + } + } + } + + public static Node parseXMLDocument(Lexer lexer) + { + Node node, document, doctype; + + document = lexer.newNode(); + document.type = Node.RootNode; + doctype = null; + lexer.configuration.XmlTags = true; + + while (true) + { + node = lexer.getToken(Lexer.IgnoreWhitespace); + if (node == null) break; + /* discard unexpected end tags */ + if (node.type == Node.EndTag) + { + Report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG); + continue; + } + + /* deal with comments etc. 
*/ + if (Node.insertMisc(document, node)) + continue; + + if (node.type == Node.DocTypeTag) + { + if (doctype == null) + { + Node.insertNodeAtEnd(document, node); + doctype = node; + } + else + Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO + continue; + } + + /* if start tag then parse element's content */ + if (node.type == Node.StartTag) + { + Node.insertNodeAtEnd(document, node); + parseXMLElement(lexer, node, Lexer.IgnoreWhitespace); + } + + } + +if (false) { //#if 0 + /* discard the document type */ + node = document.findDocType(); + + if (node != null) + Node.discardElement(node); +} // #endif + + if (doctype != null && !lexer.checkDocTypeKeyWords(doctype)) + Report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE); + + /* ensure presence of initial <?XML version="1.0"?> */ + if (lexer.configuration.XmlPi) + lexer.fixXMLPI(document); + + return document; + } + + public static boolean isJavaScript(Node node) + { + boolean result = false; + AttVal attr; + + if (node.attributes == null) + return true; + + for (attr = node.attributes; attr != null; attr = attr.next) + { + if ( (Lexer.wstrcasecmp(attr.attribute, "language") == 0 + || Lexer.wstrcasecmp(attr.attribute, "type") == 0) + && Lexer.wsubstr(attr.value, "javascript")) + result = true; + } + + return result; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Report.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Report.java new file mode 100644 index 0000000..f58e5d2 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Report.java @@ -0,0 +1,1130 @@ +/* + * @(#)Report.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Error/informational message reporter. + * + * You should only need to edit the file TidyMessages.properties + * to localize HTML tidy. 
+ * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * Derived from <a href="http://www.w3.org/People/Raggett/tidy"> + * HTML Tidy Release 4 Aug 2000</a> + * + * @author Dave Raggett <dsr@w3.org> + * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +import java.io.PrintWriter; +import java.text.MessageFormat; +import java.util.Hashtable; +import java.util.MissingResourceException; +import java.util.ResourceBundle; + +import org.eclipse.core.resources.IMarker; +import org.eclipse.core.runtime.CoreException; +import org.eclipse.ui.texteditor.MarkerUtilities; + +public class Report { + + /* used to point to Web Accessibility Guidelines */ + public static final String ACCESS_URL = "http://www.w3.org/WAI/GL"; + + public static final String RELEASE_DATE = "4th August 2000"; + + public static String currentFile; /* sasdjb 01May00 for GNU Emacs error parsing */ + + /* error codes for entities */ + + public static final short MISSING_SEMICOLON = 1; + public static final short UNKNOWN_ENTITY = 2; + public static final short UNESCAPED_AMPERSAND = 3; + + /* error codes for element messages */ + + public static final short MISSING_ENDTAG_FOR = 1; + public static final short MISSING_ENDTAG_BEFORE = 2; + public static final short DISCARDING_UNEXPECTED = 3; + public static final short NESTED_EMPHASIS = 4; + public static final short 
NON_MATCHING_ENDTAG = 5; + public static final short TAG_NOT_ALLOWED_IN = 6; + public static final short MISSING_STARTTAG = 7; + public static final short UNEXPECTED_ENDTAG = 8; + public static final short USING_BR_INPLACE_OF = 9; + public static final short INSERTING_TAG = 10; + public static final short SUSPECTED_MISSING_QUOTE = 11; + public static final short MISSING_TITLE_ELEMENT = 12; + public static final short DUPLICATE_FRAMESET = 13; + public static final short CANT_BE_NESTED = 14; + public static final short OBSOLETE_ELEMENT = 15; + public static final short PROPRIETARY_ELEMENT = 16; + public static final short UNKNOWN_ELEMENT = 17; + public static final short TRIM_EMPTY_ELEMENT = 18; + public static final short COERCE_TO_ENDTAG = 19; + public static final short ILLEGAL_NESTING = 20; + public static final short NOFRAMES_CONTENT = 21; + public static final short CONTENT_AFTER_BODY = 22; + public static final short INCONSISTENT_VERSION = 23; + public static final short MALFORMED_COMMENT = 24; + public static final short BAD_COMMENT_CHARS = 25; + public static final short BAD_XML_COMMENT = 26; + public static final short BAD_CDATA_CONTENT = 27; + public static final short INCONSISTENT_NAMESPACE = 28; + public static final short DOCTYPE_AFTER_TAGS = 29; + public static final short MALFORMED_DOCTYPE = 30; + public static final short UNEXPECTED_END_OF_FILE = 31; + public static final short DTYPE_NOT_UPPER_CASE = 32; + public static final short TOO_MANY_ELEMENTS = 33; + + /* error codes used for attribute messages */ + + public static final short UNKNOWN_ATTRIBUTE = 1; + public static final short MISSING_ATTRIBUTE = 2; + public static final short MISSING_ATTR_VALUE = 3; + public static final short BAD_ATTRIBUTE_VALUE = 4; + public static final short UNEXPECTED_GT = 5; + public static final short PROPRIETARY_ATTR_VALUE = 6; + public static final short REPEATED_ATTRIBUTE = 7; + public static final short MISSING_IMAGEMAP = 8; + public static final short 
XML_ATTRIBUTE_VALUE = 9; + public static final short UNEXPECTED_QUOTEMARK = 10; + public static final short ID_NAME_MISMATCH = 11; + + /* accessibility flaws */ + + public static final short MISSING_IMAGE_ALT = 1; + public static final short MISSING_LINK_ALT = 2; + public static final short MISSING_SUMMARY = 4; + public static final short MISSING_IMAGE_MAP = 8; + public static final short USING_FRAMES = 16; + public static final short USING_NOFRAMES = 32; + + /* presentation flaws */ + + public static final short USING_SPACER = 1; + public static final short USING_LAYER = 2; + public static final short USING_NOBR = 4; + public static final short USING_FONT = 8; + public static final short USING_BODY = 16; + + /* character encoding errors */ + public static final short WINDOWS_CHARS = 1; + public static final short NON_ASCII = 2; + public static final short FOUND_UTF16 = 4; + + private static short optionerrors; + + private static ResourceBundle res = null; + + static { + try { + res = ResourceBundle.getBundle("org/w3c/tidy/TidyMessages"); + } catch (MissingResourceException e) { + throw new Error(e.toString()); + } + } + + public static void tidyPrint(PrintWriter p, String msg) { + p.print(msg); + } + + public static void tidyPrintln(PrintWriter p, String msg) { + p.println(msg); + } + + public static void tidyPrintln(PrintWriter p) { + p.println(); + } + + public static void showVersion(PrintWriter p) { + tidyPrintln(p, "Java HTML Tidy release date: " + RELEASE_DATE); + tidyPrintln(p, "See http://www.w3.org/People/Raggett for details"); + } + + public static void tag(Lexer lexer, Node tag) { + if (tag != null) { + if (tag.type == Node.StartTag) + tidyPrint(lexer.errout, "<" + tag.element + ">"); + else if (tag.type == Node.EndTag) + tidyPrint(lexer.errout, "</" + tag.element + ">"); + else if (tag.type == Node.DocTypeTag) + tidyPrint(lexer.errout, "<!DOCTYPE>"); + else if (tag.type == Node.TextNode) + tidyPrint(lexer.errout, "plain text"); + else + 
tidyPrint(lexer.errout, tag.element); + } + } + + /* print a short description of the tag to lexer.errout and append the same text to errorMessage; a null tag is ignored */ + public static void tag(StringBuffer errorMessage, Lexer lexer, Node tag) { + if (tag != null) { + if (tag.type == Node.StartTag) { + tidyPrint(lexer.errout, "<" + tag.element + ">"); + errorMessage.append("<" + tag.element + ">"); + } else if (tag.type == Node.EndTag) { + tidyPrint(lexer.errout, "</" + tag.element + ">"); + errorMessage.append("</" + tag.element + ">"); + } else if (tag.type == Node.DocTypeTag) { + /* the appended text must match what is printed: a doctype is reported as <!DOCTYPE>, not as an end tag */ + tidyPrint(lexer.errout, "<!DOCTYPE>"); + errorMessage.append("<!DOCTYPE>"); + } else if (tag.type == Node.TextNode) { + tidyPrint(lexer.errout, "plain text"); + errorMessage.append("plain text"); + } else { + tidyPrint(lexer.errout, tag.element); + errorMessage.append(tag.element); + } + } + } + + /* lexer is not defined when this is called */ + public static void unknownOption(String option) { + optionerrors++; + try { + System.err.println(MessageFormat.format(res.getString("unknown_option"), new Object[] { option })); + } catch (MissingResourceException e) { + System.err.println(e.toString()); + } + } + + /* lexer is not defined when this is called */ + public static void badArgument(String option) { + optionerrors++; + try { + System.err.println(MessageFormat.format(res.getString("bad_argument"), new Object[] { option })); + } catch (MissingResourceException e) { + System.err.println(e.toString()); + } + } + + public static void position(Lexer lexer) { + try { + /* Change formatting to be parsable by GNU Emacs */ + if (lexer.configuration.Emacs) { + tidyPrint( + lexer.errout, + MessageFormat.format( + res.getString("emacs_format"), + new Object[] { currentFile, new Integer(lexer.lines), new Integer(lexer.columns)})); + tidyPrint(lexer.errout, " "); + } else /* traditional format */ { + tidyPrint( + lexer.errout, + MessageFormat.format(res.getString("line_column"), new Object[] { new Integer(lexer.lines), new Integer(lexer.columns)})); + } + } catch (MissingResourceException e) { + 
lexer.errout.println(e.toString()); + } + } + + public static void encodingError(Lexer lexer, short code, int c) { + lexer.warnings++; + + if (lexer.configuration.ShowWarnings) { + position(lexer); + + if (code == WINDOWS_CHARS) { + lexer.badChars |= WINDOWS_CHARS; + try { + Hashtable attributes = new Hashtable(); + StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": "); + MarkerUtilities.setLineNumber(attributes, lexer.lines); + tidyPrint(lexer.errout, MessageFormat.format(res.getString("illegal_char"), new Object[] { new Integer(c)})); + errorMessage.append(MessageFormat.format(res.getString("illegal_char"), new Object[] { new Integer(c)})); + attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR)); + // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING)); + // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO)); + try { + MarkerUtilities.setMessage(attributes, errorMessage.toString()); + MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM); + } catch (CoreException e) { + } + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + + tidyPrintln(lexer.errout); + } + } + + public static void entityError(Lexer lexer, short code, String entity, int c) { + lexer.warnings++; + + if (lexer.configuration.ShowWarnings) { + position(lexer); + Hashtable attributes = new Hashtable(); + StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": "); + MarkerUtilities.setLineNumber(attributes, lexer.lines); + + if (code == MISSING_SEMICOLON) { + try { + tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_semicolon"), new Object[] { entity })); + errorMessage.append(MessageFormat.format(res.getString("missing_semicolon"), new Object[] { entity })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == UNKNOWN_ENTITY) { + try { + tidyPrint(lexer.errout, 
MessageFormat.format(res.getString("unknown_entity"), new Object[] { entity })); + errorMessage.append(MessageFormat.format(res.getString("unknown_entity"), new Object[] { entity })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == UNESCAPED_AMPERSAND) { + try { + tidyPrint(lexer.errout, res.getString("unescaped_ampersand")); + errorMessage.append(res.getString("unescaped_ampersand")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR)); + // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING)); + // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO)); + try { + MarkerUtilities.setMessage(attributes, errorMessage.toString()); + MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM); + } catch (CoreException e) { + } + tidyPrintln(lexer.errout); + } + } + + public static void attrError(Lexer lexer, Node node, String attr, short code) { + lexer.warnings++; + + /* keep quiet after 6 errors */ + if (lexer.errors > 6) + return; + + Hashtable attributes = new Hashtable(); + StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": "); + + if (lexer.configuration.ShowWarnings) { + /* on end of file adjust reported position to end of input */ + if (code == UNEXPECTED_END_OF_FILE) { + lexer.lines = lexer.in.curline; + lexer.columns = lexer.in.curcol; + } + + position(lexer); + + MarkerUtilities.setLineNumber(attributes, lexer.lines); + + if (code == UNKNOWN_ATTRIBUTE) { + try { + tidyPrint(lexer.errout, MessageFormat.format(res.getString("unknown_attribute"), new Object[] { attr })); + errorMessage.append(MessageFormat.format(res.getString("unknown_attribute"), new Object[] { attr })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == MISSING_ATTRIBUTE) { + try { + 
tidyPrint(lexer.errout, res.getString("warning")); + errorMessage.append(res.getString("warning")); + tag(errorMessage, lexer, node); + tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_attribute"), new Object[] { attr })); + errorMessage.append(MessageFormat.format(res.getString("missing_attribute"), new Object[] { attr })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == MISSING_ATTR_VALUE) { + try { + tidyPrint(lexer.errout, res.getString("warning")); + errorMessage.append(res.getString("warning")); + tag(errorMessage, lexer, node); + tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_attr_value"), new Object[] { attr })); + errorMessage.append(MessageFormat.format(res.getString("missing_attr_value"), new Object[] { attr })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == MISSING_IMAGEMAP) { + try { + tidyPrint(lexer.errout, res.getString("warning")); + errorMessage.append(res.getString("warning")); + tag(errorMessage, lexer, node); + tidyPrint(lexer.errout, res.getString("missing_imagemap")); + errorMessage.append(res.getString("missing_imagemap")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + lexer.badAccess |= MISSING_IMAGE_MAP; + } else if (code == BAD_ATTRIBUTE_VALUE) { + try { + tidyPrint(lexer.errout, res.getString("warning")); + errorMessage.append(res.getString("warning")); + /* use the buffer-aware overload so the tag is appended to errorMessage too, as in the sibling branches */ + tag(errorMessage, lexer, node); + tidyPrint(lexer.errout, MessageFormat.format(res.getString("bad_attribute_value"), new Object[] { attr })); + errorMessage.append(MessageFormat.format(res.getString("bad_attribute_value"), new Object[] { attr })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == XML_ATTRIBUTE_VALUE) { + try { + tidyPrint(lexer.errout, res.getString("warning")); + errorMessage.append(res.getString("warning")); + tag(errorMessage, lexer, 
node); + tidyPrint(lexer.errout, MessageFormat.format(res.getString("xml_attribute_value"), new Object[] { attr })); + errorMessage.append(MessageFormat.format(res.getString("xml_attribute_value"), new Object[] { attr })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == UNEXPECTED_GT) { + try { + tidyPrint(lexer.errout, res.getString("error")); + errorMessage.append(res.getString("error")); + tag(errorMessage, lexer, node); + tidyPrint(lexer.errout, res.getString("unexpected_gt")); + errorMessage.append(res.getString("unexpected_gt")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + lexer.errors++; + ; + } else if (code == UNEXPECTED_QUOTEMARK) { + try { + tidyPrint(lexer.errout, res.getString("warning")); + errorMessage.append(res.getString("warning")); + tag(errorMessage, lexer, node); + tidyPrint(lexer.errout, res.getString("unexpected_quotemark")); + errorMessage.append(res.getString("unexpected_quotemark")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == REPEATED_ATTRIBUTE) { + try { + tidyPrint(lexer.errout, res.getString("warning")); + errorMessage.append(res.getString("warning")); + tag(errorMessage, lexer, node); + tidyPrint(lexer.errout, res.getString("repeated_attribute")); + errorMessage.append(res.getString("repeated_attribute")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == PROPRIETARY_ATTR_VALUE) { + try { + tidyPrint(lexer.errout, res.getString("warning")); + errorMessage.append(res.getString("warning")); + tag(errorMessage, lexer, node); + tidyPrint(lexer.errout, MessageFormat.format(res.getString("proprietary_attr_value"), new Object[] { attr })); + errorMessage.append(MessageFormat.format(res.getString("proprietary_attr_value"), new Object[] { attr })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } 
else if (code == UNEXPECTED_END_OF_FILE) { + try { + tidyPrint(lexer.errout, res.getString("unexpected_end_of_file")); + errorMessage.append(res.getString("unexpected_end_of_file")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == ID_NAME_MISMATCH) { + try { + tidyPrint(lexer.errout, res.getString("warning")); + errorMessage.append(res.getString("warning")); + tag(errorMessage, lexer, node); + tidyPrint(lexer.errout, res.getString("id_name_mismatch")); + errorMessage.append(res.getString("id_name_mismatch")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + + // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR)); + attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING)); + // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO)); + try { + MarkerUtilities.setMessage(attributes, errorMessage.toString()); + MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM); + } catch (CoreException e) { + } + tidyPrintln(lexer.errout); + } else if (code == UNEXPECTED_GT) { + position(lexer); + MarkerUtilities.setLineNumber(attributes, lexer.lines); + try { + tidyPrint(lexer.errout, res.getString("error")); + errorMessage.append(res.getString("error")); + tag(errorMessage, lexer, node); + tidyPrint(lexer.errout, res.getString("unexpected_gt")); + errorMessage.append(res.getString("unexpected_gt")); + attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR)); + // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING)); + // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO)); + try { + MarkerUtilities.setMessage(attributes, errorMessage.toString()); + MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM); + } catch (CoreException e) { + } + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + tidyPrintln(lexer.errout); + 
lexer.errors++; + ; + } + + } + + public static void warning(Lexer lexer, Node element, Node node, short code) { + + TagTable tt = lexer.configuration.tt; + + lexer.warnings++; + + /* keep quiet after 6 errors */ + if (lexer.errors > 6) + return; + + if (lexer.configuration.ShowWarnings) { + + /* on end of file adjust reported position to end of input */ + if (code == UNEXPECTED_END_OF_FILE) { + lexer.lines = lexer.in.curline; + lexer.columns = lexer.in.curcol; + } + + position(lexer); + Hashtable attributes = new Hashtable(); + StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": "); + + MarkerUtilities.setLineNumber(attributes, lexer.lines); + + if (code == MISSING_ENDTAG_FOR) { + try { + tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_endtag_for"), new Object[] { element.element })); + errorMessage.append(MessageFormat.format(res.getString("missing_endtag_for"), new Object[] { element.element })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == MISSING_ENDTAG_BEFORE) { + try { + tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_endtag_before"), new Object[] { element.element })); + errorMessage.append(MessageFormat.format(res.getString("missing_endtag_before"), new Object[] { element.element })); + + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + tag(errorMessage, lexer, node); + } else if (code == DISCARDING_UNEXPECTED) { + try { + tidyPrint(lexer.errout, res.getString("discarding_unexpected")); + errorMessage.append(res.getString("discarding_unexpected")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + tag(errorMessage, lexer, node); + } else if (code == NESTED_EMPHASIS) { + try { + tidyPrint(lexer.errout, res.getString("nested_emphasis")); + errorMessage.append(res.getString("nested_emphasis")); + } catch (MissingResourceException e) { + 
lexer.errout.println(e.toString()); + } + tag(errorMessage, lexer, node); + } else if (code == COERCE_TO_ENDTAG) { + try { + tidyPrint(lexer.errout, MessageFormat.format(res.getString("coerce_to_endtag"), new Object[] { element.element })); + errorMessage.append(MessageFormat.format(res.getString("coerce_to_endtag"), new Object[] { element.element })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == NON_MATCHING_ENDTAG) { + try { + tidyPrint(lexer.errout, res.getString("non_matching_endtag_1")); + errorMessage.append(res.getString("non_matching_endtag_1")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + tag(errorMessage, lexer, node); + try { + tidyPrint(lexer.errout, MessageFormat.format(res.getString("non_matching_endtag_2"), new Object[] { element.element })); + errorMessage.append(MessageFormat.format(res.getString("non_matching_endtag_2"), new Object[] { element.element })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == TAG_NOT_ALLOWED_IN) { + try { + tidyPrint(lexer.errout, res.getString("warning")); + errorMessage.append(res.getString("warning")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + tag(errorMessage, lexer, node); + try { + tidyPrint(lexer.errout, MessageFormat.format(res.getString("tag_not_allowed_in"), new Object[] { element.element })); + errorMessage.append(MessageFormat.format(res.getString("tag_not_allowed_in"), new Object[] { element.element })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == DOCTYPE_AFTER_TAGS) { + try { + tidyPrint(lexer.errout, res.getString("doctype_after_tags")); + errorMessage.append(res.getString("doctype_after_tags")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == MISSING_STARTTAG) { + try { + tidyPrint(lexer.errout, 
MessageFormat.format(res.getString("missing_starttag"), new Object[] { node.element })); + errorMessage.append(MessageFormat.format(res.getString("missing_starttag"), new Object[] { node.element })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == UNEXPECTED_ENDTAG) { + try { + tidyPrint(lexer.errout, MessageFormat.format(res.getString("unexpected_endtag"), new Object[] { node.element })); + if (element != null) + tidyPrint( + lexer.errout, + MessageFormat.format(res.getString("unexpected_endtag_suffix"), new Object[] { element.element })); + errorMessage.append(MessageFormat.format(res.getString("unexpected_endtag"), new Object[] { node.element })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == TOO_MANY_ELEMENTS) { + try { + tidyPrint(lexer.errout, MessageFormat.format(res.getString("too_many_elements"), new Object[] { node.element })); + if (element != null) + tidyPrint( + lexer.errout, + MessageFormat.format(res.getString("too_many_elements_suffix"), new Object[] { element.element })); + errorMessage.append(MessageFormat.format(res.getString("too_many_elements"), new Object[] { node.element })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == USING_BR_INPLACE_OF) { + try { + tidyPrint(lexer.errout, res.getString("using_br_inplace_of")); + errorMessage.append(res.getString("using_br_inplace_of")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + tag(errorMessage, lexer, node); + } else if (code == INSERTING_TAG) { + try { + tidyPrint(lexer.errout, MessageFormat.format(res.getString("inserting_tag"), new Object[] { node.element })); + errorMessage.append(MessageFormat.format(res.getString("inserting_tag"), new Object[] { node.element })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == CANT_BE_NESTED) { + 
try { + tidyPrint(lexer.errout, res.getString("warning")); + errorMessage.append(res.getString("warning")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + tag(errorMessage, lexer, node); + try { + tidyPrint(lexer.errout, res.getString("cant_be_nested")); + errorMessage.append(res.getString("cant_be_nested")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == PROPRIETARY_ELEMENT) { + try { + tidyPrint(lexer.errout, res.getString("warning")); + errorMessage.append(res.getString("warning")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + tag(errorMessage, lexer, node); + try { + tidyPrint(lexer.errout, res.getString("proprietary_element")); + errorMessage.append(res.getString("proprietary_element")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + + if (node.tag == tt.tagLayer) + lexer.badLayout |= USING_LAYER; + else if (node.tag == tt.tagSpacer) + lexer.badLayout |= USING_SPACER; + else if (node.tag == tt.tagNobr) + lexer.badLayout |= USING_NOBR; + } else if (code == OBSOLETE_ELEMENT) { + try { + if (element.tag != null && (element.tag.model & Dict.CM_OBSOLETE) != 0) { + tidyPrint(lexer.errout, res.getString("obsolete_element")); + errorMessage.append(res.getString("obsolete_element")); + } else { + tidyPrint(lexer.errout, res.getString("replacing_element")); + errorMessage.append(res.getString("replacing_element")); + } + + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + tag(errorMessage, lexer, element); + try { + tidyPrint(lexer.errout, res.getString("by")); + errorMessage.append(res.getString("by")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + tag(errorMessage, lexer, node); + } else if (code == TRIM_EMPTY_ELEMENT) { + try { + tidyPrint(lexer.errout, res.getString("trim_empty_element")); + 
errorMessage.append(res.getString("trim_empty_element")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + tag(errorMessage, lexer, element); + } else if (code == MISSING_TITLE_ELEMENT) { + try { + tidyPrint(lexer.errout, res.getString("missing_title_element")); + errorMessage.append(res.getString("missing_title_element")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == ILLEGAL_NESTING) { + try { + tidyPrint(lexer.errout, res.getString("warning")); + errorMessage.append(res.getString("warning")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + tag(errorMessage, lexer, element); + try { + tidyPrint(lexer.errout, res.getString("illegal_nesting")); + errorMessage.append(res.getString("illegal_nesting")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == NOFRAMES_CONTENT) { + try { + tidyPrint(lexer.errout, res.getString("warning")); + errorMessage.append(res.getString("warning")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + tag(errorMessage, lexer, node); + try { + tidyPrint(lexer.errout, res.getString("noframes_content")); + errorMessage.append(res.getString("noframes_content")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == INCONSISTENT_VERSION) { + try { + tidyPrint(lexer.errout, res.getString("inconsistent_version")); + errorMessage.append(res.getString("inconsistent_version")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == MALFORMED_DOCTYPE) { + try { + tidyPrint(lexer.errout, res.getString("malformed_doctype")); + errorMessage.append(res.getString("malformed_doctype")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == CONTENT_AFTER_BODY) { + try { + tidyPrint(lexer.errout, 
res.getString("content_after_body")); + errorMessage.append(res.getString("content_after_body")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == MALFORMED_COMMENT) { + try { + tidyPrint(lexer.errout, res.getString("malformed_comment")); + errorMessage.append(res.getString("malformed_comment")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == BAD_COMMENT_CHARS) { + try { + tidyPrint(lexer.errout, res.getString("bad_comment_chars")); + errorMessage.append(res.getString("bad_comment_chars")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == BAD_XML_COMMENT) { + try { + tidyPrint(lexer.errout, res.getString("bad_xml_comment")); + errorMessage.append(res.getString("bad_xml_comment")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == BAD_CDATA_CONTENT) { + try { + tidyPrint(lexer.errout, res.getString("bad_cdata_content")); + errorMessage.append(res.getString("bad_cdata_content")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == INCONSISTENT_NAMESPACE) { + try { + tidyPrint(lexer.errout, res.getString("inconsistent_namespace")); + errorMessage.append(res.getString("inconsistent_namespace")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == DTYPE_NOT_UPPER_CASE) { + try { + tidyPrint(lexer.errout, res.getString("dtype_not_upper_case")); + errorMessage.append(res.getString("dtype_not_upper_case")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == UNEXPECTED_END_OF_FILE) { + try { + tidyPrint(lexer.errout, res.getString("unexpected_end_of_file")); + errorMessage.append(res.getString("unexpected_end_of_file")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } 
+ tag(errorMessage, lexer, element); + } + // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR)); + attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING)); + // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO)); + try { + MarkerUtilities.setMessage(attributes, errorMessage.toString()); + MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM); + } catch (CoreException e) { + } + tidyPrintln(lexer.errout); + } + } + + public static void error(Lexer lexer, Node element, Node node, short code) { + lexer.warnings++; + + /* keep quiet after 6 errors */ + if (lexer.errors > 6) + return; + + lexer.errors++; + + position(lexer); + + Hashtable attributes = new Hashtable(); + StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": "); + + MarkerUtilities.setLineNumber(attributes, lexer.lines); + + if (code == SUSPECTED_MISSING_QUOTE) { + try { + tidyPrint(lexer.errout, res.getString("suspected_missing_quote")); + errorMessage.append(res.getString("suspected_missing_quote")); + + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == DUPLICATE_FRAMESET) { + try { + tidyPrint(lexer.errout, res.getString("duplicate_frameset")); + errorMessage.append(res.getString("duplicate_frameset")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == UNKNOWN_ELEMENT) { + try { + tidyPrint(lexer.errout, res.getString("error")); + errorMessage.append(res.getString("error")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + tag(errorMessage, lexer, node); + try { + tidyPrint(lexer.errout, res.getString("unknown_element")); + errorMessage.append(res.getString("unknown_element")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } else if (code == UNEXPECTED_ENDTAG) { + try { + tidyPrint(lexer.errout, 
MessageFormat.format(res.getString("unexpected_endtag"), new Object[] { node.element })); + errorMessage.append(MessageFormat.format(res.getString("unexpected_endtag"), new Object[] { node.element })); + if (element != null) { + tidyPrint( + lexer.errout, + MessageFormat.format(res.getString("unexpected_endtag_suffix"), new Object[] { element.element })); + errorMessage.append(MessageFormat.format(res.getString("unexpected_endtag_suffix"), new Object[] { element.element })); + } + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR)); + // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING)); + // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO)); + try { + MarkerUtilities.setMessage(attributes, errorMessage.toString()); + MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM); + } catch (CoreException e) { + } + tidyPrintln(lexer.errout); + } + + public static void errorSummary(Lexer lexer) { + /* adjust badAccess to that its null if frames are ok */ + if ((lexer.badAccess & (USING_FRAMES | USING_NOFRAMES)) != 0) { + if (!(((lexer.badAccess & USING_FRAMES) != 0) && ((lexer.badAccess & USING_NOFRAMES) == 0))) + lexer.badAccess &= ~(USING_FRAMES | USING_NOFRAMES); + } + + if (lexer.badChars != 0) { + if ((lexer.badChars & WINDOWS_CHARS) != 0) { + try { + tidyPrint(lexer.errout, res.getString("badchars_summary")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + } + + if (lexer.badForm != 0) { + try { + tidyPrint(lexer.errout, res.getString("badform_summary")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + + if (lexer.badAccess != 0) { + if ((lexer.badAccess & MISSING_SUMMARY) != 0) { + try { + tidyPrint(lexer.errout, res.getString("badaccess_missing_summary")); + } catch (MissingResourceException e) { + 
lexer.errout.println(e.toString()); + } + } + + if ((lexer.badAccess & MISSING_IMAGE_ALT) != 0) { + try { + tidyPrint(lexer.errout, res.getString("badaccess_missing_image_alt")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + + if ((lexer.badAccess & MISSING_IMAGE_MAP) != 0) { + try { + tidyPrint(lexer.errout, res.getString("badaccess_missing_image_map")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + + if ((lexer.badAccess & MISSING_LINK_ALT) != 0) { + try { + tidyPrint(lexer.errout, res.getString("badaccess_missing_link_alt")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + + if (((lexer.badAccess & USING_FRAMES) != 0) && ((lexer.badAccess & USING_NOFRAMES) == 0)) { + try { + tidyPrint(lexer.errout, res.getString("badaccess_frames")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + + try { + tidyPrint(lexer.errout, MessageFormat.format(res.getString("badaccess_summary"), new Object[] { ACCESS_URL })); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + + if (lexer.badLayout != 0) { + if ((lexer.badLayout & USING_LAYER) != 0) { + try { + tidyPrint(lexer.errout, res.getString("badlayout_using_layer")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + + if ((lexer.badLayout & USING_SPACER) != 0) { + try { + tidyPrint(lexer.errout, res.getString("badlayout_using_spacer")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + + if ((lexer.badLayout & USING_FONT) != 0) { + try { + tidyPrint(lexer.errout, res.getString("badlayout_using_font")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + + if ((lexer.badLayout & USING_NOBR) != 0) { + try { + tidyPrint(lexer.errout, res.getString("badlayout_using_nobr")); + } catch (MissingResourceException e) { + 
lexer.errout.println(e.toString()); + } + } + + if ((lexer.badLayout & USING_BODY) != 0) { + try { + tidyPrint(lexer.errout, res.getString("badlayout_using_body")); + } catch (MissingResourceException e) { + lexer.errout.println(e.toString()); + } + } + } + } + + public static void unknownOption(PrintWriter errout, char c) { + try { + tidyPrintln( + errout, + MessageFormat.format(res.getString("unrecognized_option"), new Object[] { new String(new char[] { c }) + })); + } catch (MissingResourceException e) { + errout.println(e.toString()); + } + } + + public static void unknownFile(PrintWriter errout, String program, String file) { + try { + tidyPrintln(errout, MessageFormat.format(res.getString("unknown_file"), new Object[] { program, file })); + } catch (MissingResourceException e) { + errout.println(e.toString()); + } + } + + public static void needsAuthorIntervention(PrintWriter errout) { + try { + tidyPrintln(errout, res.getString("needs_author_intervention")); + } catch (MissingResourceException e) { + errout.println(e.toString()); + } + } + + public static void missingBody(PrintWriter errout) { + try { + tidyPrintln(errout, res.getString("missing_body")); + } catch (MissingResourceException e) { + errout.println(e.toString()); + } + } + + public static void reportNumberOfSlides(PrintWriter errout, int count) { + try { + tidyPrintln(errout, MessageFormat.format(res.getString("slides_found"), new Object[] { new Integer(count)})); + } catch (MissingResourceException e) { + errout.println(e.toString()); + } + } + + public static void generalInfo(PrintWriter errout) { + try { + tidyPrintln(errout, res.getString("general_info")); + } catch (MissingResourceException e) { + errout.println(e.toString()); + } + } + + public static void helloMessage(PrintWriter errout, String date, String filename) { + currentFile = filename; /* for use with Gnu Emacs */ + + try { + tidyPrintln(errout, MessageFormat.format(res.getString("hello_message"), new Object[] { date, filename 
})); + } catch (MissingResourceException e) { + errout.println(e.toString()); + } + } + + public static void reportVersion(PrintWriter errout, Lexer lexer, String filename, Node doctype) { + int i, c; + int state = 0; + String vers = lexer.HTMLVersionName(); + MutableInteger cc = new MutableInteger(); + + try { + if (doctype != null) { + tidyPrint(errout, MessageFormat.format(res.getString("doctype_given"), new Object[] { filename })); + + for (i = doctype.start; i < doctype.end; ++i) { + c = (int) doctype.textarray[i]; + + /* look for UTF-8 multibyte character */ + if (c < 0) { + i += PPrint.getUTF8(doctype.textarray, i, cc); + c = cc.value; + } + + if (c == (char) '"') + ++state; + else if (state == 1) + errout.print((char) c); + } + + errout.print('"'); + } + + tidyPrintln( + errout, + MessageFormat.format( + res.getString("report_version"), + new Object[] { filename, (vers != null ? vers : "HTML proprietary")})); + } catch (MissingResourceException e) { + errout.println(e.toString()); + } + } + + public static void reportNumWarnings(PrintWriter errout, Lexer lexer) { + if (lexer.warnings > 0) { + try { + tidyPrintln(errout, MessageFormat.format(res.getString("num_warnings"), new Object[] { new Integer(lexer.warnings)})); + } catch (MissingResourceException e) { + errout.println(e.toString()); + } + } else { + try { + tidyPrintln(errout, res.getString("no_warnings")); + } catch (MissingResourceException e) { + errout.println(e.toString()); + } + } + } + + public static void helpText(PrintWriter out, String prog) { + try { + tidyPrintln(out, MessageFormat.format(res.getString("help_text"), new Object[] { prog, RELEASE_DATE })); + } catch (MissingResourceException e) { + out.println(e.toString()); + } + } + + public static void badTree(PrintWriter errout) { + try { + tidyPrintln(errout, res.getString("bad_tree")); + } catch (MissingResourceException e) { + errout.println(e.toString()); + } + } + +} diff --git 
a/net.sourceforge.phpeclipse/src/org/w3c/tidy/StreamIn.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/StreamIn.java new file mode 100644 index 0000000..e2b83a7 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/StreamIn.java @@ -0,0 +1,81 @@ +/* + * @(#)StreamIn.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Input Stream + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * Derived from <a href="http://www.w3.org/People/Raggett/tidy"> + * HTML Tidy Release 4 Aug 2000</a> + * + * @author Dave Raggett <dsr@w3.org> + * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +import java.io.InputStream; + +public abstract class StreamIn { + + public static final int EndOfStream = -1; // EOF + + /* states for ISO 2022 + + A document in ISO-2022 based encoding uses some ESC sequences called + "designator" to switch character sets. The designators defined and + used in ISO-2022-JP are: + + "ESC" + "(" + ? for ISO646 variants + + "ESC" + "$" + ? and + "ESC" + "$" + "(" + ? 
for multibyte character sets + */ + + public static final int FSM_ASCII = 0; + public static final int FSM_ESC = 1; + public static final int FSM_ESCD = 2; + public static final int FSM_ESCDP = 3; + public static final int FSM_ESCP = 4; + public static final int FSM_NONASCII = 5; + + /* non-raw input is cleaned up*/ + public int state; /* FSM for ISO2022 */ + public boolean pushed; + public int c; + public int tabs; + public int tabsize; + public int lastcol; + public int curcol; + public int curline; + public int encoding; + public InputStream stream; + public boolean endOfStream; + public Object lexer; /* needed for error reporting */ + + /* read char from stream */ + public abstract int readCharFromStream(); + + public abstract int readChar(); + + public abstract void ungetChar(int c); + + public abstract boolean isEndOfStream(); + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/StreamInImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/StreamInImpl.java new file mode 100644 index 0000000..5c12c8a --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/StreamInImpl.java @@ -0,0 +1,367 @@ +/* + * @(#)StreamInImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Input Stream Implementation + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * Derived from <a href="http://www.w3.org/People/Raggett/tidy"> + * HTML Tidy Release 4 Aug 2000</a> + * + * @author Dave Raggett <dsr@w3.org> + * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * 
@version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +import java.io.InputStream; +import java.io.IOException; + +public class StreamInImpl extends StreamIn { + + /* Mapping for Windows Western character set (128-159) to Unicode */ + private static int[] Win2Unicode = + { + 0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000, + 0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178 + }; + + /* + John Love-Jensen contributed this table for mapping MacRoman + character set to Unicode + */ + + private static int[] Mac2Unicode = + { + + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, + + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, + + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, + + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, + + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, + + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, + + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, + + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, + /* x7F = DEL */ + 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, + 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 
0x00E7, 0x00E9, 0x00E8, + + 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3, + 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, + + 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, + 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8, + + 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211, + 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, + + 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, + 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153, + + 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA, + 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, + + 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, + 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4, + /* xF0 = Apple Logo */ + 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, + 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7 + }; + + public StreamInImpl(InputStream stream, int encoding, int tabsize) + { + this.stream = stream; + this.pushed = false; + this.c = (int)'\0'; + this.tabs = 0; + this.tabsize = tabsize; + this.curline = 1; + this.curcol = 1; + this.encoding = encoding; + this.state = FSM_ASCII; + this.endOfStream = false; + } + + /* read char from stream */ + public int readCharFromStream() + { + int n, c, i, count; + + try { + c = this.stream.read(); + + if (c == EndOfStream) { + this.endOfStream = true; + return c; + } + + /* + A document in ISO-2022 based encoding uses some ESC sequences + called "designator" to switch character sets. The designators + defined and used in ISO-2022-JP are: + + "ESC" + "(" + ? for ISO646 variants + + "ESC" + "$" + ? and + "ESC" + "$" + "(" + ? for multibyte character sets + + Where ? stands for a single character used to indicate the + character set for multibyte characters. 
+ + Tidy handles this by preserving the escape sequence and + setting the top bit of each byte for non-ascii chars. This + bit is then cleared on output. The input stream keeps track + of the state to determine when to set/clear the bit. + */ + + if (this.encoding == Configuration.ISO2022) + { + if (c == 0x1b) /* ESC */ + { + this.state = FSM_ESC; + return c; + } + + switch (this.state) + { + case FSM_ESC: + if (c == '$') + this.state = FSM_ESCD; + else if (c == '(') + this.state = FSM_ESCP; + else + this.state = FSM_ASCII; + break; + + case FSM_ESCD: + if (c == '(') + this.state = FSM_ESCDP; + else + this.state = FSM_NONASCII; + break; + + case FSM_ESCDP: + this.state = FSM_NONASCII; + break; + + case FSM_ESCP: + this.state = FSM_ASCII; + break; + + case FSM_NONASCII: + c |= 0x80; + break; + } + + return c; + } + + if (this.encoding != Configuration.UTF8) + return c; + + /* deal with UTF-8 encoded char */ + + if ((c & 0xE0) == 0xC0) /* 110X XXXX two bytes */ + { + n = c & 31; + count = 1; + } + else if ((c & 0xF0) == 0xE0) /* 1110 XXXX three bytes */ + { + n = c & 15; + count = 2; + } + else if ((c & 0xF8) == 0xF0) /* 1111 0XXX four bytes */ + { + n = c & 7; + count = 3; + } + else if ((c & 0xFC) == 0xF8) /* 1111 10XX five bytes */ + { + n = c & 3; + count = 4; + } + else if ((c & 0xFE) == 0xFC) /* 1111 110X six bytes */ + { + n = c & 1; + count = 5; + } + else /* 0XXX XXXX one byte */ + return c; + + /* successor bytes should have the form 10XX XXXX */ + for (i = 1; i <= count; ++i) + { + c = this.stream.read(); + + if (c == EndOfStream) { + this.endOfStream = true; + return c; + } + + n = (n << 6) | (c & 0x3F); + } + } + catch (IOException e) { + System.err.println("StreamInImpl.readCharFromStream: " + e.toString()); + n = EndOfStream; + } + + return n; + } + + public int readChar() + { + int c; + + if (this.pushed) + { + this.pushed = false; + c = this.c; + + if (c == '\n') + { + this.curcol = 1; + this.curline++; + return c; + } + + this.curcol++; + return c; 
+ } + + this.lastcol = this.curcol; + + if (this.tabs > 0) + { + this.curcol++; + this.tabs--; + return ' '; + } + + for (;;) + { + c = readCharFromStream(); + + if (c < 0) + return EndOfStream; + + if (c == '\n') + { + this.curcol = 1; + this.curline++; + break; + } + + if (c == '\r') + { + c = readCharFromStream(); + if (c != '\n') + { + ungetChar(c); + c = '\n'; + } + this.curcol = 1; + this.curline++; + break; + } + + if (c == '\t') + { + this.tabs = this.tabsize - ((this.curcol - 1) % this.tabsize) - 1; + this.curcol++; + c = ' '; + break; + } + + /* strip control characters, except for Esc */ + + if (c == '\033') + break; + + if (0 < c && c < 32) + continue; + + /* watch out for IS02022 */ + + if (this.encoding == Configuration.RAW || + this.encoding == Configuration.ISO2022) + { + this.curcol++; + break; + } + + if (this.encoding == Configuration.MACROMAN) + c = Mac2Unicode[c]; + + /* produced e.g. as a side-effect of smart quotes in Word */ + + if (127 < c && c < 160) + { + Report.encodingError((Lexer)this.lexer, Report.WINDOWS_CHARS, c); + + c = Win2Unicode[c - 128]; + + if (c == 0) + continue; + } + + this.curcol++; + break; + } + + return c; + } + + public void ungetChar(int c) + { + this.pushed = true; + this.c = c; + + if (c == '\n') + { + --this.curline; + } + + this.curcol = this.lastcol; + } + + public boolean isEndOfStream() + { + return this.endOfStream; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Style.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Style.java new file mode 100644 index 0000000..061e332 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Style.java @@ -0,0 +1,58 @@ +/* + * @(#)Style.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Linked list of class names and styles + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * Derived from <a href="http://www.w3.org/People/Raggett/tidy"> + * HTML Tidy Release 4 Aug 2000</a> + * + * @author Dave Raggett <dsr@w3.org> + * 
/**
 *
 * Linked list of class names and styles
 *
 * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
 * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
 * HTML Tidy Release 4 Aug 2000</a>
 *
 * <p>Plain node type: every member is a public field and the constructors
 * only store their arguments.</p>
 *
 * @author  Dave Raggett <dsr@w3.org>
 * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
 * @version 1.0, 1999/05/22
 * @version 1.0.1, 1999/05/29
 * @version 1.1, 1999/06/18 Java Bean
 * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
 * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
 * @version 1.4, 1999/09/04 DOM support
 * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
 * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
 * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
 * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
 * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
 * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
 * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
 */

public class Style {

    public String tag;
    public String tagClass;
    public String properties;

    /** Following node of the list, or null when this is the last one. */
    public Style next;

    /** Creates a node with every field set to null. */
    public Style()
    {
        this(null, null, null);
    }

    /** Creates a node that is not linked to a successor. */
    public Style(String tag, String tagClass, String properties)
    {
        this(tag, tagClass, properties, null);
    }

    /** Creates a node and links it in front of {@code next}. */
    public Style(String tag, String tagClass, String properties, Style next)
    {
        this.next = next;
        this.properties = properties;
        this.tagClass = tagClass;
        this.tag = tag;
    }

}
/**
 *
 * Linked list of style properties
 *
 * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
 * See Tidy.java for the copyright notice.
 * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
 * HTML Tidy Release 4 Aug 2000</a>
 *
 * <p>Plain node type: every member is a public field and the constructors
 * only store their arguments.</p>
 *
 * @author  Dave Raggett <dsr@w3.org>
 * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
 * @version 1.0, 1999/05/22
 * @version 1.0.1, 1999/05/29
 * @version 1.1, 1999/06/18 Java Bean
 * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
 * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
 * @version 1.4, 1999/09/04 DOM support
 * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
 * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
 * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
 * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
 * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
 * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
 * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
 */

public class StyleProp {

    public String name;
    public String value;

    /** Following node of the list, or null when this is the last one. */
    public StyleProp next;

    /** Creates a node with every field set to null. */
    public StyleProp()
    {
        this(null, null);
    }

    /** Creates a node that is not linked to a successor. */
    public StyleProp(String name, String value)
    {
        this(name, value, null);
    }

    /** Creates a node and links it in front of {@code next}. */
    public StyleProp(String name, String value, StyleProp next)
    {
        this.next = next;
        this.value = value;
        this.name = name;
    }

}
/**
 *
 * Tag dictionary node hash table
 *
 * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
 * See Tidy.java for the copyright notice.
 * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
 * HTML Tidy Release 4 Aug 2000</a>
 *
 * <p>Maps element names to their {@link Dict} entries. Each instance owns
 * its own hash table, filled from the static built-in list on construction,
 * and keeps direct references to frequently used entries in the public
 * {@code tagXxx} fields.</p>
 *
 * @author  Dave Raggett <dsr@w3.org>
 * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
 * @version 1.0, 1999/05/22
 * @version 1.0.1, 1999/05/29
 * @version 1.1, 1999/06/18 Java Bean
 * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
 * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
 * @version 1.4, 1999/09/04 DOM support
 * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
 * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
 * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
 * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
 * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
 * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
 * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
 * Modified from a Singleton to a non-Singleton.
 */

import java.util.Hashtable;
import java.util.Enumeration;

public class TagTable {

    /* optional; only consulted by findTag() for the XmlTags switch */
    private Configuration configuration = null;

    /**
     * Installs every built-in entry of the static tags list into this
     * instance's hash table and caches the commonly used entries in the
     * public tagXxx fields.
     */
    public TagTable()
    {
        for ( int i = 0; i < tags.length; i++ ) {
            install( tags[i] );
        }
        tagHtml = lookup("html");
        tagHead = lookup("head");
        tagBody = lookup("body");
        tagFrameset = lookup("frameset");
        tagFrame = lookup("frame");
        tagNoframes = lookup("noframes");
        tagMeta = lookup("meta");
        tagTitle = lookup("title");
        tagBase = lookup("base");
        tagHr = lookup("hr");
        tagPre = lookup("pre");
        tagListing = lookup("listing");
        tagH1 = lookup("h1");
        tagH2 = lookup("h2");
        tagP = lookup("p");
        tagUl = lookup("ul");
        tagOl = lookup("ol");
        tagDir = lookup("dir");
        tagLi = lookup("li");
        tagDt = lookup("dt");
        tagDd = lookup("dd");
        tagDl = lookup("dl");
        tagTd = lookup("td");
        tagTh = lookup("th");
        tagTr = lookup("tr");
        tagCol = lookup("col");
        tagBr = lookup("br");
        tagA = lookup("a");
        tagLink = lookup("link");
        tagB = lookup("b");
        tagI = lookup("i");
        tagStrong = lookup("strong");
        tagEm = lookup("em");
        tagBig = lookup("big");
        tagSmall = lookup("small");
        tagParam = lookup("param");
        tagOption = lookup("option");
        tagOptgroup = lookup("optgroup");
        tagImg = lookup("img");
        tagMap = lookup("map");
        tagArea = lookup("area");
        tagNobr = lookup("nobr");
        tagWbr = lookup("wbr");
        tagFont = lookup("font");
        tagSpacer = lookup("spacer");
        tagLayer = lookup("layer");
        tagCenter = lookup("center");
        tagStyle = lookup("style");
        tagScript = lookup("script");
        tagNoscript = lookup("noscript");
        tagTable = lookup("table");
        tagCaption = lookup("caption");
        tagForm = lookup("form");
        tagTextarea = lookup("textarea");
        tagBlockquote = lookup("blockquote");
        tagApplet = lookup("applet");
        tagObject = lookup("object");
        tagDiv = lookup("div");
        tagSpan = lookup("span");
    }

    /**
     * Sets the configuration whose XmlTags flag findTag() consults.
     *
     * @param configuration the configuration to use; may be null
     */
    public void setConfiguration(Configuration configuration)
    {
        this.configuration = configuration;
    }

    /**
     * Looks a tag up by its exact name (the match is case-sensitive).
     *
     * @param name element name; must not be null, since Hashtable.get
     *             rejects null keys with a NullPointerException
     * @return the dictionary entry, or null when the name is not installed
     */
    public Dict lookup( String name )
    {
        return (Dict)tagHashtable.get( name );
    }

    /**
     * Registers a dictionary entry under its name.
     *
     * When the name is already present the existing entry is updated in
     * place: versions, parser and chkattrs are overwritten while the model
     * bits are OR-ed in. Otherwise the given entry itself is stored.
     *
     * @param dict the entry to install
     * @return the entry now registered under that name
     */
    public Dict install( Dict dict )
    {
        Dict d = (Dict)tagHashtable.get(dict.name);
        if (d != null)
        {
            /*
               NOTE(review): for built-in names d is the object held by the
               static tags list, which every TagTable instance installs, so
               this update is visible to all instances - confirm intended.
            */
            d.versions = dict.versions;
            d.model |= dict.model;
            d.parser = dict.parser;
            d.chkattrs = dict.chkattrs;
            return d;
        }
        else
        {
            tagHashtable.put(dict.name, dict);
            return dict;
        }
    }

    /* public interface for finding tag by name */
    /**
     * Sets node.tag to the dictionary entry for the node's element name.
     * When a configuration is set and its XmlTags flag is on, every node
     * gets the shared dummy xmlTags entry instead.
     *
     * @param node the node to resolve
     * @return true when node.tag was set, false when the node has no
     *         element name or the name is unknown
     */
    public boolean findTag( Node node )
    {
        Dict np;

        if ( configuration != null && configuration.XmlTags ) {
            node.tag = xmlTags;
            return true;
        }

        if ( node.element != null ) {
            np = lookup( node.element );
            if ( np != null ) {
                node.tag = np;
                return true;
            }
        }

        return false;
    }

    /**
     * Returns the parser registered for the node's element name.
     *
     * @param node the node to resolve
     * @return the parser of the matching entry (which may itself be null),
     *         or null when the node has no element name or it is unknown
     */
    public Parser findParser(Node node)
    {
        Dict np;

        if (node.element != null) {
            np = lookup(node.element);
            if (np != null) {
                return np.parser;
            }
        }

        return null;
    }

    /* per-instance map from element name (String) to Dict */
    private Hashtable tagHashtable = new Hashtable();

    /*
       Built-in tags, installed in this order by the constructor.
       "nolayer" and "ilayer" are listed twice with identical arguments;
       the second occurrence goes through the update branch of install().
    */
    private static Dict[] tags = {

        new Dict( "html",       (short)(Dict.VERS_ALL|Dict.VERS_FRAMES),     (Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST),  ParserImpl.getParseHTML(),       CheckAttribsImpl.getCheckHTML() ),

        new Dict( "head",       (short)(Dict.VERS_ALL|Dict.VERS_FRAMES),     (Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST),  ParserImpl.getParseHead(),       null ),

        new Dict( "title",      (short)(Dict.VERS_ALL|Dict.VERS_FRAMES),     Dict.CM_HEAD,                               ParserImpl.getParseTitle(),      null ),
        new Dict( "base",       (short)(Dict.VERS_ALL|Dict.VERS_FRAMES),     (Dict.CM_HEAD|Dict.CM_EMPTY),               null,                            null ),
        new Dict( "link",       (short)(Dict.VERS_ALL|Dict.VERS_FRAMES),     (Dict.CM_HEAD|Dict.CM_EMPTY),               null,                            CheckAttribsImpl.getCheckLINK() ),
        new Dict( "meta",       (short)(Dict.VERS_ALL|Dict.VERS_FRAMES),     (Dict.CM_HEAD|Dict.CM_EMPTY),               null,                            null ),
        new Dict( "style",      (short)(Dict.VERS_FROM32|Dict.VERS_FRAMES),  Dict.CM_HEAD,                               ParserImpl.getParseScript(),     CheckAttribsImpl.getCheckSTYLE() ),
        new Dict( "script",     (short)(Dict.VERS_FROM32|Dict.VERS_FRAMES),  (Dict.CM_HEAD|Dict.CM_MIXED|Dict.CM_BLOCK|Dict.CM_INLINE), ParserImpl.getParseScript(), CheckAttribsImpl.getCheckSCRIPT() ),
        new Dict( "server",     Dict.VERS_NETSCAPE,  (Dict.CM_HEAD|Dict.CM_MIXED|Dict.CM_BLOCK|Dict.CM_INLINE), ParserImpl.getParseScript(), null ),

        new Dict( "body",       Dict.VERS_ALL,       (Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST),  ParserImpl.getParseBody(),       null ),
        new Dict( "frameset",   Dict.VERS_FRAMES,    (Dict.CM_HTML|Dict.CM_FRAMES),              ParserImpl.getParseFrameSet(),   null ),

        new Dict( "p",          Dict.VERS_ALL,       (Dict.CM_BLOCK|Dict.CM_OPT),                ParserImpl.getParseInline(),     null ),
        new Dict( "h1",         Dict.VERS_ALL,       (Dict.CM_BLOCK|Dict.CM_HEADING),            ParserImpl.getParseInline(),     null ),
        new Dict( "h2",         Dict.VERS_ALL,       (Dict.CM_BLOCK|Dict.CM_HEADING),            ParserImpl.getParseInline(),     null ),
        new Dict( "h3",         Dict.VERS_ALL,       (Dict.CM_BLOCK|Dict.CM_HEADING),            ParserImpl.getParseInline(),     null ),
        new Dict( "h4",         Dict.VERS_ALL,       (Dict.CM_BLOCK|Dict.CM_HEADING),            ParserImpl.getParseInline(),     null ),
        new Dict( "h5",         Dict.VERS_ALL,       (Dict.CM_BLOCK|Dict.CM_HEADING),            ParserImpl.getParseInline(),     null ),
        new Dict( "h6",         Dict.VERS_ALL,       (Dict.CM_BLOCK|Dict.CM_HEADING),            ParserImpl.getParseInline(),     null ),
        new Dict( "ul",         Dict.VERS_ALL,       Dict.CM_BLOCK,                              ParserImpl.getParseList(),       null ),
        new Dict( "ol",         Dict.VERS_ALL,       Dict.CM_BLOCK,                              ParserImpl.getParseList(),       null ),
        new Dict( "dl",         Dict.VERS_ALL,       Dict.CM_BLOCK,                              ParserImpl.getParseDefList(),    null ),
        new Dict( "dir",        Dict.VERS_LOOSE,     (Dict.CM_BLOCK|Dict.CM_OBSOLETE),           ParserImpl.getParseList(),       null ),
        new Dict( "menu",       Dict.VERS_LOOSE,     (Dict.CM_BLOCK|Dict.CM_OBSOLETE),           ParserImpl.getParseList(),       null ),
        new Dict( "pre",        Dict.VERS_ALL,       Dict.CM_BLOCK,                              ParserImpl.getParsePre(),        null ),
        new Dict( "listing",    Dict.VERS_ALL,       (Dict.CM_BLOCK|Dict.CM_OBSOLETE),           ParserImpl.getParsePre(),        null ),
        new Dict( "xmp",        Dict.VERS_ALL,       (Dict.CM_BLOCK|Dict.CM_OBSOLETE),           ParserImpl.getParsePre(),        null ),
        new Dict( "plaintext",  Dict.VERS_ALL,       (Dict.CM_BLOCK|Dict.CM_OBSOLETE),           ParserImpl.getParsePre(),        null ),
        new Dict( "address",    Dict.VERS_ALL,       Dict.CM_BLOCK,                              ParserImpl.getParseBlock(),      null ),
        new Dict( "blockquote", Dict.VERS_ALL,       Dict.CM_BLOCK,                              ParserImpl.getParseBlock(),      null ),
        new Dict( "form",       Dict.VERS_ALL,       Dict.CM_BLOCK,                              ParserImpl.getParseBlock(),      null ),
        new Dict( "isindex",    Dict.VERS_LOOSE,     (Dict.CM_BLOCK|Dict.CM_EMPTY),              null,                            null ),
        new Dict( "fieldset",   Dict.VERS_HTML40,    Dict.CM_BLOCK,                              ParserImpl.getParseBlock(),      null ),
        new Dict( "table",      Dict.VERS_FROM32,    Dict.CM_BLOCK,                              ParserImpl.getParseTableTag(),   CheckAttribsImpl.getCheckTABLE() ),
        new Dict( "hr",         Dict.VERS_ALL,       (Dict.CM_BLOCK|Dict.CM_EMPTY),              null,                            CheckAttribsImpl.getCheckHR() ),
        new Dict( "div",        Dict.VERS_FROM32,    Dict.CM_BLOCK,                              ParserImpl.getParseBlock(),      null ),
        new Dict( "multicol",   Dict.VERS_NETSCAPE,  Dict.CM_BLOCK,                              ParserImpl.getParseBlock(),      null ),
        new Dict( "nosave",     Dict.VERS_NETSCAPE,  Dict.CM_BLOCK,                              ParserImpl.getParseBlock(),      null ),
        new Dict( "layer",      Dict.VERS_NETSCAPE,  Dict.CM_BLOCK,                              ParserImpl.getParseBlock(),      null ),
        new Dict( "ilayer",     Dict.VERS_NETSCAPE,  Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "nolayer",    Dict.VERS_NETSCAPE,  (Dict.CM_BLOCK|Dict.CM_INLINE|Dict.CM_MIXED), ParserImpl.getParseBlock(),    null ),
        new Dict( "align",      Dict.VERS_NETSCAPE,  Dict.CM_BLOCK,                              ParserImpl.getParseBlock(),      null ),
        new Dict( "center",     Dict.VERS_LOOSE,     Dict.CM_BLOCK,                              ParserImpl.getParseBlock(),      null ),
        new Dict( "ins",        Dict.VERS_HTML40,    (Dict.CM_INLINE|Dict.CM_BLOCK|Dict.CM_MIXED), ParserImpl.getParseInline(),   null ),
        new Dict( "del",        Dict.VERS_HTML40,    (Dict.CM_INLINE|Dict.CM_BLOCK|Dict.CM_MIXED), ParserImpl.getParseInline(),   null ),

        new Dict( "li",         Dict.VERS_ALL,       (Dict.CM_LIST|Dict.CM_OPT|Dict.CM_NO_INDENT),    ParserImpl.getParseBlock(),  null ),
        new Dict( "dt",         Dict.VERS_ALL,       (Dict.CM_DEFLIST|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseInline(), null ),
        new Dict( "dd",         Dict.VERS_ALL,       (Dict.CM_DEFLIST|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(),  null ),

        new Dict( "caption",    Dict.VERS_FROM32,    Dict.CM_TABLE,                              ParserImpl.getParseInline(),     CheckAttribsImpl.getCheckCaption() ),
        new Dict( "colgroup",   Dict.VERS_HTML40,    (Dict.CM_TABLE|Dict.CM_OPT),                ParserImpl.getParseColGroup(),   null ),
        new Dict( "col",        Dict.VERS_HTML40,    (Dict.CM_TABLE|Dict.CM_EMPTY),              null,                            null ),
        new Dict( "thead",      Dict.VERS_HTML40,    (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(),   null ),
        new Dict( "tfoot",      Dict.VERS_HTML40,    (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(),   null ),
        new Dict( "tbody",      Dict.VERS_HTML40,    (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(),   null ),
        new Dict( "tr",         Dict.VERS_FROM32,    (Dict.CM_TABLE|Dict.CM_OPT),                ParserImpl.getParseRow(),        null ),
        new Dict( "td",         Dict.VERS_FROM32,    (Dict.CM_ROW|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(),     CheckAttribsImpl.getCheckTableCell() ),
        new Dict( "th",         Dict.VERS_FROM32,    (Dict.CM_ROW|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(),     CheckAttribsImpl.getCheckTableCell() ),

        new Dict( "q",          Dict.VERS_HTML40,    Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "a",          Dict.VERS_ALL,       Dict.CM_INLINE,                             ParserImpl.getParseInline(),     CheckAttribsImpl.getCheckAnchor() ),
        new Dict( "br",         Dict.VERS_ALL,       (Dict.CM_INLINE|Dict.CM_EMPTY),             null,                            null ),
        new Dict( "img",        Dict.VERS_ALL,       (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null,                            CheckAttribsImpl.getCheckIMG() ),
        new Dict( "object",     Dict.VERS_HTML40,    (Dict.CM_OBJECT|Dict.CM_HEAD|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ),
        new Dict( "applet",     Dict.VERS_LOOSE,     (Dict.CM_OBJECT|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ),
        new Dict( "servlet",    Dict.VERS_SUN,       (Dict.CM_OBJECT|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ),
        new Dict( "param",      Dict.VERS_FROM32,    (Dict.CM_INLINE|Dict.CM_EMPTY),             null,                            null ),
        new Dict( "embed",      Dict.VERS_NETSCAPE,  (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null,                            null ),
        new Dict( "noembed",    Dict.VERS_NETSCAPE,  Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "iframe",     Dict.VERS_HTML40_LOOSE, Dict.CM_INLINE,                          ParserImpl.getParseBlock(),      null ),
        new Dict( "frame",      Dict.VERS_FRAMES,    (Dict.CM_FRAMES|Dict.CM_EMPTY),             null,                            null ),
        new Dict( "noframes",   Dict.VERS_IFRAMES,   (Dict.CM_BLOCK|Dict.CM_FRAMES),             ParserImpl.getParseNoFrames(),   null ),
        new Dict( "noscript",   (short)(Dict.VERS_FRAMES|Dict.VERS_HTML40), (Dict.CM_BLOCK|Dict.CM_INLINE|Dict.CM_MIXED), ParserImpl.getParseBlock(), null ),
        new Dict( "b",          Dict.VERS_ALL,       Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "i",          Dict.VERS_ALL,       Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "u",          Dict.VERS_LOOSE,     Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "tt",         Dict.VERS_ALL,       Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "s",          Dict.VERS_LOOSE,     Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "strike",     Dict.VERS_LOOSE,     Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "big",        Dict.VERS_FROM32,    Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "small",      Dict.VERS_FROM32,    Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "sub",        Dict.VERS_FROM32,    Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "sup",        Dict.VERS_FROM32,    Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "em",         Dict.VERS_ALL,       Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "strong",     Dict.VERS_ALL,       Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "dfn",        Dict.VERS_ALL,       Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "code",       Dict.VERS_ALL,       Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "samp",       Dict.VERS_ALL,       Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "kbd",        Dict.VERS_ALL,       Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "var",        Dict.VERS_ALL,       Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "cite",       Dict.VERS_ALL,       Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "abbr",       Dict.VERS_HTML40,    Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "acronym",    Dict.VERS_HTML40,    Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "span",       Dict.VERS_FROM32,    Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "blink",      Dict.VERS_PROPRIETARY, Dict.CM_INLINE,                           ParserImpl.getParseInline(),     null ),
        new Dict( "nobr",       Dict.VERS_PROPRIETARY, Dict.CM_INLINE,                           ParserImpl.getParseInline(),     null ),
        new Dict( "wbr",        Dict.VERS_PROPRIETARY, (Dict.CM_INLINE|Dict.CM_EMPTY),           null,                            null ),
        new Dict( "marquee",    Dict.VERS_MICROSOFT, (Dict.CM_INLINE|Dict.CM_OPT),               ParserImpl.getParseInline(),     null ),
        new Dict( "bgsound",    Dict.VERS_MICROSOFT, (Dict.CM_HEAD|Dict.CM_EMPTY),               null,                            null ),
        new Dict( "comment",    Dict.VERS_MICROSOFT, Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "spacer",     Dict.VERS_NETSCAPE,  (Dict.CM_INLINE|Dict.CM_EMPTY),             null,                            null ),
        new Dict( "keygen",     Dict.VERS_NETSCAPE,  (Dict.CM_INLINE|Dict.CM_EMPTY),             null,                            null ),
        new Dict( "nolayer",    Dict.VERS_NETSCAPE,  (Dict.CM_BLOCK|Dict.CM_INLINE|Dict.CM_MIXED), ParserImpl.getParseBlock(),    null ),
        new Dict( "ilayer",     Dict.VERS_NETSCAPE,  Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "map",        Dict.VERS_FROM32,    Dict.CM_INLINE,                             ParserImpl.getParseBlock(),      CheckAttribsImpl.getCheckMap() ),
        new Dict( "area",       Dict.VERS_ALL,       (Dict.CM_BLOCK|Dict.CM_EMPTY),              null,                            CheckAttribsImpl.getCheckAREA() ),
        new Dict( "input",      Dict.VERS_ALL,       (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null,                            null ),
        new Dict( "select",     Dict.VERS_ALL,       (Dict.CM_INLINE|Dict.CM_FIELD),             ParserImpl.getParseSelect(),     null ),
        new Dict( "option",     Dict.VERS_ALL,       (Dict.CM_FIELD|Dict.CM_OPT),                ParserImpl.getParseText(),       null ),
        new Dict( "optgroup",   Dict.VERS_HTML40,    (Dict.CM_FIELD|Dict.CM_OPT),                ParserImpl.getParseOptGroup(),   null ),
        new Dict( "textarea",   Dict.VERS_ALL,       (Dict.CM_INLINE|Dict.CM_FIELD),             ParserImpl.getParseText(),       null ),
        new Dict( "label",      Dict.VERS_HTML40,    Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "legend",     Dict.VERS_HTML40,    Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "button",     Dict.VERS_HTML40,    Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "basefont",   Dict.VERS_LOOSE,     (Dict.CM_INLINE|Dict.CM_EMPTY),             null,                            null ),
        new Dict( "font",       Dict.VERS_LOOSE,     Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),
        new Dict( "bdo",        Dict.VERS_HTML40,    Dict.CM_INLINE,                             ParserImpl.getParseInline(),     null ),

    };

    /* create dummy entry for all xml tags */
    public Dict xmlTags = new Dict( null, Dict.VERS_ALL, Dict.CM_BLOCK, null, null );

    /*
       Shortcuts to built-in entries, assigned by the constructor from
       lookup() of the corresponding lower-case element name.
    */
    public Dict tagHtml = null;
    public Dict tagHead = null;
    public Dict tagBody = null;
    public Dict tagFrameset = null;
    public Dict tagFrame = null;
    public Dict tagNoframes = null;
    public Dict tagMeta = null;
    public Dict tagTitle = null;
    public Dict tagBase = null;
    public Dict tagHr = null;
    public Dict tagPre = null;
    public Dict tagListing = null;
    public Dict tagH1 = null;
    public Dict tagH2 = null;
    public Dict tagP = null;
    public Dict tagUl = null;
    public Dict tagOl = null;
    public Dict tagDir = null;
    public Dict tagLi = null;
    public Dict tagDt = null;
    public Dict tagDd = null;
    public Dict tagDl = null;
    public Dict tagTd = null;
    public Dict tagTh = null;
    public Dict tagTr = null;
    public Dict tagCol = null;
    public Dict tagBr = null;
    public Dict tagA = null;
    public Dict tagLink = null;
    public Dict tagB = null;
    public Dict tagI = null;
    public Dict tagStrong = null;
    public Dict tagEm = null;
    public Dict tagBig = null;
    public Dict tagSmall = null;
    public Dict tagParam = null;
    public Dict tagOption = null;
    public Dict tagOptgroup = null;
    public Dict tagImg = null;
    public Dict tagMap = null;
    public Dict tagArea = null;
    public Dict tagNobr = null;
    public Dict tagWbr = null;
    public Dict tagFont = null;
    public Dict tagSpacer = null;
    public Dict tagLayer = null;
    public Dict tagCenter = null;
    public Dict tagStyle = null;
    public Dict tagScript = null;
    public Dict tagNoscript = null;
    public Dict tagTable = null;
    public Dict tagCaption = null;
    public Dict tagForm = null;
    public Dict tagTextarea = null;
    public Dict tagBlockquote = null;
    public Dict tagApplet = null;
    public Dict tagObject = null;
    public Dict tagDiv = null;
    public Dict tagSpan = null;

    /**
     * Registers a user-defined inline tag (CM_INLINE, CM_NO_INDENT, CM_NEW).
     *
     * NOTE(review): the entry is given the block parser, whereas most
     * built-in CM_INLINE entries use getParseInline() - confirm intended.
     *
     * @param name element name to register
     */
    public void defineInlineTag( String name )
    {
        install( new Dict( name, Dict.VERS_PROPRIETARY,
                           (Dict.CM_INLINE|Dict.CM_NO_INDENT|Dict.CM_NEW),
                           ParserImpl.getParseBlock(), null ) );
    }

    /**
     * Registers a user-defined block tag (CM_BLOCK, CM_NO_INDENT, CM_NEW)
     * handled by the block parser.
     *
     * @param name element name to register
     */
    public void defineBlockTag( String name )
    {
        install( new Dict( name, Dict.VERS_PROPRIETARY,
                           (Dict.CM_BLOCK|Dict.CM_NO_INDENT|Dict.CM_NEW),
                           ParserImpl.getParseBlock(), null ) );
    }

    /**
     * Registers a user-defined empty tag (CM_EMPTY, CM_NO_INDENT, CM_NEW).
     * Unlike the built-in CM_EMPTY entries, which have no parser, the entry
     * is given the block parser.
     *
     * @param name element name to register
     */
    public void defineEmptyTag(String name)
    {
        install(new Dict(name, Dict.VERS_PROPRIETARY,
                         (Dict.CM_EMPTY|Dict.CM_NO_INDENT|Dict.CM_NEW),
                         ParserImpl.getParseBlock(), null));
    }

    /**
     * Registers a user-defined preformatted tag (CM_BLOCK, CM_NO_INDENT,
     * CM_NEW) handled by the pre parser.
     *
     * @param name element name to register
     */
    public void definePreTag(String name)
    {
        install(new Dict(name, Dict.VERS_PROPRIETARY,
                         (Dict.CM_BLOCK|Dict.CM_NO_INDENT|Dict.CM_NEW),
                         ParserImpl.getParsePre(), null));
    }
}
+ + The copyright holders and contributing author(s) will not be + liable for any direct, indirect, special or consequential damages + arising out of any use of the software or documentation, even if + advised of the possibility of such damage. + + Permission is hereby granted to use, copy, modify, and distribute + this source code, or portions hereof, documentation and executables, + for any purpose, without fee, subject to the following restrictions: + + 1. The origin of this source code must not be misrepresented. + 2. Altered versions must be plainly marked as such and must + not be misrepresented as being the original source. + 3. This Copyright notice may not be removed or altered from any + source or altered source distribution. + + The copyright holders and contributing author(s) specifically + permit, without fee, and encourage the use of this source code + as a component for supporting the Hypertext Markup Language in + commercial products. If you use this source code in a product, + acknowledgment is not required but would be appreciated. +*/ + +package org.w3c.tidy; + +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.PrintWriter; +import java.util.Properties; + +import org.eclipse.core.resources.IFile; +import org.eclipse.core.resources.IMarker; +import org.eclipse.core.runtime.CoreException; + +/** + * + * <p>HTML parser and pretty printer</p> + * + * <p> + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from <a href="http://www.w3.org/People/Raggett/tidy"> + * HTML Tidy Release 4 Aug 2000</a> + * </p> + * + * <p> + * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts + * Institute of Technology, Institut National de Recherche en + * Informatique et en Automatique, Keio University). 
All Rights + * Reserved. + * </p> + * + * <p> + * Contributing Author(s):<br> + * <a href="mailto:dsr@w3.org">Dave Raggett</a><br> + * <a href="mailto:ac.quick@sympatico.ca">Andy Quick</a> (translation to Java) + * </p> + * + * <p> + * The contributing author(s) would like to thank all those who + * helped with testing, bug fixes, and patience. This wouldn't + * have been possible without all of you. + * </p> + * + * <p> + * COPYRIGHT NOTICE:<br> + * + * This software and documentation is provided "as is," and + * the copyright holders and contributing author(s) make no + * representations or warranties, express or implied, including + * but not limited to, warranties of merchantability or fitness + * for any particular purpose or that the use of the software or + * documentation will not infringe any third party patents, + * copyrights, trademarks or other rights. + * </p> + * + * <p> + * The copyright holders and contributing author(s) will not be + * liable for any direct, indirect, special or consequential damages + * arising out of any use of the software or documentation, even if + * advised of the possibility of such damage. + * </p> + * + * <p> + * Permission is hereby granted to use, copy, modify, and distribute + * this source code, or portions hereof, documentation and executables, + * for any purpose, without fee, subject to the following restrictions: + * </p> + * + * <p> + * <ol> + * <li>The origin of this source code must not be misrepresented.</li> + * <li>Altered versions must be plainly marked as such and must + * not be misrepresented as being the original source.</li> + * <li>This Copyright notice may not be removed or altered from any + * source or altered source distribution.</li> + * </ol> + * </p> + * + * <p> + * The copyright holders and contributing author(s) specifically + * permit, without fee, and encourage the use of this source code + * as a component for supporting the Hypertext Markup Language in + * commercial products. 
If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ * </p>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ *
+ */
+
+public class Tidy implements java.io.Serializable {
+    /*
+     * NOTE(review): the class is Serializable but holds two non-transient
+     * PrintWriter fields (errout, stderr); java.io.PrintWriter does not
+     * implement Serializable - confirm whether these should be transient.
+     */
+    static final long serialVersionUID = -2794371560623987718L;
+
+    private boolean initialized = false; /* set to true as the last step of init(); the internal parse returns null while false */
+    private PrintWriter errout = null; /* error output stream */
+    private PrintWriter stderr = null; /* auto-flushing writer over System.err, created in init() */
+    private Configuration configuration = null; /* created in init(); backs the bean properties of this class */
+    private String inputStreamName = "InputStream"; /* name passed to the Report calls; the internal parse overwrites it with the file name or "stdin" */
+    private int parseErrors = 0; /* lexer.errors of the most recent parse */
+    private int parseWarnings = 0; /* lexer.warnings of the most recent parse */
+    /** Creates an instance and runs init(), which builds the configuration and error writers. */
+    public Tidy() {
+        init();
+    }
+    /** Returns the Configuration object created by init(); setters on this class write into it. */
+    public Configuration getConfiguration() {
+        return configuration;
+    }
+    /** Returns the writer over System.err created by init(); it is the initial value of errout. */
+    public PrintWriter getStderr() {
+        return stderr;
+    }
+
+    /**
+     * ParseErrors - the number of errors that occurred in the most
+     * recent parse operation
+     */
+
+    public int getParseErrors() {
+        return parseErrors;
+    }
+
+    /**
+     * ParseWarnings - the number of warnings that occurred in the most
+     * recent parse operation
+     */
+
+    public int getParseWarnings() {
+        return parseWarnings;
+    }
+
+    /**
+     * Errout - the error output stream
+     */
+
+    public PrintWriter getErrout() {
+        return errout;
+    }
+
+    public void setErrout(PrintWriter errout) {
+        this.errout = errout;
+    }
+
+    /**
+     * Spaces - default
indentation + * @see org.w3c.tidy.Configuration#spaces + */ + + public void setSpaces(int spaces) { + configuration.spaces = spaces; + } + + public int getSpaces() { + return configuration.spaces; + } + + /** + * Wraplen - default wrap margin + * @see org.w3c.tidy.Configuration#wraplen + */ + + public void setWraplen(int wraplen) { + configuration.wraplen = wraplen; + } + + public int getWraplen() { + return configuration.wraplen; + } + + /** + * CharEncoding + * @see org.w3c.tidy.Configuration#CharEncoding + */ + + public void setCharEncoding(int charencoding) { + configuration.CharEncoding = charencoding; + } + + public int getCharEncoding() { + return configuration.CharEncoding; + } + + /** + * Tabsize + * @see org.w3c.tidy.Configuration#tabsize + */ + + public void setTabsize(int tabsize) { + configuration.tabsize = tabsize; + } + + public int getTabsize() { + return configuration.tabsize; + } + + /** + * Errfile - file name to write errors to + * @see org.w3c.tidy.Configuration#errfile + */ + + public void setErrfile(String errfile) { + configuration.errfile = errfile; + } + + public String getErrfile() { + return configuration.errfile; + } + + /** + * Writeback - if true then output tidied markup + * NOTE: this property is ignored when parsing from an InputStream. 
+ * @see org.w3c.tidy.Configuration#writeback + */ + + public void setWriteback(boolean writeback) { + configuration.writeback = writeback; + } + + public boolean getWriteback() { + return configuration.writeback; + } + + /** + * OnlyErrors - if true normal output is suppressed + * @see org.w3c.tidy.Configuration#OnlyErrors + */ + + public void setOnlyErrors(boolean OnlyErrors) { + configuration.OnlyErrors = OnlyErrors; + } + + public boolean getOnlyErrors() { + return configuration.OnlyErrors; + } + + /** + * ShowWarnings - however errors are always shown + * @see org.w3c.tidy.Configuration#ShowWarnings + */ + + public void setShowWarnings(boolean ShowWarnings) { + configuration.ShowWarnings = ShowWarnings; + } + + public boolean getShowWarnings() { + return configuration.ShowWarnings; + } + + /** + * Quiet - no 'Parsing X', guessed DTD or summary + * @see org.w3c.tidy.Configuration#Quiet + */ + + public void setQuiet(boolean Quiet) { + configuration.Quiet = Quiet; + } + + public boolean getQuiet() { + return configuration.Quiet; + } + + /** + * IndentContent - indent content of appropriate tags + * @see org.w3c.tidy.Configuration#IndentContent + */ + + public void setIndentContent(boolean IndentContent) { + configuration.IndentContent = IndentContent; + } + + public boolean getIndentContent() { + return configuration.IndentContent; + } + + /** + * SmartIndent - does text/block level content effect indentation + * @see org.w3c.tidy.Configuration#SmartIndent + */ + + public void setSmartIndent(boolean SmartIndent) { + configuration.SmartIndent = SmartIndent; + } + + public boolean getSmartIndent() { + return configuration.SmartIndent; + } + + /** + * HideEndTags - suppress optional end tags + * @see org.w3c.tidy.Configuration#HideEndTags + */ + + public void setHideEndTags(boolean HideEndTags) { + configuration.HideEndTags = HideEndTags; + } + + public boolean getHideEndTags() { + return configuration.HideEndTags; + } + + /** + * XmlTags - treat input as XML + * 
@see org.w3c.tidy.Configuration#XmlTags + */ + + public void setXmlTags(boolean XmlTags) { + configuration.XmlTags = XmlTags; + } + + public boolean getXmlTags() { + return configuration.XmlTags; + } + + /** + * XmlOut - create output as XML + * @see org.w3c.tidy.Configuration#XmlOut + */ + + public void setXmlOut(boolean XmlOut) { + configuration.XmlOut = XmlOut; + } + + public boolean getXmlOut() { + return configuration.XmlOut; + } + + /** + * XHTML - output extensible HTML + * @see org.w3c.tidy.Configuration#xHTML + */ + + public void setXHTML(boolean xHTML) { + configuration.xHTML = xHTML; + } + + public boolean getXHTML() { + return configuration.xHTML; + } + + /** + * RawOut - avoid mapping values > 127 to entities + * @see org.w3c.tidy.Configuration#RawOut + */ + + public void setRawOut(boolean RawOut) { + configuration.RawOut = RawOut; + } + + public boolean getRawOut() { + return configuration.RawOut; + } + + /** + * UpperCaseTags - output tags in upper not lower case + * @see org.w3c.tidy.Configuration#UpperCaseTags + */ + + public void setUpperCaseTags(boolean UpperCaseTags) { + configuration.UpperCaseTags = UpperCaseTags; + } + + public boolean getUpperCaseTags() { + return configuration.UpperCaseTags; + } + + /** + * UpperCaseAttrs - output attributes in upper not lower case + * @see org.w3c.tidy.Configuration#UpperCaseAttrs + */ + + public void setUpperCaseAttrs(boolean UpperCaseAttrs) { + configuration.UpperCaseAttrs = UpperCaseAttrs; + } + + public boolean getUpperCaseAttrs() { + return configuration.UpperCaseAttrs; + } + + /** + * MakeClean - remove presentational clutter + * @see org.w3c.tidy.Configuration#MakeClean + */ + + public void setMakeClean(boolean MakeClean) { + configuration.MakeClean = MakeClean; + } + + public boolean getMakeClean() { + return configuration.MakeClean; + } + + /** + * BreakBeforeBR - o/p newline before &lt;br&gt; or not? 
+ * @see org.w3c.tidy.Configuration#BreakBeforeBR + */ + + public void setBreakBeforeBR(boolean BreakBeforeBR) { + configuration.BreakBeforeBR = BreakBeforeBR; + } + + public boolean getBreakBeforeBR() { + return configuration.BreakBeforeBR; + } + + /** + * BurstSlides - create slides on each h2 element + * @see org.w3c.tidy.Configuration#BurstSlides + */ + + public void setBurstSlides(boolean BurstSlides) { + configuration.BurstSlides = BurstSlides; + } + + public boolean getBurstSlides() { + return configuration.BurstSlides; + } + + /** + * NumEntities - use numeric entities + * @see org.w3c.tidy.Configuration#NumEntities + */ + + public void setNumEntities(boolean NumEntities) { + configuration.NumEntities = NumEntities; + } + + public boolean getNumEntities() { + return configuration.NumEntities; + } + + /** + * QuoteMarks - output " marks as &amp;quot; + * @see org.w3c.tidy.Configuration#QuoteMarks + */ + + public void setQuoteMarks(boolean QuoteMarks) { + configuration.QuoteMarks = QuoteMarks; + } + + public boolean getQuoteMarks() { + return configuration.QuoteMarks; + } + + /** + * QuoteNbsp - output non-breaking space as entity + * @see org.w3c.tidy.Configuration#QuoteNbsp + */ + + public void setQuoteNbsp(boolean QuoteNbsp) { + configuration.QuoteNbsp = QuoteNbsp; + } + + public boolean getQuoteNbsp() { + return configuration.QuoteNbsp; + } + + /** + * QuoteAmpersand - output naked ampersand as &amp; + * @see org.w3c.tidy.Configuration#QuoteAmpersand + */ + + public void setQuoteAmpersand(boolean QuoteAmpersand) { + configuration.QuoteAmpersand = QuoteAmpersand; + } + + public boolean getQuoteAmpersand() { + return configuration.QuoteAmpersand; + } + + /** + * WrapAttVals - wrap within attribute values + * @see org.w3c.tidy.Configuration#WrapAttVals + */ + + public void setWrapAttVals(boolean WrapAttVals) { + configuration.WrapAttVals = WrapAttVals; + } + + public boolean getWrapAttVals() { + return configuration.WrapAttVals; + } + + /** + * 
WrapScriptlets - wrap within JavaScript string literals + * @see org.w3c.tidy.Configuration#WrapScriptlets + */ + + public void setWrapScriptlets(boolean WrapScriptlets) { + configuration.WrapScriptlets = WrapScriptlets; + } + + public boolean getWrapScriptlets() { + return configuration.WrapScriptlets; + } + + /** + * WrapSection - wrap within &lt;![ ... ]&gt; section tags + * @see org.w3c.tidy.Configuration#WrapSection + */ + + public void setWrapSection(boolean WrapSection) { + configuration.WrapSection = WrapSection; + } + + public boolean getWrapSection() { + return configuration.WrapSection; + } + + /** + * AltText - default text for alt attribute + * @see org.w3c.tidy.Configuration#altText + */ + + public void setAltText(String altText) { + configuration.altText = altText; + } + + public String getAltText() { + return configuration.altText; + } + + /** + * Slidestyle - style sheet for slides + * @see org.w3c.tidy.Configuration#slidestyle + */ + + public void setSlidestyle(String slidestyle) { + configuration.slidestyle = slidestyle; + } + + public String getSlidestyle() { + return configuration.slidestyle; + } + + /** + * XmlPi - add &lt;?xml?&gt; for XML docs + * @see org.w3c.tidy.Configuration#XmlPi + */ + + public void setXmlPi(boolean XmlPi) { + configuration.XmlPi = XmlPi; + } + + public boolean getXmlPi() { + return configuration.XmlPi; + } + + /** + * DropFontTags - discard presentation tags + * @see org.w3c.tidy.Configuration#DropFontTags + */ + + public void setDropFontTags(boolean DropFontTags) { + configuration.DropFontTags = DropFontTags; + } + + public boolean getDropFontTags() { + return configuration.DropFontTags; + } + + /** + * DropEmptyParas - discard empty p elements + * @see org.w3c.tidy.Configuration#DropEmptyParas + */ + + public void setDropEmptyParas(boolean DropEmptyParas) { + configuration.DropEmptyParas = DropEmptyParas; + } + + public boolean getDropEmptyParas() { + return configuration.DropEmptyParas; + } + + /** + * FixComments 
- fix comments with adjacent hyphens + * @see org.w3c.tidy.Configuration#FixComments + */ + + public void setFixComments(boolean FixComments) { + configuration.FixComments = FixComments; + } + + public boolean getFixComments() { + return configuration.FixComments; + } + + /** + * WrapAsp - wrap within ASP pseudo elements + * @see org.w3c.tidy.Configuration#WrapAsp + */ + + public void setWrapAsp(boolean WrapAsp) { + configuration.WrapAsp = WrapAsp; + } + + public boolean getWrapAsp() { + return configuration.WrapAsp; + } + + /** + * WrapJste - wrap within JSTE pseudo elements + * @see org.w3c.tidy.Configuration#WrapJste + */ + + public void setWrapJste(boolean WrapJste) { + configuration.WrapJste = WrapJste; + } + + public boolean getWrapJste() { + return configuration.WrapJste; + } + + /** + * WrapPhp - wrap within PHP pseudo elements + * @see org.w3c.tidy.Configuration#WrapPhp + */ + + public void setWrapPhp(boolean WrapPhp) { + configuration.WrapPhp = WrapPhp; + } + + public boolean getWrapPhp() { + return configuration.WrapPhp; + } + + /** + * FixBackslash - fix URLs by replacing \ with / + * @see org.w3c.tidy.Configuration#FixBackslash + */ + + public void setFixBackslash(boolean FixBackslash) { + configuration.FixBackslash = FixBackslash; + } + + public boolean getFixBackslash() { + return configuration.FixBackslash; + } + + /** + * IndentAttributes - newline+indent before each attribute + * @see org.w3c.tidy.Configuration#IndentAttributes + */ + + public void setIndentAttributes(boolean IndentAttributes) { + configuration.IndentAttributes = IndentAttributes; + } + + public boolean getIndentAttributes() { + return configuration.IndentAttributes; + } + + /** + * DocType - user specified doctype + * omit | auto | strict | loose | <i>fpi</i> + * where the <i>fpi</i> is a string similar to + * &quot;-//ACME//DTD HTML 3.14159//EN&quot; + * Note: for <i>fpi</i> include the double-quotes in the string. 
+ * @see org.w3c.tidy.Configuration#docTypeStr + * @see org.w3c.tidy.Configuration#docTypeMode + */ + + public void setDocType(String doctype) { + if (doctype != null) + configuration.docTypeStr = configuration.parseDocType(doctype, "doctype"); + } + + public String getDocType() { + String result = null; + switch (configuration.docTypeMode) { + case Configuration.DOCTYPE_OMIT : + result = "omit"; + break; + case Configuration.DOCTYPE_AUTO : + result = "auto"; + break; + case Configuration.DOCTYPE_STRICT : + result = "strict"; + break; + case Configuration.DOCTYPE_LOOSE : + result = "loose"; + break; + case Configuration.DOCTYPE_USER : + result = configuration.docTypeStr; + break; + } + return result; + } + + /** + * LogicalEmphasis - replace i by em and b by strong + * @see org.w3c.tidy.Configuration#LogicalEmphasis + */ + + public void setLogicalEmphasis(boolean LogicalEmphasis) { + configuration.LogicalEmphasis = LogicalEmphasis; + } + + public boolean getLogicalEmphasis() { + return configuration.LogicalEmphasis; + } + + /** + * XmlPIs - if set to true PIs must end with ?> + * @see org.w3c.tidy.Configuration#XmlPIs + */ + + public void setXmlPIs(boolean XmlPIs) { + configuration.XmlPIs = XmlPIs; + } + + public boolean getXmlPIs() { + return configuration.XmlPIs; + } + + /** + * EncloseText - if true text at body is wrapped in &lt;p&gt;'s + * @see org.w3c.tidy.Configuration#EncloseBodyText + */ + + public void setEncloseText(boolean EncloseText) { + configuration.EncloseBodyText = EncloseText; + } + + public boolean getEncloseText() { + return configuration.EncloseBodyText; + } + + /** + * EncloseBlockText - if true text in blocks is wrapped in &lt;p&gt;'s + * @see org.w3c.tidy.Configuration#EncloseBlockText + */ + + public void setEncloseBlockText(boolean EncloseBlockText) { + configuration.EncloseBlockText = EncloseBlockText; + } + + public boolean getEncloseBlockText() { + return configuration.EncloseBlockText; + } + + /** + * KeepFileTimes - if true last 
modified time is preserved<br> + * <b>this is NOT supported at this time.</b> + * @see org.w3c.tidy.Configuration#KeepFileTimes + */ + + public void setKeepFileTimes(boolean KeepFileTimes) { + configuration.KeepFileTimes = KeepFileTimes; + } + + public boolean getKeepFileTimes() { + return configuration.KeepFileTimes; + } + + /** + * Word2000 - draconian cleaning for Word2000 + * @see org.w3c.tidy.Configuration#Word2000 + */ + + public void setWord2000(boolean Word2000) { + configuration.Word2000 = Word2000; + } + + public boolean getWord2000() { + return configuration.Word2000; + } + + /** + * TidyMark - add meta element indicating tidied doc + * @see org.w3c.tidy.Configuration#TidyMark + */ + + public void setTidyMark(boolean TidyMark) { + configuration.TidyMark = TidyMark; + } + + public boolean getTidyMark() { + return configuration.TidyMark; + } + + /** + * XmlSpace - if set to yes adds xml:space attr as needed + * @see org.w3c.tidy.Configuration#XmlSpace + */ + + public void setXmlSpace(boolean XmlSpace) { + configuration.XmlSpace = XmlSpace; + } + + public boolean getXmlSpace() { + return configuration.XmlSpace; + } + + /** + * Emacs - if true format error output for GNU Emacs + * @see org.w3c.tidy.Configuration#Emacs + */ + + public void setEmacs(boolean Emacs) { + configuration.Emacs = Emacs; + } + + public boolean getEmacs() { + return configuration.Emacs; + } + + /** + * LiteralAttribs - if true attributes may use newlines + * @see org.w3c.tidy.Configuration#LiteralAttribs + */ + + public void setLiteralAttribs(boolean LiteralAttribs) { + configuration.LiteralAttribs = LiteralAttribs; + } + + public boolean getLiteralAttribs() { + return configuration.LiteralAttribs; + } + + /** + * InputStreamName - the name of the input stream (printed in the + * header information). 
+     */
+    public void setInputStreamName(String name) {
+        /* a null name is ignored so the previous name stays in effect */
+        if (name != null)
+            inputStreamName = name;
+    }
+
+    /**
+     * Returns the name currently used for the input in report messages.
+     */
+    public String getInputStreamName() {
+        return inputStreamName;
+    }
+
+    /**
+     * Sets the configuration from a configuration file.
+     *
+     * @param filename name of the file handed to Configuration.parseFile
+     */
+
+    public void setConfigurationFromFile(String filename) {
+        configuration.parseFile(filename);
+    }
+
+    /**
+     * Sets the configuration from a properties object.
+     *
+     * @param props properties handed to Configuration.addProps
+     */
+
+    public void setConfigurationFromProps(Properties props) {
+        configuration.addProps(props);
+    }
+
+    /**
+     * first time initialization which should
+     * precede reading the command line
+     *
+     * Leaves <code>initialized</code> false (so parsing returns null) when
+     * either default table cannot be obtained.
+     */
+
+    private void init() {
+        /* "new" never yields null, so only the two lookups below are checked */
+        configuration = new Configuration();
+
+        AttributeTable at = AttributeTable.getDefaultAttributeTable();
+        if (at == null)
+            return;
+
+        TagTable tt = new TagTable();
+        tt.setConfiguration(configuration);
+        configuration.tt = tt;
+
+        EntityTable et = EntityTable.getDefaultEntityTable();
+        if (et == null)
+            return;
+
+        /* Unnecessary - same initial values in Configuration
+        Configuration.XmlTags = false;
+        Configuration.XmlOut = false;
+        Configuration.HideEndTags = false;
+        Configuration.UpperCaseTags = false;
+        Configuration.MakeClean = false;
+        Configuration.writeback = false;
+        Configuration.OnlyErrors = false;
+        */
+
+        configuration.errfile = null;
+        stderr = new PrintWriter(System.err, true);
+        errout = stderr;
+        initialized = true;
+    }
+
+    /**
+     * Writes a one-line description of a failed parse step to errout.
+     * Does nothing when no error writer is set.
+     */
+    private void reportFailure(String step, Exception e) {
+        if (errout != null)
+            errout.println(inputStreamName + ": " + step + " failed: " + e);
+    }
+
+    /**
+     * Parses InputStream in and returns the root Node.
+     * If out is non-null, pretty prints to OutputStream out.
+     *
+     * <p>Existing IMarker.PROBLEM markers on <code>iFile</code> are deleted
+     * before parsing. Failures are no longer swallowed silently: they are
+     * reported on the error writer and null is returned, as before.</p>
+     *
+     * @param iFile workspace file being parsed; when null the marker
+     *              clean-up is skipped
+     * @param in    stream to read the markup from
+     * @param out   stream to pretty print to, or null for no output
+     * @return the root node, or null when parsing did not complete
+     */
+
+    public Node parse(IFile iFile, InputStream in, OutputStream out) {
+        Node document = null;
+
+        try {
+            if (iFile != null)
+                iFile.deleteMarkers(IMarker.PROBLEM, false, 0);
+            document = parse(iFile, in, null, out);
+        } catch (CoreException e) {
+            reportFailure("clearing problem markers", e);
+        } catch (IOException e) {
+            /* also covers FileNotFoundException, which is an IOException */
+            reportFailure("reading input", e);
+        }
+
+        return document;
+    }
+
+    /**
+     * Internal routine that actually does the parsing.
The caller
+     * can pass either an InputStream or file name. If both are passed,
+     * the file name is preferred.
+     *
+     * <p>A stream opened here from <code>file</code> is closed as soon as
+     * parsing ends and before any output is written. This now also happens on
+     * the "bad tree" early returns and when the parser throws. A stream
+     * supplied by the caller is never closed.</p>
+     *
+     * @param iFile workspace file handed to the Lexer; main() passes null
+     * @param in    stream to read from when <code>file</code> is null;
+     *              System.in is used when both are null
+     * @param file  name of the file to read, or null
+     * @param out   stream to pretty print to when write-back does not apply
+     * @return the root node; null when this instance is not initialized,
+     *         errout is null, or the tree fails its integrity check
+     * @throws FileNotFoundException when <code>file</code> cannot be opened
+     * @throws IOException declared for callers; see FileNotFoundException
+     */
+
+    private Node parse(IFile iFile, InputStream in, String file, OutputStream out) throws FileNotFoundException, IOException {
+        Lexer lexer;
+        Node document = null;
+        Node doctype;
+        Out o = new OutImpl(); /* normal output stream */
+        PPrint pprint;
+
+        if (!initialized)
+            return null;
+
+        if (errout == null)
+            return null;
+
+        parseErrors = 0;
+        parseWarnings = 0;
+
+        /* ensure config is self-consistent */
+        configuration.adjust();
+
+        /* true only when the stream is opened here and so must be closed here */
+        boolean ownsInput = (file != null);
+
+        if (ownsInput) {
+            in = new FileInputStream(file);
+            inputStreamName = file;
+        } else if (in == null) {
+            in = System.in;
+            inputStreamName = "stdin";
+        }
+
+        try {
+            lexer = new Lexer(iFile, new StreamInImpl(in, configuration.CharEncoding, configuration.tabsize), configuration);
+            lexer.errout = errout;
+
+            /*
+              store pointer to lexer in input stream
+              to allow character encoding errors to be
+              reported
+            */
+            lexer.in.lexer = lexer;
+
+            /* Tidy doesn't alter the doctype for generic XML docs */
+            if (configuration.XmlTags)
+                document = ParserImpl.parseXMLDocument(lexer);
+            else {
+                lexer.warnings = 0;
+                if (!configuration.Quiet)
+                    Report.helloMessage(errout, Report.RELEASE_DATE, inputStreamName);
+
+                document = ParserImpl.parseDocument(lexer);
+
+                if (!document.checkNodeIntegrity()) {
+                    Report.badTree(errout);
+                    return null;
+                }
+
+                Clean cleaner = new Clean(configuration.tt);
+
+                /* simplifies <b><b> ... </b> ...</b> etc. */
+                cleaner.nestedEmphasis(document);
+
+                /* cleans up <dir>indented text</dir> etc. */
+                cleaner.list2BQ(document);
+                cleaner.bQ2Div(document);
+
+                /* replaces i by em and b by strong */
+                if (configuration.LogicalEmphasis)
+                    cleaner.emFromI(document);
+
+                if (configuration.Word2000 && cleaner.isWord2000(document, configuration.tt)) {
+                    /* prune Word2000's <![if ...]> ... <![endif]> */
+                    cleaner.dropSections(lexer, document);
+
+                    /* drop style & class attributes and empty p, span elements */
+                    cleaner.cleanWord2000(lexer, document);
+                }
+
+                /* replaces presentational markup by style rules */
+                if (configuration.MakeClean || configuration.DropFontTags)
+                    cleaner.cleanTree(lexer, document);
+
+                if (!document.checkNodeIntegrity()) {
+                    Report.badTree(errout);
+                    return null;
+                }
+                doctype = document.findDocType();
+                if (document.content != null) {
+                    if (configuration.xHTML)
+                        lexer.setXHTMLDocType(document);
+                    else
+                        lexer.fixDocType(document);
+
+                    if (configuration.TidyMark)
+                        lexer.addGenerator(document);
+                }
+
+                /* ensure presence of initial <?XML version="1.0"?> */
+                if (configuration.XmlOut && configuration.XmlPi)
+                    lexer.fixXMLPI(document);
+
+                if (!configuration.Quiet && document.content != null) {
+                    Report.reportVersion(errout, lexer, inputStreamName, doctype);
+                    Report.reportNumWarnings(errout, lexer);
+                }
+            }
+
+            parseWarnings = lexer.warnings;
+            parseErrors = lexer.errors;
+        } finally {
+            // Close the InputStream, but only if we created it. This must
+            // happen before the write-back branch below reopens the same file.
+            if (ownsInput) {
+                try {
+                    in.close();
+                } catch (IOException e) {
+                    /* best effort: the input has already been consumed */
+                }
+            }
+        }
+
+        if (lexer.errors > 0)
+            Report.needsAuthorIntervention(errout);
+
+        o.state = StreamIn.FSM_ASCII;
+        o.encoding = configuration.CharEncoding;
+
+        if (!configuration.OnlyErrors && lexer.errors == 0) {
+            if (configuration.BurstSlides) {
+                Node body;
+
+                body = null;
+                /*
+                  remove doctype to avoid potential clash with
+                  markup introduced when bursting into slides
+                */
+                /* discard the document type */
+                doctype = document.findDocType();
+
+                if (doctype != null)
+                    Node.discardElement(doctype);
+
+                /* slides use transitional features */
+                lexer.versions |= Dict.VERS_HTML40_LOOSE;
+
+                /* and patch up doctype to match */
+                if (configuration.xHTML)
+                    lexer.setXHTMLDocType(document);
+                else
+                    lexer.fixDocType(document);
+
+                /* find the body element which may be implicit */
+                body = document.findBody(configuration.tt);
+
+                if (body != null) {
+                    pprint = new PPrint(configuration);
+                    Report.reportNumberOfSlides(errout, pprint.countSlides(body));
+                    pprint.createSlides(lexer, document);
+                } else
+                    Report.missingBody(errout);
+            } else if (configuration.writeback && (file != null)) {
+                try {
+                    pprint = new PPrint(configuration);
+                    o.out = new FileOutputStream(file);
+
+                    /* close the output file even when printing fails */
+                    try {
+                        if (configuration.XmlTags)
+                            pprint.printXMLTree(o, (short) 0, 0, lexer, document);
+                        else
+                            pprint.printTree(o, (short) 0, 0, lexer, document);
+
+                        pprint.flushLine(o, 0);
+                    } finally {
+                        o.out.close();
+                    }
+                } catch (IOException e) {
+                    errout.println(file + e.toString());
+                }
+            } else if (out != null) {
+                pprint = new PPrint(configuration);
+                o.out = out;
+
+                if (configuration.XmlTags)
+                    pprint.printXMLTree(o, (short) 0, 0, lexer, document);
+                else
+                    pprint.printTree(o, (short) 0, 0, lexer, document);
+
+                pprint.flushLine(o, 0);
+            }
+
+        }
+
+        Report.errorSummary(lexer);
+
+        return document;
+    }
+
+    /**
+     * Parses InputStream in and returns a DOM Document node.
+     * If out is non-null, pretty prints to OutputStream out.
+     */
+
+    public org.w3c.dom.Document parseDOM(IFile file, InputStream in, OutputStream out) {
+        Node root = parse(file, in, out);
+
+        /* parse() yields null when it could not produce a tree */
+        if (root == null)
+            return null;
+
+        return (org.w3c.dom.Document) root.getAdapter();
+    }
+
+    /**
+     * Builds a root node holding a single "html" start tag and returns
+     * its DOM adapter.
+     */
+
+    public static org.w3c.dom.Document createEmptyDocument() {
+        Node root = new Node(Node.RootNode, new byte[0], 0, 0);
+        Node html = new Node(Node.StartTag, new byte[0], 0, 0, "html", new TagTable());
+
+        if (root == null || html == null)
+            return null;
+
+        Node.insertNodeAtStart(root, html);
+        return (org.w3c.dom.Document) root.getAdapter();
+    }
+
+    /**
+     * Pretty-prints a DOM Document to out. Documents that are not
+     * DOMDocumentImpl instances, and a null out, produce no output.
+     */
+
+    public void pprint(org.w3c.dom.Document doc, OutputStream out) {
+        Out sink = new OutImpl();
+
+        if (!(doc instanceof DOMDocumentImpl))
+            return;
+
+        Node root = ((DOMDocumentImpl) doc).adaptee;
+
+        sink.state = StreamIn.FSM_ASCII;
+        sink.encoding = configuration.CharEncoding;
+
+        if (out == null)
+            return;
+
+        PPrint printer = new PPrint(configuration);
+        sink.out = out;
+
+        /* no lexer is available for a tree that came in through the DOM */
+        if (configuration.XmlTags) {
+            printer.printXMLTree(sink, (short) 0, 0, null, root);
+        } else {
+            printer.printTree(sink, (short) 0, 0, null, root);
+        }
+
+        printer.flushLine(sink, 0);
+    }
+
+    /**
+     * Command line interface to parser and pretty printer.
+     *
+     * <p>Options (arguments starting with "-") are applied to the
+     * configuration as they are met; every other argument is parsed as a
+     * file and printed to System.out. With no file argument the input is
+     * read from System.in.</p>
+     *
+     * <p>Exit status: 2 when any parse reported errors, 1 when there were
+     * only warnings, 0 otherwise. Help exits with 1 and version with 0.</p>
+     *
+     * @param argv command line arguments
+     */
+
+    public static void main(String[] argv) {
+        int totalerrors = 0;
+        int totalwarnings = 0;
+        String file;
+        String prog = "Tidy";
+        String s;
+        Out out = new OutImpl(); /* normal output stream */
+        int argc = argv.length + 1; /* counts like C's argc: remaining arguments plus one */
+        int argIndex = 0;
+        Tidy tidy;
+        Configuration configuration;
+        String arg;
+        String current_errorfile = "stderr";
+
+        tidy = new Tidy();
+        configuration = tidy.getConfiguration();
+
+        /* read command line */
+
+        while (argc > 0) {
+            if (argc > 1 && argv[argIndex].startsWith("-")) {
+                /* support -foo and --foo */
+                arg = argv[argIndex].substring(1);
+
+                if (arg.length() > 0 && arg.charAt(0) == '-')
+                    arg = arg.substring(1);
+
+                /* a lone "-" has no second character to inspect */
+                boolean hasOptionChar = argv[argIndex].length() > 1;
+
+                if (arg.equals("xml"))
+                    configuration.XmlTags = true;
+                else if (arg.equals("asxml") || arg.equals("asxhtml"))
+                    configuration.xHTML = true;
+                else if (arg.equals("indent")) {
+                    configuration.IndentContent = true;
+                    configuration.SmartIndent = true;
+                } else if (arg.equals("omit"))
+                    configuration.HideEndTags = true;
+                else if (arg.equals("upper"))
+                    configuration.UpperCaseTags = true;
+                else if (arg.equals("clean"))
+                    configuration.MakeClean = true;
+                else if (arg.equals("raw"))
+                    configuration.CharEncoding = Configuration.RAW;
+                else if (arg.equals("ascii"))
+                    configuration.CharEncoding = Configuration.ASCII;
+                else if (arg.equals("latin1"))
+                    configuration.CharEncoding = Configuration.LATIN1;
+                else if (arg.equals("utf8"))
+                    configuration.CharEncoding = Configuration.UTF8;
+                else if (arg.equals("iso2022"))
+                    configuration.CharEncoding = Configuration.ISO2022;
+                else if (arg.equals("mac"))
+                    configuration.CharEncoding = Configuration.MACROMAN;
+                else if (arg.equals("numeric"))
+                    configuration.NumEntities = true;
+                else if (arg.equals("modify"))
+                    configuration.writeback = true;
+                else if (arg.equals("change")) /* obsolete */
+                    configuration.writeback = true;
+                else if (arg.equals("update")) /* obsolete */
+                    configuration.writeback = true;
+                else if (arg.equals("errors"))
+                    configuration.OnlyErrors = true;
+                else if (arg.equals("quiet"))
+                    configuration.Quiet = true;
+                else if (arg.equals("slides"))
+                    configuration.BurstSlides = true;
+                else if (arg.equals("help") || (hasOptionChar && (argv[argIndex].charAt(1) == '?' || argv[argIndex].charAt(1) == 'h'))) {
+                    Report.helpText(new PrintWriter(System.out, true), prog);
+                    System.exit(1);
+                } else if (arg.equals("config")) {
+                    /* argc >= 3 means the option is followed by a value */
+                    if (argc >= 3) {
+                        configuration.parseFile(argv[argIndex + 1]);
+                        --argc;
+                        ++argIndex;
+                    }
+                } else if (argv[argIndex].equals("-file") || argv[argIndex].equals("--file") || argv[argIndex].equals("-f")) {
+                    if (argc >= 3) {
+                        configuration.errfile = argv[argIndex + 1];
+                        --argc;
+                        ++argIndex;
+                    }
+                } else if (argv[argIndex].equals("-wrap") || argv[argIndex].equals("--wrap") || argv[argIndex].equals("-w")) {
+                    if (argc >= 3) {
+                        configuration.wraplen = Integer.parseInt(argv[argIndex + 1]);
+                        --argc;
+                        ++argIndex;
+                    }
+                } else if (argv[argIndex].equals("-version") || argv[argIndex].equals("--version") || argv[argIndex].equals("-v")) {
+                    Report.showVersion(tidy.getErrout());
+                    System.exit(0);
+                } else {
+                    /* a cluster of single-letter flags such as -imu */
+                    s = argv[argIndex];
+
+                    for (int i = 1; i < s.length(); i++) {
+                        if (s.charAt(i) == 'i') {
+                            configuration.IndentContent = true;
+                            configuration.SmartIndent = true;
+                        } else if (s.charAt(i) == 'o')
+                            configuration.HideEndTags = true;
+                        else if (s.charAt(i) == 'u')
+                            configuration.UpperCaseTags = true;
+                        else if (s.charAt(i) == 'c')
+                            configuration.MakeClean = true;
+                        else if (s.charAt(i) == 'n')
+                            configuration.NumEntities = true;
+                        else if (s.charAt(i) == 'm')
+                            configuration.writeback = true;
+                        else if (s.charAt(i) == 'e')
+                            configuration.OnlyErrors = true;
+                        else if (s.charAt(i) == 'q')
+                            configuration.Quiet = true;
+                        else
+                            Report.unknownOption(tidy.getErrout(), s.charAt(i));
+                    }
+                }
+
+                --argc;
+                ++argIndex;
+                continue;
+            }
+
+            /* ensure config is self-consistent */
+            configuration.adjust();
+
+            /* user specified error file */
+            if (configuration.errfile != null) {
+                /* is it same as the currently opened file? */
+                if (!configuration.errfile.equals(current_errorfile)) {
+                    /* no so close previous error file */
+
+                    if (tidy.getErrout() != tidy.getStderr())
+                        tidy.getErrout().close();
+
+                    /* and try to open the new error file */
+                    try {
+                        tidy.setErrout(new PrintWriter(new FileWriter(configuration.errfile), true));
+                        current_errorfile = configuration.errfile;
+                    } catch (IOException e) {
+                        /* can't be opened so fall back to stderr */
+                        current_errorfile = "stderr";
+                        tidy.setErrout(tidy.getStderr());
+                    }
+                }
+            }
+
+            boolean haveFile = (argc > 1);
+
+            /* "stdin" is only a display name for the messages below */
+            if (haveFile) {
+                file = argv[argIndex];
+            } else {
+                file = "stdin";
+            }
+
+            try {
+                /*
+                  pass null rather than the name "stdin" when no file was given:
+                  parse() opens any non-null name as a file, and reads System.in
+                  only when both the name and the stream are null
+                */
+                tidy.parse(null, null, haveFile ? file : null, System.out);
+                totalwarnings += tidy.parseWarnings;
+                totalerrors += tidy.parseErrors;
+            } catch (FileNotFoundException fnfe) {
+                Report.unknownFile(tidy.getErrout(), prog, file);
+            } catch (IOException ioe) {
+                Report.unknownFile(tidy.getErrout(), prog, file);
+            }
+
+            --argc;
+            ++argIndex;
+
+            if (argc <= 1)
+                break;
+        }
+
+        if (totalerrors + totalwarnings > 0)
+            Report.generalInfo(tidy.getErrout());
+
+        if (tidy.getErrout() != tidy.getStderr())
+            tidy.getErrout().close();
+
+        /* return status can be used by scripts */
+
+        if (totalerrors > 0)
+            System.exit(2);
+
+        if (totalwarnings > 0)
+            System.exit(1);
+
+        /* 0 signifies all is ok */
+        System.exit(0);
+    }
+}
diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/TidyBeanInfo.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/TidyBeanInfo.java
new file mode 100644
index 0000000..2acf2c2
--- /dev/null
+++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/TidyBeanInfo.java
@@ -0,0 +1,39 @@
+/*
+ * @(#)TidyBeanInfo.java 1.11 2000/08/16
+ *
+ */
+
+package org.w3c.tidy;
+
+/**
+ *
+ * BeanInfo for Tidy
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+
* @author Andy Quick <ac.quick@sympatico.ca> (translation to Java) + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +import java.beans.SimpleBeanInfo; +import java.awt.Image; + +public class TidyBeanInfo extends SimpleBeanInfo { + + public Image getIcon(int kind) + { + return loadImage("tidy.gif"); + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/TidyMessages.properties b/net.sourceforge.phpeclipse/src/org/w3c/tidy/TidyMessages.properties new file mode 100644 index 0000000..323a7ee --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/TidyMessages.properties @@ -0,0 +1,194 @@ +error=Error: +warning=Warning: +line_column=line {0,number} column {1,number} - +emacs_format={0}:{1,number}:{2,number}: +illegal_char=Warning: replacing illegal character code {0,number} +missing_semicolon=Warning: entity "{0}" doesn''t end in '';'' +unknown_entity=Warning: unescaped & or unknown entity "{0}" +unescaped_ampersand=Warning: unescaped & which should be written as &amp; +unknown_attribute=Warning: unknown attribute "{0}" +missing_attribute=\ lacks "{0}" attribute +missing_attr_value=\ attribute "{0}" lacks value +missing_imagemap=\ should use client-side image map +bad_attribute_value=\ unknown attribute value "{0}" +xml_attribute_value=\ has XML attribute "{0}" +unexpected_gt=\ missing '>' for end of tag +unexpected_quotemark=\ unexpected or duplicate quote mark +repeated_attribute=\ repeated attribute +nested_emphasis=\ nested emphasis +coerce_to_endtag=\ <{0}> is probably intended as </{0}> +proprietary_attr_value=\ 
proprietary attribute value "{0}" +missing_endtag_for=Warning: missing </{0}> +missing_endtag_before=Warning: missing </{0}> before +discarding_unexpected=Warning: discarding unexpected +forced_end_anchor=Warning: <a> is probably intended as </a> +non_matching_endtag_1=Warning: replacing unexpected +non_matching_endtag_2=\ by </{0}> +tag_not_allowed_in=\ isn''t allowed in <{0}> elements +doctype_after_tags=Warning: <!DOCTYPE> isn't allowed after elements +missing_starttag=Warning: missing <{0}> +unexpected_endtag=Warning: unexpected </{0}> +unexpected_endtag_suffix=\ in <{0}> +too_many_elements=Warning: too many {0} elements +too_many_elements_suffix=\ in <{0}> +using_br_inplace_of=Warning: using <br> in place of +inserting_tag=Warning: inserting implicit <{0}> +cant_be_nested=\ can't be nested +proprietary_element=\ is not approved by W3C +obsolete_element=Warning: replacing obsolete element +replacing_element=Warning: replacing element +by=\ by +trim_empty_element=Warning: trimming empty +missing_title_element=Warning: inserting missing 'title' element +illegal_nesting=\ shouldn't be nested +noframes_content=\ not inside 'noframes' element +inconsistent_version=Warning: html doctype doesn't match content +content_after_body=Warning: content occurs after end of body +malformed_comment=Warning: adjacent hyphens within comment +bad_comment_chars=Warning: expecting -- or > +bad_xml_comment=Warning: XML comments can't contain -- +bad_cdata_content=Warning: '<' + '/' + letter not allowed here +inconsistent_namespace=Warning: html namespace doesn't match content +suspected_missing_quote=Error: missing quotemark for attribute value +duplicate_frameset=Error: repeated FRAMESET element +unknown_element=\ is not recognized! 
+dtype_not_upper_case=Warning: SYSTEM, PUBLIC, W3C, DTD, EN must be upper case +unexpected_end_of_file=Warning: end of file while parsing attributes +malformed_doctype=Warning: expected "html PUBLIC" or "html SYSTEM" +id_name_mismatch=\ id and name attribute value mismatch + +badchars_summary=Character codes for the Microsoft Windows fonts in the range\n\ +128 - 159 may not be recognized on other platforms. You are\n\ +instead recommended to use named entities, e.g. &trade; rather\n\ +than Windows character code 153 (0x2122 in Unicode). Note that\n\ +as of February 1998 few browsers support the new entities.\n\n + +badform_summary=You may need to move one or both of the <form> and </form>\n\ +tags. HTML elements should be properly nested and form elements\n\ +are no exception. For instance you should not place the <form>\n\ +in one table cell and the </form> in another. If the <form> is\n\ +placed before a table, the </form> cannot be placed inside the\n\ +table! Note that one form can't be nested inside another!\n\n + +badaccess_missing_summary=The table summary attribute should be used to describe\n\ +the table structure. It is very helpful for people using\n\ +non-visual browsers. The scope and headers attributes for\n\ +table cells are useful for specifying which headers apply\n\ +to each table cell, enabling non-visual browsers to provide\n\ +a meaningful context for each cell.\n\n + +badaccess_missing_image_alt=The alt attribute should be used to give a short description\n\ +of an image; longer descriptions should be given with the\n\ +longdesc attribute which takes a URL linked to the description.\n\ +These measures are needed for people using non-graphical browsers.\n\n + +badaccess_missing_image_map=Use client-side image maps in preference to server-side image\n\ +maps as the latter are inaccessible to people using non-\n\ +graphical browsers. 
In addition, client-side maps are easier\n\ +to set up and provide immediate feedback to users.\n\n + +badaccess_missing_link_alt=For hypertext links defined using a client-side image map, you\n\ +need to use the alt attribute to provide a textual description\n\ +of the link for people using non-graphical browsers.\n\n + +badaccess_frames=Pages designed using frames present problems for\n\ +people who are either blind or using a browser that\n\ +doesn't support frames. A frames-based page should always\n\ +include an alternative layout inside a NOFRAMES element.\n\n + +badaccess_summary=For further advice on how to make your pages accessible\n\ +see "{0}". You may also want to try\n\ +"http://www.cast.org/bobby/" which is a free Web-based\n\ +service for checking URLs for accessibility.\n\n + +badlayout_using_layer=The Cascading Style Sheets (CSS) Positioning mechanism\n\ +is recommended in preference to the proprietary <LAYER>\n\ +element due to limited vendor support for LAYER.\n\n + +badlayout_using_spacer=You are recommended to use CSS for controlling white\n\ +space (e.g. for indentation, margins and line spacing).\n\ +The proprietary <SPACER> element has limited vendor support.\n\n + +badlayout_using_font=You are recommended to use CSS to specify the font and\n\ +properties such as its size and color. 
This will reduce\n\ +the size of HTML files and make them easier to maintain\n\ +compared with using <FONT> elements.\n\n + +badlayout_using_nobr=You are recommended to use CSS to control line wrapping.\n\ +Use \"white-space: nowrap\" to inhibit wrapping in place\n\ +of inserting <NOBR>...</NOBR> into the markup.\n\n + +badlayout_using_body=You are recommended to use CSS to specify page and link colors\n\n + +unrecognized_option=unrecognized option -{0} use -help to list options +unknown_file={0}: can''t open file "{1}" +unknown_option=Warning - unknown option: {0} +bad_argument=Warning - missing or malformed argument for option: {0} + +needs_author_intervention=This document has errors that must be fixed before\n\ +using HTML Tidy to generate a tidied up version.\n\n + +missing_body=Can't create slides - document is missing a body element. +slides_found={0,number} Slides found + +general_info=HTML & CSS specifications are available from http://www.w3.org/\n\ +To learn more about Tidy see http://www.w3.org/People/Raggett/tidy/\n\ +Please send bug reports to Dave Raggett care of <html-tidy@w3.org>\n\ +Lobby your company to join W3C, see http://www.w3.org/Consortium\n + +hello_message=\nTidy (vers {0}) Parsing "{1}" + +report_version=\n{0}: Document content looks like {1} + +doctype_given=\n{0}: Doctype given is " + +num_warnings={0,number} warnings/errors were found!\n +no_warnings=no warnings or errors were found\n + +help_text={0}: file1 file2 ...\n\ +Utility to clean up & pretty print html files\n\ +see http://www.w3.org/People/Raggett/tidy/\n\ +options for tidy released on {1}\n\ +\n\ +Processing directives\n\ +--------------------\n\ + -indent or -i indent element content\n\ + -omit or -o omit optional endtags\n\ + -wrap 72 wrap text at column 72 (default is 68)\n\ + -upper or -u force tags to upper case (default is lower)\n\ + -clean or -c replace font, nobr & center tags by CSS\n\ + -numeric or -n output numeric rather than named entities\n\ + -errors or -e only 
show errors\n\ + -quiet or -q suppress nonessential output\n\ + -xml use this when input is wellformed xml\n\ + -asxml to convert html to wellformed xml\n\ + -slides to burst into slides on h2 elements\n\ +\n\ +Character encodings\n\ +------------------\n\ + -raw leave chars > 128 unchanged upon output\n\ + -ascii use ASCII for output, Latin-1 for input\n\ + -latin1 use Latin-1 for both input and output\n\ + -iso2022 use ISO2022 for both input and output\n\ + -utf8 use UTF-8 for both input and output\n\ + -mac use the Apple MacRoman character set\n\ +\n\ +File manipulation\n\ +---------------\n\ + -config <file> set options from config file\n\ + -f <file> write errors to named <file>\n\ + -modify or -m to modify original files\n\ +\n\ +Miscellaneous\n\ +------------\n\ + -version or -v show version\n\ + -help or -h list command line options\n\ +You can also use --blah for any config file option blah\n\ +\n\ +Input/Output default to stdin/stdout respectively\n\ +Single letter options apart from -f may be combined\n\ +as in: tidy -f errs.txt -imu foo.html\n\ +For further info on HTML see http://www.w3.org/MarkUp\n\ +\n + +bad_tree=\nPanic - tree has lost its integrity\n diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/config.txt b/net.sourceforge.phpeclipse/src/org/w3c/tidy/config.txt new file mode 100644 index 0000000..b1de207 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/config.txt @@ -0,0 +1,20 @@ +# sample config file for Java HTML tidy + +indent=auto +indent-spaces=2 +wrap=72 +markup=yes +clean=yes +output-xml=no +input-xml=no +show-warnings=yes +numeric-entities=yes +quote-marks=yes +quote-nbsp=yes +quote-ampersand=no +break-before-br=no +uppercase-tags=yes +uppercase-attributes=yes +smart-indent=no +output-xhtml=yes +char-encoding=latin1