initial version

[phpeclipse.git] / archive / net.sourceforge.phpeclipse.jtidy / src / net / sourceforge / phpdt / tidy / w3c / Configuration.java
diff --git a/archive/net.sourceforge.phpeclipse.jtidy/src/net/sourceforge/phpdt/tidy/w3c/Configuration.java b/archive/net.sourceforge.phpeclipse.jtidy/src/net/sourceforge/phpdt/tidy/w3c/Configuration.java

new file mode 100644 (file)

index 0000000..270fb8c
--- /dev/null
+++ b/archive/net.sourceforge.phpeclipse.jtidy/src/net/sourceforge/phpdt/tidy/w3c/Configuration.java
@@ -0,0 +1,611 @@
+/*
+ * @(#)Configuration.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Read configuration file and manage configuration properties.
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/*
+  Configuration files associate a property name with a value.
+  The format is that of a Java .properties file.
+*/
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.Enumeration;
+import java.util.Properties;
+import java.util.StringTokenizer;
+
+public class Configuration implements java.io.Serializable {
+
+    /* character encodings: the values stored in CharEncoding; chosen by the
+       "char-encoding" property (see parseCharEncoding) */
+    public static final int RAW         = 0;
+    public static final int ASCII       = 1;
+    public static final int LATIN1      = 2;
+    public static final int UTF8        = 3;
+    public static final int ISO2022     = 4;
+    public static final int MACROMAN    = 5;
+
+    /* mode controlling treatment of doctype: the values stored in docTypeMode;
+       chosen by the "doctype" property (see parseDocType) */
+    public static final int DOCTYPE_OMIT  = 0;
+    public static final int DOCTYPE_AUTO  = 1;
+    public static final int DOCTYPE_STRICT= 2;
+    public static final int DOCTYPE_LOOSE = 3;
+    public static final int DOCTYPE_USER  = 4;
+
+    protected int spaces =  2;           /* default indentation; set by "indent-spaces" */
+    protected int wraplen = 68;          /* default wrap margin; set by "wrap", 0 disables wrapping (see adjust) */
+    protected int CharEncoding = ASCII;  /* one of the character encoding constants; set by "char-encoding" */
+    protected int tabsize = 4;           /* set by "tab-size" */
+
+    protected int     docTypeMode = DOCTYPE_AUTO; /* one of the DOCTYPE_* constants; see doctype property */
+    public String  altText = null;      /* default text for alt attribute; set by "alt-text" */
+    protected String  slidestyle = null;    /* style sheet for slides; set by "slide-style" */
+    protected String  docTypeStr = null;    /* user specified doctype; set by "doctype" */
+    protected String  errfile = null;       /* file name to write errors to; set by "error-file" */
+    protected boolean writeback = false;        /* if true then output tidied markup; set by "write-back" */
+
+    /* Boolean options. Each one is assigned, directly or through a parse
+       helper, from a property handled in parseProps(). */
+    public boolean OnlyErrors = false;       /* if true normal output is suppressed */
+    public boolean ShowWarnings = true;      /* however errors are always shown */
+    public boolean Quiet = false;            /* no 'Parsing X', guessed DTD or summary */
+    public boolean IndentContent = false;    /* indent content of appropriate tags */
+    public boolean SmartIndent = false;      /* does text/block level content affect indentation */
+    public boolean HideEndTags = false;      /* suppress optional end tags */
+    public boolean XmlTags = false;          /* treat input as XML */
+    public boolean XmlOut = false;           /* create output as XML */
+    public boolean xHTML = false;            /* output extensible HTML */
+    public boolean XmlPi = false;            /* add <?xml?> for XML docs */
+    public boolean RawOut = false;           /* avoid mapping values > 127 to entities */
+    public boolean UpperCaseTags = false;    /* output tags in upper not lower case */
+    public boolean UpperCaseAttrs = false;   /* output attributes in upper not lower case */
+    public boolean MakeClean = false;        /* remove presentational clutter */
+    public boolean LogicalEmphasis = false;  /* replace i by em and b by strong */
+    public boolean DropFontTags = false;     /* discard presentation tags */
+    public boolean DropEmptyParas = true;    /* discard empty p elements */
+    public boolean FixComments = true;       /* fix comments with adjacent hyphens */
+    public boolean BreakBeforeBR = false;    /* o/p newline before <br> or not? */
+    public boolean BurstSlides = false;      /* create slides on each h2 element */
+    public boolean NumEntities = false;      /* use numeric entities */
+    public boolean QuoteMarks = false;       /* output " marks as &quot; */
+    public boolean QuoteNbsp = true;         /* output non-breaking space as entity */
+    public boolean QuoteAmpersand = true;    /* output naked ampersand as &amp; */
+    public boolean WrapAttVals = false;      /* wrap within attribute values */
+    public boolean WrapScriptlets = false;   /* wrap within JavaScript string literals */
+    public boolean WrapSection = true;       /* wrap within <![ ... ]> section tags */
+    public boolean WrapAsp = true;           /* wrap within ASP pseudo elements */
+    public boolean WrapJste = true;          /* wrap within JSTE pseudo elements */
+    public boolean WrapPhp = true;           /* wrap within PHP pseudo elements */
+    public boolean FixBackslash = true;      /* fix URLs by replacing \ with / */
+    public boolean IndentAttributes = false; /* newline+indent before each attribute */
+    public boolean XmlPIs = false;           /* if set to yes PIs must end with ?> */
+    public boolean XmlSpace = false;         /* if set to yes adds xml:space attr as needed */
+    public boolean EncloseBodyText = false;  /* if yes text at body is wrapped in <p>'s */
+    public boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in <p>'s */
+    public boolean KeepFileTimes = true;     /* if yes last modified time is preserved */
+    public boolean Word2000 = false;         /* draconian cleaning for Word2000 */
+    public boolean TidyMark = true;          /* add meta element indicating tidied doc */
+    public boolean Emacs = false;            /* if true format error output for GNU Emacs */
+    public boolean LiteralAttribs = false;   /* if true attributes may use newlines */
+
+    // gschadow patch start
+    /** Remove all scripting XML tags (ASP, JSP, PHP,...); set by "drop-pseudo-xml-crap" */
+    public boolean DropPseudoXMLCrap = false;
+    // gschadow patch end
+    
+    /* TagTable associated with this Configuration. It is never assigned inside
+       this class, yet adjust() and the new-*-tags parsers dereference it.
+       NOTE(review): presumably the owner sets it before those run - confirm. */
+    protected TagTable tt;                      /* TagTable associated with this Configuration */
+
+    /* Raw key/value pairs accumulated by addProps() and parseFile().
+       Being transient it is skipped by default serialization.
+       NOTE(review): no readObject is defined here, so a deserialized instance
+       would see this field as null - confirm whether that case can occur. */
+    private transient Properties _properties = new Properties();
+
+    /**
+     * Creates a configuration holding only the defaults given by the field
+     * initialisers; no property file is read.
+     */
+    public Configuration()
+    {
+    }
+
+    public void addProps( Properties p )
+    {
+        Enumeration enum = p.propertyNames();
+        while (enum.hasMoreElements())
+        {
+            String key = (String) enum.nextElement();
+            String value = p.getProperty(key);
+            _properties.put(key, value);
+        }
+        parseProps();
+    }
+
+    public void parseFile( String filename )
+    {
+        try
+        {
+            _properties.load( new FileInputStream( filename ) );
+        }
+        catch (IOException e)
+        {
+            System.err.println(filename + e.toString());
+            return;
+        }
+        parseProps();
+    }
+
+    /**
+     * Applies every recognised entry of the accumulated property set to the
+     * matching configuration field. A property that is absent leaves its
+     * field untouched. The property name is handed to each parse helper so
+     * that helpers which validate their input can name the offending option
+     * when they call <code>Report.badArgument</code>.
+     */
+    private void parseProps()
+    {
+        String value;
+
+        value = _properties.getProperty("indent-spaces");
+        if (value != null)
+            spaces = parseInt(value, "indent-spaces");
+
+        value = _properties.getProperty("wrap");
+        if (value != null)
+            wraplen = parseInt(value, "wrap");
+
+        value = _properties.getProperty("wrap-attributes");
+        if (value != null)
+            WrapAttVals = parseBool(value, "wrap-attributes");
+
+        value = _properties.getProperty("wrap-script-literals");
+        if (value != null)
+            WrapScriptlets = parseBool(value, "wrap-script-literals");
+
+        value = _properties.getProperty("wrap-sections");
+        if (value != null)
+            WrapSection = parseBool(value, "wrap-sections");
+
+        value = _properties.getProperty("wrap-asp");
+        if (value != null)
+            WrapAsp = parseBool(value, "wrap-asp");
+
+        value = _properties.getProperty("wrap-jste");
+        if (value != null)
+            WrapJste = parseBool(value, "wrap-jste");
+
+        value = _properties.getProperty("wrap-php");
+        if (value != null)
+            WrapPhp = parseBool(value, "wrap-php");
+
+        value = _properties.getProperty("literal-attributes");
+        if (value != null)
+            LiteralAttribs = parseBool(value, "literal-attributes");
+
+        value = _properties.getProperty("tab-size");
+        if (value != null)
+            tabsize = parseInt(value, "tab-size");
+
+        /* "markup: no" means only errors are wanted, hence the inverted parse */
+        value = _properties.getProperty("markup");
+        if (value != null)
+            OnlyErrors = parseInvBool(value, "markup");
+
+        value = _properties.getProperty("quiet");
+        if (value != null)
+            Quiet = parseBool(value, "quiet");
+
+        value = _properties.getProperty("tidy-mark");
+        if (value != null)
+            TidyMark = parseBool(value, "tidy-mark");
+
+        /* parseIndent also updates SmartIndent as a side effect */
+        value = _properties.getProperty("indent");
+        if (value != null)
+            IndentContent = parseIndent(value, "indent");
+
+        /* the option name passed here used to be misspelled "ident-attributes",
+           so a bad value was reported against a non-existent option */
+        value = _properties.getProperty("indent-attributes");
+        if (value != null)
+            IndentAttributes = parseBool(value, "indent-attributes");
+
+        value = _properties.getProperty("hide-endtags");
+        if (value != null)
+            HideEndTags = parseBool(value, "hide-endtags");
+
+        value = _properties.getProperty("input-xml");
+        if (value != null)
+            XmlTags = parseBool(value, "input-xml");
+
+        value = _properties.getProperty("output-xml");
+        if (value != null)
+            XmlOut = parseBool(value, "output-xml");
+
+        value = _properties.getProperty("output-xhtml");
+        if (value != null)
+            xHTML = parseBool(value, "output-xhtml");
+
+        value = _properties.getProperty("add-xml-pi");
+        if (value != null)
+            XmlPi = parseBool(value, "add-xml-pi");
+
+        /* second name for the same flag; when both are present this one wins */
+        value = _properties.getProperty("add-xml-decl");
+        if (value != null)
+            XmlPi = parseBool(value, "add-xml-decl");
+
+        value = _properties.getProperty("assume-xml-procins");
+        if (value != null)
+            XmlPIs = parseBool(value, "assume-xml-procins");
+
+        value = _properties.getProperty("raw");
+        if (value != null)
+            RawOut = parseBool(value, "raw");
+
+        value = _properties.getProperty("uppercase-tags");
+        if (value != null)
+            UpperCaseTags = parseBool(value, "uppercase-tags");
+
+        value = _properties.getProperty("uppercase-attributes");
+        if (value != null)
+            UpperCaseAttrs = parseBool(value, "uppercase-attributes");
+
+        value = _properties.getProperty("clean");
+        if (value != null)
+            MakeClean = parseBool(value, "clean");
+
+        value = _properties.getProperty("logical-emphasis");
+        if (value != null)
+            LogicalEmphasis = parseBool(value, "logical-emphasis");
+
+        value = _properties.getProperty("word-2000");
+        if (value != null)
+            Word2000 = parseBool(value, "word-2000");
+
+        value = _properties.getProperty("drop-empty-paras");
+        if (value != null)
+            DropEmptyParas = parseBool(value, "drop-empty-paras");
+
+        value = _properties.getProperty("drop-font-tags");
+        if (value != null)
+            DropFontTags = parseBool(value, "drop-font-tags");
+
+        //gschadow patch start
+        value = _properties.getProperty("drop-pseudo-xml-crap");
+        if (value != null)
+            DropPseudoXMLCrap = parseBool(value, "drop-pseudo-xml-crap");
+        //gschadow patch end
+
+        value = _properties.getProperty("enclose-text");
+        if (value != null)
+            EncloseBodyText = parseBool(value, "enclose-text");
+
+        value = _properties.getProperty("enclose-block-text");
+        if (value != null)
+            EncloseBlockText = parseBool(value, "enclose-block-text");
+
+        /* taken verbatim, no parsing */
+        value = _properties.getProperty("alt-text");
+        if (value != null)
+            altText = value;
+
+        value = _properties.getProperty("add-xml-space");
+        if (value != null)
+            XmlSpace = parseBool(value, "add-xml-space");
+
+        value = _properties.getProperty("fix-bad-comments");
+        if (value != null)
+            FixComments = parseBool(value, "fix-bad-comments");
+
+        value = _properties.getProperty("split");
+        if (value != null)
+            BurstSlides = parseBool(value, "split");
+
+        value = _properties.getProperty("break-before-br");
+        if (value != null)
+            BreakBeforeBR = parseBool(value, "break-before-br");
+
+        value = _properties.getProperty("numeric-entities");
+        if (value != null)
+            NumEntities = parseBool(value, "numeric-entities");
+
+        value = _properties.getProperty("quote-marks");
+        if (value != null)
+            QuoteMarks = parseBool(value, "quote-marks");
+
+        value = _properties.getProperty("quote-nbsp");
+        if (value != null)
+            QuoteNbsp = parseBool(value, "quote-nbsp");
+
+        value = _properties.getProperty("quote-ampersand");
+        if (value != null)
+            QuoteAmpersand = parseBool(value, "quote-ampersand");
+
+        value = _properties.getProperty("write-back");
+        if (value != null)
+            writeback = parseBool(value, "write-back");
+
+        value = _properties.getProperty("keep-time");
+        if (value != null)
+            KeepFileTimes = parseBool(value, "keep-time");
+
+        value = _properties.getProperty("show-warnings");
+        if (value != null)
+            ShowWarnings = parseBool(value, "show-warnings");
+
+        value = _properties.getProperty("error-file");
+        if (value != null)
+            errfile = parseName(value, "error-file");
+
+        value = _properties.getProperty("slide-style");
+        if (value != null)
+            slidestyle = parseName(value, "slide-style");
+
+        /* the four new-*-tags options register names with the tag table tt */
+        value = _properties.getProperty("new-inline-tags");
+        if (value != null)
+            parseInlineTagNames(value, "new-inline-tags");
+
+        value = _properties.getProperty("new-blocklevel-tags");
+        if (value != null)
+            parseBlockTagNames(value, "new-blocklevel-tags");
+
+        value = _properties.getProperty("new-empty-tags");
+        if (value != null)
+            parseEmptyTagNames(value, "new-empty-tags");
+
+        value = _properties.getProperty("new-pre-tags");
+        if (value != null)
+            parsePreTagNames(value, "new-pre-tags");
+
+        value = _properties.getProperty("char-encoding");
+        if (value != null)
+            CharEncoding = parseCharEncoding(value, "char-encoding");
+
+        /* parseDocType also sets docTypeMode as a side effect */
+        value = _properties.getProperty("doctype");
+        if (value != null)
+            docTypeStr = parseDocType(value, "doctype");
+
+        value = _properties.getProperty("fix-backslash");
+        if (value != null)
+            FixBackslash = parseBool(value, "fix-backslash");
+
+        value = _properties.getProperty("gnu-emacs");
+        if (value != null)
+            Emacs = parseBool(value, "gnu-emacs");
+    }
+
+    /* ensure that config is self consistent */
+    public void adjust()
+    {
+        if (EncloseBlockText)
+            EncloseBodyText = true;
+
+        /* avoid the need to set IndentContent when SmartIndent is set */
+
+        if (SmartIndent)
+            IndentContent = true;
+
+        /* disable wrapping */
+        if (wraplen == 0)
+            wraplen = 0x7FFFFFFF;
+
+        /* Word 2000 needs o:p to be declared as inline */
+        if (Word2000)
+        {
+            tt.defineInlineTag("o:p");
+        }
+
+        /* XHTML is written in lower case */
+        if (xHTML)
+        {
+            XmlOut = true;
+            UpperCaseTags = false;
+            UpperCaseAttrs = false;
+        }
+
+        /* if XML in, then XML out */
+        if (XmlTags)
+        {
+            XmlOut = true;
+            XmlPIs = true;
+        }
+
+        /* XML requires end tags */
+        if (XmlOut)
+        {
+            QuoteAmpersand = true;
+            HideEndTags = false;
+        }
+    }
+
+    /**
+     * Parses a decimal integer option value. Surrounding whitespace is
+     * ignored, because a value read from a properties file keeps any trailing
+     * blanks and <code>Integer.parseInt</code> rejects them.
+     *
+     * @param s      the raw property value
+     * @param option the property name, used when reporting a bad value
+     * @return the parsed number, or -1 after reporting a bad argument
+     */
+    private static int parseInt( String s, String option )
+    {
+        /* a null value is left for Integer.parseInt to reject, as before */
+        String digits = (s == null) ? null : s.trim();
+        try
+        {
+            return Integer.parseInt( digits );
+        }
+        catch ( NumberFormatException e )
+        {
+            Report.badArgument(option);
+            return -1;
+        }
+    }
+
+    /**
+     * Interprets an option value as a boolean by looking at its first
+     * character only: t, T, y, Y or 1 mean true; f, F, n, N or 0 mean false.
+     * Any other first character is reported as a bad argument and yields
+     * false; a null or empty value yields false without a report.
+     *
+     * @param s      the raw property value
+     * @param option the property name, used when reporting a bad value
+     * @return the boolean the value stands for
+     */
+    private static boolean parseBool( String s, String option )
+    {
+        if ( s == null || s.length() == 0 )
+            return false;
+
+        switch ( s.charAt(0) )
+        {
+            case 't': case 'T': case 'y': case 'Y': case '1':
+                return true;
+
+            case 'f': case 'F': case 'n': case 'N': case '0':
+                return false;
+
+            default:
+                Report.badArgument(option);
+                return false;
+        }
+    }
+
+    /**
+     * Interprets an option value as a boolean and returns its negation. The
+     * accepted spellings are exactly those of <code>parseBool</code>; the
+     * earlier hand-copied test rejected the 1 and 0 forms that
+     * <code>parseBool</code> accepts. A null, empty or unrecognised value
+     * counts as false and therefore yields true.
+     *
+     * @param s      the raw property value
+     * @param option the property name, used when reporting a bad value
+     * @return the inverse of the boolean the value stands for
+     */
+    private static boolean parseInvBool( String s, String option )
+    {
+        return !parseBool( s, option );
+    }
+
+    /**
+     * Extracts the first whitespace-delimited word of an option value.
+     *
+     * @param s      the raw property value
+     * @param option the property name, used when reporting a bad value
+     * @return the first word, or null after reporting a bad argument when
+     *         the value contains no word at all
+     */
+    private static String parseName( String s, String option )
+    {
+        StringTokenizer words = new StringTokenizer( s );
+        if ( !words.hasMoreTokens() )
+        {
+            Report.badArgument(option);
+            return null;
+        }
+        return words.nextToken();
+    }
+
+    private static int parseCharEncoding( String s, String option )
+    {
+        int result = ASCII;
+
+        if (Lexer.wstrcasecmp(s, "ascii") == 0)
+            result = ASCII;
+        else if (Lexer.wstrcasecmp(s, "latin1") == 0)
+            result = LATIN1;
+        else if (Lexer.wstrcasecmp(s, "raw") == 0)
+            result = RAW;
+        else if (Lexer.wstrcasecmp(s, "utf8") == 0)
+            result = UTF8;
+        else if (Lexer.wstrcasecmp(s, "iso2022") == 0)
+            result = ISO2022;
+        else if (Lexer.wstrcasecmp(s, "mac") == 0)
+            result = MACROMAN;
+        else
+            Report.badArgument(option);
+
+        return result;
+    }
+
+    /* slight hack to avoid changes to pprint.c */
+    private boolean parseIndent( String s, String option )
+    {
+        boolean b = IndentContent;
+
+        if (Lexer.wstrcasecmp(s, "yes") == 0)
+        {
+            b = true;
+            SmartIndent = false;
+        }
+        else if (Lexer.wstrcasecmp(s, "true") == 0)
+        {
+            b = true;
+            SmartIndent = false;
+        }
+        else if (Lexer.wstrcasecmp(s, "no") == 0)
+        {
+            b = false;
+            SmartIndent = false;
+        }
+        else if (Lexer.wstrcasecmp(s, "false") == 0)
+        {
+            b = false;
+            SmartIndent = false;
+        }
+        else if (Lexer.wstrcasecmp(s, "auto") == 0)
+        {
+            b = true;
+            SmartIndent = true;
+        }
+        else
+            Report.badArgument(option);
+        return b;
+    }
+
+    public void parseInlineTagNames( String s, String option )
+    {
+        StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
+        while ( t.hasMoreTokens() ) {
+            tt.defineInlineTag( t.nextToken() );
+        }
+    }
+
+    public void parseBlockTagNames( String s, String option )
+    {
+        StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
+        while ( t.hasMoreTokens() ) {
+            tt.defineBlockTag( t.nextToken() );
+        }
+    }
+
+    public void parseEmptyTagNames( String s, String option )
+    {
+        StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
+        while ( t.hasMoreTokens() ) {
+            tt.defineEmptyTag( t.nextToken() );
+        }
+    }
+
+    public void parsePreTagNames( String s, String option )
+    {
+        StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
+        while ( t.hasMoreTokens() ) {
+            tt.definePreTag( t.nextToken() );
+        }
+    }
+
+    /*
+       doctype: omit | auto | strict | loose | <fpi>
+
+       where the fpi is a string similar to
+
+          "-//ACME//DTD HTML 3.14159//EN"
+    */
+    /**
+     * Interprets the "doctype" option and records the resulting mode in
+     * docTypeMode. A value starting with a double quote is taken as a user
+     * supplied identifier and returned as is (trimmed, quotes included).
+     * Otherwise only the first word counts; an unknown word is reported as a
+     * bad argument and falls back to DOCTYPE_AUTO.
+     *
+     * @param s      the raw property value
+     * @param option the property name, used when reporting a bad value
+     * @return the quoted identifier for a user doctype, otherwise null
+     */
+    protected String parseDocType( String s, String option )
+    {
+        String spec = s.trim();
+
+        /* "-//ACME//DTD HTML 3.14159//EN" or similar */
+        if (spec.startsWith("\""))
+        {
+            docTypeMode = DOCTYPE_USER;
+            return spec;
+        }
+
+        /* read first word */
+        StringTokenizer words = new StringTokenizer( spec, " \t\n\r," );
+        String keyword = words.hasMoreTokens() ? words.nextToken() : "";
+
+        int mode;
+        boolean recognised = true;
+
+        if (Lexer.wstrcasecmp(keyword, "omit") == 0)
+        {
+            mode = DOCTYPE_OMIT;
+        }
+        else if (Lexer.wstrcasecmp(keyword, "strict") == 0)
+        {
+            mode = DOCTYPE_STRICT;
+        }
+        else if (Lexer.wstrcasecmp(keyword, "loose") == 0 ||
+                 Lexer.wstrcasecmp(keyword, "transitional") == 0)
+        {
+            mode = DOCTYPE_LOOSE;
+        }
+        else if (Lexer.wstrcasecmp(keyword, "auto") == 0)
+        {
+            mode = DOCTYPE_AUTO;
+        }
+        else
+        {
+            mode = DOCTYPE_AUTO;
+            recognised = false;
+        }
+
+        /* the mode is stored before the report, as in the original order */
+        docTypeMode = mode;
+        if (!recognised)
+            Report.badArgument(option);
+
+        return null;
+    }
+
+}