--- /dev/null
+/*
+ * @(#)Configuration.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Read configuration file and manage configuration properties.
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/*
+ Configuration files associate a property name with a value.
+ The format is that of a Java .properties file.
+*/
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.Enumeration;
+import java.util.Properties;
+import java.util.StringTokenizer;
+
+/**
+ * Holds every Tidy option as a field and fills those fields from
+ * Java-properties-style key/value pairs (see parseProps for the key names).
+ */
+public class Configuration implements java.io.Serializable {
+
+ /* character encodings (values accepted for the CharEncoding field) */
+ public static final int RAW = 0;
+ public static final int ASCII = 1;
+ public static final int LATIN1 = 2;
+ public static final int UTF8 = 3;
+ public static final int ISO2022 = 4;
+ public static final int MACROMAN = 5;
+
+ /* mode controlling treatment of doctype (values of docTypeMode) */
+ public static final int DOCTYPE_OMIT = 0;
+ public static final int DOCTYPE_AUTO = 1;
+ public static final int DOCTYPE_STRICT= 2;
+ public static final int DOCTYPE_LOOSE = 3;
+ public static final int DOCTYPE_USER = 4;
+
+ protected int spaces = 2; /* default indentation; set by "indent-spaces" */
+ protected int wraplen = 68; /* default wrap margin; set by "wrap", 0 disables wrapping (see adjust) */
+ protected int CharEncoding = ASCII; /* one of the character encoding constants above */
+ protected int tabsize = 4; /* set by "tab-size" */
+
+ protected int docTypeMode = DOCTYPE_AUTO; /* see doctype property */
+ public String altText = null; /* default text for alt attribute */
+ protected String slidestyle = null; /* style sheet for slides */
+ protected String docTypeStr = null; /* user specified doctype */
+ protected String errfile = null; /* file name to write errors to */
+ protected boolean writeback = false; /* if true then output tidied markup */
+
+ public boolean OnlyErrors = false; /* if true normal output is suppressed */
+ public boolean ShowWarnings = true; /* however errors are always shown */
+ public boolean Quiet = false; /* no 'Parsing X', guessed DTD or summary */
+ public boolean IndentContent = false; /* indent content of appropriate tags */
+ public boolean SmartIndent = false; /* does text/block level content affect indentation */
+ public boolean HideEndTags = false; /* suppress optional end tags */
+ public boolean XmlTags = false; /* treat input as XML */
+ public boolean XmlOut = false; /* create output as XML */
+ public boolean xHTML = false; /* output extensible HTML */
+ public boolean XmlPi = false; /* add <?xml?> for XML docs */
+ public boolean RawOut = false; /* avoid mapping values > 127 to entities */
+ public boolean UpperCaseTags = false; /* output tags in upper not lower case */
+ public boolean UpperCaseAttrs = false; /* output attributes in upper not lower case */
+ public boolean MakeClean = false; /* remove presentational clutter */
+ public boolean LogicalEmphasis = false; /* replace i by em and b by strong */
+ public boolean DropFontTags = false; /* discard presentation tags */
+ public boolean DropEmptyParas = true; /* discard empty p elements */
+ public boolean FixComments = true; /* fix comments with adjacent hyphens */
+ public boolean BreakBeforeBR = false; /* o/p newline before <br> or not? */
+ public boolean BurstSlides = false; /* create slides on each h2 element */
+ public boolean NumEntities = false; /* use numeric entities */
+ public boolean QuoteMarks = false; /* output " marks as &quot; */
+ public boolean QuoteNbsp = true; /* output non-breaking space as entity */
+ public boolean QuoteAmpersand = true; /* output naked ampersand as &amp; */
+ public boolean WrapAttVals = false; /* wrap within attribute values */
+ public boolean WrapScriptlets = false; /* wrap within JavaScript string literals */
+ public boolean WrapSection = true; /* wrap within <![ ... ]> section tags */
+ public boolean WrapAsp = true; /* wrap within ASP pseudo elements */
+ public boolean WrapJste = true; /* wrap within JSTE pseudo elements */
+ public boolean WrapPhp = true; /* wrap within PHP pseudo elements */
+ public boolean FixBackslash = true; /* fix URLs by replacing \ with / */
+ public boolean IndentAttributes = false; /* newline+indent before each attribute */
+ public boolean XmlPIs = false; /* if set to yes PIs must end with ?> */
+ public boolean XmlSpace = false; /* if set to yes adds xml:space attr as needed */
+ public boolean EncloseBodyText = false; /* if yes text at body is wrapped in <p>'s */
+ public boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in <p>'s */
+ public boolean KeepFileTimes = true; /* if yes last modified time is preserved */
+ public boolean Word2000 = false; /* draconian cleaning for Word2000 */
+ public boolean TidyMark = true; /* add meta element indicating tidied doc */
+ public boolean Emacs = false; /* if true format error output for GNU Emacs */
+ public boolean LiteralAttribs = false; /* if true attributes may use newlines */
+
+ // gschadow patch start
+ /** Remove all scripting XML tags (ASP, JSP, PHP,...) */
+ public boolean DropPseudoXMLCrap = false;
+ // gschadow patch end
+
+ /*
+  * TagTable associated with this Configuration. Nothing in this class
+  * assigns it, yet adjust() and the parse*TagNames methods dereference it.
+  * NOTE(review): presumably set by the owner before those calls - confirm.
+  */
+ protected TagTable tt; /* TagTable associated with this Configuration */
+
+ /*
+  * Raw key/value pairs collected by addProps and parseFile. The field is
+  * transient, so default serialization does not write it.
+  * NOTE(review): a deserialized instance would see null here - confirm
+  * whether instances are ever deserialized.
+  */
+ private transient Properties _properties = new Properties();
+
+ /** Creates a configuration holding the default option values declared above. */
+ public Configuration()
+ {
+ }
+
+ /**
+  * Merges the given properties into this configuration's property set and
+  * then re-reads every known option from the merged set.
+  *
+  * @param p properties to merge; entries whose value is not a String are
+  *          skipped
+  */
+ public void addProps( Properties p )
+ {
+     if (_properties == null)
+     {
+         /* the field is transient, so it is null on a deserialized instance */
+         _properties = new Properties();
+     }
+
+     /* "enum" is a reserved word since Java 5 and cannot name a variable */
+     Enumeration names = p.propertyNames();
+     while (names.hasMoreElements())
+     {
+         String key = (String) names.nextElement();
+         String value = p.getProperty(key);
+         /* getProperty yields null for a non-String value; Hashtable.put rejects null */
+         if (value != null)
+             _properties.put(key, value);
+     }
+     parseProps();
+ }
+
+ /**
+  * Loads a Java .properties file into this configuration's property set
+  * and then re-reads every known option from it.
+  *
+  * If the file cannot be opened or read, the problem is printed on
+  * System.err and the options are not re-parsed.
+  *
+  * @param filename path of the properties file to load
+  */
+ public void parseFile( String filename )
+ {
+     if (_properties == null)
+     {
+         /* the field is transient, so it is null on a deserialized instance */
+         _properties = new Properties();
+     }
+
+     FileInputStream in = null;
+     try
+     {
+         in = new FileInputStream( filename );
+         _properties.load( in );
+     }
+     catch (IOException e)
+     {
+         System.err.println(filename + e.toString());
+         return;
+     }
+     finally
+     {
+         /* always release the file handle, whether or not loading worked */
+         if (in != null)
+         {
+             try
+             {
+                 in.close();
+             }
+             catch (IOException ignored)
+             {
+                 /* nothing useful can be done about a failed close */
+             }
+         }
+     }
+     parseProps();
+ }
+
+ /**
+  * Copies every recognised key found in _properties into the matching
+  * option field. A key that is absent leaves its field untouched. The
+  * second argument of each parse helper is the option name used when a
+  * bad value is reported, so it must match the key that was looked up.
+  */
+ private void parseProps()
+ {
+     String value;
+
+     value = _properties.getProperty("indent-spaces");
+     if (value != null)
+         spaces = parseInt(value, "indent-spaces");
+
+     value = _properties.getProperty("wrap");
+     if (value != null)
+         wraplen = parseInt(value, "wrap");
+
+     value = _properties.getProperty("wrap-attributes");
+     if (value != null)
+         WrapAttVals = parseBool(value, "wrap-attributes");
+
+     value = _properties.getProperty("wrap-script-literals");
+     if (value != null)
+         WrapScriptlets = parseBool(value, "wrap-script-literals");
+
+     value = _properties.getProperty("wrap-sections");
+     if (value != null)
+         WrapSection = parseBool(value, "wrap-sections");
+
+     value = _properties.getProperty("wrap-asp");
+     if (value != null)
+         WrapAsp = parseBool(value, "wrap-asp");
+
+     value = _properties.getProperty("wrap-jste");
+     if (value != null)
+         WrapJste = parseBool(value, "wrap-jste");
+
+     value = _properties.getProperty("wrap-php");
+     if (value != null)
+         WrapPhp = parseBool(value, "wrap-php");
+
+     value = _properties.getProperty("literal-attributes");
+     if (value != null)
+         LiteralAttribs = parseBool(value, "literal-attributes");
+
+     value = _properties.getProperty("tab-size");
+     if (value != null)
+         tabsize = parseInt(value, "tab-size");
+
+     /* "markup" is the inverse of OnlyErrors */
+     value = _properties.getProperty("markup");
+     if (value != null)
+         OnlyErrors = parseInvBool(value, "markup");
+
+     value = _properties.getProperty("quiet");
+     if (value != null)
+         Quiet = parseBool(value, "quiet");
+
+     value = _properties.getProperty("tidy-mark");
+     if (value != null)
+         TidyMark = parseBool(value, "tidy-mark");
+
+     /* parseIndent also sets SmartIndent as a side effect */
+     value = _properties.getProperty("indent");
+     if (value != null)
+         IndentContent = parseIndent(value, "indent");
+
+     /* the reported name used to be misspelled "ident-attributes" */
+     value = _properties.getProperty("indent-attributes");
+     if (value != null)
+         IndentAttributes = parseBool(value, "indent-attributes");
+
+     value = _properties.getProperty("hide-endtags");
+     if (value != null)
+         HideEndTags = parseBool(value, "hide-endtags");
+
+     value = _properties.getProperty("input-xml");
+     if (value != null)
+         XmlTags = parseBool(value, "input-xml");
+
+     value = _properties.getProperty("output-xml");
+     if (value != null)
+         XmlOut = parseBool(value, "output-xml");
+
+     value = _properties.getProperty("output-xhtml");
+     if (value != null)
+         xHTML = parseBool(value, "output-xhtml");
+
+     /* "add-xml-pi" and "add-xml-decl" both drive XmlPi; the later key wins */
+     value = _properties.getProperty("add-xml-pi");
+     if (value != null)
+         XmlPi = parseBool(value, "add-xml-pi");
+
+     value = _properties.getProperty("add-xml-decl");
+     if (value != null)
+         XmlPi = parseBool(value, "add-xml-decl");
+
+     value = _properties.getProperty("assume-xml-procins");
+     if (value != null)
+         XmlPIs = parseBool(value, "assume-xml-procins");
+
+     value = _properties.getProperty("raw");
+     if (value != null)
+         RawOut = parseBool(value, "raw");
+
+     value = _properties.getProperty("uppercase-tags");
+     if (value != null)
+         UpperCaseTags = parseBool(value, "uppercase-tags");
+
+     value = _properties.getProperty("uppercase-attributes");
+     if (value != null)
+         UpperCaseAttrs = parseBool(value, "uppercase-attributes");
+
+     value = _properties.getProperty("clean");
+     if (value != null)
+         MakeClean = parseBool(value, "clean");
+
+     value = _properties.getProperty("logical-emphasis");
+     if (value != null)
+         LogicalEmphasis = parseBool(value, "logical-emphasis");
+
+     value = _properties.getProperty("word-2000");
+     if (value != null)
+         Word2000 = parseBool(value, "word-2000");
+
+     value = _properties.getProperty("drop-empty-paras");
+     if (value != null)
+         DropEmptyParas = parseBool(value, "drop-empty-paras");
+
+     value = _properties.getProperty("drop-font-tags");
+     if (value != null)
+         DropFontTags = parseBool(value, "drop-font-tags");
+
+     //gschadow patch start
+     value = _properties.getProperty("drop-pseudo-xml-crap");
+     if (value != null)
+         DropPseudoXMLCrap = parseBool(value, "drop-pseudo-xml-crap");
+     //gschadow patch end
+
+     value = _properties.getProperty("enclose-text");
+     if (value != null)
+         EncloseBodyText = parseBool(value, "enclose-text");
+
+     value = _properties.getProperty("enclose-block-text");
+     if (value != null)
+         EncloseBlockText = parseBool(value, "enclose-block-text");
+
+     /* taken verbatim, no parsing */
+     value = _properties.getProperty("alt-text");
+     if (value != null)
+         altText = value;
+
+     value = _properties.getProperty("add-xml-space");
+     if (value != null)
+         XmlSpace = parseBool(value, "add-xml-space");
+
+     value = _properties.getProperty("fix-bad-comments");
+     if (value != null)
+         FixComments = parseBool(value, "fix-bad-comments");
+
+     value = _properties.getProperty("split");
+     if (value != null)
+         BurstSlides = parseBool(value, "split");
+
+     value = _properties.getProperty("break-before-br");
+     if (value != null)
+         BreakBeforeBR = parseBool(value, "break-before-br");
+
+     value = _properties.getProperty("numeric-entities");
+     if (value != null)
+         NumEntities = parseBool(value, "numeric-entities");
+
+     value = _properties.getProperty("quote-marks");
+     if (value != null)
+         QuoteMarks = parseBool(value, "quote-marks");
+
+     value = _properties.getProperty("quote-nbsp");
+     if (value != null)
+         QuoteNbsp = parseBool(value, "quote-nbsp");
+
+     value = _properties.getProperty("quote-ampersand");
+     if (value != null)
+         QuoteAmpersand = parseBool(value, "quote-ampersand");
+
+     value = _properties.getProperty("write-back");
+     if (value != null)
+         writeback = parseBool(value, "write-back");
+
+     value = _properties.getProperty("keep-time");
+     if (value != null)
+         KeepFileTimes = parseBool(value, "keep-time");
+
+     value = _properties.getProperty("show-warnings");
+     if (value != null)
+         ShowWarnings = parseBool(value, "show-warnings");
+
+     value = _properties.getProperty("error-file");
+     if (value != null)
+         errfile = parseName(value, "error-file");
+
+     value = _properties.getProperty("slide-style");
+     if (value != null)
+         slidestyle = parseName(value, "slide-style");
+
+     /* the four new-*-tags options register their names with the tag table */
+     value = _properties.getProperty("new-inline-tags");
+     if (value != null)
+         parseInlineTagNames(value, "new-inline-tags");
+
+     value = _properties.getProperty("new-blocklevel-tags");
+     if (value != null)
+         parseBlockTagNames(value, "new-blocklevel-tags");
+
+     value = _properties.getProperty("new-empty-tags");
+     if (value != null)
+         parseEmptyTagNames(value, "new-empty-tags");
+
+     value = _properties.getProperty("new-pre-tags");
+     if (value != null)
+         parsePreTagNames(value, "new-pre-tags");
+
+     value = _properties.getProperty("char-encoding");
+     if (value != null)
+         CharEncoding = parseCharEncoding(value, "char-encoding");
+
+     /* parseDocType also sets docTypeMode as a side effect */
+     value = _properties.getProperty("doctype");
+     if (value != null)
+         docTypeStr = parseDocType(value, "doctype");
+
+     value = _properties.getProperty("fix-backslash");
+     if (value != null)
+         FixBackslash = parseBool(value, "fix-backslash");
+
+     value = _properties.getProperty("gnu-emacs");
+     if (value != null)
+         Emacs = parseBool(value, "gnu-emacs");
+ }
+
+ /**
+  * Reconciles options that imply or exclude one another, so that the
+  * configuration is self consistent.
+  */
+ public void adjust()
+ {
+     /* enclosing text in blocks implies enclosing text at body level */
+     EncloseBodyText = EncloseBodyText || EncloseBlockText;
+
+     /* smart indentation switches plain indentation on as well */
+     IndentContent = IndentContent || SmartIndent;
+
+     /* a wrap margin of zero stands for "never wrap" */
+     wraplen = (wraplen == 0) ? Integer.MAX_VALUE : wraplen;
+
+     /* Word 2000 markup relies on o:p being known as an inline tag */
+     if (Word2000)
+         tt.defineInlineTag("o:p");
+
+     /* XHTML is XML output written in lower case */
+     if (xHTML)
+     {
+         UpperCaseTags = false;
+         UpperCaseAttrs = false;
+         XmlOut = true;
+     }
+
+     /* XML input forces XML output and XML-style processing instructions */
+     if (XmlTags)
+     {
+         XmlPIs = true;
+         XmlOut = true;
+     }
+
+     if (!XmlOut)
+         return;
+
+     /* XML output needs every end tag and escaped ampersands */
+     HideEndTags = false;
+     QuoteAmpersand = true;
+ }
+
+ /**
+  * Parses a decimal integer option value.
+  *
+  * @param s      raw property value; surrounding whitespace is ignored
+  * @param option option name used when the value is reported as bad
+  * @return the parsed number, or -1 after reporting a bad argument
+  */
+ private static int parseInt( String s, String option )
+ {
+     try
+     {
+         /* Properties.load keeps trailing blanks, which Integer.parseInt rejects */
+         return Integer.parseInt( s.trim() );
+     }
+     catch ( NumberFormatException e )
+     {
+         Report.badArgument(option);
+         return -1;
+     }
+ }
+
+ /**
+  * Reads a boolean option from the first character of its value:
+  * t, T, y, Y and 1 mean true; f, F, n, N and 0 mean false.
+  *
+  * @param s      raw property value
+  * @param option option name used when the value is reported as bad
+  * @return the parsed flag; false for a null, empty or unrecognised value
+  *         (an unrecognised value is also reported)
+  */
+ private static boolean parseBool( String s, String option )
+ {
+     if ( s == null || s.length() == 0 )
+         return false;
+
+     switch ( s.charAt(0) )
+     {
+     case 't': case 'T': case 'y': case 'Y': case '1':
+         return true;
+
+     case 'f': case 'F': case 'n': case 'N': case '0':
+         return false;
+
+     default:
+         Report.badArgument(option);
+         return false;
+     }
+ }
+
+ /**
+  * Reads a boolean option and returns its negation.
+  *
+  * Delegates to parseBool so both helpers accept the same spellings,
+  * including 1 and 0, which this method used to reject.
+  *
+  * @param s      raw property value
+  * @param option option name used when the value is reported as bad
+  * @return false when the value means true; true when it means false or
+  *         is null, empty or unrecognised
+  */
+ private static boolean parseInvBool( String s, String option )
+ {
+     return !parseBool(s, option);
+ }
+
+ /**
+  * Extracts the first whitespace-delimited word of an option value.
+  *
+  * @param s      raw property value
+  * @param option option name used when the value is reported as bad
+  * @return the first word, or null after reporting a bad argument when
+  *         the value holds no word at all
+  */
+ private static String parseName( String s, String option )
+ {
+     StringTokenizer words = new StringTokenizer( s );
+     if ( words.hasMoreTokens() )
+         return words.nextToken();
+
+     Report.badArgument(option);
+     return null;
+ }
+
+ private static int parseCharEncoding( String s, String option )
+ {
+ int result = ASCII;
+
+ if (Lexer.wstrcasecmp(s, "ascii") == 0)
+ result = ASCII;
+ else if (Lexer.wstrcasecmp(s, "latin1") == 0)
+ result = LATIN1;
+ else if (Lexer.wstrcasecmp(s, "raw") == 0)
+ result = RAW;
+ else if (Lexer.wstrcasecmp(s, "utf8") == 0)
+ result = UTF8;
+ else if (Lexer.wstrcasecmp(s, "iso2022") == 0)
+ result = ISO2022;
+ else if (Lexer.wstrcasecmp(s, "mac") == 0)
+ result = MACROMAN;
+ else
+ Report.badArgument(option);
+
+ return result;
+ }
+
+ /* slight hack to avoid changes to pprint.c */
+ private boolean parseIndent( String s, String option )
+ {
+ boolean b = IndentContent;
+
+ if (Lexer.wstrcasecmp(s, "yes") == 0)
+ {
+ b = true;
+ SmartIndent = false;
+ }
+ else if (Lexer.wstrcasecmp(s, "true") == 0)
+ {
+ b = true;
+ SmartIndent = false;
+ }
+ else if (Lexer.wstrcasecmp(s, "no") == 0)
+ {
+ b = false;
+ SmartIndent = false;
+ }
+ else if (Lexer.wstrcasecmp(s, "false") == 0)
+ {
+ b = false;
+ SmartIndent = false;
+ }
+ else if (Lexer.wstrcasecmp(s, "auto") == 0)
+ {
+ b = true;
+ SmartIndent = true;
+ }
+ else
+ Report.badArgument(option);
+ return b;
+ }
+
+ /**
+  * Registers each name in the list as an inline tag with the tag table.
+  *
+  * @param s      tag names separated by blanks, tabs, line breaks or commas
+  * @param option option name; not used by this method
+  */
+ public void parseInlineTagNames( String s, String option )
+ {
+     for ( StringTokenizer names = new StringTokenizer( s, " \t\n\r," ); names.hasMoreTokens(); )
+     {
+         String tagName = names.nextToken();
+         tt.defineInlineTag( tagName );
+     }
+ }
+
+ /**
+  * Registers each name in the list as a block-level tag with the tag table.
+  *
+  * @param s      tag names separated by blanks, tabs, line breaks or commas
+  * @param option option name; not used by this method
+  */
+ public void parseBlockTagNames( String s, String option )
+ {
+     for ( StringTokenizer names = new StringTokenizer( s, " \t\n\r," ); names.hasMoreTokens(); )
+     {
+         String tagName = names.nextToken();
+         tt.defineBlockTag( tagName );
+     }
+ }
+
+ /**
+  * Registers each name in the list as an empty tag with the tag table.
+  *
+  * @param s      tag names separated by blanks, tabs, line breaks or commas
+  * @param option option name; not used by this method
+  */
+ public void parseEmptyTagNames( String s, String option )
+ {
+     for ( StringTokenizer names = new StringTokenizer( s, " \t\n\r," ); names.hasMoreTokens(); )
+     {
+         String tagName = names.nextToken();
+         tt.defineEmptyTag( tagName );
+     }
+ }
+
+ /**
+  * Registers each name in the list as a pre tag with the tag table.
+  *
+  * @param s      tag names separated by blanks, tabs, line breaks or commas
+  * @param option option name; not used by this method
+  */
+ public void parsePreTagNames( String s, String option )
+ {
+     for ( StringTokenizer names = new StringTokenizer( s, " \t\n\r," ); names.hasMoreTokens(); )
+     {
+         String tagName = names.nextToken();
+         tt.definePreTag( tagName );
+     }
+ }
+
+ /**
+  * Parses the doctype option:
+  *
+  *   doctype: omit | auto | strict | loose | transitional | &lt;fpi&gt;
+  *
+  * where the fpi is a quoted string similar to
+  *
+  *   "-//ACME//DTD HTML 3.14159//EN"
+  *
+  * docTypeMode is set as a side effect. A value that starts with a double
+  * quote selects DOCTYPE_USER and is returned trimmed, quotes included.
+  * Otherwise only the first word counts; an unknown word is reported and
+  * falls back to DOCTYPE_AUTO.
+  *
+  * @param s      raw property value
+  * @param option option name used when the value is reported as bad
+  * @return the quoted fpi for a user doctype, null for every keyword
+  */
+ protected String parseDocType( String s, String option )
+ {
+     String trimmed = s.trim();
+
+     /* a leading quote marks a user supplied public identifier */
+     if (trimmed.startsWith("\""))
+     {
+         docTypeMode = DOCTYPE_USER;
+         return trimmed;
+     }
+
+     /* only the first word is significant */
+     StringTokenizer words = new StringTokenizer( trimmed, " \t\n\r," );
+     String keyword = words.hasMoreTokens() ? words.nextToken() : "";
+
+     int mode;
+     boolean known = true;
+     if (Lexer.wstrcasecmp(keyword, "omit") == 0)
+     {
+         mode = DOCTYPE_OMIT;
+     }
+     else if (Lexer.wstrcasecmp(keyword, "strict") == 0)
+     {
+         mode = DOCTYPE_STRICT;
+     }
+     else if (Lexer.wstrcasecmp(keyword, "loose") == 0 ||
+              Lexer.wstrcasecmp(keyword, "transitional") == 0)
+     {
+         mode = DOCTYPE_LOOSE;
+     }
+     else if (Lexer.wstrcasecmp(keyword, "auto") == 0)
+     {
+         mode = DOCTYPE_AUTO;
+     }
+     else
+     {
+         mode = DOCTYPE_AUTO;
+         known = false;
+     }
+
+     /* the mode is stored before the report, as in the fallback case */
+     docTypeMode = mode;
+     if (!known)
+         Report.badArgument(option);
+
+     return null;
+ }
+
+}