2 * @(#)Configuration.java 1.11 2000/08/16
6 package net.sourceforge.phpdt.tidy;
10 * Read configuration file and manage configuration properties.
12 * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
13 * See Tidy.java for the copyright notice.
14 * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
15 * HTML Tidy Release 4 Aug 2000</a>
17 * @author Dave Raggett <dsr@w3.org>
18 * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
19 * @version 1.0, 1999/05/22
20 * @version 1.0.1, 1999/05/29
21 * @version 1.1, 1999/06/18 Java Bean
22 * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
23 * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
24 * @version 1.4, 1999/09/04 DOM support
25 * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
26 * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
27 * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
28 * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
29 * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
30 * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
31 * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
35 Configuration files associate a property name with a value.
36 The format is that of a Java .properties file.
39 import java.io.FileInputStream;
40 import java.io.IOException;
41 import java.util.Enumeration;
42 import java.util.Properties;
43 import java.util.StringTokenizer;
45 public class Configuration implements java.io.Serializable {
47 /* character encodings */
48 public static final int RAW = 0;
49 public static final int ASCII = 1;
50 public static final int LATIN1 = 2;
51 public static final int UTF8 = 3;
52 public static final int ISO2022 = 4;
53 public static final int MACROMAN = 5;
55 /* mode controlling treatment of doctype */
56 public static final int DOCTYPE_OMIT = 0;
57 public static final int DOCTYPE_AUTO = 1;
58 public static final int DOCTYPE_STRICT= 2;
59 public static final int DOCTYPE_LOOSE = 3;
60 public static final int DOCTYPE_USER = 4;
62 protected int spaces = 2; /* default indentation */
63 protected int wraplen = 68; /* default wrap margin */
/* one of the character encoding constants above */
64 protected int CharEncoding = ASCII;
65 protected int tabsize = 4;
67 protected int docTypeMode = DOCTYPE_AUTO; /* see doctype property */
68 protected String altText = null; /* default text for alt attribute */
69 protected String slidestyle = null; /* style sheet for slides */
70 protected String docTypeStr = null; /* user specified doctype */
71 protected String errfile = null; /* file name to write errors to */
72 protected boolean writeback = false; /* if true then output tidied markup */
74 protected boolean OnlyErrors = false; /* if true normal output is suppressed */
75 protected boolean ShowWarnings = true; /* however errors are always shown */
76 protected boolean Quiet = false; /* no 'Parsing X', guessed DTD or summary */
77 protected boolean IndentContent = false; /* indent content of appropriate tags */
78 protected boolean SmartIndent = false; /* does text/block level content affect indentation */
79 protected boolean HideEndTags = false; /* suppress optional end tags */
80 protected boolean XmlTags = false; /* treat input as XML */
81 protected boolean XmlOut = false; /* create output as XML */
82 protected boolean xHTML = false; /* output extensible HTML */
83 protected boolean XmlPi = false; /* add <?xml?> for XML docs */
84 protected boolean RawOut = false; /* avoid mapping values > 127 to entities */
85 protected boolean UpperCaseTags = false; /* output tags in upper not lower case */
86 protected boolean UpperCaseAttrs = false; /* output attributes in upper not lower case */
87 protected boolean MakeClean = false; /* remove presentational clutter */
88 protected boolean LogicalEmphasis = false; /* replace i by em and b by strong */
89 protected boolean DropFontTags = false; /* discard presentation tags */
90 protected boolean DropEmptyParas = true; /* discard empty p elements */
91 protected boolean FixComments = true; /* fix comments with adjacent hyphens */
92 protected boolean BreakBeforeBR = false; /* o/p newline before <br> or not? */
93 protected boolean BurstSlides = false; /* create slides on each h2 element */
94 protected boolean NumEntities = false; /* use numeric entities */
95 protected boolean QuoteMarks = false; /* output " marks as &quot; */
96 protected boolean QuoteNbsp = true; /* output non-breaking space as entity */
97 protected boolean QuoteAmpersand = true; /* output naked ampersand as &amp; */
98 protected boolean WrapAttVals = false; /* wrap within attribute values */
99 protected boolean WrapScriptlets = false; /* wrap within JavaScript string literals */
100 protected boolean WrapSection = true; /* wrap within <![ ... ]> section tags */
101 protected boolean WrapAsp = true; /* wrap within ASP pseudo elements */
102 protected boolean WrapJste = true; /* wrap within JSTE pseudo elements */
103 protected boolean WrapPhp = true; /* wrap within PHP pseudo elements */
104 protected boolean FixBackslash = true; /* fix URLs by replacing \ with / */
105 protected boolean IndentAttributes = false; /* newline+indent before each attribute */
106 protected boolean XmlPIs = false; /* if set to yes PIs must end with ?> */
107 protected boolean XmlSpace = false; /* if set to yes adds xml:space attr as needed */
108 protected boolean EncloseBodyText = false; /* if yes text at body is wrapped in <p>'s */
109 protected boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in <p>'s */
110 protected boolean KeepFileTimes = true; /* if yes last modified time is preserved */
111 protected boolean Word2000 = false; /* draconian cleaning for Word2000 */
112 protected boolean TidyMark = true; /* add meta element indicating tidied doc */
113 protected boolean Emacs = false; /* if true format error output for GNU Emacs */
114 protected boolean LiteralAttribs = false; /* if true attributes may use newlines */
116 protected TagTable tt; /* TagTable associated with this Configuration */
/* raw name/value pairs filled by addProps() and parseFile(); declared transient, so it is not serialized */
118 private transient Properties _properties = new Properties();
/* No-argument constructor. NOTE(review): its body is not visible in this listing. */
120 public Configuration()
/**
 * Copies every property of p into the internal property table.
 * Keys come from p.propertyNames() and values from p.getProperty(key); an
 * entry already stored under the same key is replaced.
 * NOTE(review): the lines that follow the loop (original lines 132-134) are
 * missing from this listing, so any work done after the copy cannot be
 * confirmed here.
 *
 * @param p the properties to merge into this configuration
 */
124 public void addProps( Properties p )
/* "enum" is a reserved word from Java 5 onwards, so the enumeration is called "names" */
126 Enumeration names = p.propertyNames();
127 while (names.hasMoreElements())
129 String key = (String) names.nextElement();
130 String value = p.getProperty(key);
131 _properties.put(key, value);
/**
 * Loads the properties file named by filename into the internal property table.
 * An IOException is reported on System.err as the file name immediately
 * followed by the exception text (no separator between the two).
 * NOTE(review): the FileInputStream is created inline and no reference to it
 * is kept, so this method cannot close it, and Properties.load() leaves the
 * stream open. Consider keeping the stream in a local and closing it in a
 * finally block.
 * NOTE(review): the try/brace lines and whatever follows the catch are missing
 * from this listing.
 *
 * @param filename path of the configuration file to read
 */
136 public void parseFile( String filename )
140 _properties.load( new FileInputStream( filename ) );
142 catch (IOException e)
144 System.err.println(filename + e.toString());
/**
 * Transfers the recognised options from the internal property table into the
 * corresponding fields. For each option the raw string is fetched with
 * getProperty() and handed to the matching parse helper together with the
 * option name, which most helpers pass to Report.badArgument() when the value
 * is unusable. The new-*-tags options declare tag names on the tag table
 * instead of setting a field, and doctype updates docTypeMode as well as
 * docTypeStr.
 * NOTE(review): this listing omits the line between each getProperty() call
 * and the assignment that follows it (presumably a guard for options that are
 * absent) -- confirm against the complete file.
 */
150 private void parseProps()
154 value = _properties.getProperty("indent-spaces");
156 spaces = parseInt(value, "indent-spaces");
158 value = _properties.getProperty("wrap");
160 wraplen = parseInt(value, "wrap");
162 value = _properties.getProperty("wrap-attributes");
164 WrapAttVals = parseBool(value, "wrap-attributes");
166 value = _properties.getProperty("wrap-script-literals");
168 WrapScriptlets = parseBool(value, "wrap-script-literals");
170 value = _properties.getProperty("wrap-sections");
172 WrapSection = parseBool(value, "wrap-sections");
174 value = _properties.getProperty("wrap-asp");
176 WrapAsp = parseBool(value, "wrap-asp");
178 value = _properties.getProperty("wrap-jste");
180 WrapJste = parseBool(value, "wrap-jste");
182 value = _properties.getProperty("wrap-php");
184 WrapPhp = parseBool(value, "wrap-php");
186 value = _properties.getProperty("literal-attributes");
188 LiteralAttribs = parseBool(value, "literal-attributes");
190 value = _properties.getProperty("tab-size");
192 tabsize = parseInt(value, "tab-size");
/* "markup" is stored inverted: the field it feeds is OnlyErrors */
194 value = _properties.getProperty("markup");
196 OnlyErrors = parseInvBool(value, "markup");
198 value = _properties.getProperty("quiet");
200 Quiet = parseBool(value, "quiet");
202 value = _properties.getProperty("tidy-mark");
204 TidyMark = parseBool(value, "tidy-mark");
206 value = _properties.getProperty("indent");
208 IndentContent = parseIndent(value, "indent");
/* the option name handed to the parser must match the key read above, since it is what gets reported */
210 value = _properties.getProperty("indent-attributes");
212 IndentAttributes = parseBool(value, "indent-attributes");
214 value = _properties.getProperty("hide-endtags");
216 HideEndTags = parseBool(value, "hide-endtags");
218 value = _properties.getProperty("input-xml");
220 XmlTags = parseBool(value, "input-xml");
222 value = _properties.getProperty("output-xml");
224 XmlOut = parseBool(value, "output-xml");
226 value = _properties.getProperty("output-xhtml");
228 xHTML = parseBool(value, "output-xhtml");
/* "add-xml-pi" and "add-xml-decl" both feed XmlPi; the one read later wins */
230 value = _properties.getProperty("add-xml-pi");
232 XmlPi = parseBool(value, "add-xml-pi");
234 value = _properties.getProperty("add-xml-decl");
236 XmlPi = parseBool(value, "add-xml-decl");
238 value = _properties.getProperty("assume-xml-procins");
240 XmlPIs = parseBool(value, "assume-xml-procins");
242 value = _properties.getProperty("raw");
244 RawOut = parseBool(value, "raw");
246 value = _properties.getProperty("uppercase-tags");
248 UpperCaseTags = parseBool(value, "uppercase-tags");
250 value = _properties.getProperty("uppercase-attributes");
252 UpperCaseAttrs = parseBool(value, "uppercase-attributes");
254 value = _properties.getProperty("clean");
256 MakeClean = parseBool(value, "clean");
258 value = _properties.getProperty("logical-emphasis");
260 LogicalEmphasis = parseBool(value, "logical-emphasis");
262 value = _properties.getProperty("word-2000");
264 Word2000 = parseBool(value, "word-2000");
266 value = _properties.getProperty("drop-empty-paras");
268 DropEmptyParas = parseBool(value, "drop-empty-paras");
270 value = _properties.getProperty("drop-font-tags");
272 DropFontTags = parseBool(value, "drop-font-tags");
274 value = _properties.getProperty("enclose-text");
276 EncloseBodyText = parseBool(value, "enclose-text");
278 value = _properties.getProperty("enclose-block-text");
280 EncloseBlockText = parseBool(value, "enclose-block-text");
/* NOTE(review): the statement that consumes the "alt-text" value (original lines 283-284) is not visible in this listing */
282 value = _properties.getProperty("alt-text");
286 value = _properties.getProperty("add-xml-space");
288 XmlSpace = parseBool(value, "add-xml-space");
290 value = _properties.getProperty("fix-bad-comments");
292 FixComments = parseBool(value, "fix-bad-comments");
294 value = _properties.getProperty("split");
296 BurstSlides = parseBool(value, "split");
298 value = _properties.getProperty("break-before-br");
300 BreakBeforeBR = parseBool(value, "break-before-br");
302 value = _properties.getProperty("numeric-entities");
304 NumEntities = parseBool(value, "numeric-entities");
306 value = _properties.getProperty("quote-marks");
308 QuoteMarks = parseBool(value, "quote-marks");
310 value = _properties.getProperty("quote-nbsp");
312 QuoteNbsp = parseBool(value, "quote-nbsp");
314 value = _properties.getProperty("quote-ampersand");
316 QuoteAmpersand = parseBool(value, "quote-ampersand");
318 value = _properties.getProperty("write-back");
320 writeback = parseBool(value, "write-back");
322 value = _properties.getProperty("keep-time");
324 KeepFileTimes = parseBool(value, "keep-time");
326 value = _properties.getProperty("show-warnings");
328 ShowWarnings = parseBool(value, "show-warnings");
330 value = _properties.getProperty("error-file");
332 errfile = parseName(value, "error-file");
334 value = _properties.getProperty("slide-style");
336 slidestyle = parseName(value, "slide-style");
/* the four new-*-tags options register tag names on the tag table rather than setting a field */
338 value = _properties.getProperty("new-inline-tags");
340 parseInlineTagNames(value, "new-inline-tags");
342 value = _properties.getProperty("new-blocklevel-tags");
344 parseBlockTagNames(value, "new-blocklevel-tags");
346 value = _properties.getProperty("new-empty-tags");
348 parseEmptyTagNames(value, "new-empty-tags");
350 value = _properties.getProperty("new-pre-tags");
352 parsePreTagNames(value, "new-pre-tags");
354 value = _properties.getProperty("char-encoding");
356 CharEncoding = parseCharEncoding(value, "char-encoding");
358 value = _properties.getProperty("doctype");
360 docTypeStr = parseDocType(value, "doctype");
362 value = _properties.getProperty("fix-backslash");
364 FixBackslash = parseBool(value, "fix-backslash");
366 value = _properties.getProperty("gnu-emacs");
368 Emacs = parseBool(value, "gnu-emacs");
/*
 * NOTE(review): original lines 372-373 are missing from this listing and may
 * contain a method header (upstream JTidy declares adjust() at this point) --
 * confirm against the complete file. Apart from the test on EncloseBlockText,
 * the condition guarding each assignment below is not visible either; only the
 * comments indicate when each one applies.
 */
371 /* ensure that config is self consistent */
374 if (EncloseBlockText)
375 EncloseBodyText = true;
377 /* avoid the need to set IndentContent when SmartIndent is set */
380 IndentContent = true;
382 /* disable wrapping */
/* 0x7FFFFFFF is Integer.MAX_VALUE */
384 wraplen = 0x7FFFFFFF;
386 /* Word 2000 needs o:p to be declared as inline */
389 tt.defineInlineTag("o:p");
392 /* XHTML is written in lower case */
396 UpperCaseTags = false;
397 UpperCaseAttrs = false;
400 /* if XML in, then XML out */
407 /* XML requires end tags */
410 QuoteAmpersand = true;
/**
 * Converts the option value s to an int using Integer.parseInt().
 * A NumberFormatException is reported through Report.badArgument(option).
 * NOTE(review): the declaration of i, the value used after a failed parse and
 * the return statement are not visible in this listing.
 *
 * @param s the raw option value
 * @param option the option name, used when reporting a bad value
 */
415 private static int parseInt( String s, String option )
419 i = Integer.parseInt( s );
421 catch ( NumberFormatException e ) {
422 Report.badArgument(option);
/**
 * Interprets the option value s as a boolean by looking at its first
 * character only: t, T, Y, y or 1 selects the first branch, f, F, N, n or 0
 * the second, and anything else is reported through Report.badArgument(option).
 * A null or empty s skips all three cases.
 * NOTE(review): the values assigned in each branch and the return statement
 * are not visible in this listing (presumably true for the first branch and
 * false for the second).
 *
 * @param s the raw option value, may be null
 * @param option the option name, used when reporting a bad value
 */
428 private static boolean parseBool( String s, String option )
431 if ( s != null && s.length() > 0 ) {
432 char c = s.charAt(0);
433 if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y') || (c == '1'))
435 else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n') || (c == '0'))
438 Report.badArgument(option);
/**
 * Interprets the option value s by its first character, recognising the same
 * spellings as parseBool: t, T, Y, y or 1 selects the first branch, f, F, N, n
 * or 0 the second, and anything else is reported through
 * Report.badArgument(option). A null or empty s skips all three cases.
 * NOTE(review): the values assigned in each branch and the return statement
 * are not visible in this listing; the method name suggests the result is the
 * inverse of what parseBool yields for the same input -- confirm.
 *
 * @param s the raw option value, may be null
 * @param option the option name, used when reporting a bad value
 */
443 private static boolean parseInvBool( String s, String option )
446 if ( s != null && s.length() > 0 ) {
447 char c = s.charAt(0);
/* '1' and '0' are accepted here too, so an inverted option takes the same spellings as every other boolean option */
448 if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y') || (c == '1'))
450 else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n') || (c == '0'))
453 Report.badArgument(option);
/**
 * Reads a name (the callers visible in this file use it for file and style
 * sheet names) from the option value s. The value is split on the default
 * StringTokenizer delimiters and at least one token is expected;
 * Report.badArgument(option) is called on the other path.
 * s must not be null, because the StringTokenizer constructor dereferences it.
 * NOTE(review): which token is returned, and what is returned for a value
 * without tokens, is not visible in this listing.
 *
 * @param s the raw option value, non-null
 * @param option the option name, used when reporting a bad value
 */
458 private static String parseName( String s, String option )
460 StringTokenizer t = new StringTokenizer( s );
462 if ( t.countTokens() >= 1 )
465 Report.badArgument(option);
/**
 * Maps the option value s to a character encoding code. The value is compared
 * with Lexer.wstrcasecmp() against "ascii", "latin1", "raw", "utf8", "iso2022"
 * and "mac", in that order; a value matching none of them is reported through
 * Report.badArgument(option).
 * NOTE(review): the value returned for each name is not visible in this
 * listing (presumably the matching encoding constant of this class), and
 * wstrcasecmp is presumably a case-insensitive comparison -- confirm in Lexer.
 *
 * @param s the raw option value
 * @param option the option name, used when reporting a bad value
 */
469 private static int parseCharEncoding( String s, String option )
473 if (Lexer.wstrcasecmp(s, "ascii") == 0)
475 else if (Lexer.wstrcasecmp(s, "latin1") == 0)
477 else if (Lexer.wstrcasecmp(s, "raw") == 0)
479 else if (Lexer.wstrcasecmp(s, "utf8") == 0)
481 else if (Lexer.wstrcasecmp(s, "iso2022") == 0)
483 else if (Lexer.wstrcasecmp(s, "mac") == 0)
486 Report.badArgument(option);
/*
 * Parses the value of the indent option. The result variable starts out as the
 * current IndentContent setting; the value is then compared with
 * Lexer.wstrcasecmp() against "yes", "true", "no", "false" and "auto", and
 * anything else is reported through Report.badArgument(option).
 * NOTE(review): the statements executed for each recognised word and the
 * return statement are not visible in this listing.
 */
491 /* slight hack to avoid changes to pprint.c */
492 private boolean parseIndent( String s, String option )
494 boolean b = IndentContent;
496 if (Lexer.wstrcasecmp(s, "yes") == 0)
501 else if (Lexer.wstrcasecmp(s, "true") == 0)
506 else if (Lexer.wstrcasecmp(s, "no") == 0)
511 else if (Lexer.wstrcasecmp(s, "false") == 0)
516 else if (Lexer.wstrcasecmp(s, "auto") == 0)
522 Report.badArgument(option);
/**
 * Splits s on spaces, tabs, newlines, carriage returns and commas and passes
 * each resulting token to tt.defineInlineTag().
 * The option argument is not used in the lines visible here.
 *
 * @param s list of tag names, non-null
 * @param option the option name
 */
526 private void parseInlineTagNames( String s, String option )
528 StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
529 while ( t.hasMoreTokens() ) {
530 tt.defineInlineTag( t.nextToken() );
/**
 * Splits s on spaces, tabs, newlines, carriage returns and commas and passes
 * each resulting token to tt.defineBlockTag().
 * The option argument is not used in the lines visible here.
 *
 * @param s list of tag names, non-null
 * @param option the option name
 */
534 private void parseBlockTagNames( String s, String option )
536 StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
537 while ( t.hasMoreTokens() ) {
538 tt.defineBlockTag( t.nextToken() );
/**
 * Splits s on spaces, tabs, newlines, carriage returns and commas and passes
 * each resulting token to tt.defineEmptyTag().
 * The option argument is not used in the lines visible here.
 *
 * @param s list of tag names, non-null
 * @param option the option name
 */
542 private void parseEmptyTagNames( String s, String option )
544 StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
545 while ( t.hasMoreTokens() ) {
546 tt.defineEmptyTag( t.nextToken() );
/**
 * Splits s on spaces, tabs, newlines, carriage returns and commas and passes
 * each resulting token to tt.definePreTag().
 * The option argument is not used in the lines visible here.
 *
 * @param s list of tag names, non-null
 * @param option the option name
 */
550 private void parsePreTagNames( String s, String option )
552 StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
553 while ( t.hasMoreTokens() ) {
554 tt.definePreTag( t.nextToken() );
559 doctype: omit | auto | strict | loose | <fpi>
561 where the fpi is a string similar to
563 "-//ACME//DTD HTML 3.14159//EN"
565 protected String parseDocType( String s, String option )
569 /* "-//ACME//DTD HTML 3.14159//EN" or similar */
571 if (s.startsWith("\""))
573 docTypeMode = DOCTYPE_USER;
577 /* read first word */
579 StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
580 if (t.hasMoreTokens())
581 word = t.nextToken();
583 if (Lexer.wstrcasecmp(word, "omit") == 0)
584 docTypeMode = DOCTYPE_OMIT;
585 else if (Lexer.wstrcasecmp(word, "strict") == 0)
586 docTypeMode = DOCTYPE_STRICT;
587 else if (Lexer.wstrcasecmp(word, "loose") == 0 ||
588 Lexer.wstrcasecmp(word, "transitional") == 0)
589 docTypeMode = DOCTYPE_LOOSE;
590 else if (Lexer.wstrcasecmp(word, "auto") == 0)
591 docTypeMode = DOCTYPE_AUTO;
594 docTypeMode = DOCTYPE_AUTO;
595 Report.badArgument(option);