--- /dev/null
+package net.sourceforge.phpdt.tidy;
+
+/**********************************************************************
+Copyright (c) 2000, 2002 IBM Corp. and others.
+All rights reserved. This program and the accompanying materials
+are made available under the terms of the Common Public License v1.0
+which accompanies this distribution, and is available at
+http://www.eclipse.org/legal/cpl-v10.html
+
+Contributors:
+ IBM Corporation - Initial implementation
+ Klaus Hartlage - www.eclipseproject.de
+**********************************************************************/
+
+import org.eclipse.jface.action.Action;
+import org.eclipse.jface.resource.JFaceResources;
+import org.eclipse.jface.text.BadLocationException;
+import org.eclipse.jface.text.Document;
+import org.eclipse.jface.text.TextViewer;
+import org.eclipse.swt.SWT;
+import org.eclipse.swt.custom.StyledText;
+import org.eclipse.swt.layout.GridData;
+import org.eclipse.swt.widgets.Composite;
+import org.eclipse.ui.IActionBars;
+import org.eclipse.ui.IWorkbenchActionConstants;
+import org.eclipse.ui.IWorkbenchPage;
+import org.eclipse.ui.PartInitException;
+import org.eclipse.ui.PlatformUI;
+import org.eclipse.ui.part.ViewPart;
+
+/**
+ * The JTidyConsole is used to display the low level JTidy output
+ *
+ * @see ViewPart
+ */
+public class JTidyConsole extends ViewPart {
+
+ /** Id of this console view; print and clear use it to find or open the view. */
+ public static final String CONSOLE_ID = "net.sourceforge.phpdt.tidy.consoleview";
+
+ // Text viewer that shows the output; assigned in createPartControl.
+ private TextViewer viewer = null;
+ // Document shown by the viewer; stays null until output is set for the first time.
+ private Document document = null;
+
+ /**
+  * Creates the console view. No widgets are built here; that happens in
+  * createPartControl.
+  */
+ public JTidyConsole() {
+ }
+
+ /**
+  * Builds the console UI: a non-editable, wrapping text viewer that uses the
+  * JFace text font, and handlers for the workbench's global cut, copy and
+  * paste actions which forward to the viewer's text widget.
+  *
+  * @param parent the composite the viewer is created in
+  * @see ViewPart#createPartControl
+  */
+ public void createPartControl(Composite parent) {
+     viewer = new TextViewer(parent, SWT.WRAP | SWT.V_SCROLL | SWT.H_SCROLL);
+     viewer.getControl().setLayoutData(new GridData(GridData.FILL_BOTH));
+     viewer.setEditable(false);
+     viewer.getTextWidget().setFont(JFaceResources.getFontRegistry().get(JFaceResources.TEXT_FONT));
+
+     IActionBars actionBars = getViewSite().getActionBars();
+     actionBars.setGlobalActionHandler(IWorkbenchActionConstants.CUT, new Action() {
+         public void run() {
+             viewer.getTextWidget().cut();
+         }
+     });
+     actionBars.setGlobalActionHandler(IWorkbenchActionConstants.COPY, new Action() {
+         public void run() {
+             viewer.getTextWidget().copy();
+         }
+     });
+     actionBars.setGlobalActionHandler(IWorkbenchActionConstants.PASTE, new Action() {
+         public void run() {
+             viewer.getTextWidget().paste();
+         }
+     });
+ }
+
+ /**
+  * Passes keyboard focus to the console's text control. Does nothing while
+  * the viewer has not been created yet.
+  *
+  * @see ViewPart#setFocus
+  */
+ public void setFocus() {
+     if (viewer != null && viewer.getControl() != null) {
+         viewer.getControl().setFocus();
+     }
+ }
+
+ /**
+  * Replaces the console content: a new document holding the given text
+  * becomes both the current document and the viewer's input.
+  *
+  * @param text the text to show
+  */
+ private void setOutputText(String text) {
+     Document fresh = new Document(text);
+     document = fresh;
+     viewer.setDocument(fresh);
+ }
+
+ /**
+  * Appends the given text to the end of the console document. If no document
+  * exists yet, one is created that contains exactly the given text.
+  *
+  * @param text the text to append
+  */
+ private void appendOutputText(String text) {
+     if (document == null) {
+         // First output: the new document already holds the text, so it
+         // must not be appended a second time.
+         document = new Document(text);
+         viewer.setDocument(document);
+         return;
+     }
+     try {
+         document.replace(document.getLength(), 0, text);
+     } catch (BadLocationException e) {
+         // Not expected, since the offset is the document length; report
+         // instead of silently losing the output.
+         System.err.println("Could not append to JTidy console: " + e.getMessage());
+     }
+ }
+
+ /**
+  * Prints the given text to the JTidy console of the active workbench page.
+  * If the console view is already present the text is appended; otherwise
+  * the view is opened and shows only the given text. When there is no active
+  * workbench window or page, the text is written to standard output instead.
+  *
+  * @param output the text to print
+  */
+ public static void print(String output) {
+     org.eclipse.ui.IWorkbenchWindow window = PlatformUI.getWorkbench().getActiveWorkbenchWindow();
+     IWorkbenchPage page = (window == null) ? null : window.getActivePage();
+     if (page == null) {
+         // No place to show a view; do not lose the output.
+         System.out.print(output);
+         return;
+     }
+     try {
+         JTidyConsole console = (JTidyConsole) page.findView(JTidyConsole.CONSOLE_ID);
+         if (console != null) {
+             console.appendOutputText(output);
+         } else {
+             // showView hands back the part it opened.
+             console = (JTidyConsole) page.showView(JTidyConsole.CONSOLE_ID);
+             console.setOutputText(output);
+         }
+     } catch (PartInitException e) {
+         System.err.println("Problems occurred when opening console view: " + e.getMessage());
+     }
+ }
+ /**
+  * Empties the JTidy console of the active workbench page, opening the
+  * console view first if it is not present. Does nothing when there is no
+  * active workbench window or page.
+  */
+ public static void clear() {
+     org.eclipse.ui.IWorkbenchWindow window = PlatformUI.getWorkbench().getActiveWorkbenchWindow();
+     IWorkbenchPage page = (window == null) ? null : window.getActivePage();
+     if (page == null) {
+         return;
+     }
+     try {
+         JTidyConsole console = (JTidyConsole) page.findView(JTidyConsole.CONSOLE_ID);
+         if (console == null) {
+             console = (JTidyConsole) page.showView(JTidyConsole.CONSOLE_ID);
+         }
+         console.setOutputText("");
+     } catch (PartInitException e) {
+         System.err.println("Problems occurred when opening console view: " + e.getMessage());
+     }
+ }
+ public static void println(String output) {
+ print(output+"\n");
+ }
+ /**
+ * Creates a string buffer from the given input stream
+ */
+// public static String getStringFromStream(InputStream stream) throws IOException {
+// StringBuffer buffer = new StringBuffer();
+// byte[] b = new byte[100];
+// int finished = 0;
+// while (finished != -1) {
+// finished = stream.read(b);
+// if (finished != -1) {
+// String current = new String(b, 0, finished);
+// buffer.append(current);
+// }
+// }
+// return buffer.toString();
+// }
+
+}
--- /dev/null
+package net.sourceforge.phpdt.tidy;
+
+import java.io.File;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+
+import org.eclipse.core.resources.IResource;
+import org.eclipse.core.resources.IWorkspace;
+import org.eclipse.core.resources.ResourcesPlugin;
+import org.eclipse.core.runtime.IPluginDescriptor;
+import org.eclipse.jface.preference.IPreferenceStore;
+import org.eclipse.jface.util.IPropertyChangeListener;
+import org.eclipse.jface.util.PropertyChangeEvent;
+import org.eclipse.ui.plugin.AbstractUIPlugin;
+import net.sourceforge.phpdt.tidy.w3c.Configuration;
+import net.sourceforge.phpdt.tidy.w3c.Tidy;
+
+import net.sourceforge.phpdt.tidy.preferences.IPreferenceConstants;
+
+/**
+ * The main plugin class to be used in the desktop.
+ */
+public class JtidyPlugin extends AbstractUIPlugin implements IPreferenceConstants {
+ // The shared instance; assigned in the constructor.
+ private static JtidyPlugin fPlugin;
+ // Resource bundle; null when the bundle could not be loaded.
+ private ResourceBundle fResourceBundle;
+
+ // Tidy instance handed out by getTidyInstance; created in initTidy.
+ private Tidy fTidy;
+ // Cached value of the GENERAL_USE_CONFIG_FILE preference.
+ private boolean fUseConfigurationFile;
+ // Attribute name used to recognise markers created by JTidy.
+ // NOTE(review): public, static and not final - looks like it is meant to be a constant; confirm nothing assigns it.
+ public static String MARKER_NAME = "net.sourceforge.phpdt.tidy.MarkerName";
+
+ /**
+  * Creates the plugin, publishes it as the shared instance, loads the
+  * resource bundle (left null if it is missing) and sets up Tidy.
+  *
+  * @param descriptor the plugin descriptor passed on to the superclass
+  */
+ public JtidyPlugin(IPluginDescriptor descriptor) {
+     super(descriptor);
+     // Must be set before initTidy(), which goes through getDefault().
+     fPlugin = this;
+
+     ResourceBundle bundle = null;
+     try {
+         bundle = ResourceBundle.getBundle("net.sourceforge.phpdt.tidy.JtidyPluginResources");
+     } catch (MissingResourceException missing) {
+         bundle = null;
+     }
+     fResourceBundle = bundle;
+
+     initTidy();
+ }
+
+ /**
+  * Registers the default values of the JTidy preferences. Boolean
+  * preferences use the boolean overload of setDefault, matching the
+  * getBoolean calls that read them; each key is registered once.
+  *
+  * @param store the preference store receiving the defaults
+  * @see org.eclipse.ui.plugin.AbstractUIPlugin#initializeDefaultPreferences(org.eclipse.jface.preference.IPreferenceStore)
+  */
+ protected void initializeDefaultPreferences(IPreferenceStore store) {
+     store.setDefault(GENERAL_CONFIG_FILE, ""); //$NON-NLS-1$
+     store.setDefault(GENERAL_USE_CONFIG_FILE, false);
+
+     store.setDefault(GENERAL_TIDY_MARK, true);
+     store.setDefault(GENERAL_SHOW_WARNINGS, true);
+     store.setDefault(GENERAL_QUIET, false);
+     store.setDefault(GENERAL_EMACS, false);
+     store.setDefault(GENERAL_KEEP_FILE_TIMES, true);
+
+     store.setDefault(WRAP_ATT_VALS, false);
+     store.setDefault(WRAP_SCRIPTLETS, false);
+     store.setDefault(WRAP_SECTION, true);
+     store.setDefault(WRAP_ASP, true);
+     store.setDefault(WRAP_JSTE, true);
+     store.setDefault(WRAP_PHP, true);
+     store.setDefault(INDENT_ATTRIBUTES, false);
+     store.setDefault(LITERAL_ATTRIBS, false);
+
+     // store.setDefault(TYPE_TREAD_AS_XML, "false"); //$NON-NLS-1$
+     // store.setDefault(TYPE_DOCTYPE, "false"); //$NON-NLS-1$
+
+     store.setDefault(OUTPUT_MAKE_CLEAR, false);
+     store.setDefault(OUTPUT_STRIP_WORD, false);
+     store.setDefault(OUTPUT_ENCLOSE_BODY_TEXT, false);
+     store.setDefault(OUTPUT_ENCLOSE_BLOCK_TEXT, false);
+
+     store.setDefault(OUT_AS_RAW, false);
+     store.setDefault(OUT_UPPER_TAGS, false);
+     store.setDefault(OUT_UPPER_ATTR, false);
+     store.setDefault(OUT_BREAK_BR, false);
+     store.setDefault(OUT_WRAP_ATTR_VALUES, false);
+     store.setDefault(OUT_WRAP_SCRIPS, false);
+ }
+
+ /**
+  * Creates the Tidy instance, loads the configured configuration file if it
+  * exists, registers a preference listener that copies later preference
+  * changes into the Tidy configuration, and finally applies the current
+  * preference values through initConfiguration().
+  */
+ private void initTidy() {
+ fTidy = new Tidy();
+ // NOTE(review): the file is loaded here without looking at GENERAL_USE_CONFIG_FILE;
+ // initConfiguration() below decides again based on that flag - confirm this is intended.
+ String rawConfigFileName = getPreferenceStore().getString(GENERAL_CONFIG_FILE);
+ updateTidyConfig(rawConfigFileName);
+ IPropertyChangeListener listener = new IPropertyChangeListener() {
+ public void propertyChange(PropertyChangeEvent event) {
+ String propName = event.getProperty();
+ IPreferenceStore store = JtidyPlugin.getDefault().getPreferenceStore();
+ Configuration configuration = fTidy.getConfiguration();
+ Object value = event.getNewValue();
+
+ // Boolean preferences: the "use configuration file" switch or a single option.
+ if (value instanceof Boolean) {
+ boolean enabled = ((Boolean) value).booleanValue();
+ if (propName.equals(GENERAL_USE_CONFIG_FILE)) {
+ // Switching the mode re-applies the whole configuration.
+ fUseConfigurationFile = enabled;
+ initConfiguration();
+ return;
+ }
+ fUseConfigurationFile = store.getBoolean(GENERAL_USE_CONFIG_FILE);
+ // Single options are only applied while no configuration file is in use.
+ if (!fUseConfigurationFile) {
+ if (propName.equals(GENERAL_TIDY_MARK)) {
+ configuration.TidyMark = enabled;
+ }
+ if (propName.equals(GENERAL_SHOW_WARNINGS)) {
+ configuration.ShowWarnings = enabled;
+ }
+ if (propName.equals(GENERAL_QUIET)) {
+ configuration.Quiet = enabled;
+ }
+ if (propName.equals(GENERAL_EMACS)) {
+ configuration.Emacs = enabled;
+ }
+ if (propName.equals(GENERAL_KEEP_FILE_TIMES)) {
+ configuration.KeepFileTimes = enabled;
+ }
+ // wrap / indent
+ if (propName.equals(WRAP_ATT_VALS)) {
+ configuration.WrapAttVals = enabled;
+ }
+ if (propName.equals(WRAP_SCRIPTLETS)) {
+ configuration.WrapScriptlets = enabled;
+ }
+ if (propName.equals(WRAP_SECTION)) {
+ configuration.WrapSection = enabled;
+ }
+ if (propName.equals(WRAP_ASP)) {
+ configuration.WrapAsp = enabled;
+ }
+ if (propName.equals(WRAP_JSTE)) {
+ configuration.WrapJste = enabled;
+ }
+ if (propName.equals(WRAP_PHP)) {
+ configuration.WrapPhp = enabled;
+ }
+ if (propName.equals(INDENT_ATTRIBUTES)) {
+ configuration.IndentAttributes = enabled;
+ }
+ if (propName.equals(LITERAL_ATTRIBS)) {
+ configuration.LiteralAttribs = enabled;
+ }
+
+ // if (propName.equals(TYPE_TREAD_AS_XML)) {
+ // configuration.XmlTags = enabled;
+ // }
+ // if (propName.equals(TYPE_DOCTYPE)) {
+ // configuration.XmlPi = enabled;
+ // }
+
+ if (propName.equals(OUTPUT_MAKE_CLEAR)) {
+ configuration.MakeClean = enabled;
+ }
+
+ if (propName.equals(OUTPUT_ENCLOSE_BODY_TEXT)) {
+ configuration.EncloseBodyText = enabled;
+ }
+ if (propName.equals(OUTPUT_ENCLOSE_BLOCK_TEXT)) {
+ configuration.EncloseBlockText = enabled;
+ }
+ if (propName.equals(OUTPUT_STRIP_WORD)) {
+ configuration.Word2000 = enabled;
+ }
+ // if (propName.equals(OUTPUT_DEFAULT_ALT_TEXT)) {
+ // configuration. = enabled;
+ // }
+
+ if (propName.equals(OUT_AS_RAW)) {
+ configuration.RawOut = enabled;
+ }
+ if (propName.equals(OUT_UPPER_TAGS)) {
+ configuration.UpperCaseTags = enabled;
+ }
+ if (propName.equals(OUT_UPPER_ATTR)) {
+ configuration.UpperCaseAttrs = enabled;
+ }
+ // NOTE(review): OUT_BREAK_BR, OUT_WRAP_ATTR_VALUES and OUT_WRAP_SCRIPS have defaults
+ // but are not applied here or in initConfiguration() - confirm whether that is intended.
+
+ }
+ } else if (value instanceof String) {
+ // String preferences: the configuration file name, or (without a file) text options.
+ if (fUseConfigurationFile) {
+ if (propName.equals(GENERAL_CONFIG_FILE)) {
+ updateTidyConfig((String) value);
+ }
+ } else {
+ if (propName.equals(OUTPUT_DEFAULT_ALT_TEXT)) {
+ configuration.altText = (String) value;
+ }
+ if (propName.equals(INPUT_NEW_EMPTY_TAGS)) {
+ configuration.parseEmptyTagNames((String) value, null);
+ }
+ if (propName.equals(INPUT_NEW_INLINE_TAGS)) {
+ configuration.parseInlineTagNames((String) value, null);
+ }
+ if (propName.equals(INPUT_NEW_BLOCKLEVEL_TAGS)) {
+ configuration.parseBlockTagNames((String) value, null);
+ }
+ if (propName.equals(INPUT_NEW_PRE_TAGS)) {
+ configuration.parsePreTagNames((String) value, null);
+ }
+ }
+ }
+ }
+ };
+
+ // Every call registers a new listener instance on the preference store.
+ getPreferenceStore().addPropertyChangeListener(listener);
+ initConfiguration();
+ }
+
+ /**
+  * Applies the current preference values to the Tidy configuration. When the
+  * "use configuration file" preference is set, only the configured file is
+  * loaded; otherwise each option is copied from the preference store.
+  */
+ private void initConfiguration() {
+     IPreferenceStore prefs = JtidyPlugin.getDefault().getPreferenceStore();
+     fUseConfigurationFile = prefs.getBoolean(GENERAL_USE_CONFIG_FILE);
+     Configuration config = fTidy.getConfiguration();
+
+     if (fUseConfigurationFile) {
+         String configFile = prefs.getString(GENERAL_CONFIG_FILE);
+         if (configFile != null) {
+             updateTidyConfig(configFile);
+         }
+         return;
+     }
+
+     // General options.
+     config.TidyMark = prefs.getBoolean(GENERAL_TIDY_MARK);
+     config.ShowWarnings = prefs.getBoolean(GENERAL_SHOW_WARNINGS);
+     config.Quiet = prefs.getBoolean(GENERAL_QUIET);
+     config.Emacs = prefs.getBoolean(GENERAL_EMACS);
+     config.KeepFileTimes = prefs.getBoolean(GENERAL_KEEP_FILE_TIMES);
+
+     // Wrapping and indentation.
+     config.WrapAttVals = prefs.getBoolean(WRAP_ATT_VALS);
+     config.WrapScriptlets = prefs.getBoolean(WRAP_SCRIPTLETS);
+     config.WrapSection = prefs.getBoolean(WRAP_SECTION);
+     config.WrapAsp = prefs.getBoolean(WRAP_ASP);
+     config.WrapJste = prefs.getBoolean(WRAP_JSTE);
+     config.WrapPhp = prefs.getBoolean(WRAP_PHP);
+     config.IndentAttributes = prefs.getBoolean(INDENT_ATTRIBUTES);
+     config.LiteralAttribs = prefs.getBoolean(LITERAL_ATTRIBS);
+
+     // Output clean-up.
+     config.MakeClean = prefs.getBoolean(OUTPUT_MAKE_CLEAR);
+     config.EncloseBodyText = prefs.getBoolean(OUTPUT_ENCLOSE_BODY_TEXT);
+     config.EncloseBlockText = prefs.getBoolean(OUTPUT_ENCLOSE_BLOCK_TEXT);
+     config.Word2000 = prefs.getBoolean(OUTPUT_STRIP_WORD);
+     String altText = prefs.getString(OUTPUT_DEFAULT_ALT_TEXT);
+     if (altText != null) {
+         config.altText = altText;
+     }
+
+     // Output format.
+     config.RawOut = prefs.getBoolean(OUT_AS_RAW);
+     config.UpperCaseTags = prefs.getBoolean(OUT_UPPER_TAGS);
+     config.UpperCaseAttrs = prefs.getBoolean(OUT_UPPER_ATTR);
+
+     // Additional tag names.
+     String emptyTags = prefs.getString(INPUT_NEW_EMPTY_TAGS);
+     if (emptyTags != null) {
+         config.parseEmptyTagNames(emptyTags, null);
+     }
+     String inlineTags = prefs.getString(INPUT_NEW_INLINE_TAGS);
+     if (inlineTags != null) {
+         config.parseInlineTagNames(inlineTags, null);
+     }
+     String blockTags = prefs.getString(INPUT_NEW_BLOCKLEVEL_TAGS);
+     if (blockTags != null) {
+         config.parseBlockTagNames(blockTags, null);
+     }
+     String preTags = prefs.getString(INPUT_NEW_PRE_TAGS);
+     if (preTags != null) {
+         config.parsePreTagNames(preTags, null);
+     }
+ }
+
+ /**
+  * Updates the configuration of the Tidy instance with the content of the
+  * given file. Returns silently when the name is null or empty, or when it
+  * does not denote an existing regular file (for example a directory).
+  *
+  * @param rawConfigFileName path of the configuration file; may be null
+  */
+ private void updateTidyConfig(String rawConfigFileName) {
+     if (rawConfigFileName == null || rawConfigFileName.length() == 0) {
+         return;
+     }
+     File config = new File(rawConfigFileName);
+     // isFile() rather than exists(): a directory cannot be read as a configuration.
+     if (config.isFile()) {
+         fTidy.setConfigurationFromFile(config.getAbsolutePath());
+     }
+ }
+
+ /**
+  * Returns the shared plugin instance.
+  *
+  * @return the instance stored by the constructor, or null if none was created yet
+  */
+ public static JtidyPlugin getDefault() {
+ return fPlugin;
+ }
+
+ /**
+  * Returns the workspace instance, as provided by ResourcesPlugin.
+  *
+  * @return the workspace
+  */
+ public static IWorkspace getWorkspace() {
+ return ResourcesPlugin.getWorkspace();
+ }
+
+ /**
+  * Looks up a string in the plugin's resource bundle.
+  *
+  * @param key the resource key
+  * @return the bundle's string for the key, or the key itself when there is
+  *         no bundle or the bundle has no entry for it
+  */
+ public static String getResourceString(String key) {
+     ResourceBundle bundle = JtidyPlugin.getDefault().getResourceBundle();
+     if (bundle != null) {
+         try {
+             return bundle.getString(key);
+         } catch (MissingResourceException missing) {
+             // fall through and answer the key
+         }
+     }
+     return key;
+ }
+
+ /**
+  * Returns the plugin's resource bundle.
+  *
+  * @return the bundle, or null if it could not be loaded in the constructor
+  */
+ public ResourceBundle getResourceBundle() {
+ return fResourceBundle;
+ }
+
+ /**
+ * Returns the Tidy instance for this resource. Curently the resource can be
+ * null, I just copied a similar conzept from a differnt Project, which uses
+ * a per Project configuration.
+ * @param resource
+ * @return Tidy
+ */
+
+ public static Tidy getTidyInstance(IResource resource) {
+ //IProject project = resource.getProject();
+ //TODO: bind the instance to the resource...
+ if (getDefault().fTidy == null) {
+ getDefault().initTidy();
+ }
+ return getDefault().fTidy;
+ }
+
+}
--- /dev/null
+/**
+ * Created on 13.01.2003 by Jan Schulz
+ */
+package net.sourceforge.phpdt.tidy.actions;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.util.Iterator;
+
+import net.sourceforge.phpdt.tidy.JtidyPlugin;
+import org.eclipse.core.resources.IFile;
+import org.eclipse.core.resources.IMarker;
+import org.eclipse.core.resources.IResource;
+import org.eclipse.core.runtime.CoreException;
+import org.eclipse.jface.action.IAction;
+import org.eclipse.jface.dialogs.MessageDialog;
+import org.eclipse.jface.text.IDocument;
+import org.eclipse.jface.viewers.ISelection;
+import org.eclipse.jface.viewers.IStructuredSelection;
+import org.eclipse.swt.widgets.Shell;
+import org.eclipse.ui.IEditorPart;
+import org.eclipse.ui.texteditor.ITextEditor;
+
+/**
+ * @author jan
+ * @since 13.01.2003
+ */
+public abstract class AbstractJTidyAction {
+ // Editor the action works on; set by updateEditor, null when the part is no ITextEditor.
+ private ITextEditor fTextEditor = null;
+ // Current selection; set by updateSelection, null when it is not a structured selection.
+ private IStructuredSelection fSelection = null;
+
+ /**
+  * Removes the existing JTidy markers of the file and then runs the Tidy
+  * instance belonging to the file on the given stream.
+  *
+  * @param in the content to parse
+  * @param file the file the content belongs to; passed to Tidy together with the stream
+  * @return the bytes Tidy wrote to its output stream
+  */
+ protected byte[] parseStreamOfFile(InputStream in, IFile file) {
+     // Markers of an earlier run are cleared before parsing.
+     deleteTidyMarker(file);
+     ByteArrayOutputStream tidied = new ByteArrayOutputStream();
+     JtidyPlugin.getTidyInstance(file).parse(file, in, tidied);
+     byte[] result = tidied.toByteArray();
+     return result;
+ }
+
+ /**
+  * Deletes all markers of this file that carry the JTidy marker attribute.
+  * A failure while reading or deleting markers is written to the plugin log;
+  * it does not abort the caller.
+  *
+  * @param file the file whose JTidy markers are removed
+  */
+ protected void deleteTidyMarker(IFile file) {
+     try {
+         IMarker[] markers = file.findMarkers(null, false, IResource.DEPTH_ZERO);
+         for (int i = 0; i < markers.length; i++) {
+             IMarker marker = markers[i];
+             // Only markers tagged with the JTidy attribute are removed.
+             if (marker.getAttribute(JtidyPlugin.MARKER_NAME) != null) {
+                 marker.delete();
+             }
+         }
+     } catch (CoreException e) {
+         // Record the problem instead of dropping it silently.
+         JtidyPlugin.getDefault().getLog().log(e.getStatus());
+     }
+ }
+
+ /**
+  * Parses the document of the current text editor with Tidy.
+  *
+  * @param writeBack if true, the document content is replaced by Tidy's
+  *        output; when Tidy produced no output an error dialog is shown instead
+  * @throws ParseFailedException if there is no editor, document provider,
+  *         document or underlying file to work on
+  */
+ protected void parseDocument(boolean writeBack) throws ParseFailedException {
+     // Report a missing editor/provider through the declared exception
+     // rather than failing with a NullPointerException.
+     assertNotNull(fTextEditor);
+     assertNotNull(fTextEditor.getDocumentProvider());
+     IDocument doku = fTextEditor.getDocumentProvider().getDocument(fTextEditor.getEditorInput());
+     assertNotNull(doku);
+     String content = doku.get();
+     IFile file = (IFile) fTextEditor.getEditorInput().getAdapter(IFile.class);
+     assertNotNull(file);
+     byte[] ret = parseStreamOfFile(new ByteArrayInputStream(content.getBytes()), file);
+     if (writeBack) {
+         if (ret.length != 0) {
+             String cleanedContent = new String(ret);
+             doku.set(cleanedContent);
+         } else {
+             displayError(
+                 "Formatting skipped",
+                 "This document has errors that must be fixed before using HTML Tidy to generate a tidied up version.");
+         }
+     }
+ }
+
+ /**
+  * Guards against a missing value.
+  *
+  * @param obj the value to check
+  * @throws ParseFailedException if obj is null
+  */
+ protected void assertNotNull(Object obj) throws ParseFailedException {
+     if (obj != null) {
+         return;
+     }
+     throw new ParseFailedException("A expected 'non-null' Value was null");
+ }
+
+ /**
+  * Enables the given action when an editor or a selection is available and
+  * disables it otherwise.
+  *
+  * @param action the action whose enabled state is updated
+  */
+ protected void updateParent(IAction action) {
+     boolean hasTarget = fTextEditor != null || fSelection != null;
+     action.setEnabled(hasTarget);
+ }
+
+ /**
+  * Parses all file resources in the current selection with Tidy. Elements
+  * that are not file resources are ignored; nothing happens when there is no
+  * selection.
+  *
+  * @param writeBack if true, each file's content is replaced by Tidy's
+  *        output; when Tidy produced no output an error dialog is shown instead
+  */
+ protected void parseSelection(boolean writeBack) {
+     if (fSelection == null) {
+         return;
+     }
+     Iterator iterator = fSelection.iterator();
+     while (iterator.hasNext()) {
+         // obj => selected object in the view
+         Object obj = iterator.next();
+         if (obj instanceof IResource && ((IResource) obj).getType() == IResource.FILE) {
+             parseFile((IFile) obj, writeBack);
+         }
+     }
+ }
+
+ /**
+  * Runs Tidy on a single file and optionally writes the result back. The
+  * content stream is closed before the file is written; a CoreException is
+  * written to the plugin log.
+  */
+ private void parseFile(IFile file, boolean writeBack) {
+     try {
+         byte[] ret;
+         InputStream in = file.getContents();
+         try {
+             ret = parseStreamOfFile(in, file);
+         } finally {
+             // The caller of getContents() owns the stream.
+             try {
+                 in.close();
+             } catch (java.io.IOException ignored) {
+                 // the content has been read already; nothing left to recover
+             }
+         }
+
+         if (!writeBack) {
+             return;
+         }
+         if (ret.length != 0) {
+             InputStream source = new ByteArrayInputStream(ret);
+             file.setContents(source, IFile.KEEP_HISTORY, null);
+         } else {
+             displayError(
+                 "Formatting skipped",
+                 "This document has errors that must be fixed before using HTML Tidy to generate a tidied up version.");
+         }
+     } catch (CoreException e) {
+         JtidyPlugin.getDefault().getLog().log(e.getStatus());
+     }
+ }
+ /**
+  * Placeholder for error reporting; the body is empty and nothing in this
+  * class calls it.
+  *
+  * @param lable title of the error (unused)
+  * @param message text of the error (unused)
+  */
+ private void error(String lable, String message) {
+
+ // TODO:
+ }
+
+ /**
+  * Opens an error dialog on the shell supplied by getShell(). The dialog is
+  * opened through the display's syncExec.
+  *
+  * @param lable the dialog title
+  * @param message the error message to show
+  */
+ private void displayError(final String lable, final String message) {
+     final Shell shell = getShell();
+     Runnable openDialog = new Runnable() {
+         public void run() {
+             MessageDialog.openError(shell, lable, message);
+         }
+     };
+     shell.getDisplay().syncExec(openDialog);
+ }
+ /**
+  * Returns the shell used as parent of the error dialogs opened by this
+  * action; supplied by the concrete action class.
+  *
+  * @return Shell
+  */
+ protected abstract Shell getShell();
+
+ /**
+  * Remembers the given selection if it is an IStructuredSelection;
+  * any other selection (including null) clears the remembered one.
+  *
+  * @param sel the new selection
+  */
+ protected void updateSelection(ISelection sel) {
+     // REVISIT: further determination of types?
+     fSelection = (sel instanceof IStructuredSelection) ? (IStructuredSelection) sel : null;
+ }
+
+ /**
+  * Remembers the given editor part if it is an ITextEditor; any other part
+  * (including null) clears the remembered editor.
+  *
+  * @param part the new editor part
+  */
+ protected void updateEditor(IEditorPart part) {
+     fTextEditor = (part instanceof ITextEditor) ? (ITextEditor) part : null;
+ }
+
+ /**
+  * Returns the text editor remembered by updateEditor, or null if none is set.
+  */
+ protected IEditorPart getEditor() {
+ return fTextEditor;
+ }
+}
--- /dev/null
+/**
+ * Created on 13.01.2003 by Jan Schulz
+ */
+package net.sourceforge.phpdt.tidy.actions;
+
+import org.eclipse.jface.action.IAction;
+import org.eclipse.jface.viewers.ISelection;
+import org.eclipse.swt.widgets.Shell;
+import org.eclipse.ui.IEditorActionDelegate;
+import org.eclipse.ui.IEditorPart;
+
+/**
+ * Base class for JTidy actions that are contributed to an editor. It tracks
+ * the active editor and supplies that editor's shell for error dialogs.
+ *
+ * @author jan
+ * @since 13.01.2003
+ */
+public abstract class AbstractJTidyEditorAction extends AbstractJTidyAction implements IEditorActionDelegate {
+    /** Shell of the most recently activated editor; null until an editor was set. */
+    Shell fShell;
+
+    /**
+     * Remembers the new active editor and updates the action's enabled state.
+     * The editor may be null (no editor active); in that case the previously
+     * known shell is kept and the editor reference is cleared.
+     *
+     * @see org.eclipse.ui.IEditorActionDelegate#setActiveEditor(org.eclipse.jface.action.IAction, org.eclipse.ui.IEditorPart)
+     */
+    public void setActiveEditor(IAction action, IEditorPart targetEditor) {
+        if (targetEditor != null) {
+            // Always follow the current editor so the shell does not go stale.
+            fShell = targetEditor.getSite().getShell();
+        }
+        updateEditor(targetEditor);
+
+        updateParent(action);
+    }
+
+    /* (non-Javadoc)
+     * @see org.eclipse.ui.IActionDelegate#run(org.eclipse.jface.action.IAction)
+     */
+    public abstract void run(IAction action);
+
+    /**
+     * Ignores selection changes; editor actions work on the active editor only.
+     *
+     * @see org.eclipse.ui.IActionDelegate#selectionChanged(org.eclipse.jface.action.IAction, org.eclipse.jface.viewers.ISelection)
+     */
+    public void selectionChanged(IAction action, ISelection selection) {
+        // No need to get a selection...
+    }
+
+    /* (non-Javadoc)
+     * @see net.sourceforge.phpdt.tidy.actions.AbstractJTidyAction#getShell()
+     */
+    protected Shell getShell() {
+        return fShell;
+    }
+
+}
--- /dev/null
+/**
+ * Created on 13.01.2003 by Jan Schulz
+ */
+package net.sourceforge.phpdt.tidy.actions;
+
+import org.eclipse.jface.action.IAction;
+import org.eclipse.jface.viewers.ISelection;
+import org.eclipse.swt.widgets.Shell;
+import org.eclipse.ui.IObjectActionDelegate;
+import org.eclipse.ui.IWorkbenchPart;
+
+/**
+ * @author jan
+ * @since 13.01.2003
+ */
+public abstract class AbstractJTidyObjectAction extends AbstractJTidyAction implements IObjectActionDelegate {
+ private Shell fShell;
+ /* (non-Javadoc)
+ * @see org.eclipse.ui.IObjectActionDelegate#setActivePart(org.eclipse.jface.action.IAction, org.eclipse.ui.IWorkbenchPart)
+ */
+ public void setActivePart(IAction action, IWorkbenchPart targetPart) {
+ fShell = targetPart.getSite().getShell();
+ // No need to do something here...
+ }
+ /* (non-Javadoc)
+ * @see org.eclipse.ui.IActionDelegate#run(org.eclipse.jface.action.IAction)
+ */
+ public abstract void run(IAction action);
+ /* (non-Javadoc)
+ * @see org.eclipse.ui.IActionDelegate#selectionChanged(org.eclipse.jface.action.IAction, org.eclipse.jface.viewers.ISelection)
+ */
+ public void selectionChanged(IAction action, ISelection selection) {
+ updateSelection(selection);
+ updateParent(action);
+ }
+
+ /* (non-Javadoc)
+ * @see net.sourceforge.phpdt.tidy.actions.AbstractJTidyAction#getShell()
+ */
+ protected Shell getShell() {
+ return fShell;
+ }
+
+
+}
--- /dev/null
+package net.sourceforge.phpdt.tidy.actions;
+
+import net.sourceforge.phpdt.tidy.JTidyConsole;
+import org.eclipse.jface.action.IAction;
+
+/**
+ * Object action that runs Tidy on the selected files and writes the result
+ * back to them.
+ */
+public class FormatWithJTidyAction extends AbstractJTidyObjectAction {
+
+    /**
+     * Clears the JTidy console, then parses the selection with write-back
+     * enabled.
+     *
+     * @see IActionDelegate#run(IAction)
+     */
+    public void run(IAction action) {
+        JTidyConsole.clear();
+        final boolean writeBack = true;
+        parseSelection(writeBack);
+    }
+
+}
--- /dev/null
+/**
+ * Created on 13.01.2003 by Jan Schulz
+ */
+package net.sourceforge.phpdt.tidy.actions;
+
+import net.sourceforge.phpdt.tidy.JTidyConsole;
+import org.eclipse.jface.action.IAction;
+
+/**
+ * Editor action that runs Tidy on the document of the active editor and
+ * replaces the document content with the result.
+ *
+ * @author jan
+ * @since 13.01.2003
+ */
+public class FormatWithJTidyEditorAction extends AbstractJTidyEditorAction {
+
+    /**
+     * Clears the JTidy console, then parses the editor document with
+     * write-back enabled. A ParseFailedException is ignored.
+     *
+     * @see org.eclipse.ui.IActionDelegate#run(org.eclipse.jface.action.IAction)
+     */
+    public void run(IAction action) {
+        JTidyConsole.clear();
+        final boolean writeBack = true;
+        try {
+            parseDocument(writeBack);
+        } catch (ParseFailedException ignored) {
+            //LOGGING
+        }
+    }
+
+}
--- /dev/null
+/**
+ * Created on 13.01.2003 by Jan Schulz
+ */
+package net.sourceforge.phpdt.tidy.actions;
+
+/**
+ * Signals that a parse run could not be carried out. Instances do not record
+ * a stack trace, because fillInStackTrace() is overridden to do nothing.
+ *
+ * @author jan
+ * @since 13.01.2003
+ */
+public class ParseFailedException extends Exception {
+
+    /**
+     * Creates an exception without message and cause.
+     */
+    public ParseFailedException() {
+    }
+
+    /**
+     * Creates an exception with a message.
+     *
+     * @param message the detail message
+     */
+    public ParseFailedException(String message) {
+        super(message);
+    }
+
+    /**
+     * Creates an exception with a message and a cause.
+     *
+     * @param message the detail message
+     * @param cause the underlying problem
+     */
+    public ParseFailedException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * Creates an exception wrapping a cause.
+     *
+     * @param cause the underlying problem
+     */
+    public ParseFailedException(Throwable cause) {
+        super(cause);
+    }
+
+    /**
+     * Skips recording the stack trace and simply answers this exception.
+     *
+     * @see java.lang.Throwable#fillInStackTrace()
+     */
+    public synchronized Throwable fillInStackTrace() {
+        return this;
+    }
+
+}
--- /dev/null
+package net.sourceforge.phpdt.tidy.actions;
+
+import net.sourceforge.phpdt.tidy.JTidyConsole;
+import org.eclipse.jface.action.IAction;
+
+/**
+ * Object action that runs Tidy on the selected files without changing them.
+ */
+public class ParseWithJTidyAction extends AbstractJTidyObjectAction {
+
+    /**
+     * Clears the JTidy console, then parses the selection with write-back
+     * disabled.
+     *
+     * @see IActionDelegate#run(IAction)
+     */
+    public void run(IAction action) {
+        JTidyConsole.clear();
+        final boolean writeBack = false;
+        parseSelection(writeBack);
+    }
+
+}
--- /dev/null
+/**
+ * Created on 13.01.2003 by Jan Schulz
+ */
+package net.sourceforge.phpdt.tidy.actions;
+
+import net.sourceforge.phpdt.tidy.JTidyConsole;
+import org.eclipse.jface.action.IAction;
+
+/**
+ * Editor action that runs Tidy on the document of the active editor without
+ * changing the document.
+ *
+ * @author jan
+ * @since 13.01.2003
+ */
+public class ParseWithJTidyEditorAction extends AbstractJTidyEditorAction {
+
+    /**
+     * Clears the JTidy console, then parses the editor document with
+     * write-back disabled. A ParseFailedException is ignored.
+     *
+     * @see org.eclipse.ui.IActionDelegate#run(org.eclipse.jface.action.IAction)
+     */
+    public void run(IAction action) {
+        JTidyConsole.clear();
+        final boolean writeBack = false;
+        try {
+            parseDocument(writeBack);
+        } catch (ParseFailedException ignored) {
+            //LOGGING
+        }
+    }
+
+}
--- /dev/null
+package net.sourceforge.phpdt.tidy.preferences;
+
+import java.util.ArrayList;
+import java.util.StringTokenizer;
+
+import org.eclipse.jface.dialogs.IDialogConstants;
+import org.eclipse.jface.preference.FieldEditor;
+import org.eclipse.swt.SWT;
+import org.eclipse.swt.events.SelectionAdapter;
+import org.eclipse.swt.events.SelectionEvent;
+import org.eclipse.swt.layout.GridData;
+import org.eclipse.swt.layout.GridLayout;
+import org.eclipse.swt.widgets.Button;
+import org.eclipse.swt.widgets.Composite;
+import org.eclipse.swt.widgets.Label;
+import org.eclipse.swt.widgets.List;
+import org.eclipse.swt.widgets.Text;
+
+/**
+ * A field editor for displaying and storing a list of strings.
+ * Buttons are provided for adding items to the list and removing
+ * items from the list.
+ */
+public class AddRemoveListFieldEditor extends FieldEditor {
+ // Default captions of the two buttons and the default item separator.
+ private static final String DEFAULT_ADD_LABEL = "Add";
+ private static final String DEFAULT_REMOVE_LABEL = "Remove selection";
+ private static final String DEFAULT_SEPERATOR = ";";
+
+ // Dialog-unit arithmetic used to derive the height hint of the list.
+ private static final int VERTICAL_DIALOG_UNITS_PER_CHAR = 8;
+ private static final int HORIZONTAL_DIALOG_UNITS_PER_CHAR = 4;
+ private static final int LIST_HEIGHT_IN_CHARS = 10;
+ private static final int LIST_HEIGHT_IN_DLUS =
+ LIST_HEIGHT_IN_CHARS * VERTICAL_DIALOG_UNITS_PER_CHAR;
+
+ // The top-level control for the field editor.
+ private Composite top;
+ // The list of tags.
+ private List list;
+ // The text field for inputting new tags.
+ private Text textField;
+ // The button for adding the contents of
+ // the text field to the list.
+ private Button add;
+ // The button for removing the currently-selected list item.
+ private Button remove;
+ // The string used to separate list items
+ // in a single String representation.
+ private String seperator = DEFAULT_SEPERATOR;
+
+ /**
+  * Creates the field editor with the default button captions.
+  *
+  * @param name the preference name, passed to the superclass
+  * @param labelText the label text, passed to the superclass
+  * @param parent the parent composite, passed to the superclass
+  */
+ public AddRemoveListFieldEditor(
+ String name,
+ String labelText,
+ Composite parent) {
+ super(name, labelText, parent);
+ }
+
+ /**
+  * Creates the field editor and then sets custom captions for the add and
+  * remove buttons.
+  *
+  * @param name the preference name, passed to the superclass
+  * @param labelText the label text, passed to the superclass
+  * @param addButtonText caption passed to setAddButtonText
+  * @param removeButtonText caption passed to setRemoveButtonText
+  * @param parent the parent composite, passed to the superclass
+  */
+ public AddRemoveListFieldEditor(
+ String name,
+ String labelText,
+ String addButtonText,
+ String removeButtonText,
+ Composite parent) {
+ super(name, labelText, parent);
+ setAddButtonText(addButtonText);
+ setRemoveButtonText(removeButtonText);
+ }
+
+ /**
+  * Makes the top-level control span the given number of columns.
+  *
+  * @see org.eclipse.jface.preference.FieldEditor#adjustForNumColumns(int)
+  */
+ protected void adjustForNumColumns(int numColumns) {
+     GridData topData = (GridData) top.getLayoutData();
+     topData.horizontalSpan = numColumns;
+ }
+
+ /**
+ * @see org.eclipse.jface.preference.FieldEditor#doFillIntoGrid
+ * (Composite, int)
+ */
+ protected void doFillIntoGrid(Composite parent, int numColumns) {
+ top = parent;
+
+ GridData gd = new GridData(GridData.FILL_HORIZONTAL);
+ gd.horizontalSpan = numColumns;
+ top.setLayoutData(gd);
+
+ Label label = getLabelControl(top);
+ GridData labelData = new GridData();
+ labelData.horizontalSpan = numColumns;
+ label.setLayoutData(labelData);
+
+ list = new List(top, SWT.BORDER);
+
+ // Create a grid data that takes up the extra
+ // space in the dialog and spans both columns.
+ GridData listData = new GridData(GridData.FILL_HORIZONTAL);
+ listData.heightHint =
+ convertVerticalDLUsToPixels(list, LIST_HEIGHT_IN_DLUS);
+ listData.horizontalSpan = numColumns;
+
+ list.setLayoutData(listData);
+ list.addSelectionListener(new SelectionAdapter() {
+ public void widgetSelected(SelectionEvent e) {
+ selectionChanged();
+ }
+ });
+
+ // Create a composite for the add and remove
+ // buttons and the input text field.
+ Composite addRemoveGroup = new Composite(top, SWT.NONE);
+
+ GridData addRemoveData = new GridData(GridData.FILL_HORIZONTAL);
+ addRemoveData.horizontalSpan = numColumns;
+ addRemoveGroup.setLayoutData(addRemoveData);
+
+ GridLayout addRemoveLayout = new GridLayout();
+ addRemoveLayout.numColumns = numColumns;
+ addRemoveLayout.marginHeight = 0;
+ addRemoveLayout.marginWidth = 0;
+ addRemoveGroup.setLayout(addRemoveLayout);
+
+ // Create a composite for the add and remove buttons.
+ Composite buttonGroup = new Composite(addRemoveGroup, SWT.NONE);
+ buttonGroup.setLayoutData(new GridData());
+
+ GridLayout buttonLayout = new GridLayout();
+ buttonLayout.marginHeight = 0;
+ buttonLayout.marginWidth = 0;
+ buttonGroup.setLayout(buttonLayout);
+
+ // Create the add button.
+ add = new Button(buttonGroup, SWT.NONE);
+ add.setText(DEFAULT_ADD_LABEL);
+ add.addSelectionListener(new SelectionAdapter() {
+ public void widgetSelected(SelectionEvent e) {
+ add();
+ }
+ });
+ GridData addData = new GridData(GridData.FILL_HORIZONTAL);
+ addData.heightHint = convertVerticalDLUsToPixels(
+ add,
+ IDialogConstants.BUTTON_HEIGHT);
+ addData.widthHint = convertHorizontalDLUsToPixels(
+ add,
+ IDialogConstants.BUTTON_WIDTH);
+ add.setLayoutData(addData);
+
+ // Create the remove button.
+ remove = new Button(buttonGroup, SWT.NONE);
+ remove.setEnabled(false);
+ remove.setText(DEFAULT_REMOVE_LABEL);
+ remove.addSelectionListener(new SelectionAdapter() {
+ public void widgetSelected(SelectionEvent e) {
+ list.remove(list.getSelectionIndex());
+ selectionChanged();
+ }
+ });
+ GridData removeData = new GridData(GridData.FILL_HORIZONTAL);
+ removeData.heightHint = convertVerticalDLUsToPixels(
+ remove,
+ IDialogConstants.BUTTON_HEIGHT);
+ removeData.widthHint = convertHorizontalDLUsToPixels(
+ remove,
+ IDialogConstants.BUTTON_WIDTH);
+ remove.setLayoutData(removeData);
+
+ // Create the text field.
+ textField = new Text(addRemoveGroup, SWT.BORDER);
+
+ GridData textData = new GridData(GridData.FILL_HORIZONTAL);
+ textData.horizontalSpan = numColumns - 1;
+ textData.verticalAlignment = GridData.BEGINNING;
+ textField.setLayoutData(textData);
+ }
+
+ /**
+  * Fills the list from the string stored in the preference store under
+  * this editor's preference name.
+  *
+  * @see org.eclipse.jface.preference.FieldEditor#doLoad()
+  */
+ protected void doLoad() {
+     setList(getPreferenceStore().getString(getPreferenceName()));
+ }
+
+ /**
+  * Fills the list from the default string registered in the preference
+  * store under this editor's preference name.
+  *
+  * @see org.eclipse.jface.preference.FieldEditor#doLoadDefault()
+  */
+ protected void doLoadDefault() {
+     setList(getPreferenceStore().getDefaultString(getPreferenceName()));
+ }
+
+ // Splits the given string into separate items and hands them to the
+ // list widget as its new content.
+ private void setList(String items) {
+     list.setItems(parseString(items));
+ }
+
+ /**
+  * Joins the current list items into one string and stores it in the
+  * preference store under this editor's preference name.
+  *
+  * @see org.eclipse.jface.preference.FieldEditor#doStore()
+  */
+ protected void doStore() {
+     String joined = createListString(list.getItems());
+     if (joined == null) {
+         return;
+     }
+     getPreferenceStore().setValue(getPreferenceName(), joined);
+ }
+
+ /**
+  * Reports how many controls this editor contributes: the composite
+  * holding the buttons and the text field.
+  *
+  * @return always 2
+  * @see org.eclipse.jface.preference.FieldEditor#getNumberOfControls()
+  */
+ public int getNumberOfControls() {
+     return 2;
+ }
+
+ // Appends the content of the text field to the list when it is not
+ // empty; the text field is cleared in either case.
+ private void add() {
+     String entry = textField.getText();
+     boolean hasText = entry != null && entry.length() > 0;
+     if (hasText) {
+         list.add(entry);
+     }
+     textField.setText("");
+ }
+
+ /**
+  * Changes the caption of the "add" button, i.e. the button that moves
+  * the content of the text field into the list.
+  *
+  * @param text the new button caption
+  */
+ public void setAddButtonText(String text) {
+     add.setText(text);
+ }
+
+ /**
+  * Changes the caption of the "remove" button, i.e. the button that
+  * deletes the selected item from the list.
+  *
+  * @param text the new button caption
+  */
+ public void setRemoveButtonText(String text) {
+     remove.setText(text);
+ }
+
+ /**
+  * Sets the separator that is placed after each list item when the list
+  * is written to the preference store as a single string, and that is
+  * used to split that string again when it is read back.
+  *
+  * @param seperator the separator string
+  */
+ public void setSeperator(String seperator) {
+     this.seperator = seperator;
+ }
+
+ /**
+  * Builds the single-string form of the list that is kept in the
+  * preference store. Every item, including the last one, is followed by
+  * the separator.
+  *
+  * @param items the list items to join
+  * @return the joined string; empty when there are no items
+  */
+ private String createListString(String[] items) {
+     StringBuffer joined = new StringBuffer();
+     int index = 0;
+     while (index < items.length) {
+         joined.append(items[index]).append(seperator);
+         index++;
+     }
+     return joined.toString();
+ }
+
+ /**
+  * Splits the single-string form of the list back into its items, using
+  * the characters of the configured separator as delimiters.
+  *
+  * @param stringList the stored string; <code>null</code> is treated
+  *        like an empty string
+  * @return the items in their stored order; never <code>null</code>
+  */
+ private String[] parseString(String stringList) {
+     // A store may hand back null for an unknown key; show an empty list
+     // instead of failing inside StringTokenizer.
+     if (stringList == null) {
+         return new String[0];
+     }
+     StringTokenizer tokens = new StringTokenizer(stringList, seperator);
+     ArrayList items = new ArrayList();
+     while (tokens.hasMoreTokens()) {
+         items.add(tokens.nextToken());
+     }
+     return (String[]) items.toArray(new String[items.size()]);
+ }
+
+ // Enables the remove button only while an item is selected in the list.
+ private void selectionChanged() {
+     boolean hasSelection = list.getSelectionIndex() >= 0;
+     remove.setEnabled(hasSelection);
+ }
+}
--- /dev/null
+package net.sourceforge.phpdt.tidy.preferences;
+
+/*
+ * Names of all preferences used by this package. The string values are the
+ * keys under which the settings are stored, so they must not be changed
+ * (not even to correct their spelling).
+ *
+ * Fields of an interface are implicitly public, static and final.
+ */
+public interface IPreferenceConstants {
+
+    // Configuration file and the switch that enables it.
+    String GENERAL_CONFIG_FILE = "__general_config_file";
+    String GENERAL_USE_CONFIG_FILE = "__use_config_file";
+
+    // REVISIT: the remaining constants are not used yet.
+
+    // General JTidy settings.
+    String GENERAL_TIDY_MARK = "__TidyMark";
+    String GENERAL_SHOW_WARNINGS = "__ShowWarnings";
+    String GENERAL_QUIET = "__Quiet";
+    String GENERAL_EMACS = "__Emacs";
+    String GENERAL_KEEP_FILE_TIMES = "__KeepFileTimes";
+
+    // Wrapping and indentation.
+    String WRAP_ATT_VALS = "__wrapAttVals";
+    String WRAP_SCRIPTLETS = "__wrapScriplets";
+    String WRAP_SECTION = "__wrapSection";
+    String WRAP_ASP = "__wrapAsp";
+    String WRAP_JSTE = "__wrapJsta";
+    String WRAP_PHP = "__wrapPhp";
+    String INDENT_ATTRIBUTES = "__indentAttributes";
+    String LITERAL_ATTRIBS = "__literalAttribs";
+
+    // Not defined yet: what to do with the output and how valid it must be.
+    // public static final String TYPE_TREAD_AS_XML = "__type_xml";
+    // //TODO: The rest of the doctype switches...
+    // public static final String TYPE_DOCTYPE = "__type_Doctype";
+
+    // Clean-up of the output.
+    String OUTPUT_MAKE_CLEAR = "__out_make_clear";
+    String OUTPUT_STRIP_WORD = "__out_strip_Word";
+    String OUTPUT_ENCLOSE_BODY_TEXT = "__out_encloseBodyTexr";
+    String OUTPUT_ENCLOSE_BLOCK_TEXT = "__out_encloseBlock";
+    String OUTPUT_DEFAULT_ALT_TEXT = "__out_alt_text";
+
+    // Appearance of the output.
+    String OUT_AS_RAW = "__out_as_raw";
+    String OUT_UPPER_TAGS = "__out_upper_Tags";
+    String OUT_UPPER_ATTR = "__out_upper_attr";
+    String OUT_BREAK_BR = "__out_break_befor_br";
+    String OUT_WRAP_ATTR_VALUES = "__out_wrap_Attr_values";
+    String OUT_WRAP_SCRIPS = "__out_wrap_scripts";
+
+    // Additional tags to accept in the input.
+    String INPUT_NEW_EMPTY_TAGS = "__in_new_empty_Tags";
+    String INPUT_NEW_INLINE_TAGS = "__in_new_inline_Tags";
+    String INPUT_NEW_BLOCKLEVEL_TAGS = "__in_new_blocklevel_tags";
+    String INPUT_NEW_PRE_TAGS = "__in_new_pre_Tags";
+
+}
--- /dev/null
+package net.sourceforge.phpdt.tidy.preferences;
+
+import net.sourceforge.phpdt.tidy.JtidyPlugin;
+import org.eclipse.jface.preference.BooleanFieldEditor;
+import org.eclipse.jface.preference.FieldEditorPreferencePage;
+import org.eclipse.jface.preference.FileFieldEditor;
+import org.eclipse.jface.preference.IPreferenceStore;
+import org.eclipse.ui.IWorkbench;
+import org.eclipse.ui.IWorkbenchPreferencePage;
+
+/**
+ * A preference page for a simple HTML editor.
+ */
+public class JTidyGeneralPreferences extends FieldEditorPreferencePage implements IWorkbenchPreferencePage, IPreferenceConstants {
+
+ public JTidyGeneralPreferences() {
+ super(FieldEditorPreferencePage.GRID);
+
+ // Set the preference store for the preference page.
+ IPreferenceStore store = JtidyPlugin.getDefault().getPreferenceStore();
+ setPreferenceStore(store);
+ }
+
+ /**
+ * @see org.eclipse.jface.preference.
+ * FieldEditorPreferencePage#createFieldEditors()
+ */
+ protected void createFieldEditors() {
+ FileFieldEditor tidyConf = new FileFieldEditor(GENERAL_CONFIG_FILE, "Config file", true, getFieldEditorParent());
+ addField(tidyConf);
+ BooleanFieldEditor useConfigFile =
+ new BooleanFieldEditor(GENERAL_USE_CONFIG_FILE, "Use configurtion file", getFieldEditorParent());
+ addField(useConfigFile);
+ //
+
+ BooleanFieldEditor tidyMark =
+ new BooleanFieldEditor(GENERAL_TIDY_MARK, "Set Tidy signature in header", getFieldEditorParent());
+ addField(tidyMark);
+
+ BooleanFieldEditor showWarning =
+ new BooleanFieldEditor(GENERAL_SHOW_WARNINGS, "Show warnings", getFieldEditorParent());
+ addField(showWarning);
+
+ BooleanFieldEditor quiet =
+ new BooleanFieldEditor(GENERAL_QUIET, "Quiet output", getFieldEditorParent());
+ addField(quiet);
+
+ BooleanFieldEditor emacs =
+ new BooleanFieldEditor(GENERAL_EMACS, "Emacs error reporting style", getFieldEditorParent());
+ addField(emacs);
+
+ BooleanFieldEditor keepFileTimes =
+ new BooleanFieldEditor(GENERAL_KEEP_FILE_TIMES, "Keep last modified time", getFieldEditorParent());
+ addField(keepFileTimes);
+ }
+
+ /**
+ * @see IWorkbenchPreferencePage#init
+ */
+ public void init(IWorkbench workbench) {
+ }
+}
--- /dev/null
+package net.sourceforge.phpdt.tidy.preferences;
+
+import net.sourceforge.phpdt.tidy.JtidyPlugin;
+import org.eclipse.jface.preference.FieldEditorPreferencePage;
+import org.eclipse.jface.preference.IPreferenceStore;
+import org.eclipse.jface.preference.StringFieldEditor;
+import org.eclipse.ui.IWorkbench;
+import org.eclipse.ui.IWorkbenchPreferencePage;
+
+/**
+ * Preference page on which additional tag names (empty, inline, block
+ * level and pre tags) can be entered as text.
+ */
+public class JTidyNewTagsPreferences extends FieldEditorPreferencePage implements IWorkbenchPreferencePage, IPreferenceConstants {
+
+    /**
+     * Creates the page with a grid layout and attaches the preference
+     * store of the JTidy plugin to it.
+     */
+    public JTidyNewTagsPreferences() {
+        super(FieldEditorPreferencePage.GRID);
+
+        IPreferenceStore store = JtidyPlugin.getDefault().getPreferenceStore();
+        setPreferenceStore(store);
+    }
+
+    /**
+     * Adds a hint label followed by one text field per tag category.
+     *
+     * @see org.eclipse.jface.preference.
+     * FieldEditorPreferencePage#createFieldEditors()
+     */
+    protected void createFieldEditors() {
+        // For developing with template engines or something like this...
+        // Label spelling corrected (was "Seperate").
+        addField(new LabelFieldEditor("Separate the values with <space>", getFieldEditorParent()));
+
+        // Label spelling corrected (was "empy").
+        addStringField(INPUT_NEW_EMPTY_TAGS, "New empty tags:");
+        addStringField(INPUT_NEW_INLINE_TAGS, "New inline tags:");
+        addStringField(INPUT_NEW_BLOCKLEVEL_TAGS, "New blocklevel tags:");
+        addStringField(INPUT_NEW_PRE_TAGS, "New pre tags:");
+    }
+
+    // Creates a text editor for the given preference and adds it to the page.
+    private void addStringField(String preferenceName, String labelText) {
+        addField(new StringFieldEditor(preferenceName, labelText, getFieldEditorParent()));
+    }
+
+    /**
+     * Does nothing; the page needs no workbench-specific initialization.
+     *
+     * @see IWorkbenchPreferencePage#init
+     */
+    public void init(IWorkbench workbench) {
+    }
+}
--- /dev/null
+package net.sourceforge.phpdt.tidy.preferences;
+
+import net.sourceforge.phpdt.tidy.JtidyPlugin;
+import org.eclipse.jface.preference.BooleanFieldEditor;
+import org.eclipse.jface.preference.FieldEditorPreferencePage;
+import org.eclipse.jface.preference.IPreferenceStore;
+import org.eclipse.ui.IWorkbench;
+import org.eclipse.ui.IWorkbenchPreferencePage;
+
+/**
+ * Preference page for the look of the JTidy output: upper-case tags and
+ * attributes, line break before BR, wrapping of attribute values and
+ * scripts, and raw output of characters above 128.
+ */
+public class JTidyOutputLookPreferences extends FieldEditorPreferencePage implements IWorkbenchPreferencePage {
+
+    /**
+     * Creates the page with a grid layout and attaches the preference
+     * store of the JTidy plugin to it.
+     */
+    public JTidyOutputLookPreferences() {
+        super(FieldEditorPreferencePage.GRID);
+
+        IPreferenceStore store = JtidyPlugin.getDefault().getPreferenceStore();
+        setPreferenceStore(store);
+    }
+
+    /**
+     * Adds one check box per output-look setting.
+     *
+     * @see org.eclipse.jface.preference.
+     * FieldEditorPreferencePage#createFieldEditors()
+     */
+    protected void createFieldEditors() {
+        BooleanFieldEditor upperTags =
+            new BooleanFieldEditor(IPreferenceConstants.OUT_UPPER_TAGS, "Uppercase Tags", getFieldEditorParent());
+        addField(upperTags);
+
+        // Label spelling corrected (was "Atributes").
+        BooleanFieldEditor upperAttributes =
+            new BooleanFieldEditor(IPreferenceConstants.OUT_UPPER_ATTR, "Uppercase Attributes", getFieldEditorParent());
+        addField(upperAttributes);
+
+        BooleanFieldEditor breakBeforeBr =
+            new BooleanFieldEditor(IPreferenceConstants.OUT_BREAK_BR, "Break before BR", getFieldEditorParent());
+        addField(breakBeforeBr);
+
+        BooleanFieldEditor wrapAttrValues =
+            new BooleanFieldEditor(
+                IPreferenceConstants.OUT_WRAP_ATTR_VALUES,
+                "Wrap Attribute Values",
+                getFieldEditorParent());
+        addField(wrapAttrValues);
+
+        BooleanFieldEditor wrapScripts =
+            new BooleanFieldEditor(IPreferenceConstants.OUT_WRAP_SCRIPS, "Wrap Scripts", getFieldEditorParent());
+        addField(wrapScripts);
+
+        BooleanFieldEditor rawOutput =
+            new BooleanFieldEditor(IPreferenceConstants.OUT_AS_RAW, "Output >128 chars as raw", getFieldEditorParent());
+        addField(rawOutput);
+    }
+
+    /**
+     * Does nothing; the page needs no workbench-specific initialization.
+     *
+     * @see IWorkbenchPreferencePage#init
+     */
+    public void init(IWorkbench workbench) {
+    }
+}
--- /dev/null
+package net.sourceforge.phpdt.tidy.preferences;
+
+import net.sourceforge.phpdt.tidy.JtidyPlugin;
+import org.eclipse.jface.preference.BooleanFieldEditor;
+import org.eclipse.jface.preference.FieldEditorPreferencePage;
+import org.eclipse.jface.preference.IPreferenceStore;
+import org.eclipse.jface.preference.StringFieldEditor;
+import org.eclipse.ui.IWorkbench;
+import org.eclipse.ui.IWorkbenchPreferencePage;
+
+/**
+ * Preference page for the JTidy output clean-up settings: clear up, strip
+ * Word 2000 markup, enclose body and block text, and the default alt text.
+ */
+public class JTidyOutputPreferences extends FieldEditorPreferencePage implements IWorkbenchPreferencePage {
+
+    /**
+     * Creates the page with a grid layout and attaches the preference
+     * store of the JTidy plugin to it.
+     */
+    public JTidyOutputPreferences() {
+        super(FieldEditorPreferencePage.GRID);
+
+        IPreferenceStore pluginStore = JtidyPlugin.getDefault().getPreferenceStore();
+        setPreferenceStore(pluginStore);
+    }
+
+    /**
+     * Adds four check boxes and one text field, in display order.
+     *
+     * @see org.eclipse.jface.preference.
+     * FieldEditorPreferencePage#createFieldEditors()
+     */
+    protected void createFieldEditors() {
+        addBooleanField(IPreferenceConstants.OUTPUT_MAKE_CLEAR, "Clear up");
+        addBooleanField(IPreferenceConstants.OUTPUT_STRIP_WORD, "Strip Word 2000 thingies");
+        addBooleanField(IPreferenceConstants.OUTPUT_ENCLOSE_BODY_TEXT, "Enclose Body Text");
+        addBooleanField(IPreferenceConstants.OUTPUT_ENCLOSE_BLOCK_TEXT, "Enclose Block Text");
+
+        addField(
+            new StringFieldEditor(
+                IPreferenceConstants.OUTPUT_DEFAULT_ALT_TEXT,
+                "Default Alt Text",
+                getFieldEditorParent()));
+    }
+
+    // Creates a check box editor for the given preference and adds it to the page.
+    private void addBooleanField(String preferenceName, String labelText) {
+        addField(new BooleanFieldEditor(preferenceName, labelText, getFieldEditorParent()));
+    }
+
+    /**
+     * Does nothing; the page needs no workbench-specific initialization.
+     *
+     * @see IWorkbenchPreferencePage#init
+     */
+    public void init(IWorkbench workbench) {
+    }
+}
--- /dev/null
+package net.sourceforge.phpdt.tidy.preferences;
+
+import net.sourceforge.phpdt.tidy.JtidyPlugin;
+import org.eclipse.jface.preference.BooleanFieldEditor;
+import org.eclipse.jface.preference.FieldEditorPreferencePage;
+import org.eclipse.jface.preference.IPreferenceStore;
+import org.eclipse.ui.IWorkbench;
+import org.eclipse.ui.IWorkbenchPreferencePage;
+
+/**
+ * Preference page offering three JTidy switches: show warnings, quiet
+ * mode and the tidy mark.
+ */
+public class JTidyPreferences extends FieldEditorPreferencePage implements IWorkbenchPreferencePage {
+
+    /**
+     * Creates the page with a grid layout and attaches the preference
+     * store of the JTidy plugin to it.
+     */
+    public JTidyPreferences() {
+        super(FieldEditorPreferencePage.GRID);
+
+        IPreferenceStore pluginStore = JtidyPlugin.getDefault().getPreferenceStore();
+        setPreferenceStore(pluginStore);
+    }
+
+    /**
+     * Adds the three check boxes in display order.
+     *
+     * @see org.eclipse.jface.preference.
+     * FieldEditorPreferencePage#createFieldEditors()
+     */
+    protected void createFieldEditors() {
+        addBooleanField(IPreferenceConstants.GENERAL_SHOW_WARNINGS, "Show warnings");
+        addBooleanField(IPreferenceConstants.GENERAL_QUIET, "Show no Errors");
+        addBooleanField(IPreferenceConstants.GENERAL_TIDY_MARK, "Set Tidy Mark in Header");
+    }
+
+    // Creates a check box editor for the given preference and adds it to the page.
+    private void addBooleanField(String preferenceName, String labelText) {
+        addField(new BooleanFieldEditor(preferenceName, labelText, getFieldEditorParent()));
+    }
+
+    /**
+     * Does nothing; the page needs no workbench-specific initialization.
+     *
+     * @see IWorkbenchPreferencePage#init
+     */
+    public void init(IWorkbench workbench) {
+    }
+}
--- /dev/null
+package net.sourceforge.phpdt.tidy.preferences;
+
+import net.sourceforge.phpdt.tidy.JtidyPlugin;
+import org.eclipse.jface.preference.BooleanFieldEditor;
+import org.eclipse.jface.preference.FieldEditorPreferencePage;
+import org.eclipse.jface.preference.IPreferenceStore;
+import org.eclipse.ui.IWorkbench;
+import org.eclipse.ui.IWorkbenchPreferencePage;
+
+/**
+ * Preference page for the JTidy wrapping and indentation switches.
+ */
+public class JTidyWrapIndentPreferences
+    extends FieldEditorPreferencePage
+    implements IWorkbenchPreferencePage, IPreferenceConstants {
+
+    /**
+     * Creates the page with a grid layout and attaches the preference
+     * store of the JTidy plugin to it.
+     */
+    public JTidyWrapIndentPreferences() {
+        super(FieldEditorPreferencePage.GRID);
+
+        IPreferenceStore pluginStore = JtidyPlugin.getDefault().getPreferenceStore();
+        setPreferenceStore(pluginStore);
+    }
+
+    /**
+     * Adds one check box per wrap/indent setting, in display order.
+     *
+     * @see org.eclipse.jface.preference.
+     * FieldEditorPreferencePage#createFieldEditors()
+     */
+    protected void createFieldEditors() {
+        addBooleanField(WRAP_ATT_VALS, "Wrap attributes");
+        addBooleanField(WRAP_SCRIPTLETS, "Wrap script literals");
+        addBooleanField(WRAP_SECTION, "Wrap Section");
+        addBooleanField(WRAP_ASP, "Wrap ASP");
+        addBooleanField(WRAP_JSTE, "Wrap JSTE");
+        addBooleanField(WRAP_PHP, "Wrap PHP");
+        addBooleanField(INDENT_ATTRIBUTES, "Indent Attributes");
+        addBooleanField(LITERAL_ATTRIBS, "Literal Attributes");
+    }
+
+    // Creates a check box editor for the given preference and adds it to the page.
+    private void addBooleanField(String preferenceName, String labelText) {
+        addField(new BooleanFieldEditor(preferenceName, labelText, getFieldEditorParent()));
+    }
+
+    /**
+     * Does nothing; the page needs no workbench-specific initialization.
+     *
+     * @see IWorkbenchPreferencePage#init
+     */
+    public void init(IWorkbench workbench) {
+    }
+}
--- /dev/null
+package net.sourceforge.phpdt.tidy.preferences;
+
+import org.eclipse.jface.preference.FieldEditor;
+import org.eclipse.swt.layout.GridData;
+import org.eclipse.swt.widgets.Composite;
+import org.eclipse.swt.widgets.Label;
+
+/**
+ * A field editor that shows a stand-alone label, i.e. one that does not
+ * belong to any other widget and does not store a preference.
+ */
+class LabelFieldEditor extends FieldEditor {
+
+    private Label label;
+
+    /**
+     * Creates the label editor. Every instance uses the same preference
+     * name ("label") because no preference is ever read or written.
+     *
+     * @param value the text passed to the superclass as label text
+     * @param parent the composite the label is created in
+     */
+    public LabelFieldEditor(String value, Composite parent) {
+        super("label", value, parent);
+    }
+
+    // Makes the label span the given number of grid columns.
+    protected void adjustForNumColumns(int numColumns) {
+        GridData layoutData = (GridData) label.getLayoutData();
+        layoutData.horizontalSpan = numColumns;
+    }
+
+    // Creates the label in the parent and lays it out across all columns,
+    // filling horizontally without grabbing excess space.
+    protected void doFillIntoGrid(Composite parent, int numColumns) {
+        GridData layoutData = new GridData();
+        layoutData.horizontalAlignment = GridData.FILL;
+        layoutData.verticalAlignment = GridData.CENTER;
+        layoutData.grabExcessHorizontalSpace = false;
+        layoutData.grabExcessVerticalSpace = false;
+        layoutData.horizontalSpan = numColumns;
+
+        label = getLabelControl(parent);
+        label.setLayoutData(layoutData);
+    }
+
+    // The label is the only control of this editor.
+    public int getNumberOfControls() {
+        return 1;
+    }
+
+    // Nothing is persisted for a label, so loading and storing do nothing.
+    protected void doLoad() {
+    }
+
+    protected void doLoadDefault() {
+    }
+
+    protected void doStore() {
+    }
+}
--- /dev/null
+/**
+ * Created on 11.10.2002
+ *
+ * @author Jan Schulz
+ */
+package net.sourceforge.phpdt.tidy.preferences;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.eclipse.core.resources.IResource;
+import org.eclipse.core.runtime.CoreException;
+import org.eclipse.core.runtime.Platform;
+import org.eclipse.core.runtime.QualifiedName;
+import org.eclipse.jface.preference.IPreferenceStore;
+import org.eclipse.jface.preference.PreferenceStore;
+import org.eclipse.jface.resource.JFaceResources;
+import org.eclipse.jface.util.Assert;
+import org.eclipse.jface.util.IPropertyChangeListener;
+import org.eclipse.jface.util.ListenerList;
+import org.eclipse.jface.util.PropertyChangeEvent;
+import org.eclipse.jface.util.SafeRunnable;
+
+
+/**
+ * This class can be used for wrapping a IResource to a PreferencePage
+ *
+ * Most of the class is C&P from the PreferenceStore which uses a Propertie
+ * instance to store a the Properties.
+ *
+ *
+ * @author Jan Schulz
+ * @since 11.10.2002
+ */
+public class ResourcePreferenceStore implements IPreferenceStore {
+
+ private IResource resource;
+ private IPreferenceStore defaultValues = null;
+ private String prefix;
+
+ private List unDefaulted = new ArrayList();
+
+ /**
+ * List of registered listeners (element type: <code>IPropertyChangeListener</code>).
+ * These listeners are to be informed when the current value of a preference changes.
+ */
+ private ListenerList listeners = new ListenerList();
+
+ /**
+ * Indicates whether a value as been changed by <code>setToDefault</code>
+ * or <code>setValue</code>; initially <code>false</code>.
+ */
+ private boolean dirty = false;
+
+ /**
+ * <p>
+ * Creates a IPreferenceStore were the Values are stored in a IResource
+ * and the defaultValues are taken from a different IPreferenceStore
+ * (like the default one of the plugin). If _defaultValues is empty, no
+ * default values are present.
+ * </p>
+ * <p>
+ * Because of the different mechanism for storeing Preferences, any
+ * lookup for a preference with the given name will be 'translated' as
+ * following:
+ * </p><p>
+ * <ul>
+ * <li>The defaultValues are querried without prefix just with the given
+ * name</li>
+ * <li>The Resource will be querried with the prefix and the name as a
+ * QualifiedName</li>
+ * </ul>
+ * </p><p>
+ * Be carefull: even defaulted Properties are written into the underlying
+ * resource!
+ * @param _prefix -- String used as Qualifier
+ * @param _resource -- a IResource, which may NOT be null
+ * @param _defaultValues -- a IPreferenceStore or null
+ */
+ public ResourcePreferenceStore(IResource _resource, String _prefix, IPreferenceStore _defaultValues) {
+ super();
+ Assert.isNotNull(_resource);
+ resource = _resource;
+ prefix = _prefix;
+ defaultValues = _defaultValues;
+ if (defaultValues == null) {
+ defaultValues = new PreferenceStore();
+ }
+ }
+
+ public ResourcePreferenceStore(IResource _resource, String _prefix) {
+ this(_resource, _prefix, null);
+ }
+
+ public ResourcePreferenceStore(IResource _resource) {
+ this(_resource, null, null);
+ }
+
+ private QualifiedName getQNForName(String name) {
+ return new QualifiedName(prefix, name);
+ }
+
+ private boolean resourceContains(String name) {
+ try {
+ return (resource.getPersistentProperty(getQNForName(name)) != null);
+ } catch (CoreException e) {
+ return false;
+ }
+ }
+
+ private String getProperty(String name) {
+ try {
+ return resource.getPersistentProperty(getQNForName(name));
+ } catch (CoreException e) {
+ }
+
+ return null;
+ }
+
+ private void setProperty(String name, String value) {
+ try {
+ resource.setPersistentProperty(getQNForName(name), value);
+ } catch (CoreException e) {
+ }
+
+ }
+
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void addPropertyChangeListener(IPropertyChangeListener listener) {
+ listeners.add(listener);
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public boolean contains(String name) {
+ return (resourceContains(name) || defaultValues.contains(name));
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void firePropertyChangeEvent(String name, Object oldValue, Object newValue) {
+ final Object[] listeners = this.listeners.getListeners();
+ // Do we need to fire an event.
+ if (listeners.length > 0 && (oldValue == null || !oldValue.equals(newValue))) {
+ final PropertyChangeEvent pe = new PropertyChangeEvent(this, name, oldValue, newValue);
+
+ Platform.run(new SafeRunnable(JFaceResources.getString("PreferenceStore.changeError")) { //$NON-NLS-1$
+ public void run() {
+ for (int i = 0; i < listeners.length; ++i) {
+ IPropertyChangeListener l = (IPropertyChangeListener) listeners[i];
+ l.propertyChange(pe);
+ }
+ }
+ });
+ }
+ }
+
+ /**
+ * Helper function: gets boolean for a given name.
+ */
+ public boolean getBoolean(String name) {
+ String value = getProperty(name);
+ if (value == null)
+ return BOOLEAN_DEFAULT_DEFAULT;
+ if (value.equals(IPreferenceStore.TRUE))
+ return true;
+ return false;
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public boolean getDefaultBoolean(String name) {
+ return defaultValues.getBoolean(name);
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public double getDefaultDouble(String name) {
+ return defaultValues.getDouble(name);
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public float getDefaultFloat(String name) {
+ return defaultValues.getFloat(name);
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public int getDefaultInt(String name) {
+ return defaultValues.getInt(name);
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public long getDefaultLong(String name) {
+ return defaultValues.getLong(name);
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public String getDefaultString(String name) {
+ return defaultValues.getString(name);
+ }
+
+ public double getDouble(String name) {
+ String value = getProperty(name);
+ if (value == null)
+ return DOUBLE_DEFAULT_DEFAULT;
+ double ival = DOUBLE_DEFAULT_DEFAULT;
+ try {
+ ival = new Double(value).doubleValue();
+ } catch (NumberFormatException e) {
+ }
+ return ival;
+ }
+
+ /**
+ * Helper function: gets long for a given name.
+ */
+ public long getLong(String name) {
+ String value = getProperty(name);
+ if (value == null)
+ return LONG_DEFAULT_DEFAULT;
+ long ival = LONG_DEFAULT_DEFAULT;
+ try {
+ ival = Long.parseLong(value);
+ } catch (NumberFormatException e) {
+ }
+ return ival;
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public String getString(String name) {
+ String value = getProperty(name);
+ if (value == null)
+ return STRING_DEFAULT_DEFAULT;
+ return value;
+ }
+
+ public float getFloat(String name) {
+ String value = getProperty(name);
+ if (value == null)
+ return FLOAT_DEFAULT_DEFAULT;
+ float ival = FLOAT_DEFAULT_DEFAULT;
+ try {
+ ival = new Float(value).floatValue();
+ } catch (NumberFormatException e) {
+ }
+ return ival;
+ }
+
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+
+ public int getInt(String name) {
+ String value = getProperty(name);
+ if (value == null)
+ return INT_DEFAULT_DEFAULT;
+ int ival = 0;
+ try {
+ ival = Integer.parseInt(value);
+ } catch (NumberFormatException e) {
+ }
+ return ival;
+ }
+
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public boolean isDefault(String name) {
+ //REMIND: Is that the right algo?
+ return (!resourceContains(name) || getProperty(name).equals(defaultValues.getString(name)));
+ }
+
+ /**
+ * Indicates whether this PrefStore needds saving. As there isn't any saving
+ * facility, it always returns false
+ *
+ * Method declared on IPreferenceStore.
+ */
+ public boolean needsSaving() {
+ return false;
+ }
+
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void putValue(String name, String value) {
+ String oldValue = getString(name);
+ if (oldValue == null || !oldValue.equals(value)) {
+ setValue(name, value);
+ dirty = true;
+
+ }
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void removePropertyChangeListener(IPropertyChangeListener listener) {
+ listeners.remove(listener);
+ }
+
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void setDefault(String name, double value) {
+ defaultValues.setValue(name, value);
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void setDefault(String name, float value) {
+ defaultValues.setValue(name, value);
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void setDefault(String name, int value) {
+ defaultValues.setValue(name, value);
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void setDefault(String name, long value) {
+ defaultValues.setValue(name, value);
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void setDefault(String name, String value) {
+ defaultValues.setValue(name, value);
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void setDefault(String name, boolean value) {
+ defaultValues.setValue(name, value);
+ }
+
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void setToDefault(String name) {
+ Object oldValue = getProperty(name);
+ String newValue = null;
+ if (defaultValues != null)
+ newValue = (String) defaultValues.getString(name);
+ setProperty(name, newValue);
+ dirty = true;
+
+ firePropertyChangeEvent(name, oldValue, newValue);
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void setValue(String name, double value) {
+ double oldValue = getDouble(name);
+ if (oldValue != value) {
+ setProperty(name, Double.toString(value));
+ dirty = true;
+ firePropertyChangeEvent(name, new Double(oldValue), new Double(value));
+ }
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void setValue(String name, float value) {
+ float oldValue = getFloat(name);
+ if (oldValue != value) {
+ setProperty(name, Float.toString(value));
+ dirty = true;
+ firePropertyChangeEvent(name, new Float(oldValue), new Float(value));
+ }
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void setValue(String name, int value) {
+ int oldValue = getInt(name);
+ if (oldValue != value) {
+ setProperty(name, Integer.toString(value));
+ dirty = true;
+ firePropertyChangeEvent(name, new Integer(oldValue), new Integer(value));
+ }
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void setValue(String name, long value) {
+ long oldValue = getLong(name);
+ if (oldValue != value) {
+ setProperty(name, Long.toString(value));
+ dirty = true;
+ firePropertyChangeEvent(name, new Long(oldValue), new Long(value));
+ }
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void setValue(String name, String value) {
+ String oldValue = getString(name);
+ if (oldValue == null || !oldValue.equals(value)) {
+ setProperty(name, value);
+ dirty = true;
+ firePropertyChangeEvent(name, oldValue, value);
+ }
+ }
+ /* (non-Javadoc)
+ * Method declared on IPreferenceStore.
+ */
+ public void setValue(String name, boolean value) {
+     final boolean previous = getBoolean(name);
+     if (previous == value) {
+         // Same value: nothing is stored, flagged dirty or broadcast.
+         return;
+     }
+
+     // Booleans are persisted using the IPreferenceStore TRUE/FALSE constants.
+     if (value) {
+         setProperty(name, IPreferenceStore.TRUE);
+     } else {
+         setProperty(name, IPreferenceStore.FALSE);
+     }
+     dirty = true;
+     firePropertyChangeEvent(name, new Boolean(previous), new Boolean(value));
+ }
+
+}
--- /dev/null
+package net.sourceforge.phpdt.tidy.preferences;
+
+import org.eclipse.swt.widgets.Composite;
+
+/**
+ * A field editor for adding space to a preference page.
+ */
+public class SpacerFieldEditor extends LabelFieldEditor {
+
+    /** Text handed to the label field editor: the spacer shows nothing. */
+    private static final String NO_TEXT = "";
+
+    /**
+     * Creates the spacer as a label field editor with an empty label.
+     *
+     * @param parent the composite passed on to the label field editor
+     */
+    public SpacerFieldEditor(Composite parent) {
+        super(NO_TEXT, parent);
+    }
+}
--- /dev/null
+/*
+ * @(#)AttVal.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Attribute/Value linked list node
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class AttVal implements Cloneable {
+
+    /** Next entry of the attribute list, or null at the end of the list. */
+    public AttVal next;
+    /** Dictionary definition attached to this entry, or null if there is none. */
+    public Attribute dict;
+    /** Node held in the asp slot, or null (presumably an embedded ASP block - confirm). */
+    public Node asp;
+    /** Node held in the php slot, or null (presumably an embedded PHP block - confirm). */
+    public Node php;
+    /** Delimiter code for the value; 0 when created by the no-arg constructor. */
+    public int delim;
+    /** Attribute name as a string; may be null. */
+    public String attribute;
+    /** Attribute value as a string; null when no value is present. */
+    public String value;
+
+    /* --------------------- DOM ---------------------------- */
+
+    /** DOM view of this entry, created on the first call to getAdapter(). */
+    protected org.w3c.dom.Attr adapter = null;
+
+    /* --------------------- END DOM ------------------------ */
+
+    /**
+     * Creates an empty entry: every reference is null and delim is 0.
+     */
+    public AttVal()
+    {
+        this(null, null, null, null, 0, null, null);
+    }
+
+    /**
+     * Creates an entry without asp/php nodes.
+     */
+    public AttVal(AttVal next, Attribute dict, int delim,
+                  String attribute, String value)
+    {
+        this(next, dict, null, null, delim, attribute, value);
+    }
+
+    /**
+     * Creates an entry with every field supplied by the caller.
+     */
+    public AttVal(AttVal next, Attribute dict, Node asp, Node php,
+                  int delim, String attribute, String value)
+    {
+        this.next = next;
+        this.dict = dict;
+        this.asp = asp;
+        this.php = php;
+        this.delim = delim;
+        this.attribute = attribute;
+        this.value = value;
+    }
+
+    /**
+     * Copies this entry and, recursively, every entry that follows it.
+     * The asp and php nodes are cloned as well; name, value and delim are
+     * carried over as-is. The copy's dictionary entry is not taken from
+     * this object but looked up again in the default attribute table.
+     *
+     * @return the new AttVal heading the copied list
+     */
+    protected Object clone()
+    {
+        // Same order as before: tail of the list, asp, php, dictionary lookup.
+        AttVal tailCopy = null;
+        if (next != null) {
+            tailCopy = (AttVal)next.clone();
+        }
+
+        Node aspCopy = null;
+        if (asp != null) {
+            aspCopy = (Node)asp.clone();
+        }
+
+        Node phpCopy = null;
+        if (php != null) {
+            phpCopy = (Node)php.clone();
+        }
+
+        Attribute definition =
+            AttributeTable.getDefaultAttributeTable().findAttribute(this);
+
+        return new AttVal(tailCopy, definition, aspCopy, phpCopy,
+                          delim, attribute, value);
+    }
+
+    /**
+     * Tells whether the attached dictionary entry uses the shared
+     * boolean checker instance from AttrCheckImpl.
+     *
+     * @return true only if a dictionary entry is attached and its checker
+     *         is the AttrCheckImpl.getCheckBool() instance
+     */
+    public boolean isBoolAttribute()
+    {
+        final Attribute definition = this.dict;
+        return definition != null
+            && definition.attrchk == AttrCheckImpl.getCheckBool();
+    }
+
+    /**
+     * Validates this attribute against its dictionary entry.
+     * Unknown attributes are ignored for proprietary elements.
+     *
+     * @param lexer lexer whose configuration is read and whose version mask
+     *              is narrowed
+     * @param node  element carrying this attribute
+     * @return the dictionary entry of this attribute, or null if it has none
+     */
+    public Attribute checkAttribute( Lexer lexer, Node node )
+    {
+        final TagTable tags = lexer.configuration.tt;
+
+        // Entries carrying an asp or php node skip the duplicate-name check.
+        final boolean plainAttribute = (this.asp == null) && (this.php == null);
+        if (plainAttribute) {
+            this.checkUniqueAttribute(lexer, node);
+        }
+
+        final Attribute definition = this.dict;
+
+        if (definition == null) {
+            // No dictionary entry: complain unless XML tags are in use, the
+            // element itself is unknown, an asp node is attached, or the
+            // element is a proprietary one.
+            final boolean reportUnknown =
+                !lexer.configuration.XmlTags
+                && node.tag != null
+                && this.asp == null
+                && (node.tag.versions & Dict.VERS_PROPRIETARY) == 0;
+            if (reportUnknown) {
+                Report.attrError(lexer, node, this.attribute, Report.UNKNOWN_ATTRIBUTE);
+            }
+            return definition;
+        }
+
+        /* title is vers 2.0 for A and LINK otherwise vers 4.0 */
+        final boolean titleOnAnchorOrLink =
+            definition == AttributeTable.attrTitle
+            && (node.tag == tags.tagA || node.tag == tags.tagLink);
+
+        if (titleOnAnchorOrLink) {
+            lexer.versions &= Dict.VERS_ALL;
+        } else if ((definition.versions & Dict.VERS_XML) != 0) {
+            // XML-only attribute: reported unless XML input or output is on.
+            if (!lexer.configuration.XmlTags && !lexer.configuration.XmlOut) {
+                Report.attrError(lexer, node, this.attribute, Report.XML_ATTRIBUTE_VALUE);
+            }
+        } else {
+            lexer.versions &= definition.versions;
+        }
+
+        if (definition.attrchk != null) {
+            definition.attrchk.check(lexer, node, this);
+        }
+
+        return definition;
+    }
+
+    /**
+     * Reports REPEATED_ATTRIBUTE once if any later entry in the list has the
+     * same name (compared with Lexer.wstrcasecmp). Later entries that carry
+     * an asp or php node are not counted. The same attribute name can't be
+     * used more than once in each element.
+     *
+     * @param lexer lexer passed on to the error report
+     * @param node  element passed on to the error report
+     */
+    public void checkUniqueAttribute(Lexer lexer, Node node)
+    {
+        int repeats = 0;
+
+        AttVal later = this.next;
+        while (later != null) {
+            final boolean sameName =
+                this.attribute != null
+                && later.attribute != null
+                && later.asp == null
+                && later.php == null
+                && Lexer.wstrcasecmp(this.attribute, later.attribute) == 0;
+            if (sameName) {
+                repeats++;
+            }
+            later = later.next;
+        }
+
+        if (repeats > 0) {
+            Report.attrError(lexer, node, this.attribute, Report.REPEATED_ATTRIBUTE);
+        }
+    }
+
+    /**
+     * Returns the DOM view of this entry, creating it on first use.
+     *
+     * @return the cached DOMAttrImpl wrapping this object
+     */
+    protected org.w3c.dom.Attr getAdapter()
+    {
+        if (adapter != null) {
+            return adapter;
+        }
+        adapter = new DOMAttrImpl(this);
+        return adapter;
+    }
+
+}
--- /dev/null
+/*
+ * @(#)AttrCheck.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Check attribute values
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public interface AttrCheck {
+
+    /**
+     * Checks the value of one attribute.
+     *
+     * @param lexer  the lexer handed to the check
+     * @param node   the node that carries the attribute
+     * @param attval the attribute entry to check
+     */
+    void check(Lexer lexer, Node node, AttVal attval);
+
+}
--- /dev/null
+/*
+ * @(#)AttrCheckImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Check attribute values implementations
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class AttrCheckImpl {
+
+ public static class CheckUrl implements AttrCheck {
+
+ public void check( Lexer lexer, Node node, AttVal attval)
+ {
+ if (attval.value == null)
+ Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE);
+ else if (lexer.configuration.FixBackslash)
+ {
+ attval.value = attval.value.replace('\\','/');
+ }
+ }
+
+ };
+
+ public static class CheckScript implements AttrCheck {
+
+ public void check( Lexer lexer, Node node, AttVal attval)
+ {
+ }
+
+ };
+
+ public static class CheckAlign implements AttrCheck {
+
+ public void check( Lexer lexer, Node node, AttVal attval)
+ {
+ String value;
+
+ /* IMG, OBJECT, APPLET and EMBED use align for vertical position */
+ if (node.tag != null && ((node.tag.model & Dict.CM_IMG) != 0))
+ {
+ getCheckValign().check(lexer, node, attval);
+ return;
+ }
+
+ value = attval.value;
+
+ if (value == null)
+ Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE);
+ else if (! (Lexer.wstrcasecmp(value, "left") == 0 ||
+ Lexer.wstrcasecmp(value, "center") == 0 ||
+ Lexer.wstrcasecmp(value, "right") == 0 ||
+ Lexer.wstrcasecmp(value, "justify") == 0))
+ Report.attrError(lexer, node, attval.value, Report.BAD_ATTRIBUTE_VALUE);
+ }
+
+ };
+
+ public static class CheckValign implements AttrCheck {
+
+ public void check( Lexer lexer, Node node, AttVal attval)
+ {
+ String value;
+
+ value = attval.value;
+
+ if (value == null)
+ Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE);
+ else if (Lexer.wstrcasecmp(value, "top") == 0 ||
+ Lexer.wstrcasecmp(value, "middle") == 0 ||
+ Lexer.wstrcasecmp(value, "bottom") == 0 ||
+ Lexer.wstrcasecmp(value, "baseline") == 0)
+ {
+ /* all is fine */
+ }
+ else if (Lexer.wstrcasecmp(value, "left") == 0 ||
+ Lexer.wstrcasecmp(value, "right") == 0)
+ {
+ if (!(node.tag != null && ((node.tag.model & Dict.CM_IMG) != 0)))
+ Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE);
+ }
+ else if (Lexer.wstrcasecmp(value, "texttop") == 0 ||
+ Lexer.wstrcasecmp(value, "absmiddle") == 0 ||
+ Lexer.wstrcasecmp(value, "absbottom") == 0 ||
+ Lexer.wstrcasecmp(value, "textbottom") == 0)
+ {
+ lexer.versions &= Dict.VERS_PROPRIETARY;
+ Report.attrError(lexer, node, value, Report.PROPRIETARY_ATTR_VALUE);
+ }
+ else
+ Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE);
+ }
+
+ };
+
+ public static class CheckBool implements AttrCheck {
+
+ public void check( Lexer lexer, Node node, AttVal attval)
+ {
+ }
+
+ };
+
+ public static class CheckId implements AttrCheck {
+
+ public void check( Lexer lexer, Node node, AttVal attval)
+ {
+ }
+
+ };
+
+ public static class CheckName implements AttrCheck {
+
+ public void check( Lexer lexer, Node node, AttVal attval)
+ {
+ }
+
+ };
+
+ public static AttrCheck getCheckUrl()
+ {
+ return _checkUrl;
+ }
+
+ public static AttrCheck getCheckScript()
+ {
+ return _checkScript;
+ }
+
+ public static AttrCheck getCheckAlign()
+ {
+ return _checkAlign;
+ }
+
+ public static AttrCheck getCheckValign()
+ {
+ return _checkValign;
+ }
+
+ public static AttrCheck getCheckBool()
+ {
+ return _checkBool;
+ }
+
+ public static AttrCheck getCheckId()
+ {
+ return _checkId;
+ }
+
+ public static AttrCheck getCheckName()
+ {
+ return _checkName;
+ }
+
+
+ private static AttrCheck _checkUrl = new CheckUrl();
+ private static AttrCheck _checkScript = new CheckScript();
+ private static AttrCheck _checkAlign = new CheckAlign();
+ private static AttrCheck _checkValign = new CheckValign();
+ private static AttrCheck _checkBool = new CheckBool();
+ private static AttrCheck _checkId = new CheckId();
+ private static AttrCheck _checkName = new CheckName();
+
+}
--- /dev/null
+/*
+ * @(#)Attribute.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * HTML attribute
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class Attribute {
+
+    /** Attribute name. */
+    public String name;
+    /** The nowrap flag; false unless passed to the four-argument constructor or set later. */
+    public boolean nowrap;
+    /** The literal flag; always false after construction. */
+    public boolean literal;
+    /** Version bit mask supplied at construction. */
+    public short versions;
+    /** Checker for the attribute's value, or null for none. */
+    public AttrCheck attrchk;
+
+    /**
+     * Creates an attribute definition with an explicit nowrap flag.
+     *
+     * @param name     attribute name
+     * @param nowrap   initial value of the nowrap flag
+     * @param versions version bit mask
+     * @param attrchk  value checker, may be null
+     */
+    public Attribute( String name,
+                      boolean nowrap,
+                      short versions,
+                      AttrCheck attrchk )
+    {
+        this.name = name;
+        this.nowrap = nowrap;
+        this.literal = false;
+        this.versions = versions;
+        this.attrchk = attrchk;
+    }
+
+    /**
+     * Creates an attribute definition whose nowrap flag is false.
+     *
+     * @param name     attribute name
+     * @param versions version bit mask
+     * @param attrchk  value checker, may be null
+     */
+    public Attribute( String name,
+                      short versions,
+                      AttrCheck attrchk )
+    {
+        this(name, false, versions, attrchk);
+    }
+
+}
--- /dev/null
+/*
+ * @(#)AttributeTable.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+import java.util.Hashtable;
+
+/**
+ *
+ * HTML attribute hash table
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class AttributeTable {
+
+ public AttributeTable()
+ {
+ }
+
+ public Attribute lookup( String name )
+ {
+ return (Attribute)attributeHashtable.get( name );
+ }
+
+ public Attribute install( Attribute attr )
+ {
+ return (Attribute)attributeHashtable.put( attr.name, attr );
+ }
+
+ /* public method for finding attribute definition by name */
+ public Attribute findAttribute( AttVal attval )
+ {
+ Attribute np;
+
+ if ( attval.attribute != null ) {
+ np = lookup( attval.attribute );
+ return np;
+ }
+
+ return null;
+ }
+
+ public boolean isUrl( String attrname )
+ {
+ Attribute np;
+
+ np = lookup( attrname );
+ return ( np != null && np.attrchk == AttrCheckImpl.getCheckUrl() );
+ }
+
+ public boolean isScript( String attrname )
+ {
+ Attribute np;
+
+ np = lookup( attrname );
+ return ( np != null && np.attrchk == AttrCheckImpl.getCheckScript() );
+ }
+
+ public boolean isLiteralAttribute( String attrname )
+ {
+ Attribute np;
+
+ np = lookup( attrname );
+ return ( np != null && np.literal );
+ }
+
+ /*
+ Henry Zrepa reports that some folk are
+ using embed with script attributes where
+ newlines are signficant. These need to be
+ declared and handled specially!
+ */
+ public void declareLiteralAttrib(String name)
+ {
+ Attribute attrib = lookup(name);
+
+ if (attrib == null)
+ attrib = install(new Attribute(name, Dict.VERS_PROPRIETARY, null));
+
+ attrib.literal = true;
+ }
+
+ private Hashtable attributeHashtable = new Hashtable();
+
+ private static AttributeTable defaultAttributeTable = null;
+
+ private static Attribute[] attrs = {
+
+ new Attribute( "abbr", Dict.VERS_HTML40, null ),
+ new Attribute( "accept-charset", Dict.VERS_HTML40, null ),
+ new Attribute( "accept", Dict.VERS_ALL, null ),
+ new Attribute( "accesskey", Dict.VERS_HTML40, null ),
+ new Attribute( "action", Dict.VERS_ALL, AttrCheckImpl.getCheckUrl() ),
+ new Attribute( "add_date", Dict.VERS_NETSCAPE, null ), /* A */
+ new Attribute( "align", Dict.VERS_ALL, AttrCheckImpl.getCheckAlign() ), /* set varies with element */
+ new Attribute( "alink", Dict.VERS_LOOSE, null ),
+ new Attribute( "alt", Dict.VERS_ALL, null ),
+ new Attribute( "archive", Dict.VERS_HTML40, null ), /* space or comma separated list */
+ new Attribute( "axis", Dict.VERS_HTML40, null ),
+ new Attribute( "background", Dict.VERS_LOOSE, AttrCheckImpl.getCheckUrl() ),
+ new Attribute( "bgcolor", Dict.VERS_LOOSE, null ),
+ new Attribute( "bgproperties", Dict.VERS_PROPRIETARY, null ), /* BODY "fixed" fixes background */
+ new Attribute( "border", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* like LENGTH + "border" */
+ new Attribute( "bordercolor", Dict.VERS_MICROSOFT, null ), /* used on TABLE */
+ new Attribute( "bottommargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */
+ new Attribute( "cellpadding", Dict.VERS_FROM32, null ), /* % or pixel values */
+ new Attribute( "cellspacing", Dict.VERS_FROM32, null ),
+ new Attribute( "char", Dict.VERS_HTML40, null ),
+ new Attribute( "charoff", Dict.VERS_HTML40, null ),
+ new Attribute( "charset", Dict.VERS_HTML40, null ),
+ new Attribute( "checked", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* i.e. "checked" or absent */
+ new Attribute( "cite", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ),
+ new Attribute( "class", Dict.VERS_HTML40, null ),
+ new Attribute( "classid", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ),
+ new Attribute( "clear", Dict.VERS_LOOSE, null ), /* BR: left, right, all */
+ new Attribute( "code", Dict.VERS_LOOSE, null ), /* APPLET */
+ new Attribute( "codebase", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* OBJECT */
+ new Attribute( "codetype", Dict.VERS_HTML40, null ), /* OBJECT */
+ new Attribute( "color", Dict.VERS_LOOSE, null ), /* BASEFONT, FONT */
+ new Attribute( "cols", Dict.VERS_IFRAMES, null ), /* TABLE & FRAMESET */
+ new Attribute( "colspan", Dict.VERS_FROM32, null ),
+ new Attribute( "compact", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* lists */
+ new Attribute( "content", Dict.VERS_ALL, null ), /* META */
+ new Attribute( "coords", Dict.VERS_FROM32, null ), /* AREA, A */
+ new Attribute( "data", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* OBJECT */
+ new Attribute( "datafld", Dict.VERS_MICROSOFT, null ), /* used on DIV, IMG */
+ new Attribute( "dataformatas", Dict.VERS_MICROSOFT, null ), /* used on DIV, IMG */
+ new Attribute( "datapagesize", Dict.VERS_MICROSOFT, null ), /* used on DIV, IMG */
+ new Attribute( "datasrc", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckUrl() ), /* used on TABLE */
+ new Attribute( "datetime", Dict.VERS_HTML40, null ), /* INS, DEL */
+ new Attribute( "declare", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* OBJECT */
+ new Attribute( "defer", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* SCRIPT */
+ new Attribute( "dir", Dict.VERS_HTML40, null ), /* ltr or rtl */
+ new Attribute( "disabled", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* form fields */
+ new Attribute( "enctype", Dict.VERS_ALL, null ), /* FORM */
+ new Attribute( "face", Dict.VERS_LOOSE, null ), /* BASEFONT, FONT */
+ new Attribute( "for", Dict.VERS_HTML40, null ), /* LABEL */
+ new Attribute( "frame", Dict.VERS_HTML40, null ), /* TABLE */
+ new Attribute( "frameborder", Dict.VERS_FRAMES, null ), /* 0 or 1 */
+ new Attribute( "framespacing", Dict.VERS_PROPRIETARY, null ), /* pixel value */
+ new Attribute( "gridx", Dict.VERS_PROPRIETARY, null ), /* TABLE Adobe golive*/
+ new Attribute( "gridy", Dict.VERS_PROPRIETARY, null ), /* TABLE Adobe golive */
+ new Attribute( "headers", Dict.VERS_HTML40, null ), /* table cells */
+ new Attribute( "height", Dict.VERS_ALL, null ), /* pixels only for TH/TD */
+ new Attribute( "href", Dict.VERS_ALL, AttrCheckImpl.getCheckUrl() ), /* A, AREA, LINK and BASE */
+ new Attribute( "hreflang", Dict.VERS_HTML40, null ), /* A, LINK */
+ new Attribute( "hspace", Dict.VERS_ALL, null ), /* APPLET, IMG, OBJECT */
+ new Attribute( "http-equiv", Dict.VERS_ALL, null ), /* META */
+ new Attribute( "id", Dict.VERS_HTML40, AttrCheckImpl.getCheckId() ),
+ new Attribute( "ismap", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* IMG */
+ new Attribute( "label", Dict.VERS_HTML40, null ), /* OPT, OPTGROUP */
+ new Attribute( "lang", Dict.VERS_HTML40, null ),
+ new Attribute( "language", Dict.VERS_LOOSE, null ), /* SCRIPT */
+ new Attribute( "last_modified", Dict.VERS_NETSCAPE, null ), /* A */
+ new Attribute( "last_visit", Dict.VERS_NETSCAPE, null ), /* A */
+ new Attribute( "leftmargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */
+ new Attribute( "link", Dict.VERS_LOOSE, null ), /* BODY */
+ new Attribute( "longdesc", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* IMG */
+ new Attribute( "lowsrc", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckUrl() ), /* IMG */
+ new Attribute( "marginheight", Dict.VERS_IFRAMES, null ), /* FRAME, IFRAME, BODY */
+ new Attribute( "marginwidth", Dict.VERS_IFRAMES, null ), /* ditto */
+ new Attribute( "maxlength", Dict.VERS_ALL, null ), /* INPUT */
+ new Attribute( "media", Dict.VERS_HTML40, null ), /* STYLE, LINK */
+ new Attribute( "method", Dict.VERS_ALL, null ), /* FORM: get or post */
+ new Attribute( "multiple", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* SELECT */
+ new Attribute( "name", Dict.VERS_ALL, AttrCheckImpl.getCheckName() ),
+ new Attribute( "nohref", Dict.VERS_FROM32, AttrCheckImpl.getCheckBool() ), /* AREA */
+ new Attribute( "noresize", Dict.VERS_FRAMES, AttrCheckImpl.getCheckBool() ), /* FRAME */
+ new Attribute( "noshade", Dict.VERS_LOOSE, AttrCheckImpl.getCheckBool() ), /* HR */
+ new Attribute( "nowrap", Dict.VERS_LOOSE, AttrCheckImpl.getCheckBool() ), /* table cells */
+ new Attribute( "object", Dict.VERS_HTML40_LOOSE, null ), /* APPLET */
+ new Attribute( "onblur", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onchange", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onclick", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "ondblclick", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onkeydown", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onkeypress", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onkeyup", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onload", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onmousedown", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onmousemove", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onmouseout", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onmouseover", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onmouseup", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onsubmit", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onreset", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onselect", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onunload", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
+ new Attribute( "onafterupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */
+ new Attribute( "onbeforeupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */
+ new Attribute( "onerrorupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */
+ new Attribute( "onrowenter", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */
+ new Attribute( "onrowexit", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */
+ new Attribute( "onbeforeunload", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */
+ new Attribute( "ondatasetchanged", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* object, applet */
+ new Attribute( "ondataavailable", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* object, applet */
+ new Attribute( "ondatasetcomplete",Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* object, applet */
+ new Attribute( "profile", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* HEAD */
+ new Attribute( "prompt", Dict.VERS_LOOSE, null ), /* ISINDEX */
+ new Attribute( "readonly", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* form fields */
+ new Attribute( "rel", Dict.VERS_ALL, null ), /* A, LINK */
+ new Attribute( "rev", Dict.VERS_ALL, null ), /* A, LINK */
+ new Attribute( "rightmargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */
+ new Attribute( "rows", Dict.VERS_ALL, null ), /* TEXTAREA */
+ new Attribute( "rowspan", Dict.VERS_ALL, null ), /* table cells */
+ new Attribute( "rules", Dict.VERS_HTML40, null ), /* TABLE */
+ new Attribute( "scheme", Dict.VERS_HTML40, null ), /* META */
+ new Attribute( "scope", Dict.VERS_HTML40, null ), /* table cells */
+ new Attribute( "scrolling", Dict.VERS_IFRAMES, null ), /* yes, no or auto */
+ new Attribute( "selected", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* OPTION */
+ new Attribute( "shape", Dict.VERS_FROM32, null ), /* AREA, A */
+ new Attribute( "showgrid", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckBool() ), /* TABLE Adobe golive */
+ new Attribute( "showgridx", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckBool() ), /* TABLE Adobe golive*/
+ new Attribute( "showgridy", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckBool() ), /* TABLE Adobe golive*/
+ new Attribute( "size", Dict.VERS_LOOSE, null ), /* HR, FONT, BASEFONT, SELECT */
+ new Attribute( "span", Dict.VERS_HTML40, null ), /* COL, COLGROUP */
+ new Attribute( "src", (short)(Dict.VERS_ALL | Dict.VERS_FRAMES), AttrCheckImpl.getCheckUrl() ), /* IMG, FRAME, IFRAME */
+ new Attribute( "standby", Dict.VERS_HTML40, null ), /* OBJECT */
+ new Attribute( "start", Dict.VERS_ALL, null ), /* OL */
+ new Attribute( "style", Dict.VERS_HTML40, null ),
+ new Attribute( "summary", Dict.VERS_HTML40, null ), /* TABLE */
+ new Attribute( "tabindex", Dict.VERS_HTML40, null ), /* fields, OBJECT and A */
+ new Attribute( "target", Dict.VERS_HTML40, null ), /* names a frame/window */
+ new Attribute( "text", Dict.VERS_LOOSE, null ), /* BODY */
+ new Attribute( "title", Dict.VERS_HTML40, null ), /* text tool tip */
+ new Attribute( "topmargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */
+ new Attribute( "type", Dict.VERS_FROM32, null ), /* also used by SPACER */
+ new Attribute( "usemap", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* things with images */
+ new Attribute( "valign", Dict.VERS_FROM32, AttrCheckImpl.getCheckValign() ),
+ new Attribute( "value", Dict.VERS_ALL, null ), /* OPTION, PARAM */
+ new Attribute( "valuetype", Dict.VERS_HTML40, null ), /* PARAM: data, ref, object */
+ new Attribute( "version", Dict.VERS_ALL, null ), /* HTML */
+ new Attribute( "vlink", Dict.VERS_LOOSE, null ), /* BODY */
+ new Attribute( "vspace", Dict.VERS_LOOSE, null ), /* IMG, OBJECT, APPLET */
+ new Attribute( "width", Dict.VERS_ALL, null ), /* pixels only for TD/TH */
+ new Attribute( "wrap", Dict.VERS_NETSCAPE, null ), /* textarea */
+ new Attribute( "xml:lang", Dict.VERS_XML, null ), /* XML language */
+ new Attribute( "xmlns", Dict.VERS_ALL, null ), /* name space */
+
+ };
+
+ public static Attribute attrHref = null;
+ public static Attribute attrSrc = null;
+ public static Attribute attrId = null;
+ public static Attribute attrName = null;
+ public static Attribute attrSummary = null;
+ public static Attribute attrAlt = null;
+ public static Attribute attrLongdesc = null;
+ public static Attribute attrUsemap = null;
+ public static Attribute attrIsmap = null;
+ public static Attribute attrLanguage = null;
+ public static Attribute attrType = null;
+ public static Attribute attrTitle = null;
+ public static Attribute attrXmlns = null;
+ public static Attribute attrValue = null;
+ public static Attribute attrContent = null;
+ public static Attribute attrDatafld = null;
+ public static Attribute attrWidth = null;
+ public static Attribute attrHeight = null;
+
+ public static AttributeTable getDefaultAttributeTable()
+ {
+ if ( defaultAttributeTable == null ) {
+ defaultAttributeTable = new AttributeTable();
+ for ( int i = 0; i < attrs.length; i++ ) {
+ defaultAttributeTable.install( attrs[i] );
+ }
+ attrHref = defaultAttributeTable.lookup("href");
+ attrSrc = defaultAttributeTable.lookup("src");
+ attrId = defaultAttributeTable.lookup("id");
+ attrName = defaultAttributeTable.lookup("name");
+ attrSummary = defaultAttributeTable.lookup("summary");
+ attrAlt = defaultAttributeTable.lookup("alt");
+ attrLongdesc = defaultAttributeTable.lookup("longdesc");
+ attrUsemap = defaultAttributeTable.lookup("usemap");
+ attrIsmap = defaultAttributeTable.lookup("ismap");
+ attrLanguage = defaultAttributeTable.lookup("language");
+ attrType = defaultAttributeTable.lookup("type");
+ attrTitle = defaultAttributeTable.lookup("title");
+ attrXmlns = defaultAttributeTable.lookup("xmlns");
+ attrValue = defaultAttributeTable.lookup("value");
+ attrContent = defaultAttributeTable.lookup("content");
+ attrDatafld = defaultAttributeTable.lookup("datafld");;
+ attrWidth = defaultAttributeTable.lookup("width");;
+ attrHeight = defaultAttributeTable.lookup("height");;
+
+ attrAlt.nowrap = true;
+ attrValue.nowrap = true;
+ attrContent.nowrap = true;
+ }
+ return defaultAttributeTable;
+ }
+
+}
--- /dev/null
+/*
+ * @(#)CheckAttribs.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Check HTML attributes
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public interface CheckAttribs {
+
+    /**
+     * Checks the attributes of one node.
+     *
+     * @param lexer the lexer handed to the check
+     * @param node  the node whose attributes are checked
+     */
+    void check(Lexer lexer, Node node);
+
+}
+
--- /dev/null
+/*
+ * @(#)CheckAttribsImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Check HTML attributes implementation
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class CheckAttribsImpl {
+
+ public static class CheckHTML implements CheckAttribs {
+
+ public void check( Lexer lexer, Node node )
+ {
+ AttVal attval;
+ Attribute attribute;
+
+ node.checkUniqueAttributes(lexer);
+
+ for (attval = node.attributes; attval != null; attval = attval.next)
+ {
+ attribute = attval.checkAttribute(lexer, node );
+
+ if (attribute == AttributeTable.attrXmlns)
+ lexer.isvoyager = true;
+ }
+ }
+
+ };
+
+ public static class CheckSCRIPT implements CheckAttribs {
+
+ public void check( Lexer lexer, Node node )
+ {
+ Attribute attribute;
+ AttVal lang, type;
+
+ node.checkUniqueAttributes(lexer);
+
+ lang = node.getAttrByName("language");
+ type = node.getAttrByName("type");
+
+ if (type == null)
+ {
+ Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);
+
+ /* check for javascript */
+
+ if (lang != null)
+ {
+ String str = lang.value;
+ if (str.length() > 10)
+ str = str.substring(0, 10);
+ if ( (Lexer.wstrcasecmp(str, "javascript") == 0) ||
+ (Lexer.wstrcasecmp(str, "jscript") == 0) )
+ {
+ node.addAttribute("type", "text/javascript");
+ }
+ }
+ else
+ node.addAttribute("type", "text/javascript");
+ }
+ }
+
+ };
+
+ public static class CheckTABLE implements CheckAttribs {
+
+ public void check( Lexer lexer, Node node )
+ {
+ AttVal attval;
+ Attribute attribute;
+ boolean hasSummary = false;
+
+ node.checkUniqueAttributes(lexer);
+
+ for (attval = node.attributes; attval != null; attval = attval.next)
+ {
+ attribute = attval.checkAttribute(lexer, node);
+
+ if (attribute == AttributeTable.attrSummary)
+ hasSummary = true;
+ }
+
+ /* suppress warning for missing summary for HTML 2.0 and HTML 3.2 */
+ if (!hasSummary && lexer.doctype != Dict.VERS_HTML20 && lexer.doctype != Dict.VERS_HTML32)
+ {
+ lexer.badAccess |= Report.MISSING_SUMMARY;
+ Report.attrError(lexer, node, "summary", Report.MISSING_ATTRIBUTE);
+ }
+
+ /* convert <table border> to <table border="1"> */
+ if (lexer.configuration.XmlOut)
+ {
+ attval = node.getAttrByName("border");
+ if (attval != null)
+ {
+ if (attval.value == null)
+ attval.value = "1";
+ }
+ }
+ }
+
+ };
+
+ public static class CheckCaption implements CheckAttribs {
+
+ public void check( Lexer lexer, Node node )
+ {
+ AttVal attval;
+ String value = null;
+
+ node.checkUniqueAttributes(lexer);
+
+ for (attval = node.attributes; attval != null; attval = attval.next)
+ {
+ if ( Lexer.wstrcasecmp(attval.attribute, "align") == 0 )
+ {
+ value = attval.value;
+ break;
+ }
+ }
+
+ if (value != null)
+ {
+ if (Lexer.wstrcasecmp(value, "left") == 0 || Lexer.wstrcasecmp(value, "right") == 0)
+ lexer.versions &= (short)(Dict.VERS_HTML40_LOOSE|Dict.VERS_FRAMES);
+ else if (Lexer.wstrcasecmp(value, "top") == 0 || Lexer.wstrcasecmp(value, "bottom") == 0)
+ lexer.versions &= Dict.VERS_FROM32;
+ else
+ Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE);
+ }
+ }
+
+ };
+
+ public static class CheckHR implements CheckAttribs {
+
+ public void check( Lexer lexer, Node node )
+ {
+ if (node.getAttrByName("src") != null)
+ Report.attrError(lexer, node, "src", Report.PROPRIETARY_ATTR_VALUE);
+ }
+ };
+
+ public static class CheckIMG implements CheckAttribs {
+
+ public void check( Lexer lexer, Node node )
+ {
+ AttVal attval;
+ Attribute attribute;
+ boolean hasAlt = false;
+ boolean hasSrc = false;
+ boolean hasUseMap = false;
+ boolean hasIsMap = false;
+ boolean hasDataFld = false;
+
+ node.checkUniqueAttributes(lexer);
+
+ for (attval = node.attributes; attval != null; attval = attval.next)
+ {
+ attribute = attval.checkAttribute( lexer, node );
+
+ if (attribute == AttributeTable.attrAlt)
+ hasAlt = true;
+ else if (attribute == AttributeTable.attrSrc)
+ hasSrc = true;
+ else if (attribute == AttributeTable.attrUsemap)
+ hasUseMap = true;
+ else if (attribute == AttributeTable.attrIsmap)
+ hasIsMap = true;
+ else if (attribute == AttributeTable.attrDatafld)
+ hasDataFld = true;
+ else if (attribute == AttributeTable.attrWidth ||
+ attribute == AttributeTable.attrHeight)
+ lexer.versions &= ~Dict.VERS_HTML20;
+ }
+
+ if (!hasAlt)
+ {
+ lexer.badAccess |= Report.MISSING_IMAGE_ALT;
+ Report.attrError(lexer, node, "alt", Report.MISSING_ATTRIBUTE);
+ if (lexer.configuration.altText != null)
+ node.addAttribute("alt", lexer.configuration.altText);
+ }
+
+ if (!hasSrc && !hasDataFld)
+ Report.attrError(lexer, node, "src", Report.MISSING_ATTRIBUTE);
+
+ if (hasIsMap && !hasUseMap)
+ Report.attrError(lexer, node, "ismap", Report.MISSING_IMAGEMAP);
+ }
+
+ };
+
+ public static class CheckAREA implements CheckAttribs {
+
+ public void check( Lexer lexer, Node node )
+ {
+ AttVal attval;
+ Attribute attribute;
+ boolean hasAlt = false;
+ boolean hasHref = false;
+
+ node.checkUniqueAttributes(lexer);
+
+ for (attval = node.attributes; attval != null; attval = attval.next)
+ {
+ attribute = attval.checkAttribute( lexer, node );
+
+ if (attribute == AttributeTable.attrAlt)
+ hasAlt = true;
+ else if (attribute == AttributeTable.attrHref)
+ hasHref = true;
+ }
+
+ if (!hasAlt)
+ {
+ lexer.badAccess |= Report.MISSING_LINK_ALT;
+ Report.attrError(lexer, node, "alt", Report.MISSING_ATTRIBUTE);
+ }
+ if (!hasHref)
+ Report.attrError(lexer, node, "href", Report.MISSING_ATTRIBUTE);
+ }
+
+ };
+
+ public static class CheckAnchor implements CheckAttribs {
+
+ public void check( Lexer lexer, Node node )
+ {
+ node.checkUniqueAttributes(lexer);
+
+ lexer.fixId(node);
+ }
+ };
+
+ public static class CheckMap implements CheckAttribs {
+
+ public void check( Lexer lexer, Node node )
+ {
+ node.checkUniqueAttributes(lexer);
+
+ lexer.fixId(node);
+ }
+ }
+
+ public static class CheckSTYLE implements CheckAttribs {
+
+ public void check( Lexer lexer, Node node )
+ {
+ AttVal type = node.getAttrByName("type");
+
+ node.checkUniqueAttributes(lexer);
+
+ if (type == null)
+ {
+ Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);
+
+ node.addAttribute("type", "text/css");
+ }
+ }
+ }
+
+ public static class CheckTableCell implements CheckAttribs {
+
+ public void check( Lexer lexer, Node node )
+ {
+ node.checkUniqueAttributes(lexer);
+
+ /*
+ HTML4 strict doesn't allow mixed content for
+ elements with %block; as their content model
+ */
+ if (node.getAttrByName("width") != null || node.getAttrByName("height") != null)
+ lexer.versions &= ~Dict.VERS_HTML40_STRICT;
+ }
+ }
+
+ /* add missing type attribute when appropriate */
+ public static class CheckLINK implements CheckAttribs {
+
+ public void check( Lexer lexer, Node node )
+ {
+ AttVal rel = node.getAttrByName("rel");
+
+ node.checkUniqueAttributes(lexer);
+
+ if (rel != null && rel.value != null &&
+ rel.value.equals("stylesheet"))
+ {
+ AttVal type = node.getAttrByName("type");
+
+ if (type == null)
+ {
+ Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);
+
+ node.addAttribute("type", "text/css");
+ }
+ }
+ }
+ }
+
+ public static CheckAttribs getCheckHTML()
+ {
+ return _checkHTML;
+ }
+
+ public static CheckAttribs getCheckSCRIPT()
+ {
+ return _checkSCRIPT;
+ }
+
+ public static CheckAttribs getCheckTABLE()
+ {
+ return _checkTABLE;
+ }
+
+ public static CheckAttribs getCheckCaption()
+ {
+ return _checkCaption;
+ }
+
+ public static CheckAttribs getCheckIMG()
+ {
+ return _checkIMG;
+ }
+
+ public static CheckAttribs getCheckAREA()
+ {
+ return _checkAREA;
+ }
+
+ public static CheckAttribs getCheckAnchor()
+ {
+ return _checkAnchor;
+ }
+
+ public static CheckAttribs getCheckMap()
+ {
+ return _checkMap;
+ }
+
+ public static CheckAttribs getCheckSTYLE()
+ {
+ return _checkStyle;
+ }
+
+ public static CheckAttribs getCheckTableCell()
+ {
+ return _checkTableCell;
+ }
+
+ public static CheckAttribs getCheckLINK()
+ {
+ return _checkLINK;
+ }
+
+ public static CheckAttribs getCheckHR()
+ {
+ return _checkHR;
+ }
+
+
+ private static CheckAttribs _checkHTML = new CheckHTML();
+ private static CheckAttribs _checkSCRIPT = new CheckSCRIPT();
+ private static CheckAttribs _checkTABLE = new CheckTABLE();
+ private static CheckAttribs _checkCaption = new CheckCaption();
+ private static CheckAttribs _checkIMG = new CheckIMG();
+ private static CheckAttribs _checkAREA = new CheckAREA();
+ private static CheckAttribs _checkAnchor = new CheckAnchor();
+ private static CheckAttribs _checkMap = new CheckMap();
+ private static CheckAttribs _checkStyle = new CheckSTYLE();
+ private static CheckAttribs _checkTableCell = new CheckTableCell();
+ private static CheckAttribs _checkLINK = new CheckLINK();
+ private static CheckAttribs _checkHR = new CheckHR();
+
+}
--- /dev/null
+/*
+ * @(#)Clean.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Clean up misuse of presentation markup
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/*
+ Filters from other formats such as Microsoft Word
+ often make excessive use of presentation markup such
+ as font tags, B, I, and the align attribute. By applying
+ a set of production rules, it is straight forward to
+ transform this to use CSS.
+
+ Some rules replace some of the children of an element by
+ style properties on the element, e.g.
+
+ <p><b>...</b></p> -> <p style="font-weight: bold">...</p>
+
+ Such rules are applied to the element's content and then
+ to the element itself until no more rules apply.
+ Having applied all the rules to an element, it will have
+ a style attribute with one or more properties.
+
+ Other rules strip the element they apply to, replacing
+ it by style properties on the contents, e.g.
+
+ <dir><li><p>...</li></dir> -> <p style="margin-left: 1em">...
+
+ These rules are applied to an element before processing
+ its content and replace the current element by the first
+ element in the exposed content.
+
+ After applying both sets of rules, you can replace the
+ style attribute by a class value and style rule in the
+ document head. To support this, an association of styles
+ and class names is built.
+
+ A naive approach is to rely on string matching to test
+ when two property lists are the same. A better approach
+ would be to first sort the properties before matching.
+*/
+
+public class Clean {
+
+ /* number appended to "c" by gensymClass to form the next generated class name */
+ private int classNum = 1;
+
+ /* tag table supplied to the constructor; the clean-up rules compare node tags against its entries */
+ private TagTable tt;
+
+ /**
+  * Creates a cleaner that uses the given tag table.
+  *
+  * @param tt tag table stored for later tag comparisons and lookups
+  */
+ public Clean(TagTable tt) {
+ this.tt = tt;
+ }
+
+ /*
+   Insert a name/value pair into a property list kept sorted by name
+   (String.compareTo order) and return the head of the list.
+   If a property with the same name already exists the list is
+   returned unchanged, i.e. the earlier value wins.
+ */
+ private StyleProp insertProperty(StyleProp props, String name, String value) {
+     StyleProp before = null;
+     StyleProp at = props;
+
+     /* walk to the first entry whose name sorts after the new name */
+     while (at != null) {
+         int order = at.name.compareTo(name);
+
+         if (order == 0) {
+             /* this property is already defined, ignore new value */
+             return props;
+         }
+
+         if (order > 0) {
+             break;
+         }
+
+         before = at;
+         at = at.next;
+     }
+
+     /* link in front of 'at', or append when the end of the list was reached */
+     StyleProp created = (at != null)
+         ? new StyleProp(name, value, at)
+         : new StyleProp(name, value);
+
+     if (before == null) {
+         return created;
+     }
+
+     before.next = created;
+     return props;
+ }
+
+ /*
+ Create sorted linked list of properties from style string
+ It temporarily places nulls in place of ':' and ';' to
+ delimit the strings for the property name and value.
+ Some systems don't allow you to null literal strings,
+ so to avoid this, a copy is made first.
+ */
+ private StyleProp createProps(StyleProp prop, String style) {
+ int name_end;
+ int value_end;
+ int value_start = 0;
+ int name_start = 0;
+ boolean more;
+
+ name_start = 0;
+ while (name_start < style.length()) {
+ while (name_start < style.length() && style.charAt(name_start) == ' ')
+ ++name_start;
+
+ name_end = name_start;
+
+ while (name_end < style.length()) {
+ if (style.charAt(name_end) == ':') {
+ value_start = name_end + 1;
+ break;
+ }
+
+ ++name_end;
+ }
+
+ if (name_end >= style.length() || style.charAt(name_end) != ':')
+ break;
+
+ while (value_start < style.length() && style.charAt(value_start) == ' ')
+ ++value_start;
+
+ value_end = value_start;
+ more = false;
+
+ while (value_end < style.length()) {
+ if (style.charAt(value_end) == ';') {
+ more = true;
+ break;
+ }
+
+ ++value_end;
+ }
+
+ prop = insertProperty(prop, style.substring(name_start, name_end), style.substring(value_start, value_end));
+
+ if (more) {
+ name_start = value_end + 1;
+ continue;
+ }
+
+ break;
+ }
+
+ return prop;
+ }
+
+ /*
+   Render a property list as "name: value; name: value".
+   An empty list yields the empty string; there is no trailing
+   delimiter after the last property.
+ */
+ private String createPropString(StyleProp props) {
+     StringBuffer style = new StringBuffer();
+
+     for (StyleProp prop = props; prop != null; prop = prop.next) {
+         style.append(prop.name).append(": ").append(prop.value);
+
+         if (prop.next != null) {
+             style.append("; ");
+         }
+     }
+
+     return style.toString();
+ }
+
+ /*
+   Build a style string holding the properties of 'style' followed
+   by those of 'property', sorted by name. A name present in both
+   keeps the value found in 'style'.
+ */
+ private String addProperty(String style, String property) {
+     StyleProp merged = createProps(createProps(null, style), property);
+
+     return createPropString(merged);
+ }
+
+ /*
+   Generate the next class name: "c" followed by the current value
+   of classNum, which is then incremented. The tag argument is not used.
+ */
+ private String gensymClass(String tag) {
+     return "c" + classNum++;
+ }
+
+ /*
+   Return the class name recorded in lexer.styles for this tag and
+   property string. When there is no such entry a new one with a
+   generated class name is pushed onto the front of lexer.styles.
+ */
+ private String findStyle(Lexer lexer, String tag, String properties) {
+     Style known = lexer.styles;
+
+     while (known != null) {
+         if (known.tag.equals(tag) && known.properties.equals(properties)) {
+             return known.tagClass;
+         }
+
+         known = known.next;
+     }
+
+     Style created = new Style(tag, gensymClass(tag), properties, lexer.styles);
+
+     lexer.styles = created;
+     return created.tagClass;
+ }
+
+ /*
+   Find style attribute in node, and replace it
+   by corresponding class attribute. Search for
+   class in style dictionary otherwise gensym
+   new class and add to dictionary.
+
+   If the node already has a class attribute the new
+   class name is appended to it and the style attribute
+   is removed; otherwise the style attribute itself is
+   turned into the class attribute.
+ */
+ private void style2Rule(Lexer lexer, Node node) {
+     AttVal styleattr = node.getAttrByName("style");
+
+     if (styleattr == null) {
+         return;
+     }
+
+     String classname = findStyle(lexer, node.element, styleattr.value);
+     AttVal classattr = node.getAttrByName("class");
+
+     if (classattr == null) {
+         /* reuse style attribute for class attribute */
+         styleattr.attribute = "class";
+         styleattr.value = classname;
+         return;
+     }
+
+     if (classattr.value == null || classattr.value.length() == 0) {
+         /* class given without a usable value: avoid producing "null cN" or a leading space */
+         classattr.value = classname;
+     } else {
+         /* append class name after a space */
+         classattr.value = classattr.value + " " + classname;
+     }
+
+     node.removeAttribute(styleattr);
+ }
+
+ /*
+   Append the rule "<selector> { color: <color> }" and a newline to
+   the lexer buffer. Does nothing when color is null.
+ */
+ private void addColorRule(Lexer lexer, String selector, String color) {
+     if (color == null) {
+         return;
+     }
+
+     lexer.addStringLiteral(selector);
+     lexer.addStringLiteral(" { color: ");
+     lexer.addStringLiteral(color);
+     lexer.addStringLiteral(" }\n");
+ }
+
+ /*
+   Remove the presentation attributes from body and write the
+   equivalent style rules into the lexer buffer:
+
+   background="foo" -> body { background-image: url(foo) }
+   bgcolor="foo" -> body { background-color: foo }
+   text="foo" -> body { color: foo }
+   link="foo" -> :link { color: foo }
+   vlink="foo" -> :visited { color: foo }
+   alink="foo" -> :active { color: foo }
+ */
+ private void cleanBodyAttrs(Lexer lexer, Node body) {
+     String imageUrl = null;
+     String backColor = null;
+     String textColor = null;
+     AttVal found;
+
+     /* collect and detach the three attributes that go into the body rule */
+     found = body.getAttrByName("background");
+     if (found != null) {
+         imageUrl = found.value;
+         found.value = null;
+         body.removeAttribute(found);
+     }
+
+     found = body.getAttrByName("bgcolor");
+     if (found != null) {
+         backColor = found.value;
+         found.value = null;
+         body.removeAttribute(found);
+     }
+
+     found = body.getAttrByName("text");
+     if (found != null) {
+         textColor = found.value;
+         found.value = null;
+         body.removeAttribute(found);
+     }
+
+     boolean needsBodyRule = !(imageUrl == null && backColor == null && textColor == null);
+
+     if (needsBodyRule) {
+         lexer.addStringLiteral(" body {\n");
+
+         if (imageUrl != null) {
+             lexer.addStringLiteral(" background-image: url(");
+             lexer.addStringLiteral(imageUrl);
+             lexer.addStringLiteral(");\n");
+         }
+
+         if (backColor != null) {
+             lexer.addStringLiteral(" background-color: ");
+             lexer.addStringLiteral(backColor);
+             lexer.addStringLiteral(";\n");
+         }
+
+         if (textColor != null) {
+             lexer.addStringLiteral(" color: ");
+             lexer.addStringLiteral(textColor);
+             lexer.addStringLiteral(";\n");
+         }
+
+         lexer.addStringLiteral(" }\n");
+     }
+
+     /* the link colours each become a rule of their own */
+     found = body.getAttrByName("link");
+     if (found != null) {
+         addColorRule(lexer, " :link", found.value);
+         body.removeAttribute(found);
+     }
+
+     found = body.getAttrByName("vlink");
+     if (found != null) {
+         addColorRule(lexer, " :visited", found.value);
+         body.removeAttribute(found);
+     }
+
+     found = body.getAttrByName("alink");
+     if (found != null) {
+         addColorRule(lexer, " :active", found.value);
+         body.removeAttribute(found);
+     }
+ }
+
+ /*
+   Return false, and flag Report.USING_BODY in lexer.badLayout, when
+   the document body carries any of the presentation attributes
+   background, bgcolor, text, link, vlink or alink. Return true
+   otherwise, including when no body is found.
+ */
+ private boolean niceBody(Lexer lexer, Node doc) {
+     Node body = doc.findBody(lexer.configuration.tt);
+
+     if (body == null) {
+         return true;
+     }
+
+     String[] presentational = { "background", "bgcolor", "text", "link", "vlink", "alink" };
+
+     for (int i = 0; i < presentational.length; i++) {
+         if (body.getAttrByName(presentational[i]) != null) {
+             lexer.badLayout |= Report.USING_BODY;
+             return false;
+         }
+     }
+
+     return true;
+ }
+
+ /*
+   Create a style element holding the rules collected in lexer.styles
+   together with the rules derived from the body's presentation
+   attributes, and append it to the document head when one is found.
+ */
+ private void createStyleElement(Lexer lexer, Node doc) {
+ Node node, head, body;
+ Style style;
+ AttVal av;
+
+ /* nothing to do: no collected rules and no presentation attributes on body */
+ if (lexer.styles == null && niceBody(lexer, doc))
+ return;
+
+ node = lexer.newNode(Node.StartTag, null, 0, 0, "style");
+ node.implicit = true;
+
+ /* insert type attribute */
+ av = new AttVal(null, null, '"', "type", "text/css");
+ av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
+ node.attributes = av;
+
+ body = doc.findBody(lexer.configuration.tt);
+
+ /* everything added to the lexer buffer from here on becomes the style text */
+ lexer.txtstart = lexer.lexsize;
+
+ if (body != null)
+ cleanBodyAttrs(lexer, body);
+
+ /* one rule per dictionary entry: " tag.class {properties}" followed by a newline */
+ for (style = lexer.styles; style != null; style = style.next) {
+ lexer.addCharToLexer(' ');
+ lexer.addStringLiteral(style.tag);
+ lexer.addCharToLexer('.');
+ lexer.addStringLiteral(style.tagClass);
+ lexer.addCharToLexer(' ');
+ lexer.addCharToLexer('{');
+ lexer.addStringLiteral(style.properties);
+ lexer.addCharToLexer('}');
+ lexer.addCharToLexer('\n');
+ }
+
+ lexer.txtend = lexer.lexsize;
+
+ /* wrap the buffered range in a text node placed inside the style element */
+ Node.insertNodeAtEnd(node, lexer.newNode(Node.TextNode, lexer.lexbuf, lexer.txtstart, lexer.txtend));
+
+ /*
+ now insert style element into document head
+
+ doc is root node. search its children for html node
+ the head node should be first child of html node
+ */
+
+ head = doc.findHEAD(lexer.configuration.tt);
+
+ /* when no head is found the style element is not attached anywhere */
+ if (head != null)
+ Node.insertNodeAtEnd(head, node);
+ }
+
+ /*
+   Ensure bidirectional links are consistent: given a node whose own
+   prev, next and parent fields are already set, make its neighbours
+   (or, at either end of the sibling list, its parent's content/last)
+   point back at it, and make its children name it as their parent.
+   node.parent is dereferenced whenever prev or next is null.
+ */
+ private void fixNodeLinks(Node node) {
+ Node child;
+
+ /* no previous sibling means node is the parent's first child */
+ if (node.prev != null)
+ node.prev.next = node;
+ else
+ node.parent.content = node;
+
+ /* no next sibling means node is the parent's last child */
+ if (node.next != null)
+ node.next.prev = node;
+ else
+ node.parent.last = node;
+
+ for (child = node.content; child != null; child = child.next)
+ child.parent = node;
+ }
+
+ /*
+ used to strip child of node when
+ the node has one and only one child
+
+ The child's content list becomes the node's content list and the
+ former grandchildren are re-parented to node. node.content must not
+ be null. Only the stripped child's content field is cleared.
+ */
+ private void stripOnlyChild(Node node) {
+ Node child;
+
+ child = node.content;
+ /* adopt the child's content list */
+ node.content = child.content;
+ node.last = child.last;
+ child.content = null;
+
+ /* 'child' is reused as the loop variable over the adopted nodes */
+ for (child = node.content; child != null; child = child.next)
+ child.parent = node;
+ }
+
+ /*
+   Used to strip font start and end tags: unlink element from its
+   parent's child list, splicing element's children (if any) into its
+   place. pnode receives the node now standing where element was:
+   the first spliced child, or element's former next sibling when
+   element had no content.
+ */
+ private void discardContainer(Node element, MutableObject pnode) {
+ Node node;
+ Node parent = element.parent;
+
+ if (element.content != null) {
+ /* join the last child to whatever followed element */
+ element.last.next = element.next;
+
+ if (element.next != null) {
+ element.next.prev = element.last;
+ element.last.next = element.next;
+ } else
+ parent.last = element.last;
+
+ /* join the first child to whatever preceded element */
+ if (element.prev != null) {
+ element.content.prev = element.prev;
+ element.prev.next = element.content;
+ } else
+ parent.content = element.content;
+
+ /* the children now belong to element's parent */
+ for (node = element.content; node != null; node = node.next)
+ node.parent = parent;
+
+ pnode.setObject(element.content);
+ } else {
+ /* empty container: simply unlink it */
+ if (element.next != null)
+ element.next.prev = element.prev;
+ else
+ parent.last = element.prev;
+
+ if (element.prev != null)
+ element.prev.next = element.next;
+ else
+ parent.content = element.next;
+
+ pnode.setObject(element.next);
+ }
+
+ /* detach the discarded element's forward and content links */
+ element.next = null;
+ element.content = null;
+ }
+
+ /*
+   Add style property to element, creating style
+   attribute as needed and adding ; delimiter.
+
+   An existing style attribute has the property merged into its
+   value; a style attribute without a value simply receives the
+   property; otherwise a new style attribute is put at the front
+   of the attribute list.
+ */
+ private void addStyleProperty(Node node, String property) {
+     AttVal av = node.attributes;
+
+     while (av != null && !av.attribute.equals("style")) {
+         av = av.next;
+     }
+
+     if (av == null) {
+         /* create new style attribute */
+         av = new AttVal(node.attributes, null, '"', "style", property);
+         av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
+         node.attributes = av;
+     } else if (av.value == null) {
+         /* style attribute without a value: nothing to merge with */
+         av.value = property;
+     } else {
+         /* style attribute already exists: insert property */
+         av.value = addProperty(av.value, property);
+     }
+ }
+
+ /*
+   Create new string that consists of the
+   combined style properties in s1 and s2.
+
+   Both strings are parsed into one list sorted by property
+   name. When a name occurs in both, the value from s1 is
+   kept and the one from s2 is ignored.
+ */
+ private String mergeProperties(String s1, String s2) {
+     StyleProp combined = createProps(null, s1);
+
+     combined = createProps(combined, s2);
+     return createPropString(combined);
+ }
+
+ /*
+   Fold the style of child into the style of node. Nothing happens
+   when child has no style value. Otherwise node's existing style
+   value is merged with it (node's values win on conflicts), an
+   existing but valueless style attribute on node receives the
+   child's style, and a node without a style attribute gets a new
+   one at the front of its attribute list.
+ */
+ private void mergeStyles(Node node, Node child) {
+     String childStyle = null;
+
+     for (AttVal av = child.attributes; av != null; av = av.next) {
+         if (av.attribute.equals("style")) {
+             childStyle = av.value;
+             break;
+         }
+     }
+
+     if (childStyle == null) {
+         return;
+     }
+
+     AttVal own = node.attributes;
+
+     while (own != null && !own.attribute.equals("style")) {
+         own = own.next;
+     }
+
+     if (own == null) {
+         /* copy style of child */
+         own = new AttVal(node.attributes, null, '"', "style", childStyle);
+         own.dict = AttributeTable.getDefaultAttributeTable().findAttribute(own);
+         node.attributes = own;
+     } else if (own.value == null) {
+         /* reuse the valueless attribute instead of adding a second style attribute */
+         own.value = childStyle;
+     } else {
+         /* merge styles from both */
+         own.value = mergeProperties(own.value, childStyle);
+     }
+ }
+
+ /*
+   Translate a font size attribute value into a CSS font-size value.
+
+   A leading digit 0..6 selects an entry of a fixed table (the entry
+   for 3 is null). Otherwise, when the second character is a digit
+   0..6, the result is a percentage obtained by multiplying 1.0 that
+   many times by 0.8 (first character '-') or by 1.2 (any other first
+   character). Everything else yields "smaller" after a '-' and
+   "larger" otherwise.
+ */
+ private String fontSize2Name(String size) {
+     String[] sizes = { "60%", "70%", "80%", null, "120%", "150%", "200%" };
+     int length = size.length();
+
+     if (length > 0 && '0' <= size.charAt(0) && size.charAt(0) <= '6') {
+         return sizes[size.charAt(0) - '0'];
+     }
+
+     boolean shrinking = length > 0 && size.charAt(0) == '-';
+
+     if (length > 1 && '0' <= size.charAt(1) && size.charAt(1) <= '6') {
+         double step = shrinking ? 0.8 : 1.2;
+         double scale = 1.0;
+
+         /* repeated multiplication, one factor per step */
+         for (int steps = size.charAt(1) - '0'; steps > 0; --steps) {
+             scale *= step;
+         }
+
+         scale *= 100.0;
+         return "" + (int) scale + "%";
+     }
+
+     return shrinking ? "smaller" : "larger";
+ }
+
+ /* add a "font-family" style property with the given face to node */
+ private void addFontFace(Node node, String face) {
+     String property = "font-family: " + face;
+
+     addStyleProperty(node, property);
+ }
+
+ /*
+   Apply a font size to node. A <p> with size 6, 5 or 4 is renamed
+   to h1, h2 or h3 and looked up again in the tag table; in every
+   other case a "font-size" style property is added, unless the
+   size maps to no value.
+ */
+ private void addFontSize(Node node, String size) {
+     String heading = null;
+
+     if (size.equals("6")) {
+         heading = "h1";
+     } else if (size.equals("5")) {
+         heading = "h2";
+     } else if (size.equals("4")) {
+         heading = "h3";
+     }
+
+     if (heading != null && node.tag == tt.tagP) {
+         node.element = heading;
+         tt.findTag(node);
+         return;
+     }
+
+     String cssSize = fontSize2Name(size);
+
+     if (cssSize == null) {
+         return;
+     }
+
+     addStyleProperty(node, "font-size: " + cssSize);
+ }
+
+ /* add a "color" style property with the given colour to node */
+ private void addFontColor(Node node, String color) {
+     String property = "color: " + color;
+
+     addStyleProperty(node, property);
+ }
+
+ /*
+   Add a "text-align" style property to node, forcing the alignment
+   value to lower case. The conversion uses a fixed locale so that
+   the result does not depend on the default locale (in a Turkish
+   locale "RIGHT" would otherwise become "r\u0131ght").
+ */
+ private void addAlign(Node node, String align) {
+     addStyleProperty(node, "text-align: " + align.toLowerCase(java.util.Locale.ENGLISH));
+ }
+
+ /*
+   add style properties to node corresponding to
+   the font face, size and color attributes
+
+   Walks the attribute list starting at av. Attributes given
+   without a value are skipped, since there is nothing to
+   translate into a style property.
+ */
+ private void addFontStyles(Node node, AttVal av) {
+     for (AttVal attr = av; attr != null; attr = attr.next) {
+         if (attr.value == null) {
+             continue;
+         }
+
+         if (attr.attribute.equals("face")) {
+             addFontFace(node, attr.value);
+         } else if (attr.attribute.equals("size")) {
+             addFontSize(node, attr.value);
+         } else if (attr.attribute.equals("color")) {
+             addFontColor(node, attr.value);
+         }
+     }
+ }
+
+ /*
+   Symptom: <p align=center>
+   Action: <p style="text-align: center">
+
+   The first align attribute is unlinked from the node's attribute
+   list; when it has a value, that value is added as a text-align
+   style property.
+ */
+ private void textAlign(Lexer lexer, Node node) {
+     AttVal previous = null;
+     AttVal current = node.attributes;
+
+     while (current != null && !current.attribute.equals("align")) {
+         previous = current;
+         current = current.next;
+     }
+
+     if (current == null) {
+         return;
+     }
+
+     /* unlink the align attribute */
+     if (previous == null) {
+         node.attributes = current.next;
+     } else {
+         previous.next = current.next;
+     }
+
+     if (current.value != null) {
+         addAlign(node, current.value);
+     }
+ }
+
+ /*
+ The clean up rules use the pnode argument to return the
+ next node when the original node has been deleted
+ */
+
+ /*
+   Symptom: <dir>, <ul> or <ol> whose only child is an implicit <li>
+   Action: coerce the list to <div> with a 2em left margin and
+   strip the <li>, keeping its content.
+
+   Returns true when the rule was applied. pnode is not modified.
+ */
+ private boolean dir2Div(Lexer lexer, Node node, MutableObject pnode) {
+     boolean listContainer = node.tag == tt.tagDir || node.tag == tt.tagUl || node.tag == tt.tagOl;
+
+     if (!listContainer) {
+         return false;
+     }
+
+     Node item = node.content;
+
+     /* need exactly one child, and it must be an implicit <li> */
+     if (item == null || item.next != null) {
+         return false;
+     }
+
+     if (item.tag != tt.tagLi || !item.implicit) {
+         return false;
+     }
+
+     /* coerce dir to div */
+     node.tag = tt.tagDiv;
+     node.element = "div";
+     addStyleProperty(node, "margin-left: 2em");
+     stripOnlyChild(node);
+     return true;
+ }
+
+ /*
+ Symptom: <center>
+ Action: replace <center> by <div style="text-align: center">
+ */
+
+ /*
+   Handles <center> elements; returns true when node is a <center>.
+   With the DropFontTags option set, the <center> is discarded and a
+   <br> is inserted instead: after its former content, or in its place
+   when it was empty. Otherwise the element is renamed to <div> and
+   receives a "text-align: center" style property.
+ */
+ private boolean center2Div(Lexer lexer, Node node, MutableObject pnode) {
+ if (node.tag == tt.tagCenter) {
+ if (lexer.configuration.DropFontTags) {
+ if (node.content != null) {
+ /* remember the last child before the container is unlinked */
+ Node last = node.last;
+ Node parent = node.parent;
+
+ discardContainer(node, pnode);
+
+ /* from here on 'node' refers to the new <br> */
+ node = lexer.inferredTag("br");
+
+ /* link the <br> directly after the last former child */
+ if (last.next != null)
+ last.next.prev = node;
+
+ node.next = last.next;
+ last.next = node;
+ node.prev = last;
+
+ if (parent.last == last)
+ parent.last = node;
+
+ node.parent = parent;
+ } else {
+ /* empty <center>: remember its neighbours so the <br> can take its place */
+ Node prev = node.prev;
+ Node next = node.next;
+ Node parent = node.parent;
+ discardContainer(node, pnode);
+
+ node = lexer.inferredTag("br");
+ node.next = next;
+ node.prev = prev;
+ node.parent = parent;
+
+ if (next != null)
+ next.prev = node;
+ else
+ parent.last = node;
+
+ if (prev != null)
+ prev.next = node;
+ else
+ parent.content = node;
+ }
+
+ return true;
+ }
+ /* default: keep the element, renamed to a centred <div> */
+ node.tag = tt.tagDiv;
+ node.element = "div";
+ addStyleProperty(node, "text-align: center");
+ return true;
+ }
+
+ return false;
+ }
+
+ /*
+   Symptom <div><div>...</div></div>
+   Action: merge the two divs
+
+   This is useful after nested <dir>s used by Word
+   for indenting have been converted to <div>s
+
+   Applies only when the inner <div> is the sole child of the outer
+   one. The inner style is merged into the outer style before the
+   inner <div> is stripped. Returns true when the rule was applied.
+ */
+ private boolean mergeDivs(Lexer lexer, Node node, MutableObject pnode) {
+     if (node.tag != tt.tagDiv) {
+         return false;
+     }
+
+     Node inner = node.content;
+     boolean soleInnerDiv = inner != null && inner.tag == tt.tagDiv && inner.next == null;
+
+     if (!soleInnerDiv) {
+         return false;
+     }
+
+     mergeStyles(node, inner);
+     stripOnlyChild(node);
+     return true;
+ }
+
+ /*
+ Symptom: <ul><li><ul>...</ul></li></ul>
+ Action: discard outer list
+ */
+
+ /*
+   Applies when node is a <ul> or <ol> whose only child contains, as
+   its first child, a list with the same tag as node. The inner list
+   takes the place of the outer one, and pnode is set to the outer
+   list's former next sibling. If the node before it is also a
+   <ul>/<ol>, the inner list is then moved to the end of that list's
+   last child. Returns true when the rule was applied.
+ */
+ private boolean nestedList(Lexer lexer, Node node, MutableObject pnode) {
+ Node child, list;
+
+ if (node.tag == tt.tagUl || node.tag == tt.tagOl) {
+ child = node.content;
+
+ if (child == null)
+ return false;
+
+ /* check child has no peers */
+
+ if (child.next != null)
+ return false;
+
+ list = child.content;
+
+ if (list == null)
+ return false;
+
+ /* the inner node must be the same kind of list as the outer one */
+ if (list.tag != node.tag)
+ return false;
+
+ pnode.setObject(node.next);
+
+ /* move inner list node into position of outer node */
+ list.prev = node.prev;
+ list.next = node.next;
+ list.parent = node.parent;
+ fixNodeLinks(list);
+
+ /* get rid of outer ul and its li */
+ child.content = null;
+ node.content = null;
+ node.next = null;
+
+ /*
+ If prev node was a list the chances are this node
+ should be appended to that list. Word has no way of
+ recognizing nested lists and just uses indents
+ */
+
+ if (list.prev != null) {
+ /* swap roles: 'node' is now the promoted inner list, 'list' its previous sibling */
+ node = list;
+ list = node.prev;
+
+ if (list.tag == tt.tagUl || list.tag == tt.tagOl) {
+ /* unlink node from the sibling chain */
+ list.next = node.next;
+
+ if (list.next != null)
+ list.next.prev = list;
+
+ child = list.last; /* <li> */
+
+ /* append node as the last child of that <li> */
+ node.parent = child;
+ node.next = null;
+ node.prev = child.last;
+ fixNodeLinks(node);
+ }
+ }
+
+ /* run the clean-up rules on the node (the outer list unless the swap above happened) */
+ cleanNode(lexer, node);
+ return true;
+ }
+
+ return false;
+ }
+
+ /*
+ Symptom: the only child of a block-level element is a
+ presentation element such as B, I or FONT
+
+ Action: add style "font-weight: bold" to the block and
+ strip the <b> element, leaving its children.
+
+ example:
+
+ <p>
+ <b><font face="Arial" size="6">Draft Recommended Practice</font></b>
+ </p>
+
+ becomes:
+
+ <p style="font-weight: bold; font-family: Arial; font-size: 6">
+ Draft Recommended Practice
+ </p>
+
+ This code also replaces the align attribute by a style attribute.
+ However, to avoid CSS problems with Navigator 4, this isn't done
+ for the elements: caption, tr and table
+ */
+ /*
+   Applies to nodes whose content model includes block, list,
+   definition list or table, except <table>, <tr> and <li>.
+   The align attribute is first turned into a style property (not
+   for <caption>). Then, if the only child is a <b>, <i> or <font>,
+   that child's formatting is moved into the node's style and the
+   child is stripped. Returns true when a child was stripped.
+ */
+ private boolean blockStyle(Lexer lexer, Node node, MutableObject pnode) {
+     /* a node without a tag entry has no content model to test (presumably an unknown element) */
+     if (node.tag == null) {
+         return false;
+     }
+
+     if ((node.tag.model & (Dict.CM_BLOCK | Dict.CM_LIST | Dict.CM_DEFLIST | Dict.CM_TABLE)) == 0) {
+         return false;
+     }
+
+     if (node.tag == tt.tagTable || node.tag == tt.tagTr || node.tag == tt.tagLi) {
+         return false;
+     }
+
+     /* check for align attribute */
+     if (node.tag != tt.tagCaption) {
+         textAlign(lexer, node);
+     }
+
+     Node child = node.content;
+
+     /* need exactly one child */
+     if (child == null || child.next != null) {
+         return false;
+     }
+
+     if (child.tag == tt.tagB) {
+         mergeStyles(node, child);
+         addStyleProperty(node, "font-weight: bold");
+         stripOnlyChild(node);
+         return true;
+     }
+
+     if (child.tag == tt.tagI) {
+         mergeStyles(node, child);
+         addStyleProperty(node, "font-style: italic");
+         stripOnlyChild(node);
+         return true;
+     }
+
+     if (child.tag == tt.tagFont) {
+         mergeStyles(node, child);
+         addFontStyles(node, child.attributes);
+         stripOnlyChild(node);
+         return true;
+     }
+
+     return false;
+ }
+
+ /*
+   The only child of a table cell or an inline element such as em.
+
+   Applies to nodes other than <font> whose content model includes
+   inline or row. If the only child is a <font>, or a <b>/<i> while
+   the LogicalEmphasis option is set, the child's formatting is moved
+   into the node's style and the child is stripped. Returns true when
+   a child was stripped.
+ */
+ private boolean inlineStyle(Lexer lexer, Node node, MutableObject pnode) {
+     /* a node without a tag entry has no content model to test (presumably an unknown element) */
+     if (node.tag == null || node.tag == tt.tagFont) {
+         return false;
+     }
+
+     if ((node.tag.model & (Dict.CM_INLINE | Dict.CM_ROW)) == 0) {
+         return false;
+     }
+
+     Node child = node.content;
+
+     /* need exactly one child */
+     if (child == null || child.next != null) {
+         return false;
+     }
+
+     if (child.tag == tt.tagB && lexer.configuration.LogicalEmphasis) {
+         mergeStyles(node, child);
+         addStyleProperty(node, "font-weight: bold");
+         stripOnlyChild(node);
+         return true;
+     }
+
+     if (child.tag == tt.tagI && lexer.configuration.LogicalEmphasis) {
+         mergeStyles(node, child);
+         addStyleProperty(node, "font-style: italic");
+         stripOnlyChild(node);
+         return true;
+     }
+
+     if (child.tag == tt.tagFont) {
+         mergeStyles(node, child);
+         addFontStyles(node, child.attributes);
+         stripOnlyChild(node);
+         return true;
+     }
+
+     return false;
+ }
+
+ /*
+ Replace font elements by span elements, deleting
+ the font element's attributes and replacing them
+ by a single style attribute.
+
+ When the DropFontTags option is set the font container is
+ discarded instead (discardContainer receives pnode) and false
+ is returned. A font that is the only child of its parent is
+ left untouched and false is returned. True is returned only
+ when the node was actually turned into a span.
+ */
+ private boolean font2Span(Lexer lexer, Node node, MutableObject pnode) {
+ AttVal av, style, next;
+
+ if (node.tag == tt.tagFont) {
+ if (lexer.configuration.DropFontTags) {
+ discardContainer(node, pnode);
+ return false;
+ }
+
+ /* if FONT is only child of parent element then leave alone */
+ if (node.parent.content == node && node.next == null)
+ return false;
+
+ addFontStyles(node, node.attributes);
+
+ /* extract style attribute and free the rest */
+ av = node.attributes;
+ style = null;
+
+ while (av != null) {
+ next = av.next;
+
+ /* constant-first comparison: an attribute without a name must not throw here */
+ if ("style".equals(av.attribute)) {
+ av.next = null;
+ style = av;
+ }
+
+ av = next;
+ }
+
+ /* only the style attribute (if any) survives */
+ node.attributes = style;
+
+ node.tag = tt.tagSpan;
+ node.element = "span";
+
+ return true;
+ }
+
+ return false;
+ }
+
+ /*
+ Applies all matching rules to a node.
+
+ Every rule receives the holder o, through which it can nominate
+ the node that processing should continue with. When a rule
+ reports success the rule list is tried again from the top on the
+ nominated node. The loop ends when no rule fires, when the
+ current node is not an element, or when a rule left nothing to
+ continue with.
+
+ Returns the node nominated by the last rule that ran; this is
+ the node passed in when nothing changed, and may be null.
+ */
+ private Node cleanNode(Lexer lexer, Node node) {
+ Node next = null;
+ MutableObject o = new MutableObject();
+ boolean b = false;
+
+ /* a rule may hand back null (e.g. after discarding a container), so test before dereferencing */
+ for (next = node; node != null && node.isElement(); node = next) {
+ o.setObject(next);
+
+ b = dir2Div(lexer, node, o);
+ next = (Node) o.getObject();
+ if (b)
+ continue;
+
+ b = nestedList(lexer, node, o);
+ next = (Node) o.getObject();
+ if (b)
+ continue;
+
+ b = center2Div(lexer, node, o);
+ next = (Node) o.getObject();
+ if (b)
+ continue;
+
+ b = mergeDivs(lexer, node, o);
+ next = (Node) o.getObject();
+ if (b)
+ continue;
+
+ b = blockStyle(lexer, node, o);
+ next = (Node) o.getObject();
+ if (b)
+ continue;
+
+ b = inlineStyle(lexer, node, o);
+ next = (Node) o.getObject();
+ if (b)
+ continue;
+
+ b = font2Span(lexer, node, o);
+ next = (Node) o.getObject();
+ if (b)
+ continue;
+
+ /* no rule matched: the node is in its final shape */
+ break;
+ }
+
+ return next;
+ }
+
+ /*
+ Runs cleanNode over the subtree rooted at node, children first.
+ The recursive call may return a different node than the one it
+ was given, so the walk resumes from the successor of whatever
+ came back. A null result means nothing is left to visit at this
+ level. Returns the result of cleanNode for node itself.
+ */
+ private Node createStyleProperties(Lexer lexer, Node node) {
+ Node child = node.content;
+
+ while (child != null) {
+ child = createStyleProperties(lexer, child);
+
+ /* cleaning can leave no continuation node; do not dereference null */
+ if (child != null)
+ child = child.next;
+ }
+
+ return cleanNode(lexer, node);
+ }
+
+ /*
+ Post-order walk: style2Rule is applied to every descendant of
+ node before it is applied to node itself.
+ */
+ private void defineStyleRules(Lexer lexer, Node node) {
+ Node kid = node.content;
+
+ while (kid != null) {
+ defineStyleRules(lexer, kid);
+ kid = kid.next;
+ }
+
+ style2Rule(lexer, node);
+ }
+
+ /*
+ Entry point of the clean-up pass: first rewrites presentational
+ markup into style properties for the whole tree, then, only when
+ the MakeClean option is off, derives style rules and creates the
+ style element.
+ NOTE(review): the second step being tied to MakeClean == false
+ looks surprising; confirm against the callers before changing it.
+ */
+ public void cleanTree(Lexer lexer, Node doc) {
+ doc = createStyleProperties(lexer, doc);
+
+ if (lexer.configuration.MakeClean)
+ return;
+
+ defineStyleRules(lexer, doc);
+ createStyleElement(lexer, doc);
+ }
+
+ /*
+ simplifies <b><b> ... </b> ...</b> etc.
+
+ Walks the sibling chain starting at node. A B or I element whose
+ parent carries the same tag is redundant: its container is
+ discarded and the walk continues with the node reported back by
+ discardContainer. Any other node has its content processed
+ recursively.
+ */
+ public void nestedEmphasis(Node node) {
+ MutableObject holder = new MutableObject();
+
+ while (node != null) {
+ Node following = node.next;
+
+ if ((node.tag == tt.tagB || node.tag == tt.tagI) && node.parent != null && node.parent.tag == node.tag) {
+ /* strip redundant inner element */
+ holder.setObject(following);
+ discardContainer(node, holder);
+ following = (Node) holder.getObject();
+ } else if (node.content != null) {
+ nestedEmphasis(node.content);
+ }
+
+ node = following;
+ }
+ }
+
+ /*
+ replace i by em and b by strong
+
+ Applied to node, all of its following siblings and, recursively,
+ to their content. Both the tag reference and the element name
+ are switched.
+ */
+ public void emFromI(Node node) {
+ for (Node cur = node; cur != null; cur = cur.next) {
+ if (cur.tag == tt.tagI) {
+ cur.element = tt.tagEm.name;
+ cur.tag = tt.tagEm;
+ } else if (cur.tag == tt.tagB) {
+ cur.element = tt.tagStrong.name;
+ cur.tag = tt.tagStrong;
+ }
+
+ if (cur.content != null)
+ emFromI(cur.content);
+ }
+ }
+
+ /*
+ Some people use dir or ul without an li
+ to indent the content. The pattern to
+ look for is a list with a single implicit
+ li. This is recursively replaced by an
+ implicit blockquote.
+ */
+ public void list2BQ(Node node) {
+ while (node != null) {
+ if (node.content != null)
+ list2BQ(node.content);
+
+ if (node.tag != null && node.tag.parser == ParserImpl.getParseList() && node.hasOneChild() && node.content.implicit) {
+ stripOnlyChild(node);
+ node.element = tt.tagBlockquote.name;
+ node.tag = tt.tagBlockquote;
+ node.implicit = true;
+ }
+
+ node = node.next;
+ }
+ }
+
+ /*
+ Replace implicit blockquote by div with an indent
+ taking care to reduce nested blockquotes to a single
+ div with the indent set to match the nesting depth
+
+ The div receives a style attribute of the form
+ "margin-left: <2 * depth>em". Nodes that are not implicit
+ blockquotes are only descended into.
+ */
+ public void bQ2Div(Node node) {
+ int indent;
+ String indent_buf;
+
+ while (node != null) {
+ if (node.tag == tt.tagBlockquote && node.implicit) {
+ indent = 1;
+
+ /* collapse directly nested blockquotes, counting the depth */
+ /* NOTE(review): the test reads node.implicit (the outer node), not node.content.implicit; confirm this is intended */
+ while (node.hasOneChild() && node.content.tag == tt.tagBlockquote && node.implicit) {
+ ++indent;
+ stripOnlyChild(node);
+ }
+
+ if (node.content != null)
+ bQ2Div(node.content);
+
+ /* two em per nesting level; plain concatenation avoids the deprecated Integer constructor */
+ indent_buf = "margin-left: " + (2 * indent) + "em";
+
+ node.element = tt.tagDiv.name;
+ node.tag = tt.tagDiv;
+ node.addAttribute("style", indent_buf);
+ } else if (node.content != null)
+ bQ2Div(node.content);
+
+ node = node.next;
+ }
+ }
+
+ /*
+ node is <![if ...]> prune up to <![endif]>
+
+ Discards node and every following sibling up to and including
+ the matching endif section tag; a nested "if" section is pruned
+ by a recursive call. Returns the node after the endif, or null
+ when the sibling chain ends first.
+ */
+ public Node pruneSection(Lexer lexer, Node node) {
+ while (true) {
+ /* discard node and returns next */
+ node = Node.discardElement(node);
+
+ if (node == null)
+ return null;
+
+ if (node.type != Node.SectionTag)
+ continue;
+
+ if ((Lexer.getString(node.textarray, node.start, 2)).equals("if")) {
+ /* nested section: prune it, then carry on with what follows it */
+ node = pruneSection(lexer, node);
+ continue;
+ }
+
+ if ((Lexer.getString(node.textarray, node.start, 5)).equals("endif"))
+ return Node.discardElement(node);
+ }
+ }
+
+ public void dropSections(Lexer lexer, Node node) {
+ while (node != null) {
+ if (node.type == Node.SectionTag) {
+ /* prune up to matching endif */
+ if ((Lexer.getString(node.textarray, node.start, 2)).equals("if")) {
+ node = pruneSection(lexer, node);
+ continue;
+ }
+
+ /* discard others as well */
+ node = Node.discardElement(node);
+ continue;
+ }
+
+ if (node.content != null)
+ dropSections(lexer, node.content);
+
+ node = node.next;
+ }
+ }
+
+ // gschadow patch start
+ /** Get rid of all this pseudo-XML crap, sections, Asp tags, JSP tags, etc.
+ **/
+ public void dropPseudoXMLCrap(Lexer lexer, Node node) {
+ while (node != null) {
+ switch (node.type) {
+ case Node.AspTag :
+ case Node.JsteTag :
+ case Node.PhpTag :
+ case Node.SectionTag :
+ node = Node.discardElement(node);
+ break;
+
+ default :
+ if (node.content != null)
+ dropPseudoXMLCrap(lexer, node.content);
+ node = node.next;
+ break;
+ }
+ }
+ }
+ // gschadow patch end
+
+ public void purgeAttributes(Node node) {
+ AttVal attr = node.attributes;
+ AttVal next = null;
+ AttVal prev = null;
+
+ while (attr != null) {
+ next = attr.next;
+
+ /* special check for class="Code" denoting pre text */
+ if (attr.attribute != null && attr.value != null && attr.attribute.equals("class") && attr.value.equals("Code")) {
+ prev = attr;
+ } else if (
+ attr.attribute != null
+ && (attr.attribute.equals("class")
+ || attr.attribute.equals("style")
+ || attr.attribute.equals("lang")
+ || attr.attribute.startsWith("x:")
+ || ((attr.attribute.equals("height") || attr.attribute.equals("width"))
+ && (node.tag == tt.tagTd || node.tag == tt.tagTr || node.tag == tt.tagTh)))) {
+ if (prev != null)
+ prev.next = next;
+ else
+ node.attributes = next;
+
+ } else
+ prev = attr;
+
+ attr = next;
+ }
+ }
+
+ /*
+ Word2000 uses span excessively, so we strip span out
+
+ The content of span is cleaned with cleanWord2000, moved into
+ span's place among its siblings and span itself is discarded.
+ Returns the node that followed span (null when it was the last
+ sibling).
+ */
+ public Node stripSpan(Lexer lexer, Node span) {
+ Node node;
+ Node prev = null;
+ Node content;
+
+ /*
+ deal with span elements that have content
+ by splicing the content in place of the span
+ after having processed it
+ */
+
+ cleanWord2000(lexer, span.content);
+ /* read the first child only after cleaning (presumably because cleaning can change it) */
+ content = span.content;
+
+ /* pick the anchor after which the content is inserted */
+ if (span.prev != null)
+ prev = span.prev;
+ else if (content != null) {
+ /* span is the first sibling: its first child goes in front of it and becomes the anchor */
+ node = content;
+ content = content.next;
+ Node.removeNode(node);
+ Node.insertNodeBeforeElement(span, node);
+ prev = node;
+ }
+
+ /* move the remaining children one by one, each after the previously placed node */
+ while (content != null) {
+ node = content;
+ content = content.next;
+ Node.removeNode(node);
+ Node.insertNodeAfterElement(prev, node);
+ prev = node;
+ }
+
+ /* span was the last child of its parent: record the last placed node as the parent's last */
+ if (span.next == null)
+ span.parent.last = prev;
+
+ /* remember the successor before span is discarded; content was moved, so detach it first */
+ node = span.next;
+ span.content = null;
+ Node.discardElement(span);
+ return node;
+ }
+
+ /*
+ map non-breaking spaces to regular spaces
+
+ Rewrites, in place, the text of every text node in the sibling
+ chain starting at node and in the content below it. Characters
+ are decoded with PPrint.getUTF8, U+00A0 is replaced by a plain
+ space and the result is written back with PPrint.putUTF8. As
+ the replacement is shorter than the original sequence, the end
+ offset of the node is pulled back to the last byte written.
+
+ Assumes node.textarray holds UTF-8 encoded bytes (byte[]), as
+ the getUTF8/putUTF8 calls imply -- TODO confirm against Node.
+ */
+ private void normalizeSpaces(Lexer lexer, Node node) {
+ while (node != null) {
+ if (node.content != null)
+ normalizeSpaces(lexer, node.content);
+
+ if (node.type == Node.TextNode) {
+ int i;
+ MutableInteger c = new MutableInteger();
+ int p = node.start;
+
+ for (i = node.start; i < node.end; ++i) {
+ /* mask to 0..255: a plain widening would sign-extend and hide every byte above 0x7F */
+ c.value = node.textarray[i] & 0xFF;
+
+ /* look for UTF-8 multibyte character */
+ if (c.value > 0x7F)
+ i += PPrint.getUTF8(node.textarray, i, c);
+
+ if (c.value == 160)
+ c.value = ' ';
+
+ p = PPrint.putUTF8(node.textarray, p, c.value);
+ }
+
+ /* the rewritten text can be shorter; drop the stale tail */
+ node.end = p;
+ }
+
+ node = node.next;
+ }
+ }
+
+ /*
+ This is a major clean up to strip out all the extra stuff you get
+ when you save as web page from Word 2000. It doesn't yet know what
+ to do with VML tags, but these will appear as errors unless you
+ declare them as new tags, such as o:p which needs to be declared
+ as inline.
+
+ Works on the sibling chain starting at node and recurses into
+ content. Style, meta and comment nodes, File-List links and
+ empty paragraphs are discarded, spans are stripped, runs of
+ <p class="MsoListBullet"> become one <ul> and runs of
+ <p class="Code"> become one <pre>. Processing stops at an html
+ element that lacks the xmlns:o attribute.
+ */
+ public void cleanWord2000(Lexer lexer, Node node) {
+ /* container (ul or pre) currently being built from a run of marker paragraphs */
+ Node list = null;
+
+ while (node != null) {
+ /* discard Word's style verbiage */
+ if (node.tag == tt.tagStyle || node.tag == tt.tagMeta || node.type == Node.CommentTag) {
+ node = Node.discardElement(node);
+ continue;
+ }
+
+ /* strip out all span tags Word scatters so liberally! */
+ if (node.tag == tt.tagSpan) {
+ node = stripSpan(lexer, node);
+ continue;
+ }
+
+ /* get rid of Word's xmlns attributes */
+ if (node.tag == tt.tagHtml) {
+ /* check that it's a Word 2000 document */
+ if (node.getAttrByName("xmlns:o") == null)
+ return;
+ }
+
+ if (node.tag == tt.tagLink) {
+ AttVal attr = node.getAttrByName("rel");
+
+ if (attr != null && attr.value != null && attr.value.equals("File-List")) {
+ node = Node.discardElement(node);
+ continue;
+ }
+ }
+
+ /* discard empty paragraphs */
+ if (node.content == null && node.tag == tt.tagP) {
+ node = Node.discardElement(node);
+ continue;
+ }
+
+ if (node.tag == tt.tagP) {
+ AttVal attr = node.getAttrByName("class");
+
+ /* map sequence of <p class="MsoListBullet"> to <ul>...</ul> */
+ if (attr != null && attr.value != null && attr.value.equals("MsoListBullet")) {
+ Node.coerceNode(lexer, node, tt.tagLi);
+
+ if (list == null || list.tag != tt.tagUl) {
+ list = lexer.inferredTag("ul");
+ Node.insertNodeBeforeElement(node, list);
+ }
+
+ purgeAttributes(node);
+
+ if (node.content != null)
+ cleanWord2000(lexer, node.content);
+
+ /* remove node and append to contents of list */
+ Node.removeNode(node);
+ Node.insertNodeAtEnd(list, node);
+
+ /*
+ The sibling after the list has not been examined yet (and
+ may not exist): restart the loop on it rather than falling
+ through, so it gets the full treatment and can join the run.
+ */
+ node = list.next;
+ continue;
+ }
+ /* map sequence of <p class="Code"> to <pre>...</pre> */
+ else if (attr != null && attr.value != null && attr.value.equals("Code")) {
+ Node br = lexer.newLineNode();
+ normalizeSpaces(lexer, node);
+
+ if (list == null || list.tag != tt.tagPre) {
+ list = lexer.inferredTag("pre");
+ Node.insertNodeBeforeElement(node, list);
+ }
+
+ /* remove node and append to contents of list */
+ Node.removeNode(node);
+ Node.insertNodeAtEnd(list, node);
+ stripSpan(lexer, node);
+ Node.insertNodeAtEnd(list, br);
+
+ /* as above: the next sibling is still unprocessed and may be null */
+ node = list.next;
+ continue;
+ } else
+ list = null;
+ } else
+ list = null;
+
+ /* strip out style and class attributes */
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+ purgeAttributes(node);
+
+ if (node.content != null)
+ cleanWord2000(lexer, node.content);
+
+ node = node.next;
+ }
+ }
+
+ /*
+ Tells whether the document below root looks like Word 2000
+ output: it must have an html element (located via findHTML)
+ that carries an xmlns:o attribute.
+ */
+ public boolean isWord2000(Node root, TagTable tt) {
+ Node html = root.findHTML(tt);
+
+ if (html == null)
+ return false;
+
+ return html.getAttrByName("xmlns:o") != null;
+ }
+}
--- /dev/null
+/*
+ * @(#)Configuration.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Read configuration file and manage configuration properties.
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/*
+ Configuration files associate a property name with a value.
+ The format is that of a Java .properties file.
+*/
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.Enumeration;
+import java.util.Properties;
+import java.util.StringTokenizer;
+
+public class Configuration implements java.io.Serializable {
+
+ /* character encodings */
+ public static final int RAW = 0;
+ public static final int ASCII = 1;
+ public static final int LATIN1 = 2;
+ public static final int UTF8 = 3;
+ public static final int ISO2022 = 4;
+ public static final int MACROMAN = 5;
+
+ /* mode controlling treatment of doctype */
+ public static final int DOCTYPE_OMIT = 0;
+ public static final int DOCTYPE_AUTO = 1;
+ public static final int DOCTYPE_STRICT= 2;
+ public static final int DOCTYPE_LOOSE = 3;
+ public static final int DOCTYPE_USER = 4;
+
+ protected int spaces = 2; /* default indentation */
+ protected int wraplen = 68; /* default wrap margin */
+ protected int CharEncoding = ASCII;
+ protected int tabsize = 4;
+
+ protected int docTypeMode = DOCTYPE_AUTO; /* see doctype property */
+ public String altText = null; /* default text for alt attribute */
+ protected String slidestyle = null; /* style sheet for slides */
+ protected String docTypeStr = null; /* user specified doctype */
+ protected String errfile = null; /* file name to write errors to */
+ protected boolean writeback = false; /* if true then output tidied markup */
+
+ public boolean OnlyErrors = false; /* if true normal output is suppressed */
+ public boolean ShowWarnings = true; /* however errors are always shown */
+ public boolean Quiet = false; /* no 'Parsing X', guessed DTD or summary */
+ public boolean IndentContent = false; /* indent content of appropriate tags */
+ public boolean SmartIndent = false; /* does text/block level content affect indentation */
+ public boolean HideEndTags = false; /* suppress optional end tags */
+ public boolean XmlTags = false; /* treat input as XML */
+ public boolean XmlOut = false; /* create output as XML */
+ public boolean xHTML = false; /* output extensible HTML */
+ public boolean XmlPi = false; /* add <?xml?> for XML docs */
+ public boolean RawOut = false; /* avoid mapping values > 127 to entities */
+ public boolean UpperCaseTags = false; /* output tags in upper not lower case */
+ public boolean UpperCaseAttrs = false; /* output attributes in upper not lower case */
+ public boolean MakeClean = false; /* remove presentational clutter */
+ public boolean LogicalEmphasis = false; /* replace i by em and b by strong */
+ public boolean DropFontTags = false; /* discard presentation tags */
+ public boolean DropEmptyParas = true; /* discard empty p elements */
+ public boolean FixComments = true; /* fix comments with adjacent hyphens */
+ public boolean BreakBeforeBR = false; /* o/p newline before <br> or not? */
+ public boolean BurstSlides = false; /* create slides on each h2 element */
+ public boolean NumEntities = false; /* use numeric entities */
+ public boolean QuoteMarks = false; /* output " marks as &quot; */
+ public boolean QuoteNbsp = true; /* output non-breaking space as entity */
+ public boolean QuoteAmpersand = true; /* output naked ampersand as &amp; */
+ public boolean WrapAttVals = false; /* wrap within attribute values */
+ public boolean WrapScriptlets = false; /* wrap within JavaScript string literals */
+ public boolean WrapSection = true; /* wrap within <![ ... ]> section tags */
+ public boolean WrapAsp = true; /* wrap within ASP pseudo elements */
+ public boolean WrapJste = true; /* wrap within JSTE pseudo elements */
+ public boolean WrapPhp = true; /* wrap within PHP pseudo elements */
+ public boolean FixBackslash = true; /* fix URLs by replacing \ with / */
+ public boolean IndentAttributes = false; /* newline+indent before each attribute */
+ public boolean XmlPIs = false; /* if set to yes PIs must end with ?> */
+ public boolean XmlSpace = false; /* if set to yes adds xml:space attr as needed */
+ public boolean EncloseBodyText = false; /* if yes text at body is wrapped in <p>'s */
+ public boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in <p>'s */
+ public boolean KeepFileTimes = true; /* if yes last modified time is preserved */
+ public boolean Word2000 = false; /* draconian cleaning for Word2000 */
+ public boolean TidyMark = true; /* add meta element indicating tidied doc */
+ public boolean Emacs = false; /* if true format error output for GNU Emacs */
+ public boolean LiteralAttribs = false; /* if true attributes may use newlines */
+
+ // gschadow patch start
+ /** Remove all scripting XML tags (ASP, JSP, PHP,...) */
+ public boolean DropPseudoXMLCrap = false;
+ // gschadow patch end
+
+ protected TagTable tt; /* TagTable associated with this Configuration */
+
+ private transient Properties _properties = new Properties();
+
+ /**
+ * Creates a configuration in which every option keeps its declared default.
+ */
+ public Configuration()
+ {
+ }
+
+ /**
+ * Copies every property of <code>p</code> (as listed by
+ * <code>propertyNames()</code>) into the internal property set and
+ * then re-evaluates all options from that set.
+ *
+ * @param p properties to merge in
+ */
+ public void addProps( Properties p )
+ {
+ /* "enum" is a reserved word from Java 5 on, hence the neutral name */
+ Enumeration names = p.propertyNames();
+ while (names.hasMoreElements())
+ {
+ String key = (String) names.nextElement();
+ String value = p.getProperty(key);
+ _properties.put(key, value);
+ }
+ parseProps();
+ }
+
+ /**
+ * Loads the properties file <code>filename</code> into the internal
+ * property set and re-evaluates all options. When the file cannot be
+ * read the problem is printed to <code>System.err</code> and the
+ * options are left as they were.
+ *
+ * @param filename path of a file in Java .properties format
+ */
+ public void parseFile( String filename )
+ {
+ FileInputStream in = null;
+ try
+ {
+ in = new FileInputStream( filename );
+ _properties.load( in );
+ }
+ catch (IOException e)
+ {
+ System.err.println(filename + e.toString());
+ return;
+ }
+ finally
+ {
+ /* always release the file handle, whether loading worked or not */
+ if (in != null)
+ {
+ try
+ {
+ in.close();
+ }
+ catch (IOException ignored)
+ {
+ /* nothing useful can be done about a failing close */
+ }
+ }
+ }
+ parseProps();
+ }
+
+ private void parseProps()
+ {
+ String value;
+
+ value = _properties.getProperty("indent-spaces");
+ if (value != null)
+ spaces = parseInt(value, "indent-spaces");
+
+ value = _properties.getProperty("wrap");
+ if (value != null)
+ wraplen = parseInt(value, "wrap");
+
+ value = _properties.getProperty("wrap-attributes");
+ if (value != null)
+ WrapAttVals = parseBool(value, "wrap-attributes");
+
+ value = _properties.getProperty("wrap-script-literals");
+ if (value != null)
+ WrapScriptlets = parseBool(value, "wrap-script-literals");
+
+ value = _properties.getProperty("wrap-sections");
+ if (value != null)
+ WrapSection = parseBool(value, "wrap-sections");
+
+ value = _properties.getProperty("wrap-asp");
+ if (value != null)
+ WrapAsp = parseBool(value, "wrap-asp");
+
+ value = _properties.getProperty("wrap-jste");
+ if (value != null)
+ WrapJste = parseBool(value, "wrap-jste");
+
+ value = _properties.getProperty("wrap-php");
+ if (value != null)
+ WrapPhp = parseBool(value, "wrap-php");
+
+ value = _properties.getProperty("literal-attributes");
+ if (value != null)
+ LiteralAttribs = parseBool(value, "literal-attributes");
+
+ value = _properties.getProperty("tab-size");
+ if (value != null)
+ tabsize = parseInt(value, "tab-size");
+
+ value = _properties.getProperty("markup");
+ if (value != null)
+ OnlyErrors = parseInvBool(value, "markup");
+
+ value = _properties.getProperty("quiet");
+ if (value != null)
+ Quiet = parseBool(value, "quiet");
+
+ value = _properties.getProperty("tidy-mark");
+ if (value != null)
+ TidyMark = parseBool(value, "tidy-mark");
+
+ value = _properties.getProperty("indent");
+ if (value != null)
+ IndentContent = parseIndent(value, "indent");
+
+ value = _properties.getProperty("indent-attributes");
+ if (value != null)
+ IndentAttributes = parseBool(value, "ident-attributes");
+
+ value = _properties.getProperty("hide-endtags");
+ if (value != null)
+ HideEndTags = parseBool(value, "hide-endtags");
+
+ value = _properties.getProperty("input-xml");
+ if (value != null)
+ XmlTags = parseBool(value, "input-xml");
+
+ value = _properties.getProperty("output-xml");
+ if (value != null)
+ XmlOut = parseBool(value, "output-xml");
+
+ value = _properties.getProperty("output-xhtml");
+ if (value != null)
+ xHTML = parseBool(value, "output-xhtml");
+
+ value = _properties.getProperty("add-xml-pi");
+ if (value != null)
+ XmlPi = parseBool(value, "add-xml-pi");
+
+ value = _properties.getProperty("add-xml-decl");
+ if (value != null)
+ XmlPi = parseBool(value, "add-xml-decl");
+
+ value = _properties.getProperty("assume-xml-procins");
+ if (value != null)
+ XmlPIs = parseBool(value, "assume-xml-procins");
+
+ value = _properties.getProperty("raw");
+ if (value != null)
+ RawOut = parseBool(value, "raw");
+
+ value = _properties.getProperty("uppercase-tags");
+ if (value != null)
+ UpperCaseTags = parseBool(value, "uppercase-tags");
+
+ value = _properties.getProperty("uppercase-attributes");
+ if (value != null)
+ UpperCaseAttrs = parseBool(value, "uppercase-attributes");
+
+ value = _properties.getProperty("clean");
+ if (value != null)
+ MakeClean = parseBool(value, "clean");
+
+ value = _properties.getProperty("logical-emphasis");
+ if (value != null)
+ LogicalEmphasis = parseBool(value, "logical-emphasis");
+
+ value = _properties.getProperty("word-2000");
+ if (value != null)
+ Word2000 = parseBool(value, "word-2000");
+
+ value = _properties.getProperty("drop-empty-paras");
+ if (value != null)
+ DropEmptyParas = parseBool(value, "drop-empty-paras");
+
+ value = _properties.getProperty("drop-font-tags");
+ if (value != null)
+ DropFontTags = parseBool(value, "drop-font-tags");
+
+ //gschadow patch start
+ value = _properties.getProperty("drop-pseudo-xml-crap");
+ if (value != null)
+ DropPseudoXMLCrap = parseBool(value, "drop-pseudo-xml-crap");
+ //gschadow patch end
+
+ value = _properties.getProperty("enclose-text");
+ if (value != null)
+ EncloseBodyText = parseBool(value, "enclose-text");
+
+ value = _properties.getProperty("enclose-block-text");
+ if (value != null)
+ EncloseBlockText = parseBool(value, "enclose-block-text");
+
+ value = _properties.getProperty("alt-text");
+ if (value != null)
+ altText = value;
+
+ value = _properties.getProperty("add-xml-space");
+ if (value != null)
+ XmlSpace = parseBool(value, "add-xml-space");
+
+ value = _properties.getProperty("fix-bad-comments");
+ if (value != null)
+ FixComments = parseBool(value, "fix-bad-comments");
+
+ value = _properties.getProperty("split");
+ if (value != null)
+ BurstSlides = parseBool(value, "split");
+
+ value = _properties.getProperty("break-before-br");
+ if (value != null)
+ BreakBeforeBR = parseBool(value, "break-before-br");
+
+ value = _properties.getProperty("numeric-entities");
+ if (value != null)
+ NumEntities = parseBool(value, "numeric-entities");
+
+ value = _properties.getProperty("quote-marks");
+ if (value != null)
+ QuoteMarks = parseBool(value, "quote-marks");
+
+ value = _properties.getProperty("quote-nbsp");
+ if (value != null)
+ QuoteNbsp = parseBool(value, "quote-nbsp");
+
+ value = _properties.getProperty("quote-ampersand");
+ if (value != null)
+ QuoteAmpersand = parseBool(value, "quote-ampersand");
+
+ value = _properties.getProperty("write-back");
+ if (value != null)
+ writeback = parseBool(value, "write-back");
+
+ value = _properties.getProperty("keep-time");
+ if (value != null)
+ KeepFileTimes = parseBool(value, "keep-time");
+
+ value = _properties.getProperty("show-warnings");
+ if (value != null)
+ ShowWarnings = parseBool(value, "show-warnings");
+
+ value = _properties.getProperty("error-file");
+ if (value != null)
+ errfile = parseName(value, "error-file");
+
+ value = _properties.getProperty("slide-style");
+ if (value != null)
+ slidestyle = parseName(value, "slide-style");
+
+ value = _properties.getProperty("new-inline-tags");
+ if (value != null)
+ parseInlineTagNames(value, "new-inline-tags");
+
+ value = _properties.getProperty("new-blocklevel-tags");
+ if (value != null)
+ parseBlockTagNames(value, "new-blocklevel-tags");
+
+ value = _properties.getProperty("new-empty-tags");
+ if (value != null)
+ parseEmptyTagNames(value, "new-empty-tags");
+
+ value = _properties.getProperty("new-pre-tags");
+ if (value != null)
+ parsePreTagNames(value, "new-pre-tags");
+
+ value = _properties.getProperty("char-encoding");
+ if (value != null)
+ CharEncoding = parseCharEncoding(value, "char-encoding");
+
+ value = _properties.getProperty("doctype");
+ if (value != null)
+ docTypeStr = parseDocType(value, "doctype");
+
+ value = _properties.getProperty("fix-backslash");
+ if (value != null)
+ FixBackslash = parseBool(value, "fix-backslash");
+
+ value = _properties.getProperty("gnu-emacs");
+ if (value != null)
+ Emacs = parseBool(value, "gnu-emacs");
+ }
+
+ /**
+ * Ensures that the configuration is self consistent by applying the
+ * implications between options. The steps run in a fixed order because
+ * later steps read flags that earlier steps may have set.
+ */
+ public void adjust()
+ {
+ /* enclosing text in blocks implies enclosing text in the body */
+ if (EncloseBlockText)
+ {
+ EncloseBodyText = true;
+ }
+
+ /* avoid the need to set IndentContent when SmartIndent is set */
+ if (SmartIndent)
+ {
+ IndentContent = true;
+ }
+
+ /* a wrap margin of 0 disables wrapping */
+ if (wraplen == 0)
+ {
+ wraplen = Integer.MAX_VALUE;
+ }
+
+ /* Word 2000 needs o:p to be declared as inline */
+ if (Word2000)
+ {
+ tt.defineInlineTag("o:p");
+ }
+
+ /* XHTML is written in lower case */
+ if (xHTML)
+ {
+ XmlOut = true;
+ UpperCaseTags = false;
+ UpperCaseAttrs = false;
+ }
+
+ /* if XML in, then XML out */
+ if (XmlTags)
+ {
+ XmlOut = true;
+ XmlPIs = true;
+ }
+
+ /* XML requires end tags */
+ if (XmlOut)
+ {
+ QuoteAmpersand = true;
+ HideEndTags = false;
+ }
+ }
+
+ /**
+ * Parses <code>s</code> as a decimal integer. A value that is not a
+ * number is reported for <code>option</code> and yields -1.
+ */
+ private static int parseInt( String s, String option )
+ {
+ try {
+ return Integer.parseInt( s );
+ }
+ catch ( NumberFormatException e ) {
+ Report.badArgument(option);
+ return -1;
+ }
+ }
+
+ /**
+ * Parses a boolean option by looking at the first character only:
+ * t, T, y, Y and 1 mean true; f, F, n, N and 0 mean false. Any other
+ * first character is reported for <code>option</code> and yields
+ * false. A null or empty string yields false without a report.
+ */
+ private static boolean parseBool( String s, String option )
+ {
+ if ( s == null || s.length() == 0 )
+ return false;
+
+ switch ( s.charAt(0) ) {
+ case 't' :
+ case 'T' :
+ case 'Y' :
+ case 'y' :
+ case '1' :
+ return true;
+
+ case 'f' :
+ case 'F' :
+ case 'N' :
+ case 'n' :
+ case '0' :
+ return false;
+
+ default :
+ Report.badArgument(option);
+ return false;
+ }
+ }
+
+ /**
+ * Parses a boolean option whose stored meaning is the opposite of the
+ * written one: the result is the negation of what {@link #parseBool}
+ * makes of <code>s</code>. Delegating keeps both parsers in agreement
+ * on the accepted spellings, so 1 and 0 are understood here as well.
+ * An unrecognised, null or empty value therefore yields true; only
+ * the unrecognised one is reported for <code>option</code>.
+ */
+ private static boolean parseInvBool( String s, String option )
+ {
+ return !parseBool(s, option);
+ }
+
+ /**
+ * Returns the first whitespace-delimited word of <code>s</code>.
+ * When there is none the problem is reported for <code>option</code>
+ * and null is returned.
+ */
+ private static String parseName( String s, String option )
+ {
+ StringTokenizer words = new StringTokenizer( s );
+ if ( words.hasMoreTokens() )
+ return words.nextToken();
+
+ Report.badArgument(option);
+ return null;
+ }
+
+ private static int parseCharEncoding( String s, String option )
+ {
+ int result = ASCII;
+
+ if (Lexer.wstrcasecmp(s, "ascii") == 0)
+ result = ASCII;
+ else if (Lexer.wstrcasecmp(s, "latin1") == 0)
+ result = LATIN1;
+ else if (Lexer.wstrcasecmp(s, "raw") == 0)
+ result = RAW;
+ else if (Lexer.wstrcasecmp(s, "utf8") == 0)
+ result = UTF8;
+ else if (Lexer.wstrcasecmp(s, "iso2022") == 0)
+ result = ISO2022;
+ else if (Lexer.wstrcasecmp(s, "mac") == 0)
+ result = MACROMAN;
+ else
+ Report.badArgument(option);
+
+ return result;
+ }
+
+ /* slight hack to avoid changes to pprint.c */
+ private boolean parseIndent( String s, String option )
+ {
+ boolean b = IndentContent;
+
+ if (Lexer.wstrcasecmp(s, "yes") == 0)
+ {
+ b = true;
+ SmartIndent = false;
+ }
+ else if (Lexer.wstrcasecmp(s, "true") == 0)
+ {
+ b = true;
+ SmartIndent = false;
+ }
+ else if (Lexer.wstrcasecmp(s, "no") == 0)
+ {
+ b = false;
+ SmartIndent = false;
+ }
+ else if (Lexer.wstrcasecmp(s, "false") == 0)
+ {
+ b = false;
+ SmartIndent = false;
+ }
+ else if (Lexer.wstrcasecmp(s, "auto") == 0)
+ {
+ b = true;
+ SmartIndent = true;
+ }
+ else
+ Report.badArgument(option);
+ return b;
+ }
+
+ /**
+ * Registers every name in <code>s</code> (separated by blanks, tabs,
+ * line breaks or commas) as an inline tag with the tag table.
+ * <code>option</code> is not used.
+ */
+ public void parseInlineTagNames( String s, String option )
+ {
+ for ( StringTokenizer names = new StringTokenizer( s, " \t\n\r," ); names.hasMoreTokens(); ) {
+ tt.defineInlineTag( names.nextToken() );
+ }
+ }
+
+ /**
+ * Registers every name in <code>s</code> (separated by blanks, tabs,
+ * line breaks or commas) as a block-level tag with the tag table.
+ * <code>option</code> is not used.
+ */
+ public void parseBlockTagNames( String s, String option )
+ {
+ for ( StringTokenizer names = new StringTokenizer( s, " \t\n\r," ); names.hasMoreTokens(); ) {
+ tt.defineBlockTag( names.nextToken() );
+ }
+ }
+
+ /**
+ * Registers every name in <code>s</code> (separated by blanks, tabs,
+ * line breaks or commas) as an empty tag with the tag table.
+ * <code>option</code> is not used.
+ */
+ public void parseEmptyTagNames( String s, String option )
+ {
+ for ( StringTokenizer names = new StringTokenizer( s, " \t\n\r," ); names.hasMoreTokens(); ) {
+ tt.defineEmptyTag( names.nextToken() );
+ }
+ }
+
+ /**
+ * Registers every name in <code>s</code> (separated by blanks, tabs,
+ * line breaks or commas) as a pre tag with the tag table.
+ * <code>option</code> is not used.
+ */
+ public void parsePreTagNames( String s, String option )
+ {
+ for ( StringTokenizer names = new StringTokenizer( s, " \t\n\r," ); names.hasMoreTokens(); ) {
+ tt.definePreTag( names.nextToken() );
+ }
+ }
+
+ /*
+ doctype: omit | auto | strict | loose | <fpi>
+
+ where the fpi is a string similar to
+
+ "-//ACME//DTD HTML 3.14159//EN"
+
+ Sets docTypeMode as a side effect. A value starting with a
+ double quote selects DOCTYPE_USER and is returned (trimmed,
+ quotes included). For the keywords null is returned;
+ "transitional" is accepted as a synonym of "loose". Any other
+ word is reported for the option and falls back to DOCTYPE_AUTO.
+ */
+ protected String parseDocType( String s, String option )
+ {
+ s = s.trim();
+
+ /* "-//ACME//DTD HTML 3.14159//EN" or similar */
+ if (s.startsWith("\""))
+ {
+ docTypeMode = DOCTYPE_USER;
+ return s;
+ }
+
+ /* read first word */
+ StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
+ String word = t.hasMoreTokens() ? t.nextToken() : "";
+
+ int mode = DOCTYPE_AUTO;
+ boolean recognised = true;
+
+ if (Lexer.wstrcasecmp(word, "omit") == 0)
+ mode = DOCTYPE_OMIT;
+ else if (Lexer.wstrcasecmp(word, "strict") == 0)
+ mode = DOCTYPE_STRICT;
+ else if (Lexer.wstrcasecmp(word, "loose") == 0 ||
+ Lexer.wstrcasecmp(word, "transitional") == 0)
+ mode = DOCTYPE_LOOSE;
+ else if (Lexer.wstrcasecmp(word, "auto") != 0)
+ recognised = false;
+
+ docTypeMode = mode;
+
+ if (!recognised)
+ Report.badArgument(option);
+
+ return null;
+ }
+
+}
--- /dev/null
+/*
+ * @(#)DOMAttrImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMAttrImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM Support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMAttrImpl extends DOMNodeImpl implements org.w3c.dom.Attr {
+
+ protected AttVal avAdaptee;
+
+ /**
+ * Wraps an attribute/value pair as a DOM attribute node.
+ * @param adaptee the attribute this node exposes
+ */
+ protected DOMAttrImpl(AttVal adaptee)
+ {
+ super(null); // must override all methods of DOMNodeImpl
+ this.avAdaptee = adaptee;
+ }
+
+
+ /* --------------------- DOM ---------------------------- */
+
+ /**
+ * Returns the attribute value; delegates to <code>getValue()</code>.
+ * @see org.w3c.dom.Node#getNodeValue
+ */
+ public String getNodeValue() throws DOMException
+ {
+ return getValue();
+ }
+
+ /**
+ * Sets the attribute value; delegates to <code>setValue()</code>.
+ * @see org.w3c.dom.Node#setNodeValue
+ */
+ public void setNodeValue(String nodeValue) throws DOMException
+ {
+ setValue(nodeValue);
+ }
+
+ /**
+ * Returns the attribute name; delegates to {@link #getName()}.
+ * @see org.w3c.dom.Node#getNodeName
+ */
+ public String getNodeName()
+ {
+ return getName();
+ }
+
+ /**
+ * Always returns <code>ATTRIBUTE_NODE</code>.
+ * @see org.w3c.dom.Node#getNodeType
+ */
+ public short getNodeType()
+ {
+ return org.w3c.dom.Node.ATTRIBUTE_NODE;
+ }
+
+ public org.w3c.dom.Node getParentNode()
+ {
+ return null;
+ }
+
+ public org.w3c.dom.NodeList getChildNodes()
+ {
+ // NOT SUPPORTED
+ return null;
+ }
+
+ public org.w3c.dom.Node getFirstChild()
+ {
+ // NOT SUPPORTED
+ return null;
+ }
+
+ public org.w3c.dom.Node getLastChild()
+ {
+ // NOT SUPPORTED
+ return null;
+ }
+
+ public org.w3c.dom.Node getPreviousSibling()
+ {
+ return null;
+ }
+
+ public org.w3c.dom.Node getNextSibling()
+ {
+ return null;
+ }
+
+ public org.w3c.dom.NamedNodeMap getAttributes()
+ {
+ return null;
+ }
+
+ public org.w3c.dom.Document getOwnerDocument()
+ {
+ return null;
+ }
+
+ public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild,
+ org.w3c.dom.Node refChild)
+ throws DOMException
+ {
+ throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+ "Not supported");
+ }
+
+ public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild,
+ org.w3c.dom.Node oldChild)
+ throws DOMException
+ {
+ throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+ "Not supported");
+ }
+
+ public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild)
+ throws DOMException
+ {
+ throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+ "Not supported");
+ }
+
+ public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild)
+ throws DOMException
+ {
+ throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+ "Not supported");
+ }
+
+ public boolean hasChildNodes()
+ {
+ return false;
+ }
+
+ public org.w3c.dom.Node cloneNode(boolean deep)
+ {
+ return null;
+ }
+
+ /**
+ * @see org.w3c.dom.Attr#getName
+ */
+ public String getName()
+ {
+ return avAdaptee.attribute;
+ }
+
+ /**
+ * @see org.w3c.dom.Attr#getSpecified
+ */
+ public boolean getSpecified()
+ {
+ return true;
+ }
+
+ /**
+ * Returns value of this attribute. If this attribute has a null value,
+ * then the attribute name is returned instead.
+ * Thanks to Brett Knights <brett@knightsofthenet.com> for this fix.
+ * @see org.w3c.dom.Attr#getValue
+ *
+ */
+ public String getValue()
+ {
+ return (avAdaptee.value == null) ? avAdaptee.attribute : avAdaptee.value ;
+ }
+
+ /**
+ * @see org.w3c.dom.Attr#setValue
+ */
+ public void setValue(String value)
+ {
+ avAdaptee.value = value;
+ }
+
+ /**
+ * DOM2 - not implemented.
+ */
+ public org.w3c.dom.Element getOwnerElement() {
+ return null;
+ }
+
+}
--- /dev/null
+/*
+ * @(#)DOMAttrMapImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMAttrMapImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMAttrMapImpl implements org.w3c.dom.NamedNodeMap {
+
+    /** Head of the attribute list, chained through {@code AttVal.next}; null when empty. */
+    private AttVal first = null;
+
+    /**
+     * Creates a read-only map view over a chain of attributes.
+     *
+     * @param first head of the attribute chain, or null for an empty map
+     */
+    protected DOMAttrMapImpl(AttVal first)
+    {
+        this.first = first;
+    }
+
+    /**
+     * Looks up an attribute by name using an exact, case-sensitive comparison.
+     *
+     * @param name the attribute name to search for
+     * @return the DOM adapter of the first matching attribute, or null when
+     *         no attribute carries that name
+     * @see org.w3c.dom.NamedNodeMap#getNamedItem
+     */
+    public org.w3c.dom.Node getNamedItem(String name)
+    {
+        for (AttVal att = this.first; att != null; att = att.next) {
+            // Entries without a name are skipped instead of raising a
+            // NullPointerException during the comparison.
+            if (att.attribute != null && att.attribute.equals(name)) {
+                return att.getAdapter();
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Not supported: the map is left unchanged and null is returned.
+     *
+     * @see org.w3c.dom.NamedNodeMap#setNamedItem
+     */
+    public org.w3c.dom.Node setNamedItem(org.w3c.dom.Node arg)
+                                          throws DOMException
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    /**
+     * Not supported: the map is left unchanged and null is returned.
+     *
+     * @see org.w3c.dom.NamedNodeMap#removeNamedItem
+     */
+    public org.w3c.dom.Node removeNamedItem(String name)
+                                             throws DOMException
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    /**
+     * Returns the attribute at the given position in the chain.
+     *
+     * @param index zero-based position
+     * @return the DOM adapter of that attribute, or null when the index is
+     *         negative or not smaller than {@link #getLength()}
+     * @see org.w3c.dom.NamedNodeMap#item
+     */
+    public org.w3c.dom.Node item(int index)
+    {
+        // A negative index is not a valid position; previously it silently
+        // resolved to the first attribute.
+        if (index < 0) {
+            return null;
+        }
+        AttVal att = this.first;
+        for (int i = 0; att != null && i < index; i++) {
+            att = att.next;
+        }
+        return (att != null) ? att.getAdapter() : null;
+    }
+
+    /**
+     * Counts the attributes by walking the whole chain.
+     *
+     * @return number of attributes in the map
+     * @see org.w3c.dom.NamedNodeMap#getLength
+     */
+    public int getLength()
+    {
+        int len = 0;
+        for (AttVal att = this.first; att != null; att = att.next) {
+            len++;
+        }
+        return len;
+    }
+
+    /**
+     * DOM2 - not implemented; always returns null.
+     */
+    public org.w3c.dom.Node getNamedItemNS(String namespaceURI,
+                                           String localName)
+    {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented; always returns null.
+     * @exception org.w3c.dom.DOMException never thrown by this implementation
+     */
+    public org.w3c.dom.Node setNamedItemNS(org.w3c.dom.Node arg)
+        throws org.w3c.dom.DOMException
+    {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented; always returns null.
+     * @exception org.w3c.dom.DOMException never thrown by this implementation
+     */
+    public org.w3c.dom.Node removeNamedItemNS(String namespaceURI,
+                                              String localName)
+        throws org.w3c.dom.DOMException
+    {
+        return null;
+    }
+
+}
--- /dev/null
+/*
+ * @(#)DOMCDATASectionImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+
+/**
+ *
+ * DOMCDATASectionImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @author Gary L Peskin <garyp@firstech.com>
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMCDATASectionImpl extends DOMTextImpl
+        implements org.w3c.dom.CDATASection {
+
+    /**
+     * Creates a CDATA section view of the given tidy node.
+     *
+     * @param adaptee the node to wrap; handed to the superclass unchanged
+     */
+    protected DOMCDATASectionImpl(Node adaptee) {
+        super(adaptee);
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * Returns the fixed node name {@code "#cdata-section"}.
+     *
+     * @see org.w3c.dom.Node#getNodeName
+     */
+    public String getNodeName() {
+        return "#cdata-section";
+    }
+
+    /**
+     * Always reports {@code CDATA_SECTION_NODE}.
+     *
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType() {
+        return org.w3c.dom.Node.CDATA_SECTION_NODE;
+    }
+}
--- /dev/null
+/*
+ * @(#)DOMCharacterDataImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMCharacterDataImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMCharacterDataImpl extends DOMNodeImpl
+                            implements org.w3c.dom.CharacterData {
+
+    /**
+     * Creates a character-data view of the given tidy node.
+     *
+     * @param adaptee the node to wrap; handed to the superclass unchanged
+     */
+    protected DOMCharacterDataImpl(Node adaptee)
+    {
+        super(adaptee);
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * Returns the text of this node; delegates to {@code getNodeValue()}.
+     *
+     * @see org.w3c.dom.CharacterData#getData
+     */
+    public String getData() throws DOMException
+    {
+        return getNodeValue();
+    }
+
+    /**
+     * Not supported.
+     *
+     * @throws DOMException always, with code NO_MODIFICATION_ALLOWED_ERR
+     * @see org.w3c.dom.CharacterData#setData
+     */
+    public void setData(String data) throws DOMException
+    {
+        // NOT SUPPORTED
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+    /**
+     * Returns the size of this node's range in the underlying text array
+     * ({@code end - start}), or 0 when there is no text.
+     *
+     * NOTE(review): the text array is a byte array, so this is presumably a
+     * byte count and may differ from {@code getData().length()} for
+     * multi-byte characters - confirm against Lexer.getString.
+     *
+     * @see org.w3c.dom.CharacterData#getLength
+     */
+    public int getLength()
+    {
+        int len = 0;
+        if (adaptee.textarray != null && adaptee.start < adaptee.end)
+            len = adaptee.end - adaptee.start;
+        return len;
+    }
+
+    /**
+     * Extracts part of this node's text.
+     *
+     * @param offset start position relative to the beginning of this node's
+     *               text range
+     * @param count  maximum number of units to extract; clamped to what is
+     *               available after {@code offset}
+     * @return the extracted text, or null when the node has no text
+     * @throws DOMException INDEX_SIZE_ERR when {@code count} is negative, or
+     *         when the node has text and {@code offset} is negative or not
+     *         smaller than the length of that text
+     * @see org.w3c.dom.CharacterData#substringData
+     */
+    public String substringData(int offset,
+                                int count) throws DOMException
+    {
+        if (count < 0)
+        {
+            throw new DOMExceptionImpl(DOMException.INDEX_SIZE_ERR,
+                                       "Invalid length");
+        }
+        if (adaptee.textarray == null || adaptee.start >= adaptee.end)
+        {
+            return null;
+        }
+
+        // All range checks are done relative to the node's own text so that
+        // large arguments cannot overflow when added to adaptee.start.
+        int available = adaptee.end - adaptee.start;
+
+        // A negative offset used to pass the check and read data located
+        // before this node's range in the shared text array.
+        if (offset < 0 || offset >= available)
+        {
+            throw new DOMExceptionImpl(DOMException.INDEX_SIZE_ERR,
+                                       "Invalid offset");
+        }
+
+        int len = Math.min(count, available - offset);
+        return Lexer.getString(adaptee.textarray,
+                               adaptee.start + offset,
+                               len);
+    }
+
+    /**
+     * Not supported.
+     *
+     * @throws DOMException always, with code NO_MODIFICATION_ALLOWED_ERR
+     * @see org.w3c.dom.CharacterData#appendData
+     */
+    public void appendData(String arg) throws DOMException
+    {
+        // NOT SUPPORTED
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+    /**
+     * Not supported.
+     *
+     * @throws DOMException always, with code NO_MODIFICATION_ALLOWED_ERR
+     * @see org.w3c.dom.CharacterData#insertData
+     */
+    public void insertData(int offset,
+                           String arg) throws DOMException
+    {
+        // NOT SUPPORTED
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+    /**
+     * Not supported.
+     *
+     * @throws DOMException always, with code NO_MODIFICATION_ALLOWED_ERR
+     * @see org.w3c.dom.CharacterData#deleteData
+     */
+    public void deleteData(int offset,
+                           int count) throws DOMException
+    {
+        // NOT SUPPORTED
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+    /**
+     * Not supported.
+     *
+     * @throws DOMException always, with code NO_MODIFICATION_ALLOWED_ERR
+     * @see org.w3c.dom.CharacterData#replaceData
+     */
+    public void replaceData(int offset,
+                            int count,
+                            String arg) throws DOMException
+    {
+        // NOT SUPPORTED
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+}
--- /dev/null
+/*
+ * @(#)DOMCommentImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+
+/**
+ *
+ * DOMCommentImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMCommentImpl extends DOMCharacterDataImpl
+        implements org.w3c.dom.Comment {
+
+    /**
+     * Creates a comment view of the given tidy node.
+     *
+     * @param adaptee the node to wrap; handed to the superclass unchanged
+     */
+    protected DOMCommentImpl(Node adaptee) {
+        super(adaptee);
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * Returns the fixed node name {@code "#comment"}.
+     *
+     * @see org.w3c.dom.Node#getNodeName
+     */
+    public String getNodeName() {
+        return "#comment";
+    }
+
+    /**
+     * Always reports {@code COMMENT_NODE}.
+     *
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType() {
+        return org.w3c.dom.Node.COMMENT_NODE;
+    }
+
+}
--- /dev/null
+/*
+ * @(#)DOMDocumentImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMDocumentImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM Support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMDocumentImpl extends DOMNodeImpl implements org.w3c.dom.Document {
+
+    private TagTable tt; // a DOM Document has its own TagTable.
+
+    /**
+     * Creates a document view of the given tidy node and gives it a fresh
+     * TagTable.
+     *
+     * @param adaptee the node to wrap; handed to the superclass unchanged
+     */
+    protected DOMDocumentImpl(Node adaptee)
+    {
+        super(adaptee);
+        tt = new TagTable();
+    }
+
+    /**
+     * Replaces the tag table used by {@link #createElement(String)}.
+     *
+     * @param tt the tag table to use from now on
+     */
+    public void setTagTable(TagTable tt)
+    {
+        this.tt = tt;
+    }
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * Returns the fixed node name {@code "#document"}.
+     *
+     * @see org.w3c.dom.Node#getNodeName
+     */
+    public String getNodeName()
+    {
+        return "#document";
+    }
+
+    /**
+     * Always reports {@code DOCUMENT_NODE}.
+     *
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType()
+    {
+        return org.w3c.dom.Node.DOCUMENT_NODE;
+    }
+
+    /**
+     * Returns the adapter of the first direct child of type DocTypeTag.
+     *
+     * @return the document type, or null when no such child exists
+     * @see org.w3c.dom.Document#getDoctype
+     */
+    public org.w3c.dom.DocumentType getDoctype()
+    {
+        for (Node node = adaptee.content; node != null; node = node.next) {
+            if (node.type == Node.DocTypeTag) {
+                return (org.w3c.dom.DocumentType)node.getAdapter();
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Not supported; always returns null.
+     *
+     * @see org.w3c.dom.Document#getImplementation
+     */
+    public org.w3c.dom.DOMImplementation getImplementation()
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    /**
+     * Returns the adapter of the first direct child that is a start tag or a
+     * start-end tag.
+     *
+     * @return the document element, or null when no such child exists
+     * @see org.w3c.dom.Document#getDocumentElement
+     */
+    public org.w3c.dom.Element getDocumentElement()
+    {
+        for (Node node = adaptee.content; node != null; node = node.next) {
+            if (node.type == Node.StartTag ||
+                node.type == Node.StartEndTag) {
+                return (org.w3c.dom.Element)node.getAdapter();
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Creates a detached element as a start-end tag node. When the new node
+     * ends up without a tag definition, the tag table's {@code xmlTags}
+     * entry is assigned to it.
+     *
+     * @param tagName name of the element to create
+     * @return the DOM adapter of the new element; never null
+     * @see org.w3c.dom.Document#createElement
+     */
+    public org.w3c.dom.Element createElement(String tagName)
+                                            throws DOMException
+    {
+        // "new" never yields null, so the former null check and its
+        // "return null" branch were unreachable and have been removed.
+        Node node = new Node(Node.StartEndTag, null, 0, 0, tagName, tt);
+        if (node.tag == null)           // Fix Bug 121206
+            node.tag = tt.xmlTags;
+        return (org.w3c.dom.Element)node.getAdapter();
+    }
+
+    /**
+     * Not supported; always returns null.
+     *
+     * @see org.w3c.dom.Document#createDocumentFragment
+     */
+    public org.w3c.dom.DocumentFragment createDocumentFragment()
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    /**
+     * Creates a detached text node whose text array is
+     * {@code Lexer.getBytes(data)}.
+     *
+     * @param data the text content
+     * @return the DOM adapter of the new text node; never null
+     * @see org.w3c.dom.Document#createTextNode
+     */
+    public org.w3c.dom.Text createTextNode(String data)
+    {
+        byte[] textarray = Lexer.getBytes(data);
+        Node node = new Node(Node.TextNode, textarray, 0, textarray.length);
+        return (org.w3c.dom.Text)node.getAdapter();
+    }
+
+    /**
+     * Creates a detached comment node whose text array is
+     * {@code Lexer.getBytes(data)}.
+     *
+     * @param data the comment content
+     * @return the DOM adapter of the new comment node; never null
+     * @see org.w3c.dom.Document#createComment
+     */
+    public org.w3c.dom.Comment createComment(String data)
+    {
+        byte[] textarray = Lexer.getBytes(data);
+        Node node = new Node(Node.CommentTag, textarray, 0, textarray.length);
+        return (org.w3c.dom.Comment)node.getAdapter();
+    }
+
+    /**
+     * Not supported; always returns null.
+     *
+     * @see org.w3c.dom.Document#createCDATASection
+     */
+    public org.w3c.dom.CDATASection createCDATASection(String data)
+                                                 throws DOMException
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    /**
+     * Not available for HTML documents.
+     *
+     * @throws DOMException always, with code NOT_SUPPORTED_ERR
+     * @see org.w3c.dom.Document#createProcessingInstruction
+     */
+    public org.w3c.dom.ProcessingInstruction createProcessingInstruction(String target,
+                                                          String data)
+                                                          throws DOMException
+    {
+        throw new DOMExceptionImpl(DOMException.NOT_SUPPORTED_ERR,
+                                   "HTML document");
+    }
+
+    /**
+     * Creates a detached attribute with the given name and a null value, and
+     * resolves its dictionary entry through the default attribute table.
+     *
+     * @param name name of the attribute to create
+     * @return the DOM adapter of the new attribute; never null
+     * @see org.w3c.dom.Document#createAttribute
+     */
+    public org.w3c.dom.Attr createAttribute(String name)
+                                              throws DOMException
+    {
+        AttVal av = new AttVal(null, null, (int)'"', name, null);
+        av.dict =
+            AttributeTable.getDefaultAttributeTable().findAttribute(av);
+        return (org.w3c.dom.Attr)av.getAdapter();
+    }
+
+    /**
+     * Not supported; always returns null.
+     *
+     * @see org.w3c.dom.Document#createEntityReference
+     */
+    public org.w3c.dom.EntityReference createEntityReference(String name)
+                                                    throws DOMException
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    /**
+     * Returns a node list built over this document's node for the given tag
+     * name.
+     *
+     * @see org.w3c.dom.Document#getElementsByTagName
+     */
+    public org.w3c.dom.NodeList getElementsByTagName(String tagname)
+    {
+        return new DOMNodeListByTagNameImpl(this.adaptee, tagname);
+    }
+
+    /**
+     * DOM2 - not implemented; always returns null.
+     * @exception org.w3c.dom.DOMException never thrown by this implementation
+     */
+    public org.w3c.dom.Node importNode(org.w3c.dom.Node importedNode, boolean deep)
+        throws org.w3c.dom.DOMException
+    {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented; always returns null.
+     * @exception org.w3c.dom.DOMException never thrown by this implementation
+     */
+    public org.w3c.dom.Attr createAttributeNS(String namespaceURI,
+                                              String qualifiedName)
+        throws org.w3c.dom.DOMException
+    {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented; always returns null.
+     * @exception org.w3c.dom.DOMException never thrown by this implementation
+     */
+    public org.w3c.dom.Element createElementNS(String namespaceURI,
+                                               String qualifiedName)
+        throws org.w3c.dom.DOMException
+    {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented; always returns null.
+     */
+    public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI,
+                                                       String localName)
+    {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented; always returns null.
+     */
+    public org.w3c.dom.Element getElementById(String elementId)
+    {
+        return null;
+    }
+
+}
--- /dev/null
+/*
+ * @(#)DOMDocumentTypeImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+
+/**
+ *
+ * DOMDocumentTypeImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMDocumentTypeImpl extends DOMNodeImpl
+        implements org.w3c.dom.DocumentType {
+
+    /**
+     * Creates a document-type view of the given tidy node.
+     *
+     * @param adaptee the node to wrap; handed to the superclass unchanged
+     */
+    protected DOMDocumentTypeImpl(Node adaptee) {
+        super(adaptee);
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * Always reports {@code DOCUMENT_TYPE_NODE}.
+     *
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType() {
+        return org.w3c.dom.Node.DOCUMENT_TYPE_NODE;
+    }
+
+    /**
+     * Returns the same string as {@link #getName()}.
+     *
+     * @see org.w3c.dom.Node#getNodeName
+     */
+    public String getNodeName() {
+        return this.getName();
+    }
+
+    /**
+     * Returns the text stored for the wrapped node, decoded through
+     * {@code Lexer.getString}. The result is null when the node is not a
+     * DocTypeTag node or carries no text.
+     *
+     * @see org.w3c.dom.DocumentType#getName
+     */
+    public String getName() {
+        if (adaptee.type != Node.DocTypeTag) {
+            return null;
+        }
+        if (adaptee.textarray == null || adaptee.start >= adaptee.end) {
+            return null;
+        }
+        int length = adaptee.end - adaptee.start;
+        return Lexer.getString(adaptee.textarray, adaptee.start, length);
+    }
+
+    /**
+     * Not supported; always null.
+     */
+    public org.w3c.dom.NamedNodeMap getEntities() {
+        return null;
+    }
+
+    /**
+     * Not supported; always null.
+     */
+    public org.w3c.dom.NamedNodeMap getNotations() {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented; always null.
+     */
+    public String getPublicId() {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented; always null.
+     */
+    public String getSystemId() {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented; always null.
+     */
+    public String getInternalSubset() {
+        return null;
+    }
+
+}
--- /dev/null
+/*
+ * @(#)DOMElementImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMElementImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM Support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMElementImpl extends DOMNodeImpl
+                            implements org.w3c.dom.Element {
+
+    /**
+     * Creates an element view of the given tidy node.
+     *
+     * @param adaptee the node to wrap; handed to the superclass unchanged
+     */
+    protected DOMElementImpl(Node adaptee)
+    {
+        super(adaptee);
+    }
+
+    /**
+     * Tells whether the given attribute entry carries the given name.
+     * Entries without a name never match.
+     */
+    private static boolean hasName(AttVal att, String name)
+    {
+        return att.attribute != null && att.attribute.equals(name);
+    }
+
+    /**
+     * Finds the first attribute entry of this element with the given name.
+     *
+     * @return the entry, or null when there is no adaptee or no match
+     */
+    private AttVal findAttribute(String name)
+    {
+        if (this.adaptee == null)
+            return null;
+        for (AttVal att = this.adaptee.attributes; att != null; att = att.next) {
+            if (hasName(att, name))
+                return att;
+        }
+        return null;
+    }
+
+    /**
+     * Removes {@code att} from the attribute chain.
+     *
+     * @param pre the entry preceding {@code att}, or null when {@code att}
+     *            is the head of the chain
+     */
+    private void unlink(AttVal pre, AttVal att)
+    {
+        if (pre == null) {
+            this.adaptee.attributes = att.next;
+        } else {
+            pre.next = att.next;
+        }
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * Always reports {@code ELEMENT_NODE}.
+     *
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType()
+    {
+        return org.w3c.dom.Node.ELEMENT_NODE;
+    }
+
+    /**
+     * Returns the node name as computed by the superclass.
+     *
+     * @see org.w3c.dom.Element#getTagName
+     */
+    public String getTagName()
+    {
+        return super.getNodeName();
+    }
+
+    /**
+     * Returns the stored value of the named attribute.
+     *
+     * @return the stored value (which may itself be null), the empty string
+     *         when the attribute does not exist, or null when this element
+     *         has no adaptee
+     * @see org.w3c.dom.Element#getAttribute
+     */
+    public String getAttribute(String name)
+    {
+        if (this.adaptee == null)
+            return null;
+
+        AttVal att = findAttribute(name);
+        return (att != null) ? att.value : "";
+    }
+
+    /**
+     * Sets the value of the named attribute, creating the attribute at the
+     * head of the chain when it does not exist yet. Does nothing when this
+     * element has no adaptee.
+     *
+     * @see org.w3c.dom.Element#setAttribute
+     */
+    public void setAttribute(String name,
+                             String value)
+                             throws DOMException
+    {
+        if (this.adaptee == null)
+            return;
+
+        AttVal att = findAttribute(name);
+        if (att != null) {
+            att.value = value;
+            return;
+        }
+
+        att = new AttVal(null, null, (int)'"', name, value);
+        att.dict =
+            AttributeTable.getDefaultAttributeTable().findAttribute(att);
+        // Prepend; for an empty chain this leaves the new entry without a successor.
+        att.next = this.adaptee.attributes;
+        this.adaptee.attributes = att;
+    }
+
+    /**
+     * Removes the first attribute with the given name, if any. Does nothing
+     * when this element has no adaptee.
+     *
+     * @see org.w3c.dom.Element#removeAttribute
+     */
+    public void removeAttribute(String name)
+                                throws DOMException
+    {
+        if (this.adaptee == null)
+            return;
+
+        AttVal pre = null;
+        for (AttVal att = this.adaptee.attributes; att != null; att = att.next) {
+            if (hasName(att, name)) {
+                unlink(pre, att);
+                return;
+            }
+            pre = att;
+        }
+    }
+
+    /**
+     * Returns the DOM adapter of the named attribute.
+     *
+     * @return the attribute node, or null when it does not exist or this
+     *         element has no adaptee
+     * @see org.w3c.dom.Element#getAttributeNode
+     */
+    public org.w3c.dom.Attr getAttributeNode(String name)
+    {
+        AttVal att = findAttribute(name);
+        return (att != null) ? att.getAdapter() : null;
+    }
+
+    /**
+     * Adds the given attribute node to this element. When an attribute of
+     * the same name already exists, its entry in the chain is replaced by
+     * the entry wrapped by {@code newAttr}, so that the element really
+     * carries the new value; previously only the adapter reference was
+     * swapped and the old value stayed in place.
+     *
+     * @param newAttr the attribute to add; must be a DOMAttrImpl
+     * @return the adapter of the replaced attribute, or null when nothing
+     *         was replaced, {@code newAttr} is null, or this element has no
+     *         adaptee
+     * @throws DOMException WRONG_DOCUMENT_ERR when {@code newAttr} is not a
+     *         DOMAttrImpl
+     * @see org.w3c.dom.Element#setAttributeNode
+     */
+    public org.w3c.dom.Attr setAttributeNode(org.w3c.dom.Attr newAttr)
+                                             throws DOMException
+    {
+        if (newAttr == null)
+            return null;
+        if (!(newAttr instanceof DOMAttrImpl)) {
+            throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR,
+                                       "newAttr not instanceof DOMAttrImpl");
+        }
+        // Same guard as the sibling attribute methods.
+        if (this.adaptee == null)
+            return null;
+
+        AttVal replacement = ((DOMAttrImpl)newAttr).avAdaptee;
+        String name = replacement.attribute;
+
+        AttVal pre = null;
+        AttVal att = this.adaptee.attributes;
+        while (att != null && !hasName(att, name)) {
+            pre = att;
+            att = att.next;
+        }
+
+        if (att == null) {
+            // No attribute of that name yet: prepend the new entry.
+            replacement.next = this.adaptee.attributes;
+            this.adaptee.attributes = replacement;
+            return null;
+        }
+
+        org.w3c.dom.Attr result = att.getAdapter();
+        if (att != replacement) {
+            // Put the new entry into the position of the old one.
+            replacement.next = att.next;
+            if (pre == null) {
+                this.adaptee.attributes = replacement;
+            } else {
+                pre.next = replacement;
+            }
+            replacement.adapter = newAttr;
+        }
+        return result;
+    }
+
+    /**
+     * Removes the attribute whose adapter is identical to {@code oldAttr}.
+     *
+     * @return {@code oldAttr}, or null when {@code oldAttr} is null
+     * @throws DOMException NOT_FOUND_ERR when {@code oldAttr} is not an
+     *         attribute of this element
+     * @see org.w3c.dom.Element#removeAttributeNode
+     */
+    public org.w3c.dom.Attr removeAttributeNode(org.w3c.dom.Attr oldAttr)
+                                                throws DOMException
+    {
+        if (oldAttr == null)
+            return null;
+
+        AttVal pre = null;
+        // An element without adaptee has no attributes: report "not found".
+        AttVal att = (this.adaptee != null) ? this.adaptee.attributes : null;
+        while (att != null) {
+            if (att.getAdapter() == oldAttr) {
+                unlink(pre, att);
+                return oldAttr;
+            }
+            pre = att;
+            att = att.next;
+        }
+        throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
+                                   "oldAttr not found");
+    }
+
+    /**
+     * Returns a node list built over this element's node for the given tag
+     * name.
+     *
+     * @see org.w3c.dom.Element#getElementsByTagName
+     */
+    public org.w3c.dom.NodeList getElementsByTagName(String name)
+    {
+        return new DOMNodeListByTagNameImpl(this.adaptee, name);
+    }
+
+    /**
+     * Not supported; does nothing.
+     *
+     * @see org.w3c.dom.Element#normalize
+     */
+    public void normalize()
+    {
+        // NOT SUPPORTED
+    }
+
+    /**
+     * DOM2 - not implemented; always returns null.
+     */
+    public String getAttributeNS(String namespaceURI, String localName)
+    {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented; does nothing.
+     * @exception org.w3c.dom.DOMException never thrown by this implementation
+     */
+    public void setAttributeNS(String namespaceURI,
+                               String qualifiedName,
+                               String value)
+        throws org.w3c.dom.DOMException
+    {
+    }
+
+    /**
+     * DOM2 - not implemented; does nothing.
+     * @exception org.w3c.dom.DOMException never thrown by this implementation
+     */
+    public void removeAttributeNS(String namespaceURI, String localName)
+        throws org.w3c.dom.DOMException
+    {
+    }
+
+    /**
+     * DOM2 - not implemented; always returns null.
+     */
+    public org.w3c.dom.Attr getAttributeNodeNS(String namespaceURI,
+                                               String localName)
+    {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented; always returns null.
+     * @exception org.w3c.dom.DOMException never thrown by this implementation
+     */
+    public org.w3c.dom.Attr setAttributeNodeNS(org.w3c.dom.Attr newAttr)
+        throws org.w3c.dom.DOMException
+    {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented; always returns null.
+     */
+    public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI,
+                                                       String localName)
+    {
+        return null;
+    }
+
+    /**
+     * Tells whether this element carries an attribute with the given name,
+     * using the same lookup as {@link #getAttributeNode(String)}.
+     *
+     * @return true when such an attribute exists; false otherwise, including
+     *         when this element has no adaptee
+     */
+    public boolean hasAttribute(String name)
+    {
+        return findAttribute(name) != null;
+    }
+
+    /**
+     * DOM2 - not implemented; always returns false.
+     */
+    public boolean hasAttributeNS(String namespaceURI,
+                                  String localName)
+    {
+        return false;
+    }
+
+}
--- /dev/null
+/*
+ * @(#)DOMExceptionImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+
+/**
+ *
+ * DOMExceptionImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM Support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMExceptionImpl extends org.w3c.dom.DOMException
+{
+
+    /**
+     * Creates a DOM exception; both arguments are handed to the
+     * {@code org.w3c.dom.DOMException} constructor unchanged.
+     *
+     * @param code    one of the error codes defined by DOMException
+     * @param message human-readable description of the failure
+     */
+    public DOMExceptionImpl(short code, String message)
+    {
+        super(code, message);
+    }
+
+}
--- /dev/null
+/*
+ * @(#)DOMNodeImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+import org.w3c.dom.DOMException;
+import sun.security.krb5.internal.n;
+
+/**
+ *
+ * DOMNodeImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM Support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMNodeImpl implements org.w3c.dom.Node {
+
+ protected Node adaptee;
+
+ /**
+  * Creates a DOM view of the given tidy node.
+  *
+  * @param adaptee the node every method of this adapter reads from
+  */
+ protected DOMNodeImpl(Node adaptee) {
+     this.adaptee = adaptee;
+ }
+
+
+ /* --------------------- DOM ---------------------------- */
+
+ /**
+  * Returns the text of the wrapped node. Only text, CDATA, comment and
+  * processing-instruction nodes have a value; for any other node type, or
+  * when the node carries no text, the empty string is returned.
+  *
+  * @see org.w3c.dom.Node#getNodeValue
+  */
+ public String getNodeValue() throws DOMException {
+     final boolean carriesText = adaptee.type == Node.TextNode
+             || adaptee.type == Node.CDATATag
+             || adaptee.type == Node.CommentTag
+             || adaptee.type == Node.ProcInsTag;
+     if (!carriesText) {
+         return ""; //BAK 10/10/2000 replaced null
+     }
+     if (adaptee.textarray == null || adaptee.start >= adaptee.end) {
+         return "";
+     }
+     int length = adaptee.end - adaptee.start;
+     return Lexer.getString(adaptee.textarray, adaptee.start, length);
+ }
+
+ /**
+  * Replaces the text of the wrapped node with the bytes produced by
+  * {@code Lexer.getBytes(nodeValue)}. Has no effect unless the node is a
+  * text, CDATA, comment or processing-instruction node.
+  *
+  * @see org.w3c.dom.Node#setNodeValue
+  */
+ public void setNodeValue(String nodeValue) throws DOMException {
+     final boolean carriesText = adaptee.type == Node.TextNode
+             || adaptee.type == Node.CDATATag
+             || adaptee.type == Node.CommentTag
+             || adaptee.type == Node.ProcInsTag;
+     if (!carriesText) {
+         return;
+     }
+     byte[] encoded = Lexer.getBytes(nodeValue);
+     adaptee.textarray = encoded;
+     // The node now owns the whole fresh array.
+     adaptee.start = 0;
+     adaptee.end = encoded.length;
+ }
+
+ /**
+  * Returns the {@code element} field of the wrapped node.
+  *
+  * @see org.w3c.dom.Node#getNodeName
+  */
+ public String getNodeName() {
+     return this.adaptee.element;
+ }
+
+ /**
+  * Maps the type of the wrapped tidy node to the matching DOM node type
+  * constant.
+  *
+  * @return the DOM node type, or -1 when the tidy type has no mapping
+  * @see org.w3c.dom.Node#getNodeType
+  */
+ public short getNodeType() {
+     switch (adaptee.type) {
+         case Node.RootNode:
+             return org.w3c.dom.Node.DOCUMENT_NODE;
+         case Node.DocTypeTag:
+             return org.w3c.dom.Node.DOCUMENT_TYPE_NODE;
+         case Node.CommentTag:
+             return org.w3c.dom.Node.COMMENT_NODE;
+         case Node.ProcInsTag:
+             return org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE;
+         case Node.TextNode:
+             return org.w3c.dom.Node.TEXT_NODE;
+         case Node.CDATATag:
+             return org.w3c.dom.Node.CDATA_SECTION_NODE;
+         case Node.StartTag:
+         case Node.StartEndTag:
+             return org.w3c.dom.Node.ELEMENT_NODE;
+         default:
+             return -1;
+     }
+ }
+
+ /**
+  * Returns the adapter of the wrapped node's parent, or null when the node
+  * has no parent.
+  *
+  * @see org.w3c.dom.Node#getParentNode
+  */
+ public org.w3c.dom.Node getParentNode() {
+     return (adaptee.parent == null) ? null : adaptee.parent.getAdapter();
+ }
+
+ /**
+  * Returns a new node list built over the wrapped node.
+  *
+  * @see org.w3c.dom.Node#getChildNodes
+  */
+ public org.w3c.dom.NodeList getChildNodes() {
+     return new DOMNodeListImpl(this.adaptee);
+ }
+
+ /**
+  * Returns the adapter of the wrapped node's {@code content} node, or null
+  * when there is none.
+  *
+  * @see org.w3c.dom.Node#getFirstChild
+  */
+ public org.w3c.dom.Node getFirstChild() {
+     return (adaptee.content == null) ? null : adaptee.content.getAdapter();
+ }
+
+ /**
+  * Returns the adapter of the wrapped node's {@code last} node, or null
+  * when there is none.
+  *
+  * @see org.w3c.dom.Node#getLastChild
+  */
+ public org.w3c.dom.Node getLastChild() {
+     return (adaptee.last == null) ? null : adaptee.last.getAdapter();
+ }
+
+ /**
+  * Returns the adapter of the wrapped node's {@code prev} node, or null
+  * when there is none.
+  *
+  * @see org.w3c.dom.Node#getPreviousSibling
+  */
+ public org.w3c.dom.Node getPreviousSibling() {
+     return (adaptee.prev == null) ? null : adaptee.prev.getAdapter();
+ }
+
+ /**
+  * Returns the adapter of the wrapped node's {@code next} node, or null
+  * when there is none.
+  *
+  * @see org.w3c.dom.Node#getNextSibling
+  */
+ public org.w3c.dom.Node getNextSibling() {
+     return (adaptee.next == null) ? null : adaptee.next.getAdapter();
+ }
+
+ /**
+  * Returns a new attribute map over the wrapped node's attribute chain.
+  *
+  * @see org.w3c.dom.Node#getAttributes
+  */
+ public org.w3c.dom.NamedNodeMap getAttributes() {
+     return new DOMAttrMapImpl(this.adaptee.attributes);
+ }
+
+ /**
+  * Finds the document this node belongs to by following the parent links
+  * up to a node of type RootNode.
+  *
+  * @return the adapter of that root node; null when the wrapped node is
+  *         itself the root, is null, or has no root among its ancestors
+  * @see org.w3c.dom.Node#getOwnerDocument
+  */
+ public org.w3c.dom.Document getOwnerDocument() {
+     Node current = this.adaptee;
+
+     // The document itself has no owner document.
+     if (current != null && current.type == Node.RootNode) {
+         return null;
+     }
+
+     while (current != null && current.type != Node.RootNode) {
+         current = current.parent;
+     }
+
+     if (current == null) {
+         return null;
+     }
+     return (org.w3c.dom.Document)current.getAdapter();
+ }
+
+ /**
+  * Inserts {@code newChild} among the children of this node, either at
+  * the end (when {@code refChild} is null) or directly before
+  * {@code refChild}.
+  *
+  * A root node accepts only document-type and processing-instruction
+  * children; a start-tag node accepts only start-tag, start-end-tag,
+  * comment, text and CDATA children. Other parent types are not checked.
+  * When a child is appended to a start-end-tag node, that node is turned
+  * into a start-tag node.
+  *
+  * @param newChild the node to insert; must be a DOMNodeImpl
+  * @param refChild the existing child to insert before, or null to append
+  * @return {@code newChild}, or null when {@code newChild} is null
+  * @throws DOMException WRONG_DOCUMENT_ERR when {@code newChild} is not a
+  *         DOMNodeImpl, HIERARCHY_REQUEST_ERR when its type is not allowed
+  *         below this node, NOT_FOUND_ERR when {@code refChild} is not a
+  *         child of this node
+  * @see org.w3c.dom.Node#insertBefore
+  */
+ public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild,
+                                      org.w3c.dom.Node refChild)
+         throws DOMException {
+     // TODO - handle newChild already in tree
+
+     if (newChild == null) {
+         return null;
+     }
+     if (!(newChild instanceof DOMNodeImpl)) {
+         throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR,
+                                    "newChild not instanceof DOMNodeImpl");
+     }
+     DOMNodeImpl incoming = (DOMNodeImpl)newChild;
+
+     if (this.adaptee.type == Node.RootNode) {
+         boolean allowedBelowRoot =
+             incoming.adaptee.type == Node.DocTypeTag ||
+             incoming.adaptee.type == Node.ProcInsTag;
+         if (!allowedBelowRoot) {
+             throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
+                                        "newChild cannot be a child of this node");
+         }
+     } else if (this.adaptee.type == Node.StartTag) {
+         boolean allowedBelowElement =
+             incoming.adaptee.type == Node.StartTag ||
+             incoming.adaptee.type == Node.StartEndTag ||
+             incoming.adaptee.type == Node.CommentTag ||
+             incoming.adaptee.type == Node.TextNode ||
+             incoming.adaptee.type == Node.CDATATag;
+         if (!allowedBelowElement) {
+             throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
+                                        "newChild cannot be a child of this node");
+         }
+     }
+
+     if (refChild == null) {
+         Node.insertNodeAtEnd(this.adaptee, incoming.adaptee);
+         // An element that was empty so far now has content.
+         if (this.adaptee.type == Node.StartEndTag) {
+             this.adaptee.setType(Node.StartTag);
+         }
+         return newChild;
+     }
+
+     // Locate the tidy node whose adapter is refChild.
+     Node anchor = this.adaptee.content;
+     while (anchor != null && anchor.getAdapter() != refChild) {
+         anchor = anchor.next;
+     }
+     if (anchor == null) {
+         throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
+                                    "refChild not found");
+     }
+     Node.insertNodeBeforeElement(anchor, incoming.adaptee);
+     return newChild;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#replaceChild
+ */
+ public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild,
+ org.w3c.dom.Node oldChild)
+ throws DOMException
+ {
+ // TODO - handle newChild already in tree
+
+ if (newChild == null)
+ return null;
+ if (!(newChild instanceof DOMNodeImpl)) {
+ throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR,
+ "newChild not instanceof DOMNodeImpl");
+ }
+ DOMNodeImpl newCh = (DOMNodeImpl)newChild;
+
+ if (this.adaptee.type == Node.RootNode) {
+ if (newCh.adaptee.type != Node.DocTypeTag &&
+ newCh.adaptee.type != Node.ProcInsTag) {
+ throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
+ "newChild cannot be a child of this node");
+ }
+ } else if (this.adaptee.type == Node.StartTag) {
+ if (newCh.adaptee.type != Node.StartTag &&
+ newCh.adaptee.type != Node.StartEndTag &&
+ newCh.adaptee.type != Node.CommentTag &&
+ newCh.adaptee.type != Node.TextNode &&
+ newCh.adaptee.type != Node.CDATATag) {
+ throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
+ "newChild cannot be a child of this node");
+ }
+ }
+ if (oldChild == null) {
+ throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
+ "oldChild not found");
+ } else {
+ Node n;
+ Node ref = this.adaptee.content;
+ while (ref != null) {
+ if (ref.getAdapter() == oldChild) break;
+ ref = ref.next;
+ }
+ if (ref == null) {
+ throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
+ "oldChild not found");
+ }
+ newCh.adaptee.next = ref.next;
+ newCh.adaptee.prev = ref.prev;
+ newCh.adaptee.last = ref.last;
+ newCh.adaptee.parent = ref.parent;
+ newCh.adaptee.content = ref.content;
+ if (ref.parent != null) {
+ if (ref.parent.content == ref)
+ ref.parent.content = newCh.adaptee;
+ if (ref.parent.last == ref)
+ ref.parent.last = newCh.adaptee;
+ }
+ if (ref.prev != null) {
+ ref.prev.next = newCh.adaptee;
+ }
+ if (ref.next != null) {
+ ref.next.prev = newCh.adaptee;
+ }
+ for (n = ref.content; n != null; n = n.next) {
+ if (n.parent == ref)
+ n.parent = newCh.adaptee;
+ }
+ }
+ return oldChild;
+ }
+
+    /**
+     * Removes <code>oldChild</code> from the children of this node. When the
+     * removal leaves a node of type <code>StartTag</code> without content,
+     * the node is re-typed as <code>StartEndTag</code>.
+     *
+     * @param oldChild the child to remove
+     * @return <code>oldChild</code>, or <code>null</code> when
+     *         <code>oldChild</code> is <code>null</code>
+     * @throws DOMException NOT_FOUND_ERR if <code>oldChild</code> is not a
+     *         child of this node
+     * @see org.w3c.dom.Node#removeChild
+     */
+    public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild)
+        throws DOMException
+    {
+        if (oldChild == null)
+            return null;
+
+        // Locate the tidy node whose adapter is the child being removed.
+        Node ref = this.adaptee.content;
+        while (ref != null) {
+            if (ref.getAdapter() == oldChild) break;
+            ref = ref.next;
+        }
+        if (ref == null) {
+            // The message names this method's own parameter.
+            throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
+                                       "oldChild not found");
+        }
+        Node.discardElement(ref);
+
+        // An element left without children is represented as an empty tag.
+        if (this.adaptee.content == null
+            && this.adaptee.type == Node.StartTag) {
+            this.adaptee.setType(Node.StartEndTag);
+        }
+
+        return oldChild;
+    }
+
+ /**
+ * @see org.w3c.dom.Node#appendChild
+ */
+ public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild)
+ throws DOMException
+ {
+ // TODO - handle newChild already in tree
+
+ if (newChild == null)
+ return null;
+ if (!(newChild instanceof DOMNodeImpl)) {
+ throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR,
+ "newChild not instanceof DOMNodeImpl");
+ }
+ DOMNodeImpl newCh = (DOMNodeImpl)newChild;
+
+ if (this.adaptee.type == Node.RootNode) {
+ if (newCh.adaptee.type != Node.DocTypeTag &&
+ newCh.adaptee.type != Node.ProcInsTag) {
+ throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
+ "newChild cannot be a child of this node");
+ }
+ } else if (this.adaptee.type == Node.StartTag) {
+ if (newCh.adaptee.type != Node.StartTag &&
+ newCh.adaptee.type != Node.StartEndTag &&
+ newCh.adaptee.type != Node.CommentTag &&
+ newCh.adaptee.type != Node.TextNode &&
+ newCh.adaptee.type != Node.CDATATag) {
+ throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
+ "newChild cannot be a child of this node");
+ }
+ }
+ Node.insertNodeAtEnd(this.adaptee, newCh.adaptee);
+
+ if (this.adaptee.type == Node.StartEndTag) {
+ this.adaptee.setType(Node.StartTag);
+ }
+
+ return newChild;
+ }
+
+    /**
+     * Tells whether the wrapped node has at least one child.
+     *
+     * @return <code>true</code> when <code>adaptee.content</code> is not
+     *         <code>null</code>
+     * @see org.w3c.dom.Node#hasChildNodes
+     */
+    public boolean hasChildNodes()
+    {
+        Node firstChild = adaptee.content;
+        return firstChild != null;
+    }
+
+    /**
+     * Clones the wrapped node and returns the DOM adapter of the copy. The
+     * copy's parent link is cleared before it is returned.
+     *
+     * @param deep passed through to the wrapped node's own
+     *        <code>cloneNode</code>
+     * @return the adapter of the parentless copy
+     * @see org.w3c.dom.Node#cloneNode
+     */
+    public org.w3c.dom.Node cloneNode(boolean deep)
+    {
+        Node copy = adaptee.cloneNode(deep);
+        // The copy is handed out detached from any parent.
+        copy.parent = null;
+        return copy.getAdapter();
+    }
+
+    /**
+     * DOM2 - not implemented. Calling this method does nothing.
+     */
+    public void normalize() {
+        // intentionally empty
+    }
+
+ /**
+ * DOM2 - not implemented.
+ */
+ public boolean supports(String feature, String version)
+ {
+ return isSupported(feature, version);
+ }
+
+    /**
+     * DOM2 - not implemented.
+     *
+     * @return always <code>null</code>
+     */
+    public String getNamespaceURI() {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     *
+     * @return always <code>null</code>
+     */
+    public String getPrefix() {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented. The given prefix is ignored.
+     *
+     * @param prefix ignored
+     * @throws DOMException declared for interface compatibility; this
+     *         implementation does not throw it
+     */
+    public void setPrefix(String prefix) throws DOMException {
+        // intentionally empty
+    }
+
+    /**
+     * DOM2 - not implemented.
+     *
+     * @return always <code>null</code>
+     */
+    public String getLocalName() {
+        return null;
+    }
+
+ /**
+ * DOM2 - not implemented.
+ */
+ public boolean isSupported(String feature,String version) {
+ return false;
+ }
+
+ /**
+ * DOM2 - @see org.w3c.dom.Node#hasAttributes
+ * contributed by dlp@users.sourceforge.net
+ */
+ public boolean hasAttributes()
+ {
+ return adaptee.attributes != null;
+ }
+}
--- /dev/null
+/*
+ * @(#)DOMNodeListByTagNameImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * DOMNodeListByTagNameImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/**
+ * <p>A <code>NodeList</code> of the element nodes (types
+ * <code>StartTag</code> and <code>StartEndTag</code>) whose name matches a
+ * given tag name, collected by a pre-order walk that begins at, and
+ * includes, a given start node. The items are accessible via an integral
+ * index, starting from 0.</p>
+ *
+ * <p>Nothing is cached: each call to {@link #item(int)} or
+ * {@link #getLength()} walks the tree again and uses the instance fields
+ * as scratch state for that walk.</p>
+ */
+public class DOMNodeListByTagNameImpl implements org.w3c.dom.NodeList {
+
+    /** Node at which every walk starts. */
+    private Node first = null;
+    /** Tag name to match; "*" matches every element. */
+    private String tagName = "*";
+    /** Number of matches seen so far in the current walk. */
+    private int currIndex = 0;
+    /** Index of the match at which the current walk stops. */
+    private int maxIndex = 0;
+    /** Most recent match seen in the current walk. */
+    private Node currNode = null;
+
+    /**
+     * @param first   node at which the walk starts
+     * @param tagName name to match, or "*" for every element
+     */
+    protected DOMNodeListByTagNameImpl(Node first, String tagName)
+    {
+        this.first = first;
+        this.tagName = tagName;
+    }
+
+    /**
+     * Returns the <code>index</code>-th matching element in pre-order.
+     *
+     * @param index zero-based position in the list
+     * @return the adapter of the matching node, or <code>null</code> when
+     *         <code>index</code> is negative or not smaller than the number
+     *         of matches
+     * @see org.w3c.dom.NodeList#item
+     */
+    public org.w3c.dom.Node item(int index)
+    {
+        // A negative index is never valid. Without this guard the walk
+        // would stop before visiting anything and the final test below
+        // would hand back the match left over from an earlier call.
+        if (index < 0)
+            return null;
+
+        currIndex = 0;
+        maxIndex = index;
+        currNode = null;   // never report a match from a previous walk
+        preTraverse(first);
+
+        if (currIndex > maxIndex && currNode != null)
+            return currNode.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * Counts the matching elements by walking the whole tree.
+     *
+     * @return the number of matches
+     * @see org.w3c.dom.NodeList#getLength
+     */
+    public int getLength()
+    {
+        currIndex = 0;
+        maxIndex = Integer.MAX_VALUE;   // never stop early
+        preTraverse(first);
+        return currIndex;
+    }
+
+    /**
+     * Pre-order walk over <code>node</code> and its content. Each matching
+     * element increments <code>currIndex</code> and is remembered in
+     * <code>currNode</code>; the walk stops descending as soon as
+     * <code>currIndex</code> exceeds <code>maxIndex</code>.
+     *
+     * @param node the subtree to visit; <code>null</code> is ignored
+     */
+    protected void preTraverse(Node node)
+    {
+        if (node == null)
+            return;
+
+        if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+        {
+            if (currIndex <= maxIndex &&
+                (tagName.equals("*") || tagName.equals(node.element)))
+            {
+                currIndex += 1;
+                currNode = node;
+            }
+        }
+        // The requested item has been found; skip the rest of the tree.
+        if (currIndex > maxIndex)
+            return;
+
+        node = node.content;
+        while (node != null)
+        {
+            preTraverse(node);
+            node = node.next;
+        }
+    }
+
+}
--- /dev/null
+/*
+ * @(#)DOMNodeListImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * DOMNodeListImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/**
+ * <p>A live <code>NodeList</code> view over the direct children of one
+ * node. The items are accessible via an integral index, starting from 0.
+ * The child chain is walked on every call; nothing is cached.</p>
+ */
+public class DOMNodeListImpl implements org.w3c.dom.NodeList {
+
+    /** Node whose <code>content</code> chain forms this list. */
+    private Node parent = null;
+
+    /**
+     * @param parent node whose children are exposed by this list
+     */
+    protected DOMNodeListImpl(Node parent)
+    {
+        this.parent = parent;
+    }
+
+    /**
+     * Returns the child at the given position.
+     *
+     * @param index zero-based position in the child list
+     * @return the adapter of that child, or <code>null</code> when
+     *         <code>index</code> is negative or not smaller than the number
+     *         of children
+     * @see org.w3c.dom.NodeList#item
+     */
+    public org.w3c.dom.Node item(int index)
+    {
+        // A negative index is invalid; without this guard the loop below
+        // would stop immediately and return the first child.
+        if (index < 0) {
+            return null;
+        }
+
+        Node node = parent.content;
+        for (int i = 0; node != null && i < index; i++) {
+            node = node.next;
+        }
+
+        if (node != null)
+            return node.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * Counts the children by following the sibling chain.
+     *
+     * @return the number of direct children
+     * @see org.w3c.dom.NodeList#getLength
+     */
+    public int getLength()
+    {
+        int len = 0;
+        for (Node node = parent.content; node != null; node = node.next) {
+            len++;
+        }
+        return len;
+    }
+
+}
--- /dev/null
+/*
+ * @(#)DOMProcessingInstructionImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMProcessingInstructionImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/**
+ * Read-only DOM <code>ProcessingInstruction</code> adapter over a tidy
+ * <code>Node</code>.
+ */
+public class DOMProcessingInstructionImpl extends DOMNodeImpl
+                            implements org.w3c.dom.ProcessingInstruction {
+
+    /**
+     * @param adaptee the tidy node wrapped by this adapter
+     */
+    protected DOMProcessingInstructionImpl(Node adaptee) {
+        super(adaptee);
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * @return <code>PROCESSING_INSTRUCTION_NODE</code>
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType() {
+        return org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE;
+    }
+
+    /**
+     * Not implemented yet.
+     *
+     * @return always <code>null</code>
+     * @see org.w3c.dom.ProcessingInstruction#getTarget
+     */
+    public String getTarget() {
+        // TODO
+        return null;
+    }
+
+    /**
+     * @return the value reported by <code>getNodeValue()</code>
+     * @see org.w3c.dom.ProcessingInstruction#getData
+     */
+    public String getData() {
+        String data = getNodeValue();
+        return data;
+    }
+
+    /**
+     * Modification is not supported.
+     *
+     * @param data ignored
+     * @throws DOMException always, with code NO_MODIFICATION_ALLOWED_ERR
+     * @see org.w3c.dom.ProcessingInstruction#setData
+     */
+    public void setData(String data) throws DOMException {
+        // NOT SUPPORTED
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+}
--- /dev/null
+/*
+ * @(#)DOMTextImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMTextImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/**
+ * Read-only DOM <code>Text</code> adapter over a tidy <code>Node</code>.
+ */
+public class DOMTextImpl extends DOMCharacterDataImpl
+                            implements org.w3c.dom.Text {
+
+    /**
+     * @param adaptee the tidy node wrapped by this adapter
+     */
+    protected DOMTextImpl(Node adaptee) {
+        super(adaptee);
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * @return the fixed name <code>#text</code>
+     * @see org.w3c.dom.Node#getNodeName
+     */
+    public String getNodeName() {
+        return "#text";
+    }
+
+    /**
+     * @return <code>TEXT_NODE</code>
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType() {
+        return org.w3c.dom.Node.TEXT_NODE;
+    }
+
+    /**
+     * Splitting is not supported.
+     *
+     * @param offset ignored
+     * @return never returns normally
+     * @throws DOMException always, with code NO_MODIFICATION_ALLOWED_ERR
+     * @see org.w3c.dom.Text#splitText
+     */
+    public org.w3c.dom.Text splitText(int offset) throws DOMException {
+        // NOT SUPPORTED
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+}
--- /dev/null
+/*
+ * @(#)Dict.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Tag dictionary node
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/**
+ * One entry of the tag dictionary: a tag name together with the HTML
+ * versions it belongs to, its content model flags, its parser and its
+ * attribute checker.
+ */
+public class Dict {
+
+    /* content model shortcut encoding (bit flags, combined with |) */
+
+    public static final int CM_UNKNOWN   = 0;
+    public static final int CM_EMPTY     = 1 << 0;
+    public static final int CM_HTML      = 1 << 1;
+    public static final int CM_HEAD      = 1 << 2;
+    public static final int CM_BLOCK     = 1 << 3;
+    public static final int CM_INLINE    = 1 << 4;
+    public static final int CM_LIST      = 1 << 5;
+    public static final int CM_DEFLIST   = 1 << 6;
+    public static final int CM_TABLE     = 1 << 7;
+    public static final int CM_ROWGRP    = 1 << 8;
+    public static final int CM_ROW       = 1 << 9;
+    public static final int CM_FIELD     = 1 << 10;
+    public static final int CM_OBJECT    = 1 << 11;
+    public static final int CM_PARAM     = 1 << 12;
+    public static final int CM_FRAMES    = 1 << 13;
+    public static final int CM_HEADING   = 1 << 14;
+    public static final int CM_OPT       = 1 << 15;
+    public static final int CM_IMG       = 1 << 16;
+    public static final int CM_MIXED     = 1 << 17;
+    public static final int CM_NO_INDENT = 1 << 18;
+    public static final int CM_OBSOLETE  = 1 << 19;
+    public static final int CM_NEW       = 1 << 20;
+    public static final int CM_OMITST    = 1 << 21;
+
+    /*
+     * HTML version bit flags.
+     *
+     * If the document uses just HTML 2.0 tags and attributes, describe it
+     * as HTML 2.0. Similarly for HTML 3.2 and the 3 flavors of HTML 4.0.
+     * If there are proprietary tags and attributes, then describe it as
+     * HTML Proprietary. If it includes the xml-lang or xmlns attributes but
+     * is otherwise HTML 2.0, 3.2 or 4.0, then describe it as one of the
+     * flavors of Voyager (strict, loose or frameset).
+     */
+
+    public static final short VERS_UNKNOWN       = 0;
+
+    public static final short VERS_HTML20        = 1;
+    public static final short VERS_HTML32        = 2;
+    public static final short VERS_HTML40_STRICT = 4;
+    public static final short VERS_HTML40_LOOSE  = 8;
+    public static final short VERS_FRAMES        = 16;
+    public static final short VERS_XML           = 32;
+
+    public static final short VERS_NETSCAPE      = 64;
+    public static final short VERS_MICROSOFT     = 128;
+    public static final short VERS_SUN           = 256;
+
+    public static final short VERS_MALFORMED     = 512;
+
+    /* combinations of the single-version flags above */
+
+    public static final short VERS_ALL =
+        VERS_HTML20 | VERS_HTML32 | VERS_HTML40_STRICT | VERS_HTML40_LOOSE | VERS_FRAMES;
+    public static final short VERS_HTML40 =
+        VERS_HTML40_STRICT | VERS_HTML40_LOOSE | VERS_FRAMES;
+    public static final short VERS_LOOSE =
+        VERS_HTML32 | VERS_HTML40_LOOSE | VERS_FRAMES;
+    public static final short VERS_IFRAMES =
+        VERS_HTML40_LOOSE | VERS_FRAMES;
+    public static final short VERS_FROM32 =
+        VERS_HTML40_STRICT | VERS_LOOSE;
+    public static final short VERS_PROPRIETARY =
+        VERS_NETSCAPE | VERS_MICROSOFT | VERS_SUN;
+
+    public static final short VERS_EVERYTHING =
+        VERS_ALL | VERS_PROPRIETARY;
+
+    /** Tag name. */
+    public String name;
+    /** Combination of the <code>VERS_*</code> flags. */
+    public short versions;
+    /** Combination of the <code>CM_*</code> flags. */
+    public int model;
+    /** Parser associated with this tag. */
+    public Parser parser;
+    /** Attribute checker associated with this tag. */
+    public CheckAttribs chkattrs;
+
+    /**
+     * Creates a dictionary entry; each argument is stored in the public
+     * field of the same name.
+     *
+     * @param name     tag name
+     * @param versions combination of the <code>VERS_*</code> flags
+     * @param model    combination of the <code>CM_*</code> flags
+     * @param parser   parser associated with the tag
+     * @param chkattrs attribute checker associated with the tag
+     */
+    public Dict(String name, short versions, int model,
+                Parser parser, CheckAttribs chkattrs) {
+        this.name = name;
+        this.versions = versions;
+        this.model = model;
+        this.parser = parser;
+        this.chkattrs = chkattrs;
+    }
+}
--- /dev/null
+/*
+ * @(#)Entity.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * HTML ISO entity
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/**
+ * A named character entity: a name such as <code>nbsp</code> paired with
+ * its character code.
+ */
+public class Entity {
+
+    /** Entity name. */
+    public String name;
+    /** Character code, stored as a <code>short</code>. */
+    public short code;
+
+    /**
+     * @param name entity name
+     * @param code character code
+     */
+    public Entity(String name, short code) {
+        this.name = name;
+        this.code = code;
+    }
+
+    /**
+     * Convenience constructor for <code>int</code> literals; the code is
+     * narrowed to <code>short</code> with a plain cast.
+     *
+     * @param name entity name
+     * @param code character code
+     */
+    public Entity(String name, int code) {
+        this(name, (short) code);
+    }
+
+}
--- /dev/null
+/*
+ * @(#)EntityTable.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Entity hash table
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+import java.util.Enumeration;
+import java.util.Hashtable;
+
+public class EntityTable {
+
+ public EntityTable()
+ {
+ }
+
+ public Entity lookup( String name )
+ {
+ return (Entity)entityHashtable.get( name );
+ }
+
+ public Entity install( String name, short code )
+ {
+ Entity ent = lookup( name );
+ if ( ent == null ) {
+ ent = new Entity( name, code );
+ entityHashtable.put( name, ent );
+ } else {
+ ent.code = code;
+ }
+ return ent;
+ }
+
+ public Entity install( Entity ent )
+ {
+ return (Entity)entityHashtable.put( ent.name, ent );
+ }
+
+ /* entity starting with "&" returns zero on error */
+ public short entityCode( String name )
+ {
+ int c;
+
+ if (name.length() <= 1)
+ return 0;
+
+ /* numeric entitity: name = "&#" followed by number */
+ if ( name.charAt(1) == '#' ) {
+ c = 0; /* zero on missing/bad number */
+
+ /* 'x' prefix denotes hexadecimal number format */
+ try {
+ if (name.length() >= 4 && name.charAt(2) == 'x') {
+ c = Integer.parseInt( name.substring(3), 16 );
+ } else if (name.length() >= 3) {
+ c = Integer.parseInt( name.substring(2) );
+ }
+ }
+ catch ( NumberFormatException e ) {}
+
+ return (short)c;
+ }
+
+ /* Named entity: name ="&" followed by a name */
+ Entity ent = lookup( name.substring(1) );
+ if ( ent != null ) {
+ return ent.code;
+ }
+
+ return 0; /* zero signifies unknown entity name */
+ }
+
+ public String entityName( short code )
+ {
+ String name = null;
+ Entity ent;
+ Enumeration en = entityHashtable.elements();
+ while ( en.hasMoreElements() ) {
+ ent = (Entity)en.nextElement();
+ if ( ent.code == code ) {
+ name = ent.name;
+ break;
+ }
+ }
+ return name;
+ }
+
+ private Hashtable entityHashtable = new Hashtable();
+
+ private static EntityTable defaultEntityTable = null;
+
+ private static Entity[] entities = {
+
+ new Entity( "nbsp", 160 ),
+ new Entity( "iexcl", 161 ),
+ new Entity( "cent", 162 ),
+ new Entity( "pound", 163 ),
+ new Entity( "curren", 164 ),
+ new Entity( "yen", 165 ),
+ new Entity( "brvbar", 166 ),
+ new Entity( "sect", 167 ),
+ new Entity( "uml", 168 ),
+ new Entity( "copy", 169 ),
+ new Entity( "ordf", 170 ),
+ new Entity( "laquo", 171 ),
+ new Entity( "not", 172 ),
+ new Entity( "shy", 173 ),
+ new Entity( "reg", 174 ),
+ new Entity( "macr", 175 ),
+ new Entity( "deg", 176 ),
+ new Entity( "plusmn", 177 ),
+ new Entity( "sup2", 178 ),
+ new Entity( "sup3", 179 ),
+ new Entity( "acute", 180 ),
+ new Entity( "micro", 181 ),
+ new Entity( "para", 182 ),
+ new Entity( "middot", 183 ),
+ new Entity( "cedil", 184 ),
+ new Entity( "sup1", 185 ),
+ new Entity( "ordm", 186 ),
+ new Entity( "raquo", 187 ),
+ new Entity( "frac14", 188 ),
+ new Entity( "frac12", 189 ),
+ new Entity( "frac34", 190 ),
+ new Entity( "iquest", 191 ),
+ new Entity( "Agrave", 192 ),
+ new Entity( "Aacute", 193 ),
+ new Entity( "Acirc", 194 ),
+ new Entity( "Atilde", 195 ),
+ new Entity( "Auml", 196 ),
+ new Entity( "Aring", 197 ),
+ new Entity( "AElig", 198 ),
+ new Entity( "Ccedil", 199 ),
+ new Entity( "Egrave", 200 ),
+ new Entity( "Eacute", 201 ),
+ new Entity( "Ecirc", 202 ),
+ new Entity( "Euml", 203 ),
+ new Entity( "Igrave", 204 ),
+ new Entity( "Iacute", 205 ),
+ new Entity( "Icirc", 206 ),
+ new Entity( "Iuml", 207 ),
+ new Entity( "ETH", 208 ),
+ new Entity( "Ntilde", 209 ),
+ new Entity( "Ograve", 210 ),
+ new Entity( "Oacute", 211 ),
+ new Entity( "Ocirc", 212 ),
+ new Entity( "Otilde", 213 ),
+ new Entity( "Ouml", 214 ),
+ new Entity( "times", 215 ),
+ new Entity( "Oslash", 216 ),
+ new Entity( "Ugrave", 217 ),
+ new Entity( "Uacute", 218 ),
+ new Entity( "Ucirc", 219 ),
+ new Entity( "Uuml", 220 ),
+ new Entity( "Yacute", 221 ),
+ new Entity( "THORN", 222 ),
+ new Entity( "szlig", 223 ),
+ new Entity( "agrave", 224 ),
+ new Entity( "aacute", 225 ),
+ new Entity( "acirc", 226 ),
+ new Entity( "atilde", 227 ),
+ new Entity( "auml", 228 ),
+ new Entity( "aring", 229 ),
+ new Entity( "aelig", 230 ),
+ new Entity( "ccedil", 231 ),
+ new Entity( "egrave", 232 ),
+ new Entity( "eacute", 233 ),
+ new Entity( "ecirc", 234 ),
+ new Entity( "euml", 235 ),
+ new Entity( "igrave", 236 ),
+ new Entity( "iacute", 237 ),
+ new Entity( "icirc", 238 ),
+ new Entity( "iuml", 239 ),
+ new Entity( "eth", 240 ),
+ new Entity( "ntilde", 241 ),
+ new Entity( "ograve", 242 ),
+ new Entity( "oacute", 243 ),
+ new Entity( "ocirc", 244 ),
+ new Entity( "otilde", 245 ),
+ new Entity( "ouml", 246 ),
+ new Entity( "divide", 247 ),
+ new Entity( "oslash", 248 ),
+ new Entity( "ugrave", 249 ),
+ new Entity( "uacute", 250 ),
+ new Entity( "ucirc", 251 ),
+ new Entity( "uuml", 252 ),
+ new Entity( "yacute", 253 ),
+ new Entity( "thorn", 254 ),
+ new Entity( "yuml", 255 ),
+ new Entity( "fnof", 402 ),
+ new Entity( "Alpha", 913 ),
+ new Entity( "Beta", 914 ),
+ new Entity( "Gamma", 915 ),
+ new Entity( "Delta", 916 ),
+ new Entity( "Epsilon", 917 ),
+ new Entity( "Zeta", 918 ),
+ new Entity( "Eta", 919 ),
+ new Entity( "Theta", 920 ),
+ new Entity( "Iota", 921 ),
+ new Entity( "Kappa", 922 ),
+ new Entity( "Lambda", 923 ),
+ new Entity( "Mu", 924 ),
+ new Entity( "Nu", 925 ),
+ new Entity( "Xi", 926 ),
+ new Entity( "Omicron", 927 ),
+ new Entity( "Pi", 928 ),
+ new Entity( "Rho", 929 ),
+ new Entity( "Sigma", 931 ),
+ new Entity( "Tau", 932 ),
+ new Entity( "Upsilon", 933 ),
+ new Entity( "Phi", 934 ),
+ new Entity( "Chi", 935 ),
+ new Entity( "Psi", 936 ),
+ new Entity( "Omega", 937 ),
+ new Entity( "alpha", 945 ),
+ new Entity( "beta", 946 ),
+ new Entity( "gamma", 947 ),
+ new Entity( "delta", 948 ),
+ new Entity( "epsilon", 949 ),
+ new Entity( "zeta", 950 ),
+ new Entity( "eta", 951 ),
+ new Entity( "theta", 952 ),
+ new Entity( "iota", 953 ),
+ new Entity( "kappa", 954 ),
+ new Entity( "lambda", 955 ),
+ new Entity( "mu", 956 ),
+ new Entity( "nu", 957 ),
+ new Entity( "xi", 958 ),
+ new Entity( "omicron", 959 ),
+ new Entity( "pi", 960 ),
+ new Entity( "rho", 961 ),
+ new Entity( "sigmaf", 962 ),
+ new Entity( "sigma", 963 ),
+ new Entity( "tau", 964 ),
+ new Entity( "upsilon", 965 ),
+ new Entity( "phi", 966 ),
+ new Entity( "chi", 967 ),
+ new Entity( "psi", 968 ),
+ new Entity( "omega", 969 ),
+ new Entity( "thetasym", 977 ),
+ new Entity( "upsih", 978 ),
+ new Entity( "piv", 982 ),
+ new Entity( "bull", 8226 ),
+ new Entity( "hellip", 8230 ),
+ new Entity( "prime", 8242 ),
+ new Entity( "Prime", 8243 ),
+ new Entity( "oline", 8254 ),
+ new Entity( "frasl", 8260 ),
+ new Entity( "weierp", 8472 ),
+ new Entity( "image", 8465 ),
+ new Entity( "real", 8476 ),
+ new Entity( "trade", 8482 ),
+ new Entity( "alefsym", 8501 ),
+ new Entity( "larr", 8592 ),
+ new Entity( "uarr", 8593 ),
+ new Entity( "rarr", 8594 ),
+ new Entity( "darr", 8595 ),
+ new Entity( "harr", 8596 ),
+ new Entity( "crarr", 8629 ),
+ new Entity( "lArr", 8656 ),
+ new Entity( "uArr", 8657 ),
+ new Entity( "rArr", 8658 ),
+ new Entity( "dArr", 8659 ),
+ new Entity( "hArr", 8660 ),
+ new Entity( "forall", 8704 ),
+ new Entity( "part", 8706 ),
+ new Entity( "exist", 8707 ),
+ new Entity( "empty", 8709 ),
+ new Entity( "nabla", 8711 ),
+ new Entity( "isin", 8712 ),
+ new Entity( "notin", 8713 ),
+ new Entity( "ni", 8715 ),
+ new Entity( "prod", 8719 ),
+ new Entity( "sum", 8721 ),
+ new Entity( "minus", 8722 ),
+ new Entity( "lowast", 8727 ),
+ new Entity( "radic", 8730 ),
+ new Entity( "prop", 8733 ),
+ new Entity( "infin", 8734 ),
+ new Entity( "ang", 8736 ),
+ new Entity( "and", 8743 ),
+ new Entity( "or", 8744 ),
+ new Entity( "cap", 8745 ),
+ new Entity( "cup", 8746 ),
+ new Entity( "int", 8747 ),
+ new Entity( "there4", 8756 ),
+ new Entity( "sim", 8764 ),
+ new Entity( "cong", 8773 ),
+ new Entity( "asymp", 8776 ),
+ new Entity( "ne", 8800 ),
+ new Entity( "equiv", 8801 ),
+ new Entity( "le", 8804 ),
+ new Entity( "ge", 8805 ),
+ new Entity( "sub", 8834 ),
+ new Entity( "sup", 8835 ),
+ new Entity( "nsub", 8836 ),
+ new Entity( "sube", 8838 ),
+ new Entity( "supe", 8839 ),
+ new Entity( "oplus", 8853 ),
+ new Entity( "otimes", 8855 ),
+ new Entity( "perp", 8869 ),
+ new Entity( "sdot", 8901 ),
+ new Entity( "lceil", 8968 ),
+ new Entity( "rceil", 8969 ),
+ new Entity( "lfloor", 8970 ),
+ new Entity( "rfloor", 8971 ),
+ new Entity( "lang", 9001 ),
+ new Entity( "rang", 9002 ),
+ new Entity( "loz", 9674 ),
+ new Entity( "spades", 9824 ),
+ new Entity( "clubs", 9827 ),
+ new Entity( "hearts", 9829 ),
+ new Entity( "diams", 9830 ),
+ new Entity( "quot", 34 ),
+ new Entity( "amp", 38 ),
+ new Entity( "lt", 60 ),
+ new Entity( "gt", 62 ),
+ new Entity( "OElig", 338 ),
+ new Entity( "oelig", 339 ),
+ new Entity( "Scaron", 352 ),
+ new Entity( "scaron", 353 ),
+ new Entity( "Yuml", 376 ),
+ new Entity( "circ", 710 ),
+ new Entity( "tilde", 732 ),
+ new Entity( "ensp", 8194 ),
+ new Entity( "emsp", 8195 ),
+ new Entity( "thinsp", 8201 ),
+ new Entity( "zwnj", 8204 ),
+ new Entity( "zwj", 8205 ),
+ new Entity( "lrm", 8206 ),
+ new Entity( "rlm", 8207 ),
+ new Entity( "ndash", 8211 ),
+ new Entity( "mdash", 8212 ),
+ new Entity( "lsquo", 8216 ),
+ new Entity( "rsquo", 8217 ),
+ new Entity( "sbquo", 8218 ),
+ new Entity( "ldquo", 8220 ),
+ new Entity( "rdquo", 8221 ),
+ new Entity( "bdquo", 8222 ),
+ new Entity( "dagger", 8224 ),
+ new Entity( "Dagger", 8225 ),
+ new Entity( "permil", 8240 ),
+ new Entity( "lsaquo", 8249 ),
+ new Entity( "rsaquo", 8250 ),
+ new Entity( "euro", 8364 )
+
+ };
+
+ public static EntityTable getDefaultEntityTable()
+ {
+ if ( defaultEntityTable == null ) {
+ defaultEntityTable = new EntityTable();
+ for ( int i = 0; i < entities.length; i++ ) {
+ defaultEntityTable.install( entities[i] );
+ }
+ }
+ return defaultEntityTable;
+ }
+
+}
--- /dev/null
+/*
+ * @(#)IStack.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Inline stack node
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/**
+ * One entry of the inline stack, linked to the next entry through
+ * {@link #next}.
+ */
+public class IStack {
+
+    /*
+     * Mosaic handles inlines via a separate stack from other elements.
+     * We duplicate this to recover from inline markup errors such as:
+     *
+     *     <i>italic text
+     *     <p>more italic text</b> normal text
+     *
+     * which for compatibility with Mosaic is mapped to:
+     *
+     *     <i>italic text</i>
+     *     <p><i>more italic text</i> normal text
+     *
+     * Note that any inline end tag pops the effect of the current
+     * inline start tag, so that </b> pops <i> in the above example.
+     */
+
+    /** Next entry on the stack. */
+    public IStack next = null;
+    /** Tag's dictionary definition. */
+    public Dict tag = null;
+    /** Name (null for text nodes). */
+    public String element = null;
+    /** Attributes recorded with this entry. */
+    public AttVal attributes = null;
+
+    /**
+     * Creates an empty entry; every field starts out as <code>null</code>.
+     */
+    public IStack() {
+    }
+
+}
--- /dev/null
+/*
+ * @(#)Lexer.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Lexer for html parser
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/*
+ Given a file stream fp it returns a sequence of tokens.
+
+ GetToken(fp) gets the next token
+ UngetToken(fp) provides one level undo
+
+ The tags include an attribute list:
+
+ - linked list of attribute/value nodes
+ - each node has 2 null-terminated strings.
+ - entities are replaced in attribute values
+
+ white space is compacted if not in preformatted mode
+ If not in preformatted mode then leading white space
+ is discarded and subsequent white space sequences
+ compacted to single space chars.
+
+ If XmlTags is no then tag names and attribute names
+ are folded to lower case.
+
+ Not yet done:
+ - Doctype subset and marked sections
+*/
+
+import java.io.PrintWriter;
+import java.util.Stack;
+import java.util.Vector;
+
+import org.eclipse.core.resources.IFile;
+import sun.security.krb5.internal.av;
+
+public class Lexer {
+
+ private IFile iFile;
+ public StreamIn in; /* file stream */
+ public PrintWriter errout; /* error output stream */
+ public short badAccess; /* for accessibility errors */
+ public short badLayout; /* for bad style errors */
+ public short badChars; /* for bad char encodings */
+ public short badForm; /* for mismatched/mispositioned form tags */
+ public short warnings; /* count of warnings in this document */
+ public short errors; /* count of errors */
+ public int lines; /* lines seen */
+ public int columns; /* at start of current token */
+ public boolean waswhite; /* used to collapse contiguous white space */
+ public boolean pushed; /* true after token has been pushed back */
+ public boolean insertspace; /* when space is moved after end tag */
+ public boolean excludeBlocks; /* Netscape compatibility */
+ public boolean exiled; /* true if moved out of table */
+ public boolean isvoyager; /* true if xmlns attribute on html element */
+ public short versions; /* bit vector of HTML versions */
+ public int doctype; /* version as given by doctype (if any) */
+ public boolean badDoctype; /* e.g. if html or PUBLIC is missing */
+ public int txtstart; /* start of current node */
+ public int txtend; /* end of current node */
+ public short state; /* state of lexer's finite state machine */
+ public Node token;
+
+ /*
+ lexer character buffer
+
+ parse tree nodes span onto this buffer
+ which contains the concatenated text
+ contents of all of the elements.
+
+ lexsize must be reset for each file.
+ */
+ public byte[] lexbuf; /* byte buffer of UTF-8 chars */
+ public int lexlength; /* allocated */
+ public int lexsize; /* used */
+
+ /* Inline stack for compatibility with Mosaic */
+ public Node inode; /* for deferring text node */
+ public int insert; /* for inferring inline tags */
+ public Stack istack;
+ public int istackbase; /* start of frame */
+
+ public Style styles; /* used for cleaning up presentation markup */
+
+ public Configuration configuration;
+ protected int seenBodyEndTag; /* used by parser */
+ private Vector nodeList;
+
+ /**
+  * Creates a lexer reading from the given input stream. Position counters
+  * start at line 1, column 1, the state machine starts in LEX_CONTENT, all
+  * diagnostic counters are zero and the character buffer is left
+  * unallocated until the first byte is added.
+  *
+  * @param iFile file handle associated with this lexer (see getIFile)
+  * @param in input stream to tokenize
+  * @param configuration settings consulted while lexing
+  */
+ public Lexer(IFile iFile, StreamIn in, Configuration configuration) {
+     /* collaborators */
+     this.iFile = iFile;
+     this.in = in;
+     this.configuration = configuration;
+
+     /* position and state machine */
+     this.lines = 1;
+     this.columns = 1;
+     this.state = LEX_CONTENT;
+     this.token = null;
+     this.pushed = false;
+     this.waswhite = false;
+     this.insertspace = false;
+     this.exiled = false;
+     this.seenBodyEndTag = 0;
+
+     /* diagnostic counters */
+     this.badAccess = 0;
+     this.badLayout = 0;
+     this.badChars = 0;
+     this.badForm = 0;
+     this.warnings = 0;
+     this.errors = 0;
+
+     /* document version tracking */
+     this.isvoyager = false;
+     this.versions = Dict.VERS_EVERYTHING;
+     this.doctype = Dict.VERS_UNKNOWN;
+     this.badDoctype = false;
+
+     /* character buffer and the span of the current token */
+     this.lexbuf = null;
+     this.lexlength = 0;
+     this.lexsize = 0;
+     this.txtstart = 0;
+     this.txtend = 0;
+
+     /* inline stack, styles and node bookkeeping */
+     this.inode = null;
+     this.insert = -1;
+     this.istack = new Stack();
+     this.istackbase = 0;
+     this.styles = null;
+     this.nodeList = new Vector();
+ }
+
+ public IFile getIFile() {
+ return iFile;
+ }
+
+ /**
+  * Creates an empty node and records it in the node list, so that its text
+  * array is kept in step when the lexer buffer is reallocated.
+  *
+  * @return the newly created node
+  */
+ public Node newNode() {
+     Node created = new Node();
+     this.nodeList.addElement(created);
+     return created;
+ }
+
+ /**
+  * Creates a node of the given type spanning [start, end) of textarray and
+  * records it in the node list.
+  *
+  * @param type node type constant
+  * @param textarray byte buffer the node's text lives in
+  * @param start offset of the first byte of the node's text
+  * @param end offset just past the node's text
+  * @return the newly created node
+  */
+ public Node newNode(short type, byte[] textarray, int start, int end) {
+     Node created = new Node(type, textarray, start, end);
+     this.nodeList.addElement(created);
+     return created;
+ }
+
+ /**
+  * Creates a node for the named element, passing the configuration's tag
+  * table to the Node constructor, and records it in the node list.
+  *
+  * @param type node type constant
+  * @param textarray byte buffer the node's text lives in
+  * @param start offset of the first byte of the node's text
+  * @param end offset just past the node's text
+  * @param element element name
+  * @return the newly created node
+  */
+ public Node newNode(short type, byte[] textarray, int start, int end, String element) {
+     Node created = new Node(type, textarray, start, end, element, this.configuration.tt);
+     this.nodeList.addElement(created);
+     return created;
+ }
+
+ /**
+  * Clones a node and records the clone in the node list, together with any
+  * asp/php nodes hanging off the clone's attributes, so that all of them
+  * follow the lexer buffer when it is reallocated.
+  *
+  * @param node node to clone
+  * @return the clone
+  */
+ public Node cloneNode(Node node) {
+     Node copy = (Node)node.clone();
+     this.nodeList.addElement(copy);
+
+     AttVal attr = copy.attributes;
+     while (attr != null) {
+         if (attr.asp != null) {
+             this.nodeList.addElement(attr.asp);
+         }
+         if (attr.php != null) {
+             this.nodeList.addElement(attr.php);
+         }
+         attr = attr.next;
+     }
+     return copy;
+ }
+
+ /**
+  * Clones an attribute chain and records the asp/php nodes of the cloned
+  * attributes in the node list.
+  *
+  * @param attrs first attribute of the chain to clone
+  * @return the cloned chain
+  */
+ public AttVal cloneAttributes(AttVal attrs) {
+     AttVal copy = (AttVal)attrs.clone();
+
+     AttVal attr = copy;
+     while (attr != null) {
+         if (attr.asp != null) {
+             this.nodeList.addElement(attr.asp);
+         }
+         if (attr.php != null) {
+             this.nodeList.addElement(attr.php);
+         }
+         attr = attr.next;
+     }
+     return copy;
+ }
+
+ /**
+  * Re-points every recorded node that still refers to the old buffer at
+  * the new one. Called after the lexer buffer has been reallocated.
+  *
+  * @param oldtextarray buffer being replaced (compared by identity)
+  * @param newtextarray buffer that replaces it
+  */
+ protected void updateNodeTextArrays(byte[] oldtextarray, byte[] newtextarray) {
+     int index = 0;
+     while (index < this.nodeList.size()) {
+         Node candidate = (Node)this.nodeList.elementAt(index);
+         if (candidate.textarray == oldtextarray) {
+             candidate.textarray = newtextarray;
+         }
+         index++;
+     }
+ }
+
+ /**
+  * Creates a node holding a single newline character appended to the lexer
+  * buffer. Used for creating preformatted text from Word2000.
+  *
+  * @return a node spanning the newline just added to the buffer
+  */
+ public Node newLineNode() {
+     Node node = newNode();
+
+     node.start = this.lexsize;
+     addCharToLexer((int)'\n');
+     node.end = this.lexsize;
+
+     /* Bind the buffer only after appending: if this append allocates the
+        very first buffer there is no old array for updateNodeTextArrays
+        to match, and a reference taken beforehand would stay null. */
+     node.textarray = this.lexbuf;
+     return node;
+ }
+
+ // Should always be able convert to/from UTF-8, so encoding exceptions are
+ // converted to an Error to avoid adding throws declarations in
+ // lots of methods.
+
+ public static byte[] getBytes(String str) {
+ try {
+ return str.getBytes("UTF8");
+ } catch (java.io.UnsupportedEncodingException e) {
+ throw new Error("string to UTF-8 conversion failed: " + e.getMessage());
+ }
+ }
+
+ public static String getString(byte[] bytes, int offset, int length) {
+ try {
+ return new String(bytes, offset, length, "UTF8");
+ } catch (java.io.UnsupportedEncodingException e) {
+ throw new Error("UTF-8 to string conversion failed: " + e.getMessage());
+ }
+ }
+
+ public boolean endOfInput()
+ {
+ return this.in.isEndOfStream();
+ }
+
+ public void addByte(int c)
+ {
+ if (this.lexsize + 1 >= this.lexlength)
+ {
+ while (this.lexsize + 1 >= this.lexlength)
+ {
+ if (this.lexlength == 0)
+ this.lexlength = 8192;
+ else
+ this.lexlength = this.lexlength * 2;
+ }
+
+ byte[] temp = this.lexbuf;
+ this.lexbuf = new byte[ this.lexlength ];
+ if (temp != null)
+ {
+ System.arraycopy( temp, 0, this.lexbuf, 0, temp.length );
+ updateNodeTextArrays(temp, this.lexbuf);
+ }
+ }
+
+ this.lexbuf[this.lexsize++] = (byte)c;
+ this.lexbuf[this.lexsize] = (byte)'\0'; /* debug */
+ }
+
+ /**
+  * Overwrites the most recently added byte of the lexer buffer. Does
+  * nothing when the buffer is empty.
+  *
+  * @param c replacement byte
+  */
+ public void changeChar(byte c) {
+     if (this.lexsize <= 0) {
+         return;
+     }
+     this.lexbuf[this.lexsize - 1] = c;
+ }
+
+ /* store char c as UTF-8 encoded byte stream */
+ public void addCharToLexer(int c)
+ {
+ if (c < 128)
+ addByte(c);
+ else if (c <= 0x7FF)
+ {
+ addByte(0xC0 | (c >> 6));
+ addByte(0x80 | (c & 0x3F));
+ }
+ else if (c <= 0xFFFF)
+ {
+ addByte(0xE0 | (c >> 12));
+ addByte(0x80 | ((c >> 6) & 0x3F));
+ addByte(0x80 | (c & 0x3F));
+ }
+ else if (c <= 0x1FFFFF)
+ {
+ addByte(0xF0 | (c >> 18));
+ addByte(0x80 | ((c >> 12) & 0x3F));
+ addByte(0x80 | ((c >> 6) & 0x3F));
+ addByte(0x80 | (c & 0x3F));
+ }
+ else
+ {
+ addByte(0xF8 | (c >> 24));
+ addByte(0x80 | ((c >> 18) & 0x3F));
+ addByte(0x80 | ((c >> 12) & 0x3F));
+ addByte(0x80 | ((c >> 6) & 0x3F));
+ addByte(0x80 | (c & 0x3F));
+ }
+ }
+
+ public void addStringToLexer(String str)
+ {
+ for ( int i = 0; i < str.length(); i++ ) {
+ addCharToLexer( (int)str.charAt(i) );
+ }
+ }
+
+ /*
+ No longer attempts to insert missing ';' for unknown
+ enitities unless one was present already, since this
+ gives unexpected results.
+
+ For example: <a href="something.htm?foo&bar&fred">
+ was tidied to: <a href="something.htm?foo&bar;&fred;">
+ rather than: <a href="something.htm?foo&bar&fred">
+
+ My thanks for Maurice Buxton for spotting this.
+ */
+ public void parseEntity(short mode)
+ {
+ short map;
+ int start;
+ boolean first = true;
+ boolean semicolon = false;
+ boolean numeric = false;
+ int c, ch, startcol;
+ String str;
+
+ start = this.lexsize - 1; /* to start at "&" */
+ startcol = this.in.curcol - 1;
+
+ while (true)
+ {
+ c = this.in.readChar();
+ if (c == StreamIn.EndOfStream) break;
+ if (c == ';')
+ {
+ semicolon = true;
+ break;
+ }
+
+ if (first && c == '#')
+ {
+ addCharToLexer(c);
+ first = false;
+ numeric = true;
+ continue;
+ }
+
+ first = false;
+ map = MAP((char)c);
+
+ /* AQ: Added flag for numeric entities so that numeric entities
+ with missing semi-colons are recognized.
+ Eg. "rep..." is recognized as "rep"
+ */
+ if (numeric && ((c == 'x') || ((map & DIGIT) != 0)))
+ {
+ addCharToLexer(c);
+ continue;
+ }
+ if (!numeric && ((map & NAMECHAR) != 0))
+ {
+ addCharToLexer(c);
+ continue;
+ }
+
+ /* otherwise put it back */
+
+ this.in.ungetChar(c);
+ break;
+ }
+
+ str = getString( this.lexbuf, start, this.lexsize - start );
+ ch = EntityTable.getDefaultEntityTable().entityCode( str );
+
+ /* deal with unrecognized entities */
+ if (ch <= 0)
+ {
+ /* set error position just before offending chararcter */
+ this.lines = this.in.curline;
+ this.columns = startcol;
+
+ if (this.lexsize > start +1 )
+ {
+ Report.entityError(this, Report.UNKNOWN_ENTITY, str, ch);
+
+ if (semicolon)
+ addCharToLexer(';');
+ }
+ else /* naked & */
+ {
+ Report.entityError(this, Report.UNESCAPED_AMPERSAND, str, ch);
+ }
+ }
+ else
+ {
+ if (c != ';') /* issue warning if not terminated by ';' */
+ {
+ /* set error position just before offending chararcter */
+ this.lines = this.in.curline;
+ this.columns = startcol;
+ Report.entityError(this, Report.MISSING_SEMICOLON, str, c);
+ }
+
+ this.lexsize = start;
+
+ if (ch == 160 && (mode & Preformatted) != 0)
+ ch = ' ';
+
+ addCharToLexer(ch);
+
+ if (ch == '&' && !this.configuration.QuoteAmpersand)
+ {
+ addCharToLexer('a');
+ addCharToLexer('m');
+ addCharToLexer('p');
+ addCharToLexer(';');
+ }
+ }
+ }
+
+ /**
+  * Reads the rest of a tag name from the input into the lexer buffer.
+  * The first character is expected to be at txtstart already. Unless
+  * XmlTags is set, upper-case characters are folded to lower case.
+  * On return txtend marks the end of the name.
+  *
+  * @return the character that ended the name, cast to char
+  */
+ public char parseTagName() {
+     final int toLower = (int)'a' - (int)'A';
+     short map;
+     int c;
+
+     /* fold case of the first char, which is already in the buffer */
+     c = this.lexbuf[this.txtstart];
+     map = MAP((char)c);
+     if (!this.configuration.XmlTags && (map & UPPERCASE) != 0) {
+         c += toLower;
+         this.lexbuf[this.txtstart] = (byte)c;
+     }
+
+     for (;;) {
+         c = this.in.readChar();
+         if (c == StreamIn.EndOfStream) {
+             break;
+         }
+
+         map = MAP((char)c);
+         if ((map & NAMECHAR) == 0) {
+             break;
+         }
+
+         /* fold case of subsequent chars */
+         if (!this.configuration.XmlTags && (map & UPPERCASE) != 0) {
+             c += toLower;
+         }
+         addCharToLexer(c);
+     }
+
+     this.txtend = this.lexsize;
+     return (char)c;
+ }
+
+ /**
+  * Appends a string to the lexer buffer, character by character.
+  *
+  * @param str text to append
+  */
+ public void addStringLiteral(String str) {
+     /* same work as addStringToLexer, so share the one loop */
+     addStringToLexer(str);
+ }
+
+ /**
+  * Chooses the version to use for a new doctype: the first of HTML 2.0,
+  * HTML 3.2, HTML 4.0 strict, HTML 4.0 loose and frameset whose bit is
+  * still set in the versions bit vector.
+  *
+  * @return the chosen version code, or Dict.VERS_UNKNOWN if none is set
+  */
+ public short HTMLVersion() {
+     final short[] preference = {
+         Dict.VERS_HTML20,
+         Dict.VERS_HTML32,
+         Dict.VERS_HTML40_STRICT,
+         Dict.VERS_HTML40_LOOSE,
+         Dict.VERS_FRAMES
+     };
+
+     for (int k = 0; k < preference.length; k++) {
+         if ((this.versions & preference[k]) != 0) {
+             return preference[k];
+         }
+     }
+     return Dict.VERS_UNKNOWN;
+ }
+
+ public String HTMLVersionName()
+ {
+ short guessed;
+ int j;
+
+ guessed = apparentVersion();
+
+ for (j = 0; j < W3CVersion.length; ++j)
+ {
+ if (guessed == W3CVersion[j].code)
+ {
+ if (this.isvoyager)
+ return W3CVersion[j].voyagerName;
+
+ return W3CVersion[j].name;
+ }
+ }
+
+ return null;
+ }
+
+ /**
+  * Adds a meta "generator" element for Tidy at the start of the head,
+  * unless the head already has a meta element whose name is "generator"
+  * (ignoring case) and whose content starts with "HTML Tidy".
+  *
+  * @param root root of the parse tree
+  * @return true if the element was added; false if there is no head or
+  *         a Tidy generator meta is already present
+  */
+ public boolean addGenerator(Node root) {
+     Node head = root.findHEAD(configuration.tt);
+     if (head == null) {
+         return false;
+     }
+
+     for (Node child = head.content; child != null; child = child.next) {
+         if (child.tag != configuration.tt.tagMeta) {
+             continue;
+         }
+
+         AttVal nameAttr = child.getAttrByName("name");
+         if (nameAttr == null || nameAttr.value == null
+             || Lexer.wstrcasecmp(nameAttr.value, "generator") != 0) {
+             continue;
+         }
+
+         AttVal contentAttr = child.getAttrByName("content");
+         if (contentAttr != null && contentAttr.value != null
+             && contentAttr.value.length() >= 9
+             && Lexer.wstrcasecmp(contentAttr.value.substring(0, 9), "HTML Tidy") == 0) {
+             return false;
+         }
+     }
+
+     Node meta = this.inferredTag("meta");
+     meta.addAttribute("content", "HTML Tidy, see www.w3.org");
+     meta.addAttribute("name", "generator");
+     Node.insertNodeAtStart(head, meta);
+     return true;
+ }
+
+ /**
+  * Returns true if s occurs in p (ignoring case) and its first occurrence
+  * is not spelled exactly like s. Used to check the case of SYSTEM,
+  * PUBLIC, DTD and EN in a doctype.
+  *
+  * @param s keyword to look for, in its required spelling
+  * @param p text to search
+  * @param len how many chars of p to check
+  * @return true if the first case-insensitive match differs from s
+  */
+ private static boolean findBadSubString(String s, String p, int len)
+ {
+     int n = s.length();
+
+     /* The caller passes a byte count; the decoded string is shorter when
+        it holds multi-byte characters, so never scan past its end. */
+     int limit = Math.min(len, p.length());
+
+     /* i + n <= limit also covers a keyword sitting at the very end */
+     for (int i = 0; i + n <= limit; i++) {
+         String ps = p.substring(i, i + n);
+         if (wstrcasecmp(s, ps) == 0)
+             return !ps.equals(s);
+     }
+
+     return false;
+ }
+
+ /**
+  * Checks that the keywords SYSTEM, PUBLIC, //DTD, //W3C and //EN are
+  * written in exactly that case wherever they occur in the doctype text.
+  *
+  * @param doctype doctype node whose text lies in the lexer buffer
+  * @return false if any of the keywords is found in a different case
+  */
+ public boolean checkDocTypeKeyWords(Node doctype) {
+     final String[] keywords = { "SYSTEM", "PUBLIC", "//DTD", "//W3C", "//EN" };
+
+     int len = doctype.end - doctype.start;
+     String text = getString(this.lexbuf, doctype.start, len);
+
+     for (int k = 0; k < keywords.length; k++) {
+         if (findBadSubString(keywords[k], text, len)) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ /*
+  Examines the text of a <!DOCTYPE> node to identify the HTML version.
+
+  Returns the code of the matching W3CVersion entry, or 0 when the
+  doctype is not for "html", gives only a SYSTEM identifier, or names a
+  version that is not in the table.
+
+  Side effects: warns when the doctype keywords are not upper case,
+  rewrites a SYSTEM or PUBLIC keyword to upper case in the lexer buffer,
+  and sets badDoctype when neither keyword follows "html ".
+
+  NOTE(review): the fixed-length getString calls read 5, 7, 12 and 13
+  bytes without checking them against doctype.end - confirm the buffer
+  always has that much room.
+ */
+ public short findGivenVersion(Node doctype)
+ {
+ String p, s;
+ int i, j;
+ int len;
+ String str1;
+ String str2;
+
+ /* if root tag for doctype isn't html give up now */
+ str1 = getString(this.lexbuf, doctype.start, 5);
+ if (wstrcasecmp(str1, "html ") != 0)
+ return 0;
+
+ if (!checkDocTypeKeyWords(doctype))
+ Report.warning(this, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
+
+ /* give up if all we are given is the system id for the doctype */
+ /* the 7 bytes after "html " hold the keyword plus one trailing space */
+ str1 = getString(this.lexbuf, doctype.start + 5, 7);
+ if (wstrcasecmp(str1, "SYSTEM ") == 0)
+ {
+ /* but at least ensure the case is correct */
+ if (!str1.substring(0, 6).equals("SYSTEM"))
+ System.arraycopy( getBytes("SYSTEM"), 0,
+ this.lexbuf, doctype.start + 5, 6 );
+ return 0; /* unrecognized */
+ }
+
+ if (wstrcasecmp(str1, "PUBLIC ") == 0)
+ {
+ /* only the 6 keyword bytes are copied; the space is left alone */
+ if (!str1.substring(0, 6).equals("PUBLIC"))
+ System.arraycopy( getBytes("PUBLIC "), 0,
+ this.lexbuf, doctype.start + 5, 6 );
+ }
+ else
+ this.badDoctype = true;
+
+ /* find the first double quote; only that one is examined (see break) */
+ for (i = doctype.start; i < doctype.end; ++i)
+ {
+ if (this.lexbuf[i] == (byte)'"')
+ {
+ /* 12 and 13 are the lengths of the two prefixes compared below */
+ str1 = getString( this.lexbuf, i + 1, 12 );
+ str2 = getString( this.lexbuf, i + 1, 13 );
+ if (str1.equals("-//W3C//DTD "))
+ {
+ /* compute length of identifier e.g. "HTML 4.0 Transitional" */
+ for (j = i + 13; j < doctype.end && this.lexbuf[j] != (byte)'/'; ++j);
+ len = j - i - 13;
+ p = getString( this.lexbuf, i + 13, len );
+
+ /* entry 0 is skipped here; it is matched only in the IETF branch */
+ for (j = 1; j < W3CVersion.length; ++j)
+ {
+ s = W3CVersion[j].name;
+ if (len == s.length() && s.equals(p))
+ return W3CVersion[j].code;
+ }
+
+ /* else unrecognized version */
+ }
+ else if (str2.equals("-//IETF//DTD "))
+ {
+ /* compute length of identifier e.g. "HTML 2.0" */
+ for (j = i + 14; j < doctype.end && this.lexbuf[j] != (byte)'/'; ++j);
+ len = j - i - 14;
+
+ p = getString( this.lexbuf, i + 14, len );
+ s = W3CVersion[0].name;
+ if (len == s.length() && s.equals(p))
+ return W3CVersion[0].code;
+
+ /* else unrecognized version */
+ }
+ break;
+ }
+ }
+
+ return 0;
+ }
+
+ public void fixHTMLNameSpace(Node root, String profile)
+ {
+ Node node;
+ AttVal prev, attr;
+
+ for (node = root.content;
+ node != null && node.tag != configuration.tt.tagHtml; node = node.next);
+
+ if (node != null)
+ {
+ prev = null;
+
+ for (attr = node.attributes; attr != null; attr = attr.next)
+ {
+ if (attr.attribute.equals("xmlns"))
+ break;
+
+ prev = attr;
+ }
+
+ if (attr != null)
+ {
+ if (!attr.value.equals(profile))
+ {
+ Report.warning(this, node, null, Report.INCONSISTENT_NAMESPACE);
+ attr.value = profile;
+ }
+ }
+ else
+ {
+ attr = new AttVal( node.attributes, null, (int)'"',
+ "xmlns", profile );
+ attr.dict =
+ AttributeTable.getDefaultAttributeTable().findAttribute( attr );
+ node.attributes = attr;
+ }
+ }
+ }
+
+ /**
+  * Writes an XHTML 1.0 doctype for the document, according to the
+  * configured docTypeMode, and fixes the html element's namespace.
+  *
+  * In DOCTYPE_OMIT mode any existing doctype is discarded instead. In
+  * DOCTYPE_AUTO mode the flavour (strict, transitional or frameset) is
+  * chosen from the versions bit vector. In DOCTYPE_USER mode the
+  * configured docTypeStr is used as public identifier with an empty
+  * system identifier.
+  *
+  * @param root root of the parse tree
+  * @return true when the doctype was omitted, false after one was written
+  */
+ public boolean setXHTMLDocType(Node root) {
+     String fpi = " ";
+     String sysid = "";
+     String namespace = XHTML_NAMESPACE;
+     Node doctype = root.findDocType();
+
+     if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT) {
+         if (doctype != null)
+             Node.discardElement(doctype);
+         return true;
+     }
+
+     if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO) {
+         /* see what flavor of XHTML this document matches */
+         if ((this.versions & Dict.VERS_HTML40_STRICT) != 0) {
+             /* use XHTML strict */
+             fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
+             sysid = voyager_strict;
+         } else if ((this.versions & Dict.VERS_LOOSE) != 0) {
+             fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
+             sysid = voyager_loose;
+         } else if ((this.versions & Dict.VERS_FRAMES) != 0) {
+             /* use XHTML frames */
+             fpi = "-//W3C//DTD XHTML 1.0 Frameset//EN";
+             sysid = voyager_frameset;
+         } else {
+             /* lets assume XHTML transitional */
+             fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
+             sysid = voyager_loose;
+         }
+     } else if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT) {
+         fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
+         sysid = voyager_strict;
+     } else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE) {
+         fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
+         sysid = voyager_loose;
+     }
+
+     fixHTMLNameSpace(root, namespace);
+
+     if (doctype == null) {
+         doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0);
+         doctype.next = root.content;
+         doctype.parent = root;
+         doctype.prev = null;
+         /* keep the sibling list doubly linked, as fixXMLPI does */
+         if (root.content != null)
+             root.content.prev = doctype;
+         root.content = doctype;
+     }
+
+     if (configuration.docTypeMode == Configuration.DOCTYPE_USER &&
+         configuration.docTypeStr != null) {
+         fpi = configuration.docTypeStr;
+         sysid = "";
+     }
+
+     this.txtstart = this.lexsize;
+     this.txtend = this.lexsize;
+
+     /* add public identifier */
+     addStringLiteral("html PUBLIC ");
+
+     /* check if the fpi is quoted or not; an empty one counts as unquoted */
+     if (fpi.length() > 0 && fpi.charAt(0) == '"') {
+         addStringLiteral(fpi);
+     } else {
+         addStringLiteral("\"");
+         addStringLiteral(fpi);
+         addStringLiteral("\"");
+     }
+
+     if (sysid.length() + 6 >= this.configuration.wraplen)
+         addStringLiteral("\n\"");
+     else
+         addStringLiteral("\n \"");
+
+     /* add system identifier */
+     addStringLiteral(sysid);
+     addStringLiteral("\"");
+
+     this.txtend = this.lexsize;
+
+     doctype.start = this.txtstart;
+     doctype.end = this.txtend;
+     /* start/end are offsets into the current buffer, which may have been
+        allocated only by the appends above */
+     doctype.textarray = this.lexbuf;
+
+     return false;
+ }
+
+ public short apparentVersion()
+ {
+ switch (this.doctype)
+ {
+ case Dict.VERS_UNKNOWN:
+ return HTMLVersion();
+
+ case Dict.VERS_HTML20:
+ if ((this.versions & Dict.VERS_HTML20) != 0)
+ return Dict.VERS_HTML20;
+
+ break;
+
+ case Dict.VERS_HTML32:
+ if ((this.versions & Dict.VERS_HTML32) != 0)
+ return Dict.VERS_HTML32;
+
+ break; /* to replace old version by new */
+
+ case Dict.VERS_HTML40_STRICT:
+ if ((this.versions & Dict.VERS_HTML40_STRICT) != 0)
+ return Dict.VERS_HTML40_STRICT;
+
+ break;
+
+ case Dict.VERS_HTML40_LOOSE:
+ if ((this.versions & Dict.VERS_HTML40_LOOSE) != 0)
+ return Dict.VERS_HTML40_LOOSE;
+
+ break; /* to replace old version by new */
+
+ case Dict.VERS_FRAMES:
+ if ((this.versions & Dict.VERS_FRAMES) != 0)
+ return Dict.VERS_FRAMES;
+
+ break;
+ }
+
+ Report.warning(this, null, null, Report.INCONSISTENT_VERSION);
+ return this.HTMLVersion();
+ }
+
+ /* fixup doctype if missing */
+ public boolean fixDocType(Node root)
+ {
+ Node doctype;
+ int guessed = Dict.VERS_HTML40_STRICT, i;
+
+ if (this.badDoctype)
+ Report.warning(this, null, null, Report.MALFORMED_DOCTYPE);
+
+ if (configuration.XmlOut)
+ return true;
+
+ doctype = root.findDocType();
+
+ if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT)
+ {
+ if (doctype != null)
+ Node.discardElement(doctype);
+ return true;
+ }
+
+ if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT)
+ {
+ Node.discardElement(doctype);
+ doctype = null;
+ guessed = Dict.VERS_HTML40_STRICT;
+ }
+ else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE)
+ {
+ Node.discardElement(doctype);
+ doctype = null;
+ guessed = Dict.VERS_HTML40_LOOSE;
+ }
+ else if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO)
+ {
+ if (doctype != null)
+ {
+ if (this.doctype == Dict.VERS_UNKNOWN)
+ return false;
+
+ switch (this.doctype)
+ {
+ case Dict.VERS_UNKNOWN:
+ return false;
+
+ case Dict.VERS_HTML20:
+ if ((this.versions & Dict.VERS_HTML20) != 0)
+ return true;
+
+ break; /* to replace old version by new */
+
+ case Dict.VERS_HTML32:
+ if ((this.versions & Dict.VERS_HTML32) != 0)
+ return true;
+
+ break; /* to replace old version by new */
+
+ case Dict.VERS_HTML40_STRICT:
+ if ((this.versions & Dict.VERS_HTML40_STRICT) != 0)
+ return true;
+
+ break; /* to replace old version by new */
+
+ case Dict.VERS_HTML40_LOOSE:
+ if ((this.versions & Dict.VERS_HTML40_LOOSE) != 0)
+ return true;
+
+ break; /* to replace old version by new */
+
+ case Dict.VERS_FRAMES:
+ if ((this.versions & Dict.VERS_FRAMES) != 0)
+ return true;
+
+ break; /* to replace old version by new */
+ }
+
+ /* INCONSISTENT_VERSION warning is now issued by ApparentVersion() */
+ }
+
+ /* choose new doctype */
+ guessed = HTMLVersion();
+ }
+
+ if (guessed == Dict.VERS_UNKNOWN)
+ return false;
+
+ /* for XML use the Voyager system identifier */
+ if (this.configuration.XmlOut || this.configuration.XmlTags || this.isvoyager)
+ {
+ if (doctype != null)
+ Node.discardElement(doctype);
+
+ for (i = 0; i < W3CVersion.length; ++i)
+ {
+ if (guessed == W3CVersion[i].code)
+ {
+ fixHTMLNameSpace(root, W3CVersion[i].profile);
+ break;
+ }
+ }
+
+ return true;
+ }
+
+ if (doctype == null)
+ {
+ doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0);
+ doctype.next = root.content;
+ doctype.parent = root;
+ doctype.prev = null;
+ root.content = doctype;
+ }
+
+ this.txtstart = this.lexsize;
+ this.txtend = this.lexsize;
+
+ /* use the appropriate public identifier */
+ addStringLiteral("html PUBLIC ");
+
+ if (configuration.docTypeMode == Configuration.DOCTYPE_USER &&
+ configuration.docTypeStr != null)
+ addStringLiteral(configuration.docTypeStr);
+ else if (guessed == Dict.VERS_HTML20)
+ addStringLiteral("\"-//IETF//DTD HTML 2.0//EN\"");
+ else
+ {
+ addStringLiteral("\"-//W3C//DTD ");
+
+ for (i = 0; i < W3CVersion.length; ++i)
+ {
+ if (guessed == W3CVersion[i].code)
+ {
+ addStringLiteral(W3CVersion[i].name);
+ break;
+ }
+ }
+
+ addStringLiteral("//EN\"");
+ }
+
+ this.txtend = this.lexsize;
+
+ doctype.start = this.txtstart;
+ doctype.end = this.txtend;
+
+ return true;
+ }
+
+ /* ensure XML document starts with <?XML version="1.0"?> */
+ public boolean fixXMLPI(Node root)
+ {
+ Node xml;
+ int s;
+
+ if( root.content != null && root.content.type == Node.ProcInsTag)
+ {
+ s = root.content.start;
+
+ if (this.lexbuf[s] == (byte)'x' &&
+ this.lexbuf[s+1] == (byte)'m' &&
+ this.lexbuf[s+2] == (byte)'l')
+ return true;
+ }
+
+ xml = newNode(Node.ProcInsTag, this.lexbuf, 0, 0);
+ xml.next = root.content;
+
+ if (root.content != null)
+ {
+ root.content.prev = xml;
+ xml.next = root.content;
+ }
+
+ root.content = xml;
+
+ this.txtstart = this.lexsize;
+ this.txtend = this.lexsize;
+ addStringLiteral("xml version=\"1.0\"");
+ if (this.configuration.CharEncoding == Configuration.LATIN1)
+ addStringLiteral(" encoding=\"ISO-8859-1\"");
+ this.txtend = this.lexsize;
+
+ xml.start = this.txtstart;
+ xml.end = this.txtend;
+ return false;
+ }
+
+ /**
+  * Creates a start-tag node for an element that was not present in the
+  * input. The node spans the current txtstart..txtend range and is
+  * marked implicit.
+  *
+  * @param name element name
+  * @return the new implicit start-tag node
+  */
+ public Node inferredTag(String name) {
+     Node inferred =
+         newNode(Node.StartTag, this.lexbuf, this.txtstart, this.txtend, name);
+     inferred.implicit = true;
+     return inferred;
+ }
+
+ public static boolean expectsContent(Node node)
+ {
+ if (node.type != Node.StartTag)
+ return false;
+
+ /* unknown element? */
+ if (node.tag == null)
+ return true;
+
+ if ((node.tag.model & Dict.CM_EMPTY) != 0)
+ return false;
+
+ return true;
+ }
+
+ /*
+  Creates a text node for the contents of a CDATA element like style
+  or script, which ends with </foo> for some foo.
+
+  Characters are copied to the lexer buffer until end of stream or an
+  end tag whose name matches container.element (ignoring case). Any
+  other end tag inside the content is reported as BAD_CDATA_CONTENT
+  and, for JavaScript containers, gets a backslash inserted before
+  its "/". \r\n and \r are stored as \n.
+
+  Returns the new text node, which is also stored in this.token, or
+  null when the element has no content. Reaching end of stream is
+  reported as MISSING_ENDTAG_FOR.
+ */
+ public Node getCDATA(Node container)
+ {
+ int c, lastc, start, len, i;
+ String str;
+ boolean endtag = false;
+
+ this.lines = this.in.curline;
+ this.columns = this.in.curcol;
+ this.waswhite = false;
+ this.txtstart = this.lexsize;
+ this.txtend = this.lexsize;
+
+ lastc = (int)'\0';
+ /* start is the buffer offset of a candidate end tag name, -1 if none */
+ start = -1;
+
+ while (true)
+ {
+ c = this.in.readChar();
+ if (c == StreamIn.EndOfStream) break;
+ /* treat \r\n as \n and \r as \n */
+
+ if (c == (int)'/' && lastc == (int)'<')
+ {
+ /* a second "</" before any ">" was seen */
+ if (endtag)
+ {
+ this.lines = this.in.curline;
+ this.columns = this.in.curcol - 3;
+
+ Report.warning(this, null, null, Report.BAD_CDATA_CONTENT);
+ }
+
+ /* the "/" itself is stored at lexsize by the add at the loop's end */
+ start = this.lexsize + 1; /* to first letter */
+ endtag = true;
+ }
+ else if (c == (int)'>' && start >= 0)
+ {
+ len = this.lexsize - start;
+ if (len == container.element.length())
+ {
+ str = getString( this.lexbuf, start, len );
+ if (Lexer.wstrcasecmp(str, container.element) == 0)
+ {
+ /* start - 2 is the offset of the "<" that opened the end tag */
+ this.txtend = start - 2;
+ break;
+ }
+ }
+
+ this.lines = this.in.curline;
+ this.columns = this.in.curcol - 3;
+
+ Report.warning(this, null, null, Report.BAD_CDATA_CONTENT);
+
+ /* if javascript insert backslash before / */
+
+ if (ParserImpl.isJavaScript(container))
+ {
+ /* shift everything from the "/" onwards up by one byte */
+ for (i = this.lexsize; i > start-1; --i)
+ this.lexbuf[i] = this.lexbuf[i-1];
+
+ this.lexbuf[start-1] = (byte)'\\';
+ this.lexsize++;
+ }
+
+ start = -1;
+ }
+ else if (c == (int)'\r')
+ {
+ c = this.in.readChar();
+
+ if (c != (int)'\n')
+ this.in.ungetChar(c);
+
+ c = (int)'\n';
+ }
+
+ addCharToLexer((int)c);
+ this.txtend = this.lexsize;
+ lastc = c;
+ }
+
+ if (c == StreamIn.EndOfStream)
+ Report.warning(this, container, null, Report.MISSING_ENDTAG_FOR);
+
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TextNode,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+ }
+
+ return null;
+ }
+
+ /**
+  * Provides one level of undo: marks the current token as pushed back, so
+  * that getToken can hand it out again.
+  */
+ public void ungetToken() {
+     this.pushed = true;
+ }
+
+ public static final short IgnoreWhitespace = 0;
+ public static final short MixedContent = 1;
+ public static final short Preformatted = 2;
+ public static final short IgnoreMarkup = 3;
+
+ /*
+ modes for GetToken()
+
+ MixedContent -- for elements which don't accept PCDATA
+ Preformatted -- white space preserved as is
+ IgnoreMarkup -- for CDATA elements such as script, style
+ */
+
+ public Node getToken(short mode)
+ {
+ short map;
+ int c = 0;
+ int lastc;
+ int badcomment = 0;
+ MutableBoolean isempty = new MutableBoolean();
+ AttVal attributes;
+
+ if (this.pushed)
+ {
+ /* duplicate inlines in preference to pushed text nodes when appropriate */
+ if (this.token.type != Node.TextNode ||
+ (this.insert == -1 && this.inode == null))
+ {
+ this.pushed = false;
+ return this.token;
+ }
+ }
+
+ /* at start of block elements, unclosed inline
+ elements are inserted into the token stream */
+
+ if (this.insert != -1 || this.inode != null)
+ return insertedToken();
+
+ this.lines = this.in.curline;
+ this.columns = this.in.curcol;
+ this.waswhite = false;
+
+ this.txtstart = this.lexsize;
+ this.txtend = this.lexsize;
+
+ while (true)
+ {
+ c = this.in.readChar();
+ if (c == StreamIn.EndOfStream) break;
+ if (this.insertspace && mode != IgnoreWhitespace)
+ {
+ addCharToLexer(' ');
+ this.waswhite = true;
+ this.insertspace = false;
+ }
+
+ /* treat \r\n as \n and \r as \n */
+
+ if (c == '\r')
+ {
+ c = this.in.readChar();
+
+ if (c != '\n')
+ this.in.ungetChar(c);
+
+ c = '\n';
+ }
+
+ addCharToLexer(c);
+
+ switch (this.state)
+ {
+ case LEX_CONTENT: /* element content */
+ map = MAP((char)c);
+
+ /*
+ Discard white space if appropriate. Its cheaper
+ to do this here rather than in parser methods
+ for elements that don't have mixed content.
+ */
+ if (((map & WHITE) != 0) && (mode == IgnoreWhitespace)
+ && this.lexsize == this.txtstart + 1)
+ {
+ --this.lexsize;
+ this.waswhite = false;
+ this.lines = this.in.curline;
+ this.columns = this.in.curcol;
+ continue;
+ }
+
+ if (c == '<')
+ {
+ this.state = LEX_GT;
+ continue;
+ }
+
+ if ((map & WHITE) != 0)
+ {
+ /* was previous char white? */
+ if (this.waswhite)
+ {
+ if (mode != Preformatted && mode != IgnoreMarkup)
+ {
+ --this.lexsize;
+ this.lines = this.in.curline;
+ this.columns = this.in.curcol;
+ }
+ }
+ else /* prev char wasn't white */
+ {
+ this.waswhite = true;
+ lastc = c;
+
+ if (mode != Preformatted && mode != IgnoreMarkup && c != ' ')
+ changeChar((byte)' ');
+ }
+
+ continue;
+ }
+ else if (c == '&' && mode != IgnoreMarkup)
+ parseEntity(mode);
+
+ /* this is needed to avoid trimming trailing whitespace */
+ if (mode == IgnoreWhitespace)
+ mode = MixedContent;
+
+ this.waswhite = false;
+ continue;
+
+ case LEX_GT: /* < */
+
+ /* check for endtag */
+ if (c == '/')
+ {
+ c = this.in.readChar();
+ if (c == StreamIn.EndOfStream)
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ addCharToLexer(c);
+ map = MAP((char)c);
+
+ if ((map & LETTER) != 0)
+ {
+ this.lexsize -= 3;
+ this.txtend = this.lexsize;
+ this.in.ungetChar(c);
+ this.state = LEX_ENDTAG;
+ this.lexbuf[this.lexsize] = (byte)'\0'; /* debug */
+ this.in.curcol -= 2;
+
+ /* if some text before the </ return it now */
+ if (this.txtend > this.txtstart)
+ {
+ /* trim space char before end tag */
+ if (mode == IgnoreWhitespace && this.lexbuf[this.lexsize - 1] == (byte)' ')
+ {
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ }
+
+ this.token = newNode(Node.TextNode,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+ }
+
+ continue; /* no text so keep going */
+ }
+
+ /* otherwise treat as CDATA */
+ this.waswhite = false;
+ this.state = LEX_CONTENT;
+ continue;
+ }
+
+ if (mode == IgnoreMarkup)
+ {
+ /* otherwise treat as CDATA */
+ this.waswhite = false;
+ this.state = LEX_CONTENT;
+ continue;
+ }
+
+ /*
+ look out for comments, doctype or marked sections
+ this isn't quite right, but its getting there ...
+ */
+ if (c == '!')
+ {
+ c = this.in.readChar();
+
+ if (c == '-')
+ {
+ c = this.in.readChar();
+
+ if (c == '-')
+ {
+ this.state = LEX_COMMENT; /* comment */
+ this.lexsize -= 2;
+ this.txtend = this.lexsize;
+
+ /* if some text before < return it now */
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TextNode,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+ }
+
+ this.txtstart = this.lexsize;
+ continue;
+ }
+
+ Report.warning(this, null, null, Report.MALFORMED_COMMENT);
+ }
+ else if (c == 'd' || c == 'D')
+ {
+ this.state = LEX_DOCTYPE; /* doctype */
+ this.lexsize -= 2;
+ this.txtend = this.lexsize;
+ mode = IgnoreWhitespace;
+
+ /* skip until white space or '>' */
+
+ for (;;)
+ {
+ c = this.in.readChar();
+
+ if (c == StreamIn.EndOfStream || c == '>')
+ {
+ this.in.ungetChar(c);
+ break;
+ }
+
+ map = MAP((char)c);
+
+ if ((map & WHITE) == 0)
+ continue;
+
+ /* and skip to end of whitespace */
+
+ for (;;)
+ {
+ c = this.in.readChar();
+
+ if (c == StreamIn.EndOfStream || c == '>')
+ {
+ this.in.ungetChar(c);
+ break;
+ }
+
+ map = MAP((char)c);
+
+ if ((map & WHITE) != 0)
+ continue;
+
+ this.in.ungetChar(c);
+ break;
+ }
+
+ break;
+ }
+
+ /* if some text before < return it now */
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TextNode,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+ }
+
+ this.txtstart = this.lexsize;
+ continue;
+ }
+ else if (c == '[')
+ {
+ /* Word 2000 embeds <![if ...]> ... <![endif]> sequences */
+ this.lexsize -= 2;
+ this.state = LEX_SECTION;
+ this.txtend = this.lexsize;
+
+ /* if some text before < return it now */
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TextNode,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+ }
+
+ this.txtstart = this.lexsize;
+ continue;
+ }
+
+ /* otherwise swallow chars up to and including next '>' */
+ while (true)
+ {
+ c = this.in.readChar();
+ if (c == '>') break;
+ if (c == -1)
+ {
+ this.in.ungetChar(c);
+ break;
+ }
+ }
+
+ this.lexsize -= 2;
+ this.lexbuf[this.lexsize] = (byte)'\0';
+ this.state = LEX_CONTENT;
+ continue;
+ }
+
+ /*
+ processing instructions
+ */
+
+ if (c == '?')
+ {
+ this.lexsize -= 2;
+ this.state = LEX_PROCINSTR;
+ this.txtend = this.lexsize;
+
+ /* if some text before < return it now */
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TextNode,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+ }
+
+ this.txtstart = this.lexsize;
+ continue;
+ }
+
+ /* Microsoft ASP's e.g. <% ... server-code ... %> */
+ if (c == '%')
+ {
+ this.lexsize -= 2;
+ this.state = LEX_ASP;
+ this.txtend = this.lexsize;
+
+ /* if some text before < return it now */
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TextNode,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+ }
+
+ this.txtstart = this.lexsize;
+ continue;
+ }
+
+ /* Netscapes JSTE e.g. <# ... server-code ... #> */
+ if (c == '#')
+ {
+ this.lexsize -= 2;
+ this.state = LEX_JSTE;
+ this.txtend = this.lexsize;
+
+ /* if some text before < return it now */
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TextNode,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+ }
+
+ this.txtstart = this.lexsize;
+ continue;
+ }
+
+ map = MAP((char)c);
+
+ /* check for start tag */
+ if ((map & LETTER) != 0)
+ {
+ this.in.ungetChar(c); /* push back letter */
+ this.lexsize -= 2; /* discard "<" + letter */
+ this.txtend = this.lexsize;
+ this.state = LEX_STARTTAG; /* ready to read tag name */
+
+ /* if some text before < return it now */
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TextNode,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+ }
+
+ continue; /* no text so keep going */
+ }
+
+ /* otherwise treat as CDATA */
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ continue;
+
+ case LEX_ENDTAG: /* </letter */
+ this.txtstart = this.lexsize - 1;
+ this.in.curcol += 2;
+ c = parseTagName();
+ this.token = newNode(Node.EndTag, /* create endtag token */
+ this.lexbuf,
+ this.txtstart,
+ this.txtend,
+ getString(this.lexbuf,
+ this.txtstart,
+ this.txtend - this.txtstart));
+ this.lexsize = this.txtstart;
+ this.txtend = this.txtstart;
+
+ /* skip to '>' */
+ while (c != '>')
+ {
+ c = this.in.readChar();
+
+ if (c == StreamIn.EndOfStream)
+ break;
+ }
+
+ if (c == StreamIn.EndOfStream)
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ return this.token; /* the endtag token */
+
+ case LEX_STARTTAG: /* first letter of tagname */
+ this.txtstart = this.lexsize - 1; /* set txtstart to first letter */
+ c = parseTagName();
+ isempty.value = false;
+ attributes = null;
+ this.token = newNode((isempty.value ? Node.StartEndTag : Node.StartTag),
+ this.lexbuf,
+ this.txtstart,
+ this.txtend,
+ getString(this.lexbuf,
+ this.txtstart,
+ this.txtend - this.txtstart));
+
+ /* parse attributes, consuming closing ">" */
+ if (c != '>')
+ {
+ if (c == '/')
+ this.in.ungetChar(c);
+
+ attributes = parseAttrs(isempty);
+ }
+
+ if (isempty.value)
+ this.token.type = Node.StartEndTag;
+
+ this.token.attributes = attributes;
+ this.lexsize = this.txtstart;
+ this.txtend = this.txtstart;
+
+ /* swallow newline following start tag */
+ /* special check needed for CRLF sequence */
+ /* this doesn't apply to empty elements */
+
+ if (expectsContent(this.token) ||
+ this.token.tag == configuration.tt.tagBr)
+ {
+
+ c = this.in.readChar();
+
+ if (c == '\r')
+ {
+ c = this.in.readChar();
+
+ if (c != '\n')
+ this.in.ungetChar(c);
+ }
+ else if (c != '\n' && c != '\f')
+ this.in.ungetChar(c);
+
+ this.waswhite = true; /* to swallow leading whitespace */
+ }
+ else
+ this.waswhite = false;
+
+ this.state = LEX_CONTENT;
+
+ if (this.token.tag == null)
+ Report.error(this, null, this.token, Report.UNKNOWN_ELEMENT);
+ else if (!this.configuration.XmlTags)
+ {
+ this.versions &= this.token.tag.versions;
+
+ if ((this.token.tag.versions & Dict.VERS_PROPRIETARY) != 0)
+ {
+ if (!this.configuration.MakeClean && (this.token.tag == configuration.tt.tagNobr ||
+ this.token.tag == configuration.tt.tagWbr))
+ Report.warning(this, null, this.token, Report.PROPRIETARY_ELEMENT);
+ }
+
+ if (this.token.tag.chkattrs != null)
+ {
+ this.token.checkUniqueAttributes(this);
+ this.token.tag.chkattrs.check(this, this.token);
+ }
+ else
+ this.token.checkAttributes(this);
+ }
+
+ return this.token; /* return start tag */
+
+ case LEX_COMMENT: /* seen <!-- so look for --> */
+
+ if (c != '-')
+ continue;
+
+ c = this.in.readChar();
+ addCharToLexer(c);
+
+ if (c != '-')
+ continue;
+
+ end_comment: while (true) {
+ c = this.in.readChar();
+
+ if (c == '>')
+ {
+ if (badcomment != 0)
+ Report.warning(this, null, null, Report.MALFORMED_COMMENT);
+
+ this.txtend = this.lexsize - 2; // AQ 8Jul2000
+ this.lexbuf[this.lexsize] = (byte)'\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.CommentTag,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+
+ /* now look for a line break */
+
+ c = this.in.readChar();
+
+ if (c == '\r')
+ {
+ c = this.in.readChar();
+
+ if (c != '\n')
+ this.token.linebreak = true;
+ }
+
+ if (c == '\n')
+ this.token.linebreak = true;
+ else
+ this.in.ungetChar(c);
+
+ return this.token;
+ }
+
+ /* note position of first such error in the comment */
+ if (badcomment == 0)
+ {
+ this.lines = this.in.curline;
+ this.columns = this.in.curcol - 3;
+ }
+
+ badcomment++;
+ if (this.configuration.FixComments)
+ this.lexbuf[this.lexsize - 2] = (byte)'=';
+
+ addCharToLexer(c);
+
+ /* if '-' then look for '>' to end the comment */
+ if (c != '-')
+ break end_comment;
+
+ }
+ /* otherwise continue to look for --> */
+ this.lexbuf[this.lexsize - 2] = (byte)'=';
+ continue;
+
+ case LEX_DOCTYPE: /* seen <!d so look for '>' munging whitespace */
+ map = MAP((char)c);
+
+ if ((map & WHITE) != 0)
+ {
+ if (this.waswhite)
+ this.lexsize -= 1;
+
+ this.waswhite = true;
+ }
+ else
+ this.waswhite = false;
+
+ if (c != '>')
+ continue;
+
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte)'\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.DocTypeTag,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ /* make a note of the version named by the doctype */
+ this.doctype = findGivenVersion(this.token);
+ return this.token;
+
+ case LEX_PROCINSTR: /* seen <? so look for '>' */
+ /* check for PHP preprocessor instructions <?php ... ?> */
+
+ if (this.lexsize - this.txtstart == 3)
+ {
+ if ((getString(this.lexbuf, this.txtstart, 3)).equals("php"))
+ {
+ this.state = LEX_PHP;
+ continue;
+ }
+ }
+
+ if (this.configuration.XmlPIs) /* insist on ?> as terminator */
+ {
+ if (c != '?')
+ continue;
+
+ /* now look for '>' */
+ c = this.in.readChar();
+
+ if (c == StreamIn.EndOfStream)
+ {
+ Report.warning(this, null, null, Report.UNEXPECTED_END_OF_FILE);
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ addCharToLexer(c);
+ }
+
+ if (c != '>')
+ continue;
+
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte)'\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.ProcInsTag,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+
+ case LEX_ASP: /* seen <% so look for "%>" */
+ if (c != '%')
+ continue;
+
+ /* now look for '>' */
+ c = this.in.readChar();
+
+
+ if (c != '>')
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte)'\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.AspTag,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+
+ case LEX_JSTE: /* seen <# so look for "#>" */
+ if (c != '#')
+ continue;
+
+ /* now look for '>' */
+ c = this.in.readChar();
+
+
+ if (c != '>')
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte)'\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.JsteTag,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+
+ case LEX_PHP: /* seen "<?php" so look for "?>" */
+ if (c != '?')
+ continue;
+
+ /* now look for '>' */
+ c = this.in.readChar();
+
+ if (c != '>')
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte)'\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.PhpTag,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+
+ case LEX_SECTION: /* seen "<![" so look for "]>" */
+ if (c == '[')
+ {
+ if (this.lexsize == (this.txtstart + 6) &&
+ (getString(this.lexbuf, this.txtstart, 6)).equals("CDATA["))
+ {
+ this.state = LEX_CDATA;
+ this.lexsize -= 6;
+ continue;
+ }
+ }
+
+ if (c != ']')
+ continue;
+
+ /* now look for '>' */
+ c = this.in.readChar();
+
+ if (c != '>')
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte)'\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.SectionTag,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+
+ case LEX_CDATA: /* seen "<![CDATA[" so look for "]]>" */
+ if (c != ']')
+ continue;
+
+ /* now look for ']' */
+ c = this.in.readChar();
+
+ if (c != ']')
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ /* now look for '>' */
+ c = this.in.readChar();
+
+ if (c != '>')
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte)'\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.CDATATag,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+ }
+ }
+
+ if (this.state == LEX_CONTENT) /* text string */
+ {
+ this.txtend = this.lexsize;
+
+ if (this.txtend > this.txtstart)
+ {
+ this.in.ungetChar(c);
+
+ if (this.lexbuf[this.lexsize - 1] == (byte)' ')
+ {
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ }
+
+ this.token = newNode(Node.TextNode,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+ }
+ }
+ else if (this.state == LEX_COMMENT) /* comment */
+ {
+ if (c == StreamIn.EndOfStream)
+ Report.warning(this, null, null, Report.MALFORMED_COMMENT);
+
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte)'\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.CommentTag,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend);
+ return this.token;
+ }
+
+ return null;
+ }
+
+ /*
+ parser for ASP within start tags
+
+ Some people use ASP to customize attributes
+ Tidy isn't really well suited to dealing with ASP
+ This is a workaround for attributes, but won't
+ deal with the case where the ASP is used to tailor
+ the attribute value. Here is an example of a work
+ around for using ASP in attribute values:
+
+ href="<%=rsSchool.Fields("ID").Value%>"
+
+ where the ASP that generates the attribute value
+ is masked from Tidy by the quotemarks.
+
+ */
+
+ /*
+  Reads the body of an ASP block up to its closing "%>" and returns it
+  as an AspTag node, or null when nothing was collected. The caller
+  (parseAttribute) has already consumed the opening "<%".
+
+  If the input ends before "%>" is seen, the text gathered so far is
+  returned; previously this case looped forever, appending EndOfStream
+  to the lexer buffer on every iteration.
+ */
+ public Node parseAsp() {
+     Node asp = null;
+     int c;
+
+     this.txtstart = this.lexsize;
+
+     for (;;) {
+         c = this.in.readChar();
+
+         /* unterminated block: stop rather than buffer EndOfStream */
+         if (c == StreamIn.EndOfStream) {
+             break;
+         }
+
+         addCharToLexer(c);
+
+         if (c != '%') {
+             continue;
+         }
+
+         /* seen '%', the block ends only if '>' follows directly */
+         c = this.in.readChar();
+
+         if (c == StreamIn.EndOfStream) {
+             break;
+         }
+
+         addCharToLexer(c);
+
+         if (c == '>') {
+             /* drop the "%>" terminator from the buffered text */
+             this.lexsize -= 2;
+             break;
+         }
+     }
+
+     this.txtend = this.lexsize;
+
+     if (this.txtend > this.txtstart) {
+         asp = newNode(Node.AspTag,
+                       this.lexbuf,
+                       this.txtstart,
+                       this.txtend);
+     }
+
+     this.txtstart = this.txtend;
+     return asp;
+ }
+
+ /*
+ PHP is like ASP but is based upon XML
+ processing instructions, e.g. <?php ... ?>
+ */
+ /*
+  Reads the body of a PHP block up to its closing "?>" and returns it
+  as a PhpTag node, or null when nothing was collected. The caller
+  (parseAttribute) has already consumed the opening "<?".
+
+  If the input ends before "?>" is seen, the text gathered so far is
+  returned; previously this case looped forever, appending EndOfStream
+  to the lexer buffer on every iteration.
+ */
+ public Node parsePhp() {
+     Node php = null;
+     int c;
+
+     this.txtstart = this.lexsize;
+
+     for (;;) {
+         c = this.in.readChar();
+
+         /* unterminated block: stop rather than buffer EndOfStream */
+         if (c == StreamIn.EndOfStream) {
+             break;
+         }
+
+         addCharToLexer(c);
+
+         if (c != '?') {
+             continue;
+         }
+
+         /* seen '?', the block ends only if '>' follows directly */
+         c = this.in.readChar();
+
+         if (c == StreamIn.EndOfStream) {
+             break;
+         }
+
+         addCharToLexer(c);
+
+         if (c == '>') {
+             /* drop the "?>" terminator from the buffered text */
+             this.lexsize -= 2;
+             break;
+         }
+     }
+
+     this.txtend = this.lexsize;
+
+     if (this.txtend > this.txtstart) {
+         php = newNode(Node.PhpTag,
+                       this.lexbuf,
+                       this.txtstart,
+                       this.txtend);
+     }
+
+     this.txtstart = this.txtend;
+     return php;
+ }
+
+ /* consumes the '>' terminating start tags */
+ /*
+ Reads the next attribute name inside a start tag.
+
+ Returns the name, or null when no name was read. null is returned when
+ the tag ends (">" or "/>", the latter also sets isempty.value), when
+ embedded ASP/PHP markup was parsed instead (the resulting node is then
+ stored in asp or php), when a stray '<' is met, or at end of input.
+ Both asp and php are reset to null on entry.
+
+ The name is folded to lower case unless configuration.XmlTags is set.
+ The characters are collected in the lexer buffer only temporarily:
+ lexsize is restored to its entry value before returning.
+ */
+ public String parseAttribute(MutableBoolean isempty, MutableObject asp,
+ MutableObject php)
+ {
+ int start = 0;
+ // int len = 0; Removed by BUGFIX for 126265
+ short map;
+ String attr;
+ int c = 0;
+
+ asp.setObject(null); /* clear asp pointer */
+ php.setObject(null); /* clear php pointer */
+ /* skip white space before the attribute */
+
+ for (;;)
+ {
+ c = this.in.readChar();
+
+ if (c == '/')
+ {
+ /* "/>" closes an empty element; a lone '/' starts the name */
+ c = this.in.readChar();
+
+ if (c == '>')
+ {
+ isempty.value = true;
+ return null;
+ }
+
+ this.in.ungetChar(c);
+ c = '/';
+ break;
+ }
+
+ /* end of the start tag: no more attributes */
+ if (c == '>')
+ return null;
+
+ if (c =='<')
+ {
+ /* "<%" and "<?" introduce server-side markup used as an attribute */
+ c = this.in.readChar();
+
+ if (c == '%')
+ {
+ asp.setObject(parseAsp());
+ return null;
+ }
+ else if (c == '?')
+ {
+ php.setObject(parsePhp());
+ return null;
+ }
+
+ /* any other '<' is reported and ends attribute parsing for this tag */
+ this.in.ungetChar(c);
+ Report.attrError(this, this.token, null, Report.UNEXPECTED_GT);
+ return null;
+ }
+
+ /* a quote mark where a name is expected is reported and skipped */
+ if (c == '"' || c == '\'')
+ {
+ Report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK);
+ continue;
+ }
+
+ if (c == StreamIn.EndOfStream)
+ {
+ Report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE);
+ this.in.ungetChar(c);
+ return null;
+ }
+
+ map = MAP((char)c);
+
+ /* first non-white character: c is the first character of the name */
+ if ((map & WHITE) == 0)
+ break;
+ }
+
+ /* remember where the name starts so the buffer can be rewound below */
+ start = this.lexsize;
+
+ for (;;)
+ {
+ /* but push back '=' for parseValue() */
+ if (c == '=' || c == '>')
+ {
+ this.in.ungetChar(c);
+ break;
+ }
+
+ /* '<' and end of input also end the name and are left for the caller */
+ if (c == '<' || c == StreamIn.EndOfStream)
+ {
+ this.in.ungetChar(c);
+ break;
+ }
+
+ map = MAP((char)c);
+
+ /* white space ends the name; the white space character is consumed */
+ if ((map & WHITE) != 0)
+ break;
+
+ /* what should be done about non-namechar characters? */
+ /* currently these are incorporated into the attr name */
+
+ /* fold ASCII upper case to lower case for non-XML input */
+ if (!this.configuration.XmlTags && (map & UPPERCASE) != 0)
+ c += (int)('a' - 'A');
+
+ // ++len; Removed by BUGFIX for 126265
+ addCharToLexer(c);
+
+ c = this.in.readChar();
+ }
+
+ // Following line added by GLP to fix BUG 126265. This is a temporary comment
+ // and should be removed when Tidy is fixed.
+ /* length is measured in buffer positions, not in characters read */
+ int len = this.lexsize - start;
+ attr = (len > 0 ? getString(this.lexbuf, start, len) : null);
+ /* discard the name from the lexer buffer again */
+ this.lexsize = start;
+
+ return attr;
+ }
+
+ /*
+ invoked when < is seen in place of attribute value
+ but terminates on whitespace if not ASP, PHP or Tango
+ this routine recognizes ' and " quoted strings
+ */
+ public int parseServerInstruction()
+ {
+ int c, map, delim = '"';
+ boolean isrule = false;
+
+ c = this.in.readChar();
+ addCharToLexer(c);
+
+ /* check for ASP, PHP or Tango */
+ if (c == '%' || c == '?' || c == '@')
+ isrule = true;
+
+ for (;;)
+ {
+ c = this.in.readChar();
+
+ if (c == StreamIn.EndOfStream)
+ break;
+
+ if (c == '>')
+ {
+ if (isrule)
+ addCharToLexer(c);
+ else
+ this.in.ungetChar(c);
+
+ break;
+ }
+
+ /* if not recognized as ASP, PHP or Tango */
+ /* then also finish value on whitespace */
+ if (!isrule)
+ {
+ map = MAP((char)c);
+
+ if ((map & WHITE) != 0)
+ break;
+ }
+
+ addCharToLexer(c);
+
+ if (c == '"')
+ {
+ do
+ {
+ c = this.in.readChar();
+ addCharToLexer(c);
+ }
+ while (c != '"');
+ delim = '\'';
+ continue;
+ }
+
+ if (c == '\'')
+ {
+ do
+ {
+ c = this.in.readChar();
+ addCharToLexer(c);
+ }
+ while (c != '\'');
+ }
+ }
+
+ return delim;
+ }
+
+ /* values start with "=" or " = " etc. */
+ /* doesn't consume the ">" at end of start tag */
+
+ /*
+ Parses the optional "= value" part that follows an attribute name.
+
+ name     - the attribute name; used for the URL and script checks below
+ foldCase - when true, ASCII upper case letters in the value are
+            converted to lower case
+ isempty  - set to true when an unquoted value is directly followed
+            by "/>" and name is not a URL attribute
+ pdelim   - receives the quote character that delimited the value,
+            or '"' when the value was unquoted
+
+ Returns null when no '=' follows the name, or when an unquoted value
+ turns out to be empty. For a quoted value getString() is called even
+ when the length is 0. The value is collected in the lexer buffer only
+ temporarily: lexsize is restored before returning.
+ */
+ public String parseValue(String name, boolean foldCase,
+ MutableBoolean isempty, MutableInteger pdelim)
+ {
+ int len = 0;
+ int start;
+ short map;
+ boolean seen_gt = false;
+ boolean munge = true;
+ int c = 0;
+ int lastc, delim, quotewarning;
+ String value;
+
+ /* delim == 0 means "no quote mark seen" */
+ delim = 0;
+ pdelim.value = (int)'"';
+
+ /*
+ Henry Zrepa reports that some folk are using the
+ embed element with script attributes where newlines
+ are significant and must be preserved
+ */
+ if (configuration.LiteralAttribs)
+ munge = false;
+
+ /* skip white space before the '=' */
+
+ for (;;)
+ {
+ c = this.in.readChar();
+
+ if (c == StreamIn.EndOfStream)
+ {
+ this.in.ungetChar(c);
+ break;
+ }
+
+ map = MAP((char)c);
+
+ if ((map & WHITE) == 0)
+ break;
+ }
+
+ /*
+ c should be '=' if there is a value
+ other legal possibilities are white
+ space, '/' and '>'
+ */
+
+ /* NOTE(review): at end of input c was already pushed back in the loop */
+ /* above, so EndOfStream is pushed back a second time here */
+ if (c != '=')
+ {
+ this.in.ungetChar(c);
+ return null;
+ }
+
+ /* skip white space after '=' */
+
+ for (;;)
+ {
+ c = this.in.readChar();
+
+ if (c == StreamIn.EndOfStream)
+ {
+ this.in.ungetChar(c);
+ break;
+ }
+
+ map = MAP((char)c);
+
+ if ((map & WHITE) == 0)
+ break;
+ }
+
+ /* check for quote marks */
+
+ if (c == '"' || c == '\'')
+ delim = c;
+ else if (c == '<')
+ {
+ /* value starts with '<': treat it as a server-side instruction */
+ start = this.lexsize;
+ addCharToLexer(c);
+ pdelim.value = parseServerInstruction();
+ len = this.lexsize - start;
+ this.lexsize = start;
+ return (len > 0 ? getString(this.lexbuf, start, len) : null);
+ }
+ else
+ this.in.ungetChar(c);
+
+ /*
+ and read the value string
+ check for quote mark if needed
+ */
+
+ quotewarning = 0;
+ start = this.lexsize;
+ /* reset c so that lastc starts out as NUL on the first iteration */
+ c = '\0';
+
+ for (;;)
+ {
+ lastc = c; /* track last character */
+ c = this.in.readChar();
+
+ if (c == StreamIn.EndOfStream)
+ {
+ Report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE);
+ this.in.ungetChar(c);
+ break;
+ }
+
+ if (delim == (char)0)
+ {
+ /* unquoted value: '>' ends it and is left for the caller */
+ if (c == '>')
+ {
+ this.in.ungetChar(c);
+ break;
+ }
+
+ /* a quote mark inside an unquoted value ends it; the quote is dropped */
+ if (c == '"' || c == '\'')
+ {
+ Report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK);
+ break;
+ }
+
+ /* '<' is reported but still copied into the value, since the */
+ /* unget/break below are commented out */
+ if (c == '<')
+ {
+ /* this.in.ungetChar(c); */
+ Report.attrError(this, this.token, null, Report.UNEXPECTED_GT);
+ /* break; */
+ }
+
+ /*
+ For cases like <br clear=all/> need to avoid treating /> as
+ part of the attribute value, however care is needed to avoid
+ so treating <a href=http://www.acme.com/> in this way, which
+ would map the <a> tag to <a href="http://www.acme.com"/>
+ */
+ if (c == '/')
+ {
+ /* peek ahead in case of /> */
+ c = this.in.readChar();
+
+ if (c == '>' &&
+ !AttributeTable.getDefaultAttributeTable().isUrl(name))
+ {
+ isempty.value = true;
+ this.in.ungetChar(c);
+ break;
+ }
+
+ /* unget peeked char */
+ this.in.ungetChar(c);
+ c = '/';
+ }
+ }
+ else /* delim is '\'' or '"' */
+ {
+ /* the closing quote ends the value and is consumed */
+ if (c == delim)
+ break;
+
+ /* treat CRLF, CR and LF as single line break */
+
+ if (c == '\r')
+ {
+ c = this.in.readChar();
+ if (c != '\n')
+ this.in.ungetChar(c);
+
+ c = '\n';
+ }
+
+ /* count characters that hint at a missing closing quote */
+ if (c == '\n' || c == '<' || c == '>')
+ ++quotewarning;
+
+ if (c == '>')
+ seen_gt = true;
+ }
+
+ /* entities are handed to parseEntity(); the '&' is buffered first */
+ if (c == '&')
+ {
+ addCharToLexer(c);
+ parseEntity((short)0);
+ continue;
+ }
+
+ /*
+ kludge for JavaScript attribute values
+ with line continuations in string literals
+ */
+ /* a backslash followed by '\n' is dropped and c becomes that '\n'; */
+ /* any other backslash is kept as is */
+ if (c == '\\')
+ {
+ c = this.in.readChar();
+
+ if (c != '\n')
+ {
+ this.in.ungetChar(c);
+ c = '\\';
+ }
+ }
+
+ map = MAP((char)c);
+
+ if ((map & WHITE) != 0)
+ {
+ /* white space ends an unquoted value */
+ if (delim == (char)0)
+ break;
+
+ /* inside quotes, white space becomes ' ' and runs are collapsed */
+ if (munge)
+ {
+ c = ' ';
+
+ if (lastc == ' ')
+ continue;
+ }
+ }
+ else if (foldCase && (map & UPPERCASE) != 0)
+ c += (int)('a' - 'A');
+
+ addCharToLexer(c);
+ }
+
+ if (quotewarning > 10 && seen_gt && munge)
+ {
+ /*
+ there is almost certainly a missing trailing quote mark
+ as we have seen too many newlines, < or > characters.
+
+ an exception is made for Javascript attributes and the
+ javascript URL scheme which may legitimately include < and >
+ */
+ /* NOTE(review): getString() is asked for 11 bytes from start without */
+ /* checking that the value is that long - confirm this is safe */
+ if (!AttributeTable.getDefaultAttributeTable().isScript(name) &&
+ !(AttributeTable.getDefaultAttributeTable().isUrl(name) &&
+ (getString(this.lexbuf, start, 11)).equals("javascript:")))
+ Report.error(this, null, null, Report.SUSPECTED_MISSING_QUOTE);
+ }
+
+ /* take the value out of the lexer buffer and rewind the buffer */
+ len = this.lexsize - start;
+ this.lexsize = start;
+
+ if (len > 0 || delim != 0)
+ value = getString(this.lexbuf, start, len);
+ else
+ value = null;
+
+ /* note delimiter if given */
+ if (delim != 0)
+ pdelim.value = delim;
+ else
+ pdelim.value = (int)'"';
+
+ return value;
+ }
+
+ /* attr must be non-null */
+ /*
+  Tells whether attr is a well-formed attribute name: a letter followed
+  by zero or more name characters, as classified by MAP().
+  attr must be non-null and non-empty.
+ */
+ public static boolean isValidAttrName(String attr) {
+     /* the name has to open with a letter */
+     if ((MAP(attr.charAt(0)) & LETTER) == 0) {
+         return false;
+     }
+
+     /* every following character has to be a name character */
+     for (int pos = 1; pos < attr.length(); pos++) {
+         if ((MAP(attr.charAt(pos)) & NAMECHAR) == 0) {
+             return false;
+         }
+     }
+
+     return true;
+ }
+
+ /* swallows closing '>' */
+
+ public AttVal parseAttrs(MutableBoolean isempty)
+ {
+ AttVal av, list;
+ String attribute, value;
+ MutableInteger delim = new MutableInteger();
+ MutableObject asp = new MutableObject();
+ MutableObject php = new MutableObject();
+
+ list = null;
+
+ for (; !endOfInput();)
+ {
+ attribute = parseAttribute(isempty, asp, php);
+
+ if (attribute == null)
+ {
+ /* check if attributes are created by ASP markup */
+ if (asp.getObject() != null)
+ {
+ av = new AttVal(list, null, (Node)asp.getObject(), null,
+ '\0', null, null );
+ list = av;
+ continue;
+ }
+
+ /* check if attributes are created by PHP markup */
+ if (php.getObject() != null)
+ {
+ av = new AttVal(list, null, null, (Node)php.getObject(),
+ '\0', null, null );
+ list = av;
+ continue;
+ }
+
+ break;
+ }
+
+ value = parseValue(attribute, false, isempty, delim);
+
+ if (attribute != null && isValidAttrName(attribute))
+ {
+ av = new AttVal( list, null, null, null,
+ delim.value, attribute, value );
+ av.dict =
+ AttributeTable.getDefaultAttributeTable().findAttribute(av);
+ list = av;
+ }
+ else
+ {
+ av = new AttVal( null, null, null, null,
+ 0, attribute, value );
+ Report.attrError(this, this.token, value, Report.BAD_ATTRIBUTE_VALUE);
+ }
+ }
+
+ return list;
+ }
+
+ /*
+ push a copy of an inline node onto stack
+ but don't push if implicit or OBJECT or APPLET
+ (implicit tags are ones generated from the istack)
+
+ One issue arises with pushing inlines when
+ the tag is already pushed. For instance:
+
+ <p><em>text
+ <p><em>more text
+
+ Shouldn't be mapped to
+
+ <p><em>text</em></p>
+ <p><em><em>more text</em></em>
+ */
+ public void pushInline( Node node )
+ {
+ IStack is;
+
+ if (node.implicit)
+ return;
+
+ if (node.tag == null)
+ return;
+
+ if ((node.tag.model & Dict.CM_INLINE) == 0 )
+ return;
+
+ if ((node.tag.model & Dict.CM_OBJECT) != 0)
+ return;
+
+ if (node.tag != configuration.tt.tagFont && isPushed(node))
+ return;
+
+ // make sure there is enough space for the stack
+ is = new IStack();
+ is.tag = node.tag;
+ is.element = node.element;
+ if (node.attributes != null)
+ is.attributes = cloneAttributes(node.attributes);
+ this.istack.push( is );
+ }
+
+ /* pop inline stack */
+ public void popInline( Node node )
+ {
+ AttVal av;
+ IStack is;
+
+ if (node != null) {
+
+ if (node.tag == null)
+ return;
+
+ if ((node.tag.model & Dict.CM_INLINE) == 0)
+ return;
+
+ if ((node.tag.model & Dict.CM_OBJECT) != 0)
+ return;
+
+ // if node is </a> then pop until we find an <a>
+ if (node.tag == configuration.tt.tagA) {
+
+ while (this.istack.size() > 0) {
+ is = (IStack)this.istack.pop();
+ if (is.tag == configuration.tt.tagA) {
+ break;
+ }
+ }
+
+ if (this.insert >= this.istack.size())
+ this.insert = -1;
+ return;
+ }
+ }
+
+ if (this.istack.size() > 0) {
+ is = (IStack)this.istack.pop();
+ if (this.insert >= this.istack.size())
+ this.insert = -1;
+ }
+ }
+
+ public boolean isPushed( Node node )
+ {
+ int i;
+ IStack is;
+
+ for (i = this.istack.size() - 1; i >= 0; --i) {
+ is = (IStack)this.istack.elementAt(i);
+ if (is.tag == node.tag)
+ return true;
+ }
+
+ return false;
+ }
+
+ /*
+ This has the effect of inserting "missing" inline
+ elements around the contents of blocklevel elements
+ such as P, TD, TH, DIV, PRE etc. This procedure is
+ called at the start of ParseBlock. when the inline
+ stack is not empty, as will be the case in:
+
+ <i><h1>italic heading</h1></i>
+
+ which is then treated as equivalent to
+
+ <h1><i>italic heading</i></h1>
+
+ This is implemented by setting the lexer into a mode
+ where it gets tokens from the inline stack rather than
+ from the input stream.
+ */
+ public int inlineDup( Node node )
+ {
+ int n;
+
+ n = this.istack.size() - this.istackbase;
+ if ( n > 0 ) {
+ this.insert = this.istackbase;
+ this.inode = node;
+ }
+
+ return n;
+ }
+
+ public Node insertedToken()
+ {
+ Node node;
+ IStack is;
+ int n;
+
+ // this will only be null if inode != null
+ if (this.insert == -1) {
+ node = this.inode;
+ this.inode = null;
+ return node;
+ }
+
+ // is this is the "latest" node then update
+ // the position, otherwise use current values
+
+ if (this.inode == null) {
+ this.lines = this.in.curline;
+ this.columns = this.in.curcol;
+ }
+
+ node = newNode(Node.StartTag,
+ this.lexbuf,
+ this.txtstart,
+ this.txtend); // GLP: Bugfix 126261. Remove when this change
+ // is fixed in istack.c in the original Tidy
+ node.implicit = true;
+ is = (IStack)this.istack.elementAt( this.insert );
+ node.element = is.element;
+ node.tag = is.tag;
+ if (is.attributes != null)
+ node.attributes = cloneAttributes(is.attributes);
+
+ // advance lexer to next item on the stack
+ n = this.insert;
+
+ // and recover state if we have reached the end
+ if (++n < this.istack.size() ) {
+ this.insert = n;
+ } else {
+ this.insert = -1;
+ }
+
+ return node;
+ }
+
+ /* AQ: Try this for speed optimization */
+ /*
+  Case-insensitive equality test in strcmp style: returns 0 when s1 and
+  s2 are equal ignoring case, and 1 otherwise (never a negative value).
+  s1 must be non-null.
+ */
+ public static int wstrcasecmp(String s1, String s2) {
+     if (s1.equalsIgnoreCase(s2)) {
+         return 0;
+     }
+
+     return 1;
+ }
+
+ /*
+  Orders two strings, skipping over a common prefix compared with
+  toLower(). Returns 0 when both strings end together, -1 when s1 is a
+  proper prefix of s2 and 1 when s2 is a proper prefix of s1. Otherwise
+  the first differing characters decide; they are compared as they
+  stand, without case folding.
+ */
+ public static int wstrcaselexcmp(String s1, String s2) {
+     int pos;
+
+     for (pos = 0; pos < s1.length() && pos < s2.length(); pos++) {
+         if (toLower(s1.charAt(pos)) != toLower(s2.charAt(pos))) {
+             break;
+         }
+     }
+
+     boolean firstDone = (pos == s1.length());
+     boolean secondDone = (pos == s2.length());
+
+     if (firstDone) {
+         return secondDone ? 0 : -1;
+     }
+
+     if (secondDone) {
+         return 1;
+     }
+
+     return ( s1.charAt(pos) > s2.charAt(pos) ? 1 : -1 );
+ }
+
+ /*
+  Returns true when s2 occurs anywhere inside s1, ignoring case.
+  An empty s2 is found in every s1. Both arguments must be non-null.
+
+  Each candidate position is compared over exactly s2.length()
+  characters. The previous version compared s2 with the whole remainder
+  of s1, so it only matched when s1 ended with s2.
+ */
+ public static boolean wsubstr(String s1, String s2) {
+     int len2 = s2.length();
+     int last = s1.length() - len2;
+
+     for (int i = 0; i <= last; ++i) {
+         if (s1.regionMatches(true, i, s2, 0, len2)) {
+             return true;
+         }
+     }
+
+     return false;
+ }
+
+ public boolean canPrune(Node element)
+ {
+ if (element.type == Node.TextNode)
+ return true;
+
+ if (element.content != null)
+ return false;
+
+ if (element.tag == configuration.tt.tagA && element.attributes != null)
+ return false;
+
+ if (element.tag == configuration.tt.tagP && !this.configuration.DropEmptyParas)
+ return false;
+
+ if (element.tag == null)
+ return false;
+
+ if ((element.tag.model & Dict.CM_ROW) != 0)
+ return false;
+
+ if (element.tag == configuration.tt.tagApplet)
+ return false;
+
+ if (element.tag == configuration.tt.tagObject)
+ return false;
+
+ if (element.attributes != null &&
+ (element.getAttrByName("id") != null ||
+ element.getAttrByName("name") != null) )
+ return false;
+
+ return true;
+ }
+
+ /* duplicate name attribute as an id */
+ public void fixId(Node node)
+ {
+ AttVal name = node.getAttrByName("name");
+ AttVal id = node.getAttrByName("id");
+
+ if (name != null)
+ {
+ if (id != null)
+ {
+ if (!id.value.equals(name.value))
+ Report.attrError(this, node, "name", Report.ID_NAME_MISMATCH);
+ }
+ else if (this.configuration.XmlOut)
+ node.addAttribute("id", name.value);
+ }
+ }
+
+ /*
+ defer duplicates when entering a table or other
+ element where the inlines shouldn't be duplicated
+ */
+ /*
+  Leaves inline-duplication mode: forgets the remembered node and
+  resets the insert position to -1.
+ */
+ public void deferDup() {
+     this.inode = null;
+     this.insert = -1;
+ }
+
+ /* Private methods and fields */
+
+ /* lexer char types */
+ /* bit flags stored in lexmap; one character may carry several of them */
+ private static final short DIGIT = 1; /* 0-9 */
+ private static final short LETTER = 2; /* a-z and A-Z */
+ private static final short NAMECHAR = 4; /* letters, digits and "-.:_" */
+ private static final short WHITE = 8; /* space, tab, CR, LF and FF */
+ private static final short NEWLINE = 16; /* CR, LF and FF */
+ private static final short LOWERCASE = 32; /* a-z */
+ private static final short UPPERCASE = 64; /* A-Z */
+
+ /* lexer GetToken states */
+
+ private static final short LEX_CONTENT = 0; /* reading text content */
+ private static final short LEX_GT = 1; /* presumably: a '<' has been seen - TODO confirm */
+ private static final short LEX_ENDTAG = 2; /* seen "</" + letter */
+ private static final short LEX_STARTTAG = 3; /* at first letter of a tag name */
+ private static final short LEX_COMMENT = 4; /* seen "<!--", looking for "-->" */
+ private static final short LEX_DOCTYPE = 5; /* seen "<!d", looking for '>' */
+ private static final short LEX_PROCINSTR = 6; /* seen "<?", looking for '>' */
+ private static final short LEX_ENDCOMMENT = 7; /* NOTE(review): no use visible in this part of the file */
+ private static final short LEX_CDATA = 8; /* seen "<![CDATA[", looking for "]]>" */
+ private static final short LEX_SECTION = 9; /* seen "<![", looking for "]>" */
+ private static final short LEX_ASP = 10; /* seen "<%", looking for "%>" */
+ private static final short LEX_JSTE = 11; /* seen "<#", looking for "#>" */
+ private static final short LEX_PHP = 12; /* seen "<?php", looking for "?>" */
+
+ /* used to classify chars for lexical purposes */
+ /* indexed by character code 0..127; filled in by the static initializer */
+ private static short[] lexmap = new short[128];
+
+ /*
+  Adds the flag bits in code to the lexmap entry of every character in
+  str. All characters of str must have a code below lexmap.length.
+ */
+ private static void mapStr(String str, short code) {
+     int count = str.length();
+
+     for (int pos = 0; pos < count; pos++) {
+         lexmap[str.charAt(pos)] |= code;
+     }
+ }
+
+ static {
+ mapStr("\r\n\f", (short)(NEWLINE|WHITE));
+ mapStr(" \t", WHITE);
+ mapStr("-.:_", NAMECHAR);
+ mapStr("0123456789", (short)(DIGIT|NAMECHAR));
+ mapStr("abcdefghijklmnopqrstuvwxyz", (short)(LOWERCASE|LETTER|NAMECHAR));
+ mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short)(UPPERCASE|LETTER|NAMECHAR));
+ }
+
+ /*
+  Returns the lexmap class flags of c, or 0 for any character with a
+  code of 128 or above.
+ */
+ private static short MAP( char c ) {
+     if (c >= 128) {
+         return 0;
+     }
+
+     return lexmap[c];
+ }
+
+ /* true when MAP() classifies c as white space */
+ private static boolean isWhite(char c) {
+     return (MAP(c) & WHITE) != 0;
+ }
+
+ /* true when MAP() classifies c as a digit */
+ private static boolean isDigit(char c) {
+     return (MAP(c) & DIGIT) != 0;
+ }
+
+ /* true when MAP() classifies c as a letter */
+ private static boolean isLetter(char c) {
+     return (MAP(c) & LETTER) != 0;
+ }
+
+ /*
+  Converts a character that MAP() flags as UPPERCASE to its lower case
+  form; every other character is returned unchanged.
+ */
+ private static char toLower(char c) {
+     boolean upper = (MAP(c) & UPPERCASE) != 0;
+
+     return upper ? (char)( c + ('a' - 'A') ) : c;
+ }
+
+ /*
+  Converts a character that MAP() flags as LOWERCASE to its upper case
+  form; every other character is returned unchanged.
+ */
+ private static char toUpper(char c) {
+     boolean lower = (MAP(c) & LOWERCASE) != 0;
+
+     return lower ? (char)( c + ('A' - 'a') ) : c;
+ }
+
+ public static char foldCase(char c, boolean tocaps, boolean xmlTags)
+ {
+ short m;
+
+ if (!xmlTags)
+ {
+ m = MAP(c);
+
+ if (tocaps)
+ {
+ if ((m & LOWERCASE) != 0)
+ c = (char)( (int)c + (int)'A' - (int)'a' );
+ }
+ else /* force to lower case */
+ {
+ if ((m & UPPERCASE) != 0)
+ c = (char)( (int)c + (int)'a' - (int)'A' );
+ }
+ }
+
+ return c;
+ }
+
+
+ /*
+ One row of the W3CVersion table: the name of an HTML version together
+ with the XHTML ("voyager") name, DTD URI and version code that go with it.
+ */
+ private static class W3CVersionInfo
+ {
+ String name; /* HTML version name, e.g. "HTML 4.01" */
+ String voyagerName; /* XHTML name, e.g. "XHTML 1.0 Strict" */
+ String profile; /* URI of the XHTML DTD */
+ short code; /* Dict.VERS_* version code */
+
+ /* stores the four values unchanged in the fields of the same name */
+ public W3CVersionInfo( String name,
+ String voyagerName,
+ String profile,
+ short code )
+ {
+ this.name = name;
+ this.voyagerName = voyagerName;
+ this.profile = profile;
+ this.code = code;
+ }
+ }
+
+ /* the 3 URIs for the XHTML 1.0 DTDs */
+ private static final String voyager_loose = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
+ private static final String voyager_strict = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
+ private static final String voyager_frameset = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd";
+
+ /* the XHTML namespace URI */
+ private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";
+
+ /*
+ table of known HTML versions; each row gives the HTML version name, the
+ XHTML name and DTD URI paired with it, and its Dict.VERS_* code
+ */
+ private static Lexer.W3CVersionInfo[] W3CVersion =
+ {
+ new W3CVersionInfo("HTML 4.01",
+ "XHTML 1.0 Strict",
+ voyager_strict,
+ Dict.VERS_HTML40_STRICT),
+ new W3CVersionInfo("HTML 4.01 Transitional",
+ "XHTML 1.0 Transitional",
+ voyager_loose,
+ Dict.VERS_HTML40_LOOSE),
+ new W3CVersionInfo("HTML 4.01 Frameset",
+ "XHTML 1.0 Frameset",
+ voyager_frameset,
+ Dict.VERS_FRAMES),
+ new W3CVersionInfo("HTML 4.0",
+ "XHTML 1.0 Strict",
+ voyager_strict,
+ Dict.VERS_HTML40_STRICT),
+ new W3CVersionInfo("HTML 4.0 Transitional",
+ "XHTML 1.0 Transitional",
+ voyager_loose,
+ Dict.VERS_HTML40_LOOSE),
+ new W3CVersionInfo("HTML 4.0 Frameset",
+ "XHTML 1.0 Frameset",
+ voyager_frameset,
+ Dict.VERS_FRAMES),
+ /* HTML 3.2 and HTML 2.0 have no XHTML version of their own and are */
+ /* paired with the transitional and strict XHTML 1.0 DTDs respectively */
+ new W3CVersionInfo("HTML 3.2",
+ "XHTML 1.0 Transitional",
+ voyager_loose,
+ Dict.VERS_HTML32),
+ new W3CVersionInfo("HTML 2.0",
+ "XHTML 1.0 Strict",
+ voyager_strict,
+ Dict.VERS_HTML20)
+ };
+
+}
--- /dev/null
+/*
+ * @(#)MutableBoolean.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Mutable Boolean
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class MutableBoolean { // plain holder: the boolean can be changed in place by anyone holding the reference
+
+ public boolean value; // the held flag; public, so it is read and written directly (no accessors)
+
+}
--- /dev/null
+/*
+ * @(#)MutableInteger.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Mutable Integer
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class MutableInteger { // plain holder: the int can be changed in place by anyone holding the reference
+
+ public int value; // the held number; public, so it is read and written directly (no accessors)
+
+}
--- /dev/null
+/*
+ * @(#)MutableObject.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Mutable Object
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class MutableObject {
+
+    /** The reference currently held; may be null. */
+    private Object value;
+
+    /** Creates a holder that initially contains null. */
+    public MutableObject() {
+        this.value = null;
+    }
+
+    /**
+     * Creates a holder that initially contains the given reference.
+     *
+     * @param o the object to hold (null is allowed)
+     */
+    public MutableObject(Object o) {
+        this.value = o;
+    }
+
+    /**
+     * Returns the reference currently held.
+     *
+     * @return the held object, or null if none has been stored
+     */
+    public Object getObject() {
+        return this.value;
+    }
+
+    /**
+     * Replaces the held reference.
+     *
+     * @param o the new object to hold (null is allowed)
+     */
+    public void setObject(Object o) {
+        this.value = o;
+    }
+
+}
--- /dev/null
+/*
+ * @(#)Node.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+import sun.security.krb5.internal.n;
+
+/**
+ *
+ * Node
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/*
+ Used for elements and text nodes
+ element name is null for text nodes
+ start and end are offsets into lexbuf
+ which contains the textual content of
+ all elements in the parse tree.
+
+ parent and content allow traversal
+ of the parse tree in any direction.
+ attributes are represented as a linked
+ list of AttVal nodes which hold the
+ strings for attribute/value pairs.
+*/
+
+public class Node {
+
+ public static final short RootNode = 0; /* node kinds held in 'type'; this one is adapted to a DOM document by getAdapter() */
+ public static final short DocTypeTag = 1; /* the kind findDocType() searches for */
+ public static final short CommentTag = 2;
+ public static final short ProcInsTag = 3;
+ public static final short TextNode = 4; /* the kind created by the no-argument constructor */
+ public static final short StartTag = 5;
+ public static final short EndTag = 6;
+ public static final short StartEndTag = 7; /* with StartTag, counts as an element in isElement() */
+ public static final short CDATATag = 8;
+ public static final short SectionTag = 9;
+ public static final short AspTag = 10;
+ public static final short JsteTag = 11;
+ public static final short PhpTag = 12;
+
+ protected Node parent; /* enclosing node, null when detached */
+ protected Node prev; /* previous sibling */
+ protected Node next; /* next sibling */
+ protected Node last; /* last child (see insertNodeAtEnd) */
+ protected int start; /* start of span onto text array */
+ protected int end; /* end of span onto text array */
+ protected byte[] textarray; /* the text array */
+ protected short type; /* TextNode, StartTag, EndTag etc. */
+ protected boolean closed; /* true if closed by explicit end tag */
+ protected boolean implicit; /* true if inferred */
+ protected boolean linebreak; /* true if followed by a line break */
+ protected Dict was; /* old tag when it was changed */
+ protected Dict tag; /* tag's dictionary definition */
+ protected String element; /* name (null for text nodes) */
+ protected AttVal attributes; /* head of the attribute list, linked through AttVal.next */
+ protected Node content; /* first child */
+
+    /** Creates a detached text node with no text array and an empty span. */
+    public Node() {
+        this(TextNode, null, 0, 0);
+    }
+
+    /**
+     * Creates a detached node of the given kind over a span of a text array.
+     * All tree links, flags, tag information and attributes start out
+     * null/false.
+     *
+     * @param type      one of the node kind constants (TextNode, StartTag, ...)
+     * @param textarray the text array the span refers to (may be null)
+     * @param start     start of the span
+     * @param end       end of the span
+     */
+    public Node(short type, byte[] textarray, int start, int end) {
+        /* what the caller supplied */
+        this.type = type;
+        this.textarray = textarray;
+        this.start = start;
+        this.end = end;
+
+        /* not linked into any tree yet */
+        this.parent = null;
+        this.prev = null;
+        this.next = null;
+        this.last = null;
+        this.content = null;
+
+        /* flags */
+        this.closed = false;
+        this.implicit = false;
+        this.linebreak = false;
+
+        /* no tag information, name or attributes */
+        this.was = null;
+        this.tag = null;
+        this.element = null;
+        this.attributes = null;
+    }
+
+    /**
+     * Creates a detached, named node. For start, end and start-end tags the
+     * tag table is asked to look the node up via findTag.
+     *
+     * @param type      one of the node kind constants
+     * @param textarray the text array the span refers to (may be null)
+     * @param start     start of the span
+     * @param end       end of the span
+     * @param element   the element name
+     * @param tt        tag table consulted for tag-type nodes
+     */
+    public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt) {
+        /* same initial state as the four-argument constructor ... */
+        this(type, textarray, start, end);
+
+        /* ... plus the element name */
+        this.element = element;
+
+        boolean isTag = (type == StartTag || type == StartEndTag || type == EndTag);
+        if (isTag) {
+            tt.findTag(this);
+        }
+    }
+
+    /**
+     * Copies this node without its children or sibling links
+     * (used to clone heading nodes when split by an HR).
+     * The copy keeps the same parent reference, gets its own copy of the
+     * text span rebased to offset 0, shares the tag definitions and
+     * clones the attribute list.
+     *
+     * @return the new Node
+     */
+    protected Object clone() {
+        Node copy = new Node();
+
+        copy.parent = this.parent;
+
+        if (this.textarray != null) {
+            /* private copy of just the bytes this node spans */
+            int length = this.end - this.start;
+            copy.textarray = new byte[length];
+            copy.start = 0;
+            copy.end = length;
+            if (length > 0) {
+                System.arraycopy(this.textarray, this.start, copy.textarray, 0, length);
+            }
+        }
+
+        copy.type = this.type;
+        copy.closed = this.closed;
+        copy.implicit = this.implicit;
+        copy.linebreak = this.linebreak;
+        copy.was = this.was;
+        copy.tag = this.tag;
+        /* a fresh Node already has a null element, so no null test is needed */
+        copy.element = this.element;
+
+        if (this.attributes != null) {
+            copy.attributes = (AttVal) this.attributes.clone();
+        }
+
+        return copy;
+    }
+
+    /**
+     * Looks up an attribute of this node by exact (case-sensitive) name.
+     *
+     * @param name the attribute name to look for
+     * @return the first matching attribute, or null if there is none
+     *         (always null when name is null)
+     */
+    public AttVal getAttrByName(String name) {
+        if (name == null) {
+            return null;
+        }
+
+        AttVal candidate = this.attributes;
+        while (candidate != null) {
+            if (candidate.attribute != null && candidate.attribute.equals(name)) {
+                return candidate;
+            }
+            candidate = candidate.next;
+        }
+
+        return null;
+    }
+
+    /**
+     * Default method for checking an element's attributes: calls
+     * checkAttribute on every attribute in list order.
+     *
+     * @param lexer the lexer passed through to each check
+     */
+    public void checkAttributes(Lexer lexer) {
+        AttVal current = this.attributes;
+
+        while (current != null) {
+            current.checkAttribute(lexer, this);
+            current = current.next;
+        }
+    }
+
+ public void checkUniqueAttributes(Lexer lexer)
+ {
+ AttVal attval;
+
+ for (attval = this.attributes; attval != null; attval = attval.next) {
+ if (attval.asp == null && attval.php == null)
+ attval.checkUniqueAttribute(lexer, this);
+ }
+ }
+
+ public void addAttribute(String name, String value)
+ {
+ AttVal av = new AttVal(null, null, null, null,
+ '"', name, value);
+ av.dict =
+ AttributeTable.getDefaultAttributeTable().findAttribute(av);
+
+ if (this.attributes == null)
+ this.attributes = av;
+ else /* append to end of attributes */
+ {
+ AttVal here = this.attributes;
+
+ while (here.next != null)
+ here = here.next;
+
+ here.next = av;
+ }
+ }
+
+ /* remove attribute from node then free it */
+ public void removeAttribute(AttVal attr)
+ {
+ AttVal av;
+ AttVal prev = null;
+ AttVal next;
+
+ for (av = this.attributes; av != null; av = next)
+ {
+ next = av.next;
+
+ if (av == attr)
+ {
+ if (prev != null)
+ prev.next = next;
+ else
+ this.attributes = next;
+ }
+ else
+ prev = av;
+ }
+ }
+
+    /**
+     * Finds the doctype among this node's direct children.
+     *
+     * @return the first child whose type is DocTypeTag, or null if none
+     */
+    public Node findDocType() {
+        Node child = this.content;
+
+        while (child != null && child.type != DocTypeTag) {
+            child = child.next;
+        }
+
+        return child;
+    }
+
+ public void discardDocType()
+ {
+ Node node;
+
+ node = findDocType();
+ if (node != null)
+ {
+ if (node.prev != null)
+ node.prev.next = node.next;
+ else
+ node.parent.content = node.next;
+
+ if (node.next != null)
+ node.next.prev = node.prev;
+
+ node.next = null;
+ }
+ }
+
+    /**
+     * Removes a node from the markup tree and discards it.
+     *
+     * @param element the node to remove (may be null)
+     * @return the node that followed element before removal, or null
+     */
+    public static Node discardElement(Node element) {
+        if (element == null) {
+            return null;
+        }
+
+        /* remember the successor first: removeNode clears the links */
+        Node successor = element.next;
+        removeNode(element);
+        return successor;
+    }
+
+ /* insert node into markup tree */
+ public static void insertNodeAtStart(Node element, Node node)
+ {
+ node.parent = element;
+
+ if (element.content == null)
+ element.last = node;
+ else
+ element.content.prev = node; // AQ added 13 Apr 2000
+
+ node.next = element.content;
+ node.prev = null;
+ element.content = node;
+ }
+
+ /* insert node into markup tree */
+ public static void insertNodeAtEnd(Node element, Node node)
+ {
+ node.parent = element;
+ node.prev = element.last;
+
+ if (element.last != null)
+ element.last.next = node;
+ else
+ element.content = node;
+
+ element.last = node;
+ }
+
+    /**
+     * Inserts node into the markup tree in place of element, which is
+     * moved to become the only child of node.
+     *
+     * @param element the node being wrapped; it must have a parent
+     * @param node    the node that takes element's position
+     */
+    public static void insertNodeAsParent(Node element, Node node) {
+        /* node adopts element as its single child */
+        node.content = element;
+        node.last = element;
+        node.parent = element.parent;
+        element.parent = node;
+
+        /* the old parent now refers to node wherever it referred to element */
+        if (node.parent.content == element) {
+            node.parent.content = node;
+        }
+        if (node.parent.last == element) {
+            node.parent.last = node;
+        }
+
+        /* node takes over element's left neighbour ... */
+        node.prev = element.prev;
+        element.prev = null;
+        if (node.prev != null) {
+            node.prev.next = node;
+        }
+
+        /* ... and its right neighbour */
+        node.next = element.next;
+        element.next = null;
+        if (node.next != null) {
+            node.next.prev = node;
+        }
+    }
+
+    /**
+     * Inserts node into the markup tree immediately before element.
+     * Works for an element that has no parent as well (only the sibling
+     * links are updated then), matching insertNodeAfterElement.
+     *
+     * @param element the existing node to insert in front of
+     * @param node    the node to insert
+     */
+    public static void insertNodeBeforeElement(Node element, Node node) {
+        Node parent = element.parent;
+
+        node.parent = parent;
+        node.next = element;
+        node.prev = element.prev;
+        element.prev = node;
+
+        if (node.prev != null) {
+            node.prev.next = node;
+        }
+
+        /* guard against a detached element: parent may be null */
+        if (parent != null && parent.content == element) {
+            parent.content = node;
+        }
+    }
+
+ /* insert node into markup tree after element */
+ public static void insertNodeAfterElement(Node element, Node node)
+ {
+ Node parent;
+
+ parent = element.parent;
+ node.parent = parent;
+
+ // AQ - 13Jan2000 fix for parent == null
+ if (parent != null && parent.last == element)
+ parent.last = node;
+ else
+ {
+ node.next = element.next;
+ // AQ - 13Jan2000 fix for node.next == null
+ if (node.next != null)
+ node.next.prev = node;
+ }
+
+ element.next = node;
+ node.prev = element;
+ }
+
+ public static void trimEmptyElement(Lexer lexer, Node element)
+ {
+ TagTable tt = lexer.configuration.tt;
+
+ if (lexer.canPrune(element))
+ {
+ if (element.type != TextNode)
+ Report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT);
+
+ discardElement(element);
+ }
+ else if (element.tag == tt.tagP && element.content == null)
+ {
+ /* replace <p></p> by <br><br> to preserve formatting */
+ Node node = lexer.inferredTag("br");
+ Node.coerceNode(lexer, element, tt.tagBr);
+ Node.insertNodeAfterElement(element, node);
+ }
+ }
+
+ /*
+ This maps
+ <em>hello </em><strong>world</strong>
+ to
+ <em>hello</em> <strong>world</strong>
+
+ If last child of element is a text node
+ then trim trailing white space character
+ moving it to after element's end tag.
+ */
+ public static void trimTrailingSpace(Lexer lexer, Node element, Node last)
+ {
+ byte c;
+ TagTable tt = lexer.configuration.tt;
+
+ if (last != null && last.type == Node.TextNode &&
+ last.end > last.start)
+ {
+ c = lexer.lexbuf[last.end - 1];
+
+ if (c == 160 || c == (byte)' ')
+ {
+ /* take care with <td> </td> */
+ if (element.tag == tt.tagTd ||
+ element.tag == tt.tagTh)
+ {
+ if (last.end > last.start + 1)
+ last.end -= 1;
+ }
+ else
+ {
+ last.end -= 1;
+
+ if (((element.tag.model & Dict.CM_INLINE) != 0) &&
+ !((element.tag.model & Dict.CM_FIELD) != 0))
+ lexer.insertspace = true;
+
+ /* if empty string then delete from parse tree */
+ if (last.start == last.end)
+ trimEmptyElement(lexer, last);
+ }
+ }
+ }
+ }
+
+ /*
+ This maps
+ <p>hello<em> world</em>
+ to
+ <p>hello <em>world</em>
+
+ Trims initial space, by moving it before the
+ start tag, or if this element is the first in
+ parent's content, then by discarding the space
+ */
+ public static void trimInitialSpace(Lexer lexer, Node element, Node text)
+ {
+ Node prev, node;
+
+ // GLP: Local fix to Bug 119789. Remove this comment when parser.c is updated.
+ // 31-Oct-00.
+ if (text.type == TextNode && text.textarray[text.start] == (byte)' '
+ && (text.start < text.end))
+ {
+ if (((element.tag.model & Dict.CM_INLINE) != 0) &&
+ !((element.tag.model & Dict.CM_FIELD) != 0) &&
+ element.parent.content != element)
+ {
+ prev = element.prev;
+
+ if (prev != null && prev.type == TextNode)
+ {
+ if (prev.textarray[prev.end - 1] != (byte)' ')
+ prev.textarray[prev.end++] = (byte)' ';
+
+ ++element.start;
+ }
+ else /* create new node */
+ {
+ node = lexer.newNode();
+ // Local fix for bug 228486 (GLP). This handles the case
+ // where we need to create a preceeding text node but there are
+ // no "slots" in textarray that we can steal from the current
+ // element. Therefore, we create a new textarray containing
+ // just the blank. When Tidy is fixed, this should be removed.
+ if (element.start >= element.end)
+ {
+ node.start = 0;
+ node.end = 1;
+ node.textarray = new byte[1];
+ }
+ else
+ {
+ node.start = element.start++;
+ node.end = element.start;
+ node.textarray = element.textarray;
+ }
+ node.textarray[node.start] = (byte)' ';
+ node.prev = prev;
+ if (prev != null)
+ prev.next = node;
+ node.next = element;
+ element.prev = node;
+ node.parent = element.parent;
+ }
+ }
+
+ /* discard the space in current node */
+ ++text.start;
+ }
+ }
+
+ /*
+ Move initial and trailing space out.
+ This routine maps:
+
+ hello<em> world</em>
+ to
+ hello <em>world</em>
+ and
+ <em>hello </em><strong>world</strong>
+ to
+ <em>hello</em> <strong>world</strong>
+ */
+ public static void trimSpaces(Lexer lexer, Node element)
+ {
+ Node text = element.content;
+ TagTable tt = lexer.configuration.tt;
+
+ if (text != null && text.type == Node.TextNode &&
+ element.tag != tt.tagPre)
+ trimInitialSpace(lexer, element, text);
+
+ text = element.last;
+
+ if (text != null && text.type == Node.TextNode)
+ trimTrailingSpace(lexer, element, text);
+ }
+
+    /**
+     * Tells whether any ancestor of this node carries the given tag
+     * definition (compared by identity).
+     *
+     * @param tag the dictionary entry to look for
+     * @return true if some ancestor's tag is the given one
+     */
+    public boolean isDescendantOf(Dict tag) {
+        Node ancestor = this.parent;
+
+        while (ancestor != null) {
+            if (ancestor.tag == tag) {
+                return true;
+            }
+            ancestor = ancestor.parent;
+        }
+
+        return false;
+    }
+
+    /**
+     * The doctype has been found after other tags, and needs moving to
+     * before the html element. A DOCTYPE_AFTER_TAGS warning is reported
+     * first; then the parent chain is climbed from element up to the html
+     * element and the doctype is inserted in front of it.
+     *
+     * @param lexer   supplies the tag table and receives the warning
+     * @param element the node at which the doctype was encountered
+     * @param doctype the doctype node to relocate
+     */
+    public static void insertDocType(Lexer lexer, Node element, Node doctype) {
+        TagTable tt = lexer.configuration.tt;
+
+        Report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS);
+
+        Node html = element;
+        while (html.tag != tt.tagHtml) {
+            html = html.parent;
+        }
+
+        insertNodeBeforeElement(html, doctype);
+    }
+
+    /**
+     * Finds the body element: the first html child of this node is
+     * located, then the first body child of that.
+     *
+     * @param tt the tag table providing the html and body definitions
+     * @return the body node, or null if html or body is missing
+     */
+    public Node findBody(TagTable tt) {
+        Node html = this.content;
+        while (html != null && html.tag != tt.tagHtml) {
+            html = html.next;
+        }
+
+        if (html == null) {
+            return null;
+        }
+
+        Node body = html.content;
+        while (body != null && body.tag != tt.tagBody) {
+            body = body.next;
+        }
+
+        return body;
+    }
+
+    /**
+     * @return true if this node is a start tag or a start-end tag
+     */
+    public boolean isElement() {
+        return this.type == StartTag || this.type == StartEndTag;
+    }
+
+    /**
+     * Unexpected content in table row is moved to just before the table
+     * in accordance with Netscape and IE. This code assumes that node
+     * hasn't been inserted into the row. Nothing happens when row has no
+     * table ancestor.
+     *
+     * @param row  the row in which the content was found
+     * @param node the content to move
+     * @param tt   the tag table providing the table definition
+     */
+    public static void moveBeforeTable(Node row, Node node, TagTable tt) {
+        /* first find the table element */
+        Node table = row.parent;
+        while (table != null && table.tag != tt.tagTable) {
+            table = table.parent;
+        }
+
+        if (table == null) {
+            return;
+        }
+
+        /* then splice node in as the table's previous sibling */
+        if (table.parent.content == table) {
+            table.parent.content = node;
+        }
+
+        node.prev = table.prev;
+        node.next = table;
+        table.prev = node;
+        node.parent = table.parent;
+
+        if (node.prev != null) {
+            node.prev.next = node;
+        }
+    }
+
+    /**
+     * If a table row is empty then insert an empty cell. This practice
+     * is consistent with browser behavior and avoids potential problems
+     * with row spanning cells. A MISSING_STARTTAG warning is reported
+     * for the inferred cell.
+     *
+     * @param lexer creates the inferred td and receives the warning
+     * @param row   the table row to check
+     */
+    public static void fixEmptyRow(Lexer lexer, Node row) {
+        if (row.content != null) {
+            return;
+        }
+
+        Node inferredCell = lexer.inferredTag("td");
+        insertNodeAtEnd(row, inferredCell);
+        Report.warning(lexer, row, inferredCell, Report.MISSING_STARTTAG);
+    }
+
+    /**
+     * Turns node into an implicit start tag of the given kind, keeping
+     * the previous tag definition in 'was'. An OBSOLETE_ELEMENT warning
+     * is reported first, using a throw-away inferred tag of the new kind.
+     *
+     * @param lexer creates the inferred tag and receives the warning
+     * @param node  the node to convert
+     * @param tag   the tag definition the node should take on
+     */
+    public static void coerceNode(Lexer lexer, Node node, Dict tag) {
+        /* only needed as the second argument of the warning */
+        Node replacement = lexer.inferredTag(tag.name);
+        Report.warning(lexer, node, replacement, Report.OBSOLETE_ELEMENT);
+
+        node.was = node.tag;
+        node.tag = tag;
+        node.element = tag.name;
+        node.type = StartTag;
+        node.implicit = true;
+    }
+
+    /**
+     * Extracts a node and its children from a markup tree. Neighbours
+     * and the parent's first/last child pointers are re-linked around
+     * the node; the node's parent, prev and next are cleared while its
+     * own children stay attached to it.
+     *
+     * @param node the node to detach
+     */
+    public static void removeNode(Node node) {
+        /* close the gap in the sibling list */
+        if (node.prev != null) {
+            node.prev.next = node.next;
+        }
+        if (node.next != null) {
+            node.next.prev = node.prev;
+        }
+
+        /* fix the parent's end pointers */
+        Node owner = node.parent;
+        if (owner != null) {
+            if (owner.content == node) {
+                owner.content = node.next;
+            }
+            if (owner.last == node) {
+                owner.last = node.prev;
+            }
+        }
+
+        node.next = null;
+        node.prev = null;
+        node.parent = null;
+    }
+
+ public static boolean insertMisc(Node element, Node node)
+ {
+ if (node.type == CommentTag ||
+ node.type == ProcInsTag ||
+ node.type == CDATATag ||
+ node.type == SectionTag ||
+ node.type == AspTag ||
+ node.type == JsteTag ||
+ node.type == PhpTag)
+ {
+ insertNodeAtEnd(element, node);
+ return true;
+ }
+
+ return false;
+ }
+
+    /**
+     * Used to determine how attributes without values should be
+     * printed. This was introduced to deal with user defined tags,
+     * e.g. Cold Fusion.
+     *
+     * @param node the node to test (may be null)
+     * @return true if node is null, has no tag definition, or its tag
+     *         model has the CM_NEW bit set
+     */
+    public static boolean isNewNode(Node node) {
+        if (node == null || node.tag == null) {
+            return true;
+        }
+
+        return (node.tag.model & Dict.CM_NEW) != 0;
+    }
+
+    /**
+     * @return true if this node has exactly one child
+     */
+    public boolean hasOneChild() {
+        Node first = this.content;
+        return first != null && first.next == null;
+    }
+
+    /**
+     * Finds the html element among this node's direct children.
+     *
+     * @param tt the tag table providing the html definition
+     * @return the first child tagged html, or null if none
+     */
+    public Node findHTML(TagTable tt) {
+        Node child = this.content;
+
+        while (child != null && child.tag != tt.tagHtml) {
+            child = child.next;
+        }
+
+        return child;
+    }
+
+    /**
+     * Finds the head element: the html child is located with findHTML,
+     * then the first head child of that.
+     *
+     * @param tt the tag table providing the html and head definitions
+     * @return the head node, or null if html or head is missing
+     */
+    public Node findHEAD(TagTable tt) {
+        Node html = this.findHTML(tt);
+
+        if (html == null) {
+            return null;
+        }
+
+        Node head = html.content;
+        while (head != null && head.tag != tt.tagHead) {
+            head = head.next;
+        }
+
+        return head;
+    }
+
+    /**
+     * Verifies that the links around this node are mutually consistent
+     * (prev/next point back, the parent's first/last pointers agree, and
+     * the node appears in its parent's child list), then checks every
+     * child recursively.
+     *
+     * @return true if this node and all its descendants are consistent
+     */
+    public boolean checkNodeIntegrity() {
+        /* sibling links must point back at us */
+        if (this.prev != null && this.prev.next != this) {
+            return false;
+        }
+        if (this.next != null && this.next.prev != this) {
+            return false;
+        }
+
+        if (this.parent != null) {
+            /* a node without prev must be the first child, one without next the last */
+            if (this.prev == null && this.parent.content != this) {
+                return false;
+            }
+            if (this.next == null && this.parent.last != this) {
+                return false;
+            }
+
+            /* and we must be reachable from the parent's child list */
+            boolean listed = false;
+            Node sibling = this.parent.content;
+            while (sibling != null && !listed) {
+                listed = (sibling == this);
+                sibling = sibling.next;
+            }
+            if (!listed) {
+                return false;
+            }
+        }
+
+        Node child = this.content;
+        while (child != null) {
+            if (!child.checkNodeIntegrity()) {
+                return false;
+            }
+            child = child.next;
+        }
+
+        return true;
+    }
+
+ /*
+ Add class="foo" to node
+ */
+ public static void addClass(Node node, String classname)
+ {
+ AttVal classattr = node.getAttrByName("class");
+
+ /*
+ if there already is a class attribute
+ then append class name after a space
+ */
+ if (classattr != null)
+ {
+ classattr.value = classattr.value + " " + classname;
+ }
+ else /* create new class attribute */
+ node.addAttribute("class", classname);
+ }
+
+ /* --------------------- DEBUG -------------------------- */
+
+    /*
+     * Printable names of the node kinds, indexed by the value of 'type'.
+     * There must be one entry per node kind constant, in numeric order
+     * (RootNode = 0 ... PhpTag = 12); the original table lacked
+     * "CDATATag" and "JsteTag", which shifted the later names and made
+     * toString() fail for the two highest kinds.
+     */
+    private static final String[] nodeTypeString =
+    {
+        "RootNode",     /*  0 */
+        "DocTypeTag",   /*  1 */
+        "CommentTag",   /*  2 */
+        "ProcInsTag",   /*  3 */
+        "TextNode",     /*  4 */
+        "StartTag",     /*  5 */
+        "EndTag",       /*  6 */
+        "StartEndTag",  /*  7 */
+        "CDATATag",     /*  8 */
+        "SectionTag",   /*  9 */
+        "AspTag",       /* 10 */
+        "JsteTag",      /* 11 */
+        "PhpTag"        /* 12 */
+    };
+
+ public String toString()
+ {
+ String s = "";
+ Node n = this;
+
+ while (n != null) {
+ s += "[Node type=";
+ s += nodeTypeString[n.type];
+ s += ",element=";
+ if (n.element != null)
+ s += n.element;
+ else
+ s += "null";
+ if (n.type == TextNode ||
+ n.type == CommentTag ||
+ n.type == ProcInsTag) {
+ s += ",text=";
+ if (n.textarray != null && n.start <= n.end) {
+ s += "\"";
+ s += Lexer.getString(n.textarray, n.start, n.end - n.start);
+ s += "\"";
+ } else {
+ s += "null";
+ }
+ }
+ s += ",content=";
+ if (n.content != null)
+ s += n.content.toString();
+ else
+ s += "null";
+ s += "]";
+ if (n.next != null)
+ s += ",";
+ n = n.next;
+ }
+ return s;
+ }
+ /* --------------------- END DEBUG ---------------------- */
+
+
+ /* --------------------- DOM ---------------------------- */
+
+ protected org.w3c.dom.Node adapter = null;
+
+    /**
+     * Returns the DOM wrapper for this node, creating it on first use.
+     * The wrapper class is chosen from the node kind; kinds without a
+     * dedicated wrapper get a plain DOMNodeImpl.
+     *
+     * @return the cached DOM adapter
+     */
+    protected org.w3c.dom.Node getAdapter() {
+        if (adapter != null) {
+            return adapter;
+        }
+
+        switch (this.type) {
+            case RootNode:
+                adapter = new DOMDocumentImpl(this);
+                break;
+            case StartTag:
+            case StartEndTag:
+                adapter = new DOMElementImpl(this);
+                break;
+            case DocTypeTag:
+                adapter = new DOMDocumentTypeImpl(this);
+                break;
+            case CommentTag:
+                adapter = new DOMCommentImpl(this);
+                break;
+            case TextNode:
+                adapter = new DOMTextImpl(this);
+                break;
+            case CDATATag:
+                adapter = new DOMCDATASectionImpl(this);
+                break;
+            case ProcInsTag:
+                adapter = new DOMProcessingInstructionImpl(this);
+                break;
+            default:
+                adapter = new DOMNodeImpl(this);
+        }
+
+        return adapter;
+    }
+
+    /**
+     * Copies this node via clone(); with deep set, every child is
+     * copied recursively and appended to the copy in order.
+     *
+     * @param deep whether to copy the children as well
+     * @return the copy
+     */
+    protected Node cloneNode(boolean deep) {
+        Node copy = (Node) this.clone();
+
+        if (!deep) {
+            return copy;
+        }
+
+        Node child = this.content;
+        while (child != null) {
+            insertNodeAtEnd(copy, child.cloneNode(deep));
+            child = child.next;
+        }
+
+        return copy;
+    }
+
+
+    /**
+     * Changes the kind of this node.
+     *
+     * @param newType one of the node kind constants
+     */
+    protected void setType(short newType) {
+        type = newType;
+    }
+
+ /* --------------------- END DOM ------------------------ */
+
+}
--- /dev/null
+/*
+ * @(#)Out.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Output Stream
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+import java.io.OutputStream;
+
+public abstract class Out {
+
+    /** Character encoding selector used by implementations. */
+    public int encoding;
+
+    /** Encoder state, for ISO 2022. */
+    public int state;
+
+    /** The stream that receives the output. */
+    public OutputStream out;
+
+    /**
+     * Writes one character.
+     *
+     * @param c the character code
+     */
+    public abstract void outc(int c);
+
+    /**
+     * Writes one character given as a byte.
+     *
+     * @param c the byte to write
+     */
+    public abstract void outc(byte c);
+
+    /** Writes a line break. */
+    public abstract void newline();
+
+}
+
--- /dev/null
+/*
+ * @(#)OutImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Output Stream Implementation
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+import java.io.IOException;
+
+public class OutImpl extends Out {
+
+    /** The platform line separator, converted to bytes once. */
+    private static final byte[] nlBytes =
+        (System.getProperty("line.separator")).getBytes();
+
+    /** Creates an output with no stream attached yet. */
+    public OutImpl() {
+        this.out = null;
+    }
+
+    /**
+     * Writes a byte, treating it as an unsigned value.
+     *
+     * @param c the byte to write
+     */
+    public void outc(byte c) {
+        outc(((int) c) & 0xFF); // Convert to unsigned.
+    }
+
+    /**
+     * Writes one character according to the configured encoding:
+     * as a UTF-8 byte sequence, through the ISO 2022 state machine, or
+     * unchanged for any other encoding. I/O errors are reported on
+     * System.err and otherwise ignored.
+     *
+     * @param c the character code
+     */
+    /* For mac users, should we map Unicode back to MacRoman? */
+    public void outc(int c) {
+        try {
+            if (this.encoding == Configuration.UTF8) {
+                writeUTF8(c);
+            } else if (this.encoding == Configuration.ISO2022) {
+                int adjusted = stepISO2022(c);
+                this.out.write(adjusted);
+            } else {
+                this.out.write(c);
+            }
+        } catch (IOException e) {
+            System.err.println("OutImpl.outc: " + e.toString());
+        }
+    }
+
+    /** Writes c to the stream as a one- to five-byte UTF-8 sequence. */
+    private void writeUTF8(int c) throws IOException {
+        if (c < 128) {
+            this.out.write(c);
+        } else if (c <= 0x7FF) {
+            this.out.write(0xC0 | (c >> 6));
+            this.out.write(0x80 | (c & 0x3F));
+        } else if (c <= 0xFFFF) {
+            this.out.write(0xE0 | (c >> 12));
+            this.out.write(0x80 | ((c >> 6) & 0x3F));
+            this.out.write(0x80 | (c & 0x3F));
+        } else if (c <= 0x1FFFFF) {
+            this.out.write(0xF0 | (c >> 18));
+            this.out.write(0x80 | ((c >> 12) & 0x3F));
+            this.out.write(0x80 | ((c >> 6) & 0x3F));
+            this.out.write(0x80 | (c & 0x3F));
+        } else {
+            this.out.write(0xF8 | (c >> 24));
+            this.out.write(0x80 | ((c >> 18) & 0x3F));
+            this.out.write(0x80 | ((c >> 12) & 0x3F));
+            this.out.write(0x80 | ((c >> 6) & 0x3F));
+            this.out.write(0x80 | (c & 0x3F));
+        }
+    }
+
+    /**
+     * Advances the ISO 2022 escape-sequence state for c and returns the
+     * value to write: c itself, or c with the top bit cleared while in
+     * the non-ASCII state.
+     */
+    private int stepISO2022(int c) {
+        if (c == 0x1b) { /* ESC */
+            this.state = StreamIn.FSM_ESC;
+            return c;
+        }
+
+        switch (this.state) {
+            case StreamIn.FSM_ESC:
+                if (c == '$') {
+                    this.state = StreamIn.FSM_ESCD;
+                } else if (c == '(') {
+                    this.state = StreamIn.FSM_ESCP;
+                } else {
+                    this.state = StreamIn.FSM_ASCII;
+                }
+                return c;
+
+            case StreamIn.FSM_ESCD:
+                this.state = (c == '(') ? StreamIn.FSM_ESCDP : StreamIn.FSM_NONASCII;
+                return c;
+
+            case StreamIn.FSM_ESCDP:
+                this.state = StreamIn.FSM_NONASCII;
+                return c;
+
+            case StreamIn.FSM_ESCP:
+                this.state = StreamIn.FSM_ASCII;
+                return c;
+
+            case StreamIn.FSM_NONASCII:
+                return c & 0x7F;
+
+            default:
+                return c;
+        }
+    }
+
+    /**
+     * Writes the platform line separator and flushes the stream.
+     * I/O errors are reported on System.err and otherwise ignored.
+     */
+    public void newline() {
+        try {
+            this.out.write(nlBytes);
+            this.out.flush();
+        } catch (IOException e) {
+            System.err.println("OutImpl.newline: " + e.toString());
+        }
+    }
+
+}
+
--- /dev/null
+/*
+ * @(#)PPrint.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Pretty print parse tree
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/*
+ Block-level and unknown elements are printed on
+ new lines and their contents indented 2 spaces
+
+ Inline elements are printed inline.
+
+ Inline content is wrapped on spaces (except in
+ attribute values or preformatted text, after
+ start tags and before end tags)
+*/
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+public class PPrint {
+
+ /* page transition effects */
+
+ public static final short EFFECT_BLEND = -1;
+ public static final short EFFECT_BOX_IN = 0;
+ public static final short EFFECT_BOX_OUT = 1;
+ public static final short EFFECT_CIRCLE_IN = 2;
+ public static final short EFFECT_CIRCLE_OUT = 3;
+ public static final short EFFECT_WIPE_UP = 4;
+ public static final short EFFECT_WIPE_DOWN = 5;
+ public static final short EFFECT_WIPE_RIGHT = 6;
+ public static final short EFFECT_WIPE_LEFT = 7;
+ public static final short EFFECT_VERT_BLINDS = 8;
+ public static final short EFFECT_HORZ_BLINDS = 9;
+ public static final short EFFECT_CHK_ACROSS = 10;
+ public static final short EFFECT_CHK_DOWN = 11;
+ public static final short EFFECT_RND_DISSOLVE = 12;
+ public static final short EFFECT_SPLIT_VIRT_IN = 13;
+ public static final short EFFECT_SPLIT_VIRT_OUT = 14;
+ public static final short EFFECT_SPLIT_HORZ_IN = 15;
+ public static final short EFFECT_SPLIT_HORZ_OUT = 16;
+ public static final short EFFECT_STRIPS_LEFT_DOWN = 17;
+ public static final short EFFECT_STRIPS_LEFT_UP = 18;
+ public static final short EFFECT_STRIPS_RIGHT_DOWN = 19;
+ public static final short EFFECT_STRIPS_RIGHT_UP = 20;
+ public static final short EFFECT_RND_BARS_HORZ = 21;
+ public static final short EFFECT_RND_BARS_VERT = 22;
+ public static final short EFFECT_RANDOM = 23;
+
+ private static final short NORMAL = 0; /* presumably printing-mode flags that can be OR-ed together (values are 0 and powers of two) -- confirm against the users further down the file */
+ private static final short PREFORMATTED = 1;
+ private static final short COMMENT = 2;
+ private static final short ATTRIBVALUE = 4;
+ private static final short NOWRAP = 8;
+ private static final short CDATA = 16;
+
+ private int[] linebuf = null; /* growable buffer of character codes, filled by addC */
+ private int lbufsize = 0; /* current capacity of linebuf (0 until first use) */
+ private int linelen = 0;
+ private int wraphere = 0;
+ private boolean inAttVal = false;
+ private boolean InString = false;
+
+ private int slide = 0;
+ private int count = 0;
+ private Node slidecontent = null;
+
+ private Configuration configuration; /* set once in the constructor */
+
+    /**
+     * Creates a pretty printer bound to the given configuration.
+     *
+     * @param configuration the settings object kept for later use
+     */
+    public PPrint(Configuration configuration) {
+        this.configuration = configuration;
+    }
+
+ /*
+ 1010 A
+ 1011 B
+ 1100 C
+ 1101 D
+ 1110 E
+ 1111 F
+ */
+
+ /* return one less that the number of bytes used by UTF-8 char */
+ /* str points to 1st byte, *ch initialized to 1st byte */
+ /**
+  * Decodes the UTF-8 sequence that begins at {@code str[start]}.
+  *
+  * The length of the sequence is taken from the lead byte (two to six
+  * bytes). Any other first byte is returned unchanged as a one-byte
+  * character. If the array ends before the sequence is complete, only
+  * the bytes that are actually present are consumed, so the method
+  * never reads past the end of {@code str}.
+  *
+  * @param str   bytes to decode
+  * @param start index of the first byte of the character
+  * @param ch    receives the decoded value in {@code ch.value}
+  * @return the number of bytes consumed minus one
+  */
+ public static int getUTF8(byte[] str, int start, MutableInteger ch)
+ {
+     int lead = ((int)str[start]) & 0xFF; // Convert to unsigned.
+     int value;
+     int length;
+
+     if ((lead & 0xE0) == 0xC0) /* 110X XXXX two bytes */
+     {
+         value = lead & 31;
+         length = 2;
+     }
+     else if ((lead & 0xF0) == 0xE0) /* 1110 XXXX three bytes */
+     {
+         value = lead & 15;
+         length = 3;
+     }
+     else if ((lead & 0xF8) == 0xF0) /* 1111 0XXX four bytes */
+     {
+         value = lead & 7;
+         length = 4;
+     }
+     else if ((lead & 0xFC) == 0xF8) /* 1111 10XX five bytes */
+     {
+         value = lead & 3;
+         length = 5;
+     }
+     else if ((lead & 0xFE) == 0xFC) /* 1111 110X six bytes */
+     {
+         value = lead & 1;
+         length = 6;
+     }
+     else /* anything else is passed through as one byte */
+     {
+         ch.value = lead;
+         return 0;
+     }
+
+     /* a truncated sequence must not index beyond the array */
+     int available = str.length - start;
+     if (length > available)
+         length = available;
+
+     /* successor bytes should have the form 10XX XXXX */
+     for (int i = 1; i < length; ++i)
+     {
+         int next = ((int)str[start + i]) & 0xFF; // Convert to unsigned.
+         value = (value << 6) | (next & 0x3F);
+     }
+
+     ch.value = value;
+     return length - 1;
+ }
+
+ /* store char c as UTF-8 encoded byte stream */
+ /**
+  * Stores the character {@code c} in {@code buf} as a UTF-8 byte
+  * sequence, starting at index {@code start}.
+  *
+  * One to six bytes are written depending on the magnitude of
+  * {@code c}; the six-byte form mirrors the longest sequence that
+  * getUTF8 decodes. Values below 128 (including negative values) are
+  * written as a single byte.
+  *
+  * @param buf   destination buffer; must have room for the sequence
+  * @param start index at which the first byte is written
+  * @param c     character value to encode
+  * @return the index just past the last byte written
+  */
+ public static int putUTF8(byte[] buf, int start, int c)
+ {
+     if (c < 128)
+     {
+         buf[start++] = (byte)c;
+     }
+     else if (c <= 0x7FF)
+     {
+         buf[start++] = (byte)(0xC0 | (c >> 6));
+         buf[start++] = (byte)(0x80 | (c & 0x3F));
+     }
+     else if (c <= 0xFFFF)
+     {
+         buf[start++] = (byte)(0xE0 | (c >> 12));
+         buf[start++] = (byte)(0x80 | ((c >> 6) & 0x3F));
+         buf[start++] = (byte)(0x80 | (c & 0x3F));
+     }
+     else if (c <= 0x1FFFFF)
+     {
+         buf[start++] = (byte)(0xF0 | (c >> 18));
+         buf[start++] = (byte)(0x80 | ((c >> 12) & 0x3F));
+         buf[start++] = (byte)(0x80 | ((c >> 6) & 0x3F));
+         buf[start++] = (byte)(0x80 | (c & 0x3F));
+     }
+     else if (c <= 0x3FFFFFF)
+     {
+         buf[start++] = (byte)(0xF8 | (c >> 24));
+         buf[start++] = (byte)(0x80 | ((c >> 18) & 0x3F));
+         buf[start++] = (byte)(0x80 | ((c >> 12) & 0x3F));
+         buf[start++] = (byte)(0x80 | ((c >> 6) & 0x3F));
+         buf[start++] = (byte)(0x80 | (c & 0x3F));
+     }
+     else
+     {
+         /* more than 26 significant bits: five bytes cannot hold the value */
+         buf[start++] = (byte)(0xFC | (c >> 30));
+         buf[start++] = (byte)(0x80 | ((c >> 24) & 0x3F));
+         buf[start++] = (byte)(0x80 | ((c >> 18) & 0x3F));
+         buf[start++] = (byte)(0x80 | ((c >> 12) & 0x3F));
+         buf[start++] = (byte)(0x80 | ((c >> 6) & 0x3F));
+         buf[start++] = (byte)(0x80 | (c & 0x3F));
+     }
+
+     return start;
+ }
+
+ private void addC(int c, int index)
+ {
+ if (index + 1 >= lbufsize)
+ {
+ while (index + 1 >= lbufsize)
+ {
+ if (lbufsize == 0)
+ lbufsize = 256;
+ else
+ lbufsize = lbufsize * 2;
+ }
+
+ int[] temp = new int[ lbufsize ];
+ if (linebuf != null)
+ System.arraycopy(linebuf, 0, temp, 0, index);
+ linebuf = temp;
+ }
+
+ linebuf[index] = c;
+ }
+
+ /*
+ Writes the buffered characters up to the wrap point as one indented
+ line and keeps the remainder in the buffer. Does nothing when no
+ wrap point is set. While InString is set the broken line is
+ terminated with a space and a backslash.
+ */
+ private void wrapLine(Out fout, int indent)
+ {
+     if (wraphere == 0)
+         return;
+
+     int k;
+
+     for (k = 0; k < indent; ++k)
+         fout.outc((int)' ');
+
+     for (k = 0; k < wraphere; ++k)
+         fout.outc(linebuf[k]);
+
+     if (InString)
+     {
+         fout.outc((int)' ');
+         fout.outc((int)'\\');
+     }
+
+     fout.newline();
+
+     if (linelen <= wraphere)
+     {
+         linelen = 0;
+     }
+     else
+     {
+         /* a space at the break point is dropped */
+         if (linebuf[wraphere] == ' ')
+             ++wraphere;
+
+         /* terminate the text so the shift below knows where to stop */
+         addC('\0', linelen);
+
+         for (int dst = 0, src = wraphere; ; dst++, src++)
+         {
+             int moved = linebuf[src];
+             linebuf[dst] = moved;
+             if (moved == 0)
+                 break;
+         }
+
+         linelen -= wraphere;
+     }
+
+     wraphere = 0;
+ }
+
+ /*
+ Variant of wrapLine used inside attribute values: the text up to the
+ wrap point is always written, followed by a space and, when inString
+ is true, a backslash. The remainder stays in the buffer.
+ */
+ private void wrapAttrVal(Out fout, int indent, boolean inString)
+ {
+     int k;
+
+     for (k = 0; k < indent; ++k)
+         fout.outc((int)' ');
+
+     for (k = 0; k < wraphere; ++k)
+         fout.outc(linebuf[k]);
+
+     fout.outc((int)' ');
+
+     if (inString)
+         fout.outc((int)'\\');
+
+     fout.newline();
+
+     if (linelen <= wraphere)
+     {
+         linelen = 0;
+     }
+     else
+     {
+         /* a space at the break point is dropped */
+         if (linebuf[wraphere] == ' ')
+             ++wraphere;
+
+         /* terminate the text so the shift below knows where to stop */
+         addC('\0', linelen);
+
+         for (int dst = 0, src = wraphere; ; dst++, src++)
+         {
+             int moved = linebuf[src];
+             linebuf[dst] = moved;
+             if (moved == 0)
+                 break;
+         }
+
+         linelen -= wraphere;
+     }
+
+     wraphere = 0;
+ }
+
+ public void flushLine(Out fout, int indent)
+ {
+ int i;
+
+ if (linelen > 0)
+ {
+ if (indent + linelen >= this.configuration.wraplen)
+ wrapLine(fout, indent);
+
+ if (!inAttVal || this.configuration.IndentAttributes)
+ {
+ for (i = 0; i < indent; ++i)
+ fout.outc((int)' ');
+ }
+
+ for (i = 0; i < linelen; ++i)
+ fout.outc(linebuf[i]);
+ }
+
+ fout.newline();
+ linelen = 0;
+ wraphere = 0;
+ inAttVal = false;
+ }
+
+ public void condFlushLine(Out fout, int indent)
+ {
+ int i;
+
+ if (linelen > 0)
+ {
+ if (indent + linelen >= this.configuration.wraplen)
+ wrapLine(fout, indent);
+
+ if (!inAttVal || this.configuration.IndentAttributes)
+ {
+ for (i = 0; i < indent; ++i)
+ fout.outc((int)' ');
+ }
+
+ for (i = 0; i < linelen; ++i)
+ fout.outc(linebuf[i]);
+
+ fout.newline();
+ linelen = 0;
+ wraphere = 0;
+ inAttVal = false;
+ }
+ }
+
+ /*
+ Appends one character to the line buffer, escaping it as required
+ by the print mode and the configured output encoding.
+
+ c    - character value (may be above 0xFFFF after UTF-8 decoding)
+ mode - OR-ed combination of the print-mode flags
+
+ A plain space outside PREFORMATTED/COMMENT/ATTRIBVALUE text marks a
+ possible wrap point, or becomes a non-breaking space in NOWRAP mode.
+ */
+ private void printChar(int c, short mode)
+ {
+     if (c == ' ' && (mode & (PREFORMATTED | COMMENT | ATTRIBVALUE)) == 0)
+     {
+         /* coerce a space character to a non-breaking space */
+         if ((mode & NOWRAP) != 0)
+         {
+             /* by default XML doesn't define &nbsp; */
+             if (this.configuration.NumEntities || this.configuration.XmlTags)
+                 addEntityChars("&#160;");
+             else /* otherwise use named entity */
+                 addEntityChars("&nbsp;");
+             return;
+         }
+
+         wraphere = linelen;
+     }
+
+     /* comment characters are passed raw */
+     if ((mode & COMMENT) != 0)
+     {
+         addC(c, linelen++);
+         return;
+     }
+
+     /* except in CDATA map < to &lt; etc. */
+     if ((mode & CDATA) == 0)
+     {
+         if (c == '<')
+         {
+             addEntityChars("&lt;");
+             return;
+         }
+
+         if (c == '>')
+         {
+             addEntityChars("&gt;");
+             return;
+         }
+
+         /*
+           naked '&' chars can be left alone or
+           quoted as &amp; The latter is required
+           for XML where naked '&' are illegal.
+         */
+         if (c == '&' && this.configuration.QuoteAmpersand)
+         {
+             addEntityChars("&amp;");
+             return;
+         }
+
+         if (c == '"' && this.configuration.QuoteMarks)
+         {
+             addEntityChars("&quot;");
+             return;
+         }
+
+         if (c == '\'' && this.configuration.QuoteMarks)
+         {
+             addEntityChars("&#39;");
+             return;
+         }
+
+         if (c == 160 && this.configuration.CharEncoding != Configuration.RAW)
+         {
+             if (this.configuration.QuoteNbsp)
+                 addEntityChars(this.configuration.NumEntities ? "&#160;" : "&nbsp;");
+             else
+                 addC(c, linelen++);
+
+             return;
+         }
+     }
+
+     /* otherwise ISO 2022 characters are passed raw */
+     if (this.configuration.CharEncoding == Configuration.ISO2022 ||
+         this.configuration.CharEncoding == Configuration.RAW)
+     {
+         addC(c, linelen++);
+         return;
+     }
+
+     /* if preformatted text, map &nbsp; to space */
+     if (c == 160 && ((mode & PREFORMATTED) != 0))
+     {
+         addC(' ', linelen++);
+         return;
+     }
+
+     /*
+       Filters from Word and PowerPoint often use smart quotes.
+       Unfortunately, the corresponding HTML 4.0 entities for
+       these are not widely supported. The following converts
+       the Unicode dashes and quotation marks to the nearest
+       ASCII equivalent. My thanks to Andrzej Novosiolov for
+       his help with this code.
+     */
+     if (this.configuration.MakeClean)
+     {
+         switch (c) {
+         case 0x2013:
+         case 0x2014:
+             c = '-';
+             break;
+         case 0x2018:
+         case 0x2019:
+         case 0x201A:
+             c = '\'';
+             break;
+         case 0x201C:
+         case 0x201D:
+         case 0x201E:
+             c = '"';
+             break;
+         }
+     }
+
+     /* don't map latin-1 chars to entities */
+     if (this.configuration.CharEncoding == Configuration.LATIN1)
+     {
+         if (c > 255) /* multi byte chars */
+         {
+             addEntityChars(entityReference(c));
+             return;
+         }
+
+         if (c > 126 && c < 160)
+         {
+             addEntityChars("&#" + c + ";");
+             return;
+         }
+
+         addC(c, linelen++);
+         return;
+     }
+
+     /* don't map utf8 chars to entities */
+     if (this.configuration.CharEncoding == Configuration.UTF8)
+     {
+         addC(c, linelen++);
+         return;
+     }
+
+     /* use numeric entities only for XML */
+     if (this.configuration.XmlTags)
+     {
+         /* if ASCII use numeric entities for chars > 127 */
+         if (c > 127 && this.configuration.CharEncoding == Configuration.ASCII)
+         {
+             addEntityChars("&#" + c + ";");
+             return;
+         }
+
+         /* otherwise output char raw */
+         addC(c, linelen++);
+         return;
+     }
+
+     /* default treatment for ASCII */
+     if (c > 126 || (c < ' ' && c != '\t'))
+     {
+         addEntityChars(entityReference(c));
+         return;
+     }
+
+     addC(c, linelen++);
+ }
+
+ /* appends every character of text to the line buffer */
+ private void addEntityChars(String text)
+ {
+     for (int i = 0; i < text.length(); i++)
+         addC((int)text.charAt(i), linelen++);
+ }
+
+ /*
+ Builds the entity reference for c: the named form when NumEntities is
+ off and the entity table knows the code, the numeric form otherwise.
+ The table is keyed by a short, so it is only consulted for values up
+ to 0xFFFF; larger code points would be truncated by the cast and
+ could match an unrelated entity.
+ */
+ private String entityReference(int c)
+ {
+     if (!this.configuration.NumEntities && c <= 0xFFFF)
+     {
+         String name = EntityTable.getDefaultEntityTable().entityName((short)c);
+         if (name != null)
+             return "&" + name + ";";
+     }
+
+     return "&#" + c + ";";
+ }
+
+ /*
+ The line buffer is uint not char so we can
+ hold Unicode values unencoded. The translation
+ to UTF-8 is deferred to the outc routine called
+ to flush the line buffer.
+ */
+ /*
+ Prints the bytes textarray[start..end) through printChar. Bytes above
+ 0x7F are decoded with getUTF8 first, a newline flushes the current
+ line, and the line is wrapped whenever it has reached the wrap limit
+ before the next character is handled.
+ */
+ private void printText(Out fout, short mode, int indent,
+                        byte[] textarray, int start, int end)
+ {
+     MutableInteger decoded = new MutableInteger();
+     int pos = start;
+
+     while (pos < end)
+     {
+         if (indent + linelen >= this.configuration.wraplen)
+             wrapLine(fout, indent);
+
+         int ch = ((int)textarray[pos]) & 0xFF; // Convert to unsigned.
+
+         /* look for UTF-8 multibyte character */
+         if (ch > 0x7F)
+         {
+             pos += getUTF8(textarray, pos, decoded);
+             ch = decoded.value;
+         }
+
+         pos++;
+
+         if (ch == '\n')
+             flushLine(fout, indent);
+         else
+             printChar(ch, mode);
+     }
+ }
+
+ private void printString(Out fout, int indent, String str)
+ {
+ for (int i = 0; i < str.length(); i++ )
+ addC((int)str.charAt(i), linelen++);
+ }
+
+ /*
+ Prints "=" followed by the attribute value enclosed in delim.
+
+ value     - attribute text; null prints just the two delimiters
+ delim     - quote character to use; 0 selects the double quote
+ wrappable - whether the value may be broken across lines at spaces
+
+ Occurrences of the delimiter inside the value are written as
+ &quot; or &#39;. Quotes of the other kind toggle InString so that a
+ wrapped line inside a string literal gets a trailing backslash.
+ Values starting with <%, <@ or <?php are treated as CDATA so that
+ their markup characters are not escaped.
+ */
+ private void printAttrValue(Out fout, int indent, String value, int delim, boolean wrappable)
+ {
+     int c;
+     MutableInteger ci = new MutableInteger();
+     boolean wasinstring = false;
+     byte[] valueChars = null;
+     int i;
+     short mode = (wrappable ? (short)(NORMAL | ATTRIBVALUE) :
+                               (short)(PREFORMATTED | ATTRIBVALUE));
+
+     if (value != null)
+     {
+         valueChars = Lexer.getBytes(value);
+     }
+
+     /* look for ASP, Tango or PHP instructions for computed attribute value */
+     if (valueChars != null && valueChars.length >= 5 && valueChars[0] == '<')
+     {
+         if (valueChars[1] == '%' || valueChars[1] == '@'||
+             (new String(valueChars, 0, 5)).equals("<?php"))
+             mode |= CDATA;
+     }
+
+     if (delim == 0)
+         delim = '"';
+
+     addC('=', linelen++);
+
+     /* don't wrap after "=" for xml documents */
+     if (!this.configuration.XmlOut) {
+
+         if (indent + linelen < this.configuration.wraplen)
+             wraphere = linelen;
+
+         if (indent + linelen >= this.configuration.wraplen)
+             wrapLine(fout, indent);
+
+         if (indent + linelen < this.configuration.wraplen)
+             wraphere = linelen;
+         else
+             condFlushLine(fout, indent);
+     }
+
+     addC(delim, linelen++);
+
+     if (value != null)
+     {
+         InString = false;
+
+         i = 0;
+         while (i < valueChars.length)
+         {
+             c = ((int)valueChars[i]) & 0xFF; // Convert to unsigned.
+
+             /* remember the last space that still fits as the break point */
+             if (wrappable && c == ' ' && indent + linelen < this.configuration.wraplen)
+             {
+                 wraphere = linelen;
+                 wasinstring = InString;
+             }
+
+             if (wrappable && wraphere > 0 && indent + linelen >= this.configuration.wraplen)
+                 wrapAttrVal(fout, indent, wasinstring);
+
+             if (c == delim)
+             {
+                 /* the delimiter itself must always be escaped */
+                 String entity;
+
+                 entity = (c == '"' ? "&quot;" : "&#39;");
+
+                 for (int j = 0; j < entity.length(); j++ )
+                     addC(entity.charAt(j), linelen++);
+
+                 ++i;
+                 continue;
+             }
+             else if (c == '"')
+             {
+                 if (this.configuration.QuoteMarks)
+                 {
+                     addC('&', linelen++);
+                     addC('q', linelen++);
+                     addC('u', linelen++);
+                     addC('o', linelen++);
+                     addC('t', linelen++);
+                     addC(';', linelen++);
+                 }
+                 else
+                     addC('"', linelen++);
+
+                 if (delim == '\'')
+                     InString = !InString;
+
+                 ++i;
+                 continue;
+             }
+             else if (c == '\'')
+             {
+                 if (this.configuration.QuoteMarks)
+                 {
+                     addC('&', linelen++);
+                     addC('#', linelen++);
+                     addC('3', linelen++);
+                     addC('9', linelen++);
+                     addC(';', linelen++);
+                 }
+                 else
+                     addC('\'', linelen++);
+
+                 if (delim == '"')
+                     InString = !InString;
+
+                 ++i;
+                 continue;
+             }
+
+             /* look for UTF-8 multibyte character */
+             if (c > 0x7F)
+             {
+                 i += getUTF8(valueChars, i, ci);
+                 c = ci.value;
+             }
+
+             ++i;
+
+             if (c == '\n')
+             {
+                 flushLine(fout, indent);
+                 continue;
+             }
+
+             printChar(c, mode);
+         }
+     }
+
+     InString = false;
+     addC(delim, linelen++);
+ }
+
+ private void printAttribute(Out fout, int indent, Node node, AttVal attr)
+ {
+ String name;
+ boolean wrappable = false;
+
+ if (this.configuration.IndentAttributes)
+ {
+ flushLine(fout, indent);
+ indent += this.configuration.spaces;
+ }
+
+ name = attr.attribute;
+
+ if (indent + linelen >= this.configuration.wraplen)
+ wrapLine(fout, indent);
+
+ if (!this.configuration.XmlTags && !this.configuration.XmlOut && attr.dict != null)
+ {
+ if (AttributeTable.getDefaultAttributeTable().isScript(name))
+ wrappable = this.configuration.WrapScriptlets;
+ else if (!attr.dict.nowrap && this.configuration.WrapAttVals)
+ wrappable = true;
+ }
+
+ if (indent + linelen < this.configuration.wraplen)
+ {
+ wraphere = linelen;
+ addC(' ', linelen++);
+ }
+ else
+ {
+ condFlushLine(fout, indent);
+ addC(' ', linelen++);
+ }
+
+ for (int i = 0; i < name.length(); i++ )
+ addC((int)Lexer.foldCase(name.charAt(i),
+ this.configuration.UpperCaseAttrs,
+ this.configuration.XmlTags),
+ linelen++);
+
+ if (indent + linelen >= this.configuration.wraplen)
+ wrapLine(fout, indent);
+
+ if (attr.value == null)
+ {
+ if (this.configuration.XmlTags || this.configuration.XmlOut)
+ printAttrValue(fout, indent, attr.attribute, attr.delim, true);
+ else if (!attr.isBoolAttribute() && !Node.isNewNode(node))
+ printAttrValue(fout, indent, "", attr.delim, true);
+ else if (indent + linelen < this.configuration.wraplen)
+ wraphere = linelen;
+
+ }
+ else
+ printAttrValue(fout, indent, attr.value, attr.delim, wrappable);
+ }
+
+ /*
+ Prints the attribute list that starts at attr and then, for XML
+ output with XmlSpace enabled, adds xml:space="preserve" to elements
+ whose white space must be preserved and that do not carry the
+ attribute already.
+
+ The xml:space attribute is added exactly once per tag. Doing this
+ inside the recursion over the list would repeat it for every
+ attribute and produce duplicate attributes.
+ */
+ private void printAttrs(Out fout, int indent,
+                         Node node, AttVal attr)
+ {
+     printAttrChain(fout, indent, node, attr);
+
+     /* add xml:space attribute to pre and other elements */
+     if (configuration.XmlOut &&
+         configuration.XmlSpace &&
+         ParserImpl.XMLPreserveWhiteSpace(node, configuration.tt) &&
+         node.getAttrByName("xml:space") == null)
+         printString(fout, indent, " xml:space=\"preserve\"");
+ }
+
+ /* prints the list starting at attr, last list entry first */
+ private void printAttrChain(Out fout, int indent,
+                             Node node, AttVal attr)
+ {
+     if (attr == null)
+         return;
+
+     printAttrChain(fout, indent, node, attr.next);
+
+     if (attr.attribute != null)
+         printAttribute(fout, indent, node, attr);
+     else if (attr.asp != null)
+     {
+         addC(' ', linelen++);
+         printAsp(fout, indent, attr.asp);
+     }
+     else if (attr.php != null)
+     {
+         addC(' ', linelen++);
+         printPhp(fout, indent, attr.php);
+     }
+ }
+
+ /*
+ Line can be wrapped immediately after inline start tag provided
+ it follows a text node ending in a space, or its parent is an
+ inline element that that rule applies to. This behaviour was
+ reverse engineered from Netscape 3.0
+ */
+ /*
+ Walks from node up through its ancestors. Returns true as soon as
+ the current node is null, has no tag or is not inline. For an
+ inline node with a previous sibling the answer is whether that
+ sibling is a non-empty text node whose last byte is 160, a space
+ or a newline. Without a previous sibling the parent is examined.
+ */
+ private static boolean afterSpace(Node node)
+ {
+     for (Node current = node; ; current = current.parent)
+     {
+         if (current == null || current.tag == null ||
+             (current.tag.model & Dict.CM_INLINE) == 0)
+             return true;
+
+         Node before = current.prev;
+         if (before == null)
+             continue; /* nothing precedes it: ask the parent */
+
+         if (before.type == Node.TextNode && before.end > before.start)
+         {
+             int last = ((int)before.textarray[before.end - 1]) & 0xFF; // Convert to unsigned.
+
+             if (last == 160 || last == ' ' || last == '\n')
+                 return true;
+         }
+
+         return false;
+     }
+ }
+
+ private void printTag(Lexer lexer, Out fout, short mode, int indent, Node node)
+ {
+ char c;
+ String p;
+ TagTable tt = this.configuration.tt;
+
+ addC('<', linelen++);
+
+ if (node.type == Node.EndTag)
+ addC('/', linelen++);
+
+ p = node.element;
+ for (int i = 0; i < p.length(); i++ )
+ addC((int)Lexer.foldCase(p.charAt(i),
+ this.configuration.UpperCaseTags,
+ this.configuration.XmlTags),
+ linelen++);
+
+ printAttrs(fout, indent, node, node.attributes);
+
+ if ((this.configuration.XmlOut || lexer != null && lexer.isvoyager) &&
+ (node.type == Node.StartEndTag || (node.tag.model & Dict.CM_EMPTY) != 0))
+ {
+ addC(' ', linelen++); /* compatibility hack */
+ addC('/', linelen++);
+ }
+
+ addC('>', linelen++);;
+
+ if (node.type != Node.StartEndTag && !((mode & PREFORMATTED) != 0))
+ {
+ if (indent + linelen >= this.configuration.wraplen)
+ wrapLine(fout, indent);
+
+ if (indent + linelen < this.configuration.wraplen)
+ {
+ /*
+ wrap after start tag if is <br/> or if it's not
+ inline or it is an empty tag followed by </a>
+ */
+ if (afterSpace(node))
+ {
+ if (!((mode & NOWRAP) != 0) &&
+ (!((node.tag.model & Dict.CM_INLINE) != 0) ||
+ (node.tag == tt.tagBr) ||
+ (((node.tag.model & Dict.CM_EMPTY) != 0) &&
+ node.next == null &&
+ node.parent.tag == tt.tagA)))
+ {
+ wraphere = linelen;
+ }
+ }
+ }
+ else
+ condFlushLine(fout, indent);
+ }
+ }
+
+ private void printEndTag(Out fout, short mode, int indent, Node node)
+ {
+ char c;
+ String p;
+
+ /*
+ Netscape ignores SGML standard by not ignoring a
+ line break before </A> or </U> etc. To avoid rendering
+ this as an underlined space, I disable line wrapping
+ before inline end tags by the #if 0 ... #endif
+ */
+if (false) {
+ if (indent + linelen < this.configuration.wraplen && !((mode & NOWRAP) != 0))
+ wraphere = linelen;
+}
+
+ addC('<', linelen++);
+ addC('/', linelen++);
+
+ p = node.element;
+ for (int i = 0; i < p.length(); i++ )
+ addC((int)Lexer.foldCase(p.charAt(i),
+ this.configuration.UpperCaseTags,
+ this.configuration.XmlTags),
+ linelen++);
+
+ addC('>', linelen++);
+ }
+
+ /*
+ Prints node as a comment: "<!--", the node text passed through raw
+ (COMMENT mode), then "-->". A wrap point is recorded in front of
+ the comment when it still fits, and the line is flushed afterwards
+ if the node asks for a line break.
+ */
+ private void printComment(Out fout, int indent, Node node)
+ {
+     if (indent + linelen < this.configuration.wraplen)
+         wraphere = linelen;
+
+     printString(fout, indent, "<!--");
+
+     printText(fout, COMMENT, indent,
+               node.textarray, node.start, node.end);
+
+     // See Lexer.java: AQ 8Jul2000
+     printString(fout, indent, "-->");
+
+     if (node.linebreak)
+         flushLine(fout, indent);
+ }
+
+ private void printDocType(Out fout, int indent, Node node)
+ {
+ boolean q = this.configuration.QuoteMarks;
+
+ this.configuration.QuoteMarks = false;
+
+ if (indent + linelen < this.configuration.wraplen)
+ wraphere = linelen;
+
+ condFlushLine(fout, indent);
+
+ addC('<', linelen++);
+ addC('!', linelen++);
+ addC('D', linelen++);
+ addC('O', linelen++);
+ addC('C', linelen++);
+ addC('T', linelen++);
+ addC('Y', linelen++);
+ addC('P', linelen++);
+ addC('E', linelen++);
+ addC(' ', linelen++);
+
+ if (indent + linelen < this.configuration.wraplen)
+ wraphere = linelen;
+
+ printText(fout, (short)0, indent,
+ node.textarray, node.start, node.end);
+
+ if (linelen < this.configuration.wraplen)
+ wraphere = linelen;
+
+ addC('>', linelen++);
+ this.configuration.QuoteMarks = q;
+ condFlushLine(fout, indent);
+ }
+
+ /*
+ Prints node as a processing instruction "<?...?>" and flushes the
+ line. The closing '?' is added only when the text does not already
+ end with one. An empty text never ends with '?', so the byte in
+ front of the node's range is not inspected.
+ */
+ private void printPI(Out fout, int indent, Node node)
+ {
+     if (indent + linelen < this.configuration.wraplen)
+         wraphere = linelen;
+
+     addC('<', linelen++);
+     addC('?', linelen++);
+
+     /* set CDATA to pass < and > unescaped */
+     printText(fout, CDATA, indent,
+               node.textarray, node.start, node.end);
+
+     boolean endsWithQuestionMark =
+         node.end > node.start && node.textarray[node.end - 1] == (byte)'?';
+
+     if (!endsWithQuestionMark)
+         addC('?', linelen++);
+
+     addC('>', linelen++);
+     condFlushLine(fout, indent);
+ }
+
+ /* note ASP and JSTE share <% ... %> syntax */
+ private void printAsp(Out fout, int indent, Node node)
+ {
+ int savewraplen = this.configuration.wraplen;
+
+ /* disable wrapping if so requested */
+
+ if (!this.configuration.WrapAsp || !this.configuration.WrapJste)
+ this.configuration.wraplen = 0xFFFFFF; /* a very large number */
+if (false) { //#if 0
+ if (indent + linelen < this.configuration.wraplen)
+ wraphere = linelen;
+} //#endif
+
+ addC('<', linelen++);
+ addC('%', linelen++);
+
+ printText(fout, (this.configuration.WrapAsp ? CDATA : COMMENT), indent,
+ node.textarray, node.start, node.end);
+
+ addC('%', linelen++);
+ addC('>', linelen++);
+ /* condFlushLine(fout, indent); */
+ this.configuration.wraplen = savewraplen;
+ }
+
+ /* JSTE also supports <# ... #> syntax */
+ private void printJste(Out fout, int indent, Node node)
+ {
+ int savewraplen = this.configuration.wraplen;
+
+ /* disable wrapping if so requested */
+
+ if (!this.configuration.WrapJste)
+ this.configuration.wraplen = 0xFFFFFF; /* a very large number */
+
+ addC('<', linelen++);
+ addC('#', linelen++);
+
+ printText(fout, (this.configuration.WrapJste ? CDATA : COMMENT), indent,
+ node.textarray, node.start, node.end);
+
+ addC('#', linelen++);
+ addC('>', linelen++);
+ /* condFlushLine(fout, indent); */
+ this.configuration.wraplen = savewraplen;
+ }
+
+ /* PHP is based on XML processing instructions */
+ private void printPhp(Out fout, int indent, Node node)
+ {
+ int savewraplen = this.configuration.wraplen;
+
+ /* disable wrapping if so requested */
+
+ if (!this.configuration.WrapPhp)
+ this.configuration.wraplen = 0xFFFFFF; /* a very large number */
+
+if (false) { //#if 0
+ if (indent + linelen < this.configuration.wraplen)
+ wraphere = linelen;
+} //#endif
+ addC('<', linelen++);
+ addC('?', linelen++);
+
+ printText(fout, (this.configuration.WrapPhp ? CDATA : COMMENT), indent,
+ node.textarray, node.start, node.end);
+
+ addC('?', linelen++);
+ addC('>', linelen++);
+ /* PCondFlushLine(fout, indent); */
+ this.configuration.wraplen = savewraplen;
+ }
+
+ private void printCDATA(Out fout, int indent, Node node)
+ {
+ int savewraplen = this.configuration.wraplen;
+
+ condFlushLine(fout, indent);
+
+ /* disable wrapping */
+
+ this.configuration.wraplen = 0xFFFFFF; /* a very large number */
+
+ addC('<', linelen++);
+ addC('!', linelen++);
+ addC('[', linelen++);
+ addC('C', linelen++);
+ addC('D', linelen++);
+ addC('A', linelen++);
+ addC('T', linelen++);
+ addC('A', linelen++);
+ addC('[', linelen++);
+
+ printText(fout, COMMENT, indent,
+ node.textarray, node.start, node.end);
+
+ addC(']', linelen++);
+ addC(']', linelen++);
+ addC('>', linelen++);
+ condFlushLine(fout, indent);
+ this.configuration.wraplen = savewraplen;
+ }
+
+ private void printSection(Out fout, int indent, Node node)
+ {
+ int savewraplen = this.configuration.wraplen;
+
+ /* disable wrapping if so requested */
+
+ if (!this.configuration.WrapSection)
+ this.configuration.wraplen = 0xFFFFFF; /* a very large number */
+
+if (false) { //#if 0
+ if (indent + linelen < this.configuration.wraplen)
+ wraphere = linelen;
+} //#endif
+ addC('<', linelen++);
+ addC('!', linelen++);
+ addC('[', linelen++);
+
+ printText(fout, (this.configuration.WrapSection ? CDATA : COMMENT), indent,
+ node.textarray, node.start, node.end);
+
+ addC(']', linelen++);
+ addC('>', linelen++);
+ /* PCondFlushLine(fout, indent); */
+ this.configuration.wraplen = savewraplen;
+ }
+
+ private boolean shouldIndent(Node node)
+ {
+ TagTable tt = this.configuration.tt;
+
+ if (!this.configuration.IndentContent)
+ return false;
+
+ if (this.configuration.SmartIndent)
+ {
+ if (node.content != null && ((node.tag.model & Dict.CM_NO_INDENT) != 0))
+ {
+ for (node = node.content; node != null; node = node.next)
+ if (node.tag != null && (node.tag.model & Dict.CM_BLOCK) != 0)
+ return true;
+
+ return false;
+ }
+
+ if ((node.tag.model & Dict.CM_HEADING) != 0)
+ return false;
+
+ if (node.tag == tt.tagP)
+ return false;
+
+ if (node.tag == tt.tagTitle)
+ return false;
+ }
+
+ if ((node.tag.model & (Dict.CM_FIELD | Dict.CM_OBJECT)) != 0)
+ return true;
+
+ if (node.tag == tt.tagMap)
+ return true;
+
+ return !((node.tag.model & Dict.CM_INLINE) != 0);
+ }
+
+ /*
+ Pretty prints the subtree rooted at node as HTML.
+
+ fout   - output sink
+ mode   - print-mode flags handed down to the text routines
+ indent - current indentation in columns
+ lexer  - passed through to printTag; may be null as far as
+          printTag is concerned
+ node   - subtree to print; null is ignored
+
+ Text, comments, doctype, processing instructions, CDATA, sections
+ and ASP/JSTE/PHP nodes are delegated to their print routines. The
+ remaining nodes are elements, handled by category: empty elements,
+ PRE-like elements, STYLE/SCRIPT, inline elements and all other
+ containers.
+ */
+ public void printTree(Out fout, short mode, int indent,
+ Lexer lexer, Node node)
+ {
+ Node content, last;
+ TagTable tt = this.configuration.tt;
+
+ if (node == null)
+ return;
+
+ if (node.type == Node.TextNode)
+ printText(fout, mode, indent,
+ node.textarray, node.start, node.end);
+ else if (node.type == Node.CommentTag)
+ {
+ printComment(fout, indent, node);
+ }
+ else if (node.type == Node.RootNode)
+ {
+ for (content = node.content;
+ content != null;
+ content = content.next)
+ printTree(fout, mode, indent, lexer, content);
+ }
+ else if (node.type == Node.DocTypeTag)
+ printDocType(fout, indent, node);
+ else if (node.type == Node.ProcInsTag)
+ printPI(fout, indent, node);
+ else if (node.type == Node.CDATATag)
+ printCDATA(fout, indent, node);
+ else if (node.type == Node.SectionTag)
+ printSection(fout, indent, node);
+ else if (node.type == Node.AspTag)
+ printAsp(fout, indent, node);
+ else if (node.type == Node.JsteTag)
+ printJste(fout, indent, node);
+ else if (node.type == Node.PhpTag)
+ printPhp(fout, indent, node);
+ /* NOTE(review): node.tag is dereferenced here without a null check, while later branches test node.tag != null - confirm it cannot be null at this point */
+ else if ((node.tag.model & Dict.CM_EMPTY) != 0 || node.type == Node.StartEndTag)
+ {
+ /* non-inline empty elements start on a fresh line */
+ if (!((node.tag.model & Dict.CM_INLINE) != 0))
+ condFlushLine(fout, indent);
+
+ if (node.tag == tt.tagBr && node.prev != null &&
+ node.prev.tag != tt.tagBr && this.configuration.BreakBeforeBR)
+ flushLine(fout, indent);
+
+ /* with MakeClean a WBR element is replaced by a single space */
+ if (this.configuration.MakeClean && node.tag == tt.tagWbr)
+ printString(fout, indent, " ");
+ else
+ printTag(lexer, fout, mode, indent, node);
+
+ if (node.tag == tt.tagParam || node.tag == tt.tagArea)
+ condFlushLine(fout, indent);
+ else if (node.tag == tt.tagBr || node.tag == tt.tagHr)
+ flushLine(fout, indent);
+ }
+ else /* some kind of container element */
+ {
+ /* elements handled by the PRE parser: no indentation, content printed preformatted */
+ if (node.tag != null && node.tag.parser == ParserImpl.getParsePre())
+ {
+ condFlushLine(fout, indent);
+
+ indent = 0;
+ condFlushLine(fout, indent);
+ printTag(lexer, fout, mode, indent, node);
+ flushLine(fout, indent);
+
+ for (content = node.content;
+ content != null;
+ content = content.next)
+ printTree(fout, (short)(mode | PREFORMATTED | NOWRAP), indent, lexer, content);
+
+ condFlushLine(fout, indent);
+ printEndTag(fout, mode, indent, node);
+ flushLine(fout, indent);
+
+ if (this.configuration.IndentContent == false && node.next != null)
+ flushLine(fout, indent);
+ }
+ /* STYLE and SCRIPT: as above, and additionally printed as CDATA so markup characters stay unescaped */
+ else if (node.tag == tt.tagStyle || node.tag == tt.tagScript)
+ {
+ condFlushLine(fout, indent);
+
+ indent = 0;
+ condFlushLine(fout, indent);
+ printTag(lexer, fout, mode, indent, node);
+ flushLine(fout, indent);
+
+ for (content = node.content;
+ content != null;
+ content = content.next)
+ printTree(fout, (short)(mode | PREFORMATTED | NOWRAP |CDATA), indent, lexer, content);
+
+ condFlushLine(fout, indent);
+ printEndTag(fout, mode, indent, node);
+ flushLine(fout, indent);
+
+ if (this.configuration.IndentContent == false && node.next != null)
+ flushLine(fout, indent);
+ }
+ else if ((node.tag.model & Dict.CM_INLINE) != 0)
+ {
+ if (this.configuration.MakeClean)
+ {
+ /* discards <font> and </font> tags */
+ if (node.tag == tt.tagFont)
+ {
+ for (content = node.content;
+ content != null;
+ content = content.next)
+ printTree(fout, mode, indent, lexer, content);
+ return;
+ }
+
+ /* replace <nobr>...</nobr> by &nbsp; or &#160; etc. */
+ if (node.tag == tt.tagNobr)
+ {
+ for (content = node.content;
+ content != null;
+ content = content.next)
+ printTree(fout, (short)(mode|NOWRAP), indent, lexer, content);
+ return;
+ }
+ }
+
+ /* otherwise a normal inline element */
+
+ printTag(lexer, fout, mode, indent, node);
+
+ /* indent content for SELECT, TEXTAREA, MAP, OBJECT and APPLET */
+
+ if (shouldIndent(node))
+ {
+ condFlushLine(fout, indent);
+ indent += this.configuration.spaces;
+
+ for (content = node.content;
+ content != null;
+ content = content.next)
+ printTree(fout, mode, indent, lexer, content);
+
+ condFlushLine(fout, indent);
+ indent -= this.configuration.spaces;
+ condFlushLine(fout, indent);
+ }
+ else
+ {
+
+ for (content = node.content;
+ content != null;
+ content = content.next)
+ printTree(fout, mode, indent, lexer, content);
+ }
+
+ printEndTag(fout, mode, indent, node);
+ }
+ else /* other tags */
+ {
+ condFlushLine(fout, indent);
+
+ if (this.configuration.SmartIndent && node.prev != null)
+ flushLine(fout, indent);
+
+ /* the start tag is left out only when HideEndTags is on and the tag is marked CM_OMITST */
+ if (this.configuration.HideEndTags == false ||
+ !(node.tag != null && ((node.tag.model & Dict.CM_OMITST) != 0)))
+ {
+ printTag(lexer, fout, mode, indent, node);
+
+ if (shouldIndent(node))
+ condFlushLine(fout, indent);
+ else if ((node.tag.model & Dict.CM_HTML) != 0 ||
+ node.tag == tt.tagNoframes ||
+ ((node.tag.model & Dict.CM_HEAD) != 0 &&
+ !(node.tag == tt.tagTitle)))
+ flushLine(fout, indent);
+ }
+
+ /* with BurstSlides the BODY content is printed by printSlide instead */
+ if (node.tag == tt.tagBody && this.configuration.BurstSlides)
+ printSlide(fout, mode, (this.configuration.IndentContent ? indent+this.configuration.spaces : indent), lexer);
+ else
+ {
+ last = null;
+
+ for (content = node.content;
+ content != null; content = content.next)
+ {
+ /* kludge for naked text before block level tag */
+ if (last != null && !this.configuration.IndentContent && last.type == Node.TextNode &&
+ content.tag != null && (content.tag.model & Dict.CM_BLOCK) != 0)
+ {
+ flushLine(fout, indent);
+ flushLine(fout, indent);
+ }
+
+ printTree(fout, mode,
+ (shouldIndent(node) ? indent+this.configuration.spaces : indent), lexer, content);
+
+ last = content;
+ }
+ }
+
+ /* don't flush line for td and th */
+ if (shouldIndent(node) ||
+ (((node.tag.model & Dict.CM_HTML) != 0 || node.tag == tt.tagNoframes ||
+ ((node.tag.model & Dict.CM_HEAD) != 0 && !(node.tag == tt.tagTitle)))
+ && this.configuration.HideEndTags == false))
+ {
+ condFlushLine(fout, (this.configuration.IndentContent ? indent+this.configuration.spaces : indent));
+
+ if (this.configuration.HideEndTags == false || !((node.tag.model & Dict.CM_OPT) != 0))
+ {
+ printEndTag(fout, mode, indent, node);
+ flushLine(fout, indent);
+ }
+ }
+ else
+ {
+ if (this.configuration.HideEndTags == false || !((node.tag.model & Dict.CM_OPT) != 0))
+ printEndTag(fout, mode, indent, node);
+
+ flushLine(fout, indent);
+ }
+
+ /* without content indentation, block-like elements are followed by a blank line */
+ if (this.configuration.IndentContent == false &&
+ node.next != null &&
+ this.configuration.HideEndTags == false &&
+ (node.tag.model & (Dict.CM_BLOCK|Dict.CM_LIST|Dict.CM_DEFLIST|Dict.CM_TABLE)) != 0)
+ {
+ flushLine(fout, indent);
+ }
+ }
+ }
+ }
+
+ /*
+ Pretty prints the subtree rooted at node as XML. Text and the
+ special node types are delegated to their print routines. Empty
+ elements are printed on a line of their own. For containers the
+ children are indented one level, unless the element has a text
+ child (mixed content, printed inline) or its white space must be
+ preserved (printed without any indentation).
+ */
+ public void printXMLTree(Out fout, short mode, int indent,
+                          Lexer lexer, Node node)
+ {
+     final TagTable tt = this.configuration.tt;
+
+     if (node == null)
+         return;
+
+     if (node.type == Node.TextNode)
+     {
+         printText(fout, mode, indent,
+                   node.textarray, node.start, node.end);
+     }
+     else if (node.type == Node.CommentTag)
+     {
+         condFlushLine(fout, indent);
+         printComment(fout, 0, node);
+         condFlushLine(fout, 0);
+     }
+     else if (node.type == Node.RootNode)
+     {
+         Node child = node.content;
+         while (child != null)
+         {
+             printXMLTree(fout, mode, indent, lexer, child);
+             child = child.next;
+         }
+     }
+     else if (node.type == Node.DocTypeTag)
+         printDocType(fout, indent, node);
+     else if (node.type == Node.ProcInsTag)
+         printPI(fout, indent, node);
+     else if (node.type == Node.SectionTag)
+         printSection(fout, indent, node);
+     else if (node.type == Node.AspTag)
+         printAsp(fout, indent, node);
+     else if (node.type == Node.JsteTag)
+         printJste(fout, indent, node);
+     else if (node.type == Node.PhpTag)
+         printPhp(fout, indent, node);
+     else if ((node.tag.model & Dict.CM_EMPTY) != 0 || node.type == Node.StartEndTag)
+     {
+         condFlushLine(fout, indent);
+         printTag(lexer, fout, mode, indent, node);
+         flushLine(fout, indent);
+
+         if (node.next != null)
+             flushLine(fout, indent);
+     }
+     else /* some kind of container element */
+     {
+         Node child;
+
+         /* content is "mixed" as soon as one child is a text node */
+         boolean mixed = false;
+         for (child = node.content; child != null && !mixed; child = child.next)
+             mixed = (child.type == Node.TextNode);
+
+         condFlushLine(fout, indent);
+
+         int cindent;
+         if (ParserImpl.XMLPreserveWhiteSpace(node, tt))
+         {
+             indent = 0;
+             cindent = 0;
+             mixed = false;
+         }
+         else
+         {
+             cindent = mixed ? indent : indent + this.configuration.spaces;
+         }
+
+         printTag(lexer, fout, mode, indent, node);
+
+         if (!mixed)
+             flushLine(fout, indent);
+
+         child = node.content;
+         while (child != null)
+         {
+             printXMLTree(fout, mode, cindent, lexer, child);
+             child = child.next;
+         }
+
+         if (!mixed)
+             condFlushLine(fout, cindent);
+         printEndTag(fout, mode, indent, node);
+         condFlushLine(fout, indent);
+
+         if (node.next != null)
+             flushLine(fout, indent);
+     }
+ }
+
+
+ /* split parse tree by h2 elements and output to separate files */
+
+ /**
+  * Counts the slides that the content of <code>node</code> splits into.
+  * Every h2 child starts a new slide. Content in front of the first h2
+  * forms a slide of its own, so the count starts at one - unless the
+  * very first child already is an h2, in which case there is no such
+  * leading slide and counting it would produce one slide too many (a
+  * trailing slide without any content).
+  *
+  * @param node the element whose children are scanned; may be null
+  * @return the number of slides, or 0 when node is null
+  */
+ public int countSlides(Node node)
+ {
+     if (node == null)
+         return 0;
+
+     TagTable tt = this.configuration.tt;
+     int n = 1;
+
+     /* first child is an h2: it opens slide 1, do not count it twice */
+     if (node.content != null && node.content.tag == tt.tagH2)
+         --n;
+
+     for (node = node.content; node != null; node = node.next)
+         if (node.tag == tt.tagH2)
+             ++n;
+
+     return n;
+ }
+
+ /*
+ inserts a space gif called "dot.gif" to ensure
+ that the slide is at least n pixels high
+ */
+ private void printVertSpacer(Out fout, int indent)
+ {
+ condFlushLine(fout, indent);
+ printString(fout, indent ,
+ "<img width=\"0\" height=\"0\" hspace=\"1\" src=\"dot.gif\" vspace=\"%d\" align=\"left\">");
+ condFlushLine(fout, indent);
+ }
+
+ /*
+  Prints the navigation bar of the current slide: a "previous" and a
+  "start" link when this is not the first slide, and a "next" link when
+  this is not the last one. Uses the fields slide and count.
+ */
+ private void printNavBar(Out fout, int indent)
+ {
+     condFlushLine(fout, indent);
+     printString(fout, indent, "<center><small>");
+
+     if (slide > 1)
+     {
+         printString(fout, indent,
+                     "<a href=\"slide" + (slide - 1) + ".html\">previous</a> | ");
+         condFlushLine(fout, indent);
+
+         /* the separator is only wanted when a "next" link follows */
+         printString(fout, indent,
+                     slide < count
+                         ? "<a href=\"slide1.html\">start</a> | "
+                         : "<a href=\"slide1.html\">start</a>");
+         condFlushLine(fout, indent);
+     }
+
+     if (slide < count)
+     {
+         printString(fout, indent,
+                     "<a href=\"slide" + (slide + 1) + ".html\">next</a>");
+     }
+
+     printString(fout, indent, "</small></center>");
+     condFlushLine(fout, indent);
+ }
+
+ /*
+ Called from printTree to print the content of a slide from
+ the node slidecontent. On return slidecontent points to the
+ node starting the next slide or null. The variables slide
+ and count are used to customise the navigation bar.
+ */
+ public void printSlide(Out fout, short mode, int indent, Lexer lexer)
+ {
+ Node content, last;
+ TagTable tt = this.configuration.tt;
+
+ /* insert div for onclick handler */
+ String s;
+ s = "<div onclick=\"document.location='slide" +
+ (new Integer(slide < count ? slide + 1 : 1)).toString() +
+ ".html'\">";
+ printString(fout, indent, s);
+ condFlushLine(fout, indent);
+
+ /* first print the h2 element and navbar */
+ if (slidecontent.tag == tt.tagH2)
+ {
+ printNavBar(fout, indent);
+
+ /* now print an hr after h2 */
+
+ addC('<', linelen++);
+
+
+ addC((int)Lexer.foldCase('h',
+ this.configuration.UpperCaseTags,
+ this.configuration.XmlTags),
+ linelen++);
+ addC((int)Lexer.foldCase('r',
+ this.configuration.UpperCaseTags,
+ this.configuration.XmlTags),
+ linelen++);
+
+ if (this.configuration.XmlOut == true)
+ printString(fout, indent , " />");
+ else
+ addC('>', linelen++);
+
+
+ if (this.configuration.IndentContent == true)
+ condFlushLine(fout, indent);
+
+ /* PrintVertSpacer(fout, indent); */
+
+ /*condFlushLine(fout, indent); */
+
+ /* print the h2 element */
+ printTree(fout, mode,
+ (this.configuration.IndentContent ? indent+this.configuration.spaces : indent), lexer, slidecontent);
+
+ slidecontent = slidecontent.next;
+ }
+
+ /* now continue until we reach the next h2 */
+
+ last = null;
+ content = slidecontent;
+
+ for (; content != null; content = content.next)
+ {
+ if (content.tag == tt.tagH2)
+ break;
+
+ /* kludge for naked text before block level tag */
+ if (last != null && !this.configuration.IndentContent && last.type == Node.TextNode &&
+ content.tag != null && (content.tag.model & Dict.CM_BLOCK) != 0)
+ {
+ flushLine(fout, indent);
+ flushLine(fout, indent);
+ }
+
+ printTree(fout, mode,
+ (this.configuration.IndentContent ? indent+this.configuration.spaces : indent), lexer, content);
+
+ last = content;
+ }
+
+ slidecontent = content;
+
+ /* now print epilog */
+
+ condFlushLine(fout, indent);
+
+ printString(fout, indent , "<br clear=\"all\">");
+ condFlushLine(fout, indent);
+
+ addC('<', linelen++);
+
+
+ addC((int)Lexer.foldCase('h',
+ this.configuration.UpperCaseTags,
+ this.configuration.XmlTags),
+ linelen++);
+ addC((int)Lexer.foldCase('r',
+ this.configuration.UpperCaseTags,
+ this.configuration.XmlTags),
+ linelen++);
+
+ if (this.configuration.XmlOut == true)
+ printString(fout, indent , " />");
+ else
+ addC('>', linelen++);
+
+
+ if (this.configuration.IndentContent == true)
+ condFlushLine(fout, indent);
+
+ printNavBar(fout, indent);
+
+ /* end tag for div */
+ printString(fout, indent, "</div>");
+ condFlushLine(fout, indent);
+ }
+
+
+ /*
+  Add meta element for page transition effect, this works on IE but not NS.
+
+  Effects 0..23 are written as "revealTrans(Duration=d,Transition=effect)",
+  any other value as "blendTrans(Duration=d)". The meta element is
+  inserted at the start of the head element; nothing is added when the
+  document has no head.
+ */
+
+ public void addTransitionEffect(Lexer lexer, Node root, short effect, double duration)
+ {
+     Node head = root.findHEAD(lexer.configuration.tt);
+     boolean reveal = (effect >= 0 && effect <= 23);
+     String seconds = Double.toString(duration);
+     String transition;
+
+     if (reveal)
+         transition = "revealTrans(Duration=" + seconds + ",Transition=" + effect + ")";
+     else
+         transition = "blendTrans(Duration=" + seconds + ")";
+
+     if (head == null)
+         return;
+
+     Node meta = lexer.inferredTag("meta");
+     meta.addAttribute("http-equiv", "Page-Enter");
+     meta.addAttribute("content", transition);
+     Node.insertNodeAtStart(head, meta);
+ }
+
+ /**
+  * Splits the document at its h2 elements and writes each part to a
+  * file slide1.html, slide2.html, ... in the current directory.
+  * Afterwards files slideN.html with N above the slide count (left
+  * behind by an earlier run, for instance) are deleted until one
+  * cannot be deleted.
+  *
+  * I/O errors are reported on System.err and do not stop the remaining
+  * slides from being written.
+  *
+  * @param lexer lexer supplying the tag table and used for printing
+  * @param root  root of the parse tree
+  */
+ public void createSlides(Lexer lexer, Node root)
+ {
+     Node body;
+     String buf;
+     Out out = new OutImpl();
+
+     body = root.findBody(lexer.configuration.tt);
+
+     /* no body element (e.g. a frameset document): nothing to split */
+     if (body == null)
+         return;
+
+     count = countSlides(body);
+     slidecontent = body.content;
+     addTransitionEffect(lexer, root, EFFECT_BLEND, 3.0);
+
+     for (slide = 1; slide <= count; ++slide)
+     {
+         buf = "slide" + slide + ".html";
+         out.state = StreamIn.FSM_ASCII;
+         out.encoding = this.configuration.CharEncoding;
+
+         try
+         {
+             out.out = new FileOutputStream(buf);
+
+             try
+             {
+                 printTree(out, (short)0, 0, lexer, root);
+                 flushLine(out, 0);
+             }
+             finally
+             {
+                 /* always release the file, even if printing fails */
+                 out.out.close();
+             }
+         }
+         catch (IOException e)
+         {
+             System.err.println(buf + e.toString() );
+         }
+     }
+
+     /*
+      delete superfluous slides by deleting slideN.html
+      for N = count+1, count+2, etc. until no such file
+      is found.
+     */
+
+     for (;;)
+     {
+         /* must match the names written above, including the dot */
+         buf = "slide" + slide + ".html";
+
+         if (!(new File(buf)).delete())
+             break;
+
+         ++slide;
+     }
+ }
+
+}
--- /dev/null
+/*
+ * @(#)Parser.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * HTML Parser
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public interface Parser {
+
+    /**
+     * Parses the content of an element.
+     * The implementations in ParserImpl read tokens from the lexer and
+     * attach the accepted ones to the given node.
+     *
+     * @param lexer source of tokens
+     * @param node  the element whose content is to be parsed
+     * @param mode  lexer mode requested by the caller (e.g.
+     *              Lexer.IgnoreWhitespace or Lexer.MixedContent)
+     */
+    void parse(Lexer lexer, Node node, short mode);
+
+}
+
--- /dev/null
+/*
+ * @(#)ParserImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * HTML Parser implementation
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class ParserImpl {
+
+ //private static int SeenBodyEndTag; /* AQ: moved into lexer structure */
+
+ /**
+  * Runs the parser registered for the tag of <code>node</code>, if any.
+  *
+  * Local fix by GLP 2000-12-21: insertspace must also be reset for tags
+  * that are both non-inline and empty (base, link, meta, isindex, hr,
+  * area). The original code returned for empty tags first and only
+  * reset insertspace for the remaining non-inline tags; here the reset
+  * happens before the empty-tag check. Remove this once the fix is made
+  * in Tidy.
+  */
+ private static void parseTag(Lexer lexer, Node node, short mode)
+ {
+     /* anything that is not inline resets the pending space */
+     if ((node.tag.model & Dict.CM_INLINE) == 0)
+     {
+         lexer.insertspace = false;
+     }
+
+     /* empty elements have no content to parse */
+     if ((node.tag.model & Dict.CM_EMPTY) != 0)
+     {
+         lexer.waswhite = false;
+         return;
+     }
+
+     /* only start tags with a registered parser have content to parse */
+     if (node.tag.parser != null && node.type != Node.StartEndTag)
+     {
+         node.tag.parser.parse(lexer, node, mode);
+     }
+ }
+
+ /**
+  * Moves a start tag that was found outside the head into the head
+  * element and parses it there. Any other kind of node is only
+  * reported as discarded.
+  *
+  * @param lexer   the lexer in use
+  * @param element element in which the node was found; its ancestors
+  *                are walked up to the html element
+  * @param node    the misplaced node
+  */
+ private static void moveToHead(Lexer lexer, Node element, Node node)
+ {
+     TagTable tt = lexer.configuration.tt;
+
+     if (node.type != Node.StartTag && node.type != Node.StartEndTag)
+     {
+         Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+         return;
+     }
+
+     Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
+
+     /* climb to the html element ... */
+     Node html = element;
+
+     while (html.tag != tt.tagHtml)
+     {
+         html = html.parent;
+     }
+
+     /* ... and append the node to its first head child, if there is one */
+     Node head = html.content;
+
+     while (head != null && head.tag != tt.tagHead)
+     {
+         head = head.next;
+     }
+
+     if (head != null)
+     {
+         Node.insertNodeAtEnd(head, node);
+     }
+
+     if (node.tag.parser != null)
+     {
+         parseTag(lexer, node, Lexer.IgnoreWhitespace);
+     }
+ }
+
+ /**
+  * Parser for the html element: locates or infers the head, then the
+  * body, or for frameset documents the frameset with its noframes part.
+  */
+ public static class ParseHTML implements Parser {
+
+     /**
+      * Parses the content of <code>html</code>.
+      * A head element is always inserted (inferred when missing). Unless
+      * the document contains a frameset, a body element is always
+      * inserted as well - also when the input ends before any body
+      * content is seen.
+      *
+      * @param lexer source of tokens
+      * @param html  the html element being filled
+      * @param mode  mode handed on to the head/body/frameset parsers
+      */
+     public void parse( Lexer lexer, Node html, short mode )
+     {
+         Node node, head;
+         Node frameset = null;
+         Node noframes = null;
+
+         lexer.configuration.XmlTags = false;
+         lexer.seenBodyEndTag = 0;
+         TagTable tt = lexer.configuration.tt;
+
+         /* find the head element, inferring one when it is missing */
+         for (;;)
+         {
+             node = lexer.getToken(Lexer.IgnoreWhitespace);
+
+             if (node == null)
+             {
+                 node = lexer.inferredTag("head");
+                 break;
+             }
+
+             if (node.tag == tt.tagHead)
+                 break;
+
+             if (node.tag == html.tag && node.type == Node.EndTag)
+             {
+                 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             /* deal with comments etc. */
+             if (Node.insertMisc(html, node))
+                 continue;
+
+             lexer.ungetToken();
+             node = lexer.inferredTag("head");
+             break;
+         }
+
+         head = node;
+         Node.insertNodeAtEnd(html, head);
+         getParseHead().parse(lexer, head, mode);
+
+         /* find the body or frameset */
+         for (;;)
+         {
+             node = lexer.getToken(Lexer.IgnoreWhitespace);
+
+             if (node == null)
+             {
+                 /* end of input: a frameset document is complete as it is */
+                 if (frameset != null)
+                     return;
+
+                 /*
+                  otherwise create an empty body and leave the loop so
+                  that it is inserted into the tree below (it used to
+                  be created and then dropped)
+                 */
+                 node = lexer.inferredTag("body");
+                 break;
+             }
+
+             /* robustly handle html tags */
+             if (node.tag == html.tag)
+             {
+                 if (node.type != Node.StartTag && frameset == null)
+                     Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
+
+                 continue;
+             }
+
+             /* deal with comments etc. */
+             if (Node.insertMisc(html, node))
+                 continue;
+
+             /* if frameset document coerce <body> to <noframes> */
+             if (node.tag == tt.tagBody)
+             {
+                 if (node.type != Node.StartTag)
+                 {
+                     Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
+                     continue;
+                 }
+
+                 if (frameset != null)
+                 {
+                     lexer.ungetToken();
+
+                     if (noframes == null)
+                     {
+                         noframes = lexer.inferredTag("noframes");
+                         Node.insertNodeAtEnd(frameset, noframes);
+                         Report.warning(lexer, html, noframes, Report.INSERTING_TAG);
+                     }
+
+                     parseTag(lexer, noframes, mode);
+                     continue;
+                 }
+
+                 break; /* to parse body */
+             }
+
+             /* flag an error if we see more than one frameset */
+             if (node.tag == tt.tagFrameset)
+             {
+                 if (node.type != Node.StartTag)
+                 {
+                     Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
+                     continue;
+                 }
+
+                 if (frameset != null)
+                     Report.error(lexer, html, node, Report.DUPLICATE_FRAMESET);
+                 else
+                     frameset = node;
+
+                 Node.insertNodeAtEnd(html, node);
+                 parseTag(lexer, node, mode);
+
+                 /*
+                  see if it includes a noframes element so
+                  that we can merge subsequent noframes elements
+                 */
+
+                 for (node = frameset.content; node != null; node = node.next)
+                 {
+                     if (node.tag == tt.tagNoframes)
+                         noframes = node;
+                 }
+                 continue;
+             }
+
+             /* if not a frameset document coerce <noframes> to <body> */
+             if (node.tag == tt.tagNoframes)
+             {
+                 if (node.type != Node.StartTag)
+                 {
+                     Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
+                     continue;
+                 }
+
+                 if (frameset == null)
+                 {
+                     Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
+                     node = lexer.inferredTag("body");
+                     break;
+                 }
+
+                 if (noframes == null)
+                 {
+                     noframes = node;
+                     Node.insertNodeAtEnd(frameset, noframes);
+                 }
+
+                 parseTag(lexer, noframes, mode);
+                 continue;
+             }
+
+             /* head-only elements found here are moved into the head */
+             if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+             {
+                 if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
+                 {
+                     moveToHead(lexer, html, node);
+                     continue;
+                 }
+             }
+
+             lexer.ungetToken();
+
+             /* insert other content into noframes element */
+
+             if (frameset != null)
+             {
+                 if (noframes == null)
+                 {
+                     noframes = lexer.inferredTag("noframes");
+                     Node.insertNodeAtEnd(frameset, noframes);
+                 }
+                 else
+                     Report.warning(lexer, html, node, Report.NOFRAMES_CONTENT);
+
+                 parseTag(lexer, noframes, mode);
+                 continue;
+             }
+
+             node = lexer.inferredTag("body");
+             break;
+         }
+
+         /* node must be body */
+
+         Node.insertNodeAtEnd(html, node);
+         parseTag(lexer, node, mode);
+     }
+
+ };
+
+ /**
+  * Parser for the head element.
+  */
+ public static class ParseHead implements Parser {
+
+     /**
+      * Collects head content until the head end tag, a text node, or a
+      * tag that is not head content is met (the latter two are pushed
+      * back to the lexer). Repeated title or base elements are reported,
+      * and an inferred title is appended when none was seen.
+      */
+     public void parse( Lexer lexer, Node head, short mode )
+     {
+         Node node;
+         int titleCount = 0;
+         int baseCount = 0;
+         TagTable tt = lexer.configuration.tt;
+
+         for (node = lexer.getToken(Lexer.IgnoreWhitespace);
+              node != null;
+              node = lexer.getToken(Lexer.IgnoreWhitespace))
+         {
+             if (node.tag == head.tag && node.type == Node.EndTag)
+             {
+                 head.closed = true;
+                 break;
+             }
+
+             /* text ends the head: leave it for the caller */
+             if (node.type == Node.TextNode)
+             {
+                 lexer.ungetToken();
+                 break;
+             }
+
+             /* deal with comments etc. */
+             if (Node.insertMisc(head, node))
+                 continue;
+
+             if (node.type == Node.DocTypeTag)
+             {
+                 Node.insertDocType(lexer, head, node);
+                 continue;
+             }
+
+             /* discard unknown tags */
+             if (node.tag == null)
+             {
+                 Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             /* anything that is not head content ends the head */
+             if ((node.tag.model & Dict.CM_HEAD) == 0)
+             {
+                 lexer.ungetToken();
+                 break;
+             }
+
+             /* discard unexpected text nodes and end tags */
+             if (node.type != Node.StartTag && node.type != Node.StartEndTag)
+             {
+                 Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             if (node.tag == tt.tagTitle)
+             {
+                 if (++titleCount > 1)
+                     Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
+             }
+             else if (node.tag == tt.tagBase)
+             {
+                 if (++baseCount > 1)
+                     Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
+             }
+             else if (node.tag == tt.tagNoscript)
+             {
+                 Report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
+             }
+
+             Node.insertNodeAtEnd(head, node);
+             parseTag(lexer, node, Lexer.IgnoreWhitespace);
+         }
+
+         if (titleCount == 0)
+         {
+             Report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT);
+             Node.insertNodeAtEnd(head, lexer.inferredTag( "title"));
+         }
+     }
+
+ };
+
+ /**
+  * Parser for the title element.
+  */
+ public static class ParseTitle implements Parser {
+
+     /**
+      * Collects the text of the title until its end tag. An unexpected
+      * known tag ends the title early and is pushed back to the lexer;
+      * running out of input is reported as a missing end tag.
+      */
+     public void parse( Lexer lexer, Node title, short mode )
+     {
+         Node node;
+
+         for (node = lexer.getToken(Lexer.MixedContent);
+              node != null;
+              node = lexer.getToken(Lexer.MixedContent))
+         {
+             if (node.tag == title.tag && node.type == Node.EndTag)
+             {
+                 title.closed = true;
+                 Node.trimSpaces(lexer, title);
+                 return;
+             }
+
+             if (node.type == Node.TextNode)
+             {
+                 /* only called for 1st child */
+                 if (title.content == null)
+                     Node.trimInitialSpace(lexer, title, node);
+
+                 /* keep the text only if something is left of it */
+                 if (node.start < node.end)
+                     Node.insertNodeAtEnd(title, node);
+
+                 continue;
+             }
+
+             /* deal with comments etc. */
+             if (Node.insertMisc(title, node))
+                 continue;
+
+             /* discard unknown tags */
+             if (node.tag == null)
+             {
+                 Report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             /* pushback unexpected tokens */
+             Report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE);
+             lexer.ungetToken();
+             Node.trimSpaces(lexer, title);
+             return;
+         }
+
+         /* input exhausted; node is null here */
+         Report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR);
+     }
+
+ };
+
+ /**
+  * Parser for elements whose content is fetched with Lexer.getCDATA.
+  */
+ public static class ParseScript implements Parser {
+
+     /**
+      * Fetches the content of <code>script</code> as one node via
+      * lexer.getCDATA and appends it, if there is any.
+      *
+      * This isn't quite right for CDATA content as it recognises
+      * tags within the content and parses them accordingly.
+      * This will unfortunately screw up scripts which include
+      * < + letter, < + !, < + ? or < + / + letter
+      */
+     public void parse( Lexer lexer, Node script, short mode )
+     {
+         Node cdata = lexer.getCDATA( script);
+
+         if (cdata == null)
+             return;
+
+         Node.insertNodeAtEnd(script, cdata);
+     }
+
+ };
+
+ /**
+  * Parser for the content of a body element: pulls tokens from the
+  * lexer and appends the accepted ones to the body node.
+  */
+ public static class ParseBody implements Parser {
+
+ /**
+  * Parses tokens into <code>body</code> until the lexer runs out of
+  * tokens, or until a token that ends this body is met (see the
+  * break/return statements below).
+  *
+  * @param lexer source of tokens
+  * @param body the body element being filled
+  * @param mode ignored: it is overwritten with Lexer.IgnoreWhitespace
+  * before the first token is read
+  */
+ public void parse( Lexer lexer, Node body, short mode )
+ {
+ Node node;
+ boolean checkstack, iswhitenode;
+
+ mode = Lexer.IgnoreWhitespace;
+ checkstack = true;
+ TagTable tt = lexer.configuration.tt;
+
+ while (true)
+ {
+ node = lexer.getToken(mode);
+ if (node == null) break;
+ /* end tag of this body: mark it closed, but keep reading tokens
+ (anything that follows is reported as CONTENT_AFTER_BODY below),
+ unless this body sits inside a noframes element */
+ if (node.tag == body.tag && node.type == Node.EndTag)
+ {
+ body.closed = true;
+ Node.trimSpaces(lexer, body);
+ lexer.seenBodyEndTag = 1;
+ mode = Lexer.IgnoreWhitespace;
+
+ if (body.parent.tag == tt.tagNoframes)
+ break;
+
+ continue;
+ }
+
+ if (node.tag == tt.tagNoframes)
+ {
+ /* a nested <noframes> is parsed as a block inside this body */
+ if (node.type == Node.StartTag)
+ {
+ Node.insertNodeAtEnd(body, node);
+ getParseBlock().parse(lexer, node, mode);
+ continue;
+ }
+
+ /* </noframes> closes the noframes element that contains this body:
+ push it back for the caller */
+ if (node.type == Node.EndTag &&
+ body.parent.tag == tt.tagNoframes)
+ {
+ Node.trimSpaces(lexer, body);
+ lexer.ungetToken();
+ break;
+ }
+ }
+
+ /* frame/frameset tags end a body that sits inside noframes */
+ if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset)
+ && body.parent.tag == tt.tagNoframes)
+ {
+ Node.trimSpaces(lexer, body);
+ lexer.ungetToken();
+ break;
+ }
+
+ /* html tags are dropped here; only start tags are reported */
+ if (node.tag == tt.tagHtml)
+ {
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+ Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
+
+ continue;
+ }
+
+ iswhitenode = false;
+
+ /* a text node of at most one character whose first character is a space */
+ if (node.type == Node.TextNode &&
+ node.end <= node.start + 1 &&
+ node.textarray[node.start] == (byte)' ')
+ iswhitenode = true;
+
+ /* deal with comments etc. */
+ if (Node.insertMisc(body, node))
+ continue;
+
+ /* warn once about content after </body>; the increment keeps the
+ warning from being repeated */
+ if (lexer.seenBodyEndTag == 1 && !iswhitenode)
+ {
+ ++lexer.seenBodyEndTag;
+ Report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
+ }
+
+ /* mixed content model permits text */
+ if (node.type == Node.TextNode)
+ {
+ if (iswhitenode && mode == Lexer.IgnoreWhitespace)
+ {
+ continue;
+ }
+
+ /* EncloseBodyText: push the text back and let an inferred <p> pick it up */
+ if (lexer.configuration.EncloseBodyText && !iswhitenode)
+ {
+ Node para;
+
+ lexer.ungetToken();
+ para = lexer.inferredTag("p");
+ Node.insertNodeAtEnd(body, para);
+ parseTag(lexer, para, mode);
+ mode = Lexer.MixedContent;
+ continue;
+ }
+ else /* strict doesn't allow text here */
+ lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
+
+ /* checkstack makes sure inlineDup is tried only once per run of
+ inline content; it is re-armed by the next non-inline start tag */
+ if (checkstack)
+ {
+ checkstack = false;
+
+ if (lexer.inlineDup( node) > 0)
+ continue;
+ }
+
+ Node.insertNodeAtEnd(body, node);
+ mode = Lexer.MixedContent;
+ continue;
+ }
+
+ if (node.type == Node.DocTypeTag)
+ {
+ Node.insertDocType(lexer, body, node);
+ continue;
+ }
+ /* discard unknown and PARAM tags */
+ if (node.tag == null || node.tag == tt.tagParam)
+ {
+ //TODO: message Fix...
+ //Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /*
+ Netscape allows LI and DD directly in BODY
+ We infer UL or DL respectively and use this
+ boolean to exclude block-level elements so as
+ to match Netscape's observed behaviour.
+ */
+ lexer.excludeBlocks = false;
+
+ /* tags that are neither block nor inline need special treatment */
+ if (!((node.tag.model & Dict.CM_BLOCK) != 0) &&
+ !((node.tag.model & Dict.CM_INLINE) != 0))
+ {
+ /* avoid this error message being issued twice */
+ if (!((node.tag.model & Dict.CM_HEAD) != 0))
+ Report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
+
+ if ((node.tag.model & Dict.CM_HTML) != 0)
+ {
+ /* copy body attributes if current body was inferred */
+ if (node.tag == tt.tagBody && body.implicit
+ && body.attributes == null)
+ {
+ body.attributes = node.attributes;
+ node.attributes = null;
+ }
+
+ continue;
+ }
+
+ if ((node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ moveToHead(lexer, body, node);
+ continue;
+ }
+
+ /* list item, definition item or table part directly in body: push
+ the token back and continue with an inferred container instead */
+ if ((node.tag.model & Dict.CM_LIST) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag( "ul");
+ Node.addClass(node, "noindent");
+ lexer.excludeBlocks = true;
+ }
+ else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag( "dl");
+ lexer.excludeBlocks = true;
+ }
+ else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag( "table");
+ lexer.excludeBlocks = true;
+ }
+ else
+ {
+ /* AQ: The following line is from the official C
+ version of tidy. It doesn't make sense to me
+ because the '!' operator has higher precedence
+ than the '&' operator. It seems to me that the
+ expression always evaluates to 0.
+
+ if (!node->tag->model & (CM_ROW | CM_FIELD))
+
+ AQ: 13Jan2000 fixed in C tidy
+ */
+ if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0))
+ {
+ lexer.ungetToken();
+ return;
+ }
+
+ /* ignore </td> </th> <option> etc. */
+ continue;
+ }
+ }
+
+ if (node.type == Node.EndTag)
+ {
+ /* </br> is treated as <br>; </p> becomes two <br> elements: the
+ coerced node is inserted here, the inferred one by the start
+ tag handling below */
+ if (node.tag == tt.tagBr)
+ node.type = Node.StartTag;
+ else if (node.tag == tt.tagP)
+ {
+ Node.coerceNode(lexer, node, tt.tagBr);
+ Node.insertNodeAtEnd(body, node);
+ node = lexer.inferredTag("br");
+ }
+ else if ((node.tag.model & Dict.CM_INLINE) != 0)
+ lexer.popInline(node);
+ }
+
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+ {
+ if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0))
+ {
+ /* HTML4 strict doesn't allow inline content here */
+ /* but HTML2 does allow img elements as children of body */
+ if (node.tag == tt.tagImg)
+ lexer.versions &= ~Dict.VERS_HTML40_STRICT;
+ else
+ lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
+
+ if (checkstack && !node.implicit)
+ {
+ checkstack = false;
+
+ if (lexer.inlineDup( node) > 0)
+ continue;
+ }
+
+ mode = Lexer.MixedContent;
+ }
+ else
+ {
+ checkstack = true;
+ mode = Lexer.IgnoreWhitespace;
+ }
+
+ if (node.implicit)
+ Report.warning(lexer, body, node, Report.INSERTING_TAG);
+
+ Node.insertNodeAtEnd(body, node);
+ parseTag(lexer, node, mode);
+ continue;
+ }
+
+ /* discard unexpected tags */
+ Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
+ }
+ }
+
+ };
+
+ public static class ParseFrameSet implements Parser {
+
+ public void parse( Lexer lexer, Node frameset, short mode )
+ {
+ Node node;
+ TagTable tt = lexer.configuration.tt;
+
+ lexer.badAccess |= Report.USING_FRAMES;
+
+ while (true)
+ {
+ node = lexer.getToken(Lexer.IgnoreWhitespace);
+ if (node == null) break;
+ if (node.tag == frameset.tag && node.type == Node.EndTag)
+ {
+ frameset.closed = true;
+ Node.trimSpaces(lexer, frameset);
+ return;
+ }
+
+ /* deal with comments etc. */
+ if (Node.insertMisc(frameset, node))
+ continue;
+
+ if (node.tag == null)
+ {
+ Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+ {
+ if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ moveToHead(lexer, frameset, node);
+ continue;
+ }
+ }
+
+ if (node.tag == tt.tagBody)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("noframes");
+ Report.warning(lexer, frameset, node, Report.INSERTING_TAG);
+ }
+
+ if (node.type == Node.StartTag && (node.tag.model & Dict.CM_FRAMES) != 0)
+ {
+ Node.insertNodeAtEnd(frameset, node);
+ lexer.excludeBlocks = false;
+ parseTag(lexer, node, Lexer.MixedContent);
+ continue;
+ }
+ else if (node.type == Node.StartEndTag && (node.tag.model & Dict.CM_FRAMES) != 0)
+ {
+ Node.insertNodeAtEnd(frameset, node);
+ continue;
+ }
+
+ /* discard unexpected tags */
+ Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
+ }
+
+ Report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR);
+ }
+
+ };
+
+ public static class ParseInline implements Parser {
+
+ public void parse( Lexer lexer, Node element, short mode )
+ {
+ Node node, parent;
+ TagTable tt = lexer.configuration.tt;
+
+ if ((element.tag.model & Dict.CM_EMPTY) != 0)
+ return;
+
+ if (element.tag == tt.tagA)
+ {
+ if (element.attributes == null)
+ {
+ Report.warning(lexer, element.parent, element, Report.DISCARDING_UNEXPECTED);
+ Node.discardElement(element);
+ return;
+ }
+ }
+
+ /*
+ ParseInline is used for some block level elements like H1 to H6
+ For such elements we need to insert inline emphasis tags currently
+ on the inline stack. For Inline elements, we normally push them
+ onto the inline stack provided they aren't implicit or OBJECT/APPLET.
+ This test is carried out in PushInline and PopInline, see istack.c
+ We don't push A or SPAN to replicate current browser behavior
+ */
+ if (((element.tag.model & Dict.CM_BLOCK) != 0) || (element.tag == tt.tagDt))
+ lexer.inlineDup( null);
+ else if ((element.tag.model & Dict.CM_INLINE) != 0 &&
+ element.tag != tt.tagA && element.tag != tt.tagSpan)
+ lexer.pushInline( element);
+
+ if (element.tag == tt.tagNobr)
+ lexer.badLayout |= Report.USING_NOBR;
+ else if (element.tag == tt.tagFont)
+ lexer.badLayout |= Report.USING_FONT;
+
+ /* Inline elements may or may not be within a preformatted element */
+ if (mode != Lexer.Preformatted)
+ mode = Lexer.MixedContent;
+
+ while (true)
+ {
+ node = lexer.getToken(mode);
+ if (node == null) break;
+ /* end tag for current element */
+ if (node.tag == element.tag && node.type == Node.EndTag)
+ {
+ if ((element.tag.model & Dict.CM_INLINE) != 0 &&
+ element.tag != tt.tagA)
+ lexer.popInline( node);
+
+ if (!((mode & Lexer.Preformatted) != 0))
+ Node.trimSpaces(lexer, element);
+ /*
+ if a font element wraps an anchor and nothing else
+ then move the font element inside the anchor since
+ otherwise it won't alter the anchor text color
+ */
+ if (element.tag == tt.tagFont &&
+ element.content != null &&
+ element.content == element.last)
+ {
+ Node child = element.content;
+
+ if (child.tag == tt.tagA)
+ {
+ child.parent = element.parent;
+ child.next = element.next;
+ child.prev = element.prev;
+
+ if (child.prev != null)
+ child.prev.next = child;
+ else
+ child.parent.content = child;
+
+ if (child.next != null)
+ child.next.prev = child;
+ else
+ child.parent.last = child;
+
+ element.next = null;
+ element.prev = null;
+ element.parent = child;
+ element.content = child.content;
+ element.last = child.last;
+ child.content = element;
+ child.last = element;
+ for (child = element.content; child != null; child = child.next)
+ child.parent = element;
+ }
+ }
+ element.closed = true;
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ /* <u>...<u> map 2nd <u> to </u> if 1st is explicit */
+ /* otherwise emphasis nesting is probably unintentional */
+ /* big and small have cumulative effect to leave them alone */
+ if (node.type == Node.StartTag
+ && node.tag == element.tag
+ && lexer.isPushed(node)
+ && !node.implicit
+ && !element.implicit
+ && node.tag != null && ((node.tag.model & Dict.CM_INLINE) != 0)
+ && node.tag != tt.tagA
+ && node.tag != tt.tagFont
+ && node.tag != tt.tagBig
+ && node.tag != tt.tagSmall)
+ {
+ if (element.content != null && node.attributes == null)
+ {
+ Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
+ node.type = Node.EndTag;
+ lexer.ungetToken();
+ continue;
+ }
+
+ Report.warning(lexer, element, node, Report.NESTED_EMPHASIS);
+ }
+
+ if (node.type == Node.TextNode)
+ {
+ /* only called for 1st child */
+ if (element.content == null &&
+ !((mode & Lexer.Preformatted) != 0))
+ Node.trimSpaces(lexer, element);
+
+ if (node.start >= node.end)
+ {
+ continue;
+ }
+
+ Node.insertNodeAtEnd(element, node);
+ continue;
+ }
+
+ /* mixed content model so allow text */
+ if (Node.insertMisc(element, node))
+ continue;
+
+ /* deal with HTML tags */
+ if (node.tag == tt.tagHtml)
+ {
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+ {
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /* otherwise infer end of inline element */
+ lexer.ungetToken();
+ if (!((mode & Lexer.Preformatted) != 0))
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ /* within <dt> or <pre> map <p> to <br> */
+ if (node.tag == tt.tagP &&
+ node.type == Node.StartTag &&
+ ((mode & Lexer.Preformatted) != 0 ||
+ element.tag == tt.tagDt ||
+ element.isDescendantOf(tt.tagDt)))
+ {
+ node.tag = tt.tagBr;
+ node.element = "br";
+ Node.trimSpaces(lexer, element);
+ Node.insertNodeAtEnd(element, node);
+ continue;
+ }
+
+ /* ignore unknown and PARAM tags */
+ if (node.tag == null || node.tag == tt.tagParam)
+ {
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (node.tag == tt.tagBr && node.type == Node.EndTag)
+ node.type = Node.StartTag;
+
+ if (node.type == Node.EndTag)
+ {
+ /* coerce </br> to <br> */
+ if (node.tag == tt.tagBr)
+ node.type = Node.StartTag;
+ else if (node.tag == tt.tagP)
+ {
+ /* coerce unmatched </p> to <br><br> */
+ if (!element.isDescendantOf(tt.tagP))
+ {
+ Node.coerceNode(lexer, node, tt.tagBr);
+ Node.trimSpaces(lexer, element);
+ Node.insertNodeAtEnd(element, node);
+ node = lexer.inferredTag("br");
+ continue;
+ }
+ }
+ else if ((node.tag.model & Dict.CM_INLINE) != 0
+ && node.tag != tt.tagA
+ && !((node.tag.model & Dict.CM_OBJECT) != 0)
+ && (element.tag.model & Dict.CM_INLINE) != 0)
+ {
+ /* allow any inline end tag to end current element */
+ lexer.popInline( element);
+
+ if (element.tag != tt.tagA)
+ {
+ if (node.tag == tt.tagA && node.tag != element.tag)
+ {
+ Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+ lexer.ungetToken();
+ }
+ else
+ {
+ Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
+ }
+
+ if (!((mode & Lexer.Preformatted) != 0))
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ /* if parent is <a> then discard unexpected inline end tag */
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ } /* special case </tr> etc. for stuff moved in front of table */
+ else if (lexer.exiled
+ && node.tag.model != 0
+ && (node.tag.model & Dict.CM_TABLE) != 0)
+ {
+ lexer.ungetToken();
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+
+ /* allow any header tag to end current header */
+ if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0)
+ {
+ if (node.tag == element.tag)
+ {
+ Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
+ }
+ else
+ {
+ Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+ lexer.ungetToken();
+ }
+ if (!((mode & Lexer.Preformatted) != 0))
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ /*
+ an <A> tag to ends any open <A> element
+ but <A href=...> is mapped to </A><A href=...>
+ */
+ if (node.tag == tt.tagA && !node.implicit && lexer.isPushed(node))
+ {
+ /* coerce <a> to </a> unless it has some attributes */
+ if (node.attributes == null)
+ {
+ node.type = Node.EndTag;
+ Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
+ lexer.popInline( node);
+ lexer.ungetToken();
+ continue;
+ }
+
+ lexer.ungetToken();
+ Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+ lexer.popInline( element);
+ if (!((mode & Lexer.Preformatted) != 0))
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ if ((element.tag.model & Dict.CM_HEADING) != 0)
+ {
+ if (node.tag == tt.tagCenter ||
+ node.tag == tt.tagDiv)
+ {
+ if (node.type != Node.StartTag &&
+ node.type != Node.StartEndTag)
+ {
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
+
+ /* insert center as parent if heading is empty */
+ if (element.content == null)
+ {
+ Node.insertNodeAsParent(element, node);
+ continue;
+ }
+
+ /* split heading and make center parent of 2nd part */
+ Node.insertNodeAfterElement(element, node);
+
+ if (!((mode & Lexer.Preformatted) != 0))
+ Node.trimSpaces(lexer, element);
+
+ element = lexer.cloneNode(element);
+ element.start = lexer.lexsize;
+ element.end = lexer.lexsize;
+ Node.insertNodeAtEnd(node, element);
+ continue;
+ }
+
+ if (node.tag == tt.tagHr)
+ {
+ if (node.type != Node.StartTag &&
+ node.type != Node.StartEndTag)
+ {
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
+
+ /* insert hr before heading if heading is empty */
+ if (element.content == null)
+ {
+ Node.insertNodeBeforeElement(element, node);
+ continue;
+ }
+
+ /* split heading and insert hr before 2nd part */
+ Node.insertNodeAfterElement(element, node);
+
+ if (!((mode & Lexer.Preformatted) != 0))
+ Node.trimSpaces(lexer, element);
+
+ element = lexer.cloneNode(element);
+ element.start = lexer.lexsize;
+ element.end = lexer.lexsize;
+ Node.insertNodeAfterElement(node, element);
+ continue;
+ }
+ }
+
+ if (element.tag == tt.tagDt)
+ {
+ if (node.tag == tt.tagHr)
+ {
+ Node dd;
+
+ if (node.type != Node.StartTag &&
+ node.type != Node.StartEndTag)
+ {
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
+ dd = lexer.inferredTag("dd");
+
+ /* insert hr within dd before dt if dt is empty */
+ if (element.content == null)
+ {
+ Node.insertNodeBeforeElement(element, dd);
+ Node.insertNodeAtEnd(dd, node);
+ continue;
+ }
+
+ /* split dt and insert hr within dd before 2nd part */
+ Node.insertNodeAfterElement(element, dd);
+ Node.insertNodeAtEnd(dd, node);
+
+ if (!((mode & Lexer.Preformatted) != 0))
+ Node.trimSpaces(lexer, element);
+
+ element = lexer.cloneNode(element);
+ element.start = lexer.lexsize;
+ element.end = lexer.lexsize;
+ Node.insertNodeAfterElement(dd, element);
+ continue;
+ }
+ }
+
+
+ /*
+ if this is the end tag for an ancestor element
+ then infer end tag for this element
+ */
+ if (node.type == Node.EndTag)
+ {
+ for (parent = element.parent;
+ parent != null; parent = parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ if (!((element.tag.model & Dict.CM_OPT) != 0) &&
+ !element.implicit)
+ Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+
+ if (element.tag == tt.tagA)
+ lexer.popInline(element);
+
+ lexer.ungetToken();
+
+ if (!((mode & Lexer.Preformatted) != 0))
+ Node.trimSpaces(lexer, element);
+
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ }
+
+ /* block level tags end this element */
+ if (!((node.tag.model & Dict.CM_INLINE) != 0))
+ {
+ if (node.type != Node.StartTag)
+ {
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (!((element.tag.model & Dict.CM_OPT) != 0))
+ Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+
+ if ((node.tag.model & Dict.CM_HEAD) != 0 &&
+ !((node.tag.model & Dict.CM_BLOCK) != 0))
+ {
+ moveToHead(lexer, element, node);
+ continue;
+ }
+
+ /*
+ prevent anchors from propagating into block tags
+ except for headings h1 to h6
+ */
+ if (element.tag == tt.tagA)
+ {
+ if (node.tag != null &&
+ !((node.tag.model & Dict.CM_HEADING) != 0))
+ lexer.popInline(element);
+ else if (!(element.content != null))
+ {
+ Node.discardElement(element);
+ lexer.ungetToken();
+ return;
+ }
+ }
+
+ lexer.ungetToken();
+
+ if (!((mode & Lexer.Preformatted) != 0))
+ Node.trimSpaces(lexer, element);
+
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ /* parse inline element */
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+ {
+ if (node.implicit)
+ Report.warning(lexer, element, node, Report.INSERTING_TAG);
+
+ /* trim white space before <br> */
+ if (node.tag == tt.tagBr)
+ Node.trimSpaces(lexer, element);
+
+ Node.insertNodeAtEnd(element, node);
+ parseTag(lexer, node, mode);
+ continue;
+ }
+
+ /* discard unexpected tags */
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ }
+
+ if (!((element.tag.model & Dict.CM_OPT) != 0))
+ Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
+
+ Node.trimEmptyElement(lexer, element);
+ }
+ };
+
+ public static class ParseList implements Parser {
+
+ public void parse( Lexer lexer, Node list, short mode )
+ {
+ Node node;
+ Node parent;
+ TagTable tt = lexer.configuration.tt;
+
+ if ((list.tag.model & Dict.CM_EMPTY) != 0)
+ return;
+
+ lexer.insert = -1; /* defer implicit inline start tags */
+
+ while (true)
+ {
+ node = lexer.getToken(Lexer.IgnoreWhitespace);
+ if (node == null) break;
+
+ if (node.tag == list.tag && node.type == Node.EndTag)
+ {
+ if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
+ Node.coerceNode(lexer, list, tt.tagUl);
+
+ list.closed = true;
+ Node.trimEmptyElement(lexer, list);
+ return;
+ }
+
+ /* deal with comments etc. */
+ if (Node.insertMisc(list, node))
+ continue;
+
+ if (node.type != Node.TextNode && node.tag == null)
+ {
+ Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /*
+ if this is the end tag for an ancestor element
+ then infer end tag for this element
+ */
+ if (node.type == Node.EndTag)
+ {
+ if (node.tag == tt.tagForm)
+ {
+ lexer.badForm = 1;
+ Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0)
+ {
+ Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+ lexer.popInline(node);
+ continue;
+ }
+
+ for (parent = list.parent;
+ parent != null; parent = parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
+ lexer.ungetToken();
+
+ if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
+ Node.coerceNode(lexer, list, tt.tagUl);
+
+ Node.trimEmptyElement(lexer, list);
+ return;
+ }
+ }
+
+ Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (node.tag != tt.tagLi)
+ {
+ lexer.ungetToken();
+
+ if (node.tag != null &&
+ (node.tag.model & Dict.CM_BLOCK) != 0 &&
+ lexer.excludeBlocks)
+ {
+ Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
+ Node.trimEmptyElement(lexer, list);
+ return;
+ }
+
+ node = lexer.inferredTag("li");
+ node.addAttribute("style", "list-style: none");
+ Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
+ }
+
+ /* node should be <LI> */
+ Node.insertNodeAtEnd(list, node);
+ parseTag(lexer, node, Lexer.IgnoreWhitespace);
+ }
+
+ if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
+ Node.coerceNode(lexer, list, tt.tagUl);
+
+ Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
+ Node.trimEmptyElement(lexer, list);
+ }
+
+ };
+
+ public static class ParseDefList implements Parser {
+
+     /**
+      * Parses the children of a definition list until the list's own end
+      * tag, the end tag of an enclosing element, or the end of input.
+      *
+      * Text is wrapped in an inferred "dt"; other block or inline content
+      * that is neither DT nor DD is wrapped in an inferred "dd". A CENTER
+      * element splits the list: it is moved next to the list, parsed, and
+      * a new inferred "dl" is started after it.
+      *
+      * @param lexer token source and shared parser state
+      * @param list  the list element being parsed; re-pointed at the new
+      *              inferred "dl" after a CENTER split
+      * @param mode  only forwarded to parseTag when parsing a CENTER element
+      */
+     public void parse( Lexer lexer, Node list, short mode )
+     {
+         final TagTable tt = lexer.configuration.tt;
+         Node node;
+
+         /* an empty content model leaves nothing to parse */
+         if ((list.tag.model & Dict.CM_EMPTY) != 0)
+         {
+             return;
+         }
+
+         lexer.insert = -1; /* defer implicit inline start tags */
+
+         while ((node = lexer.getToken(Lexer.IgnoreWhitespace)) != null)
+         {
+             /* the list's own end tag finishes it */
+             if (node.type == Node.EndTag && node.tag == list.tag)
+             {
+                 list.closed = true;
+                 Node.trimEmptyElement(lexer, list);
+                 return;
+             }
+
+             /* comments and similar nodes are attached directly */
+             if (Node.insertMisc(list, node))
+             {
+                 continue;
+             }
+
+             /* bare text: push it back and open an inferred <dt> for it */
+             if (node.type == Node.TextNode)
+             {
+                 lexer.ungetToken();
+                 node = lexer.inferredTag( "dt");
+                 Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
+             }
+
+             /* unknown tags are dropped */
+             if (node.tag == null)
+             {
+                 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             if (node.type == Node.EndTag)
+             {
+                 if (node.tag == tt.tagForm)
+                 {
+                     lexer.badForm = 1;
+                     Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+                     continue;
+                 }
+
+                 /* end tag of an enclosing element: infer the list's end tag */
+                 if (closesAncestor(list, node))
+                 {
+                     Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
+                     lexer.ungetToken();
+                     Node.trimEmptyElement(lexer, list);
+                     return;
+                 }
+             }
+
+             /* center in a dt or a dl breaks the dl list in two */
+             if (node.tag == tt.tagCenter)
+             {
+                 if (list.content == null)
+                 {
+                     /* nothing collected yet: the empty dl is replaced by the center */
+                     Node.insertNodeBeforeElement(list, node);
+                     Node.discardElement(list);
+                 }
+                 else
+                 {
+                     Node.insertNodeAfterElement(list, node);
+                 }
+
+                 /* parse the center's content, then continue in a fresh dl */
+                 parseTag(lexer, node, mode);
+
+                 list = lexer.inferredTag("dl");
+                 Node.insertNodeAfterElement(node, list);
+                 continue;
+             }
+
+             if (node.tag != tt.tagDt && node.tag != tt.tagDd)
+             {
+                 lexer.ungetToken();
+
+                 /* neither block nor inline: not allowed inside the list */
+                 if ((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) == 0)
+                 {
+                     Report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN);
+                     Node.trimEmptyElement(lexer, list);
+                     return;
+                 }
+
+                 /* if DD appeared directly in BODY then exclude blocks */
+                 if (lexer.excludeBlocks && (node.tag.model & Dict.CM_INLINE) == 0)
+                 {
+                     Node.trimEmptyElement(lexer, list);
+                     return;
+                 }
+
+                 node = lexer.inferredTag( "dd");
+                 Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
+             }
+
+             /* stray </dt> or </dd> */
+             if (node.type == Node.EndTag)
+             {
+                 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             /* node is a <DT> or <DD> at this point */
+             Node.insertNodeAtEnd(list, node);
+             parseTag(lexer, node, Lexer.IgnoreWhitespace);
+         }
+
+         /* input ran out before the list was closed; node is null here */
+         Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
+         Node.trimEmptyElement(lexer, list);
+     }
+
+     /* True when some ancestor of element has the same tag as endTag. */
+     private static boolean closesAncestor(Node element, Node endTag)
+     {
+         Node ancestor = element.parent;
+         while (ancestor != null)
+         {
+             if (endTag.tag == ancestor.tag)
+             {
+                 return true;
+             }
+             ancestor = ancestor.parent;
+         }
+         return false;
+     }
+
+ };
+
+ public static class ParsePre implements Parser {
+ /**
+  * Parses the content of a preformatted element, reading tokens in
+  * Lexer.Preformatted mode until the element's own end tag, the end tag
+  * of an ancestor, or the end of input is reached.
+  *
+  * @param lexer token source; its badForm and excludeBlocks fields are written here
+  * @param pre   the element being parsed; the local reference is re-pointed at a
+  *              newly inferred "pre" each time a non-inline start tag splits the block
+  * @param mode  not read by this method
+  */
+ public void parse( Lexer lexer, Node pre, short mode )
+ {
+ Node node, parent;
+ TagTable tt = lexer.configuration.tt;
+
+ if ((pre.tag.model & Dict.CM_EMPTY) != 0) /* empty content model: nothing to parse */
+ return;
+
+ if ((pre.tag.model & Dict.CM_OBSOLETE) != 0) /* obsolete variants are coerced to tt.tagPre */
+ Node.coerceNode(lexer, pre, tt.tagPre);
+
+ lexer.inlineDup( null); /* tell lexer to insert inlines if needed */
+
+ while (true)
+ {
+ node = lexer.getToken(Lexer.Preformatted);
+ if (node == null) break; /* end of input: reported as MISSING_ENDTAG_FOR after the loop */
+ if (node.tag == pre.tag && node.type == Node.EndTag) /* matching end tag closes the element */
+ {
+ Node.trimSpaces(lexer, pre);
+ pre.closed = true;
+ Node.trimEmptyElement(lexer, pre);
+ return;
+ }
+
+ if (node.tag == tt.tagHtml) /* <html> tokens are dropped; only start tags produce a warning */
+ {
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+ Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+
+ continue;
+ }
+
+ if (node.type == Node.TextNode)
+ {
+ /* if this is the first child, check for an initial newline and skip it */
+ if (pre.content == null)
+ {
+ if (node.textarray[node.start] == (byte)'\n')
+ ++node.start;
+
+ if (node.start >= node.end) /* nothing left after skipping the newline */
+ {
+ continue;
+ }
+ }
+
+ Node.insertNodeAtEnd(pre, node);
+ continue;
+ }
+
+ /* deal with comments etc. */
+ if (Node.insertMisc(pre, node))
+ continue;
+
+ /* discard unknown and PARAM tags */
+ if (node.tag == null || node.tag == tt.tagParam)
+ {
+ Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (node.tag == tt.tagP)
+ {
+ if (node.type == Node.StartTag)
+ {
+ Report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);
+
+ /* trim white space before <p> in <pre>*/
+ Node.trimSpaces(lexer, pre);
+
+ /* coerce the <p> start tag to <br>; other <p> tokens are discarded in the else branch */
+ Node.coerceNode(lexer, node, tt.tagBr);
+ Node.insertNodeAtEnd(pre, node);
+ }
+ else
+ {
+ Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+ }
+ continue;
+ }
+
+ if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0)) /* head content that is not also block content */
+ {
+ moveToHead(lexer, pre, node);
+ continue;
+ }
+
+ /*
+ if this is the end tag for an ancestor element
+ then infer end tag for this element
+ */
+ if (node.type == Node.EndTag)
+ {
+ if (node.tag == tt.tagForm)
+ {
+ lexer.badForm = 1;
+ Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ for (parent = pre.parent;
+ parent != null; parent = parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
+
+ lexer.ungetToken(); /* the ancestor's parser must see this end tag again */
+ Node.trimSpaces(lexer, pre);
+ Node.trimEmptyElement(lexer, pre);
+ return;
+ }
+ }
+ }
+
+ /* what about head content, HEAD, BODY tags etc? */
+ if (!((node.tag.model & Dict.CM_INLINE) != 0))
+ {
+ if (node.type != Node.StartTag)
+ {
+ Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
+ lexer.excludeBlocks = true; /* stays set while the non-inline element is parsed below */
+
+ /* check if we need to infer a container */
+ if ((node.tag.model & Dict.CM_LIST) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag( "ul");
+ Node.addClass(node, "noindent");
+ }
+ else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag( "dl");
+ }
+ else if ((node.tag.model & Dict.CM_TABLE) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag( "table");
+ }
+
+ Node.insertNodeAfterElement(pre, node); /* the element becomes a sibling following the current pre */
+ pre = lexer.inferredTag( "pre"); /* later tokens are collected in a new pre ... */
+ Node.insertNodeAfterElement(node, pre); /* ... placed right after that element */
+ parseTag(lexer, node, Lexer.IgnoreWhitespace);
+ lexer.excludeBlocks = false;
+ continue;
+ }
+ /*
+ disabled code, not executed:
+ if (!((node.tag.model & Dict.CM_INLINE) != 0))
+ {
+ Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
+ lexer.ungetToken();
+ return;
+ }
+ */
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag) /* inline element: becomes a child of pre */
+ {
+ /* trim white space before <br> */
+ if (node.tag == tt.tagBr)
+ Node.trimSpaces(lexer, pre);
+
+ Node.insertNodeAtEnd(pre, node);
+ parseTag(lexer, node, Lexer.Preformatted);
+ continue;
+ }
+
+ /* discard unexpected tags */
+ Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+ }
+
+ Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR); /* node is null here: the loop only exits at end of input */
+ Node.trimEmptyElement(lexer, pre);
+ }
+
+ };
+
+ public static class ParseBlock implements Parser {
+ /**
+  * Parses the content of a block-level element until the element's own
+  * end tag, a token that implies its end, or the end of input.
+  *
+  * For elements flagged CM_OBJECT the lexer's inline stack base is moved
+  * to the current top of the stack on entry and the saved value is put
+  * back on the exit paths.
+  *
+  * @param lexer   token source and shared parser state
+  * @param element the element whose content is being parsed
+  * @param mode    incoming value is ignored; it is overwritten with
+  *                Lexer.IgnoreWhitespace before the first token is read
+  */
+ public void parse( Lexer lexer, Node element, short mode )
+ /*
+ element is node created by the lexer
+ upon seeing the start tag, or by the
+ parser when the start tag is inferred
+ */
+ {
+ Node node, parent;
+ boolean checkstack; /* true while inlineDup() still has to be tried for the next text or inline node */
+ int istackbase = 0; /* saved lexer.istackbase, only meaningful for CM_OBJECT elements */
+ TagTable tt = lexer.configuration.tt;
+
+ checkstack = true;
+
+ if ((element.tag.model & Dict.CM_EMPTY) != 0) /* empty content model: nothing to parse */
+ return;
+
+ if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm)) /* form nested inside a form */
+ Report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
+
+ /*
+ InlineDup() asks the lexer to insert inline emphasis tags
+ currently pushed on the istack, but take care to avoid
+ propagating inline emphasis inside OBJECT or APPLET.
+ For these elements a fresh inline stack context is created
+ and disposed of upon reaching the end of the element.
+ They thus behave like table cells in this respect.
+ */
+ if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ istackbase = lexer.istackbase;
+ lexer.istackbase = lexer.istack.size();
+ }
+
+ if (!((element.tag.model & Dict.CM_MIXED) != 0))
+ lexer.inlineDup( null);
+
+ mode = Lexer.IgnoreWhitespace; /* the caller-supplied mode is discarded here */
+
+ while (true)
+ {
+ node = lexer.getToken(mode /*Lexer.MixedContent*/);
+ if (node == null) break; /* end of input: handled after the loop */
+ /* end tag for this element */
+ if (node.type == Node.EndTag && node.tag != null &&
+ (node.tag == element.tag || element.was == node.tag))
+ {
+
+ if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ /* pop inline stack */
+ while (lexer.istack.size() > lexer.istackbase)
+ lexer.popInline( null);
+ lexer.istackbase = istackbase;
+ }
+
+ element.closed = true;
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ if (node.tag == tt.tagHtml ||
+ node.tag == tt.tagHead ||
+ node.tag == tt.tagBody)
+ {
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag) /* only start tags are reported; all are dropped */
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+
+ continue;
+ }
+
+ if (node.type == Node.EndTag)
+ {
+ if (node.tag == null)
+ {
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+
+ continue;
+ }
+ else if (node.tag == tt.tagBr)
+ node.type = Node.StartTag; /* </br> is treated as <br> */
+ else if (node.tag == tt.tagP)
+ {
+ Node.coerceNode(lexer, node, tt.tagBr); /* </p> becomes a <br> ... */
+ Node.insertNodeAtEnd(element, node);
+ node = lexer.inferredTag("br"); /* ... followed by a second, inferred <br> handled below */
+ }
+ else
+ {
+ /*
+ if this is the end tag for an ancestor element
+ then infer end tag for this element
+ */
+ for (parent = element.parent;
+ parent != null; parent = parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ if (!((element.tag.model & Dict.CM_OPT) != 0))
+ Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+
+ lexer.ungetToken(); /* the ancestor's parser must see this end tag again */
+
+ if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ /* pop inline stack */
+ while (lexer.istack.size() > lexer.istackbase)
+ lexer.popInline( null);
+ lexer.istackbase = istackbase;
+ }
+
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ /* special case </tr> etc. for stuff moved in front of table */
+ if (lexer.exiled
+ && node.tag.model != 0
+ && (node.tag.model & Dict.CM_TABLE) != 0)
+ {
+ lexer.ungetToken();
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ }
+
+ /* mixed content model permits text */
+ if (node.type == Node.TextNode)
+ {
+ boolean iswhitenode = false; /* set for a text node of at most one character that starts with a space */
+
+ if (node.type == Node.TextNode && /* redundant: already checked by the enclosing if */
+ node.end <= node.start + 1 &&
+ lexer.lexbuf[node.start] == (byte)' ')
+ iswhitenode = true;
+
+ if (lexer.configuration.EncloseBlockText && !iswhitenode)
+ {
+ lexer.ungetToken(); /* re-read the text inside an inferred <p> */
+ node = lexer.inferredTag("p");
+ Node.insertNodeAtEnd(element, node);
+ parseTag(lexer, node, Lexer.MixedContent);
+ continue;
+ }
+
+ if (checkstack)
+ {
+ checkstack = false;
+
+ if (!((element.tag.model & Dict.CM_MIXED) != 0))
+ {
+ if (lexer.inlineDup( node) > 0)
+ continue;
+ }
+ }
+
+ Node.insertNodeAtEnd(element, node);
+ mode = Lexer.MixedContent;
+ /*
+ HTML4 strict doesn't allow mixed content for
+ elements with %block; as their content model
+ */
+ lexer.versions &= ~Dict.VERS_HTML40_STRICT;
+ continue;
+ }
+
+ if (Node.insertMisc(element, node)) /* comments etc. */
+ continue;
+
+ /* allow PARAM elements? */
+ if (node.tag == tt.tagParam)
+ {
+ if (((element.tag.model & Dict.CM_PARAM) != 0) &&
+ (node.type == Node.StartTag || node.type == Node.StartEndTag))
+ {
+ Node.insertNodeAtEnd(element, node);
+ continue;
+ }
+
+ /* otherwise discard it */
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /* allow AREA elements? */
+ if (node.tag == tt.tagArea)
+ {
+ if ((element.tag == tt.tagMap) &&
+ (node.type == Node.StartTag || node.type == Node.StartEndTag))
+ {
+ Node.insertNodeAtEnd(element, node);
+ continue;
+ }
+
+ /* otherwise discard it */
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /* ignore unknown start/end tags */
+ if (node.tag == null)
+ {
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /*
+ Allow Dict.CM_INLINE elements here.
+
+ Allow Dict.CM_BLOCK elements here unless
+ lexer.excludeBlocks is yes.
+
+ LI and DD are special cased.
+
+ Otherwise infer end tag for this element.
+ */
+
+ if (!((node.tag.model & Dict.CM_INLINE) != 0))
+ {
+ if (node.type != Node.StartTag && node.type != Node.StartEndTag)
+ {
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (element.tag == tt.tagTd || element.tag == tt.tagTh)
+ {
+ /* if parent is a table cell, avoid inferring the end of the cell */
+
+ if ((node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ moveToHead(lexer, element, node);
+ continue;
+ }
+
+ if ((node.tag.model & Dict.CM_LIST) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag( "ul");
+ Node.addClass(node, "noindent");
+ lexer.excludeBlocks = true;
+ }
+ else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag( "dl");
+ lexer.excludeBlocks = true;
+ }
+
+ /* infer end of current table cell */
+ if (!((node.tag.model & Dict.CM_BLOCK) != 0))
+ {
+ lexer.ungetToken();
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ else if ((node.tag.model & Dict.CM_BLOCK) != 0)
+ {
+ if (lexer.excludeBlocks)
+ {
+ if (!((element.tag.model & Dict.CM_OPT) != 0))
+ Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+
+ lexer.ungetToken();
+
+ if ((element.tag.model & Dict.CM_OBJECT) != 0) /* NOTE(review): unlike the other CM_OBJECT exits, the inline stack is not popped here - confirm intended */
+ lexer.istackbase = istackbase;
+
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ else /* things like list items */
+ {
+ if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
+ Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+
+ if ((node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ moveToHead(lexer, element, node);
+ continue;
+ }
+
+ lexer.ungetToken(); /* every branch below either returns or re-reads this token inside an inferred container */
+
+ if ((node.tag.model & Dict.CM_LIST) != 0)
+ {
+ if (element.parent != null && element.parent.tag != null &&
+ element.parent.tag.parser == getParseList())
+ {
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ node = lexer.inferredTag("ul");
+ Node.addClass(node, "noindent");
+ }
+ else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
+ {
+ if (element.parent.tag == tt.tagDl) /* NOTE(review): element.parent is not null-checked here, unlike the CM_LIST branch above - confirm it cannot be null */
+ {
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ node = lexer.inferredTag("dl");
+ }
+ else if ((node.tag.model & Dict.CM_TABLE) != 0 ||
+ (node.tag.model & Dict.CM_ROW) != 0)
+ {
+ node = lexer.inferredTag("table");
+ }
+ else if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ /* pop inline stack */
+ while (lexer.istack.size() > lexer.istackbase)
+ lexer.popInline( null);
+ lexer.istackbase = istackbase;
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+
+ }
+ else
+ {
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ }
+
+ /* parse known element */
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+ {
+ if ((node.tag.model & Dict.CM_INLINE) != 0)
+ {
+ if (checkstack && !node.implicit)
+ {
+ checkstack = false;
+
+ if (lexer.inlineDup( node) > 0)
+ continue;
+ }
+
+ mode = Lexer.MixedContent;
+ }
+ else
+ {
+ checkstack = true;
+ mode = Lexer.IgnoreWhitespace;
+ }
+
+ /* trim white space before <br> */
+ if (node.tag == tt.tagBr)
+ Node.trimSpaces(lexer, element);
+
+ Node.insertNodeAtEnd(element, node);
+
+ if (node.implicit)
+ Report.warning(lexer, element, node, Report.INSERTING_TAG);
+
+ parseTag(lexer, node, Lexer.IgnoreWhitespace /*Lexer.MixedContent*/);
+ continue;
+ }
+
+ /* discard unexpected tags */
+ if (node.type == Node.EndTag)
+ lexer.popInline( node); /* if inline end tag */
+
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ }
+
+ if (!((element.tag.model & Dict.CM_OPT) != 0)) /* reached only at end of input; node is null here */
+ Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
+
+ if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ /* pop inline stack */
+ while (lexer.istack.size() > lexer.istackbase)
+ lexer.popInline( null);
+ lexer.istackbase = istackbase;
+ }
+
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ }
+
+ };
+
+ public static class ParseTableTag implements Parser {
+
+ public void parse( Lexer lexer, Node table, short mode )
+ {
+ Node node, parent;
+ int istackbase;
+ TagTable tt = lexer.configuration.tt;
+
+ lexer.deferDup();
+ istackbase = lexer.istackbase;
+ lexer.istackbase = lexer.istack.size();
+
+ while (true)
+ {
+ node = lexer.getToken(Lexer.IgnoreWhitespace);
+ if (node == null) break;
+ if (node.tag == table.tag && node.type == Node.EndTag)
+ {
+ lexer.istackbase = istackbase;
+ table.closed = true;
+ Node.trimEmptyElement(lexer, table);
+ return;
+ }
+
+ /* deal with comments etc. */
+ if (Node.insertMisc(table, node))
+ continue;
+
+ /* discard unknown tags */
+ if (node.tag == null && node.type != Node.TextNode)
+ {
+ Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /* if TD or TH or text or inline or block then infer <TR> */
+
+ if (node.type != Node.EndTag)
+ {
+ if (node.tag == tt.tagTd ||
+ node.tag == tt.tagTh ||
+ node.tag == tt.tagTable)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag( "tr");
+ Report.warning(lexer, table, node, Report.MISSING_STARTTAG);
+ }
+ else if (node.type == Node.TextNode
+ || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
+ {
+ Node.insertNodeBeforeElement(table, node);
+ Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
+ lexer.exiled = true;
+
+ /* AQ: TODO
+ Line 2040 of parser.c (13 Jan 2000) reads as follows:
+ if (!node->type == TextNode)
+ This will always evaluate to false.
+ This has been reported to Dave Raggett <dsr@w3.org>
+ */
+ //Should be?: if (!(node.type == Node.TextNode))
+ if (false)
+ parseTag(lexer, node, Lexer.IgnoreWhitespace);
+
+ lexer.exiled = false;
+ continue;
+ }
+ else if ((node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ moveToHead(lexer, table, node);
+ continue;
+ }
+ }
+
+ /*
+ if this is the end tag for an ancestor element
+ then infer end tag for this element
+ */
+ if (node.type == Node.EndTag)
+ {
+ if (node.tag == tt.tagForm)
+ {
+ lexer.badForm = 1;
+ Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (node.tag != null && (node.tag.model & (Dict.CM_TABLE|Dict.CM_ROW)) != 0)
+ {
+ Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ for (parent = table.parent;
+ parent != null; parent = parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ Report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE);
+ lexer.ungetToken();
+ lexer.istackbase = istackbase;
+ Node.trimEmptyElement(lexer, table);
+ return;
+ }
+ }
+ }
+
+ if (!((node.tag.model & Dict.CM_TABLE) != 0))
+ {
+ lexer.ungetToken();
+ Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
+ lexer.istackbase = istackbase;
+ Node.trimEmptyElement(lexer, table);
+ return;
+ }
+
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+ {
+ Node.insertNodeAtEnd(table, node);;
+ parseTag(lexer, node, Lexer.IgnoreWhitespace);
+ continue;
+ }
+
+ /* discard unexpected text nodes and end tags */
+ Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
+ }
+
+ Report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR);
+ Node.trimEmptyElement(lexer, table);
+ lexer.istackbase = istackbase;
+ }
+
+ };
+
+ public static class ParseColGroup implements Parser {
+
+ public void parse( Lexer lexer, Node colgroup, short mode )
+ {
+ Node node, parent;
+ TagTable tt = lexer.configuration.tt;
+
+ if ((colgroup.tag.model & Dict.CM_EMPTY) != 0)
+ return;
+
+ while (true)
+ {
+ node = lexer.getToken(Lexer.IgnoreWhitespace);
+ if (node == null) break;
+ if (node.tag == colgroup.tag && node.type == Node.EndTag)
+ {
+ colgroup.closed = true;
+ return;
+ }
+
+ /*
+ if this is the end tag for an ancestor element
+ then infer end tag for this element
+ */
+ if (node.type == Node.EndTag)
+ {
+ if (node.tag == tt.tagForm)
+ {
+ lexer.badForm = 1;
+ Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ for (parent = colgroup.parent;
+ parent != null; parent = parent.parent)
+ {
+
+ if (node.tag == parent.tag)
+ {
+ lexer.ungetToken();
+ return;
+ }
+ }
+ }
+
+ if (node.type == Node.TextNode)
+ {
+ lexer.ungetToken();
+ return;
+ }
+
+ /* deal with comments etc. */
+ if (Node.insertMisc(colgroup, node))
+ continue;
+
+ /* discard unknown tags */
+ if (node.tag == null)
+ {
+ Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (node.tag != tt.tagCol)
+ {
+ lexer.ungetToken();
+ return;
+ }
+
+ if (node.type == Node.EndTag)
+ {
+ Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /* node should be <COL> */
+ Node.insertNodeAtEnd(colgroup, node);
+ parseTag(lexer, node, Lexer.IgnoreWhitespace);
+ }
+ }
+
+ };
+
+ public static class ParseRowGroup implements Parser {
+
+     /**
+      * Parses the children of a row group until the group's own end tag, a
+      * token that implies its end, or the end of input.
+      *
+      * TD and TH tokens, and other table content that is not a TR, are
+      * wrapped in an inferred "tr". Text, block and inline content is moved
+      * in front of the table; head content is moved to the head. Another
+      * row group tag or a start tag equal to this group's tag ends the
+      * group.
+      *
+      * @param lexer    token source and shared parser state
+      * @param rowgroup the row group element being parsed
+      * @param mode     not read here; tokens are fetched with Lexer.IgnoreWhitespace
+      */
+     public void parse( Lexer lexer, Node rowgroup, short mode )
+     {
+         final TagTable tt = lexer.configuration.tt;
+         Node node;
+
+         /* an empty content model leaves nothing to parse */
+         if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0)
+         {
+             return;
+         }
+
+         while ((node = lexer.getToken(Lexer.IgnoreWhitespace)) != null)
+         {
+             /* same tag as the group: end tag closes it, start tag begins a sibling */
+             if (node.tag == rowgroup.tag)
+             {
+                 if (node.type == Node.EndTag)
+                 {
+                     rowgroup.closed = true;
+                     Node.trimEmptyElement(lexer, rowgroup);
+                 }
+                 else
+                 {
+                     lexer.ungetToken();
+                 }
+                 return;
+             }
+
+             /* if </table> infer end tag */
+             if (node.type == Node.EndTag && node.tag == tt.tagTable)
+             {
+                 lexer.ungetToken();
+                 Node.trimEmptyElement(lexer, rowgroup);
+                 return;
+             }
+
+             /* comments and similar nodes are attached directly */
+             if (Node.insertMisc(rowgroup, node))
+             {
+                 continue;
+             }
+
+             /* unknown tags are dropped */
+             if (node.type != Node.TextNode && node.tag == null)
+             {
+                 Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             /*
+                if TD or TH then infer <TR>
+                if text or inline or block move before table
+                if head content move to head
+             */
+             if (node.type != Node.EndTag)
+             {
+                 if (node.tag == tt.tagTd || node.tag == tt.tagTh)
+                 {
+                     lexer.ungetToken();
+                     node = lexer.inferredTag("tr");
+                     Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
+                 }
+                 else if (node.type == Node.TextNode
+                          || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
+                 {
+                     Node.moveBeforeTable(rowgroup, node, tt);
+                     Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
+
+                     /* parse the moved element's content while flagged as exiled */
+                     lexer.exiled = true;
+                     if (node.type != Node.TextNode)
+                     {
+                         parseTag(lexer, node, Lexer.IgnoreWhitespace);
+                     }
+                     lexer.exiled = false;
+                     continue;
+                 }
+                 else if ((node.tag.model & Dict.CM_HEAD) != 0)
+                 {
+                     Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
+                     moveToHead(lexer, rowgroup, node);
+                     continue;
+                 }
+             }
+
+             /*
+                if this is the end tag for ancestor element
+                then infer end tag for this element
+             */
+             if (node.type == Node.EndTag)
+             {
+                 if (node.tag == tt.tagForm)
+                 {
+                     lexer.badForm = 1;
+                     Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
+                     continue;
+                 }
+
+                 /* stray </tr>, </td> or </th> */
+                 if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag == tt.tagTh)
+                 {
+                     Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
+                     continue;
+                 }
+
+                 if (closesAncestor(rowgroup, node))
+                 {
+                     lexer.ungetToken();
+                     Node.trimEmptyElement(lexer, rowgroup);
+                     return;
+                 }
+             }
+
+             /* if THEAD, TFOOT or TBODY then implied end tag */
+             if ((node.tag.model & Dict.CM_ROWGRP) != 0)
+             {
+                 if (node.type != Node.EndTag)
+                 {
+                     lexer.ungetToken();
+                 }
+
+                 Node.trimEmptyElement(lexer, rowgroup);
+                 return;
+             }
+
+             /* any other end tag is dropped */
+             if (node.type == Node.EndTag)
+             {
+                 Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             /* anything left that is not a TR is re-read inside an inferred one */
+             if (node.tag != tt.tagTr)
+             {
+                 node = lexer.inferredTag( "tr");
+                 Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
+                 lexer.ungetToken();
+             }
+
+             /* node is a <TR> at this point */
+             Node.insertNodeAtEnd(rowgroup, node);
+             parseTag(lexer, node, Lexer.IgnoreWhitespace);
+         }
+
+         /* end of input */
+         Node.trimEmptyElement(lexer, rowgroup);
+     }
+
+     /* True when some ancestor of element has the same tag as endTag. */
+     private static boolean closesAncestor(Node element, Node endTag)
+     {
+         Node ancestor = element.parent;
+         while (ancestor != null)
+         {
+             if (endTag.tag == ancestor.tag)
+             {
+                 return true;
+             }
+             ancestor = ancestor.parent;
+         }
+         return false;
+     }
+
+ };
+
+ /**
+  * Parser for the content of a table row element.
+  * Apart from whatever <code>Node.insertMisc</code> accepts, only TD/TH cells are
+  * appended to the row; every other token is discarded, relocated, or ends the row.
+  */
+ public static class ParseRow implements Parser {
+
+ /**
+  * Reads tokens (whitespace ignored) and builds the children of <code>row</code>.
+  *
+  * @param lexer token source; its <code>badForm</code>, <code>exiled</code> and
+  *              <code>excludeBlocks</code> fields and its inline stack are touched here
+  * @param row   the row element being filled
+  * @param mode  not referenced by this implementation
+  */
+ public void parse( Lexer lexer, Node row, short mode )
+ {
+ Node node, parent;
+ boolean exclude_state;
+ TagTable tt = lexer.configuration.tt;
+
+ /* elements whose content model is flagged CM_EMPTY have nothing to parse */
+ if ((row.tag.model & Dict.CM_EMPTY) != 0)
+ return;
+
+ while (true)
+ {
+ node = lexer.getToken(Lexer.IgnoreWhitespace);
+ if (node == null) break;
+ /* a token with the same tag as the row ends the row */
+ if (node.tag == row.tag)
+ {
+ if (node.type == Node.EndTag)
+ {
+ row.closed = true;
+ Node.fixEmptyRow(lexer, row);
+ return;
+ }
+
+ /* not an end tag: push it back for the caller and finish this row */
+ lexer.ungetToken();
+ Node.fixEmptyRow(lexer, row);
+ return;
+ }
+
+ /*
+ if this is the end tag for an ancestor element
+ then infer end tag for this element
+ */
+ if (node.type == Node.EndTag)
+ {
+ /* stray form end tag: flag it on the lexer and drop it */
+ if (node.tag == tt.tagForm)
+ {
+ lexer.badForm = 1;
+ Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /* stray cell end tags are dropped */
+ if (node.tag == tt.tagTd || node.tag == tt.tagTh)
+ {
+ Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /* walk up the ancestors; a match means the row is implicitly closed */
+ for (parent = row.parent;
+ parent != null; parent = parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ lexer.ungetToken();
+ Node.trimEmptyElement(lexer, row);
+ return;
+ }
+ }
+ }
+
+ /* deal with comments etc. */
+ if (Node.insertMisc(row, node))
+ continue;
+
+ /* discard unknown tags */
+ if (node.tag == null && node.type != Node.TextNode)
+ {
+ Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /* discard unexpected <table> element */
+ if (node.tag == tt.tagTable)
+ {
+ Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /* THEAD, TFOOT or TBODY */
+ if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0)
+ {
+ lexer.ungetToken();
+ Node.trimEmptyElement(lexer, row);
+ return;
+ }
+
+ /* any end tag that reaches this point matched no ancestor: drop it */
+ if (node.type == Node.EndTag)
+ {
+ Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /*
+ if text or inline or block move before table
+ if head content move to head
+ */
+
+ /* always true here: end tags were discarded by the check just above */
+ if (node.type != Node.EndTag)
+ {
+ if (node.tag == tt.tagForm)
+ {
+ /* push the form back and continue with an inferred <td> instead */
+ lexer.ungetToken();
+ node = lexer.inferredTag("td");
+ Report.warning(lexer, row, node, Report.MISSING_STARTTAG);
+ }
+ else if (node.type == Node.TextNode
+ || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
+ {
+ Node.moveBeforeTable(row, node, tt);
+ Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
+ /* lexer.exiled is set only for the duration of parsing the moved element */
+ lexer.exiled = true;
+
+ if (node.type != Node.TextNode)
+ parseTag(lexer, node, Lexer.IgnoreWhitespace);
+
+ lexer.exiled = false;
+ continue;
+ }
+ else if ((node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
+ moveToHead(lexer, row, node);
+ continue;
+ }
+ }
+
+ /* anything that is still not a cell is reported and skipped */
+ if (!(node.tag == tt.tagTd || node.tag == tt.tagTh))
+ {
+ Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
+ continue;
+ }
+
+ /* node should be <TD> or <TH> */
+ Node.insertNodeAtEnd(row, node);
+ /* clear excludeBlocks while the cell is parsed, then restore the saved value */
+ exclude_state = lexer.excludeBlocks;
+ lexer.excludeBlocks = false;
+ parseTag(lexer, node, Lexer.IgnoreWhitespace);
+ lexer.excludeBlocks = exclude_state;
+
+ /* pop inline stack */
+
+ /* shrink the inline stack back down to istackbase entries */
+ while (lexer.istack.size() > lexer.istackbase)
+ lexer.popInline( null);
+ }
+
+ /* reached only when the token stream ran out before the row was closed */
+ Node.trimEmptyElement(lexer, row);
+ }
+
+ };
+
+ /**
+  * Parser for the content of a NOFRAMES element.
+  */
+ public static class ParseNoFrames implements Parser {
+
+     /**
+      * Collects the children of <code>noframes</code>. Content is wrapped in a BODY
+      * element, which is inferred when the document does not supply one.
+      *
+      * @param lexer    token source; USING_NOFRAMES is recorded in its badAccess flags
+      * @param noframes the NOFRAMES element being filled
+      * @param mode     ignored; tokens are always read with Lexer.IgnoreWhitespace
+      */
+     public void parse( Lexer lexer, Node noframes, short mode )
+     {
+         TagTable tags = lexer.configuration.tt;
+         Node token;
+
+         lexer.badAccess |= Report.USING_NOFRAMES;
+
+         while ((token = lexer.getToken(Lexer.IgnoreWhitespace)) != null)
+         {
+             /* matching end tag: the element is complete */
+             if (token.tag == noframes.tag && token.type == Node.EndTag)
+             {
+                 noframes.closed = true;
+                 Node.trimSpaces(lexer, noframes);
+                 return;
+             }
+
+             /* FRAME or FRAMESET implies the end tag was left out */
+             if (token.tag == tags.tagFrame || token.tag == tags.tagFrameset)
+             {
+                 Report.warning(lexer, noframes, token, Report.MISSING_ENDTAG_BEFORE);
+                 Node.trimSpaces(lexer, noframes);
+                 lexer.ungetToken();
+                 return;
+             }
+
+             /* HTML tags are dropped; only the opening forms are reported */
+             if (token.tag == tags.tagHtml)
+             {
+                 boolean opening =
+                     token.type == Node.StartTag || token.type == Node.StartEndTag;
+                 if (opening)
+                     Report.warning(lexer, noframes, token, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             /* comments and similar are attached directly */
+             if (Node.insertMisc(noframes, token))
+                 continue;
+
+             boolean explicitBody =
+                 token.tag == tags.tagBody && token.type == Node.StartTag;
+
+             if (!explicitBody)
+             {
+                 /* neither text nor a known tag: discard unexpected end tags */
+                 if (token.type != Node.TextNode && token.tag == null)
+                 {
+                     Report.warning(lexer, noframes, token, Report.DISCARDING_UNEXPECTED);
+                     continue;
+                 }
+
+                 /* implicit body element inferred; the token is re-read inside it */
+                 lexer.ungetToken();
+                 token = lexer.inferredTag("body");
+                 if (lexer.configuration.XmlOut)
+                     Report.warning(lexer, noframes, token, Report.INSERTING_TAG);
+             }
+
+             Node.insertNodeAtEnd(noframes, token);
+             parseTag(lexer, token, Lexer.IgnoreWhitespace /*MixedContent*/);
+         }
+
+         /* input exhausted without an end tag; token is null at this point */
+         Report.warning(lexer, noframes, token, Report.MISSING_ENDTAG_FOR);
+     }
+
+ };
+
+ public static class ParseSelect implements Parser {
+
+ public void parse( Lexer lexer, Node field, short mode )
+ {
+ Node node;
+ TagTable tt = lexer.configuration.tt;
+
+ lexer.insert = -1; /* defer implicit inline start tags */
+
+ while (true)
+ {
+ node = lexer.getToken(Lexer.IgnoreWhitespace);
+ if (node == null) break;
+ if (node.tag == field.tag && node.type == Node.EndTag)
+ {
+ field.closed = true;
+ Node.trimSpaces(lexer, field);
+ return;
+ }
+
+ /* deal with comments etc. */
+ if (Node.insertMisc(field, node))
+ continue;
+
+ if (node.type == Node.StartTag &&
+ (node.tag == tt.tagOption ||
+ node.tag == tt.tagOptgroup ||
+ node.tag == tt.tagScript))
+ {
+ Node.insertNodeAtEnd(field, node);
+ parseTag(lexer, node, Lexer.IgnoreWhitespace);
+ continue;
+ }
+
+ /* discard unexpected tags */
+ Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
+ }
+
+ Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
+ }
+
+ };
+
+ /**
+  * Parser for elements that hold text only; any tag other than the element's
+  * own end tag (or a discarded FONT) terminates the element.
+  */
+ public static class ParseText implements Parser {
+
+     /**
+      * Collects the text children of <code>field</code>.
+      *
+      * @param lexer token source
+      * @param field the element being filled
+      * @param mode  lexer mode for reading tokens; replaced by Lexer.Preformatted
+      *              when <code>field</code> is a TEXTAREA
+      */
+     public void parse( Lexer lexer, Node field, short mode )
+     {
+         TagTable tags = lexer.configuration.tt;
+         Node token;
+
+         lexer.insert = -1; /* defer implicit inline start tags */
+
+         if (field.tag == tags.tagTextarea)
+             mode = Lexer.Preformatted;
+
+         while ((token = lexer.getToken(mode)) != null)
+         {
+             if (token.tag == field.tag && token.type == Node.EndTag)
+             {
+                 field.closed = true;
+                 Node.trimSpaces(lexer, field);
+                 return;
+             }
+
+             /* deal with comments etc. */
+             if (Node.insertMisc(field, token))
+                 continue;
+
+             if (token.type == Node.TextNode)
+             {
+                 /* only called for 1st child */
+                 if (field.content == null && (mode & Lexer.Preformatted) == 0)
+                     Node.trimSpaces(lexer, field);
+
+                 /* empty text nodes are not attached */
+                 if (token.start < token.end)
+                     Node.insertNodeAtEnd(field, token);
+
+                 continue;
+             }
+
+             if (token.tag == tags.tagFont)
+             {
+                 Report.warning(lexer, field, token, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             /* terminate element on other tags; stay quiet when the model has CM_OPT */
+             if ((field.tag.model & Dict.CM_OPT) == 0)
+                 Report.warning(lexer, field, token, Report.MISSING_ENDTAG_BEFORE);
+
+             lexer.ungetToken();
+             Node.trimSpaces(lexer, field);
+             return;
+         }
+
+         /* input exhausted; token is null here */
+         if ((field.tag.model & Dict.CM_OPT) == 0)
+             Report.warning(lexer, field, token, Report.MISSING_ENDTAG_FOR);
+     }
+
+ };
+
+ public static class ParseOptGroup implements Parser {
+
+ public void parse( Lexer lexer, Node field, short mode )
+ {
+ Node node;
+ TagTable tt = lexer.configuration.tt;
+
+ lexer.insert = -1; /* defer implicit inline start tags */
+
+ while (true)
+ {
+ node = lexer.getToken(Lexer.IgnoreWhitespace);
+ if (node == null) break;
+ if (node.tag == field.tag && node.type == Node.EndTag)
+ {
+ field.closed = true;
+ Node.trimSpaces(lexer, field);
+ return;
+ }
+
+ /* deal with comments etc. */
+ if (Node.insertMisc(field, node))
+ continue;
+
+ if (node.type == Node.StartTag &&
+ (node.tag == tt.tagOption || node.tag == tt.tagOptgroup))
+ {
+ if (node.tag == tt.tagOptgroup)
+ Report.warning(lexer, field, node, Report.CANT_BE_NESTED);
+
+ Node.insertNodeAtEnd(field, node);
+ parseTag(lexer, node, Lexer.MixedContent);
+ continue;
+ }
+
+ /* discard unexpected tags */
+ Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
+ }
+ }
+
+ };
+
+ /*
+  Accessors for the shared parser instances. Each one returns the single
+  instance held in the corresponding private static field of this class.
+  */
+
+ /** Returns the shared <code>ParseHTML</code> instance. */
+ public static Parser getParseHTML()
+ {
+ return _parseHTML;
+ }
+
+ /** Returns the shared <code>ParseHead</code> instance. */
+ public static Parser getParseHead()
+ {
+ return _parseHead;
+ }
+
+ /** Returns the shared <code>ParseTitle</code> instance. */
+ public static Parser getParseTitle()
+ {
+ return _parseTitle;
+ }
+
+ /** Returns the shared <code>ParseScript</code> instance. */
+ public static Parser getParseScript()
+ {
+ return _parseScript;
+ }
+
+ /** Returns the shared <code>ParseBody</code> instance. */
+ public static Parser getParseBody()
+ {
+ return _parseBody;
+ }
+
+ /** Returns the shared <code>ParseFrameSet</code> instance. */
+ public static Parser getParseFrameSet()
+ {
+ return _parseFrameSet;
+ }
+
+ /** Returns the shared <code>ParseInline</code> instance. */
+ public static Parser getParseInline()
+ {
+ return _parseInline;
+ }
+
+ /** Returns the shared <code>ParseList</code> instance. */
+ public static Parser getParseList()
+ {
+ return _parseList;
+ }
+
+ /** Returns the shared <code>ParseDefList</code> instance. */
+ public static Parser getParseDefList()
+ {
+ return _parseDefList;
+ }
+
+ /** Returns the shared <code>ParsePre</code> instance. */
+ public static Parser getParsePre()
+ {
+ return _parsePre;
+ }
+
+ /** Returns the shared <code>ParseBlock</code> instance. */
+ public static Parser getParseBlock()
+ {
+ return _parseBlock;
+ }
+
+ /** Returns the shared <code>ParseTableTag</code> instance. */
+ public static Parser getParseTableTag()
+ {
+ return _parseTableTag;
+ }
+
+ /** Returns the shared <code>ParseColGroup</code> instance. */
+ public static Parser getParseColGroup()
+ {
+ return _parseColGroup;
+ }
+
+ /** Returns the shared <code>ParseRowGroup</code> instance. */
+ public static Parser getParseRowGroup()
+ {
+ return _parseRowGroup;
+ }
+
+ /** Returns the shared <code>ParseRow</code> instance. */
+ public static Parser getParseRow()
+ {
+ return _parseRow;
+ }
+
+ /** Returns the shared <code>ParseNoFrames</code> instance. */
+ public static Parser getParseNoFrames()
+ {
+ return _parseNoFrames;
+ }
+
+ /** Returns the shared <code>ParseSelect</code> instance. */
+ public static Parser getParseSelect()
+ {
+ return _parseSelect;
+ }
+
+ /** Returns the shared <code>ParseText</code> instance. */
+ public static Parser getParseText()
+ {
+ return _parseText;
+ }
+
+ /** Returns the shared <code>ParseOptGroup</code> instance. */
+ public static Parser getParseOptGroup()
+ {
+ return _parseOptGroup;
+ }
+
+
+ private static Parser _parseHTML = new ParseHTML();
+ private static Parser _parseHead = new ParseHead();
+ private static Parser _parseTitle = new ParseTitle();
+ private static Parser _parseScript = new ParseScript();
+ private static Parser _parseBody = new ParseBody();
+ private static Parser _parseFrameSet = new ParseFrameSet();
+ private static Parser _parseInline = new ParseInline();
+ private static Parser _parseList = new ParseList();
+ private static Parser _parseDefList = new ParseDefList();
+ private static Parser _parsePre = new ParsePre();
+ private static Parser _parseBlock = new ParseBlock();
+ private static Parser _parseTableTag = new ParseTableTag();
+ private static Parser _parseColGroup = new ParseColGroup();
+ private static Parser _parseRowGroup = new ParseRowGroup();
+ private static Parser _parseRow = new ParseRow();
+ private static Parser _parseNoFrames = new ParseNoFrames();
+ private static Parser _parseSelect = new ParseSelect();
+ private static Parser _parseText = new ParseText();
+ private static Parser _parseOptGroup = new ParseOptGroup();
+
+ /**
+  * Parses an HTML document; HTML is the top level element.
+  * Leading comments and the first DOCTYPE are attached to the root, stray end
+  * tags and further DOCTYPEs are discarded, and the first remaining token starts
+  * the HTML element (inferred unless that token is an HTML start tag).
+  *
+  * @param lexer token source
+  * @return the newly created root node
+  */
+ public static Node parseDocument(Lexer lexer)
+ {
+     TagTable tags = lexer.configuration.tt;
+     boolean haveDoctype = false;
+     Node token;
+
+     Node root = lexer.newNode();
+     root.type = Node.RootNode;
+
+     while ((token = lexer.getToken(Lexer.IgnoreWhitespace)) != null)
+     {
+         /* deal with comments etc. */
+         if (Node.insertMisc(root, token))
+             continue;
+
+         if (token.type == Node.DocTypeTag)
+         {
+             /* only the first DOCTYPE is kept */
+             if (haveDoctype)
+                 Report.warning(lexer, root, token, Report.DISCARDING_UNEXPECTED);
+             else
+             {
+                 Node.insertNodeAtEnd(root, token);
+                 haveDoctype = true;
+             }
+             continue;
+         }
+
+         if (token.type == Node.EndTag)
+         {
+             Report.warning(lexer, root, token, Report.DISCARDING_UNEXPECTED); //TODO?
+             continue;
+         }
+
+         /* anything but an explicit <html> start tag is re-read inside an inferred one */
+         Node html = token;
+         boolean explicitHtml = token.type == Node.StartTag && token.tag == tags.tagHtml;
+         if (!explicitHtml)
+         {
+             lexer.ungetToken();
+             html = lexer.inferredTag("html");
+         }
+
+         Node.insertNodeAtEnd(root, html);
+         getParseHTML().parse(lexer, html, (short)0); // TODO?
+         break;
+     }
+
+     return root;
+ }
+
+ /**
+  * Indicates whether or not whitespace should be preserved for this element.
+  * If an <code>xml:space</code> attribute is found, then if the attribute value is
+  * <code>preserve</code>, returns <code>true</code>. For any other value, including
+  * an attribute given without a value, returns <code>false</code>. If an
+  * <code>xml:space</code> attribute was <em>not</em> found, then the following
+  * element names (compared ignoring case) result in a return value of
+  * <code>true: pre, script, style,</code> and <code>xsl:text</code>. Finally, if a
+  * <code>TagTable</code> was passed in and its parser for the element is the "pre"
+  * parser, then <code>true</code> will be returned.
+  * Otherwise, <code>false</code> is returned.
+  * @param element The <code>Node</code> to test to see if whitespace should be
+  *                preserved.
+  * @param tt The <code>TagTable</code> used for the parser lookup. This may be
+  *           <code>null</code>, in which case this test is bypassed.
+  * @return <code>true</code> or <code>false</code>, as explained above.
+  */
+
+ public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt)
+ {
+     AttVal attribute;
+
+     /* search attributes for xml:space */
+     for (attribute = element.attributes; attribute != null; attribute = attribute.next)
+     {
+         /* constant-first comparisons: a missing name or value must not throw */
+         if ("xml:space".equals(attribute.attribute))
+             return "preserve".equals(attribute.value);
+     }
+
+     /* kludge for html docs without explicit xml:space attribute */
+     if (Lexer.wstrcasecmp(element.element, "pre") == 0
+         || Lexer.wstrcasecmp(element.element, "script") == 0
+         || Lexer.wstrcasecmp(element.element, "style") == 0)
+         return true;
+
+     if ( (tt != null) && (tt.findParser(element) == getParsePre()) )
+         return true;
+
+     /* kludge for XSL docs */
+     if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
+         return true;
+
+     return false;
+ }
+
+ /*
+ XML documents
+ */
+ public static void parseXMLElement(Lexer lexer, Node element, short mode)
+ {
+ Node node;
+
+ /* Jeff Young's kludge for XSL docs */
+
+ if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
+ return;
+
+ /* if node is pre or has xml:space="preserve" then do so */
+
+ if (XMLPreserveWhiteSpace(element, lexer.configuration.tt))
+ mode = Lexer.Preformatted;
+
+ while (true)
+ {
+ node = lexer.getToken(mode);
+ if (node == null) break;
+ if (node.type == Node.EndTag && node.element.equals(element.element))
+ {
+ element.closed = true;
+ break;
+ }
+
+ /* discard unexpected end tags */
+ if (node.type == Node.EndTag)
+ {
+ Report.error(lexer, element, node, Report.UNEXPECTED_ENDTAG);
+ continue;
+ }
+
+ /* parse content on seeing start tag */
+ if (node.type == Node.StartTag)
+ parseXMLElement(lexer, node, mode);
+
+ Node.insertNodeAtEnd(element, node);
+ }
+
+ /*
+ if first child is text then trim initial space and
+ delete text node if it is empty.
+ */
+
+ node = element.content;
+
+ if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted)
+ {
+ if (node.textarray[node.start] == (byte)' ')
+ {
+ node.start++;
+
+ if (node.start >= node.end)
+ Node.discardElement(node);
+ }
+ }
+
+ /*
+ if last child is text then trim final space and
+ delete the text node if it is empty
+ */
+
+ node = element.last;
+
+ if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted)
+ {
+ if (node.textarray[node.end - 1] == (byte)' ')
+ {
+ node.end--;
+
+ if (node.start >= node.end)
+ Node.discardElement(node);
+ }
+ }
+ }
+
+ /**
+  * Parses an XML document. Sets <code>lexer.configuration.XmlTags</code>, then
+  * attaches comments etc., the first DOCTYPE and every top-level element to a
+  * new root node. Tokens of any other kind are dropped.
+  *
+  * @param lexer token source
+  * @return the newly created root node
+  */
+ public static Node parseXMLDocument(Lexer lexer)
+ {
+     Node token;
+     Node doctype = null;
+
+     Node root = lexer.newNode();
+     root.type = Node.RootNode;
+     lexer.configuration.XmlTags = true;
+
+     while ((token = lexer.getToken(Lexer.IgnoreWhitespace)) != null)
+     {
+         /* discard unexpected end tags */
+         if (token.type == Node.EndTag)
+         {
+             Report.warning(lexer, null, token, Report.UNEXPECTED_ENDTAG);
+             continue;
+         }
+
+         /* deal with comments etc. */
+         if (Node.insertMisc(root, token))
+             continue;
+
+         if (token.type == Node.DocTypeTag)
+         {
+             /* only the first DOCTYPE is kept */
+             if (doctype != null)
+                 Report.warning(lexer, root, token, Report.DISCARDING_UNEXPECTED); // TODO
+             else
+             {
+                 Node.insertNodeAtEnd(root, token);
+                 doctype = token;
+             }
+         }
+         else if (token.type == Node.StartTag)
+         {
+             /* if start tag then parse element's content */
+             Node.insertNodeAtEnd(root, token);
+             parseXMLElement(lexer, token, Lexer.IgnoreWhitespace);
+         }
+     }
+
+     /*
+      A step that discarded the document type node was disabled here in the
+      original ("#if 0"); it is not performed.
+      */
+
+     if (doctype != null && !lexer.checkDocTypeKeyWords(doctype))
+         Report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
+
+     /* ensure presence of initial <?XML version="1.0"?> */
+     if (lexer.configuration.XmlPi)
+         lexer.fixXMLPI(root);
+
+     return root;
+ }
+
+ /**
+  * Reports whether <code>node</code> is to be treated as JavaScript.
+  * A node without attributes counts as JavaScript. Otherwise at least one
+  * <code>language</code> or <code>type</code> attribute (name compared ignoring
+  * case) must have a value for which
+  * <code>Lexer.wsubstr(value, "javascript")</code> is true.
+  *
+  * @param node the node whose attributes are inspected
+  * @return <code>true</code> if the node is treated as JavaScript
+  */
+ public static boolean isJavaScript(Node node)
+ {
+     if (node.attributes == null)
+         return true;
+
+     boolean found = false;
+     AttVal current = node.attributes;
+
+     /* every attribute is visited, even after a match */
+     while (current != null)
+     {
+         boolean relevantName =
+             Lexer.wstrcasecmp(current.attribute, "language") == 0
+             || Lexer.wstrcasecmp(current.attribute, "type") == 0;
+
+         if (relevantName && Lexer.wsubstr(current.value, "javascript"))
+             found = true;
+
+         current = current.next;
+     }
+
+     return found;
+ }
+
+}
--- /dev/null
+/*
+ * @(#)Report.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Error/informational message reporter.
+ *
+ * You should only need to edit the file TidyMessages.properties
+ * to localize HTML tidy.
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+import java.io.PrintWriter;
+import java.text.MessageFormat;
+import java.util.Hashtable;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+
+import net.sourceforge.phpdt.tidy.JTidyConsole;
+import net.sourceforge.phpdt.tidy.JtidyPlugin;
+import org.eclipse.core.resources.IMarker;
+import org.eclipse.core.runtime.CoreException;
+import org.eclipse.ui.texteditor.MarkerUtilities;
+
+public class Report {
+
+ /* used to point to Web Accessibility Guidelines */
+ public static final String ACCESS_URL = "http://www.w3.org/WAI/GL";
+
+ /* release date text printed by showVersion() */
+ public static final String RELEASE_DATE = "4th August 2000";
+
+ /* sasdjb 01May00 for GNU Emacs error parsing */
+ public static String currentFile;
+
+ /*
+  NOTE: the numeric codes below restart at 1 in each group (entity, element,
+  attribute), so a code identifies a message only together with the group it
+  belongs to.
+  */
+
+ /* error codes for entities */
+
+ public static final short MISSING_SEMICOLON = 1;
+ public static final short UNKNOWN_ENTITY = 2;
+ public static final short UNESCAPED_AMPERSAND = 3;
+
+ /* error codes for element messages */
+
+ public static final short MISSING_ENDTAG_FOR = 1;
+ public static final short MISSING_ENDTAG_BEFORE = 2;
+ public static final short DISCARDING_UNEXPECTED = 3;
+ public static final short NESTED_EMPHASIS = 4;
+ public static final short NON_MATCHING_ENDTAG = 5;
+ public static final short TAG_NOT_ALLOWED_IN = 6;
+ public static final short MISSING_STARTTAG = 7;
+ public static final short UNEXPECTED_ENDTAG = 8;
+ public static final short USING_BR_INPLACE_OF = 9;
+ public static final short INSERTING_TAG = 10;
+ public static final short SUSPECTED_MISSING_QUOTE = 11;
+ public static final short MISSING_TITLE_ELEMENT = 12;
+ public static final short DUPLICATE_FRAMESET = 13;
+ public static final short CANT_BE_NESTED = 14;
+ public static final short OBSOLETE_ELEMENT = 15;
+ public static final short PROPRIETARY_ELEMENT = 16;
+ public static final short UNKNOWN_ELEMENT = 17;
+ public static final short TRIM_EMPTY_ELEMENT = 18;
+ public static final short COERCE_TO_ENDTAG = 19;
+ public static final short ILLEGAL_NESTING = 20;
+ public static final short NOFRAMES_CONTENT = 21;
+ public static final short CONTENT_AFTER_BODY = 22;
+ public static final short INCONSISTENT_VERSION = 23;
+ public static final short MALFORMED_COMMENT = 24;
+ public static final short BAD_COMMENT_CHARS = 25;
+ public static final short BAD_XML_COMMENT = 26;
+ public static final short BAD_CDATA_CONTENT = 27;
+ public static final short INCONSISTENT_NAMESPACE = 28;
+ public static final short DOCTYPE_AFTER_TAGS = 29;
+ public static final short MALFORMED_DOCTYPE = 30;
+ public static final short UNEXPECTED_END_OF_FILE = 31;
+ public static final short DTYPE_NOT_UPPER_CASE = 32;
+ public static final short TOO_MANY_ELEMENTS = 33;
+
+ /* error codes used for attribute messages */
+
+ public static final short UNKNOWN_ATTRIBUTE = 1;
+ public static final short MISSING_ATTRIBUTE = 2;
+ public static final short MISSING_ATTR_VALUE = 3;
+ public static final short BAD_ATTRIBUTE_VALUE = 4;
+ public static final short UNEXPECTED_GT = 5;
+ public static final short PROPRIETARY_ATTR_VALUE = 6;
+ public static final short REPEATED_ATTRIBUTE = 7;
+ public static final short MISSING_IMAGEMAP = 8;
+ public static final short XML_ATTRIBUTE_VALUE = 9;
+ public static final short UNEXPECTED_QUOTEMARK = 10;
+ public static final short ID_NAME_MISMATCH = 11;
+
+ /* accessibility flaws */
+ /* single-bit flags; OR-ed into lexer.badAccess */
+
+ public static final short MISSING_IMAGE_ALT = 1;
+ public static final short MISSING_LINK_ALT = 2;
+ public static final short MISSING_SUMMARY = 4;
+ public static final short MISSING_IMAGE_MAP = 8;
+ public static final short USING_FRAMES = 16;
+ public static final short USING_NOFRAMES = 32;
+
+ /* presentation flaws */
+ /* single-bit flags */
+
+ public static final short USING_SPACER = 1;
+ public static final short USING_LAYER = 2;
+ public static final short USING_NOBR = 4;
+ public static final short USING_FONT = 8;
+ public static final short USING_BODY = 16;
+
+ /* character encoding errors */
+ /* single-bit flags; encodingError() ORs WINDOWS_CHARS into lexer.badChars */
+ public static final short WINDOWS_CHARS = 1;
+ public static final short NON_ASCII = 2;
+ public static final short FOUND_UTF16 = 4;
+
+ /* incremented by unknownOption() and badArgument() */
+ private static short optionerrors;
+
+ /* message templates; loaded once by the static initializer below */
+ private static ResourceBundle res = null;
+
+ /*
+  Loads the TidyMessages resource bundle when the class is initialized.
+  A missing bundle is rethrown as an Error carrying only the exception's
+  text, so the class fails to initialize.
+  */
+ static {
+ try {
+ res = ResourceBundle.getBundle("net/sourceforge/phpdt/tidy/w3c/TidyMessages");
+ } catch (MissingResourceException e) {
+ throw new Error(e.toString());
+ }
+ }
+
+ // public static void tidyPrint(PrintWriter p, String msg) {
+ // p.print(msg);
+ // }
+
+// public static void tidyPrint(char c) {
+// // p.println(msg);
+// // JTidyConsole.print(new String(c));
+// }
+
+ /**
+  * Forwards <code>msg</code> to <code>JTidyConsole.print</code>.
+  *
+  * @param msg the text to show
+  */
+ public static void tidyPrint(String msg) {
+ // formerly written to a PrintWriter (p.println(msg)); now routed to the console view
+ JTidyConsole.print(msg);
+ }
+
+ /**
+  * Forwards <code>msg</code> to <code>JTidyConsole.println</code>.
+  *
+  * @param msg the text to show
+  */
+ public static void tidyPrintln(String msg) {
+ // formerly written to a PrintWriter (p.println(msg)); now routed to the console view
+ JTidyConsole.println(msg);
+ }
+
+ /**
+  * Passes an empty string to <code>JTidyConsole.println</code>.
+  */
+ public static void tidyPrintln() {
+ JTidyConsole.println("");
+ }
+
+ /**
+  * Prints the release date and a pointer to the Tidy home page via tidyPrintln.
+  *
+  * @param p not used; the output goes through tidyPrintln instead
+  */
+ public static void showVersion(PrintWriter p) {
+ tidyPrintln("Java HTML Tidy release date: " + RELEASE_DATE);
+ tidyPrintln("See http://www.w3.org/People/Raggett for details");
+ }
+
+ // public static void tag(Lexer lexer, Node tag) {
+ // if (tag != null) {
+ // if (tag.type == Node.StartTag)
+ // tidyPrint(lexer.errout, "<" + tag.element + ">");
+ // else if (tag.type == Node.EndTag)
+ // tidyPrint(lexer.errout, "</" + tag.element + ">");
+ // else if (tag.type == Node.DocTypeTag)
+ // tidyPrint(lexer.errout, "<!DOCTYPE>");
+ // else if (tag.type == Node.TextNode)
+ // tidyPrint(lexer.errout, "plain text");
+ // else
+ // tidyPrint(lexer.errout, tag.element);
+ // }
+ // }
+
+ /**
+  * Appends a short description of <code>tag</code> to <code>errorMessage</code>:
+  * <code>&lt;name&gt;</code> for a start tag, <code>&lt;/name&gt;</code> for an end
+  * tag, <code>&lt;!DOCTYPE&gt;</code> for a document type, <code>plain text</code>
+  * for a text node, and the bare element name for anything else.
+  * Nothing is appended when <code>tag</code> is <code>null</code>.
+  *
+  * @param errorMessage buffer that receives the description
+  * @param tag          the node to describe; may be <code>null</code>
+  */
+ public static void tag(StringBuffer errorMessage, Node tag) {
+     if (tag == null) {
+         return;
+     }
+
+     if (tag.type == Node.StartTag) {
+         errorMessage.append("<" + tag.element + ">");
+     } else if (tag.type == Node.EndTag) {
+         errorMessage.append("</" + tag.element + ">");
+     } else if (tag.type == Node.DocTypeTag) {
+         // Fixed: this branch used to repeat the end-tag text ("</" + element + ">").
+         errorMessage.append("<!DOCTYPE>");
+     } else if (tag.type == Node.TextNode) {
+         errorMessage.append("plain text");
+     } else {
+         errorMessage.append(tag.element);
+     }
+ }
+
+ /* lexer is not defined when this is called */
+ public static void unknownOption(String option) {
+ optionerrors++;
+ try {
+ System.err.println(
+ MessageFormat.format(
+ res.getString("unknown_option"),
+ new Object[] { option }));
+ } catch (MissingResourceException e) {
+ System.err.println(e.toString());
+ }
+ }
+
+ /* lexer is not defined when this is called */
+ public static void badArgument(String option) {
+ optionerrors++;
+ try {
+ System.err.println(
+ MessageFormat.format(
+ res.getString("bad_argument"),
+ new Object[] { option }));
+ } catch (MissingResourceException e) {
+ System.err.println(e.toString());
+ }
+ }
+
+ // public static void position(Lexer lexer) {
+ // try {
+ // /* Change formatting to be parsable by GNU Emacs */
+ // if (lexer.configuration.Emacs) {
+ // tidyPrint(
+ // lexer.errout,
+ // MessageFormat.format(
+ // res.getString("emacs_format"),
+ // new Object[] { currentFile, new Integer(lexer.lines), new Integer(lexer.columns)}));
+ // tidyPrint(lexer.errout, " ");
+ // } else /* traditional format */ {
+ // tidyPrint(
+ // lexer.errout,
+ // MessageFormat.format(res.getString("line_column"), new Object[] { new Integer(lexer.lines), new Integer(lexer.columns)}));
+ // }
+ // } catch (MissingResourceException e) {
+ // lexer.errout.tidyPrintln(e.toString());
+ // }
+ // }
+
+ /**
+  * Records a character encoding warning. The lexer's warning count is always
+  * incremented. When warnings are shown and <code>code</code> is WINDOWS_CHARS,
+  * the flag is added to <code>lexer.badChars</code> and a problem marker with the
+  * "illegal_char" message is created on the lexer's file at the current line.
+  * The marker is created with SEVERITY_ERROR. Other codes create no marker.
+  *
+  * @param lexer the lexer whose position, counters and file are used
+  * @param code  one of the character encoding codes of this class
+  * @param c     the offending character code, inserted into the message
+  */
+ public static void encodingError(Lexer lexer, short code, int c) {
+     lexer.warnings++;
+
+     if (!lexer.configuration.ShowWarnings) {
+         return;
+     }
+
+     if (code == WINDOWS_CHARS) {
+         lexer.badChars |= WINDOWS_CHARS;
+         try {
+             Hashtable attributes = new Hashtable();
+             StringBuffer errorMessage =
+                 new StringBuffer("Column " + lexer.columns + ": ");
+             MarkerUtilities.setLineNumber(attributes, lexer.lines);
+             errorMessage.append(
+                 MessageFormat.format(
+                     res.getString("illegal_char"),
+                     new Object[] { new Integer(c)}));
+             attributes.put(
+                 IMarker.SEVERITY,
+                 new Integer(IMarker.SEVERITY_ERROR));
+             attributes.put(JtidyPlugin.MARKER_NAME, Boolean.TRUE);
+             try {
+                 MarkerUtilities.setMessage(
+                     attributes,
+                     errorMessage.toString());
+                 MarkerUtilities.createMarker(
+                     lexer.getIFile(),
+                     attributes,
+                     IMarker.PROBLEM);
+             } catch (CoreException e) {
+                 // The marker could not be created: report it on the console
+                 // instead of silently losing the diagnostic.
+                 tidyPrintln(e.toString());
+             }
+         } catch (MissingResourceException e) {
+             tidyPrintln(e.toString());
+         }
+     }
+
+     tidyPrintln();
+ }
+
+ /**
+  * Records an entity warning. The lexer's warning count is always incremented.
+  * When warnings are shown, a problem marker is created on the lexer's file at
+  * the current line; its message starts with the column and continues with the
+  * text selected by <code>code</code>. An unrecognised code leaves only the
+  * column prefix. The marker is created with SEVERITY_ERROR.
+  *
+  * @param lexer  the lexer whose position, counters and file are used
+  * @param code   MISSING_SEMICOLON, UNKNOWN_ENTITY or UNESCAPED_AMPERSAND
+  * @param entity the entity text inserted into the message
+  * @param c      not used by this implementation
+  */
+ public static void entityError(
+     Lexer lexer,
+     short code,
+     String entity,
+     int c) {
+     lexer.warnings++;
+
+     if (!lexer.configuration.ShowWarnings) {
+         return;
+     }
+
+     Hashtable attributes = new Hashtable();
+     StringBuffer errorMessage =
+         new StringBuffer("Column " + lexer.columns + ": ");
+     MarkerUtilities.setLineNumber(attributes, lexer.lines);
+
+     try {
+         if (code == MISSING_SEMICOLON) {
+             errorMessage.append(
+                 MessageFormat.format(
+                     res.getString("missing_semicolon"),
+                     new Object[] { entity }));
+         } else if (code == UNKNOWN_ENTITY) {
+             errorMessage.append(
+                 MessageFormat.format(
+                     res.getString("unknown_entity"),
+                     new Object[] { entity }));
+         } else if (code == UNESCAPED_AMPERSAND) {
+             errorMessage.append(res.getString("unescaped_ampersand"));
+         }
+     } catch (MissingResourceException e) {
+         // Template missing: report it and still create the marker with the prefix.
+         tidyPrintln(e.toString());
+     }
+
+     attributes.put(
+         IMarker.SEVERITY,
+         new Integer(IMarker.SEVERITY_ERROR));
+     attributes.put(JtidyPlugin.MARKER_NAME, Boolean.TRUE);
+     try {
+         MarkerUtilities.setMessage(attributes, errorMessage.toString());
+         MarkerUtilities.createMarker(
+             lexer.getIFile(),
+             attributes,
+             IMarker.PROBLEM);
+     } catch (CoreException e) {
+         // The marker could not be created: report it on the console
+         // instead of silently losing the diagnostic.
+         tidyPrintln(e.toString());
+     }
+     tidyPrintln();
+ }
+
+ public static void attrError(
+ Lexer lexer,
+ Node node,
+ String attr,
+ short code) {
+ lexer.warnings++;
+
+ /* keep quiet after 6 errors */
+ if (lexer.errors > 6)
+ return;
+
+ Hashtable attributes = new Hashtable();
+ StringBuffer errorMessage =
+ new StringBuffer("Column " + lexer.columns + ": ");
+
+ if (lexer.configuration.ShowWarnings) {
+ /* on end of file adjust reported position to end of input */
+ if (code == UNEXPECTED_END_OF_FILE) {
+ lexer.lines = lexer.in.curline;
+ lexer.columns = lexer.in.curcol;
+ }
+
+ // position(lexer);
+
+ MarkerUtilities.setLineNumber(attributes, lexer.lines);
+
+ if (code == UNKNOWN_ATTRIBUTE) {
+ try {
+ // tidyPrint(lexer.errout, MessageFormat.format(res.getString("unknown_attribute"), new Object[] { attr }));
+ errorMessage.append(
+ MessageFormat.format(
+ res.getString("unknown_attribute"),
+ new Object[] { attr }));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ } else if (code == MISSING_ATTRIBUTE) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("warning"));
+ errorMessage.append(res.getString("warning"));
+ tag(errorMessage, node);
+ // tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_attribute"), new Object[] { attr }));
+ errorMessage.append(
+ MessageFormat.format(
+ res.getString("missing_attribute"),
+ new Object[] { attr }));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ } else if (code == MISSING_ATTR_VALUE) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("warning"));
+ errorMessage.append(res.getString("warning"));
+ tag(errorMessage, node);
+ // tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_attr_value"), new Object[] { attr }));
+ errorMessage.append(
+ MessageFormat.format(
+ res.getString("missing_attr_value"),
+ new Object[] { attr }));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ } else if (code == MISSING_IMAGEMAP) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("warning"));
+ errorMessage.append(res.getString("warning"));
+ tag(errorMessage, node);
+ // tidyPrint(lexer.errout, res.getString("missing_imagemap"));
+ errorMessage.append(res.getString("missing_imagemap"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ lexer.badAccess |= MISSING_IMAGE_MAP;
+ } else if (code == BAD_ATTRIBUTE_VALUE) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("warning"));
+ errorMessage.append(res.getString("warning"));
+ tag(errorMessage, node);
+ // tidyPrint(lexer.errout, MessageFormat.format(res.getString("bad_attribute_value"), new Object[] { attr }));
+ errorMessage.append(
+ MessageFormat.format(
+ res.getString("bad_attribute_value"),
+ new Object[] { attr }));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ } else if (code == XML_ATTRIBUTE_VALUE) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("warning"));
+ errorMessage.append(res.getString("warning"));
+ tag(errorMessage, node);
+ // tidyPrint(lexer.errout, MessageFormat.format(res.getString("xml_attribute_value"), new Object[] { attr }));
+ errorMessage.append(
+ MessageFormat.format(
+ res.getString("xml_attribute_value"),
+ new Object[] { attr }));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ } else if (code == UNEXPECTED_GT) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("error"));
+ errorMessage.append(res.getString("error"));
+ tag(errorMessage, node);
+ // tidyPrint(lexer.errout, res.getString("unexpected_gt"));
+ errorMessage.append(res.getString("unexpected_gt"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ lexer.errors++;
+ ;
+ } else if (code == UNEXPECTED_QUOTEMARK) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("warning"));
+ errorMessage.append(res.getString("warning"));
+ tag(errorMessage, node);
+ // tidyPrint(lexer.errout, res.getString("unexpected_quotemark"));
+ errorMessage.append(res.getString("unexpected_quotemark"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ } else if (code == REPEATED_ATTRIBUTE) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("warning"));
+ errorMessage.append(res.getString("warning"));
+ tag(errorMessage, node);
+ // tidyPrint(lexer.errout, res.getString("repeated_attribute"));
+ errorMessage.append(res.getString("repeated_attribute"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ } else if (code == PROPRIETARY_ATTR_VALUE) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("warning"));
+ errorMessage.append(res.getString("warning"));
+ tag(errorMessage, node);
+ // tidyPrint(lexer.errout, MessageFormat.format(res.getString("proprietary_attr_value"), new Object[] { attr }));
+ errorMessage.append(
+ MessageFormat.format(
+ res.getString("proprietary_attr_value"),
+ new Object[] { attr }));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ } else if (code == UNEXPECTED_END_OF_FILE) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("unexpected_end_of_file"));
+ errorMessage.append(
+ res.getString("unexpected_end_of_file"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ } else if (code == ID_NAME_MISMATCH) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("warning"));
+ errorMessage.append(res.getString("warning"));
+ tag(errorMessage, node);
+ // tidyPrint(lexer.errout, res.getString("id_name_mismatch"));
+ errorMessage.append(res.getString("id_name_mismatch"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR));
+ attributes.put(
+ IMarker.SEVERITY,
+ new Integer(IMarker.SEVERITY_WARNING));
+ // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO));
+ attributes.put(JtidyPlugin.MARKER_NAME, Boolean.TRUE);
+ try {
+ MarkerUtilities.setMessage(attributes, errorMessage.toString());
+ MarkerUtilities.createMarker(
+ lexer.getIFile(),
+ attributes,
+ IMarker.PROBLEM);
+ } catch (CoreException e) {
+ }
+ tidyPrintln();
+ } else if (code == UNEXPECTED_GT) {
+ // position(lexer);
+ MarkerUtilities.setLineNumber(attributes, lexer.lines);
+ try {
+ // tidyPrint(lexer.errout, res.getString("error"));
+ errorMessage.append(res.getString("error"));
+ tag(errorMessage, node);
+ // tidyPrint(lexer.errout, res.getString("unexpected_gt"));
+ errorMessage.append(res.getString("unexpected_gt"));
+ attributes.put(
+ IMarker.SEVERITY,
+ new Integer(IMarker.SEVERITY_ERROR));
+ // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING));
+ // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO));
+ attributes.put(JtidyPlugin.MARKER_NAME, Boolean.TRUE);
+ try {
+ MarkerUtilities.setMessage(
+ attributes,
+ errorMessage.toString());
+ MarkerUtilities.createMarker(
+ lexer.getIFile(),
+ attributes,
+ IMarker.PROBLEM);
+ } catch (CoreException e) {
+ }
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ tidyPrintln();
+ lexer.errors++;
+ ;
+ }
+
+ }
+
+ /**
+  * Reports a markup warning as a warning-severity problem marker on the
+  * file returned by <code>lexer.getIFile()</code>.
+  *
+  * <p>The warning counter of the lexer is always incremented. Nothing else
+  * happens once more than 6 errors have been counted, or when
+  * <code>lexer.configuration.ShowWarnings</code> is off. Otherwise a
+  * message starting with "Column n: " is assembled from the resource
+  * bundle, attached to a marker placed on <code>lexer.lines</code>, and an
+  * empty line is written through <code>tidyPrintln()</code>.</p>
+  *
+  * @param lexer   lexer supplying counters, position, configuration and file
+  * @param element element referred to by some messages (for example
+  *                "missing_endtag_for"); dereferenced without a null check
+  *                by the codes that use it
+  * @param node    node referred to by some messages (for example
+  *                "missing_starttag")
+  * @param code    warning code selecting the message
+  */
+ public static void warning(
+     Lexer lexer,
+     Node element,
+     Node node,
+     short code) {
+
+     TagTable tt = lexer.configuration.tt;
+
+     lexer.warnings++;
+
+     /* keep quiet after 6 errors */
+     if (lexer.errors > 6)
+         return;
+
+     if (!lexer.configuration.ShowWarnings)
+         return;
+
+     /* on end of file adjust reported position to end of input */
+     if (code == UNEXPECTED_END_OF_FILE) {
+         lexer.lines = lexer.in.curline;
+         lexer.columns = lexer.in.curcol;
+     }
+
+     Hashtable attributes = new Hashtable();
+     StringBuffer errorMessage =
+         new StringBuffer("Column " + lexer.columns + ": ");
+
+     MarkerUtilities.setLineNumber(attributes, lexer.lines);
+
+     if (code == MISSING_ENDTAG_FOR) {
+         appendWarningText(errorMessage, "missing_endtag_for", element.element);
+     } else if (code == MISSING_ENDTAG_BEFORE) {
+         appendWarningText(errorMessage, "missing_endtag_before", element.element);
+         tag(errorMessage, node);
+     } else if (code == DISCARDING_UNEXPECTED) {
+         // REVISIT: the "discarding_unexpected" text is intentionally not
+         // emitted here; the marker only carries the column prefix.
+     } else if (code == NESTED_EMPHASIS) {
+         appendWarningText(errorMessage, "nested_emphasis");
+         tag(errorMessage, node);
+     } else if (code == COERCE_TO_ENDTAG) {
+         appendWarningText(errorMessage, "coerce_to_endtag", element.element);
+     } else if (code == NON_MATCHING_ENDTAG) {
+         appendWarningText(errorMessage, "non_matching_endtag_1");
+         tag(errorMessage, node);
+         appendWarningText(errorMessage, "non_matching_endtag_2", element.element);
+     } else if (code == TAG_NOT_ALLOWED_IN) {
+         appendWarningText(errorMessage, "warning");
+         tag(errorMessage, node);
+         appendWarningText(errorMessage, "tag_not_allowed_in", element.element);
+     } else if (code == DOCTYPE_AFTER_TAGS) {
+         appendWarningText(errorMessage, "doctype_after_tags");
+     } else if (code == MISSING_STARTTAG) {
+         appendWarningText(errorMessage, "missing_starttag", node.element);
+     } else if (code == UNEXPECTED_ENDTAG) {
+         // The main text does not depend on 'element'. It used to be guarded
+         // by "if (element != null)", a leftover of the disabled "_suffix"
+         // output, which produced an empty marker text for a null element.
+         appendWarningText(errorMessage, "unexpected_endtag", node.element);
+     } else if (code == TOO_MANY_ELEMENTS) {
+         // Same correction as for UNEXPECTED_ENDTAG above.
+         appendWarningText(errorMessage, "too_many_elements", node.element);
+     } else if (code == USING_BR_INPLACE_OF) {
+         appendWarningText(errorMessage, "using_br_inplace_of");
+         tag(errorMessage, node);
+     } else if (code == INSERTING_TAG) {
+         appendWarningText(errorMessage, "inserting_tag", node.element);
+     } else if (code == CANT_BE_NESTED) {
+         appendWarningText(errorMessage, "warning");
+         tag(errorMessage, node);
+         appendWarningText(errorMessage, "cant_be_nested");
+     } else if (code == PROPRIETARY_ELEMENT) {
+         appendWarningText(errorMessage, "warning");
+         tag(errorMessage, node);
+         appendWarningText(errorMessage, "proprietary_element");
+
+         /* remember which proprietary layout element was seen */
+         if (node.tag == tt.tagLayer)
+             lexer.badLayout |= USING_LAYER;
+         else if (node.tag == tt.tagSpacer)
+             lexer.badLayout |= USING_SPACER;
+         else if (node.tag == tt.tagNobr)
+             lexer.badLayout |= USING_NOBR;
+     } else if (code == OBSOLETE_ELEMENT) {
+         if (element.tag != null
+             && (element.tag.model & Dict.CM_OBSOLETE) != 0) {
+             appendWarningText(errorMessage, "obsolete_element");
+         } else {
+             appendWarningText(errorMessage, "replacing_element");
+         }
+         tag(errorMessage, element);
+         appendWarningText(errorMessage, "by");
+         tag(errorMessage, node);
+     } else if (code == TRIM_EMPTY_ELEMENT) {
+         appendWarningText(errorMessage, "trim_empty_element");
+         tag(errorMessage, element);
+     } else if (code == MISSING_TITLE_ELEMENT) {
+         appendWarningText(errorMessage, "missing_title_element");
+     } else if (code == ILLEGAL_NESTING) {
+         appendWarningText(errorMessage, "warning");
+         tag(errorMessage, element);
+         appendWarningText(errorMessage, "illegal_nesting");
+     } else if (code == NOFRAMES_CONTENT) {
+         appendWarningText(errorMessage, "warning");
+         tag(errorMessage, node);
+         appendWarningText(errorMessage, "noframes_content");
+     } else if (code == INCONSISTENT_VERSION) {
+         appendWarningText(errorMessage, "inconsistent_version");
+     } else if (code == MALFORMED_DOCTYPE) {
+         appendWarningText(errorMessage, "malformed_doctype");
+     } else if (code == CONTENT_AFTER_BODY) {
+         appendWarningText(errorMessage, "content_after_body");
+     } else if (code == MALFORMED_COMMENT) {
+         appendWarningText(errorMessage, "malformed_comment");
+     } else if (code == BAD_COMMENT_CHARS) {
+         appendWarningText(errorMessage, "bad_comment_chars");
+     } else if (code == BAD_XML_COMMENT) {
+         appendWarningText(errorMessage, "bad_xml_comment");
+     } else if (code == BAD_CDATA_CONTENT) {
+         appendWarningText(errorMessage, "bad_cdata_content");
+     } else if (code == INCONSISTENT_NAMESPACE) {
+         appendWarningText(errorMessage, "inconsistent_namespace");
+     } else if (code == DTYPE_NOT_UPPER_CASE) {
+         appendWarningText(errorMessage, "dtype_not_upper_case");
+     } else if (code == UNEXPECTED_END_OF_FILE) {
+         appendWarningText(errorMessage, "unexpected_end_of_file");
+         tag(errorMessage, element);
+     }
+
+     attributes.put(
+         IMarker.SEVERITY,
+         new Integer(IMarker.SEVERITY_WARNING));
+     attributes.put(JtidyPlugin.MARKER_NAME, Boolean.TRUE);
+     try {
+         MarkerUtilities.setMessage(attributes, errorMessage.toString());
+         MarkerUtilities.createMarker(
+             lexer.getIFile(),
+             attributes,
+             IMarker.PROBLEM);
+     } catch (CoreException e) {
+         // A marker that cannot be created must not abort tidying, but the
+         // failure is made visible like every other reporting problem.
+         tidyPrintln(e.toString());
+     }
+     tidyPrintln();
+ }
+
+ /**
+  * Appends the resource string stored under <code>key</code> to
+  * <code>buffer</code>; a missing resource is printed through
+  * <code>tidyPrintln</code> and leaves the buffer unchanged.
+  */
+ private static void appendWarningText(StringBuffer buffer, String key) {
+     try {
+         buffer.append(res.getString(key));
+     } catch (MissingResourceException e) {
+         tidyPrintln(e.toString());
+     }
+ }
+
+ /**
+  * Appends the resource pattern stored under <code>key</code>, formatted
+  * with the single <code>argument</code>, to <code>buffer</code>; a missing
+  * resource is printed through <code>tidyPrintln</code> and leaves the
+  * buffer unchanged.
+  */
+ private static void appendWarningText(
+     StringBuffer buffer,
+     String key,
+     Object argument) {
+     try {
+         buffer.append(
+             MessageFormat.format(
+                 res.getString(key),
+                 new Object[] { argument }));
+     } catch (MissingResourceException e) {
+         tidyPrintln(e.toString());
+     }
+ }
+
+ /**
+  * Reports a markup error as an error-severity problem marker on the file
+  * returned by <code>lexer.getIFile()</code>.
+  *
+  * <p>The warning counter is always incremented. Once more than 6 errors
+  * have been counted the call returns without reporting; otherwise the
+  * error counter is incremented and a marker whose message starts with
+  * "Column n: " is created on <code>lexer.lines</code>. Codes other than
+  * the four handled below yield a marker that carries only that prefix.</p>
+  *
+  * @param lexer   lexer supplying counters, position and file
+  * @param element not used by the messages currently emitted (the
+  *                "unexpected_endtag_suffix" output is disabled)
+  * @param node    node named by the UNKNOWN_ELEMENT and UNEXPECTED_ENDTAG
+  *                messages
+  * @param code    error code selecting the message
+  */
+ public static void error(
+     Lexer lexer,
+     Node element,
+     Node node,
+     short code) {
+     lexer.warnings++;
+
+     /* keep quiet after 6 errors */
+     if (lexer.errors > 6)
+         return;
+
+     lexer.errors++;
+
+     Hashtable attributes = new Hashtable();
+     StringBuffer errorMessage =
+         new StringBuffer("Column " + lexer.columns + ": ");
+
+     MarkerUtilities.setLineNumber(attributes, lexer.lines);
+
+     if (code == SUSPECTED_MISSING_QUOTE) {
+         try {
+             errorMessage.append(res.getString("suspected_missing_quote"));
+         } catch (MissingResourceException e) {
+             tidyPrintln(e.toString());
+         }
+     } else if (code == DUPLICATE_FRAMESET) {
+         try {
+             errorMessage.append(res.getString("duplicate_frameset"));
+         } catch (MissingResourceException e) {
+             tidyPrintln(e.toString());
+         }
+     } else if (code == UNKNOWN_ELEMENT) {
+         try {
+             errorMessage.append(res.getString("error"));
+         } catch (MissingResourceException e) {
+             tidyPrintln(e.toString());
+         }
+         tag(errorMessage, node);
+         try {
+             errorMessage.append(res.getString("unknown_element"));
+         } catch (MissingResourceException e) {
+             tidyPrintln(e.toString());
+         }
+     } else if (code == UNEXPECTED_ENDTAG) {
+         try {
+             // Only the main text is reported; the optional
+             // "unexpected_endtag_suffix" naming 'element' stays disabled.
+             errorMessage.append(
+                 MessageFormat.format(
+                     res.getString("unexpected_endtag"),
+                     new Object[] { node.element }));
+         } catch (MissingResourceException e) {
+             tidyPrintln(e.toString());
+         }
+     }
+
+     attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR));
+     attributes.put(JtidyPlugin.MARKER_NAME, Boolean.TRUE);
+     try {
+         MarkerUtilities.setMessage(attributes, errorMessage.toString());
+         MarkerUtilities.createMarker(
+             lexer.getIFile(),
+             attributes,
+             IMarker.PROBLEM);
+     } catch (CoreException e) {
+         // A marker that cannot be created must not abort tidying, but the
+         // failure is made visible like every other reporting problem.
+         tidyPrintln(e.toString());
+     }
+ }
+
+ public static void errorSummary(Lexer lexer) {
+ /* adjust badAccess to that its null if frames are ok */
+ if ((lexer.badAccess & (USING_FRAMES | USING_NOFRAMES)) != 0) {
+ if (!(((lexer.badAccess & USING_FRAMES) != 0)
+ && ((lexer.badAccess & USING_NOFRAMES) == 0)))
+ lexer.badAccess &= ~(USING_FRAMES | USING_NOFRAMES);
+ }
+
+ Hashtable attributes = new Hashtable();
+ StringBuffer errorMessage =
+ new StringBuffer("Column " + lexer.columns + ": ");
+
+ MarkerUtilities.setLineNumber(attributes, lexer.lines);
+
+ if (lexer.badChars != 0) {
+ if ((lexer.badChars & WINDOWS_CHARS) != 0) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("badchars_summary"));
+ errorMessage.append(res.getString("badchars_summary"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+ }
+
+ if (lexer.badForm != 0) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("badform_summary"));
+ errorMessage.append(res.getString("badform_summary"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ if (lexer.badAccess != 0) {
+ if ((lexer.badAccess & MISSING_SUMMARY) != 0) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("badaccess_missing_summary"));
+ errorMessage.append(
+ res.getString("badaccess_missing_summary"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ if ((lexer.badAccess & MISSING_IMAGE_ALT) != 0) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("badaccess_missing_image_alt"));
+ errorMessage.append(
+ res.getString("badaccess_missing_image_alt"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ if ((lexer.badAccess & MISSING_IMAGE_MAP) != 0) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("badaccess_missing_image_map"));
+ errorMessage.append(
+ res.getString("badaccess_missing_image_map"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ if ((lexer.badAccess & MISSING_LINK_ALT) != 0) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("badaccess_missing_link_alt"));
+ errorMessage.append(
+ res.getString("badaccess_missing_link_alt"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ if (((lexer.badAccess & USING_FRAMES) != 0)
+ && ((lexer.badAccess & USING_NOFRAMES) == 0)) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("badaccess_frames"));
+ errorMessage.append(res.getString("badaccess_frames"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ try {
+ // tidyPrint(lexer.errout, MessageFormat.format(res.getString("badaccess_summary"), new Object[] { ACCESS_URL }));
+ errorMessage.append(
+ MessageFormat.format(
+ res.getString("badaccess_summary"),
+ new Object[] { ACCESS_URL }));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ if (lexer.badLayout != 0) {
+ if ((lexer.badLayout & USING_LAYER) != 0) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("badlayout_using_layer"));
+ errorMessage.append(res.getString("badlayout_using_layer"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ if ((lexer.badLayout & USING_SPACER) != 0) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("badlayout_using_spacer"));
+ errorMessage.append(
+ res.getString("badlayout_using_spacer"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ if ((lexer.badLayout & USING_FONT) != 0) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("badlayout_using_font"));
+ errorMessage.append(res.getString("badlayout_using_font"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ if ((lexer.badLayout & USING_NOBR) != 0) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("badlayout_using_nobr"));
+ errorMessage.append(res.getString("badlayout_using_nobr"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ if ((lexer.badLayout & USING_BODY) != 0) {
+ try {
+ // tidyPrint(lexer.errout, res.getString("badlayout_using_body"));
+ errorMessage.append(res.getString("badlayout_using_body"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+ attributes.put(
+ IMarker.SEVERITY,
+ new Integer(IMarker.SEVERITY_ERROR));
+ // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING));
+ // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO));
+ attributes.put(JtidyPlugin.MARKER_NAME, Boolean.TRUE);
+ try {
+ MarkerUtilities.setMessage(attributes, errorMessage.toString());
+ MarkerUtilities.createMarker(
+ lexer.getIFile(),
+ attributes,
+ IMarker.PROBLEM);
+ } catch (CoreException e) {
+ }
+ }
+ }
+
+ public static void unknownOption(PrintWriter errout, char c) {
+ try {
+ tidyPrintln(
+ MessageFormat.format(
+ res.getString("unrecognized_option"),
+ new Object[] { new String(new char[] { c })
+ }));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ public static void unknownFile(
+ PrintWriter errout,
+ String program,
+ String file) {
+ try {
+ tidyPrintln(
+ MessageFormat.format(
+ res.getString("unknown_file"),
+ new Object[] { program, file }));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ public static void needsAuthorIntervention(PrintWriter errout) {
+ try {
+ tidyPrintln(res.getString("needs_author_intervention"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ public static void missingBody(PrintWriter errout) {
+ try {
+ tidyPrintln(res.getString("missing_body"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ public static void reportNumberOfSlides(PrintWriter errout, int count) {
+ try {
+ tidyPrintln(
+ MessageFormat.format(
+ res.getString("slides_found"),
+ new Object[] { new Integer(count)}));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ public static void generalInfo(PrintWriter errout) {
+ try {
+ tidyPrintln(res.getString("general_info"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ public static void helloMessage(
+ PrintWriter errout,
+ String date,
+ String filename) {
+ currentFile = filename; /* for use with Gnu Emacs */
+
+ try {
+ tidyPrintln(
+ MessageFormat.format(
+ res.getString("hello_message"),
+ new Object[] { date, filename }));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ /**
+  * Prints the DOCTYPE given in the document (if any) followed by the
+  * "report_version" line naming the HTML version the lexer settled on.
+  *
+  * <p>When <code>doctype</code> is not null, the "doctype_given" text is
+  * printed, followed by the characters found between the first and the
+  * second double quote of the doctype text and a closing quote.</p>
+  *
+  * @param errout   not used; output always goes through tidyPrint and
+  *                 tidyPrintln
+  * @param lexer    lexer whose <code>HTMLVersionName()</code> is reported;
+  *                 "HTML proprietary" is used when it returns null
+  * @param filename file name inserted into both messages
+  * @param doctype  doctype node of the document, or null when absent
+  */
+ public static void reportVersion(
+     PrintWriter errout,
+     Lexer lexer,
+     String filename,
+     Node doctype) {
+     int i, c;
+     int state = 0; /* number of double quotes seen so far */
+     String vers = lexer.HTMLVersionName();
+     MutableInteger cc = new MutableInteger();
+
+     try {
+         if (doctype != null) {
+             tidyPrint(
+                 MessageFormat.format(
+                     res.getString("doctype_given"),
+                     new Object[] { filename }));
+             StringBuffer buf = new StringBuffer();
+             for (i = doctype.start; i < doctype.end; ++i) {
+                 c = (int) doctype.textarray[i];
+
+                 /* look for UTF-8 multibyte character */
+                 if (c < 0) {
+                     i += PPrint.getUTF8(doctype.textarray, i, cc);
+                     c = cc.value;
+                 }
+
+                 if (c == (char) '"')
+                     ++state;
+                 else if (state == 1) {
+                     // Append the character itself. append(int) would
+                     // write the decimal code point ("72" instead of "H").
+                     buf.append((char) c);
+                 }
+             }
+
+             buf.append('"');
+             tidyPrint(buf.toString());
+         }
+
+         tidyPrintln(
+             MessageFormat.format(
+                 res.getString("report_version"),
+                 new Object[] {
+                     filename,
+                     (vers != null ? vers : "HTML proprietary")}));
+     } catch (MissingResourceException e) {
+         tidyPrintln(e.toString());
+     }
+ }
+
+ public static void reportNumWarnings(PrintWriter errout, Lexer lexer) {
+ if (lexer.warnings > 0) {
+ try {
+ tidyPrintln(
+ MessageFormat.format(
+ res.getString("num_warnings"),
+ new Object[] { new Integer(lexer.warnings)}));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ } else {
+ try {
+ tidyPrintln(res.getString("no_warnings"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+ }
+
+ public static void helpText(PrintWriter out, String prog) {
+ try {
+ tidyPrintln(
+ MessageFormat.format(
+ res.getString("help_text"),
+ new Object[] { prog, RELEASE_DATE }));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+ public static void badTree(PrintWriter errout) {
+ try {
+ tidyPrintln(res.getString("bad_tree"));
+ } catch (MissingResourceException e) {
+ tidyPrintln(e.toString());
+ }
+ }
+
+}
--- /dev/null
+/*
+ * @(#)StreamIn.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Input Stream
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+import java.io.InputStream;
+
+/**
+ * Abstract character source: declares the reader state as public fields
+ * and the four operations a concrete input stream has to provide.
+ */
+public abstract class StreamIn {
+
+ /* value used to signal that no more input is available */
+ public static final int EndOfStream = -1; // EOF
+
+ /* states for ISO 2022
+
+ A document in ISO-2022 based encoding uses some ESC sequences called
+ "designator" to switch character sets. The designators defined and
+ used in ISO-2022-JP are:
+
+ "ESC" + "(" + ? for ISO646 variants
+
+ "ESC" + "$" + ? and
+ "ESC" + "$" + "(" + ? for multibyte character sets
+ */
+
+ public static final int FSM_ASCII = 0;
+ public static final int FSM_ESC = 1;
+ public static final int FSM_ESCD = 2;
+ public static final int FSM_ESCDP = 3;
+ public static final int FSM_ESCP = 4;
+ public static final int FSM_NONASCII = 5;
+
+ /* non-raw input is cleaned up*/
+ public int state; /* FSM for ISO2022 */
+ /* NOTE(review): pushed/c look like a one-character push-back slot used by ungetChar - confirm in the implementation */
+ public boolean pushed;
+ public int c;
+ /* NOTE(review): tabs/tabsize presumably drive tab expansion - confirm in the implementation */
+ public int tabs;
+ public int tabsize;
+ /* position bookkeeping; Report reads curline and curcol to place end-of-file messages */
+ public int lastcol;
+ public int curcol;
+ public int curline;
+ /* character encoding identifier; the values are defined outside this class */
+ public int encoding;
+ /* underlying byte source */
+ public InputStream stream;
+ /* end-of-input flag maintained by the implementation */
+ public boolean endOfStream;
+ public Object lexer; /* needed for error reporting */
+
+ /* read char from stream */
+ public abstract int readCharFromStream();
+
+ /* NOTE(review): presumably the cleaned-up read used by the lexer, as opposed to the raw readCharFromStream - confirm */
+ public abstract int readChar();
+
+ /* NOTE(review): presumably makes c the next character returned by readChar - confirm */
+ public abstract void ungetChar(int c);
+
+ /* tells whether the end of the input has been reached */
+ public abstract boolean isEndOfStream();
+
+}
--- /dev/null
+/*
+ * @(#)StreamInImpl.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Input Stream Implementation
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+
+public class StreamInImpl extends StreamIn {
+
+ /* Mapping for Windows Western character set (128-159) to Unicode */
+ /* 32 entries, one per code 128..159, so entry k belongs to code 128 + k. */
+ /* NOTE(review): 0x0000 entries presumably mark codes without a Unicode equivalent - confirm at the use site. */
+ private static int[] Win2Unicode =
+ {
+ 0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000,
+ 0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178
+ };
+
+ /*
+ John Love-Jensen contributed this table for mapping MacRoman
+ character set to Unicode.
+
+ The table has 256 entries laid out as 16 groups of 16. The first
+ 128 entries (0x00-0x7F) map each code to itself; the remaining 128
+ entries give the Unicode value for the codes 0x80-0xFF.
+ */
+
+ private static int[] Mac2Unicode =
+ {
+
+ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+ 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
+
+ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+ 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
+
+ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+ 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
+
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+ 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+
+ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+ 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+
+ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+ 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
+
+ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+ 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
+
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+ 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
+ /* x7F = DEL */
+ 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
+ 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
+
+ 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
+ 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
+
+ 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
+ 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
+
+ 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
+ 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
+
+ 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
+ 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
+
+ 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
+ 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
+
+ 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
+ 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
+ /* xF0 = Apple Logo */
+ 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
+ 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7
+ };
+
+ /**
+  * Creates an input source that reads characters from the given stream.
+  * Position tracking starts at line 1, column 1.
+  *
+  * @param stream   the byte stream to read from
+  * @param encoding the character encoding; compared against the
+  *                 Configuration encoding constants while reading
+  * @param tabsize  the tab width used when tabs are expanded into spaces
+  */
+ public StreamInImpl(InputStream stream, int encoding, int tabsize)
+ {
+     /* where the input comes from and how to interpret it */
+     this.stream = stream;
+     this.encoding = encoding;
+     this.tabsize = tabsize;
+
+     /* position bookkeeping */
+     this.curline = 1;
+     this.curcol = 1;
+     this.tabs = 0;
+
+     /* nothing pushed back yet, ASCII state, stream not exhausted */
+     this.c = 0;
+     this.pushed = false;
+     this.state = FSM_ASCII;
+     this.endOfStream = false;
+ }
+
+ /**
+  * Reads one character from the underlying stream, decoding it according
+  * to the configured encoding.
+  *
+  * <ul>
+  * <li>ISO2022: bytes are returned one at a time. ESC is passed through
+  *     and drives a small state machine; while that machine is in the
+  *     non-ASCII state the top bit of each returned byte is set.</li>
+  * <li>UTF8: a multi-byte sequence is folded into a single value.
+  *     Successor bytes are not validated.</li>
+  * <li>Any other encoding: the byte is returned exactly as read.</li>
+  * </ul>
+  *
+  * @return the character read, or EndOfStream when the stream is
+  *         exhausted, ends in the middle of a UTF-8 sequence, or fails
+  *         with an IOException; in each of these cases isEndOfStream()
+  *         reports true afterwards
+  */
+ public int readCharFromStream()
+ {
+     int n, c, i, count;
+
+     try {
+         c = this.stream.read();
+
+         if (c == EndOfStream) {
+             this.endOfStream = true;
+             return c;
+         }
+
+         /*
+            A document in ISO-2022 based encoding uses some ESC sequences
+            called "designator" to switch character sets. The designators
+            defined and used in ISO-2022-JP are:
+
+              "ESC" + "(" + ?     for ISO646 variants
+
+              "ESC" + "$" + ?     and
+              "ESC" + "$" + "(" + ?   for multibyte character sets
+
+            Where ? stands for a single character used to indicate the
+            character set for multibyte characters.
+
+            Tidy handles this by preserving the escape sequence and
+            setting the top bit of each byte for non-ascii chars. This
+            bit is then cleared on output. The input stream keeps track
+            of the state to determine when to set/clear the bit.
+         */
+
+         if (this.encoding == Configuration.ISO2022)
+         {
+             if (c == 0x1b)  /* ESC */
+             {
+                 this.state = FSM_ESC;
+                 return c;
+             }
+
+             switch (this.state)
+             {
+             case FSM_ESC:
+                 if (c == '$')
+                     this.state = FSM_ESCD;
+                 else if (c == '(')
+                     this.state = FSM_ESCP;
+                 else
+                     this.state = FSM_ASCII;
+                 break;
+
+             case FSM_ESCD:
+                 if (c == '(')
+                     this.state = FSM_ESCDP;
+                 else
+                     this.state = FSM_NONASCII;
+                 break;
+
+             case FSM_ESCDP:
+                 this.state = FSM_NONASCII;
+                 break;
+
+             case FSM_ESCP:
+                 this.state = FSM_ASCII;
+                 break;
+
+             case FSM_NONASCII:
+                 /* mark the byte as belonging to a multibyte character */
+                 c |= 0x80;
+                 break;
+             }
+
+             return c;
+         }
+
+         if (this.encoding != Configuration.UTF8)
+             return c;
+
+         /* deal with UTF-8 encoded char */
+
+         if ((c & 0xE0) == 0xC0)  /* 110X XXXX  two bytes */
+         {
+             n = c & 31;
+             count = 1;
+         }
+         else if ((c & 0xF0) == 0xE0)  /* 1110 XXXX  three bytes */
+         {
+             n = c & 15;
+             count = 2;
+         }
+         else if ((c & 0xF8) == 0xF0)  /* 1111 0XXX  four bytes */
+         {
+             n = c & 7;
+             count = 3;
+         }
+         else if ((c & 0xFC) == 0xF8)  /* 1111 10XX  five bytes */
+         {
+             n = c & 3;
+             count = 4;
+         }
+         else if ((c & 0xFE) == 0xFC)  /* 1111 110X  six bytes */
+         {
+             n = c & 1;
+             count = 5;
+         }
+         else  /* 0XXX XXXX  one byte; any other lead byte is also returned as is */
+             return c;
+
+         /* successor bytes should have the form 10XX XXXX */
+         for (i = 1; i <= count; ++i)
+         {
+             c = this.stream.read();
+
+             if (c == EndOfStream) {
+                 this.endOfStream = true;
+                 return c;
+             }
+
+             n = (n << 6) | (c & 0x3F);
+         }
+     }
+     catch (IOException e) {
+         System.err.println("StreamInImpl.readCharFromStream: " + e.toString());
+         /* the caller is handed EndOfStream, so isEndOfStream() must agree */
+         this.endOfStream = true;
+         n = EndOfStream;
+     }
+
+     return n;
+ }
+
+ /**
+  * Returns the next character of the input after normalisation, keeping
+  * curline and curcol up to date.
+  *
+  * <ul>
+  * <li>A character handed back through ungetChar() is returned first.</li>
+  * <li>A tab is expanded into spaces up to the next tab stop; the extra
+  *     spaces are delivered by the following calls.</li>
+  * <li>CR and CR LF are both reported as a single '\n'.</li>
+  * <li>Characters in the range 1-31 other than tab, LF, CR and ESC are
+  *     skipped.</li>
+  * <li>For the RAW and ISO2022 encodings the character is returned without
+  *     further translation.</li>
+  * <li>For MACROMAN the character is mapped through Mac2Unicode.</li>
+  * <li>Values 128-159 are reported through Report.encodingError() and
+  *     mapped through Win2Unicode; values without a mapping are skipped.</li>
+  * </ul>
+  *
+  * @return the next character, or EndOfStream when no more input is
+  *         available
+  */
+ public int readChar()
+ {
+     int c;
+
+     if (this.pushed)
+     {
+         this.pushed = false;
+         c = this.c;
+
+         if (c == '\n')
+         {
+             this.curcol = 1;
+             this.curline++;
+             return c;
+         }
+
+         this.curcol++;
+         return c;
+     }
+
+     /* remembered so that ungetChar() can restore the column */
+     this.lastcol = this.curcol;
+
+     /* still delivering the spaces of a previously expanded tab */
+     if (this.tabs > 0)
+     {
+         this.curcol++;
+         this.tabs--;
+         return ' ';
+     }
+
+     for (;;)
+     {
+         c = readCharFromStream();
+
+         if (c < 0)
+             return EndOfStream;
+
+         if (c == '\n')
+         {
+             this.curcol = 1;
+             this.curline++;
+             break;
+         }
+
+         if (c == '\r')
+         {
+             /* swallow the LF of a CR LF pair; a lone CR becomes '\n' */
+             c = readCharFromStream();
+             if (c != '\n')
+             {
+                 ungetChar(c);
+                 c = '\n';
+             }
+             this.curcol = 1;
+             this.curline++;
+             break;
+         }
+
+         if (c == '\t')
+         {
+             /* a tab size of zero would divide by zero below; a
+                non-positive tab size turns the tab into a single space */
+             this.tabs = (this.tabsize > 0)
+                 ? this.tabsize - ((this.curcol - 1) % this.tabsize) - 1
+                 : 0;
+             this.curcol++;
+             c = ' ';
+             break;
+         }
+
+         /* strip control characters, except for Esc */
+
+         if (c == '\033')
+             break;
+
+         if (0 < c && c < 32)
+             continue;
+
+         /* watch out for ISO2022 */
+
+         if (this.encoding == Configuration.RAW ||
+             this.encoding == Configuration.ISO2022)
+         {
+             this.curcol++;
+             break;
+         }
+
+         if (this.encoding == Configuration.MACROMAN)
+             c = Mac2Unicode[c];
+
+         /* produced e.g. as a side-effect of smart quotes in Word */
+
+         if (127 < c && c < 160)
+         {
+             Report.encodingError((Lexer)this.lexer, Report.WINDOWS_CHARS, c);
+
+             c = Win2Unicode[c - 128];
+
+             /* no Unicode equivalent: drop the character */
+             if (c == 0)
+                 continue;
+         }
+
+         this.curcol++;
+         break;
+     }
+
+     return c;
+ }
+
+ /**
+  * Hands a character back to the stream so that the next readChar()
+  * call returns it again. Only one character is remembered at a time.
+  * The column is reset to the value saved by readChar(), and the line
+  * counter is moved back when the character is a newline.
+  *
+  * @param c the character to hand back
+  */
+ public void ungetChar(int c)
+ {
+     this.c = c;
+     this.pushed = true;
+
+     if (c == '\n') {
+         this.curline -= 1;
+     }
+
+     this.curcol = this.lastcol;
+ }
+
+ /**
+  * Tells whether the end of the underlying stream has been seen.
+  *
+  * @return the current value of the endOfStream flag
+  */
+ public boolean isEndOfStream() {
+     return endOfStream;
+ }
+
+}
--- /dev/null
+/*
+ * @(#)Style.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Linked list of class names and styles
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class Style {
+
+    /** Tag value given to the constructor. */
+    public String tag;
+
+    /** Class name value given to the constructor. */
+    public String tagClass;
+
+    /** Properties value given to the constructor. */
+    public String properties;
+
+    /** Following entry of the linked list, or null when there is none. */
+    public Style next;
+
+    /** Creates an entry with every field set to null. */
+    public Style()
+    {
+        this(null, null, null);
+    }
+
+    /** Creates an entry that has no successor. */
+    public Style(String tag, String tagClass, String properties)
+    {
+        this(tag, tagClass, properties, null);
+    }
+
+    /** Creates an entry and links it in front of <code>next</code>. */
+    public Style(String tag, String tagClass, String properties, Style next)
+    {
+        this.next = next;
+        this.properties = properties;
+        this.tagClass = tagClass;
+        this.tag = tag;
+    }
+
+}
--- /dev/null
+/*
+ * @(#)StyleProp.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Linked list of style properties
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class StyleProp {
+
+    /** Property name given to the constructor. */
+    public String name;
+
+    /** Property value given to the constructor. */
+    public String value;
+
+    /** Following entry of the linked list, or null when there is none. */
+    public StyleProp next;
+
+    /** Creates an entry with every field set to null. */
+    public StyleProp()
+    {
+        this(null, null);
+    }
+
+    /** Creates an entry that has no successor. */
+    public StyleProp(String name, String value)
+    {
+        this(name, value, null);
+    }
+
+    /** Creates an entry and links it in front of <code>next</code>. */
+    public StyleProp(String name, String value, StyleProp next)
+    {
+        this.next = next;
+        this.value = value;
+        this.name = name;
+    }
+
+}
--- /dev/null
+/*
+ * @(#)TagTable.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * Tag dictionary node hash table
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ * Modified from a Singleton to a non-Singleton.
+ */
+
+import java.util.Hashtable;
+
+public class TagTable {
+
+ private Configuration configuration = null;
+
+ /**
+  * Builds the table: installs every entry of the built-in tags array
+  * and then caches the dictionary entries of frequently used tags in
+  * the public tagXxx fields.
+  */
+ public TagTable()
+ {
+     int index = 0;
+     while ( index < tags.length ) {
+         install( tags[index] );
+         index++;
+     }
+
+     /* cache the entries that are looked up by field */
+     this.tagHtml = lookup("html");
+     this.tagHead = lookup("head");
+     this.tagBody = lookup("body");
+     this.tagFrameset = lookup("frameset");
+     this.tagFrame = lookup("frame");
+     this.tagNoframes = lookup("noframes");
+     this.tagMeta = lookup("meta");
+     this.tagTitle = lookup("title");
+     this.tagBase = lookup("base");
+     this.tagHr = lookup("hr");
+     this.tagPre = lookup("pre");
+     this.tagListing = lookup("listing");
+     this.tagH1 = lookup("h1");
+     this.tagH2 = lookup("h2");
+     this.tagP = lookup("p");
+     this.tagUl = lookup("ul");
+     this.tagOl = lookup("ol");
+     this.tagDir = lookup("dir");
+     this.tagLi = lookup("li");
+     this.tagDt = lookup("dt");
+     this.tagDd = lookup("dd");
+     this.tagDl = lookup("dl");
+     this.tagTd = lookup("td");
+     this.tagTh = lookup("th");
+     this.tagTr = lookup("tr");
+     this.tagCol = lookup("col");
+     this.tagBr = lookup("br");
+     this.tagA = lookup("a");
+     this.tagLink = lookup("link");
+     this.tagB = lookup("b");
+     this.tagI = lookup("i");
+     this.tagStrong = lookup("strong");
+     this.tagEm = lookup("em");
+     this.tagBig = lookup("big");
+     this.tagSmall = lookup("small");
+     this.tagParam = lookup("param");
+     this.tagOption = lookup("option");
+     this.tagOptgroup = lookup("optgroup");
+     this.tagImg = lookup("img");
+     this.tagMap = lookup("map");
+     this.tagArea = lookup("area");
+     this.tagNobr = lookup("nobr");
+     this.tagWbr = lookup("wbr");
+     this.tagFont = lookup("font");
+     this.tagSpacer = lookup("spacer");
+     this.tagLayer = lookup("layer");
+     this.tagCenter = lookup("center");
+     this.tagStyle = lookup("style");
+     this.tagScript = lookup("script");
+     this.tagNoscript = lookup("noscript");
+     this.tagTable = lookup("table");
+     this.tagCaption = lookup("caption");
+     this.tagForm = lookup("form");
+     this.tagTextarea = lookup("textarea");
+     this.tagBlockquote = lookup("blockquote");
+     this.tagApplet = lookup("applet");
+     this.tagObject = lookup("object");
+     this.tagDiv = lookup("div");
+     this.tagSpan = lookup("span");
+ }
+
+ /**
+  * Sets the configuration that findTag() consults for its XmlTags flag.
+  *
+  * @param configuration the configuration to use; may be null
+  */
+ public void setConfiguration(Configuration configuration) {
+     this.configuration = configuration;
+ }
+
+ /**
+  * Looks up the dictionary entry registered under the given name.
+  *
+  * @param name the tag name used as the hash table key
+  * @return the matching entry, or null when no entry is registered
+  */
+ public Dict lookup( String name )
+ {
+     Object entry = tagHashtable.get( name );
+     return (Dict)entry;
+ }
+
+ /**
+  * Registers a dictionary entry under its name. When an entry with the
+  * same name already exists, that entry is updated in place: versions,
+  * parser and chkattrs are overwritten and the model bits are OR-ed in.
+  *
+  * NOTE(review): the built-in entries come from the static tags array,
+  * so an entry updated here appears to be shared by every TagTable
+  * instance - confirm that this is intended.
+  *
+  * @param dict the entry to register
+  * @return the entry that is stored in the table afterwards
+  */
+ public Dict install( Dict dict )
+ {
+     Dict existing = (Dict)tagHashtable.get(dict.name);
+
+     if (existing == null)
+     {
+         /* first entry registered under this name */
+         tagHashtable.put(dict.name, dict);
+         return dict;
+     }
+
+     /* merge the new definition into the entry already stored */
+     existing.versions = dict.versions;
+     existing.model |= dict.model;
+     existing.parser = dict.parser;
+     existing.chkattrs = dict.chkattrs;
+     return existing;
+ }
+
+ /**
+  * Public interface for finding a tag by name. Stores the matching
+  * dictionary entry in node.tag. When a configuration is set and its
+  * XmlTags flag is on, every node receives the xmlTags dummy entry.
+  *
+  * @param node the node whose element name is looked up
+  * @return true when node.tag was set, false when the node has no
+  *         element name or the name is not in the table
+  */
+ public boolean findTag( Node node )
+ {
+     if ( configuration != null && configuration.XmlTags ) {
+         node.tag = xmlTags;
+         return true;
+     }
+
+     if ( node.element == null ) {
+         return false;
+     }
+
+     Dict found = lookup( node.element );
+     if ( found == null ) {
+         return false;
+     }
+
+     node.tag = found;
+     return true;
+ }
+
+ /**
+  * Finds the parser registered for the element name of a node.
+  *
+  * @param node the node whose element name is looked up
+  * @return the parser of the matching dictionary entry, or null when the
+  *         node has no element name or the name is not in the table
+  */
+ public Parser findParser(Node node)
+ {
+     if (node.element == null) {
+         return null;
+     }
+
+     Dict found = lookup(node.element);
+     return (found != null) ? found.parser : null;
+ }
+
+ private Hashtable tagHashtable = new Hashtable();
+
+ private static Dict[] tags = {
+
+ new Dict( "html", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), (Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST), ParserImpl.getParseHTML(), CheckAttribsImpl.getCheckHTML() ),
+
+ new Dict( "head", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), (Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST), ParserImpl.getParseHead(), null ),
+
+ new Dict( "title", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), Dict.CM_HEAD, ParserImpl.getParseTitle(), null ),
+ new Dict( "base", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), (Dict.CM_HEAD|Dict.CM_EMPTY), null, null ),
+ new Dict( "link", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), (Dict.CM_HEAD|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckLINK() ),
+ new Dict( "meta", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), (Dict.CM_HEAD|Dict.CM_EMPTY), null, null ),
+ new Dict( "style", (short)(Dict.VERS_FROM32|Dict.VERS_FRAMES), Dict.CM_HEAD, ParserImpl.getParseScript(), CheckAttribsImpl.getCheckSTYLE() ),
+ new Dict( "script", (short)(Dict.VERS_FROM32|Dict.VERS_FRAMES), (Dict.CM_HEAD|Dict.CM_MIXED|Dict.CM_BLOCK|Dict.CM_INLINE), ParserImpl.getParseScript(), CheckAttribsImpl.getCheckSCRIPT() ),
+ new Dict( "server", Dict.VERS_NETSCAPE, (Dict.CM_HEAD|Dict.CM_MIXED|Dict.CM_BLOCK|Dict.CM_INLINE), ParserImpl.getParseScript(), null ),
+
+ new Dict( "body", Dict.VERS_ALL, (Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST), ParserImpl.getParseBody(), null ),
+ new Dict( "frameset", Dict.VERS_FRAMES, (Dict.CM_HTML|Dict.CM_FRAMES), ParserImpl.getParseFrameSet(), null ),
+
+ new Dict( "p", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_OPT), ParserImpl.getParseInline(), null ),
+ new Dict( "h1", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
+ new Dict( "h2", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
+ new Dict( "h3", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
+ new Dict( "h4", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
+ new Dict( "h5", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
+ new Dict( "h6", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
+ new Dict( "ul", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseList(), null ),
+ new Dict( "ol", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseList(), null ),
+ new Dict( "dl", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseDefList(), null ),
+ new Dict( "dir", Dict.VERS_LOOSE, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParseList(), null ),
+ new Dict( "menu", Dict.VERS_LOOSE, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParseList(), null ),
+ new Dict( "pre", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParsePre(), null ),
+ new Dict( "listing", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParsePre(), null ),
+ new Dict( "xmp", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParsePre(), null ),
+ new Dict( "plaintext", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParsePre(), null ),
+ new Dict( "address", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+ new Dict( "blockquote", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+ new Dict( "form", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+ new Dict( "isindex", Dict.VERS_LOOSE, (Dict.CM_BLOCK|Dict.CM_EMPTY), null, null ),
+ new Dict( "fieldset", Dict.VERS_HTML40, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+ new Dict( "table", Dict.VERS_FROM32, Dict.CM_BLOCK, ParserImpl.getParseTableTag(), CheckAttribsImpl.getCheckTABLE() ),
+ new Dict( "hr", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckHR() ),
+ new Dict( "div", Dict.VERS_FROM32, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+ new Dict( "multicol", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+ new Dict( "nosave", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+ new Dict( "layer", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+ new Dict( "ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "nolayer", Dict.VERS_NETSCAPE, (Dict.CM_BLOCK|Dict.CM_INLINE|Dict.CM_MIXED), ParserImpl.getParseBlock(), null ),
+ new Dict( "align", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+ new Dict( "center", Dict.VERS_LOOSE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+ new Dict( "ins", Dict.VERS_HTML40, (Dict.CM_INLINE|Dict.CM_BLOCK|Dict.CM_MIXED), ParserImpl.getParseInline(), null ),
+ new Dict( "del", Dict.VERS_HTML40, (Dict.CM_INLINE|Dict.CM_BLOCK|Dict.CM_MIXED), ParserImpl.getParseInline(), null ),
+
+ new Dict( "li", Dict.VERS_ALL, (Dict.CM_LIST|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), null ),
+ new Dict( "dt", Dict.VERS_ALL, (Dict.CM_DEFLIST|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseInline(), null ),
+ new Dict( "dd", Dict.VERS_ALL, (Dict.CM_DEFLIST|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), null ),
+
+ new Dict( "caption", Dict.VERS_FROM32, Dict.CM_TABLE, ParserImpl.getParseInline(), CheckAttribsImpl.getCheckCaption() ),
+ new Dict( "colgroup", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_OPT), ParserImpl.getParseColGroup(), null ),
+ new Dict( "col", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_EMPTY), null, null ),
+ new Dict( "thead", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(), null ),
+ new Dict( "tfoot", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(), null ),
+ new Dict( "tbody", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(), null ),
+ new Dict( "tr", Dict.VERS_FROM32, (Dict.CM_TABLE|Dict.CM_OPT), ParserImpl.getParseRow(), null ),
+ new Dict( "td", Dict.VERS_FROM32, (Dict.CM_ROW|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), CheckAttribsImpl.getCheckTableCell() ),
+ new Dict( "th", Dict.VERS_FROM32, (Dict.CM_ROW|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), CheckAttribsImpl.getCheckTableCell() ),
+
+ new Dict( "q", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "a", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), CheckAttribsImpl.getCheckAnchor() ),
+ new Dict( "br", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
+ new Dict( "img", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckIMG() ),
+ new Dict( "object", Dict.VERS_HTML40, (Dict.CM_OBJECT|Dict.CM_HEAD|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ),
+ new Dict( "applet", Dict.VERS_LOOSE, (Dict.CM_OBJECT|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ),
+ new Dict( "servlet", Dict.VERS_SUN, (Dict.CM_OBJECT|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ),
+ new Dict( "param", Dict.VERS_FROM32, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
+ new Dict( "embed", Dict.VERS_NETSCAPE, (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null, null ),
+ new Dict( "noembed", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "iframe", Dict.VERS_HTML40_LOOSE, Dict.CM_INLINE, ParserImpl.getParseBlock(), null ),
+ new Dict( "frame", Dict.VERS_FRAMES, (Dict.CM_FRAMES|Dict.CM_EMPTY), null, null ),
+ new Dict( "noframes", Dict.VERS_IFRAMES, (Dict.CM_BLOCK|Dict.CM_FRAMES), ParserImpl.getParseNoFrames(), null ),
+ new Dict( "noscript", (short)(Dict.VERS_FRAMES|Dict.VERS_HTML40), (Dict.CM_BLOCK|Dict.CM_INLINE|Dict.CM_MIXED), ParserImpl.getParseBlock(), null ),
+ new Dict( "b", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "i", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "u", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "tt", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "s", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "strike", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "big", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "small", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "sub", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "sup", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "em", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "strong", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "dfn", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "code", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "samp", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "kbd", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "var", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "cite", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "abbr", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "acronym", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "span", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "blink", Dict.VERS_PROPRIETARY, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "nobr", Dict.VERS_PROPRIETARY, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "wbr", Dict.VERS_PROPRIETARY, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
+ new Dict( "marquee", Dict.VERS_MICROSOFT, (Dict.CM_INLINE|Dict.CM_OPT), ParserImpl.getParseInline(), null ),
+ new Dict( "bgsound", Dict.VERS_MICROSOFT, (Dict.CM_HEAD|Dict.CM_EMPTY), null, null ),
+ new Dict( "comment", Dict.VERS_MICROSOFT, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "spacer", Dict.VERS_NETSCAPE, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
+ new Dict( "keygen", Dict.VERS_NETSCAPE, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
+ new Dict( "nolayer", Dict.VERS_NETSCAPE, (Dict.CM_BLOCK|Dict.CM_INLINE|Dict.CM_MIXED), ParserImpl.getParseBlock(), null ),
+ new Dict( "ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "map", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseBlock(), CheckAttribsImpl.getCheckMap() ),
+ new Dict( "area", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckAREA() ),
+ new Dict( "input", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null, null ),
+ new Dict( "select", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_FIELD), ParserImpl.getParseSelect(), null ),
+ new Dict( "option", Dict.VERS_ALL, (Dict.CM_FIELD|Dict.CM_OPT), ParserImpl.getParseText(), null ),
+ new Dict( "optgroup", Dict.VERS_HTML40, (Dict.CM_FIELD|Dict.CM_OPT), ParserImpl.getParseOptGroup(), null ),
+ new Dict( "textarea", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_FIELD), ParserImpl.getParseText(), null ),
+ new Dict( "label", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "legend", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "button", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "basefont", Dict.VERS_LOOSE, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
+ new Dict( "font", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+ new Dict( "bdo", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+
+ };
+
+ /* create dummy entry for all xml tags */
+ public Dict xmlTags = new Dict( null, Dict.VERS_ALL, Dict.CM_BLOCK, null, null );
+
+ public Dict tagHtml = null;
+ public Dict tagHead = null;
+ public Dict tagBody = null;
+ public Dict tagFrameset = null;
+ public Dict tagFrame = null;
+ public Dict tagNoframes = null;
+ public Dict tagMeta = null;
+ public Dict tagTitle = null;
+ public Dict tagBase = null;
+ public Dict tagHr = null;
+ public Dict tagPre = null;
+ public Dict tagListing = null;
+ public Dict tagH1 = null;
+ public Dict tagH2 = null;
+ public Dict tagP = null;
+ public Dict tagUl = null;
+ public Dict tagOl = null;
+ public Dict tagDir = null;
+ public Dict tagLi = null;
+ public Dict tagDt = null;
+ public Dict tagDd = null;
+ public Dict tagDl = null;
+ public Dict tagTd = null;
+ public Dict tagTh = null;
+ public Dict tagTr = null;
+ public Dict tagCol = null;
+ public Dict tagBr = null;
+ public Dict tagA = null;
+ public Dict tagLink = null;
+ public Dict tagB = null;
+ public Dict tagI = null;
+ public Dict tagStrong = null;
+ public Dict tagEm = null;
+ public Dict tagBig = null;
+ public Dict tagSmall = null;
+ public Dict tagParam = null;
+ public Dict tagOption = null;
+ public Dict tagOptgroup = null;
+ public Dict tagImg = null;
+ public Dict tagMap = null;
+ public Dict tagArea = null;
+ public Dict tagNobr = null;
+ public Dict tagWbr = null;
+ public Dict tagFont = null;
+ public Dict tagSpacer = null;
+ public Dict tagLayer = null;
+ public Dict tagCenter = null;
+ public Dict tagStyle = null;
+ public Dict tagScript = null;
+ public Dict tagNoscript = null;
+ public Dict tagTable = null;
+ public Dict tagCaption = null;
+ public Dict tagForm = null;
+ public Dict tagTextarea = null;
+ public Dict tagBlockquote = null;
+ public Dict tagApplet = null;
+ public Dict tagObject = null;
+ public Dict tagDiv = null;
+ public Dict tagSpan = null;
+
+ /**
+  * Registers a user-defined inline tag. The entry is marked as
+  * proprietary and carries the CM_INLINE, CM_NO_INDENT and CM_NEW
+  * model bits. Its content is parsed with the inline parser, the same
+  * parser the built-in CM_INLINE tags of this table use.
+  *
+  * @param name the name of the new tag
+  */
+ public void defineInlineTag( String name )
+ {
+     /* inline content needs the inline parser, not the block parser */
+     install( new Dict( name, Dict.VERS_PROPRIETARY,
+                        (Dict.CM_INLINE|Dict.CM_NO_INDENT|Dict.CM_NEW),
+                        ParserImpl.getParseInline(), null ) );
+ }
+
+ /**
+  * Registers a user-defined block tag. The entry is marked as
+  * proprietary, carries the CM_BLOCK, CM_NO_INDENT and CM_NEW model
+  * bits and uses the block parser.
+  *
+  * @param name the name of the new tag
+  */
+ public void defineBlockTag( String name )
+ {
+     Dict entry = new Dict( name,
+                            Dict.VERS_PROPRIETARY,
+                            (Dict.CM_BLOCK|Dict.CM_NO_INDENT|Dict.CM_NEW),
+                            ParserImpl.getParseBlock(),
+                            null );
+     install( entry );
+ }
+
+ /**
+  * Registers a user-defined empty tag. The entry is marked as
+  * proprietary, carries the CM_EMPTY, CM_NO_INDENT and CM_NEW model
+  * bits and uses the block parser.
+  *
+  * @param name the name of the new tag
+  */
+ public void defineEmptyTag(String name)
+ {
+     Dict entry = new Dict(name,
+                           Dict.VERS_PROPRIETARY,
+                           (Dict.CM_EMPTY|Dict.CM_NO_INDENT|Dict.CM_NEW),
+                           ParserImpl.getParseBlock(),
+                           null);
+     install(entry);
+ }
+
+ /**
+  * Registers a user-defined preformatted tag. The entry is marked as
+  * proprietary, carries the CM_BLOCK, CM_NO_INDENT and CM_NEW model
+  * bits and uses the pre parser.
+  *
+  * @param name the name of the new tag
+  */
+ public void definePreTag(String name)
+ {
+     Dict entry = new Dict(name,
+                           Dict.VERS_PROPRIETARY,
+                           (Dict.CM_BLOCK|Dict.CM_NO_INDENT|Dict.CM_NEW),
+                           ParserImpl.getParsePre(),
+                           null);
+     install(entry);
+ }
+}
--- /dev/null
+/*
+ * @(#)Tidy.java 1.11 2000/08/16
+ *
+ */
+
+/*
+ HTML parser and pretty printer
+
+ Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ Institute of Technology, Institut National de Recherche en
+ Informatique et en Automatique, Keio University). All Rights
+ Reserved.
+
+ Contributing Author(s):
+
+ Dave Raggett <dsr@w3.org>
+ Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+
+ The contributing author(s) would like to thank all those who
+ helped with testing, bug fixes, and patience. This wouldn't
+ have been possible without all of you.
+
+ COPYRIGHT NOTICE:
+
+ This software and documentation is provided "as is," and
+ the copyright holders and contributing author(s) make no
+ representations or warranties, express or implied, including
+ but not limited to, warranties of merchantability or fitness
+ for any particular purpose or that the use of the software or
+ documentation will not infringe any third party patents,
+ copyrights, trademarks or other rights.
+
+ The copyright holders and contributing author(s) will not be
+ liable for any direct, indirect, special or consequential damages
+ arising out of any use of the software or documentation, even if
+ advised of the possibility of such damage.
+
+ Permission is hereby granted to use, copy, modify, and distribute
+ this source code, or portions hereof, documentation and executables,
+ for any purpose, without fee, subject to the following restrictions:
+
+ 1. The origin of this source code must not be misrepresented.
+ 2. Altered versions must be plainly marked as such and must
+ not be misrepresented as being the original source.
+ 3. This Copyright notice may not be removed or altered from any
+ source or altered source distribution.
+
+ The copyright holders and contributing author(s) specifically
+ permit, without fee, and encourage the use of this source code
+ as a component for supporting the Hypertext Markup Language in
+ commercial products. If you use this source code in a product,
+ acknowledgment is not required but would be appreciated.
+*/
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.util.Properties;
+
+import org.eclipse.core.resources.IFile;
+
+/**
+ *
+ * <p>HTML parser and pretty printer</p>
+ *
+ * <p>
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ * </p>
+ *
+ * <p>
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ * </p>
+ *
+ * <p>
+ * Contributing Author(s):<br>
+ * <a href="mailto:dsr@w3.org">Dave Raggett</a><br>
+ * <a href="mailto:ac.quick@sympatico.ca">Andy Quick</a> (translation to Java)
+ * </p>
+ *
+ * <p>
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ * </p>
+ *
+ * <p>
+ * COPYRIGHT NOTICE:<br>
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ * </p>
+ *
+ * <p>
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ * </p>
+ *
+ * <p>
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ * </p>
+ *
+ * <p>
+ * <ol>
+ * <li>The origin of this source code must not be misrepresented.</li>
+ * <li>Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.</li>
+ * <li>This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.</li>
+ * </ol>
+ * </p>
+ *
+ * <p>
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ * </p>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ *
+ */
+
+public class Tidy implements java.io.Serializable {
+
+ static final long serialVersionUID = -2794371560623987718L;
+
+ /* becomes true only when init() runs to completion; parsing is refused otherwise */
+ private boolean initialized = false;
+ /*
+  * NOTE(review): this class is Serializable, but java.io.PrintWriter is not and
+  * neither writer below is transient - serializing an instance presumably fails
+  * with NotSerializableException; confirm whether these should be transient.
+  */
+ private PrintWriter errout = null; /* error output stream */
+ /* writer wrapping System.err, created in init(); the initial target of errout */
+ private PrintWriter stderr = null;
+ /* option set shared with the lexer, cleaner and pretty printer; created in init() */
+ private Configuration configuration = null;
+ /* name reported for the input; replaced by the file name or "stdin" when parse opens the input itself */
+ private String inputStreamName = "InputStream";
+ /* lexer error and warning counts recorded by the most recent parse */
+ private int parseErrors = 0;
+ private int parseWarnings = 0;
+
+ public Tidy() {
+ init();
+ }
+
+ public Configuration getConfiguration() {
+ return configuration;
+ }
+
+ public PrintWriter getStderr() {
+ return stderr;
+ }
+
+ /**
+ * ParseErrors - the number of errors that occurred in the most
+ * recent parse operation
+ */
+
+ public int getParseErrors() {
+ return parseErrors;
+ }
+
+ /**
+ * ParseWarnings - the number of warnings that occurred in the most
+ * recent parse operation
+ */
+
+ public int getParseWarnings() {
+ return parseWarnings;
+ }
+
+ /**
+ * Errout - the error output stream
+ */
+
+ public PrintWriter getErrout() {
+ return errout;
+ }
+
+ public void setErrout(PrintWriter errout) {
+ this.errout = errout;
+ }
+
+ /**
+ * Spaces - default indentation
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#spaces
+ */
+
+ public void setSpaces(int spaces) {
+ configuration.spaces = spaces;
+ }
+
+ public int getSpaces() {
+ return configuration.spaces;
+ }
+
+ /**
+ * Wraplen - default wrap margin
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#wraplen
+ */
+
+ public void setWraplen(int wraplen) {
+ configuration.wraplen = wraplen;
+ }
+
+ public int getWraplen() {
+ return configuration.wraplen;
+ }
+
+ /**
+ * CharEncoding
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#CharEncoding
+ */
+
+ public void setCharEncoding(int charencoding) {
+ configuration.CharEncoding = charencoding;
+ }
+
+ public int getCharEncoding() {
+ return configuration.CharEncoding;
+ }
+
+ /**
+ * Tabsize
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#tabsize
+ */
+
+ public void setTabsize(int tabsize) {
+ configuration.tabsize = tabsize;
+ }
+
+ public int getTabsize() {
+ return configuration.tabsize;
+ }
+
+ /**
+ * Errfile - file name to write errors to
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#errfile
+ */
+
+ public void setErrfile(String errfile) {
+ configuration.errfile = errfile;
+ }
+
+ public String getErrfile() {
+ return configuration.errfile;
+ }
+
+ /**
+ * Writeback - if true then output tidied markup
+ * NOTE: this property is ignored when parsing from an InputStream.
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#writeback
+ */
+
+ public void setWriteback(boolean writeback) {
+ configuration.writeback = writeback;
+ }
+
+ public boolean getWriteback() {
+ return configuration.writeback;
+ }
+
+ /**
+ * OnlyErrors - if true normal output is suppressed
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#OnlyErrors
+ */
+
+ public void setOnlyErrors(boolean OnlyErrors) {
+ configuration.OnlyErrors = OnlyErrors;
+ }
+
+ public boolean getOnlyErrors() {
+ return configuration.OnlyErrors;
+ }
+
+ /**
+ * ShowWarnings - however errors are always shown
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#ShowWarnings
+ */
+
+ public void setShowWarnings(boolean ShowWarnings) {
+ configuration.ShowWarnings = ShowWarnings;
+ }
+
+ public boolean getShowWarnings() {
+ return configuration.ShowWarnings;
+ }
+
+ /**
+ * Quiet - no 'Parsing X', guessed DTD or summary
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#Quiet
+ */
+
+ public void setQuiet(boolean Quiet) {
+ configuration.Quiet = Quiet;
+ }
+
+ public boolean getQuiet() {
+ return configuration.Quiet;
+ }
+
+ /**
+ * IndentContent - indent content of appropriate tags
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#IndentContent
+ */
+
+ public void setIndentContent(boolean IndentContent) {
+ configuration.IndentContent = IndentContent;
+ }
+
+ public boolean getIndentContent() {
+ return configuration.IndentContent;
+ }
+
+ /**
+ * SmartIndent - does text/block level content affect indentation
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#SmartIndent
+ */
+
+ public void setSmartIndent(boolean SmartIndent) {
+ configuration.SmartIndent = SmartIndent;
+ }
+
+ public boolean getSmartIndent() {
+ return configuration.SmartIndent;
+ }
+
+ /**
+ * HideEndTags - suppress optional end tags
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#HideEndTags
+ */
+
+ public void setHideEndTags(boolean HideEndTags) {
+ configuration.HideEndTags = HideEndTags;
+ }
+
+ public boolean getHideEndTags() {
+ return configuration.HideEndTags;
+ }
+
+ /**
+ * XmlTags - treat input as XML
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#XmlTags
+ */
+
+ public void setXmlTags(boolean XmlTags) {
+ configuration.XmlTags = XmlTags;
+ }
+
+ public boolean getXmlTags() {
+ return configuration.XmlTags;
+ }
+
+ /**
+ * XmlOut - create output as XML
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#XmlOut
+ */
+
+ public void setXmlOut(boolean XmlOut) {
+ configuration.XmlOut = XmlOut;
+ }
+
+ public boolean getXmlOut() {
+ return configuration.XmlOut;
+ }
+
+ /**
+ * XHTML - output extensible HTML
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#xHTML
+ */
+
+ public void setXHTML(boolean xHTML) {
+ configuration.xHTML = xHTML;
+ }
+
+ public boolean getXHTML() {
+ return configuration.xHTML;
+ }
+
+ /**
+ * RawOut - avoid mapping values > 127 to entities
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#RawOut
+ */
+
+ public void setRawOut(boolean RawOut) {
+ configuration.RawOut = RawOut;
+ }
+
+ public boolean getRawOut() {
+ return configuration.RawOut;
+ }
+
+ /**
+ * UpperCaseTags - output tags in upper not lower case
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#UpperCaseTags
+ */
+
+ public void setUpperCaseTags(boolean UpperCaseTags) {
+ configuration.UpperCaseTags = UpperCaseTags;
+ }
+
+ public boolean getUpperCaseTags() {
+ return configuration.UpperCaseTags;
+ }
+
+ /**
+ * UpperCaseAttrs - output attributes in upper not lower case
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#UpperCaseAttrs
+ */
+
+ public void setUpperCaseAttrs(boolean UpperCaseAttrs) {
+ configuration.UpperCaseAttrs = UpperCaseAttrs;
+ }
+
+ public boolean getUpperCaseAttrs() {
+ return configuration.UpperCaseAttrs;
+ }
+
+ /**
+ * MakeClean - remove presentational clutter
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#MakeClean
+ */
+
+ public void setMakeClean(boolean MakeClean) {
+ configuration.MakeClean = MakeClean;
+ }
+
+ public boolean getMakeClean() {
+ return configuration.MakeClean;
+ }
+
+ /**
+ * BreakBeforeBR - o/p newline before <br> or not?
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#BreakBeforeBR
+ */
+
+ public void setBreakBeforeBR(boolean BreakBeforeBR) {
+ configuration.BreakBeforeBR = BreakBeforeBR;
+ }
+
+ public boolean getBreakBeforeBR() {
+ return configuration.BreakBeforeBR;
+ }
+
+ /**
+ * BurstSlides - create slides on each h2 element
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#BurstSlides
+ */
+
+ public void setBurstSlides(boolean BurstSlides) {
+ configuration.BurstSlides = BurstSlides;
+ }
+
+ public boolean getBurstSlides() {
+ return configuration.BurstSlides;
+ }
+
+ /**
+ * NumEntities - use numeric entities
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#NumEntities
+ */
+
+ public void setNumEntities(boolean NumEntities) {
+ configuration.NumEntities = NumEntities;
+ }
+
+ public boolean getNumEntities() {
+ return configuration.NumEntities;
+ }
+
+ /**
+ * QuoteMarks - output " marks as &quot;
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#QuoteMarks
+ */
+
+ public void setQuoteMarks(boolean QuoteMarks) {
+ configuration.QuoteMarks = QuoteMarks;
+ }
+
+ public boolean getQuoteMarks() {
+ return configuration.QuoteMarks;
+ }
+
+ /**
+ * QuoteNbsp - output non-breaking space as entity
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#QuoteNbsp
+ */
+
+ public void setQuoteNbsp(boolean QuoteNbsp) {
+ configuration.QuoteNbsp = QuoteNbsp;
+ }
+
+ public boolean getQuoteNbsp() {
+ return configuration.QuoteNbsp;
+ }
+
+ /**
+ * QuoteAmpersand - output naked ampersand as &amp;
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#QuoteAmpersand
+ */
+
+ public void setQuoteAmpersand(boolean QuoteAmpersand) {
+ configuration.QuoteAmpersand = QuoteAmpersand;
+ }
+
+ public boolean getQuoteAmpersand() {
+ return configuration.QuoteAmpersand;
+ }
+
+ /**
+ * WrapAttVals - wrap within attribute values
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#WrapAttVals
+ */
+
+ public void setWrapAttVals(boolean WrapAttVals) {
+ configuration.WrapAttVals = WrapAttVals;
+ }
+
+ public boolean getWrapAttVals() {
+ return configuration.WrapAttVals;
+ }
+
+ /**
+ * WrapScriptlets - wrap within JavaScript string literals
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#WrapScriptlets
+ */
+
+ public void setWrapScriptlets(boolean WrapScriptlets) {
+ configuration.WrapScriptlets = WrapScriptlets;
+ }
+
+ public boolean getWrapScriptlets() {
+ return configuration.WrapScriptlets;
+ }
+
+ /**
+ * WrapSection - wrap within <![ ... ]> section tags
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#WrapSection
+ */
+
+ public void setWrapSection(boolean WrapSection) {
+ configuration.WrapSection = WrapSection;
+ }
+
+ public boolean getWrapSection() {
+ return configuration.WrapSection;
+ }
+
+ /**
+ * AltText - default text for alt attribute
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#altText
+ */
+
+ public void setAltText(String altText) {
+ configuration.altText = altText;
+ }
+
+ public String getAltText() {
+ return configuration.altText;
+ }
+
+ /**
+ * Slidestyle - style sheet for slides
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#slidestyle
+ */
+
+ public void setSlidestyle(String slidestyle) {
+ configuration.slidestyle = slidestyle;
+ }
+
+ public String getSlidestyle() {
+ return configuration.slidestyle;
+ }
+
+ /**
+ * XmlPi - add <?xml?> for XML docs
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#XmlPi
+ */
+
+ public void setXmlPi(boolean XmlPi) {
+ configuration.XmlPi = XmlPi;
+ }
+
+ public boolean getXmlPi() {
+ return configuration.XmlPi;
+ }
+
+ /**
+ * DropFontTags - discard presentation tags
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#DropFontTags
+ */
+
+ public void setDropFontTags(boolean DropFontTags) {
+ configuration.DropFontTags = DropFontTags;
+ }
+
+ public boolean getDropFontTags() {
+ return configuration.DropFontTags;
+ }
+
+ //gschadow patch start
+ /**
+ * Remove all those stupid pseudo-XML tags. If only XSLT had been
+ * around 5 years earlier, we wouldn't need to bother about all
+ * this ASP, JSP, PHP and other sh..!
+ *
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#DropPseudoXMLCrap
+ */
+ public void setDropPseudoXMLCrap(boolean DropPseudoXMLCrap) {
+ configuration.DropPseudoXMLCrap = DropPseudoXMLCrap;
+ }
+ //gschadow patch end
+
+ public boolean getDropPseudoXMLCrap() {
+ return configuration.DropPseudoXMLCrap;
+ }
+
+ /**
+ * DropEmptyParas - discard empty p elements
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#DropEmptyParas
+ */
+
+ public void setDropEmptyParas(boolean DropEmptyParas) {
+ configuration.DropEmptyParas = DropEmptyParas;
+ }
+
+ public boolean getDropEmptyParas() {
+ return configuration.DropEmptyParas;
+ }
+
+ /**
+ * FixComments - fix comments with adjacent hyphens
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#FixComments
+ */
+
+ public void setFixComments(boolean FixComments) {
+ configuration.FixComments = FixComments;
+ }
+
+ public boolean getFixComments() {
+ return configuration.FixComments;
+ }
+
+ /**
+ * WrapAsp - wrap within ASP pseudo elements
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#WrapAsp
+ */
+
+ public void setWrapAsp(boolean WrapAsp) {
+ configuration.WrapAsp = WrapAsp;
+ }
+
+ public boolean getWrapAsp() {
+ return configuration.WrapAsp;
+ }
+
+ /**
+ * WrapJste - wrap within JSTE pseudo elements
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#WrapJste
+ */
+
+ public void setWrapJste(boolean WrapJste) {
+ configuration.WrapJste = WrapJste;
+ }
+
+ public boolean getWrapJste() {
+ return configuration.WrapJste;
+ }
+
+ /**
+ * WrapPhp - wrap within PHP pseudo elements
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#WrapPhp
+ */
+
+ public void setWrapPhp(boolean WrapPhp) {
+ configuration.WrapPhp = WrapPhp;
+ }
+
+ public boolean getWrapPhp() {
+ return configuration.WrapPhp;
+ }
+
+ /**
+ * FixBackslash - fix URLs by replacing \ with /
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#FixBackslash
+ */
+
+ public void setFixBackslash(boolean FixBackslash) {
+ configuration.FixBackslash = FixBackslash;
+ }
+
+ public boolean getFixBackslash() {
+ return configuration.FixBackslash;
+ }
+
+ /**
+ * IndentAttributes - newline+indent before each attribute
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#IndentAttributes
+ */
+
+ public void setIndentAttributes(boolean IndentAttributes) {
+ configuration.IndentAttributes = IndentAttributes;
+ }
+
+ public boolean getIndentAttributes() {
+ return configuration.IndentAttributes;
+ }
+
+ /**
+ * DocType - user specified doctype
+ * omit | auto | strict | loose | <i>fpi</i>
+ * where the <i>fpi</i> is a string similar to
+ * "-//ACME//DTD HTML 3.14159//EN"
+ * Note: for <i>fpi</i> include the double-quotes in the string.
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#docTypeStr
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#docTypeMode
+ */
+
+ public void setDocType(String doctype) {
+ if (doctype != null)
+ configuration.docTypeStr = configuration.parseDocType(doctype, "doctype");
+ }
+
+ /**
+  * Returns the doctype setting in the same textual form accepted by
+  * setDocType: one of the keywords "omit", "auto", "strict" or "loose",
+  * or the user supplied doctype string when the mode is DOCTYPE_USER.
+  *
+  * @return the doctype setting, or null if the configured mode is
+  *         none of the known DOCTYPE_* constants
+  */
+ public String getDocType() {
+     final int mode = configuration.docTypeMode;
+
+     if (mode == Configuration.DOCTYPE_OMIT)
+         return "omit";
+     if (mode == Configuration.DOCTYPE_AUTO)
+         return "auto";
+     if (mode == Configuration.DOCTYPE_STRICT)
+         return "strict";
+     if (mode == Configuration.DOCTYPE_LOOSE)
+         return "loose";
+     if (mode == Configuration.DOCTYPE_USER)
+         return configuration.docTypeStr;
+
+     return null;
+ }
+
+ /**
+ * LogicalEmphasis - replace i by em and b by strong
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#LogicalEmphasis
+ */
+
+ public void setLogicalEmphasis(boolean LogicalEmphasis) {
+ configuration.LogicalEmphasis = LogicalEmphasis;
+ }
+
+ public boolean getLogicalEmphasis() {
+ return configuration.LogicalEmphasis;
+ }
+
+ /**
+ * XmlPIs - if set to true PIs must end with ?>
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#XmlPIs
+ */
+
+ public void setXmlPIs(boolean XmlPIs) {
+ configuration.XmlPIs = XmlPIs;
+ }
+
+ public boolean getXmlPIs() {
+ return configuration.XmlPIs;
+ }
+
+ /**
+ * EncloseText - if true text at body is wrapped in <p>'s
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#EncloseBodyText
+ */
+
+ public void setEncloseText(boolean EncloseText) {
+ configuration.EncloseBodyText = EncloseText;
+ }
+
+ public boolean getEncloseText() {
+ return configuration.EncloseBodyText;
+ }
+
+ /**
+ * EncloseBlockText - if true text in blocks is wrapped in <p>'s
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#EncloseBlockText
+ */
+
+ public void setEncloseBlockText(boolean EncloseBlockText) {
+ configuration.EncloseBlockText = EncloseBlockText;
+ }
+
+ public boolean getEncloseBlockText() {
+ return configuration.EncloseBlockText;
+ }
+
+ /**
+ * KeepFileTimes - if true last modified time is preserved<br>
+ * <b>this is NOT supported at this time.</b>
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#KeepFileTimes
+ */
+
+ public void setKeepFileTimes(boolean KeepFileTimes) {
+ configuration.KeepFileTimes = KeepFileTimes;
+ }
+
+ public boolean getKeepFileTimes() {
+ return configuration.KeepFileTimes;
+ }
+
+ /**
+ * Word2000 - draconian cleaning for Word2000
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#Word2000
+ */
+
+ public void setWord2000(boolean Word2000) {
+ configuration.Word2000 = Word2000;
+ }
+
+ public boolean getWord2000() {
+ return configuration.Word2000;
+ }
+
+ /**
+ * TidyMark - add meta element indicating tidied doc
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#TidyMark
+ */
+
+ public void setTidyMark(boolean TidyMark) {
+ configuration.TidyMark = TidyMark;
+ }
+
+ public boolean getTidyMark() {
+ return configuration.TidyMark;
+ }
+
+ /**
+ * XmlSpace - if set to yes adds xml:space attr as needed
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#XmlSpace
+ */
+
+ public void setXmlSpace(boolean XmlSpace) {
+ configuration.XmlSpace = XmlSpace;
+ }
+
+ public boolean getXmlSpace() {
+ return configuration.XmlSpace;
+ }
+
+ /**
+ * Emacs - if true format error output for GNU Emacs
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#Emacs
+ */
+
+ public void setEmacs(boolean Emacs) {
+ configuration.Emacs = Emacs;
+ }
+
+ public boolean getEmacs() {
+ return configuration.Emacs;
+ }
+
+ /**
+ * LiteralAttribs - if true attributes may use newlines
+ * @see net.sourceforge.phpdt.tidy.w3c.Configuration#LiteralAttribs
+ */
+
+ public void setLiteralAttribs(boolean LiteralAttribs) {
+ configuration.LiteralAttribs = LiteralAttribs;
+ }
+
+ public boolean getLiteralAttribs() {
+ return configuration.LiteralAttribs;
+ }
+
+ /**
+ * InputStreamName - the name of the input stream (printed in the
+ * header information).
+ */
+ public void setInputStreamName(String name) {
+ if (name != null)
+ inputStreamName = name;
+ }
+
+ public String getInputStreamName() {
+ return inputStreamName;
+ }
+
+ /**
+ * Sets the configuration from a configuration file.
+ */
+
+ public void setConfigurationFromFile(String filename) {
+ configuration.parseFile(filename);
+ }
+
+ /**
+ * Sets the configuration from a properties object.
+ */
+
+ public void setConfigurationFromProps(Properties props) {
+ configuration.addProps(props);
+ }
+
+ /**
+  * One-time setup run by the constructor: creates the Configuration,
+  * wires a fresh TagTable into it and points both error writers at
+  * System.err.  The instance is marked as initialized only when the
+  * default attribute and entity tables are available; if either is
+  * missing the method returns early and the error writers stay unset.
+  */
+
+ private void init() {
+     configuration = new Configuration();
+
+     /* the tables are touched in the original order: attributes, tags, entities */
+     if (AttributeTable.getDefaultAttributeTable() == null)
+         return;
+
+     TagTable tagTable = new TagTable();
+     tagTable.setConfiguration(configuration);
+     configuration.tt = tagTable;
+
+     if (EntityTable.getDefaultEntityTable() == null)
+         return;
+
+     /*
+       per the original author's note, the remaining option defaults
+       (XmlTags, XmlOut, HideEndTags, ...) need no reset here because
+       Configuration starts with the same values
+     */
+     configuration.errfile = null;
+
+     PrintWriter systemErr = new PrintWriter(System.err, true);
+     stderr = systemErr;
+     errout = systemErr;
+     initialized = true;
+ }
+
+ /**
+  * Parses InputStream in and returns the root Node.
+  * If out is non-null, pretty prints to OutputStream out.
+  *
+  * @param iFile the workspace file being tidied; handed to the Lexer unchanged
+  * @param in    the markup to parse; when null, System.in is read instead
+  * @param out   destination for the pretty-printed markup, or null for none
+  * @return the root Node of the parse tree, or null when no tree was
+  *         produced or an I/O error occurred (the error is written
+  *         to the error output stream)
+  */
+
+ public Node parse(IFile iFile, InputStream in, OutputStream out) {
+     try {
+         return parse(iFile, in, null, out);
+     } catch (IOException e) {
+         /*
+           FileNotFoundException is an IOException too.  Callers still get
+           null, but the cause is no longer dropped silently.
+         */
+         if (errout != null)
+             errout.println(inputStreamName + ": " + e);
+         return null;
+     }
+ }
+
+ /**
+ * Internal routine that actually does the parsing. The caller
+ * can pass either an InputStream or file name. If both are passed,
+ * the file name is preferred.
+ */
+
+ private Node parse(IFile iFile, InputStream in, String file, OutputStream out)
+ throws FileNotFoundException, IOException {
+ Lexer lexer;
+ Node document = null;
+ Node doctype;
+ Out o = new OutImpl(); /* normal output stream */
+ PPrint pprint;
+
+ if (!initialized)
+ return null;
+
+ if (errout == null)
+ return null;
+
+ parseErrors = 0;
+ parseWarnings = 0;
+
+ /* ensure config is self-consistent */
+ configuration.adjust();
+
+ if (file != null) {
+ in = new FileInputStream(file);
+ inputStreamName = file;
+ } else if (in == null) {
+ in = System.in;
+ inputStreamName = "stdin";
+ }
+
+ if (in != null) {
+ lexer =
+ new Lexer(
+ iFile,
+ new StreamInImpl(in, configuration.CharEncoding, configuration.tabsize),
+ configuration);
+ lexer.errout = errout;
+
+ /*
+ store pointer to lexer in input stream
+ to allow character encoding errors to be
+ reported
+ */
+ lexer.in.lexer = lexer;
+
+ /* Tidy doesn't alter the doctype for generic XML docs */
+ if (configuration.XmlTags)
+ document = ParserImpl.parseXMLDocument(lexer);
+ else {
+ lexer.warnings = 0;
+ if (!configuration.Quiet)
+ Report.helloMessage(errout, Report.RELEASE_DATE, inputStreamName);
+
+ document = ParserImpl.parseDocument(lexer);
+
+ if (!document.checkNodeIntegrity()) {
+ Report.badTree(errout);
+ return null;
+ }
+
+ Clean cleaner = new Clean(configuration.tt);
+
+ /* simplifies <b><b> ... </b> ...</b> etc. */
+ cleaner.nestedEmphasis(document);
+
+ /* cleans up <dir>indented text</dir> etc. */
+ cleaner.list2BQ(document);
+ cleaner.bQ2Div(document);
+
+ /* replaces i by em and b by strong */
+ if (configuration.LogicalEmphasis)
+ cleaner.emFromI(document);
+
+ if (configuration.Word2000 && cleaner.isWord2000(document, configuration.tt)) {
+ /* prune Word2000's <![if ...]> ... <![endif]> */
+ cleaner.dropSections(lexer, document);
+
+ /* drop style & class attributes and empty p, span elements */
+ cleaner.cleanWord2000(lexer, document);
+ }
+
+ //gschadow patch start
+ if (configuration.DropPseudoXMLCrap) {
+ cleaner.dropPseudoXMLCrap(lexer, document);
+ }
+ //gschadow patch end
+
+ /* replaces presentational markup by style rules */
+ if (configuration.MakeClean || configuration.DropFontTags)
+ cleaner.cleanTree(lexer, document);
+
+ if (!document.checkNodeIntegrity()) {
+ Report.badTree(errout);
+ return null;
+ }
+ doctype = document.findDocType();
+ if (document.content != null) {
+ if (configuration.xHTML)
+ lexer.setXHTMLDocType(document);
+ else
+ lexer.fixDocType(document);
+
+ if (configuration.TidyMark)
+ lexer.addGenerator(document);
+ }
+
+ /* ensure presence of initial <?XML version="1.0"?> */
+ if (configuration.XmlOut && configuration.XmlPi)
+ lexer.fixXMLPI(document);
+
+ if (!configuration.Quiet && document.content != null) {
+ Report.reportVersion(errout, lexer, inputStreamName, doctype);
+ Report.reportNumWarnings(errout, lexer);
+ }
+ }
+
+ parseWarnings = lexer.warnings;
+ parseErrors = lexer.errors;
+
+ // Try to close the InputStream but only if if we created it.
+
+ if ((file != null) && (in != System.in)) {
+ try {
+ in.close();
+ } catch (IOException e) {
+ }
+ }
+
+ if (lexer.errors > 0)
+ Report.needsAuthorIntervention(errout);
+
+ o.state = StreamIn.FSM_ASCII;
+ o.encoding = configuration.CharEncoding;
+
+ if (!configuration.OnlyErrors && lexer.errors == 0) {
+ if (configuration.BurstSlides) {
+ Node body;
+
+ body = null;
+ /*
+ remove doctype to avoid potential clash with
+ markup introduced when bursting into slides
+ */
+ /* discard the document type */
+ doctype = document.findDocType();
+
+ if (doctype != null)
+ Node.discardElement(doctype);
+
+ /* slides use transitional features */
+ lexer.versions |= Dict.VERS_HTML40_LOOSE;
+
+ /* and patch up doctype to match */
+ if (configuration.xHTML)
+ lexer.setXHTMLDocType(document);
+ else
+ lexer.fixDocType(document);
+
+ /* find the body element which may be implicit */
+ body = document.findBody(configuration.tt);
+
+ if (body != null) {
+ pprint = new PPrint(configuration);
+ Report.reportNumberOfSlides(errout, pprint.countSlides(body));
+ pprint.createSlides(lexer, document);
+ } else
+ Report.missingBody(errout);
+ } else if (configuration.writeback && (file != null)) {
+ try {
+ pprint = new PPrint(configuration);
+ o.out = new FileOutputStream(file);
+
+ if (configuration.XmlTags)
+ pprint.printXMLTree(o, (short) 0, 0, lexer, document);
+ else
+ pprint.printTree(o, (short) 0, 0, lexer, document);
+
+ pprint.flushLine(o, 0);
+ o.out.close();
+ } catch (IOException e) {
+ errout.println(file + e.toString());
+ }
+ } else if (out != null) {
+ pprint = new PPrint(configuration);
+ o.out = out;
+
+ if (configuration.XmlTags)
+ pprint.printXMLTree(o, (short) 0, 0, lexer, document);
+ else
+ pprint.printTree(o, (short) 0, 0, lexer, document);
+
+ pprint.flushLine(o, 0);
+ }
+
+ }
+
+ Report.errorSummary(lexer);
+ }
+ return document;
+ }
+
+ /**
+ * Parses InputStream in and returns a DOM Document node.
+ * If out is non-null, pretty prints to OutputStream out.
+ */
+
+ public org.w3c.dom.Document parseDOM(IFile file, InputStream in, OutputStream out) {
+ Node document = parse(file, in, out);
+ if (document != null)
+ return (org.w3c.dom.Document) document.getAdapter();
+ else
+ return null;
+ }
+
+ /**
+  * Creates an empty DOM Document: a root node whose only child is
+  * an "html" start tag.
+  *
+  * @return the DOM adapter of the new root node
+  */
+
+ public static org.w3c.dom.Document createEmptyDocument() {
+     Node root = new Node(Node.RootNode, new byte[0], 0, 0);
+     Node html = new Node(Node.StartTag, new byte[0], 0, 0, "html", new TagTable());
+
+     /* both nodes come straight from "new", so neither can be null here */
+     Node.insertNodeAtStart(root, html);
+     return (org.w3c.dom.Document) root.getAdapter();
+ }
+
+ /**
+  * Pretty-prints a DOM Document.  Nothing is written when doc is not
+  * a DOMDocumentImpl or when out is null.
+  *
+  * @param doc the document to print
+  * @param out the stream receiving the markup
+  */
+
+ public void pprint(org.w3c.dom.Document doc, OutputStream out) {
+     if (!(doc instanceof DOMDocumentImpl))
+         return;
+     if (out == null)
+         return;
+
+     Node root = ((DOMDocumentImpl) doc).adaptee;
+
+     Out sink = new OutImpl();
+     sink.state = StreamIn.FSM_ASCII;
+     sink.encoding = configuration.CharEncoding;
+     sink.out = out;
+
+     /* no lexer is available for a tree that did not come from parse() */
+     PPrint printer = new PPrint(configuration);
+     if (configuration.XmlTags)
+         printer.printXMLTree(sink, (short) 0, 0, null, root);
+     else
+         printer.printTree(sink, (short) 0, 0, null, root);
+
+     printer.flushLine(sink, 0);
+ }
+
+ /**
+  * Command line interface to parser and pretty printer.
+  *
+  * Options (written as -foo or --foo) may be mixed with file names and
+  * apply to the files that follow them.  When no file name is given the
+  * markup is read from standard input.  Tidied markup goes to standard
+  * output unless the write-back option is set.
+  *
+  * The process always ends through System.exit: status 2 if any file
+  * had errors, 1 if there were only warnings (also used after printing
+  * the help text), 0 otherwise.
+  *
+  * @param argv the command line arguments
+  */
+
+ public static void main(String[] argv) {
+     int totalerrors = 0;
+     int totalwarnings = 0;
+     String prog = "Tidy";
+     /* mirrors C: argc also counts the program name that Java's argv lacks */
+     int argc = argv.length + 1;
+     int argIndex = 0;
+     String current_errorfile = "stderr";
+
+     Tidy tidy = new Tidy();
+     Configuration configuration = tidy.getConfiguration();
+
+     /* read command line */
+
+     while (argc > 0) {
+         if (argc > 1 && argv[argIndex].startsWith("-")) {
+             String option = argv[argIndex];
+
+             /* support -foo and --foo */
+             String arg = option.substring(1);
+
+             if (arg.length() > 0 && arg.charAt(0) == '-')
+                 arg = arg.substring(1);
+
+             if (arg.equals("xml"))
+                 configuration.XmlTags = true;
+             else if (arg.equals("asxml") || arg.equals("asxhtml"))
+                 configuration.xHTML = true;
+             else if (arg.equals("indent")) {
+                 configuration.IndentContent = true;
+                 configuration.SmartIndent = true;
+             } else if (arg.equals("omit"))
+                 configuration.HideEndTags = true;
+             else if (arg.equals("upper"))
+                 configuration.UpperCaseTags = true;
+             else if (arg.equals("clean"))
+                 configuration.MakeClean = true;
+             else if (arg.equals("raw"))
+                 configuration.CharEncoding = Configuration.RAW;
+             else if (arg.equals("ascii"))
+                 configuration.CharEncoding = Configuration.ASCII;
+             else if (arg.equals("latin1"))
+                 configuration.CharEncoding = Configuration.LATIN1;
+             else if (arg.equals("utf8"))
+                 configuration.CharEncoding = Configuration.UTF8;
+             else if (arg.equals("iso2022"))
+                 configuration.CharEncoding = Configuration.ISO2022;
+             else if (arg.equals("mac"))
+                 configuration.CharEncoding = Configuration.MACROMAN;
+             else if (arg.equals("numeric"))
+                 configuration.NumEntities = true;
+             else if (arg.equals("modify") || arg.equals("change") || arg.equals("update"))
+                 /* "change" and "update" are obsolete spellings of "modify" */
+                 configuration.writeback = true;
+             else if (arg.equals("errors"))
+                 configuration.OnlyErrors = true;
+             else if (arg.equals("quiet"))
+                 configuration.Quiet = true;
+             else if (arg.equals("slides"))
+                 configuration.BurstSlides = true;
+             else if (
+                 arg.equals("help")
+                     /* the length check keeps a bare "-" from indexing past the end */
+                     || (option.length() > 1 && (option.charAt(1) == '?' || option.charAt(1) == 'h'))) {
+                 Report.helpText(new PrintWriter(System.out, true), prog);
+                 System.exit(1);
+             } else if (arg.equals("config")) {
+                 if (argc >= 3) {
+                     configuration.parseFile(argv[argIndex + 1]);
+                     --argc;
+                     ++argIndex;
+                 }
+             } else if (option.equals("-file") || option.equals("--file") || option.equals("-f")) {
+                 if (argc >= 3) {
+                     configuration.errfile = argv[argIndex + 1];
+                     --argc;
+                     ++argIndex;
+                 }
+             } else if (option.equals("-wrap") || option.equals("--wrap") || option.equals("-w")) {
+                 if (argc >= 3) {
+                     try {
+                         configuration.wraplen = Integer.parseInt(argv[argIndex + 1]);
+                     } catch (NumberFormatException nfe) {
+                         /* keep the current margin rather than dying with a stack trace */
+                         tidy.getErrout().println(
+                             prog + ": invalid wrap margin \"" + argv[argIndex + 1] + "\"");
+                     }
+                     --argc;
+                     ++argIndex;
+                 }
+             } else if (
+                 option.equals("-version") || option.equals("--version") || option.equals("-v")) {
+                 Report.showVersion(tidy.getErrout());
+                 System.exit(0);
+             } else {
+                 /* a cluster of single-letter flags such as -icq */
+                 for (int i = 1; i < option.length(); i++) {
+                     char flag = option.charAt(i);
+
+                     if (flag == 'i') {
+                         configuration.IndentContent = true;
+                         configuration.SmartIndent = true;
+                     } else if (flag == 'o')
+                         configuration.HideEndTags = true;
+                     else if (flag == 'u')
+                         configuration.UpperCaseTags = true;
+                     else if (flag == 'c')
+                         configuration.MakeClean = true;
+                     else if (flag == 'n')
+                         configuration.NumEntities = true;
+                     else if (flag == 'm')
+                         configuration.writeback = true;
+                     else if (flag == 'e')
+                         configuration.OnlyErrors = true;
+                     else if (flag == 'q')
+                         configuration.Quiet = true;
+                     else
+                         Report.unknownOption(tidy.getErrout(), flag);
+                 }
+             }
+
+             --argc;
+             ++argIndex;
+             continue;
+         }
+
+         /* ensure config is self-consistent */
+         configuration.adjust();
+
+         /* user specified error file */
+         if (configuration.errfile != null) {
+             /* is it same as the currently opened file? */
+             if (!configuration.errfile.equals(current_errorfile)) {
+                 /* no so close previous error file */
+
+                 if (tidy.getErrout() != tidy.getStderr())
+                     tidy.getErrout().close();
+
+                 /* and try to open the new error file */
+                 try {
+                     tidy.setErrout(new PrintWriter(new FileWriter(configuration.errfile), true));
+                     current_errorfile = configuration.errfile;
+                 } catch (IOException e) {
+                     /* can't be opened so fall back to stderr */
+                     current_errorfile = "stderr";
+                     tidy.setErrout(tidy.getStderr());
+                 }
+             }
+         }
+
+         /*
+           A null file name makes parse() read System.in.  Passing the
+           literal name "stdin" would instead try to open a file of
+           that name.
+         */
+         String file = (argc > 1) ? argv[argIndex] : null;
+         String displayName = (file != null) ? file : "stdin";
+
+         try {
+             tidy.parse(null, null, file, System.out);
+             totalwarnings += tidy.parseWarnings;
+             totalerrors += tidy.parseErrors;
+         } catch (IOException ioe) {
+             /* includes FileNotFoundException */
+             Report.unknownFile(tidy.getErrout(), prog, displayName);
+         }
+
+         --argc;
+         ++argIndex;
+
+         if (argc <= 1)
+             break;
+     }
+
+     if (totalerrors + totalwarnings > 0)
+         Report.generalInfo(tidy.getErrout());
+
+     if (tidy.getErrout() != tidy.getStderr())
+         tidy.getErrout().close();
+
+     /* return status can be used by scripts */
+
+     if (totalerrors > 0)
+         System.exit(2);
+
+     if (totalwarnings > 0)
+         System.exit(1);
+
+     /* 0 signifies all is ok */
+     System.exit(0);
+ }
+}
--- /dev/null
+/*
+ * @(#)TidyBeanInfo.java 1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy.w3c;
+
+/**
+ *
+ * BeanInfo for Tidy
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author Dave Raggett <dsr@w3.org>
+ * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+import java.awt.Image;
+import java.beans.SimpleBeanInfo;
+
+public class TidyBeanInfo extends SimpleBeanInfo {
+
+    /** Resource name passed to {@link SimpleBeanInfo#loadImage(String)}. */
+    private static final String ICON_RESOURCE = "tidy.gif";
+
+    /**
+     * Supplies the icon for the Tidy bean.
+     *
+     * The <code>kind</code> argument is not consulted: the same image
+     * resource is requested whatever icon kind the caller asks for.
+     *
+     * @param kind the requested icon kind (ignored)
+     * @return whatever <code>loadImage</code> yields for the icon resource
+     */
+    public Image getIcon(int kind) {
+        final Image icon = loadImage(ICON_RESOURCE);
+        return icon;
+    }
+
+}
--- /dev/null
+error=Error:
+warning=Warning:
+line_column=line {0,number} column {1,number} -
+emacs_format={0}:{1,number}:{2,number}:
+illegal_char=Warning: replacing illegal character code {0,number}
+missing_semicolon=Warning: entity "{0}" doesn''t end in '';''
+unknown_entity=Warning: unescaped & or unknown entity "{0}"
+unescaped_ampersand=Warning: unescaped & which should be written as &amp;
+unknown_attribute=Warning: unknown attribute "{0}"
+missing_attribute=\ lacks "{0}" attribute
+missing_attr_value=\ attribute "{0}" lacks value
+missing_imagemap=\ should use client-side image map
+bad_attribute_value=\ unknown attribute value "{0}"
+xml_attribute_value=\ has XML attribute "{0}"
+unexpected_gt=\ missing '>' for end of tag
+unexpected_quotemark=\ unexpected or duplicate quote mark
+repeated_attribute=\ repeated attribute
+nested_emphasis=\ nested emphasis
+coerce_to_endtag=\ <{0}> is probably intended as </{0}>
+proprietary_attr_value=\ proprietary attribute value "{0}"
+missing_endtag_for=Warning: missing </{0}>
+missing_endtag_before=Warning: missing </{0}> before
+discarding_unexpected=Warning: discarding unexpected
+forced_end_anchor=Warning: <a> is probably intended as </a>
+non_matching_endtag_1=Warning: replacing unexpected
+non_matching_endtag_2=\ by </{0}>
+tag_not_allowed_in=\ isn''t allowed in <{0}> elements
+doctype_after_tags=Warning: <!DOCTYPE> isn't allowed after elements
+missing_starttag=Warning: missing <{0}>
+unexpected_endtag=Warning: unexpected </{0}>
+unexpected_endtag_suffix=\ in <{0}>
+too_many_elements=Warning: too many {0} elements
+too_many_elements_suffix=\ in <{0}>
+using_br_inplace_of=Warning: using <br> in place of
+inserting_tag=Warning: inserting implicit <{0}>
+cant_be_nested=\ can't be nested
+proprietary_element=\ is not approved by W3C
+obsolete_element=Warning: replacing obsolete element
+replacing_element=Warning: replacing element
+by=\ by
+trim_empty_element=Warning: trimming empty
+missing_title_element=Warning: inserting missing 'title' element
+illegal_nesting=\ shouldn't be nested
+noframes_content=\ not inside 'noframes' element
+inconsistent_version=Warning: html doctype doesn't match content
+content_after_body=Warning: content occurs after end of body
+malformed_comment=Warning: adjacent hyphens within comment
+bad_comment_chars=Warning: expecting -- or >
+bad_xml_comment=Warning: XML comments can't contain --
+bad_cdata_content=Warning: '<' + '/' + letter not allowed here
+inconsistent_namespace=Warning: html namespace doesn't match content
+suspected_missing_quote=Error: missing quotemark for attribute value
+duplicate_frameset=Error: repeated FRAMESET element
+unknown_element=\ is not recognized!
+dtype_not_upper_case=Warning: SYSTEM, PUBLIC, W3C, DTD, EN must be upper case
+unexpected_end_of_file=Warning: end of file while parsing attributes
+malformed_doctype=Warning: expected "html PUBLIC" or "html SYSTEM"
+id_name_mismatch=\ id and name attribute value mismatch
+
+badchars_summary=Character codes for the Microsoft Windows fonts in the range\n\
+128 - 159 may not be recognized on other platforms. You are\n\
+instead recommended to use named entities, e.g. &trade; rather\n\
+than Windows character code 153 (0x2122 in Unicode). Note that\n\
+as of February 1998 few browsers support the new entities.\n\n
+
+badform_summary=You may need to move one or both of the <form> and </form>\n\
+tags. HTML elements should be properly nested and form elements\n\
+are no exception. For instance you should not place the <form>\n\
+in one table cell and the </form> in another. If the <form> is\n\
+placed before a table, the </form> cannot be placed inside the\n\
+table! Note that one form can't be nested inside another!\n\n
+
+badaccess_missing_summary=The table summary attribute should be used to describe\n\
+the table structure. It is very helpful for people using\n\
+non-visual browsers. The scope and headers attributes for\n\
+table cells are useful for specifying which headers apply\n\
+to each table cell, enabling non-visual browsers to provide\n\
+a meaningful context for each cell.\n\n
+
+badaccess_missing_image_alt=The alt attribute should be used to give a short description\n\
+of an image; longer descriptions should be given with the\n\
+longdesc attribute which takes a URL linked to the description.\n\
+These measures are needed for people using non-graphical browsers.\n\n
+
+badaccess_missing_image_map=Use client-side image maps in preference to server-side image\n\
+maps as the latter are inaccessible to people using non-\n\
+graphical browsers. In addition, client-side maps are easier\n\
+to set up and provide immediate feedback to users.\n\n
+
+badaccess_missing_link_alt=For hypertext links defined using a client-side image map, you\n\
+need to use the alt attribute to provide a textual description\n\
+of the link for people using non-graphical browsers.\n\n
+
+badaccess_frames=Pages designed using frames present problems for\n\
+people who are either blind or using a browser that\n\
+doesn't support frames. A frames-based page should always\n\
+include an alternative layout inside a NOFRAMES element.\n\n
+
+badaccess_summary=For further advice on how to make your pages accessible\n\
+see "{0}". You may also want to try\n\
+"http://www.cast.org/bobby/" which is a free Web-based\n\
+service for checking URLs for accessibility.\n\n
+
+badlayout_using_layer=The Cascading Style Sheets (CSS) Positioning mechanism\n\
+is recommended in preference to the proprietary <LAYER>\n\
+element due to limited vendor support for LAYER.\n\n
+
+badlayout_using_spacer=You are recommended to use CSS for controlling white\n\
+space (e.g. for indentation, margins and line spacing).\n\
+The proprietary <SPACER> element has limited vendor support.\n\n
+
+badlayout_using_font=You are recommended to use CSS to specify the font and\n\
+properties such as its size and color. This will reduce\n\
+the size of HTML files and make them easier to maintain\n\
+compared with using <FONT> elements.\n\n
+
+badlayout_using_nobr=You are recommended to use CSS to control line wrapping.\n\
+Use \"white-space: nowrap\" to inhibit wrapping in place\n\
+of inserting <NOBR>...</NOBR> into the markup.\n\n
+
+badlayout_using_body=You are recommended to use CSS to specify page and link colors\n\n
+
+unrecognized_option=unrecognized option -{0} use -help to list options
+unknown_file={0}: can''t open file "{1}"
+unknown_option=Warning - unknown option: {0}
+bad_argument=Warning - missing or malformed argument for option: {0}
+
+needs_author_intervention=This document has errors that must be fixed before\n\
+using HTML Tidy to generate a tidied up version.\n\n
+
+missing_body=Can't create slides - document is missing a body element.
+slides_found={0,number} Slides found
+
+general_info=HTML & CSS specifications are available from http://www.w3.org/\n\
+To learn more about Tidy see http://www.w3.org/People/Raggett/tidy/\n\
+Please send bug reports to Dave Raggett care of <html-tidy@w3.org>\n\
+Lobby your company to join W3C, see http://www.w3.org/Consortium\n
+
+hello_message=\nTidy (vers {0}) Parsing "{1}"
+
+report_version=\n{0}: Document content looks like {1}
+
+doctype_given=\n{0}: Doctype given is "
+
+num_warnings={0,number} warnings/errors were found!\n
+no_warnings=no warnings or errors were found\n
+
+help_text={0}: file1 file2 ...\n\
+Utility to clean up & pretty print html files\n\
+see http://www.w3.org/People/Raggett/tidy/\n\
+options for tidy released on {1}\n\
+\n\
+Processing directives\n\
+--------------------\n\
+ -indent or -i indent element content\n\
+ -omit or -o omit optional endtags\n\
+ -wrap 72 wrap text at column 72 (default is 68)\n\
+ -upper or -u force tags to upper case (default is lower)\n\
+ -clean or -c replace font, nobr & center tags by CSS\n\
+ -numeric or -n output numeric rather than named entities\n\
+ -errors or -e only show errors\n\
+ -quiet or -q suppress nonessential output\n\
+ -xml use this when input is wellformed xml\n\
+ -asxml to convert html to wellformed xml\n\
+ -slides to burst into slides on h2 elements\n\
+\n\
+Character encodings\n\
+------------------\n\
+ -raw leave chars > 128 unchanged upon output\n\
+ -ascii use ASCII for output, Latin-1 for input\n\
+ -latin1 use Latin-1 for both input and output\n\
+ -iso2022 use ISO2022 for both input and output\n\
+ -utf8 use UTF-8 for both input and output\n\
+ -mac use the Apple MacRoman character set\n\
+\n\
+File manipulation\n\
+---------------\n\
+ -config <file> set options from config file\n\
+ -f <file> write errors to named <file>\n\
+ -modify or -m to modify original files\n\
+\n\
+Miscellaneous\n\
+------------\n\
+ -version or -v show version\n\
+ -help or -h list command line options\n\
+You can also use --blah for any config file option blah\n\
+\n\
+Input/Output default to stdin/stdout respectively\n\
+Single letter options apart from -f may be combined\n\
+as in: tidy -f errs.txt -imu foo.html\n\
+For further info on HTML see http://www.w3.org/MarkUp\n\
+\n
+
+bad_tree=\nPanic - tree has lost its integrity\n