2 * Copyright (c) 2004 Christopher Lenz and others
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v10.html
9 * Christopher Lenz - initial API
11 * $Id: XMLParser.java,v 1.1 2004-09-02 18:26:55 jsurfer Exp $
14 package net.sourceforge.phpeclipse.xml.core.internal.parser;
16 import java.io.IOException;
17 import java.io.StringReader;
19 import javax.xml.parsers.ParserConfigurationException;
20 import javax.xml.parsers.SAXParser;
21 import javax.xml.parsers.SAXParserFactory;
23 import net.sourceforge.phpeclipse.xml.core.internal.model.XMLDocument;
24 import net.sourceforge.phpeclipse.xml.core.internal.model.XMLElement;
25 import net.sourceforge.phpeclipse.xml.core.model.IXMLDocument;
26 import net.sourceforge.phpeclipse.xml.core.model.IXMLElement;
27 import net.sourceforge.phpeclipse.xml.core.parser.IProblem;
28 import net.sourceforge.phpeclipse.xml.core.parser.IProblemCollector;
29 import net.sourceforge.phpeclipse.xml.core.parser.IXMLParser;
31 import org.eclipse.jface.text.BadLocationException;
32 import org.eclipse.jface.text.IDocument;
33 import org.eclipse.jface.text.IRegion;
34 import org.eclipse.jface.text.Region;
35 import org.xml.sax.Attributes;
36 import org.xml.sax.InputSource;
37 import org.xml.sax.Locator;
38 import org.xml.sax.SAXException;
39 import org.xml.sax.SAXParseException;
40 import org.xml.sax.helpers.DefaultHandler;
44 * SAX-based default implementation of the {@link IXMLParser} interface.
46 * TODO This implementation doesn't do error recovery, as SAX doesn't allow it.
47 * Maybe we could partition the document and parse individual fragments so that
48 * errors can be isolated to their source.
50 public class XMLParser implements IXMLParser {
52 * SAX content handler that builds a model of the XML document.
54 class ModelBuilder extends DefaultHandler {
56 * The document model being built.
61 * The current top element. That is the element that has been most
62 * recently opened by a start tag.
64 private XMLElement top;
67 * The SAX locator provided by the parser, used to calculate the source
68 * regions covered by elements.
70 private Locator locator;
73 * Limits parsing time.
78 * @see org.xml.sax.ContentHandler#startDocument()
80 public void startDocument() throws SAXException {
81 timeout = System.currentTimeMillis() + 2000;
82 document = new XMLDocument(source, systemId);
86 * @see org.xml.sax.ContentHandler#startElement(String, String, String, Attributes)
88 public void startElement(
89 String namespaceURI, String localName, String qName, Attributes atts
90 ) throws SAXException {
91 if (System.currentTimeMillis() > timeout) {
92 throw new SAXException("timeout");
95 XMLElement newTop = new XMLElement(source);
96 newTop.setLocalName(localName);
97 newTop.setNamespaceURI(namespaceURI);
100 int colonIndex = qName.indexOf(':');
101 if (colonIndex >= 0) {
102 newTop.setPrefix(qName.substring(0, colonIndex));
106 int offset = computeOffset(newTop,
107 locator.getLineNumber(),
108 locator.getColumnNumber());
111 newTop.setSourceRegion(offset, 0);
114 newTop.setParent(top);
120 * @see org.xml.sax.ContentHandler#endElement(String, String, String)
122 public void endElement(
123 String namespaceURI, String localName, String qName
124 ) throws SAXException {
125 int length = computeLength(top,
126 locator.getLineNumber(),
127 locator.getColumnNumber());
130 top.setSourceRegion(top.getSourceRegion().getOffset(), length);
133 XMLElement previousTop = (XMLElement) top.getParent();
134 if (previousTop != null) {
135 previousTop.addChild(top);
137 // this is the root element
138 document.setRoot(top);
144 * @see org.xml.sax.ErrorHandler#error(SAXParseException)
146 public void error(SAXParseException e) throws SAXException {
147 if (problemCollector != null) {
148 problemCollector.addProblem(createProblem(e, true));
153 * @see org.xml.sax.ErrorHandler#fatalError(SAXParseException)
155 public void fatalError(SAXParseException e) throws SAXException {
156 if (problemCollector != null) {
157 problemCollector.addProblem(createProblem(e, true));
162 * @see org.xml.sax.ErrorHandler#warning(SAXParseException)
164 public void warning(SAXParseException e) throws SAXException {
165 if (problemCollector != null) {
166 problemCollector.addProblem(createProblem(e, false));
171 * @see org.xml.sax.ContentHandler#setDocumentLocator(Locator)
173 public void setDocumentLocator(Locator locator) {
174 this.locator = locator;
178 * Creates an <tt>IProblem</tt> instance based on the information
179 * accessible from the parse exception. This method estimates the exact
180 * location of the error based on the line and column numbers provided
181 * with the exception.
183 * TODO Limit the location to the current top element
185 * @param e the SAX parse exception
186 * @param error whether the problem is an error or a warning
187 * @return the created problem object
189 private IProblem createProblem(SAXParseException e, boolean error) {
190 int line = e.getLineNumber();
191 int column = e.getColumnNumber();
199 int offset = 0, length = 1;
201 offset = getOffset(line, column);
202 length = getLastCharColumn(line) - column;
203 } catch (BadLocationException ble) {
204 ble.printStackTrace();
207 return new DefaultProblem(e.getLocalizedMessage(),
208 offset, offset + length, line, error);
212 // Instance Variables ------------------------------------------------------
215 * The associated problem collector.
217 IProblemCollector problemCollector;
220 * The document containing the source that should be parsed.
225 * The system ID of the document to parse, if available. This is necessary
226 * to resolve relative external entities. Can be <tt>null</tt>.
230 // IXMLParser Implementation -----------------------------------------------
233 * @see IXMLParser#parse()
235 public IXMLDocument parse() {
236 SAXParserFactory factory = SAXParserFactory.newInstance();
237 factory.setNamespaceAware(true);
238 factory.setValidating(false);
241 SAXParser parser = factory.newSAXParser();
243 InputSource in = new InputSource(new StringReader(source.get()));
244 if (systemId != null) {
245 in.setSystemId(systemId);
248 ModelBuilder builder = new ModelBuilder();
249 parser.parse(in, builder);
250 return builder.document;
251 } catch (ParserConfigurationException e) {
252 // TODO Throw CoreException or at least log the error
253 } catch (SAXParseException e) {
254 // Already handled by the ModelBuilder
255 } catch (SAXException e) {
256 // SAX exceptions that are not parsing errors
257 // TODO Throw CoreException or at least log the error
258 } catch (IOException e) {
259 // TODO Throw CoreException or at least log the error
266 * @see IProblemReporter#setProblemCollector(IProblemCollector)
268 public void setProblemCollector(IProblemCollector problemCollector) {
269 this.problemCollector = problemCollector;
273 * @see IXMLParser#setSource(IDocument)
275 public void setSource(IDocument source) {
276 this.source = source;
280 * @see IXMLParser#setSystemId(String)
282 public void setSystemId(String systemId) {
283 this.systemId = systemId;
286 // Private Methods ---------------------------------------------------------
289 * Computes the exact length of the given element by searching for the
290 * offset of the last character of the end tag.
292 int computeLength(XMLElement element, int line, int column) {
296 int lineOffset = source.getLineOffset(line);
297 String endTag = getEndTag(element);
299 IRegion result = findStringForward(lineOffset, endTag);
300 if (result != null) {
301 offset = result.getOffset() + endTag.length();
303 result = findStringForward(lineOffset, "/>"); //$NON-NLS-1$
304 if (result == null) {
307 offset = result.getOffset() + 2;
311 if ((offset < 0) || (getLine(offset) != line)) {
317 offset = getOffset(line, column);
320 return offset - element.getSourceRegion().getOffset();
321 } catch (BadLocationException e) {
322 // ignore as the parser may be out of sync with the document during
330 * Computes the offset at which the specified element's start tag begins in
333 int computeOffset(XMLElement element, int line, int column) {
336 String prefix = "<"; //$NON-NLS-1$
338 offset = getOffset(line, 0);
339 int lastCharColumn = getLastCharColumn(line);
340 String lineText = source
341 .get(source.getLineOffset(line - 1), lastCharColumn);
342 String startTag = getStartTag(element);
344 int lastIndex = lineText.indexOf(startTag);
345 if (lastIndex > -1) {
346 offset += lastIndex + 1;
348 offset = getOffset(line, lastCharColumn);
349 IRegion result = findStringBackward(offset - 1, prefix);
350 offset = result.getOffset();
353 offset = getOffset(line, column);
354 IRegion result = findStringForward(offset - 1, prefix);
355 offset = result.getOffset();
359 } catch (BadLocationException e) {
360 // ignore as the parser may be out of sync with the document during
367 private IRegion findStringBackward(
368 int startOffset, String string
369 ) throws BadLocationException {
370 int offset = startOffset;
371 int length = string.length();
374 while (offset >= 0) {
375 match = source.get(offset, length);
376 if (match.equals(string)) {
377 return new Region(offset, length);
385 private IRegion findStringForward(
386 int startOffset, String string
387 ) throws BadLocationException {
388 int offset = startOffset;
389 int length = string.length();
392 int sourceLength = source.getLength();
393 while (offset + length <= sourceLength) {
394 match = source.get(offset, length);
395 if (match.equals(string)) {
396 return new Region(offset, length);
405 * Given an XML element, this method reconstructs the corresponding end tag
406 * of the element, including the namespace prefix if there was one.
408 * @param element the XML element for which the end tag should be constructed
409 * @return the end tag as string
411 private String getEndTag(IXMLElement element) {
412 StringBuffer buf = new StringBuffer("</"); //$NON-NLS-1$
413 if (element.getPrefix() != null) {
414 buf.append(element.getPrefix());
417 buf.append(element.getLocalName());
420 return buf.toString();
424 * Reconstructs and returns the start tag corresponding to the given XML
425 * element, excluding any attribute specifications or the closing
426 * <tt>></tt> character.
428 * @param element the XML element for which the start tag should be
430 * @return the start tag as string, excluding everything after the tag name
433 private String getStartTag(IXMLElement element) {
434 StringBuffer buf = new StringBuffer("<"); //$NON-NLS-1$
435 if (element.getPrefix() != null) {
436 buf.append(element.getPrefix());
439 buf.append(element.getLocalName());
441 return buf.toString();
444 int getOffset(int line, int column) throws BadLocationException {
445 return source.getLineOffset(line - 1) + column - 1;
448 private int getLine(int offset) throws BadLocationException {
449 return source.getLineOfOffset(offset) + 1;
452 int getLastCharColumn(int line) throws BadLocationException {
453 String lineDelimiter = source.getLineDelimiter(line - 1);
454 int lineDelimiterLength = (lineDelimiter != null)
455 ? lineDelimiter.length() : 0;
457 return source.getLineLength(line - 1) - lineDelimiterLength;