initial source from http://www.sf.net/projects/wdte
[phpeclipse.git] / net.sourceforge.phpeclipse.xml.core / src / net / sourceforge / phpeclipse / xml / core / internal / parser / XMLParser.java
1 /*
2  * Copyright (c) 2004 Christopher Lenz and others
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  * 
8  * Contributors:
9  *     Christopher Lenz - initial API
10  * 
11  * $Id: XMLParser.java,v 1.1 2004-09-02 18:26:55 jsurfer Exp $
12  */
13
14 package net.sourceforge.phpeclipse.xml.core.internal.parser;
15
16 import java.io.IOException;
17 import java.io.StringReader;
18
19 import javax.xml.parsers.ParserConfigurationException;
20 import javax.xml.parsers.SAXParser;
21 import javax.xml.parsers.SAXParserFactory;
22
23 import net.sourceforge.phpeclipse.xml.core.internal.model.XMLDocument;
24 import net.sourceforge.phpeclipse.xml.core.internal.model.XMLElement;
25 import net.sourceforge.phpeclipse.xml.core.model.IXMLDocument;
26 import net.sourceforge.phpeclipse.xml.core.model.IXMLElement;
27 import net.sourceforge.phpeclipse.xml.core.parser.IProblem;
28 import net.sourceforge.phpeclipse.xml.core.parser.IProblemCollector;
29 import net.sourceforge.phpeclipse.xml.core.parser.IXMLParser;
30
31 import org.eclipse.jface.text.BadLocationException;
32 import org.eclipse.jface.text.IDocument;
33 import org.eclipse.jface.text.IRegion;
34 import org.eclipse.jface.text.Region;
35 import org.xml.sax.Attributes;
36 import org.xml.sax.InputSource;
37 import org.xml.sax.Locator;
38 import org.xml.sax.SAXException;
39 import org.xml.sax.SAXParseException;
40 import org.xml.sax.helpers.DefaultHandler;
41
42
43 /**
44  * SAX-based default implementation of the {@link IXMLParser} interface.
45  * 
46  * TODO This implementation doesn't do error recovery, as SAX doesn't allow it.
47  *      Maybe we partition the document and parse individual fragments so that
48  *      errors can be isolated to their source
49  */
50 public class XMLParser implements IXMLParser {
51         /**
52          * SAX content handler that builds a model of the XML document.
53          */
54         class ModelBuilder extends DefaultHandler {
55                 /**
56                  * The document model being built.
57                  */
58                 XMLDocument document;
59
60                 /**
61                  * The current top element. That is the element that has been most 
62                  * recently opened by a start tag.
63                  */
64                 private XMLElement top;
65
66                 /**
67                  * The SAX locator provided by the parser, used to calculate the source
68                  * regions covered by elements.
69                  */
70                 private Locator locator;
71
72                 /**
73                  * Limits parsing time.
74                  */
75                 private long timeout;
76
77                 /*
78                  * @see org.xml.sax.ContentHandler#startDocument()
79                  */
80                 public void startDocument() throws SAXException {
81                         timeout = System.currentTimeMillis() + 2000;
82                         document = new XMLDocument(source, systemId);
83                 }
84
85                 /*
86                  * @see org.xml.sax.ContentHandler#startElement(String, String, String, Attributes)
87                  */
88                 public void startElement(
89                         String namespaceURI, String localName, String qName, Attributes atts
90                 ) throws SAXException {
91                         if (System.currentTimeMillis() > timeout) {
92                                 throw new SAXException("timeout");
93                         }
94
95                         XMLElement newTop = new XMLElement(source);
96                         newTop.setLocalName(localName);
97                         newTop.setNamespaceURI(namespaceURI);
98
99                         if (qName != null) {
100                                 int colonIndex = qName.indexOf(':');
101                                 if (colonIndex >= 0) {
102                                         newTop.setPrefix(qName.substring(0, colonIndex));
103                                 }
104                         }
105
106                         int offset = computeOffset(newTop,
107                                         locator.getLineNumber(),
108                                         locator.getColumnNumber());
109
110                         if (offset >= 0) {
111                                 newTop.setSourceRegion(offset, 0);
112                         }
113                         if (top != null) {
114                                 newTop.setParent(top);
115                         }
116                         top = newTop;
117                 }
118
119                 /*
120                  * @see org.xml.sax.ContentHandler#endElement(String, String, String)
121                  */
122                 public void endElement(
123                         String namespaceURI, String localName, String qName
124                 ) throws SAXException {
125                         int length = computeLength(top,
126                                         locator.getLineNumber(),
127                                         locator.getColumnNumber());
128
129                         if (length >= 0) {
130                                 top.setSourceRegion(top.getSourceRegion().getOffset(), length);
131                         }
132
133                         XMLElement previousTop = (XMLElement) top.getParent();
134                         if (previousTop != null) {
135                                 previousTop.addChild(top);
136                         } else {
137                                 // this is the root element
138                                 document.setRoot(top);
139                         } 
140                         top = previousTop;
141                 }
142
143                 /* 
144                  * @see org.xml.sax.ErrorHandler#error(SAXParseException)
145                  */
146                 public void error(SAXParseException e) throws SAXException {
147                         if (problemCollector != null) {
148                                 problemCollector.addProblem(createProblem(e, true));
149                         }
150                 }
151
152                 /* 
153                  * @see org.xml.sax.ErrorHandler#fatalError(SAXParseException)
154                  */
155                 public void fatalError(SAXParseException e) throws SAXException {
156                         if (problemCollector != null) {
157                                 problemCollector.addProblem(createProblem(e, true));
158                         }
159                 }
160
161                 /* 
162                  * @see org.xml.sax.ErrorHandler#warning(SAXParseException)
163                  */
164                 public void warning(SAXParseException e) throws SAXException {
165                         if (problemCollector != null) {
166                                 problemCollector.addProblem(createProblem(e, false));
167                         }
168                 }
169
170                 /*
171                  * @see org.xml.sax.ContentHandler#setDocumentLocator(Locator)
172                  */
173                 public void setDocumentLocator(Locator locator) {
174                         this.locator = locator;
175                 }
176
177                 /**
178                  * Creates a <tt>IProblem</tt> instance based on the information
179                  * accessible from the parse exception. This method estimates the exact
180                  * location of the error based on the line and column numbers provided
181                  * with the exception.
182                  * 
183                  * TODO Limit the location to the current top element
184                  * 
185                  * @param e the SAX parse exception
186                  * @param error whether the problem is an error or a warning
187                  * @return the created problem object
188                  */
189                 private IProblem createProblem(SAXParseException e, boolean error) {
190                         int line = e.getLineNumber();
191                         int column = e.getColumnNumber();
192                         if (line < 0) {
193                                 line = 0;
194                         }
195                         if (column < 1) {
196                                 column = 1;
197                         }
198
199                         int offset = 0, length = 1;
200                         try {
201                                 offset = getOffset(line, column);
202                                 length = getLastCharColumn(line) - column;
203                         } catch (BadLocationException ble) {
204                                 ble.printStackTrace();
205                         }
206
207                         return new DefaultProblem(e.getLocalizedMessage(),
208                                                         offset, offset + length, line, error);
209                 }
210         }
211
212         // Instance Variables ------------------------------------------------------
213
214         /**
215          * The associated problem collector.
216          */
217         IProblemCollector problemCollector;
218
219         /**
220          * The document containing the source that should be parsed.
221          */
222         IDocument source;
223
224         /**
225          * The system ID of the document to parse, if available. This is necessary
226          * to resolve relative external entities. Can be <tt>null</tt>.
227          */
228         String systemId;
229
230         // IXMLParser Implementation -----------------------------------------------
231
232         /*
233          * @see IXMLParser#parse()
234          */
235         public IXMLDocument parse() {
236                 SAXParserFactory factory = SAXParserFactory.newInstance();
237                 factory.setNamespaceAware(true);
238                 factory.setValidating(false);
239
240                 try {
241                         SAXParser parser = factory.newSAXParser();
242
243                         InputSource in = new InputSource(new StringReader(source.get()));
244                         if (systemId != null) {
245                                 in.setSystemId(systemId);
246                         }
247
248                         ModelBuilder builder = new ModelBuilder();
249                         parser.parse(in, builder);
250                         return builder.document;
251                 } catch (ParserConfigurationException e) {
252                         // TODO Throw CoreException or at least log the error
253                 } catch (SAXParseException e) {
254                         // Already handled by the ModelBuilder
255                 } catch (SAXException e) {
256                         // SAX exceptions that are not parsing errors
257                         // TODO Throw CoreException or at least log the error
258                 } catch (IOException e) {
259                         // TODO Throw CoreException or at least log the error
260                 }
261
262                 return null;
263         }
264
265         /* 
266          * @see IProblemReporter#setProblemCollector(IProblemCollector)
267          */
268         public void setProblemCollector(IProblemCollector problemCollector) {
269                 this.problemCollector = problemCollector;
270         }
271
272         /* 
273          * @see IXMLParser#setSource(IDocument)
274          */
275         public void setSource(IDocument source) {
276                 this.source = source;
277         }
278
279         /* 
280          * @see IXMLParser#setSystemId(String)
281          */
282         public void setSystemId(String systemId) {
283                 this.systemId = systemId;
284         }
285
286         // Private Methods ---------------------------------------------------------
287
288         /**
289          * Computes the exact length of the given element by searching for the
290          * offset of the last character of the end tag.
291          */
292         int computeLength(XMLElement element, int line, int column) {
293                 try {
294                         int offset;
295                         if (column <= 0) {
296                                 int lineOffset = source.getLineOffset(line);
297                                 String endTag = getEndTag(element);
298
299                                 IRegion result = findStringForward(lineOffset, endTag);
300                                 if (result != null) {
301                                         offset = result.getOffset() + endTag.length();
302                                 } else {
303                                         result = findStringForward(lineOffset, "/>"); //$NON-NLS-1$
304                                         if (result == null) {
305                                                 offset = -1;
306                                         } else {
307                                                 offset = result.getOffset() + 2;
308                                         }
309                                 }
310
311                                 if ((offset < 0) || (getLine(offset) != line)) {
312                                         offset = lineOffset;
313                                 } else {
314                                         offset++;
315                                 }
316                         } else {
317                                 offset = getOffset(line, column);
318                         }
319
320                         return offset - element.getSourceRegion().getOffset();
321                 } catch (BadLocationException e) {
322                         // ignore as the parser may be out of sync with the document during
323                         // reconciliation
324                 }
325
326                 return -1;
327         }
328
329         /**
330          * Computes the offset at which the specified elements start tag begins in
331          * the source.
332          */
333         int computeOffset(XMLElement element, int line, int column) {
334                 try {
335                         int offset;
336                         String prefix = "<"; //$NON-NLS-1$
337                         if (column <= 0) {
338                                 offset = getOffset(line, 0);
339                                 int lastCharColumn = getLastCharColumn(line);
340                                 String lineText = source
341                                         .get(source.getLineOffset(line - 1), lastCharColumn);
342                                 String startTag = getStartTag(element);
343
344                                 int lastIndex = lineText.indexOf(startTag);
345                                 if (lastIndex > -1) {
346                                         offset += lastIndex + 1;
347                                 } else {
348                                         offset = getOffset(line, lastCharColumn);
349                                         IRegion result = findStringBackward(offset - 1, prefix);
350                                         offset = result.getOffset();
351                                 }
352                         } else {
353                                 offset = getOffset(line, column);
354                                 IRegion result = findStringForward(offset - 1, prefix);
355                                 offset = result.getOffset();
356                         }
357
358                         return offset;
359                 } catch (BadLocationException e) {
360                         // ignore as the parser may be out of sync with the document during
361                         // reconciliation
362                 }
363
364                 return -1;
365         }
366
367         private IRegion findStringBackward(
368                 int startOffset, String string
369         ) throws BadLocationException {
370                 int offset = startOffset;
371                 int length = string.length();
372
373                 String match;
374                 while (offset >= 0) {
375                         match = source.get(offset, length);
376                         if (match.equals(string)) {
377                                 return new Region(offset, length);
378                         }
379                         offset -= 1;
380                 }
381
382                 return null;
383         }
384
385         private IRegion findStringForward(
386                 int startOffset, String string
387         ) throws BadLocationException {
388                 int offset = startOffset;
389                 int length = string.length();
390
391                 String match;
392                 int sourceLength = source.getLength();
393                 while (offset + length <= sourceLength) {
394                         match = source.get(offset, length);
395                         if (match.equals(string)) {
396                                 return new Region(offset, length);
397                         }
398                         offset += 1;
399                 }
400
401                 return null;
402         }
403
404         /**
405          * Given an XML element, this method reconstructs the corresponding end tag
406          * of the element, including the namespace prefix if there was one. 
407          * 
408          * @param element the XML element for which the end tag should be contructed
409          * @return the end tag as string
410          */
411         private String getEndTag(IXMLElement element) {
412                 StringBuffer buf = new StringBuffer("</"); //$NON-NLS-1$
413                 if (element.getPrefix() != null) {
414                         buf.append(element.getPrefix());
415                         buf.append(':');
416                 }
417                 buf.append(element.getLocalName());
418                 buf.append('>');
419
420                 return buf.toString();
421         }
422
423         /**
424          * Reconstructs and returns the start tag corresponding to the given XML
425          * element, excluding any attribute specifications or the closing 
426          * <tt>&gt;</tt> character.
427          * 
428          * @param element the XML element for which the start tag should be
429          *        constructed
430          * @return the start tag as string, excluding everything after the tag name
431          *         itself
432          */
433         private String getStartTag(IXMLElement element) {
434                 StringBuffer buf = new StringBuffer("<"); //$NON-NLS-1$
435                 if (element.getPrefix() != null) {
436                         buf.append(element.getPrefix());
437                         buf.append(':');
438                 }
439                 buf.append(element.getLocalName());
440
441                 return buf.toString();
442         }
443
444         int getOffset(int line, int column) throws BadLocationException {
445                 return source.getLineOffset(line - 1) + column - 1;
446         }
447
448         private int getLine(int offset) throws BadLocationException {
449                 return source.getLineOfOffset(offset) + 1;
450         }
451
452         int getLastCharColumn(int line) throws BadLocationException {
453                 String lineDelimiter = source.getLineDelimiter(line - 1);
454                 int lineDelimiterLength = (lineDelimiter != null)
455                                 ? lineDelimiter.length() : 0;
456
457                 return source.getLineLength(line - 1) - lineDelimiterLength;
458         }
459 }