First try for AST structure. A lot of things to change
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpeclipse / phpeditor / php / PHPPartitionScanner.java
1 /**
2  * This program and the accompanying materials
3  * are made available under the terms of the Common Public License v1.0
4  * which accompanies this distribution, and is available at
5  * http://www.eclipse.org/legal/cpl-v10.html
6  * Created on 05.03.2003
7  *
8  * @author Stefan Langer (musk)
9  * @version $Revision: 1.17 $
10  */
11 package net.sourceforge.phpeclipse.phpeditor.php;
12
13 import java.util.*;
14
15 import org.eclipse.jface.text.*;
16 import org.eclipse.jface.text.rules.*;
17
18 /**
19  * 
20  */
21 public class PHPPartitionScanner implements IPartitionTokenScanner
22 {
23     private static final boolean DEBUG = true;
24     private boolean fInString = false;
25     private boolean fInDoubString = false;
26     private IDocument fDocument = null;
27     private int fOffset = -1;
28     private String fContentType = IPHPPartitionScannerConstants.HTML;
29     private String fPrevContentType = IPHPPartitionScannerConstants.HTML;
30     private boolean partitionBorder = false;
31     private int fTokenOffset;
32     private int fEnd = -1;
33     private int fLength;
34     private int fCurrentLength;
35     private Map tokens = new HashMap();
36
37     public PHPPartitionScanner()
38     {
39         this.tokens.put(
40             IPHPPartitionScannerConstants.PHP,
41             new Token(IPHPPartitionScannerConstants.PHP));
42         this.tokens.put(
43             IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT,
44             new Token(IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT));
45         this.tokens.put(
46             IPHPPartitionScannerConstants.HTML,
47             new Token(IPHPPartitionScannerConstants.HTML));
48         this.tokens.put(
49             IPHPPartitionScannerConstants.HTML_MULTILINE_COMMENT,
50             new Token(IPHPPartitionScannerConstants.HTML_MULTILINE_COMMENT));
51         this.tokens.put(
52             IDocument.DEFAULT_CONTENT_TYPE,
53             new Token(IDocument.DEFAULT_CONTENT_TYPE));
54     }
55
56     private IToken getToken(String type)
57     {
58         fLength = fCurrentLength;
59         if (DEBUG)
60         {
61
62             try
63             {
64                 if (fLength <= 0)
65                 {
66                     int line = fDocument.getLineOfOffset(fOffset);
67                     System.err.println(
68                         "Error at "
69                             + line
70                             + " offset:"
71                             + String.valueOf(
72                                 fOffset - fDocument.getLineOffset(line)));
73                 }
74             }
75             catch (BadLocationException e)
76             {   // should never happen
77                 // TODO Write stacktrace to log
78                 e.printStackTrace();
79             }
80         }
81         Assert.isTrue(fLength > 0, "Partition length <= 0!");
82         fCurrentLength = 0;
83         // String can never cross partition borders so reset string detection
84         fInString = false;
85         fInDoubString = false;
86         IToken token = (IToken) this.tokens.get(type);
87         Assert.isNotNull(token, "Token for type \"" + type + "\" not found!");
88         if (DEBUG)
89         {
90             System.out.println(
91                 "Partition: fTokenOffset="
92                     + fTokenOffset
93                     + " fContentType="
94                     + type
95                     + " fLength="
96                     + fLength);
97         }
98         return token;
99     }
100
101     /* (non-Javadoc)
102      * @see org.eclipse.jface.text.rules.IPartitionTokenScanner#setPartialRange(org.eclipse.jface.text.IDocument, int, int, java.lang.String, int)
103      */
104     public void setPartialRange(
105         IDocument document,
106         int offset,
107         int length,
108         String contentType,
109         int partitionOffset)
110     {
111         if (DEBUG)
112         {
113             System.out.println(
114                 "PartialRange: contentType="
115                     + contentType
116                     + " partitionOffset="
117                     + partitionOffset);
118         }
119
120         try
121         {
122             if (partitionOffset > -1)
123             {
124                 partitionBorder = false;
125                 // because of strings we have to parse the whole partition
126                 this.setRange(
127                     document,
128                     partitionOffset,
129                     offset - partitionOffset + length);
130                 // sometimes we get a wrong partition so we retrieve the partition
131                 // directly from the document
132                 fContentType = fDocument.getContentType(partitionOffset);
133             }
134             else
135                 this.setRange(document, offset, length);
136
137         }
138         catch (BadLocationException e)
139         {
140             // should never happen
141             // TODO print stack trace to log
142             // fall back just scan the whole document again
143             this.setRange(document, 0, fDocument.getLength());
144         }
145
146     }
147
148     /* (non-Javadoc)
149      * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
150      */
151     public int getTokenLength()
152     {
153         return fLength;
154     }
155
156     /* (non-Javadoc)
157      * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
158      */
159     public int getTokenOffset()
160     {
161         return fTokenOffset;
162     }
163
164     /* (non-Javadoc)
165      * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
166      */
167     public IToken nextToken()
168     {
169         int c;
170
171         // check if we are not allready at the end of the
172         // file
173         if ((c = read()) == ICharacterScanner.EOF)
174         {
175             partitionBorder = false;
176             return Token.EOF;
177         }
178         else
179             unread();
180
181         if (partitionBorder)
182         {
183             fTokenOffset = fOffset;
184             partitionBorder = false;
185         }
186
187         while ((c = read()) != ICharacterScanner.EOF)
188         {
189             switch (c)
190             {
191                 case '<' :
192                     if (!isInString(IPHPPartitionScannerConstants.PHP)
193                         && fContentType
194                             != IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT
195                         && checkPattern(new char[] { '?', 'p', 'h', 'p' }, true))
196                     {
197                         if (fContentType != IPHPPartitionScannerConstants.PHP
198                             && fCurrentLength > 5)
199                         {
200                             unread(5);
201                             IToken token = getToken(fContentType);
202                             // save previouse contenttype
203                             //TODO build stack for previouse contenttype 
204                             fPrevContentType = fContentType;
205
206                             fContentType = IPHPPartitionScannerConstants.PHP;
207
208                             return token;
209                         }
210                         else
211                             fContentType = IPHPPartitionScannerConstants.PHP;
212
213                         // remember offset of this partition
214                         fTokenOffset = fOffset - 5;
215                         fCurrentLength = 5;
216                     }
217                     else if (
218                         !isInString(IPHPPartitionScannerConstants.PHP)
219                             && fContentType
220                                 != IPHPPartitionScannerConstants
221                                     .PHP_MULTILINE_COMMENT
222                             && checkPattern(new char[] { '?' }, false))
223                     {
224                         if (fContentType != IPHPPartitionScannerConstants.PHP
225                             && fCurrentLength > 2)
226                         {
227                             unread(2);
228                             IToken token = getToken(fContentType);
229                             // save previouse contenttype
230                             fPrevContentType = fContentType;
231                             fContentType = IPHPPartitionScannerConstants.PHP;
232                             return token;
233                         }
234                         else
235                             fContentType = IPHPPartitionScannerConstants.PHP;
236                         // remember offset of this partition
237                         fTokenOffset = fOffset - 2;
238                         fCurrentLength = 2;
239                     }
240                     else if (
241                         !isInString(IPHPPartitionScannerConstants.PHP)
242                             && checkPattern(new char[] { '!', '-', '-' }))
243                     { // return previouse partition
244                         if (fContentType
245                             != IPHPPartitionScannerConstants
246                                 .HTML_MULTILINE_COMMENT
247                             && fCurrentLength > 4)
248                         {
249                             unread(4);
250                             IToken token = getToken(fContentType);
251                             fContentType =
252                                 IPHPPartitionScannerConstants
253                                     .HTML_MULTILINE_COMMENT;
254                             return token;
255                         }
256                         else
257                             fContentType =
258                                 IPHPPartitionScannerConstants
259                                     .HTML_MULTILINE_COMMENT;
260                                     
261                         fTokenOffset = fOffset - 4;
262                         fCurrentLength = 4;
263                     }
264                     break;
265                 case '?' :
266                     if (!isInString(IPHPPartitionScannerConstants.PHP)
267                         && fContentType == IPHPPartitionScannerConstants.PHP)
268                     {
269                         if ((c = read()) == '>')
270                         { 
271                             if (fPrevContentType != null)
272                                 fContentType = fPrevContentType;
273                             else
274                                 fContentType =
275                                     IPHPPartitionScannerConstants.HTML;
276                             partitionBorder = true;
277                             return getToken(IPHPPartitionScannerConstants.PHP);
278                         }
279                         else if (c != ICharacterScanner.EOF)
280                             unread();
281                     }
282                     break;
283                 case '-' :
284                     if (!isInString(IPHPPartitionScannerConstants.PHP)
285                         && fContentType
286                             == IPHPPartitionScannerConstants
287                                 .HTML_MULTILINE_COMMENT
288                         && checkPattern(new char[] { '-', '>' }))
289                     {
290                         fContentType = IPHPPartitionScannerConstants.HTML;
291                         partitionBorder = true;
292                         return getToken(
293                             IPHPPartitionScannerConstants
294                                 .HTML_MULTILINE_COMMENT);
295                     }
296                     break;
297                 case '/' :
298                     if (!isInString(IPHPPartitionScannerConstants.PHP) && (c = read()) == '*')
299                     { // MULTINE COMMENT JAVASCRIPT, CSS, PHP
300                         if (fContentType == IPHPPartitionScannerConstants.PHP
301                             && fCurrentLength > 2)
302                         {
303                             unread(2);
304                             IToken token = getToken(fContentType);
305                             fContentType =
306                                 IPHPPartitionScannerConstants
307                                     .PHP_MULTILINE_COMMENT;
308                             return token;
309                         }
310                         else if (
311                             fContentType
312                                 == IPHPPartitionScannerConstants
313                                     .PHP_MULTILINE_COMMENT)
314                         {
315
316                             fTokenOffset = fOffset - 2;
317                             fCurrentLength = 2;
318                         }
319
320                     }
321                     else if (!isInString(IPHPPartitionScannerConstants.PHP) && c != ICharacterScanner.EOF)
322                         unread();
323                     break;
324                 case '*' :
325                     if (!isInString(IPHPPartitionScannerConstants.PHP) && (c = read()) == '/')
326                     {
327                         if (fContentType
328                             == IPHPPartitionScannerConstants
329                                 .PHP_MULTILINE_COMMENT)
330                         {
331                             fContentType = IPHPPartitionScannerConstants.PHP;
332                             partitionBorder = true;
333                             return getToken(
334                                 IPHPPartitionScannerConstants
335                                     .PHP_MULTILINE_COMMENT);
336                         }
337                         else if (
338                             fContentType
339                                 == IPHPPartitionScannerConstants
340                                     .CSS_MULTILINE_COMMENT)
341                         {
342                         }
343                         else if (
344                             fContentType
345                                 == IPHPPartitionScannerConstants
346                                     .JS_MULTILINE_COMMENT)
347                         {
348                         }
349                     }
350                     else if (!isInString(IPHPPartitionScannerConstants.PHP) && c != ICharacterScanner.EOF)
351                         unread();
352                     break;
353                 case '\'' :
354                     if (!fInDoubString)
355                         fInString = !fInString;
356                     break;
357                 case '"' :
358                     // toggle String mode
359                     if (!fInString)
360                         fInDoubString = !fInDoubString;
361                     break;
362             }
363         } // end of file reached but we have to return the
364         // last partition.
365         return getToken(fContentType);
366     }
367     /* (non-Javadoc)
368      * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(org.eclipse.jface.text.IDocument, int, int)
369      */
370     public void setRange(IDocument document, int offset, int length)
371     {
372         if (DEBUG)
373         {
374             System.out.println(
375                 "SET RANGE: offset=" + offset + " length=" + length);
376         }
377
378         fDocument = document;
379         fOffset = offset;
380         fTokenOffset = offset;
381         fCurrentLength = 0;
382         fLength = 0;
383         fEnd = fOffset + length;
384         fInString = false;
385         fInDoubString = false;
386         fContentType = IPHPPartitionScannerConstants.HTML;
387 //        String[] prev = getPartitionStack(offset);
388     }
389
390     private int read()
391     {
392         try
393         {
394             if (fOffset < fEnd)
395             {
396                 fCurrentLength++;
397                 return fDocument.getChar(fOffset++);
398             }
399             return ICharacterScanner.EOF;
400         }
401         catch (BadLocationException e)
402         {
403             // should never happen
404             // TODO write stacktrace to log
405             fOffset = fEnd;
406             return ICharacterScanner.EOF;
407         }
408     }
409
410     private void unread()
411     {
412         --fOffset;
413         --fCurrentLength;
414     }
415     
416     private void unread(int num)
417     {
418         fOffset -= num;
419         fCurrentLength -= num;
420     }
421
422     private boolean checkPattern(char[] pattern)
423     {
424         return checkPattern(pattern, false);
425     }
426
427     /**
428      * Check if next character sequence read from document is equals to 
429      * the provided pattern. Pattern is read from left to right until the 
430      * first character read doesn't match. If this happens all read characters are
431      * unread.
432      * @param pattern The pattern to check.
433      * @return <code>true</code> if pattern is equals else returns <code>false</code>.
434      */
435     private boolean checkPattern(char[] pattern, boolean ignoreCase)
436     {
437         int prevOffset = fOffset;
438         int prevLength = fCurrentLength;
439         for (int i = 0; i < pattern.length; i++)
440         {
441             int c = read();
442
443             if (c == ICharacterScanner.EOF
444                 || !letterEquals(c, pattern[i], ignoreCase))
445             {
446                 fOffset = prevOffset;
447                 fCurrentLength = prevLength;
448                 return false;
449             }
450         }
451
452         return true;
453     }
454
455     private boolean letterEquals(int test, char letter, boolean ignoreCase)
456     {
457         if (test == letter)
458             return true;
459         else if (
460             ignoreCase
461                 && Character.isLowerCase(letter)
462                 && test == Character.toUpperCase(letter))
463             return true;
464         else if (
465             ignoreCase
466                 && Character.isUpperCase(letter)
467                 && test == Character.toLowerCase(letter))
468             return true;
469
470         return false;
471     }
472     
473     /**
474      * Checks wether the offset is in a <code>String</code> and the specified 
475      * contenttype is the current content type.
476      * Strings are delimited, mutual exclusive, by a " or by a '.
477      * 
478      * @param contentType The contenttype to check.
479      * @return <code>true</code> if the current offset is in a string else 
480      *                  returns false.
481      */
482     private  boolean isInString(String contentType)
483     {
484         if(fContentType == contentType)
485                 return (fInString || fInDoubString);
486         else
487                 return false;           
488     }
489     
490     /**
491      * Returns the previouse partition stack for the given offset.
492      * 
493      * @param offset The offset to return the previouse partitionstack for.
494      * 
495      * @return The stack as a string array.
496      */
497     private String[] getPartitionStack(int offset)
498     {
499         ArrayList types = new ArrayList();
500         int tmpOffset = 0;
501         try
502         {
503             ITypedRegion region = fDocument.getPartition(offset);
504             tmpOffset = region.getOffset();
505             while(tmpOffset-1 > 0)
506             {
507                 region = fDocument.getPartition(tmpOffset-1);
508                 tmpOffset = region.getOffset();
509                 types.add(0, region.getType());
510             }
511         }
512         catch (BadLocationException e)
513         {
514            if(DEBUG)
515            {
516                         e.printStackTrace();
517            }
518         }
519         
520                 String[] retVal = new String[types.size()];
521         
522         retVal = (String[])types.toArray(retVal);
523         return retVal;
524     }
525     
526 }