better error messages for unterminated strings and comments
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / ui / text / FastJavaPartitionScanner.java
1 package net.sourceforge.phpdt.internal.ui.text;
2
3 /*
4  * (c) Copyright IBM Corp. 2000, 2001.
5  * All Rights Reserved.
6  */
7
8 import org.eclipse.jface.text.IDocument;
9 import org.eclipse.jface.text.rules.ICharacterScanner;
10 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
11 import org.eclipse.jface.text.rules.IToken;
12 import org.eclipse.jface.text.rules.Token;
13
14
15 /**
16  * This scanner recognizes the JavaDoc comments, Java multi line comments, Java single line comments,
17  * Java strings and Java characters.
18  */
19 public class FastJavaPartitionScanner implements IPartitionTokenScanner {
20
21 //      private final static String SKIP= "__skip"; //$NON-NLS-1$       
22 //      public final static String JAVA_STRING= "__php_string"; //$NON-NLS-1$
23 //      public final static String JAVA_SINGLE_LINE_COMMENT= "__php_singleline_comment"; //$NON-NLS-1$
24 //      public final static String JAVA_MULTI_LINE_COMMENT= "__php_multiline_comment"; //$NON-NLS-1$
25 //      public final static String JAVA_DOC= "__php_phpdoc"; //$NON-NLS-1$
26
27         // states
28         private static final int HTML= 0;       
29         private static final int SINGLE_LINE_COMMENT= 1; 
30         private static final int MULTI_LINE_COMMENT= 2;
31         private static final int PHPDOC= 3;
32 //      private static final int CHARACTER= 4;
33         private static final int STRING_SQ= 4;  // single quote string
34         private static final int STRING_DQ= 5;  // double quote string
35         private static final int PHP= 6;  // double quote string
36         
37         // beginning of prefixes and postfixes
38         private static final int NONE= 0;
39         private static final int BACKSLASH= 1; // postfix for STRING and CHARACTER
40         private static final int SLASH= 2; // prefix for SINGLE_LINE or MULTI_LINE or JAVADOC
41         private static final int SLASH_STAR= 3; // prefix for MULTI_LINE_COMMENT or JAVADOC
42         private static final int SLASH_STAR_STAR= 4; // prefix for MULTI_LINE_COMMENT or JAVADOC
43         private static final int STAR= 5; // postfix for MULTI_LINE_COMMENT or JAVADOC
44         private static final int CARRIAGE_RETURN=6; // postfix for STRING, CHARACTER and SINGLE_LINE_COMMENT
45         
46         /** The scanner. */
47 //      private final BufferedRuleBasedScanner fScanner= new BufferedRuleBasedScanner(1000);
48         private final BufferedDocumentScanner fScanner= new BufferedDocumentScanner(1000);      // faster implementation
49         
50         /** The offset of the last returned token. */
51         private int fTokenOffset;
52         /** The length of the last returned token. */
53         private int fTokenLength;
54         
55         /** The state of the scanner. */        
56         private int fState;
57         /** The last significant characters read. */
58         private int fLast;
59         /** The amount of characters already read on first call to nextToken(). */
60         private int fPrefixLength;
61         
62         // emulate JavaPartitionScanner 
63 //      private static final boolean fgEmulate= false;
64         private int fJavaOffset;
65         private int fJavaLength;
66         
67         private final IToken[] fTokens= new IToken[] {
68                 new Token(null),
69                 new Token(IPHPPartitions.PHP_SINGLELINE_COMMENT),
70                 new Token(IPHPPartitions.PHP_MULTILINE_COMMENT),
71                 new Token(IPHPPartitions.PHP_PHPDOC_COMMENT),
72                 new Token(IPHPPartitions.PHP_STRING_SQ),
73                 new Token(IPHPPartitions.PHP_STRING_DQ),
74                 new Token(IPHPPartitions.PHP_PARTITIONING),
75         };
76
77         /*
78          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
79          */
80         public IToken nextToken() {
81
82                 fTokenOffset += fTokenLength;
83                 fTokenLength= fPrefixLength;
84
85                 while (true) {
86                         final int ch= fScanner.read();
87                         
88                         // characters
89                         switch (ch) {
90                         case ICharacterScanner.EOF:
91                                 if (fTokenLength > 0) {
92                                         fLast= NONE; // ignore last
93                                         return preFix(fState, HTML, NONE, 0);
94
95                                 } else {
96                                         fLast= NONE;
97                                         fPrefixLength= 0;
98                                         return Token.EOF;
99                                 }
100
101                         case '\r':
102                                 if ( fLast != CARRIAGE_RETURN) {
103                                                 fLast= CARRIAGE_RETURN;
104                                                 fTokenLength++;
105                                                 continue;
106
107                                 } else {
108                                         
109                                         switch (fState) {
110                                         case SINGLE_LINE_COMMENT:
111 //                                      case CHARACTER:
112                                         case STRING_SQ:  
113                                         case STRING_DQ:
114                                                 if (fTokenLength > 0) {
115                                                         IToken token= fTokens[fState];
116                                                         
117                                                         // emulate JavaPartitionScanner
118 //                                                      if (fgEmulate) {
119 //                                                              fTokenLength++;
120 //                                                              fLast= NONE;
121 //                                                              fPrefixLength= 0;
122 //                                                      } else {                                                                
123                                                                 fLast= CARRIAGE_RETURN; 
124                                                                 fPrefixLength= 1;
125 //                                                      }
126                                                         
127                                                         fState= HTML;
128                                                         return token;
129
130                                                 } else {
131                                                         consume();
132                                                         continue;       
133                                                 }
134
135                                         default:
136                                                 consume();
137                                                 continue;
138                                         }                                       
139                                 }
140         
141                         case '\n':                              
142                                 switch (fState) {
143                                 case SINGLE_LINE_COMMENT:
144 //                              case CHARACTER:
145                                 case STRING_SQ:
146                                 case STRING_DQ:                         
147                                         // assert(fTokenLength > 0);
148                                         return postFix(fState);
149
150                                 default:
151                                         consume();
152                                         continue;
153                                 }
154
155                         default:
156                                 if ( fLast == CARRIAGE_RETURN) {                        
157                                         switch (fState) {
158                                         case SINGLE_LINE_COMMENT:
159 //                                      case CHARACTER:
160                                         case STRING_SQ:
161                                         case STRING_DQ:
162
163                                                 int last;
164                                                 int newState;
165                                                 switch (ch) {
166                                                 case '/':
167                                                         last= SLASH;
168                                                         newState= HTML;
169                                                         break;
170
171                                                 case '*':
172                                                         last= STAR;
173                                                         newState= HTML;
174                                                         break;
175                                                 
176                                                 case '\'':
177                                                         last= NONE;
178         //                                              newState= CHARACTER;
179                                                         newState= STRING_SQ;
180                                                         break;
181
182                                                 case '"':
183                                                         last= NONE;
184                                                         newState= STRING_DQ;
185                                                         break;
186
187                                                 case '\r':
188                                                         last= CARRIAGE_RETURN;
189                                                         newState= HTML;
190                                                         break;
191
192                                                 case '\\':
193                                                         last= BACKSLASH;
194                                                         newState= HTML;
195                                                         break;
196
197                                                 default:
198                                                         last= NONE;
199                                                         newState= HTML;
200                                                         break;
201                                                 }
202                                                 
203                                                 fLast= NONE; // ignore fLast
204                                                 return preFix(fState, newState, last, 1);
205         
206                                         default:
207                                                 break;
208                                         }
209                                 }
210                         }
211
212                         // states        
213                         switch (fState) {
214                         case PHP:
215                                 switch (ch) {
216                                 case '/':
217                                         if (fLast == SLASH) {
218                                                 if (fTokenLength - getLastLength(fLast) > 0) {
219                                                         return preFix(PHP, SINGLE_LINE_COMMENT, NONE, 2);
220                                                 } else {                                                        
221                                                         preFix(PHP, SINGLE_LINE_COMMENT, NONE, 2);
222                                                         fTokenOffset += fTokenLength;
223                                                         fTokenLength= fPrefixLength;
224                                                         break;
225                                                 }
226         
227                                         } else {
228                                                 fTokenLength++;
229                                                 fLast= SLASH;
230                                                 break;
231                                         }
232         
233                                 case '*':
234                                         if (fLast == SLASH) {
235                                                 if (fTokenLength - getLastLength(fLast) > 0)
236                                                         return preFix(PHP, MULTI_LINE_COMMENT, SLASH_STAR, 2);
237                                                 else {
238                                                         preFix(PHP, MULTI_LINE_COMMENT, SLASH_STAR, 2);
239                                                         fTokenOffset += fTokenLength;
240                                                         fTokenLength= fPrefixLength;
241                                                         break;
242                                                 }
243
244                                         } else {
245                                                 consume();
246                                                 break;
247                                         }
248                                         
249                                 case '\'':
250                                         fLast= NONE; // ignore fLast
251                                         if (fTokenLength > 0)
252                                                 return preFix(PHP, STRING_SQ, NONE, 1);
253                                         else {                                          
254                                                 preFix(PHP, STRING_SQ, NONE, 1);
255                                                 fTokenOffset += fTokenLength;
256                                                 fTokenLength= fPrefixLength;
257                                                 break;
258                                         }
259
260                                 case '"':
261                                         fLast= NONE; // ignore fLast                            
262                                         if (fTokenLength > 0)
263                                                 return preFix(PHP, STRING_DQ, NONE, 1);
264                                         else {
265                                                 preFix(PHP, STRING_DQ, NONE, 1);
266                                                 fTokenOffset += fTokenLength;
267                                                 fTokenLength= fPrefixLength;
268                                                 break;
269                                         }
270         
271                                 default:
272                                         consume();
273                                         break;
274                                 }
275                                 break;
276         
277                         case SINGLE_LINE_COMMENT:
278                                 consume();
279                                 break;
280                                 
281                         case PHPDOC:
282                                 switch (ch) {
283                                 case '/':
284                                         switch (fLast) {
285                                         case SLASH_STAR_STAR:
286                                                 return postFix(MULTI_LINE_COMMENT);
287         
288                                         case STAR:
289                                                 return postFix(PHPDOC);
290
291                                         default:
292                                                 consume();
293                                                 break;
294                                         }
295                                         break;
296
297                                 case '*':
298                                         fTokenLength++;
299                                         fLast= STAR;
300                                         break;
301
302                                 default:
303                                         consume();
304                                         break;
305                                 }
306                                 break;
307         
308                         case MULTI_LINE_COMMENT:
309                                 switch (ch) {
310                                 case '*':
311                                         if (fLast == SLASH_STAR) {
312                                                 fLast= SLASH_STAR_STAR;
313                                                 fTokenLength++;
314                                                 fState= PHPDOC;
315                                         } else {
316                                                 fTokenLength++;
317                                                 fLast= STAR;
318                                         }
319                                         break;
320         
321                                 case '/':
322                                         if (fLast == STAR) {
323                                                 return postFix(MULTI_LINE_COMMENT);
324                                         } else {
325                                                 consume();
326                                                 break;
327                                         }
328         
329                                 default:
330                                         consume();
331                                         break;                  
332                                 }
333                                 break;
334                                 
335                         case STRING_DQ:
336                                 switch (ch) {
337                                 case '\\':
338                                         fLast= (fLast == BACKSLASH) ? NONE : BACKSLASH;
339                                         fTokenLength++;
340                                         break;
341                                         
342                                 case '\"':                                                      
343                                         if (fLast != BACKSLASH) {
344                                                 return postFix(STRING_DQ);
345
346                                         } else {
347                                                 consume();
348                                                 break;                                  
349                                         }
350                                 
351                                 default:
352                                         consume();
353                                         break;
354                                 }
355                                 break;
356         
357                         case STRING_SQ:
358                                 switch (ch) {
359                                 case '\\':
360                                         fLast= (fLast == BACKSLASH) ? NONE : BACKSLASH;
361                                         fTokenLength++;
362                                         break;
363         
364                                 case '\'':
365                                         if (fLast != BACKSLASH) {
366                                                 return postFix(STRING_SQ);
367         
368                                         } else {
369                                                 consume();
370                                                 break;
371                                         }
372         
373                                 default:
374                                         consume();
375                                         break;
376                                 }
377                                 break;
378                         }
379                 } 
380         }               
381
382         private static final int getLastLength(int last) {
383                 switch (last) {
384                 default:
385                         return -1;
386
387                 case NONE:
388                         return 0;
389                         
390                 case CARRIAGE_RETURN:
391                 case BACKSLASH:
392                 case SLASH:
393                 case STAR:
394                         return 1;
395
396                 case SLASH_STAR:
397                         return 2;
398
399                 case SLASH_STAR_STAR:
400                         return 3;
401                 }       
402         }
403
404         private final void consume() {
405                 fTokenLength++;
406                 fLast= NONE;    
407         }
408         
409         private final IToken postFix(int state) {
410                 fTokenLength++;
411                 fLast= NONE;
412                 fState= HTML;
413                 fPrefixLength= 0;               
414                 return fTokens[state];
415         }
416
417         private final IToken preFix(int state, int newState, int last, int prefixLength) {
418                 // emulate JavaPartitionScanner
419 //              if (fgEmulate && state == JAVA && (fTokenLength - getLastLength(fLast) > 0)) {
420 //                      fTokenLength -= getLastLength(fLast);
421 //                      fJavaOffset= fTokenOffset;
422 //                      fJavaLength= fTokenLength;
423 //                      fTokenLength= 1;
424 //                      fState= newState;
425 //                      fPrefixLength= prefixLength;
426 //                      fLast= last;
427 //                      return fTokens[state];
428 //
429 //              } else {
430                         fTokenLength -= getLastLength(fLast);
431                         fLast= last;
432                         fPrefixLength= prefixLength;
433                         IToken token= fTokens[state];           
434                         fState= newState;
435                         return token;
436 //              }
437         }
438
439         private static int getState(String contentType) {
440
441                 if (contentType == null)
442                         return HTML;
443                 
444                 else if (contentType.equals(IPHPPartitions.PHP_PARTITIONING))
445                         return PHP;
446                 
447                 else if (contentType.equals(IPHPPartitions.PHP_SINGLELINE_COMMENT))
448                         return SINGLE_LINE_COMMENT;
449
450                 else if (contentType.equals(IPHPPartitions.PHP_MULTILINE_COMMENT))
451                         return MULTI_LINE_COMMENT;
452
453                 else if (contentType.equals(IPHPPartitions.PHP_PHPDOC_COMMENT))
454                         return PHPDOC;
455
456                 else if (contentType.equals(IPHPPartitions.PHP_STRING_DQ))
457                         return STRING_DQ;
458                 
459                 else if (contentType.equals(IPHPPartitions.PHP_STRING_SQ))
460                         return STRING_SQ;
461                 
462 //              else if (contentType.equals(SKIP))
463 //                      return CHARACTER;
464                         
465                 else
466                         return HTML;
467         }
468
469         /*
470          * @see IPartitionTokenScanner#setPartialRange(IDocument, int, int, String, int)
471          */
472         public void setPartialRange(IDocument document, int offset, int length, String contentType, int partitionOffset) {
473
474                 fScanner.setRange(document, offset, length);
475                 fTokenOffset= partitionOffset;
476                 fTokenLength= 0;
477                 fPrefixLength= offset - partitionOffset;
478                 fLast= NONE;
479                 
480                 if (offset == partitionOffset) {
481                         // restart at beginning of partition
482                         fState= HTML;
483                 } else {
484                         fState= getState(contentType);                  
485                 }
486
487                 // emulate JavaPartitionScanner
488 //              if (fgEmulate) {
489 //                      fJavaOffset= -1;
490 //                      fJavaLength= 0;
491 //              }
492         }
493
494         /*
495          * @see ITokenScanner#setRange(IDocument, int, int)
496          */
497         public void setRange(IDocument document, int offset, int length) {
498
499                 fScanner.setRange(document, offset, length);
500                 fTokenOffset= offset;
501                 fTokenLength= 0;                
502                 fPrefixLength= 0;
503                 fLast= NONE;
504                 fState= HTML;
505
506                 // emulate JavaPartitionScanner
507 //              if (fgEmulate) {
508 //                      fJavaOffset= -1;
509 //                      fJavaLength= 0;
510 //              }
511         }
512
513         /*
514          * @see ITokenScanner#getTokenLength()
515          */
516         public int getTokenLength() {
517                 return fTokenLength;
518         }
519
520         /*
521          * @see ITokenScanner#getTokenOffset()
522          */
523         public int getTokenOffset() {
524                 return fTokenOffset;
525         }
526
527 }