1) Fixed issue #215: Escaping Strings
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpeclipse / phpeditor / php / PHPCodeScanner.java
1 /**********************************************************************
2  Copyright (c) 2000, 2002 IBM Corp. and others.
3  All rights reserved. This program and the accompanying materials
4  are made available under the terms of the Common Public License v1.0
5  which accompanies this distribution, and is available at
6  http://www.eclipse.org/legal/cpl-v10.html
7
8  Contributors:
9  IBM Corporation - Initial implementation
10  www.phpeclipse.de
11  **********************************************************************/
12 package net.sourceforge.phpeclipse.phpeditor.php;
13
14 import java.util.ArrayList;
15 import java.util.HashMap;
16 import java.util.List;
17 import java.util.Map;
18
19 import net.sourceforge.phpdt.internal.ui.text.AbstractJavaScanner;
20 import net.sourceforge.phpdt.ui.text.IColorManager;
21 import net.sourceforge.phpeclipse.IPreferenceConstants;
22 import net.sourceforge.phpeclipse.phpeditor.PHPSyntaxRdr;
23 import net.sourceforge.phpeclipse.phpeditor.util.PHPWhitespaceDetector;
24 import net.sourceforge.phpeclipse.phpeditor.util.PHPWordDetector;
25
26 import org.eclipse.jface.preference.IPreferenceStore;
27 //incastrix
28 //import org.eclipse.jface.text.Assert;
29 import org.eclipse.core.runtime.Assert;
30 import org.eclipse.jface.text.rules.ICharacterScanner;
31 import org.eclipse.jface.text.rules.IRule;
32 import org.eclipse.jface.text.rules.IToken;
33 import org.eclipse.jface.text.rules.IWordDetector;
34 import org.eclipse.jface.text.rules.MultiLineRule;
35 import org.eclipse.jface.text.rules.Token;
36 import org.eclipse.jface.text.rules.WhitespaceRule;
37 import org.eclipse.jface.text.rules.WordRule;
38
39 /**
40  * PHP Code Scanner
41  */
42 public class PHPCodeScanner extends AbstractJavaScanner {
43
44         /**
45          * Rule to detect java operators.
46          * 
47          * @since 3.0
48          */
49         protected class OperatorRule implements IRule {
50
51                 /** Java operators */
52                 private final char[] PHP_OPERATORS = { ';', '(', ')', '.', '=', '/',
53                                 '\\', '+', '-', '*', '[', ']', '<', '>', ':', '?', '!', ',',
54                                 '|', '&', '^', '%', '~', '@' };
55
56                 /** Token to return for this rule */
57                 private final IToken fToken;
58
59                 /** Token to return for braces */
60                 private final IToken fTokenBraces;
61
62                 /** Token to return for heredocs */
63                 private final IToken fTokenHeredoc;
64
65                 /**
66                  * Creates a new operator rule.
67                  * 
68                  * @param token
69                  *            Token to use for this rule
70                  * @param tokenHeredoc
71                  *            TODO
72                  */
73                 public OperatorRule(IToken token, IToken tokenBraces,
74                                 IToken tokenHeredoc) {
75                         fToken = token;
76                         fTokenBraces = tokenBraces;
77                         fTokenHeredoc = tokenHeredoc;
78                 }
79
80                 /**
81                  * Is this character an operator character?
82                  * 
83                  * @param character
84                  *            Character to determine whether it is an operator character
85                  * @return <code>true</code> iff the character is an operator,
86                  *         <code>false</code> otherwise.
87                  */
88                 public boolean isOperator(char character) {
89                         for (int index = 0; index < PHP_OPERATORS.length; index++) {
90                                 if (PHP_OPERATORS[index] == character)
91                                         return true;
92                         }
93                         return false;
94                 }
95
96                 /*
97                  * @see org.eclipse.jface.text.rules.IRule#evaluate(org.eclipse.jface.text.rules.ICharacterScanner)
98                  */
99                 public IToken evaluate(ICharacterScanner scanner) {
100
101                         int character = scanner.read();
102                         if (character == '{' || character == '}') {
103                                 return fTokenBraces;
104                         }
105                         if (isOperator((char) character)) {
106                                 int lastCharacter = character;
107                                 character = scanner.read();
108                                 // the readHEREDOC(scanner) call doesn't work, if we have our
109                                 // own partitions for single quoted
110                                 // or double quoted strings:
111                                 //
112                                 // if (lastCharacter == '<' && character == '<') {
113                                 // int heredocCharacter = scanner.read();
114                                 // if (heredocCharacter == '<') {
115                                 // // start of heredoc comment;
116                                 // if (readHEREDOC(scanner)) {
117                                 // return fTokenHeredoc;
118                                 // }
119                                 // } else {
120                                 // scanner.unread();
121                                 // }
122                                 // }
123                                 if (!isOperator((char) character)) {
124                                         scanner.unread();
125                                         return fToken;
126                                 }
127                                 if (checkPHPTag(scanner, lastCharacter, character)) {
128                                         return Token.UNDEFINED;
129                                 }
130                                 do {
131                                         lastCharacter = character;
132                                         character = scanner.read();
133                                         if (checkPHPTag(scanner, lastCharacter, character)) {
134                                                 return fToken;
135                                         }
136                                         if (character == ICharacterScanner.EOF) {
137                                                 return fToken;
138                                         }
139                                 } while (isOperator((char) character));
140                                 scanner.unread();
141                                 return fToken;
142                         } else {
143                                 scanner.unread();
144                                 return Token.UNDEFINED;
145                         }
146                 }
147
148                 // private boolean readHEREDOC(ICharacterScanner scanner) {
149                 // // search until heredoc ends
150                 // int ch;
151                 // StringBuffer buf = new StringBuffer();
152                 // char[] heredocIdent;
153                 //
154                 // ch = scanner.read();
155                 // if (!Scanner.isPHPIdentifierStart((char)ch)) {
156                 // scanner.unread();
157                 // scanner.unread();
158                 // return false;
159                 // }
160                 // while (Scanner.isPHPIdentifierPart((char)ch)) {
161                 // buf.append((char)ch);
162                 // ch = scanner.read();
163                 // }
164                 // if (ch==ICharacterScanner.EOF) {
165                 // return true;
166                 // }
167                 // heredocIdent = buf.toString().toCharArray();
168                 // while (true) {
169                 // ch = scanner.read();
170                 // if (ch==ICharacterScanner.EOF) {
171                 // return true;
172                 // }
173                 // if (ch == '\n') { // heredoc could end after a newline
174                 // int pos = 0;
175                 // while (true) {
176                 // if (pos == heredocIdent.length) {
177                 // return true;
178                 // }
179                 // ch = scanner.read(); // ignore escaped character
180                 // if (ch != heredocIdent[pos]) {
181                 // break;
182                 // }
183                 // if (ch==ICharacterScanner.EOF) {
184                 // return true;
185                 // }
186                 // pos++;
187                 // }
188                 // }
189                 // }
190                 // }
191
192                 /**
193                  * Check if lastCharacter/character are a PHP start or end token ( &lt;?
194                  * ... ?&gt; )
195                  * 
196                  * @param scanner
197                  * @param lastCharacter
198                  * @param character
199                  * @return
200                  */
201                 private boolean checkPHPTag(ICharacterScanner scanner,
202                                 int lastCharacter, int character) {
203                         if (lastCharacter == '<' && character == '?') {
204                                 scanner.unread();
205                                 scanner.unread();
206                                 return true;
207                         } else if (lastCharacter == '?' && character == '>') {
208                                 scanner.unread();
209                                 scanner.unread();
210                                 return true;
211                         }
212                         return false;
213                 }
214         }
215
216         protected class AccentStringRule implements IRule {
217
218                 /** Token to return for this rule */
219                 private final IToken fToken;
220
221                 public AccentStringRule(IToken token) {
222                         fToken = token;
223
224                 }
225
226                 /*
227                  * @see org.eclipse.jface.text.rules.IRule#evaluate(org.eclipse.jface.text.rules.ICharacterScanner)
228                  */
229                 public IToken evaluate(ICharacterScanner scanner) {
230
231                         int character = scanner.read();
232
233                         if (character == '`') {
234
235                                 while (character != ICharacterScanner.EOF) {
236                                         character = scanner.read();
237                                         if (character == '\\') {
238                                                 character = scanner.read();
239                                         } else if (character == '`') {
240                                                 return fToken;
241                                         }
242                                 }
243                                 scanner.unread();
244                                 return Token.UNDEFINED;
245                         } else {
246                                 scanner.unread();
247                                 return Token.UNDEFINED;
248                         }
249                 }
250
251         }
252
253         private class PHPWordRule extends WordRule {
254                 private StringBuffer fBuffer = new StringBuffer();
255
256                 protected Map fWordsIgnoreCase = new HashMap();
257
258                 public PHPWordRule(IWordDetector detector) {
259                         super(detector, Token.UNDEFINED);
260                 }
261
262                 public PHPWordRule(IWordDetector detector, IToken defaultToken) {
263                         super(detector, defaultToken);
264                 }
265
266                 /**
267                  * Adds a word and the token to be returned if it is detected.
268                  * 
269                  * @param word
270                  *            the word this rule will search for, may not be
271                  *            <code>null</code>
272                  * @param token
273                  *            the token to be returned if the word has been found, may
274                  *            not be <code>null</code>
275                  */
276                 public void addWordIgnoreCase(String word, IToken token) {
277                         Assert.isNotNull(word);
278                         Assert.isNotNull(token);
279
280                         fWordsIgnoreCase.put(word, token);
281                 }
282
283                 public IToken evaluate(ICharacterScanner scanner) {
284                         int c = scanner.read();
285                         boolean isVariable = false;
286                         boolean isUnderscore = false;
287                         String word;
288                         if (c == '<') {
289                                 c = scanner.read();
290                                 if (c != '?') {
291                                         scanner.unread();
292                                         scanner.unread();
293                                         return Token.UNDEFINED;
294                                 } else {
295                                         c = scanner.read();
296                                         if (c == '=') { // <?=
297                                                 return getToken(IPreferenceConstants.PHP_TAG);
298                                         }
299                                         if (c != 'p' && c != 'P') {
300                                                 scanner.unread();
301                                                 return getToken(IPreferenceConstants.PHP_TAG);
302                                         } else {
303                                                 c = scanner.read();
304                                                 if (c != 'h' && c != 'H') {
305                                                         scanner.unread();
306                                                         scanner.unread();
307                                                         return getToken(IPreferenceConstants.PHP_TAG);
308                                                 } else {
309                                                         c = scanner.read();
310                                                         if (c != 'p' && c != 'P') {
311                                                                 scanner.unread();
312                                                                 scanner.unread();
313                                                                 scanner.unread();
314                                                                 return getToken(IPreferenceConstants.PHP_TAG);
315                                                         } else {
316                                                                 return getToken(IPreferenceConstants.PHP_TAG);
317                                                         }
318                                                 }
319                                         }
320                                 }
321                         }
322                         if (c == '?') {
323                                 c = scanner.read();
324                                 if (c == '>') {
325                                         return getToken(IPreferenceConstants.PHP_TAG);
326                                 }
327                                 scanner.unread();
328                                 scanner.unread();
329                                 return Token.UNDEFINED;
330                         }
331                         if (fDetector.isWordStart((char) c)) {
332                                 if (c == '$') {
333                                         isVariable = true;
334                                 }
335                                 if (fColumn == UNDEFINED
336                                                 || (fColumn == scanner.getColumn() - 1)) {
337
338                                         fBuffer.setLength(0);
339                                         fBuffer.append((char) c);
340                                         c = scanner.read();
341                                         if (c == '_') {
342                                                 isUnderscore = true;
343                                         }
344                                         while (c != ICharacterScanner.EOF
345                                                         && fDetector.isWordPart((char) c)) {
346                                                 fBuffer.append((char) c);
347                                                 c = scanner.read();
348                                         }
349                                         scanner.unread();
350
351                                         if (isVariable) {
352                                                 if (isUnderscore) {
353                                                         return getToken(IPreferenceConstants.PHP_VARIABLE_DOLLAR);
354                                                 }
355                                                 return getToken(IPreferenceConstants.PHP_VARIABLE);
356                                         }
357                                         word = fBuffer.toString();
358                                         IToken token = (IToken) fWords.get(word);
359                                         if (token != null)
360                                                 return token;
361
362                                         token = (IToken) fWordsIgnoreCase.get(word.toLowerCase());
363                                         if (token != null)
364                                                 return token;
365
366                                         if (fDefaultToken.isUndefined())
367                                                 unreadBuffer(scanner);
368
369                                         return fDefaultToken;
370                                 }
371                         }
372
373                         scanner.unread();
374                         return Token.UNDEFINED;
375                 }
376         }
377
378         // private PHPColorProvider fColorProvider;
379
380         private static String[] fgTokenProperties = {
381                         IPreferenceConstants.PHP_MULTILINE_COMMENT,
382                         IPreferenceConstants.PHP_SINGLELINE_COMMENT,
383                         IPreferenceConstants.PHP_TAG, IPreferenceConstants.PHP_KEYWORD,
384                         IPreferenceConstants.PHP_FUNCTIONNAME,
385                         IPreferenceConstants.PHP_VARIABLE,
386                         IPreferenceConstants.PHP_VARIABLE_DOLLAR,
387                         IPreferenceConstants.PHP_STRING_DQ,
388                         IPreferenceConstants.PHP_STRING_SQ, IPreferenceConstants.PHP_TYPE,
389                         IPreferenceConstants.PHP_CONSTANT,
390                         IPreferenceConstants.PHP_DEFAULT,
391                         IPreferenceConstants.PHP_OPERATOR,
392                         IPreferenceConstants.PHP_BRACE_OPERATOR,
393                         IPreferenceConstants.PHP_KEYWORD_RETURN };
394
        /**
         * Creates a PHP code scanner.
         * 
         * @param manager
         *            the color manager, passed on to the superclass constructor
         * @param store
         *            the preference store, passed on to the superclass constructor
         */
        public PHPCodeScanner(IColorManager manager, IPreferenceStore store) {
                super(manager, store);
                // NOTE(review): presumably sets up tokens and rules via
                // getTokenProperties()/createRules() - confirm in AbstractJavaScanner
                initialize();
        }
404
        /**
         * Returns the preference keys of the tokens used by this scanner.
         * 
         * @return the shared <code>fgTokenProperties</code> array (not a copy)
         * @see AbstractJavaScanner#getTokenProperties()
         */
        protected String[] getTokenProperties() {
                return fgTokenProperties;
        }
411
412         /*
413          * @see AbstractJavaScanner#createRules()
414          */
415         protected List createRules() {
416                 List rules = new ArrayList();
417                 Token token = getToken(IPreferenceConstants.PHP_SINGLELINE_COMMENT);
418                 // Add rule for single line comments.
419                 // rules.add(new EndOfLineRule("//", token)); //$NON-NLS-1$
420                 // rules.add(new EndOfLineRule("#", token)); //$NON-NLS-1$
421                 // Add rule for strings and character constants.
422                 // token = getToken(IPreferenceConstants.PHP_STRING_SQ);
423                 // rules.add(new SingleQuoteStringRule(token));
424                 // token = getToken(IPreferenceConstants.PHP_STRING_DQ);
425                 // rules.add(new DoubleQuoteStringRule(token));
426                 rules.add(new AccentStringRule(token));
427
428                 token = getToken(IPreferenceConstants.PHP_MULTILINE_COMMENT);
429                 rules.add(new MultiLineRule("/*", "*/", token)); //$NON-NLS-2$ //$NON-NLS-1$
430                 // Add generic whitespace rule.
431                 rules.add(new WhitespaceRule(new PHPWhitespaceDetector()));
432                 // Add word rule for keywords, types, and constants.
433                 token = getToken(IPreferenceConstants.PHP_DEFAULT);
434                 PHPWordRule wordRule = new PHPWordRule(new PHPWordDetector(), token);
435
436                 Token keyword = getToken(IPreferenceConstants.PHP_KEYWORD);
437                 Token functionName = getToken(IPreferenceConstants.PHP_FUNCTIONNAME);
438                 Token type = getToken(IPreferenceConstants.PHP_TYPE);
439                 Token constant = getToken(IPreferenceConstants.PHP_CONSTANT);
440
441                 ArrayList buffer = PHPSyntaxRdr.getSyntaxData();
442                 // String strbuffer = null; unused
443                 PHPElement elbuffer = null;
444                 String name;
445                 for (int i = 0; i < buffer.size(); i++) {
446                         // while ((buffer != null)
447                         // && (!buffer.isEmpty()
448                         // && ((elbuffer = (PHPElement) buffer.remove(0)) != null))) {
449                         elbuffer = (PHPElement) buffer.get(i);
450                         if (elbuffer instanceof PHPKeyword) {
451                                 name = ((PHPKeyword) elbuffer).getName();
452                                 if (!name.equals("return")) {
453                                         wordRule.addWord(name, keyword);
454                                 }
455                         } else if (elbuffer instanceof PHPFunction) {
456                                 wordRule.addWordIgnoreCase(((PHPFunction) elbuffer).getName(),
457                                                 functionName);
458                         } else if (elbuffer instanceof PHPType) {
459                                 wordRule.addWord(elbuffer.getName(), type);
460                         } else if (elbuffer instanceof PHPConstant) {
461                                 wordRule.addWord(elbuffer.getName(), constant);
462                         }
463                 }
464
465                 // Add word rule for keyword 'return'.
466                 token = getToken(IPreferenceConstants.PHP_KEYWORD_RETURN);
467                 wordRule.addWord("return", token);
468
469                 // Add rule for operators and brackets (at the end !)
470                 rules.add(new OperatorRule(getToken(IPreferenceConstants.PHP_OPERATOR),
471                                 getToken(IPreferenceConstants.PHP_BRACE_OPERATOR),
472                                 getToken(IPreferenceConstants.PHP_STRING_DQ)));
473
474                 rules.add(wordRule);
475
476                 setDefaultReturnToken(getToken(IPreferenceConstants.PHP_DEFAULT));
477                 return rules;
478         }
479 }