Initial source from http://www.sf.net/projects/wdte
[phpeclipse.git] / archive / net.sourceforge.phpeclipse.css.core / src / net / sourceforge / phpeclipse / css / core / internal / parser / DefaultCssScanner.java
1 /*
2  * Copyright (c) 2003-2004 Christopher Lenz and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  * 
8  * Contributors:
9  *     Christopher Lenz - initial API and implementation
10  * 
11  * $Id: DefaultCssScanner.java,v 1.1 2004-09-02 18:07:13 jsurfer Exp $
12  */
13
14 package net.sourceforge.phpeclipse.css.core.internal.parser;
15
16 import net.sourceforge.phpeclipse.css.core.internal.text.CssTextUtils;
17 import net.sourceforge.phpeclipse.css.core.parser.ICssScanner;
18 import net.sourceforge.phpeclipse.css.core.parser.ICssTokens;
19 import net.sourceforge.phpeclipse.css.core.parser.IProblem;
20 import net.sourceforge.phpeclipse.css.core.parser.LexicalErrorException;
21
22 import org.eclipse.jface.text.BadLocationException;
23 import org.eclipse.jface.text.IDocument;
24 import org.eclipse.jface.text.IRegion;
25 import org.eclipse.jface.text.Region;
26
27 /**
28  * Default implementation of a lexical scanner for CSS.
29  * 
30  * TODO Add support for character escapes and unicode ranges
31  */
32 public class DefaultCssScanner extends AbstractProblemReporter
33         implements ICssScanner {
34
35         // Instance Variables ------------------------------------------------------
36
37         private int currentChar;
38
39         private int offset;
40
41         private int tokenOffset;
42
43         // ICssScanner Implementation ----------------------------------------------
44
45         /**
46          * @see ICssScanner#getTokenRegion()
47          */
48         public IRegion getTokenRegion() {
49                 return new Region(tokenOffset, offset - tokenOffset);
50         }
51
52         /**
53          * @see ICssScanner#getNextToken()
54          */
55         public int getNextToken() throws LexicalErrorException {
56                 if (document == null) {
57                         throw new IllegalStateException("Source must be set"); //$NON-NLS-1$
58                 }
59                 if (currentChar == -1) {
60                         return ICssTokens.EOF;
61                 }
62                 tokenOffset = offset;
63                 getNextCharOrEnd();
64                 switch (currentChar) {
65                         case '@': {
66                                 return ICssTokens.AT;
67                         }
68                         case ':': {
69                                 return ICssTokens.COLON;
70                         }
71                         case '.': {
72                                 if (Character.isDigit((char) peekNextChar())) {
73                                         return handleNumber();
74                                 }
75                                 return currentChar;
76                         }
77                         case '{': {
78                                 return ICssTokens.LBRACE;
79                         }
80                         case '[': {
81                                 return ICssTokens.LBRACKET;
82                         }
83                         case '(': {
84                                 return ICssTokens.LPAREN;
85                         }
86                         case '}': {
87                                 return ICssTokens.RBRACE;
88                         }
89                         case ']': {
90                                 return ICssTokens.RBRACKET;
91                         }
92                         case ')': {
93                                 return ICssTokens.RPAREN;
94                         }
95                         case ';': {
96                                 return ICssTokens.SEMICOLON;
97                         }
98                         case '/': {
99                                 if (peekNextChar() == '*') {
100                                         getNextCharOrEnd();
101                                         return handleComment();
102                                 }
103                                 return currentChar;
104                         }
105                         case '\'':
106                         case '"': {
107                                 return handleString((char) currentChar);
108                         }
109                         case '<': {
110                                 if (peekNextChar() == '!') {
111                                         getNextCharOrEnd();
112                                         return handleCdo();
113                                 }
114                                 return currentChar;
115                         }
116                         case '-': {
117                                 if (peekNextChar() == '-') {
118                                         getNextCharOrEnd();
119                                         return handleCdc();
120                                 } else if (CssTextUtils.isCssNumberStart((char)
121                                         peekNextChar())) {
122                                         return handleNumber();
123                                 }
124                                 return currentChar;
125                         }
126                         default: {
127                                 if (CssTextUtils.isCssIdentifierStart((char) currentChar)) {
128                                         return handleIdentifier();
129                                 } else if (Character.isDigit((char) currentChar)) {
130                                         return handleNumber();
131                                 } else if (CssTextUtils.isCssWhitespace((char) currentChar)) {
132                                         return handleWhitespace();
133                                 }
134                                 return currentChar;
135                         }
136                 }
137         }
138
139         /**
140          * @see ICssScanner#setSource(IDocument)
141          */
142         public void setSource(IDocument document) {
143                 super.setDocument(document);
144                 currentChar = 0;
145                 offset = 0;
146                 tokenOffset = 0;
147         }
148
149         // Protected Methods -------------------------------------------------------
150
151         protected int handleCdc() throws LexicalErrorException {
152                 if (currentChar != '-') {
153                         throw new IllegalStateException(
154                                 "Not at the beginning of a CDC"); //$NON-NLS-1$
155                 }
156                 if (peekNextChar() != '>') {
157                         // not a CDC, rewind and return the start character
158                         offset -= 2;
159                         return getNextCharOrEnd();
160                 }
161                 getNextCharOrEnd();
162                 return ICssTokens.CDC;
163         }
164
165         protected int handleCdo() throws LexicalErrorException {
166                 if (currentChar != '!') {
167                         throw new IllegalStateException(
168                                 "Not at the beginning of a CDO"); //$NON-NLS-1$
169                 }
170                 if (peekNextChar() != '-') {
171                         // not a CDO, rewind and return the start character
172                         offset -= 2;
173                         return getNextCharOrEnd();
174                 }
175                 getNextCharOrEnd();
176                 if (peekNextChar() != '-') {
177                         // not a CDO, rewind and return the start character
178                         offset -= "!--".length(); //$NON-NLS-1$
179                         return getNextCharOrEnd();
180                 }
181                 getNextCharOrEnd();
182                 return ICssTokens.CDO;
183         }
184
185         protected int handleComment() throws LexicalErrorException {
186                 if (currentChar != '*') {
187                         throw new IllegalStateException(
188                                 "Not at the beginning of a comment"); //$NON-NLS-1$
189                 }
190                 do {
191                         do {
192                                 getNextCharOrError("unterminatedComment"); //$NON-NLS-1$
193                         } while (currentChar != '*');
194                         getNextCharOrError("unterminatedComment"); //$NON-NLS-1$
195                         while (currentChar == '*') {
196                                 getNextCharOrError("unterminatedComment"); //$NON-NLS-1$
197                         }
198                 } while (currentChar != '/');
199                 return getNextToken();
200         }
201
202         protected int handleIdentifier() throws LexicalErrorException {
203                 if (!CssTextUtils.isCssIdentifierStart((char) currentChar)) {
204                         throw new IllegalStateException(
205                                 "Not at the beginning of an identifier"); //$NON-NLS-1$
206                 }
207                 while (CssTextUtils.isCssIdentifierPart((char) peekNextChar())) {
208                         getNextCharOrEnd();
209                 }
210                 return ICssTokens.IDENT;
211         }
212
213         protected int handleNumber() throws LexicalErrorException {
214                 if (!CssTextUtils.isCssNumberStart((char) currentChar)) {
215                         throw new IllegalStateException(
216                                 "Not at the beginning of a number"); //$NON-NLS-1$
217                 }
218                 while (CssTextUtils.isCssNumberPart((char) peekNextChar())) {
219                         getNextCharOrEnd();
220                 }
221                 if (peekNextChar() == '.') {
222                         getNextCharOrEnd();
223                         while (Character.isDigit((char) peekNextChar())) {
224                                 getNextCharOrEnd();
225                         }
226                 }
227                 return ICssTokens.NUM;
228         }
229
230         protected int handleString(char delim) throws LexicalErrorException {
231                 if (currentChar != delim) {
232                         throw new IllegalStateException(
233                                 "Not at the beginning of a string"); //$NON-NLS-1$
234                 }
235                 do {
236                         getNextCharOrError("unterminatedString"); //$NON-NLS-1$
237                         if (currentChar == '\\') {
238                                 getNextCharOrEnd();
239                                 getNextCharOrError("unterminatedString"); //$NON-NLS-1$
240                         } else if (currentChar == '\n') {
241                                 reportError("unescapedNewlineInString", //$NON-NLS-1$
242                                         new Region(tokenOffset, offset - tokenOffset));
243                         }
244                 } while (currentChar != delim);
245                 return ICssTokens.STRING;
246         }
247
248         protected int handleWhitespace() throws LexicalErrorException {
249                 if (!CssTextUtils.isCssWhitespace((char) currentChar)) {
250                         throw new IllegalStateException(
251                                 "Not at the beginning of white space"); //$NON-NLS-1$
252                 }
253                 while (CssTextUtils.isCssWhitespace((char) peekNextChar())) {
254                         getNextCharOrEnd();
255                 }
256                 return getNextToken();
257         }
258
259         /**
260          * Positions the scanner over the next character in the source and returns
261          * that character. If the end of the source is reached, an 'unexpected end
262          * of file' error is reported. 
263          * 
264          * @param errorId the ID of the error to report
265          * @return the next character in the source, or -1 if the end of the source
266          *         has been reached
267          * @throws LexicalErrorException If the end of the source has been reached
268          *         and no problem collector has been configured to collect the 
269          *         problem
270          */
271         protected final int getNextCharOrError(String errorId)
272                 throws LexicalErrorException {
273                 getNextCharOrEnd();
274                 if (currentChar == -1) {
275                         IProblem error = reportError(errorId,
276                                 new Region(tokenOffset, offset - tokenOffset));
277                         throw new LexicalErrorException(error.getMessage());
278                 }
279                 return currentChar;
280         }
281
282         /**
283          * Positions the scanner over the next character in the source and returns
284          * that character.
285          * 
286          * @return the next character in the source, or -1 if the end of the source
287          *         has been reached
288          */
289         protected final int getNextCharOrEnd() {
290                 try {
291                         currentChar = document.getChar(offset);
292                         if (currentChar != -1) {
293                                 offset++;
294                         }
295                 } catch (BadLocationException e) {
296                         currentChar = -1;
297                 }
298                 return currentChar;
299         }
300
301         /**
302          * Returns the next character in the source without changing the scanners
303          * current position (look ahead).
304          * 
305          * @return the next character in the source, or -1 if the end of the source
306          *         has been reached
307          */
308         protected final int peekNextChar() {
309                 int retVal = -1;
310                 try {
311                         retVal = document.getChar(offset);
312                 } catch (BadLocationException e) {
313                         // ignore
314                 }
315                 return retVal;
316         }
317
318 }