/* * Copyright (c) 2003-2004 Christopher Lenz and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Common Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/cpl-v10.html * * Contributors: * Christopher Lenz - initial API and implementation * * $Id: DefaultCssScanner.java,v 1.1 2004-09-02 18:07:13 jsurfer Exp $ */ package net.sourceforge.phpeclipse.css.core.internal.parser; import net.sourceforge.phpeclipse.css.core.internal.text.CssTextUtils; import net.sourceforge.phpeclipse.css.core.parser.ICssScanner; import net.sourceforge.phpeclipse.css.core.parser.ICssTokens; import net.sourceforge.phpeclipse.css.core.parser.IProblem; import net.sourceforge.phpeclipse.css.core.parser.LexicalErrorException; import org.eclipse.jface.text.BadLocationException; import org.eclipse.jface.text.IDocument; import org.eclipse.jface.text.IRegion; import org.eclipse.jface.text.Region; /** * Default implementation of a lexical scanner for CSS. * * TODO Add support for character escapes and unicode ranges */ public class DefaultCssScanner extends AbstractProblemReporter implements ICssScanner { // Instance Variables ------------------------------------------------------ private int currentChar; private int offset; private int tokenOffset; // ICssScanner Implementation ---------------------------------------------- /** * @see ICssScanner#getTokenRegion() */ public IRegion getTokenRegion() { return new Region(tokenOffset, offset - tokenOffset); } /** * @see ICssScanner#getNextToken() */ public int getNextToken() throws LexicalErrorException { if (document == null) { throw new IllegalStateException("Source must be set"); //$NON-NLS-1$ } if (currentChar == -1) { return ICssTokens.EOF; } tokenOffset = offset; getNextCharOrEnd(); switch (currentChar) { case '@': { return ICssTokens.AT; } case ':': { return ICssTokens.COLON; } case '.': { if (Character.isDigit((char) peekNextChar())) { return handleNumber(); } return currentChar; } case '{': { return ICssTokens.LBRACE; } case '[': { return ICssTokens.LBRACKET; } case '(': { return ICssTokens.LPAREN; } case '}': { return ICssTokens.RBRACE; } case ']': { return ICssTokens.RBRACKET; } case ')': { return ICssTokens.RPAREN; } case ';': { return ICssTokens.SEMICOLON; } case '/': { if (peekNextChar() == '*') { getNextCharOrEnd(); return handleComment(); } return currentChar; } case '\'': case '"': { return handleString((char) currentChar); } case '<': { if (peekNextChar() == '!') { getNextCharOrEnd(); return handleCdo(); } return currentChar; } case '-': { if (peekNextChar() == '-') { getNextCharOrEnd(); return handleCdc(); } else if (CssTextUtils.isCssNumberStart((char) peekNextChar())) { return handleNumber(); } return currentChar; } default: { if (CssTextUtils.isCssIdentifierStart((char) currentChar)) { return handleIdentifier(); } else if (Character.isDigit((char) currentChar)) { return handleNumber(); } else if (CssTextUtils.isCssWhitespace((char) currentChar)) { return handleWhitespace(); } return currentChar; } } } /** * @see ICssScanner#setSource(IDocument) */ public void setSource(IDocument document) { super.setDocument(document); currentChar = 0; offset = 0; tokenOffset = 0; } // Protected Methods ------------------------------------------------------- protected int handleCdc() throws LexicalErrorException { if (currentChar != '-') { throw new IllegalStateException( "Not at the beginning of a CDC"); //$NON-NLS-1$ } if (peekNextChar() != '>') { // not a CDC, rewind and return the start character offset -= 2; return getNextCharOrEnd(); } getNextCharOrEnd(); return ICssTokens.CDC; } protected int handleCdo() throws LexicalErrorException { if (currentChar != '!') { throw new IllegalStateException( "Not at the beginning of a CDO"); //$NON-NLS-1$ } if (peekNextChar() != '-') { // not a CDO, rewind and return the start character offset -= 2; return getNextCharOrEnd(); } getNextCharOrEnd(); if (peekNextChar() != '-') { // not a CDO, rewind and return the start character offset -= "!--".length(); //$NON-NLS-1$ return getNextCharOrEnd(); } getNextCharOrEnd(); return ICssTokens.CDO; } protected int handleComment() throws LexicalErrorException { if (currentChar != '*') { throw new IllegalStateException( "Not at the beginning of a comment"); //$NON-NLS-1$ } do { do { getNextCharOrError("unterminatedComment"); //$NON-NLS-1$ } while (currentChar != '*'); getNextCharOrError("unterminatedComment"); //$NON-NLS-1$ while (currentChar == '*') { getNextCharOrError("unterminatedComment"); //$NON-NLS-1$ } } while (currentChar != '/'); return getNextToken(); } protected int handleIdentifier() throws LexicalErrorException { if (!CssTextUtils.isCssIdentifierStart((char) currentChar)) { throw new IllegalStateException( "Not at the beginning of an identifier"); //$NON-NLS-1$ } while (CssTextUtils.isCssIdentifierPart((char) peekNextChar())) { getNextCharOrEnd(); } return ICssTokens.IDENT; } protected int handleNumber() throws LexicalErrorException { if (!CssTextUtils.isCssNumberStart((char) currentChar)) { throw new IllegalStateException( "Not at the beginning of a number"); //$NON-NLS-1$ } while (CssTextUtils.isCssNumberPart((char) peekNextChar())) { getNextCharOrEnd(); } if (peekNextChar() == '.') { getNextCharOrEnd(); while (Character.isDigit((char) peekNextChar())) { getNextCharOrEnd(); } } return ICssTokens.NUM; } protected int handleString(char delim) throws LexicalErrorException { if (currentChar != delim) { throw new IllegalStateException( "Not at the beginning of a string"); //$NON-NLS-1$ } do { getNextCharOrError("unterminatedString"); //$NON-NLS-1$ if (currentChar == '\\') { getNextCharOrEnd(); getNextCharOrError("unterminatedString"); //$NON-NLS-1$ } else if (currentChar == '\n') { reportError("unescapedNewlineInString", //$NON-NLS-1$ new Region(tokenOffset, offset - tokenOffset)); } } while (currentChar != delim); return ICssTokens.STRING; } protected int handleWhitespace() throws LexicalErrorException { if (!CssTextUtils.isCssWhitespace((char) currentChar)) { throw new IllegalStateException( "Not at the beginning of white space"); //$NON-NLS-1$ } while (CssTextUtils.isCssWhitespace((char) peekNextChar())) { getNextCharOrEnd(); } return getNextToken(); } /** * Positions the scanner over the next character in the source and returns * that character. If the end of the source is reached, an 'unexpected end * of file' error is reported. * * @param errorId the ID of the error to report * @return the next character in the source, or -1 if the end of the source * has been reached * @throws LexicalErrorException If the end of the source has been reached * and no problem collector has been configured to collect the * problem */ protected final int getNextCharOrError(String errorId) throws LexicalErrorException { getNextCharOrEnd(); if (currentChar == -1) { IProblem error = reportError(errorId, new Region(tokenOffset, offset - tokenOffset)); throw new LexicalErrorException(error.getMessage()); } return currentChar; } /** * Positions the scanner over the next character in the source and returns * that character. * * @return the next character in the source, or -1 if the end of the source * has been reached */ protected final int getNextCharOrEnd() { try { currentChar = document.getChar(offset); if (currentChar != -1) { offset++; } } catch (BadLocationException e) { currentChar = -1; } return currentChar; } /** * Returns the next character in the source without changing the scanners * current position (look ahead). * * @return the next character in the source, or -1 if the end of the source * has been reached */ protected final int peekNextChar() { int retVal = -1; try { retVal = document.getChar(offset); } catch (BadLocationException e) { // ignore } return retVal; } }