/*******************************************************************************
 * Copyright (c) 2000, 2004 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Common Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/cpl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/
package net.sourceforge.phpdt.internal.ui.text;

import net.sourceforge.phpeclipse.ui.text.rules.AbstractPartitioner;

//incastrix
//import org.eclipse.jface.text.Assert;
import org.eclipse.core.runtime.Assert;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.rules.ICharacterScanner;
import org.eclipse.jface.text.rules.IPartitionTokenScanner;
import org.eclipse.jface.text.rules.IToken;
import org.eclipse.jface.text.rules.Token;

/**
 * This scanner recognizes PHPDoc comments, PHP multi line comments, PHP
 * single line comments and PHP strings (double quoted, single quoted and heredoc).
 */
public class FastJavaPartitionScanner implements IPartitionTokenScanner,
		IPHPPartitions {

	// states
	
	private static enum PartState {
		PHP,
		SINGLE_LINE_COMMENT,
		MULTI_LINE_COMMENT,
		PHPDOC,
		STRING_DQ,
		STRING_SQ,
		STRING_HEREDOC,
	};

	private static enum ScanState {
		NONE,
		BACKSLASH,									// postfix for STRING_DQ and CHARACTER
		SLASH,										// prefix for SINGLE_LINE or MULTI_LINE or JAVADOC
		SLASH_STAR,									// prefix for MULTI_LINE_COMMENT or JAVADOC 
		SLASH_STAR_STAR,							// prefix for MULTI_LINE_COMMENT or JAVADOC
		STAR,										// postfix for MULTI_LINE_COMMENT or JAVADOC
		CARRIAGE_RETURN,							// postfix for STRING_DQ, CHARACTER and SINGLE_LINE_COMMENT
		LESS,										// Found a '<'
		LESS_LESS,									// Found a '<<'
		LESS_LESS_LESS,								// Found a '<<<'
		HEREDOC_ID,									// Found a '<<<' and scanning ID (till and of id, which is a ' ')
		HEREDOC,									// Found a '<<<' and ID
		HEREDOC_ID_END,								// Searching the heredoc end ID
	};

	/** The heredoc ID string collected right after the opening '<<<'. */
	private String fHeredocId;
	
	/**
	 * The possible heredoc end ID, collected from the characters following a line break
	 * inside a heredoc string. Collection stops at a ';'; the heredoc partition ends when
	 * this string matches fHeredocId.
	 */
	private String fHeredocIdEnd;
	
	/** The character scanner all reads and unreads go through. */
	private final BufferedDocumentScanner fScanner = new BufferedDocumentScanner (1000); // faster implementation

	/** The offset of the last returned token. */
	private int fTokenOffset;

	/** The length of the last returned token. */
	private int fTokenLength;

	/** The state of the scanner, i.e. the partition type currently being scanned. */
	private PartState fState;

	/** The last significant characters read. */
	private ScanState fLast;
	
	/** The amount of characters already read on first call to nextToken(). */
	private int fPrefixLength;

	/** Whether the behavior of JavaPartitionScanner is emulated. */
	private boolean fEmulate = false;

	/** Emulation only: offset of the region recorded by preFix(); -1 means no region is pending. */
	private int fJavaOffset;

	/** Emulation only: length of the region recorded by preFix(). */
	private int fJavaLength;

	/**
	 * The tokens returned by this scanner, indexed by PartState.ordinal().
	 * The order of the entries must therefore match the declaration order of PartState.
	 */
	private final IToken[] fTokens = new IToken[] { 
			new Token (null),
			new Token (PHP_SINGLELINE_COMMENT),
			new Token (PHP_MULTILINE_COMMENT), 
			new Token (PHP_PHPDOC_COMMENT),
			new Token (PHP_STRING_DQ), 
			new Token (PHP_STRING_SQ),
			new Token (PHP_STRING_HEREDOC)};

	/**
	 * Creates a scanner.
	 *
	 * @param emulate <code>true</code> to emulate the behavior of JavaPartitionScanner
	 */
	public FastJavaPartitionScanner(boolean emulate) {
		this.fEmulate = emulate;
	}

	/**
	 * Creates a scanner that does not emulate JavaPartitionScanner.
	 */
	public FastJavaPartitionScanner() {
		this(false);
	}

	/**
	 * Scans forward from the end of the previously returned token and returns the
	 * token of the next completed partition.
	 * <p>
	 * The new token starts at <code>fTokenOffset + fTokenLength</code> of the previous
	 * one and initially has the length <code>fPrefixLength</code>, i.e. the characters
	 * of the partition start that were already read while the previous partition was
	 * being finished. Characters are then read one by one; what a character means
	 * depends on the current partition type (<code>fState</code>) and on the last
	 * significant characters (<code>fLast</code>).
	 * <p>
	 * In emulation mode (see <code>fEmulate</code>) a region recorded by
	 * <code>preFix()</code> is handed out first: the default token is returned
	 * repeatedly, each time advancing the offset by the current token length, until
	 * the end of the recorded region is reached.
	 *
	 * @return the token of the partition that was completed, or {@link Token#EOF}
	 *         when the end of the range is reached and no characters are pending
	 * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
	 */
	public IToken nextToken() {
		if (fEmulate) {
			if ((fJavaOffset != -1) && (fTokenOffset + fTokenLength != fJavaOffset + fJavaLength)) {
				fTokenOffset += fTokenLength;

				return fTokens[PartState.PHP.ordinal()];
			}
			else {
				fJavaOffset = -1;
				fJavaLength = 0;
			}
		}

		fTokenOffset += fTokenLength;				// The new token offset is the offset of the previous partition + length of previous partition
		fTokenLength  = fPrefixLength;				// The new partition is at least the length of the start of the new partition

		while (true) {
			final int ch = fScanner.read();

			// First pass: characters that are handled independently of (or before) the partition state
			switch (ch) {
				case ICharacterScanner.EOF:
					if (fTokenLength > 0) {						// Pending characters: report them as a last partition
						fLast = ScanState.NONE; // ignore last
						return preFix (fState, PartState.PHP, ScanState.NONE, 0);
					}
					else {
						fLast = ScanState.NONE;
						fPrefixLength = 0;
						return Token.EOF;
					}

				case '\r':										// Found a carriage return
					// emulate JavaPartitionScanner
					if (!fEmulate && (fLast != ScanState.CARRIAGE_RETURN)) {
						fLast = ScanState.CARRIAGE_RETURN;		// Set to what we currently found
						fTokenLength++;							// and count the partition length

						continue;								// Go for the next character to read
					}
					else {
						switch (fState) {
							case SINGLE_LINE_COMMENT:
								if (fTokenLength > 0) {			// The comment ends here
									IToken token = fTokens[fState.ordinal()];

									// emulate JavaPartitionScanner
									if (fEmulate) {
										fTokenLength++;			// The '\r' still belongs to the comment
										fLast = ScanState.NONE;
										fPrefixLength = 0;
									}
									else {
										fLast = ScanState.CARRIAGE_RETURN;
										fPrefixLength = 1;		// This '\r' is the first character of the next partition
									}

									fState = PartState.PHP;
									return token;
								}
								else {
									consume();
									continue;
								}

							default:
								consume();
								continue;
						}
					}

				case '\n':											// Found a line feed
					switch (fState) {
						case SINGLE_LINE_COMMENT:					// Within a single line comment
							return postFix (fState);				// the line feed ends the comment (and is part of it)

						case STRING_HEREDOC:						// If we running within a heredoc string
							fTokenLength++;							// Count the character
							fLast = ScanState.CARRIAGE_RETURN;		// and remember that a new line starts (a heredoc end ID may follow)
							continue;

						default:									// If running anywhere else than on a single line comment
							consume();								// count the length of the current partition
							continue;
					}

				case '?':
					if (fState == PartState.SINGLE_LINE_COMMENT) {
						int nextch = fScanner.read();

						if (nextch == '>') {
							// <h1>This is an <?php # echo 'simple' ?> example.</h1>
							// The comment ends in front of the "?>": both characters are pushed back
							// and the decrement compensates the increment done by postFix().
							fTokenLength--;
							fScanner.unread();
							fScanner.unread();

							return postFix (fState);
						}
						else {
							// bug #1404228: Crash on <?php // comment ?>
							if (nextch != ICharacterScanner.EOF) {
								fScanner.unread();
							}
						}
					}
					// falls through: the '?' itself is treated like any other character

				default:
					// A lone '\r' (not followed by '\n') was read while inside a single line comment:
					// the comment ended with that '\r' and the current character already belongs
					// to the next partition.
					if (!fEmulate && (fLast == ScanState.CARRIAGE_RETURN)) {
						switch (fState) {
							case SINGLE_LINE_COMMENT:
								// case CHARACTER:
								// case STRING_DQ:
								// case STRING_SQ:
								ScanState last;
								PartState newState;

								switch (ch) {
									case '/':
										last = ScanState.SLASH;
										newState = PartState.PHP;
										break;

									case '*':
										last = ScanState.STAR;
										newState = PartState.PHP;
										break;

									case '#':						// A '#' comment directly follows the ended comment
										last = ScanState.NONE;
										newState = PartState.SINGLE_LINE_COMMENT;
										break;

									case '\'':
										last = ScanState.NONE;
										newState = PartState.STRING_SQ;
										break;

									case '"':
										last = ScanState.NONE;
										newState = PartState.STRING_DQ;
										break;

									case '\r':						// Not reached: '\r' is handled by the outer switch
										last = ScanState.CARRIAGE_RETURN;
										newState = PartState.PHP;
										break;

									case '\\':
										last = ScanState.BACKSLASH;
										newState = PartState.PHP;
										break;

									default:
										last = ScanState.NONE;
										newState = PartState.PHP;
										break;
								}

								fLast = ScanState.NONE; // ignore fLast
								return preFix (fState, newState, last, 1);

							default:
								break;
						}
					}
			}

			// Second pass: interpret the character depending on the current partition type
			switch (fState) {
				case PHP:
					switch (ch) {
						case '#':							// Start of a single line comment
							// Ignore fLast (as for the string starts below). Otherwise preFix() would
							// subtract the length of a preceding '\r', '/', '<' or '<<' from the
							// finished partition and the comment partition would start too early.
							fLast = ScanState.NONE;

							if (fTokenLength > 0) {
								return preFix (PartState.PHP, PartState.SINGLE_LINE_COMMENT, ScanState.NONE, 1);
							}
							else {
								// No PHP characters pending: switch the state and go on scanning without returning a token
								preFix (PartState.PHP, PartState.SINGLE_LINE_COMMENT, ScanState.NONE, 1);
								fTokenOffset += fTokenLength;
								fTokenLength = fPrefixLength;
							}
							break;

						case '<':
							if (fLast == ScanState.LESS) {						// Second '<' in a row
								fTokenLength++;
								fLast = ScanState.LESS_LESS;
							}
							else if (fLast == ScanState.LESS_LESS) {			// Third '<' in a row: start of a heredoc string
								if (fTokenLength - getLastLength(fLast) > 0) {	// There is PHP code in front of the '<<<'
									return preFix (PartState.PHP, PartState.STRING_HEREDOC, ScanState.LESS_LESS_LESS, 3);
								}
								else {
									preFix (PartState.PHP, PartState.STRING_HEREDOC, ScanState.LESS_LESS_LESS, 3);
									fTokenOffset += fTokenLength;
									fTokenLength = fPrefixLength;
								}
							}
							else {
								fTokenLength++;
								fLast = ScanState.LESS;
							}
							break;

						case '/':												// Start of single line comment?
							if (fLast == ScanState.SLASH) {						// If previous character was already a slash,
								if (fTokenLength - getLastLength(fLast) > 0) {	// this is the start of a single line comment
									return preFix (PartState.PHP, PartState.SINGLE_LINE_COMMENT, ScanState.NONE, 2);
								}
								else {
									preFix (PartState.PHP, PartState.SINGLE_LINE_COMMENT, ScanState.NONE, 2);
									fTokenOffset += fTokenLength;
									fTokenLength = fPrefixLength;
								}
							}
							else {
								fTokenLength++;
								fLast = ScanState.SLASH;						// We currently found a slash
							}
							break;

						case '*':
							if (fLast == ScanState.SLASH) {						// If previous character was a slash
								if (fTokenLength - getLastLength (fLast) > 0) {	// this is the start of a comment /*
									return preFix (PartState.PHP, PartState.MULTI_LINE_COMMENT, ScanState.SLASH_STAR, 2);
								}
								else {
									preFix (PartState.PHP, PartState.MULTI_LINE_COMMENT, ScanState.SLASH_STAR, 2);
									fTokenOffset += fTokenLength;
									fTokenLength = fPrefixLength;
								}
							}
							else {												// No slash before the '*', so it's a normal character
								consume ();
							}
							break;

						case '\'':												// The start of a single quoted string
							fLast = ScanState.NONE; // ignore fLast

							if (fTokenLength > 0) {
								return preFix (PartState.PHP, PartState.STRING_SQ, ScanState.NONE, 1);
							}
							else {
								preFix (PartState.PHP, PartState.STRING_SQ, ScanState.NONE, 1);
								fTokenOffset += fTokenLength;
								fTokenLength = fPrefixLength;
							}
							break;

						case '"':												// The start of a double quoted string
							fLast = ScanState.NONE; // ignore fLast

							if (fTokenLength > 0) {
								return preFix (PartState.PHP, PartState.STRING_DQ, ScanState.NONE, 1);
							}
							else {
								preFix (PartState.PHP, PartState.STRING_DQ, ScanState.NONE, 1);
								fTokenOffset += fTokenLength;
								fTokenLength = fPrefixLength;
							}
							break;

						default:												// Just a normal character with no special meaning
							consume ();
							break;
					}
					break;

				case SINGLE_LINE_COMMENT:										// We are just running within a single line comment (started with // or #)
					consume();
					break;

				case PHPDOC:													// We are just running within a php doc comment
					switch (ch) {
						case '/':
							switch (fLast) {
								case SLASH_STAR_STAR:							// The comment is just "/**/": report it as a multi line comment
									return postFix (PartState.MULTI_LINE_COMMENT);

								case STAR:
									return postFix (PartState.PHPDOC);			// Found the end of the php doc (multi line) comment

								default:
									consume();
									break;
							}
							break;

						case '*':												// Found a '*'
							fTokenLength++;
							fLast = ScanState.STAR;								// Remember that we found a '*'
							break;

						default:
							consume();
							break;
					}
					break;

				case MULTI_LINE_COMMENT:									// We are currently running through a (possible) multi line comment
					switch (ch) {
						case '*':											// and we found a '*'
							if (fLast == ScanState.SLASH_STAR) {			// If the previous characters have been a /*
								fLast = ScanState.SLASH_STAR_STAR;			// we now have /**, so this is a php doc comment
								fTokenLength++;
								fState = PartState.PHPDOC;
							}
							else {
								fTokenLength++;
								fLast = ScanState.STAR;
							}
							break;

						case '/':
							if (fLast == ScanState.STAR) {					// Found the closing */
								return postFix (PartState.MULTI_LINE_COMMENT);
							}
							else {
								consume();
								break;
							}

						default:
							consume();
							break;
					}
					break;

				case STRING_DQ:
					switch (ch) {
						case '\\':											// A backslash escapes the next character; two backslashes neutralize each other
							fLast = (fLast == ScanState.BACKSLASH) ? ScanState.NONE : ScanState.BACKSLASH;
							fTokenLength++;
							break;

						case '\"':
							if (fLast != ScanState.BACKSLASH) {				// An unescaped quote ends the string
								return postFix (PartState.STRING_DQ);
							}
							else {
								consume();
							}
							break;

						default:
							consume();
							break;
					}
					break;

				case STRING_SQ:
					switch (ch) {
						case '\\':											// A backslash escapes the next character; two backslashes neutralize each other
							fLast = (fLast == ScanState.BACKSLASH) ? ScanState.NONE : ScanState.BACKSLASH;
							fTokenLength++;
							break;

						case '\'':
							if (fLast != ScanState.BACKSLASH) {				// An unescaped quote ends the string
								return postFix (PartState.STRING_SQ);
							}
							else {
								consume();
							}
							break;

						default:
							consume();
							break;
					}
					break;

				case STRING_HEREDOC:													// We are just running within a heredoc string
					switch (fLast) {
						case LESS_LESS_LESS:											// The first time after we recognized the '<<<'
							fLast = ScanState.HEREDOC_ID;								// We do a scan of the heredoc id string
							fHeredocId  = "";
							fHeredocId += (char) ch;
							fTokenLength++;
							break;

						case HEREDOC_ID:												// Scan the starting heredoc ID
							if (ch == ' ') {											// A blank ends the ID (a line break ends it as well, see the first switch)
								fLast = ScanState.HEREDOC;
								fTokenLength++;
							}
							else {
								fHeredocId += (char) ch;
								fTokenLength++;
							}
							break;

						case CARRIAGE_RETURN:											// We previously found a new line
							fTokenLength++;
							fHeredocIdEnd  = "";
							fHeredocIdEnd += (char) ch;									// Add the first character to the (possible) end ID
							fLast = ScanState.HEREDOC_ID_END;							// Go for scanning the (possible) end ID
							break;

						case HEREDOC_ID_END:											// We scan the (possible) end ID
							if (ch == ';') {											// End ID ends with an ';'
								// fHeredocIdEnd is always set when we get here; fHeredocId is null when
								// no heredoc start was scanned, hence the null-safe comparison.
								if (fHeredocIdEnd.equals (fHeredocId)) {				// If start ID and end ID matches.
									return postFix (PartState.STRING_HEREDOC);			// It's the end of a heredoc partition
								}
								else {
									consume ();											// Wrong end ID, so just eat the character
								}
							}
							else {
								fTokenLength++;
								fHeredocIdEnd += (char) ch;								// Add the character to the possible heredoc end ID
							}
							break;

						default:														// Normally state NONE
							consume ();													// Eat the character
							break;
					}
					break;
			} // end of switch (fState)
		}
	}

	/**
	 * Returns the number of characters the given scan state stands for.
	 *
	 * @param last the scan state to measure
	 * @return the length in characters, or -1 for the states that have no
	 *         length assigned here
	 */
	private static final int getLastLength (ScanState last) {
		switch (last) {
			case NONE:
				return 0;

			// single character states
			case BACKSLASH:
			case SLASH:
			case STAR:
			case CARRIAGE_RETURN:
			case LESS:
				return 1;

			// two character states
			case SLASH_STAR:
			case LESS_LESS:
				return 2;

			// three character states
			case SLASH_STAR_STAR:
			case HEREDOC:
				return 3;

			default:
				return -1;
		}
	}

	/**
	 * Adds the character just read to the current token and forgets the last
	 * significant characters.
	 */
	private final void consume() {
		fLast = ScanState.NONE;
		fTokenLength += 1;
	}

	/**
	 * Finishes the current partition with the character just read: the character
	 * is counted as part of the partition and the scanner goes back to plain PHP
	 * without any prefix.
	 *
	 * @param state the type of the partition whose end was found
	 * @return the token belonging to <code>state</code>
	 */
	private final IToken postFix (PartState state) {
		fTokenLength += 1;						// the closing character belongs to the finished partition

		// whatever follows starts as plain PHP, with nothing read ahead
		fPrefixLength = 0;
		fState        = PartState.PHP;
		fLast         = ScanState.NONE;

		final IToken finished = fTokens[state.ordinal()];
		return finished;
	}

	/**
	 * Called when the start (prefix) of a new partition was found. Ends the
	 * current partition in front of that prefix and switches the scanner to the
	 * new partition type.
	 * <p>
	 * The characters represented by <code>fLast</code> are taken away from the
	 * current token length. In emulation mode, when a non empty PHP partition is
	 * ended, the region of that partition is additionally recorded in
	 * <code>fJavaOffset</code>/<code>fJavaLength</code> and the token length is set to 1.
	 *
	 * @param oldState the type of the partition that ends
	 * @param newState the type of the partition that starts
	 * @param last the scan state to continue with
	 * @param prefixLength the number of characters of the new partition already read
	 * @return the token belonging to <code>oldState</code>
	 */
	private final IToken preFix (PartState oldState, PartState newState, ScanState last, int prefixLength) {
		final int pending = getLastLength (fLast);
		final boolean recordRegion = fEmulate
				&& (oldState == PartState.PHP)
				&& (fTokenLength - pending > 0);

		fTokenLength -= pending;				// the pending characters do not belong to the old partition

		if (recordRegion) {
			fJavaOffset  = fTokenOffset;
			fJavaLength  = fTokenLength;
			fTokenLength = 1;
		}

		fState        = newState;
		fPrefixLength = prefixLength;
		fLast         = last;

		return fTokens[oldState.ordinal()];
	}

	/**
	 * Maps a partition content type to the corresponding scanner state.
	 *
	 * @param contentType the content type, may be <code>null</code>
	 * @return the matching state; <code>PartState.PHP</code> for <code>null</code>
	 *         and for content types that are not known here
	 */
	private static PartState getState (String contentType) {
		if (contentType == null) {
			return PartState.PHP;
		}
		if (contentType.equals (PHP_SINGLELINE_COMMENT)) {
			return PartState.SINGLE_LINE_COMMENT;
		}
		if (contentType.equals (PHP_MULTILINE_COMMENT)) {
			return PartState.MULTI_LINE_COMMENT;
		}
		if (contentType.equals (PHP_PHPDOC_COMMENT)) {
			return PartState.PHPDOC;
		}
		if (contentType.equals (PHP_STRING_DQ)) {
			return PartState.STRING_DQ;
		}
		if (contentType.equals (PHP_STRING_SQ)) {
			return PartState.STRING_SQ;
		}
		if (contentType.equals (PHP_STRING_HEREDOC)) {
			return PartState.STRING_HEREDOC;
		}
		return PartState.PHP;					// anything else is treated as plain PHP
	}

	/**
	 * Configures the scanner for a range that may start in the middle of a partition.
	 * <p>
	 * Because of the PHP heredoc syntax we need to parse from the beginning of a heredoc
	 * partition, and not from anywhere in the middle. Without reading the start of the
	 * heredoc (and with it the heredoc start ID) the heredoc end ID cannot be recognized.
	 * So if the partition offset is known (not negative), scanning is restarted at
	 * <code>partitionOffset</code> in plain PHP state and the range is extended accordingly.
	 * <p>
	 * If the partition offset is not known (negative), scanning starts at
	 * <code>offset</code> in the state given by <code>contentType</code>. The first token
	 * then starts at <code>offset</code>, so a negative token offset is never reported.
	 *
	 * @param document the document to scan
	 * @param offset the offset of the range to scan
	 * @param length the length of the range to scan
	 * @param contentType the content type of the partition containing <code>offset</code>, may be <code>null</code>
	 * @param partitionOffset the offset of the partition containing <code>offset</code>, negative if unknown
	 * @see IPartitionTokenScanner#setPartialRange (IDocument, int, int, String, int)
	 */
	public void setPartialRange (IDocument document, int offset, int length, String contentType, int partitionOffset) {
		if (partitionOffset >= 0) {
			// Rescan the whole partition: the range grows by the distance between partition start and offset
			fScanner.setRange (document, partitionOffset, length + (offset - partitionOffset));

			fTokenOffset  = partitionOffset;
			fTokenLength  = 0;
			fPrefixLength = 0;
			fLast         = ScanState.NONE;
			fState        = PartState.PHP;						// restart at beginning of partition
		}
		else {
			// The partition start is unknown. Using the negative partitionOffset as token
			// offset (and offset - partitionOffset as prefix length) would make the first
			// token start in front of the document, so the first token starts at offset.
			fScanner.setRange (document, offset, length);

			fTokenOffset  = offset;
			fTokenLength  = 0;
			fPrefixLength = 0;
			fLast         = ScanState.NONE;
			fState        = getState (contentType);				// continue in the partition type we were told
		}

		// emulate JavaPartitionScanner
		if (fEmulate) {
			fJavaOffset = -1;
			fJavaLength = 0;
		}
	}

	/**
	 * Configures the scanner to scan the given range from scratch, starting in
	 * plain PHP state.
	 *
	 * @param document the document to scan
	 * @param offset the offset of the range to scan
	 * @param length the length of the range to scan
	 * @see ITokenScanner#setRange(IDocument, int, int)
	 */
	public void setRange (IDocument document, int offset, int length) {
		fScanner.setRange (document, offset, length);

		// nothing has been read yet: empty token at the range start, no prefix
		fState        = PartState.PHP;
		fLast         = ScanState.NONE;
		fPrefixLength = 0;
		fTokenLength  = 0;
		fTokenOffset  = offset;

		if (fEmulate) {							// emulate JavaPartitionScanner: no region pending
			fJavaLength = 0;
			fJavaOffset = -1;
		}
	}

	/**
	 * Returns the length of the last token returned by <code>nextToken()</code>.
	 *
	 * @return the token length
	 * @see ITokenScanner#getTokenLength()
	 */
	public int getTokenLength() {
		return this.fTokenLength;
	}

	/**
	 * Returns the offset of the last token returned by <code>nextToken()</code>.
	 * When <code>AbstractPartitioner.DEBUG</code> is set, it is asserted that the
	 * offset is not negative.
	 *
	 * @return the token offset
	 * @see ITokenScanner#getTokenOffset()
	 */
	public int getTokenOffset() {
		final int tokenOffset = fTokenOffset;

		if (AbstractPartitioner.DEBUG) {
			Assert.isTrue(tokenOffset >= 0, Integer.toString(tokenOffset));
		}

		return tokenOffset;
	}

}