package org.plog4u.wiki.filter;
import java.util.ArrayList;
import java.util.NoSuchElementException;
import java.util.Stack;
import java.util.StringTokenizer;
import org.plog4u.wiki.filter.WikipediaFilter.InvalidInputException;
import org.plog4u.wiki.filter.tags.AbstractTag;
import org.plog4u.wiki.filter.tags.CloseTagToken;
import org.plog4u.wiki.filter.tags.ListToken;
import org.plog4u.wiki.filter.tags.OpenTagToken;
import org.plog4u.wiki.filter.tags.SpecialTagToken;
import org.radeox.api.engine.ImageRenderEngine;
import org.radeox.api.engine.IncludeRenderEngine;
import org.radeox.api.engine.RenderEngine;
import org.radeox.api.engine.WikiRenderEngine;
import org.radeox.filter.context.FilterContext;
import org.radeox.macro.Macro;
import org.radeox.macro.MacroRepository;
import org.radeox.macro.parameter.MacroParameter;
import org.radeox.util.StringBufferWriter;
/**
* A parser for the WikipediaFilter
*
* @see org.plog4u.wiki.filter.WikipediaFilter
*/
public class WikipediaParser {
// private static Log log = LogFactory.getLog(WikipediaFilter.class);
MacroRepository fMacros;
private FilterContext fContext;
private RenderEngine fWikiEngine;
// TODO check, if this counter is correct in recursions:
private int fImageCounter;
/**
* The current snip
*/
// private Snip fSnip;
/**
* If the snip contains headings for a "table of content" this buffer temporarily contains the start of the snip and the
* "table of content"
*/
private StringBuffer fResultBufferHeader = null;
/**
* The buffer for the resulting HTML rendering from the current snip.
*/
private StringBuffer fResultBuffer;
/**
* The wiki syntax string which should be parsed
*/
private char[] fSource;
/**
* The corresponding String for the character source array
*/
private final String fStringSource;
/**
* The current scanned character
*/
private char fCurrentCharacter;
/**
* The current offset in the character source array
*/
private int fCurrentPosition;
/**
* The current recursion level for this parser
*/
private int fRecursionLevel;
private Stack fTokenStack;
// private Stack fTableStack;
private boolean fWhiteStart = false;
private int fWhiteStartPosition = 0;
// private TeXParser fTeXParser;
// private TeXParser fTeXImageParser;
/**
*
* "table of content"
*
*/
private ArrayList fTableOfContent = null;
// private String fSrcPath;
// private String fBinPath;
public WikipediaParser(MacroRepository macros, String stringSource, StringBuffer result, FilterContext context, int recursionLevel) {
fContext = context;
fWikiEngine = context.getRenderContext().getRenderEngine();
// try {
// SnipMacroParameter params = (SnipMacroParameter)
// fContext.getMacroParameter();
// fSnip = params.getSnipRenderContext().getSnip();
// } catch (ClassCastException e) {
// e.printStackTrace();
// }
fMacros = macros;
fResultBuffer = result;
fStringSource = stringSource;
setSource(stringSource.toCharArray());
fRecursionLevel = recursionLevel;
fTokenStack = new Stack();
// fTableStack = new Stack();
// fTeXParser = new TeXParser("", "m:");
// fTeXImageParser = new TeXParser("", "");
fImageCounter = 1;
// fSrcPath = (String) fContext.getRenderContext().get("srcpath");
// if (fSrcPath==null) {
// fSrcPath = "";
// }
// fBinPath = (String) fContext.getRenderContext().get("binpath");
// if (fBinPath==null) {
// fBinPath = "";
// }
}
/**
* Check until a new-line was found, if there are only whitespace characters before the given endposition.
*
* @param startPosition
* @param endPosition
* @return -1 if no whitespace line is found from the end (i.e. endPosition); otherwise the offset directly after where the
* new-line was found
*/
private int checkWhitespaces(int startPosition, int endPosition) {
  // Walk backwards from endPosition down to startPosition.
  int index = endPosition;
  while (index >= startPosition) {
    final char current = fSource[index];
    index--;
    if (current == '\n') {
      // offset directly behind the new-line that was just found
      return index + 2;
    }
    final boolean blank = current == ' ' || current == '\t' || current == '\r';
    if (!blank) {
      return -1;
    }
  }
  // Here index is always below startPosition: only blanks were seen in the range.
  if (index >= 0) {
    // the character in front of the range has to be a new-line
    if (fSource[index] != '\n') {
      return -1;
    }
    return startPosition;
  }
  if (index == -1 && startPosition == 0) {
    // special case at the start of a string
    return 0;
  }
  return startPosition;
}
/**
* copy the content in the resulting buffer and escape special html characters (< > " & ')
*/
private void copyWhite(boolean whiteStart, final int whiteStartPosition, final int diff) {
if (whiteStart) {
final int len = fCurrentPosition - diff;
int currentIndex = whiteStartPosition;
int lastIndex = currentIndex;
while (currentIndex < len) {
switch (fSource[currentIndex++]) {
case '<': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(fSource, lastIndex, currentIndex - lastIndex - 1);
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("<");
break;
case '>': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(fSource, lastIndex, currentIndex - lastIndex - 1);
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append(">");
break;
case '&': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(fSource, lastIndex, currentIndex - lastIndex - 1);
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("&");
break;
case '\'': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(fSource, lastIndex, currentIndex - lastIndex - 1);
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("'");
break;
case '\"': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(fSource, lastIndex, currentIndex - lastIndex - 1);
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append(""");
break;
}
}
if (lastIndex < (currentIndex)) {
fResultBuffer.append(fSource, lastIndex, currentIndex - lastIndex);
}
fWhiteStart = false;
}
}
/**
* copy the text in the resulting buffer and escape special html characters (< > " & ')
*/
private void copyWhite(String text) {
final int len = text.length();
int currentIndex = 0;
int lastIndex = currentIndex;
while (currentIndex < len) {
switch (text.charAt(currentIndex++)) {
case '<': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
}
fResultBuffer.append("<");
break;
case '>': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append(">");
break;
case '&': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("&");
break;
case '\'': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("'");
break;
case '\"': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append(""");
break;
}
}
if (lastIndex < (currentIndex)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex));
}
}
/**
* Copy the text in the resulting buffer and escape special html characters (< > " & ') Additionally every
* newline will be replaced by <br/>
*/
private void copyNowikiNewLine(String text) {
final int len = text.length();
int currentIndex = 0;
int lastIndex = currentIndex;
while (currentIndex < len) {
switch (text.charAt(currentIndex++)) {
case '\n':
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("
");
break;
case '<': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("<");
break;
case '>': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append(">");
break;
// case '&': // special html escape character
// if (lastIndex < (currentIndex - 1)) {
// fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
// lastIndex = currentIndex;
// } else {
// lastIndex++;
// }
// fResultBuffer.append("&");
// break;
case '\'': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("'");
break;
case '\"': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append(""");
break;
}
}
if (lastIndex < (currentIndex)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex));
}
}
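/**
* Render the HTML tokens which are defined in the OPEN_TAGS and CLOSE_TAGS maps.
*
* @return WikipediaFilter.TokenIgnore if a known tag was recognised and handled (this includes a known but unexpected closing
*         tag); otherwise WikipediaFilter.TokenNotFound
*/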
public int getHTMLToken() {
  // The caller has already consumed the '<'; fCurrentPosition points at the character behind it.
  int currentHtmlPosition = fCurrentPosition;
  try {
    if (getNextChar('/')) {
      // closing tag: skip the '/' and read up to the terminating '>'
      currentHtmlPosition++;
      if (readUntilCharOrEOL('>') != 1) {
        return WikipediaFilter.TokenNotFound;
      }
      // Lower-case with a fixed locale, so that the lookup does not depend on the default locale
      // (e.g. "I" is not lower-cased to "i" under a Turkish default locale).
      String closeTagString = new String(fSource, currentHtmlPosition, fCurrentPosition - currentHtmlPosition - 1)
          .toLowerCase(java.util.Locale.ENGLISH);
      StringTokenizer tagTokenizer = new StringTokenizer(closeTagString);
      try {
        String tokenString = tagTokenizer.nextToken();
        CloseTagToken token = (CloseTagToken) WikipediaFilter.CLOSE_TAGS.get(tokenString);
        if (token == null) {
          return WikipediaFilter.TokenNotFound;
        }
        // An empty stack is treated like a non-matching top element instead of raising an
        // EmptyStackException.
        Object topToken = fTokenStack.isEmpty() ? null : fTokenStack.peek();
        boolean matchesOpenTag = false;
        if (topToken instanceof OpenTagToken) {
          OpenTagToken openToken = (OpenTagToken) topToken;
          // compare the names by content, not by object identity
          matchesOpenTag = openToken.getTagName() == token.getTagName()
              || (openToken.getTagName() != null && openToken.getTagName().equals(token.getTagName()));
        }
        if (matchesOpenTag) {
          fTokenStack.pop();
          // 3 + name length: the characters '<', '/' and '>' plus the tag name are not plain text
          copyWhite(fWhiteStart, fWhiteStartPosition, 3 + tokenString.length());
          fWhiteStart = false;
          fResultBuffer.append(token.getCloseTag());
          return WikipediaFilter.TokenIgnore;
        }
        fWhiteStart = false;
        unexpectedTag(token.getTagName());
        return WikipediaFilter.TokenIgnore;
      } catch (NoSuchElementException e) {
        // the tag text contained no token at all
        return WikipediaFilter.TokenNotFound;
      }
    } else {
      // start tag: collect the tag name
      // NOTE(review): only characters accepted by Character.isJavaIdentifierStart are collected,
      // so a name containing a digit ends at that digit - confirm this is intended.
      int tagNameStart = fCurrentPosition;
      while (Character.isJavaIdentifierStart(fSource[fCurrentPosition])) {
        fCurrentPosition++;
      }
      try {
        String tokenString = new String(fSource, tagNameStart, fCurrentPosition - tagNameStart);
        OpenTagToken token = (OpenTagToken) WikipediaFilter.OPEN_TAGS.get(tokenString);
        if (token == null) {
          return WikipediaFilter.TokenNotFound;
        }
        // name length + 1: the '<' and the tag name are not plain text
        copyWhite(fWhiteStart, fWhiteStartPosition, (fCurrentPosition - tagNameStart) + 1);
        fWhiteStart = false;
        if (token instanceof SpecialTagToken) {
          // special tags take no attributes: optional whitespace, optional '/', then '>'
          while (Character.isWhitespace(fSource[fCurrentPosition])) {
            fCurrentPosition++;
          }
          if (fSource[fCurrentPosition] == '/') {
            fCurrentPosition++;
          }
          if (fSource[fCurrentPosition] == '>') {
            fCurrentPosition++;
            fWhiteStartPosition = fCurrentPosition;
            // insert the special tag :
            fResultBuffer.append(token.getOpenTag());
            return WikipediaFilter.TokenIgnore;
          }
        } else {
          // ordinary open tag: write "<name", let the token scan the rest of the tag, then ">"
          fResultBuffer.append("<");
          fResultBuffer.append(token.getTagName());
          fTokenStack.push(token);
          fCurrentPosition = token.scan(fResultBuffer, fSource, fCurrentPosition);
          fResultBuffer.append(">");
          return WikipediaFilter.TokenIgnore;
        }
        return WikipediaFilter.TokenNotFound;
      } catch (NoSuchElementException e) {
        return WikipediaFilter.TokenNotFound;
      }
    }
  } catch (IndexOutOfBoundsException e) {
    // the source ended inside the tag; fall through and report "not found"
  }
  fCurrentPosition = currentHtmlPosition;
  return WikipediaFilter.TokenNotFound;
}
/**
 * Consumes the next character if it equals the given one.
 *
 * @param testedChar
 *          the expected character
 * @return true if the character was found and consumed; false (position unchanged) otherwise or at the end of the source
 */
public final boolean getNextChar(char testedChar) {
  final int index = fCurrentPosition;
  if (index < 0 || index >= fSource.length) {
    return false;
  }
  fCurrentCharacter = fSource[index];
  if (fCurrentCharacter == testedChar) {
    fCurrentPosition = index + 1;
    return true;
  }
  return false;
}
/**
 * Consumes the next character if it equals one of the two given characters.
 *
 * @param testedChar1
 *          the first candidate
 * @param testedChar2
 *          the second candidate
 * @return 0 if the first candidate matched, 1 if the second one matched, -1 (position unchanged) otherwise or at the end of
 *         the source
 */
public final int getNextChar(char testedChar1, char testedChar2) {
  final int index = fCurrentPosition;
  if (index < 0 || index >= fSource.length) {
    return -1;
  }
  fCurrentCharacter = fSource[index];
  final int match;
  if (fCurrentCharacter == testedChar1) {
    match = 0;
  } else if (fCurrentCharacter == testedChar2) {
    match = 1;
  } else {
    match = -1;
  }
  if (match >= 0) {
    fCurrentPosition = index + 1;
  }
  return match;
}
/**
 * Consumes the next character if it is a digit according to Character.isDigit.
 *
 * @return true if a digit was consumed; false (position unchanged) otherwise or at the end of the source
 */
public final boolean getNextCharAsDigit() {
  final int index = fCurrentPosition;
  if (index < 0 || index >= fSource.length) {
    return false;
  }
  fCurrentCharacter = fSource[index];
  final boolean digit = Character.isDigit(fCurrentCharacter);
  if (digit) {
    fCurrentPosition = index + 1;
  }
  return digit;
}
/**
 * Consumes the next character if it is a valid digit in the given radix.
 *
 * @param radix
 *          the radix passed to Character.digit
 * @return true if a digit was consumed; false (position unchanged) otherwise or at the end of the source
 */
public final boolean getNextCharAsDigit(int radix) {
  final int index = fCurrentPosition;
  if (index < 0 || index >= fSource.length) {
    return false;
  }
  fCurrentCharacter = fSource[index];
  final boolean digit = Character.digit(fCurrentCharacter, radix) != -1;
  if (digit) {
    fCurrentPosition = index + 1;
  }
  return digit;
}
/**
 * Consumes a run of the given character starting at the current position.
 *
 * @param testedChar
 *          the character to count
 * @return the number of consecutive occurrences that were consumed (possibly 0)
 */
public final int getNumberOfChar(char testedChar) {
  int count = 0;
  while (fCurrentPosition >= 0 && fCurrentPosition < fSource.length) {
    fCurrentCharacter = fSource[fCurrentPosition];
    if (fCurrentCharacter != testedChar) {
      // the terminating character is looked at, but not consumed
      break;
    }
    fCurrentPosition++;
    count++;
  }
  return count;
}
/**
 * Collects the run of list markers ('*' and '#') that starts with the character read last (at fCurrentPosition - 1) and
 * consumes the markers that follow it.
 *
 * @return a copy of the source characters from the character read last up to (excluding) the first character that is not a
 *         list marker
 */
public final char[] getListChars() {
  final int first = fCurrentPosition - 1;
  while (fCurrentPosition >= 0 && fCurrentPosition < fSource.length) {
    fCurrentCharacter = fSource[fCurrentPosition];
    final boolean marker = fCurrentCharacter == '*' || fCurrentCharacter == '#';
    if (!marker) {
      break;
    }
    fCurrentPosition++;
  }
  final int count = fCurrentPosition - first;
  final char[] markers = new char[count];
  System.arraycopy(fSource, first, markers, 0, count);
  return markers;
}
/**
 * Consumes the next character if WikipediaFilter.isWikiPluginIdentifierPart accepts it.
 *
 * @return true if the character was accepted and consumed; false (position unchanged) otherwise or at the end of the source
 */
public boolean getNextCharAsWikiPluginIdentifierPart() {
  final int mark = fCurrentPosition;
  boolean accepted = false;
  try {
    fCurrentCharacter = fSource[fCurrentPosition++];
    accepted = WikipediaFilter.isWikiPluginIdentifierPart(fCurrentCharacter);
  } catch (IndexOutOfBoundsException e) {
    // end of the source reached: treated like a character that is not accepted
  }
  if (!accepted) {
    fCurrentPosition = mark;
  }
  return accepted;
}
/**
 * Pops tokens from the top of the token stack for as long as the top element is one of the list tokens (LIST_UL_START,
 * LIST_OL_START) or one of the inline format tokens (BOLD, ITALIC, STRONG, EM, STRIKETHROUGH), appending a string to the
 * result buffer for every popped token. Stops at the first other token or when the stack is empty.
 *
 * NOTE(review): every appended literal is the empty string in this copy of the file. Presumably these were the closing
 * tags of the popped elements and the markup got lost - confirm against the upstream source before relying on this method.
 */
private void stopList() {
while (!fTokenStack.isEmpty()) {
AbstractTag tok = (AbstractTag) fTokenStack.peek();
// the two list tokens are compared with equals(), the inline format tokens by identity
if (tok.equals(WikipediaFilter.LIST_UL_START)) {
fTokenStack.pop();
fResultBuffer.append("");
} else if (tok.equals(WikipediaFilter.LIST_OL_START)) {
fTokenStack.pop();
fResultBuffer.append("");
} else if (tok == WikipediaFilter.BOLD) {
fTokenStack.pop();
fResultBuffer.append("");
} else if (tok == WikipediaFilter.ITALIC) {
fTokenStack.pop();
fResultBuffer.append("");
} else if (tok == WikipediaFilter.STRONG) {
fTokenStack.pop();
fResultBuffer.append("");
} else if (tok == WikipediaFilter.EM) {
fTokenStack.pop();
fResultBuffer.append("");
} else if (tok == WikipediaFilter.STRIKETHROUGH) {
fTokenStack.pop();
fResultBuffer.append("");
} else {
// any other token ends the unwinding
break;
}
}
}
/**
 * Scans the source from the current position. Constructs that are recognised (blank lines, indented lines, ':' and ';'
 * lines, links, list markers, horizontal lines, URLs, entities, tables, HTML tags, headers) are rendered directly into the
 * result buffer and scanning continues. All other characters are remembered as a run of plain text (fWhiteStart /
 * fWhiteStartPosition) which is flushed through copyWhite() before the next construct is written.
 *
 * The end of the source is detected through the IndexOutOfBoundsException raised by the array access.
 *
 * NOTE(review): several string literals in this method are empty or split over two lines in this copy of the file, and
 * the HTML comment branch has unbalanced braces. Presumably HTML markup inside the literals got lost - the method has to
 * be restored from the upstream source before it can compile.
 *
 * @return WikipediaFilter.TokenSTRONG for three apostrophes, WikipediaFilter.TokenEM for two apostrophes,
 *         WikipediaFilter.TokenEOF when the end of the source is reached
 * @throws InvalidInputException
 *           declared, but no throw statement is visible in this method - presumably raised by a helper (TODO confirm)
 */
protected int getNextToken() throws InvalidInputException {
// NOTE(review): startOfIndent is assigned in this method, but never read
boolean startOfIndent = false;
fWhiteStartPosition = 0;
fWhiteStart = false;
try {
while (true) {
// fStartPosition = fCurrentPosition;
fCurrentCharacter = fSource[fCurrentPosition++];
// ---------Identify the next token-------------
switch (fCurrentCharacter) {
case '\n':
// If the pending plain text ends with a line that contains only whitespace, flush the text in front of that line and
// close the open lists / inline formats.
if (fWhiteStart) {
int tempPosition = checkWhitespaces(fWhiteStartPosition, fCurrentPosition - 2);
if (tempPosition >= 0) {
copyWhite(fWhiteStart, fWhiteStartPosition, fCurrentPosition - (++tempPosition));
fWhiteStart = false;
stopList();
// NOTE(review): empty literal in this copy - presumably a paragraph tag was lost
fResultBuffer.append("");
// continue;
}
}
// Look for following lines which start with a space character (candidate for a preformatted section).
int fStartPrePosition = fCurrentPosition;
boolean preSection = false;
try {
while (fSource[fCurrentPosition++] == ' ') {
fCurrentCharacter = fSource[fCurrentPosition++];
while (fCurrentCharacter != '\n') {
if (!Character.isWhitespace(fCurrentCharacter)) {
// preformatted section starts here
preSection = true;
}
fCurrentCharacter = fSource[fCurrentPosition++];
}
}
--fCurrentPosition;
} catch (IndexOutOfBoundsException e) {
}
// The indented lines are only rendered as a section of their own on recursion level 1; they are passed through a
// nested WikipediaFilter.filterParser() run.
if (preSection && fRecursionLevel == 1) {
String preString;
copyWhite(fWhiteStart, fStartPrePosition, fCurrentPosition - fStartPrePosition);
fWhiteStart = true;
// NOTE(review): empty literal in this copy - presumably the opening tag of the section was lost
fResultBuffer.append("");
// copyWhite(fWhiteStart, fStartPrePosition, 1);
preString = new String(fSource, fStartPrePosition, fCurrentPosition - fStartPrePosition - 1) + '\n';
fResultBuffer.append(WikipediaFilter.filterParser(preString, fContext, fMacros, fRecursionLevel));
// preString = new String(fSource, fStartPrePosition, fCurrentPosition - fStartPrePosition - 1)+'\n';
// int preIndex = 0;
// int lastIndex = 0;
// while (preIndex>=0) {
// preIndex = preString.indexOf('\n', lastIndex);
// if (preIndex>=0) {
// fResultBuffer.append(WikipediaFilter.filterParser(preString.substring(lastIndex,preIndex), fContext,
// fCachedPage, fMacros, fRecursionLevel));
// fResultBuffer.append('\n');
// lastIndex = ++preIndex;
// }
// }
// NOTE(review): the following literal is split over two lines in this copy - presumably the closing tag was lost
fResultBuffer.append("
");
fWhiteStart = false;
continue;
} else {
// no preformatted section: rewind to the character behind the new-line
fCurrentPosition = fStartPrePosition;
}
break;
case ':':
// One or more ':' at the start of a line: the rest of the line is written once, wrapped in as many opening and
// closing literals as there are colons.
if (isStartOfLine()) {
copyWhite(fWhiteStart, fWhiteStartPosition, 1);
fWhiteStart = false;
int levelHeader = getNumberOfChar(':') + 1;
int startHeadPosition = fCurrentPosition;
if (readUntilEOL()) {
String head = new String(fSource, startHeadPosition, fCurrentPosition - startHeadPosition);
for (int i = 0; i < levelHeader; i++) {
// NOTE(review): literal looks damaged in this copy (markup presumably lost)
fResultBuffer.append("- ");
}
fResultBuffer.append(head);
for (int i = 0; i < levelHeader; i++) {
fResultBuffer.append("
");
}
continue;
}
continue;
}
break;
case ';':
// "; " at the start of a line: the line is split at the first " : " into two parts, if that separator is present.
if (isStartOfLine() && getNextChar(' ')) {
copyWhite(fWhiteStart, fWhiteStartPosition, 1);
fWhiteStart = false;
int startHeadPosition = fCurrentPosition - 1;
if (readUntilEOL()) {
// TODO not correct - improve this
String head = new String(fSource, startHeadPosition, fCurrentPosition - startHeadPosition);
int index = head.indexOf(" : ");
if (index > 0) {
// NOTE(review): the literals of this branch look damaged in this copy (markup presumably lost)
fResultBuffer.append("- ");
fResultBuffer.append(head.substring(0, index));
fResultBuffer.append("
- ");
fResultBuffer.append(head.substring(index + 2));
fResultBuffer.append("
");
} else {
fResultBuffer.append("- ");
fResultBuffer.append(head);
fResultBuffer.append("
");
}
continue;
}
continue;
}
break;
case '[':
int startLinkPosition = fCurrentPosition;
if (getNextChar('[')) { // wikipedia link style
startLinkPosition = fCurrentPosition;
copyWhite(fWhiteStart, fWhiteStartPosition, 2);
fWhiteStart = false;
if (readUntilString("]]")) {
String name = new String(fSource, startLinkPosition, fCurrentPosition - startLinkPosition - 2);
// test for suffix string
// (letters or digits directly behind the closing brackets are passed on as a suffix of the link)
int temp = fCurrentPosition;
StringBuffer suffixBuffer = new StringBuffer();
try {
while (true) {
fCurrentCharacter = fSource[fCurrentPosition++];
if (!Character.isLetterOrDigit(fCurrentCharacter)) {
fCurrentPosition--;
break;
}
suffixBuffer.append(fCurrentCharacter);
}
handleWikipediaLink(name, suffixBuffer.toString());
continue;
} catch (IndexOutOfBoundsException e) {
// the source ended while the suffix was read: the link is handled without a suffix
fCurrentPosition = temp;
}
handleWikipediaLink(name, "");
continue;
}
} else {
// single bracket link
copyWhite(fWhiteStart, fWhiteStartPosition, 1);
fWhiteStart = false;
if (readUntilChar(']')) {
String name = new String(fSource, startLinkPosition, fCurrentPosition - startLinkPosition - 1);
handleSnipLink(name);
continue;
}
}
break;
case '*': // list
case '#': // list
// list markers are only accepted at the start of a line
if (isStartOfLine()) {
char[] listChars = getListChars();
int tempStarPosition = checkWhitespaces(fWhiteStartPosition, fCurrentPosition - 1 - listChars.length);
if (tempStarPosition >= 0) {
appendList(listChars);
continue;
}
}
break;
case '\'':
// two apostrophes are reported as TokenEM, three apostrophes as TokenSTRONG
if (getNextChar('\'')) {
if (getNextChar('\'')) {
copyWhite(fWhiteStart, fWhiteStartPosition, 3);
fWhiteStart = false;
return WikipediaFilter.TokenSTRONG;
}
copyWhite(fWhiteStart, fWhiteStartPosition, 2);
fWhiteStart = false;
return WikipediaFilter.TokenEM;
}
break;
case '-':
// four '-' characters which are directly followed by "\n" or "\r\n"
int tempCurrPosition = fCurrentPosition;
try {
if (fSource[tempCurrPosition++] == '-' && fSource[tempCurrPosition++] == '-' && fSource[tempCurrPosition++] == '-') {
if (fSource[tempCurrPosition] == '\n') {
fCurrentPosition = tempCurrPosition;
// NOTE(review): literal is split over two lines in this copy - presumably a horizontal rule tag was lost
fResultBuffer.append("
");
fWhiteStart = false;
continue;
} else if (fSource[tempCurrPosition++] == '\r' && fSource[tempCurrPosition++] == '\n') {
fCurrentPosition = tempCurrPosition - 1;
fResultBuffer.append("
");
fWhiteStart = false;
continue;
}
}
} catch (IndexOutOfBoundsException e) {
}
break;
case 'h': // http(s)://
int urlStartPosition = fCurrentPosition;
boolean foundUrl = false;
// number of characters of the scheme prefix: 7 for "http://", 8 with the optional 's'
int diff = 7;
try {
String urlString = fStringSource.substring(fCurrentPosition - 1, fCurrentPosition + 3);
if (urlString.equals("http")) {
fCurrentPosition += 3;
fCurrentCharacter = fSource[fCurrentPosition++];
if (fCurrentCharacter == 's') { // optional
fCurrentCharacter = fSource[fCurrentPosition++];
diff++;
}
if (fCurrentCharacter == ':' && fSource[fCurrentPosition++] == '/' && fSource[fCurrentPosition++] == '/') {
copyWhite(fWhiteStart, fWhiteStartPosition, diff);
fWhiteStart = false;
foundUrl = true;
// consume all characters which are accepted as part of a URL
while (WikipediaFilter.isUrlIdentifierPart(fSource[fCurrentPosition++])) {
}
}
}
} catch (IndexOutOfBoundsException e) {
if (!foundUrl) {
// rollback work :-)
fCurrentPosition = urlStartPosition;
}
}
if (foundUrl) {
String urlString = new String(fSource, urlStartPosition - 1, fCurrentPosition - urlStartPosition);
fCurrentPosition--;
createExternalLink(urlString);
continue;
}
break;
// case '@': // images @xml@ -> /static/rss-small.png
// copyWhite(fWhiteStart, fWhiteStartPosition, 1);
// fWhiteStart = false;
// int atStart = fCurrentPosition;
// if (readUntilChar('@')) {
// String imageTag = new String(fSource, atStart, fCurrentPosition - atStart - 1);
// if (imageTag != null) {
// if (WikipediaFilter.createStaticImage(imageTag, fResultBuffer)) {
// continue;
// }
// }
// }
// fCurrentPosition = atStart;
// break;
case '&':
// Entities are copied to the result unchanged: numeric ones ('&', '#', digits, ';') with a value below 65536 and named
// ones ('&', name, ';') whose name is contained in WikipediaFilter.ENTITY_SET.
// NOTE(review): on the paths that do not recognise an entity fCurrentPosition is not reset to the position behind
// the '&' - confirm that this is intended.
int ampersandStart = fCurrentPosition - 1;
if (getNextChar('#')) {
try {
StringBuffer num = new StringBuffer(5);
char ch = fSource[fCurrentPosition++];
while (Character.isDigit(ch)) {
num.append(ch);
ch = fSource[fCurrentPosition++];
}
if (num.length() > 0 && ch == ';') {
Integer i = Integer.valueOf(num.toString());
if (i.intValue() < 65536) {
copyWhite(fWhiteStart, fWhiteStartPosition, 3 + num.length());
fWhiteStart = false;
fResultBuffer.append(fSource, ampersandStart, fCurrentPosition - ampersandStart);
continue;
}
}
} catch (IndexOutOfBoundsException e) {
// ignore exception
} catch (NumberFormatException e) {
// ignore exception
}
} else {
try {
StringBuffer entity = new StringBuffer(10);
char ch = fSource[fCurrentPosition++];
while (Character.isLetterOrDigit(ch)) {
entity.append(ch);
ch = fSource[fCurrentPosition++];
}
if (entity.length() > 0 && ch == ';') {
if (WikipediaFilter.ENTITY_SET.contains(entity.toString())) {
copyWhite(fWhiteStart, fWhiteStartPosition, 2 + entity.length());
fWhiteStart = false;
fResultBuffer.append(fSource, ampersandStart, fCurrentPosition - ampersandStart);
continue;
}
}
} catch (IndexOutOfBoundsException e) {
// ignore exception
} catch (NumberFormatException e) {
// ignore exception
}
}
break;
case '{':
// detect macros
copyWhite(fWhiteStart, fWhiteStartPosition, 1);
fWhiteStart = false;
// NOTE(review): startMacroPosition is only used by the disabled macro code below
int startMacroPosition = fCurrentPosition;
if (getNextChar('|') && handleWikipediaTable()) { // Wikipedia
// table
// syntax
continue;
// } else {
// SnipSnap / Radeox Macro Syntax
// if (readUntilChar('}')) {
// String macroStartTag;
//
// macroStartTag = new String(fSource, startMacroPosition, fCurrentPosition - startMacroPosition - 1);
// if (macroStartTag != null) {
// createMacro(startMacroPosition, macroStartTag);
// continue;
// }
// }
}
break;
case '<':
int htmlStartPosition = fCurrentPosition;
try {
switch (fStringSource.charAt(fCurrentPosition)) {
case '!': //
// NOTE(review): this branch is damaged in this copy: the compared literal is empty although the substring has four
// characters, and there are three closing braces for two opening ones. Presumably the HTML comment delimiters and
// the code between them were lost - restore from the upstream source.
String htmlCommentString = fStringSource.substring(fCurrentPosition - 1, fCurrentPosition + 3);
if (htmlCommentString.equals("")) {
String htmlCommentContent = new String(fSource, htmlStartPosition + 3, fCurrentPosition - htmlStartPosition - 6);
if (htmlCommentContent != null) {
copyWhite(fWhiteStart, fWhiteStartPosition, fCurrentPosition - htmlStartPosition + 1);
fWhiteStart = false;
// insert html comment for visual checks
// only:
/*
* fResultBuffer.append(" ");
*/
continue;
}
}
}
break;
case 'n': // nowiki
// NOTE(review): both literals of this branch are empty in this copy although an eight character substring is
// compared - presumably the opening and the closing nowiki tag were lost.
String nowikiString = fStringSource.substring(fCurrentPosition - 1, fCurrentPosition + 7);
if (nowikiString.equals("")) {
fCurrentPosition += 7;
if (readUntilString("")) {
String nowikiContent = new String(fSource, htmlStartPosition + 7, fCurrentPosition - htmlStartPosition - 16);
if (nowikiContent != null) {
copyWhite(fWhiteStart, fWhiteStartPosition, fCurrentPosition - htmlStartPosition + 1);
fWhiteStart = false;
copyNowikiNewLine(nowikiContent);
continue;
}
}
}
break;
}
} catch (IndexOutOfBoundsException e) {
// do nothing
}
startOfIndent = false;
fCurrentPosition = htmlStartPosition;
// detect special html tags
int htmlToken = getHTMLToken();
if (htmlToken == WikipediaFilter.TokenIgnore) {
continue;
// } else if (htmlToken > TokenIgnore) {
// return htmlToken;
}
// no known tag: rewind, so that the '<' becomes part of the plain text
fCurrentPosition = htmlStartPosition;
break;
case '=': // wikipedia header ?
if (isStartOfLine()) {
// number of '=' characters including the one which was already read
int levelHeader = getNumberOfChar('=') + 1;
// int tempPosition = checkWhitespaces(fWhiteStartPosition, fCurrentPosition - 1 - levelHeader);
// if (tempPosition >= 0) {
copyWhite(fWhiteStart, fWhiteStartPosition, levelHeader);
fWhiteStart = false;
int startHeadPosition = fCurrentPosition;
// int initialOffset = levelHeader;
// the level is limited to 6; it is decremented to serve as the index into HEADER_STRINGS
if (levelHeader > 6) {
levelHeader = 6;
}
levelHeader--;
if (readUntilString(WikipediaFilter.HEADER_STRINGS[levelHeader])) {
String head = new String(fSource, startHeadPosition, fCurrentPosition - startHeadPosition - (1 + levelHeader));
levelHeader++;
handleHead(head, levelHeader);
continue;
}
// }
}
break;
}
// The character is not part of a recognised construct: it starts or extends the pending run of plain text.
if (!fWhiteStart) {
fWhiteStart = true;
fWhiteStartPosition = fCurrentPosition - 1;
}
startOfIndent = false;
}
// -----------------end switch while try--------------------
} catch (IndexOutOfBoundsException e) {
// end of scanner text
}
// flush the plain text which is still pending
copyWhite(fWhiteStart, fWhiteStartPosition, 1);
return WikipediaFilter.TokenEOF;
}
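/**
* Tests whether the character read last (at fCurrentPosition - 1) is the first character of a line.
*
* @return true if that character is the first character of the source or is directly preceded by a '\n' or '\r'
*/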
private boolean isStartOfLine() {
  if (fCurrentPosition == 1) {
    // the character read last is the very first character of the source
    return true;
  }
  if (fCurrentPosition < 2) {
    return false;
  }
  // the character in front of the one read last
  final char previous = fSource[fCurrentPosition - 2];
  return previous == '\n' || previous == '\r';
}
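/**
* Opens, closes or switches list levels according to the list markers found at the start of a line.
*
* @param listChars
*          the run of '*' / '#' list markers; its length is the requested list level
* TODO document the generated markup
*/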
private void appendList(char[] listChars) {
int topLevel = 0;
int levelStar = listChars.length;
copyWhite(fWhiteStart, fWhiteStartPosition, levelStar);
fWhiteStart = false;
AbstractTag tok = (AbstractTag) fTokenStack.peek();
if (tok instanceof ListToken) {
ListToken listToken = (ListToken) tok;
topLevel = listToken.getLevel();
if (levelStar > topLevel) {
while (levelStar > topLevel) {
if (listChars[topLevel] == '*') {
fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_UL_START, ++topLevel));
fResultBuffer.append("- ");
} else {
fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, ++topLevel));
fResultBuffer.append("
- ");
}
}
} else if (levelStar < topLevel) {
while (levelStar < topLevel) {
tok = (AbstractTag) fTokenStack.peek();
if (tok instanceof ListToken) {
fTokenStack.pop();
listToken = (ListToken) tok;
if (listToken.getToken() == WikipediaFilter.TokenLIST_UL_START) {
fResultBuffer.append("
- ");
} else {
fResultBuffer.append("
- ");
}
topLevel--;
} else {
break;
}
}
} else {
--topLevel;
if (listToken.getToken() == WikipediaFilter.TokenLIST_UL_START && listChars[topLevel] == '#') {
fTokenStack.pop();
fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, topLevel));
fResultBuffer.append("
- ");
} else if (listToken.getToken() == WikipediaFilter.TokenLIST_OL_START && listChars[topLevel] == '*') {
fTokenStack.pop();
fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_UL_START, topLevel));
fResultBuffer.append("