Small code clean up
[phpeclipse.git] / archive / org.plog4u.wiki / src / org / plog4u / wiki / filter / WikipediaParser.java
index 5383fb7..2e04285 100644 (file)
@@ -103,7 +103,6 @@ public class WikipediaParser {
 
   //  private String fSrcPath;
   //  private String fBinPath;
-
   public WikipediaParser(MacroRepository macros, String stringSource, StringBuffer result, FilterContext context, int recursionLevel) {
     fContext = context;
     fWikiEngine = context.getRenderContext().getRenderEngine();
@@ -182,7 +181,7 @@ public class WikipediaParser {
           } else {
             lastIndex++;
           }
-          fResultBuffer.append("&#60;");
+          fResultBuffer.append("&lt;");
           break;
         case '>': // special html escape character
           if (lastIndex < (currentIndex - 1)) {
@@ -191,7 +190,7 @@ public class WikipediaParser {
           } else {
             lastIndex++;
           }
-          fResultBuffer.append("&#62;");
+          fResultBuffer.append("&gt;");
           break;
         case '&': // special html escape character
           if (lastIndex < (currentIndex - 1)) {
@@ -370,6 +369,7 @@ public class WikipediaParser {
       char closeCharacter;
       char nextCharacter;
       if (getNextChar('/')) {
+        // end tag detected
         currentHtmlPosition++;
         // closing tag
         int r = readUntilCharOrEOL('>');
@@ -405,24 +405,26 @@ public class WikipediaParser {
         }
 
       } else {
-        // opening tag
+        // start tag
         String tokenString;
         int tagNameStart = fCurrentPosition;
         int tokenLength = 0;
-        while (Character.isJavaIdentifierStart(fSource[fCurrentPosition++])) {
+        while (Character.isJavaIdentifierStart(fSource[fCurrentPosition])) {
+          fCurrentPosition++;
           tokenLength++;
         }
         try {
-          tokenString = new String(fSource, tagNameStart, fCurrentPosition - tagNameStart - 1); //tagTokenizer.nextToken();
+          tokenString = new String(fSource, tagNameStart, fCurrentPosition - tagNameStart); //tagTokenizer.nextToken();
+
           OpenTagToken token = (OpenTagToken) WikipediaFilter.OPEN_TAGS.get(tokenString);
           if (token == null) {
             return WikipediaFilter.TokenNotFound;
           }
           copyWhite(fWhiteStart, fWhiteStartPosition, (fCurrentPosition - tagNameStart) + 1);
           fWhiteStart = false;
-
           if (token instanceof SpecialTagToken) {
-            fResultBuffer.append(token.getOpenTag());
+            // for <br> <br/> <br /> <hr> <hr/>
+
             while (Character.isWhitespace(fSource[fCurrentPosition])) {
               fCurrentPosition++;
             }
@@ -431,17 +433,21 @@ public class WikipediaParser {
             }
             if (fSource[fCurrentPosition] == '>') {
               fCurrentPosition++;
+              fWhiteStartPosition = fCurrentPosition;
+              // insert the special tag :
+              fResultBuffer.append(token.getOpenTag());
+              return WikipediaFilter.TokenIgnore;
             }
+
           } else if (token instanceof OpenTagToken) {
             fResultBuffer.append("<");
             fResultBuffer.append(token.getTagName());
             fTokenStack.push(token);
             fCurrentPosition = token.scan(fResultBuffer, fSource, fCurrentPosition - 1);
             fResultBuffer.append(">");
+            return WikipediaFilter.TokenIgnore;
           }
-
-          //                                   System.out.println(fResultBuffer);
-          return WikipediaFilter.TokenIgnore;
+          return WikipediaFilter.TokenNotFound;
         } catch (NoSuchElementException e) {
           return WikipediaFilter.TokenNotFound;
         }
@@ -701,9 +707,9 @@ public class WikipediaParser {
               int index = head.indexOf(": ");
               if (index > 0) {
                 fResultBuffer.append("<dl><dt>");
-                fResultBuffer.append(head.substring(0,index));
+                fResultBuffer.append(head.substring(0, index));
                 fResultBuffer.append("</dt><dd>");
-                fResultBuffer.append(head.substring(index+2));
+                fResultBuffer.append(head.substring(index + 2));
                 fResultBuffer.append("</dd></dl>");
               } else {
                 fResultBuffer.append("<dl><dt>");
@@ -716,79 +722,6 @@ public class WikipediaParser {
             continue;
           }
           break;
-        //                             case '\\': // special characters follow
-        //                                     copyWhite(fWhiteStart, fWhiteStartPosition, 1);
-        //                                     fWhiteStart = false;
-        //                                     try {
-        //                                             fCurrentCharacter = fSource[fCurrentPosition++];
-        //                                             switch (fCurrentCharacter) {
-        //                                             case '\\': // newline
-        //                                                     if ((fCurrentCharacter = fSource[fCurrentPosition++]) == '\\') {
-        //                                                             fResultBuffer.append(Encoder
-        //                                                                             .toEntity(fCurrentCharacter));
-        //                                                             break;
-        //                                                     } else {
-        //                                                             fResultBuffer.append("<br />");
-        //                                                             break;
-        //                                                     }
-        //                                             default:
-        //                                                     fResultBuffer.append(Encoder
-        //                                                                     .toEntity(fCurrentCharacter));
-        //                                             }
-        //                                     } catch (IndexOutOfBoundsException e) {
-        //
-        //                                     }
-        //                                     continue;
-        //          case '$' : // detect tex math
-        //            copyWhite(fWhiteStart, fWhiteStartPosition, 1);
-        //            fWhiteStart = false;
-        //            startOfIndent = false;
-        //            int startMathPosition = fCurrentPosition;
-        //            if (getNextChar('$')) {
-        //              startMathPosition = fCurrentPosition;
-        //              copyWhite(fWhiteStart, fWhiteStartPosition, 2);
-        //              fWhiteStart = false;
-        //              if (readUntilString("$$")) {
-        //                String mathContent = new String(fSource, startMathPosition,
-        // fCurrentPosition - startMathPosition - 2);
-        //                if (mathContent != null) {
-        //                  handleTeXMath(mathContent, false);
-        //                  continue;
-        //                }
-        //              }
-        //            } else {
-        //              if (readUntilChar('$')) {
-        //                String mathContent = new String(fSource, startMathPosition,
-        // fCurrentPosition - startMathPosition - 1);
-        //                if (mathContent != null) {
-        //                  handleTeXMath(mathContent, true);
-        //                  continue;
-        //                }
-        //              }
-        //            }
-        //            break;
-        case '{':
-          // detect macros
-          copyWhite(fWhiteStart, fWhiteStartPosition, 1);
-          fWhiteStart = false;
-          //              boolean scanBody = true;
-          int startMacroPosition = fCurrentPosition;
-          if (getNextChar('|') && handleWikipediaTable()) { // Wikipedia
-            // table
-            // syntax
-            continue;
-          } else {
-            if (readUntilChar('}')) {
-              String macroStartTag;
-
-              macroStartTag = new String(fSource, startMacroPosition, fCurrentPosition - startMacroPosition - 1);
-              if (macroStartTag != null) {
-                createMacro(startMacroPosition, macroStartTag);
-                continue;
-              }
-            }
-          }
-          break;
         case '[':
           int startLinkPosition = fCurrentPosition;
           if (getNextChar('[')) { // wikipedia link style
@@ -830,56 +763,6 @@ public class WikipediaParser {
             }
           }
           break;
-        //                             case '1': // heading filter ?
-        //                                     int temp1Position = checkWhitespaces(fWhiteStartPosition,
-        //                                                     fCurrentPosition - 2);
-        //                                     if (temp1Position >= 0) {
-        //                                             copyWhite(fWhiteStart, fWhiteStartPosition, 1);
-        //                                             fWhiteStart = false;
-        //                                             int simpleHeader = getNextChar(' ', '.');
-        //                                             if (simpleHeader < 0) {
-        //                                                     if (getNextChar('1')) {
-        //                                                             fCurrentPosition--;
-        //                                                             if (getList('1', "<ol>", "</ol>")) {
-        //                                                                     continue;
-        //                                                             }
-        //                                                     }
-        //                                                     break;
-        //                                             }
-        //                                             if (simpleHeader == 1 && !getNextChar('1')) {
-        //                                                     fCurrentPosition--;
-        //                                                     if (getList('1', "<ol>", "</ol>")) {
-        //                                                             continue;
-        //                                                     }
-        //                                                     break;
-        //                                             }
-        //                                             temp1Position = fCurrentPosition;
-        //                                             if (simpleHeader >= 0 && readUntilChar('\n')) {
-        //                                                     String heading = new String(fSource, temp1Position,
-        //                                                                     fCurrentPosition - temp1Position - 1);
-        //                                                     if (heading != null) {
-        //                                                             fResultBuffer.append("<h3 class=\"heading-");
-        //                                                             if (simpleHeader == 1) {
-        //                                                                     fResultBuffer.append("1");
-        //                                                             } else {
-        //                                                                     fResultBuffer.append("1-1");
-        //                                                             }
-        //                                                             fResultBuffer.append("\">");
-        //                                                             // System.out.println(heading);
-        //                                                             fResultBuffer
-        //                                                                             .append(WikipediaFilter
-        //                                                                                             .filterParser(
-        //                                                                                                             heading,
-        //                                                                                                             fContext,
-        //                                                                                                             WikipediaFilter.DUMMY_CACHED_PAGE,
-        //                                                                                                             fMacros,
-        //                                                                                                             fRecursionLevel));
-        //                                                             fResultBuffer.append("</h3>");
-        //                                                             continue;
-        //                                                     }
-        //                                             }
-        //                                     }
-        //                                     break;
         case '*': // <ul> list
         case '#': // <ol> list
           if (isStartOfLine()) {
@@ -891,92 +774,6 @@ public class WikipediaParser {
             }
           }
           break;
-        //        case '#': // <ol> list
-        //          if (fCurrentPosition >= 2) {
-        //            char beforeChar = fSource[fCurrentPosition - 2];
-        //            if (beforeChar == '\n' || beforeChar == '\r') {
-        //
-        //              int levelHash = getNumberOfChar('#') + 1;
-        //
-        //              int tempHashPosition = checkWhitespaces(fWhiteStartPosition, fCurrentPosition - 1 - levelHash);
-        //              if (tempHashPosition >= 0) {
-        //                copyWhite(fWhiteStart, fWhiteStartPosition, levelHash);
-        //                fWhiteStart = false;
-        //                AbstractTag tok = (AbstractTag) fTokenStack.peek();
-        //                if (tok instanceof ListToken) {
-        //                  ListToken listToken = (ListToken) tok;
-        //                  int topLevel = listToken.getLevel();
-        //                  if (listToken.getToken() == WikipediaFilter.TokenLIST_OL_START) {
-        //                    if (levelHash > topLevel) {
-        //                      fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, topLevel + 1));
-        //                      fResultBuffer.append("<ol><li>");
-        //                    } else if (levelHash < topLevel) {
-        //                      fTokenStack.pop();
-        //                      fResultBuffer.append("</li></ol></li><li>");
-        //                    } else {
-        //                      fResultBuffer.append("</li><li>");
-        //                    }
-        //                  } else {
-        //                    fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, levelHash));
-        //                    fResultBuffer.append("<ol><li>");
-        //                  }
-        //                } else {
-        //                  fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, 1));
-        //                  fResultBuffer.append("\n<ol><li>");
-        //                }
-        //                continue;
-        //              }
-        //            }
-        //            // }
-        //          }
-        //          break;
-
-        //                             case 'i': // <ol> list
-        //                                     if (getList('i', "<ol class=\"roman\">", "</ol>")) {
-        //                                             continue;
-        //                                     }
-        //                                     break;
-        //                             case 'I': // <ol> list
-        //                                     if (getList('i', "<ol class=\"ROMAN\">", "</ol>")) {
-        //                                             continue;
-        //                                     }
-        //                                     break;
-        //            case 'a' : // <ol> list
-        //              if (getList('a', "<ol class=\"alpha\">", "</ol>")) {
-        //                continue;
-        //              }
-        //              break;
-        //            case 'A' : // <ol> list
-        //              if (getList('A', "<ol class=\"ALPHA\">", "</ol>")) {
-        //                continue;
-        //              }
-        //              break;
-        //            case 'g' : // <ol> list
-        //              if (getList('g', "<ol class=\"greek\">", "</ol>")) {
-        //                continue;
-        //              }
-        //              break;
-        //            case 'H' : // <ol> list
-        //              if (getList('H', "<ol class=\"HIRAGANA\">", "</ol>")) {
-        //                continue;
-        //              }
-        //              break;
-        //            case 'k' : // <ol> list
-        //              if (getList('k', "<ol class=\"katakana\">", "</ol>")) {
-        //                continue;
-        //              }
-        //              break;
-        //            case 'K' : // <ol> list
-        //              if (getList('K', "<ol class=\"KATAKANA\">", "</ol>")) {
-        //                continue;
-        //              }
-        //              break;
-        //            case 'j' : // <ol> list
-        //              if (getList('j', "<ol class=\"HEBREW\">", "</ol>")) {
-        //                continue;
-        //              }
-        //              break;
-
         case '\'':
           if (getNextChar('\'')) {
             if (getNextChar('\'')) {
@@ -989,20 +786,6 @@ public class WikipediaParser {
             return WikipediaFilter.TokenEM;
           }
           break;
-        //                             case '_':
-        //                                     if (getNextChar('_')) {
-        //                                             copyWhite(fWhiteStart, fWhiteStartPosition, 2);
-        //                                             fWhiteStart = false;
-        //                                             return WikipediaFilter.TokenBOLD;
-        //                                     }
-        //                                     break;
-        //                             case '~':
-        //                                     if (getNextChar('~')) {
-        //                                             copyWhite(fWhiteStart, fWhiteStartPosition, 2);
-        //                                             fWhiteStart = false;
-        //                                             return WikipediaFilter.TokenITALIC;
-        //                                     }
-        //                                     break;
         case '-':
           int tempCurrPosition = fCurrentPosition;
           try {
@@ -1022,60 +805,6 @@ public class WikipediaParser {
           } catch (IndexOutOfBoundsException e) {
 
           }
-
-          //                                   int levelMinus = getNumberOfChar('-') + 1;
-          //                                   if (getNextChar(' ')) {
-          //                                           int tempPosition = checkWhitespaces(
-          //                                                           fWhiteStartPosition, fCurrentPosition - 2
-          //                                                                           - levelMinus);
-          //                                           if (tempPosition >= 0) {
-          //                                                   copyWhite(fWhiteStart, fWhiteStartPosition,
-          //                                                                   1 + levelMinus);
-          //                                                   fWhiteStart = false;
-          //                                                   AbstractTag tok = (AbstractTag) fTokenStack.peek();
-          //                                                   if (tok instanceof ListToken) {
-          //                                                           ListToken listToken = (ListToken) tok;
-          //                                                           int topLevel = listToken.getLevel();
-          //                                                           if (listToken.getToken() ==
-          // WikipediaFilter.TokenLIST_UL_START) {
-          //                                                                   if (levelMinus > topLevel) {
-          //                                                                           fTokenStack
-          //                                                                                           .push(new ListToken(
-          //                                                                                                           WikipediaFilter.TokenLIST_UL_START,
-          //                                                                                                           topLevel + 1));
-          //                                                                           fResultBuffer
-          //                                                                                           .append("<ul class=\"minus\"><li>");
-          //                                                                   } else if (levelMinus < topLevel) {
-          //                                                                           fTokenStack.pop();
-          //                                                                           fResultBuffer
-          //                                                                                           .append("</li></ul></li><li>");
-          //                                                                   } else {
-          //                                                                           fResultBuffer.append("</li><li>");
-          //                                                                   }
-          //                                                           } else {
-          //                                                                   fTokenStack
-          //                                                                                   .push(new ListToken(
-          //                                                                                                   WikipediaFilter.TokenLIST_UL_START,
-          //                                                                                                   levelMinus));
-          //                                                                   fResultBuffer
-          //                                                                                   .append("<ul class=\"minus\"><li>");
-          //                                                           }
-          //                                                   } else {
-          //                                                           fTokenStack
-          //                                                                           .push(new ListToken(
-          //                                                                                           WikipediaFilter.TokenLIST_UL_START,
-          //                                                                                           1));
-          //                                                           fResultBuffer
-          //                                                                           .append("\n<ul class=\"minus\"><li>");
-          //                                                   }
-          //                                                   continue;
-          //                                           }
-          //                                   }
-          //                                   if (levelMinus == 2) {
-          //                                           copyWhite(fWhiteStart, fWhiteStartPosition, 2);
-          //                                           fWhiteStart = false;
-          //                                           return WikipediaFilter.TokenSTRIKETHROUGH;
-          //                                   }
           break;
         case 'h': // http(s)://
           int urlStartPosition = fCurrentPosition;
@@ -1174,6 +903,27 @@ public class WikipediaParser {
             }
           }
           break;
+        case '{':
+          // detect macros
+          copyWhite(fWhiteStart, fWhiteStartPosition, 1);
+          fWhiteStart = false;
+          int startMacroPosition = fCurrentPosition;
+          if (getNextChar('|') && handleWikipediaTable()) { // Wikipedia
+            // table
+            // syntax
+            continue;
+          } else {
+            if (readUntilChar('}')) {
+              String macroStartTag;
+
+              macroStartTag = new String(fSource, startMacroPosition, fCurrentPosition - startMacroPosition - 1);
+              if (macroStartTag != null) {
+                createMacro(startMacroPosition, macroStartTag);
+                continue;
+              }
+            }
+          }
+          break;
         case '<':
           int htmlStartPosition = fCurrentPosition;
           try {
@@ -1198,32 +948,6 @@ public class WikipediaParser {
                 }
               }
               break;
-            //                case 'm' : // math
-            //                  String mathString =
-            // fStringSource.substring(fCurrentPosition - 1,
-            // fCurrentPosition + 5);
-
-            //                  if (mathString.equals("<math>")) {
-            //                    fCurrentPosition += 5;
-            //                    if (readUntilString("</math>")) {
-            //                      String mathContent = new String(fSource,
-            // htmlStartPosition + 5, fCurrentPosition -
-            // htmlStartPosition - 12);
-            //                      if (mathContent != null) {
-            //                        copyWhite(fWhiteStart, fWhiteStartPosition,
-            // fCurrentPosition - htmlStartPosition + 1);
-            //                        fWhiteStart = false;
-            //                        if (startOfIndent) {
-            //                          startOfIndent = false;
-            //                          handleTeXMath(mathContent, false);
-            //                        } else {
-            //                          handleTeXMath(mathContent, true);
-            //                        }
-            //                        continue;
-            //                      }
-            //                    }
-            //                  }
-            //                  break;
             case 'n': // nowiki
               String nowikiString = fStringSource.substring(fCurrentPosition - 1, fCurrentPosition + 7);
 
@@ -1242,7 +966,7 @@ public class WikipediaParser {
               break;
             }
           } catch (IndexOutOfBoundsException e) {
-
+            // do nothing
           }
           startOfIndent = false;
           fCurrentPosition = htmlStartPosition;
@@ -2627,11 +2351,13 @@ public class WikipediaParser {
 
     if (fResultBufferHeader != null) {
       int tocStart = fResultBufferHeader.length();
-      fResultBufferHeader.append("<table id=\"toc\" border=\"0\"><tr><th>Table of contents</th></tr><tr><td>");
-      fResultBufferHeader.append("<ol>");
-      createToC(fTableOfContent);
-      fResultBufferHeader.append("</ol>");
-      fResultBufferHeader.append("</td></tr></table><hr/>");
+      if (isToC(fTableOfContent) > 3) {
+        fResultBufferHeader.append("<table id=\"toc\" border=\"0\"><tr><th>Table of contents</th></tr><tr><td>");
+        fResultBufferHeader.append("<ol>");
+        createToC(fTableOfContent);
+        fResultBufferHeader.append("</ol>");
+        fResultBufferHeader.append("</td></tr></table><hr/>");
+      }
 
       fResultBufferHeader.append(fResultBuffer);
       fResultBuffer = fResultBufferHeader;
@@ -2640,6 +2366,28 @@ public class WikipediaParser {
     }
   }
 
+  /**
+   * Counts the entries of the table-of-contents structure that are not themselves lists, descending into nested lists.
+   * The original note calls these entries "wiki headers"; the caller compares the total with a threshold before emitting the ToC table.
+   * @param toc list whose elements are either nested <code>ArrayList</code>s or leaf entries (presumably one per header - TODO confirm)
+   * @return the number of non-list entries found in <code>toc</code> and in every list nested inside it
+   */
+  private int isToC(ArrayList toc) {
+    // a list that holds nothing but one nested list is unwrapped: its count is the count of that nested list
+    if (toc.size() == 1 && (toc.get(0) instanceof ArrayList)) {
+      return isToC((ArrayList) toc.get(0));
+    }
+    int result = 0;
+    for (int i = 0; i < toc.size(); i++) {
+      if (toc.get(i) instanceof ArrayList) {
+        result += isToC((ArrayList) toc.get(i));
+      } else {
+        result++;
+      }
+    }
+    return result;
+  }
+
   private void createToC(ArrayList toc) {
     if (toc.size() == 1 && (toc.get(0) instanceof ArrayList)) {
       createToC((ArrayList) toc.get(0));