Enable word wrapping with preference key editor.wrap.words (false by default)
[phpeclipse.git] / archive / net.sourceforge.phpeclipse.wiki / src / net / sourceforge / phpeclipse / wiki / actions / category / ParseCategory.java
1 package net.sourceforge.phpeclipse.wiki.actions.category;
2
3 import java.util.ArrayList;
4
/**
 * Extracts the values of <code>title="..."</code> attributes from the article
 * listing of a rendered wiki category page.
 *
 * <p>Only the part of the page between the first occurrence of the marker
 * <code>contentSub</code> and the next occurrence of the marker
 * <code>printfooter</code> (or the end of the text if that marker is missing)
 * is inspected. Every title found is appended to the list returned by
 * {@link #getTitleList()}; repeated calls to {@link #parseCategory(String)}
 * keep appending to the same list.</p>
 *
 * <p>This is a lightweight text scan, not an HTML parser: it recognises the
 * word <code>title</code>, optional whitespace, <code>=</code>, optional
 * whitespace and a double-quoted value.</p>
 */
public class ParseCategory {
  /** Marker after which the category listing is expected to start. */
  private static final String REGION_START = "contentSub";

  /** Marker at which scanning stops. */
  private static final String REGION_END = "printfooter";

  /** Name of the attribute whose values are collected. */
  private static final String TITLE_ATTRIBUTE = "title";

  /** Collected titles (String elements), in the order they were found. */
  ArrayList titleList;

  /** Creates a parser with an empty title list. */
  public ParseCategory() {
    titleList = new ArrayList();
  }

  /**
   * Scans <code>text</code> for <code>title="..."</code> attributes located
   * between the <code>contentSub</code> and <code>printfooter</code> markers
   * and appends each attribute value to the title list.
   *
   * <p>Nothing is added when <code>text</code> is <code>null</code> or does not
   * contain the start marker. Truncated input (for example an attribute value
   * without a closing quote) ends the scan without throwing.</p>
   *
   * @param text the HTML of a category page; may be <code>null</code>
   */
  public void parseCategory(String text) {
    if (text == null) {
      return;
    }
    int regionStart = text.indexOf(REGION_START);
    if (regionStart < 0) {
      return;
    }
    // Look for the end marker only behind the start marker, so that an earlier
    // occurrence of the word cannot disable parsing altogether.
    int regionEnd = text.indexOf(REGION_END, regionStart);
    if (regionEnd < 0) {
      regionEnd = text.length();
    }

    int pos = regionStart;
    while (true) {
      int nameStart = text.indexOf(TITLE_ATTRIBUTE, pos);
      if (nameStart < 0 || nameStart >= regionEnd) {
        break;
      }
      pos = nameStart + TITLE_ATTRIBUTE.length();

      // Reject matches that are only the tail of a longer word ("subtitle").
      if (nameStart > 0 && Character.isLetterOrDigit(text.charAt(nameStart - 1))) {
        continue;
      }
      // Reject occurrences that are not an attribute assignment, e.g. the
      // "title=Foo" query parameter inside an href value.
      int valueStart = findQuotedValueStart(text, pos, regionEnd);
      if (valueStart < 0) {
        continue;
      }
      int valueEnd = text.indexOf('"', valueStart);
      if (valueEnd < 0 || valueEnd >= regionEnd) {
        // No closing quote inside the region: there cannot be another complete
        // title="..." behind this point either.
        break;
      }
      titleList.add(text.substring(valueStart, valueEnd));
      pos = valueEnd + 1;
    }
  }

  /**
   * Expects optional whitespace, '=', optional whitespace and an opening double
   * quote starting at <code>from</code>.
   *
   * @return the index of the first character of the quoted value, or -1 if the
   *         text at <code>from</code> does not have that shape before
   *         <code>limit</code>
   */
  private static int findQuotedValueStart(String text, int from, int limit) {
    int pos = skipWhitespace(text, from, limit);
    if (pos >= limit || text.charAt(pos) != '=') {
      return -1;
    }
    pos = skipWhitespace(text, pos + 1, limit);
    if (pos >= limit || text.charAt(pos) != '"') {
      return -1;
    }
    return pos + 1;
  }

  /** Returns the index of the first non-whitespace character in [from, limit), or limit. */
  private static int skipWhitespace(String text, int from, int limit) {
    int pos = from;
    while (pos < limit && Character.isWhitespace(text.charAt(pos))) {
      pos++;
    }
    return pos;
  }

  /**
   * Small manual demo: parses a captured category page and prints every title
   * that was found, one per line.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String test = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\r\n"
        + "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"de\" lang=\"de\" dir=\"ltr\">\r\n"
        + "  <head>\r\n"
        + "    <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>\r\n"
        + "    <meta name=\"KEYWORDS\" content=\"Kategorie:Eclipse Tips\" />\r\n"
        + "<meta name=\"robots\" content=\"index,follow\" />\r\n"
        + "<link rel=\"shortcut icon\" href=\"/favicon.ico\" />\r\n"
        + "<link title=\"Creative Commons\" type=\"application/rdf+xml\" href=\"/index.php?title=Kategorie:Eclipse_Tips&amp;action=creativecommons\" rel=\"meta\" />\r\n"
        + "<link rel=\"copyright\" href=\"http://www.gnu.org/copyleft/fdl.html\" />\r\n"
        + "\r\n"
        + "    <title>Kategorie:Eclipse Tips - Plog4u</title>\r\n"
        + "    <style type=\"text/css\" media=\"screen,projection\">/*<![CDATA[*/ @import \"/stylesheets/monobook/main.css\"; /*]]>*/</style>\r\n"
        + "\r\n"
        + "    <link rel=\"stylesheet\" type=\"text/css\" media=\"print\" href=\"/stylesheets/commonPrint.css\"/>\r\n"
        + "    <!--[if IE]><style type=\"text/css\" media=\"all\">@import \"/stylesheets/monobook/IEFixes.css\";</style>\r\n"
        + "    <script type=\"text/javascript\" src=\"/stylesheets/IEFixes.js\"></script>\r\n"
        + "    <meta http-equiv=\"imagetoolbar\" content=\"no\" /><![endif]-->\r\n"
        + "    <script src=\"/index.php?title=-&amp;action=raw&amp;gen=js\" type=\"text/javascript\"></script>\r\n"
        + "    <script type=\"text/javascript\" src=\"/stylesheets/wikibits.js\"></script>\r\n"
        + "    <style type=\"text/css\">/*<![CDATA[*/ @import \"/index.php?title=-&action=raw&gen=css\";\r\n"
        + " /*]]>*/</style>\r\n"
        + "    \r\n"
        + "  </head>\r\n"
        + "  <body class=\"ns-14\">\r\n"
        + "\r\n"
        + "    <div id=\"globalWrapper\">\r\n"
        + "      <div id=\"column-content\">\r\n"
        + "     <div id=\"content\">\r\n"
        + "         \r\n"
        + "       <a name=\"top\" id=\"contentTop\"></a>\r\n"
        + "       <h1 class=\"firstHeading\">Kategorie:Eclipse Tips</h1>\r\n"
        + "       <div id=\"bodyContent\">\r\n"
        + "         <h3 id=\"siteSub\">aus Plog4u, der freien Eclipse Wissensdatenbank</h3>\r\n"
        + "         <div id=\"contentSub\"></div>\r\n"
        + "\r\n"
        + "         \r\n"
        + "\r\n"
        + "         \r\n"
        + "         <!-- start content -->\r\n"
        + "         <p>(Dieser Artikel enthält momentan keinen Text)<br style=\"clear:both;\"/>\r\n"
        + "</p>\r\n"
        + "<h2>Artikel in der Kategorie \"Eclipse Tips\"</h2>\r\n"
        + "Dieser Kategorie gehören 6 Artikel an.<h3>B</h3>\r\n"
        + "<ul><li><a href=\"/index.php/Benutzung:Eclipse:Tips:Eclipse_Sprachpaket\" title =\"Benutzung:Eclipse:Tips:Eclipse Sprachpaket\">Benutzung:Eclipse:Tips:Eclipse Sprachpaket</a></li><li><a href=\"/index.php/Benutzung:Eclipse:Tips:Eclipse_Starten\" title =\"Benutzung:Eclipse:Tips:Eclipse Starten\">Benutzung:Eclipse:Tips:Eclipse Starten</a></li><li><a href=\"/index.php/Benutzung:Eclipse:Tips:JDT:Pr%C3%A4fixe_f%C3%BCr_Klassenattribute\" title =\"Benutzung:Eclipse:Tips:JDT:Präfixe für Klassenattribute\">Benutzung:Eclipse:Tips:JDT:Präfixe für Klassenattribute</a></li><li><a href=\"/index.php/Benutzung:Eclipse:Tips:Plugins_aufrufen\" title =\"Benutzung:Eclipse:Tips:Plugins aufrufen\">Benutzung:Eclipse:Tips:Plugins aufrufen</a></li><li><a href=\"/index.php/Benutzung:Eclipse:Tips:Sourceforge_CVS_Zugriff\" title =\"Benutzung:Eclipse:Tips:Sourceforge CVS Zugriff\">Benutzung:Eclipse:Tips:Sourceforge CVS Zugriff</a></li><li><a href=\"/index.php/Benutzung:Eclipse:Tips:Stringvergleiche_in_JUnit\" title =\"Benutzung:Eclipse:Tips:Stringvergleiche in JUnit\">Benutzung:Eclipse:Tips:Stringvergleiche in JUnit</a></li></ul>\r\n"
        + "\r\n"
        + "<div class=\"printfooter\">\r\n"
        + "Von \"<a href=\"http://www.plog4u.de/index.php/Kategorie:Eclipse_Tips\">http://www.plog4u.de/index.php/Kategorie:Eclipse_Tips</a>\"</div>\r\n"
        + "\r\n"
        + "         \r\n"
        + "         <!-- end content -->\r\n"
        + "         <div class=\"visualClear\"></div>\r\n"
        + "       </div>\r\n"
        + "     </div>\r\n"
        + "      </div>\r\n"
        + "      <div id=\"column-one\">\r\n"
        + "\r\n"
        + "     <div id=\"p-cactions\" class=\"portlet\">\r\n"
        + "       <h5>Views</h5>\r\n"
        + "       <ul>\r\n"
        + "         <li id=\"ca-nstab-category\" class=\"selected\"><a href=\"/index.php/Kategorie:Eclipse_Tips\">Kategorie</a></li><li id=\"ca-talk\" class=\"new\"><a href=\"/index.php?title=Kategorie_Diskussion:Eclipse_Tips&amp;action=edit\">Diskussion</a></li><li id=\"ca-edit\" class=\"\"><a href=\"/index.php?title=Kategorie:Eclipse_Tips&amp;action=edit\">bearbeiten</a></li>\r\n"
        + "       </ul>\r\n"
        + "     </div>\r\n"
        + "     <div class=\"portlet\" id=\"p-personal\">\r\n"
        + "\r\n"
        + "       <h5>\'Persönliche Werkzeuge</h5>\r\n"
        + "       <div class=\"pBody\">\r\n"
        + "         <ul>\r\n"
        + "           <li id=\"pt-anonuserpage\"><a href=\"/index.php/Benutzer:217.252.6.161\" class=\"new\">217.252.6.161</a></li><li id=\"pt-anontalk\"><a href=\"/index.php/Benutzer_Diskussion:217.252.6.161\" class=\"new\">Diskussionsseite dieser IP</a></li><li id=\"pt-anonlogin\"><a href=\"/index.php?title=Spezial:Userlogin&amp;returnto=Kategorie:Eclipse_Tips\">Anmelden</a></li>\r\n"
        + "         </ul>\r\n"
        + "       </div>\r\n"
        + "     </div>\r\n"
        + "\r\n"
        + "<!-- <div class=\"portlet\" id=\"p-logo\">\r\n"
        + "       <a style=\"background-image: url(/stylesheets/images/wiki.png);\" href=\"/index.php/Hauptseite\" i18n:attributes=\"title string:mainpage\"></a>\r\n"
        + "     </div> -->\r\n"
        + "     <div class=\"portlet\" id=\"p-nav\">\r\n"
        + "       <h5>Navigation</h5>\r\n"
        + "       <div class=\"pBody\">\r\n"
        + "         <ul>\r\n"
        + "           <li id=\"n-mainpage\"><a href=\"/index.php/Hauptseite\">Hauptseite</a></li>\r\n"
        + "           <li id=\"n-portal\"><a href=\"/index.php/Plog4u:Portal\">Plog4u-Portal</a></li>\r\n"
        + "           <li id=\"n-currentevents\"><a href=\"/index.php/Aktuelle_Ereignisse\">Aktuelle Ereignisse</a></li>\r\n"
        + "\r\n"
        + "           <li id=\"n-recentchanges\"><a href=\"/index.php/Spezial:Recentchanges\">Letzte Änderungen</a></li>\r\n"
        + "           <li id=\"n-randompage\"><a href=\"/index.php/Spezial:Randompage\">Zufälliger Artikel</a></li>\r\n"
        + "           <li id=\"n-help\"><a href=\"/index.php/Plog4u:Hilfe\">Hilfe</a></li>\r\n"
        + "           \r\n"
        + "<li><a href=\"http://www.plog4u.de/impressum.html\">Impressum</a></li>\r\n"
        + "         </ul>\r\n"
        + "       </div>\r\n"
        + "     </div>\r\n"
        + "\r\n"
        + "     <div id=\"p-search\" class=\"portlet\">\r\n"
        + "       <h5>Suche</h5>\r\n"
        + "       <div class=\"pBody\">\r\n"
        + "         <form name=\"searchform\" action=\"/index.php/Spezial:Search\" id=\"searchform\">\r\n"
        + "           <input accesskey=\"f\" id=\"searchInput\" name=\"search\" type=\"text\" />\r\n"
        + "           <input value=\"Los\" type=\"submit\" name=\"go\" class=\"searchButton\" />&nbsp;<input value=\"Suche\" type=\"submit\" name=\"fulltext\" class=\"searchButton\" />\r\n"
        + "         </form>\r\n"
        + "       </div>\r\n"
        + "\r\n"
        + "     </div>\r\n"
        + "     <div class=\"portlet\" id=\"p-tb\">\r\n"
        + "       <h5>Werkzeuge</h5>\r\n"
        + "       <div class=\"pBody\">\r\n"
        + "         <ul>\r\n"
        + "           <li id=\"t-whatlinkshere\"><a href=\"/index.php?title=Spezial:Whatlinkshere&amp;target=Kategorie%3AEclipse_Tips\">Was zeigt hierhin</a></li>\r\n"
        + "           <li id=\"t-recentchangeslinked\"><a href=\"/index.php?title=Spezial:Recentchangeslinked&amp;target=Kategorie%3AEclipse_Tips\">Verlinkte Seiten</a></li>\r\n"
        + "\r\n"
        + "           \r\n"
        + "           \r\n"
        + "           \r\n"
        + "           \r\n"
        + "           <li id=\"t-specialpages\"><a href=\"/index.php/Spezial:Specialpages\">Spezialseiten</a></li>\r\n"
        + "         </ul>\r\n"
        + "       </div>\r\n"
        + "     </div>\r\n"
        + "\r\n"
        + "      <div class=\"portlet\" id=\"p-advertisement\">\r\n"
        + "       <h5>werbung</h5>\r\n"
        + "       <div class=\"pBody\">\r\n"
        + "\r\n"
        + "<script type=\"text/javascript\"><!--\r\n"
        + "google_ad_client = \"pub-0182243963199149\";\r\n"
        + "google_ad_width = 120;\r\n"
        + "google_ad_height = 240;\r\n"
        + "google_ad_format = \"120x240_as\";\r\n"
        + "google_ad_channel =\"4781857343\";\r\n"
        + "google_ad_language =\"de\";\r\n"
        + "//--></script>\r\n"
        + "<script type=\"text/javascript\" src=\"http://pagead2.googlesyndication.com/pagead/show_ads.js\">\r\n"
        + "</script>\r\n"
        + "\r\n"
        + "       </div>\r\n"
        + "      </div>\r\n"
        + "     \r\n"
        + "      </div><!-- end of the left (by default at least) column -->\r\n"
        + "      <div class=\"visualClear\"></div>\r\n"
        + "      <div id=\"footer\">\r\n"
        + "     <div id=\"f-poweredbyico\"><a href=\"http://www.mediawiki.org/\"><img src=\"/stylesheets/images/poweredby_mediawiki_88x31.png\" alt=\"MediaWiki\" /></a></div>\r\n"
        + "\r\n"
        + "     <div id=\"f-copyrightico\"><a href=\"http://www.gnu.org/copyleft/fdl.html\"><img src=\"/stylesheets/images/gnu-fdl.png\" alt=\'GNU Free Documentation License 1.2\' /></a></div>\r\n"
        + "     <ul id=\"f-list\">\r\n"
        + "       \r\n"
        + "       \r\n"
        + "       \r\n"
        + "       \r\n"
        + "       <li id=\"f-about\"><a href=\"/index.php/Plog4u:%C3%9Cber_Plog4u\" title =\"Plog4u:Über Plog4u\">Über Plog4u</a></li>\r\n"
        + "       <li id=\"f-disclaimer\"><a href=\"/index.php/Plog4u:Lizenzbestimmungen\" title =\"Plog4u:Lizenzbestimmungen\">Lizenzbestimmungen</a></li>\r\n"
        + "     </ul>\r\n"
        + "      </div>\r\n"
        + "    </div>\r\n"
        + "    <!-- Served by single in 0.09 secs. -->\r\n"
        + "\r\n"
        + "  </body>\r\n" + "</html>\r\n" + "";
    ParseCategory pc = new ParseCategory();
    pc.parseCategory(test);
    ArrayList list = pc.getTitleList();
    for (int i = 0; i < list.size(); i++) {
      System.out.println(list.get(i));
    }
  }

  /**
   * Returns the titles collected so far. The returned list is the parser's own
   * list, not a copy, so later calls to {@link #parseCategory(String)} are
   * visible through it.
   *
   * @return Returns the titleList.
   */
  public ArrayList getTitleList() {
    return titleList;
  }
}