1 package net.sourceforge.phpeclipse.wiki.actions.mediawiki.connect;
3 //Parts of this source are copied and modified from the jEdit Wikipedia plugin:
4 //http://www.djini.de/software/wikipedia/index.html
6 //The modified sources are available under the "Common Public License"
7 //with permission from the original author: Daniel Wunsch
9 import java.io.IOException;
10 import java.io.StringReader;
11 import java.io.UnsupportedEncodingException;
12 import java.net.URLDecoder;
13 import java.util.ArrayList;
14 import java.util.regex.Matcher;
15 import java.util.regex.Pattern;
17 import net.sourceforge.phpeclipse.wiki.actions.mediawiki.config.IWikipedia;
18 import net.sourceforge.phpeclipse.wiki.actions.mediawiki.config.WikipediaDE;
19 import net.sourceforge.phpeclipse.wiki.actions.mediawiki.exceptions.MethodException;
20 import net.sourceforge.phpeclipse.wiki.actions.mediawiki.exceptions.PageNotEditableException;
21 import net.sourceforge.phpeclipse.wiki.actions.mediawiki.exceptions.UnexpectedAnswerException;
22 import net.sourceforge.phpeclipse.wiki.editor.WikiEditorPlugin;
24 import org.apache.commons.httpclient.ConnectMethod;
25 import org.apache.commons.httpclient.HttpClient;
26 import org.apache.commons.httpclient.HttpConnection;
27 import org.apache.commons.httpclient.HttpException;
28 import org.apache.commons.httpclient.HttpMethod;
29 import org.apache.commons.httpclient.HttpState;
30 import org.apache.commons.httpclient.HttpStatus;
31 import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
32 import org.apache.commons.httpclient.NameValuePair;
33 import org.apache.commons.httpclient.URI;
34 import org.apache.commons.httpclient.UsernamePasswordCredentials;
35 import org.apache.commons.httpclient.methods.GetMethod;
36 import org.apache.commons.httpclient.methods.PostMethod;
37 import org.apache.commons.httpclient.protocol.Protocol;
38 import org.apache.commons.httpclient.util.EncodingUtil;
39 import org.eclipse.core.runtime.CoreException;
42 * This class gets the wikitext from a wikipedia edit page
44 * The basic coding was copied from the commons-httpclient example <code>MediaWikiConnector.java</code>
// Connector that talks to a MediaWiki site through commons-httpclient. It offers
// login, logout, loading an edit page, loading pages as XML, storing a page and
// fetching raw wiki text.
46 public class MediaWikiConnector {
// The pattern has three capture groups, which parseBody() reads in this order:
//   group 1 - the text between "title=" and the following '&', after the form's id="editform" attribute
//   group 2 - the content of the <textarea ... name="wpTextbox1" ...> element
//   group 3 - the digits in the value attribute of the <input ... name="wpEdittime" ...> tag
// Pattern.DOTALL makes '.' match line terminators too, so the pattern can span the whole page.
47 //pattern used to scrape an edit page
48 private static final Pattern BODY_PATTERN = Pattern.compile(
50 * action=".*?title=(.*?)(&|\") <form id="editform" name="editform" method="post"
51 * action="/w/wiki.phtml?title=Ammersee&action=submit" locked pages: <textarea cols='80' rows='25' readonly>
53 ".*<form[^>]*\\sid=\"editform\"[^>]*title=(.*?)&[^>]*>" + ".*<textarea[^>]*\\sname=\"wpTextbox1\"[^>]*>(.*?)</textarea>"
54 + ".*<input[^>]*\\svalue=\"(\\d*)\"[^>]*\\sname=\"wpEdittime\"[^>]*>" + ".*", Pattern.DOTALL);
56 //setup default user agent
// Sent as the "User-Agent" request header by login, logout, load, loadXML and store.
57 final static public String userAgent = "plog4u.org/0.0";
59 // create a ConnectionManager
// Instantiated and configured (connection limits, stale checking) in the constructor.
60 private MultiThreadedHttpConnectionManager manager;
// HTTP client built on top of 'manager' in the constructor; login, logout, load,
// loadXML and store execute their requests through it.
62 private HttpClient client;
65 * Delay a new store to 1 second
// Constructed with a minimum delay of 1000 (Throttle works with System.currentTimeMillis() values).
// store() calls storeThrottle.delay() before it posts anything.
67 private Throttle storeThrottle = new Throttle(1000);
// NOTE(review): the declaration line of the Throttle class and some of its body lines
// are not visible in this view; the comments below describe only what is shown.
// Time stamp (System.currentTimeMillis() scale) that delay() rewrites on each call.
70 private long nextTime = 0;
// Amount added to the current time when nextTime is recomputed; fixed at construction.
72 private final long minimumDelay;
// Stores the given minimum delay.
74 public Throttle(long minimumDelay) {
75 this.minimumDelay = minimumDelay;
78 /** this is called from the client */
// Computes how far in the future nextTime still lies, then moves nextTime to
// "now + minimumDelay". The method is synchronized on the Throttle instance.
// presumably the caller sleeps for 'delay' when it is positive - that statement is not visible here; TODO confirm
79 public synchronized void delay() throws InterruptedException {
80 long delay = nextTime - System.currentTimeMillis();
83 nextTime = System.currentTimeMillis() + minimumDelay;
// Creates the connection manager and the HttpClient that uses it.
87 public MediaWikiConnector() {
88 // <a href="javascript:window.location.href='http://127.0.0.1:8009/open/?' + window.location.href">bookmarklet</a>
89 manager = new MultiThreadedHttpConnectionManager();
// allow at most 6 connections per host and 18 connections in total
90 manager.setMaxConnectionsPerHost(6);
91 manager.setMaxTotalConnections(18);
// enable the manager's stale-connection check
92 manager.setConnectionStaleCheckingEnabled(true);
93 // open the conversation
94 client = new HttpClient(manager);
95 //client.State.CookiePolicy = CookiePolicy.COMPATIBILITY;
96 //client.HostConfiguration.setHost(LOGON_SITE, LOGON_PORT, "http");
99 /** destructor freeing all resources. the Connection is not usable any more after calling this method */
// NOTE(review): the body of destroy() is not visible in this view; confirm that it
// actually shuts down the connection manager created in the constructor.
100 public void destroy() {
104 /** log in - returns success */
// Posts the login form to actionUrl and evaluates the answer using the patterns supplied by config.
// Afterwards 'result' and the user name are written to the SiteState obtained for config.
// Throws UnexpectedAnswerException for the wrong-user / wrong-password answers and for any other
// unexpected response. HttpException and IOException are rethrown wrapped in MethodException.
105 public boolean login(IWikipedia config, String actionUrl, String user, String password, boolean remember)
106 throws UnexpectedAnswerException, MethodException {
107 PostMethod method = new PostMethod(actionUrl);
// redirects are not followed; a 302 answer is evaluated explicitly below
108 method.setFollowRedirects(false);
109 method.addRequestHeader("User-Agent", userAgent);
// form fields of the login request; 'remember' is sent as "1" or "0"
110 NameValuePair[] params = new NameValuePair[] {
111 new NameValuePair("title", config.getLoginTitle()),
112 new NameValuePair("action", "submit"),
113 new NameValuePair("wpName", user),
114 new NameValuePair("wpPassword", password),
115 new NameValuePair("wpRemember", remember ? "1" : "0"),
116 new NameValuePair("wpLoginattempt", "submit") };
117 method.addParameters(params);
121 int responseCode = client.executeMethod(method);
122 String responseBody = method.getResponseBodyAsString();
// success: a 302 with an empty body, or a 200 whose body matches config.getLoginSuccess()
// ('&&' binds tighter than '||', so the condition reads (302 && empty) || (200 && match))
128 if (responseCode == 302 && responseBody.length() == 0 || responseCode == 200
129 && responseBody.matches(config.getLoginSuccess())) {
// a 200 whose body matches the wrong-password or the unknown-user pattern
131 } else if (responseCode == 200 && responseBody.matches(config.getLoginWrongPw()) || responseCode == 200
132 && responseBody.matches(config.getLoginNoUser())) {
// the unknown-user pattern is checked first, then the wrong-password pattern
134 if (responseBody.matches(config.getLoginNoUser())) {
135 throw new UnexpectedAnswerException("login not successful: wrong user name: "+user);
136 } else if (responseBody.matches(config.getLoginWrongPw())) {
137 throw new UnexpectedAnswerException("login not successful: wrong password for user: "+user);
// NOTE(review): this message says "logout" although this is the login method - looks like a copy/paste slip; confirm before changing
139 throw new UnexpectedAnswerException("logout not successful: responseCode == 200");
// any other response: report the status line
142 throw new UnexpectedAnswerException("login not successful: " + method.getStatusLine());
144 } catch (HttpException e) {
145 throw new MethodException("method failed", e);
146 } catch (IOException e) {
147 throw new MethodException("method failed", e);
// presumably inside a finally block (not visible here) so the connection is always released - TODO confirm
149 method.releaseConnection();
152 * // display cookies System.err.println("login: " + result); for (var cookie : client.State.Cookies) {
153 * System.err.println("cookie: " + cookie); }
// record the outcome and the user name in the per-site state
157 SiteState state = SiteState.siteState(config);
158 state.loggedIn = result;
159 state.userName = user;
164 /** log out - return success */
// Sends a GET request to actionUrl with the logout title and action=submit in the query string,
// then marks the SiteState obtained for config as not logged in.
// Throws UnexpectedAnswerException for an unexpected response. HttpException and IOException
// are rethrown wrapped in MethodException.
165 public boolean logout(IWikipedia config, String actionUrl) throws UnexpectedAnswerException, MethodException {
166 GetMethod method = new GetMethod(actionUrl);
// redirects are not followed; a 302 answer is evaluated explicitly below
167 method.setFollowRedirects(false);
168 method.addRequestHeader("User-Agent", userAgent);
169 NameValuePair[] params = new NameValuePair[] {
170 new NameValuePair("title", config.getLogoutTitle()),
171 new NameValuePair("action", "submit") };
// the parameters are form-url-encoded with the charset supplied by config
172 method.setQueryString(EncodingUtil.formUrlEncode(params, config.getCharSet()));
176 int responseCode = client.executeMethod(method);
177 String responseBody = method.getResponseBodyAsString();
// success: a 302 with an empty body, or a 200 whose body matches config.getLoginSuccess()
// NOTE(review): the *login* success pattern is used to recognise a successful logout - confirm this is intended
180 if (responseCode == 302 && responseBody.length() == 0 || responseCode == 200
181 && responseBody.matches(config.getLoginSuccess())) {
182 // config.getloggedIn = false;
184 } else if (responseCode == 200) {
185 //### should check for a failure message
187 throw new UnexpectedAnswerException("logout not successful: responseCode == 200");
// any other response: report the status line
189 throw new UnexpectedAnswerException("logout not successful: " + method.getStatusLine());
191 } catch (HttpException e) {
192 throw new MethodException("method failed", e);
193 } catch (IOException e) {
194 throw new MethodException("method failed", e);
// presumably inside a finally block (not visible here) so the connection is always released - TODO confirm
196 method.releaseConnection();
// record in the per-site state that nobody is logged in any more
200 SiteState state = SiteState.siteState(config);
201 state.loggedIn = false;
206 /** parses a returned editform into a Content object with UNIX-EOLs ("\n") */
// Matches responseBody against BODY_PATTERN and returns a Parsed built from the
// edit timestamp, the URL-decoded title and the textarea body.
// Throws PageNotEditableException when the pattern does not match the whole response.
// URLDecoder.decode may throw UnsupportedEncodingException for an unknown charSet.
207 private Parsed parseBody(String charSet, String responseBody) throws PageNotEditableException, UnsupportedEncodingException {
208 Matcher matcher = BODY_PATTERN.matcher(responseBody);
209 if (!matcher.matches())
210 throw new PageNotEditableException("cannot find editform form");
// group 1 = title, group 2 = textarea content, group 3 = wpEdittime value
212 String title = matcher.group(1);
213 String body = matcher.group(2);
214 String timestamp = matcher.group(3);
// the title was captured from a URL, so it is URL-decoded with the given charset
216 title = URLDecoder.decode(title, charSet);
// NOTE(review): as shown, the first five replaceAll calls replace a string with itself and the
// first literal is not well-formed; the search strings were presumably HTML entities
// (such as &quot; / &lt; / &gt; / &amp;) that got decoded - verify against the pristine source.
// The last two calls turn "\r\n" and any remaining '\r' into '\n'.
217 body = body.replaceAll(""", "\"").replaceAll("'", "'").replaceAll("<", "<").replaceAll(">", ">").replaceAll(
218 "&", "&").replaceAll("\r\n", "\n").replace('\r', '\n');
220 return new Parsed(timestamp, title, body);
223 /** load a Page Version - returns a Loaded Object */
// Requests the edit page of 'title' (action=edit) from actionURL and parses the returned edit form.
// Throws UnexpectedAnswerException when the response code is not 200. parseBody may throw
// PageNotEditableException. HttpException and IOException are rethrown wrapped in MethodException.
224 public Loaded load(String actionURL, String charSet, String title) throws UnexpectedAnswerException, MethodException,
225 PageNotEditableException {
226 GetMethod method = new GetMethod(actionURL);
227 method.setFollowRedirects(false);
228 method.addRequestHeader("User-Agent", userAgent);
229 NameValuePair[] params = new NameValuePair[] { new NameValuePair("title", title), new NameValuePair("action", "edit") };
// the parameters are form-url-encoded with the caller's charset and sent as the query string
230 method.setQueryString(EncodingUtil.formUrlEncode(params, charSet));
234 int responseCode = client.executeMethod(method);
235 String responseBody = method.getResponseBodyAsString();
// only a 200 answer is parsed; the Loaded carries the title taken from the form, not the argument
238 if (responseCode == 200) {
239 Parsed parsed = parseBody(charSet, responseBody);
240 Content content = new Content(parsed.timestamp, parsed.body);
241 result = new Loaded(actionURL, charSet, parsed.title, content);
243 throw new UnexpectedAnswerException("load not successful: expected 200 OK, got " + method.getStatusLine());
245 } catch (HttpException e) {
246 throw new MethodException("method failed", e);
247 } catch (IOException e) {
248 throw new MethodException("method failed", e);
// presumably inside a finally block (not visible here) so the connection is always released - TODO confirm
250 method.releaseConnection();
// Posts 'pages' (with curonly=X and action=submit) to actionURL and, on a 200 answer,
// passes the response body to XMLReader.readFromStream and returns its result.
// Throws UnexpectedAnswerException for a non-200 answer or when reading fails with a CoreException.
// HttpException and IOException are rethrown wrapped in MethodException.
// config must not be null: its charset is read when the Content-Type header is built.
255 public ArrayList loadXML(IWikipedia config, String actionURL, String pages) throws UnexpectedAnswerException, MethodException {
256 PostMethod method = new PostMethod(actionURL);
257 method.setFollowRedirects(false);
258 method.addRequestHeader("User-Agent", userAgent);
// declare the form body's charset explicitly, using the charset supplied by config
259 method.addRequestHeader("Content-Type", PostMethod.FORM_URL_ENCODED_CONTENT_TYPE + "; charset=" + config.getCharSet());
261 NameValuePair[] params = new NameValuePair[] {
262 new NameValuePair("pages", pages),
263 new NameValuePair("curonly", "X"),
264 new NameValuePair("action", "submit") };
265 method.addParameters(params);
267 int responseCode = client.executeMethod(method);
268 String responseBody = method.getResponseBodyAsString();
270 if (responseCode == 200) {
// the whole body is already in memory; wrap it in a reader for the XML reader
271 StringReader reader = new StringReader(responseBody);
272 return XMLReader.readFromStream(reader);
274 throw new UnexpectedAnswerException("XML load not successful: expected 200 OK, got " + method.getStatusLine());
276 } catch(CoreException e) {
// NOTE(review): only e.getMessage() is kept here, appended without a separator; the CoreException itself is not passed on as cause
277 throw new UnexpectedAnswerException("XML load method failed" + e.getMessage());
278 } catch (HttpException e) {
279 throw new MethodException("XML load method failed", e);
280 } catch (IOException e) {
281 throw new MethodException("XML load method failed", e);
// presumably inside a finally block (not visible here) so the connection is always released - TODO confirm
283 method.releaseConnection();
288 * store a Page Version - returns a Stored object
291 * WiKipedia predefined properties
299 * @throws UnexpectedAnswerException
300 * @throws MethodException
301 * @throws PageNotEditableException
302 * @throws InterruptedException
// Posts the page text, edit timestamp and summary for 'title' to actionUrl.
// A 302 answer with a blank body is followed by a reload of the page via load().
// A 200 answer is parsed as an edit form and returned in a Stored whose last constructor argument is true.
// The call first waits on storeThrottle, which is why InterruptedException is declared.
304 public Stored store(IWikipedia config, String actionUrl, String title, Content content, String summary, boolean minorEdit,
305 boolean watchThis) throws UnexpectedAnswerException, MethodException, PageNotEditableException, InterruptedException {
306 //### workaround: prevent too many stores at a time
307 storeThrottle.delay();
309 PostMethod method = new PostMethod(actionUrl);
310 method.setFollowRedirects(false);
311 method.addRequestHeader("User-Agent", userAgent);
// declare the form body's charset explicitly, using the charset supplied by config
312 method.addRequestHeader("Content-Type", PostMethod.FORM_URL_ENCODED_CONTENT_TYPE + "; charset=" + config.getCharSet());
313 NameValuePair[] params = new NameValuePair[] {
314 // new NameValuePair("wpSection", ""),
315 // new NameValuePair("wpPreview", "Vorschau zeigen"),
316 // new NameValuePair("wpSave", "Artikel speichern"),
317 new NameValuePair("title", title),
318 new NameValuePair("wpTextbox1", content.body),
319 new NameValuePair("wpEdittime", content.timestamp),
320 new NameValuePair("wpSummary", summary),
321 new NameValuePair("wpSave", "yes"),
322 new NameValuePair("action", "submit") };
323 method.addParameters(params);
// presumably guarded by 'if (minorEdit)' and 'if (watchThis)' respectively - the guards are not visible here; TODO confirm
325 method.addParameter("wpMinoredit", "1");
327 method.addParameter("wpWatchthis", "1");
331 int responseCode = client.executeMethod(method);
332 String responseBody = method.getResponseBodyAsString();
335 // since 11dec04 there is a single linefeed instead of an empty page.. trim() helps.
336 if (responseCode == 302 && responseBody.trim().length() == 0) {
337 // log("store successful, reloading");
// reload the page so the returned Stored carries the title and content the server now has
338 Loaded loaded = load(actionUrl, config.getCharSet(), title);
339 result = new Stored(actionUrl, config.getCharSet(), loaded.title, loaded.content, false);
340 } else if (responseCode == 200) {
341 // log("store not successful, conflict detected");
// the server answered with an edit form again; hand its content back with the flag set to true
342 Parsed parsed = parseBody(config.getCharSet(), responseBody);
343 Content cont = new Content(parsed.timestamp, parsed.body);
344 result = new Stored(actionUrl, config.getCharSet(), parsed.title, cont, true);
// NOTE(review): the message says "expected 200 OK" although a 302 with a blank body is the success case above
346 throw new UnexpectedAnswerException("store not successful: expected 200 OK, got " + method.getStatusLine());
348 } catch (HttpException e) {
349 throw new MethodException("method failed", e);
350 } catch (IOException e) {
351 throw new MethodException("method failed", e);
// presumably inside a finally block (not visible here) so the connection is always released - TODO confirm
353 method.releaseConnection();
359 * Get the text of a wikimedia article
// Fetches urlStr with a GET request over a hand-built HttpConnection; the shared 'client' is not used.
// Proxy host, port and credentials are taken from the http.proxy* system properties.
// Any Throwable is logged and reported through WikiEditorPlugin, and the final return yields null.
// In the visible lines 'wikiname' is referenced only from commented-out code.
362 public String getWikiRawText(String wikiname, String urlStr) {
364 // http://en.wikipedia.org/w/wiki.phtml?title=Main_Page&action=raw
365 // http://en.wikibooks.org/w/index.php?title=Programming:PHP:SQL_Injection&action=raw
366 // http://en.wikipedia.org/w/wiki.phtml?title=Talk:Division_by_zero&action=raw
367 HttpMethod method = null;
// a missing URL is reported to the user
// presumably this branch also leaves the method, since urlStr is dereferenced below - the statement is not visible here; TODO confirm
369 if (urlStr == null) {
370 WikiEditorPlugin.getDefault().reportError("No Wikipedia URL configured", "URL-String == null");
371 // urlStr = "http://en.wikipedia.org/w/wiki.phtml?title=" + wikiname + "&action=raw";
373 URI uri = new URI(urlStr.toCharArray());
// the scheme selects the Protocol used for the connection; what happens for a missing scheme is not visible here
375 String schema = uri.getScheme();
376 if ((schema == null) || (schema.equals(""))) {
379 Protocol protocol = Protocol.getProtocol(schema);
381 HttpState state = new HttpState();
383 method = new GetMethod(uri.toString());
384 String host = uri.getHost();
385 int port = uri.getPort();
387 HttpConnection connection = new HttpConnection(host, port, protocol);
388 // timeout after 30 seconds
389 connection.setConnectionTimeout(30000);
// proxy settings come from system properties; the proxy port falls back to "80" when unset
390 connection.setProxyHost(System.getProperty("http.proxyHost"));
391 connection.setProxyPort(Integer.parseInt(System.getProperty("http.proxyPort", "80")));
// proxy credentials are set only when a proxy user name is configured
393 if (System.getProperty("http.proxyUserName") != null) {
394 state.setProxyCredentials(null, null, new UsernamePasswordCredentials(System.getProperty("http.proxyUserName"), System
395 .getProperty("http.proxyPassword")));
// a secure connection through a proxy wraps the GET in a ConnectMethod
398 if (connection.isProxied() && connection.isSecure()) {
399 method = new ConnectMethod(method);
402 method.execute(state, connection);
403 // client.executeMethod(method);
405 if (method.getStatusCode() == HttpStatus.SC_OK) {
406 // get the wiki text now:
// presumably wikiText is returned right after this line - the return statement is not visible here; TODO confirm
407 String wikiText = method.getResponseBodyAsString();
410 } catch (Throwable e) {
411 WikiEditorPlugin.log(e);
412 WikiEditorPlugin.getDefault().reportError("Exception occured", e.getMessage() + "\nSee stacktrace in /.metadata/.log file.");
// method stays null if the failure happened before the GetMethod was created
414 if (method != null) {
415 method.releaseConnection();
418 return null; // no success in getting wiki text
421 // public static String getWikiEditTextarea(String wikiname, String urlStr) {
423 // // http://en.wikipedia.org/w/wiki.phtml?title=Main_Page&action=edit
424 // // http://en.wikibooks.org/w/wiki.phtml?title=Programming:PHP:SQL_Injection&action=edit
425 // // http://en.wikipedia.org/w/wiki.phtml?title=Talk:Division_by_zero&action=edit
426 // HttpMethod method = null;
428 // if (urlStr == null) {
429 // urlStr = "http://en.wikipedia.org/w/wiki.phtml?title=" + wikiname + "&action=edit";
432 // // urlStr = urlStr + "?title=" + wikiname + "&action=edit";
434 // URI uri = new URI(urlStr.toCharArray());
436 // String schema = uri.getScheme();
437 // if ((schema == null) || (schema.equals(""))) {
440 // Protocol protocol = Protocol.getProtocol(schema);
442 // HttpState state = new HttpState();
444 // method = new GetMethod(uri.toString());
445 // String host = uri.getHost();
446 // int port = uri.getPort();
448 // HttpConnection connection = new HttpConnection(host, port, protocol);
450 // connection.setProxyHost(System.getProperty("http.proxyHost"));
451 // connection.setProxyPort(Integer.parseInt(System.getProperty("http.proxyPort", "80")));
453 // if (System.getProperty("http.proxyUserName") != null) {
454 // state.setProxyCredentials(null, null, new UsernamePasswordCredentials(System.getProperty("http.proxyUserName"), System
455 // .getProperty("http.proxyPassword")));
458 // if (connection.isProxied() && connection.isSecure()) {
459 // method = new ConnectMethod(method);
462 // method.execute(state, connection);
464 // if (method.getStatusCode() == HttpStatus.SC_OK) {
465 // // get the textareas wiki text now:
466 // InputStream stream = method.getResponseBodyAsStream();
467 // int byteLen = stream.available();
469 // byte[] buffer = new byte[byteLen];
470 // stream.read(buffer, 0, byteLen);
471 // String wikiText = new String(buffer);
472 // // String wikiText = method.getResponseBodyAsString();
473 // int start = wikiText.indexOf("<textarea");
474 // if (start != (-1)) {
475 // start = wikiText.indexOf(">", start + 1);
476 // if (start != (-1)) {
477 // int end = wikiText.indexOf("</textarea>");
478 // wikiText = wikiText.substring(start + 1, end);
482 // // System.out.println(wikiText);
485 // } catch (Exception e) {
486 // e.printStackTrace();
488 // if (method != null) {
489 // method.releaseConnection();
492 // return null; // no success in getting wiki text
// Manual smoke test: loads two pages through loadXML from a fixed export URL and prints each returned element.
495 public static void main(String[] args) {
496 MediaWikiConnector mwc = new MediaWikiConnector();
// NOTE(review): wp is null, but loadXML calls config.getCharSet() while building its Content-Type header,
// so this call looks like it fails with a NullPointerException - confirm and pass a real IWikipedia
498 IWikipedia wp = null;
// the page titles are passed as one string separated by '\n'
499 ArrayList list = mwc.loadXML(wp, "http://www.plog4u.de/wiki/index.php/Spezial:Export", "Mechanisches Fernsehen\nSynästhesie");
500 for (int i = 0; i < list.size(); i++) {
501 System.out.println(list.get(i).toString());
503 } catch (UnexpectedAnswerException e) {
504 // TODO Auto-generated catch block
506 } catch (MethodException e) {
507 // TODO Auto-generated catch block