X-Git-Url: http://git.phpeclipse.com diff --git a/archive/net.sourceforge.phpeclipse.wiki/src/net/sourceforge/phpeclipse/wiki/actions/mediawiki/connect/XMLReader.java b/archive/net.sourceforge.phpeclipse.wiki/src/net/sourceforge/phpeclipse/wiki/actions/mediawiki/connect/XMLReader.java index 8cdfc20..013ff3b 100644 --- a/archive/net.sourceforge.phpeclipse.wiki/src/net/sourceforge/phpeclipse/wiki/actions/mediawiki/connect/XMLReader.java +++ b/archive/net.sourceforge.phpeclipse.wiki/src/net/sourceforge/phpeclipse/wiki/actions/mediawiki/connect/XMLReader.java @@ -30,8 +30,11 @@ import org.xml.sax.SAXParseException; */ public class XMLReader { private static final String TITLE_TAG = "title"; //$NON-NLS-1$ + private static final String TIMESTAMP_TAG = "timestamp"; //$NON-NLS-1$ + private static final String TEXT_TAG = "text"; //$NON-NLS-1$ + private static final String PAGE_TAG = "page"; //$NON-NLS-1$ public XMLReader() { @@ -86,20 +89,20 @@ public class XMLReader { private static void traverse(String eleName, Node cNode, Parsed parsed) { switch (cNode.getNodeType()) { case Node.DOCUMENT_NODE: -// System.out.println("DOCUMENT_NODE " + cNode.getNodeName()); + // System.out.println("DOCUMENT_NODE " + cNode.getNodeName()); processChildren(eleName, cNode.getChildNodes(), parsed); break; case Node.ELEMENT_NODE: eleName = cNode.getNodeName(); -// System.out.println("ELEMENT_NODE " + eleName); -// NamedNodeMap attributeMap = cNode.getAttributes(); -// int numAttrs = attributeMap.getLength(); -// for (int i = 0; i < attributeMap.getLength(); i++) { -// Attr attribute = (Attr) attributeMap.item(i); -// String attrName = attribute.getNodeName(); -// String attrValue = attribute.getNodeValue(); -// } + // System.out.println("ELEMENT_NODE " + eleName); + // NamedNodeMap attributeMap = cNode.getAttributes(); + // int numAttrs = attributeMap.getLength(); + // for (int i = 0; i < attributeMap.getLength(); i++) { + // Attr attribute = (Attr) attributeMap.item(i); + // String attrName = attribute.getNodeName(); + // String attrValue = attribute.getNodeValue(); + // } processChildren(eleName, cNode.getChildNodes(), parsed); break; case Node.CDATA_SECTION_NODE: @@ -127,10 +130,59 @@ public class XMLReader { } /** + * Read the first timestamp found in the Wikipedia xml stream + * + * @param stream + * @return + * @throws Exception + */ + public static String getTimestamp(InputStream stream) throws Exception { + // Create a factory object for creating DOM parsers + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + // Now use the factory to create a DOM parser (a.k.a. a DocumentBuilder) + DocumentBuilder parser = factory.newDocumentBuilder(); + // Parse the file and build a Document tree to represent its content + Document document = parser.parse(stream); + // Ask the document for a list of all tags it contains + NodeList timestamps = document.getElementsByTagName(TIMESTAMP_TAG); + // Loop through those elements one at a time, and extract the + // content of their tags. + int numPages = timestamps.getLength(); + for (int i = 0; i < numPages; i++) { + ElementNode page = (ElementNode) timestamps.item(i); // A + return page.getChildNodes().item(0).getNodeValue(); + } + return null; + } + + /** + * Get the timestamp as java Date Format String + * + * @param stream + * @return + * @throws Exception + */ + public static String getDateTimestamp(InputStream stream) throws Exception { + String timestamp = getTimestamp(stream); + if (timestamp!=null) { + StringBuffer buffer = new StringBuffer(); + // 2004-11-22T12:41:10Z + buffer.append(timestamp.substring(0,4)); //year + buffer.append(timestamp.substring(5,7)); //month + buffer.append(timestamp.substring(8,10)); //day + buffer.append(timestamp.substring(11,13));//hour + buffer.append(timestamp.substring(14,16));//minute + buffer.append(timestamp.substring(17,19));//second + return buffer.toString(); + } + return null; + } + + /** * Reads the wikipedia xml data from the given stream * * @param stream - * @return + * @return * @throws CoreException */ public static ArrayList readFromStream(Reader stream) throws CoreException { @@ -144,8 +196,7 @@ public class XMLReader { Document document = parser.parse(new InputSource(stream)); // Ask the document for a list of all tags it contains NodeList pages = document.getElementsByTagName(PAGE_TAG); - // Loop through those elements one at a time, and extract the - // content of their tags. + // Loop through those elements one at a time int numPages = pages.getLength(); for (int i = 0; i < numPages; i++) { @@ -162,7 +213,7 @@ public class XMLReader { } catch (IOException e) { throwReadException(e); } catch (SAXParseException e) { -// System.out.println("SAXParseException in line:" + e.getLineNumber() + " column:" + e.getColumnNumber()); + // System.out.println("SAXParseException in line:" + e.getLineNumber() + " column:" + e.getColumnNumber()); throwReadException(e); } catch (SAXException e) { throwReadException(e); @@ -174,7 +225,7 @@ public class XMLReader { Node node = attributes.getNamedItem(name); return node == null ? null : node.getNodeValue(); } - + // public static void saveToFile(File file) throws CoreException { // OutputStream stream = null; // try { @@ -347,9 +398,9 @@ public class XMLReader { + "\r\n" + "[[Kategorie:Rhetorischer Begriff]]\r\n" + "[[en:Synaesthesia]] [[es:Sinestesia]] [[sv:Synestesi]] [[tr:Sinestezi]]\r\n" + " \r\n" + " \r\n" + ""; - StringReader st = new StringReader(test2); + StringReader st = new StringReader(test); - readFromStream(st); + System.out.println(readFromStream(st)); } catch (CoreException e) { // TODO Auto-generated catch block e.printStackTrace();