Thomas Darimont
Erfahrenes Mitglied
Hallo!
Gruß Tom
Code:
/*
* Created on 29.04.2005@11:54:21 by Darimont
*
* TODO Licence info
*/
package de.tutorials;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
/**
 * Command-line tool that prints the target of every hyperlink found in an
 * HTML document.
 * <p>
 * The document is parsed with Swing's {@link HTMLEditorKit}; afterwards all
 * <code>&lt;a&gt;</code> elements are visited and the value of their
 * <code>href</code> attribute is written to standard output, one per line.
 *
 * @author Darimont
 */
public class LinkExtractor {

    /** URL that is read when no location is passed on the command line. */
    private static final String DEFAULT_URL = "file:/c:/tutorials.htm";

    /**
     * Reads an HTML document and prints the <code>href</code> of each anchor.
     *
     * @param args optional; <code>args[0]</code> is the URL of the document to
     *             scan. When absent, {@link #DEFAULT_URL} is used.
     * @throws Exception if the URL is malformed, the document cannot be read,
     *                   or the HTML parser fails
     */
    public static void main(String[] args) throws Exception {
        String location = (args != null && args.length > 0) ? args[0]
                : DEFAULT_URL;
        URL url = new URL(location);
        URLConnection con = url.openConnection();
        // NOTE: the bytes are decoded with the JVM's default charset, exactly
        // as before; pages in another encoding may show garbled link text.
        BufferedReader br = new BufferedReader(new InputStreamReader(con
                .getInputStream()));
        try {
            HTMLEditorKit editorKit = new HTMLEditorKit();
            HTMLDocument htmlDoc = new HTMLDocument();
            // The input is already a character stream, so a charset declared
            // in a <meta> tag must not make the parser abort the read.
            htmlDoc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);
            editorKit.read(br, htmlDoc, 0);

            HTMLDocument.Iterator iter = htmlDoc.getIterator(HTML.Tag.A);
            while (iter.isValid()) {
                Object href = iter.getAttributes().getAttribute(
                        HTML.Attribute.HREF);
                // Anchors without an href (e.g. <a name="...">) are not links;
                // skip them instead of printing "null".
                if (href != null) {
                    System.out.println(href);
                }
                iter.next();
            }
        } finally {
            // Always release the underlying connection stream, even when
            // parsing fails.
            br.close();
        }
    }
}
Gruß Tom