Java port of arc90's readability project
Java
Switch branches/tags
Nothing to show
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Permalink
Failed to load latest commit information.
example
src/main/java/org/articleparser
.gitignore
README.md
pom.xml

README.md

Java ArticleParser

Java ArticleParser is a Java library that extracts the primary content of a web page. It is a Java port of arc90's Readability project.

Dependency

Java ArticleParser uses jsoup (https://github.com/jhy/jsoup) to parse HTML.

Example

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;

import org.articleparser.Article;
import org.articleparser.ArticleParser;

/**
 * Minimal usage example for ArticleParser: parses a page given by URL and
 * writes the resulting article HTML to the file {@code parsed.html} in the
 * current working directory.
 */
public class HelloArticleParser {
    /**
     * Runs the example. Any {@link IOException} raised while parsing or while
     * writing the output file is reported on standard error via its stack trace.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            ArticleParser parser = new ArticleParser("http://en.wikipedia.org/wiki/Github");
            Article article = parser.parse();
            String articleHTML = article.getHTML();
            // Write with an explicit UTF-8 encoding instead of the platform
            // default, so non-ASCII characters in the article are written the
            // same way on every machine. try-with-resources closes the writer
            // whether or not write() succeeds.
            try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream("parsed.html"), StandardCharsets.UTF_8))) {
                out.write(articleHTML);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}