Permalink
Browse files

fixed bugs for long-running processes - not all data structures were getting reset properly
  • Loading branch information...
1 parent 1a1b911 commit be81c644a86df30b60e0d7c2ffdda03fc0b514f8 Paco NATHAN committed Apr 30, 2009
View
File renamed without changes.
View
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project
- name="mapred"
+ name="textrank"
default="rank"
basedir="."
>
@@ -110,6 +110,7 @@ http://github.com/sharethis/textrank/
<target
name="compile"
description="compile all Java classes"
+ depends="clean"
>
<mkdir dir="${build.dir}"/>
<javac
@@ -127,6 +128,7 @@ http://github.com/sharethis/textrank/
<target
name="jar"
description="build a JAR"
+ depends="compile"
>
<jar
destfile="${app.jar}"
@@ -140,12 +142,13 @@ http://github.com/sharethis/textrank/
<target
- name="rank"
- description="misc. testing"
+ name="rank"
+ description="misc. testing"
+ depends="compile"
>
<property
name="data.file"
- location="${basedir}/sample.txt"
+ location="${basedir}/test/good.txt"
/>
<property
name="lang.code"
@@ -43,7 +43,7 @@
/**
* A singleton class implementing a cache of scanned sentences.
*
- * @author Paco NATHAN
+ * @author paco@sharethis.com
*/
public class
@@ -52,7 +52,7 @@
{
// logging
- private final static Log log_ =
+ private final static Log LOG =
LogFactory.getLog(Cache.class.getName());
@@ -69,12 +69,8 @@
public
Cache ()
+ throws Exception
{
- try {
- md_sent = MessageDigest.getInstance("MD5");
- }
- catch (Exception e) {
- System.err.println("cannot load MD5: " + e.getMessage());
- }
+ md_sent = MessageDigest.getInstance("MD5");
}
}
@@ -39,15 +39,15 @@
/**
* Track the context within a sentence for a selected n-gram.
*
- * @author Paco NATHAN
+ * @author paco@sharethis.com
*/
public class
Context
{
// logging
- private final static Log log_ =
+ private final static Log LOG =
LogFactory.getLog(Context.class.getName());
@@ -45,7 +45,7 @@
/**
* An abstraction for handling the graph as a data object.
*
- * @author Paco NATHAN
+ * @author paco@sharethis.com
*/
public class
@@ -54,7 +54,7 @@
{
// logging
- private final static Log log_ =
+ private final static Log LOG =
LogFactory.getLog(Graph.class.getName());
@@ -140,8 +140,8 @@
final double standard_error =
dist_stats.getStandardDeviation() / Math.sqrt((double) dist_stats.getN());
- if (log_.isInfoEnabled()) {
- log_.info("iteration: " + k + " error: " + standard_error);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("iteration: " + k + " error: " + standard_error);
}
// swap in new rank values
@@ -169,9 +169,11 @@
public int compare (Node n1, Node n2) {
if (n1.rank > n2.rank) {
return -1;
- } else if (n1.rank < n2.rank) {
+ }
+ else if (n1.rank < n2.rank) {
return 1;
- } else {
+ }
+ else {
return 0;
}
}
@@ -190,11 +192,11 @@ public int compare (Node n1, Node n2) {
dist_stats.addValue(n1.rank);
}
- if (log_.isDebugEnabled()) {
- log_.debug("n: " + n1.key + " " + n1.rank + " " + n1.marked);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("n: " + n1.key + " " + n1.rank + " " + n1.marked);
for (Node n2 : n1.edges) {
- log_.debug(" - " + n2.key);
+ LOG.debug(" - " + n2.key);
}
}
}
@@ -40,7 +40,7 @@
* Implements a node value in a TextRank graph denoting a noun or
* adjective.
*
- * @author Paco NATHAN
+ * @author paco@sharethis.com
*/
public class
@@ -49,7 +49,7 @@
{
// logging
- private final static Log log_ =
+ private final static Log LOG =
LogFactory.getLog(KeyWord.class.getName());
@@ -50,16 +50,16 @@
* Implementation of English-specific tools for natural language
* processing.
*
- * @author Paco NATHAN
+ * @author paco@sharethis.com
*/
public class
LanguageEnglish
- extends Language
+ extends LanguageModel
{
// logging
- private final static Log log_ =
+ private final static Log LOG =
LogFactory.getLog(LanguageEnglish.class.getName());
@@ -74,11 +74,13 @@
/**
- * Constructor.
+ * Constructor. Not quite a Singleton pattern but close enough
+ * given the resources required to be loaded ONCE.
*/
public
LanguageEnglish (final String path)
+ throws Exception
{
if (splitter_en == null) {
loadResources(path);
@@ -92,27 +94,22 @@
public void
loadResources (final String path)
+ throws Exception
{
- try {
- splitter_en =
+ splitter_en =
new SentenceDetector((new File(path, "opennlp/EnglishSD.bin.gz")).getPath());
- tokenizer_en =
- new Tokenizer((new File(path, "opennlp/EnglishTok.bin.gz")).getPath());
-
- tagger_en =
- new ParserTagger((new File(path, "opennlp/tag.bin.gz")).getPath(),
- (new File(path, "opennlp/tagdict")).getPath(),
- false
- );
-
- stemmer_en =
- new englishStemmer();
- }
- catch (Exception e) {
- System.err.println("Error loading libraries for OpenNLP.");
- e.printStackTrace();
- }
+ tokenizer_en =
+ new Tokenizer((new File(path, "opennlp/EnglishTok.bin.gz")).getPath());
+
+ tagger_en =
+ new ParserTagger((new File(path, "opennlp/tag.bin.gz")).getPath(),
+ (new File(path, "opennlp/tagdict")).getPath(),
+ false
+ );
+
+ stemmer_en =
+ new englishStemmer();
}
Oops, something went wrong.

0 comments on commit be81c64

Please sign in to comment.