Skip to content
Browse files

testing different pkg for sentence boundary detection, to work around seg faults past JVM release 6u2
  • Loading branch information...
1 parent f4261dc commit b346d95c25d5fe0c7b377f05d2c178de35979a3d Paco NATHAN committed
Showing with 13 additions and 0 deletions.
  1. BIN lib/sptoolkit.jar
  2. +13 −0 src/com/sharethis/textrank/LanguageEnglish.java
View
BIN lib/sptoolkit.jar
Binary file not shown.
View
13 src/com/sharethis/textrank/LanguageEnglish.java
@@ -45,6 +45,8 @@
import org.tartarus.snowball.ext.englishStemmer;
+import spiaotools.SentParDetector;
+
/**
* Implementation of English-specific tools for natural language
@@ -67,7 +69,10 @@
* Public definitions.
*/
+ public static SentParDetector splitter_en = null;
+ /** /
public static SentenceDetectorME splitter_en = null;
+ /* */
public static Tokenizer tokenizer_en = null;
public static ParserTagger tagger_en = null;
public static englishStemmer stemmer_en = null;
@@ -96,8 +101,12 @@
loadResources (final String path)
throws Exception
{
+ splitter_en = new SentParDetector();
+
+ /** /
splitter_en =
new SentenceDetector((new File(path, "opennlp/EnglishSD.bin.gz")).getPath());
+ /* */
tokenizer_en =
new Tokenizer((new File(path, "opennlp/EnglishTok.bin.gz")).getPath());
@@ -120,7 +129,11 @@
/**
 * Splits the given text into an array of strings using the shared
 * splitter_en detector.
 *
 * The text is passed to splitter_en.markupRawText(2, text) and the
 * returned string is broken apart on newline characters.
 * NOTE(review): the meaning of the first argument (2) is not visible
 * in this file; presumably it selects the output format of
 * SentParDetector -- confirm against the sptoolkit documentation.
 *
 * @param text raw text to be split
 * @return the newline-delimited pieces of the marked-up text
 */
public String[]
splitParagraph (final String text)
{
+ return splitter_en.markupRawText(2, text).split("\\n");
+
// The marker below opens a block comment: the space between its last
// star and slash keeps it from terminating, so the comment runs until
// the closing marker two lines further down. This keeps the earlier
// sentDetect-based return in the file but disabled.
+ /** /
return splitter_en.sentDetect(text);
+ /* */
}

0 comments on commit b346d95

Please sign in to comment.
Something went wrong with that request. Please try again.