Skip to content

Commit

Permalink
NGramIterator now takes optional StopWords to exclude n-grams containing stop words
Browse files Browse the repository at this point in the history
  • Loading branch information
Jonathan Feinberg committed Dec 2, 2009
1 parent 3e7e111 commit 1262af9
Showing 1 changed file with 25 additions and 2 deletions.
27 changes: 25 additions & 2 deletions src/cue/lang/NGramIterator.java
Expand Up @@ -20,6 +20,8 @@
import java.util.Locale;
import java.util.NoSuchElementException;

import cue.lang.stop.StopWords;

/**
* Construct with a {@link String}, some integer n, and a {@link Locale};
* retrieve a sequence of {@link String}s, each of which has n words
Expand Down Expand Up @@ -66,6 +68,7 @@ public class NGramIterator extends IterableText
private final SentenceIterator sentenceIterator;
private final LinkedList<String> grams = new LinkedList<String>();
private final int n;
private final StopWords stopWords;

private String next;
private Iterator<String> currentWordIterator;
Expand All @@ -76,9 +79,16 @@ public NGramIterator(final int n, final String text)
}

/**
 * Creates an n-gram iterator over {@code text} without stop-word filtering.
 * Delegates to the four-argument constructor, passing {@code null} for the
 * stop words; a {@code null} stop-word set means no word is ever treated as
 * a stop word.
 *
 * @param n      the number of words in each returned n-gram
 * @param text   the text to draw n-grams from
 * @param locale the locale handed to the sentence iterator
 */
public NGramIterator(final int n, final String text, final Locale locale)
{
// null stop words: every word is eligible to appear in an n-gram
this(n, text, locale, null);
}

/**
 * Creates an n-gram iterator over {@code text}, optionally excluding any
 * n-gram that would contain a stop word.
 *
 * @param n         the number of words in each returned n-gram
 * @param text      the text to draw n-grams from
 * @param locale    the locale handed to the {@link SentenceIterator}
 * @param stopWords the stop words to filter with, or {@code null} to
 *                  disable stop-word filtering
 */
public NGramIterator(final int n, final String text, final Locale locale,
		final StopWords stopWords)
{
	this.stopWords = stopWords;
	this.n = n;
	this.sentenceIterator = new SentenceIterator(text, locale);

	// All fields must be assigned before this call: it computes the first
	// n-gram so that one is ready as soon as construction finishes.
	loadNext();
}

Expand Down Expand Up @@ -123,11 +133,11 @@ private void loadNext()
.iterator();
for (int i = 0; currentWordIterator.hasNext() && i < n - 1; i++)
{
grams.add(currentWordIterator.next());
maybeAddWord();
}
}
// now grams has n-1 words in it (fewer if a stop word cleared it) and currentWordIterator hasNext
grams.add(currentWordIterator.next());
maybeAddWord();
}
final StringBuilder sb = new StringBuilder();
for (final String gram : grams)
Expand All @@ -141,6 +151,19 @@ private void loadNext()
next = sb.toString();
}

/**
 * Consumes one word from {@code currentWordIterator}. An ordinary word is
 * appended to {@code grams}; a stop word is not added and instead empties
 * {@code grams}, discarding the words collected so far.
 * When {@code stopWords} is {@code null}, every word is treated as ordinary.
 */
private void maybeAddWord()
{
	final String word = currentWordIterator.next();
	final boolean isStopWord = stopWords != null && stopWords.isStopWord(word);

	if (!isStopWord)
	{
		grams.add(word);
		return;
	}

	// A stop word may not appear in any n-gram, so the partial run ends here.
	grams.clear();
}

public static void main(final String[] args)
{
final String lyric = "This happened once before. I came to your door. No reply.";
Expand Down

0 comments on commit 1262af9

Please sign in to comment.