Permalink
Browse files

Added tests

  • Loading branch information...
1 parent 1f19dae commit 533cc47ddcc3d6b37cbf9bab21c0883bf1ec10b6 @vpekar committed Nov 5, 2012
Showing with 77 additions and 6 deletions.
  1. +3 −3 README.md
  2. +3 −3 stanford.py
  3. +71 −0 test_stanford.py
View
@@ -26,11 +26,11 @@ USAGE:
To keep XML tags provided in the input text:
- sentence = parser.parse('This is a <tag>test</tag>')
-
+ sentence = parser.parse_xml('This is a <b>test</b>.')
+
To strip all XML before parsing:
- sentence = parser.parse_xml('This is a <b>test</b>.')
+ sentence = parser.parse('This is a <tag>test</tag>')
To print the sentence as a table (one word per line):
View
@@ -285,11 +285,11 @@ def get_most_probable_parses(self, text, kbest=2):
"""
if not self.parser_query:
self.parser_query = self.lp.parserQuery()
- response = self.parser_query.parse(sp.tokenize(text))
+ response = self.parser_query.parse(self.tokenize(text))
if not response:
raise Exception("The sentence was not accepted by the parser: %s" % text)
for candidate_tree in self.parser_query.getKBestPCFGParses(kbest):
- s = PySentence(sp, candidate_tree.object())
+ s = PySentence(self, candidate_tree.object())
prob = math.e**candidate_tree.score()
yield s, prob
@@ -375,7 +375,7 @@ def get_dependencies_example(sp):
def get_common_path_example(sp):
print 'Common path:'
- text = 'The quick brown fox jumped over the lazy dog.'
+ text = 'The quick brown fox jumped over a lazy dog.'
print 'Text:', text
i = 4
j = 9
View
@@ -0,0 +1,71 @@
+"""To run the tests, put englishPCFG.ser.gz into the working directory.
+
+Created on 4 Nov 2012
+
+@author: viktor
+"""
+
+import unittest
+from stanford import StanfordParser, PySentence
+
+
+class TestPySentence(unittest.TestCase):
+
+ def setUp(self):
+ input = "The quick brown fox jumped over a lazy dog."
+ sp = StanfordParser('englishPCFG.ser.gz')
+ self.sentence = sp.parse_xml(input)
+
+ def test_get_least_common_node(self):
+ lcn, shortest_path = self.sentence.get_least_common_node(4, 9)
+ actual_lcn = self.sentence.word[lcn]
+ actual_path = ' '.join([self.sentence.word[x] for x in sorted(shortest_path)])
+ expected_lcn = 'jumped'
+ expected_path = 'fox jumped over dog'
+ msg = "Expected %s != actual %s" % (expected_lcn, actual_lcn)
+ self.assertTrue(expected_lcn == actual_lcn, msg)
+ msg = "Expected %s != actual %s" % (expected_path, actual_path)
+ self.assertTrue(expected_path == actual_path, msg)
+
+
+class TestStanfordParser(unittest.TestCase):
+
+ def setUp(self):
+ self.sp = StanfordParser('englishPCFG.ser.gz')
+
+ def test_get_most_probable_parses_check_types(self):
+ input = 'I saw a man with a telescope.'
+ expected_type1, expected_type2 = type(PySentence), float
+ for s, prob in self.sp.get_most_probable_parses(input, kbest=2):
+ actual_type1, actual_type2 = type(s.__class__), type(prob)
+ msg = "Expected %s != actual %s" % (expected_type1, actual_type1)
+ self.assertTrue(actual_type1 == expected_type1, msg)
+ msg = "Expected %s != actual %s" % (expected_type2, actual_type2)
+ self.assertTrue(actual_type2 == expected_type2, msg)
+
+ def test_get_most_probable_parses_check_nonzero(self):
+ input = 'I saw a man with a telescope.'
+ expected = 2
+ parses = [x for x in self.sp.get_most_probable_parses(input, kbest=expected)]
+ actual = len(parses)
+ msg = "Expected %d != actual %d" % (expected, actual)
+ self.assertTrue(expected == actual, msg)
+
+ def test_parse_xml(self):
+ input = 'This <a>is</a> a test<!-- b -->.'
+ expected = ['DT', 'VBZ', 'DT', 'NN', '.']
+ sentence = self.sp.parse_xml(input)
+ actual = [v for k, v in sorted(sentence.tag.items())]
+ msg = "Expected %s != actual %s" % (expected, actual)
+ self.assertTrue(expected == actual, msg)
+
+ def test_tokenise(self):
+ input = 'This is a test.'
+ expected = ['This', 'is', 'a', 'test', '.']
+ actual = [unicode(x) for x in self.sp.tokenize(input)]
+ msg = "Expected %s != actual %s" % (expected, actual)
+ self.assertTrue(expected == actual, msg)
+
+
+# Run every test case in this module when the file is executed as a script.
+if __name__ == "__main__":
+ unittest.main()

0 comments on commit 533cc47

Please sign in to comment.