"""A Jython interface to the Stanford parser (v.3.5.0). Includes various
utilities to manipulate parsed sentences:
* parse text containing XML tags,
* obtain probabilities for different analyses,
* extract dependency relations,
* extract subtrees,
* find the shortest path between two nodes,
* print the parse in various formats.

See examples after the if __name__ == "__main__" hooks.


INSTALLATION:

1. Download the parser from http://nlp.stanford.edu/downloads/lex-parser.shtml
2. Unpack into a local dir, put the path to stanford-parser.jar into the
   classpath for jython
3. Put the full path to englishPCFG.ser.gz as parser_file arg to
   StanfordParser (searched in the local directory by default)

USAGE:

Initialize a parser:

    parser = StanfordParser('englishPCFG.ser.gz')

To keep XML tags provided in the input text:

    sentence = parser.parse('This is a <tag>test</tag>.')

To strip all XML before parsing:

    sentence = parser.parse_xml('This is a <b>test</b>.')

To print the sentence as a table (one word per line):

    sentence.print_table()

To print the sentence as a parse tree:

    sentence.print_tree()

On input, the script accepts unicode or utf8 or latin1.

On output, the script produces unicode.
"""
47
c41b6f9 @vpekar Refactored
authored
48 __author__ = "Viktor Pekar <v.pekar@gmail.com>"
49218df @vpekar Updated to Stanford parser 3.5.0, fixes #2
authored
49 __version__ = "0.2"
c41b6f9 @vpekar Refactored
authored
50
b943786 @vpekar Second commit
authored
51
49218df @vpekar Updated to Stanford parser 3.5.0, fixes #2
authored
52 import sys
53 import re
54 import os
55 import string
56 import math
d7b28cc @vpekar Initial commit
authored
57
58 try:
59 assert 'java' in sys.platform
60 except AssertionError:
61 raise Exception("The script should be run from Jython!")
62
63 from java.util import *
64 from edu.stanford.nlp.trees import PennTreebankLanguagePack, TreePrint
65 from edu.stanford.nlp.parser.lexparser import LexicalizedParser
66 from edu.stanford.nlp.process import Morphology, PTBTokenizer, WordTokenFactory
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
67 from edu.stanford.nlp.parser.lexparser import Options
68 from edu.stanford.nlp.ling import Sentence, WordTag
d7b28cc @vpekar Initial commit
authored
69 from java.io import StringReader
70
71
72 def stanford2tt(sentence):
c41b6f9 @vpekar Refactored
authored
73 """Given a Sentence object, return TreeTagger-style
74 tuples (word, tag, lemma).
d7b28cc @vpekar Initial commit
authored
75 """
c41b6f9 @vpekar Refactored
authored
76
77 for idx in sorted(sentence.word):
78
79 word = sentence.word.get(idx, '')
80
d7b28cc @vpekar Initial commit
authored
81 if word.startswith('<'):
82 tag, lemma = 'XML', word
83 else:
c41b6f9 @vpekar Refactored
authored
84 tag = sentence.tag.get(idx, '')
85 lemma = sentence.lemma.get(idx, '')
86
d7b28cc @vpekar Initial commit
authored
87 # correcting: TO -> IN
88 if word == 'to' and tag == 'TO':
89 tag = 'IN'
c41b6f9 @vpekar Refactored
authored
90
d7b28cc @vpekar Initial commit
authored
91 yield (word, tag, lemma)
92
93
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
94 class PySentence:
d7b28cc @vpekar Initial commit
authored
95 """An interface to the grammaticalStructure object of SP
96 """
c41b6f9 @vpekar Refactored
authored
97
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
98 def __init__(self, parser, parse, xmltags={}):
99 """Create a PySentence object from parse.
d7b28cc @vpekar Initial commit
authored
100 @param gsf: a grammaticalStructureFactory object
101 @param parse: a parse of the sentence
c41b6f9 @vpekar Refactored
authored
102 @param xmltags: index of the previous text token =>
103 list of intervening xmltags
d7b28cc @vpekar Initial commit
authored
104 """
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
105 self.gs = parser.gsf.newGrammaticalStructure(parse)
49218df @vpekar Updated to Stanford parser 3.5.0, fixes #2
authored
106 self.parse = parse
c41b6f9 @vpekar Refactored
authored
107
d7b28cc @vpekar Initial commit
authored
108 self.node = {}
109 self.word = {}
110 self.tag = {}
111 self.lemma = {}
112 self.dep = {}
113 self.rel = {}
114 self.children = {}
c41b6f9 @vpekar Refactored
authored
115
49218df @vpekar Updated to Stanford parser 3.5.0, fixes #2
authored
116 self.lemmer = parser.lemmer
117 self.xmltags = xmltags
118
c41b6f9 @vpekar Refactored
authored
119 self.populate_indices()
120
121 def get_lemma(self, word, tag):
122 lemma = self.lemmer.lemmatize(WordTag(word, tag)).lemma()
123 return lemma.decode('latin1')
124
125 def get_pos_tag(self, node):
b338545 @vpekar Refactored tests
authored
126 parent = node.parent()
c41b6f9 @vpekar Refactored
authored
127 tag = 'Z' if parent == None else parent.value()
128 return tag.decode('latin1')
129
130 def get_word(self, node_i):
131 word = node_i.value().decode('latin1')
132
133 # correct the appearance of parentheses
134 if word == '-RRB-':
135 word = u'('
136 elif word == '-LRB-':
137 word = u')'
138
139 return word
140
141 def populate_indices(self):
142
d7b28cc @vpekar Initial commit
authored
143 # insert the tags before the text, if any are present before the text
c41b6f9 @vpekar Refactored
authored
144 self.add_xml_tags_to_word_index(idx=0)
145
49218df @vpekar Updated to Stanford parser 3.5.0, fixes #2
authored
146 # dependency indices
147 for td in self.gs.typedDependenciesCCprocessed(True):
148 dep_idx = td.dep().index()
149 p_idx = td.gov().index()
150 self.rel[dep_idx] = td.reln().getShortName()
151 self.dep[dep_idx] = p_idx
152 self.children[p_idx] = self.children.get(p_idx, [])
153 self.children[p_idx].append(dep_idx)
154
155 # word, pos tag and lemma indices
156 for node_i in self.gs.root():
c41b6f9 @vpekar Refactored
authored
157
158 if node_i.headTagNode() != None:
159 continue
160
161 idx = node_i.index()
162 word = self.get_word(node_i)
1075f15 @vpekar Not adding non-leaf notes to indices
authored
163 if word == "ROOT":
164 break
c41b6f9 @vpekar Refactored
authored
165 tag = self.get_pos_tag(node_i)
166
167 self.node[idx] = node_i
d7b28cc @vpekar Initial commit
authored
168 self.word[idx] = word
169 self.tag[idx] = tag
c41b6f9 @vpekar Refactored
authored
170 self.lemma[idx] = self.get_lemma(word, tag)
49218df @vpekar Updated to Stanford parser 3.5.0, fixes #2
authored
171
172 # if the word is unattached
173 if word in string.punctuation or not self.dep.get(idx):
174 self.dep[idx] = 0
175 self.rel[idx] = 'punct'
c41b6f9 @vpekar Refactored
authored
176
d7b28cc @vpekar Initial commit
authored
177 # insert xml tags, if any
c41b6f9 @vpekar Refactored
authored
178 self.add_xml_tags_to_word_index(idx)
179
180 def add_xml_tags_to_word_index(self, idx):
181 """@param idx: the id of the previous word
182 """
183 tags_at_idx = self.xmltags.get(idx)
184 if tags_at_idx:
185 num_tags = len(tags_at_idx)
186 for tag_i in xrange(num_tags):
187 tag_idx = (tag_i + 1) / float(num_tags + 1)
188 tag_name = tags_at_idx[tag_i].decode('latin1')
189 self.word[idx + tag_idx] = tag_name
d7b28cc @vpekar Initial commit
authored
190
191 def get_head(self, node):
c41b6f9 @vpekar Refactored
authored
192 """Return a tuple with the head of the dependency for a node and the
d7b28cc @vpekar Initial commit
authored
193 relation label.
194 """
195 idx = node.index()
196 dep_idx = self.dep.get(idx)
c41b6f9 @vpekar Refactored
authored
197 if not dep_idx:
198 return None, None
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
199 return self.node.get(dep_idx), self.rel.get(idx)
c41b6f9 @vpekar Refactored
authored
200
201 def get_children(self, node):
202 """Yield tuples each with a child of the dependency
d7b28cc @vpekar Initial commit
authored
203 and the relation label
204 """
c41b6f9 @vpekar Refactored
authored
205 for idx in self.children.get(node.index(), []):
206 yield self.node[idx], self.rel[idx]
207
208 def get_descendants(self, start_idx):
d7b28cc @vpekar Initial commit
authored
209 """Return all descendants of a node, including the node itself
210 """
211 def traverse(idx):
212 global descendants
c41b6f9 @vpekar Refactored
authored
213 for idx_i in self.children.get(idx, []):
214 descendants.append(idx_i)
215 traverse(idx_i)
216 global descendants
217 descendants = [start_idx]
218 traverse(start_idx)
d7b28cc @vpekar Initial commit
authored
219 return descendants
c41b6f9 @vpekar Refactored
authored
220
221 def prune(self, idx):
222 """Given an index, remove all the words dependent on the word with that
223 index, including the word itself.
d7b28cc @vpekar Initial commit
authored
224 """
c41b6f9 @vpekar Refactored
authored
225 for idx_i in self.get_descendants(idx):
226 self.delete_node(idx_i)
227
228 def delete_node(self, idx):
229 del self.node[idx], self.word[idx], self.tag[idx], self.lemma[idx], \
230 self.rel[idx], self.dep[idx]
231 if idx in self.children:
232 del self.children[idx]
d7b28cc @vpekar Initial commit
authored
233
234 def get_plain_text(self):
235 """Output plain-text sentence.
236 """
c41b6f9 @vpekar Refactored
authored
237 text = ' '.join([self.word[x] for x in sorted(self.node)])
d7b28cc @vpekar Initial commit
authored
238 # remove spaces in front of commas, etc
239 for i in ',.:;!?':
240 text = text.replace(' ' + i, i)
241 return text
242
c41b6f9 @vpekar Refactored
authored
243 def get_least_common_node(self, node_i_idx, node_j_idx):
244 """Return a node that is least common for two given nodes,
d7b28cc @vpekar Initial commit
authored
245 as well as the shortest path between the two nodes
c41b6f9 @vpekar Refactored
authored
246 @param node_i_idx: index of node 1
247 @param node_j_idx: index of node 2
d7b28cc @vpekar Initial commit
authored
248 """
c41b6f9 @vpekar Refactored
authored
249
d7b28cc @vpekar Initial commit
authored
250 common_node = None
251 shortest_path = []
c41b6f9 @vpekar Refactored
authored
252 path1 = self.path2root(node_i_idx)
253 path2 = self.path2root(node_j_idx)
254
255 for idx_i in path1:
256 if common_node != None:
257 break
258 for idx_j in path2:
259 if idx_i == idx_j:
260 common_node = idx_i
d7b28cc @vpekar Initial commit
authored
261 break
c41b6f9 @vpekar Refactored
authored
262
d7b28cc @vpekar Initial commit
authored
263 if common_node != None:
c41b6f9 @vpekar Refactored
authored
264 for idx_i in path1:
265 shortest_path.append(idx_i)
266 if idx_i == common_node:
267 break
268 for idx_i in path2:
269 if idx_i == common_node:
270 break
271 shortest_path.append(idx_i)
272
d7b28cc @vpekar Initial commit
authored
273 return common_node, shortest_path
c41b6f9 @vpekar Refactored
authored
274
275 def path2root(self, idx):
d7b28cc @vpekar Initial commit
authored
276 """The path to the root from a node.
c41b6f9 @vpekar Refactored
authored
277 @param idx: the index of the node
d7b28cc @vpekar Initial commit
authored
278 """
c41b6f9 @vpekar Refactored
authored
279 path = [idx]
280
281 if idx != 0:
282 while True:
283 parent = self.dep.get(idx)
284 if not parent:
285 break
286 path.append(parent)
287 idx = parent
288
d7b28cc @vpekar Initial commit
authored
289 return path
c41b6f9 @vpekar Refactored
authored
290
d7b28cc @vpekar Initial commit
authored
291 def print_table(self):
292 """Print the parse as a table, FDG-style, to STDOUT
293 """
c41b6f9 @vpekar Refactored
authored
294 def get_index(id_str):
295 return '-' if '.' in id_str else id_str
296
297 for idx in sorted(self.word):
d7b28cc @vpekar Initial commit
authored
298 line = '\t'.join([
c41b6f9 @vpekar Refactored
authored
299 get_index(unicode(idx)),
300 self.word.get(idx, ''),
301 self.lemma.get(idx, ''),
302 self.tag.get(idx, ''),
303 self.rel.get(idx, ''),
304 unicode(self.dep.get(idx, '')),
d7b28cc @vpekar Initial commit
authored
305 ])
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
306 print line.encode('latin1')
c41b6f9 @vpekar Refactored
authored
307
d7b28cc @vpekar Initial commit
authored
308 def print_tree(self, mode='penn'):
309 """Prints the parse.
310 @param mode: penn/typedDependenciesCollapsed/etc
311 """
c41b6f9 @vpekar Refactored
authored
312 tree_print = TreePrint(mode)
313 tree_print.printTree(self.parse)
314
d7b28cc @vpekar Initial commit
authored
315
316 class StanfordParser:
c41b6f9 @vpekar Refactored
authored
317
d7b28cc @vpekar Initial commit
authored
318 TAG = re.compile(r'<[^>]+>')
c41b6f9 @vpekar Refactored
authored
319
320 def __init__(self, parser_file,
d7b28cc @vpekar Initial commit
authored
321 parser_options=['-maxLength', '80', '-retainTmpSubcategories']):
c41b6f9 @vpekar Refactored
authored
322
323 """@param parser_file: path to the serialised parser model
324 (e.g. englishPCFG.ser.gz)
d7b28cc @vpekar Initial commit
authored
325 @param parser_options: options
326 """
c41b6f9 @vpekar Refactored
authored
327
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
328 assert os.path.exists(parser_file)
329 options = Options()
330 options.setOptions(parser_options)
331 self.lp = LexicalizedParser.getParserFromFile(parser_file, options)
d7b28cc @vpekar Initial commit
authored
332 tlp = PennTreebankLanguagePack()
333 self.gsf = tlp.grammaticalStructureFactory()
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
334 self.lemmer = Morphology()
335 self.word_token_factory = WordTokenFactory()
336 self.parser_query = None
337
338 def get_most_probable_parses(self, text, kbest=2):
339 """Yield kbest parses of a sentence along with their probabilities.
340 """
341 if not self.parser_query:
342 self.parser_query = self.lp.parserQuery()
c41b6f9 @vpekar Refactored
authored
343
533cc47 @vpekar Added tests
authored
344 response = self.parser_query.parse(self.tokenize(text))
c41b6f9 @vpekar Refactored
authored
345
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
346 if not response:
c41b6f9 @vpekar Refactored
authored
347 raise Exception("The sentence cannot be parsed: %s" % text)
348
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
349 for candidate_tree in self.parser_query.getKBestPCFGParses(kbest):
c41b6f9 @vpekar Refactored
authored
350 py_sentence = PySentence(self, candidate_tree.object())
351 prob = math.e ** candidate_tree.score()
352 yield py_sentence, prob
d7b28cc @vpekar Initial commit
authored
353
c41b6f9 @vpekar Refactored
authored
354 def parse(self, sentence):
d7b28cc @vpekar Initial commit
authored
355 """Strips XML tags first.
356 @param s: the sentence to be parsed, as a string
357 @return: a Sentence object
358 """
c41b6f9 @vpekar Refactored
authored
359 sentence = self.TAG.sub('', sentence)
84b03f0 @vpekar Updated to use Stanford parser v.3.3.1
authored
360 tokens = [unicode(x) for x in self.tokenize(sentence)]
361 parse = self.lp.apply(Sentence.toWordList(tokens))
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
362 return PySentence(self, parse)
c41b6f9 @vpekar Refactored
authored
363
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
364 def tokenize(self, text):
365 reader = StringReader(text)
366 tokeniser = PTBTokenizer(reader, self.word_token_factory, None)
367 tokens = tokeniser.tokenize()
368 return tokens
c41b6f9 @vpekar Refactored
authored
369
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
370 def parse_xml(self, text):
d7b28cc @vpekar Initial commit
authored
371 """Tokenise the XML text, remember XML positions, and then parse it.
372 """
c41b6f9 @vpekar Refactored
authored
373
d7b28cc @vpekar Initial commit
authored
374 # build a plain-text token list and remember tag positions
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
375 xml_tags = {}
d7b28cc @vpekar Initial commit
authored
376 sent = []
49218df @vpekar Updated to Stanford parser 3.5.0, fixes #2
authored
377
c41b6f9 @vpekar Refactored
authored
378 for token in self.tokenize(text):
b338545 @vpekar Refactored tests
authored
379 token = unicode(token).replace(u'\xa0', ' ')
49218df @vpekar Updated to Stanford parser 3.5.0, fixes #2
authored
380
d7b28cc @vpekar Initial commit
authored
381 if token.startswith('<'):
382 cur_size = len(sent)
c41b6f9 @vpekar Refactored
authored
383 xml_tags[cur_size] = xml_tags.get(cur_size, [])
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
384 xml_tags[cur_size].append(token)
d7b28cc @vpekar Initial commit
authored
385 else:
386 sent.append(token)
c41b6f9 @vpekar Refactored
authored
387
d7b28cc @vpekar Initial commit
authored
388 # parse
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
389 parse = self.lp.apply(Sentence.toWordList(sent))
c41b6f9 @vpekar Refactored
authored
390
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
391 return PySentence(self, parse, xml_tags)
d7b28cc @vpekar Initial commit
authored
392
c41b6f9 @vpekar Refactored
authored
393
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
394 def parse_xml_example(sp):
395 print 'Parsing XML text'
c41b6f9 @vpekar Refactored
authored
396 text = 'The quick brown <tag attr="term">fox<!-- this is a comment --></tag> jumped over the lazy dog.'
397 print 'IN:', text
398 sentence = sp.parse_xml(text)
d7b28cc @vpekar Initial commit
authored
399 print 'OUT:'
400 sentence.print_table()
c41b6f9 @vpekar Refactored
authored
401 print '-' * 80
402
403
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
404 def parse_probabilities_example(sp):
d7b28cc @vpekar Initial commit
authored
405 print 'Parse probabilities\n'
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
406 text = 'I saw a man with a telescope.'
407 print 'IN:', text
c41b6f9 @vpekar Refactored
authored
408 for sentence, prob in sp.get_most_probable_parses(text, kbest=2):
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
409 print 'Probability:', prob
d7b28cc @vpekar Initial commit
authored
410 print 'Tree:'
c41b6f9 @vpekar Refactored
authored
411 sentence.print_table()
412 print '-' * 50
413 print '-' * 80
414
415
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
416 def subtrees_example(sp):
417 print 'Subtrees:'
418 text = 'I saw a man with a telescope.'
419 sentence = sp.parse(text)
d7b28cc @vpekar Initial commit
authored
420 for subtree in sentence.parse.subTrees():
421 print subtree
c41b6f9 @vpekar Refactored
authored
422 print '-' * 50
423 print '-' * 80
424
425
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
426 def get_dependencies_example(sp):
427 print 'Dependencies:'
428 text = 'I saw a man with a telescope.'
c41b6f9 @vpekar Refactored
authored
429 tmpl = 'Head: %s (%d); dependent: %s (%d); relation: %s'
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
430 sentence = sp.parse(text)
d7b28cc @vpekar Initial commit
authored
431 for td in sentence.gs.allTypedDependencies():
432 gov = td.gov()
433 gov_idx = gov.index()
434 dep = td.dep()
435 dep_idx = dep.index()
436 rel = td.reln()
c41b6f9 @vpekar Refactored
authored
437 print tmpl % (gov.value(), gov_idx, dep.value(), dep_idx, rel)
438 print '-' * 80
439
440
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
441 def get_common_path_example(sp):
c41b6f9 @vpekar Refactored
authored
442 tmpl = 'Least common node for "%s" and "%s": "%s"'
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
443 print 'Common path:'
533cc47 @vpekar Added tests
authored
444 text = 'The quick brown fox jumped over a lazy dog.'
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
445 print 'Text:', text
446 i = 4
447 j = 9
448 sentence = sp.parse(text)
449 lcn, shortest_path = sentence.get_least_common_node(i, j)
c41b6f9 @vpekar Refactored
authored
450 print tmpl % (sentence.word[i], sentence.word[j], sentence.word[lcn])
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
451 path = ' '.join([sentence.word[x] for x in sorted(shortest_path)])
452 print 'Path: %s' % path
c41b6f9 @vpekar Refactored
authored
453
454
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
455 if __name__ == '__main__':
c41b6f9 @vpekar Refactored
authored
456
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
457 # full path to parser file, e.g. englishPCFG.ser.gz
458 parser_file = sys.argv[1]
459 sp = StanfordParser(parser_file)
c41b6f9 @vpekar Refactored
authored
460
1f19dae @vpekar Upgraded to work with Stanford Parser 2.0
authored
461 parse_xml_example(sp)
462 parse_probabilities_example(sp)
463 subtrees_example(sp)
464 get_dependencies_example(sp)
465 get_common_path_example(sp)