# Homework: Competitive Grammar Writing

In [1]:
%load_ext autoreload
%autoreload 2
from pcfg_parse_gen import Pcfg, PcfgGenerator, CkyParse
import nltk

def print_tree(tree_string):
    """Pretty-print a bracketed parse tree.

    Surrounding whitespace is stripped from ``tree_string`` before it is
    parsed with ``nltk.Tree.fromstring``; the resulting tree is rendered
    with its ``pretty_print`` method.

    Args:
        tree_string: Bracketed tree text, e.g. one entry of the ``trees``
            list returned by the parser.
    """
    nltk.Tree.fromstring(tree_string.strip()).pretty_print()

def draw_tree(tree_string):
    """Draw a bracketed parse tree using the tree's ``draw`` method.

    Surrounding whitespace is stripped from ``tree_string`` before it is
    parsed with ``nltk.Tree.fromstring``.

    Args:
        tree_string: Bracketed tree text, e.g. one entry of the ``trees``
            list returned by the parser.
    """
    nltk.Tree.fromstring(tree_string.strip()).draw()

## Parsing sentences with your grammar

While developing your grammar, use it to parse both the example sentences and sentences sampled from your own and other grammars.

In [28]:
# Grammar used for parsing, loaded from the devset S1, S2 and vocabulary files.
# (Alternative file set: Pcfg(["S1.gr","S2.gr","Vocab.gr"]).)
parse_gram = Pcfg([
    "grammars/devset_s1.gr",
    "grammars/devset_s2.gr",
    "grammars/devset_vocab.gr",
])

# Parser over that grammar; beamsize and verbose are passed straight to CkyParse.
parser = CkyParse(
    parse_gram,
    beamsize=0.0001,
    verbose=0,
)

In [29]:
# Parse one probe sentence, report its cross entropy, then show every returned tree.
probe_sentences = ["Arthur suggested that the castle be carried ."]
ce, trees = parser.parse_sentences(probe_sentences)
print("-cross entropy: {}".format(ce))
for tree_string in trees:
    print_tree(tree_string)

-cross entropy: -8.724367379546793
                          TOP                                    
                           |                                      
                           S1                                    
                           |                                      
                          ROOT                                   
                           |                                      
                           S                                     
                   ________|_________________________             
                  NP                                 |           
   _______________|____                              |            
  |                    VP                         S|<VP-.>       
  |         ___________|___                     _____|_______     
  |        |               PP                  VP            |   
  |        |       ________|____            ___|_____        |    
  NP       |      |             NP

#-cross entropy (bits/word): -8.72437


Use `parse_file` to parse a file of sentences.

## Generating sentences with your grammar

While you are developing your grammar you should generate sentences with it
to check what your grammar is doing. Try to write your grammar so that it will
generate hard-to-parse sentences.

## Compare generating sentences

In [33]:
# Print 20 sentences sampled from the grammar built from these S1 and vocabulary files.
gen_gram = Pcfg([
    "grammars/moresentences_devset_s1.gr",
    "grammars/moresentences_devset_vocab.gr",
])
gen = PcfgGenerator(gen_gram)
for _ in range(20):
    words = gen.generate()
    print(" ".join(words))

the new power take Grail .
can't -
Father ! have n't uh of Camelot .
the breakfast fooling up
me have is in breadth I feel burned ? .
of Gable had , for n't fatal Greetings .


#reading grammar file: grammars/moresentences_devset_s1.gr
#Ignored cycle NP -> NP
#reading grammar file: grammars/moresentences_devset_vocab.gr
ERROR: word Tower was sampled but is not allowed
ERROR: word KNIGHT was sampled but is not allowed


no far are is Well Defeat two
not !
's I
does Grail You will is delirious -- and exciting
Man , Oh ? - her -- attend my nervous training of .
I 's the !
nice to are only tell .
away
frighten lovely silly travellers his ways
do bitching .
a inherent rescue a coconut , Grenade Go what Oh , wield actually ! like It ...
You speaking it !
you
Heh I of 's heads . -- interested Lancelot . away but fight as him


In [34]:
# Print 20 sentences sampled from the grammar built from these S1 and vocabulary files.
gen_gram = Pcfg([
    "grammars/devset_s1.gr",
    "grammars/devset_vocab.gr",
])
gen = PcfgGenerator(gen_gram)
for _ in range(20):
    words = gen.generate()
    print(" ".join(words))

are the country No sweet
an syndicalist
charged . , and a nobody ! Right voluntarily . told shit from of on No of I And seven tropical bridges forget quiet and seemed You're haste ! !
have Look l , but him from Sir -- . have
basic , swallow Please Prince Thursday 's Well ? ;
a examine ? shall to 'd currently did your great Please .
Where do No Camauuuugh 's Come !
he know his Dingo up .
Well
who can I'm were I ? , the luck stand in my I .
Divine
leave you say in The Burn Ramper I and who
the Haw can't of God !
And I the ?
Tis repressed if from !
Who take Not !
a bad answer remember you off Of the Scene Is Who is Well
All daft
the hello brave you You
did keep . , and the vache 're . !


#reading grammar file: grammars/devset_s1.gr
#Ignored cycle NP -> NP
#reading grammar file: grammars/devset_vocab.gr
#Ignored cycle ( -> (
#Ignored cycle ) -> )


In [35]:
# Print 20 sentences sampled from the grammar built from these S1 and vocabulary files.
gen_gram = Pcfg([
    "grammars/dev_and_exp_s1.gr",
    "grammars/dev_and_exp_vocab.gr",
])
gen = PcfgGenerator(gen_gram)
for _ in range(20):
    words = gen.generate()
    print(" ".join(words))

the basic coconut blow So did Iiiiives ?
are buggering of The east
my quest Well Shut here - , Sir ,
I'm but Sir 's have burned a wood for pointy .
why were the separate felt does , why He was first up not second .
Camelot Now no syndicalist
it Will much just Arthur raised .
very , not ...
kneecaps - I ! been
what join only of you !
is if your open
buy
a Dennis Uh 's King !
currently . , I buggered ?
a
Oh Now the Lancelot We're suggesting it .
far by this !
the Man ridden will , more laurels ? in my Quiet tackle by the open One , AM resting of No harder !
he Aaaaugh his sayin of A wearier !
using near you , and knows the interested Heh


#reading grammar file: grammars/dev_and_exp_s1.gr
#Ignored cycle NP -> NP
#reading grammar file: grammars/dev_and_exp_vocab.gr
#Ignored cycle ( -> (
#Ignored cycle ) -> )


## Test on different parser

In [54]:
def test_grammar(s1, s2, vocab, test):
    parse_gram = Pcfg([s1, s2, vocab])
    parser = CkyParse(parse_gram, beamsize=0.0001, verbose=0)
    ce, trees = parser.parse_file(test)

### On `text/example_sentences.txt`

In [62]:
test_grammar(
    s1="grammars/devset_s1.gr",
    s2="grammars/devset_s2.gr",
    vocab="grammars/devset_vocab.gr",
    test="text/example_sentences.txt",
)

#-cross entropy (bits/word): -8.92096


In [63]:
test_grammar(
    s1="grammars/devset_rnn_s1.gr",
    s2="grammars/devset_rnn_s2.gr",
    vocab="grammars/devset_rnn_vocab.gr",
    test="text/example_sentences.txt",
)

#No parses found for: they migrate precisely because they know they will grow .
#-cross entropy (bits/word): -13.819


In [64]:
test_grammar(
    s1="grammars/devset_stanford_s1.gr",
    s2="grammars/devset_stanford_s2.gr",
    vocab="grammars/devset_stanford_vocab.gr",
    test="text/example_sentences.txt",
)

#-cross entropy (bits/word): -9.15619


### On `text/more_examples.txt`

In [65]:
test_grammar(
    s1="grammars/devset_s1.gr",
    s2="grammars/devset_s2.gr",
    vocab="grammars/devset_vocab.gr",
    test="text/more_examples.txt",
)

#-cross entropy (bits/word): -9.57106


In [66]:
test_grammar(
    s1="grammars/devset_rnn_s1.gr",
    s2="grammars/devset_rnn_s2.gr",
    vocab="grammars/devset_rnn_vocab.gr",
    test="text/more_examples.txt",
)

#No parses found for: they migrate precisely because they know they will grow .
#-cross entropy (bits/word): -11.2527


In [67]:
test_grammar(
    s1="grammars/devset_stanford_s1.gr",
    s2="grammars/devset_stanford_s2.gr",
    vocab="grammars/devset_stanford_vocab.gr",
    test="text/more_examples.txt",
)

#-cross entropy (bits/word): -9.70726


## Test with different training set

### On `text/example_sentences.txt`

In [75]:
test_grammar(
    s1="grammars/devset_s1.gr",
    s2="grammars/devset_s2.gr",
    vocab="grammars/devset_vocab.gr",
    test="text/example_sentences.txt",
)

#-cross entropy (bits/word): -8.92096


In [85]:
test_grammar(
    s1="grammars/devset_and_examplesentences_s1.gr",
    s2="grammars/devset_and_examplesentences_s2.gr",
    vocab="grammars/devset_and_examplesentences_vocab.gr",
    test="text/example_sentences.txt",
)

#-cross entropy (bits/word): -7.87721


In [77]:
test_grammar(
    s1="grammars/moresentences_devset_s1.gr",
    s2="grammars/moresentences_devset_s2.gr",
    vocab="grammars/moresentences_devset_vocab.gr",
    test="text/example_sentences.txt",
)

#-cross entropy (bits/word): -9.03043


In [78]:
test_grammar(
    s1="grammars/moresentences_devset_examplesentences_s1.gr",
    s2="grammars/moresentences_devset_examplesentences_s2.gr",
    vocab="grammars/moresentences_devset_examplesentences_vocab.gr",
    test="text/example_sentences.txt",
)

#-cross entropy (bits/word): -8.31589


### On `text/more_examples.txt`

In [79]:
test_grammar(
    s1="grammars/devset_s1.gr",
    s2="grammars/devset_s2.gr",
    vocab="grammars/devset_vocab.gr",
    test="text/more_examples.txt",
)

#-cross entropy (bits/word): -9.57106


In [80]:
test_grammar(
    s1="grammars/devset_and_examplesentences_s1.gr",
    s2="grammars/devset_and_examplesentences_s2.gr",
    vocab="grammars/devset_and_examplesentences_vocab.gr",
    test="text/more_examples.txt",
)

#-cross entropy (bits/word): -8.83074


In [81]:
test_grammar(
    s1="grammars/moresentences_devset_s1.gr",
    s2="grammars/moresentences_devset_s2.gr",
    vocab="grammars/moresentences_devset_vocab.gr",
    test="text/more_examples.txt",
)

#-cross entropy (bits/word): -9.67019


In [82]:
test_grammar(
    s1="grammars/moresentences_devset_examplesentences_s1.gr",
    s2="grammars/moresentences_devset_examplesentences_s2.gr",
    vocab="grammars/moresentences_devset_examplesentences_vocab.gr",
    test="text/more_examples.txt",
)

#-cross entropy (bits/word): -9.04387


## Test using different s2

### On `text/example_sentences.txt`

In [83]:
# NOTE(review): the recorded run of this cell failed with FileNotFoundError for
# 'grammars/devset_and_examplesentences_bi_s2.gr' — create or restore that
# grammar file (or correct the path) before re-running.
test_grammar("grammars/devset_and_examplesentences_s1.gr", "grammars/devset_and_examplesentences_bi_s2.gr", "grammars/devset_and_examplesentences_vocab.gr", "text/example_sentences.txt")

FileNotFoundError: [Errno 2] No such file or directory: 'grammars/devset_and_examplesentences_bi_s2.gr'

In [None]:
test_grammar(
    s1="grammars/devset_and_examplesentences_s1.gr",
    s2="grammars/devset_and_examplesentences_uni_s2.gr",
    vocab="grammars/devset_and_examplesentences_vocab.gr",
    test="text/example_sentences.txt",
)

In [None]:
test_grammar(
    s1="grammars/devset_and_examplesentences_s1.gr",
    s2="grammars/devset_and_examplesentences_0_s2.gr",
    vocab="grammars/devset_and_examplesentences_vocab.gr",
    test="text/example_sentences.txt",
)

### On `text/more_examples.txt`

In [None]:
# NOTE(review): this cell has no recorded run and uses
# 'grammars/devset_and_examplesentences_bi_s2.gr', the path reported missing
# (FileNotFoundError) in the recorded output of the example_sentences run —
# confirm the file exists before running.
test_grammar("grammars/devset_and_examplesentences_s1.gr", "grammars/devset_and_examplesentences_bi_s2.gr", "grammars/devset_and_examplesentences_vocab.gr", "text/more_examples.txt")

In [None]:
test_grammar(
    s1="grammars/devset_and_examplesentences_s1.gr",
    s2="grammars/devset_and_examplesentences_uni_s2.gr",
    vocab="grammars/devset_and_examplesentences_vocab.gr",
    test="text/more_examples.txt",
)

In [None]:
test_grammar(
    s1="grammars/devset_and_examplesentences_s1.gr",
    s2="grammars/devset_and_examplesentences_0_s2.gr",
    vocab="grammars/devset_and_examplesentences_vocab.gr",
    test="text/more_examples.txt",
)