In [3]:
import json
import os
import pickle

import nltk
import numpy as np

In [4]:
# Load the pre-trained word embedding matrix as float64.
# `np.float` was only an alias for the builtin float and has been removed from
# NumPy, so the concrete dtype is spelled out here.
emb_mat = np.load("word_embedding_matrix.npy").astype(np.float64)

# NOTE(review): pickle.load can execute arbitrary code; only load pickle files
# that come from a trusted source.
with open("vocabulary.pickle", "rb") as input_file:
    # `voc` is later used as a word -> embedding-row lookup.
    voc = pickle.load(input_file)

with open("testing_data.pickle", "rb") as input_file:
    # `testing_data` is later sliced and iterated as a sequence of records.
    testing_data = pickle.load(input_file)

In [6]:
def get_word_embedding(word, voc, e_mat):
    """Return the embedding row of ``e_mat`` that belongs to ``word``.

    Args:
        word: Token to look up.
        voc: Mapping from token to row index in ``e_mat``.
        e_mat: 2-D embedding matrix, indexed as ``e_mat[row, :]``.

    Returns:
        Row ``voc[word]`` of ``e_mat`` when ``word`` is in ``voc``; otherwise
        row 0, which serves as the fallback for unknown words.
    """
    row = voc[word] if word in voc else 0
    return e_mat[row, :]

def get_tokenize_sentences(documents):
    """Split documents into sentences and each sentence into word tokens.

    Args:
        documents: Iterable of text strings.

    Returns:
        A flat list with one entry per sentence (in document order); each
        entry is the list of tokens produced by ``nltk.word_tokenize``.
    """
    # First pass: flatten every document into its sentences.
    split_sentences = [
        sentence
        for document in documents
        for sentence in nltk.sent_tokenize(document)
    ]
    # Second pass: tokenise each sentence into words.
    return [nltk.word_tokenize(sentence) for sentence in split_sentences]

def get_sent_embedding(sent, voc, emb_mat):
    """Embed a tokenised sentence as the mean of its word embeddings.

    Args:
        sent: Sequence of word tokens.
        voc: Mapping from token to row index in ``emb_mat``.
        emb_mat: 2-D embedding matrix; its second dimension is the embedding
            width.

    Returns:
        1-D float array of length ``emb_mat.shape[1]``. An empty sentence
        yields an all-zero vector instead of the NaN vector (and
        RuntimeWarning) that averaging zero rows would produce.
    """
    # Take the width from the matrix rather than assuming 50-d embeddings.
    emb_dim = emb_mat.shape[1]

    if len(sent) == 0:
        return np.zeros(emb_dim)

    word_vectors = np.zeros((len(sent), emb_dim))
    for i, word in enumerate(sent):
        word_vectors[i, :] = get_word_embedding(word, voc, emb_mat)

    return np.mean(word_vectors, axis=0)
    
def cos_sim(a, b):
    """Cosine similarity between two 1-D vectors.

    Args:
        a: First vector.
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)``. When either vector has zero norm the
        similarity is undefined; 0.0 is returned in that case so that callers
        ranking by similarity do not receive NaN.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator

In [98]:
# Baseline on the first three records: answer each question with the sentence
# whose mean word embedding is most cosine-similar to the question embedding.
test_ans = []

for t in testing_data[:3]:
    # Split the paragraphs into sentences once and keep the raw strings, so the
    # index picked by argmax maps straight back to its source sentence.
    sentences = []
    for para in t['text']:
        sentences.extend(nltk.sent_tokenize(para))
    tokenize_sentences = [nltk.word_tokenize(sent) for sent in sentences]

    # Only the first sentence of the question is embedded.
    tokenize_question = get_tokenize_sentences([t['question']])
    q_emb = get_sent_embedding(tokenize_question[0], voc, emb_mat)

    sims = np.zeros((len(tokenize_sentences)))
    for i, sent in enumerate(tokenize_sentences):
        s_emb = get_sent_embedding(sent, voc, emb_mat)
        sims[i] = cos_sim(q_emb, s_emb)

    print(sims)

    ans = dict()
    ans["id"] = t['id']
    ans['question'] = t['question']
    # np.argmax raises ValueError on an empty array, so a record without any
    # sentence gets an empty answer instead of aborting the loop.
    ans["text"] = sentences[np.argmax(sims)] if sentences else ""
    test_ans.append(ans)

[0.93490492 0.90168545 0.92828754 0.91339167 0.86186674 0.82032573
 0.91350028 0.9100018 ]
[0.91581718 0.88284843 0.90909996 0.87392492 0.83246396 0.92219867
 0.91817481 0.90499963]
[0.9653651  0.94495417 0.98542792 0.97842912 0.94061607 0.9662019
 0.89187433 0.95549378 0.9673979  0.97011311 0.97152908 0.8997807 ]


In [99]:
# Load the development set. The encoding is pinned to UTF-8 so the result does
# not depend on the platform's default text encoding.
file_name = "devel.json"
with open(file_name, encoding="utf-8") as json_data:
    devel_set = json.load(json_data)

In [110]:
# Peek at a single test record to check its structure; the printed record has
# the keys 'question', 'id' and 'text' (a list of paragraph strings).
print(testing_data[2000])

{'question': 'What can adult learners obtain through studying at the Edwards Campus of the University of Kansas?', 'id': 2000, 'text': ['The University of Kansas School of Business is a public business school located on the main campus of the University of Kansas in Lawrence, Kansas.']}


In [7]:
def traverse(tree):
    """Collect the text of every NP and NUM chunk found in a chunk tree.

    Args:
        tree: A tree node exposing ``label()`` and iterable children (e.g. an
            ``nltk.tree.Tree``), or a leaf without a ``label`` attribute.

    Returns:
        List of strings, one per NP/NUM chunk in traversal order. Each string
        is the first element of every child of the chunk joined by spaces
        (children are expected to be ``(word, tag)`` pairs). Nodes with any
        other label are searched recursively; leaves contribute nothing.
    """
    label_fn = getattr(tree, 'label', None)
    if not label_fn:
        # Leaf (no usable label): nothing to collect.
        return []

    if label_fn() in ('NP', 'NUM'):
        return [' '.join([child[0] for child in tree])]

    # Any other node: gather the chunks from each subtree in order.
    return [chunk for child in tree for chunk in traverse(child)]

In [29]:
from nltk.tag import StanfordNERTagger
import nltk
import re

# Chunk grammar: NUM is a single cardinal number (CD); NP is an optional
# determiner, any number of adjectives, then one or more noun tags.
grammar = r"""
  NUM:
    {<CD>}
  NP:
    {<DT>?<JJ>*<NN.*>+}
"""

# Location of the Stanford NER model. Set the STANFORD_NER_MODEL environment
# variable to point at a different install; the default is the original path.
NER_MODEL_PATH = os.environ.get(
    'STANFORD_NER_MODEL',
    '/home/ubuntu/stanford-ner-2018-02-27/classifiers/english.all.3class.distsim.crf.ser.gz',
)

snt = StanfordNERTagger(NER_MODEL_PATH)
cp = nltk.RegexpParser(grammar)

# Inspect chunking and NER output on a single test record.
for t in testing_data[2638:2639]:
    for sent in t['text']:
        # Drop leading/trailing periods and inline punctuation before tokenising.
        sent = sent.strip(".")
        sent = re.sub(r'[,;":\']', '', sent)
        print(sent)
        sent = nltk.word_tokenize(sent)
        pos_sent = nltk.pos_tag(sent)
        tree = cp.parse(pos_sent)
        print(tree)
        print(traverse(tree))
        entities = snt.tag(sent)
        print(entities)

        # Keep only tokens the tagger labelled with something other than 'O'.
        result = [e for e in entities if e[1] != 'O']
        print(result)

Starting in the coal mines by the mid-19th century elevators were operated with steam power and were used for moving goods in bulk in mines and factories
(S
  Starting/VBG
  in/IN
  (NP the/DT coal/NN mines/NNS)
  by/IN
  (NP the/DT mid-19th/JJ century/NN elevators/NNS)
  were/VBD
  operated/VBN
  with/IN
  (NP steam/NN power/NN)
  and/CC
  were/VBD
  used/VBN
  for/IN
  moving/VBG
  (NP goods/NNS)
  in/IN
  (NP bulk/NN)
  in/IN
  (NP mines/NNS)
  and/CC
  (NP factories/NNS))
['the coal mines', 'the mid-19th century elevators', 'steam power', 'goods', 'bulk', 'mines', 'factories']
[('Starting', 'O'), ('in', 'O'), ('the', 'O'), ('coal', 'O'), ('mines', 'O'), ('by', 'O'), ('the', 'O'), ('mid-19th', 'O'), ('century', 'O'), ('elevators', 'O'), ('were', 'O'), ('operated', 'O'), ('with', 'O'), ('steam', 'O'), ('power', 'O'), ('and', 'O'), ('were', 'O'), ('used', 'O'), ('for', 'O'), ('moving', 'O'), ('goods', 'O'), ('in', 'O'), ('bulk', 'O'), ('in', 'O'), ('mines', 'O'), ('and', 'O'), ('facto

In [113]:
import re

# Write the submission file: one "id,answer" row per test record, where the
# answer is every NP/NUM chunk of the record's first paragraph joined by spaces.
with open('test.csv', 'w', encoding='utf-8') as f:
    f.write("id,answer\n")
    for t in testing_data:
        # Only the first paragraph of each record is used.
        pos_sent = nltk.pos_tag(nltk.word_tokenize(t['text'][0]))
        tree = cp.parse(pos_sent)
        # The NER tagger call that used to follow was never used in the
        # output, so it is dropped (it also re-ran traverse on the same tree).
        ans = " ".join(traverse(tree))
        # Removing all punctuation also removes commas, which keeps every row
        # at exactly two comma-separated fields.
        ans = re.sub(r'[^\w\s]', '', ans)
        print(ans)
        f.write(str(t['id']) + ',' + ans + '\n')

Modern web browsers combination standardsbased and de facto HTML and XHTML  same way browsers 
browser extension is computer program extends functionality web browser 
is browser s layout engine markup interactive document  process   
web browser  browser  is software application  presenting  and information resources World Wide Web 
browser Firefox  respectable following beta stage development  release Firefox 10 late 2004  Firefox  versions  7  browser use 
Windows  Internet Explorer dominance web browser market  Internet Explorer usage share 95  2002 
first web browser 1990 Sir Tim BernersLee 
Early web browsers simple version HTML 
most recent major entrant browser market is Chrome  September 2008 
first web browser 1990 Sir Tim BernersLee 
Microsoft Corp v Commission
Internet Explorer available Mac 
Mozilla makes enough money deal it does users Firefox 
January 2009  European Commission it bundling Internet Explorer Windows systems Microsoft   Microsoft s tying Internet Explorer W

Chan is when  
present Thupten Gyatso  President French Tibetan community  who proTibet demonstrators  calm  nonviolent  peaceful  
Chan is when  
newspaper Libération   police much Chinese freedom expression 
route torch six continents March 2008 May 2008 August 2008 
article Macao Daily News list torchbearers Macanese and there many nonathletes torchbearers 
torch Chinese security officials several times Paris leg security reasons  and protest Paris 
Demonstrators and counterdemonstrators Australian Federal Police 
Chan is when  
Several onlookers Chan   What kind Chinese you  
One torchbearer due flight delay 
response demonstrations  editorial People s Daily Chinese people   their  patriotic enthusiasm calmly and  and patriotic aspiration orderly and legal manner  
torchbearers Kiran Bedi  Soha Ali Khan  Sachin Tendulkar and Bhaichung Bhutia event  official Chinese website relay  Indian torchbearers spirit Olympics   and torchbearers Manavjit Singh Sandhu  Abhinav Bindra  Ayaan Ali

There several protests torch relay route 
Chan is when  
Chan is when  
total 120 torchbearers event casino tycoon Stanley Ho 
Indian national football captain  Baichung Bhutia part Indian leg torch relay  concerns Tibet 
London relay torch what BBC  mobile protective ring  
Chan is when  
Several onlookers Chan   What kind Chinese you  
Chan is when  
last time Olympic torch relay Malaysia 1964 Tokyo edition 
Beijing Olympic Organizing Committee out team 30 unarmed attendants People s Armed Police flame its journey 
She praise ethnic Chinese worldwide  Angel Wheelchair  
torch is lit 65 kilometre hour  37 mile hour  winds  and rain 50 millimetres  2 inches  hour 
Several onlookers Chan   What kind Chinese you  
public parking lot where relay 
start McCovey Cove  where Norman Bellingham US Olympic Committee torch first torchbearer  Chinese 1992 Olympic champion swimmer Lin Li 
Thailand  April 18 relay Bangkok Olympic flame s first visit Thailand 
Kazakhstan  first torchbearer Almaty  w

city s name is  Soton  or  Soton   and resident Southampton is Sotonian 
certain times year  Queen Mary 2  Queen Elizabeth and Queen Victoria Southampton same time  event Arrival Three Queens  
Southampton has two large live music venues  Mayflower Theatre  Gaumont Theatre  and Guildhall 
Council elections early May one third seats  one councillor ward   fouryear term  there elections three years four 
Unilink passes West Quay directions  and Wilts  Dorset drop passengers and them up  series bus stands road 
quarter jobs available city health and education sector 
third ferry is Hythe Ferry  passenger service Hythe other side Southampton Water 
Royal Charter 1952 upgraded University College Highfield University Southampton 
Town Quay is original public quay  and dates 13th century 
Buses majority local public transport 
They top flight English football  First Division  first time 1966  eight years 
district remains part Hampshire ceremonial county 
Innovative buildings purpose West Qua

There three fire stations city boundaries St Mary s  Hightown and Redbridge 
his 1854 book  Cruise Steam Yacht North Star  John Choules Southampton   I town more beautiful Main Street Southampton  it Oxford 
King Henry s departure Battle Agincourt 1415  ringleaders  Southampton Plot  Richard  Earl Cambridge  Henry Scrope  3rd Baron Scrope Masham  and Sir Thomas Grey Hetonwere high treason and what is Red Lion public house High Street 
Southampton City Council consists 48 councillors  3 16 wards 
city hockey club  Southampton Hockey Club  1938  is one largest and regarded clubs Hampshire  7 senior men s and 5 senior ladies weekly basis boys  and girls teams 6 upwards 
Commercial radio stations city Breeze  Saint and Hot adult contemporary music  Capital  Power FM and Galaxy and popular music  105 and Heart Hampshire  latter Ocean FM and adult contemporary music  and 106 Jack FM  wwwjackradiocom   Coast 106 
There is controversy comparative crime statisitics due inconsistencies different

His contemporary Saadia Gaon Ashkenaz Saquliba or Slavic territories  and such usage lands tribes Slavs  and Eastern and Central Europe 
2006  study Behar et al  what time highresolution analysis haplogroup K  mtDNA   40  current Ashkenazi population is four women  or  founder lineages    HebrewLevantine mtDNA pool  Middle East 1st and 2nd centuries CE 
late Middle Ages center gravity Ashkenazi population  and its traditional cultural life   German lands Poland and Lithuania  presentday Belarus and Ukraine  
 place origin  Ashkenazi Jews same genetic cohort  is  Ashkenazi Jew s ancestors Poland  Russia  Hungary  Lithuania  or other place historical Jewish population  they same ethnic group 
2006  study Behar et al  what time highresolution analysis haplogroup K  mtDNA   40  current Ashkenazi population is four women  or  founder lineages    HebrewLevantine mtDNA pool  Middle East 1st and 2nd centuries CE 
Human geneticists genetic variations high frequencies Ashkenazi Jews  but general

Reform Judaism  does minhagim  originate Ashkenazi Jews 
1843  John Couch Adams work orbit Uranus data he 
motion Sun relation barycentre Solar System  11 July Neptune its exact discovery position relation Sun  more common heliocentric coordinate system is  discovery longitude 12 July 2011 
focus Neptune and its largest moon Triton 2029 
Objects resonance complete 2 orbits 3 Neptune  and plutinos largest Kuiper belt objects  Pluto  is them 
difference flow direction is  skin effect  and due deeper atmospheric processes 
Neptune moves opposite side Sun  south pole and north pole  methane release north pole 
right his discovery  Le Verrier name Neptune new planet  French Bureau des Longitudes 
large quadrupole moment Neptune result offset planet s centre and geometrical constraints field s dynamo generator 
earliest recorded observations telescope  Galileo s drawings 28 December 1612 and 27 January 1613  contain points up what is position Neptune 
Neptune is 17 times mass Earth and is mo

 Justice Jackson   T  good grounds present legislation 
Jefferson s metaphor wall separation has US Supreme Court 
Lemon v Kurtzman  403 US 602  1971   court Pennsylvania state policy salaries and related costs teachers secular subjects private religious schools Establishment Clause 
They nonconformists Puritans  who Protestant Christians religious persecution Anglican King England 
lone dissenter  Justice Potter Stewart  court s embrace  wall separation  metaphor   I Court s task  areas constitutional adjudication  is uncritical invocation metaphors  wall separation   phrase Constitution  
Most important  pervasive secularism American public life   religion private sphere 
decision  four dissents  state law funding transportation students religious schools  majority opinion  Justice Hugo Black  and dissenting opinions  Justice Wiley Blount Rutledge and Justice Robert H Jackson  Constitution has  wall church and state  or  separation Church State   their disagreement case state funding

Pliny Elder mentions artist Sosus Pergamon name  his mosaics food floor feast and group doves bowl 
monastic communities Judean Desert their monasteries mosaic floors 
5thcentury building Huldah Samaritan synagogue 
heyday mosaic making Sicily age independent Norman kingdom 12th century 
There similar crosses apses Hagia Sophia Church Thessaloniki and Church Dormition Nicaea 
single most important piece Byzantine Christian mosaic art East is Madaba Map  542 and 570 floor church Saint George Madaba  Jordan 
similar mosaic  Coronation Virgin  decorates apse Santa Maria Maggiore 
Tsromi tesserae visible walls 7thcentury church but faint lines hint original scheme 
 carpetlike mosaic floor 1949 Bethany  early Byzantine church Lazarium 333 and 390 
most important one 1990 
Jerusalem its many holy places highest concentration mosaiccovered churches but few them subsequent waves destructions 
Southern Italy part Norman kingdom but great mosaics area fine mosaic pavement Otranto Cathedral 1166

KeyboardInterrupt: 