<a href="https://colab.research.google.com/github/zapper59/NLP-Question-Answering/blob/master/panlp3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
%%shell
### Only needs to be run once per "runtime session"
### (re-running is safe: existing clones are reused)

# Stop at the first failing command so later steps never run in the wrong
# directory or against a half-finished checkout.
set -e

# Fetch the BERT implementation, unless a previous run already cloned it.
if [ ! -d pytorch-pretrained-BERT ]; then
  git clone https://github.com/huggingface/pytorch-pretrained-BERT.git
fi

# Install from the pinned commit. The subshell keeps the cell's working
# directory unchanged regardless of how the steps inside it end.
(
  cd pytorch-pretrained-BERT/
  git checkout b8e2a9c5840e
  python setup.py install
)

# Fetch this project's helper scripts, unless already cloned.
if [ ! -d NLP-Question-Answering ]; then
  git clone https://github.com/zapper59/NLP-Question-Answering.git
fi

# Replace the stock example script with the project's copy
# (cp overwrites the destination, so no separate rm is needed).
cp -f NLP-Question-Answering/bert_on_colab/run_squad.py pytorch-pretrained-BERT/examples/

In [0]:
### Configure settings shared by the cells below
import os

# Exported through the process environment so the %%shell cells can refer to
# the project's script/data directory as $pdir.
os.environ.update(pdir='NLP-Question-Answering/bert_on_colab')

# Read by the later Python cells to decide which prediction-file layout to parse.
SQUAD_VERSION = 2

While training, you may see a message saying that the GPU's memory usage is getting high.
You can safely ignore it.

In [0]:
%%shell
### Train and also evaluate on test set

# Abort on the first failing step. Without this, a failed data-formatting or
# training run would still reach the "rm -fr saved_model" below and destroy
# the previously saved model.
set -e

# Generate the training/testing JSON files consumed by run_squad.py.
python "$pdir/format_data.py" --v2

# Start from a clean output directory.
rm -fr out
python pytorch-pretrained-BERT/examples/run_squad.py \
  --bert_model bert-base-cased \
  --do_train \
  --do_predict \
  --train_file "$pdir/training.json" \
  --predict_file "$pdir/testing.json" \
  --train_batch_size 12 \
  --learning_rate 3e-5 \
  --num_train_epochs 20.0 \
  --max_seq_length 384 \
  --doc_stride 128 \
  --output_dir out \
  --max_answer_length 8 \
  --version_2_with_negative

### Save most recently trained model
# Only reached when training succeeded (see "set -e" above).
rm -fr saved_model
mv out saved_model

In [0]:
### Print human friendly prediction results on test set
import json
from pprint import pprint

# Gold questions/answers the model was evaluated on.
with open('NLP-Question-Answering/bert_on_colab/testing.json') as f:
  test = json.load(f)

if SQUAD_VERSION == 2:
  # n-best candidates per question id; keep only the answer text and its
  # probability so the printed report stays compact.
  with open('saved_model/nbest_predictions.json') as f:
    nbest_preds = json.load(f)
    for i in nbest_preds:
      nbest_preds[i] = [{'text': p['text'], 'prob':p['probability']} for p in nbest_preds[i]]
else:
  with open('saved_model/predictions.json') as f:
    preds = json.load(f)

golds = []
if SQUAD_VERSION == 2:
  for char in test['data']:
    for para in char['paragraphs']:
      for qa in para['qas']:
        # Questions flagged as impossible are left out of the report.
        if not qa['is_impossible']:
          qid = qa['id']
          q = qa['question']
          # The first listed answer is used as the gold answer.
          gold = qa['answers'][0]['text']
          # Show the gold answer next to the first five n-best candidates.
          golds.append({'.question':q, 'gold': gold, 'preds': nbest_preds[qid][0:5]})
else:
  for char in test['data']:
    for para in char['paragraphs']:
      for qa in para['qas']:
        golds.append({'.question': qa['question'], 'gold': qa['answers'][0]['text']})

  # Attach predictions only after ALL gold entries have been collected.
  # (Previously this loop was nested inside the `for char` loop, so it ran
  # over every prediction while `golds` held only the first entry's
  # questions, raising IndexError as soon as the test set had more than one
  # entry.)
  # NOTE(review): pairing is positional -- it assumes predictions.json lists
  # predictions in the same order as the questions in testing.json; confirm.
  for i, p in enumerate(preds):
    golds[i]['pred'] = preds[p]['text']
    golds[i]['prob'] = preds[p]['probability']

pprint(golds)


From this point on, training is done and you only need to run the code blocks below to test a query.

In [0]:
%%shell

# Abort on the first failing step. Otherwise a failure while formatting the
# query would let prediction run on a stale query.json and silently answer
# the previous question.
set -e

query="Where was Jon Snow born and raised?"
# Write the query file that is passed to run_squad.py below.
python "$pdir/format_query.py" "$query"

### Query using model in saved_model

# Start from a clean output directory so the next cell reads fresh predictions.
rm -fr out
python pytorch-pretrained-BERT/examples/run_squad.py \
  --bert_model bert-base-cased \
  --do_predict \
  --predict_batch_size 32 \
  --predict_file "$pdir/query.json" \
  --max_seq_length 384 \
  --doc_stride 32 \
  --output_dir out \
  --max_answer_length 8 \
  --only_predict \
  --saved_model_dir saved_model \
  --version_2_with_negative

In [0]:
### Get answer
import collections
import json
import operator

# Predictions produced by the query run in the previous cell.
with open("out/predictions.json") as pred_file:
  preds = json.load(pred_file)

freq = collections.Counter()   # how often each answer was predicted
sumscores = {}                 # answer -> sum of its probabilities
minscores = {}                 # answer -> lowest probability seen
maxscores = {}                 # answer -> highest probability seen

if SQUAD_VERSION == 2:
  # Each prediction value is used directly as the answer; empty ones are skipped.
  freq.update(answer for answer in preds.values() if answer)
else:
  # Each prediction carries an answer text and a probability.
  for entry in preds.values():
    ans, score = entry['text'], entry['probability']
    freq[ans] += 1
    sumscores[ans] = sumscores[ans] + score if ans in sumscores else score
    minscores[ans] = min(minscores.get(ans, score), score)
    maxscores[ans] = max(maxscores.get(ans, score), score)

  # Mean probability per distinct answer.
  avgscores = {text: sumscores[text] / freq[text] for text in freq}

  # Rank answers by each statistic, best first, and print the three rankings.
  by_score = operator.itemgetter(1)
  top_avgscores, top_minscores, top_maxscores = (
      sorted(table.items(), key=by_score, reverse=True)
      for table in (avgscores, minscores, maxscores)
  )
  for ranking in (top_avgscores, top_minscores, top_maxscores):
    print(ranking)


# Most frequently predicted answers (at most 50).
print(freq.most_common(50))


[('Winterfell', 6), ('Daenerys Targaryen', 4), ('Aegon Targaryen', 4), ('Daenerys', 2), ('Aemon Targaryen', 2), ('Maester Luwin', 1), ('Wylla', 1), ('Jon Arryn', 1), ('Arya', 1), ('Aegon', 1)]
