In [2]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
CS224N 2019-20: Homework 3
run.py: Run the dependency parser.
Sahil Chopra <schopra8@stanford.edu>
Haoshen Hong <haoshen@stanford.edu>
"""
from datetime import datetime
import os
import pickle
import math
import time
import argparse

from torch import nn, optim
import torch
from tqdm import tqdm

from parser_model import ParserModel
from utils.parser_utils import minibatches, load_and_preprocess_data, AverageMeter

parser = argparse.ArgumentParser(description='Train neural dependency parser in pytorch')
parser.add_argument('-d', '--debug', action='store_true', help='whether to enter debug mode')
args = parser.parse_args()


usage: ipykernel_launcher.py [-h] [-d]
ipykernel_launcher.py: error: unrecognized arguments: -f /root/.local/share/jupyter/runtime/kernel-1c80e27d-cd9f-4585-86e8-3828fa809fee.json


SystemExit: ignored

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [0]:
# -----------------
# Primary Functions
# -----------------
def train(parser, train_data, dev_data, output_path, batch_size=1024, n_epochs=10, lr=0.0005):
    """ Train the neural dependency parser.

    @param parser (Parser): Neural Dependency Parser
    @param train_data ():
    @param dev_data ():
    @param output_path (str): Path to which model weights and results are written.
    @param batch_size (int): Number of examples in a single batch
    @param n_epochs (int): Number of training epochs
    @param lr (float): Learning rate
    """
    best_dev_UAS = 0


    ### YOUR CODE HERE (~2-7 lines)
    ### TODO:
    ###      1) Construct Adam Optimizer in variable `optimizer`
    ###      2) Construct the Cross Entropy Loss Function in variable `loss_func` with `mean`
    ###         reduction (default)
    ###
    ### Hint: Use `parser.model.parameters()` to pass optimizer
    ###       necessary parameters to tune.
    ### Please see the following docs for support:
    ###     Adam Optimizer: https://pytorch.org/docs/stable/optim.html
    ###     Cross Entropy Loss: https://pytorch.org/docs/stable/nn.html#crossentropyloss

    optimizer = optim.Adam(parser.model.parameters(), lr=lr)
    loss_func = nn.CrossEntropyLoss(reduction='mean') # reduction = 'mean' as default

    ### END YOUR CODE

    for epoch in range(n_epochs):
        print("Epoch {:} out of {:}".format(epoch + 1, n_epochs))
        dev_UAS = train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size)
        if dev_UAS > best_dev_UAS:
            best_dev_UAS = dev_UAS
            print("New best dev UAS! Saving model.")
            torch.save(parser.model.state_dict(), output_path)
        print("")


def train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size):
    """ Train the neural dependency parser for single epoch.

    Note: In PyTorch we can signify train versus test and automatically have
    the Dropout Layer applied and removed, accordingly, by specifying
    whether we are training, `model.train()`, or evaluating, `model.eval()`

    @param parser (Parser): Neural Dependency Parser
    @param train_data ():
    @param dev_data ():
    @param optimizer (nn.Optimizer): Adam Optimizer
    @param loss_func (nn.CrossEntropyLoss): Cross Entropy Loss Function
    @param batch_size (int): batch size

    @return dev_UAS (float): Unlabeled Attachment Score (UAS) for dev data
    """
    parser.model.train() # Places model in "train" mode, i.e. apply dropout layer
    n_minibatches = math.ceil(len(train_data) / batch_size)
    loss_meter = AverageMeter()

    with tqdm(total=(n_minibatches)) as prog:
        for i, (train_x, train_y) in enumerate(minibatches(train_data, batch_size)):
            optimizer.zero_grad()   # remove any baggage in the optimizer
            loss = 0. # store loss for this batch here
            train_x = torch.from_numpy(train_x).long()
            train_y = torch.from_numpy(train_y.nonzero()[1]).long()

            ### YOUR CODE HERE (~5-10 lines)
            ### TODO:
            ###      1) Run train_x forward through model to produce `logits`
            ###      2) Use the `loss_func` parameter to apply the PyTorch CrossEntropyLoss function.
            ###         This will take `logits` and `train_y` as inputs. It will output the CrossEntropyLoss
            ###         between softmax(`logits`) and `train_y`. Remember that softmax(`logits`)
            ###         are the predictions (y^ from the PDF).
            ###      3) Backprop losses
            ###      4) Take step with the optimizer
            ### Please see the following docs for support:
            ###     Optimizer Step: https://pytorch.org/docs/stable/optim.html#optimizer-step

            logits = parser.model.forward(train_x)
            loss = loss_func(logits, train_y)
            loss.backward()
            optimizer.step()


            ### END YOUR CODE
            prog.update(1)
            loss_meter.update(loss.item())

    print ("Average Train Loss: {}".format(loss_meter.avg))

    print("Evaluating on dev set",)
    parser.model.eval() # Places model in "eval" mode, i.e. don't apply dropout layer
    dev_UAS, _ = parser.parse(dev_data)
    print("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
    return dev_UAS




In [3]:
%cd /content/drive/My Drive/cs224n/assignment3
!python run.py -d

/content/drive/My Drive/cs224n/assignment3
INITIALIZING
Loading data...
took 1.98 seconds
Building parser...
took 0.03 seconds
Loading pretrained embeddings...
took 2.29 seconds
Vectorizing data...
took 0.05 seconds
Preprocessing training data...
took 1.22 seconds
took 0.00 seconds

TRAINING
Epoch 1 out of 10
100% 48/48 [00:02<00:00, 16.59it/s]
Average Train Loss: 0.7357395738363266
Evaluating on dev set
125250it [00:00, 13823555.40it/s]
- dev UAS: 50.16
New best dev UAS! Saving model.

Epoch 2 out of 10
100% 48/48 [00:02<00:00, 18.63it/s]
Average Train Loss: 0.37260768314202625
Evaluating on dev set
125250it [00:00, 13730699.84it/s]
- dev UAS: 58.49
New best dev UAS! Saving model.

Epoch 3 out of 10
100% 48/48 [00:02<00:00, 18.85it/s]
Average Train Loss: 0.31117786280810833
Evaluating on dev set
125250it [00:00, 11407464.95it/s]
- dev UAS: 61.29
New best dev UAS! Saving model.

Epoch 4 out of 10
100% 48/48 [00:02<00:00, 18.67it/s]
Average Train Loss: 0.26819199261566
Evaluating on dev

In [4]:
%cd /content/drive/My Drive/cs224n/assignment3
!python run.py

/content/drive/My Drive/cs224n/assignment3
INITIALIZING
Loading data...
took 1.92 seconds
Building parser...
took 1.40 seconds
Loading pretrained embeddings...
took 2.73 seconds
Vectorizing data...
took 1.76 seconds
Preprocessing training data...
took 42.54 seconds
took 0.01 seconds

TRAINING
Epoch 1 out of 10
100% 1848/1848 [02:05<00:00, 14.68it/s]
Average Train Loss: 0.18117778356795833
Evaluating on dev set
1445850it [00:00, 48063265.32it/s]
- dev UAS: 83.86
New best dev UAS! Saving model.

Epoch 2 out of 10
100% 1848/1848 [02:05<00:00, 14.78it/s]
Average Train Loss: 0.11343337228174978
Evaluating on dev set
1445850it [00:00, 40205087.93it/s]
- dev UAS: 86.01
New best dev UAS! Saving model.

Epoch 3 out of 10
100% 1848/1848 [02:01<00:00, 15.21it/s]
Average Train Loss: 0.09942459968398466
Evaluating on dev set
1445850it [00:00, 48022920.80it/s]
- dev UAS: 87.14
New best dev UAS! Saving model.

Epoch 4 out of 10
100% 1848/1848 [02:04<00:00, 14.88it/s]
Average Train Loss: 0.09117837147

In [0]:
if __name__ == "__main__":
    debug = args.debug

    assert (torch.__version__.split(".") >= ["1", "0", "0"]), "Please install torch version >= 1.0.0"

    print(80 * "=")
    print("INITIALIZING")
    print(80 * "=")
    parser, embeddings, train_data, dev_data, test_data = load_and_preprocess_data(debug)

    start = time.time()
    model = ParserModel(embeddings)
    parser.model = model
    print("took {:.2f} seconds\n".format(time.time() - start))

    print(80 * "=")
    print("TRAINING")
    print(80 * "=")
    output_dir = "results/{:%Y%m%d_%H%M%S}/".format(datetime.now())
    output_path = output_dir + "model.weights"

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    train(parser, train_data, dev_data, output_path, batch_size=1024, n_epochs=10, lr=0.0005)

    if not debug:
        print(80 * "=")
        print("TESTING")
        print(80 * "=")
        print("Restoring the best model weights found on the dev set")
        parser.model.load_state_dict(torch.load(output_path))
        print("Final evaluation on test set",)
        parser.model.eval()
        UAS, dependencies = parser.parse(test_data)
        print("- test UAS: {:.2f}".format(UAS * 100.0))
        print("Done!")
