In [0]:
%cd -q ./data/
!mkdir -p pos_chunk_ner/
!wget -q https://raw.githubusercontent.com/Franck-Dernoncourt/NeuroNER/master/neuroner/data/conll2003/en//test.txt -P pos_chunk_ner/
!wget -q https://raw.githubusercontent.com/Franck-Dernoncourt/NeuroNER/master/neuroner/data/conll2003/en/valid.txt -P pos_chunk_ner/
!wget -q https://raw.githubusercontent.com/Franck-Dernoncourt/NeuroNER/master/neuroner/data/conll2003/en/train.txt -P pos_chunk_ner/
!python format_data.py --input_path pos_chunk_ner/ --output_path pos_chunk/
!python format_data.py --input_path pos_chunk_ner/ --output_path pos_ner/ --ner
%cd -q ../
!mkdir -p result/
save_dir = 'result/'

In [0]:
import pandas as pd
import os
import torch
from data import Corpus

# Path handed to Corpus; it is the same directory that the first
# format_data.py call writes to (relative to data/) and that the training
# commands receive via --data.
argsdata = 'data/pos_chunk/'
# Corpus comes from the project-local `data` module.
corpus = Corpus(argsdata)

In [0]:
# Bundle the corpus' pos_* and chunk_* attributes into one (pos, chunk) tuple
# per split: train, valid and test.
target_data = (corpus.pos_train, corpus.chunk_train)
valid_target_data = (corpus.pos_valid, corpus.chunk_valid)
test_target_data = (corpus.pos_test, corpus.chunk_test)

In [0]:
filename = save_dir + 'pos-chunk'
modes = [''] + ['--auxiliary --mode '+s for s in ['"Projection"']]
parameters = [(seed, lam, alpha, mode)
              for seed in range(50)
              for lam in [0., 0.01, 0.03, 0.1, 0.3, 1., 3., 10., 30., 100.]
              for alpha in [1., 0.01]
              for mode in modes]

parameters = [x for x in parameters if (x[3] != '' or x[2] == 1.)]
parameters = [x for x in parameters if (x[1] != 0. or x[3] == '')]

i = max(0, len(os.listdir(save_dir))-2)
while i < len(parameters):
    seed, lam, alpha, mode = parameters[i]
    !python main.py --seed {seed} --lam {lam} --alpha {alpha} --data 'data/pos_chunk/' --emsize 300 --npos_layers 2 --nchunk_layers 2 --nhid 128 --batch_size 128 --seq_len 10 --cuda --train_mode 'Joint' --epochs 300 {mode} --log_interval 20 --save "{filename}"
    i += 1

In [0]:
modes = ['--auxiliary --mode '+s for s in ['"Weighted cosine"', '"Unweighted cosine"', '"Orthogonal"']]
parameters2 = [(seed, lam, alpha, mode)
              for seed in range(50)
              for alpha in [1., 0.01]
              for lam in [0.01, 0.03, 0.1, 0.3, 1., 3., 10., 30., 100.]
              for mode in modes]

i = max(0, len(os.listdir(save_dir))-2-len(parameters))
while i < len(parameters2):
    seed, lam, alpha, mode = parameters2[i]
    !python main.py --seed {seed} --lam {lam} --alpha {alpha} --data 'data/pos_chunk/' --emsize 300 --npos_layers 2 --nchunk_layers 2 --nhid 128 --batch_size 128 --seq_len 10 --cuda --train_mode 'Joint' --epochs 300 {mode} --log_interval 20 --save "{filename}"
    i += 1

In [0]:
# Gather one row per saved run (every entry in save_dir whose name contains
# 'seed') into a DataFrame with columns lam, alpha, mode, run, accuracy.
# os.listdir returns entries in arbitrary order, so sort them to make the row
# order of the resulting table reproducible.
files = sorted(save_dir + f for f in os.listdir(save_dir) if 'seed' in f)
results_dict = {'lam':[], 'alpha':[], 'mode':[], 'run':[], 'accuracy':[]}
for path in files:
    # Hyper-parameters are read back from the file name: each value is the
    # text between its marker ('_lam', '_alpha', '_seed', '_mode-') and the
    # next underscore.
    lam = float(path.split('_lam')[1].split('_')[0])
    alpha = float(path.split('_alpha')[1].split('_')[0])
    run = int(path.split('_seed')[1].split('_')[0])
    # NOTE(review): the value is cut at the first underscore, so a mode name
    # that contains underscores would be truncated — confirm against the file
    # names main.py writes.
    mode = path.split('_mode-')[1].split('_')[0]
    if lam == 0:
        # Runs with lam == 0 are reported under a single label.
        mode = 'Single task'
    # The runs are launched with --cuda; map_location='cpu' lets the saved
    # results be read on a machine without a GPU.
    # NOTE: torch.load unpickles its input — only load files you produced yourself.
    accuracy = float(torch.load(path, map_location='cpu')['test_accuracies'][0][1])
    results_dict['lam'].append(lam)
    results_dict['alpha'].append(alpha)
    results_dict['run'].append(run)
    results_dict['mode'].append(mode)
    results_dict['accuracy'].append(accuracy)
df = pd.DataFrame.from_dict(results_dict)

In [0]:
# Write the collected results table to CSV without the index column; the
# path is relative to the current working directory.
df.to_csv('../experiment-3.csv', index=False)