In [1]:
import tensorflow as tf
from tensorflow import keras

In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Tue Nov 23 16:24:06 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    23W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
from psutil import virtual_memory

# Total memory reported by psutil, expressed in decimal gigabytes (1e9 bytes).
ram_gb = virtual_memory().total / 1e9
print(f'Your runtime has {ram_gb:.1f} gigabytes of available RAM\n')

# Below 20 GB the runtime is reported as a standard (non high-RAM) one.
print('Not using a high-RAM runtime' if ram_gb < 20 else 'You are using a high-RAM runtime!')

Your runtime has 54.8 gigabytes of available RAM

You are using a high-RAM runtime!


In [4]:
# Mount Google Drive at /content/drive; the later cells read the project code
# and data from paths below that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
import sys
import os
# Make the project's `bias` directory the working directory: the relative
# paths used further down ("../multi_fc_publicdata/", "results/",
# "preprocessed/") are resolved against it.
os.chdir('/content/drive/MyDrive/CS105BProject/bias' )
# Put both the project root and the working directory on the import path so
# the project-local packages imported later can be found.
sys.path.append('/content/drive/MyDrive/CS105BProject')
sys.path.append(os.getcwd())

In [6]:
!pip install transformers
!pip install pytorch-nlp
# !pip install hypopt



In [7]:
import sys
import os
# sys.path.append('../../code-acl')
# sys.path.append(os.getcwd())
sys.path.append('/content/drive/MyDrive/CS105BProject/bias/')
os.environ['OMP_NUM_THREADS'] = "1"
import argparse
import pandas as pd
import pickle
from model.generator import TransformerDataset, transformer_collate
from model.bertmodel import MyBertModel
from model.lstmmodel import LSTMModel
import torch
from parameters import BERT_MODEL_PATH, CLAIM_ONLY, CLAIM_AND_EVIDENCE, EVIDENCE_ONLY, DEVICE, INPUT_TYPE_ORDER
from transformers import AdamW
import numpy as np
from utils.utils import print_message, clean_str, preprocess
from sklearn.metrics import f1_score
from sklearn.utils.class_weight import compute_class_weight
from collections import Counter
from torchnlp.word_to_vector import GloVe
from collections import Counter
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
#from hypopt import GridSearch
from model_selection import GridSearch
from tqdm import tqdm

def load_data(dataset):
    """Load one dataset from ``../multi_fc_publicdata/<dataset>/`` and preprocess its text.

    Reads ``<dataset>.tsv`` and ``<dataset>_snippets.tsv`` (tab separated, no
    header row), applies ``preprocess`` to column 1 of the main table and to
    columns 1-10 of the snippets table, and unpickles the label order and the
    index splits stored next to them.

    Args:
        dataset: Dataset name; used as both the directory name and the file prefix.

    Returns:
        Tuple ``(main_data, snippets_data, label_order, splits)`` where the
        first two are DataFrames and the last two are the unpickled objects.
    """
    #path = "../../multi_fc_publicdata/" + dataset + "/"
    path = "../multi_fc_publicdata/" + dataset + "/"

    main_data = pd.read_csv(path + dataset + ".tsv", sep="\t", header=None)
    # Replace the whole column in one assignment. The previous
    # `main_data[1][index] = ...` inside iterrows() was chained assignment:
    # it raises SettingWithCopyWarning and does nothing under copy-on-write.
    main_data[1] = main_data[1].map(preprocess)

    snippets_data = pd.read_csv(path + dataset + "_snippets.tsv", sep="\t", header=None)
    # Columns 1..10 are the ones the original code preprocessed one by one.
    for column in range(1, 11):
        snippets_data[column] = snippets_data[column].map(preprocess)

    # NOTE: pickle.load executes arbitrary code; only load trusted files.
    with open(path + dataset + "_labels.pkl", "rb") as labels_file:
        label_order = pickle.load(labels_file)
    with open(path + dataset + "_index_split.pkl", "rb") as splits_file:
        splits = pickle.load(splits_file)

    return main_data, snippets_data, label_order, splits

def make_generators(main_data, snippets_data, label_order, splits, params, dataset_generator=TransformerDataset, other_dataset=False):
    """Build one DataLoader per split and, optionally, balanced class weights.

    Prints the label distribution of ``main_data`` (column 2), wraps every
    split in ``dataset_generator`` and a ``torch.utils.data.DataLoader``, and
    computes "balanced" class weights from the labels of ``splits[0]``.

    Args:
        main_data: DataFrame whose column 2 holds the label of each row.
        snippets_data: DataFrame indexed by the same row positions as ``main_data``.
        label_order: Sequence of label values; its order defines the class ids.
        splits: Sequence of row-index collections; at least three are required
            because the first three loaders are returned.
        params: Two-element sequence of DataLoader keyword dicts; ``params[0]``
            is used for the first split, ``params[1]`` for all the others.
        dataset_generator: Callable ``(main_rows, snippet_rows, label_order)``
            that returns the dataset object to wrap.
        other_dataset: When true no class weights are computed.

    Returns:
        ``(loader_0, loader_1, loader_2, label_weights)``; ``label_weights`` is
        a float32 tensor, or ``None`` when ``other_dataset`` is true.
    """
    # `.values` builds a fresh array on each access, so take it once.
    main_values = main_data.values
    snippet_values = snippets_data.values

    all_labels = main_values[:, 2]
    counter = Counter(all_labels)
    # "\\%" emits the same backslash-percent text as before; the former "\%"
    # was an invalid escape sequence (SyntaxWarning on recent Python versions).
    ss = "".join(
        ", " + str(c) + " (" + str(np.around(counter[c] / len(all_labels) * 100, 1)) + "\\%)"
        for c in label_order
    )
    print("len", len(all_labels), ss)

    generators = []
    for isplit, split in enumerate(splits):
        split_dataset = dataset_generator(main_values[split], snippet_values[split], label_order)
        loader_params = params[0] if isplit == 0 else params[1]
        generators.append(torch.utils.data.DataLoader(split_dataset, **loader_params))

    if other_dataset:
        # Weights are not needed here, so the labels are not mapped onto
        # `label_order` either (that mapping raises ValueError on unknown labels).
        label_weights = None
    else:
        train_labels = np.array([label_order.index(v) for v in main_values[splits[0]][:, 2]])
        label_weights = torch.tensor(
            compute_class_weight("balanced", classes=np.arange(len(label_order)), y=train_labels).astype(np.float32)
        )

    return generators[0], generators[1], generators[2], label_weights

def evaluate(generator, model, other_from=None, ignore_snippet=None):
    """Run ``model`` over every batch of ``generator`` and score the predictions.

    Args:
        generator: Iterable of batches; items 0-3 of a batch are used as
            claim ids, claims, labels and snippets.
        model: Callable ``model(claims, snippets)`` returning a logits tensor.
        other_from: ``"snes"`` remaps predicted class 0 to class 1 before
            scoring; any other value leaves the predictions untouched.
        ignore_snippet: Optional index (or index array) of snippet positions
            that are overwritten, in place, with ``"filler"`` for every claim.

    Returns:
        ``(f1_micro, f1_macro, claim_ids, logits, labels, predictions)``, the
        last four being flat Python lists over all batches.
    """
    gold_labels, predicted_labels = [], []
    seen_claim_ids, seen_logits = [], []

    for batch in generator:
        claimIDs, claims, labels, snippets = batch[0], batch[1], batch[2], batch[3]

        if ignore_snippet is not None:
            # Blank out the selected evidence slots of every claim in the batch.
            for claim_snippets in snippets:
                claim_snippets[ignore_snippet] = "filler"

        gold_labels.extend(labels)
        logits = model(claims, snippets)
        batch_predictions = torch.argmax(logits, 1).cpu().numpy()

        # other data is snes, and model is trained on pomt: both
        # "pants on fire!" (0) and "false" (1) count as false.
        # For other_from == "pomt" (model trained on snes) nothing changes.
        if other_from == "snes":
            batch_predictions[batch_predictions == 0] = 1

        predicted_labels.extend(batch_predictions.tolist())
        seen_claim_ids.extend(claimIDs)
        seen_logits.extend(logits.cpu().numpy().tolist())

    micro = f1_score(gold_labels, predicted_labels, average="micro")
    macro = f1_score(gold_labels, predicted_labels, average="macro")
    return micro, macro, seen_claim_ids, seen_logits, gold_labels, predicted_labels

def train_step(optimizer, vals, model, criterion):
    """Perform a single optimisation step on one batch.

    Args:
        optimizer: Optimizer whose gradients are reset and then stepped.
        vals: Batch; items 1, 2 and 3 are used as claims, labels and snippets.
        model: Callable ``model(claims, snippets)`` returning logits.
        criterion: Loss callable ``criterion(logits, labels)``.

    Returns:
        The loss tensor of this batch (callers use ``.item()`` on it).
    """
    optimizer.zero_grad()

    claims, snippets = vals[1], vals[3]
    targets = torch.tensor(vals[2]).to(DEVICE)

    batch_loss = criterion(model(claims, snippets), targets)
    batch_loss.backward()
    optimizer.step()

    return batch_loss


def get_embedding_matrix(generators, dataset, min_occurrence=1):
    """Return a GloVe-initialised embedding matrix plus vocabulary lookups.

    The result is cached in ``preprocessed/<dataset>_glove.pkl``; when that
    file exists it is loaded and returned without touching ``generators``.

    Args:
        generators: Iterables of batches; item 1 of a batch is used as the
            claims and item 3 as the per-claim snippet collections.
        dataset: Dataset name, used only to name the cache file.
        min_occurrence: A word is kept only if it occurs strictly more than
            this many times (NOTE(review): ``>`` rather than ``>=`` is the
            original behaviour -- confirm that it is intended).

    Returns:
        ``(glove_embedding_matrix, word2idx, idx2word)``. The matrix has
        ``len(word2idx) + 2`` rows of width 300; rows 0 and 1 are reserved
        (mask / unknown token) and keep their random initialisation.
    """
    savename = "preprocessed/" + dataset + "_glove.pkl"
    if os.path.exists(savename):
        # NOTE: pickle.load executes arbitrary code; only load trusted caches.
        with open(savename, "rb") as cache_file:
            cached = pickle.load(cache_file)
        return cached[0], cached[1], cached[2]

    glove_vectors = GloVe('840B')
    all_claims = []
    all_snippets = []
    for gen in generators:
        for vals in gen:
            all_claims += [clean_str(v) for v in vals[1]]
            all_snippets += [clean_str(item) for sublist in vals[3] for item in sublist]

    counter = Counter(word for text in all_claims + all_snippets for word in text.split(" "))
    all_words = [word for word, occurrences in counter.items() if occurrences > min_occurrence]
    word2idx = {word: i+2 for i, word in enumerate(all_words)} # reserve 0 for potential mask and 1 for unk token
    idx2word = {index: word for word, index in word2idx.items()}

    num_words = len(idx2word)

    # Uniform values in [-0.5, 0.5); rows of words known to GloVe are overwritten below.
    glove_embedding_matrix = np.random.random((num_words+2, 300)) - 0.5
    for word, index in word2idx.items():
        if word in glove_vectors:
            glove_embedding_matrix[index] = glove_vectors[word]

    # Ensure the cache directory exists so the work above is not lost on the dump.
    os.makedirs(os.path.dirname(savename), exist_ok=True)
    with open(savename, "wb") as cache_file:
        pickle.dump([glove_embedding_matrix, word2idx, idx2word], cache_file)
    return glove_embedding_matrix, word2idx, idx2word

def train_model(model, criterion, optimizer, train_generator, val_generator, test_generator, args, other_generator, savename):
    """Train ``model`` with early stopping on validation micro-F1 and pickle the best results.

    After every ``args.eval_per_epoch`` epochs the model is evaluated on the
    validation generator. Whenever validation micro-F1 improves, the test and
    the cross-dataset ("other") generators are evaluated too and, unless
    ``args.inputtype == "CLAIM_ONLY"``, the snippet-removal ablations are run.
    Training stops after 8 consecutive evaluations without improvement; the
    results recorded at the last improvement are then pickled to ``savename``.

    Args:
        model: Module to train; switched between train and eval mode here.
        criterion: Loss callable handed to ``train_step``.
        optimizer: Optimizer handed to ``train_step``.
        train_generator: Iterable of training batches.
        val_generator: Iterable of validation batches.
        test_generator: Iterable of test batches.
        args: Object providing ``eval_per_epoch``, ``dataset`` and ``inputtype``.
        other_generator: Iterable of batches from the other dataset.
        savename: Path of the pickle file to write when training stops.

    Returns:
        None.
    """
    params = sum(np.prod(p.size()) for p in model.parameters() if p.requires_grad)
    print("model parameters", params)

    # Loop-invariant: origin of the cross-dataset batches, as `evaluate` expects it.
    other_from = "snes" if args.dataset == "pomt" else "pomt"

    num_epochs = 0
    patience_counter = 0
    patience_max = 8
    best_f1 = -np.inf
    while (True):
        train_losses = []

        model.train()
        for vals in train_generator:
            loss = train_step(optimizer, vals, model, criterion)
            train_losses.append(loss.item())

        num_epochs += 1
        print_message("TRAIN loss", np.mean(train_losses), num_epochs)

        if num_epochs % args.eval_per_epoch != 0:
            continue

        model.eval()
        with torch.no_grad():
            val_f1micro, val_f1macro, val_claimIDs, val_logits, val_labels, val_predictions = evaluate(val_generator, model)
            # NOTE(review): the messages say "loss" but the last value printed is the number of evaluated claims.
            print_message("VALIDATION F1micro, F1macro, loss:", val_f1micro, val_f1macro, len(val_claimIDs))

        if val_f1micro > best_f1:
            with torch.no_grad():
                test_f1micro, test_f1macro, test_claimIDs, test_logits, test_labels, test_predictions = evaluate(test_generator, model)
                print_message("TEST F1micro, F1macro, loss:", test_f1micro, test_f1macro, len(test_claimIDs))

                other_test_f1micro, other_test_f1macro, other_test_claimIDs, other_test_logits, other_test_labels, other_test_predictions = evaluate(other_generator, model, other_from=other_from)
                print_message("OTHER-TEST F1micro, F1macro, loss:", other_test_f1micro, other_test_f1macro, len(other_test_claimIDs))

                test_remove_top_bottom = []
                test_remove_bottom_top = []
                other_test_remove_top_bottom = []
                other_test_remove_bottom_top = []
                ten = np.arange(10)
                if args.inputtype != "CLAIM_ONLY":
                    # Ablation: blank the first / last (i+1) snippet slots and re-evaluate.
                    for i in tqdm(range(10)):
                        top_is = ten[:(i+1)]
                        bottom_is = ten[-(i+1):]
                        test_remove_top_bottom.append(evaluate(test_generator, model, ignore_snippet=top_is))
                        test_remove_bottom_top.append(evaluate(test_generator, model, ignore_snippet=bottom_is))
                        other_test_remove_top_bottom.append(evaluate(other_generator, model, other_from=other_from, ignore_snippet=top_is))
                        other_test_remove_bottom_top.append(evaluate(other_generator, model, other_from=other_from, ignore_snippet=bottom_is))

                    # v[1] is the macro-F1 of each ablation run.
                    print_message([np.around(v[1], 4) for v in test_remove_top_bottom])
                    print_message([np.around(v[1], 4) for v in test_remove_bottom_top])
                    print_message([np.around(v[1], 4) for v in other_test_remove_top_bottom])
                    print_message([np.around(v[1], 4) for v in other_test_remove_bottom_top])

            patience_counter = 0
            best_f1 = val_f1micro
            val_store = [val_f1micro, val_f1macro, val_claimIDs, val_logits, val_labels, val_predictions]
            test_store = [test_f1micro, test_f1macro, test_claimIDs, test_logits, test_labels, test_predictions, test_remove_top_bottom, test_remove_bottom_top]
            other_test_store = [other_test_f1micro, other_test_f1macro, other_test_claimIDs, other_test_logits, other_test_labels, other_test_predictions, other_test_remove_top_bottom, other_test_remove_bottom_top]
            misc_store = [args]
            total_store = [val_store, test_store, other_test_store, misc_store]
        else:
            patience_counter += 1

        print_message("PATIENCE", patience_counter, "/", patience_max)

        if patience_counter >= patience_max:
            # Create the target directory if needed and close the file handle
            # deterministically, so a finished run cannot lose its results.
            save_dir = os.path.dirname(savename)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            with open(savename, "wb") as results_file:
                pickle.dump(total_store, results_file)
            break

def run_bert(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_generator):
    """Instantiate the BERT classifier and hand it to ``train_model``.

    The model is loaded from ``BERT_MODEL_PATH`` with one output per entry of
    ``label_order``, trained with class-weighted cross entropy and AdamW
    (``lr=args.lr``, ``eps=1e-8``) over its trainable parameters.
    """
    print(f'***run_bert*** with inputtype {args.inputtype}')

    num_labels = len(label_order)
    model = MyBertModel.from_pretrained(BERT_MODEL_PATH, labelnum=num_labels, input_type=inputtype)
    model.to(DEVICE)

    criterion = torch.nn.CrossEntropyLoss(weight=label_weights.to(DEVICE))
    trainable_parameters = (p for p in model.parameters() if p.requires_grad)
    optimizer = AdamW(trainable_parameters, lr=args.lr, eps=1e-8)
    optimizer.zero_grad()

    train_model(
        model, criterion, optimizer,
        train_generator, val_generator, test_generator,
        args, other_generator, savename,
    )

def run_lstm(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_generator):
    """Build the GloVe-backed LSTM classifier and hand it to ``train_model``.

    The vocabulary and embedding matrix come from ``get_embedding_matrix`` over
    all four generators. The model is trained with class-weighted cross
    entropy and AdamW (``lr=args.lr``, ``eps=1e-8``) over its trainable
    parameters.
    """
    print(f'***run_lstm*** with inputtype {args.inputtype}')

    every_generator = [train_generator, val_generator, test_generator, other_generator]
    glove_embedding_matrix, word2idx, idx2word = get_embedding_matrix(every_generator, args.dataset)

    model = LSTMModel(
        args.lstm_hidden_dim,
        args.lstm_layers,
        args.lstm_dropout,
        len(label_order),
        word2idx,
        glove_embedding_matrix,
        input_type=inputtype,
    )
    model.to(DEVICE)

    criterion = torch.nn.CrossEntropyLoss(weight=label_weights.to(DEVICE))
    trainable_parameters = (p for p in model.parameters() if p.requires_grad)
    optimizer = AdamW(trainable_parameters, lr=args.lr, eps=1e-8)
    optimizer.zero_grad()

    train_model(
        model, criterion, optimizer,
        train_generator, val_generator, test_generator,
        args, other_generator, savename,
    )

def filter_snippet_for_bow(generator, ignore_snippet, inputtype):
    """Rebuild the bag-of-words input texts with some snippet slots blanked.

    For every batch the snippet positions selected by ``ignore_snippet`` are
    overwritten, in place, with ``"filler"``; then one text per claim is
    assembled according to ``inputtype``.

    Args:
        generator: Iterable of batches; item 1 holds the claims and item 3 the
            per-claim snippet collections.
        ignore_snippet: Index (or index array) applied to each claim's snippets.
        inputtype: One of ``CLAIM_AND_EVIDENCE``, ``CLAIM_ONLY``, ``EVIDENCE_ONLY``.

    Returns:
        List of sample strings in iteration order.

    Raises:
        Exception: If ``inputtype`` is none of the three known values.
    """
    def build_sample(claims, snippets, idx):
        # NOTE(review): claim and first snippet are concatenated without a
        # separating space in the CLAIM_AND_EVIDENCE case.
        if inputtype == CLAIM_AND_EVIDENCE:
            return clean_str(claims[idx]) + " ".join(clean_str(v) for v in snippets[idx])
        if inputtype == CLAIM_ONLY:
            return clean_str(claims[idx])
        if inputtype == EVIDENCE_ONLY:
            return " ".join(clean_str(v) for v in snippets[idx])
        raise Exception("Unknown type", inputtype)

    samples = []
    for batch in generator:
        claims, snippets = batch[1], batch[3]

        # Blank the selected slots for the whole batch before building any text.
        for claim_snippets in snippets:
            claim_snippets[ignore_snippet] = "filler"

        samples.extend(build_sample(claims, snippets, idx) for idx in range(len(claims)))
    return samples

def get_bows_labels(generators, dataset, inputtype):
    """Turn every generator into TF-IDF features, plus snippet-removal ablation variants.

    Args:
        generators: List of batch iterables; the second to last is treated as
            the test set and the last as the other-dataset test set.
        dataset: Unused by this function.
        inputtype: One of ``CLAIM_AND_EVIDENCE``, ``CLAIM_ONLY``, ``EVIDENCE_ONLY``.

    Returns:
        ``(bows, all_labels, test_remove_top_bottom, test_remove_bottom_top,
        other_test_remove_top_bottom, other_test_remove_bottom_top)``: one
        TF-IDF matrix and one label list per generator, followed by four lists
        of ten TF-IDF matrices (first / last ``k`` snippet slots blanked, k = 1..10).

    Raises:
        Exception: If ``inputtype`` is none of the three known values.
    """
    def build_sample(claims, snippets, idx):
        # NOTE(review): claim and first snippet are concatenated without a
        # separating space in the CLAIM_AND_EVIDENCE case.
        if inputtype == CLAIM_AND_EVIDENCE:
            return clean_str(claims[idx]) + " ".join(clean_str(v) for v in snippets[idx])
        if inputtype == CLAIM_ONLY:
            return clean_str(claims[idx])
        if inputtype == EVIDENCE_ONLY:
            return " ".join(clean_str(v) for v in snippets[idx])
        raise Exception("Unknown type", inputtype)

    all_samples, all_labels = [], []
    for gen in generators:
        gen_samples, gen_labels = [], []
        for batch in gen:
            claims, labels, snippets = batch[1], batch[2], batch[3]
            for idx in range(len(claims)):
                gen_samples.append(build_sample(claims, snippets, idx))
                gen_labels.append(labels[idx])
        all_samples.append(gen_samples)
        all_labels.append(gen_labels)

    test_gen, other_test_gen = generators[-2], generators[-1]
    test_top_texts, test_bottom_texts = [], []
    other_top_texts, other_bottom_texts = [], []
    ten = np.arange(10)
    for k in tqdm(range(1, 11)):
        top_is, bottom_is = ten[:k], ten[-k:]
        test_top_texts.append(filter_snippet_for_bow(test_gen, top_is, inputtype))
        test_bottom_texts.append(filter_snippet_for_bow(test_gen, bottom_is, inputtype))
        other_top_texts.append(filter_snippet_for_bow(other_test_gen, top_is, inputtype))
        other_bottom_texts.append(filter_snippet_for_bow(other_test_gen, bottom_is, inputtype))

    # NOTE(review): the vectorizer is fitted on the samples of *all*
    # generators, i.e. including the test sets.
    vectorizer = TfidfVectorizer(min_df=2)
    vectorizer.fit([text for gen_samples in all_samples for text in gen_samples])

    bows = [vectorizer.transform(gen_samples) for gen_samples in all_samples]

    test_remove_top_bottom = [vectorizer.transform(texts) for texts in test_top_texts]
    test_remove_bottom_top = [vectorizer.transform(texts) for texts in test_bottom_texts]
    other_test_remove_top_bottom = [vectorizer.transform(texts) for texts in other_top_texts]
    other_test_remove_bottom_top = [vectorizer.transform(texts) for texts in other_bottom_texts]

    return bows, all_labels, test_remove_top_bottom, test_remove_bottom_top, other_test_remove_top_bottom, other_test_remove_bottom_top

def run_bow(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_test_generator):
    """Fit a grid-searched random forest on TF-IDF features and pickle its scores.

    Features come from ``get_bows_labels``. The forest is tuned with
    ``GridSearch`` on the validation set (macro-F1), then scored on the
    validation, test and other-dataset test sets as well as on the ten
    "remove first k" / "remove last k" snippet ablations of both test sets.

    Args:
        args: Object providing ``inputtype`` and ``dataset``.
        train_generator, val_generator, test_generator, other_test_generator:
            Batch iterables for the four data sets.
        label_weights: Tensor of per-class weights (``.numpy()`` is called on it).
        inputtype: Input-type id forwarded to ``get_bows_labels``.
        label_order: Unused by this function.
        savename: Path of the pickle file that receives the results.

    Returns:
        None. ``[val_store, test_store, other_test_store, misc_store]`` is
        pickled to ``savename``.
    """
    print(f'***run_bow*** with inputtype {args.inputtype}')

    bows, labels, test_remove_top_bottom, test_remove_bottom_top, other_test_remove_top_bottom, other_test_remove_bottom_top = get_bows_labels([train_generator, val_generator, test_generator, other_test_generator], args.dataset, inputtype)

    train_bow, val_bow, test_bow, other_test_bow = bows[0], bows[1], bows[2], bows[3]
    train_labels, val_labels, test_labels, other_test_labels = labels[0], labels[1], labels[2], labels[3]

    # Class id -> weight mapping in the form RandomForestClassifier expects.
    weights = dict(enumerate(label_weights.numpy()))

    param_grid = [
        {'n_estimators': [100, 500, 1000], 'min_samples_leaf': [1, 3, 5, 10], 'min_samples_split': [2, 5, 10]}
    ]

    opt = GridSearch(model=RandomForestClassifier(n_jobs=5, class_weight=weights), param_grid=param_grid, parallelize=False)
    opt.fit(train_bow, train_labels, val_bow, val_labels, scoring="f1_macro")

    # Origin of the cross-dataset samples; computed once instead of per call.
    other_from = "snes" if args.dataset == "pomt" else "pomt"

    def rf_eval(model, bow, labels, other_from=None):
        """Predict on ``bow`` and return ``(f1_micro, f1_macro, labels, preds)``."""
        preds = model.predict(bow)

        # other data is snes, and model is trained on pomt: both
        # "pants on fire!" (0) and "false" (1) count as false.
        # For other_from == "pomt" (model trained on snes) nothing changes.
        if other_from == "snes":
            preds[preds == 0] = 1

        f1_macro = f1_score(labels, preds, average="macro")
        f1_micro = f1_score(labels, preds, average="micro")
        return f1_micro, f1_macro, labels, preds

    # Store layout: the first four entries are the rf_eval result; the test
    # stores additionally end with the two lists of ablation results.
    val_store = rf_eval(opt, val_bow, val_labels)
    test_store = list(rf_eval(opt, test_bow, test_labels)) + [
        [rf_eval(opt, bow, test_labels) for bow in test_remove_top_bottom],
        [rf_eval(opt, bow, test_labels) for bow in test_remove_bottom_top],
    ]
    other_test_store = list(rf_eval(opt, other_test_bow, other_test_labels, other_from=other_from)) + [
        [rf_eval(opt, bow, other_test_labels, other_from=other_from) for bow in other_test_remove_top_bottom],
        [rf_eval(opt, bow, other_test_labels, other_from=other_from) for bow in other_test_remove_bottom_top],
    ]
    misc_store = [opt.get_best_params()]
    total_store = [val_store, test_store, other_test_store, misc_store]

    print_message("VALIDATION", val_store[0], val_store[1])
    print_message("TEST", test_store[0], test_store[1])
    print_message("OTHER-TEST", other_test_store[0], other_test_store[1])

    # v[1] is the macro-F1 of each ablation run.
    print_message([np.around(v[1], 4) for v in test_store[-2]])
    print_message([np.around(v[1], 4) for v in test_store[-1]])
    print_message([np.around(v[1], 4) for v in other_test_store[-2]])
    print_message([np.around(v[1], 4) for v in other_test_store[-1]])
    print(misc_store)

    # Create the target directory if needed and close the file deterministically.
    save_dir = os.path.dirname(savename)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    with open(savename, "wb") as results_file:
        pickle.dump(total_store, results_file)

def filter_websites(snippets_data, snippets_dir="../../multi_fc_publicdata/snippets/"):
    """Blank out snippets whose source link points at a fact-checking website.

    For every row, the file ``<snippets_dir>/<value of column 0>`` is read; the
    last tab-separated field of each line is taken as the link of the snippet
    at that position. Snippet cells whose link contains one of the listed
    fact-checking domains are overwritten, in place, with ``"filler"``.

    Args:
        snippets_data: DataFrame with the claim id in column 0 followed by the
            snippet columns (presumably exactly ten, since an 11-entry boolean
            mask is applied to each row -- confirm against the data files).
        snippets_dir: Directory holding one link file per claim id. The default
            is the original hard-coded location. NOTE(review): ``load_data``
            reads from ``../multi_fc_publicdata/``, one level less -- confirm
            which of the two is right for the current working directory.

    Returns:
        The same ``snippets_data`` object, modified in place.
    """
    bad_websites = ["factcheck.org", "politifact.com", "snopes.com", "fullfact.org", "factscan.ca"]
    max_snippets = 10
    remove_count = 0
    for i, claim_id in enumerate(snippets_data.values[:, 0]):
        with open(os.path.join(snippets_dir, claim_id), "r", encoding="utf-8") as f:
            links = [line.strip().split("\t")[-1] for line in f]

        # 1 data sample has 11 links by mistake in the dataset: only look at
        # the first ten. Truncating *before* filling the mask avoids the
        # IndexError the former `remove[j] = ...` raised on such a sample.
        remove = [False] * max_snippets
        for j, link in enumerate(links[:max_snippets]):
            remove[j] = any(bad in link for bad in bad_websites)

        # Leading False protects column 0 (the claim id).
        snippets_data.iloc[i, [False] + remove] = "filler"

        remove_count += sum(remove)
    print_message("REMOVE COUNT", remove_count)
    return snippets_data



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Error loading SnowballStemmer: Package 'SnowballStemmer'
[nltk_data]     not found in index
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [8]:
%%time

import gc

gc.collect()
class vars():
    """Hyper-parameter bundle for one experiment run.

    NOTE: the class name shadows the built-in ``vars``; it is kept because the
    rest of the notebook instantiates it under this name.

    Attributes set for every mode: ``dataset`` ("snes"), ``inputtype``,
    ``filter_websites`` (0), ``model`` (equal to ``mode``), ``batchsize``,
    ``eval_per_epoch`` (1) and ``lr``. Mode "lstm" additionally sets
    ``lstm_hidden_dim``, ``lstm_layers`` and ``lstm_dropout``.
    """

    def __init__(self, mode, inputtype):
        """Populate the settings for ``mode``.

        Args:
            mode: One of "bow", "lstm" or "bert".
            inputtype: Input-type name, stored unchanged.

        Raises:
            ValueError: If ``mode`` is not a known model type. Previously an
                unknown mode produced an object without any attributes, which
                only failed later with an AttributeError.
        """
        if mode not in ("bow", "lstm", "bert"):
            raise ValueError("Unknown mode %r; expected 'bow', 'lstm' or 'bert'" % (mode,))

        # Settings shared by all three model types.
        self.dataset = "snes"
        self.inputtype = inputtype
        self.filter_websites = 0
        self.model = mode
        self.eval_per_epoch = 1

        # Model-specific settings.
        if mode == "bow":
            self.batchsize = 2
            self.lr = 0.0001
        elif mode == "lstm":
            self.batchsize = 16
            self.lr = 0.0001
            self.lstm_hidden_dim = 128
            self.lstm_layers = 2
            self.lstm_dropout = 0.1
        else:  # "bert"
            self.batchsize = 6
            self.lr = 0.000003

# Build a lookup from the unigram list: each non-empty line is split on tabs,
# field 1 becomes the key and field 0 (kept as a string) the value. Reading
# stops once more than 100000 lines have been consumed.
filepath = 'sorted.uk.word.unigrams'
word_freq = {}
count = 0
with open(filepath, encoding= 'utf-8') as f:
    for count, raw_line in enumerate(f, start=1):
        stripped = raw_line.rstrip()
        if stripped:
            fields = stripped.split('\t')
            word_freq[fields[1]] = str(fields[0])
        if count > 100000:
            break


# Experiment driver: one full train/evaluate run per (model, input type) pair.
# Swap the list below to run the other model types.
# for mode in ['bow']:
# for mode in ['lstm']:
for mode in ['bert']:
    # The loop variable is kept separate from `inputtype` (the integer id
    # computed below); previously the id overwrote the loop variable.
    for inputtype_name in ['CLAIM_ONLY', 'CLAIM_AND_EVIDENCE', 'EVIDENCE_ONLY']:
        args = vars(mode, inputtype_name)

        # Result file name: [filter flag-]model-dataset-inputtype-lr-batchsize[-lstm settings].pkl
        name_parts = [args.model, args.dataset, args.inputtype, args.lr, args.batchsize]
        if args.filter_websites > 0.5:
            name_parts.insert(0, args.filter_websites)
        if args.model == "lstm":
            name_parts += [args.lstm_hidden_dim, args.lstm_layers, args.lstm_dropout]
        savename = "results/" + "-".join(str(v) for v in name_parts) + ".pkl"

        inputtype = INPUT_TYPE_ORDER.index(args.inputtype)
        main_data, snippets_data, label_order, splits = load_data(args.dataset)

        if args.filter_websites > 0.5:
            snippets_data = filter_websites(snippets_data)

        params = {"batch_size": args.batchsize, "shuffle": True, "num_workers": 1, "collate_fn": transformer_collate, "persistent_workers": True, "prefetch_factor":5}
        # Evaluation loaders differ from the training loader only in not shuffling.
        eval_params = {**params, "shuffle": False}

        train_generator, val_generator, test_generator, label_weights = make_generators(main_data, snippets_data, label_order, splits, [params, eval_params])

        # Cross-dataset test set: load the *other* dataset and rename its
        # labels so that they fit the label set of the training dataset.
        if args.dataset == "snes":
            other_name = "pomt"
            relabel = {"pants on fire!": "false", "half-true": "mixture"}
        else:
            other_name = "snes"
            relabel = {"mixture": "half-true"}

        main_data, snippets_data, _, splits = load_data(other_name)
        if args.filter_websites > 0.5:
            snippets_data = filter_websites(snippets_data)
        for old_label, new_label in relabel.items():
            # .iloc is positional and documents boolean *arrays* as masks, so
            # the Series mask is converted explicitly.
            label_mask = (main_data.iloc[:, 2] == old_label).to_numpy()
            main_data.iloc[label_mask, 2] = new_label
        _, _, other_test_generator, _ = make_generators(main_data, snippets_data, label_order, splits, [params, eval_params], other_dataset=True)

        if args.model == "bert":
            run_bert(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_test_generator)
        elif args.model == "lstm":
            run_lstm(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_test_generator)
        elif args.model == "bow":
            run_bow(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_test_generator)

        gc.collect()




len 5069 , false (64.3\%), mostly false (7.5\%), mixture (12.3\%), mostly true (2.8\%), true (13.0\%)
len 13581 , false (29.7\%), mostly false (17.0\%), mixture (19.8\%), mostly true (18.8\%), true (14.8\%)
***run_bert*** with inputtype CLAIM_ONLY


You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizerFast'.
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing MyBertModel: ['distilbert.transformer.layer.0.attention.out_lin.weight', 'distilbert.transformer.layer.0.output_layer_norm.bias', 'distilbert.transformer.layer.0.ffn.lin1.weight', 'distilbert.transformer.layer.1.attention.out_lin.weight', 'distilbert.transformer.layer.5.ffn.lin2.weight', 'distilbert.transformer.layer.0.attention.out_lin.bias', 'distilbert.transformer.layer.1.output_layer_norm.weight', 'distilbert.transformer.layer.3.sa_layer_norm.b

model parameters 109486085
[Nov 23, 16:26:44] TRAIN loss 1.6190532757019676 1
[Nov 23, 16:26:46] VALIDATION F1micro, F1macro, loss: 0.13214990138067062 0.058697969186689555 507
[Nov 23, 16:26:48] TEST F1micro, F1macro, loss: 0.14102564102564102 0.05631117345675309 1014
[Nov 23, 16:26:55] OTHER-TEST F1micro, F1macro, loss: 0.15495031284504968 0.07460475232901116 2717
[Nov 23, 16:26:55] PATIENCE 0 / 8
[Nov 23, 16:27:25] TRAIN loss 1.587994174880756 2
[Nov 23, 16:27:26] VALIDATION F1micro, F1macro, loss: 0.42800788954635116 0.16163809753742153 507
[Nov 23, 16:27:28] TEST F1micro, F1macro, loss: 0.4812623274161736 0.1830161610923861 1014
[Nov 23, 16:27:35] OTHER-TEST F1micro, F1macro, loss: 0.2451232977548767 0.14434773537243656 2717
[Nov 23, 16:27:35] PATIENCE 0 / 8
[Nov 23, 16:28:05] TRAIN loss 1.5731076732076503 3
[Nov 23, 16:28:07] VALIDATION F1micro, F1macro, loss: 0.6429980276134122 0.1565426170468187 507
[Nov 23, 16:28:09] TEST F1micro, F1macro, loss: 0.6429980276134122 0.1565426170

You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizerFast'.
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing MyBertModel: ['distilbert.transformer.layer.0.attention.out_lin.weight', 'distilbert.transformer.layer.0.output_layer_norm.bias', 'distilbert.transformer.layer.0.ffn.lin1.weight', 'distilbert.transformer.layer.1.attention.out_lin.weight', 'distilbert.transformer.layer.5.ffn.lin2.weight', 'distilbert.transformer.layer.0.attention.out_lin.bias', 'distilbert.transformer.layer.1.output_layer_norm.weight', 'distilbert.transformer.layer.3.sa_layer_norm.b

model parameters 109490694
[Nov 23, 16:37:41] TRAIN loss 1.65799579799578 1
[Nov 23, 16:37:51] VALIDATION F1micro, F1macro, loss: 0.38461538461538464 0.15751025991792064 507
[Nov 23, 16:38:11] TEST F1micro, F1macro, loss: 0.4122287968441814 0.16982693902578028 1014
[Nov 23, 16:39:05] OTHER-TEST F1micro, F1macro, loss: 0.23739418476260585 0.13068671416809025 2717


100%|██████████| 10/10 [22:01<00:00, 132.18s/it]

[Nov 23, 17:01:06] [0.1558, 0.1421, 0.1329, 0.1254, 0.118, 0.1066, 0.0939, 0.086, 0.0816, 0.0816]
[Nov 23, 17:01:06] [0.1692, 0.157, 0.1425, 0.136, 0.1267, 0.1158, 0.1052, 0.0949, 0.086, 0.0816]
[Nov 23, 17:01:06] [0.1276, 0.1226, 0.1184, 0.1144, 0.1101, 0.1042, 0.0972, 0.0888, 0.0846, 0.0847]
[Nov 23, 17:01:06] [0.1301, 0.1259, 0.1212, 0.1174, 0.1116, 0.1083, 0.1016, 0.0952, 0.087, 0.0847]
[Nov 23, 17:01:06] PATIENCE 0 / 8





[Nov 23, 17:04:23] TRAIN loss 1.6091922242697831 2
[Nov 23, 17:04:32] VALIDATION F1micro, F1macro, loss: 0.1282051282051282 0.09107094114338551 507
[Nov 23, 17:04:32] PATIENCE 1 / 8
[Nov 23, 17:07:49] TRAIN loss 1.6034194109206263 3
[Nov 23, 17:07:59] VALIDATION F1micro, F1macro, loss: 0.13609467455621302 0.05496448687205642 507
[Nov 23, 17:07:59] PATIENCE 2 / 8
[Nov 23, 17:11:16] TRAIN loss 1.5914153811093923 4
[Nov 23, 17:11:26] VALIDATION F1micro, F1macro, loss: 0.23274161735700197 0.1057033082181528 507
[Nov 23, 17:11:26] PATIENCE 3 / 8
[Nov 23, 17:14:43] TRAIN loss 1.558961503107 5
[Nov 23, 17:14:52] VALIDATION F1micro, F1macro, loss: 0.17357001972386588 0.09083712348751394 507
[Nov 23, 17:14:52] PATIENCE 4 / 8
[Nov 23, 17:18:09] TRAIN loss 1.5623787240804852 6
[Nov 23, 17:18:18] VALIDATION F1micro, F1macro, loss: 0.3747534516765286 0.1766302573769662 507
[Nov 23, 17:18:18] PATIENCE 5 / 8
[Nov 23, 17:21:35] TRAIN loss 1.5251937736631245 7
[Nov 23, 17:21:44] VALIDATION F1micro, F1m

100%|██████████| 10/10 [21:54<00:00, 131.46s/it]

[Nov 23, 17:44:52] [0.2279, 0.2366, 0.2339, 0.2283, 0.2239, 0.221, 0.2191, 0.2196, 0.2184, 0.218]
[Nov 23, 17:44:52] [0.2314, 0.2293, 0.2342, 0.2357, 0.2299, 0.2234, 0.2206, 0.2211, 0.2201, 0.218]
[Nov 23, 17:44:52] [0.1633, 0.1635, 0.1637, 0.1642, 0.1654, 0.1667, 0.1671, 0.1671, 0.169, 0.1687]
[Nov 23, 17:44:52] [0.1607, 0.1633, 0.1644, 0.1642, 0.1645, 0.1662, 0.166, 0.1675, 0.1675, 0.1687]
[Nov 23, 17:44:52] PATIENCE 0 / 8





[Nov 23, 17:48:07] TRAIN loss 1.48570760868087 8
[Nov 23, 17:48:17] VALIDATION F1micro, F1macro, loss: 0.46942800788954636 0.23504943136369408 507
[Nov 23, 17:48:17] PATIENCE 1 / 8
[Nov 23, 17:51:32] TRAIN loss 1.408308107154192 9
[Nov 23, 17:51:42] VALIDATION F1micro, F1macro, loss: 0.40236686390532544 0.1984895653104894 507
[Nov 23, 17:51:42] PATIENCE 2 / 8
[Nov 23, 17:54:57] TRAIN loss 1.3046370643518261 10
[Nov 23, 17:55:07] VALIDATION F1micro, F1macro, loss: 0.3609467455621302 0.2160158690281075 507
[Nov 23, 17:55:07] PATIENCE 3 / 8
[Nov 23, 17:58:22] TRAIN loss 1.1290360087075748 11
[Nov 23, 17:58:32] VALIDATION F1micro, F1macro, loss: 0.5680473372781065 0.21272550886343328 507
[Nov 23, 17:58:51] TEST F1micro, F1macro, loss: 0.5927021696252466 0.23269166028164148 1014
[Nov 23, 17:59:44] OTHER-TEST F1micro, F1macro, loss: 0.2701509017298491 0.15836287033826663 2717


100%|██████████| 10/10 [21:44<00:00, 130.43s/it]

[Nov 23, 18:21:29] [0.2333, 0.2316, 0.2301, 0.2301, 0.2324, 0.2345, 0.2339, 0.2358, 0.239, 0.239]
[Nov 23, 18:21:29] [0.2327, 0.2336, 0.2307, 0.2304, 0.2308, 0.23, 0.2374, 0.2366, 0.2397, 0.239]
[Nov 23, 18:21:29] [0.1597, 0.1595, 0.1603, 0.1608, 0.1623, 0.1639, 0.1647, 0.1656, 0.1661, 0.1664]
[Nov 23, 18:21:29] [0.1583, 0.1597, 0.1615, 0.1599, 0.1624, 0.1652, 0.1662, 0.1662, 0.1664, 0.1664]
[Nov 23, 18:21:29] PATIENCE 0 / 8





[Nov 23, 18:24:43] TRAIN loss 0.9682518116928436 12
[Nov 23, 18:24:52] VALIDATION F1micro, F1macro, loss: 0.38461538461538464 0.2209251894758076 507
[Nov 23, 18:24:52] PATIENCE 1 / 8
[Nov 23, 18:28:07] TRAIN loss 0.8086056464376885 13
[Nov 23, 18:28:16] VALIDATION F1micro, F1macro, loss: 0.25443786982248523 0.1625078827950278 507
[Nov 23, 18:28:16] PATIENCE 2 / 8
[Nov 23, 18:31:31] TRAIN loss 0.7212922797865562 14
[Nov 23, 18:31:40] VALIDATION F1micro, F1macro, loss: 0.3333333333333333 0.19834652487379228 507
[Nov 23, 18:31:40] PATIENCE 3 / 8
[Nov 23, 18:34:54] TRAIN loss 0.5516541581443587 15
[Nov 23, 18:35:04] VALIDATION F1micro, F1macro, loss: 0.41420118343195267 0.22523719931498604 507
[Nov 23, 18:35:04] PATIENCE 4 / 8
[Nov 23, 18:38:18] TRAIN loss 0.461321419019982 16
[Nov 23, 18:38:28] VALIDATION F1micro, F1macro, loss: 0.33727810650887574 0.19414478991116968 507
[Nov 23, 18:38:28] PATIENCE 5 / 8
[Nov 23, 18:41:42] TRAIN loss 0.39425390717538855 17
[Nov 23, 18:41:52] VALIDATION F

You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizerFast'.
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing MyBertModel: ['distilbert.transformer.layer.0.attention.out_lin.weight', 'distilbert.transformer.layer.0.output_layer_norm.bias', 'distilbert.transformer.layer.0.ffn.lin1.weight', 'distilbert.transformer.layer.1.attention.out_lin.weight', 'distilbert.transformer.layer.5.ffn.lin2.weight', 'distilbert.transformer.layer.0.attention.out_lin.bias', 'distilbert.transformer.layer.1.output_layer_norm.weight', 'distilbert.transformer.layer.3.sa_layer_norm.b

model parameters 109486854
[Nov 23, 18:52:40] TRAIN loss 1.6358572519006762 1
[Nov 23, 18:52:46] VALIDATION F1micro, F1macro, loss: 0.1242603550295858 0.04421052631578947 507
[Nov 23, 18:52:59] TEST F1micro, F1macro, loss: 0.1232741617357002 0.043898156277436345 1014
[Nov 23, 18:53:32] OTHER-TEST F1micro, F1macro, loss: 0.19764446080235554 0.06601106330669945 2717


100%|██████████| 10/10 [13:08<00:00, 78.81s/it]

[Nov 23, 19:06:40] [0.0439, 0.0439, 0.0439, 0.0439, 0.0439, 0.0439, 0.0439, 0.0439, 0.0439, 0.0439]
[Nov 23, 19:06:40] [0.0439, 0.0439, 0.0439, 0.0439, 0.0439, 0.0439, 0.0439, 0.0439, 0.0439, 0.0439]
[Nov 23, 19:06:40] [0.066, 0.066, 0.066, 0.066, 0.066, 0.066, 0.066, 0.066, 0.066, 0.066]
[Nov 23, 19:06:40] [0.066, 0.066, 0.066, 0.066, 0.066, 0.066, 0.066, 0.066, 0.066, 0.066]
[Nov 23, 19:06:40] PATIENCE 0 / 8





[Nov 23, 19:08:50] TRAIN loss 1.6023496229302239 2
[Nov 23, 19:08:56] VALIDATION F1micro, F1macro, loss: 0.6429980276134122 0.1565426170468187 507
[Nov 23, 19:09:09] TEST F1micro, F1macro, loss: 0.6429980276134122 0.1565426170468187 1014
[Nov 23, 19:09:42] OTHER-TEST F1micro, F1macro, loss: 0.29738682370261316 0.09168794326241134 2717


100%|██████████| 10/10 [13:07<00:00, 78.71s/it]

[Nov 23, 19:22:49] [0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565]
[Nov 23, 19:22:49] [0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565]
[Nov 23, 19:22:49] [0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917]
[Nov 23, 19:22:49] [0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917]
[Nov 23, 19:22:49] PATIENCE 0 / 8





[Nov 23, 19:24:59] TRAIN loss 1.5900437697969578 3
[Nov 23, 19:25:05] VALIDATION F1micro, F1macro, loss: 0.20512820512820512 0.09234653460286309 507
[Nov 23, 19:25:05] PATIENCE 1 / 8
[Nov 23, 19:27:15] TRAIN loss 1.5863927307765227 4
[Nov 23, 19:27:22] VALIDATION F1micro, F1macro, loss: 0.631163708086785 0.17479475040450648 507
[Nov 23, 19:27:22] PATIENCE 2 / 8
[Nov 23, 19:29:32] TRAIN loss 1.59145012185783 5
[Nov 23, 19:29:38] VALIDATION F1micro, F1macro, loss: 0.1242603550295858 0.04421052631578947 507
[Nov 23, 19:29:38] PATIENCE 3 / 8
[Nov 23, 19:31:48] TRAIN loss 1.5771648790183905 6
[Nov 23, 19:31:54] VALIDATION F1micro, F1macro, loss: 0.6429980276134122 0.1565426170468187 507
[Nov 23, 19:31:54] PATIENCE 4 / 8
[Nov 23, 19:34:04] TRAIN loss 1.575556547959914 7
[Nov 23, 19:34:11] VALIDATION F1micro, F1macro, loss: 0.6429980276134122 0.1565426170468187 507
[Nov 23, 19:34:11] PATIENCE 5 / 8
[Nov 23, 19:36:21] TRAIN loss 1.58578383268134 8
[Nov 23, 19:36:27] VALIDATION F1micro, F1macro