In [1]:
import tensorflow as tf
from tensorflow import keras

In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Mon Nov 29 12:01:48 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    23W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
from psutil import virtual_memory

# Total (not currently free) system memory, in decimal gigabytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the threshold this notebook uses to tell a high-RAM runtime from a standard one.
runtime_note = 'You are using a high-RAM runtime!' if ram_gb >= 20 else 'Not using a high-RAM runtime'
print(runtime_note)

Your runtime has 54.8 gigabytes of available RAM

You are using a high-RAM runtime!


In [4]:
from google.colab import drive
# Mount Google Drive at /content/drive; the project paths used in the next cell live under it.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
import sys
import os
# Work from the project's `bias` folder: the relative paths used later in this notebook
# ("../multi_fc_publicdata/", "preprocessed/", "results/") are resolved against it.
os.chdir('/content/drive/MyDrive/CS105BProject/bias' )
# Make both the project root and the current (bias) directory importable.
# NOTE: re-running this cell appends the same entries to sys.path again.
sys.path.append('/content/drive/MyDrive/CS105BProject')
sys.path.append(os.getcwd())

In [6]:
# Third-party packages that the Colab image does not provide.
# NOTE(review): versions are not pinned, so a fresh runtime may install releases that
# differ from the ones this notebook was executed with — consider pinning.
!pip install transformers
!pip install pytorch-nlp
# hypopt is intentionally not installed: GridSearch is imported from the local
# `model_selection` module in the next cell instead.
# !pip install hypopt



In [7]:
import sys
import os
# sys.path.append('../../code-acl')
# sys.path.append(os.getcwd())
sys.path.append('/content/drive/MyDrive/CS105BProject/bias/')
os.environ['OMP_NUM_THREADS'] = "1"
import argparse
import pandas as pd
import pickle
from model.generator import TransformerDataset, transformer_collate
from model.bertmodel import MyBertModel
from model.lstmmodel import LSTMModel
import torch
from parameters import BERT_MODEL_PATH, CLAIM_ONLY, CLAIM_AND_EVIDENCE, EVIDENCE_ONLY, DEVICE, INPUT_TYPE_ORDER
from transformers import AdamW
import numpy as np
from utils.utils import print_message, clean_str, preprocess
from sklearn.metrics import f1_score
from sklearn.utils.class_weight import compute_class_weight
from collections import Counter
from torchnlp.word_to_vector import GloVe
from collections import Counter
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
#from hypopt import GridSearch
from model_selection import GridSearch
from tqdm import tqdm

def load_data(dataset, step = 'none'):
    """Load one dataset from ``../multi_fc_publicdata/<dataset>/`` and preprocess its text.

    Reads ``<dataset>.tsv`` (claims) and ``<dataset>_snippets.tsv`` (evidence snippets) as
    header-less, tab-separated tables, then applies ``preprocess(text, step)`` to column 1
    of the claims table and to columns 1..10 of the snippets table.

    Args:
        dataset: dataset name; used both as the folder name and as the file-name prefix.
        step: forwarded unchanged to ``preprocess`` as its second argument.

    Returns:
        ``(main_data, snippets_data, label_order, splits)`` where the first two are
        DataFrames and the last two are the objects unpickled from
        ``<dataset>_labels.pkl`` and ``<dataset>_index_split.pkl``.
    """
    path = "../multi_fc_publicdata/" + dataset + "/"

    def _clean(text):
        return preprocess(text, step)

    # Assign whole columns instead of `df[col][idx] = ...`: chained assignment raises
    # SettingWithCopyWarning and is silently ignored under pandas copy-on-write.
    main_data = pd.read_csv(path + dataset + ".tsv", sep="\t", header=None)
    main_data[1] = main_data[1].map(_clean)

    snippets_data = pd.read_csv(path + dataset + "_snippets.tsv", sep="\t", header=None)
    for column in range(1, 11):  # column 0 is left untouched; 1..10 hold the snippets
        snippets_data[column] = snippets_data[column].map(_clean)

    # Only unpickle files that ship with the dataset (pickle can execute arbitrary code).
    with open(path + dataset + "_labels.pkl", "rb") as labels_file:
        label_order = pickle.load(labels_file)
    with open(path + dataset + "_index_split.pkl", "rb") as splits_file:
        splits = pickle.load(splits_file)

    return main_data, snippets_data, label_order, splits

def make_generators(main_data, snippets_data, label_order, splits, params, dataset_generator=TransformerDataset, other_dataset=False):
    """Wrap each split of a dataset in a DataLoader and compute training class weights.

    Args:
        main_data: claims DataFrame; column 2 is read as the label column.
        snippets_data: snippets DataFrame, indexed row-wise with the same split indices.
        label_order: list of label names; a label's position is its class index.
        splits: sequence of row-index collections; the first three are returned as
            train / validation / test loaders.
        params: two DataLoader keyword dicts; ``params[0]`` is used for the first split,
            ``params[1]`` for every other split.
        dataset_generator: callable building a dataset from
            ``(main_rows, snippet_rows, label_order)``.
        other_dataset: when True no class weights are computed (``None`` is returned).

    Returns:
        ``(train_loader, val_loader, test_loader, label_weights)``; ``label_weights`` is a
        float32 tensor of "balanced" class weights computed on the first split, or ``None``.
    """
    main_values = main_data.values
    snippet_values = snippets_data.values

    # Print the label distribution. "\\%" keeps the literal backslash of the original
    # output without relying on the invalid escape sequence "\%".
    all_labels = main_values[:, 2]
    counter = Counter(all_labels)
    ss = "".join(
        ", " + str(c) + " (" + str(np.around(counter[c] / len(all_labels) * 100, 1)) + "\\%)"
        for c in label_order
    )
    print("len", len(all_labels), ss)

    generators = []
    for isplit, split in enumerate(splits):
        split_dataset = dataset_generator(main_values[split], snippet_values[split], label_order)
        loader_kwargs = params[0] if isplit == 0 else params[1]
        generators.append(torch.utils.data.DataLoader(split_dataset, **loader_kwargs))

    if other_dataset:
        # Weights are only needed for the dataset the model is trained on.
        label_weights = None
    else:
        train_labels = main_values[splits[0]][:, 2]
        train_label_ids = np.array([label_order.index(v) for v in train_labels])
        label_weights = torch.tensor(
            compute_class_weight("balanced", classes=np.arange(len(label_order)), y=train_label_ids).astype(np.float32)
        )

    return generators[0], generators[1], generators[2], label_weights

def evaluate(generator, model, other_from=None, ignore_snippet=None):
    """Run `model` over every batch of `generator` and score its predictions.

    Args:
        generator: iterable of batches; items 0..3 of a batch are read as
            claim ids, claims, labels and snippets.
        model: callable ``model(claims, snippets)`` returning a logits tensor.
        other_from: when ``"snes"``, predictions of class 0 are rewritten to class 1
            before scoring; any other value leaves predictions unchanged.
        ignore_snippet: optional index (or indices) overwritten with ``"filler"`` in each
            claim's snippets before the forward pass. The batch is modified in place.

    Returns:
        ``(f1_micro, f1_macro, claim_ids, logits, labels, predictions)``; the last four
        are flat Python lists accumulated over all batches.
    """
    gold_labels, predicted_labels = [], []
    seen_claim_ids, logit_rows = [], []

    # The evaluated data is snes while the model was trained on pomt: both
    # "pants on fire!" (0) and "false" (1) of pomt count as false.
    merge_class0_into_class1 = other_from == "snes"

    for batch in generator:
        claimIDs = batch[0]
        claims = batch[1]
        labels = batch[2]
        snippets = batch[3]

        if ignore_snippet is not None:
            for row in range(len(snippets)):
                snippets[row][ignore_snippet] = "filler"

        gold_labels += labels
        logits = model(claims, snippets)

        batch_predictions = torch.argmax(logits, 1).cpu().numpy()
        if merge_class0_into_class1:
            batch_predictions[batch_predictions == 0] = 1

        predicted_labels += batch_predictions.tolist()
        seen_claim_ids += claimIDs
        logit_rows += logits.cpu().numpy().tolist()

    micro = f1_score(gold_labels, predicted_labels, average="micro")
    macro = f1_score(gold_labels, predicted_labels, average="macro")

    return micro, macro, seen_claim_ids, logit_rows, gold_labels, predicted_labels

def train_step(optimizer, vals, model, criterion):
    """Perform one optimisation step on a single batch and return its loss tensor.

    `vals` is a batch whose items 1, 2 and 3 are read as claims, labels and snippets;
    the labels are converted to a tensor on ``DEVICE`` before the loss is computed.
    """
    claims, snippets = vals[1], vals[3]
    targets = torch.tensor(vals[2]).to(DEVICE)

    optimizer.zero_grad()
    batch_loss = criterion(model(claims, snippets), targets)
    batch_loss.backward()
    optimizer.step()

    return batch_loss


def get_embedding_matrix(generators, dataset, min_occurrence=1):
    """Build (or load from cache) a GloVe embedding matrix for the words seen in `generators`.

    The result is cached in ``preprocessed/<dataset>_glove.pkl``. The cache key is the
    dataset name only, so a cached file is reused even if `min_occurrence` or the text
    preprocessing changed — delete the file to force a rebuild.

    Args:
        generators: iterables of batches; item 1 of a batch is read as the claims and
            item 3 as a list of snippet lists.
        dataset: dataset name used in the cache file name.
        min_occurrence: a word is kept only if it occurs strictly MORE than this many times.

    Returns:
        ``(glove_embedding_matrix, word2idx, idx2word)``. Word indices start at 2; rows 0
        and 1 are reserved (mask / unknown token). Rows of words missing from GloVe, and
        the two reserved rows, keep their random initialisation in [-0.5, 0.5).
    """
    savename = "preprocessed/" + dataset + "_glove.pkl"
    if os.path.exists(savename):
        # Only load caches written by this function (pickle can execute arbitrary code).
        with open(savename, "rb") as cache_file:
            cached = pickle.load(cache_file)
        return cached[0], cached[1], cached[2]

    glove_vectors = GloVe('840B')

    counter = Counter()
    for gen in generators:
        for vals in gen:
            for claim in vals[1]:
                counter.update(clean_str(claim).split(" "))
            for snippet_list in vals[3]:
                for snippet in snippet_list:
                    counter.update(clean_str(snippet).split(" "))

    # Sorted so the word -> index assignment does not depend on set/hash iteration order.
    vocabulary = sorted(word for word, occurrences in counter.items() if occurrences > min_occurrence)
    word2idx = {word: i + 2 for i, word in enumerate(vocabulary)}  # reserve 0 for potential mask and 1 for unk token
    idx2word = {index: word for word, index in word2idx.items()}

    glove_embedding_matrix = np.random.random((len(vocabulary) + 2, 300)) - 0.5
    for word, index in word2idx.items():
        if word in glove_vectors:
            glove_embedding_matrix[index] = glove_vectors[word]

    # Create the cache folder first so the expensive pass above is not lost to a missing directory.
    os.makedirs(os.path.dirname(savename), exist_ok=True)
    with open(savename, "wb") as cache_file:
        pickle.dump([glove_embedding_matrix, word2idx, idx2word], cache_file)
    return glove_embedding_matrix, word2idx, idx2word

def _dump_results(results, savename):
    """Pickle `results` to `savename`, creating the parent directory if it is missing."""
    parent = os.path.dirname(savename)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(savename, "wb") as out_file:
        pickle.dump(results, out_file)


def train_model(model, criterion, optimizer, train_generator, val_generator, test_generator, args, other_generator, savename):
    """Train `model` with early stopping on validation macro-F1 and pickle the best results.

    After every ``args.eval_per_epoch`` epochs the model is scored on the validation
    set. Whenever validation macro-F1 improves, the model is also scored on the test
    set and on the other dataset, and (unless ``args.inputtype`` is ``"CLAIM_ONLY"``)
    re-scored with the first / last k snippets replaced by filler for k = 1..10.
    Training stops after 8 consecutive evaluations without improvement.

    The stored object is ``[val_store, test_store, other_test_store, [args]]``. It is
    written to `savename` on every improvement (so an interrupted run keeps its best
    result so far) and once more when training stops.

    Args:
        model, criterion, optimizer: passed to ``train_step`` / ``evaluate``.
        train_generator, val_generator, test_generator, other_generator: batch iterables.
        args: needs ``eval_per_epoch``, ``dataset`` and ``inputtype`` attributes.
        savename: path of the output pickle.
    """
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print("model parameters", params)

    # Name of the dataset behind `other_generator`, as expected by evaluate(other_from=...).
    other_from = "snes" if args.dataset == "pomt" else "pomt"

    num_epochs = 0
    patience_counter = 0
    patience_max = 8
    best_f1 = -np.inf
    total_store = None
    while True:
        model.train()
        train_losses = []
        for vals in train_generator:
            loss = train_step(optimizer, vals, model, criterion)
            train_losses.append(loss.item())

        num_epochs += 1
        print_message("TRAIN loss", np.mean(train_losses), num_epochs)

        if num_epochs % args.eval_per_epoch != 0:
            continue

        model.eval()
        with torch.no_grad():
            val_store = list(evaluate(val_generator, model))
            # The third value is the number of evaluated claims, not a loss.
            print_message("VALIDATION F1micro, F1macro, n_samples:", val_store[0], val_store[1], len(val_store[2]))

            if val_store[1] > best_f1:
                test_store = list(evaluate(test_generator, model))
                print_message("TEST F1micro, F1macro, n_samples:", test_store[0], test_store[1], len(test_store[2]))

                other_test_store = list(evaluate(other_generator, model, other_from=other_from))
                print_message("OTHER-TEST F1micro, F1macro, n_samples:", other_test_store[0], other_test_store[1], len(other_test_store[2]))

                test_remove_top_bottom = []
                test_remove_bottom_top = []
                other_test_remove_top_bottom = []
                other_test_remove_bottom_top = []
                ten = np.arange(10)
                if args.inputtype != "CLAIM_ONLY":
                    for i in tqdm(range(10)):
                        top_is = ten[:(i + 1)]       # first i+1 snippet positions
                        bottom_is = ten[-(i + 1):]   # last i+1 snippet positions
                        test_remove_top_bottom.append(evaluate(test_generator, model, ignore_snippet=top_is))
                        test_remove_bottom_top.append(evaluate(test_generator, model, ignore_snippet=bottom_is))
                        other_test_remove_top_bottom.append(evaluate(other_generator, model, other_from=other_from, ignore_snippet=top_is))
                        other_test_remove_bottom_top.append(evaluate(other_generator, model, other_from=other_from, ignore_snippet=bottom_is))

                    # Macro-F1 of each ablation run.
                    print_message([np.around(v[1], 4) for v in test_remove_top_bottom])
                    print_message([np.around(v[1], 4) for v in test_remove_bottom_top])
                    print_message([np.around(v[1], 4) for v in other_test_remove_top_bottom])
                    print_message([np.around(v[1], 4) for v in other_test_remove_bottom_top])

                patience_counter = 0
                best_f1 = val_store[1]
                test_store += [test_remove_top_bottom, test_remove_bottom_top]
                other_test_store += [other_test_remove_top_bottom, other_test_remove_bottom_top]
                misc_store = [args]
                total_store = [val_store, test_store, other_test_store, misc_store]
                # Checkpoint immediately: a disconnect before patience runs out would
                # otherwise discard every result of the run.
                _dump_results(total_store, savename)
            else:
                patience_counter += 1

        print_message("PATIENCE", patience_counter, "/", patience_max)

        if patience_counter >= patience_max:
            _dump_results(total_store, savename)
            break

def run_bert(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_generator):
    """Fine-tune the pretrained BERT classifier and hand it to `train_model`.

    The model gets one output per entry of `label_order`; the loss is cross-entropy
    weighted by `label_weights`, and only parameters with ``requires_grad`` are optimised
    (AdamW, learning rate ``args.lr``).
    """
    print(f'***run_bert*** with inputtype {args.inputtype}')

    num_labels = len(label_order)
    model = MyBertModel.from_pretrained(BERT_MODEL_PATH, labelnum=num_labels, input_type=inputtype)
    model.to(DEVICE)

    class_weights = label_weights.to(DEVICE)
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

    trainable_parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = AdamW(trainable_parameters, lr=args.lr, eps=1e-8)
    optimizer.zero_grad()

    train_model(model, criterion, optimizer, train_generator, val_generator, test_generator, args, other_generator, savename)

def run_lstm(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_generator):
    """Build the GloVe-initialised LSTM classifier and hand it to `train_model`.

    The vocabulary and embedding matrix are derived from all four generators via
    `get_embedding_matrix`. The loss is cross-entropy weighted by `label_weights`, and
    only parameters with ``requires_grad`` are optimised (AdamW, learning rate ``args.lr``).
    """
    print(f'***run_lstm*** with inputtype {args.inputtype}')

    all_generators = [train_generator, val_generator, test_generator, other_generator]
    glove_embedding_matrix, word2idx, idx2word = get_embedding_matrix(all_generators, args.dataset)

    num_labels = len(label_order)
    model = LSTMModel(args.lstm_hidden_dim, args.lstm_layers, args.lstm_dropout, num_labels, word2idx, glove_embedding_matrix, input_type=inputtype)
    model.to(DEVICE)

    class_weights = label_weights.to(DEVICE)
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

    trainable_parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = AdamW(trainable_parameters, lr=args.lr, eps=1e-8)
    optimizer.zero_grad()

    train_model(model, criterion, optimizer, train_generator, val_generator, test_generator, args, other_generator, savename)

def _compose_bow_sample(claim, snippets, inputtype):
    """Return the cleaned text of one sample for the requested input type.

    Shared by `filter_snippet_for_bow` and `get_bows_labels` so that the text the
    vectorizer is fitted on and the ablated text it transforms are built identically.
    """
    if inputtype == CLAIM_AND_EVIDENCE:
        # The explicit " " keeps the last claim word and the first snippet word from
        # being fused into a single token.
        return clean_str(claim) + " " + " ".join([clean_str(v) for v in snippets])
    if inputtype == CLAIM_ONLY:
        return clean_str(claim)
    if inputtype == EVIDENCE_ONLY:
        return " ".join([clean_str(v) for v in snippets])
    raise Exception("Unknown type", inputtype)


def filter_snippet_for_bow(generator, ignore_snippet, inputtype):
    """Return the text of every sample in `generator` with some snippets blanked out.

    Args:
        generator: iterable of batches; item 1 is read as the claims, item 3 as the
            per-claim snippets.
        ignore_snippet: index (or indices) overwritten with ``"filler"`` in each claim's
            snippets. The batch is modified in place.
        inputtype: one of CLAIM_AND_EVIDENCE, CLAIM_ONLY, EVIDENCE_ONLY.

    Returns:
        A list with one cleaned text string per sample, in generator order.
    """
    samples = []
    for vals in generator:
        claims = vals[1]
        snippets = vals[3]

        for i in range(len(snippets)):
            snippets[i][ignore_snippet] = "filler"

        for i in range(len(claims)):
            samples.append(_compose_bow_sample(claims[i], snippets[i], inputtype))
    return samples


def get_bows_labels(generators, dataset, inputtype):
    """Build TF-IDF features and labels for every generator, plus snippet-ablation features.

    Args:
        generators: list of batch iterables. The second-to-last one is used as the test
            set and the last one as the other-dataset test set for the ablations.
        dataset: not used by this function.
        inputtype: one of CLAIM_AND_EVIDENCE, CLAIM_ONLY, EVIDENCE_ONLY.

    Returns:
        ``(bows, all_labels, test_remove_top_bottom, test_remove_bottom_top,
        other_test_remove_top_bottom, other_test_remove_bottom_top)``. ``bows`` and
        ``all_labels`` have one entry per generator. Each ablation list has 10 feature
        matrices: entry k has the first (``*_top_bottom``) or last (``*_bottom_top``)
        k+1 snippets replaced by filler.
    """
    all_samples = []
    all_labels = []

    for gen in generators:
        gen_samples = []
        gen_labels = []
        for vals in gen:
            claims = vals[1]
            labels = vals[2]
            snippets = vals[3]

            for i in range(len(claims)):
                gen_samples.append(_compose_bow_sample(claims[i], snippets[i], inputtype))
                gen_labels.append(labels[i])

        all_samples.append(gen_samples)
        all_labels.append(gen_labels)

    test_remove_top_bottom = []
    test_remove_bottom_top = []
    other_test_remove_top_bottom = []
    other_test_remove_bottom_top = []
    ten = np.arange(10)
    for i in tqdm(range(10)):
        top_is = ten[:(i + 1)]       # first i+1 snippet positions
        bottom_is = ten[-(i + 1):]   # last i+1 snippet positions
        test_remove_top_bottom.append(filter_snippet_for_bow(generators[-2], top_is, inputtype))
        test_remove_bottom_top.append(filter_snippet_for_bow(generators[-2], bottom_is, inputtype))
        other_test_remove_top_bottom.append(filter_snippet_for_bow(generators[-1], top_is, inputtype))
        other_test_remove_bottom_top.append(filter_snippet_for_bow(generators[-1], bottom_is, inputtype))

    # The vocabulary is fitted on the un-ablated text of all generators.
    vectorizer = TfidfVectorizer(min_df=2)
    vectorizer.fit([item for sublist in all_samples for item in sublist])

    bows = [vectorizer.transform(samples) for samples in all_samples]

    test_remove_top_bottom = [vectorizer.transform(samples) for samples in test_remove_top_bottom]
    test_remove_bottom_top = [vectorizer.transform(samples) for samples in test_remove_bottom_top]
    other_test_remove_top_bottom = [vectorizer.transform(samples) for samples in other_test_remove_top_bottom]
    other_test_remove_bottom_top = [vectorizer.transform(samples) for samples in other_test_remove_bottom_top]

    return bows, all_labels, test_remove_top_bottom, test_remove_bottom_top, other_test_remove_top_bottom, other_test_remove_bottom_top

def run_bow(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_test_generator):
    """Grid-search a class-weighted random forest on TF-IDF features and pickle its scores.

    Hyper-parameters are selected with ``GridSearch`` on the validation set using macro-F1.
    The stored object is ``[val_store, test_store, other_test_store, [best_params]]``:
    every store starts with ``f1_micro, f1_macro, labels, predictions``, and the test
    stores additionally end with the two lists of snippet-ablation results.

    Args:
        args: needs ``inputtype`` and ``dataset`` attributes.
        train_generator, val_generator, test_generator, other_test_generator: batch iterables.
        label_weights: tensor of per-class weights (class index -> weight).
        inputtype: forwarded to ``get_bows_labels``.
        label_order: not used by this function.
        savename: path of the output pickle.
    """
    print(f'***run_bow*** with inputtype {args.inputtype}')

    bows, labels, test_remove_top_bottom, test_remove_bottom_top, other_test_remove_top_bottom, other_test_remove_bottom_top = get_bows_labels([train_generator, val_generator, test_generator, other_test_generator], args.dataset, inputtype)

    train_bow, val_bow, test_bow, other_test_bow = bows[0], bows[1], bows[2], bows[3]
    train_labels, val_labels, test_labels, other_test_labels = labels[0], labels[1], labels[2], labels[3]

    label_weights = label_weights.numpy()
    weights = {i: label_weights[i] for i in range(len(label_weights))}

    param_grid = [
        {'n_estimators': [100, 500, 1000], 'min_samples_leaf': [1, 3, 5, 10], 'min_samples_split': [2, 5, 10]}
    ]

    opt = GridSearch(model=RandomForestClassifier(n_jobs=5, class_weight=weights), param_grid=param_grid, parallelize=False)
    opt.fit(train_bow, train_labels, val_bow, val_labels, scoring="f1_macro")

    # Name of the dataset behind `other_test_generator`, as expected by rf_eval(other_from=...).
    other_from = "snes" if args.dataset == "pomt" else "pomt"

    def rf_eval(model, bow, labels, other_from=None):
        """Score `model` on `bow`; returns (f1_micro, f1_macro, labels, predictions)."""
        preds = model.predict(bow)

        if other_from == "snes": # other data is snes, and model is trained on pomt
            # in this case both "pants on fire!" and "false" should be considered as false
            preds[preds == 0] = 1 # 0 is "pants on fire!" and 1 is "false" for pomt.

        f1_macro = f1_score(labels, preds, average="macro")
        f1_micro = f1_score(labels, preds, average="micro")
        return f1_micro, f1_macro, labels, preds

    val_store = rf_eval(opt, val_bow, val_labels)
    test_store = list(rf_eval(opt, test_bow, test_labels)) + [
        [rf_eval(opt, bow, test_labels) for bow in test_remove_top_bottom],
        [rf_eval(opt, bow, test_labels) for bow in test_remove_bottom_top],
    ]
    other_test_store = list(rf_eval(opt, other_test_bow, other_test_labels, other_from=other_from)) + [
        [rf_eval(opt, bow, other_test_labels, other_from=other_from) for bow in other_test_remove_top_bottom],
        [rf_eval(opt, bow, other_test_labels, other_from=other_from) for bow in other_test_remove_bottom_top],
    ]
    misc_store = [opt.get_best_params()]
    total_store = [val_store, test_store, other_test_store, misc_store]

    print_message("VALIDATION", val_store[0], val_store[1])
    print_message("TEST", test_store[0], test_store[1])
    print_message("OTHER-TEST", other_test_store[0], other_test_store[1])

    # Macro-F1 of each ablation run.
    print_message([np.around(v[1], 4) for v in test_store[-2]])
    print_message([np.around(v[1], 4) for v in test_store[-1]])
    print_message([np.around(v[1], 4) for v in other_test_store[-2]])
    print_message([np.around(v[1], 4) for v in other_test_store[-1]])
    print(misc_store)

    # Create the output folder if needed and close the file handle deterministically.
    out_dir = os.path.dirname(savename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(savename, "wb") as out_file:
        pickle.dump(total_store, out_file)

def filter_websites(snippets_data, snippets_dir="../multi_fc_publicdata/snippets/"):
    """Replace snippets that come from fact-checking websites with ``"filler"``.

    For every row, the file ``<snippets_dir>/<value of column 0>`` is read; the last
    tab-separated field of each line is taken as the link of the corresponding snippet.
    Snippets whose link contains one of the listed fact-checking domains are overwritten
    in place.

    Args:
        snippets_data: DataFrame whose column 0 holds the snippet-file name and whose
            next 10 columns hold the snippets. Modified in place.
        snippets_dir: folder with the per-claim link files. The default sits next to the
            dataset folders read by ``load_data`` (``../multi_fc_publicdata/``).

    Returns:
        The same `snippets_data` object.
    """
    bad_websites = ["factcheck.org", "politifact.com", "snopes.com", "fullfact.org", "factscan.ca"]
    ids = snippets_data.values[:, 0]
    remove_count = 0
    for i, claim_id in enumerate(ids):
        with open(os.path.join(snippets_dir, claim_id), "r", encoding="utf-8") as f:
            links = [line.strip().split("\t")[-1] for line in f]

        remove = [False] * 10
        # 1 data sample has 11 links by mistake in the dataset: only look at the first
        # 10, otherwise writing remove[10] raises IndexError.
        for j, link in enumerate(links[:10]):
            remove[j] = any(bad in link for bad in bad_websites)
        # Column 0 is never replaced; the 10 flags select among the snippet columns.
        snippets_data.iloc[i, [False] + remove] = "filler"

        remove_count += sum(remove)
    print_message("REMOVE COUNT", remove_count)
    return snippets_data



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Error loading SnowballStemmer: Package 'SnowballStemmer'
[nltk_data]     not found in index
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [8]:
%%time

import gc

gc.collect()
class vars():
    """Run configuration for one experiment (stands in for parsed command-line arguments).

    NOTE: the class name shadows the built-in ``vars()`` inside this notebook; it is
    kept because the driver cell instantiates it under this name.

    Attributes set for every mode: ``dataset`` ("snes"), ``inputtype``,
    ``filter_websites`` (0), ``model`` (the mode), ``eval_per_epoch`` (1), ``batchsize``
    and ``lr``. Mode "lstm" additionally sets ``lstm_hidden_dim``, ``lstm_layers`` and
    ``lstm_dropout``.

    Raises:
        ValueError: if `mode` is not one of "bow", "lstm", "bert" (previously this
            produced an object without any attributes, failing later with AttributeError).
    """

    # Settings that differ between models.
    _MODE_SETTINGS = {
        "bow": {"batchsize": 2, "lr": 0.0001},
        "lstm": {
            "batchsize": 16,
            "lr": 0.0001,
            "lstm_hidden_dim": 128,
            "lstm_layers": 2,
            "lstm_dropout": 0.1,
        },
        "bert": {"batchsize": 4, "lr": 0.000003},
    }

    def __init__(self, mode, inputtype):
        if mode not in self._MODE_SETTINGS:
            raise ValueError("Unknown mode %r; expected one of %s" % (mode, sorted(self._MODE_SETTINGS)))

        # Settings shared by all models.
        self.dataset = "snes"
        self.inputtype = inputtype
        self.filter_websites = 0
        self.model = mode
        self.eval_per_epoch = 1

        for name, value in self._MODE_SETTINGS[mode].items():
            setattr(self, name, value)

# Read a tab-separated unigram list into `word_freq`, mapping the second field of each
# line to the first field (kept as a string). Reading stops once line 100001 has been
# processed; blank lines count towards that limit.
# NOTE(review): `word_freq` is not referenced by any other code visible in this notebook.
filepath = 'sorted.uk.word.unigrams'
word_freq = {}
count = 0
with open(filepath, encoding= 'utf-8') as f:
    for count, line in enumerate(f, start=1):
        line = line.rstrip()
        if line:
            x = line.split('\t')
            word_freq[x[1]] = str(x[0])
        if count > 100000:
            break

import random

# Fixed seed so weight initialisation and DataLoader shuffling are repeatable across runs.
SEED = 42

# Each entry is one preprocessing configuration, passed to load_data() as `step`.
steps = [['stem', 'pos', 'stop', 'neg']]

for step in steps:
    stepstr = "-".join(step)
    for mode in ['bert']:
        # Other valid input types: 'CLAIM_ONLY', 'CLAIM_AND_EVIDENCE'.
        for inputtype in ['EVIDENCE_ONLY']:
            # Re-seed per configuration so every run starts from the same random state.
            random.seed(SEED)
            np.random.seed(SEED)
            torch.manual_seed(SEED)

            args = vars(mode, inputtype)

            # Result file name: results/[filter-]model-dataset-inputtype-lr-batchsize-steps[-lstm settings].pkl
            name_parts = [args.model, args.dataset, args.inputtype, args.lr, args.batchsize, stepstr]
            if args.filter_websites > 0.5:
                name_parts = [args.filter_websites] + name_parts
            savename = "results/" + "-".join([str(v) for v in name_parts])
            if args.model == "lstm":
                savename += "-" + "-".join([str(v) for v in [args.lstm_hidden_dim, args.lstm_layers, args.lstm_dropout, stepstr]])
            savename += ".pkl"

            # Numeric id of the input type; a separate name keeps the loop variable intact.
            input_type_idx = INPUT_TYPE_ORDER.index(args.inputtype)

            main_data, snippets_data, label_order, splits = load_data(args.dataset, step)
            if args.filter_websites > 0.5:
                snippets_data = filter_websites(snippets_data)

            params = {"batch_size": args.batchsize, "shuffle": True, "num_workers": 1, "collate_fn": transformer_collate, "persistent_workers": True, "prefetch_factor":5}
            eval_params = {"batch_size": args.batchsize, "shuffle": False, "num_workers": 1, "collate_fn": transformer_collate, "persistent_workers": True, "prefetch_factor":5}

            train_generator, val_generator, test_generator, label_weights = make_generators(main_data, snippets_data, label_order, splits, [params, eval_params])

            # The other dataset is used for cross-dataset testing; its labels are mapped
            # onto the label set of the training dataset first.
            if args.dataset == "snes":
                other_name = "pomt"
                relabel = {"pants on fire!": "false", "half-true": "mixture"}
            else:
                other_name = "snes"
                relabel = {"mixture": "half-true"}

            other_main, other_snippets, _, other_splits = load_data(other_name, step)
            if args.filter_websites > 0.5:
                other_snippets = filter_websites(other_snippets)
            label_column = other_main.columns[2]
            for old_label, new_label in relabel.items():
                other_main.loc[other_main.iloc[:, 2] == old_label, label_column] = new_label
            _, _, other_test_generator, _ = make_generators(other_main, other_snippets, label_order, other_splits, [params, eval_params], other_dataset=True)

            runners = {"bert": run_bert, "lstm": run_lstm, "bow": run_bow}
            runners[args.model](args, train_generator, val_generator, test_generator, label_weights, input_type_idx, label_order, savename, other_test_generator)

            gc.collect()




len 5069 , false (64.3\%), mostly false (7.5\%), mixture (12.3\%), mostly true (2.8\%), true (13.0\%)
len 13581 , false (29.7\%), mostly false (17.0\%), mixture (19.8\%), mostly true (18.8\%), true (14.8\%)
***run_bert*** with inputtype EVIDENCE_ONLY


Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.bias', 'attn_s

model parameters 109486854
[Nov 29, 12:13:44] TRAIN loss 1.525184027640634 1
[Nov 29, 12:13:49] VALIDATION F1micro, F1macro, loss: 0.48717948717948717 0.19488376349990846 507
[Nov 29, 12:13:59] TEST F1micro, F1macro, loss: 0.46942800788954636 0.18848821236724253 1014
[Nov 29, 12:14:25] OTHER-TEST F1micro, F1macro, loss: 0.24254692675745307 0.12717274281923824 2717


100%|██████████| 10/10 [10:34<00:00, 63.41s/it]

[Nov 29, 12:24:59] [0.1787, 0.1691, 0.1634, 0.1443, 0.1199, 0.082, 0.0491, 0.0445, 0.0439, 0.0439]
[Nov 29, 12:24:59] [0.1882, 0.1776, 0.1704, 0.1542, 0.1379, 0.1212, 0.0906, 0.0535, 0.0439, 0.0439]
[Nov 29, 12:24:59] [0.1212, 0.1101, 0.0996, 0.0875, 0.0778, 0.0739, 0.0695, 0.066, 0.066, 0.066]
[Nov 29, 12:24:59] [0.1262, 0.1171, 0.1084, 0.0971, 0.0867, 0.0807, 0.0722, 0.0676, 0.066, 0.066]
[Nov 29, 12:24:59] PATIENCE 0 / 8





[Nov 29, 12:26:48] TRAIN loss 1.4669952828021302 2
[Nov 29, 12:26:53] VALIDATION F1micro, F1macro, loss: 0.591715976331361 0.2284588680333361 507
[Nov 29, 12:27:03] TEST F1micro, F1macro, loss: 0.596646942800789 0.23243552821174066 1014
[Nov 29, 12:27:29] OTHER-TEST F1micro, F1macro, loss: 0.27456753772543246 0.1737652634747731 2717


100%|██████████| 10/10 [10:34<00:00, 63.45s/it]

[Nov 29, 12:38:04] [0.2291, 0.2239, 0.2232, 0.2196, 0.2085, 0.1948, 0.1755, 0.1312, 0.0484, 0.0439]
[Nov 29, 12:38:04] [0.2324, 0.2253, 0.2347, 0.234, 0.2251, 0.2237, 0.2093, 0.1844, 0.1292, 0.0439]
[Nov 29, 12:38:04] [0.1711, 0.1685, 0.1604, 0.1532, 0.1435, 0.133, 0.1172, 0.094, 0.0733, 0.066]
[Nov 29, 12:38:04] [0.1725, 0.1738, 0.1685, 0.163, 0.1548, 0.1466, 0.1339, 0.1141, 0.0881, 0.066]
[Nov 29, 12:38:04] PATIENCE 0 / 8





[Nov 29, 12:39:52] TRAIN loss 1.410795281341944 3
[Nov 29, 12:39:57] VALIDATION F1micro, F1macro, loss: 0.5108481262327417 0.20291123914834447 507
[Nov 29, 12:39:57] PATIENCE 1 / 8
[Nov 29, 12:41:46] TRAIN loss 1.3231443412677546 4
[Nov 29, 12:41:51] VALIDATION F1micro, F1macro, loss: 0.5404339250493096 0.25852522597082256 507
[Nov 29, 12:42:01] TEST F1micro, F1macro, loss: 0.5631163708086785 0.26985804256013124 1014
[Nov 29, 12:42:27] OTHER-TEST F1micro, F1macro, loss: 0.2550607287449393 0.1960202424063076 2717


100%|██████████| 10/10 [10:33<00:00, 63.37s/it]

[Nov 29, 12:53:01] [0.262, 0.2649, 0.2477, 0.26, 0.2566, 0.2517, 0.236, 0.1995, 0.0484, 0.0439]
[Nov 29, 12:53:01] [0.2699, 0.2726, 0.2614, 0.2662, 0.2686, 0.2635, 0.2647, 0.2415, 0.1865, 0.0439]
[Nov 29, 12:53:01] [0.1978, 0.1988, 0.1951, 0.1895, 0.1785, 0.1764, 0.1521, 0.1272, 0.0873, 0.066]
[Nov 29, 12:53:01] [0.1979, 0.1989, 0.1936, 0.1929, 0.1912, 0.1885, 0.1718, 0.1493, 0.1182, 0.066]
[Nov 29, 12:53:01] PATIENCE 0 / 8





[Nov 29, 12:54:50] TRAIN loss 1.248160880424393 5
[Nov 29, 12:54:55] VALIDATION F1micro, F1macro, loss: 0.5641025641025641 0.2607790532318834 507
[Nov 29, 12:55:05] TEST F1micro, F1macro, loss: 0.5936883629191322 0.29133166421022766 1014
[Nov 29, 12:55:31] OTHER-TEST F1micro, F1macro, loss: 0.2664703717335296 0.19867880437109753 2717


100%|██████████| 10/10 [10:33<00:00, 63.34s/it]

[Nov 29, 13:06:04] [0.2873, 0.2802, 0.2619, 0.2589, 0.2657, 0.2759, 0.2648, 0.2503, 0.0549, 0.0439]
[Nov 29, 13:06:04] [0.2913, 0.2877, 0.2629, 0.2743, 0.2785, 0.2787, 0.2752, 0.2785, 0.2263, 0.0439]
[Nov 29, 13:06:04] [0.1996, 0.1987, 0.2001, 0.1987, 0.1926, 0.1942, 0.1859, 0.159, 0.097, 0.066]
[Nov 29, 13:06:04] [0.2012, 0.1991, 0.1969, 0.2, 0.1942, 0.196, 0.1943, 0.1826, 0.1437, 0.066]
[Nov 29, 13:06:04] PATIENCE 0 / 8





[Nov 29, 13:07:53] TRAIN loss 1.1144613205479392 6
[Nov 29, 13:07:58] VALIDATION F1micro, F1macro, loss: 0.5562130177514792 0.2684256339065463 507
[Nov 29, 13:08:08] TEST F1micro, F1macro, loss: 0.5808678500986193 0.2687710755180495 1014
[Nov 29, 13:08:34] OTHER-TEST F1micro, F1macro, loss: 0.2631578947368421 0.1875575959060515 2717


100%|██████████| 10/10 [10:35<00:00, 63.55s/it]

[Nov 29, 13:19:10] [0.2648, 0.253, 0.2413, 0.2431, 0.2548, 0.2704, 0.2804, 0.2458, 0.0726, 0.0439]
[Nov 29, 13:19:10] [0.2684, 0.2665, 0.2593, 0.2716, 0.2812, 0.2915, 0.2935, 0.2828, 0.2381, 0.0439]
[Nov 29, 13:19:10] [0.1911, 0.1919, 0.1907, 0.1961, 0.1896, 0.1867, 0.1792, 0.1688, 0.107, 0.066]
[Nov 29, 13:19:10] [0.1923, 0.1916, 0.1874, 0.1871, 0.1897, 0.1998, 0.193, 0.1832, 0.1612, 0.066]
[Nov 29, 13:19:10] PATIENCE 0 / 8





[Nov 29, 13:20:59] TRAIN loss 0.999379268913554 7
[Nov 29, 13:21:04] VALIDATION F1micro, F1macro, loss: 0.5857988165680473 0.26237072792555305 507
[Nov 29, 13:21:04] PATIENCE 1 / 8
[Nov 29, 13:22:52] TRAIN loss 0.8578450216364699 8
[Nov 29, 13:22:57] VALIDATION F1micro, F1macro, loss: 0.5581854043392505 0.2554643548644079 507
[Nov 29, 13:22:57] PATIENCE 2 / 8
[Nov 29, 13:24:46] TRAIN loss 0.7322302000196032 9
[Nov 29, 13:24:51] VALIDATION F1micro, F1macro, loss: 0.48717948717948717 0.24902959888866275 507
[Nov 29, 13:24:51] PATIENCE 3 / 8
[Nov 29, 13:26:40] TRAIN loss 0.5927860680484583 10
[Nov 29, 13:26:45] VALIDATION F1micro, F1macro, loss: 0.5207100591715976 0.25222317911973086 507
[Nov 29, 13:26:45] PATIENCE 4 / 8
[Nov 29, 13:28:34] TRAIN loss 0.4862877436343417 11
[Nov 29, 13:28:39] VALIDATION F1micro, F1macro, loss: 0.5621301775147929 0.2833220679981975 507
[Nov 29, 13:28:49] TEST F1micro, F1macro, loss: 0.5571992110453649 0.3000771991900803 1014
[Nov 29, 13:29:15] OTHER-TEST F1m

100%|██████████| 10/10 [10:33<00:00, 63.39s/it]

[Nov 29, 13:39:49] [0.28, 0.2771, 0.2726, 0.2703, 0.2763, 0.2704, 0.2612, 0.2207, 0.0548, 0.0279]
[Nov 29, 13:39:49] [0.3015, 0.2989, 0.2829, 0.2768, 0.2954, 0.2809, 0.2901, 0.2688, 0.2316, 0.0279]
[Nov 29, 13:39:49] [0.225, 0.2231, 0.2185, 0.2215, 0.224, 0.2251, 0.2077, 0.1942, 0.121, 0.058]
[Nov 29, 13:39:49] [0.2244, 0.2261, 0.2239, 0.2256, 0.2172, 0.2301, 0.2212, 0.2055, 0.1804, 0.058]
[Nov 29, 13:39:49] PATIENCE 0 / 8





[Nov 29, 13:41:38] TRAIN loss 0.3869762930288289 12
[Nov 29, 13:41:43] VALIDATION F1micro, F1macro, loss: 0.5759368836291914 0.2437976830632525 507
[Nov 29, 13:41:43] PATIENCE 1 / 8
[Nov 29, 13:43:31] TRAIN loss 0.3331876905002918 13
[Nov 29, 13:43:36] VALIDATION F1micro, F1macro, loss: 0.5601577909270217 0.26095126397265594 507
[Nov 29, 13:43:36] PATIENCE 2 / 8
[Nov 29, 13:45:25] TRAIN loss 0.262589584470106 14
[Nov 29, 13:45:30] VALIDATION F1micro, F1macro, loss: 0.5581854043392505 0.25877681723066515 507
[Nov 29, 13:45:30] PATIENCE 3 / 8
[Nov 29, 13:47:19] TRAIN loss 0.21981283171803784 15
[Nov 29, 13:47:24] VALIDATION F1micro, F1macro, loss: 0.52465483234714 0.25520310842845484 507
[Nov 29, 13:47:24] PATIENCE 4 / 8
[Nov 29, 13:49:12] TRAIN loss 0.174221724281224 16
[Nov 29, 13:49:17] VALIDATION F1micro, F1macro, loss: 0.4990138067061144 0.2415838269819905 507
[Nov 29, 13:49:17] PATIENCE 5 / 8
[Nov 29, 13:51:06] TRAIN loss 0.13400202453044047 17
[Nov 29, 13:51:11] VALIDATION F1micro