In [1]:
import tensorflow as tf
from tensorflow import keras

In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Thu Nov 11 00:59:49 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   60C    P0    33W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
from psutil import virtual_memory

# Total memory reported by psutil, converted from bytes to decimal gigabytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# Runtimes below 20 GB are reported as standard (not high-RAM).
ram_message = 'Not using a high-RAM runtime' if ram_gb < 20 else 'You are using a high-RAM runtime!'
print(ram_message)

Your runtime has 13.6 gigabytes of available RAM

Not using a high-RAM runtime


In [4]:
from google.colab import drive

# Mount point under which the Drive contents (including MyDrive) become visible.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
import os
import sys

# Location of the project on the mounted Drive; the `bias` sub-folder becomes
# the working directory, so the relative paths used below resolve against it.
PROJECT_ROOT = '/content/drive/MyDrive/CS105BProject'
BIAS_DIR = '/content/drive/MyDrive/CS105BProject/bias'

os.chdir(BIAS_DIR)
# Make both the project root and the (new) working directory importable.
for extra_path in (PROJECT_ROOT, os.getcwd()):
    sys.path.append(extra_path)

In [6]:
!pip install transformers
!pip install pytorch-nlp
# !pip install hypopt



In [7]:
import sys
import os
# sys.path.append('../../code-acl')
# sys.path.append(os.getcwd())
sys.path.append('/content/drive/MyDrive/CS105BProject/bias/')
os.environ['OMP_NUM_THREADS'] = "1"
import argparse
import pandas as pd
import pickle
from model.generator import TransformerDataset, transformer_collate
from model.bertmodel import MyBertModel
from model.lstmmodel import LSTMModel
import torch
from parameters import BERT_MODEL_PATH, CLAIM_ONLY, CLAIM_AND_EVIDENCE, EVIDENCE_ONLY, DEVICE, INPUT_TYPE_ORDER
from transformers import AdamW
import numpy as np
from utils.utils import print_message, clean_str
from sklearn.metrics import f1_score
from sklearn.utils.class_weight import compute_class_weight
from collections import Counter
from torchnlp.word_to_vector import GloVe
from collections import Counter
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
#from hypopt import GridSearch
from model_selection import GridSearch
from tqdm import tqdm

def load_data(dataset):
    """Load one dataset from ``../multi_fc_publicdata/<dataset>/``.

    The path is relative to the current working directory.

    Args:
        dataset: Dataset name; used both as the folder name and as the file
            name prefix.

    Returns:
        A tuple ``(main_data, snippets_data, label_order, splits)``:
        ``main_data`` and ``snippets_data`` are DataFrames read from the
        header-less, tab-separated files ``<dataset>.tsv`` and
        ``<dataset>_snippets.tsv``; ``label_order`` and ``splits`` are the
        objects unpickled from ``<dataset>_labels.pkl`` and
        ``<dataset>_index_split.pkl``.
    """
    path = "../multi_fc_publicdata/" + dataset + "/"

    main_data = pd.read_csv(path + dataset + ".tsv", sep="\t", header=None)
    snippets_data = pd.read_csv(path + dataset + "_snippets.tsv", sep="\t", header=None)

    # NOTE: unpickling runs arbitrary code - only load files from a trusted source.
    # The context managers make sure both handles are closed right away.
    with open(path + dataset + "_labels.pkl", "rb") as handle:
        label_order = pickle.load(handle)
    with open(path + dataset + "_index_split.pkl", "rb") as handle:
        splits = pickle.load(handle)

    return main_data, snippets_data, label_order, splits

def make_generators(main_data, snippets_data, label_order, splits, params, dataset_generator=TransformerDataset, other_dataset=False):
    """Build one DataLoader per split and the class weights of the first split.

    Args:
        main_data: DataFrame whose column 2 holds the label of each row.
        snippets_data: DataFrame indexed by the same row positions as ``main_data``.
        label_order: List of label names; a label's position is its class index.
        splits: Sequence of row-index collections. At least three are required;
            the first three loaders are returned (callers unpack them as
            train, validation and test).
        params: Pair ``[first_split_kwargs, other_split_kwargs]`` of keyword
            dictionaries for ``torch.utils.data.DataLoader``.
        dataset_generator: Callable building a dataset from
            ``(main_rows, snippet_rows, label_order)``.
        other_dataset: When true, no class weights are computed.

    Returns:
        ``(loader_0, loader_1, loader_2, label_weights)`` where ``label_weights``
        is a float32 tensor of "balanced" class weights computed on split 0, or
        ``None`` when ``other_dataset`` is true.
    """
    # Print the label distribution of the whole dataset.
    all_labels = main_data.values[:, 2]
    counter = Counter(all_labels)
    # "\\%" is a literal backslash followed by a percent sign. The original
    # "\%" produced the same text but is an invalid escape sequence.
    ss = "".join(
        ", " + str(c) + " (" + str(np.around(counter[c] / len(all_labels) * 100, 1)) + "\\%)"
        for c in label_order
    )
    print("len", len(all_labels), ss)

    generators = []
    for isplit, split in enumerate(splits):
        sub_main_data = main_data.values[split]
        sub_snippets_data = snippets_data.values[split]
        split_dataset = dataset_generator(sub_main_data, sub_snippets_data, label_order)

        # Split 0 gets the first parameter set, every other split the second.
        loader_kwargs = params[0] if isplit == 0 else params[1]
        generators.append(torch.utils.data.DataLoader(split_dataset, **loader_kwargs))

    # Class indices of split 0; raises ValueError for a label missing from label_order.
    labels = main_data.values[splits[0]][:, 2]
    labels = np.array([label_order.index(v) for v in labels])

    if other_dataset:
        label_weights = None
    else:
        balanced = compute_class_weight("balanced", classes=np.arange(len(label_order)), y=labels)
        label_weights = torch.tensor(balanced.astype(np.float32))

    return generators[0], generators[1], generators[2], label_weights

def evaluate(generator, model, other_from=None, ignore_snippet=None):
    """Run ``model`` over every batch of ``generator`` and score the predictions.

    Args:
        generator: Iterable of batches; each batch is indexed as
            ``(claimIDs, claims, labels, snippets)``.
        model: Callable ``model(claims, snippets)`` returning a logits tensor
            with one row per claim.
        other_from: Origin of the evaluated data when it is not the training
            dataset. With ``"snes"`` (model trained on pomt) every predicted
            class 0 is rewritten to class 1; ``"pomt"`` and ``None`` leave the
            predictions untouched.
        ignore_snippet: Optional index (or index array) of snippet positions
            that are overwritten with ``"filler"`` before the model is called.
            The batch's snippet containers are modified in place.

    Returns:
        ``(f1_micro, f1_macro, claim_ids, logits, labels, predictions)`` with
        the four collections flattened over all batches.
    """
    seen_ids, seen_logits = [], []
    gold, predicted = [], []

    for batch in generator:
        claimIDs, claims, labels, snippets = batch[0], batch[1], batch[2], batch[3]

        if ignore_snippet is not None:
            # Blank out the selected evidence positions of every claim.
            for claim_snippets in snippets:
                claim_snippets[ignore_snippet] = "filler"

        gold.extend(labels)
        logits = model(claims, snippets)
        batch_predictions = torch.argmax(logits, 1).cpu().numpy()

        # Only snes data scored by a pomt-trained model needs remapping:
        # class 0 ("pants on fire!") is merged into class 1 ("false").
        if other_from == "snes":
            batch_predictions[batch_predictions == 0] = 1

        predicted.extend(batch_predictions.tolist())
        seen_ids.extend(claimIDs)
        seen_logits.extend(logits.cpu().numpy().tolist())

    f1_micro = f1_score(gold, predicted, average="micro")
    f1_macro = f1_score(gold, predicted, average="macro")

    return f1_micro, f1_macro, seen_ids, seen_logits, gold, predicted

def train_step(optimizer, vals, model, criterion):
    """Perform one optimisation step on a single batch.

    Args:
        optimizer: Optimizer whose gradients are reset before, and whose
            ``step()`` is called after, the backward pass.
        vals: Batch indexed as ``(claimIDs, claims, labels, snippets)``.
        model: Callable ``model(claims, snippets)`` returning logits.
        criterion: Loss callable taking ``(logits, labels)``.

    Returns:
        The loss tensor of this batch (not detached).
    """
    optimizer.zero_grad()

    claims, snippets = vals[1], vals[3]
    targets = torch.tensor(vals[2]).to(DEVICE)

    batch_loss = criterion(model(claims, snippets), targets)
    batch_loss.backward()
    optimizer.step()

    return batch_loss


def get_embedding_matrix(generators, dataset, min_occurrence=1):
    """Return the GloVe embedding matrix and vocabulary for ``dataset``.

    The result is cached in ``preprocessed/<dataset>_glove.pkl`` (relative to
    the working directory). When that file exists it is returned as-is and
    ``generators`` is not touched.

    Args:
        generators: Iterables of batches indexed as
            ``(claimIDs, claims, labels, snippets)``; claims and snippets
            supply the vocabulary.
        dataset: Dataset name, used only to name the cache file.
        min_occurrence: A word is kept only if it occurs strictly more than
            this many times.
            NOTE(review): with the default of 1, words seen exactly once are
            dropped - confirm that ``>`` rather than ``>=`` is intended.

    Returns:
        ``(glove_embedding_matrix, word2idx, idx2word)``. The matrix has shape
        ``(len(word2idx) + 2, 300)``; rows 0 and 1 are reserved (mask / unknown
        token) and never overwritten with GloVe vectors.
    """
    savename = "preprocessed/" + dataset + "_glove.pkl"
    if os.path.exists(savename):
        # NOTE: unpickling runs arbitrary code - only use caches you created yourself.
        with open(savename, "rb") as handle:
            cached = pickle.load(handle)
        return cached[0], cached[1], cached[2]

    glove_vectors = GloVe('840B')

    # Collect every cleaned claim and snippet text.
    texts = []
    for gen in generators:
        for vals in gen:
            texts.extend(clean_str(claim) for claim in vals[1])
            texts.extend(clean_str(item) for sublist in vals[3] for item in sublist)

    counter = Counter(word for text in texts for word in text.split(" "))
    # Sorting makes the word -> index assignment independent of set/hash order,
    # so a rebuilt cache gets the same indices for the same data.
    vocabulary = sorted(word for word, count in counter.items() if count > min_occurrence)

    # Indices start at 2: 0 is reserved for a potential mask, 1 for the unk token.
    word2idx = {word: i + 2 for i, word in enumerate(vocabulary)}
    idx2word = {idx: word for word, idx in word2idx.items()}

    # Random rows in [-0.5, 0.5); words known to GloVe are overwritten below.
    glove_embedding_matrix = np.random.random((len(vocabulary) + 2, 300)) - 0.5
    for word, idx in word2idx.items():
        if word in glove_vectors:
            glove_embedding_matrix[idx] = glove_vectors[word]

    # Create the cache folder if needed so the expensive build is never lost
    # to a missing directory.
    os.makedirs(os.path.dirname(savename), exist_ok=True)
    with open(savename, "wb") as handle:
        pickle.dump([glove_embedding_matrix, word2idx, idx2word], handle)
    return glove_embedding_matrix, word2idx, idx2word

def _save_results(total_store, savename):
    """Pickle ``total_store`` to ``savename``, closing the file afterwards."""
    with open(savename, "wb") as handle:
        pickle.dump(total_store, handle)


def train_model(model, criterion, optimizer, train_generator, val_generator, test_generator, args, other_generator, savename):
    """Train ``model`` with early stopping on validation macro-F1.

    After every ``args.eval_per_epoch`` epochs the model is scored on the
    validation loader. Whenever validation macro-F1 improves, the test loader
    and the cross-dataset loader are evaluated. Unless ``args.inputtype`` is
    ``"CLAIM_ONLY"``, a snippet ablation is also run: the first ``k`` and the
    last ``k`` snippet positions are replaced by filler, for ``k = 1..10``.
    Training stops after 10 evaluations in a row without improvement.

    The results of the best evaluation are pickled to ``savename`` as
    ``[val_store, test_store, other_test_store, [args]]``. They are written at
    every improvement, so an interrupted run still leaves the best results so
    far on disk, and once more when training stops.

    Args:
        model: Module called as ``model(claims, snippets)``.
        criterion: Loss passed to ``train_step``.
        optimizer: Optimizer passed to ``train_step``.
        train_generator, val_generator, test_generator: Batch iterables.
        args: Settings object; reads ``eval_per_epoch``, ``dataset`` and
            ``inputtype``.
        other_generator: Batch iterable of the dataset that ``model`` is not
            trained on.
        savename: Path of the result pickle.

    Returns:
        None.
    """
    trainable = filter(lambda p: p.requires_grad, model.parameters())
    print("model parameters", sum([np.prod(p.size()) for p in trainable]))

    # Origin of the cross-dataset samples: the dataset we are NOT training on.
    other_from = "snes" if args.dataset == "pomt" else "pomt"
    snippet_positions = np.arange(10)

    num_epochs = 0
    patience_counter = 0
    patience_max = 10
    best_f1 = -np.inf
    while True:
        model.train()
        train_losses = [train_step(optimizer, vals, model, criterion).item() for vals in train_generator]

        num_epochs += 1
        print_message("TRAIN loss", np.mean(train_losses), num_epochs)

        if num_epochs % args.eval_per_epoch != 0:
            continue

        model.eval()
        with torch.no_grad():
            val_result = evaluate(val_generator, model)
        # NOTE(review): the last value logged under "loss:" here and below is
        # the number of evaluated claims, not a loss.
        print_message("VALIDATION F1micro, F1macro, loss:", val_result[0], val_result[1], len(val_result[2]))
        val_f1macro = val_result[1]

        if val_f1macro > best_f1:
            test_remove_top_bottom = []
            test_remove_bottom_top = []
            other_test_remove_top_bottom = []
            other_test_remove_bottom_top = []

            with torch.no_grad():
                test_result = evaluate(test_generator, model)
                print_message("TEST F1micro, F1macro, loss:", test_result[0], test_result[1], len(test_result[2]))

                other_result = evaluate(other_generator, model, other_from=other_from)
                print_message("OTHER-TEST F1micro, F1macro, loss:", other_result[0], other_result[1], len(other_result[2]))

                if args.inputtype != "CLAIM_ONLY":
                    for i in tqdm(range(10)):
                        top_is = snippet_positions[:(i + 1)]
                        bottom_is = snippet_positions[-(i + 1):]
                        test_remove_top_bottom.append(evaluate(test_generator, model, ignore_snippet=top_is))
                        test_remove_bottom_top.append(evaluate(test_generator, model, ignore_snippet=bottom_is))
                        other_test_remove_top_bottom.append(evaluate(other_generator, model, other_from=other_from, ignore_snippet=top_is))
                        other_test_remove_bottom_top.append(evaluate(other_generator, model, other_from=other_from, ignore_snippet=bottom_is))

                    # Macro-F1 of each ablation step, one log line per series.
                    for series in (test_remove_top_bottom, test_remove_bottom_top,
                                   other_test_remove_top_bottom, other_test_remove_bottom_top):
                        print_message([np.around(v[1], 4) for v in series])

            patience_counter = 0
            best_f1 = val_f1macro
            val_store = list(val_result)
            test_store = list(test_result) + [test_remove_top_bottom, test_remove_bottom_top]
            other_test_store = list(other_result) + [other_test_remove_top_bottom, other_test_remove_bottom_top]
            misc_store = [args]
            total_store = [val_store, test_store, other_test_store, misc_store]
            # Persist immediately: a crashed or disconnected run keeps its best result.
            _save_results(total_store, savename)
        else:
            patience_counter += 1

        print_message("PATIENCE", patience_counter, "/", patience_max)

        if patience_counter >= patience_max:
            _save_results(total_store, savename)
            break

def run_bert(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_generator):
    """Build the BERT classifier and train it with ``train_model``.

    Args:
        args: Settings object; reads ``inputtype`` (for logging) and ``lr``.
        train_generator, val_generator, test_generator: Batch iterables.
        label_weights: Class-weight tensor for the cross-entropy loss
            (must not be ``None``; it is moved to ``DEVICE``).
        inputtype: Input-type value forwarded to the model as ``input_type``.
        label_order: List of labels; its length is the number of classes.
        savename: Path of the result pickle written by ``train_model``.
        other_generator: Batch iterable of the cross-dataset test data.
    """
    print(f'***run_bert*** with inputtype {args.inputtype}')

    num_labels = len(label_order)
    model = MyBertModel.from_pretrained(BERT_MODEL_PATH, labelnum=num_labels, input_type=inputtype)
    model.to(DEVICE)

    weighted_loss = torch.nn.CrossEntropyLoss(weight=label_weights.to(DEVICE))
    # Only parameters that require gradients are handed to the optimizer.
    optimizer = AdamW((p for p in model.parameters() if p.requires_grad), lr=args.lr, eps=1e-8)
    optimizer.zero_grad()

    train_model(model, weighted_loss, optimizer, train_generator, val_generator, test_generator, args, other_generator, savename)

def run_lstm(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_generator):
    """Build the GloVe-initialised LSTM classifier and train it with ``train_model``.

    Args:
        args: Settings object; reads ``inputtype`` (for logging), ``dataset``,
            ``lr``, ``lstm_hidden_dim``, ``lstm_layers`` and ``lstm_dropout``.
        train_generator, val_generator, test_generator: Batch iterables.
        label_weights: Class-weight tensor for the cross-entropy loss
            (must not be ``None``; it is moved to ``DEVICE``).
        inputtype: Input-type value forwarded to the model as ``input_type``.
        label_order: List of labels; its length is the number of classes.
        savename: Path of the result pickle written by ``train_model``.
        other_generator: Batch iterable of the cross-dataset test data.
    """
    print(f'***run_lstm*** with inputtype {args.inputtype}')

    # The vocabulary is built from all four loaders (or read from the cache).
    all_generators = [train_generator, val_generator, test_generator, other_generator]
    glove_embedding_matrix, word2idx, _ = get_embedding_matrix(all_generators, args.dataset)

    num_labels = len(label_order)
    model = LSTMModel(args.lstm_hidden_dim, args.lstm_layers, args.lstm_dropout, num_labels, word2idx, glove_embedding_matrix, input_type=inputtype)
    model.to(DEVICE)

    weighted_loss = torch.nn.CrossEntropyLoss(weight=label_weights.to(DEVICE))
    # Only parameters that require gradients are handed to the optimizer.
    optimizer = AdamW((p for p in model.parameters() if p.requires_grad), lr=args.lr, eps=1e-8)
    optimizer.zero_grad()

    train_model(model, weighted_loss, optimizer, train_generator, val_generator, test_generator, args, other_generator, savename)

def filter_snippet_for_bow(generator, ignore_snippet, inputtype):
    """Build bag-of-words text samples with selected snippets blanked out.

    Args:
        generator: Iterable of batches indexed as
            ``(claimIDs, claims, labels, snippets)``.
        ignore_snippet: Index (or index array) of the snippet positions that
            are overwritten with ``"filler"``. The batch's snippet containers
            are modified in place.
        inputtype: One of ``CLAIM_AND_EVIDENCE``, ``CLAIM_ONLY``,
            ``EVIDENCE_ONLY``; selects which texts make up a sample.

    Returns:
        List with one cleaned text sample per claim, in iteration order.

    Raises:
        Exception: If ``inputtype`` is none of the three known values.
    """
    def evidence_text(texts):
        # Cleaned snippets of one claim, separated by single spaces.
        return " ".join([clean_str(text) for text in texts])

    samples = []
    for vals in generator:
        claims, snippets = vals[1], vals[3]

        for claim_snippets in snippets:
            claim_snippets[ignore_snippet] = "filler"

        for i, claim in enumerate(claims):
            if inputtype == CLAIM_AND_EVIDENCE:
                # NOTE(review): claim and evidence are concatenated without a
                # separator, so the last claim word and the first snippet word
                # merge unless clean_str pads its output - confirm intent.
                samples.append(clean_str(claim) + evidence_text(snippets[i]))
            elif inputtype == CLAIM_ONLY:
                samples.append(clean_str(claim))
            elif inputtype == EVIDENCE_ONLY:
                samples.append(evidence_text(snippets[i]))
            else:
                raise Exception("Unknown type", inputtype)
    return samples

def get_bows_labels(generators, dataset, inputtype):
    """Turn every loader into TF-IDF features, plus snippet-ablation variants.

    Args:
        generators: List of batch iterables. The last two entries are treated
            as the test loader and the cross-dataset test loader for the
            ablation.
        dataset: Unused.
        inputtype: One of ``CLAIM_AND_EVIDENCE``, ``CLAIM_ONLY``,
            ``EVIDENCE_ONLY``; selects which texts make up a sample.

    Returns:
        ``(bows, all_labels, test_remove_top_bottom, test_remove_bottom_top,
        other_test_remove_top_bottom, other_test_remove_bottom_top)``.
        ``bows`` and ``all_labels`` hold one entry per generator. Each ablation
        list holds 10 TF-IDF matrices: entry ``k`` has the first (``top``) or
        last (``bottom``) ``k + 1`` snippet positions replaced by filler.

    Raises:
        Exception: If ``inputtype`` is none of the three known values.
    """
    def build_sample(claims, snippets, i):
        # Text of the i-th claim of a batch for the requested input type.
        # NOTE(review): claim and evidence are concatenated without a separator.
        if inputtype == CLAIM_AND_EVIDENCE:
            return clean_str(claims[i]) + " ".join([clean_str(v) for v in snippets[i]])
        if inputtype == CLAIM_ONLY:
            return clean_str(claims[i])
        if inputtype == EVIDENCE_ONLY:
            return " ".join([clean_str(v) for v in snippets[i]])
        raise Exception("Unknown type", inputtype)

    all_samples, all_labels = [], []
    for gen in generators:
        gen_samples, gen_labels = [], []
        for vals in gen:
            claims, labels, snippets = vals[1], vals[2], vals[3]
            for i in range(len(claims)):
                gen_samples.append(build_sample(claims, snippets, i))
                gen_labels.append(labels[i])
        all_samples.append(gen_samples)
        all_labels.append(gen_labels)

    # Ablation texts: blank the first / last (step + 1) snippet positions.
    test_gen, other_gen = generators[-2], generators[-1]
    test_top_texts, test_bottom_texts = [], []
    other_top_texts, other_bottom_texts = [], []
    positions = np.arange(10)
    for step in tqdm(range(10)):
        top_is = positions[:(step + 1)]
        bottom_is = positions[-(step + 1):]
        test_top_texts.append(filter_snippet_for_bow(test_gen, top_is, inputtype))
        test_bottom_texts.append(filter_snippet_for_bow(test_gen, bottom_is, inputtype))
        other_top_texts.append(filter_snippet_for_bow(other_gen, top_is, inputtype))
        other_bottom_texts.append(filter_snippet_for_bow(other_gen, bottom_is, inputtype))

    # The vocabulary / IDF statistics are fitted on the samples of all generators.
    vectorizer = TfidfVectorizer(min_df=2)
    vectorizer.fit([item for sublist in all_samples for item in sublist])

    bows = [vectorizer.transform(samples) for samples in all_samples]

    test_remove_top_bottom = [vectorizer.transform(texts) for texts in test_top_texts]
    test_remove_bottom_top = [vectorizer.transform(texts) for texts in test_bottom_texts]
    other_test_remove_top_bottom = [vectorizer.transform(texts) for texts in other_top_texts]
    other_test_remove_bottom_top = [vectorizer.transform(texts) for texts in other_bottom_texts]

    return bows, all_labels, test_remove_top_bottom, test_remove_bottom_top, other_test_remove_top_bottom, other_test_remove_bottom_top

def run_bow(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_test_generator):
    """Grid-search a random forest on TF-IDF features and store its scores.

    The forest is tuned with ``GridSearch`` on the validation split (macro-F1),
    then scored on validation, test and cross-dataset test data, including the
    10-step snippet ablations from ``get_bows_labels``. The result list
    ``[val_store, test_store, other_test_store, [best_params]]`` is pickled to
    ``savename``.

    Args:
        args: Settings object; reads ``inputtype`` (for logging) and ``dataset``.
        train_generator, val_generator, test_generator: Batch iterables.
        label_weights: Tensor of class weights; entry ``i`` is the weight of class ``i``.
        inputtype: Input-type value forwarded to ``get_bows_labels``.
        label_order: Unused.
        savename: Path of the result pickle.
        other_test_generator: Batch iterable of the cross-dataset test data.
    """
    print(f'***run_bow*** with inputtype {args.inputtype}')

    generators = [train_generator, val_generator, test_generator, other_test_generator]
    (bows, labels, test_remove_top_bottom, test_remove_bottom_top,
     other_test_remove_top_bottom, other_test_remove_bottom_top) = get_bows_labels(generators, args.dataset, inputtype)

    train_bow, val_bow, test_bow, other_test_bow = bows[0], bows[1], bows[2], bows[3]
    train_labels, val_labels, test_labels, other_test_labels = labels[0], labels[1], labels[2], labels[3]

    # class index -> weight, as expected by RandomForestClassifier(class_weight=...)
    weights = dict(enumerate(label_weights.numpy()))

    param_grid = [
        {'n_estimators': [100, 500, 1000], 'min_samples_leaf': [1, 3, 5, 10], 'min_samples_split': [2, 5, 10]}
    ]

    opt = GridSearch(model=RandomForestClassifier(n_jobs=5, class_weight=weights), param_grid=param_grid, parallelize=False)
    opt.fit(train_bow, train_labels, val_bow, val_labels, scoring="f1_macro")

    # Origin of the cross-dataset samples: the dataset we are NOT training on.
    other_origin = "snes" if args.dataset == "pomt" else "pomt"

    def rf_eval(model, bow, labels, other_from=None):
        """Return ``(f1_micro, f1_macro, labels, preds)`` of ``model`` on ``bow``."""
        preds = model.predict(bow)

        # snes data scored by a pomt-trained model: class 0 ("pants on fire!")
        # is merged into class 1 ("false"). Nothing to do in the other cases.
        if other_from == "snes":
            preds[preds == 0] = 1

        f1_macro = f1_score(labels, preds, average="macro")
        f1_micro = f1_score(labels, preds, average="micro")
        return f1_micro, f1_macro, labels, preds

    val_store = rf_eval(opt, val_bow, val_labels)
    test_store = list(rf_eval(opt, test_bow, test_labels)) + [
        [rf_eval(opt, test_remove_top_bottom[i], test_labels) for i in range(10)],
        [rf_eval(opt, test_remove_bottom_top[i], test_labels) for i in range(10)],
    ]
    other_test_store = list(rf_eval(opt, other_test_bow, other_test_labels, other_from=other_origin)) + [
        [rf_eval(opt, other_test_remove_top_bottom[i], other_test_labels, other_from=other_origin) for i in range(10)],
        [rf_eval(opt, other_test_remove_bottom_top[i], other_test_labels, other_from=other_origin) for i in range(10)],
    ]
    misc_store = [opt.get_best_params()]
    total_store = [val_store, test_store, other_test_store, misc_store]

    print_message("VALIDATION", val_store[0], val_store[1])
    print_message("TEST", test_store[0], test_store[1])
    print_message("OTHER-TEST", other_test_store[0], other_test_store[1])

    # Macro-F1 of each ablation step, one log line per series.
    for series in (test_store[-2], test_store[-1], other_test_store[-2], other_test_store[-1]):
        print_message([np.around(v[1], 4) for v in series])
    print(misc_store)

    # Context manager: the result file is flushed and closed before returning.
    with open(savename, "wb") as handle:
        pickle.dump(total_store, handle)

def filter_websites(snippets_data, snippets_dir="../../multi_fc_publicdata/snippets/"):
    """Replace snippets that come from fact-checking sites by ``"filler"``.

    For every row, the file ``<snippets_dir>/<claim id>`` is read; the last
    tab-separated field of each line is taken as the link of the snippet at
    that position. Snippets whose link contains one of the listed domains are
    overwritten. Only the first 10 links of a file are considered.

    Args:
        snippets_data: DataFrame with the claim id in column 0 and the
            snippets in the columns after it. It is modified in place.
        snippets_dir: Folder holding one link file per claim id.
            NOTE(review): the default goes two levels up while ``load_data``
            reads from ``../multi_fc_publicdata/`` - confirm which is right
            for the current working directory.

    Returns:
        The same (modified) ``snippets_data`` object.
    """
    bad_websites = ["factcheck.org", "politifact.com", "snopes.com", "fullfact.org", "factscan.ca"]
    max_links = 10  # one data sample has 11 links by mistake in the dataset

    remove_count = 0
    for row, claim_id in enumerate(snippets_data.values[:, 0]):
        with open(os.path.join(snippets_dir, claim_id), "r", encoding="utf-8") as f:
            links = [line.strip().split("\t")[-1] for line in f]

        # Truncate BEFORE flagging: indexing a fixed 10-slot list by link
        # position raised IndexError for the sample with 11 links.
        flagged_columns = [
            position + 1  # +1 skips the id column
            for position, link in enumerate(links[:max_links])
            if any(bad in link for bad in bad_websites)
        ]
        if flagged_columns:
            snippets_data.iloc[row, flagged_columns] = "filler"

        remove_count += len(flagged_columns)
    print_message("REMOVE COUNT", remove_count)
    return snippets_data



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Error loading SnowballStemmer: Package 'SnowballStemmer'
[nltk_data]     not found in index
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [None]:
%%time
# Cell magic: reports the execution time of this whole cell.

import gc

# Run a garbage-collection pass before the experiments start
# (presumably to release memory held by earlier runs in this kernel).
gc.collect()
class vars:
    """Settings object for one experiment (stand-in for parsed CLI arguments).

    NOTE: the class name shadows the built-in ``vars``; it is kept because the
    experiment loop instantiates it under this name.

    Attributes set for every mode:
        dataset: Always ``"snes"``.
        inputtype: The ``inputtype`` argument, stored unchanged.
        filter_websites: Always ``0``.
        model: The ``mode`` argument.
        eval_per_epoch: Always ``1``.
        batchsize, lr: Mode-specific, see ``_MODE_SETTINGS``.

    The ``"lstm"`` mode additionally sets ``lstm_hidden_dim``, ``lstm_layers``
    and ``lstm_dropout``.
    """

    # Values that differ between modes.
    _MODE_SETTINGS = {
        "bow": {"batchsize": 2, "lr": 0.0001},
        "lstm": {
            "batchsize": 16,
            "lr": 0.0001,
            "lstm_hidden_dim": 128,
            "lstm_layers": 2,
            "lstm_dropout": 0.1,
        },
        "bert": {"batchsize": 4, "lr": 0.000003},
    }

    def __init__(self, mode, inputtype):
        """Create the settings for ``mode`` (``"bow"``, ``"lstm"`` or ``"bert"``).

        Raises:
            ValueError: If ``mode`` is not one of the known modes. Previously
                this produced an object without any attributes, which only
                failed later with an AttributeError.
        """
        if mode not in self._MODE_SETTINGS:
            raise ValueError(
                "Unknown mode {!r}; expected one of {}".format(mode, sorted(self._MODE_SETTINGS))
            )

        self.dataset = "snes"
        self.inputtype = inputtype
        self.filter_websites = 0
        self.model = mode
        self.eval_per_epoch = 1
        for name, value in self._MODE_SETTINGS[mode].items():
            setattr(self, name, value)


# Experiment grid.
# Supported modes: 'bow', 'lstm', 'bert'.
# Supported input types: 'CLAIM_ONLY', 'EVIDENCE_ONLY', 'CLAIM_AND_EVIDENCE'.
MODES = ['lstm']
INPUT_TYPES = ['CLAIM_AND_EVIDENCE']

# Model name -> function that trains/evaluates it; all share one signature.
RUNNERS = {"bert": run_bert, "lstm": run_lstm, "bow": run_bow}


def _make_other_test_generator(dataset, filter_flag, label_order, loader_params):
    """Build the test loader of the dataset that is NOT being trained on.

    The other dataset's labels are renamed to the training dataset's label
    names first, so that they can be looked up in ``label_order``.
    """
    if dataset == "snes":
        other_name = "pomt"
        relabel = {"pants on fire!": "false", "half-true": "mixture"}
    else:
        other_name = "snes"
        relabel = {"mixture": "half-true"}

    other_main, other_snippets, _, other_splits = load_data(other_name)
    if filter_flag > 0.5:
        other_snippets = filter_websites(other_snippets)

    for old_label, new_label in relabel.items():
        # iloc is given a plain boolean array rather than a boolean Series.
        mask = (other_main.iloc[:, 2] == old_label).to_numpy()
        other_main.iloc[mask, 2] = new_label

    _, _, other_test_generator, _ = make_generators(other_main, other_snippets, label_order, other_splits, loader_params, other_dataset=True)
    return other_test_generator


# The runners write their result pickles here; make sure the folder exists
# before any long training run starts.
os.makedirs("results", exist_ok=True)

for mode in MODES:
    for inputtype in INPUT_TYPES:
        args = vars(mode, inputtype)

        # Result file name: all settings joined by "-".
        name_parts = [args.model, args.dataset, args.inputtype, args.lr, args.batchsize]
        if args.filter_websites > 0.5:
            name_parts = [args.filter_websites] + name_parts
        if args.model == "lstm":
            name_parts += [args.lstm_hidden_dim, args.lstm_layers, args.lstm_dropout]
        savename = "results/" + "-".join(str(v) for v in name_parts) + ".pkl"

        # Position of the input type in INPUT_TYPE_ORDER; kept in its own
        # variable so the loop variable `inputtype` is not overwritten.
        input_type_index = INPUT_TYPE_ORDER.index(args.inputtype)

        main_data, snippets_data, label_order, splits = load_data(args.dataset)
        if args.filter_websites > 0.5:
            snippets_data = filter_websites(snippets_data)

        params = {"batch_size": args.batchsize, "shuffle": True, "num_workers": 1, "collate_fn": transformer_collate, "persistent_workers": True, "prefetch_factor": 5}
        # Evaluation loaders are identical except that they do not shuffle.
        eval_params = dict(params, shuffle=False)

        train_generator, val_generator, test_generator, label_weights = make_generators(main_data, snippets_data, label_order, splits, [params, eval_params])
        other_test_generator = _make_other_test_generator(args.dataset, args.filter_websites, label_order, [params, eval_params])

        runner = RUNNERS.get(args.model)
        if runner is not None:
            runner(args, train_generator, val_generator, test_generator, label_weights, input_type_index, label_order, savename, other_test_generator)

        gc.collect()




len 5069 , false (64.3\%), mostly false (7.5\%), mixture (12.3\%), mostly true (2.8\%), true (13.0\%)
len 13581 , false (29.7\%), mostly false (17.0\%), mixture (19.8\%), mostly true (18.8\%), true (14.8\%)
***run_lstm*** with inputtype CLAIM_AND_EVIDENCE
model parameters 1678089
[Nov 11, 01:01:55] TRAIN loss 1.604727430923565 1
[Nov 11, 01:02:09] VALIDATION F1micro, F1macro, loss: 0.6429980276134122 0.1565426170468187 507
[Nov 11, 01:02:38] TEST F1micro, F1macro, loss: 0.6429980276134122 0.1565426170468187 1014
[Nov 11, 01:03:55] OTHER-TEST F1micro, F1macro, loss: 0.29738682370261316 0.09168794326241134 2717


100%|██████████| 10/10 [20:29<00:00, 122.99s/it]

[Nov 11, 01:24:25] [0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565]
[Nov 11, 01:24:25] [0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565]
[Nov 11, 01:24:25] [0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917]
[Nov 11, 01:24:25] [0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917]
[Nov 11, 01:24:25] PATIENCE 0 / 10





[Nov 11, 01:26:15] TRAIN loss 1.599390602326608 2
[Nov 11, 01:26:29] VALIDATION F1micro, F1macro, loss: 0.6410256410256411 0.15625 507
[Nov 11, 01:26:29] PATIENCE 1 / 10
[Nov 11, 01:28:19] TRAIN loss 1.5968857579403095 3
[Nov 11, 01:28:33] VALIDATION F1micro, F1macro, loss: 0.6331360946745562 0.17088583787850292 507
[Nov 11, 01:29:02] TEST F1micro, F1macro, loss: 0.6400394477317555 0.18604460814405566 1014
[Nov 11, 01:30:19] OTHER-TEST F1micro, F1macro, loss: 0.2948104527051896 0.12927386296082505 2717


100%|██████████| 10/10 [20:10<00:00, 121.06s/it]

[Nov 11, 01:50:29] [0.1629, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1738, 0.1502]
[Nov 11, 01:50:29] [0.1861, 0.1629, 0.1598, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1502]
[Nov 11, 01:50:29] [0.1089, 0.0933, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.1231]
[Nov 11, 01:50:29] [0.1233, 0.096, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.1231]
[Nov 11, 01:50:29] PATIENCE 0 / 10





[Nov 11, 01:52:19] TRAIN loss 1.5912921315914876 4
[Nov 11, 01:52:33] VALIDATION F1micro, F1macro, loss: 0.6015779092702169 0.22075788986491046 507
[Nov 11, 01:53:02] TEST F1micro, F1macro, loss: 0.606508875739645 0.20343091750250938 1014
[Nov 11, 01:54:19] OTHER-TEST F1micro, F1macro, loss: 0.2741994847258005 0.1486831986369183 2717


100%|██████████| 10/10 [20:12<00:00, 121.28s/it]

[Nov 11, 02:14:32] [0.1834, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.1592, 0.0828]
[Nov 11, 02:14:32] [0.2059, 0.1846, 0.1598, 0.1598, 0.1565, 0.1565, 0.1565, 0.1565, 0.1565, 0.0828]
[Nov 11, 02:14:32] [0.1488, 0.1143, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0916, 0.1142]
[Nov 11, 02:14:32] [0.1494, 0.1327, 0.0944, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.0917, 0.1142]
[Nov 11, 02:14:32] PATIENCE 0 / 10





[Nov 11, 02:16:21] TRAIN loss 1.577500950645756 5
[Nov 11, 02:16:36] VALIDATION F1micro, F1macro, loss: 0.4714003944773176 0.23234527783214318 507
[Nov 11, 02:17:04] TEST F1micro, F1macro, loss: 0.45759368836291914 0.24888934334820254 1014
[Nov 11, 02:18:21] OTHER-TEST F1micro, F1macro, loss: 0.21825542878174456 0.16887838926413495 2717


100%|██████████| 10/10 [20:17<00:00, 121.76s/it]

[Nov 11, 02:38:39] [0.2416, 0.2302, 0.1825, 0.1598, 0.1565, 0.1565, 0.1565, 0.1565, 0.1443, 0.0368]
[Nov 11, 02:38:39] [0.2498, 0.2338, 0.2234, 0.1888, 0.1565, 0.1596, 0.1565, 0.1565, 0.1565, 0.0368]
[Nov 11, 02:38:39] [0.1929, 0.1884, 0.1479, 0.11, 0.0917, 0.0917, 0.0917, 0.0917, 0.0944, 0.1093]
[Nov 11, 02:38:39] [0.1745, 0.1881, 0.1718, 0.1218, 0.0988, 0.0931, 0.0917, 0.0917, 0.0917, 0.1093]
[Nov 11, 02:38:39] PATIENCE 0 / 10





[Nov 11, 02:40:29] TRAIN loss 1.5703933432295516 6
[Nov 11, 02:40:44] VALIDATION F1micro, F1macro, loss: 0.4911242603550296 0.20505349924889535 507
[Nov 11, 02:40:44] PATIENCE 1 / 10
[Nov 11, 02:42:33] TRAIN loss 1.5559484346492871 7
[Nov 11, 02:42:48] VALIDATION F1micro, F1macro, loss: 0.5483234714003945 0.22464950889735585 507
[Nov 11, 02:42:48] PATIENCE 2 / 10
[Nov 11, 02:44:38] TRAIN loss 1.5464481169039064 8
[Nov 11, 02:44:52] VALIDATION F1micro, F1macro, loss: 0.4536489151873767 0.2205377695664303 507
[Nov 11, 02:44:52] PATIENCE 3 / 10
[Nov 11, 02:46:42] TRAIN loss 1.5407612871479344 9
[Nov 11, 02:46:57] VALIDATION F1micro, F1macro, loss: 0.34911242603550297 0.22288144029861406 507
[Nov 11, 02:46:57] PATIENCE 4 / 10
[Nov 11, 02:48:47] TRAIN loss 1.5273479309167948 10
[Nov 11, 02:49:01] VALIDATION F1micro, F1macro, loss: 0.4358974358974359 0.2410930555512599 507
[Nov 11, 02:49:30] TEST F1micro, F1macro, loss: 0.4358974358974359 0.25904878640915024 1014
[Nov 11, 02:50:47] OTHER-TES

100%|██████████| 10/10 [20:14<00:00, 121.44s/it]

[Nov 11, 03:11:01] [0.2341, 0.1966, 0.1925, 0.1902, 0.1938, 0.1857, 0.1783, 0.1673, 0.0737, 0.0455]
[Nov 11, 03:11:01] [0.2592, 0.2189, 0.1995, 0.1847, 0.1848, 0.1924, 0.2015, 0.1777, 0.1641, 0.0455]
[Nov 11, 03:11:01] [0.1869, 0.1607, 0.1466, 0.1438, 0.139, 0.1206, 0.1045, 0.0994, 0.1106, 0.1151]
[Nov 11, 03:11:01] [0.1985, 0.1793, 0.1496, 0.1449, 0.142, 0.1325, 0.1137, 0.1006, 0.098, 0.1151]
[Nov 11, 03:11:01] PATIENCE 0 / 10





[Nov 11, 03:12:51] TRAIN loss 1.506778182210149 11
[Nov 11, 03:13:05] VALIDATION F1micro, F1macro, loss: 0.45759368836291914 0.24390419727082063 507
[Nov 11, 03:13:34] TEST F1micro, F1macro, loss: 0.45956607495069035 0.25551457329157046 1014
[Nov 11, 03:14:51] OTHER-TEST F1micro, F1macro, loss: 0.26352594773647403 0.20539099852388376 2717


100%|██████████| 10/10 [20:16<00:00, 121.64s/it]

[Nov 11, 03:35:08] [0.2689, 0.2439, 0.217, 0.1947, 0.1709, 0.1684, 0.1741, 0.1745, 0.1174, 0.0459]
[Nov 11, 03:35:08] [0.2525, 0.262, 0.227, 0.22, 0.2128, 0.1686, 0.1686, 0.1681, 0.1768, 0.0459]
[Nov 11, 03:35:08] [0.201, 0.1826, 0.1625, 0.1451, 0.1232, 0.1011, 0.0923, 0.0952, 0.121, 0.1238]
[Nov 11, 03:35:08] [0.2029, 0.1921, 0.1808, 0.1634, 0.1428, 0.121, 0.1035, 0.0977, 0.0976, 0.1238]
[Nov 11, 03:35:08] PATIENCE 0 / 10





[Nov 11, 03:36:58] TRAIN loss 1.4921760204676036 12
[Nov 11, 03:37:12] VALIDATION F1micro, F1macro, loss: 0.48520710059171596 0.24337416816355759 507
[Nov 11, 03:37:12] PATIENCE 1 / 10
[Nov 11, 03:39:02] TRAIN loss 1.482360025246938 13
[Nov 11, 03:39:17] VALIDATION F1micro, F1macro, loss: 0.47534516765285995 0.25483788150024017 507
[Nov 11, 03:39:46] TEST F1micro, F1macro, loss: 0.4990138067061144 0.2667512355445877 1014
[Nov 11, 03:41:03] OTHER-TEST F1micro, F1macro, loss: 0.25322046374677953 0.20360818642610767 2717


100%|██████████| 10/10 [20:15<00:00, 121.50s/it]

[Nov 11, 04:01:18] [0.2421, 0.2047, 0.2018, 0.1951, 0.1961, 0.1838, 0.189, 0.16, 0.0364, 0.0279]
[Nov 11, 04:01:18] [0.2653, 0.2492, 0.2125, 0.2045, 0.1915, 0.2008, 0.1948, 0.19, 0.1528, 0.0279]
[Nov 11, 04:01:18] [0.1933, 0.1723, 0.158, 0.1477, 0.1363, 0.1214, 0.1147, 0.1439, 0.146, 0.0577]
[Nov 11, 04:01:18] [0.1955, 0.1778, 0.1604, 0.1464, 0.1386, 0.1293, 0.1169, 0.1176, 0.1568, 0.0577]
[Nov 11, 04:01:18] PATIENCE 0 / 10





[Nov 11, 04:03:08] TRAIN loss 1.4656925260483682 14
[Nov 11, 04:03:22] VALIDATION F1micro, F1macro, loss: 0.4339250493096647 0.279665680028082 507
[Nov 11, 04:03:51] TEST F1micro, F1macro, loss: 0.45759368836291914 0.26932459493675687 1014
[Nov 11, 04:05:08] OTHER-TEST F1micro, F1macro, loss: 0.25248435774751565 0.21135644842801735 2717


 20%|██        | 2/10 [06:24<25:25, 190.71s/it]