In [1]:
import json
import spacy
import numpy as np
import pandas as pd
from tqdm import tqdm

from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load spaCy's small English pipeline once; sentence2vec uses it to split text into tokens.
nlp = spacy.load('en_core_web_sm')

In [2]:
def get_dataset(df: pd.DataFrame):
    """Turn rows of a ProofWriter-style frame into premise/conclusion records.

    Every row is expected to carry a ``theory`` string and a ``questions``
    mapping keyed ``Q1`` .. ``Qn`` whose values hold a ``question`` and an
    ``answer``.

    Returns:
        A list with one dict per row containing
        ``premises`` (the theory with each period-plus-space replaced by a
        newline-plus-space), ``conclusions`` (question texts in Q order) and
        ``labels`` (the matching answers).
    """
    records = []
    for _, row in df.iterrows():
        theory_text = row['theory'].replace('. ', '\n ')
        qa = row['questions']
        # Questions are addressed by their 1-based key, not by dict order.
        entries = [qa[f'Q{n}'] for n in range(1, len(qa) + 1)]
        records.append({
            'premises': theory_text,
            'conclusions': [entry['question'] for entry in entries],
            'labels': [entry['answer'] for entry in entries],
        })
    return records

# Build the token -> embedding lookup from the GloVe 6B 50d text file.
# Every line is parsed as "<token> <float> <float> ..." separated by single spaces.
glove_dict = dict()
# Decode explicitly as UTF-8 so loading does not depend on the platform's
# default encoding (which would fail or corrupt non-ASCII tokens elsewhere).
with open('glove.6B/glove.6B.50d.txt', 'r', encoding='utf-8') as fp:
    # Stream the file instead of holding every line in memory at once, and
    # split records on real line endings only (str.splitlines would also cut
    # on characters such as U+2028 if one occurred inside a token).
    for line in fp:
        word, *nums = line.rstrip('\n').split(' ')
        glove_dict[word] = np.array([float(num) for num in nums])

def sentence2vec(sentence: str) -> np.ndarray:
    """Embed text as the mean of the GloVe vectors of its tokens.

    The text is tokenized with the module-level spaCy pipeline ``nlp``; each
    token is lower-cased and looked up in ``glove_dict``. Tokens without an
    entry are ignored.

    Args:
        sentence: Raw text to embed.

    Returns:
        A 1-D array with the same length as the vectors stored in
        ``glove_dict``. When none of the tokens has an embedding, a zero
        vector of that length is returned instead of failing in ``np.stack``.

    Raises:
        ValueError: if no token is known and ``glove_dict`` is empty, so the
            embedding size cannot be determined.
    """
    # Only the tokens' lower-cased text is needed, so run the tokenizer alone
    # rather than the full pipeline; the tokens are the same, far cheaper.
    doc = nlp.make_doc(sentence)
    vecs = [glove_dict[tok.lower_] for tok in doc if tok.lower_ in glove_dict]
    if not vecs:
        reference = next(iter(glove_dict.values()), None)
        if reference is None:
            raise ValueError('glove_dict is empty; cannot embed text')
        return np.zeros_like(reference, dtype=float)
    return np.mean(np.stack(vecs), axis=0)

# Class index for each ProofWriter answer value: True -> 0, False -> 1, 'Unknown' -> 2.
label_map = {answer: index for index, answer in enumerate((True, False, 'Unknown'))}

def get_arrays(all_examples):
    """Vectorize premise/conclusion examples into a feature matrix and labels.

    For each example the premise embedding is paired with every conclusion
    embedding; one row is produced per conclusion, laid out as
    ``[premise_vector, conclusion_vector]``.

    Args:
        all_examples: Iterable of dicts with ``premises`` (str),
            ``conclusions`` (list of str) and ``labels`` (list of keys of
            ``label_map``, aligned with ``conclusions``).

    Returns:
        ``(X, y)`` where ``X`` has one row per conclusion and ``y`` holds the
        matching class indices from ``label_map``.

    Raises:
        KeyError: if a label is not a key of ``label_map`` (previously such
            labels were silently turned into ``None``).
        ValueError: if an example has a different number of conclusions and
            labels, or if there is no conclusion at all to vectorize.
    """
    X_list, y_list = [], []
    for example in all_examples:
        conclusions = example['conclusions']
        labels = example['labels']
        if len(conclusions) != len(labels):
            raise ValueError(
                f'example has {len(conclusions)} conclusions but {len(labels)} labels'
            )
        if not conclusions:
            # Nothing to pair the premise with; np.stack([]) would raise.
            continue
        p_vec = sentence2vec(example['premises'])
        c_vecs = np.stack([sentence2vec(conc) for conc in conclusions])
        # Repeat the premise embedding once per conclusion, then join columns.
        p_rows = np.tile(p_vec, (len(conclusions), 1))
        X_list.append(np.concatenate([p_rows, c_vecs], axis=1))
        # Strict lookup, matching get_arrays_folio: unknown labels fail loudly.
        y_list.append([label_map[label] for label in labels])
    if not X_list:
        raise ValueError('no (premise, conclusion) pairs to vectorize')
    X, y = np.concatenate(X_list), np.concatenate(y_list)
    return X, y

In [4]:
# Fit one SVM on the training split of the first depth in `depth`, then report
# its scores on the test split of every depth.
clf = None
depth = [0, 1, 2, 3, 5]
for d in depth:
    if clf is None:
        # The classifier is fitted exactly once, so only this depth's training
        # split is ever needed; loading and vectorizing the training split of
        # the remaining depths would be discarded work.
        proofwriter_train = pd.read_json(f'proofwriter-dataset-V2020.12.3/OWA/depth-{d}/meta-train.jsonl', lines=True)
        train_examples = get_dataset(proofwriter_train)
        X_train, y_train = get_arrays(train_examples)

        clf = make_pipeline(StandardScaler(), SVC(gamma='auto', max_iter=10000))
        clf.fit(X_train, y_train)

    proofwriter_test = pd.read_json(f'proofwriter-dataset-V2020.12.3/OWA/depth-{d}/meta-test.jsonl', lines=True)
    test_examples = get_dataset(proofwriter_test)
    X_test, y_test = get_arrays(test_examples)

    preds = clf.predict(X_test)

    # Printed in order: accuracy, weighted precision, weighted recall, weighted F1.
    print('depth:', d)
    print(accuracy_score(y_test, preds))
    print(precision_score(y_test, preds, average='weighted'))
    print(recall_score(y_test, preds, average='weighted'))
    print(f1_score(y_test, preds, average='weighted'))



depth: 0
0.6107670795045945
0.6342492123061582
0.6107670795045945
0.591555479676545
depth: 1
0.5570014844136566
0.5765397928266934
0.5570014844136566
0.5216864045590136
depth: 2
0.5480846774193548
0.5915395250418526
0.5480846774193548
0.5085455247888292
depth: 3
0.5283102329696254
0.5866495775280959
0.5283102329696254
0.4814931155940912
depth: 5
0.4861208187718422
0.5602288562407689
0.4861208187718422
0.42738651404887423


In [12]:
from datasets import load_dataset

# Class index for each FOLIO label string: 'True' -> 0, 'False' -> 1, 'Uncertain' -> 2.
label_map_folio = {label: index for index, label in enumerate(('True', 'False', 'Uncertain'))}
def get_arrays_folio(all_examples):
    """Vectorize FOLIO-style examples into a feature matrix and label vector.

    Each example must provide ``premises`` and ``conclusion`` texts plus a
    ``label`` that is a key of ``label_map_folio``. A row is the premise
    embedding followed by the conclusion embedding.

    Returns:
        ``(X, y)``: the stacked feature rows and the stacked class indices.
    """
    features, targets = [], []
    for example in all_examples:
        premise_vec = sentence2vec(example['premises'])
        conclusion_vec = sentence2vec(example['conclusion'])
        features.append(np.concatenate((premise_vec, conclusion_vec)))
        targets.append(label_map_folio[example['label']])
    return np.stack(features), np.stack(targets)

folio_train = load_dataset('yale-nlp/FOLIO', split='train')
folio_val = load_dataset('yale-nlp/FOLIO', split='validation')

# NOTE(review): X_train / y_train are built here but nothing below fits on
# them -- confirm whether the classifier was meant to be re-fitted on FOLIO.
X_train, y_train = get_arrays_folio(folio_train)
X_test, y_test = get_arrays_folio(folio_val)

# `clf` is not created in this cell; it is whatever estimator an earlier cell
# left in the kernel, so this cell only works after that cell has run.
preds = clf.predict(X_test)

# Printed in order: accuracy, weighted precision, weighted recall, weighted F1.
# zero_division=0 scores classes with no predicted/true samples as 0 -- the
# value the default already produces -- without the undefined-metric warning.
print(accuracy_score(y_test, preds))
print(precision_score(y_test, preds, average='weighted', zero_division=0))
print(recall_score(y_test, preds, average='weighted', zero_division=0))
print(f1_score(y_test, preds, average='weighted', zero_division=0))

0.3399014778325123
0.11553301463272586
0.3399014778325123
0.17245001448855404


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
