In [1]:
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, matthews_corrcoef, f1_score
from sklearn.svm import SVC

In [2]:

# Evaluation metric used for each task. Keys double as the per-task
# sub-directory name that score_per_task reads its pickled splits from.
TASKS_AND_METRICS = {
    "COLA": matthews_corrcoef,
    "RTE": accuracy_score,
    "MRPC": accuracy_score,
    "QNLI": accuracy_score,
    "MNLI": accuracy_score,
    # "QQP": accuracy_score,  # currently disabled
}

# Fixed seed so that the random-forest scores are reproducible across
# kernel restarts (the forests are the only seeded estimators here).
SEED = 42

lr = LogisticRegression(solver='lbfgs', max_iter=1000)
rfc = RandomForestClassifier(n_jobs=4, n_estimators=10, random_state=SEED)
rfc_100_est = RandomForestClassifier(n_estimators=100, n_jobs=4, random_state=SEED)
rfc_300_est = RandomForestClassifier(n_estimators=300, n_jobs=4, random_state=SEED)
knn = KNeighborsClassifier()
svc = SVC(gamma="scale")

# Classifiers to evaluate, keyed by the short name used in the score tables.
models = {
    "lr": lr,
    "rfc": rfc,
    "rfc_100_est": rfc_100_est,
    "rfc_300_est": rfc_300_est,
    "knn": knn,
    "svc": svc,
}

# Module-level scratch dict. Nothing in the visible cells reads it; kept so
# that any cell relying on the name still finds it.
scores = {}

In [3]:
# Removed leftover interactive help lookup (`?LogisticRegression`); run
# `LogisticRegression?` ad hoc in a scratch cell when the docstring is needed.

In [4]:
# Tasks on which some models are skipped, and the models that are skipped there.
# NOTE(review): presumably these combinations are too slow to train — confirm.
_TASKS_WITH_SKIPS = ("QNLI", "QQP", "MNLI")
_MODELS_SKIPPED = ("svc", "rfc_300_est")


def _load_split(embeddings_dir, task, split):
    """Unpickle the ``(X, y)`` pair stored at ``{embeddings_dir}/{task}/{split}.pkl``."""
    # SECURITY: pickle.load can execute arbitrary code while loading; only
    # point this at embedding files from a trusted source.
    with open(f"{embeddings_dir}/{task}/{split}.pkl", "rb") as infile:
        features, labels = pickle.load(infile)
    return features, labels


def score_per_task(embeddings_dir="embeddings", tasks_and_metrics=None, classifiers=None):
    """Fit every classifier on every task's train split and score it on dev.

    Parameters
    ----------
    embeddings_dir : str, default "embeddings"
        Directory holding one sub-directory per task, each containing
        ``train.pkl`` and ``dev.pkl``; every pickle unpacks to ``(X, y)``.
    tasks_and_metrics : dict or None
        Maps task name to a ``metric(y_true, y_pred)`` callable. ``None``
        falls back to the module-level ``TASKS_AND_METRICS``.
    classifiers : dict or None
        Maps model name to an object exposing ``fit(X, y)`` and
        ``predict(X)``. ``None`` falls back to the module-level ``models``.
        The objects are fitted in place, once per task that uses them.

    Returns
    -------
    dict
        ``{task: {model_name: score}}``. Models skipped for a task are
        absent from that task's inner dict.

    Raises
    ------
    OSError
        Propagated from ``open`` when a split file cannot be opened.
    """
    if tasks_and_metrics is None:
        tasks_and_metrics = TASKS_AND_METRICS
    if classifiers is None:
        classifiers = models

    task_to_scores = {}
    for task, metric in tasks_and_metrics.items():
        print(f'Doing task {task}')
        X_train, y_train = _load_split(embeddings_dir, task, "train")
        X_dev, y_dev = _load_split(embeddings_dir, task, "dev")

        # For f1_score both label sequences are cast to int. The dev labels
        # do not depend on the model, so they are converted once per task.
        needs_int_labels = metric is f1_score
        if needs_int_labels:
            y_dev = [int(label) for label in y_dev]

        # Named differently from the module-level `scores` to avoid shadowing.
        model_scores = {}
        for model_name, model in classifiers.items():
            if task in _TASKS_WITH_SKIPS and model_name in _MODELS_SKIPPED:
                continue

            print(f'Doing model {model_name}')
            model.fit(X_train, y_train)
            preds = model.predict(X_dev)
            if needs_int_labels:
                preds = [int(pred) for pred in preds]
            model_scores[model_name] = metric(y_dev, preds)

        print(model_scores)
        task_to_scores[task] = model_scores
    return task_to_scores
    

In [5]:
# Run the full evaluation: every configured model on every configured task.
# The result maps task name -> {model name -> metric value}.
task_to_scores = score_per_task()

Doing task COLA
Doing model lr
Doing model rfc
Doing model rfc_100_est
Doing model rfc_300_est
Doing model knn
Doing model svc
{'lr': 0.6431471477188715, 'rfc': 0.6305380037656513, 'rfc_100_est': 0.640766637939218, 'rfc_300_est': 0.6432718914115204, 'knn': 0.6420445126187212, 'svc': 0.6442142090411843}
Doing task RTE
Doing model lr
Doing model rfc
Doing model rfc_100_est
Doing model rfc_300_est
Doing model knn
Doing model svc
{'lr': 0.8592057761732852, 'rfc': 0.8411552346570397, 'rfc_100_est': 0.8483754512635379, 'rfc_300_est': 0.8483754512635379, 'knn': 0.851985559566787, 'svc': 0.8483754512635379}
Doing task MRPC
Doing model lr
Doing model rfc
Doing model rfc_100_est
Doing model rfc_300_est
Doing model knn
Doing model svc
{'lr': 0.8676470588235294, 'rfc': 0.8799019607843137, 'rfc_100_est': 0.8725490196078431, 'rfc_300_est': 0.8774509803921569, 'knn': 0.8799019607843137, 'svc': 0.8774509803921569}
Doing task QNLI
Doing model lr




Doing model rfc
Doing model rfc_100_est
Doing model knn
{'lr': 0.9063154221912073, 'rfc': 0.9045708304256804, 'rfc_100_est': 0.9084089323098395, 'knn': 0.9049197487787858}
Doing task MNLI
Doing model lr




Doing model rfc
Doing model rfc_100_est
Doing model knn
{'lr': 0.8677534386143657, 'rfc': 0.8626591951095263, 'rfc_100_est': 0.867142129393785, 'knn': 0.8641874681609781}


In [6]:
# Removed leftover interactive help lookup (`?LogisticRegression`); run
# `LogisticRegression?` ad hoc in a scratch cell when the docstring is needed.