# Dataset preparation - SameStance (Webis)

In [None]:
import csv
import pickle
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split

from utils import Timer

# Notebook-friendly progress bars. Import the tqdm *class* from tqdm.notebook
# rather than the deprecated `tqdm_notebook` function wrapper from the
# top-level package.
from tqdm.notebook import tqdm

# Register `.progress_apply()` on pandas objects. `pandas()` is a classmethod,
# so no throw-away progress bar has to be instantiated (the old
# `tqdm().pandas()` workaround left an empty bar in the cell output).
tqdm.pandas()

---

### 2021 (new data)

In [None]:
import pickle
from pathlib import Path

from tqdm import tqdm_notebook as tqdm

Load data train/dev/test (2021)

In [None]:
# Pickle file holding the 2021 splits.
fn_data_path = "data/data_split.pkl"

# Six objects were pickled back to back into the same file; read them in the
# order they are unpacked here (cross train/dev/test, then within train/dev/test).
with open(fn_data_path, "rb") as fp:
    (
        cross_train,
        cross_dev,
        cross_test,
        within_train,
        within_dev,
        within_test,
    ) = [pickle.load(fp) for _ in range(6)]

In [None]:
# argument statistics
# Set to True to print pair counts and unique-argument counts for every split.
SHOW_ARGUMENT_STATS = False


def count_unique_argument_ids(pairs):
    """Count the distinct argument ids referenced by a table of argument pairs.

    Every value in the ``argument1_id`` and ``argument2_id`` columns is cut at
    its first ``"-"``; the part before it is what gets counted.

    Args:
        pairs: Mapping-like object (e.g. a DataFrame) whose ``"argument1_id"``
            and ``"argument2_id"`` entries are iterables of id strings.

    Returns:
        Number of distinct id prefixes across both columns (0 if both are empty).
    """
    return len({
        pair_id.split("-", 1)[0]
        for column in ("argument1_id", "argument2_id")
        for pair_id in pairs[column]
    })


if SHOW_ARGUMENT_STATS:
    print("pairs")
    print(f"cross:  {len(cross_train):,d} / {len(cross_dev):,d} / {len(cross_test):,d}")
    print(f"within: {len(within_train):,d} / {len(within_dev):,d} / {len(within_test):,d}")

    cross_train_args = count_unique_argument_ids(cross_train)
    cross_dev_args = count_unique_argument_ids(cross_dev)
    cross_test_args = count_unique_argument_ids(cross_test)
    within_train_args = count_unique_argument_ids(within_train)
    within_dev_args = count_unique_argument_ids(within_dev)
    within_test_args = count_unique_argument_ids(within_test)

    print("argument ids (unique)")
    print(f"cross:  {cross_train_args:,d} / {cross_dev_args:,d} / {cross_test_args:,d}")
    print(f"within: {within_train_args:,d} / {within_dev_args:,d} / {within_test_args:,d}")

Write data

In [None]:
# argmining --> argmining
# Target folders for the within-topic and cross-topic train/dev/test TSVs.
data_argmining_within_tdt_path = Path("data/argmining/within")
data_argmining_cross_tdt_path = Path("data/argmining/cross")

# Create whichever folder is missing (announcing it), within first, then cross.
for tdt_dir in (data_argmining_within_tdt_path, data_argmining_cross_tdt_path):
    if tdt_dir.exists():
        continue
    print(f"Create dir: {tdt_dir}")
    tdt_dir.mkdir(parents=True)

# One file per split inside each folder.
fn_within_train_tsv, fn_within_dev_tsv, fn_within_test_tsv = [
    data_argmining_within_tdt_path / split_file
    for split_file in ("train.tsv", "dev.tsv", "test.tsv")
]

fn_cross_train_tsv, fn_cross_dev_tsv, fn_cross_test_tsv = [
    data_argmining_cross_tdt_path / split_file
    for split_file in ("train.tsv", "dev.tsv", "test.tsv")
]

In [None]:
def write_pairs_tsv(pairs_df, fn_tsv, desc):
    """Write argument pairs to a header-less TSV file.

    Each output line is ``<index>\\t<argument1>\\t<argument2>\\t<label>``.
    Tabs, newlines and carriage returns inside the argument texts are replaced
    by single spaces and the texts are stripped, so every pair occupies
    exactly one line. The label is ``"1"`` when ``str(is_same_side)`` equals
    ``"True"`` and ``"0"`` otherwise.

    Args:
        pairs_df: Table offering ``itertuples()`` rows with the attributes
            ``Index``, ``argument1``, ``argument2`` and ``is_same_side``,
            and supporting ``len()``.
        fn_tsv: ``pathlib.Path`` of the file to (over)write.
        desc: Caption shown on the progress bar.
    """
    with fn_tsv.open("w", encoding="utf-8") as fp:
        for row in tqdm(pairs_df.itertuples(), desc=desc, total=len(pairs_df)):
            text1 = row.argument1.replace("\t", " ").replace("\n", " ").replace("\r", " ").strip()
            text2 = row.argument2.replace("\t", " ").replace("\n", " ").replace("\r", " ").strip()
            label = "1" if str(row.is_same_side) == "True" else "0"
            fp.write(f"""{row.Index}\t{text1}\t{text2}\t{label}\n""")


# Same order of files as before: within train/dev/test, then cross train/dev/test.
for pairs_df, fn_tsv, desc in (
    (within_train, fn_within_train_tsv, "train (within)"),
    (within_dev, fn_within_dev_tsv, "dev (within)"),
    (within_test, fn_within_test_tsv, "test pred (within)"),
    (cross_train, fn_cross_train_tsv, "train (cross)"),
    (cross_dev, fn_cross_dev_tsv, "dev (cross)"),
    (cross_test, fn_cross_test_tsv, "test pred (cross)"),
):
    write_pairs_tsv(pairs_df, fn_tsv, desc)

In [None]:
# symlink pred.tsv
! ln -s test.tsv {data_argmining_within_tdt_path}/pred.tsv
! ln -s test.tsv {data_argmining_cross_tdt_path}/pred.tsv

Write only the within-topic data (train and dev combined, test kept separate) for the artificial dataset analysis.

In [None]:
# argmining --> argmining_tt

# Combine train and dev under a NEW name. Re-assigning `within_train` here
# would append the dev rows again on every re-run of this cell and would
# silently change what earlier cells write when they are executed afterwards.
# ignore_index=True renumbers the rows, so the ids written to train.tsv are
# positions in the combined frame.
within_traindev = pd.concat([within_train, within_dev], ignore_index=True)

data_argmining_within_tdt_path = Path("data/argmining_tt/within")

if not data_argmining_within_tdt_path.exists():
    print(f"Create dir: {data_argmining_within_tdt_path}")
    data_argmining_within_tdt_path.mkdir(parents=True)

fn_within_train_tsv = data_argmining_within_tdt_path / "train.tsv"
fn_within_test_tsv = data_argmining_within_tdt_path / "test.tsv"

# One line per pair: <index>\t<argument1>\t<argument2>\t<label>; whitespace
# control characters inside the texts are flattened to spaces.
for pairs_df, fn_tsv, desc in (
    (within_traindev, fn_within_train_tsv, "train (within)"),
    (within_test, fn_within_test_tsv, "test pred (within)"),
):
    with fn_tsv.open("w", encoding="utf-8") as fp:
        for row in tqdm(pairs_df.itertuples(), desc=desc, total=len(pairs_df)):
            text1 = row.argument1.replace("\t", " ").replace("\n", " ").replace("\r", " ").strip()
            text2 = row.argument2.replace("\t", " ").replace("\n", " ").replace("\r", " ").strip()
            label = "1" if str(row.is_same_side) == "True" else "0"
            fp.write(f"""{row.Index}\t{text1}\t{text2}\t{label}\n""")

---

### Ground-truth dataset

In [None]:
import pickle
from pathlib import Path

import pandas as pd

from tqdm.notebook import tqdm

# Register `.progress_apply()` on pandas objects via the classmethod; no
# throw-away progress bar instance is needed for this.
tqdm.pandas()

Load the ground-truth test data (2021)

In [None]:
# Path template; "{}" is replaced by the task name ("within" or "cross").
fn_ground_truth = "data_raw/argmining/ground-truth/{}-topics-ground-truth-subset.csv"

# Read both ground-truth files, using the "id" column as index.
within_test_df, cross_test_df = [
    pd.read_csv(fn_ground_truth.format(task), index_col="id")
    for task in ("within", "cross")
]

# Mirror the "subject" column into a "tag" column on both frames.
for ground_truth_df in (within_test_df, cross_test_df):
    ground_truth_df["tag"] = ground_truth_df["subject"]

In [None]:
fn_ground_truth_p = "data/ground_truth.p"

# Both frames go into one file, within first and cross second; readers must
# call pickle.load() twice in that order.
with open(fn_ground_truth_p, "wb") as fp:
    for ground_truth_df in (within_test_df, cross_test_df):
        pickle.dump(ground_truth_df, fp, protocol=pickle.HIGHEST_PROTOCOL)

Write data

In [None]:
# argmining --> argmining
# Ground-truth output folders: one sub-folder per task.
data_argmining_groundtruth_tdt_path = Path("data/argmining/ground_truth")
data_argmining_groundtruth_within_tdt_path = data_argmining_groundtruth_tdt_path.joinpath("within")
data_argmining_groundtruth_cross_tdt_path = data_argmining_groundtruth_tdt_path.joinpath("cross")

# Create whichever folder is missing (announcing it), within first, then cross.
for gold_dir in (
    data_argmining_groundtruth_within_tdt_path,
    data_argmining_groundtruth_cross_tdt_path,
):
    if gold_dir.exists():
        continue
    print(f"Create dir: {gold_dir}")
    gold_dir.mkdir(parents=True)

fn_gold_within_test_tsv = data_argmining_groundtruth_within_tdt_path.joinpath("test.tsv")
fn_gold_cross_test_tsv = data_argmining_groundtruth_cross_tdt_path.joinpath("test.tsv")

In [None]:
# Write both ground-truth test sets, within first and cross second. Each line
# is <index>\t<argument1>\t<argument2>\t<label>, with tab/newline/carriage
# return characters inside the texts flattened to spaces.
for gold_df, fn_gold_tsv, bar_desc in (
    (within_test_df, fn_gold_within_test_tsv, "pred/test (within)"),
    (cross_test_df, fn_gold_cross_test_tsv, "pred/test (cross)"),
):
    with fn_gold_tsv.open("w", encoding="utf-8") as fp:
        for row in tqdm(gold_df.itertuples(), desc=bar_desc, total=len(gold_df)):
            text1, text2 = [
                text.replace("\t", " ").replace("\n", " ").replace("\r", " ").strip()
                for text in (row.argument1, row.argument2)
            ]
            if str(row.is_same_side) == "True":
                label = "1"
            else:
                label = "0"
            fp.write(f"""{row.Index}\t{text1}\t{text2}\t{label}\n""")

In [None]:
# symlink pred.tsv
! ln -s test.tsv {data_argmining_groundtruth_within_tdt_path}/pred.tsv
! ln -s test.tsv {data_argmining_groundtruth_cross_tdt_path}/pred.tsv

---

### Artificial dataset

see `data/` folder

In [None]:
import pickle
from pathlib import Path

import pandas as pd

# Import the tqdm class from tqdm.notebook rather than the deprecated
# `tqdm_notebook` function wrapper from the top-level package.
from tqdm.notebook import tqdm

# Register `.progress_apply()` on pandas objects via the classmethod; no
# throw-away progress bar instance is needed for this.
tqdm.pandas()

Load the artificial evaluation set

In [None]:
fn_art_eval = "data/artificial_evalset.tsv"

# Tab-separated file; the default integer row index is kept (no index column).
artificial_evalset_df = pd.read_csv(fn_art_eval, index_col=None, sep="\t")

# Rename the third column (position 2) to "type"; all other labels are kept.
new_cols = [*artificial_evalset_df.columns]
new_cols[2] = "type"
artificial_evalset_df.columns = new_cols

def fix_cols(row):
    """Add the id and topic fields expected by the TSV writers to one row.

    The row is modified in place and also returned:

    * ``argument1_id`` is the row's ``arg_id``
    * ``argument2_id`` is ``"<arg_id>-<type>"``
    * ``topic`` and ``tag`` are both set to ``"gay marriage"``
    """
    arg_id = row["arg_id"]
    row["argument1_id"] = arg_id
    row["argument2_id"] = "{}-{}".format(arg_id, row["type"])
    # chained assignment binds left to right: "topic" first, then "tag"
    row["topic"] = row["tag"] = "gay marriage"
    return row

# Run fix_cols on every row (axis=1). `progress_apply` is the tqdm-instrumented
# counterpart of `DataFrame.apply`; it only exists after the tqdm pandas
# integration has been registered in the import cell of this section.
artificial_evalset_df = artificial_evalset_df.progress_apply(fix_cols, axis=1)

In [None]:
fn_art_eval_p = "data/artificial_evalset.p"

# Persist the prepared frame; the plotting section reloads it from this file.
with open(fn_art_eval_p, mode="wb") as pickle_fp:
    pickle_fp.write(pickle.dumps(artificial_evalset_df, protocol=pickle.HIGHEST_PROTOCOL))

Write data

In [None]:
# argmining --> argmining
# Output folder for the artificial evaluation set.
data_argmining_art_tdt_path = Path("data/argmining/artificial")

art_dir_missing = not data_argmining_art_tdt_path.exists()
if art_dir_missing:
    print(f"Create dir: {data_argmining_art_tdt_path}")
    data_argmining_art_tdt_path.mkdir(parents=True)

fn_art_test_tsv = data_argmining_art_tdt_path.joinpath("test.tsv")

In [None]:
# One line per pair: <index>\t<argument1>\t<argument2>\t<label>. Tab, newline
# and carriage-return characters inside the texts become spaces so that every
# pair stays on a single line.
with fn_art_test_tsv.open("w", encoding="utf-8") as fp:
    progress = tqdm(
        artificial_evalset_df.itertuples(),
        desc="pred/test (within, gay)",
        total=len(artificial_evalset_df),
    )
    for row in progress:
        text1, text2 = [
            text.replace("\t", " ").replace("\n", " ").replace("\r", " ").strip()
            for text in (row.argument1, row.argument2)
        ]
        if str(row.is_same_side) == "True":
            label = "1"
        else:
            label = "0"
        fp.write(f"""{row.Index}\t{text1}\t{text2}\t{label}\n""")

In [None]:
# symlink pred.tsv
! ln -s test.tsv {data_argmining_art_tdt_path}/pred.tsv

---

In [None]:
# Path templates for the raw CSV files; the loader functions below fill "{}"
# with the file stem ("training" or "test").
data_cross_path = "data_raw/argmining/cross/{}.csv"
data_within_path = "data_raw/argmining/within/{}.csv"
# Within-topic test file returned by load_official_data_within(). With the
# values configured here it is the same path as data_within_path.format("test").
new_within_test = "data_raw/argmining/within/test.csv"

# Column selections used as X and y by get_train_test_sets() and
# split_within_by_topic().
names_columns_X = ["argument1", "argument2", "argument1_id", "argument2_id", "topic"]
names_columns_y = ["is_same_side"]

#### Load data webis

In [None]:
def load_official_data_cross():
    """Read the cross-topic training and test CSV files.

    The training file is parsed with every field quoted, backslash as the
    escape character and doubled quotes disabled; the test file is parsed with
    the pandas defaults. Both use the ``id`` column as index. Reading is
    wrapped in a ``Timer`` block.

    Returns:
        Tuple ``(cross_traindev_df, cross_test_df)``.
    """
    with Timer("read S3C cross train/dev"):
        traindev_csv_options = {
            "quotechar": '"',
            "quoting": csv.QUOTE_ALL,
            "encoding": "utf-8",
            "escapechar": "\\",
            "doublequote": False,
            "index_col": "id",
        }
        traindev_frame = pd.read_csv(data_cross_path.format("training"), **traindev_csv_options)
        test_frame = pd.read_csv(data_cross_path.format("test"), index_col="id")

    return traindev_frame, test_frame


def load_official_data_within():
    """Read the within-topic training CSV and the within-topic test CSV.

    The training file is parsed with every field quoted, backslash as the
    escape character and doubled quotes disabled. The test file (the path in
    ``new_within_test``) is parsed with the pandas defaults. Both use the
    ``id`` column as index. Reading is wrapped in a ``Timer`` block.

    Returns:
        Tuple ``(within_traindev_df, new_within_test_df)``.
    """
    with Timer("read S3C within train/dev"):
        within_traindev_df = pd.read_csv(
            data_within_path.format("training"),
            quotechar='"',
            quoting=csv.QUOTE_ALL,
            encoding="utf-8",
            escapechar="\\",
            doublequote=False,
            index_col="id",
        )
        # Only this frame is returned. An additional read of
        # data_within_path.format("test") used to sit here; its result was
        # never used, so it only cost time and memory and has been dropped.
        new_within_test_df = pd.read_csv(new_within_test, index_col="id")

    return within_traindev_df, new_within_test_df


def load_official_data(task="within"):
    """Load the official train/dev and test frames for one task.

    Args:
        task: ``"within"`` or ``"cross"``.

    Returns:
        Whatever the task-specific loader returns: a ``(traindev_df, test_df)``
        tuple.

    Raises:
        ValueError: If ``task`` is neither ``"within"`` nor ``"cross"``.
            (``ValueError`` is an ``Exception`` subclass, so callers that
            caught the previous generic ``Exception`` keep working.)
    """
    if task == "within":
        return load_official_data_within()
    if task == "cross":
        return load_official_data_cross()
    raise ValueError(f"Unknown dataset: {task!r} (expected 'within' or 'cross')")

In [None]:
def add_tag(df):
    """Return a copy of ``df`` with a ``tag`` column derived from ``topic``.

    Adds a tag for the topics in focus, matched case-insensitively as
    substrings of ``topic``:

    * ``"abortion"`` if the topic contains "abortion" (checked first, so it
      wins when both phrases occur),
    * ``"gay marriage"`` if it contains "gay marriage",
    * ``"NA"`` otherwise (including rows whose topic is missing).

    The matching is vectorised, so it does not depend on tqdm's
    ``progress_apply`` being registered and keeps the dtypes of the existing
    columns. The input frame is not modified.

    Args:
        df: DataFrame with a string column ``topic``.

    Returns:
        New DataFrame with the same rows and index plus the ``tag`` column.
    """
    title = df["topic"].str.lower()
    is_abortion = title.str.contains("abortion", regex=False, na=False).to_numpy()
    is_gay_marriage = title.str.contains("gay marriage", regex=False, na=False).to_numpy()

    tagged = df.copy()
    tagged["tag"] = "NA"
    # Plain boolean arrays are used as masks so that duplicate index labels
    # cannot interfere with the assignment. Abortion is written last because
    # it takes precedence over gay marriage.
    tagged.loc[is_gay_marriage, "tag"] = "gay marriage"
    tagged.loc[is_abortion, "tag"] = "abortion"
    return tagged


def load_and_prepare_official_data(task="within"):
    """Load the official data for ``task`` and tag both frames by topic.

    Args:
        task: Forwarded to ``load_official_data``.

    Returns:
        Tuple ``(traindev_df, test_df)`` after ``add_tag`` has been applied to
        each frame; the tagging runs inside a ``Timer`` block.
    """
    loaded_frames = load_official_data(task=task)

    with Timer("tag {} train/dev".format(task)):
        # train/dev is tagged first, then test
        traindev_tagged, test_tagged = [add_tag(frame) for frame in loaded_frames]

    return traindev_tagged, test_tagged

#### Split train/dev

In [None]:
# train dev set - 70% 30% (with the default ratio)
def get_train_test_sets(df, ratio=0.30, random_state=42):
    """Shuffle ``df`` and split it into a train part and a held-out part.

    Args:
        df: DataFrame containing the columns listed in ``names_columns_X`` and
            ``names_columns_y``.
        ratio: Passed to ``train_test_split`` as ``test_size``.
        random_state: Seed passed to ``train_test_split``.

    Returns:
        ``(X_train, X_test, y_train, y_test)``.
    """
    features = df[names_columns_X]
    target = df[names_columns_y]

    split_options = {"test_size": ratio, "random_state": random_state, "shuffle": True}
    X_train, X_test, y_train, y_test = train_test_split(features, target, **split_options)
    return X_train, X_test, y_train, y_test


def split_within_by_topic(within_df):
    """Split a tagged frame into its "abortion" and "gay marriage" parts.

    Args:
        within_df: DataFrame with a ``tag`` column plus the columns listed in
            ``names_columns_X`` and ``names_columns_y``.

    Returns:
        ``(X_abortion, X_gay_marriage, y_abortion, y_gay_marriage)``.

    Raises:
        KeyError: If one of the two tags does not occur in ``within_df``.
    """
    # Group by the column *name*, not by a one-element list: with a list key,
    # recent pandas versions expect a 1-tuple in get_group() and warn about
    # (or reject) the scalar lookups used below.
    groups = within_df.groupby("tag")
    abortion_df = groups.get_group("abortion")
    gay_marriage_df = groups.get_group("gay marriage")

    X_abortion = abortion_df[names_columns_X]
    y_abortion = abortion_df[names_columns_y]
    X_gay_marriage = gay_marriage_df[names_columns_X]
    y_gay_marriage = gay_marriage_df[names_columns_y]

    return X_abortion, X_gay_marriage, y_abortion, y_gay_marriage

---

## Load data

In [None]:
# Load the official train/dev and test frames for both tasks; each frame gets
# a "tag" column via load_and_prepare_official_data().
# NOTE(review): this rebinds within_test_df / cross_test_df, which the
# ground-truth section further up also assigns - make sure the intended pair
# of frames is in memory before running cells that use these names.
within_traindev_df, within_test_df = load_and_prepare_official_data("within")
cross_traindev_df, cross_test_df = load_and_prepare_official_data("cross")

## Write data

In [None]:
# Target folders for the within-topic and cross-topic TSV files.
data_argmining_within_tdt_path = Path("data/argmining/within")
data_argmining_cross_tdt_path = Path("data/argmining/cross")

# Create whichever folder is missing (announcing it), within first, then cross.
for tdt_dir in (data_argmining_within_tdt_path, data_argmining_cross_tdt_path):
    if tdt_dir.exists():
        continue
    print(f"Create dir: {tdt_dir}")
    tdt_dir.mkdir(parents=True)

# In this flow the "test" file names point at pred.tsv (not test.tsv).
fn_within_train_tsv, fn_within_dev_tsv, fn_within_test_tsv = [
    data_argmining_within_tdt_path / split_file
    for split_file in ("train.tsv", "dev.tsv", "pred.tsv")
]

fn_cross_train_tsv, fn_cross_dev_tsv, fn_cross_test_tsv = [
    data_argmining_cross_tdt_path / split_file
    for split_file in ("train.tsv", "dev.tsv", "pred.tsv")
]

In [None]:
# 80/20 train/dev split of the within-topic training data (fixed seed).
X_train, X_dev, y_train, y_dev = get_train_test_sets(within_traindev_df, ratio=0.20, random_state=42)

# Re-attach the label column to the feature columns via the shared index.
train_df = X_train.merge(y_train, left_index=True, right_index=True)
dev_df = X_dev.merge(y_dev, left_index=True, right_index=True)

# One line per pair: <index>\t<argument1>\t<argument2>\t<label>; train is
# written first, then dev.
for split_df, fn_split_tsv, bar_desc in (
    (train_df, fn_within_train_tsv, "train (within)"),
    (dev_df, fn_within_dev_tsv, "dev (within)"),
):
    with fn_split_tsv.open("w", encoding="utf-8") as fp:
        for row in tqdm(split_df.itertuples(), desc=bar_desc, total=len(split_df)):
            text1, text2 = [
                text.replace("\t", " ").replace("\n", " ").replace("\r", " ").strip()
                for text in (row.argument1, row.argument2)
            ]
            if str(row.is_same_side) == "True":
                label = "1"
            else:
                label = "0"
            fp.write(f"""{row.Index}\t{text1}\t{text2}\t{label}\n""")

In [None]:
# 80/20 train/dev split of the cross-topic training data (fixed seed).
X_train, X_dev, y_train, y_dev = get_train_test_sets(cross_traindev_df, ratio=0.20, random_state=42)

# Re-attach the label column to the feature columns via the shared index.
train_df = X_train.merge(y_train, left_index=True, right_index=True)
dev_df = X_dev.merge(y_dev, left_index=True, right_index=True)

# One line per pair: <index>\t<argument1>\t<argument2>\t<label>; train is
# written first, then dev.
for split_df, fn_split_tsv, bar_desc in (
    (train_df, fn_cross_train_tsv, "train (cross)"),
    (dev_df, fn_cross_dev_tsv, "dev (cross)"),
):
    with fn_split_tsv.open("w", encoding="utf-8") as fp:
        for row in tqdm(split_df.itertuples(), desc=bar_desc, total=len(split_df)):
            text1, text2 = [
                text.replace("\t", " ").replace("\n", " ").replace("\r", " ").strip()
                for text in (row.argument1, row.argument2)
            ]
            if str(row.is_same_side) == "True":
                label = "1"
            else:
                label = "0"
            fp.write(f"""{row.Index}\t{text1}\t{text2}\t{label}\n""")

In [None]:
# Write the test pairs with the placeholder label -1: within first, then
# cross. Each line is <index>\t<argument1>\t<argument2>\t-1.
for pred_df, fn_pred_tsv, bar_desc in (
    (within_test_df, fn_within_test_tsv, "test pred (within)"),
    (cross_test_df, fn_cross_test_tsv, "test pred (cross)"),
):
    with fn_pred_tsv.open("w", encoding="utf-8") as fp:
        for row in tqdm(pred_df.itertuples(), desc=bar_desc, total=len(pred_df)):
            text1, text2 = [
                text.replace("\t", " ").replace("\n", " ").replace("\r", " ").strip()
                for text in (row.argument1, row.argument2)
            ]
            label = -1
            fp.write(f"""{row.Index}\t{text1}\t{text2}\t{label}\n""")

In [None]:
# symlink pred.tsv -> test.tsv inside each split folder
# NOTE(review): in this section fn_within_test_tsv / fn_cross_test_tsv already
# point at pred.tsv and the cell above writes those files, so `ln -s` will
# refuse with "File exists" once they are present. Confirm whether this cell is
# still needed here; do not add -f without checking, as that would replace the
# freshly written pred.tsv with a link to test.tsv.
! ln -s test.tsv {data_argmining_within_tdt_path}/pred.tsv
! ln -s test.tsv {data_argmining_cross_tdt_path}/pred.tsv

---

## Run training

In [None]:
# Run configuration for the trainer cells below.
# Pick the model by leaving exactly one `model_name = ...` line active; when
# several are active, the LAST assignment wins.
#model_name = "bert-base-uncased"
#model_name = "bert-base-cased"
#model_name = "distilroberta-base"
#model_name = "distilbert-base-cased"
# NOTE(review): this assignment is overridden by the xlnet line further down.
model_name = "albert-base-v2"
#model_name = "albert-base-v1"
#model_name = "roberta-base"

#model_name = "bert-large-cased"
#model_name = "albert-large-v2"

# effective value (last active assignment)
model_name = "xlnet-base-cased"
#model_name = "reformer-enwik8"
#model_name = "funnel-transformer/small"
#model_name = "squeezebert/squeezebert-uncased"
#model_name = "facebook/bart-base"

#model_name = "google/electra-small-discriminator"
#model_name = "google/electra-base-discriminator"
#model_name = "sentence-transformers/stsb-distilbert-base"
#model_name = "sentence-transformers/quora-distilbert-base"
#model_name = "sentence-transformers/bert-base-nli-mean-tokens"

# Sub-folder of ./data/argmining/ used as --data_dir.
# NOTE(review): "cross" is immediately overridden; the effective value is "within".
data_name = "cross"
data_name = "within"

seq_len = 512     # passed as --max_seq_length
batch_size = 4    # passed as per-device train and eval batch size
acc_steps = 64    # passed as --gradient_accumulation_steps
num_epoch = 3     # passed as --num_train_epochs
cuda_devs = "1"   # exported as CUDA_VISIBLE_DEVICES

# Name of the run; also used in output and log folder names. Any "/" in the
# model name is replaced by "-".
run_name = f"{model_name.replace('/', '-')}-{data_name}_{seq_len}_{batch_size}-acc{acc_steps}_{num_epoch}"

In [None]:
# Train, evaluate, test and predict with trainer.py using the run
# configuration from the cell above. stdout and stderr are shown in the
# notebook and appended to {log_dir}/stdout.log and {log_dir}/stderr.log via tee.
# NOTE(review): this cell writes to ./output_5/ and ./output_5_logs/, while
# the later cells in this notebook read models from ./output/ and log to
# ./output_logs/ - confirm which directories are intended.

# create folder for logging
log_dir = f"./output_5_logs/{run_name}"
! mkdir -p {log_dir}

! \
    PYTHONASYNCIODEBUG=1 \
    HF_MLFLOW_LOG_ARTIFACTS=TRUE \
    MLFLOW_EXPERIMENT_NAME=same-stance \
    CUDA_VISIBLE_DEVICES={cuda_devs} \
    python trainer.py \
    --do_train --do_eval --do_test --do_pred \
    --seed 743 \
    --model_name_or_path {model_name} \
    --task_name same-b \
    --data_dir ./data/argmining/{data_name} \
    --output_dir ./output_5/{run_name} \
    --run_name {run_name} \
    --per_device_eval_batch_size {batch_size} \
    --per_device_train_batch_size {batch_size} \
    --gradient_accumulation_steps {acc_steps} \
    --eval_steps 128 \
    --logging_steps 2000 \
    --save_steps 2000 \
    --save_total_limit 4 \
    --num_train_epochs {num_epoch} \
    --max_seq_length {seq_len} \
    --evaluation_strategy steps \
    --load_best_model_at_end \
    > >(tee -a {log_dir}/stdout.log) \
    2> >(tee -a {log_dir}/stderr.log >&2)

# Optional flags kept for reference (not passed above unless listed there):
# --overwrite_output_dir \
# --overwrite_cache \
# --eval_steps 100 (same as --logging_steps)
# --load_best_model_at_end \
# --max_steps 1000 \
# --gradient_accumulation_steps {acc_steps} \

In [None]:
# Disabled (guarded by `if False`): evaluation-only run that loads the model
# from ./output/{run_name} and writes to a sibling folder suffixed "-(2)".
# NOTE(review): the training cell above writes to ./output_5/, not ./output/;
# adjust the paths before enabling this cell.
if False:

    # create folder for logging
    ! mkdir -p ./output_logs/{run_name}"-(2)"

    ! \
        HF_MLFLOW_LOG_ARTIFACTS=TRUE \
        MLFLOW_EXPERIMENT_NAME=same-stance \
        CUDA_VISIBLE_DEVICES={cuda_devs} \
        python trainer.py \
        --do_eval \
        --model_name_or_path ./output/{run_name} \
        --task_name same-b \
        --data_dir ./data/argmining/{data_name} \
        --output_dir ./output/{run_name}"-(2)" \
        --run_name {run_name} \
        --per_device_eval_batch_size {batch_size} \
        --gradient_accumulation_steps {acc_steps} \
        --logging_steps 100 \
        --max_seq_length {seq_len} \
        > >(tee -a ./output_logs/{run_name}"-(2)"/stdout.log) \
        2> >(tee -a ./output_logs/{run_name}"-(2)"/stderr.log >&2)

---

## Run artificial dataset prediction

This section reuses `run_name` and the other parameters defined in the "Run training" section above.

In [None]:
# train model
# Trains on ./data/argmining_tt/{data_name} (train+dev combined, separate test)
# starting from the pretrained {model_name}.
# NOTE(review): `load_name` is assigned here but not used by the command in
# this cell (it passes {model_name}); the evaluate cell below re-assigns it.
# NOTE(review): `run_name` gets "-art" appended every time this cell runs, so
# re-running it without re-running the configuration cell keeps extending the
# name.

load_name = f"./output/{run_name}"
run_name = f"{run_name}-art"
log_dir = f"./output_logs/{run_name}"

# create folder for logging
! mkdir -p {log_dir}

! \
    PYTHONASYNCIODEBUG=1 \
    HF_MLFLOW_LOG_ARTIFACTS=TRUE \
    MLFLOW_EXPERIMENT_NAME=same-stance \
    CUDA_VISIBLE_DEVICES={cuda_devs} \
    python trainer.py \
    --do_train --do_test \
    --model_name_or_path {model_name} \
    --task_name same-b \
    --data_dir ./data/argmining_tt/{data_name} \
    --output_dir ./output/{run_name} \
    --run_name {run_name} \
    --per_device_eval_batch_size {batch_size} \
    --per_device_train_batch_size {batch_size} \
    --gradient_accumulation_steps {acc_steps} \
    --logging_steps 1000 \
    --save_steps 1000 \
    --save_total_limit 4 \
    --num_train_epochs {num_epoch} \
    --max_seq_length {seq_len} \
    > >(tee -a ./output_logs/{run_name}/stdout.log) \
    2> >(tee -a ./output_logs/{run_name}/stderr.log >&2)

In [None]:
# evaluate
# Loads the model stored under ./output/<current run_name> and runs test and
# prediction on ./data/argmining/artificial.
# `load_name` captures the current run_name BEFORE another "-art" is appended,
# so when this cell runs right after the training cell above, the results land
# in a folder ending in "-art-art" (the plotting section reads from such a path).
# NOTE(review): like the training cell, this one extends `run_name` on every
# execution.

load_name = f"./output/{run_name}"
run_name = f"{run_name}-art"
log_dir = f"./output_logs/{run_name}"

# create folder for logging
! mkdir -p {log_dir}

! \
    HF_MLFLOW_LOG_ARTIFACTS=TRUE \
    MLFLOW_EXPERIMENT_NAME=same-stance \
    CUDA_VISIBLE_DEVICES={cuda_devs} \
    python trainer.py \
    --do_test --do_pred \
    --model_name_or_path {load_name} \
    --task_name same-b \
    --data_dir ./data/argmining/artificial \
    --output_dir ./output/{run_name} \
    --overwrite_output_dir \
    --overwrite_cache \
    --run_name {run_name} \
    --max_seq_length {seq_len} \
    --per_device_eval_batch_size {batch_size} \
    --logging_steps 100 \
    > >(tee -a {log_dir}/stdout.log) \
    2> >(tee -a {log_dir}/stderr.log >&2)

Make plots

In [None]:
import pickle
from pathlib import Path

import sklearn.metrics as sklm
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd

from sklearn.base import BaseEstimator, ClassifierMixin

In [None]:
class IdentityEstimator(BaseEstimator, ClassifierMixin):
    """Pseudo-classifier whose "prediction" is its input.

    It lets already computed predictions be passed to scikit-learn helpers
    that expect an estimator plus feature matrix (see ``plot_conf_mat``).
    """

    def __init__(self):
        # Set directly in the constructor (there is no fit()): the instance
        # presents itself as fitted with the two classes 0 and 1.
        self.is_fitted_ = True
        self.classes_ = [0, 1]

    def predict(self, X):
        """Return ``X`` unchanged."""
        return X

# Shared instance used by plot_conf_mat().
estim = IdentityEstimator()

def plot_conf_mat(true_labels, predicted_labels, label_names, plot_title, fname=None):
    """Plot (and optionally save) a confusion matrix for given predictions.

    Args:
        true_labels: Sequence of ground-truth labels.
        predicted_labels: Sequence of predicted labels, same length.
        label_names: Tick labels to display for the classes.
        plot_title: Title placed above the matrix.
        fname: If given, the figure is saved to this path (transparent
            background, tight bounding box) before it is shown.

    Side effects:
        Sets the global matplotlib font size to 30 and shows the figure.
    """
    matplotlib.rcParams.update({'font.size': 30})  # 20

    display_cls = getattr(sklm, "ConfusionMatrixDisplay", None)
    if display_cls is not None and hasattr(display_cls, "from_predictions"):
        # Current scikit-learn API: build the display straight from the
        # label sequences (plot_confusion_matrix was removed from sklearn).
        disp = display_cls.from_predictions(true_labels, predicted_labels,
                                            cmap=plt.cm.Blues, values_format='.5g',
                                            display_labels=label_names)
    else:
        # Older scikit-learn: route the predictions through the identity
        # estimator so that plot_confusion_matrix() "predicts" them.
        disp = sklm.plot_confusion_matrix(estim, np.array(predicted_labels), true_labels,
                                          cmap=plt.cm.Blues, values_format='.5g',
                                          display_labels=label_names)
    disp.ax_.set_title(plot_title)
    if fname is not None:
        plt.savefig(fname, transparent=True, bbox_inches='tight', pad_inches=0)
    plt.show()

In [None]:
fn_results = Path("output/albert-base-v2-within_512_8-acc64_3-art-art/pred_results_same-b.txt")

# The first line is skipped (presumably a header - verify against the file
# trainer.py writes); every following non-empty line contributes the integer
# in its second tab-separated field. Blank lines, e.g. a trailing newline at
# the end of the file, are ignored instead of raising an IndexError.
with fn_results.open("r") as fp:
    fp.readline()
    preds = [int(line.split("\t")[1]) for line in fp if line.strip()]

In [None]:
# Reload the pickled artificial evaluation set so that this cell always starts
# from a frame without a "predictions" column and can be re-run safely.
fn_art_eval_p = "data/artificial_evalset.p"

with open(fn_art_eval_p, "rb") as fp:
    artificial_evalset_df = pickle.load(fp)

# join() aligns on the index; with differing lengths it would silently produce
# missing or dropped predictions, so fail loudly instead.
if len(preds) != len(artificial_evalset_df):
    raise ValueError(
        f"Got {len(preds)} predictions for {len(artificial_evalset_df)} evaluation rows"
    )

# NOTE(review): assumes the frame carries the default 0..n-1 row index so that
# prediction i lines up with row i - confirm if the pickle was produced elsewhere.
preds = pd.Series(preds, dtype=bool, name="predictions")
artificial_evalset_df = artificial_evalset_df.join(preds)

In [None]:
# Subsets of the artificial evaluation set, selected via the "type" column.
filter_neg = artificial_evalset_df["type"].str.endswith("NEG")

art_df_neg = artificial_evalset_df[filter_neg]
art_df_no_neg = artificial_evalset_df[~filter_neg]
art_df_para = artificial_evalset_df[artificial_evalset_df["type"].str.startswith("CON")]
art_df_arg = artificial_evalset_df[artificial_evalset_df["type"].str.startswith("DIFF")]
art_df_cit = artificial_evalset_df[artificial_evalset_df["type"].str.startswith("CIT")]

fig_prefix = "fig_1_"
fig_fmt = ".pdf"  # png/pdf

# (frame, plot title, file-name stem) - one confusion matrix per entry, drawn
# and saved in this order.
plot_specs = [
    (artificial_evalset_df, 'ALL', "all"),
    (art_df_neg, 'w/ negation', "neg"),
    (art_df_no_neg, 'w/o negation', "noneg"),
    (art_df_para, 'Paraphrase', "para"),
    (art_df_arg, 'Argument', "arg"),  # Diff. Arg.
    (art_df_cit, 'Citation', "cit"),  # Reference
]

for subset_df, plot_title, fig_name in plot_specs:
    plot_conf_mat(
        subset_df["is_same_side"].to_list(),
        subset_df["predictions"].to_list(),
        ['0', '1'],
        plot_title,
        fname=f"{fig_prefix}{fig_name}{fig_fmt}",
    )

---

In [None]:
# Run the `dbot-message` shell command with the config file .dbot.conf; the
# message text is "Finished " followed by the current run_name.
! dbot-message -c .dbot.conf "Finished "{run_name}

In [None]:
#! tail -n 10 ./output_logs/{run_name}/stderr.log | dbot-message - -c .dbot.conf --type