In [1]:
%load_ext autoreload

In [2]:
import sys
sys.path.append('../../multitask-learning-transformers/shared_encoder')
sys.path.append('../src-py/')

In [3]:
import logging
import torch
import torch.nn as nn
import nltk
import numpy as np
from datasets import load_dataset, load_metric
from torch.utils.data.dataloader import DataLoader
from tqdm.auto import tqdm
from tqdm import tqdm as tqdm1

In [4]:
import numpy as np
from datasets import load_metric
from sklearn.metrics import precision_recall_fscore_support
metric = load_metric("accuracy")

In [5]:
torch.cuda.is_available()

True

In [6]:
from utils import *

In [7]:
from transformers import AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer
from datasets import Dataset

In [8]:
%autoreload 
import transformers
#from accelerate import Accelerator
from filelock import FileLock
from transformers import set_seed
from transformers.file_utils import is_offline_mode
from multitask_model import MultitaskModel
#from preprocess import convert_to_features
from multitask_data_collator import MultitaskTrainer, NLPDataCollator
from multitask_eval import multitask_eval_fn
from checkpoint_model import save_model
from pathlib import Path

In [9]:
# from evaluate_bleu import *
from sklearn.metrics import f1_score, classification_report

In [10]:
import pandas as pd
import pickle
pd.set_option('display.max_colwidth', None)

In [11]:
set_seed(42)

In [12]:
df_train = pd.read_pickle('../../../data-ceph/arguana/arg-generation/multi-taks-counter-argument-generation/reddit_data/conclusion_and_ca_generation/preprocessed_train_conclusion_all.pkl')
df_validation = pd.read_pickle('../../../data-ceph/arguana/arg-generation/multi-taks-counter-argument-generation/reddit_data/conclusion_and_ca_generation/sample_valid_conclusion_all_preprocessed.pkl')

In [13]:
#Execute once to create a sample from the stance data
# stance_train_df = pd.read_csv('../data/kialo_stance_classification_training_data.csv')
# stance_valid_df = pd.read_csv('../data/kialo_stance_classification_valid_data.csv')

# stance_train_df['input_txt'] = stance_train_df.apply(lambda x: x['claim1'] + ' </s> ' + x['claim2'], axis=1)
# stance_valid_df['input_txt'] = stance_valid_df.apply(lambda x: x['claim1'] + ' </s> ' + x['claim2'], axis=1)
# stance_train_df = stance_train_df.rename(columns={'label':'labels'})
# stance_valid_df = stance_valid_df.rename(columns={'label':'labels'})

# stance_train_df.sample(50000).to_pickle('../data/kialo_stance_classification_training_data_sample.csv')
# stance_valid_df.sample(2000).to_pickle('../data/kialo_stance_classification_valid_data_sample.csv')

In [14]:
stance_train_df = pd.read_pickle('../data/kialo_stance_classification_training_data_sample.csv')
stance_valid_df = pd.read_pickle('../data/kialo_stance_classification_valid_data_sample.csv')

In [15]:
tasks = ['counter_gen', 'conclusion_gen', 'stance_cls']

In [16]:
model_name = 'facebook/bart-large'

In [17]:
model_names = [model_name] * 2

In [18]:
class MultitaskBartModel(transformers.PreTrainedModel):
    def __init__(self, encoder, taskmodels_dict):
        """
        Setting MultitaskModel up as a PretrainedModel allows us
        to take better advantage of Trainer features
        """
        super().__init__(transformers.PretrainedConfig())

        self.encoder = encoder
        self.taskmodels_dict = nn.ModuleDict(taskmodels_dict)

    @classmethod
    def create(cls, model_name, model_type_dict, model_config_dict):
        """
        This creates a MultitaskModel using the model class and config objects
        from single-task models.

        We do this by creating each single-task model, and having them share
        the same encoder transformer.
        """
        shared_encoder = None
        taskmodels_dict = {}
        for task_name, model_type in model_type_dict.items():
            model = model_type.from_pretrained(
                model_name,
                config=model_config_dict[task_name],
            )
            if shared_encoder is None:
                shared_encoder = model.model.encoder
            else:
                model.model.encoder = shared_encoder
            taskmodels_dict[task_name] = model
        return cls(encoder=shared_encoder, taskmodels_dict=taskmodels_dict)
    
    @classmethod
    def load(cls, model_folder, model_type_dict, model_config_dict):
        """
        This loads a MultitaskModel using the model class and config objects
        from single-task models.
        """
        shared_encoder = None
        taskmodels_dict = {}
        for task_name, model_type in model_type_dict.items():
            model = model_type.from_pretrained(
                f"{model_folder}/{task_name}_model",
                config=model_config_dict[task_name],
            )
            if shared_encoder is None:
                shared_encoder = model.model.encoder
            else:
                model.model.encoder = shared_encoder
            taskmodels_dict[task_name] = model
        return cls(encoder=shared_encoder, taskmodels_dict=taskmodels_dict)

    def forward(self, task_name, **kwargs):
        return self.taskmodels_dict[task_name](**kwargs)
    
    def generate(self, input_ids, attention_mask, task_name, gen_kwargs):
       
        return self.taskmodels_dict[task_name].generate(
            input_ids,
            attention_mask=attention_mask,
            **gen_kwargs,
        )

    def resize_token_embeddings(self, new_num_tokens):
        for task_name, model in self.taskmodels_dict.items():
            model.resize_token_embeddings(new_num_tokens)
            
            
    def predict_stances(self, dataset, task_name='stance_cls'):
        data_loader = torch.utils.data.DataLoader(dataset, batch_size=16)
        all_labels = []
        for idx, batch in enumerate(data_loader):
            input_ids, attention_mask = batch['input_ids'].cuda(), batch['attention_mask'].cuda()
            output = self.taskmodels_dict[task_name](input_ids, attention_mask)
            pred_labels = output.logits.argmax(-1).cpu().tolist()
            all_labels+= pred_labels
        
        return all_labels
        
def preprocess_function(examples, tokenizer, input_clm, output_clm, max_input_length=512, max_target_length=512):
    input_examples  = examples[input_clm]
    output_examples = examples[output_clm]
        
    if isinstance(input_examples[0], list):
        input_examples = [' '.join(x) for x in input_examples]
    
    processed_output = tokenizer(input_examples, max_length=max_input_length, truncation=True, padding='max_length')
    
    if isinstance(output_examples[0], list):
        output_examples = [' '.join(x) for x in output_examples]
    

    # Setup the tokenizer for targets
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(output_examples, max_length=max_target_length, truncation=True, padding='max_length')    
    
    processed_output["labels"] = labels["input_ids"]
    return processed_output

def save_model(multitask_model, fold='mt-v2', tasks = ['stance_cls']):
    for task_name in tasks:
        multitask_model.taskmodels_dict[task_name].config.to_json_file(
            f"../data/output/ca-final-models/{fold}/{task_name}_model/config.json"
        )
        torch.save(
            multitask_model.taskmodels_dict[task_name].state_dict(),
            f"../data/output/ca-final-models/{fold}/{task_name}_model/pytorch_model.bin",
        )
        tokenizer.save_pretrained(f"../data/output/ca-final-models/{fold}/{task_name}_model/")

### Training single model for stance classification:

In [99]:
stance_model = MultitaskBartModel.create(
    model_name=model_name,
    model_type_dict={
        "stance_cls": transformers.BartForSequenceClassification,
    },
    model_config_dict={
        "stance_cls": transformers.AutoConfig.from_pretrained('facebook/bart-large', num_labels=2)
    },
)

tokenizer = transformers.AutoTokenizer.from_pretrained('facebook/bart-large')

loading configuration file https://huggingface.co/facebook/bart-large/resolve/main/config.json from cache at /mnt/ceph/storage/data-tmp/current//sile2804/.cache/huggingface/transformers/3f12fb71b844fcb7d591fdd4e55027da90d7b5dd6aa5430ad00ec6d76585f26c.bc22f15dc7ba074ee0a60bdd34c5f2fe3b6d746f89e765303376c51aff04e260
Model config BartConfig {
  "_name_or_path": "facebook/bart-large",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartModel"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "force

In [105]:
stance_train_dataset = Dataset.from_pandas(stance_train_df)
stance_valid_dataset = Dataset.from_pandas(stance_valid_df)

training_ds_dict = {'stance_cls': stance_train_dataset.map(lambda a: tokenizer(a['input_txt'], padding='max_length', max_length=256, truncation=True),
                                           remove_columns=['claim1', 'claim2', 'input_txt', '__index_level_0__'],batched=True)}
valid_ds_dict = {'stance_cls': stance_valid_dataset.map(lambda a: tokenizer(a['input_txt'], padding='max_length', max_length=256, truncation=True),
                                           remove_columns=['claim1', 'claim2', 'input_txt', '__index_level_0__'],batched=True)}

  0%|          | 0/50 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

In [108]:
trainer = MultitaskTrainer(
    model=stance_model,
    args=transformers.TrainingArguments(
        output_dir='../data/output/ca-final-models/bart-based-stance-classifier',
        overwrite_output_dir=True,
        learning_rate=5e-5,
        do_train=True,
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        evaluation_strategy='steps',
        save_steps=500,
        eval_steps=500
    ),
    #data_collator=DataCollatorForSeq2Seq(tokenizer, multitask_model),
    #callbacks = [MyTrainerCallback(multitask_model, valid_ds_dict)],
    train_dataset=training_ds_dict,
    eval_dataset =valid_ds_dict,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [109]:
trainer.train()

***** Running training *****
  Num examples = 50000
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 9375


Step,Training Loss,Validation Loss
500,0.6816,0.632229
1000,0.6213,0.568599
1500,0.5768,0.621323
2000,0.5553,0.537356
2500,0.5272,0.529795
3000,0.5051,0.505297
3500,0.4376,0.529449
4000,0.4239,0.473925
4500,0.4087,0.466852
5000,0.394,0.446495


***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8
Saving model checkpoint to ../data/output/ca-final-models/bart-based-stance-classifier/checkpoint-500
Configuration saved in ../data/output/ca-final-models/bart-based-stance-classifier/checkpoint-500/config.json
Model weights saved in ../data/output/ca-final-models/bart-based-stance-classifier/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8
Saving model checkpoint to ../data/output/ca-final-models/bart-based-stance-classifier/checkpoint-1000
Configuration saved in ../data/output/ca-final-models/bart-based-stance-classifier/checkpoint-1000/config.json
Model weights saved in ../data/output/ca-final-models/bart-based-stance-classifier/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8
Saving model checkpoint to ../data/output/ca-final-models/bart-based-stance-classifier/checkpoint-1500
Configuration saved in ../da

TrainOutput(global_step=9375, training_loss=0.4128147021484375, metrics={'train_runtime': 2400.6514, 'train_samples_per_second': 62.483, 'train_steps_per_second': 3.905, 'total_flos': 8.15087227392e+16, 'train_loss': 0.4128147021484375, 'epoch': 3.0})

In [112]:
save_model(stance_model, fold='bart-based-stance-classifier', tasks = ['stance_cls'])

tokenizer config file saved in ../data/output/ca-final-models/bart-based-stance-classifier/stance_cls_model/tokenizer_config.json
Special tokens file saved in ../data/output/ca-final-models/bart-based-stance-classifier/stance_cls_model/special_tokens_map.json


#### Evaluate:

In [47]:
stance_model = MultitaskBartModel.load(
    model_folder='../data/output/ca-final-models/bart-based-stance-classifier',
    model_type_dict={
        "stance_cls": transformers.BartForSequenceClassification,
    },
    model_config_dict={
        "stance_cls": transformers.AutoConfig.from_pretrained('facebook/bart-large', num_labels=2)
    }
)

tokenizer = transformers.AutoTokenizer.from_pretrained('../data/output/ca-final-models/bart-based-stance-classifier/stance_cls_model')

_ = stance_model.to('cuda')
_ = stance_model.eval()

loading configuration file https://huggingface.co/facebook/bart-large/resolve/main/config.json from cache at /mnt/ceph/storage/data-tmp/current//sile2804/.cache/huggingface/transformers/3f12fb71b844fcb7d591fdd4e55027da90d7b5dd6aa5430ad00ec6d76585f26c.bc22f15dc7ba074ee0a60bdd34c5f2fe3b6d746f89e765303376c51aff04e260
Model config BartConfig {
  "_name_or_path": "facebook/bart-large",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartModel"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "force

In [45]:
stance_valid_dataset = Dataset.from_pandas(stance_valid_df)
ds = stance_valid_dataset.map(lambda a: tokenizer(a['input_txt'], padding='max_length', max_length=256, truncation=True), remove_columns=['claim1', 'claim2', 'input_txt', '__index_level_0__'],batched=True)
ds.set_format(type='torch', columns=['input_ids', 'labels', 'attention_mask'])

  0%|          | 0/2 [00:00<?, ?ba/s]

In [48]:
pred_labels = stance_model.predict_stances(ds)
true_labels = ds['labels'].tolist()
metric.compute(predictions=pred_labels, references=true_labels)

{'accuracy': 0.814}

In [37]:
stance_test_df = pd.read_csv('../data/kialo_stance_classification_test_data.csv')
stance_test_df['input_txt'] = stance_test_df.apply(lambda x: x['claim1'] + ' </s> ' + x['claim2'], axis=1)
stance_test_df = stance_test_df.rename(columns={'label':'labels'})

stance_valid_dataset = Dataset.from_pandas(stance_test_df)
ds = stance_valid_dataset.map(lambda a: tokenizer(a['input_txt'], padding='max_length', max_length=256, truncation=True), remove_columns=['claim1', 'claim2', 'input_txt'],batched=True)
ds.set_format(type='torch', columns=['input_ids', 'labels', 'attention_mask'])

  0%|          | 0/23 [00:00<?, ?ba/s]

In [49]:
predpred_labels= stance_model.predict_stances(ds)
true_labels = ds['labels'].tolist()
metric.compute(predictions=pred_labels, references=true_labels)

{'accuracy': 0.814}

### Creating and Training the MultiTask Model:

In [19]:
multitask_model = MultitaskBartModel.create(
    model_name=model_name,
    model_type_dict={
        "counter_gen": transformers.BartForConditionalGeneration,
        "conclusion_gen": transformers.BartForConditionalGeneration,
        "stance_cls": transformers.BartForSequenceClassification,
    },
    model_config_dict={
        "counter_gen": transformers.AutoConfig.from_pretrained('facebook/bart-large'),
        "conclusion_gen": transformers.AutoConfig.from_pretrained('facebook/bart-large'),
        "stance_cls": transformers.AutoConfig.from_pretrained('facebook/bart-large', num_labels=2)
    },
)

tokenizer = transformers.AutoTokenizer.from_pretrained('facebook/bart-large')

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-large and are newly initialized: ['classification_head.out_proj.bias', 'classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
train_ds = Dataset.from_pandas(df_train)
valid_ds = Dataset.from_pandas(df_validation)

stance_train_dataset = Dataset.from_pandas(stance_train_df)
stance_valid_dataset = Dataset.from_pandas(stance_valid_df)

In [21]:
training_ds_dict = {
    'conclusion_gen':train_ds.map(lambda a: preprocess_function(a, tokenizer, 'post', 'title', 512, 100),
                                  remove_columns=['post_id', 'split', 'comment_id', 'title', 'post', 'n_sentences', 'counter', 'bot_comment', 'counter_conclusion', 'counter_conclusions', '__index_level_0__'],
                                  batched=True),
    'counter_gen':train_ds.map(lambda a: preprocess_function(a, tokenizer, 'post', 'counter', 512, 256),
                     remove_columns=['post_id', 'split', 'comment_id', 'title', 'post', 'n_sentences', 'counter', 'bot_comment', 'counter_conclusion', 'counter_conclusions', '__index_level_0__'],
                     batched=True),
    'stance_cls': stance_train_dataset.map(lambda a: tokenizer(a['input_txt'], padding='max_length', max_length=256, truncation=True),
                                           remove_columns=['claim1', 'claim2', 'input_txt', '__index_level_0__'],batched=True)
}

  0%|          | 0/26 [00:00<?, ?ba/s]

  0%|          | 0/26 [00:00<?, ?ba/s]

  0%|          | 0/50 [00:00<?, ?ba/s]

In [22]:
valid_ds_dict = {
    'conclusion_gen':valid_ds.map(lambda a: preprocess_function(a, tokenizer, 'post', 'title', 512, 100),
                                  remove_columns=['post_id', 'split', 'comment_id', 'title', 'post', 'n_sentences', 'counter', 'bot_comment', '__index_level_0__'],
                                  batched=True),
    'counter_gen':valid_ds.map(lambda a: preprocess_function(a, tokenizer, 'post', 'counter', 512, 256),
                     remove_columns=['post_id', 'split', 'comment_id', 'title', 'post', 'n_sentences', 'counter', 'bot_comment', '__index_level_0__'],
                     batched=True),
    'stance_cls': stance_valid_dataset.map(lambda a: tokenizer(a['input_txt'], padding='max_length', max_length=256, truncation=True),
                                           remove_columns=['claim1', 'claim2', 'input_txt', '__index_level_0__'],batched=True)
}

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

##### Train the model:

In [23]:
trainer = MultitaskTrainer(
    model=multitask_model,
    args=transformers.TrainingArguments(
        output_dir='../data/output/ca-final-models/mt-v3',
        overwrite_output_dir=True,
        learning_rate=5e-5,
        do_train=True,
        num_train_epochs=1,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        #evaluation_strategy='steps',
        save_steps=500,
        #eval_steps=500
    ),
    #data_collator=DataCollatorForSeq2Seq(tokenizer, multitask_model),
    #callbacks = [MyTrainerCallback(multitask_model, valid_ds_dict)],
    train_dataset=training_ds_dict,
    #eval_dataset =valid_ds_dict,
)

In [24]:
trainer.train()

***** Running training *****
  Num examples = 101408
  Num Epochs = 1
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 6339


Step,Training Loss
500,1.8165
1000,0.9435
1500,0.8709
2000,0.8716
2500,0.8202
3000,0.8395
3500,0.8293
4000,0.8314
4500,0.7846
5000,0.7454


Saving model checkpoint to ../data/output/ca-final-models/mt-v3/checkpoint-500
Configuration saved in ../data/output/ca-final-models/mt-v3/checkpoint-500/config.json
Model weights saved in ../data/output/ca-final-models/mt-v3/checkpoint-500/pytorch_model.bin
Saving model checkpoint to ../data/output/ca-final-models/mt-v3/checkpoint-1000
Configuration saved in ../data/output/ca-final-models/mt-v3/checkpoint-1000/config.json
Model weights saved in ../data/output/ca-final-models/mt-v3/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to ../data/output/ca-final-models/mt-v3/checkpoint-1500
Configuration saved in ../data/output/ca-final-models/mt-v3/checkpoint-1500/config.json
Model weights saved in ../data/output/ca-final-models/mt-v3/checkpoint-1500/pytorch_model.bin
Saving model checkpoint to ../data/output/ca-final-models/mt-v3/checkpoint-2000
Configuration saved in ../data/output/ca-final-models/mt-v3/checkpoint-2000/config.json
Model weights saved in ../data/output/ca-final-mo

TrainOutput(global_step=6339, training_loss=0.903246081464015, metrics={'train_runtime': 2712.6629, 'train_samples_per_second': 37.383, 'train_steps_per_second': 2.337, 'total_flos': 1.7766239821386547e+17, 'train_loss': 0.903246081464015, 'epoch': 1.0})

In [27]:
save_model(multitask_model, fold='mt-v3', tasks=['counter_gen', 'conclusion_gen', 'stance_cls'])

tokenizer config file saved in ../data/output/ca-final-models/mt-v3/counter_gen_model/tokenizer_config.json
Special tokens file saved in ../data/output/ca-final-models/mt-v3/counter_gen_model/special_tokens_map.json
tokenizer config file saved in ../data/output/ca-final-models/mt-v3/conclusion_gen_model/tokenizer_config.json
Special tokens file saved in ../data/output/ca-final-models/mt-v3/conclusion_gen_model/special_tokens_map.json
tokenizer config file saved in ../data/output/ca-final-models/mt-v3/stance_cls_model/tokenizer_config.json
Special tokens file saved in ../data/output/ca-final-models/mt-v3/stance_cls_model/special_tokens_map.json


### Evaluate stance performance:

In [19]:
multitask_model = MultitaskBartModel.load(
    model_folder='../data/output/ca-final-models/mt-v3',
    model_type_dict={
        "counter_gen": transformers.BartForConditionalGeneration,
        "conclusion_gen": transformers.BartForConditionalGeneration,
        "stance_cls": transformers.BartForSequenceClassification,
    },
    model_config_dict={
        "counter_gen": transformers.AutoConfig.from_pretrained('facebook/bart-large'),
        "conclusion_gen": transformers.AutoConfig.from_pretrained('facebook/bart-large'),
        "stance_cls": transformers.AutoConfig.from_pretrained('facebook/bart-large', num_labels=2)
    }
)

tokenizer = transformers.AutoTokenizer.from_pretrained('../data/output/ca-final-models/mt-v3/counter_gen_model')

_ = multitask_model.to('cuda')
_ = multitask_model.eval()

#### Evaluate stance:

In [24]:
stance_valid_dataset = Dataset.from_pandas(stance_valid_df)
ds = stance_valid_dataset.map(lambda a: tokenizer(a['input_txt'], padding='max_length', max_length=256, truncation=True), remove_columns=['claim1', 'claim2', 'input_txt', '__index_level_0__'],batched=True)
ds.set_format(type='torch', columns=['input_ids', 'labels', 'attention_mask'])

  0%|          | 0/2 [00:00<?, ?ba/s]

In [25]:
pred_labels = multitask_model.predict_stances(ds)
true_labels = ds['labels'].tolist()
metric.compute(predictions=pred_labels, references=true_labels)

{'accuracy': 0.826}

In [26]:
stance_test_df = pd.read_csv('../data/kialo_stance_classification_test_data.csv')
stance_test_df['input_txt'] = stance_test_df.apply(lambda x: x['claim1'] + ' </s> ' + x['claim2'], axis=1)
stance_test_df = stance_test_df.rename(columns={'label':'labels'})

stance_valid_dataset = Dataset.from_pandas(stance_test_df)
ds = stance_valid_dataset.map(lambda a: tokenizer(a['input_txt'], padding='max_length', max_length=256, truncation=True), remove_columns=['claim1', 'claim2', 'input_txt'],batched=True)
ds.set_format(type='torch', columns=['input_ids', 'labels', 'attention_mask'])

  0%|          | 0/23 [00:00<?, ?ba/s]

In [27]:
pred_labels = multitask_model.predict_stances(ds)
true_labels = ds['labels'].tolist()
metric.compute(predictions=pred_labels, references=true_labels)

{'accuracy': 0.8351741337846263}

#### Evaluate the generation:

In [29]:
def generate_texts(model, tokenizer, df, gen_kwargs, task_name='counter_gen'):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    
    ds = Dataset.from_pandas(df)
    enc_ds = ds.map(lambda a: preprocess_function(a, tokenizer, 'post', 'counter', 512, 256),
                          remove_columns=['post_id', 'split', 'comment_id', 'title', 'post', 'n_sentences', 'counter', 'bot_comment', '__index_level_0__'],
                          batched=True)
    enc_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    dataloader = torch.utils.data.DataLoader(enc_ds, batch_size=16)

    generated_texts = []

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_tokens = model.generate(
                input_ids,
                attention_mask,
                task_name,
                gen_kwargs
            )
            
            if isinstance(generated_tokens, tuple):
                generated_tokens = generated_tokens[0]
                
            generated_tokens = generated_tokens.cpu().numpy()
            decoded_preds = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
            
            generated_texts += decoded_preds
    
    return generated_texts

In [30]:
test_df = pd.read_pickle('../../../data-ceph/arguana/arg-generation/multi-taks-counter-argument-generation/reddit_data/conclusion_and_ca_generation/test_conclusion_all_preprocessed.pkl')

In [32]:
gen_kwargs = {
    "do_sample": False, 
    "max_length":100,
    "min_length":50,
    "top_p":0.95, 
    "num_beams":4,
}


attacks = generate_texts(multitask_model, tokenizer, test_df, gen_kwargs, 'counter_gen')

  0%|          | 0/9 [00:00<?, ?ba/s]

In [33]:
gen_kwargs = {
    "do_sample": False, 
    "max_length":100,
    "min_length":10,
    "num_beams":4,
}

conclusions = generate_texts(multitask_model, tokenizer, test_df, gen_kwargs, 'conclusion_gen')

  0%|          | 0/9 [00:00<?, ?ba/s]

In [35]:
test_df['post_conclusion'] = conclusions
test_df['post_counter'] = attacks

In [37]:
test_df.to_pickle('../data/output/ca-final-models/mt-v2/predictions.pkl')

In [38]:
test_df[['title', 'post_conclusion', 'post_counter']].head(n=10)

Unnamed: 0,title,post_conclusion,post_counter
410850,people should come with instructions,We should have an electronic signature on our introduction to meet people.,"i don't think it's a good idea to have a social contract, but i think it would be a great idea if we could just instantly avoid people or gravitate toward some. if you're an introvert, you're probably not going to be a good friend, but if you are an extrovert, then you're going to become a great friend."
410858,People should not be heavily criticized for things they put on social media in the distant past,I think it is unfair for the internet to come down hard on people for things they put on social media a long time ago.,"i think it is unfair for the internet to come down hard on people for things they put on social media a long time ago. the internet is a place for people to express their opinions, and it is not a place where they should be held accountable for what they say."
410902,We shouldn't focus on slowing climate change,There is no good reason to reduce CO2 emissions.,"climate change is not a problem that can be solved by reducing co2 emissions. it is a problem. it's a problem we need to deal with, but it's not the only one. there are many other environmental issues that are more important than climate change. for example, global warming is a major threat to the environment, and it's going to be a problem for a long time to come. the problem is that we don't know what to do about it."
410910,The Australian PM was right to tell students to stop activism around global warming,I believe that climate change is real and that activism is not the right way to combat it.,"i don't think it's fair to say that climate change is the most important issue in the world right now, but it's certainly one of the most pressing issues of our time. it's not that we can't do anything about it, it's that we don't know what we can do about it. we have a lot of information about the effects of climate change, but we have no way of knowing how much of it we can actually do. there's a"
410916,Feeding cats or dogs a diet with meat is indefensible.,"If you feed your cat dog meat, you're responsible for the deaths of 100s of smarter animals such as pigs.",i don't think it's fair to call people who feed their pets vegan diets shit like 'animal abusers' because they don't kill any animals. it's not like they're killing 100s of animals to sustain just one of them. they're doing so because they're trying to save the lives of other animals.
410927,For a government to allow Hate Speech on the basis of Free Speech they must also abolish Copyright.,Copyright should be abolished and replaced with a right to hate speech.,"the problem with this argument is that it ignores the fact that the first amendment is a document that was written in the early 20th century. it was written to protect freedom of speech, not freedom of expression. it is not about freedom of the press, it is about freedom from government interference in the free speech of the people. it's not about the right to free speech, it's about free speech."
410940,Cmv: if you are against legal abortion then you are for illegal abortion.,Banning abortion would be a win for black market abortion clinics,"the problem with banning abortion is that it's not a solution to the problem of abortion. it's a solution for a problem that doesn't exist. the problem with abortion is not that there is a demand for it, it's that there isn't a market for it. there is no market for abortion. the market for abortions is that there are people who don't want to have a baby, but they don't have the money to pay for a doctor to do it"
410964,The job market in the US has never been better.,The US is the best job market in US history.,"i don't think it's fair to say that the unemployment rate is the lowest its been since 1969. it's the lowest it's ever been since the bls began recording data, and it's not even close to the lowest unemployment rate in the past 50 years. the real unemployment rate has been rising for decades, but it's been steadily increasing since the early 1970s. if you look at the black unemployment rate, we see that it's at its lowest its ever"
410971,Men and women should not be treated the same when it comes to sexual harrassment,Men who have been sexually harassed should be treated equally as women who are sexually harassed.,"i don't think it's fair to say that men should be treated equally as women when they are catcalled, but i think that it's unfair to treat men equally when they have been catcalled. it's not fair to treat women equally because they are more likely to be catcalled than men, but it is unfair because it's a violation of their rights."
410991,All couples must undergo standarized tests in order to conceive and have children,I believe that parents should be required to pass some sort of test before having children.,"the problem with this is that it's not about the children, it's about the parents. if you want to have children, you need to have a system in place to ensure that the children are raised in a way that is safe, healthy, and well cared for. the problem is that there is no such thing as'safe' or 'healthy' for the child. it's a matter of the child's well being, not the parents' well being. if"
