In [None]:
# Loading the extension alone leaves auto-reloading switched off; mode 2
# re-imports every (non-excluded) module before each cell executes, so edits
# to the local library modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
import os
import json
import re
import random
import pandas as pd
from sklearn.model_selection import train_test_split

# Kernel-wide settings. The GPU pin applies to this kernel process only; the
# `!` training commands further down set their own CUDA_VISIBLE_DEVICES inline.
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

# Show full cell contents when displaying DataFrames (no column-width truncation).
pd.set_option('display.max_colwidth', None)

# Make the project-local library directories importable.
sys.path.extend(['../../lib/', '../../lib/counter_gen_lib'])

In [None]:
# Later cells call `utility.clean_df(...)` and
# `utility.prepare_data_for_training(...)`, which needs the name `utility`
# itself to be bound; a star-import alone does not bind the module name.
# The module is imported *before* the star-import so that, should the
# star-import also export something called `utility`, that export still wins
# exactly as it did previously.
import utility
from utility import *
import interact

### Data Preparation:

In [None]:
# Directory holding the pickled dataset that the next cell loads.
data_path = "/workspace/ceph_data/argument-undermining/jo_data"

In [None]:
# NOTE: pd.read_pickle unpickles arbitrary Python objects, so this must only
# ever be pointed at a trusted file.
jo_df = pd.read_pickle(f"{data_path}/gen_data.pickle")

# Partition the rows by the value stored in the `split` column.
training_df = jo_df.loc[jo_df['split'] == 'train']
valid_df = jo_df.loc[jo_df['split'] == 'val']

In [None]:
# Pass both splits through the project's cleaning helper, training split first.
training_df, valid_df = (
    utility.clean_df(split_df) for split_df in (training_df, valid_df)
)

In [None]:
# Build the structure that is serialised to JSON for the GPT training script.
# Only the first 5000 rows of the validation split are passed in.
jo_training = utility.prepare_data_for_training(
    {
        'train': training_df,
        'valid': valid_df[0:5000],
    },
    full_counter=False,
    context='title+post',
    post_clm='post',
    comment_clm='comment_sents',
    attacks_clm='premise_counter_premise_pairs',
    max_sens=20,
    baseline=False,
)

In [None]:
# Serialise the prepared data for the training scripts. The context manager
# guarantees the file is flushed and closed as soon as the dump finishes,
# instead of relying on garbage collection of an anonymous file handle.
with open('/workspace/ceph_data/argument-undermining/data/gpt_data/jo_data.json', 'w') as json_file:
    json.dump(jo_training, json_file)

### Training GPT:

In [1]:
# Directory containing the generation library's train.py.
GENERATION_LIB_PATH = "/workspace/counter-argument-generation-via-undermining/code/lib/counter_gen_lib"

### Training the model with weak premises annotated:

In [None]:
# Launch train.py from GENERATION_LIB_PATH on GPU 5 with instance version 'v2'.
# GENERATION_LIB_PATH is a *Python* variable, so it is interpolated with
# IPython's `{name}` syntax. The shell-style `${name}` form leaves a literal
# `$` in front of the expanded value, producing a non-existent script path.
!CUDA_VISIBLE_DEVICES=5 python {GENERATION_LIB_PATH}/train.py \
    --dataset_path /workspace/ceph_data/argument-undermining/data/gpt_data/jo_data.json \
    --model_checkpoint openai-gpt \
    --log_dir /workspace/ceph_data/argument-undermining/models/gen_models/ \
    --output_model_checkpoint my_approach_v2 \
    --dataset_cache ./jo_data_cache \
    --build_instance_version 'v2' \
    --num_candidates 2 \
    --train_batch_size 1 \
    --valid_batch_size 1 \
    --lr 6.25e-5 \
    --n_epochs 6

### Training the model with weak premises annotated, and with special tokens:

In [None]:
# Launch the training script on GPU 3 with instance version 'v4'; the output
# checkpoint is named my_approach_v4.
# NOTE(review): this run uses the same --dataset_cache path as the other
# training cells — confirm train.py's cache does not depend on
# --build_instance_version / --baseline.
!CUDA_VISIBLE_DEVICES=3 python /workspace/computationally-undermining-arguments/thirdparty/transfer-learning-conv-ai/train.py \
    --dataset_path /workspace/ceph_data/argument-undermining/data/gpt_data/jo_data.json \
    --model_checkpoint openai-gpt \
    --log_dir /workspace/ceph_data/argument-undermining/models/gen_models/ \
    --output_model_checkpoint my_approach_v4 \
    --dataset_cache ./jo_data_cache \
    --build_instance_version 'v4' \
    --num_candidates 2 \
    --train_batch_size 1 \
    --valid_batch_size 1 \
    --lr 6.25e-5 \
    --n_epochs 6

### Training the baseline:

In [None]:
# Launch the training script on GPU 3 with the --baseline flag; the output
# checkpoint is named jo_baseline_gpt.
!CUDA_VISIBLE_DEVICES=3 python3 /workspace/computationally-undermining-arguments/thirdparty/transfer-learning-conv-ai/train.py \
    --dataset_path /workspace/ceph_data/argument-undermining/data/gpt_data/jo_data.json \
    --model_checkpoint openai-gpt \
    --log_dir /workspace/ceph_data/argument-undermining/models/gen_models/ \
    --baseline \
    --dataset_cache ./jo_data_cache \
    --output_model_checkpoint jo_baseline_gpt \
    --num_candidates 2 \
    --train_batch_size 1 \
    --valid_batch_size 1 \
    --lr 6.25e-5 \
    --n_epochs 6