In [1]:
import os
import sys

# Add ../src-py/ to the module search path (later cells import `model`, `utils`
# and `prompted_conclusion_utils`, presumably from there -- confirm).
sys.path.append('../src-py/')
# Restrict CUDA to GPU 0. This is set here, before the next cell imports torch.
os.environ['CUDA_VISIBLE_DEVICES']='0'

In [None]:
import csv
import random
import torch
import numpy as np
from torch.utils.data import DataLoader
from transformers import AdamW
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
import torch
import transformers
from rouge_score import rouge_scorer
from transformers import BartTokenizer, BartForConditionalGeneration
from torch.nn import CrossEntropyLoss, MSELoss
import os
from model import MultiTaskBart
from model import OurModel
from utils import parse_df
import time
import sys
import tqdm
import pandas as pd
from datasets import load_dataset, load_metric, Dataset

import nltk
pd.set_option('display.max_colwidth', None)

In [None]:
import prompted_conclusion_utils as conc_utils
from prompted_conclusion_utils import *

from transformers.generation_logits_process import * 

In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("Using GPU? ", torch.cuda.is_available())
# torch.cuda.get_device_name() raises on a machine without CUDA, so it is only
# queried when a GPU is present; the CPU fallback above stays usable.
print("Device name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu")


# Directory of the reddit conclusion / counter-argument generation data.
data_dir = '../data-ceph/arguana/arg-generation/multi-taks-counter-argument-generation/reddit_data/conclusion_and_ca_generation/'
# Checkpoint directory of the stance classifier used as the teacher model.
teacher_model_path='../data/output/stance_classification/best_model/'

# Batch size used when the DataLoader is built further down.
batch_size=4

In [None]:
# Teacher model: the stance classifier stored under teacher_model_path, wrapped
# in a text-classification pipeline that get_stance_scores() calls.
stance_classifier_teacher_tokenizer = AutoTokenizer.from_pretrained(teacher_model_path)
stance_classifier_teacher_model     = AutoModelForSequenceClassification.from_pretrained(teacher_model_path)
# The pipeline takes a CUDA ordinal, or -1 for CPU. Follow the same availability
# check as the notebook's `device` instead of hard-coding GPU 0, so the cell
# also runs on a CPU-only machine.
arg_stance_pipeline = TextClassificationPipeline(model=stance_classifier_teacher_model, tokenizer=stance_classifier_teacher_tokenizer, framework='pt', task='stance_classification', device=0 if torch.cuda.is_available() else -1)

In [None]:
# Our two model versions, each loaded from its best-performing checkpoint on the validation set.
# Both are loaded on top of the 'facebook/bart-large' name and config.
# NOTE(review): going by the variable names, mt-v4.baseline_1 is the variant without
# stance and mt-v4.baseline_2 the variant with stance -- confirm against the training runs.
model_without_stance    = OurModel.load('../data/output/ca-final-models/mt-v4.baseline_1/trained_models/models-global-step-5000', 'facebook/bart-large',  model_config=transformers.AutoConfig.from_pretrained('facebook/bart-large'))
model_with_stance = OurModel.load('../data/output/ca-final-models/mt-v4.baseline_2/trained_models/models-global-step-4000', 'facebook/bart-large',  model_config=transformers.AutoConfig.from_pretrained('facebook/bart-large'))

In [25]:
# Tokenizer for 'facebook/bart-large', the same name both models were loaded with.
tokenizer = transformers.AutoTokenizer.from_pretrained('facebook/bart-large')

# Move both models to the device selected in the configuration cell.
model_without_stance.to(device)
model_with_stance.to(device)

# Call .eval() on both models (presumably switching them to inference mode);
# assigning to `_` keeps the returned object out of the cell output.
_ = model_with_stance.eval()
_ = model_without_stance.eval()

In [26]:
def chunks(lst, n):
    """Lazily split ``lst`` into consecutive slices of at most ``n`` items.

    Slices are produced in order; the final one holds whatever is left over
    and may therefore be shorter than ``n``.
    """
    starts = range(0, len(lst), n)
    yield from (lst[start:start + n] for start in starts)
        
def get_stance_scores(sents1, sents2):
    """Classify the stance of each ``(sents1[i], sents2[i])`` pair with the teacher pipeline.

    Every pair is joined as ``"<first> </s> <second>"`` and sent, with
    truncation enabled, to the notebook-level ``arg_stance_pipeline``.

    Args:
        sents1: iterable of strings (first element of each pair).
        sents2: iterable of strings (second element of each pair); pairs are
            formed with ``zip``, so the shorter input decides the pair count.

    Returns:
        A 3-tuple ``(mean_label, labels, scores)``:
          * ``mean_label`` -- mean of the integer labels; the original author
            describes it as the share of cases in which a counter was generated
            (presumably label 1 means "counter" -- confirm against the classifier).
          * ``labels`` -- the integer after the last ``_`` of each predicted label.
          * ``scores`` -- the ``score`` value the pipeline reported for each pair.

        Empty input returns ``(0.0, [], [])`` without calling the pipeline; it
        used to fail with ``ZeroDivisionError``.
    """
    text_inputs = [first + ' </s> ' + second for first, second in zip(sents1, sents2)]
    if not text_inputs:
        # Nothing to classify: skip the model call and avoid dividing by zero.
        return 0.0, [], []

    stance_results = arg_stance_pipeline(text_inputs, truncation=True)
    stance_labels = [int(result['label'].split('_')[-1]) for result in stance_results]
    stance_scores = [result['score'] for result in stance_results]
    return sum(stance_labels) / len(stance_labels), stance_labels, stance_scores

def counters_coherence(post_conclusions, post_counters):
    """Score each candidate counter by the mean stance of its sentences toward its conclusion.

    Each counter is split into sentences with ``nltk.sent_tokenize``; every
    sentence is paired with the conclusion at the same position and classified
    by ``get_stance_scores``. A sentence contributes ``+score`` when its label
    is non-zero and ``-score`` when its label is 0.

    Args:
        post_conclusions: candidate conclusions for one post.
        post_counters: candidate counters for the same post, aligned by position.

    Returns:
        One value per entry of ``post_counters``: the mean signed score of its
        sentences. A counter with no sentences to score gets ``float('-inf')``.
        The previous ``np.mean([])`` produced NaN, which ``np.argmax`` treats as
        the maximum, so an empty counter could be picked as the best one.
    """
    counter_sentences = [nltk.sent_tokenize(counter) for counter in post_counters]
    pairs = [(conclusion, sentence)
             for sentences, conclusion in zip(counter_sentences, post_conclusions)
             for sentence in sentences]

    signed_scores = []
    if pairs:
        # Only classify when there is something to classify: zip(*[]) cannot be
        # unpacked into two names and the classifier call would be wasted.
        pair_conclusions, pair_sentences = zip(*pairs)
        _, stance_labels, stance_scores = get_stance_scores(pair_conclusions, pair_sentences)
        signed_scores = [-score if label == 0 else score
                         for score, label in zip(stance_scores, stance_labels)]

    # The flat score list follows the order of `pairs`, so each counter owns the
    # next len(sentences) entries.
    coherence = []
    offset = 0
    for sentences in counter_sentences:
        window = signed_scores[offset: offset + len(sentences)]
        coherence.append(np.mean(window) if window else float('-inf'))
        offset += len(sentences)

    return coherence

def get_best_counters(conclusions, counters):
    """Pick, for every post, the candidate with the highest coherence score.

    ``conclusions`` and ``counters`` hold one list of candidates per post,
    aligned by position. Returns a list with one ``(conclusion, counter)``
    tuple per post, taken at the index where ``counters_coherence`` is largest.
    """
    def _pick(candidate_conclusions, candidate_counters):
        # Index of the best-scoring candidate for this post.
        winner = np.argmax(counters_coherence(candidate_conclusions, candidate_counters))
        return candidate_conclusions[winner], candidate_counters[winner]

    return [_pick(post_conclusions, post_counters)
            for post_conclusions, post_counters in zip(conclusions, counters)]

def generate_multiple_counters(model, tokenizer, df, conclusion_gen_kwargs, argument_gen_kwargs, 
                               output_clm='post_counters', post_concepts_clm='post_concepts', num_candidates=6, batch_size=8):
    """Generate ``num_candidates`` prompted (conclusion, counter) candidates per post.

    In round ``i`` each post is prompted with the ``i``-th entry of its
    ``post_concepts_clm`` list (or ``''`` when the list is shorter) and
    ``generate_two_seq_prompted_counters`` is run over the whole ``'post'`` column.

    The frame is modified in place and returned: ``output_clm`` receives, per
    post, a list of ``(control_code, conclusion, counter)`` tuples, one per round.
    """
    candidate_rounds = []
    for round_idx in range(num_candidates):
        control_codes = []
        for concepts in df[post_concepts_clm].tolist():
            control_codes.append(concepts[round_idx] if round_idx < len(concepts) else '')
        premises = df['post'].tolist()

        print(control_codes[:3])
        conclusions, counters = generate_two_seq_prompted_counters(
            model, tokenizer, premises, control_codes,
            conclusion_gen_kwargs, argument_gen_kwargs,
            batch_size=batch_size)

        candidate_rounds.append(list(zip(control_codes, conclusions, counters)))

    # Rounds are stored per candidate; transpose so each row holds one post's candidates.
    df[output_clm] = [list(per_post) for per_post in zip(*candidate_rounds)]

    return df


def generate_counters(model, tokenizer, data_loader, argument_gen_kwargs, conclusion_gen_kwargs, skip_special_tokens=True):
    """Generate one counter-argument and one conclusion for every item in ``data_loader``.

    Each batch must provide ``'input_ids'`` and ``'attention_mask'`` tensors; they
    are moved to the notebook-level ``device``. The model is put in eval mode and
    generation runs under ``torch.no_grad()``.

    Returns:
        ``(conclusions, counter_arguments)`` -- two lists of decoded strings in
        data-loader order. Note the order: conclusions come first.
    """
    def _to_text(token_tensor):
        # Decode a batch of generated token ids into strings.
        return tokenizer.batch_decode(token_tensor.cpu().numpy(),
                                      skip_special_tokens=skip_special_tokens)

    logits_processors = LogitsProcessorList()
    all_conclusions, all_counter_arguments = [], []

    model.eval()
    with torch.no_grad():
        for batch in data_loader:
            ids = batch['input_ids'].to(device)
            mask = batch['attention_mask'].to(device)

            # Keep this call order (counter-argument, then conclusion) unchanged.
            argument_tokens = model.generate_counter_argument(ids, mask, argument_gen_kwargs, logits_processors)
            conclusion_tokens = model.generate_conclusion(ids, mask, conclusion_gen_kwargs, logits_processors)

            all_counter_arguments.extend(_to_text(argument_tokens))
            all_conclusions.extend(_to_text(conclusion_tokens))

    return all_conclusions, all_counter_arguments

In [27]:
# Settings for conclusion generation: sampling with top-p 0.95, no beam search
# (num_beams=1), length capped at 50.
# NOTE(review): presumably forwarded to a Hugging Face generate() call inside the
# model helpers -- confirm there.
conclusion_gen_kwargs = {
    "do_sample": True, 
    "max_length":50,
    "top_p":0.95, 
    "num_beams":1,
}

# Settings for counter-argument generation: sampling with top-p 0.95 and top-k 50,
# length between 50 and 100, no repeated 3-grams, no beam search.
argument_gen_kwargs = {
    "do_sample": True, 
    "max_length":100,
    "min_length":50,
    "top_p":0.95, 
    "no_repeat_ngram_size":3,
    "top_k": 50,
    "num_beams":1,
}

In [28]:
# Load the test split and keep only the columns used below.
# NOTE(review): read_pickle can execute arbitrary code -- only load trusted files.
# NOTE(review): this path starts with '../../data-ceph' while data_dir in the
# configuration cell starts with '../data-ceph' -- confirm which one is right.
# Earlier alternatives, kept for reference:
#df = pd.read_pickle(data_dir + 'test_conclusion_all_preprocessed.pkl')[['post_id', 'title', 'post', 'counter']]
#df = pd.read_pickle('../../data-ceph/arguana/arg-generation/multi-taks-counter-argument-generation/reddit_data/conclusion_and_ca_generation/sample_test_conclusion_all_preprocessed_multi_conclusions.pkl')[['post_id', 'title', 'post_concepts2', 'post', 'counter']]
df = pd.read_pickle('../../data-ceph/arguana/arg-generation/multi-taks-counter-argument-generation/reddit_data/conclusion_and_ca_generation/test_conclusion_all_preprocessed_multi_conclusions.pkl')[['post_id', 'title', 'post_concepts2', 'post', 'counter']]
# Join the elements of each post with single spaces into one string
# (presumably 'post' is stored as a list of sentences -- confirm).
df['post'] = df.post.apply(lambda x: ' '.join(x))

In [36]:
# Preview the first rows.
# NOTE(review): the saved output of this cell already lists the single_pred_*
# columns, which are only assigned further down, so it was last run out of order.
df.head()

Unnamed: 0,post_id,title,post_concepts2,post,counter,single_pred_counter_arguments_no_stance,single_pred_conclusion_no_stance
410850,t3_a20l8s,people should come with instructions,"[Algorithm, Extraversion and introversion, People, Secrecy, Recommender system, Existential crisis]","cmv. when you meet people, the introduction should come with an electronic signature that informs us of that person s perceived disposition. that way you can efficiently engage them as they would like to be engaged. i would for example put things like introvert but likes to have fun and sing karaoke and write on reddit dislikes sports, random acts of kindness and illogical thinking that way people can easily deal with me without spending time trying to figure that stuff out. we can keep our deepest darkest secrets to ourselves, but a quick cheat sheet would be helpful we can be as forthcoming as we feel comfortable. algorithms could help is pair like minded souls. there would recommendations like amazon ads josh in appt a might be a good friend. he too is pondering his existential crisis i think like would be more efficient and satisfying if we could just instantly avoid people or gravitate toward some.","[so you're looking to establish an immensely personal database of everyone's likes and dislikes and whatnot, and we're all supposed to keep it updated, right?, and it'll never be abused by a stalker, right?, and it'll never be used by law enforcement to profile law abiding citizens, right?, and it'll never be used to help representatives gerrymander a district, right?, you're looking to avoid small talk, a casual activity that to most people isn't even a nuisance, and to do it, you're proposing a massive amount of infrastructure and a dangerous erosion of privacy while exposing people and their info to those who would abuse it., yeah, no thank you.]","i think you're missing out on a lot of the benefits of social interaction. first off, social interaction is not the only way to meet people, but it's the most efficient way to find them. 
if you're looking for a relationship, you're not going to be able to find one that suits you because you're going to have to be a little more selective about the people you meet. second, you'll be more likely to meet someone who doesn't like you",We should just recognize people’s temperament.
410858,t3_a22bn2,People should not be heavily criticized for things they put on social media in the distant past,"[Internet, Black and white, Adolescence, Time, Reason, Belief, People, Person, Opinion, Jame]","i think that it is unfair for the internet to come down hard on people for things they put on social media a long time ago. i'm talking about cases such as james gunn getting fired over tweets he made a long time ago 2009 2010 , and doja cat getting criticized for using the word 'faggot' in tweets from a few years back too. here's why i hold this view: 1 people change. i think we can all say that the person you are today is not the person you were 10 years ago. your beliefs and values change as time goes by, shaped by your varying life experiences. 10 years is a long time, in which many things can happen that drastically change your view on things. this is especially true throughout adolescence, when your thinking matures and your life is rapidly changing. personally, many of my views were black and white years ago, but as i've gone through more experiences, my views have changed into something more grey. i think it would be really unreasonable if you treated me as if the only views i hold today were the views i held 10 years ago, many of which i would find abhorrent today. 2 people's lives don't revolve around social media. building on the first point, people's views could change without them having to their social media history to reflect that. if my opinion on a subject matter changes, i'm probably not going to dig through my entire post history to delete every post that goes against my newly formed opinion. i think it's unreasonable to expect anyone to do that. now, i don't know for sure if people like james gunn's views on things have changed since he first made the comments that he did. even if those views were changed, i don't expect him to dig through 10 years worth of tweets to delete offensive tweets. 
now i'm not denying that people should be responsible for what they put online, but i do think that others ought to be more understanding instead of simply dismissing a person for a distant mistake in the past. cmv.","[if i kill someone, ten years later i shouldn't be let out of jail, just because i changed doesn't mean it effects what i've done., however, i highly agree that people getting fired from their jobs is disgusting, everyone should have freedom of speech, not just because it's right, but also because it helps against governments going completely authoritorian, and the entire nature of democracy relies on views not intrinsically being better than others., i am all for james getting criticised foe what he said although i don't think he did anything wrong, but freedom of speech is vital to maintain]","people change all the time. people change their views on a lot of things. their views change over time, and change as they get older. you can't expect someone to change in 10 years. you have to wait until the person you are today to change your view. if you don't change your views today, you will not be able to change them in the future.","It's unfair that we shouldn't expect people to delete hateful/misogynistic/etc. posts they made 10 years ago, then say they didn't want to be accused of it."
410902,t3_a22nv4,We shouldn't focus on slowing climate change,"[Climate change, Economics, Health, Environmental issues, People, Cutting]","first of all: climate change is real happening and in large part caused by human activity. climate change will damage many ecosystems extremely and will make a lot of things humans do right now more dangerous expensive difficult. there are also many other environmental issues especially regarding pollution, local ecosystems and local human livability. many people call for a global solution that involves all countries reducing co2 emissions. but cutting co2 emissions drastically will hurt the economies and human ability to progress. higher economic activity on earth leads to more and better research and enables humans to defend better against the changing environment than cutting co2 emissions and thus slowing climate change would. in short: healthy and sustainable economic growth worldwide is more important than slowing climate change.","[there is an estimate of 12 years before climate change results in irreversible and catastrophic consequences., i would rather have the economy suffer than the cataclysmic effects of climate change.]","climate change is real, but it's not a bad thing. the problem is that we don't know what to do about it. we can't predict what will happen in the future, and we have no way of knowing if it will happen at all. the best we can do is try to mitigate it as much as we can. we need to be prepared for it, but we can only do so much. if we want to fight climate change, we have to",The global CO2 emissions issue is more important than the global economy
410910,t3_a22xd0,The Australian PM was right to tell students to stop activism around global warming,"[Climate change, Science, Activism, Government, Economist, Poverty, Scientist, Planet, Public policy, Fat]","first things first, just so i don't get mauled alive, i believe in climate change. i am not a skeptic. and yes, i know the science is clear although it's dangerous to subscribe to a dogmatic view as i'll explain later i am, however, a big believer it has become difficult to seperate the science from politics these days and that people are in over their heads in terms of what we can do about it. for one, activism is definitely not the right move, especially when they confirm nietzsche's stance on moral self exculpation pretentious . yes, i am referring to the case that just happened in australia. the government spokesperson or whoever it was, was an idiot for wording it the way he did but it doesn't change the fact that not all activism is going to change things or have any real utility you can't just bring up the civil rights movement to justify every movement you decide to be apart of i'm going to go on a tangent for a second to look at bjorn lombergs research where he took the millenial goals of the un 200 of them and ranked order them in terms of return on investment with a team of nobel prize winning economics only to find that global warming wasn't even on the list? ! due to the fact all the complex models required to combat climate change had such large error margins that we couldn't even measure the positive or negative effects of what we do right now? that's important! at its nucleus, science is attempting to prove hypotheses wrong. however, when things become government policy, only studies trying to confirm the research gets approved. we need the best people looking at this without policy getting involved. 
and people's understandable yet misguided outrage only gives politicians power the last people on earth you want handling this shit look at nutrition for example. today, we understand the importance of dietary fats. in the late 70s however, it became government policy that dietary fats were the main culprits of heart attacks. as a result, any researchers looking to prove the lipid theory wrong were denied funding. the public was led to believe this was true. the food industry developed low fat products and the result was metabolic issues and obesity. that policy turned out to be dead wrong and contributed to the massive healthcare issues that threaten to bankrupt this country to this day. that's not a trivial matter. we have to think big picture no? why is combating climate change our 1 priority? what about world starvation? why should we dump billions and trillions of dollars to combat something we don't know will work? 'we got to try' yea, of course, if there weren't 1000 other global issues that needs just as much funding and research. but nope, climate change is all the rage these days and we feel good standing up for that despite having no knowledge on the complexities of the issues surely global starvation is just as important as climate change? easy to say global warming is the bigger issue sitting on your bed watching netflix but tell that to people starving around the world so how can you be confident, that investing into these policies is the right move on a higher resolution plane of thinking. is it the right move? does it take priority? how does it fit with trying to make the best possible move to best serve our planet? i guess it's almost impossible to answer that but i believe it can be done by combining the top scientists, engineers, economists and statisians around the world and letting them get to work without politics driving them. and guess what, climate change might not be their number 1 priority. 
or looking at the problem from the bottom up rather than top down might be the best way? ? maybe investing in getting the world out of poverty is the precursor to combating climate change in the most effective manner and that might mean climate change has to take a back seat for awhile. this seems very reasonable to me if you take a step back and do your best to drop your biases and rationalizations impossible but we can try . but if someone were to suggest that certain economists already have , there would be outrage and the media would eat it up the point is, the systems of the world are complicated. and i mean that. you're 12. you don't know what you are even protesting. even the average adult is way in over their heads. the best thing to do is to invest in yourself. research, study and become competent to the point where you can have real impact on the world. change my view","[solving climate change is easier than solving world hunger or world poverty, because we know what we have to do to stop it: stop carbon emissions., now, totally stopping carbon emissions would be impossible., even stopping most carbon emissions would be quite difficult., the trouble is that the government refuses to take meaningful steps toward addressing climate change because doing so would be unprofitable., it's not going to be economical to fix climate change right now., it may never be economical., but it's the right thing to do, and it will make the world a better place., even if there's scientific debate about the exact nature of climate change, every reputable scientist agrees that it's happening, it's a problem, it's accelerating, and the longer we ignore it the worse of a problem it will be.]","i think you're missing a few things:1. climate change is not something that will happen in the near future. 2. we have no way of knowing if it will happen or not. if it does, we can't do anything about it except wait and see. 3. 
it is not going to happen until the middle of the 20th century. there is no way to know if climate change will happen before then. 4. we are not in a position to","Scientists, politics, and all else is too big to fail in dealing with climate change"
410916,t3_a22y6n,Feeding cats or dogs a diet with meat is indefensible.,"[Animal, Dog, Cat, Veganism, People, Animal feed]","dogs can be perfectly healthy with a vegetarian diet. and while cats can thrive on a vegetarian diet, it takes more effort. if you feed your cat dog meat, you'll be responsible for the deaths of 100s of smarter animals such as pigs. you're killing 100s of animals smarter than cats dogs to sustain just one of them. it doesn't make sense. even if a vegan diet killed a cat dog immediately which it doesn't , it'd still be the more moral thing to do because you'd kill only 1 animal instead of 100s. so calling people who feed their pets vegan diets shit like 'animal abusers' is hypocritical.","[no it's actually animal abuse for cats., cats are carnivores., please feed them cat food.]","i'm a vegan, but i don't think veganism is the answer. veganism doesn't make sense to me. it's a lifestyle choice, not a moral choice. i'm not sure why you're trying to make a distinction between veganism and veganism.",Vegan food is better for the animals that it consumes. Vegans who eat cat/dog meat are animal abusers.


In [None]:
# Generate 10 prompted candidates per post with the no-stance model, using the
# i-th entry of 'post_concepts2' as the prompt in round i; the candidates are
# stored in the 'post_counters' column.
# NOTE(review): the saved progress output stops at 40% of the tenth round, so the
# stored output does not show a completed run.
df = generate_multiple_counters(model_without_stance, 
                                                   tokenizer, df, conclusion_gen_kwargs, argument_gen_kwargs, 
                                                   output_clm='post_counters', 
                                                   post_concepts_clm='post_concepts2', 
                                                   num_candidates=10, 
                                                   batch_size=16)

['Science', 'Law', 'Military']




  0%|          | 0/2 [00:00<?, ?ba/s]

100%|██████████| 125/125 [05:02<00:00,  2.42s/it]

['Religion', 'Sovereignty', 'Transgender']





  0%|          | 0/2 [00:00<?, ?ba/s]

100%|██████████| 125/125 [05:00<00:00,  2.40s/it]

['Fact', 'European Union law', 'Surgery']





  0%|          | 0/2 [00:00<?, ?ba/s]

100%|██████████| 125/125 [05:00<00:00,  2.41s/it]

['Idiot', 'Border', 'Torture']





  0%|          | 0/2 [00:00<?, ?ba/s]

100%|██████████| 125/125 [05:06<00:00,  2.45s/it]

['Parking violation', 'People', 'Headquarters']





  0%|          | 0/2 [00:00<?, ?ba/s]

100%|██████████| 125/125 [05:01<00:00,  2.41s/it]

['', 'Idea', 'United States Secretary of Defense']





  0%|          | 0/2 [00:00<?, ?ba/s]

100%|██████████| 125/125 [05:02<00:00,  2.42s/it]

['', 'Trade bloc', 'Presidency of Barack Obama']





  0%|          | 0/2 [00:00<?, ?ba/s]

100%|██████████| 125/125 [04:58<00:00,  2.38s/it]

['', 'Sign', 'Citizenship of the United States']





  0%|          | 0/2 [00:00<?, ?ba/s]

100%|██████████| 125/125 [04:54<00:00,  2.35s/it]

['', '', 'Senate']





  0%|          | 0/2 [00:00<?, ?ba/s]

100%|██████████| 125/125 [04:53<00:00,  2.35s/it]

['', '', 'Secretary']





  0%|          | 0/2 [00:00<?, ?ba/s]

 40%|████      | 50/125 [01:57<02:53,  2.32s/it]

In [None]:
# Inspect the generated candidates next to the title and the prompting concepts.
df[['title', 'post_concepts2', 'post_counters']].head()

In [None]:
# Each 'post_counters' entry is a list of (control_code, conclusion, counter)
# tuples; pull out the conclusions (index 1) and the counters (index 2) per post.
conclusions = [[x[1] for x in row] for row in df.post_counters.tolist()]
counters = [[x[2] for x in row] for row in df.post_counters.tolist()]

In [None]:
# Choose the best (conclusion, counter) pair per post and split the pairs into
# two parallel tuples. The unpacking needs at least one post to succeed.
best_conclusions, best_counters = zip(*get_best_counters(conclusions, counters))

In [None]:
# Store the selected counter and conclusion for each post as new columns.
df['pred_counter_arguments_no_stance'] = best_counters
df['pred_conclusions_no_stance'] = best_conclusions

In [None]:
# Compare all candidates with the selected conclusion and counter.
df[['title', 'post_counters', 'pred_conclusions_no_stance', 'pred_counter_arguments_no_stance']].head()

In [None]:
# Persist the frame with the candidate and best-candidate columns.
# NOTE(review): a later cell writes to this same path and overwrites this file.
df.to_pickle('../multitask-counter-arg-generation/data/output/ca-final-models/mt-v4/results/test_prompts_preds_df.pkl')

#### Predict single counters:

In [30]:
# Generation settings for the single-counter run. These rebind the names set in
# the earlier generation-kwargs cell; the only value that differs is
# num_beams=4 for the counter-argument.
conclusion_gen_kwargs = {
    "do_sample": True, 
    "max_length":50,
    "top_p":0.95, 
    "num_beams":1,
}

# NOTE(review): do_sample=True together with num_beams=4 -- confirm that sampling
# combined with beam search is what is intended here.
argument_gen_kwargs = {
    "do_sample": True, 
    "max_length":100,
    "min_length":50,
    "top_p":0.95, 
    "no_repeat_ngram_size":3,
    "top_k": 50,
    "num_beams":4,
}

In [31]:
# Build a dataset from the 'post' column and tokenize it, padding and truncating
# every post to 512 tokens.
# '__index_level_0__' is dropped -- presumably the pandas index column added by
# Dataset.from_pandas; confirm.
ds = Dataset.from_pandas(df[['post']])
ds = ds.map(lambda a: tokenizer(a['post'], padding='max_length', max_length=512, truncation=True), 
                                   remove_columns=[ '__index_level_0__'], batched=True)

# Expose only the two tensors that generate_counters() reads from each batch.
ds.set_format(type='torch', columns=['input_ids', 'attention_mask'])
# batch_size is the notebook-level value from the configuration cell.
dataloader = torch.utils.data.DataLoader(ds, batch_size=batch_size)

  0%|          | 0/9 [00:00<?, ?ba/s]

In [32]:
# One conclusion and one counter-argument per post from the no-stance model.
# generate_counters() takes the argument kwargs before the conclusion kwargs and
# returns (conclusions, counter_arguments).
no_stance_conclusions, no_stance_counter_arguments = generate_counters(model_without_stance, tokenizer, dataloader, argument_gen_kwargs, conclusion_gen_kwargs)

In [33]:
# Store the single-shot predictions; the DataLoader above is built without
# shuffling, so the lists are in the frame's row order.
df['single_pred_counter_arguments_no_stance'] = no_stance_counter_arguments
df['single_pred_conclusion_no_stance'] = no_stance_conclusions

In [35]:
# Inspect the single-shot counter-arguments next to the post titles.
df[['title', 'single_pred_counter_arguments_no_stance']].head()

Unnamed: 0,title,single_pred_counter_arguments_no_stance
410850,people should come with instructions,"i think you're missing out on a lot of the benefits of social interaction. first off, social interaction is not the only way to meet people, but it's the most efficient way to find them. if you're looking for a relationship, you're not going to be able to find one that suits you because you're going to have to be a little more selective about the people you meet. second, you'll be more likely to meet someone who doesn't like you"
410858,People should not be heavily criticized for things they put on social media in the distant past,"people change all the time. people change their views on a lot of things. their views change over time, and change as they get older. you can't expect someone to change in 10 years. you have to wait until the person you are today to change your view. if you don't change your views today, you will not be able to change them in the future."
410902,We shouldn't focus on slowing climate change,"climate change is real, but it's not a bad thing. the problem is that we don't know what to do about it. we can't predict what will happen in the future, and we have no way of knowing if it will happen at all. the best we can do is try to mitigate it as much as we can. we need to be prepared for it, but we can only do so much. if we want to fight climate change, we have to"
410910,The Australian PM was right to tell students to stop activism around global warming,"i think you're missing a few things:1. climate change is not something that will happen in the near future. 2. we have no way of knowing if it will happen or not. if it does, we can't do anything about it except wait and see. 3. it is not going to happen until the middle of the 20th century. there is no way to know if climate change will happen before then. 4. we are not in a position to"
410916,Feeding cats or dogs a diet with meat is indefensible.,"i'm a vegan, but i don't think veganism is the answer. veganism doesn't make sense to me. it's a lifestyle choice, not a moral choice. i'm not sure why you're trying to make a distinction between veganism and veganism."


In [26]:
# Persist the frame again, now including the single_pred_* columns.
# NOTE(review): same path as the earlier to_pickle cell, so this overwrites that file.
df.to_pickle('../multitask-counter-arg-generation/data/output/ca-final-models/mt-v4/results/test_prompts_preds_df.pkl')

----------

In [10]:
# Reload the saved predictions, replacing the in-memory frame.
# NOTE(review): read_pickle can execute arbitrary code -- only load trusted files.
df = pd.read_pickle('../multitask-counter-arg-generation/data/output/ca-final-models/mt-v4/results/test_prompts_preds_df.pkl')