In [1]:
%load_ext autoreload

In [2]:
import os
import sys
import re
from argparse import Namespace

sys.path.append('../src-py/')

In [3]:
%autoreload

from utils import *
from project_debater_api import *

In [4]:
import torch
import json

import nltk
import numpy as np
import pandas as pd

from pathlib import Path
from datasets import load_dataset, load_metric, Dataset

from transformers import AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import BartTokenizer, BartForConditionalGeneration

In [5]:
import pandas as pd
pd.set_option('display.max_colwidth', None)

In [6]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [7]:
ceph_dir = '/home/sile2804/data-ceph/arguana/arg-generation/multi-taks-counter-argument-generation'
local_home_dir = '../data'

In [8]:
#Evalute ROUGE scores
gen_kwargs = {
    "do_sample": True, 
    "max_length":200,
    "top_p":0.95, 
    "top_k":50,
}

def generate_ds_attacks(ds, model, tokenizer, premises_clm, conclusion_clm, gen_kwargs, skip_special_tokens=True, batch_size=8):
    ds = ds.map(lambda x :preprocess_function(x, tokenizer, premises_clm, 'counter', conclusion_clm=conclusion_clm), batched=True)
    ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    dataloader = torch.utils.data.DataLoader(ds, batch_size=batch_size)
    attacks = generate_counters(model, tokenizer, dataloader, gen_kwargs, skip_special_tokens=skip_special_tokens)
    
    return attacks

#### Load Testing data:

##### Loading validation data from Reddit:

In [9]:
valid_df = pd.read_pickle(ceph_dir + '/reddit_data/conclusion_and_ca_generation/valid_conclusion_comp_remove_75sem_perc_with_targets.pkl')

In [10]:
#drop duplicate posts
unique_valid_posts = valid_df.drop_duplicates('post_id')

In [11]:
valid_ds = Dataset.from_pandas(unique_valid_posts)
tmp_ds = valid_ds.train_test_split(0.9)
sample_valid_ds = tmp_ds['test']

#saving the sample
reddit_sample_valid_ds = sample_valid_ds.flatten_indices()
reddit_sample_valid_ds.save_to_disk('../data/sample_valid_ds')

  0%|          | 0/3 [00:00<?, ?ba/s]

In [13]:
#loading the random sample of validation dataset with the auto-generated conclusion
reddit_sample_valid_ds = Dataset.load_from_disk('../data/sample_valid_ds/')

##### Loading validation data from Kialo:

In [29]:
valid_ds = Dataset.from_pandas(pd.read_pickle(ceph_dir + '/kialo_data/kialo_valid_df.pkl'))

In [10]:
tmp_ds = valid_ds.train_test_split(0.5)
sample_valid_ds = tmp_ds['test']

#saving the sample
sample_valid_ds = sample_valid_ds.flatten_indices()
sample_valid_ds.save_to_disk('../data/kialo_sample_valid_ds')

  0%|          | 0/3 [00:00<?, ?ba/s]

In [9]:
sample_valid_ds = Dataset.load_from_disk('../data/kialo_sample_valid_ds')

### Evaluating on Kialo:

In [None]:
#KIALO models
masked_conclusion_kialo_model = BartForConditionalGeneration.from_pretrained(local_home_dir + '/output/masked-conclusion-kialo-model/').to(device)
masked_conclusion_kialo_tokenizer = BartTokenizer.from_pretrained(local_home_dir + '/output/masked-conclusion-kialo-model/')
known_conclusion_kialo_model = BartForConditionalGeneration.from_pretrained(local_home_dir + '/output/known-conclusion-kialo-model/').to(device)
known_conclusion_kialo_tokenizer = BartTokenizer.from_pretrained(local_home_dir + '/output/known-conclusion-kialo-model/')

#masked_conclusion_kialo_model = BartForConditionalGeneration.from_pretrained(local_home_dir + '/output/masked-conclusion-kialo-unique-model/').to(device)
#known_conclusion_kialo_model = BartForConditionalGeneration.from_pretrained(local_home_dir + '/output/known-conclusion-kialo-unique-model/').to(device)

In [11]:
known_conc_attacks = generate_ds_attacks(sample_valid_ds, known_conclusion_kialo_model, known_conclusion_kialo_tokenizer, 'premises', 'conclusion_text', gen_kwargs)
masked_conc_attacks = generate_ds_attacks(sample_valid_ds, masked_conclusion_kialo_model, masked_conclusion_kialo_tokenizer, 'premises', None, gen_kwargs)
#auto_conc_attacks = generate_ds_attacks(sample_valid_ds, known_conclusion_model, tokenizer, 'masked_premises', 'auto_conclusion', gen_kwargs)

  0%|          | 0/3 [00:00<?, ?ba/s]

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


  0%|          | 0/3 [00:00<?, ?ba/s]

In [12]:
kialo_pred_df = pd.DataFrame(list(zip(sample_valid_ds['conclusion_text'], sample_valid_ds['premises'], 
                                   sample_valid_ds['counter'], known_conc_attacks, masked_conc_attacks)), 
                      columns=['conclusion', 'premises', 'gt', 'known_conc_attacks', 'masked_conc_attacks'])

kialo_pred_df['argument'] = kialo_pred_df.apply(lambda row: row['conclusion'] + ' : ' + ' '.join(row['premises']), axis=1)
kialo_pred_df['premises'] = kialo_pred_df['premises'].apply(lambda x: ' '.join(x))

In [13]:
kialo_pred_df[['argument', 'gt', 'known_conc_attacks', 'masked_conc_attacks']].sample(10).head(n=10)

Unnamed: 0,argument,gt,known_conc_attacks,masked_conc_attacks
404,"In some German states it is forbidden to host dance events and concerts on certain holidays. : This leads to a decrease in happiness and leisure activities for non-Christians. These bans have existed in Germany since at least the early 20th century Dance events and concerts contribute to the economy as food and beverages are generally consumed in large quantities, and tickets normally being required for attendance.","The German dance scene is similar to dance scenes in other countries where it is allowed to host dance events on Easter. Thus, no major economic effect is visible.","Some German states have banned cultural events and concerts on certain holidays, such as Christmas Eve and New Year's Eve.","Dance events and concerts contribute to the economy as food and beverages are generally consumed in large quantities, and tickets normally being required for attendance."
4,"When one person is having sex only to get paid, that sex is not fully consensual and so should not be allowed or encouraged. : The sex industry is often a means of providing for oneself or a family when all other options have been exhausted. Choice is always contingent on circumstances; the most free choices are made when all options are equally viable. For sex workers, choosing not to have sex is not a viable choice because it directly undermines their livelihood. When many sex workers have a history of abuse, you cannot assume that consent can be given. Monetary exchange is an inequitable power dynamic that undermines free choice. We should not permit sexual activity that does not proceed from personal relationships that exist on a level field.","Under this logic, there are a series of jobs that should simply be banished, like mining, for instance: it's something miners are doing only to get paid, so taking action on the job is not fully consensual, and would thus configure slavery, and should not be allowed or encouraged.\nThe same could be said of countless jobs.","The sex industry is often a means of providing for oneself or a family when all other options have been exhausted. Choice is always contingent on circumstances; the most free choices are made when all options are equally viable. For sex workers, choosing not to have sex is not a viable choice because it directly undermines their livelihood.","The sex industry is often a means of providing for oneself or a family when all other options have been exhausted. Choice is always contingent on circumstances; the most free choices are made when all options are equally viable. For sex workers, choosing not to have sex is not a viable choice because it directly undermines their livelihood. When many sex workers have a history of abuse, you cannot assume that consent can be given. Monetary exchange is an inequitable power dynamic that undermines free choice."
615,"Judge by Intent : Friends gained in by judging their intent will want to be better with you Intention allows for failure in action that may not always be possible; if a friend truly cares for you, their sentiment is more important than their mistakes. The action is observable, so the judging actually occurs when we examine intent. Intent is hard to establish, because we rely on asking the offender to explain why they did what they did. We use clues, such as intonation of voice, expression of face, choice that led to action, previous actions etc, to determine the motivation and to judge the character and intent. The action is observable and a fact of occurence, not needing judgement.",Friends gained by intent may fail at certain actions causing complications to both lives,"Intention is hard to establish, because we rely on asking the offender to explain why they did what they did. We use clues, such as intonation of voice, expression of face, choice that led to action, previous actions etc, to determine the motivation and to judge the character and intent. The action is observable and a fact of occurence, not needing judgement.","The intent is hard to establish, because we rely on asking the offender to explain why they did what they did. We use clues, such as intonation of voice, expression of face, choice that led to action, previous actions etc, to determine the motivation and to judge the character and intent. The action is observable and a fact of occurence, not needing judgement."
947,Der Verfassungsschutz sollte die AfD beobachten : Die AfD steht in Verbindung mit rechtsextremen und völkischen Gruppierungen. Einige AfD-Politiker bedienen sich auffälliger Nazi-Rethorik. Führende AfD-Spitzenpolitiker stehen trotz Unvereinbarkeitsbeschluss der rechtsextremen NPD nahe. zeit.de Teilorganisationen der AfD wurden vom Bundesverfassungsschutz bereits zum Verdachtsfall \(Rechtsextremismus\) erhoben. Zu einer Gedenkfeier für Opfer des Holocausts trug im Berliner Senat der AfD-Politiker Andreas Wild eine blaue Kornblume. Diese galt Nazis in Deutschland und Österreich von 1933 bis 1938 als Erkennungszeichen. Der Verfassungsschutz bewertet diese Blume auch heute noch als solches. rbb24.de,Eine Beobachtung durch das BfV spielt der AfD in die Karten.,Der Verfassungsschutz sollte die AfD beobachten. rbb24.de Teilorganisationen der AfD wurden bereits zum Verdachtsfall \(Rechtsextremismus\) erhoben.,Die AfD-Politiker können auf einer Gedenkfeier für Opfer des Holocausts trug auffälliger Nazi-Rethorik. rbb24.de
1963,"Mariology , which is part of Jesus' story, prescribes the role of women in society and therewith makes emancipation more difficult. : The Bible's depiction of a virginal, revered yet subservient, Mary was an unconscious source for the continued oppression of women. The Catholic Church discriminates against women. Mary provided Catholic women an impossible ideal to which no woman could attain, with whom all women are invited to feel inadequate. Mary's chief qualification for exaltation is maternity. Even for Mary, biology is destiny. Mary serves as a function of female chastity",Love and reverence for Mary serve to further the well-being of women.,"The Catholic Church discriminates against women. Mary provided Catholic women an impossible ideal to which no woman could attain, with whom all women are invited to feel inadequate. Mary's chief qualification for exaltation is maternity. Even for Mary, biology is destiny.","The Catholic Church discriminates against women. Mary provided Catholic women an impossible ideal to which no woman could attain, with whom all women are invited to feel inadequate. Mary's chief qualification for exaltation is maternity. Even for Mary, biology is destiny."
422,"The historicity of the New Testament is questionable therefore so are these claims. : The circumstances of Jesus' birth are especially dubious \(with references to the Census of Quirinius occurring before the death of Herod, for example, which contradicts the historical record\). The New Testament contains contradictions. Many aspects of the gospels themselves are unprovable due to a lack of resources. There are no testimonies of any of Jesus' disciples. ""We know almost precisely nothing about what they thought, what they did, and what they came to believe."" Jesus' genealogy contains multiple contradictions between the Gospel of Luke and the Gospel of Matthew.",There are multiple outside sources that confirm Jesus actually lived. probe.org en.wikipedia.org,The historicity of the New Testament is questionable therefore so are these claims.,The Gospel of Luke contains multiple contradictions.
1652,The Pope did not challenge the Croat Ustashi government that was prosecuting Jews according to Nazi laws. : It is impossible that the Pope did not know that the Ustashi murders amounted to genocide \(p.38\) The Papacy did not distribute the information of opposing Croat Bishop Stepinac to other Bishops across Europe \(p.47\) The Pope saw a chance to prevent more Croats turning to Orthodox belief or communism because the fascists supported Catholicism \(p.32\) The local Bishop denounced the killing but other Croation bishops did not follow suit. It is the Pope's responsibility to give clear directions \(p.34\),There was little the Pope could do because the situation was very chaotic after the invasion of the Axis and the proclamation of an independent Croatia.,The Pope did not challenge the Ustashi government.,The Pope did not distribute the information of opposing Croat Bishop Stepinac to other Bishops across Europe \(p.47\)
1545,"Legalising sex work will reduce stigmas surrounding sex and sexuality. : Legalization allows sex workers to better reshape interpretations of sex and sexuality in society. Legalising sex work reduces the stigmatization of sexually active women. Legalization will stop the punitive laws against prostitution that create double standards of sexual morality that result in stigmatizing not just prostitutes, but many unconventional women, as sluts or whores. With legalization, prostitution is allowed to function in a social climate freed from emotional prejudices. This is seen as major obstacle to socially sound prostitution \(Ericsson, p. 365 Sex worker support organizations can freely inform the general public about life and problems in the sex industry.",Sex workers' rights must be legally established in addition to the legalization of prostitution to counter the stigmatisation of sex and sexuality.,Sex workers are unlikely to be able to change the stigma surrounding sex and sexuality.,"Legalizing sex work will reduce the stigmatization of sexually active women. Legalization will stop the punitive laws against prostitution that create double standards of sexual morality that result in stigmatizing not just prostitutes, but many unconventional women, as sluts or whores."
5,Sex work is often part of business deals. This puts businesswomen at a disadvantage as they presumably find it more difficult to participate together with their male colleagues. : Bankers often offer sex workers to their clients. In South Korea business is sometimes done in brothels and karaoke bars where sex workers join to provide entertainment. Sex work is a widespread feature in deal-making in China The cases of Dominique Strauss-Kahn or Goldman Sachs illustrates how sex workers are often used in business deals.,"These are usually not cases of conducting business in brothels, but of access to sex workers being used as incentives in business deals. This may not be particularly inclusive of women, but also does not exclude them from business in any obvious way.",Businesswomen are likely to find it difficult to participate together with their male colleagues.,Sex work is a widespread feature in deal-making in China The cases of Dominique Strauss-Kahn or Goldman Sachs illustrates how sex workers are often used in business deals.
1471,"As a sovereign state under international law, North Korea has every right to develop and defend itself with nuclear weapons. : North Korea has only embraced a nuclear program because of a need to protect themselves from aggressive US imperialism. History gives justification for North Korea to still be wary of foreign powers, including the US. As an individual country the US is in no position to ""force"" other sovereign states to do anything. International security is the responsibility of international organisations. Unilateral action and interventionism undermine the development of international law North Korea is not a signatory of the Nuclear Non-Proliferation Treaty \(NPT\) which would prohibit it from developing and/or acquiring nuclear weapons. As the Korean War was never ended through a peace treaty \(but only through an armistice\), North Korea is right to consider itself still at war with South Korea and the US. It is only natural that it strives to protect itself.","In the case of North Korea, other considerations should outweigh its rights as a sovereign state under national law.","North Korea is not a sovereign state under international law, as the Korean War was never ended through a peace treaty \(but only through an armistice\), North Korea is right to consider itself still at war with South Korea and the US. It is only natural that it strives to protect itself.","The United States is not a signatory to the Nuclear Non-Proliferation Treaty \(NPT\) which would prohibit it from developing and/or acquiring nuclear weapons. As the Korean War was never ended through a peace treaty \(but only through an armistice\), North Korea is right to consider itself still at war with South Korea and the US. It is only natural that it strives to protect itself."


### Generate predictions on Reddit:

In [14]:
# Loading Reddit models

#masked_conclusion_tokenizer = BartTokenizer.from_pretrained(local_home_dir + '/output/extended_tokenizer')
#masked_conclusion_model = BartForConditionalGeneration.from_pretrained(local_home_dir + '/output/masked-conclusion-bart-model/').to(device)

known_conclusion_model = BartForConditionalGeneration.from_pretrained(local_home_dir  + '/output/known-conclusion-bart-model').to(device)
known_conclusion_tokenizer = BartTokenizer.from_pretrained(local_home_dir + '/output/extended_tokenizer')

pred_conclusion_model = BartForConditionalGeneration.from_pretrained(local_home_dir  + '/output/pred-conclusion-bart-model/').to(device)
pred_conclusion_tokenizer = BartTokenizer.from_pretrained(local_home_dir + '/output/pred-conclusion-bart-model/')

In [15]:
# Generate attacks for different settings
known_conc_attacks  = generate_ds_attacks(reddit_sample_valid_ds, known_conclusion_model, known_conclusion_tokenizer, 'masked_premises', 'title', gen_kwargs)
bart_conc_attacks   = generate_ds_attacks(reddit_sample_valid_ds, known_conclusion_model, known_conclusion_tokenizer, 'masked_premises', 'bart_conclusion', gen_kwargs)
masked_conc_attacks = generate_ds_attacks(reddit_sample_valid_ds, known_conclusion_model, known_conclusion_tokenizer, 'masked_premises', None, gen_kwargs)
#This model would predict the conclusion and then the counter in one sequence.
joint_conc_attacks  = generate_ds_attacks(reddit_sample_valid_ds, pred_conclusion_model, pred_conclusion_tokenizer, 'masked_premises', None, gen_kwargs, skip_special_tokens=False)

  0%|          | 0/3 [00:00<?, ?ba/s]

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

In [16]:
reddit_pred_df = pd.DataFrame(list(zip(
                                       reddit_sample_valid_ds['post_id'],
                                       reddit_sample_valid_ds['title'], 
                                       reddit_sample_valid_ds['conclusion_targets'],
                                       reddit_sample_valid_ds['conclusion_stance'],
                                       reddit_sample_valid_ds['bart_conclusion'], 
                                       reddit_sample_valid_ds['masked_premises'], 
                                       reddit_sample_valid_ds['counter'], 
                                       known_conc_attacks, masked_conc_attacks, 
                                       bart_conc_attacks, joint_conc_attacks)), 
                columns=['post_id', 'conclusion', 'conclusion_target', 'conclusion_stance', 'bart_conclusion', 'premises', 'gt_attack', 'known_conc_attacks', 
                         'masked_conc_attacks', 'bart_conc_attacks', 'joint_conc_attacks'])

reddit_pred_df['argument'] = reddit_pred_df.apply(lambda row: row['conclusion'] + ' : ' + ' '.join(row['premises']), axis=1)
reddit_pred_df['premises'] = reddit_pred_df['premises'].apply(lambda x: ' '.join(x))

#process the jointly generated conclusion and counter
reddit_pred_df['joint_conc'] = reddit_pred_df['joint_conc_attacks'].apply (lambda x: x.split('<counter>')[0])
reddit_pred_df['joint_conc_attacks'] = reddit_pred_df['joint_conc_attacks'].apply (lambda x: x.split('<counter>')[1] if '<counter>' in x else x)
reddit_pred_df['joint_conc'] = reddit_pred_df['joint_conc'].apply (lambda x: re.sub('<s>|</s>|<conclusion>|<counter>|<pad>', '', x).strip())
reddit_pred_df['joint_conc_attacks'] = reddit_pred_df['joint_conc_attacks'].apply (lambda x: re.sub('<s>|</s>|<conclusion>|<counter>|<pad>', '', x).strip())

#save created pred_df
reddit_pred_df.to_pickle('../data/output/reddit_pred_df.pkl')

In [17]:
reddit_pred_df[['argument', 'conclusion' , 'joint_conc', 'masked_conc_attacks', 'known_conc_attacks', 'joint_conc_attacks']].sample(10).head(n=10)

Unnamed: 0,argument,conclusion,joint_conc,masked_conc_attacks,known_conc_attacks,joint_conc_attacks
110,"I am in favor of the establishment of quasi-national sports teams for indigenous groups within countries. : below are my two top points in favour the establishment of 'quasi national' sports teams. i look forward to your responses! 1. native american tribes rarely give their consent to have their image and culture used yet teams such as the cleveland indians frequently go ahead and used mascots inspired from indian culture regardless of if it will be deemed offensive or not. however, if quasi national sports teams are formed for indigenous groups they will then be able to reclaim their own culture. this effect, will also likely get those who are in marginalized indigenous groups to feel empowered and consequently cultivate an interest in sports. why is this important? because it would help them stay away from gangs, drugs, and other crime. 2. indigenous people were colonized. they had their own distinct culture and heritage prior to being imperialized. therefore, by giving indigenous people their own sports team they regain some long lost cultural individuality. also, it clearly would be way more fun for an indigenous person to root for a team dedicated specifically designed for them. by analogy, if a british guy is a fan of football he is likely not going to want to root for germany in the world cup if he can root for england instead. why? because germany has it's own distinct culture and history, and brits would rather root for something they relate too, i. e their country and culture.",I am in favor of the establishment of quasi-national sports teams for indigenous groups within countries.,I believe that there should be quasi-national sports teams for indigenous groups.,i don't think there is much of a difference between a 'quasi national' team and a 'national' team.,i don't think it's fair to say that native americans aren't interested in sports teams. it's not like they don't want to be involved in them. they're not interested in being involved in the culture of the native american people. they want to get involved in their country and culture.,"i think the biggest problem with this idea is that it's not about the rights of indigenous people, it's about the right of the people to own their own sports team. i think it's important to understand that the idea of a quasi national sports team is a form of cultural appropriation."
1385,"Children ought never to be put on psychiatric medication. : my main argument is this: children, physiologically speaking, have not reached maturity. duh. no one disagrees with this. their brains, bodies, emotions and basically everything about them is not mature. a child can display both acceptable and unacceptable behavior at any given time. when the behavior is deemed unacceptable and persistently so, this is when psychiatrists are called upon. this is bad news for the child. since children have not reached puberty, no less their formative years, their enduring personality along with their body, brain and character have only just begun to develop into what in the future can become a stable, integrated self. studies have shown the potential of stultification of children, stunting their growth after prolonged usage of adhd related drugs. interfering chemically with a brain not yet matured must then be seen as having the potential for tremendous, far reaching negative consequences. the idea i am challenging is believing a child and their bad or aberrant behavior necessarily equates to a brain based biological chemical imbalance and or disease process needing the treatment of a psychiatrist who will administer powerful brain and mind altering psychotropic chemicals to a still developing brain. that psychiatric medications certainly alter brain chemistry in anyone taking them is indisputable. premature psychiatric action upon developing children inherently stifles and impedes the natural course of growth that would otherwise occur had no medication been used. insofar as it is true that psychiatric medications correct no chemical imbalances in the brain and never cure so called mental illness, but rather interfere with natural development, children ought never to be put on psychiatric medication.",Children ought never to be put on psychiatric medication.,Children should not be put on psychiatric medication until they reach puberty.,"the problem with your premise is that you are assuming that children have not reached maturity. this is not the case at all. the problem is that there is no evidence to support your claim that a child has not matured. if a child is not mature, they are not going to be able to be a productive member of society. they are going to have to learn to function as a part of society, and if they don't they will be unable to function in society. it's not that they are incapable of functioning in society, it's that they aren't capable of functioning as a whole. if you believe that children are not mature then you are correct, but you are not correct. i don't think there is any evidence that this is the case.",i think it's fair to say that children should not be put on psychiatric medication.,i think you're missing the point. children are not mature. they are developing in a way that is not a natural process.
2086,"Abortion should be avoided since it is killing babies : abortion is killing babies and should be avoided. a fetus while having lesser value than a person that has been born still has value. we should avoid killing them and should instead search for a world where abortion is unnecessary. we should avoid it by improving adoption and or foster care systems, improving birth control methods, etc. i am not saying that we should immediately end abortion as it solves problems temporarily but instead we should make aborting illegal once we have reached the point where abortion is unnecessary. instead of glorifying those who have performed abortions we should glorify people who do their best to avoid it and seek better solutions.",Abortion should be avoided since it is killing babies,Abortion should be made illegal once we reach a point where it is unnecessary.,abortion is killing babies and should be avoided.,"i don't think it's fair to say that a fetus has a higher value than a person that has been born, but that's not the same as saying that a person has no value.","i don't think this is a good argument, but i think it's a good one. if we want to avoid killing babies, we need to stop killing them. we don't want to end up with a world where we kill babies."
510,"Idris Elba should not play as 007 in the next bond film because he is black. : don't worry, the triggering title is not as bad as it sounds. although the james bond franchise is a fictional storyline taking place in its own established world in terms of its events and outcomes, idris elba should not play as the next james bond. i have nothing against the actor being black, but my only issue is the continuity of the main actor who plays james bond being white. if they were to cast idris elba as a character for the spy network, forgive me as i forget the name of that organization, then he should have his own designation such as 009, 014, etc. , and play his own named character that takes the place of bond after he retires, dies, or disappears altogether, to keep the flow of the story in line and not have an awkward moment of the massive plot hole that would be like bond is suddenly black and no one notices a thing. james bond is probably the most iconic spy in the entire world, and one of the most iconic names in action movies altogether. it would be like putting terry crews in for terminator 4 as the terminator t 800? that arnold schwarzenegger plays. it wouldn't make sense based on what's been established in that storyline already. tl;dr: idris elba shouldn't play as bond himself, but as a new character and a new agent to replace bond if he does get cast as a new character.",Idris Elba should not play as 007 in the next bond film because he is black.,Idris Elba should not play James Bond in the next James Bond movie.,"i don't know if i can change your view, but i think you're missing the point of the james bond franchise, which is to say that the main character is black. bond is black, and bond is white, and so is bond. i don't think that's the point. i think it's better to have a character who is black and a character that is white than one who is white.","i'm not sure what you're trying to say, but i think you're missing the point.","i agree with you, but i don't think it would be a good idea for james bond to be replaced by a black actor."
939,"I had zero problems with the DNC hack as it exposed information that was true. : first off, i did not vote for trump. i do not like trump. i do not like a lot of his policies. i'm an independent. but i had zero problems with the dnc hack as it only exposed true information. it helped us realize who we were really voting for. if the information being leaked is so bad that it will cause people to not vote for a specific person that there is something to be said. i would have had zero problems if the leak exposed trump or both candidates. i get we don't want foreign countries interfering but i am all about knowing the truth. i do not think there is anything unethical about exposing unethical actions of a particular party candidate.",I had zero problems with the DNC hack as it exposed information that was true.,I had zero problems with the DNC hack.,"i don't know if you are aware of the dnc hack or not, but i think it's fair to say that if you don't, then you probably won't vote for trump. if you do, then i don't think you should vote for him.","i don't think it's fair to say that the dnc hack exposed information that was true. i think it exposed a lot of things that were not true. it exposed things that weren't true. for example, if i had known that trump was going to win the election, i wouldn't have voted for him. if i knew that he would win, i would have voted against him. but if i know that he's going to lose, i won't vote for him because i don't want him to lose.","the problem with the dnc hack is that it doesn't expose the truth, it only exposes the lie."
1015,"Hitler was a solid dude : he was crazy but he truly did what he thought was right. he had horrible misconceptions about reality but if you take those misconceptions into account and put yourself in his boots you see that he really did what he thought was best for his country. the holocaust was one of the greatest atrocities of all time, i'm obviously not denying that or trying to downplay it but it wasn't caused out of badness, just craziness. hitler tried to do good. his kind gentle personality is made even more evident from his intensely socialist and animal friendly policies. he fought for his country, got imprisoned, spent all his time in prison writing a book on how to improve his country, then when he left he put his ideas into action. hitler was a solid dude, just really mentally ill.",Hitler was a solid dude,Hitler was a crazy man.,"i'm not sure what you mean by 'crazy' but i think you're missing the point of the holocaust. hitler was a bad guy. he did what he thought was best for his country, but he didn't do it out of badness, just because he thought it was best. he was a good guy, and he did his best to help his country. he wasn't a good person, he was just a bad person. he had horrible misconceptions about reality, but when he left he put himself in prison writing a book on how to improve his country's economy, then he put his ideas into action.","i don't think that hitler was a solid dude, just really mentally ill. the holocaust was one of the greatest atrocities of all time, but it wasn't caused out of badness, just craziness. he had horrible misconceptions about reality, but he really did what he thought was best for his country.","the holocaust was a horrible thing, but he did what he thought was best for his country."
2148,"News networks that frequently sensationalize news should be forced to label themselves as Entertainment News in their titles. : media sensationalization for the sake of viewership and clicks is significantly changing the format of how we get news. there are very few major news networks that simply provide the public with facts and data free of partisanship, and commentary aimed at molding the viewers idea of how to interpret news stories. if a company wants to call themselves news they must refrain from the sensationalized and clickbait format of news casting. if not, they should be forced to identify themselves as entertainment news networks. for example, cnn entertainment or fox entertainment news. because in reality, they are about viewers, ad revenue and being interesting and not focused on informing the public accurately. although government sponsored, the best examples i can think of for actual news is npr and bbc. the rest focus too heavily on partisanship and ratings.",News networks that frequently sensationalize news should be forced to label themselves as Entertainment News in their titles.,News networks should cease to be entertainment news networks.,i'm not sure what you mean by'media sensationalization for the sake of viewership and clicks.'i'm not going to argue that it is. i'm just going to say that media sensationalization is significantly changing the format of how we get news.,"there are plenty of entertainment news networks that frequently sensationalize news, but they are not really entertainment news. they are entertainment news for the sake of viewership and clicks.","there are a lot of things that people don't like about entertainment news, but i think it's important to remember that entertainment news isn't just about entertainment."
1949,"The moon landing should be viewed as a global achievement. : there's been a lot of controversy about ryan gosling's new movie about the moon landing omitting references about the us' role in it. imo this is 100 correct. the united states didn't land on the moon, humanity did. the entirety of human history led the way to that monumental achievement and to chalk it up to just ''murica' is incredibly shortsighted. where would the us space program be without newtonian physics? or german pioneered engineering techniques. most of what the americans did was just a repackage of what the soviet union did previously. there's a lot of truth to 'you didn't build that'. the people who stepped foot on the moon first might have been u. s. citizens but they got there by standing on the shoulders of innumerable people from around the world. we should recognize the moon landing as an achievement for all humanity, not just one specific country. cmv!",The moon landing should be viewed as a global achievement.,"The United States' role in the Moon landing is an important part of human history, not just one specific country",i don't think it's fair to say that ryan gosling's new movie about the moon landing omitting references about the us' role in it is incredibly shortsighted.,"the moon landing was not a global achievement, it was a global effort. the us didn't build the moon, but the us did build it.",i think the main thing you need to understand about the moon landing is that it wasn't the us. the us was the first country to land on the moon. it was the only country to ever land on a moon. that's what the us did.
2095,"It is literally impossible for /r/enoughpetersonspam to have the 5+ billion subscribers that it claims to have : according to the sidebar in r enoughpetersonspam, there are over 5 billion subscribers to that sub. i think this is extremely unlikely and thus there is something very suspicious going on in that sub. since obviously that number is being tampered with, who knows what other data in that sub is being manipulated? can we trust the upvote count? the comments? even the posts themselves? maybe even it's all bot generated like r subredditsimulator my main evidence for the 5 billion subscriber count being highly unlikely is that the total global internet user count , according to multiple sources is not even over 5 billion. at the end of 2017, there were this many total global internet users: accroding to wikipedia: 4,800,000,000 internet usage according to statista. com: 3,578,000,000 according to internetworldstats: 4,156,000,000 the chance that every single internet user in the world, from wallstreet bankers to african farmers with 2g cellphones, happens to be not only be a reddit user but a subscriber to r enoughpetersonspam is infinitesimal. my second piece of evidence is that every single post and comment in r enoughpetersonspam is in english. if every single internet user was subscribed to r enoughpetersonspam, where are all the posts and comments from these non english speaking users? where are all the chinese posts from the 1. 4 billion chinese subscribers? where are all the indian posts from the 1. 3 billion indian subscribers? not many chinese and indian citizens speak english as a second language so the chance that all those subscribers have the ability and are choosing to communicate there in english is practically nonexistent.",It is literally impossible for /r/enoughpetersonspam to have the 5+ billion subscribers that it claims to have,There is a very unlikely chance that there are over 5 billion subscribers to /r/enoughpetersonspam.,"i don't think it's possible, but i don't see how it's impossible. i think you're missing an important part of the problem. if you're looking for evidence of something, you need to look at r enoughpetersonspam.","i don't think you have a good reason to believe that the 5 billion subscribers would be impossible to prove. i'm not sure how you could prove that it's impossible, but i would like to point out that the vast majority of people who subscribe to r enoughpetersonspam are non english speaking. in fact, i think it's likely that a large portion of those who subscribe don't speak english.",i think you're underestimating the amount of people who subscribe to the subreddit. i'm not sure how many people are subscribed to the sub.
2037,"Children under the age of 5 should NEVER be admitted to any movies, even for children's movies. : there should a minimum age limit for children being admitted to any movies: no under the age of 5 should be admitted, even if accompanied by parents. even to movies to like the incredibles 2. they're simply too young to comprehend movies. to them, it's just moving pictures and noises, and they're either gonna just be restless and be disruptive or ve traumatized and be disruptive. and there's only so much a parent can do to keep them quiet. i understand that parents don't want to leave their younger children home alone, but it's not fair for anyone else to deal with their disruptions. they should either hire a babysitter, or just not go until they're mature enough.","Children under the age of 5 should NEVER be admitted to any movies, even for children's movies.",Children should not be allowed to go to movies unless accompanied by parents.,"there should be a minimum age limit for children being admitted to any movies, even if accompanied by parents.","i don't think you're going to convince anyone that children under the age of 5 should ever be admitted to any movies, even for children's movies.",the age limit for movies is too high for the parents.


### Evaluate generated predictions:

In [33]:
reddit_pred_df = pd.read_pickle('../data/output/reddit_pred_df.pkl')

In [34]:
#collect references
arg_counters = valid_df.groupby('post_id').agg({
    'counter': lambda x: [' '.join(c) for c in x]
}).reset_index()
arg_counters = pd.Series(arg_counters.counter.values, index=arg_counters.post_id).to_dict()

reddit_pred_df['gt_attack'] = reddit_pred_df['gt_attack'].apply(lambda x: str(x))
reddit_pred_df['all_counters'] = reddit_pred_df['post_id'].apply(lambda x: arg_counters[x])

In [36]:
reddit_pred_df = reddit_pred_df[reddit_pred_df.all_counters.map(len) > 0]

In [47]:
masked_conc_eval = evaluate_gen_attacks(reddit_pred_df['masked_conc_attacks'].tolist(), reddit_pred_df['all_counters'].tolist())
known_conc_eval = evaluate_gen_attacks(reddit_pred_df['known_conc_attacks'].tolist(), reddit_pred_df['all_counters'].tolist())
bart_conc_eval = evaluate_gen_attacks(reddit_pred_df['bart_conc_attacks'].tolist(), reddit_pred_df['all_counters'].tolist())
pred_conc_eval = evaluate_gen_attacks(reddit_pred_df['joint_conc_attacks'].tolist(), reddit_pred_df['all_counters'].tolist())



##### Test stance correcteness:

In [21]:
filtered_reddit_pred_df = reddit_pred_df[pd.notna(reddit_pred_df.conclusion_target)]

In [22]:
#compute the stance of the generated counters towards the conclusion target
filtered_reddit_pred_df['masked_conc_stances'] = get_stances(filtered_reddit_pred_df.conclusion_target.tolist(), filtered_reddit_pred_df.masked_conc_attacks.tolist())
filtered_reddit_pred_df['known_conc_stances']  = get_stances(filtered_reddit_pred_df.conclusion_target.tolist(), filtered_reddit_pred_df.known_conc_attacks.tolist())
filtered_reddit_pred_df['bart_conc_stances']   = get_stances(filtered_reddit_pred_df.conclusion_target.tolist(), filtered_reddit_pred_df.bart_conc_attacks.tolist())
filtered_reddit_pred_df['joint_conc_stances']  = get_stances(filtered_reddit_pred_df.conclusion_target.tolist(), filtered_reddit_pred_df.joint_conc_attacks.tolist())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
ProConClient: 100%|██████████| 2172/2172 [00:37<00:00, 57.53it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until

ProConClient:   0%|          | 0/2172 [00:00<?, ?it/s][A
ProConClient: 100%|██████████| 2172/2172 [00:52<00:00, 59.12it/s][A
ProConClient:  46%|████▌     | 1000/2172 [00:17<00:18, 64.15it/s][A
ProConClient: 100%|██████████| 2172/2172 [01:15<00:00, 28.62it/s][A

ProConClient:  92%|█████████▏| 2000/2172 [00:38<00:03, 53

In [23]:
#compute the distance between the conclusion stance and the attack stance: the bigger the distance the better...
masked_conc_stance_score1 = round(np.mean([abs(x[0] - x[1]) for x in zip(filtered_reddit_pred_df.masked_conc_stances.tolist(), filtered_reddit_pred_df.conclusion_stance.tolist())]), 2)
known_conc_stance_score1 = round(np.mean([abs(x[0] - x[1]) for x in zip(filtered_reddit_pred_df.known_conc_stances.tolist(), filtered_reddit_pred_df.conclusion_stance.tolist())]), 2)
bart_conc_stance_score1 = round(np.mean([abs(x[0] - x[1]) for x in zip(filtered_reddit_pred_df.bart_conc_stances.tolist(), filtered_reddit_pred_df.conclusion_stance.tolist())]), 2)
joint_conc_stance_score1 = round(np.mean([abs(x[0] - x[1]) for x in zip(filtered_reddit_pred_df.joint_conc_stances.tolist(), filtered_reddit_pred_df.conclusion_stance.tolist())]), 2)

#check if the two stances are contradicotry
masked_conc_stance_score2 = round(sum([int(x[0] * x[1] < 0) for x in zip(filtered_reddit_pred_df.masked_conc_stances.tolist(), filtered_reddit_pred_df.conclusion_stance.tolist())])/len(filtered_reddit_pred_df), 2)
known_conc_stance_score2  = round(sum([int(x[0] * x[1] < 0) for x in zip(filtered_reddit_pred_df.known_conc_stances.tolist(), filtered_reddit_pred_df.conclusion_stance.tolist())])/len(filtered_reddit_pred_df), 2)
bart_conc_stance_score2   = round(sum([int(x[0] * x[1] < 0) for x in zip(filtered_reddit_pred_df.bart_conc_stances.tolist(), filtered_reddit_pred_df.conclusion_stance.tolist())])/len(filtered_reddit_pred_df), 2)
joint_conc_stance_score2  = round(sum([int(x[0] * x[1] < 0) for x in zip(filtered_reddit_pred_df.joint_conc_stances.tolist(), filtered_reddit_pred_df.conclusion_stance.tolist())])/len(filtered_reddit_pred_df), 2)

In [24]:
from tabulate import tabulate

print(tabulate([['Masked Conclusion'] + [round(masked_conc_eval['bleu'], 2), masked_conc_eval['bert-fscore'], masked_conc_stance_score1, masked_conc_stance_score2],
                ['BART Conclusion'] + [round(bart_conc_eval['bleu'], 2), bart_conc_eval['bert-fscore'], bart_conc_stance_score1, bart_conc_stance_score2],
                ['Joint Prediction'] + [round(pred_conc_eval['bleu'], 2), pred_conc_eval['bert-fscore'], joint_conc_stance_score1, joint_conc_stance_score2],
                ['Known Conclusion'] + [round(known_conc_eval['bleu'], 2), known_conc_eval['bert-fscore'], known_conc_stance_score1, known_conc_stance_score2],
    ], headers=['bleu', 'bert-f1score', 'stance-score (diff)', 'stance-score (agreement)']))

                     bleu    bert-f1score    stance-score (diff)    stance-score (agreement)
-----------------  ------  --------------  ---------------------  --------------------------
Masked Conclusion    0.16            0.14                   0.82                        0.55
BART Conclusion      0.18            0.16                   0.85                        0.55
Joint Prediction     0.17            0.16                   0.83                        0.55
Known Conclusion     0.19            0.16                   0.85                        0.55


ProConClient: 100%|██████████| 2172/2172 [00:52<00:00, 57.11it/s]

##### Similarity to conclusion:

In [15]:
masked_conc_eval = evaluate_gen_attacks(reddit_pred_df['conclusion'], reddit_pred_df['masked_conc_attacks'].tolist())
known_conc_eval  = evaluate_gen_attacks(reddit_pred_df['conclusion'], reddit_pred_df['known_conc_attacks'].tolist())
#auto_conc_eval   = evaluate_gen_attacks(reddit_pred_df['conclusion'], reddit_pred_df['auto_conc_attacks'].tolist())
pred_conc_eval   = evaluate_gen_attacks(reddit_pred_df['conclusion'], reddit_pred_df['pred_conc_attacks'].tolist())
gt_attack_conc_eval = evaluate_gen_attacks(reddit_pred_df['conclusion'], reddit_pred_df['gt'].tolist())

In [17]:
from tabulate import tabulate

print(tabulate([['masked'] + [round(masked_conc_eval['bleu'], 2), masked_conc_eval['bert-fscore']],
                #['auto'] + list(auto_conc_eval.values()),
                ['pred'] + [round(pred_conc_eval['bleu'], 2), pred_conc_eval['bert-fscore']],
                ['known'] + [round(known_conc_eval['bleu'], 2), known_conc_eval['bert-fscore']],
                ['gt_attack'] + [round(gt_attack_conc_eval['bleu'], 2), gt_attack_conc_eval['bert-fscore']]
    ], headers=['bleu', 'bert-f1score']))

             bleu    bert-f1score
---------  ------  --------------
masked       0               0.16
pred         0.01            0.18
known        0.03            0.33
gt_attack    0              -0.03
