In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None)
from tqdm import tqdm
tqdm.pandas()
from glob import glob
import numpy as np
import json

#### Appropriateness Corpus

**Summarization Results**

In [2]:
documents = []
llama_summaries = []
bart_summaries = []

In [3]:
app_df = pd.read_csv('../data/inappropriate_arguments_sample_100_appropriateness.csv')

In [4]:
llama_df = pd.read_csv('../data/results-by-corpus/appropriateness/summarization/llama.csv', sep="\t", header=None)
llama_df.columns = ['id', 'llama_gist']

In [5]:
bart_df = pd.read_csv('../data/results-by-corpus/appropriateness/summarization/bart.csv')

In [6]:
bart_df.count()

issue                 100
argument              100
Reasons               100
word_count            100
supervised_summary    100
dtype: int64

In [7]:
documents = bart_df['argument'].tolist()
llama_summaries = llama_df['llama_gist'].tolist()
bart_summaries = bart_df['supervised_summary'].tolist()

In [8]:
len(documents), len(llama_summaries), len(bart_summaries)

(100, 100, 100)

In [9]:
app_summary_records = []
for doc, llama, bart in zip(documents, llama_summaries, bart_summaries):
    record = {}
    record['document'] = doc
    record['reference'] = " " 
    record['llama_summary'] = llama
    record['bart_summary'] = bart
    app_summary_records.append(record)

In [10]:
len(app_summary_records)

100

In [11]:
import json
with open("../data/results_appropriateness_summarization.jsonl", "w", encoding="utf-8") as outf:
    for record in app_summary_records:
        outf.write(json.dumps(record) + "\n")


**Neutralization Results**

In [12]:
app_df.count()

issue         100
argument      100
Reasons       100
word_count    100
dtype: int64

In [13]:
references = app_df['argument'].tolist()

In [14]:
llam_ndf = pd.read_csv('../data/results-by-corpus/appropriateness/neutralization/llama.csv', sep="\t", header=None)
llam_ndf.columns = ['id', 'llama_neutralization']

In [15]:
llam_ndf_ppo_04a_06ss = pd.read_csv('../data/results-by-corpus/appropriateness/neutralization/llama_ppo_rewrite_appropriateness_llama-7b-harmonic-mean-04a-06ss.csv', sep="\t", header=None)
llam_ndf_ppo_04a_06ss.columns = ['id', 'llama_neutralization']

In [16]:
llam_ndf_ppo_045a_055ss = pd.read_csv('../data/results-by-corpus/appropriateness/neutralization/llama_ppo_rewrite_appropriateness_llama-7b-harmonic-mean-045a-055ss.csv', sep="\t", header=None)
llam_ndf_ppo_045a_055ss.columns = ['id', 'llama_neutralization']

In [17]:
llam_ndf_ppo_05a_05ss = pd.read_csv('../data/results-by-corpus/appropriateness/neutralization/llama_ppo_rewrite_appropriateness_llama-7b-harmonic-mean-05a-05ss.csv', sep="\t", header=None)
llam_ndf_ppo_05a_05ss.columns = ['id', 'llama_neutralization']

In [18]:
llam_ndf_ppo_055a_045ss = pd.read_csv('../data/results-by-corpus/appropriateness/neutralization/llama_ppo_rewrite_appropriateness_llama-7b-harmonic-mean-055a-045ss.csv', sep="\t", header=None)
llam_ndf_ppo_055a_045ss.columns = ['id', 'llama_neutralization']

In [19]:
llam_ndf_ppo_06a_04ss = pd.read_csv('../data/results-by-corpus/appropriateness/neutralization/llama_ppo_rewrite_appropriateness_llama-7b-harmonic-mean-06a-04ss.csv', sep="\t", header=None)
llam_ndf_ppo_06a_04ss.columns = ['id', 'llama_neutralization']

In [20]:
llam_ndf_ppo_10a_00ss = pd.read_csv('../data/results-by-corpus/appropriateness/neutralization/llama_ppo_rewrite_appropriateness_llama-7b-harmonic-mean-10a-00ss.csv', sep="\t", header=None)
llam_ndf_ppo_10a_00ss.columns = ['id', 'llama_neutralization']

In [21]:
llam_ndf.count()

id                      100
llama_neutralization    100
dtype: int64

In [22]:
neutralized = llam_ndf['llama_neutralization'].tolist()
neutralized_ppo_04a_06ss = llam_ndf_ppo_04a_06ss['llama_neutralization'].tolist()
neutralized_ppo_045a_055ss = llam_ndf_ppo_045a_055ss['llama_neutralization'].tolist()
neutralized_ppo_05a_05ss = llam_ndf_ppo_05a_05ss['llama_neutralization'].tolist()
neutralized_ppo_055a_045ss = llam_ndf_ppo_055a_045ss['llama_neutralization'].tolist()
neutralized_ppo_06a_04ss = llam_ndf_ppo_06a_04ss['llama_neutralization'].tolist()
neutralized_ppo_10a_00ss = llam_ndf_ppo_10a_00ss['llama_neutralization'].tolist()

In [23]:
len(references), len(neutralized)

(100, 100)

In [24]:
records = []
for ref, neutral, neutral_ppo_045a_055ss, neutral_ppo_04a_06ss, neutral_ppo_10a_00ss, neutral_ppo_06a_04ss, neutral_ppo_05a_05ss, neutral_ppo_055a_045ss in zip(references, neutralized, neutralized_ppo_045a_055ss, neutralized_ppo_04a_06ss, neutralized_ppo_10a_00ss, neutralized_ppo_06a_04ss, neutralized_ppo_05a_05ss, neutralized_ppo_055a_045ss):
    record = {}
    record['reference'] = ref
    record['neutralized_ppo_040a_060ss'] = neutral_ppo_04a_06ss
    record['neutralized_ppo_045a_055ss'] = neutral_ppo_045a_055ss
    record['neutralized'] = neutral
    record['neutralized_ppo_050a_050ss'] = neutral_ppo_05a_05ss
    record['neutralized_ppo_055a_045ss'] = neutral_ppo_055a_045ss
    record['neutralized_ppo_060a_040ss'] = neutral_ppo_06a_04ss
    record['neutralized_ppo_100a_000ss'] = neutral_ppo_10a_00ss

    records.append(record)

with open("../data/results_appropriateness_neutralization.jsonl", "w", encoding="utf-8") as outf:
    for record in records:
        outf.write(json.dumps(record) + "\n")

**Summarize -> Neutralize (Single prompt)**

In [25]:
app_df = pd.read_csv('../data/inappropriate_arguments_sample_100_appropriateness.csv')

In [26]:
documents = app_df['argument'].tolist()

In [27]:
single_prompt_both_df = pd.read_csv('../data/results-by-corpus/appropriateness/both/llama_extract_rewrite_appropriateness.csv', sep="\t", header=None)
single_prompt_both_df.columns = ['id', 'llama_extract_rewrite']

In [28]:
gist_neutralize_single_prompt_responses = single_prompt_both_df['llama_extract_rewrite'].tolist()

**Summarize -> Neutralize (Individual Prompts)**

In [29]:
two_prompts_df = pd.read_csv('../data/results-by-corpus/appropriateness/both/llama_extract_then_rewrite_appropriateness.csv', sep="\t", header=None)
two_prompts_df.columns = ['id', 'llama_extract_then_rewrite']

In [30]:
gist_neutralize_two_prompts_responses = two_prompts_df['llama_extract_then_rewrite'].tolist()

**Neutralize -> Summarize (Single Prompt)**

In [31]:
swap_order_single_prompt_df = pd.read_csv('../data/results-by-corpus/appropriateness/both/llama_rewrite_extract_appropriateness.csv', sep="\t", header=None)
swap_order_single_prompt_df.columns = ['id', 'llama_rewrite_extract']

In [32]:
neutralize_gist_single_prompt_responses = swap_order_single_prompt_df['llama_rewrite_extract'].tolist()

**Neutralize -> Summarize (Individual Prompts)**

In [33]:
swap_order_two_prompts_df = pd.read_csv('../data/results-by-corpus/appropriateness/both/llama_rewrite_then_extract_appropriateness.csv', sep="\t", header=None)
swap_order_two_prompts_df.columns = ['id', 'llama_rewrite_then_extract']

In [34]:
neutralize_gist_two_prompts_responses = swap_order_two_prompts_df['llama_rewrite_then_extract'].tolist()

In [35]:
len(documents), len(gist_neutralize_single_prompt_responses), len(gist_neutralize_two_prompts_responses), len(neutralize_gist_single_prompt_responses), len(neutralize_gist_two_prompts_responses)

(100, 100, 100, 100, 100)

In [36]:
with open("../data/results_appropriateness_oasis.jsonl", "w", encoding="utf-8") as outf:
    for doc, gist_neut_single, gist_neut_two, neut_gist_single, neut_gist_two in zip(documents, gist_neutralize_single_prompt_responses, gist_neutralize_two_prompts_responses, neutralize_gist_single_prompt_responses, neutralize_gist_two_prompts_responses):
        record = {}
        record['document'] = doc
        record['reference'] = " "
        record['gist_neutralize_single_prompt'] = gist_neut_single
        record['gist_neutralize_two_prompts'] = gist_neut_two
        record['neutralize_gist_single_prompt'] = neut_gist_single
        record['neutralize_gist_two_prompts'] = neut_gist_two
        outf.write(json.dumps(record) + "\n")

#### Args.me Corpus

**Summarization Results**

In [37]:
documents = []
argsme_summaries = []
llama_summaries = []
bart_summaries = []

In [38]:
args_df = pd.read_csv("../data/inappropriate_arguments_sample_100_argsme.csv")

In [39]:
args_df.count()

query            99
argument         99
stance           99
snippet          99
id               99
prediction       99
appropriate      99
inappropriate    99
dtype: int64

In [40]:
documents = args_df['argument'].tolist()
args_snippets = args_df['snippet'].tolist()

In [41]:
len(documents), len(args_snippets)

(99, 99)

In [42]:
llama_df = pd.read_csv("../data/results-by-corpus/argsme/summarization/llama.csv", sep="\t", header=None)
llama_df.columns = ['id', 'llama_gist']

In [43]:
llama_df.count()

id            99
llama_gist    99
dtype: int64

In [44]:
llama_summaries = llama_df['llama_gist'].tolist()

In [45]:
bart_df = pd.read_csv("../data/results-by-corpus/argsme/summarization/bart.csv")

In [46]:
bart_df.count()

Unnamed: 0       99
query            99
argument         99
stance           99
snippet          99
id               99
prediction       99
appropriate      99
inappropriate    99
word_count       99
bart_gist        99
dtype: int64

In [47]:
bart_summaries = bart_df['bart_gist'].tolist()

In [48]:
len(documents), len(args_snippets), len(llama_summaries), len(bart_summaries)

(99, 99, 99, 99)

In [49]:
with open("../data/results_argsme_summarization.jsonl", "w", encoding="utf-8") as outf:
    for doc, snippet, llama, bart in zip(documents, args_snippets, llama_summaries, bart_summaries):
        record = {}
        record['document'] = doc
        record['reference'] = snippet
        record['llama_summary'] = llama
        record['bart_summary'] = bart
        outf.write(json.dumps(record) + "\n")

**Neutralization Results**

In [50]:
args_df = pd.read_csv("../data/inappropriate_arguments_sample_100_argsme.csv")

In [51]:
args_df.count()

query            99
argument         99
stance           99
snippet          99
id               99
prediction       99
appropriate      99
inappropriate    99
dtype: int64

In [52]:
references = args_df['argument'].tolist()

In [53]:
len(references)

99

In [54]:
llam_ndf = pd.read_csv('../data/results-by-corpus/argsme/neutralization/llama.csv', sep="\t", header=None)
llam_ndf.columns = ['id', 'llama_neutralization']

In [55]:
neutralized = llam_ndf['llama_neutralization'].tolist()

In [56]:
len(references), len(neutralized)

(99, 99)

In [57]:
with open("../data/results_argsme_neutralization.jsonl", "w", encoding="utf-8") as outf:
    for ref, neutral in zip(references, neutralized):
        record = {}
        record['reference'] = ref
        record['neutralized'] = neutral
        outf.write(json.dumps(record) + "\n")

**Summarize -> Neutralize (Single Prompt)**

In [58]:
args_df = pd.read_csv("../data/inappropriate_arguments_sample_100_argsme.csv")
documents = args_df['argument'].tolist()

In [59]:
single_prompt_both_df = pd.read_csv('../data/results-by-corpus/argsme/both/llama_extract_rewrite_argsme.csv', sep="\t", header=None)
single_prompt_both_df.columns = ['id', 'llama_extract_rewrite']

In [60]:
gist_neutralize_single_prompt_responses = single_prompt_both_df['llama_extract_rewrite'].tolist()

**Summarize -> Neutralize (Individual Prompts)**

In [61]:
two_prompts_df = pd.read_csv('../data/results-by-corpus/argsme/both/llama_extract_then_rewrite_argsme.csv', sep="\t", header=None)
two_prompts_df.columns = ['id', 'llama_extract_then_rewrite']

In [62]:
gist_neutralize_two_prompts_responses = two_prompts_df['llama_extract_then_rewrite'].tolist()

**Neutralize -> Summarize (Single Prompt)**

In [63]:
swap_order_single_prompt_df = pd.read_csv('../data/results-by-corpus/argsme/both/llama_rewrite_extract_argsme.csv', sep="\t", header=None)
swap_order_single_prompt_df.columns = ['id', 'llama_rewrite_extract']

In [64]:
neutralize_gist_single_prompt_responses = swap_order_single_prompt_df['llama_rewrite_extract'].tolist()

**Neutralize -> Summarize (Individual Prompts)**

In [65]:
swap_order_two_prompts_df = pd.read_csv('../data/results-by-corpus/argsme/both/llama_rewrite_then_extract_argsme.csv', sep="\t", header=None)
swap_order_two_prompts_df.columns = ['id', 'llama_rewrite_then_extract']

In [66]:
neutralize_gist_two_prompts_responses = swap_order_two_prompts_df['llama_rewrite_then_extract'].tolist()

In [67]:
with open("../data/results_argsme_oasis.jsonl", "w", encoding="utf-8") as outf:
    for doc, gist_neut_single, gist_neut_two, neut_gist_single, neut_gist_two in zip(documents, gist_neutralize_single_prompt_responses, gist_neutralize_two_prompts_responses, neutralize_gist_single_prompt_responses, neutralize_gist_two_prompts_responses):
        record = {}
        record['document'] = doc
        record['reference'] = " "
        record['gist_neutralize_single_prompt'] = gist_neut_single
        record['gist_neutralize_two_prompts'] = gist_neut_two
        record['neutralize_gist_single_prompt'] = neut_gist_single
        record['neutralize_gist_two_prompts'] = neut_gist_two
        outf.write(json.dumps(record) + "\n")

Args.me: Gist and Neutralize vs Neutralize and Gist

In [68]:
n_g_df = pd.read_csv('../data/results-by-corpus/argsme/both/neutralized_and_bart_summarized.csv')

In [69]:
# drop unnamed column
n_g_df = n_g_df.drop(n_g_df.columns[0], axis=1)

In [70]:
n_g_df.head()

Unnamed: 0,id,neutralized_argument,word_count,bart_gist
0,0,"Climate change is a complex and multifaceted issue, and the causes and effects of it are still being studied and debated. While carbon dioxide exhalation and car emissions do contribute to climate change, the overall effect is still significant. It is important to consider both natural and man-made sources of carbon dioxide when evaluating the effects of climate change. In addition, the evidence suggests that climate change is occurring and that human activities are the major cause. The sources provided give an interesting overview of the topic, but the conclusions drawn should be carefully considered as the accuracy and reliability of the sources should be considered as well.",109,"Climate change is a complex and multifaceted issue, and the causes and effects of it are still being studied and debated. While carbon dioxide exhalation and car emissions do"
1,1,The consensus that humans are causing recent global warming is shared by 90-100% of publishing climate scientists according to six independent studies by co-authors of this paper. Those results are consistent with the 97% consensus reported by Cook et al. (2013). This consensus strongly supports the view that humans are causing climate change and that the impacts of climate change could be severe. It is thus essential that we take immediate action to reduce emissions and mitigate the impacts of climate change.,84,The consensus that humans are causing recent global warming is shared by 90-100% of publishing climate scientists. This consensus strongly supports the
2,2,"My position on climate change is that it is undeniable and largely caused by human activities. According to numerous scientific studies, 97% of climate scientists agree that climate change is occurring and is caused by human-induced emissions. Additionally, the arguments presented by climate change deniers are usually not based on facts, but on personal beliefs. It is clear that the climate is changing and it is important for all of us to take action to reduce the impact of our activities on the environment. Sources: Shaftel, H. (Ed.). (2012, January 5). Global Climate Change: Consensus. Retrieved February 3, 2015, from http://climate.nasa.gov...",105,"The arguments presented by climate change deniers are usually not based on facts, but on personal beliefs. 97% of climate scientists agree that climate change is occurring and is"
3,3,"There is an overwhelming scientific consensus that climate change presents a serious risk to the future of the Earth and to mankind. Many of the threats we face due to climate change are insidious and while with others the connection may not be noticed by the deniers. But they are real and serious. Among these are the spread of potentially lethal diseases such as that caused by the Zika virus. Other diseases formerly confined to the tropics will spread across North America and Europe. It is clear that climate change poses a real risk, and this risk is being exacerbated by human activity. There are a limited number of circumstances under which free speech can be restricted. One of them is child pornography. The reason that can be banned with violating the 1st Amendment is the harm caused to children in the production of child porn is of such magnitude that it outweighs the importance of the right to freedom of expression. The spread of climate change-induced diseases and conflict may indeed be a real possibility in the future, and measures should be taken to minimize this risk. Though it is important to be aware of the potential risks, it is also essential to ensure that balanced and accurate information is made available in order to properly inform people of the risks and ensure that the most appropriate actions are taken. It is important to ensure that reliable information is available to allow people to make informed decisions.",249,There is an overwhelming scientific consensus that climate change presents a serious risk to the future of the Earth and to mankind. There are a limited number of circumstances under which free speech can be restricted. The reason that can be banned with violating the 1st Amendment is the harm caused to children in the production of child porn is of such magnitude.
4,4,"I'll accept this challenge. First, I should ask to be clear: When you say ""climate change is a fraud,"" are you referring specifically to anthropogenic climate change or all climate change? If it is the former, then I believe the evidence suggests that the globe is, in fact, warming at an unprecedented rate and that the majority of scientists are correct in their assessments. However, if you don't believe that climate change is happening at an unprecedented rate, then I am happy to go into the evidence for it. This debate does require a larger burden of proof as it does involve a complex topic with multiple layers of evidence. It is important to take both sides of the argument into consideration in order to draw a conclusive conclusion. I am willing to take the time to explain the evidence and to ensure that both sides are presented fairly. It would be beneficial for us to do so in order to fully understand the complexity of the issue. Therefore, I am willing to make the time commitment in order to reach a satisfactory conclusion. I look forward to engaging in this debate and to learning more about the topic.",201,"""I'll accept this challenge. If you don't believe that climate change is happening at an unprecedented rate, then I am happy to go into the evidence for it,"" he says. ""This debate does require a larger burden of proof as it does involve a complex topic"""


In [71]:
g_n_df = pd.read_csv('../data/results-by-corpus/argsme/both/bart_summarized_and_neutralized.csv', sep="\\t", header=None)

  g_n_df = pd.read_csv('../data/results-by-corpus/argsme/both/bart_summarized_and_neutralized.csv', sep="\\t", header=None)


In [72]:
g_n_df.head()

Unnamed: 0,0,1
0,0,"Climate change is an urgent issue that requires our attention, as the impacts of global warming are becoming more and more evident."
1,1,"There is a scientific consensus on the reality of anthropogenic climate change. Human-induced climate change has already caused numerous deaths due to extreme weather events and health conditions exacerbated by heat waves. According to the World Health Organization, climate change is estimated to claim over 150,000 lives annually. Diseases caused by climate fluctuations are becoming more common, from cardiovascular mortality and respiratory illnesses due to heat waves."
2,2,My position on climate change is that I believe it is occurring and is caused in part by human activities.
3,3,"There is an overwhelming scientific consensus that climate change is primarily caused by human activities. The potential risks to children in connection with climate change are so serious that traditional measures of combatting bad speech may not be adequate or sufficient. Teenagers may be especially affected by the adverse effects of climate change, as they may be more vulnerable to the risks associated with it due to their age."
4,4,"Given the amount of evidence indicating that climate change is occurring, it is likely that human activities are contributing to it. While this phenomenon is still debated among scientists, the consensus is that climate change is occurring and caused in part by human activities."


In [73]:
g_n_df[0][17]

17

In [74]:
len(g_n_df)

99

In [75]:
g_n_df.columns = ['id', 'bart_summarized_and_neutralized']

In [76]:
g_n_df.head()

Unnamed: 0,id,bart_summarized_and_neutralized
0,0,"Climate change is an urgent issue that requires our attention, as the impacts of global warming are becoming more and more evident."
1,1,"There is a scientific consensus on the reality of anthropogenic climate change. Human-induced climate change has already caused numerous deaths due to extreme weather events and health conditions exacerbated by heat waves. According to the World Health Organization, climate change is estimated to claim over 150,000 lives annually. Diseases caused by climate fluctuations are becoming more common, from cardiovascular mortality and respiratory illnesses due to heat waves."
2,2,My position on climate change is that I believe it is occurring and is caused in part by human activities.
3,3,"There is an overwhelming scientific consensus that climate change is primarily caused by human activities. The potential risks to children in connection with climate change are so serious that traditional measures of combatting bad speech may not be adequate or sufficient. Teenagers may be especially affected by the adverse effects of climate change, as they may be more vulnerable to the risks associated with it due to their age."
4,4,"Given the amount of evidence indicating that climate change is occurring, it is likely that human activities are contributing to it. While this phenomenon is still debated among scientists, the consensus is that climate change is occurring and caused in part by human activities."


In [77]:
# rename bart_gist to neutralized_and_bart_summarized
n_g_df = n_g_df.rename(columns={'bart_gist': 'neutralized_and_bart_summarized'})

In [78]:
n_g_df.head()

Unnamed: 0,id,neutralized_argument,word_count,neutralized_and_bart_summarized
0,0,"Climate change is a complex and multifaceted issue, and the causes and effects of it are still being studied and debated. While carbon dioxide exhalation and car emissions do contribute to climate change, the overall effect is still significant. It is important to consider both natural and man-made sources of carbon dioxide when evaluating the effects of climate change. In addition, the evidence suggests that climate change is occurring and that human activities are the major cause. The sources provided give an interesting overview of the topic, but the conclusions drawn should be carefully considered as the accuracy and reliability of the sources should be considered as well.",109,"Climate change is a complex and multifaceted issue, and the causes and effects of it are still being studied and debated. While carbon dioxide exhalation and car emissions do"
1,1,The consensus that humans are causing recent global warming is shared by 90-100% of publishing climate scientists according to six independent studies by co-authors of this paper. Those results are consistent with the 97% consensus reported by Cook et al. (2013). This consensus strongly supports the view that humans are causing climate change and that the impacts of climate change could be severe. It is thus essential that we take immediate action to reduce emissions and mitigate the impacts of climate change.,84,The consensus that humans are causing recent global warming is shared by 90-100% of publishing climate scientists. This consensus strongly supports the
2,2,"My position on climate change is that it is undeniable and largely caused by human activities. According to numerous scientific studies, 97% of climate scientists agree that climate change is occurring and is caused by human-induced emissions. Additionally, the arguments presented by climate change deniers are usually not based on facts, but on personal beliefs. It is clear that the climate is changing and it is important for all of us to take action to reduce the impact of our activities on the environment. Sources: Shaftel, H. (Ed.). (2012, January 5). Global Climate Change: Consensus. Retrieved February 3, 2015, from http://climate.nasa.gov...",105,"The arguments presented by climate change deniers are usually not based on facts, but on personal beliefs. 97% of climate scientists agree that climate change is occurring and is"
3,3,"There is an overwhelming scientific consensus that climate change presents a serious risk to the future of the Earth and to mankind. Many of the threats we face due to climate change are insidious and while with others the connection may not be noticed by the deniers. But they are real and serious. Among these are the spread of potentially lethal diseases such as that caused by the Zika virus. Other diseases formerly confined to the tropics will spread across North America and Europe. It is clear that climate change poses a real risk, and this risk is being exacerbated by human activity. There are a limited number of circumstances under which free speech can be restricted. One of them is child pornography. The reason that can be banned with violating the 1st Amendment is the harm caused to children in the production of child porn is of such magnitude that it outweighs the importance of the right to freedom of expression. The spread of climate change-induced diseases and conflict may indeed be a real possibility in the future, and measures should be taken to minimize this risk. Though it is important to be aware of the potential risks, it is also essential to ensure that balanced and accurate information is made available in order to properly inform people of the risks and ensure that the most appropriate actions are taken. It is important to ensure that reliable information is available to allow people to make informed decisions.",249,There is an overwhelming scientific consensus that climate change presents a serious risk to the future of the Earth and to mankind. There are a limited number of circumstances under which free speech can be restricted. The reason that can be banned with violating the 1st Amendment is the harm caused to children in the production of child porn is of such magnitude.
4,4,"I'll accept this challenge. First, I should ask to be clear: When you say ""climate change is a fraud,"" are you referring specifically to anthropogenic climate change or all climate change? If it is the former, then I believe the evidence suggests that the globe is, in fact, warming at an unprecedented rate and that the majority of scientists are correct in their assessments. However, if you don't believe that climate change is happening at an unprecedented rate, then I am happy to go into the evidence for it. This debate does require a larger burden of proof as it does involve a complex topic with multiple layers of evidence. It is important to take both sides of the argument into consideration in order to draw a conclusive conclusion. I am willing to take the time to explain the evidence and to ensure that both sides are presented fairly. It would be beneficial for us to do so in order to fully understand the complexity of the issue. Therefore, I am willing to make the time commitment in order to reach a satisfactory conclusion. I look forward to engaging in this debate and to learning more about the topic.",201,"""I'll accept this challenge. If you don't believe that climate change is happening at an unprecedented rate, then I am happy to go into the evidence for it,"" he says. ""This debate does require a larger burden of proof as it does involve a complex topic"""


In [79]:
neutralized_summarized = n_g_df['neutralized_and_bart_summarized'].tolist()
summarized_neutralized = g_n_df['bart_summarized_and_neutralized'].tolist()
docs = n_g_df['neutralized_argument'].tolist()

In [80]:
np.mean([len(x) for x in docs])

914.020202020202

In [81]:
np.mean([len(x) for x in documents])

1474.4444444444443

In [82]:
with open('../data/visualization-files/args_neutralization_length.jsonl','w', encoding='utf-8') as outf:
    for d, n in zip(documents, docs):
        record = {}
        record['document'] = d
        record['reference'] = n
        outf.write(json.dumps(record) + "\n")

In [83]:
with open('../data/visualization-files/args_ordering_comparison.jsonl','w', encoding='utf-8') as outf:
    for d, n_s, s_n in zip(docs, neutralized_summarized, summarized_neutralized):
        record = {}
        record['document'] = d
        record['reference'] = s_n # reference is the neutralized version of the bart summary
        record['neutralized_summarized'] = n_s
        outf.write(json.dumps(record) + "\n")

In [84]:
np.mean([len(x.split(" ")) for x in summarized_neutralized])

66.03030303030303

In [85]:
np.mean([len(x.split(" ")) for x in bart_summaries])

63.17171717171717

In [86]:
with open('../data/visualization-files/args_neutralization_of_summaries.jsonl','w', encoding='utf-8') as outf:
    for d, n_s, s_n in zip(documents, summarized_neutralized, bart_summaries):
        record = {}
        record['document'] = d
        record['reference'] = s_n # reference is the neutralized version of the bart summary
        record['summarized_neutralized'] = n_s
        outf.write(json.dumps(record) + "\n")

### Analyze annotator comments

In [2]:
gist_eval_df = pd.read_csv('../data/snippets_study_results.csv')

In [3]:
neutralization_eval_df = pd.read_csv('../data/neutralization_study_results.csv')

In [4]:
search_eval_df = pd.read_csv('../data/search_study_results.csv')

In [5]:
gist_comments = gist_eval_df['comments'].tolist()
neutralization_comments = neutralization_eval_df['comments'].tolist()
search_comments = search_eval_df['comments'].tolist()

In [6]:
len(gist_comments), len(neutralization_comments), len(search_comments)

(495, 528, 410)

In [11]:
# remove empty comments from each list
gist_comments = [x for x in gist_comments if str(x) != 'nan']
neutralization_comments = [x for x in neutralization_comments if str(x) != 'nan']
search_comments = [x for x in search_comments if str(x) != 'nan']

In [12]:
len(gist_comments), len(neutralization_comments), len(search_comments)

(56, 219, 242)

In [13]:
# write each comment to a csv file
with open('../data/gist_comments.txt','w', encoding='utf-8') as outf:
    for c in gist_comments:
        outf.write(c + "\n")

with open('../data/neutralization_comments.txt','w', encoding='utf-8') as outf:
    for c in neutralization_comments:
        outf.write(c + "\n")

with open('../data/search_comments.txt','w', encoding='utf-8') as outf:
    for c in search_comments:
        outf.write(c + "\n")