In [35]:
# Add ../src-py/ to the import search path (presumably for project-local modules — none are
# imported in the cells visible here).
import sys
sys.path.append('../src-py/')
from tqdm import tqdm
# Register tqdm's progress-bar variants (e.g. `progress_apply`) on pandas objects.
tqdm.pandas()
import pandas as pd
# Do not truncate long cell contents when DataFrames are displayed.
pd.set_option('display.max_colwidth', None)

In [13]:
from flair.data import Corpus, Sentence
from flair.datasets import ColumnCorpus    
from flair.embeddings import TransformerWordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

In [8]:
# Where fine-tuned tagger checkpoints are written to / loaded from.
model_folder = '../../../data-ceph/arguana/arg-generation/claim-target-tagger/model'
# Folder holding the column-formatted IBM train/test files.
data_folder = '../../../data-ceph/arguana/arg-generation/claim-target-tagger/data/ibm_ds/'

### Train a claim-target tagger on the IBM dataset:

In [6]:
# Column layout of the TSV files: token text, POS tag, and the 'ct' tag.
columns = {0: 'text', 1: 'pos', 2: 'ct'}
# Build the corpus from the train and test files only; no dev file is passed.
corpus: Corpus = ColumnCorpus(
    data_folder=data_folder,
    column_format=columns,
    train_file='train.tsv',
    test_file='test.tsv',
)

2022-01-07 14:48:21,655 Reading data from ../../../data-ceph/arguana/arg-generation/claim-target-tagger/data/ibm_ds
2022-01-07 14:48:21,656 Train: ../../../data-ceph/arguana/arg-generation/claim-target-tagger/data/ibm_ds/train.tsv
2022-01-07 14:48:21,657 Dev: None
2022-01-07 14:48:21,658 Test: ../../../data-ceph/arguana/arg-generation/claim-target-tagger/data/ibm_ds/test.tsv


In [11]:
# Name of the corpus column to predict; used both for the label dictionary and the tagger
# so the two can never disagree.
label_type = 'ct'

# 3. build the tag dictionary for that column from the corpus
label_dict = corpus.make_label_dictionary(label_type=label_type)
print(label_dict)

# 4. initialize fine-tuneable transformer embeddings WITH document context
embeddings = TransformerWordEmbeddings(model='xlm-roberta-large',
                                       layers="-1",
                                       subtoken_pooling="first",
                                       fine_tune=True,
                                       use_context=True,
                                       )

# 5. initialize bare-bones sequence tagger (no CRF, no RNN, no reprojection)
tagger = SequenceTagger(hidden_size=256,
                        embeddings=embeddings,
                        tag_dictionary=label_dict,
                        tag_type=label_type,
                        use_crf=False,
                        use_rnn=False,
                        reproject_embeddings=False,
                        )

# 6. initialize trainer
trainer = ModelTrainer(tagger, corpus)

# 7. run fine-tuning; checkpoints go to `model_folder`. The call is left as the last
#    expression so the returned score/loss history is displayed as the cell output.
trainer.fine_tune(model_folder,
                  learning_rate=5.0e-6,
                  mini_batch_size=4,
                  #mini_batch_chunk_size=1,  # remove this parameter to speed up computation if you have a big GPU
                  )

2022-01-07 15:01:45,191 Computing label dictionary. Progress:


100%|██████████| 877/877 [00:00<00:00, 19214.40it/s]

2022-01-07 15:01:45,240 Corpus contains the labels: pos (#11355), ct (#11355)
2022-01-07 15:01:45,240 Created (for label 'ct') Dictionary with 4 tags: <unk>, O, B-CT, I-CT
Dictionary with 4 tags: <unk>, O, B-CT, I-CT





2022-01-07 15:01:54,759 ----------------------------------------------------------------------------------------------------
2022-01-07 15:01:54,791 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): XLMRobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(250002, 1024, padding_idx=1)
        (position_embeddings): Embedding(514, 1024, padding_idx=1)
        (token_type_embeddings): Embedding(1, 1024)
        (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=1024, out_features=1024, bias=True)
                (key): Linear(in_features=1024, out_features=1024, bias=True)
                (value): Linear(in_features=1024, out_feature

{'test_score': 0.7769347496206374,
 'dev_score_history': [0.5333333333333333,
  0.7452830188679246,
  0.7902439024390244,
  0.8350515463917525,
  0.8775510204081632,
  0.8730964467005077,
  0.8820512820512821,
  0.8832487309644671,
  0.8787878787878788,
  0.8820512820512821],
 'train_loss_history': [1.0426235850715049,
  0.42617399000822315,
  0.29372392421475935,
  0.2260485619470602,
  0.20374770659286134,
  0.18255921371144995,
  0.14026931725029088,
  0.14063569918816818,
  0.11979448386688905,
  0.10413014359435109],
 'dev_loss_history': [tensor(0.3534, device='cuda:0'),
  tensor(0.1715, device='cuda:0'),
  tensor(0.1730, device='cuda:0'),
  tensor(0.2387, device='cuda:0'),
  tensor(0.2016, device='cuda:0'),
  tensor(0.2484, device='cuda:0'),
  tensor(0.2894, device='cuda:0'),
  tensor(0.2741, device='cuda:0'),
  tensor(0.2803, device='cuda:0'),
  tensor(0.2976, device='cuda:0')]}

### Extract targets from Reddit conclusions:

In [12]:
from flair.models import SequenceTagger
from flair.tokenization import SegtokSentenceSplitter

In [29]:
def extract_targets(claims, model=None, verbose=False):
    """Return the most confident predicted target span for every claim.

    Args:
        claims: Iterable of claim strings; each one is wrapped in a flair ``Sentence``.
        model: Optional, already loaded ``SequenceTagger``. When ``None`` the tagger is
            loaded from ``model_folder + '/final-model.pt'``. Passing a loaded model
            avoids re-reading the checkpoint on every call.
        verbose: When true, print each sentence's ``(text, score)`` candidates,
            highest score first.

    Returns:
        A list aligned with ``claims`` holding the text of the highest-scoring ``'ct'``
        span of each claim, or ``None`` where no span was predicted.
    """
    sentences = [Sentence(claim) for claim in claims]
    if not sentences:
        # Nothing to tag: skip loading the checkpoint altogether.
        return []

    if model is None:
        model = SequenceTagger.load(model_folder+'/final-model.pt')
    # predict tags for sentences (annotates the Sentence objects in place)
    model.predict(sentences)

    targets = []
    for sentence in sentences:
        spans = sentence.get_spans('ct')
        if verbose:
            print(sorted(((s.text, s.score) for s in spans), key=lambda x: -x[1]))
        # max() keeps the first span among equal scores, matching a stable descending sort.
        best_span = max(spans, key=lambda s: s.score, default=None)
        targets.append(best_span.text if best_span is not None else None)

    return targets

In [37]:
# Validation split of the Reddit data.
valid_pkl_path = '../../../data-ceph/arguana/arg-generation/multi-taks-counter-argument-generation/reddit_data/conclusion_and_ca_generation/valid_conclusion_comp_remove_75sem_perc.pkl'
dev_df = pd.read_pickle(valid_pkl_path)

In [43]:
# Tag each distinct title once, then keep a title -> target lookup for the whole frame.
unique_conclusions = dev_df.title.unique().tolist()
unique_conclusions_targets = extract_targets(unique_conclusions)
conc_to_targets = dict(zip(unique_conclusions, unique_conclusions_targets))

In [49]:
# Number of distinct conclusions for which no target span was predicted.
sum(target is None for target in unique_conclusions_targets)

122

In [64]:
# Attach the extracted target to every row through its title.
dev_df['conclusion_targets'] = dev_df['title'].apply(lambda title: conc_to_targets[title])

In [48]:
# Preview the first rows of the frame.
# NOTE(review): the saved output below shows a column named `conclusion_targers`, while the
# assignment cell writes `conclusion_targets`, and the execution counts are out of order —
# the output looks like it predates a rename; re-run on a fresh kernel to refresh (TODO confirm).
dev_df.head()

Unnamed: 0,post_id,split,comment_id,title,post,n_sentences,counter,num_cand_conc,masked_premises,premises_with_conclusion,conclusion_targers
295914,t3_7uek1b,val,t1_dtjoww6,Pink is just light red and doesn't deserve it's own name.,"[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of precedence for just calling something a light or dark color., when people see a light or dark blue object, they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that is light red we call it pink., i don't see any rhyme or reason to this distinction and i don't think pink deserves it's name.]",8,"[what's your opinion on scarlet?, brick?, salmon?, rose?, wine?]",1,"[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of precedence for just calling something a light or dark color., when people see a light or dark blue object, they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that is light red we call it pink., i don't see any rhyme or reason to this distinction and i don't think pink deserves it's name.]","[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of precedence for just calling something a light or dark color., when people see a light or dark blue object, they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that is light red we call it pink., i don't see any rhyme or reason to this distinction 
and i don't think pink deserves it's name., Therefore, Pink is just light red and doesn't deserve it's own name.]",Pink
295915,t3_7uek1b,val,t1_dtjphvq,Pink is just light red and doesn't deserve it's own name.,"[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of precedence for just calling something a light or dark color., when people see a light or dark blue object, they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that is light red we call it pink., i don't see any rhyme or reason to this distinction and i don't think pink deserves it's name.]",8,"[pink is a very specific, commonly used and well recognized shade of light red that is iconic and often associated with gender roles, etc., when you say 'pink' i instantly know and see the color in my head., it's a great description., but light red?, that could mean many different colors and shades., so in short this is a downgrade., you're forgetting pink isn't 'burnt umber' or some color used only by painters., its rather a very, very specific and important hue that is iconic and even is associated with causes e. 
g., breast cancer awareness , etc.]",1,"[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of precedence for just calling something a light or dark color., when people see a light or dark blue object, they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that is light red we call it pink., i don't see any rhyme or reason to this distinction and i don't think pink deserves it's name.]","[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of precedence for just calling something a light or dark color., when people see a light or dark blue object, they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that is light red we call it pink., i don't see any rhyme or reason to this distinction and i don't think pink deserves it's name., Therefore, Pink is just light red and doesn't deserve it's own name.]",Pink
295916,t3_7uek1b,val,t1_dtjqozq,Pink is just light red and doesn't deserve it's own name.,"[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of precedence for just calling something a light or dark color., when people see a light or dark blue object, they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that is light red we call it pink., i don't see any rhyme or reason to this distinction and i don't think pink deserves it's name.]",8,"[u madethisforoneremark op has awarded 1 delta in this post., all comments that earned deltas from op or other users are listed here r deltalog comments 7uesvt , in r deltalog., please note that a change of view doesn't necessarily mean a reversal or that the conversation has ended., delta system explained deltaboards]",1,"[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of precedence for just calling something a light or dark color., when people see a light or dark blue object, they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that is light red we call it pink., i don't see any rhyme or reason to this distinction and i don't think pink deserves it's name.]","[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of precedence for just calling something a light or dark color., when people see a light or dark blue object, 
they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that is light red we call it pink., i don't see any rhyme or reason to this distinction and i don't think pink deserves it's name., Therefore, Pink is just light red and doesn't deserve it's own name.]",Pink
295917,t3_7uek1b,val,t1_dtju7o9,Pink is just light red and doesn't deserve it's own name.,"[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of precedence for just calling something a light or dark color., when people see a light or dark blue object, they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that is light red we call it pink., i don't see any rhyme or reason to this distinction and i don't think pink deserves it's name.]",8,"[optics physicist here., this is actually way deeper., pink isn t s real color at all., pink is one of the few illusion colors like purple., they are harmonics think about the spectrum rainbow ., roygbiv., neither purple nor pink is present., violet is actually a different color than purple., see we have something that is a wavelength between the wavelength of red and yellow orange., but when you mix red and blue the wavelength in between is yellow not violet., so why do we see purple?, see my eli5 for more: we 'see' violet because of harmonics., we don't have a violet color receptor; just red blue and green., there is a sensitivity in the red cone that makes it activate a tiny bit from violet light., thus is essentially a harmony like in music because the wavelength is almost doubled., notes have the same similar sound to their harmonic partners., because this is similar to a red mixed with a blue purple our brains use the same sensation to represent them., in reality, they are as different as yellow and indigo., pink is similar]",1,"[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of 
precedence for just calling something a light or dark color., when people see a light or dark blue object, they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that is light red we call it pink., i don't see any rhyme or reason to this distinction and i don't think pink deserves it's name.]","[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of precedence for just calling something a light or dark color., when people see a light or dark blue object, they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that is light red we call it pink., i don't see any rhyme or reason to this distinction and i don't think pink deserves it's name., Therefore, Pink is just light red and doesn't deserve it's own name.]",Pink
295918,t3_7uek1b,val,t1_dtk8pdf,Pink is just light red and doesn't deserve it's own name.,"[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of precedence for just calling something a light or dark color., when people see a light or dark blue object, they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that is light red we call it pink., i don't see any rhyme or reason to this distinction and i don't think pink deserves it's name.]",8,"[pink is:hex triplet ffc0cbsrgbb r, g, b 255, 192, 203 source: you can see there is quite a bit of blue and green component to pink apart from red.]",1,"[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of precedence for just calling something a light or dark color., when people see a light or dark blue object, they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that is light red we call it pink., i don't see any rhyme or reason to this distinction and i don't think pink deserves it's name.]","[i understand the need for more precise naming when you're doing something artsy., what i'm focusing on is how we describe colors in everyday life., pink is just a light shade of the color red., we already have a ton of precedence for just calling something a light or dark color., when people see a light or dark blue object, they just call it light blue or dark blue., when the see a light or dark green thing, they call it light green or dark green., for some reason, when we see something that 
is light red we call it pink., i don't see any rhyme or reason to this distinction and i don't think pink deserves it's name., Therefore, Pink is just light red and doesn't deserve it's own name.]",Pink


In [50]:
# Persist the frame, now carrying the extracted targets, under a separate file name.
targets_pkl_path = '../../../data-ceph/arguana/arg-generation/multi-taks-counter-argument-generation/reddit_data/conclusion_and_ca_generation/valid_conclusion_comp_remove_75sem_perc_with_targets.pkl'
dev_df.to_pickle(targets_pkl_path)

### Compute the stance of the conclusion towards its target:

In [52]:
from debater_python_api.api.debater_api import DebaterApi
from debater_python_api.api.sentence_level_index.client.sentence_query_base import SimpleQuery
from debater_python_api.api.sentence_level_index.client.sentence_query_request import SentenceQueryRequest
from debater_python_api.api.clients.narrative_generation_client import Polarity

import os

# The Project Debater API key must not be committed with the notebook; read it from the
# environment instead (export DEBATER_API_KEY=... before starting the kernel).
debater_api_key = os.environ.get('DEBATER_API_KEY')
if not debater_api_key:
    raise RuntimeError('Set the DEBATER_API_KEY environment variable to your Project Debater API key.')

debater_api = DebaterApi(debater_api_key)
# Client used below to score each conclusion against its target.
pro_con_client = debater_api.get_pro_con_client()

In [54]:
# Preview only the first few conclusion -> target pairs instead of dumping the whole mapping.
dict(list(conc_to_targets.items())[:10])

{" Pink is just light red and doesn't deserve it's own name.": 'Pink',
 ' Elon Musk should not be selling flamethrowers as if they were toys.': 'Elon Musk',
 ' The Commandment "Thou Shalt Not Kill" is not hypocritical when reviewing the actions of Old Testament God; it\'s just poorly translated.': 'The Commandment " Thou Shalt Not Kill "',
 ' You can be feminist and still believe there are, on the whole, differences between men and women': None,
 ' Go is a better board game than chess for casual players': 'Go',
 ' I dislike Led Zeppelin': 'Led Zeppelin',
 ' Life in a post-apocalyptic world is pointless. Suicide would be the practical option.': 'Life in a post-apocalyptic world',
 'If Robert Muller gets fired, taking to the streets to protest like many people are planning to do will accomplish absolutely nothing.': 'taking to the streets to protest like many people are planning to do',
 " Consent either doesn't matter or is always legitimate.": 'Consent',
 ' Concussions in the NFL are j

In [57]:
# One request per distinct conclusion: score the conclusion against its extracted target,
# falling back to the conclusion itself as topic when no target was found.
sentence_topic_dicts = [
    {'sentence': conclusion, 'topic': target if target is not None else conclusion}
    for conclusion, target in conc_to_targets.items()
]
scores = pro_con_client.run(sentence_topic_dicts)

ProConClient: 100%|██████████| 2528/2528 [00:59<00:00, 59.07it/s]

In [59]:
# Pair each distinct conclusion with its score, relying on both sequences sharing one order.
conc_to_scores = dict(zip(unique_conclusions, scores))

In [60]:
# Attach the stance score to every row through its title.
dev_df['conclusion_stance'] = dev_df['title'].apply(lambda title: conc_to_scores[title])

In [66]:
# Spot-check a few random rows: draw 10 at random and show the first 5 of them.
preview_columns = ['title', 'conclusion_targets', 'conclusion_stance']
dev_df[preview_columns].sample(10).head()

Unnamed: 0,title,conclusion_targets,conclusion_stance
406495,I think gender roles are a good thing because they give you a sense of identity,gender roles,0.995024
373542,Throw pillows are a useless waste of space,Throw pillows,-0.9981
390606,"Drug education for children needs to be reformed, and at it's current state is dangerous and unethical",Drug education for children,-0.669331
304576,"Based on all information available, Communism on a large scale should stop being attempted.",Communism on a large scale,-0.99383
369195,The wage gap isn't about gender,The wage,0.008727


In [73]:
# Write the frame again, now including the stance column; this is the same path used by the
# earlier save, so that file is overwritten.
targets_pkl_path = '../../../data-ceph/arguana/arg-generation/multi-taks-counter-argument-generation/reddit_data/conclusion_and_ca_generation/valid_conclusion_comp_remove_75sem_perc_with_targets.pkl'
dev_df.to_pickle(targets_pkl_path)