In [250]:
import torch
import numpy as np
import pandas as pd
from transformers import BertModel

In [251]:
train_data = pd.read_csv('arguments-training.tsv', sep="\t")
train_data

Unnamed: 0,Argument ID,Conclusion,Stance,Premise
0,A01001,Entrapment should be legalized,in favor of,if entrapment can serve to more easily capture...
1,A01002,We should ban human cloning,in favor of,we should ban human cloning as it will only ca...
2,A01003,We should abandon marriage,against,marriage is the ultimate commitment to someone...
3,A01004,We should ban naturopathy,against,it provides a useful income for some people
4,A01005,We should ban fast food,in favor of,fast food should be banned because it is reall...
...,...,...,...,...
5215,D27096,Nepotism exists in Bollywood,against,Star kids also have an upbringing which is sur...
5216,D27097,Nepotism exists in Bollywood,in favor of,Movie stars of Bollywood often launch their ch...
5217,D27098,India is safe for women,in favor of,Evil historic practices on women in the pre an...
5218,D27099,India is safe for women,in favor of,Women of our country have been and are achievi...


In [252]:
train_data['combined1'] = train_data.Conclusion.str.cat(train_data.Stance, sep=' [SEP] ')
train_data

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,combined1
0,A01001,Entrapment should be legalized,in favor of,if entrapment can serve to more easily capture...,Entrapment should be legalized [SEP] in favor of
1,A01002,We should ban human cloning,in favor of,we should ban human cloning as it will only ca...,We should ban human cloning [SEP] in favor of
2,A01003,We should abandon marriage,against,marriage is the ultimate commitment to someone...,We should abandon marriage [SEP] against
3,A01004,We should ban naturopathy,against,it provides a useful income for some people,We should ban naturopathy [SEP] against
4,A01005,We should ban fast food,in favor of,fast food should be banned because it is reall...,We should ban fast food [SEP] in favor of
...,...,...,...,...,...
5215,D27096,Nepotism exists in Bollywood,against,Star kids also have an upbringing which is sur...,Nepotism exists in Bollywood [SEP] against
5216,D27097,Nepotism exists in Bollywood,in favor of,Movie stars of Bollywood often launch their ch...,Nepotism exists in Bollywood [SEP] in favor of
5217,D27098,India is safe for women,in favor of,Evil historic practices on women in the pre an...,India is safe for women [SEP] in favor of
5218,D27099,India is safe for women,in favor of,Women of our country have been and are achievi...,India is safe for women [SEP] in favor of


In [253]:
train_data['combined2'] = train_data.combined1.str.cat(train_data.Premise, sep=' [SEP] ')
train_data

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,combined1,combined2
0,A01001,Entrapment should be legalized,in favor of,if entrapment can serve to more easily capture...,Entrapment should be legalized [SEP] in favor of,Entrapment should be legalized [SEP] in favor ...
1,A01002,We should ban human cloning,in favor of,we should ban human cloning as it will only ca...,We should ban human cloning [SEP] in favor of,We should ban human cloning [SEP] in favor of ...
2,A01003,We should abandon marriage,against,marriage is the ultimate commitment to someone...,We should abandon marriage [SEP] against,We should abandon marriage [SEP] against [SEP]...
3,A01004,We should ban naturopathy,against,it provides a useful income for some people,We should ban naturopathy [SEP] against,We should ban naturopathy [SEP] against [SEP] ...
4,A01005,We should ban fast food,in favor of,fast food should be banned because it is reall...,We should ban fast food [SEP] in favor of,We should ban fast food [SEP] in favor of [SEP...
...,...,...,...,...,...,...
5215,D27096,Nepotism exists in Bollywood,against,Star kids also have an upbringing which is sur...,Nepotism exists in Bollywood [SEP] against,Nepotism exists in Bollywood [SEP] against [SE...
5216,D27097,Nepotism exists in Bollywood,in favor of,Movie stars of Bollywood often launch their ch...,Nepotism exists in Bollywood [SEP] in favor of,Nepotism exists in Bollywood [SEP] in favor of...
5217,D27098,India is safe for women,in favor of,Evil historic practices on women in the pre an...,India is safe for women [SEP] in favor of,India is safe for women [SEP] in favor of [SEP...
5218,D27099,India is safe for women,in favor of,Women of our country have been and are achievi...,India is safe for women [SEP] in favor of,India is safe for women [SEP] in favor of [SEP...


In [254]:
combined = train_data.combined2.values.tolist()

In [255]:
train_labels = pd.read_csv('labels-training.tsv', sep="\t")
train_labels

Unnamed: 0,Argument ID,Self-direction: thought,Self-direction: action,Stimulation,Hedonism,Achievement,Power: dominance,Power: resources,Face,Security: personal,...,Tradition,Conformity: rules,Conformity: interpersonal,Humility,Benevolence: caring,Benevolence: dependability,Universalism: concern,Universalism: nature,Universalism: tolerance,Universalism: objectivity
0,A01001,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,A01002,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,A01003,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,A01004,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,A01005,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5215,D27096,1,0,0,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
5216,D27097,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
5217,D27098,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
5218,D27099,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [256]:
train_labels = train_labels.drop(columns=['Argument ID'])
train_labels

Unnamed: 0,Self-direction: thought,Self-direction: action,Stimulation,Hedonism,Achievement,Power: dominance,Power: resources,Face,Security: personal,Security: societal,Tradition,Conformity: rules,Conformity: interpersonal,Humility,Benevolence: caring,Benevolence: dependability,Universalism: concern,Universalism: nature,Universalism: tolerance,Universalism: objectivity
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5215,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
5216,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
5217,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
5218,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1


In [257]:
label_list = train_labels.values.tolist()

In [258]:
val_data = pd.read_csv('arguments-validation.tsv', sep="\t")
val_data

Unnamed: 0,Argument ID,Conclusion,Stance,Premise
0,A01001,Entrapment should be legalized,in favor of,if entrapment can serve to more easily capture...
1,A01012,The use of public defenders should be mandatory,in favor of,the use of public defenders should be mandator...
2,A02001,Payday loans should be banned,in favor of,payday loans create a more impoverished societ...
3,A02002,Surrogacy should be banned,against,Surrogacy should not be banned as it is the wo...
4,A02009,Entrapment should be legalized,against,entrapment is gravely immoral and against huma...
...,...,...,...,...
1891,E08014,We should shift the EU policy toward the Russi...,in favor of,Pushing Russia to the wall will have adverse e...
1892,E08021,We should stop buying Russian gas,in favor of,The Russians use the money we give them in exc...
1893,E08022,We should stop buying Russian gas,in favor of,The cost of gas will be higher. But I prefer t...
1894,E08024,We should strengthen our ties with Ukraine and...,in favor of,We must support countries that want to improve...


In [259]:
val_data['combined1'] = val_data.Conclusion.str.cat(val_data.Stance, sep=' [SEP] ')
val_data

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,combined1
0,A01001,Entrapment should be legalized,in favor of,if entrapment can serve to more easily capture...,Entrapment should be legalized [SEP] in favor of
1,A01012,The use of public defenders should be mandatory,in favor of,the use of public defenders should be mandator...,The use of public defenders should be mandator...
2,A02001,Payday loans should be banned,in favor of,payday loans create a more impoverished societ...,Payday loans should be banned [SEP] in favor of
3,A02002,Surrogacy should be banned,against,Surrogacy should not be banned as it is the wo...,Surrogacy should be banned [SEP] against
4,A02009,Entrapment should be legalized,against,entrapment is gravely immoral and against huma...,Entrapment should be legalized [SEP] against
...,...,...,...,...,...
1891,E08014,We should shift the EU policy toward the Russi...,in favor of,Pushing Russia to the wall will have adverse e...,We should shift the EU policy toward the Russi...
1892,E08021,We should stop buying Russian gas,in favor of,The Russians use the money we give them in exc...,We should stop buying Russian gas [SEP] in fav...
1893,E08022,We should stop buying Russian gas,in favor of,The cost of gas will be higher. But I prefer t...,We should stop buying Russian gas [SEP] in fav...
1894,E08024,We should strengthen our ties with Ukraine and...,in favor of,We must support countries that want to improve...,We should strengthen our ties with Ukraine and...


In [260]:
val_data['combined2'] = val_data.combined1.str.cat(val_data.Premise, sep=' [SEP] ')
val_data

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,combined1,combined2
0,A01001,Entrapment should be legalized,in favor of,if entrapment can serve to more easily capture...,Entrapment should be legalized [SEP] in favor of,Entrapment should be legalized [SEP] in favor ...
1,A01012,The use of public defenders should be mandatory,in favor of,the use of public defenders should be mandator...,The use of public defenders should be mandator...,The use of public defenders should be mandator...
2,A02001,Payday loans should be banned,in favor of,payday loans create a more impoverished societ...,Payday loans should be banned [SEP] in favor of,Payday loans should be banned [SEP] in favor o...
3,A02002,Surrogacy should be banned,against,Surrogacy should not be banned as it is the wo...,Surrogacy should be banned [SEP] against,Surrogacy should be banned [SEP] against [SEP]...
4,A02009,Entrapment should be legalized,against,entrapment is gravely immoral and against huma...,Entrapment should be legalized [SEP] against,Entrapment should be legalized [SEP] against [...
...,...,...,...,...,...,...
1891,E08014,We should shift the EU policy toward the Russi...,in favor of,Pushing Russia to the wall will have adverse e...,We should shift the EU policy toward the Russi...,We should shift the EU policy toward the Russi...
1892,E08021,We should stop buying Russian gas,in favor of,The Russians use the money we give them in exc...,We should stop buying Russian gas [SEP] in fav...,We should stop buying Russian gas [SEP] in fav...
1893,E08022,We should stop buying Russian gas,in favor of,The cost of gas will be higher. But I prefer t...,We should stop buying Russian gas [SEP] in fav...,We should stop buying Russian gas [SEP] in fav...
1894,E08024,We should strengthen our ties with Ukraine and...,in favor of,We must support countries that want to improve...,We should strengthen our ties with Ukraine and...,We should strengthen our ties with Ukraine and...


In [261]:
val_combined = val_data.combined2.values.tolist()

In [262]:
val_labels = pd.read_csv('labels-validation.tsv', sep="\t")
val_labels

Unnamed: 0,Argument ID,Self-direction: thought,Self-direction: action,Stimulation,Hedonism,Achievement,Power: dominance,Power: resources,Face,Security: personal,...,Tradition,Conformity: rules,Conformity: interpersonal,Humility,Benevolence: caring,Benevolence: dependability,Universalism: concern,Universalism: nature,Universalism: tolerance,Universalism: objectivity
0,A01001,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,A01012,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,A02001,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
3,A02002,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,A02009,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1891,E08014,1,0,0,0,1,0,0,0,1,...,0,1,0,0,0,0,1,0,0,1
1892,E08021,1,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,1,0,0,1
1893,E08022,0,1,0,0,0,0,0,0,0,...,0,1,0,0,0,1,1,0,0,1
1894,E08024,0,1,0,0,0,1,0,0,1,...,0,0,0,0,0,0,1,0,0,1


In [263]:
val_labels = val_labels.drop(columns=['Argument ID'])
val_labels

Unnamed: 0,Self-direction: thought,Self-direction: action,Stimulation,Hedonism,Achievement,Power: dominance,Power: resources,Face,Security: personal,Security: societal,Tradition,Conformity: rules,Conformity: interpersonal,Humility,Benevolence: caring,Benevolence: dependability,Universalism: concern,Universalism: nature,Universalism: tolerance,Universalism: objectivity
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0
3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1891,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1
1892,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1
1893,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1
1894,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,0,1


In [264]:
val_label_list = val_labels.values.tolist()

In [265]:
from transformers import BertTokenizer


tok = BertTokenizer.from_pretrained("bert-base-cased")
text = 'one' +  '[SEP]' + 'two' + '[SEP]' + 'three'
text2 = 'this is a sentence' + '[SEP]' + 'this is another sentence' + '[SEP]' + 'and another sentence'
ids = tok(text, add_special_tokens=True, padding="max_length", return_tensors='pt')
ids2 = tok(text2, add_special_tokens=True, padding="max_length", return_tensors='pt')

In [266]:
symbols = []
for each in combined:
    ids = tok(each, add_special_tokens=True, padding="max_length", return_tensors='pt')
    symbols.append(ids)

In [267]:
val_symbols = []
for each in val_combined:
    ids = tok(each, add_special_tokens=True, padding="max_length", return_tensors='pt')
    val_symbols.append(ids)

In [268]:
bert = BertModel.from_pretrained("prajjwal1/bert-small")
# symbols = ids
# # print(symbols)
# encoded_sequence = bert(**ids)
# # TODO: Get the [CLS] token using the `pooler_output` from 
# #      The BertModel output. See here: https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertModel
# #      and check the returns for the forward method.
# bert_output = encoded_sequence.pooler_output
# output = torch.unsqueeze(bert_output,1)
# # print(output[0][0].tolist())

# encoded_sequence = bert(**ids2)
# # TODO: Get the [CLS] token using the `pooler_output` from 
# #      The BertModel output. See here: https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertModel
# #      and check the returns for the forward method.
# bert_output = encoded_sequence.pooler_output
# output2 = torch.unsqueeze(bert_output,1)

Some weights of the model checkpoint at prajjwal1/bert-small were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [269]:
X = []
for s in symbols:
    encoded_sequence = bert(**s)
    bert_output = encoded_sequence.pooler_output
    output = torch.unsqueeze(bert_output,1)
    X.append(output[0][0].tolist())

In [270]:
print(len(X))

5220


In [271]:
X_val = []
for s in val_symbols:
    encoded_sequence = bert(**s)
    bert_output = encoded_sequence.pooler_output
    output = torch.unsqueeze(bert_output,1)
    X_val.append(output[0][0].tolist())

In [272]:
print(len(X_val))

1896


In [273]:
import numpy as np
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression
# print(X)
# X_all = []
# for i in X:
#     X_all.append(list(i[0:5]))
# t = [[1, 2, 3, 3, 6, 3], [2, 5, 7, 6, 6, 8]]
# print(X_all)
print(np.array(label_list).shape)
clf = MultiOutputClassifier(LogisticRegression(max_iter=1000)).fit(X, np.array(label_list))

(5220, 20)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [274]:
p = clf.predict(X_val)

In [275]:
p.shape

(1896, 20)

In [276]:
len(val_label_list)

1896

In [277]:
from sklearn.metrics import classification_report
# print(classification_report(val_label_list[0:5], p, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]))
print(classification_report(val_label_list, p))

              precision    recall  f1-score   support

           0       0.65      0.19      0.30       251
           1       0.64      0.32      0.43       496
           2       0.25      0.01      0.01       138
           3       0.66      0.28      0.39       103
           4       0.69      0.40      0.51       575
           5       0.00      0.00      0.00       164
           6       0.42      0.06      0.11       132
           7       0.78      0.05      0.10       130
           8       0.66      0.55      0.60       759
           9       0.62      0.39      0.48       488
          10       0.65      0.19      0.29       172
          11       0.62      0.27      0.37       455
          12       0.75      0.05      0.09        60
          13       0.50      0.01      0.02       127
          14       0.66      0.42      0.51       633
          15       0.25      0.01      0.01       268
          16       0.65      0.52      0.58       687
          17       0.50    

  _warn_prf(average, modifier, msg_start, len(result))
