#### This notebook implements the baseline stance classification using the Debater API pro/con service available at https://early-access-program.debater.res.ibm.com/terms?#pro_con. To use the service, you need to request an API key at https://early-access-program.debater.res.ibm.com

In [None]:
import os
import time

import pandas as pd
from debater_python_api.api.debater_api import DebaterApi
from tqdm import tqdm

# Take the API key from the DEBATER_API_KEY environment variable when it is set, so
# the real credential never has to be written into (and committed with) the notebook.
# Without that variable the original placeholder is used, which must be replaced by
# hand with a valid key before any request can succeed.
api_key = os.environ.get('DEBATER_API_KEY', 'YOUR-API-KEY')
debater_api = DebaterApi(api_key)

# Client for the pro/con scoring service; the scoring cells below call its `run` method.
pro_con_client = debater_api.get_pro_con_client()

In [None]:
# The pro/con scores can be computed once for the complete dataset before it is split
# into train/test, or separately for each of the two pre-defined splits.

# Path of the tab-separated stance dataset: replace 'DATASET.tsv' with the file to score.
data_df = pd.read_csv(
    filepath_or_buffer='DATASET.tsv',
    sep='\t',
    encoding='utf-8',
)

#### Object as is

In [None]:
%%time

# The API classifier returns a single score from -1 (strong con) to +1 (strong pro).
# The API is queried twice per row: once for (object_1, answer) and once for (object_2, answer).
# These two scores are later packed into tuples on which another classifier is fit
# to predict the final stance label.

ids, objects_0, objects_1, scores0, scores1, stances = list(), list(), list(), list(), list(), list()

for _, row in data_df.iterrows():
    # Read every field before any request is sent: a missing column then fails
    # immediately instead of being caught and retried forever.
    row_id = row['id']
    stance = row['answer_stance']
    answer = row['answer']
    topics = (row['object_1'], row['object_2'])

    pair_scores = []
    for topic in topics:
        request = [{'sentence': answer, 'topic': topic}]
        # Retry only the failing request, indefinitely, waiting 60 s between attempts.
        # `Exception` (not a bare `except`) keeps Ctrl-C / kernel interrupt working.
        while True:
            try:
                score = pro_con_client.run(request)[0]
            except Exception as err:
                print(f'pro/con request failed for id={row_id}: {err!r}; retrying in 60 s')
                time.sleep(60)
            else:
                break
        pair_scores.append(score)
        time.sleep(2)  # short pause after every successful request

    # Append to the result lists only after both scores were obtained, so a retry can
    # never leave duplicated or misaligned entries behind.
    ids.append(row_id)
    stances.append(stance)
    objects_0.append(topics[0])
    objects_1.append(topics[1])
    scores0.append(pair_scores[0])
    scores1.append(pair_scores[1])

In [None]:
# Collect the per-row results into one table: both scored topics, their pro/con
# scores and the gold stance label, keyed by the dataset id.
df_out = pd.DataFrame(
    data={
        'id': ids,
        'object_0': objects_0,
        'object_1': objects_1,
        'score_0': scores0,
        'score_1': scores1,
        'answer_stance': stances,
    }
)
# Preview the first rows as the cell output.
df_out.head()

In [None]:
# Write the scores for the unmasked objects to a tab-separated file, without the index column.
df_out.to_csv(
    path_or_buf='result_object.tsv',
    sep='\t',
    index=False,
)

#### Masked objects

In [None]:
%%time
# Same procedure as for the unmasked objects, but the masked answer text is scored
# against the two placeholder topics instead of the real object names.
ids, objects_0, objects_1, scores0, scores1, stances = list(), list(), list(), list(), list(), list()

# Use '[FIRST_ENTITY] is good' / '[SECOND_ENTITY] is good' for a sentiment prompt.
masked_topics = ('[FIRST_ENTITY]', '[SECOND_ENTITY]')

for _, row in data_df.iterrows():
    # Read every field before any request is sent: a missing column then fails
    # immediately instead of being caught and retried forever.
    row_id = row['id']
    stance = row['answer_stance']
    masked_answer = row['masked_all']

    pair_scores = []
    for topic in masked_topics:
        request = [{'sentence': masked_answer, 'topic': topic}]
        # Retry only the failing request, indefinitely, waiting 60 s between attempts.
        # `Exception` (not a bare `except`) keeps Ctrl-C / kernel interrupt working.
        while True:
            try:
                score = pro_con_client.run(request)[0]
            except Exception as err:
                print(f'pro/con request failed for id={row_id}: {err!r}; retrying in 60 s')
                time.sleep(60)
            else:
                break
        pair_scores.append(score)
        time.sleep(2)  # short pause after every successful request

    # Append to the result lists only after both scores were obtained, so a retry can
    # never leave duplicated or misaligned entries behind.
    ids.append(row_id)
    stances.append(stance)
    objects_0.append(masked_topics[0])
    objects_1.append(masked_topics[1])
    scores0.append(pair_scores[0])
    scores1.append(pair_scores[1])

In [None]:
# Collect the masked-object results into one table with the same layout as the
# unmasked run: id, the two placeholder topics, their scores and the stance label.
df_out = pd.DataFrame(
    data={
        'id': ids,
        'object_0': objects_0,
        'object_1': objects_1,
        'score_0': scores0,
        'score_1': scores1,
        'answer_stance': stances,
    }
)
# Preview the first rows as the cell output.
df_out.head()

In [None]:
# Write the scores for the masked objects to a tab-separated file, without the index column.
df_out.to_csv(
    path_or_buf='result_masked.tsv',
    sep='\t',
    index=False,
)