In [81]:
import pandas as pd
import numpy as np

In [82]:
def get_hypotheses_sarcasm(row):
    """Return the value of the one non-null field whose name contains 'literal'.

    Args:
        row: a pandas Series (one DataFrame row) with string labels.

    Returns:
        The value stored under the single remaining 'literal' label.

    Raises:
        AssertionError: if, after dropping nulls, the row does not have
            exactly one label containing 'literal'.
    """
    non_null = row.dropna()
    candidates = []
    for column in non_null.keys():
        if 'literal' in column:
            candidates.append(column)
    # Exactly one 'literal' field is expected to be populated per row.
    assert len(candidates) == 1
    (only_column,) = candidates
    return non_null[only_column]

def sarcasm_metadata(row):
    """Collect the 'snum' and 'incongruency' fields of a row into a new dict.

    Args:
        row: any mapping-like object (e.g. a pandas Series) that supports
            lookup by the keys 'snum' and 'incongruency'.

    Returns:
        A dict with exactly the keys 'snum' and 'incongruency'.
    """
    return {
        'snum': row['snum'],
        'incongruency': row['incongruency'],
    }

# Load the sarcasm records (one JSON object per line) and expose the
# 'sarcasm_message' column under the shared name 'premise'.
df = pd.read_json("sarcasm_twitter_rte_separate.jsonlines", lines=True)
df = df.rename(columns={'sarcasm_message': 'premise'})

# Each row's hypothesis is its single populated 'literal*' field.
df['hypothesis'] = df.apply(get_hypotheses_sarcasm, axis=1)
df['type-of-inference'] = "sarcasm"

# Make metadata
df['metadata'] = df.apply(sarcasm_metadata, axis=1)
# Remove keys moved to metadata
literal_keys = [column for column in df.columns if 'literal' in column]
df = df.drop(columns=literal_keys + ['incongruency', 'snum'])

# Every sarcasm row gets the placeholder label '?'.
df['label'] = '?'

df

Unnamed: 0,premise,hypothesis,type-of-inference,metadata,label
0,Nice having a conversation with you before I h...,I am too tired to talk to you right now.,sarcasm,"{'snum': '384499124747251713-1', 'incongruency...",?
1,Nice having a conversation with you before I h...,Terrible having a conversation with you before...,sarcasm,"{'snum': '384499124747251713-2', 'incongruency...",?
2,Nice having a conversation with you before I h...,It was not nice having a conversation with you...,sarcasm,"{'snum': '384499124747251713-3', 'incongruency...",?
3,Nice having a conversation with you before I h...,Unpleasant having a conversation with you befo...,sarcasm,"{'snum': '384499124747251713-4', 'incongruency...",?
4,I'm starting to enjoy cardio now ? ? http://t....,I don't like cardio.,sarcasm,"{'snum': '380407973278801920-1', 'incongruency...",?
...,...,...,...,...,...
4757,Don't everyone text me at once now...,Nobody text me...,sarcasm,"{'snum': '3.78968E+17-1', 'incongruency': 'imp...",?
4758,Thanks for being family,I wish you treated me like family.,sarcasm,"{'snum': '3.79028E+17-1', 'incongruency': 'imp...",?
4759,Did someone forgot to on the air-conditioner? ...,"It's too hot, I think someone needs to turn on...",sarcasm,"{'snum': '409819488356298752-1', 'incongruency...",?
4760,Awwww you're so cute! Look at you being all dr...,"Wow, you are behaving really badly! You are b...",sarcasm,"{'snum': '4.25467E+17-1', 'incongruency': 'imp...",?


In [83]:
# Combine similes

# Read the simile pairs, tag them, and stack them under the rows already in `df`.
simile_df = pd.read_json('simile-entail.json')
simile_df['type-of-inference'] = 'simile'
# Simile rows carry no metadata; NaN keeps the column present after stacking.
simile_df['metadata'] = np.nan

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
# NOTE: re-running this cell stacks the simile rows onto `df` a second time.
df = pd.concat([df, simile_df], ignore_index=True)
df

Unnamed: 0,premise,hypothesis,type-of-inference,metadata,label
0,Nice having a conversation with you before I h...,I am too tired to talk to you right now.,sarcasm,"{'snum': '384499124747251713-1', 'incongruency...",?
1,Nice having a conversation with you before I h...,Terrible having a conversation with you before...,sarcasm,"{'snum': '384499124747251713-2', 'incongruency...",?
2,Nice having a conversation with you before I h...,It was not nice having a conversation with you...,sarcasm,"{'snum': '384499124747251713-3', 'incongruency...",?
3,Nice having a conversation with you before I h...,Unpleasant having a conversation with you befo...,sarcasm,"{'snum': '384499124747251713-4', 'incongruency...",?
4,I'm starting to enjoy cardio now ? ? http://t....,I don't like cardio.,sarcasm,"{'snum': '380407973278801920-1', 'incongruency...",?
...,...,...,...,...,...
5355,"At that point, it could spread like wild fire.","At that point, it could spread harmlessly",simile,,not_entailment
5356,"He had a wide smile, but his eyes were cold, y...","He had a wide smile, but his eyes were cold, y...",simile,,entailment
5357,"He had a wide smile, but his eyes were cold, y...","He had a wide smile, but his eyes were cold, y...",simile,,not_entailment
5358,The things that wait in between the two are li...,The things that wait in between the two are un...,simile,,entailment


In [84]:
# Combine metaphors

# Read the metaphor pairs, tag them, and stack them under the rows already in `df`.
metaphor_df = pd.read_json('metaphor-entail.json')
metaphor_df['type-of-inference'] = 'metaphor'
# Metaphor rows carry no metadata; NaN keeps the column present after stacking.
metaphor_df['metadata'] = np.nan

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
# NOTE: re-running this cell stacks the metaphor rows onto `df` a second time.
df = pd.concat([df, metaphor_df], ignore_index=True)
df

Unnamed: 0,premise,hypothesis,type-of-inference,metadata,label
0,Nice having a conversation with you before I h...,I am too tired to talk to you right now.,sarcasm,"{'snum': '384499124747251713-1', 'incongruency...",?
1,Nice having a conversation with you before I h...,Terrible having a conversation with you before...,sarcasm,"{'snum': '384499124747251713-2', 'incongruency...",?
2,Nice having a conversation with you before I h...,It was not nice having a conversation with you...,sarcasm,"{'snum': '384499124747251713-3', 'incongruency...",?
3,Nice having a conversation with you before I h...,Unpleasant having a conversation with you befo...,sarcasm,"{'snum': '384499124747251713-4', 'incongruency...",?
4,I'm starting to enjoy cardio now ? ? http://t....,I don't like cardio.,sarcasm,"{'snum': '380407973278801920-1', 'incongruency...",?
...,...,...,...,...,...
5968,She did not realize that opportunity was waiti...,She did not realize that opportunity was leavi...,metaphor,,not_entailment
5969,As I walk through the grey mist that tempers m...,As I walk through the grey mist that blocks my...,metaphor,,entailment
5970,As I walk through the grey mist that tempers m...,As I walk through the grey mist that enhances ...,metaphor,,not_entailment
5971,Still waters groove in the wind,Still waters sway in the wind,metaphor,,entailment


In [85]:
# Add irony
def irony_metadata(row):
    """Build the metadata dict for one irony row.

    Args:
        row: any mapping-like object (e.g. a pandas Series) that supports
            lookup by the keys 'Tweet index' and 'split'.

    Returns:
        A dict with 'corpus-sent-id' (taken from 'Tweet index') and 'split'.
    """
    return {
        'corpus-sent-id': row['Tweet index'],
        'split': row['split'],
    }

# Load the recast irony pairs and align column names with the combined frame.
irony_df = pd.read_csv('irony/recast_irony.csv')
irony_df = irony_df.rename(columns={'hyp': 'hypothesis'})
irony_df = irony_df.drop(columns=['Unnamed: 0', 'Label', 'Tweet text'])

# Convert the raw label to the string labels used by the other subsets.
# The previous condition was the constant `True`, so every row became
# 'entailment'; the row's own value is tested instead.
# NOTE(review): assumes 'label' is boolean / 0-1 with truthy == entailment,
# and that it has no missing values (NaN is truthy) -- confirm against the CSV.
irony_df['label'] = irony_df['label'].map(lambda x: 'entailment' if x else 'not_entailment')

# Fold the provenance columns into a metadata dict, then drop them.
irony_df['metadata'] = irony_df.apply(irony_metadata, axis=1)
irony_df = irony_df.drop(columns=['Tweet index', 'split'])
irony_df['type-of-inference'] = 'irony'

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
# NOTE: re-running this cell stacks the irony rows onto `df` a second time.
df = pd.concat([df, irony_df], ignore_index=True)
df['type-of-inference'].value_counts()

sarcasm     4762
irony       4601
metaphor     613
simile       598
Name: type-of-inference, dtype: int64

In [86]:
# Write the combined frame as tab-separated text. The row index is written
# too (pandas default), so the file's first column is the running row number.
df.to_csv(path_or_buf='all_data.tsv', sep='\t')