In [1]:
import pandas as pd
import numpy as np
import json

from sklearn.metrics import cohen_kappa_score

In [2]:
def valid_filter(df, attention_checker):
    not_use = 0
    index = []
    for i in range(len(df)):
        sub = df.iloc[i]
        not_pass = 0
        for word in range(1,61):
            input_col = 'Input.word'+str(word)
            word_col = 'Answer.word'+str(word)+'-radios'
            if sub[input_col] in attention_checker:
                if sub[word_col] != attention_checker[sub[input_col]]:
                    not_pass += 1
        if not_pass > 2:
            index.append(i)
            not_use += 1

    print(df.shape[0])
    print(not_use)
    #notuse.append(not_use)

    df_valid = []
    for i in range(len(index)):
        if i != len(index)-1:
            df_valid.append(df.iloc[index[i]+1:index[i+1]])
        else:
            if index[i] != len(df) - 1:
                df_valid.append(df.iloc[index[i]+1:])

    df_valid = pd.concat(df_valid)
    
    return index, df_valid

In [3]:
def worker_stat(df, df_valid, index, task):
    worker_mean_kappa = {}
    for worker in df["WorkerId"].unique():
        workersub = df[df["WorkerId"] == worker]
        if worker not in worker_mean_kappa:
            worker_mean_kappa[worker] = [len(workersub)]
        hits = workersub.HITId.unique()
        for hit in hits:
            workerhit = workersub[workersub["HITId"] == hit]
            hitsub = df_valid[df_valid['HITId'] == hit]
            if len(hitsub) <= 1:
                continue
            word_col = []
            for word in range(1,61):
                word_col.append('Answer.word'+str(word)+'-radios')
            for i in range(len(hitsub)):
                if hitsub.iloc[i]["WorkerId"] != worker:
                    score = cohen_kappa_score(hitsub.iloc[i][word_col].astype(int), workerhit.iloc[0][word_col].astype(int), labels=[1,2,3])
                    if np.isnan(score):
                        score = 1
                    worker_mean_kappa[worker].append(score)

    worker_list = {}
    for worker in worker_mean_kappa:
        if worker not in worker_list:
            if len(worker_mean_kappa[worker]) == 1:
                worker_list[worker] = [worker_mean_kappa[worker][0], 0, np.nan, 0, 0]
            else:
                mean = round(np.mean(worker_mean_kappa[worker][1:]), 3)
                bad_times = sum([i<0.3 for i in worker_mean_kappa[worker][1:]])
                good_times = sum([i>0.55 for i in worker_mean_kappa[worker][1:]])
#                 print(worker, worker_mean_kappa[worker][1:], mean, bad_times, good_times)
                worker_list[worker] = [worker_mean_kappa[worker][0], 0, mean, bad_times, good_times]

    temp = {}
    for worker in df.iloc[index].WorkerId:
        if worker in temp:
            temp[worker] += 1
        else:
            temp[worker] = 1

    for worker in temp:
        if temp[worker]/worker_list[worker][0]>0.3:
            worker_list[worker][1] = 1

    worker_data = pd.DataFrame(worker_list).T
    worker_data.columns = [task+i for i in [" HITs Count", " No Pass", " Mean Kappa", " Bad Times", " Good Times"]]
    
    return worker_data

In [4]:
def calculate_mean_ck(df_valid):
    ck = []
    for hit in df_valid['HITId'].unique():
        sub = df_valid[df_valid['HITId'] == hit]
        if len(sub) <= 1:
            continue
        word_col = []
        for word in range(1,61):
            word_col.append('Answer.word'+str(word)+'-radios')
        for i in range(len(sub)-1):
            for j in range(i+1, len(sub)):
                score = cohen_kappa_score(sub.iloc[i][word_col].astype(int), sub.iloc[j][word_col].astype(int), labels=[1,2,3])
                if np.isnan(score):
                    score = 1
                ck.append(score)

    print('Average CK:{:.3f}'.format(np.mean(ck)))

In [5]:
def lexicon_classification(df_valid, threshold=0.8):
    word2score = {}
    for answer_ind in range(len(df_valid)):
        answer = df_valid.iloc[answer_ind]
        for word in range(1,61):
            input_col = 'Input.word'+str(word)
            word_col = 'Answer.word'+str(word)+'-radios'
            #score = max(3 - answer[word_col].astype(int), 0)
            score = 1 if answer[word_col].astype(int) < 3 else 0
            if answer[input_col] not in word2score:
                word2score[answer[input_col]] = [score, 1]
            else:
                word2score[answer[input_col]][0] = word2score[answer[input_col]][0]+score
                word2score[answer[input_col]][1] = word2score[answer[input_col]][1]+1
                
    desB = []
    # relB = []
    no = []

    for word in word2score:
        average = word2score[word][0]/word2score[word][1]
        #if average > 1.6:
        if average >= threshold:
            desB.append(word)
        #elif average < 0.6:
        else:
            no.append(word)
        #else:
        #    relB.append(word)
                
    return desB, no

In [6]:
def generate_result(desB, no, positivedata, filename, total):
    stat = {}
    for method in alldata.Method.unique():
        stat[method] = {"right": {"one":[], "freq":[]}, "wrong":{"one":[], "freq":[]}}

    for word in desB:
        sub = positivedata[positivedata["Word"] == word]
        for ind, method in enumerate(sub["Method"]):
            if sub.iloc[ind]["Freq"] == "one":
                stat[method]["right"]["one"].append(word)
            else:
                stat[method]["right"]["freq"].append(word)

    for word in no:
        sub = positivedata[positivedata["Word"] == word]
        for ind, method in enumerate(sub["Method"]):
            if sub.iloc[ind]["Freq"] == "one":
                stat[method]["wrong"]["one"].append(word)
            else:
                stat[method]["wrong"]["freq"].append(word)

    for method in stat:
        print(method, len(stat[method]['right']['one'])/total, len(stat[method]['right']['freq'])/total)
    
    with open(filename, "w") as f:
        json.dump(stat, f)

In [7]:
alldata = pd.read_csv("../empathy_dictionary/lexica/AMT/topwords/all.csv")

#### Positive

In [8]:
positivedata = alldata[alldata["Label"] == "pos"]

In [9]:
df = pd.read_csv('positive.csv')
df2 = pd.read_csv('positive_2.csv')

df_pos = pd.concat([df,df2])

In [10]:
attention_checker_pos = {"great": 1, "skiing": 2, "deadline": 3, "further": 3, "the": 3, "alsike": 4, "Q<--->": 4}

no_use_pos, df_valid_pos = valid_filter(df_pos, attention_checker_pos)

312
133


In [11]:
worker_dataframe = worker_stat(df_pos, df_valid_pos, no_use_pos, 'Pos')
calculate_mean_ck(df_valid_pos)

Average CK:0.446


In [12]:
lexica_pos, non_lexica_pos = lexicon_classification(df_valid_pos)
generate_result(lexica_pos, non_lexica_pos, positivedata, "../empathy_dictionary/lexica/AMT/results/positive.json", 3)

Roberta_Mask 8.0 21.666666666666668
Roberta_Partition 6.666666666666667 28.0
DistilBERT_Mask 10.666666666666666 50.0
DistilBERT_Partition 9.333333333333334 30.0
FFN 28.333333333333332 63.666666666666664
SVM 19.666666666666668 57.0
LSTM 8.333333333333334 60.333333333333336
Uni 5.666666666666667 46.0


#### Negative

In [13]:
negativedata = alldata[alldata["Label"] == "neg"]

In [14]:
df_neg = pd.read_csv('negative.csv')
df_neg = pd.concat([df_neg, pd.read_csv('negative_2.csv')])

In [15]:
attention_checker_neg = {"terrible": 1, "great": 3, "exam": 2, "further": 3, "the": 3, "alsike": 4, "Q<--->": 4}

no_use_neg, df_valid_neg = valid_filter(df_neg, attention_checker_neg)

290
120


In [16]:
worker_data = worker_stat(df_neg, df_valid_neg, no_use_neg, 'Neg')
worker_dataframe = worker_dataframe.merge(worker_data, left_index=True, right_index=True, how="outer")
calculate_mean_ck(df_valid_neg)

Average CK:0.431


In [17]:
lexica_neg, non_lexica_neg = lexicon_classification(df_valid_neg)
generate_result(lexica_neg, non_lexica_neg, negativedata, "../empathy_dictionary/lexica/AMT/results/negative.json", 3)

Roberta_Mask 16.333333333333332 48.666666666666664
Roberta_Partition 11.0 41.333333333333336
DistilBERT_Mask 24.333333333333332 62.0
DistilBERT_Partition 10.666666666666666 40.666666666666664
FFN 46.0 63.666666666666664
SVM 42.333333333333336 61.666666666666664
LSTM 14.666666666666666 58.666666666666664
Uni 8.333333333333334 19.666666666666668


#### Joy

In [18]:
joydata = alldata[alldata["Label"] == "joy"]

In [19]:
df_joy = pd.read_csv('joy.csv')

In [20]:
attention_checker_joy = {'happy':1, 'party':2, 'jail':3, 'further':3, 'the':3, 'alsike':4, 'Q<--->':4}
no_use_joy, df_valid_joy = valid_filter(df_joy, attention_checker_joy)

105
11


In [21]:
worker_data = worker_stat(df_joy, df_valid_joy, no_use_joy, 'Joy')
worker_dataframe = worker_dataframe.merge(worker_data, left_index=True, right_index=True, how="outer")
calculate_mean_ck(df_valid_joy)

Average CK:0.511


In [22]:
lexica_joy, non_lexica_joy = lexicon_classification(df_valid_joy)
generate_result(lexica_joy, non_lexica_joy, joydata, "../empathy_dictionary/lexica/AMT/results/joy.json", 1)

Roberta_Mask 18.0 25.0
Roberta_Partition 24.0 21.0
DistilBERT_Mask 16.0 31.0
DistilBERT_Partition 12.0 15.0
FFN 21.0 39.0
SVM 16.0 38.0
LSTM 11.0 25.0
Uni 6.0 19.0


#### Anger

In [23]:
angerdata = alldata[alldata["Label"] == "anger"]
df_anger = pd.read_csv('anger.csv')

In [24]:
attention_checker_anger = {'angry':1, 'argument':2, 'animal':3, 'further':3, 'the':3, 'alsike':4, 'Q<--->':4}
no_use_anger, df_valid_anger = valid_filter(df_anger, attention_checker_anger)

85
6


In [25]:
worker_data = worker_stat(df_anger, df_valid_anger, no_use_anger, 'Anger')
worker_dataframe = worker_dataframe.merge(worker_data, left_index=True, right_index=True, how="outer")
calculate_mean_ck(df_valid_anger)

Average CK:0.551


In [26]:
lexica_anger, non_lexica_anger = lexicon_classification(df_valid_anger)
generate_result(lexica_anger, non_lexica_anger, angerdata, "../empathy_dictionary/lexica/AMT/results/anger.json", 1)

Roberta_Mask 3.0 14.0
Roberta_Partition 9.0 18.0
DistilBERT_Mask 19.0 19.0
DistilBERT_Partition 6.0 20.0
FFN 19.0 15.0
SVM 15.0 16.0
LSTM 12.0 18.0
Uni 0.0 13.0


#### Fear

In [27]:
feardata = alldata[alldata["Label"] == "fear"]
df_fear = pd.read_csv('fear.csv')
df_fear = pd.concat([df_fear, pd.read_csv('fear_2.csv')])

In [28]:
attention_checker_fear = {"afraid": 1, "jail": 2, "table": 3, "further": 3, "the": 3, "alsike": 4, "Q<--->": 4}
no_use_fear, df_valid_fear = valid_filter(df_fear, attention_checker_fear)

90
5


In [29]:
worker_data = worker_stat(df_fear, df_valid_fear, no_use_fear, 'Fear')
worker_dataframe = worker_dataframe.merge(worker_data, left_index=True, right_index=True, how="outer")
calculate_mean_ck(df_valid_fear)

Average CK:0.557


In [30]:
lexica_fear, non_lexica_fear = lexicon_classification(df_valid_fear)
generate_result(lexica_fear, non_lexica_fear, feardata, "../empathy_dictionary/lexica/AMT/results/fear.json", 1)

Roberta_Mask 14.0 28.0
Roberta_Partition 14.0 29.0
DistilBERT_Mask 25.0 33.0
DistilBERT_Partition 2.0 18.0
FFN 28.0 28.0
SVM 35.0 31.0
LSTM 18.0 30.0
Uni 3.0 14.0


#### Sadness

In [31]:
saddata = alldata[alldata["Label"] == "sadness"]
df_sad = pd.read_csv('sadness.csv')
df_sad = pd.concat([df_sad, pd.read_csv('sadness_2.csv')])

In [32]:
attention_checker_sad = {"sad": 1, "tear": 2, "party": 3, "further": 3, "the": 3, "alsike": 4, "Q<--->": 4}
no_use_sad, df_valid_sad = valid_filter(df_sad, attention_checker_sad)

95
2


In [33]:
worker_data = worker_stat(df_sad, df_valid_sad, no_use_sad, 'Sadness')
worker_dataframe = worker_dataframe.merge(worker_data, left_index=True, right_index=True, how="outer")
calculate_mean_ck(df_valid_sad)

Average CK:0.576


In [34]:
lexica_sad, non_lexica_sad = lexicon_classification(df_valid_sad)
generate_result(lexica_sad, non_lexica_sad, saddata, "../empathy_dictionary/lexica/AMT/results/sad.json", 1)

Roberta_Mask 8.0 22.0
Roberta_Partition 3.0 18.0
DistilBERT_Mask 8.0 18.0
DistilBERT_Partition 2.0 14.0
FFN 6.0 17.0
SVM 8.0 17.0
LSTM 7.0 17.0
Uni 1.0 13.0


#### surprise

In [35]:
surprisedata = alldata[alldata["Label"] == "surprise"]
df_surprise = pd.read_csv('surprise.csv')
df_surprise = pd.concat([df_surprise, pd.read_csv('surprise_2.csv')])

In [36]:
attention_checker_surprise = {"surprising": 1, "magician": 2, "book": 3, "further": 3, "the": 3, "alsike": 4, "Q<--->": 4}
no_use_surprise, df_valid_surprise = valid_filter(df_surprise, attention_checker_surprise)

100
11


In [37]:
worker_data = worker_stat(df_surprise, df_valid_surprise, no_use_surprise, 'Surprise')
worker_dataframe = worker_dataframe.merge(worker_data, left_index=True, right_index=True, how="outer")
calculate_mean_ck(df_valid_surprise)

Average CK:0.517


In [38]:
lexica_surprise, non_lexica_surprise = lexicon_classification(df_valid_surprise)
generate_result(lexica_surprise, non_lexica_surprise, surprisedata, "../empathy_dictionary/lexica/AMT/results/surprise.json", 1)

Roberta_Mask 4.0 9.0
Roberta_Partition 5.0 13.0
DistilBERT_Mask 3.0 11.0
DistilBERT_Partition 2.0 9.0
FFN 9.0 11.0
SVM 8.0 11.0
LSTM 9.0 15.0
Uni 1.0 6.0
