In [3]:
# Colab-only setup: mount Google Drive at /content/drive so the data files
# read later in this notebook (under /content/drive/MyDrive/...) are reachable.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [1]:
import pandas as pd
import random
from sklearn.model_selection import train_test_split

In [4]:
# All three split files are parsed the same way: no header row, ';' between
# the two fields, and the columns named 'sentence' and 'label'.
_split_format = dict(delimiter=';', header=None, names=['sentence', 'label'])

df_train = pd.read_csv("/content/drive/MyDrive/final2/data/train.txt", **_split_format)
df_test = pd.read_csv("/content/drive/MyDrive/final2/data/test.txt", **_split_format)
df_val = pd.read_csv("/content/drive/MyDrive/final2/data/val.txt", **_split_format)

In [5]:
# Display the training split to check that the two columns were parsed as expected.
df_train

Unnamed: 0,sentence,label
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
...,...,...
15995,i just had a very brief time in the beanbag an...,sadness
15996,i am now turning and i feel pathetic that i am...,sadness
15997,i feel strong and good overall,joy
15998,i feel like this was such a rude comment and i...,anger


In [7]:
def predict(sentence):
  """Random-guess baseline: return one emotion label, ignoring `sentence`.

  The label is drawn with `random.choice` from the six emotion classes, so the
  result depends on the state of the global `random` generator.
  """
  candidate_labels = ['anger', 'fear', 'joy', 'love', 'sadness', 'surprise']
  picked = random.choice(candidate_labels)
  return picked

# Smoke-test the baseline on a single training sentence (row label 15997).
# predict() chooses at random, so the label shown can differ from run to run.
test = df_train.loc[15997, 'sentence']
predict(test)

'sadness'

In [8]:
def add_predict_col(df, predict_fn=None):
  """Return a copy of `df` with a 'predict' column holding one prediction per row.

  Args:
    df: DataFrame with a 'sentence' column. It is not modified.
    predict_fn: Callable mapping a sentence to a predicted label. When omitted,
      the notebook-level `predict` function is used.

  Returns:
    A new DataFrame with the same rows and index as `df` plus a 'predict'
    column (an existing column of that name is replaced).
  """
  if predict_fn is None:
    predict_fn = predict
  # Build the whole column in row order and attach it once. Looking rows up
  # with df.loc[0..n-1] only works for a default integer index, and filling a
  # NaN-initialised column cell by cell is slow.
  predictions = [predict_fn(sentence) for sentence in df['sentence']]
  return df.assign(predict=predictions)

In [10]:
# Pool the train, test and validation frames row-wise into one frame;
# ignore_index renumbers the combined rows instead of keeping per-file labels.
df_total = pd.concat((df_train, df_test, df_val), ignore_index=True)

In [11]:
# Attach the baseline predictions to every pooled row, then display the frame.
# NOTE(review): df_total is rebound to its own transformed value, so re-running
# this cell feeds a frame that already has a 'predict' column back in.
df_total = add_predict_col(df_total)
df_total

Unnamed: 0,sentence,label,predict
0,i didnt feel humiliated,sadness,fear
1,i can go from feeling so hopeless to so damned...,sadness,sadness
2,im grabbing a minute to post i feel greedy wrong,anger,love
3,i am ever feeling nostalgic about the fireplac...,love,love
4,i am feeling grouchy,anger,love
...,...,...,...
19995,im having ssa examination tomorrow in the morn...,sadness,sadness
19996,i constantly worry about their fight against n...,joy,love
19997,i feel its important to share this info for th...,joy,love
19998,i truly feel that if you are passionate enough...,joy,love


In [12]:
# Hold out 10% of the pooled rows for validation (random_state is fixed) and
# renumber each part from 0, dropping the shuffled original index.
train_data, valid_data = (
  part.reset_index(drop=True)
  for part in train_test_split(df_total, random_state=41, test_size=0.1)
)

In [15]:
# Emotion classes; compute_fscore passes this list as `emotions` to micro_f and macro_f.
Emo = ['anger', 'fear', 'joy', 'love', 'sadness', 'surprise']
def micro_f(datas, predicts, labels, emotions):
  """Compute micro-averaged precision, recall and F-score.

  True positives, false positives and false negatives are counted for each
  emotion and pooled over all emotions before the ratios are taken.

  Args:
    datas: list of input sentences; only its length (the row count) is used.
    predicts: list of predicted labels, aligned with `datas`.
    labels: list of gold labels, aligned with `datas`.
    emotions: list of emotion labels to score.

  Returns:
    Tuple (precision, recall, f). A ratio whose denominator is zero (for
    example on empty input, or when nothing was predicted correctly) is
    reported as 0.0 instead of raising ZeroDivisionError.
  """
  n_rows = len(datas)
  tp = fp = fn = 0
  for emotion in emotions:
    for i in range(n_rows):
      predicted, actual = predicts[i], labels[i]
      if predicted == emotion:
        if predicted == actual:
          tp += 1
        else:
          fp += 1
      elif actual == emotion:
        # Gold label is this emotion but something else was predicted.
        fn += 1
  p = tp / (tp + fp) if tp + fp else 0.0
  r = tp / (tp + fn) if tp + fn else 0.0
  f = 2 * p * r / (p + r) if p + r else 0.0
  return (p, r, f)

def macro_f(datas, predicts, labels, emotions):
  """Compute macro-averaged precision, recall and F-score.

  Precision and recall are computed separately for each emotion and then
  averaged over the number of emotions given. The returned F value is the
  harmonic mean of those two averages, which is not the same quantity as the
  average of per-emotion F-scores.

  Args:
    datas: list of input sentences; only its length (the row count) is used.
    predicts: list of predicted labels, aligned with `datas`.
    labels: list of gold labels, aligned with `datas`.
    emotions: list of emotion labels to score.

  Returns:
    Tuple (precision, recall, f). An emotion that is never predicted
    contributes 0.0 precision, and one that never occurs in `labels`
    contributes 0.0 recall, instead of raising ZeroDivisionError. An empty
    `emotions` list yields (0.0, 0.0, 0.0).
  """
  # Average over the emotions actually supplied rather than a fixed count of 6.
  n = len(emotions)
  if n == 0:
    return (0.0, 0.0, 0.0)
  n_rows = len(datas)
  p = 0.0
  r = 0.0
  for emotion in emotions:
    tp = fp = fn = 0
    for i in range(n_rows):
      predicted, actual = predicts[i], labels[i]
      if predicted == emotion:
        if predicted == actual:
          tp += 1
        else:
          fp += 1
      elif actual == emotion:
        # Gold label is this emotion but something else was predicted.
        fn += 1
    p += tp / (tp + fp) if tp + fp else 0.0
    r += tp / (tp + fn) if tp + fn else 0.0
  p /= n
  r /= n
  f = 2 * p * r / (p + r) if p + r else 0.0
  return (p, r, f)


def accuracy(df):
  """Print the number of correct predictions and the resulting accuracy.

  Args:
    df: DataFrame with 'label' and 'predict' columns. Rows are compared by
      position, so any index is accepted.

  Prints the correct/total counts and the accuracy ratio (0.0 for an empty
  frame). Returns None.
  """
  total = len(df)
  # Pair the two columns positionally; looking rows up by the labels 0..n-1
  # fails on any frame whose index has not been reset.
  correct = sum(
    1
    for actual, predicted in zip(df['label'].tolist(), df['predict'].tolist())
    if actual == predicted
  )
  print('correct = ', correct, ' / ', total)
  eva = correct / total if total else 0.0
  print('accuracy = ', eva)


In [16]:
def compute_fscore(df):
  """Print the micro- and macro-averaged (precision, recall, F) tuples for `df`.

  Reads the 'sentence', 'predict' and 'label' columns as lists and scores them
  against the module-level `Emo` label list with micro_f and macro_f.
  """
  columns = [df[name].tolist() for name in ('sentence', 'predict', 'label')]
  print('Microf: ', micro_f(*columns, Emo))
  print('Macrof: ', macro_f(*columns, Emo))
  
print('baseline performance')
# Report the F-scores and accuracy for each split in turn.
for heading, split_df in (('for train', train_data), ('for validation', valid_data)):
  print(heading)
  compute_fscore(split_df)
  accuracy(split_df)

baseline performance
for train
Microf:  (0.167, 0.167, 0.167)
Macrof:  (0.16729939570512076, 0.16455913726219196, 0.1659179529033381)
correct =  3006  /  18000
accuracy =  0.167
for validation
Microf:  (0.1695, 0.1695, 0.1695)
Macrof:  (0.17055445768857966, 0.16932991331826241, 0.16993997959305446)
correct =  339  /  2000
accuracy =  0.1695
