# In this notebook, we analyze crowdsourced data by answering the following 3 questions:
- which model best **predicted** the crowdsourced label?
- which model performed best when **blended** with the crowdsourced label?
- which model had the highest performance when only taking the agreeing label between the model and the crowdsourced output, and computing accuracy on this **subsection**?


## Load crowdsourced .csv file

In [2]:
import pandas as pd
path_to_crowdsourced_data_csv_file = 'CorwdSourced_Data.csv'
crowdsourcing = pd.read_csv(path_to_crowdsourced_data_csv_file)
crowdsourcing.dropna(how='all').sample(2)

Unnamed: 0,initialLabel,binaryInitialLabel,title,url,numTrueVotes,numFalseVotes,totalVotes,maxVotes,interraterPercentAgreement,agreementWithLabel,votersLabel,voterLabelWasAccurate,averageInterraterPercentAgreement,averageAgreementWithLabel,percentOfAgreeingDecisions
3097,False,0,Photos of grand celebration of Ram Navami (4/...,https://www.poynter.org/?ifcn_misinformation=p...,3,0,3,3,1.0,0.0,1,0,,,
5359,True,1,WHO advance team ends visit to China to probe ...,https://apnews.com/d075f9381ae9e84a8e8ecdf5040...,3,1,4,3,0.75,0.75,1,1,,,


In [3]:
crowdsourcing['softLabel']=crowdsourcing['numTrueVotes']/crowdsourcing['totalVotes']
df_crowdsourcing = pd.DataFrame({'title' : crowdsourcing['title'], 'label':crowdsourcing['binaryInitialLabel'], 'voterSoftLabel':crowdsourcing['softLabel'],'voterLabel': crowdsourcing['votersLabel']})
df_crowdsourcing=df_crowdsourcing.dropna()
df_crowdsourcing = df_crowdsourcing.reset_index(drop=True)
df_crowdsourcing.sample(2)

Unnamed: 0,title,label,voterSoftLabel,voterLabel
5879,"Bombardier reports earnings miss, hit by rail ...",1,1.0,1
5644,Bulgaria says its official who tested positive...,1,1.0,1


## Question 1 which model best **predicted** the crowdsourced label?

In [4]:
import os
path_to_prediction_folder = '../Model_Predictions/'
prediction_csv_files = os.listdir(path_to_prediction_folder)
prediction_csv_files[2]

'finetune_bert_CoAID_GossipCop_Prediction.csv'

In [5]:
attampt = prediction_csv_files[2]
attampt_file = pd.read_csv(path_to_prediction_folder+attampt)
attampt_file.sample(2)

Unnamed: 0.1,Unnamed: 0,softLabel,label,prediction,accuracy,f1,precision,recall
6140,6140,0.998074,1,1,0.804823,0.72892,0.973684,0.582494
1632,1632,0.00045,0,0,0.804823,0.72892,0.973684,0.582494


List all model prediction results

In [6]:
prediction_csv_files

['bert-base_Prediction.csv',
 'XLNet_on_Gossip_Prediction.csv',
 'finetune_bert_CoAID_GossipCop_Prediction.csv',
 'roberta-fake-news_CoAID_PolitiFact_Prediction.csv',
 'finetune_bert_CoAID_Prediction.csv',
 'XLNet_on_CoAID_FNN_Prediction.csv',
 'Text_CNN_Politi_predict.csv',
 'finetune_bert_PolitiFact_Prediction.csv',
 'Fake-News-Bert-Detect_FNN_Prediction.csv',
 'XLNet_on_CoAID_Gossip_Prediction.csv',
 'roberta-fake-news_FNN_Prediction.csv',
 'roberta-fake-news_PolitiFact_Prediction.csv',
 'Text_CNN_Gossip_predict.csv',
 'roberta-fake-news_CoAID_GossipCop_Prediction.csv',
 'Fake-News-Bert-Detect_GossipCop_Prediction.csv',
 'Fake-News-Bert-Detect_CoAID&GossipCop_Prediction.csv',
 'XLNet-base_Prediction.csv',
 'Text_CNN_CoAID_Gossip_predict.csv',
 'XLNet_on_CoAID_Politi_Prediction.csv',
 'XLNet_on_CoAID_Prediction.csv',
 'roberta-fake-news_GossipCop_Prediction.csv',
 'finetune_bert_CoAID_FNN_Prediction.csv',
 'Fake-News-Bert-Detect_Prediction.csv',
 'roberta-fake-news_Prediction.csv',
 

In [7]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score


performance_dict = {
                    'model_name': [],
                    'accuracy': [],
                    'f1': [],
                    'precision': [],
                    'recall': []
                  }
path = path_to_prediction_folder

for attampt in prediction_csv_files:
  predictions = pd.read_csv(path+attampt)['prediction']
  performance_dict['model_name'].append(attampt)
  performance_dict['accuracy'].append(accuracy_score(df_crowdsourcing['voterLabel'], pd.read_csv(path+attampt)['prediction']))
  performance_dict['f1'].append( f1_score(df_crowdsourcing['voterLabel'], pd.read_csv(path+attampt)['prediction']))
  performance_dict['precision'].append(precision_score(df_crowdsourcing['voterLabel'], pd.read_csv(path+attampt)['prediction']))
  performance_dict['recall'].append(recall_score(df_crowdsourcing['voterLabel'], pd.read_csv(path+attampt)['prediction']))
 

  _warn_prf(average, modifier, msg_start, len(result))


In [8]:
pd.DataFrame(performance_dict).sort_values(by='accuracy', ascending=False)

Unnamed: 0,model_name,accuracy,f1,precision,recall
37,Fake-News-Bert-Detect_CoAID_Prediction.csv,0.701418,0.713488,0.8585,0.610387
19,XLNet_on_CoAID_Prediction.csv,0.68695,0.699605,0.841795,0.59851
36,roberta-fake-news_CoAID_Prediction.csv,0.686099,0.687826,0.872272,0.567769
4,finetune_bert_CoAID_Prediction.csv,0.665532,0.661596,0.862004,0.536796
3,roberta-fake-news_CoAID_PolitiFact_Prediction.csv,0.664681,0.656794,0.872012,0.526782
35,Fake-News-Bert-Detect_CoAID&FNN_Prediction.csv,0.657447,0.647702,0.866849,0.517
24,Fake-News-Bert-Detect_CoAID&PolitiFact_Predict...,0.655461,0.64211,0.874047,0.507452
27,Text_CNN_CoAID_predict.csv,0.646099,0.644131,0.831064,0.52585
13,roberta-fake-news_CoAID_GossipCop_Prediction.csv,0.645957,0.626683,0.875836,0.48789
9,XLNet_on_CoAID_Gossip_Prediction.csv,0.642695,0.632852,0.845734,0.505589


## which model performed best when **blended** with the crowdsourced label?



In [9]:
from numpy.lib.function_base import average
alphas = [0.1,0.3,0.5,0.7,0.9]
for alpha in alphas:

  performance_dict = {
                    'model_name': [],
                    'blended_acc': [],
                    'model_acc':[],
                    'improvement': []
                  }
  for attampt in prediction_csv_files:
    predictions = pd.read_csv(path+attampt)['prediction']
    performance_dict['model_name'].append(attampt)
    attampt_file = pd.read_csv(path+attampt)
    blended_acc = accuracy_score(df_crowdsourcing['label'], round(df_crowdsourcing['voterSoftLabel']*alpha +attampt_file['softLabel']*(1-alpha)))
    performance_dict['blended_acc'].append(blended_acc)
    performance_dict['model_acc'].append(attampt_file['accuracy'][0])
    performance_dict['improvement'].append(blended_acc - attampt_file['accuracy'][0])
  print('- crowdsourcing soft label * ', alpha, ' + model predict soft label * ', 1 - alpha)
  print('- average improvement: ', average(performance_dict['improvement']))
  print('- max_improvement: ', max(performance_dict['improvement']))
  max_index = performance_dict['improvement'].index(max(performance_dict['improvement']))
  print('\t at model: ', performance_dict['model_name'][max_index], ',  the previous acc is: ', performance_dict['model_acc'][max_index], ' curr: ', performance_dict['blended_acc'][max_index])
  print('- best performance: ', max(performance_dict['blended_acc']) )
  best_performance_index = performance_dict['blended_acc'].index(max(performance_dict['blended_acc']))
  print('\t at model: ', performance_dict['model_name'][best_performance_index], ',  the previous acc is: ', performance_dict['model_acc'][best_performance_index])
  print('##############################')


- crowdsourcing soft label *  0.1  + model predict soft label *  0.9
- average improvement:  0.07313329696308421
- max_improvement:  0.5714893617021277
	 at model:  Text_CNN_Gossip_predict.csv ,  the previous acc is:  0.4263829787234042  curr:  0.997872340425532
- best performance:  0.9981560283687944
	 at model:  Text_CNN_CoAID_predict.csv ,  the previous acc is:  0.8739007092198582
##############################
- crowdsourcing soft label *  0.3  + model predict soft label *  0.7
- average improvement:  0.08593198763411529
- max_improvement:  0.5621276595744682
	 at model:  Text_CNN_Gossip_predict.csv ,  the previous acc is:  0.4263829787234042  curr:  0.9885106382978723
- best performance:  0.9907801418439717
	 at model:  Text_CNN_CoAID_predict.csv ,  the previous acc is:  0.8739007092198582
##############################
- crowdsourcing soft label *  0.5  + model predict soft label *  0.5
- average improvement:  0.08745590107292237
- max_improvement:  0.4679432624113476
	 at model:

In [14]:
# best improvement after blending
pd.DataFrame(performance_dict).sort_values(by='improvement', ascending=False)

Unnamed: 0,model_name,blended_acc,model_acc,improvement
14,Fake-News-Bert-Detect_GossipCop_Prediction.csv,0.716738,0.301986,0.414752
30,finetune_bert_GossipCop_Prediction.csv,0.719007,0.361702,0.357305
12,Text_CNN_Gossip_predict.csv,0.731489,0.426383,0.305106
1,XLNet_on_Gossip_Prediction.csv,0.719858,0.416454,0.303404
28,Text_CNN_FNN_predict.csv,0.731489,0.444681,0.286809
20,roberta-fake-news_GossipCop_Prediction.csv,0.71773,0.43617,0.28156
8,Fake-News-Bert-Detect_FNN_Prediction.csv,0.718298,0.450496,0.267801
16,XLNet-base_Prediction.csv,0.718298,0.450496,0.267801
6,Text_CNN_Politi_predict.csv,0.731489,0.499291,0.232199
25,finetune_bert_FNN_Prediction.csv,0.72156,0.512908,0.208652


In [15]:
# best accuracy after blending
pd.DataFrame(performance_dict).sort_values(by='blended_acc', ascending=False)

Unnamed: 0,model_name,blended_acc,model_acc,improvement
38,Text_CNN_CoAID_FNN_predict.csv,0.731489,0.721277,0.010213
12,Text_CNN_Gossip_predict.csv,0.731489,0.426383,0.305106
31,Text_CNN_CoAID_Politi_predict.csv,0.731489,0.79844,-0.06695
28,Text_CNN_FNN_predict.csv,0.731489,0.444681,0.286809
27,Text_CNN_CoAID_predict.csv,0.731489,0.873901,-0.142411
6,Text_CNN_Politi_predict.csv,0.731489,0.499291,0.232199
17,Text_CNN_CoAID_Gossip_predict.csv,0.731489,0.741135,-0.009645
37,Fake-News-Bert-Detect_CoAID_Prediction.csv,0.730922,0.965532,-0.23461
19,XLNet_on_CoAID_Prediction.csv,0.730496,0.943404,-0.212908
4,finetune_bert_CoAID_Prediction.csv,0.730071,0.914043,-0.183972


## which model had the highest performance when only taking the agreeing label between the model and the crowdsourced output, and computing accuracy on this subsection? 

In [16]:
def agreed_accuracy(true_label_ls, voter_label_ls, pred_label_ls):
  assert len(true_label_ls) == len(voter_label_ls)
  assert len(voter_label_ls) == len(pred_label_ls)

  total = 0
  corr = 0
  for i in range(len(true_label_ls)):
    if voter_label_ls[i] == pred_label_ls[i]:
      total +=1
      if true_label_ls[i] == pred_label_ls[i]:
        corr +=1
  return corr/total, total/len(true_label_ls)


In [19]:
agreed_accuracy(df_crowdsourcing['label'], df_crowdsourcing['voterLabel'], attampt_file['prediction'])

(0.8582658959537572, 0.6134751773049646)

In [17]:
performance_dict = {
                    'model_name': [],
                    'accuracy': [],
                    'label_agreed':[]
                  }
                  
for attampt in prediction_csv_files:
  attampt_file = pd.read_csv(path+attampt)
  acc, label_agreed = agreed_accuracy(df_crowdsourcing['label'], df_crowdsourcing['voterLabel'], attampt_file['prediction'])
  predictions = pd.read_csv(path+attampt)['prediction']
  performance_dict['model_name'].append(attampt)
  performance_dict['accuracy'].append(acc)
  performance_dict['label_agreed'].append(label_agreed)

In [20]:
pd.DataFrame(performance_dict).sort_values(by='accuracy', ascending=False)

Unnamed: 0,model_name,accuracy,label_agreed
37,Fake-News-Bert-Detect_CoAID_Prediction.csv,0.987462,0.701418
19,XLNet_on_CoAID_Prediction.csv,0.981623,0.68695
4,finetune_bert_CoAID_Prediction.csv,0.975064,0.665532
36,roberta-fake-news_CoAID_Prediction.csv,0.974571,0.686099
3,roberta-fake-news_CoAID_PolitiFact_Prediction.csv,0.964575,0.664681
24,Fake-News-Bert-Detect_CoAID&PolitiFact_Predict...,0.961047,0.655461
35,Fake-News-Bert-Detect_CoAID&FNN_Prediction.csv,0.960734,0.657447
15,Fake-News-Bert-Detect_CoAID&GossipCop_Predicti...,0.958509,0.639291
27,Text_CNN_CoAID_predict.csv,0.958068,0.646099
13,roberta-fake-news_CoAID_GossipCop_Prediction.csv,0.954326,0.645957
