# In this notebook, we analyze crowdsourced data by answering the following 3 questions:
- which model best **predicted** the crowdsourced label?
- which model performed best when **blended** with the crowdsourced label?
- which model had the highest performance when only taking the agreeing label between the model and the crowdsourced output, and computing accuracy on this **subsection**?


## Load crowdsourced .csv file, remove rows with missing data, and remove rows with less than 2 votes

In [16]:
import pandas as pd
path_to_crowdsourced_data_csv_file = './CrowdSourced_Data.csv'
crowdsourcing = pd.read_csv(path_to_crowdsourced_data_csv_file)
print(len(crowdsourcing))
crowdsourcing.dropna(how='all').sample(2)

7051


Unnamed: 0,initialLabel,binaryInitialLabel,title,url,numTrueVotes,numFalseVotes,totalVotes,maxVotes,interraterPercentAgreement,agreementWithLabel,votersLabel,voterLabelWasAccurate,averageInterraterPercentAgreement,averageAgreementWithLabel,percentOfAgreeingDecisions
1813,False,0,Brazilian President Jair Bolsonaro declared a...,https://www.poynter.org/?ifcn_misinformation=b...,2,3,5,3,0.6,0.6,0,1,,,
3620,False,0,â€œA lootingâ€ in a supermarket due to the c...,https://www.poynter.org/?ifcn_misinformation=a...,0,3,3,3,1.0,1.0,0,1,,,


In [9]:
#remove rows from crowdsoucing with less than 2 totalVotes
crowdsourcing = crowdsourcing[crowdsourcing['totalVotes'] > 2]
len(crowdsourcing)

6931

Unnamed: 0,initialLabel,binaryInitialLabel,title,url,numTrueVotes,numFalseVotes,totalVotes,maxVotes,interraterPercentAgreement,agreementWithLabel,votersLabel,voterLabelWasAccurate,averageInterraterPercentAgreement,averageAgreementWithLabel,percentOfAgreeingDecisions
0,FALSE,0,Video clip shows people keep leaving Phuket.,https://www.poynter.org/?ifcn_misinformation=s...,5,1,6,5,0.833333,0.166667,1,0,0.833511,0.675247,0.731386
1,FALSE,0,Using hot air from a hair dryer on sinuses ca...,https://www.poynter.org/?ifcn_misinformation=u...,0,5,5,5,1.000000,1.000000,0,1,,,
2,FALSE,0,This video shows a woman eating a bat soup in...,https://www.poynter.org/?ifcn_misinformation=t...,0,5,5,5,1.000000,1.000000,0,1,,,
3,TRUE,1,Premier League looking at 'clinical passports'...,https://www.bbc.co.uk/sport/football/53605863,5,2,7,5,0.714286,0.714286,1,1,,,
4,TRUE,1,What to expect as GOP hammers out its coronavi...,https://www.politico.com/newsletters/weekly-ed...,5,1,6,5,0.833333,0.833333,1,1,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7046,FALSE,0,Images of an alleged fire at New Yorkâ€™s Leb...,https://www.poynter.org/?ifcn_misinformation=i...,0,1,1,1,1.000000,1.000000,0,1,,,
7047,TRUE,1,Pfizer sales take a hit during pandemic,https://apnews.com/66e1cede592436a42bdba32bfdf...,1,1,2,1,0.500000,0.500000,1,1,,,
7048,TRUE,1,COVID-19 vaccine's human trials show promise -...,https://www.reuters.com/video/watch/idP4Cb?now...,1,0,1,1,1.000000,1.000000,1,1,,,
7049,TRUE,1,Coronavirus in Scotland: Economic recovery pla...,https://www.bbc.co.uk/news/uk-scotland-scotlan...,0,1,1,1,1.000000,0.000000,0,0,,,


In [10]:
crowdsourcing['softLabel']=crowdsourcing['numTrueVotes']/crowdsourcing['totalVotes']
df_crowdsourcing = pd.DataFrame({'title' : crowdsourcing['title'], 'label':crowdsourcing['binaryInitialLabel'], 'voterSoftLabel':crowdsourcing['softLabel'],'voterLabel': crowdsourcing['votersLabel']})
df_crowdsourcing=df_crowdsourcing.dropna()
df_crowdsourcing = df_crowdsourcing.reset_index(drop=True)
df_crowdsourcing.sample(2)

Unnamed: 0,title,label,voterSoftLabel,voterLabel
776,"Despite pandemic, Merck increases 2Q net, prof...",1,1.0,1
3709,Infographic claiming that the coronavirus pan...,0,1.0,1


## Question 1 which model best **predicted** the crowdsourced label?

In [11]:
import os
path_to_prediction_folder = '../Model_Predictions/'
prediction_csv_files = os.listdir(path_to_prediction_folder)
prediction_csv_files[2]

'finetune_bert_CoAID_GossipCop_Prediction.csv'

In [12]:
attampt = prediction_csv_files[2]
attampt_file = pd.read_csv(path_to_prediction_folder+attampt)
attampt_file.sample(2)

Unnamed: 0.1,Unnamed: 0,softLabel,label,prediction,accuracy,f1,precision,recall
4068,4068,0.000464,0,0,0.804823,0.72892,0.973684,0.582494
2708,2708,0.000481,0,0,0.804823,0.72892,0.973684,0.582494


List all model prediction results

In [30]:
#drop string not end with .csv
prediction_csv_files = [x for x in prediction_csv_files if x.endswith('.csv')]
prediction_csv_files

['bert-base_Prediction.csv',
 'XLNet_on_Gossip_Prediction.csv',
 'finetune_bert_CoAID_GossipCop_Prediction.csv',
 'roberta-fake-news_CoAID_PolitiFact_Prediction.csv',
 'finetune_bert_CoAID_Prediction.csv',
 'XLNet_on_CoAID_FNN_Prediction.csv',
 'Text_CNN_Politi_predict.csv',
 'finetune_bert_PolitiFact_Prediction.csv',
 'Fake-News-Bert-Detect_FNN_Prediction.csv',
 'XLNet_on_CoAID_Gossip_Prediction.csv',
 'roberta-fake-news_FNN_Prediction.csv',
 'roberta-fake-news_PolitiFact_Prediction.csv',
 'Text_CNN_Gossip_predict.csv',
 'roberta-fake-news_CoAID_GossipCop_Prediction.csv',
 'Fake-News-Bert-Detect_GossipCop_Prediction.csv',
 'Fake-News-Bert-Detect_CoAID&GossipCop_Prediction.csv',
 'XLNet-base_Prediction.csv',
 'Text_CNN_CoAID_Gossip_predict.csv',
 'XLNet_on_CoAID_Politi_Prediction.csv',
 'XLNet_on_CoAID_Prediction.csv',
 'roberta-fake-news_GossipCop_Prediction.csv',
 'finetune_bert_CoAID_FNN_Prediction.csv',
 'Fake-News-Bert-Detect_Prediction.csv',
 'roberta-fake-news_Prediction.csv',
 

In [31]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score


performance_dict = {
                    'model_name': [],
                    'accuracy': [],
                    'f1': [],
                    'precision': [],
                    'recall': []
                  }
path = path_to_prediction_folder

for attampt in prediction_csv_files:
  predictions = pd.read_csv(path+attampt)['prediction']
  performance_dict['model_name'].append(attampt)

  df_prediction=pd.read_csv(path+attampt)
  df_prediction = pd.concat([df_crowdsourcing, df_prediction], axis=1)
  df_prediction = df_prediction.dropna(how='any')

  performance_dict['accuracy'].append(accuracy_score(df_prediction['voterLabel'], df_prediction['prediction']))
  performance_dict['f1'].append( f1_score(df_prediction['voterLabel'], df_prediction['prediction']))
  performance_dict['precision'].append(precision_score(df_prediction['voterLabel'], df_prediction['prediction']))
  performance_dict['recall'].append(recall_score(df_prediction['voterLabel'], df_prediction['prediction']))
 

  _warn_prf(average, modifier, msg_start, len(result))


In [32]:
pd.DataFrame(performance_dict).sort_values(by='accuracy', ascending=False)

Unnamed: 0,model_name,accuracy,f1,precision,recall
37,Fake-News-Bert-Detect_CoAID_Prediction.csv,0.695815,0.705587,0.856562,0.599858
19,XLNet_on_CoAID_Prediction.csv,0.682107,0.692533,0.839878,0.589171
36,roberta-fake-news_CoAID_Prediction.csv,0.681241,0.680272,0.871016,0.558062
4,finetune_bert_CoAID_Prediction.csv,0.660895,0.654005,0.860519,0.527428
3,roberta-fake-news_CoAID_PolitiFact_Prediction.csv,0.65974,0.648689,0.870452,0.516979
35,Fake-News-Bert-Detect_CoAID&FNN_Prediction.csv,0.65267,0.639832,0.864887,0.507718
24,Fake-News-Bert-Detect_CoAID&PolitiFact_Predict...,0.652381,0.636377,0.873239,0.500594
27,Text_CNN_CoAID_predict.csv,0.642713,0.638223,0.829472,0.518642
13,roberta-fake-news_CoAID_GossipCop_Prediction.csv,0.641126,0.618266,0.874132,0.478271
9,XLNet_on_CoAID_Gossip_Prediction.csv,0.638672,0.625822,0.844015,0.497269


## which model performed best when **blended** with the crowdsourced label?



In [34]:
from numpy.lib.function_base import average
alphas = [0.1,0.3,0.5,0.7,0.9]
for alpha in alphas:

  performance_dict = {
                    'model_name': [],
                    'blended_acc': [],
                    'model_acc':[],
                    'improvement': []
                  }
  for attampt in prediction_csv_files:
    df_prediction=pd.read_csv(path+attampt)
    df_prediction = pd.concat([df_crowdsourcing, df_prediction], axis=1)
    df_prediction = df_prediction.dropna(how='any')
    predictions = df_prediction['prediction']
    performance_dict['model_name'].append(attampt)
    attampt_file = df_prediction
    blended_acc = accuracy_score(df_crowdsourcing['label'], round(df_crowdsourcing['voterSoftLabel']*alpha +attampt_file['softLabel']*(1-alpha)))
    performance_dict['blended_acc'].append(blended_acc)
    performance_dict['model_acc'].append(attampt_file['accuracy'][0])
    performance_dict['improvement'].append(blended_acc - attampt_file['accuracy'][0])
  print('- crowdsourcing soft label * ', alpha, ' + model predict soft label * ', 1 - alpha)
  print('- average improvement: ', average(performance_dict['improvement']))
  print('- max_improvement: ', max(performance_dict['improvement']))
  max_index = performance_dict['improvement'].index(max(performance_dict['improvement']))
  print('\t at model: ', performance_dict['model_name'][max_index], ',  the previous acc is: ', performance_dict['model_acc'][max_index], ' curr: ', performance_dict['blended_acc'][max_index])
  print('- best performance: ', max(performance_dict['blended_acc']) )
  best_performance_index = performance_dict['blended_acc'].index(max(performance_dict['blended_acc']))
  print('\t at model: ', performance_dict['model_name'][best_performance_index], ',  the previous acc is: ', performance_dict['model_acc'][best_performance_index])
  print('##############################')


- crowdsourcing soft label *  0.1  + model predict soft label *  0.9
- average improvement:  0.06921485134251092
- max_improvement:  0.5645261121856868
	 at model:  Text_CNN_Gossip_predict.csv ,  the previous acc is:  0.4263829787234042  curr:  0.990909090909091
- best performance:  0.9911976911976912
	 at model:  Text_CNN_CoAID_predict.csv ,  the previous acc is:  0.8739007092198582
##############################
- crowdsourcing soft label *  0.3  + model predict soft label *  0.7
- average improvement:  0.08184666397432357
- max_improvement:  0.5548580025175771
	 at model:  Text_CNN_Gossip_predict.csv ,  the previous acc is:  0.4263829787234042  curr:  0.9812409812409812
- best performance:  0.9836940836940837
	 at model:  Text_CNN_CoAID_predict.csv ,  the previous acc is:  0.8739007092198582
##############################
- crowdsourcing soft label *  0.5  + model predict soft label *  0.5
- average improvement:  0.08418136630902587
- max_improvement:  0.46265021030978487
	 at model

In [35]:
# best improvement after blending
pd.DataFrame(performance_dict).sort_values(by='improvement', ascending=False)

Unnamed: 0,model_name,blended_acc,model_acc,improvement
14,Fake-News-Bert-Detect_GossipCop_Prediction.csv,0.716739,0.301986,0.414753
30,finetune_bert_GossipCop_Prediction.csv,0.718615,0.361702,0.356913
1,XLNet_on_Gossip_Prediction.csv,0.719481,0.416454,0.303027
12,Text_CNN_Gossip_predict.csv,0.728427,0.426383,0.302044
28,Text_CNN_FNN_predict.csv,0.728427,0.444681,0.283746
20,roberta-fake-news_GossipCop_Prediction.csv,0.717027,0.43617,0.280857
8,Fake-News-Bert-Detect_FNN_Prediction.csv,0.71746,0.450496,0.266964
16,XLNet-base_Prediction.csv,0.71746,0.450496,0.266964
6,Text_CNN_Politi_predict.csv,0.728427,0.499291,0.229136
25,finetune_bert_FNN_Prediction.csv,0.721068,0.512908,0.20816


In [36]:
# best accuracy after blending
pd.DataFrame(performance_dict).sort_values(by='blended_acc', ascending=False)

Unnamed: 0,model_name,blended_acc,model_acc,improvement
38,Text_CNN_CoAID_FNN_predict.csv,0.728427,0.721277,0.007151
12,Text_CNN_Gossip_predict.csv,0.728427,0.426383,0.302044
31,Text_CNN_CoAID_Politi_predict.csv,0.728427,0.79844,-0.070013
28,Text_CNN_FNN_predict.csv,0.728427,0.444681,0.283746
27,Text_CNN_CoAID_predict.csv,0.728427,0.873901,-0.145474
6,Text_CNN_Politi_predict.csv,0.728427,0.499291,0.229136
17,Text_CNN_CoAID_Gossip_predict.csv,0.728427,0.741135,-0.012708
37,Fake-News-Bert-Detect_CoAID_Prediction.csv,0.727706,0.965532,-0.237826
19,XLNet_on_CoAID_Prediction.csv,0.727273,0.943404,-0.216132
36,roberta-fake-news_CoAID_Prediction.csv,0.726984,0.932908,-0.205924


## which model had the highest performance when only taking the agreeing label between the model and the crowdsourced output, and computing accuracy on this subsection? 

In [37]:
def agreed_accuracy(true_label_ls, voter_label_ls, pred_label_ls):
  assert len(true_label_ls) == len(voter_label_ls)
  assert len(voter_label_ls) == len(pred_label_ls)

  total = 0
  corr = 0
  for i in range(len(true_label_ls)):
    if voter_label_ls[i] == pred_label_ls[i]:
      total +=1
      if true_label_ls[i] == pred_label_ls[i]:
        corr +=1
  return corr/total, total/len(true_label_ls)


In [None]:
agreed_accuracy(df_crowdsourcing['label'], df_crowdsourcing['voterLabel'], attampt_file['prediction'])

(0.8582658959537572, 0.6134751773049646)

In [38]:
performance_dict = {
                    'model_name': [],
                    'accuracy': [],
                    'label_agreed':[]
                  }
                  
for attampt in prediction_csv_files:
  df_prediction=pd.read_csv(path+attampt)
  df_prediction = pd.concat([df_crowdsourcing, df_prediction], axis=1)
  df_prediction = df_prediction.dropna(how='any')
  attampt_file = df_prediction
  acc, label_agreed = agreed_accuracy(df_crowdsourcing['label'], df_crowdsourcing['voterLabel'], attampt_file['prediction'])
  predictions = pd.read_csv(path+attampt)['prediction']
  performance_dict['model_name'].append(attampt)
  performance_dict['accuracy'].append(acc)
  performance_dict['label_agreed'].append(label_agreed)

In [39]:
pd.DataFrame(performance_dict).sort_values(by='accuracy', ascending=False)

Unnamed: 0,model_name,accuracy,label_agreed
37,Fake-News-Bert-Detect_CoAID_Prediction.csv,0.985898,0.695815
19,XLNet_on_CoAID_Prediction.csv,0.979691,0.682107
4,finetune_bert_CoAID_Prediction.csv,0.973144,0.660895
36,roberta-fake-news_CoAID_Prediction.csv,0.972675,0.681241
3,roberta-fake-news_CoAID_PolitiFact_Prediction.csv,0.962598,0.65974
24,Fake-News-Bert-Detect_CoAID&PolitiFact_Predict...,0.95908,0.652381
35,Fake-News-Bert-Detect_CoAID&FNN_Prediction.csv,0.958877,0.65267
27,Text_CNN_CoAID_predict.csv,0.956219,0.642713
15,Fake-News-Bert-Detect_CoAID&GossipCop_Predicti...,0.956136,0.634921
13,roberta-fake-news_CoAID_GossipCop_Prediction.csv,0.95251,0.641126
