# Inter-dataset Sentiment Analysis for all datasets

In [9]:
import pandas as pd
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import glob

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Nicholas\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [10]:
# Directory (relative to the notebook's working directory) that is globbed for
# the per-dataset CSV files; each file is read for its 'hs' and
# 'translated_text' columns further down.
DATA_DIR = "../data/all-translated"

In [11]:
def slice_dataframe_and_compute_sentiment(df, slice_cols, slice_vals):
    """Score the VADER sentiment of one slice of a dataset.

    Keeps the rows of ``df`` where every ``slice_cols[i]`` equals
    ``slice_vals[i]``, joins their ``translated_text`` values into one
    space-separated string and scores that string once. The result is a single
    corpus-level score, not an average of per-example scores.

    Args:
        df: DataFrame with a ``translated_text`` column plus every column named
            in ``slice_cols``. It is not modified.
        slice_cols: Column names to filter on.
        slice_vals: Required value for each column, in the same order as
            ``slice_cols``.

    Returns:
        The dict produced by ``SentimentIntensityAnalyzer().polarity_scores``
        for the joined text.

    Raises:
        ValueError: If ``slice_cols`` and ``slice_vals`` differ in length.
        KeyError: If a slice column or ``translated_text`` is missing.
    """
    if len(slice_cols) != len(slice_vals):
        raise ValueError(
            f'slice_cols has {len(slice_cols)} entries but slice_vals has {len(slice_vals)}'
        )

    # Boolean indexing already returns a new frame, so no defensive copy of the
    # (possibly large) input is needed.
    sliced_df = df
    for col, val in zip(slice_cols, slice_vals):
        sliced_df = sliced_df[sliced_df[col] == val]
    print(f'Found a total of {len(sliced_df)} examples')

    # Empty CSV cells are read as NaN (a float), which str.join rejects with a
    # TypeError; skip them and coerce any remaining non-string value to text.
    texts = sliced_df['translated_text'].dropna().astype(str)
    text = ' '.join(texts)
    return SentimentIntensityAnalyzer().polarity_scores(text)

## Example: Sentiment for hateful labels in the English Basile dataset

In [12]:
# df = pd.read_csv(f'{DATA_DIR}/B_english_basile_processed.csv')
# df.head()

In [13]:
# print(slice_dataframe_and_compute_sentiment(df, ['hs'], [1]))

## Sentiment for all hateful labels in hateful datasets

In [14]:
# Sentiment of the hateful slice (hs == 1) of every CSV in DATA_DIR, keyed by
# the CSV path exactly as glob returns it.
hateful_sentiment_dict = {}
# glob yields matches in arbitrary, filesystem-dependent order; sorting keeps
# the printed log and the dict's insertion order reproducible between runs.
for path in sorted(glob.glob(DATA_DIR + '/*.csv')):
    print(path)
    df = pd.read_csv(path)
    scores = slice_dataframe_and_compute_sentiment(df, ['hs'], [1])
    hateful_sentiment_dict[path] = scores
    print(scores)


../data/all-translated\T_B_arabic_mulki_processed.csv
Found a total of 468 examples
{'neg': 0.067, 'neu': 0.841, 'pos': 0.093, 'compound': 0.9992}
../data/all-translated\T_B_danish_processed.csv
Found a total of 425 examples
{'neg': 0.193, 'neu': 0.652, 'pos': 0.155, 'compound': -0.9999}
../data/all-translated\T_B_french_ousidhoum_processed.csv
Found a total of 207 examples
{'neg': 0.156, 'neu': 0.76, 'pos': 0.084, 'compound': -0.9995}
../data/all-translated\T_B_german_bretschneider_processed.csv
Found a total of 1331 examples
{'neg': 0.164, 'neu': 0.702, 'pos': 0.134, 'compound': -1.0}
../data/all-translated\T_B_german_ross_processed.csv
Found a total of 105 examples
{'neg': 0.191, 'neu': 0.708, 'pos': 0.101, 'compound': -0.9994}
../data/all-translated\T_B_indonesian_alfina_processed.csv
Found a total of 260 examples
{'neg': 0.14, 'neu': 0.735, 'pos': 0.125, 'compound': -0.9978}
../data/all-translated\T_B_italian_manuel_processed.csv
Found a total of 843 examples
{'neg': 0.188, 'neu':

## Sentiment for all normal labels in hateful datasets

In [15]:
# Sentiment of the non-hateful slice (hs == 0) of every CSV in DATA_DIR, keyed
# by the CSV path exactly as glob returns it.
non_hateful_sentiment_dict = {}
# glob yields matches in arbitrary, filesystem-dependent order; sorting keeps
# the printed log and the dict's insertion order reproducible between runs.
for path in sorted(glob.glob(DATA_DIR + '/*.csv')):
    print(path)
    df = pd.read_csv(path)
    scores = slice_dataframe_and_compute_sentiment(df, ['hs'], [0])
    non_hateful_sentiment_dict[path] = scores
    print(scores)

../data/all-translated\T_B_arabic_mulki_processed.csv
Found a total of 3649 examples
{'neg': 0.06, 'neu': 0.808, 'pos': 0.133, 'compound': 1.0}
../data/all-translated\T_B_danish_processed.csv
Found a total of 2850 examples
{'neg': 0.1, 'neu': 0.714, 'pos': 0.185, 'compound': 1.0}
../data/all-translated\T_B_french_ousidhoum_processed.csv
Found a total of 821 examples
{'neg': 0.143, 'neu': 0.721, 'pos': 0.137, 'compound': -0.999}
../data/all-translated\T_B_german_bretschneider_processed.csv
Found a total of 5141 examples
{'neg': 0.128, 'neu': 0.702, 'pos': 0.169, 'compound': 1.0}
../data/all-translated\T_B_german_ross_processed.csv
Found a total of 364 examples
{'neg': 0.148, 'neu': 0.737, 'pos': 0.115, 'compound': -0.9996}
../data/all-translated\T_B_indonesian_alfina_processed.csv
Found a total of 453 examples
{'neg': 0.059, 'neu': 0.707, 'pos': 0.234, 'compound': 1.0}
../data/all-translated\T_B_italian_manuel_processed.csv
Found a total of 4436 examples
{'neg': 0.169, 'neu': 0.751, 'po

In [16]:
# Side-by-side report of the neg / neu / pos proportions for the hateful and
# non-hateful slice of each dataset. Datasets missing from the non-hateful
# results are skipped.
print("Comparing sentiment for hate speech v non hatespeech")
report_template = "For {}, \n \t HSvNormal: (Neg: {} v {}, Neu: {} v {}, Pos: {} v {})"
for dataset, hateful_scores in hateful_sentiment_dict.items():
    if dataset not in non_hateful_sentiment_dict:
        continue
    normal_scores = non_hateful_sentiment_dict[dataset]
    # Interleave as (hateful, normal) pairs in neg, neu, pos order to line up
    # with the placeholders in the template.
    paired_values = []
    for key in ('neg', 'neu', 'pos'):
        paired_values.append(hateful_scores[key])
        paired_values.append(normal_scores[key])
    print(report_template.format(dataset, *paired_values))

Comparing sentiment for hate speech v non hatespeech
For ../data/all-translated\T_B_arabic_mulki_processed.csv, 
 	 HSvNormal: (Neg: 0.067 v 0.06, Neu: 0.841 v 0.808, Pos: 0.093 v 0.133)
For ../data/all-translated\T_B_danish_processed.csv, 
 	 HSvNormal: (Neg: 0.193 v 0.1, Neu: 0.652 v 0.714, Pos: 0.155 v 0.185)
For ../data/all-translated\T_B_french_ousidhoum_processed.csv, 
 	 HSvNormal: (Neg: 0.156 v 0.143, Neu: 0.76 v 0.721, Pos: 0.084 v 0.137)
For ../data/all-translated\T_B_german_bretschneider_processed.csv, 
 	 HSvNormal: (Neg: 0.164 v 0.128, Neu: 0.702 v 0.702, Pos: 0.134 v 0.169)
For ../data/all-translated\T_B_german_ross_processed.csv, 
 	 HSvNormal: (Neg: 0.191 v 0.148, Neu: 0.708 v 0.737, Pos: 0.101 v 0.115)
For ../data/all-translated\T_B_indonesian_alfina_processed.csv, 
 	 HSvNormal: (Neg: 0.14 v 0.059, Neu: 0.735 v 0.707, Pos: 0.125 v 0.234)
For ../data/all-translated\T_B_italian_manuel_processed.csv, 
 	 HSvNormal: (Neg: 0.188 v 0.169, Neu: 0.733 v 0.751, Pos: 0.08 v 0.0