In [3]:
import pandas as pd

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import pipeline

from sklearn.metrics import classification_report

### Get Data

In [2]:
df = pd.read_csv('../data/data.csv')
df = df[:400]

In [5]:
print(df['Sentiment'].unique())

['positive' 'negative' 'neutral']


### Predicting with Vader

In [6]:
def get_vader_prediction(analyzer, sentence):
    scores = analyzer.polarity_scores(sentence)
    max_value = max(scores['neg'], scores['pos'], scores['neu'])
    pred = [key for key, value in scores.items() if value == max_value][0]
    
    pred_map = {'neu': 'neutral', 'pos': 'positive', 'neg': 'negative'}
    
    return pred_map.get(pred)

vader_analyzer = SentimentIntensityAnalyzer()
print(get_vader_prediction(vader_analyzer, "Stock market is down 50 points"))

neutral


In [8]:
df['vader_pred'] = df['Sentence'].apply(lambda x: get_vader_prediction(vader_analyzer, x))
print(classification_report(df['vader_pred'], df['Sentiment']))

              precision    recall  f1-score   support

    negative       0.00      0.00      0.00         0
     neutral       1.00      0.54      0.70       399
    positive       0.01      1.00      0.02         1

    accuracy                           0.54       400
   macro avg       0.34      0.51      0.24       400
weighted avg       1.00      0.54      0.70       400



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Predicting with transformers pipeline

In [9]:
model = pipeline('sentiment-analysis')

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)
2022-08-07 11:01:25.792396: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-08-07 11:01:25.792419: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-08-07 11:01:25.792436: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (yukikongju-Swift-SF314-511): /proc/driver/nvidia/version does not exist
2022-08-07 11:01:25.798420: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other

In [16]:
pd.DataFrame(model(["the stock market is down", "i like apples", "i am walking"]))

Unnamed: 0,label,score
0,NEGATIVE,0.99977
1,POSITIVE,0.997794
2,POSITIVE,0.995788


In [18]:
# remark: pipeline only predicts negative/positive, not neutral
df_emotions = df[df['Sentiment'] != 'neutral']
emotions_pred = model(df_emotions['Sentence'].tolist())
df_emotions['trans_pred'] = pd.DataFrame(emotions_pred)['label'].apply(lambda x: x.lower()).tolist()

In [27]:
print(classification_report(df_emotions['Sentiment'], df_emotions['trans_pred']))

              precision    recall  f1-score   support

    negative       0.50      0.94      0.66        64
    positive       0.94      0.51      0.66       120

    accuracy                           0.66       184
   macro avg       0.72      0.72      0.66       184
weighted avg       0.79      0.66      0.66       184

