In [1]:
import pandas as pd
from text_mining.utils import annotate_texts, analyze_with_vader, krippendorff_analysis, train_classifier, preprocess_tweets, analyze_with_transformer
from text_mining.data import load_excel_annotations, load_sentiment_dict

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package stopwords to /home/krasky/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/krasky/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /home/krasky/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
df_train = load_excel_annotations("../../../../data/train.xlsx")
df_train["train"] = True
df_test = load_excel_annotations("../../../../data/test.xlsx")
df_test["train"] = False
df = pd.concat([df_train, df_test])
print(f"train len = {len(df_train)}, test len = {len(df_test)}")
df = df.dropna()
df.info()

train len = 5000, test len = 1000
<class 'pandas.core.frame.DataFrame'>
Index: 189 entries, 0 to 80
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   ID      189 non-null    int64  
 1   CODE    189 non-null    float64
 2   TEXT    189 non-null    object 
 3   coder   189 non-null    int64  
 4   train   189 non-null    bool   
dtypes: bool(1), float64(1), int64(2), object(1)
memory usage: 7.6+ KB


## 2.1  **Own Dictionary**
   - Use a custom dictionary for tweet annotation.

In [3]:
annotations = annotate_texts(
    df["TEXT"].tolist(),
    sentiment_dict_path="../../../../data/COPSSentimentDict.csv"
    )
df["dict_sentiment"] = annotations
df["dict_sentiment_rounded"] = [round(annotation) for annotation in annotations]
# print(df["dict_sentiment_rounded"].unique())
df.describe()

Unnamed: 0,ID,CODE,coder,dict_sentiment,dict_sentiment_rounded
count,189.0,189.0,189.0,189.0,189.0
mean,6.737459e+17,0.248677,1.0,0.112875,0.116402
std,1797108000000000.0,0.755519,0.0,0.624771,0.607748
min,6.721652e+17,-1.0,1.0,-1.0,-1.0
25%,6.721886e+17,0.0,1.0,0.0,0.0
50%,6.722204e+17,0.0,1.0,0.0,0.0
75%,6.758148e+17,1.0,1.0,0.5,0.0
max,6.758268e+17,1.0,1.0,1.0,1.0


In [4]:
krippendorff_analysis(
    df[["CODE", "dict_sentiment"]].to_numpy(),
    method_names=["human", "our dict"]
)
print("\n -- ROUNDED")
krippendorff_analysis(
    df[["CODE", "dict_sentiment_rounded"]].to_numpy(),
    method_names=["human", "our dict rounded"]
)

Krippendorff's Alpha: 0.1194
The methods show poor agreement.

Method-wise Agreement Summary:

human comparison with other methods:
  'human' vs. 'our dict': 31.75% agreement

our dict comparison with other methods:
  'our dict' vs. 'human': 31.75% agreement

 -- ROUNDED
Krippendorff's Alpha: 0.0907
The methods show poor agreement.

Method-wise Agreement Summary:

human comparison with other methods:
  'human' vs. 'our dict rounded': 38.62% agreement

our dict rounded comparison with other methods:
  'our dict rounded' vs. 'human': 38.62% agreement


## **2.2: VADER**
   - Employ VADER for sentiment analysis.


In [5]:
v_annotations = analyze_with_vader(df["TEXT"].tolist())
df["v_sentiment"] = v_annotations
df["v_sentiment_rounded"] = [round(a) for a in v_annotations]
df[["v_sentiment", "v_sentiment_rounded", "CODE"]].sample(10).head()

Unnamed: 0,v_sentiment,v_sentiment_rounded,CODE
47,0.0772,0,1.0
53,0.0,0,-1.0
86,0.0,0,0.0
77,-0.4404,0,-1.0
31,0.0,0,1.0


In [6]:
krippendorff_analysis(
    df[["CODE", "v_sentiment"]].to_numpy(),
    method_names=["human", "vader"]
)
print("\n---ROUNDED---")
krippendorff_analysis(
    df[["CODE", "v_sentiment_rounded"]].to_numpy(),
    method_names=["human", "vader rounded"]
)

Krippendorff's Alpha: 0.3590
The methods show poor agreement.

Method-wise Agreement Summary:

human comparison with other methods:
  'human' vs. 'vader': 20.63% agreement

vader comparison with other methods:
  'vader' vs. 'human': 20.63% agreement

---ROUNDED---
Krippendorff's Alpha: 0.3303
The methods show poor agreement.

Method-wise Agreement Summary:

human comparison with other methods:
  'human' vs. 'vader rounded': 53.44% agreement

vader rounded comparison with other methods:
  'vader rounded' vs. 'human': 53.44% agreement


## 2.3: Machine Learning
   - Train a classifier (**Naive Bayes**) using bag-of-words features.

In [7]:
classifer, vectorizer = train_classifier(df["TEXT"].tolist(), labels=df["CODE"])
ml_annotations = classifer.predict(vectorizer.transform(preprocess_tweets(df["TEXT"])))
df["ml_sentiment"] = ml_annotations
df[["CODE", "ml_sentiment"]].sample(10).head()

Classification Rep:
              precision    recall  f1-score   support

        -1.0       0.00      0.00      0.00         8
         0.0       0.44      0.70      0.54        10
         1.0       0.77      0.85      0.81        20

    accuracy                           0.63        38
   macro avg       0.40      0.52      0.45        38
weighted avg       0.52      0.63      0.57        38



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,CODE,ml_sentiment
61,-1.0,-1.0
64,1.0,1.0
93,1.0,1.0
1,0.0,0.0
5,0.0,0.0


In [8]:
krippendorff_analysis(
    df[["CODE", "ml_sentiment"]].to_numpy(),
    method_names=["Human", "ML"]
) # NOTE duh the methods show strong agreement

Krippendorff's Alpha: 0.9084
The methods show strong agreement.

Method-wise Agreement Summary:

Human comparison with other methods:
  'Human' vs. 'ML': 92.59% agreement

ML comparison with other methods:
  'ML' vs. 'Human': 92.59% agreement


## 2.4: Transformer Model
   - Apply a fine-tuned transformer model for sentiment analysis.

In [9]:
llm_annotations = analyze_with_transformer(df["TEXT"].tolist())
df["llm_sentiment"] = llm_annotations
df["llm_sentiment_rounded"] = [round(s) for s in llm_annotations]
df[["CODE", "llm_sentiment", "llm_sentiment_rounded", "TEXT"]].sample(10)
# info: distilbert/distilbert-base-uncased-finetuned-sst-2-english

Device set to use cuda:0


Unnamed: 0,CODE,llm_sentiment,llm_sentiment_rounded,TEXT
67,0.0,-0.999503,-1,"mashable: ""All of us had to solve it together,..."
18,0.0,0.697897,1,Fri 4th Dec. Indonesian school children to sen...
20,-1.0,-0.984622,-1,The Climate Change Issue Global Leaders Aren‚Ä...
13,1.0,0.921732,1,President Obama &amp; Bill Gates announce hist...
47,0.0,-0.984413,-1,Editorial: @Stanford and others should join @U...
90,1.0,-0.991314,-1,RT @IOM_news: Reference to #migration in lates...
17,-1.0,-0.996951,-1,When you mean to write fossil fuel extraction ...
26,1.0,-0.964681,-1,@BishopMarc (Episcopal Dioc. California) to le...
41,1.0,-0.987544,-1,Germany and the Netherlands Tackle the Risk of...
42,1.0,-0.984917,-1,Hold thy tongue ¬†:¬†Times Argus Online Great ...


In [10]:
krippendorff_analysis(
    df[["CODE", "llm_sentiment"]],
    method_names=["human", "llm sentiment"]
)
print("\n---ROUNDED---")
krippendorff_analysis(
    df[["CODE", "llm_sentiment_rounded"]],
    method_names=["human", "llm rounded"]
)

Krippendorff's Alpha: 0.1666
The methods show poor agreement.

Method-wise Agreement Summary:

human comparison with other methods:
  'human' vs. 'llm sentiment': 0.00% agreement

llm sentiment comparison with other methods:
  'llm sentiment' vs. 'human': 0.00% agreement

---ROUNDED---
Krippendorff's Alpha: 0.0401
The methods show poor agreement.

Method-wise Agreement Summary:

human comparison with other methods:
  'human' vs. 'llm rounded': 34.39% agreement

llm rounded comparison with other methods:
  'llm rounded' vs. 'human': 34.39% agreement


In [None]:
# test kripp walk
temp = [
    [1, 1],  # Tweet 1: All methods agree on 'positive'
    [0, 0],  # Tweet 2: All methods agree on 'neutral'
    [-1, -1],  # Tweet 3: All methods agree on 'negative'
    [1, 1],  # Tweet 4: All methods agree on 'positive'
    [0, 0],  # Tweet 5: All methods agree on 'neutral'
]
krippendorff_analysis(temp, method_names=[f"{i}" for i in range(len(temp[0]))])