In [1]:
import pandas as pd
from text_mining.utils import annotate_texts, analyze_with_vader, krippendorff_analysis, train_classifier, preprocess_tweets, analyze_with_transformer
from text_mining.data import load_excel_annotations, load_sentiment_dict

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package stopwords to /home/krasky/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/krasky/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /home/krasky/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
df_train = load_excel_annotations("../../../../data/train.xlsx")
df_train["train"] = True
df_test = load_excel_annotations("../../../../data/test.xlsx")
df_test["train"] = False
df = pd.concat([df_train, df_test])
print(f"train len = {len(df_train)}, test len = {len(df_test)}")
df = df.dropna()
df.info()

train len = 15000, test len = 3000
<class 'pandas.core.frame.DataFrame'>
Index: 621 entries, 0 to 2108
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   ID      621 non-null    int64  
 1   CODE    621 non-null    float64
 2   TEXT    621 non-null    object 
 3   coder   621 non-null    int64  
 4   train   621 non-null    bool   
dtypes: bool(1), float64(1), int64(2), object(1)
memory usage: 24.9+ KB


## 2.1  **Own Dictionary**
   - Use a custom dictionary for tweet annotation.

In [3]:
annotations = annotate_texts(
    df["TEXT"].tolist(),
    sentiment_dict_path="../../../../data/COPSSentimentDict.csv"
    )
df["dict_sentiment"] = annotations
df["dict_sentiment_rounded"] = [round(annotation) for annotation in annotations]
# print(df["dict_sentiment_rounded"].unique())
df.describe()

Unnamed: 0,ID,CODE,coder,dict_sentiment,dict_sentiment_rounded
count,621.0,621.0,621.0,621.0,621.0
mean,6.73359e+17,0.201288,2.127214,0.102254,0.099839
std,1983991000000000.0,0.770252,0.849051,0.636278,0.624513
min,6.711166e+17,-1.0,1.0,-1.0,-1.0
25%,6.721735e+17,0.0,1.0,0.0,0.0
50%,6.722123e+17,0.0,2.0,0.0,0.0
75%,6.758118e+17,1.0,3.0,0.5,0.0
max,6.758268e+17,1.0,3.0,1.0,1.0


In [4]:
krippendorff_analysis(
    df[["CODE", "dict_sentiment"]].to_numpy(),
    method_names=["human", "our dict"]
)
print("\n -- ROUNDED")
krippendorff_analysis(
    df[["CODE", "dict_sentiment_rounded"]].to_numpy(),
    method_names=["human", "our dict rounded"]
)

Krippendorff's Alpha: 0.1728
The methods show poor agreement.

Method-wise Agreement Summary:

human comparison with other methods:
  'human' vs. 'our dict': 38.49% agreement

our dict comparison with other methods:
  'our dict' vs. 'human': 38.49% agreement

 -- ROUNDED
Krippendorff's Alpha: 0.1475
The methods show poor agreement.

Method-wise Agreement Summary:

human comparison with other methods:
  'human' vs. 'our dict rounded': 43.96% agreement

our dict rounded comparison with other methods:
  'our dict rounded' vs. 'human': 43.96% agreement


## **2.2: VADER**
   - Employ VADER for sentiment analysis.


In [5]:
v_annotations = analyze_with_vader(df["TEXT"].tolist())
df["v_sentiment"] = v_annotations
df["v_sentiment_rounded"] = [round(a) for a in v_annotations]
df[["v_sentiment", "v_sentiment_rounded", "CODE"]].sample(10).head()

Unnamed: 0,v_sentiment,v_sentiment_rounded,CODE
36,0.0772,0,0.0
1025,0.1027,0,1.0
9989,0.3182,0,0.0
8,-0.25,0,-1.0
5037,0.0,0,1.0


In [6]:
krippendorff_analysis(
    df[["CODE", "v_sentiment"]].to_numpy(),
    method_names=["human", "vader"]
)
print("\n---ROUNDED---")
krippendorff_analysis(
    df[["CODE", "v_sentiment_rounded"]].to_numpy(),
    method_names=["human", "vader rounded"]
)

Krippendorff's Alpha: 0.3281
The methods show poor agreement.

Method-wise Agreement Summary:

human comparison with other methods:
  'human' vs. 'vader': 19.16% agreement

vader comparison with other methods:
  'vader' vs. 'human': 19.16% agreement

---ROUNDED---
Krippendorff's Alpha: 0.2937
The methods show poor agreement.

Method-wise Agreement Summary:

human comparison with other methods:
  'human' vs. 'vader rounded': 49.44% agreement

vader rounded comparison with other methods:
  'vader rounded' vs. 'human': 49.44% agreement


## 2.3: Machine Learning
   - Train a classifier (**Naive Bayes**) using bag-of-words features.

In [7]:
classifer, vectorizer = train_classifier(df["TEXT"].tolist(), labels=df["CODE"])
ml_annotations = classifer.predict(vectorizer.transform(preprocess_tweets(df["TEXT"])))
df["ml_sentiment"] = ml_annotations
df[["CODE", "ml_sentiment"]].sample(10).head()

Classification Rep:
              precision    recall  f1-score   support

        -1.0       0.44      0.35      0.39        20
         0.0       0.53      0.64      0.58        47
         1.0       0.67      0.60      0.64        58

    accuracy                           0.58       125
   macro avg       0.55      0.53      0.53       125
weighted avg       0.58      0.58      0.57       125



Unnamed: 0,CODE,ml_sentiment
36,0.0,0.0
1040,-1.0,-1.0
2023,1.0,-1.0
10015,1.0,0.0
5010,1.0,0.0


In [8]:
krippendorff_analysis(
    df[["CODE", "ml_sentiment"]].to_numpy(),
    method_names=["Human", "ML"]
) # NOTE The agreeement is only on the rows that were in the training data (overfit?)

Krippendorff's Alpha: 0.7405
The methods show moderate agreement.

Method-wise Agreement Summary:

Human comparison with other methods:
  'Human' vs. 'ML': 80.35% agreement

ML comparison with other methods:
  'ML' vs. 'Human': 80.35% agreement


## 2.4: Transformer Model
   - Apply a fine-tuned transformer model for sentiment analysis.

In [12]:
llm_annotations = analyze_with_transformer(df["TEXT"])
df["llm_sentiment"] = llm_annotations
df["llm_sentiment_rounded"] = [round(s) for s in llm_annotations]
df[["CODE", "llm_sentiment", "llm_sentiment_rounded", "TEXT"]].sample(10)
# info: distilbert/distilbert-base-uncased-finetuned-sst-2-english

Device set to use cuda:0


Unnamed: 0,CODE,llm_sentiment,llm_sentiment_rounded,TEXT
14898,0.0,-0.999235,-1,What target is Vietnam going to #COP21 with? h...
19,-1.0,-0.983257,-1,What are all the things climate change can't d...
9993,1.0,0.997141,1,"Dear @twitter, I really like your #COP21 #Clim..."
1007,1.0,-0.988685,-1,Our CEO Dr. Chad Nelsen contributed to The Huf...
10017,-1.0,-0.996951,-1,When you mean to write fossil fuel extraction ...
54,1.0,-0.834195,-1,#Paris climate deal: nearly 200 nations sign i...
2062,1.0,0.99916,1,"Just when you start to lose faith in humanity,..."
14985,0.0,-0.973398,-1,DipNote ¬∞ Five Things You Need To Know About ...
5062,1.0,-0.994509,-1,Thanks for posting Civil Society's #COP21 stat...
2025,1.0,-0.974537,-1,#COP21 Historic Paris climate pact reached ‚Äì...


In [None]:
krippendorff_analysis(
    df[["CODE", "llm_sentiment"]],
    method_names=["human", "llm sentiment"]
)
print("\n---ROUNDED---")
krippendorff_analysis(
    df[["CODE", "llm_sentiment_rounded"]],
    method_names=["human", "llm rounded"]
)

Krippendorff's Alpha: 0.2595
The methods show poor agreement.

Method-wise Agreement Summary:

human comparison with other methods:
  'human' vs. 'vader': 0.00% agreement

vader comparison with other methods:
  'vader' vs. 'human': 0.00% agreement

---ROUNDED---
Krippendorff's Alpha: 0.1433
The methods show poor agreement.

Method-wise Agreement Summary:

human comparison with other methods:
  'human' vs. 'vader rounded': 38.97% agreement

vader rounded comparison with other methods:
  'vader rounded' vs. 'human': 38.97% agreement


In [None]:
# test kripp walk
temp = [
    [1, 1],  # Tweet 1: All methods agree on 'positive'
    [0, 0],  # Tweet 2: All methods agree on 'neutral'
    [-1, -1],  # Tweet 3: All methods agree on 'negative'
    [1, 1],  # Tweet 4: All methods agree on 'positive'
    [0, 0],  # Tweet 5: All methods agree on 'neutral'
]
krippendorff_analysis(temp, method_names=[f"{i}" for i in range(len(temp[0]))])