In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
# (Keep comments on their own lines: text after a line magic is passed to the
# magic as part of its argument string.)
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys
import pandas as pd
import numpy as np
# Load the generated e-mail dataset from the attached Kaggle input directory.
gpt_df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/gpt-dataset/gpt_dataset.csv",
)

In [None]:
import subprocess

from kaggle_secrets import UserSecretsClient

# Secrets are read from the Kaggle secret store; `user_secrets` is reused by
# later cells, so it stays a module-level name.
user_secrets = UserSecretsClient()
PAT = user_secrets.get_secret("pat")

GITHUB_USERNAME = "vladkisin"
REPO_NAME = "workmind-dev"
REPO_URL = f"https://{GITHUB_USERNAME}:{PAT}@github.com/{GITHUB_USERNAME}/{REPO_NAME}.git"
# Absolute clone target, so re-running this cell (when the working directory
# is already the repository) does not clone a nested copy.
REPO_DIR = f"/kaggle/working/{REPO_NAME}"

# Clone only once: the cell stays idempotent under re-runs.
if not os.path.isdir(os.path.join(REPO_DIR, ".git")):
    # Argument list (no shell) so the token is never parsed by a shell.
    clone_result = subprocess.run(
        ["git", "clone", REPO_URL, REPO_DIR],
        capture_output=True,
        text=True,
    )
    if clone_result.returncode != 0:
        # Never echo the token into notebook output: redact it from git's
        # message and do not include the command line in the error.
        git_message = clone_result.stderr or ""
        if PAT:
            git_message = git_message.replace(PAT, "***")
        raise RuntimeError(
            f"git clone of {GITHUB_USERNAME}/{REPO_NAME} failed "
            f"(exit code {clone_result.returncode}): {git_message.strip()}"
        )

# Later cells (requirements install, project imports) rely on the repository
# root being the current working directory.
os.chdir(REPO_DIR)

In [None]:
! pip install -U -r requirements.txt --quiet

In [None]:
import wandb
import torch
# Authenticate with Weights & Biases using the key kept in the Kaggle secret
# store, so the key never appears in the notebook source.
wandb.login(
    key=user_secrets.get_secret("wandb_pat"),
)

from workmind.experiment.wandb.sentiment import SentimentExperiment
from workmind.analyzers.constants import BaseSentiment
from workmind.analyzers.sentiment.llm import LLMSentimentAnalyzer

In [None]:
import gc

# Inputs for every experiment: e-mail texts, their reference sentiment labels
# and the user id attached to each row of the dataset.
gpt_emails = gpt_df["text"].tolist()
y_gpt_email = gpt_df["sentiment_label"].tolist()
user_ids = gpt_df["user_id"].tolist()
PROJECT_NAME = "workmind-email-data"

# Tunable settings collected in one place instead of inline magic numbers.
MODEL_NAMES = [
    "tiiuae/Falcon3-10B-Instruct",
    "tiiuae/Falcon3-7B-Instruct",
    "Qwen/Qwen2.5-14B-Instruct-1M",
    "Qwen/Qwen2.5-7B-Instruct",
    "microsoft/phi-4",
]
CLASS_LABELS = [BaseSentiment.NEGATIVE, BaseSentiment.POSITIVE, BaseSentiment.NEUTRAL]
BATCH_SIZE = 16
QUANT_BITS = 4
MAX_INPUT_TOKENS = 2048

# Evaluate each model in turn, logging one experiment per model.
for model_name in MODEL_NAMES:
    llm_analyzer = None
    experiment = None
    try:
        llm_analyzer = LLMSentimentAnalyzer(
            model_name,
            class_labels=CLASS_LABELS,
            batch_size=BATCH_SIZE,
            bits=QUANT_BITS,
            max_input_tokens=MAX_INPUT_TOKENS,
        )

        # The run label is derived from QUANT_BITS so it cannot drift from the
        # quantisation actually requested above.
        with SentimentExperiment(
            llm_analyzer,
            f"{model_name} {QUANT_BITS}-bit on ChatGPT-o1 Generated Data",
            y_gpt_email,
            project_name=PROJECT_NAME,
        ) as experiment:
            experiment.evaluate(gpt_emails, user_ids)
    finally:
        # Release the model even when an experiment fails, so a crashed run
        # does not leave it pinned in the still-running kernel.
        # NOTE(review): `experiment` was constructed with the analyzer and may
        # retain a reference to it (not visible here), so it is dropped too;
        # otherwise the previous model could still be alive while the next one
        # is being loaded.
        experiment = None
        llm_analyzer = None
        # Collect reference cycles first so the cached GPU blocks are really
        # unreferenced before asking PyTorch to release them.
        gc.collect()
        torch.cuda.empty_cache()