In [2]:
# Install required packages
# !pip install transformers torch supabase python-dotenv

# Import libraries
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
import torch
from supabase import create_client, Client
import os
from dotenv import load_dotenv
import pandas as pd

# Load variables from the .env file one directory above the notebook
load_dotenv("../.env")

# Read both Supabase settings from the environment, then build the client
SUPABASE_URL, SUPABASE_KEY = (
    os.getenv(var_name) for var_name in ("SUPABASE_URL", "SUPABASE_KEY")
)
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

print("✅ Setup complete!")

✅ Setup complete!


In [3]:
# Load sentiment analysis model
model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Probe CUDA once and reuse the answer for both device selection and the
# status line, so the two can never disagree.
use_gpu = torch.cuda.is_available()

print(f"Loading model: {model_name}...")
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=model_name,
    tokenizer=model_name,
    # `return_all_scores=True` is deprecated in transformers; `top_k=None` is
    # the documented replacement and likewise returns a score for every label.
    # NOTE: with `top_k=None` the per-label entries come back sorted by score
    # (highest first); code that looks scores up by label is unaffected.
    top_k=None,
    device=0 if use_gpu else -1,  # Use GPU if available
)

print("✅ Model loaded successfully!")
print(f"Device: {'GPU' if use_gpu else 'CPU'}")

Loading model: cardiffnlp/twitter-roberta-base-sentiment-latest...


config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


vocab.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/501M [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cpu


✅ Model loaded successfully!
Device: CPU




In [4]:
# Query the 10 most recent articles from news_cleaned that have a non-null body
query = (
    supabase.table('news_cleaned')
    .select('id, title, content_full, category, pub_date, source')
    .not_.is_('content_full', 'null')
    .order('pub_date', desc=True)
    .limit(10)
    .execute()
)

# Normalise a missing payload to an empty list so len() and iteration are safe
articles = query.data or []
print(f"✅ Fetched {len(articles)} articles")

# Preview first article (only when there is one: indexing an empty result
# would raise IndexError and abort the cell)
if articles:
    first_article = articles[0]
    print("\n--- Sample Article ---")
    print(f"Title: {first_article['title']}")
    print(f"Content length: {len(first_article['content_full'])} chars")
    print(f"Category: {first_article['category']}")
else:
    print("\n⚠️ Query returned no articles - nothing to preview")

✅ Fetched 10 articles

--- Sample Article ---
Title: Coimbatore Junction-Kannur Express to start from Podanur Junction on July 8 and 10
Content length: 946 chars
Category: Coimbatore


In [5]:
# Run sentiment analysis on all articles
MAX_CONTENT_CHARS = 2000  # cheap character-level pre-trim before tokenisation
MAX_MODEL_TOKENS = 512    # upper bound on tokens passed to the model

results = []

for i, article in enumerate(articles, 1):
    print(f"Processing {i}/{len(articles)}: {article['title'][:50]}...")

    # A character slice alone does not bound the token count: token-dense text
    # (numbers, names, non-English words) can still tokenise to more than
    # MAX_MODEL_TOKENS, so the tokenizer is also asked to truncate.
    content = article['content_full'][:MAX_CONTENT_CHARS]

    # Get predictions; the pipeline wraps a single-string result in a list,
    # hence the [0] to reach the per-label score entries.
    prediction = sentiment_pipeline(
        content,
        truncation=True,
        max_length=MAX_MODEL_TOKENS,  # explicit, in case the tokenizer config sets no limit
    )[0]

    # Convert to {label: score} for easier access
    scores = {pred['label'].lower(): pred['score'] for pred in prediction}

    # Determine top label (highest score)
    top_label = max(scores, key=scores.get)

    results.append({
        'article_id': article['id'],
        'title': article['title'],
        'sentiment_label': top_label.upper(),
        'sentiment_scores': scores,
        'confidence': scores[top_label],
        'content_length': len(article['content_full']),  # full length, not the trimmed one
        'category': article['category'],
        'source': article['source']
    })

print(f"\n✅ Processed {len(results)} articles successfully!")

Processing 1/10: Coimbatore Junction-Kannur Express to start from P...
Processing 2/10: Train users’ assn. seeks revival of Tambaram servi...
Processing 3/10: Private hospital helps 20 poor students join the c...
Processing 4/10: Sivaganga custodial death: Special team followed i...
Processing 5/10: 99 firemen recruits pass out in Madurai...
Processing 6/10: T.N. police arrest two terror suspects who remaine...
Processing 7/10: Seized stray cattle should be sent to goshalas to ...
Processing 8/10: Six months on, long jump track work at Coimbatore ...
Processing 9/10: Sivaganga custodial torture: Palaniswami finds fau...
Processing 10/10: Thousands of devotees throng Chidambaram to take p...

✅ Processed 10 articles successfully!


In [6]:
# Tabulate the per-article results and show the headline columns
df = pd.DataFrame(results)
overview_columns = ['title', 'sentiment_label', 'confidence', 'category']
print(df.loc[:, overview_columns])

# Summary statistics: how many articles fall under each label
print("\n--- Sentiment Distribution ---")
label_counts = df['sentiment_label'].value_counts()
print(label_counts)

# Summary statistics: mean confidence within each label
print("\n--- Average Confidence by Sentiment ---")
mean_confidence_by_label = df.groupby('sentiment_label')['confidence'].mean()
print(mean_confidence_by_label)

# Show one raw record as a sample of the structure destined for the database
print("\n--- Sample Output Structure ---")
first_result = results[0]
print(first_result)

                                               title sentiment_label  \
0  Coimbatore Junction-Kannur Express to start fr...         NEUTRAL   
1  Train users’ assn. seeks revival of Tambaram s...         NEUTRAL   
2  Private hospital helps 20 poor students join t...        POSITIVE   
3  Sivaganga custodial death: Special team follow...         NEUTRAL   
4            99 firemen recruits pass out in Madurai         NEUTRAL   
5  T.N. police arrest two terror suspects who rem...         NEUTRAL   
6  Seized stray cattle should be sent to goshalas...         NEUTRAL   
7  Six months on, long jump track work at Coimbat...        NEGATIVE   
8  Sivaganga custodial torture: Palaniswami finds...        NEGATIVE   
9  Thousands of devotees throng Chidambaram to ta...         NEUTRAL   

   confidence    category  
0    0.818252  Coimbatore  
1    0.851815  Coimbatore  
2    0.505324     Madurai  
3    0.695875  Tamil Nadu  
4    0.881934     Madurai  
5    0.657402  Tamil Nadu  
6    0.7550