In [1]:
# Mount Google Drive at /content/drive; the model checkpoint and the news
# dataset used by later cells are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
import pandas as pd
import numpy as np
import torch

from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer

In [6]:
# The classifier weights and the tokenizer files are read from the same
# checkpoint directory on Drive, so the path is spelled out only once.
MODEL_DIR = "/content/drive/My Drive/Research UG/codes/fine_tuned_model"

tokenizer = DistilBertTokenizer.from_pretrained(MODEL_DIR)
distil = DistilBertForSequenceClassification.from_pretrained(MODEL_DIR)

In [54]:
# News articles to classify; later cells read the `Body` and `Date` columns.
# NOTE(review): the `news.head(10)` preview further down shows this file
# already carrying a `class` column -- confirm whether the CSV on Drive is
# the raw input or a previously labelled export.
NEWS_CSV = '/content/drive/My Drive/Research UG/codes/DataSet/impact_news.csv'
news = pd.read_csv(NEWS_CSV)

In [23]:
# Put the model in evaluation mode (turns off training-only behaviour such
# as dropout) before it is used for inference in `predict` below.
distil.eval()

def predict(text):
    """Classify a single piece of text with the module-level ``distil`` model.

    Args:
        text: The string to classify. It is tokenized with the module-level
            ``tokenizer``; inputs longer than 512 tokens are truncated.

    Returns:
        tuple: ``(predicted_class_id, probabilities)``.
        ``predicted_class_id`` is the integer index of the most probable
        class. ``probabilities`` is a NumPy array of softmax scores with one
        row per input, i.e. shape ``(1, num_labels)`` for a single string.

    Note:
        The class id is extracted with ``.item()``, which only works when
        exactly one input is classified, so pass one string per call.
    """
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Keep the inputs on the same device as the model, so the function keeps
    # working if the model has been moved to a GPU (e.g. distil.to("cuda")).
    inputs = inputs.to(distil.device)

    # Inference only: no gradients are needed for the forward pass or softmax
    with torch.no_grad():
        logits = distil(**inputs).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)

    # Index of the highest-probability class for the (single) input
    predicted_class_id = torch.argmax(probabilities, dim=1).item()

    # .cpu() is a no-op on CPU and required before .numpy() on a GPU tensor
    return predicted_class_id, probabilities.cpu().numpy()

# Quick sanity check: classify one hand-written sentence and show the result
text = "Government-subsidised vegetable programs can ease pressure on lower-income households and increase access to fresh produce"
predicted_class, probabilities = predict(text)
print(
    f"Predicted class: {predicted_class}",
    f"Probabilities: {probabilities}",
    sep="\n",
)

Predicted class: 1
Probabilities: [[0.04407128 0.940251   0.01567782]]


In [24]:
# Classify every article body. `predict` returns (class_id, probabilities);
# only the class id is kept. tqdm reports progress, since this runs one model
# forward pass per article.
# NOTE(review): the probability columns appear to be ordered
# neutral (0), positive (1), negative (2) -- confirm against the model's
# label config before relying on that mapping.
predicted_ids = [
    predict(body)[0]
    for body in tqdm(news['Body'], total=len(news), desc='Classifying news')
]

# Assign the whole column in one step. The assignment is positional, so it
# does not depend on the index labels being unique. The values are stored as
# float64 so the labels seen downstream (0.0 / 1.0 / 2.0, and the shifted
# 1.0 / 2.0 / 3.0 used as crosstab and CSV column headers) stay unchanged.
news['predicted_class'] = np.asarray(predicted_ids, dtype=float)

In [26]:
# Show how many articles were assigned to each predicted class
news['predicted_class'].value_counts()

Unnamed: 0_level_0,count
predicted_class,Unnamed: 1_level_1
0.0,5215
2.0,3582
1.0,2496


In [32]:
# Shift the 0-based predicted labels (0/1/2) to 1-based class ids (1/2/3),
# then display the resulting distribution.
news['class'] = news['predicted_class'].add(1)
news['class'].value_counts()

Unnamed: 0_level_0,count
class,Unnamed: 1_level_1
1.0,5215
3.0,3582
2.0,2496


In [57]:
# Preview the first 10 rows of the news frame
news.head(10)

Unnamed: 0,Title,Body,Date,combined_text,matched_keywords,class
0,Weatherman warns of more rains,The Meteorology Department has warned of more ...,5/18/2015,Weatherman warns of more rains The Meteorology...,['flood'],3.0
1,Research scientist raises kidney disease alarm...,Farmers in the Jaffna Peninsula were beginning...,5/18/2015,Research scientist raises kidney disease alarm...,['agrochemicals'],3.0
2,Fish rainin Nallur,People from Nallur were surprised to experienc...,5/18/2015,Fish rainin Nallur People from Nallur were sur...,['heavy rain'],1.0
3,Parliament adjourned in uproar over Central Ba...,Parliament was adjourned Tuesday in the wake o...,5/19/2015,Parliament adjourned in uproar over Central Ba...,['auction'],1.0
4,Sri Lanka IOC unit in losses after state direc...,"Lanka IOC, a unit of Indian Oil Corporation lo...",5/19/2015,Sri Lanka IOC unit in losses after state direc...,['fuel price'],1.0
5,Vasu claims MS saying having no problem with M...,UPFA MP Vasudeva Nanayakkara today claimed tha...,5/20/2015,Vasu claims MS saying having no problem with M...,['narahenpita'],1.0
6,"Given current scenario, 7% GDP growth for 2015...",GDP growth is expected to be around 7% this ye...,5/22/2015,"Given current scenario, 7% GDP growth for 2015...",['inflation'],2.0
7,Sri Lanka policy rates unchanged amid exchange...,Sri Lanka's central bank said it will keep pol...,5/22/2015,Sri Lanka policy rates unchanged amid exchange...,['inflation'],2.0
8,No solution to flooding in Colombo!,While much has been said about the causes for ...,5/22/2015,No solution to flooding in Colombo! While much...,['flood'],3.0
9,'UNP boss at controversial Treasury bond auction',Chairman of UNP was at a controversial Treasur...,5/22/2015,'UNP boss at controversial Treasury bond aucti...,['auction'],1.0


In [61]:
# Number of classes and their 1-based labels (1 .. num_class)
num_class = 3
class_range = [*range(1, num_class + 1)]

# Per-date frequency table: one row per distinct `Date`, one column per class
# label, each cell holding the number of articles with that date and class.
# NOTE(review): `Date` looks like a plain string such as '5/18/2015' in the
# preview above, so the rows are presumably ordered lexicographically and not
# chronologically -- confirm, and parse with pd.to_datetime if order matters.
class_freq1 = pd.crosstab(index=news['Date'], columns=news['class'])

# Add a zero-filled column for any class that never occurs, so the selection
# below always yields exactly `num_class` columns.
absent_labels = [label for label in class_range if label not in class_freq1.columns]
for label in absent_labels:
    class_freq1[label] = 0

# Keep only the class columns, in ascending label order
class_news = class_freq1[class_range]



In [63]:
# Export the per-date class counts. The relative path resolves against the
# notebook's current working directory.
# NOTE(review): unlike the inputs, this is not written under /content/drive;
# confirm the file is meant to live outside Drive.
class_news.to_csv('class.csv')

In [64]:
# Sanity check: (number of distinct dates, number of classes)
class_news.shape

(3099, 3)

In [49]:
# NOTE(review): this repeats the earlier export cell verbatim (same frame,
# same 'class.csv' target) and looks like a leftover; consider dropping one
# of the two so the notebook exports the file exactly once.
class_news.to_csv('class.csv')