<a href="https://colab.research.google.com/github/waqarmm/AI-Labs/blob/master/BART_REHAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Transformers with its PyTorch extras into the *kernel's* environment.
# `%pip` is spelled out because the bare `pip ...` form relies on IPython automagic,
# which is only applied to single-line cells; the requirement is quoted so the
# shell never treats `[torch]` as a glob pattern.
%pip install "transformers[torch]"

Collecting accelerate>=0.20.3 (from transformers[torch])
  Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.25.0


In [None]:
import torch
from transformers import BartTokenizer, BartForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
import pandas as pd

# Load your dataset (assuming it's in a CSV file)
your_dataset = pd.read_csv('new_data_with_emotions.csv')

# Map sentiment labels to numerical values; the integer is the class index the model learns
label_dict = {'Love': 0, 'Happy': 1, 'Neutral': 2, 'Sad': 3, 'Angry': 4}

# Keep only rows that have both a text and a label. pandas reads empty CSV cells as NaN,
# which the tokenizer rejects and which is not a key of label_dict.
labelled_rows = your_dataset.dropna(subset=['combined_translated', 'emotion'])

# Fail early with a readable message if the file contains an emotion the mapping does not cover,
# instead of a bare KeyError in the middle of the label conversion below.
unknown_labels = sorted(str(label) for label in set(labelled_rows['emotion']) - set(label_dict))
if unknown_labels:
    raise ValueError(
        f"Unmapped emotion labels in 'new_data_with_emotions.csv': {unknown_labels}; "
        f"expected one of {list(label_dict)}"
    )

# Extract text data and sentiment labels
text_data = labelled_rows['combined_translated'].tolist()
sentiment_labels = labelled_rows['emotion'].tolist()

# Split the dataset into training and validation sets (80/20, fixed seed for reproducibility)
train_texts, val_texts, train_labels, val_labels = train_test_split(
    text_data,
    sentiment_labels,
    test_size=0.2,
    random_state=42
)

# Load BART tokenizer
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')

# Tokenize the text data; each split is padded to its own longest sequence and
# over-long texts are truncated to the tokenizer's maximum length
train_encodings = tokenizer(train_texts, padding=True, truncation=True, return_tensors="pt")
val_encodings = tokenizer(val_texts, padding=True, truncation=True, return_tensors="pt")

# Convert sentiment labels to numerical values
train_labels = [label_dict[label] for label in train_labels]
val_labels = [label_dict[label] for label in val_labels]
# Define datasets using PyTorch Dataset class
class SentimentDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs tokenizer encodings with class labels.

    Args:
        encodings: Mapping from field name to an indexable container holding
            one entry per example (anything exposing ``.items()``).
        labels: Indexable sequence of labels, one per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of examples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of its encoding fields plus ``'labels'``."""
        sample = {}
        for field_name, field_values in self.encodings.items():
            sample[field_name] = field_values[idx]
        # The label is wrapped in a tensor and stored under the 'labels' key.
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

train_dataset = SentimentDataset(train_encodings, train_labels)
val_dataset = SentimentDataset(val_encodings, val_labels)

# Initialize BART model for sequence classification.
# The label names are stored in the model config so that saved checkpoints carry
# 'Love', 'Happy', ... instead of the generic LABEL_0 ... LABEL_4 placeholders.
id2label = {class_id: name for name, class_id in label_dict.items()}
model = BartForSequenceClassification.from_pretrained(
    'facebook/bart-base',
    num_labels=len(label_dict),
    id2label=id2label,
    label2id=label_dict,
)

# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers releases.
# The install cell does not pin a version, so use whichever name the installed
# TrainingArguments dataclass actually declares.
eval_strategy_arg = (
    'eval_strategy'
    if 'eval_strategy' in TrainingArguments.__dataclass_fields__
    else 'evaluation_strategy'
)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    **{eval_strategy_arg: "epoch"},  # evaluate on the validation set after every epoch
)

# Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)

# Start model training
train_result = trainer.train()

# Persist the fully trained weights and the tokenizer together. The periodic
# checkpoint-<step> folders are snapshots taken during training and do not
# necessarily include the last optimisation step.
trainer.save_model('./results/final')
tokenizer.save_pretrained('./results/final')

# Last expression of the cell: display the training summary
train_result


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.out_proj.weight', 'classification_head.out_proj.bias', 'classification_head.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.3729,0.378766
2,0.2473,0.282552
3,0.0919,0.318615


TrainOutput(global_step=2145, training_loss=0.32721525807202834, metrics={'train_runtime': 2230.7512, 'train_samples_per_second': 7.687, 'train_steps_per_second': 0.962, 'total_flos': 5269466399512968.0, 'train_loss': 0.32721525807202834, 'epoch': 3.0})

In [None]:
import torch
from transformers import BartTokenizer, BartForSequenceClassification
import pandas as pd
import random

from pathlib import Path  # imported here so the cell also runs on its own after a kernel restart

# Load the fine-tuned BART model and tokenizer.
# Prefer an explicitly saved final model if one exists; otherwise fall back to the
# highest-numbered checkpoint-<step> folder rather than a hard-coded intermediate step.
results_dir = Path('./results')
final_dir = results_dir / 'final'
if final_dir.is_dir():
    model_dir = final_dir
else:
    numbered_checkpoints = [
        (int(path.name.split('-')[-1]), path)
        for path in results_dir.glob('checkpoint-*')
        if path.is_dir() and path.name.split('-')[-1].isdigit()
    ]
    if not numbered_checkpoints:
        raise FileNotFoundError(
            f"No fine-tuned model found under '{results_dir}'; run the training cell first."
        )
    model_dir = max(numbered_checkpoints, key=lambda pair: pair[0])[1]

model = BartForSequenceClassification.from_pretrained(str(model_dir))
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')

# Load your dataset (assuming it's in a CSV file)
your_dataset = pd.read_csv('new_data_with_emotions.csv')

# Extract text data, sentiment labels, and cities
text_data = your_dataset['combined_translated'].tolist()
sentiment_labels = your_dataset['emotion'].tolist()
cities = your_dataset['city'].tolist()

# User query and city (for demonstration purposes)
user_query = "I am feeling happy and excited today."  # User query
user_city = "User's City"  # Placeholder: replace with a value that occurs in the 'city' column

# Tokenize the user query; truncation keeps over-long queries within the model's
# maximum input length, and the attention mask is passed along with the ids
encoded_query = tokenizer(user_query, return_tensors='pt', truncation=True)
input_ids = encoded_query['input_ids']

# Generate prediction
with torch.no_grad():
    outputs = model(**encoded_query)
    logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=1).item()

# Map predicted class to sentiment label (inverse of the mapping used for training)
label_dict = {0: 'Love', 1: 'Happy', 2: 'Neutral', 3: 'Sad', 4: 'Angry'}  # Numerical labels to sentiment labels
predicted_sentiment = label_dict[predicted_class]

# The user's sentiment is the classifier's prediction for their query. It used to be
# drawn with random.choice, which made the recommendation below nondeterministic.
user_sentiment = predicted_sentiment

# Use predicted sentiment and city information to recommend a place:
# first narrow down to the user's city, then to entries labelled with the same emotion
places_in_city = [
    (place, emotion)
    for place, emotion, city in zip(text_data, sentiment_labels, cities)
    if city == user_city
]
recommended_places = [place for place, emotion in places_in_city if emotion == user_sentiment]

if recommended_places:
    recommended_place = recommended_places[0]  # Assuming the first place is recommended
elif places_in_city:
    # The city has entries, but none labelled with the predicted emotion
    recommended_place = "Another place might suit your current mood better."
else:
    recommended_place = "No specific place found in your city."

print(f"Predicted Sentiment: {predicted_sentiment}")
print(f"Recommended Place: {recommended_place}")


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1', '2': 'LABEL_2', '3': 'LABEL_3', '4': 'LABEL_4'}. The number of labels wil be overwritten to 5.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Predicted Sentiment: Love
Recommended Place: No specific place found in your city.


In [None]:
# List the working directory (explicit form of the `ls` alias magic)
%ls

[0m[01;34mdrive[0m/  [01;34mlogs[0m/  new_data_with_emotions.csv  [01;34mresults[0m/  [01;34msample_data[0m/
