<a href="https://colab.research.google.com/github/yunialifah-ya/DeepLearningTasks/blob/main/TaskWeek7_Sentiment_Analysis_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Nama : Yuni Alifah

NPM : 2306288950

Link Original Code : https://huggingface.co/finiteautomata/bertweet-base-sentiment-analysis (finiteautomata/bertweet-base-sentiment-analysis)

Menjalankan Model

In [None]:
# Step 1: Install transformers and torch if not already installed
!pip install transformers torch

# Step 2: Import necessary libraries
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Step 3: Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("finiteautomata/bertweet-base-sentiment-analysis")
model = AutoModelForSequenceClassification.from_pretrained("finiteautomata/bertweet-base-sentiment-analysis")

# Step 4: Define a function to perform sentiment analysis
def analyze_sentiment(text):
    # Tokenize the input text and convert to tensor
    inputs = tokenizer(text, return_tensors="pt")

    # Perform a forward pass with the model
    with torch.no_grad():
        outputs = model(**inputs)

    # Apply softmax to get probabilities
    probs = F.softmax(outputs.logits, dim=-1)
    sentiments = ["negative", "neutral", "positive"]

    # Get the sentiment with the highest probability
    predicted_sentiment = sentiments[torch.argmax(probs)]
    confidence = torch.max(probs).item()

    return predicted_sentiment, confidence

# Step 5: Test the function
text = "I'm thrilled with the results of this new project!"
sentiment, confidence = analyze_sentiment(text)
print(f"Sentiment: {sentiment}, Confidence: {confidence:.2f}")


Sentiment: positive, Confidence: 0.99


Prediksi Kalimat

In [None]:
# Impor pustaka yang diperlukan
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline

# Muat tokenizer dan model
model_name = 'finiteautomata/bertweet-base-sentiment-analysis'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Buat pipeline analisis sentimen
sentiment_pipeline = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)

# Teks contoh untuk dianalisis
texts = [
    "Saya suka menggunakan model ini! Ini fantastis.",
    "Ini adalah pengalaman terburuk yang pernah saya alami.",
    "Saya tidak yakin bagaimana perasaan saya tentang ini.",
]

# Lakukan analisis sentimen
results = sentiment_pipeline(texts)

# Tampilkan hasilnya
for text, result in zip(texts, results):
    print(f"Teks: {text}\nSentimen: {result['label']}, Skor: {result['score']:.4f}\n")


Teks: Saya suka menggunakan model ini! Ini fantastis.
Sentimen: POS, Skor: 0.9803

Teks: Ini adalah pengalaman terburuk yang pernah saya alami.
Sentimen: NEU, Skor: 0.9721

Teks: Saya tidak yakin bagaimana perasaan saya tentang ini.
Sentimen: NEU, Skor: 0.9765



Test Model dengan Prediksi Kalimat Baru

In [None]:

# Buat pipeline analisis sentimen
sentiment_pipeline = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)

# Fungsi untuk menganalisis sentimen
def analyze_sentiment(texts):
    """
    Menganalisis sentimen dari teks yang diberikan menggunakan model BERTweet.

    Parameter:
    texts (list): Daftar string untuk dianalisis.

    Mengembalikan:
    list: Daftar kamus yang berisi label sentimen dan skor.
    """
    results = sentiment_pipeline(texts)
    return results

# Contoh teks baru untuk dianalisis
new_texts = [
    "Today's lecture was very inspiring and made me even more enthusiastic about studying",
    "Today there is a deep learning assignment",
    "I'm not okay, there are so many assignments"
]

# Menggunakan fungsi untuk melakukan analisis sentimen
analysis_results = analyze_sentiment(new_texts)

# Menampilkan hasil analisis
for text, result in zip(new_texts, analysis_results):
    print(f"Teks: {text}\nSentimen: {result['label']}, Skor: {result['score']:.4f}\n")


Teks: Today's lecture was very inspiring and made me even more enthusiastic about studying
Sentimen: POS, Skor: 0.9930

Teks: Today there is a deep learning assignment
Sentimen: NEU, Skor: 0.7622

Teks: I'm not okay, there are so many assignments
Sentimen: NEG, Skor: 0.9666



Menghitung Akurasi

In [None]:
from sklearn.metrics import accuracy_score

# Sample labeled dataset with text and their true sentiment labels
# Labels: "positive" = 2, "neutral" = 1, "negative" = 0
test_data = [
    {"text": "I love this!", "label": "positive"},
    {"text": "This is okay, not great.", "label": "neutral"},
    {"text": "I really dislike this.", "label": "negative"},
    {"text": "What a fantastic product!", "label": "positive"},
    {"text": "This is terrible.", "label": "negative"},
    {"text": "It's just fine, nothing special.", "label": "neutral"}
]

# Mapping sentiment labels to integers for evaluation
# Updated label mapping to match the labels in test_data
label_map = {"positive": 2, "neutral": 1, "negative": 0}

# Initialize lists to store predictions and true labels
predictions = []
true_labels = []

# Analyze sentiment for each example in the test data
for item in test_data:
    # Get the result from the analyze_sentiment function
    result = analyze_sentiment([item["text"]])[0]  # Get the first element since analyze_sentiment returns a list

    # Extract the sentiment label from the result dictionary
    sentiment = result['label']

    # Map the sentiment label to its corresponding integer using label_map
    # Convert sentiment label to lowercase to handle potential case differences
    predicted_label = label_map.get(sentiment.lower().replace('neg', 'negative').replace('pos', 'positive').replace('neu', 'neutral'))

    # Check if the predicted label is valid before appending
    if predicted_label is not None:
        predictions.append(predicted_label)  # Predicted label
    else:
        print(f"Warning: Sentiment label '{sentiment}' not found in label_map. Skipping this prediction.")

    true_labels.append(label_map[item["label"]])  # True label

# Calculate accuracy
accuracy = accuracy_score(true_labels, predictions)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

Model Accuracy: 66.67%


Mengevaluasi Akurasi Model pada Dataset Sintetik.

In [None]:
# Install necessary libraries if not already installed
!pip install transformers datasets emoji==0.6.0

from transformers import pipeline
from sklearn.metrics import accuracy_score

# Load sentiment-analysis pipeline with the model
sentiment_analyzer = pipeline("sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis")

# Step 1: Create a synthetic dataset
synthetic_data = [
    {"text": "I am extremely happy with the service!", "label": "positive"},
    {"text": "This is the worst experience I've ever had.", "label": "negative"},
    {"text": "The product is just okay, nothing special.", "label": "neutral"},
    {"text": "Absolutely loved the new update!", "label": "positive"},
    {"text": "I regret buying this item.", "label": "negative"},
    {"text": "It was fine, not too good or bad.", "label": "neutral"},
    {"text": "Amazing quality and fantastic support!", "label": "positive"},
    {"text": "I wouldn't recommend this to anyone.", "label": "negative"},
    {"text": "It's acceptable, but could be better.", "label": "neutral"},
]

# Initialize lists to store predictions and true labels
predictions = []
true_labels = []

# Step 2: Analyze sentiment and store predictions
for example in synthetic_data:
    sentiment_result = sentiment_analyzer(example['text'])[0]
    predictions.append(sentiment_result['label'].lower())  # Predicted label
    true_labels.append(example['label'])  # True label

# Step 3: Calculate accuracy
accuracy = accuracy_score(true_labels, predictions)
print(f"Akurasi Model pada Synthetic Dataset: {accuracy * 100:.2f}%")

# Step 4: Display results for each example
for i, example in enumerate(synthetic_data):
    print(f"Teks: {example['text']}")
    print(f"Label Sebenarnya: {example['label']}, Prediksi: {predictions[i]}")
    print()


Collecting emoji==0.6.0
  Using cached emoji-0.6.0-py3-none-any.whl
Installing collected packages: emoji
Successfully installed emoji-0.6.0
Akurasi Model pada Synthetic Dataset: 0.00%
Teks: I am extremely happy with the service!
Label Sebenarnya: positive, Prediksi: pos

Teks: This is the worst experience I've ever had.
Label Sebenarnya: negative, Prediksi: neg

Teks: The product is just okay, nothing special.
Label Sebenarnya: neutral, Prediksi: neg

Teks: Absolutely loved the new update!
Label Sebenarnya: positive, Prediksi: pos

Teks: I regret buying this item.
Label Sebenarnya: negative, Prediksi: neg

Teks: It was fine, not too good or bad.
Label Sebenarnya: neutral, Prediksi: pos

Teks: Amazing quality and fantastic support!
Label Sebenarnya: positive, Prediksi: pos

Teks: I wouldn't recommend this to anyone.
Label Sebenarnya: negative, Prediksi: neg

Teks: It's acceptable, but could be better.
Label Sebenarnya: neutral, Prediksi: pos



Test the model's accuracy use the Amazon Customer Reviews on Hugging Face

In [None]:
from transformers import pipeline
from datasets import load_dataset
from sklearn.metrics import accuracy_score

# Load sentiment-analysis pipeline with truncation enabled
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="finiteautomata/bertweet-base-sentiment-analysis",
    truncation=True,
    max_length=128  # Set max_length to prevent indexing errors
)


In [None]:
# Load a subset of the Amazon reviews dataset
dataset = load_dataset("amazon_polarity", split="test[:1000]")  # Using a subset for faster processing


In [None]:
# Map numerical labels to strings for compatibility
# Amazon labels are 1 (positive) and 0 (negative), so we map accordingly
dataset = dataset.map(lambda x: {'label': 'positive' if x['label'] == 1 else 'negative'})


In [None]:
# Initialize lists to store predictions and true labels
predictions = []
true_labels = []

# Analyze sentiment and store predictions
for example in dataset:
    sentiment_result = sentiment_analyzer(example['content'])[0]
    predictions.append(sentiment_result['label'].lower())  # Predicted label
    true_labels.append(example['label'])  # True label


In [None]:
# Calculate accuracy
accuracy = accuracy_score(true_labels, predictions)
print(f"Akurasi Model pada Amazon Reviews: {accuracy * 100:.2f}%")


Akurasi Model pada Amazon Reviews: 0.00%


In [None]:
# Display a few examples with true and predicted labels
for i, example in enumerate(dataset.select(range(5))):
    print(f"Teks: {example['content']}")
    print(f"Label Sebenarnya: {example['label']}, Prediksi: {predictions[i]}")
    print()


Teks: My lovely Pat has one of the GREAT voices of her generation. I have listened to this CD for YEARS and I still LOVE IT. When I'm in a good mood it makes me feel better. A bad mood just evaporates like sugar in the rain. This CD just oozes LIFE. Vocals are jusat STUUNNING and lyrics just kill. One of life's hidden gems. This is a desert isle CD in my book. Why she never made it big is just beyond me. Everytime I play this, no matter black, white, young, old, male, female EVERYBODY says one thing "Who was that singing ?"
Label Sebenarnya: 1, Prediksi: pos

Teks: Despite the fact that I have only played a small portion of the game, the music I heard (plus the connection to Chrono Trigger which was great as well) led me to purchase the soundtrack, and it remains one of my favorite albums. There is an incredible mix of fun, epic, and emotional songs. Those sad and beautiful tracks I especially like, as there's not too many of those kinds of songs in my other video game soundtracks. I m

Dataset Sentiment140

In [None]:
# Install necessary libraries if not already installed
!pip install transformers datasets emoji==0.6.0

from transformers import pipeline
from datasets import load_dataset
from sklearn.metrics import accuracy_score

# Load sentiment-analysis pipeline with truncation enabled
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="finiteautomata/bertweet-base-sentiment-analysis",
    truncation=True,
    max_length=128  # Set max_length to prevent indexing errors
)

# Step 1: Load the Sentiment140 dataset
dataset = load_dataset("sentiment140", split="test[:1000]")  # Using a subset for faster processing

# Step 2: Map numerical labels to strings for compatibility
# In the Sentiment140 dataset, 0 is negative and 4 is positive
dataset = dataset.map(lambda x: {'label': 'positive' if x['sentiment'] == 4 else 'negative'})

# Initialize lists to store predictions and true labels
predictions = []
true_labels = []

# Step 3: Analyze sentiment and store predictions
for example in dataset:
    sentiment_result = sentiment_analyzer(example['text'])[0]  # Changed 'content' to 'text'
    predicted_label = sentiment_result['label'].lower()  # Convert prediction to lowercase
    predictions.append(predicted_label)
    true_labels.append(example['label'])  # True label in lowercase

    # Debugging: Print the first few results to check label consistency
    if len(predictions) <= 5:
        print(f"Teks: {example['text']}")
        print(f"Label Sebenarnya: {example['label']}, Prediksi: {predicted_label}")
        print()

# Step 4: Calculate accuracy
accuracy = accuracy_score(true_labels, predictions)
print(f"Akurasi Model pada Sentiment140: {accuracy * 100:.2f}%")

# Optional: Display a few examples with true and predicted labels
for i, example in enumerate(dataset.select(range(5))):
    print(f"Teks: {example['text']}")
    print(f"Label Sebenarnya: {example['label']}, Prediksi: {predictions[i]}")
    print()






README.md:   0%|          | 0.00/6.84k [00:00<?, ?B/s]

sentiment140.py:   0%|          | 0.00/4.03k [00:00<?, ?B/s]

Implementasi Atensi Transformers pada Model

In [1]:
# Langkah 1: Instal pustaka yang diperlukan
!pip install transformers torch

# Langkah 2: Impor pustaka yang diperlukan
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F

# Langkah 3: Muat tokenizer dan model
model_name = "finiteautomata/bertweet-base-sentiment-analysis"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Langkah 4: Definisikan fungsi untuk melakukan analisis sentimen
def analyze_sentiment(text):
    # Tokenisasi teks input dan konversi menjadi tensor
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)

    # Lakukan forward pass dengan model
    with torch.no_grad():
        outputs = model(**inputs)

    # Terapkan softmax untuk mendapatkan probabilitas
    probs = F.softmax(outputs.logits, dim=-1)

    sentiments = ["negative", "neutral", "positive"]

    # Dapatkan sentimen dengan probabilitas tertinggi
    predicted_sentiment = sentiments[torch.argmax(probs)]
    confidence = torch.max(probs).item()

    return predicted_sentiment, confidence

# Langkah 5: Uji fungsi
text = "I'm thrilled with the results of this new project!"
sentiment, confidence = analyze_sentiment(text)
print(f"Sentiment: {sentiment}, Confidence: {confidence:.2f}")

# Opsional: Implementasikan lapisan perhatian kustom (jika diperlukan)
class CustomAttentionModel(torch.nn.Module):
    def __init__(self, base_model):
        super(CustomAttentionModel, self).__init__()
        self.base_model = base_model
        # Tambahkan lapisan kustom di sini jika diperlukan

    def forward(self, input_ids, attention_mask=None):
        # Implementasikan mekanisme perhatian kustom jika diperlukan
        outputs = self.base_model(input_ids, attention_mask=attention_mask)
        return outputs

# Contoh penggunaan CustomAttentionModel (jika ada modifikasi)
# custom_model = CustomAttentionModel(model)




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/338 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/843k [00:00<?, ?B/s]

bpe.codes:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/22.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/167 [00:00<?, ?B/s]

emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0


config.json:   0%|          | 0.00/949 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/540M [00:00<?, ?B/s]

Sentiment: positive, Confidence: 0.99


 Akurasi Model Setelah Menggunakan Attention Transformer

In [3]:

from sklearn.metrics import accuracy_score

# Step 3: Load the model and tokenizer
model_name = "finiteautomata/bertweet-base-sentiment-analysis"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Step 4: Define a Custom Attention Model
class CustomAttentionModel(torch.nn.Module):
    def __init__(self, base_model):
        super(CustomAttentionModel, self).__init__()
        self.base_model = base_model

    def forward(self, input_ids, attention_mask=None, token_type_ids=None): # Add token_type_ids as an argument
        # Implement attention mechanism (if needed), for now, we use the base model
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids) # Pass token_type_ids to base_model
        return outputs

# Initialize the custom model
custom_model = CustomAttentionModel(base_model)

# Step 5: Create a synthetic dataset
synthetic_data = [
    {"text": "I am extremely happy with the service!", "label": "positive"},
    {"text": "This is the worst experience I've ever had.", "label": "negative"},
    {"text": "The product is just okay, nothing special.", "label": "neutral"},
    {"text": "Absolutely loved the new update!", "label": "positive"},
    {"text": "I regret buying this item.", "label": "negative"},
    {"text": "It was fine, not too good or bad.", "label": "neutral"},
    {"text": "Amazing quality and fantastic support!", "label": "positive"},
    {"text": "I wouldn't recommend this to anyone.", "label": "negative"},
    {"text": "It's acceptable, but could be better.", "label": "neutral"},
]

# Initialize lists to store predictions and true labels
predictions = []
true_labels = []

# Step 6: Analyze sentiment using the base model and store predictions
for example in synthetic_data:
    inputs = tokenizer(example['text'], return_tensors="pt", truncation=True, max_length=128)
    with torch.no_grad():
        outputs = base_model(**inputs)
    probs = F.softmax(outputs.logits, dim=-1)
    predicted_sentiment = ["negative", "neutral", "positive"][torch.argmax(probs)]
    predictions.append(predicted_sentiment)
    true_labels.append(example['label'])

# Step 7: Calculate accuracy for the base model
base_accuracy = accuracy_score(true_labels, predictions)

# Reset predictions for the custom model
predictions.clear()

# Step 8: Analyze sentiment using the custom attention model
for example in synthetic_data:
    inputs = tokenizer(example['text'], return_tensors="pt", truncation=True, max_length=128)
    with torch.no_grad():
        outputs = custom_model(**inputs)
    probs = F.softmax(outputs.logits, dim=-1)
    predicted_sentiment = ["negative", "neutral", "positive"][torch.argmax(probs)]
    predictions.append(predicted_sentiment)

# Step 9: Calculate accuracy for the custom attention model
custom_accuracy = accuracy_score(true_labels, predictions)

# Step 10: Print the results
print(f"Akurasi Model Dasar pada Synthetic Dataset: {base_accuracy * 100:.2f}%")
print(f"Akurasi Model Kustom pada Synthetic Dataset: {custom_accuracy * 100:.2f}%")


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0


Akurasi Model Dasar pada Synthetic Dataset: 66.67%
Akurasi Model Kustom pada Synthetic Dataset: 66.67%
