<a href="https://colab.research.google.com/github/wal-03/Intern---Sentiment-Categorizer-PK/blob/main/AI_for_Predicting_Category_Comments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries

In [1]:
import requests
import re
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Fetch Keywords

In [2]:
# Functions to fetch and process keywords
def _parse_keywords(text):
    """Parse keyword-file text into a ``{category: [keyword, ...]}`` dict.

    Two layouts are recognised and may be mixed within one file:

    * Vertical: a ``[Category]`` header line followed by one keyword per line.
    * Horizontal: ``Category: word1, word2, ...`` on a single line.

    Category names and keywords are stripped and lower-cased. Blank lines are
    skipped, and so are keyword lines that appear before any category header.
    A category that appears a second time replaces its earlier keyword list.
    """
    keywords = {}
    current_category = None
    for raw_line in text.strip().split("\n"):
        line = raw_line.strip()
        if not line:
            continue  # Skip empty lines
        if line.startswith("[") and line.endswith("]"):  # Vertical format header
            current_category = line[1:-1].strip().lower()
            keywords[current_category] = []
        elif ":" in line:  # Horizontal format
            # NOTE: any line containing ":" is read as "category: words", so a
            # vertical-format keyword must not contain a colon.
            name, words = line.split(":", 1)
            current_category = name.strip().lower()
            keywords[current_category] = [
                word.strip().lower() for word in words.split(",") if word.strip()
            ]
        elif current_category:  # Vertical format keyword
            keywords[current_category].append(line.lower())
    return keywords


def fetch_keywords(url, timeout=10):
    """Download a keyword file and parse it into a category -> keywords dict.

    Args:
        url: Address of the plain-text keyword file.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Passed straight to ``requests.get``; without it a stalled
            connection would block the notebook cell indefinitely.

    Returns:
        dict mapping lower-cased category names to lists of lower-cased
        keywords (see ``_parse_keywords`` for the accepted file layouts).

    Raises:
        Exception: If the response status code is not 200.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch keywords. Status code: {response.status_code}")
    return _parse_keywords(response.text)

# Example usage: download and parse the keyword lists from the project's GitHub repository.
keywords_url = 'https://raw.githubusercontent.com/wal-03/Intern---Sentiment-Categorizer-PK/refs/heads/main/AI_KEYWORDS.txt'
# `keywords` is kept at module level: later cells pass it to the helper functions,
# and predict_category() reads it as a global.
keywords = fetch_keywords(keywords_url)
# Bare last expression so the notebook displays the parsed dict.
keywords

{'positive': ['fantastic',
  'perfect',
  'amazing',
  'loved',
  'exceeded',
  'expectations',
  'excellent',
  'recommend',
  'great',
  'best',
  'ever',
  'friendly',
  'helpful',
  'kind',
  'happy',
  'purchase',
  'satisfied',
  'inspiring',
  'moving',
  'insightful',
  'breathtaking',
  'beautiful',
  'stunning',
  'wonderful',
  'easy',
  'effective',
  'useful',
  'highly recommend',
  'delicious',
  'outstanding',
  'top-notch',
  'game-changer',
  'innovative',
  'revolutionary',
  'impressed',
  'results',
  'positive',
  'exactly',
  'ideal',
  'fast',
  'quick',
  'efficient',
  'attention to detail',
  'meticulous',
  'thorough',
  'made my day',
  'joyful',
  "can't wait",
  'excited',
  'eager',
  'worth it',
  'valuable',
  'amazing (customer support)',
  'responsive (customer support)',
  'fan',
  'love',
  'support',
  'must-have',
  'essential',
  'necessary',
  'glad',
  'fortunate',
  'great value',
  'affordable',
  'reasonable'],
 'negative': ['hate',
  'terr

# Create a Function

In [3]:

# Function to categorize comments
def categorize_comment(comment, keywords):
    """Return the first category that has a keyword present in ``comment``.

    The comment is lower-cased before matching; keywords are used exactly as
    given, so they are expected to be lower-case already. A keyword only
    matches as a whole word or phrase ("great" does not match "greatest").

    Categories are tried in the iteration order of ``keywords`` and the first
    one with any hit wins, even if a later category has more hits.

    Args:
        comment: The comment text.
        keywords: Mapping of category name -> list of keyword strings.

    Returns:
        The matching category name, or 'uncategorized' when nothing matches.
    """
    comment_lower = comment.lower()
    for category, words in keywords.items():
        for word in words:
            if not word:
                continue  # an empty keyword would match almost any text
            # Lookarounds instead of \b: \b needs a word character on one side,
            # so a keyword that starts or ends with punctuation, such as
            # 'amazing (customer support)', could never match at its edge.
            pattern = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
            if re.search(pattern, comment_lower):
                return category
    return 'uncategorized'

# Function to create a custom feature vector based on keywords
def create_custom_features(text, keywords):
    """Count, per category, how many of its keywords occur in ``text``.

    The text is lower-cased before matching; keywords are used exactly as
    given. Each keyword counts at most once, however often it appears, and
    only matches as a whole word or phrase.

    Args:
        text: The comment text.
        keywords: Mapping of category name -> list of keyword strings.

    Returns:
        dict with one entry per category (same order as ``keywords``) holding
        the number of that category's keywords found in ``text``.
    """
    text_lower = text.lower()
    features = {}
    for category, words in keywords.items():
        # Lookarounds instead of \b so keywords that start or end with
        # punctuation, e.g. 'responsive (customer support)', can match.
        # Empty keywords are ignored.
        features[category] = sum(
            1
            for word in words
            if word and re.search(r'(?<!\w)' + re.escape(word) + r'(?!\w)', text_lower)
        )
    return features

# Use Function to Categorize Comments

In [4]:
# Sample data
data = {
    'text': [
        "This movie was absolutely fantastic! The acting, the plot, everything was perfect.",
        "I loved this product. It exceeded all my expectations.",
        "This product is absolutely terrible. It broke after only a week.",
        "I was extremely disappointed with the service.",
        "The movie was okay. It wasn't great, but it wasn't terrible either.",
        "The service was excellent. I would definitely recommend this place.",
        "This is the best coffee I've ever had!",
        "The staff was so friendly and helpful.",
        "I'm so happy with my purchase!",
        "This book was truly inspiring.",
        "The scenery was breathtaking.",
        "I had a wonderful experience.",
        "This app is so easy to use and very effective.",
        "I highly recommend this restaurant. The food is delicious.",
        "The quality of this item is outstanding.",
        "This is a game-changer!",
        "I'm impressed with the results.",
        "This is exactly what I was looking for.",
        "The delivery was super fast!",
        "I appreciate the attention to detail.",
        "This made my day!",
        "I can't wait to use this again.",
        "This product is worth every penny.",
        "The customer support was amazing.",
        "I'm a big fan of this brand.",
        "This is a must-have!",
        "I'm so glad I found this.",
        "This is a great value for the price.",
        "This movie was a complete waste of time.",
        "The food was disgusting. I couldn't even eat it.",
        "The staff was rude and unhelpful.",
        "I'm so frustrated with this product. It doesn't work.",
        "This book was boring and poorly written.",
        "The quality of this item is unacceptable.",
        "I had a terrible experience.",
        "This app is buggy and crashes all the time.",
        "I would not recommend this restaurant to anyone.",
        "The delivery was incredibly slow.",
        "I'm very disappointed with the results.",
        "This is nothing like what I expected.",
        "The customer support was awful.",
        "I'm very upset with this purchase.",
        "This is a rip-off.",
        "I regret buying this.",
        "This is a complete disaster.",
        "I would never buy this again.",
        "The product arrived damaged.",
        "This is a waste of money.",
        "I'm extremely dissatisfied.",
        "This is the worst experience I've ever had.",
        "I'm never using this service again.",
        "The product is as described.",
        "The service was adequate.",
        "The food was average.",
        "The staff was polite.",
        "The book was informative.",
        "The scenery was nice.",
        "I had a neutral experience.",
        "The app is functional.",
        "The restaurant was fine.",
        "The quality of this item is average.",
        "The delivery was on time.",
        "The results are as expected.",
        "The customer support was helpful.",
        "I have no strong feelings about this.",
        "This is a reasonable price.",
        "The product meets basic requirements.",
        "The instructions were clear.",
        "The packaging was sufficient.",
        "This is a standard product.",
        "The experience was unremarkable.",
        "I have no opinion on this.",
        "The product is okay for what it is.",
        "This is a basic model.",
        "The performance is adequate.",
        "The design is simple.",
        "It works as intended.",
        "The price is fair.",
        "I'm indifferent to this product.",
        "It's not bad, but it's not great either.",
        "This is a typical example of this type of product.",
        "I don't have any complaints.",
        "It's just okay.",
        "Nothing special.",
        "It serves its purpose.",
        "This is a run-of-the-mill product.",
        "I'm neither impressed nor disappointed.",
        "It's average in every way.",
        "This is a middle-of-the-road product.",
        "It's acceptable.",
        "It's adequate for my needs.",
        "This is a no-frills product.",
        "It's functional but not stylish.",
        "I have mixed feelings about this.",
        "It's a matter of personal preference.",
        "It depends on what you're looking for.",
        "It's not for everyone.",
        "It has its pros and cons.",
        "It's a decent option.",
        "It's worth considering.",
        "It's a viable choice.",
        "It's a reasonable alternative.",
        "It's a possible solution.",
        "It's a potential option.",
        "It's a good starting point.",
        "It's a solid foundation.",
        "It's a step in the right direction.",
        "It's a work in progress.",
        "It's under development.",
        "It's subject to change.",
        "It's still early days.",
        "It has potential.",
        "It shows promise.",
        "It has room for improvement.",
        "It needs some work.",
        "It could be better.",
        "It has some flaws.",
        "It's not perfect.",
        "It's not without its problems.",
        "It has its limitations.",
        "It's not the best, but it's not the worst.",
        "It's somewhere in between.",
        "It's a mixed bag.",
        "It's a bit of a gamble.",
        "It's a risk worth taking.",
        "It's a chance to try something new.",
        "It's an opportunity to learn.",
        "It's a way to grow.",
        "It's a path to success.",
        "It's a journey of discovery.",
        "It's a quest for knowledge.",
        "It's a search for truth.",
        "It's a pursuit of happiness.",
        "It's a struggle for survival.",
        "It's a fight for freedom.",
        "It's a battle against injustice.",
        "It's a war against evil.",
        "It's a conflict of interest.",
        "It's a clash of cultures.",
        "It's a meeting of minds.",
        "It's a fusion of ideas.",
        "It's a blend of styles.",
        "It's a mix of genres.",
        "It's a combination of factors.",
        "It's a complex issue.",
        "It's a multifaceted problem.",
        "It's a complicated situation.",
        "It's a challenging task.",
        "It's a difficult undertaking.",
        "It's a demanding job.",
        "It's a tough assignment.",
        "It's a hard slog.",
        "It's a long haul.",
        "It's a marathon, not a sprint.",
        "It's a test of endurance.",
        "It's a trial by fire.",
        "It's a baptism of fire.",
        "It's a rite of passage.",
        "It's a journey of self-discovery.",
        "It's a voyage of exploration.",
        "It's a quest for meaning.",
        "It's a search for purpose.",
        "It's a pursuit of excellence.",
        "It's a drive for success.",
        "It's a hunger for knowledge.",
        "It's a thirst for adventure.",
        "It's a passion for life.",
        "It's a love of learning.",
        "It's a joy of giving.",
        "It's a spirit of generosity.",
        "It's a heart of gold.",
        "It's a soul of kindness.",
        "It's a mind of wisdom.",
        "It's a body of strength.",
        "It's a force of nature.",
        "It's a beacon of hope.",
        "It's a symbol of peace.",
        "It's a harbinger of change.",
        "It's a catalyst for progress.",
        "It's an agent of transformation.",
        "It's a power for good.",
        "It's a light in the darkness.",
        "It's a voice for the voiceless.",
        "It's a champion for the underdog.",
        "It's a defender of the weak.",
        "It's a protector of the innocent.",
        "It's a guardian of the truth.",
        "It's a seeker of justice.",
        "It's a fighter for equality."
    ]
}
# Wrap the sample comments in a DataFrame with a single 'text' column.
df = pd.DataFrame(data)
# Label every comment with the keyword-based categorizer and store the result
# in a new 'category' column.
df['category'] = df['text'].apply(lambda x: categorize_comment(x, keywords))
# Bare last expression so the notebook displays the labelled frame.
df

Unnamed: 0,text,category
0,This movie was absolutely fantastic! The actin...,positive
1,I loved this product. It exceeded all my expec...,positive
2,This product is absolutely terrible. It broke ...,negative
3,I was extremely disappointed with the service.,negative
4,"The movie was okay. It wasn't great, but it wa...",positive
...,...,...
184,It's a defender of the weak.,uncategorized
185,It's a protector of the innocent.,uncategorized
186,It's a guardian of the truth.,uncategorized
187,It's a seeker of justice.,uncategorized


# Build and Evaluate Machine Learning Model

In [5]:
# Create custom features: one column per keyword category, built by create_custom_features
X = pd.DataFrame([create_custom_features(text, keywords) for text in df['text']])
# Labels are produced by the same keyword rules (categorize_comment) that drive the features.
# NOTE(review): since features and labels share one source, the scores printed below measure
# how well the model reproduces the keyword rule, not how well it classifies real sentiment.
y = df['text'].apply(lambda x: categorize_comment(x, keywords))  # Use your existing categorize_comment function

# Split the data into training and testing sets (20% held out; random_state fixed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Logistic Regression model with scikit-learn's default settings.
# `model` stays at module level; predict_category() reads it as a global.
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate the model on the held-out test set
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 1.0
Classification Report:
                precision    recall  f1-score   support

     negative       1.00      1.00      1.00         2
      neutral       1.00      1.00      1.00         7
     positive       1.00      1.00      1.00         9
uncategorized       1.00      1.00      1.00        20

     accuracy                           1.00        38
    macro avg       1.00      1.00      1.00        38
 weighted avg       1.00      1.00      1.00        38



# Create Function to Predict New Comments

In [6]:
# Function to predict category of new comments
def predict_category(new_comments):
    """Predict a category for each comment with the notebook's trained model.

    Reads the module-level ``model`` and ``keywords`` defined in earlier cells,
    so those cells must have been run first.

    Args:
        new_comments: Iterable of comment strings. A single string is treated
            as one comment rather than being iterated character by character.

    Returns:
        List of ``(comment, predicted_category)`` tuples in input order; an
        empty list when there are no comments.
    """
    if isinstance(new_comments, str):
        new_comments = [new_comments]
    comments = list(new_comments)
    if not comments:
        return []  # nothing to predict, so skip the model call

    # Build every feature row first and call the model once for the whole batch.
    feature_frame = pd.DataFrame(
        [create_custom_features(comment, keywords) for comment in comments]
    )
    predicted = model.predict(feature_frame)
    return list(zip(comments, predicted))

In [7]:
# Function to predict category of new comments and append to DataFrame
def predict_and_append(new_comments, df, model, keywords):
    """Predict a category for each new comment and add the results to ``df``.

    Args:
        new_comments: Iterable of comment strings to classify.
        df: DataFrame to extend; it is not modified in place.
        model: Fitted classifier exposing ``predict``.
        keywords: Mapping of category name -> keyword list, used to build the
            feature row for each comment.

    Returns:
        ``(updated_df, predictions)`` where ``updated_df`` is ``df`` followed by
        one ``{'text', 'category'}`` row per comment (index renumbered) and
        ``predictions`` is a list of ``(comment, predicted_category)`` tuples.
        When there are no comments, ``df`` itself is returned unchanged.
    """
    def _predict_one(comment):
        # One-row feature frame -> single predicted label.
        feature_row = pd.DataFrame([create_custom_features(comment, keywords)])
        return model.predict(feature_row)[0]

    predictions = [(comment, _predict_one(comment)) for comment in new_comments]
    if not predictions:
        return df, predictions

    # Build all new rows at once and concatenate a single time.
    appended = pd.DataFrame(
        [{'text': comment, 'category': label} for comment, label in predictions]
    )
    return pd.concat([df, appended], ignore_index=True), predictions

# Add New Comments

In [8]:
# Example comments to classify; the trailing note on each line is the author's expected category.
new_comments = [
    "This product is neither good nor bad.",  # Neutral
    "I absolutely hate this, it's the worst!",  # Negative
    "This is the best thing I've ever bought!",  # Positive
    "The product is okay, but I expected more.",  # Neutral
    "Terrible experience, never buying again."  # Negative
]

### Prediction Without Adding to DataFrame

Jalankan kode di bawah ini jika ingin memprediksi tanpa menambahkan hasilnya ke DataFrame.

In [9]:
# Predict only: predict_category returns (comment, category) pairs and leaves `df` untouched.
predictions = predict_category(new_comments)

# Display predictions, one line per comment
for comment, category in predictions:
    print(f"Comment: '{comment}' -> Predicted Category: {category}")

Comment: 'This product is neither good nor bad.' -> Predicted Category: neutral
Comment: 'I absolutely hate this, it's the worst!' -> Predicted Category: negative
Comment: 'This is the best thing I've ever bought!' -> Predicted Category: positive
Comment: 'The product is okay, but I expected more.' -> Predicted Category: neutral
Comment: 'Terrible experience, never buying again.' -> Predicted Category: negative


### Prediction With Adding to DataFrame

Hapus tanda komentar (uncomment) pada kode di bawah ini jika ingin memprediksi dan menambahkan hasilnya ke DataFrame.

In [10]:
# # Append new comments to the DataFrame
# df, predictions = predict_and_append(new_comments, df, model, keywords)

# # Display predictions
# for comment, category in predictions:
#     print(f"Comment: '{comment}' -> Predicted Category: {category}")

# # Display the updated DataFrame
# print("\nUpdated DataFrame:")
# df

# Filter Category

In [11]:
# Function to show the comments that belong to a given category
def filter_by_category(df, category):
    """Return the rows of ``df`` whose 'category' column equals ``category``.

    The comparison is exact (case-sensitive); the original index is kept.
    """
    matches_category = df['category'].eq(category)
    return df.loc[matches_category]

In [12]:
# Example: show the comments whose category is "positive"
positive_comments = filter_by_category(df, "positive")
print("Komentar dengan kategori Positif:")
positive_comments

Komentar dengan kategori Positif:


Unnamed: 0,text,category
0,This movie was absolutely fantastic! The actin...,positive
1,I loved this product. It exceeded all my expec...,positive
4,"The movie was okay. It wasn't great, but it wa...",positive
5,The service was excellent. I would definitely ...,positive
6,This is the best coffee I've ever had!,positive
7,The staff was so friendly and helpful.,positive
8,I'm so happy with my purchase!,positive
9,This book was truly inspiring.,positive
10,The scenery was breathtaking.,positive
11,I had a wonderful experience.,positive


In [13]:
# Example: show the comments whose category is "negative"
negative_comments = filter_by_category(df, "negative")
print("\nKomentar dengan kategori Negatif:")
negative_comments


Komentar dengan kategori Negatif:


Unnamed: 0,text,category
2,This product is absolutely terrible. It broke ...,negative
3,I was extremely disappointed with the service.,negative
28,This movie was a complete waste of time.,negative
29,The food was disgusting. I couldn't even eat it.,negative
30,The staff was rude and unhelpful.,negative
31,I'm so frustrated with this product. It doesn'...,negative
32,This book was boring and poorly written.,negative
33,The quality of this item is unacceptable.,negative
34,I had a terrible experience.,negative
35,This app is buggy and crashes all the time.,negative


In [14]:
# Example: show the comments whose category is "neutral"
neutral_comments = filter_by_category(df, "neutral")
print("\nKomentar Neutral:")
neutral_comments


Komentar Neutral:


Unnamed: 0,text,category
39,This is nothing like what I expected.,neutral
51,The product is as described.,neutral
52,The service was adequate.,neutral
53,The food was average.,neutral
54,The staff was polite.,neutral
55,The book was informative.,neutral
56,The scenery was nice.,neutral
57,I had a neutral experience.,neutral
58,The app is functional.,neutral
59,The restaurant was fine.,neutral


In [15]:
# Example: show the comments that matched no keyword ("uncategorized")
uncategorized_comments = filter_by_category(df, "uncategorized")
print("\nKomentar uncategorized:")
uncategorized_comments


Komentar uncategorized:


Unnamed: 0,text,category
22,This product is worth every penny.,uncategorized
45,I would never buy this again.,uncategorized
47,This is a waste of money.,uncategorized
50,I'm never using this service again.,uncategorized
64,I have no strong feelings about this.,uncategorized
...,...,...
184,It's a defender of the weak.,uncategorized
185,It's a protector of the innocent.,uncategorized
186,It's a guardian of the truth.,uncategorized
187,It's a seeker of justice.,uncategorized
