<a href="https://colab.research.google.com/github/von-ai/modeling-sentiment-analysis/blob/main/Modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
from google.colab import drive
# Attach Google Drive under /content/drive so the project files are reachable
# from this Colab runtime.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Show what is in the current working directory.
print(os.listdir(os.curdir))

['.config', 'drive', 'sample_data']


In [None]:
# Make the project folder on Drive the working directory so later cells can
# use relative file names.
os.chdir('/content/drive/MyDrive/IMDB Movie Reviews Sentiment Analysis')

In [None]:
# Show what is in the current working directory (now the project folder).
print(os.listdir(os.curdir))

['Data', 'Models', 'Data_Cleaning.ipynb', 'X_train_tfidf.pkl', 'X_test_tfidf.pkl', 'y_train.csv', 'y_test.csv', 'tfidf_vectorizer.pkl', 'final_IMDB_dataset.csv', 'Feature_Extraction.ipynb']


In [None]:
import os
import pickle
import joblib
import numpy as np
import pandas as pd
from google.colab import drive

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Mount Google Drive, forcing a remount in case it is already attached.
drive.mount('/content/drive', force_remount=True)

# Project folder on Drive; relative paths in the rest of the cell resolve
# against it once it becomes the working directory.
base_path = '/content/drive/MyDrive/IMDB Movie Reviews Sentiment Analysis'
if os.path.exists(base_path):
    os.chdir(base_path)
else:
    raise FileNotFoundError(f"Folder tidak ada: {base_path}")
print(f"üìÇ Current directory: {os.getcwd()}")

def safe_load(path):
    """Load a serialized object from disk, choosing the loader by extension.

    ``.pkl``, ``.pickle`` and ``.joblib`` files are read with ``joblib.load``;
    ``.npy`` and ``.npz`` files are read with ``np.load`` with pickling
    disabled. The extension check is case-insensitive.

    Args:
        path: Path of the file to load.

    Returns:
        The object produced by the matching loader.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        RuntimeError: If the extension is not supported or the loader fails.
            The original exception is attached as ``__cause__``.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"‚ö†Ô∏è File not found: {path}")

    ext = os.path.splitext(path)[1].lower()
    try:
        if ext in ('.pkl', '.pickle', '.joblib'):
            # SECURITY: joblib.load unpickles the file, which can execute
            # arbitrary code. Only load artifacts from a trusted source.
            return joblib.load(path)
        if ext in ('.npy', '.npz'):
            return np.load(path, allow_pickle=False)
        # Raised inside the try on purpose: callers see one exception type
        # (RuntimeError) for every "could not load" outcome.
        raise ValueError("Unknown file type")
    except Exception as e:
        # Chain explicitly so the root cause stays visible in the traceback.
        raise RuntimeError(f"Failed to load {path}: {e}") from e

# Load the dataset: pre-computed TF-IDF matrices first, then the labels.
print("\nüîÑ Loading TF-IDF feature matrices...")
X_train, X_test = (
    safe_load(feature_file)
    for feature_file in ('X_train_tfidf.pkl', 'X_test_tfidf.pkl')
)

print("\nüîÑ Loading labels...")
# squeeze() collapses a single-column frame into a Series.
y_train, y_test = (
    pd.read_csv(label_file).squeeze()
    for label_file in ('y_train.csv', 'y_test.csv')
)


#label encoding
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)  # positive ‚Üí 1, negative ‚Üí 0
y_test_enc = le.transform(y_test)

print("‚úÖ Label classes:", le.classes_)

# Train and evaluate each candidate classifier on the TF-IDF features.
# A fixed seed makes the seed-dependent estimators reproducible across reruns
# (per the scikit-learn docs, random_state affects LinearSVC's dual solver and
# LogisticRegression's sag/saga/liblinear solvers).
SEED = 42
models = {
    "Logistic Regression": LogisticRegression(max_iter=2000, class_weight='balanced', random_state=SEED),
    "Support Vector Machine": LinearSVC(class_weight='balanced', random_state=SEED),
    "Naive Bayes": MultinomialNB()
}

# Test accuracy keyed by model name. Only models that trained and evaluated
# without error get an entry.
results = {}

print("\nüöÄ Training models...\n")
for name, model in models.items():
    print(f"üîπ Training {name}...")
    try:
        model.fit(X_train, y_train_enc)
        preds = model.predict(X_test)
        acc = accuracy_score(y_test_enc, preds)
        results[name] = acc

        print(f"‚úÖ {name} Accuracy: {acc:.4f}")
        print(classification_report(y_test_enc, preds, target_names=le.classes_))
        print("Confusion Matrix:\n", confusion_matrix(y_test_enc, preds))
        print("-" * 60)
    except Exception as e:
        # Best-effort: report the failure and move on to the next model.
        # A failed model stays in `models` but has no entry in `results`.
        print(f"‚ùå Error training {name}: {e}")

# Summary: one aligned line per successfully evaluated model, then the winner.
print("\nüèÅ MODEL PERFORMANCE SUMMARY")
for name, acc in results.items():
    print(f"{name:<25}: {acc:.4f}")

if not results:
    print("‚ö†Ô∏è No successful model training detected.")
else:
    # On ties, max() keeps the first model in insertion order.
    best_model = max(results, key=lambda model_name: results[model_name])
    print(f"\nüåü Best model: {best_model} with accuracy {results[best_model]:.4f}")


Mounted at /content/drive
üìÇ Current directory: /content/drive/.shortcut-targets-by-id/1ZxE3MvSNrG1FcVVUeYVW4Y85XcoYeWPd/IMDB Movie Reviews Sentiment Analysis

üîÑ Loading TF-IDF feature matrices...

üîÑ Loading labels...
‚úÖ Label classes: ['negative' 'positive']

üöÄ Training models...

üîπ Training Logistic Regression...
‚úÖ Logistic Regression Accuracy: 0.8918
              precision    recall  f1-score   support

    negative       0.90      0.88      0.89      4961
    positive       0.88      0.91      0.89      5039

    accuracy                           0.89     10000
   macro avg       0.89      0.89      0.89     10000
weighted avg       0.89      0.89      0.89     10000

Confusion Matrix:
 [[4347  614]
 [ 468 4571]]
------------------------------------------------------------
üîπ Training Support Vector Machine...
‚úÖ Support Vector Machine Accuracy: 0.8948
              precision    recall  f1-score   support

    negative       0.90      0.88      0.89      4961


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the fitted TF-IDF vectorizer so raw text can be turned into features.
print("\nüîÑ Loading TF-IDF vectorizer...")
try:
    # SECURITY: joblib.load unpickles the file; only load trusted artifacts.
    vectorizer = joblib.load('tfidf_vectorizer.pkl')
except FileNotFoundError:
    # Only a genuinely missing file gets the "not found" hint. Any other
    # failure (corrupt pickle, version mismatch, interrupt) propagates with
    # its own traceback instead of a misleading message.
    print("‚ö†Ô∏è Vectorizer file not found. Pastikan file 'tfidf_vectorizer.pkl' tersedia di folder yang sama.")
    raise
else:
    print("‚úÖ TF-IDF vectorizer loaded successfully!")

# Pick the model with the highest recorded test accuracy instead of
# hard-coding a name. A hard-coded choice goes stale when the scores change
# and could select a model whose training failed.
if not results:
    raise RuntimeError("No trained model available: `results` is empty.")
best_model_name = max(results, key=results.get)
model = models[best_model_name]

print(f"\nüåü Using best model: {best_model_name}")

def predict_sentiment(text):
    """Predict the sentiment of one piece of input text.

    The text is vectorized with the module-level ``vectorizer`` and classified
    with the module-level ``model``. A predicted label of 1 is reported as
    positive; any other label is reported as negative.

    Args:
        text: The review text to classify.

    Returns:
        A display string for the predicted sentiment.
    """
    features = vectorizer.transform([text])
    label = model.predict(features)[0]
    if label == 1:
        return "üòä Positive"
    return "üòû Negative"

# Interactive prompt: classify reviews until the user types 'exit'.
while True:
    try:
        review = input("\nüé¨ Masukkan review film (atau ketik 'exit' untuk keluar):\n> ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the cell was interrupted: leave the loop
        # cleanly instead of ending the cell with a traceback.
        print("üëã Bye-bye~")
        break

    # Ignore surrounding whitespace so inputs like " exit " still quit.
    review = review.strip()
    if review.lower() == 'exit':
        print("üëã Bye-bye~")
        break
    if not review:
        # Nothing to classify; ask again rather than predicting on blank text.
        continue

    result = predict_sentiment(review)
    print(f"üîç Hasil prediksi: {result}")



üîÑ Loading TF-IDF vectorizer...
‚úÖ TF-IDF vectorizer loaded successfully!

üåü Using best model: Support Vector Machine

üé¨ Masukkan review film (atau ketik 'exit' untuk keluar):
> good
üîç Hasil prediksi: üòä Positive

üé¨ Masukkan review film (atau ketik 'exit' untuk keluar):
> nice film
üîç Hasil prediksi: üòä Positive

üé¨ Masukkan review film (atau ketik 'exit' untuk keluar):
> bad film
üîç Hasil prediksi: üòû Negative

üé¨ Masukkan review film (atau ketik 'exit' untuk keluar):
> disgusting
üîç Hasil prediksi: üòû Negative

üé¨ Masukkan review film (atau ketik 'exit' untuk keluar):
> exit
üëã Bye-bye~
