<a href="https://colab.research.google.com/github/zlfaris/DataMining3/blob/main/TugasDataMining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score

df = pd.read_csv("smartphones.csv")
df = df.drop_duplicates()
df = df.dropna(subset=['5G_or_not'])

# isi missing value
num_cols = df.select_dtypes(include=['int64','float64']).columns
cat_cols = df.select_dtypes(include=['object']).columns

df[num_cols] = df[num_cols].fillna(df[num_cols].median())
df[cat_cols] = df[cat_cols].fillna(df[cat_cols].mode().iloc[0])

X = df.drop(columns=['5G_or_not'])
y = df['5G_or_not']

num_features = X.select_dtypes(include=['int64','float64']).columns.tolist()
cat_features = X.select_dtypes(include=['object']).columns.tolist()

preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), num_features),
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features)
    ]
)

joblib.dump(preprocessor, "preprocessor.pkl")

X_processed = preprocessor.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_processed, y, test_size=0.2, random_state=42, stratify=y
)

# Random Forest
model_rf = RandomForestClassifier(
    n_estimators=300,
    random_state=42
)
model_rf.fit(X_train, y_train)
acc_rf = accuracy_score(y_test, model_rf.predict(X_test))
print("Akurasi RandomForest :", acc_rf)

# Logistic Regression
model_lr = LogisticRegression(max_iter=500)
model_lr.fit(X_train, y_train)
acc_lr = accuracy_score(y_test, model_lr.predict(X_test))
print("Akurasi Logistic Regression :", acc_lr)

# ---- Ensemble Voting ----
ensemble = VotingClassifier(
    estimators=[
        ("rf", model_rf),
        ("lr", model_lr)
    ],
    voting="soft"
)
ensemble.fit(X_train, y_train)
acc_ens = accuracy_score(y_test, ensemble.predict(X_test))
print("Akurasi Ensemble Voting :", acc_ens)

joblib.dump(model_rf, "model_rf.pkl")
joblib.dump(model_lr, "model_lr.pkl")
joblib.dump(ensemble, "model_ensemble.pkl")

print("SEMUA MODEL TELAH DISIMPAN DENGAN AMAN.")


Akurasi RandomForest : 0.9438775510204082
Akurasi Logistic Regression : 0.9387755102040817
Akurasi Ensemble Voting : 0.9540816326530612
SEMUA MODEL TELAH DISIMPAN DENGAN AMAN.
