# Classification

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score

# Compute the evaluation metrics from the hard predictions.
# The four scalar scorers share the (y_test, y_pred) call shape, so they are
# evaluated in one pass, in the same order as they are reported below.
accuracy, precision, recall, f1 = (
    scorer(y_test, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(y_test, y_pred)

# Report the scalar metrics first, then the confusion matrix on its own lines.
for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1-Score", f1),
):
    print(f"{metric_name}: {metric_value}")
print("Confusion Matrix:", conf_matrix, sep="\n")

# ROC AUC
# Uses column 1 of predict_proba as the score
# (presumably the positive-class probability of a binary model — TODO confirm).
y_pred_proba = model.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, y_pred_proba)
print(f"ROC AUC Score: {roc_auc}")

In [None]:
# Plotting dependencies are imported in this cell so it runs on a fresh kernel:
# `roc_curve` was never imported, `plt` was only imported by a later cell, and
# `sns` was not imported anywhere in the notebook.
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve

# Confusion matrix visualisation.
# Relies on `conf_matrix` computed by the classification-metrics cell.
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

# ROC curve plot.
# Relies on `y_test`, `y_pred_proba` and `roc_auc` from the classification-metrics cell.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
plt.plot(fpr, tpr, label=f"ROC Curve (AUC = {roc_auc:.2f})")
plt.plot([0, 1], [0, 1], 'k--')  # Diagonal reference line
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend(loc="lower right")
plt.show()

# Regression

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np


# Compute the regression evaluation metrics from y_test and y_pred.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is derived from the MSE computed above

# Report every metric on its own line, in a fixed order.
for metric_name, metric_value in (
    ("Mean Absolute Error (MAE)", mae),
    ("Mean Squared Error (MSE)", mse),
    ("Root Mean Squared Error (RMSE)", rmse),
    ("R² Score", r2),
):
    print(f"{metric_name}: {metric_value}")

# Apriori

In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Run Apriori on `df` to find frequent itemsets (minimum support 0.6).
# use_colnames=True is passed so itemsets are reported by column name.
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)

# Derive association rules from the frequent itemsets, keeping rules with lift >= 1.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Show each rule together with its evaluation metrics.
report_columns = [
    'antecedents',
    'consequents',
    'support',
    'confidence',
    'lift',
    'leverage',
    'conviction',
]
print(rules[report_columns])

# Clustering

In [None]:
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

# Step 1: Fit agglomerative clustering with 3 clusters and get one label per row of X.
clustering = AgglomerativeClustering(n_clusters=3)
labels = clustering.fit_predict(X)

# Step 2: Evaluate the clustering with three internal metrics.
# Each metric is computed and printed immediately, in this order:
#   a. Silhouette Score        (ranges from -1 to 1, higher is better)
#   b. Calinski-Harabasz Index (higher is better)
#   c. Davies-Bouldin Index    (lower is better)
cluster_scores = {}
for report_name, scorer in (
    ("Silhouette Score", silhouette_score),
    ("Calinski-Harabasz Index", calinski_harabasz_score),
    ("Davies-Bouldin Index", davies_bouldin_score),
):
    cluster_scores[report_name] = scorer(X, labels)
    print(f"{report_name}: {cluster_scores[report_name]}")

# Keep the individual results available under their own names.
silhouette_avg = cluster_scores["Silhouette Score"]
calinski_harabasz = cluster_scores["Calinski-Harabasz Index"]
davies_bouldin = cluster_scores["Davies-Bouldin Index"]


In [2]:
## DBScan
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
import numpy as np

# Step 1: Train the DBSCAN model
dbscan = DBSCAN(eps=0.5, min_samples=5)
labels = dbscan.fit_predict(X)

# Step 2: Filter out the noise points for evaluation.
# Noise is labelled -1, so this mask is True for every point assigned to a
# cluster (core and border points alike) and False for noise points.
core_samples_mask = labels != -1
clustered_labels = labels[core_samples_mask]

# Step 3: Evaluate the model using various metrics.
# All three metrics are computed on the noise-free subset, so the guard must
# count clusters in that same subset. Counting unique values of the raw
# `labels` would treat noise (-1) as a cluster and let "one cluster + noise"
# through, which then fails inside the metric with a single remaining label.
if len(np.unique(clustered_labels)) > 1:
    X_clustered = X[core_samples_mask]

    # a. Silhouette Score (excluding noise points; higher is better)
    silhouette_avg = silhouette_score(X_clustered, clustered_labels)
    print(f"Silhouette Score: {silhouette_avg}")

    # b. Calinski-Harabasz Index (excluding noise points; higher is better)
    calinski_harabasz = calinski_harabasz_score(X_clustered, clustered_labels)
    print(f"Calinski-Harabasz Index: {calinski_harabasz}")

    # c. Davies-Bouldin Index (excluding noise points; lower is better)
    davies_bouldin = davies_bouldin_score(X_clustered, clustered_labels)
    print(f"Davies-Bouldin Index: {davies_bouldin}")
else:
    # Fewer than two real clusters: none of the metrics is defined.
    print("Silhouette Score: Not applicable, only one cluster or all points are noise.")
    print("Calinski-Harabasz Index: Not applicable, only one cluster or all points are noise.")
    print("Davies-Bouldin Index: Not applicable, only one cluster or all points are noise.")

In [None]:
import matplotlib.pyplot as plt

# Scatter plot of the first two columns of X, coloured by cluster label.
# Intended for 2D data; the `X[:, i]` indexing assumes X is a NumPy-style array.
first_feature, second_feature = X[:, 0], X[:, 1]
plt.scatter(first_feature, second_feature, c=labels, cmap='viridis')
plt.title("DBSCAN Clustering")
plt.show()

# Anomaly Detection

In [None]:
#One Class SVM
from sklearn.svm import OneClassSVM
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix
import numpy as np

# Step 1: Train One-Class SVM
# X_train contains only the "inliers" (normal data)
# RBF kernel; nu is an upper bound on the fraction of training points that may
# be treated as outliers (not an exact outlier proportion).
oc_svm = OneClassSVM(kernel='rbf', gamma=0.1, nu=0.1)
oc_svm.fit(X_train)  # Train the model on inliers

# Step 2: Predict
# X_test contains both inliers and outliers (test data)
y_pred = oc_svm.predict(X_test)  # Predict the class of test data
# One-Class SVM predicts:
# - `1` for inliers (normal points)
# - `-1` for outliers

# Step 3: Evaluation
# Requires ground-truth labels for X_test using the same encoding as the
# predictions (1 for inliers, -1 for outliers). `y_true` must be defined
# before this point, e.g.:
# y_true = np.array([...])  # Ground truth (1 for inliers, -1 for outliers)

# a. Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy}")

# b. Precision (for detecting outliers)
precision = precision_score(y_true, y_pred, pos_label=-1)  # pos_label=-1 because we are interested in outliers
print(f"Precision: {precision}")

# c. Recall (for detecting outliers)
recall = recall_score(y_true, y_pred, pos_label=-1)  # pos_label=-1 for outliers
print(f"Recall: {recall}")

# d. ROC AUC Score
# ROC AUC is a ranking metric, so it needs continuous scores. Feeding it the
# hard +1/-1 predictions collapses the curve to a single operating point.
# decision_function is positive for inliers and negative for outliers, which
# matches roc_auc_score treating the greater label (1 = inlier) as positive.
inlier_scores = oc_svm.decision_function(X_test)
roc_auc = roc_auc_score(y_true, inlier_scores)
print(f"ROC AUC Score: {roc_auc}")

# e. Confusion Matrix
conf_matrix = confusion_matrix(y_true, y_pred)
print(f"Confusion Matrix:\n{conf_matrix}")

In [None]:
#Isolation Forest
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix
import numpy as np

# Step 1: Train Isolation Forest
# fit_predict returns 1 for inliers and -1 for outliers; random_state is fixed
# so the result is reproducible across runs.
iso_forest = IsolationForest(contamination=0.1, random_state=42)
anomalies = iso_forest.fit_predict(X)

# Step 2: Evaluate the Model
# Requires ground-truth labels for X using the same encoding as the
# predictions (1 for inliers, -1 for outliers). `y_true` must be defined
# before this point, e.g.:
# y_true = np.array([...])  # Ground truth labels, where 1 = inlier and -1 = outlier

# a. Accuracy
accuracy = accuracy_score(y_true, anomalies)
print(f"Accuracy: {accuracy}")

# b. Precision (for detecting outliers)
precision = precision_score(y_true, anomalies, pos_label=-1)  # Outliers are -1
print(f"Precision: {precision}")

# c. Recall (for detecting outliers)
recall = recall_score(y_true, anomalies, pos_label=-1)  # Outliers are -1
print(f"Recall: {recall}")

# d. ROC AUC Score
# ROC AUC is a ranking metric, so it needs continuous scores. Feeding it the
# hard +1/-1 predictions collapses the curve to a single operating point.
# decision_function is lower for more abnormal samples (negative = outlier),
# which matches roc_auc_score treating the greater label (1 = inlier) as positive.
inlier_scores = iso_forest.decision_function(X)
roc_auc = roc_auc_score(y_true, inlier_scores)
print(f"ROC AUC Score: {roc_auc}")

# e. Confusion Matrix
conf_matrix = confusion_matrix(y_true, anomalies)
print(f"Confusion Matrix:\n{conf_matrix}")

# Dimensionality Reduction

In [None]:
#PCA
from sklearn.decomposition import PCA
import numpy as np

# Step 1: Fit a 2-component PCA on X and project X into the reduced space.
pca = PCA(n_components=2)
X_reduced = pca.fit_transform(X)

# Step 2: Evaluation

# a. Share of the variance captured by each retained component.
explained_variance = pca.explained_variance_ratio_
print(f"Explained Variance Ratio: {explained_variance}")

# b. Running total of the variance captured as components are added.
cumulative_explained_variance = explained_variance.cumsum()
print(f"Cumulative Explained Variance: {cumulative_explained_variance}")

# c. Reconstruction error (optional): map the reduced data back to the original
# feature space and average the squared differences from X.
X_reconstructed = pca.inverse_transform(X_reduced)
squared_residuals = (X - X_reconstructed) ** 2
reconstruction_error = np.mean(squared_residuals)
print(f"Reconstruction Error: {reconstruction_error}")
