In [6]:
import os
import pandas as pd
import numpy as np
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler

from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import accuracy_score, classification_report

# 1. Load và xử lý dữ liệu
df = pd.read_csv("data_traning.1.2.csv")
df['Hour'] = ((df['Time'] // 3600) % 24).astype(int)
df['DayOfWeek'] = ((df['Time'] // (3600 * 24)) % 7).astype(int)
df = df.drop(columns=['Time'])

# 2. Chọn đặc trưng
features = [
    'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
    'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
    'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28',
    'Amount'
    # , 'Hour', 'DayOfWeek'
]
X = df[features]
y = df['Class']


In [None]:

# 3. Chuẩn hóa dữ liệu
scaler = RobustScaler()
X_scaled = scaler.fit_transform(X)

# 4. Chia dữ liệu
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# 5. Tạo thư mục lưu mô hình
os.makedirs("models", exist_ok=True)

# 6. Hàm đánh giá và lưu mô hình
def evaluate_and_save(model, X_test, y_test, name, is_keras=False):
    if is_keras:
        X_eval = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
        y_pred = (model.predict(X_eval).ravel() > 0.5).astype(int)
    else:
        y_pred = model.predict(X_test)
    
    acc = accuracy_score(y_test, y_pred)
    print(f"\n=== {name.upper()} ===")
    print("Accuracy:", acc)
    print(classification_report(y_test, y_pred, digits=4))

    if is_keras:
        model.save(f"models/{name}.h5")
    else:
        joblib.dump(model, f"models/{name}.pkl")
    print(f"💾 Saved model: {name}")

# 7. Huấn luyện mô hình

# Random Forest
rf = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)
rf.fit(X_train, y_train)
evaluate_and_save(rf, X_test, y_test, "random_forest")

# MLP
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42)
mlp.fit(X_train, y_train)
evaluate_and_save(mlp, X_test, y_test, "neural_network")

# SVM
svm = SVC(kernel='rbf', probability=True, class_weight='balanced', random_state=42)
svm.fit(X_train, y_train)
evaluate_and_save(svm, X_test, y_test, "svm")

# Decision Tree
dt = DecisionTreeClassifier(class_weight='balanced', random_state=42)
dt.fit(X_train, y_train)
evaluate_and_save(dt, X_test, y_test, "decision_tree")

# XGBoost
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
xgb.fit(X_train, y_train)
evaluate_and_save(xgb, X_test, y_test, "xgboost")

# LSTM
X_train_lstm = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_lstm = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

lstm = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train_lstm.shape[1], 1)),
    LSTM(50),
    Dense(1, activation='sigmoid')
])
lstm.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
lstm.fit(X_train_lstm, y_train, epochs=10, batch_size=64, validation_data=(X_test_lstm, y_test), verbose=2)
evaluate_and_save(lstm, X_test, y_test, "lstm", is_keras=True)



=== RANDOM_FOREST ===
Accuracy: 0.9994889507630493
              precision    recall  f1-score   support

           0     0.9995    1.0000    0.9997     56651
           1     0.9853    0.7053    0.8221        95

    accuracy                         0.9995     56746
   macro avg     0.9924    0.8526    0.9109     56746
weighted avg     0.9995    0.9995    0.9994     56746

💾 Saved model: random_forest

=== NEURAL_NETWORK ===
Accuracy: 0.9994889507630493
              precision    recall  f1-score   support

           0     0.9996    0.9999    0.9997     56651
           1     0.9231    0.7579    0.8324        95

    accuracy                         0.9995     56746
   macro avg     0.9613    0.8789    0.9161     56746
weighted avg     0.9995    0.9995    0.9995     56746

💾 Saved model: neural_network

=== SVM ===
Accuracy: 0.99466041659324
              precision    recall  f1-score   support

           0     0.9995    0.9951    0.9973     56651
           1     0.1959    0.7053

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



=== XGBOOST ===
Accuracy: 0.9992598597257957
              precision    recall  f1-score   support

           0     0.9995    0.9998    0.9996     56651
           1     0.8354    0.6947    0.7586        95

    accuracy                         0.9993     56746
   macro avg     0.9175    0.8473    0.8791     56746
weighted avg     0.9992    0.9993    0.9992     56746

💾 Saved model: xgboost
Epoch 1/10


  super().__init__(**kwargs)


3547/3547 - 39s - 11ms/step - accuracy: 0.9987 - loss: 0.0084 - val_accuracy: 0.9994 - val_loss: 0.0039
Epoch 2/10
3547/3547 - 36s - 10ms/step - accuracy: 0.9993 - loss: 0.0037 - val_accuracy: 0.9994 - val_loss: 0.0040
Epoch 3/10
3547/3547 - 37s - 11ms/step - accuracy: 0.9993 - loss: 0.0034 - val_accuracy: 0.9994 - val_loss: 0.0037
Epoch 4/10
3547/3547 - 37s - 10ms/step - accuracy: 0.9994 - loss: 0.0034 - val_accuracy: 0.9993 - val_loss: 0.0039
Epoch 5/10
3547/3547 - 38s - 11ms/step - accuracy: 0.9993 - loss: 0.0033 - val_accuracy: 0.9994 - val_loss: 0.0037
Epoch 6/10
3547/3547 - 37s - 11ms/step - accuracy: 0.9994 - loss: 0.0032 - val_accuracy: 0.9994 - val_loss: 0.0037
Epoch 7/10
3547/3547 - 38s - 11ms/step - accuracy: 0.9994 - loss: 0.0032 - val_accuracy: 0.9994 - val_loss: 0.0035
Epoch 8/10
3547/3547 - 37s - 10ms/step - accuracy: 0.9994 - loss: 0.0031 - val_accuracy: 0.9994 - val_loss: 0.0037
Epoch 9/10
3547/3547 - 38s - 11ms/step - accuracy: 0.9994 - loss: 0.0030 - val_accuracy: 0.




=== LSTM ===
Accuracy: 0.9994008388256441
              precision    recall  f1-score   support

           0     0.9995    0.9999    0.9997     56651
           1     0.8961    0.7263    0.8023        95

    accuracy                         0.9994     56746
   macro avg     0.9478    0.8631    0.9010     56746
weighted avg     0.9994    0.9994    0.9994     56746

💾 Saved model: lstm


: 