**TrainingWiDS2021**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import xgboost as xgb
from sklearn.metrics import mean_absolute_error, confusion_matrix
import uuid

import pandas as pd
from google.colab import drive
drive.mount('/content/drive')
file_path = '/content/drive/MyDrive/kaggle_data/Data_other/TrainingWiDS2021.csv'
df = pd.read_csv(file_path)
df.head()


# Đọc dữ liệu
data = pd.read_csv(file_path)

# Các cột quan trọng
key_features = [
    'age', 'bmi', 'pre_icu_los_days', 'elective_surgery', 'apache_post_operative',
    'map_apache', 'heart_rate_apache', 'resprate_apache', 'temp_apache',
    'd1_spo2_max', 'd1_spo2_min', 'd1_heartrate_max', 'd1_heartrate_min',
    'd1_resprate_max', 'd1_resprate_min', 'd1_mbp_max', 'd1_mbp_min',
    'h1_spo2_max', 'h1_spo2_min', 'h1_mbp_max', 'h1_mbp_min',
    'bun_apache', 'creatinine_apache', 'albumin_apache',
    'd1_bun_max', 'd1_bun_min', 'd1_creatinine_max', 'd1_creatinine_min',
    'd1_albumin_max', 'd1_albumin_min', 'd1_lactate_max', 'd1_lactate_min',
    'd1_hco3_max', 'd1_hco3_min',
    'ph_apache', 'pao2_apache', 'fio2_apache', 'paco2_apache',
    'd1_arterial_ph_max', 'd1_arterial_ph_min', 'd1_arterial_po2_max', 'd1_arterial_po2_min',
    'd1_pao2fio2ratio_max', 'd1_pao2fio2ratio_min', 'd1_arterial_pco2_max', 'd1_arterial_pco2_min',
    'gcs_eyes_apache', 'gcs_motor_apache', 'gcs_verbal_apache', 'gcs_unable_apache',
    'intubated_apache', 'ventilated_apache', 'urineoutput_apache',
    'aids', 'cirrhosis', 'hepatic_failure', 'immunosuppression', 'leukemia',
    'lymphoma', 'solid_tumor_with_metastasis', 'diabetes_mellitus',
    'apache_2_diagnosis', 'apache_3j_diagnosis', 'arf_apache'
]

# Feature engineering
data['gcs_total'] = data['gcs_eyes_apache'] + data['gcs_motor_apache'] + data['gcs_verbal_apache']
data['pao2_fio2_ratio'] = data['pao2_apache'] / data['fio2_apache'].where(data['fio2_apache'] > 0)
key_features += ['gcs_total', 'pao2_fio2_ratio']

# Định nghĩa nhãn
STABLE = 0
MILD = 1
MODERATE = 2
SEVERE = 3
CRITICAL = 4

# Labeling Functions
def gcs_lf(row):
    if pd.isna(row['gcs_total']):
        return -1
    if row['gcs_total'] < 8 or row['gcs_unable_apache'] == 1:
        return CRITICAL
    elif row['gcs_total'] <= 10:
        return SEVERE
    elif row['gcs_total'] <= 13:
        return MODERATE
    else:
        return STABLE

def map_lf(row):
    if pd.isna(row['map_apache']):
        return -1
    if row['map_apache'] < 65:
        return SEVERE
    elif row['map_apache'] <= 70:
        return MODERATE
    elif row['map_apache'] > 100:
        return MILD
    else:
        return STABLE

def ventilation_lf(row):
    if pd.isna(row['ventilated_apache']) or pd.isna(row['intubated_apache']):
        return -1
    if row['intubated_apache'] == 1:
        return CRITICAL
    elif row['ventilated_apache'] == 1:
        return SEVERE
    else:
        return STABLE

def kidney_lf(row):
    if pd.isna(row['bun_apache']) or pd.isna(row['creatinine_apache']):
        return -1
    if row['bun_apache'] > 50 or row['creatinine_apache'] > 3 or row['arf_apache'] == 1:
        return SEVERE
    elif row['bun_apache'] > 20 or row['creatinine_apache'] > 1.5:
        return MODERATE
    else:
        return STABLE

def abg_lf(row):
    if pd.isna(row['ph_apache']) or pd.isna(row['pao2_fio2_ratio']):
        return -1
    if row['ph_apache'] < 7.2 or row['pao2_fio2_ratio'] < 200:
        return CRITICAL
    elif row['pao2_fio2_ratio'] < 300:
        return SEVERE
    else:
        return STABLE

def lactate_lf(row):
    if pd.isna(row['d1_lactate_max']):
        return -1
    if row['d1_lactate_max'] > 4:
        return CRITICAL
    elif row['d1_lactate_max'] > 2:
        return SEVERE
    else:
        return STABLE

# Tạo nhãn bằng weighted voting
def create_labels(data):
    labels = []
    weights = {
        'gcs': 0.35,
        'map': 0.2,
        'ventilation': 0.2,
        'kidney': 0.1,
        'abg': 0.1,
        'lactate': 0.05
    }
    for _, row in data.iterrows():
        lf_scores = [
            gcs_lf(row),
            map_lf(row),
            ventilation_lf(row),
            kidney_lf(row),
            abg_lf(row),
            lactate_lf(row)
        ]
        valid_scores = [s for s in lf_scores if s != -1]
        if not valid_scores:
            labels.append(np.nan)
            continue
        weighted_score = (
            (lf_scores[0] * weights['gcs'] if lf_scores[0] != -1 else 0) +
            (lf_scores[1] * weights['map'] if lf_scores[1] != -1 else 0) +
            (lf_scores[2] * weights['ventilation'] if lf_scores[2] != -1 else 0) +
            (lf_scores[3] * weights['kidney'] if lf_scores[3] != -1 else 0) +
            (lf_scores[4] * weights['abg'] if lf_scores[4] != -1 else 0) +
            (lf_scores[5] * weights['lactate'] if lf_scores[5] != -1 else 0)
        ) / sum([weights[k] for k, s in zip(weights.keys(), lf_scores) if s != -1])
        labels.append(round(weighted_score))
    return labels

# Thêm nhãn vào dữ liệu
data['severity_label'] = create_labels(data)

# Loại bỏ mẫu không có nhãn
data = data.dropna(subset=['severity_label'])

# Xử lý dữ liệu
X = data[key_features]
y = data['severity_label']

# Điền giá trị thiếu
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)
X_imputed = pd.DataFrame(X_imputed, columns=X.columns)

# Chuẩn hóa dữ liệu
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

# Chia dữ liệu
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Tinh chỉnh XGBoost với GridSearchCV
param_grid = {
    'max_depth': [4, 6, 8],
    'learning_rate': [0.05, 0.1],
    'n_estimators': [100, 200]
}
xgb_model = xgb.XGBClassifier(objective='multi:softprob', num_class=5, random_state=42)
grid_search = GridSearchCV(xgb_model, param_grid, cv=3, scoring='neg_mean_absolute_error', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Mô hình tốt nhất
best_model = grid_search.best_estimator_

# Dự đoán và đánh giá
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Best Parameters: {grid_search.best_params_}")
print(f"Mean Absolute Error: {mae}")
print("Confusion Matrix:")
print(conf_matrix)

# Lưu mô hình
best_model.save_model('severity_model_weighted_voting.json')

# Tìm các cột có NaN
cols_with_nan = data.columns[data.isna().any()].tolist()

# In ra danh sách cột có NaN
print("Các cột có NaN:", cols_with_nan)

# Chỉ xử lý các cột dạng số
for col in cols_with_nan:
    if data[col].dtype in ['float64', 'int64']:
        data[col].fillna(data[col].mean(), inplace=True)
print(data.info())
print(data.head())
output_path = '/content/drive/MyDrive/TrainingWiDS2021_filled.csv'
data.to_csv(output_path, index=False)

Best Parameters: {'learning_rate': 0.1, 'max_depth': 8, 'n_estimators': 200}
Mean Absolute Error: 0.00484019668100799
Confusion Matrix:
[[ 6981    10     0     0     0]
 [   12 10549    23     0     0]
 [    0    27  4432    14     0]
 [    0     0    23  3644     1]
 [    0     0     0    16   300]]


**diabetes**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import xgboost as xgb
from sklearn.metrics import mean_absolute_error, confusion_matrix
import uuid

# Đọc dữ liệu
data = pd.read_csv('/content/drive/MyDrive/kaggle_data/Data_other/diabetes.csv')

# Các cột quan trọng
key_features = [
    'chol', 'stab.glu', 'hdl', 'ratio', 'glyhb', 'age', 'height', 'weight',
    'bp.1s', 'bp.1d', 'bp.2s', 'bp.2d', 'waist', 'hip', 'time.ppn'
]

# Feature engineering
# Tính BMI: weight (lbs) / (height (in))^2 * 703
data['bmi'] = (data['weight'] / (data['height'] ** 2)) * 703
key_features.append('bmi')

# Chuyển đổi gender thành số
data['gender'] = data['gender'].map({'male': 1, 'female': 0})
key_features.append('gender')

# Chuyển đổi location thành số
data['location'] = data['location'].map({'Buckingham': 0, 'Louisa': 1})
key_features.append('location')

# Chuyển đổi frame thành số
data['frame'] = data['frame'].map({'small': 0, 'medium': 1, 'large': 2})
key_features.append('frame')

# Định nghĩa nhãn
NORMAL = 0
PREDIABETES = 1
DIABETES = 2
SEVERE_DIABETES = 3

# Labeling Functions
def glyhb_lf(row):
    if pd.isna(row['glyhb']):
        return -1
    if row['glyhb'] >= 7.0:
        return SEVERE_DIABETES
    elif row['glyhb'] >= 5.7:
        return DIABETES
    elif row['glyhb'] >= 5.0:
        return PREDIABETES
    else:
        return NORMAL

def stab_glu_lf(row):
    if pd.isna(row['stab.glu']):
        return -1
    if row['stab.glu'] >= 200:
        return SEVERE_DIABETES
    elif row['stab.glu'] >= 140:
        return DIABETES
    elif row['stab.glu'] >= 100:
        return PREDIABETES
    else:
        return NORMAL

def bmi_lf(row):
    if pd.isna(row['bmi']):
        return -1
    if row['bmi'] >= 35:
        return SEVERE_DIABETES
    elif row['bmi'] >= 30:
        return DIABETES
    elif row['bmi'] >= 25:
        return PREDIABETES
    else:
        return NORMAL

def chol_lf(row):
    if pd.isna(row['chol']):
        return -1
    if row['chol'] >= 240:
        return DIABETES
    elif row['chol'] >= 200:
        return PREDIABETES
    else:
        return NORMAL

def ratio_lf(row):
    if pd.isna(row['ratio']):
        return -1
    if row['ratio'] >= 6.0:
        return DIABETES
    elif row['ratio'] >= 5.0:
        return PREDIABETES
    else:
        return NORMAL

# Tạo nhãn bằng weighted voting
def create_labels(data):
    labels = []
    weights = {
        'glyhb': 0.4,
        'stab_glu': 0.3,
        'bmi': 0.15,
        'chol': 0.1,
        'ratio': 0.05
    }
    for _, row in data.iterrows():
        lf_scores = [
            glyhb_lf(row),
            stab_glu_lf(row),
            bmi_lf(row),
            chol_lf(row),
            ratio_lf(row)
        ]
        valid_scores = [s for s in lf_scores if s != -1]
        if not valid_scores:
            labels.append(np.nan)
            continue
        weighted_score = (
            (lf_scores[0] * weights['glyhb'] if lf_scores[0] != -1 else 0) +
            (lf_scores[1] * weights['stab_glu'] if lf_scores[1] != -1 else 0) +
            (lf_scores[2] * weights['bmi'] if lf_scores[2] != -1 else 0) +
            (lf_scores[3] * weights['chol'] if lf_scores[3] != -1 else 0) +
            (lf_scores[4] * weights['ratio'] if lf_scores[4] != -1 else 0)
        ) / sum([weights[k] for k, s in zip(weights.keys(), lf_scores) if s != -1])
        labels.append(round(weighted_score))
    return labels

# Thêm nhãn vào dữ liệu
data['severity_label'] = create_labels(data)

# Loại bỏ mẫu không có nhãn
data = data.dropna(subset=['severity_label'])

# Xử lý dữ liệu
X = data[key_features]
y = data['severity_label']

# Điền giá trị thiếu
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)
X_imputed = pd.DataFrame(X_imputed, columns=X.columns)

# Chuẩn hóa dữ liệu
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

# Chia dữ liệu
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Tinh chỉnh XGBoost với GridSearchCV
param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.05, 0.1],
    'n_estimators': [100, 200]
}
xgb_model = xgb.XGBClassifier(objective='multi:softprob', num_class=4, random_state=42)
grid_search = GridSearchCV(xgb_model, param_grid, cv=3, scoring='neg_mean_absolute_error', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Mô hình tốt nhất
best_model = grid_search.best_estimator_

# Dự đoán và đánh giá
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Best Parameters: {grid_search.best_params_}")
print(f"Mean Absolute Error: {mae}")
print("Confusion Matrix:")
print(conf_matrix)

# Lưu mô hình
best_model.save_model('diabetes_severity_model.json')

# Tìm các cột có NaN
cols_with_nan = data.columns[data.isna().any()].tolist()

# In ra danh sách cột có NaN
print("Các cột có NaN:", cols_with_nan)

# Chỉ xử lý các cột dạng số
for col in cols_with_nan:
    if data[col].dtype in ['float64', 'int64']:
        data[col].fillna(data[col].mean(), inplace=True)
print(data.info())
print(data.head())
output_path = '/content/drive/MyDrive/diabetes_filled.csv'
data.to_csv(output_path, index=False)

Best Parameters: {'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 100}
Mean Absolute Error: 0.07407407407407407
Confusion Matrix:
[[37  1  0  0]
 [ 3 22  0  0]
 [ 0  1 13  0]
 [ 0  0  1  3]]
Các cột có NaN: ['chol', 'hdl', 'ratio', 'glyhb', 'height', 'weight', 'frame', 'bp.1s', 'bp.1d', 'bp.2s', 'bp.2d', 'waist', 'hip', 'time.ppn', 'bmi']
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 403 entries, 0 to 402
Data columns (total 21 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              403 non-null    int64  
 1   chol            403 non-null    float64
 2   stab.glu        403 non-null    int64  
 3   hdl             403 non-null    float64
 4   ratio           403 non-null    float64
 5   glyhb           403 non-null    float64
 6   location        403 non-null    int64  
 7   age             403 non-null    int64  
 8   gender          403 non-null    int64  
 9   height          403 non-null    float64
 10  wei

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].mean(), inplace=True)


**diabetes_prediction_dataset**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import xgboost as xgb
from sklearn.metrics import mean_absolute_error, confusion_matrix
import uuid

# Đọc dữ liệu
data = pd.read_csv('/content/drive/MyDrive/kaggle_data/Data_other/diabetes_prediction_dataset.csv')

# Các cột đặc trưng
key_features = [
    'age', 'bmi', 'HbA1c_level', 'blood_glucose_level',
    'hypertension', 'heart_disease'
]

# Feature engineering
# Mã hóa gender
data['gender'] = data['gender'].map({'Male': 1, 'Female': 0, 'Other': 2})
key_features.append('gender')

# Mã hóa smoking_history
smoking_mapping = {
    'never': 0, 'No Info': 1, 'former': 2, 'current': 3,
    'not current': 4, 'ever': 5
}
data['smoking_history'] = data['smoking_history'].map(smoking_mapping)
key_features.append('smoking_history')

# Định nghĩa nhãn mức độ
NORMAL = 0
PREDIABETES = 1
DIABETES = 2
SEVERE_DIABETES = 3

# Labeling Functions
def hba1c_lf(row):
    if pd.isna(row['HbA1c_level']):
        return -1
    if row['HbA1c_level'] >= 7.0:
        return SEVERE_DIABETES
    elif row['HbA1c_level'] >= 6.5:
        return DIABETES
    elif row['HbA1c_level'] >= 5.7:
        return PREDIABETES
    else:
        return NORMAL

def glucose_lf(row):
    if pd.isna(row['blood_glucose_level']):
        return -1
    if row['blood_glucose_level'] >= 200:
        return SEVERE_DIABETES
    elif row['blood_glucose_level'] >= 140:
        return DIABETES
    elif row['blood_glucose_level'] >= 100:
        return PREDIABETES
    else:
        return NORMAL

def bmi_lf(row):
    if pd.isna(row['bmi']):
        return -1
    if row['bmi'] >= 35:
        return SEVERE_DIABETES
    elif row['bmi'] >= 30:
        return DIABETES
    elif row['bmi'] >= 25:
        return PREDIABETES
    else:
        return NORMAL

# Tạo nhãn bằng weighted voting
def create_labels(data):
    labels = []
    weights = {
        'hba1c': 0.5,
        'glucose': 0.3,
        'bmi': 0.2
    }
    for _, row in data.iterrows():
        lf_scores = [
            hba1c_lf(row),
            glucose_lf(row),
            bmi_lf(row)
        ]
        valid_scores = [s for s in lf_scores if s != -1]
        if not valid_scores:
            labels.append(np.nan)
            continue
        weighted_score = (
            (lf_scores[0] * weights['hba1c'] if lf_scores[0] != -1 else 0) +
            (lf_scores[1] * weights['glucose'] if lf_scores[1] != -1 else 0) +
            (lf_scores[2] * weights['bmi'] if lf_scores[2] != -1 else 0)
        ) / sum([weights[k] for k, s in zip(weights.keys(), lf_scores) if s != -1])
        labels.append(round(weighted_score))
    return labels

# Thêm nhãn vào dữ liệu
data['severity_label'] = create_labels(data)

# Loại bỏ mẫu không có nhãn
data = data.dropna(subset=['severity_label'])

# Xử lý dữ liệu
X = data[key_features]
y = data['severity_label']

# Điền giá trị thiếu
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)
X_imputed = pd.DataFrame(X_imputed, columns=X.columns)

# Chuẩn hóa dữ liệu
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

# Chia dữ liệu
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Tinh chỉnh XGBoost với GridSearchCV
param_grid = {
    'max_depth': [3, 5],
    'learning_rate': [0.05, 0.1],
    'n_estimators': [100, 200]
}
xgb_model = xgb.XGBClassifier(objective='multi:softprob', num_class=4, random_state=42)
grid_search = GridSearchCV(xgb_model, param_grid, cv=3, scoring='neg_mean_absolute_error', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Mô hình tốt nhất
best_model = grid_search.best_estimator_

# Dự đoán và đánh giá
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Best Parameters: {grid_search.best_params_}")
print(f"Mean Absolute Error: {mae}")
print("Confusion Matrix:")
print(conf_matrix)

# Lưu mô hình
best_model.save_model('diabetes_prediction_dataset_severity_model.json')
# Tìm các cột có NaN
cols_with_nan = data.columns[data.isna().any()].tolist()

# In ra danh sách cột có NaN
print("Các cột có NaN:", cols_with_nan)

# Chỉ xử lý các cột dạng số
for col in cols_with_nan:
    if data[col].dtype in ['float64', 'int64']:
        data[col].fillna(data[col].mean(), inplace=True)
print(data.info())
print(data.head())
output_path = '/content/drive/MyDrive/diabetes_prediction_dataset_filled.csv'
data.to_csv(output_path, index=False)

Best Parameters: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 100}
Mean Absolute Error: 0.0012
Confusion Matrix:
[[ 3517     2     0     0]
 [    5 11606     6     0]
 [    0    11  4516     0]
 [    0     0     0   337]]
Các cột có NaN: []
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 10 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   gender               100000 non-null  int64  
 1   age                  100000 non-null  float64
 2   hypertension         100000 non-null  int64  
 3   heart_disease        100000 non-null  int64  
 4   smoking_history      100000 non-null  int64  
 5   bmi                  100000 non-null  float64
 6   HbA1c_level          100000 non-null  float64
 7   blood_glucose_level  100000 non-null  int64  
 8   diabetes             100000 non-null  int64  
 9   severity_label       100000 non-null  int64  
dtypes: float64(3), int64(

**Training**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.1-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.1-py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.5/242.5 kB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.1 colorlog-6.9.0 optuna-4.3.0


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import RobustScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, precision_score, recall_score, cohen_kappa_score
from sklearn.feature_selection import SelectKBest, f_classif, VarianceThreshold
from imblearn.over_sampling import SMOTE
import optuna
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
from torch.utils.data import DataLoader, TensorDataset
import os

# Định nghĩa mô hình
class AdvancedMLP(nn.Module):
    def __init__(self, input_size, hidden_sizes, num_classes, dropout_rate=0.3):
        super(AdvancedMLP, self).__init__()
        layers = []
        prev_size = input_size
        for hidden_size in hidden_sizes:
            layers.extend([
                nn.Linear(prev_size, hidden_size),
                nn.BatchNorm1d(hidden_size),
                nn.ReLU(),
                nn.Dropout(dropout_rate)
            ])
            prev_size = hidden_size
        layers.append(nn.Linear(prev_size, num_classes))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

# Focal Loss
class FocalLoss(nn.Module):
    def __init__(self, gamma=2.0, alpha=None):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha if alpha is not None else torch.FloatTensor([1.7, 2.0, 1.0, 1.0, 1.0])

    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, reduction='none', weight=self.alpha)
        pt = torch.exp(-ce_loss)
        focal_loss = ((1 - pt) ** self.gamma) * ce_loss
        return focal_loss.mean()

# Hàm căn chỉnh cột
def align_columns(df_small, df_large, target_col):
    large_cols = [col for col in df_large.columns if col != target_col]
    small_cols = [col for col in df_small.columns if col != target_col]
    missing_cols = [col for col in large_cols if col not in small_cols]

    missing_data = {}
    for col in missing_cols:
        if df_large[col].dtype in ['float64', 'int64']:
            missing_data[col] = [df_large[col].median()] * len(df_small)
        else:
            missing_data[col] = [df_large[col].mode()[0]] * len(df_small)

    missing_df = pd.DataFrame(missing_data, index=df_small.index)
    df_small = pd.concat([df_small, missing_df], axis=1)
    df_small = df_small[[col for col in df_large.columns if col != target_col] + [target_col]]
    return df_small.copy()

# Hàm thêm mẫu lớp 4 giả
def add_synthetic_class_4(df_small, df_large, target_col, n_samples=100):
    class_4_samples = df_large[df_large[target_col] == 4].sample(n_samples, replace=True, random_state=42)
    return pd.concat([df_small, class_4_samples], ignore_index=True)

# Hàm tiền xử lý dữ liệu
def preprocess_clinical_data(df, target_col, preprocessor=None, apply_smote=False, selector=None, expected_features=None, dataset_name=""):
    for col in df.columns:
        if col != target_col and (df[col].dtype == 'object' or df[col].dtype.name == 'category'):
            df[col] = df[col].astype(str)

    df[target_col] = pd.to_numeric(df[target_col], errors='coerce').fillna(-1).astype(int)

    X = df.drop(columns=[target_col])
    y = df[target_col].values

    if preprocessor is None:
        numeric_cols = X.select_dtypes(include=['float64', 'int64']).columns
        categorical_cols = X.select_dtypes(include=['object', 'category']).columns
        numeric_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', RobustScaler())
        ])
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
        ])
        preprocessor = ColumnTransformer(
            transformers=[
                ('num', numeric_transformer, numeric_cols),
                ('cat', categorical_transformer, categorical_cols)
            ])
        X_processed = preprocessor.fit_transform(X)
    else:
        X_processed = preprocessor.transform(X)

    variance_threshold = VarianceThreshold(threshold=0.0)
    X_processed = variance_threshold.fit_transform(X_processed)

    if selector is None or expected_features is not None:
        selector = SelectKBest(f_classif, k=min(20, X_processed.shape[1]))
        X_processed = selector.fit_transform(X_processed, y)
    else:
        current_features = X_processed.shape[1]
        if current_features != expected_features:
            if current_features < expected_features:
                padding = np.zeros((X_processed.shape[0], expected_features - current_features))
                X_processed = np.hstack((X_processed, padding))
            elif current_features > expected_features:
                X_processed = X_processed[:, :expected_features]
        X_processed = selector.transform(X_processed)

    print(f"Number of features for {dataset_name}: {X_processed.shape[1]}")

    if apply_smote and len(np.unique(y)) > 1 and min(Counter(y).values()) > 1:
        smote = SMOTE(random_state=42, k_neighbors=1)
        X_processed, y = smote.fit_resample(X_processed, y)
        print(f"Class distribution after SMOTE for {dataset_name}: {Counter(y)}")

    X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)
    X_train = torch.FloatTensor(X_train)
    X_test = torch.FloatTensor(X_test)
    y_train = torch.LongTensor(y_train)
    y_test = torch.LongTensor(y_test)

    # Lưu tập test
    np.save(f'/content/drive/MyDrive/X_test_{dataset_name}.npy', X_test.numpy())
    np.save(f'/content/drive/MyDrive/y_test_{dataset_name}.npy', y_test.numpy())
    print(f"Test set for {dataset_name} saved to /content/drive/MyDrive/X_test_{dataset_name}.npy and /content/drive/MyDrive/y_test_{dataset_name}.npy")

    return X_train, X_test, y_train, y_test, X_processed.shape[1], 5, preprocessor, selector

# Hàm huấn luyện
def train_model(model, X_train, y_train, X_val, y_val, num_epochs=10, learning_rate=0.001, patience=10, checkpoint_path='checkpoint.pth', load_checkpoint=False):
    dataset = TensorDataset(X_train, y_train)
    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5)
    best_qwk = -float('inf')
    epochs_no_improve = 0
    best_model_state = None

    initial_alpha = torch.FloatTensor([1.7, 2.0, 1.0, 1.0, 1.0])
    focal_loss = FocalLoss(gamma=2.0, alpha=initial_alpha)

    start_epoch = 0
    if load_checkpoint and os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_path, weights_only=False)
        try:
            model.load_state_dict(checkpoint['model_state_dict'], strict=False)
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            start_epoch = checkpoint['epoch'] + 1
            best_qwk = checkpoint['best_qwk']
            epochs_no_improve = checkpoint['epochs_no_improve']
            focal_loss.alpha = checkpoint['focal_alpha']
            print(f"Resuming from epoch {start_epoch}")
        except RuntimeError as e:
            print(f"Checkpoint mismatch: {e}")
            start_epoch = 0

    for epoch in range(start_epoch, num_epochs):
        model.train()
        train_loss = 0.0
        train_correct = 0
        total = 0
        train_predictions = []
        train_true_labels = []

        for X_batch, y_batch in loader:
            outputs = model(X_batch)
            loss = focal_loss(outputs, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += y_batch.size(0)
            train_correct += (predicted == y_batch).sum().item()
            train_predictions.extend(predicted.cpu().numpy())
            train_true_labels.extend(y_batch.cpu().numpy())

        # Tính các chỉ số đánh giá trên tập train
        train_qwk = cohen_kappa_score(train_true_labels, train_predictions, labels=[0, 1, 2, 3, 4], weights='quadratic')
        train_accuracy = accuracy_score(train_true_labels, train_predictions)
        train_f1 = f1_score(train_true_labels, train_predictions, average=None, labels=[0, 1, 2, 3, 4], zero_division=0)
        train_sensitivity = recall_score(train_true_labels, train_predictions, average=None, labels=[0, 1, 2, 3, 4], zero_division=0)
        train_cm = confusion_matrix(train_true_labels, train_predictions, labels=[0, 1, 2, 3, 4])
        train_specificity = []
        for cls in range(5):
            tn = np.sum(train_cm) - np.sum(train_cm[cls, :]) - np.sum(train_cm[:, cls]) + train_cm[cls, cls]
            fp = np.sum(train_cm[:, cls]) - train_cm[cls, cls]
            specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
            train_specificity.append(specificity)
        train_precision = precision_score(train_true_labels, train_predictions, average=None, labels=[0, 1, 2, 3, 4], zero_division=0)

        # In các chỉ số trên tập train
        print(f"Epoch {epoch+1}/{num_epochs}:")
        print(f"  Train QWK: {train_qwk:.4f}")
        print(f"  Train Accuracy: {train_accuracy:.4f}")
        print(f"  Train F1-score: {[f'{f1:.4f}' for f1 in train_f1]}")
        print(f"  Train Sensitivity: {[f'{sens:.4f}' for sens in train_sensitivity]}")
        print(f"  Train Specificity: {[f'{spec:.4f}' for spec in train_specificity]}")
        print(f"  Train Precision: {[f'{prec:.4f}' for prec in train_precision]}")

        # Đánh giá trên tập validation
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val)
            val_loss = focal_loss(val_outputs, y_val)
            _, val_predicted = torch.max(val_outputs, 1)
            val_qwk = cohen_kappa_score(y_val.numpy(), val_predicted.numpy(), labels=[0, 1, 2, 3, 4], weights='quadratic')
            val_accuracy = accuracy_score(y_val.numpy(), val_predicted.numpy())

            cm = confusion_matrix(y_val.numpy(), val_predicted.numpy(), labels=[0, 1, 2, 3, 4])
            recalls = [cm[i, i] / cm[i].sum() if cm[i].sum() > 0 else 0.01 for i in range(5)]
            class_weights = [1.0 / max(r, 0.01) for r in recalls]
            class_weights = torch.FloatTensor(class_weights) / sum(class_weights) * 5.0
            focal_loss.alpha = class_weights * initial_alpha
            print(f"  Val Accuracy: {val_accuracy:.4f}, Val QWK: {val_qwk:.4f}, Class Weights: {class_weights.tolist()}")

        scheduler.step(val_qwk)
        if val_qwk > best_qwk:
            best_qwk = val_qwk
            best_model_state = model.state_dict()
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        if (epoch + 1) % 10 == 0 or epoch == num_epochs - 1:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_qwk': best_qwk,
                'epochs_no_improve': epochs_no_improve,
                'focal_alpha': focal_loss.alpha
            }, checkpoint_path)
            print(f"Checkpoint saved at epoch {epoch+1}")

        if epochs_no_improve >= patience:
            print("Early stopping triggered")
            break

    model.load_state_dict(best_model_state)
    return model, focal_loss, best_qwk

# Hàm fine-tuning
def fine_tune_model(model, X_fine, y_fine, hidden_sizes, input_size, num_epochs=20, learning_rate=0.00005, n_splits=3, checkpoint_path='fine_tune_checkpoint.pth'):
    dataset = TensorDataset(X_fine, y_fine)
    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    fine_tuned_models = []
    for fold, (train_idx, val_idx) in enumerate(kf.split(X_fine)):
        print(f"\nFine-tuning fold {fold+1}/{n_splits}...")
        X_train_fold = X_fine[train_idx]
        y_train_fold = y_fine[train_idx]
        X_val_fold = X_fine[val_idx]
        y_val_fold = y_fine[val_idx]
        model_fold = AdvancedMLP(input_size, hidden_sizes, 5, dropout_rate=model.network[3].p)
        model_fold.load_state_dict(model.state_dict())
        model_fold, _, _ = train_model(
            model_fold, X_train_fold, y_train_fold, X_val_fold, y_val_fold,
            num_epochs=num_epochs, learning_rate=learning_rate, patience=10,
            checkpoint_path=f'{checkpoint_path}_fold{fold+1}', load_checkpoint=False
        )
        fine_tuned_models.append(model_fold)
    return fine_tuned_models

# Hàm fine-tuning trên nhiều tập dữ liệu nhỏ
def fine_tune_multiple_small_datasets(model, small_data_paths, target_col, hidden_sizes, input_size, preprocessor, selector):
    fine_tuned_models = []
    df_large = pd.read_csv('/content/drive/MyDrive/TrainingWiDS2021_filled.csv')
    expected_features = input_size
    for i, small_data_path in enumerate(small_data_paths):
        dataset_name = f"small_dataset_{i+1}"
        print(f"\nProcessing small dataset {i+1}: {small_data_path}")
        df_small = pd.read_csv(small_data_path)
        df_small = align_columns(df_small, df_large, target_col)
        df_small = add_synthetic_class_4(df_small, df_large, target_col, n_samples=100)
        X_fine, X_test_fine, y_fine, y_test_fine, fine_input_size, _, _, _ = preprocess_clinical_data(
            df_small, target_col, preprocessor=preprocessor, selector=None, apply_smote=True,
            expected_features=expected_features, dataset_name=dataset_name
        )
        print(f"Fine-tuning on small dataset {i+1} with input size {fine_input_size}...")
        models = fine_tune_model(
            model, X_fine, y_fine, hidden_sizes, input_size,
            num_epochs=20, learning_rate=0.00005, n_splits=3,
            checkpoint_path=f'fine_tune_checkpoint_small{i+1}'
        )
        fine_tuned_models.extend(models)
        for j, model_fold in enumerate(models):
            torch.save({
                'model_state_dict': model_fold.state_dict(),
                'input_size': input_size,
                'hidden_sizes': hidden_sizes,
                'num_classes': 5,
                'dropout_rate': model_fold.network[3].p,
                'focal_gamma': 2.0,
                'focal_alpha': None
            }, f'/content/drive/MyDrive/fine_tuned_model_small{i+1}_fold{j+1}.pth')
    return fine_tuned_models

# Hàm tính Quadratic Weighted Kappa
def quadratic_weighted_kappa(y_true, y_pred, num_classes=5):
    conf_mat = np.zeros((num_classes, num_classes))
    for t, p in zip(y_true, y_pred):
        conf_mat[t, p] += 1
    w = np.zeros((num_classes, num_classes))
    for i in range(num_classes):
        for j in range(num_classes):
            w[i, j] = ((i - j) ** 2) / ((num_classes - 1) ** 2)
    act_hist = np.histogram(y_true, bins=num_classes, range=(0, num_classes))[0]
    pred_hist = np.histogram(y_pred, bins=num_classes, range=(0, num_classes))[0]
    E = np.outer(act_hist, pred_hist) / np.sum(act_hist)
    conf_mat = conf_mat / conf_mat.sum()
    num = np.sum(w * conf_mat)
    den = np.sum(w * E)
    return 1 - num / den if den != 0 else 0

# Hàm đánh giá với trọng số ensemble
def evaluate_model(models, X_test, y_test, accuracy_threshold=0.85, dataset_name="", model_set_name=""):
    model_weights = []
    predictions = []
    for model in models:
        model.eval()
        with torch.no_grad():
            outputs = model(X_test)
            _, predicted = torch.max(outputs, 1)
            acc = accuracy_score(y_test.numpy(), predicted.numpy())
            cm = confusion_matrix(y_test.numpy(), predicted.numpy(), labels=[0, 1, 2, 3, 4])
            weight = acc * (0.3 * cm[0, 0] / cm[0].sum() + 0.7 * cm[1, 1] / cm[1].sum() if cm[0].sum() > 0 and cm[1].sum() > 0 else 0.01)
            model_weights.append(weight)
            predictions.append(predicted.numpy())

    model_weights = np.array(model_weights) / np.sum(model_weights)
    predictions = np.array(predictions)
    ensemble_preds = np.apply_along_axis(
        lambda x: np.bincount(x, weights=model_weights).argmax(),
        axis=0,
        arr=predictions
    )

    # Tính các chỉ số đánh giá trên tập test
    y_true_test = y_test.numpy()
    y_pred_classes_test = ensemble_preds

    # Quadratic Weighted Kappa (QWK)
    qwk_test = cohen_kappa_score(y_true_test, y_pred_classes_test, labels=[0, 1, 2, 3, 4], weights='quadratic')

    # Độ chính xác tổng thể (Accuracy)
    accuracy = accuracy_score(y_true_test, y_pred_classes_test)

    # F1-score và độ nhạy (Sensitivity/Recall) cho từng lớp
    f1_test = f1_score(y_true_test, y_pred_classes_test, average=None, labels=[0, 1, 2, 3, 4], zero_division=0)
    sensitivity_test = recall_score(y_true_test, y_pred_classes_test, average=None, labels=[0, 1, 2, 3, 4], zero_division=0)

    # Độ đặc hiệu (Specificity) cho từng lớp
    cm = confusion_matrix(y_true_test, y_pred_classes_test, labels=[0, 1, 2, 3, 4])
    specificity_test = []
    for cls in range(5):
        tn = np.sum(cm) - np.sum(cm[cls, :]) - np.sum(cm[:, cls]) + cm[cls, cls]
        fp = np.sum(cm[:, cls]) - cm[cls, cls]
        specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
        specificity_test.append(specificity)

    # Độ chính xác (Precision) cho từng lớp
    precision_test = precision_score(y_true_test, y_pred_classes_test, average=None, labels=[0, 1, 2, 3, 4], zero_division=0)

    # Tạo dictionary cho các chỉ số để log
    log_metrics = {
        'Dataset': dataset_name,
        'Model_Set': model_set_name,
        'QWK': qwk_test,
        'Accuracy': accuracy,
        'F1_Class_0': f1_test[0],
        'F1_Class_1': f1_test[1],
        'F1_Class_2': f1_test[2],
        'F1_Class_3': f1_test[3],
        'F1_Class_4': f1_test[4],
        'Sensitivity_Class_0': sensitivity_test[0],
        'Sensitivity_Class_1': sensitivity_test[1],
        'Sensitivity_Class_2': sensitivity_test[2],
        'Sensitivity_Class_3': sensitivity_test[3],
        'Sensitivity_Class_4': sensitivity_test[4],
        'Specificity_Class_0': specificity_test[0],
        'Specificity_Class_1': specificity_test[1],
        'Specificity_Class_2': specificity_test[2],
        'Specificity_Class_3': specificity_test[3],
        'Specificity_Class_4': specificity_test[4],
        'Precision_Class_0': precision_test[0],
        'Precision_Class_1': precision_test[1],
        'Precision_Class_2': precision_test[2],
        'Precision_Class_3': precision_test[3],
        'Precision_Class_4': precision_test[4]
    }

    # In các chỉ số
    print(f"\nMetrics for {dataset_name} evaluated on {model_set_name}:")
    print(f"QWK trên tập test: {qwk_test:.4f}")
    print(f"Độ chính xác trên tập test: {accuracy:.4f}")
    print(f"F1-score cho từng lớp trên tập test: {[f'{f1:.4f}' for f1 in f1_test]}")
    print(f"Độ nhạy cho từng lớp trên tập test: {[f'{sens:.4f}' for sens in sensitivity_test]}")
    print(f"Độ đặc hiệu cho từng lớp trên tập test: {[f'{spec:.4f}' for spec in specificity_test]}")
    print(f"Độ chính xác cho từng lớp trên tập test: {[f'{prec:.4f}' for prec in precision_test]}")

    # Vẽ ma trận nhầm lẫn
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=[0, 1, 2, 3, 4], yticklabels=[0, 1, 2, 3, 4])
    plt.title(f'Confusion Matrix (Test) - {dataset_name} - {model_set_name}')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.savefig(f'/content/drive/MyDrive/confusion_matrix_test_{dataset_name}_{model_set_name}.png')
    plt.close()

    if accuracy < accuracy_threshold:
        print(f"\nPer-class Metrics (Test) for {dataset_name} on {model_set_name}:")
        for i in range(5):
            print(f"Class {i}: Precision={precision_test[i]:.4f}, Recall={sensitivity_test[i]:.4f}, F1={f1_test[i]:.4f}")

        # Vẽ biểu đồ F1-score cho từng lớp
        plt.figure(figsize=(8, 6))
        plt.bar(range(5), f1_test, tick_label=[0, 1, 2, 3, 4])
        plt.title(f'F1-Score per Class (Test) - {dataset_name} - {model_set_name}')
        plt.xlabel('Class')
        plt.ylabel('F1-Score')
        plt.savefig(f'/content/drive/MyDrive/f1_score_per_class_test_{dataset_name}_{model_set_name}.png')
        plt.close()

    return ensemble_preds, log_metrics


def log_metrics_to_file(metrics, log_file_path='/content/drive/MyDrive/evaluation_metrics.csv'):
    import pandas as pd
    import os
    # Chuyển dict metrics thành DataFrame
    metrics_df = pd.DataFrame([metrics])
    # Nếu file đã tồn tại, đọc và nối thêm; nếu không, tạo file mới
    if os.path.exists(log_file_path):
        existing_df = pd.read_csv(log_file_path)
        metrics_df = pd.concat([existing_df, metrics_df], ignore_index=True)
    # Lưu vào file CSV
    metrics_df.to_csv(log_file_path, index=False)
    print(f"Metrics for {metrics['Dataset']} on {metrics['Model_Set']} logged to {log_file_path}")

# Hàm tối ưu hyperparameter
def objective(trial):
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    hidden_size1 = trial.suggest_int('hidden_size1', 64, 256)
    hidden_size2 = trial.suggest_int('hidden_size2', 32, 128)
    dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.5)
    model = AdvancedMLP(input_size_global, [hidden_size1, hidden_size2], 5, dropout_rate)
    X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
        X_train_global.numpy(), y_train_global.numpy(), test_size=0.2, random_state=42
    )
    X_train_split = torch.FloatTensor(X_train_split)
    X_val_split = torch.FloatTensor(X_val_split)
    y_train_split = torch.LongTensor(y_train_split)
    y_val_split = torch.LongTensor(y_val_split)
    model, _, _ = train_model(
        model, X_train_split, y_train_split, X_val_split, y_val_split,
        num_epochs=10, learning_rate=learning_rate, patience=5,
        checkpoint_path=f'optuna_checkpoint_trial{trial.number}.pth',
        load_checkpoint=False
    )
    model.eval()
    with torch.no_grad():
        outputs = model(X_val_split)
        _, predicted = torch.max(outputs, 1)
        qwk = cohen_kappa_score(y_val_split.numpy(), predicted.numpy(), labels=[0, 1, 2, 3, 4], weights='quadratic')
    return qwk

# Hàm main
def main():
    global X_train_global, y_train_global, input_size_global, preprocessor, selector
    large_data_path = '/content/drive/MyDrive/TrainingWiDS2021_filled.csv'
    small_data_paths = ['/content/drive/MyDrive/diabetes_filled.csv', '/content/drive/MyDrive/diabetes_prediction_dataset_filled.csv']
    target_col = 'severity_label'
    log_file_path = '/content/drive/MyDrive/evaluation_metrics.csv'

    # Xử lý tập dữ liệu lớn
    df_large = pd.read_csv(large_data_path)
    X_train, X_test, y_train, y_test, input_size, num_classes, preprocessor, selector = preprocess_clinical_data(
        df_large, target_col, apply_smote=True, dataset_name="large_dataset"
    )
    X_train_global = X_train
    y_train_global = y_train
    input_size_global = input_size

    print("Optimizing hyperparameters...")
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=10)
    best_params = study.best_params
    print(f"Best hyperparameters: {best_params}")

    kf = KFold(n_splits=3, shuffle=True, random_state=42)
    main_models = []
    best_qwk_overall = -float('inf')
    best_model_state_overall = None

    for fold, (train_idx, val_idx) in enumerate(kf.split(X_train)):
        print(f"\nTraining fold {fold+1}/3 on large dataset...")
        X_train_fold = X_train[train_idx]
        y_train_fold = y_train[train_idx]
        X_val_fold = X_train[val_idx]
        y_val_fold = y_train[val_idx]
        model_main = AdvancedMLP(
            input_size=input_size,
            hidden_sizes=[best_params['hidden_size1'], best_params['hidden_size2']],
            num_classes=5,
            dropout_rate=best_params['dropout_rate']
        )
        model_main, _, fold_best_qwk = train_model(
            model_main, X_train_fold, y_train_fold, X_val_fold, y_val_fold,
            num_epochs=10, learning_rate=best_params['learning_rate'], patience=10,
            checkpoint_path=f'/content/drive/MyDrive/main_checkpoint_fold{fold+1}.pth'
        )
        main_models.append(model_main)
        torch.save({
            'model_state_dict': model_main.state_dict(),
            'input_size': input_size,
            'hidden_sizes': [best_params['hidden_size1'], best_params['hidden_size2']],
            'num_classes': 5,
            'dropout_rate': best_params['dropout_rate'],
            'focal_gamma': 2.0,
            'focal_alpha': torch.FloatTensor([1.7, 2.0, 1.0, 1.0, 1.0])
        }, f'/content/drive/MyDrive/main_model_fold{fold+1}.pth')

        if fold_best_qwk > best_qwk_overall:
            best_qwk_overall = fold_best_qwk
            best_model_state_overall = model_main.state_dict()

    if best_model_state_overall is not None:
        torch.save({
            'model_state_dict': best_model_state_overall,
            'input_size': input_size,
            'hidden_sizes': [best_params['hidden_size1'], best_params['hidden_size2']],
            'num_classes': 5,
            'dropout_rate': best_params['dropout_rate'],
            'focal_gamma': 2.0,
            'focal_alpha': torch.FloatTensor([1.7, 2.0, 1.0, 1.0, 1.0]),
            'best_qwk': best_qwk_overall
        }, '/content/drive/MyDrive/main_clinical_model.pth')
        print(f"Main clinical model saved to /content/drive/MyDrive/main_clinical_model.pth with QWK: {best_qwk_overall:.4f}")

    print("\nFine-tuning on multiple small datasets...")
    fine_tuned_models = fine_tune_multiple_small_datasets(
        main_models[0], small_data_paths, target_col,
        hidden_sizes=[best_params['hidden_size1'], best_params['hidden_size2']],
        input_size=input_size,
        preprocessor=preprocessor,
        selector=selector
    )

    print("\nEvaluating ensemble on each test set...")
    all_models = main_models + fine_tuned_models

    # Đánh giá tập test của dữ liệu lớn
    # 1. Trên main_models
    _, metrics_large_main = evaluate_model(main_models, X_test, y_test, dataset_name="large_dataset", model_set_name="main_models")
    log_metrics_to_file(metrics_large_main, log_file_path)
    # 2. Trên fine_tuned_models
    _, metrics_large_fine = evaluate_model(fine_tuned_models, X_test, y_test, dataset_name="large_dataset", model_set_name="fine_tuned_models")
    log_metrics_to_file(metrics_large_fine, log_file_path)
    # 3. Trên all_models
    _, metrics_large_all = evaluate_model(all_models, X_test, y_test, dataset_name="large_dataset", model_set_name="all_models")
    log_metrics_to_file(metrics_large_all, log_file_path)

    # Đánh giá tập test của các tập dữ liệu nhỏ
    for i in range(len(small_data_paths)):
        dataset_name = f"small_dataset_{i+1}"
        X_test_small = torch.FloatTensor(np.load(f'/content/drive/MyDrive/X_test_{dataset_name}.npy'))
        y_test_small = torch.LongTensor(np.load(f'/content/drive/MyDrive/y_test_{dataset_name}.npy'))
        # 1. Trên main_models
        _, metrics_small_main = evaluate_model(main_models, X_test_small, y_test_small, dataset_name=dataset_name, model_set_name="main_models")
        log_metrics_to_file(metrics_small_main, log_file_path)
        # 2. Trên fine_tuned_models
        _, metrics_small_fine = evaluate_model(fine_tuned_models, X_test_small, y_test_small, dataset_name=dataset_name, model_set_name="fine_tuned_models")
        log_metrics_to_file(metrics_small_fine, log_file_path)
        # 3. Trên all_models
        _, metrics_small_all = evaluate_model(all_models, X_test_small, y_test_small, dataset_name=dataset_name, model_set_name="all_models")
        log_metrics_to_file(metrics_small_all, log_file_path)

if __name__ == "__main__":
    main()

Number of features for large_dataset: 20
Class distribution after SMOTE for large_dataset: Counter({np.int64(2): 52828, np.int64(3): 52828, np.int64(1): 52828, np.int64(0): 52828, np.int64(4): 52828})


[I 2025-06-05 08:00:51,767] A new study created in memory with name: no-name-b8a9e62c-40aa-450c-9911-cbc3d160e38e


Test set for large_dataset saved to /content/drive/MyDrive/X_test_large_dataset.npy and /content/drive/MyDrive/y_test_large_dataset.npy
Optimizing hyperparameters...
Epoch 1/10:
  Train QWK: 0.9343
  Train Accuracy: 0.7480
  Train F1-score: ['0.8535', '0.7175', '0.6680', '0.6688', '0.8173']
  Train Sensitivity: ['0.8728', '0.7677', '0.6020', '0.6374', '0.8608']
  Train Specificity: ['0.9570', '0.9070', '0.9499', '0.9326', '0.9387']
  Train Precision: ['0.8350', '0.6735', '0.7501', '0.7034', '0.7779']
  Val Accuracy: 0.8357, Val QWK: 0.9604, Class Weights: [0.8484847545623779, 1.0302937030792236, 1.151896595954895, 1.1016144752502441, 0.8677104115486145]
Epoch 2/10:
  Train QWK: 0.9427
  Train Accuracy: 0.7777
  Train F1-score: ['0.8748', '0.7651', '0.7307', '0.7025', '0.8148']
  Train Sensitivity: ['0.8792', '0.8000', '0.6827', '0.7170', '0.8100']
  Train Specificity: ['0.9673', '0.9272', '0.9535', '0.9186', '0.9555']
  Train Precision: ['0.8704', '0.7331', '0.7858', '0.6885', '0.8197'

[I 2025-06-05 08:03:03,976] Trial 0 finished with value: 0.9643530755883012 and parameters: {'learning_rate': 0.008251052786142844, 'hidden_size1': 96, 'hidden_size2': 47, 'dropout_rate': 0.42512976230149463}. Best is trial 0 with value: 0.9643530755883012.


Epoch 10/10:
  Train QWK: 0.9504
  Train Accuracy: 0.8049
  Train F1-score: ['0.8944', '0.7944', '0.7772', '0.7074', '0.8431']
  Train Sensitivity: ['0.9188', '0.7836', '0.7763', '0.6750', '0.8713']
  Train Specificity: ['0.9661', '0.9527', '0.9447', '0.9414', '0.9512']
  Train Precision: ['0.8712', '0.8056', '0.7782', '0.7429', '0.8166']
  Val Accuracy: 0.8549, Val QWK: 0.9644, Class Weights: [0.9272822141647339, 1.005918264389038, 1.1197679042816162, 1.0162944793701172, 0.9307373762130737]
Checkpoint saved at epoch 10
Epoch 1/10:
  Train QWK: 0.9339
  Train Accuracy: 0.7475
  Train F1-score: ['0.8524', '0.7133', '0.6620', '0.6715', '0.8214']
  Train Sensitivity: ['0.8741', '0.7638', '0.5938', '0.6361', '0.8703']
  Train Specificity: ['0.9558', '0.9056', '0.9500', '0.9351', '0.9379']
  Train Precision: ['0.8317', '0.6690', '0.7479', '0.7110', '0.7776']
  Val Accuracy: 0.8405, Val QWK: 0.9614, Class Weights: [0.862594485282898, 0.9928964376449585, 1.1474932432174683, 1.1199733018875122

[I 2025-06-05 08:05:16,934] Trial 1 finished with value: 0.966250803493339 and parameters: {'learning_rate': 0.005509464163014177, 'hidden_size1': 247, 'hidden_size2': 42, 'dropout_rate': 0.495902146412039}. Best is trial 1 with value: 0.966250803493339.


Epoch 1/10:
  Train QWK: 0.8232
  Train Accuracy: 0.5587
  Train F1-score: ['0.7081', '0.5537', '0.2898', '0.5217', '0.6456']
  Train Sensitivity: ['0.7278', '0.7200', '0.2054', '0.5323', '0.6085']
  Train Specificity: ['0.9182', '0.7798', '0.9470', '0.8724', '0.9309']
  Train Precision: ['0.6896', '0.4498', '0.4918', '0.5116', '0.6874']
  Val Accuracy: 0.7102, Val QWK: 0.9220, Class Weights: [0.761036217212677, 0.8677471876144409, 1.5527312755584717, 1.0332719087600708, 0.7852134108543396]
Epoch 2/10:
  Train QWK: 0.9098
  Train Accuracy: 0.6905
  Train F1-score: ['0.7776', '0.6790', '0.6844', '0.6088', '0.7136']
  Train Sensitivity: ['0.7353', '0.7340', '0.7187', '0.6243', '0.6406']
  Train Specificity: ['0.9611', '0.8930', '0.9047', '0.8929', '0.9613']
  Train Precision: ['0.8251', '0.6317', '0.6533', '0.5941', '0.8053']
  Val Accuracy: 0.8070, Val QWK: 0.9464, Class Weights: [0.9183942675590515, 1.063464641571045, 0.9634434580802917, 1.1126766204833984, 0.9420210123062134]
Epoch 3/

[I 2025-06-05 08:07:23,079] Trial 2 finished with value: 0.9600445556888811 and parameters: {'learning_rate': 2.477240911939211e-05, 'hidden_size1': 113, 'hidden_size2': 56, 'dropout_rate': 0.2117271947131572}. Best is trial 1 with value: 0.966250803493339.


Epoch 10/10:
  Train QWK: 0.9454
  Train Accuracy: 0.7897
  Train F1-score: ['0.8708', '0.7573', '0.7310', '0.7352', '0.8511']
  Train Sensitivity: ['0.8694', '0.8031', '0.6863', '0.7233', '0.8668']
  Train Specificity: ['0.9682', '0.9206', '0.9521', '0.9387', '0.9576']
  Train Precision: ['0.8721', '0.7165', '0.7818', '0.7475', '0.8360']
  Val Accuracy: 0.8382, Val QWK: 0.9600, Class Weights: [0.9180819988250732, 0.9892542362213135, 1.107621192932129, 1.0987924337387085, 0.8862498998641968]
Checkpoint saved at epoch 10
Epoch 1/10:
  Train QWK: 0.8724
  Train Accuracy: 0.6243
  Train F1-score: ['0.7254', '0.6083', '0.5000', '0.5597', '0.7065']
  Train Sensitivity: ['0.7280', '0.7089', '0.4149', '0.5485', '0.7215']
  Train Specificity: ['0.9303', '0.8445', '0.9388', '0.8967', '0.9199']
  Train Precision: ['0.7229', '0.5326', '0.6289', '0.5714', '0.6922']
  Val Accuracy: 0.7741, Val QWK: 0.9401, Class Weights: [0.810684859752655, 0.9619437456130981, 1.2875218391418457, 1.1389801502227783

[I 2025-06-05 08:09:34,373] Trial 3 finished with value: 0.963487293887486 and parameters: {'learning_rate': 7.073698344839695e-05, 'hidden_size1': 194, 'hidden_size2': 78, 'dropout_rate': 0.49901494360924126}. Best is trial 1 with value: 0.966250803493339.


Epoch 1/10:
  Train QWK: 0.8299
  Train Accuracy: 0.5623
  Train F1-score: ['0.6790', '0.5609', '0.3764', '0.4959', '0.6511']
  Train Sensitivity: ['0.7007', '0.6734', '0.2921', '0.4817', '0.6642']
  Train Specificity: ['0.9094', '0.8181', '0.9350', '0.8843', '0.9062']
  Train Precision: ['0.6587', '0.4806', '0.5290', '0.5110', '0.6385']
  Val Accuracy: 0.7377, Val QWK: 0.9307, Class Weights: [0.839558482170105, 0.885566771030426, 1.3010793924331665, 1.2059388160705566, 0.7678565382957458]
Epoch 2/10:
  Train QWK: 0.9033
  Train Accuracy: 0.6652
  Train F1-score: ['0.7655', '0.6612', '0.6489', '0.6116', '0.6473']
  Train Sensitivity: ['0.7347', '0.7299', '0.6233', '0.6967', '0.5411']
  Train Specificity: ['0.9539', '0.8805', '0.9256', '0.8540', '0.9674']
  Train Precision: ['0.7990', '0.6043', '0.6766', '0.5451', '0.8054']
  Val Accuracy: 0.7996, Val QWK: 0.9458, Class Weights: [0.9359183311462402, 1.0121759176254272, 1.0800225734710693, 1.0666108131408691, 0.905272364616394]
Epoch 3/1

[I 2025-06-05 08:11:40,537] Trial 4 finished with value: 0.9577891531270348 and parameters: {'learning_rate': 4.1532796351827656e-05, 'hidden_size1': 183, 'hidden_size2': 45, 'dropout_rate': 0.4703810974017534}. Best is trial 1 with value: 0.966250803493339.


Epoch 10/10:
  Train QWK: 0.9405
  Train Accuracy: 0.7744
  Train F1-score: ['0.8657', '0.7419', '0.7110', '0.7066', '0.8402']
  Train Sensitivity: ['0.8748', '0.7786', '0.6769', '0.6783', '0.8639']
  Train Specificity: ['0.9635', '0.9200', '0.9432', '0.9393', '0.9519']
  Train Precision: ['0.8568', '0.7086', '0.7487', '0.7373', '0.8177']
  Val Accuracy: 0.8368, Val QWK: 0.9578, Class Weights: [0.9385445713996887, 0.9867215156555176, 1.0517666339874268, 1.1437997817993164, 0.879167377948761]
Checkpoint saved at epoch 10
Epoch 1/10:
  Train QWK: 0.9392
  Train Accuracy: 0.7643
  Train F1-score: ['0.8610', '0.7379', '0.7002', '0.6893', '0.8233']
  Train Sensitivity: ['0.8752', '0.7836', '0.6379', '0.6688', '0.8567']
  Train Specificity: ['0.9607', '0.9149', '0.9540', '0.9318', '0.9440']
  Train Precision: ['0.8474', '0.6972', '0.7760', '0.7111', '0.7925']
  Val Accuracy: 0.8387, Val QWK: 0.9613, Class Weights: [0.850864589214325, 1.0637495517730713, 1.125746250152588, 1.0689473152160645,

[I 2025-06-05 08:14:03,959] Trial 5 finished with value: 0.9646325158168166 and parameters: {'learning_rate': 0.008837712355146271, 'hidden_size1': 219, 'hidden_size2': 110, 'dropout_rate': 0.4251415239719019}. Best is trial 1 with value: 0.966250803493339.


Epoch 10/10:
  Train QWK: 0.9528
  Train Accuracy: 0.8121
  Train F1-score: ['0.8995', '0.7992', '0.7710', '0.7375', '0.8487']
  Train Sensitivity: ['0.9251', '0.8043', '0.7259', '0.7410', '0.8648']
  Train Specificity: ['0.9671', '0.9479', '0.9607', '0.9326', '0.9568']
  Train Precision: ['0.8753', '0.7942', '0.8221', '0.7340', '0.8333']
  Val Accuracy: 0.8559, Val QWK: 0.9646, Class Weights: [0.9490037560462952, 1.0036721229553223, 1.1032414436340332, 1.033464789390564, 0.910618007183075]
Checkpoint saved at epoch 10
Epoch 1/10:
  Train QWK: 0.7428
  Train Accuracy: 0.4848
  Train F1-score: ['0.5701', '0.5031', '0.1826', '0.4101', '0.6223']
  Train Sensitivity: ['0.5305', '0.7414', '0.1171', '0.3495', '0.6863']
  Train Specificity: ['0.9175', '0.6985', '0.9587', '0.9109', '0.8704']
  Train Precision: ['0.6160', '0.3807', '0.4146', '0.4961', '0.5693']
  Val Accuracy: 0.6389, Val QWK: 0.8898, Class Weights: [0.5662288665771484, 0.5844953656196594, 2.6055586338043213, 0.7448523640632629

[I 2025-06-05 08:16:08,320] Trial 6 finished with value: 0.9476774896151752 and parameters: {'learning_rate': 1.4622531686283306e-05, 'hidden_size1': 89, 'hidden_size2': 96, 'dropout_rate': 0.4280928804418376}. Best is trial 1 with value: 0.966250803493339.


Epoch 10/10:
  Train QWK: 0.9292
  Train Accuracy: 0.7393
  Train F1-score: ['0.8211', '0.7064', '0.6889', '0.6740', '0.8091']
  Train Sensitivity: ['0.7978', '0.7678', '0.6713', '0.6568', '0.8030']
  Train Specificity: ['0.9637', '0.8985', '0.9307', '0.9266', '0.9546']
  Train Precision: ['0.8457', '0.6540', '0.7076', '0.6920', '0.8153']
  Val Accuracy: 0.8052, Val QWK: 0.9477, Class Weights: [0.8434596061706543, 1.0312730073928833, 1.1136161088943481, 1.1676843166351318, 0.8439669609069824]
Checkpoint saved at epoch 10
Epoch 1/10:
  Train QWK: 0.9356
  Train Accuracy: 0.7637
  Train F1-score: ['0.8530', '0.7276', '0.6820', '0.7090', '0.8371']
  Train Sensitivity: ['0.8616', '0.7897', '0.6168', '0.6855', '0.8651']
  Train Specificity: ['0.9604', '0.9048', '0.9520', '0.9377', '0.9496']
  Train Precision: ['0.8445', '0.6745', '0.7627', '0.7342', '0.8109']
  Val Accuracy: 0.8420, Val QWK: 0.9615, Class Weights: [0.8745735883712769, 1.0049469470977783, 1.1585776805877686, 1.08253562450408

[I 2025-06-05 08:18:17,677] Trial 7 finished with value: 0.9678023914226783 and parameters: {'learning_rate': 0.00046924430640862813, 'hidden_size1': 125, 'hidden_size2': 102, 'dropout_rate': 0.25991905249863956}. Best is trial 7 with value: 0.9678023914226783.


Epoch 10/10:
  Train QWK: 0.9620
  Train Accuracy: 0.8460
  Train F1-score: ['0.9131', '0.8267', '0.8033', '0.7920', '0.8892']
  Train Sensitivity: ['0.9289', '0.8437', '0.7472', '0.7871', '0.9233']
  Train Specificity: ['0.9736', '0.9507', '0.9717', '0.9497', '0.9617']
  Train Precision: ['0.8979', '0.8105', '0.8684', '0.7971', '0.8575']
  Val Accuracy: 0.8696, Val QWK: 0.9678, Class Weights: [0.9273583292961121, 1.0082712173461914, 1.0816911458969116, 1.09384024143219, 0.888839066028595]
Checkpoint saved at epoch 10
Epoch 1/10:
  Train QWK: 0.9446
  Train Accuracy: 0.7860
  Train F1-score: ['0.8706', '0.7521', '0.7180', '0.7263', '0.8524']
  Train Sensitivity: ['0.8831', '0.8007', '0.6535', '0.6975', '0.8955']
  Train Specificity: ['0.9636', '0.9179', '0.9583', '0.9440', '0.9487']
  Train Precision: ['0.8584', '0.7091', '0.7966', '0.7576', '0.8132']
  Val Accuracy: 0.8578, Val QWK: 0.9647, Class Weights: [0.8797439336776733, 1.0510205030441284, 1.0479003190994263, 1.1369664669036865,

[I 2025-06-05 08:20:31,367] Trial 8 finished with value: 0.9673497426701148 and parameters: {'learning_rate': 0.001931172683608348, 'hidden_size1': 231, 'hidden_size2': 107, 'dropout_rate': 0.33073808975751634}. Best is trial 7 with value: 0.9678023914226783.


Epoch 1/10:
  Train QWK: 0.9413
  Train Accuracy: 0.7717
  Train F1-score: ['0.8723', '0.7433', '0.6947', '0.6988', '0.8346']
  Train Sensitivity: ['0.8950', '0.7897', '0.6246', '0.6699', '0.8796']
  Train Specificity: ['0.9608', '0.9162', '0.9566', '0.9379', '0.9431']
  Train Precision: ['0.8507', '0.7020', '0.7826', '0.7303', '0.7940']
  Val Accuracy: 0.8470, Val QWK: 0.9631, Class Weights: [0.8544060587882996, 1.0490331649780273, 1.1742265224456787, 1.0125385522842407, 0.9097956418991089]
Epoch 2/10:
  Train QWK: 0.9490
  Train Accuracy: 0.7992
  Train F1-score: ['0.8892', '0.7831', '0.7546', '0.7166', '0.8457']
  Train Sensitivity: ['0.8980', '0.8136', '0.7257', '0.6851', '0.8741']
  Train Specificity: ['0.9696', '0.9339', '0.9506', '0.9430', '0.9518']
  Train Precision: ['0.8805', '0.7548', '0.7858', '0.7512', '0.8192']
  Val Accuracy: 0.8609, Val QWK: 0.9659, Class Weights: [0.8845218420028687, 1.0743986368179321, 1.0285069942474365, 1.0669307708740234, 0.9456416368484497]
Epoch 

[I 2025-06-05 08:22:42,976] Trial 9 finished with value: 0.966349165926387 and parameters: {'learning_rate': 0.007836284287443212, 'hidden_size1': 245, 'hidden_size2': 33, 'dropout_rate': 0.2944903072692385}. Best is trial 7 with value: 0.9678023914226783.


Epoch 10/10:
  Train QWK: 0.9554
  Train Accuracy: 0.8226
  Train F1-score: ['0.9054', '0.8098', '0.7843', '0.7384', '0.8653']
  Train Sensitivity: ['0.9268', '0.8168', '0.7599', '0.6973', '0.9125']
  Train Specificity: ['0.9699', '0.9498', '0.9555', '0.9519', '0.9510']
  Train Precision: ['0.8849', '0.8028', '0.8102', '0.7846', '0.8228']
  Val Accuracy: 0.8636, Val QWK: 0.9663, Class Weights: [0.8817983865737915, 1.070266842842102, 0.9944707751274109, 1.1670951843261719, 0.8863686919212341]
Checkpoint saved at epoch 10
Best hyperparameters: {'learning_rate': 0.00046924430640862813, 'hidden_size1': 125, 'hidden_size2': 102, 'dropout_rate': 0.25991905249863956}

Training fold 1/3 on large dataset...
Epoch 1/10:
  Train QWK: 0.9337
  Train Accuracy: 0.7561
  Train F1-score: ['0.8457', '0.7186', '0.6762', '0.6990', '0.8322']
  Train Sensitivity: ['0.8502', '0.7818', '0.6131', '0.6740', '0.8620']
  Train Specificity: ['0.9600', '0.9013', '0.9498', '0.9363', '0.9477']
  Train Precision: ['0

  f = msb / msw


Number of features for small_dataset_1: 20
Class distribution after SMOTE for small_dataset_1: Counter({np.int64(0): 208, np.int64(1): 208, np.int64(2): 208, np.int64(3): 208, np.int64(4): 208})
Test set for small_dataset_1 saved to /content/drive/MyDrive/X_test_small_dataset_1.npy and /content/drive/MyDrive/y_test_small_dataset_1.npy
Fine-tuning on small dataset 1 with input size 20...

Fine-tuning fold 1/3...
Epoch 1/20:
  Train QWK: -0.1485
  Train Accuracy: 0.1841
  Train F1-score: ['0.0000', '0.0000', '0.3249', '0.1810', '0.0000']
  Train Sensitivity: ['0.0000', '0.0000', '0.8137', '0.1667', '0.0000']
  Train Specificity: ['1.0000', '0.8929', '0.2788', '0.8250', '0.9954']
  Train Precision: ['0.0000', '0.0000', '0.2029', '0.1979', '0.0000']
  Val Accuracy: 0.2266, Val QWK: -0.0975, Class Weights: [1.2468827962875366, 1.2468827962875366, 0.01246882788836956, 1.2468827962875366, 1.2468827962875366]
Epoch 2/20:
  Train QWK: -0.1187
  Train Accuracy: 0.2022
  Train F1-score: ['0.0000'

  f = msb / msw


Number of features for small_dataset_2: 20
Class distribution after SMOTE for small_dataset_2: Counter({np.int64(2): 58037, np.int64(1): 58037, np.int64(0): 58037, np.int64(3): 58037, np.int64(4): 58037})
Test set for small_dataset_2 saved to /content/drive/MyDrive/X_test_small_dataset_2.npy and /content/drive/MyDrive/y_test_small_dataset_2.npy
Fine-tuning on small dataset 2 with input size 20...

Fine-tuning fold 1/3...
Epoch 1/20:
  Train QWK: 0.5082
  Train Accuracy: 0.3619
  Train F1-score: ['0.2496', '0.3174', '0.2073', '0.1397', '0.8986']
  Train Sensitivity: ['0.2470', '0.4469', '0.1984', '0.1027', '0.8163']
  Train Specificity: ['0.8171', '0.6549', '0.8221', '0.9079', '0.9999']
  Train Precision: ['0.2524', '0.2461', '0.2171', '0.2186', '0.9994']
  Val Accuracy: 0.3993, Val QWK: 0.6681, Class Weights: [1.6556291580200195, 0.01655629090964794, 1.6556291580200195, 1.6556291580200195, 0.01655629090964794]
Epoch 2/20:
  Train QWK: 0.4678
  Train Accuracy: 0.3911
  Train F1-score: [