In [1]:
from google.colab import userdata
import json

# Get the Kaggle credentials from Colab's userdata
username = userdata.get("KAGGLE_USER")
key = userdata.get("KAGGLE_KEY")

# Echo the credentials into the kaggle.json file
!mkdir -p ~/.kaggle
!echo '{{"username":"{username}","key":"{key}"}}' > ~/.kaggle/kaggle.json
!chmod 600 /root/.kaggle/kaggle.json


In [None]:
!kaggle competitions download -c io-t-sleep-stage-classification-version-2
!unzip /content/io-t-sleep-stage-classification-version-2.zip && rm -rf /content/io-t-sleep-stage-classification-version-2.zip

In [None]:
import pandas as pd
import glob

file_list = glob.glob("/content/train/train/*.csv")
all_data = []
for file in file_list:
    try:
        df = pd.read_csv(file)

        all_data.append(df)

    except Exception as e:
        print(f"⚠️ อ่านไฟล์ {file} ไม่ได้: {e}")

combined_df = pd.concat(all_data, ignore_index=True)
combined_df

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis
from scipy.fftpack import fft

segment_size = 480

def extract_features(segment):
    features = {}

    for col in ["BVP", "ACC_X", "ACC_Y", "ACC_Z", "TEMP", "EDA", "HR", "IBI"]:
        values = segment[col].values
        features[f"mean_{col}"] = np.mean(values)
        features[f"std_{col}"] = np.std(values)
        features[f"max_{col}"] = np.max(values)
        features[f"min_{col}"] = np.min(values)
        features[f"median_{col}"] = np.median(values)
        features[f"skew_{col}"] = skew(values)
        features[f"kurtosis_{col}"] = kurtosis(values)


    features["sma_acc"] = np.sum(np.abs(segment["ACC_X"]) + np.abs(segment["ACC_Y"]) + np.abs(segment["ACC_Z"]))


    for col in ["BVP", "EDA", "HR"]:
        features[f"energy_{col}"] = np.sum(np.square(values)) / len(values)


    for col in ["HR", "BVP", "EDA"]:
        fft_values = np.abs(fft(segment[col].values))
        features[f"fft_peak_{col}"] = np.argmax(fft_values)
        features[f"fft_mean_{col}"] = np.mean(fft_values)
        features[f"fft_std_{col}"] = np.std(fft_values)

    features["Sleep_Stage"] = segment["Sleep_Stage"].values[0]

    return features


feature_list = []
for i in range(0, len(combined_df), segment_size):
    segment = combined_df.iloc[i:i+segment_size]
    if len(segment) == segment_size:
        feature_list.append(extract_features(segment))


feature_df = pd.DataFrame(feature_list)

In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import LabelEncoder


X = feature_df.drop(columns=["Sleep_Stage"])

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(feature_df["Sleep_Stage"])

label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

selector = SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=42))
selector.fit(X, y)

X_selected = X.loc[:, selector.get_support()]
X_selected


Unnamed: 0,std_BVP,max_BVP,min_BVP,kurtosis_BVP,mean_ACC_X,std_ACC_X,max_ACC_X,min_ACC_X,median_ACC_X,ma_ACC_X,...,min_EDA,median_EDA,ma_EDA,std_HR,sma_acc,fft_mean_HR,fft_mean_BVP,fft_std_BVP,fft_mean_EDA,fft_std_EDA
0,3.380333,6.721362,-10.770070,-0.493754,-14.355264,2.244520e+00,-0.757936,-19.726688,-14.828335,-7.971930,...,0.275844,0.278518,0.277576,0.308713,47081.203465,89.632291,39.635784,62.560365,0.292170,6.091121
1,3.935875,31.704457,-14.867657,13.311258,-7.918330,7.591805e-02,-7.691100,-8.768982,-7.908425,-7.908425,...,0.275781,0.278512,0.277790,0.670249,44602.966736,88.714458,49.526668,70.591454,0.291152,6.088606
2,3.048122,5.466780,-6.612507,-0.977987,-7.908425,9.196477e-10,-7.908425,-7.908425,-7.908425,-7.908425,...,0.274561,0.278546,0.278760,0.622197,44603.515631,87.801156,31.244315,59.021417,0.292623,6.093841
3,3.019995,7.930728,-6.514300,-0.886724,-7.976691,2.974962e-01,-6.830426,-9.157902,-7.908425,-8.604140,...,0.276923,0.279736,0.280490,0.281799,44634.303798,87.206710,32.048157,57.885256,0.293954,6.116442
4,3.080849,8.962223,-11.341948,-0.315907,-9.390520,4.679507e-01,-5.900315,-10.680165,-9.463630,-9.309029,...,0.276998,0.279834,0.279472,0.830735,45270.894875,91.699332,39.636201,54.635046,0.295240,6.127307
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66468,22.398665,40.263323,-42.427387,-1.208328,-9.843343,1.449272e-01,-8.811666,-10.077662,-9.885198,-9.885349,...,0.106086,0.110145,0.109377,1.775145,46957.876672,76.213788,121.197916,475.533078,0.122638,2.404188
66469,21.826509,42.411884,-46.638104,-1.109979,-9.637162,4.684972e-01,-7.839029,-10.907642,-9.883842,-8.899136,...,0.106055,0.110080,0.109420,0.769390,46869.171661,63.755645,189.131087,439.203670,0.123836,2.401621
66470,22.656818,43.579126,-44.022952,-1.091839,-9.811608,2.570399e-01,-8.779109,-11.453235,-9.885511,-10.182267,...,0.104824,0.107511,0.106678,0.730192,46878.500774,66.246882,189.481366,458.799144,0.121602,2.345269
66471,22.659089,38.140872,-43.031622,-1.189028,-7.978950,3.906126e-01,-6.726895,-10.024153,-7.908425,-7.908420,...,0.104861,0.107626,0.108146,1.167451,46125.325063,68.431656,119.438632,481.853995,0.119889,2.349436


In [13]:
!pip install lightgbm catboost
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, f1_score
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
import xgboost as xgb



X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)


###------------------------------------------------------------------------------###


# XGBoost
xgb_model = xgb.XGBClassifier(n_estimators=200, max_depth=10, learning_rate=0.05, random_state=42)
xgb_model.fit(X_train, y_train)
xgb_pred = xgb_model.predict(X_test)

# รวมหมด
estimators = [
    ('xgb', xgb.XGBClassifier(n_estimators=200, max_depth=10, learning_rate=0.05,
                              tree_method='gpu_hist', predictor='gpu_predictor', random_state=42)),  # ✅ ใช้ GPU
    ('lgbm', LGBMClassifier(n_estimators=200, max_depth=10, learning_rate=0.05,
                             random_state=42)),
]

stacking_model = StackingClassifier(
    estimators=estimators,
    final_estimator=CatBoostClassifier(verbose=0, random_state=42)
)

stacking_model.fit(X_train, y_train)
stacking_pred = stacking_model.predict(X_test)


# ดูค่า
xgb_acc = accuracy_score(y_test, xgb_pred)
xgb_f1 = f1_score(y_test, xgb_pred, average='weighted')
stacking_acc = accuracy_score(y_test, stacking_pred)
stacking_f1 = f1_score(y_test, stacking_pred, average='weighted')


# print(f"🎯 Random Forest: Accuracy = {rf_acc:.4f}, F1-score = {rf_f1:.4f}")
print(f"🚀 XGBoost: Accuracy = {xgb_acc:.4f}, F1-score = {xgb_f1:.4f}")
print(f"🔗 Stacking Model: Accuracy = {stacking_acc:.4f}, F1-score = {stacking_f1:.4f}")




    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor" } are not used.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.020686 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10200
[LightGBM] [Info] Number of data points in the train set: 53178, number of used features: 40
[LightGBM] [Info] Start training from score -0.426444
[LightGBM] [Info] Start training from score -2.241105
[LightGBM] [Info] Start training from score -1.423653



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor" } are not used.


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor" } are not used.


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor" } are not used.


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor" } are not used.


    E.g. tree_method = "hist", device = "cuda"



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016363 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10200
[LightGBM] [Info] Number of data points in the train set: 42542, number of used features: 40
[LightGBM] [Info] Start training from score -0.426427
[LightGBM] [Info] Start training from score -2.241095
[LightGBM] [Info] Start training from score -1.423702
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016624 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10200
[LightGBM] [Info] Number of data points in the train set: 42542, number of used features: 40
[LightGBM] [Info] Start training from score -0.426427
[LightGBM] [Info] Start training from score -2.241095
[LightGBM] [Info] Start training from score -1.423702
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.032268 s


    E.g. tree_method = "hist", device = "cuda"



🔗 Stacking Model: Accuracy = 0.8691, F1-score = 0.8679


  y = column_or_1d(y, warn=True)


In [15]:
import pandas as pd
import glob
import os
import numpy as np
from scipy.stats import skew, kurtosis
from scipy.fftpack import fft


test_path = "/content/test_segment/test_segment/"

test_file_list = glob.glob(os.path.join(test_path, "**/*.csv"), recursive=True)


feature_data = []

def extract_features(segment, subject_id):
    features = {}


    for col in ["BVP", "ACC_X", "ACC_Y", "ACC_Z", "TEMP", "EDA", "HR", "IBI"]:
        values = segment[col].values
        features[f"mean_{col}"] = np.mean(values)
        features[f"std_{col}"] = np.std(values)
        features[f"max_{col}"] = np.max(values)
        features[f"min_{col}"] = np.min(values)
        features[f"median_{col}"] = np.median(values)
        features[f"skew_{col}"] = skew(values)
        features[f"kurtosis_{col}"] = kurtosis(values)

    features["sma_acc"] = np.sum(np.abs(segment["ACC_X"]) + np.abs(segment["ACC_Y"]) + np.abs(segment["ACC_Z"]))

    for col in ["BVP", "EDA", "HR"]:
        features[f"energy_{col}"] = np.sum(np.square(values)) / len(values)


    for col in ["HR", "BVP", "EDA"]:
        fft_values = np.abs(fft(segment[col].values))
        features[f"fft_peak_{col}"] = np.argmax(fft_values)
        features[f"fft_mean_{col}"] = np.mean(fft_values)
        features[f"fft_std_{col}"] = np.std(fft_values)

    features["Segment_ID"] = subject_id

    return features


segment_size = 480

for file in test_file_list:
    try:
        df = pd.read_csv(file)


        subject_id = os.path.basename(file).replace(".csv", "")


        for i in range(0, len(df), segment_size):
            segment = df.iloc[i:i+segment_size]
            if len(segment) == segment_size:
                feature_data.append(extract_features(segment, subject_id))

    except Exception as e:
        print(f"อ่านไฟล์ {file} ไม่ได้: {e}")


test_feature_df = pd.DataFrame(feature_data)

test_feature_df


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(va

Unnamed: 0,mean_BVP,std_BVP,max_BVP,min_BVP,median_BVP,skew_BVP,kurtosis_BVP,ma_BVP,mean_ACC_X,std_ACC_X,...,fft_peak_HR,fft_mean_HR,fft_std_HR,fft_peak_BVP,fft_mean_BVP,fft_std_BVP,fft_peak_EDA,fft_mean_EDA,fft_std_EDA,Segment_ID
0,-0.660568,49.584365,78.714863,-95.118212,7.442161,-0.278681,-1.117319,-27.016843,-45.443401,0.114677,...,0,62.501150,1346.981621,32,382.146490,1017.008324,0,0.277362,5.804965,test006_00139
1,-0.282654,45.027972,100.510150,-102.085505,8.021984,-0.255636,-1.079404,-4.194065,-63.262251,0.295319,...,0,68.188057,1460.711923,32,375.611438,912.229769,0,0.252392,5.220101,test006_00060
2,1.254593,97.369294,154.554149,-194.467723,20.694666,-0.333876,-1.127100,65.794836,-57.339169,0.035139,...,0,60.254981,1313.062584,31,794.803613,1979.852743,0,0.259312,5.402157,test006_00370
3,0.063165,84.857607,148.536971,-195.968385,12.190177,-0.301702,-1.127790,44.462903,-57.410159,0.247891,...,0,59.012063,1279.882461,30,618.274194,1753.319530,0,0.261633,5.436311,test006_00391
4,-0.888846,62.167925,103.825793,-117.624006,6.300406,-0.152799,-1.201254,-40.749343,42.536589,0.109506,...,0,60.601962,1304.853912,31,430.009000,1292.516899,0,0.359393,7.196372,test006_00580
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7005,-0.021821,7.182946,14.357846,-14.613163,0.750254,-0.091152,-1.091981,2.463648,34.084671,0.437056,...,0,73.635805,1595.733280,37,62.334206,144.499602,0,1.832750,25.855977,test004_00095
7006,0.047897,2.972990,7.344961,-7.039800,0.299656,-0.109628,-0.759120,0.858586,-46.286700,0.257063,...,0,74.036080,1598.087795,37,26.447441,59.533150,0,0.937156,19.045929,test004_00020
7007,-0.002356,4.867600,8.980665,-11.299846,0.568210,-0.254652,-1.092727,1.733892,22.318288,0.506093,...,0,73.535363,1568.567161,37,35.723289,100.482552,0,0.070915,1.020532,test004_00064
7008,0.063286,7.163264,14.056709,-13.331737,0.772995,-0.063407,-1.248231,-2.313406,44.474975,0.062850,...,0,66.498307,1442.874010,33,52.278840,147.982346,0,0.893044,16.841715,test004_00622


In [16]:
test_feature_df.columns

Index(['mean_BVP', 'std_BVP', 'max_BVP', 'min_BVP', 'median_BVP', 'skew_BVP',
       'kurtosis_BVP', 'ma_BVP', 'mean_ACC_X', 'std_ACC_X', 'max_ACC_X',
       'min_ACC_X', 'median_ACC_X', 'skew_ACC_X', 'kurtosis_ACC_X', 'ma_ACC_X',
       'mean_ACC_Y', 'std_ACC_Y', 'max_ACC_Y', 'min_ACC_Y', 'median_ACC_Y',
       'skew_ACC_Y', 'kurtosis_ACC_Y', 'ma_ACC_Y', 'mean_ACC_Z', 'std_ACC_Z',
       'max_ACC_Z', 'min_ACC_Z', 'median_ACC_Z', 'skew_ACC_Z',
       'kurtosis_ACC_Z', 'ma_ACC_Z', 'mean_TEMP', 'std_TEMP', 'max_TEMP',
       'min_TEMP', 'median_TEMP', 'skew_TEMP', 'kurtosis_TEMP', 'ma_TEMP',
       'mean_EDA', 'std_EDA', 'max_EDA', 'min_EDA', 'median_EDA', 'skew_EDA',
       'kurtosis_EDA', 'ma_EDA', 'mean_HR', 'std_HR', 'max_HR', 'min_HR',
       'median_HR', 'skew_HR', 'kurtosis_HR', 'ma_HR', 'mean_IBI', 'std_IBI',
       'max_IBI', 'min_IBI', 'median_IBI', 'skew_IBI', 'kurtosis_IBI',
       'ma_IBI', 'sma_acc', 'energy_BVP', 'energy_EDA', 'energy_HR',
       'fft_peak_HR', 'fft_mean_H

In [17]:
X_selected.columns

Index(['std_BVP', 'max_BVP', 'min_BVP', 'kurtosis_BVP', 'mean_ACC_X',
       'std_ACC_X', 'max_ACC_X', 'min_ACC_X', 'median_ACC_X', 'ma_ACC_X',
       'mean_ACC_Y', 'std_ACC_Y', 'max_ACC_Y', 'min_ACC_Y', 'median_ACC_Y',
       'ma_ACC_Y', 'mean_ACC_Z', 'std_ACC_Z', 'max_ACC_Z', 'min_ACC_Z',
       'median_ACC_Z', 'ma_ACC_Z', 'mean_TEMP', 'max_TEMP', 'min_TEMP',
       'median_TEMP', 'ma_TEMP', 'mean_EDA', 'std_EDA', 'max_EDA', 'min_EDA',
       'median_EDA', 'ma_EDA', 'std_HR', 'sma_acc', 'fft_mean_HR',
       'fft_mean_BVP', 'fft_std_BVP', 'fft_mean_EDA', 'fft_std_EDA'],
      dtype='object')

In [18]:
selected_columns = X_selected.columns

common_columns = [col for col in selected_columns if col in test_feature_df.columns]

test_feature_df1 = test_feature_df[common_columns]
test_feature_df1.columns

Index(['std_BVP', 'max_BVP', 'min_BVP', 'kurtosis_BVP', 'mean_ACC_X',
       'std_ACC_X', 'max_ACC_X', 'min_ACC_X', 'median_ACC_X', 'ma_ACC_X',
       'mean_ACC_Y', 'std_ACC_Y', 'max_ACC_Y', 'min_ACC_Y', 'median_ACC_Y',
       'ma_ACC_Y', 'mean_ACC_Z', 'std_ACC_Z', 'max_ACC_Z', 'min_ACC_Z',
       'median_ACC_Z', 'ma_ACC_Z', 'mean_TEMP', 'max_TEMP', 'min_TEMP',
       'median_TEMP', 'ma_TEMP', 'mean_EDA', 'std_EDA', 'max_EDA', 'min_EDA',
       'median_EDA', 'ma_EDA', 'std_HR', 'sma_acc', 'fft_mean_HR',
       'fft_mean_BVP', 'fft_std_BVP', 'fft_mean_EDA', 'fft_std_EDA'],
      dtype='object')

In [19]:
predictions = stacking_model.predict(test_feature_df1)

# แปลง Label กลับจากตัวเลข → ตัวอักษร (W, N, R)
predictions = label_encoder.inverse_transform(predictions)

  y = column_or_1d(y, warn=True)


In [20]:
submission_df = pd.DataFrame({
    "id": test_feature_df["Segment_ID"],
    "labels": predictions
})
submission_df = submission_df.sort_values(by=["id"]).reset_index(drop=True)

In [21]:
submission_df.head()

Unnamed: 0,id,labels
0,test001_00000,W
1,test001_00001,W
2,test001_00002,W
3,test001_00003,W
4,test001_00004,W


In [22]:
submission_df.to_csv("submission_stack.csv", index=False)