In [1]:
from google.colab import userdata
import json

# Get the Kaggle credentials from Colab's userdata
username = userdata.get("KAGGLE_USER")
key = userdata.get("KAGGLE_KEY")

# Echo the credentials into the kaggle.json file
!mkdir -p ~/.kaggle
!echo '{{"username":"{username}","key":"{key}"}}' > ~/.kaggle/kaggle.json
!chmod 600 /root/.kaggle/kaggle.json

!kaggle competitions download -c spai-signal-sleep-staging-classification
!unzip /content/spai-signal-sleep-staging-classification.zip && rm -rf /content/spai-signal-sleep-staging-classification.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: test_segment/test_segment/test004/test004_00448.csv  
  inflating: test_segment/test_segment/test004/test004_00449.csv  
  inflating: test_segment/test_segment/test004/test004_00450.csv  
  inflating: test_segment/test_segment/test004/test004_00451.csv  
  inflating: test_segment/test_segment/test004/test004_00452.csv  
  inflating: test_segment/test_segment/test004/test004_00453.csv  
  inflating: test_segment/test_segment/test004/test004_00454.csv  
  inflating: test_segment/test_segment/test004/test004_00455.csv  
  inflating: test_segment/test_segment/test004/test004_00456.csv  
  inflating: test_segment/test_segment/test004/test004_00457.csv  
  inflating: test_segment/test_segment/test004/test004_00458.csv  
  inflating: test_segment/test_segment/test004/test004_00459.csv  
  inflating: test_segment/test_segment/test004/test004_00460.csv  
  inflating: test_segment/test_segment/test004/test004_00461.csv

In [2]:
import pandas as pd
import glob

file_list = glob.glob("/content/train/train/*.csv")
all_data = []
for file in file_list:
    try:
        df = pd.read_csv(file)

        all_data.append(df)

    except Exception as e:
        print(f"⚠️ อ่านไฟล์ {file} ไม่ได้: {e}")

combined_df = pd.concat(all_data, ignore_index=True)
combined_df

Unnamed: 0,BVP,ACC_X,ACC_Y,ACC_Z,TEMP,EDA,HR,IBI,Sleep_Stage
0,14.911117,45.482407,42.507783,6.919871,32.414649,0.192788,95.098866,0.648740,W
1,3.383955,45.048270,42.507783,6.919872,32.414666,0.192721,95.098832,0.648890,W
2,-8.372907,45.540109,42.507783,6.919871,32.414645,0.192822,95.098678,0.648342,W
3,-25.222627,45.090111,42.507783,6.919872,32.414665,0.192663,95.099039,0.649820,W
4,-38.976037,45.147290,42.507783,6.919872,32.414653,0.193540,95.098517,0.639262,W
...,...,...,...,...,...,...,...,...,...
32037595,-120.455926,1.977580,-39.542124,49.120572,34.530188,0.283600,62.378044,0.679630,W
32037596,-23.769004,1.977190,-39.542124,48.796908,34.530081,0.284480,62.376891,0.679630,W
32037597,66.115742,1.976646,-39.542124,48.566786,34.530254,0.285065,62.378633,0.679631,W
32037598,116.899550,1.977711,-39.542124,49.364336,34.530080,0.284926,62.376939,0.679630,W


In [3]:
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis
from scipy.fftpack import fft

segment_size = 480

def extract_features(segment):
    features = {}

    for col in ["BVP", "ACC_X", "ACC_Y", "ACC_Z", "TEMP", "EDA", "HR", "IBI"]:
        values = segment[col].values
        features[f"mean_{col}"] = np.mean(values)
        features[f"std_{col}"] = np.std(values)
        features[f"max_{col}"] = np.max(values)
        features[f"min_{col}"] = np.min(values)
        features[f"median_{col}"] = np.median(values)
        features[f"skew_{col}"] = skew(values)
        features[f"kurtosis_{col}"] = kurtosis(values)


    features["sma_acc"] = np.sum(np.abs(segment["ACC_X"]) + np.abs(segment["ACC_Y"]) + np.abs(segment["ACC_Z"]))


    for col in ["BVP", "EDA", "HR"]:
        features[f"energy_{col}"] = np.sum(np.square(values)) / len(values)


    for col in ["HR", "BVP", "EDA"]:
        fft_values = np.abs(fft(segment[col].values))
        features[f"fft_peak_{col}"] = np.argmax(fft_values)
        features[f"fft_mean_{col}"] = np.mean(fft_values)
        features[f"fft_std_{col}"] = np.std(fft_values)

    features["Sleep_Stage"] = segment["Sleep_Stage"].values[0]

    return features


feature_list = []
for i in range(0, len(combined_df), segment_size):
    segment = combined_df.iloc[i:i+segment_size]
    if len(segment) == segment_size:
        feature_list.append(extract_features(segment))


feature_df = pd.DataFrame(feature_list)

  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)


In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import LabelEncoder


X = feature_df.drop(columns=["Sleep_Stage"])

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(feature_df["Sleep_Stage"])

label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

selector = SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=42))
selector.fit(X, y)

X_selected = X.loc[:, selector.get_support()]
X_selected


Unnamed: 0,std_BVP,max_BVP,min_BVP,kurtosis_BVP,mean_ACC_X,std_ACC_X,max_ACC_X,min_ACC_X,median_ACC_X,mean_ACC_Y,...,std_EDA,max_EDA,min_EDA,median_EDA,std_HR,sma_acc,fft_mean_BVP,fft_std_BVP,fft_mean_EDA,fft_std_EDA
0,26.291493,48.284755,-57.449019,-1.111277,45.165844,0.261896,45.707736,44.270696,45.176382,42.507783,...,0.002249,0.198254,0.186008,0.192636,0.317975,45411.301335,190.795172,543.501356,0.217476,4.210796
1,25.929263,40.907137,-52.264853,-1.155154,45.029770,0.251918,45.673592,44.341329,45.051399,42.507783,...,0.001845,0.194359,0.183286,0.187587,0.246141,45347.964332,228.643258,520.041455,0.208761,4.107361
2,27.753779,46.445247,-55.845860,-1.149710,44.961803,0.292123,45.670041,44.269740,44.965142,42.508803,...,0.002013,0.189814,0.179474,0.183920,0.307359,45319.294511,219.240902,567.200852,0.208677,4.023205
3,56.332723,280.928679,-318.154533,6.749922,38.626648,9.160512,53.464631,2.915958,39.008119,45.623274,...,0.007806,0.209380,0.178366,0.186499,0.218044,46663.985850,722.143310,1000.866547,0.243123,4.115501
4,30.516663,57.453865,-74.540853,-1.087406,38.736375,0.290877,40.085396,37.804123,38.594996,44.251330,...,0.001828,0.186869,0.177304,0.181593,0.586763,49317.212252,200.169454,637.956808,0.202163,3.981334
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66740,107.054479,428.432224,-353.396214,1.230482,-0.122816,5.700853,27.757586,-24.403296,-1.977106,-38.135461,...,0.025078,0.287697,0.178290,0.282587,0.805933,44106.819942,1418.017297,1868.257982,0.483822,6.029153
66741,161.583503,522.321010,-621.780273,0.674714,5.533034,0.496310,7.008094,4.743891,5.670997,-36.592079,...,0.001288,0.282707,0.275713,0.278602,0.492061,44584.400404,1867.622007,3007.988398,0.293258,6.092018
66742,116.285860,576.291736,-695.631142,14.342059,5.059931,0.869299,11.517110,-1.123922,4.942765,-39.506829,...,0.003393,0.289190,0.263725,0.282958,0.707604,44732.379552,1465.921136,2084.820832,0.316170,6.172365
66743,19.534165,47.234923,-40.936830,-0.510079,5.179712,0.377956,6.173105,4.534012,4.944062,-39.553477,...,0.000948,0.287667,0.280857,0.285011,0.654003,44731.646957,235.411868,357.437660,0.298770,6.243164


In [7]:
!pip install lightgbm catboost
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, f1_score
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
import xgboost as xgb



X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)


###------------------------------------------------------------------------------###


# XGBoost
xgb_model = xgb.XGBClassifier(n_estimators=200, max_depth=10, learning_rate=0.05, random_state=42)
xgb_model.fit(X_train, y_train)
xgb_pred = xgb_model.predict(X_test)

# # รวมหมด
# estimators = [
#     ('xgb', xgb.XGBClassifier(n_estimators=200, max_depth=10, learning_rate=0.05,
#                                random_state=42)),
#     ('lgbm', LGBMClassifier(n_estimators=200, max_depth=10, learning_rate=0.05,
#                              random_state=42)),
# ]

# stacking_model = StackingClassifier(
#     estimators=estimators,
#     final_estimator=CatBoostClassifier(verbose=0, random_state=42)
# )

# stacking_model.fit(X_train, y_train)
# stacking_pred = stacking_model.predict(X_test)


# ดูค่า
xgb_acc = accuracy_score(y_test, xgb_pred)
xgb_f1 = f1_score(y_test, xgb_pred, average='weighted')
# stacking_acc = accuracy_score(y_test, stacking_pred)
# stacking_f1 = f1_score(y_test, stacking_pred, average='weighted')


# print(f"🎯 Random Forest: Accuracy = {rf_acc:.4f}, F1-score = {rf_f1:.4f}")
print(f"🚀 XGBoost: Accuracy = {xgb_acc:.4f}, F1-score = {xgb_f1:.4f}")
# print(f"🔗 Stacking Model: Accuracy = {stacking_acc:.4f}, F1-score = {stacking_f1:.4f}")

🚀 XGBoost: Accuracy = 0.7591, F1-score = 0.7384


In [8]:
import pandas as pd
import glob
import os
import numpy as np
from scipy.stats import skew, kurtosis
from scipy.fftpack import fft


test_path = "/content/test_segment/test_segment/"

test_file_list = glob.glob(os.path.join(test_path, "**/*.csv"), recursive=True)


feature_data = []

def extract_features(segment, subject_id):
    features = {}


    for col in ["BVP", "ACC_X", "ACC_Y", "ACC_Z", "TEMP", "EDA", "HR", "IBI"]:
        values = segment[col].values
        features[f"mean_{col}"] = np.mean(values)
        features[f"std_{col}"] = np.std(values)
        features[f"max_{col}"] = np.max(values)
        features[f"min_{col}"] = np.min(values)
        features[f"median_{col}"] = np.median(values)
        features[f"skew_{col}"] = skew(values)
        features[f"kurtosis_{col}"] = kurtosis(values)

    features["sma_acc"] = np.sum(np.abs(segment["ACC_X"]) + np.abs(segment["ACC_Y"]) + np.abs(segment["ACC_Z"]))

    for col in ["BVP", "EDA", "HR"]:
        features[f"energy_{col}"] = np.sum(np.square(values)) / len(values)


    for col in ["HR", "BVP", "EDA"]:
        fft_values = np.abs(fft(segment[col].values))
        features[f"fft_peak_{col}"] = np.argmax(fft_values)
        features[f"fft_mean_{col}"] = np.mean(fft_values)
        features[f"fft_std_{col}"] = np.std(fft_values)

    features["Segment_ID"] = subject_id

    return features


segment_size = 480

for file in test_file_list:
    try:
        df = pd.read_csv(file)


        subject_id = os.path.basename(file).replace(".csv", "")


        for i in range(0, len(df), segment_size):
            segment = df.iloc[i:i+segment_size]
            if len(segment) == segment_size:
                feature_data.append(extract_features(segment, subject_id))

    except Exception as e:
        print(f"อ่านไฟล์ {file} ไม่ได้: {e}")


test_feature_df = pd.DataFrame(feature_data)

test_feature_df


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(values)
  features[f"kurtosis_{col}"] = kurtosis(values)
  features[f"skew_{col}"] = skew(va

Unnamed: 0,mean_BVP,std_BVP,max_BVP,min_BVP,median_BVP,skew_BVP,kurtosis_BVP,mean_ACC_X,std_ACC_X,max_ACC_X,...,fft_peak_HR,fft_mean_HR,fft_std_HR,fft_peak_BVP,fft_mean_BVP,fft_std_BVP,fft_peak_EDA,fft_mean_EDA,fft_std_EDA,Segment_ID
0,-0.025935,37.522355,48.748707,-90.544772,12.698174,-0.743178,-0.735831,-31.017282,3.697295e-01,-30.447316,...,0,46.380310,960.550161,23,384.199399,726.771046,0,0.798633,16.882403,test004_00613
1,-0.220381,41.726050,56.341262,-84.029505,12.670518,-0.534850,-1.093724,18.972967,2.582884e-01,19.882088,...,0,45.402945,986.145197,23,329.707045,852.658743,0,0.182026,3.504036,test004_00353
2,-0.354701,32.065587,44.368052,-77.182286,10.972864,-0.699558,-0.779642,-32.986795,3.431552e-01,-32.386448,...,0,45.095292,971.507766,22,347.988217,610.329009,0,1.163651,24.817162,test004_00531
3,0.297502,41.570117,60.178192,-83.170821,12.967591,-0.566591,-1.039768,18.862961,1.828028e-01,19.651642,...,0,45.892705,979.406031,22,378.021984,828.623968,0,0.186888,3.673434,test004_00377
4,-0.265774,75.725234,136.284044,-133.758542,6.611991,-0.091832,-1.128781,-63.266325,2.029195e-02,-62.857946,...,0,47.810358,1041.184571,24,643.216814,1529.305503,0,0.563822,11.850456,test004_00155
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7827,-0.032133,84.918341,150.496060,-154.376607,-4.700358,0.044703,-1.084488,-55.351038,6.536928e-02,-54.596671,...,0,47.727648,1018.092420,24,687.653510,1728.720040,0,0.213969,4.435460,test007_00661
7828,-0.654500,98.426213,170.443497,-190.179671,-0.994370,-0.077819,-1.057983,-57.324753,7.749269e-02,-56.488967,...,0,45.895070,995.798008,23,810.822116,1998.218807,0,0.211914,4.358006,test007_00557
7829,1.118261,85.446069,148.327437,-165.359256,-1.825652,-0.004029,-1.043027,-57.336079,5.266296e-09,-57.336079,...,0,46.267335,1009.298973,23,726.440771,1725.508275,0,0.212994,4.408151,test007_00606
7830,-0.205542,85.973781,153.564899,-154.642716,-3.711371,-0.009079,-1.097900,-56.387700,1.313705e-01,-56.183108,...,0,46.701360,1001.704013,23,485.429372,1819.970960,0,0.212614,4.385789,test007_00636


In [10]:
selected_columns = X_selected.columns

common_columns = [col for col in selected_columns if col in test_feature_df.columns]

test_feature_df1 = test_feature_df[common_columns]

predictions = xgb_model.predict(test_feature_df1)

# แปลง Label กลับจากตัวเลข → ตัวอักษร (W, N, R)
predictions = label_encoder.inverse_transform(predictions)

submission_df = pd.DataFrame({
    "id": test_feature_df["Segment_ID"],
    "labels": predictions
})
submission_df = submission_df.sort_values(by=["id"]).reset_index(drop=True)
submission_df.head()

Unnamed: 0,id,labels
0,test001_00000,N2
1,test001_00001,N2
2,test001_00002,N2
3,test001_00003,N2
4,test001_00004,N2


In [11]:
submission_df.to_csv("submission_xgb.csv", index=False)