In [1]:
!pip install mne

Collecting mne
  Downloading mne-1.7.0-py3-none-any.whl (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m47.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mne
Successfully installed mne-1.7.0


In [2]:
# Mount Google Drive at /content/drive; every input and output path used
# below lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import mne
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter

path1 = []  # preprocessed recordings (EDF)
path2 = []  # sleep-scoring annotation files (EDF)

num_data_max = 154  # highest subject number to load (e.g. 5 for SN001..SN005)

# Subject numbers listed as missing; they are skipped when building paths
missing_indices = ['014', '064', '135']

# Build both path lists in a single pass so that entry k of path1 and
# entry k of path2 always refer to the same subject.
for subject_no in range(81, num_data_max + 1):
    idx = str(subject_no).zfill(3)
    if idx not in missing_indices:
        path1.append("/content/drive/MyDrive/데캡디(딥슬립딥드림)공유문서함/DATA/Pre_rev/pre_SN" + idx + ".edf")
        path2.append("/content/drive/MyDrive/데캡디(딥슬립딥드림)공유문서함/DATA/sleep_data/recordings/SN" + idx + "_sleepscoring.edf")

# Read every recording fully into memory (preload=True)
pre = [mne.io.read_raw_edf(edf_path, preload=True) for edf_path in path1]

# Mapping from channel name to channel type, applied to every recording
channel_types = {
    'EEG F4-M1': 'eeg',  # index 0
    'EEG C4-M1': 'eeg',  # index 1
    'EEG O2-M1': 'eeg',  # index 2
    'EEG C3-M2': 'eeg',  # index 3
    'EMG chin': 'emg',  # index 4, EMG chin
    'EOG E1-M2': 'eog',  # index 5, EOG
    'EOG E2-M2': 'eog',  # index 6, EOG
    'ECG': 'ecg'   # index 7, ECG
}

# Apply the channel types
for recording in pre:
    recording.set_channel_types(channel_types)

# Load the annotation file of every subject
annotations = [mne.read_annotations(scoring_path) for scoring_path in path2]

# Attach each annotation set to the recording at the same list position
for recording, scoring in zip(pre, annotations):
    recording.set_annotations(scoring)



def normalize_raw_data(raw, channel_types=None):
    """Standardize the selected channels of an MNE Raw object in place.

    Every picked channel is scaled independently with ``StandardScaler``
    (fitted on that channel's samples over the whole recording), and the
    result overwrites the recording's data buffer.

    Parameters
    ----------
    raw : mne.io.Raw
        Recording to normalize. It is loaded into memory first if it is not
        preloaded. The object is modified in place.
    channel_types : dict or None
        Keyword arguments forwarded to ``mne.pick_types`` to choose which
        channels are normalized. ``None`` (the default) selects EEG, EOG,
        EMG and ECG channels.

    Returns
    -------
    The same ``raw`` object that was passed in.
    """
    # Build the default per call instead of sharing one mutable dict
    # between all invocations.
    if channel_types is None:
        channel_types = {'eeg': True, 'eog': True, 'emg': True, 'ecg': True}

    # Ensure the data is preloaded
    if not raw.preload:
        raw.load_data()

    # Pick specified channel types
    channel_indices = mne.pick_types(raw.info, **channel_types)
    if len(channel_indices) == 0:
        # Nothing matched: StandardScaler rejects input with zero features,
        # so leave the recording untouched instead of raising.
        return raw

    # StandardScaler standardizes column-wise, so present the data as
    # (samples, channels) and transpose back when writing the result.
    samples_by_channel = raw.get_data(picks=channel_indices).T
    scaled_data = StandardScaler().fit_transform(samples_by_channel)

    # Replace original data with scaled data
    raw._data[channel_indices, :] = scaled_data.T

    return raw

# Normalize every recording in `pre`; normalize_raw_data overwrites each
# Raw object's data in place, so the return value does not need to be kept.
for recording in pre:
    normalize_raw_data(recording)

def annotation_remove(annotations):
    """Return a copy of ``annotations`` without the lights-off/on markers.

    Parameters
    ----------
    annotations : mne.Annotations
        Annotation set to filter. It is not modified.

    Returns
    -------
    mne.Annotations
        New annotation set containing every entry whose description does not
        start with 'Lights off' or 'Lights on', in the original order and
        with the original ``orig_time``.
    """
    # Determine the surviving positions once; the previous version tested
    # membership in a list for every element of three separate
    # comprehensions, which is quadratic in the number of annotations.
    kept = [
        pos for pos, desc in enumerate(annotations.description)
        if not desc.startswith(('Lights off', 'Lights on'))
    ]

    return mne.Annotations(
        onset=[annotations.onset[pos] for pos in kept],
        duration=[annotations.duration[pos] for pos in kept],
        description=[annotations.description[pos] for pos in kept],
        orig_time=annotations.orig_time,
    )

# Remove the 'Lights off' / 'Lights on' markers from every annotation set
for i in range(len(annotations)):
    annotations[i] = annotation_remove(annotations[i])

all_possible_labels = ['Sleep stage W','Sleep stage R','Sleep stage N1','Sleep stage N2','Sleep stage N3',]

# Fit on the fixed label set so every recording shares one integer coding
label_encoder = LabelEncoder().fit(all_possible_labels)

# Integer-encoded stage labels, one array per recording
encoded_labels = [
    label_encoder.transform(np.array(scoring.description))
    for scoring in annotations
]

epoch_duration = 30  # seconds per epoch

# Create events every 30 seconds
events = [
    mne.make_fixed_length_events(recording, duration=epoch_duration)
    for recording in pre
]

# Create the epochs. tmax stops exactly one sample short of the next epoch,
# using each recording's own sampling rate rather than assuming 100 Hz.
epochs = [
    mne.Epochs(
        recording,
        recording_events,
        tmin=0.0,
        tmax=epoch_duration - 1 / recording.info['sfreq'],
        baseline=None,
        preload=True,
    )
    for recording, recording_events in zip(pre, events)
]

num_epochs = 800  # number of epochs kept from each recording

# Keep a recording only if BOTH its data and its labels provide at least
# num_epochs entries. Filtering the two lists independently could drop a
# recording from one list but not the other, after which data_list[k] and
# labels_list[k] would silently belong to different subjects.
# NOTE(review): label k is paired with epoch k purely by position; this
# assumes the first remaining annotation starts with the first epoch and
# that annotations are contiguous 30 s stages — TODO confirm.
data_list = []
labels_list = []
for i, (recording_epochs, recording_labels) in enumerate(zip(epochs, encoded_labels)):
    epoch_data = recording_epochs.get_data(copy=False)
    if epoch_data.shape[0] >= num_epochs and len(recording_labels) >= num_epochs:
        data_list.append(epoch_data[:num_epochs].astype(np.float32))
        labels_list.append(recording_labels[:num_epochs])
    else:
        print('dropped'+str(i))

Extracting EDF parameters from /content/drive/MyDrive/데캡디(딥슬립딥드림)공유문서함/DATA/Pre_rev/pre_SN081.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 3446899  =      0.000 ... 34468.990 secs...
Extracting EDF parameters from /content/drive/MyDrive/데캡디(딥슬립딥드림)공유문서함/DATA/Pre_rev/pre_SN082.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 2235599  =      0.000 ... 22355.990 secs...
Extracting EDF parameters from /content/drive/MyDrive/데캡디(딥슬립딥드림)공유문서함/DATA/Pre_rev/pre_SN083.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 3263699  =      0.000 ... 32636.990 secs...
Extracting EDF parameters from /content/drive/MyDrive/데캡디(딥슬립딥드림)공유문서함/DATA/Pre_rev/pre_SN084.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 2630899  =      0.000 ... 26308.990 secs...
Extracting EDF parameters from /

In [4]:
# Save every kept recording as its own (data, label) tensor pair.
# The whole dataset is deliberately NOT stacked into one array first: that
# array and its tensor copy were never used and only multiplied peak memory.
folder_dir = '/content/drive/MyDrive/데캡디(딥슬립딥드림)공유문서함/DATA/tensor_one/'
files = os.listdir(folder_dir)

# Continue numbering after the pairs already present in the folder
# (two files per recording: data_*.pt and label_*.pt).
idx = len(files) // 2

# Fail before writing anything if the two lists cannot be paired one-to-one.
if len(data_list) != len(labels_list):
    raise ValueError(
        f'data_list has {len(data_list)} recordings but labels_list has '
        f'{len(labels_list)}; they must match to be saved as pairs'
    )

for i, (recording_data, recording_labels) in enumerate(zip(data_list, labels_list)):
    data_array = np.array(recording_data)
    labels_array = np.array(recording_labels)

    data_train_tensor = torch.tensor(data_array, dtype=torch.float32)
    # NOTE(review): labels are stored as float32, as before; integer class
    # indices may be what downstream training expects — confirm with the loader.
    labels_train_tensor = torch.tensor(labels_array, dtype=torch.float32)

    torch.save(data_train_tensor, os.path.join(folder_dir, f'data_{idx+i}.pt'))
    torch.save(labels_train_tensor, os.path.join(folder_dir, f'label_{idx+i}.pt'))

In [5]:
# First line: recordings kept in labels_list; second line: all recordings
# that have encoded labels.
print(len(labels_list), len(encoded_labels), sep='\n')

54
73


No data (3 subjects): SN014, SN064, SN135

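Dropped in 1st run (11 recordings; numbers are the list positions printed by the `dropped…` messages): 21, 29, 31, 43, 44, 52, 55, 60, 66, 67, 73
Dropped in 2nd run (19 recordings; same numbering): 1,5,19,12,15,25,26,28,33,35,36,42,45,49,54,57,62,63,68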

1st run: 67 recordings kept

2nd run: 54 recordings kept

Total: 121 recordings