In [2]:
import os
import torch
import numpy as np
import random
import torch.nn as nn
from scipy.fft import fft
from scipy.signal import stft
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy.signal import welch
import os, warnings, pickle
from torch.autograd import Variable
from torch.utils.data import DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import csv
from collections import OrderedDict


### Preprocess data


In [None]:
select_channels = ['ch1_LF5 - FpZ', 'ch2_OTE_L-FpZ', 'ch4_RF6-FpZ', 'ch5_OTE_R-FpZ']

sampling_rate = 125
nperseg = 64
data_n = 1000
window_size_in_seconds = 8
window_size = window_size_in_seconds*sampling_rate
data_dir = '/data1/Test'
ex_files = []
start_idx = 32*sampling_rate
step = 1
files = [f for f in os.listdir(data_dir) if f.endswith('.csv') and not f.endswith('.processed.csv') and f not in ex_files]

# Define channels used
for f in files:
    
    csv_w = None
    out_f = ''
    print('input_f: %s'%f)
    df = pd.read_csv(os.path.join(data_dir, f))[select_channels]
    padding = np.random.randint(df.min().min(), df.max().max(), size=(window_size, df.shape[1]))
    padding = pd.DataFrame(padding, columns=df.columns)
    df = pd.concat([padding, df], ignore_index=True)
    
    # Create y vector with empty values
    y = np.empty(shape=len(df))
    y[:] = np.nan
    # calulate labels
    num_elements, l_step = data_n, int(len(df)/11)
    random_indices = np.random.choice(np.arange(0, 2*l_step + window_size), 2*num_elements, replace=True)
    y[random_indices] = 0.0
    random_indices = np.random.choice(np.arange(2*l_step + window_size, 4*l_step + window_size), num_elements, replace=True)
    y[random_indices] = 0.25
    random_indices = np.random.choice(np.arange(4*l_step + window_size, 6*l_step + window_size), num_elements, replace=True)
    y[random_indices] = 0.75
    random_indices = np.random.choice(np.arange(6*l_step + window_size, 11*l_step), 2*num_elements, replace=True)
    y[random_indices] = 1.0
    df['y'] = y
    samples = []
    for i in tqdm(range(window_size, len(df), step), desc=f'Processing {f}'):
        stft_features_list = []
        y = df['y'][i]
        for channel in select_channels:
            start_idx = i - window_size
            signal = df[channel].values[start_idx:i]
            # Short-time Fourier Transform
            _, _, Zxx = stft(signal, fs=sampling_rate, nperseg=nperseg)
            Zxx = Zxx[1:, 1:] # Get 32x32 feature map
            r = 20 * np.log10(np.abs(Zxx)) # real values
            stft_features_list.append(r)
        stft_features = np.stack(stft_features_list, axis=0).reshape(1, len(select_channels)*32*32).tolist()[0]
        feature_n = len(stft_features)
        # stft_features.append(y)
        stft_features_dict = OrderedDict()
        for i, v in enumerate(stft_features):
            stft_features_dict[str(i)] = v
        stft_features_dict['y'] = y
        if csv_w is None:
            out_f = os.path.join(data_dir, f.rpartition('.csv')[0] + f'feature_n{feature_n}.window_size_{window_size_in_seconds}s.processed.csv')
            csv_w = csv.DictWriter(open(out_f, 'w'), fieldnames=list(stft_features_dict.keys()))
            csv_w.writeheader()
        csv_w.writerow(stft_features_dict)
        # samples.append(stft_features)
    # df = pd.DataFrame(samples)
    # df.columns = [*df.columns[:-1], 'y']
    # feature_n = df.shape[1] - 1
    
    # df.to_csv(out_f, index=False, header=True)
    print('save to: %s'%out_f)
    # print(df.head(), df.shape)
    

input_f: Copy of eeg_focus_6sessions_cut_S13.csv


Processing Copy of eeg_focus_6sessions_cut_S13.csv: 100%|██████████| 83746/83746 [07:46<00:00, 179.42it/s]


save to: /data1/Test/Copy of eeg_focus_6sessions_cut_S13feature_n4096.window_size_8s.processed.csv
input_f: Copy of eeg_focus_6sessions_cut_S8.csv


Processing Copy of eeg_focus_6sessions_cut_S8.csv: 100%|██████████| 78638/78638 [08:14<00:00, 158.92it/s]


save to: /data1/Test/Copy of eeg_focus_6sessions_cut_S8feature_n4096.window_size_8s.processed.csv
input_f: Copy of eeg_focus_6sessions_cut_S3.csv


Processing Copy of eeg_focus_6sessions_cut_S3.csv: 100%|██████████| 77889/77889 [07:33<00:00, 171.84it/s]


save to: /data1/Test/Copy of eeg_focus_6sessions_cut_S3feature_n4096.window_size_8s.processed.csv
input_f: Copy of eeg_focus_6sessions_cut_S4.csv


Processing Copy of eeg_focus_6sessions_cut_S4.csv: 100%|██████████| 78829/78829 [07:35<00:00, 173.20it/s]


save to: /data1/Test/Copy of eeg_focus_6sessions_cut_S4feature_n4096.window_size_8s.processed.csv
input_f: Copy of eeg_focus_6sessions_cut_S10.csv


Processing Copy of eeg_focus_6sessions_cut_S10.csv: 100%|██████████| 73289/73289 [07:41<00:00, 158.85it/s]


save to: /data1/Test/Copy of eeg_focus_6sessions_cut_S10feature_n4096.window_size_8s.processed.csv
input_f: Copy of eeg_focus_6sessions_cut_S6.csv


  r = 20 * np.log10(np.abs(Zxx)) # real values
Processing Copy of eeg_focus_6sessions_cut_S6.csv: 100%|██████████| 84800/84800 [08:58<00:00, 157.61it/s]


save to: /data1/Test/Copy of eeg_focus_6sessions_cut_S6feature_n4096.window_size_8s.processed.csv
input_f: Copy of eeg_focus_6sessions_cut_S9.csv


Processing Copy of eeg_focus_6sessions_cut_S9.csv: 100%|██████████| 83758/83758 [10:10<00:00, 137.17it/s]


save to: /data1/Test/Copy of eeg_focus_6sessions_cut_S9feature_n4096.window_size_8s.processed.csv
input_f: Copy of eeg_focus_6sessions_cut_S7.csv


Processing Copy of eeg_focus_6sessions_cut_S7.csv: 100%|██████████| 83192/83192 [07:46<00:00, 178.25it/s]


save to: /data1/Test/Copy of eeg_focus_6sessions_cut_S7feature_n4096.window_size_8s.processed.csv
input_f: Copy of eeg_focus_6sessions_cut_S1.csv


Processing Copy of eeg_focus_6sessions_cut_S1.csv: 100%|██████████| 85057/85057 [07:56<00:00, 178.41it/s]


save to: /data1/Test/Copy of eeg_focus_6sessions_cut_S1feature_n4096.window_size_8s.processed.csv
input_f: Copy of eeg_focus_6sessions_cut_S2.csv


Processing Copy of eeg_focus_6sessions_cut_S2.csv:  35%|███▌      | 29780/84850 [02:46<05:03, 181.37it/s]

In [None]:
df = pd.read_csv('Data/Copy of eeg_focus_6sessions_cut_S2.csv')[select_channels]
df
df['y'] = 1
df['y'][100]
len(df['y'])