<a href="https://colab.research.google.com/github/x1096023/BCI_final_project/blob/main/CODE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

raw_XGB1

In [None]:
import os
import mne
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Root folder of the dataset (relative to the current working directory).
folder_path = r'ds003478'

# Collect every .set file whose name has characters 4-6 different from '038'
# and characters 22-23 equal to '01'.
# NOTE(review): presumably these positions are the subject id and run number
# of names like 'sub-XXX_task-Rest_run-YY_eeg.set' - verify against the data.
set_files = []
for root, dirs, files in os.walk(folder_path):
    # os.walk yields entries in arbitrary, filesystem-dependent order, while
    # the hard-coded label list is paired with the files by position.
    # Sorting in place makes the traversal (and so the pairing) deterministic.
    # Assumes the labels were written for alphabetical order - TODO confirm.
    dirs.sort()
    for file in sorted(files):
        if (
            file.endswith('.set')
            and file[4:7] != '038'
            and file[22:24] == '01'
        ):
            set_files.append(os.path.join(root, file))

# Manually entered class labels, one per recording, paired with set_files by
# position (the report further down names class 0 'Normal', class 1 'Depressed').
labels = [0,0,0,0,0,0,0,0,0,0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]  # Placeholder values: replace with the actual labels

# Print both counts, then stop if the number of files and labels differ,
# because a mismatch would silently mis-pair recordings and labels.
print(len(set_files), len(labels))
assert len(set_files) == len(labels), "Number of files and labels do not match"

# Per-recording feature vectors and their class labels (filled in below).
features = []
target = []

# Every loaded recording as (samples, channel names, sampling rate), plus the
# set of channel names that are present in all recordings.
all_data = []
common_channels = None

# Frequency bands in Hz used for band-pass filtering: name -> (low, high).
bands = {
    'delta': (1, 4),
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30)
}

# Pass 1: load every recording, resample it and track the shared channels.
for file in set_files:
    print(file)
    raw = mne.io.read_raw_eeglab(file, preload=True)
    raw.resample(100)  # Downsample to 100 Hz
    all_data.append((raw.get_data(), raw.info['ch_names'], raw.info['sfreq']))

    if common_channels is None:
        common_channels = set(raw.info['ch_names'])
    else:
        common_channels.intersection_update(raw.info['ch_names'])

# Sort the shared channel names: iterating a set of strings has no stable
# order between interpreter runs, which would shuffle the feature columns and
# make the results irreproducible despite the fixed random seeds.
common_channels = sorted(common_channels)

# Length (in samples) of the longest recording.
max_length = max(data.shape[1] for data, _, _ in all_data)

# Pass 2: build one feature vector per recording.
for (data, ch_names, sfreq), label in zip(all_data, labels):
    # Keep only the shared channels, in the common (sorted) order.
    ch_indices = [ch_names.index(ch) for ch in common_channels]
    data = data[ch_indices, :]

    # Zero-pad at the end up to max_length. No recording can be longer than
    # the maximum, so no truncation branch is needed.
    # NOTE(review): zero-padding makes the mean/std features depend on the
    # recording length; left unchanged here to keep results comparable.
    padded_data = np.pad(data, ((0, 0), (0, max_length - data.shape[1])), mode='constant')

    band_features = []
    for band, (low_freq, high_freq) in bands.items():
        # Filter with this recording's own sampling rate rather than the one
        # left over in `raw` from the last file of the loading loop.
        filtered_data = mne.filter.filter_data(padded_data, sfreq=sfreq, l_freq=low_freq, h_freq=high_freq)
        mean_features = np.mean(filtered_data, axis=1)
        std_features = np.std(filtered_data, axis=1)
        ptp_features = np.ptp(filtered_data, axis=1)  # Peak-to-peak value
        band_features.extend([mean_features, std_features, ptp_features])

    # Layout: for each band in order, per-channel mean, std, peak-to-peak.
    feature_vector = np.concatenate(band_features)
    features.append(feature_vector)
    target.append(label)

# Convert features and labels to NumPy arrays
features = np.array(features)
target = np.array(target)

# Hold out 30% of the recordings as the test set (seeded split).
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=100,
)

# Wrap both splits in XGBoost's DMatrix container.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Booster configuration for binary classification.
params = dict(
    booster='gbtree',
    objective='binary:logistic',
    eval_metric='logloss',
    eta=0.1,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    seed=42,
)

# Fit 100 boosting rounds on the training split.
bst = xgb.train(params, dtrain, num_boost_round=100)

# Predicted probabilities, thresholded at 0.5 to obtain class labels.
y_pred_prob = bst.predict(dtest)
y_pred = (y_pred_prob > 0.5).astype(int)

# Report accuracy and the per-class metrics.
class_names = ['Normal', 'Depressed']
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=class_names)
print(report)

# Draw the confusion matrix as an annotated heatmap.
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
heatmap_ax = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
heatmap_ax.set_xlabel('Predicted Label')
heatmap_ax.set_ylabel('True Label')
heatmap_ax.set_title('Confusion Matrix')
plt.show()

raw_XGB2

In [None]:
import os
import mne
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Root folder of the dataset (relative to the current working directory).
folder_path = r'ds003478'

# Collect every .set file whose name has characters 4-6 different from '038'
# and characters 22-23 equal to '02'.
# NOTE(review): presumably these positions are the subject id and run number
# of names like 'sub-XXX_task-Rest_run-YY_eeg.set' - verify against the data.
set_files = []
for root, dirs, files in os.walk(folder_path):
    # os.walk yields entries in arbitrary, filesystem-dependent order, while
    # the hard-coded label list is paired with the files by position.
    # Sorting in place makes the traversal (and so the pairing) deterministic.
    # Assumes the labels were written for alphabetical order - TODO confirm.
    dirs.sort()
    for file in sorted(files):
        if (
            file.endswith('.set')
            and file[4:7] != '038'
            and file[22:24] == '02'
        ):
            set_files.append(os.path.join(root, file))

# Manually entered class labels, one per recording, paired with set_files by
# position (the report further down names class 0 'Normal', class 1 'Depressed').
labels = [0,0,0,0,0,0,0,0,0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]  # Placeholder values: replace with the actual labels

# Print both counts, then stop if the number of files and labels differ,
# because a mismatch would silently mis-pair recordings and labels.
print(len(set_files), len(labels))
assert len(set_files) == len(labels), "Number of files and labels do not match"

# Per-recording feature vectors and their class labels (filled in below).
features = []
target = []

# Every loaded recording as (samples, channel names, sampling rate), plus the
# set of channel names that are present in all recordings.
all_data = []
common_channels = None

# Frequency bands in Hz used for band-pass filtering: name -> (low, high).
bands = {
    'delta': (1, 4),
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30)
}

# Pass 1: load every recording, resample it and track the shared channels.
for file in set_files:
    print(file)
    raw = mne.io.read_raw_eeglab(file, preload=True)
    raw.resample(100)  # Downsample to 100 Hz
    all_data.append((raw.get_data(), raw.info['ch_names'], raw.info['sfreq']))

    if common_channels is None:
        common_channels = set(raw.info['ch_names'])
    else:
        common_channels.intersection_update(raw.info['ch_names'])

# Sort the shared channel names: iterating a set of strings has no stable
# order between interpreter runs, which would shuffle the feature columns and
# make the results irreproducible despite the fixed random seeds.
common_channels = sorted(common_channels)

# Length (in samples) of the longest recording.
max_length = max(data.shape[1] for data, _, _ in all_data)

# Pass 2: build one feature vector per recording.
for (data, ch_names, sfreq), label in zip(all_data, labels):
    # Keep only the shared channels, in the common (sorted) order.
    ch_indices = [ch_names.index(ch) for ch in common_channels]
    data = data[ch_indices, :]

    # Zero-pad at the end up to max_length. No recording can be longer than
    # the maximum, so no truncation branch is needed.
    # NOTE(review): zero-padding makes the mean/std features depend on the
    # recording length; left unchanged here to keep results comparable.
    padded_data = np.pad(data, ((0, 0), (0, max_length - data.shape[1])), mode='constant')

    band_features = []
    for band, (low_freq, high_freq) in bands.items():
        # Filter with this recording's own sampling rate rather than the one
        # left over in `raw` from the last file of the loading loop.
        filtered_data = mne.filter.filter_data(padded_data, sfreq=sfreq, l_freq=low_freq, h_freq=high_freq)
        mean_features = np.mean(filtered_data, axis=1)
        std_features = np.std(filtered_data, axis=1)
        ptp_features = np.ptp(filtered_data, axis=1)  # Peak-to-peak value
        band_features.extend([mean_features, std_features, ptp_features])

    # Layout: for each band in order, per-channel mean, std, peak-to-peak.
    feature_vector = np.concatenate(band_features)
    features.append(feature_vector)
    target.append(label)

# Convert features and labels to NumPy arrays
features = np.array(features)
target = np.array(target)

# Hold out 30% of the recordings as the test set (seeded split).
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=100,
)

# Wrap both splits in XGBoost's DMatrix container.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Booster configuration for binary classification.
params = dict(
    booster='gbtree',
    objective='binary:logistic',
    eval_metric='logloss',
    eta=0.1,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    seed=42,
)

# Fit 100 boosting rounds on the training split.
bst = xgb.train(params, dtrain, num_boost_round=100)

# Predicted probabilities, thresholded at 0.5 to obtain class labels.
y_pred_prob = bst.predict(dtest)
y_pred = (y_pred_prob > 0.5).astype(int)

# Report accuracy and the per-class metrics.
class_names = ['Normal', 'Depressed']
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=class_names)
print(report)

# Draw the confusion matrix as an annotated heatmap.
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
heatmap_ax = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
heatmap_ax.set_xlabel('Predicted Label')
heatmap_ax.set_ylabel('True Label')
heatmap_ax.set_title('Confusion Matrix')
plt.show()

raw_forest1

In [None]:
import os
import mne
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Root folder of the dataset (relative to the current working directory).
folder_path = r'ds003478'

# Collect every .set file whose name has characters 4-6 different from '038'
# and characters 22-23 equal to '01'.
# NOTE(review): presumably these positions are the subject id and run number
# of names like 'sub-XXX_task-Rest_run-YY_eeg.set' - verify against the data.
set_files = []
for root, dirs, files in os.walk(folder_path):
    # os.walk yields entries in arbitrary, filesystem-dependent order, while
    # the hard-coded label list is paired with the files by position.
    # Sorting in place makes the traversal (and so the pairing) deterministic.
    # Assumes the labels were written for alphabetical order - TODO confirm.
    dirs.sort()
    for file in sorted(files):
        if (
            file.endswith('.set')
            and file[4:7] != '038'
            and file[22:24] == '01'
        ):
            set_files.append(os.path.join(root, file))

# Manually entered class labels, one per recording, paired with set_files by
# position (the report further down names class 0 'Normal', class 1 'Depressed').
labels = [0,0,0,0,0,0,0,0,0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]  # Placeholder values: replace with the actual labels

# Print both counts, then stop if the number of files and labels differ,
# because a mismatch would silently mis-pair recordings and labels.
print(len(set_files), len(labels))
assert len(set_files) == len(labels), "Number of files and labels do not match"

# Per-recording feature vectors and their class labels (filled in below).
features = []
target = []

# Every loaded recording as (samples, channel names, sampling rate), plus the
# set of channel names that are present in all recordings.
all_data = []
common_channels = None

# Frequency bands in Hz used for band-pass filtering: name -> (low, high).
bands = {
    'delta': (1, 4),
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30)
}

# Pass 1: load every recording, resample it and track the shared channels.
for file in set_files:
    print(file)
    raw = mne.io.read_raw_eeglab(file, preload=True)
    raw.resample(100)  # Downsample to 100 Hz
    all_data.append((raw.get_data(), raw.info['ch_names'], raw.info['sfreq']))

    if common_channels is None:
        common_channels = set(raw.info['ch_names'])
    else:
        common_channels.intersection_update(raw.info['ch_names'])

# Sort the shared channel names: iterating a set of strings has no stable
# order between interpreter runs, which would shuffle the feature columns and
# make the results irreproducible despite the fixed random seeds.
common_channels = sorted(common_channels)

# Length (in samples) of the longest recording.
max_length = max(data.shape[1] for data, _, _ in all_data)

# Pass 2: build one feature vector per recording.
for (data, ch_names, sfreq), label in zip(all_data, labels):
    # Keep only the shared channels, in the common (sorted) order.
    ch_indices = [ch_names.index(ch) for ch in common_channels]
    data = data[ch_indices, :]

    # Zero-pad at the end up to max_length. No recording can be longer than
    # the maximum, so no truncation branch is needed.
    # NOTE(review): zero-padding makes the mean/std features depend on the
    # recording length; left unchanged here to keep results comparable.
    padded_data = np.pad(data, ((0, 0), (0, max_length - data.shape[1])), mode='constant')

    band_features = []
    for band, (low_freq, high_freq) in bands.items():
        # Filter with this recording's own sampling rate rather than the one
        # left over in `raw` from the last file of the loading loop.
        filtered_data = mne.filter.filter_data(padded_data, sfreq=sfreq, l_freq=low_freq, h_freq=high_freq)
        mean_features = np.mean(filtered_data, axis=1)
        std_features = np.std(filtered_data, axis=1)
        ptp_features = np.ptp(filtered_data, axis=1)  # Peak-to-peak value
        band_features.extend([mean_features, std_features, ptp_features])

    # Layout: for each band in order, per-channel mean, std, peak-to-peak.
    feature_vector = np.concatenate(band_features)
    features.append(feature_vector)
    target.append(label)

# Convert features and labels to NumPy arrays
features = np.array(features)
target = np.array(target)

# Hold out 30% of the recordings as the test set (seeded split).
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=100,
)

# Fit a 100-tree random forest with a fixed seed, then classify the test set.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Report accuracy and the per-class metrics.
class_names = ['Normal', 'Depressed']
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=class_names)
print(report)

# Draw the confusion matrix as an annotated heatmap.
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
heatmap_ax = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
heatmap_ax.set_xlabel('Predicted Label')
heatmap_ax.set_ylabel('True Label')
heatmap_ax.set_title('Confusion Matrix')
plt.show()

raw_forest2

In [None]:
import os
import mne
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Root folder of the dataset (relative to the current working directory).
folder_path = r'ds003478'

# Collect every .set file whose name has characters 4-6 different from '038'
# and characters 22-23 equal to '02'.
# NOTE(review): presumably these positions are the subject id and run number
# of names like 'sub-XXX_task-Rest_run-YY_eeg.set' - verify against the data.
set_files = []
for root, dirs, files in os.walk(folder_path):
    # os.walk yields entries in arbitrary, filesystem-dependent order, while
    # the hard-coded label list is paired with the files by position.
    # Sorting in place makes the traversal (and so the pairing) deterministic.
    # Assumes the labels were written for alphabetical order - TODO confirm.
    dirs.sort()
    for file in sorted(files):
        if (
            file.endswith('.set')
            and file[4:7] != '038'
            and file[22:24] == '02'
        ):
            set_files.append(os.path.join(root, file))

# Manually entered class labels, one per recording, paired with set_files by
# position (the report further down names class 0 'Normal', class 1 'Depressed').
labels = [0,0,0,0,0,0,0,0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]  # Placeholder values: replace with the actual labels

# Print both counts, then stop if the number of files and labels differ,
# because a mismatch would silently mis-pair recordings and labels.
print(len(set_files), len(labels))
assert len(set_files) == len(labels), "Number of files and labels do not match"

# Per-recording feature vectors and their class labels (filled in below).
features = []
target = []

# Every loaded recording as (samples, channel names, sampling rate), plus the
# set of channel names that are present in all recordings.
all_data = []
common_channels = None

# Frequency bands in Hz used for band-pass filtering: name -> (low, high).
bands = {
    'delta': (1, 4),
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30)
}

# Pass 1: load every recording, resample it and track the shared channels.
for file in set_files:
    print(file)
    raw = mne.io.read_raw_eeglab(file, preload=True)
    raw.resample(100)  # Downsample to 100 Hz
    all_data.append((raw.get_data(), raw.info['ch_names'], raw.info['sfreq']))

    if common_channels is None:
        common_channels = set(raw.info['ch_names'])
    else:
        common_channels.intersection_update(raw.info['ch_names'])

# Sort the shared channel names: iterating a set of strings has no stable
# order between interpreter runs, which would shuffle the feature columns and
# make the results irreproducible despite the fixed random seeds.
common_channels = sorted(common_channels)

# Length (in samples) of the longest recording.
max_length = max(data.shape[1] for data, _, _ in all_data)

# Pass 2: build one feature vector per recording.
for (data, ch_names, sfreq), label in zip(all_data, labels):
    # Keep only the shared channels, in the common (sorted) order.
    ch_indices = [ch_names.index(ch) for ch in common_channels]
    data = data[ch_indices, :]

    # Zero-pad at the end up to max_length. No recording can be longer than
    # the maximum, so no truncation branch is needed.
    # NOTE(review): zero-padding makes the mean/std features depend on the
    # recording length; left unchanged here to keep results comparable.
    padded_data = np.pad(data, ((0, 0), (0, max_length - data.shape[1])), mode='constant')

    band_features = []
    for band, (low_freq, high_freq) in bands.items():
        # Filter with this recording's own sampling rate rather than the one
        # left over in `raw` from the last file of the loading loop.
        filtered_data = mne.filter.filter_data(padded_data, sfreq=sfreq, l_freq=low_freq, h_freq=high_freq)
        mean_features = np.mean(filtered_data, axis=1)
        std_features = np.std(filtered_data, axis=1)
        ptp_features = np.ptp(filtered_data, axis=1)  # Peak-to-peak value
        band_features.extend([mean_features, std_features, ptp_features])

    # Layout: for each band in order, per-channel mean, std, peak-to-peak.
    feature_vector = np.concatenate(band_features)
    features.append(feature_vector)
    target.append(label)

# Convert features and labels to NumPy arrays
features = np.array(features)
target = np.array(target)

# Hold out 30% of the recordings as the test set (seeded split).
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=100,
)

# Fit a 100-tree random forest with a fixed seed, then classify the test set.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Report accuracy and the per-class metrics.
class_names = ['Normal', 'Depressed']
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=class_names)
print(report)

# Draw the confusion matrix as an annotated heatmap.
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
heatmap_ax = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
heatmap_ax.set_xlabel('Predicted Label')
heatmap_ax.set_ylabel('True Label')
heatmap_ax.set_title('Confusion Matrix')
plt.show()

XGB1

In [None]:
import os
import mne
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Folder holding the recordings (relative to the current working directory).
folder_path = r'trans-01'

# Collect every .set file whose name does not start with '052'.
# NOTE(review): presumably the first three characters are a subject id -
# verify against the file names in the folder.
set_files = []
for root, dirs, files in os.walk(folder_path):
    # os.walk yields entries in arbitrary, filesystem-dependent order, while
    # the hard-coded label list is paired with the files by position.
    # Sorting in place makes the traversal (and so the pairing) deterministic.
    # Assumes the labels were written for alphabetical order - TODO confirm.
    dirs.sort()
    for file in sorted(files):
        if file.endswith('.set') and file[0:3] != '052':
            set_files.append(os.path.join(root, file))

# Manually entered class labels, one per recording, paired with set_files by
# position (the report further down names class 0 'Normal', class 1 'Depressed').
labels = [0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
0 ,0 ,0 ,0 ,0 ,1 ,0 ,1 ,1 ,0 ,0 ,1,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1,1]  # Placeholder values: replace with the actual labels

# Print both counts, then stop if the number of files and labels differ,
# because a mismatch would silently mis-pair recordings and labels.
print(len(set_files), len(labels))
assert len(set_files) == len(labels), "Number of files and labels do not match"

# Per-recording feature vectors and their class labels (filled in below).
features = []
target = []

# Sample arrays of every recording, with each recording's sampling rate kept
# at the same position in a parallel list.
all_data = []
sfreqs = []

# Frequency bands in Hz used for band-pass filtering: name -> (low, high).
bands = {
    'delta': (1, 4),
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30)
}

# Pass 1: load every recording and remember its sampling rate.
for file in set_files:
    print(file)
    raw = mne.io.read_raw_eeglab(file, preload=True)
    all_data.append(raw.get_data())
    sfreqs.append(raw.info['sfreq'])

# Length (in samples) of the longest recording.
max_length = max(data.shape[1] for data in all_data)

# Pass 2: build one feature vector per recording.
# NOTE(review): assumes all recordings have the same channels in the same
# order, otherwise the feature vectors differ in length - TODO confirm.
for data, sfreq, label in zip(all_data, sfreqs, labels):
    # Zero-pad at the end up to max_length. No recording can be longer than
    # the maximum, so no truncation is needed.
    # NOTE(review): zero-padding makes the mean/std features depend on the
    # recording length; left unchanged here to keep results comparable.
    padded_data = np.pad(data, ((0, 0), (0, max_length - data.shape[1])), mode='constant')

    band_features = []
    for band, (low_freq, high_freq) in bands.items():
        # Filter with this recording's own sampling rate. Reading it from
        # `raw` here would apply the last loaded file's rate to every file.
        filtered_data = mne.filter.filter_data(padded_data, sfreq=sfreq, l_freq=low_freq, h_freq=high_freq)
        mean_features = np.mean(filtered_data, axis=1)
        std_features = np.std(filtered_data, axis=1)
        ptp_features = np.ptp(filtered_data, axis=1)  # Peak-to-peak value
        band_features.extend([mean_features, std_features, ptp_features])

    # Layout: for each band in order, per-channel mean, std, peak-to-peak.
    feature_vector = np.concatenate(band_features)
    features.append(feature_vector)
    target.append(label)

# Convert features and target to NumPy arrays
features = np.array(features)
target = np.array(target)

# Hold out 40% of the recordings as the test set (seeded split).
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.4,
    random_state=150,
)

# Wrap both splits in XGBoost's DMatrix container.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Booster configuration for binary classification.
params = dict(
    booster='gbtree',
    objective='binary:logistic',
    eval_metric='logloss',
    eta=0.1,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    seed=42,
)

# Fit 100 boosting rounds on the training split.
bst = xgb.train(params, dtrain, num_boost_round=100)

# Predicted probabilities, thresholded at 0.5 to obtain class labels.
y_pred_prob = bst.predict(dtest)
y_pred = (y_pred_prob > 0.5).astype(int)

# Report accuracy and the per-class metrics.
class_names = ['Normal', 'Depressed']
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=class_names)
print(report)

# Draw the confusion matrix as an annotated heatmap.
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
heatmap_ax = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
heatmap_ax.set_xlabel('Predicted Label')
heatmap_ax.set_ylabel('True Label')
heatmap_ax.set_title('Confusion Matrix')
plt.show()

XGB2

In [None]:
import os
import mne
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import asrpy
import mne_icalabel

# Folder holding the recordings (relative to the current working directory).
folder_path = r'trans-02'

# Collect every .set file whose name does not start with '038'.
# NOTE(review): presumably the first three characters are a subject id -
# verify against the file names in the folder.
set_files = []
for root, dirs, files in os.walk(folder_path):
    # os.walk yields entries in arbitrary, filesystem-dependent order, while
    # the hard-coded label list is paired with the files by position.
    # Sorting in place makes the traversal (and so the pairing) deterministic.
    # Assumes the labels were written for alphabetical order - TODO confirm.
    dirs.sort()
    for file in sorted(files):
        if file.endswith('.set') and file[0:3] != '038':
            set_files.append(os.path.join(root, file))

# Manually entered class labels, one per recording, paired with set_files by
# position (the report further down names class 0 'Normal', class 1 'Depressed').
labels = [0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0,0 ,0 ,0 ,1 ,1 ,1 ,0 ,1 ,1 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,1 ,0 ,1 ,0 ,0 ,0 ,0 ,1 ,0 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1]  # Placeholder values: replace with the actual labels

# Print both counts, then stop if the number of files and labels differ,
# because a mismatch would silently mis-pair recordings and labels.
print(len(set_files),len(labels))
assert len(set_files) == len(labels), "文件數量和標籤數量不匹配"

# Per-recording feature vectors and their class labels (filled in below).
features = []
target = []

# Sample arrays of every recording, with each recording's sampling rate kept
# at the same position in a parallel list.
all_data = []
sfreqs = []

# Frequency bands in Hz used for band-pass filtering: name -> (low, high).
bands = {
    'delta': (1, 4),
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30)
}

# Pass 1: load every recording and remember its sampling rate.
for file in set_files:
    print(file)
    raw = mne.io.read_raw_eeglab(file, preload=True)
    all_data.append(raw.get_data())
    sfreqs.append(raw.info['sfreq'])

# Length (in samples) of the longest recording.
max_length = max(data.shape[1] for data in all_data)

# Pass 2: build one feature vector per recording.
# NOTE(review): assumes all recordings have the same channels in the same
# order, otherwise the feature vectors differ in length - TODO confirm.
for data, sfreq, label in zip(all_data, sfreqs, labels):
    # Zero-pad at the end up to max_length. No recording can be longer than
    # the maximum, so no truncation is needed.
    # NOTE(review): zero-padding makes the mean/std features depend on the
    # recording length; left unchanged here to keep results comparable.
    padded_data = np.pad(data, ((0, 0), (0, max_length - data.shape[1])), mode='constant')

    band_features = []
    for band, (low_freq, high_freq) in bands.items():
        # Filter with this recording's own sampling rate. Reading it from
        # `raw` here would apply the last loaded file's rate to every file.
        filtered_data = mne.filter.filter_data(padded_data, sfreq=sfreq, l_freq=low_freq, h_freq=high_freq)
        mean_features = np.mean(filtered_data, axis=1)
        std_features = np.std(filtered_data, axis=1)
        ptp_features = np.ptp(filtered_data, axis=1)  # Peak-to-peak value
        band_features.extend([mean_features, std_features, ptp_features])

    # Layout: for each band in order, per-channel mean, std, peak-to-peak.
    feature_vector = np.concatenate(band_features)
    features.append(feature_vector)
    target.append(label)

# Convert features and labels to NumPy arrays
features = np.array(features)
target = np.array(target)

# Split into training and test sets (40% held out, seeded split).
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.4, random_state=150)

# Convert to DMatrix, XGBoost's internal data format.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# XGBoost parameters for the linear booster.
# The tree-booster-only settings (max_depth, subsample, colsample_bytree)
# are not part of gblinear's documented parameter set, so they are dropped.
params = {
    'booster': 'gblinear',
    'objective': 'binary:logistic',  # Binary classification
    # Evaluation metric; not computed here since no `evals` are passed to train().
    'eval_metric': 'rmse',
    'eta': 0.1,
    'seed': 60
}

# Train the model
bst = xgb.train(params, dtrain, num_boost_round=100)

# Predict probabilities and threshold them at 0.5 to obtain class labels.
y_pred_prob = bst.predict(dtest)
y_pred = (y_pred_prob > 0.5).astype(int)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"準確率: {accuracy:.2f}")

# Display the classification report
print("\n分類報告:")
print(classification_report(y_test, y_pred, target_names=['Normal', 'Depressed']))

# Plot the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['Normal', 'Depressed'], yticklabels=['Normal', 'Depressed'])
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

forest1

In [None]:
import os
import mne
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Folder holding the recordings (relative to the current working directory).
folder_path = r'trans-01'

# Collect every .set file whose name does not start with '052'.
# NOTE(review): presumably the first three characters are a subject id -
# verify against the file names in the folder.
set_files = []
for root, dirs, files in os.walk(folder_path):
    # os.walk yields entries in arbitrary, filesystem-dependent order, while
    # the hard-coded label list is paired with the files by position.
    # Sorting in place makes the traversal (and so the pairing) deterministic.
    # Assumes the labels were written for alphabetical order - TODO confirm.
    dirs.sort()
    for file in sorted(files):
        if file.endswith('.set') and file[0:3] != '052':
            set_files.append(os.path.join(root, file))

# Manually entered class labels, one per recording, paired with set_files by
# position (the report further down names class 0 'Normal', class 1 'Depressed').
labels = [0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
0 ,0 ,0 ,0 ,0 ,1 ,0 ,1 ,1 ,0 ,0 ,1,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1,1]  # Placeholder values: replace with the actual labels

# Print both counts, then stop if the number of files and labels differ,
# because a mismatch would silently mis-pair recordings and labels.
print(len(set_files), len(labels))
assert len(set_files) == len(labels), "Number of files and labels do not match"

# Per-recording feature vectors and their class labels (filled in below).
features = []
target = []

# Sample arrays of every recording, with each recording's sampling rate kept
# at the same position in a parallel list.
all_data = []
sfreqs = []

# Frequency bands in Hz used for band-pass filtering: name -> (low, high).
bands = {
    'delta': (1, 4),
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30)
}

# Pass 1: load every recording and remember its sampling rate.
for file in set_files:
    print(file)
    raw = mne.io.read_raw_eeglab(file, preload=True)
    all_data.append(raw.get_data())
    sfreqs.append(raw.info['sfreq'])

# Length (in samples) of the longest recording.
max_length = max(data.shape[1] for data in all_data)

# Pass 2: build one feature vector per recording.
# NOTE(review): assumes all recordings have the same channels in the same
# order, otherwise the feature vectors differ in length - TODO confirm.
for data, sfreq, label in zip(all_data, sfreqs, labels):
    # Zero-pad at the end up to max_length. No recording can be longer than
    # the maximum, so no truncation is needed.
    # NOTE(review): zero-padding makes the mean/std features depend on the
    # recording length; left unchanged here to keep results comparable.
    padded_data = np.pad(data, ((0, 0), (0, max_length - data.shape[1])), mode='constant')

    band_features = []
    for band, (low_freq, high_freq) in bands.items():
        # Filter with this recording's own sampling rate. Reading it from
        # `raw` here would apply the last loaded file's rate to every file.
        filtered_data = mne.filter.filter_data(padded_data, sfreq=sfreq, l_freq=low_freq, h_freq=high_freq)
        mean_features = np.mean(filtered_data, axis=1)
        std_features = np.std(filtered_data, axis=1)
        ptp_features = np.ptp(filtered_data, axis=1)  # Peak-to-peak value
        band_features.extend([mean_features, std_features, ptp_features])

    # Layout: for each band in order, per-channel mean, std, peak-to-peak.
    feature_vector = np.concatenate(band_features)
    features.append(feature_vector)
    target.append(label)

# Convert features and target to NumPy arrays
features = np.array(features)
target = np.array(target)

# Hold out 40% of the recordings as the test set (seeded split).
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.4,
    random_state=150,
)

# Fit a 100-tree random forest with a fixed seed, then classify the test set.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Report accuracy and the per-class metrics.
class_names = ['Normal', 'Depressed']
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=class_names)
print(report)

# Draw the confusion matrix as an annotated heatmap.
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
heatmap_ax = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
heatmap_ax.set_xlabel('Predicted Label')
heatmap_ax.set_ylabel('True Label')
heatmap_ax.set_title('Confusion Matrix')
plt.show()

forest2

In [None]:
import os
import mne
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Folder holding the recordings (relative to the current working directory).
folder_path = r'trans-02'

# Collect every .set file whose name does not start with '038'.
# NOTE(review): presumably the first three characters are a subject id -
# verify against the file names in the folder.
set_files = []
for root, dirs, files in os.walk(folder_path):
    # os.walk yields entries in arbitrary, filesystem-dependent order, while
    # the hard-coded label list is paired with the files by position.
    # Sorting in place makes the traversal (and so the pairing) deterministic.
    # Assumes the labels were written for alphabetical order - TODO confirm.
    dirs.sort()
    for file in sorted(files):
        if file.endswith('.set') and file[0:3] != '038':
            set_files.append(os.path.join(root, file))

# Manually entered class labels, one per recording, paired with set_files by
# position (the report further down names class 0 'Normal', class 1 'Depressed').
labels = [0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0,0 ,0 ,0 ,1 ,1 ,1 ,0 ,1 ,1 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,1 ,0 ,1 ,0 ,0 ,0 ,0 ,1 ,0 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1]  # Placeholder values: replace with the actual labels

# Print both counts, then stop if the number of files and labels differ,
# because a mismatch would silently mis-pair recordings and labels.
print(len(set_files), len(labels))
assert len(set_files) == len(labels), "Number of files and labels do not match"

# Per-recording feature vectors and their class labels (filled in below).
features = []
target = []

# Sample arrays of every recording, with each recording's sampling rate kept
# at the same position in a parallel list.
all_data = []
sfreqs = []

# Frequency bands in Hz used for band-pass filtering: name -> (low, high).
bands = {
    'delta': (1, 4),
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30)
}

# Pass 1: load every recording and remember its sampling rate.
for file in set_files:
    print(file)
    raw = mne.io.read_raw_eeglab(file, preload=True)
    all_data.append(raw.get_data())
    sfreqs.append(raw.info['sfreq'])

# Length (in samples) of the longest recording.
max_length = max(data.shape[1] for data in all_data)

# Pass 2: build one feature vector per recording.
# NOTE(review): assumes all recordings have the same channels in the same
# order, otherwise the feature vectors differ in length - TODO confirm.
for data, sfreq, label in zip(all_data, sfreqs, labels):
    # Zero-pad at the end up to max_length. No recording can be longer than
    # the maximum, so no truncation is needed.
    # NOTE(review): zero-padding makes the mean/std features depend on the
    # recording length; left unchanged here to keep results comparable.
    padded_data = np.pad(data, ((0, 0), (0, max_length - data.shape[1])), mode='constant')

    band_features = []
    for band, (low_freq, high_freq) in bands.items():
        # Filter with this recording's own sampling rate. Reading it from
        # `raw` here would apply the last loaded file's rate to every file.
        filtered_data = mne.filter.filter_data(padded_data, sfreq=sfreq, l_freq=low_freq, h_freq=high_freq)
        mean_features = np.mean(filtered_data, axis=1)
        std_features = np.std(filtered_data, axis=1)
        ptp_features = np.ptp(filtered_data, axis=1)  # Peak-to-peak value
        band_features.extend([mean_features, std_features, ptp_features])

    # Layout: for each band in order, per-channel mean, std, peak-to-peak.
    feature_vector = np.concatenate(band_features)
    features.append(feature_vector)
    target.append(label)

# Convert features and target to NumPy arrays
features = np.array(features)
target = np.array(target)

# Hold out 40% of the recordings as the test set (seeded split).
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.4,
    random_state=150,
)

# Fit a 100-tree random forest with a fixed seed, then classify the test set.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Report accuracy and the per-class metrics.
class_names = ['Normal', 'Depressed']
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=class_names)
print(report)

# Draw the confusion matrix as an annotated heatmap.
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
heatmap_ax = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
heatmap_ax.set_xlabel('Predicted Label')
heatmap_ax.set_ylabel('True Label')
heatmap_ax.set_title('Confusion Matrix')
plt.show()