In [63]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml
import scipy.io
import numpy as np
import pandas as pd

In [80]:
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2,hidden_size3,output_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size2, hidden_size3)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(hidden_size3, output_size)
        self.relu4 = nn.ReLU()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        x = self.fc4(x)
        x = self.relu4(x)
        return x

In [65]:
from google.colab import drive
import pandas as pd
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Lung Dataset

In [66]:
mat = scipy.io.loadmat("/content/drive/My Drive/datasets/lung.mat")

con_list = [[element for element in upperElement] for upperElement in mat['X']]

labels = mat['Y']
labels = np.array(labels.flatten())

df = pd.DataFrame(con_list)

In [67]:
from sklearn.preprocessing import MinMaxScaler

sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(df)
X = sc.transform(df)
type(X)

numpy.ndarray

In [68]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

In [69]:
from sklearn.preprocessing import OneHotEncoder
from torch.utils.data import DataLoader, TensorDataset


X_train_tensor = torch.FloatTensor(X_train)
X_test_tensor = torch.FloatTensor(X_test)


# One-hot encode the target labels
encoder = OneHotEncoder(sparse=False, categories='auto')
y_train_onehot = torch.FloatTensor(encoder.fit_transform(y_train.reshape(-1, 1)))
y_test_onehot = torch.FloatTensor(encoder.transform(y_test.reshape(-1, 1)))

# Create DataLoader for training and testing sets
train_dataset = TensorDataset(X_train_tensor, y_train_onehot)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_onehot)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)




In [70]:
# Set hyperparameters
input_size = X.shape[1]
hidden_size1 = 256
hidden_size2 = 128
hidden_size3 = 64
output_size = 5

In [71]:
model = MLP(input_size, hidden_size1,hidden_size2, hidden_size3,output_size)

In [72]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [73]:
num_epochs =100
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
      optimizer.zero_grad()
      outputs = model(inputs)
      loss = criterion(outputs, labels.argmax(dim=1).long())
      loss.backward()
      optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 1.6094
Epoch [20/100], Loss: 1.6094
Epoch [30/100], Loss: 1.6094
Epoch [40/100], Loss: 1.6094
Epoch [50/100], Loss: 1.6094
Epoch [60/100], Loss: 1.6094
Epoch [70/100], Loss: 1.6094
Epoch [80/100], Loss: 1.6094
Epoch [90/100], Loss: 1.6094
Epoch [100/100], Loss: 1.6094


In [74]:
from sklearn.metrics import f1_score

model.eval()

all_predictions = []
all_true_labels = []

def get_cluster(arr):
  v=[]
  for a in arr:
    m, i = max((v, i) for i, v in enumerate(a))
    if m==0:
      v.append(0)
    else:
      v.append(i+1)
  return v

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        all_predictions.extend(get_cluster(outputs.numpy()))
        all_true_labels.extend(get_cluster(labels.numpy()))

# Convert lists to NumPy arrays
predicted_labels = np.array(all_predictions)
true_labels = np.array(all_true_labels)

# Calculate F1-score
f1 = f1_score(true_labels, predicted_labels,  average='macro')
print(f'f1-score: {f1}')

f1-score: 0.0


balance lung dataset

In [81]:
mat = scipy.io.loadmat("/content/drive/My Drive/datasets/lung.mat")

con_list = [[element for element in upperElement] for upperElement in mat['X']]


df = pd.DataFrame(con_list)

labels = mat['Y']
labels = np.array(labels.flatten())

df['label'] = labels
df['label'].value_counts()

1    139
3     21
4     20
2     17
5      6
Name: label, dtype: int64

In [82]:
import pandas as pd
from sklearn.utils import resample

# Assuming your DataFrame is named 'df' and has a column 'cluster'

df_1 = df[df['label'] == 1]
df_2 = df[df['label'] == 2]
df_3 = df[df['label'] == 3]
df_4 = df[df['label'] == 4]
df_5 = df[df['label'] == 5]




oversampled_2 = resample(df_2, replace=True, n_samples=139, random_state=42)
oversampled_3 = resample(df_3, replace=True, n_samples=139, random_state=42)
oversampled_4 = resample(df_4, replace=True, n_samples=139, random_state=42)
oversampled_5 = resample(df_5, replace=True, n_samples=139, random_state=42)
balanced_df = pd.concat([df_1, oversampled_2,oversampled_3,oversampled_4,oversampled_5])


balanced_df['label'].value_counts()

1    139
2    139
3    139
4    139
5    139
Name: label, dtype: int64

In [83]:
y = np.array(balanced_df.label)

from sklearn.preprocessing import MinMaxScaler

df = balanced_df.drop('label', axis=1)

sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(df)
X = sc.transform(df)
type(X)

numpy.ndarray

In [84]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from torch.utils.data import DataLoader, TensorDataset

labels = y.flatten()
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

X_train_tensor = torch.FloatTensor(X_train)
X_test_tensor = torch.FloatTensor(X_test)


# One-hot encode the target labels
encoder = OneHotEncoder(sparse=False, categories='auto')
y_train_onehot = torch.FloatTensor(encoder.fit_transform(y_train.reshape(-1, 1)))
y_test_onehot = torch.FloatTensor(encoder.transform(y_test.reshape(-1, 1)))

# Create DataLoader for training and testing sets
train_dataset = TensorDataset(X_train_tensor, y_train_onehot)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_onehot)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)




In [85]:
# Set hyperparameters
# Set hyperparameters
input_size = X.shape[1]
hidden_size1 = 256
hidden_size2 = 128
hidden_size3 = 64
output_size = 5

model = MLP(input_size, hidden_size1,hidden_size2, hidden_size3,output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [86]:
num_epochs =100
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
      optimizer.zero_grad()
      outputs = model(inputs)
      loss = criterion(outputs, labels.argmax(dim=1).long())
      loss.backward()
      optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 1.6094
Epoch [20/100], Loss: 1.6094
Epoch [30/100], Loss: 1.6094
Epoch [40/100], Loss: 1.6094
Epoch [50/100], Loss: 1.6094
Epoch [60/100], Loss: 1.6094
Epoch [70/100], Loss: 1.6094
Epoch [80/100], Loss: 1.6094
Epoch [90/100], Loss: 1.6094
Epoch [100/100], Loss: 1.6094


In [87]:
from sklearn.metrics import f1_score

model.eval()

all_predictions = []
all_true_labels = []

def get_cluster(arr):
  v=[]
  for a in arr:
    m, i = max((v, i) for i, v in enumerate(a))
    if m==0:
      v.append(0)
    else:
      v.append(i+1)
  return v

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        all_predictions.extend(get_cluster(outputs.numpy()))
        all_true_labels.extend(get_cluster(labels.numpy()))

# Convert lists to NumPy arrays
predicted_labels = np.array(all_predictions)
true_labels = np.array(all_true_labels)

# Calculate F1-score
f1 = f1_score(true_labels, predicted_labels,  average='macro')
print(f'f1-score: {f1}')

f1-score: 0.0


## Ids2 Dataset

In [88]:
mat = scipy.io.loadmat("/content/drive/My Drive/datasets/ids2.mat")

con_list = [[element for element in upperElement] for upperElement in mat['data']]

labels = mat['label']
labels = np.array(labels.flatten())

columns = ['data_x', 'data_y']
df = pd.DataFrame(con_list, columns=columns)

In [89]:
from sklearn.preprocessing import MinMaxScaler

sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(df)
X = sc.transform(df)
type(X)

numpy.ndarray

In [90]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

In [91]:
from sklearn.preprocessing import OneHotEncoder
from torch.utils.data import DataLoader, TensorDataset

# Convert data to PyTorch tensors
X_train_tensor = torch.FloatTensor(X_train)
X_test_tensor = torch.FloatTensor(X_test)

# One-hot encode the target labels
encoder = OneHotEncoder(sparse=False, categories='auto')
y_train_onehot = torch.FloatTensor(encoder.fit_transform(y_train.reshape(-1, 1)))
y_test_onehot = torch.FloatTensor(encoder.transform(y_test.reshape(-1, 1)))

# Create DataLoader for training and testing sets
train_dataset = TensorDataset(X_train_tensor, y_train_onehot)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_onehot)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)



In [92]:
# Set hyperparameters
input_size = X.shape[1]
hidden_size1 = 256
hidden_size2 = 128
hidden_size2 = 64
output_size = 5

In [93]:
model = MLP(input_size, hidden_size1,hidden_size2, hidden_size3,output_size)

In [97]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [98]:
num_epochs =100
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
      optimizer.zero_grad()
      outputs = model(inputs)
      loss = criterion(outputs, labels.argmax(dim=1).long())
      loss.backward()
      optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 1.2071
Epoch [20/100], Loss: 0.6035
Epoch [30/100], Loss: 0.4024
Epoch [40/100], Loss: 0.2012
Epoch [50/100], Loss: 0.4024
Epoch [60/100], Loss: 0.0000
Epoch [70/100], Loss: 0.2012
Epoch [80/100], Loss: 0.2012
Epoch [90/100], Loss: 0.0000
Epoch [100/100], Loss: 0.2012


In [99]:
from sklearn.metrics import f1_score

model.eval()

all_predictions = []
all_true_labels = []

def get_cluster(arr):
  v=[]
  for a in arr:
    m, i = max((v, i) for i, v in enumerate(a))
    if m==0:
      v.append(0)
    else:
      v.append(i+1)
  return v

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        all_predictions.extend(get_cluster(outputs.numpy()))
        all_true_labels.extend(get_cluster(labels.numpy()))

# Convert lists to NumPy arrays
predicted_labels = np.array(all_predictions)
true_labels = np.array(all_true_labels)

# Calculate F1-score
f1 = f1_score(true_labels, predicted_labels,  average='macro')
print(f'f1-score: {f1}')

f1-score: 0.3333333333333333


balance ids2 dataset

In [100]:
mat = scipy.io.loadmat("/content/drive/My Drive/datasets/ids2.mat")

con_list = [[element for element in upperElement] for upperElement in mat['data']]

labels = mat['label']
labels = np.array(labels.flatten())

columns = ['data_x', 'data_y']
df = pd.DataFrame(con_list, columns=columns)
df['label'] = labels
df['label'].value_counts()

1    2000
5     400
4     400
3     200
2     200
Name: label, dtype: int64

In [101]:
import pandas as pd
from sklearn.utils import resample

# Assuming your DataFrame is named 'df' and has a column 'cluster'

df_1 = df[df['label'] == 1]
df_2 = df[df['label'] == 2]
df_3 = df[df['label'] == 3]
df_4 = df[df['label'] == 4]
df_5 = df[df['label'] == 5]




oversampled_2 = resample(df_2, replace=True, n_samples=2000, random_state=42)
oversampled_3 = resample(df_3, replace=True, n_samples=2000, random_state=42)
oversampled_4 = resample(df_4, replace=True, n_samples=2000, random_state=42)
oversampled_5 = resample(df_5, replace=True, n_samples=2000, random_state=42)
balanced_df = pd.concat([df_1, oversampled_2,oversampled_3,oversampled_4,oversampled_5])

In [102]:
balanced_df['label'].value_counts()

1    2000
2    2000
3    2000
4    2000
5    2000
Name: label, dtype: int64

In [103]:
y = np.array(balanced_df.label)

from sklearn.preprocessing import MinMaxScaler

df = balanced_df.drop('label', axis=1)

sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(df)
X = sc.transform(df)
type(X)

numpy.ndarray

In [104]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from torch.utils.data import DataLoader, TensorDataset

labels = y.flatten()
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

X_train_tensor = torch.FloatTensor(X_train)
X_test_tensor = torch.FloatTensor(X_test)


# One-hot encode the target labels
encoder = OneHotEncoder(sparse=False, categories='auto')
y_train_onehot = torch.FloatTensor(encoder.fit_transform(y_train.reshape(-1, 1)))
y_test_onehot = torch.FloatTensor(encoder.transform(y_test.reshape(-1, 1)))

# Create DataLoader for training and testing sets
train_dataset = TensorDataset(X_train_tensor, y_train_onehot)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_onehot)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)




In [106]:
# Set hyperparameters
input_size = X.shape[1]
hidden_size1 = 256
hidden_size2 = 128
hidden_size2 = 64
output_size = 5

model = MLP(input_size, hidden_size1,hidden_size2, hidden_size3,output_size)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs =100
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
      optimizer.zero_grad()
      outputs = model(inputs)
      loss = criterion(outputs, labels.argmax(dim=1).long())
      loss.backward()
      optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.0044
Epoch [20/100], Loss: 0.0001
Epoch [30/100], Loss: 0.0000
Epoch [40/100], Loss: 0.0021
Epoch [50/100], Loss: 0.0000
Epoch [60/100], Loss: 0.0000
Epoch [70/100], Loss: 0.0000
Epoch [80/100], Loss: 0.0001
Epoch [90/100], Loss: 0.0053
Epoch [100/100], Loss: 0.0000


In [107]:
from sklearn.metrics import f1_score

model.eval()

all_predictions = []
all_true_labels = []

def get_cluster(arr):
  v=[]
  for a in arr:
    m, i = max((v, i) for i, v in enumerate(a))
    if m==0:
      v.append(0)
    else:
      v.append(i+1)
  return v

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        all_predictions.extend(get_cluster(outputs.numpy()))
        all_true_labels.extend(get_cluster(labels.numpy()))

# Convert lists to NumPy arrays
predicted_labels = np.array(all_predictions)
true_labels = np.array(all_true_labels)

# Calculate F1-score
f1 = f1_score(true_labels, predicted_labels,  average='macro')
print(f'f1-score: {f1}')

f1-score: 0.9990033882317899
