Converting CSV to PyTorch tensor: https://www.codegenes.net/blog/pytorch-dataset-from-csv/

In [161]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [162]:
path = r'/workspaces/ACT-coursework-2/star_classification.csv'

In [163]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [164]:
class SDSSDataset(Dataset):
    def __init__(self, path):
        self.data = pd.read_csv(path)
        self.data['class'] = LabelEncoder().fit_transform(self.data['class'])

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        row = self.data.iloc[index]
        features = torch.tensor(row[:'class'].values, dtype=torch.float32)
        target = torch.tensor(row['class'], dtype = torch.long)
        return features, target

In [165]:
batch_size = 128
num_epochs = 5
learning_rate = 0.01

In [173]:
dataset = SDSSDataset(path)


In [167]:
train_size = int(0.7*len(dataset))
test_size = len(dataset)- train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_dataloader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_dataloader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)


In [168]:
class SDSSNN(nn.Module):
    def __init__(self):
        super(SDSSNN, self).__init__()
        self.fc1 = nn.Linear(14, 5)
        self.fc2 = nn.Linear(5, 3)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [169]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(SDSSNN().parameters(), lr=learning_rate)

In [170]:
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, 1)
    return torch.sum(preds == labels).item() / len(labels)


for epoch in range(num_epochs):

    running_loss = 0.0
    running_acc = 0.0
    for i, data in enumerate(train_dataloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = SDSSNN()(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        running_acc += accuracy(outputs, labels)
        if i % 100 == 99:   
            print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}, Accuracy: {running_acc /100:.4f}')
            running_loss = 0.0
print('Done')

[1, 100] loss: 278704279202174080.000, Accuracy: 0.3264
[1, 200] loss: 320737265895646144.000, Accuracy: 0.6724
[1, 300] loss: 310267668762879616.000, Accuracy: 1.0016
[1, 400] loss: 278597592988526848.000, Accuracy: 1.3355
[1, 500] loss: 300455289512022528.000, Accuracy: 1.6629
[2, 100] loss: 291448262844984832.000, Accuracy: 0.3233
[2, 200] loss: 291633187703131968.000, Accuracy: 0.6346
[2, 300] loss: 271966607201271808.000, Accuracy: 0.9511
[2, 400] loss: 265613080334916640.000, Accuracy: 1.2671
[2, 500] loss: 347592897867483712.000, Accuracy: 1.5781
[3, 100] loss: 297703261938510720.000, Accuracy: 0.3166
[3, 200] loss: 280889281356685632.000, Accuracy: 0.6647
[3, 300] loss: 304214762529488896.000, Accuracy: 1.0063
[3, 400] loss: 308857430456360000.000, Accuracy: 1.3280
[3, 500] loss: 261751699979234176.000, Accuracy: 1.6684
[4, 100] loss: 259265578910393312.000, Accuracy: 0.3361
[4, 200] loss: 276163221111187712.000, Accuracy: 0.6630
[4, 300] loss: 294863087627298432.000, Accuracy: