In [1]:
import pandas as pd
import numpy as np
import torch 
import torch.nn as nn
import torchmetrics

In [2]:
data = pd.read_csv('data_star.csv')
data.head()

Unnamed: 0,obj_ID,alpha,delta,u,g,r,i,z,run_ID,rerun_ID,cam_col,field_ID,spec_obj_ID,class,redshift,plate,MJD,fiber_ID
0,1.842794,1.678562,-0.189957,0.730051,1.412983,0.900283,0.608321,0.358271,1.84282,0.0,-0.331467,-0.503193,-0.074397,1,1.208208,-0.074421,0.109142,1.071997
1,1.675764,-1.582285,-0.653837,-0.181271,0.3301,0.684916,0.809094,0.849514,1.675851,0.0,-0.960351,1.894562,1.95191,1,-0.004177,1.951948,1.551299,-1.233386
2,0.742588,-1.418968,0.190066,-0.056231,-0.241676,-0.274001,-0.211583,-0.139071,0.742555,0.0,0.297417,-0.421452,-0.940243,2,-0.85244,-0.94023,-0.87392,-0.796538
3,1.870643,1.727888,0.421216,0.587597,-0.186553,-0.140538,0.0177,-0.005492,1.870732,0.0,-0.960351,-0.993643,0.421197,2,-0.852325,0.421189,0.488949,0.458942
4,1.644996,1.803823,-1.075539,1.2089,0.463576,0.698517,0.914775,0.882265,1.644894,0.0,0.926301,0.682061,-0.323588,2,-0.853049,-0.323567,0.114132,-0.994771


In [3]:
X = data.drop(labels=['class'],axis=1)
y = data['class']

In [4]:
# PCA
from sklearn.decomposition import PCA
pca = PCA(n_components=12)
X = pca.fit_transform(X)

In [5]:
# Convert data to tensor
X = torch.tensor(X).to(torch.float32).squeeze()
y = torch.tensor(y).to(torch.long)
X[:5],y[:5]

(tensor([[-2.1566,  1.8858,  0.2564, -2.0829, -0.2722,  1.0747,  1.0373, -0.5046,
           0.7159,  0.6177, -0.6187, -0.1081],
         [-2.8937,  2.6210, -0.4013,  2.4907,  1.4968, -0.7737, -0.2777,  1.2912,
          -0.0525, -0.5826, -0.0252,  0.2450],
         [ 1.3644,  0.9952,  0.9485, -0.0967,  0.0874, -0.4827, -1.4682, -0.1757,
           1.0385, -0.4812,  0.1117, -0.0340],
         [-0.8143,  2.1672, -1.2757, -2.3160,  0.4075, -0.6764,  0.7916,  0.0345,
           0.2877, -0.2648,  0.4717,  0.0988],
         [-1.2334,  2.1352,  1.4537, -1.8244, -0.3299,  0.6656,  0.0812,  1.9185,
          -0.5341, -0.6298,  0.5729, -0.1900]]),
 tensor([1, 1, 2, 2, 2]))

In [6]:
X.shape, y.shape

(torch.Size([70277, 12]), torch.Size([70277]))

In [7]:
from sklearn.model_selection import train_test_split
X_test, X_train, y_test, y_train = train_test_split(X, y, test_size=0.2, random_state=21)

In [8]:
class ModuleClassification(nn.Module):
    def __init__(self,input_fearures,output_features,hidden_units = 32):
        super().__init__()
        self.stackLayers = nn.Sequential(
            nn.Linear(input_fearures,hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units,hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units,output_features)
        )
    def forward(self,x):
        return self.stackLayers(x)

In [9]:
# Change device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [10]:
# Creat model
model = ModuleClassification(input_fearures=12,output_features=3).to(device=device)
# Create loss function and optimizer
loss_function = nn.CrossEntropyLoss()
# Create optimizer
optimizer = torch.optim.SGD(model.parameters(),lr=0.01)

In [11]:
# Convert data to device
X_train = X_train.to(device)
X_test = X_test.to(device)
y_train = y_train.to(device)
y_test = y_test.to(device)
# Create trainning loop
torch.manual_seed(21)
epochs = 5000

for epoch in range(epochs):
    model.train()
    # Trainning
    y_logist = model(X_train)
    y_pred = torch.softmax(y_logist,dim=1).argmax(dim=1)
    # Calculate accuraccy
    acc = torchmetrics.functional.accuracy(y_pred,y_train)
    # Caculate loss
    loss = loss_function(y_logist,y_train)
    # Zero grad
    optimizer.zero_grad()
    # Backward
    loss.backward()
    # Update weight
    optimizer.step()
    
    # Evaluation
    model.eval()

    if epoch % 1000 == 0:
        y_logist_test = model(X_test)
        y_pred_test = torch.softmax(y_logist_test,dim=1).argmax(dim=1)
        acc_test = torchmetrics.functional.accuracy(y_pred_test,y_test)
        print(f'Epoch {epoch}|Loss: {loss.item():.4f}|Acc: {acc.item() * 100:.4f}%|Acc test: {acc_test.item()*100:.4f}%')
        

Epoch 0|Loss: 1.0962|Acc: 30.8978%|Acc test: 31.0685%
Epoch 1000|Loss: 0.5250|Acc: 82.5555%|Acc test: 82.5848%
Epoch 2000|Loss: 0.2870|Acc: 91.7260%|Acc test: 91.5032%
Epoch 3000|Loss: 0.2244|Acc: 93.0990%|Acc test: 93.2392%
Epoch 4000|Loss: 0.1974|Acc: 94.0026%|Acc test: 93.9862%


In [12]:
# Metrics
from sklearn.metrics import classification_report
y_logist_test = model(X_test)
y_pred_test = torch.softmax(y_logist_test,dim=1).argmax(dim=1)
print(classification_report(y_test.cpu(),y_pred_test.cpu()))

              precision    recall  f1-score   support

           0       0.93      0.94      0.94     23746
           1       0.96      0.91      0.94     15193
           2       0.95      0.98      0.96     17282

    accuracy                           0.94     56221
   macro avg       0.95      0.94      0.95     56221
weighted avg       0.94      0.94      0.94     56221

