In [8]:
import pandas as pd

default = pd.read_csv("../../data/Default.csv")
default['default'] = pd.get_dummies(default['default'], drop_first=True).astype(int)
default['student'] = pd.get_dummies(default['student'], drop_first=True).astype(int)
default

Unnamed: 0,default,student,balance,income
0,0,0,729.526495,44361.625074
1,0,1,817.180407,12106.134700
2,0,0,1073.549164,31767.138947
3,0,0,529.250605,35704.493935
4,0,0,785.655883,38463.495879
...,...,...,...,...
9995,0,0,711.555020,52992.378914
9996,0,0,757.962918,19660.721768
9997,0,0,845.411989,58636.156984
9998,0,0,1569.009053,36669.112365


In [9]:
from ISLP.models import ModelSpec as MS 

model = MS(default.columns.drop('default'), intercept=False)
X = model.fit_transform(default).to_numpy()
Y = default['default'].to_numpy()

In [10]:
from sklearn.model_selection import train_test_split

(X_train, 
 X_test,
 Y_train,
 Y_test) = train_test_split(X,
                            Y,
                            test_size=1/3,
                            random_state=1)

In [11]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def_clf = LogisticRegression(max_iter=1000).fit(X_train, Y_train)
Yhat_prob = def_clf.predict_proba(X_test)[:, 1]
Yhat_class = def_clf.predict(X_test)
acc = accuracy_score(Y_test, Yhat_class)

print("Accuracy:", acc)

Accuracy: 0.9763047390521895


In [12]:
from torch import nn
import torch

class Default(nn.Module):

    def __init__(self, input_size):
        super(Default, self).__init__()
        self.flatten = nn.Flatten()
        self.sequential = nn.Sequential(
            nn.Linear(input_size, 10),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(10, 2))

    def forward(self, x):
        x = self.flatten(x)
        return self.sequential(x)


In [13]:
from torchinfo import summary

def_model = Default(X.shape[1])
summary(def_model, 
        input_size=X_train.shape,
        col_names=['input_size',
                   'output_size',
                   'num_params'])


Layer (type:depth-idx)                   Input Shape               Output Shape              Param #
Default                                  [6666, 3]                 [6666, 2]                 --
├─Flatten: 1-1                           [6666, 3]                 [6666, 3]                 --
├─Sequential: 1-2                        [6666, 3]                 [6666, 2]                 --
│    └─Linear: 2-1                       [6666, 3]                 [6666, 10]                40
│    └─ReLU: 2-2                         [6666, 10]                [6666, 10]                --
│    └─Dropout: 2-3                      [6666, 10]                [6666, 10]                --
│    └─Linear: 2-4                       [6666, 10]                [6666, 2]                 22
Total params: 62
Trainable params: 62
Non-trainable params: 0
Total mult-adds (M): 0.41
Input size (MB): 0.08
Forward/backward pass size (MB): 0.64
Params size (MB): 0.00
Estimated Total Size (MB): 0.72

In [16]:
from torch.utils.data import TensorDataset
from ISLP.torch import SimpleDataModule, SimpleModule, ErrorTracker
from torchmetrics import MeanAbsoluteError
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import CSVLogger

X_train_t = torch.tensor(X_train.astype(np.float32))
Y_train_t = torch.tensor(Y_train.astype(np.int64))
def_train = TensorDataset(X_train_t, Y_train_t)
X_test_t = torch.tensor(X_test.astype(np.float32))
Y_test_t = torch.tensor(Y_test.astype(np.int64))
def_test = TensorDataset(X_test_t, Y_test_t)

def_dm = SimpleDataModule(def_train,
                          def_test,
                          batch_size=32,
                          num_workers=4,
                          validation=def_test)

def_module = SimpleModule.classification(def_model,
                                        num_classes=2)
                           
def_trainer = Trainer(max_epochs=50,
                      log_every_n_steps=5,
                      callbacks=[ErrorTracker()])

def_trainer.fit(def_module, datamodule=def_dm)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params | Mode 
---------------------------------------------------
0 | model | Default          | 62     | eval 
1 | loss  | CrossEntropyLoss | 0      | train
---------------------------------------------------
62        Trainable params
0         Non-trainable params
62        Total params
0.000     Total estimated model params size (MB)
1         Modules in train mode
7         Modules in eval mode


Epoch 49: 100%|██████████| 209/209 [00:01<00:00, 117.25it/s, v_num=4]       

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 209/209 [00:01<00:00, 116.93it/s, v_num=4]


In [17]:
def_trainer.test(def_module, datamodule=def_dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 105/105 [00:00<00:00, 409.19it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_accuracy          0.970005989074707
        test_loss           0.13561557233333588
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.13561557233333588, 'test_accuracy': 0.970005989074707}]

The results show that the PyTorch neural network achieved a **test loss of 0.136** and a **test accuracy of \~97.0%**, indicating that it predicts the target quite reliably with low error. The logistic regression model achieved a slightly higher **accuracy of \~97.6%**, suggesting that for this dataset, a simpler linear model performs comparably—or even slightly better—than the neural network, despite the latter’s greater flexibility. Overall, both models perform very well, with logistic regression having a small edge in accuracy.