# Episode 123. Pytorch for Neural Networks (ver. 2024)

## Check Google CoLab
Detects whether the notebook is running in Google CoLab and records the result in the `COLAB` flag.


In [None]:
import os
# Ask PyTorch to fall back to the CPU for operators the MPS backend lacks.
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

# Only a failed import means "not running in CoLab". Catching ImportError
# (instead of a bare except) keeps unrelated errors, e.g. KeyboardInterrupt,
# from being silently swallowed.
try:
    import google.colab  # noqa: F401  (imported only to probe for CoLab)

    COLAB = True
    print("Note: using Google CoLab")
except ImportError:
    COLAB = False
    print("Note: not using Google CoLab")

Note: using Google CoLab


## Basic Pytorch Operations

In [None]:
import torch

# Make use of a GPU or MPS (Apple) if one is available.
# is_available() is the correct probe: is_built() only reports that this
# PyTorch binary was compiled with MPS support, which can be true on a machine
# that has no usable MPS device.
has_mps = torch.backends.mps.is_available()
if has_mps:
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cuda


Basic linear algebra example: multiplication of a row matrix by a column matrix.


In [None]:
# Row matrix, shape 1x2.
matrix1 = torch.tensor([[3.0, 3.0]], device=device)

# Column matrix, shape 2x1.
matrix2 = torch.tensor([[2.0], [2.0]], device=device)

# (1x2) @ (2x1) -> 1x1 matrix product.
product = matrix1 @ matrix2

print(product)
# The product holds a single element, so it can be read out as a Python float.
print(product.item())

tensor([[12.]], device='cuda:0')
12.0


Subtract a constant from a variable.


In [None]:
x = torch.tensor([1.0, 2.0], device=device)
a = torch.tensor([3.0, 3.0], device=device)

# Element-wise difference x - a.
sub = x - a
print(sub)
# Move to the CPU first: a tensor living on the GPU cannot be turned into a
# NumPy array directly.
print(sub.cpu().numpy())
# ==> [-2. -1.]

tensor([-2., -1.], device='cuda:0')
[-2. -1.]


Change the values stored in `x` in place by assigning to its elements.

In [None]:
# Overwrite both elements of x in place (x stays on the same device).
x[0], x[1] = 4.0, 6.0

subtraction with this new value.


In [None]:
# Recompute the difference now that x holds the new values.
sub = torch.sub(x, a)
print(sub)
print(sub.cpu().numpy())

tensor([1., 3.], device='cuda:0')
[1. 3.]


# Feature Vector

Encoding a Feature Vector for PyTorch Deep Learning

Neural networks require numeric input. This numeric form is called a feature vector. Each input neuron receives one feature (or column) from this vector. Each row of training data typically becomes one vector. See how to encode the following tabular data into a feature vector.

In [None]:
import pandas as pd

# Load the income-evaluation data; cells equal to "NA" or "?" are read as missing.
# NOTE(review): the dummy column names printed further down (e.g. "race_ White")
# suggest the text values carry a leading space. If so, "?" will not match " ?";
# consider skipinitialspace=True. Confirm against the CSV.
df = pd.read_csv(
    "https://raw.githubusercontent.com/yunssamfinance/DeepLearningInFinance/main/income_evaluation4.csv",
    na_values=["NA", "?"],
)

# Display limits used for the preview below.
pd.set_option("display.max_columns", 15)
pd.set_option("display.max_rows", 5)

display(df)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32559,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K
32560,52,Self-emp-inc,287927,HS-grad,9,Married-civ-spouse,Exec-managerial,Wife,White,Female,15024,0,40,United-States,>50K


convert the race code into dummy variables.

In [None]:
# One-hot encode the categorical "race" column: one column per category,
# each named with the "race_" prefix.
dummies = pd.get_dummies(df["race"], prefix="race")
print(dummies.shape)

# Display limits used for the preview below.
pd.set_option("display.max_columns", 5)
pd.set_option("display.max_rows", 10)

display(dummies)

(32561, 5)


Unnamed: 0,race_ Amer-Indian-Eskimo,race_ Asian-Pac-Islander,race_ Black,race_ Other,race_ White
0,False,False,False,False,True
1,False,False,False,False,True
2,False,False,False,False,True
3,False,False,True,False,False
4,False,False,True,False,False
...,...,...,...,...,...
32556,False,False,False,False,True
32557,False,False,False,False,True
32558,False,False,False,False,True
32559,False,False,False,False,True


Next, we must merge these dummies back into the main data frame. We also drop the original "race" field, as the dummies now represent it.

In [None]:
# Replace the original "race" column with its dummy columns. Building a new
# frame in one expression avoids mutating df in place; the resulting column
# order is the same as concatenating first and dropping "race" afterwards.
df = pd.concat([df.drop(columns="race"), dummies], axis=1)

# Display limits used for the preview below.
pd.set_option("display.max_columns", 20)
pd.set_option("display.max_rows", 10)

display(df)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,sex,capital-gain,capital-loss,hours-per-week,native-country,income,race_ Amer-Indian-Eskimo,race_ Asian-Pac-Islander,race_ Black,race_ Other,race_ White
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,Male,2174,0,40,United-States,<=50K,False,False,False,False,True
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,Male,0,0,13,United-States,<=50K,False,False,False,False,True
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,Male,0,0,40,United-States,<=50K,False,False,False,False,True
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Male,0,0,40,United-States,<=50K,False,False,True,False,False
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Female,0,0,40,Cuba,<=50K,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,27,Private,257302,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,Female,0,0,38,United-States,<=50K,False,False,False,False,True
32557,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,Male,0,0,40,United-States,>50K,False,False,False,False,True
32558,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,Female,0,0,40,United-States,<=50K,False,False,False,False,True
32559,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,Male,0,0,20,United-States,<=50K,False,False,False,False,True


fill in missing age values.

In [None]:
# Impute missing ages with the column median.
age = df["age"]
med = age.median()
df["age"] = age.fillna(med)

In [None]:
# Generate X and Y for a Classification Neural Network
from sklearn import preprocessing

# Convert to numpy - Classification
# Columns left out of the feature matrix. "income" is the prediction target,
# so it must not appear among the inputs; the remaining entries are the columns
# this notebook excludes from the features.
excluded_columns = [
    "workclass",
    "fnlwgt",
    "education",
    "marital-status",
    "occupation",
    "relationship",
    "sex",
    "native-country",
    "income",
]
x_columns = df.columns.drop(excluded_columns)
# Cast to float32 so the integer and boolean (dummy) columns form one numeric array.
x = df[x_columns].to_numpy(dtype="float32")

# Target: income label encoded as integer class ids. `highincome` holds the
# original labels, indexed by class id. y is deliberately NOT overwritten with
# the race dummies, which are features here rather than the target.
le = preprocessing.LabelEncoder()
y = le.fit_transform(df["income"])
highincome = le.classes_

In [None]:
# Show the feature matrix followed by the target array, one per line.
print(x, y, sep="\n")

[[39 13 2174 ... False False True]
 [50 13 0 ... False False True]
 [38 9 0 ... False False True]
 ...
 [58 9 0 ... False False True]
 [22 9 0 ... False False True]
 [52 9 15024 ... False False True]]
[[False False False False  True]
 [False False False False  True]
 [False False False False  True]
 ...
 [False False False False  True]
 [False False False False  True]
 [False False False False  True]]


## Neural Network Example 1: Simple PyTorch Regression - Boston Housing Prices


### Import libraries

In [None]:
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from torch.autograd import Variable

### Load Data

In [None]:
# Load the Boston Housing data; cells equal to "NA" or "?" are read as missing.
df = pd.read_csv(
    "https://raw.githubusercontent.com/yunssamfinance/DeepLearningInFinance/main/boston.csv",
    na_values=["NA", "?"],
)

### Convert Pandas to Numpy

In [None]:
# Pandas to Numpy
# Inputs: the thirteen predictor columns, in this order.
x = df.loc[
    :,
    ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
     "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT"],
].to_numpy()
# Target for regression: the MEDV column.
y = df["MEDV"].to_numpy()

### Convert Numpy to Pytorch

In [None]:
# Numpy to PyTorch: copy both arrays onto the selected device as float32 tensors.
x, y = (
    torch.tensor(array, device=device, dtype=torch.float32)
    for array in (x, y)
)

### Create Neural Network

In [None]:
# Fully connected regression network: inputs -> 50 -> 25 -> 1, with ReLU
# between the linear layers, placed on the selected device.
# (torch.compile(model, backend="aot_eager") is an optional PyTorch 2.0
# speed-up; it is left out because it does not work as well on MPS.)
model = nn.Sequential(
    nn.Linear(x.shape[1], 50),
    nn.ReLU(),
    nn.Linear(50, 25),
    nn.ReLU(),
    nn.Linear(25, 1),
).to(device)

# Mean squared error is the loss used for this regression.
loss_fn = nn.MSELoss()

# Adam over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

### Train Neural Network

In [None]:
# Full-batch training: 1000 passes over the whole data set.
for epoch in range(1000):
    optimizer.zero_grad()
    # Flatten the model output so it has the same 1-D shape as y.
    out = torch.flatten(model(x))
    loss = loss_fn(out, y)
    loss.backward()
    optimizer.step()

    # Report the loss on every 100th epoch.
    if not epoch % 100:
        print(f"Epoch {epoch}, loss: {loss.item()}")

Epoch 0, loss: 1962.1619873046875
Epoch 100, loss: 51.51643753051758
Epoch 200, loss: 19.79680633544922
Epoch 300, loss: 15.152705192565918
Epoch 400, loss: 13.691800117492676
Epoch 500, loss: 12.826031684875488
Epoch 600, loss: 12.016812324523926
Epoch 700, loss: 11.210287094116211
Epoch 800, loss: 10.617677688598633
Epoch 900, loss: 10.637430191040039


In [None]:
# Predict prices with the trained model (one output per input row).
pred = model(x)
print(f"Shape: {pred.shape}")
# Show the first ten predictions.
print(pred[:10])

Shape: torch.Size([506, 1])
tensor([[30.1455],
        [21.2389],
        [31.6443],
        [36.0304],
        [33.0872],
        [28.4399],
        [20.9794],
        [18.8234],
        [16.5516],
        [18.9095]], device='cuda:0', grad_fn=<SliceBackward0>)


We would like to see how good these predictions are. We know the correct MEDV for each house, so we can measure how close the neural network was. We will first see how to calculate RMSE with standard Sklearn metrics. To utilize Sklearn we must bring the predictions back to the CPU and detach them from the neural network graph. The following code accomplishes this with **cpu().detach()**.


In [None]:
from sklearn import metrics

# Measure RMSE error.  RMSE is common for regression.
# sklearn's signature is mean_squared_error(y_true, y_pred). Both tensors are
# detached from the graph, moved to the CPU and converted to NumPy; the
# predictions are flattened so both arguments are 1-D.
y_true = y.detach().cpu().numpy()
y_pred = pred.detach().cpu().numpy().ravel()
score = np.sqrt(metrics.mean_squared_error(y_true, y_pred))
print(f"Final score (RMSE): {score}")

Final score (RMSE): 3.0863842964172363


We can accomplish the same task entirely within PyTorch with less code. It is important to know how to perform these calculations both with PyTorch and Scikit-learn.


In [None]:
# RMSE computed entirely in PyTorch: square root of the mean squared error
# between the flattened predictions and the targets.
score = F.mse_loss(pred.flatten(), y).sqrt()
print(f"Final score (RMSE): {score}")

Final score (RMSE): 3.0863842964172363


## Neural Network Example 2 - Simple PyTorch Classification: Loan Default

In [None]:
# Import Libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn import preprocessing

In [None]:
# Load the loan-default data; cells equal to "NA" or "?" are read as missing.
df = pd.read_csv(
    "https://raw.githubusercontent.com/yunssamfinance/DeepLearningInFinance/main/Default_Fin.csv",
    na_values=["NA", "?"],
)

In [None]:
# Pandas Dataframe to Numpy
# Inputs: the three borrower attributes, in this order.
x = df[["Employed", "Bank Balance", "Annual Salary"]].to_numpy()

# Target: "Defaulted?" encoded as integer class ids. `defaulted` keeps the
# original labels, indexed by class id.
le = preprocessing.LabelEncoder()
y = le.fit_transform(df["Defaulted?"])
defaulted = le.classes_

In [None]:
# Numpy to PyTorch: float32 inputs, and integer (long) class ids for the target.
y = torch.tensor(y, device=device, dtype=torch.long)
x = torch.tensor(x, device=device, dtype=torch.float32)

In [None]:
# Fully connected classifier: inputs -> 50 -> 25 -> one output per class,
# placed on the selected device.
# (torch.compile(model, backend="aot_eager") is an optional PyTorch 2.0
# speed-up; it is left out because it does not work as well on MPS.)
model = nn.Sequential(
    nn.Linear(x.shape[1], 50),
    nn.ReLU(),
    nn.Linear(50, 25),
    nn.ReLU(),
    nn.Linear(25, len(defaulted)),
    # The model emits log-probabilities. CrossEntropyLoss applies log-softmax
    # itself, so this layer is redundant for training (log-softmax of
    # log-softmax outputs is unchanged), but it lets torch.exp() of the
    # model output be read as class probabilities.
    nn.LogSoftmax(dim=1),
).to(device)

# Cross entropy loss for multi-class classification.
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

In [None]:
# Train network: full-batch training for 1000 epochs.
# NOTE(review): the logged losses spike during the early epochs and the first
# ten predictions printed afterwards are identical, which suggests the raw,
# unscaled inputs hamper training. Consider standardizing x (and any ad hoc
# inputs) - confirm.
model.train()
for epoch in range(1000):
    optimizer.zero_grad()
    out = model(x)
    # Note: CrossEntropyLoss combines nn.LogSoftmax() and nn.NLLLoss(). The
    # model already ends in LogSoftmax, which this loss leaves unchanged, so
    # the result equals NLLLoss on the model's log-probabilities.
    loss = criterion(out, y)
    loss.backward()
    optimizer.step()

    # Report the loss on every 100th epoch.
    if not epoch % 100:
        print(f"Epoch {epoch}, loss: {loss.item()}")

Epoch 0, loss: 577.6837768554688
Epoch 100, loss: 1280.04345703125
Epoch 200, loss: 13.427490234375
Epoch 300, loss: 189.4287567138672
Epoch 400, loss: 0.17203228175640106
Epoch 500, loss: 0.15021558105945587
Epoch 600, loss: 0.14691606163978577
Epoch 700, loss: 0.14621882140636444
Epoch 800, loss: 0.1460670530796051
Epoch 900, loss: 0.14603546261787415


In [None]:
# Original class labels found by the LabelEncoder; position = encoded class id.
print(defaulted)

[0 1]


Now that you have a neural network trained, we would like to be able to use it. The following code makes use of our neural network. We will generate predictions. Notice that two values come back for each borrower, one per class (0 for not defaulted, 1 for defaulted). We call the **eval** function to inform PyTorch that we are no longer training and wish to evaluate.

In [None]:
# Switch the model to evaluation mode, then score every borrower.
model.eval()
pred = model(x)
print(f"Borrower Characteristics: {pred.shape}")
# First ten rows of model output (one column per class).
print(pred[:10])

Borrower Characteristics: torch.Size([10000, 2])
tensor([[-0.0341, -3.3944],
        [-0.0341, -3.3944],
        [-0.0341, -3.3944],
        [-0.0341, -3.3944],
        [-0.0341, -3.3944],
        [-0.0341, -3.3944],
        [-0.0341, -3.3944],
        [-0.0341, -3.3944],
        [-0.0341, -3.3944],
        [-0.0341, -3.3944]], device='cuda:0', grad_fn=<SliceBackward0>)


If you would like to turn off scientific notation, the following line can be used:


In [None]:
# Print small floating-point numbers in fixed-point rather than scientific notation.
np.set_printoptions(suppress=True)

Now we see these values printed as a NumPy array, without scientific notation.


In [None]:
# First ten predictions as a NumPy array: detach from the graph, move to the
# CPU, then convert.
print(pred[:10].detach().cpu().numpy())

[[-0.03413714 -3.3943896 ]
 [-0.03413714 -3.3943896 ]
 [-0.03413714 -3.3943896 ]
 [-0.03413714 -3.3943896 ]
 [-0.03413714 -3.3943896 ]
 [-0.03413714 -3.3943896 ]
 [-0.03413714 -3.3943896 ]
 [-0.03413714 -3.3943896 ]
 [-0.03413714 -3.3943896 ]
 [-0.03413714 -3.3943896 ]]


Usually, the program considers the column with the highest prediction to be the prediction of the neural network. It is easy to convert the predictions to the expected default status. The argmax function finds the index of the maximum prediction for each row.


In [None]:
# For every row, take the column index of the largest output: that index is
# the predicted class id.
_, predict_classes = pred.max(dim=1)
print(f"Predictions: {predict_classes}")
print(f"Expected: {y}")

Predictions: tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
Expected: tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


Of course, it is straightforward to turn these indexes back into borrower status. We use the `defaulted` class list that we created earlier.


In [None]:
# Map predicted class ids (rows 1 through 9) back to their original labels.
print(defaulted[predict_classes[1:10].detach().cpu().numpy()])

[0 0 0 0 0 0 0 0 0]


Accuracy might be a more easily understood error metric. It is essentially a test score. For all of the default predictions, what percent were correct? The downside is it does not consider how confident the neural network was in each prediction.


In [None]:
from sklearn.metrics import accuracy_score

# Fraction of borrowers whose predicted class equals the true class.
# sklearn needs CPU data, so both tensors are detached and moved off the device.
correct = accuracy_score(
    y.detach().cpu(),
    predict_classes.detach().cpu(),
)
print(f"Accuracy: {correct}")

Accuracy: 0.9669


The code below performs two ad hoc predictions. The first prediction is a single borrower, and the second predicts two borrowers. Notice that the **argmax** in the second prediction requires **axis=1**? Since we have a 2D array now, we must specify which axis to take the **argmax** over. The value **axis=1** specifies we want the max column index for each row.


In [None]:
# One ad hoc borrower, given in the same column order as the training
# inputs: Employed, Bank Balance, Annual Salary.
sample_borrower = torch.tensor([[1.0, 9000.0, 500000.0]], device=device)
pred = model(sample_borrower)
print(pred)
_, predict_classes = torch.max(pred, 1)
# Read the single class id out as a Python int before indexing the NumPy
# label array, instead of relying on the implicit conversion of a device tensor.
print(f"Predict that {sample_borrower} is: {defaulted[predict_classes.item()]}")

tensor([[-2.5479e-03, -5.9737e+00]], device='cuda:0',
       grad_fn=<LogSoftmaxBackward0>)
Predict that tensor([[1.0000e+00, 9.0000e+03, 5.0000e+05]], device='cuda:0') is: 0


You can also predict two sample borrowers.


In [None]:
# Two ad hoc borrowers, one per row, in the same column order as the training
# inputs: Employed, Bank Balance, Annual Salary.
sample_borrower = torch.tensor(
    [
        [1.0, 9000.0, 500000.0],
        [0.0, 15000.0, 700000.0],
    ],
    device=device,
)
# The model output is already on `device`, so no further transfer is needed.
pred = model(sample_borrower)
print(pred)
# Column index of the largest output in each row = predicted class id.
_, predict_classes = pred.max(dim=1)
print(f"Predict that these two borrowers {sample_borrower} ")
print(f"are: {defaulted[predict_classes.detach().cpu()]}")

tensor([[-2.5479e-03, -5.9737e+00],
        [-2.5444e-03, -5.9751e+00]], device='cuda:0',
       grad_fn=<LogSoftmaxBackward0>)
Predict that these two borrowers tensor([[1.0000e+00, 9.0000e+03, 5.0000e+05],
        [0.0000e+00, 1.5000e+04, 7.0000e+05]], device='cuda:0') 
are: [0 0]


In [None]:
# The model ends in LogSoftmax, so exponentiating its output gives the
# per-class probabilities for each row.
print(torch.exp(pred))

tensor([[0.9975, 0.0025],
        [0.9975, 0.0025]], device='cuda:0', grad_fn=<ExpBackward0>)
