<img src="https://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>


# Deep Learning Basics with PyTorch

**Dr. Yves J. Hilpisch with GPT-5**


# Chapter 2 — Data, Features, and Representations

This Colab-ready notebook mirrors the chapter's Iris example: a scatter plot of two features, a scaler + logistic regression pipeline, and a 2D decision boundary.

In [None]:
# Optional: ensure packages are present (Colab usually has these)
# !pip -q install scikit-learn matplotlib numpy
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('seaborn-v0_8') # plotting
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay
%config InlineBackend.figure_format = 'retina'


## Load and visualize two features

In [None]:
# Load Iris and keep only the two petal measurements so the data can be drawn in 2D.
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]  # inputs: petal length, petal width
y = iris.target  # targets/labels

# Draw all three classes into ONE figure, using a distinct marker per class.
plt.figure(figsize=(4, 3))
for cls, marker, label in zip(range(3), ['o', 's', '^'], iris.target_names):
    idx = y == cls  # boolean mask selecting the samples of this class
    plt.scatter(X[idx, 0], X[idx, 1], marker=marker, label=label, s=25)

# Figure-level decoration and rendering happen once, AFTER the loop.
# Calling plt.show() inside the loop would render the figure after the first
# class and push the remaining classes into separate figures.
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.legend(frameon=False)
plt.tight_layout()
plt.show()


## Train a scaler + logistic regression pipeline

In [None]:
# Hold out 25% of the samples for testing; the split is seeded for
# reproducibility and stratified on the labels.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

# Standardize the features, then fit a logistic regression on the scaled data.
pipe = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=1000),
).fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = pipe.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f'Accuracy: {acc:.3f}')

# Confusion matrix with the species names on both axes.
ConfusionMatrixDisplay.from_predictions(
    y_test,
    y_pred,
    display_labels=iris.target_names,
)
plt.tight_layout()
plt.show()


## Decision boundary in 2D

In [None]:
# Refit the scaler + logistic regression pipeline on ALL samples; this cell
# visualizes the learned class regions rather than measuring accuracy.
pipe = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
pipe.fit(X, y)

# Plot window: the observed feature range padded by 0.5 on every side.
xmin, xmax = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
ymin, ymax = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5

# Evaluate the classifier on a 200 x 200 grid covering the window, then
# reshape the flat predictions back into the grid's shape for contouring.
xx, yy = np.meshgrid(np.linspace(xmin, xmax, 200), np.linspace(ymin, ymax, 200))
grid = np.c_[xx.ravel(), yy.ravel()]
zz = pipe.predict(grid).reshape(xx.shape)

plt.figure(figsize=(4, 3))
# Contour levels bracket the integer class ids 0, 1, 2 so each predicted
# class is filled as its own band.
plt.contourf(xx, yy, zz, alpha=0.2, levels=[-0.5, 0.5, 1.5, 2.5], cmap='coolwarm')

# Overlay the samples of every class on top of the filled regions.
for cls, marker, label in zip(range(3), ['o', 's', '^'], iris.target_names):
    idx = y == cls  # boolean mask selecting the samples of this class
    plt.scatter(X[idx, 0], X[idx, 1], marker=marker, label=label, s=25)

# Figure-level decoration and rendering happen once, AFTER the loop.
# Calling plt.show() inside the loop would render the regions with only the
# first class and push the remaining classes into separate figures.
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.legend(frameon=False)
plt.tight_layout()
plt.show()


<img src="https://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>
