<img src="https://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>


# Deep Learning Basics with PyTorch

**Dr. Yves J. Hilpisch with GPT-5**


# Chapter 4 — The Limits of Classical ML
This Colab-ready notebook mirrors the figures and small experiments from the chapter: distance concentration, polynomial feature growth, kernel scaling, complexity vs depth, learning curves, and residual patterns when the model is mis-specified.

In [None]:
# Optional: Colab usually has these packages preinstalled; uncomment the
# next line to install them in a fresh environment.
# !pip -q install numpy matplotlib scikit-learn
import numpy as np
import matplotlib.pyplot as plt
# Apply one plotting style to every figure created in this notebook.
plt.style.use('seaborn-v0_8')
# IPython magic: render inline figures at 'retina' (high-DPI) resolution.
%config InlineBackend.figure_format = 'retina'


## Distance concentration

In [None]:
# Distance concentration: for each dimension d, estimate the relative
# contrast (mean farthest - mean nearest) / mean nearest distance between
# uniformly sampled points, then plot it against d.
rng = np.random.default_rng(0)
dims = np.array([2, 5, 10, 20, 50, 100])
n_points = 500
n_probes = 60  # reference points used to estimate min/max distances
rel = []

for d in dims:
    # Sample n_points points uniformly in [0,1]^d
    X = rng.random((n_points, int(d)))
    # Subsample reference points instead of using all pairwise distances
    idx = rng.choice(n_points, size=n_probes, replace=False)
    mins, maxs = [], []
    for i in idx:
        dists = np.linalg.norm(X - X[i], axis=1)
        dists = dists[dists > 0]  # drop the zero distance of the point to itself
        mins.append(dists.min())
        maxs.append(dists.max())
    # One relative-contrast value per dimension, computed after all probes
    # have been visited (so len(rel) == len(dims) when plotting).
    rel.append((np.mean(maxs) - np.mean(mins)) / np.mean(mins))

# Plot once after accumulating all contrasts
plt.figure(figsize=(5, 3.2))
plt.plot(dims, rel, marker='o')
plt.xlabel('dimension d')
plt.ylabel('relative contrast')
plt.tight_layout()
plt.show()


## Polynomial feature growth

In [None]:
import math
def poly_count(d, K):
    """Return the number of polynomial features of degree 1..K in d inputs.

    There are C(d + k - 1, k) distinct monomials of exact degree k in d
    variables; the result sums this over k = 1, ..., K (the constant term
    is not counted).

    Args:
        d: Number of input variables.
        K: Maximum total degree (inclusive).

    Returns:
        The feature count as an exact Python int.
    """
    return sum(math.comb(d + k - 1, k) for k in range(1, K + 1))


# Driver code lives at cell level (not inside poly_count, where it would be
# unreachable after the return): feature count for d = 20 as the degree grows.
d = 20
Ks = np.arange(1, 8)
counts = [poly_count(d, int(K)) for K in Ks]
plt.figure(figsize=(5, 3.2))
plt.plot(Ks, counts, marker='o')
plt.yscale('log')  # counts span several orders of magnitude
plt.xlabel('degree K')
plt.ylabel('# features (log)')
plt.tight_layout()
plt.show()


## Kernel scaling (n^2 memory)

In [None]:
# Memory needed to hold a dense n x n kernel matrix for growing sample sizes.
n = np.array([200, 500, 1000, 2000, 5000, 10000])
bytes_per_entry = 8  # assuming 8-byte (float64) matrix entries
bytes_per_unit = 1024**3  # binary gigabyte, so 'GB' below means 2**30 bytes
mem_gb = (n.astype(float)**2 * bytes_per_entry) / bytes_per_unit

fig, ax = plt.subplots(figsize=(5, 3.2))
ax.plot(n, mem_gb, marker='o')
ax.set_xlabel('samples n')
ax.set_ylabel('Kernel matrix memory (GB)')
fig.tight_layout()
plt.show()


## Complexity vs depth (Decision Tree)

In [None]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Two-moons toy data with a stratified 75/25 train/test split.
X, y = make_moons(n_samples=600, noise=0.25, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# Fit one tree per max_depth and record train/test accuracy.
depths = range(1, 16)
tr, te = [], []
for d in depths:
    clf = DecisionTreeClassifier(max_depth=d, random_state=0).fit(X_tr, y_tr)
    tr.append(clf.score(X_tr, y_tr))
    te.append(clf.score(X_te, y_te))

# Plot once, after the sweep: inside the loop the score lists are shorter
# than `depths`, so plotting there fails on mismatched lengths.
plt.figure(figsize=(5.2, 3.2))
plt.plot(depths, tr, marker='o', label='train')
plt.plot(depths, te, marker='o', label='test')
plt.legend(frameon=False)
plt.xlabel('max_depth')
plt.ylabel('accuracy')
plt.tight_layout()
plt.show()


## Learning curve (Logistic Regression)

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import learning_curve
# Two-moons data and a scaled logistic-regression pipeline.
X, y = make_moons(n_samples=2000, noise=0.25, random_state=0)
model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=2000))

# 5-fold cross-validated scores at 8 evenly spaced training-set fractions.
fractions = np.linspace(0.1, 1.0, 8)
sizes, tr, te = learning_curve(model, X, y, cv=5, train_sizes=fractions)

# Average the per-fold scores (axis 1) and draw both curves.
fig, ax = plt.subplots(figsize=(5.2, 3.2))
for scores, name in ((tr, 'train'), (te, 'test')):
    ax.plot(sizes, scores.mean(axis=1), marker='o', label=name)
ax.legend(frameon=False)
ax.set_xlabel('training size')
ax.set_ylabel('accuracy')
fig.tight_layout()
plt.show()


## Residuals under mis-specification

In [None]:
# Noisy samples from a quadratic, fitted with a straight line; the residuals
# of the linear fit are then plotted against x.
rng = np.random.default_rng(0)
x = np.linspace(-3, 3, 80)
y_true = 0.6*x**2 - 0.5*x + 0.3
noise = rng.normal(0, 0.5, size=x.shape)
y = y_true + noise

# Least-squares line y ~ a*x + b via the design matrix [x, 1].
A = np.vstack([x, np.ones_like(x)]).T
solution = np.linalg.lstsq(A, y, rcond=None)
a, b = solution[0]
resid = y - (a*x + b)

fig, ax = plt.subplots(figsize=(5.2, 3.2))
ax.scatter(x, resid, s=18)
ax.axhline(0, color='k', lw=1)  # zero-residual reference line
ax.set_xlabel('x')
ax.set_ylabel('residual')
fig.tight_layout()
plt.show()


<img src="https://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>
