Before you turn this problem in, make sure everything runs as expected. First, **restart the kernel** (in the menubar, select Kernel$\rightarrow$Restart) and then **run all cells** (in the menubar, select Cell$\rightarrow$Run All).

Make sure you fill in any place that says `YOUR CODE HERE` or "YOUR ANSWER HERE", as well as your name and collaborators below:

In [None]:
# Fill in your own name and the names of any collaborators before submitting.
NAME = ""
COLLABORATORS = ""

---

## Recall: linear least-squares regression

$\newcommand{\bfm}{\mathbf{m}}$
$\newcommand{\bfx}{\mathbf{x}}$
$\newcommand{\bfy}{\mathbf{y}}$
$\newcommand{\bfV}{\mathbf{V}}$
$\newcommand{\bfX}{\mathbf{X}}$
\begin{align}
\mathbf{0}^\top &= \frac{\partial }{\partial \bfm} ( \bfy^\top\bfy + \bfm^\top \bfX^\top \bfX \bfm - 2\bfy^\top \bfX \bfm)\\
      &= 2 {\bfm^*}^\top \bfX^\top \bfX  - 2\bfy^\top \bfX
\end{align}

This gives us the solution
$$ \bfm^* = (\bfX^\top \bfX)^{-1} \bfX^\top \bfy $$

The symbol $\bfV^{-1}$ denotes the inverse of the matrix $\bfV$.

The term $(\bfX^\top \bfX)^{-1} \bfX^\top$ is also called the pseudo-inverse of a matrix $\bfX$, denoted as $\bfX^\dagger$.

In [None]:
%%writefile saltconcentration.tsv
#Observation	SaltConcentration	RoadwayArea
1	3.8	0.19
2	5.9	0.15
3	14.1	0.57
4	10.4	0.4
5	14.6	0.7
6	14.5	0.67
7	15.1	0.63
8	11.9	0.47
9	15.5	0.75
10	9.3	0.6
11	15.6	0.78
12	20.8	0.81
13	14.6	0.78
14	16.6	0.69
15	25.6	1.3
16	20.9	1.05
17	29.9	1.52
18	19.6	1.06
19	31.3	1.74
20	32.7	1.62

In [None]:
# numpy can import delimited text files. By default np.loadtxt splits fields on
# whitespace (so tab-separated files work as-is) and skips lines starting with
# '#'; pass delimiter=',' for comma-separated files.
import numpy as np
salt_concentration_data = np.loadtxt("saltconcentration.tsv")

In [None]:
# Build the regression problem from the loaded table
# (columns: observation id, salt concentration, roadway area).
n = len(salt_concentration_data)            # number of observations
bfy = salt_concentration_data[:, 1]         # targets: salt concentration
bfx = salt_concentration_data[:, 2:3]       # inputs: roadway area, kept 2-D (n x 1)
# Design matrix [x, 1]: the column of ones lets the model learn an intercept.
bfX = np.concatenate([bfx, np.ones_like(bfx)], axis=1)
bfX

In [None]:
# Closed-form least-squares solution from the normal equations:
#   m* = (X^T X)^{-1} X^T y
bfm = np.linalg.inv(bfX.T @ bfX) @ bfX.T @ bfy
print(bfm)
# The same fit from NumPy's least-squares solver; only the solution vector
# (the first of its return values) is kept.
bfm, *_ = np.linalg.lstsq(bfX, bfy, rcond=None)
print(bfm)

In [None]:
import matplotlib.pyplot as plt
# bfX has columns [x, 1], so the fitted vector is [slope, intercept].
m, c = bfm.flatten()[:2]

# Roadway area is the regressor; evaluate the fitted line at the observed x.
x = salt_concentration_data[:, 2]
y = m * x + c

fig, ax = plt.subplots()
# Scatter of the raw observations
ax.scatter(x, salt_concentration_data[:, 1])
ax.set_xlabel(r"Roadway area $\%$")
ax.set_ylabel(r"Salt concentration (mg/L)")
# Fitted regression line in red
ax.plot(x, y, 'r-')

## Second derivative



### Geometry of second derivative

In [None]:
import matplotlib.pyplot as plt
plt.rcParams.update({
    "text.usetex": False # False keeps external LaTeX off; $...$ labels are rendered by matplotlib's built-in mathtext
})

In [None]:
# Three functions (red) with their first derivatives (blue), side by side,
# to show how the derivative behaves around the stationary point x = 0.
x = np.linspace(-10, 10, 100)
fig, ax = plt.subplots(1, 3, figsize=(9, 3))

# f(x) = x^2: the derivative 2x goes from negative to positive (minimum).
ax[0].plot(x, x**2, 'r', label=r'$f(x)=x^2$')
ax[0].plot(x, 2*x, 'b', label=r'$\frac{df(x)}{dx}$')
ax[0].set_xlabel('x')
ax[0].legend()

# f(x) = -x^2: the derivative -2x goes from positive to negative (maximum).
ax[1].plot(x, -x**2, 'r', label=r'$f(x)=-x^2$')
ax[1].plot(x, -2*x, 'b', label=r'$\frac{df(x)}{dx}$')
ax[1].set_xlabel('x')
ax[1].legend()

# f(x) = x^3: the derivative is 3x^2, which touches zero at x = 0 without
# changing sign (neither a minimum nor a maximum).
ax[2].plot(x, x**3, 'r', label=r'$f(x)=x^3$')
ax[2].plot(x, 3*x**2, 'b', label=r'$\frac{df(x)}{dx}$')
ax[2].set_xlabel('x')
ax[2].legend()

### Second derivatives in 2 dimension

1. $ f(x, y) = 2x^2 + 4y^2 - xy - 6x - 8y  + 6$
2. $ f(x, y) = - 2x^2 - 4y^2 + xy + 6x + 8y  + 6$
3. $ f(x, y) = 2x^2 - 4y^2 - xy - 6x + 8y  + 6$

Example 1:
$$ f(x, y) = 2x^2 + 4y^2 - xy - 6x - 8y  + 6$$ 
$$ f([x, y]) = \begin{bmatrix}x & y \end{bmatrix}
\begin{bmatrix}2 & -1/2\\ -1/2 & 4\end{bmatrix}
\begin{bmatrix} x \\ y \end{bmatrix} 
+
\begin{bmatrix} - 6 & - 8 \end{bmatrix}
\begin{bmatrix} x \\ y \end{bmatrix}  + 6$$ 

Example 2:
$$ f(x, y) = - 2x^2 - 4y^2 + xy + 6x + 8y  + 6$$ 
$$ f([x, y]) = \begin{bmatrix}x & y \end{bmatrix}
\begin{bmatrix}-2 & 1/2\\ 1/2 & -4\end{bmatrix}
\begin{bmatrix} x \\ y \end{bmatrix} 
+
\begin{bmatrix} 6 & 8 \end{bmatrix}
\begin{bmatrix} x \\ y \end{bmatrix}  + 6$$ 

Example 3:
$$ f(x, y) = 2x^2 - 4y^2 - xy - 6x + 8y  + 6$$ 
$$ f([x, y]) = \begin{bmatrix}x & y \end{bmatrix}
\begin{bmatrix}2 & -1/2\\ -1/2 & -4\end{bmatrix}
\begin{bmatrix} x \\ y \end{bmatrix} 
+
\begin{bmatrix} - 6 & 8 \end{bmatrix}
\begin{bmatrix} x \\ y \end{bmatrix}  + 6$$ 

In [None]:
import plotly.graph_objects as go
import numpy as np
import matplotlib.pyplot as plt
def plot_surface(func):
    """Show ``func(x, y)`` as an interactive plotly surface.

    ``func`` is called once with two 21 x 21 coordinate arrays covering
    [-20, 20] x [-20, 20]. The shapes of the result and of both coordinate
    arrays are printed before the figure is displayed.
    """
    grid_x, grid_y = np.mgrid[-20:20:21j, -20:20:21j]
    heights = func(grid_x, grid_y)

    print(heights.shape, grid_x.shape, grid_y.shape)

    surface = go.Surface(z=heights, x=grid_x, y=grid_y)
    figure = go.Figure(data=[surface])
    # Add z-contours to the surface and project them along z.
    z_contours = dict(show=True, usecolormap=True,
                      highlightcolor="limegreen", project_z=True)
    figure.update_traces(contours_z=z_contours)
    figure.show()

Example 1:

$$ f(x, y) = 2x^2 + 4y^2 - xy - 6x - 8y  + 6$$ 

$$ f([x, y]) = \begin{bmatrix}x & y \end{bmatrix}
\begin{bmatrix}2 & -1/2\\ -1/2 & 4\end{bmatrix}
\begin{bmatrix} x \\ y \end{bmatrix} 
+
\begin{bmatrix} - 6 & - 8 \end{bmatrix}
\begin{bmatrix} x \\ y \end{bmatrix}  + 6$$ 

In [None]:
def f(x, y):
    """Example 1 written out term by term: 2x^2 + 4y^2 - xy - 6x - 8y + 6.

    Uses only arithmetic operators, so scalars and NumPy arrays both work.
    """
    return 2*x**2 + 4*y**2 - x*y - 6*x - 8*y  + 6

def f_vec(x, y):
    """Evaluate Example 1 in matrix form, x^T A x + b^T x + c, over a grid.

    ``x`` and ``y`` are coordinate arrays of equal shape (for example from
    ``np.mgrid``); the result has that same shape. Every call also prints
    the stationary point ``-(A + A^T)^{-1} b``.
    """
    quad_matrix = np.array([[2, -0.5],
                            [-0.5, 4]])  # A, 2 x 2
    linear = np.array([-6, -8])          # b, length 2
    offset = 6                           # c
    print("Minima at, ", -np.linalg.inv(quad_matrix + quad_matrix.T) @ linear)

    # Put the two coordinates on a trailing axis so each grid point is a 2-vector.
    points = np.stack([x, y], axis=-1)        # ... x 2
    as_rows = points[..., np.newaxis, :]      # ... x 1 x 2
    as_cols = points[..., :, np.newaxis]      # ... x 2 x 1
    # Batched row @ A @ column gives a 1 x 1 matrix per point; take its entry.
    quadratic = (as_rows @ quad_matrix @ as_cols)[..., 0, 0]
    return quadratic + points @ linear + offset

# Render the Example 1 surface using the matrix-form evaluator.
plot_surface(f_vec)

$$ f(x, y) = - 2x^2 - 4y^2 + xy + 6x + 8y  + 6$$ 

In [None]:
# Example 2 written out term by term: -2x^2 - 4y^2 + xy + 6x + 8y + 6.
def f(x, y): return - 2*x**2 - 4*y**2 + x*y + 6*x + 8*y  + 6

def f_vec(x, y):
    """Evaluate Example 2 in matrix form, x^T A x + b^T x + c, over a grid.

    ``x`` and ``y`` are coordinate arrays of equal shape (for example from
    ``np.mgrid``); the result has that same shape. Every call also prints
    the stationary point ``-(A + A^T)^{-1} b``.
    """
    quad_matrix = np.array([[-2, 0.5],
                            [0.5, -4]])  # A, 2 x 2
    linear = np.array([6, 8])            # b, length 2
    offset = 6                           # c
    print("Maxima at, ", -np.linalg.inv(quad_matrix + quad_matrix.T) @ linear)

    # Put the two coordinates on a trailing axis so each grid point is a 2-vector.
    points = np.stack([x, y], axis=-1)        # ... x 2
    as_rows = points[..., np.newaxis, :]      # ... x 1 x 2
    as_cols = points[..., :, np.newaxis]      # ... x 2 x 1
    # Batched row @ A @ column gives a 1 x 1 matrix per point; take its entry.
    quadratic = (as_rows @ quad_matrix @ as_cols)[..., 0, 0]
    return quadratic + points @ linear + offset
# Render the Example 2 surface using the matrix-form evaluator.
plot_surface(f_vec)

$$ f(x, y) = 2x^2 - 4y^2 - xy - 6x + 8y  + 6$$ 

In [None]:
# Example 3: 2x^2 - 4y^2 - xy - 6x + 8y + 6 (the x^2 and y^2 terms have
# opposite signs).
def f(x, y): return  2*x**2 - 4*y**2 - x*y - 6*x + 8*y  + 6
plot_surface(f)

## Second derivative in n-D : Hessian matrix
$\newcommand{\bbR}{\mathbb{R}}$
$\newcommand{\calH}{\mathcal{H}}$
$\newcommand{\p}{\partial}$
$\newcommand{\pfxixj}[2]{\frac{\p^2 f}{\p x_{#1} \p x_{#2}}}$
$\newcommand{\calJ}{\mathcal{J}}$
Hessian matrix of a scalar-valued vector function $f: \bbR^n \to \bbR$ is defined as the following arrangement of second derivatives,
$$ \calH f(\bfx) = \begin{bmatrix}
\pfxixj11 & \pfxixj12 & \dots & \pfxixj1n \\
\pfxixj21 & \pfxixj22 & \dots & \pfxixj2n \\
\vdots & \vdots & \ddots  & \vdots \\
\pfxixj{n}1 & \pfxixj{n}2 & \dots & \pfxixj{n}n \\
\end{bmatrix}$$

It is sometimes also written as $\nabla^2 f(\bfx) $, and hessian can be computed by taking the Jacobian of the gradient,
$$ \calH f(\bfx) = \calJ^\top ( \nabla f(\bfx) )$$

If the second partial derivatives are continuous then the Hessian matrix is symmetric.

### Find the Hessian of the general quadratic form,
$\newcommand{\bfb}{\mathbf{b}}$
$$f(\bfx) = \bfx^\top A \bfx + \bfb^\top \bfx + c$$

Find the gradient of $f(\bfx)$
$$\nabla^\top f(\bfx) = \bfx^\top (A + A^\top) + \bfb^\top $$
Take transpose
$$\nabla f(\bfx) =  (A + A^\top)\bfx + \bfb $$

Find the Jacobian of the gradient
$$\calJ^\top \nabla f(\bfx) = (A + A^\top)$$

#### Homework 4: Problem 1
Find the Hessian of the quadratic function that we got as the objective function in linear regression,
$$ R(\bfm) = \bfy^\top \bfy - 2\bfy^\top \bfX \bfm + \bfm^\top \bfX^\top \bfX \bfm$$

Find $\calH_\bfm R(\bfm) $

## Positive definite, Negative definite and Indefinite

#### Positive definite

A square matrix $A \in \bbR^{n \times n}$ is called positive definite if $\bfx^\top A \bfx > 0$ for all nonzero $\bfx \in \bbR^n$.

#### Negative definite

A square matrix $A \in \bbR^{n \times n}$ is called negative definite if $\bfx^\top A \bfx < 0$ for all nonzero $\bfx \in \bbR^n$.

#### Indefinite

A square matrix $A \in \bbR^{n \times n}$ is called indefinite if $\bfx^\top A \bfx$ is positive for some $\bfx$ and negative for others; in particular, it is neither positive definite nor negative definite.


## Eigenvalues and eigenvectors

Eigenvalues $\lambda \in \bbR$ and eigenvectors $\mathbf{v} \in \bbR^n$, $\mathbf{v} \neq \mathbf{0}$, of a given matrix $A$ are the solutions of the equation,
$$ A \mathbf{v} = \lambda \mathbf{v} $$

You might have solved for eigenvalues and eigenvectors using the equation
$$(A - \lambda I_n) \mathbf{v} = 0,$$ which has a nonzero solution $\mathbf{v}$ only when
$$\det(A - \lambda I_n) = 0$$

## Contour Plots

![](imgs/contourplot.png)

In [None]:
def plot_contour(func, xstar=(1.8, 1.2)):
    """Draw a 20-level contour plot of ``func`` on [-20, 20] x [-20, 20].

    Parameters
    ----------
    func : callable
        Called once as ``func(x, y)`` with two 21 x 21 coordinate arrays;
        must return an array of the same shape.
    xstar : (float, float) or None
        Point marked with a red dot and labelled ``x^*``. The default
        (1.8, 1.2) is approximately the minimiser of Example 1; pass
        ``None`` to draw no marker.
    """
    x, y = np.mgrid[-20:20:21j,
                    -20:20:21j]
    f = func(x, y)

    plt.contour(x, y, f, 20, cmap='Blues_r')
    if xstar is not None:
        plt.plot([xstar[0]], [xstar[1]], 'ro')
        # Offset the label one unit to the right so it does not cover the dot.
        plt.text(xstar[0] + 1, xstar[1], '$x^*$', color='r')
    plt.xlabel('$x$')
    plt.ylabel('$y$')
    plt.show()
    

In [None]:
# Example 1 again (2x^2 + 4y^2 - xy - 6x - 8y + 6), drawn as a contour plot.
def f(x, y): return  2*x**2 + 4*y**2 - x*y - 6*x - 8*y  + 6
plot_contour(f)

But how about other kinds of functions say:

$$ \arg\min_{x, y} f(x, y), \quad \text{where } f(x, y) = x \exp(-(x^2 + y^2))$$ 

In [None]:
def plot_contour(func):
    """Draw a labelled 10-level contour plot of ``func``.

    ``func`` is evaluated once on a 201 x 201 grid with x in [-2, 2] and
    y in [-2, 3]; each contour line is annotated with its level.
    """
    grid_x, grid_y = np.mgrid[-2:2:201j, -2:3:201j]
    values = func(grid_x, grid_y)

    contour_set = plt.contour(grid_x, grid_y, values, 10, cmap='Blues_r')
    plt.clabel(contour_set, contour_set.levels, inline=True, fontsize=6)
    plt.show()

In [None]:
# A non-quadratic example: f(x, y) = x * exp(-(x^2 + y^2)).
def f(x,y): return  x * np.exp(-(x**2 + y**2))
plot_contour(f)

In [None]:
def plot_surface_3d(func):
    """Show ``func(x, y)`` as an interactive plotly surface with contours.

    ``func`` is evaluated once on a 201 x 201 grid with x in [-2, 2] and
    y in [-2, 3].
    """
    grid_x, grid_y = np.mgrid[-2:2:201j, -2:3:201j]
    heights = func(grid_x, grid_y)

    # The x and z contour settings share the same range and spacing.
    band = {"start": -2, "end": 2, "size": 0.2}
    surface = go.Surface(z=heights, x=grid_x, y=grid_y,
                         contours={"x": dict(band), "z": dict(band)})
    figure = go.Figure(data=[surface])
    figure.update_traces(contours_z=dict(show=True, usecolormap=True, project_z=True))
    figure.show()

In [None]:
# 3-D view of whichever `f` is currently defined in the notebook.
plot_surface_3d(f)

## Geometry of eigen vectors and eigen values

Example 1:

$$ f(x, y) = 2x^2 + 4y^2 - xy - 6x - 8y  + 6$$ 

$$ f([x, y]) = \begin{bmatrix}x & y \end{bmatrix}
\begin{bmatrix}2 & -1/2\\ -1/2 & 4\end{bmatrix}
\begin{bmatrix} x \\ y \end{bmatrix} 
+
\begin{bmatrix} - 6 & - 8 \end{bmatrix}
\begin{bmatrix} x \\ y \end{bmatrix}  + 6$$ 

In [None]:
def f(x, y):
    """Example 1 written out term by term: 2x^2 + 4y^2 - xy - 6x - 8y + 6.

    Uses only arithmetic operators, so scalars and NumPy arrays both work.
    """
    return 2*x**2 + 4*y**2 - x*y - 6*x - 8*y  + 6

def f_vec(x, y):
    """Evaluate Example 1 in matrix form, x^T A x + b^T x + c, over a grid.

    ``x`` and ``y`` are coordinate arrays of equal shape (for example from
    ``np.mgrid``); the result has that same shape. Every call also prints
    the stationary point ``-(A + A^T)^{-1} b``.
    """
    quad_matrix = np.array([[2, -0.5],
                            [-0.5, 4]])  # A, 2 x 2
    linear = np.array([-6, -8])          # b, length 2
    offset = 6                           # c
    print("Minima at, ", -np.linalg.inv(quad_matrix + quad_matrix.T) @ linear)

    # Put the two coordinates on a trailing axis so each grid point is a 2-vector.
    points = np.stack([x, y], axis=-1)        # ... x 2
    as_rows = points[..., np.newaxis, :]      # ... x 1 x 2
    as_cols = points[..., :, np.newaxis]      # ... x 2 x 1
    # Batched row @ A @ column gives a 1 x 1 matrix per point; take its entry.
    quadratic = (as_rows @ quad_matrix @ as_cols)[..., 0, 0]
    return quadratic + points @ linear + offset

# Contours of Example 1 on [-4, 4]^2, with arrows along the eigenvectors of
# its Hessian drawn from the minimiser.
x, y = np.mgrid[-4:4:201j, -4:4:201j]
fvals = f(x, y)

A = np.array([[2, -0.5],
              [-0.5, 4]])  # quadratic coefficients, 2 x 2
b = np.array([-6, -8])     # linear coefficients

# Stationary point of x^T A x + b^T x + c: solve (A + A^T) x = -b.
minpt = -np.linalg.inv(A + A.T) @ b

ctr = plt.contour(x, y, fvals, 10, cmap='Blues_r')

# Eigen-decomposition of the (symmetric) Hessian A + A^T; the columns of V
# are the eigenvectors.
lambdas, V = np.linalg.eigh(A + A.T)
v1, v2 = V[:, 0], V[:, 1]

# Each arrow has length 4 / sqrt(eigenvalue): the larger the eigenvalue,
# the shorter the arrow along that eigenvector.
for lam, vec, colour in ((lambdas[0], v1, 'r'), (lambdas[1], v2, 'g')):
    step = 4 * vec / np.sqrt(lam)
    plt.arrow(minpt[0], minpt[1], step[0], step[1], color=colour)

plt.clabel(ctr, ctr.levels, inline=True, fontsize=6)
plt.axis('equal')
plt.show()


In [None]:
!F=train-images-idx3-ubyte && cd data && \
    [ ! -f $F ] && \
    wget http://yann.lecun.com/exdb/mnist/$F.gz  && \
    gunzip $F.gz
!F=train-labels-idx1-ubyte && cd data && \
    [ ! -f $F ] && \
    wget http://yann.lecun.com/exdb/mnist/$F.gz  && \
    gunzip $F.gz

In [None]:
import struct
import numpy as np

# Ref:https://github.com/sorki/python-mnist/blob/master/mnist/loader.py
def mnist_read_labels(fname='data/train-labels-idx1-ubyte'):
    """Read an MNIST label file and return the labels as a uint8 array.

    The file starts with two big-endian unsigned 32-bit integers (magic
    number 2049 and the label count), followed by one byte per label.

    Raises
    ------
    ValueError
        If the magic number is wrong or the number of label bytes does not
        match the count stored in the header.
    """
    with open(fname, 'rb') as file:
        # 8-byte header: two big-endian unsigned ints.
        magic, size = struct.unpack('>II', file.read(8))
        if magic != 2049:
            raise ValueError(
                'bad magic number %d in %s (expected 2049)' % (magic, fname))
        labels = np.frombuffer(file.read(), dtype='u1')
    # A truncated or padded file would otherwise go unnoticed.
    if labels.shape[0] != size:
        raise ValueError(
            'header of %s announces %d labels but %d were read'
            % (fname, size, labels.shape[0]))
    return labels
    
# Ref:https://github.com/sorki/python-mnist/blob/master/mnist/loader.py
def mnist_read_images(fname='data/train-images-idx3-ubyte'):
    """Read an MNIST image file and return a (size, rows, cols) uint8 array.

    The file starts with four big-endian unsigned 32-bit integers (magic
    number 2051, image count, rows, cols), followed by one byte per pixel.

    Raises
    ------
    ValueError
        If the magic number is wrong or the number of pixel bytes does not
        equal ``size * rows * cols`` from the header.
    """
    with open(fname, 'rb') as file:
        # 16-byte header: four big-endian unsigned ints.
        magic, size, rows, cols = struct.unpack('>IIII', file.read(16))
        if magic != 2051:
            raise ValueError(
                'bad magic number %d in %s (expected 2051)' % (magic, fname))
        image_data = np.frombuffer(file.read(), dtype='u1')
    expected = size * rows * cols
    if image_data.size != expected:
        raise ValueError(
            'header of %s announces %d pixel bytes but %d were read'
            % (fname, expected, image_data.size))
    return image_data.reshape(size, rows, cols)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import matplotlib as mpl
mpl.rc('animation', html='jshtml')
# Load the training set and split out the images labelled 0 and 1.
train_images = mnist_read_images('data/train-images-idx3-ubyte')
labels = mnist_read_labels('data/train-labels-idx1-ubyte')
zero_images = train_images[labels == 0]
one_images = train_images[labels == 1]

# Static alternative (disabled): a 2 x 10 grid of sample digits.
# fig, axes = plt.subplots(2, 10)
# for axrow, imgs in zip(axes, (zero_images, one_images)):
#     for ax, img in zip(axrow, imgs):
#         ax.imshow(img, cmap='gray', vmin=0, vmax=255)
#         ax.axis('off')

fig, ax = plt.subplots()
# ArtistAnimation expects one list of artists per frame; each frame here
# holds a single image artist showing one of the first 60 zeros.
ims = []
for i in range(60):
    frame = ax.imshow(zero_images[i], animated=True, cmap='gray', vmin=0, vmax=255)
    ims.append([frame])
zero_images_anim = animation.ArtistAnimation(
    fig, ims, interval=50, blit=True, repeat_delay=1000, repeat=False)

In [None]:
# Leaving the animation as the cell's last expression displays it inline.
zero_images_anim

In [None]:
# Same animation for the digit 1: one image artist per frame, first 60 samples.
fig, ax = plt.subplots()
oneims = []
for i in range(60):
    frame = ax.imshow(one_images[i], animated=True, cmap='gray', vmin=0, vmax=255)
    oneims.append([frame])
one_images_anim = animation.ArtistAnimation(
    fig, oneims, interval=50, blit=True, repeat_delay=1000, repeat=False)

In [None]:
# Leaving the animation as the cell's last expression displays it inline.
one_images_anim

# What is a feature

Any property of a data sample that helps with the task.

In [None]:
def feature_n_pxls(imgs):
    """Count, for each image in a stack, the pixels brighter than 128.

    ``imgs`` is indexed as (image, row, column); the result is a 1-D integer
    array with one count per image.
    """
    count = imgs.shape[0]
    bright = imgs > 128
    # Flatten each image so a single sum over axis 1 counts its bright pixels.
    return bright.reshape(count, -1).sum(axis=1)

# Bright-pixel count per image, plotted against sample index:
# zeros as dots, ones as plus signs.
n_pxls_zero_images = feature_n_pxls(zero_images)
n_pxls_one_images = feature_n_pxls(one_images)
fig, ax = plt.subplots()
ax.plot(n_pxls_zero_images, '.')
ax.plot(n_pxls_one_images, '+')

In [None]:
# Column-wise mean intensity (averaged over rows) for the first five zeros
# (blue) and the first five ones (red), all drawn on the same axes object.
fig, ax = plt.subplots()
for i in range(5):
    ax.plot(zero_images[i].mean(axis=0), 'b-')
for i in range(5):
    ax.plot(one_images[i].mean(axis=0), 'r-')

In [None]:
# Treat the column profile of the first zero image as weights over the
# column index, then compute the weighted mean and weighted variance.
wts = zero_images[0].mean(axis=0)
mean = (np.arange(wts.shape[0]) * wts).sum() / np.sum(wts)
var = ((np.arange(wts.shape[0]) - mean)**2 * wts).sum() / np.sum(wts)
var

In [None]:
def feature_y_var(img):
    """Return the intensity-weighted variance of the column index of one image.

    The image is averaged over rows to get one weight per column; the
    weighted mean and variance of the column positions 0..W-1 follow. Note
    that, despite the name, the spread is measured along the column index.
    """
    wts = img.mean(axis=0)
    positions = np.arange(wts.shape[0])
    total = np.sum(wts)
    centre = (positions * wts).sum() / total
    return ((positions - centre)**2 * wts).sum() / total
# Compare the feature on the first zero image and the first one image.
feature_y_var(zero_images[0]), feature_y_var(one_images[0])


In [None]:
def feature_y_var(imgs):
    """Return the intensity-weighted variance of the column index.

    Accepts either a stack of images shaped (N, rows, cols), returning an
    array of N variances, or a single (rows, cols) image, returning a
    scalar. Each image is averaged over rows to get one weight per column;
    the weighted variance of the column positions 0..cols-1 is returned.
    Note that, despite the name, the spread is measured along the column
    index. An image whose weights sum to zero divides by zero and yields nan.
    """
    wts = imgs.mean(axis=-2)
    arange = np.arange(wts.shape[-1])
    # keepdims leaves a trailing axis of length 1, so the mean broadcasts
    # against `arange` for both a single image and a stack.
    mean = (arange * wts).sum(axis=-1, keepdims=True) / wts.sum(axis=-1, keepdims=True)
    var = ((arange - mean)**2 * wts).sum(axis=-1) / wts.sum(axis=-1)
    return var

# Variance feature per image, plotted against sample index:
# zeros as dots, ones as plus signs.
fig, ax = plt.subplots()
ax.plot(feature_y_var(zero_images), '.')
ax.plot(feature_y_var(one_images), '+')

In [None]:
def features_extract(images):
    """Return an (N, 2) feature matrix for a stack of images.

    Column 0 is the bright-pixel count from ``feature_n_pxls`` and column 1
    is the variance from ``feature_y_var``.
    """
    counts = feature_n_pxls(images)
    variances = feature_y_var(images)
    # Stack the two feature rows, then transpose to one row per image.
    return np.array([counts, variances]).T
# Feature matrices (one row per image) for the two digit classes.
zero_features = features_extract(zero_images)
one_features = features_extract(one_images)


def draw_features(ax, zero_features, one_features):
    """Scatter both classes in feature space on ``ax``.

    Each argument is a 2-column array (feature 1, feature 2). Class 0 is
    drawn with '.' markers and class 1 with '+'. Returns the two scatter
    artists as a list, class 0 first.
    """
    styles = (
        (zero_features, '.', '0', 0.5),
        (one_features, '+', '1', 0.3),
    )
    artists = []
    for pts, marker, label, alpha in styles:
        artists.append(
            ax.scatter(pts[:, 0], pts[:, 1], marker=marker, label=label, alpha=alpha))
    ax.legend()
    ax.set_xlabel('Feature 1: count of pixels')
    ax.set_ylabel('Feature 2: Variance along x-axis')
    return artists

In [None]:
# Scatter of the raw (un-normalised) features for both classes.
fig, ax = plt.subplots()
draw_features(ax, zero_features, one_features)
plt.show()

In [None]:
# Initial line parameters [slope, intercept] = [1, 1], drawn for x in [-1, 1]
# on top of the raw features.
bfm = np.ones(2)
fig, ax = plt.subplots()
draw_features(ax, zero_features, one_features)
x = np.linspace(-1, 1, 21)
ax.plot(x, x*bfm[0] + bfm[1], 'r-')

In [None]:
# Initial line parameters [slope, intercept].
bfm = np.ones(2)

# One feature matrix for both classes, with targets +1 for zeros and -1 for
# ones, in the same row order.
features = np.concatenate([zero_features, one_features], axis=0)
Y = np.concatenate([np.ones(len(zero_features)), -np.ones(len(one_features))])

# Per-feature statistics (shape 1 x 2), also saved to disk.
FEATURES_MEAN = features.mean(axis=0, keepdims=True)
FEATURES_STD = features.std(axis=0, keepdims=True)
np.savez('features_stats.npz', mean=FEATURES_MEAN, std=FEATURES_STD)


def norm_features(features):
    """Standardise each feature column using FEATURES_MEAN and FEATURES_STD."""
    centred = features - FEATURES_MEAN
    return centred / FEATURES_STD


X = norm_features(features)

# Normalised features with the initial line drawn for x in [-1, 1].
fig, ax = plt.subplots()
draw_features(ax, X[Y > 0], X[Y < 0])
x = np.linspace(-1, 1, 21)
ax.plot(x, bfm[0] * x + bfm[1], 'r-')

In [None]:


def error(X, Y, bfm):
    """Exercise stub: error of the line ``bfm`` on features ``X``, targets ``Y``.

    Elsewhere in the notebook the result is masked with ``err[err > 0]`` and
    averaged, so it must be a NumPy array.
    NOTE(review): presumably one entry per sample; confirm against the
    assignment text.
    """
    # YOUR CODE HERE
    raise NotImplementedError()

def grad_error(Xw, Yw, bfm):
    """Exercise stub: gradient of the error with respect to ``bfm``.

    NOTE(review): presumably ``Xw``/``Yw`` are the subset of samples that
    contribute to the error; confirm against the assignment text.
    """
    # YOUR CODE HERE
    raise NotImplementedError()

def train(X, Y, lr = 0.1):
    """Exercise stub: fit the line parameters on ``X``, ``Y``.

    The caller unpacks three return values: the final parameters, the list
    of parameter vectors visited (each indexed as [slope, intercept] by the
    animation), and the list of errors, which is plotted against iteration.
    NOTE(review): ``lr`` is presumably the gradient-descent step size.
    """
    # YOUR CODE HERE
    raise NotImplementedError()

# Train on the normalised features and plot the recorded errors per iteration.
OPTIMAL_BFM, list_of_bfms, list_of_errors = train(X, Y)
fig, ax = plt.subplots()
ax.plot(list_of_errors)
ax.set_xlabel('t')
ax.set_ylabel('loss')
plt.show()

In [None]:
fig, axes = plt.subplots(2, 1, figsize=(5, 7.5))
class Anim:
    """Animate training: the moving line (top) and its path in (m, c) space (bottom)."""

    def __init__(self, fig, axes, X, Y):
        """Draw the static parts of both panels.

        ``axes[0]`` gets the feature scatter plus an empty line to update;
        ``axes[1]`` gets contours of the error over a 51 x 51 grid of
        (m, c) values in [-1, 1]^2 plus an empty trajectory line.
        """
        self.fig = fig
        self.ax = axes[0]
        self.ax1 = axes[1]
        self.fts = draw_features(self.ax, X[Y > 0, :], X[Y < 0, :])
        self.line, = self.ax.plot([], [], 'r-')

        m, c = np.meshgrid(np.linspace(-1, 1, 51), np.linspace(-1, 1, 51))
        totalerr = np.empty_like(m)
        for i in range(m.shape[0]):
            for j in range(m.shape[1]):
                err = error(X, Y, [m[i, j], c[i, j]])
                # Average only the positive entries of the error.
                totalerr[i, j] = err[err > 0].mean()

        self.ctr = self.ax1.contour(m, c, totalerr, 30, cmap='Blues_r')
        self.ax1.set_xlabel('m')
        self.ax1.set_ylabel('c')
        self.ax1.clabel(self.ctr, self.ctr.levels, inline=True, fontsize=6)
        # Parameter values visited so far, for the trajectory line.
        self.m_hist = []
        self.c_hist = []
        self.line2, = self.ax1.plot([], [], 'r*-')

    def anim_init(self):
        """Return the artists that are redrawn on each frame."""
        return (self.line, self.line2)

    def update(self, bfm):
        """Draw the line for ``bfm = [m, c]`` and extend the trajectory."""
        x = np.linspace(-2, 2, 21)
        self.line.set_data(x, x * bfm[0] + bfm[1])
        self.m_hist.append(bfm[0])
        self.c_hist.append(bfm[1])
        self.line2.set_data(self.m_hist, self.c_hist)
        return self.line, self.line2

a = Anim(fig, axes, X, Y)
# Every third recorded parameter vector becomes one frame.
animation.FuncAnimation(fig, a.update, frames=list_of_bfms[::3],
                        init_func=a.anim_init, blit=True, repeat=False)

In [None]:
# Load the MNIST test split and keep only the images labelled 0 or 1.
test_images = mnist_read_images('data/t10k-images-idx3-ubyte')
test_labels = mnist_read_labels('data/t10k-labels-idx1-ubyte')
zero_one_filter = (test_labels == 0) | (test_labels == 1)
zero_one_test_images = test_images[zero_one_filter, ...]

def returnclasslabel(test_imgs):
    """Predict 0 or 1 for each image using the trained line OPTIMAL_BFM.

    Images are turned into normalised features; a sample is labelled 0 when
    its second feature lies above the line ``m * feature_1 + c`` and 1
    otherwise.
    """
    slope, intercept = OPTIMAL_BFM[0], OPTIMAL_BFM[1]
    normed = norm_features(features_extract(test_imgs))
    # Signed vertical offset of each sample from the line.
    margin = normed[:, 1] - normed[:, 0] * slope - intercept
    return np.where(margin > 0, 0, 1)
zero_one_predicted_labels = returnclasslabel(zero_one_test_images)

In [None]:

# Animate the first 60 test digits, each captioned with its predicted label.
fig, ax = plt.subplots()
artists = [
    [ax.imshow(zero_one_test_images[i], animated=True, cmap='gray', vmin=0, vmax=255),
     ax.text(0, 2, 'The number is %d' % zero_one_predicted_labels[i], animated=True, color='w')]
    for i in range(60)
]
animation.ArtistAnimation(fig, artists, interval=50, blit=True,
                          repeat_delay=1000, repeat=False)

# Perceptron

![](https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Perceptron_example.svg/500px-Perceptron_example.svg.png)

![](https://upload.wikimedia.org/wikipedia/commons/thumb/c/c4/Perceptron_algorithm.gif/800px-Perceptron_algorithm.gif?20170429203100)