In [None]:
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

# Today's data

400 photos of human faces. Each face is a 2D array [64x64] of pixel brightness values.

In [None]:
from sklearn.datasets import fetch_olivetti_faces

# Fetch the Olivetti faces dataset and keep only its stack of images
olivetti = fetch_olivetti_faces()
data = olivetti.images

In [None]:
# This cell showcases matplotlib subplots. The syntax is: plt.subplot(height, width, index_starting_from_1)
# Draw the first four faces on a 2x2 grid; subplot indices are 1-based, array indices 0-based.
for slot in range(4):
    plt.subplot(2, 2, slot + 1)
    plt.imshow(data[slot], cmap='gray')

# Face reconstruction problem

Let's solve the face reconstruction problem: given the left halves of faces __(X)__, our algorithm shall predict the right halves __(y)__. Our first step is to split the photos into X and y using slices.

__Slices in numpy:__
* In regular python, a slice looks roughly like this: `a[2:5]` _(select elements with indices 2, 3 and 4 — the end index 5 is not included)_
* Numpy allows you to slice N-dimensional arrays along each dimension: [image_index, height, width]
  * `data[:10]` - Select first 10 images
  * `data[:, :10]` - For all images, select a horizontal stripe 10 pixels high at the top of the image
  * `data[10:20, :, -25:-15]` - Take images [10, 11, ..., 19], for each image select a _vertical stripe_ of width 10 pixels, 15 pixels away from the _right_ side.

__Your task:__

Let's use slices to select all __left image halves as X__ and all __right halves as y__.

In [None]:
# Exercise: select the left half of each face as X and the right half as y.
# Replace the placeholders below with numpy slices of `data`; the check in the
# following cell expects both X and y to have shape (len(data), 64, 32).
X = <Slice left half-images>
y = <Slice right half-images>

In [None]:
# If the slicing is right, the left and right half-images of the first face
# are drawn next to each other in their natural order
for column, halves in enumerate((X, y), start=1):
    plt.subplot(1, 2, column)
    plt.imshow(halves[0], cmap='gray')

# Both stacks must contain one 64x32 half per photo
assert X.shape == y.shape == (len(data), 64, 32), "Please slice exactly the left half-face to X and right half-face to Y"

In [None]:
def glue(left_half, right_half, height=64, half_width=32):
    """Merge left and right half-images back into full images.

    Each input may be a stack of 2D halves or a stack of flattened halves;
    both are reshaped to ``[-1, height, half_width]`` before being joined
    side by side.

    Parameters
    ----------
    left_half, right_half : array-like
        Half-images whose total size is a multiple of ``height * half_width``.
    height : int, default 64
        Number of pixel rows per image.
    half_width : int, default 32
        Number of pixel columns in each half.

    Returns
    -------
    numpy.ndarray
        Array of shape ``[n_images, height, 2 * half_width]`` with the left
        half occupying the first ``half_width`` columns.

    Raises
    ------
    ValueError
        If an input cannot be reshaped to ``[-1, height, half_width]``.
    """
    half_shape = [-1, height, half_width]
    # np.asarray lets plain nested lists be passed as well as ndarrays
    left_half = np.asarray(left_half).reshape(half_shape)
    right_half = np.asarray(right_half).reshape(half_shape)
    # join along the last (width) axis so the halves sit side by side
    return np.concatenate([left_half, right_half], axis=-1)


# Sanity check: gluing the two halves back together should give a valid face
restored = glue(X, y)
plt.imshow(restored[99], cmap='gray')

# Machine learning stuff

In [None]:
from sklearn.model_selection import train_test_split

# Flatten every half-image into one row so each face becomes a feature/target vector
X_flat = X.reshape([len(X), -1])
y_flat = y.reshape([len(y), -1])

# Hold out 5% of the faces for testing; the fixed seed keeps the split reproducible
X_train, X_test, Y_train, Y_test = train_test_split(
    X_flat, y_flat, test_size=0.05, random_state=42
)

print(X_test.shape)

In [None]:
from sklearn.linear_model import LinearRegression

# Plain linear regression from flattened left halves to flattened right halves
model = LinearRegression()
model.fit(X=X_train, y=Y_train)

Measure the mean squared error on the train and test sets.

In [None]:
from sklearn.metrics import mean_squared_error

# Report the error on the training faces first, then on the held-out faces
for label, features, targets in (
    ("Train MSE:", X_train, Y_train),
    ("Test MSE:", X_test, Y_test),
):
    print(label, mean_squared_error(targets, model.predict(features)))

In [None]:
# Train predictions: true left halves glued to the right halves predicted by `model`
predicted_right = model.predict(X_train)
pics = glue(X_train, predicted_right)
plt.figure(figsize=[16,12])
# Fill a 4x5 grid with the first 20 reconstructed faces (subplot slots are 1-based)
for slot in range(1, 21):
    plt.subplot(4, 5, slot)
    plt.imshow(pics[slot - 1], cmap='gray')

In [None]:
# Test predictions: true left halves glued to the right halves predicted by `model`
pics = glue(X_test, model.predict(X_test))
plt.figure(figsize=[16,12])
# Show at most 20 faces on a 4x5 grid. Slicing instead of indexing with a
# hard-coded range(20) keeps this cell from raising IndexError when the test
# split holds fewer than 20 images (e.g. after changing test_size).
for slot, face in enumerate(pics[:20], start=1):
    plt.subplot(4, 5, slot)
    plt.imshow(face, cmap='gray')

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```


# Ridge regression
Ridge regression is just linear regression with L2 regularization: the loss is additionally penalized by $\alpha \cdot \sum_i w_i^2$, which discourages large weights.

Let's train such a model with alpha=0.5

In [None]:
from sklearn.linear_model import Ridge

# Strength of the L2 penalty on the weights
RIDGE_ALPHA = 0.5
ridge = Ridge(alpha=RIDGE_ALPHA)

In [None]:
<YOUR CODE: fit the model on training set>

In [None]:
<YOUR CODE: predict and measure MSE on train and test>

In [None]:
# Test predictions: true left halves glued to the right halves predicted by `ridge`
pics = glue(X_test, ridge.predict(X_test))
plt.figure(figsize=[16,12])
# Show at most 20 faces on a 4x5 grid. Slicing instead of indexing with a
# hard-coded range(20) keeps this cell from raising IndexError when the test
# split holds fewer than 20 images (e.g. after changing test_size).
for slot, face in enumerate(pics[:20], start=1):
    plt.subplot(4, 5, slot)
    plt.imshow(face, cmap='gray')

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

# Grid search

Train models with different values of $\alpha$ and find the one that has the minimal test MSE. It's okay to use loops or any other python stuff here.

In [None]:
<YOUR CODE>

In [None]:
# Test predictions of the best model found in the grid search.
# Replace the placeholder with that model's predictions for X_test; they are
# glued to the true left halves and the first 20 faces are drawn on a 4x5 grid.
pics = glue(X_test,<predict with your best model>)
plt.figure(figsize=[16,12])
for i in range(20):
    plt.subplot(4,5,i+1)  # subplot indices start from 1
    plt.imshow(pics[i],cmap='gray')