In [None]:
import numpy as np
import matplotlib.pyplot as plt

## Problem 1: Toy datasets

The goal is to construct neural networks that achieve high accuracy on some toy datasets.

**Contents**
- [Neural Network Stuff](#Neural-Network-Stuff)
- [Toy Dataset 1](#Toy-Dataset-1)
- [Toy Dataset 2](#Toy-Dataset-2)
- [Toy Dataset 3](#Toy-Dataset-3)
- [Toy Dataset 4](#Toy-Dataset-4)

## Neural Network Stuff

In [None]:
class layer:
    """Fully connected (dense) neural network layer.

    Attributes
    ----------
    weights : array of shape (n_inputs, n_neurons), random normal values scaled by 0.1
    biases  : array of shape (1, n_neurons), initialized to zero
    inputs, output : values cached by ``forward``
    dinputs, dweights, dbiases : gradients stored by ``backward``
    """

    def __init__(self, n_inputs, n_neurons):
        """Create the weight matrix (small random values) and the zero bias row."""
        self.weights = np.random.randn(n_inputs, n_neurons) * 0.1
        self.biases = np.zeros(shape=(1, n_neurons))

    def forward(self, inputs):
        """Store ``inputs.dot(weights) + biases`` in ``self.output``."""
        weighted_sum = inputs.dot(self.weights)
        self.output = weighted_sum + self.biases

        # backward() needs the inputs to compute the weight gradient
        self.inputs = inputs

    def backward(self, dvalues):
        """Backpropagate ``dvalues`` (gradient with respect to this layer's output).

        Stores the gradient with respect to the inputs, the weights and the
        biases in ``self.dinputs``, ``self.dweights`` and ``self.dbiases``.
        """
        grad_inputs = dvalues.dot(self.weights.T)
        grad_weights = self.inputs.T.dot(dvalues)
        # each bias is added to every sample, so its gradient is the column sum
        grad_biases = dvalues.sum(axis=0, keepdims=True)

        self.dinputs = grad_inputs
        self.dweights = grad_weights
        self.dbiases = grad_biases

        
class activation_ReLU:
    """Rectified linear unit activation: element-wise ``max(0, x)``."""

    def forward(self, inputs):
        """Store the rectified inputs in ``self.output`` and cache ``inputs``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified

        # the sign of the raw inputs decides where the gradient flows
        self.inputs = inputs

    def backward(self, dvalues):
        """Store in ``self.dinputs`` a copy of ``dvalues`` zeroed where inputs <= 0."""
        # work on a copy so the caller's array is not modified
        grad = dvalues.copy()

        # units that were not active (input <= 0) pass no gradient
        inactive = self.inputs <= 0
        grad[inactive] = 0

        self.dinputs = grad
        
class activation_softmax:
    """Softmax activation function, applied independently to each row."""

    def forward(self, inputs):
        """Store the row-wise softmax of ``inputs`` in ``self.output``.

        ``inputs`` is indexed along ``axis=1``, so it must be two-dimensional
        (one row per data point).
        """
        # unnormalized probabilities; subtracting the row maximum keeps
        # np.exp from overflowing and does not change the result
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))

        # normalized probabilities
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)

        # remember input values
        self.inputs = inputs

    def backward(self, dvalues):
        """Store in ``self.dinputs`` the gradient with respect to the softmax inputs.

        ``dvalues`` is the gradient with respect to ``self.output`` and has the
        same shape. For one row with softmax output ``s`` the Jacobian is
        ``diag(s) - s s^T``, hence

            dvalues_row . (diag(s) - s s^T) = s * (dvalues_row - <dvalues_row, s>)

        which is evaluated for all rows at once instead of building one
        Jacobian matrix per data point in a Python loop.
        """
        probabilities = self.output

        # <dvalues_row, s> for every row, kept as a column for broadcasting
        row_dot = np.sum(dvalues * probabilities, axis=1, keepdims=True)

        self.dinputs = probabilities * (dvalues - row_dot)


class loss:
    """Base class for losses; subclasses provide ``forward(output, y)``."""

    def calculate(self, output, y):
        """Return the mean of the per-sample losses computed by ``forward``."""
        sample_losses = self.forward(output, y)
        return np.mean(sample_losses)


# cross-entropy loss
class loss_crossentropy(loss):
    """Categorical cross-entropy loss.

    ``y_true`` is used as a column index into the predictions, so it must
    hold one integer class label per data point.
    """

    # predictions are clipped to [_EPS, 1 - _EPS] before taking the log or
    # dividing, so neither log(0) nor a division by 0 can occur
    _EPS = 1e-7

    def forward(self, y_pred, y_true):
        """Return the per-sample losses ``-log(p[true class])``."""
        # number of data points
        n_samples = len(y_pred)

        # clip data to prevent log(0)
        y_pred_clipped = np.clip(y_pred, self._EPS, 1 - self._EPS)

        # predicted probability of the true class of each data point
        confidence_values = y_pred_clipped[range(n_samples), y_true]

        return -np.log(confidence_values)

    def backward(self, dvalues, y_true):
        """Store in ``self.dinputs`` the gradient with respect to the predictions.

        ``dvalues`` holds the predicted probabilities. They are clipped exactly
        as in ``forward``: without the clip a probability of 0 produces
        ``-1/0 = -inf`` for the true class and ``0/0 = nan`` for every other
        class, and those values would spread to all upstream gradients.
        """
        # number of samples
        n_samples = len(dvalues)

        # same clipping as in forward()
        dvalues_clipped = np.clip(dvalues, self._EPS, 1 - self._EPS)

        # one-hot matrix
        Y = np.zeros(dvalues.shape)
        Y[np.arange(len(y_true)), y_true] = 1

        # gradient, normalized by the number of samples
        self.dinputs = -Y / dvalues_clipped / n_samples
        
# GD (gradient descent) optimizer
class optimizer_GD:
    """Plain gradient descent: ``parameter <- parameter - learning_rate * gradient``."""

    def __init__(self, learning_rate=1.0):
        """Remember the step size used by ``update_parameters``."""
        self.learning_rate = learning_rate

    def update_parameters(self, layer):
        """Take one gradient step on ``layer.weights`` and ``layer.biases``.

        Reads ``layer.dweights`` and ``layer.dbiases`` and rebinds the two
        parameter attributes to new arrays; the previous arrays are not
        modified in place.
        """
        step_size = self.learning_rate
        for parameter, gradient in (("weights", "dweights"), ("biases", "dbiases")):
            current = getattr(layer, parameter)
            setattr(layer, parameter, current - step_size * getattr(layer, gradient))

## Toy Dataset 1

In [None]:
# run this cell to generate and plot the first toy dataset
from sklearn.datasets import make_moons

# fixed random_state: the same points are generated on every run,
# so results survive Restart Kernel -> Run All
X,y = make_moons(n_samples=1000, noise=0.2, random_state=0)

plt.figure(figsize=(12,5))
for class_number in (0, 1):
    in_class = y == class_number
    plt.plot(X[in_class,0], X[in_class,1], 'o', label = 'class '+str(class_number))
# the trailing semicolon hides the Legend repr in the cell output
plt.legend(fontsize=15);

**Part 1:** Fit a neural network to the dataset.

In [None]:
# build a neural network 


In [None]:
# fit the neural network to the data (use the Gradient Descent optimizer)


**Part 2:** Visualize the classification regions (you can reuse the code [here](https://nbviewer.jupyter.org/github/um-perez-alvaro/Data-Science-Theory/blob/master/Jupyter%20Notebooks/Neural%20Nets/notebooks/Three%20Spirals%20Dataset.ipynb)).

In [None]:
# visualize the classification regions


## Toy Dataset 2

In [None]:
# run this cell to generate and plot the second toy dataset
from sklearn.datasets import make_circles

# fixed random_state: the same points are generated on every run,
# so results survive Restart Kernel -> Run All
X,y = make_circles(n_samples=500, noise=0.05, factor=0.2, random_state=0)

plt.figure(figsize=(12,5))
for class_number in (0, 1):
    in_class = y == class_number
    plt.plot(X[in_class,0], X[in_class,1], 'o', label = 'class '+str(class_number))
# the trailing semicolon hides the Legend repr in the cell output
plt.legend(fontsize=15);

**Part 1:** Fit a neural network to the dataset.

In [None]:
# build a neural network (use the Gradient Descent optimizer)


In [None]:
# fit the neural network to the data


**Part 2:** Visualize the classification regions (you can reuse the code [here](https://nbviewer.jupyter.org/github/um-perez-alvaro/Data-Science-Theory/blob/master/Jupyter%20Notebooks/Neural%20Nets/notebooks/Three%20Spirals%20Dataset.ipynb)).

In [None]:
# visualize the classification regions


## Toy Dataset 3

In [None]:
# run this cell to generate and plot the third toy dataset

# local seeded generator: the same dataset is produced on every run and
# numpy's global random state (used for weight initialization) is untouched
rng = np.random.RandomState(0)

X = np.zeros((600,2))
y = np.zeros(600).astype('int')

# class 0: narrow vertical blob (std 0.05 along x, 1.25 along y)
X[:200] = rng.randn(200,2).dot(np.array([[0.05,0],[0,1.25]]))

# class 1: two small blobs centered at (0.5, 0) and (-0.5, 0)
X[200:225] = 0.1*rng.randn(25,2) + [0.5,0]
X[225:250] = 0.1*rng.randn(25,2) + [-0.5,0]
y[200:250] = 1

# class 2: noisy circle of radius 1
theta = np.linspace(0,2*np.pi,150)
X[250:400,0] = np.sin(theta)+0.05*rng.randn(150)
X[250:400,1] = np.cos(theta)+0.05*rng.randn(150)
y[250:400] = 2

# class 3: noisy circle of radius 1.5
theta = np.linspace(0,2*np.pi,200)
X[400:600,0] = 1.5*np.sin(theta)+0.05*rng.randn(200)
X[400:600,1] = 1.5*np.cos(theta)+0.05*rng.randn(200)
y[400:600] = 3

plt.figure(figsize=(12,7))
for class_number in range(4):
    in_class = y == class_number
    plt.plot(X[in_class,0], X[in_class,1], 'o', label='class '+str(class_number))
# the trailing semicolon hides the Legend repr in the cell output
plt.legend();

**Part 1:** Fit a neural network to the dataset.

In [None]:
# build a neural network (use the Gradient Descent optimizer)


In [None]:
# fit the neural network to the data


**Part 2:** Visualize the classification regions (you can reuse the code [here](https://nbviewer.jupyter.org/github/um-perez-alvaro/Data-Science-Theory/blob/master/Jupyter%20Notebooks/Neural%20Nets/notebooks/Three%20Spirals%20Dataset.ipynb)).

In [None]:
# visualize the classification regions


## Toy Dataset 4

In [None]:
# run this cell to generate and plot the fourth toy dataset
classes = 3
n_points = 200
noise = 0.1

# local seeded generator: the same dataset is produced on every run and
# numpy's global random state (used for weight initialization) is untouched
rng = np.random.RandomState(0)

X = np.zeros((n_points*classes, 2))
y = np.zeros(n_points*classes).astype('int')

# the radius grows from 0 to 1 along every spiral arm (same for all classes)
r = np.linspace(0,1, n_points)

for class_number in range(classes):
    # rows of X / y that belong to this class
    ix = slice(n_points*class_number, n_points*(class_number+1))
    # angle: each class sweeps its own range, perturbed by Gaussian noise
    t = 2*np.linspace(class_number*4, (class_number+1)*4, n_points) + rng.randn(n_points)*noise
    X[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
    y[ix] = class_number

plt.figure(figsize=(12,7))
for i in range(classes):
    plt.plot(X[y==i,0],X[y==i,1],'o', label='class '+str(i))
plt.legend()
# writes the figure to the current working directory
plt.savefig('spirals.png')

**Part 1:** Fit a neural network to the dataset.

In [None]:
# build a neural network (use the Gradient Descent optimizer)


In [None]:
# fit the neural network to the data


**Part 2:** Visualize the classification regions (you can reuse the code [here](https://nbviewer.jupyter.org/github/um-perez-alvaro/Data-Science-Theory/blob/master/Jupyter%20Notebooks/Neural%20Nets/notebooks/Three%20Spirals%20Dataset.ipynb)).

In [None]:
# visualize the classification regions
