In [1]:
# Import dependencies
import torch
import torch.nn as nn
from plot_lib import set_default, show_scatterplot, plot_bases
from matplotlib.pyplot import plot, title, axis

In [2]:
# Set up your device 
cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if cuda else "cpu")

In [3]:
# Set up random seed to 1008. Do not change the random seed.
seed = 1008
torch.manual_seed(seed)
if cuda:
    torch.cuda.manual_seed_all(seed)

# 1. Full, slice, fill
Write a function `warm_up` that returns the 2D tensor with integers below. **Do not use any loops**.

```
1 2 1 1 1 1 2 1 1 1 1 2 1
2 2 2 2 2 2 2 2 2 2 2 2 2
1 2 1 1 1 1 2 1 1 1 1 2 1
1 2 1 3 3 1 2 1 3 3 1 2 1
1 2 1 3 3 1 2 1 3 3 1 2 1
1 2 1 1 1 1 2 1 1 1 1 2 1
2 2 2 2 2 2 2 2 2 2 2 2 2
1 2 1 1 1 1 2 1 1 1 1 2 1
1 2 1 3 3 1 2 1 3 3 1 2 1
1 2 1 3 3 1 2 1 3 3 1 2 1
1 2 1 1 1 1 2 1 1 1 1 2 1
2 2 2 2 2 2 2 2 2 2 2 2 2
1 2 1 1 1 1 2 1 1 1 1 2 1
```


Hint: Use `torch.full`, `torch.fill_`, and the slicing operator.

In [40]:
def warm_up():
    t = torch.IntTensor(13, 13)
    t.fill_(2)
    r = torch.IntTensor(13)
    r.fill_(2)
    r[0] = 1
    r[-1] = 1
    r[-2] = 2
    r1 = torch.full((4,), 1, dtype=torch.int32)
    r[2:6] = r1
    r[7:11] = r1
    t33 = torch.full((2,2), 3, dtype=torch.int32)
    t11 = torch.IntTensor(4,4)
    t11.fill_(1)
    t11[1:3, 1:3] = t33
    t22 = torch.IntTensor(6,6)
    t22.fill_(2)
    t22[1:5, 1:5] = t11
    t[0] = r
    t[-1] = r
    rt = r.view(r.shape[0], 1).view(1, r.shape[0])
    t[:,0] = rt
    t[:,-1] = rt
    t[1:7,1:7] = t22
    t[1:7,6:12] = t22
    t[6:12,1:7] = t22
    t[6:12,6:12] = t22
    return t


# Uncomment line below once you implement this function. 
print(warm_up())

tensor([[1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1],
        [1, 2, 1, 3, 3, 1, 2, 1, 3, 3, 1, 2, 1],
        [1, 2, 1, 3, 3, 1, 2, 1, 3, 3, 1, 2, 1],
        [1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1],
        [1, 2, 1, 3, 3, 1, 2, 1, 3, 3, 1, 2, 1],
        [1, 2, 1, 3, 3, 1, 2, 1, 3, 3, 1, 2, 1],
        [1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1]], dtype=torch.int32)


# 2. To Loop or not to loop

## 2.1. `mul_row_loop`
Write a function `mul_row_loop`, using python loops (and not even slicing operators), that gets a 2D tensor
as input, and returns a tensor of same size, equal to the one given as argument, with the first row
kept unchanged, the second multiplied by two, the third by three, etc.
For instance:
```
>>> t = torch.full((4, 8), 2.0)
>>> t
tensor([[2., 2., 2., 2., 2., 2., 2., 2.],
[2., 2., 2., 2., 2., 2., 2., 2.],
[2., 2., 2., 2., 2., 2., 2., 2.],
[2., 2., 2., 2., 2., 2., 2., 2.]])
>>> mul_row(t)
tensor([[2., 2., 2., 2., 2., 2., 2., 2.],
[4., 4., 4., 4., 4., 4., 4., 4.],
[6., 6., 6., 6., 6., 6., 6., 6.],
[8., 8., 8., 8., 8., 8., 8., 8.]])
```

In [49]:
def mul_row_loop(input_tensor):
    output_tensor = torch.Tensor(input_tensor.shape)
    n = input_tensor.shape[0]
    for i in range(n):
        output_tensor[i] = input_tensor[i] * (i+1)
    return output_tensor

tensor([[2., 2., 2., 2., 2., 2., 2., 2.],
        [4., 4., 4., 4., 4., 4., 4., 4.],
        [6., 6., 6., 6., 6., 6., 6., 6.],
        [8., 8., 8., 8., 8., 8., 8., 8.]])


# 2.2. `mul_row_fast`
Write a second version of the same function named `mul_row_fast` which uses tensor operations and no looping.

**Hint**: Use broadcasting and `torch.arange`, `torch.view`, and `torch.mul`.

In [108]:
def mul_row_fast(input_tensor):
    r  = torch.mul(input_tensor.view(input_tensor.shape[1], input_tensor.shape[0]), 
                   torch.arange(1,input_tensor.shape[0]+1, dtype=torch.float32).view(1, _.shape[0]))
    return r.view(r.shape[1], r.shape[0])

def mul_row_fast_2(input_tensor):
    t1 = input_tensor.view(input_tensor.shape[1], input_tensor.shape[0])
    t2 = torch.arange(1,input_tensor.shape[0]+1, dtype=torch.float32)
    t2 = t2.view(1, t2.shape[0])    
    r  = torch.mul(t1,t2)
    r = r.view(r.shape[1], r.shape[0])
    return r

t = torch.full((4, 8), 2.0)
print(mul_row_fast(t))

tensor([[2., 4., 6., 8., 2., 4., 6., 8.],
        [2., 4., 6., 8., 2., 4., 6., 8.],
        [2., 4., 6., 8., 2., 4., 6., 8.],
        [2., 4., 6., 8., 2., 4., 6., 8.]])


# 2.3. `times`
Write a function `times` which takes a 2D tensor as input and returns the run times of `mul_row_loop` and `mul_row_fast` on this tensor, respectively. Use `time.perf_counter`.

Use `torch.ones` to create a 2D tensor of size (1000, 400) full of ones and run `times` on it (there should be more than two orders of magnitude difference).

In [109]:
from time import perf_counter
def times(input_tensor):
    t_0 = perf_counter()
    mul_row_loop(input_tensor)
    time_1 = perf_counter() - t_0
    t_0 = perf_counter()
    mul_row_fast_2(input_tensor)
    time_2 = perf_counter() - t_0
    return time_1, time_2

# Uncomment lines below once you implement this function. 
random_tensor = torch.ones(1000, 400)
time_1, time_2 = times(random_tensor)
print('{}, {}'.format(time_1, time_2))

0.018058394081890583, 0.00025727110914886


# 3. Non-linearities

## 3.1. ReLU
ReLU (Rectified Linear Unit) is a non-linear activation fuction defined as:

$$y = \mathrm{max}(0,x)$$

Define a fully connected neural network `linear_fc_relu` which:
- takes 2 dimensional data as input and passes it through linear modules (`torch.nn.Linear`)
- has one hidden layer of dimension 5 
- has output dimension of 2
- has ReLu as an activation function

Create a tensor with input data $X$ of size (100, 2) using `torch.randn`. 

Following the example in https://github.com/Atcold/pytorch-Deep-Learning-Minicourse/blob/master/02-space_stretching.ipynb, visualize the output of passing `X` through the neural network `linear_fc_relu`.

In [110]:
# Input data
X = torch.randn(100, 2)

In [None]:
# create 1-layer neural networks with ReLU activation
linear_fc_relu = nn.Sequential(
            nn.Linear(2, n_hidden), 
            NL, 
            nn.Linear(n_hidden, 2)
        )
# Visualize: TODO

In [None]:
show_scatterplot(X, colors, title='x')
n_hidden = 5

NL = nn.ReLU()
#NL = nn.Tanh()

for i in range(5):
    # create 1-layer neural networks with random weights
    model = nn.Sequential(
            nn.Linear(2, n_hidden), 
            NL, 
            nn.Linear(n_hidden, 2)
        )
    with torch.no_grad():
        Y = model(X)
    show_scatterplot(Y, colors, title='f(x)')

## 3.2. Sigmoid
The sigmoid function is another popular choice for a non-linear activation function which maps its input to values in the interval $(0,1)$. It is formally defined as:

$$\sigma(x) = \frac{1}{1+ e^{-x}}$$

Define a new neural network `linear_fc_sigmoid` which is the same architecture as in part `3.1.` but with a sigmoid unit instead of ReLU. 

Using the same $X$ as in part `3.1`, visualize the output of passing `X` through the neural network `linear_fc_sigmoid`.

In [None]:
# create 1-layer neural networks with Sigmoid activation
# linear_fc_sigmoid = TODO
# Visualize: TODO