In [20]:
import numpy as np 
import torch


In [21]:
x_numpy = np.array([.1,.2,.3])
x_torch = torch.tensor([.1,.2,.3])
print(x_numpy, x_torch)

[0.1 0.2 0.3] tensor([0.1000, 0.2000, 0.3000])


In [22]:
print(torch.from_numpy(x_numpy), x_torch.numpy)

tensor([0.1000, 0.2000, 0.3000], dtype=torch.float64) <built-in method numpy of Tensor object at 0x7fe3b0551d38>


In [23]:
y_numpy = np.array([3,4,5])
y_torch = torch.tensor([3,4,5])

print(x_numpy + y_numpy)
print(x_torch + y_torch)

[3.1 4.2 5.3]
tensor([3.1000, 4.2000, 5.3000])


In [24]:
N, C, W, H = 10000, 3, 28, 28
X = torch.randn((N, C, W, H))

print(X.shape)
print(X.view(N, C, 784).shape)

torch.Size([10000, 3, 28, 28])
torch.Size([10000, 3, 784])


---

Let $w = [w_{1}, w_{2}]^{T}$

Consider $g(w) = 2w_{1}w_{2} + w_{2}cos(w_{1})$

Q: Compute  $\nabla_{w}g(w)$ and verify $\nabla_{w}g([\pi, 1]) = [2, \pi - 1]^{T}$

In [25]:
def g(w):
    return 2*w[0]*w[1] + w[1]*torch.cos(w[0])

def grad_g(w):
    return torch.tensor([2*w[1] - w[1]*torch.sin(w[0]), 2*w[0] + torch.cos(w[0])])

w = torch.tensor([np.pi, 1], requires_grad=True)

z =g(w)
z.backward()

print(z)

print(grad_g(w))
print(w.grad)

tensor(5.2832, grad_fn=<AddBackward0>)
tensor([2.0000, 5.2832])
tensor([2.0000, 5.2832])


---

For **pytorch.tensor**:

`@` matrix multiplication

`*` matrix is multiplited by elements 


In [26]:
d = 2
n = 50
X = torch.randn(n, d)
true_w = torch.tensor([[-1.0], [2.0]])
y = X @ true_w + torch.randn(n, 1) * 0.1

print(X.shape)
print(y.shape)
print(true_w)

torch.Size([50, 2])
torch.Size([50, 1])
tensor([[-1.],
        [ 2.]])


In [27]:
# defien a linear model with no bias
def model(X, w):
    return X @ w

# the residual sum of squares loss function
def rss(y, y_hat): 
    return torch.norm(y - y_hat)**2 / n

# analytical expression for the gradient
def grad_rss(X, y, w):
    return -2*X.t() @ (y - X @ w) / n

w = torch.tensor([[1.], [0]], requires_grad=True)
y_hat = model(X, w)

loss = rss(y, y_hat)
loss.backward()

print(grad_rss(X, y, w).detach().view(2).numpy())
print(w.grad.view(2).numpy())

[ 4.0762863 -4.3227434]
[ 4.0762863 -4.3227434]


In [29]:
step_size = 0.1
print('iter \tloss \tw')

for i in range(20):
    y_hat = model(X, w)
    loss = rss(y, y_hat)

    loss.backward()

    w.data = w.data - step_size*w.grad

    print('{} \t{:.2f} \t{}'.format(i, loss.item(), w.view(2).detach().numpy()))

    w.grad.detach()
    w.grad.zero_()

print(true_w.view(2).numpy())
print(w.view(2).detach().numpy())
    

iter 	loss 	w
0 	5.27 	[0.26795283 0.77162653]
1 	3.30 	[0.00964156 1.0379208 ]
2 	2.06 	[-0.19613089  1.2467923 ]
3 	1.29 	[-0.36013207  1.4105444 ]
4 	0.81 	[-0.49090934  1.5388565 ]
5 	0.51 	[-0.59525    1.6393421]
6 	0.32 	[-0.67854536  1.7179877 ]
7 	0.21 	[-0.7450795  1.7794993]
8 	0.13 	[-0.79825795  1.8275752 ]
9 	0.09 	[-0.8407888  1.8651206]
10 	0.06 	[-0.87482655  1.8944172 ]
11 	0.04 	[-0.9020861  1.9172559]
12 	0.03 	[-0.9239327  1.9350421]
13 	0.02 	[-0.9414542  1.9488782]
14 	0.02 	[-0.95551753  1.9596282 ]
15 	0.01 	[-0.96681416  1.9679693 ]
16 	0.01 	[-0.9758957  1.9744316]
17 	0.01 	[-0.9832026  1.9794302]
18 	0.01 	[-0.9890866  1.9832895]
19 	0.01 	[-0.9938291  1.986263 ]
[-1.  2.]
[-0.9938291  1.986263 ]
