# Tensor Manipulation_1


## 1D Array with Numpy

In [1]:
import numpy as np
import torch

In [13]:
t = np.array([0., 1., 2., 3., 4., 5., 6.,])
print(t)
print('Rank of t: ', t.ndim)
print('Shape of t: ', t.shape)

[0. 1. 2. 3. 4. 5. 6.]
Rank of t:  1
Shape of t:  (7,)


In [7]:
print('t[0] t[1] t[-1]) = ', t[0], t[1], t[-1])
print('t[2:5] t[4:-1]) = ', t[2:5], t[4:-1])

t[0] t[1] t[-1]) =  0.0 1.0 6.0
t[2:5] t[4:-1]) =  [2. 3. 4.] [4. 5.]


## 2D Array with NumPy

In [14]:
t = np.array([[0., 1., 2.], [3., 4., 5.], [6.,7.,8.], [9.,10.,11.]])
print(t)
print(t.ndim)
print(t.shape)

[[ 0.  1.  2.]
 [ 3.  4.  5.]
 [ 6.  7.  8.]
 [ 9. 10. 11.]]
2
(4, 3)


## 1D Array with PyTorch

In [16]:
t = torch.FloatTensor([0., 1., 2., 3., 4., 5., 6.])
print(t)
print(t.dim())
print(t.shape)
print(t.size())
print(t[0], t[1], t[-1])
print(t[1:3], t[-1:-3])

tensor([0., 1., 2., 3., 4., 5., 6.])
1
torch.Size([7])
torch.Size([7])
tensor(0.) tensor(1.) tensor(6.)
tensor([1., 2.]) tensor([])


## 2D Array with Pytorch

In [24]:
t = torch.FloatTensor([[1.,2.,3.,],
                     [4.,5.,6.,],
                     [7.,8.,9.,],
                     [10.,11.,12.,]])
print(t)
print(t.dim())
print(t.size())
print(t[:, 1])
print(t[1, :])
print(t[0][1])

tensor([[ 1.,  2.,  3.],
        [ 4.,  5.,  6.],
        [ 7.,  8.,  9.],
        [10., 11., 12.]])
2
torch.Size([4, 3])
tensor([ 2.,  5.,  8., 11.])
tensor([4., 5., 6.])
tensor(2.)


## Broadcasting

In [25]:
# Same shape
m1 = torch.FloatTensor([[3, 3]])
m2 = torch.FloatTensor([[2, 2]])
print(m1 + m2)

tensor([[5., 5.]])


In [26]:
# Vector + scalar
m1 = torch.FloatTensor([[1, 2]])
m2 = torch.FloatTensor([[3,]])
print(m1 + m2)

tensor([[4., 5.]])


In [29]:
# 2 x 1 vector + 1 x 2 vector
m1 = torch.FloatTensor([[1,2]])
m2 = torch.FloatTensor([[2], [1]])
print(m1 + m2)

tensor([[3., 4.],
        [2., 3.]])


## Multiplication vs. Matrix Multiplication

In [30]:
print('=' * 40)
print('Mul vs Matmul')
print('=' * 40)

m1 = torch.FloatTensor([[1, 2], [3,4]])
m2 = torch.FloatTensor([[1], [2]])

print('Shape of Matrix 1: ', m1.shape) # 2 x 2
print('Shape of Matrix 2: ', m2.shape) # 2 x 2
print(m1.matmul(m2)) # 2 x 1

m1 = torch.FloatTensor([[1, 2], [3, 4]])
m2 = torch.FloatTensor([[1], [2]])
print('Shape of Matrix 1: ', m1.shape)
print('Shape of Matrix 2: ', m2.shape)
print(m1 * m2)
print(m1.mul(m2))

Mul vs Matmul
Shape of Matrix 1:  torch.Size([2, 2])
Shape of Matrix 2:  torch.Size([2, 1])
tensor([[ 5.],
        [11.]])
Shape of Matrix 1:  torch.Size([2, 2])
Shape of Matrix 2:  torch.Size([2, 1])
tensor([[1., 2.],
        [6., 8.]])
tensor([[1., 2.],
        [6., 8.]])


## Mean

In [244]:
t = torch.FloatTensor([1,2])
print(t.mean())
print(t.shape)

tensor(1.5000)
torch.Size([2])


In [33]:
# Can't use mean() on integers
t = torch.LongTensor([1, 2])
try:
    print(t.mean())
except Exception as exc:
    print(exc)

Can only calculate the mean of floating types. Got Long instead.


Yo can also use 't.mean' for higher rank tensors to get mean of all elements, or mean by particular dimension.

In [34]:
t = torch.FloatTensor([[1 , 2], [3 , 4]])

In [38]:
print(t.mean)
print(t.mean(dim=0))
print(t.mean(dim=1))
print(t.mean(dim=-1))

<built-in method mean of Tensor object at 0x7fd638096048>
tensor([2., 3.])
tensor([1.5000, 3.5000])
tensor([1.5000, 3.5000])


## Sum

In [42]:
print(t.sum)
print(t.sum(dim=0))
print(t.sum(dim=1))
print(t.sum(dim=-1))

<built-in method sum of Tensor object at 0x7fd638096048>
tensor([4., 6.])
tensor([3., 7.])
tensor([3., 7.])


## Max and Argmax

In [52]:
print(t.max())
print(t.max(dim=0))
print('Max: ', t.max(dim=0)[0])
print('ArgMax: ', t.argmax(dim=0)[1])
print(t.max(dim=1))
print(t.max(dim=-1))

tensor(4.)
torch.return_types.max(
values=tensor([3., 4.]),
indices=tensor([1, 1]))
Max:  tensor([3., 4.])
ArgMax:  tensor(1)
torch.return_types.max(
values=tensor([2., 4.]),
indices=tensor([1, 1]))
torch.return_types.max(
values=tensor([2., 4.]),
indices=tensor([1, 1]))


# Tensor Manipulation_2

## View (Reshape)

In [54]:
t = np.array([[[0, 1, 2],
             [3, 4, 5]],
            
            [[6, 7, 8],
            [9, 10, 11]]])
ft = torch.FloatTensor(t)
print(ft.shape)

torch.Size([2, 2, 3])


In [58]:
print(ft.view([-1, 3]))
print(ft.view([-1, 3]).shape)

tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.]])
torch.Size([4, 3])


In [59]:
print(ft.view([-1, 1, 3]))
print(ft.view([-1, 1, 3]).shape)

tensor([[[ 0.,  1.,  2.]],

        [[ 3.,  4.,  5.]],

        [[ 6.,  7.,  8.]],

        [[ 9., 10., 11.]]])
torch.Size([4, 1, 3])


## Squeeze

In [60]:
ft = torch.FloatTensor([[0], [1], [2]])
print(ft)
print(ft.shape)

tensor([[0.],
        [1.],
        [2.]])
torch.Size([3, 1])


In [63]:
print(ft.squeeze())
print(ft.squeeze().shape)

tensor([0., 1., 2.])
torch.Size([3])


## Unsqueeze

In [66]:
ft = torch.Tensor([0, 1, 2])
print(ft.shape)

torch.Size([3])


In [68]:
print(ft.unsqueeze(0))
print(ft.unsqueeze(0).shape)

tensor([[0., 1., 2.]])
torch.Size([1, 3])


In [69]:
print(ft.view(1, -1))
print(ft.view(1, -1).shape)

tensor([[0., 1., 2.]])
torch.Size([1, 3])


In [70]:
print(ft.unsqueeze(1))
print(ft.unsqueeze(1).shape)

tensor([[0.],
        [1.],
        [2.]])
torch.Size([3, 1])


In [71]:
print(ft.unsqueeze(-1))
print(ft.unsqueeze(-1).shape)

tensor([[0.],
        [1.],
        [2.]])
torch.Size([3, 1])


## Type Casting

In [73]:
lt = torch.LongTensor([1, 2, 3, 4])
print(lt)

tensor([1, 2, 3, 4])


In [74]:
print(lt.float())

tensor([1., 2., 3., 4.])


In [75]:
bt = torch.ByteTensor([True, False, False, True])
print(bt)

tensor([1, 0, 0, 1], dtype=torch.uint8)


In [76]:
print(bt.long())
print(bt.float())

tensor([1, 0, 0, 1])
tensor([1., 0., 0., 1.])


## Concatenate

In [77]:
x = torch.FloatTensor([[1, 2], [3, 4]])
y = torch.FloatTensor([[5, 6], [7, 8]])

In [79]:
print(torch.cat([x, y], dim=0))
a=torch.cat([x,y])
print(a.size())
print(torch.cat([x, y], dim=1))
b=torch.cat([x,y], dim=1)
print(b.size())

tensor([[1., 2.],
        [3., 4.],
        [5., 6.],
        [7., 8.]])
torch.Size([4, 2])
tensor([[1., 2., 5., 6.],
        [3., 4., 7., 8.]])
torch.Size([2, 4])


## Stacking

In [82]:
x = torch.FloatTensor([1, 4])
y = torch.FloatTensor([2, 5])
z = torch.FloatTensor([3, 6])

In [83]:
print(torch.stack([x, y, z]))
print(torch.stack([x, y, z], dim=1))

tensor([[1., 4.],
        [2., 5.],
        [3., 6.]])
tensor([[1., 2., 3.],
        [4., 5., 6.]])


In [85]:
print(torch.cat([x.unsqueeze(0), y.unsqueeze(0), z.unsqueeze(0)], dim=0))

tensor([[1., 4.],
        [2., 5.],
        [3., 6.]])


## Ones and Zeros

In [86]:
x = torch.FloatTensor([[0, 1, 2], [2, 1, 0]])
print(x)

tensor([[0., 1., 2.],
        [2., 1., 0.]])


In [88]:
print(torch.ones_like(x))
print(torch.zeros_like(x))

tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[0., 0., 0.],
        [0., 0., 0.]])


## In-place Operation

In [89]:
x = torch.FloatTensor([[1, 2], [3, 4]])

In [91]:
print(x.mul(2.))
print(x)
print(x.mul_(2.)) # 메모리를  새로 선언하지 않고 기존의 값에 넣음.
print(x)

tensor([[2., 4.],
        [6., 8.]])
tensor([[1., 2.],
        [3., 4.]])
tensor([[2., 4.],
        [6., 8.]])
tensor([[2., 4.],
        [6., 8.]])


# Linear regression

## Hypothesis

In [155]:
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[2], [4], [6]])

W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

optimizer = torch.optim.SGD([W, b], lr=0.01)

nb_epochs = 1000

for epoch in arange(1, nb_epochs + 1):
    
    hypothesis = x_train * W + b
    cost = torch.mean((hypothesis - y_train) ** 2)

    optimizer.zero_grad() # gradient 초기화
    cost.backward() # gradient 계산
    optimizer.step() # 계산된 편미분방향의 반대인 step 으로 개선.
    
## for문 range -> arange ???

# Gradient Descent

In [165]:
## non optim library

# # 데이터
# x_train = torch.FloatTensor([[0], [1], [2]])
# y_train = torch.FloatTensor([[0], [1], [2]])
# # 모델 초기화
# W = torch.zeros(1)
# # learning rate 
# lr = 0.1

# nb_epochs = 10
# for epoch in arange(nb_epochs + 1):
    
#     # H(x)
#     hypothesis = x_train * W
#     # Cost
#     cost = torch.mean((hypothesis - y_train)**2)
#     gradient = torch.sum((W * x_train - y_train) * x_train) ##
    
#     print('Epoch {:4d}/{} W: {:.3f}, Cost: {:.6f}'.format(epoch, nb_epochs, W.item(), cost.item()))
    
#     # Cost gradient 로 H(x) 개선
#     W -= lr * gradient

## including optim library


# 데이터
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[1], [2], [3]])
# 모델 초기화
W = torch.zeros(1, requires_grad=True)

## optimizer , learning 도 여기에 포함되어있음.
optimizer = torch.optim.SGD([W], lr=0.15)

nb_epochs = 10
for epoch in arange(nb_epochs + 1):
    
    # H(x)
    hypothesis = x_train * W
    
    # Cost
    cost = torch.mean((hypothesis - y_train)**2)
        
    print('Epoch {:4d}/{} W: {:.3f}, Cost: {:.6f}'.format(epoch, nb_epochs, W.item(), cost.item()))
    
    # cost 로 H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

Epoch    0/10 W: 0.000, Cost: 4.666667
Epoch    1/10 W: 1.400, Cost: 0.746667
Epoch    2/10 W: 0.840, Cost: 0.119467
Epoch    3/10 W: 1.064, Cost: 0.019115
Epoch    4/10 W: 0.974, Cost: 0.003058
Epoch    5/10 W: 1.010, Cost: 0.000489
Epoch    6/10 W: 0.996, Cost: 0.000078
Epoch    7/10 W: 1.002, Cost: 0.000013
Epoch    8/10 W: 0.999, Cost: 0.000002
Epoch    9/10 W: 1.000, Cost: 0.000000
Epoch   10/10 W: 1.000, Cost: 0.000000


# Multivariate Linear Regression

In [167]:
x_train = torch.FloatTensor([[73, 85, 94],
                            [72, 75, 84],
                            [98, 45, 74],
                            [75, 83, 68],
                            [79, 88, 99],])
y_train = torch.FloatTensor([[123],
                            [143],
                            [153],
                            [126],
                            [112]])

In [168]:
# non using nn.module
W = torch.zeros((3, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

optimizer = torch.optim.SGD([W, b], lr=1e-5)

nb_epochs = 20
for epoch in arange(nb_epochs + 1):
    
    # H(x) 
    hypothesis = x_train.matmul(W) + b
    
    # cost
    cost = torch.mean((hypothesis - y_train) ** 2)
    
    # cost -> H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    print('Epoch: {:4d}/{} hypothesis: {} Cost: {}'.format(epoch, nb_epochs, hypothesis.squeeze().detach(), cost.item()))

Epoch:    0/20 hypothesis: tensor([0., 0., 0., 0., 0.]) Cost: 17481.400390625
Epoch:    1/20 hypothesis: tensor([52.3126, 47.9851, 45.4645, 46.6727, 55.2458]) Cost: 7020.45703125
Epoch:    2/20 hypothesis: tensor([84.6223, 77.6370, 73.6554, 75.5157, 89.3734]) Cost: 3020.271484375
Epoch:    3/20 hypothesis: tensor([104.5667,  95.9556,  91.1676,  93.3369, 110.4462]) Cost: 1489.1002197265625
Epoch:    4/20 hypothesis: tensor([116.8673, 107.2683, 102.0778, 104.3445, 123.4490]) Cost: 901.4947509765625
Epoch:    5/20 hypothesis: tensor([124.4427, 114.2501, 108.9062, 111.1403, 131.4630]) Cost: 674.5035400390625
Epoch:    6/20 hypothesis: tensor([129.0974, 118.5547, 113.2109, 115.3324, 136.3933]) Cost: 585.3509521484375
Epoch:    7/20 hypothesis: tensor([131.9465, 121.2043, 115.9549, 117.9150, 139.4174]) Cost: 548.9025268554688
Epoch:    8/20 hypothesis: tensor([133.6798, 122.8310, 117.7336, 119.5029, 141.2633]) Cost: 532.6240234375
Epoch:    9/20 hypothesis: tensor([134.7233, 123.8253, 118.91

In [185]:
import torch.nn as nn
import torch.nn.functional as F

class MultivariateLinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3, 1)
        
    def forward(self, x):
        return self.linear(x)

# using nn.module
# 데이터
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70]])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])
# 모델 초기화
model = MultivariateLinearRegressionModel()
# optimizer 설정
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)

nb_epochs = 20
for epoch in arange(nb_epochs+1):
    
    # H(x) 계산
    prediction = model(x_train)
    
    # cost 계산
    cost = F.mse_loss(prediction, y_train)
    
    # cost로 H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    # 20번마다 로그 출력
    print('Epoch {:4d}/{} Cost: {:.6f}'.format(
        epoch, nb_epochs, cost.item()
    ))

Epoch    0/20 Cost: 36476.191406
Epoch    1/20 Cost: 11434.528320
Epoch    2/20 Cost: 3585.295654
Epoch    3/20 Cost: 1124.977905
Epoch    4/20 Cost: 353.798187
Epoch    5/20 Cost: 112.073692
Epoch    6/20 Cost: 36.305428
Epoch    7/20 Cost: 12.555538
Epoch    8/20 Cost: 5.110576
Epoch    9/20 Cost: 2.776458
Epoch   10/20 Cost: 2.044274
Epoch   11/20 Cost: 1.814185
Epoch   12/20 Cost: 1.741501
Epoch   13/20 Cost: 1.718168
Epoch   14/20 Cost: 1.710293
Epoch   15/20 Cost: 1.707250
Epoch   16/20 Cost: 1.705762
Epoch   17/20 Cost: 1.704715
Epoch   18/20 Cost: 1.703820
Epoch   19/20 Cost: 1.702994
Epoch   20/20 Cost: 1.702168


## Dataset 

In [176]:
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    def __init__(self):
        self.x_data = torch.FloatTensor([[73, 85, 94],
                            [72, 75, 84],
                            [98, 45, 74],
                            [75, 83, 68],
                            [79, 88, 99],])
        self.y_data = torch.FloatTensor([[123],
                                        [143],
                                        [153],
                                        [126],
                                        [112]])
    def __len__(self):
        return len(self.x_data)
    
    def __getitem__(self, idx):
        x = torch.FloatTensor(self.x_data[idx])
        y = torch.FloatTensor(self.y_data[idx])
        
        return x,y
    
dataset = CustomDataset()

## DataLoader

In [178]:
from torch.utils.data import DataLoader

dataloader = DataLoader(
    dataset,
    batch_size = 2,
    shuffle = True,
)

In [179]:
nb_epochs = 20
for epoch in arange(nb_epochs + 1):
    for batch_idx, samples in enumerate(dataloader):
        x_train, y_train = samples
        # H(x)
        prediction = model(x_train)
        
        # cost
        cost = F.mse_loss(prediction, y_train)
        
        # cost -> H(x)
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        print('Epoch {:4d}/{} Batch {}/{} Cost: {:.6f}'.format(epoch, nb_epochs, batch_idx+1, len(dataloader), cost.item()))

Epoch    0/20 Batch 1/3 Cost: 32555.132812
Epoch    0/20 Batch 2/3 Cost: 34658.738281
Epoch    0/20 Batch 3/3 Cost: 31815.185547
Epoch    1/20 Batch 1/3 Cost: 35347.015625
Epoch    1/20 Batch 2/3 Cost: 32847.347656
Epoch    1/20 Batch 3/3 Cost: 29854.203125
Epoch    2/20 Batch 1/3 Cost: 31126.908203
Epoch    2/20 Batch 2/3 Cost: 34366.523438
Epoch    2/20 Batch 3/3 Cost: 35256.062500
Epoch    3/20 Batch 1/3 Cost: 31126.908203
Epoch    3/20 Batch 2/3 Cost: 32555.132812
Epoch    3/20 Batch 3/3 Cost: 38878.843750
Epoch    4/20 Batch 1/3 Cost: 30146.417969
Epoch    4/20 Batch 2/3 Cost: 37067.453125
Epoch    4/20 Batch 3/3 Cost: 31815.185547
Epoch    5/20 Batch 1/3 Cost: 32847.347656
Epoch    5/20 Batch 2/3 Cost: 35347.015625
Epoch    5/20 Batch 3/3 Cost: 29854.203125
Epoch    6/20 Batch 1/3 Cost: 30146.417969
Epoch    6/20 Batch 2/3 Cost: 33535.625000
Epoch    6/20 Batch 3/3 Cost: 38878.843750
Epoch    7/20 Batch 1/3 Cost: 32555.132812
Epoch    7/20 Batch 2/3 Cost: 35347.015625
Epoch    7/

# Logistic Regression

In [187]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(2020)

<torch._C.Generator at 0x7fd5f62bdf30>

## Training Data

In [188]:
x_data = [[1,2], [2,3], [3,4], [4,5], [5,6], [6,7]]
y_data = [[0], [1], [1], [1], [0], [0]]

In [196]:
x_train = torch.FloatTensor(x_data)
y_train = torch.FloatTensor(y_data)

In [197]:
print(x_train.shape)
print(y_train.shape)

torch.Size([6, 2])
torch.Size([6, 1])


## Computing the Hypothesis

In [203]:
W = torch.zeros((2, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

hypothesis = 1 / (1 + torch.exp(-(x_train.matmul(W) + b)))

In [205]:
print(hypothesis)
print(hypothesis.shape)

tensor([[0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000]], grad_fn=<MulBackward0>)
torch.Size([6, 1])


### torch.sigmoid()

In [206]:
hypothesis = torch.sigmoid(x_train.matmul(W) + b)

print(hypothesis)
print(hypothesis.shape)

tensor([[0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000]], grad_fn=<SigmoidBackward>)
torch.Size([6, 1])


## Computing the Cost Function

In [211]:
# sample -(y_train[0] * torch.log(hypothesis[0]) + (1 - y_train[0]) * (torch.log(1 - hypothesis[0]))
losses = -(y_train * torch.log(hypothesis) + 
           (1 - y_train) * torch.log(1 - hypothesis))
print(losses)
##
cost = losses.mean()
print(cost)

tensor([[0.6931],
        [0.6931],
        [0.6931],
        [0.6931],
        [0.6931],
        [0.6931]], grad_fn=<NegBackward>)
tensor(0.6931, grad_fn=<MeanBackward0>)


In [213]:
# same the above equation
F.binary_cross_entropy(hypothesis, y_train)

tensor(0.6931, grad_fn=<BinaryCrossEntropyBackward>)

## Whole Training Procedure

In [220]:
# 모델 초기화
W = torch.zeros((2, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
# optimizer 설정
optimizer = optim.SGD([W, b], lr=1)

nb_epochs = 1000
for epoch in arange(nb_epochs + 1):
    
    # Cost
    hypothesis = torch.sigmoid(x_train.matmul(W) + b)
    cost = F.binary_cross_entropy(hypothesis, y_train)
    
    # Cost -> H(x)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    # log
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} cost: {:.6f}'.format(
        epoch, nb_epochs, cost.item()
        ))

Epoch    0/1000 cost: 0.693147
Epoch  100/1000 cost: 1.208955
Epoch  200/1000 cost: 0.900994
Epoch  300/1000 cost: 1.785612
Epoch  400/1000 cost: 3.589777
Epoch  500/1000 cost: 1.367346
Epoch  600/1000 cost: 0.875838
Epoch  700/1000 cost: 0.888636
Epoch  800/1000 cost: 1.976118
Epoch  900/1000 cost: 1.202257
Epoch 1000/1000 cost: 0.894779


## Evaluation

In [221]:
hypothesis = torch.sigmoid(x_train.matmul(W) + b)
print(hypothesis[:5])

tensor([[0.9449],
        [0.7264],
        [0.2913],
        [0.0598],
        [0.0098]], grad_fn=<SliceBackward>)


In [227]:
prediction = hypothesis >= torch.FloatTensor([0.5])
print(prediction[:5])

tensor([[ True],
        [ True],
        [False],
        [False],
        [False]])


In [229]:
display(hypothesis[:5])
display(prediction[:5])
display(y_train[:5])

tensor([[0.9449],
        [0.7264],
        [0.2913],
        [0.0598],
        [0.0098]], grad_fn=<SliceBackward>)

tensor([[ True],
        [ True],
        [False],
        [False],
        [False]])

tensor([[0.],
        [1.],
        [1.],
        [1.],
        [0.]])

In [230]:
correct_prediction = prediction.float() == y_train
print(correct_prediction[:5])

tensor([[False],
        [ True],
        [False],
        [False],
        [ True]])


## Higher Implementation with Class

In [285]:
class BinaryClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(2, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        return self.sigmoid(self.linear(x))

In [286]:
model = BinaryClassifier()

In [288]:
# optimizer 
optimizer = optim.SGD(model.parameters(), lr=1)

nb_epochs = 100
for epochs in arange(nb_epochs + 1):
    
    # h(x)
    hypothesis = model(x_train)
    
    # cost
    cost = F.binary_cross_entropy(hypothesis, y_train)
    
    # cost -> H(x)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    # each 20 epochs then print
    if epoch % 20 == 0:
        prediction = hypothesis >= torch.FloatTensor([0.5])
        correct_prediction = prediction.float() == y_train
        accuracy = correct_prediction.sum().item() / len(correct_prediction)
        print('Epoch {:4d}/{} Cost: {:.6f} Accuracy {:2.2f}%'.format(
        epoch, nb_epochs, cost.item(), accuracy * 100))    

Epoch 1000/100 Cost: 0.974912 Accuracy 50.00%
Epoch 1000/100 Cost: 4.857275 Accuracy 50.00%
Epoch 1000/100 Cost: 2.046420 Accuracy 50.00%
Epoch 1000/100 Cost: 6.185209 Accuracy 50.00%
Epoch 1000/100 Cost: 0.728534 Accuracy 50.00%
Epoch 1000/100 Cost: 2.653549 Accuracy 50.00%
Epoch 1000/100 Cost: 4.573590 Accuracy 50.00%
Epoch 1000/100 Cost: 4.535166 Accuracy 50.00%
Epoch 1000/100 Cost: 2.344078 Accuracy 50.00%
Epoch 1000/100 Cost: 6.065827 Accuracy 50.00%
Epoch 1000/100 Cost: 0.743901 Accuracy 50.00%
Epoch 1000/100 Cost: 2.961812 Accuracy 50.00%
Epoch 1000/100 Cost: 4.130284 Accuracy 50.00%
Epoch 1000/100 Cost: 4.823746 Accuracy 50.00%
Epoch 1000/100 Cost: 1.915762 Accuracy 50.00%
Epoch 1000/100 Cost: 6.204871 Accuracy 50.00%
Epoch 1000/100 Cost: 0.668050 Accuracy 83.33%
Epoch 1000/100 Cost: 1.326213 Accuracy 50.00%
Epoch 1000/100 Cost: 5.031044 Accuracy 50.00%
Epoch 1000/100 Cost: 4.094687 Accuracy 50.00%
Epoch 1000/100 Cost: 2.720934 Accuracy 50.00%
Epoch 1000/100 Cost: 5.796669 Accu

# Softmax Classification

In [290]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn

In [292]:
torch.manual_seed(2020)

<torch._C.Generator at 0x7fd5f62bdf30>

## Discrete Probability Distribution

연속형 / 이산형 확률변수에 대한 개요

## Softmax

max / softmax 에 대한 개요

## Cross Entropy

In [297]:
z

tensor([[0.4869, 0.1052, 0.5883, 0.1161, 0.4949],
        [0.2824, 0.5899, 0.8105, 0.2512, 0.6307],
        [0.5403, 0.8033, 0.7781, 0.4966, 0.8888]], requires_grad=True)

In [293]:
z = torch.rand(3, 5, requires_grad=True)
hypothesis = F.softmax(z, dim=1)
print(hypothesis)

tensor([[0.2228, 0.1521, 0.2466, 0.1538, 0.2246],
        [0.1552, 0.2111, 0.2632, 0.1505, 0.2199],
        [0.1683, 0.2189, 0.2134, 0.1611, 0.2384]], grad_fn=<SoftmaxBackward>)


In [296]:
y = torch.randint(5, (3,)).long()
print(y)

tensor([4, 2, 0])


In [298]:
y_one_hot = torch.zeros_like(hypothesis)
y_one_hot.scatter_(1, y.unsqueeze(1), 1)

tensor([[0., 0., 0., 0., 1.],
        [0., 0., 1., 0., 0.],
        [1., 0., 0., 0., 0.]])

In [302]:
print(y_one_hot.shape)
torch.log(hypothesis).shape

torch.Size([3, 5])


torch.Size([3, 5])

In [304]:
display(y_one_hot * -torch.log(hypothesis))
display((y_one_hot * -torch.log(hypothesis)).sum(dim=0))
display((y_one_hot * -torch.log(hypothesis)).sum(dim=1))
display((y_one_hot * -torch.log(hypothesis)).sum(dim=1).mean())

tensor([[0.0000, 0.0000, 0.0000, 0.0000, 1.4934],
        [0.0000, 0.0000, 1.3347, 0.0000, 0.0000],
        [1.7823, 0.0000, 0.0000, 0.0000, 0.0000]], grad_fn=<MulBackward0>)

tensor([1.7823, 0.0000, 1.3347, 0.0000, 1.4934], grad_fn=<SumBackward1>)

tensor([1.4934, 1.3347, 1.7823], grad_fn=<SumBackward1>)

tensor(1.5368, grad_fn=<MeanBackward0>)

In [299]:
cost = (y_one_hot * -torch.log(hypothesis)).sum(dim=1).mean()
print(cost)

tensor(1.5368, grad_fn=<MeanBackward0>)


In [307]:
# low level
torch.log(F.softmax(z, dim=1))

# high level
F.log_softmax(z, dim=1)

tensor([[-1.5014, -1.8830, -1.3999, -1.8721, -1.4934],
        [-1.8628, -1.5553, -1.3347, -1.8940, -1.5145],
        [-1.7823, -1.5193, -1.5444, -1.8260, -1.4338]],
       grad_fn=<LogSoftmaxBackward>)

In [311]:
# low level
(y_one_hot * -torch.log(F.softmax(z, dim=1))).sum(dim=1).mean()

# high level
F.nll_loss(F.log_softmax(z, dim=1), y)

# nll -> Negative Log Likelihood

tensor(1.5368, grad_fn=<NllLossBackward>)

In [312]:
F.cross_entropy(z,y)

tensor(1.5368, grad_fn=<NllLossBackward>)

### Training with Low-level Cross Entropy Loss

In [339]:
import numpy as np
np.random.seed(0)
x_train = (np.random.rand(8,4)*10).round()
y_train = (np.random.rand(1,8)*10).round()
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

In [341]:
# display(x_train)
# display(y_train)
# print(x_train.shape)
# print(y_train.shape)

tensor([[ 5.,  7.,  6.,  5.],
        [ 4.,  6.,  4.,  9.],
        [10.,  4.,  8.,  5.],
        [ 6.,  9.,  1.,  1.],
        [ 0.,  8.,  8.,  9.],
        [10.,  8.,  5.,  8.],
        [ 1.,  6.,  1.,  9.],
        [ 5.,  4.,  3.,  8.]])

tensor([[5, 6, 0, 6, 6, 6, 9, 7]])

torch.Size([8, 4])
torch.Size([1, 8])


In [379]:
# 모델 초기화
W = torch.zeros((4, 3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# optimizer
optimizer = optim.SGD([W, b], lr=0.1)

nb_epochs=1000
for epochs in arange(nb_epochs + 1):
    
    hypothesis = F.softmax(x_train.matmul(W) + b, dim=1)
    y_one_hot = torch.zeros_like(hypothesis)
    y_one_hot.scatter_(1, y_train.unsqueeze(1), 1)
    cost = (y_one_hot * -torch.log(F.softmax(hypothesis, dim=1))).sum(dim=1)
    
    # cost -> H(x)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

RuntimeError: index 5 is out of bounds for dimension 1 with size 3

In [348]:
xx_train == x_train
xx_train.shape == x_train.shape
y_train == yy_train
y_train.shape == yy_train.shape

True

In [350]:
x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]
y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

# 모델 초기화
W = torch.zeros((4, 3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
# optimizer 설정
optimizer = optim.SGD([W, b], lr=0.1)

nb_epochs = 1000
for epoch in arange(nb_epochs + 1):

    # Cost 계산 (1)
    hypothesis = F.softmax(x_train.matmul(W) + b, dim=1) # or .mm or @
    y_one_hot = torch.zeros_like(hypothesis)
    y_one_hot.scatter_(1, y_train.unsqueeze(1), 1)
    cost = (y_one_hot * -torch.log(F.softmax(hypothesis, dim=1))).sum(dim=1).mean()

    # cost로 H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # 100번마다 로그 출력
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

Epoch    0/1000 Cost: 1.098612
Epoch  100/1000 Cost: 0.901535
Epoch  200/1000 Cost: 0.839114
Epoch  300/1000 Cost: 0.807826
Epoch  400/1000 Cost: 0.788472
Epoch  500/1000 Cost: 0.774822
Epoch  600/1000 Cost: 0.764449
Epoch  700/1000 Cost: 0.756191
Epoch  800/1000 Cost: 0.749398
Epoch  900/1000 Cost: 0.743671
Epoch 1000/1000 Cost: 0.738749


### Training with F.corss_entropy

In [383]:
# 모델 초기화
W = torch.zeros((4, 3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# optimizer 설정
optimizer = optim.SGD([W, b], lr=0.1)

nb_epochs = 1000
for epoch in arange(nb_epochs + 1):
    
    # Cost 계산
    z = x_train.matmul(W) + b
    cost = F.cross_entropy(z, y_train)
    
    # cost -> H(x)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

Epoch    0/1000 Cost: 1.098612
Epoch  100/1000 Cost: 0.761050
Epoch  200/1000 Cost: 0.689991
Epoch  300/1000 Cost: 0.643229
Epoch  400/1000 Cost: 0.604117
Epoch  500/1000 Cost: 0.568256
Epoch  600/1000 Cost: 0.533922
Epoch  700/1000 Cost: 0.500291
Epoch  800/1000 Cost: 0.466908
Epoch  900/1000 Cost: 0.433507
Epoch 1000/1000 Cost: 0.399962


## High-level Implementation with nn.Module

In [386]:
class SoftmaxClassifierModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 3) # 4개의 값을 받아서 3개의 값을 내놓음.
        
    def forward(self, x):
        return self.linear(x)

In [387]:
model = SoftmaxClassifierModel()

In [391]:
optimizer = optim.SGD(model.parameters(), lr=0.1)

nb_epochs = 1000
for epoch in arange(nb_epochs + 1):
    
    # H(x)
    prediction = model(x_train)
    
    # cost
    cost = F.cross_entropy(prediction, y_train)
    
    # cost -> H(x)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} cost: {:6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

Epoch    0/1000 cost: 1.106180
Epoch  100/1000 cost: 0.720618
Epoch  200/1000 cost: 0.637684
Epoch  300/1000 cost: 0.579574
Epoch  400/1000 cost: 0.528705
Epoch  500/1000 cost: 0.480843
Epoch  600/1000 cost: 0.434375
Epoch  700/1000 cost: 0.388519
Epoch  800/1000 cost: 0.342926
Epoch  900/1000 cost: 0.297923
Epoch 1000/1000 cost: 0.257454


# Tips

## MLE

## Overfitting

- More Data


- Less features


- Regularization

### Regularization

- Early Stopping


- Reducing Network Size


- Weight Decay


- Dropout


- Batch Normalization

## Basic Approach to Train DNN

1. Make a neural network architecture.


2. Train and check that model is over-fitted.

    a) If it is not, increase the model size (deeper and wider)
    
    b) If it is, add regularization, such as drop-out, batch_normalization
    

3. Repeat from step-2

# practice

In [393]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [394]:
torch.manual_seed(2020)

<torch._C.Generator at 0x7fd5f62bdf30>

## Training and Test Dataset

In [414]:
np.random.seed(2020)
x_train = torch.FloatTensor((np.random.rand(8, 3)*10).round().astype(int))
y_train = torch.LongTensor([2, 2, 2, 1, 1, 1, 0, 0])

In [416]:
x_test = torch.FloatTensor([[2, 1, 1], [3, 1, 2], [3, 3, 4]])
y_test = torch.LongTensor([2, 2, 2])

## Model

In [419]:
class SoftmaxClassifierModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3, 3)
    def forward(self, x):
        return self.linear(x)

In [420]:
model = SoftmaxClassifierModel()

In [421]:
# optimizer
optimizer = optim.SGD(model.parameters(), lr=0.1)

## Training

In [437]:
def train(model, optimizer, x_train, y_train):
    nb_epochs = 20
    for epoch in arange(nb_epochs):

        # H(x) 계산
        prediction = model(x_train)

        # cost 계산
        cost = F.cross_entropy(prediction, y_train)

        # cost로 H(x) 개선
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

## Test ( Validation )

In [425]:
def test(model, optimizer, x_test, y_test):
    prediction = model(x_test)
    predicted_classes = prediction.max(1)[1]
    correct_count = (predicted_classes == y_test).sum().item()
    cost = F.cross_entropy(prediction, y_test)
    
    print('Accuracy: {}% Cost: {:.6f}'.format(
        correct_count / len(y_test) * 100, cost.item()
    ))

## Run

In [438]:
train(model, optimizer, x_train, y_train)

Epoch    0/20 Cost: 809201.125000
Epoch    1/20 Cost: 948511.625000
Epoch    2/20 Cost: 148469.781250
Epoch    3/20 Cost: 138872.312500
Epoch    4/20 Cost: 26594.777344
Epoch    5/20 Cost: 221221.562500
Epoch    6/20 Cost: 720689.562500
Epoch    7/20 Cost: 266843.875000
Epoch    8/20 Cost: 189094.781250
Epoch    9/20 Cost: 52934.812500
Epoch   10/20 Cost: 67219.781250
Epoch   11/20 Cost: 69968.882812
Epoch   12/20 Cost: 414682.500000
Epoch   13/20 Cost: 953195.625000
Epoch   14/20 Cost: 1211724.625000
Epoch   15/20 Cost: 166786.656250
Epoch   16/20 Cost: 59407.296875
Epoch   17/20 Cost: 381466.375000
Epoch   18/20 Cost: 845689.625000
Epoch   19/20 Cost: 266786.656250


In [429]:
test(model, optimizer, x_test, y_test)

Accuracy: 66.66666666666666% Cost: 0.759458


## Learning Rate

In [440]:
model = SoftmaxClassifierModel()

### learning rate 가 너무 클 때

In [450]:
model = SoftmaxClassifierModel()

In [451]:
optimizer = optim.SGD(model.parameters(), lr=1e5)

In [452]:
train(model, optimizer, x_train, y_train)

Epoch    0/20 Cost: 1.896620
Epoch    1/20 Cost: 1272039.750000
Epoch    2/20 Cost: 2769402.500000
Epoch    3/20 Cost: 1324117.625000
Epoch    4/20 Cost: 732977.250000
Epoch    5/20 Cost: 2317840.000000
Epoch    6/20 Cost: 1282894.500000
Epoch    7/20 Cost: 684057.062500
Epoch    8/20 Cost: 583175.062500
Epoch    9/20 Cost: 710953.000000
Epoch   10/20 Cost: 857302.250000
Epoch   11/20 Cost: 316649.500000
Epoch   12/20 Cost: 134790.375000
Epoch   13/20 Cost: 133231.875000
Epoch   14/20 Cost: 462182.125000
Epoch   15/20 Cost: 742839.812500
Epoch   16/20 Cost: 1520328.125000
Epoch   17/20 Cost: 559456.937500
Epoch   18/20 Cost: 240410.515625
Epoch   19/20 Cost: 346421.500000


### learning rate 가 너무 작을 때

In [453]:
model = SoftmaxClassifierModel()

In [454]:
optimizer = optim.SGD(model.parameters(), lr=1e-10)

In [455]:
train(model, optimizer, x_train, y_train)

Epoch    0/20 Cost: 3.360192
Epoch    1/20 Cost: 3.360192
Epoch    2/20 Cost: 3.360192
Epoch    3/20 Cost: 3.360192
Epoch    4/20 Cost: 3.360192
Epoch    5/20 Cost: 3.360192
Epoch    6/20 Cost: 3.360192
Epoch    7/20 Cost: 3.360192
Epoch    8/20 Cost: 3.360192
Epoch    9/20 Cost: 3.360192
Epoch   10/20 Cost: 3.360192
Epoch   11/20 Cost: 3.360192
Epoch   12/20 Cost: 3.360192
Epoch   13/20 Cost: 3.360192
Epoch   14/20 Cost: 3.360192
Epoch   15/20 Cost: 3.360192
Epoch   16/20 Cost: 3.360192
Epoch   17/20 Cost: 3.360192
Epoch   18/20 Cost: 3.360192
Epoch   19/20 Cost: 3.360192


### learning rate가 적절할 때

In [460]:
model = SoftmaxClassifierModel()

In [461]:
optimizer = optim.SGD(model.parameters(), lr=1e-1)

In [462]:
train(model, optimizer, x_train, y_train)

Epoch    0/20 Cost: 2.182966
Epoch    1/20 Cost: 1.544655
Epoch    2/20 Cost: 0.991795
Epoch    3/20 Cost: 0.931347
Epoch    4/20 Cost: 1.021245
Epoch    5/20 Cost: 0.909969
Epoch    6/20 Cost: 0.793979
Epoch    7/20 Cost: 0.805483
Epoch    8/20 Cost: 0.703795
Epoch    9/20 Cost: 0.697935
Epoch   10/20 Cost: 0.633792
Epoch   11/20 Cost: 0.610649
Epoch   12/20 Cost: 0.569784
Epoch   13/20 Cost: 0.540126
Epoch   14/20 Cost: 0.514346
Epoch   15/20 Cost: 0.488174
Epoch   16/20 Cost: 0.471633
Epoch   17/20 Cost: 0.452177
Epoch   18/20 Cost: 0.440520
Epoch   19/20 Cost: 0.426650


## Data Preprocessing

In [463]:
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70]])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

### Standardization

In [464]:
mu = x_train.mean(dim=0)

In [467]:
sigma = x_train.std(dim=0)

In [468]:
norm_x_train = (x_train - mu) / sigma

In [483]:
class MultivariateLinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3, 1)

    def forward(self, x):
        return self.linear(x)

In [488]:
model = MultivariateLinearRegressionModel()

In [490]:
optimizer = optim.SGD(model.parameters(), lr=1e-1)

## Training with Preprocessed Data

In [491]:
def train(model, optimizer, x_train, y_train):
    nb_epochs = 20
    for epoch in arange(nb_epochs):

        # H(x) 계산
        prediction = model(x_train)

        # cost 계산
        cost = F.mse_loss(prediction, y_train)

        # cost로 H(x) 개선
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

In [492]:
train(model, optimizer, norm_x_train, y_train)

Epoch    0/20 Cost: 29579.099609
Epoch    1/20 Cost: 18780.074219
Epoch    2/20 Cost: 11975.608398
Epoch    3/20 Cost: 7651.863281
Epoch    4/20 Cost: 4893.715820
Epoch    5/20 Cost: 3131.131104
Epoch    6/20 Cost: 2003.836670
Epoch    7/20 Cost: 1282.585205
Epoch    8/20 Cost: 821.041992
Epoch    9/20 Cost: 525.666687
Epoch   10/20 Cost: 336.625061
Epoch   11/20 Cost: 215.634201
Epoch   12/20 Cost: 138.194412
Epoch   13/20 Cost: 88.627426
Epoch   14/20 Cost: 56.899132
Epoch   15/20 Cost: 36.587547
Epoch   16/20 Cost: 23.583166
Epoch   17/20 Cost: 15.255445
Epoch   18/20 Cost: 9.921062
Epoch   19/20 Cost: 6.502402
