In [1]:
import torch
import torch.nn as nn

# Seed the default generator up front so weight initialisation is reproducible.
torch.manual_seed(777)

# Use the GPU when PyTorch can see one (seeding every CUDA generator with the
# same value); otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
    torch.cuda.manual_seed_all(777)
else:
    device = 'cpu'
print(device)

cuda


# Single-Layer Perceptron

In [2]:
# XOR truth table: every row of X is one input pair and the matching row of Y
# is its 0/1 label. Both are float32 tensors placed on `device`.
X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32, device=device)
Y = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32, device=device)

In [3]:
# One affine layer (2 inputs -> 1 output) followed by a sigmoid.
linear = nn.Linear(in_features=2, out_features=1, bias=True)
sigmoid = nn.Sigmoid()
model = nn.Sequential(linear, sigmoid)
model = model.to(device)

In [4]:
# Binary cross-entropy between the sigmoid output and the 0/1 labels.
criterion = torch.nn.BCELoss().to(device)
# Plain SGD over all of the model's parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=1)

for step in range(10001):
    # Clear the gradients left over from the previous backward pass.
    optimizer.zero_grad()
    hypothesis = model(X)

    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    # Report the cost every 1000 steps (step 0 through step 10000 inclusive).
    if step % 1000 == 0:
        print(step, cost.item())

0 0.7273973822593689
1000 0.6931471824645996
2000 0.6931471824645996
3000 0.6931471824645996
4000 0.6931471824645996
5000 0.6931471824645996
6000 0.6931471824645996
7000 0.6931471824645996
8000 0.6931471824645996
9000 0.6931471824645996
10000 0.6931471824645996


In [5]:
# Evaluation only: gradient tracking is switched off for everything computed
# inside this block, so the results can be converted to NumPy directly.
with torch.no_grad():
    hypothesis = model(X)
    # Threshold the sigmoid output at 0.5 to get hard 0/1 predictions.
    predicted = hypothesis.gt(0.5).float()
    # Fraction of rows whose prediction equals the label.
    accuracy = predicted.eq(Y).float().mean()
    print('\nHypothesis:\n', hypothesis.cpu().numpy())
    print('Predicted:\n', predicted.cpu().numpy())
    print('Y:\n', Y.cpu().numpy())
    print('Accuracy: ', accuracy.item())


Hypothesis:
 [[0.5]
 [0.5]
 [0.5]
 [0.5]]
Predicted:
 [[0.]
 [0.]
 [0.]
 [0.]]
Y:
 [[0.]
 [1.]
 [1.]
 [0.]]
Accuracy:  0.5


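**It doesn't work!** A single linear layer followed by a sigmoid can only draw one straight decision boundary, and XOR is not linearly separable, so the model ends up predicting 0.5 for every input (cost ≈ ln 2 ≈ 0.6931, accuracy 0.5).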

# Multi-Layer Perceptron

In [6]:
# Clear the interactive namespace before the next experiment; -f skips the
# confirmation prompt. Everything (imports included) must be set up again below.
%reset -f

In [7]:
import torch
import torch.nn as nn

# Re-seed the default generator so this experiment starts from a known state.
torch.manual_seed(777)

# Use the GPU when PyTorch can see one (seeding every CUDA generator with the
# same value); otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
    torch.cuda.manual_seed_all(777)
else:
    device = 'cpu'
print(device)

cuda


In [10]:
# XOR truth table: every row of X is one input pair and the matching row of Y
# is its 0/1 label. Both are float32 tensors placed on `device`.
X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32, device=device)
Y = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32, device=device)

# Four affine layers (2 -> 10 -> 10 -> 10 -> 1), each followed by a sigmoid.
model = nn.Sequential(
    nn.Linear(in_features=2, out_features=10, bias=True),
    nn.Sigmoid(),
    nn.Linear(in_features=10, out_features=10, bias=True),
    nn.Sigmoid(),
    nn.Linear(in_features=10, out_features=10, bias=True),
    nn.Sigmoid(),
    nn.Linear(in_features=10, out_features=1, bias=True),
    nn.Sigmoid(),
)
model = model.to(device)

# Binary cross-entropy loss, optimised with plain SGD.
criterion = nn.BCELoss().to(device)
optimizer = torch.optim.SGD(params=model.parameters(), lr=1)

for epoch in range(10_000 + 1):
    # Forward pass on the full (four-row) batch.
    hypothesis = model(X)
    cost = criterion(hypothesis, Y)

    # Reset stale gradients, back-propagate, then take one SGD step.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Report the cost every 1000 epochs (epoch 0 through 10000 inclusive).
    if not epoch % 1000:
        print(epoch, cost.item())

0 0.7676107287406921
1000 0.6931401491165161
2000 0.6930943131446838
3000 0.6928845643997192
4000 0.5360997915267944
5000 0.001193253556266427
6000 0.0005051061161793768
7000 0.00031278873211704195
8000 0.00022425605857279152
9000 0.00017382728401571512
10000 0.00014145647583063692


In [11]:
# Evaluation only: gradient tracking is switched off for everything computed
# inside this block, so the results can be converted to NumPy directly.
with torch.no_grad():
    hypothesis = model(X)
    # Threshold the sigmoid output at 0.5 to get hard 0/1 predictions.
    predicted = hypothesis.gt(0.5).float()
    # Fraction of rows whose prediction equals the label.
    accuracy = predicted.eq(Y).float().mean()
    print('\nHypothesis:\n', hypothesis.cpu().numpy())
    print('Predicted:\n', predicted.cpu().numpy())
    print('Y:\n', Y.cpu().numpy())
    print('Accuracy: ', accuracy.item())


Hypothesis:
 [[1.03575905e-04]
 [9.99860406e-01]
 [9.99844909e-01]
 [1.67353428e-04]]
Predicted:
 [[0.]
 [1.]
 [1.]
 [0.]]
Y:
 [[0.]
 [1.]
 [1.]
 [0.]]
Accuracy:  1.0


# 호기심천국 >> 기울기 소실 예시
#### 중간에 Sigmoid 끼워넣기 vs ReLU 끼워넣기

In [15]:
# Two networks with the same layer sizes (2 -> 10 -> 10 -> 10 -> 1) and the
# same sigmoid output; they differ only in the hidden activation.
Sigmoid_Model = nn.Sequential(
    nn.Linear(in_features=2, out_features=10, bias=True),
    nn.Sigmoid(),
    nn.Linear(in_features=10, out_features=10, bias=True),
    nn.Sigmoid(),
    nn.Linear(in_features=10, out_features=10, bias=True),
    nn.Sigmoid(),
    nn.Linear(in_features=10, out_features=1, bias=True),
    nn.Sigmoid(),
)
Sigmoid_Model = Sigmoid_Model.to(device)

ReLU_Model = nn.Sequential(
    nn.Linear(in_features=2, out_features=10, bias=True),
    nn.ReLU(),
    nn.Linear(in_features=10, out_features=10, bias=True),
    nn.ReLU(),
    nn.Linear(in_features=10, out_features=10, bias=True),
    nn.ReLU(),
    nn.Linear(in_features=10, out_features=1, bias=True),
    nn.Sigmoid(),
)
ReLU_Model = ReLU_Model.to(device)

# Each network gets its own loss object and its own SGD optimiser.
sig_criterion = nn.BCELoss().to(device)
sig_optimizer = torch.optim.SGD(params=Sigmoid_Model.parameters(), lr=1)

relu_criterion = nn.BCELoss().to(device)
relu_optimizer = torch.optim.SGD(params=ReLU_Model.parameters(), lr=1)

print("epoch", "sig", "relu")
for epoch in range(3000 + 1):
    # One full training step for the sigmoid network.
    sig_optimizer.zero_grad()
    sig_hypothesis = Sigmoid_Model(X)
    sig_cost = sig_criterion(sig_hypothesis, Y)
    sig_cost.backward()
    sig_optimizer.step()

    # One full training step for the ReLU network (independent of the above).
    relu_optimizer.zero_grad()
    relu_hypothesis = ReLU_Model(X)
    relu_cost = relu_criterion(relu_hypothesis, Y)
    relu_cost.backward()
    relu_optimizer.step()

    # Print both costs side by side every 200 epochs.
    if not epoch % 200:
        print(f"epoch: {epoch}, ", "%.8f %.8f"%(sig_cost.item(), relu_cost.item()))

epoch sig relu
epoch: 0,  0.69503367 0.69540036
epoch: 200,  0.69313389 0.00099309
epoch: 400,  0.69312912 0.00020995
epoch: 600,  0.69312394 0.00010528
epoch: 800,  0.69311810 0.00006761
epoch: 1000,  0.69311136 0.00004886
epoch: 1200,  0.69310355 0.00003784
epoch: 1400,  0.69309413 0.00003062
epoch: 1600,  0.69308269 0.00002557
epoch: 1800,  0.69306827 0.00002188
epoch: 2000,  0.69304979 0.00001907
epoch: 2200,  0.69302541 0.00001682
epoch: 2400,  0.69299161 0.00001508
epoch: 2600,  0.69294310 0.00001359
epoch: 2800,  0.69286847 0.00001237
epoch: 3000,  0.69274366 0.00001135


# 호기심 천국 > 엄청 간단한 모델

In [18]:
# XOR truth table: every row of X is one input pair and the matching row of Y
# is its 0/1 label. Both are float32 tensors placed on `device`.
X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32, device=device)
Y = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32, device=device)

# Smallest network tried here: a single hidden layer of width 2 with a
# LeakyReLU activation, followed by a sigmoid output.
model = nn.Sequential(
    nn.Linear(in_features=2, out_features=2),
    nn.LeakyReLU(),
    nn.Linear(in_features=2, out_features=1),
    nn.Sigmoid(),
)
model = model.to(device)

# Binary cross-entropy loss, optimised with plain SGD.
criterion = nn.BCELoss().to(device)
optimizer = torch.optim.SGD(params=model.parameters(), lr=1)

for epoch in range(10_000 + 1):
    # Forward pass on the full (four-row) batch.
    hypothesis = model(X)
    cost = criterion(hypothesis, Y)

    # Reset stale gradients, back-propagate, then take one SGD step.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Report the cost every 1000 epochs (epoch 0 through 10000 inclusive).
    if not epoch % 1000:
        print(epoch, cost.item())

0 0.7162560224533081
1000 0.48951148986816406
2000 0.49005889892578125
3000 0.4885522425174713
4000 0.48454415798187256
5000 0.44300296902656555
6000 0.2459835261106491
7000 0.0851263478398323
8000 0.03932800143957138
9000 0.02341928519308567
10000 0.016069140285253525


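단순한 모델(2-2-1)도 XOR 문제를 학습할 수는 있지만, 위의 더 깊은 모델에 비해 수렴이 느리고 같은 10000 epoch 학습 후의 cost도 더 높다(약 0.016 vs 약 0.00014). 즉, 단순한 모델은 복잡한 모델에 비해 학습 능력이 낮다.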