In [None]:
import numpy as np

# 시그모이드 함수와 그 미분
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(y):
    return y * (1 - y)

# Binary Cross Entropy Loss
def binary_cross_entropy(y_true, y_pred):
    # epsilon = 1e-7  # log(0) 방지
    # y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

# XOR 입력과 출력
x_data = np.array([[0, 0],
                   [0, 1],
                   [1, 0],
                   [1, 1]], dtype=np.float32)

y_data = np.array([[0],
                   [1],
                   [1],
                   [0]], dtype=np.float32)

# 하이퍼파라미터
input_size = 2
hidden_size = 2
output_size = 1
learning_rate = 0.1
epochs = 10000

# 가중치 초기화
np.random.seed(0)
W1 = np.random.uniform(-1, 1, (input_size, hidden_size))
b1 = np.zeros((1, hidden_size))

W2 = np.random.uniform(-1, 1, (hidden_size, output_size))
b2 = np.zeros((1, output_size))

# 학습
for epoch in range(epochs):
    # 순전파
    z1 = np.dot(x_data, W1) + b1
    a1 = sigmoid(z1)

    z2 = np.dot(a1, W2) + b2
    a2 = sigmoid(z2)

    # 손실 계산 (Binary Cross Entropy)
    loss = binary_cross_entropy(y_data, a2)

    # 역전파
    d_a2 = a2 - y_data                               # BCE + sigmoid 조합의 도함수
    d_a1 = np.matmul(d_a2, W2.T) * sigmoid_derivative(a1)

    # 가중치 업데이트
    W2 -= np.matmul(a1.T, d_a2) * learning_rate
    # b2 -= np.sum(d_a2, axis=0, keepdims=True) * learning_rate
    b2 -= np.sum(d_a2, axis=0) * learning_rate
    
    W1 -= x_data.T.dot(d_a1) * learning_rate
    b1 -= np.sum(d_a1, axis=0) * learning_rate
    # b1 -= np.sum(d_a1, axis=0, keepdims=True) * learning_rate
    
    # 출력
    if epoch % 1000 == 0:
        print(f"Epoch {epoch:5d} BCE Loss: {loss:.6f}")

# 예측 결과
print("\n최종 예측 결과:")
output = sigmoid(np.dot(sigmoid(np.dot(x_data, W1) + b1), W2) + b2)
print(np.round(output, 4))


Epoch     0 BCE Loss: 0.693982
Epoch  1000 BCE Loss: 0.691454
Epoch  2000 BCE Loss: 0.162601
Epoch  3000 BCE Loss: 0.023738
Epoch  4000 BCE Loss: 0.012139
Epoch  5000 BCE Loss: 0.008087
Epoch  6000 BCE Loss: 0.006045
Epoch  7000 BCE Loss: 0.004818
Epoch  8000 BCE Loss: 0.004002
Epoch  9000 BCE Loss: 0.003420

최종 예측 결과:
[[0.0038]
 [0.9973]
 [0.9973]
 [0.0028]]


In [2]:
####################################################################
# XOR 문제를 해결하기 위한 2층 신경망 구현
####################################################################
import numpy as np

# 시그모이드 함수와 그 미분
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(y):
    return y * (1 - y)

# 하이퍼파라미터
learning_rate = 0.1
np.random.seed(0)

# 데이터  (XOR 문제)
x_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
y_data = np.array([[0], [1], [1], [0]], dtype=np.float32)
x_data.shape, y_data.shape

((4, 2), (4, 1))

In [3]:
# 가중치와 바이어스 초기화
W1 = np.random.randn(2, 3)
b1 = np.random.randn(3)
W2 = np.random.randn(3, 1)
b2 = np.random.randn(1)
W1.shape, b1.shape, W2.shape, b2.shape

((2, 3), (3,), (3, 1), (1,))

In [4]:
# Forward propagation
layer1 = sigmoid(np.dot(x_data, W1) + b1)      # shape: (N, 3)
hypothesis = sigmoid(np.dot(layer1, W2) + b2)  # shape: (N, 1)
layer1.shape, hypothesis.shape

((4, 3), (4, 1))

In [5]:
# 비용 함수 계산
cost = -np.mean(y_data * np.log(hypothesis) + (1 - y_data) * np.log(1 - hypothesis))
cost

1.0891365337429304

In [6]:
# Backward propagation
# 출력층 에러
error = hypothesis - y_data
d_hypothesis = error * sigmoid_derivative(hypothesis)
error.shape, d_hypothesis.shape

((4, 1), (4, 1))

In [7]:
# 은닉층 에러
layer1_error = np.dot(d_hypothesis, W2.T)
d_layer1 = layer1_error * sigmoid_derivative(layer1)

# 가중치와 바이어스 업데이트
W2 -= learning_rate * np.dot(layer1.T, d_hypothesis)   # shape: (3, 1)
b2 -= learning_rate * np.sum(d_hypothesis, axis=0)     # shape: (1,)
W1 -= learning_rate * np.dot(x_data.T, d_layer1)       # shape: (2, 3)
b1 -= learning_rate * np.sum(d_layer1, axis=0)         # shape: (3,)

In [None]:

# 훈련
for step in range(10001):
    # Forward propagation
    layer1 = sigmoid(np.dot(x_data, W1) + b1)
    hypothesis = sigmoid(np.dot(layer1, W2) + b2)
    
    # 비용 함수 계산
    cost = -np.mean(y_data * np.log(hypothesis) + (1 - y_data) * np.log(1 - hypothesis))
    
    # Backward propagation
    # 출력층 에러
    error = hypothesis - y_data
    d_hypothesis = error * sigmoid_derivative(hypothesis)
    
    # 은닉층 에러
    layer1_error = np.dot(d_hypothesis, W2.T)
    d_layer1 = layer1_error * sigmoid_derivative(layer1)
    
    # 가중치와 바이어스 업데이트
    W2 -= learning_rate * np.dot(layer1.T, d_hypothesis)
    b2 -= learning_rate * np.sum(d_hypothesis, axis=0)
    W1 -= learning_rate * np.dot(x_data.T, d_layer1)
    b1 -= learning_rate * np.sum(d_layer1, axis=0)
    
    # 출력
    if step % 100 == 0:
        print(step, cost, W2.flatten())

# 최종 결과
layer1 = sigmoid(np.dot(x_data, W1) + b1)
hypothesis = sigmoid(np.dot(layer1, W2) + b2)
predicted = (hypothesis > 0.5).astype(np.float32)
accuracy = np.mean(predicted == y_data)

print("\nHypothesis:\n", hypothesis)
print("\nCorrect:\n", predicted)
print("\nAccuracy:\n", accuracy)

In [13]:
####################################################################
# XOR 문제를 해결하기 위한 2층 신경망 구현
####################################################################
import numpy as np

# 시그모이드 함수와 그 미분
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

# 하이퍼파라미터
learning_rate = 0.1
np.random.seed(0)

# 데이터  (XOR 문제)
x_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
y_data = np.array([[0], [1], [1], [0]], dtype=np.float32)

# 가중치와 바이어스 초기화
W1 = np.random.normal(size=(2, 3))
b1 = np.random.normal(size=(3))
W2 = np.random.normal(size=(3, 1))
b2 = np.random.normal(size=(1))

# 훈련
for step in range(10001):
    # Forward propagation
    layer1 = sigmoid(np.dot(x_data, W1) + b1)
    hypothesis = sigmoid(np.dot(layer1, W2) + b2)
    
    # 비용 함수 계산
    cost = -np.mean(y_data * np.log(hypothesis) + (1 - y_data) * np.log(1 - hypothesis))
    
    # Backward propagation
    # 출력층 에러
    error = hypothesis - y_data
    d_hypothesis = error * sigmoid_derivative(hypothesis)
    
    # 은닉층 에러
    layer1_error = np.dot(d_hypothesis, W2.T)
    d_layer1 = layer1_error * sigmoid_derivative(layer1)
    
    # 가중치와 바이어스 업데이트
    W2 -= learning_rate * np.dot(layer1.T, d_hypothesis)
    b2 -= learning_rate * np.sum(d_hypothesis, axis=0)
    W1 -= learning_rate * np.dot(x_data.T, d_layer1)
    b1 -= learning_rate * np.sum(d_layer1, axis=0)
    
    # 출력
    if step % 100 == 0:
        print(step, cost, W2.flatten())

# 최종 결과
layer1 = sigmoid(np.dot(x_data, W1) + b1)
hypothesis = sigmoid(np.dot(layer1, W2) + b2)
predicted = (hypothesis > 0.5).astype(np.float32)
accuracy = np.mean(predicted == y_data)

print("\nHypothesis:\n", hypothesis)
print("\nCorrect:\n", predicted)
print("\nAccuracy:\n", accuracy)

0 1.0891365337429304 [0.39673688 0.13324823 1.44606571]
100 0.6956167131383346 [-0.10405314 -0.29137544  1.07228891]
200 0.6893437054152814 [ 0.03460206 -0.23916921  1.03362018]
300 0.6836980139597717 [ 0.16995484 -0.20014552  1.02013146]
400 0.6780877072117059 [ 0.30370312 -0.17310792  1.02786104]
500 0.6720557667515878 [ 0.43897025 -0.15612677  1.05477467]
600 0.6652123894670156 [ 0.57838492 -0.14797802  1.09939233]
700 0.6572032901088167 [ 0.72419768 -0.14793976  1.16046963]
800 0.6477236074435835 [ 0.87816361 -0.15561573  1.23673108]
900 0.6365776190263086 [ 1.04116058 -0.17075303  1.32663466]
1000 0.623769168667645 [ 1.21261129 -0.19304605  1.42818074]
1100 0.6095653943128686 [ 1.39003359 -0.2219646   1.53881403]
1200 0.5944564896460977 [ 1.56925721 -0.25667278  1.65549376]
1300 0.579011322998094 [ 1.74551893 -0.29607494  1.77497509]
1400 0.5637347044212916 [ 1.91483376 -0.33896465  1.89422239]
1500 0.5490061995866804 [ 2.07476739 -0.38422307  2.01076632]
1600 0.5350795229576236 [

In [65]:
import sympy as sp 

a, b, x = sp.symbols('a b x')
sigmoid = 1 / (1 + sp.exp(-(a * x + b)))

sp.Derivative(sigmoid, x).doit()  # 시그모이드 함수의 미분

a*exp(-a*x - b)/(exp(-a*x - b) + 1)**2

In [None]:
import numpy as np

# 시그모이드 함수와 그 미분
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(y):
    return y * (1 - y)

# 하이퍼파라미터
learning_rate = 0.1
np.random.seed(0)

# 데이터
x_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
y_data = np.array([[0], [1], [1], [0]], dtype=np.float32)

# 가중치와 바이어스 초기화
# 입력층 -> 첫 번째 히든 레이어 (2 -> 4)
W1 = np.random.normal(size=(2, 4))
b1 = np.random.normal(size=(4))
# 첫 번째 히든 레이어 -> 두 번째 히든 레이어 (4 -> 2)
W2 = np.random.normal(size=(4, 2))
b2 = np.random.normal(size=(2))
# 두 번째 히든 레이어 -> 출력층 (2 -> 1)
W3 = np.random.normal(size=(2, 1))
b3 = np.random.normal(size=(1))

# 훈련
for step in range(10001):
    # Forward propagation
    layer1 = sigmoid(np.dot(x_data, W1) + b1)  # 첫 번째 히든 레이어
    layer2 = sigmoid(np.dot(layer1, W2) + b2)  # 두 번째 히든 레이어
    hypothesis = sigmoid(np.dot(layer2, W3) + b3)  # 출력층
    
    # 비용 함수 계산 (Binary Cross-Entropy)
    cost = -np.mean(y_data * np.log(hypothesis) + (1 - y_data) * np.log(1 - hypothesis))
    
    # Backward propagation
    # 출력층 에러
    error = hypothesis - y_data
    d_hypothesis = error * sigmoid_derivative(hypothesis)
    
    # 두 번째 히든 레이어 에러
    layer2_error = np.dot(d_hypothesis, W3.T)
    d_layer2 = layer2_error * sigmoid_derivative(layer2)
    
    # 첫 번째 히든 레이어 에러
    layer1_error = np.dot(d_layer2, W2.T)
    d_layer1 = layer1_error * sigmoid_derivative(layer1)
    
    # 가중치와 바이어스 업데이트
    W3 -= learning_rate * np.dot(layer2.T, d_hypothesis)
    b3 -= learning_rate * np.sum(d_hypothesis, axis=0)
    W2 -= learning_rate * np.dot(layer1.T, d_layer2)
    b2 -= learning_rate * np.sum(d_layer2, axis=0)
    W1 -= learning_rate * np.dot(x_data.T, d_layer1)
    b1 -= learning_rate * np.sum(d_layer1, axis=0)
    
    # 출력
    if step % 100 == 0:
        print(step, cost, W3.flatten())

# 최종 결과
layer1 = sigmoid(np.dot(x_data, W1) + b1)
layer2 = sigmoid(np.dot(layer1, W2) + b2)
hypothesis = sigmoid(np.dot(layer2, W3) + b3)
predicted = (hypothesis > 0.5).astype(np.float32)
accuracy = np.mean(predicted == y_data)

print("\nHypothesis:\n", hypothesis)
print("\nCorrect:\n", predicted)
print("\nAccuracy:\n", accuracy)

ValueError: shapes (4,1) and (4,2) not aligned: 1 (dim 1) != 4 (dim 0)