In [49]:
# Compute the multi-class SVM loss and its gradient the naive (loop-based) way
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(2)
# Small random problem used to sanity-check the loss/gradient code below
N,C,d = 10,3, 5 # number of data points, 3 classes, dimension = 5
reg =0.1 # regularization
W = np.random.randn(d,C) # weights, shape (d, C): one column per class
X = np.random.randn(d,N) # data, shape (d, N): one column per data point
y = np.random.randint(C, size =N) # integer labels drawn from [0, C)
print(y)

# Reference (loop-based) implementation of the loss and its gradient
def svm_loss_naive(W,X,y,reg):
    """Multi-class SVM hinge loss and its gradient, computed with explicit loops.

    The value returned is

        loss = (1/N) * sum_n sum_{j != y[n]} max(0, 1 - w_{y[n]}.x_n + w_j.x_n)
               + (reg/2) * sum(W * W)

    Parameters
    ----------
    W : 2-D array, shape (d, C); column j holds the weights of class j.
    X : 2-D array, shape (d, N); column n is one data point.
    y : sequence of N integer class labels used to index the score vector.
    reg : regularization strength.

    Returns
    -------
    (loss, dw) where dw has the same shape as W.
    """
    _, n_classes = W.shape
    _, n_points = X.shape
    hinge_total = 0
    grad = np.zeros_like(W)
    for idx in range(n_points):
        sample = X[:, idx]            # one data point
        scores = W.T.dot(sample)      # one score per class
        label = y[idx]
        for cls in range(n_classes):
            if cls != label:
                violation = 1 - scores[label] + scores[cls]
                # Only violated margins contribute. Each one pulls the
                # correct-class column towards the sample (-x) and pushes
                # the offending class column away from it (+x).
                if violation > 0:
                    hinge_total += violation
                    grad[:, label] -= sample
                    grad[:, cls] += sample
    # Average the data term over the points, then add the L2 penalty.
    loss = hinge_total / n_points
    loss += 0.5*reg*np.sum(W*W)
    grad /= n_points
    grad += reg*W
    return loss, grad
print('loss if without regularization:', svm_loss_naive(W,X,y,0)[0])
print('loss if with regularization: ', svm_loss_naive(W,X,y,0.1)[0])

# Gradient check: is the analytic gradient correct?
def f(W):
    """Regularized naive loss (reg = 0.1) as a function of the weights only.

    X and y are read from the enclosing module scope at call time, so this
    is the scalar function whose gradient numerical_grad approximates.
    """
    return svm_loss_naive(W,X,y,0.1)[0]

def numerical_grad(W,f, eps=1e-6):
    """Approximate the gradient of a scalar function by central differences.

    Each entry of W is perturbed by +eps and -eps in turn and the slope
    (f(W + eps*e_i) - f(W - eps*e_i)) / (2*eps) is recorded.

    Parameters
    ----------
    W : array-like of any shape; the point at which to differentiate.
        It is never modified.
    f : callable taking an array shaped like W and returning a scalar.
    eps : perturbation size (defaults to 1e-6).

    Returns
    -------
    Array with the same shape as W holding the estimated partial derivatives.
    """
    W = np.asarray(W)
    # Work in a floating dtype. With an integer W the +/- eps perturbation
    # would be truncated away on assignment and the result would be
    # meaningless.
    work_dtype = np.result_type(W.dtype, np.float64)
    # Flatten to 1-D so a single loop visits every entry.
    W_flattened = W.astype(work_dtype).flatten()
    g_flattened = np.zeros_like(W_flattened)
    for i in range(W_flattened.size):
        W_p = W_flattened.copy()
        W_n = W_flattened.copy()
        W_p[i] += eps
        W_n[i] -= eps
        # f expects the original shape, not the flattened view.
        g_flattened[i] = (f(W_p.reshape(W.shape)) - f(W_n.reshape(W.shape)))/(2*eps)
    # Convert back to the original shape.
    return g_flattened.reshape(W.shape)

# Analytic gradient returned by the naive implementation (reg = 0.1)
g1 = svm_loss_naive(W,X,y,0.1)[1]
# Finite-difference estimate of the gradient of f at the same W
g2 = numerical_grad(W,f)
print('norm: ', np.linalg.norm(g1-g2))
# The norm of the difference should be very small if the analytic gradient is right

# Compute the loss and its gradient in vectorized form (array operations instead of Python loops)
def svm_loss_vectorized(W,X,y,reg):
    """Multi-class SVM hinge loss and its gradient, using array operations.

    Computes the same quantities as the loop-based version:

        loss = (1/N) * sum_n sum_{j != y[n]} max(0, 1 - z[y[n], n] + z[j, n])
               + (reg/2) * sum(W * W),      with z = W.T @ X

    Parameters
    ----------
    W : 2-D array, shape (d, C); column j holds the weights of class j.
    X : 2-D array, shape (d, N); column n is one data point.
    y : N integer class labels, used as row indices into the (C, N) scores.
    reg : regularization strength.

    Returns
    -------
    (loss, dw) where dw has the same shape as W.
    """
    _, N = X.shape
    cols = np.arange(N)

    Z = W.T.dot(X)  # scores, shape (C, N)
    # Score of the true class of every point. Plain integer indexing is used
    # instead of np.choose, which caps the number of choice arrays (i.e. the
    # number of classes) and fails on larger label sets.
    correct_class_score = Z[y, cols]  # shape (N,), broadcast over the rows of Z
    margin = np.maximum(0, Z - correct_class_score + 1)
    margin[y, cols] = 0  # the true class never counts as a violation
    loss = np.sum(margin)/N + 0.5*reg*np.sum(W*W)

    # F[j, n] = 1 where class j violates the margin for point n; the
    # true-class entry holds minus the number of violations for that point,
    # so X.dot(F.T) accumulates +x / -x exactly like the loop version.
    F = (margin > 0).astype(int)
    F[y, cols] = -np.sum(F, axis=0)
    dw = X.dot(F.T)/N + reg*W
    return loss, dw


# Larger random problem for timing the two implementations against each other
N,C, d = 49000, 10, 3073
W = np.random.randn(d,C)
X = np.random.randn(d,N)
y = np.random.randint(C, size =N)
import time
# perf_counter is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
t1 = time.perf_counter()
loss1, dw1 = svm_loss_naive(W,X,y,0.1)
t2 = time.perf_counter()
print('time navie: ',(t2-t1))
t3 = time.perf_counter()
loss2, dw2 = svm_loss_vectorized(W,X,y,0.1)
t4 = time.perf_counter()
# This line reports the vectorized run, so it must not reuse the naive label.
print('time vectorized: ',(t4-t3))
# Discrepancy between the two implementations (both should be close to zero)
print('loss difference: ',loss2 -loss1)
print('grad difference: ', np.linalg.norm(dw2 -dw1))

[1 2 0 2 1 0 2 0 2 0]
loss if without regularization: 6.443917350191055
loss if with regularization:  7.554177384246895
<function <lambda> at 0x000001D41DF4F840>
norm:  3.1612752429049813e-09
time navie:  20.34653663635254
time navie:  0.5651655197143555
loss difference:  4.547473508864641e-13
grad difference:  2.698408027214262e-14


In [None]:
# Mini-batch gradient descent for the multi-class SVM

def multiclass_svm_GD(X,y,W_init, reg,batch_size =100, eta =0.1, iters =1000, print_every =100):
    """Train multi-class SVM weights with mini-batch gradient descent.

    Every iteration draws `batch_size` column indices with
    np.random.choice (default sampling, i.e. with replacement), evaluates
    svm_loss_vectorized on that batch and takes one step of size `eta`
    against the gradient.

    Parameters
    ----------
    X : 2-D array whose columns are data points.
    y : array of labels, indexable by an index array.
    W_init : starting weights; not modified, each update builds a new array.
    reg : regularization strength forwarded to the loss.
    batch_size : number of points sampled per iteration.
    eta : learning rate.
    iters : number of iterations.
    print_every : the loss is printed whenever the iteration index is a
        multiple of this value.

    Returns
    -------
    (W, loss_history): the final weights and the per-iteration batch loss.
    """
    weights = W_init
    loss_history = np.zeros(iters)
    for step in range(iters):
        # Randomly pick the columns that make up this mini-batch.
        picked = np.random.choice(X.shape[1], batch_size)
        batch_loss, grad = svm_loss_vectorized(weights, X[:, picked], y[picked], reg)
        loss_history[step] = batch_loss

        weights = weights - eta*grad
        if step % print_every == 0:
            print('it:',step,'loss: ',loss_history[step])
    return weights, loss_history
# Fresh random problem, trained with the default batch size, step size and iteration count
N,C,d = 49000, 10, 3073
W = np.random.randn(d,C)
X = np.random.randn(d,N)
y = np.random.randint(C, size =N)
W, loss_history = multiclass_svm_GD(X,y,W,0.1)
# Label the figure so it can be read without the surrounding code.
plt.plot(loss_history)
plt.xlabel('iteration')
plt.ylabel('mini-batch loss')
plt.title('Multi-class SVM: loss during gradient descent')
plt.show()

it: 0 loss:  1774.132040076219
it: 100 loss:  250.21889948971824
it: 200 loss:  61.516449009981734
it: 300 loss:  40.79514608556997
it: 400 loss:  44.65437011818748
