In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import Adam

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def fsmax(x):
    """Softmax of the score array ``x``.

    The largest entry of ``x`` is subtracted before exponentiating so the
    exponentials cannot overflow; the shift cancels in the normalisation.
    Both the maximum and the normalising sum are taken over the whole array,
    so ``x`` is treated as a single vector of scores.

    Returns an array of the same shape as ``x`` whose entries sum to 1.
    """
    shifted = x - x.max()
    weights = np.exp(shifted)
    return weights / weights.sum()
def Dfsmax(x):
    """Jacobian of the softmax evaluated at the score vector ``x``.

    With ``p = softmax(x)``, the returned matrix is ``diag(p) - p p^T``,
    i.e. entry ``[i, j]`` equals ``p[i] * (delta_ij - p[j])``.
    """
    # Same max-shifted softmax as used for the forward pass.
    weights = np.exp(x - x.max())
    probs = weights / weights.sum()
    return np.diag(probs) - np.outer(probs, probs)
def CE(p,ph):
    """Cross-entropy ``-sum(p * log(ph))`` between targets ``p`` and predictions ``ph``.

    Entries where ``p`` is exactly zero contribute nothing to the sum, even
    if the matching prediction is 0. Evaluating ``0 * log(0)`` directly would
    give NaN and poison the whole loss once a predicted probability
    underflows. Entries with non-zero ``p`` and ``ph == 0`` still yield
    ``inf``, which is the correct value of the loss there.

    Parameters
    ----------
    p : array_like
        Target weights (e.g. a one-hot row or a probability vector).
    ph : array_like
        Predicted probabilities, broadcastable against ``p``.

    Returns
    -------
    The summed cross-entropy as a NumPy scalar.
    """
    p = np.asarray(p)
    support = p != 0
    # Where the target weight is zero, swap the prediction for 1 so that
    # log() returns 0 there instead of -inf (0 * -inf would be NaN).
    safe_ph = np.where(support, ph, 1.0)
    ce = -(p * np.log(safe_ph)).sum()
    return ce
def loss(x,p,W):
    """Cross-entropy loss of a single sample under the linear-softmax model.

    The scores are ``x @ W``; they are passed through ``fsmax`` and compared
    with the target ``p`` via ``CE``.
    """
    scores = np.matmul(x, W)
    return CE(p, fsmax(scores))
def TotalLoss(X,P,W):
    """Mean per-sample ``loss`` over the rows of ``X`` and ``P``.

    Row ``i`` of ``X`` is paired with row ``i`` of ``P``; the number of
    samples is taken from ``X.shape[0]``.
    """
    n_samples = X.shape[0]
    summed = sum(loss(X[row, :], P[row, :], W) for row in range(n_samples))
    return summed / n_samples
def Dloss(x,p,W):
    """Analytical gradient of the single-sample ``loss`` with respect to ``W``.

    Chaining cross-entropy through the softmax Jacobian
    (``-(p / ph) @ (diag(ph) - ph ph^T)``) collapses algebraically to
    ``sum(p) * ph - p``. Using the collapsed form avoids the division by
    ``ph``, which produced inf/NaN gradients whenever a softmax probability
    underflowed to 0, and avoids building the full Jacobian matrix.

    Parameters
    ----------
    x : array_like
        Feature vector of one sample.
    p : array_like
        Target weights for that sample (need not sum to 1).
    W : ndarray
        Weight matrix; scores are computed as ``x @ W``.

    Returns
    -------
    ndarray
        ``outer(x, dl/dy)``, the same shape as ``W`` for 1-D ``x``.
    """
    p = np.asarray(p, dtype=float)
    yh = np.matmul(x,W)
    ph = fsmax(yh)
    # Gradient of the loss with respect to the scores yh.
    dldy = p.sum() * ph - p
    dldW = np.outer(x,dldy)
    return dldW
def Dloss_numeric(x,p,W):
    """Central finite-difference estimate of the gradient of ``loss`` w.r.t. ``W``.

    Each entry of the 2-D matrix ``W`` is perturbed by ``+eps`` and ``-eps``
    in turn (``eps = 10**-5``) and the slope ``(l_plus - l_minus) / (2*eps)``
    is stored in the matching entry of the result.
    """
    step = 10**(-5)
    rows, cols = W.shape
    grad = np.zeros((rows, cols))
    # np.ndindex walks the entries in row-major order.
    for j, k in np.ndindex(rows, cols):
        bump = np.zeros((rows, cols))
        bump[j, k] = step
        upper = loss(x, p, W + bump)
        lower = loss(x, p, W - bump)
        grad[j, k] = (upper - lower) / (2 * step)
    return grad
def accuracy(X,P,W):
    """Fraction of rows whose predicted class matches the target class.

    The predicted class of a row is the argmax of ``fsmax`` applied to that
    row of ``X @ W``; the target class is the argmax of the same row of ``P``.
    """
    scores = np.matmul(X, W)
    probs = np.zeros(scores.shape)
    for row, row_scores in enumerate(scores):
        probs[row, :] = fsmax(row_scores)
    hits = probs.argmax(axis=1) == P.argmax(axis=1)
    return hits.sum() / X.shape[0]
def StochasticGradientDescent(X,P,W,h,epochs,verbose=0):
    """Train ``W`` with per-sample gradient steps and report loss/accuracy.

    In every epoch the rows of ``X``/``P`` are visited in their stored order
    and ``W`` is updated by ``W - h * Dloss(row)`` after each one. At the end
    of each epoch the mean loss and accuracy over the full data are recomputed.

    Parameters
    ----------
    X, P : ndarray
        Feature rows and matching target rows.
    W : ndarray
        Initial weights; not modified in place (each step rebinds a new array).
    h : float
        Step size.
    epochs : int
        Number of full passes over the data.
    verbose : int, optional
        Any non-zero value prints one progress line per epoch.

    Returns
    -------
    tuple
        ``(W, l, acc)``: the final weights with their loss and accuracy.
        If ``epochs`` is 0 these describe the initial ``W``.
    """
    num = X.shape[0]
    # Evaluate the starting weights so that l and acc are always defined,
    # even when the epoch loop never runs.
    l = TotalLoss(X,P,W)
    acc = accuracy(X,P,W)
    for epoch in range(epochs):
        for j in range(num):
            W = W - h * Dloss(X[j,:],P[j,:],W)
        l = TotalLoss(X,P,W)
        acc = accuracy(X,P,W)
        if verbose!=0:
            # The loop variable is now named `epoch`; the print used that
            # name while the loop used `i`, which raised NameError.
            print('epoch =',epoch+1,'of',epochs,' loss =',l,' acc =',np.round(acc,3))
    return (W,l,acc)

In [3]:
# Gradient check: compare the analytical gradient with a finite-difference
# estimate on a small random problem (d features plus a bias slot, N classes).
d, N = 2, 3
x = np.random.rand(d+1)
W = np.random.rand(d+1,N)
p = fsmax(np.random.rand(N))

# Evaluate each gradient once and reuse it for the difference.
numeric_grad = Dloss_numeric(x,p,W)
analytic_grad = Dloss(x,p,W)
print('numeric dldW:',numeric_grad)
print('analytical dldW:',analytic_grad)
print('error:',numeric_grad - analytic_grad)

numeric dldW: [[-0.03482371 -0.15735717  0.19218088]
 [-0.04028602 -0.1820396   0.22232562]
 [-0.00283245 -0.01279894  0.0156314 ]]
analytical dldW: [[-0.03482371 -0.15735717  0.19218088]
 [-0.04028602 -0.1820396   0.22232562]
 [-0.00283245 -0.01279894  0.0156314 ]]
error: [[ 1.52863694e-11  5.01568231e-12  1.30045696e-11]
 [-6.46865894e-12  8.47544257e-13 -5.48117107e-12]
 [-1.02421040e-11 -1.01659722e-11  9.30584429e-12]]


In [4]:
# Read the dataset from the CSV next to the notebook and preview the
# first three rows.
df = pd.read_csv('Iris-cleaned.csv')
df.iloc[:3]

Unnamed: 0,sepal length,sepal width,petal length,petal width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa


In [5]:
# Standardise every non-label column (zero mean, unit sample std), then
# append a constant column so the bias is learned as part of W.
measurements = df.drop('species', axis=1)
features = (measurements - measurements.mean()) / measurements.std()
features = features.assign(bias=1)

# One indicator column per species value.
target = pd.get_dummies(df['species'])

X, P = features.values, target.values

In [6]:
# Random initial weights: one row per input column, one column per class.
n_inputs, n_classes = X.shape[1], P.shape[1]
W = np.random.rand(n_inputs, n_classes)

# 500 epochs of per-sample updates with step size 0.1, no progress output.
W, l, acc = StochasticGradientDescent(X, P, W, h=0.1, epochs=500, verbose=0)
for label, value in (('Stochastic loss', l), ('Stochastic acc', acc)):
    print(label, value)

Stochastic loss 0.04226207508604257
Stochastic acc 0.98


In [7]:
# Same linear-softmax model expressed in Keras. The layer sizes are derived
# from the data instead of being hard-coded, matching how the NumPy weights
# are sized from X.shape[1] and P.shape[1].
n_inputs, n_classes = X.shape[1], P.shape[1]

model = Sequential()
# No separate bias term: X already carries a constant 'bias' column.
model.add(Dense(n_classes,input_shape=(n_inputs,),use_bias=False))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])
hist = model.fit(X,P,epochs=500,verbose=0)

In [8]:
# Report the metrics of the final epoch. Older Keras releases store the
# accuracy metric under 'acc', newer ones under 'accuracy'; accept either
# so the cell does not fail with KeyError.
history = hist.history
acc_key = 'acc' if 'acc' in history else 'accuracy'
print('Keras loss',history['loss'][-1])
print('Keras acc',history[acc_key][-1])

Keras loss 0.21672653218110402
Keras acc 0.9399999976158142
