In [1]:
from minpy.core import grad as gradient
from minpy.nn.layers import softmax_loss as softmax
import minpy.numpy as np
from minpy.context import set_context, cpu
# Select the CPU as the active minpy context (both names come from minpy.context);
# presumably every minpy operation in the cells below then runs on the CPU.
set_context(cpu())

In [2]:
import sys
# Make the parent directory importable so that the `utilities` package resolves;
# this has to happen before the import on the following line.
sys.path.append('../')
from utilities.data_utility import load_cifar10
# load_cifar10 is unpacked into six values, of which only the first two are kept
# (as X and labels) — presumably the training images and their labels; TODO confirm
# against load_cifar10.
# NOTE(review): binding the remaining four values to `_` also rebinds the name
# IPython uses for the last cell output.
X, labels, _, _, _, _ = load_cifar10('../utilities/cifar/', center=True, rescale=True)

In [3]:
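# Disabled: random initialisation of the weights and biases. The parameters
# are instead read from a pickled training checkpoint in the following cells.
# LAYERS = 4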
# dimensions = (3072,) * LAYERS + (10,)
# weights = []
# for shape in zip(dimensions[:-1], dimensions[1:]):
#     weights.append(np.random.normal(0, 0.1, shape))
# biases = [np.zeros(n) for n in dimensions[1:]]

In [4]:
import cPickle as pickle

# Load the parameter dictionary saved at training iteration 10000.
# The file is opened in a `with` block so the handle is closed as soon as the
# load finishes (the previous bare open() call never closed it).
# NOTE(review): unpickling can execute arbitrary code — only load checkpoints
# that come from a trusted source.
with open('training-parameters-checkpoints/parameters-iteration-10000', 'rb') as checkpoint_file:
    parameters = pickle.load(checkpoint_file)

In [5]:
for key, value in parameters.items():
    print key, value.shape

fullyconnected3_weight (1024, 1024)
fullyconnected1_bias (1024,)
fullyconnected4_weight (1024, 10)
fullyconnected4_bias (10,)
fullyconnected2_bias (1024,)
fullyconnected0_weight (3072, 1024)
fullyconnected0_bias (1024,)
fullyconnected2_weight (1024, 1024)
fullyconnected1_weight (1024, 1024)
fullyconnected3_bias (1024,)


In [6]:
# Collect the five fully connected layers' parameters, ordered by layer index,
# so that weights[k] and biases[k] belong to layer k.
weights, biases = [], []
for layer in range(5):
    weights.append(parameters['fullyconnected%d_weight' % layer])
    biases.append(parameters['fullyconnected%d_bias' % layer])

In [7]:
# Z is the mixing coefficient of a residual-style variant of the layers,
#     output = Z * activate(Y) + (1 - Z) * X
# which is currently disabled; both layer functions below return activate(Y).
Z = 0.0

# Filled by cached_forward: the input and the pre-activation of every call.
X_cache, Y_cache = [], []


def activate(X):
    """Element-wise rectifier: max(0, X)."""
    return np.maximum(0, X)


def forward_layer(X, W, bias):
    """Return activate(X . W + bias) without recording anything."""
    return activate(np.dot(X, W) + bias)


def cached_forward(X, W, bias):
    """Same result as forward_layer, but remember the intermediate values.

    The input X is appended to X_cache and the pre-activation X . W + bias
    to Y_cache, so entry k of both caches belongs to the k-th call.
    """
    X_cache.append(X)
    pre_activation = np.dot(X, W) + bias
    Y_cache.append(pre_activation)
    return activate(pre_activation)

In [8]:
# Number of samples pushed through the network.
N = 1024

# cached_forward appends to the module-level caches. Empty them in place first,
# so that re-running this cell does not pile a second set of entries on top of
# the previous run (the per-layer loops further down index the caches by layer).
del X_cache[:]
del Y_cache[:]

# Run the first N samples through every layer except the last, recording each
# layer's input and pre-activation along the way.
output = X[:N]
for weight, bias in zip(weights[:-1], biases[:-1]):
    output = cached_forward(output, weight, bias)

# The last layer is a plain affine map (no activation).
predictions = np.dot(output, weights[-1]) + biases[-1]

In [16]:
def partial_forward(X, Y, i):
    """Finish the forward pass from layer ``i`` and return the softmax loss.

    Y is treated as the pre-activation of layer ``i``: it goes through
    ``activate``, then through layers ``i + 1`` up to (but excluding) the last
    one via ``forward_layer``, then through the final weight/bias pair, and
    the result is scored against ``labels[:N]``.

    X is accepted but never read: the residual variant
    ``Z * activate(Y) + (1 - Z) * X`` that used it is disabled.
    NOTE(review): a gradient taken with respect to X is therefore presumably
    zero everywhere — confirm before interpreting dX.

    ``i`` is used as a plain integer; an earlier version converted it with
    ``i.asnumpy()`` first.
    """
    hidden = activate(Y)
    first_remaining = i + 1
    remaining_layers = zip(weights[first_remaining:-1], biases[first_remaining:-1])
    for weight, bias in remaining_layers:
        hidden = forward_layer(hidden, weight, bias)
    scores = np.dot(hidden, weights[-1]) + biases[-1]
    return softmax(scores, labels[:N])

In [10]:
# Differentiate partial_forward with respect to its first two positional
# arguments (X and Y); the layer index i is passed through untouched.
g = gradient(partial_forward, [0, 1])

In [11]:
# Gradients of the loss with respect to every cached layer input (dX_cache)
# and pre-activation (dY_cache), in layer order.
dX_cache = []
dY_cache = []
# The loop variables deliberately avoid the name X: X is the dataset that the
# forward-pass cell slices with X[:N], and rebinding it here would make that
# cell operate on a cached activation when it is run again.
for index, (layer_input, pre_activation) in enumerate(zip(X_cache, Y_cache)):
    dX, dY = g(layer_input, pre_activation, index)
    dX_cache.append(dX)
    dY_cache.append(dY)

In [60]:
def covariance(left, right):
    """Covariance of two arrays, pairing their entries element-wise.

    Both means are taken over all elements (no axis is given), and so is the
    final average of the centred products. ``left`` and ``right`` therefore
    need shapes that can be multiplied element-wise.
    """
    left_centred = left - np.mean(left)
    right_centred = right - np.mean(right)
    return np.mean(left_centred * right_centred)


def cco(left, right):
    """Correlation coefficient: covariance(left, right) / (std(left) * std(right)).

    A scaling factor of 1 used to be multiplied into both the numerator and
    the denominator; it cancelled out and has been removed.
    No guard is applied when either standard deviation is zero.
    """
    return covariance(left, right) / (np.std(left) * np.std(right))

In [56]:
for X, Y, dX, dY in zip(X_cache, Y_cache, dX_cache, dY_cache):
    if X.shape == Y.shape:
        print \
            'X Y', covariance(X, Y), \
            'Y dY', covariance(Y, dY), \
            'X dY', covariance(X, dY)

X Y -0.796023 Y dY 8.21571e-08 X dY -1.67065e-08
X Y -0.243009 Y dY 3.22914e-08 X dY 1.74822e-08
X Y -0.269953 Y dY 6.45694e-08 X dY 7.12108e-10


In [62]:
for X, Y, dX, dY in zip(X_cache, Y_cache, dX_cache, dY_cache):
    if X.shape == Y.shape:
        print \
            'X Y', cco(X, Y), \
            'Y dY', cco(Y, dY), \
            'X dY', cco(X, dY)

X Y -0.0441406321474 Y dY 0.00103920118835 X dY -0.000741838605897
X Y -0.0182333580785 Y dY 0.000493072213104 X dY 0.00068771330996
X Y -0.0370194778368 Y dY 0.00129204010315 X dY 3.35408018416e-05


In [53]:
for dY in dY_cache:
    print np.min(dY), np.max(dY)

[-0.00052877] [ 0.0006162]
[-0.00049199] [ 0.0006672]
[-0.00058099] [ 0.00047715]
[-0.0005451] [ 0.00047315]


In [36]:
c = []
rho = []
for i in range(16):
    P = np.random.normal(0, 1, (1024, 1024))
    Q = np.random.normal(0, 1, (1024, 1024))
    R = np.dot(P, Q)
    c.append(covariance(P, R))
    rho.append(cco(P, R))
print np.mean(np.array(c))
print np.mean(np.array(rho))

-0.0131724
-0.00041111


In [57]:
for W, X, Y, dX, dY in zip(weights[:-1], X_cache, Y_cache, dX_cache, dY_cache):
    if X.shape == Y.shape:
        print \
            'W X', covariance(W, X), \
            'W Y', covariance(W, Y), \
            'W, dY', covariance(W, dY)

W X -4.76798e-05 W Y 0.00605928 W, dY 1.60163e-10
W X 0.000126902 W Y 0.00196992 W, dY 8.16905e-10
W X -9.02909e-06 W Y 0.00132417 W, dY 1.62051e-09


In [68]:
# Integer 0/1 masks marking the positive entries of every cached layer input
# except the first. Those inputs are outputs of `activate`, so each mask marks
# where the preceding layer's rectifier was active.
# The first cache entry is skipped — presumably because it is the raw input
# rather than a minpy array and has no asnumpy(); TODO confirm.
# The comprehension variable is not called X: in Python 2 it leaks into the
# enclosing scope and would overwrite the dataset X.
dfY_cache = [(activation > 0).asnumpy().astype(int) for activation in X_cache[1:]]

In [66]:
for 

<type 'numpy.ndarray'>
<class 'minpy.array.Array'>
<class 'minpy.array.Array'>
<class 'minpy.array.Array'>
