# Test CKA

In [23]:
import numpy as np
import pickle
import gzip
import cca_core
from CKA import linear_CKA, kernel_CKA

In [8]:
# Seed NumPy's global RNG so the scores printed by this cell are the same on
# every Restart & Run All instead of changing with each execution.
np.random.seed(0)

# Two independently drawn Gaussian matrices, each of shape (100, 64).
X = np.random.randn(100, 64)
Y = np.random.randn(100, 64)

# X-vs-Y scores two unrelated matrices; X-vs-X is the self-comparison
# sanity check for each CKA variant.
print('Linear CKA, between X and Y: {}'.format(linear_CKA(X, Y)))
print('Linear CKA, between X and X: {}'.format(linear_CKA(X, X)))

print('RBF Kernel CKA, between X and Y: {}'.format(kernel_CKA(X, Y)))
print('RBF Kernel CKA, between X and X: {}'.format(kernel_CKA(X, X)))

Linear CKA, between X and Y: 0.4080416615691328
Linear CKA, between X and X: 1.0000000000000002
RBF Kernel CKA, between X and Y: 0.5327389546914577
RBF Kernel CKA, between X and X: 1.0


# MNIST Example of CKA

The MNIST network layers are: 784 (input) -- 500 -- 500 -- 10 (output)

In [9]:
# Read the saved MNIST activations of two models (files suffixed lay01;
# per the original note this is the second hidden layer) for comparison.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
path_model0 = "model_activations/MNIST/model_0_lay01.p"
path_model1 = "model_activations/MNIST/model_1_lay01.p"

with open(path_model0, "rb") as fh:
    acts1 = pickle.load(fh)
with open(path_model1, "rb") as fh:
    acts2 = pickle.load(fh)

shapes = (acts1.shape, acts2.shape)
print("activation shapes", *shapes)
    

activation shapes (500, 10000) (500, 10000)


In [14]:
# Known drawback of CKA (original author's note): it is time-consuming when
# there are many data points.
# Both arrays are transposed before being handed to the CKA functions.
mnist_a = acts1.T
mnist_b = acts2.T

linear_score = linear_CKA(mnist_a, mnist_b)
print('Linear CKA: {}'.format(linear_score))

rbf_score = kernel_CKA(mnist_a, mnist_b)
print('RBF Kernel: {}'.format(rbf_score))

Linear CKA: 0.8539226154965
RBF Kernel: 0.8674208076036767


For comparison, the CCA similarity computed on the same activations:

In [19]:
# CCA similarity index, computed on the activation arrays as loaded
# (no transpose here, unlike the CKA calls).
results = cca_core.get_cca_similarity(
    acts1,
    acts2,
    epsilon=1e-10,
    verbose=False,
)

# Summarise the "cca_coef1" entry of the result by its mean.
mean_cca = np.mean(results["cca_coef1"])
print("Mean CCA similarity", mean_cca)

Mean CCA similarity 0.45669867603921466


# CKA for Conv Nets with SVHN
SVHN consists of images that are 32 x 32 (height 32, width 32). Our architecture looks like:

**conv1(3x3,32 channels)-->maxpool(2x2)-->conv2(3x3,64 channels)-->maxpool(2x2)-->batchnorm-->fc(200)-->fc(10)**

In [24]:
# Read the gzip-compressed SVHN activations (files suffixed lay03; conv 2
# according to the original note) for model_0 and model_1.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
svhn_path0 = "model_activations/SVHN/model_0_lay03.p"
svhn_path1 = "model_activations/SVHN/model_1_lay03.p"

with gzip.open(svhn_path0, "rb") as fh:
    acts1 = pickle.load(fh)

with gzip.open(svhn_path1, "rb") as fh:
    acts2 = pickle.load(fh)

shape1, shape2 = acts1.shape, acts2.shape
print(shape1, shape2)

(1000, 16, 16, 64) (1000, 16, 16, 64)


#### Average Pool for the features

In [25]:
# Average over axes 1 and 2 of each activation array, leaving one value per
# remaining (first, last) index pair.
pooled_axes = (1, 2)
avg_acts1 = acts1.mean(axis=pooled_axes)
avg_acts2 = acts2.mean(axis=pooled_axes)

print(avg_acts1.shape, avg_acts2.shape)

(1000, 64) (1000, 64)


In [27]:
# CKA on the averaged activations, passed in as they are.
linear_score = linear_CKA(avg_acts1, avg_acts2)
print('Linear CKA: {}'.format(linear_score))
rbf_score = kernel_CKA(avg_acts1, avg_acts2)
print('RBF Kernel CKA: {}'.format(rbf_score))

# CCA on the same activations; this call receives the transposed arrays.
a_results = cca_core.get_cca_similarity(
    avg_acts1.T,
    avg_acts2.T,
    epsilon=1e-10,
    verbose=False,
)
mean_cca = np.mean(a_results["cca_coef1"])
print("Mean CCA similarity", mean_cca)

Linear CKA: 0.9241440273864195
RBF Kernel CKA: 0.9197327226169598
Mean CCA similarity 0.6382306681306912


#### Interpolate the features to a common spatial size

In [29]:
# Read the gzip-compressed activations stored in model_1's lay04 file.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
pool2_path = "./model_activations/SVHN/model_1_lay04.p"
with gzip.open(pool2_path, "rb") as fh:
    pool2 = pickle.load(fh)

# Show both shapes side by side.
shape_a, shape_b = acts1.shape, pool2.shape
print("shape of first conv", shape_a, "shape of second conv", shape_b)

shape of first conv (1000, 16, 16, 64) shape of second conv (1000, 8, 8, 64)


In [30]:
from scipy import interpolate

# Upsample every (sample, channel) map of pool2 to the spatial size of acts1
# using linear interpolation, so the two layers can be compared position by
# position.
num_d, h, w, _ = acts1.shape
num_c = pool2.shape[-1]
pool2_interp = np.zeros((num_d, h, w, num_c))

# The sample grids are identical for every map, so build them once instead of
# once per (sample, channel) pair.
src_rows = np.linspace(0, pool2.shape[1], pool2.shape[1], endpoint=False)
src_cols = np.linspace(0, pool2.shape[2], pool2.shape[2], endpoint=False)
# Target grids span the same range with the (larger) number of points of acts1.
dst_rows = np.linspace(0, pool2.shape[1], h, endpoint=False)
dst_cols = np.linspace(0, pool2.shape[2], w, endpoint=False)

for d in range(num_d):
    for c in range(num_c):
        # interp2d is deprecated and removed from recent SciPy releases.
        # A degree-1 RectBivariateSpline is the regular-grid replacement; it
        # takes z as (len(rows), len(cols)), which also handles non-square maps.
        # NOTE(review): the last target coordinate lies past the final source
        # sample; the spline is expected to hold the edge value there, as
        # interp2d did by default -- confirm on your SciPy version.
        spline = interpolate.RectBivariateSpline(
            src_rows, src_cols, pool2[d, :, :, c], kx=1, ky=1
        )

        # Evaluate on the larger grid; result has shape (h, w).
        pool2_interp[d, :, :, c] = spline(dst_rows, dst_cols)

print("new shape", pool2_interp.shape)

  return f(*args, **kwds)
  return f(*args, **kwds)


new shape (1000, 16, 16, 64)


In [36]:
# Flatten the first three axes into one so that each row holds the channel
# values of a single position: (N, H, W, C) -> (N*H*W, C).
num_datapoints, h, w, channels = acts1.shape
f_acts1 = acts1.reshape(num_datapoints * h * w, channels)

num_datapoints, h, w, channels = pool2_interp.shape
f_pool2 = pool2_interp.reshape(num_datapoints * h * w, channels)

# CCA on the transposed matrices, keeping only every 5th column.
every_fifth = slice(None, None, 5)
cca_in1 = f_acts1.T[:, every_fifth]
cca_in2 = f_pool2.T[:, every_fifth]
f_results = cca_core.get_cca_similarity(cca_in1, cca_in2, epsilon=1e-10, verbose=False)
mean_cca = np.mean(f_results["cca_coef1"])
print("Mean CCA similarity", mean_cca)


# CKA stays disabled for these matrices: per the original author's note,
# the shape is too large for CKA.
# print('Linear CKA: {}'.format(linear_CKA(f_acts1, f_pool2)))
# print('RBF Kernel CKA: {}'.format(kernel_CKA(f_acts1, f_pool2)))

Mean CCA similarity 0.3573426816272488


In [35]:
# Display the shape of the flattened activation matrix. f_acts1 is created by
# the reshape cell, so that cell must have been run before this one.
f_acts1.shape

(256000, 64)