In [54]:
from keras.datasets import mnist
from keras.datasets import cifar10
import numpy as np
import math
import random

nc = 32 # no of categories (intensity bins) used by the chi-squared tests
df = 256 // nc # dividing factor: an 8-bit value v falls in category v // df

# Seed the stdlib RNG so that the random image / pixel samples drawn by the
# cells below are the same on every Restart & Run All.
SEED = 0
random.seed(SEED)

In [55]:
# Identical-distribution test for MNIST: chi-squared statistic over all pixels
# of a single image, summarised (min / max / mean) over a random sample of images.

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

no_of_images = 5000
image_indexes = random.sample(range(train_images.shape[0]), no_of_images)

minq = float("inf")
maxq = 0
avg = 0

# Expected number of pixels in each category if all categories were equally likely.
e = train_images.shape[1]*train_images.shape[2] / nc

for k in image_indexes:
    # Quantise into a fresh array; writing the result back into train_images
    # would leave the loaded dataset partly quantised for any later use.
    quantised = train_images[k] // df
    # catcount[i] = number of pixels of this image in category i, 0 <= i < nc.
    # The [:nc] slice drops categories >= nc, which for 8-bit values can only
    # appear when nc does not divide 256.
    catcount = np.bincount(quantised.ravel(), minlength=nc)[:nc]
    qmnist = np.sum((catcount - e)**2) / e # chi squared statistic
    minq = min(minq, qmnist)
    maxq = max(maxq, qmnist)
    avg += qmnist

avg /= no_of_images
print("Q statistic for", no_of_images, "images:")
print("min=",minq)
print("max=",maxq)
print("avg=",avg)

Q statistic for 5000 images:
min= 10703.42857142857
max= 21835.836734693876
avg= 16052.39774693881


In [56]:
# Identical-distribution test for CIFAR10: chi-squared statistic over all pixels
# of a single image, computed separately for each of the 3 channels and
# summarised (min / max / mean) over a random sample of images.

(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

no_of_images = 5000
image_indexes = random.sample(range(train_images.shape[0]), no_of_images)

minq = [float("inf"), float("inf"), float("inf")]
maxq = [0,0,0]
avg = np.zeros(3)
catcount = np.zeros((3,nc))

# Expected number of pixels per category (per channel) if all categories were equally likely.
e = train_images.shape[1]*train_images.shape[2] / nc

for k in image_indexes:
    # Quantise into a fresh array so the loaded dataset is not modified.
    quantised = train_images[k] // df
    for i in range(3):
        # Channel is the last axis of an image; count how many of its pixels
        # fall in each category (categories >= nc, if any, are ignored).
        catcount[i] = np.bincount(quantised[:, :, i].ravel(), minlength=nc)[:nc]
        qi = np.sum((catcount[i] - e)**2) / e # chi squared statistic for channel i
        maxq[i] = max(maxq[i], qi)
        minq[i] = min(minq[i], qi)
        avg[i] += qi

avg /= no_of_images
print("Q statistic for", no_of_images, "images:")
for i in range(3):
    print("\nChannel ", i)
    print("min=",minq[i])
    print("max=",maxq[i])
    print("avg=",avg[i])

Q statistic for 5000 images:

Channel  0
min= 51.125
max= 26991.75
avg= 1571.3974125

Channel  1
min= 58.4375
max= 28693.25
avg= 1645.1216125

Channel  2
min= 74.375
max= 27038.25
avg= 1803.486875


In [57]:
# Identical-distribution test for MNIST: chi-squared statistic for one pixel
# position across all images, summarised (min / max / mean) over all positions.

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = np.transpose(train_images) # axes reversed: shape becomes (28,28,60000)

no_of_pixels = 28*28
# Sampling every position simply visits all of them in random order.
a = random.sample(range(28*28), no_of_pixels)
pixels = [divmod(n, 28) for n in a]

minq= float("inf")
maxq= 0
avg=0

# Expected number of images in each category if all categories were equally likely.
e = train_images.shape[2] / nc

for (i,j) in pixels:
    # Values of this pixel position in every image, quantised to categories.
    X = train_images[i][j] // df
    # One bincount replaces the per-category loop, whose index used to reuse
    # (and overwrite) the pixel index i.
    catcount = np.bincount(X, minlength=nc)[:nc]
    qmnist = np.sum((catcount - e)**2) / e # chi squared statistic
    minq = min(minq, qmnist)
    maxq = max(maxq, qmnist)
    avg += qmnist

avg/=no_of_pixels
print("Q statistic for", no_of_pixels, "pixels:")
print("min=",minq)
print("max=",maxq)
print("avg=",avg)

Q statistic for 784 pixels:
min= 318537.5381333333
max= 1860000.0
avg= 1343814.1679959192


In [58]:
# Identical-distribution test for CIFAR10: chi-squared statistic for one pixel
# position (per channel) across all images, summarised over all positions.

(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
train_images = np.transpose(train_images) # axes reversed: (channel, 32, 32, image)

no_of_pixels = 1024
a = random.sample(list(range(0,32*32)), no_of_pixels)
pixels = [divmod(n, 32) for n in a]

minq = [float("inf")] * 3
maxq = [0] * 3
avg = np.zeros(3)
catcount = np.zeros(nc)

# Expected number of images in each category under equal likelihood.
e = train_images.shape[3] / nc

for (u, v) in pixels:
    for ch in range(3):
        # Quantised values of this position / channel across every image.
        X = train_images[ch][u][v] // df
        catcount[:] = [np.count_nonzero(X == cat) for cat in range(nc)]
        qk = np.sum((catcount - e)**2) / e
        maxq[ch] = max(maxq[ch], qk)
        minq[ch] = min(minq[ch], qk)
        avg[ch] += qk

avg /= no_of_pixels
print("Q statistic for", no_of_pixels, "pixels:")
for i in range(3):
    print("\nChannel ", i)
    print("min=",minq[i])
    print("max=",maxq[i])
    print("avg=",avg[i])

Q statistic for 1024 pixels:

Channel  0
min= 3644.36992
max= 8907.88864
avg= 6477.72751

Channel  1
min= 3935.17056
max= 10583.91168
avg= 7889.8206999999975

Channel  2
min= 2368.69632
max= 12819.75296
avg= 7463.207749999997


In [59]:
# Independence test for MNIST: correlation between each pixel and the mean of
# its neighbours, computed over all pixels of a single image.

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Number of images to test, and which ones (drawn without replacement).
no_of_images = 100
image_indexes = random.sample(range(train_images.shape[0]), no_of_images)

def neighbour_pixel(x,y,k,images=None):
    """Return the mean value of the in-bounds neighbours of pixel (x, y) in image k.

    The neighbours are the up-to-8 positions surrounding (x, y); the pixel
    itself is excluded and positions outside the image are skipped.

    Parameters:
        x, y: indices of the pixel along the first and second image axes.
        k: index of the image within ``images``.
        images: array indexed as ``images[k][x][y]``. Defaults to the
            notebook-level ``train_images`` so existing 3-argument calls
            keep working.

    Returns:
        The neighbour mean as a float.

    Raises:
        ZeroDivisionError: if the pixel has no in-bounds neighbour (1x1 image).
    """
    if images is None:
        images = train_images
    image = images[k]
    # Take the bounds from the image itself instead of hard-coding 28.
    n_rows, n_cols = image.shape[0], image.shape[1]
    pixval = 0
    count = 0
    for i in range(-1,2):
        for j in range(-1,2):
            if (i, j) != (0, 0) and 0 <= x+i < n_rows and 0 <= y+j < n_cols:
                count+=1
                # int() keeps the running sum a Python int: adding NumPy uint8
                # scalars to each other can wrap around once the sum passes 255.
                pixval+=int(image[x+i][y+j])
    return pixval/count

# A correlation coefficient lies in [-1, 1], so the running minimum starts at
# the top of that range and the running maximum at the bottom (starting the
# maximum at 0 would misreport a set of all-negative correlations).
minr= 1
maxr= -1
avg=0
train_nbs = np.zeros((28,28))

for k in image_indexes:
    # train_nbs[i][j] = mean of the neighbours of pixel (i, j) in image k
    for i in range(28):
        for j in range(28):
            train_nbs[i][j] = neighbour_pixel(i,j,k)
    # Pearson correlation between pixel values (X) and neighbour means (Y).
    Xbar = np.mean(train_images[k])
    Ybar = np.mean(train_nbs)
    num = np.sum(np.multiply(train_images[k]-Xbar, train_nbs-Ybar))
    denom1 = np.sum((train_images[k]-Xbar)**2)
    denom2 = np.sum((train_nbs-Ybar)**2)
    r = num / math.sqrt(denom1*denom2)
    minr = min(minr, r)
    maxr = max(maxr, r)
    avg = avg+r
avg/=no_of_images
print("Correlation coefficient for", no_of_images, "images:")
print("min=",minr)
print("max=",maxr)
print("avg=",avg)

Correlation coefficient for 100 images:
min= 0.7883325581552935
max= 0.9578603018935714
avg= 0.9252409359790164


In [60]:
# Independence test for CIFAR10: correlation between each pixel and the mean of
# its neighbours, computed per channel over all pixels of a single image.

(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

no_of_images = 100
image_indexes = random.sample(range(train_images.shape[0]), no_of_images)

train_trans = np.zeros((3,32,32)) # current image, channel-first
train_nbs = np.zeros((32,32))     # neighbour means for one channel
# Per-channel running extremes. A correlation coefficient lies in [-1, 1], so
# the maximum starts at -1 (starting at 0 would hide all-negative results).
minr=[1,1,1]
maxr = np.full(3, -1.0)
avgr = np.zeros(3)

def neighbour_cifar(x,y,c,channels=None):
    """Return the mean value of the in-bounds neighbours of pixel (x, y) in channel c.

    The neighbours are the up-to-8 positions surrounding (x, y); the pixel
    itself is excluded and positions outside the channel are skipped.

    Parameters:
        x, y: indices of the pixel along the two axes of the channel.
        c: channel index.
        channels: array indexed as ``channels[c][x][y]``. Defaults to the
            notebook-level ``train_trans`` so existing 3-argument calls
            keep working.

    Returns:
        The neighbour mean as a float.

    Raises:
        ZeroDivisionError: if the pixel has no in-bounds neighbour (1x1 channel).
    """
    if channels is None:
        channels = train_trans
    plane = channels[c]
    # Take the bounds from the data itself instead of hard-coding 32.
    n_rows, n_cols = plane.shape[0], plane.shape[1]
    pixval = 0
    count = 0
    for i in range(-1,2):
        for j in range(-1,2):
            if (i, j) != (0, 0) and 0 <= x+i < n_rows and 0 <= y+j < n_cols:
                count+=1
                # int() keeps the running sum a Python int: adding NumPy uint8
                # scalars to each other can wrap around once the sum passes 255.
                pixval+=int(plane[x+i][y+j])
    return pixval/count

for k in image_indexes:
    # neighbour_cifar reads the notebook-level train_trans, so it is rebound
    # to the current image (axes reversed, channel first) before any call.
    train_trans = np.transpose(train_images[k])
    for c in range(3):
        channel = train_trans[c]
        # Fill train_nbs with the neighbour mean of every pixel of this channel.
        for i in range(32):
            for j in range(32):
                train_nbs[i][j] = neighbour_cifar(i,j,c)
        # Pearson correlation between pixel values and their neighbour means.
        Xbar = np.mean(channel)
        Ybar = np.mean(train_nbs)
        x_dev = channel - Xbar
        y_dev = train_nbs - Ybar
        num = np.sum(x_dev * y_dev)
        denom1 = np.sum(x_dev**2)
        denom2 = np.sum(y_dev**2)
        r = num / math.sqrt(denom1*denom2)
        minr[c] = min(minr[c], r)
        maxr[c] = max(maxr[c], r)
        avgr[c] += r

avgr /= no_of_images
print("Correlation coefficient for", no_of_images, "images:")
for i in range(3):
    print("\nChannel ", i)
    print("min=",minr[i])
    print("max=",maxr[i])
    print("avg=",avgr[i])

Correlation coefficient for 100 images:

Channel  0
min= 0.856630190665157
max= 0.9862379867000688
avg= 0.946087523099827

Channel  1
min= 0.8325320335102526
max= 0.9872568417098561
avg= 0.9446490618408153

Channel  2
min= 0.8300236640451425
max= 0.9891029866867769
avg= 0.944847280397873


In [63]:
# Independence test for MNIST: correlation between one pixel position and the
# mean of its neighbours, taken across all images, for a random sample of
# positions. Relies on neighbour_pixel() defined in an earlier cell.

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

no_of_pixels = 50

a = random.sample(range(28*28), no_of_pixels)
pixels = [divmod(n, 28) for n in a]

# A correlation coefficient lies in [-1, 1]; start the running maximum at -1
# so that negative correlations are reported correctly.
minr= 1
maxr= -1
avg=0
X = np.zeros((train_images.shape[0]))
Y = np.zeros((train_images.shape[0]))

for (i,j) in pixels:
    # X: value of pixel (i, j) in every image, copied with one slice.
    X[:] = train_images[:, i, j]
    # Y: mean of that pixel's neighbours in every image.
    for k in range(train_images.shape[0]):
        Y[k] = neighbour_pixel(i,j,k)
    Xbar = np.mean(X)
    Ybar = np.mean(Y)
    num = np.dot(X-Xbar, Y-Ybar)
    denom1 = np.sum((X-Xbar)**2)
    denom2 = np.sum((Y-Ybar)**2)
    if denom1==0 or denom2==0:
        # Correlation is undefined when either series is constant: drop this
        # position from the count so it does not dilute the average.
        no_of_pixels =no_of_pixels - 1
        continue
    r = num / math.sqrt(denom1*denom2)
    minr = min(minr, r)
    maxr = max(maxr, r)
    avg = avg+r
avg/=no_of_pixels
print("Correlation coefficient for", no_of_pixels, "pixels:")
print("min=",minr)
print("max=",maxr)
print("avg=",avg)

Correlation coefficient for 49 pixels:
min= 0.22718905426155378
max= 0.9316566411961111
avg= 0.7263448559303516


In [62]:
# Independence test for CIFAR10: correlation between one pixel position and the
# mean of its neighbours, taken per channel across all images.

(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

no_of_pixels = 20

a = random.sample(range(32*32), no_of_pixels)
pixels = [divmod(n, 32) for n in a]

# Per-channel running extremes. A correlation coefficient lies in [-1, 1], so
# the maximum starts at -1 (starting at 0 would hide all-negative results).
minr= [1,1,1]
maxr= np.full(3, -1.0)
avg= np.zeros(3)
# One entry per image: pixel value (X) and neighbour mean (Y).
X = np.zeros((train_images.shape[0]))
Y = np.zeros((train_images.shape[0]))

def nb_cifar(k,x,y,c,images=None):
    """Return the mean value of the in-bounds neighbours of pixel (x, y), channel c, in image k.

    The neighbours are the up-to-8 positions surrounding (x, y); the pixel
    itself is excluded and positions outside the image are skipped.

    Parameters:
        k: index of the image within ``images``.
        x, y: indices of the pixel along the first and second image axes.
        c: channel index (last axis).
        images: array indexed as ``images[k][x][y][c]``. Defaults to the
            notebook-level ``train_images`` so existing 4-argument calls
            keep working.

    Returns:
        The neighbour mean as a float.

    Raises:
        ZeroDivisionError: if the pixel has no in-bounds neighbour (1x1 image).
    """
    if images is None:
        images = train_images
    image = images[k]
    # Take the bounds from the image itself instead of hard-coding 32.
    n_rows, n_cols = image.shape[0], image.shape[1]
    pixval = 0
    count = 0
    for i in range(-1,2):
        for j in range(-1,2):
            if (i, j) != (0, 0) and 0 <= x+i < n_rows and 0 <= y+j < n_cols:
                count+=1
                # int() keeps the running sum a Python int: adding NumPy uint8
                # scalars to each other can wrap around once the sum passes 255.
                pixval+=int(image[x+i][y+j][c])
    return pixval/count

# Number of pixel positions that produced a defined correlation, per channel.
# Each channel is averaged over its own count: sharing one counter meant that
# a skip in one channel changed the divisor of all three averages.
valid = np.zeros(3)

for (i,j) in pixels:
    for c in range(3):
        # X: value of pixel (i, j), channel c, in every image.
        X[:] = train_images[:, i, j, c]
        # Y: mean of that pixel's neighbours in every image.
        for k in range(train_images.shape[0]):
            Y[k] = nb_cifar(k,i,j,c)
        Xbar = np.mean(X)
        Ybar = np.mean(Y)
        num = np.sum(np.multiply(X-Xbar,Y-Ybar))
        denom1 = np.sum((X-Xbar)**2)
        denom2 = np.sum((Y-Ybar)**2)
        if denom1==0 or denom2==0:
            # Correlation is undefined when either series is constant.
            continue
        r = num / math.sqrt(denom1*denom2)
        valid[c] += 1
        minr[c] = min(minr[c], r)
        maxr[c] = max(maxr[c], r)
        avg[c] = avg[c]+r
for c in range(3):
    # A channel with no defined correlation has no meaningful average.
    avg[c] = avg[c] / valid[c] if valid[c] else float("nan")
print("Correlation coefficient for", no_of_pixels, "pixels:")
for i in range(3):
    print("\nChannel ", i)
    print("min=",minr[i])
    print("max=",maxr[i])
    print("avg=",avg[i])

Correlation coefficient for 20 pixels:

Channel  0
min= 0.9529080033881377
max= 0.9865450329397039
avg= 0.9675856572449618

Channel  1
min= 0.9507059254310584
max= 0.9866025255051655
avg= 0.9669747897177878

Channel  2
min= 0.9539777758053922
max= 0.9890128547821713
avg= 0.9712093764956473


In [1]:
# Independence test for MNIST over all pixels of a single image, where Y is the
# pixel two positions further along the second axis (train_images[k][i][j+2]).
# Needs the import cell at the top of the notebook to have been run first.

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

no_of_images = 100
image_indexes = random.sample(range(train_images.shape[0]), no_of_images)

# A correlation coefficient lies in [-1, 1]; start the running maximum at -1
# so that negative correlations are reported correctly.
minr= 1
maxr= -1
avg=0

for k in image_indexes:
    image = train_images[k]
    # Only pixels that actually have a partner two columns away take part:
    # the last two columns have none, and pairing them with zeros would skew
    # the correlation.
    X = image[:, :-2]
    Y = image[:, 2:]
    Xbar = np.mean(X)
    Ybar = np.mean(Y)
    num = np.sum(np.multiply(X-Xbar, Y-Ybar))
    denom1 = np.sum((X-Xbar)**2)
    denom2 = np.sum((Y-Ybar)**2)
    r = num / math.sqrt(denom1*denom2)
    minr = min(minr, r)
    maxr = max(maxr, r)
    avg = avg+r
avg/=no_of_images
print("Correlation coefficient for", no_of_images, "images:")
print("min=",minr)
print("max=",maxr)
print("avg=",avg)

NameError: name 'mnist' is not defined

In [None]:
# Independence test for CIFAR10 over all pixels of a single image, per channel,
# where Y is the pixel two positions further along the second spatial axis
# (the same offset direction as train_images[k][i][j+2] in the MNIST cell).

(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

no_of_images = 100
image_indexes = random.sample(range(train_images.shape[0]), no_of_images)

# Per-channel running extremes. A correlation coefficient lies in [-1, 1], so
# the maximum starts at -1 (starting at 0 would hide all-negative results).
minr=[1,1,1]
maxr = np.full(3, -1.0)
avgr = np.zeros(3)


for k in image_indexes:
    for c in range(3):
        # Index the image directly (channel is the last axis). Going through
        # np.transpose swaps the two spatial axes, which made the +2 offset
        # run along the other direction than in the MNIST cell.
        channel = train_images[k][:, :, c]
        # Only pixels that actually have a partner two positions away take
        # part; pairing the last two columns with zeros would skew the result.
        X = channel[:, :-2]
        Y = channel[:, 2:]
        Xbar = np.mean(X)
        Ybar = np.mean(Y)
        num = np.sum(np.multiply(X-Xbar, Y-Ybar))
        denom1 = np.sum((X-Xbar)**2)
        denom2 = np.sum((Y-Ybar)**2)
        r = num / math.sqrt(denom1*denom2)
        minr[c] = min(minr[c], r)
        maxr[c] = max(maxr[c], r)
        avgr[c] = avgr[c] + r

avgr /= no_of_images
print("Correlation coefficient for", no_of_images, "images:")
for i in range(3):
    print("\nChannel ", i)
    print("min=",minr[i])
    print("max=",maxr[i])
    print("avg=",avgr[i])