# Content Based Recommendation for Bowl Type Classification


In [3]:
#!unzip /content/sample_data/test_set.zip -d /content/sample_data/
!pip install scipy==1.1.0



Collecting scipy==1.1.0
[?25l  Downloading https://files.pythonhosted.org/packages/a8/0b/f163da98d3a01b3e0ef1cab8dd2123c34aee2bafbb1c5bffa354cc8a1730/scipy-1.1.0-cp36-cp36m-manylinux1_x86_64.whl (31.2MB)
[K     |████████████████████████████████| 31.2MB 104kB/s 
[31mERROR: umap-learn 0.4.6 has requirement scipy>=1.3.1, but you'll have scipy 1.1.0 which is incompatible.[0m
[31mERROR: tensorflow 2.2.0 has requirement scipy==1.4.1; python_version >= "3", but you'll have scipy 1.1.0 which is incompatible.[0m
[31mERROR: plotnine 0.6.0 has requirement scipy>=1.2.0, but you'll have scipy 1.1.0 which is incompatible.[0m
[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.9 which is incompatible.[0m
Installing collected packages: scipy
  Found existing installation: scipy 1.4.1
    Uninstalling scipy-1.4.1:
      Successfully uninstalled scipy-1.4.1
Successfully installed scipy-1.1.0


In [8]:
import numpy as np
import os
from scipy.misc import imread, imresize
import datetime
import os

We set the random seed so that the results don't vary drastically.

In [9]:
np.random.seed(30)
import random as rn
rn.seed(30)
from keras import backend as K
import tensorflow as tf
#tf.random.set_seed(30)

In this block, you read the folder names for training and validation. You also set the `batch_size` here. Note that you set the batch size in such a way that you are able to use the GPU in full capacity. You keep increasing the batch size until the machine throws an error.

## Generator
This is one of the most important part of the code. The overall structure of the generator has been given. In the generator, you are going to preprocess the images as you have images of 2 different dimensions as well as create a batch of video frames. You have to experiment with some of the parts of the generator function such that you get high accuracy.

In [10]:
def generator(source_path, folder_list, batch_size):
    print( 'Source path = ', source_path, '; batch size =', batch_size)
    img_idx = [0,1,2,4,6,8,10,12,14,16,18,20,22,24,26,27,28,29]
    #img_idx = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
    while True:
        t = np.random.permutation(folder_list)
        num_batches = int(len(t)/batch_size)
        for batch in range(num_batches):
            batch_data = np.zeros((batch_size,18,128,128,3))
            batch_labels = np.zeros((batch_size,3))
            for folder in range(batch_size):
                imgs = os.listdir(source_path+'/'+ t[folder + (batch*batch_size)].split(';')[0])
                for idx,item in enumerate(img_idx):
                    #print(imgs[item])
                    #print(source_path+'/'+ t[folder + (batch*batch_size)].strip().split(';')[0]+'/'+imgs[item])
                    image = imread(source_path+'/'+ t[folder + (batch*batch_size)].strip().split(';')[0]+'/'+imgs[item]).astype(np.float32)
                    image = imresize(image,(128,128)).astype(np.float32)
                    '''
                    if image.shape[1] == 160:
                        image = imresize(image[:,20:140,:],(84,84)).astype(np.float32)
                    else:
                        image = imresize(image,(84,84)).astype(np.float32)
                    '''
                    batch_data[folder,idx,:,:,0] = image[:,:,0] - 104
                    batch_data[folder,idx,:,:,1] = image[:,:,1] - 117
                    batch_data[folder,idx,:,:,2] = image[:,:,2] - 123
                    
                batch_labels[folder, int(t[folder + (batch*batch_size)].strip().split(';')[2])] = 1
            yield batch_data, batch_labels

        if (len(t)%batch_size) != 0:
            batch_data = np.zeros((len(t)%batch_size,18,128,128,3))
            batch_labels = np.zeros((len(t)%batch_size,3))
            for folder in range(len(t)%batch_size):
                imgs = os.listdir(source_path+'/'+ t[folder + (num_batches*batch_size)].split(';')[0])
                for idx,item in enumerate(img_idx):
                    image = imread(source_path+'/'+ t[folder + (num_batches*batch_size)].strip().split(';')[0]+'/'+imgs[item]).astype(np.float32)
                    image = imresize(image,(128,128)).astype(np.float32)
                    '''
                    print(image.shape[1],image.shape[0])
                    if image.shape[1] == 460:
                        image = imresize(image[:,20:140,:],(84,84)).astype(np.float32)
                    else:
                        image = imresize(image,(84,84)).astype(np.float32)
                    '''

                    batch_data[folder,idx,:,:,0] = image[:,:,0] - 104
                    batch_data[folder,idx,:,:,1] = image[:,:,1] - 117
                    batch_data[folder,idx,:,:,2] = image[:,:,2] - 123

                batch_labels[folder, int(t[folder + (num_batches*batch_size)].strip().split(';')[2])] = 1

            yield batch_data, batch_labels

## Model
Here you make the model using different functionalities that Keras provides. Remember to use `Conv3D` and `MaxPooling3D` and not `Conv2D` and `Maxpooling2D`. Also remember that the last layer is the softmax. Remember that the network is designed in such a way that the model is able to fit in the memory of the webcam.

In [18]:
from keras.models import Sequential
from keras.layers import Dense, GRU, Dropout, Flatten, BatchNormalization, Activation
from keras.layers.convolutional import Conv3D, MaxPooling3D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras import optimizers

model = Sequential()
model.add(Conv3D(64, (3,3,3), strides=(1,1,1), padding='same', input_shape=(18,128,128,3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2,2,1), strides=(2,2,1)))

model.add(Conv3D(128, (3,3,3), strides=(1,1,1), padding='same'))

model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2,2,2), strides=(2,2,2)))

# model.add(Dropout(0.25))

model.add(Conv3D(256, (3,3,3), strides=(1,1,1), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2,2,2), strides=(2,2,2)))

# model.add(Dropout(0.25))

model.add(Conv3D(256, (3,3,3), strides=(1,1,1), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2,2,2), strides=(2,2,2)))

model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

Now that you have written the model, the next step is to `compile` the model. When you print the `summary` of the model, you'll see the total number of parameters you have to train.

In [29]:
#Test Code

from keras.models import load_model, Model
#######
    
model_name = 'D:/Hacks/Cricket_The_Last_Project/Conv3d_Weights/model-00025-0.05238-1.00000-0.19227-0.96667.h5'
    
#######
    
test_doc = open('D:/Hacks/Cricket_The_Last_Project/test_set/test.csv').readlines()
test_path = 'D:/Hacks/Cricket_The_Last_Project/test_set/Dataset'
num_test_sequences = len(test_doc)
batch_size=1
print ('# testing sequences =', num_test_sequences)
print ('# batch size =', batch_size)
test_generator = generator(test_path, test_doc, batch_size)
model = load_model(model_name)
print("Model loaded.")
model_func = Model(inputs=[model.input], outputs=model.get_layer('flatten_2').output)
 
acc = 0
num_batches = int(num_test_sequences/batch_size)
    
feature_matrix = np.zeros([num_test_sequences, 32768])   # initialize the matrix with zeros
feature_matrix.shape


# testing sequences = 7
# batch size = 1


ValueError: Unknown layer:name

In [51]:
for i in range(num_batches):
    x,true_labels = test_generator.__next__()
    print ("shape of x:", x.shape, "and shape of true_labels:", true_labels.shape)
    #pred_idx = np.argmax(model_func.predict_on_batch(x), axis=1)
    feature_matrix[i,:]=model_func.predict_on_batch(x)
    
'''
for i in range(num_batches):
    x,true_labels = test_generator.__next__()
    print ("shape of x:", x.shape, "and shape of true_labels:", true_labels.shape)
    pred_idx = np.argmax(model_func.predict_on_batch(x), axis=1)
    print("True_labels: ",true_labels)
    print("Pred_labels: ",pred_idx)
    for j,k in enumerate(pred_idx):
        if true_labels[j,k] == 1:
            acc += 1
                
if (num_test_sequences%batch_size) != 0:
    x,true_labels = test_generator.__next__()
    print ("shape of x:", x.shape, "and shape of true_labels:", true_labels.shape)
    pred_idx = np.argmax(model_func.predict_on_batch(x), axis=1)
    for j,k in enumerate(pred_idx):
        if true_labels[j,k] == 1:
            acc += 1

print('Accuracy is =', acc/num_test_sequences) 

'''

Source path =  /content/sample_data/test_set/Dataset ; batch size = 1
shape of x: (1, 18, 128, 128, 3) and shape of true_labels: (1, 3)


`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  app.launch_new_instance()
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.


shape of x: (1, 18, 128, 128, 3) and shape of true_labels: (1, 3)
shape of x: (1, 18, 128, 128, 3) and shape of true_labels: (1, 3)
shape of x: (1, 18, 128, 128, 3) and shape of true_labels: (1, 3)
shape of x: (1, 18, 128, 128, 3) and shape of true_labels: (1, 3)
shape of x: (1, 18, 128, 128, 3) and shape of true_labels: (1, 3)
shape of x: (1, 18, 128, 128, 3) and shape of true_labels: (1, 3)


'\nfor i in range(num_batches):\n    x,true_labels = test_generator.__next__()\n    print ("shape of x:", x.shape, "and shape of true_labels:", true_labels.shape)\n    pred_idx = np.argmax(model_func.predict_on_batch(x), axis=1)\n    print("True_labels: ",true_labels)\n    print("Pred_labels: ",pred_idx)\n    for j,k in enumerate(pred_idx):\n        if true_labels[j,k] == 1:\n            acc += 1\n                \nif (num_test_sequences%batch_size) != 0:\n    x,true_labels = test_generator.__next__()\n    print ("shape of x:", x.shape, "and shape of true_labels:", true_labels.shape)\n    pred_idx = np.argmax(model_func.predict_on_batch(x), axis=1)\n    for j,k in enumerate(pred_idx):\n        if true_labels[j,k] == 1:\n            acc += 1\n\nprint(\'Accuracy is =\', acc/num_test_sequences) \n\n'

In [9]:
!rm -r /content/sample_data/test_set/Dataset/Offspin1

In [23]:
for layer in model.layers:
  print(layer.name)

conv3d_5
batch_normalization_5
activation_5
max_pooling3d_5
conv3d_6
batch_normalization_6
activation_6
max_pooling3d_6
conv3d_7
batch_normalization_7
activation_7
max_pooling3d_7
conv3d_8
batch_normalization_8
activation_8
max_pooling3d_8
flatten_2
dropout_3
dense_3
dropout_4
dense_4


In [22]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d_5 (Conv3D)            (None, 18, 128, 128, 64)  5248      
_________________________________________________________________
batch_normalization_5 (Batch (None, 18, 128, 128, 64)  256       
_________________________________________________________________
activation_5 (Activation)    (None, 18, 128, 128, 64)  0         
_________________________________________________________________
max_pooling3d_5 (MaxPooling3 (None, 9, 64, 128, 64)    0         
_________________________________________________________________
conv3d_6 (Conv3D)            (None, 9, 64, 128, 128)   221312    
_________________________________________________________________
batch_normalization_6 (Batch (None, 9, 64, 128, 128)   512       
_________________________________________________________________
activation_6 (Activation)    (None, 9, 64, 128, 128)   0         
__________

In [12]:
print(model.output)

Tensor("dense_28_1/Softmax:0", shape=(None, 3), dtype=float32)


In [13]:
print(model.input)

Tensor("conv3d_53_input_1:0", shape=(None, 18, 128, 128, 3), dtype=float32)


In [24]:
model.layers[16].name

'flatten_2'

In [18]:
layer_name = 'my_layer'
intermediate_layer_model = Model(inputs=model.input,
                                       outputs=model.get_layer('flatten_14').output)
intermediate_output = intermediate_layer_model(data)

NameError: ignored

In [20]:
intermediate_layer_model.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d_53_input (InputLayer) (None, 18, 128, 128, 3)   0         
_________________________________________________________________
conv3d_53 (Conv3D)           (None, 18, 128, 128, 64)  5248      
_________________________________________________________________
batch_normalization_53 (Batc (None, 18, 128, 128, 64)  256       
_________________________________________________________________
activation_53 (Activation)   (None, 18, 128, 128, 64)  0         
_________________________________________________________________
max_pooling3d_53 (MaxPooling (None, 9, 64, 128, 64)    0         
_________________________________________________________________
conv3d_54 (Conv3D)           (None, 9, 64, 128, 128)   221312    
_________________________________________________________________
batch_normalization_54 (Batc (None, 9, 64, 128, 128)   512 

In [30]:
len(feature_matrix[0,:])

32768

In [67]:
dot_product = feature_matrix.dot(feature_matrix.T)
norms = np.array([np.sqrt(np.diagonal(dot_product))])
similarity = dot_product / (norms * norms.T)

In [32]:
similarity.shape

(7, 7)

In [37]:
similarity[3,3]

1.0

In [65]:
closest_ids = np.argsort(similarity[5, :])[::-1][0:2]

In [66]:
closest_ids

array([5, 2])

In [16]:
pwd

'D:\\Hacks\\Cricket_The_Last_Project'

In [31]:
import keras
print(keras.__version__)

2.2.2


In [32]:
print(tf.__version__)

1.7.1
