In [1]:
# Standards
import os
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from pprint import pprint
from IPython import display

# Image Processing Libraries
import cv2
import skimage.feature as feature
from skimage.feature import hog
from skimage.feature import CENSURE
from skimage import data, color, exposure, filters

# Feature Extractions
from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.feature_extraction.image import extract_patches_2d
from sklearn.feature_extraction.image import reconstruct_from_patches_2d

# Fixed seed for reproducibility; it is passed as `random_state` to the
# MiniBatchKMeans model built later in this notebook.
rseed=4444

%matplotlib inline

In [None]:
# Load a previously pickled object (presumably the image DataFrame — TODO confirm)
# from the working directory.
# NOTE(review): unpickling can execute arbitrary code; only load files you trust.
df_img = pd.read_pickle('df_img.pkl')

# Data Inventory

In [None]:
def seeImg(img):
    """Render `img` on the current pyplot axes with no grid and no axis decorations.

    Parameters
    ----------
    img : array-like
        Image data in any form accepted by `plt.imshow`.

    Notes
    -----
    Uses the pyplot state machine, so it draws on whatever figure/axes is
    current (creating one if needed). It does not call `plt.show()`.
    """
    # aspect='auto' lets the image stretch to fill the axes instead of keeping square pixels.
    plt.imshow(img, aspect='auto')
    plt.grid(False)
    plt.axis('off')

# Image Processing Pipeline

# SIFT Features

In [10]:
# Names of the 128 SIFT descriptor components: des_1 ... des_128.
_des_names = ['des_' + str(n) for n in range(1, 129)]

# Full CSV schema: per-keypoint metadata followed by the descriptor values.
columns = ['indx', 'label', 'point', 'size', 'angle',
           'response', 'octave', 'class_id'] + _des_names

# Reduced schema used for the per-image feature table (identifier columns + descriptors).
ft_cols = ['indx', 'label'] + _des_names

# dtype of every CSV column, in the same order as `columns`.
dtypes = dict(indx='int',
              label='object',
              point='object',
              size='float',
              angle='float',
              response='float',
              octave='float',
              class_id='int')
dtypes.update((name, 'int16') for name in _des_names)

# dtype of every feature-table column, in the same order as `ft_cols`.
ft_dtypes = dict(indx='int', label='object')
ft_dtypes.update(dict.fromkeys(_des_names, 'int16'))

In [11]:
# Empty, correctly typed feature table that later cells append per-image rows to.
df_sift_features = pd.DataFrame(columns=ft_cols).astype(ft_dtypes)

In [None]:
def drawSift(img, kp):
    """Display `img` with the keypoints `kp` drawn on top of it.

    Parameters
    ----------
    img : numpy.ndarray
        Source image, as accepted by `cv2.drawKeypoints`.
    kp : sequence of cv2.KeyPoint
        Keypoints to overlay (e.g. the output of a SIFT detector).

    Notes
    -----
    The input image is left untouched: the overlay is rendered into a new
    array. The previous version passed `img` as the output image, which let
    OpenCV draw the keypoints directly onto the caller's array.
    """
    # outImage=None makes OpenCV allocate a fresh canvas instead of reusing `img`.
    sift_img = cv2.drawKeypoints(img, kp, None)

    fig, ax = plt.subplots()
    ax.axis('off')
    # NOTE(review): if `img` was loaded with cv2.imread it is BGR and the colours
    # shown here will be swapped — confirm how the images are loaded.
    ax.imshow(sift_img)
    plt.show()

In [None]:
# Audible "job finished" notification for long-running cells.
# `afplay` and `say` are macOS command-line tools, so this cell only makes a
# sound on macOS; os.system just returns a non-zero status elsewhere.
os.system('afplay /System/Library/Sounds/Sosumi.aiff')
os.system('say "Complete"')

In [None]:
# cutest tree ever: images[1400]

## Building Cluster

In [2]:
from sklearn.cluster import MiniBatchKMeans

# `d` is the number of clusters, i.e. the size of the visual vocabulary and
# therefore the length of each image's histogram.
d = 500

# Mini-batch variant so the model can be trained chunk by chunk via partial_fit.
kmeans = MiniBatchKMeans(random_state=rseed, n_clusters=d)

In [3]:
# Labels of the image categories kept for clustering and feature building.
targets = [
    'ak47',
    'american-flag',
    'backpack',
    'baseball-bat',
    'baseball-glove',
    'basketball-hoop',
    'bat',
    'bathtub',
    'bear',
    'beer-mug',
    'billiards',
    'binoculars',
    'birdbath',
    'blimp',
    'bonsai',
    'boom-box',
    'bowling-ball',
    'bowling-pin',
    'boxing-glove',
]

In [12]:
# Stream the SIFT keypoint CSV in 2000-row chunks rather than loading it whole.
# The file has no header row, so column names and dtypes are supplied explicitly.
# The returned reader is a one-shot iterator: once a loop has consumed it, this
# cell must be re-run before iterating again.
df_sift_chunks = pd.read_csv(
    'sift_features_resized.csv',
    names=columns,
    dtype=dtypes,
    header=None,
    chunksize=2000,
    iterator=True,
)

In [6]:
# KMeans Clustering
# Train the vocabulary incrementally: each CSV chunk is reduced to the rows whose
# label is in `targets`, and their descriptor columns (position 8 onward) are fed
# to partial_fit. The loop stops at the first chunk with no target rows
# (presumably the CSV is ordered so target labels come first — TODO confirm).
for features in df_sift_chunks:
    print('Current index:', features.index[0])
    print(features.label.values[0])

    chk_lbl = features.label.isin(targets)
    has_target_rows = chk_lbl.any()
    features = features[chk_lbl]

    print('Any true?', has_target_rows)

    if not has_target_rows:
        print('Breaking!')
        break

    kmeans.partial_fit(features.iloc[:, 8:])
    # Keep only the most recent progress lines in the cell output.
    display.clear_output(wait=True)

Current index: 876000
brain
Any true? False
Breaking!


In [15]:
# Persist the fitted clustering model so it does not have to be retrained.
# Create the output directory first: without it, open() raises FileNotFoundError
# and the save fails after the long clustering run has already finished.
os.makedirs('./pkls', exist_ok=True)
with open('./pkls/sift_kmeans_500.pkl', 'wb') as f:
    pickle.dump(kmeans, f)

## Building Features

In [14]:
# Build bag-of-visual-words features: for every (label, indx) image, count how many
# of its SIFT descriptors fall into each of the `d` KMeans clusters.

# Re-open the CSV. The chunk reader is a one-shot iterator that the clustering
# loop has already consumed, so iterating the old reader would resume after the
# last chunk it read and produce no features.
df_sift_chunks = pd.read_csv('sift_features_resized.csv',
                             header=None,
                             names=columns,
                             iterator=True,
                             chunksize=2000,
                             dtype=dtypes)

# Per-chunk histogram frames are collected here and concatenated once at the end,
# instead of growing df_sift_features inside the loop.
chunk_histograms = []

for features in df_sift_chunks:
    print('Current index:', features.index[0])
    print('Current Label:',features.label.values[0],'\n')

    chk_lbl = features.label.isin(targets)
    if not chk_lbl.any():
        # Stop at the first chunk without any target rows.
        print('Breaking!')
        break

    features = features[chk_lbl]

    # One vectorised predict per chunk over the descriptor columns (position 8 onward);
    # assign() returns a new frame, so nothing is written onto a slice of the chunk.
    features = features.assign(cluster=kmeans.predict(features.iloc[:, 8:]))

    # np.bincount(..., minlength=d) yields exactly d counts, one per cluster id.
    # np.histogram(x, bins=range(0, d)) produced only d-1 bins and, because the
    # last bin is closed, counted clusters d-2 and d-1 together.
    df = (features
          .groupby(['label','indx'])['cluster']
          .apply(lambda x: np.bincount(x, minlength=d))
          .reset_index()
          .rename(columns={'cluster': 'des'}))

    # Expand each histogram array into columns des_0 ... des_{d-1}.
    des = pd.DataFrame(np.vstack(list(df['des'])), index=df.index)
    des = des.rename(columns = lambda x: 'des_' + str(x))

    df = pd.concat([df.drop(['des'], axis=1), des], axis=1, join='inner')
    chunk_histograms.append(df)

    display.clear_output(wait=True)

# Start from a fresh, empty typed template so re-running this cell rebuilds the
# table instead of appending a second copy. An image whose keypoints span two
# chunks still appears twice here; those rows are expected to be summed per
# (label, indx) afterwards.
df_sift_features = pd.concat(
    [pd.DataFrame(columns=ft_cols).astype(ft_dtypes)] + chunk_histograms)

Current index: 876000
Current Label: brain 

Breaking!


In [16]:
# Collapse the table to one row per image: rows sharing the same (label, indx)
# are added together column by column. The old row index is discarded first.
df_sift_features = (
    df_sift_features
    .reset_index(drop=True)
    .groupby(['label', 'indx'])
    .sum()
    .reset_index()
)

In [17]:
# Persist the per-image histogram features.
# Create the output directory first: without it, open() raises FileNotFoundError
# and the computed features are not saved.
os.makedirs('./pkls', exist_ok=True)
with open('./pkls/df_sift_features500.pkl', 'wb') as f:
    pickle.dump(df_sift_features, f)