In [None]:
from filter import *
import pandas as pd
import os
import cv2
from sklearn.cluster import KMeans,MiniBatchKMeans
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import csv
from sklearn.neighbors import KNeighborsClassifier
from matplotlib import pyplot as plt
%matplotlib inline
from libKMCUDA import kmeans_cuda
from scipy.cluster.vq import vq

In [None]:
# read data from disk to memory
def readData(path):
    """Load a tab-separated ``imageName<TAB>dishName`` listing from ``path``.

    The file is read without a header row; every row is kept.

    Returns
    -------
    tuple
        ``(image_names, labels)`` where ``image_names`` is a one-column
        DataFrame (``imageName``) and ``labels`` is the ``dishName`` Series.
    """
    column_names = ['imageName', 'dishName']
    listing = pd.read_csv(path, sep="\t", names=column_names)

    labels = listing['dishName']
    image_names = listing.drop(columns=['dishName'])
    return image_names, labels

In [None]:
# read data from disk to memory
def readData2(path):
    """Load a tab-separated listing and drop every multi-label row.

    A row counts as multi-label when its ``dishName`` contains a space
    (labels are space separated). The remaining rows are re-indexed from 0.

    Parameters
    ----------
    path : str
        Tab-separated file with two header-less columns: image name, dish name.

    Returns
    -------
    tuple
        ``(all_pic_names, all_dish_name)``: a one-column DataFrame
        (``imageName``) and the matching ``dishName`` Series.
    """
    all_food_data = pd.read_csv(path, sep="\t", names=['imageName', 'dishName'])

    # Build the mask on a string view so that numeric or missing labels do not
    # break the ``.str`` accessor; the returned values themselves are untouched.
    # Filtering with a mask (instead of split + fixed column names) also works
    # when the file contains no multi-label row at all.
    is_multilabel = all_food_data['dishName'].astype(str).str.contains(" ", regex=False)
    data = all_food_data[~is_multilabel].reset_index(drop=True)

    all_pic_names = data.drop('dishName', axis=1)
    all_dish_name = data['dishName']

    return all_pic_names, all_dish_name

In [None]:
def extractSchmidResponses(img_paths, base_path='../Data/UNICT-FD1200_Small/'):
    """Compute Schmid filter-bank responses for every image in ``img_paths``.

    Parameters
    ----------
    img_paths : pandas.DataFrame
        Frame with an ``imageName`` column; each value is appended to
        ``base_path`` by plain string concatenation.
    base_path : str, optional
        Prefix of the image directory (defaults to the path used so far).

    Returns
    -------
    numpy.ndarray
        The per-image outputs of ``apply_filter_bank`` stacked along a new
        first axis. An empty array is returned when ``img_paths`` has no rows.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read one of the images.

    Notes
    -----
    ``cv2.imread`` returns pixels in BGR order, so the conversion to Lab uses
    ``COLOR_BGR2LAB``. Responses cached on disk with the former
    ``COLOR_RGB2LAB`` conversion must be regenerated to stay comparable.
    """
    image_names = list(img_paths['imageName'])
    if not image_names:
        return np.array([])

    schmid_filters = make_schmid_filters()
    responses = []
    for name in image_names:
        full_path = base_path + name
        print(full_path)
        img = cv2.imread(full_path)
        if img is None:
            # imread signals failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: ' + full_path)
        lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
        responses.append(apply_filter_bank(lab, schmid_filters))

    # Stack once at the end; appending to an array inside the loop copies
    # everything accumulated so far on every iteration.
    return np.stack(responses)

In [None]:
def build_histogram(descriptor_list, cluster_alg):
    """Count how many descriptors fall into each cluster of ``cluster_alg``.

    Parameters
    ----------
    descriptor_list : array-like
        Descriptors passed unchanged to ``cluster_alg.predict``.
    cluster_alg : object
        Fitted clustering model exposing ``cluster_centers_`` and ``predict``.

    Returns
    -------
    numpy.ndarray
        Float array with one bin per cluster centre; bin ``i`` holds the number
        of descriptors assigned to cluster ``i``.
    """
    n_clusters = len(cluster_alg.cluster_centers_)
    labels = np.asarray(cluster_alg.predict(descriptor_list)).astype(np.intp, copy=False)
    # bincount does the counting in one vectorised pass; minlength keeps the
    # bins of clusters that received no descriptor.
    return np.bincount(labels, minlength=n_clusters).astype(float)

In [None]:
# Load the image listings. The training split keeps every row (readData);
# the test split drops rows whose label contains a space (readData2).
x_train_path, y_train = readData("Train_1.txt")
x_test_path, y_test = readData2("Test_1.txt")

In [None]:
# Load the cached training responses, or compute and cache them on a miss.
# The cache directory may not exist yet on a fresh checkout, so it is checked
# before listing and created before saving.
savedResponsesFiles = os.listdir('files') if os.path.isdir('files') else []
if 'train_responses12000Lab.npy' not in savedResponsesFiles:
    print('Train response features not found!')
    train_response_vector = extractSchmidResponses(x_train_path)
    os.makedirs('files', exist_ok=True)
    # np.save appends the ".npy" extension to this name.
    np.save('files/train_responses12000Lab', train_response_vector)
    print('Train response features created!')
else:
    print('Train response features found!')
    train_response_vector = np.load('files/train_responses12000Lab.npy')
    print('Train response features loaded!')

In [None]:
# Inspect the shape of the response array before it is flattened below.
train_response_vector.shape

In [None]:
# Flatten to one 21-dimensional response per row. Using -1 for the row count
# keeps the cell idempotent: re-running it on the already flattened array is a
# no-op instead of a reshape error.
train_response_vector = train_response_vector.reshape(-1, 21)
train_response_vector.shape

In [None]:
def clusterResponses(responses, n_clusters=12002, n_chunks=8):
    """Cluster ``responses`` with ``kmeans_cuda``, one chunk at a time.

    The first chunk is clustered with k-means++ initialisation; every further
    chunk is clustered starting from the centroids of the previous call.

    Parameters
    ----------
    responses : numpy.ndarray
        Samples to cluster, split along the first axis.
    n_clusters : int, optional
        Number of centroids requested from ``kmeans_cuda``.
    n_chunks : int, optional
        Number of chunks the samples are split into.

    Returns
    -------
    tuple
        ``(centroids, assignments)`` as returned by the last ``kmeans_cuda``
        call; ``assignments`` therefore only covers the last chunk.
    """
    # array_split also accepts sample counts that are not a multiple of n_chunks.
    chunks = np.array_split(responses, n_chunks)
    centroids, assignments = kmeans_cuda(
        chunks[0], n_clusters, init="k-means++", verbosity=2, yinyang_t=0)

    # Pass the chunk itself (not its index) to the follow-up runs.
    for chunk in chunks[1:]:
        centroids, assignments = kmeans_cuda(
            chunk, n_clusters, init=centroids, verbosity=2, yinyang_t=0)

    return centroids, assignments

In [None]:
#centroids, assigments = clusterResponses(train_response_vector)

In [None]:
# Load previously saved centroids from disk.
# NOTE(review): the cell that wrote this file is not visible; presumably the
# output of clusterResponses (its call is commented out above) - confirm.
centroids = np.load('files/centroids.npy')

In [None]:
# List the [row, column] positions of all NaN entries in the centroids.
np.argwhere(np.isnan(centroids))

In [None]:
# Wrap the precomputed centroids in a scikit-learn KMeans so that `predict`
# can be used later. Rows containing NaN are excluded here and n_clusters is
# derived from what remains, so the init array and n_clusters always agree.
valid_centroids = centroids[~np.isnan(centroids).any(axis=1)]
kmeans = KMeans(init=valid_centroids, n_clusters=len(valid_centroids),
                n_init=1, verbose=2, max_iter=1)
# NOTE(review): even a single iteration re-estimates the centres from these
# samples, so the fitted centres can differ from `valid_centroids` - confirm
# this is acceptable.
kmeans.fit(train_response_vector[:len(valid_centroids)])

In [None]:
# Drop every centroid row that contains NaN. A mask replaces the hard-coded
# row numbers, so the cell stays correct for other centroid files and can be
# re-run without deleting further (valid) rows.
centroids = centroids[~np.isnan(centroids).any(axis=1)]
np.any(np.isnan(centroids))

In [None]:
# Persist the visual vocabulary (np.save appends ".npy" to the name).
# NOTE(review): no cell above this one assigns `vocabulary` in the visible
# notebook, so this fails on a fresh kernel - confirm where it should come from.
np.save('vocabulary200NoMultiLabel', vocabulary)

In [None]:
# Flatten the vocabulary to one 21-dimensional word per row. Using -1 for the
# row count works for both nested (groups x words x 21) and already flat
# (words x 21) input, so the cell can be re-run safely.
vocabulary = np.array(vocabulary)
vocabulary = vocabulary.reshape(-1, 21)

In [None]:
# Extract the responses of the single test image at row position 1511.
test = extractSchmidResponses(x_test_path[1511:1512])

In [None]:
# Flatten to one 21-dimensional response per row; -1 lets NumPy derive the row
# count instead of hard-coding it for one particular image size.
test = np.array(test).reshape(-1, 21)
test.shape

In [None]:
#test = np.array(test)
#test = test.reshape(76800,21)

In [None]:
# Histogram of cluster assignments for the single test image, using the
# KMeans model fitted above.
hist = build_histogram(test, kmeans)

In [None]:
# Print every bin count on one line, separated by spaces.
print(*hist)

In [None]:
# Build one cluster histogram per training image (first 200 images at most).
# The fitted model in this notebook is `kmeans`; the previously referenced
# name `model` is not defined anywhere in the notebook.
preprocessed_image = []
for i in range(min(200, len(x_train_path))):
    img = x_train_path.iloc[i:i+1]
    resp = np.array(extractSchmidResponses(img))
    # One 21-dimensional response per row, whatever the image size.
    resp = resp.reshape(-1, 21)
    preprocessed_image.append(build_histogram(resp, kmeans))

In [None]:
# Build one cluster histogram per test image (first 200 images at most).
# The fitted model in this notebook is `kmeans`; the previously referenced
# name `model` is not defined anywhere in the notebook.
preprocessed_test_image = []
for i in range(min(200, len(x_test_path))):
    img = x_test_path.iloc[i:i+1]
    resp = np.array(extractSchmidResponses(img))
    # One 21-dimensional response per row, whatever the image size.
    resp = resp.reshape(-1, 21)
    preprocessed_test_image.append(build_histogram(resp, kmeans))

In [None]:
# Persist the train/test histograms (np.save appends ".npy" to each name).
np.save('preprocessed_train_image_NL_200',preprocessed_image)
np.save('preprocessed_test_image_NL_200',preprocessed_test_image)

In [None]:
# Reload the training histograms from disk as the kNN feature matrix.
a = np.load('preprocessed_train_image_NL_200.npy')#np.array(preprocessed_image)

In [None]:
# 1-nearest-neighbour classifier on the histograms.
# The histograms are built from the first rows of the training listing, so
# only the first len(a) labels are used; the slice is a no-op when both
# lengths already match.
knn = KNeighborsClassifier(n_neighbors=1, metric='euclidean')
knn.fit(a, y_train.iloc[:len(a)])

In [None]:
# Reload the test histograms from disk as the kNN query matrix.
b = np.load('preprocessed_test_image_NL_200.npy')#preprocessed_test_image)

In [None]:
# Predicted dish label for every test histogram.
pr = knn.predict(b)

In [None]:
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
# Arguments follow the documented (y_true, y_pred) order; the labels are cut
# to the number of predictions because only the first images were classified.
print("Accuracy:",metrics.accuracy_score(y_test.iloc[:len(pr)], pr))

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
# classification_report expects (y_true, y_pred); with the arguments swapped
# precision and recall are exchanged. Labels are cut to the number of
# predictions because only the first images were classified.
print(classification_report(y_test.iloc[:len(pr)], pr))

In [None]:
from pandas_ml import ConfusionMatrix
# Use a dedicated name so sklearn's `confusion_matrix` function is not shadowed.
# Labels are cut to the number of predictions because only the first images
# were classified.
cm_test = ConfusionMatrix(y_test.iloc[:len(pr)], pr)
# The plotting method is `plot()`; `plt.show` must be called to take effect.
cm_test.plot()
plt.show()

In [None]:
# NOTE(review): `plot_confusion_matrix` is not defined in the visible notebook;
# it may come from `from filter import *` - confirm. `classes=y_test` passes
# every label rather than the distinct class names - verify that is intended.
plot_confusion_matrix(y_test, pr, classes=y_test,
                      title='Confusion matrix, without normalization')

In [None]:
# Inspect the current shape of the training responses.
train_response_vector.shape

In [None]:
# Recompute the responses for the whole training listing; this overwrites the
# array loaded/flattened earlier in the notebook.
train_response_vector = extractSchmidResponses(x_train_path)

In [None]:
# Persist the recomputed responses (np.save appends ".npy" to the name).
np.save('files/train_responses1200', train_response_vector)

In [None]:
# Termination criteria tuple is (type, max_iter, epsilon): per the OpenCV
# docs, stop after 100 iterations or once the change drops below 0.001.
tc = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 100, 0.001)
# Bag-of-words k-means trainer: 1200 clusters, the criteria above, 1 attempt,
# k-means++ centre initialisation. Clustering can take a long time.
BoW = cv2.BOWKMeansTrainer(1200, tc,1,cv2.KMEANS_PP_CENTERS)
# NOTE(review): `train_response_vector` was just recomputed and not flattened;
# confirm it has the layout/dtype `cluster` expects.
vocabulary = BoW.cluster(train_response_vector)

In [None]:
# Responses for the whole training listing. `x_path` is not defined anywhere
# in the notebook; the training listing loaded above is `x_train_path`.
all_train_responses = extractSchmidResponses(x_train_path)

In [None]:
# NOTE: np.save writes NumPy's binary format and appends ".npy" when the name
# lacks it, so this produces 'files/train_responses.csv.npy', not a CSV file.
np.save('files/train_responses.csv', all_train_responses)

In [None]:
# Cluster the training responses (stray trailing token removed; it was a
# syntax error). clusterResponses returns a (centroids, assignments) tuple.
# NOTE(review): later cells treat `words` as a collection of per-image
# descriptors - confirm which of the two is intended.
words = clusterResponses(all_train_responses)

In [None]:
# NOTE(review): this saves `all_train_responses` again, not the clustering
# result `words`, although the file name says "clusters" - confirm intent.
# np.save appends ".npy", so the file is 'files/train_clusters.csv.npy'.
np.save('files/train_clusters.csv', all_train_responses)

In [None]:
def calculate_centroids_histogram(voc, model, n_clusters=None):
    """Build one cluster-count histogram per item of ``voc``.

    Parameters
    ----------
    voc : iterable
        Each item is a set of descriptors passed to ``model.predict``.
    model : object
        Fitted clustering model exposing ``predict`` (and ``cluster_centers_``
        when ``n_clusters`` is not given).
    n_clusters : int, optional
        Number of histogram bins; defaults to the number of cluster centres of
        ``model``.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(voc), n_clusters)``; entry ``[j, i]`` is
        the number of descriptors of item ``j`` assigned to cluster ``i``.
    """
    if n_clusters is None:
        n_clusters = len(model.cluster_centers_)

    feature_vectors = []
    for item in voc:
        labels = np.asarray(model.predict(item)).astype(np.intp, copy=False)
        # bincount gives bin i == cluster i for every item. np.histogram with
        # a bare bin count spreads its bins between the smallest and largest
        # label present, so bins would not line up across items.
        feature_vectors.append(np.bincount(labels, minlength=n_clusters))

    return np.asarray(feature_vectors)

In [None]:
# Fit a fresh 1200-cluster KMeans; this rebinds `kmeans` from earlier cells.
# NOTE(review): `new_words` is not defined in the visible notebook.
kmeans = KMeans(n_clusters=1200).fit(new_words)

In [None]:
# 75/25 random split; rebinds y_train / y_test loaded at the top.
# NOTE(review): `y` is not defined in the visible notebook.
X_train, X_test, y_train, y_test = train_test_split(words, y, test_size=0.25)

In [None]:
# Histogram features for the training part of the split.
train = calculate_centroids_histogram(X_train, kmeans)

In [None]:
# Histogram features for the held-out part of the split.
test = calculate_centroids_histogram(X_test, kmeans)

In [None]:
# NOTE(review): this second random split rebinds y_train / y_test, while
# `train` / `test` above were built from the first split - the labels used
# below may no longer match those features. Confirm which split is intended.
X_train, X_test, y_train, y_test = train_test_split(preprocessed_image, y, test_size=0.25)

In [None]:
from sklearn.preprocessing import StandardScaler  
# Fit the scaler on the training features only, then apply it to both sets.
scaler = StandardScaler()  
scaler.fit(train)

train = scaler.transform(train)  
test = scaler.transform(test)  

In [None]:
from sklearn.neighbors import KNeighborsClassifier  
# 10-nearest-neighbour classifier on the scaled histograms.
classifier = KNeighborsClassifier(n_neighbors=10)  
classifier.fit(train, y_train) 

In [None]:
# Predicted labels for the held-out features.
y_pred = classifier.predict(test)  

In [None]:
from sklearn.metrics import classification_report, confusion_matrix  
# Evaluation with arguments in (y_true, y_pred) order.
print(confusion_matrix(y_test, y_pred))  
print(classification_report(y_test, y_pred))  

In [None]:
#from sklearn.svm import SVC  
#svclassifier = SVC(kernel='poly', degree=8)  
#svclassifier.fit(preprocessed_image, y_train)  

In [None]:
# NOTE(review): `svclassifier` only appears in the commented-out cell above,
# so this fails on a fresh kernel unless that cell is restored.
y_pred = svclassifier.predict(b)  

In [None]:
from sklearn.metrics import classification_report, confusion_matrix  
# Evaluation with arguments in (y_true, y_pred) order.
print(confusion_matrix(y_test, y_pred))  
print(classification_report(y_test, y_pred))  
