In [None]:
import cv2 as cv
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import math as m
import heapq

from random import choice
from sklearn import preprocessing
from sklearn.utils import shuffle

In [None]:

class KNN:
    """Brute-force k-nearest-neighbours classifier for binary labels (0.0 / 1.0).

    Every data set passed to the methods is a 2-D numpy array whose last
    column holds the class label and whose remaining columns hold the features.
    """

    def __init__(self, k, validation, metric):
        """Store the hyper-parameters.

        Args:
            k: number of nearest neighbours that vote for the class.
            validation: validation-mode tag; stored, but not read by any
                method of this class.
            metric: distance metric, 'l1' (sum of absolute differences) or
                'l2' (Euclidean distance).
        """
        self.k = k
        self.validation = validation
        self.metric = metric

    def learning(self, set_data_train, set_data_valid):
        """Compute the distance from every validation row to every training row.

        Args:
            set_data_train: 2-D array, features in columns [:-1], label in column -1.
            set_data_valid: 2-D array with the same column layout.

        Returns:
            A two-element list ``[distances, validation_labels]`` where
            ``distances[i]`` is a list of ``(distance, train_label)`` tuples
            (one per training row, in training order) for validation row ``i``
            and ``validation_labels[i]`` is the true label of that row.

        Raises:
            ValueError: if ``self.metric`` is neither 'l1' nor 'l2'.
        """
        # Use the metric configured on the instance, not a module-level name.
        if self.metric not in ('l1', 'l2'):
            raise ValueError("metric must be 'l1' or 'l2', got %r" % (self.metric,))

        train_features = set_data_train[:, :-1]
        train_labels = set_data_train[:, -1]
        valid_features = set_data_valid[:, :-1]
        valid_labels = set_data_valid[:, -1]

        common_container = []
        validation_label_container = []

        for valid_row, valid_label in zip(valid_features, valid_labels):
            # Broadcasting: one validation row against all training rows at once.
            differences = train_features - valid_row

            if self.metric == 'l1':
                distances = np.sum(np.absolute(differences), axis=1)
            else:
                squared_sums = np.sum(np.square(differences), axis=1)
                distances = [m.sqrt(total) for total in squared_sums]

            common_container.append(list(zip(distances, train_labels)))
            validation_label_container.append(valid_label)

        return [common_container, validation_label_container]

    def class_definition(self, learning_data):
        """Predict a class for every validation row by majority vote.

        Args:
            learning_data: the ``[distances, validation_labels]`` list
                returned by ``learning``.

        Returns:
            A 2-D array with one row per validation sample: column 0 is the
            predicted class, column 1 is the true class. A tie between the two
            classes is broken at random.
        """
        class_prediction = []

        for i in range(len(learning_data[1])):
            # Use the k configured on the instance, not a module-level name.
            nearest_neighbours = heapq.nsmallest(self.k, learning_data[0][i])

            # Counters are local to the sample so that votes from previous
            # samples cannot influence this prediction.
            votes_zero = sum(1 for _, label in nearest_neighbours if label == 0.0)
            votes_one = sum(1 for _, label in nearest_neighbours if label == 1.0)

            if votes_zero > votes_one:
                class_prediction.append(0.0)
            elif votes_zero < votes_one:
                class_prediction.append(1.0)
            else:
                class_prediction.append(choice((0, 1)))

        predicted_column = np.transpose(np.array([class_prediction]))
        true_column = np.transpose(np.array([learning_data[1]]))
        return np.concatenate((predicted_column, true_column), axis=1)

    def accuracy(self, class_prediction):
        """Return the fraction of rows whose predicted class equals the true class.

        Args:
            class_prediction: 2-D array as returned by ``class_definition``
                (column 0 predicted, column 1 true).

        Raises:
            ZeroDivisionError: if ``class_prediction`` has no rows.
        """
        matches = sum(1 for predicted, actual in class_prediction if predicted == actual)
        return matches / len(class_prediction)

In [None]:
# Loading and initial processing of the data
def input_data(dataset_dir='images/brain_tumor_dataset', class_counts=(81, 154),
               image_size=(250, 250), show_images=True):
    """Load the MRI images and build the labelled data matrix.

    Images are read as grayscale from ``<dataset_dir>/no/1 (i).jpg`` and
    ``<dataset_dir>/yes/1 (i).jpg`` with ``i`` running from 1 to the matching
    entry of ``class_counts``, resized to ``image_size`` and flattened to one row.

    Args:
        dataset_dir: directory that contains the 'no' and 'yes' sub-directories.
        class_counts: number of images to read from 'no' and from 'yes'.
        image_size: target ``(width, height)`` passed to ``cv.resize``.
        show_images: when true, display every resized image while loading.

    Returns:
        A float array with one row per image; the last column is the class
        label (0.0 for 'no', 1.0 for 'yes'), the other columns are the pixels.

    Raises:
        FileNotFoundError: if an expected image cannot be read.
    """
    class_names = ('no', 'yes')
    rows = []
    labels = []

    for label, (name_part, count) in enumerate(zip(class_names, class_counts)):
        for i in range(1, count + 1):

            # Load the image (flag 0 = grayscale)
            s = dataset_dir + '/' + str(name_part) + '/1 (' + str(i) + ').jpg'
            img = cv.imread(s, 0)
            if img is None:
                # cv.imread signals an unreadable file by returning None
                # instead of raising; fail here with the offending path.
                raise FileNotFoundError('Cannot read image: ' + s)

            # Bring every image to the same resolution
            output_img = cv.resize(img, image_size, interpolation=cv.INTER_AREA)

            # Show the MRI image
            if show_images:
                plt.subplots(figsize=(5, 5))
                plt.imshow(output_img, cmap='gray')
                plt.title('Resizing Image' + str(i))
                plt.xticks([])
                plt.yticks([])
                plt.show()

            # Flatten the matrix into a row; the label is recorded together
            # with the row so the two can never get out of step.
            rows.append(np.ravel(output_img))
            labels.append(float(label))
            print(s)

    if not rows:
        return np.zeros((0, image_size[0] * image_size[1] + 1))

    # Build the matrix once instead of re-concatenating inside the loop.
    array_data = np.array(rows, dtype=np.float64)
    array_outcome = np.transpose(np.array([labels], dtype=np.float64))
    return np.concatenate((array_data, array_outcome), axis=1)

# Build the labelled data matrix: one row per image, class label in the last column.
dataset = input_data()

In [None]:
###########################
# Experiment settings
k = 8  # number of nearest neighbours that vote
validation = 'valid'  # alternative listed by the author: 'cross_valid' (the KNN class only stores this value)
metric = 'l2'  # alternative: 'l1'
###########################

model = KNN(k, validation, metric)

In [None]:
# Split settings
SPLIT_SEED = 42            # fixed seed so a fresh run reproduces the same split
TEST_FRACTION = 0.2        # share of all rows held out for the final test
VALIDATION_FRACTION = 0.2  # share of the remaining rows used for validation

# Shuffle into a new name so that re-running this cell yields the same split.
dataset_shuffled = shuffle(dataset, random_state=SPLIT_SEED)

n_test = int(len(dataset_shuffled) * TEST_FRACTION)
n_validation = int((len(dataset_shuffled) - n_test) * VALIDATION_FRACTION)

# Three disjoint row ranges: validation rows are not part of the training rows,
# otherwise every validation sample would find itself at distance 0.
# .copy() gives each split its own memory, so modifying one split in place
# cannot alter another.
test_numpy = dataset_shuffled[:n_test].copy()
validation_numpy = dataset_shuffled[n_test:n_test + n_validation].copy()
train_numpy = dataset_shuffled[n_test + n_validation:].copy()

assert len(test_numpy) + len(validation_numpy) + len(train_numpy) == len(dataset_shuffled)

# For normalization (these names refer to the same array objects as above)
train_numpy_norm = train_numpy
validation_numpy_norm = validation_numpy
test_numpy_norm = test_numpy
_data_to_norm = (train_numpy_norm, validation_numpy_norm, test_numpy_norm)

In [None]:
# Inspect the (train, validation, test) splits before normalization.
_data_to_norm

In [None]:

# Normalization
# Fit the scaler on the training features only and reuse it for every split,
# so train, validation and test live in the same feature space and no
# statistics of the held-out data leak into the preprocessing.
feature_scaler = preprocessing.StandardScaler().fit(_data_to_norm[0][:, :-1])

# Transform everything first and write back afterwards: if two splits happen
# to share memory, no row is scaled twice.
scaled_features = [feature_scaler.transform(split[:, :-1]) for split in _data_to_norm]
for split, scaled in zip(_data_to_norm, scaled_features):
    # Only the feature columns are replaced; the label column stays untouched.
    split[:, :-1] = scaled


In [None]:
# Inspect the same splits after their feature columns were normalized in place.
_data_to_norm

In [None]:
# Unpack the splits in the order they were packed: train, validation, test.
set_data_train = _data_to_norm[0]
set_data_valid = _data_to_norm[1]
set_data_test = _data_to_norm[2]

In [None]:
# Compute the distances between the validation samples and the training samples:
learning_data = model.learning(set_data_train, set_data_valid)

In [None]:
# Determine the class of every validation sample
prediction = model.class_definition(learning_data)

In [None]:
# Column 0: predicted class, column 1: true class (one row per sample).
prediction

In [None]:
# Fraction of samples whose predicted class equals the true class.
model.accuracy(prediction)

In [None]:
# Reconstruct the images
sample_count = set_data_valid.shape[0]
for sample_idx in range(sample_count):
    # Drop the label column and fold the flat feature row back into a 250x250 picture
    picture = set_data_valid[sample_idx, :-1].reshape((250, 250))

    plt.subplots(figsize=(5, 5))
    plt.imshow(picture, cmap='gray')
    plt.title('Resizing Image' + str(sample_idx))
    plt.xticks([])
    plt.yticks([])
    plt.show()

    predicted_value = prediction[sample_idx][0]
    true_value = prediction[sample_idx][1]
    print("Предсказанное значение: ", predicted_value, ' ', "Истинное значение: ", true_value)

In [None]:
# Testing
# NOTE: `set_data_valid` is rebound to the test split here, so every later
# read of `set_data_valid` sees the test data, not the validation data.
set_data_valid = set_data_test
learning_data = model.learning(set_data_train, set_data_valid)
prediction = model.class_definition(learning_data)
prediction

In [None]:
# Accuracy of the most recently computed `prediction` (the test split when cells run in order).
model.accuracy(prediction)

In [None]:
# Reconstruct the images (test)
row_total = set_data_valid.shape[0]
for row_idx in range(row_total):
    # All columns except the trailing label form one flattened 250x250 image
    flat_pixels = set_data_valid[row_idx, :-1]
    image_2d = flat_pixels.reshape((250, 250))

    plt.subplots(figsize=(5, 5))
    plt.imshow(image_2d, cmap='gray')
    plt.title('Resizing Image' + str(row_idx))
    plt.xticks([])
    plt.yticks([])
    plt.show()

    result_row = prediction[row_idx]
    print("Предсказанное значение: ", result_row[0], ' ', "Истинное значение: ", result_row[1])