In [None]:
#The cv2 module is the Python interface to OpenCV (Open Source Computer Vision).
#It is a library for computer vision programming and is used for a wide range of tasks such as image processing,
#video analysis, face detection, object tracking, etc.
#The library provides a vast collection of algorithms for image and video processing,
#and has a large user community, which has contributed a lot of pre-trained models for various computer vision tasks.
import cv2

In [None]:
#The os library in Python provides a way of using operating system 
#dependent functionality like reading or writing to the file system, 
#starting a new process or killing a process, reading or writing to the environment variables,
#etc. It provides a portable way of using these functions across multiple platforms such as Windows, Mac, Linux, etc
import os

In [None]:
#source of dataset
#https://www.kaggle.com/datasets/navoneel/brain-mri-images-for-brain-tumor-detection?resource=download

# Root folder of the downloaded dataset; the scans sit in its 'no/' and 'yes/' sub-folders.
images_dir = 'brain tumor/'
# os.listdir returns entries in arbitrary (filesystem-dependent) order, so sort them:
# the seeded train/test split further down is only reproducible when the samples
# arrive in the same order on every run and every machine.
NoTumor = sorted(os.listdir(images_dir + 'no/'))
print(NoTumor)
len(NoTumor)

In [None]:
# Dataset root, repeated here so this cell can be run on its own.
images_dir = 'brain tumor/'
# Sort the listing: os.listdir order is arbitrary, and the seeded train/test split
# is only reproducible when the samples arrive in a fixed order.
YesTumor = sorted(os.listdir(images_dir + 'yes/'))
YesTumor

In [None]:
from PIL import Image

In [None]:
import numpy as np

In [None]:
# Accumulators filled by the image-loading cells below:
# `dataset` collects the resized image arrays, `label` the matching class ids.
dataset, label = [], []
# Edge length in pixels; every image is resized to size x size.
size = 200

# resize images

In [None]:
import matplotlib.pyplot as plt
# Load every .jpg scan from the 'no/' folder, resize it to size x size and
# store it in `dataset` with class label 0 (no tumor).
for Name in NoTumor:
    # splitext looks only at the final extension, so names with extra dots or with
    # no dot at all are handled (split('.')[1] mis-read the former and raised
    # IndexError on the latter); lower() also accepts '.JPG'.
    if os.path.splitext(Name)[1].lower() != '.jpg':
        continue
    image = cv2.imread(images_dir + 'no/' + Name)
    # cv2.imread returns None instead of raising when a file cannot be decoded.
    if image is None:
        print('Skipping unreadable image:', images_dir + 'no/' + Name)
        continue
    # NOTE(review): cv2 delivers channels in BGR order and Image.fromarray only
    # relabels them as 'RGB', so the stored arrays stay BGR. Left unchanged so the
    # data keeps matching whatever preprocessing is used at prediction time - confirm.
    image = Image.fromarray(image, 'RGB')
    image = image.resize((size, size))
    dataset.append(np.array(image))
    label.append(0)

In [None]:
# Load every .jpg scan from the 'yes/' folder, resize it to size x size and
# store it in `dataset` with class label 1 (tumor).
for Name in YesTumor:
    # splitext looks only at the final extension, so names with extra dots or with
    # no dot at all are handled (split('.')[1] mis-read the former and raised
    # IndexError on the latter); lower() also accepts '.JPG'.
    if os.path.splitext(Name)[1].lower() != '.jpg':
        continue
    image = cv2.imread(images_dir + 'yes/' + Name)
    # cv2.imread returns None instead of raising when a file cannot be decoded.
    if image is None:
        print('Skipping unreadable image:', images_dir + 'yes/' + Name)
        continue
    # NOTE(review): cv2 delivers channels in BGR order and Image.fromarray only
    # relabels them as 'RGB', so the stored arrays stay BGR. Left unchanged so the
    # data keeps matching whatever preprocessing is used at prediction time - confirm.
    image = Image.fromarray(image, 'RGB')
    image = image.resize((size, size))
    dataset.append(np.array(image))
    label.append(1)

In [None]:
import matplotlib.pyplot as plt
# Preview the first loaded image.
# The arrays in `dataset` keep OpenCV's BGR channel order (the cv2.imread output is
# only relabelled as 'RGB', never converted), while imshow expects RGB. Reverse the
# channel axis for display only; the stored data is left untouched.
plt.imshow(dataset[0][:, :, ::-1])
plt.show()

In [None]:
#print(dataset,label)

# convert data into numpy array 

In [None]:
# Turn the two Python lists into NumPy arrays for the split and training cells.
# The right-hand side is fully evaluated before the names are rebound.
dataset, label = np.array(dataset), np.array(label)


In [None]:
# Print the first ten samples (raw pixel array plus class label) as a sanity check.
for position in range(1, 11):
    print("Image", position)
    print(dataset[position - 1])
    print("Label:", label[position - 1])


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the samples for testing; random_state=0 fixes the shuffle seed.
x_train, x_test, y_train, y_test = train_test_split(
    dataset,
    label,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Show the shape of the training array as a quick check of the split.
print(x_train.shape)

In [None]:
#import numpy as np
#
#np.save('x_train.npy', x_train)
#np.save('x_test.npy', x_test)
#np.save('y_train.npy', y_train)
#np.save('y_test.npy', y_test)

# normalize data

In [None]:
#The following cells import TensorFlow together with classes and helpers from the
#keras.utils, keras.models and keras.layers modules of the Keras library.
#Keras is a high-level neural networks API,
#written in Python and capable of running on top of TensorFlow.
#The specific layer classes imported here are for building Convolutional Neural Network (ConvNet/CNN) models in Keras.

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
from keras.utils import normalize

In [None]:
#The statement from keras.models import Sequential is used to import the Sequential class from the Keras models module.
#In Keras, the Sequential class is used to create a linear stack of layers,
#which can be used to build a neural network model. A Sequential model is defined as a sequence of layers, 
#where each layer is added to the model one after the other.
from keras.models import Sequential


In [None]:
#Conv2D: Implements the 2D convolution operation for spatial data,which is commonly used in image classification.
#MaxPooling2D: Implements max pooling operation for spatial data, 
#which is used for down-sampling and reducing the spatial dimensions of the data while retaining the important features.

    
from keras.layers import Conv2D , MaxPooling2D

In [None]:
#Activation: Implements activation functions such as ReLU, sigmoid, etc., which are used to introduce non-linearity into the model.
#Dropout: Implements the dropout regularization technique, which is used to prevent overfitting in the model by randomly dropping out some units during training.
#Flatten: Implements the flattening operation, which is used to convert multi-dimensional arrays into single-dimensional arrays before passing them through fully connected layers.
#Dense: Implements fully connected layers, which are used to make predictions based on the features learned by the ConvNet.

from keras.layers import Activation ,Dropout,Flatten, Dense

In [None]:
#The next two cells normalize the data in the x_train and x_test arrays along axis=1
#(for image arrays shaped (samples, height, width, channels) this is the height axis, not the columns).
#"Normalizing" refers to the process of scaling the values in
#the array so that they have a unit norm (i.e., a length of 1 in a multi-dimensional space).
#This is often done to preprocess the data before using it in machine learning models, 
#so that the variables are on a similar scale. 
#The normalize function used here is keras.utils.normalize, imported above
#(it is not the scikit-learn function of the same name).

# import data + models ++ build model

In [None]:
# Scale the training images with keras.utils.normalize along axis 1.
# NOTE(review): for image batches axis 1 is presumably the height axis, so every
# pixel column is scaled on its own instead of rescaling all pixels to [0, 1] -
# confirm this is intended and apply the identical call to images passed to predict().
x_train = normalize(x_train,axis=1)

In [None]:
# Scale the test images exactly like the training images (keras.utils.normalize, axis 1).
# NOTE(review): axis 1 is presumably the image height axis - confirm this is intended.
x_test = normalize(x_test,axis=1)

In [None]:
# Start an empty Sequential model; the following cells append layers with model.add().
model = Sequential()

# Filter

In [None]:
#This code defines a neural network architecture in the Keras library.
#The architecture consists of three layers: a convolutional layer, an activation layer, and a max pooling layer.

#The first line defines the first layer in the network, a 2D convolutional layer. It has 32 filters of size (3, 3),
#and the input shape is defined as (size, size, 3),
#where size is a variable that specifies the size of the input image (presumably 200 in this case).

#The second line adds a ReLU activation layer,
#which applies the Rectified Linear Unit activation function to the output of the convolutional layer. 
#The ReLU activation function returns the input if it is positive, and returns 0 if it is negative.

#The third line adds a max pooling layer,
#which down-samples the spatial dimensions of the data.
#The pool_size argument specifies the size of the max pooling window as (2, 2).
#This means that the max pooling layer will take the maximum value from each 2x2 region in the input
#and produce a lower-resolution output with half the number of rows and columns.

In [None]:
# First convolution stage: 32 filters of 3x3 on the size x size x 3 input,
# followed by ReLU and 2x2 max pooling.
# NOTE(review): unlike the two later stages, this Conv2D sets no kernel_initializer -
# confirm that is intended.
first_stage = (
    Conv2D(32, (3, 3), input_shape=(size, size, 3)),  # 1
    Activation('relu'),                               # 2
    MaxPooling2D(pool_size=(2, 2)),                   # 3
)
for stage_layer in first_stage:
    model.add(stage_layer)

In [None]:
# Second convolution stage: 32 filters of 3x3 (he_uniform initialised),
# followed by ReLU and 2x2 max pooling.
second_stage = (
    Conv2D(32, (3, 3), kernel_initializer='he_uniform'),  # 1
    Activation('relu'),                                   # 2
    MaxPooling2D(pool_size=(2, 2)),                       # 3
)
for stage_layer in second_stage:
    model.add(stage_layer)

In [None]:
# Third convolution stage: 64 filters of 3x3 (he_uniform initialised),
# followed by ReLU and 2x2 max pooling.
third_stage = (
    Conv2D(64, (3, 3), kernel_initializer='he_uniform'),  # 1
    Activation('relu'),                                   # 2
    MaxPooling2D(pool_size=(2, 2)),                       # 3
)
for stage_layer in third_stage:
    model.add(stage_layer)

In [None]:
#The idea behind adding multiple sets is to extract different features from the input images at different scales and resolutions,
#which can improve the accuracy of the model.

# Flattening

In [None]:
#This code defines the rest of the neural network architecture in the Keras library.
#It consists of 6 additional layers: a flatten layer, two dense layers, two activation layers, and a dropout layer.

#The first line adds a flatten layer,
#which takes the multi-dimensional output from the previous layer and flattens it into a single dimension,
#so it can be fed into the dense layers.

#The second line adds a dense layer with 64 neurons, which is a fully connected layer.

#The third line adds a ReLU activation layer,
#which applies the Rectified Linear Unit activation function to the output of the dense layer.

#The fourth line adds a dropout layer with a rate of 0.5,
#which randomly drops out 50% of the neurons during each training iteration.
#This is a regularization technique used to prevent overfitting in the model.

#The fifth line adds a dense layer with 1 neuron, which will produce the final prediction.

#The sixth line adds a sigmoid activation layer,
#which applies the sigmoid activation function to the output of the dense layer.
#This is often used for binary classification problems, where the output should be a value between 0 and 1,
#indicating the probability of belonging to one class or another.

In [None]:
# Classifier head. 'relu' introduces non-linearity in the hidden dense layer,
# while 'sigmoid' produces the binary classification output.
head_layers = [
    Flatten(),              # 1: flatten the feature maps into a single vector
    Dense(64),              # 2: fully connected layer with 64 units
    Activation('relu'),     # 3
    Dropout(0.5),           # 4: dropout with rate 0.5
    Dense(1),               # 5: one unit, because the two folders (no / yes) are encoded as 0 and 1
    Activation('sigmoid'),  # 6
]
for head_layer in head_layers:
    model.add(head_layer)

In [None]:
#This line of code compiles the neural network model defined in the previous steps. 
#It sets the loss function, optimization algorithm, and evaluation metrics for the model.

#loss='binary_crossentropy' sets the loss function for the model. 
#This is the common choice for binary classification problems,
#where the target values are binary (0 or 1).
#The binary cross-entropy loss measures the dissimilarity between the predicted probability distribution
#and the true distribution.

#optimizer='adam' sets the optimization algorithm used to update the model weights during training.
#Adam is a widely used optimization algorithm that adaptively changes the learning rate 
#for each weight based on the historical gradient information.

#metrics=['accuracy'] specifies the metrics to be used to evaluate the performance of the model. 
#In this case, the accuracy metric is used, which calculates the fraction of correct predictions made by the model.

In [None]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
#This line of code fits the compiled Keras model to the training data.
#x_train and y_train are the training data and labels, respectively.

#batch_size=16 specifies the number of samples to be used in each update of the model weights during training.
#In this case, the model will update its weights after every 16 samples.

#verbose=1 specifies the verbosity level of the training output. 
#1 means that the progress bar will be displayed during training.

#epochs=20 specifies the number of times to go through the entire training dataset during training.

#validation_data=(x_test, y_test) specifies the validation data and labels to be used for evaluating the model after each epoch.
#This helps to track the performance of the model during training and detect overfitting.

#shuffle=False specifies whether or not to shuffle the training data before each epoch.
#If False, the data will be used in the order it was passed to fit()

In [None]:
# Train for 20 epochs in batches of 16; the held-out split is passed as validation data.
# shuffle=False keeps the training samples in the order they were passed in.
model.fit(
    x_train,
    y_train,
    batch_size=16,
    epochs=20,
    verbose=1,
    validation_data=(x_test, y_test),
    shuffle=False,
)

In [None]:
#This line of code saves the trained machine learning model as an .h5 file.
#.h5 is a file format for storing large amounts of numerical data. 
#The h5 in BrainTumorDetection.h5 refers to the Hierarchical Data Format version 5,
#which is a popular file format for storing large datasets in machine learning applications.

In [None]:
# Write the trained model to 'BrainTumorDetection.h5' (relative path, so it lands in the working directory).
model.save('BrainTumorDetection.h5')

In [None]:
#sources
#https://www.nature.com/articles/s41598-022-05572-6
#https://www.tensorflow.org/api_docs/python/tf/keras/Sequential
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9468505/
#https://www.youtube.com/watch?v=7MceDfpnP8k&t=22s
#https://keras.io/api/layers/convolution_layers/convolution2d/
#https://keras.io/api/layers/pooling_layers/max_pooling2d/
#https://keras.io/api/layers/core_layers/dense/
#https://towardsdatascience.com/everything-you-need-to-know-about-activation-functions-in-deep-learning-models-84ba9f82c253
#https://www.superdatascience.com/blogs/convolutional-neural-networks-cnn-step-3-flattening
#https://machinelearningmastery.com/using-activation-functions-in-deep-learning-models/