# Logistic Regression Model for Tumour Classification

## Import Libraries

The required libraries are pandas, numpy, os, matplotlib, Pillow (PIL) and scikit-learn (sklearn).

In [1]:
import pandas as pd
import numpy as np
import os
from os import path
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

## Read .csv File 
## Define Directory Name

In [3]:
# Load the label table. Later cells read its `file_name` and `label` columns.
csv_array = pd.read_csv('./dataset/label.csv')

# Folder holding the original images, and folder receiving the down-sized copies.
dir_image = "./dataset/image/"
dir_small_img = "./dataset/small_img/"

# Make sure the output folder exists before any image is saved into it.
# The earlier check compared the bool from path.exists() with the string
# 'false', which is never equal, so the folder was never created.
os.makedirs(dir_small_img, exist_ok=True)

## Reduce Image Size and Store In Array (Image Pre-processing)

In [5]:
from PIL import Image                 # PIL is used for image size reduction

rows = 3000                           # rows is the data set size
basewidth = 128                       # Target image width; original width is 512

# Derive the target height from the first image listed in the CSV so the
# aspect ratio is preserved. The path is built from dir_image rather than an
# absolute path that only exists on one machine.
with Image.open(dir_image + csv_array.file_name[0]) as img:
    wpercent = float(basewidth) / float(img.size[0])
    hsize = int(float(img.size[1]) * wpercent)    # calculated target image height

cols = basewidth * hsize              # cols is width*height after reduction (128*128 = 16384 for square input)
imgs = np.zeros((rows, cols))         # One flattened small image per row

for i, file_name in enumerate(csv_array.file_name[:rows]):
    # Reduce the image's size and save it in the 'small_img' folder.
    # The `with` block closes the source file once the resized copy is saved.
    with Image.open(dir_image + file_name) as img:
        # Image.LANCZOS is the filter formerly also exposed as Image.ANTIALIAS,
        # an alias that Pillow 10 removed.
        small = img.resize((basewidth, hsize), Image.LANCZOS)
        small.save(dir_small_img + file_name)

    # Read the saved small image back, keep only channel 0 of its last axis,
    # and store it as a 1-d row of the 2-d array imgs.
    # NOTE(review): channel 0 equals a grayscale image only if all channels
    # hold the same values - confirm for this data set.
    imgs[i] = mpimg.imread(dir_small_img + file_name)[..., 0].flatten()
print(imgs.shape)

(3000, 16384)


## Split Train/Test Data Set

In [6]:
# Hold out 10% of the samples as the test set. random_state is fixed so the
# same split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    imgs,                      # flattened image pixels, one row per sample
    csv_array.label[:rows],    # labels for the first `rows` samples
    test_size=0.10,
    random_state=0,
)

## Train Logistic Regression Model with Training Data

In [7]:
# Build the classifier: lbfgs solver, an iteration cap of 3000, and
# n_jobs=-1 as the parallelism setting.
logisticRegr = LogisticRegression(
    solver='lbfgs',
    max_iter=3000,
    n_jobs=-1,
)
# Fit the model on the training split only; the test split stays unseen.
logisticRegr.fit(x_train, y_train)

LogisticRegression(max_iter=3000, n_jobs=-1)

## Make Prediction Based On Trained Model For Test Data

In [8]:
# Predict a label for every held-out test sample with the fitted model.
predictions = logisticRegr.predict(x_test)

## Score the Accuracy of Model

In [9]:
# Score the fitted model on the held-out test features and labels
# (the model's accuracy on the test split), then print the value.
score = logisticRegr.score(x_test, y_test)
print(score)

0.7833333333333333
