# Breast Cancer Recurrence Prediction

## Imports

In [2]:
from fastai import *
from fastai.tabular import *

import numpy as np
import pandas as pd
import math
import random
import torch

import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
plt.style.use('fivethirtyeight')

## The Dataset

In [3]:
# Read the raw dataset from the working directory into a DataFrame.
cancer = pd.read_csv(filepath_or_buffer='breastcancer.csv')

In [4]:
# Seed every RNG this notebook relies on, not just NumPy's: the model is
# trained through PyTorch, which keeps its own generator, so seeding NumPy
# alone does not make the training cells repeatable.
SEED = 5
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Shuffle all rows and renumber them 0..n-1. Passing random_state pins the
# shuffle to SEED regardless of how much of the global NumPy stream has
# already been consumed.
# NOTE: this cell rebinds `cancer`; re-running it on its own shuffles the
# already-shuffled frame, so re-run from the read_csv cell instead.
cancer = cancer.sample(frac=1, random_state=SEED).reset_index(drop=True)
cancer.shape

(286, 10)

In [5]:
# Total number of missing cells across the whole frame.
int(cancer.isnull().values.sum())

9

In [6]:
# Location of the CSV; not referenced by the other cells shown in this notebook.
path = Path('breastcancer.csv')

# Column the model is trained to predict.
dep_var = 'Class'

# Every feature column is treated as categorical.
cat_names = [
    'age',
    'menopause',
    'tumor-size',
    'inv-nodes',
    'node-caps',
    'deg-malig',
    'breast',
    'breast-quad',
    'irradiat',
]

# No column is treated as continuous.
cont_names = []

# Preprocessing steps handed to the fastai tabular pipeline.
# NOTE(review): with no continuous columns, FillMissing and Normalize
# presumably have nothing to act on — confirm against the fastai docs.
procs = [FillMissing, Categorify, Normalize]

In [7]:
# Hold out the last VALID_SIZE rows of the shuffled frame for validation;
# the remaining rows are used for training.
VALID_SIZE = 86
valid_idx = cancer.tail(VALID_SIZE).index

# Build the DataBunch: features from `cancer`, split by explicit row index,
# labels taken from the `dep_var` column. cont_names is passed explicitly so
# the configuration cell and this call stay in agreement.
data = (
    TabularList.from_df(
        df=cancer,
        cat_names=cat_names,
        cont_names=cont_names,
        procs=procs,
    )
    .split_by_idx(valid_idx)
    .label_from_df(cols=dep_var)
    .databunch()
)


## The Learner

In [8]:
# Tabular model with two hidden layers (1000 and 250 units), reporting
# accuracy on the validation rows after each epoch.
# NOTE(review): the frame has 286 rows and 86 are held out, leaving 200 for
# training — this network may be oversized for that; consider smaller layers.
learner = tabular_learner(
    data,
    layers=[1000, 250],
    metrics=accuracy,
)

In [9]:
# Train with the one-cycle schedule: 50 epochs, peak learning rate 1e-2.
# NOTE(review): the saved table below lists only epochs 0-9 although 50 are
# requested, accuracy is 0.686047 on every listed epoch, and valid_loss rises
# after epoch 7 — re-run to refresh the output and revisit the setup.
learner.fit_one_cycle(
    50,
    max_lr=1e-2,
)

epoch,train_loss,valid_loss,accuracy,time
0,0.687751,0.66999,0.686047,00:01
1,0.629969,0.666752,0.686047,00:00
2,0.58419,0.662023,0.686047,00:00
3,0.552926,0.654979,0.686047,00:00
4,0.523966,0.646963,0.686047,00:00
5,0.498188,0.635587,0.686047,00:00
6,0.471275,0.624667,0.686047,00:00
7,0.441509,0.622023,0.686047,00:00
8,0.407986,0.632345,0.686047,00:00
9,0.381753,0.66459,0.686047,00:00


## The Results and Predictions

In [10]:
# Inspect one held-out example: row 250 lies in the last 86 rows (200-285)
# that were set aside for validation.
cancer.iloc[250]

age                        40-49
menopause                   ge40
tumor-size                 20-24
inv-nodes                    3-5
node-caps                     no
deg-malig                      3
breast                     right
breast-quad             left_low
irradiat                     yes
Class          recurrence-events
Name: 250, dtype: object

In [11]:
# Run the trained model on row 250. The row is passed as-is, so it still
# carries its `Class` label alongside the feature columns.
# NOTE(review): the saved output predicts no-recurrence-events while this
# row's Class is recurrence-events.
learner.predict(cancer.iloc[250])

(Category no-recurrence-events, tensor(0), tensor([0.9772, 0.0228]))

In [12]:
def predict(row_number, df=None, model=None):
    """Predict the class of a single row selected by position.

    Args:
        row_number: Positional index of the row (passed to ``.iloc``;
            negative values count from the end).
        df: DataFrame to take the row from. Defaults to the notebook-level
            ``cancer`` frame, looked up at call time.
        model: Object exposing ``predict(row)``. Defaults to the
            notebook-level ``learner``, looked up at call time.

    Returns:
        Whatever ``model.predict`` returns for the row. For the fastai
        learner in this notebook the saved outputs show a
        ``(category, class-index tensor, probability tensor)`` triple.

    Raises:
        IndexError: If ``row_number`` is out of bounds for the frame.
    """
    # Resolve the defaults lazily so that rebinding `cancer` / `learner` in a
    # later cell is picked up, exactly as the original global lookups were.
    frame = cancer if df is None else df
    fitted = learner if model is None else model
    return fitted.predict(frame.iloc[row_number, :])

In [13]:
# Predict for row 280, which also lies in the held-out last 86 rows (200-285).
predict(280)

(Category no-recurrence-events, tensor(0), tensor([0.8358, 0.1642]))

In [14]:
# Show the actual record (including its true `Class`) for row 280 so it can
# be compared with the prediction above.
cancer.iloc[280]

age                           30-39
menopause                   premeno
tumor-size                    25-29
inv-nodes                       0-2
node-caps                        no
deg-malig                         2
breast                         left
breast-quad                left_low
irradiat                         no
Class          no-recurrence-events
Name: 280, dtype: object