# Breast Cancer Detection using Machine Learning

##  enviroment and global variables 

In [1]:
import os
import glob
import numpy as np
import pandas as pd

from sklearn.preprocessing import OneHotEncoder
from sklearn.utils import shuffle
from sklearn.utils.class_weight import compute_class_weight


import matplotlib.pyplot as plt
import cv2


import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam



In [2]:
TRAIN_CSV = "./data/train/feats.csv"
TRAIN_IMG_DIR = "./data/train/images"

TEST_CSV = "./data/test/feats.csv"
TEST_IMG_DIR = "./data/test/images"

CLEAN_DATA_FILE = "./data/train/cleaned_data.csv"

MODEL_PATH = "./model"
RESULT_FILE = "./result/submission.csv"

IMG_HEIGHT = 320
IMG_WIDTH = 320

## 1. data anaylysis

In [3]:
train_df = pd.read_csv(TRAIN_CSV)
train_df.head()

Unnamed: 0,id,age,HER2,P53,molecular_subtype
0,8a1af6f74cafc720511888998f2f361767c77965a07464...,48,3,True,3
1,b18adad8c39bb458f208581fe40e9be7b04f2b49be04b0...,54,3,False,2
2,8b9af020d0e621168fd0f3913f2cd74e8bb5ea85f61ee0...,55,2,True,3
3,1a0e54f07794fbb95a0fd7f0b4ef6f79ef1a9c83dbaa05...,66,0,False,4
4,2805706a656b013743d84357d5ffa44536e19d18f0509f...,31,0,False,1


In [4]:
print(train_df.shape)
train_df.groupby('molecular_subtype')['id'].nunique()

(200, 5)


molecular_subtype
1    58
2    98
3    33
4    11
Name: id, dtype: int64

In [5]:
test_df = pd.read_csv(TEST_CSV)
test_df.head()

Unnamed: 0,id,age,HER2,P53
0,d6b47f0c2ccbbd7923e37dc434ab25445df6bc060d5338...,57,0,False
1,2e9d400ee8450c30d19c43a20e9da770774d3169da4574...,36,0,False
2,72f92da6ca6d521221baee367b5c76fcbfe61edc00c0bc...,81,0,False
3,ca7e160144dd6f459e2b18c48a33d4a53d09de5d7c2557...,82,0,False
4,d65889918d2b9073e7601c2ba67c3a9d26f60612f14e93...,59,2,False


## data preprocessing

### each id may have more than one image. Create 1 to 1 mapping between image path and label. 

In [6]:
f = open(CLEAN_DATA_FILE, "w")
for idx, row in train_df.iterrows():
    image_path_list = glob.glob(os.path.join(TRAIN_IMG_DIR, row["id"], "*.*"))
    for image_path in image_path_list:
        f.write(f'{image_path}, {row["molecular_subtype"]}\n')

f.close()
print(f"cleaned mapping stored in {CLEAN_DATA_FILE}.")

cleaned mapping stored in ./data/train/cleaned_data.csv.


In [7]:
print(dir(tf.keras.applications))
'''
base_model = tf.keras.applications.ResNet50(input_shape=(480, 480, 3),
                                               include_top=False,
                                               weights='imagenet')
                                               '''

['DenseNet121', 'DenseNet169', 'DenseNet201', 'InceptionResNetV2', 'InceptionV3', 'MobileNet', 'MobileNetV2', 'NASNetLarge', 'NASNetMobile', 'ResNet101', 'ResNet101V2', 'ResNet152', 'ResNet152V2', 'ResNet50', 'ResNet50V2', 'VGG16', 'VGG19', 'Xception', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '_sys', 'densenet', 'imagenet_utils', 'inception_resnet_v2', 'inception_v3', 'mobilenet', 'mobilenet_v2', 'nasnet', 'resnet', 'resnet50', 'resnet_v2', 'vgg16', 'vgg19', 'xception']


"\nbase_model = tf.keras.applications.ResNet50(input_shape=(480, 480, 3),\n                                               include_top=False,\n                                               weights='imagenet')\n                                               "

### process training image path and convert labels to one-hot encoding

In [8]:
train_image_path = []
train_raw_label = []
with open(CLEAN_DATA_FILE, "r") as f:
    lines = f.readlines()
    for line in lines:
        image_path, label = line.split(",")
        label = label.strip() 
        train_image_path.append(image_path)
        train_raw_label.append(label)

        

#print(list(train_raw_label))
from collections import Counter
c = Counter(list(train_raw_label))
print(c)        

train_image_path = np.asarray(train_image_path)
train_raw_label = np.asarray(train_raw_label).reshape(-1, 1)
train_image_path, train_raw_label = shuffle(train_image_path, train_raw_label)

encoder = OneHotEncoder(sparse=False)
train_onehot_label = encoder.fit_transform(train_raw_label)

#[print(i, j, k) for i, j, k in zip(train_image_path, train_onehot_label, train_raw_label)]

print(f"{train_image_path.shape}, {train_onehot_label.shape}")




Counter({'2': 472, '1': 267, '3': 136, '4': 50})
(925,), (925, 4)


### image preprocessing function

In [9]:
def preprocess_image(image_path):
    
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    image = cv2.resize(image, (IMG_HEIGHT, IMG_WIDTH))
    image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    
    image = cv2.normalize(image, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

    
    image = np.asarray(image)
    print(image_path, np.max(image), np.min(image), np.mean(image))
    #image = image /255.0
    #image -= 0.5
    #print(image_path, np.max(image), np.min(image), np.mean(image))
    
    #print(image.shape)
    return image

train_X = []
for image_path in train_image_path:
    train_X.append(preprocess_image(image_path))

train_X = np.asarray(train_X)
#train_X = np.expand_dims(train_X)


train_Y = train_onehot_label

print(train_X.shape, train_Y.shape)

#[print(j) for i, j in zip(train_X, train_Y)]


./data/train/images\0101c6663374a5642ddd7797c30be68580aca958778b401c6cd5025c2e521e0c\4.jpg 1.0 0.0 0.19692859
./data/train/images\acdfb816ad14915969eb89e35e2f743ad3147bbb7201f298966f7ce9bb239d1f\2.jpg 1.0 0.0 0.16586532
./data/train/images\3f6ea1c2520b77bfcb9feeeae296c877dd99c4cddd3a1ec47a6cfed1f00faecb\7.jpg 1.0 0.0 0.17125367
./data/train/images\c384353571e553aee109fefca0193c9857eeefa33b3eb2677bee2103f3b47a91\3.tif 1.0 0.0 0.20171218
./data/train/images\c97db71c13153820e3215a7f83fe220de808fa5138fe9174b9f75cafaf52e510\2.jpg 1.0 0.0 0.13946952
./data/train/images\d3e2eea0af3b1f27ba8d12a6b3fb84b50a0abdae07e402de0f42e398e4fec68b\2.tif 1.0 0.0 0.14553109
./data/train/images\b4d96c1b6f43d9ed9a74ae48099c98f3b487c9f2d1e817aae2b19479a952ea04\3.tif 1.0 0.0 0.10003671
./data/train/images\747343a12696021f350bf278f268e94d4791226fae87f074d2ee5b543d8b28bd\3.jpg 1.0 0.0 0.1846972
./data/train/images\835abcb983db5de376a04fd48d75fdbfc7cac5092eb3af59b0396fb693bcd589\2.tif 1.0 0.0 0.24320556
./data/trai

./data/train/images\a346f1a28846093e80f3afd5ee06bd81fe7ed42f30ef95762b762dfa6a019fda\2.jpg 1.0 0.0 0.19745904
./data/train/images\f05c15c05b722e61362f4a82b0c826e6ba520402ee3b0da000a2e8dbb862a86b\3.tif 1.0 0.0 0.16222398
./data/train/images\832182c229f0808a5622e46c0b379611505a5b21e9e631c1e3c7791830f8eab7\3.jpg 1.0 0.0 0.17007743
./data/train/images\d189aebb7f9b0f0fe372eb49da3c6c0d10626de431c5e34ac955dd0ed96e803a\4.jpg 1.0 0.0 0.15013304
./data/train/images\5ad5ff3bc7a572d13c2d0c991e739df69e849011bfcfd3dffcd2e39fbed1bc4e\2.jpg 1.0 0.0 0.16972071
./data/train/images\5ad5ff3bc7a572d13c2d0c991e739df69e849011bfcfd3dffcd2e39fbed1bc4e\4.jpg 1.0 0.0 0.15770246
./data/train/images\546bea8fc6370d214f6d661584552efd03a47b2bf62dafacb47ee98782149ccc\4.jpg 1.0 0.0 0.16703935
./data/train/images\78e14508621be63fe1a6469df276a1f0a9dacb816c9c90d5f7f3a458b3873aab\3.jpg 1.0 0.0 0.3366573
./data/train/images\783d964941972fa430f8ed5e1bc6d9115753c7f7af9094046e1caa933fd578a8\4.jpg 1.0 0.0 0.19768678
./data/trai

./data/train/images\e04692446cec243e343739144de86d582eeb5eb77f1c3c7e5083636dad936cd9\4.jpg 1.0 0.0 0.11293132
./data/train/images\9e89f205900e42aa3607d662938e457b928e8775153a04191a2d7cf7e7247874\3.jpg 1.0 0.0 0.11614686
./data/train/images\8251d60fe2b0268685f2caa29e39fc67e91c44216552acbe263bb4e5ce8974df\4.jpg 1.0 0.0 0.12682536
./data/train/images\c34cae1e6affb27847f7d5d9c8ab807a59960fade1e577b3c5c93301c09973bc\2.jpg 1.0 0.0 0.15096934
./data/train/images\8a2e7f29b613b9afbfd09b86c5435879c22c4400f59cf3d08ebcd1144b271d80\6.jpg 1.0 0.0 0.12561256
./data/train/images\6253d0b0942da897da924939b13a5263acb9e72064553d98c41a151664b4ef06\3.jpg 1.0 0.0 0.22958006
./data/train/images\7d83911c950cddd5fbadfaf20ab2c230b4f5ecb059024e34b2a006cbce5a803e\3.jpg 1.0 0.0 0.15776303
./data/train/images\b53d1b015707ea4f9614c41fb8c73eb2c7a849aabceb1e98eedf57e5d5218b28\6.jpg 1.0 0.0 0.2002619
./data/train/images\25cdd0028d691dfb5e83d856cd65a515a697847c68452e9b421bad12d7e4abb0\1.tif 1.0 0.0 0.14579464
./data/trai

./data/train/images\4a70f5de3e74d8571ad934db5ec244f7112fdca9d05c63e90193b09cd1b7fe92\8.jpg 1.0 0.0 0.16270213
./data/train/images\9c4f89478fc10721f79a12ad507b2f37ede17a15a192db76957747510e6d82cd\1.jpg 1.0 0.0 0.17084776
./data/train/images\40eb5fd9e50f063b259bf2ea356c1f116fdda296e2f0162c8b62ae3d1eb7e094\2.jpg 1.0 0.0 0.21135655
./data/train/images\2bf2ccaf14d32628af62a20b9bd8c89a098c17569159685a32ef9d47e8dc13d6\2.tif 1.0 0.0 0.2467443
./data/train/images\ce54ebdd56a7044b4c9606e590e76a11afc7c3780369c5cdc0eccfe6fdc7f30b\3.tif 1.0 0.0 0.2412199
./data/train/images\bc2b0210855ef4718b38bc2c0ddde355d65b964c92455b640e27786ac1182251\5.jpg 1.0 0.0 0.16336477
./data/train/images\eb11bf800ee8630c03ed4011fd3378ee4698e122f8e994fc5ce554c02cbe5e12\1.tif 1.0 0.0 0.25362384
./data/train/images\c5eb37b91650a7c345ceaee49365de1abeeff914cf00a7f1a2d22fe41f633c1a\2.jpg 1.0 0.0 0.10502554
./data/train/images\d44ba69633aaa5b5f76f584f6c5453792f46342bc87cb17148f82bf27d2fab8d\6.jpg 1.0 0.0 0.14335035
./data/train

./data/train/images\b18adad8c39bb458f208581fe40e9be7b04f2b49be04b0d6026eadcf7bdd56ab\2.bmp 1.0 0.0 0.21539433
./data/train/images\9de2666883bb2f435caa27032ee3cf60f07d4b40e36c28fc72d15ef27ecdf34f\5.jpg 1.0 0.0 0.17648225
./data/train/images\40336bff63056434fdc360041df017407dee8bc96cb3a713511b427face9475e\2.jpg 1.0 0.0 0.1451082
./data/train/images\d189aebb7f9b0f0fe372eb49da3c6c0d10626de431c5e34ac955dd0ed96e803a\6.jpg 1.0 0.0 0.20093198
./data/train/images\99be9d73dc97a49c57793e1d84d9e7a6a93b52db37cb74b205638c48273f68ea\1.tif 1.0 0.0 0.19072583
./data/train/images\cd6b43d4120bf0b6bdd260a66a11b4527bc0c7f9cce03f8bf9b4f361e7cac5a0\1.jpg 1.0 0.0 0.13845004
./data/train/images\832182c229f0808a5622e46c0b379611505a5b21e9e631c1e3c7791830f8eab7\1.jpg 1.0 0.0 0.15444815
./data/train/images\c7d7c8371d50a7155148127e6012ec5a8aea5eebd6fa62aac88333593214d25f\1.jpg 1.0 0.0 0.27268562
./data/train/images\b53d1b015707ea4f9614c41fb8c73eb2c7a849aabceb1e98eedf57e5d5218b28\9.jpg 1.0 0.0 0.2297558
./data/train

./data/train/images\7b86619747f20386e7a4323dc2f837b8807cc36ae12f17203a8ea3e3309819aa\2.jpg 1.0 0.0 0.15903908
./data/train/images\9fb96a27d99d7844d433f133963a1e1c67b1bfa28dfee0f2a1f42251c6da7bc7\4.jpg 1.0 0.0 0.19509341
./data/train/images\45c5135c374d495b120b1ab9b8698e594a0183254e7a87fb1cf0631ea9a8c48e\1.jpg 1.0 0.0 0.14672284
./data/train/images\bc2b0210855ef4718b38bc2c0ddde355d65b964c92455b640e27786ac1182251\4.jpg 1.0 0.0 0.16679294
./data/train/images\48323b36713034fb77e610f2eb5db9cd80397cd9cecd600abc2a26fdb0bf96e2\2.jpg 1.0 0.0 0.23818077
./data/train/images\00b0847ed013c1c8a20e36bd45e7cbf825b5de366ee69cc9a289442919e583e5\4.jpg 1.0 0.0 0.16155118
./data/train/images\f56950ad7781f0e4d02e4762a722e45187d180f894f6fbc95adcd825931bab00\1.tif 1.0 0.0 0.24451062
./data/train/images\ec4ba427153fa2832dd93c51cc49db02e19a47ff9107a60e07c2021fbc1b0690\3.jpg 1.0 0.0 0.18827601
./data/train/images\fb5ecbb278818b01c8c462a0c2be5828f987160a62e69742aa04d826270ff01e\6.jpg 1.0 0.0 0.19425963
./data/tra

./data/train/images\e2a593ce4c229c3e6444d58dee0e93e565a13a950a2878fa7ce50b274c7e65ee\2.jpg 1.0 0.0 0.19132099
./data/train/images\1879f41d4297fbb95c3c8795afc7720281f97f629c379990c8160f1a89910da7\3.bmp 1.0 0.0 0.28096053
./data/train/images\4a70f5de3e74d8571ad934db5ec244f7112fdca9d05c63e90193b09cd1b7fe92\3.jpg 1.0 0.0 0.17992625
./data/train/images\82853af8b51a9b6ccdd49a7d448c847f67aac2e7a49df773082721ba6c297d68\1.tif 1.0 0.0 0.19779187
./data/train/images\48323b36713034fb77e610f2eb5db9cd80397cd9cecd600abc2a26fdb0bf96e2\1.jpg 1.0 0.0 0.23874635
./data/train/images\c97db71c13153820e3215a7f83fe220de808fa5138fe9174b9f75cafaf52e510\4.jpg 1.0 0.0 0.119236894
./data/train/images\3e91fd58c6585357b7b99f2f831a130184934737a466ecaef7cf868921539112\16.jpg 1.0 0.0 0.2408444
./data/train/images\8251d60fe2b0268685f2caa29e39fc67e91c44216552acbe263bb4e5ce8974df\7.jpg 1.0 0.0 0.14421439
./data/train/images\082458729e0838bd900bb19017c8b4fbf2c8212a9d7a732a7694ed9306ba8f95\5.jpg 1.0 0.0 0.1543221
./data/tra

./data/train/images\482f9151be61bd9c80567890c5af187bb008871e8c66c3f9d687b6737c181fff\2.jpg 1.0 0.0 0.16997895
./data/train/images\5dd72d377864c1072af351c4d5b733ee87c70b2aff62841eb9c2d1e8fe9e234f\4.jpg 1.0 0.0 0.17342277
./data/train/images\3e91fd58c6585357b7b99f2f831a130184934737a466ecaef7cf868921539112\9.jpg 1.0 0.0 0.12236106
./data/train/images\f56950ad7781f0e4d02e4762a722e45187d180f894f6fbc95adcd825931bab00\2.tif 1.0 0.0 0.22987945
./data/train/images\2248c7d671a997a7436f2f479584876ad062d36c0f2c3fce1e1e1d2de7669386\2.tif 1.0 0.0 0.20352846
./data/train/images\45c5135c374d495b120b1ab9b8698e594a0183254e7a87fb1cf0631ea9a8c48e\2.jpg 1.0 0.0 0.12724943
./data/train/images\9e89f205900e42aa3607d662938e457b928e8775153a04191a2d7cf7e7247874\4.jpg 1.0 0.0 0.114026986
./data/train/images\5c107c7aac2fa546515bc4dc1e416268c5f122c542e2780b486eb5282ce4afcd\1.jpg 1.0 0.0 0.1551065
./data/train/images\082458729e0838bd900bb19017c8b4fbf2c8212a9d7a732a7694ed9306ba8f95\6.jpg 1.0 0.0 0.14483882
./data/tra

./data/train/images\89a5395ef5a21529ac3c25e30362e6d1c72044b945fadb74524a58dab41b22bf\2.jpg 1.0 0.0 0.11017789
./data/train/images\3b6382fe180d139fda38dcda9dc2c33e9208cd89ad9bed99d24cb76b05913333\8.jpg 1.0 0.0 0.1613527
./data/train/images\8902efd513d3278e2b838e6367dc4d4e476185e5b010cb4e3c5590bd1af21712\3.jpg 1.0 0.0 0.17550895
./data/train/images\0c35125d7face4cbd7bfd3377217e91274f64c23790def761fe505b6ee795290\4.jpg 1.0 0.0 0.11671523
./data/train/images\bc2b0210855ef4718b38bc2c0ddde355d65b964c92455b640e27786ac1182251\1.jpg 1.0 0.0 0.16447002
./data/train/images\eb4deb1cb6bc8ce33ee6a5921bb1f4db49591e1be759acab170ad0855fae3098\1.jpg 1.0 0.0 0.17071714
./data/train/images\7ab881c597928577683865c0907742b3f42002323e7c020daa02f4f18cd9c11c\4.jpg 1.0 0.0 0.14999403
./data/train/images\c214b99cc67fb493f3f0e9e4b9403b04c60f60a6bcbd6de1c9809d3598ca0071\2.jpg 1.0 0.0 0.17134285
./data/train/images\1784cdf62ff0da426d2a400f9c3491b3f83574bfe5abd8efcf4b892e406421b1\2.tif 1.0 0.0 0.21314149
./data/trai

./data/train/images\26ea1fb53a8b40ab39e00c4826641c2b5e45385b38643eddf8252616c04038c9\4.bmp 1.0 0.0 0.18333562
./data/train/images\fb5ecbb278818b01c8c462a0c2be5828f987160a62e69742aa04d826270ff01e\9.jpg 1.0 0.0 0.19389862
./data/train/images\35d9e0aeb64bcbb2adce3bbac237d58898d011cfa1a5bbb7a52e37638b5e49f9\11.jpg 1.0 0.0 0.19181532
./data/train/images\86b100327b860642dc3abe7e2901f7cf8301292ccd63a81c7f90455a2f2417b4\4.jpg 1.0 0.0 0.15788847
./data/train/images\1b77b71ab08bf9312265083930af9ddaf354447dffdde0dfc323cc7ee652513d\4.jpg 1.0 0.0 0.22160217
./data/train/images\e645cea7e27d01e9f23d3c6f7ee47781022a2c877d54b7da06a68fa58e11174a\1.jpg 1.0 0.0 0.22070733
./data/train/images\3e91fd58c6585357b7b99f2f831a130184934737a466ecaef7cf868921539112\19.jpg 1.0 0.0 0.20891485
./data/train/images\7f890cfbea3e5e9804c0e227314f5deda75ab042bc23d467ec2a86b53ca4f729\13.jpg 1.0 0.0 0.21732949
./data/train/images\6e7fd33e3c26c49512d80fc518a7f12656844d485ee3e6533a3f2de93d6c1163\3.tif 1.0 0.0 0.32907778
./data/

./data/train/images\45d72c14ca38234bbb1e0529cf9b5491f4d733f8b6eb01420859afca25d8d569\1.tif 1.0 0.0 0.1986575
./data/train/images\c1fe26c5187af842f6dc56f342f0526a5b0d9db32eeb596e17cc1a773f5e3380\7.jpg 1.0 0.0 0.18431416
./data/train/images\89890e2d1de1766dcb1ecf3e0c8c9d4b47666eab3a26dbcc388b4d9ab313bc56\1.tif 1.0 0.0 0.16965201
./data/train/images\1226c2653d4dd7eded0ac1d9f05f227f26d454bafe28d4376ba380c504ca8253\3.tif 1.0 0.0 0.22901535
./data/train/images\1b24063c7a165f27acbb8357a16b6cc25958f602c86986c6a1fa21c82684bc63\2.jpg 1.0 0.0 0.15524074
./data/train/images\c214b99cc67fb493f3f0e9e4b9403b04c60f60a6bcbd6de1c9809d3598ca0071\1.jpg 1.0 0.0 0.18075489
./data/train/images\93f491d79fb8c8d5670b5f8dfd6962f4cb0d8ad900241f07ee705f7ced7e906c\1.jpg 1.0 0.0 0.2589087
./data/train/images\c1fe26c5187af842f6dc56f342f0526a5b0d9db32eeb596e17cc1a773f5e3380\8.jpg 1.0 0.0 0.1820785
./data/train/images\1226c2653d4dd7eded0ac1d9f05f227f26d454bafe28d4376ba380c504ca8253\1.tif 1.0 0.0 0.22437772
./data/train/

./data/train/images\08724b98bd6d9cf9789107a33c37b1a6edfd61e4b01f51e7ea905a5b54e88e62\2.tif 1.0 0.0 0.09351463
./data/train/images\3b6382fe180d139fda38dcda9dc2c33e9208cd89ad9bed99d24cb76b05913333\4.jpg 1.0 0.0 0.16326062
./data/train/images\9e89f205900e42aa3607d662938e457b928e8775153a04191a2d7cf7e7247874\7.jpg 1.0 0.0 0.14996997
./data/train/images\d3433b4fb9260044ed0ef85f148a5e07e787a11d63b67b55062deb22c218858d\1.jpg 1.0 0.0 0.16833223
./data/train/images\ba2b173d0545c60ebdfeae6213f0747cbc60c963c315571b1bee3b264b44e420\1.tif 1.0 0.0 0.26612905
./data/train/images\8dac9abf3937bc05d5c35f305a9a6cb0614a6f01ce6cc0adb28e0271ead54cd5\1.jpg 1.0 0.0 0.1533103
(925, 320, 320, 3) (925, 4)


## build machine learning model 

In [10]:
def build_model():
    model = Sequential()
    model.add(Conv2D(16, 3, padding='valid', activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH ,3)))
    model.add(MaxPooling2D())

    model.add(Conv2D(32, 3, padding='valid', activation='relu'))
    model.add(MaxPooling2D())
    model.add(Conv2D(64, 3, padding='valid', activation='relu'))
    model.add(MaxPooling2D())
    model.add(Conv2D(128, 3, padding='valid', activation='relu'))
    model.add(MaxPooling2D())

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(64, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(32, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(4, activation='softmax'))
    
    
    model.summary()
    return model



In [11]:
def build_transfer_learning_model(base_model):
    
    base_model.trainable = False
    
    model = Sequential()
    model.add(base_model)
    model.add(Flatten())
    #model.add(Dense(256, activation = "relu"))
    model.add(BatchNormalization())
    model.add(Dense(128, activation = "relu"))
    model.add(BatchNormalization())
    model.add(Dense(64, activation = "relu"))
    model.add(BatchNormalization())
    model.add(Dense(16, activation = "relu"))
    model.add(BatchNormalization())
    model.add(Dense(4, activation='softmax'))
    
    model.summary()
    return model

In [12]:
datagen = ImageDataGenerator(
    featurewise_center=False,
    featurewise_std_normalization=False,
    rotation_range=0,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

datagen.fit(train_X)


In [13]:
print(train_X[100])

[[[0.10588236 0.10588236 0.10588236]
  [0.10588236 0.10588236 0.10588236]
  [0.10588236 0.10588236 0.10588236]
  ...
  [0.10588236 0.10588236 0.10588236]
  [0.10588236 0.10588236 0.10588236]
  [0.10588236 0.10588236 0.10588236]]

 [[0.00784314 0.00784314 0.00784314]
  [0.00784314 0.00784314 0.00784314]
  [0.00784314 0.00784314 0.00784314]
  ...
  [0.00784314 0.00784314 0.00784314]
  [0.00784314 0.00784314 0.00784314]
  [0.00784314 0.00784314 0.00784314]]

 [[0.00784314 0.00784314 0.00784314]
  [0.00784314 0.00784314 0.00784314]
  [0.00784314 0.00784314 0.00784314]
  ...
  [0.00784314 0.00784314 0.00784314]
  [0.00784314 0.00784314 0.00784314]
  [0.00784314 0.00784314 0.00784314]]

 ...

 [[0.00784314 0.00784314 0.00784314]
  [0.01176471 0.01176471 0.01176471]
  [0.         0.         0.        ]
  ...
  [0.         0.         0.        ]
  [0.         0.         0.        ]
  [0.         0.         0.        ]]

 [[0.         0.         0.        ]
  [0.         0.         0.        ]


### simple CNN

In [None]:
model =  build_model() #build_transfer_learning_model() # #

optimizer = Adam(
    learning_rate=0.001, epsilon=1e-07, amsgrad=False,)

model.compile(optimizer=optimizer,
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])


y_integers = np.argmax(train_Y, axis=1)
class_weights = compute_class_weight('balanced', np.unique(y_integers), y_integers)
d_class_weights = dict(enumerate(class_weights))

print("class_weight: ", d_class_weights)


history = model.fit(datagen.flow(train_X, train_Y, batch_size = 32),  epochs=50, shuffle = True, class_weight = d_class_weights)
                   

model.save(os.path.join(MODEL_PATH, "saved_model.h5"))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 318, 318, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 159, 159, 16)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 157, 157, 32)      4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 78, 78, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 76, 76, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 38, 38, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 36, 36, 128)       7

### transfer Learning

In [None]:
base_model = tf.keras.applications.densenet(input_shape=(IMG_HEIGHT, IMG_WIDTH ,3), include_top=False, weights='imagenet')

model = build_transfer_learning_model(base_model) # #build_model() #

optimizer = Adam(
    learning_rate=0.001, epsilon=1e-07, amsgrad=False,)

model.compile(optimizer=optimizer,
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])


y_integers = np.argmax(train_Y, axis=1)
class_weights = compute_class_weight('balanced', np.unique(y_integers), y_integers)
d_class_weights = dict(enumerate(class_weights))

print("class_weight: ", d_class_weights)


history = model.fit(datagen.flow(train_X, train_Y, batch_size = 32),  epochs=50, shuffle = True, class_weight = d_class_weights)
                   

model.save(os.path.join(MODEL_PATH, "saved_model.h5"))

In [None]:
def predict(model, image_path):
    image = preprocess_image(image_path)
    prediction =  model.predict(np.asarray([image]))
    print(prediction)
    result = np.argmax(prediction) + 1
    return result




## create final result

In [None]:
def create_result(model, csv_path, image_dir):
    test_df = pd.read_csv(csv_path)
    test_image_id = list(test_df["id"])
    #print(test_image_id)
    f = open(RESULT_FILE, "w")
    for idx in test_image_id:
        test_image_file_path = glob.glob(os.path.join(image_dir, idx, "*.*"))
        #print(test_image_file_path)
        result = []
        
        for p in test_image_file_path:
            result.append(predict(model, p))
        
        #print(result)
        result = np.asarray(result)
        if result.shape[0] != 0:
            counts = np.bincount(result)
            final = np.argmax(counts)
            print(f"{idx}, {result}, {final}")
        else:
            print(f"{idx} NO result")
            final = 1
        
        f.write(f"{idx},{final}\n")
    
    f.close()
    print("done")

    
create_result(model, TEST_CSV, TEST_IMG_DIR)
    