# Intel & MobileODT Cervical Cancer Screening
* [Intel & MobileODT Cervical Cancer Screening | Kaggle](https://www.kaggle.com/c/intel-mobileodt-cervical-cancer-screening/data)

In [1]:
import numpy as np
import pandas as pd

## Data Resized

In [2]:
#path = "/home/tsu-nera/repo/kaggle/input/intel-mobileodt-cervical-cancer-screening/"
path="/input/"
width = 100
height = 100

In [None]:
import os
os.makedirs(path+"resized", exist_ok=True)

for filetype in ["train/", "valid/"]:
    for category in ["Type_1", "Type_2", "Type_3"]:
        os.makedirs(path+"resized/"+filetype+category,  exist_ok=True)
os.makedirs(path+"resized/test/unknown",  exist_ok=True)

In [None]:
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
def resize_img(src, dst, width, height):
    img = Image.open(src, 'r')
    resized = img.resize((width, height))
    resized.save(dst, 'JPEG', optimize=True)

In [None]:
%cd $path

In [None]:
import glob
categories = ['Type_1/', 'Type_2/', 'Type_3/']
filetypes = ["train/"]

for filetype in filetypes:
    for category in categories:
            globpath = filetype+category+"*.jpg"
            files = glob.glob(globpath)
            for file in files:
                src = path+file
                dst = path+"resized/"+file
                resize_img(src, dst, width, height)

In [None]:
%cd $path

In [None]:
globpath = "test/unknown/*.jpg"
files = glob.glob(globpath)
for file in files:
    src = path+file
    dst = path+"resized/"+file
    resize_img(src, dst, width, height)

##  Create train/valid data

In [None]:
path = "/home/tsu-nera/repo/kaggle/input/intel-mobileodt-cervical-cancer-screening/"

In [None]:
%cd $path

In [None]:
%mkdir -p valid/Type_1
%mkdir -p valid/Type_2
%mkdir -p valid/Type_3

In [None]:
%cd $path
%cd resized/train

In [None]:
import glob
import shutil
g = glob.glob('*/*.jpg')
shuf = np.random.permutation(g)
for i in range(81): 
    shutil.move(shuf[i], '../valid/' + shuf[i])

## Load Data

In [3]:
import keras
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16

Using TensorFlow backend.


In [25]:
batch_size = 16

In [26]:
gen_t = image.ImageDataGenerator(
                rescale=1. / 255,
                shear_range=0.2,
                zoom_range=0.2,
                horizontal_flip=True)
gen = image.ImageDataGenerator(rescale=1. / 255)

In [27]:
batches = gen_t.flow_from_directory(path+'train', batch_size=batch_size, target_size=(width,height))
val_batches = gen.flow_from_directory(path+'valid', batch_size=batch_size, target_size=(width, height))
test_batches = gen.flow_from_directory(path+'test', batch_size=batch_size, target_size=(width, height))

Found 1181 images belonging to 3 classes.
Found 300 images belonging to 3 classes.
Found 512 images belonging to 1 classes.


## Use Vgg16

In [7]:
import keras
from keras.models import Sequential, Model
from keras.layers import Flatten, Dense, Dropout, BatchNormalization, Activation
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam, Nadam
from keras.applications.vgg16 import VGG16
from keras.layers.advanced_activations import PReLU

In [8]:
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(width,height,3))

###  finetune
* [Fine-tuning pre-trained VGG16 not possible since `add` method is not defined for `Model` class? · Issue #4040 · fchollet/keras](https://github.com/fchollet/keras/issues/4040)

In [9]:
nf = 128
p = 0.5

In [10]:
base_model.output_shape
last = base_model.output

In [None]:
#from keras import backend as K
#K.set_image_dim_ordering('th')

x = BatchNormalization(axis=1)(last)
x = Conv2D(nf,3,3, border_mode='same')(x)
x = PReLU()(x)
x = BatchNormalization(axis=1)(x)
#x = MaxPooling2D(dim_ordering="th")(x)
x = Conv2D(nf,3,3, border_mode='same')(x)
x = PReLU()(x)
x = BatchNormalization(axis=1)(x)
#x = MaxPooling2D(dim_ordering="th")(x)
x = Conv2D(nf,3,3, border_mode='same')(x)
x = PReLU()(x)
x = BatchNormalization(axis=1)(x)
x = MaxPooling2D((1,2))(x)
x = Conv2D(3,3,3, border_mode='same')(x)
x = Dropout(p)(x)
x = GlobalAveragePooling2D()(x)
preds = Activation('softmax')(x)

model = Model(base_model.input, preds)

In [39]:
x = Flatten()(last)
x = Dense(512)(x)
x = PReLU()(x)
x = BatchNormalization(axis=1)(x)
x = Dropout(0.5)(x)
x = Dense(512)(x)
x = PReLU()(x)
x = BatchNormalization(axis=1)(x)
x = Dropout(0.5)(x)
preds = Dense(3, activation='softmax')(x)

model = Model(base_model.input, preds)

In [40]:
for layer in base_model.layers:
    layer.trainable = False

In [41]:
model.compile(Nadam(), loss='categorical_crossentropy', metrics=['accuracy'])

大量のWarningがでてkernel が死ぬときの解決法
* [Intel & MobileODT Cervical Cancer Screening | Kaggle](https://www.kaggle.com/c/intel-mobileodt-cervical-cancer-screening/discussion/31558)

これでも死んだ。画像サイズがおおきいのかな？？ リサイズしてみる。

In [31]:
from keras.callbacks import EarlyStopping
escb = EarlyStopping(monitor="loss", patience=1)

Memory Allocation Errorで実行できない。。。

In [42]:
model.fit_generator(batches, batches.n//batch_size, epochs=30,
                    validation_data=val_batches, validation_steps=val_batches.n//batch_size, callbacks=[escb])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30


<keras.callbacks.History at 0x7f0300878240>

## Predict

In [46]:
preds = model.predict_generator(test_batches, test_batches.n//batch_size)

## Submit

In [47]:
def do_clip(arr, mx): return np.clip(arr, (1-mx)/2, mx)

In [48]:
subm = do_clip(preds,0.99)

In [49]:
test_filenames = [a[8:] for a in test_batches.filenames]

In [50]:
classes = sorted(batches.class_indices, key=batches.class_indices.get)
submission = pd.DataFrame(subm, columns=classes)
submission.insert(0, 'image_name', test_filenames)
submission.head()

Unnamed: 0,image_name,Type_1,Type_2,Type_3
0,10.jpg,0.140263,0.847545,0.012192
1,377.jpg,0.005,0.028148,0.967293
2,296.jpg,0.096546,0.749115,0.154339
3,458.jpg,0.036773,0.743163,0.220064
4,89.jpg,0.290981,0.697503,0.011516


In [None]:
%cd '/home/tsu-nera/repo/kaggle/intel-mobileodt-cervical-cancer-screening'

In [51]:
from IPython.display import FileLink
submission.to_csv('submission.csv', index=False)
FileLink('submission.csv')