# Intel & MobileODT Cervical Cancer Screening
* [Intel & MobileODT Cervical Cancer Screening | Kaggle](https://www.kaggle.com/c/intel-mobileodt-cervical-cancer-screening/data)

In [2]:
import numpy as np
import pandas as pd

## Data Resized

In [1]:
path="/input/"
resized = "/input/"
#path = "/home/tsu-nera/repo/kaggle/input/intel-mobileodt-cervical-cancer-screening/"
#resized = path+"resized/"

width = 100
height = 100

In [3]:
import os
os.makedirs(path+"resized", exist_ok=True)

for filetype in ["train/", "valid/"]:
    for category in ["Type_1", "Type_2", "Type_3"]:
        os.makedirs(path+"resized/"+filetype+category,  exist_ok=True)
os.makedirs(path+"resized/test/unknown",  exist_ok=True)

In [5]:
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
def resize_img(src, dst, width, height):
    img = Image.open(src, 'r')
    resized = img.resize((width, height))
    resized.save(dst, 'JPEG', optimize=True)

In [10]:
%cd $path

/home/tsu-nera/repo/kaggle/input/intel-mobileodt-cervical-cancer-screening


In [11]:
import glob
categories = ['Type_1/', 'Type_2/', 'Type_3/']
filetypes = ["train/"]

for filetype in filetypes:
    for category in categories:
            globpath = filetype+category+"*.jpg"
            files = glob.glob(globpath)
            for file in files:
                src = path+file
                dst = path+"resized/"+file
                resize_img(src, dst, width, height)

In [None]:
%cd $path

In [None]:
globpath = "test/unknown/*.jpg"
files = glob.glob(globpath)
for file in files:
    src = path+file
    dst = path+"resized/"+file
    resize_img(src, dst, width, height)

##  Create train/valid data

In [4]:
path = "/home/tsu-nera/repo/kaggle/input/intel-mobileodt-cervical-cancer-screening/"

In [5]:
%cd $path

/home/tsu-nera/repo/kaggle/input/intel-mobileodt-cervical-cancer-screening


In [6]:
%mkdir -p valid/Type_1
%mkdir -p valid/Type_2
%mkdir -p valid/Type_3

In [16]:
%cd $path/resized/train

/home/tsu-nera/repo/kaggle/input/intel-mobileodt-cervical-cancer-screening/resized/valid


In [17]:
import glob
import shutil
g = glob.glob('*/*.jpg')
shuf = np.random.permutation(g)
for i in range(300): 
    shutil.move(shuf[i], '../valid/' + shuf[i])

## Load Data

In [3]:
import keras
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16

Using TensorFlow backend.


In [4]:
batch_size = 64

In [5]:
gen_t = image.ImageDataGenerator(rescale=1. / 255,
                 rotation_range=15, height_shift_range=0.05, 
                 shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)
gen = image.ImageDataGenerator(rescale=1. / 255)

In [7]:
batches = gen_t.flow_from_directory(path+'train', batch_size=batch_size, target_size=(width,height))
val_batches = gen.flow_from_directory(path+'valid', batch_size=batch_size, target_size=(width, height))
test_batches = gen.flow_from_directory(path+'test', batch_size=batch_size, target_size=(width, height))

Found 1181 images belonging to 3 classes.
Found 300 images belonging to 3 classes.
Found 512 images belonging to 1 classes.


## Build CNN

In [8]:
import keras
from keras.models import Sequential, Model
from keras.layers import Flatten, Dense, Dropout, BatchNormalization, Activation
from keras.layers import Conv2D, MaxPool2D, GlobalAveragePooling2D
from keras.optimizers import Adam, Nadam
from keras.applications.vgg16 import VGG16
from keras.layers.advanced_activations import PReLU

In [34]:
 def get_model():
    model = Sequential()
    model.add(BatchNormalization(axis=1,input_shape=(width,height,3)))
    model.add(Conv2D(filters=32,kernel_size=(3,3)))
    model.add(PReLU())
    model.add(BatchNormalization(axis=1))
    model.add(Conv2D(filters=64,kernel_size=(3,3)))
    model.add(PReLU())
    model.add(BatchNormalization(axis=1))
    model.add(MaxPool2D())
    model.add(Flatten())
    model.add(BatchNormalization())
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))
    return model

In [35]:
model = get_model()

In [36]:
model.compile(Nadam(), loss='categorical_crossentropy', metrics=['accuracy'])

大量のWarningがでてkernel が死ぬときの解決法
* [Intel & MobileODT Cervical Cancer Screening | Kaggle](https://www.kaggle.com/c/intel-mobileodt-cervical-cancer-screening/discussion/31558)

これでも死んだ。画像サイズがおおきいのかな？？ リサイズしてみる。

In [14]:
from keras.callbacks import EarlyStopping
escb = EarlyStopping(monitor="loss", patience=1)

In [39]:
model.fit_generator(batches, batches.n//batch_size, epochs=100,
                    validation_data=val_batches, validation_steps=val_batches.n//batch_size, callbacks=[escb])

Epoch 1/100


KeyboardInterrupt: 

### Model2

In [9]:
 def get_model2():
    model = Sequential()
    model.add(BatchNormalization(axis=1,input_shape=(width,height,3)))
    model.add(Conv2D(filters=32,kernel_size=(3,3), padding="same"))
    model.add(PReLU())
    model.add(BatchNormalization(axis=1))
    model.add(Conv2D(filters=64,kernel_size=(3,3), padding="same"))
    model.add(PReLU())
    model.add(BatchNormalization(axis=1))
    model.add(MaxPool2D())
    model.add(Conv2D(3,(3,3), padding='same'))
    model.add(Dropout(0.5))
    model.add(GlobalAveragePooling2D())
    model.add(Activation('softmax'))
    return model

In [18]:
model2 = get_model2()

In [19]:
model2.compile(Nadam(), loss='categorical_crossentropy', metrics=['accuracy'])

In [20]:
model2.fit_generator(batches, batches.n//batch_size, epochs=30,
                    validation_data=val_batches, validation_steps=val_batches.n//batch_size, callbacks=[escb])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30


<keras.callbacks.History at 0x7f14654cc9b0>

## Predict

In [21]:
preds = model2.predict_generator(test_batches, test_batches.n//batch_size)

## Submit

In [22]:
def do_clip(arr, mx): return np.clip(arr, (1-mx)/2, mx)

In [23]:
subm = do_clip(preds,0.93)

In [24]:
test_filenames = [a[8:] for a in test_batches.filenames]

In [25]:
classes = sorted(batches.class_indices, key=batches.class_indices.get)
submission = pd.DataFrame(subm, columns=classes)
submission.insert(0, 'image_name', test_filenames)
submission.head()

Unnamed: 0,image_name,Type_1,Type_2,Type_3
0,10.jpg,0.117069,0.444912,0.438019
1,377.jpg,0.13971,0.475033,0.385257
2,296.jpg,0.155455,0.437143,0.407402
3,458.jpg,0.1705,0.488972,0.340528
4,89.jpg,0.114029,0.434496,0.451474


In [None]:
%cd '/home/tsu-nera/repo/kaggle/intel-mobileodt-cervical-cancer-screening'

In [26]:
from IPython.display import FileLink
submission.to_csv('submission.csv', index=False)
FileLink('submission.csv')