In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense, Conv2D, Activation, Dropout, MaxPooling2D, Flatten, GlobalMaxPooling2D, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.optimizers import SGD, Adam
from keras.preprocessing.image import ImageDataGenerator

train = pd.read_json("data/iceberg/train.json")
test  = pd.read_json("data/iceberg/test.json")

Using TensorFlow backend.


In [3]:
X_band_1=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train["band_1"]])
X_band_2=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train["band_2"]])
X = np.concatenate([X_band_1[:, :, :, np.newaxis], X_band_2[:, :, :, np.newaxis],((X_band_1+X_band_2)/2)[:, :, :, np.newaxis]], axis=-1)

In [4]:
X_band_1=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in test["band_1"]])
X_band_2=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in test["band_2"]])
Test = np.concatenate([X_band_1[:, :, :, np.newaxis], X_band_2[:, :, :, np.newaxis],((X_band_1+X_band_2)/2)[:, :, :, np.newaxis]], axis=-1)

In [5]:
target = train['is_iceberg']
ID = test['id']

In [6]:
x_train, x_test, y_train, y_test = train_test_split(X,target,test_size=0.25,stratify=target,random_state=10)

In [7]:
datagen = ImageDataGenerator(horizontal_flip = True,
                         vertical_flip = True,
                         width_shift_range = 0.,
                         height_shift_range = 0.,
                         channel_shift_range=0,
                         zoom_range = 0.2,
                         rotation_range = 10)

In [8]:
model = Sequential()

model.add(BatchNormalization(input_shape=(75,75,3)))

model.add(Conv2D(32, kernel_size=(3, 3),padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))


model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128,kernel_size=(3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, kernel_size=(3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(GlobalMaxPooling2D())

model.add(Dense(64)) #512
model.add(Activation('relu'))


model.add(Dense(1))
model.add(Activation('sigmoid'))

adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

check = ModelCheckpoint("weights.{epoch:02d}-{val_acc:.5f}.hdf5", monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=True, mode='auto')
early = EarlyStopping(monitor='val_acc', min_delta=0, patience=20, verbose=1, mode='max')


In [14]:
model.fit_generator(datagen.flow(x_train, y_train, batch_size=32),steps_per_epoch=len(x_train)/32,epochs=5,callbacks=[check,early],validation_data=(x_test,y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 00032: early stopping


<keras.callbacks.History at 0x7f40b8b82da0>

In [15]:
pred = model.predict_proba(Test)



In [16]:
submission = pd.DataFrame()
submission['id'] = ID
submission['is_iceberg'] = pred
submission.to_csv('submissions.csv', index=False)

#### Pseudo-labeling

In [27]:
y_pseudo = model.predict(Test)

In [30]:
y_train.shape

(1203,)

In [42]:
i_trn = 0
i_test = 0

# iterate through 800 mini-batch
num_iter = 600*2
# mini-batch size
size_trn = 48
size_test = 16
num_batch_per_epoch_trn = int(x_train.shape[0]/size_trn)
num_batch_per_epoch_test = int(x_test.shape[0]/size_test)
index_trn = np.random.permutation(num_batch_per_epoch_trn)
index_test = np.random.permutation(num_batch_per_epoch_test)
for i in range(num_iter):
    i_trn = index_trn[i%num_batch_per_epoch_trn]
    i_test = index_test[i%num_batch_per_epoch_test]
    
    comb_features = np.concatenate((x_train[(size_trn*i_trn):size_trn*(i_trn+1)],
                                   Test[(size_test*i_test):size_test*(i_test+1)]),axis=0)
    comb_labels = np.concatenate((y_train[(size_trn*i_trn):size_trn*(i_trn+1)],
                                 y_pseudo[:,0][(size_test*i_test):size_test*(i_test+1)]), axis=0)
    
    model.train_on_batch(comb_features, comb_labels)
    
    if (i+1)%num_batch_per_epoch_trn == 0:
        index_trn = np.random.permutation(num_batch_per_epoch_trn)
    if (i+1)%num_batch_per_epoch_test == 0:
        index_test = np.random.permutation(num_batch_per_epoch_test)

In [50]:
model.optimizer.lr = 0
model.fit(x_train, y_train, batch_size=32,epochs=40,callbacks=[check,early],validation_data=(x_test,y_test))

Train on 1203 samples, validate on 401 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40


Epoch 34/40
Epoch 00033: early stopping


<keras.callbacks.History at 0x7f40b81b7898>

In [51]:
pred = model.predict_proba(Test)



In [52]:
submission = pd.DataFrame()
submission['id'] = ID
submission['is_iceberg'] = pred
submission.to_csv('submissions_pseudo.csv', index=False)

##### pseduo_labelling didnot work. 
Got 0.39

In [32]:
x_train.shape

(1203, 75, 75, 3)

In [33]:
Test.shape

(8424, 75, 75, 3)

In [34]:
y_train.shape

(1203,)

In [41]:
y_pseudo[:,0].shape

(8424,)