In [2]:
import argparse
import os, sys
from utils.data_utils import *
from glob import glob
import pandas as pd
import numpy as np
import shutil
from tqdm import tqdm
%matplotlib inline  
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = [12,9]

from keras.models import Input, Model, Sequential
from keras.layers import Dense
from keras.models import load_model
import keras.backend as K

np.set_printoptions(precision=5)
np.set_printoptions(suppress=True)

In [3]:
def load_jpg_data(df_csv, data_dir, label_map, img_size=None, subset_size=None, n_labels=17):
    """Load JPEG images and build a multi-hot label vector for each of them.

    Parameters
    ----------
    df_csv : pandas.DataFrame
        Frame whose rows unpack to ``(file_stem, tags)``; ``tags`` is a
        space-separated string of label names.
    data_dir : str
        Directory containing ``<file_stem>.jpg`` for every row.
    label_map : dict
        Maps a label name to its column index in the label vector.
    img_size : int, optional
        When given, every image is resized to ``(img_size, img_size)``.
    subset_size : int, optional
        When given, only a random sample of that many rows is loaded.
    n_labels : int, optional
        Width of each label vector (defaults to 17, the previous fixed width).

    Returns
    -------
    X : numpy.ndarray
        ``float16`` array of the stacked images divided by 255.
    Y : numpy.ndarray
        ``uint8`` array with one multi-hot row per image.

    Raises
    ------
    IOError
        If an image file cannot be read.
    KeyError
        If a tag is not present in ``label_map``.
    """
    rows = df_csv if subset_size is None else df_csv.sample(subset_size)
    data_progress = tqdm(rows.values, miniters=1000)

    X = []
    Y = []
    for f, tags in data_progress:
        img_path = os.path.join(data_dir, '{}.jpg'.format(f))
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # instead of raising; fail here with the offending path rather
            # than later inside resize or the array conversion.
            raise IOError("Could not read image: {}".format(img_path))

        targets = np.zeros(n_labels)
        for t in tags.split(' '):
            targets[label_map[t]] = 1

        if img_size is not None:
            img = cv2.resize(img, (img_size, img_size))
        X.append(img)
        Y.append(targets)

    print("Creating numpy array for data...")
    X = np.array(X, np.float16) / 255.
    Y = np.array(Y, np.uint8)
    return X, Y

def get_labels():
    """Return the label names together with forward and inverse index maps.

    Returns
    -------
    labels : list of str
        Label names in their fixed column order.
    l_map : dict
        Label name -> column index.
    inv_map : dict
        Column index -> label name.
    """
    label_names = (
        'slash_burn', 'clear', 'blooming', 'primary', 'cloudy',
        'conventional_mine', 'water', 'haze', 'cultivation',
        'partly_cloudy', 'artisinal_mine', 'habitation', 'bare_ground',
        'blow_down', 'agriculture', 'road', 'selective_logging',
    )
    labels = list(label_names)

    # Position in the list is the column index used in the label vectors.
    l_map = dict(zip(labels, range(len(labels))))
    inv_map = dict(enumerate(labels))
    return labels, l_map, inv_map

def get_data_dir(a, t):
    """Return the data sub-directory name for a split.

    Parameters
    ----------
    a : truthy/falsy
        Selects the ``*_ex`` variant when truthy.
    t : truthy/falsy
        Selects the train split when truthy, the validation split otherwise.
    """
    # Keyed by (is_train, is_ex); both flags are reduced to plain booleans.
    dir_names = {
        (True, True): 'train_ex',
        (True, False): 'train',
        (False, True): 'valid_ex',
        (False, False): 'valid',
    }
    return dir_names[(bool(t), bool(a))]

def load_loss_df(l_dirs, augmented=False, train=False):
    """Read ``models.csv`` from each loss directory and stack them into one frame.

    Parameters
    ----------
    l_dirs : iterable of str
        Sub-directory names under ``predictions/<data_dir>/``.
    augmented, train : bool
        Forwarded to ``get_data_dir`` to pick ``<data_dir>``.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all CSVs, re-indexed from 0.
    """
    split_dir = get_data_dir(augmented, train)
    frames = [
        pd.read_csv(os.path.join('predictions', split_dir, loss_dir, 'models.csv'))
        for loss_dir in l_dirs
    ]
    return pd.concat(frames, ignore_index=True)

def is_windows():
    """Return True when ``sys.platform`` is exactly ``'win32'``."""
    platform_name = sys.platform
    return platform_name == 'win32'

In [4]:
# Load every test image listed in the sample submission, resized to 128x128.
df_test = pd.read_csv(os.path.join('inputs', 'sample_submission_v2.csv'))
test_data_dir = os.path.join('inputs', 'test-jpg')
labels, label_map, inv_label_map = get_labels()
# The label matrix returned alongside the images is discarded; only the pixel data is kept.
X, _ = load_jpg_data(df_test, test_data_dir, label_map, img_size=128)

100%|██████████| 61191/61191 [00:49<00:00, 1227.54it/s]


Creating numpy array for data...


In [5]:
# Pick which loss sub-directories to read; the list depends on the platform.
# NOTE(review): presumably each machine holds a different set of model runs — confirm.
if is_windows():
    l_dirs = ['0.21', '0.22', '0.23', '0.24', '0.25']
else:
    l_dirs = ['0.09']

# Collect the models.csv records stored under predictions/valid_ex/<loss dir>/.
df_loss_valid = load_loss_df(l_dirs, augmented=True, train=False)
print(len(df_loss_valid))

76


In [6]:
# Index labels (rows of df_loss_valid) of the models that form the ensemble.
# NOTE(review): these are hand-picked and only meaningful for the exact set/order
# of CSVs loaded into df_loss_valid — re-check them if l_dirs changes.
ensemble = [25, 13, 46]
# .loc selects by index label and returns the rows in the order listed above.
df_ensemble = df_loss_valid.loc[ensemble]
df_ensemble

Unnamed: 0,model,model_name,loss,epoch,path,score,res_path
25,starter1_22_0.09,starter1,0.09,22,models/starter1_128_binary_crossentropy_rmspro...,0.922563,predictions/valid_ex/0.09/starter1_128_binary_...
13,net1_25_0.09,net1,0.09,25,models/net1_128_binary_crossentropy_rmsprop/ne...,0.923863,predictions/valid_ex/0.09/net1_128_binary_cros...
46,best1_24_0.09,best1,0.09,24,models/best1_128_binary_crossentropy_rmsprop/b...,0.923987,predictions/valid_ex/0.09/best1_128_binary_cro...


In [7]:
# Run every ensemble member over the test images, one model at a time.
preds = []
for m in df_ensemble.path:
    print("Loading model %s" % m)
    model = load_model(m)
    print("Predicting model %s" % m)
    preds.append(model.predict(X))
    # Drop the model and reset the Keras backend session before loading the next
    # one (presumably to release its memory — confirm).
    del model
    K.clear_session()
print("Prediction finished")
# Stack the per-model outputs along a new leading axis: (model, sample, label).
preds = np.array(preds)
preds.shape

Loading model models/starter1_128_binary_crossentropy_rmsprop_x8/starter1_22_0.09.h5
Predicting model models/starter1_128_binary_crossentropy_rmsprop_x8/starter1_22_0.09.h5
Loading model models/net1_128_binary_crossentropy_rmsprop/net1_25_0.09.h5
Predicting model models/net1_128_binary_crossentropy_rmsprop/net1_25_0.09.h5
Loading model models/best1_128_binary_crossentropy_rmsprop/best1_24_0.09.h5
Predicting model models/best1_128_binary_crossentropy_rmsprop/best1_24_0.09.h5
Prediction finished


(3, 61191, 17)

In [9]:
# Average the predictions over the model axis, then binarise each label at 0.2.
preds_avg = np.mean(preds, axis=0)
preds_final = (preds_avg>0.2).astype(int)
preds_final.shape

(61191, 17)

In [15]:
# Keep only the samples for which EVERY label's averaged prediction is extreme:
# either above range_max or below range_min.
range_min = 0.002
range_max = 0.999
# Boolean mask with one entry per sample (reduced over the label axis).
condition = np.all((preds_avg > range_max) | (preds_avg < range_min), axis=1)
preds_in_range = preds_final[condition]
X_in_range = X[condition]
preds_in_range.shape

(5196, 17)

In [17]:
# Row positions (within the full prediction array) of the samples selected by `condition`.
idx = np.arange(len(preds_final))
idx_in_range = idx[condition]
idx_in_range.shape

(5196,)

In [29]:
# Persist the selected samples: their original row positions, the image
# array and the thresholded predictions, all under `labeling_dir`.
labeling_dir = 'extra_labeled'
if not os.path.exists(labeling_dir):
    os.makedirs(labeling_dir)

# Build the single-column frame directly rather than assigning a column
# through attribute access on an empty frame.
df = pd.DataFrame({'idx': idx_in_range})
df.to_csv(os.path.join(labeling_dir, 'index_orig.csv'), index=False)

# Each archive holds one array, stored under numpy's default key 'arr_0'.
np.savez_compressed(os.path.join(labeling_dir, 'X_test_extra.npz'), X_in_range)
np.savez_compressed(os.path.join(labeling_dir, 'Y_test_extra.npz'), preds_in_range)