In [3]:
%load_ext autoreload

%autoreload 2

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pathlib import Path

from fastai import *
from fastai.vision import *

import json

from utils import *

In [5]:
# Number of rows drawn from each class CSV by create_train_txts_from_df.
NUM_SAMPLES_PER_CLASS = 1000
# Number of shuffled items held out for validation (17,000 in total);
# presumably 50 per class over 340 classes — TODO confirm the two factors.
NUM_VAL = 340 * 50

In [6]:
%pwd

'/home/paperspace/kaggle_experiments/quickdraw'

In [13]:
# Root data directory: holds the competition files and the generated *_txt folders.
PATH = Path('/home/paperspace') / 'data'

In [12]:
!ls /home/paperspace/data

test-128  test_simplified.csv  train  train-128  train_simplified.zip


In [14]:
# Make sure the data root and both txt output folders exist (no error if already present).
for folder in (PATH, PATH/'train_txt', PATH/'test_txt'):
    folder.mkdir(exist_ok=True)

In [32]:
def create_train_txts_from_df(path, n_samples=None, out_dir=None, random_state=None):
    """Sample rows from one class CSV and write each as a JSON ``.txt`` file.

    The class name is the CSV file stem with whitespace runs replaced by
    underscores; every sampled row is written to
    ``<out_dir>/<class>/<key_id>.txt``.

    Args:
        path: ``pathlib.Path`` of a class CSV with ``countrycode``, ``drawing``
            (a JSON-encoded string), ``key_id`` and ``recognized`` columns.
        n_samples: maximum number of rows to sample. Defaults to the
            notebook-level ``NUM_SAMPLES_PER_CLASS``. If the CSV has fewer
            rows, all of them are used instead of raising ``ValueError``.
        out_dir: directory that receives the per-class folder. Defaults to
            ``PATH/'train_txt'``; created (with parents) when missing.
        random_state: forwarded to ``DataFrame.sample``; ``None`` keeps the
            original unseeded behaviour.
    """
    if n_samples is None: n_samples = NUM_SAMPLES_PER_CLASS
    if out_dir is None: out_dir = PATH/'train_txt'

    # NOTE(review): read_csv's default NA handling may turn a literal "NA"
    # countrycode into NaN — confirm whether that matters downstream.
    df = pd.read_csv(path, engine = 'python')
    klass = '_'.join(path.stem.split())
    class_dir = Path(out_dir)/klass
    class_dir.mkdir(parents=True, exist_ok=True)

    # DataFrame.sample raises when asked for more rows than exist (replace=False).
    n_rows = min(n_samples, len(df))
    for row in df.sample(n_rows, random_state=random_state).itertuples(index=False):
        example = {
            'countrycode': row.countrycode,
            'drawing': json.loads(row.drawing),
            'key_id': row.key_id,
            'recognized': row.recognized
        }
        with open(class_dir/f'{example["key_id"]}.txt', mode='w') as f: json.dump(example, f)

def create_test_txts_from_df(path, out_dir=None):
    """Write every row of the test CSV as its own JSON ``.txt`` file.

    Each row becomes ``<out_dir>/<key_id>.txt`` containing the row's
    ``countrycode``, decoded ``drawing`` and ``key_id``.

    Args:
        path: location of a CSV with ``countrycode``, ``drawing`` (a
            JSON-encoded string) and ``key_id`` columns.
        out_dir: destination directory. Defaults to ``PATH/'test_txt'``;
            created (with parents) when missing, so the function no longer
            depends on another cell having made it first.
    """
    if out_dir is None: out_dir = PATH/'test_txt'
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    df = pd.read_csv(path)
    # itertuples avoids building a Series per row, which iterrows does.
    for row in df.itertuples(index=False):
        example = {
            'countrycode': row.countrycode,
            'drawing': json.loads(row.drawing),
            'key_id': row.key_id
        }
        with open(out_dir/f'{example["key_id"]}.txt', mode='w') as f: json.dump(example, f)

In [21]:
# Convert the test CSV into one JSON .txt file per row (timed).
%time create_test_txts_from_df(PATH/'test_simplified.csv')

CPU times: user 39 s, sys: 3.14 s, total: 42.2 s
Wall time: 42.3 s


In [41]:
%time for p in Path('/home/paperspace/data/train').iterdir(): create_train_txts_from_df(p)

CPU times: user 9min 13s, sys: 22.4 s, total: 9min 35s
Wall time: 9min 23s


In [42]:
# sz: value passed as `size=` to list2drawing and used in file/model names.
# bs: batch size handed to the training DataLoader (doubled for valid/test).
sz, bs = 128, 640

def create_func(path):
    """Open one saved drawing file and return it wrapped in an `Image`.

    The JSON document at `path` is parsed, its 'drawing' entry is passed
    through `list2drawing` (with `size=sz`) and `drawing2tensor`, and the
    resulting tensor is divided by 255 in place before being wrapped.
    """
    with open(path) as fh:
        record = json.load(fh)
    rendered = list2drawing(record['drawing'], size=sz)
    # div_ mutates the freshly built tensor, so no extra copy is made.
    return Image(drawing2tensor(rendered).div_(255))

In [4]:
# Build the item list from the files under PATH/'train_txt'; create_func is supplied
# as the loader (presumably called once per file — confirm against fastai).
# NOTE(review): the recorded run of this cell failed with "AttributeError: type object
# 'ItemList' has no attribute 'from_folder'", so the fastai version installed at that
# time did not expose this constructor — confirm which fastai release this targets.
item_list = ItemList.from_folder(PATH/'train_txt',create_func = create_func)

AttributeError: type object 'ItemList' has no attribute 'from_folder'

In [None]:
# Pick NUM_VAL random item positions for validation.
# A local, seeded RandomState keeps the split identical across kernel restarts
# (so reloaded checkpoints are not scored on data they were trained on) without
# touching numpy's global random state.
split_rng = np.random.RandomState(42)
idxs = split_rng.permutation(item_list.items.shape[0])
val_idxs = idxs[:NUM_VAL]

In [None]:
# Split the items using the positions in val_idxs (presumably those become the
# validation set and the rest the training set — confirm against fastai).
item_lists = item_list.split_by_idx(val_idxs)

In [None]:
# Class list is read from a pickle stored relative to the working directory.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
classes = pd.read_pickle('data/classes.pkl')

# Derive labels from the folder structure, restricted to the loaded classes.
label_lists = item_lists.label_from_folder(classes=classes)

# Test items live in PATH/'test_txt' (that is where create_test_txts_from_df
# writes them); PATH/'test' is never created by this notebook.
test_items = ItemList.from_folder(PATH/'test_txt', create_func=create_func)
label_lists.add_test(test_items);

In [None]:
# One loader per split: training is shuffled at batch size bs; validation and
# test keep their order and use double-size batches.
# Assumes DataLoader is torch.utils.data.DataLoader (batch_size / shuffle keywords) — TODO confirm.
train_dl = DataLoader(label_lists.train, batch_size=bs, shuffle=True, num_workers=12)
valid_dl = DataLoader(label_lists.valid, batch_size=2*bs, shuffle=False, num_workers=12)
test_dl = DataLoader(label_lists.test, batch_size=2*bs, shuffle=False, num_workers=12)

# Bundle the three loaders into a single data bunch for the learner.
data_bunch = ImageDataBunch(train_dl, valid_dl, test_dl)

In [None]:
# Load the stored statistics for the current image size (file name depends on sz).
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
batch_stats = pd.read_pickle(f'data/batch_stats_{sz}.pkl')

In [None]:
# Normalise the data bunch with the loaded stats (presumably per-channel mean/std — TODO confirm).
# The trailing ';' suppresses the cell's output.
data_bunch.normalize(batch_stats);

In [None]:
# Visual sanity check of the input pipeline, requested with rows=4.
data_bunch.show_batch(rows=4)

In [None]:
# Run identifier (architecture tag + image size); reused for checkpoint and submission names.
name = f'res34-{sz}'

In [None]:
# Create the learner on a resnet34 backbone, tracking accuracy and map3.
# map3 is not defined in this notebook; presumably it comes from utils — verify.
learn = create_cnn(data_bunch, models.resnet34, metrics=[accuracy, map3])

In [None]:
# Stage 1: one-cycle training with a length of 2 at the default learning rate.
learn.fit_one_cycle(2)

In [None]:
# Checkpoint the stage-1 weights under '<name>-stage-1'.
learn.save(f'{name}-stage-1')

In [None]:
# Unfreeze the learner before stage 2 (presumably makes all layers trainable — confirm against fastai docs).
learn.unfreeze()

In [None]:
# Stage 2: one-cycle training with a length of 6 and the peak learning rate capped at 6e-4.
learn.fit_one_cycle(6, max_lr=6e-4)

In [None]:
# Checkpoint the stage-2 weights under '<name>-stage-2'.
learn.save(f'{name}-stage-2')

In [None]:
# Reload the stage-2 checkpoint before predicting; the trailing ';' suppresses the cell's output.
learn.load(f'{name}-stage-2');

In [None]:
# Predict on the test split; the second returned value is discarded.
preds, _ = learn.get_preds(ds_type=DatasetType.Test)

In [None]:
# Build the submission from the test predictions. create_submission is not defined
# in this notebook (presumably from utils); it appears to write subs/<name>.csv.gz,
# which the next cell reads — verify.
create_submission(preds, data_bunch.test_dl, name)

In [None]:
# Read the compressed submission back and show its first rows as a sanity check.
pd.read_csv(f'subs/{name}.csv.gz').head()

In [None]:
# !kaggle competitions submit -c quickdraw-doodle-recognition -f subs/{name}.csv.gz -m "{name}"