In [1]:
!nvidia-smi

Fri Mar 26 15:27:22 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.100      Driver Version: 440.100      CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:1E.0 Off |                    0 |
| N/A   44C    P0    41W / 300W |      0MiB / 16160MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

- https://github.com/haqishen/Google-Landmark-Recognition-2020-3rd-Place-Solution
- https://www.kaggle.com/zzy990106/b0-bert-cv0-9

In [2]:
import pandas as pd
import numpy as np

%load_ext autoreload
%autoreload 2
%aimport

HOME = '/data/git/shopee-product-matching'
import sys
sys.path.append(f"{HOME}/src")


import argparse
import os
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, Dataset
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.backends import cudnn


from dataset import get_transforms, ShopeeDataset
from util import global_average_precision_score
from models import DenseCrossEntropy, Swish_module
from models import ArcFaceLossAdaptiveMargin, Effnet_Landmark, RexNet20_Landmark, ResNest101_Landmark
from train import *

HOME = '/data/git/shopee-product-matching'
pcomp = f'{HOME}/input/shopee-product-matching'
!ls $pcomp

pout = f'{HOME}/output/dev0023'
!mkdir -p pout

Modules to reload:
all-except-skipped

Modules to skip:

sample_submission.csv  test.csv  test_images  train.csv  train_images


# preprocess

In [3]:
# preprocess.py
# def main()

from sklearn.model_selection import train_test_split, StratifiedKFold
df = pd.read_csv(f'{pcomp}/train.csv')
skf = StratifiedKFold(5, shuffle=True, random_state=233)
df['fold'] = -1
for i, (train_idx, valid_idx) in enumerate(skf.split(df, df.label_group)):
    df.loc[valid_idx, 'fold'] = i
df['filepath'] = f"{pcomp}/train_images/"+df.image
display(df.head(3))
display(df.fold.value_counts())



Unnamed: 0,posting_id,image,image_phash,title,label_group,fold,filepath
0,train_129225211,0000a68812bc7e98c42888dfb1c07da0.jpg,94974f937d4c2433,Paper Bag Victoria Secret,249114794,0,/data/git/shopee-product-matching/input/shopee...
1,train_3386243561,00039780dfc94d01db8676fe789ecd05.jpg,af3f9460c2838f0f,"Double Tape 3M VHB 12 mm x 4,5 m ORIGINAL / DO...",2937985045,2,/data/git/shopee-product-matching/input/shopee...
2,train_2288590299,000a190fdd715a2a36faed16e2c65df7.jpg,b94cb00ed3e50f78,Maling TTS Canned Pork Luncheon Meat 397 gr,2395904891,0,/data/git/shopee-product-matching/input/shopee...


4    6850
3    6850
2    6850
1    6850
0    6850
Name: fold, dtype: int64

# train

##  `if __name__ == '__main__':`

In [4]:
parser = argparse.ArgumentParser()
parser.add_argument('--kernel-type', type=str, required=True)
parser.add_argument('--image-size', type=int, default=224)
# parser.add_argument("--local_rank", type=int)
parser.add_argument('--enet-type', type=str, required=True)
parser.add_argument('--batch-size', type=int, default=8)
parser.add_argument('--num-workers', type=int, default=8)
parser.add_argument('--init-lr', type=float, default=1e-4)
parser.add_argument('--n-epochs', type=int, default=15)
parser.add_argument('--start-from-epoch', type=int, default=1)
parser.add_argument('--stop-at-epoch', type=int, default=999)
# parser.add_argument('--use-amp', action='store_false')
# parser.add_argument('--DEBUG', action='store_true')
parser.add_argument('--model-dir', type=str, default=f'{pout}/weights')
parser.add_argument('--log-dir', type=str, default=f'{pout}/logs')
# parser.add_argument('--CUDA_VISIBLE_DEVICES', type=str, default='0,1,2,3,4,5,6,7')
parser.add_argument('--fold', type=int, default=0)
# parser.add_argument('--load-from', type=str, default='')
parser.add_argument('--device', type=str, default='cuda')

args = parser.parse_args([
    '--kernel-type', 'nest101_DDP_final_256_300w_f4_10ep_3e-5',
    '--enet-type', 'nest101',
    '--n-epochs', '3',
    '--device', 'cpu',
    
])

if args.enet_type == 'nest101':
    ModelClass = ResNest101_Landmark
elif args.enet_type == 'rex20':
    ModelClass = RexNet20_Landmark
else:
    ModelClass = Effnet_Landmark

device = torch.device(args.device)

## `main()`

In [5]:
df = df.head(10)

In [6]:
# train.py
# def main():

out_dim = df.label_group.nunique()
print(f"out_dim = {out_dim}")

i2grp = sorted(df.label_group.unique())
grp2i = {v: k for k,v in enumerate(i2grp)}
df.label_group = df.label_group.map(grp2i)

# get adaptive margin
tmp = np.sqrt(1 / np.sqrt(df.label_group.value_counts().sort_index().values))
margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05
print("margins:",len(margins), margins[:3])

# get augmentations
transforms_train, transforms_val = get_transforms(args.image_size)


df_train = df[df['fold'] != args.fold]
df_valid = df[df['fold'] == args.fold].reset_index(drop=True).query("index % 15==0")
print("train valid lens", len(df_train), len(df_valid))

dataset_train = ShopeeDataset(df_train, 'train', 'train', transform=transforms_train)
dataset_valid = ShopeeDataset(df_valid, 'train', 'val', transform=transforms_val)
valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=args.batch_size, num_workers=args.num_workers)


# model
model = ModelClass(args.enet_type, out_dim=out_dim)
model = model.to(device)

# loss func
def criterion(logits_m, target):
    arc = ArcFaceLossAdaptiveMargin(margins=margins, s=80)
    loss_m = arc(logits_m, target, out_dim)
    return loss_m

# optimizer
optimizer = optim.Adam(model.parameters(), lr=args.init_lr)


# lr scheduler
scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, args.n_epochs-1)


out_dim = 10
margins: 10 [nan nan nan]
train valid lens 7 1


  del sys.path[0]


In [9]:
# train & valid loop
gap_m_max = 0.
model_file = os.path.join(args.model_dir, f'{args.kernel_type}_fold{args.fold}.pth')
for epoch in range(args.start_from_epoch, args.n_epochs+1):

    print(time.ctime(), 'Epoch:', epoch)
    scheduler_cosine.step(epoch - 1)

    train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=args.batch_size, num_workers=args.num_workers,
                                              shuffle=True, drop_last=True)        

    train_loss = train_epoch(model, train_loader, optimizer, criterion)
    val_loss, acc_m = val_epoch(model, valid_loader, criterion)



0it [00:00, ?it/s]

Fri Mar 26 15:28:07 2021 Epoch: 1


0it [00:00, ?it/s]
100%|██████████| 1/1 [00:01<00:00,  1.09s/it]
0it [00:00, ?it/s]

Fri Mar 26 15:28:08 2021 Epoch: 2


0it [00:00, ?it/s]
100%|██████████| 1/1 [00:01<00:00,  1.14s/it]
0it [00:00, ?it/s]

Fri Mar 26 15:28:10 2021 Epoch: 3


0it [00:00, ?it/s]
100%|██████████| 1/1 [00:01<00:00,  1.09s/it]


In [10]:
!nvidia-smi

[autoreload of models failed: Traceback (most recent call last):
  File "/data/anaconda3/envs/shopee/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/data/anaconda3/envs/shopee/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 410, in superreload
    update_generic(old_obj, new_obj)
  File "/data/anaconda3/envs/shopee/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 347, in update_generic
    update(a, b)
  File "/data/anaconda3/envs/shopee/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 302, in update_class
    if update_generic(old_obj, new_obj): continue
  File "/data/anaconda3/envs/shopee/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 347, in update_generic
    update(a, b)
  File "/data/anaconda3/envs/shopee/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 302, in update_class
    if update_generic(o

Fri Mar 26 15:32:21 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.100      Driver Version: 440.100      CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:1E.0 Off |                    0 |
| N/A   36C    P0    39W / 300W |      0MiB / 16160MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru