In [1]:
%matplotlib inline
import torch
import torchvision
from torch import nn
import torch.nn.functional as F
from torch.utils.data import WeightedRandomSampler
import utils
from pathlib import Path
from PIL import Image
import pandas as pd
import json
import numpy as np
from collections import OrderedDict

In [2]:
data_dir = Path(r'../data/dog-breed-identification')
img_train_dir = data_dir / 'train'
img_test_dir = data_dir / 'test'
labels_csv = data_dir / 'labels.csv'

In [3]:
class_text = """
affenpinscher
afghan_hound
african_hunting_dog
airedale
american_staffordshire_terrier
appenzeller
australian_terrier
basenji
basset
beagle
bedlington_terrier
bernese_mountain_dog
black-and-tan_coonhound
blenheim_spaniel
bloodhound
bluetick
border_collie
border_terrier
borzoi
boston_bull
bouvier_des_flandres
boxer
brabancon_griffon
briard
brittany_spaniel
bull_mastiff
cairn
cardigan
chesapeake_bay_retriever
chihuahua
chow
clumber
cocker_spaniel
collie
curly-coated_retriever
dandie_dinmont
dhole
dingo
doberman
english_foxhound
english_setter
english_springer
entlebucher
eskimo_dog
flat-coated_retriever
french_bulldog
german_shepherd
german_short-haired_pointer
giant_schnauzer
golden_retriever
gordon_setter
great_dane
great_pyrenees
greater_swiss_mountain_dog
groenendael
ibizan_hound
irish_setter
irish_terrier
irish_water_spaniel
irish_wolfhound
italian_greyhound
japanese_spaniel
keeshond
kelpie
kerry_blue_terrier
komondor
kuvasz
labrador_retriever
lakeland_terrier
leonberg
lhasa
malamute
malinois
maltese_dog
mexican_hairless
miniature_pinscher
miniature_poodle
miniature_schnauzer
newfoundland
norfolk_terrier
norwegian_elkhound
norwich_terrier
old_english_sheepdog
otterhound
papillon
pekinese
pembroke
pomeranian
pug
redbone
rhodesian_ridgeback
rottweiler
saint_bernard
saluki
samoyed
schipperke
scotch_terrier
scottish_deerhound
sealyham_terrier
shetland_sheepdog
shih-tzu
siberian_husky
silky_terrier
soft-coated_wheaten_terrier
staffordshire_bullterrier
standard_poodle
standard_schnauzer
sussex_spaniel
tibetan_mastiff
tibetan_terrier
toy_poodle
toy_terrier
vizsla
walker_hound
weimaraner
welsh_springer_spaniel
west_highland_white_terrier
whippet
wire-haired_fox_terrier
yorkshire_terrier"""

In [4]:
label_list = list()
for line in class_text.split('\n'):
    class_name = str(line).strip()
    if class_name == '':
        continue
    label_list.append(class_name)

In [5]:
label_2_class_no_dict = dict()
class_no = 0
for label in label_list:
    label_2_class_no_dict[label] = class_no
    class_no += 1
    
class_no_2_label_dict = dict()
for label, no in label_2_class_no_dict.items():
    class_no_2_label_dict[str(no)] = label

len(label_2_class_no_dict), len(class_no_2_label_dict)

(120, 120)

In [6]:
labels_df = pd.read_csv(labels_csv.as_posix())
labels_df.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [7]:
labels_value_counts = labels_df.value_counts(subset='breed')
labels_value_counts

breed
scottish_deerhound      126
maltese_dog             117
afghan_hound            116
entlebucher             115
bernese_mountain_dog    114
                       ... 
golden_retriever         67
brabancon_griffon        67
komondor                 67
briard                   66
eskimo_dog               66
Length: 120, dtype: int64

In [8]:
labels_weights = 1.0 / labels_value_counts
labels_weights

breed
scottish_deerhound      0.007937
maltese_dog             0.008547
afghan_hound            0.008621
entlebucher             0.008696
bernese_mountain_dog    0.008772
                          ...   
golden_retriever        0.014925
brabancon_griffon       0.014925
komondor                0.014925
briard                  0.015152
eskimo_dog              0.015152
Length: 120, dtype: float64

In [9]:
labels_weights['maltese_dog'], labels_weights['boston_bull']

(0.008547008547008548, 0.011494252873563218)

In [10]:
train_id_class_no_mapping = dict()
train_label_2_sampler_weight_dict = dict()
for i, row in labels_df.iterrows():
    img_id = row['id']
    label_name = row['breed']
    label = label_2_class_no_dict[label_name]
    train_id_class_no_mapping[img_id] = label
    train_label_2_sampler_weight_dict[str(label)] = labels_weights[label_name]
    
print(len(train_id_class_no_mapping), len(train_label_2_sampler_weight_dict))

random_img_id = '000bec180eb18c7604dcecc8fe0dba07'
print(train_id_class_no_mapping[random_img_id], train_label_2_sampler_weight_dict['19'])
print(label_2_class_no_dict['boston_bull'], labels_weights['boston_bull'])

10222 120
19 0.011494252873563218
19 0.011494252873563218


In [11]:
class DogDataset(torch.utils.data.Dataset):
    """
        dog数据集
    """
    def __init__(self, data_dir: Path, transforms=None, is_train=True):
        """
            data_dir: 数据集目录
            data_csv: 数据-类对应文件
            transforms：数据增广
        """
        self.data_dir = data_dir
        self.transforms = transforms
        self.is_train = is_train
        self.data_list = self.read_data()
        
    def read_data(self):
        """
            读取数据
        """
        return [img_path for img_path in self.data_dir.iterdir()]     

    def __getitem__(self, i):
        # print(self.data_list[i])
        img_path = self.data_list[i]
        img = Image.open(img_path.as_posix()).convert('RGB')
        if self.transforms is not None:
            img = self.transforms(img)          
        if self.is_train:
            img_id = img_path.stem
            label = train_id_class_no_mapping[img_id]
            return img, label     
        return img
    
    def __len__(self):
        return len(self.data_list)

In [12]:
# 使用RGB通道的均值和标准差，以标准化每个通道
normalize = torchvision.transforms.Normalize(
    [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

train_augs = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=128),
    torchvision.transforms.RandomResizedCrop(size=(112, 112), scale=(0.81, 1), ratio=(0.8, 1.2)),
    torchvision.transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    torchvision.transforms.RandomRotation(degrees=30),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    normalize])

test_augs = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=112),
    torchvision.transforms.CenterCrop(size=112),
    torchvision.transforms.ToTensor(),
    normalize])

In [13]:
train_dataset_all = DogDataset(img_train_dir, transforms=train_augs)
test_dataset = DogDataset(img_test_dir, transforms=test_augs, is_train=False)

In [14]:
valid_dataset_len = int(len(train_dataset_all) / 10)
train_dataset_len = len(train_dataset_all) - valid_dataset_len

train_dataset, valid_dataset = torch.utils.data.random_split(
    dataset=train_dataset_all,
    lengths=[train_dataset_len, valid_dataset_len],
    generator=torch.Generator().manual_seed(0)
)

In [15]:
# # 获取训练集和验证集的抽样权重列表
# train_dataset_sampler_weight_list = list()
# for img_tensor, label in train_dataset:
#     train_dataset_sampler_weight_list.append(train_label_2_sampler_weight_dict[str(label)])

# valid_dataset_sampler_weight_list = list()
# for img_tensor, label in valid_dataset:
#     valid_dataset_sampler_weight_list.append(train_label_2_sampler_weight_dict[str(label)])
    
# len(train_dataset_sampler_weight_list), len(valid_dataset_sampler_weight_list)

In [16]:
# # 实例化训练集和验证集抽样器（按权重有放回抽样，实现重采样效果）
# train_dataset_sampler = WeightedRandomSampler(
#     train_dataset_sampler_weight_list, len(train_dataset_sampler_weight_list), replacement=True)
# valid_dataset_sampler = WeightedRandomSampler(
#     valid_dataset_sampler_weight_list, len(valid_dataset_sampler_weight_list), replacement=True)

In [17]:
net = torchvision.models.resnet34(pretrained=True)
net

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [18]:
# 自定义fc层，将最终的输出类别设置为当前需要的120
net.fc = nn.Linear(net.fc.in_features, 256)
net = nn.Sequential(
    OrderedDict(
        [('resnet', net),
         # ('bn', nn.BatchNorm1d(1024)),
         ('relu', nn.ReLU(inplace=True)),
         # ('dropout', nn.Dropout(0.1)),
         ('fc', nn.Linear(256, 120))]
    )
)
nn.init.xavier_uniform_(net.resnet.fc.weight);
nn.init.xavier_uniform_(net.fc.weight);
net

Sequential(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_r

In [19]:
# 测试网络畅通
random_x = torch.randn(size=[2, 3, 112, 112])
random_y = net(random_x)
random_y.shape

torch.Size([2, 120])

In [17]:
# 如果param_group=True，输出层中的模型参数将使用十倍的学习率
def train_fine_tuning(net, learning_rate, batch_size=128, num_epochs=5, weight_decay=1e-3, 
                      fc_lr_times=10, param_group=True, freeze=False, train_all_data=False):
    devices = utils.try_all_gpus()
    loss = nn.CrossEntropyLoss(reduction="none")
    if freeze:
        for name, param in net.named_parameters():
            if name not in ["resnet.fc.weight", "resnet.fc.bias", "fc.weight", "fc.bias"]:
                param.requires_grad = False
    if param_group:
        params_1x = [param for name, param in net.named_parameters()
             if name not in ["resnet.fc.weight", "resnet.fc.bias", "fc.weight", "fc.bias"]]
        trainer = torch.optim.SGD(
            [
                {'params': params_1x}, 
                {'params': net.fc.parameters(), 
                 'lr': learning_rate * fc_lr_times}
            ], 
            lr=learning_rate, 
            weight_decay=weight_decay
        )
    else:
        trainer = torch.optim.SGD(net.parameters(), lr=learning_rate, weight_decay=weight_decay)
    
    if train_all_data:
        train_iter = torch.utils.data.DataLoader(
            train_dataset_all, batch_size=batch_size, shuffle=True)
        valid_iter = None
    else:
        # train_iter = torch.utils.data.DataLoader(
        #     train_dataset, batch_size=batch_size, sampler=train_dataset_sampler)
        # valid_iter = torch.utils.data.DataLoader(
        #     valid_dataset, batch_size=batch_size, sampler=valid_dataset_sampler)
        train_iter = torch.utils.data.DataLoader(
            train_dataset, batch_size=batch_size, shuffle=True)
        valid_iter = torch.utils.data.DataLoader(
            valid_dataset, batch_size=batch_size)
    utils.train_gpus(net, train_iter, valid_iter, loss, trainer, num_epochs, devices)

In [21]:
train_times = 5
for i in range(train_times):
    print(f'--- {i+1}/{train_times} ---')
    # 训练模型
    train_fine_tuning(net, learning_rate=5e-5, batch_size=128, num_epochs=10, 
                      weight_decay=1e-4, fc_lr_times=10)
    # 保存训练好的模型weight
    save_model_path = Path(fr'../data/dog_resnet34_state_dict_{i+1}.sd')
    torch.save(net.state_dict(), save_model_path.as_posix())

--- 1/5 ---
training on: [device(type='cuda', index=0)], [2024-06-15 15:57:43]
epoch: 1/10, loss: 4.880, train_acc: 0.030, test_acc: 0.065, epoch_time: [0:00:01:06]
epoch: 2/10, loss: 4.005, train_acc: 0.133, test_acc: 0.189, epoch_time: [0:00:01:04]
epoch: 3/10, loss: 3.231, train_acc: 0.268, test_acc: 0.288, epoch_time: [0:00:01:04]
epoch: 4/10, loss: 2.654, train_acc: 0.369, test_acc: 0.359, epoch_time: [0:00:01:04]
epoch: 5/10, loss: 2.240, train_acc: 0.450, test_acc: 0.415, epoch_time: [0:00:01:04]
epoch: 6/10, loss: 1.990, train_acc: 0.486, test_acc: 0.443, epoch_time: [0:00:01:03]
epoch: 7/10, loss: 1.800, train_acc: 0.528, test_acc: 0.448, epoch_time: [0:00:01:03]
epoch: 8/10, loss: 1.660, train_acc: 0.566, test_acc: 0.496, epoch_time: [0:00:01:07]
epoch: 9/10, loss: 1.547, train_acc: 0.590, test_acc: 0.483, epoch_time: [0:00:01:07]
epoch: 10/10, loss: 1.458, train_acc: 0.605, test_acc: 0.514, epoch_time: [0:00:01:06]
*** training speed: 153.0 examples/sec on [device(type='cuda

In [23]:
# 调整学习率、权重衰减等参数，多次学习调整
train_fine_tuning(net, learning_rate=1e-5, batch_size=128, num_epochs=20, 
                  weight_decay=1e-4, fc_lr_times=10, train_all_data=True)

training on: [device(type='cuda', index=0)], [2024-06-15 19:01:40]
epoch: 1/20, loss: 0.276, train_acc: 0.934, test_acc: 0.000, epoch_time: [0:00:01:08]
epoch: 2/20, loss: 0.277, train_acc: 0.934, test_acc: 0.000, epoch_time: [0:00:01:08]
epoch: 3/20, loss: 0.266, train_acc: 0.937, test_acc: 0.000, epoch_time: [0:00:01:07]
epoch: 4/20, loss: 0.269, train_acc: 0.935, test_acc: 0.000, epoch_time: [0:00:01:08]
epoch: 5/20, loss: 0.261, train_acc: 0.939, test_acc: 0.000, epoch_time: [0:00:01:06]
epoch: 6/20, loss: 0.253, train_acc: 0.937, test_acc: 0.000, epoch_time: [0:00:01:05]
epoch: 7/20, loss: 0.255, train_acc: 0.938, test_acc: 0.000, epoch_time: [0:00:01:07]
epoch: 8/20, loss: 0.242, train_acc: 0.941, test_acc: 0.000, epoch_time: [0:00:01:08]
epoch: 9/20, loss: 0.240, train_acc: 0.945, test_acc: 0.000, epoch_time: [0:00:01:07]
epoch: 10/20, loss: 0.238, train_acc: 0.943, test_acc: 0.000, epoch_time: [0:00:01:05]
epoch: 11/20, loss: 0.233, train_acc: 0.943, test_acc: 0.000, epoch_time

In [18]:
model_i = 6
save_model_path = Path(fr'../data/dog_resnet34_state_dict_{model_i}.sd')

In [25]:
# 保存训练好的模型weight
torch.save(net.state_dict(), save_model_path.as_posix())

In [19]:
# 加载训练好的模型
net = None
devices = utils.try_all_gpus()
# 加载模型参数state_dict
net = torchvision.models.resnet34()

net.fc = nn.Linear(net.fc.in_features, 256)
net = nn.Sequential(
    OrderedDict(
        [('resnet', net),
         # ('bn', nn.BatchNorm1d(1024)),
         ('relu', nn.ReLU(inplace=True)),
         # ('dropout', nn.Dropout(0.1)),
         ('fc', nn.Linear(256, 120))]
    )
)

net.load_state_dict(torch.load(save_model_path.as_posix()))
# net.to(torch.device('cpu'))
net.to(devices[0])

Sequential(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_r

In [33]:
net.eval()
batch_size = 256
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

In [34]:
y_batch_list = list()

In [35]:
with torch.no_grad():
    for i, X in enumerate(test_iter):
        # X = X.to(torch.device('cpu'))
        X = X.to(devices[0])
        y_hat = F.softmax(net(X), dim=1)
        y_batch_list.append(y_hat.cpu().numpy())
        print(f'{i+1}/{len(test_iter)}', end='\r')

41/41

In [31]:
len(y_list), y_list[0].shape

(41, (256, 120))

In [36]:
y_hat_all = np.concatenate(y_list, axis=0)

In [38]:
y_hat_all.shape

(10357, 120)

In [39]:
y_df = pd.DataFrame(y_hat_all, columns=label_list)
y_df.head()

Unnamed: 0,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,beagle,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,2.711867e-06,3e-05,1.747541e-08,1.452441e-10,6.543413e-09,2.835062e-07,1.48094e-08,3.90989e-09,2.039874e-08,2.317103e-07,...,5.250623e-06,2.924174e-06,9.131053e-08,1.185091e-06,6.872342e-08,2.456299e-07,2.609945e-08,1.418848e-07,3.021624e-08,1.065663e-07
1,7.867058e-06,0.000124,1.635723e-05,2.549136e-07,2.094797e-05,4.735816e-06,3.190464e-05,1.034327e-05,1.838679e-05,3.313416e-05,...,0.0003122038,7.614285e-05,1.414518e-06,5.489089e-05,6.658959e-07,3.370886e-06,0.008700495,0.0002743236,8.404207e-05,9.231219e-05
2,2.203002e-07,6e-06,4.624247e-08,3.173095e-08,3.28241e-07,8.314909e-08,9.987005e-09,1.197046e-10,3.231467e-08,2.219173e-07,...,3.065106e-07,1.204781e-07,8.429366e-09,3.010364e-06,2.293262e-05,1.087413e-05,1.619083e-07,1.30159e-06,1.441378e-07,2.485281e-08
3,0.002044039,0.004334,9.752688e-05,0.00014455,1.154016e-06,8.205259e-05,2.406887e-06,9.443501e-07,1.597639e-06,1.389095e-05,...,0.01218509,1.010641e-06,0.0001844949,7.747636e-07,0.002210342,1.616564e-05,1.476835e-07,2.809275e-05,6.2106e-07,1.61252e-05
4,2.555161e-05,0.000527,7.607624e-09,4.462957e-07,1.514729e-07,1.095726e-07,3.124774e-06,5.639925e-09,1.253262e-07,3.247613e-07,...,1.153274e-07,2.324452e-08,4.966965e-09,6.765757e-08,6.246e-08,6.502719e-08,4.76404e-07,2.947104e-07,1.563451e-07,9.705115e-08


In [43]:
id_list = list()
for img_path in test_dataset.data_list:
    img_id = img_path.stem
    id_list.append(img_id)

len(id_list)

10357

In [44]:
y_df.insert(loc=0, column='id', value=id_list)
y_df.head()

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,2.711867e-06,3e-05,1.747541e-08,1.452441e-10,6.543413e-09,2.835062e-07,1.48094e-08,3.90989e-09,2.039874e-08,...,5.250623e-06,2.924174e-06,9.131053e-08,1.185091e-06,6.872342e-08,2.456299e-07,2.609945e-08,1.418848e-07,3.021624e-08,1.065663e-07
1,00102ee9d8eb90812350685311fe5890,7.867058e-06,0.000124,1.635723e-05,2.549136e-07,2.094797e-05,4.735816e-06,3.190464e-05,1.034327e-05,1.838679e-05,...,0.0003122038,7.614285e-05,1.414518e-06,5.489089e-05,6.658959e-07,3.370886e-06,0.008700495,0.0002743236,8.404207e-05,9.231219e-05
2,0012a730dfa437f5f3613fb75efcd4ce,2.203002e-07,6e-06,4.624247e-08,3.173095e-08,3.28241e-07,8.314909e-08,9.987005e-09,1.197046e-10,3.231467e-08,...,3.065106e-07,1.204781e-07,8.429366e-09,3.010364e-06,2.293262e-05,1.087413e-05,1.619083e-07,1.30159e-06,1.441378e-07,2.485281e-08
3,001510bc8570bbeee98c8d80c8a95ec1,0.002044039,0.004334,9.752688e-05,0.00014455,1.154016e-06,8.205259e-05,2.406887e-06,9.443501e-07,1.597639e-06,...,0.01218509,1.010641e-06,0.0001844949,7.747636e-07,0.002210342,1.616564e-05,1.476835e-07,2.809275e-05,6.2106e-07,1.61252e-05
4,001a5f3114548acdefa3d4da05474c2e,2.555161e-05,0.000527,7.607624e-09,4.462957e-07,1.514729e-07,1.095726e-07,3.124774e-06,5.639925e-09,1.253262e-07,...,1.153274e-07,2.324452e-08,4.966965e-09,6.765757e-08,6.246e-08,6.502719e-08,4.76404e-07,2.947104e-07,1.563451e-07,9.705115e-08


In [46]:
y_df.to_csv((data_dir / 'submission.csv').as_posix(), index=False)