In [None]:
import os
import cv2
import copy
import glob
import json
import shutil
import random
import zipfile
import functools
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from collections import OrderedDict
from sklearn import preprocessing
from sklearn.cluster import KMeans

import torch
import torch.nn.functional as F
import torchvision
from torch import nn
from torch.nn import init
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.nn.modules.loss import _WeightedLoss
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import models
from torchvision import transforms

from albumentations.pytorch import ToTensorV2
from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout,
    ShiftScaleRotate, CenterCrop, Resize, RandomCrop)

In [None]:
def mkdir(path):
    """Create directory ``path`` together with any missing parents.

    Calling it on a directory that already exists is a no-op.
    """
    os.makedirs(name=path, exist_ok=True)

def get_img(path, return_axis0=False):
    """Read the image at ``path`` with ``cv2.imread``.

    Parameters:
        path (str)           -- image file to read
        return_axis0 (bool)  -- if True, return only channel 0 as a 2-D array

    Returns whatever ``cv2.imread`` produced (channels in OpenCV's BGR order), or its
    first channel when ``return_axis0`` is set.
    """
    im_bgr = cv2.imread(path)
    return im_bgr[:,:,0] if return_axis0 else im_bgr
    
def rescale(img):
    """Map every pixel strictly between 0 and 255 to 64; leave 0 and 255 untouched.

    Returns a new array, the input is not modified.
    """
    is_midtone = np.logical_and(img > 0, img < 255)
    return np.where(is_midtone, 64, img)

def show_file_num():
    """Count files per (top-level directory, class label) and print the noise ratio.

    For every label in ``AUG_CFG`` and every entry ``d`` of the current directory, the
    number of files in ``./d/<label>`` is recorded when that path exists. The counts are
    pivoted into one row per label and one column per directory.

    Returns the pivoted DataFrame. Expects both a ``noise`` and a ``clean`` column to be
    present, since the printed ratio reads them.
    """
    rows = [
        [d, t, len(os.listdir(f"./{d}/{t}"))]
        for t in AUG_CFG.keys()
        for d in os.listdir("./")
        if os.path.exists(f"./{d}/{t}")
    ]
    counts = pd.DataFrame(rows, columns=["type","label","cnt"])
    table  = pd.pivot_table(counts, index="label", columns="type", values="cnt", fill_value=0)
    table  = table.reset_index()
    noise_total = table["noise"].sum()
    clean_total = table["clean"].sum()
    print("Noise ratio: ", noise_total / (noise_total + clean_total))
    return table
    
def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and CUDA), sets
    ``PYTHONHASHSEED`` in the environment, and configures cuDNN for deterministic
    behaviour.

    Parameters:
        seed (int) -- the seed shared by all generators
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark mode lets cuDNN auto-tune and pick algorithms per run, which can make
    # results differ between runs; it has to be off when determinism is requested.
    torch.backends.cudnn.benchmark = False

seed_everything(42)

In [None]:
class ResnetGenerator(nn.Module):
    """Image-to-image generator built around a stack of residual blocks.

    Layout: 7x7 conv stem -> two stride-2 downsampling convs -> ``n_blocks`` ResnetBlocks
    -> two stride-2 transposed convs -> 7x7 conv -> Tanh.
    Adapted from Justin Johnson's neural style transfer project
    (https://github.com/jcjohnson/fast-neural-style).
    """

    def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, padding_type='reflect'):
        """Build the layer stack and store it as ``self.model`` (an ``nn.Sequential``).

        Parameters:
            input_nc (int)      -- number of channels of the input images
            output_nc (int)     -- number of channels of the output images
            ngf (int)           -- base width: output channels of the first conv and
                                   input channels of the final conv
            norm_layer          -- normalization layer class, or a functools.partial of one
            use_dropout (bool)  -- forwarded to every ResnetBlock
            n_blocks (int)      -- number of ResnetBlocks (must be >= 0)
            padding_type (str)  -- forwarded to every ResnetBlock: reflect | replicate | zero
        """
        assert(n_blocks >= 0)
        super(ResnetGenerator, self).__init__()

        # Convolutions get a bias term only when the normalization is InstanceNorm2d.
        norm_cls = norm_layer.func if type(norm_layer) == functools.partial else norm_layer
        use_bias = norm_cls == nn.InstanceNorm2d

        n_downsampling = 2

        # Stem. The position of every layer in this list is significant: it becomes the
        # index used in state-dict keys ("model.<index>.weight").
        layers = [nn.ReflectionPad2d(3),
                  nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias),
                  norm_layer(ngf),
                  nn.ReLU(True)]

        # Downsampling: channel count doubles at every stage.
        channels = ngf
        for _ in range(n_downsampling):
            layers.extend([nn.Conv2d(channels, channels * 2, kernel_size=3, stride=2, padding=1, bias=use_bias),
                           norm_layer(channels * 2),
                           nn.ReLU(True)])
            channels = channels * 2

        # Residual trunk at ngf * 2**n_downsampling channels.
        layers.extend(
            ResnetBlock(channels, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)
            for _ in range(n_blocks)
        )

        # Upsampling: channel count halves at every stage, ending back at ngf.
        for _ in range(n_downsampling):
            halved = int(channels / 2)
            layers.extend([nn.ConvTranspose2d(channels, halved,
                                              kernel_size=3, stride=2,
                                              padding=1, output_padding=1,
                                              bias=use_bias),
                           norm_layer(halved),
                           nn.ReLU(True)])
            channels = halved

        # Output head.
        layers.extend([nn.ReflectionPad2d(3),
                       nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0),
                       nn.Tanh()])

        self.model = nn.Sequential(*layers)

    def forward(self, input):
        """Run ``input`` through the sequential layer stack."""
        return self.model(input)

class ResnetBlock(nn.Module):
    """Residual block: the input plus the output of a two-convolution branch."""

    def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias):
        """Create the block; the convolutional branch is stored as ``self.conv_block``.

        The branch is assembled by ``build_conv_block`` and the skip connection is applied
        in ``forward``. Original ResNet paper: https://arxiv.org/pdf/1512.03385.pdf
        """
        super(ResnetBlock, self).__init__()
        self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, use_dropout, use_bias)

    def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, use_bias):
        """Assemble the convolutional branch.

        Parameters:
            dim (int)           -- channel count of both 3x3 convolutions (in and out)
            padding_type (str)  -- reflect | replicate | zero
            norm_layer          -- callable building a normalization layer for ``dim`` channels
            use_dropout (bool)  -- insert Dropout(0.5) after the first ReLU
            use_bias (bool)     -- whether the convolutions carry a bias term

        Returns an nn.Sequential of
        [pad] conv norm ReLU [dropout] [pad] conv norm,
        where the explicit pad layers exist only for 'reflect' and 'replicate'
        ('zero' uses the convolution's own padding=1 instead).

        Raises NotImplementedError for any other ``padding_type``.
        """
        def padded_conv():
            # One "[explicit pad] + 3x3 conv + norm" stage; used twice below.
            if padding_type == 'reflect':
                stage, conv_padding = [nn.ReflectionPad2d(1)], 0
            elif padding_type == 'replicate':
                stage, conv_padding = [nn.ReplicationPad2d(1)], 0
            elif padding_type == 'zero':
                stage, conv_padding = [], 1
            else:
                raise NotImplementedError('padding [%s] is not implemented' % padding_type)
            stage.append(nn.Conv2d(dim, dim, kernel_size=3, padding=conv_padding, bias=use_bias))
            stage.append(norm_layer(dim))
            return stage

        layers = padded_conv()
        layers.append(nn.ReLU(True))
        if use_dropout:
            layers.append(nn.Dropout(0.5))
        layers.extend(padded_conv())

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return ``x`` plus the convolutional branch applied to ``x``."""
        return x + self.conv_block(x)

### Configuration

In [None]:
# Paths and runtime settings shared by the cells below.
CFG = dict(
    input_path="../input/data-centric-competition-data/data_centric_clean_noise_labeled_data/data_centric_clean_noise_labeled_data/",
    maug_path="../input/data-centric-competition-data/data_centric_make_augmented_data/data_centric_make_augmented_data/",
    img_size=128,
    num_workers=4,
    device="cuda",
)

# Per-label flip settings, one row per class label. The four flags are unpacked, in
# this order, as:
#   horizontal_flip, vertical_flip, horizontal_vertical_flip, add_with_horizontal_flip
_AUG_FLAG_ROWS = [
    ("i",    False, False, True,  False),
    ("ii",   False, False, True,  False),
    ("iii",  False, False, True,  False),
    ("iv",   False, False, False, True),
    ("v",    True,  False, False, False),
    ("vi",   False, False, False, True),
    ("vii",  False, False, False, False),
    ("viii", False, False, False, False),
    ("ix",   False, True,  False, False),
    ("x",    False, True,  True,  False),
]
AUG_CFG = {label: flags for label, *flags in _AUG_FLAG_ROWS}

### Augmentation methods I implemented myself

In [None]:
def get_object(path):
    """Load an image and crop it to the bounding box of its foreground.

    The image is converted to grayscale, binarised with an inverted Otsu threshold (so
    dark pixels become non-zero), median-filtered, and the extent of the non-zero pixels
    along each axis defines the crop. A one-pixel margin is kept on every side where the
    image allows it.

    Parameters:
        path (str) -- image file to read

    Returns the cropped colour image (channels in OpenCV's BGR order). If no foreground
    is detected the image is returned uncropped.

    Raises OSError if ``cv2.imread`` cannot read ``path``.
    """
    def sum_with_axis(img, axis):
        """Return slice bounds (start, stop) of the non-zero extent after summing over ``axis``."""
        img_sum = img.sum(axis)
        nonzero = np.flatnonzero(img_sum)
        if nonzero.size == 0:
            # Nothing detected: keep the full extent instead of failing on an empty index.
            return 0, len(img_sum)
        _min = max(int(nonzero[0]) - 1, 0)
        # `stop` is exclusive, so +1 is needed just to include the last foreground line
        # and another +1 mirrors the one-pixel margin used on the start side.
        _max = min(int(nonzero[-1]) + 2, len(img_sum))
        return _min, _max
    img_bgr  = cv2.imread(path, cv2.IMREAD_COLOR)
    if img_bgr is None:
        raise OSError(f"cv2.imread could not read image: {path}")
    img_gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, img_bin = cv2.threshold(img_gray, 50, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    img_bin  = cv2.medianBlur(img_bin, 3)
    min_x, max_x = sum_with_axis(img_bin, 0)
    min_y, max_y = sum_with_axis(img_bin, 1)
    return img_bgr[min_y:max_y,min_x:max_x]

def add_black_line(img, line_width=2, add_x=True, add_y=True):
    """Return a copy of ``img`` with one or two full-length dark lines per enabled axis.

    Each line is centred on a randomly chosen column (``add_x``) or row (``add_y``), spans
    ``line_width`` pixels to either side, and is filled with the darkest value found in
    the image.

    Parameters:
        img               -- image array indexed as (row, column, ...)
        line_width (int)  -- half-thickness of each line in pixels
        add_x (bool)      -- draw the vertical lines
        add_y (bool)      -- draw the horizontal lines
    """
    img  = copy.deepcopy(img)
    h, w = img.shape[:2]
    # All four draws are made regardless of add_x / add_y, so the amount of randomness
    # consumed from `random` does not depend on the flags.
    n_x  = random.choices([1,2], k=1, weights=[0.7,0.3])[0]
    n_y  = random.choices([1,2], k=1, weights=[0.7,0.3])[0]
    xs   = random.sample(list(np.arange(w)), n_x)
    ys   = random.sample(list(np.arange(h)), n_y)
    fill = img.min()
    # The slice start is clamped at 0: a negative start would be interpreted as an offset
    # from the far edge, leaving lines near the border undrawn or in the wrong place.
    if add_x:
        for x in xs:
            img[:,max(x-line_width, 0):x+line_width] = fill
    if add_y:
        for y in ys:
            img[max(y-line_width, 0):y+line_width,:] = fill
    return img

def add_white_line(img, line_width=2):
    """Return a copy of ``img`` with 10, 13 or 16 full-width bright horizontal lines.

    Each line is centred on a randomly chosen row, spans ``line_width`` pixels to either
    side, and is filled with the brightest value found in the image.

    Parameters:
        img               -- image array indexed as (row, column, ...); needs at least as
                             many rows as the number of lines drawn
        line_width (int)  -- half-thickness of each line in pixels
    """
    img   = copy.deepcopy(img)
    h, w  = img.shape[:2]
    n_y   = random.choices([10,13,16], k=1)[0]
    add_y = random.sample(list(np.arange(h)), n_y)
    fill  = img.max()
    for y in add_y:
        # Clamp the start at 0: a negative start would count from the bottom edge and
        # the line near the top border would not be drawn.
        img[max(y-line_width, 0):y+line_width,:] = fill
    return img

def add_edge_noise(img):
    """Return a copy of ``img`` with zero-filled rectangles placed along its borders.

    Between one and four of the edges (0 = top, 1 = bottom, 2 = left, 3 = right) are
    picked at random: with probability 0.8 one or two edges, otherwise three or four.
    On every picked edge a rectangle of 20% of the image height by 20% of the image
    width is zeroed at a random position along that edge.
    """
    img = copy.deepcopy(img)

    # Decide how many edges are affected, then which ones.
    if np.random.rand(1)[0] < 0.8:
        counts, weights = [1,2], [0.7,0.3]
    else:
        counts, weights = [3,4], [0.6,0.4]
    n = random.choices(counts, k=1, weights=weights)[0]
    add_points = random.sample(list(np.arange(4)), n)

    h,  w  = img.shape[:2]
    hs, ws = int(h*0.2), int(w*0.2)

    def random_start(limit):
        # Random offset in [0, limit) for the free coordinate of a rectangle.
        return random.sample(list(np.arange(limit)), 1)[0]

    # Edges are processed in fixed order 0..3 so the random draws stay in sequence.
    if 0 in add_points:      # top
        left = random_start(w-ws)
        img[0:hs, left:left+ws] = 0
    if 1 in add_points:      # bottom
        left = random_start(w-ws)
        img[h-hs:h, left:left+ws] = 0
    if 2 in add_points:      # left
        top = random_start(h-hs)
        img[top:top+hs, 0:ws] = 0
    if 3 in add_points:      # right
        top = random_start(h-hs)
        img[top:top+hs, w-ws:w] = 0
    return img

def expand_centre(path, n_expand=100):
    """Widen the object in the image at ``path`` by repeating its centre column.

    The image is cropped with ``get_object`` and split at its horizontal midpoint; the
    one-pixel column just left of the midpoint is inserted ``n_expand + 1`` times between
    the two halves.

    Returns the widened image.
    """
    img  = get_object(path)
    w    = img.shape[1]
    wc   = int(w/2)
    centre_column = img[:,wc-1:wc]
    # one initial copy plus one more per expansion step
    copies = [centre_column] * (1 + max(n_expand, 0))
    return np.hstack([img[:,:wc], np.hstack(copies), img[:,wc:]])

### Definition of the CycleGAN generator loader and the albumentations pipelines

In [None]:
def get_generator(path):
    """Build the 9-block single-channel ResnetGenerator and load weights from ``path``.

    The generator uses InstanceNorm2d without affine parameters or running statistics.
    Every occurrence of "module." is stripped from the checkpoint keys before loading.
    The checkpoint is always deserialised onto the CPU, so loading does not depend on
    the device it was saved from; the caller moves the returned module where needed.

    Parameters:
        path (str) -- file holding the generator's state dict

    Returns the generator, on the CPU.
    """
    import warnings

    norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
    generator  = ResnetGenerator(input_nc=1, output_nc=1, ngf=64, norm_layer=norm_layer, use_dropout=False, n_blocks=9)
    checkpoint = torch.load(path, map_location="cpu")
    new_state_dict = OrderedDict()
    for k, v in checkpoint.items():
        name = k.replace("module.", "") # remove module.
        new_state_dict[name] = v
    # load params; strict=False tolerates extra keys in the checkpoint, but it would
    # also hide weights that failed to load, so report those explicitly.
    result = generator.load_state_dict(new_state_dict, strict=False)
    if result.missing_keys:
        warnings.warn(
            f"{len(result.missing_keys)} generator parameter(s) were not found in {path} "
            f"and keep their random initialisation, e.g. {result.missing_keys[:3]}"
        )
    return generator

def get_visual_transforms():
    """Pipeline that always applies a small random shift, scale and rotation."""
    jitter = ShiftScaleRotate(shift_limit=(-0.05, 0.05), scale_limit=(-0.07, 0.07), rotate_limit=(-10,10), p=1)
    return Compose([jitter], p=1)

def get_visual_noise_transforms():
    """Pipeline applying the shift/scale/rotate jitter followed by CoarseDropout with up to 30 holes."""
    jitter  = ShiftScaleRotate(shift_limit=(-0.05, 0.05), scale_limit=(-0.07, 0.07), rotate_limit=(-10,10), p=1)
    dropout = CoarseDropout(max_holes=30, p=1)
    return Compose([jitter, dropout], p=1)

def get_noise_transforms():
    """Pipeline that always applies CoarseDropout with its default settings."""
    dropout = CoarseDropout(p=1)
    return Compose([dropout], p=1)

def get_gan_transforms():
    """Preprocessing for the generators: resize to CFG['img_size'] square, normalize, convert to tensor."""
    side  = CFG['img_size']
    steps = [
        Resize(side, side),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1)

# Build each pipeline once; the cells below reuse these instances.
transforms_v, transforms_vn = get_visual_transforms(), get_visual_noise_transforms()
transforms_n, transforms_gan = get_noise_transforms(), get_gan_transforms()

# Run the augmentation on the competition data

In [None]:
# WARNING: this deletes everything in the current working directory.
!rm -rf ./*
# Count the labelled input images. Without recursive=True, "**" matches a single
# directory level, so the pattern is <input_path>/<clean|noise>/<label>/<file>.
NUM_CLEAN = len(glob.glob(f"{CFG['input_path']}clean/**/*"))
NUM_NOISE = len(glob.glob(f"{CFG['input_path']}noise/**/*"))
# Share of noisy images in the input, floored at 0.2.
RATIO_NOISE = NUM_NOISE / (NUM_NOISE + NUM_CLEAN)
RATIO_NOISE = max(0.2, RATIO_NOISE)
RATIO_NOISE

In [None]:
%%time
# WARNING: this deletes everything in the current working directory.
!rm -rf ./*

# Copy every input image into ./<clean|noise>/<label>/ and add the flipped variants
# enabled for that label in AUG_CFG.
for t in AUG_CFG.keys():
    horizontal_flip, vertical_flip, horizontal_vertical_flip, add_with_horizontal_flip = AUG_CFG[t]
    for c_n in ["clean", "noise"]:
        output_path = f"./{c_n}/{t}/"
        mkdir(output_path)
        input_path  = f"{CFG['input_path']}{c_n}/{t}"
        for p in os.listdir(input_path):
            img = get_img(f"{input_path}/{p}")
            shutil.copy(f"{input_path}/{p}", f"{output_path}/{p}")
            if horizontal_flip:
                # mirror left-right; written next to the original with an "hf_" prefix
                cv2.imwrite(f"{output_path}hf_{p}", img[:,::-1,:])
            if vertical_flip:
                # mirror top-bottom; "vf_" prefix
                cv2.imwrite(f"{output_path}vf_{p}", img[::-1,:,:])
            if horizontal_vertical_flip:
                # mirror both axes; "hv_" prefix
                cv2.imwrite(f"{output_path}hv_{p}", img[::-1,::-1,:])
            if add_with_horizontal_flip:
                # The mirrored image is stored under the *other* label: iv -> vi, vi -> iv.
                # Any other label with this flag set is skipped.
                if   t == "iv":
                    output_path_aw_hf = f"./{c_n}/vi/"
                elif t == "vi":
                    output_path_aw_hf = f"./{c_n}/iv/"
                else:
                    continue
                mkdir(output_path_aw_hf)
                cv2.imwrite(f"{output_path_aw_hf}aw_hf_{p}", img[:,::-1,:])

show_file_num()

In [None]:
%%time

RATIO_CENTRE_WHITE = 0.3
AUG_PATTERN  = {"ii"    : ["ii",   300, "i",   "i"],
                "iii-1" : ["iii",  150, "ii",  "i"],
                "iii-2" : ["iii",  150, "i",   "ii"],
                "vi"    : ["vi",   300, "v",   "i"],
                "vii-1" : ["vii",  150, "v",   "ii"],
                "vii-2" : ["vii",  150, "vi",  "i"],
                "viii-1": ["viii", 100, "v",   "iii"],
                "viii-2": ["viii", 100, "vi",  "ii"],
                "viii-3": ["viii", 100, "vii", "i"],
                "iv"    : ["iv",   300, "i",   "v"],
                "ix"    : ["ix",   300, "i",   "x"]}

for key, (t, n, l, r) in AUG_PATTERN.items():
    left_img_path  = os.listdir(CFG['maug_path']+l)
    left_max_idx   = len(left_img_path)
    left_sample_idxes  = list(np.arange(left_max_idx))
    right_img_path = os.listdir(CFG['maug_path']+r)
    right_max_idx  = len(right_img_path)
    right_sample_idxes = list(np.arange(right_max_idx))
    for i in range(n):
        left  = random.sample(left_sample_idxes,1)[0]
        left_path  = f"{CFG['maug_path']}{l}/{left_img_path[left]}"
        right = random.sample(right_sample_idxes,1)[0]
        right_path = f"{CFG['maug_path']}{r}/{right_img_path[right]}"
        
        left_img  = get_object(left_path)
        right_img = get_object(right_path)
        
        ly, lx  = left_img.shape[:2]
        ry, rx  = right_img.shape[:2]
        resizey = min(ly,ry)
        left_img  = cv2.resize(left_img,  (lx,resizey))
        right_img = cv2.resize(right_img, (rx,resizey))
        
        prob = np.random.rand(1)[0]
        if prob < RATIO_CENTRE_WHITE:
            centre_white = np.ones((resizey,100,3), dtype="int8") * 255
            convert_img  = np.hstack([left_img,centre_white,right_img])
            if prob < RATIO_CENTRE_WHITE/2:
                ub_black    = np.zeros((6,convert_img.shape[1],3), dtype="int8")
                convert_img = np.vstack([ub_black,convert_img[10:-10,:],ub_black])
        else:
            convert_img  = np.hstack([left_img,right_img])
        padx = int(min(lx,rx)/2)
        pady = int(resizey/2)
        convert_img = np.pad(convert_img, ((padx, padx), (pady, pady), (0,0)), constant_values=255)
        cv2.imwrite(f"./clean/{t}/maug{i}_{os.path.basename(right_path)}", convert_img)            
        
show_file_num()

In [None]:
%%time
!rm -rf ./clean_aug
!rm -rf ./noise_aug

# Number of augmented images generated per label.
NUM_LIMIT_CLEAN  = 1000
NUM_LIMIT_NOISE  = 500
# line_width arguments passed to add_black_line / add_white_line.
LINE_WIDTH_BLACK = 2
LINE_WIDTH_WHITE = 3

for t in AUG_CFG.keys():
    output_path_clean = f"./clean_aug/{t}/"
    output_path_noise = f"./noise_aug/{t}/"
    mkdir(output_path_clean)
    mkdir(output_path_noise)

    # Run an augmentation with clean data:
    # sample a clean image (with replacement), jitter it, rescale it, save it.
    input_path = f"./clean/{t}"
    paths = [f"{input_path}/{p}" for p in os.listdir(input_path)]
    idxes = list(np.arange(len(paths)))
    for i in range(NUM_LIMIT_CLEAN):
        # `prob` is not used in this loop; the draw still advances NumPy's global RNG.
        prob = np.random.rand(1)[0]
        s    = random.sample(idxes, 1)[0]
        path = paths[s]
        img  = get_img(path)
        img  = rescale(transforms_v(image=img)['image'])
        cv2.imwrite(f"{output_path_clean}aug{i}_{os.path.basename(path)}", img)
        
    # Run an augmentation with noise data:
    # the source pool is the clean images plus the real noisy images of this label.
    input_path_clean = f"./clean/{t}"
    input_path_noise = f"./noise/{t}"
    paths  = [f"{input_path_clean}/{p}" for p in os.listdir(input_path_clean)]
    paths += [f"{input_path_noise}/{p}" for p in os.listdir(input_path_noise)]
    idxes   = list(np.arange(len(paths)))
    for i in range(NUM_LIMIT_NOISE):
        # This first draw is overwritten below before it is read; it still advances the RNG.
        prob = np.random.rand(1)[0]
        s    = random.sample(idxes, 1)[0]
        path = paths[s]
        img  = get_img(path)
        # Pick one of five noise recipes with equal probability (0.2 each).
        prob = np.random.rand(1)[0]
        if   prob < 0.20:
            # Add black dot noise
            img = transforms_vn(image=img)['image']
        elif prob < 0.40:
            # Add edge noise
            img = transforms_v(image=img)['image']
            img = add_edge_noise(img)
        elif prob < 0.60:
            # Add black line
            img = transforms_v(image=img)['image']
            img = add_black_line(img, LINE_WIDTH_BLACK)
        elif prob < 0.80:
            # Add black line and white line
            img = transforms_v(image=img)['image']
            img = add_black_line(img, LINE_WIDTH_BLACK, add_y=False)
            img = add_white_line(img, LINE_WIDTH_WHITE)
        else:
            # Add white line to delete part of the number
            img = transforms_v(image=img)['image']
            img = add_white_line(img, LINE_WIDTH_WHITE)
        img = rescale(img)
        # .replace() runs on the whole output path, so any "clean" in it becomes "noise".
        cv2.imwrite(f"{output_path_noise}aug{i}_{os.path.basename(path)}".replace("clean","noise"), img)

show_file_num()

In [None]:
%%time

NOISE_TYPE      = 5
RATIO_ADD_NOISE = 1

path = "../input/data-centric-cyclegan-models2/"
g_paths = [["dot", path+"noise_dot/generator_5.pth", 0],
           ["bg",  path+"noise_background/generator_3.pth", 0.5]]
for t in AUG_CFG.keys():
    input_path_clean  = f"./clean/{t}"
    output_path_noise = f"./noise_aug/{t}"
    paths = [f"{input_path_clean}/{p}" for p in os.listdir(input_path_clean)]
    random.shuffle(paths)
    paths = paths[:int(len(os.listdir(output_path_noise))/NOISE_TYPE*RATIO_ADD_NOISE)]
    
    for name, gp, rotate_prob in g_paths:
        generator = get_generator(gp)
        generator = generator.to(CFG['device'])
        for p in paths:
            img  = get_img(p)
            prob = np.random.rand(1)[0]
            if prob < rotate_prob:
                img = img[::-1,::-1,:]
            img = transforms_gan(image=img)['image'][0,:,:][np.newaxis,np.newaxis,:,:]
            img = img.to(CFG['device'])
            g_img = generator(img).detach().to("cpu").numpy()[0,0,:,:]
            g_img = ((g_img*0.229)+0.485)*255
            g_img = np.where(g_img>140, 255, g_img)[:,:,np.newaxis]
            g_img = np.concatenate([g_img, g_img, g_img], axis=2).astype(int)
            g_img = rescale(g_img)
            if prob < rotate_prob:
                g_img = g_img[::-1,::-1,:]
            cv2.imwrite(f"{output_path_noise}/cyclegan_{name}_{os.path.basename(p)}", g_img)
            
show_file_num()

In [None]:
%%time

# Validation images per label, and the share of the noisy part taken from real noise.
NUM_VALIDATION   = 300
RATIO_REAL_NOISE = 0.2

# Per-label quotas: real noisy, augmented noisy, and clean (the remainder).
num_noise_real = int(NUM_VALIDATION * RATIO_NOISE * RATIO_REAL_NOISE)
num_noise = int(NUM_VALIDATION * RATIO_NOISE) - num_noise_real
num_clean = NUM_VALIDATION - num_noise - num_noise_real
print(num_clean, num_noise, num_noise_real)
for t in AUG_CFG.keys():
    mkdir(f"./val/{t}")
    input_path_clean_aug  = f"./clean_aug/{t}"
    input_path_noise_aug  = f"./noise_aug/{t}"
    input_path_noise_real = f"./noise/{t}"
        
    paths_clean_aug = [f"{input_path_clean_aug}/{p}"  for p in os.listdir(input_path_clean_aug)]
    paths_noise_aug = [f"{input_path_noise_aug}/{p}"  for p in os.listdir(input_path_noise_aug)]
    paths_noise     = [f"{input_path_noise_real}/{p}" for p in os.listdir(input_path_noise_real)]
    random.shuffle(paths_clean_aug)
    random.shuffle(paths_noise_aug)
    random.shuffle(paths_noise)
    # Despite the "_train" suffix, these lists hold the files selected for validation.
    clean_train  = paths_clean_aug[:num_clean]
    noise_train  = paths_noise[:num_noise_real]
    # If fewer real noisy images exist than the quota, the shortfall is filled with
    # augmented noisy images.
    noise_train += paths_noise_aug[:(num_noise + (num_noise_real - len(noise_train)))]
    # Files are moved, not copied, so they are no longer in their source folders afterwards.
    for p in clean_train + noise_train:
        file_name = os.path.basename(p)
        shutil.move(p, f"./val/{t}/{file_name}")

show_file_num()

In [None]:
!rm -rf ./train

# Training images per label: whatever remains of 1000 after the validation quota.
MAX_NUM = 1000 - NUM_VALIDATION
CLEAN_NOISE_RATIO = 0.7  # share of clean images in the training set

# int() truncates toward zero. With NUM_VALIDATION = 300, 700 * 0.7 evaluates to
# 489.99999999999994 in floating point, so CLEAN starts at 489 rather than 490.
CLEAN  = int(MAX_NUM * CLEAN_NOISE_RATIO)
NOISE  = MAX_NUM - CLEAN
# NOTE(review): one clean image fewer than computed; the reason is not visible here
# (presumably to stay below a total-size limit) - confirm.
CLEAN -= 1

!rm -rf ./train

for t in AUG_CFG.keys():
    mkdir(f"./train/{t}")
    input_path_clean_aug  = f"./clean_aug/{t}"
    input_path_noise_aug  = f"./noise_aug/{t}"
    input_path_noise_real = f"./noise/{t}"
    # Clean candidates come from the augmented clean images; noisy candidates are the
    # augmented noisy images plus the real noisy ones still present in ./noise.
    paths_clean  = [f"{input_path_clean_aug}/{p}"  for p in os.listdir(input_path_clean_aug)]
    paths_noise  = [f"{input_path_noise_aug}/{p}"  for p in os.listdir(input_path_noise_aug)]
    paths_noise += [f"{input_path_noise_real}/{p}" for p in os.listdir(input_path_noise_real)]
    random.shuffle(paths_clean)
    random.shuffle(paths_noise)
    clean_train = paths_clean[:CLEAN]
    noise_train = paths_noise[:NOISE]
    # Files are moved, not copied.
    for p in clean_train + noise_train:
        shutil.move(p, f"./train/{t}/{os.path.basename(p)}")
        
show_file_num()

In [None]:
# Assemble ./submission/submission/{train,val} from scratch.
!rm -rf ./submission
mkdir("./submission")
mkdir("./submission/submission")
shutil.copytree("./train", "./submission/submission/train")
shutil.copytree("./val",   "./submission/submission/val")

In [None]:
# Writes ./submission.zip containing the contents of ./submission.
shutil.make_archive('./submission', 'zip', root_dir='./submission')

In [None]:
# Remove all working directories created by this notebook.
!rm -rf ./submission/
!rm -rf ./clean/
!rm -rf ./noise/
!rm -rf ./clean_aug/
!rm -rf ./noise_aug/
!rm -rf ./train/
!rm -rf ./val/