<a href="https://colab.research.google.com/github/ykitaguchi77/FundusPhoto/blob/main/Divide_dataset_trainvaltest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **データセットをランダムに振り分け**

In [None]:
"""
dataset-------1
          |---2
          |---3
          |---4
          |---5
          |---6
↓

dataset---train-----1
        |       |---2
        |
        |--val------3
        |       |---4
        |
        |--test-----5
                |---6
"""

In [None]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.utils.data as data
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import math
import shutil

#Advanced Pytorchから
import glob
import os.path as osp
import random
import json
from PIL import Image
from tqdm import tqdm
%matplotlib inline

#サポートパッチのインポート
from google.colab.patches import cv2_imshow
import cv2

plt.ion()   # interactive mode
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Set random seed for reproducibility
manualSeed = 20200815
#manualSeed = random.randint(1, 10000) # use if you want new results
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)
torch.cuda.manual_seed(manualSeed)

# Reach the cuDNN flags through the documented ``torch.backends`` path rather
# than via the redundant ``torch.torch`` self-reference.
# NOTE(review): benchmark mode lets cuDNN auto-tune its algorithms, which can
# reduce run-to-run reproducibility -- confirm this is wanted next to the
# fixed seed above.
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True


# Mount Google Drive into the Colab runtime
from google.colab import drive
drive.mount('/content/drive')

# **データセットをランダムに振り分け**

In [None]:
# Source folder of images and destination root for the train/val/test split.
in_path = '/content/drive/MyDrive/Deep_learning/FundusPhoto/cropped_img'
out_path = '/content/drive/MyDrive/Deep_learning/FundusPhoto/cropped_img_trainval'
train_rate = 0.7
val_rate = 0.2


def split_dataset_randomly(in_path, out_path, train_rate, val_rate):
    """Randomly copy the entries of ``in_path`` into train/val/test folders.

    ``out_path`` is deleted if it already exists and recreated with the
    sub-folders ``train``, ``val`` and ``test``. ``int(n * train_rate)``
    entries go to ``train``, ``int(n * val_rate)`` to ``val`` and the
    remainder to ``test``. Sampling uses the module-level ``random``
    generator, so seed it beforehand to get a repeatable split.

    Args:
        in_path: Directory whose entries (expected to be image files) are split.
        out_path: Destination root; any previous content is removed.
        train_rate: Fraction of entries assigned to ``train``.
        val_rate: Fraction of entries assigned to ``val``.

    Returns:
        A ``(train, val, test)`` tuple of lists of entry names.

    Raises:
        ValueError: If ``out_path`` is ``in_path`` or one of its parents
            (clearing it would delete the source data), or if the rates do
            not yield a valid partition.
    """
    # Clearing out_path must never take the source images with it.
    src_root = os.path.realpath(in_path)
    dst_root = os.path.realpath(out_path)
    if os.path.commonpath([src_root, dst_root]) == dst_root:
        raise ValueError('out_path must not be in_path or contain it: ' + out_path)

    # os.listdir returns entries in arbitrary order; sort them so that a
    # fixed seed always produces the same split.
    total = sorted(os.listdir(in_path))
    total_num = len(total)
    train_num = int(total_num * train_rate)  # number of train entries
    val_num = int(total_num * val_rate)  # number of val entries
    if train_num < 0 or val_num < 0 or train_num + val_num > total_num:
        raise ValueError('train_rate and val_rate must be non-negative and sum to at most 1')

    trainval = random.sample(total, train_num + val_num)  # train + val
    val = random.sample(trainval, val_num)
    # Order-preserving differences keep the copy order deterministic too.
    val_names = set(val)
    trainval_names = set(trainval)
    train = [name for name in trainval if name not in val_names]
    test = [name for name in total if name not in trainval_names]

    # Only now, with the input listed and validated, drop any previous output.
    if os.path.exists(out_path):
        shutil.rmtree(out_path)
    os.makedirs(out_path, exist_ok=True)

    copied = 0
    for split_name, members in (('train', train), ('val', val), ('test', test)):
        split_dir = os.path.join(out_path, split_name)
        os.makedirs(split_dir, exist_ok=True)
        for name in members:
            shutil.copy(os.path.join(in_path, name), os.path.join(split_dir, name))
            copied += 1
            print(str(copied)+'/'+str(total_num)+' images copied')

    return train, val, test


# A notebook cell runs with __name__ == "__main__", so this executes in Colab
# while leaving the function importable without touching the file system.
if __name__ == "__main__":
    # Measure the processing time
    start = time.time()

    train, val, test = split_dataset_randomly(in_path, out_path, train_rate, val_rate)

    print('Process done!!')
    elapsed_time = time.time() - start
    print ("elapsed_time:{0}".format(elapsed_time) + "[sec]")

# **ランダムでない振り分け（1から順に）**

In [None]:
import re

# Source folder of images and destination root for the train/val/test split.
in_path = '/content/drive/MyDrive/Deep_learning/FundusPhoto/cropped_img'
out_path = '/content/drive/MyDrive/Deep_learning/FundusPhoto/cropped_img_trainval'
train_rate = 0.7
val_rate = 0.2


def split_dataset_in_order(in_path, out_path, train_rate, val_rate):
    """Copy the entries of ``in_path`` into train/val/test folders in order.

    Entries are ordered naturally (embedded numbers compare as integers, so
    ``2.png`` comes before ``10.png``). The first ``int(n * train_rate)``
    entries go to ``train``, the next ``int(n * val_rate)`` to ``val`` and
    the remainder to ``test``. ``out_path`` is deleted if it already exists
    and recreated with those three sub-folders.

    Args:
        in_path: Directory whose entries (expected to be image files) are split.
        out_path: Destination root; any previous content is removed.
        train_rate: Fraction of entries assigned to ``train``.
        val_rate: Fraction of entries assigned to ``val``.

    Returns:
        A ``(train, val, test)`` tuple of lists of entry names.

    Raises:
        ValueError: If ``out_path`` is ``in_path`` or one of its parents
            (clearing it would delete the source data), or if the rates do
            not yield a valid partition.
    """
    # Clearing out_path must never take the source images with it.
    src_root = os.path.realpath(in_path)
    dst_root = os.path.realpath(out_path)
    if os.path.commonpath([src_root, dst_root]) == dst_root:
        raise ValueError('out_path must not be in_path or contain it: ' + out_path)

    def natural_key(name):
        # re.split with a capturing group alternates text / digit-run tokens,
        # so odd positions are always digit runs and can be compared as ints.
        tokens = re.split(r'(\d+)', name)
        parsed = [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]
        return parsed, name  # the raw name breaks ties such as "01" vs "1"

    # os.listdir returns entries in arbitrary order, so sort explicitly to
    # really assign files "from 1 onward".
    total = sorted(os.listdir(in_path), key=natural_key)
    total_num = len(total)
    train_num = int(total_num * train_rate)  # number of train entries
    val_num = int(total_num * val_rate)  # number of val entries
    if train_num < 0 or val_num < 0 or train_num + val_num > total_num:
        raise ValueError('train_rate and val_rate must be non-negative and sum to at most 1')

    train = total[:train_num]
    val = total[train_num:train_num + val_num]
    test = total[train_num + val_num:]

    # Only now, with the input listed and validated, drop any previous output.
    if os.path.exists(out_path):
        shutil.rmtree(out_path)
    os.makedirs(out_path, exist_ok=True)

    copied = 0
    for split_name, members in (('train', train), ('val', val), ('test', test)):
        split_dir = os.path.join(out_path, split_name)
        os.makedirs(split_dir, exist_ok=True)
        for name in members:
            shutil.copy(os.path.join(in_path, name), os.path.join(split_dir, name))
            copied += 1
            print(str(copied)+'/'+str(total_num)+' images copied')

    return train, val, test


# A notebook cell runs with __name__ == "__main__", so this executes in Colab
# while leaving the function importable without touching the file system.
if __name__ == "__main__":
    # Measure the processing time
    start = time.time()

    train, val, test = split_dataset_in_order(in_path, out_path, train_rate, val_rate)

    print('Process done!!')
    elapsed_time = time.time() - start
    print ("elapsed_time:{0}".format(elapsed_time) + "[sec]")