In [None]:
from utils.dataset import VideoDataset
from utils.earlystopping import EarlyStopping
from utils.function import make_datapath_list
from utils.model import HBM
from utils.transform import VideoTransform

import torch
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np
import pandas as pd
import random
import os
from tqdm import tqdm
from datetime import datetime

from pytorch_metric_learning.losses import ArcFaceLoss

# Set the random seeds so that repeated runs draw the same random numbers.
torch.manual_seed(1234)
np.random.seed(1234)
random.seed(1234)

# Prefer the second GPU (the original choice), but fall back to the first GPU
# or to the CPU so the notebook also runs on machines with fewer than two GPUs.
# The original expression selected "cuda:1" whenever CUDA was available, which
# fails on a single-GPU machine.
if torch.cuda.device_count() > 1:
    device = "cuda:1"
elif torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# train, test, validation: users 5-10 x gestures A-D -> '5_A', '5_B', ..., '10_D'
# (the original listing annotated these entries with the ids 16..39).
classes = tuple("{}_{}".format(user, gesture)
                for user in range(5, 11) for gesture in "ABCD")

# unknown: users 1-4 x gestures A-D -> '1_A', ..., '4_D'
# (annotated with the ids 0..15 in the original listing).
unknown_classes = tuple("{}_{}".format(user, gesture)
                        for user in range(1, 5) for gesture in "ABCD")

In [None]:
# Training setup (with fine-tuning).
# Builds the file lists, preprocessing transform, label dictionaries, datasets
# and data loaders for the "Train" and "Validation" splits under root_path.
batch_size = 16
num_classes = len(classes)
embedding_size = 256

# Build the lists of file paths
root_path = './data/unknown_1_to_4/'
train_video_list = make_datapath_list(os.path.join(root_path, "Train"))
val_video_list = make_datapath_list(os.path.join(root_path, "Validation"))


# Preprocessing settings
resize = 96
ccrop_size = 96
rcrop_size = (96, 120)
# Single-element mean/std; presumably single-channel input — TODO confirm against VideoTransform
mean, std = [0.449], [0.226]
# mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
video_transform = VideoTransform(resize, ccrop_size, rcrop_size, mean, std)


# video_id -> label_id dictionary (train), read from Train.csv
train_csv_path = os.path.join(root_path, "Train.csv")
train_df = pd.read_csv(train_csv_path)
train_videoid_labelid_dict = dict(train_df[["video_id", "label_id"]].to_numpy())

# video_id -> label_id dictionary (val), read from Validation.csv
val_csv_path = os.path.join(root_path, "Validation.csv")
val_df = pd.read_csv(val_csv_path)
val_videoid_labelid_dict = dict(val_df[["video_id", "label_id"]].to_numpy())


# Create the datasets (random_frame is enabled for training only)
train_dataset = VideoDataset(train_video_list, train_videoid_labelid_dict, num_segments=32,
                             phase="train", transform=video_transform, img_tmpl="{:05d}.jpg", random_frame=True)
val_dataset = VideoDataset(val_video_list, val_videoid_labelid_dict, num_segments=32,
                           phase="val", transform=video_transform, img_tmpl="{:05d}.jpg", random_frame=False)


# Create the data loaders (only the training loader shuffles)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}


# ArcFace loss with one learnable weight vector per class; it is optimised
# together with the model further down.
loss_func = ArcFaceLoss(num_classes, embedding_size).to(device)

# Load the pretrained model weights.
model = HBM().to(device)
load_model_path = './weights/pretrained_HBM_model.pth'
# map_location remaps tensors that were saved on another device (for example a
# different GPU index) onto `device`; without it torch.load tries to restore
# them on the device they were saved from. The evaluation cells of this
# notebook already load their checkpoints this way.
load_model_weights = torch.load(load_model_path, map_location=device)
model.load_state_dict(load_model_weights)


# Parameters trained during fine-tuning are collected in three lists, one per
# learning rate used by the optimizer below.
params_to_update_1, params_to_update_2, params_to_update_3 = [], [], []

# Name patterns selecting the layers to train.
update_param_names_1 = ["eco_2d"]
update_param_names_2 = ["eco_3d"]
update_param_names_3 = ["eco_left.fc_final.weight", "eco_left.fc_final.bias",
                        "eco_right.fc_final.weight", "eco_right.fc_final.bias"]

# Route every parameter to its list; anything that matches no pattern is frozen.
for name, param in model.named_parameters():
    if update_param_names_1[0] in name:
        target, message = params_to_update_1, "params_to_update_1に格納："
    elif update_param_names_2[0] in name:
        target, message = params_to_update_2, "params_to_update_2に格納："
    elif name in update_param_names_3:
        target, message = params_to_update_3, "params_to_update_3に格納："
    else:
        target, message = None, "勾配計算なし。学習しない："

    param.requires_grad = target is not None
    if target is not None:
        target.append(param)
    print(message, name)


# One Adam parameter group per list, plus the learnable ArcFace weights.
optimizer = optim.Adam([
    {'params': group, 'lr': lr}
    for group, lr in ((params_to_update_1, 1e-5),
                      (params_to_update_2, 5e-5),
                      (params_to_update_3, 1e-4),
                      (loss_func.parameters(), 1e-4))
])


# Train the model
# NOTE(review): `train_model` and `num_epochs` are not defined in the visible
# part of this notebook — confirm they are defined or imported before this
# cell is run on a fresh kernel.
train_losses, val_losses = train_model(model, loss_func, device, dataloaders_dict, optimizer, num_epochs)


# Save the losses to a timestamped file in the working directory
date = datetime.now().strftime("%Y%m%d_%H%M%S")
outfile = 'loss_{}.cpt'.format(date)
torch.save({'train_losses': train_losses, 'val_losses': val_losses}, outfile)

In [None]:
# Test setup: load a fine-tuned model and its ArcFace loss weights, then build
# the dataset and data loader for the "Test" split under root_path.
# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# NOTE(review): num_classes follows whichever `classes` tuple was defined last;
# presumably it must equal the class count the loss_func checkpoint below was
# trained with for load_state_dict to succeed — confirm.
num_classes = len(classes)
embedding_size = 256

# Load the model
model = HBM().to(device)
load_model_path = './weights/checkpoint_fine_unknown_1_and_2_aug_10-data_model_20220205_173510.pth'
load_model_weights = torch.load(load_model_path, map_location=device)
model.load_state_dict(load_model_weights)

# Load the loss function (its state dict holds the class weight matrix 'W' used for evaluation)
loss_func = ArcFaceLoss(num_classes, embedding_size).to(device)
load_loss_func_path = './weights/checkpoint_fine_unknown_1_and_2_aug_10-data_loss_func_20220205_173510.pth'
load_loss_func_weights = torch.load(load_loss_func_path, map_location=device)
loss_func.load_state_dict(load_loss_func_weights)


# Build the list of file paths (test)
# root_path = './data/'
# root_path = './data/all_data/'
root_path = './data/unknown_1_and_2/'
test_video_list = make_datapath_list(os.path.join(root_path, "Test"))
# test_video_list = make_datapath_list(os.path.join(root_path, "Unknown"))


# Preprocessing settings
resize = 96
ccrop_size = 96
rcrop_size = (96, 120)
mean, std = [0.449], [0.226]
# mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
video_transform = VideoTransform(resize, ccrop_size, rcrop_size, mean, std)


# video_id -> label_id dictionary (test), read from Test.csv
test_csv_path = os.path.join(root_path, "Test.csv")
# test_csv_path = './data/Unknown.csv'
test_df = pd.read_csv(test_csv_path)
test_videoid_labelid_dict = dict(test_df[["video_id", "label_id"]].to_numpy())


# Create the dataset
# NOTE(review): random_frame is not passed here although the exp_* cells pass
# random_frame=False — confirm the VideoDataset default is what is wanted.
test_dataset = VideoDataset(test_video_list, test_videoid_labelid_dict, num_segments=32,
                           phase="val", transform=video_transform, img_tmpl="{:05d}.jpg")


# Create the data loader
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=96, shuffle=False) # test
# test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=120, shuffle=False) # unknown

In [None]:
# Unknown setup: build the dataset and data loader for the "Unknown" split
# under root_path. No model is loaded in this cell.
num_classes = len(classes)
embedding_size = 256


# Build the list of file paths (unknown)
# root_path = './data/'
root_path = './data/unknown_1_and_2/'
unknown_video_list = make_datapath_list(os.path.join(root_path, "Unknown"))


# Preprocessing settings
resize = 96
ccrop_size = 96
rcrop_size = (96, 120)
mean, std = [0.449], [0.226]
# mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
video_transform = VideoTransform(resize, ccrop_size, rcrop_size, mean, std)


# video_id -> label_id dictionary (unknown), read from Unknown.csv
unknown_csv_path = os.path.join(root_path, "Unknown.csv")
unknown_df = pd.read_csv(unknown_csv_path)
unknown_videoid_labelid_dict = dict(unknown_df[["video_id", "label_id"]].to_numpy())


# Create the dataset
unknown_dataset = VideoDataset(unknown_video_list, unknown_videoid_labelid_dict, num_segments=32,
                               phase="val", transform=video_transform, img_tmpl="{:05d}.jpg")


# Create the data loader
unknown_dataloader = torch.utils.data.DataLoader(unknown_dataset, batch_size=120, shuffle=False) # unknown

In [None]:
# Test and unknown setup, for the case where the default labels are used.
# Same as the separate test / unknown setups except that the label
# dictionaries are read from Test_default.csv and Unknown_default.csv.
# Build the lists of file paths
# root_path = './data/'
root_path = './data/unknown_1_and_2/'
test_video_list = make_datapath_list(os.path.join(root_path, "Test"))
unknown_video_list = make_datapath_list(os.path.join(root_path, "Unknown"))


# Preprocessing settings
resize = 96
ccrop_size = 96
rcrop_size = (96, 120)
mean, std = [0.449], [0.226]
# mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
video_transform = VideoTransform(resize, ccrop_size, rcrop_size, mean, std)


# video_id -> label_id dictionary (test)
test_csv_path = os.path.join(root_path, "Test_default.csv")
test_df = pd.read_csv(test_csv_path)
test_videoid_labelid_dict = dict(test_df[["video_id", "label_id"]].to_numpy())

# video_id -> label_id dictionary (unknown)
unknown_csv_path = os.path.join(root_path, "Unknown_default.csv")
unknown_df = pd.read_csv(unknown_csv_path)
unknown_videoid_labelid_dict = dict(unknown_df[["video_id", "label_id"]].to_numpy())


# Create the datasets
test_dataset = VideoDataset(test_video_list, test_videoid_labelid_dict, num_segments=32,
                            phase="val", transform=video_transform, img_tmpl="{:05d}.jpg")
unknown_dataset = VideoDataset(unknown_video_list, unknown_videoid_labelid_dict, num_segments=32,
                               phase="val", transform=video_transform, img_tmpl="{:05d}.jpg")


# Create the data loaders
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=96, shuffle=False) # test
unknown_dataloader = torch.utils.data.DataLoader(unknown_dataset, batch_size=120, shuffle=False) # unknown

In [None]:
# Accuracy on the test data: overall and per class.
# Each sample is assigned to the class whose ArcFace weight vector has the
# highest cosine similarity with the sample's embedding.
import torch.nn.functional as F

# Learned ArcFace class weights, transposed below so each row is one class.
W_cos = loss_func.state_dict()['W']

correct = 0
total = 0
class_correct = [0.] * num_classes
class_total = [0.] * num_classes

# Switch to inference mode, as the exp_* cells do before evaluating; without
# this, layers such as dropout / batch norm would keep their training behaviour.
model.eval()

with torch.no_grad():
    for inputs, labels in test_dataloader:
        # `inputs` is a sequence of tensors; each one is moved to the device.
        inputs, labels = [side_inputs.to(device) for side_inputs in inputs], labels.to(device)
        outputs = model(inputs)
        cosine = F.linear(F.normalize(outputs), F.normalize(W_cos.T))
        _, predicted = torch.max(cosine, 1)

        # Keep the result 1-D even for a batch of one sample: squeeze() would
        # turn it into a 0-d tensor and c[i] below would fail.
        c = (predicted == labels).reshape(-1)
        total += labels.size(0)
        correct += c.sum().item()

        for i in range(labels.size(0)):
            label = labels[i].item()
            class_correct[label] += c[i].item()
            class_total[label] += 1

if total > 0:
    print('Accuracy: %d %%' % (100 * correct / total))
else:
    print('Accuracy: no samples')

for i in range(num_classes):
    if class_total[i] > 0:
        print('Accuracy of %s : %2d %%' % (
            classes[i], 100 * class_correct[i] / class_total[i]))
    else:
        # A class absent from the test data has no defined accuracy.
        print('Accuracy of %s : no samples' % classes[i])

In [None]:
# Loss curves: load a saved loss file and plot training / validation loss per epoch.
import matplotlib.pyplot as plt
%matplotlib inline

# Load the losses (a dict with 'train_losses' and 'val_losses' saved via torch.save)
cptfile = 'loss_fine_unknown_1_and_2_aug_10-data_20220205_175701.cpt'
# cptfile = 'loss_fine_unknown_1_and_2_10-data_20220112_033846.cpt'
# cptfile = 'loss_fine_unknown_1_and_2_augmiss_10-data_20220204_133540.cpt'
cpt = torch.load(cptfile)
train_losses = cpt['train_losses']
val_losses = cpt['val_losses']

# plot learning curve
plt.figure()
plt.plot(range(len(train_losses)), train_losses, 'r-', label='train_loss')
# NOTE(review): the last validation loss is dropped here (val_losses[:-1]);
# the reason is not visible in this notebook — confirm it is intentional.
plt.plot(range(len(val_losses[:-1])), val_losses[:-1], 'b-', label='val_loss')
plt.legend()
plt.xlabel('epoch')
plt.ylabel('loss')
plt.grid()
# Save the figure under a timestamped name in the working directory, then show it
date = datetime.now().strftime("%Y%m%d_%H%M%S")
outfile = 'loss_output_{}.png'.format(date)
plt.savefig(outfile)
plt.show()

In [None]:
def plot_tSNE(outputs, labels, perplexity):
    """Embed the feature vectors in 2-D with t-SNE and draw three scatter plots.

    The same 2-D projection is drawn three times, coloured by
      * the class label itself,
      * the gesture index (label % 4), and
      * the user index (label // 4),
    which matches labels laid out as 4 * user + gesture. Each figure is saved
    in the working directory as 't-SNE_<kind>_<timestamp>_perplexity=<p>.png',
    shown, and then closed.

    Args:
        outputs: torch tensor of embeddings; it is reshaped to
            (shape[0], shape[1]) before being passed to t-SNE.
        labels: torch tensor of integer class ids, one per row of `outputs`.
        perplexity: perplexity forwarded to sklearn's TSNE.

    Returns:
        None.
    """
    # Feature vectors
    embeddings = outputs.detach().cpu().numpy()
    embeddings = embeddings.reshape(embeddings.shape[0], embeddings.shape[1])
    labels_np = labels.detach().cpu().numpy()

    # Embed in two dimensions
    points = TSNE(n_components=2, random_state=0, perplexity=perplexity).fit_transform(embeddings)

    def _scatter(colors, kind):
        # Draw on an explicit figure so consecutive plots can never overlay
        # each other, and close it afterwards so figures do not accumulate.
        fig, ax = plt.subplots()
        mappable = ax.scatter(points[:, 0], points[:, 1], c=colors)
        fig.colorbar(mappable, ax=ax)
        date = datetime.now().strftime("%Y%m%d_%H%M%S")
        outfile = 't-SNE_{}_{}_perplexity={}.png'.format(kind, date, perplexity)
        fig.savefig(outfile)
        plt.show()
        plt.close(fig)

    # Are samples with the same label close together?
    _scatter(labels_np, 'label')

    # Are samples of the same gesture close together?
    _scatter(labels_np % 4, 'gesture')

    # Are samples of the same user close together? Floor division reproduces
    # the former 0..39 -> 0..9 mapping and also covers larger label ranges.
    _scatter(labels_np // 4, 'user')

In [None]:
def calculate_EER(x_p, x_n, data_group="all"):
    """Plot score histograms and FAR/FRR curves and estimate the equal error rate.

    Thresholds are sampled on the fixed grid -1 + 0.001 * i for i = 0..2000.
    At a threshold, FAR is the fraction of negative scores strictly above it
    and FRR is the fraction of positive scores strictly below it.

    Side effects: prints the shapes of both inputs, and saves and shows two
    figures in the working directory ('cos_similarity_<group>_<timestamp>.png'
    and 'FAR and FRR_<group>_<timestamp>.png').

    Args:
        x_p: 1-D array of scores for positive pairs.
        x_n: 1-D array of scores for negative pairs.
        data_group: tag used in the histogram title and in the file names.

    Returns:
        Tuple (EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T,
        FRR_FAR_0, FRR_FAR_0_T). The *_T entries are lists of threshold grid
        indices.
        * If every negative score is below every positive score, the four
          rates are 0 and all three index lists are the same list of grid
          indices at which FRR equals FAR.
        * Otherwise EER/EER_FAR/EER_FRR are taken at the first grid index
          where FRR exceeds FAR, FAR_FRR_0 is the FAR at the index just before
          FRR first becomes positive, and FRR_FAR_0 is the FRR at the first
          index where FAR is 0. A value that cannot be found stays at -1 with
          an empty index list.

    Raises:
        ValueError: if x_p or x_n is empty.
    """
    x_p = np.asarray(x_p)
    x_n = np.asarray(x_n)
    print(x_p.shape, x_n.shape)
    num_far = x_n.shape[0]
    num_frr = x_p.shape[0]
    if num_far == 0 or num_frr == 0:
        # Fail early with a clear message instead of dividing by zero below.
        raise ValueError("calculate_EER needs at least one positive and one negative score")

    # Histogram of both score distributions.
    fig, ax = plt.subplots()
    ax.hist(x_n, bins=50, alpha=0.5, range=(-1, 1), label='nagative pair')
    ax.hist(x_p, bins=50, alpha=0.5, range=(-1, 1), label='positive pair')
    # Title follows data_group (it used to read 'all pair' for every group).
    ax.set_title('{} pair'.format(data_group))
    ax.set_xlabel('cos similarity')
    ax.legend(loc='upper left')
    date = datetime.now().strftime("%Y%m%d_%H%M%S")
    outfile = 'cos_similarity_{}_{}.png'.format(data_group, date)
    fig.savefig(outfile)
    plt.show()
    plt.close(fig)

    # Count false accepts / false rejects per threshold with one vectorised
    # comparison per threshold instead of a Python loop over every score.
    threshold = np.array([-1 + 0.001*i for i in range(2001)])
    scores_n = x_n.astype(np.float64)
    scores_p = x_p.astype(np.float64)
    far = np.array([np.count_nonzero(scores_n > t) for t in threshold])
    frr = np.array([np.count_nonzero(scores_p < t) for t in threshold])
    far_rate = far / num_far
    frr_rate = frr / num_frr

    # FAR (red) and FRR (blue) as functions of the threshold.
    fig, ax = plt.subplots()
    ax.plot(threshold, frr_rate, '--b')
    ax.plot(threshold, far_rate, '--r')
    ax.set_xlabel('threshold')
    ax.set_title('FAR and FRR')
    ax.axis([-1, 1, -0.005, 1.005])
    date = datetime.now().strftime("%Y%m%d_%H%M%S")
    outfile = 'FAR and FRR_{}_{}.png'.format(data_group, date)
    fig.savefig(outfile)
    plt.show()
    plt.close(fig)

    if x_n.max() < x_p.min():
        # EER is 0: the two distributions do not overlap.
        EER_T = np.flatnonzero(frr_rate == far_rate).tolist()
        return 0, 0, 0, EER_T, 0, EER_T, 0, EER_T

    # EER is not 0.
    EER = EER_FAR = EER_FRR = -1
    EER_T = []
    FAR_FRR_0 = -1
    FAR_FRR_0_T = []
    FRR_FAR_0 = -1
    FRR_FAR_0_T = []

    # First grid index where FRR exceeds FAR.
    crossing = np.flatnonzero(frr_rate > far_rate)
    if crossing.size:
        i = int(crossing[0])
        EER_T.append(i)
        EER = frr_rate[i]
        EER_FAR = far_rate[i]
        EER_FRR = frr_rate[i]

    # Last grid index before FRR becomes positive. Clamp at 0 so that index -1
    # can never wrap around to the end of the grid.
    rejecting = np.flatnonzero(frr_rate > 0)
    if rejecting.size:
        i = max(int(rejecting[0]) - 1, 0)
        FAR_FRR_0_T.append(i)
        FAR_FRR_0 = far_rate[i]

    # First grid index where FAR is 0.
    accepting_none = np.flatnonzero(far_rate == 0)
    if accepting_none.size:
        i = int(accepting_none[0])
        FRR_FAR_0_T.append(i)
        FRR_FAR_0 = frr_rate[i]

    return EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T, FRR_FAR_0, FRR_FAR_0_T
    

def display_info(EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T, FRR_FAR_0, FRR_FAR_0_T):
    """Print the eight values returned by calculate_EER.

    Threshold indices are printed together with the threshold they stand for
    (-1 + 0.001 * index, the grid used by calculate_EER). When EER_T holds
    more than one index (the EER-is-0 case) only its first and last entries
    are printed and the other two index lists are reported as identical to it.
    An empty index list is reported as "not found" instead of raising
    IndexError.

    Args:
        EER, EER_FAR, EER_FRR: rates at the EER threshold.
        EER_T: list of grid indices of the EER threshold(s).
        FAR_FRR_0, FAR_FRR_0_T: FAR where FRR is 0, and its grid index list.
        FRR_FAR_0, FRR_FAR_0_T: FRR where FAR is 0, and its grid index list.

    Returns:
        None.
    """
    def _show_threshold(tag, indices, position=0):
        # Print "<tag> <index> <threshold>", or a placeholder for an empty list.
        if len(indices) == 0:
            print(tag, "not found")
        else:
            index = indices[position]
            print(tag, index, -1 + 0.001*index)

    print("EER:", EER)
    print("EER_FAR:", EER_FAR)
    print("EER_FRR:", EER_FRR)
    if len(EER_T) > 1:
        # EER is 0
        _show_threshold("EER_T_min:", EER_T, 0)
        _show_threshold("EER_T_max:", EER_T, -1)
        print("FAR_FRR_0:", FAR_FRR_0)
        print("FRR_FAR_0:", FRR_FAR_0)
        print("FAR_FRR_0_T and FRR_FAR_0_T are the same as EER_T")
    else:
        # EER is not 0 (or at most one threshold index was found)
        _show_threshold("EER_T:", EER_T)
        print("FAR_FRR_0:", FAR_FRR_0)
        _show_threshold("FAR_FRR_0_T:", FAR_FRR_0_T)
        print("FRR_FAR_0:", FRR_FAR_0)
        _show_threshold("FRR_FAR_0_T:", FRR_FAR_0_T)

In [None]:
# exp_1
from utils.dataset import VideoDataset
from utils.earlystopping import EarlyStopping
from utils.function import make_datapath_list
from utils.model import HBM
from utils.transform import VideoTransform

import torch
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np
import pandas as pd
import random
import os
from tqdm import tqdm
from datetime import datetime

from pytorch_metric_learning.losses import ArcFaceLoss

# Set the random seeds so that repeated runs draw the same random numbers.
torch.manual_seed(1234)
np.random.seed(1234)
random.seed(1234)

# Prefer the second GPU (the original choice), but fall back to the first GPU
# or to the CPU so the notebook also runs on machines with fewer than two GPUs.
# The original expression selected "cuda:1" whenever CUDA was available, which
# fails on a single-GPU machine.
if torch.cuda.device_count() > 1:
    device = "cuda:1"
elif torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# all: users 1-10 x gestures A-D -> '1_A', '1_B', ..., '10_D'.
# The position of each entry is its id 0..39, i.e. 4 * (user - 1) + gesture index.
classes = tuple("{}_{}".format(user, gesture)
                for user in range(1, 11) for gesture in "ABCD")


# Test setup for exp_1: load the model and ArcFace loss weights fine-tuned on
# all data, then build the dataset and data loader for the "Test" split.
num_classes = len(classes)
embedding_size = 256

# Load the model
model = HBM().to(device)
load_model_path = './weights/checkpoint_fine_all_data_aug_10-data_model_20220205_183047.pth'
load_model_weights = torch.load(load_model_path, map_location=device)
model.load_state_dict(load_model_weights)

# Load the loss function (its state dict holds the class weight matrix 'W' used for evaluation)
loss_func = ArcFaceLoss(num_classes, embedding_size).to(device)
load_loss_func_path = './weights/checkpoint_fine_all_data_aug_10-data_loss_func_20220205_183047.pth'
load_loss_func_weights = torch.load(load_loss_func_path, map_location=device)
loss_func.load_state_dict(load_loss_func_weights)


# Build the list of file paths (test)
# root_path = './data/'
root_path = './data/all_data/'
test_video_list = make_datapath_list(os.path.join(root_path, "Test"))


# Preprocessing settings
resize = 96
ccrop_size = 96
rcrop_size = (96, 120)
mean, std = [0.449], [0.226]
# mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
video_transform = VideoTransform(resize, ccrop_size, rcrop_size, mean, std)


# video_id -> label_id dictionary (test), read from Test.csv
test_csv_path = os.path.join(root_path, "Test.csv")
test_df = pd.read_csv(test_csv_path)
test_videoid_labelid_dict = dict(test_df[["video_id", "label_id"]].to_numpy())


# Create the dataset
test_dataset = VideoDataset(test_video_list, test_videoid_labelid_dict, num_segments=32,
                           phase="val", transform=video_transform, img_tmpl="{:05d}.jpg", random_frame=False)


# Create the data loader
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=120, shuffle=False)




# t-SNE, FAR, FRR, EER, cos
# For every test batch: plot the embeddings with t-SNE, then compute the EER
# statistics over all classes and separately for each gesture A-D.
# Requires plot_tSNE, calculate_EER and display_info to have been defined by
# running their cells first.
model.eval()
perplexity = 20
# ArcFace class weight matrix; W_cos.size(1) is used below as the number of classes.
W_cos = loss_func.state_dict()['W']

with torch.no_grad():
    for inputs, labels in test_dataloader:
        # `inputs` is a sequence of tensors; each one is moved to the device.
        inputs, labels = [side_inputs.to(device) for side_inputs in inputs], labels.to(device)
        outputs = model(inputs)
        
        # Plot in 2-D with t-SNE
        plot_tSNE(outputs, labels, perplexity)
        
        # Cosine similarity between every embedding (rows) and every class weight vector (columns)
        cosine = F.linear(F.normalize(outputs), F.normalize(W_cos.T))
        
        # EER all
        # Positive scores: similarity of each sample to its own class.
        x_p_all = cosine[torch.arange(labels.size(0)), labels].detach().cpu().numpy()
        # Flat (row-major) positions of those positive entries; deleting them
        # from the flattened matrix leaves only the negative scores.
        delete_index = [i*W_cos.size(1) + l for i, l in enumerate(labels.detach().cpu().numpy())]
        x_n_all = np.delete(cosine.detach().cpu().numpy(), delete_index).reshape(labels.size(0), -1)
        x_n_all = np.reshape(x_n_all, -1)
        
        print("x_n_mean:", x_n_all.mean())
        print("x_p_mean:", x_p_all.mean())
        
        EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T, FRR_FAR_0, FRR_FAR_0_T = calculate_EER(x_p_all, x_n_all, data_group="all")
        display_info(EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T, FRR_FAR_0, FRR_FAR_0_T)

        # EER gesture A, B, C and D
        for i, X in enumerate(("A", "B", "C", "D")):
            # Keep only the samples of gesture i (rows) ...
            cosine_X = cosine[labels%4==i, :]
            # ... and only the class columns belonging to gesture i.
            cosine_X = cosine_X[:, torch.arange(num_classes)%4==i]
            
            # Re-index the labels into the reduced column set: (label - i) / 4.
            labels_X = labels[labels%4==i].detach().cpu().numpy()
            labels_X = ((labels_X-i)/4).astype(np.int64)
        
            # Same positive / negative split as above, on the reduced matrix
            # (its row length is a quarter of the number of classes).
            x_p_X = cosine_X[np.arange(labels_X.size), labels_X].detach().cpu().numpy()
            delete_index = [j*int(W_cos.size(1)/4) + l for j, l in enumerate(labels_X)]
            x_n_X = np.delete(cosine_X.detach().cpu().numpy(), delete_index).reshape(-1)
            
            print("____________________________________________________")
            
            print("x_n_mean:", x_n_X.mean())
            print("x_p_mean:", x_p_X.mean())
            
            EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T, FRR_FAR_0, FRR_FAR_0_T = calculate_EER(x_p_X, x_n_X, data_group=X)
            display_info(EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T, FRR_FAR_0, FRR_FAR_0_T)

In [None]:
# exp_2
from utils.dataset import VideoDataset
from utils.earlystopping import EarlyStopping
from utils.function import make_datapath_list
from utils.model import HBM
from utils.transform import VideoTransform

import torch
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np
import pandas as pd
import random
import os
from tqdm import tqdm
from datetime import datetime

from pytorch_metric_learning.losses import ArcFaceLoss

# Set the random seeds so that repeated runs draw the same random numbers.
torch.manual_seed(1234)
np.random.seed(1234)
random.seed(1234)

# Prefer the second GPU (the original choice), but fall back to the first GPU
# or to the CPU so the notebook also runs on machines with fewer than two GPUs.
# The original expression selected "cuda:1" whenever CUDA was available, which
# fails on a single-GPU machine.
if torch.cuda.device_count() > 1:
    device = "cuda:1"
elif torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# train, test, validation: users 1-8 x gestures A-D -> '1_A', ..., '8_D'
# (the original listing annotated these entries with the ids 0..31).
classes = tuple("{}_{}".format(user, gesture)
                for user in range(1, 9) for gesture in "ABCD")

# unknown: users 9-10 x gestures A-D -> '9_A', ..., '10_D'
# (annotated with the ids 32..39 in the original listing).
unknown_classes = tuple("{}_{}".format(user, gesture)
                        for user in range(9, 11) for gesture in "ABCD")

# # train, test, validation
# classes = ('5_A',  # 16
#            '5_B',  # 17
#            '5_C',  # 18
#            '5_D',  # 19
#            '6_A',  # 20
#            '6_B',  # 21
#            '6_C',  # 22
#            '6_D',  # 23
#            '7_A',  # 24
#            '7_B',  # 25
#            '7_C',  # 26
#            '7_D',  # 27
#            '8_A',  # 28
#            '8_B',  # 29
#            '8_C',  # 30
#            '8_D',  # 31
#            '9_A',  # 32
#            '9_B',  # 33
#            '9_C',  # 34
#            '9_D',  # 35
#            '10_A', # 36
#            '10_B', # 37
#            '10_C', # 38
#            '10_D') # 39

# # unknown
# unknown_classes = ('1_A',  # 0
#                    '1_B',  # 1
#                    '1_C',  # 2
#                    '1_D',  # 3
#                    '2_A',  # 4
#                    '2_B',  # 5
#                    '2_C',  # 6
#                    '2_D',  # 7
#                    '3_A',  # 8
#                    '3_B',  # 9
#                    '3_C',  # 10
#                    '3_D',  # 11
#                    '4_A',  # 12
#                    '4_B',  # 13
#                    '4_C',  # 14
#                    '4_D')  # 15



# Test setup for exp_2: load the model and ArcFace loss weights from the
# 'unknown_9_and_10' checkpoints, then build datasets and data loaders for the
# "Test" and "Unknown" splits under root_path.
num_classes = len(classes)
embedding_size = 256

# Load the model
model = HBM().to(device)
load_model_path = './weights/checkpoint_fine_unknown_9_and_10_aug_10-data_model_20220205_200617.pth'
load_model_weights = torch.load(load_model_path, map_location=device)
model.load_state_dict(load_model_weights)

# Load the loss function (its state dict holds the class weight matrix 'W' used for evaluation)
loss_func = ArcFaceLoss(num_classes, embedding_size).to(device)
load_loss_func_path = './weights/checkpoint_fine_unknown_9_and_10_aug_10-data_loss_func_20220205_200617.pth'
load_loss_func_weights = torch.load(load_loss_func_path, map_location=device)
loss_func.load_state_dict(load_loss_func_weights)


# Build the list of file paths (test)
# root_path = './data/'
root_path = './data/unknown_9_and_10/'
test_video_list = make_datapath_list(os.path.join(root_path, "Test"))


# Preprocessing settings
resize = 96
ccrop_size = 96
rcrop_size = (96, 120)
mean, std = [0.449], [0.226]
# mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
video_transform = VideoTransform(resize, ccrop_size, rcrop_size, mean, std)


# video_id -> label_id dictionary (test), read from Test.csv
test_csv_path = os.path.join(root_path, "Test.csv")
test_df = pd.read_csv(test_csv_path)
test_videoid_labelid_dict = dict(test_df[["video_id", "label_id"]].to_numpy())


# Create the dataset
test_dataset = VideoDataset(test_video_list, test_videoid_labelid_dict, num_segments=32,
                           phase="val", transform=video_transform, img_tmpl="{:05d}.jpg", random_frame=False)


# Create the data loader
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=96, shuffle=False)



# Unknown setup

# Build the list of file paths (unknown)
# root_path = './data/'
root_path = './data/unknown_9_and_10/'
unknown_video_list = make_datapath_list(os.path.join(root_path, "Unknown"))


# Preprocessing settings (same values as for the test split above)
resize = 96
ccrop_size = 96
rcrop_size = (96, 120)
mean, std = [0.449], [0.226]
# mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
video_transform = VideoTransform(resize, ccrop_size, rcrop_size, mean, std)


# video_id -> label_id dictionary (unknown), read from Unknown.csv
unknown_csv_path = os.path.join(root_path, "Unknown.csv")
unknown_df = pd.read_csv(unknown_csv_path)
unknown_videoid_labelid_dict = dict(unknown_df[["video_id", "label_id"]].to_numpy())


# Create the dataset
unknown_dataset = VideoDataset(unknown_video_list, unknown_videoid_labelid_dict, num_segments=32,
                               phase="val", transform=video_transform, img_tmpl="{:05d}.jpg", random_frame=False)


# Create the data loader
unknown_dataloader = torch.utils.data.DataLoader(unknown_dataset, batch_size=120, shuffle=False)


# FAR, FRR, EER, cos
# First loop: EER statistics on the test split alone. Second loop: the same
# statistics with the unknown-split scores added to the negatives. Finally a
# histogram comparing all test scores with the unknown scores.
# Requires calculate_EER and display_info to have been defined by running
# their cell first.
model.eval()
# ArcFace class weight matrix; W_cos.size(1) is used below as the number of classes.
W_cos = loss_func.state_dict()['W']

with torch.no_grad():
    for inputs, labels in test_dataloader:
        # `inputs` is a sequence of tensors; each one is moved to the device.
        inputs, labels = [side_inputs.to(device) for side_inputs in inputs], labels.to(device)
        outputs = model(inputs)
        
        # Cosine similarity between every embedding (rows) and every class weight vector (columns)
        cosine = F.linear(F.normalize(outputs), F.normalize(W_cos.T))
        
        # EER test
        # Positive scores: similarity of each sample to its own class.
        x_p_test = cosine[torch.arange(labels.size(0)), labels].detach().cpu().numpy()
        # Flat (row-major) positions of the positive entries; removing them
        # from the flattened matrix leaves only the negative scores.
        delete_index = [i*W_cos.size(1) + l for i, l in enumerate(labels.detach().cpu().numpy())]
        x_n_test = np.delete(cosine.detach().cpu().numpy(), delete_index).reshape(-1)
        
        EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T, FRR_FAR_0, FRR_FAR_0_T = calculate_EER(x_p_test, x_n_test, data_group="test")
        display_info(EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T, FRR_FAR_0, FRR_FAR_0_T)

print("____________________________________________________")
            
# NOTE(review): x_p_test / x_n_test are overwritten on every test batch, so the
# code below only sees the scores of the last test batch — presumably the test
# split is expected to fit in a single batch; confirm.
with torch.no_grad():
    for inputs, labels in unknown_dataloader:
        inputs, labels = [side_inputs.to(device) for side_inputs in inputs], labels.to(device)
        outputs = model(inputs)
        
        cosine = F.linear(F.normalize(outputs), F.normalize(W_cos.T))
        
        # EER test and unknown
        # Every score of an unknown sample is used as a negative: the whole
        # matrix is flattened and appended to the test negatives.
        x_n_unknown = np.reshape(cosine.detach().cpu().numpy(), -1) 
        combined_x_n = np.concatenate([x_n_test, x_n_unknown])
        
        EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T, FRR_FAR_0, FRR_FAR_0_T = calculate_EER(x_p_test, combined_x_n, data_group="test_and_unknown")
        display_info(EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T, FRR_FAR_0, FRR_FAR_0_T)

            
# Histogram of all test scores (positive and negative together) against the
# unknown scores, saved under a timestamped name in the working directory.
test_pair = np.concatenate([x_p_test, x_n_test])
fig, ax = plt.subplots()
ax.hist(test_pair, bins=50, alpha=0.5, range=(-1, 1), label='test pair')
ax.hist(x_n_unknown, bins=50, alpha=0.5, range=(-1, 1), label='unknown pair')
ax.set_title('test pair and unknown pair')
ax.set_xlabel('cos similarity')
plt.legend(loc='upper left')
date = datetime.now().strftime("%Y%m%d_%H%M%S")
outfile = 'cos_similarity_test_and_unknown_divided_{}.png'.format(date)
plt.savefig(outfile)
plt.show()

In [None]:
# exp_3 unknown_1_to_4
from utils.dataset import VideoDataset
from utils.earlystopping import EarlyStopping
from utils.function import make_datapath_list
from utils.model import HBM
from utils.transform import VideoTransform

import torch
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np
import pandas as pd
import random
import os
from tqdm import tqdm
from datetime import datetime

from pytorch_metric_learning.losses import ArcFaceLoss

# plt.rcParams['ps.useafm'] = True
# plt.rcParams['pdf.use14corefonts'] = True
# plt.rcParams['text.usetex'] = True

# Fix the random seeds of every library used below so runs are repeatable.
torch.manual_seed(1234)
np.random.seed(1234)
random.seed(1234)

# Prefer the second GPU (cuda:1), as before. On a machine with a single GPU
# "cuda:1" is an invalid device ordinal and the first .to(device) would fail,
# so fall back to cuda:0 there; without CUDA, run on the CPU.
if not torch.cuda.is_available():
    device = "cpu"
elif torch.cuda.device_count() > 1:
    device = "cuda:1"
else:
    device = "cuda:0"

# # train, test, validation
# classes = ('1_A',  # 0
#            '1_B',  # 1
#            '1_C',  # 2
#            '1_D',  # 3
#            '2_A',  # 4
#            '2_B',  # 5
#            '2_C',  # 6
#            '2_D',  # 7
#            '3_A',  # 8
#            '3_B',  # 9
#            '3_C',  # 10
#            '3_D',  # 11
#            '4_A',  # 12
#            '4_B',  # 13
#            '4_C',  # 14
#            '4_D',  # 15
#            '5_A',  # 16
#            '5_B',  # 17
#            '5_C',  # 18
#            '5_D',  # 19
#            '6_A',  # 20
#            '6_B',  # 21
#            '6_C',  # 22
#            '6_D',  # 23
#            '7_A',  # 24
#            '7_B',  # 25
#            '7_C',  # 26
#            '7_D',  # 27
#            '8_A',  # 28
#            '8_B',  # 29
#            '8_C',  # 30
#            '8_D')  # 31

# # unknown
# unknown_classes = ('9_A',  # 32
#                    '9_B',  # 33
#                    '9_C',  # 34
#                    '9_D',  # 35
#                    '10_A', # 36
#                    '10_B', # 37
#                    '10_C', # 38
#                    '10_D') # 39

# train, test, validation
# Names are '<prefix>_<suffix>' with prefix 5..10 and suffix A..D, in that
# order: '5_A', '5_B', ..., '10_D' (24 entries). The numbers 16..39 listed next
# to them originally are presumably their label_id values before remapping.
classes = tuple('{}_{}'.format(prefix, suffix)
                for prefix in range(5, 11)
                for suffix in 'ABCD')

# unknown
# Same naming scheme for prefix 1..4: '1_A', ..., '4_D' (16 entries, listed
# originally with the numbers 0..15).
unknown_classes = tuple('{}_{}'.format(prefix, suffix)
                        for prefix in range(1, 5)
                        for suffix in 'ABCD')


# Test setup
num_classes = len(classes)
embedding_size = 256

# Load the trained model weights onto `device`.
# NOTE(review): torch.load unpickles the file; only load checkpoints that come
# from a trusted source.
model = HBM().to(device)
load_model_path = './weights/checkpoint_fine_unknown_1_to_4_aug_10-data_model_20220210_115641.pth'
load_model_weights = torch.load(load_model_path, map_location=device)
model.load_state_dict(load_model_weights)

# Load the saved state of the ArcFace loss function (same timestamp as the
# model checkpoint above).
loss_func = ArcFaceLoss(num_classes, embedding_size).to(device)
load_loss_func_path = './weights/checkpoint_fine_unknown_1_to_4_aug_10-data_loss_func_20220210_115641.pth'
load_loss_func_weights = torch.load(load_loss_func_path, map_location=device)
loss_func.load_state_dict(load_loss_func_weights)


# Build the list of data paths (test)
# root_path = './data/'
root_path = './data/unknown_1_to_4/'
test_video_list = make_datapath_list(os.path.join(root_path, "Test"))


# Preprocessing settings
resize = 96
ccrop_size = 96
rcrop_size = (96, 120)
# Single-element mean/std; the three-element alternative is kept below for reference.
mean, std = [0.449], [0.226]
# mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
video_transform = VideoTransform(resize, ccrop_size, rcrop_size, mean, std)


# video_id -> label_id dictionary (test), built from the two columns of Test.csv
test_csv_path = os.path.join(root_path, "Test.csv")
test_df = pd.read_csv(test_csv_path)
test_videoid_labelid_dict = dict(test_df[["video_id", "label_id"]].to_numpy())


# Create the dataset (phase="val", random_frame=False)
test_dataset = VideoDataset(test_video_list, test_videoid_labelid_dict, num_segments=32,
                           phase="val", transform=video_transform, img_tmpl="{:05d}.jpg", random_frame=False)


# Create the data loader (no shuffling)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=72, shuffle=False)



# Unknown setup

# Build the list of data paths (unknown)
# root_path = './data/'
root_path = './data/unknown_1_to_4/'
unknown_video_list = make_datapath_list(os.path.join(root_path, "Unknown"))


# Preprocessing settings
# NOTE(review): these values repeat the test setup earlier in this cell.
resize = 96
ccrop_size = 96
rcrop_size = (96, 120)
mean, std = [0.449], [0.226]
# mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
video_transform = VideoTransform(resize, ccrop_size, rcrop_size, mean, std)


# video_id -> label_id dictionary (unknown), built from the two columns of Unknown.csv
unknown_csv_path = os.path.join(root_path, "Unknown.csv")
unknown_df = pd.read_csv(unknown_csv_path)
unknown_videoid_labelid_dict = dict(unknown_df[["video_id", "label_id"]].to_numpy())


# Create the dataset (phase="val", random_frame=False)
unknown_dataset = VideoDataset(unknown_video_list, unknown_videoid_labelid_dict, num_segments=32,
                               phase="val", transform=video_transform, img_tmpl="{:05d}.jpg", random_frame=False)


# Create the data loader (no shuffling)
unknown_dataloader = torch.utils.data.DataLoader(unknown_dataset, batch_size=120, shuffle=False)


# FAR, FRR, EER, cos
model.eval()
# Class-weight matrix of the loss function; loaded here but not used by the
# pairwise evaluation in this cell.
W_cos = loss_func.state_dict()['W']
x_p_test = []
x_n_test = []
# Accumulators for the unknown-split evaluation further down in this cell.
x_p_unknown = []
x_n_unknown = []
all_outputs = []
all_labels = []

# Embed the whole test split first and compare pairs afterwards. Doing the
# comparison inside the batch loop would miss pairs that fall into different
# batches, and the score lists must stay lists until every batch is processed.
test_outputs = []
test_labels = []
with torch.no_grad():
    for inputs, labels in test_dataloader:
        inputs, labels = [side_inputs.to(device) for side_inputs in inputs], labels.to(device)
        test_outputs.append(model(inputs))
        test_labels.append(labels)

outputs = torch.cat(test_outputs)
labels = torch.cat(test_labels)

# EER test
# For every unordered pair of classes (i <= j) compute all cosine similarities
# between their embeddings: same class -> positive pairs, different -> negative.
for i in range(0, len(classes)):
    for j in range(i, len(classes)):
        cosine = F.linear(F.normalize(outputs[labels==i]), F.normalize(outputs[labels==j])).detach().cpu().numpy()
        if i == j:
            # Strict lower triangle = each distinct pair once, without the
            # self-similarity diagonal. Index it directly so that a similarity
            # that is exactly 0 is not mistaken for a masked-out entry.
            x_p_test.append(cosine[np.tril_indices_from(cosine, k=-1)])
        else:
            x_n_test.append(np.reshape(cosine, -1))

# Concatenate rather than np.array(...).reshape so that classes with different
# numbers of samples (ragged per-class arrays) are flattened correctly.
x_p_test = np.concatenate(x_p_test)
x_n_test = np.concatenate(x_n_test)

EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T, FRR_FAR_0, FRR_FAR_0_T = calculate_EER(x_p_test, x_n_test, data_group="test")
display_info(EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T, FRR_FAR_0, FRR_FAR_0_T)

print("____________________________________________________")

# Embed the whole unknown split; all_outputs / all_labels are the (initially
# empty) accumulators created earlier in this cell.
with torch.no_grad():
    for inputs, labels in unknown_dataloader:
        inputs, labels = [side_inputs.to(device) for side_inputs in inputs], labels.to(device)
        outputs = model(inputs)

        all_outputs.append(outputs)
        all_labels.append(labels)

outputs = torch.cat(all_outputs)
labels = torch.cat(all_labels)

# EER unknown
# Only label ids from `first_unknown_label` upwards are evaluated; lower ids
# are skipped entirely (both as i and as j).
first_unknown_label = 8
for i in range(first_unknown_label, len(unknown_classes)):
    for j in range(i, len(unknown_classes)):
        cosine = F.linear(F.normalize(outputs[labels==i]), F.normalize(outputs[labels==j])).detach().cpu().numpy()
        if i == j:
            # Strict lower triangle = each distinct same-class pair once.
            # Indexing (instead of filtering on != 0) keeps similarities that
            # happen to be exactly 0.
            x_p_unknown.append(cosine[np.tril_indices_from(cosine, k=-1)])
        else:
            x_n_unknown.append(np.reshape(cosine, -1))

# Concatenate so per-class arrays of different lengths are flattened correctly.
x_p_unknown = np.concatenate(x_p_unknown)
x_n_unknown = np.concatenate(x_n_unknown)

EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T, FRR_FAR_0, FRR_FAR_0_T = calculate_EER(x_p_unknown, x_n_unknown, data_group="unknown")
display_info(EER, EER_FAR, EER_FRR, EER_T, FAR_FRR_0, FAR_FRR_0_T, FRR_FAR_0, FRR_FAR_0_T)

            
# test_pair = np.concatenate([x_p_test, x_n_test])
# fig, ax = plt.subplots()
# ax.hist(test_pair, bins=50, alpha=0.5, range=(-1, 1), label='test pair')
# ax.hist(x_n_unknown, bins=50, alpha=0.5, range=(-1, 1), label='unknown pair')
# ax.set_title('test pair and unknown pair')
# ax.set_xlabel('cos similarity')
# plt.legend(loc='upper left')
# date = datetime.now().strftime("%Y%m%d_%H%M%S")
# outfile = 'cos_similarity_test_and_unknown_divided_{}.png'.format(date)
# plt.savefig(outfile)
# plt.show()

In [None]:
# Original を Train, Test, Validation, Unknown に分割
# Originalと各フォルダを準備し、Unknownのindexを決める
import pandas as pd
import numpy as np
import shutil
import os

from utils.function import make_datapath_list


original_dir = './data/Original/'
save_dir_path = './data/unknown_1_to_4/'

if os.path.exists(original_dir):
    # Build the list of data paths under Original/
    original_list = make_datapath_list(original_dir)
    
    # Load Original.csv
    original_csv_path = './data/Original.csv'
    original_df = pd.read_csv(original_csv_path)

    # Rows of Original.csv are selected below by position in original_list, so
    # both must have the same length. This also stops an accidental re-run
    # after the folders were moved (empty Original/) from overwriting every
    # split CSV with an empty table.
    if len(original_list) != len(original_df):
        raise ValueError(
            "{} contains {} entries but {} has {} rows; refusing to split "
            "(has the data already been moved out of Original?)".format(
                original_dir, len(original_list), original_csv_path, len(original_df)))

    # Positional index over all entries
    index = np.arange(len(original_list))
    
    # Positions that form the unknown split.
    # Other experiments used [0, 120), [120, 240), [240, 360), [360, 480), [480, 600).
    unknown_target_index = [i for i in index if i>=0 and i<240]
    print(unknown_target_index)
    print(len(unknown_target_index))
    
    # known_index: everything that is not unknown
    known_index_mask = np.ones(len(original_list), dtype=bool)
    known_index_mask[unknown_target_index] = False
    known_index = index[known_index_mask]
    print(known_index)
    print(len(known_index))
    
    # unknown_index
    unknown_index_mask = np.zeros(len(original_list), dtype=bool)
    unknown_index_mask[unknown_target_index] = True
    unknown_index = index[unknown_index_mask]
    print(unknown_index)
    print(len(unknown_index))
    
    # Test positions: known entries with position % 5 == 1
    test_target_index = [i for i in known_index if i % 5 == 1]
    print(test_target_index)
    print(len(test_target_index))
        
    # Validation positions: known entries with position % 5 == 3
    validation_target_index = [i for i in known_index if i % 5 == 3]
    print(validation_target_index)
    print(len(validation_target_index))
    
    # train_index: whatever is neither unknown, test nor validation
    train_index_mask = np.ones(len(original_list), dtype=bool)
    train_index_mask[unknown_target_index] = False
    train_index_mask[test_target_index] = False
    train_index_mask[validation_target_index] = False
    train_index = index[train_index_mask]
    print(train_index)
    print(len(train_index))

    # test_index
    test_index_mask = np.zeros(len(original_list), dtype=bool)
    test_index_mask[test_target_index] = True
    test_index = index[test_index_mask]
    print(test_index)
    print(len(test_index))
    
    # validation_index
    validation_index_mask = np.zeros(len(original_list), dtype=bool)
    validation_index_mask[validation_target_index] = True
    validation_index = index[validation_index_mask]
    print(validation_index)
    print(len(validation_index))

    # Make sure every destination directory exists before anything is written
    # or moved. shutil.move() into a missing directory would *rename* the first
    # video folder to "Train"/"Test"/... instead of moving it inside.
    for split_name in ("Train", "Test", "Validation", "Unknown"):
        os.makedirs(os.path.join(save_dir_path, split_name), exist_ok=True)

    # Create Train.csv
    train_df = original_df.iloc[train_index, :]
    train_df.to_csv(os.path.join(save_dir_path, "Train.csv"), index=False)

    # Create Test.csv
    test_df = original_df.iloc[test_index, :]
    test_df.to_csv(os.path.join(save_dir_path, "Test.csv"), index=False)
    
    # Create Validation.csv
    validation_df = original_df.iloc[validation_index, :]
    validation_df.to_csv(os.path.join(save_dir_path, "Validation.csv"), index=False)
    
    # Create Unknown.csv
    unknown_df = original_df.iloc[unknown_index, :]
    unknown_df.to_csv(os.path.join(save_dir_path, "Unknown.csv"), index=False)

    # Move each video folder from Original/ into the folder of its split
    for split_name, split_df in (("Train", train_df),
                                 ("Test", test_df),
                                 ("Validation", validation_df),
                                 ("Unknown", unknown_df)):
        split_dir = os.path.join(save_dir_path, split_name)
        for video_id in split_df["video_id"]:
            path = original_dir + str(video_id) + '/'
            shutil.move(path, split_dir)

In [None]:
# Remap label_id for train, test and validation so the known classes use ids
# starting at 0.
# Adjust label_offset / num_known_labels for other splits (the original note
# listed the loop ranges (0, 32), (8, 32), (16, 32), (24, 32), None, with an
# offset of 8 as the alternative to 16).
import pandas as pd
import os

original_dir = './data/unknown_1_to_4/'
label_offset = 16       # label_id of the first known class before remapping
num_known_labels = 24   # label ids label_offset .. label_offset + 23 are remapped

original_train_csv_path = os.path.join(original_dir, "Train.csv")
original_test_csv_path = os.path.join(original_dir, "Test.csv")
original_validation_csv_path = os.path.join(original_dir, "Validation.csv")

default_train_csv_path = os.path.join(original_dir, "Train_default.csv")
default_test_csv_path = os.path.join(original_dir, "Test_default.csv")
default_validation_csv_path = os.path.join(original_dir, "Validation_default.csv")

# Refuse to run twice. A second run would shift the already remapped labels
# again and (on POSIX, where os.rename silently replaces the target) overwrite
# the untouched *_default.csv backups with remapped data.
for default_csv_path in (default_train_csv_path,
                         default_test_csv_path,
                         default_validation_csv_path):
    if os.path.exists(default_csv_path):
        raise FileExistsError(
            "{} already exists: label ids appear to have been remapped already. "
            "Restore the original CSV and remove the backup to run again.".format(default_csv_path))

original_train_df = pd.read_csv(original_train_csv_path)
original_test_df = pd.read_csv(original_test_csv_path)
original_validation_df = pd.read_csv(original_validation_csv_path)


def remap_label_ids(df, offset, num_labels):
    """Return a copy of df whose label_id values offset..offset+num_labels-1 become 0..num_labels-1.

    Values outside that range are left unchanged. All replacements are applied
    in one pass; for a non-negative offset this gives the same result as
    replacing the ids one after another in increasing order.
    """
    mapping = {i + offset: i for i in range(0, num_labels)}
    return df.assign(label_id=df['label_id'].map(lambda label_id: mapping.get(label_id, label_id)))


transformed_train_df = remap_label_ids(original_train_df, label_offset, num_known_labels)
transformed_test_df = remap_label_ids(original_test_df, label_offset, num_known_labels)
transformed_validation_df = remap_label_ids(original_validation_df, label_offset, num_known_labels)

# Keep the untouched CSV as <Split>_default.csv and write the remapped one
# under the original name.
os.rename(original_train_csv_path, default_train_csv_path)
transformed_train_df.to_csv(original_train_csv_path, index=False)
os.rename(original_test_csv_path, default_test_csv_path)
transformed_test_df.to_csv(original_test_csv_path, index=False)
os.rename(original_validation_csv_path, default_validation_csv_path)
transformed_validation_df.to_csv(original_validation_csv_path, index=False)

In [None]:
# Remap label_id for the unknown split.
# Adjust label_offset for other splits (0, 8, 16, 24, 32 were used).
import os  # needed for os.path / os.rename below; this cell must not rely on an earlier cell's import

import pandas as pd

original_dir = './data/unknown_1_to_4/'
label_offset = 0          # label_id of the first unknown class before remapping
num_unknown_labels = 16   # label ids label_offset .. label_offset + 15 are remapped

original_unknown_csv_path = os.path.join(original_dir, "Unknown.csv")
default_unknown_csv_path = os.path.join(original_dir, "Unknown_default.csv")

# Refuse to run twice: a second run would remap already remapped labels and
# (on POSIX, where os.rename silently replaces the target) overwrite the
# untouched backup.
if os.path.exists(default_unknown_csv_path):
    raise FileExistsError(
        "{} already exists: label ids appear to have been remapped already. "
        "Restore the original CSV and remove the backup to run again.".format(default_unknown_csv_path))

original_unknown_df = pd.read_csv(original_unknown_csv_path)

# label_offset + i -> i for i in 0..num_unknown_labels-1; other values are kept.
unknown_label_mapping = {i + label_offset: i for i in range(0, num_unknown_labels)}
transformed_unknown_df = original_unknown_df.assign(
    label_id=original_unknown_df['label_id'].map(
        lambda label_id: unknown_label_mapping.get(label_id, label_id)))

# Keep the untouched CSV as Unknown_default.csv and write the remapped one
# under the original name.
os.rename(original_unknown_csv_path, default_unknown_csv_path)
transformed_unknown_df.to_csv(original_unknown_csv_path, index=False)