In [1]:
#!git clone https://github.com/facebookresearch/pytorchvideo.git
#!pip install fvcore
#!pip install albumentations
#!pip install albumentations.pytorch
#!pip install transformers

In [2]:
import torch
import json
import random
import numpy as np
import glob2
import cv2
import os
import math
import pandas as pd
from PIL import Image
from argparse import Namespace
from tqdm.auto import tqdm

import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, IterableDataset
import albumentations
import albumentations.pytorch
from transformers.optimization import AdamW, get_cosine_schedule_with_warmup
from transformers import set_seed
import pytorchvideo.models.hub as pyvideo
!nvidia-smi
os.environ["CUDA_VISIBLE_DEVICES"]= "1"





Tue Oct 19 10:17:47 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.80.02    Driver Version: 450.80.02    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-PCIE...  Off  | 00000000:18:00.0 Off |                    0 |
| N/A   47C    P0    38W / 250W |   1181MiB / 32510MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-PCIE...  Off  | 00000000:AF:00.0 Off |                    0 |
| N/A   32C    P0    24W / 250W |      4MiB / 32510MiB |      0%      Default |
|       

In [3]:
random_seed = 42
set_seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed) # if use multi-GPU
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)
random.seed(random_seed)
os.environ["PYTHONHASHSEED"] = str(random_seed)

In [4]:
opt = {
    "batch_size": 4,
    "num_workers": 1,
    "lr": 5e-5,
    "max_epochs": 50,
    "warmup_ratio": 0.2,
    "print_step": 100,
    "save_path": "model_weights",
} 
args = Namespace(**opt)

In [5]:
class LabelSmoothingLoss(nn.Module):
    def __init__(self, classes, smoothing=0.0, dim=-1):
        super(LabelSmoothingLoss, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        pred = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            # true_dist = pred.data.clone()
            true_dist = torch.zeros_like(pred)
            true_dist.fill_(self.smoothing / (self.cls - 1))
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
        return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))

In [6]:
class ActionBasicModule(nn.Module):
    def __init__(self, device="cpu", net=None):
        super().__init__()
        self.device = device
        self.model = net
        self.model.blocks[6].proj = nn.Linear(self.model.blocks[6].proj.in_features, 6, bias=True)
        #self.model = self.model.to(self.device)
        

    def forward(self, x, label=None, loss_mode="smoothin", smoothing=0.0):
        x = self.model(x)
        if label is not None:
            if loss_mode == "smoothing":
                lossFunc = LabelSmoothingLoss(6, smoothing=smoothing).to(self.device)
            else:
                lossFunc = nn.CrossEntropyLoss().to(self.device)
            label = label.to(self.device)    
            loss = lossFunc(x, label)
            return x, loss
        return x, _

In [7]:
device = "cuda"
mean = [0.45, 0.45, 0.45]
std = [0.225, 0.225, 0.225]
num_frames = 64
sampling_rate = 2
frames_per_second = 30
slowfast_alpha = 4
num_clips = 10
num_crops = 3

class PackPathway(torch.nn.Module):
    """
    Transform for converting video frames as a list of tensors. 
    """
    def __init__(self):
        super().__init__()
        
    def forward(self, frames: torch.Tensor):
        fast_pathway = frames
        # Perform temporal sampling from the fast pathway.
        slow_pathway = torch.index_select(
            frames,
            1,
            torch.linspace(
                0, frames.shape[1] - 1, frames.shape[1] // slowfast_alpha
            ).long(),
        )
        frame_list = [slow_pathway, fast_pathway]
        return frame_list


In [8]:
def make_circle(js, idx, img=None):
    x_list = []
    y_list = []
    color = []
    dat = js.get('sequence').get('2d_pos')[idx]
    bbox = js.get('sequence').get('bounding_box')[idx]
    x1, y1, x2, y2 = float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3])
    for i in range(len(dat)):
        if i % 3 == 0:
            x_list.append(int(float(dat[i]) - x1))
        elif i % 3 == 1:
            y_list.append(int(float(dat[i]) - y1))
        else:
            if int(dat[i]) == 0:
                color.append((0, 0, 255))
            else:
                color.append((255, 0, 0))
    if img is None:
        img = np.zeros((int(y2-y1), int(x2-x1), 3), np.uint8) + 255
    for j in range(len(x_list)):
        img = cv2.circle(img, (x_list[j],y_list[j]), 2, color[j], 5)

    return img
        

In [9]:
class ActionDataset(Dataset):
    def __init__(self, file, interval=sampling_rate, max_len=num_frames, transform=None, train=True, mode="image"):
        super().__init__()
        self.file = file
        self.len = len(self.file)
        self.interval = interval
        self.max_len = max_len
        self.transform = transform
        self.train = train
        self.datalayer = PackPathway()
        self.mode = mode
    
    def __getitem__(self, idx):
        file = self.file[idx]
        imageFolder = sorted(glob2.glob(file + "/*.jpg"))
        folderName = file.split("/")[-1]
        jsonFile = file +  "/" + folderName + ".json"
        with open(jsonFile, "rb") as f:
            js = json.load(f)  

        
        label = None
        if "action" in js:
            label = js["action"] 
            if folderName == "file_33":
                print(label)
                label = 5
            label = torch.as_tensor(label, dtype=torch.long)

        vid = []
        for idx in range(len(js.get('sequence').get('2d_pos'))):
            img = make_circle(js, idx, img=None)
            vid.append(img)

        trainImages = []
        start = random.randint(0, len(imageFolder)-1-self.interval*self.max_len)
        for i in range(start, start+self.interval*self.max_len):
            if (i - start) % self.interval == 0:
                if self.mode == "image":
                    pil_image = Image.open(imageFolder[i])               
                    arr = np.array(pil_image)       
                else:
                    arr = vid[i]
                if self.transform:
                    augmented = self.transform(image=arr) 
                    image = augmented['image']
                trainImages.append(image)
        C, H, W = image.shape
        video = torch.stack(trainImages)
        video = self._add_padding(video, self.max_len)
        
        frames = self.datalayer(video.permute(1,0,2,3))

        return frames, label
        

    def __len__(self):
        return self.len

    def _add_padding(self, video, max_len):
        if video.shape[0] < max_len:
            T, C, H, W = video.shape
            pad = torch.zeros(max_len-T, C, H, W)
            video = torch.cat([video, pad], dim=0)
        else:
            video = video[:max_len]

        return video

In [10]:
class ActionTestDataset(Dataset):
    def __init__(self, file, interval=sampling_rate, max_len=num_frames, transform=None, train=True, mode="image"):
        super().__init__()
        self.file = file
        self.len = len(self.file)
        self.interval = interval
        self.max_len = max_len
        self.transform = transform
        self.train = train
        self.datalayer = PackPathway()
        self.mode = mode
    
    def __getitem__(self, idx):
        file = self.file[idx]
        imageFolder = sorted(glob2.glob(file + "/*.jpg"))
        folderName = file.split("/")[-1]
        jsonFile = file +  "/" + folderName + ".json"
        with open(jsonFile, "rb") as f:
            js = json.load(f)  

        label = None
        if "action" in js:
            label = js["action"] 
            label = torch.as_tensor(label, dtype=torch.long)

        vid = []
        for idx in range(len(js.get('sequence').get('2d_pos'))):
            img = make_circle(js, idx, img=None)
            vid.append(img)

        
        videos = []
        N = len(imageFolder)-1-self.interval*self.max_len
        startRange = range(0, N, int(N//1))
        for r in range(len(startRange)):
            start = startRange[r]
            trainImages = []
            for i in range(start, start+self.interval*self.max_len):
                if i % self.interval == 0:
                    if self.mode == "image":
                        pil_image = Image.open(imageFolder[i])               
                        arr = np.array(pil_image)       
                    else:
                        arr = vid[i]
                    if self.transform:
                        augmented = self.transform(image=arr) 
                        image = augmented['image']
                    trainImages.append(image)
            video = torch.stack(trainImages)
            video = self._add_padding(video, self.max_len)
            frames = self.datalayer(video.permute(1,0,2,3))
            videos.append(frames)
            #####
        #videos = torch.stack(videos)

        return videos, _
        

    def __len__(self):
        return self.len

    def _add_padding(self, video, max_len):
        if video.shape[0] < max_len:
            T, C, H, W = video.shape
            pad = torch.zeros(max_len-T, C, H, W)
            video = torch.cat([video, pad], dim=0)
        else:
            video = video[:max_len]

        return video

In [11]:
def prepare_accuracy(output, label):
    predict = torch.softmax(output, dim=-1).argmax(dim=-1).to(label.device)
    return (predict==label).sum(), len(label)


def inference_data(data, model):
    data_logit = []
    for i in range(len(data)):
        with torch.no_grad():
            sframes = []
            fframes = []
            sframes.append(data[i][0])
            fframes.append(data[i][1])
            x = [torch.stack(sframes), torch.stack(fframes)]
            x = [j.to(device)[...] for j in x]
            logit, _ = model(x, label=None)
            data_logit.append(logit)

    return torch.stack(data_logit)

In [12]:
if not os.path.exists("model_weights"):
    os.mkdir("model_weights")

In [14]:
def train_func(train_type="1", mode="image"):

    if train_type == "1" or train_type == "3":
        side_size = 224
    elif train_type == "2" or train_type == "4":
        side_size = 256

    if train_type == "1" or train_type == "2":
        path = "cropped_train/"
    elif train_type == "3" or train_type == "4":
        path = "cropped_train2/"
   
    if not os.path.exists(args.save_path): os.mkdir(args.save_path)
    videoFolder = sorted(glob2.glob(path + "*"))

    trainVideo = []
    validVideo = []
    validList = []
    for i in range(len(videoFolder)):
        if int(videoFolder[i].split("_")[-1]) in validList:
            validVideo.append(videoFolder[i])
        else:
            trainVideo.append(videoFolder[i])

    albumentations_traintransform = albumentations.Compose([
        albumentations.Resize(side_size , side_size), 
        albumentations.Normalize(mean, std),
        albumentations.pytorch.transforms.ToTensorV2()
    ])

    albumentations_transform = albumentations.Compose([
        albumentations.Resize(side_size , side_size), 
        albumentations.Normalize(mean, std),
        albumentations.pytorch.transforms.ToTensorV2()
    ])

    trainDataset = ActionDataset(trainVideo, transform=albumentations_traintransform, mode=mode)
    validDataset = ActionDataset(validVideo, transform=albumentations_transform, mode=mode)
    trainLoader = DataLoader(trainDataset, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=True)
    validLoader = DataLoader(validDataset, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=False)


    net = pyvideo.slowfast.slowfast_16x8_r101_50_50()
    modelPath = "SLOWFAST_16x8_R101_50_50.pyth"
    net.load_state_dict(torch.load(modelPath)["model_state"])

    device = "cuda"
    model = ActionBasicModule(device, net=net)
    model = model.to(device)
    x = trainDataset[0][0]
    x = [i.to(device)[None, ...] for i in x]
    out = model(x, label=None)
    print(out[0])

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(
            nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(
            nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                        lr=args.lr, correct_bias=False)

    train_len = len(trainLoader.dataset)
    print(f'data length {train_len}')
    num_train_steps = int(train_len / (args.batch_size * args.num_workers) * args.max_epochs)
    print(f'num_train_steps : {num_train_steps}')
    num_warmup_steps = int(num_train_steps * args.warmup_ratio)
    print(f'num_warmup_steps : {num_warmup_steps}')
    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps, num_training_steps=num_train_steps)

    for epoch in range(args.max_epochs):
        total_loss = 0
        model.train()
        print("------------TRAIN------------")
        for i, d in enumerate(tqdm(trainLoader)):  
            data, label = d
            x = [i.to(device)[...] for i in data]
            optimizer.zero_grad()
            output, loss = model(x, label, loss_mode="smoothing")
            total_loss += loss 
            loss.backward()
            optimizer.step()
            scheduler.step()
            
            #if i % args.print_step == 0:
            #    print("step:", i)
            #    print("loss:{:.2f}".format(loss.item()))
        print("EPOCH:", epoch)
        print("train_loss:{:.6f}".format(total_loss/len(trainLoader)))   

        total_loss = 0
        total_answer = 0
        
        model.eval()

        torch.save(
                    model.state_dict(),
                    args.save_path + f"/modeltype{train_type}_{mode}_lastEpoch.pth"
        )
        if epoch == 1:
            torch.save(
                    model.state_dict(),
                    args.save_path + f"/modeltype{train_type}_{mode}_21Epoch.pth"
                    )

        if epoch == 1:
            torch.save(
                    model.state_dict(),
                    args.save_path + f"/modeltype{train_type}_{mode}_22Epoch.pth"
                    )

        if epoch == 1:
            torch.save(
                    model.state_dict(),
                    args.save_path + f"/modeltype{train_type}_{mode}_23Epoch.pth"
                    )

        if epoch == 2:
            torch.save(
                    model.state_dict(),
                    args.save_path + f"/modeltype{train_type}_{mode}_24Epoch.pth"
                    )

        if epoch == 3:
            torch.save(
                    model.state_dict(),
                    args.save_path + f"/modeltype{train_type}_{mode}_25Epoch.pth"
                    )
        if epoch == 3:
            torch.save(
                    model.state_dict(),
                    args.save_path + f"/modeltype{train_type}_{mode}_26Epoch.pth"
                    )
        if epoch == 3:
            torch.save(
                    model.state_dict(),
                    args.save_path + f"/modeltype{train_type}_{mode}_27Epoch.pth"
                    )

        if epoch == 0:
            torch.save(
                    model.state_dict(),
                    args.save_path + f"/modeltype{train_type}_{mode}_38Epoch.pth"
                    )

        if epoch == 0:
            torch.save(
                    model.state_dict(),
                    args.save_path + f"/modeltype{train_type}_{mode}_39Epoch.pth"
                    )

        if epoch == 0:
            torch.save(
                    model.state_dict(),
                    args.save_path + f"/modeltype{train_type}_{mode}_49Epoch.pth"
                    )
        
        
    
    


In [15]:
train_func(train_type="2", mode="image")
train_func(train_type="4", mode="image")
train_func(train_type="4", mode="pose")

tensor([[ 0.0572, -0.1228,  0.0215, -0.0713, -0.0927, -0.2712]],
       device='cuda:0', grad_fn=<ViewBackward>)
data length 142
num_train_steps : 1775
num_warmup_steps : 355
------------TRAIN------------


HBox(children=(IntProgress(value=0, max=36), HTML(value='')))

0

EPOCH: 0
train_loss:1.793534
------------TRAIN------------


HBox(children=(IntProgress(value=0, max=36), HTML(value='')))

0

EPOCH: 1
train_loss:1.563009
------------TRAIN------------


HBox(children=(IntProgress(value=0, max=36), HTML(value='')))

0

EPOCH: 2
train_loss:1.169222
------------TRAIN------------


HBox(children=(IntProgress(value=0, max=36), HTML(value='')))

0


In [None]:
def test(train_type, mode, weight="25Epoch"):
    if train_type == "1" or train_type == "3":
        side_size = 224
    elif train_type == "2" or train_type == "4":
        side_size = 256

    if train_type == "1" or train_type == "2":
        path = "cropped_test/"
    elif train_type == "3" or train_type == "4":
        path = "cropped_test2/"

    albumentations_transform = albumentations.Compose([
        albumentations.Resize(side_size , side_size), 
        albumentations.Normalize(mean, std),
        albumentations.pytorch.transforms.ToTensorV2()
    ])

    testVideo = sorted(glob2.glob(path + "*"))
    testDataset = ActionTestDataset(testVideo, transform=albumentations_transform, mode=mode)
    testLoader = DataLoader(testDataset, batch_size=1, num_workers=args.num_workers, shuffle=False)

    net = pyvideo.slowfast.slowfast_16x8_r101_50_50()
    modelPath = "SLOWFAST_16x8_R101_50_50.pyth"
    net.load_state_dict(torch.load(modelPath)["model_state"])

    device = "cuda"
    model = ActionBasicModule(device, net=net)
    model = model.to(device)
    model.load_state_dict(torch.load(args.save_path + f"/modeltype{train_type}_{mode}_{weight}.pth"))
    model.eval()

    if not os.path.exists("submission"):
        os.mkdir("submission")
    print("------------TEST------------")    
    logits = torch.ones(len(testDataset), 6) * 0.000
    for i in tqdm(range(len(testDataset))):
        with torch.no_grad():
            data, _ = testDataset[i]
            logit = inference_data(data, model)
            prob = torch.softmax(logit, dim=-1)
            prob = torch.mean(prob, dim=0)
            ### Extreme 1
            index = prob.argmax(dim=-1)
            logits[i] = prob #1.
    logits =  logits.tolist()
    probability = np.array(logits)
    submission = pd.read_csv("sample_submission.csv")
    for i in range(6):
        submission[f'Label_{i}'] = probability[:, i]
    test_label = {}
    for i in range(len(submission)):
        f = submission.iloc[i]
        filename = f["file_path"]
        test_label[filename] = labels[int(filename.split("_")[-1])]

    submission.to_csv(f"submission/modeltype{train_type}_{mode}_{weight}.csv", index=False)

In [None]:
t = ["21Epoch", "22Epoch", "23Epoch", "24Epoch", "25Epoch", "26Epoch", "27Epoch", "38Epoch", "39Epoch", "49Epoch"]

In [None]:
for k in t:  
    if k in ["21Epoch", "22Epoch", "23Epoch", "24Epoch", "25Epoch", "26Epoch", "27Epoch"]:
        test("4", "image", k) 
    elif k in ["39Epoch"]:
        test("4", "pose", k)
        test("4", "image", k) 
    elif k in ["38Epoch", "49Epoch"]:
        test("2", "image", k)
        test("4", "image", k) 

    

------------TEST------------


HBox(children=(IntProgress(value=0, max=45), HTML(value='')))

------------TEST------------


HBox(children=(IntProgress(value=0, max=45), HTML(value='')))

------------TEST------------


HBox(children=(IntProgress(value=0, max=45), HTML(value='')))

------------TEST------------


HBox(children=(IntProgress(value=0, max=45), HTML(value='')))

------------TEST------------


HBox(children=(IntProgress(value=0, max=45), HTML(value='')))

------------TEST------------


HBox(children=(IntProgress(value=0, max=45), HTML(value='')))

------------TEST------------


HBox(children=(IntProgress(value=0, max=45), HTML(value='')))

------------TEST------------


HBox(children=(IntProgress(value=0, max=45), HTML(value='')))

In [None]:
subs = sorted(glob2.glob("submission/*"))

In [None]:
submission = pd.read_csv("sample_submission.csv")
for i in range(6):
    ans = pd.read_csv(subs[0])[f"Label_{i}"]
    for j in range(1, len(subs)):
        ans += pd.read_csv(subs[j])[f"Label_{i}"]
    submission[f'Label_{i}'] = list(ans/len(subs))
    
submission2 = pd.read_csv("sample_submission.csv")
new_prob = np.zeros((len(submission), 6))
for i in range(len(submission)):
    f = submission.iloc[i]
    prob = np.array([f["Label_0"], f["Label_1"], f["Label_2"], f["Label_3"], f["Label_4"], f["Label_5"]])
    index = prob.argmax(axis=-1)
    new_prob[i][index] = 1.

for i in range(6):
     submission2[f'Label_{i}'] = new_prob[:, i]

submission2.to_csv("final_result.csv", index=False)  

IndexError: list index out of range

Unnamed: 0,file_path,Label_0,Label_1,Label_2,Label_3,Label_4,Label_5
0,./test\file_142,0.001233,0.000438,0.893248,0.049957,0.000791,0.054333
1,./test\file_143,0.000716,7.6e-05,0.000376,5.9e-05,1.3e-05,0.99876
2,./test\file_144,2.2e-05,9e-06,0.00041,0.999492,2.6e-05,4.1e-05
3,./test\file_145,0.002842,0.003151,0.847881,0.038699,0.002136,0.10529
4,./test\file_146,0.96939,0.00012,0.000103,5.8e-05,0.030232,9.7e-05
5,./test\file_147,0.897046,2e-06,7.6e-05,5.7e-05,0.102809,1e-05
6,./test\file_148,0.938206,0.000323,0.000378,0.000285,0.060731,7.7e-05
7,./test\file_149,0.001302,4.3e-05,7e-06,4.9e-05,0.998593,6e-06
8,./test\file_150,7.2e-05,0.998786,5.7e-05,0.000293,0.000114,0.000678
9,./test\file_151,0.009668,0.006152,0.000996,0.000497,0.000303,0.982383
