In [1]:
from lib.core.config import config as Config

# Override the project's configuration object for this notebook session.
# NOTE(review): what entering/leaving `with Config:` does is defined in
# lib.core.config and is not visible here — confirm before relying on it.
with Config:
    Config.OUTPUT_DIR = ''
    Config.LOG_DIR = ''
    Config.DATA_DIR = ''
    Config.GPUS = '0'
    Config.WORKERS = 0
    Config.PRINT_FREQ = 20

    # Cudnn related params
    Config.CUDNN.BENCHMARK = True
    Config.CUDNN.DETERMINISTIC = False
    Config.CUDNN.ENABLED = True

    # common params for NETWORK
    Config.MODEL.NAME = 'pose_resnet'
    Config.MODEL.INIT_WEIGHTS = True
    # Raw string: the Windows-style separator must not be parsed as an escape
    # sequence ('\p' is an invalid escape and triggers a warning otherwise).
    Config.MODEL.PRETRAINED = r'pretrained_weight\pose_resnet_50_384x288.pth.tar'
    Config.MODEL.NUM_JOINTS = 17 #17 for coco, 16 for mpii
    Config.MODEL.IMAGE_SIZE = [256,256]  # width * height, ex: 192 * 256
    Config.MODEL.EXTRA.NUM_LAYERS = 50
    Config.MODEL.EXTRA.DECONV_WITH_BIAS = False
    Config.MODEL.EXTRA.NUM_DECONV_LAYERS = 3
    Config.MODEL.EXTRA.NUM_DECONV_FILTERS = [256, 256, 256]
    Config.MODEL.EXTRA.NUM_DECONV_KERNELS = [4, 4, 4]
    Config.MODEL.EXTRA.FINAL_CONV_KERNEL = 1
    Config.MODEL.EXTRA.TARGET_TYPE = 'gaussian'
    Config.MODEL.EXTRA.HEATMAP_SIZE = [64, 64]  # width * height, ex: 24 * 32
    Config.MODEL.EXTRA.SIGMA = 2

    Config.MODEL.STYLE = 'pytorch' #caffe or pytorch

    Config.LOSS.USE_TARGET_WEIGHT = True

    # DATASET related params
    Config.DATASET.ROOT = r'D:\Datasets\coco2017'
    Config.DATASET.DATASET = 'coco' # mpii or coco
    Config.DATASET.TRAIN_SET = 'train'
    Config.DATASET.TEST_SET = 'validation'
    Config.DATASET.DATA_FORMAT = 'jpg'
    Config.DATASET.HYBRID_JOINTS_TYPE = ''
    Config.DATASET.SELECT_DATA = True

    # training data augmentation
    Config.DATASET.FLIP = True
    Config.DATASET.SCALE_FACTOR = 0.25
    Config.DATASET.ROT_FACTOR = 30

    # train
    Config.TRAIN.LR_FACTOR = 0.1
    Config.TRAIN.LR_STEP = [90, 110]
    Config.TRAIN.LR = 0.001

    Config.TRAIN.OPTIMIZER = 'adam'
    Config.TRAIN.MOMENTUM = 0.9
    Config.TRAIN.WD = 0.0001
    Config.TRAIN.NESTEROV = False
    Config.TRAIN.GAMMA1 = 0.99
    Config.TRAIN.GAMMA2 = 0.0

    Config.TRAIN.BEGIN_EPOCH = 0
    Config.TRAIN.END_EPOCH = 140

    Config.TRAIN.RESUME = False
    Config.TRAIN.CHECKPOINT = ''

    Config.TRAIN.BATCH_SIZE = 2
    Config.TRAIN.SHUFFLE = True

    # testing
    # size of images for each device
    Config.TEST.BATCH_SIZE = 2
    # Test Model Epoch
    Config.TEST.FLIP_TEST = True
    Config.TEST.POST_PROCESS = True
    Config.TEST.SHIFT_HEATMAP = True

    Config.TEST.USE_GT_BBOX = True
    # nms
    Config.TEST.OKS_THRE = 0.5
    Config.TEST.IN_VIS_THRE = 0.0
    Config.TEST.COCO_BBOX_FILE = ''
    Config.TEST.BBOX_THRE = 1.0
    Config.TEST.MODEL_FILE = ''
    Config.TEST.IMAGE_THRE = 0.0
    Config.TEST.NMS_THRE = 1.0

    # debug
    Config.DEBUG.DEBUG = False
    Config.DEBUG.SAVE_BATCH_IMAGES_GT = False
    Config.DEBUG.SAVE_BATCH_IMAGES_PRED = False
    Config.DEBUG.SAVE_HEATMAPS_GT = False
    Config.DEBUG.SAVE_HEATMAPS_PRED = False

In [2]:
# %run pose_estimation/train.py

In [3]:
import cv2
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import pathlib
import torch

class TestDataset(Dataset):
    """Dataset over every ``*.jpg`` file found recursively under a root folder.

    Each item is the image loaded with OpenCV, resized to ``image_size`` when
    its size differs, converted to a tensor and normalized, together with the
    POSIX-style path of the file it came from.

    Args:
        root: Directory searched recursively for ``*.jpg`` files.
        image_size: Target size as ``(width, height)``.
    """

    def __init__(self, root, image_size):
        self.db = list(pathlib.Path(root).rglob('*.jpg'))
        self.image_size = image_size #(W,H)
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])])

    def __len__(self,):
        """Return the number of image files discovered under the root."""
        return len(self.db)

    def __getitem__(self, idx):
        """Return ``(image_tensor, image_file)`` for the file at position ``idx``.

        Raises:
            OSError: If OpenCV could not read the file.
        """
        image_file = self.db[idx].as_posix()
        image = cv2.imread(image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than on `.shape` below.
            raise OSError(f'cv2.imread could not read image: {image_file}')
        width, height = self.image_size
        # image.shape is (rows, cols, ...), i.e. (height, width, ...).
        if (image.shape[0] != height) or (image.shape[1] != width):
            # cv2.resize takes the target size as (width, height).
            image = cv2.resize(image, (int(width), int(height)))
        image = self.transform(image)
        return image, image_file

# Scale factor applied to the 1280x720 base size; the dataset receives
# [960, 540] as (W, H).
r=3/4
test_dataset = TestDataset(
    root=r'D:\Datasets\Golf\IdeasLab\validation',
    image_size=[int(1280*r), int(720*r)],
)
# One image per batch, in dataset order, loaded in the main process.
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

In [4]:
from lib.models.pose_resnet import get_pose_net
from collections import OrderedDict

model_weight_path = r'D:\Portfolio\Golfer_Motion_Tracking\DL_approach\human-pose-estimation\coco\pose_resnet_50\model_best.pth.tar'

model = get_pose_net(Config, is_train=False)
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location='cpu' lets a checkpoint saved from GPU tensors load on a machine
# without CUDA; the model is used on the CPU in this notebook.
checkpoint = torch.load(model_weight_path, map_location='cpu')
state_dict = OrderedDict()
for k,v in checkpoint.items():
    # Drop the 'module.' fragment from parameter names so the keys match the bare model
    # (presumably a DataParallel prefix — confirm against how the checkpoint was saved).
    state_dict[k.replace('module.','')] = v

model.load_state_dict(state_dict)
model = model.eval()

In [5]:
import numpy as np
import math
from lib.utils.transforms import flip_back

def get_coor(batch_heatmaps):
    """Locate the peak of every joint heatmap and nudge it toward the higher neighbour.

    Args:
        batch_heatmaps: Array indexed as ``[batch, joint, row, column]``.

    Returns:
        A tuple ``(preds, maxvals)`` where ``preds`` is a float32 array of
        shape ``(batch, joint, 2)`` holding ``(x, y)`` heatmap coordinates and
        ``maxvals`` has shape ``(batch, joint)`` with each heatmap's maximum.
        Coordinates are zeroed for joints whose maximum is not positive.
    """
    batch_size, num_joints, heatmap_height, heatmap_width = batch_heatmaps.shape[:4]

    # Flatten each heatmap so the peak can be found with a single argmax.
    flat = batch_heatmaps.reshape((batch_size, num_joints, -1))
    peak_index = np.argmax(flat, 2).reshape((batch_size, num_joints, 1))
    maxvals = np.amax(flat, 2).reshape((batch_size, num_joints, 1))

    # Convert the flat index back to (column, row) in float32.
    flat_pos = peak_index.astype(np.float32)
    preds = np.concatenate(
        [flat_pos % heatmap_width, np.floor(flat_pos / heatmap_width)],
        axis=2,
    )

    # Zero out joints whose peak value is not strictly positive.
    preds *= np.greater(maxvals, 0.0).astype(np.float32)

    # Shift each peak a quarter pixel toward the larger neighbouring value.
    for n, p in np.ndindex(batch_size, num_joints):
        hm = batch_heatmaps[n, p]
        px = int(math.floor(preds[n, p, 0] + 0.5))
        py = int(math.floor(preds[n, p, 1] + 0.5))
        inside = 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1
        if not inside:
            continue
        grad = np.array([
            hm[py, px + 1] - hm[py, px - 1],
            hm[py + 1, px] - hm[py - 1, px],
        ])
        preds[n, p] += np.sign(grad) * 0.25

    return preds, maxvals.squeeze(axis=-1)


def predict(model,images,axi_flip=True):
    """Run the model on a batch and decode joint coordinates from its heatmaps.

    Args:
        model: Callable returning a tensor of heatmaps for a batch of images.
        images: Batch tensor; axis 3 is the one mirrored for flip averaging.
        axi_flip: When true, average the heatmaps of the batch with those of
            its mirrored copy; otherwise use a single forward pass.

    Returns:
        ``(heatmaps, joints_coor, joints_score)`` where the last two come from
        ``get_coor(heatmaps)``.
    """
    with torch.no_grad():
        if not axi_flip:
            heatmaps = model(images).numpy()
        else:
            # Joint index pairs swapped by flip_back: [1, 2], [3, 4], ..., [15, 16].
            flip_pairs = [[left, left + 1] for left in range(1, 17, 2)]

            # Mirror the batch along axis 3; copy so the array has positive strides
            # before it is handed back to torch.
            mirrored = torch.from_numpy(np.flip(images.numpy(), 3).copy())
            flip_heatmaps = flip_back(model(mirrored).numpy(), flip_pairs)
            # Shift the flipped heatmaps one column to the right.
            flip_heatmaps[:, :, :, 1:] = flip_heatmaps[:, :, :, 0:-1]

            heatmaps = (model(images).numpy() + flip_heatmaps) / 2

    joints_coor, joints_score = get_coor(heatmaps)
    return heatmaps, joints_coor, joints_score

In [6]:
def coor_transform(coor, heatmaps, source_images):
    """Rescale heatmap-space coordinates to each source image's pixel grid.

    Args:
        coor: Array of shape ``(batch, joint, 2)`` with ``(x, y)`` positions in
            heatmap space.
        heatmaps: Array whose shape from axis 2 onward gives the heatmap
            ``(height, width)``.
        source_images: Sequence of image arrays, one per batch item; the first
            two axes of each are ``(height, width)``.

    Returns:
        Integer array with the coordinates rounded to the nearest pixel.
    """
    # Heatmap extent as (width, height), matching the (x, y) order of `coor`.
    heatmap_wh = np.array(tuple(reversed(heatmaps.shape[2:])))
    # Per-image extent as (width, height), with a joint axis added for broadcasting.
    image_wh = np.array(
        [tuple(reversed(image.shape[:2])) for image in source_images]
    )[:, np.newaxis, :]

    normalized = coor / heatmap_wh
    scaled = normalized * image_wh
    return np.rint(scaled).astype(int)

def heatmaps_resize(heatmaps, source_images):
    """Resize each item's heatmaps to the size of its source image.

    Args:
        heatmaps: Array indexed as ``[batch, joint, row, column]``.
        source_images: Sequence of image arrays, one per batch item; the first
            two axes of each are ``(height, width)``.

    Returns:
        A list with one ``(joint, height, width)`` array per batch item.
    """
    resized_heatmaps = []
    # Move joints to the last axis so OpenCV treats them as image channels.
    for item_heatmaps, image in zip(heatmaps.transpose([0,2,3,1]), source_images):
        height, width = image.shape[:2]
        # cv2.resize takes the target size as (width, height).
        resized = cv2.resize(item_heatmaps, (width, height))
        if resized.ndim == 2:
            # OpenCV drops the channel axis for single-channel input; restore
            # it so the transpose below also works with one joint.
            resized = resized[:, :, None]
        resized_heatmaps.append(resized.transpose([2,0,1]))
    return resized_heatmaps

In [7]:
import io

def save_visualize_result(source_path,source_image,joints_coor,heatmaps):
    """Draw labelled joints (and optionally heatmaps) on an image and save the figure.

    The result is written to ``GolferPrediction/<grandparent dir name>/<file name>``
    relative to the current working directory, where the directory and file
    names are taken from ``source_path``.

    Args:
        source_path: Path of the source image.
        source_image: Image array to annotate; it is copied, not modified.
        joints_coor: Sequence of per-joint ``(x, y)`` pixel coordinates.
        heatmaps: Array with joints on axis 0 to overlay (its per-pixel maximum
            over joints is drawn semi-transparently), or ``None`` for no overlay.
    """
    joints_name = {
        0: "nose",
        1: "left_eye",
        2: "right_eye",
        3: "left_ear",
        4: "right_ear",
        5: "left_shoulder",
        6: "right_shoulder",
        7: "left_elbow",
        8: "right_elbow",
        9: "left_wrist",
        10: "right_wrist",
        11: "left_hip",
        12: "right_hip",
        13: "left_knee",
        14: "right_knee",
        15: "left_ankle",
        16: "right_ankle"}
    image_copy = source_image.copy()
    for j_idx, j_coor in enumerate(joints_coor):
        # OpenCV drawing functions expect a point as a tuple of plain Python
        # ints; NumPy arrays / NumPy integer scalars are rejected by some builds.
        point = (int(j_coor[0]), int(j_coor[1]))
        # Fall back to the index as label for joints missing from the name table.
        label = joints_name.get(j_idx, str(j_idx))
        cv2.circle(image_copy,point,3,(255,0,0),-1)
        cv2.putText(image_copy, label, point, cv2.FONT_HERSHEY_DUPLEX, 0.4, (255, 0, 0))

    fig = plt.figure(figsize=(128,72),dpi=15)
    try:
        plt.imshow(image_copy)
        if heatmaps is not None:
            plt.imshow(np.clip(heatmaps.max(axis=0),0,1)*255,alpha=0.5)
        plt.axis(False)
        plt.tight_layout()

        # Render the figure to raw pixel bytes in memory.
        with io.BytesIO() as buf:
            fig.savefig(buf, format='raw')
            raw_pixels = np.frombuffer(buf.getvalue(), dtype=np.uint8)
    finally:
        # Close this specific figure even if drawing failed, so figures do not
        # pile up across the many calls made by the inference loop.
        plt.close(fig)

    # Rows = figure height in pixels, columns = figure width; channels inferred.
    # The shape is passed positionally because the `newshape` keyword is deprecated.
    save_fig = np.reshape(raw_pixels, (int(fig.bbox.bounds[3]), int(fig.bbox.bounds[2]), -1))

    source_path = pathlib.Path(source_path)
    save_path = pathlib.Path('GolferPrediction',source_path.parent.parent.name)
    save_path.mkdir(parents=True,exist_ok=True)
    plt.imsave(save_path.joinpath(source_path.name),save_fig)
    # plt.show()

In [9]:
import matplotlib.pyplot as plt

# Predict joints for every batch and save one annotated figure per image.
for input_images, source_image_paths in test_loader:
    # Re-read the originals so results are drawn on the unresized images.
    source_images = [plt.imread(path) for path in source_image_paths]
    heatmaps, joints_coor, joints_score = predict(model, input_images)
    joints_coor = coor_transform(joints_coor, heatmaps, source_images)
    heatmaps = heatmaps_resize(heatmaps, source_images)
    for path, image, coor, image_heatmaps in zip(
            source_image_paths, source_images, joints_coor, heatmaps):
        save_visualize_result(path, image, coor, image_heatmaps)