In [3]:
import numpy as np
import json
import os
import torch
import torchvision
import matplotlib.pyplot as plt
from torch import nn, optim
from torch.autograd.variable import Variable

In [127]:
# Seed NumPy's global RNG so any random draws in this notebook are repeatable.
np.random.seed(0)
# Number of keypoints kept per person (the eye and ear keypoints are dropped).
NUM_BODY_PARTS = 13
# Number of frames used from every clip.
TOTAL_FRAMES = 250
# Each clip has 250 frames. Of the 17 detected body parts we keep 13 (the eyes and ears are removed).
# Each kept part is an (x, y) pair, so one frame's 13 x 2 coordinates are flattened into 26 values.
# A whole clip therefore becomes a 250 x 26 array, one row per frame.
def from_motion_to_numpy_vector(motion):
    """Flatten a clip's pose frames into a ``(TOTAL_FRAMES, NUM_BODY_PARTS * 2)`` array.

    Only the first person of each frame is used. The eye and ear keypoints are
    dropped; the remaining ``[x, y]`` pairs are written, in the order they
    appear, into that frame's row.

    Args:
        motion: Sequence of frames. Each frame is a (possibly empty) list of
            people, and each person is ``[label, [part_name, [x, y]], ...]``.

    Returns:
        A float ``np.ndarray`` of shape ``(TOTAL_FRAMES, NUM_BODY_PARTS * 2)``.
        Rows stay all-zero for frames with no detected person and for frames
        that are missing because ``motion`` is shorter than ``TOTAL_FRAMES``;
        frames beyond ``TOTAL_FRAMES`` are ignored.

    Raises:
        IndexError: If a person has more than ``NUM_BODY_PARTS`` keypoints
            left after the eyes and ears are dropped.
    """
    # For now, we only take the first person. Later we can maybe try to feed
    # in all people, or do batches of two.
    ignored_parts = ('left_eye', 'left_ear', 'right_eye', 'right_ear')
    motion_vector = np.zeros((TOTAL_FRAMES, NUM_BODY_PARTS * 2))
    if len(motion) != TOTAL_FRAMES:
        # Best effort: warn, but still convert whatever frames are available.
        print("We need {} frames.".format(TOTAL_FRAMES))
    for i, frame in enumerate(motion):
        if i >= TOTAL_FRAMES:
            break  # extra frames do not fit in the output array
        if len(frame) == 0:
            continue  # nobody detected: leave this row as zeros
        # frame[0] is the first person; its element 0 is the label and the
        # remaining elements are [part_name, [x, y]] keypoints.
        keypoints = frame[0][1:]
        kept = [part[1] for part in keypoints if part[0] not in ignored_parts]
        frame_vector = np.zeros((NUM_BODY_PARTS, 2))
        for row, xy in enumerate(kept):
            frame_vector[row] = xy
        motion_vector[i] = frame_vector.reshape(NUM_BODY_PARTS * 2)
    return motion_vector

def from_numpy_vector_to_motion_coordinates(motion_vector):
    """Undo the per-frame flattening of a clip.

    Reshapes ``motion_vector`` to ``(TOTAL_FRAMES, NUM_BODY_PARTS, 2)`` so that
    each frame is a ``NUM_BODY_PARTS x 2`` array of coordinates.
    """
    coordinates_shape = (TOTAL_FRAMES, NUM_BODY_PARTS, 2)
    return motion_vector.reshape(coordinates_shape)

class LetsDanceDataset(torch.utils.data.Dataset):
    """In-memory dataset of flattened pose sequences for 'latin' dance clips.

    The frame index is a JSON list of ``[category, dance_id, frames]`` entries.
    Every 'latin' entry with at least ``TOTAL_FRAMES`` frames is loaded from
    ``<root_dir>/<category>/<dance_id>.json`` and converted with
    ``from_motion_to_numpy_vector``.

    Attributes set by ``__init__``:
        root_dir: Directory that contains one sub-directory per category.
        data: ``np.ndarray`` of shape
            ``(num_clips, TOTAL_FRAMES, NUM_BODY_PARTS * 2)``.
        metadata: The selected index entries, aligned with ``data``.
    """

    # Category name -> integer label. Not used by this class itself.
    categories_hash = {'tango': 0, 'break': 1, 'swing': 2,'quickstep': 3,
                  'foxtrot': 4,'pasodoble': 5,'tap': 6,'samba': 7,'flamenco': 8,
                  'ballet': 9,'rumba': 10,'waltz': 11,'cha': 12,'latin': 13,
                  'square': 14,'jive': 15}

    def __init__(self, root_dir='../densepose/full/', index_path='./dance-frame-list.json'):
        """Load every selected clip into memory.

        Args:
            root_dir: Directory holding ``<category>/<dance_id>.json`` files.
                NOTE(review): the default mirrors the sample path used
                elsewhere in this notebook -- confirm it matches your layout.
            index_path: Path of the JSON frame index.
        """
        super().__init__()
        self.root_dir = root_dir

        # For this first test, we are just using Latin dances
        with open(index_path) as f:
            frames_index = json.load(f)

        latin_dances = [
            dance for dance in frames_index
            if dance[0] == 'latin' and dance[2] >= TOTAL_FRAMES
        ]

        self.data = np.zeros((len(latin_dances), TOTAL_FRAMES, NUM_BODY_PARTS * 2))
        self.metadata = latin_dances

        for i, dance in enumerate(latin_dances):
            category, dance_id, _frames = dance
            # os.path.join works whether or not root_dir ends with a separator.
            current_frame_path = os.path.join(
                self.root_dir, category, "{}.json".format(dance_id))
            with open(current_frame_path) as f:
                motion = json.load(f)
            self.data[i] = from_motion_to_numpy_vector(motion)

    def __len__(self):
        """Return the number of loaded clips."""
        return len(self.data)

    def getitem_metadata(self, index):
        """Return the index entry that produced ``self.data[index]``."""
        return self.metadata[index]

    def __getitem__(self, index):
        """Return the ``(TOTAL_FRAMES, NUM_BODY_PARTS * 2)`` array for one clip."""
        return self.data[index]

# Examples of the data

In [128]:
# Load one sample clip from disk so the raw frame structure can be inspected.
current_frame_path = "../densepose/full/latin/QmL0QYsctV0_030.json"
with open(current_frame_path) as f:
    motion = json.load(f)
        
# For now, just return the first person of each non-empty frame,
# just to see if it generates any stick figures.
# (Earlier experiment, kept for reference.)
# motion = np.array([pose_to_numpy(frame)[0].reshape(34) for frame in motion if len(frame) > 0])

In [129]:
# Display the first frame of the loaded clip.
# NOTE(review): despite the name, `person0` holds the whole first frame, i.e. a
# list with one entry per detected person (see the printed output), not just
# the first person.
person0 = motion[0]
person0
# TODO: bounding box idea
# TODO: try to clean the data

[['person0',
  ['nose', [1051.7425537109375, 74.26245880126953]],
  ['left_eye', [1049.74853515625, 56.2840690612793]],
  ['right_eye', [1045.760498046875, 56.2840690612793]],
  ['left_ear', [1007.874755859375, 54.286468505859375]],
  ['right_ear', [1011.8627319335938, 53.28767013549805]],
  ['left_shoulder', [1033.796630859375, 122.204833984375]],
  ['right_shoulder', [979.9589233398438, 107.22283935546875]],
  ['left_elbow', [1061.7125244140625, 203.1075897216797]],
  ['right_elbow', [1025.8206787109375, 195.1171875]],
  ['left_wrist', [1098.601318359375, 213.0955810546875]],
  ['right_wrist', [1099.5982666015625, 209.1003875732422]],
  ['left_hip', [1032.7996826171875, 318.96832275390625]],
  ['right_hip', [992.9197998046875, 317.9695129394531]],
  ['left_knee', [1087.6343994140625, 465.7918395996094]],
  ['right_knee', [1057.7244873046875, 466.7906494140625]],
  ['left_ankle', [1047.7545166015625, 649.5709228515625]],
  ['right_ankle', [1022.8296508789062, 663.5541381835938]]],
 ['

In [123]:
# Sanity check: flattening a (12, 2) array of (x, y) rows lays the pairs out row by row.
a = np.zeros((12, 2))
a[0, :] = (1, 2)
a.reshape(-1)

array([1., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0.])

# Running

In [124]:
def draw_points(points):
    """Scatter-plot the body-part coordinates of a single frame.

    ``points`` is reshaped to ``(NUM_BODY_PARTS, 2)``. Column 0 is drawn on the
    horizontal axis and the negated column 1 on the vertical axis, inside a
    fixed window of x in [0, 1980] and y in [-1000, 0].
    """
    # Fix the viewport first so every frame is drawn at the same scale.
    plt.xlim(0, 1980)
    plt.ylim(-1000, 0)
    coords = points.reshape(NUM_BODY_PARTS, 2)
    xs, ys = coords[:, 0], coords[:, 1]
    # y is negated -- presumably because image y grows downward (TODO confirm).
    plt.scatter(xs, -ys, s=10, marker='.')
    plt.show()

In [125]:
# Ask PyTorch whether a CUDA-capable GPU can be used (True/False).
is_cuda = torch.cuda.is_available()

# Choose the device accordingly and report the choice; `device` is the handle
# that tensors get moved to.
device = torch.device("cuda" if is_cuda else "cpu")
print("GPU is available" if is_cuda else "GPU not available, CPU used")

GPU is available


In [130]:
# Build the dataset (this reads the frame index and every selected clip from
# disk) and display the array for the first clip.
# Note: despite the name, this is a Dataset instance, not a torch DataLoader.
dataloader = LetsDanceDataset()
dataloader[0]

TypeError: 'tuple' object cannot be interpreted as an integer

In [91]:
class Model(nn.Module):
    """Multi-layer ``nn.RNN`` followed by a linear layer applied to every time step.

    Args:
        input_size: Number of features per time step.
        output_size: Number of features produced per time step.
        hidden_dim: Size of the RNN hidden state.
        n_layers: Number of stacked RNN layers.
    """

    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super().__init__()

        # Kept so forward/init_hidden can size the hidden state.
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # RNN layer; batch_first=True means inputs are (batch, seq, feature).
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        # Fully connected layer mapping each hidden vector to the output size.
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        """Run the RNN over ``x`` starting from a zero hidden state.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            A tuple ``(out, hidden)`` where ``out`` has shape
            ``(batch * seq_len, output_size)`` and ``hidden`` has shape
            ``(n_layers, batch, hidden_dim)``.
        """
        batch_size = x.size(0)

        # Fresh zero hidden state for every call.
        hidden = self.init_hidden(batch_size)

        out, hidden = self.rnn(x, hidden)

        # Collapse batch and time so the linear layer sees one row per time step.
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):
        """Return a zero hidden state of shape ``(n_layers, batch_size, hidden_dim)``.

        The tensor is allocated with the device and dtype of the model's own
        parameters, so it always matches the model no matter where it has been
        moved or how it has been cast, instead of relying on a notebook-global
        ``device`` variable.
        """
        reference = next(self.parameters())
        return reference.new_zeros(self.n_layers, batch_size, self.hidden_dim)