In [61]:
import numpy as np
import json
import os
import torch
import torchvision
import matplotlib.pyplot as plt
from torch import nn, optim
from torch.autograd.variable import Variable

In [62]:
def pose_to_numpy(frame):
    people = [person[1:] for person in frame]
    return np.array([[body_part[1] for body_part in person] for person in people])

class LetsDanceDataset(torch.utils.data.Dataset):
    categories_hash = {'tango': 0, 'break': 1, 'swing': 2,'quickstep': 3,
                  'foxtrot': 4,'pasodoble': 5,'tap': 6,'samba': 7,'flamenco': 8,
                  'ballet': 9,'rumba': 10,'waltz': 11,'cha': 12,'latin': 13,
                  'square': 14,'jive': 15}
    
    def __init__(self, root_dir, frames):
        super().__init__()

        # frames_index is [category, id, frame_no]
        self.root_dir = root_dir
        self.data = frames_index
        f.close()
        
    def __len__(self):
        return len(self.data)
    
    def getitem_metadata(self, index):
        return self.data[index]
    
    def __getitem__(self, index):
        [category, dance_id, frames] = self.data[index]
        motion = []
        current_frame_path = "{}{}/{}.json".format(self.root_dir, category, dance_id)
        with open(current_frame_path) as f:
            motion = json.load(f)
            # for now, just return person 1 on the first frame, 
            # just to see if it generates any stick figures
            motion = np.array([pose_to_numpy(frame)[0].reshape(34) for frame in motion if len(frame) > 0])
        return (motion, self.categories_hash[category])

In [63]:
with open('./dance-frame-list.json') as f:
    frames_index = json.load(f)
    np.random.shuffle(frames_index)
    
dataloader = LetsDanceDataset("../densepose/full/", frames_index)

In [64]:
def draw_points(points):
    plt.xlim(0, 1980)
    plt.ylim(-1000, 0)
    reshaped = points.reshape(17, 2)
    x = reshaped[:,0]
    y = reshaped[:,1]
    plt.scatter(x, -y, s=10, marker='.',)
    plt.show()

In [65]:
from matplotlib import animation
%matplotlib notebook

motion = dataloader[1000][0]

fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal', autoscale_on=False,
                     xlim=(0,1920), ylim=(-1080, 0))

motion_points = motion
reshaped = motion_points[0].reshape(17, 2)
x = reshaped[:,0]
y = reshaped[:,1]
body_points, = ax.plot(x, -y, '.')

def animate(frame):
    global body_points, motion_points
    reshaped = motion_points[frame].reshape(17, 2)
    x = reshaped[:,0]
    y = reshaped[:,1]
    body_points.set_data(x, -y)
    return body_points, 

anim = animation.FuncAnimation(fig, animate, frames=len(motion), interval=50)
plt.show()

<IPython.core.display.Javascript object>

In [66]:
dataloader[100][0][0:30]

array([[ 588.65057373,  314.55380249,  588.65057373, ..., 1032.5032959 ,
         675.5357666 , 1030.50341797],
       [ 591.91925049,  313.60333252,  591.91925049, ..., 1022.50762939,
         663.81628418,  974.51409912],
       [ 588.64318848,  314.63897705,  588.64318848, ..., 1029.5090332 ,
         671.58813477, 1004.5135498 ],
       ...,
       [ 711.7409668 ,  269.81677246,  734.71063232, ...,  986.20007324,
         660.8081665 , 1046.1484375 ],
       [  92.19258118,    3.5       ,  121.16827393, ..., 1075.5       ,
         300.01824951, 1075.5       ],
       [ 102.39691162,    3.49861503,  134.36473083, ..., 1067.07751465,
         299.19876099, 1026.09375   ]])

In [67]:
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        n_features = 30 # 30 frames of noise?
        n_out = 34 * 30 # 17 points (* 2 for x and y) * 30 frames
 
        self.hidden0 = nn.Sequential(
            nn.Linear(n_features, 256),
            nn.LeakyReLU(0.2)
        )
        self.hidden1 = nn.Sequential(            
            nn.Linear(256, 512),
            nn.LeakyReLU(0.2)
        )
        self.hidden2 = nn.Sequential(
            nn.Linear(512, 1024),
            nn.LeakyReLU(0.2)
        )
        
        self.out = nn.Sequential(
            nn.Linear(1024, n_out),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.hidden0(x)
        x = self.hidden1(x)
        x = self.hidden2(x)
        x = self.out(x)
        return x
        

In [68]:
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        n_features = 34 * 30 
        n_out = 1
        
        self.hidden0 = nn.Sequential( 
            nn.Linear(n_features, 1024),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3)
        )
        self.hidden1 = nn.Sequential(
            nn.Linear(1024, 512),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3)
        )
        self.hidden2 = nn.Sequential(
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.4)
        )
        self.out = nn.Sequential(
            torch.nn.Linear(256, n_out),
            torch.nn.Sigmoid()
        )

    def forward(self, x):
        x = self.hidden0(x)
        x = self.hidden1(x)
        x = self.hidden2(x)
        x = self.out(x)
        return x

In [69]:
def noise(size):
    n = Variable(torch.randn(size, 30))
    if torch.cuda.is_available(): return n.cuda() 
    return n

In [106]:
discriminator = Discriminator()
generator = Generator()
if torch.cuda.is_available():
    discriminator.cuda()
    generator.cuda()
    
# Optimizers
d_optimizer = optim.Adam(discriminator.parameters(), lr=0.0002)
g_optimizer = optim.Adam(generator.parameters(), lr=0.0002)

# Loss function
loss = nn.CrossEntropyLoss()

# Number of steps to apply to the discriminator
d_steps = 1  # In Goodfellow et. al 2014 this variable is assigned to 1
# Number of epochs
num_epochs = 200

In [107]:
def real_data_target(size):
    '''
    Tensor containing ones, with shape = size
    '''
    data = Variable(torch.ones(size, 1))
    if torch.cuda.is_available(): return data.cuda()
    return data

def fake_data_target(size):
    '''
    Tensor containing zeros, with shape = size
    '''
    data = Variable(torch.zeros(size, 1))
    if torch.cuda.is_available(): return data.cuda()
    return data

In [111]:
def train_discriminator(optimizer, real_data, fake_data):
    # Reset gradients
    optimizer.zero_grad()
    
    # 1.1 Train on Real Data
    prediction_real = discriminator(real_data)
    # Calculate error and backpropagate
    
    error_real = loss(prediction_real, 1)
    error_real.backward()

    # 1.2 Train on Fake Data
    prediction_fake = discriminator(fake_data)

    # Calculate error and backpropagate
    error_fake = loss(prediction_fake, 0)
    error_fake.backward()
    
    # 1.3 Update weights with gradients
    optimizer.step()
    
    # Return error
    return error_real + error_fake, prediction_real, prediction_fake

def train_generator(optimizer, fake_data):
    # 2. Train Generator
    # Reset gradients
    optimizer.zero_grad()
    # Sample noise and generate fake data
    prediction = discriminator(fake_data)
    # Calculate error and backpropagate
    error = loss(prediction, real_data_target(prediction.size(0)))
    error.backward()
    # Update weights with gradients
    optimizer.step()
    # Return error
    return error

In [112]:
num_test_samples = 16
num_batches = len(dataloader)
test_noise = noise(num_test_samples)

In [113]:
for epoch in range(num_epochs):
    for n_batch, (motion, label) in enumerate(dataloader):
        real_data = Variable(torch.FloatTensor(motion[0:30].flatten()))
        if torch.cuda.is_available(): real_data = real_data.cuda()
        # Generate fake data
        fake_data = generator(noise(real_data.size(0))).detach()
        # Train D
        d_error, d_pred_real, d_pred_fake = train_discriminator(d_optimizer,
                                                                real_data, fake_data)
        
        # 2. Train Generator
        # Generate fake data
        fake_data = generator(noise(real_data.size(0)))
        # Train G
        g_error = train_generator(g_optimizer, fake_data)
        print('Epoch {} Error D {} G {}. [{}/{}]'.format(epoch, d_error, g_error, n_batch, num_batches))
        

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [336]:
dataloader[0][0].size

34

In [377]:
generator(noise(30))

tensor([[ 1.0000, -1.0000,  1.0000,  ...,  1.0000,  1.0000,  1.0000],
        [ 1.0000, -1.0000,  1.0000,  ...,  1.0000,  1.0000,  1.0000],
        [ 1.0000, -1.0000,  1.0000,  ...,  1.0000,  1.0000,  1.0000],
        ...,
        [ 1.0000, -1.0000,  1.0000,  ...,  1.0000,  1.0000,  1.0000],
        [ 1.0000, -1.0000,  1.0000,  ...,  1.0000,  1.0000,  1.0000],
        [ 1.0000, -1.0000,  1.0000,  ...,  1.0000,  1.0000,  1.0000]],
       grad_fn=<TanhBackward>)

In [401]:
discriminator(generator(noise(3)))

tensor([[0.5130],
        [0.5087],
        [0.5183]], grad_fn=<SigmoidBackward>)

In [381]:
dataloader[1]

(array([378.39746094, 295.95141602, 386.38766479, 285.96206665,
        364.4145813 , 285.96206665, 387.38644409, 294.95248413,
        333.45254517, 294.95248413, 397.37417603, 351.89172363,
        339.44519043, 352.89065552, 402.36807251, 426.8117981 ,
        422.34359741, 386.85443115, 434.32888794, 487.74676514,
        507.23953247, 401.83843994, 391.38153076, 531.69989014,
        331.45498657, 523.70837402, 428.33624268, 640.5836792 ,
        294.50027466, 650.57305908, 401.36929321, 717.50164795,
        287.5088501 , 717.50164795]),
 7)