In [None]:
import os
import json
import re
import numpy as np

class NumpyEncoder(json.JSONEncoder):
    """ JSON encoder that also understands numpy scalars and arrays """
    def default(self, obj):
        """ Convert numpy objects to plain Python values.

        Anything that is not a numpy bool, integer, float or ndarray is
        handed to the base class, which raises TypeError.
        """
        # The abstract scalar types cover every concrete width and do not rely
        # on aliases such as np.float_, which no longer exists in NumPy 2.0.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)
    
def save_to_json(dic,target_dir):
    """ Serialize `dic` (numpy values allowed) and write it to `target_dir`.

    The data is first encoded to a JSON string with NumpyEncoder and that
    string is then written as a JSON value, so the file holds a JSON-encoded
    string. read_from_json undoes both steps; the format is kept for
    compatibility with files that were already written this way.
    """
    dumped = json.dumps(dic, cls=NumpyEncoder)
    # The context manager closes the file even if writing fails.
    with open(target_dir, 'w') as file:
        json.dump(dumped, file)
    
def read_from_json(target_dir):
    """ Load the JSON file at `target_dir` and return the decoded data.

    Files written by save_to_json contain a JSON-encoded string, so the
    content is decoded a second time when the first pass yields a string.
    A file that holds plain JSON is returned after the first pass.
    """
    # The context manager closes the file even if decoding fails.
    with open(target_dir,'r') as f:
        data = json.load(f)
    if isinstance(data, str):
        data = json.loads(data)
    return data
def tryint(s):
    """ Return int(s) when that conversion works, otherwise s unchanged """
    try:
        converted = int(s)
    except ValueError:
        # Not an integer literal: hand the input back as it came in.
        converted = s
    return converted
def str2int(v_str):
    """ Split v_str into digit and non-digit runs, turning digit runs into ints """
    # The capturing group makes re.split keep the digit runs in the result.
    pieces = re.split('([0-9]+)', v_str)
    return list(map(tryint, pieces))

def sort_humanly(v_list):
    """ Return a new list with the strings of v_list ordered by their str2int key """
    ordered = list(v_list)
    ordered.sort(key=str2int)
    return ordered

In [None]:
# Helper functions needed for later processing
def convert_single_pose(pose):
    """ Pack one pose into a (2, 17) array.

    Entry [i][j] of the result is pose[j][1][i] for i in 0..1 and j in 0..16.
    """
    packed = np.empty((2, 17))
    for joint in range(17):
        coords = pose[joint][1]
        packed[0, joint] = coords[0]
        packed[1, joint] = coords[1]
    return packed

def convert_str_list(pose):
    """ Convert a dict of poses into a list of (2, 17) arrays.

    The dict is walked in sorted key order; each value is packed so that
    entry [i][j] of its array is value[j][1][i].
    """
    frames = []
    for key in sorted(pose.keys()):
        entry = pose[key]
        packed = np.empty((2, 17))
        for joint in range(17):
            coords = entry[joint][1]
            packed[0, joint] = coords[0]
            packed[1, joint] = coords[1]
        frames.append(packed)
    return frames
    
def overlap_get(target,overlapping,target_len,cat):
    """ Cut a sequence into fixed-length windows taken at a regular stride.

    target      -- 3-D array; windows are taken along its first axis
    overlapping -- step (in frames) between the starts of consecutive windows
    target_len  -- number of frames in every window
    cat         -- label that is repeated once per window

    Returns (windows, labels). Both lists are empty when the sequence is
    shorter than target_len.
    """
    N,C,V=target.shape
    x=[]
    label=[]
    if N<target_len:
        return x,label
    number=(N-target_len)//overlapping
    for i in range(number+1):
        start=i*overlapping
        # The window length follows target_len instead of a fixed 50 frames.
        x.append(target[start:start+target_len])
        label.append(cat)
    return x,label

def pose_normalize(pose,img_size):
    """ Rescale pixel coordinates to the range [-1, 1] using the image size.

    pose     -- 3-D array; index 0 on axis 1 is divided by half the image
                width, the remaining indices on axis 1 by half the height
    img_size -- (width, height). (0, 0) means the size is unknown: 1280x720
                is assumed when every coordinate fits inside it, otherwise
                1920x1080.

    Returns (norm_pose, flag). flag is False when any normalized value lies
    outside [-1, 1].
    """
    print(img_size)
    pose = np.asarray(pose)
    width, height = img_size[0], img_size[1]
    if width == 0 and height == 0:
        # Unknown size: guess the resolution from the largest coordinates.
        x_max = np.max(pose[:, 0:1, :])
        y_max = np.max(pose[:, 1:, :])
        if x_max <= 1280 and y_max <= 720:
            width, height = 1280, 720
        else:
            width, height = 1920, 1080
    half_w = float(width) / 2
    half_h = float(height) / 2
    # An integer input array must not dictate the output dtype, otherwise the
    # normalized values would be truncated when they are stored.
    if np.issubdtype(pose.dtype, np.floating):
        out_dtype = pose.dtype
    else:
        out_dtype = np.float64
    norm_pose = np.empty(pose.shape, dtype=out_dtype)
    norm_pose[:, 0:1, :] = pose[:, 0:1, :] / half_w - 1
    norm_pose[:, 1:, :] = pose[:, 1:, :] / half_h - 1
    flag = not (np.max(norm_pose) > 1 or np.min(norm_pose) < -1)
    return norm_pose,flag

In [None]:
# Dance categories; the position of a name in this list is used as its label.
txtList = [
    'ballet',
    'break',
    'cha',
    'flamenco',
    'foxtrot',
    'jive',
    'latin',
    'pasodoble',
    'quickstep',
    'rumba',
    'samba',
    'square',
    'swing',
    'tango',
    'tap',
    'waltz',
]

In [None]:
# Build one JSON file per dance category from the per-frame JSON files.
# Change the input directory and the output directory to your own paths.
# This cell is time consuming: every frame file is opened and parsed.
# Result layout: dance_dataset[clip name][frame number][person index] = joints
for index in txtList:
    mainpath=os.path.join("/mnt/external4/densepose/txt",index)
    mainList=sorted(os.listdir(mainpath))
    dance_dataset={}
    for file_name in mainList:
        path=os.path.join(mainpath,file_name)
        with open(path, 'r') as f:
            data = json.load(f)
        (file,extension) = os.path.splitext(file_name)
        split=file.split("_")
        # The clip name is the first "_"-separated field; file names that
        # start with "_" carry it in the second field instead.
        name=split[0]
        if name =="":
            name=split[1]
        num=split[-1]
        # setdefault keeps the frames already collected for this clip even
        # when its files are not adjacent in the sorted listing.
        frames=dance_dataset.setdefault(name,{})
        # The first element of every person entry is dropped; the rest is
        # stored as that person's joints.
        frames[num]={str(i): person[1::] for i,person in enumerate(data)}
    json_file=index+".json"
    json_path=os.path.join("/mnt/external4/xuanchi/new_GIT",json_file)
    with open(json_path, 'w') as f:
        json.dump(dance_dataset, f)
    print(json_file+" is ok")

In [None]:
# Read every per-category JSON file into a single dictionary keyed by category.
total = {}
for category in txtList:
    category_path = os.path.join("/mnt/external4/xuanchi/new_GIT", category + ".json")
    print(category_path)
    with open(category_path, 'r') as handle:
        total[category] = json.load(handle)

In [None]:
# `dance_data` is the name the cells below read from. Here it is simply bound
# to the `total` dictionary built in the previous cell (this notebook appears
# to have been merged from several notebooks -- TODO confirm).
dance_data = total
# Disabled alternative: load the data from a single merged JSON file instead.
# with open("/mnt/external4/xuanchi/new_GIT/total.json", 'r') as f:
#     dance_data = json.load(f)

In [None]:
# Split the people of every video into separate pose sequences.
# Change `imgPath` to the directory with the per-category JSON files that
# record the image size of each video: the dataset is not clean and the image
# size differs between videos, so it has to be looked up per video.
# Inside the loops `sequence_data` maps a person index (as a string) to the
# list of that person's (2, 17) pose arrays, one per accepted frame.

x=[]
target=[]
imgPath="/mnt/external4/xuanchi/GIT_Img"

for i,cat in enumerate(txtList):
    data_1=dance_data[cat]
    img = os.path.join(imgPath,cat+".json")
    with open(img, 'r') as f:
        img_data = json.load(f)
    for name in sorted(data_1.keys()):  # every video
        img_size = img_data[name]
        sequence_data = {}
        index = 0
        max_person = 0
        # Frame keys are strings; natural ordering keeps "2" before "10" so
        # frames are visited in temporal order even without zero padding.
        for frame in sort_humanly(data_1[name].keys()):  # every frame
            dance=data_1[name][frame]  # poses of the people in this frame
            if index == 0:
                # First frame that contains people: it opens one track per person.
                print("First")
                max_person = len(dance.keys())
                for person in sort_humanly(dance.keys()):
                    sequence_data[person] = [convert_single_pose(dance[person])]
                    index+=1
            elif len(dance.keys()) <= max_person:
                # No more people than tracks: give each person the closest
                # track that has not been used in this frame yet.
                complete = set()
                for person in sort_humanly(dance.keys()):
                    pose_2 = convert_single_pose(dance[person])
                    min_distance = float("inf")
                    keep = None
                    for j in range(max_person):
                        if j in complete:
                            continue
                        distance = np.sum(np.abs(sequence_data[str(j)][-1]-pose_2))
                        # `keep is None` guarantees that a free track is chosen
                        # however large the distance is; a fixed sentinel would
                        # fall back to track 0 even when it is already taken.
                        if keep is None or distance < min_distance:
                            keep = j
                            min_distance = distance
                    complete.add(keep)
                    sequence_data[str(keep)].append(pose_2)
                index+=1
            else:
                # More people than tracks: every track picks its closest
                # person and the remaining people are ignored.
                n_people = len(dance.keys())
                # Convert each candidate once instead of once per track.
                candidates = [convert_single_pose(dance[str(col)]) for col in range(n_people)]
                distance = np.empty((max_person,n_people))
                for row in range(max_person):
                    last_pose = sequence_data[str(row)][-1]
                    for col in range(n_people):
                        distance[row][col] = np.sum(np.abs(last_pose-candidates[col]))
                indexs = np.argmin(distance,axis=1)
                haha=indexs.tolist()
                if len(haha)!=len(set(haha)):
                    # Two tracks chose the same person: skip this frame.
                    print("dump")
                    continue
                for k,col in enumerate(haha):
                    sequence_data[str(k)].append(candidates[col])
                index+=1
        for single in sorted(sequence_data.keys()):
            n_array=np.array(sequence_data[single])
            n_array,flag=pose_normalize(n_array,img_size)
            print(n_array.shape)
            # Keep only sequences whose normalized coordinates stay in range.
            if flag:
                x.append(n_array)
                target.append(i)
                print(i)

In [None]:
# Cut every sequence into windows of length 50, taken every 25 frames so that
# consecutive windows overlap.
# In the original run the shape of dealed_x was (74084, 50, 2, 17).
window_chunks=[]
label_chunks=[]
for i,tar in enumerate(x):
    cat=target[i]
    result,label=overlap_get(tar,25,50,cat)
    if len(result)==0:
        # Sequence shorter than one window.
        continue
    window_chunks.append(np.array(result))
    label_chunks.append(np.array(label))
    print(i)
# Collecting the chunks and concatenating once does not depend on sequence 0
# being long enough, and avoids re-copying the whole array for every sequence.
if not window_chunks:
    raise ValueError("no sequence is long enough to yield a 50-frame window")
dealed_x=np.concatenate(window_chunks)
dealed_label=np.concatenate(label_chunks)
print(dealed_x.shape)
print(dealed_label.shape)

In [None]:
# Save all windows and their labels to one JSON file.
# The container is not called `dict`, so the builtin stays usable in later cells.
dataset={"x": dealed_x, "label": dealed_label}
save_to_json(dataset,"/mnt/external4/xuanchi/new_GIT/norm_17.json")

In [None]:
# Split the windows into a training and a validation set (about 7:3) and save both.
# NOTE(review): windows are shuffled individually, so overlapping windows cut
# from the same sequence can end up on both sides of the split.
import random

TRAIN_FRACTION = 0.7  # 74084 windows -> 51858 training windows
SPLIT_SEED = 0        # fixed seed so the split is the same on every run

# Shuffle indices rather than the arrays themselves; indexing the numpy arrays
# afterwards avoids building Python lists that hold every window.
order = list(range(len(dealed_x)))
random.Random(SPLIT_SEED).shuffle(order)
split_at = int(len(order) * TRAIN_FRACTION)
train_idx = order[:split_at]
valid_idx = order[split_at:]
print(len(dealed_x), len(dealed_label))
train_dict={}
train_x=dealed_x[train_idx]
train_y=dealed_label[train_idx]
print(train_x.shape)
print(train_y.shape)
train_dict["x"]=train_x
train_dict["label"]=train_y
save_to_json(train_dict,"/mnt/external4/xuanchi/new_GIT/norm_17_train.json")
valid_dict={}
valid_x=dealed_x[valid_idx]
valid_y=dealed_label[valid_idx]
print(valid_x.shape)
print(valid_y.shape)
valid_dict["x"]=valid_x
valid_dict["label"]=valid_y
save_to_json(valid_dict,"/mnt/external4/xuanchi/new_GIT/norm_17_test.json")