In [1]:
import json
import os

import numpy as np
import pandas as pd
from pandas.io.json import json_normalize

## Get dimensions of each image

In [2]:
# Collect file name, height and width for every image listed in the train and
# val annotation files. Train entries are appended first, then val, which is
# also the row order of the resulting frame.
file_names = []
heights = []
widths = []

for annotation_path in ('annotations/person_keypoints_train2017.json',
                        'annotations/person_keypoints_val2017.json'):
    with open(annotation_path) as json_data:
        data = json.load(json_data)

    for image in data['images']:
        file_names.append(image['file_name'])
        heights.append(image['height'])
        widths.append(image['width'])

df = pd.DataFrame({
    'file_name': file_names,
    'height': heights,
    'width': widths,
})
df

Unnamed: 0,file_name,height,width
0,000000391895.jpg,360,640
1,000000522418.jpg,480,640
2,000000184613.jpg,336,500
3,000000318219.jpg,640,556
4,000000554625.jpg,640,426
...,...,...,...
123282,000000512403.jpg,640,529
123283,000000168974.jpg,500,375
123284,000000552775.jpg,500,375
123285,000000394940.jpg,640,426


In [3]:
# Persist the per-image dimensions. index=False keeps the row index out of the
# CSV; the conversion cells reload this file with file_name as the index.
df.to_csv('img_info.csv',index=False)

## COCO bbox (train)

In [4]:
# Pull the image id and bounding box out of every train2017 annotation.
with open('annotations/person_keypoints_train2017.json') as json_data:
    data = json.load(json_data)

annotations = data['annotations']
image_ids = [annotation['image_id'] for annotation in annotations]
bboxes = [annotation['bbox'] for annotation in annotations]

# One row per annotation, in the order the annotations appear in the file.
tempDf = pd.DataFrame({'image_id': image_ids, 'bbox': bboxes})
tempDf

Unnamed: 0,image_id,bbox
0,537548,"[267.03, 104.32, 229.19, 320]"
1,117891,"[206.77, 1.44, 433.23, 408.73]"
2,120021,"[276.12, 0.29, 61.18, 118.73]"
3,403255,"[355.87, 118.43, 30.84, 42.87]"
4,209468,"[178.38, 120.54, 114.59, 354.6]"
...,...,...
262460,128732,"[9, 284, 560, 141]"
262461,489186,"[298, 0, 341, 127]"
262462,390883,"[40, 104, 394, 43]"
262463,554743,"[275, 207, 153, 148]"


In [5]:
# Save the (image_id, bbox) pairs. The bbox lists are written as their text
# form, which is why they come back as strings when the CSV is reloaded.
tempDf.to_csv('coco_bbox.csv', index=False)

## Add training labels

In [6]:
# Reload the saved boxes and seed both file-name columns with the raw image
# id; the following cells turn them into the actual .jpg / .txt names.
df = pd.read_csv('coco_bbox.csv')
df = df.assign(image_file=df['image_id'], label_file=df['image_id'])
df

Unnamed: 0,image_id,bbox,image_file,label_file
0,537548,"[267.03, 104.32, 229.19, 320]",537548,537548
1,117891,"[206.77, 1.44, 433.23, 408.73]",117891,117891
2,120021,"[276.12, 0.29, 61.18, 118.73]",120021,120021
3,403255,"[355.87, 118.43, 30.84, 42.87]",403255,403255
4,209468,"[178.38, 120.54, 114.59, 354.6]",209468,209468
...,...,...,...,...
262460,128732,"[9, 284, 560, 141]",128732,128732
262461,489186,"[298, 0, 341, 127]",489186,489186
262462,390883,"[40, 104, 394, 43]",390883,390883
262463,554743,"[275, 207, 153, 148]",554743,554743


In [7]:
# Cast both file-name columns to the pandas string dtype in a single call so
# they can be handled as text.
df = df.astype({'image_file': 'string', 'label_file': 'string'})
df.dtypes

image_id       int64
bbox          object
image_file    string
label_file    string
dtype: object

In [8]:
# Turn each image id into its 12-character, zero-padded file name, e.g.
# 537548 -> '000000537548.jpg' (image) and '000000537548.txt' (label).
#
# The names are derived from 'image_id' with a vectorised str.zfill instead of
# a per-row Python loop over the already-converted columns. That keeps the cell
# fast on the full annotation table and makes it safe to re-run: padding the
# 'image_file' column in place would append a second extension on a second run.
padded_ids = df['image_id'].astype('string').str.zfill(12)

# Kept as module-level names because the conversion cell looks image names up
# through `image_files`.
image_files = padded_ids + '.jpg'
label_files = padded_ids + '.txt'

df['image_file'] = image_files
df['label_file'] = label_files
df

Unnamed: 0,image_id,bbox,image_file,label_file
0,537548,"[267.03, 104.32, 229.19, 320]",000000537548.jpg,000000537548.txt
1,117891,"[206.77, 1.44, 433.23, 408.73]",000000117891.jpg,000000117891.txt
2,120021,"[276.12, 0.29, 61.18, 118.73]",000000120021.jpg,000000120021.txt
3,403255,"[355.87, 118.43, 30.84, 42.87]",000000403255.jpg,000000403255.txt
4,209468,"[178.38, 120.54, 114.59, 354.6]",000000209468.jpg,000000209468.txt
...,...,...,...,...
262460,128732,"[9, 284, 560, 141]",000000128732.jpg,000000128732.txt
262461,489186,"[298, 0, 341, 127]",000000489186.jpg,000000489186.txt
262462,390883,"[40, 104, 394, 43]",000000390883.jpg,000000390883.txt
262463,554743,"[275, 207, 153, 148]",000000554743.jpg,000000554743.txt


In [9]:
# Add 'to_put' as a string-typed copy of the raw bbox text; it is the column
# the finished label lines are stored in later.
df = df.assign(to_put=df['bbox'].astype('string'))
df.dtypes

image_id       int64
bbox          object
image_file    string
label_file    string
to_put        string
dtype: object

In [None]:
# Convert every box from [start_x, start_y, width, height] in pixels to the
# label line '0 <mid_x> <mid_y> <width> <height>', with all four numbers
# divided by the image width / height.
#
# Differences from a naive row loop, none of which change the produced text:
#   * the bbox text is parsed with json.loads rather than eval, so nothing read
#     back from the CSV is ever executed as code;
#   * the source is df['bbox'] (never modified), so the cell can be re-run even
#     after 'to_put' has been overwritten with label lines;
#   * image names come from df['image_file'] instead of a variable left behind
#     by an earlier cell;
#   * results are collected in a list and turned into a Series once, instead of
#     assigning into a column element by element.
img_info = pd.read_csv('img_info.csv', index_col='file_name')

label_lines = []
for image_file, bbox_text in zip(df['image_file'], df['bbox']):
    box_startX, box_startY, box_width, box_height = json.loads(bbox_text)

    # .at does a single scalar lookup per value.
    img_width = img_info.at[image_file, 'width']
    img_height = img_info.at[image_file, 'height']

    box_midX = (box_startX + (box_startX + box_width)) / 2
    box_midY = (box_startY + (box_startY + box_height)) / 2

    x = [
        box_midX / img_width,
        box_midY / img_height,
        box_width / img_width,
        box_height / img_height,
    ]

    # Force every value into (0, 1]: non-positive values become a tiny
    # positive number, values above 1 are capped at 1.
    for j in range(len(x)):
        if x[j] <= 0:
            x[j] = 0.000001
        elif x[j] > 1:
            x[j] = 1

    # Leading '0' is the class field of the label line.
    label_lines.append('0 ' + ' '.join(str(z) for z in x))

# Same index and dtype as df['to_put'], so it can be assigned straight back.
boxes = pd.Series(label_lines, index=df.index, dtype='string')
boxes

In [None]:
# Store the converted label lines on the frame, replacing the raw bbox text
# that 'to_put' held until now.
df['to_put'] = boxes
df

In [None]:
# Write one label file per image under labels/, one line per box, in the same
# row order as df.
#
# Each file is opened once (grouped by 'label_file') inside a `with` block so
# the handle is always closed, and is written in 'w' mode so that re-running
# the cell rewrites the file instead of appending a duplicate copy of every
# line. NOTE(review): this replaces any existing file of the same name in
# labels/ — confirm nothing else is expected to add lines to these files.
os.makedirs('labels', exist_ok=True)

for label_file, label_lines in df.groupby('label_file', sort=False)['to_put']:
    with open('labels/' + label_file, 'w') as f:
        f.writelines(line + '\n' for line in label_lines)

## Creating train.csv

In [None]:
# Build the image -> label file listing: one row per distinct image.
#
# drop_duplicates keeps the first occurrence of each image in df order, so the
# listing comes out in the same order on every run (going through a set() gives
# an arbitrary order that can change between kernel sessions).
img_files = df['image_file'].drop_duplicates().reset_index(drop=True)

# Swap the three-character extension for 'txt' to get the label file name.
label_files = img_files.str[:-3] + 'txt'

full_df = pd.DataFrame({'image_file': img_files, 'label_file': label_files})
full_df

In [None]:
# Save the image/label file listing for the training split, without the row index.
full_df.to_csv('train.csv', index=False)

## COCO bbox (test — built from the val2017 annotations)

In [None]:
# Pull the image id and bounding box out of every val2017 annotation.
with open('annotations/person_keypoints_val2017.json') as json_data:
    data = json.load(json_data)

annotations = data['annotations']
image_ids = [annotation['image_id'] for annotation in annotations]
bboxes = [annotation['bbox'] for annotation in annotations]

# One row per annotation, in the order the annotations appear in the file.
tempDf = pd.DataFrame({'image_id': image_ids, 'bbox': bboxes})
tempDf

In [None]:
# Save the (image_id, bbox) pairs. The bbox lists are written as their text
# form, which is why they come back as strings when the CSV is reloaded.
tempDf.to_csv('coco_bbox_test.csv', index=False)

## Add test labels

In [None]:
# Reload the saved boxes and seed both file-name columns with the raw image
# id; the following cells turn them into the actual .jpg / .txt names.
df = pd.read_csv('coco_bbox_test.csv')
df = df.assign(image_file=df['image_id'], label_file=df['image_id'])
df

In [None]:
# Cast both file-name columns to the pandas string dtype in a single call so
# they can be handled as text.
df = df.astype({'image_file': 'string', 'label_file': 'string'})
df.dtypes

In [None]:
# Turn each image id into its 12-character, zero-padded file name, e.g.
# 537548 -> '000000537548.jpg' (image) and '000000537548.txt' (label).
#
# The names are derived from 'image_id' with a vectorised str.zfill instead of
# a per-row Python loop over the already-converted columns. That keeps the cell
# fast and makes it safe to re-run: padding the 'image_file' column in place
# would append a second extension on a second run.
padded_ids = df['image_id'].astype('string').str.zfill(12)

# Kept as module-level names because the conversion cell looks image names up
# through `image_files`.
image_files = padded_ids + '.jpg'
label_files = padded_ids + '.txt'

df['image_file'] = image_files
df['label_file'] = label_files
df

In [None]:
# Add 'to_put' as a string-typed copy of the raw bbox text; it is the column
# the finished label lines are stored in later.
df = df.assign(to_put=df['bbox'].astype('string'))
df.dtypes

In [None]:
# Convert every box from [start_x, start_y, width, height] in pixels to the
# label line '0 <mid_x> <mid_y> <width> <height>', with all four numbers
# divided by the image width / height.
#
# Differences from a naive row loop, none of which change the produced text:
#   * the bbox text is parsed with json.loads rather than eval, so nothing read
#     back from the CSV is ever executed as code;
#   * the source is df['bbox'] (never modified), so the cell can be re-run even
#     after 'to_put' has been overwritten with label lines;
#   * image names come from df['image_file'] instead of a variable left behind
#     by an earlier cell;
#   * results are collected in a list and turned into a Series once, instead of
#     assigning into a column element by element.
img_info = pd.read_csv('img_info.csv', index_col='file_name')

label_lines = []
for image_file, bbox_text in zip(df['image_file'], df['bbox']):
    box_startX, box_startY, box_width, box_height = json.loads(bbox_text)

    # .at does a single scalar lookup per value.
    img_width = img_info.at[image_file, 'width']
    img_height = img_info.at[image_file, 'height']

    box_midX = (box_startX + (box_startX + box_width)) / 2
    box_midY = (box_startY + (box_startY + box_height)) / 2

    x = [
        box_midX / img_width,
        box_midY / img_height,
        box_width / img_width,
        box_height / img_height,
    ]

    # Force every value into (0, 1]: non-positive values become a tiny
    # positive number, values above 1 are capped at 1.
    for j in range(len(x)):
        if x[j] <= 0:
            x[j] = 0.000001
        elif x[j] > 1:
            x[j] = 1

    # Leading '0' is the class field of the label line.
    label_lines.append('0 ' + ' '.join(str(z) for z in x))

# Same index and dtype as df['to_put'], so it can be assigned straight back.
boxes = pd.Series(label_lines, index=df.index, dtype='string')
boxes

In [None]:
# Store the converted label lines on the frame, replacing the raw bbox text
# that 'to_put' held until now.
df['to_put'] = boxes
df

In [None]:
# Write one label file per image under labels/, one line per box, in the same
# row order as df.
#
# Each file is opened once (grouped by 'label_file') inside a `with` block so
# the handle is always closed, and is written in 'w' mode so that re-running
# the cell rewrites the file instead of appending a duplicate copy of every
# line. NOTE(review): this replaces any existing file of the same name in
# labels/ — confirm nothing else is expected to add lines to these files.
os.makedirs('labels', exist_ok=True)

for label_file, label_lines in df.groupby('label_file', sort=False)['to_put']:
    with open('labels/' + label_file, 'w') as f:
        f.writelines(line + '\n' for line in label_lines)

## Creating test.csv

In [None]:
# Build the image -> label file listing: one row per distinct image.
#
# drop_duplicates keeps the first occurrence of each image in df order, so the
# listing comes out in the same order on every run (going through a set() gives
# an arbitrary order that can change between kernel sessions).
img_files = df['image_file'].drop_duplicates().reset_index(drop=True)

# Swap the three-character extension for 'txt' to get the label file name.
label_files = img_files.str[:-3] + 'txt'

full_df = pd.DataFrame({'image_file': img_files, 'label_file': label_files})
full_df

In [None]:
# Save the image/label file listing for the test split, without the row index.
full_df.to_csv('test.csv', index=False)

Next step: Manually move the images to 'images' folder