# Unstructured JSON to .csv

## Dependencies

In [7]:
import json
import pandas as pd

## Reading Dataset

In [16]:
json_path = 'data/instances_test_nogt.json'
with open(json_path) as json_file:
    json_data = json.load(json_file)

### Examples

In [12]:
json_paths = [
    'data/instances_train.json',
    'data/instances_val.json',
    'data/instances_test_nogt.json'
]

In [18]:
json_data['info']

{'year': '2022',
 'version': '2',
 'description': '',
 'contributor': '',
 'url': 'localhost',
 'date_created': '2022-08-15T15:17:20.683314'}

In [19]:
json_data['licenses']

[{'id': 1, 'url': '', 'name': 'Unknown'}]

In [20]:
json_data['categories']

[{'supercategory': 'ignored', 'id': 0, 'name': 'ignored'},
 {'supercategory': 'person', 'id': 1, 'name': 'swimmer'},
 {'supercategory': 'boat', 'id': 2, 'name': 'boat'},
 {'supercategory': 'boat', 'id': 3, 'name': 'jetski'},
 {'supercategory': 'object', 'id': 4, 'name': 'life_saving_appliances'},
 {'supercategory': 'object', 'id': 5, 'name': 'buoy'}]

In [21]:
json_data['images'][0]

{'id': 4055,
 'file_name': '4055.png',
 'height': 2160,
 'width': 3840,
 'source': 'mavic',
 'date_time': '',
 'meta': {'image_name': '',
  'datetime(utc)': '',
  'latitude': '',
  'longitude': '',
  'height_above_takeoff(meter)': 17.19916123133191,
  'speed(m/s)': 0.0,
  'xspeed(m/s)': 0.0,
  'yspeed(m/s)': 0.0,
  'zspeed(m/s)': 0.0,
  'compass_heading(degrees)': 280.9,
  'gimbal_heading(degrees)': 271.4,
  'gimbal_pitch(degrees)': 1.6,
  'date_time': ''}}

In [23]:
json_data['annotations'][0]

IndexError: list index out of range

In [None]:
json_data['annotations'][1]

In [None]:
json_data['videos'][0]

In [None]:
json_data['tracks'][0]

## Converting to csv

### Image metadata Dataset

In [None]:
# create a pandas dataframe from the json key 'images'
df_img = pd.DataFrame(json_data['images'])
df_img.head()

In [None]:
# now expand the column 'source'
df_img = pd.concat([df_img.drop(['source'], axis=1), df_img['source'].apply(pd.Series)], axis=1)
df_img.head()

In [None]:
# now expand the column 'meta'
df_img = pd.concat([df_img.drop(['meta'], axis=1), df_img['meta'].apply(pd.Series)], axis=1)
df_img.head()

In [None]:
# check the type of the columns
df_img.info()

In [None]:
# save the dataframe as a csv file
df_img.to_csv('data/instances_train_swimmer_images.csv', index=False)

## Annotations Dataset

In [None]:
# create a pandas dataframe from the json key 'annotations'
df_ann = pd.DataFrame(json_data['annotations'])
df_ann.head()

In [None]:
# now expand the column 'bbox' (it's a list
df_ann = pd.concat([df_ann.drop(['bbox'], axis=1), df_ann['bbox'].apply(pd.Series)], axis=1)
df_ann.rename(columns={0: 'bbox_x', 1: 'bbox_y', 2: 'bbox_w', 3: 'bbox_h'}, inplace=True)
df_ann.head()

In [None]:
# check the type of the columns
df_ann.info()

In [None]:
# save the dataframe as a csv file
df_ann.to_csv('data/instances_train_swimmer_annotations.csv', index=False)