refs:

https://github.com/michalfaber/dataset_toolkit/blob/main/coco_analysis.ipynb

In [1]:
import os
import numpy as np
import pandas as pd
from pycocotools.coco import COCO
from collections import defaultdict

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# Font sizes for axis labels and tick labels, applied to every figure below.
mpl.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

In [3]:
# !!! SET THE CORRECT PATH TO YOUR FILES AND FOLDERS !!!

dataType = "val2017"
cocoRoot = '/data/ylw/datasets/test/cocos'

# "train" split: the annotation file name is derived from cocoRoot and
# dataType, the image folder is given as a separate absolute path.
train_annot_path = os.path.join(cocoRoot, 'annotations/instances_{}.json'.format(dataType))
train_img_path = '/data/ylw/datasets/test/cocos/train2017'

# NOTE: the "val" split reuses the train paths as-is, so the two COCO objects
# created below are loaded from the same annotation file.
val_annot_path, val_img_path = train_annot_path, train_img_path

train_coco = COCO(train_annot_path)
val_coco = COCO(val_annot_path)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [4]:
def get_meta(coco):
    """Yield one metadata record per image registered in ``coco``.

    Each record is the list ``[image_id, file_name, width, height, anns]``,
    where ``anns`` is what ``coco.loadAnns`` returns for the annotation ids
    that ``coco.getAnnIds`` reports for that image.
    """
    # Snapshot the ids up front so the loop does not iterate the live mapping.
    for image_id in list(coco.imgs):
        info = coco.imgs[image_id]
        annotations = coco.loadAnns(coco.getAnnIds(imgIds=image_id))
        yield [image_id, info['file_name'], info['width'], info['height'], annotations]
        
def convert_to_df(coco):
    """Flatten a COCO annotation set into two DataFrames indexed by ``image_id``.

    Parameters
    ----------
    coco
        Object handed unchanged to ``get_meta``, which yields
        ``[image_id, file_name, width, height, annotations]`` per image.

    Returns
    -------
    (images_df, persons_df)
        ``images_df`` has one row per image with columns ``path``, ``width``
        and ``height``. ``persons_df`` has one row per annotation with columns
        ``is_crowd``, ``bbox``, ``area``, ``num_keypoints`` and ``keypoints``.
        Both frames are indexed by ``image_id`` and are returned with these
        columns even when there are no images or no annotations.

    Raises
    ------
    KeyError
        If an annotation (assumed to be a dict) lacks one of the keys read
        below, e.g. ``num_keypoints`` or ``keypoints``.
    """
    image_columns = ['image_id', 'path', 'width', 'height']
    person_columns = ['image_id', 'is_crowd', 'bbox', 'area', 'num_keypoints', 'keypoints']

    images_data = []
    persons_data = []

    for img_id, img_fname, w, h, anns in get_meta(coco):
        images_data.append({
            'image_id': int(img_id),
            'path': img_fname,
            'width': int(w),
            'height': int(h),
        })
        persons_data.extend(
            {
                'image_id': m['image_id'],
                'is_crowd': m['iscrowd'],
                'bbox': m['bbox'],
                'area': m['area'],
                'num_keypoints': m['num_keypoints'],
                'keypoints': m['keypoints'],
            }
            for m in anns
        )

    # Explicit column lists keep `set_index('image_id')` working when a record
    # list is empty; without them the empty frame has no 'image_id' column.
    images_df = pd.DataFrame(images_data, columns=image_columns).set_index('image_id')
    persons_df = pd.DataFrame(persons_data, columns=person_columns).set_index('image_id')

    return images_df, persons_df

##  Convert COCO dataset format to DataFrames to simplify the analysis

Merge dataframes images and annotations for training set.

In [None]:
# Per-image and per-annotation frames for the training annotations, joined on
# their shared image_id index; rows are tagged with source 0.
images_df, persons_df = convert_to_df(train_coco)
train_coco_df = images_df.merge(persons_df, left_index=True, right_index=True).assign(source=0)
train_coco_df.head()

Merge dataframes images and annotations for validation set

In [None]:
# Same join for the validation annotations; rows are tagged with source 1.
images_df, persons_df = convert_to_df(val_coco)
val_coco_df = images_df.merge(persons_df, left_index=True, right_index=True).assign(source=1)
val_coco_df.head()

Combine the training set ('source' = 0) and the validation set ('source' = 1).

In [None]:
# Stack both splits into one frame. ignore_index=True replaces the image_id
# index with a fresh 0..n-1 range, so rows are identified by path + source.
coco_df = pd.concat((train_coco_df, val_coco_df), ignore_index=True)
coco_df

### Show how many people are visible in a single image

In [None]:
# count number of annotations per image

annotated_persons_df = coco_df.loc[coco_df['is_crowd'] == 0]
crowd_df = coco_df.loc[coco_df['is_crowd'] == 1]

print(f"Number of people in total: {len(annotated_persons_df)}")
print(f"Number of crowd annotations: {len(crowd_df)}")

# one row per (path, source) pair with the number of non-crowd annotations
persons_in_img_df = (
    annotated_persons_df[['path', 'source']]
    .value_counts()
    .to_frame('cnt')
    .reset_index()
)

# group by the counter: index = number of annotated people in one image,
# values = how many images have exactly that many people
persons_in_img_cnt_df = persons_in_img_df.groupby('cnt').count()

# extract arrays (reused by a later comparison plot)
x_occurences = persons_in_img_cnt_df.index.to_numpy()
y_images = persons_in_img_cnt_df['path'].to_numpy()

# plot
f, ax = plt.subplots(figsize=(14, 8))
ax.bar(x_occurences, y_images)
ax.set_title('People on a single image ')
ax.set_xticks(x_occurences)
ax.set_xticklabels(x_occurences)
ax.set_xlabel('Number of people in a single image')
ax.set_ylabel('Number of images')
plt.show()

Utility functions for displaying images

In [11]:
def get_full_path(df, row = 0):
    """Return the full path of the image referenced by one row of ``df``.

    ``row`` is a positional offset into the ``path`` and ``source`` columns.
    A ``source`` of 0 resolves against ``train_img_path``; any other value
    resolves against ``val_img_path``.
    """
    file_name = df['path'].values[row]
    base_dir = train_img_path if df['source'].values[row] == 0 else val_img_path
    return os.path.join(base_dir, file_name)

Show a few examples of images with 13 annotated persons

In [None]:
# images from source 1 with exactly 13 annotated persons;
# keep at most 9 of them -> grid 3x3
subset_df = persons_in_img_df.loc[
    (persons_in_img_df['cnt'] == 13) & (persons_in_img_df['source'] == 1)
].head(9)

# read images
full_paths = [get_full_path(subset_df, row=pos) for pos in range(len(subset_df))]
imgs = list(map(mpimg.imread, full_paths))

# plot: one image per axes, axes beyond the number of images stay empty
_, axs = plt.subplots(nrows=3, ncols=3, figsize=(14, 8))
axs = axs.ravel()
for ax, img in zip(axs, imgs):
    ax.imshow(img)
plt.show()

subset_df

Image showing the largest number of annotated people = 19

In [None]:
# Select the image(s) with the highest per-image count of annotated persons.
# The count is derived from the data instead of being hard-coded, so the
# selection cannot come back empty when the dataset changes.
max_cnt = persons_in_img_df['cnt'].max()
subset_df = persons_in_img_df[persons_in_img_df['cnt'] == max_cnt]

img = mpimg.imread(get_full_path(subset_df))
plt.imshow(img)

# Positional access (.iloc): after boolean filtering the row label 0 is not
# guaranteed to be present, whereas the first position always is.
path = subset_df['path'].iloc[0]
source = subset_df['source'].iloc[0]

# every non-crowd annotation of that image
subset_full = coco_df[(coco_df['path'] == path) & (coco_df['source'] == source) & (coco_df['is_crowd'] == 0)]
bbox = np.array(subset_full['bbox'].values.tolist())

# each bbox is used as [x, y, width, height]; draw its outline as a closed polyline
for r in bbox:
    x = [r[0], r[0] + r[2], r[0] + r[2], r[0], r[0]]
    y = [r[1], r[1], r[1] + r[3], r[1] + r[3], r[1]]
    plt.plot(x, y, color='tab:red')

subset_df

There exist annotations that are very small, possibly without any keypoints.

Show statistics of images with persons having some keypoints

In [None]:
# split non-crowd annotations by whether they carry at least one keypoint
annotated_persons_nokp_df = coco_df[(coco_df['is_crowd'] == 0) & (coco_df['num_keypoints'] == 0)]
annotated_persons_kp_df = coco_df[(coco_df['is_crowd'] == 0) & (coco_df['num_keypoints'] > 0)]

print("Number of people (with keypoints) in total: " + str(len(annotated_persons_kp_df)))
print("Number of people without any keypoints in total: " + str(len(annotated_persons_nokp_df)))

# per-image count of persons that have keypoints, then number of images per count
persons_in_img_kp_df = pd.DataFrame({
    'cnt': annotated_persons_kp_df[['path','source']].value_counts()
})
persons_in_img_kp_df.reset_index(level=[0,1], inplace=True) 
persons_in_img_cnt_df = persons_in_img_kp_df.groupby(['cnt']).count()
x_occurences_kp = persons_in_img_cnt_df.index.values
y_images_kp = persons_in_img_cnt_df['path'].values

f = plt.figure(figsize=(14, 8))
width = 0.4
plt.bar(x_occurences_kp, y_images_kp, width=width, label='with keypoints')
# x_occurences / y_images come from the earlier per-image count over ALL
# non-crowd annotations (with or without keypoints), so the series is labelled
# accordingly rather than as 'no keypoints'.
plt.bar(x_occurences + width, y_images, width=width, label='all annotated persons')

plt.title('People on a single image ')
# ticks sit midway between the two bars of each pair
plt.xticks(x_occurences + width/2, x_occurences)
plt.xlabel('Number of people in a single image')
plt.ylabel('Number of images')
plt.legend(loc = 'best')
plt.show()

Show a few examples of images with 13 annotated persons having at least 1 keypoint

In [None]:
# images from source 1 with exactly 13 persons that have keypoints;
# keep at most 9 of them -> grid 3x3
subset_df = persons_in_img_kp_df.loc[
    (persons_in_img_kp_df['cnt'] == 13) & (persons_in_img_kp_df['source'] == 1)
].head(9)

# read images
full_paths = [get_full_path(subset_df, row=pos) for pos in range(len(subset_df))]
imgs = list(map(mpimg.imread, full_paths))

# plot: one image per axes, axes beyond the number of images stay empty
_, axs = plt.subplots(nrows=3, ncols=3, figsize=(14, 8))
axs = axs.ravel()
for ax, img in zip(axs, imgs):
    ax.imshow(img)
plt.show()

subset_df

Add extra attributes to dataframe

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
 
class AttributesAdder(BaseEstimator, TransformerMixin):
    """Append bounding-box scale features and flattened keypoints to a 2-D array.

    The constructor takes column positions in the array passed to
    ``transform``: ``w_ix`` / ``h_ix`` for image width / height, ``bbox_ix``
    for the bounding-box sequence and ``kp_ix`` for the keypoints sequence.
    ``num_keypoints`` is stored on the instance but not read by ``transform``.
    """

    def __init__(self, num_keypoints, w_ix, h_ix, bbox_ix, kp_ix):
        self.num_keypoints = num_keypoints
        self.w_ix = w_ix
        self.h_ix = h_ix
        self.bbox_ix = bbox_ix
        self.kp_ix = kp_ix

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer unchanged."""
        return self

    def transform(self, X):
        """Return ``X`` with extra columns appended on the right.

        Appended columns, in order: ``scale_x`` (bbox[2] / width), ``scale_y``
        (bbox[3] / height), ``scale_cat`` (bucket of ``scale_y``),
        ``aspect_ratio`` (width / height), followed by one column per value of
        the keypoints entry.
        """
        img_w = X[:, self.w_ix]
        img_h = X[:, self.h_ix]

        # per-row sequences -> regular 2-D arrays
        boxes = np.array(X[:, self.bbox_ix].tolist())
        kps = np.array(X[:, self.kp_ix].tolist())

        # share of the image covered by the box along each axis
        scale_x = boxes[:, 2] / img_w
        scale_y = boxes[:, 3] / img_h
        aspect_ratio = img_w / img_h

        # Bucket the vertical scale into S, M, L, XL. A scale of 0.4 means the
        # box height is 40% of the image height and lands in bucket 'S'.
        size_bins = [0., 0.4, 0.6, 0.8, np.inf]   # 0-0.4  0.4-0.6  0.6-0.8  0.8-1
        size_labels = ['S', 'M', 'L', 'XL']
        scale_cat = pd.cut(scale_y, bins=size_bins, labels=size_labels)

        return np.c_[X, scale_x, scale_y, scale_cat, aspect_ratio, kps]

# get number of keypoints and column indexes

num_keypoints = 17
w_ix = coco_df.columns.get_loc('width')
h_ix = coco_df.columns.get_loc('height')
bbox_ix = coco_df.columns.get_loc('bbox')
kp_ix = coco_df.columns.get_loc('keypoints')

# transformer object that is used to add new columns

attr_adder = AttributesAdder(
    num_keypoints=num_keypoints,
    w_ix=w_ix,
    h_ix=h_ix,
    bbox_ix=bbox_ix,
    kp_ix=kp_ix)
coco_extra_attribs = attr_adder.transform(coco_df.values)

# Determine column names for keypoints. Each keypoint is stored as a triple
# x, y, v, and every value gets its own column, named by keypoint position:
# x0, y0, v0, x1, y1, v1, ...
keypoints_cols = [axis + str(idx) for idx in range(num_keypoints) for axis in ('x', 'y', 'v')]

# Create a new, richer dataframe. The transformed array is a single ndarray
# shared by string, list and numeric columns, so a frame built from it carries
# every column as dtype object; infer_objects() restores numeric dtypes for the
# columns that hold only numbers.
coco_extra_attribs_df = pd.DataFrame(
    coco_extra_attribs,
    columns=list(coco_df.columns) + ["scale_x", "scale_y", "scale_cat", "aspect_ratio"] + keypoints_cols,
    index=coco_df.index).infer_objects()

coco_extra_attribs_df.head()

#### Where are the noses in images

In [None]:
# only images with aspect ratio >= 1 are used to normalize keypoint coordinates
horiz_imgs_df = coco_extra_attribs_df.loc[coco_extra_attribs_df['aspect_ratio'] >= 1.]

# mean width and height (truncated to int) - used to scale keypoint coordinates
avg_w, avg_h = (int(horiz_imgs_df[col].mean()) for col in ('width', 'height'))

# positions of the required columns; the nose is keypoint 0, so its
# coordinates and visibility flag live in columns 'x0', 'y0' and 'v0'
w_ix, h_ix, x1_ix, y1_ix, v1_ix = (
    horiz_imgs_df.columns.get_loc(name)
    for name in ('width', 'height', 'x0', 'y0', 'v0')
)

print(f"avg width {avg_w}")
print(f"avg height {avg_h}")

class NoseAttributesAdder(BaseEstimator, TransformerMixin):
    """Append nose coordinates rescaled to a common image size.

    ``avg_w`` / ``avg_h`` are the target width / height. ``w_ix``, ``h_ix``,
    ``x1_ix`` and ``y1_ix`` are column positions of image width, image height
    and the nose x / y coordinates in the array given to ``transform``.
    ``v1_ix`` is stored on the instance but not read by ``transform``.
    """

    def __init__(self, avg_w, avg_h, w_ix, h_ix, x1_ix, y1_ix, v1_ix):
        self.avg_w = avg_w
        self.avg_h = avg_h
        self.w_ix = w_ix
        self.h_ix = h_ix
        self.x1_ix = x1_ix
        self.y1_ix = y1_ix
        self.v1_ix = v1_ix

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer unchanged."""
        return self

    def transform(self, X):
        """Return ``X`` with two extra columns: nose x and nose y, each
        multiplied by (target size / image size) along its axis."""
        img_w = X[:, self.w_ix]
        img_h = X[:, self.h_ix]

        # rescale nose coords from each image's own size to the global size
        nose_x = X[:, self.x1_ix] * (self.avg_w / img_w)
        nose_y = X[:, self.y1_ix] * (self.avg_h / img_h)

        return np.c_[X, nose_x, nose_y]

# transformer object that is used to add normalized nose coordinates columns

attr_adder = NoseAttributesAdder(
    avg_w=avg_w,
    avg_h=avg_h,
    w_ix=w_ix,
    h_ix=h_ix,
    x1_ix=x1_ix,
    y1_ix=y1_ix,
    v1_ix=v1_ix
)
coco_noses = attr_adder.transform(horiz_imgs_df.values)

# create dataframe with new normalized coordinates

coco_noses_df = pd.DataFrame(
    coco_noses,
    columns=list(horiz_imgs_df.columns) + ["normalized_nose_x", "normalized_nose_y"],
    index=horiz_imgs_df.index)

# Get only a subset of columns. The frame was built from one ndarray shared by
# string, list and numeric columns, so all columns arrive as dtype object;
# infer_objects() gives the coordinate and flag columns numeric dtypes again
# before they are filtered and plotted.

coco_noses_df = coco_noses_df[
    ["path", "source", "x0", "y0", "v0", "normalized_nose_x", "normalized_nose_y"]
].infer_objects()

# filtering - only visible noses

coco_noses_df = coco_noses_df[coco_noses_df["v0"] == 2]
coco_noses_df.head()

In [None]:
# scatter of the normalized nose positions; the y axis is inverted so that
# larger y values are drawn lower in the plot
coco_noses_df.plot.scatter(x="normalized_nose_x", y="normalized_nose_y", alpha=0.3).invert_yaxis()


In [None]:
# noses whose normalized y coordinate is greater than 430
low_noses_df = coco_noses_df.loc[coco_noses_df['normalized_nose_y'] > 430]
low_noses_df

In [None]:
# inspect one specific image from the low-noses subset
path, source = '000000289222.jpg', 1
selected = low_noses_df.loc[(low_noses_df['path'] == path) & (low_noses_df['source'] == source)]

# show the image and mark the original (non-normalized) nose coordinates
full_path = get_full_path(selected)
img = mpimg.imread(full_path)
plt.imshow(img)
plt.plot(selected['x0'], selected['y0'], 'ro')

#### Number of keypoints

In [None]:
# number of annotations for every distinct num_keypoints value
y_images = coco_extra_attribs_df['num_keypoints'].value_counts()
x_keypoints = y_images.index.values

# plot
# The figure size has to be passed to plt.figure(); assigning `plt.figsize`
# only sets an unused attribute on the pyplot module and changes nothing.

plt.figure(figsize=(10, 5))
plt.bar(x_keypoints, y_images)
plt.title('Histogram of keypoints')
plt.xticks(x_keypoints)
plt.xlabel('Number of keypoints')
plt.ylabel('Number of bboxes')
plt.show()

# share of annotations (column) having a given number of keypoints (rows)

kp_df = pd.DataFrame({
    "Num keypoints %": coco_extra_attribs_df["num_keypoints"].value_counts() / len(coco_extra_attribs_df)
}).sort_index()
kp_df

#### Scales

In [None]:
# first five rows of the enriched frame
coco_extra_attribs_df.head(n=5)

In [None]:
# annotations with at least one keypoint, and the histogram of their scale buckets
persons_df = coco_extra_attribs_df.loc[coco_extra_attribs_df['num_keypoints'] > 0]
persons_df['scale_cat'].hist()

In [None]:
# share of each scale bucket among all rows of persons_df
scales_props_df = (
    persons_df["scale_cat"]
    .value_counts()
    .div(len(persons_df))
    .to_frame("Scales %")
)
scales_props_df

### Is COCO train and validation data stratified?

Strata: scales

In [None]:
# annotations with at least one keypoint, split by source (0 = train, 1 = val)
persons_df = coco_extra_attribs_df.loc[coco_extra_attribs_df['num_keypoints'] > 0]
train_df = persons_df.loc[persons_df['source'] == 0]
val_df = persons_df.loc[persons_df['source'] == 1]

# share of each scale bucket within each split
scales_props_df = pd.DataFrame({
    "Scales in train set %": train_df["scale_cat"].value_counts().div(len(train_df)),
    "Scales in val set %": val_df["scale_cat"].value_counts().div(len(val_df)),
})

# absolute difference between the two shares, scaled by 100
scales_props_df["Diff 100%"] = (
    scales_props_df["Scales in train set %"] - scales_props_df["Scales in val set %"]
).abs() * 100
scales_props_df

Strata: number of keypoints

In [None]:
# all annotations, split by source (0 = train, 1 = val)
train_df = coco_extra_attribs_df.loc[coco_extra_attribs_df['source'] == 0]
val_df = coco_extra_attribs_df.loc[coco_extra_attribs_df['source'] == 1]

# share of each num_keypoints value within each split, ordered by keypoint count
kp_props_df = pd.DataFrame({
    "Num keypoints in train set %": train_df["num_keypoints"].value_counts().div(len(train_df)),
    "Num keypoints in val set %": val_df["num_keypoints"].value_counts().div(len(val_df)),
}).sort_index()

# absolute difference between the two shares, scaled by 100
kp_props_df["Diff 100%"] = (
    kp_props_df["Num keypoints in train set %"] - kp_props_df["Num keypoints in val set %"]
).abs() * 100
kp_props_df