# Data Analysis

Load and parse classification CSVs to review segmentation results, compute user metrics, and identify difficult images.

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys
import getpass
import re
import csv
import json
import math
import datetime
import pandas as pd
import numpy as np
from glob import glob
from skimage import io
from tqdm.notebook import tqdm
from itertools import combinations

import custom_cmaps
from metrics import iou, average_precision
from aggregation import *
from helpers import *
from matplotlib import pyplot as plt

## Load the results csv

To access the most recent results you must first "Request new classification export" from the Zooniverse project. After making the request, wait a minute or two (or until you receive the confirmation email from Zooniverse) before downloading the results.

In [None]:
# raise the csv module's per-field size limit to 1 MiB (1,048,576 characters)
# so that very long csv columns do not overflow it
_ = csv.field_size_limit(1 << 20)

Set and verify the parameters in the cell below before going further.

In [None]:
# User-editable notebook parameters. Both paths are intentionally left empty
# and must be filled in before running the rest of the notebook.
source_dir = "" # directory containing the jpg images uploaded to Zooniverse
csv_path = "" # path to the downloaded classifications csv

IS_FLIPBOOK = True # images in source_dir from flipbooks?
# number of images per flipbook; get_image_name looks up the middle
# frame ('Image {SPAN // 2}') when IS_FLIPBOOK is True
SPAN = 5
# image size that was used for prep_flipbooks/prep_images; get_image_size
# falls back to this value when the row metadata has no dimensions
IM_SIZE = (480, 480)

# annotations before image is retired; subjects with at least this many
# annotations are marked as retired in the subject aggregation section
RETIRE_LIMIT = 10

# checks: fail early if either path was left as the empty placeholder
assert source_dir
assert csv_path

In [None]:
# read the raw classification export
results_df = pd.read_csv(csv_path)

# decode the JSON-encoded string columns into Python objects, storing each
# decoded column next to its source under a '<name>_json' label
results_df['metadata_json'] = list(map(json.loads, results_df['metadata']))
results_df['annotations_json'] = list(map(json.loads, results_df['annotations']))
results_df['subject_data_json'] = list(map(json.loads, results_df['subject_data']))

## Parsing functions

Define some helper functions to parse the results.

In [None]:
def get_image_name(row):
    """
    Look up the image name stored in a results dataframe row.

    The name is read from the first entry of the row's 'subject_data_json'
    under the key 'Image <k>', where k is the middle frame (SPAN // 2) for
    flipbooks and 0 otherwise.
    """
    if IS_FLIPBOOK:
        frame = SPAN // 2
    else:
        frame = 0

    first_subject = list(row['subject_data_json'].values())[0]
    return first_subject[f'Image {frame}']
    
def get_image_size(row):
    """
    Return the size of an image from results dataframe row, if available.
    Otherwise return the default size defined by the IM_SIZE variable.

    The size is read from row['metadata_json']['subject_dimensions'] at the
    same frame index that get_image_name uses (SPAN // 2 for flipbooks,
    0 otherwise). The default is returned when 'subject_dimensions' is
    missing or empty, has no entry at that index, or the entry is None.

    Returns a (width, height) tuple.
    """
    # index of the frame of interest; previously this name was never
    # defined, so every call raised NameError
    image_idx = SPAN // 2 if IS_FLIPBOOK else 0

    dimensions = row['metadata_json'].get('subject_dimensions') or []
    image_dims = dimensions[image_idx] if image_idx < len(dimensions) else None

    if image_dims is not None:
        w, h = image_dims['naturalWidth'], image_dims['naturalHeight']
    else:
        w, h = IM_SIZE

    return (w, h)

def calculate_time(row):
    """
    Return the number of minutes between the 'started_at' and
    'finished_at' timestamps in the row's metadata.
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%S.%fZ'

    meta = row['metadata_json']
    raw_start = meta['started_at']
    raw_finish = meta['finished_at']

    began = datetime.datetime.strptime(raw_start, timestamp_format)
    ended = datetime.datetime.strptime(raw_finish, timestamp_format)

    elapsed = ended - began
    return elapsed.total_seconds() / 60

def parse_annotations(row):
    """
    Extract the segmentation and confidence answers from a row.

    The row's 'annotations_json' must hold exactly two task entries. The
    entry whose 'task' is 'T0' is the segmentation; the other entry is
    treated as the confidence rating, taken from the first character of
    its value.

    Returns a dictionary with 'n_objects', 'segmentation' and 'confidence'.
    Raises an Exception when the row does not hold exactly two entries.
    """
    tasks = row['annotations_json']

    if len(tasks) != 2:
        raise Exception('Expected 2 annotations (segmentation and confidence)')

    parsed = {}
    for entry in tasks:
        is_segmentation = entry['task'] == 'T0'
        answer = entry['value']

        if not is_segmentation:
            # confidence rating: keep only the leading digit
            parsed['confidence'] = int(answer[:1])
            continue

        if not answer:
            # nothing was labeled, so the answer is an empty list
            parsed['n_objects'] = 0
            parsed['segmentation'] = answer
            continue

        parsed['n_objects'] = len(answer)
        parsed['segmentation'] = [
            polygon_to_array(shape['points']) for shape in answer
        ]

    return parsed

## Subject aggregation

This section organizes segmentations by subject image such that they can be aggregated into a consensus.

In [None]:
# group every classification under the image it was made on;
# maps image name -> dict of attributes collected for that subject
subject_annotations = {}
for _row_idx, record in results_df.iterrows():
    subject_id = list(record['subject_data_json'].keys())[0]
    image_name = get_image_name(record)

    # first classification seen for this image: start an empty entry
    if image_name not in subject_annotations:
        subject_annotations[image_name] = {
            'shape': get_image_size(record),
            'confidences': [],
            'annotations': [],
            'users': [],
        }

    # pull the polygons and the confidence rating out of the row
    parsed = parse_annotations(record)
    polygons = parsed['segmentation']
    confidence = parsed['confidence']

    entry = subject_annotations[image_name]
    entry['id'] = subject_id
    entry['annotations'].append(polygons)
    entry['confidences'].append(confidence)
    entry['users'].append(record['user_name'])

# a subject counts as retired once it has at least RETIRE_LIMIT annotations
for entry in subject_annotations.values():
    entry['retired'] = len(entry['annotations']) >= RETIRE_LIMIT

all_subjects = list(subject_annotations)
retired_subjects = [
    name for name, entry in subject_annotations.items() if entry['retired']
]
print(f'{len(retired_subjects)} retired subjects out of {len(all_subjects)}')

### Review random subject annotations and consensus

Randomly pick a subject (retired or otherwise) and plot all the user-created segmentations along with a consensus segmentation for the image.

In [None]:
# draw one retired subject at random
# (pass all_subjects instead to sample from every subject)
choice = np.random.choice(retired_subjects)

subject_dict = subject_annotations[choice]
subject_id, image_shape = subject_dict['id'], subject_dict['shape']

# a user may have annotated the same image more than once; np.unique with
# return_index keeps the first annotation from each distinct user
usernames, indices = np.unique(subject_dict['users'], return_index=True)
usernames = usernames.tolist()
annotations = [subject_dict['annotations'][ix] for ix in indices]
confidences = [subject_dict['confidences'][ix] for ix in indices]

# rasterize each user's polygons into a mask
masks = [poly2segmentation(polygons, image_shape) for polygons in annotations]

# combine the individual masks into the consensus instance segmentation
instance_scores = mask_aggregation(masks)
instance_seg = aggregated_instance_segmentation(instance_scores, 0.5)

In [None]:
# plot all the user segmentations along with the consensus
cols = 6  # column 0: image and consensus panels; columns 1+: one user mask each
mask_cols = cols - 1

# At least two rows are needed because the consensus panel lives at ax[1, 0].
# The grid is sized for the larger of RETIRE_LIMIT and the actual number of
# masks so that no annotation is dropped and every panel has data to show.
rows = max(2, int(math.ceil(max(RETIRE_LIMIT, len(masks)) / mask_cols)))

# squeeze=False guarantees a 2-D axes array for any grid size
f, ax = plt.subplots(rows, cols, figsize=(4 * cols, 4 * rows), squeeze=False)

# os.path.join works whether or not source_dir ends with a path separator
image = io.imread(os.path.join(source_dir, choice))
ax[0, 0].imshow(image, cmap='gray')
ax[0, 0].set_xticks([])
ax[0, 0].set_yticks([])
ax[0, 0].set_title('Image')

ax[1, 0].imshow(image, cmap='gray')
ax[1, 0].imshow(instance_seg, alpha=0.5, cmap='hsv_alpha', interpolation='nearest')
ax[1, 0].set_xticks([])
ax[1, 0].set_yticks([])
ax[1, 0].set_title('Image + Consensus')

# first-column panels below the consensus have nothing to show
for extra_row in range(2, rows):
    ax[extra_row, 0].axis('off')

# pad local copies (not the originals) with empty placeholders so that every
# mask panel in the grid is filled; re-running the cell has no side effects
n_pad = rows * mask_cols - len(masks)
plot_masks = list(masks) + [np.zeros_like(image)] * n_pad
plot_names = list(usernames) + ['Empty'] * n_pad
plot_confs = list(confidences) + [0] * n_pad

for c, (mask, name, conf) in enumerate(zip(plot_masks, plot_names, plot_confs)):
    y, x = divmod(c, mask_cols)
    panel = ax[y, x + 1]  # shift by one to skip the image/consensus column
    panel.imshow(mask, cmap='hsv_black', interpolation='nearest')
    panel.set_xticks([])
    panel.set_yticks([])
    panel.set_title(f'{name} {conf}')

plt.tight_layout()

### Review subject difficulty

Find subjects where users report low annotation confidence or where there was a high level of disagreement between individual segmentations and the consensus.

In [None]:
# add consensus strength to the subject_annotations
for imname, subject_dict in tqdm(subject_annotations.items(), total=len(subject_annotations)):
    # already processed (e.g. the cell is being re-run): keep the stored result
    if 'consensus' in subject_dict:
        continue

    image_shape = subject_dict['shape']

    # rasterize every annotation of this subject into a mask
    masks = [
        poly2segmentation(annotation, image_shape)
        for annotation in subject_dict['annotations']
    ]

    instance_scores = mask_aggregation(masks)
    instance_seg = aggregated_instance_segmentation(instance_scores, 0.75)

    # compute ap of each individual annotation against consensus
    # NOTE(review): 0.50 is presumably the IoU threshold - confirm in metrics.average_precision
    scores = [average_precision(instance_seg, mask, 0.50, False)[0] for mask in masks]

    # consensus strength is the mean of the per-annotation scores
    subject_dict['consensus'] = instance_seg
    subject_dict['consensus_strength'] = np.mean(scores)

# parallel arrays, one element per subject, in dict iteration order
subjects = np.array(list(subject_annotations.keys()))
subject_ids = np.array([sd['id'] for sd in subject_annotations.values()])
user_confs = np.array([np.median(sd['confidences']) for sd in subject_annotations.values()])
user_scores = np.array([sd['consensus_strength'] for sd in subject_annotations.values()])

# higher means consensus segmentation should be better;
# both index arrays are sorted from highest to lowest value
most_agreed_indices = np.argsort(user_scores)[::-1]
most_confident_indices = np.argsort(user_confs)[::-1]

In [None]:
# plot images and consensus segmentations with worst/best agreement between annotators

chosen = most_agreed_indices[-10:] # 10 least agreed
#chosen = most_agreed_indices[:10] # 10 most agreed

f, ax = plt.subplots(2, 5, figsize=(20, 8))
panels = ax.ravel()

# zip stops at the shorter sequence, so having fewer than 10 subjects
# no longer raises an IndexError
for panel, index in zip(panels, chosen):
    subj_id = subject_ids[index]
    imname = subjects[index]
    sc = user_scores[index]

    image = io.imread(os.path.join(source_dir, imname))
    mask = subject_annotations[imname]['consensus']

    # consensus segmentation drawn semi-transparently over the image
    panel.imshow(image, cmap='gray')
    panel.imshow(mask, cmap='hsv_alpha', alpha=0.5, interpolation='nearest')
    panel.set_xticks([])
    panel.set_yticks([])
    panel.set_title(f'Subject {subj_id}; Consensus Str. {sc:.3f}')

# blank any panels left over when there are fewer than 10 subjects
for panel in panels[len(chosen):]:
    panel.axis('off')

plt.tight_layout()

In [None]:
# plot images and consensus segmentations with lowest/highest median user annotation confidence

chosen = most_confident_indices[-10:] # 10 least confident
#chosen = most_confident_indices[:10] # 10 most confident

f, ax = plt.subplots(2, 5, figsize=(20, 8))
panels = ax.ravel()

# zip stops at the shorter sequence, so having fewer than 10 subjects
# no longer raises an IndexError
for panel, index in zip(panels, chosen):
    subj_id = subject_ids[index]
    imname = subjects[index]
    sc = user_confs[index]

    image = io.imread(os.path.join(source_dir, imname))
    mask = subject_annotations[imname]['consensus']

    # consensus segmentation drawn semi-transparently over the image
    panel.imshow(image, cmap='gray')
    panel.imshow(mask, cmap='hsv_alpha', alpha=0.5, interpolation='nearest')
    panel.set_xticks([])
    panel.set_yticks([])
    panel.set_title(f'Subject {subj_id}; Median conf. {sc:.3f}')

# blank any panels left over when there are fewer than 10 subjects
for panel in panels[len(chosen):]:
    panel.axis('off')

plt.tight_layout()

## User evaluation

This section organizes annotations by user rather than by subject. This is useful for user-level metrics such as the number of images segmented or accuracy (i.e. average precision and IoU).

In [None]:
# maps user name -> {image name -> details of that user's annotation};
# a later annotation of the same image by the same user replaces the earlier one
user_annotations = {}
for _row_idx, row in results_df.iterrows():
    # image size and time spent, followed by the parsed task answers
    annotation_dict = {
        'image_size': get_image_size(row),
        'time': calculate_time(row),
        **parse_annotations(row),
    }

    # file the annotation under its user and image
    user_id = row['user_name']
    image_name = get_image_name(row)
    user_annotations.setdefault(user_id, {})[image_name] = annotation_dict

In [None]:
# report how many distinct users appear in the results
print(f'{len(user_annotations)} users have segmented data in this project.')

In [None]:
# histogram of how many images each user has an annotation for
images_per_user = [len(per_image) for per_image in user_annotations.values()]

plt.figure(figsize=(6, 3))
plt.hist(images_per_user)
plt.ylabel('Number of users')
plt.xlabel('Number of annotations')

In [None]:
# rank users by how many images they annotated and print the ten highest
users = list(user_annotations)
n_annotated = [len(per_image) for per_image in user_annotations.values()]

# argsort is ascending, so reverse it and keep the first ten positions
rankings = np.argsort(n_annotated)[::-1][:10]

for rank, idx in enumerate(rankings, start=1):
    print(rank, users[idx], n_annotated[idx])