In [1]:
import json
import csv


In [2]:
# libraries required for visualisation:
from IPython.display import SVG, display
import svgwrite # conda install -c omnia svgwrite=1.1.6
import PIL
from PIL import Image
import matplotlib.pyplot as plt

In [11]:
def first_time(stroke):
    return stroke[2][0]

def last_time(stroke):
    return stroke[2][len(stroke[2]) - 1]

def min_time(stroke):
    return min(stroke[2])

def max_time(stroke):
    return max(stroke[2])

def mixed_up_time(stroke):
    mixed_up = False
    for idx, t in enumerate(stroke[2]):
        if idx > 0:
            if t < stroke[2][idx - 1]:
                mixed_up = True
    return mixed_up
        
        

def total_time(stroke):
    # stroke is an array of length 3
    # 3rd (index 2) element is a list
    # with t: t is the time in milliseconds since the first point.
    first_t = first_time(stroke)
    last_t = last_time(stroke)
    return last_t - first_t

def pause_before(drawing, stroke, stroke_index):
    if (stroke_index == 0):
        return 0
    prev_stroke = drawing['drawing'][stroke_index - 1]
    return first_time(stroke) - last_time(prev_stroke)


def gen_stats_stroke(drawing, stroke, stroke_index):
    
    last_stroke = drawing['drawing'][len(drawing['drawing']) - 1]
    stats = {
        #"key_id": drawing['key_id'],
        #"recognized": drawing['recognized'],
        #"word": drawing['word'],
        #"countrycode": drawing['countrycode'],
        #"drawing_time": last_time(last_stroke),
        #"stroke_count": len(drawing['drawing']),
        "stroke_index": stroke_index,
        "stroke_time": total_time(stroke),
        "stroke_time_first": first_time(stroke),
        "stroke_time_last": last_time(stroke),
        "stroke_time_pause": pause_before(drawing, stroke, stroke_index),
        "stroke_time_min": min_time(stroke),
        "stroke_time_max": max_time(stroke),
        "stroke_time_mixedup": mixed_up_time(stroke)
    }
    
    return stats
    
    
def gen_stats(drawing):
    first_stroke = drawing['drawing'][0]
    last_stroke = drawing['drawing'][len(drawing['drawing']) - 1]
    stats = {
        "key_id": drawing['key_id'],
        "recognized": drawing['recognized'],
        "word": drawing['word'],
        "stroke_count": len(drawing['drawing']),
        "countrycode": drawing['countrycode'],
        "drawing_time": last_time(last_stroke) - first_time(first_stroke),
    }
    strokes = []
    for idx, stroke in enumerate(drawing['drawing']):
        strokes.append(gen_stats_stroke(drawing, stroke, idx))
    #stats['strokes'] = strokes
    pause_times = [s['stroke_time_pause'] for s in strokes]
    stats['drawing_time_pause'] = sum(pause_times)
    stroke_times = [s['stroke_time'] for s in strokes]
    stats['drawing_time_draw'] = sum(stroke_times)
    stats['stroke_in_order'] = sum([s['stroke_time_mixedup'] for s in strokes])
    stats['drawing_time_min'] = min([s['stroke_time_min'] for s in strokes])
    stats['drawing_time_max'] = max([s['stroke_time_max'] for s in strokes])
    
    return stats
    
    

In [12]:
def save_stats(draw_id, filename):
    #strokes = []
    stats = []
    with open(filename) as f:
        for line in f:
            drawing = json.loads(line)
            drawing_stats = gen_stats(drawing)
            stats.append(drawing_stats)
            #for stroke in drawing_stats['strokes']:
            #    strokes.append(stroke)
    with open('./data/' + draw_id + '.stats.csv', 'w') as f: 
        w = csv.DictWriter(f, stats[0].keys())
        w.writeheader()
        w.writerows(stats)        

In [14]:
draw_id = 'cat'
filename = "/Users/vlandham/code/data/quickdraw/raw/" + draw_id + ".ndjson"
save_stats(draw_id, filename)

In [10]:
problems = []
with open(filename) as f:
    for line in f:
        drawing = json.loads(line)
        #if (drawing['key_id'] == '6639331143843840'):
        #    problems.append(drawing)
        for stroke in drawing['drawing']:
            for t in stroke[2]:
                if t < 0:
                    problems.append(drawing)

In [97]:

len(problems)

457

In [9]:
problem_keys = [p['key_id'] for p in problems]

NameError: name 'problems' is not defined

In [100]:
problem = problems[2]
print(len(problem['drawing'][0]))
problem['drawing'][0][2]

3


[0,
 121,
 138,
 173,
 191,
 208,
 225,
 242,
 259,
 277,
 296,
 330,
 364,
 397,
 414,
 448,
 500,
 667,
 719,
 737,
 753,
 770,
 787,
 805,
 822,
 839,
 856,
 873,
 890,
 924,
 1314,
 1366,
 1399,
 -17351,
 -17318,
 -17284,
 -17250,
 -17216,
 -17182,
 -17147,
 -17130,
 -17096,
 -17062,
 -17045,
 -17010,
 -16975,
 -16941,
 -16605,
 -16570,
 -16553,
 -16536,
 -16519,
 -16485,
 -16449,
 -16416,
 -16382,
 -16364,
 -16330,
 -16295,
 -16261,
 -16208,
 -16012,
 -15978,
 -15961,
 -15943,
 -15927,
 -15910,
 -15892,
 -15875,
 -15857,
 -15806,
 -15771,
 -15753,
 -15736,
 -15719,
 -15702,
 -15667,
 -15630,
 -15613,
 -15237,
 -15203,
 -15168,
 -15133,
 -15099,
 -15065,
 -15029,
 -15012,
 -14978,
 -14944,
 -14927,
 -14893,
 -14859,
 -14841,
 -14825,
 -14791,
 -14756,
 -14739,
 -14705,
 -14671,
 -14637,
 -14620,
 -14585,
 -14551,
 -14533,
 -14516,
 -14499,
 -14481,
 -14464,
 -14447,
 -14431,
 -14413,
 -14396,
 -14093,
 -14076,
 -14043,
 -14026,
 -14008,
 -13992,
 -13975,
 -13958,
 -13941,
 -13924,
