In [1]:
%matplotlib inline
import sys
import os
import json
import csv

import matplotlib.pyplot as plt
from IPython.display import Image

from io import BytesIO
import PIL

from PIL import ImageDraw

## Look at Artist Images

This first section collects the dimensions and detected-face count of each artist image and saves them to a CSV file.

In [2]:
def import_data(filename):
    """Load and return the parsed contents of a JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; being explicit keeps the result independent of the
    # platform's locale encoding (which is what a bare open() would use).
    with open(filename, encoding='utf-8') as json_data:
        return json.load(json_data)

In [12]:
def get_image(cat, artist):
    """Open the stored image for one artist of one category.

    The image is looked up at ``./data/imgs/<cat['id']>/<artist['id']>.jpg``.

    Args:
        cat: Mapping with an ``'id'`` entry naming the category directory.
        artist: Mapping with an ``'id'`` entry naming the image file (without
            the ``.jpg`` extension).

    Returns:
        The image opened with ``PIL.Image.open``, or ``None`` when no file
        exists at the expected location.
    """
    category_dir = os.path.join('.', 'data', 'imgs', cat['id'])
    image_path = os.path.join(category_dir, artist['id'] + '.jpg')
    if os.path.isfile(image_path):
        return PIL.Image.open(image_path)
    return None

In [7]:
def save_csv(rows, filename):
    """Write a list of dictionaries to a CSV file with a header row.

    Args:
        rows: Sequence of dicts. The keys of the first dict define the
            columns and their order. May be empty.
        filename: Destination path; any existing file is overwritten.

    When ``rows`` is empty there are no column names to write, so an empty
    file is produced instead of raising ``IndexError``.
    """
    # newline='' lets the csv module control line endings itself; without it
    # rows are written with an extra '\r' on platforms that translate '\n'.
    with open(filename, 'w', newline='') as f:
        if not rows:
            return
        w = csv.DictWriter(f, rows[0].keys())
        w.writeheader()
        w.writerows(rows)

In [10]:
def main(input_file, output_file='data/images.csv'):
    """Summarise every available artist image into a CSV file.

    Reads the category list from ``input_file``, looks up the image of each
    artist listed under a category's ``'artist_details'``, and records one row
    per image that exists on disk.

    Args:
        input_file: Path of the JSON file describing categories and artists.
        output_file: Path of the CSV file to write. Defaults to
            ``'data/images.csv'``.

    Each row holds the category id, the artist id, the number of entries in
    the artist's ``'faces'`` list, and the image width and height.
    """
    cats = import_data(input_file)
    rows = []
    for cat in cats:
        for artist in cat['artist_details']:
            pimage = get_image(cat, artist)
            # Artists without an image file are skipped.
            if pimage is None:
                continue
            # Image.open keeps the underlying file open; close it as soon as
            # the dimensions have been read so handles do not accumulate.
            with pimage:
                rows.append({
                    'cat': cat['id'],
                    'artist': artist['id'],
                    'face_num': len(artist['faces']),
                    'img_width': pimage.width,
                    'img_height': pimage.height,
                })

    save_csv(rows, output_file)

In [13]:
# Build the artist-image summary from the categories/artists listed in faces.json.
main('./data/faces.json')

## Face Images

This section aggregates the dimensions of the face images found under `./data/imgs_out`.

In [15]:
import glob

In [14]:
def get_image_from_file(filename):
    """Open the image stored at ``filename``.

    Returns the image opened with ``PIL.Image.open``, or ``None`` when
    ``filename`` does not refer to an existing regular file.
    """
    file_exists = os.path.isfile(filename)
    return PIL.Image.open(filename) if file_exists else None

In [17]:

# Root directory that is scanned for per-category sub-directories.
START_DIR = './data/imgs_out'

# glob returns matches in arbitrary order and "*" also matches plain files,
# so keep only directories and sort them for a reproducible processing order.
cat_dirs = sorted(
    path
    for path in glob.glob(os.path.join(START_DIR, "*"))
    if os.path.isdir(path)
)

print(len(cat_dirs))

10


In [21]:
# Collect one row (category, artist, width, height) per .jpg file found in
# each category directory.
rows = []
for cat_dir in cat_dirs:
    print(cat_dir)
    # The directory name is the category id; basename() works with any path
    # separator, unlike splitting on "/".
    cat_id = os.path.basename(cat_dir)
    # Sorted so the resulting rows come out in a reproducible order.
    image_files = sorted(glob.glob(os.path.join(cat_dir, "*.jpg")))

    print(len(image_files))
    for img in image_files:
        # The artist id is the part of the file name before the first "." and
        # the first "_" (presumably "<artist id>_<n>.jpg" -- TODO confirm).
        artist_id = os.path.basename(img).split(".")[0].split("_")[0]

        pimage = get_image_from_file(img)
        # The file may have vanished between listing and opening.
        if pimage is None:
            continue
        # Close each image once its size is read so file handles do not pile up.
        with pimage:
            rows.append({
                "cat_id": cat_id,
                "artist_id": artist_id,
                "img_width": pimage.width,
                "img_height": pimage.height,
            })

./data/imgs_out/country
556
./data/imgs_out/folk_americana
1224
./data/imgs_out/hiphop
760
./data/imgs_out/indie_alt
1815
./data/imgs_out/metal
1521
./data/imgs_out/pop
871
./data/imgs_out/punk
689
./data/imgs_out/rnb
698
./data/imgs_out/rock
2376
./data/imgs_out/soul
800


In [22]:
# Persist the collected image dimensions (one row per image file) as CSV.
save_csv(rows, 'data/face_sizes.csv')