In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stat
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import math

In [3]:
from research.strings import VIOLATIONS, MONTHS

In [4]:
def _load_inspections(path):
    """Read one inspection-record CSV and add the derived columns used below.

    Missing ``aka_name`` values are replaced with the sentinel string
    "NULL_NAME", and ``critical_found`` is set to the row-wise maximum over
    the columns listed in VIOLATIONS.
    """
    records = pd.read_csv(path)
    records["aka_name"] = records["aka_name"].fillna("NULL_NAME")
    records["critical_found"] = records[VIOLATIONS].max(axis=1)
    return records


df_canvass = _load_inspections("data/canvass_records.csv.gz")
df_complaint = _load_inspections("data/complaint_records.csv.gz")
print("Read {} canvass records.".format(len(df_canvass)))
print("Read {} complaint records.".format(len(df_complaint)))

Read 50462 canvass records.
Read 17088 complaint records.


In [5]:
# Peek at the first few raw inspection_date values to see how dates are stored.
df_complaint.loc[:, "inspection_date"].head(5)

0    2011-08-19T00:00:00.000
1    2018-01-10T00:00:00.000
2    2014-03-13T00:00:00.000
3    2012-09-06T00:00:00.000
4    2012-09-06T00:00:00.000
Name: inspection_date, dtype: object

In [11]:
# Test window: start_date is inclusive, end_date is exclusive.
start_date, end_date = "2014-09-01", "2014-11-01"
# The bounds are embedded as quoted string literals, so the filter compares
# inspection_date against them as text (the complaint file stores ISO-8601
# strings; presumably the canvass file does too - TODO confirm).
date_query = "inspection_date >= '{lo}' and inspection_date < '{hi}'".format(
    lo=start_date, hi=end_date
)
# Apply the same date filter to both record sets to build the test-window frames.
test_canv, test_comp = (
    frame.query(date_query) for frame in (df_canvass, df_complaint)
)
print(
    "Found {} canvass records during test set.".format(len(test_canv)),
    "Found {} complaint records during test set.".format(len(test_comp)),
    sep="\n",
)

Found 1368 canvass records during test set.
Found 352 complaint records during test set.


In [19]:
# Distinct license IDs seen in each test-window frame, and the IDs common to both.
lids_canv = set(test_canv["license_id"])
lids_comp = set(test_comp["license_id"])
lids_ix = lids_canv & lids_comp
print(
    "Found {} unique license IDs in test set canvass records.".format(len(lids_canv)),
    "Found {} unique license IDs in test set complaint records.".format(len(lids_comp)),
    "Intersection = {} unique license IDs.".format(len(lids_ix)),
    sep="\n",
)

Found 1364 unique license IDs in test set canvass records.
Found 330 unique license IDs in test set complaint records.
Intersection = 13 unique license IDs.


In [18]:
# Total of critical_found over the test-window canvass records (a count of
# flagged inspections if the column is a 0/1 indicator - TODO confirm).
test_canv.loc[:, "critical_found"].sum()

222

In [36]:
# Complaint records in the test window whose license ID is also present in the
# test-window canvass records (i.e. is a member of lids_ix).
# Series.isin performs the membership test in one vectorized call instead of a
# Python-level loop over every row; .values keeps the mask a plain positional
# boolean array, so it indexes rows by position just like the original list.
mask = test_comp["license_id"].isin(lids_ix).values
test_comp[mask]

Unnamed: 0,inspection_id,address,aka_name,city,dba_name,facility_type,inspection_date,inspection_type,latitude,license_id,...,V6,V7,V8,V9,V10,V11,V12,V13,V14,critical_found
21,1424521,11601 W TOUHY AVE,FRONTERA TORTAS BY RICK BAYLESS (T3 K4),CHICAGO,FRONTERA TORTAS BY RICK BAYLESS GATE K4 T3,Restaurant,2014-09-26T00:00:00.000,Short Form Complaint,42.008536,2114331,...,0,0,0,0,0,0,0,0,0,0
887,1497716,21 E HUBBARD ST,SHAW'S CRAB HOUSE,CHICAGO,SHAW'S CRAB HOUSE,Restaurant,2014-10-09T00:00:00.000,Short Form Complaint,41.889971,9278,...,0,0,0,0,0,0,0,0,0,0
2791,1496932,21 E HUBBARD ST,SHAW'S CRAB HOUSE,CHICAGO,SHAW'S CRAB HOUSE,Restaurant,2014-09-19T00:00:00.000,Suspected Food Poisoning,41.889971,9278,...,0,0,0,0,0,0,0,0,0,1
3609,1385710,4312 W FULLERTON AVE,PONCE,CHICAGO,PONCE,Restaurant,2014-10-20T00:00:00.000,Complaint,41.9245,1905309,...,0,0,0,0,0,0,0,0,0,0
4061,1497077,21 E HUBBARD ST,SHAW'S CRAB HOUSE,CHICAGO,SHAW'S CRAB HOUSE,Restaurant,2014-09-23T00:00:00.000,Short Form Complaint,41.889971,9278,...,0,0,0,0,0,0,0,0,0,1
4283,1498183,828 N STATE ST,STARBUCKS COFFEE,CHICAGO,STARBUCKS COFFEE #2215,Restaurant,2014-10-24T00:00:00.000,Short Form Complaint,41.897542,23051,...,0,0,0,0,0,0,0,0,0,0
4890,1467542,1335 S HALSTED ST,JOY YEE NOODLE,CHICAGO,JOY YEE NOODLE,Restaurant,2014-10-30T00:00:00.000,Short Form Complaint,41.864411,1650274,...,0,0,0,0,0,0,0,0,0,0
5201,1307733,2757 W 59TH ST,VALLARTA,CHICAGO,VALLARTA,Restaurant,2014-10-31T00:00:00.000,Short Form Complaint,41.786301,69963,...,0,0,0,0,0,0,0,0,0,0
5392,1325271,3357 W HARRISON ST,MURRAY'S FISH AND CHICKEN,CHICAGO,MURRAY'S FISH AND CHICKEN,Restaurant,2014-10-23T00:00:00.000,Short Form Complaint,41.87352,2060861,...,0,0,0,0,0,0,0,0,0,0
10887,1199561,500 W MADISON ST,DUNKIN DONUTS,CHICAGO,DUNKIN DONUTS,Restaurant,2014-10-15T00:00:00.000,Short Form Complaint,41.881994,1820926,...,0,0,0,0,0,0,0,0,0,0


In [20]:
# Display the license IDs that appear in both the canvass and complaint test-window sets.
lids_ix

{9278,
 23051,
 33144,
 56642,
 69963,
 82530,
 1168258,
 1650274,
 1820926,
 1905309,
 2060861,
 2088536,
 2114331}

In [26]:
# Violations text of the first test-window canvass record for license 9278.
test_canv.loc[test_canv["license_id"] == 9278, "violations"].iloc[0]

'34. FLOORS: CONSTRUCTED PER CODE, CLEANED, GOOD REPAIR, COVING INSTALLED, DUST-LESS CLEANING METHODS USED - Comments: OBSERVED GROUT WORN BETWEEN FLOOR TILES IN THE DISH ROOM, SUSHI AND OYSTER BAR COOK LINE. INSTRUCTED TO PROVIDE AND MAINTAIN GROUT BETWEEN  FLOOR TILES. | 35. WALLS, CEILINGS, ATTACHED EQUIPMENT CONSTRUCTED PER CODE: GOOD REPAIR, SURFACES CLEAN AND DUST-LESS CLEANING METHODS - Comments: OBSERVED THE WALL IN DISREPAIR IN THE MOP SINK CLOSET. INSTRUCTED TO REPAIR AND MAINTAIN ALL WALLS. | 36. LIGHTING: REQUIRED MINIMUM FOOT-CANDLES OF LIGHT PROVIDED, FIXTURES SHIELDED - Comments: OBSERVED BLOWN LIGHT BULBS UNDER THE REAR BROILER AND STEAMER HOOD. INSTRUCTED TO PROVIDE AND MAINTAIN ADEQUATE LIGHTING. | 38. VENTILATION: ROOMS AND EQUIPMENT VENTED AS REQUIRED: PLUMBING: INSTALLED AND MAINTAINED - Comments: OBSERVED THE PLUMBING LEAKING AT THE SALAD PREP EXPOSED HANDSINK. INSTRUCTED TO REPAIR AND MAINTAIN ALL PLUMBING. | 42. APPROPRIATE METHOD OF HANDLING OF FOOD (ICE) HAIR 

In [27]:
# Violations text of the first test-window complaint record for license 9278.
test_comp.loc[test_comp["license_id"] == 9278, "violations"].iloc[0]

'34. FLOORS: CONSTRUCTED PER CODE, CLEANED, GOOD REPAIR, COVING INSTALLED, DUST-LESS CLEANING METHODS USED - Comments: OBSERVED GROUT WORN BETWEEN FLOOR TILES IN THE DISH ROOM, SUSHI AND OYSTER BAR COOK LINE. INSTRUCTED TO PROVIDE AND MAINTAIN GROUT BETWEEN FLOOR TILES.    | 35. WALLS, CEILINGS, ATTACHED EQUIPMENT CONSTRUCTED PER CODE: GOOD REPAIR, SURFACES CLEAN AND DUST-LESS CLEANING METHODS - Comments: OBSERVED THE WALL IN DISREPAIR IN THE MOP SINK CLOSET. INSTRUCTED TO REPAIR AND MAINTAIN ALL WALLS.   '