# Playing around with data

## Food Inspection

In [2]:
from senpai.food_insp import download_csv

In [3]:
import pandas as pd

In [25]:
import numpy as np

In [4]:
# Load the food-inspection CSV from the working directory. dtype=object keeps
# every column as read instead of letting pandas infer numeric types.
df = pd.read_csv("./food_insp.csv", dtype=object)

In [5]:
# Show the column names of the raw frame as an array.
df.columns.values

array(['businessname', 'dbaname', 'legalowner', 'namelast', 'namefirst',
       'licenseno', 'issdttm', 'expdttm', 'licstatus', 'licensecat',
       'descript', 'result', 'resultdttm', 'violation', 'viollevel',
       'violdesc', 'violdttm', 'violstatus', 'statusdate', 'comments',
       'address', 'city', 'state', 'zip', 'property_id', 'location'],
      dtype=object)

In [35]:
# Keep only the columns needed for the per-restaurant summary.
DF = df[["businessname", "licenseno", "violstatus", "address", "city", "state", "zip", "property_id", "location"]]

# Drop every row that has a missing value in any of the selected columns.
DF = DF.dropna()

# Keep only the violations whose status is "Fail".
DF = DF[DF["violstatus"] == "Fail"]

# Count the failed violations per license number.
VC = DF.groupby("licenseno").count()

# Business details indexed by license number. The explicit .copy() makes DF_R
# an independent frame, so the column assignments below never write into a
# slice of DF.
DF_R = DF.set_index("licenseno")[["businessname", "address", "city", "state", "zip", "location"]].copy()

# The assignment aligns on the licenseno index, so every row receives the
# failure count of its own license.
DF_R["violation_count"] = VC["violstatus"]

# drop_duplicates() compares column values only and ignores the index. Move
# licenseno into a column while deduplicating so that two different licenses
# with otherwise identical rows are both kept, then restore it as the index.
DF_R = DF_R.reset_index().drop_duplicates().set_index("licenseno")

# Expose the license number as an explicit "_id" field on every record.
DF_R["_id"] = DF_R.index.values

# Convert the location strings to [lat, long] lists of floats.
# parse_coor is defined in a separate cell, which must be run before this one.
DF_R["location"] = DF_R["location"].map(parse_coor)


In [36]:
# Spot-check the parsed location stored for license number "87059".
DF_R.loc["87059","location"]

[42.27859, -71.11944]

In [30]:
def parse_coor(s):
    """
    Parse a coordinate string into a ``[lat, long]`` list of floats.

    Parameters
    ----------
    s : str
        Text holding two comma-separated numbers, e.g.
        ``"(42.27859, -71.11944)"``. Enclosing parentheses or square
        brackets are optional, and whitespace around the numbers is ignored.

    Returns
    -------
    list of float
        ``[lat, long]`` in the order the numbers appear in ``s``.

    Raises
    ------
    ValueError
        If ``s`` does not contain exactly two comma-separated values, or if
        either value is not a valid float.
    """
    # Remove only real enclosing brackets. Slicing off the first and last
    # character unconditionally would eat digits when the input has no
    # brackets (e.g. "42.1, -71.2").
    text = s.strip().strip("()[]")

    # Split on the bare comma; float() tolerates surrounding whitespace, so
    # both "a, b" and "a,b" are accepted.
    parts = text.split(",")
    if len(parts) != 2:
        raise ValueError(
            "expected two comma-separated coordinates, got %r" % (s,)
        )

    lat, lon = (float(part) for part in parts)
    return [lat, lon]
    

In [37]:
# Export one dict per restaurant row. "records" is the full orient name; the
# abbreviated "record" is rejected by pandas 2.0 and later.
DF_R.to_dict(orient="records")

[{'businessname': '100 Percent Delicia Food',
  'address': '635   Hyde Park AVE',
  'city': 'Roslindale',
  'state': 'MA',
  'zip': '02131',
  'location': [42.27859, -71.11944],
  'violation_count': 91,
  '_id': '87059'},
 {'businessname': '1000 Degrees Pizza',
  'address': '55   Court ST',
  'city': 'Boston',
  'state': 'MA',
  'zip': '02108',
  'location': [42.359227, -71.058878],
  'violation_count': 7,
  '_id': '313440'},
 {'businessname': '1000 Washington Cafe',
  'address': '1000   Washington ST',
  'city': 'Roxbury',
  'state': 'MA',
  'zip': '02118',
  'location': [42.345875, -71.064496],
  'violation_count': 8,
  '_id': '34789'},
 {'businessname': '125 Nashua St. Cafe (MGH)',
  'address': '125  Nashua St',
  'city': 'Boston',
  'state': 'MA',
  'zip': '02114',
  'location': [42.36783, -71.06495],
  'violation_count': 1,
  '_id': '130844'},
 {'businessname': '150 Boylston St. Dining Room @ Emerson College',
  'address': '150   Boylston ST',
  'city': 'Boston',
  'state': 'MA',
