# Eat Safe, Love

## Notebook Set Up

In [1]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [3]:
# assign the uk_food database to a variable name
db = mongo['uk_food']

In [4]:
# review the collections in our database
print(db.list_collection_names())

['establishments']


In [5]:
# assign the collection to a variable
establishments = db['establishments']

In [6]:
# pull an example document to have exact field names and schema available for the following queries
establishments.find_one()

{'_id': ObjectId('64f50545b816a17dee6a0751'),
 'BusinessName': 'Penang Flavours',
 'BusinessType': 'Restaurant/Cafe/Canteen',
 'BusinessTypeID': 1,
 'AddressLine1': 'Penang Flavours',
 'AddressLine2': '146A Plumstead Rd',
 'AddressLine3': 'London',
 'AddressLine4': '',
 'PostCode': 'SE18 7DY',
 'Phone': '',
 'LocalAuthorityCode': '511',
 'LocalAuthorityName': 'Greenwich',
 'LocalAuthorityWebSite': 'http://www.royalgreenwich.gov.uk',
 'LocalAuthorityEmailAddress': 'health@royalgreenwich.gov.uk',
 'scores': {'Hygiene': '', 'Structural': '', 'ConfidenceInManagement': ''},
 'SchemeType': 'FHRS',
 'geocode': {'longitude': 0.08384, 'latitude': 51.490142},
 'RightToReply': '',
 'Distance': 4623.972328074718,
 'NewRatingPending': True,
 'RatingValue': None}

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [7]:
# Find the establishments with a hygiene score of 20
hygiene_query = {"scores.Hygiene": 20}

# Use count_documents to display the number of documents in the result
print("Number of Establishments With a Hygiene Score of 20:", establishments.count_documents(hygiene_query))
print()

# Display the first document in the results using pprint
pprint(establishments.find_one(hygiene_query))

Number of Establishments With a Hygiene Score of 20: 0

None


In [8]:
# Convert the result to a Pandas DataFrame
hygiene_data = establishments.find(hygiene_query)
hygiene_df = pd.DataFrame(hygiene_data)

# Pretty print the first two results
limit = 2
hresults = establishments.find(hygiene_query).limit(limit)

for hresult in hresults:
    pprint(hresult)

# Display the number of rows in the DataFrame
print()
print("Number of Rows in the Hygiene DataFrame:", hygiene_df.shape[0])

# Display the first 10 rows of the DataFrame
hygiene_df.head(10)


Number of Rows in the Hygiene DataFrame: 0


### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [10]:
# Find the establishments with London as the Local Authority and has a RatingValue greater than or equal to 4.
london_query = {"LocalAuthorityName": {"$regex": "London"}, "RatingValue": {"$gte": 4}}

# Use count_documents to display the number of documents in the result
print("Num of Establishments in London With Rating Value 4 or higher:", establishments.count_documents(london_query))
print()

# Display the first document in the results using pprint
ldata = establishments.find(london_query)

for i in range(1):
    pprint(ldata[i])

Num of Establishments in London With Rating Value 4 or higher: 0



IndexError: no such item for Cursor instance

In [None]:
# Convert the result to a Pandas DataFrame
london_df = pd.DataFrame(ldata)

# Display the number of rows in the DataFrame
print("Number of Rows in the London DataFrame:", london_df.shape[0])

# Display the first 10 rows of the DataFrame
london_df.head(10)

### 3. What are the top 5 establishments with a `RatingValue` rating value of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [11]:
# Search within 0.01 degree on either side of the latitude and longitude.
# Rating value must equal 5
# Sort by hygiene score
degree_search = 0.01
latitude = 51.49014200
longitude = 0.08384000

top5_nearby_query = {'geocode.latitude': {'$gte':latitude-degree_search, '$lte': latitude+degree_search},
                     'geocode.longitude': {'$gte':longitude-degree_search, '$lte': longitude+degree_search},
                     'RatingValue': 5}
sort = [('score.Hygiene', 1)]
fields = {'FHRSID':1,'BusinessName':1, 'BusinessType':1, 'geocode.latitude':1, 'geocode.longitude':1, 'scores.Hygiene':1, 'RatingValue':1}
limit = 5

# Print the results
top5_nearby_results = establishments.find(top5_nearby_query, fields).sort(sort).limit(limit)
for top5_result in top5_nearby_results:
    pprint(top5_result)

In [12]:
# Convert result to Pandas DataFrame
top5_nearby_results = establishments.find(top5_nearby_query, fields).sort(sort).limit(limit)
top5_df = pd.DataFrame(top5_nearby_results)
print(top5_df.shape)
top5_df.head()

(0, 0)


### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [14]:
# Create a pipeline that: 
# 1. Matches establishments with a hygiene score of 0
# 2. Groups the matches by Local Authority
# 3. Sorts the matches from highest to lowest

# Print the number of documents in the result

# Print the first 10 results

bad_hygiene_query = [
    {"$match": 
        {"scores.Hygiene": 0}
    },
    {"$group": 
        {'_id': "$LocalAuthorityName",
         "num_estabs": {"$sum": 1}
        }
    },
    {"$sort": {"num_estabs": -1}}
]

bad_hygiene_data = list(establishments.aggregate(bad_hygiene_query))
print(f"There are {len(bad_hygiene_data)} Local Authorities with at least one Restaurant with a Hygiene Score of 0")
print()

pprint(bad_hygiene_data[0:10])

There are 0 Local Authorities with at least one Restaurant with a Hygiene Score of 0

[]


In [15]:
# Convert the result to a Pandas DataFrame
bad_hygiene_df = pd.DataFrame(bad_hygiene_data)

# Display the number of rows in the DataFrame
print(bad_hygiene_df.shape)

# Display the first 10 rows of the DataFrame
bad_hygiene_df.head(10)

(0, 0)
