In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.ticker import MaxNLocator

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere beneath /kaggle/input.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Raw inspection results, read from the Kaggle input dataset.
nyc = pd.read_csv(
    '/kaggle/input/dohmh-new-york-city-restaurant-inspection-2023/'
    'DOHMH_New_York_City_Restaurant_Inspection_Results.csv'
)

# **Original Data Processing**

In [None]:
# Keep only the inspections dated strictly after 2022-01-01; earlier rows are dropped.
# The date column is parsed to datetimes first so the comparison is chronological.
nyc['INSPECTION DATE'] = pd.to_datetime(nyc['INSPECTION DATE'])
inspected_after_cutoff = nyc['INSPECTION DATE'] > pd.Timestamp('2022-01-01')
filtered_nyc = nyc.loc[inspected_after_cutoff]

In [None]:
# Keep only the rows that have a ZIPCODE; rows where it is missing are dropped.
has_zipcode = filtered_nyc['ZIPCODE'].notna()
filtered_nyc = filtered_nyc[has_zipcode]

In [None]:
# Save the filtered dataframe as a CSV in the current working directory.
# index=False keeps the row index out of the file; without it the index is written
# as an unnamed first column and comes back as a spurious "Unnamed: 0" column
# when the file is read again.
filtered_nyc.to_csv('nyc-restaurant-inspection-filtered.csv', index=False)

In [None]:
# Read the filtered inspections back in from the Kaggle working directory.
df_nyc = pd.read_csv(
    "/kaggle/working/"
    "nyc-restaurant-inspection-filtered.csv"
)

In [None]:
# Pull the coordinate and score columns out of the filtered inspections.
latitudes, longitudes, score = (
    df_nyc[column] for column in ('Latitude', 'Longitude', 'SCORE')
)

# One (latitude, longitude, score) tuple per inspection row.
data = [row for row in zip(latitudes, longitudes, score)]

# Three-column frame consumed by the map plots.
df = pd.DataFrame(data=data, columns=['Latitudes', 'Longitudes', 'Score'])

In [None]:
# Rows whose ACTION is one of the two DOHMH closure outcomes.
closure_actions = [
    'Establishment Closed by DOHMH. Violations were cited in the following area(s) and those requiring immediate action were addressed.',
    'Establishment re-closed by DOHMH.',
]
df_closed = df_nyc.loc[df_nyc['ACTION'].isin(closure_actions)]

In [None]:
# Extract the latitudes and longitudes of closed restaurants
latitudes_closed = df_closed['Latitude']
longitudes_closed = df_closed['Longitude']

# Pair each latitude with its longitude
data_closed = list(zip(latitudes_closed, longitudes_closed))

# Add the 'Latitudes'/'Longitudes' columns used by the map plot to df_closed
# instead of replacing df_closed with a two-column frame. Later cells still read
# df_closed['BORO'] and df_closed['CUISINE DESCRIPTION']; overwriting the frame
# removed those columns and raised KeyError on a fresh Restart & Run All.
# Keeping the source columns also makes this cell safe to re-run.
df_closed = df_closed.reset_index(drop=True).assign(
    Latitudes=latitudes_closed.to_numpy(),
    Longitudes=longitudes_closed.to_numpy(),
)

# **Core Data Analysis**

In [None]:
# Inspection rows per borough, leaving out rows whose BORO is the string '0'.
boros = df_nyc[df_nyc['BORO'].ne('0')]
boro_inspections = boros['BORO'].value_counts()

# Pie chart of each borough's share, with the legend anchored to the figure's right edge.
plt.gca().axis("equal")
pie = plt.pie(
    boro_inspections,
    startangle=0,
    autopct='%1.0f%%',
    pctdistance=0.9,
    radius=1.2,
)
wedges = pie[0]
labels = boro_inspections.index
plt.title('Restaurant Distribution by Borough', weight='bold', size=14)
plt.legend(
    wedges,
    labels,
    loc="center right",
    bbox_to_anchor=(1, 0.5),
    bbox_transform=plt.gcf().transFigure,
    fontsize=10,
)
plt.subplots_adjust(left=0.0, bottom=0.1, right=0.85)

# Render the chart, then clear and close the current figure.
plt.show()
plt.clf()
plt.close()


In [None]:
import plotly.express as px

# Plot every inspection location on an OpenStreetMap base layer.
fig = px.scatter_mapbox(
    df,
    lat="Latitudes",
    lon="Longitudes",
    zoom=10,
    height=500,
    title='Restaurant Distribution by Location',
).update_layout(mapbox_style="open-street-map")
fig.show()

In [None]:
import plotly.express as px

# Plot the locations held in df_closed on an OpenStreetMap base layer.
fig = px.scatter_mapbox(
    df_closed,
    lat="Latitudes",
    lon="Longitudes",
    zoom=10,
    height=500,
    title='Closed Restaurant Distribution by Location',
).update_layout(mapbox_style="open-street-map")
fig.show()

In [None]:
import plotly.express as px

# Same map as the all-restaurants plot, with each point coloured by the `score`
# series (the SCORE column extracted when `df` was built) on the Pinkyl scale.
fig = px.scatter_mapbox(
    df,
    lat="Latitudes",
    lon="Longitudes",
    color=score,
    color_continuous_scale=px.colors.sequential.Pinkyl,
    zoom=10,
    height=500,
    title='Restaurant Inspection Scores by Location',
).update_layout(mapbox_style="open-street-map")
fig.show()

In [None]:
# Closure rows per borough, leaving out rows whose BORO is the string '0'.
boros_closed = df_closed[df_closed['BORO'].ne('0')]
boro_closed_counts = boros_closed['BORO'].value_counts()

# Pie chart of each borough's share of closures, legend anchored to the figure's right edge.
plt.gca().axis("equal")
pie = plt.pie(
    boro_closed_counts,
    startangle=0,
    autopct='%1.0f%%',
    pctdistance=0.9,
    radius=1.2,
)
wedges = pie[0]
labels = boro_closed_counts.index
plt.title('Closed Restaurant Distribution by Borough', weight='bold', size=14)
plt.legend(
    wedges,
    labels,
    loc="center right",
    bbox_to_anchor=(1, 0.5),
    bbox_transform=plt.gcf().transFigure,
    fontsize=10,
)
plt.subplots_adjust(left=0.0, bottom=0.1, right=0.85)

# Render the chart, then clear and close the current figure.
plt.show()
plt.clf()
plt.close()

In [None]:
# Closure rate per borough: closure rows divided by all inspection rows,
# matched on the borough label.
boros_closed_percent = boro_closed_counts / boro_inspections
# Name the column explicitly. The bar-chart cell reads boros_closed_percent['BORO'],
# but the name inherited from value_counts() depends on the pandas version
# (the column name before 2.0, 'count' from 2.0 on), so relying on it can KeyError.
boros_closed_percent = boros_closed_percent.to_frame(name='BORO')


In [None]:
# Bar chart of the closure rate per borough, split into below-average (blue) and
# above-average (red) bars around a grey city-wide average line.
# Citation for graph design https://towardsdatascience.com/5-steps-to-build-beautiful-bar-charts-with-python-3691d434117a

# City-wide average: all closure rows divided by all inspection rows.
# The split below expects boros_closed_percent to expose its values under a 'BORO' column.
average = boro_closed_counts.sum() / boro_inspections.sum()
below_average = boros_closed_percent[boros_closed_percent['BORO']<average]
above_average = boros_closed_percent[boros_closed_percent['BORO']>=average]

# Colours
colors_high = ["#ff5a5f", "#c81d25"] # Extreme colours of the high scale
colors_low = ["#2196f3","#bbdefb"] # Extreme colours of the low scale

# Create the figure and axes objects, specify the size and the dots per inches 
fig, ax = plt.subplots(figsize=(13.33,7.5), dpi = 96)

# Create the grid 
ax.grid(which="major", axis='x', color='#DAD8D7', alpha=0.5, zorder=1)
ax.grid(which="major", axis='y', color='#DAD8D7', alpha=0.5, zorder=1)

# Reformat x-axis label and tick labels
ax.set_xlabel('Borough', fontsize=12, labelpad=10) # x-axis label placed at the bottom
ax.xaxis.set_label_position("bottom")

# Reformat y-axis
ax.set_ylabel('Percent of Restaurants Closed Down', fontsize=12, labelpad=10)
ax.yaxis.set_label_position("left")
ax.yaxis.set_major_locator(MaxNLocator(integer=True))
ax.yaxis.set_tick_params(pad=2, labeltop=False, labelbottom=True, bottom=False, labelsize=12)

# Hide the top, left and bottom spines
ax.spines[['top','left','bottom']].set_visible(False)

# Make the remaining (right) spine slightly thicker
ax.spines['right'].set_linewidth(1.1)

# Add in red line and rectangle on top (positions are in figure coordinates)
ax.plot([0.12, .9], [.98, .98], transform=fig.transFigure, clip_on=False, color='#E3120B', linewidth=.6)
ax.add_patch(plt.Rectangle((0.12,.98), 0.04, -0.02, facecolor='#E3120B', transform=fig.transFigure, clip_on=False, linewidth = 0))

# Add in title and subtitle
ax.text(x=0.12, y=.93, s="Percent of Restaurants Closed in Each Borough", transform=fig.transFigure, ha='left', fontsize=14, weight='bold', alpha=.8)
ax.text(x=0.12, y=.90, s="Calculated as a percent of all inspections which resulted in closing down a restaurant in that borough in 2023", transform=fig.transFigure, ha='left', fontsize=12, alpha=.8)

# Set source text
ax.text(x=0.1, y=0.12, s="Source: Department of Health and Mental Hygiene (DOHMH) May 7, 2023 ", transform=fig.transFigure, ha='left', fontsize=10, alpha=.7)

# Adjust the margins around the plot area
plt.subplots_adjust(left=None, bottom=0.2, right=None, top=0.85, wspace=None, hspace=None)

# Set a white background
fig.patch.set_facecolor('white')

# Colormap - Build the colour maps; each group is normalised between its own extreme and the average
cmap_low = mpl.colors.LinearSegmentedColormap.from_list("low_map", colors_low, N=256)
cmap_high = mpl.colors.LinearSegmentedColormap.from_list("high_map", colors_high, N=256)
norm_low = mpl.colors.Normalize(below_average['BORO'].min(), average) # linearly normalizes data into the [0.0, 1.0] interval
norm_high = mpl.colors.Normalize(average, above_average['BORO'].max())

# Plot bars and average (horizontal) line
bar1 = ax.bar(below_average.index, below_average['BORO'], color=cmap_low(norm_low(below_average['BORO'])), width=0.6, label='Below Average', zorder=2)
bar2 = ax.bar(above_average.index, above_average['BORO'], color=cmap_high(norm_high(above_average['BORO'])), width=0.6, label='Above Average', zorder=2)
plt.axhline(y=average, color = 'grey', linewidth=3)

# Add label on top of each bar (value rounded to 4 decimals)
ax.bar_label(bar1, labels=[f'{round(e, 4)}' for e in below_average['BORO']], padding=3, color='black', fontsize=8) 
ax.bar_label(bar2, labels=[f'{round(e, 4)}' for e in above_average['BORO']], padding=3, color='black', fontsize=8) 

# Determine the y-limits of the plot
ymin, ymax = ax.get_ylim()
# Convert the average to an axes fraction and nudge it 0.03 upwards for the label.
# ymin is not used, so this presumes the y-axis starts at 0.
y_pos = average/ymax + 0.03
# Annotate the average line
ax.text(0.88, y_pos, f'Percent Across Entire City = {average:f}', ha='right', va='center', transform=ax.transAxes, size=8, zorder=3)

# Add legend
ax.legend(loc="best", ncol=2, bbox_to_anchor=[1, 1.07], borderaxespad=0, frameon=False, fontsize=8)

In [None]:
# Number of inspection rows per cuisine, as a {cuisine: count} dict.
styles_inspections = df_nyc['CUISINE DESCRIPTION'].value_counts()
styles_inspections = styles_inspections.to_dict()

# Cuisines with fewer rows than this threshold are folded into a single 'Other' slice.
MIN_INSPECTIONS_FOR_OWN_SLICE = 2500

import itertools  # no longer used in this cell; left in place in case other cells depend on the import

# Bucket explicitly instead of with itertools.groupby: groupby only merges
# consecutive keys, so it relied on the dict being sorted by count. A cuisine
# that is itself called 'Other' is added to the bucket rather than being
# overwritten by it.
newdic_violations = {}
other_total = 0
for cuisine, count in styles_inspections.items():
    if count < MIN_INSPECTIONS_FOR_OWN_SLICE or cuisine == 'Other':
        other_total += count
    else:
        newdic_violations[cuisine] = count
if other_total:
    newdic_violations['Other'] = other_total

labels = list(newdic_violations.keys())
sizes = list(newdic_violations.values())

fig, ax = plt.subplots()

ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=0)
ax.axis('equal')
plt.tight_layout()
plt.title('Restaurant Cuisine Distribution', weight='bold', size=14)

plt.show()





In [None]:
# Number of closure rows per cuisine, as a {cuisine: count} dict.
styles_closed = df_closed['CUISINE DESCRIPTION'].value_counts()
styles_closed = styles_closed.to_dict()

# Cuisines with fewer closure rows than this threshold are folded into a single 'Other' slice.
MIN_CLOSURES_FOR_OWN_SLICE = 130

import itertools  # no longer used in this cell; left in place in case other cells depend on the import

# Bucket explicitly instead of with itertools.groupby: groupby only merges
# consecutive keys, so it relied on the dict being sorted by count. A cuisine
# that is itself called 'Other' is added to the bucket rather than being
# overwritten by it.
newdic_closed = {}
other_closed_total = 0
for cuisine, count in styles_closed.items():
    if count < MIN_CLOSURES_FOR_OWN_SLICE or cuisine == 'Other':
        other_closed_total += count
    else:
        newdic_closed[cuisine] = count
if other_closed_total:
    newdic_closed['Other'] = other_closed_total

labels = list(newdic_closed.keys())
sizes = list(newdic_closed.values())

fig, ax = plt.subplots()

ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=0)
ax.axis('equal')
plt.tight_layout()
plt.title('Restaurant Cuisine Closures Distribution', weight='bold', size=14)

plt.show()


In [None]:
# Closure rate per cuisine for the cuisines that have their own pie slice, plus
# three cuisines added by hand. 'Other' is skipped because it is a bucket, not a cuisine.
styles = list(newdic_violations.keys())
styles = styles + ["Indian", "Thai", "Japanese"]

# A cuisine with no closure rows gets a rate of 0 instead of raising KeyError, and
# a cuisine with no inspection rows is skipped (no rate can be computed for it).
newdic_percent = {
    cuisine: styles_closed.get(cuisine, 0) / styles_inspections[cuisine]
    for cuisine in styles
    if cuisine != "Other" and styles_inspections.get(cuisine)
}

# Single-column frame (column label 0), indexed by cuisine.
styles_closed_percent = pd.Series(newdic_percent).to_frame()

# Bar chart of the closure rate per cuisine, split into below-average (blue) and
# above-average (red) bars around a grey overall-average line.
# Citation for graph design https://towardsdatascience.com/5-steps-to-build-beautiful-bar-charts-with-python-3691d434117a

# Overall average: all closure rows divided by all inspection rows (rows with a cuisine value).
# styles_closed_percent holds its values in the column labelled 0.
average = df_closed['CUISINE DESCRIPTION'].value_counts().sum() / df_nyc['CUISINE DESCRIPTION'].value_counts().sum()
below_average = styles_closed_percent[styles_closed_percent[0]<average]
above_average = styles_closed_percent[styles_closed_percent[0]>=average]

# Colours
colors_high = ["#ff5a5f", "#c81d25"] # Extreme colours of the high scale
colors_low = ["#2196f3","#bbdefb"] # Extreme colours of the low scale

# Create the figure and axes objects, specify the size and the dots per inches 
fig, ax = plt.subplots(figsize=(35,12.5), dpi = 96)
# NOTE: rcParams is global, so this default font size also applies to every figure drawn afterwards
plt.rcParams.update({'font.size': 13})


# Create the grid 
ax.grid(which="major", axis='x', color='#DAD8D7', alpha=0.5, zorder=1)
ax.grid(which="major", axis='y', color='#DAD8D7', alpha=0.5, zorder=1)

# Reformat x-axis label and tick labels
ax.set_xlabel('Cuisine Style', fontsize=24, labelpad=10)
ax.xaxis.set_label_position("bottom")

# Reformat y-axis
ax.set_ylabel('Percent of Restaurants Closed Down', fontsize=24, labelpad=10)
ax.yaxis.set_label_position("left")
ax.yaxis.set_major_locator(MaxNLocator(integer=True))
ax.yaxis.set_tick_params(pad=2, labeltop=False, labelbottom=True, bottom=False, labelsize=24)

# Hide the top, left and bottom spines
ax.spines[['top','left','bottom']].set_visible(False)

# Make the remaining (right) spine slightly thicker
ax.spines['right'].set_linewidth(1.1)

# Add in red line and rectangle on top (positions are in figure coordinates)
ax.plot([0.12, .9], [.98, .98], transform=fig.transFigure, clip_on=False, color='#E3120B', linewidth=.6)
ax.add_patch(plt.Rectangle((0.12,.98), 0.04, -0.02, facecolor='#E3120B', transform=fig.transFigure, clip_on=False, linewidth = 0))

# Add in title and subtitle
ax.text(x=0.12, y=.93, s="Percent of Restaurants Closed in Each Style", transform=fig.transFigure, ha='left', fontsize=28, weight='bold', alpha=.8)
ax.text(x=0.12, y=.90, s="Calculated as a percent of all inspections which resulted in closing down a restaurant in that style of cuisine in 2023", transform=fig.transFigure, ha='left', fontsize=24, alpha=.8)

# Set source text
ax.text(x=0.1, y=0.12, s="Source: Department of Health and Mental Hygiene (DOHMH) May 7, 2023 ", transform=fig.transFigure, ha='left', fontsize=20, alpha=.7)

# Adjust the margins around the plot area
plt.subplots_adjust(left=None, bottom=0.2, right=None, top=0.85, wspace=None, hspace=None)

# Set a white background
fig.patch.set_facecolor('white')

# Colormap - Build the colour maps; each group is normalised between its own extreme and the average
cmap_low = mpl.colors.LinearSegmentedColormap.from_list("low_map", colors_low, N=256)
cmap_high = mpl.colors.LinearSegmentedColormap.from_list("high_map", colors_high, N=256)
norm_low = mpl.colors.Normalize(below_average[0].min(), average) # linearly normalizes data into the [0.0, 1.0] interval
norm_high = mpl.colors.Normalize(average, above_average[0].max())

# Plot bars and average (horizontal) line
bar1 = ax.bar(below_average.index, below_average[0], color=cmap_low(norm_low(below_average[0])), width=0.6, label='Below Average', zorder=2)
bar2 = ax.bar(above_average.index, above_average[0], color=cmap_high(norm_high(above_average[0])), width=0.6, label='Above Average', zorder=2)
plt.axhline(y=average, color = 'grey', linewidth=3)

# Add label on top of each bar (value rounded to 4 decimals)
ax.bar_label(bar1, labels=[f'{round(e, 4)}' for e in below_average[0]], padding=3, color='black', fontsize=18) 
ax.bar_label(bar2, labels=[f'{round(e, 4)}' for e in above_average[0]], padding=3, color='black', fontsize=18) 

# Determine the y-limits of the plot
ymin, ymax = ax.get_ylim()
# Convert the average to an axes fraction and nudge it 0.03 upwards for the label.
# ymin is not used, so this presumes the y-axis starts at 0.
y_pos = average/ymax + 0.03
# Annotate the average line
ax.text(0.88, y_pos, f'Percent Across Entire City = {average:f}', ha='right', va='center', transform=ax.transAxes, size=16, zorder=3)

# Add legend
ax.legend(loc="best", ncol=2, bbox_to_anchor=[1, 1.07], borderaxespad=0, frameon=False, fontsize=16)

# **Income and Volunteer Zip Code Analysis**

In [None]:
# Household income statistics by location (latin-1 encoded CSV).
income_df = pd.read_csv(
    '/kaggle/input/us-household-income-stats-geo-locations/'
    'kaggle_income.csv',
    encoding='latin-1',
)

In [None]:
# Social-capital data keyed by zip code (latin-1 encoded CSV).
volunteering_df = pd.read_csv(
    '/kaggle/input/social-capital-data-zip-code/'
    'social_capital_zip (1).csv',
    encoding='latin-1',
)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Distinct zip codes found in the inspection data.
total_zipcodes = df_nyc['ZIPCODE'].value_counts().index

# Build three parallel lists, one entry per zip code that appears in all three
# datasets: mean inspection score, mean of the non-zero income values, and the
# first volunteering rate listed for that zip code.
zipcode_scores_y, zipcode_income_x, zipcode_volunteering_x = [], [], []
for zip_code in total_zipcodes:
    zip_incomes = income_df.loc[income_df['Zip_Code'] == zip_code, 'Mean']
    zip_volunteering = volunteering_df.loc[
        volunteering_df['zip'] == zip_code, 'volunteering_rate_zip'
    ]
    zip_scores = df_nyc.loc[df_nyc['ZIPCODE'] == zip_code, 'SCORE']

    # Skip zip codes missing from any one of the three sources.
    if zip_scores.empty or zip_incomes.empty or zip_volunteering.empty:
        continue

    zipcode_scores_y.append(zip_scores.mean())
    zipcode_income_x.append(zip_incomes[zip_incomes != 0].mean())
    zipcode_volunteering_x.append(zip_volunteering.iloc[0])

In [None]:
# Scatter of mean zip-code income against average inspection score, with
# horizontal reference lines at the score thresholds 13 and 27.
# axhline's xmin/xmax are fractions of the axes width (0 to 1), not data values,
# so the income maximum must not be passed there; the defaults already span the
# full width. This also avoids max() returning NaN when the income list holds a NaN.
plt.axhline(y=13, label='Maximum for an A', color="red")
plt.axhline(y=27, label='Maximum for an B', color="blue")
plt.scatter(zipcode_income_x, zipcode_scores_y, c="green")
plt.title('Zipcode Mean Income vs. Average Inspection Score', fontweight='bold')
plt.xlabel('Mean Zip Code Income', fontweight='bold')
plt.ylabel('Average Inspection Score', fontweight='bold')
plt.legend()
plt.show()

In [None]:
# Scatter of zip-code volunteering rate against average inspection score, with
# horizontal reference lines at the score thresholds 13 and 27.
ax = plt.gca()
for threshold, legend_label, line_colour in (
    (13, 'Maximum for an A', "red"),
    (27, 'Maximum for an B', "blue"),
):
    ax.axhline(y=threshold, label=legend_label, color=line_colour)
ax.scatter(zipcode_volunteering_x, zipcode_scores_y, c="green")
ax.set_title('Zipcode Mean Volunteering Rate vs. Average Inspection Score', fontweight='bold')
ax.set_xlabel('Mean Zip Code Facebook Volunteering Rate', fontweight='bold')
ax.set_ylabel('Average Inspection Score', fontweight='bold')
ax.legend()
plt.show()

**Nutrition**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Fast-food nutrition table from the Kaggle input dataset.
df_nutrition = pd.read_csv(
    '/kaggle/input/fastfood-nutrition/'
    'fastfood.csv'
)

# Scatter of calories (y) against total fat (x), one point per row of the table.
ax = plt.figure(figsize=(10, 6)).gca()
ax.scatter(
    df_nutrition['total_fat'],
    df_nutrition['calories'],
    color='blue',
    alpha=0.5,
)
ax.set_xlabel('Total Fat (g)')
ax.set_ylabel('Calories')
ax.set_title('Calorie Content versus Total Fat Content of Fast Food Items')
plt.show()

In [None]:
import pandas as pd

nutrition_data = pd.read_csv('/kaggle/input/fastfood-nutrition/fastfood.csv')
top_restaurants = pd.read_csv('/kaggle/input/top-50-fastfood-chains-in-usa/Top 50 Fast-Food Chains in USA.csv')

# Rename the "Fast-Food Chains" column to "restaurant" so both tables share a key.
top_restaurants = top_restaurants.rename(columns={"Fast-Food Chains": "restaurant"})

# Inner-merge on "restaurant": only chains present in both tables are kept.
merged_data = pd.merge(nutrition_data, top_restaurants, on='restaurant')

# ------------- Plot 1: mean total fat per chain -------------
mean_fat_by_restaurant = merged_data.groupby('restaurant')['total_fat'].mean()

# Sort the mean values in descending order
sorted_mean_fat = mean_fat_by_restaurant.sort_values(ascending=False)

sorted_mean_fat.plot(kind='bar', figsize=(12,6), xlabel='Restaurant Chain', ylabel='Mean Total Fat (g)', title='Mean Total Fat Content by Restaurant Chain')
plt.show()

# ------------- Plot 2: average sales per unit per chain -------------
mean_sales_by_restaurant = merged_data.groupby('restaurant')['Average Sales per Unit (Thousands - U.S Dollars)'].mean()

# Sort the mean values in descending order. The sorted sales get their own name
# (they used to overwrite sorted_mean_fat) and are the series that is plotted;
# previously the unsorted series was drawn.
sorted_mean_sales = mean_sales_by_restaurant.sort_values(ascending=False)

sorted_mean_sales.plot(kind='bar', figsize=(12,6), xlabel='Restaurant Chain', ylabel='Average Sales per Location (in Thousands of Dollars)', title='Average Sales Nationally by Restaurant Chain')
plt.show()

# ------------- Plot 3: mean fat against average sales -------------
# Both series come from the same groupby key, so they share one index order.
plt.figure(figsize=(10,6))
plt.scatter(mean_fat_by_restaurant, mean_sales_by_restaurant, color='blue', alpha=0.5)
plt.xlabel('Mean Fat Content')
plt.ylabel('Average Sales per Location (in Thousands of Dollars)')
plt.title('Average Fat Content vs. Average Sales per Location')
plt.show()

**Burger King Inspection Data**

In [None]:
# Looking into the fast food restaurants with high total fat and their inspection results
import pandas as pd
import matplotlib.pyplot as plt

# Inspection rows whose DBA is exactly 'BURGER KING'.
bk_insp = df_nyc.loc[df_nyc['DBA'].eq('BURGER KING')]

# Row counts per ACTION value and per borough for those inspections.
bk_vio, bk_boro = (
    bk_insp[column].value_counts() for column in ('ACTION', 'BORO')
)

# -------------Plot 1---------------------
# Pie chart of the borough split, labelled with the borough names.
plt.pie(
    bk_boro,
    labels=bk_boro.index,
    startangle=0,
    autopct='%1.0f%%',
    pctdistance=0.9,
    radius=1.2,
)
plt.title('Distribution of Burger King throughout NYC')
plt.show()

**Taco Bell Inspection Data**

In [None]:
# Looking into the fast food restaurants with high total fat and their inspection results
import pandas as pd
import matplotlib.pyplot as plt

# Inspection rows whose DBA is exactly 'TACO BELL'.
tb_insp = df_nyc.loc[df_nyc['DBA'].eq('TACO BELL')]

# Row counts per ACTION value and per borough for those inspections.
tb_vio, tb_boro = (
    tb_insp[column].value_counts() for column in ('ACTION', 'BORO')
)

# -------------Plot 2---------------------
# Pie chart of the borough split, labelled with the borough names.
plt.pie(
    tb_boro,
    labels=tb_boro.index,
    startangle=0,
    autopct='%1.0f%%',
    pctdistance=0.9,
    radius=1.2,
)
plt.title('Distribution of Taco Bell throughout NYC')
plt.show()

**Subway Inspection Data**

In [None]:
# Looking into the fast food restaurants with high total fat and their inspection results
import pandas as pd
import matplotlib.pyplot as plt

# Retrieve Subway inspections in NYC (rows whose DBA is exactly 'SUBWAY')
s_insp = df_nyc[df_nyc['DBA'] == 'SUBWAY']

# Row counts per inspection ACTION value
s_vio = s_insp['ACTION'].value_counts()

# Row counts per borough
s_boro = s_insp['BORO'].value_counts()

# -------------Plot 3---------------------
# Plot the borough distribution of Subway inspections. The labels must come from
# s_boro itself: the Taco Bell index that was used here has its own borough order
# (and possibly a different length), which mislabels the wedges.
plt.pie(s_boro, startangle=0, autopct='%1.0f%%', pctdistance=0.9, radius=1.2, labels=s_boro.index)

# Add title
plt.title('Distribution of Subway throughout NYC')

# Show the plot
plt.show()

In [None]:
# Show how many Burger King inspection rows carry each ACTION value.
print(bk_vio)

In [None]:
# Show how many Taco Bell inspection rows carry each ACTION value.
print(tb_vio)

In [None]:
# Show how many Subway inspection rows carry each ACTION value.
print(s_vio)

In [None]:
# Accumulator for the per-chain violation shares appended by the following cells.
violation_percentage = list()

In [None]:
# Hard-coded counts; presumably copied from the printed bk_vio output (TODO confirm).
bk_numerator, bk_remainder = 367, 2
bk_violation_percentage = bk_numerator / (bk_numerator + bk_remainder)
print(bk_violation_percentage)
violation_percentage += [bk_violation_percentage]

In [None]:
# Hard-coded counts; presumably copied from the printed tb_vio output (TODO confirm).
tb_numerator, tb_remainder = 175, 4
tb_violation_percentage = tb_numerator / (tb_numerator + tb_remainder)
print(tb_violation_percentage)
violation_percentage += [tb_violation_percentage]

In [None]:
# Hard-coded counts; presumably copied from the printed s_vio output (TODO confirm).
s_numerator = 937 + 10
s_remainder = 4 + 4
s_violation_percentage = s_numerator / (s_numerator + s_remainder)
violation_percentage += [s_violation_percentage]

In [None]:
# Violation share per chain, indexed by chain name. Built from the three named
# values rather than from the append-only violation_percentage list: re-running
# any of the append cells grows that list and makes it disagree with the
# three-label index.
v_per = pd.Series(
    [bk_violation_percentage, tb_violation_percentage, s_violation_percentage],
    index=['Burger King', 'Taco Bell', 'Subway'],
)
ax = v_per.plot.bar()

# The y-axis starts at 0.95, so bar heights exaggerate the differences between chains.
plt.ylim(0.95, 1)

# Write each bar's value at its top edge.
for container in ax.containers:
    ax.bar_label(container, label_type='edge', fontsize=10, padding=2)

# Add labels and title
plt.xlabel('Fast Food Restaurant')
plt.ylabel('Violation Percentage')
plt.title('Violation Percentage for NYC Fast Food')

# Show the plot
plt.show()

In [None]:
# Remove Dairy Queen from the fat averages. errors='ignore' makes the cell safe
# to re-run: without it a second run raises KeyError because the label is already gone.
mean_fat_by_restaurant = mean_fat_by_restaurant.drop('Dairy Queen', errors='ignore')

In [None]:
# Pair each chain's mean fat with that same chain's violation share.
# plt.scatter pairs two Series by position, and the two indexes are ordered
# differently (the groupby result is alphabetical, v_per is Burger King, Taco Bell,
# Subway), so the values are aligned on the chain name first. The inner join
# keeps only chains present in both series.
fat_vs_violations = pd.concat(
    [mean_fat_by_restaurant.rename('mean_fat'), v_per.rename('violation_share')],
    axis=1,
    join='inner',
)

plt.figure(figsize=(10,6))
plt.scatter(fat_vs_violations['mean_fat'], fat_vs_violations['violation_share'], color='blue', alpha=0.5)
plt.xlabel('Mean Fat Content')
plt.ylabel('Violation percentage per fast food')
plt.title('Average Fat Content vs. Violation Percentage per Fast Food')
plt.show()

In [None]:
# Remove Dairy Queen from the sales averages. errors='ignore' makes the cell safe
# to re-run: without it a second run raises KeyError because the label is already gone.
mean_sales_by_restaurant = mean_sales_by_restaurant.drop('Dairy Queen', errors='ignore')

In [None]:
# Pair each chain's average sales with that same chain's violation share.
# plt.scatter pairs two Series by position, and the two indexes are ordered
# differently (the groupby result is alphabetical, v_per is Burger King, Taco Bell,
# Subway), so the values are aligned on the chain name first. The inner join
# keeps only chains present in both series.
sales_vs_violations = pd.concat(
    [mean_sales_by_restaurant.rename('mean_sales'), v_per.rename('violation_share')],
    axis=1,
    join='inner',
)

plt.figure(figsize=(10,6))
plt.scatter(sales_vs_violations['mean_sales'], sales_vs_violations['violation_share'], color='blue', alpha=0.5)
plt.xlabel('Average Sales per Location (in Thousands of Dollars) ')
plt.ylabel('Violation percentage per fast food')
plt.title('Average Sales per Location vs. Violation Percentage per Fast Food')
plt.show()