In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sts

# Load the Yelp business dataset (JSON Lines: one business record per line)
biz1_df = pd.read_json('Resources/yelp_academic_dataset_business.json', lines=True)

# Franchise names to keep. The trailing numbers are the location counts
# reported by value_counts() for each raw name in this dataset.
FRANCHISE_NAMES = [
    "The Halal Guys",               # 10
    "Chipotle Mexican Grill",       # 183
    "Taco Bell",                    # 313
    "McDonald's",                   # 806
    "Panera Bread",                 # 157
    "Five Guys Burgers and Fries",  # 10
    "Five Guys",                    # 99
    "Texas Roadhouse",              # 24
    "Shake Shack",                  # 10
]

# Keep only the rows whose name exactly matches one of the franchises.
# .copy() makes the subset an independent frame, so the column assignment
# below does not trigger pandas' SettingWithCopyWarning.
biz1_df = biz1_df.loc[biz1_df['name'].isin(FRANCHISE_NAMES), :].copy()

# The chain appears under two spellings; fold "Five Guys Burgers and Fries"
# into "Five Guys" so both are counted as a single franchise.
biz1_df['name'] = biz1_df['name'].replace({"Five Guys Burgers and Fries": "Five Guys"})
biz1_df['name'].value_counts()

McDonald's                806
Taco Bell                 313
Chipotle Mexican Grill    183
Panera Bread              157
Five Guys                 109
Texas Roadhouse            24
Shake Shack                10
The Halal Guys             10
Name: name, dtype: int64

In [32]:
# Narrow the franchise rows down to the identity, location and rating columns
biz2_df = biz1_df.loc[
    :,
    [
        "name",
        "latitude",
        "longitude",
        "city",
        "state",
        "review_count",
        "stars",
    ],
]
biz2_df.head()

Unnamed: 0,name,latitude,longitude,city,state,review_count,stars
11,Taco Bell,33.495194,-112.028588,Phoenix,AZ,18,3.0
298,Five Guys,33.378699,-112.031013,Phoenix,AZ,23,3.5
381,Panera Bread,33.248168,-111.839765,Chandler,AZ,21,2.5
542,Chipotle Mexican Grill,36.120095,-115.171769,Las Vegas,NV,115,3.5
628,Panera Bread,43.856858,-79.434457,Richmond Hill,ON,53,3.0


In [33]:
# Total review_count for every (restaurant, star rating) pair.
# review_count is the only column left after grouping, so a plain .sum()
# on the groupby would give the same table.
biz3_df = (
    biz2_df.loc[:, ["name", "stars", "review_count"]]
    .groupby(["name", "stars"])
    .agg({"review_count": "sum"})
)
biz3_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,review_count
name,stars,Unnamed: 2_level_1
Chipotle Mexican Grill,1.0,3
Chipotle Mexican Grill,1.5,285
Chipotle Mexican Grill,2.0,316
Chipotle Mexican Grill,2.5,1978
Chipotle Mexican Grill,3.0,3769


In [34]:
# Build the stars x franchise table of summed review counts.
# It is derived from biz2_df rather than by reshaping biz3_df in place, so
# re-running this cell always gives the same result (unstacking an already
# unstacked biz3_df a second time would fail).
biz3_df = (
    biz2_df.groupby(["name", "stars"])["review_count"]
    .sum()
    .unstack("name")   # move the franchise names from the row index to the columns
    .fillna(0)         # a franchise with no locations at a given rating has 0 reviews
)

# Rows are star ratings (1.0 to 5.0), columns are franchises, and each value is
# the total review_count over all locations of that franchise with that rating.
biz3_df

name,Chipotle Mexican Grill,Five Guys,McDonald's,Panera Bread,Shake Shack,Taco Bell,Texas Roadhouse,The Halal Guys
stars,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1.0,3.0,0.0,684.0,0.0,0.0,56.0,0.0,0.0
1.5,285.0,4.0,4759.0,113.0,0.0,552.0,0.0,0.0
2.0,316.0,0.0,5462.0,646.0,0.0,934.0,0.0,0.0
2.5,1978.0,67.0,2214.0,2346.0,0.0,1644.0,0.0,19.0
3.0,3769.0,738.0,912.0,2149.0,0.0,1139.0,242.0,344.0
3.5,2718.0,2637.0,230.0,1181.0,1774.0,470.0,3031.0,935.0
4.0,197.0,1473.0,54.0,32.0,3297.0,235.0,0.0,74.0
4.5,0.0,116.0,13.0,22.0,0.0,4.0,0.0,0.0
5.0,0.0,12.0,0.0,0.0,0.0,0.0,5.0,0.0


In [35]:
# Chipotle: bar chart of the total review count at each star rating
CMG = biz3_df["Chipotle Mexican Grill"].to_list()
CMG_xaxis = np.arange(len(CMG))   # one bar position per star-rating row
ticks = list(CMG_xaxis)

fig, ax = plt.subplots()
ax.bar(CMG_xaxis, CMG, color='black', alpha=0.5, align='center')
ax.set_xticks(ticks)
ax.set_xticklabels(biz3_df.index.to_list())   # label each bar with its star value
ax.set_title("Chipotle Ratings Spread")
ax.set_xlabel("Star Rating")
ax.set_ylabel("Number of Reviews")

# Write the chart to disk, then close the figure instead of displaying it
fig.savefig("Output/Chipotle_Bar_Chart.png")
plt.close(fig)

In [36]:
# Five Guys: bar chart of the total review count at each star rating
# (the CMG* variable names are carried over from the Chipotle cell)
CMG = biz3_df["Five Guys"].to_list()
CMG_xaxis = np.arange(len(CMG))   # one bar position per star-rating row
ticks = list(CMG_xaxis)

fig, ax = plt.subplots()
ax.bar(CMG_xaxis, CMG, color='red', alpha=0.5, align='center')
ax.set_xticks(ticks)
ax.set_xticklabels(biz3_df.index.to_list())   # label each bar with its star value
ax.set_title("Five Guys Ratings Spread")
ax.set_xlabel("Star Rating")
ax.set_ylabel("Number of Reviews")

# Write the chart to disk, then close the figure instead of displaying it
fig.savefig("Output/Five_Guys_Chart.png")
plt.close(fig)

In [37]:
# McDonald's: bar chart of the total review count at each star rating
# (the CMG* variable names are carried over from the Chipotle cell)
CMG = biz3_df["McDonald's"].to_list()
CMG_xaxis = np.arange(len(CMG))   # one bar position per star-rating row
ticks = list(CMG_xaxis)

fig, ax = plt.subplots()
ax.bar(CMG_xaxis, CMG, color='yellow', alpha=0.5, align='center')
ax.set_xticks(ticks)
ax.set_xticklabels(biz3_df.index.to_list())   # label each bar with its star value
ax.set_title("McDonald's Ratings Spread")
ax.set_xlabel("Star Rating")
ax.set_ylabel("Number of Reviews")

# Write the chart to disk, then close the figure instead of displaying it
fig.savefig("Output/McDonalds_Chart.png")
plt.close(fig)

In [38]:
# Panera Bread: bar chart of the total review count at each star rating
# (the CMG* variable names are carried over from the Chipotle cell)
CMG = biz3_df["Panera Bread"].to_list()
CMG_xaxis = np.arange(len(CMG))   # one bar position per star-rating row
ticks = list(CMG_xaxis)

fig, ax = plt.subplots()
ax.bar(CMG_xaxis, CMG, color='brown', alpha=0.5, align='center')
ax.set_xticks(ticks)
ax.set_xticklabels(biz3_df.index.to_list())   # label each bar with its star value
ax.set_title("Panera Ratings Spread")
ax.set_xlabel("Star Rating")
ax.set_ylabel("Number of Reviews")

# Write the chart to disk, then close the figure instead of displaying it
fig.savefig("Output/Panera_Chart.png")
plt.close(fig)

In [39]:
# Shake Shack: bar chart of the total review count at each star rating
# (the CMG* variable names are carried over from the Chipotle cell)
CMG = biz3_df["Shake Shack"].to_list()
CMG_xaxis = np.arange(len(CMG))   # one bar position per star-rating row
ticks = list(CMG_xaxis)

fig, ax = plt.subplots()
ax.bar(CMG_xaxis, CMG, color='green', alpha=0.5, align='center')
ax.set_xticks(ticks)
ax.set_xticklabels(biz3_df.index.to_list())   # label each bar with its star value
ax.set_title("Shake Shack Ratings Spread")
ax.set_xlabel("Star Rating")
ax.set_ylabel("Number of Reviews")

# Write the chart to disk, then close the figure instead of displaying it
fig.savefig("Output/Shake_Shack_Chart.png")
plt.close(fig)

In [40]:
# Taco Bell: bar chart of the total review count at each star rating
# (the CMG* variable names are carried over from the Chipotle cell)
CMG = biz3_df["Taco Bell"].to_list()
CMG_xaxis = np.arange(len(CMG))   # one bar position per star-rating row
ticks = list(CMG_xaxis)

fig, ax = plt.subplots()
ax.bar(CMG_xaxis, CMG, color='purple', alpha=0.5, align='center')
ax.set_xticks(ticks)
ax.set_xticklabels(biz3_df.index.to_list())   # label each bar with its star value
ax.set_title("Taco Bell Ratings Spread")
ax.set_xlabel("Star Rating")
ax.set_ylabel("Number of Reviews")

# Write the chart to disk, then close the figure instead of displaying it
fig.savefig("Output/Taco_Bell_Chart.png")
plt.close(fig)

In [41]:
# Texas Roadhouse: bar chart of the total review count at each star rating
# (the CMG* variable names are carried over from the Chipotle cell)
CMG = biz3_df["Texas Roadhouse"].to_list()
CMG_xaxis = np.arange(len(CMG))   # one bar position per star-rating row
ticks = list(CMG_xaxis)

fig, ax = plt.subplots()
ax.bar(CMG_xaxis, CMG, color='orange', alpha=0.5, align='center')
ax.set_xticks(ticks)
ax.set_xticklabels(biz3_df.index.to_list())   # label each bar with its star value
ax.set_title("Texas Roadhouse Ratings Spread")
ax.set_xlabel("Star Rating")
ax.set_ylabel("Number of Reviews")

# Write the chart to disk, then close the figure instead of displaying it
fig.savefig("Output/Texas_Roadhouse_Chart.png")
plt.close(fig)

In [42]:
# The Halal Guys: bar chart of the total review count at each star rating
# (the CMG* variable names are carried over from the Chipotle cell)
CMG = biz3_df["The Halal Guys"].to_list()
CMG_xaxis = np.arange(len(CMG))   # one bar position per star-rating row
ticks = list(CMG_xaxis)

fig, ax = plt.subplots()
ax.bar(CMG_xaxis, CMG, color='pink', alpha=0.5, align='center')
ax.set_xticks(ticks)
ax.set_xticklabels(biz3_df.index.to_list())   # label each bar with its star value
ax.set_title("Halal Guys Ratings Spread")
ax.set_xlabel("Star Rating")
ax.set_ylabel("Number of Reviews")

# Write the chart to disk, then close the figure instead of displaying it
fig.savefig("Output/Halal_Guys_Chart.png")
plt.close(fig)