In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sts


# Load the Yelp business dataset. The file is newline-delimited JSON
# (one business record per line), hence lines=True.
biz1_df = pd.read_json('Resources/yelp_academic_dataset_business.json', lines=True)

# Franchises to keep, matched on the exact business name.
# The trailing numbers are carried over from the original notes; they are
# presumably the number of matching rows per name -- not re-verified here.
FRANCHISE_NAMES = [
    "The Halal Guys",               # 10
    "Chipotle Mexican Grill",       # 183
    "Taco Bell",                    # 313
    "McDonald's",                   # 806
    "Panera Bread",                 # 157
    "Five Guys Burgers and Fries",  # 10
    "Five Guys",                    # 99
    "Texas Roadhouse",              # 24
    "Shake Shack",                  # 10
]

# Keep only the rows for those franchises. The explicit .copy() makes the
# filtered frame independent of the full dataset, so the column assignment
# below does not trigger pandas' SettingWithCopyWarning.
biz1_df = biz1_df.loc[biz1_df['name'].isin(FRANCHISE_NAMES)].copy()

# "Five Guys Burgers and Fries" is the same franchise as "Five Guys":
# fold both spellings into a single name.
biz1_df['name'] = biz1_df['name'].replace({"Five Guys Burgers and Fries": "Five Guys"})
biz1_df.head()

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state
11,2450 E Indian School Rd,"{'RestaurantsTakeOut': 'True', 'BusinessParkin...",1Dfx3zM-rW4n-31KeC8sJg,"Restaurants, Breakfast & Brunch, Mexican, Taco...",Phoenix,"{'Monday': '7:0-0:0', 'Tuesday': '7:0-0:0', 'W...",1,33.495194,-112.028588,Taco Bell,85016,18,3.0,AZ
298,2340 E Baseline Rd,"{'RestaurantsReservations': 'False', 'Restaura...",vg4ynLZ0yJ89pT4s_CEMzQ,"Fast Food, Restaurants, Burgers, Food",Phoenix,"{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...",1,33.378699,-112.031013,Five Guys,85042,23,3.5,AZ
381,88 E Ocotillo Rd,"{'BusinessAcceptsCreditCards': 'True', 'BikePa...",rtKB12BkQVb3x1e4diTIeA,"Food, Sandwiches, Restaurants, Soup, Bakeries,...",Chandler,"{'Monday': '6:0-21:0', 'Tuesday': '6:0-21:0', ...",0,33.248168,-111.839765,Panera Bread,85248,21,2.5,AZ
542,3475 Las Vegas Blvd S,"{'BusinessParking': '{'garage': True, 'street'...",dusNIzdCaH6EoLl2hRy6cQ,"Fast Food, Mexican, Restaurants",Las Vegas,"{'Monday': '10:0-0:0', 'Tuesday': '10:0-0:0', ...",1,36.120095,-115.171769,Chipotle Mexican Grill,89109,115,3.5,NV
628,9350 Yonge Street,"{'OutdoorSeating': 'True', 'Caters': 'True', '...",MPPlVchUxAKCS-Dy-tMGAg,"Salad, Bagels, Breakfast & Brunch, Sandwiches,...",Richmond Hill,"{'Monday': '6:0-22:0', 'Tuesday': '6:0-22:0', ...",1,43.856858,-79.434457,Panera Bread,L4C 5G2,53,3.0,ON


In [7]:
# Condense the filtered business data to the columns used for the per-city
# review analysis. Selecting with a list of labels already returns a DataFrame,
# so no pd.DataFrame(...) wrapper is needed; .copy() keeps biz4_df independent
# of biz1_df.
biz4_df = biz1_df[['name', 'city', 'review_count', 'stars']].copy()
biz4_df.head()

Unnamed: 0,name,city,review_count,stars
11,Taco Bell,Phoenix,18,3.0
298,Five Guys,Phoenix,23,3.5
381,Panera Bread,Chandler,21,2.5
542,Chipotle Mexican Grill,Las Vegas,115,3.5
628,Panera Bread,Richmond Hill,53,3.0


In [13]:
# Total number of reviews for each franchise in each city:
# one row per city, one column per franchise.
#
# The aggregation reads biz1_df rather than biz4_df on purpose. This cell
# rebinds biz4_df to the pivoted table, which no longer has 'name'/'city'
# columns to group on, so grouping biz4_df itself raises KeyError the second
# time the cell is run. biz1_df carries the same name/city/review_count data.
biz4_df = (
    biz1_df
    .groupby(['name', 'city'])['review_count']
    .sum()
    .unstack('name')  # franchises become the columns, cities stay as the index
    .fillna(0)        # franchise/city pairs with no rows get 0 reviews
)

# Add the per-city total across all franchises and rank the cities by it
biz4_df['total review number'] = biz4_df.sum(axis=1)
biz4_df = biz4_df.sort_values(by='total review number', ascending=False)

biz4_df.head(10)

name,Chipotle Mexican Grill,Five Guys,McDonald's,Panera Bread,Shake Shack,Taco Bell,Texas Roadhouse,The Halal Guys,total review number
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Las Vegas,1636.0,837.0,3448.0,881.0,3631.0,1083.0,0.0,619.0,12135.0
Phoenix,1098.0,628.0,1519.0,757.0,386.0,565.0,623.0,108.0,5684.0
Charlotte,546.0,307.0,709.0,614.0,228.0,325.0,0.0,0.0,2729.0
Scottsdale,633.0,212.0,448.0,145.0,495.0,230.0,0.0,0.0,2163.0
Henderson,301.0,315.0,466.0,476.0,213.0,201.0,0.0,0.0,1972.0
Toronto,511.0,239.0,631.0,162.0,0.0,31.0,0.0,223.0,1797.0
Tempe,305.0,133.0,357.0,153.0,0.0,184.0,246.0,290.0,1668.0
Mesa,302.0,197.0,478.0,88.0,0.0,202.0,300.0,37.0,1604.0
Pittsburgh,399.0,154.0,243.0,330.0,0.0,119.0,84.0,0.0,1329.0
Gilbert,280.0,13.0,346.0,48.0,0.0,128.0,411.0,0.0,1226.0


In [16]:
def rank_cities_by_reviews(review_table, franchise):
    """Return the index of ``review_table`` ordered by one franchise's reviews.

    Parameters
    ----------
    review_table : pandas.DataFrame
        Table indexed by city with one review-count column per franchise.
    franchise : str
        Name of the column to rank by.

    Returns
    -------
    pandas.Index
        Every index label of ``review_table``, highest review count first.
        The sort is stable, so rows with equal counts keep the relative order
        they have in ``review_table``.

    Raises
    ------
    KeyError
        If ``franchise`` is not a column of ``review_table``.
    """
    ranked = review_table.sort_values(by=franchise, ascending=False, kind='mergesort')
    return ranked.index


# For each franchise, save the cities ranked by number of reviews.
# NOTE: each variable holds the full ranking (every city in biz4_df), not only
# ten entries -- slice with [:10] wherever just the top 10 cities are wanted.
# biz4_df itself is not re-sorted here, so it keeps the order in which it was
# last displayed.
chipotle = rank_cities_by_reviews(biz4_df, 'Chipotle Mexican Grill')
fiveguys = rank_cities_by_reviews(biz4_df, 'Five Guys')


Index(['Las Vegas', 'Phoenix', 'Henderson', 'Charlotte', 'Toronto',
       'Scottsdale', 'Mesa', 'Calgary', 'Pittsburgh', 'North Las Vegas',
       ...
       'Coraopolis', 'East Cleveland', 'Waunakee', 'Carnegie', 'Mahomet',
       'Dallas', 'Stallings', 'Monongahela', 'Berea', 'Clarkson'],
      dtype='object', name='city', length=240)