# 레스토랑 정보 클리닝

In [4]:
import graphlab as gl
import pandas as pd
import sys
import numpy as np
import json

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [5]:
rt = gl.SFrame('./dataset/yelp_academic_dataset_business/')

## 1. 미국 내 레스토랑 정보만 가져온다.

In [6]:
## 원래 정보 갯수
rt.num_rows()

188593

In [7]:
# http://en.wikipedia.org/wiki/Extreme_points_of_the_United_States#Westernmost
def is_america(i):
    top = 49.3457868 # north lat
    left = -124.7844079 # west long
    right = -66.9513812 # east long
    bottom =  24.7433195 # south lat
    
    lat = i['latitude']
    lon = i['longitude']
    return (bottom <= lat <= top and left <= lon <= right)

In [8]:
## 조금 줄였다.
rt = rt[rt.apply( lambda x: is_america(x))]
rt.num_rows()

180276

## 2. 일별 근무 시간을 컬럼별로 쪼갠다.

In [9]:
dates = ['Sunday', 'Monday', 'Tuesday', 'Thursday', 'Friday', 'Saturday']
for i in dates:
    rt[i] = rt.apply(lambda x: x['hours'] and x['hours'].get(i) or '')
rt.head()

address,attributes,business_id,categories
,"{'GoodForMeal': ""{'dessert': False, ...",AjEbIBw6ZFfln7ePHha9PA,"Chicken Wings, Burgers, Caterers, Street Vend ..."
1335 rue Beaubien E,"{'GoodForMeal': ""{'dessert': False, ...",O8S5hYJ1SMc8fA4QBtVujA,"Breakfast & Brunch, Restaurants, French, ..."
211 W Monroe St,,bFzdJJ3wp3PZssNEsyU23g,"Insurance, Financial Services ..."
"20235 N Cave Creek Rd, Ste 1115 ...","{'BusinessParking': ""{'garage': False, ...",45bWSZtniwPRiqlivpS8Og,"Coffee & Tea, Food"
631 Bloor St W,"{'RestaurantsPriceRange2' : '1', 'BusinessParki ...",9A2quhZLyWk0akUetBd8hQ,"Food, Bakeries"
"3417 Derry Road E, Unit 103 ...","{'Alcohol': 'none', 'GoodForKids': 'True', ...",6OuOZAok8ikONMS_T3EzXg,"Restaurants, Thai"
1440 N. Dysart Ave,"{'GoodForMeal': ""{'dessert': False, ...",8-NRKkPY1UiFXW20WXKiXg,"Mexican, Restaurants"
209 Oakland Ave,"{'BusinessAcceptsCreditCa rds': 'True', ...",UTm5QZThPQlT35mkAcGOjg,"Flowers & Gifts, Gift Shops, Shopping ..."
4568 Highway 7 E,"{'RestaurantsAttire': 'casual', 'NoiseLevel': ...",KapTdGyGs7RK0c68Z6hhhg,"Restaurants, Japanese"
595 Markham Street,"{'GoodForMeal': ""{'dessert': False, ...",tZnSodhPwNr4bzrwJ1CSbw,"Cajun/Creole, Southern, Restaurants ..."

city,hours,is_open,latitude,longitude,name
Henderson,"{'Sunday': '17:0-23:0', 'Friday': '17:0-23:0', ...",0,35.9607337,-114.939821,CK'S BBQ & Catering
Montréal,"{'Monday': '10:0-22:0', 'Tuesday': '10:0-22:0', ...",0,45.5405031,-73.5993003,La Bastringue
Phoenix,,1,33.4499993,-112.0769793,Geico Insurance
Phoenix,"{'Monday': '5:30-20:0', 'Tuesday': '5:30-20:0', ...",1,33.6713751,-112.0300171,The Coffee Bean & Tea Leaf ...
Toronto,,0,43.6643776,-79.4144238,Bnc Cake House
Mississauga,,1,43.7129464,-79.6327631,Thai One On
Avondale,"{'Monday': '0:0-0:0', 'Tuesday': '0:0-0:0', ...",1,33.4481059352,-112.341302074,Filiberto's Mexican Food
Pittsburgh,"{'Monday': '9:0-18:0', 'Tuesday': '9:0-18:0', ...",1,40.4414214,-79.9564571,Maggie & Stella's Gifts
Markham,"{'Monday': '11:30-23:0', 'Tuesday': '11:30-23:0', ...",0,43.862484,-79.3069597,Sushi 8
Toronto,"{'Tuesday': '17:0-1:0', 'Friday': '17:0-1:0', ...",0,43.6641249,-79.4118861,Southern Accent Restaurant ...

neighborhood,postal_code,review_count,stars,state,Sunday,Monday,Tuesday
,89002,3,4.5,NV,17:0-23:0,,
Rosemont-La Petite-Patrie,H2G 1K7,5,4.0,QC,10:0-22:0,10:0-22:0,10:0-22:0
,85003,8,1.5,AZ,,,
,85024,63,4.0,AZ,6:30-19:0,5:30-20:0,5:30-20:0
Koreatown,M6G 1K8,7,4.0,ON,,,
Ridgewood,L4T 1A8,7,2.0,ON,,,
,85323,40,2.5,AZ,0:0-0:0,0:0-0:0,0:0-0:0
Oakland,15213,3,3.5,PA,,9:0-18:0,9:0-18:0
Unionville,L3R 1M5,12,1.5,ON,12:0-23:0,11:30-23:0,11:30-23:0
Palmerston,M6G 2L7,146,4.0,ON,17:0-1:0,,17:0-1:0

Thursday,Friday,Saturday
,17:0-23:0,17:0-23:0
10:0-22:0,10:0-22:0,10:0-22:0
,,
5:30-20:0,5:30-21:0,5:30-21:0
,,
,,
0:0-0:0,0:0-0:0,0:0-0:0
9:0-18:0,9:0-17:0,10:0-17:0
11:30-23:0,11:30-23:0,11:30-23:0
17:0-1:0,17:0-1:0,17:0-1:0


## 3. 추가 속성 정보 중 필요한 정보를 가져온다.

In [10]:
rt[0].get('attributes')

{'Alcohol': 'none',
 'BikeParking': 'False',
 'BusinessAcceptsCreditCards': 'True',
 'BusinessParking': "{'garage': False, 'street': True, 'validated': False, 'lot': True, 'valet': False}",
 'Caters': 'True',
 'DogsAllowed': 'True',
 'DriveThru': 'False',
 'GoodForKids': 'True',
 'GoodForMeal': "{'dessert': False, 'latenight': False, 'lunch': False, 'dinner': False, 'breakfast': False, 'brunch': False}",
 'HasTV': 'False',
 'OutdoorSeating': 'True',
 'RestaurantsAttire': 'casual',
 'RestaurantsDelivery': 'False',
 'RestaurantsGoodForGroups': 'True',
 'RestaurantsPriceRange2': '2',
 'RestaurantsReservations': 'False',
 'RestaurantsTableService': 'False',
 'RestaurantsTakeOut': 'True',
 'WheelchairAccessible': 'True',
 'WiFi': 'no'}

In [11]:
def get_attributes(x, attribute_name):
    if x.get('attributes') is not None:
        return x['attributes'].get(attribute_name) == 'True'
    else:
        return False;

In [12]:
rt['RestaurantsDelivery'] = rt.apply(lambda x: get_attributes(x, 'RestaurantsDelivery'))

In [13]:
rt['BusinessAcceptsCreditCards'] = rt.apply(lambda x: get_attributes(x, 'BusinessAcceptsCreditCards'))

In [14]:
rt['RestaurantsTakeOut'] = rt.apply(lambda x: get_attributes(x, 'RestaurantsTakeOut'))

In [15]:
def get_alcohol_attributes(x, attribute_name):
    if x.get('attributes') is not None:
        return x['attributes'].get(attribute_name) != 'none'
    else:
        return False;

In [16]:
rt['Alcohol'] = rt.apply(lambda x: get_alcohol_attributes(x, 'Alcohol'))

In [17]:
def get_price_range(x):
    if x.get('attributes') is not None:
        return x['attributes'].get('RestaurantsPriceRange2');
    else:
        return None;

In [18]:
rt['price'] = rt.apply(lambda x: get_price_range(x))

## 4. 영업 중이지 않은 가게는 제외한다.

In [19]:
rt = rt[rt['is_open'] == 1]
rt.head()

address,attributes,business_id,categories
211 W Monroe St,,bFzdJJ3wp3PZssNEsyU23g,"Insurance, Financial Services ..."
"20235 N Cave Creek Rd, Ste 1115 ...","{'BusinessParking': ""{'garage': False, ...",45bWSZtniwPRiqlivpS8Og,"Coffee & Tea, Food"
"3417 Derry Road E, Unit 103 ...","{'Alcohol': 'none', 'GoodForKids': 'True', ...",6OuOZAok8ikONMS_T3EzXg,"Restaurants, Thai"
1440 N. Dysart Ave,"{'GoodForMeal': ""{'dessert': False, ...",8-NRKkPY1UiFXW20WXKiXg,"Mexican, Restaurants"
209 Oakland Ave,"{'BusinessAcceptsCreditCa rds': 'True', ...",UTm5QZThPQlT35mkAcGOjg,"Flowers & Gifts, Gift Shops, Shopping ..."
2801 N 15th Ave,"{'Alcohol': 'full_bar', 'HasTV': 'True', ...",_c3ixq9jYKxhLUB0czi0ug,"Bars, Sports Bars, Dive Bars, Burgers, Nightl ..."
4216 Saint-Laurent Boul,"{'GoodForMeal': ""{'dessert': False, ...",bBUMib8l6Me1ZB1_Qkezkg,"Restaurants, Pakistani, Indian, Middle Eastern ..."
703 N Rancho Dr,"{'BusinessAcceptsCreditCa rds': 'True', ...",hTzcHtk4-0QJnFUbkKpd5Q,"Shopping, Fashion, Department Stores ..."
1549 N Rancho Dr,,UwIpS9UKsPiKAv1fiEYhqg,"Financial Services, Check Cashing/Pay-day Loans, ..."
,{'BusinessAcceptsCreditCa rds': 'True'} ...,YZCHr68c5aEVHz0bkq9K2g,"Home Services, Masonry/Concrete, ..."

city,hours,is_open,latitude,longitude,name
Phoenix,,1,33.4499993,-112.0769793,Geico Insurance
Phoenix,"{'Monday': '5:30-20:0', 'Tuesday': '5:30-20:0', ...",1,33.6713751,-112.0300171,The Coffee Bean & Tea Leaf ...
Mississauga,,1,43.7129464,-79.6327631,Thai One On
Avondale,"{'Monday': '0:0-0:0', 'Tuesday': '0:0-0:0', ...",1,33.4481059352,-112.341302074,Filiberto's Mexican Food
Pittsburgh,"{'Monday': '9:0-18:0', 'Tuesday': '9:0-18:0', ...",1,40.4414214,-79.9564571,Maggie & Stella's Gifts
Phoenix,"{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0', ...",1,33.4798071,-112.0911877,Original Hamburger Works
Montréal,,1,45.5180358,-73.5821744,Mysore Indian Cuisine
Las Vegas,,1,36.1783477,-115.1769162,Citi Trends
Las Vegas,"{'Monday': '10:0-19:0', 'Tuesday': '10:0-18:0', ...",1,36.1883858514,-115.186123699,Nevada Title And Payday Loans ...
Las Vegas,,1,36.2608162,-115.1711298,Park Stone Pavers

neighborhood,postal_code,review_count,stars,state,Sunday,Monday,Tuesday,Thursday
,85003,8,1.5,AZ,,,,
,85024,63,4.0,AZ,6:30-19:0,5:30-20:0,5:30-20:0,5:30-20:0
Ridgewood,L4T 1A8,7,2.0,ON,,,,
,85323,40,2.5,AZ,0:0-0:0,0:0-0:0,0:0-0:0,0:0-0:0
Oakland,15213,3,3.5,PA,,9:0-18:0,9:0-18:0,9:0-18:0
,85007,277,4.0,AZ,11:0-21:0,11:0-22:0,11:0-22:0,11:0-22:0
Plateau-Mont-Royal,H2W 1Z3,19,3.5,QC,,,,
,89106,4,4.0,NV,,,,
,89106,4,1.0,NV,8:0-17:0,10:0-19:0,10:0-18:0,10:0-18:0
,89031,20,5.0,NV,,,,

Friday,Saturday,RestaurantsDelivery,BusinessAcceptsCreditCard s ...,RestaurantsTakeOut,Alcohol,price
,,0,0,0,0,
5:30-21:0,5:30-21:0,0,1,1,1,1.0
,,0,1,1,0,2.0
0:0-0:0,0:0-0:0,0,1,1,0,1.0
9:0-17:0,10:0-17:0,0,1,0,1,2.0
11:0-23:0,11:0-22:0,0,1,1,1,1.0
,,1,1,1,1,2.0
,,0,1,0,1,2.0
10:0-19:0,9:0-16:0,0,0,0,0,
,,0,1,0,1,


In [20]:
rt.num_rows()

149518

## 5. 필요 없는 컬럼을 제거한다.

In [21]:
rt = rt.remove_columns(['neighborhood', 'attributes'])

## 6. 데이터 저장

In [20]:
rt.save('./final-dataset/restaurant-dataset')

## 7. 레스토랑이 아닌 정보가 있다는 사실도 알게 되었다. 추가로 정리한다.

In [22]:
import graphlab as gl
import pymysql as sql
import pymysql.cursors


from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

google_host = '35.237.205.46' # 구글 클라우드 호스트
google_user = 'root'
connection = sql.connect(host=google_host,
                         user=google_user,
                         password='8888',
                         db='ITM',
                         charset='utf8mb4',
                         cursorclass=pymysql.cursors.DictCursor)
connection

<pymysql.connections.Connection at 0x7f0b2c075f10>

In [23]:
import pandas as pd

In [24]:
rf = pd.read_sql('select * from restaurants', connection)
rf = gl.SFrame(rf)
rf.head(5)

id,address,business_id,city,longitude,latitude,name
1,211 W Monroe St,bFzdJJ3wp3PZssNEsyU23g,Phoenix,-112.077,33.45,Geico Insurance
2,"20235 N Cave Creek Rd, Ste 1115 ...",45bWSZtniwPRiqlivpS8Og,Phoenix,-112.03,33.6714,The Coffee Bean & Tea Leaf ...
3,"3417 Derry Road E, Unit 103 ...",6OuOZAok8ikONMS_T3EzXg,Mississauga,-79.6328,43.7129,Thai One On
4,1440 N. Dysart Ave,8-NRKkPY1UiFXW20WXKiXg,Avondale,-112.341,33.4481,Filiberto's Mexican Food
5,209 Oakland Ave,UTm5QZThPQlT35mkAcGOjg,Pittsburgh,-79.9565,40.4414,Maggie & Stella's Gifts

postal_code,review_count,stars,state,delivery_yn,card_yn,takeout_yn,alcohol_yn,price_category
85003,8,1.5,AZ,0,0,0,0,
85024,63,4.0,AZ,0,1,1,1,1.0
L4T 1A8,7,2.0,ON,0,1,1,0,2.0
85323,40,2.5,AZ,0,1,1,0,1.0
15213,3,3.5,PA,0,1,0,1,2.0

created_at,updated_at
2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00


In [25]:
cf = pd.read_sql('select * from categories', connection)
cf = gl.SFrame(cf)
cf.head(5)

id,category_name,category_type
1,insurance,
2,financial services,
3,coffee & tea,
4,food,
5,restaurants,


In [26]:
# 지형님께서 수동으로 필터링한 데이터...
food_word_id =  ["5", "4", "28", "16", "3", "29", "36", "14", "46", "197", "7", "45", "125", "184", "30", "156", "37", "53", "75", "144", "76", "54", "185", "313", "208", "124", "302", "220", "146", "227", "399", "161", "323", "6", "18", "19", "126", "304", "127", "485", "423", "74", "221", "205", "62", "270", "196", "383", "514", "312", "392", "352", "215", "139", "626", "95", "236", "808", "303", "247", "301", "190", "581", "564", "17", "642", "195", "463", "472", "504", "467", "390", "515", "888", "436", "347", "381", "807", "829", "421", "820", "415", "464", "248", "193", "689", "322", "429", "943", "649", "901", "524", "713", "802", "367", "766", "635", "557", "788", "920", "578", "646", "617", "518", "982", "613", "789", "225", "688", "724", "958", "639", "938", "727", "865", "675", "640", "873", "1026", "862", "887", "800", "175", "799", "962", "922", "735", "1015", "726", "249", "422", "521", "906", "226", "1109", "1107", "926", "1061", "869", "1136", "763", "641", "1167", "1145", "994", "1044", "1155", "944", "1246", "1108", "1143", "1233", "1191", "1225", "1200", "1158", "1214", "1091", "1263", "1253", "1254", "1267", "1237", "1248", "1280", "1249", "1282"] 
food_word_id = map(int, food_word_id)
food_word_id

[5,
 4,
 28,
 16,
 3,
 29,
 36,
 14,
 46,
 197,
 7,
 45,
 125,
 184,
 30,
 156,
 37,
 53,
 75,
 144,
 76,
 54,
 185,
 313,
 208,
 124,
 302,
 220,
 146,
 227,
 399,
 161,
 323,
 6,
 18,
 19,
 126,
 304,
 127,
 485,
 423,
 74,
 221,
 205,
 62,
 270,
 196,
 383,
 514,
 312,
 392,
 352,
 215,
 139,
 626,
 95,
 236,
 808,
 303,
 247,
 301,
 190,
 581,
 564,
 17,
 642,
 195,
 463,
 472,
 504,
 467,
 390,
 515,
 888,
 436,
 347,
 381,
 807,
 829,
 421,
 820,
 415,
 464,
 248,
 193,
 689,
 322,
 429,
 943,
 649,
 901,
 524,
 713,
 802,
 367,
 766,
 635,
 557,
 788,
 920,
 578,
 646,
 617,
 518,
 982,
 613,
 789,
 225,
 688,
 724,
 958,
 639,
 938,
 727,
 865,
 675,
 640,
 873,
 1026,
 862,
 887,
 800,
 175,
 799,
 962,
 922,
 735,
 1015,
 726,
 249,
 422,
 521,
 906,
 226,
 1109,
 1107,
 926,
 1061,
 869,
 1136,
 763,
 641,
 1167,
 1145,
 994,
 1044,
 1155,
 944,
 1246,
 1108,
 1143,
 1233,
 1191,
 1225,
 1200,
 1158,
 1214,
 1091,
 1263,
 1253,
 1254,
 1267,
 1237,
 1248,
 1280,
 1249,
 1282

In [27]:
filtered_ct = cf.filter_by(food_word_id, 'id')
filtered_ct

id,category_name,category_type
3,coffee & tea,
4,food,
5,restaurants,
6,thai,
7,mexican,
14,burgers,
16,sandwiches,
17,pakistani,
18,indian,
19,middle eastern,


In [28]:
crf = pd.read_sql('select * from restaurant_categories', connection)
crf = gl.SFrame(crf)
crf.head(5)

id,restaurant_id,category_id
1,1,1
2,1,2
3,2,3
4,2,4
5,3,5


In [29]:
filtered_crf = crf.filter_by(filtered_ct['id'], 'category_id')
filtered_crf

id,restaurant_id,category_id
3,2,3
4,2,4
5,3,5
6,3,6
7,4,7
8,4,5
15,6,14
17,6,16
18,6,5
19,7,5


In [30]:
filtered_rf = rf.filter_by(filtered_crf['restaurant_id'], 'id')
filtered_rf

id,address,business_id,city,longitude,latitude
2,"20235 N Cave Creek Rd, Ste 1115 ...",45bWSZtniwPRiqlivpS8Og,Phoenix,-112.03,33.6714
3,"3417 Derry Road E, Unit 103 ...",6OuOZAok8ikONMS_T3EzXg,Mississauga,-79.6328,43.7129
4,1440 N. Dysart Ave,8-NRKkPY1UiFXW20WXKiXg,Avondale,-112.341,33.4481
6,2801 N 15th Ave,_c3ixq9jYKxhLUB0czi0ug,Phoenix,-112.091,33.4798
7,4216 Saint-Laurent Boul,bBUMib8l6Me1ZB1_Qkezkg,Montréal,-73.5822,45.518
11,3040 Carnegie Ave,gJ5xSt6147gkcZ9Es0WxlA,Cleveland,-81.6664,41.5
12,4131 N 83rd Ave,3ByGQOVgds2YEu6kzl-XEQ,Phoenix,-112.236,33.4953
13,746 Street Clair Avenue W,5J3b7j3Fzo9ISjChmoUoUA,Toronto,-79.4279,43.6813
14,7745 W Thomas Rd,CeuTRtwsq6w5rztGOyNMPg,Phoenix,-112.225,33.4797
17,9525 State Rt 14,6YC6CsXRrmPv_iwfvc9onA,Streetsboro,-81.3567,41.2438

name,postal_code,review_count,stars,state,delivery_yn,card_yn,takeout_yn,alcohol_yn
The Coffee Bean & Tea Leaf ...,85024,63,4.0,AZ,0,1,1,1
Thai One On,L4T 1A8,7,2.0,ON,0,1,1,0
Filiberto's Mexican Food,85323,40,2.5,AZ,0,1,1,0
Original Hamburger Works,85007,277,4.0,AZ,0,1,1,1
Mysore Indian Cuisine,H2W 1Z3,19,3.5,QC,1,1,1,1
Rally's Hamburgers,44115,5,3.0,OH,0,0,0,0
Rib Shop,85033,3,2.5,AZ,0,1,1,1
Mabel's Bakery,M6C 1B5,23,4.0,ON,0,1,1,1
Salsitas,85033,14,2.5,AZ,0,1,1,1
Teresas Pizzeria,44241,11,3.0,OH,1,1,1,1

price_category,created_at,updated_at
1.0,2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
2.0,2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
1.0,2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
1.0,2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
2.0,2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
,2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
1.0,2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
2.0,2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
1.0,2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
2.0,2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00


In [37]:
# filtered_rf.save('./final-dataset/restaurant-dataset-v2/')
filtered_rf.save('./final-dataset/restaurant-dataset-v3/')

In [38]:
# filtered_rf.export_csv('./final-dataset/restaurant-dataset-v2.csv', )
filtered_rf.export_csv('./final-dataset/restaurant-dataset-v3.csv', )

In [39]:
len(filtered_ct)

167

In [40]:
len(filtered_crf)

160333

In [41]:
len(filtered_rf)

51861

## 8. 여전히 데이터가 많아서 데이터 추출에 어려움이 많다. 라스베거스만 대상으로 한다.

In [42]:
rt = gl.SFrame('./final-dataset/restaurant-dataset-v3/')

In [43]:
rt.head(5)

id,address,business_id,city,longitude,latitude,name
2,"20235 N Cave Creek Rd, Ste 1115 ...",45bWSZtniwPRiqlivpS8Og,Phoenix,-112.03,33.6714,The Coffee Bean & Tea Leaf ...
3,"3417 Derry Road E, Unit 103 ...",6OuOZAok8ikONMS_T3EzXg,Mississauga,-79.6328,43.7129,Thai One On
4,1440 N. Dysart Ave,8-NRKkPY1UiFXW20WXKiXg,Avondale,-112.341,33.4481,Filiberto's Mexican Food
6,2801 N 15th Ave,_c3ixq9jYKxhLUB0czi0ug,Phoenix,-112.091,33.4798,Original Hamburger Works
7,4216 Saint-Laurent Boul,bBUMib8l6Me1ZB1_Qkezkg,Montréal,-73.5822,45.518,Mysore Indian Cuisine

postal_code,review_count,stars,state,delivery_yn,card_yn,takeout_yn,alcohol_yn,price_category
85024,63,4.0,AZ,0,1,1,1,1.0
L4T 1A8,7,2.0,ON,0,1,1,0,2.0
85323,40,2.5,AZ,0,1,1,0,1.0
85007,277,4.0,AZ,0,1,1,1,1.0
H2W 1Z3,19,3.5,QC,1,1,1,1,2.0

created_at,updated_at
2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00
2018-11-22 02:40:21+00:00,2018-11-22 02:40:21+00:00


In [44]:
import graphlab.aggregate as agg

city_rt = rt.groupby(key_columns='city', operations={'count': agg.COUNT()})

In [45]:
city_rt.sort('count', ascending=False)

city,count
Toronto,6822
Las Vegas,5579
Phoenix,3680
Montréal,3289
Charlotte,2579
Pittsburgh,2236
Mississauga,1395
Cleveland,1379
Scottsdale,1297
Mesa,1190


In [46]:
lv_rt = rt.filter_by(['Las Vegas'], 'city')
len(lv_rt)

5579

In [48]:
lv_rt.save('./las_vegas_v2/restaurant')
lv_rt.export_csv('./las_vegas_v2/restaurant.csv', )

In [50]:
toronto_rt = rt.filter_by(['Toronto'], 'city')
len(toronto_rt)

6822

In [51]:
toronto_rt.save('./toronto/restaurant')
toronto_rt.export_csv('./toronto/restaurant.csv', )

In [None]:
lv_rt = lv_rt.join()