In [20]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import itertools

from collections import defaultdict
pd.set_option('max_columns', 100)

In [2]:
import nltk
import random
from nltk.classify.scikitlearn import SklearnClassifier
import pickle
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from nltk.classify import ClassifierI
from statistics import mode
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import sent_tokenize, word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix, roc_auc_score
from sklearn.feature_extraction.text import CountVectorizer

In [3]:
parent_dir = '../Data Sets/'
path_dic = {'B': 'business_s.csv', 'R':'review_text.csv', 'U':'user.csv', 'I':'review_info.csv'}

def read_files():
    d = defaultdict(list)
    for key in path_dic:
        d[key] = pd.read_csv(parent_dir + path_dic[key]).drop('Unnamed: 0', 1)
    return d
def show():
    sns.despine()
    plt.show()
    
def clean_format(w):
    w = w.lower().replace('.', '').replace(',', '').replace('!', '')
    return w
d = read_files()

# Modify colnames and apply filters

In [4]:
print(d['B']['review_count'].describe())
print(d['U']['review_count'].describe())

count    135612.000000
mean         32.286368
std         102.457144
min           3.000000
25%           5.000000
50%           9.000000
75%          25.000000
max        6979.000000
Name: review_count, dtype: float64
count    1.183362e+06
mean     2.372686e+01
std      8.050645e+01
min      0.000000e+00
25%      2.000000e+00
50%      5.000000e+00
75%      1.600000e+01
max      1.165600e+04
Name: review_count, dtype: float64


In [5]:
min_review_per_valid_buz = 5
min_review_per_valid_user = 2

d['B'] = d['B'][d['B']['review_count'] >= min_review_per_valid_buz]
d['U'] = d['U'][d['U']['review_count'] >= min_review_per_valid_user]

print(d['B'].shape)
print(d['U'].shape)

(102497, 17)
(968039, 22)


In [8]:
print("buz cols: ")
print(list(d['B']))
print()
print("review_text cols: ")
print(list(d['R']))
print()
print("review_info cols: ")
print(list(d['I']))
print()
print("user cols: ")
print(list(d['U']))

buz cols: 
['address', 'attributes', 'business_id', 'categories', 'city', 'hours', 'is_open', 'latitude', 'longitude', 'name', 'neighborhood', 'postal_code', 'review_count', 'stars', 'state', 'price', 'credit_card']

review_text cols: 
['text_stem', 'review_id']

review_info cols: 
['business_id', 'review_cool', 'date', 'review_funny', 'review_id', 'review_stars', 'review_useful', 'user_id', 'polarity', 'subjectivity']

user cols: 
['average_stars', 'compliment_cool', 'compliment_cute', 'compliment_funny', 'compliment_hot', 'compliment_list', 'compliment_more', 'compliment_note', 'compliment_photos', 'compliment_plain', 'compliment_profile', 'compliment_writer', 'cool', 'elite', 'fans', 'friends', 'funny', 'name', 'review_count', 'useful', 'user_id', 'yelping_since']


In [9]:
d['R'] = d['R'].merge(d['I'], on = 'review_id', how = 'inner')

In [11]:
# renaming the columns
d['B'] = d['B'].rename(columns = {'name':'buz_name', 'review_count':'buz_review_count', 'stars':'buz_stars'})
# d['R'] = d['R'].rename(columns = {'cool':'review_cool', 'funny':'review_funny', 'stars':'review_stars', 'useful':'review_useful', \
#                 'text':'review_text'})
d['U'] = d['U'].rename(columns = {'cool':'user_cool', 'funny':'user_funny','useful':'user_useful', 'name':'user_name', \
                                  'review_count':'user_review_count'})


In [12]:
cols = list(d['B']) + list(d['R']) + list(d['U'])
print("Remaining duplicated cols:", [s for s in cols if cols.count(s) > 1])

Remaining duplicated cols: ['business_id', 'business_id', 'user_id', 'user_id']


# Calculating polarity and subjectivity using TextBlob

In [13]:
print(d['R'].shape)
d['R'].head()

(4736897, 11)


Unnamed: 0,text_stem,review_id,business_id,review_cool,date,review_funny,review_stars,review_useful,user_id,polarity,subjectivity
0,My girlfriend and I stay here for 3 night and ...,VfBHSwC5Vz_pbFluy07i9Q,uYHaNptLzDLoV_JZ_MuzUA,0.0,2016-07-12,0.0,5.0,0.0,cjpdDjZyprfyDG3RlkVG3w,0.311454,0.671768
1,If you need an inexpens place to stay for a ni...,3zRpneRKDsOPq92tq7ybAA,uYHaNptLzDLoV_JZ_MuzUA,0.0,2016-10-02,0.0,3.0,0.0,bjTcT8Ty4cJZhEOEo01FGA,0.133042,0.579203
2,mittlerweil gibt es in edinburgh zwei ableg de...,ne5WhI1jUFOcRn-b-gAzHA,uYHaNptLzDLoV_JZ_MuzUA,0.0,2015-09-17,0.0,3.0,0.0,AXgRULmWcME7J6Ix3I--ww,0.166667,0.833333
3,locat is everyth and thi hotel ha it ! the rec...,llmdwOgDReucVoWEry61Lw,uYHaNptLzDLoV_JZ_MuzUA,0.0,2016-08-21,0.0,4.0,0.0,oU2SSOmsp_A8JYI7Z2JJ5w,0.204262,0.44284
4,gute lage im stadtzentrum . shoppingmeil und s...,DuffS87NaSMDmIfluvT83g,uYHaNptLzDLoV_JZ_MuzUA,0.0,2013-11-20,0.0,5.0,0.0,0xtbPEna2Kei11vsU-U2Mw,0.45,0.65


In [11]:
# from textblob import TextBlob
# d['R']['polarity'] = d['R']['review_text'].apply(lambda s: TextBlob(s).sentiment.polarity)
# d['R']['subjectivity'] = d['R']['review_text'].apply(lambda s: TextBlob(s).sentiment.subjectivity)
# d['R'].head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,business_id,review_cool,date,review_funny,review_id,review_stars,review_text,review_useful,user_id,polarity,subjectivity
0,uYHaNptLzDLoV_JZ_MuzUA,0,2016-07-12,0.0,VfBHSwC5Vz_pbFluy07i9Q,5.0,My girlfriend and I stayed here for 3 nights a...,0.0,cjpdDjZyprfyDG3RlkVG3w,0.311454,0.671768
1,uYHaNptLzDLoV_JZ_MuzUA,0,2016-10-02,0.0,3zRpneRKDsOPq92tq7ybAA,3.0,If you need an inexpensive place to stay for a...,0.0,bjTcT8Ty4cJZhEOEo01FGA,0.133042,0.579203
2,uYHaNptLzDLoV_JZ_MuzUA,0,2015-09-17,0.0,ne5WhI1jUFOcRn-b-gAzHA,3.0,Mittlerweile gibt es in Edinburgh zwei Ableger...,0.0,AXgRULmWcME7J6Ix3I--ww,0.166667,0.833333


In [14]:
# save
# d['R'].to_csv(parent_dir + 'review_tagged.csv')

In [14]:
print(d['R'].shape)
print(list(d['R']))

(4736897, 11)
['text_stem', 'review_id', 'business_id', 'review_cool', 'date', 'review_funny', 'review_stars', 'review_useful', 'user_id', 'polarity', 'subjectivity']


In [23]:
# # Stem all the words in reviews
# length = len(d['R']['review_text'])

# ps = PorterStemmer()
# d['R']['text_stem'] = [' '.join([ps.stem(w) for w in word_tokenize(d['R']['review_text'][i])]) for i in range(length)]

# lemmatizer = WordNetLemmatizer()
# d['R']['text_lemma'] = [' '.join([lemmatizer.lemmatize(w) for w in word_tokenize(d['R']['review_text'][i])]) for i in range(length)]

In [25]:
# save
# d['R'].to_csv(parent_dir + 'review_tagged2.csv')

In [15]:
d['R'].loc[0,'text_stem']

'My girlfriend and I stay here for 3 night and love it . the locat of thi hotel and veri decent price make thi an amaz deal . when you walk out the front door scott monument and princ street are right in front of you , edinburgh castl and the royal mile is a 2 minut walk via a close right around the corner , and there are so mani hidden gem nearbi includ calton hill and the newli open arch that made thi locat incred . the hotel itself wa also veri nice with a reason price bar , veri consider staff , and small but comfort room with excel bathroom and shower . onli two minor complaint are no telephon in room for room servic ( not a huge deal for us ) and no AC in the room , but they have huge window which can be fulli open . the staff were incred though , let us borrow umbrella for the rain , give us map and direct , and also when we had lost our onli UK adapt for charg our phone gave us a veri fanci one for free . I would highli recommend thi hotel to friend , and when I return to edinb

In [27]:
# d['U'] = d['U'].drop('Unnamed: 0', 1)
# d['B'] = d['B'].drop('Unnamed: 0', 1)

In [29]:
print(list(d['U']))

['average_stars', 'compliment_cool', 'compliment_cute', 'compliment_funny', 'compliment_hot', 'compliment_list', 'compliment_more', 'compliment_note', 'compliment_photos', 'compliment_plain', 'compliment_profile', 'compliment_writer', 'user_cool', 'elite', 'fans', 'friends', 'user_funny', 'user_name', 'user_review_count', 'user_useful', 'user_id', 'yelping_since']


In [30]:
d['RU'] = d['U'][['user_id']].merge(d['R'], on = 'user_id', how = 'left')
grouped = d['RU'].groupby('user_id').agg(['count', 'mean', 'std'])
res = pd.DataFrame(grouped.to_records())
print(list(res))
res = res[["user_id", "('review_cool', 'count')", "('review_cool', 'mean')", "('review_funny', 'mean')",\
           "('review_useful', 'mean')", "('review_stars', 'mean')", "('review_stars', 'std')",\
           "('polarity', 'mean')", "('polarity', 'std')", "('subjectivity', 'mean')", "('subjectivity', 'std')"]]

res = res.rename(columns = {"('review_cool', 'count')":"user_review_count", "('review_cool', 'mean')":"user_cool_mean", \
                      "('review_funny', 'mean')":"user_funny_mean","('review_useful', 'mean')":"user_useful_mean", \
                      "('review_stars', 'mean')":"user_star_mean", "('review_stars', 'std')":"user_star_std", \
                      "('polarity', 'mean')":"user_polarity_mean", "('polarity', 'std')":"user_polarity_std", \
                      "('subjectivity', 'mean')":"user_subjectivity_mean", "('subjectivity', 'std')":"user_subjectivity_std"})
res = res.fillna(0)
res.head()

['user_id', "('review_cool', 'count')", "('review_cool', 'mean')", "('review_cool', 'std')", "('review_funny', 'count')", "('review_funny', 'mean')", "('review_funny', 'std')", "('review_stars', 'count')", "('review_stars', 'mean')", "('review_stars', 'std')", "('review_useful', 'count')", "('review_useful', 'mean')", "('review_useful', 'std')", "('polarity', 'count')", "('polarity', 'mean')", "('polarity', 'std')", "('subjectivity', 'count')", "('subjectivity', 'mean')", "('subjectivity', 'std')"]


Unnamed: 0,user_id,user_review_count,user_cool_mean,user_funny_mean,user_useful_mean,user_star_mean,user_star_std,user_polarity_mean,user_polarity_std,user_subjectivity_mean,user_subjectivity_std
0,---1lKK3aKOuomHnwAkAow,117,0.735043,0.623932,1.57265,4.068376,1.356507,0.21275,0.186009,0.548334,0.17448
1,---94vtJ_5o_nikEs6hUjg,1,0.0,0.0,0.0,5.0,0.0,0.566667,0.0,0.866667,0.0
2,---PLwSf5gKdIoVnyRHgBA,2,0.0,0.0,0.0,4.0,1.414214,0.239726,0.31505,0.614042,0.107421
3,---cu1hq55BP9DWVXXKHZg,3,0.0,0.0,0.333333,2.0,1.732051,0.066577,0.196531,0.613889,0.19009
4,---fhiwiwBYrvqhpXgcWDQ,1,1.0,1.0,2.0,1.0,0.0,-0.078737,0.0,0.447163,0.0


In [32]:
d['U'] = d['U'].rename(columns = {'user_review_count':'user_review_count_original'})
print(list(d['U']))

['average_stars', 'compliment_cool', 'compliment_cute', 'compliment_funny', 'compliment_hot', 'compliment_list', 'compliment_more', 'compliment_note', 'compliment_photos', 'compliment_plain', 'compliment_profile', 'compliment_writer', 'user_cool', 'elite', 'fans', 'friends', 'user_funny', 'user_name', 'user_review_count_original', 'user_useful', 'user_id', 'yelping_since']


In [33]:
d['U'] = d['U'].drop(['compliment_cool', 'compliment_cute', 'compliment_funny', 'compliment_hot', \
                      'compliment_list', 'compliment_more', 'compliment_note', 'compliment_photos', \
                      'compliment_plain', 'compliment_profile', 'compliment_writer', 'user_funny', \
                      'user_review_count_original', 'user_useful', 'user_cool'], axis = 1)
d['U'] = d['U'].merge(res, on = 'user_id', how = 'left')
d['U'].head(10)

Unnamed: 0,average_stars,elite,fans,friends,user_name,user_id,yelping_since,user_review_count,user_cool_mean,user_funny_mean,user_useful_mean,user_star_mean,user_star_std,user_polarity_mean,user_polarity_std,user_subjectivity_mean,user_subjectivity_std
0,3.8,"[2014, 2016, 2013, 2011, 2012, 2015, 2010, 2017]",209,"['M19NwFwAXKRZzt8koF11hQ', 'QRcMZ8pJJBBZaKubHO...",Cin,lsSiIjAKVl-QRxKjRErBeg,2010-07-13,14,19.714286,18.571429,22.071429,3.571429,0.937614,0.17797,0.106062,0.563582,0.068131
1,3.94,"[2014, 2017, 2011, 2012, 2015, 2009, 2013, 200...",835,"['eoSSJzdprj3jxXyi94vDXg', 'QF0urZa-0bxga17ZeY...",Andrea,om5ZiponkpRqUNa3pVPiRg,2006-01-18,72,4.736111,3.263889,5.222222,3.902778,0.73465,0.220047,0.104949,0.559004,0.090721
2,4.72,[],17,"['Oa84FFGBw1axX8O6uDkmqg', 'SRcWERSl4rhm-Bz9zN...",Gabe,-lGwMGHMC_XihFJNKCJNRg,2014-10-31,20,0.35,0.55,0.4,4.85,0.366348,0.425986,0.213721,0.646139,0.11438
3,3.76,"[2016, 2017]",11,"['96DJovjKAtExnyBZRfniPQ', 'Djczq-bRn761U4FW77...",Leah,D-ydMTPGWXTVm4_jjp0k9g,2013-04-01,16,0.5,0.3125,0.6875,3.75,0.68313,0.259618,0.099733,0.562014,0.081573
4,4.23,"[2017, 2016]",49,"['iN0A6QIrEFYoSGHFaknh8Q', 'B2HDoWNIzLlon0IhS1...",Juan,PcvbBOCOcs6_suRDH7TSTg,2012-08-16,510,4.464706,2.309804,4.705882,4.107843,0.832857,0.358967,0.18056,0.620263,0.115157
5,5.0,[],1,"['HanGVaZP_0IwhQdc0F5paQ', '_v1_G2Fs56K8JTAzzI...",Charmaine,uXjR2GhCAYhqxVr21aC4vQ,2011-10-14,14,0.214286,0.5,0.571429,5.0,0.0,0.279822,0.379623,0.65378,0.115426
6,4.4,"[2017, 2016]",17,"['ZWD8UH1T7QXQr0Eq-mcWYg', 'ObSXluQeEYiOP9l9mu...",Victoria,N2arP_u4sMGLgOZhA6ZFoQ,2013-12-20,9,3.0,0.555556,2.444444,4.333333,0.866025,0.322131,0.098181,0.608021,0.087161
7,4.06,"[2016, 2015]",5,"['Puvuej6lzJ1JOEmtjG7V_Q', 'fq7CL1myWPYeH0d4bK...",Erica,YHJIMK_zVH_VY6HCY6bYvg,2012-06-08,1,0.0,1.0,0.0,3.0,0.0,0.195076,0.0,0.531439,0.0
8,3.63,[],3,"['b8NTnxrhHCs6IPAKrzYL9w', 'vaNvEgtznFwSPDNkbM...",Whitney,g3V76Ja0XgWS1rqx0gxL_A,2007-09-11,86,0.197674,0.127907,0.430233,3.755814,1.245567,0.212151,0.192327,0.563515,0.147443
9,4.19,[],5,"['lM_HnyWHrpBsrgts3VnkxA', 'ynYGa-R1ZJf-tJOVlQ...",Paul,VTL4Dep5p7XrrOtgnXdEIQ,2010-04-25,3,1.333333,0.333333,2.0,4.666667,0.57735,0.291667,0.094989,0.55889,0.052442


In [34]:
# d['U'].to_csv(parent_dir + 'user.csv')

In [36]:
d['U'].head(1)

Unnamed: 0,average_stars,elite,fans,friends,user_name,user_id,yelping_since,user_review_count,user_cool_mean,user_funny_mean,user_useful_mean,user_star_mean,user_star_std,user_polarity_mean,user_polarity_std,user_subjectivity_mean,user_subjectivity_std
0,3.8,"[2014, 2016, 2013, 2011, 2012, 2015, 2010, 2017]",209,"['M19NwFwAXKRZzt8koF11hQ', 'QRcMZ8pJJBBZaKubHO...",Cin,lsSiIjAKVl-QRxKjRErBeg,2010-07-13,14,19.714286,18.571429,22.071429,3.571429,0.937614,0.17797,0.106062,0.563582,0.068131


In [18]:
d['RB'] = d['B'][['business_id']].merge(d['R'], on = 'business_id', how = 'left')
grouped = d['RB'].groupby('business_id').agg(['count', 'mean', 'std'])
res = pd.DataFrame(grouped.to_records())
print(list(res))
res = res[["business_id", "('review_cool', 'count')", "('review_cool', 'mean')", "('review_funny', 'mean')",\
           "('review_useful', 'mean')", "('review_stars', 'mean')", "('review_stars', 'std')",\
           "('polarity', 'mean')", "('polarity', 'std')", "('subjectivity', 'mean')", "('subjectivity', 'std')"]]

res = res.rename(columns = {"('review_cool', 'count')":"buz_review_count", "('review_cool', 'mean')":"buz_cool_mean", \
                      "('review_funny', 'mean')":"buz_funny_mean","('review_useful', 'mean')":"buz_useful_mean", \
                      "('review_stars', 'mean')":"buz_star_mean", "('review_stars', 'std')":"buz_star_std", \
                      "('polarity', 'mean')":"buz_polarity_mean", "('polarity', 'std')":"buz_polarity_std", \
                      "('subjectivity', 'mean')":"buz_subjectivity_mean", "('subjectivity', 'std')":"buz_subjectivity_std"})
res = res.fillna(0)
res.head()

['business_id', "('review_cool', 'count')", "('review_cool', 'mean')", "('review_cool', 'std')", "('review_funny', 'count')", "('review_funny', 'mean')", "('review_funny', 'std')", "('review_stars', 'count')", "('review_stars', 'mean')", "('review_stars', 'std')", "('review_useful', 'count')", "('review_useful', 'mean')", "('review_useful', 'std')", "('polarity', 'count')", "('polarity', 'mean')", "('polarity', 'std')", "('subjectivity', 'count')", "('subjectivity', 'mean')", "('subjectivity', 'std')"]


Unnamed: 0,business_id,buz_review_count,buz_cool_mean,buz_funny_mean,buz_useful_mean,buz_star_mean,buz_star_std,buz_polarity_mean,buz_polarity_std,buz_subjectivity_mean,buz_subjectivity_std
0,--6MefnULPED_I942VcFNA,30,0.8,0.666667,1.0,3.233333,1.278019,0.196373,0.192862,0.514233,0.090911
1,--7zmmkVg-IMGaXbuVd0SQ,42,0.809524,0.404762,1.309524,3.904762,1.122052,0.256366,0.276829,0.596447,0.123973
2,--9QQLMTbFzLJ_oT-ON3Xw,9,0.222222,0.333333,0.888889,3.0,1.802776,0.130629,0.159182,0.536588,0.167924
3,--9e1ONYQuAa-CB_Rrw7Tw,1389,0.631389,0.471562,0.942405,4.087113,1.105546,0.321624,0.193368,0.583644,0.11667
4,--DaPTJW3-tB1vP-PfdTEg,37,0.081081,0.0,0.216216,3.648649,1.252025,0.314294,0.219135,0.576406,0.133092


In [110]:
print(list(d['B']))

['address', 'attributes', 'business_id', 'categories', 'city', 'hours', 'is_open', 'latitude', 'longitude', 'buz_name', 'neighborhood', 'postal_code', 'buz_review_count', 'buz_stars', 'state', 'price', 'credit_card']


In [24]:
d['B'] = pd.read_csv(parent_dir + 'business_s.csv').drop('Unnamed: 0', 1)

d['B'] = d['B'][d['B']['review_count'] >= min_review_per_valid_buz]
d['B'] = d['B'].rename(columns = {'name':'buz_name', 'review_count':'buz_review_count_original', 'stars':'buz_stars'})
print(d['B'].shape)
print(list(d['B']))

(102497, 17)
['address', 'attributes', 'business_id', 'categories', 'city', 'hours', 'is_open', 'latitude', 'longitude', 'buz_name', 'neighborhood', 'postal_code', 'buz_review_count_original', 'buz_stars', 'state', 'price', 'credit_card']


In [25]:
d['B'] = d['B'].merge(res, on = 'business_id', how = 'left')
d['B'].head(10)

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,buz_name,neighborhood,postal_code,buz_review_count_original,buz_stars,state,price,credit_card,buz_review_count,buz_cool_mean,buz_funny_mean,buz_useful_mean,buz_star_mean,buz_star_std,buz_polarity_mean,buz_polarity_std,buz_subjectivity_mean,buz_subjectivity_std
0,691 Richmond Rd,"{'RestaurantsPriceRange2': 2, 'BikeParking': T...",YDf95gJZaq05wvo7hTQbbQ,"['Shopping', 'Shopping Centers']",Richmond Heights,"{'Sunday': '11:00-18:00', 'Wednesday': '10:00-...",1,41.541716,-81.493116,Richmond Town Square,,44143,17,2.0,OH,2.0,,17,0.176471,0.470588,1.235294,2.058824,0.966345,0.17027,0.191149,0.613203,0.120936
1,337 Danforth Avenue,"{'OutdoorSeating': False, 'WiFi': 'no', 'Wheel...",v2WhjAB3PIBA8J8VxG3wEg,"['Food', 'Coffee & Tea']",Toronto,"{'Sunday': '12:00-17:00', 'Wednesday': '10:00-...",0,43.677126,-79.353285,The Tea Emporium,Riverdale,M4K 1N7,7,4.5,ON,2.0,True,7,0.714286,0.142857,0.857143,4.285714,0.755929,0.218827,0.067684,0.532261,0.061219
2,4719 N 20Th St,"{'OutdoorSeating': False, 'WiFi': 'no', 'Resta...",duHFBe87uNSXImQmvBh87Q,"['Sandwiches', 'Restaurants']",Phoenix,{},0,33.505928,-112.038847,Blimpie,,85016,10,4.5,AZ,1.0,True,10,0.8,0.5,0.9,4.6,0.699206,0.282466,0.23915,0.548494,0.089913
3,2017 E Camelback Rd,"{'BusinessAcceptsCreditCards': True, 'BikePark...",uUEMrhJiL1a1pCA_I1SU7Q,"['Shopping', 'Tobacco Shops']",Phoenix,"{'Sunday': '10:00-19:00', 'Wednesday': '10:00-...",0,33.508068,-112.037552,Baxter's Cigars,,85016,35,5.0,AZ,2.0,True,35,0.2,0.2,0.514286,5.0,0.0,0.392109,0.177307,0.574468,0.099517
4,"4425 N 24th St, Ste 125","{'BusinessAcceptsCreditCards': True, 'AcceptsI...",2eJEUJIP54tex7T9YOcLSw,"['Chiropractors', 'Health & Medical']",Phoenix,"{'Monday': '14:30-17:00', 'Tuesday': '14:00-19...",1,33.502848,-112.012696,Back-Health Chiropractic,,85016,19,5.0,AZ,,True,19,0.052632,0.0,0.052632,4.789474,0.713283,0.270686,0.115237,0.469295,0.128543
5,5770 Butler St,{'BusinessAcceptsCreditCards': True},fEylCY3UEH8YJ0Xa7lu6lA,"['Automotive', 'Oil Change Stations', 'Car Was...",Pittsburgh,"{'Sunday': '8:00-16:30', 'Wednesday': '8:00-17...",1,40.485939,-79.943727,Auto Bathouse,Lawrenceville,15201,16,2.5,PA,,True,16,0.4375,0.1875,1.0625,2.3125,1.74045,0.009312,0.167826,0.498867,0.143863
6,3220 Washington Rd,"{'WheelchairAccessible': True, 'RestaurantsPri...",kFtuYklkAIlmYw8RZAieGw,"['Jewelry Repair', 'Gold Buyers', 'Local Servi...",McMurray,"{'Wednesday': '10:00-20:00', 'Thursday': '10:0...",1,40.290498,-80.110021,JAB Jewelry Designs,,15317,7,5.0,PA,2.0,True,7,0.285714,0.0,0.285714,5.0,0.0,0.395844,0.187387,0.637869,0.16325
7,"1425 S Higley Rd, Ste 103","{'BusinessAcceptsCreditCards': True, 'AcceptsI...",NqiQdFa93wzUJGo29NbTPQ,"['Health & Medical', 'Optometrists']",Gilbert,"{'Monday': '7:30-17:00', 'Tuesday': '7:30-17:0...",1,33.324539,-111.720449,Neighborhood Vision Center,,85296,8,5.0,AZ,,True,8,0.125,0.0,0.25,5.0,0.0,0.344961,0.17368,0.574378,0.178266
8,11022 W Charleston Blvd,"{'BusinessAcceptsCreditCards': True, 'BikePark...",N9BN9ldVl1FNzcB9_eAstw,"['Bowling', 'Active Life']",Las Vegas,{},1,36.159098,-115.337915,Red Rock Bowling UYE Part 2,Summerlin,89135,13,4.5,NV,,True,13,4.615385,4.0,3.923077,4.615385,1.120897,0.242133,0.139878,0.544664,0.134789
9,"9393 N 90th St, Ste 112","{'HairSpecializesIn': {'perms': False, 'africa...",6s3z3TlpHOIecuSyPEOp7A,"['Barbers', ""Men's Hair Salons"", 'Hair Removal...",Scottsdale,"{'Wednesday': '10:00-18:00', 'Thursday': '10:0...",0,33.570421,-111.886492,Sq Cutz,,85258,7,4.5,AZ,2.0,True,7,0.0,0.285714,0.0,4.285714,1.496026,0.230783,0.232248,0.517781,0.14931


In [26]:
d['B'] = d['B'].drop(['buz_review_count_original', 'buz_stars'], axis = 1)

In [28]:
# d['B'].to_csv(parent_dir + 'business_s.csv')