# Append Mean Sentiment to Businesses

In [1]:
import pandas as pd

In [2]:
# Load the preprocessed Las Vegas datasets produced by the earlier pipeline steps;
# see https://github.com/xiehongfeng100/yelper_dpps_and_eda/tree/master/dpps/las_vegas
yelp_lv_bizes, yelp_lv_rvs, yelp_lv_sentiment = (
    pd.read_csv(csv_name)
    for csv_name in (
        'las_vegas_business_preprocessed_with_db_id.csv',
        'las_vegas_review_with_db_id.csv',
        'las_vegas_review_text_sentiment_with_db_id.csv',
    )
)

In [3]:
# Business -> reviews maps: one single-entry dict per business row, keyed by the
# business db_id and holding the Series of db_ids of the reviews for that business.
#
# The reviews are grouped once up front instead of filtering the whole review table
# for every business, which needed one full boolean scan per business row.
rv_ids_by_biz = {
    biz_db_id: rv_ids
    for biz_db_id, rv_ids in yelp_lv_rvs.groupby('business_db_id')['db_id']
}
# Businesses without any review still get an entry, holding an empty Series.
no_rv_ids = yelp_lv_rvs['db_id'].iloc[:0]
biz_rv_maps = [
    {biz_db_id: rv_ids_by_biz.get(biz_db_id, no_rv_ids)}
    for biz_db_id in yelp_lv_bizes['db_id']
]

In [4]:
# Business mean (cnn) sentiment maps: a list of [business db_id, mean cnn_sentiment]
# pairs, in the same order as biz_rv_maps. The mean is NaN when none of a business's
# reviews appear in the sentiment table.
sent_review_ids = yelp_lv_sentiment.review_db_id  # looked up once, not per business
sent_scores = yelp_lv_sentiment.cnn_sentiment
biz_sent_maps = list()
for biz_rvs in biz_rv_maps:
    # Each map holds exactly one {business db_id: review db_ids} entry. Unpack it via
    # an iterator: dict.keys()/values() are views on Python 3 and cannot be indexed.
    biz_db_id, rvs = next(iter(biz_rvs.items()))
    mean_sent = sent_scores[sent_review_ids.isin(rvs)].mean()
    biz_sent_maps.append([biz_db_id, mean_sent])

In [5]:
# Sanity check: number of [business db_id, mean sentiment] pairs that were built
len(biz_sent_maps)

26777

In [6]:
# Peek at the first five [business db_id, mean sentiment] pairs
biz_sent_maps[:5]

[[4, 0.7299031828223214],
 [11, 0.6340573860000001],
 [12, 0.6375108796384211],
 [29, 0.5705963248706666],
 [33, 0.8443184092857141]]

In [7]:
# Turn the [db_id, mean sentiment] pairs into a two-column dataframe so they can be
# joined onto the business dataframe by db_id
biz_sent_df = pd.DataFrame(
    data=biz_sent_maps,
    columns=['db_id', 'sentiment'],
)

In [8]:
# First five rows of the per-business sentiment frame
biz_sent_df.head(5)

Unnamed: 0,db_id,sentiment
0,4,0.729903
1,11,0.634057
2,12,0.637511
3,29,0.570596
4,33,0.844318


In [9]:
# Join the mean sentiment onto the business dataframe as a 'sentiment' column,
# matching on db_id (left join: every business row is kept).
# Any 'sentiment' column left over from a previous run of this cell is dropped first;
# otherwise re-running the cell fails because join() rejects overlapping column names.
yelp_lv_bizes = (
    yelp_lv_bizes
    .drop('sentiment', axis=1, errors='ignore')
    .join(biz_sent_df.set_index('db_id'), on='db_id')
)

In [10]:
# Preview the columns of interest for the first five businesses after the join
yelp_lv_bizes[
    ['db_id', 'business_id', 'latitude', 'longitude', 'popularity', 'sentiment']
].head(5)

Unnamed: 0,db_id,business_id,latitude,longitude,popularity,sentiment
0,4,--9e1ONYQuAa-CB_Rrw7Tw,36.1232,-115.169,0.096371,0.729903
1,11,--DdmeR16TRb3LsjG0ejrQ,36.1143,-115.171,0.000297,0.634057
2,12,--e8PjCNhEz32pprnPhCwQ,36.1589,-115.133,0.000895,0.637511
3,29,--o5BoU7qYMALeVDK6mwVg,36.1016,-115.132,0.000167,0.570596
4,33,--q7kSBRb0vWC8lSkXFByA,36.0167,-115.173,0.000885,0.844318


In [11]:
# Persist the selected business columns (including the new mean sentiment) for
# future research; the dataframe index is not written to the file
yelp_lv_bizes[
    ['db_id', 'business_id', 'latitude', 'longitude', 'popularity', 'sentiment']
].to_csv(
    'las_vegas_businesses.csv',
    index=False,
)