In [1]:
import pandas as pd

## 1. Load Data

### 1.1 Load Business Data

In [2]:
# Load the Las Vegas business table from its CSV export.
yelp_lv_bizes = pd.read_csv(
    filepath_or_buffer='../../dataset/las_vegas/las_vegas_business_with_db_id.csv'
)

In [3]:
# Number of rows in the business table.
yelp_lv_bizes.shape[0]

26777

In [4]:
# Preview the first five business rows.
yelp_lv_bizes.head(5)

Unnamed: 0,db_id,business_id,stars,review_count,latitude,longitude,city
0,4,--9e1ONYQuAa-CB_Rrw7Tw,4.0,1451,36.1232,-115.169,Las Vegas
1,11,--DdmeR16TRb3LsjG0ejrQ,3.0,5,36.1143,-115.171,Las Vegas
2,12,--e8PjCNhEz32pprnPhCwQ,3.5,19,36.1589,-115.133,Las Vegas
3,29,--o5BoU7qYMALeVDK6mwVg,3.5,6,36.1016,-115.132,Las Vegas
4,33,--q7kSBRb0vWC8lSkXFByA,4.0,7,36.0167,-115.173,Las Vegas


### 1.2 Load Checkin Data and Rename Columns

In [5]:
# Load the Las Vegas check-in table from its CSV export.
yelp_lv_cks = pd.read_csv(
    filepath_or_buffer='../../dataset/las_vegas/las_vegas_checkin_with_db_id.csv'
)

In [6]:
# Number of rows in the check-in table.
yelp_lv_cks.shape[0]

23242

In [7]:
# Preview the first five check-in rows (columns still carry their original names).
yelp_lv_cks.head(5)

Unnamed: 0,business_db_id,count
0,4,2568
1,11,30
2,12,1
3,33,107
4,48,2


In [8]:
# Align the check-in column names with the business table: the join key is
# called `db_id` there, and `count` becomes the more descriptive
# `checkin_count`. Only column labels are renamed; the row index is
# intentionally left untouched (no string conversion of the row labels).
yelp_lv_cks = yelp_lv_cks.rename(columns={'business_db_id': 'db_id', 'count': 'checkin_count'})

In [9]:
# Confirm the renamed columns on the first five check-in rows.
yelp_lv_cks.head(5)

Unnamed: 0,db_id,checkin_count
0,4,2568
1,11,30
2,12,1
3,33,107
4,48,2


## 2. Business Left Join Checkin

In [10]:
# Attach check-in counts to the business table. how='left' keeps every business
# row; businesses without a matching check-in record get NaN in checkin_count.
yelp_lv_bizes = yelp_lv_bizes.merge(yelp_lv_cks, on='db_id', how='left')

In [11]:
# Preview the joined table; unmatched businesses show a missing checkin_count.
yelp_lv_bizes.head(5)

Unnamed: 0,db_id,business_id,stars,review_count,latitude,longitude,city,checkin_count
0,4,--9e1ONYQuAa-CB_Rrw7Tw,4.0,1451,36.1232,-115.169,Las Vegas,2568.0
1,11,--DdmeR16TRb3LsjG0ejrQ,3.0,5,36.1143,-115.171,Las Vegas,30.0
2,12,--e8PjCNhEz32pprnPhCwQ,3.5,19,36.1589,-115.133,Las Vegas,1.0
3,29,--o5BoU7qYMALeVDK6mwVg,3.5,6,36.1016,-115.132,Las Vegas,
4,33,--q7kSBRb0vWC8lSkXFByA,4.0,7,36.0167,-115.173,Las Vegas,107.0


In [12]:
# Businesses with no matching check-in record come out of the left join with a
# missing checkin_count; treat those as zero check-ins. The fill is restricted
# to that one column so that genuine gaps in any other column (e.g. stars or
# coordinates) are not silently replaced by 0.
yelp_lv_bizes = yelp_lv_bizes.fillna({'checkin_count': 0})

In [13]:
# Preview after filling: previously missing checkin_count values now read 0.0.
yelp_lv_bizes.head(5)

Unnamed: 0,db_id,business_id,stars,review_count,latitude,longitude,city,checkin_count
0,4,--9e1ONYQuAa-CB_Rrw7Tw,4.0,1451,36.1232,-115.169,Las Vegas,2568.0
1,11,--DdmeR16TRb3LsjG0ejrQ,3.0,5,36.1143,-115.171,Las Vegas,30.0
2,12,--e8PjCNhEz32pprnPhCwQ,3.5,19,36.1589,-115.133,Las Vegas,1.0
3,29,--o5BoU7qYMALeVDK6mwVg,3.5,6,36.1016,-115.132,Las Vegas,0.0
4,33,--q7kSBRb0vWC8lSkXFByA,4.0,7,36.0167,-115.173,Las Vegas,107.0


## 3. Scale Review and Checkin Count

In [14]:
from sklearn.preprocessing import MinMaxScaler

### 3.1 Scale Review Count to the Range [0, 1]

In [15]:
# Min-max scaler for review counts; feature_range=(0, 1) is the default, spelled out for clarity.
rc_scaler = MinMaxScaler(feature_range=(0, 1))

In [16]:
# fit_transform expects 2-D input, so select the column as a one-column frame
# and hand its (n, 1) array to the scaler.
rc_scaled = rc_scaler.fit_transform(yelp_lv_bizes[['review_count']].values)



In [17]:
# Peek at the first five scaled review counts.
rc_scaled[:5, :]

array([[0.19679261],
       [0.00027181],
       [0.0021745 ],
       [0.00040772],
       [0.00054363]])

In [18]:
# Store the scaled review counts as a new column; ravel() flattens the (n, 1)
# scaler output into the 1-D shape of a single column.
yelp_lv_bizes = yelp_lv_bizes.assign(review_count_scaled=rc_scaled.ravel())

### 3.2 Scale Checkin Count to the Range [0, 1]

In [19]:
# Separate min-max scaler for check-in counts; feature_range=(0, 1) is the default, spelled out for clarity.
ck_scaler = MinMaxScaler(feature_range=(0, 1))

In [20]:
# fit_transform expects 2-D input, so select the column as a one-column frame
# and hand its (n, 1) array to the scaler.
ck_scaled = ck_scaler.fit_transform(yelp_lv_bizes[['checkin_count']].values)

In [21]:
# Peek at the first five scaled check-in counts.
ck_scaled[:5, :]

array([[1.94607375e-02],
       [2.27345064e-04],
       [7.57816881e-06],
       [0.00000000e+00],
       [8.10864063e-04]])

In [22]:
# Store the scaled check-in counts as a new column; ravel() flattens the (n, 1)
# scaler output into the 1-D shape of a single column.
yelp_lv_bizes = yelp_lv_bizes.assign(checkin_count_scaled=ck_scaled.ravel())

## 4. Save File for Future Analysis

In [23]:
# Final look at the table, now including both scaled columns, before saving.
yelp_lv_bizes.head(5)

Unnamed: 0,db_id,business_id,stars,review_count,latitude,longitude,city,checkin_count,review_count_scaled,checkin_count_scaled
0,4,--9e1ONYQuAa-CB_Rrw7Tw,4.0,1451,36.1232,-115.169,Las Vegas,2568.0,0.196793,0.019461
1,11,--DdmeR16TRb3LsjG0ejrQ,3.0,5,36.1143,-115.171,Las Vegas,30.0,0.000272,0.000227
2,12,--e8PjCNhEz32pprnPhCwQ,3.5,19,36.1589,-115.133,Las Vegas,1.0,0.002175,8e-06
3,29,--o5BoU7qYMALeVDK6mwVg,3.5,6,36.1016,-115.132,Las Vegas,0.0,0.000408,0.0
4,33,--q7kSBRb0vWC8lSkXFByA,4.0,7,36.0167,-115.173,Las Vegas,107.0,0.000544,0.000811


In [24]:
# Persist the enriched business table; index=False keeps the row index out of the file.
yelp_lv_bizes.to_csv(
    path_or_buf='../../dataset/las_vegas/las_vegas_business_preprocessed_with_db_id.csv',
    index=False,
)