### Introduction
This is an exploratory notebook for Airbnb listings in Nairobi.


In [324]:
# load modules
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns
%matplotlib inline

In [325]:
# read the listings JSON file (path is relative to the notebook's working directory)
listings = pd.read_json(path_or_buf='../data/listings.json')

In [326]:
# summarize column dtypes and non-null counts of the freshly loaded frame
listings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280 entries, 0 to 279
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   title          280 non-null    object 
 1   url            0 non-null      float64
 2   info           0 non-null      float64
 3   description    280 non-null    object 
 4   details        280 non-null    object 
 5   is_superhost   124 non-null    object 
 6   price          280 non-null    object 
 7   total_reviews  280 non-null    object 
 8   review_score   280 non-null    object 
dtypes: float64(2), object(7)
memory usage: 19.8+ KB


In [327]:
# preview the first rows of the raw listings frame
listings.head()

Unnamed: 0,title,url,info,description,details,is_superhost,price,total_reviews,review_score
0,Entire apartment in Nairobi,,,Cozy 2 bedroom near popular restaurant,"[4 guests, 2 bedrooms, 3 beds, 2.5 baths, Wifi...",SUPERHOST,[$52],[],[]
1,Entire apartment in Nairobi,,,Brookview Kilimani 1 Bedroom (Monthly),"[2 guests, 1 bedroom, 1 bed, 1 bath, Wifi, Kit...",SUPERHOST,[$22],[],[]
2,Entire guesthouse in Nairobi,,,"Kileleshwa, 1 bedroom private cottage","[2 guests, 1 bedroom, 2 beds, 1 bath, Wifi, Ki...",,[$10],"[ (, 5, )]",[5.0]
3,Entire apartment in Nairobi,,,"Cozy studio with CBD view, Netflix + rooftop pool","[2 guests, Studio, 1 bed, 1 bath, Wifi, Kitche...",,[$25],"[ (, 33, )]",[4.64]
4,Entire apartment in Nairobi,,,SkyView Top Studio,"[2 guests, Studio, 1 bed, 1 bath, Wifi, Kitche...",SUPERHOST,[$25],"[ (, 11, )]",[5.0]


In [328]:
# Hand-written replacement values for the `details` list of specific rows.
# Keys are row labels of `listings`; values are the full replacement lists.
# NOTE(review): the empty strings at index 5 appear to be placeholders for an
# absent 'Kitchen' entry, keeping 'Free parking' at index 6 - confirm.
details_overrides = {
    31: ['2 guests', '1 bedroom', '1 bed', '1 bath', 'Wifi'],
    73: ['2 guests', '1 bedroom', '1 bed', '1 bath', 'Wifi', 'Kitchen'],
    230: ['2 guests', 'Studio', '1 bed', '1 bath', 'Wifi', 'Kitchen', 'Free parking'],
    260: ['1 guest', '1 bedroom', '1 bed', '1 shared bath', 'Wifi', 'Kitchen', 'Free parking'],
    104: ['2 guests', 'Studio', '2 beds', '1 bath', 'Wifi', '', 'Free parking'],
    106: ['2 guests', '1 bedroom', '1 bed', '1 bath', 'Wifi', '', 'Free parking'],
    169: ['2 guests', '1 bedroom', '1 bed', '1 bath', 'Wifi', '', 'Free parking'],
    271: ['2 guests', 'Studio', '1 bed', '1 bath', 'Wifi', '', 'Free parking'],
    278: ['2 guests', 'Studio', '1 bed', '1 bath', 'Wifi', '', 'Free parking'],
}

# .at writes the whole list into a single cell of the object-dtype column
for row_label, corrected_details in details_overrides.items():
    listings.at[row_label, 'details'] = corrected_details

In [329]:
## derive tidy feature columns from the raw, list-valued scraped columns
#
# Reads:  details, price, review_score, total_reviews, is_superhost
# Writes: total_amenities, guests, bedrooms, beds, baths, wifi, kitchen,
#         parking, pool, price($), rating, reviews, shared_bath, is_superhost
#
# Every derived column is recomputed from the raw columns, so the cell can be
# re-run without corrupting earlier results.

def _yes_no(mask):
    """Map a boolean mask to the 'yes'/'no' labels used for flag columns."""
    return np.where(mask, 'yes', 'no')


details_lists = listings['details']

# check total number of amenities offered (length of the whole `details` list)
listings['total_amenities'] = details_lists.apply(len)

# The first four entries are positional: guests, bedrooms, beds, baths.
# .str[i] yields NaN when a list is shorter than i + 1 instead of raising.
for position, column in enumerate(['guests', 'bedrooms', 'beds', 'baths']):
    listings[column] = details_lists.str[position]

# Amenity flags are detected by membership rather than by list position, so a
# listing that lacks one amenity does not shift (and misclassify) the others.
amenity_labels = {
    'wifi': 'Wifi',
    'kitchen': 'Kitchen',
    'parking': 'Free parking',
    'pool': 'Pool',
}
for column, label in amenity_labels.items():
    listings[column] = _yes_no(
        details_lists.apply(lambda items, label=label: label in items)
    )

# price arrives as a one-element list such as ['$52']; strip the currency sign
# and any thousands separator literally (regex=False) before casting.
listings['price($)'] = (
    listings['price'].str[0]
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(int)
)

# review_score is [] or [score]; listings without a score get 0
listings['rating'] = pd.to_numeric(
    listings['review_score'].str[0], errors='coerce'
).fillna(0)

# total_reviews is [] or [' (', '<count>', ')']; the count is the middle item.
# Cast to int so the column is not a mix of strings and the 0 fill value;
# counts that cannot be parsed are treated as 0 as well.
listings['reviews'] = (
    pd.to_numeric(listings['total_reviews'].str[1], errors='coerce')
    .fillna(0)
    .astype(int)
)

# Accept 'yes' too so that re-running the cell keeps existing flags instead of
# turning every row into 'no'.
listings['is_superhost'] = _yes_no(listings['is_superhost'].isin(['SUPERHOST', 'yes']))

# clean values
# regex=True is passed explicitly: the '|' alternations are regular expressions,
# and pandas >= 2.0 treats str.replace patterns as literal text by default.
listings['guests'] = listings['guests'].str.replace(' guests| guest', '', regex=True)
listings['bedrooms'] = (
    listings['bedrooms']
    .str.replace(' bedrooms| bedroom', '', regex=True)
    .str.replace('Studio', '0', regex=False)
)
# ' bed' (with the leading space) so that '1 bed' becomes '1', not '1 '
listings['beds'] = listings['beds'].str.replace(' beds| bed', '', regex=True)

baths_text = (
    listings['baths']
    .str.replace(' baths| bath', '', regex=True)
    .str.replace('Half-bath', '0.5', regex=False)
)
# na=False: a missing bath entry must not be reported as a shared bath
listings['shared_bath'] = _yes_no(baths_text.str.contains('shared', na=False))
listings['baths'] = baths_text.str.replace(' shared| private', '', regex=True)


In [333]:
# re-inspect dtypes and non-null counts now that the derived columns were added
listings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280 entries, 0 to 279
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   title            280 non-null    object 
 1   url              0 non-null      float64
 2   info             0 non-null      float64
 3   description      280 non-null    object 
 4   details          280 non-null    object 
 5   is_superhost     280 non-null    object 
 6   price            280 non-null    object 
 7   total_reviews    280 non-null    object 
 8   review_score     280 non-null    object 
 9   total_amenities  280 non-null    int64  
 10  guests           280 non-null    object 
 11  bedrooms         280 non-null    object 
 12  beds             280 non-null    object 
 13  baths            280 non-null    object 
 14  wifi             280 non-null    object 
 15  kitchen          280 non-null    object 
 16  parking          280 non-null    object 
 17  pool            

In [336]:
# select clean columns
# Only the derived, analysis-ready fields are kept; the raw list-valued columns
# (details, price, total_reviews, review_score) stay behind in `listings`.
clean_cols = [
    'title', 'description', 'is_superhost',
    'guests', 'bedrooms', 'beds', 'baths',
    'wifi', 'kitchen', 'parking', 'pool',
    'price($)', 'rating', 'reviews', 'shared_bath',
]
# .copy() makes listings_clean an independent frame, so later assignments on it
# do not raise SettingWithCopyWarning or depend on `listings`.
listings_clean = listings[clean_cols].copy()
listings_clean.head()

Unnamed: 0,title,description,is_superhost,guests,bedrooms,beds,baths,wifi,kitchen,parking,pool,price($),rating,reviews,shared_bath
0,Entire apartment in Nairobi,Cozy 2 bedroom near popular restaurant,yes,4,2,3,2.5,yes,yes,yes,no,52,0.0,0,no
1,Entire apartment in Nairobi,Brookview Kilimani 1 Bedroom (Monthly),yes,2,1,1,1.0,yes,yes,yes,yes,22,0.0,0,no
2,Entire guesthouse in Nairobi,"Kileleshwa, 1 bedroom private cottage",no,2,1,2,1.0,yes,yes,yes,no,10,5.0,5,no
3,Entire apartment in Nairobi,"Cozy studio with CBD view, Netflix + rooftop pool",no,2,0,1,1.0,yes,yes,yes,yes,25,4.64,33,no
4,Entire apartment in Nairobi,SkyView Top Studio,yes,2,0,1,1.0,yes,yes,yes,no,25,5.0,11,no
