In [1]:
rm -rf ~/.matplotlib/*

In [2]:
import os
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
sns.set()
%matplotlib inline



In [3]:
airbnb_room_table_cols = ["room_id", "user_id", "room_type", "country", "city", "neighborhood", "address",
                           "reviews", "rating", "accomodates", "bedrooms", "bathrooms", "price", "deleted",
                           "min_stay", "last_modified", "latitude", "longitude", "survey_id", "location"]

In [4]:
airbnb_room_table = pd.read_csv("room_airbnb.csv", error_bad_lines=False, names = airbnb_room_table_cols)

In [5]:
airbnb_room_table.head()

Unnamed: 0,room_id,user_id,room_type,country,city,neighborhood,address,reviews,rating,accomodates,bedrooms,bathrooms,price,deleted,min_stay,last_modified,latitude,longitude,survey_id,location
0,13311781,75218313.0,Private room,Mexico,Ciudad de México,Ciudad de Méxic,Ciudad de Méxic,0,,1,1.0,2.0,11.0,0.0,3.0,2016-08-12 19:36:42.593277,19.402872,-99.182288,1.0,0101000020E6100000029D499BAACB58C05F79909E2267...
1,796477,4195771.0,Private room,Mexico,Mexico City,Mexico Cit,Mexico Cit,70,5.0,2,1.0,1.0,41.0,0.0,1.0,2016-08-12 20:16:12.508709,19.416327,-99.175053,1.0,0101000020E610000045847F1134CB58C019710168946A...
2,11089363,1011894.0,Private room,Mexico,Ciudad de México,Ciudad de Méxic,Ciudad de Méxic,29,5.0,2,1.0,1.0,36.0,0.0,2.0,2016-08-12 20:33:23.890845,19.410631,-99.17299,1.0,0101000020E61000003E22A64412CB58C049B9FB1C1F69...
3,12088827,57771772.0,Private room,Mexico,Ciudad de México,Ciudad de Méxic,Ciudad de Méxic,1,,2,1.0,1.0,56.0,0.0,1.0,2016-08-12 21:32:43.450263,19.442962,-99.204891,1.0,0101000020E6100000AA0F24EF1CCD58C0EA5E27F56571...
4,11692281,62122323.0,Private room,Mexico,Ciudad de México,Ciudad de Méxic,Ciudad de Méxic,5,4.5,2,1.0,1.0,14.0,0.0,1.0,2016-08-12 22:33:00.899338,19.402246,-99.174304,1.0,0101000020E6100000F8E3F6CB27CB58C063F20698F966...


In [6]:
airbnb_room_table.dtypes

room_id            int64
user_id          float64
room_type         object
country           object
city              object
neighborhood      object
address           object
reviews           object
rating            object
accomodates       object
bedrooms          object
bathrooms        float64
price            float64
deleted          float64
min_stay         float64
last_modified     object
latitude         float64
longitude         object
survey_id        float64
location          object
dtype: object

In [7]:
airbnb_room_table["country"].unique()

array(['Mexico', nan, 'Brazil', 'Uruguay', 'Jamaica', 'Spain', 'France',
       'United States', 'Australia', 'China', 'Poland', 'Greece', 'Italy',
       'India', 'Costa Rica', 'Canada', 'Germany', 'Macedonia',
       'South Africa', 'United Kingdom', 'Austria'], dtype=object)

Subset analysis to Mexico City

In [8]:
mexico_city_airbnb_room_tbl = airbnb_room_table[airbnb_room_table["country"]=='Mexico']

In [9]:
mexico_city_airbnb_geocode = mexico_city_airbnb_room_tbl[["room_id", "room_type", "city", "price", "latitude", "longitude"]]
mexico_city_airbnb_geocode.shape

(5137, 6)

In [10]:
mexico_city_airbnb_geocode.head()

Unnamed: 0,room_id,room_type,city,price,latitude,longitude
0,13311781,Private room,Ciudad de México,11.0,19.402872,-99.182288
1,796477,Private room,Mexico City,41.0,19.416327,-99.175053
2,11089363,Private room,Ciudad de México,36.0,19.410631,-99.17299
3,12088827,Private room,Ciudad de México,56.0,19.442962,-99.204891
4,11692281,Private room,Ciudad de México,14.0,19.402246,-99.174304


Remove values from longitude column that are not coordinates

In [11]:
mask = mexico_city_airbnb_geocode["longitude"].str.len() == 10
mexico_city_airbnb_geocode= mexico_city_airbnb_geocode.loc[mask]
mexico_city_airbnb_geocode.shape

(5118, 6)

Only select private rooms for analysis

In [12]:
mexico_city_airbnb_geocode = mexico_city_airbnb_geocode[mexico_city_airbnb_geocode["room_type"]=='Private room']
mexico_city_airbnb_geocode.shape

(2684, 6)

In [13]:
mexico_city_airbnb_geocode["longitude"] = mexico_city_airbnb_geocode["longitude"].astype('float64').fillna(0.0)
mexico_city_airbnb_geocode.head()

Unnamed: 0,room_id,room_type,city,price,latitude,longitude
0,13311781,Private room,Ciudad de México,11.0,19.402872,-99.182288
1,796477,Private room,Mexico City,41.0,19.416327,-99.175053
2,11089363,Private room,Ciudad de México,36.0,19.410631,-99.17299
3,12088827,Private room,Ciudad de México,56.0,19.442962,-99.204891
4,11692281,Private room,Ciudad de México,14.0,19.402246,-99.174304


In [14]:
mexico_city_airbnb_geocode.to_csv("mexico_city_airbnb_geocode.csv", index=False)