In [28]:
import numpy as np
import pandas as pd

In [2]:
file_path = "C://Users/vapog/Downloads/airbnb_data.csv"

# Examining the dataframe
df = pd.read_csv(file_path)
print(df.head(10))

     id                                              name  host_id  \
0  2539                Clean & quiet apt home by the park     2787   
1  2595                             Skylit Midtown Castle     2845   
2  3647               THE VILLAGE OF HARLEM....NEW YORK !     4632   
3  3831                   Cozy Entire Floor of Brownstone     4869   
4  5022  Entire Apt: Spacious Studio/Loft by central park     7192   
5  5099         Large Cozy 1 BR Apartment In Midtown East     7322   
6  5121                                   BlissArtsSpace!     7356   
7  5178                  Large Furnished Room Near B'way      8967   
8  5203                Cozy Clean Guest Room - Family Apt     7490   
9  5238                Cute & Cozy Lower East Side 1 bdrm     7549   

     host_name neighbourhood_group       neighbourhood  latitude  longitude  \
0         John            Brooklyn          Kensington  40.64749  -73.97237   
1     Jennifer           Manhattan             Midtown  40.75362  -73.9

In [3]:
# Number of rentals by New York borough
bronx_df = df[df['neighbourhood_group'] == 'Bronx']
print("Bronx: # of rentals ",bronx_df.shape[0])
brooklyn_df = df[df['neighbourhood_group'] == 'Brooklyn']
print("Brooklyn: # of rentals ",brooklyn_df.shape[0])
manhattan_df = df[df['neighbourhood_group'] == 'Manhattan']
print("Manhattan: # of rentals ",manhattan_df.shape[0])
staten_island_df = df[df['neighbourhood_group'] == 'Staten Island']
print("Staten Island: # of rentals ",staten_island_df.shape[0])

Bronx: # of rentals  1091
Brooklyn: # of rentals  20104
Manhattan: # of rentals  21661
Staten Island: # of rentals  373


In [9]:
# Most popular neighborhood by number of reviews
sorted_df = df.sort_values(by='number_of_reviews', ascending=False)
locations_and_num_reviews_df = sorted_df[['neighbourhood','number_of_reviews']]
top_20_df = locations_and_num_reviews_df.head(20)

# Group by neighborhood and take aggregate mean
grouped_df = top_20_df.groupby('neighbourhood')['number_of_reviews'].mean()
print(grouped_df)

neighbourhood
Astoria            441.0
Bushwick           480.0
East Elmhurst      485.2
East Village       451.0
Flushing           474.0
Harlem             564.0
Jamaica            553.0
Lower East Side    540.0
Park Slope         488.0
South Slope        467.0
Tribeca            447.0
Name: number_of_reviews, dtype: float64


In [33]:
# Filtering for a client by price in Manhattan's Upper East Side
upper_east_df = df[df['neighbourhood'] == 'Upper East Side']
ninetieth_percentile = np.quantile(upper_east_df['number_of_reviews'], 0.85) # Normalizes the results by getting rid of bad postings
upper_east_df = upper_east_df[upper_east_df['number_of_reviews'] >= ninetieth_percentile]


## Used to examine the unique room types available
print(df['room_type'].unique())

## Cheapest private room type rental in Manhattan's Upper East Side
private_rooms_df = upper_east_df[upper_east_df['room_type'] == 'Private room']
private_rooms_df = private_rooms_df.sort_values('price', ascending=True)
private_rooms_df = private_rooms_df[['neighbourhood','room_type','price']]
print(private_rooms_df.head(5))

## Cheapest entire homes/appartments
entire_homes_df = upper_east_df[upper_east_df['room_type'] == 'Entire home/apt']
entire_homes_df = entire_homes_df.sort_values('price', ascending=True)
entire_homes_df = entire_homes_df[['neighbourhood','room_type','price']]
print(entire_homes_df.head(5))

['Private room' 'Entire home/apt' 'Shared room']
         neighbourhood     room_type  price
8416   Upper East Side  Private room     49
40185  Upper East Side  Private room     50
35976  Upper East Side  Private room     60
21283  Upper East Side  Private room     65
19830  Upper East Side  Private room     65
         neighbourhood        room_type  price
18882  Upper East Side  Entire home/apt     69
7181   Upper East Side  Entire home/apt     75
5759   Upper East Side  Entire home/apt     92
22122  Upper East Side  Entire home/apt     95
27040  Upper East Side  Entire home/apt     95
