In [163]:
import numpy as np
import pandas as pd
import plotly.express as px

In [164]:
# Load the pre-cleaned listings table from the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="./listings_cleaned.csv")

In [165]:
# Preview the first rows of the loaded table as a quick sanity check.
df.head()

Unnamed: 0,id,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,calculated_host_listings_count,availability_365,number_of_reviews_ltm,rating,bedrooms,baths
0,3686,Historic Anacostia,38.86339,-76.98889,Private room,67.0,31,84,1,365,3,4.64,1.0,1.0
1,3943,"Edgewood, Bloomingdale, Truxton Circle, Eckington",38.91195,-77.00456,Private room,66.0,1,495,5,252,46,4.83,1.0,1.0
2,5589,"Kalorama Heights, Adams Morgan, Lanier Heights",38.91887,-77.04008,Entire home/apt,60.0,31,96,1,121,1,4.5,1.0,1.0
3,6165,"Brightwood Park, Crestwood, Petworth",38.95331,-77.03624,Private room,82.0,31,23,3,365,1,4.95,1.0,5.0
4,7103,"Spring Valley, Palisades, Wesley Heights, Foxh...",38.91999,-77.09774,Entire home/apt,66.0,31,91,33,364,0,4.79,1.0,1.0


In [166]:
# Inspect the dtype pandas inferred for each column when reading the CSV.
df.dtypes

id                                  int64
neighbourhood                      object
latitude                          float64
longitude                         float64
room_type                          object
price                             float64
minimum_nights                      int64
number_of_reviews                   int64
calculated_host_listings_count      int64
availability_365                    int64
number_of_reviews_ltm               int64
rating                            float64
bedrooms                          float64
baths                             float64
dtype: object

In [167]:
# Coerce the three numeric feature columns in one column-wise pass;
# any value that cannot be parsed as a number becomes NaN.
numeric_cols = ['rating', 'bedrooms', 'baths']
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

In [168]:
# Re-inspect the column dtypes after the numeric coercion of rating/bedrooms/baths.
df.dtypes

id                                  int64
neighbourhood                      object
latitude                          float64
longitude                         float64
room_type                          object
price                             float64
minimum_nights                      int64
number_of_reviews                   int64
calculated_host_listings_count      int64
availability_365                    int64
number_of_reviews_ltm               int64
rating                            float64
bedrooms                          float64
baths                             float64
dtype: object

In [169]:
# Keep only listings where every plotted field is present. The explicit
# .copy() makes df_cleaned an independent frame, so the column assignment
# below writes to it directly rather than to a filtered selection of df
# (which would raise pandas' SettingWithCopyWarning).
df_cleaned = df.dropna(subset=['price', 'bedrooms', 'baths', 'rating']).copy()

# Multi-area labels such as "Edgewood, Bloomingdale, ..." are shortened to
# the first area name so each listing maps to a single neighbourhood.
df_cleaned['neighbourhood'] = df_cleaned['neighbourhood'].str.split(',').str[0]

In [170]:
# NOTE: DataFrame.size is the total number of cells (rows x columns), not the
# number of rows; use len(df_cleaned) or df_cleaned.shape for the row count.
df_cleaned.size

43666

In [171]:
# Rank neighbourhoods by number of listings and keep the ten most frequent names.
neighbourhood_counts = df_cleaned['neighbourhood'].value_counts()
top_10_neighbourhood_names = neighbourhood_counts.head(10).index.tolist()

# Restrict the data to listings located in one of those top-10 neighbourhoods.
in_top_10 = df_cleaned['neighbourhood'].isin(top_10_neighbourhood_names)
df_cleaned = df_cleaned[in_top_10]


In [172]:
# Palette of ten colours, one for each neighbourhood kept by the top-10 filter.
color_map = [
    "#EE7EA0", "#7D8BE0", "#BCC07B", "#D5EDF8", "#9A81B0",
    "#BE715B", "#E5DACA", "#EA7D70", "#FFD7D6", "#FFAFAE",
]

# Pair each neighbourhood (in order of first appearance) with a palette colour.
# zip stops at the shorter sequence, so extra names or colours are ignored.
unique_neighbourhoods = df_cleaned['neighbourhood'].unique()
color_dict = dict(zip(unique_neighbourhoods, color_map))

# Bubble chart of rating against price (log-scaled x axis); marker size
# follows number_of_reviews and colour identifies the neighbourhood.
fig = px.scatter(
    df_cleaned,
    x="price",
    y="rating",
    size='number_of_reviews',
    size_max=50,
    color="neighbourhood",
    color_discrete_map=color_dict,
    hover_name="neighbourhood",
    log_x=True,
)
# White for both the plotting area and the surrounding paper.
fig.update_layout(plot_bgcolor='white', paper_bgcolor='white')
fig.show()
fig.write_html("scatter_plot.html")


In [173]:
# Pink gradient for the continuous colour axis, given as [position, hex]
# stops running from the lightest shade (0.0) to the darkest (1.0).
color_scale = [
    [0.0, "#FFE5EC"],
    [0.25, "#FFCCE5"],
    [0.5, "#EE7EA0"],
    [0.75, "#C5486D"],
    [1.0, "#8B0A50"],
]

# One vertical axis per column, drawn in this order, with display labels.
axis_columns = ['rating', 'bedrooms', 'baths', 'price']
axis_labels = {
    'rating': 'Rating',
    'bedrooms': 'Bedrooms',
    'baths': 'Bathrooms',
    'price': 'Price',
}

# Each listing is one line across the axes, coloured by its rating.
fig = px.parallel_coordinates(
    df_cleaned,
    dimensions=axis_columns,
    labels=axis_labels,
    color='rating',
    color_continuous_scale=color_scale,
)
# Same background for the plotting area and the surrounding paper.
fig.update_layout(plot_bgcolor='whitesmoke', paper_bgcolor='whitesmoke')
fig.show()
fig.write_html("parallel_coordinates.html")

In [174]:
# Price distribution per neighbourhood as notched box plots, coloured with the
# neighbourhood-to-colour mapping built for the scatter plot (color_dict).
fig = px.box(
    df_cleaned,
    x="neighbourhood",
    y="price",
    color="neighbourhood",
    color_discrete_map=color_dict,
    notched=True,  # set to False for plain (un-notched) boxes
)
# Same background for the plotting area and the surrounding paper.
fig.update_layout(plot_bgcolor='whitesmoke', paper_bgcolor='whitesmoke')
fig.show()
fig.write_html("box_plot.html")


In [175]:

# 2-D histogram of listings by bedroom and bathroom count; the colour scale
# runs from white through pink to dark magenta.
fig = px.density_heatmap(
    df_cleaned,
    x="bedrooms",
    y="baths",
    color_continuous_scale=[
        "#FFFFFF",
        "#FFE5EC",
        "#FFCCE5",
        "#EE7EA0",
        "#C5486D",
        "#8B0A50",
    ],
)

fig.show()
fig.write_html("density_heatmap.html")
