# Toronto Dwellings Analysis

In this assignment, you will perform fundamental analysis for the Toronto dwellings market to allow potential real estate investors to choose rental investment properties.

In [None]:
# # imports
# import panel as pn
# pn.extension('plotly')
# import pandas as pd
# from panel.interact import interact
# from panel import widgets
# import plotly.express as px
# #import holoviews as hv
# import hvplot.pandas
# import hvplot.dask
# #from holoviews.operation.datashader import datashade, rasterize, dynspread
# #from datashader import reductions
# #import matplotlib.pyplot as plt
# import os
# from pathlib import Path
# from dotenv import load_dotenv

In [None]:
# imports
import panel as pn
pn.extension('plotly')
import plotly.express as px
import pandas as pd
import hvplot.pandas
import matplotlib.pyplot as plt
import os
from pathlib import Path
from dotenv import load_dotenv

In [None]:
# Read the Mapbox API key from the environment (a local .env file is loaded first)
load_dotenv()
map_box_api = os.getenv("mapbox")

# os.getenv returns None when the variable is absent; report that here rather
# than passing None on without any indication of what went wrong.
if not map_box_api:
    print('Warning: environment variable "mapbox" is not set; Mapbox maps may not render.')

# Register the token with Plotly Express
px.set_mapbox_access_token(map_box_api)

## Load Data

In [None]:
# Load the census extract into a DataFrame indexed by the "year" column
file_path = Path("../data") / "toronto_neighbourhoods_census_data.csv"
census_data = pd.read_csv(file_path, index_col="year")
census_data.head()

- - - 

## Dwelling Types Per Year

In this section, you will calculate the number of dwelling types per year. Visualize the results using bar charts and the Pandas plot function. 

**Hint:** Use the Pandas `groupby` function.

**Optional challenge:** Plot each bar chart in a different color.

In [None]:
# Keep only the dwelling-type columns; a label slice with .loc includes both end columns
dwelling_columns = slice('single_detached_house', 'other_house')
dt_only_data = census_data.loc[:, dwelling_columns]
dt_only_data.head()

In [None]:
# Sum every dwelling-type column per census year ("year" is the index of the frame)
dt_per_year = dt_only_data.groupby(level='year').sum()

In [None]:
# Helper create_bar_chart function
# def create_bar_chart(data,ttl,clr,ht,wd,xl,yl):
     
#     plt=data.hvplot.bar(title=ttl,color=clr,height=ht,width=wd,xlabel=xl,ylabel=yl,rot=90,yformatter='%f')
#     return plt

# dt_per_yearplt=create_bar_chart(dt_per_year,'Dwelling Types per year',"skyblue",800,1400,"Dwelling Types","Number")
# dt_per_yearplt
#df_dwelling_types = census_data_sum.drop(columns=["average_house_value","shelter_costs_owned","shelter_costs_rented"])

In [None]:
# Save the per-year dwelling totals as a CSV file.
# DataFrame.to_csv returns None when it is given a path, so the result is not
# bound to a name (the original stored that None in a variable called `to_csv`).
dt_per_year.to_csv(path_or_buf="../data/grouped_data_from_df.csv")

In [None]:
# Helper create_bar_chart function
def create_bar_chart(data, ttl, clr, ht, wd, xl, yl, rt, bgbg):
    """Build an hvplot bar chart from ``data`` and apply a background colour.

    Parameters
    ----------
    data : object exposing the ``.hvplot`` accessor
    ttl : chart title
    clr : bar colour
    ht, wd : height and width passed to hvplot
    xl, yl : x- and y-axis labels
    rt : tick-label rotation
    bgbg : background colour applied through ``.opts(bgcolor=...)``

    Returns
    -------
    The object returned by ``.opts`` on the bar plot.
    """
    bar_kwargs = dict(
        title=ttl,
        color=clr,
        height=ht,
        width=wd,
        xlabel=xl,
        ylabel=yl,
        rot=rt,
        yformatter='%f',
    )
    chart = data.hvplot.bar(**bar_kwargs)
    return chart.opts(bgcolor=bgbg)

In [None]:
# One bar chart per census year showing the dwelling-type totals for that year
# 2001 census
bar2001 = create_bar_chart(
    data=dt_per_year.loc[2001],
    ttl='2001 Census of Dwelling types',
    clr="red",
    ht=450,
    wd=450,
    xl="Dwelling Types",
    yl="Number of dwelling types in this year",
    rt=90,
    bgbg="lightblue",
)

# 2006 census
bar2006 = create_bar_chart(
    data=dt_per_year.loc[2006],
    ttl='2006 Census of Dwelling types',
    clr="yellow",
    ht=450,
    wd=450,
    xl="Dwelling Types",
    yl="Number of dwelling types in this year",
    rt=90,
    bgbg="lightsteelblue",
)

# 2011 census
bar2011 = create_bar_chart(
    data=dt_per_year.loc[2011],
    ttl='2011 Census of Dwelling types',
    clr="blue",
    ht=450,
    wd=450,
    xl="Dwelling Types",
    yl="Number of dwelling types in this year",
    rt=90,
    bgbg="lightsteelblue",
)

# 2016 census
bar2016 = create_bar_chart(
    data=dt_per_year.loc[2016],
    ttl='2016 Census of Dwelling types',
    clr="lawngreen",
    ht=450,
    wd=450,
    xl="Dwelling Types",
    yl="Number of dwelling types in this year",
    rt=90,
    bgbg="lightsteelblue",
)

# Compose the four charts into a single layout for display
bar2001 + bar2006 + bar2011 + bar2016

- - - 

## Average Monthly Shelter Costs in Toronto Per Year

In this section, you will calculate the average monthly shelter costs for owned and rented dwellings and the average house value for each year. Plot the results as a line chart.

**Optional challenge:** Plot each line chart in a different color.

In [None]:
# Average the owned and rented shelter-cost columns per census year, then divide by 12.
# NOTE(review): the /12 assumes the source columns hold annual amounts — TODO confirm
# against the data dictionary. If the CSV already stores monthly costs, this division
# understates every value twelve-fold.
avg_monthly_owned = census_data.groupby('year')['shelter_costs_owned'].mean()/12
avg_monthly_rented = census_data.groupby('year')['shelter_costs_rented'].mean()/12

In [None]:
# Display the owned-dwelling series computed in the previous cell; it is not
# recomputed here so the calculation lives in a single place.
avg_monthly_owned

In [None]:
# Display the rented-dwelling series computed two cells above; it is not
# recomputed here so the calculation lives in a single place.
avg_monthly_rented

In [None]:
# Helper create_line_chart function
def create_line_chart(data, ttl, xaxis, clr, bgbg):
    """Build a 400x400 hvplot line chart from ``data``.

    Parameters
    ----------
    data : object exposing the ``.hvplot`` accessor
    ttl : chart title
    xaxis : name passed to hvplot as ``x``
    clr : line colour
    bgbg : background colour applied through ``.opts(bgcolor=...)``

    Returns
    -------
    The object returned by ``.opts`` on the line plot.
    """
    line = data.hvplot.line(
        title=ttl,
        x=xaxis,
        color=clr,
        yformatter='%f',
    )
    # Size and background are applied as plot options, not as hvplot arguments
    return line.opts(bgcolor=bgbg, height=400, width=400)

In [None]:
# Two line charts: monthly shelter costs per year for owned and for rented dwellings
# Owned dwellings
line_chart_average_owned_cost = create_line_chart(
    data=avg_monthly_owned,
    ttl='Avg. Monthly Costs Owned in Toronto',
    xaxis='year',
    clr="red",
    bgbg='lightgreen',
)

# Rented dwellings
line_chart_average_rented_cost = create_line_chart(
    data=avg_monthly_rented,
    ttl='Avg. Monthly Costs Rented in Toronto',
    xaxis='year',
    clr="brown",
    bgbg='lightblue',
)

# Show both charts in one layout
line_chart_average_owned_cost + line_chart_average_rented_cost

## Average House Value per Year

In this section, you want to determine the average house value per year. An investor may want to understand better the sales price of the rental property over time. For example, a customer will want to know if they should expect an increase or decrease in the property value over time so they can determine how long to hold the rental property. You will visualize the `average_house_value` per year as a line chart.

In [None]:
# Mean house value per census year, rounded to two decimals
yearly_house_values = census_data.groupby('year')['average_house_value']
avg_houseval_peryear = yearly_house_values.mean().round(2)
avg_houseval_peryear

In [None]:
# Line chart of the average house value per year
line_chart_average_house_value = create_line_chart(
    data=avg_houseval_peryear,
    ttl='Avg. Value in Toronto',
    xaxis='year',
    clr="cadetblue",
    bgbg='khaki',
)
line_chart_average_house_value

- - - 

## Average House Value by Neighbourhood

In this section, you will use `hvplot` to create an interactive visualization of the average house value with a dropdown selector for the neighbourhood.

**Hint:** It will be easier to create a new DataFrame from grouping the data and calculating the mean house values for each year and neighbourhood.

In [None]:
# House value per neighbourhood and census year ("year" stays as the index).
# This only selects columns; no grouping or averaging is applied here.
# The empty pd.DataFrame() placeholder was removed because it was overwritten
# on the very next line.
avg_hsval_bynbhood = census_data[['neighbourhood', 'average_house_value']]

In [None]:
# Interactive line chart of the average house value per neighbourhood;
# the groupby argument is what produces the neighbourhood dropdown selector
def line_chart_with_dropdown_selector(data, xval, grpval, clr, bgbg):
    """Build an hvplot line chart with one selectable view per ``grpval`` value.

    Parameters
    ----------
    data : object exposing the ``.hvplot`` accessor
    xval : name passed to hvplot as ``x``
    grpval : name passed to hvplot as ``groupby``
    clr : line colour
    bgbg : background colour applied through ``.opts(bgcolor=...)``

    Returns
    -------
    The object returned by ``.opts`` on the line plot.
    """
    line_kwargs = {
        'x': xval,
        'groupby': grpval,
        'color': clr,
        'yformatter': '$%f',
        'colorbar': True,
    }
    grouped_lines = data.hvplot.line(**line_kwargs)
    return grouped_lines.opts(framewise=True, bgcolor=bgbg)

In [None]:
# Average house value over time, with a dropdown to pick the neighbourhood
linechart_avghouseval = line_chart_with_dropdown_selector(
    data=avg_hsval_bynbhood,
    xval='year',
    grpval='neighbourhood',
    clr="slateblue",
    bgbg="ghostwhite",
)
linechart_avghouseval

## Number of Dwelling Types per Year

In this section, you will use `hvplot` to create an interactive visualization of the average number of dwelling types per year with a dropdown selector for the neighbourhood.

In [None]:
# Fetch every column up to and including 'other_house' (a .loc label slice
# includes its end label), keeping "year" as the index.
# The empty pd.DataFrame() placeholder was removed because it was overwritten
# on the very next line.
num_dwtypes_bynbhood = census_data.loc[:, :'other_house']
num_dwtypes_bynbhood.head()

In [None]:
# Interactive bar chart of the number of dwelling types per neighbourhood
# Helper create_bar_chart2 function (bonus helper added by the notebook author)
def create_bar_chart2(data, xaxis, grp, rt, ht, wd):
    """Build an hvplot bar chart with one selectable view per ``grp`` value.

    Parameters
    ----------
    data : object exposing the ``.hvplot`` accessor
    xaxis : name passed to hvplot as ``x``
    grp : name passed to hvplot as ``groupby``
    rt : tick-label rotation
    ht, wd : height and width passed to hvplot

    Returns
    -------
    The object returned by ``data.hvplot.bar``.
    """
    bar_kwargs = {
        'x': xaxis,
        'groupby': grp,
        'legend': 'top_left',
        'rot': rt,
        'height': ht,
        'width': wd,
    }
    return data.hvplot.bar(**bar_kwargs)

In [None]:
# Dwelling-type counts per year, with a dropdown selector for the neighbourhood
barchart_dwtpes_peryr = create_bar_chart2(
    data=num_dwtypes_bynbhood,
    xaxis='year',
    grp="neighbourhood",
    rt=90,
    ht=700,
    wd=900,
)
barchart_dwtpes_peryr

- - - 

## The Top 10 Most Expensive Neighbourhoods

In this section, you will need to calculate the house value for each neighbourhood and then sort the values to obtain the top 10 most expensive neighbourhoods on average. Plot the results as a bar chart.

In [None]:
# Take the ten rows with the highest average_house_value and keep the two
# columns used for plotting ("year" remains the index).
# NOTE(review): nlargest ranks individual (year, neighbourhood) rows, not
# per-neighbourhood averages across years — TODO confirm this is the intended
# meaning of "top 10 most expensive neighbourhoods".
# The empty pd.DataFrame() placeholder was removed because it was overwritten
# on the very next line.
top10_nbhood = census_data.nlargest(10, 'average_house_value')[
    ['neighbourhood', 'average_house_value']
]
top10_nbhood

In [None]:
# Plotting the data from the top 10 expensive neighbourhoods
def create_top10_bar_chart(data, ttl, xaxis, yaxis, varyby, ht, wd, rt):
    """Build an hvplot bar chart with a dollar-formatted y axis.

    Named separately from ``create_bar_chart2`` defined earlier in the notebook,
    which takes different parameters. Reusing that name here replaced the
    earlier helper for the rest of the kernel session, so re-running the
    earlier call cell would fail.

    Parameters
    ----------
    data : object exposing the ``.hvplot`` accessor
    ttl : chart title
    xaxis, yaxis : names passed to hvplot as ``x`` and ``y``
    varyby : name passed to hvplot as ``by``
    ht, wd : height and width passed to hvplot
    rt : tick-label rotation

    Returns
    -------
    The object returned by ``data.hvplot.bar``.
    """
    return data.hvplot.bar(
        title=ttl,
        x=xaxis,
        y=yaxis,
        by=varyby,
        height=ht,
        width=wd,
        rot=rt,
        yformatter='$%f',
    )


# The variable keeps its original name, although the chart is a bar chart
line_chart_top10 = create_top10_bar_chart(
    top10_nbhood,
    'Top10 neighbourhoods in Toronto',
    'neighbourhood',
    'average_house_value',
    'year',
    400,
    1000,
    90,
)
line_chart_top10

- - - 

## Neighbourhood Map

In this section, you will read in neighbourhoods location data and build an interactive map with the average house value per neighbourhood. Use a `scatter_mapbox` from Plotly express to create the visualization. Remember, you will need your Mapbox API key for this.

### Load Location Data

In [None]:
# Read the neighbourhood coordinates file into a DataFrame
file_path = Path("../data") / "toronto_neighbourhoods_coordinates.csv"
df_neighbourhood_locations = pd.read_csv(file_path)
df_neighbourhood_locations.head()

### Data Preparation

You will need to join the location data with the mean values per neighbourhood.

1. Calculate the mean values for each neighbourhood.

2. Join the average values with the neighbourhood locations.

In [None]:
# Preview the house values by neighbourhood selected earlier in the notebook.
# No aggregation happens in this cell; it only displays the first rows.
avg_hsval_bynbhood.head()

In [None]:
# Step 1: average each neighbourhood's house value over all census years.
# The original joined the un-aggregated per-year rows, so each neighbourhood
# could appear on the map once per census year.
mean_value_by_nbhood = (
    avg_hsval_bynbhood
    .groupby('neighbourhood', as_index=False)['average_house_value']
    .mean()
)

# Step 2: attach the coordinates columns to each neighbourhood's mean value.
# Neighbourhoods without a match in the locations file are kept, with missing
# coordinates, as in the original join.
nbhood_locs = mean_value_by_nbhood.join(
    df_neighbourhood_locations.set_index('neighbourhood'),
    on='neighbourhood',
)
nbhood_locs.head()

### Mapbox Visualization

Plot the average values per neighbourhood using a Plotly express `scatter_mapbox` visualization.

In [None]:
# Create a scatter mapbox to analyze neighbourhood info.
# The figure is bound to `neighbourhood_map` rather than `map`, so the builtin
# map() is not shadowed for the rest of the kernel session.
neighbourhood_map = px.scatter_mapbox(
    nbhood_locs,
    lat="lat",
    lon="lon",
    size="average_house_value",   # marker size scales with house value
    color="neighbourhood",        # one colour per neighbourhood
    zoom=9,
    width=1000,
    height=450,
)
neighbourhood_map.show()

- - -

## Cost Analysis - Optional Challenge

In this section, you will use Plotly Express to create a couple of plots that investors can interactively filter to explore various factors related to house values in Toronto's neighbourhoods.

### Create a bar chart row facet to plot the average house values for all of Toronto's neighbourhoods per year

In [None]:
# Turn the "year" index into a regular column so it can be used for faceting
nbhood_data = avg_hsval_bynbhood.reset_index()

# One bar-chart row per census year
fig = px.bar(
    nbhood_data,
    x="neighbourhood",
    y="average_house_value",
    facet_row="year",
    height=1000,
)
fig.show()

### Create a sunburst chart to conduct a costs analysis of most expensive neighbourhoods in Toronto per year

In [None]:
# Preview the ten highest-value rows selected earlier in the notebook;
# the sunburst chart in the next cell is built from this frame.
top10_nbhood.head()

In [None]:
# "year" is the index of top10_nbhood, and Plotly Express resolves string
# arguments against columns only. Move it into a column first, as the facet
# bar chart cell does with reset_index(), so that path and color can use it.
fig = px.sunburst(
    top10_nbhood.reset_index(),
    path=['year', 'neighbourhood'],
    values='average_house_value',
    color='year',
)
fig.show()