In [None]:
# Initial imports
import pandas as pd
import numpy as np
import hvplot.pandas
import holoviews as hv
from pathlib import Path
import seaborn as sns
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.graph_objects as go
from mortgage import Loan

import warnings
warnings.filterwarnings("ignore")

## Reading our data into dataFrames
Data includes:
 * Latitude and longitude of all Canadian provinces, from [latlong.net](https://www.latlong.net/category/provinces-40-60.html).
 * Price range for houses in different provinces of Canada from excel file and avg household income per province using csv file.
 * Stock Prices data for *RioCan(REI-UN.TO)* Real Estate Investment Trust  & *Tricon Residential(TCN.TO)* a Canadian real estate company

In [None]:
# Fetch the latitude and longitude values for Canada's provinces from latlong.net.
# pd.read_html returns a list of DataFrames (one per HTML table found on the page),
# so lat_lon_df is a list at this point, not yet a DataFrame.
lat_lon_df = pd.read_html("https://www.latlong.net/category/provinces-40-60.html")

In [None]:
# Convert the scraped table list into a single DataFrame.
# pd.read_html yields a list of tables; when this cell is re-run lat_lon_df is
# already a DataFrame, so both forms are accepted.
scraped_tables = lat_lon_df if isinstance(lat_lon_df, list) else [lat_lon_df]

# Stack the table(s) with pandas and keep the first three columns. This avoids
# hard-coding the (10, 3) shape and avoids the round trip through an object
# ndarray, so the column dtypes parsed by read_html are preserved.
lat_lon_df = pd.concat(scraped_tables, ignore_index=True).iloc[:, :3].copy()
lat_lon_df.columns = ['Province', 'Latitude', 'Longitude']

# Remove the unwanted ", Canada" suffix (literal match, not a regular expression)
lat_lon_df['Province'] = lat_lon_df['Province'].str.replace(', Canada', '', regex=False)
# Replace 'Saskatchewan Province' and 'Quebec Province' with the plain province names
lat_lon_df['Province'] = lat_lon_df['Province'].replace({
    'Saskatchewan Province': 'Saskatchewan',
    'Quebec Province': 'Quebec'
})
# Display the latitude & longitude DataFrame
lat_lon_df

In [None]:
# Load the two local data sets from the Resources folder.

# Workbook of absorbed units by price range. sheet_name=None makes read_excel return a
# dict of DataFrames keyed by sheet name; header=1 takes the column names from the
# second row of every sheet.
absorbed_units_path = Path("""./Resources/absorbed-units-price-range-canada-provinces-for-all-dwelling-types-2019-2022-en.xlsx""")
absorbed_unit_df = pd.read_excel(absorbed_units_path, sheet_name=None, header=1)

# Average household income for each province across years
avg_income_path = Path('./Resources/avg_household_income_canada.csv')
avg_income_df = pd.read_csv(avg_income_path)

In [None]:
#Function to fetch the Province names from the dictionary of DataFrames (absorbed_unit_df)
def getProvince(absorbed_unit_df):
    """Return the keys of ``absorbed_unit_df`` as a list.

    Parameters
    ----------
    absorbed_unit_df : mapping
        Mapping whose keys are returned; in this notebook it is the dict produced
        by ``pd.read_excel(..., sheet_name=None)``.

    Returns
    -------
    list
        The keys, in the mapping's iteration order. Empty for an empty mapping.
    """
    return list(absorbed_unit_df.keys())

In [None]:
#Creating a DataFrame that contains all the housing data
# Collect every sheet first and concatenate once: growing a frame with pd.concat inside
# a loop copies the accumulated rows on each iteration and starts from an empty frame.
province_frames = [absorbed_unit_df[province] for province in getProvince(absorbed_unit_df)]
canada_housing_data_df = pd.concat(province_frames, axis = 0)

#Set 'Year' as the index of the DataFrame
canada_housing_data_df = canada_housing_data_df.set_index('Year')
display(canada_housing_data_df.tail())

In [None]:
# Show the distinct years present in the index ('Year'); np.unique returns them sorted
np.unique(canada_housing_data_df.index)

In [None]:
# Fill NaN values with zero. fillna returns a new frame, which is stored in df1;
# canada_housing_data_df itself is left unchanged.
# NOTE(review): df1 is not referenced again in the visible part of this notebook —
# confirm whether later cells were meant to work on it.
df1=canada_housing_data_df.fillna(value = 0)

In [None]:
#Drop rows where all Dwelling Type Columns {Absorbed Single, Semi, Row & other} are zero or missing
absorbed_columns = ['Absorbed - Single','Absorbed - Semi','Absorbed - Row','Absorbed - Apartment and other']

# NaN != 0 evaluates to True, so missing counts must be treated as zero before the
# comparison; otherwise rows with no data at all would survive the filter.
# Only the unit counts are filled, the price columns are left untouched.
has_absorbed_units = canada_housing_data_df[absorbed_columns].fillna(0).ne(0).any(axis = 1)

# .copy() so that the column assignment below works on an independent frame, not a slice
canada_housing_data_df = canada_housing_data_df.loc[has_absorbed_units].copy()

#Calculate the price midpoint as 'Avg of Price Range' and drop the two bound columns
#(mean(axis=1) skips NaN, so a row with only one bound keeps that bound as its midpoint)
canada_housing_data_df['Avg of Price Range'] = canada_housing_data_df[['Price Range low($)', 'Price Range high($)']].mean(axis=1)
canada_housing_data_df = canada_housing_data_df.drop(columns=['Price Range low($)', 'Price Range high($)'])

#Display sample data in the DataFrame
display(canada_housing_data_df.tail())

In [None]:
# Put the columns of canada_housing_data_df in reverse alphabetical order
columns_descending = sorted(canada_housing_data_df.columns, reverse = True)
canada_housing_data_df = canada_housing_data_df.reindex(columns=columns_descending)
display(canada_housing_data_df.tail())

In [None]:
# Shorten the dwelling-type column names by dropping the 'Absorbed - ' prefix
new_column_names = {
    'Absorbed - Single': 'Single',
    'Absorbed - Semi': 'Semi',
    'Absorbed - Row': 'Row',
    'Absorbed - Apartment and other': 'Apartment and other',
}

canada_housing_data_df = canada_housing_data_df.rename(new_column_names, axis='columns')
canada_housing_data_df.tail()

## Q1. Identify price trend in housing across different provinces in Canada over the years.

### Logic: Using Weighted Average Price for each house type

In our data, we have different price ranges for each house type. To offer a more comprehensive view of the housing market trend, we took the weighted average price for each house type.

For example:

Weighted Average Price (Single-Family House, 2019)=

[(# of unit from 1st price range in 2019 * midpoint price of 1st price range) / total unit in 2019] +

[(# of unit from 2nd price range in 2019 * midpoint price of 2nd price range) / total unit in 2019] +
 
... (so on for all price ranges)

In [None]:
# Dwelling types to process; these are the (renamed) unit-count columns of canada_housing_data_df
dwelling_types = ['Single', 'Semi', 'Row', 'Apartment and other']

# Accumulator: ends up with one 'Weighted_Avg_Price_<type>' column per dwelling type
housing_price_df = pd.DataFrame()

# Build the weighted average price per (Province, Year) for each dwelling type
for dwelling_type in dwelling_types:
    # units in the price band * midpoint of the band.
    # Note: this adds a 'weighted_price_<type>' column to canada_housing_data_df itself.
    canada_housing_data_df[f'weighted_price_{dwelling_type}'] = canada_housing_data_df[f'{dwelling_type}'] * canada_housing_data_df['Avg of Price Range']

    # Numerator: sum of (units * midpoint) per Province and Year
    weighted_price_grouped = canada_housing_data_df.groupby(['Province', 'Year'])[f'weighted_price_{dwelling_type}'].sum()

    # Denominator: total absorbed units per Province and Year
    absorbed_units_grouped = canada_housing_data_df.groupby(['Province', 'Year'])[f'{dwelling_type}'].sum()

    # Put numerator and denominator side by side (both are indexed by Province, Year)
    merged = pd.DataFrame(weighted_price_grouped)
    merged[f'{dwelling_type}'] = absorbed_units_grouped

    # Weighted average price = numerator / denominator
    # (a group with zero units gives 0 / 0, i.e. NaN)
    merged[f'Weighted_Avg_Price_{dwelling_type}'] = merged[f'weighted_price_{dwelling_type}'] / merged[f'{dwelling_type}']

    # Keep only the final weighted-average column
    merged.drop([f'weighted_price_{dwelling_type}', f'{dwelling_type}'], axis=1, inplace=True)

    # First dwelling type seeds the result; later ones are joined on (Province, Year)
    if housing_price_df.empty:
        housing_price_df = merged
    else:
        housing_price_df = pd.merge(housing_price_df, merged, on=['Province', 'Year'])

# Turn Province/Year back into ordinary columns and round to 2 decimals
housing_price_df = housing_price_df.reset_index().round(2)

In [None]:
# Preview the last rows of the weighted-average price table
housing_price_df.tail()

In [None]:
# Define a function to plot trend for each dwelling type
def plot_trend(dwelling_type):
    """Return a line plot of the weighted average price of one dwelling type.

    Reads the notebook-level ``housing_price_df`` and draws one line per province,
    with 'Year' on the x axis.

    Parameters
    ----------
    dwelling_type : str
        Suffix of the ``Weighted_Avg_Price_<dwelling_type>`` column to plot.

    Returns
    -------
    The hvplot/HoloViews object produced by ``hvplot.line(...).opts(...)``.
    """
    price_column = f'Weighted_Avg_Price_{dwelling_type}'

    # Wide -> long: one row per (Year, Province) with the price in the 'value' column
    long_prices = housing_price_df.melt(id_vars=['Year', 'Province'], value_vars=[price_column])

    # One line per province over the years
    price_lines = long_prices.hvplot.line('Year', 'value', by='Province', width=800)

    return price_lines.opts(yformatter='%.0f', title=f'Price trend for {dwelling_type}')

In [None]:
# housing_price_df already carries 'Province' and 'Year' as ordinary columns (its index
# was reset when it was built), so no further reset_index is needed here. Repeating it
# in place would add a stray 'index' column each time this cell is run.

#Plot Price Trend for all the dwelling type
plot_trend('Single')+plot_trend('Semi')+plot_trend('Row')+plot_trend('Apartment and other')


## Q2. Which province has experienced the most significant growth in housing prices?

### Logic: Using Single-Family House price as reference for return on housing market.

The data ('housing_price_df') includes the weighted average price for houses across different provinces in Canada. For the purposes of our analysis, we have decided to use the price of single houses as a reference point to gauge returns in the housing market. This decision was motivated by several factors.

First, single houses are a common and popular type of residential property in Canada. They represent a significant portion of the housing market and thus serve as a reliable indicator of market trends.

Second, by focusing on one specific type of property, we can control for variations in price that might occur due to differences in property type. This ensures that our analysis of housing returns is more consistent and reliable, as it won't be influenced by differences in the characteristics or demand for different types of properties.

In [None]:
# Keep only the Single-Family House price, indexed by (Year, Province)
return_house_df = (
    housing_price_df[['Year', 'Province','Weighted_Avg_Price_Single']]
    .set_index(['Year', 'Province'])
)

# Year-over-year change within each province, expressed in percent
single_price_by_province = return_house_df.groupby('Province')['Weighted_Avg_Price_Single']
return_house_df['Annual Return %'] = single_price_by_province.pct_change() * 100

return_house_df.tail()

In [None]:
# Cumulative growth of the Single-Family House price between 2019 and 2022, in percent,
# plotted as one bar per province.
single_prices = return_house_df['Weighted_Avg_Price_Single']
price_2019 = single_prices.loc[2019]
price_2022 = single_prices.loc[2022]
CGR = ((price_2022 / price_2019) - 1)*100

CGR_df = pd.DataFrame(CGR).rename(columns={"Weighted_Avg_Price_Single": "Cumulative Return %"})
CGR_df.hvplot.bar(rot=30).opts(title="Cumulative growth for Single-Family House in the past 4 years")

In [None]:
# Bar chart of the annual return of Single-Family Houses, one view per year
# (groupby='Year'). dropna() removes each province's first year, for which
# pct_change has no previous value.
return_house_df['Annual Return %'].dropna().hvplot.bar(groupby='Year',rot=30,height=400)

## Q3. What is the most popular type of house and its corresponding units sold in each province in 2022? 

In [None]:
# Move 'Year' out of the index and reshape from wide to long, so that every row holds
# a single data point: the units sold of one house type in one price band.
sales_house_df = pd.melt(
    canada_housing_data_df.reset_index(),
    id_vars=['Year','Province', 'Avg of Price Range'],
    value_vars=['Single', 'Semi', 'Row', 'Apartment and other'],
    var_name='House Type',
    value_name='Units Sold',
)

In [None]:
# Total 'Units Sold' for every (Province, Year, House Type) combination,
# with the grouping keys kept as ordinary columns
grouped_sales_house_df = sales_house_df.groupby(
    ['Province', 'Year', 'House Type'], as_index=False
)['Units Sold'].sum()
grouped_sales_house_df.tail()

In [None]:
# For each (Province, Year) group, get the row label of the house type with the
# largest 'Units Sold' (idxmax returns the first such label on ties)
idx = grouped_sales_house_df.groupby(['Province', 'Year'])['Units Sold'].idxmax()

# Select those rows: one most-popular house type per province and year
most_popular_type_df=grouped_sales_house_df.loc[idx]
most_popular_type_df.tail()

In [None]:
# Keep only the 2022 rows of the most-popular-type table
df=most_popular_type_df[most_popular_type_df['Year']==2022]

# Attach Latitude/Longitude by province name. how='left' keeps every province row
# even when lat_lon_df has no matching name (its coordinates are then NaN).
# Note: `df` is the frame the map and label plots below are built from.
df=df.merge(lat_lon_df, on='Province', how='left')
df

In [None]:
# Create a geographic point plot with hvplot (geo=True):
#  - each point is placed at the province's 'Longitude' / 'Latitude'
#  - marker size follows 'Units Sold' (more units -> bigger circle), scaled by `scale`
#  - marker colour follows 'House Type'
map_plot = df.hvplot.points(
    'Longitude', 
    'Latitude', 
    geo=True, 
    size='Units Sold',
    color='House Type',
    frame_width=700,
    frame_height=500,
    title='Most popular house type by provinces',
    scale=0.6
    )

In [None]:
# Create a label layer with hvplot: the 'Province' name is drawn at the
# corresponding 'Longitude' / 'Latitude'. Frame size matches map_plot so the
# two layers can be overlaid.
label_plot=df.hvplot.labels(
    'Longitude', 
    'Latitude', 
    text='Province',
    geo=True, 
    color='white',
    frame_width=700,
    frame_height=500,
    text_font_size='8pt',
    text_alpha=0.5
    )

In [None]:
# Use OpenStreetMap tiles as the (semi-transparent) base map
tiles = hv.element.tiles.OSM().opts(alpha=0.5, width=700, height=500)

# Overlay order matters: tiles at the bottom, then the points, then the labels on top.
# The last expression displays the combined plot.
final_plot = tiles*map_plot*label_plot
final_plot

## Q4. Which price range demonstrates the highest market demand, indicated by the most absorbed units, and how does this demand vary across different provinces?

In [None]:
# Total units per row (all four dwelling types added together), then the total per
# Province and Year. This adds a 'Total Absorbed Units' column to canada_housing_data_df.
# NOTE(review): sum_of_sales is not used again in this cell.
canada_housing_data_df['Total Absorbed Units']=canada_housing_data_df[["Single","Semi","Row","Apartment and other"]].sum(axis=1)
sum_of_sales=canada_housing_data_df.groupby(['Province','Year'])['Total Absorbed Units'].sum()

# Sum of absorbed units for each price-range midpoint, over all provinces and years
demand_data = canada_housing_data_df.groupby('Avg of Price Range')['Total Absorbed Units'].sum()

# Price-range midpoint with the highest total demand
# NOTE(review): computed but neither displayed nor used below.
highest_demand_price_range = demand_data.idxmax()

# Demand table: one row per province, one column per price-range midpoint;
# combinations that never occur are filled with 0
demand_by_province = canada_housing_data_df.groupby(['Province', 'Avg of Price Range'])['Total Absorbed Units'].sum().unstack()
demand_by_province = demand_by_province.fillna(0)
# The unique price-range midpoints (column labels of the demand table)
price_ranges = demand_by_province.columns

# Column totals of the demand table, i.e. total units per price range across provinces.
# NOTE(review): the original comment described this as the total "for each province",
# but DataFrame.sum() with the default axis sums down each column — confirm which
# normalisation is intended for the bubble sizes below.
total_absorbed_units = demand_by_province.sum()

# Set up the scatter plot
fig, ax = plt.subplots(figsize=(12, 8))

# One row of bubbles per province: x = price-range midpoint, y = province,
# bubble area = this province's share of the price range's total units * 500
for province in demand_by_province.index:
    x = price_ranges
    y = [province] * len(price_ranges)
    sizes = demand_by_province.loc[province] / total_absorbed_units * 500
    ax.scatter(x, y, s=sizes, alpha=0.7, label=province)

# Customize the plot
ax.set_title('Market Demand by Price Range across Provinces')
ax.set_xlabel('Price Range in Million($)')
ax.set_ylabel('Province')
ax.legend(title='Province')

plt.show()

## Q5. Given the average income for each province, what type of house can the average person or household realistically afford? How does this vary across different provinces?

We are most interested in the recent year (year 2022).

In [None]:
def cal_affordable_price(row, income_share=0.3, years=25):
    """Return the affordable house price for one income record.

    The price is ``row['Avg Income'] * income_share * years``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Record providing an ``'Avg Income'`` value.
    income_share : float, default 0.3
        Fraction of income counted towards housing.
    years : int or float, default 25
        Multiplier applied to the yearly amount (presumably the number of years of
        payments; 25 is also the default mortgage term used later in the notebook).

    Returns
    -------
    Same numeric type as the product; with the defaults the result is identical to
    the former hard-coded ``income * 0.3 * 25``.
    """
    return row['Avg Income'] * income_share * years

# Affordable price for every income row, computed row by row with cal_affordable_price
avg_income_df['Affordable Price'] = avg_income_df.apply(cal_affordable_price, axis=1)
# Index the income table by (Year, Province) so it lines up with the price table.
# reset_index() first turns whatever the current index is back into columns.
avg_income_df = avg_income_df.reset_index().set_index(['Year', 'Province'])

# Same (Year, Province) index for the weighted-average price table
housing_price_df = housing_price_df.reset_index().set_index(['Year', 'Province'])
# Side-by-side join on the index; join='inner' keeps only the (Year, Province)
# pairs present in both tables
affordable_house_df = pd.concat([housing_price_df, avg_income_df], join='inner', axis=1)

# 2022 slice; .loc[2022] selects on the first index level (Year)
affordable_house_2022=affordable_house_df.loc[2022]

In [None]:
fig = go.Figure()
colors = {"Single House":"blue",
          "Semi House":"yellow",
          "Row House":"green",
          "Apartment and other types":"purple",
          "Affordable Price":"red"}

# (legend label, price column) for every house type, in the order the traces are added
house_series = [
    ("Single House", 'Weighted_Avg_Price_Single'),
    ("Semi House", 'Weighted_Avg_Price_Semi'),
    ("Row House", 'Weighted_Avg_Price_Row'),
    ("Apartment and other types", 'Weighted_Avg_Price_Apartment and other'),
]

provinces_2022 = affordable_house_2022.index.unique()
# Legend entries are shown once only, on the traces of the first province
first_province = provinces_2022[0]

# One column of markers per province: the affordable price, then each house type
for province in provinces_2022:
    subset_df = affordable_house_2022.loc[province]
    aff_price = subset_df['Affordable Price']
    show_in_legend = province == first_province

    # Marker for the affordable price (hover text shows the formatted amount)
    fig.add_trace(go.Scatter(
        x=[province],
        y=[aff_price],
        mode='markers',
        marker=dict(size=10, color=colors['Affordable Price']),
        text=[f"Affordable Price: ${aff_price:,.2f}"],
        hovertemplate="%{text}",
        legendgroup='Affordable Price',
        name='Affordable Price',
        showlegend=show_in_legend
    ))

    # Marker for each type of house (hover text shows the type name)
    for label, price_column in house_series:
        fig.add_trace(go.Scatter(
            x=[province],
            y=[subset_df[price_column]],
            mode='markers',
            marker=dict(size=10, color=colors[label]),
            text=[label],
            hovertemplate="%{text}",
            legendgroup=label,
            name=label,
            showlegend=show_in_legend
        ))


# Figure-level layout: title, axis titles, hover behaviour and height
fig.update_layout(
    title='Affordable Price vs. Housing Prices for Each Province 2022',
    xaxis=dict(title='Province'),
    yaxis=dict(title='Price'),
    hovermode='closest',
    height=700
)

# Show the plot
fig.show()


## Q6. Identify provinces in Canada where the user could potentially afford to buy a house, based on their personal financial situation.

In [None]:
# Keep only the 2022 rows (selection on the 'Year' index level); the result is
# indexed by province and is read by find_afforable_house below
affordable_house_2022=affordable_house_df.loc[2022]

In [None]:
def find_afforable_house():
    """Interactively list the (province, dwelling type) pairs the user can afford.

    Prompts for the mortgage rate, mortgage term, down-payment percentage and income
    (an empty answer selects the default shown in the prompt). For every province in
    the notebook-level ``affordable_house_2022`` and every entry of ``dwelling_types``
    it builds a ``Loan`` for the financed part of that province's weighted average
    price and derives the income needed if 30% of income goes to the mortgage.
    The affordable combinations are printed.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If an answer cannot be converted to the expected number type.
    """
    #get user inputs
    mortgage_rate=float(input("Enter your mortgage rate (e.g. for 5% enter 5):") or 5)
    mortgage_term=int(input("Enter your mortgage term/years (e.g. for 25 years enter 25):") or 25)
    down_payment_percent=float(input("Enter your down payment as percentage (e.g. for 20% enter 20):") or 20)
    income=int(input("Enter your income (e.g. for $50000 enter 50000):") or 50000)

    # Percentages -> fractions
    mortgage_rate=mortgage_rate/100
    down_payment_percent=down_payment_percent/100

    affordable_provinces=[]
    for province in affordable_house_2022.index.unique():
        for house_type in dwelling_types:
            # Price of this house type in *this* province (not simply the first row of the column)
            house_price=affordable_house_2022.loc[province, f'Weighted_Avg_Price_{house_type}']

            # A province with no absorbed units of this type has a NaN average price:
            # there is nothing to finance, so skip it
            if pd.isna(house_price):
                continue

            # Amount financed after the down payment
            principal = house_price * (1-down_payment_percent)
            loan = Loan(principal=principal, interest=mortgage_rate, term=mortgage_term)

            required_income=float(loan.monthly_payment)*12/0.3 #spend 30% on mortgage

            if income>=required_income:
                affordable_provinces.append((province,house_type))

    if affordable_provinces:
        print("You can afford a house in the following provinces:")
        for province,house_type in affordable_provinces:
            print(f"-{province} ({house_type})")
    else:
        print("Based on your input, You are too poor to afford a house in Canada.")

In [None]:
# Run the interactive affordability check (waits for four keyboard inputs)
find_afforable_house()

## Q7. How have the real estate-related stocks performed over the past 4 years? What is the stock beta for selected real estate and construction companies, and how does it contribute to our understanding of their risk profiles and market performances?

In [None]:
# Reading REI.UN (RioCan) price history, indexed by parsed dates
REI_UN_csv = Path("Resources","REI.UN.csv")
REI_UN_df = pd.read_csv(REI_UN_csv, index_col="Date", parse_dates=True)
# sort_index returns a new frame, so the result has to be assigned; the daily returns
# computed from this frame assume chronological order
REI_UN_df = REI_UN_df.sort_index()
REI_UN_df.head(10)

In [None]:
# Daily returns: percentage change of 'Close' from the previous row;
# dropna() removes the first row, which has no previous value
REI_UN_returns =REI_UN_df['Close'].pct_change().dropna()

# The series name becomes the column label when the returns are combined later
REI_UN_returns.name = 'RioCan Real Estate'

display(REI_UN_returns.head())

In [None]:
# Reading TCN.TO (Tricon Residential) price history, indexed by parsed dates
TCN_TO_csv = Path("Resources","TCN.TO.csv")
TCN_TO_dF = pd.read_csv(TCN_TO_csv, index_col="Date", parse_dates=True)
# sort_index returns a new frame, so the result has to be assigned; the daily returns
# computed from this frame assume chronological order
TCN_TO_dF = TCN_TO_dF.sort_index()
TCN_TO_dF.head(10)

In [None]:
# Daily returns: percentage change of 'Close' from the previous row;
# dropna() removes the first row, which has no previous value
TCN_TO_returns =TCN_TO_dF['Close'].pct_change().dropna()
# The series name becomes the column label when the returns are combined later
TCN_TO_returns.name = 'Tricon Residential'
display(TCN_TO_returns.head())

In [None]:
# Reading the S&P/TSX benchmark price history (sptsx.csv), indexed by parsed dates
sptsx_csv = Path("Resources","sptsx.csv")
sptsx_dF = pd.read_csv(sptsx_csv, index_col="Date", parse_dates=True)
# sort_index returns a new frame, so the result has to be assigned; the daily returns
# computed from this frame assume chronological order
sptsx_dF = sptsx_dF.sort_index()
sptsx_dF.head(10)

In [None]:
# Daily returns: percentage change of 'Close' from the previous row;
# dropna() removes the first row, which has no previous value
sptsx_return = sptsx_dF['Close'].pct_change().dropna()
# The series name becomes the column label when the returns are combined later
sptsx_return.name = 'sp tsx'
# Show the returns just computed (as the other two return cells do), not the raw prices
display(sptsx_return.head())

In [None]:
# Combine the three daily-return series into one table, one column per series
# (column labels come from each series' name); join="inner" keeps only the dates
# present in all three
portfolio_returns = pd.concat([REI_UN_returns, TCN_TO_returns, sptsx_return], axis="columns", join="inner")
# Ensure chronological order
portfolio_returns = portfolio_returns.sort_index()
portfolio_returns.tail(10)

In [None]:
# Plot the daily returns of all three series on one chart (one line per column)
portfolio_returns.plot()

In [None]:
# Cumulative returns: running product of (1 + daily return) for every column,
# i.e. the growth of 1 unit invested at the start of the period

cumulative_returns = (1 + portfolio_returns).cumprod()
# Plot cumulative returns
cumulative_returns.plot(title="Cumulative Returns of All Portfolios")

In [None]:
# Pairwise correlation between the cumulative-return series.
# NOTE(review): this correlates cumulative levels; the next cell computes the
# correlation of the daily returns — confirm both are wanted.
correlation = cumulative_returns.corr()
correlation

In [None]:
# Pairwise correlation of the daily returns
correlation_matrix = portfolio_returns.corr()

# Heatmap of the correlation matrix; the colour scale is pinned to the full
# correlation range [-1, 1]
sns.heatmap(correlation_matrix, vmin=-1, vmax=1)

In [None]:
# Standard deviation of the daily returns, one value per column of portfolio_returns
portfolio_std = portfolio_returns.std()
portfolio_std

In [None]:
# Daily standard deviation of the S&P/TSX benchmark returns
sp_tsx_std = sptsx_return.std()

# Keep the series whose daily standard deviation exceeds the benchmark's.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
riskier_portfolios = {
    portfolio: std_dev
    for portfolio, std_dev in portfolio_std.items()
    if std_dev > sp_tsx_std
}

# Convert the dictionary to a DataFrame for better visualization
riskier_portfolios_df = pd.DataFrame(list(riskier_portfolios.items()), columns=['Portfolio', 'Std Dev'])

print(riskier_portfolios_df)

In [None]:
# Annualized standard deviation: daily standard deviation scaled by the square root
# of 252 trading days
annualized_std = portfolio_std * np.sqrt(252)
annualized_std

In [None]:
# Benchmark returns taken from portfolio_returns, so that stock and benchmark cover
# exactly the same dates in every rolling window
benchmark_returns = portfolio_returns['sp tsx']

# 60-day rolling covariance between Tricon Residential and the benchmark
rolling_covariance = portfolio_returns['Tricon Residential'].rolling(window=60).cov(benchmark_returns)

# 60-day rolling variance of the benchmark
rolling_variance = benchmark_returns.rolling(window=60).var()

# Rolling beta = covariance / variance, window by window.
# (Averaging the covariance first would turn the numerator into a single constant.)
rolling_beta = rolling_covariance / rolling_variance

# Plot beta trend
rolling_beta.plot(title='Rolling Beta for Tricon Residential')


In [None]:
# Benchmark returns taken from portfolio_returns, so that stock and benchmark cover
# exactly the same dates in every rolling window
benchmark_returns = portfolio_returns['sp tsx']

# 60-day rolling covariance between RioCan Real Estate and the benchmark
rolling_covariance = portfolio_returns['RioCan Real Estate'].rolling(window=60).cov(benchmark_returns)

# 60-day rolling variance of the benchmark
rolling_variance = benchmark_returns.rolling(window=60).var()

# Rolling beta = covariance / variance, window by window.
# (Averaging the covariance first would turn the numerator into a single constant.)
rolling_beta = rolling_covariance / rolling_variance

# Plot beta trend
rolling_beta.plot(title='Rolling Beta for RioCan Real Estate')