In [None]:
# Initial imports
import os
import requests
import pandas as pd
import numpy as np
#from dotenv import load_dotenv
import hvplot.pandas
from pathlib import Path
import seaborn as sns
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

## Reading our data into dataFrames
Data includes:
 * Latitude and longitude of all Canadian provinces, from [latlong.net](https://www.latlong.net/category/provinces-40-60.html).
 * Price ranges for houses in the different provinces of Canada (from an Excel file) and average household income per province (from a CSV file).
 * Stock price data for *RioCan (REI-UN.TO)*, a real estate investment trust, and *Tricon Residential (TCN.TO)*, a Canadian real estate company.

In [None]:
#get the latitude and longitude values for Canada provinces using url
# NOTE: pd.read_html returns a list of DataFrames (one per HTML table found), not a single DataFrame
lat_lon_df = pd.read_html("https://www.latlong.net/category/provinces-40-60.html")

In [None]:
# pd.read_html returns a list of DataFrames, one per <table> on the page.
# Selecting the table directly (instead of np.reshape(..., (10, 3))) keeps the
# numeric dtypes and does not depend on the page having exactly 10 rows.
# NOTE(review): assumes the province table is the first table on the page and
# that its first three columns are place name, latitude, longitude - confirm.
# The isinstance guard keeps this cell re-runnable once lat_lon_df is a DataFrame.
if isinstance(lat_lon_df, list):
    lat_lon_df = lat_lon_df[0]
lat_lon_df = lat_lon_df.iloc[:, :3].copy()
lat_lon_df.columns = ['Province', 'Latitude', 'Longitude']
# remove the unwanted ', Canada' suffix from the province names (literal match, not a regex)
lat_lon_df['Province'] = lat_lon_df['Province'].str.replace(', Canada', '', regex=False)
# display our latitude & longitude dataFrame
lat_lon_df

In [None]:
# Load every sheet of the absorbed-units workbook from the Resources folder.
# sheet_name=None makes read_excel return a dict of DataFrames keyed by sheet name;
# header=1 takes the column names from the second row of each sheet.
absorbed_units_path = Path("Resources/absorbed-units-price-range-canada-provinces-for-all-dwelling-types-2019-2022-en.xlsx")
absorbed_unit_df = pd.read_excel(absorbed_units_path, header=1, sheet_name=None)

# Load the average household income for each province across years.
avg_income_path = Path('Resources/avg_household_income_canada.csv')
avg_income_df = pd.read_csv(avg_income_path)

In [None]:
def getProvince(absorbed_unit_df):
    """Return the keys of the dict of DataFrames as a list, in insertion order.

    In this notebook the dict comes from ``pd.read_excel(..., sheet_name=None)``,
    so each key is a sheet name.
    """
    return list(absorbed_unit_df.keys())

In [None]:
# Stack every per-sheet frame into one DataFrame with a single concat call.
# Growing a DataFrame with pd.concat inside a loop re-copies all accumulated
# rows on every iteration; collecting the pieces first avoids that.
canada_housing_data_df = pd.concat(
    [absorbed_unit_df[province] for province in getProvince(absorbed_unit_df)],
    axis=0,
)

# Index the combined data by Year (set_index returns a new frame).
canada_housing_data_df = canada_housing_data_df.set_index('Year')
display(canada_housing_data_df.head())
display(canada_housing_data_df.tail())

In [None]:
#relevant years for analysis
np.unique(canada_housing_data_df.index)

In [None]:
#Fill NaN values with zero
# NOTE(review): fillna returns a new frame, which is bound to df1 here;
# canada_housing_data_df itself is left unchanged (it still contains NaN),
# and no later cell visible in this notebook reads df1.
df1=canada_housing_data_df.fillna(value = 0)

In [None]:
# Drop rows where every dwelling-type column (Single, Semi, Row, Apartment and other)
# is zero or missing. NaN is filled with 0 for the comparison only: `NaN != 0`
# evaluates to True, so without the fill a row with no absorbed units at all
# would be kept.
dwelling_columns = ['Absorbed - Single', 'Absorbed - Semi', 'Absorbed - Row', 'Absorbed - Apartment and other']
has_absorbed_units = (canada_housing_data_df[dwelling_columns].fillna(0) != 0).any(axis=1)
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of the unfiltered data.
canada_housing_data_df = canada_housing_data_df.loc[has_absorbed_units].copy()

# Average of the price-range bounds (NaN bounds are skipped by mean), then drop the bounds.
price_bounds = ['Price Range low($)', 'Price Range high($)']
canada_housing_data_df['Avg of Price Range'] = canada_housing_data_df[price_bounds].mean(axis=1)
canada_housing_data_df = canada_housing_data_df.drop(columns=price_bounds)


# display sample data in the dataFrame
display(canada_housing_data_df.head())
display(canada_housing_data_df.tail())

In [None]:
# Disabled code: everything between the triple quotes is a plain string literal,
# so the two merges inside it are NOT executed when this cell runs.
'''
#merge lat_lon_df & canada_housing_data_df to get the lat & long valuse in our dataFrame
canada_housing_data_df = canada_housing_data_df.merge(lat_lon_df, on = 'Province', how = 'inner' )

#merge avg_income_df & canada_housing_data_df to get the avg income valuse in our final dataFrame
canada_housing_data_df = canada_housing_data_df.merge(avg_income_df, on = 'Province', how = 'inner' )
'''

In [None]:
# Reorder the columns of canada_housing_data_df in reverse alphabetical order.
columns_descending = sorted(canada_housing_data_df.columns, reverse=True)
canada_housing_data_df = canada_housing_data_df.reindex(columns=columns_descending)

Questions:

Q1. Trend across the years, province by province.

Q2. Which province was impacted the most, i.e. which benefited or suffered a loss?

Q3. Most and fewest units sold, by specific province and year, and the corresponding price range.


Q4.stock beta for real estate companies.

Q5. Market Demand by Price Range: Which price range has the most absorbed units, indicating the highest demand in the market? How does this vary across different provinces?

Q6. Affordability Analysis: Based on the median household income in each province, which price ranges are realistically affordable to most families?

Q7. Income Requirements for Home Ownership: Based on the absorbed units’ price range in each province, what is the estimated annual income required to afford housing? How does this compare across provinces, and how does it align with the actual median annual incomes in those provinces?

In [None]:
# Strip the "Absorbed - " prefix from the four dwelling-type columns.
new_column_names = {
    f'Absorbed - {dwelling}': dwelling
    for dwelling in ('Single', 'Semi', 'Row', 'Apartment and other')
}

canada_housing_data_df = canada_housing_data_df.rename(columns=new_column_names)
canada_housing_data_df.tail(5)

In [None]:
# Total units absorbed per row (all dwelling types), then summed per province and year.
unit_columns = ["Single", "Semi", "Row", "Apartment and other"]
canada_housing_data_df['Total Absorbed Units'] = canada_housing_data_df[unit_columns].sum(axis=1)
sum_of_sales = (
    canada_housing_data_df
    .groupby(['Province', 'Year'])['Total Absorbed Units']
    .sum()
)
#sum_of_sales.hvplot.bar()

# Comment from ZHU:
# The purpose of this section of code is to show the overall volume in the housing market.
# I don't think this graph is informative enough, since the original data set is Absorbed Units,
# i.e. newly built properties. Some provinces may have higher volume in terms of pre-owned houses,
# and there may be provinces that don't build as many new houses.

Weighted price for a single house in 2019 =

(# of units in the 1st price range in 2019 / total units in 2019) * midpoint price of the 1st price range +

(# of units in the 2nd price range in 2019 / total units in 2019) * midpoint price of the 2nd price range +

...

In [None]:
# Dwelling types for which a weighted average price is computed
dwelling_types = ['Single', 'Semi', 'Row', 'Apartment and other']

# One Series of weighted average prices per dwelling type, indexed by (Province, Year)
weighted_avg_columns = []

for dwelling_type in dwelling_types:
    weighted_col = f'weighted_price_{dwelling_type}'

    # units * midpoint price, per price-range row (stored on the source frame)
    canada_housing_data_df[weighted_col] = (
        canada_housing_data_df[dwelling_type] * canada_housing_data_df['Avg of Price Range']
    )

    # Per province and year: total weighted price and total absorbed units
    totals = canada_housing_data_df.groupby(['Province', 'Year'])[[weighted_col, dwelling_type]].sum()

    # Weighted average price = sum(units * price) / sum(units)
    weighted_avg = totals[weighted_col] / totals[dwelling_type]
    weighted_avg_columns.append(weighted_avg.rename(f'Weighted_Avg_Price_{dwelling_type}'))

# Put the four series side by side and turn Province / Year back into columns
housing_price_df = pd.concat(weighted_avg_columns, axis=1).reset_index()

In [None]:
housing_price_df.tail()

In [None]:
def plot_trend(dwelling_type):
    """Build an hvplot line chart of the weighted average price for one dwelling type.

    Reads the notebook-level ``housing_price_df`` and plots the column
    ``Weighted_Avg_Price_<dwelling_type>`` against Year, one line per Province.

    Parameters
    ----------
    dwelling_type : str
        Suffix of the price column to plot, e.g. 'Single'.

    Returns
    -------
    The hvplot object with y-axis formatting and title applied.
    """
    price_column = f'Weighted_Avg_Price_{dwelling_type}'

    # Wide -> long: one row per (Year, Province) with the price in 'value'
    long_df = housing_price_df.melt(id_vars=['Year', 'Province'], value_vars=[price_column])

    lines = long_df.hvplot.line('Year', 'value', by='Province', width=800)
    return lines.opts(yformatter='%.0f', title=f'Price trend for {dwelling_type}')

In [None]:
# housing_price_df already carries 'Year' and 'Province' as regular columns
# once the weighted-price cell has run; resetting the index again would only
# insert a stray 'index' column (and eventually raise on repeated runs).
# Reset only when those columns are currently in the index.
if not {'Year', 'Province'}.issubset(housing_price_df.columns):
    housing_price_df = housing_price_df.reset_index()

#Plot Price Trend for all the dwelling type
plot_trend('Single')+plot_trend('Semi')+plot_trend('Row')+plot_trend('Apartment and other')

In [None]:
# Question 2
# The single-house weighted average price is the reference series for housing-market return.
return_house_df = (
    housing_price_df[['Year', 'Province', 'Weighted_Avg_Price_Single']]
    .copy()
    .set_index(['Year', 'Province'])
)

# Change from the previous row within each province, expressed in percent
single_price_by_province = return_house_df.groupby('Province')['Weighted_Avg_Price_Single']
return_house_df['Annual Return %'] = single_price_by_province.pct_change() * 100

return_house_df.tail()

In [None]:
# Single-house price per province for the first and last year of the data
price_2022 = return_house_df.xs(2022, level='Year')['Weighted_Avg_Price_Single']
price_2019 = return_house_df.xs(2019, level='Year')['Weighted_Avg_Price_Single']

# Percentage growth from 2019 to 2022, per province
CGR = ((price_2022 / price_2019) - 1) * 100
CGR_df = CGR.to_frame(name="Cumulative Return %")
CGR_df.hvplot.bar(rot=30).opts(title="Cumulative growth for each province in the past 4 years")

In [None]:
return_house_df['Annual Return %'].dropna().hvplot.bar(groupby='Year',rot=30,height=400)

In [None]:
# Question 3
# Most and fewest units sold per province and year, and the price range involved.
# Most popular type of house and price range in each province in each year.
# Reshape to long format: one row per (Year, Province, price range, house type).
sales_house_df = pd.melt(
    canada_housing_data_df.reset_index(),
    id_vars=['Year', 'Province', 'Avg of Price Range'],
    value_vars=['Single', 'Semi', 'Row', 'Apartment and other'],
    var_name='House Type',
    value_name='Units Sold',
)

In [None]:
grouped_sales_house_df=sales_house_df.groupby(['Province', 'Year', 'House Type'])['Units Sold'].sum().reset_index()
grouped_sales_house_df.tail()

In [None]:
idx = grouped_sales_house_df.groupby(['Province', 'Year'])['Units Sold'].idxmax()
most_popular_type_df=grouped_sales_house_df.loc[idx]
most_popular_type_df
#visualization

### Q4

In [None]:
# Reading REI.UN
REI_UN_csv = Path("Resources","REI.UN.csv")
REI_UN_df = pd.read_csv(REI_UN_csv, index_col="Date", parse_dates=True)
# sort_index() returns a new frame; assign it back so the prices really are in
# ascending date order before pct_change() is applied to them.
REI_UN_df = REI_UN_df.sort_index()
REI_UN_df.head(10)

In [None]:
# Calculate Daily Returns
REI_UN_returns =REI_UN_df['Close'].pct_change().dropna()

REI_UN_returns.name = 'RioCan Real Estate'

display(REI_UN_returns.head())


In [None]:
# Reading TCN.TO
TCN_TO_csv = Path("Resources","TCN.TO.csv")
TCN_TO_dF = pd.read_csv(TCN_TO_csv, index_col="Date", parse_dates=True)
# sort_index() returns a new frame; assign it back so the prices really are in
# ascending date order before pct_change() is applied to them.
TCN_TO_dF = TCN_TO_dF.sort_index()
TCN_TO_dF.head(10)

In [None]:
# Calculate Daily Returns
TCN_TO_returns =TCN_TO_dF['Close'].pct_change().dropna()
TCN_TO_returns.name = 'Tricon Residential'
display(TCN_TO_returns.head())


In [None]:
# Reading the S&P/TSX index data
sptsx_csv = Path("Resources","sptsx.csv")
sptsx_dF = pd.read_csv(sptsx_csv, index_col="Date", parse_dates=True)
# sort_index() returns a new frame; assign it back so the prices really are in
# ascending date order before pct_change() is applied to them.
sptsx_dF = sptsx_dF.sort_index()
sptsx_dF.head(10)

In [None]:
# Calculate Daily Returns
sptsx_return = sptsx_dF['Close'].pct_change().dropna()
sptsx_return.name = 'sp tsx'
# Show the computed returns (not the raw price frame), as the RioCan and Tricon cells do.
display(sptsx_return.head())

In [None]:
portfolio_returns = pd.concat([REI_UN_returns, TCN_TO_returns, sptsx_return], axis="columns", join="inner")
portfolio_returns = portfolio_returns.sort_index()
portfolio_returns.tail(10)

In [None]:
# Plot daily returns of all portfolios
portfolio_returns.plot()

In [None]:
# Calculate cumulative returns of all portfolios

cumulative_returns = (1 + portfolio_returns).cumprod()
# Plot cumulative returns
cumulative_returns.plot(title="Cumulative Returns of All Portfolios")

In [None]:
correlation = cumulative_returns.corr()
correlation

In [None]:
# Calculate the pairwise correlation between the columns of portfolio_returns
correlation_matrix = portfolio_returns.corr()

# Display the correlation matrix as a heatmap on a fixed -1..1 color scale
sns.heatmap(correlation_matrix, vmin=-1, vmax=1)

In [None]:
# Calculate the daily standard deviations of all portfolios
portfolio_std = portfolio_returns.std()
portfolio_std

In [None]:
# Daily standard deviation of the S&P/TSX benchmark, taken from portfolio_std so
# that it is computed over the same date-aligned sample as the other columns
# (sptsx_return on its own may cover dates the inner join dropped).
sp_tsx_std = portfolio_std['sp tsx']

# Keep the portfolios whose standard deviation exceeds the benchmark's.
# Series.iteritems() was removed in pandas 2.0; a boolean mask does the same
# filtering without an explicit loop. The strict '>' excludes the benchmark itself.
riskier_portfolios = portfolio_std[portfolio_std > sp_tsx_std].to_dict()

# Convert the dictionary to a DataFrame for better visualization
riskier_portfolios_df = pd.DataFrame(list(riskier_portfolios.items()), columns=['Portfolio', 'Std Dev'])

print(riskier_portfolios_df)

In [None]:
# Calculate the annualized standard deviation (252 trading days)
annualized_std = portfolio_std * np.sqrt(252)
annualized_std

In [None]:
# Benchmark returns from portfolio_returns, so covariance and variance are
# computed over the same date-aligned observations.
benchmark_returns = portfolio_returns['sp tsx']

# 60-observation rolling covariance of Tricon Residential with the benchmark
rolling_covariance = portfolio_returns['Tricon Residential'].rolling(window=60).cov(benchmark_returns)

# 60-observation rolling variance of the benchmark
rolling_variance = benchmark_returns.rolling(window=60).var()

# Rolling beta = rolling covariance / rolling variance, window by window.
# (Averaging the covariance to one scalar first would divide a constant by the
# rolling variance, which is not a beta.)
rolling_beta = rolling_covariance / rolling_variance

# Plot beta trend
rolling_beta.plot(title='Rolling Beta for Tricon Residential')



In [None]:
# Benchmark returns from portfolio_returns, so covariance and variance are
# computed over the same date-aligned observations.
benchmark_returns = portfolio_returns['sp tsx']

# 60-observation rolling covariance of RioCan Real Estate with the benchmark
rolling_covariance = portfolio_returns['RioCan Real Estate'].rolling(window=60).cov(benchmark_returns)

# 60-observation rolling variance of the benchmark
rolling_variance = benchmark_returns.rolling(window=60).var()

# Rolling beta = rolling covariance / rolling variance, window by window.
# (Averaging the covariance to one scalar first would divide a constant by the
# rolling variance, which is not a beta.)
rolling_beta = rolling_covariance / rolling_variance

# Plot beta trend
rolling_beta.plot(title='Rolling Beta for RioCan Real Estate')



Q6. Based on the median household income in each province, which price ranges are realistically affordable to most families?

Q7. Income Requirements for Home Ownership: Based on the absorbed units’ price range in each province, what is the estimated annual income required to afford housing? How does this compare across provinces, and how does it align with the actual median annual incomes in those provinces?

In [None]:
def cal_affordable_price(row, income_share=0.3, years=25):
    """Estimate an affordable house price from a row's average income.

    Computes ``row['Avg Income'] * income_share * years``.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by the key 'Avg Income' (e.g. a DataFrame row
        passed by ``DataFrame.apply(..., axis=1)``).
    income_share : float, default 0.3
        Multiplier applied to the income.
        NOTE(review): presumably the share of income spent on housing - confirm.
    years : int or float, default 25
        Second multiplier.
        NOTE(review): presumably the number of years of payments - confirm.

    Returns
    -------
    The product of the three values; with the defaults this equals the
    previous hard-coded ``row['Avg Income']*0.3*25``.
    """
    # Multiply in the original left-to-right order so float results are unchanged.
    return row['Avg Income'] * income_share * years

avg_income_df['Affordable Price']=avg_income_df.apply(cal_affordable_price,axis=1)
avg_income_df=avg_income_df.set_index(['Year','Province'])


In [None]:
housing_price_df=housing_price_df.set_index(['Year','Province'])


In [None]:
affordable_house_df=pd.concat([housing_price_df,avg_income_df],join='inner',axis=1)

In [None]:
affordable_house_df=affordable_house_df.reset_index()

In [None]:
affordable_house_df.sample(5)

In [None]:
# For every row of affordable_house_df, report which dwelling types have a
# weighted average price at or below that row's affordable price.
# (The previous version of this cell had empty `if` bodies and did not parse.)
price_columns = {
    'Apartment and other': 'Weighted_Avg_Price_Apartment and other',
    'Row': 'Weighted_Avg_Price_Row',
    'Semi': 'Weighted_Avg_Price_Semi',
    'Single': 'Weighted_Avg_Price_Single',
}

for n in range(len(affordable_house_df['Affordable Price'])):
    aff_price = affordable_house_df['Affordable Price'][n]

    # A NaN price compares False, so a dwelling type with no price for this
    # row is simply left out of the affordable list.
    affordable_types = [
        dwelling
        for dwelling, column in price_columns.items()
        if affordable_house_df[column][n] <= aff_price
    ]

    label = f"{affordable_house_df['Province'][n]} {affordable_house_df['Year'][n]}"
    if affordable_types:
        print(f"{label}: {', '.join(affordable_types)}")
    else:
        print(f"{label}: not affordable")


In [None]:
affordable_house_df