In [None]:
# Initial imports
import os
import requests
import pandas as pd
import numpy as np
#from dotenv import load_dotenv
import hvplot.pandas
from pathlib import Path
import seaborn as sns

import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

## Reading our data into dataFrames
Data includes:
 * Latitude and longitude of all Canadian provinces, from [latlong.net](https://www.latlong.net/category/provinces-40-60.html).
 * Price ranges for houses in the different provinces of Canada (from an Excel file) and average household income per province (from a CSV file).
 * Stock price data for *RioCan (REI-UN.TO)*, a real estate investment trust, and *Tricon Residential (TCN.TO)*, a Canadian real estate company.

In [None]:
# Fetch the latitude and longitude values for the Canadian provinces from the URL.
# pd.read_html returns a list of DataFrames, one per HTML table it parses.
lat_lon_df = pd.read_html("https://www.latlong.net/category/provinces-40-60.html")

In [None]:
#converting the read_html result into a DataFrame
# read_html returns a list of tables; take the first one directly instead of reshaping
# through NumPy with a hard-coded (10,3) shape, which fails for any other row count and
# turns every column (including Latitude/Longitude) into object dtype.
# The isinstance check keeps the cell safe to re-run once lat_lon_df is already a DataFrame.
province_table = lat_lon_df[0] if isinstance(lat_lon_df, list) else lat_lon_df
lat_lon_df = province_table.copy()
lat_lon_df.columns = ['Province', 'Latitude', 'Longitude']

In [None]:
# Strip the trailing ", Canada" suffix from each province name
# (regex=False: the pattern is a plain literal, so it is matched as-is)
lat_lon_df['Province'] = lat_lon_df['Province'].str.replace(', Canada', '', regex=False)

In [None]:
# Display the latitude & longitude DataFrame
lat_lon_df

In [None]:
# Load every sheet of the absorbed-units workbook from the Resources folder with read_excel.
# sheet_name=None returns a dict of DataFrames keyed by sheet name;
# header=1 takes the column labels from the second row of each sheet.
housing_data_df = pd.read_excel(
    Path("Resources/absorbed-units-price-range-canada-provinces-for-all-dwelling-types-2019-2022-en.xlsx"),
    sheet_name=None,
    header=1,
)

# Average household income for each province across years
avg_income_df = pd.read_csv(Path('Resources/avg_household_income_canada.csv'))

# RioCan and Tricon Residential stock data
rei_df = pd.read_csv(Path('Resources/REI.UN.csv'))
tcn_df = pd.read_csv(Path('Resources/TCN.TO.csv'))

In [None]:
# Function to fetch the province names from the dictionary of DataFrames (housing_data_df)
def getProvince(housing_data_df):
    """Return the keys of ``housing_data_df`` as a list, in the mapping's own order."""
    return list(housing_data_df.keys())

In [None]:
# Stack the per-sheet DataFrames into one frame with a single concat call
# (instead of growing a frame sheet by sheet, which re-copies all rows on every pass
# and starts from an empty DataFrame).
canada_housing_data_df = pd.concat(list(housing_data_df.values()), axis = 0)

# Index the combined data by Year
canada_housing_data_df = canada_housing_data_df.set_index('Year')
display(canada_housing_data_df.head())
display(canada_housing_data_df.tail())

In [None]:
# Relevant years for analysis: the distinct values of the Year index (np.unique returns them sorted)
np.unique(canada_housing_data_df.index)

In [None]:
# Fill NaN values with zero.
# The result is stored in a separate frame, df1; canada_housing_data_df itself is left unchanged by this cell.
# NOTE(review): df1 is not referenced again in the cells that follow here - confirm it is still needed.
df1=canada_housing_data_df.fillna(value = 0)

In [None]:
# Columns holding the absorbed-unit counts for each dwelling type
dwelling_cols = ['Absorbed - Single','Absorbed - Semi','Absorbed - Row','Absorbed - Apartment and other']

#Drop rows where all Dwelling Type Columns{ Absorbed Single, Semi, row & other} contain zeros
# Missing counts are treated as zero for this test: NaN != 0 evaluates to True, so without
# the fillna a row whose counts are all NaN would pass the filter and be kept.
has_units = canada_housing_data_df[dwelling_cols].fillna(0).ne(0).any(axis = 1)
# .copy() gives an independent frame, so the column assignment below does not write to a slice
canada_housing_data_df = canada_housing_data_df.loc[has_units].copy()

#find the average price range and drop columns Price Range low($) & Price Range high($)
# (mean skips NaN, so a row with only one bound keeps that bound as its average)
canada_housing_data_df['Avg of Price Range']= canada_housing_data_df[['Price Range low($)', 'Price Range high($)']].mean(axis=1)
canada_housing_data_df = canada_housing_data_df.drop(columns=['Price Range low($)', 'Price Range high($)'])


#display sample data in the dataFrame
display(canada_housing_data_df.head())
display(canada_housing_data_df.tail())

In [None]:
# Disabled step: the two merges below are wrapped in a string literal, so they are not executed.
'''
#merge lat_lon_df & canada_housing_data_df to get the lat & long valuse in our dataFrame
canada_housing_data_df = canada_housing_data_df.merge(lat_lon_df, on = 'Province', how = 'inner' )

#merge avg_income_df & canada_housing_data_df to get the avg income valuse in our final dataFrame
canada_housing_data_df = canada_housing_data_df.merge(avg_income_df, on = 'Province', how = 'inner' )
'''

In [None]:
# Reorganize the columns of canada_housing_data_df into reverse-sorted label order
columns_descending = sorted(canada_housing_data_df.columns, reverse = True)
canada_housing_data_df = canada_housing_data_df.reindex(columns=columns_descending)

Questions:

Q1. Trend across the years --> province by province

Q2. Which province saw the biggest impact --> benefitted or suffered a loss?

Q3. Most units sold and fewest units sold --> specific province and year --> the price range.
Relation between inflation rate, avg income, units sold and avg house price
(two variables at a time)

Q4. Stock correlation with housing prices in Canada

Q5. Market Demand by Price Range: Which price range has the most absorbed units, indicating the highest demand in the market? How does this vary across different provinces?

Q6. Affordability Analysis: Based on the median household income in each province, which price ranges are realistically affordable to most families?

Q7. Income Requirements for Home Ownership: Based on the absorbed units’ price range in each province, what is the estimated annual income required to afford housing? How does this compare across provinces, and how does it align with the actual median annual incomes in those provinces?

In [None]:
# Preview the last five rows (tail() defaults to n=5)
canada_housing_data_df.tail()

In [None]:
# Total units per row = sum across the four dwelling-type columns
unit_columns = ["Absorbed - Single","Absorbed - Semi","Absorbed - Row","Absorbed - Apartment and other"]
canada_housing_data_df['Total Absorbed Units'] = canada_housing_data_df[unit_columns].sum(axis=1)

# Add those row totals up for each (Province, Year) pair and show them as a bar chart
by_province_year = canada_housing_data_df.groupby(['Province','Year'])
group_df = by_province_year['Total Absorbed Units'].sum()
group_df.hvplot.bar()

In [None]:
# Total single-dwelling units absorbed for each (Province, Year) pair
by_province_year = canada_housing_data_df.groupby(['Province','Year'])
total_unit_single = by_province_year["Absorbed - Single"].sum()

In [None]:
# Per-row product of the average price and the number of single units absorbed
single_units = canada_housing_data_df['Absorbed - Single']
canada_housing_data_df['weighted price - single'] = canada_housing_data_df['Avg of Price Range'].mul(single_units)
canada_housing_data_df.tail()

In [None]:
# Sum the weighted single-dwelling prices within each (Province, Year) group
by_province_year = canada_housing_data_df.groupby(['Province', 'Year'])
grouped = by_province_year['weighted price - single'].sum()

In [None]:
# Build the per-(Province, Year) table: summed weighted price next to the total single units.
# The left side is recomputed from canada_housing_data_df instead of reusing `grouped`, so
# re-running this cell never merges an already-merged frame (which would produce
# 'Absorbed - Single_x' / 'Absorbed - Single_y' columns and break lookups on 'Absorbed - Single').
weighted_price_sum = canada_housing_data_df.groupby(['Province', 'Year'])['weighted price - single'].sum()
grouped = pd.merge(weighted_price_sum, total_unit_single, on=['Province', 'Year'])

In [None]:
# Weighted average price = summed (price x units) divided by the summed single-unit count
weighted_total = grouped['weighted price - single']
grouped['Weighted Avg Price - Single'] = weighted_total.div(grouped['Absorbed - Single'])

# Plot the weighted average with hvplot, grouped by Province
grouped['Weighted Avg Price - Single'].hvplot(groupby='Province')

In [None]:
# Overlay one weighted-average-price line per province on a single chart.
# Every curve is given label=<province name> so the overlay can show a legend entry for it.
ontraio=grouped.loc['Ontario']
overlay = ontraio['Weighted Avg Price - Single'].hvplot.line(label='Ontario')
for Province in getProvince(housing_data_df):
    # Ontario already seeds the overlay above. Skip it by name rather than by sheet position:
    # skipping index 0 dropped whichever sheet came first and drew Ontario twice
    # whenever Ontario was not the first sheet.
    if Province == 'Ontario':
        continue
    df=grouped.loc[Province]
    line_plot=df['Weighted Avg Price - Single'].hvplot.line(label=Province)
    overlay*=line_plot

In [None]:
overlay
# additional task: add a legend entry for each province

In [None]:
#question2
# Percentage change of the weighted average single-dwelling price relative to the previous
# row within each province
grouped['% Change in Price']=grouped.groupby('Province')['Weighted Avg Price - Single'].pct_change()

# Show both extremes. A bare idxmax() followed by idxmin() renders only the last expression,
# so the index label of the largest increase was silently discarded.
display(grouped['% Change in Price'].idxmax(), grouped['% Change in Price'].idxmin())

In [99]:
#question3!