The GDP by industry measures provide an alternate dimension that supplements the income and expenditure-based GDP estimates, and constitute an extension of the Canadian System of Macroeconomic Accounts, Supply, Use and Input-Output tables.

https://www150.statcan.gc.ca/n1/en/dsbbcan

In [1]:
import pandas as pd
import numpy as np

# Data Wrangling

**1. Gross domestic product, expenditure-based, provincial and territorial, annual (x 1,000,000)**

Statistics Canada. Table 36-10-0222-01  Gross domestic product, expenditure-based, provincial and territorial, annual (x 1,000,000)\
https://www150.statcan.gc.ca/t1/tbl1/en/cv.action?pid=3610022201

In [2]:
expenditure_gdp = pd.read_csv('StatCan_GDP_ExpenditureBased.csv', skiprows=[i for i in range(0,6)])
expenditure_gdp.drop(expenditure_gdp.index[0], inplace=True)
expenditure_gdp['Geography'] = expenditure_gdp['Geography'].fillna(method='ffill')
expenditure_gdp = expenditure_gdp[:-7]
expenditure_gdp['Geography'] = expenditure_gdp['Geography'].replace(['Canada 1 (map)','Northwest Territories 2 (map)', 'Nunavut 2 (map)'],['Canada','Northwest Territories', 'Nunavut'])
expenditure_gdp.rename(columns={'Reference period': 'Year', 'Gross domestic product at market prices': 'Exp. based GDP (2012 chained C$)', 'Gross domestic product at market prices.1': 'Exp. based GDP (current C$)'}, inplace=True)
expenditure_gdp.reset_index(inplace=True)
expenditure_gdp.drop('index', axis=1, inplace=True)
expenditure_gdp = expenditure_gdp.replace(',','', regex=True)
convert_dict = {'Exp. based GDP (2012 chained C$)': float, 'Exp. based GDP (current C$)': float} 
expenditure_gdp = expenditure_gdp.astype(convert_dict) 
expenditure_gdp['Geography'] = expenditure_gdp['Geography'].str.strip()

**2. Gross domestic product, income-based, provincial and territorial, annual (x 1,000,000)**

Statistics Canada. Table 36-10-0221-01  Gross domestic product, income-based, provincial and territorial, annual (x 1,000,000)\
https://www150.statcan.gc.ca/t1/tbl1/en/cv.action?pid=3610022101

In [3]:
income_gdp = pd.read_csv('StatCan_GDP_IncomeBased.csv', skiprows=[i for i in range(0,4)])
income_gdp.drop(income_gdp.index[0], inplace=True)
income_gdp['Geography'] = income_gdp['Geography'].fillna(method='ffill')
income_gdp = income_gdp[:-9]
income_gdp['Geography'] = income_gdp['Geography'].replace(['Canada 1 (map)','Northwest Territories 3 (map)', 'Nunavut 3 (map)'],['Canada','Northwest Territories', 'Nunavut'])
indexNames = income_gdp[income_gdp['Geography'] == 'Northwest Territories including Nunavut 2'].index
income_gdp.drop(indexNames , inplace=True)
income_gdp.rename(columns={'Reference period': 'Year', 'Gross domestic product at market prices': 'Income based GDP'}, inplace=True)
income_gdp.reset_index(inplace=True)
income_gdp.drop('index', axis=1, inplace=True)
income_gdp = income_gdp.replace(',','', regex=True)
convert_dict = {'Income based GDP': float} 
income_gdp = income_gdp.astype(convert_dict) 
income_gdp['Geography'] = income_gdp['Geography'].str.strip()

**3. Gross domestic product, expenditure-based, provincial and territorial, annual (x 1,000,000)**

Statistics Canada. Table 36-10-0222-01  Gross domestic product, expenditure-based, provincial and territorial, annual (x 1,000,000)\
https://www150.statcan.gc.ca/t1/tbl1/en/cv.action?pid=3610022201

In [4]:
industry_gdp = pd.read_csv('StatCan_GDP_byIndustry.csv', skiprows=[i for i in range(0,4)])
industry_gdp.drop(industry_gdp.index[0], inplace=True)
industry_gdp['Geography'] = industry_gdp['Geography'].fillna(method='ffill')
industry_gdp['North American Industry Classification System (NAICS) 7 8'] = industry_gdp['North American Industry Classification System (NAICS) 7 8'].fillna(method='ffill')
industry_gdp['North American Industry Classification System (NAICS) 7 8'] = industry_gdp['North American Industry Classification System (NAICS) 7 8'].str.split('[').str[0]
industry_gdp = industry_gdp[:-28]
industry_gdp.rename(columns={'Reference period': 'Year', 'North American Industry Classification System (NAICS) 7 8': 'Industry', 'Current dollars': 'Industry GDP (current C$)', 'Chained (2012) dollars 9': 'Industry GDP (2012 chained C$)'}, inplace=True)
industry_gdp.reset_index(inplace=True)
industry_gdp.drop('index', axis=1, inplace=True)
industry_gdp = industry_gdp.replace(',','', regex=True)
industry_gdp.replace(to_replace ="..", value ="0", inplace= True) 
convert_dict = {'Industry GDP (current C$)': float, 'Industry GDP (2012 chained C$)': float} 
industry_gdp = industry_gdp.astype(convert_dict) 
industry_gdp['Year'] = industry_gdp['Year'].astype(str)
industry_gdp['Industry'] = industry_gdp['Industry'].astype(str).str.strip()
industry_gdp['Year'] = industry_gdp['Year'].str.split('.').str[0]
industry_gdp['Geography'] = industry_gdp['Geography'].str.strip()

In [5]:
industry_gdp1 = industry_gdp[industry_gdp["Industry"] == 'All industries']
industry_gdp1 = industry_gdp1.drop('Industry', axis=1)
industry_gdp1 = industry_gdp1.groupby('Year').sum().reset_index()
industry_gdp1['Geography'] = 'Canada'
industry_gdp1 = industry_gdp1[['Geography', 'Year', 'Industry GDP (current C$)', 'Industry GDP (2012 chained C$)']]
industry_gdp2 = industry_gdp[industry_gdp["Industry"] == 'All industries']
industry_gdp2 = industry_gdp2.drop('Industry', axis=1)
industry_gdp2 = pd.concat([industry_gdp1, industry_gdp2])

**4. Consumer Price Index, annual average, not seasonally adjusted**

Statistics Canada. Table 18-10-0005-01  Consumer Price Index, annual average, not seasonally adjusted\
https://www150.statcan.gc.ca/t1/tbl1/en/cv.action?pid=1810000501

In [6]:
#2002 = 100

CPI = pd.read_csv('StatCan_CPI.csv', skiprows=[i for i in range(0,4)])
CPI.drop(CPI.index[0], inplace=True)
CPI['Geography'] = CPI['Geography'].fillna(method='ffill')
CPI.rename(columns={'Reference period': 'Year', 'All-items': 'CPI (all items)', 'Gasoline': 'CPI (gasoline)'}, inplace=True)
CPI = CPI[:-12]
CPI['Geography'] = CPI['Geography'].replace(['Whitehorse, Yukon 5 (map)','Yellowknife, Northwest Territories 5 (map)', 'Iqaluit, Nunavut 6 (map)'],['Yukon','Northwest Territories', 'Nunavut'])
CPI.reset_index(inplace=True)
CPI.drop('index', axis=1, inplace=True)
CPI.replace(to_replace ="..", value ="0", inplace= True) 
convert_dict = {'CPI (all items)': float, 'CPI (gasoline)': float} 
CPI = CPI.astype(convert_dict) 

**5. Labour force characteristics by industry, annual**

Statistics Canada. Table 14-10-0023-01  Labour force characteristics by industry, annual (x 1,000)\
https://www150.statcan.gc.ca/t1/tbl1/en/cv.action?pid=1410002301

In [7]:
labour = pd.read_csv('StatCan_LabourForce.csv', skiprows=[i for i in range(0,5)])
labour.drop(labour.index[0], inplace=True)
labour.rename(columns={'Reference period': 'Year', 'Geography 2': 'Geography', 'North American Industry Classification System (NAICS) 3': 'Industry', 'Labour force 4': 'Labour', 'Employment 5': 'Employment (persons*1,000)', 'Unemployment 6': 'Unemployment', 'Unemployment rate 7': 'Unemployment rate'}, inplace=True)
labour['Geography'] = labour['Geography'].fillna(method='ffill')
labour['Industry'] = labour['Industry'].fillna(method='ffill')
labour['Sex'] = labour['Sex'].fillna(method='ffill')
labour['Age group'] = labour['Age group'].fillna(method='ffill')
labour = labour[:-17]
labour.drop('Sex', axis=1, inplace=True)
labour['Labour'] = labour['Labour'].str.split('(').str[0]
labour['Employment (persons*1,000)'] = labour['Employment (persons*1,000)'].str.split('(').str[0]
labour['Unemployment'] = labour['Unemployment'].str.split('(').str[0]
labour['Industry'] = labour['Industry'].replace(['Total, all industries 8','Goods-producing sector 9', 'Services-producing sector 10', 'Unclassified industries 11'],['All industries','Goods-producing sector', 'Services-producing sector', 'Unclassified industries'])
labour = labour.replace(',','', regex=True)
labour.replace(to_replace ="..", value ="0", inplace= True) 
convert_dict = {'Labour': float, 'Employment (persons*1,000)': float, 'Unemployment': float} 
labour = labour.astype(convert_dict) 
labour.reset_index(inplace=True)
labour.drop('index', axis=1, inplace=True)
labour['Year'] = labour['Year'].astype(str)
labour['Year'] = labour['Year'].str.split('.').str[0]
labour.drop('Unemployment rate', axis=1, inplace=True)
labour['Unemployment rate %'] = labour['Unemployment'] / labour['Labour']
labour.rename(columns={'Labour': 'Labour force (persons*1000)', 'Unemployment': 'Unemployment (persons*1,000)', 'Unemployment rate %': 'Unemployment rate'}, inplace=True)

In [8]:
labour1 = labour[labour["Industry"] == 'All industries']
labour1 = labour1.drop('Industry', axis=1)
labour1 = labour1.groupby(['Geography', 'Year']).sum().reset_index()

**6. Average weekly earnings by industry, annual**

Statistics Canada. Table 14-10-0204-01  Average weekly earnings by industry, annual\
https://www150.statcan.gc.ca/t1/tbl1/en/cv.action?pid=1410020401

In [9]:
earnings = pd.read_csv('StatCan_avgWeeklyEarnings.csv', skiprows=[i for i in range(0,7)])
earnings.drop(earnings.index[0], inplace=True)
earnings.rename(columns={'Reference period': 'Year', 'Industrial aggregate excluding unclassified businesses 6 7': 'All industries', 'Goods producing industries 8': 'Goods-producing sector', 'Service producing industries 9': 'Service-producing sector'}, inplace=True)
earnings['Geography'] = earnings['Geography'].fillna(method='ffill')
earnings = earnings[:-19]
earnings['Geography'] = earnings['Geography'].replace(['Northwest Territories 10 11 (map)','Nunavut 10 11 (map)'],['Northwest Territories', 'Nunavut'])
earnings = earnings.replace(',','', regex=True)
earnings['Goods-producing sector'].replace(to_replace ="F", value ="1431.82", inplace= True) 
convert_dict = {'All industries': float, 'Goods-producing sector': float, 'Service-producing sector': float} 
earnings = earnings.astype(convert_dict) 
earnings.reset_index(inplace=True)
earnings.drop('index', axis=1, inplace=True)

**7. Population estimates on July 1st, by age and sex**

Statistics Canada. Table 17-10-0005-01  Population estimates on July 1st, by age and sex\
https://www150.statcan.gc.ca/t1/tbl1/en/cv.action?pid=1710000501

In [10]:
population = pd.read_csv('StatCan_PopulationEstimates.csv', skiprows=[i for i in range(0,6)])
population.drop(earnings.index[0], inplace=True)
population['Geography'] = population['Geography'].fillna(method='ffill')
population.rename(columns={'Reference period': 'Year', 'All ages': 'Population'}, inplace=True)
population = population[:-13]
population['Geography'] = population['Geography'].replace(['Northwest Territories including Nunavut (Terminated) 6','Northwest Territories 7 (map)', 'Nunavut 7 (map)'],['nwtt','Northwest Territories', 'Nunavut'])
indexNames = population[population['Geography'] == 'nwtt'].index
population.drop(indexNames , inplace=True)
population = population.replace(',','', regex=True)
convert_dict = {'Population': float} 
population = population.astype(convert_dict) 

# Data Analysis

In [11]:
merged = expenditure_gdp.merge(income_gdp, on=['Geography', 'Year'])
merged = merged.merge(industry_gdp2, on=['Geography', 'Year'], how = 'outer')
merged = merged.merge(CPI, on=['Geography', 'Year'], how = 'outer')
merged = merged.merge(labour1, on=['Geography', 'Year'], how = 'outer')
merged = merged.merge(population, on=['Geography', 'Year'], how = 'outer')
merged['Exp. based GDP per capita (2012 chained C$)'] = merged['Exp. based GDP (2012 chained C$)'] / merged['Population'] * 1000000
merged['Exp. based GDP per capita (current C$)'] = merged['Exp. based GDP (current C$)'] / merged['Population'] * 1000000
merged['Income based GDP per capita'] = merged['Income based GDP'] / merged['Population'] * 1000000
merged['Industry GDP per capita (current C$)'] = merged['Industry GDP (current C$)'] / merged['Population'] * 1000000
merged['Industry GDP per capita (2012 chained C$)'] = merged['Industry GDP (2012 chained C$)'] / merged['Population'] * 1000000
Canada_economic_indicators = merged
Canada_economic_indicators.set_index(['Geography', 'Year'], inplace=True)
Canada_economic_indicators_pct = Canada_economic_indicators.pct_change().reset_index()
Canada_economic_indicators_pct.loc[Canada_economic_indicators_pct.Year == '2010', ['Exp. based GDP (2012 chained C$)', 'Exp. based GDP (current C$)', 'Income based GDP', 'Industry GDP (current C$)', 'Industry GDP (2012 chained C$)', 'CPI (all items)', 'CPI (gasoline)', 'Labour force (persons*1000)', 'Employment (persons*1,000)', 'Unemployment (persons*1,000)', 'Unemployment rate', 'Population', 'Exp. based GDP per capita (2012 chained C$)', 'Exp. based GDP per capita (current C$)', 'Income based GDP per capita', 'Industry GDP per capita (current C$)', 'Industry GDP per capita (2012 chained C$)']] = 0
Canada_economic_indicators = Canada_economic_indicators.reset_index()
industry_gdp.set_index(['Geography', 'Industry', 'Year'], inplace=True)
industry_gdp_pct = industry_gdp.pct_change().reset_index()
industry_gdp_pct.loc[industry_gdp_pct.Year == '2010', ['Industry GDP (current C$)', 'Industry GDP (2012 chained C$)']] = 0
industry_gdp = industry_gdp.reset_index()
labour.set_index(['Geography', 'Industry', 'Age group', 'Year'], inplace=True)
labour_pct = labour.pct_change().reset_index()
labour_pct.loc[labour_pct.Year == '2010', ['Labour force (persons*1000)', 'Employment (persons*1,000)', 'Unemployment (persons*1,000)', 'Unemployment rate']] = 0
labour = labour.reset_index()
earnings.set_index(['Geography', 'Year'], inplace=True)
earnings_pct = earnings.pct_change().reset_index()
earnings_pct.loc[earnings_pct.Year == '2010', ['All industries', 'Goods-producing sector', 'Service-producing sector']] = 0
earnings = earnings.reset_index()

Canada_economic_indicators.head()

Unnamed: 0,Geography,Year,Exp. based GDP (2012 chained C$),Exp. based GDP (current C$),Income based GDP,Industry GDP (current C$),Industry GDP (2012 chained C$),CPI (all items),CPI (gasoline),Labour force (persons*1000),"Employment (persons*1,000)","Unemployment (persons*1,000)",Unemployment rate,Population,Exp. based GDP per capita (2012 chained C$),Exp. based GDP per capita (current C$),Income based GDP per capita,Industry GDP per capita (current C$),Industry GDP per capita (2012 chained C$)
0,Canada,2010,1740814.0,1666048.0,1666048.0,1558298.2,1625807.8,116.5,148.2,36800.0,33814.6,2985.3,0.361703,34004889.0,51193.050505,48994.366663,48994.366663,45825.710532,47811.00153
1,Canada,2011,1795582.0,1774063.0,1774063.0,1661609.2,1678548.5,119.9,177.8,37149.4,34343.4,2806.0,0.342738,34339328.0,52289.375028,51662.717453,51662.717453,48387.935838,48881.227379
2,Canada,2012,1827201.0,1827201.0,1827201.0,1709695.5,1709692.7,121.7,182.3,37474.9,34725.8,2749.0,0.336109,34714222.0,52635.51636,52635.51636,52635.51636,49250.578048,49250.49739
3,Canada,2013,1869759.0,1902247.0,1902247.0,1781206.7,1753295.5,122.8,183.4,37865.2,35170.2,2695.0,0.32621,35082954.0,53295.369597,54221.403363,54221.403363,50771.286249,49975.709001
4,Canada,2014,1923422.0,1994898.0,1994898.0,1867032.1,1803539.5,125.2,183.8,37995.0,35354.3,2640.7,0.318431,35437435.0,54276.558109,56293.521244,56293.521244,52685.305807,50893.624214


In [12]:
Canada_economic_indicators_pct.head()

Unnamed: 0,Geography,Year,Exp. based GDP (2012 chained C$),Exp. based GDP (current C$),Income based GDP,Industry GDP (current C$),Industry GDP (2012 chained C$),CPI (all items),CPI (gasoline),Labour force (persons*1000),"Employment (persons*1,000)","Unemployment (persons*1,000)",Unemployment rate,Population,Exp. based GDP per capita (2012 chained C$),Exp. based GDP per capita (current C$),Income based GDP per capita,Industry GDP per capita (current C$),Industry GDP per capita (2012 chained C$)
0,Canada,2010,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Canada,2011,0.031461,0.064833,0.064833,0.066297,0.03244,0.029185,0.19973,0.009495,0.015638,-0.060061,-0.052433,0.009835,0.021415,0.054462,0.054462,0.055912,0.022385
2,Canada,2012,0.017609,0.029953,0.029953,0.02894,0.018554,0.015013,0.025309,0.008762,0.011135,-0.020314,-0.019341,0.010917,0.00662,0.01883,0.01883,0.017828,0.007554
3,Canada,2013,0.023291,0.041072,0.041072,0.041827,0.025503,0.009039,0.006034,0.010415,0.012797,-0.019644,-0.029452,0.010622,0.012536,0.03013,0.03013,0.030877,0.014725
4,Canada,2014,0.0287,0.048706,0.048706,0.048184,0.028657,0.019544,0.002181,0.003428,0.005235,-0.020148,-0.023845,0.010104,0.01841,0.038216,0.038216,0.037699,0.018367


In [13]:
industry_gdp.head()

Unnamed: 0,Geography,Industry,Year,Industry GDP (current C$),Industry GDP (2012 chained C$)
0,Newfoundland and Labrador,All industries,2010,27183.0,30474.4
1,Newfoundland and Labrador,All industries,2011,31614.0,31375.5
2,Newfoundland and Labrador,All industries,2012,29977.1,29977.2
3,Newfoundland and Labrador,All industries,2013,32403.8,31486.3
4,Newfoundland and Labrador,All industries,2014,32136.7,31143.3


In [14]:
industry_gdp_pct.head()

Unnamed: 0,Geography,Industry,Year,Industry GDP (current C$),Industry GDP (2012 chained C$)
0,Newfoundland and Labrador,All industries,2010,0.0,0.0
1,Newfoundland and Labrador,All industries,2011,0.163006,0.029569
2,Newfoundland and Labrador,All industries,2012,-0.051778,-0.044567
3,Newfoundland and Labrador,All industries,2013,0.080952,0.050342
4,Newfoundland and Labrador,All industries,2014,-0.008243,-0.010894


In [15]:
labour.head()

Unnamed: 0,Geography,Industry,Age group,Year,Labour force (persons*1000),"Employment (persons*1,000)","Unemployment (persons*1,000)",Unemployment rate
0,Canada,All industries,15 years and over,2010,18400.0,16907.3,1492.7,0.081125
1,Canada,All industries,15 years and over,2011,18574.7,17171.7,1403.0,0.075533
2,Canada,All industries,15 years and over,2012,18737.4,17362.9,1374.5,0.073356
3,Canada,All industries,15 years and over,2013,18932.6,17585.1,1347.5,0.071174
4,Canada,All industries,15 years and over,2014,18997.5,17677.2,1320.3,0.069499


In [16]:
labour_pct.head()

Unnamed: 0,Geography,Industry,Age group,Year,Labour force (persons*1000),"Employment (persons*1,000)","Unemployment (persons*1,000)",Unemployment rate
0,Canada,All industries,15 years and over,2010,0.0,0.0,0.0,0.0
1,Canada,All industries,15 years and over,2011,0.009495,0.015638,-0.060092,-0.068933
2,Canada,All industries,15 years and over,2012,0.008759,0.011135,-0.020314,-0.02882
3,Canada,All industries,15 years and over,2013,0.010418,0.012797,-0.019644,-0.029751
4,Canada,All industries,15 years and over,2014,0.003428,0.005237,-0.020186,-0.023533


In [17]:
earnings.head()

Unnamed: 0,Geography,Year,All industries,Goods-producing sector,Service-producing sector
0,Canada,2010,852.44,1072.67,803.01
1,Canada,2011,873.64,1102.8,821.37
2,Canada,2012,895.57,1143.25,838.56
3,Canada,2013,911.44,1173.32,851.38
4,Canada,2014,935.52,1217.28,871.28


In [18]:
earnings_pct.head()

Unnamed: 0,Geography,Year,All industries,Goods-producing sector,Service-producing sector
0,Canada,2010,0.0,0.0,0.0
1,Canada,2011,0.02487,0.028089,0.022864
2,Canada,2012,0.025102,0.036679,0.020928
3,Canada,2013,0.017721,0.026302,0.015288
4,Canada,2014,0.02642,0.037466,0.023374


In [21]:
Canada_economic_indicators.to_csv(r'Canada_economic_indicators.csv', index=False)

In [22]:
Canada_economic_indicators_pct.to_csv(r'Canada_economic_indicators_pct.csv', index=False)
industry_gdp.to_csv(r'industry_gdp.csv', index=False)
industry_gdp_pct.to_csv(r'industry_gdp_pct.csv', index=False)
labour.to_csv(r'labour.csv', index=False)
labour_pct.to_csv(r'labour_pct.csv', index=False)
earnings.to_csv(r'earnings.csv', index=False)
earnings_pct.to_csv(r'earnings_pct.csv', index=False)