In [170]:
import pandas as pd
import altair as alt
import numpy as np
from altair import datum

# Remove Altair's row-count limit so the whole GDP table can be passed to charts.
alt.data_transformers.disable_max_rows()
# Use the 'latimes' theme for the charts in this notebook. Kept as the last
# expression of the cell, so its return value is what the cell displays.
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

# 1. Loading the dataset

In [171]:
# Read the raw export, skipping file lines 0-3 (presumably the title/metadata
# preamble that precedes the header row -- confirm against GDP.csv).
preamble_rows = range(4)
gdp = pd.read_csv('GDP.csv', skiprows=preamble_rows)

# 2. Wrangling the dataset

In [172]:
# Turn the raw table in `gdp` into a tidy frame with the columns
# Geography, Industry, Year, GDP and Percent Change.
#
# NOTE: this cell rebinds `gdp` (rows are dropped and columns renamed), so the
# loading cell must be re-run before this cell is run a second time.

NAICS_COL = 'North American Industry Classification System (NAICS) 7 8'
YEAR_COL = 'Reference period'
GDP_COL = 'Chained (2012) dollars 9'

# Handling null values and deleting unusable information.
gdp = gdp.drop(gdp.index[0])  # first row after the header carries no data
# Geography and industry are only filled on the first row of each run of
# records; propagate them downwards. `.ffill()` replaces the deprecated
# `fillna(method='ffill')`.
gdp[['Geography', NAICS_COL]] = gdp[['Geography', NAICS_COL]].ffill()
# Trim the last 27 rows (presumably footnotes -- confirm against the file).
# `.copy()` keeps the later column assignments off a slice view.
gdp = gdp.reset_index(drop=True).iloc[:-27].copy()

# Strip thousands separators from the numeric columns only, so that commas in
# Geography / Industry labels are left intact.
# NOTE(review): '..' is mapped to 0 exactly as before; if it marks unavailable
# values, NaN may be the better choice.
numeric_cols = [YEAR_COL, GDP_COL]
gdp[numeric_cols] = (
    gdp[numeric_cols]
    .replace(',', '', regex=True)
    .replace(to_replace='..', value='0')
)

# Indicating the correct data types
# Keep only the text before the first '[' in the industry label.
gdp[NAICS_COL] = gdp[NAICS_COL].str.split('[').str[0].astype('string')
gdp['Geography'] = gdp['Geography'].astype('string')
gdp[YEAR_COL] = pd.to_numeric(gdp[YEAR_COL])
gdp[GDP_COL] = pd.to_numeric(gdp[GDP_COL])

# Renaming the columns
gdp = gdp.rename(columns={NAICS_COL: "Industry", YEAR_COL: "Year", GDP_COL: "GDP"})

# Adding the growth rates column.
# The change is computed inside each (Geography, Industry) series so that the
# first year of one series is never compared with the last year of the
# previous one; rows are assumed to be in ascending Year order within a series.
gdp['Percent Change'] = (
    gdp.groupby(['Geography', 'Industry'], sort=False)['GDP'].pct_change()
)
# 1997 is treated as the base year: no growth is reported for it.
gdp.loc[gdp['Year'] == 1997, 'Percent Change'] = 0

# 3. Dropdown menu items

In [173]:
# Stand-ins for the dropdown selections; every chart below filters on them.
year: int = 2019  # reference year shown in the charts
Geography: str = 'Newfoundland and Labrador'  # value matched against the Geography column

# 4. Visualizations

In [174]:
# Headline figure: total GDP for the selected Geography and year.
# `bar` is a single-bar chart; `total_gdp` reuses it with the bar swapped for
# a large text label showing the same summed value.
bar = (
    alt.Chart(gdp, title="Total GDP ")
    .mark_bar(size=80)
    # One total per (Geography, Year); the filters below then pick one row.
    .transform_aggregate(groupby=['Geography', 'Year'], GDP='sum(GDP)')
    .encode(
        x=alt.X(
            'sum(GDP):Q',
            title='GDP (dollars x 1,000,000)',
            axis=alt.Axis(grid=False, ticks=False, labels=False, labelFontSize=10),
        ),
        y=alt.Y(
            'Geography:O',
            sort='-x',
            title=None,
            axis=alt.Axis(grid=False, labelFontSize=20),
        ),
        tooltip=[alt.Tooltip('sum(GDP):Q', format='$,.2f', title='Total GDP $')],
    )
    .transform_filter(alt.FieldEqualPredicate(field='Geography', equal=Geography))
    .transform_filter(alt.FieldEqualPredicate(field='Year', equal=year))
    .properties(height=200, width=400)
)

total_gdp = (
    bar.mark_text(dx=-175, color='navy', size=60)
    .encode(text=alt.Text('sum(GDP):Q', format='$,.2f'))
    .configure_view(strokeOpacity=0)  # hide the view border
)

total_gdp

In [175]:
# Line chart of summed GDP per year for the selected Geography, from 1997 up
# to and including `year`.
historical_gdp = (
    alt.Chart(gdp, title="GDP Historical Evolution")
    .mark_line(point=alt.OverlayMarkDef(filled=False, fill='navy'), size=5)
    .encode(
        # Year is a plain number, so the axis uses a d3 number format. 'd'
        # prints ticks as integers; the previous 'Y' is not a d3-format type
        # and only rendered through the library's invalid-type fallback.
        x=alt.X(
            'Year',
            axis=alt.Axis(grid=False, ticks=False, format='d', labelFontSize=10),
            title='Year',
        ),
        y=alt.Y(
            'sum(GDP):Q',
            axis=alt.Axis(grid=False, ticks=False, format='$,f', labelFontSize=10),
            title='GDP (dollars x 1,000,000)',
        ),
        tooltip=[
            alt.Tooltip('Year'),
            alt.Tooltip('sum(GDP):Q', format='$,.2f', title='Total GDP $'),
        ],
    )
    .transform_filter(alt.FieldEqualPredicate(field='Geography', equal=Geography))
    .transform_filter(alt.FieldRangePredicate('Year', [1997, year]))
    .configure_view(strokeOpacity=0)  # hide the view border
    .properties(height=200, width=400)
)

historical_gdp

In [176]:
# Horizontal bars of summed GDP per Industry for the selected Geography and
# year, ordered from the largest total to the smallest.
industry_gdp = (
    alt.Chart(gdp, title="GDP Industry Contribution")
    .mark_bar()
    .encode(
        x=alt.X(
            'sum(GDP):Q',
            title='GDP (dollars x 1,000,000)',
            axis=alt.Axis(format='$,f', labelFontSize=10),
        ),
        y=alt.Y('Industry:O', sort='-x'),
        tooltip=[
            alt.Tooltip('Industry'),
            alt.Tooltip('sum(GDP):Q', format='$,.2f', title='Total GDP $'),
        ],
    )
    .transform_filter(alt.FieldEqualPredicate(field='Geography', equal=Geography))
    .transform_filter(alt.FieldEqualPredicate(field='Year', equal=year))
)

industry_gdp

In [180]:
# One coloured cell per Geography, shaded by its summed GDP for `year`.
# The goal is to reproduce this same encoding on a map of Canada.

heatmap = (
    alt.Chart(gdp)
    .mark_rect()
    .encode(
        x=alt.X(
            'Geography:O',
            sort='-color',
            title=None,
            axis=alt.Axis(labelFontSize=10, labelAngle=-90),
        ),
        color=alt.Color('sum(GDP)', title='Total GDP'),
        tooltip=[
            alt.Tooltip('Geography'),
            alt.Tooltip('sum(GDP):Q', format='$,.2f', title='Total GDP $'),
        ],
    )
    .transform_filter(alt.FieldEqualPredicate(field='Year', equal=year))
    .properties(height=400, width=800)
)

heatmap

In [181]:
# Work in progress for making the map visualization: load the boundary
# geometry and draw it without any data attached.

# TopoJSON file fetched by URL; "canadaprov" is the object inside it to use.
canada_topo_url = "https://gist.githubusercontent.com/Brideau/2391df60938462571ca9/raw/f5a1f3b47ff671eaf2fb7e7b798bacfc6962606a/canadaprovtopo.json"
source = alt.topo_feature(canada_topo_url, "canadaprov")

alt.Chart(source).mark_geoshape(stroke='white')

In [182]:
# Work in progress for making the map visualization: colour each shape in
# `source` by the total GDP of the matching Geography for `year`.

# A lookup attaches a single secondary row to each shape, so the table is
# first reduced to exactly one row per Geography: the summed GDP for `year`
# (the same quantity the heatmap shows).
gdp_by_geography = (
    gdp.loc[gdp['Year'] == year]
    .groupby('Geography', as_index=False)['GDP']
    .sum()
)

# NOTE(review): `lookup` must name a field present on the TopoJSON features.
# 'Geography' is carried over unchanged; confirm the actual property name in
# the file (it may be something like 'properties.name') before relying on it.
alt.Chart(source).mark_geoshape(stroke='white').encode(
    # The looked-up value is already a per-Geography total, so it is encoded
    # directly rather than aggregated again.
    color=alt.Color('GDP:Q', title='Total GDP'),
).transform_lookup(
    lookup='Geography',
    from_=alt.LookupData(gdp_by_geography, 'Geography', ['GDP']),
).properties(width=700, height=450)