In [None]:
# Root directory of the working environment; project code and data files are
# located relative to this path throughout the notebook.
project_path = "/home/jupyter"
import os
import sys
# Extend the module search path before the project-local imports further down
# (presumably so that `fintrans_toolbox` and the ft_events utilities resolve).
sys.path.append(project_path)
sys.path.append(f'{project_path}/ft_events/src/utils')

from google.cloud import bigquery
from google.cloud import storage

import importlib

import numpy as np
import pandas as pd
from plotly import graph_objs as go
import seaborn as sns
import geopandas as gpd

import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import plotly.express as px

# Project-local helpers; these imports rely on the sys.path entries added above.
from fintrans_toolbox.src import table_utils as t
from fintrans_toolbox.src import bq_utils as bq


# BigQuery client created with default settings; used to query the raw Visa tables.
client = bigquery.Client()

In [None]:
def add_dec_markers(fig):
    """Shade the 20 November - 1 December window of each year 2019-2024.

    One full-height rectangle is drawn behind the traces for every year.
    The 2024 band is filled with 'lightgrey'; all earlier years use
    'gainsboro'.

    NOTE(review): despite the "dec" in the name, each band starts on
    20 November and ends on 1 December - confirm this is the intended window.

    Parameters
    ----------
    fig : object exposing ``add_shape(**kwargs)``
        Figure to annotate (a plotly figure in this notebook). It is
        modified in place.

    Returns
    -------
    The same ``fig`` object, to allow ``fig = add_dec_markers(fig)``.
    """
    # Properties shared by every band: span the whole y-axis (paper
    # coordinates) and sit underneath the plotted lines with no outline.
    band_style = dict(
        type="rect",
        xref="x",
        yref="paper",
        y0=0,
        y1=1,
        opacity=0.3,
        layer="below",
        line_width=0,
    )

    for yy in range(19, 25):
        # The most recent year (2024) gets the slightly darker fill.
        fill = 'lightgrey' if yy == 24 else 'gainsboro'
        fig.add_shape(
            x0=f"20{yy}-11-20",
            x1=f"20{yy}-12-01",
            fillcolor=fill,
            **band_style,
        )

    return fig

#### Geographies available:

Postal area (UK: 124)

Postal district (UK: 3,118)

Imputed local authority district/OSLAUA (UK: ~360)

Imputed region/rgn 

Imputed 2021 census middle layer super output area/MSOA21 (no Scotland; 6,856 MSOAs in England and 408 in Wales)

Imputed 2021 census lower layer super output area/LSOA21 (no Scotland; 33,755 LSOAs in England and 1,917 in Wales)

#### How to get imputed/geography converted data:

Follow instructions in the 'Running the Pipeline' page of the ft_impute_missing_data Wiki.

The datasets used in this notebook were produced by running the pipeline with MCG = 'All', CARD_MERCH = 'merchant', and GEOG = 'oslaua' or 'msoa21'.

The imputation pipeline is to be run once to get the imputed visa dataset. The geography conversion pipeline is to be run individually for each geography (oslaua/msoa).



#### Comparing LAD/MSOA/raw district/imputed sector spend for Holyhead analysis

In [None]:
# Geography codes used by the following cells to pick out the port area
# at local authority district (LAD/OSLAUA) and MSOA21 level.
lad_code = 'W06000001'
msoa_code = 'W02000003'

# Locations of the parquet outputs from the imputation and
# geography-conversion pipelines.
lad_path = f'{project_path}/ft_impute_missing_data/data/converted_geography_visa_data/visa_oslaua_merchant_All.parquet'
msoa_path = f'{project_path}/ft_impute_missing_data/data/converted_geography_visa_data/visa_msoa21_merchant_All.parquet'
imputed_path = f'{project_path}/ft_impute_missing_data/data/imputed_visa_data/final_imputed_visa_merchant_All.parquet'

# Imputed data converted to OSLAUA/LAD geography
lad_visa = pd.read_parquet(lad_path)

# Imputed data converted to MSOA21 geography
msoa_visa = pd.read_parquet(msoa_path)

# Imputed Visa data before any geography conversion
xgboost_visa = pd.read_parquet(imputed_path)

# Raw (non-imputed) monthly Visa spend for postal district LL65, all
# cardholder issuing levels and all merchant category groups, from BigQuery.
client = bigquery.Client()

sql = f"""SELECT time_period_value, merchant_location, spend, transactions, cardholders
  FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
  WHERE time_period = 'Month' AND 
  merchant_location_level = 'POSTAL_DISTRICT' AND
  cardholder_issuing_level = 'All' AND
  mcg = 'All' AND
  merchant_location = 'LL65'
  ORDER BY time_period_value, merchant_location
  """

# Run the query and pass the result through the toolbox date helper
# (presumably this adds the `date_time` column read by later cells - confirm).
raw_visa = t.create_date_time(client.query(sql).to_dataframe())


In [None]:
# Keep only the rows describing the port area at each geography level.
is_port_lad = lad_visa['oslaua'] == lad_code
is_port_msoa = msoa_visa['msoa21'] == msoa_code
# The imputed (unconverted) data is filtered on merchant location 'LL65 1'.
is_port_sector = xgboost_visa['merchant_location'] == 'LL65 1'

lad_port = lad_visa[is_port_lad].reset_index(drop=True).copy()
msoa_port = msoa_visa[is_port_msoa].reset_index(drop=True).copy()
# Original author's note: visa district data is complete.
sector_port = xgboost_visa[is_port_sector].reset_index(drop=True).copy()

In [None]:
# Give every extract a common `level` column naming its geography, so the
# four series can be stacked and coloured by level in the charts below.
# For LAD and MSOA the geography-code column is renamed to `level` and its
# values are then replaced by the label.
lad_port = lad_port.rename(columns={"oslaua": "level"}).assign(level='LAD')
msoa_port = msoa_port.rename(columns={"msoa21": "level"}).assign(level='MSOA')

# Raw district data: keep date and spend only, aligning the date column name
# with the other extracts.
raw_port = (
    raw_visa[['date_time', 'spend']]
    .rename(columns={"date_time": "date"})
    .assign(level='district')
)

# Imputed sector data: keep date and spend only.
sector_port = sector_port[['date', 'spend']].assign(level='sector')

# Stack all levels into one long frame with a fresh 0..n-1 index.
levels_df = pd.concat(
    [lad_port, msoa_port, raw_port, sector_port],
    ignore_index=True,
)

#### Sum spend at geographies

In [None]:
# Monthly spend, one line per geography level, with the yearly
# 20 Nov - 1 Dec bands from add_dec_markers drawn behind the lines.
raw_spend_chart = dict(
    x="date",
    y="spend",
    color='level',
    template='simple_white',
    title='Raw spend at port geographies',
    height=500,
    width=800,
)
fig = add_dec_markers(px.line(levels_df, **raw_spend_chart))

fig.show()

#### Indexed sum spend at geographies

In [None]:
group_list = ['level']

# Work on a copy sorted by level and date. The year-on-year and index
# calculations below are positional within each group, so they are only
# correct when each group's rows are in chronological order.
result = (
    levels_df
    .sort_values(group_list + ['date'])
    .reset_index(drop=True)
)

result['year'] = result['date'].dt.year
result['month'] = result['date'].dt.month

metrics = ['spend']
month_group = group_list + ['month']

# Rows that belong to the 2019 baseline year.
in_2019 = result['year'] == 2019

for i in metrics:
    # Year-on-year difference: within each (level, calendar month) group the
    # previous row is the same month of the previous year (assumes no missing
    # months in the series).
    result[f'yoy_{i}'] = result.groupby(month_group)[i].diff(periods=1)

    # Year-on-year % change, same grouping as above.
    result[f'yoy_{i}_perc'] = result.groupby(month_group)[i].pct_change(periods=1) * 100

    # Index to the 2019 average. The baseline is the mean over every row dated
    # in 2019 for the group. Taking the first 11 rows by position would drop
    # the twelfth month and depends on where the series happens to start.
    result[f'index_{i}_2019'] = result.groupby(group_list)[i].transform(
        lambda x: x / x[in_2019.loc[x.index]].mean()
    )

    # Index to the first observation of each group (Jan 2019 when the series
    # starts there - confirm against the data).
    result[f"index_{i}"] = result.groupby(group_list)[i].transform(
        lambda x: x / x.iloc[0]
    )

In [None]:
# Spend indexed to each level's first observation, one line per geography
# level, with the yearly 20 Nov - 1 Dec bands drawn behind the lines.
indexed_chart = dict(
    x="date",
    y="index_spend",
    color='level',
    template='simple_white',
    title='Spend at Holyhead port geographies, indexed to Jan 2019',
    height=500,
    width=800,
)
fig = add_dec_markers(px.line(result, **indexed_chart))

fig.show()

#### Annual spend growth at geographies

In [None]:
# Year-on-year % change in spend, one line per geography level.
# Only rows dated 2020-01-01 or later are plotted (presumably because the
# first year of data has no prior-year value to compare against).
since_2020 = result[result['date'] >= '2020-01-01']

growth_chart = dict(
    x="date",
    y="yoy_spend_perc",
    color='level',
    template='simple_white',
    title='Annual growth of spend at Holyhead port geographies',
    height=500,
    width=800,
)
fig = add_dec_markers(px.line(since_2020, **growth_chart))
fig.show()