In [None]:
project_path = "/home/jupyter"
import os
import sys
sys.path.append(project_path)
sys.path.append(f'{project_path}/ft_articles/src/utils')

from google.cloud import bigquery
import importlib

import numpy as np
import pandas as pd
from plotly import graph_objs as go
import seaborn as sns


import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import plotly.express as px
import plotly.colors as pc


from fintrans_toolbox.src import table_utils as t


client = bigquery.Client()

------------------

### Taylor Swift concert dates:

<b>June 2024</b>

Edinburgh (Murrayfield, EH12 5PJ, 67,144) x3

Liverpool (Anfield, L4 0TH, 61,276) x3

Cardiff (Principality,  CF10 1NS, 74,500) x1

London (Wembley, HA9 0WS, 90,000) x3

<b>August 2024</b>

London (Wembley, HA9 0WS, 90,000) x3 

------------------

In [None]:
# Pull monthly Visa spend, transactions and cardholders by merchant location and
# cardholder issuing country, excluding the 'All' roll-up rows for both the
# merchant location level and the cardholder issuing level.
# NOTE(review): grouping by every selected column behaves like SELECT DISTINCT
# (no aggregate is computed) - confirm de-duplication is the intent.
# NOTE(review): the f-prefix is redundant; the string contains no placeholders.
sql_spend = f"""SELECT time_period_value, merchant_location_level,
merchant_location, cardholder_issuing_country, mcg, mcc, spend, transactions, cardholders
FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
WHERE time_period = 'Month' AND
merchant_location_level != 'All' AND
cardholder_issuing_level != 'All' 
GROUP BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders
ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders"""

# Run the query and load the result into a DataFrame.
sml_df = client.query(sql_spend).to_dataframe()
# Toolbox helper; presumably adds the `date_time` column that later cells rely on
# - verify against fintrans_toolbox.table_utils.
sml_df = t.create_date_time(sml_df)

In [None]:
# MCC-level rows: everything except the records where mcc is the 'All' roll-up.
sml_df1 = sml_df.loc[sml_df['mcc'].ne('All')].copy()
sml_df1.head(2)

In [None]:
# Total rows: only the records where the merchant category group is the 'All' roll-up.
sml_df2 = sml_df.loc[sml_df['mcg'].eq('All')].copy()
sml_df2.head(2)

In [None]:
# Working frame: the MCC-level rows stacked on top of the all-MCG total rows.
# NOTE(review): a row with mcc != 'All' AND mcg == 'All' would be present in both inputs
# and therefore counted twice downstream - confirm no such rows exist in the source table.
df = pd.concat(objs=[sml_df1, sml_df2])

In [None]:
def add_ts_periods(fig, concert_periods=None, fillcolor="grey", name="TS Concert"):
    """Shade the Taylor Swift concert period(s) on a plotly figure.

    For every period a translucent, full-height rectangle is drawn behind the
    data traces. Shapes never show up in a plotly legend, so one empty
    marker-only trace is also added purely to create the legend entry.

    Parameters
    ----------
    fig : plotly figure
        Figure to annotate. It is modified in place and also returned.
    concert_periods : iterable of (start, end) pairs, optional
        x-axis bounds of each shaded band. Defaults to the single band
        ("2024-05-20", "2024-06-10").
        NOTE(review): the default presumably straddles the June 2024 monthly
        observation rather than marking the exact concert days - confirm.
    fillcolor : str, optional
        Fill colour of the bands and of the legend marker (default "grey").
    name : str, optional
        Legend label for the bands (default "TS Concert").

    Returns
    -------
    The same figure, with the legend entry and the band(s) added.
    """
    if concert_periods is None:
        concert_periods = [("2024-05-20", "2024-06-10")]

    # Legend-only dummy trace: no data points, a square marker in the band
    # colour so the legend swatch resembles the shaded rectangle.
    fig.add_trace(
        go.Scatter(
            x=[None],
            y=[None],
            mode="markers",
            marker=dict(color=fillcolor, symbol="square"),
            name=name,
        )
    )

    # One rectangle per period; yref="paper" with y0=0/y1=1 spans the full
    # plot height regardless of the y-axis range.
    for start_date, end_date in concert_periods:
        fig.add_shape(
            type="rect",
            xref="x",
            yref="paper",
            x0=start_date,
            y0=0,
            x1=end_date,
            y1=1,
            fillcolor=fillcolor,
            opacity=0.5,
            layer="below",
            line_width=0,
        )

    return fig

In [None]:
def add_extra_wembley(fig, concert_periods=None, fillcolor="gainsboro", name="Extra Wembley (HA)"):
    """Shade the additional (August 2024) Wembley concert period on a plotly figure.

    For every period a translucent, full-height rectangle is drawn behind the
    data traces. Shapes never show up in a plotly legend, so one empty
    marker-only trace is also added purely to create the legend entry.

    Parameters
    ----------
    fig : plotly figure
        Figure to annotate. It is modified in place and also returned.
    concert_periods : iterable of (start, end) pairs, optional
        x-axis bounds of each shaded band. Defaults to the single band
        ("2024-07-20", "2024-08-10").
        NOTE(review): the default presumably straddles the August 2024 monthly
        observation rather than marking the exact concert days - confirm.
    fillcolor : str, optional
        Fill colour of the bands and of the legend marker (default "gainsboro").
    name : str, optional
        Legend label for the bands (default "Extra Wembley (HA)").

    Returns
    -------
    The same figure, with the legend entry and the band(s) added.
    """
    if concert_periods is None:
        concert_periods = [("2024-07-20", "2024-08-10")]

    # Legend-only dummy trace: no data points, a square marker in the band
    # colour so the legend swatch resembles the shaded rectangle.
    fig.add_trace(
        go.Scatter(
            x=[None],
            y=[None],
            mode="markers",
            marker=dict(color=fillcolor, symbol="square"),
            name=name,
        )
    )

    # One rectangle per period; yref="paper" with y0=0/y1=1 spans the full
    # plot height regardless of the y-axis range.
    for start_date, end_date in concert_periods:
        fig.add_shape(
            type="rect",
            xref="x",
            yref="paper",
            x0=start_date,
            y0=0,
            x1=end_date,
            y1=1,
            fillcolor=fillcolor,
            opacity=0.5,
            layer="below",
            line_width=0,
        )

    return fig

------------

------------

------------

# Pick mcg

In [None]:
# Merchant category group used to filter the data in the cells below and shown in chart subtitles.
mcg = 'All'

--------------------

--------------------

--------------------

In [None]:
# Postal areas of the concert venues: Edinburgh (EH), Liverpool (L), Cardiff (CF), Wembley (HA).
ts_pas = ['EH', 'L', 'CF', 'HA']

In [None]:
# Rows for the concert postal areas at the selected MCG, excluding the 'All' issuing-country roll-up.
_for_areas_mask = (
    (df['merchant_location_level'] == 'POSTAL_AREA')
    & (df['mcg'] == mcg)
    & (df['cardholder_issuing_country'] != 'All')
    & (df['merchant_location'].isin(ts_pas))
)
for_areas = df[_for_areas_mask].copy()

In [None]:
# Collapse to one row per month / postal area / issuing country / MCG, summing the three metrics.
_for_areas_keys = ['date_time', 'merchant_location', 'cardholder_issuing_country', "mcg"]
_for_areas_sums = {"spend" : "sum", "transactions" : "sum", "cardholders" : "sum"}
for_areas = for_areas.groupby(_for_areas_keys).agg(_for_areas_sums).reset_index()

In [None]:
# Distinct cardholder issuing countries present in the concert-area data (used to build colour_map).
countries_list = for_areas['cardholder_issuing_country'].unique()

In [None]:
# Hex colour palette; the colour maps built in later cells cycle through it by position.
colours2 = ['#12436D',
            '#28A197',
            '#801650',
            '#f66068',
            '#F46A25',
            '#3D3D3D',
            '#A285D1',
            '#003c57',
            '#a8bd3a',
            '#00a3a6',
            '#206095',
            '#27a0cc',
            '#746cb1',
            '#22d0b6',
            '#118c7b',
            '#871a5b']


# Fixed colours for the 'UK' comparison series and each concert postal area,
# passed to px.line as color_discrete_map in the area-level charts.
areas_uk_colours = {'UK': '#003c57',
 'CF': '#a8bd3a',
 'EH': '#27a0cc',
 'HA': '#0f8243',
 'L': '#F46A25'}




In [None]:
# plotly's built-in Alphabet palette (the mapping below draws from colours2, not from this).
colours = pc.qualitative.Alphabet
# Give every issuing country a colour from colours2, wrapping round when countries outnumber colours.
colour_map = {
    country_name: colours2[position % len(colours2)]
    for position, country_name in enumerate(countries_list)
}

### More international

In [None]:
# Non-UK-issued cards at postal-area level for the selected MCG.
_intl_rows = (
    (df['merchant_location_level'] == 'POSTAL_AREA')
    & (df['cardholder_issuing_country'] != 'UNITED KINGDOM')
    & (df['mcg'] == mcg)
)
_intl_at_venue = df['merchant_location'].isin(ts_pas)
_intl_sums = {"spend" : "sum", "transactions" : "sum", "cardholders" : "sum"}

# Comparator series: every other known postal area combined, labelled 'UK'.
uk_intsums = (
    df[_intl_rows & (~_intl_at_venue) & (df['merchant_location'] != 'UNKNOWN')]
    .groupby(['date_time', "mcg"])
    .agg(_intl_sums)
    .reset_index()
)
uk_intsums['merchant_location'] = 'UK'

# One series per concert postal area.
ts_intsums = (
    df[_intl_rows & _intl_at_venue]
    .groupby(['date_time', 'merchant_location', "mcg"])
    .agg(_intl_sums)
    .reset_index()
)

international_df = pd.concat([uk_intsums, ts_intsums])

In [None]:
# Rebuilds international_df from the same two frames as the preceding cell; re-running this
# cell resets the frame and discards any derived columns added to it later.
international_df = pd.concat([uk_intsums, ts_intsums])

In [None]:
# Calendar parts: 'month' lines up the same month across years, 'year' picks the baseline.
international_df['year'] = international_df['date_time'].dt.year
international_df['month'] = international_df['date_time'].dt.month

metrics = ['spend', 'transactions', 'cardholders']

# Rows that belong to the 2019 baseline year.
_intl_is_2019 = international_df['year'] == 2019

for i in metrics:
    # calc year-on-year differences (previous row for the same area and calendar month;
    # relies on rows being in date order within each area)
    international_df[f'yoy_{i}'] = international_df.groupby(['merchant_location','month'])[f'{i}'].diff(periods=1)

    # calc year-on-year % change
    international_df[f'yoy_{i}_perc'] = international_df.groupby(['merchant_location', 'month'])[f'{i}'].pct_change(periods=1)*100

    # index to 2019 average: the baseline is the mean over all rows dated 2019 for the area.
    # The previous `iloc[0:11]` took only the first 11 rows, i.e. it left one month
    # out of a 12-month year and depended on row order. Areas with no 2019 rows get NaN.
    _intl_baseline = (
        international_df.assign(_baseline_value=international_df[i].where(_intl_is_2019))
        .groupby('merchant_location')['_baseline_value']
        .transform('mean')
    )
    international_df[f'index_{i}_2019'] = international_df[i] / _intl_baseline * 100


In [None]:
# Indexed international spend: one line per concert postal area plus the dashed 'UK' comparator.
fig = px.line(
    international_df,
    x="date_time",
    y="index_spend_2019",
    color="merchant_location",
    color_discrete_map=areas_uk_colours,
    title="Sum international spend at stadium postal areas, indexed to 2019 average",
    template="simple_white",
    height=500,
)

# Horizontal legend, right-aligned just above the plot area.
fig.update_layout(
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1}
)
fig.update_layout(
    legend_title_text="Concert venue postal area:",
    title_subtitle_text=f"MCG = {mcg}",
    yaxis_title="Indexed spend",
    xaxis_title="Date",
)
fig.update_traces(line={"dash": "dash"}, selector={"name": "UK"})  # dashed line for UK

# Shade the June concert period and the extra Wembley period.
fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

In [None]:
# Year-on-year % change in international spend, shown from 2020 onwards.
_intl_from_2020 = international_df[international_df['date_time'] >= '2020-01-01']

fig = px.line(
    _intl_from_2020,
    x="date_time",
    y="yoy_spend_perc",
    color="merchant_location",
    color_discrete_map=areas_uk_colours,
    title="Year-on-year percentage change in international inbound spend at stadium postal areas",
    template="simple_white",
    height=500,
)

# Horizontal legend, right-aligned just above the plot area.
fig.update_layout(
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1}
)
fig.update_layout(legend_title_text="Postal area:", title_subtitle_text=f"MCG = {mcg}")
fig.update_traces(line={"dash": "dash"}, selector={"name": "UK"})  # dashed line for UK
fig.add_hline(y=0, opacity=0.7, line_width=0.5)  # zero-change reference line

# Shade the June concert period and the extra Wembley period.
fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

## Country specific international spends at postal areas

In [None]:
# Cardholder issuing country to show in the two country-specific charts below.
country_choice = 'UNITED STATES OF AMERICA'

In [None]:
# Same selection as the all-international series, but keeping the issuing country as a key.
_intl2_rows = (
    (df['merchant_location_level'] == 'POSTAL_AREA')
    & (df['cardholder_issuing_country'] != 'UNITED KINGDOM')
    & (df['mcg'] == mcg)
)
_intl2_at_venue = df['merchant_location'].isin(ts_pas)
_intl2_sums = {"spend" : "sum", "transactions" : "sum", "cardholders" : "sum"}

# Comparator: every other known postal area combined, per issuing country, labelled 'UK'.
uk_intsums2 = (
    df[_intl2_rows & (~_intl2_at_venue) & (df['merchant_location'] != 'UNKNOWN')]
    .groupby(['date_time', "mcg", 'cardholder_issuing_country'])
    .agg(_intl2_sums)
    .reset_index()
)
uk_intsums2['merchant_location'] = 'UK'

# One series per concert postal area and issuing country.
ts_intsums2 = (
    df[_intl2_rows & _intl2_at_venue]
    .groupby(['date_time', 'merchant_location', "mcg", 'cardholder_issuing_country'])
    .agg(_intl2_sums)
    .reset_index()
)


In [None]:
# Stack the 'UK' comparator rows and the concert-area rows (both keyed by issuing country).
international_df2 = pd.concat([uk_intsums2, ts_intsums2])

In [None]:
# Calendar parts: 'month' lines up the same month across years, 'year' picks the baseline.
international_df2['year'] = international_df2['date_time'].dt.year
international_df2['month'] = international_df2['date_time'].dt.month

metrics = ['spend', 'transactions', 'cardholders']

# Rows that belong to the 2019 baseline year.
_intl2_is_2019 = international_df2['year'] == 2019

for i in metrics:
    # calc year-on-year differences (previous row for the same area, country and calendar
    # month; relies on rows being in date order within each area/country pair)
    international_df2[f'yoy_{i}'] = international_df2.groupby(['merchant_location','cardholder_issuing_country', 'month'])[f'{i}'].diff(periods=1)

    # calc year-on-year % change
    international_df2[f'yoy_{i}_perc'] = international_df2.groupby(['merchant_location','cardholder_issuing_country',  'month'])[f'{i}'].pct_change(periods=1)*100

    # index to 2019 average: the baseline is the mean over all rows dated 2019 for the
    # area/country pair. The previous `iloc[0:11]` took only the first 11 rows, i.e. it
    # left one month out of a 12-month year and, for pairs whose data starts after 2019,
    # silently indexed against a different period. Pairs with no 2019 rows now get NaN.
    _intl2_baseline = (
        international_df2.assign(_baseline_value=international_df2[i].where(_intl2_is_2019))
        .groupby(['merchant_location', 'cardholder_issuing_country'])['_baseline_value']
        .transform('mean')
    )
    international_df2[f'index_{i}_2019'] = international_df2[i] / _intl2_baseline * 100


In [None]:
# Indexed spend by cards issued in the chosen country, per postal area.
_country_rows = international_df2[international_df2['cardholder_issuing_country'] == country_choice]

fig = px.line(
    _country_rows,
    x="date_time",
    y="index_spend_2019",
    color="merchant_location",
    color_discrete_map=areas_uk_colours,
    title=f"Sum {country_choice} spend at stadium postal areas, indexed to 2019 average",
    template="simple_white",
    height=500,
)

# Horizontal legend, right-aligned just above the plot area.
fig.update_layout(
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1}
)
fig.update_layout(legend_title_text="Postal area:", title_subtitle_text=f"MCG = {mcg}")
fig.update_traces(line={"dash": "dash"}, selector={"name": "UK"})  # dashed line for UK

# Shade the June concert period and the extra Wembley period.
fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

In [None]:
# Year-on-year % change in spend by cards issued in the chosen country, from 2020 onwards.
_country_from_2020 = international_df2[
    (international_df2['cardholder_issuing_country'] == country_choice)
    & (international_df2['date_time'] >= '2020-01-01')
]

fig = px.line(
    _country_from_2020,
    x="date_time",
    y="yoy_spend_perc",
    color="merchant_location",
    color_discrete_map=areas_uk_colours,
    title=f"Year-on-year percentage change in {country_choice} spend at stadium postal areas",
    template="simple_white",
    height=500,
)

# Horizontal legend, right-aligned just above the plot area.
fig.update_layout(
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1}
)
fig.update_layout(legend_title_text="Postal area:", title_subtitle_text=f"MCG = {mcg}")
fig.update_traces(line={"dash": "dash"}, selector={"name": "UK"})  # dashed line for UK
fig.add_hline(y=0, opacity=0.7, line_width=0.5)  # zero-change reference line

# Shade the June concert period and the extra Wembley period.
fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

## Area specific international spends

In [None]:
# Issuing countries to show in the per-area charts below (keeps the legends readable).
selected_countries = ['UNITED STATES OF AMERICA', 'FRANCE', 'GERMANY', 'REST OF EUROPE', 'REPUBLIC OF IRELAND', 'AUSTRALIA', 'CHINA', 'CANADA', 'SPAIN', 'INDIA'] 

In [None]:
for area in ts_pas:
    # Spend since January 2023 in this postal area, restricted to the selected issuing countries.
    _area_country_rows = for_areas[
        (for_areas['merchant_location'] == area)
        & (for_areas['date_time'] >= '2023-01-01')
        & (for_areas['cardholder_issuing_country'].isin(selected_countries))
    ]

    fig = px.line(
        _area_country_rows,
        x="date_time",
        y="spend",
        color="cardholder_issuing_country",
        color_discrete_map=colour_map,
        title=f"MCG = {mcg}, {area}",
        template="simple_white",
    )
    fig = add_ts_periods(fig)

    # Only the Wembley postal area hosted the additional August concerts.
    if area == 'HA':
        fig = add_extra_wembley(fig)

    fig.show()

### Indexed spend

In [None]:
# As of the 2024 Q3 ingest the series covers 69 months in total, so
# (postal area, issuing country) pairs with fewer than 69 months of data can be removed.

# Alternative: index spend from 2022 onwards instead, which would avoid removing
# countries whose only gaps fall in the lockdown period.

# Work on a copy so the steps below do not modify for_areas.
int_spend_df = for_areas.copy()


In [None]:
# Number of distinct months observed for each (postal area, issuing country) pair.
month_counts = int_spend_df.groupby(['merchant_location', 'cardholder_issuing_country'])['date_time'].nunique()

# A pair is complete when it has every month that appears anywhere in the data.
# Deriving the length (69 months as of the 2024 Q3 ingest) instead of hard-coding it
# keeps the completeness check correct after later ingests extend the series.
n_months_in_series = int_spend_df['date_time'].nunique()
full_countries = month_counts[month_counts >= n_months_in_series].index

# Keep only the rows whose (postal area, issuing country) pair is complete.
int_spend_df_cut = int_spend_df[int_spend_df.set_index(['merchant_location', 'cardholder_issuing_country']).index.isin(full_countries)].copy()

In [None]:
# Index spend to the 2019 average of each (postal area, issuing country) pair.
# The baseline is the mean over all rows dated 2019; the previous `iloc[0:11]`
# took only the first 11 rows and so left one month out of the 12-month year.
_cut_spend_2019 = int_spend_df_cut['spend'].where(int_spend_df_cut['date_time'].dt.year == 2019)
_cut_baseline = (
    int_spend_df_cut.assign(_baseline_value=_cut_spend_2019)
    .groupby(['merchant_location', 'cardholder_issuing_country'])['_baseline_value']
    .transform('mean')
)
int_spend_df_cut['index_spend_full'] = int_spend_df_cut['spend'] / _cut_baseline * 100


In [None]:
# Spot check: 2019 average spend for AUSTRALIA-issued cards in Cardiff (CF).
# Selects the rows by calendar year; the previous `[0:11]` slice covered only 11 rows.
_cf_australia = int_spend_df_cut[(int_spend_df_cut['merchant_location'] == 'CF') & (int_spend_df_cut['cardholder_issuing_country'] == 'AUSTRALIA')]
_cf_australia.loc[_cf_australia['date_time'].dt.year == 2019, 'spend'].mean()

In [None]:
# Display the Cardiff (CF) rows for AUSTRALIA-issued cards for visual inspection.
int_spend_df_cut[(int_spend_df_cut['merchant_location'] == 'CF') & (int_spend_df_cut['cardholder_issuing_country'] == 'AUSTRALIA')]

In [None]:
for area in ts_pas:
    # Build the whole mask from int_spend_df_cut itself. The previous version took the
    # UNITED KINGDOM condition from for_areas, a different (longer) frame, which only
    # worked through implicit index re-alignment of the boolean mask.
    _area_index_rows = int_spend_df_cut[
        (int_spend_df_cut['merchant_location'] == area)
        & (int_spend_df_cut['cardholder_issuing_country'].isin(selected_countries))
        & (int_spend_df_cut['cardholder_issuing_country'] != 'UNITED KINGDOM')
    ]

    fig = px.line(
        _area_index_rows,
        x="date_time",
        y="index_spend_full",
        color="cardholder_issuing_country",
        title=f"MCG = {mcg}, {area}",
        color_discrete_map=colour_map,
        template="simple_white",
        height=450,
    )
    fig = add_ts_periods(fig)

    # Only the Wembley postal area hosted the additional August concerts.
    if area == 'HA':
        fig = add_extra_wembley(fig)

    # Disabled experiment: label each country's line with an annotation.
#     labels_df = int_spend_df_cut[(int_spend_df_cut['merchant_location'] == area)&  (int_spend_df_cut['cardholder_issuing_country'].isin(selected_countries))].copy() 

#     for i in labels_df['cardholder_issuing_country'].unique():
#         labels_df2 = labels_df[labels_df['cardholder_issuing_country'] == i].copy()

#         labels_df2.sort_values('index_spend_full', inplace=True)

#         x_label = labels_df2.iloc[-1]['date_time']
#         y_label = labels_df2.iloc[-1]['index_spend_full']
#         country_text = labels_df2.iloc[-1]['cardholder_issuing_country']

#         fig.add_annotation(x=x_label, y=y_label,
#                 text=country_text,
#                 align = 'left',
#                 font=dict(size=10))

    fig.show()

### MCG international spend

In [None]:
# MCG-level rows (mcc rolled up to 'All', a specific mcg) at the concert postal areas ...
_mcg_level_at_venues = (
    (sml_df['merchant_location'].isin(ts_pas))
    & (sml_df['merchant_location_level'] == 'POSTAL_AREA')
    & (sml_df['mcc'] == 'All')
    & (sml_df['mcg'] != 'All')
)
# ... for cards issued outside the UK, excluding the 'All' issuing-country roll-up.
_non_uk_issuers = (
    (sml_df['cardholder_issuing_country'] != 'UNITED KINGDOM')
    & (sml_df['cardholder_issuing_country'] != 'All')
)
mcg_int_df = sml_df[_mcg_level_at_venues & _non_uk_issuers].copy()

### Select country

In [None]:
# Display the issuing countries available in the MCG-level data.
mcg_int_df['cardholder_issuing_country'].unique()

In [None]:
# Issuing country and merchant category groups shown in the MCG charts.
country = 'UNITED STATES OF AMERICA'
selected_mcgs = [
    'APPAREL & ACCESSORIES',
    'ENTERTAINMENT',
    'FOOD & GROCERY',
    'LODGING',
    'QSR',
    'RESTAURANTS',
    'RETAIL GOODS',
    'TRANSPORTATION',
]

# Give every MCG present in the data a colour from colours2, wrapping round when needed.
mcg_list = mcg_int_df['mcg'].unique()
colour_map_mcg = {
    mcg_name: colours2[position % len(colours2)]
    for position, mcg_name in enumerate(mcg_list)
}

In [None]:
for area in ts_pas:
    # Spend since January 2023 by the chosen country's cards in this area, for the selected MCGs.
    _area_mcg_rows = mcg_int_df[
        (mcg_int_df['merchant_location'] == area)
        & (mcg_int_df['date_time'] >= '2023-01-01')
        & (mcg_int_df['cardholder_issuing_country'] == country)
        & (mcg_int_df['mcg'].isin(selected_mcgs))
    ]

    fig = px.line(
        _area_mcg_rows,
        x="date_time",
        y="spend",
        color="mcg",
        color_discrete_map=colour_map_mcg,
        title=f"{country} spend in {area}",
        template="simple_white",
    )
    fig = add_ts_periods(fig)

    # Only the Wembley postal area hosted the additional August concerts.
    if area == 'HA':
        fig = add_extra_wembley(fig)

    fig.show()

# Year-on-year

In [None]:
# Split the postal-area rows into concert areas and everything else ("UK").
# NOTE(review): unlike the international section, 'UNKNOWN' merchant locations are not
# excluded from the UK side here - confirm that is intended.
_is_postal_area_row = df['merchant_location_level'] == 'POSTAL_AREA'
_is_concert_area = df['merchant_location'].isin(ts_pas)
ts_df = df[_is_concert_area & _is_postal_area_row]
uk_df = df[(~_is_concert_area) & _is_postal_area_row]

# Monthly totals per MCG: per concert area, and for the rest of the UK combined.
_metric_sums = {"spend" : "sum", "transactions" : "sum", "cardholders" : "sum"}
ts_grouped = ts_df.groupby(['date_time', 'merchant_location', "mcg"]).agg(_metric_sums).reset_index()
uk_grouped = uk_df.groupby(['date_time', "mcg"]).agg(_metric_sums).reset_index()

# Optional: cut down to 2022 onwards
#ts_short = ts_grouped[ts_grouped['date_time'] >= '2022-01-01']
#uk_short = uk_grouped[uk_grouped['date_time'] >= '2022-01-01']

# Currently the full time series is kept.
ts_short = ts_grouped.copy()
uk_short = uk_grouped.copy()

In [None]:
def _pct_vs_lag(values, lag):
    """Percentage change of ``values`` relative to the row ``lag`` positions earlier."""
    earlier = values.shift(lag)
    return ((values - earlier) / earlier) * 100


# Restrict both series to the selected merchant category group.
ts_short_mcg = ts_short.loc[ts_short['mcg'] == mcg].copy()
uk_short_mcg = uk_short.loc[uk_short['mcg'] == mcg].copy()

# Calendar parts, used to line up the same month across years.
for _frame in (ts_short_mcg, uk_short_mcg):
    _frame['year'] = _frame['date_time'].dt.year
    _frame['month'] = _frame['date_time'].dt.month

# --- Rest of UK: one row per month ---
# Differences against the same month one year earlier, and against the previous month.
uk_short_mcg['y-o-y'] = uk_short_mcg.groupby('month')['spend'].diff(periods=1)
uk_short_mcg['y-o-y-cards'] = uk_short_mcg.groupby('month')['cardholders'].diff(periods=1)
uk_short_mcg['m-o-m'] = uk_short_mcg['spend'].diff(periods=1)
uk_short_mcg['m-m_perc'] = (uk_short_mcg['m-o-m'] / uk_short_mcg['spend'].shift(1)) * 100
# The 12-row lag equals "same month last year" only when no month is missing.
uk_short_mcg['y-y_perc'] = _pct_vs_lag(uk_short_mcg['spend'], 12)
uk_short_mcg['y-y_card_perc'] = _pct_vs_lag(uk_short_mcg['cardholders'], 12)

# --- Concert postal areas: differences computed within each area ---
ts_short_mcg['y-o-y'] = ts_short_mcg.groupby(['merchant_location', 'month'])['spend'].diff(periods=1)
ts_short_mcg['y-o-y-cards'] = ts_short_mcg.groupby(['merchant_location', 'month'])['cardholders'].diff(periods=1)
ts_short_mcg['m-o-m'] = ts_short_mcg.groupby('merchant_location')['spend'].diff(periods=1)

# Percentage changes are row-lag based, so they are calculated on one area at a time.
_area_frames = {}
for _area_code in ('CF', 'EH', 'L', 'HA'):
    _area_frame = ts_short_mcg[ts_short_mcg['merchant_location'] == _area_code].copy()
    _area_frame['m-m_perc'] = (_area_frame['m-o-m'] / _area_frame['spend'].shift(1)) * 100
    _area_frame['y-y_perc'] = _pct_vs_lag(_area_frame['spend'], 12)
    _area_frame['y-y_card_perc'] = _pct_vs_lag(_area_frame['cardholders'], 12)
    _area_frame['m-m_card'] = _pct_vs_lag(_area_frame['cardholders'], 1)
    _area_frames[_area_code] = _area_frame

cf_df = _area_frames['CF']
eh_df = _area_frames['EH']
l_df = _area_frames['L']
ha_df = _area_frames['HA']

full_ts_df = pd.concat([cf_df, eh_df, ha_df, l_df])

### options:

y-o-y (year-on-year difference in spend)

m-o-m (month-on-month difference in spend)

m-m_perc (month-on-month percentage change of spend)

y-y_perc (year-on-year percentage change of spend)

y-y_card_perc (year-on-year percentage change of cardholders)

In [None]:
# Column of full_ts_df to plot on the y-axis of the chart below.
y_column = 'cardholders'

In [None]:
# Chart-title text for every column that can be selected through y_column.
# The text is used as the start of the title ("<text> at stadium postal areas"),
# so none of the entries carries the leading ": " that some of them used to have.
_fig_titles = {
    'y-o-y': "Year on year sum spend",
    'spend': "Sum monthly spend",
    'transactions': "Sum monthly transactions",
    'cardholders': "Sum monthly cardholders",
    'm-o-m': "Month on month sum spend",
    'y-y_perc': "Year on year sum spend percentage change",
    'y-y_card_perc': "Year on year sum cardholders percentage change",
    'm-m_perc': "Month on month sum spend percentage change",
    'm-m_card': "Month on month sum cardholders percentage change",
}

# Fail loudly on an unsupported choice instead of leaving fig_title undefined
# (or stale from an earlier run of this cell).
if y_column not in _fig_titles:
    raise ValueError(
        f"No chart title defined for y_column={y_column!r}; choose one of {sorted(_fig_titles)}"
    )

fig_title = _fig_titles[y_column]

In [None]:
# Give every postal area in full_ts_df a colour from colours2, wrapping round when needed.
_ts_area_codes = full_ts_df['merchant_location'].unique()
colour_map2 = {
    area_code: colours2[position % len(colours2)]
    for position, area_code in enumerate(_ts_area_codes)
}

In [None]:
# Selected metric (y_column) over time, one line per concert postal area.
fig = px.line(
    full_ts_df,
    x="date_time",
    y=y_column,
    color="merchant_location",
    color_discrete_map=colour_map2,
    # alternative title: f"MCG = {mcg} {fig_title}"
    title=f'{fig_title} at stadium postal areas',
    template="simple_white",
    height=500,
)

# Shade the June concert period and the extra Wembley period.
fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

## Indexing spend


In [None]:
# Fixed colours for the 'UK' comparison series and each concert postal area.
# This repeats the areas_uk_colours mapping defined earlier in the notebook with the same values.

areas_uk_colours = {'UK': '#003c57',
 'CF': '#a8bd3a',
 'EH': '#27a0cc',
 'HA': '#0f8243',
 'L': '#F46A25'}

#### Specify date for indexing of a trimmed time period 

In [None]:
# Start date for indexing over a trimmed period; currently referenced only by the
# commented-out 'index_spend_short' line in the next cell.
trimmed_start = '2022-01-01'

In [None]:
# Label the rest-of-UK series so it can be stacked with the per-area series.
uk_short_mcg['merchant_location'] = 'UK'

stat_df = pd.concat([uk_short_mcg, full_ts_df])
index_fulldf = stat_df.copy()

# Index spend and cardholders to each series' 2019 average. The baseline is the mean
# over all rows dated 2019; the previous `iloc[0:11]` took only the first 11 rows and
# so left one month out of the 12-month year.
_index_is_2019 = index_fulldf['date_time'].dt.year == 2019
for _source_col, _index_col in (('spend', 'index_spend_full'), ('cardholders', 'index_cards_full')):
    _index_baseline = (
        index_fulldf.assign(_baseline_value=index_fulldf[_source_col].where(_index_is_2019))
        .groupby('merchant_location')['_baseline_value']
        .transform('mean')
    )
    index_fulldf[_index_col] = index_fulldf[_source_col] / _index_baseline * 100

# Give every series a colour from colours2, wrapping round when needed.
colour_map3 = {
    location: colours2[position % len(colours2)]
    for position, location in enumerate(index_fulldf['merchant_location'].unique())
}
# Disabled: index over the trimmed period that starts at trimmed_start.
#index_fulldf['index_spend_short'] = index_fulldf[index_fulldf['date_time']>= trimmed_start].groupby([ 'merchant_location'])['spend'].transform(lambda x: x / (x.iloc[0:11].mean(axis = 0))*100)

In [None]:
# Indexed total spend: one line per concert postal area plus the dashed 'UK' comparator.
fig = px.line(
    index_fulldf,
    x="date_time",
    y="index_spend_full",
    color="merchant_location",
    color_discrete_map=areas_uk_colours,
    title="Sum spend at stadium postal areas, indexed to 2019 average",
    template="simple_white",
    height=500,
)

# Horizontal legend, right-aligned just above the plot area.
fig.update_layout(
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1}
)
fig.update_layout(
    legend_title_text="Concert postal area:",
    title_subtitle_text=f"MCG = {mcg}",
    yaxis_title="Indexed sum spend",
    xaxis_title="Date",
)
fig.update_traces(line={"dash": "dash"}, selector={"name": "UK"})  # dashed line for UK

# Shade the June concert period and the extra Wembley period.
fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

In [None]:
# Indexed cardholder counts: one line per concert postal area plus the dashed 'UK' comparator.
fig = px.line(
    index_fulldf,
    x="date_time",
    y="index_cards_full",
    color="merchant_location",
    color_discrete_map=areas_uk_colours,
    title="Sum cardholders at stadium postal areas, indexed to 2019 average",
    template="simple_white",
    height=500,
)

# Horizontal legend, right-aligned just above the plot area.
fig.update_layout(
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1}
)
fig.update_layout(legend_title_text="Postal area:", title_subtitle_text=f"MCG = {mcg}")
fig.update_traces(line={"dash": "dash"}, selector={"name": "UK"})  # dashed line for UK

# Shade the June concert period and the extra Wembley period.
fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

In [None]:
# Year-on-year % difference in spend, shown from 2020 onwards.
_index_from_2020 = index_fulldf[index_fulldf['date_time'] >= '2020-01-01']

fig = px.line(
    _index_from_2020,
    x="date_time",
    y="y-y_perc",
    color="merchant_location",
    color_discrete_map=areas_uk_colours,
    title="Year-on-year percentage difference in spend at stadium postal areas",
    template="simple_white",
    height=500,
)

# Horizontal legend, right-aligned just above the plot area.
fig.update_layout(
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1}
)
fig.update_layout(legend_title_text="Postal area:", title_subtitle_text=f"MCG = {mcg}")
fig.update_traces(line={"dash": "dash"}, selector={"name": "UK"})  # dashed line for UK
fig.add_hline(y=0, opacity=0.7, line_width=0.5)  # zero-change reference line

# Shade the June concert period and the extra Wembley period.
fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

In [None]:
# Year-on-year % difference in cardholders, shown from 2020 onwards.
_index_cards_from_2020 = index_fulldf[index_fulldf['date_time'] >= '2020-01-01']

fig = px.line(
    _index_cards_from_2020,
    x="date_time",
    y="y-y_card_perc",
    color="merchant_location",
    color_discrete_map=areas_uk_colours,
    title="Year-on-year percentage difference in cardholders at stadium postal areas",
    template="simple_white",
    height=500,
)

# Horizontal legend, right-aligned just above the plot area.
fig.update_layout(
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1}
)
fig.update_layout(legend_title_text="Postal area:", title_subtitle_text=f"MCG = {mcg}")
fig.update_traces(line={"dash": "dash"}, selector={"name": "UK"})  # dashed line for UK
fig.add_hline(y=0, opacity=0.7, line_width=0.5)  # zero-change reference line

# Shade the June concert period and the extra Wembley period.
fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

# Mapping

In [None]:
# import geopandas as gpd
# from datetime import datetime
# from folium import Map
# from folium.plugins import FloatImage
# from shapely.geometry import box
# import folium
# import mapclassify
# import ipywidgets as widgets
# from IPython.display import display

# from google.cloud import storage
# client = storage.Client()


In [None]:
# from fintrans_toolbox.src import bq_utils as bq

# client = storage.Client()
# bq.boundary_file_download(client, postal_level="postcode_area", output_location="")
# shape = gpd.read_file("postcode_area" + ".shp")

In [None]:
# full_df = pd.concat([full_ts_df, uk_short_mcg])
# full_df = full_df[['date_time', 'merchant_location', 'mcg', 'm-m_perc']].copy()

# shape_areas = pd.DataFrame(shape['postarea'].unique(), columns = ['areas'])
# shape_areas = shape_areas[~shape_areas['areas'].isin(ts_pas)]
# shape_trimmed = shape[~shape['postarea'].isin(ts_pas)]
# shape_trimmed

In [None]:
# from shapely.ops import unary_union

# merged_polygon = shape_trimmed.unary_union
# rest_of_uk_gdf = gpd.GeoDataFrame(geometry=[merged_polygon], crs =  shape.crs)
# rest_of_uk_gdf.plot()

In [None]:
# shape_justs= shape[shape['postarea'].isin(ts_pas)]
# props_table = pd.concat([shape_justs, rest_of_uk_gdf], axis=0, ignore_index=True)
# props_table.fillna('UK', inplace=True)
# props_table = props_table[['postarea', 'geometry']]
# props_table

In [None]:
# full_df2 = full_df[full_df['date_time'] >= '2023-01-01'].copy()
# full_df2.fillna('UK', inplace=True)
# geodata = full_df2.merge(
#     props_table, how="left", left_on="merchant_location", right_on="postarea"
# )


In [None]:
# def plot_interactive_map(gdf, column, vmin=None, vmax=None):
#     def plot_map(date):
#         fig, ax = plt.subplots(figsize=(10, 10))
#         filtered_gdf = gdf[gdf["date_time"] == date]
        
#         filtered_gdf.plot(ax = ax,
#                           column = column,
#                           legend = True, cmap='OrRd')
#         plt.title(f"{mcg} Year on Year Percentage Changes")
#         plt.show()

#     # create an interactive slider for dates
#     dates = sorted(gdf["date_time"].unique())
#     date_slider = widgets.SelectionSlider(options=dates, description="Select Month:")

#     # return the interactive map
#     return widgets.interactive(plot_map, date=date_slider)


In [None]:
# geodata2 = geodata[['date_time', 'postarea', 'm-m_perc', 'geometry']]
# test = props_table.merge(
#     geodata2[['date_time','postarea', 'm-m_perc']], how="left", left_on="postarea", right_on="postarea"
# )


In [None]:
# plot_interactive_map (gdf= test, column ='m-m_perc', vmin=None, vmax=None)

# Rural - urban

#### Code from ft_articles/notebooks/article_2/rural_urban_classifications_subnational.ipynb

In [None]:
# client = bigquery.Client()
# sql_ruc = """SELECT pcds, ru11ind, doterm        
#     FROM ons-fintrans-data-prod.fintrans_reference.NSPL_MAY_2022_UK
#     GROUP BY pcds, ru11ind, doterm"""

# ruc = bq.read_bq_table_sql(client, sql_ruc)

# #=Create df for rural / urban cat
# data = {'ru11ind' : ["A1","B1","C1", "C2", "D1", "D2", "E1", "E2", "F1", "F2", 
#            "1","2","3","4","5","6","7","8", "Z9"], 
        
#         'RU11NM' : ["(England/Wales) Urban major conurbation", "(England/Wales) Urban minor conurbation","(England/Wales) Urban city and town",
#          "(England/Wales) Urban city and town in a sparse setting","(England/Wales) Rural town and fringe","(England/Wales) Rural town and fringe in a sparse setting",
#          "(England/Wales) Rural village","(England/Wales) Rural village in a sparse setting","(England/Wales) Rural hamlet and isolated dwellings",
#          "(England/Wales) Rural hamlet and isolated dwellings in a sparse setting","(Scotland) Large Urban Area","(Scotland) Other Urban Area",
#          "(Scotland) Accessible Small Town","(Scotland) Remote Small Town","(Scotland) Very Remote Small Town","(Scotland) Accessible Rural",
#          "(Scotland) Remote Rural","(Scotland) Very Remote Rural","(pseudo) Channel Islands/Isle of Man"],
        
#         'ruc_group' : ["Urban", "Urban","Urban",
#          "Urban","Rural","Rural",
#          "Rural","Rural","Rural",
#          "Rural","Urban","Urban",
#          "Rural","Rural","Rural","Rural",
#          "Rural","Rural", "None"]
#        }


# ruc_df = pd.DataFrame(data, columns = ['ru11ind', 'RU11NM','ruc_group'])
# ruc_df
# print(len(ruc))
# ruc = ruc.query('doterm >= 201901| doterm.isnull()')
# print(len(ruc))
# ruc = ruc.query('ru11ind != "" and ru11ind != "Z9"') #remove empty and (pusedo channel islands_
# print(len(ruc))

# #Extract postal area
# #Caluclate area, district, sector
# df_nspl = ruc.copy()
# df_nspl[['postal_district','postal_sector']] = df_nspl["pcds"].str.split(" ", n=1, expand=True)
# df_nspl['postal_sector'] = df_nspl['postal_sector'].str.strip()
# df_nspl['postal_sector'] = df_nspl['postal_sector'].str[:1]
# df_nspl['postal_sector'] = df_nspl['postal_district'] + " " +  df_nspl['postal_sector']
# df_nspl['postal_area'] = df_nspl['postal_sector'].str[:2]
# df_nspl['postal_area'] = df_nspl['postal_area'].str.replace('\d+', '')
# df_nspl['postal_area'] = df_nspl['postal_area'].str[:2]

# #remove whitespace
# df_nspl['postal_area'] = df_nspl['postal_area'].str.strip()
# df_nspl['postal_sector'] = df_nspl['postal_sector'].str.strip()
# df_nspl['postal_district'] = df_nspl['postal_district'].str.strip()

# ruc= df_nspl[['pcds','postal_area','ru11ind']]

# ruc = ruc.merge(ruc_df, how = "left", on = "ru11ind")

# #Caclulate proportion of postcodes in postal area/ruc combination
# df = ruc[['postal_area', 'pcds', 'ruc_group']].groupby(['postal_area', 'ruc_group']).count().reset_index()
# #Add in postal areas that are 100% rural
# df = df.set_index(['postal_area','ruc_group'])['pcds'].unstack(fill_value=0).stack().reset_index(name='pcds')

# #R code still to convert
# df['pcd_prop'] = df.groupby(['postal_area'])['pcds'].transform(lambda x: x/x.sum()) 
# df = df[['postal_area', 'ruc_group', 'pcd_prop']]

# #filter urban only
# df = df[df['ruc_group'] == "Urban"]

# #Remove WV and NN due to uncertainty and calculate quintiles
# nspl_ruc_quintile = df.query('postal_area != "NN" & postal_area != "WV"')
# nspl_ruc_quintile = nspl_ruc_quintile.sort_values('pcd_prop')
# # adding Quantile_rank column to the DataFrame 
# nspl_ruc_quintile['ruc_quin'] = pd.qcut(nspl_ruc_quintile['pcd_prop'], 5, labels=[1,2,3,4,5]) 


In [None]:
# client = bigquery.Client()

# sql_tot = """SELECT time_period, time_period_value, merchant_location, spend
#   FROM ons-fintrans-data-prod.fintrans_visa.retail_performance_high_streets_towns
#   WHERE cardholder_location_level = 'All' AND mcg = 'All' AND
#   merchant_location_level = 'POSTAL_AREA' AND time_period = 'Month'
#   ORDER BY time_period, time_period_value,spend"""

# ml = bq.read_bq_table_sql(client, sql_tot)
# df_ml= t.create_date_time(ml)
# #Join to Visa data and calculate adjusted spend for each region-postal area combination
# sort_nspl_ruc = df_ml.merge(nspl_ruc_quintile, how = "left", left_on = "merchant_location", right_on ="postal_area")
# t = sort_nspl_ruc.query('postal_area.isnull()')
# t['merchant_location'].unique()
# # Channel islands and northern ireland removed
# sort_nspl_ruc = sort_nspl_ruc.query('postal_area.notnull()')
# fin = sort_nspl_ruc[['merchant_location', 'ruc_quin']]
# fin = fin.drop_duplicates()


In [None]:
# #read in NSPL scaling factors
# sql = """SELECT * FROM ons-fintrans-analysis-prod.fin_wip_notebook.nspl_postal_level_scaling_factors"""
# df_postal_all = client.query(sql).to_dataframe()
# df_postal_all = df_postal_all[['postal_area', 'region', 'area_sf']]

# df_postal_all = df_postal_all.sort_values('area_sf', ascending = False)
# df_postal_all = df_postal_all.drop_duplicates(subset = 'postal_area',keep = 'first') 
# df_postal_all = df_postal_all[['postal_area', 'region']].reset_index(drop = True)
# df_postal_all

In [None]:


# # renaming col to match 
# fin.rename(columns = {'merchant_location':'postal_area'}, inplace = True)
# # merge on postal areas
# area_region_ruc = pd.merge(
#   fin, df_postal_all, how="outer", on=["postal_area"])


In [None]:
#fin['postal_area'].unique()

In [None]:
#fin.rename(columns = {'postal_area':'merchant_location'}, inplace = True)

#fin

In [None]:
#df = pd.concat([sml_df1, sml_df2])

In [None]:
#ru_df = df[(df['merchant_location_level'] == 'POSTAL_AREA') & (df['merchant_location'] != 'UNKNOWN') & (df['cardholder_issuing_country'] != 'All')].copy()

In [None]:
#ru_df = ru_df.groupby(['date_time', 'merchant_location', 'cardholder_issuing_country', "mcg"]).agg({"spend" : "sum", "transactions" : "sum", "cardholders" : "sum"}).reset_index()

In [None]:
#ruq_df = pd.merge(
#  ru_df, fin, how="outer", on=["merchant_location"])

In [None]:
#len(ruq_df[ruq_df['ruc_quin'] == 3]['merchant_location'].unique())

In [None]:
#ruq_df[ruq_df['ruc_quin'] == 3]['merchant_location'].unique()

In [None]:
#len(ruq_df[ruq_df['ruc_quin'] == 4]['merchant_location'].unique())

In [None]:
#ruq_df[ruq_df['ruc_quin'] == 4]['merchant_location'].unique()

In [None]:
#len(ruq_df[ruq_df['ruc_quin'] == 5]['merchant_location'].unique())

In [None]:
#ruq_df[ruq_df['ruc_quin'] == 5]['merchant_location'].unique()

In [None]:
#fin[fin['merchant_location'].isin(ts_pas)]

# ruc_quin 5, 4, 3

In [None]:
# # ruq specific dfs without the concert postal areas
# ruq_df3 = ruq_df[(ruq_df['ruc_quin'] == 3.0) & (~ruq_df['merchant_location'].isin(ts_pas))]
# ruq_df4 = ruq_df[(ruq_df['ruc_quin'] == 4.0) & (~ruq_df['merchant_location'].isin(ts_pas))]
# ruq_df5 = ruq_df[(ruq_df['ruc_quin'] == 5.0) & (~ruq_df['merchant_location'].isin(ts_pas))]

In [None]:
# # grouping ruq specific areas by mcg sums
# ruq_df3 = ruq_df3.groupby(['date_time', 'merchant_location', "mcg"]).agg({"spend" : "sum", "transactions" : "sum", "cardholders" : "sum"}).reset_index()
# ruq_df4 = ruq_df4.groupby(['date_time', 'merchant_location', "mcg"]).agg({"spend" : "sum", "transactions" : "sum", "cardholders" : "sum"}).reset_index()
# ruq_df5 = ruq_df5.groupby(['date_time', 'merchant_location', "mcg"]).agg({"spend" : "sum", "transactions" : "sum", "cardholders" : "sum"}).reset_index()


In [None]:
# # saving all of the unique ruq specific postal areas (excluding concert areas)
# ruq_3_pas = list(ruq_df3['merchant_location'].unique())
# ruq_4_pas = list(ruq_df4['merchant_location'].unique())
# ruq_5_pas = list(ruq_df5['merchant_location'].unique())

In [None]:
# ruq_df3_mcg = ruq_df3[ruq_df3['mcg'] == mcg].groupby(['date_time', "mcg"]).agg({"spend" : "sum", "transactions" : "sum", "cardholders" : "sum"}).reset_index().copy()
# ruq_df4_mcg = ruq_df4[ruq_df4['mcg'] == mcg].groupby(['date_time', "mcg"]).agg({"spend" : "sum", "transactions" : "sum", "cardholders" : "sum"}).reset_index().copy()
# ruq_df5_mcg = ruq_df5[ruq_df5['mcg'] == mcg].groupby(['date_time', "mcg"]).agg({"spend" : "sum", "transactions" : "sum", "cardholders" : "sum"}).reset_index().copy()

In [None]:
# ruq_df3_mcg.insert(1, 'merchant_location', 'rest')
# ruq_df4_mcg.insert(1, 'merchant_location', 'rest')
# ruq_df5_mcg.insert(1, 'merchant_location', 'rest')


In [None]:
# calculating year on year and month on month


In [None]:
#all_ruq_dfs = [ruq_df3_mcg, ruq_df4_mcg, ruq_df5_mcg]

In [None]:
# for i in range(len(all_ruq_dfs)):
#     all_ruq_dfs[i]['year'] = all_ruq_dfs[i]['date_time'].dt.year
#     all_ruq_dfs[i]['month'] = all_ruq_dfs[i]['date_time'].dt.month


#     all_ruq_dfs[i]['yoy_spend'] =all_ruq_dfs[i].groupby('month')['spend'].diff(periods=1)
#     all_ruq_dfs[i]['mom_spend'] =all_ruq_dfs[i]['spend'].diff(periods=1)
#     all_ruq_dfs[i]['mom_spend_perc'] = (all_ruq_dfs[i]['mom_spend']/all_ruq_dfs[i]['spend'].shift(1)) * 100
#     all_ruq_dfs[i]['yoy_spend_perc'] = (all_ruq_dfs[i]['yoy_spend']/all_ruq_dfs[i]['spend'].shift(12)) *100



In [None]:
# ruq_3_f = all_ruq_dfs[0].copy()
# ruq_4_f = all_ruq_dfs[1].copy()
# ruq_5_f = all_ruq_dfs[2].copy()

# conjoining the concert postal areas to relevant ruq

# assign ruq to each postcode

In [None]:
# ts_ruqs = fin[fin['merchant_location'].isin(ts_pas)]
# ts_ruqs

In [None]:
# ts_ruq_areas = pd.merge(
#   full_ts_df, ts_ruqs, how="outer", on=["merchant_location"])
# ts_ruq_areas = ts_ruq_areas.rename(columns={'y-o-y': 'yoy_spend', 'm-o-m': 'mom_spend', 'perc_spend_yoy':'yoy_spend_perc', 'perc_spend_mom':'mom_spend_perc'})


In [None]:
# fin_ruq5 = pd.concat([ruq_5_f, ts_ruq_areas[ts_ruq_areas['ruc_quin'] == 5]])
# fin_ruq4 = pd.concat([ruq_4_f, ts_ruq_areas[ts_ruq_areas['ruc_quin'] == 4]])
# fin_ruq3 = pd.concat([ruq_3_f, ts_ruq_areas[ts_ruq_areas['ruc_quin'] == 3]])

In [None]:
#ruqs_dfs = [fin_ruq3, fin_ruq4, fin_ruq5]


In [None]:
# for dfx in ruqs_dfs:
    
#     ruq = str(dfx['ruc_quin'].unique()[1])
#     fig = px.line(dfx[dfx['date_time'] >= '2023-01-01'],
#     x="date_time",
#     y="mom_spend_perc",
#     color="merchant_location",
#     title=f"RUQ {ruq} {mcg} month-month spend perc change",
#     )
#     fig = add_ts_periods(fig)
#     fig.show()

# Looking at month to month 

In [None]:
# Month-on-month and year-on-year spend changes for the selected MCG, for the
# concert postal areas (ts_short) and the UK comparison series (uk_short).
# ts_short, uk_short and mcg are defined in earlier cells.
ts_short_mcg = ts_short[ts_short['mcg'] == mcg].copy()
uk_short_mcg = uk_short[uk_short['mcg'] == mcg].copy()


# retrieving year and month
ts_short_mcg['year'] = ts_short_mcg['date_time'].dt.year
ts_short_mcg['month'] = ts_short_mcg['date_time'].dt.month

uk_short_mcg['year'] = uk_short_mcg['date_time'].dt.year
uk_short_mcg['month'] = uk_short_mcg['date_time'].dt.month

# calculating year-on-year and month-on-month differences
# (grouping by calendar month makes diff(1) compare against the previous row
# for that same month)
uk_short_mcg['y-o-y'] = uk_short_mcg.groupby('month')['spend'].diff(periods=1)
uk_short_mcg['m-o-m'] = uk_short_mcg['spend'].diff(periods=1)
uk_short_mcg['m-m_perc'] = (uk_short_mcg['m-o-m'] / uk_short_mcg['spend'].shift(1)) * 100
uk_short_mcg['y-y_perc'] = (uk_short_mcg['y-o-y'] / uk_short_mcg['spend'].shift(12)) * 100

ts_short_mcg['y-o-y'] = ts_short_mcg.groupby(['merchant_location', 'month'])['spend'].diff(periods=1)
ts_short_mcg['m-o-m'] = ts_short_mcg.groupby('merchant_location')['spend'].diff(periods=1)

# one frame per concert postal area so that shift() never crosses areas
cf_df = ts_short_mcg[ts_short_mcg['merchant_location'] == 'CF'].copy()
eh_df = ts_short_mcg[ts_short_mcg['merchant_location'] == 'EH'].copy()
l_df = ts_short_mcg[ts_short_mcg['merchant_location'] == 'L'].copy()
ha_df = ts_short_mcg[ts_short_mcg['merchant_location'] == 'HA'].copy()

for area_df in (cf_df, eh_df, l_df, ha_df):
    # year-on-year %: change relative to the row 12 positions earlier
    spend_12_back = area_df['spend'].shift(12)
    area_df['perc_spend_yoy'] = ((area_df['spend'] - spend_12_back) / spend_12_back) * 100

    # month-on-month %: change relative to the previous row.
    # HA previously divided by shift(12), unlike the other three areas,
    # which gave HA a wrong month-on-month percentage.
    area_df['perc_spend_mom'] = (area_df['m-o-m'] / area_df['spend'].shift(1)) * 100



full_ts_df = pd.concat([cf_df, eh_df, ha_df, l_df])

In [None]:

# Label the UK comparison rows and rename their percentage columns to the
# names used by the concert-area frames so the two can be concatenated.
uk_short_mcg.insert(loc=1, column='merchant_location', value='UK')
uk_short_mcg = uk_short_mcg.rename(
    columns={
        'm-m_perc': 'perc_spend_mom',
        'y-y_perc': 'perc_spend_yoy',
    }
)


In [None]:
# Stack the concert-area rows on top of the UK comparison rows.
df_concert_uk = pd.concat(objs=[full_ts_df, uk_short_mcg], axis=0)


In [None]:
# Month-on-month percentage change from January 2023 onwards, one line per
# merchant location (concert areas plus the 'UK' series).
fig = px.line(
    df_concert_uk.loc[df_concert_uk['date_time'] >= '2023-01-01'],
    x="date_time",
    y="perc_spend_mom",
    color="merchant_location",
    title=f"{mcg} Month on Month Percentage Change",
)
# overlay the concert-period annotations provided by the two helper functions
fig = add_extra_wembley(add_ts_periods(fig))
fig.show()

In [None]:
# Inspect the June 2024 rows (the month of the first run of concerts).
df_concert_uk.loc[df_concert_uk['date_time'].eq('2024-06-01')]

---------------------

# Postal district level

<b>June 2024</b>

Edinburgh (Murrayfield, EH12 5PJ, 67,144) x3

Liverpool (Anfield, L4 0TH, 61,276) x3

Cardiff (Principality,  CF10 1NS, 74,500) x1

London (Wembley, HA9 0WS, 90,000) x3

<b>August 2024</b>

London (Wembley, HA9 0WS, 90,000) x3 

In [None]:
# Postal districts containing each stadium (see the venue list above).
stadium_districts = [
    'EH12',  # Murrayfield, Edinburgh
    'L4',    # Anfield, Liverpool
    'CF10',  # Principality, Cardiff
    'HA9',   # Wembley, London
]
# Rebuild the combined frame from the two filtered views of the query result.
df = pd.concat(objs=[sml_df1, sml_df2])

In [None]:
# Split the combined frame into the stadium districts and the comparison group.
is_stadium_district = df['merchant_location'].isin(stadium_districts)
district_df = df[is_stadium_district].copy()

# Comparison group: every other known postal district.
non_districts_df = df[
    ~is_stadium_district
    & df['merchant_location_level'].eq('POSTAL_DISTRICT')
    & df['merchant_location'].ne('UNKNOWN')
].copy()

# Monthly totals over the comparison group for the selected MCG.
uk_district_info = (
    non_districts_df[non_districts_df['mcg'] == mcg]
    .groupby(['date_time', "mcg"])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)

In [None]:
# Monthly totals per stadium district, cardholder issuing country and MCG.
district_tourism = (
    district_df
    .groupby(['date_time', 'merchant_location', 'cardholder_issuing_country', "mcg"])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)

### General metrics at district level

In [None]:
# specifying colours


# Line colour for each series in the district-level charts.
districts_uk_colours = dict(
    UK='#003c57',
    CF10='#a8bd3a',
    EH12='#27a0cc',
    HA9='#0f8243',
    L4='#F46A25',
)

# Collapse the per-country rows into one row per month and stadium district
# for the selected MCG.
district_info = (
    district_tourism[district_tourism['mcg'] == mcg]
    .groupby(['date_time', 'merchant_location', "mcg"])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)
# The comparison totals are labelled 'UK' so they plot as their own line.
uk_district_info['merchant_location'] = 'UK'

dis_df = pd.concat(objs=[district_info, uk_district_info])

In [None]:
# Calendar year and month, used by the year-on-year grouping below.
dis_df_dates = dis_df['date_time'].dt
dis_df['year'] = dis_df_dates.year
dis_df['month'] = dis_df_dates.month

In [None]:
# Year-on-year changes and a 2019-based index for every metric, computed
# separately for each merchant location in dis_df.
metrics = ['spend', 'transactions', 'cardholders']

# Rows that make up the 2019 baseline. Selecting them by date (rather than
# taking the first rows positionally) uses all twelve months of 2019 and does
# not depend on row order or on where the series starts.
is_2019 = dis_df['date_time'].dt.year == 2019

for i in metrics:
    # calc year-on-year differences: within each (location, calendar month)
    # group the previous row is the same month one year earlier
    dis_df[f'yoy_{i}'] = dis_df.groupby(['merchant_location', 'month'])[f'{i}'].diff(periods=1)

    # calc year-on-year % change
    dis_df[f'yoy_{i}_perc'] = dis_df.groupby(['merchant_location', 'month'])[f'{i}'].pct_change(periods=1) * 100

    # index to 2019 average. Previously x.iloc[0:11], i.e. only the first
    # 11 rows of each location. Locations with no 2019 rows now get NaN.
    baseline_2019 = dis_df[i].where(is_2019).groupby(dis_df['merchant_location']).transform('mean')
    dis_df[f'index_{i}_2019'] = dis_df[i] / baseline_2019 * 100


In [None]:
# Raw monthly spend: one line per stadium district plus the 'UK' series.
fig = px.line(
    dis_df,
    x="date_time",
    y='spend',
    color='merchant_location',
    title='Sum spend at stadium postal districts',
    height=500,
    template='simple_white',
    color_discrete_map=districts_uk_colours,
)
# overlay the concert-period annotations provided by the two helper functions
fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

In [None]:
# Spend indexed to the 2019 average, per stadium district and for 'UK'.
fig = px.line(
    dis_df,
    x="date_time",
    y='index_spend_2019',
    color='merchant_location',
    title='Indexed sum spend at stadium postal districts, indexed to 2019 average',
    height=500,
    template='simple_white',
    color_discrete_map=districts_uk_colours,
)
# horizontal legend sitting just above the plot, right-aligned
fig.update_layout(
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    }
)
fig.update_layout(legend_title_text='Postal district:', title_subtitle_text=f'MCG = {mcg}')
# dashed line for UK
fig.update_traces(line={"dash": 'dash'}, selector={"name": 'UK'})
fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

In [None]:
# Cardholder counts indexed to the 2019 average, per stadium district and 'UK'.
fig = px.line(
    dis_df,
    x="date_time",
    y='index_cardholders_2019',
    color='merchant_location',
    title='Indexed sum cardholders at stadium postal districts, indexed to 2019 average',
    height=500,
    template='simple_white',
    color_discrete_map=districts_uk_colours,
)
# horizontal legend sitting just above the plot, right-aligned
fig.update_layout(
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    }
)
fig.update_layout(legend_title_text='Postal district:', title_subtitle_text=f'MCG = {mcg}')
# dashed line for UK
fig.update_traces(line={"dash": 'dash'}, selector={"name": 'UK'})
fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

In [None]:
# Year-on-year percentage change in spend from 2020 onwards, one line per
# stadium district plus the 'UK' series.
fig = px.line(
    dis_df[dis_df['date_time'] >= '2020-01-01'],
    x="date_time",
    y='yoy_spend_perc',
    color='merchant_location',
    # The title previously said "cardholders" although the y-axis is
    # yoy_spend_perc; it now names the metric that is actually plotted.
    title='Year-on-year percentage change of spend at postal districts',
    height=500,
    template='simple_white',
    color_discrete_map=districts_uk_colours,
)
# horizontal legend sitting just above the plot, right-aligned
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
))

fig.update_layout(legend_title_text='Postal district:', title_subtitle_text=f'MCG = {mcg}')
fig.update_traces(selector=dict(name='UK'), line=dict(dash='dash'))  # dashed line for UK
# zero reference line: points above it are growth, below it are decline
fig.add_hline(y=0, opacity=0.7, line_width=0.5)

fig = add_ts_periods(fig)
fig = add_extra_wembley(fig)
fig.show()

### Inbound international spend at stadium districts

In [None]:
# Relabel the comparison group as a single 'UK' location. This mutates
# non_districts_df on purpose: later cells reuse it with this label.
non_districts_df['merchant_location'] = 'UK'

# Rows for the selected MCG: the 'UK' comparison group plus the stadium districts.
int_sums_districts_df = pd.concat([
    non_districts_df[non_districts_df['mcg'] == mcg],
    district_df[district_df['mcg'] == mcg],
])

# This section and its chart titles describe *international* (inbound) spend,
# but no cardholder-country filter was applied, so domestic spend was included
# and the result duplicated the general metrics above. Exclude the same labels
# the inbound-spend indexing cell excludes.
# NOTE(review): confirm 'UNITED KINGDOM' / 'All' are the only domestic or
# aggregate labels present in cardholder_issuing_country.
int_sums_districts_df = int_sums_districts_df[
    ~int_sums_districts_df['cardholder_issuing_country'].isin(['UNITED KINGDOM', 'All'])
]

In [None]:
# Collapse to one row per month and merchant location.
int_sums_districts_df = (
    int_sums_districts_df
    .groupby(['date_time', 'merchant_location'])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)

In [None]:
# Year-on-year changes and a 2019-based index for every metric, computed
# separately for each merchant location in int_sums_districts_df.
int_sums_districts_df['year'] = int_sums_districts_df['date_time'].dt.year
int_sums_districts_df['month'] = int_sums_districts_df['date_time'].dt.month

metrics = ['spend', 'transactions', 'cardholders']

# Rows that make up the 2019 baseline. Selecting them by year (rather than
# taking the first rows positionally) uses all twelve months of 2019 and does
# not depend on row order or on where the series starts.
is_2019 = int_sums_districts_df['year'] == 2019

for i in metrics:
    # calc year-on-year differences: within each (location, calendar month)
    # group the previous row is the same month one year earlier
    int_sums_districts_df[f'yoy_{i}'] = int_sums_districts_df.groupby(['merchant_location', 'month'])[f'{i}'].diff(periods=1)

    # calc year-on-year % change
    int_sums_districts_df[f'yoy_{i}_perc'] = int_sums_districts_df.groupby(['merchant_location', 'month'])[f'{i}'].pct_change(periods=1) * 100

    # index to 2019 average. Previously x.iloc[0:11], i.e. only the first
    # 11 rows of each location. Locations with no 2019 rows now get NaN.
    baseline_2019 = (
        int_sums_districts_df[i]
        .where(is_2019)
        .groupby(int_sums_districts_df['merchant_location'])
        .transform('mean')
    )
    int_sums_districts_df[f'index_{i}_2019'] = int_sums_districts_df[i] / baseline_2019 * 100


In [None]:
# Line colour for each series in the district-level charts.
districts_uk_colours = dict(
    UK='#003c57',
    CF10='#a8bd3a',
    EH12='#27a0cc',
    HA9='#0f8243',
    L4='#F46A25',
)

In [None]:
# Spend from int_sums_districts_df indexed to the 2019 average, one line per
# merchant location.
fig = px.line(
    int_sums_districts_df,
    x="date_time",
    y='index_spend_2019',
    color='merchant_location',
    title="Sum international spend at stadium postal districts, indexed to 2019 average",
    height=500,
    template='simple_white',
    color_discrete_map=districts_uk_colours,
)
# horizontal legend sitting just above the plot, right-aligned
fig.update_layout(
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    }
)
fig.update_layout(legend_title_text='Postal district:', title_subtitle_text=f'MCG = {mcg}')
# dashed line for UK
fig.update_traces(line={"dash": 'dash'}, selector={"name": 'UK'})

fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

In [None]:
# Year-on-year percentage change in spend from 2020 onwards, taken from
# int_sums_districts_df, one line per merchant location.
fig = px.line(
    int_sums_districts_df.loc[int_sums_districts_df['date_time'] >= '2020-01-01'],
    x="date_time",
    y='yoy_spend_perc',
    color='merchant_location',
    title="Year-on-year percentage difference in international spend at stadium postal districts",
    height=500,
    template='simple_white',
    color_discrete_map=districts_uk_colours,
)
# horizontal legend sitting just above the plot, right-aligned
fig.update_layout(
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    }
)
fig.update_layout(legend_title_text='Postal district:', title_subtitle_text=f'MCG = {mcg}')
# dashed line for UK
fig.update_traces(line={"dash": 'dash'}, selector={"name": 'UK'})
# zero reference line: points above it are growth, below it are decline
fig.add_hline(y=0, opacity=0.7, line_width=0.5)

fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

### Individual districts: spend by cardholder issuing country

In [None]:
# One chart per stadium district: monthly spend by cardholder issuing country
# for the selected MCG, with each country's line labelled at its peak.
for district in stadium_districts:
    # A single filtered frame feeds both the lines and the peak labels.
    # Previously the lines were drawn from mcg == 'All' while the title and
    # labels used `mcg`, so labels could sit away from the lines they name.
    labels_df = district_tourism[
        (district_tourism['merchant_location'] == district)
        & (district_tourism['mcg'] == mcg)
        & (district_tourism['cardholder_issuing_country'].isin(selected_countries))
    ].copy()

    fig = px.line(
        labels_df,
        x="date_time",
        y="spend",
        color="cardholder_issuing_country",
        title=f"{mcg} {district}",
        template='simple_white',
        color_discrete_map=colour_map,
        height=450,
    )
    fig = add_ts_periods(fig)

    # only Wembley (HA9) gets the extra Wembley annotation
    if district == 'HA9':
        fig = add_extra_wembley(fig)

    for i in labels_df['cardholder_issuing_country'].unique():
        # rows for this country ordered by spend; the last row is the peak month
        labels_df2 = labels_df[labels_df['cardholder_issuing_country'] == i].sort_values('spend')

        x_label = labels_df2.iloc[-1]['date_time']
        y_label = labels_df2.iloc[-1]['spend']
        country_text = labels_df2.iloc[-1]['cardholder_issuing_country']

        fig.add_annotation(x=x_label, y=y_label,
                text=country_text,
                align='left',
                font=dict(size=9.5))

    # join line segments across missing months
    fig.update_traces(connectgaps=True)


    fig.show()

In [None]:
# Inspect the label frame left over from the last iteration of the loop above
# (last district, last country), ordered by spend.
labels_df2.sort_values(by='spend')

### Indexing inbound spend to 2019 average

In [None]:
# Inbound (non-UK) rows for the selected MCG at the stadium districts.
is_selected_mcg = district_tourism['mcg'] == mcg
is_domestic_or_total = district_tourism['cardholder_issuing_country'].isin(['UNITED KINGDOM', 'All'])
dis_int_df = district_tourism[is_selected_mcg & ~is_domestic_or_total].copy()

# Keep only (district, country) pairs with at least 33 distinct months of data.
# NOTE(review): the original comment reads "33 data points since 2020" -
# confirm that 33 is still the right threshold for the current data range.
month_counts = (
    dis_int_df
    .groupby(['merchant_location', 'cardholder_issuing_country'])['date_time']
    .nunique()
)
full_countries = month_counts.loc[month_counts >= 33].index
district_country_pairs = dis_int_df.set_index(['merchant_location', 'cardholder_issuing_country']).index
dis_int_df_cut = dis_int_df[district_country_pairs.isin(full_countries)].copy()

In [None]:
# Index spend, transactions and cardholders to their 2019 average for every
# (district, cardholder country) pair.
# The baseline was previously x.iloc[0:11].mean(), i.e. only the first 11 rows
# of each pair, which also relied on row order and on no months being missing.
# Selecting the baseline rows by date uses exactly the 2019 data; pairs with
# no 2019 rows get NaN.
is_2019 = dis_int_df_cut['date_time'].dt.year == 2019
pair_keys = [dis_int_df_cut['merchant_location'], dis_int_df_cut['cardholder_issuing_country']]

for metric, index_col in [('spend', 'index_spend'),
                          ('transactions', 'index_trans'),
                          ('cardholders', 'index_card')]:
    baseline_2019 = dis_int_df_cut[metric].where(is_2019).groupby(pair_keys).transform('mean')
    dis_int_df_cut[index_col] = dis_int_df_cut[metric] / baseline_2019 * 100


In [None]:
# Which stadium districts survived the month-count filter?
dis_int_df_cut.loc[:, 'merchant_location'].unique()

In [None]:
# One chart per stadium district: 2019-indexed spend by cardholder issuing
# country, with each country's line labelled at its highest point.
for district in stadium_districts:
    in_district = dis_int_df_cut['merchant_location'] == district
    in_selected_countries = dis_int_df_cut['cardholder_issuing_country'].isin(selected_countries)

    fig = px.line(
        dis_int_df_cut[in_district & in_selected_countries],
        x="date_time",
        y="index_spend",
        color="cardholder_issuing_country",
        title=f"2019 Indexed spend - {district}",
        template='simple_white',
        color_discrete_map=colour_map,
        height=450,
    )
    fig = add_ts_periods(fig)

    # only Wembley (HA9) gets the extra Wembley annotation
    if district == 'HA9':
        fig = add_extra_wembley(fig)

    labels_df = dis_int_df_cut[in_district & in_selected_countries].copy()

    for i in labels_df['cardholder_issuing_country'].unique():
        labels_df2 = labels_df[labels_df['cardholder_issuing_country'] == i].copy()

        # after sorting, the last row holds the highest index value
        labels_df2.sort_values('index_spend', inplace=True)
        top_row = labels_df2.iloc[-1]

        x_label = top_row['date_time']
        y_label = top_row['index_spend']
        country_text = top_row['cardholder_issuing_country']

        fig.add_annotation(
            x=x_label,
            y=y_label,
            text=country_text,
            align='left',
            font={"size": 9.5},
        )

    fig.show()

## Isolated country district spend 

In [None]:
# Cardholder issuing country to isolate in the chart below; it is matched
# against the cardholder_issuing_country column.
country_choice = 'UNITED STATES OF AMERICA'

In [None]:
# Monthly totals per merchant location and cardholder issuing country for the
# selected MCG (comparison group plus stadium districts), with year-on-year
# changes and a 2019-based index for every metric.
districts_internationals = (
    pd.concat([non_districts_df[non_districts_df['mcg'] == mcg], district_df[district_df['mcg'] == mcg]])
    .groupby(['date_time', 'merchant_location', 'cardholder_issuing_country'])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)
districts_internationals['year'] = districts_internationals['date_time'].dt.year
districts_internationals['month'] = districts_internationals['date_time'].dt.month

metrics = ['spend', 'transactions', 'cardholders']

# Rows that make up the 2019 baseline. Selecting them by year (rather than
# taking the first rows positionally) uses all twelve months of 2019 and does
# not depend on row order or on where each series starts.
is_2019 = districts_internationals['year'] == 2019
pair_keys = [
    districts_internationals['merchant_location'],
    districts_internationals['cardholder_issuing_country'],
]

for i in metrics:
    # calc year-on-year differences: within each (location, country, calendar
    # month) group the previous row is the same month one year earlier
    districts_internationals[f'yoy_{i}'] = districts_internationals.groupby(['merchant_location', 'cardholder_issuing_country', 'month'])[f'{i}'].diff(periods=1)

    # calc year-on-year % change
    districts_internationals[f'yoy_{i}_perc'] = districts_internationals.groupby(['merchant_location', 'cardholder_issuing_country', 'month'])[f'{i}'].pct_change(periods=1) * 100

    # index to 2019 average. Previously x.iloc[0:11], i.e. only the first
    # 11 rows of each pair. Pairs with no 2019 rows now get NaN.
    baseline_2019 = districts_internationals[i].where(is_2019).groupby(pair_keys).transform('mean')
    districts_internationals[f'index_{i}_2019'] = districts_internationals[i] / baseline_2019 * 100


In [None]:
# 2019-indexed spend by cardholders from `country_choice`, one line per
# merchant location.
fig = px.line(
    districts_internationals.loc[
        districts_internationals['cardholder_issuing_country'] == country_choice
    ],
    x="date_time",
    y='index_spend_2019',
    color='merchant_location',
    title=f"Sum {country_choice} spend at stadium postal districts, indexed to 2019 average",
    height=500,
    template='simple_white',
    color_discrete_map=districts_uk_colours,
)
# horizontal legend sitting just above the plot, right-aligned
fig.update_layout(
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    }
)
fig.update_layout(legend_title_text='Postal district:', title_subtitle_text=f'MCG = {mcg}')
# dashed line for UK
fig.update_traces(line={"dash": 'dash'}, selector={"name": 'UK'})

fig = add_extra_wembley(add_ts_periods(fig))

fig.show()

### Sum district spend across mcgs

In [None]:
# Monthly totals per stadium district and MCG, summed over all cardholder countries.
district_sumspend = (
    district_df
    .groupby(['date_time', 'merchant_location', "mcg"])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)

In [None]:
# MCGs shown in the per-district charts below.
selected_mcgs = [
    'APPAREL & ACCESSORIES',
    'ENTERTAINMENT',
    'FOOD & GROCERY',
    'LODGING',
    'QSR',
    'RESTAURANTS',
    'RETAIL GOODS',
    'TRANSPORTATION',
]

In [None]:
# One chart per stadium district: monthly spend since January 2023, one line
# per selected MCG.
for district in stadium_districts:
    fig = px.line(
        district_sumspend.loc[
            (district_sumspend['merchant_location'] == district)
            & (district_sumspend['date_time'] >= '2023-01-01')
            & district_sumspend['mcg'].isin(selected_mcgs)
        ],
        x="date_time",
        y="spend",
        color="mcg",
        title=f"{district}",
        template='simple_white',
        color_discrete_map=colour_map_mcg,
    )
    fig = add_ts_periods(fig)

    # only Wembley (HA9) gets the extra Wembley annotation
    if district == 'HA9':
        fig = add_extra_wembley(fig)

    fig.show()