In [None]:
# Root directory added to sys.path so the project packages below can be imported.
# NOTE(review): this is an absolute, machine-specific path — consider making it configurable.
project_path = "/home/jupyter"
import os
import sys
sys.path.append(project_path)
sys.path.append(f'{project_path}/ft_events/src/utils')

from google.cloud import bigquery
import importlib

import numpy as np
import pandas as pd
from plotly import graph_objs as go
import seaborn as sns

import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import plotly.express as px

# Project helper module; `t.create_date_time` is applied to every query result in this notebook.
from fintrans_toolbox.src import table_utils as t


# BigQuery client used for all queries in this notebook.
client = bigquery.Client()

## 2019-2024 Fixtures

* (F/M) = February/March
* In 2021 the games were held without spectators

|  | 2019 |  | ----  |  | 2020 |  |   
|---------| ----------|---------|------------|---------|----------|---------|

| Cardiff | Edinburgh | London | ---- | Cardiff | Edinburgh | London |
|---------|----------|---------|------|---------|----------|---------|
| v England (F) | v Italy (F) | v France (F) |---- | v Italy (F) | v England (F) | v Ireland (F) |
| v Ireland (M) | v Ireland (F) | v Italy (M) |---- | v France (F) | v France (M) | v Wales (M) |
|   | v Wales (M) | v Scotland (M) |----  | | | |



|  | 2022 |  | ----  |  | 2023 |  |  
|---------|----------|---------|------|---------|----------|---------|

| Cardiff | Edinburgh | London | ---- | Cardiff | Edinburgh | London |
|---------|----------|---------|--------|---------|----------|---------|
| v Scotland (F) | v England (F) | v Wales (F) | ----  | v Ireland (F) | v Wales (F) | v Scotland (F) |
| v France (M) | v France (F) | v Ireland (M) |----  | v England (M) | v Ireland (M) | v Italy (F) |
| v Italy (M) |  |   |---- |   | v Italy (M) | v France (M) |


|  | 2024 |  |  
|---------|----------|---------|

| Cardiff | Edinburgh | London |
|---------|----------|---------|
| v Scotland (F) | v England (F) | v Wales (F) |
| v France (M) | v France (F) | v Ireland (M) |
| v Italy (M) |  |   |



### Dates:

February - March annually



In [None]:
# Monthly spend by merchant location and cardholder issuing country.
# Every selected column is also listed in GROUP BY, so the grouping only
# collapses exact duplicate rows.
sql_spend = """SELECT time_period_value, merchant_location_level,
merchant_location, cardholder_issuing_country, mcg, mcc, spend, transactions, cardholders
FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
WHERE time_period = 'Month' AND
merchant_location_level != 'All' AND
cardholder_issuing_level != 'All' 
GROUP BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders
ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders"""

# Run the query, then pass the result through the toolbox date helper.
sml_df = t.create_date_time(client.query(sql_spend).to_dataframe())

In [None]:
# Stack the MCC-level rows (mcc != 'All') on top of the rows whose MCG is 'All'.
sml_df1 = sml_df.loc[sml_df['mcc'].ne('All')].copy()
sml_df2 = sml_df.loc[sml_df['mcg'].eq('All')].copy()
df = pd.concat([sml_df1, sml_df2])
df.head()

In [None]:
# Line colours for the plots: one shared palette, keyed once by postal area
# and once by postal district.
_uk_palette = ('#003c57', '#a8bd3a', '#27a0cc', '#F46A25')

areas_uk_colours = dict(zip(('UK', 'CF', 'EH', 'TW'), _uk_palette))

districts_uk_colours = dict(zip(('UK', 'CF10', 'EH12', 'TW2'), _uk_palette))

-----------------------

-----------------------

In [None]:
# MCG value used to filter the data (and label plot subtitles) in the cells below.
mcg = 'All'

-----------------------

-----------------------

## Initial checks

In [None]:
six_nation_areas = ['CF', 'EH', 'TW']
six_nation_countries = ['FRANCE', 'ITALY', 'REPUBLIC OF IRELAND']

# Postal-area rows for the three host areas, for the selected MCG, broken down
# by issuing country (the 'All' country rows are excluded).
_host_area_rows = (
    df['merchant_location_level'].eq('POSTAL_AREA')
    & df['mcg'].eq(mcg)
    & df['cardholder_issuing_country'].ne('All')
    & df['merchant_location'].isin(six_nation_areas)
)
stadiums_df = df[_host_area_rows].copy()

In [None]:
# Sum the three metrics over issuing countries for each month and host area.
stadiums_df = (
    stadiums_df
    .groupby(['date_time', 'merchant_location', "mcg"])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)

In [None]:
def add_nations_periods(fig, game_periods=None, fillcolor="grey"):
    """Shade the Six Nations tournament windows on a plotly figure.

    Parameters
    ----------
    fig : plotly figure
        Figure to annotate; it is modified in place.
    game_periods : iterable of (start, end) pairs, optional
        Date pairs to shade. Defaults to the 2019-2024 windows listed below.
    fillcolor : str, optional
        Colour of the shaded rectangles and of the legend marker.

    Returns
    -------
    The same figure object, so calls can be chained.
    """
    if game_periods is None:
        # Default tournament windows as (start, end) date strings.
        # NOTE(review): 2019-2021 use specific end dates while 2022-2024 end
        # on 1 March — confirm this difference is intentional.
        game_periods = [
            ("2019-02-01", "2019-03-16"),
            ("2020-02-01", "2020-03-08"),
            ("2021-02-06", "2021-03-26"),
            ("2022-02-01", "2022-03-01"),
            ("2023-02-01", "2023-03-01"),
            ("2024-02-01", "2024-03-01"),
        ]

    # Shapes do not appear in the legend, so add an empty trace with a square
    # marker in the same colour to act as the "Six Nations" legend entry.
    fig.add_trace(
        go.Scatter(
            x=[None],
            y=[None],
            mode="markers",
            marker=dict(color=fillcolor, symbol="square"),
            name="Six Nations",
        )
    )

    # One full-height rectangle per tournament window, drawn behind the data.
    for start_date, end_date in game_periods:
        fig.add_shape(
            type="rect",
            xref="x",
            yref="paper",  # y0/y1 are fractions of the plot height
            x0=start_date,
            y0=0,
            x1=end_date,
            y1=1,
            fillcolor=fillcolor,
            opacity=0.5,
            layer="below",
            line_width=0,
        )

    return fig

In [None]:
def calc_index_yoy(df, need_date_cols, group_list, base_year=2019):
    """Add year-on-year and base-year-indexed columns for each metric.

    For each of `spend`, `transactions` and `cardholders` this adds:

    * ``yoy_<metric>``: difference from the previous row of the same
      group and calendar month;
    * ``yoy_<metric>_perc``: the same change as a percentage;
    * ``index_<metric>_<base_year>``: the metric as a percentage of the
      group's mean over the base year.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the three metric columns and the `group_list` columns,
        plus `date_time` (datetime) when `need_date_cols` is true, otherwise
        a `month` column and either `year` or `date_time`.
    need_date_cols : bool
        When true, `year` and `month` are derived from `date_time`.
    group_list : list of str
        Columns identifying one series.
    base_year : int, optional
        Year whose mean defines index = 100. Defaults to 2019.

    Returns
    -------
    pandas.DataFrame
        A copy of `df` with the extra columns; `df` itself is not modified.

    Notes
    -----
    The year-on-year columns compare consecutive rows, so rows are expected
    to be in chronological order within each group.
    """
    result = df.copy()
    if need_date_cols:
        result['year'] = result['date_time'].dt.year
        result['month'] = result['date_time'].dt.month

    # Select base-year rows by their year rather than by position, so the
    # baseline covers every base-year month whatever the row count or order.
    if 'year' in result.columns:
        row_year = result['year']
    else:
        row_year = result['date_time'].dt.year
    in_base_year = (row_year == base_year).to_numpy()

    metrics = ['spend', 'transactions', 'cardholders']
    month_group = group_list + ['month']
    # Plain arrays as group keys avoid index alignment on frames built with
    # pd.concat, which carry duplicate index labels.
    group_keys = [result[col].to_numpy() for col in group_list]

    for metric in metrics:
        # calc year-on-year differences
        result[f'yoy_{metric}'] = result.groupby(month_group)[metric].diff(periods=1)

        # calc year-on-year % change
        result[f'yoy_{metric}_perc'] = result.groupby(month_group)[metric].pct_change(periods=1) * 100

        # index to the base-year average of the same group; groups with no
        # base-year rows get NaN.
        base_mean = (
            result[metric]
            .where(in_base_year)
            .groupby(group_keys)
            .transform('mean')
        )
        result[f'index_{metric}_{base_year}'] = result[metric] / base_mean.to_numpy() * 100

    return result

In [None]:
# Comparison series labelled 'UK': every postal area except the three host
# areas, summed per month for the selected MCG.
_non_host_rows = (
    df['merchant_location_level'].eq('POSTAL_AREA')
    & df['mcg'].eq(mcg)
    & df['cardholder_issuing_country'].ne('All')
    & ~df['merchant_location'].isin(six_nation_areas)
)
uk_df = (
    df[_non_host_rows]
    .copy()
    .groupby(['date_time', "mcg"])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)

uk_df['merchant_location'] = 'UK'

# Host areas first, then the 'UK' comparison rows.
stadiums_uk_df = pd.concat([stadiums_df, uk_df])

In [None]:
metrics = ['spend', 'transactions', 'cardholders']

# Horizontal legend sitting just above the plot, right-aligned.
legend_above_plot = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)

# One line chart per metric: the host postal areas against the 'UK' series.
for metric in metrics:
    fig = px.line(
        stadiums_uk_df,
        x="date_time",
        y=metric,
        color='merchant_location',
        title=f"Sum {metric} at stadium postal areas",
        template='simple_white',
        color_discrete_map=areas_uk_colours,
    )

    fig.update_layout(legend=legend_above_plot)
    fig.update_layout(legend_title_text='Postal area:', title_subtitle_text=f'MCG = {mcg}')
    # The 'UK' comparison line is drawn dashed.
    fig.update_traces(line=dict(dash='dash'), selector=dict(name='UK'))

    fig = add_nations_periods(fig)
    fig.show()

## stat pull outs

In [None]:
# Copy of the plotting frame with calendar columns for the summary statistics.
word_df = stadiums_uk_df.assign(
    year=lambda frame: frame['date_time'].dt.year,
    month=lambda frame: frame['date_time'].dt.month,
)

# Month-on-month change in spend within each location.
word_df['m-o-m'] = word_df.groupby('merchant_location')['spend'].diff()




In [None]:
# Month-on-month percentage change in spend within each location.
spend_by_location = word_df.groupby(['merchant_location'])['spend']
word_df['m-o-m_pct'] = spend_by_location.pct_change() * 100

In [None]:
# Month-on-month percentage change per location, with tournament windows shaded.
fig = add_nations_periods(
    px.line(
        word_df,
        x="date_time",
        y='m-o-m_pct',
        color='merchant_location',
        title="Month on month percentage change",
        template='simple_white',
        height=450,
    )
)
fig.show()

In [None]:
# Replace the index (duplicated by the earlier concat) with a fresh 0..n-1 range.
word_df = word_df.reset_index(drop=True)

In [None]:
# Average February month-on-month % change in spend for each location,
# leaving out 2020 and 2021.
excluded_years = [2020, 2021]

for ml in word_df['merchant_location'].unique():
    calc = word_df[(word_df['merchant_location'] == ml) & (word_df['month'].isin([2]))]

    covid_exc = calc[~calc['year'].isin(excluded_years)].copy()
    # .mean() ignores missing values (e.g. a first month with no previous month
    # to compare against) and returns NaN instead of raising when nothing is left.
    print(ml, 'February 2019-2024 (exc 2020 and 2021) average =', covid_exc['m-o-m_pct'].mean())

## Indexing spend at stadium areas

In [None]:
metrics = ['spend', 'transactions', 'cardholders']

# Use the shared helper instead of repeating its logic inline. It returns a copy
# with `year`, `month` and, per metric, the yoy_*, yoy_*_perc and index_*_2019
# columns, computed separately for each postal area.
indexed_stadiums = calc_index_yoy(
    df=stadiums_uk_df,
    need_date_cols=True,
    group_list=['merchant_location'],
)


In [None]:
# Horizontal legend sitting just above the plot, right-aligned.
legend_above_plot = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)

# One chart per indexed column (index_spend_2019, index_transactions_2019, ...).
index_columns = [col for col in indexed_stadiums.columns if col.startswith('index')]

for indexed_metric in index_columns:
    # The second token of the column name is the metric, e.g. 'spend'.
    metric_label = indexed_metric.split('_')[1]
    fig = px.line(
        indexed_stadiums,
        x="date_time",
        y=indexed_metric,
        color='merchant_location',
        title=f"Sum {metric_label} at stadium postal areas, indexed to 2019 average",
        height=500,
        template='simple_white',
        color_discrete_map=areas_uk_colours,
    )
    fig.update_layout(legend=legend_above_plot)

    fig.update_layout(legend_title_text='Postal area:', title_subtitle_text=f'MCG = {mcg}')
    # The 'UK' comparison line is drawn dashed.
    fig.update_traces(line=dict(dash='dash'), selector=dict(name='UK'))

    fig = add_nations_periods(fig)
    fig.show()

# Year-on-year changes

In [None]:
# Horizontal legend sitting just above the plot, right-aligned.
legend_above_plot = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)

# Only rows from 2020 onwards are plotted.
yoy_rows = indexed_stadiums[indexed_stadiums['date_time'] >= '2020-01-01']
yoy_perc_columns = [
    col for col in indexed_stadiums.columns
    if col.startswith('yoy') and col.endswith('perc')
]

for indexed_metric in yoy_perc_columns:
    metric_label = indexed_metric.split('_')[1]
    fig = px.line(
        yoy_rows,
        x="date_time",
        y=indexed_metric,
        color='merchant_location',
        title=f"Year-on-year {metric_label} percentage change at stadium postal areas",
        height=500,
        template='simple_white',
        color_discrete_map=areas_uk_colours,
    )
    fig.update_layout(legend=legend_above_plot)

    fig.update_layout(legend_title_text='Postal area:', title_subtitle_text=f'MCG = {mcg}')
    # The 'UK' comparison line is drawn dashed.
    fig.update_traces(line=dict(dash='dash'), selector=dict(name='UK'))
    # Thin reference line at zero change.
    fig.add_hline(y=0, opacity=0.7, line_width=0.5)

    fig = add_nations_periods(fig)
    fig.show()

### Checking cardholder changes in line with stadium capacities
Edinburgh – EH12, 67,144 capacity


Twickenham – TW2, 82,000 capacity


Cardiff – CF10, 74,000 capacity


In [None]:
# Work on a copy so the columns added in the next cell do not alter stadiums_df.
stadiums_df2 = stadiums_df.copy()

In [None]:
# Month-on-month change in cardholder counts within each host postal area.
cardholders_by_area = stadiums_df2.groupby('merchant_location')['cardholders']
stadiums_df2['cardholders_mm'] = cardholders_by_area.diff()

# since matches are on Feb/March, the changes would be seen in the March/April months.
# NOTE(review): the later plot of this data is titled "Jan-Feb-March" — confirm
# which months are intended.
stadiums_df2['month'] = stadiums_df2['date_time'].dt.month_name()
in_march_or_april = stadiums_df2['month'].isin(['March', 'April'])
stadiums_iso = stadiums_df2.loc[in_march_or_april].copy()
# Two-digit year label, e.g. '19'.
stadiums_iso['year'] = stadiums_iso['date_time'].dt.strftime('%y')

# Net change over the selected months, per area and year.
stadiums_iso_net = (
    stadiums_iso
    .groupby(["merchant_location", "year"])
    .agg({"cardholders_mm": "sum"})
    .reset_index()
)

In [None]:
# Month-on-month cardholder changes in TW, with a reference line at 82,000
# (the Twickenham capacity quoted in the markdown above).
tw_rows = stadiums_df2[stadiums_df2['merchant_location'] == 'TW']
fig = px.line(
    tw_rows,
    x="date_time",
    y='cardholders_mm',
    color='merchant_location',
    title="Month-Month changes to cardholder numbers in TW",
    template='simple_white',
)
fig = add_nations_periods(fig)
fig.add_hline(y=82000, opacity=0.7, line_width=1.5)
fig.show()

In [None]:
# Net cardholder change per year for each host area.
# NOTE(review): stadiums_iso_net is built from March and April rows, while the
# title says Jan-Feb-March — confirm which is intended.
net_change_plot = dict(
    x="year",
    y="cardholders_mm",
    color="merchant_location",
    title="Net change to cardholders Jan-Feb-March",
    template='simple_white',
)
fig = px.line(stadiums_iso_net, **net_change_plot)
fig.show()

------------------

------------------------

-----------------

## Inbound spend

In [None]:
# Issuing countries treated as inbound Six Nations visitors in this section
# (same three values as defined earlier, in a different order).
six_nation_countries = ['ITALY', 'FRANCE', 'REPUBLIC OF IRELAND']


In [None]:
# Filters shared by both frames: postal-area rows for the selected MCG, broken
# down by issuing country.
_inbound_rows = (
    df['merchant_location_level'].eq('POSTAL_AREA')
    & df['mcg'].eq(mcg)
    & df['cardholder_issuing_country'].ne('All')
)
_in_host_area = df['merchant_location'].isin(six_nation_areas)
_metric_sums = {"spend": "sum", "transactions": "sum", "cardholders": "sum"}

# Host areas: one row per month, area and issuing country.
inbound_spend_df = (
    df[_inbound_rows & _in_host_area]
    .groupby(['date_time', 'merchant_location', "cardholder_issuing_country", "mcg"])
    .agg(_metric_sums)
    .reset_index()
)
# Every other postal area, pooled under the label 'UK'.
inbound_spend_df_uk = (
    df[_inbound_rows & ~_in_host_area]
    .groupby(['date_time', "cardholder_issuing_country", "mcg"])
    .agg(_metric_sums)
    .reset_index()
)
inbound_spend_df_uk['merchant_location'] = 'UK'
inbound_df = pd.concat([inbound_spend_df_uk, inbound_spend_df])

# Combined spend of the three visiting countries per month and location.
sn_inbound_df = (
    inbound_df[inbound_df['cardholder_issuing_country'].isin(six_nation_countries)]
    .groupby(['date_time', "merchant_location"])
    .agg(_metric_sums)
    .reset_index()
)

In [None]:
metrics = ['spend', 'transactions', 'cardholders']

# Use the shared helper instead of repeating its logic inline. It adds `year`,
# `month` and, per metric, the yoy_*, yoy_*_perc and index_*_2019 columns,
# computed separately for each merchant location.
sn_inbound_df = calc_index_yoy(
    df=sn_inbound_df,
    need_date_cols=True,
    group_list=['merchant_location'],
)


In [None]:
# Horizontal legend sitting just above the plot, right-aligned.
legend_above_plot = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)

# Take the column names from the frame being plotted, so this cell does not
# depend on another DataFrame happening to share the same columns.
index_columns = [col for col in sn_inbound_df.columns if col.startswith('index')]

for indexed_metric in index_columns:
    fig = px.line(
        sn_inbound_df,
        x="date_time",
        y=indexed_metric,
        color='merchant_location',
        title=f"Sum {indexed_metric.split('_')[1]} from Six Nation participating countries at UK host postal areas, indexed to 2019 average",
        height=500,
        template='simple_white',
        color_discrete_map=areas_uk_colours,
    )
    fig.update_layout(legend=legend_above_plot)

    fig.update_layout(legend_title_text='Postal area:', title_subtitle_text=f'MCG = {mcg}')
    # The 'UK' comparison line is drawn dashed.
    fig.update_traces(selector=dict(name='UK'), line=dict(dash='dash'))

    fig = add_nations_periods(fig)
    fig.show()

## Individual Six Nations participating countries' spend at UK host areas

In [None]:
# Home fixtures against the three non-UK sides, as
# (first day of the fixture month, host postal area, visiting country).
_inbound_fixtures = [
    # Cardiff
    ('2019-03-01', 'CF', 'REPUBLIC OF IRELAND'),
    ('2020-02-01', 'CF', 'ITALY'),
    ('2020-02-01', 'CF', 'FRANCE'),
    ('2022-03-01', 'CF', 'FRANCE'),
    ('2022-03-01', 'CF', 'ITALY'),
    ('2023-02-01', 'CF', 'REPUBLIC OF IRELAND'),
    ('2024-03-01', 'CF', 'FRANCE'),
    ('2024-03-01', 'CF', 'ITALY'),
    # Edinburgh
    ('2019-02-01', 'EH', 'ITALY'),
    ('2019-02-01', 'EH', 'REPUBLIC OF IRELAND'),
    ('2020-03-01', 'EH', 'FRANCE'),
    ('2022-02-01', 'EH', 'FRANCE'),
    ('2023-03-01', 'EH', 'REPUBLIC OF IRELAND'),
    ('2023-03-01', 'EH', 'ITALY'),
    ('2024-02-01', 'EH', 'FRANCE'),
    # Twickenham
    ('2019-02-01', 'TW', 'FRANCE'),
    ('2019-03-01', 'TW', 'ITALY'),
    ('2020-02-01', 'TW', 'REPUBLIC OF IRELAND'),
    ('2022-03-01', 'TW', 'REPUBLIC OF IRELAND'),
    ('2023-02-01', 'TW', 'ITALY'),
    ('2023-03-01', 'TW', 'FRANCE'),
    ('2024-03-01', 'TW', 'REPUBLIC OF IRELAND'),
]
matches_log = pd.DataFrame(_inbound_fixtures, columns=['date_time', 'area', 'country'])

# Label for the opponent, and the host nation for each postal area.
area_to_host = {'CF': 'Wales', 'TW': 'England', 'EH': 'Scotland'}
matches_log['game_name'] = 'v ' + matches_log['country']
matches_log['host_name'] = matches_log['area'].map(area_to_host)


In [None]:
# Home fixtures against the other UK sides, as
# (first day of the fixture month, host postal area, visiting nation).
# NOTE(review): confirm the month recorded for the 2023 Cardiff fixture against
# England — it may have been played in late February rather than March.
_uk_fixtures = [
    # Cardiff
    ('2019-02-01', 'CF', 'ENGLAND'),
    ('2022-02-01', 'CF', 'SCOTLAND'),
    ('2023-03-01', 'CF', 'ENGLAND'),
    ('2024-02-01', 'CF', 'SCOTLAND'),
    # Edinburgh
    ('2019-03-01', 'EH', 'WALES'),
    ('2020-02-01', 'EH', 'ENGLAND'),
    ('2022-02-01', 'EH', 'ENGLAND'),
    ('2023-02-01', 'EH', 'WALES'),
    ('2024-02-01', 'EH', 'ENGLAND'),
    # Twickenham
    ('2019-03-01', 'TW', 'SCOTLAND'),
    ('2020-03-01', 'TW', 'WALES'),
    ('2022-02-01', 'TW', 'WALES'),
    ('2023-02-01', 'TW', 'SCOTLAND'),
    ('2024-02-01', 'TW', 'WALES'),
]
uk_matches_log = pd.DataFrame(_uk_fixtures, columns=['date_time', 'area', 'country'])

# Label for the opponent, and the host nation for each postal area.
area_to_host = {'CF': 'Wales', 'TW': 'England', 'EH': 'Scotland'}
uk_matches_log['game_name'] = 'v ' + uk_matches_log['country']
uk_matches_log['host_name'] = uk_matches_log['area'].map(area_to_host)


In [None]:
# Parse date_time as datetimes in all three frames so fixture dates can be
# compared with the spend data.
for _frame in (matches_log, uk_matches_log, inbound_df):
    _frame['date_time'] = pd.to_datetime(_frame['date_time'], format='%Y-%m-%d')


In [None]:
six_nation_countries = ['ITALY', 'FRANCE', 'REPUBLIC OF IRELAND']
six_nation_areas = ['CF', 'EH', 'TW']

# Horizontal legend sitting just above the plot, right-aligned.
legend_above_plot = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)

for area in six_nation_areas:
    # Spend of the three visiting countries at this host area.
    area_inbound = inbound_df[
        (inbound_df['cardholder_issuing_country'].isin(six_nation_countries))
        & (inbound_df['merchant_location'] == area)
    ]
    fig = px.line(
        area_inbound,
        x="date_time",
        y="spend",
        color="cardholder_issuing_country",
        title=f"Six Nation country sum spend at {area}",
        template="simple_white",
        height=500,
    )
    fig = add_nations_periods(fig)

    fig.update_layout(legend=legend_above_plot)
    fig.update_layout(legend_title_text='Inbound spend country:', title_subtitle_text=f'MCG = {mcg}')

    # Fixtures hosted in the current area.
    matches_in_area = matches_log[matches_log['area'] == area]

    for _, match in matches_in_area.iterrows():
        # Dotted vertical line at the fixture month.
        fig.add_vline(
            x=match['date_time'],
            line_dash='dot',
            line_color='gray',
            opacity=0.6,
        )

        # Spend of the visiting country in the fixture month.
        match_spend = area_inbound.loc[
            (area_inbound['date_time'] == match['date_time'])
            & (area_inbound['cardholder_issuing_country'] == match['country']),
            'spend',
        ]
        if match_spend.empty:
            # No row for this country and month: keep the vertical line but
            # skip the label rather than fail with an IndexError.
            continue
        spend_value = match_spend.iloc[0]

        # Label the fixture at the visiting country's spend value.
        fig.add_annotation(
            x=match['date_time'],
            y=spend_value,
            text=match['host_name'] + ' v ' + match['game_name'][2:].title(),
            font=dict(size=8, color="black", style="italic"),
            align='center',
        )

    # Show the plot for the current area
    fig.show()
    

## Individual country spend

In [None]:
# MCG-level rows (mcc == 'All', mcg != 'All') for the three visiting countries
# at the host postal areas.
_mcg_level_rows = (
    sml_df['merchant_location'].isin(six_nation_areas)
    & sml_df['mcg'].ne('All')
    & sml_df['cardholder_issuing_country'].isin(six_nation_countries)
    & sml_df['merchant_location_level'].eq('POSTAL_AREA')
    & sml_df['mcc'].eq('All')
)
all_mcgs = sml_df[_mcg_level_rows].copy()

In [None]:
metrics = ['spend', 'transactions', 'cardholders']

# Use the shared helper instead of repeating its logic inline. need_date_cols=True
# makes it derive the `month` column the year-on-year grouping needs; this cell
# did not create it itself.
# NOTE(review): if t.create_date_time already supplies `year`/`month`, they are
# recomputed here from date_time — confirm that is acceptable.
all_mcgs = calc_index_yoy(
    df=all_mcgs,
    need_date_cols=True,
    group_list=['merchant_location', 'cardholder_issuing_country', 'mcg'],
)


In [None]:
six_nation_countries = ['ITALY', 'FRANCE', 'REPUBLIC OF IRELAND']
mcgs_of_interest = ['FOOD & GROCERY', 'RESTAURANTS', 'QSR', 'RETAIL GOODS', 'TRANSPORTATION']

# Horizontal legend sitting just above the plot, right-aligned.
legend_above_plot = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
in_mcgs_of_interest = all_mcgs['mcg'].isin(mcgs_of_interest)

# One chart per host area and visiting country: indexed spend split by MCG.
for area in six_nation_areas:
    for country in six_nation_countries:
        pair_rows = all_mcgs[
            (all_mcgs['cardholder_issuing_country'] == country)
            & (all_mcgs['merchant_location'] == area)
            & in_mcgs_of_interest
        ]
        fig = px.line(
            pair_rows,
            x="date_time",
            y="index_spend_2019",
            color="mcg",
            title=f"{country} MCG sum spend at {area}, indexed to 2019 average",
            template="simple_white",
            height=500,
        )
        fig.update_layout(legend=legend_above_plot)

        fig.update_layout(legend_title_text='MCGs:', yaxis_title='Indexed spend')
        fig = add_nations_periods(fig)

        # Fixtures in which this country visited this area.
        matches_in_area = matches_log[(matches_log['area'] == area) & (matches_log['country'] == country)]

        for _, match in matches_in_area.iterrows():
            # Fixture label on the x-axis baseline (y=0) at the fixture month.
            fig.add_annotation(
                x=match['date_time'],
                y=0,
                text=match['host_name'] + ' v ' + match['game_name'][2:].title(),
                font=dict(size=8, color="black", style="italic"),
                align='center',
                bgcolor='lightgrey',
                showarrow=False,
            )

        fig.show()

In [None]:
six_nation_countries = ['ITALY', 'FRANCE', 'REPUBLIC OF IRELAND']
mcgs_of_interest = ['FOOD & GROCERY', 'RESTAURANTS', 'QSR', 'RETAIL GOODS', 'TRANSPORTATION']

# Horizontal legend sitting just above the plot, right-aligned.
legend_above_plot = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
in_mcgs_of_interest = all_mcgs['mcg'].isin(mcgs_of_interest)

# One chart per host area and visiting country: year-on-year % spend change by MCG.
for area in six_nation_areas:
    for country in six_nation_countries:
        pair_rows = all_mcgs[
            (all_mcgs['cardholder_issuing_country'] == country)
            & (all_mcgs['merchant_location'] == area)
            & in_mcgs_of_interest
        ]
        fig = px.line(
            pair_rows,
            x="date_time",
            y="yoy_spend_perc",
            color="mcg",
            title=f"{country} MCG year-on-year percentage spend change at {area}",
            template="simple_white",
            height=500,
        )
        fig.update_layout(legend=legend_above_plot)

        fig.update_layout(legend_title_text='MCGs:')
        fig = add_nations_periods(fig)

        # Fixtures in which this country visited this area.
        matches_in_area = matches_log[(matches_log['area'] == area) & (matches_log['country'] == country)]

        for _, match in matches_in_area.iterrows():
            # Opponent label at y=0 on the fixture month.
            fig.add_annotation(
                x=match['date_time'],
                y=0,
                text=match['game_name'],
                font=dict(size=8, color="black", style="italic"),
                align='center',
                bgcolor='lightgrey',
                showarrow=False,
            )

        fig.show()

### mccs

In [None]:
# MCC-level rows (mcc != 'All', mcg != 'All') for the three visiting countries
# at the host postal areas.
_mcc_level_rows = (
    sml_df['merchant_location'].isin(six_nation_areas)
    & sml_df['mcg'].ne('All')
    & sml_df['cardholder_issuing_country'].isin(six_nation_countries)
    & sml_df['merchant_location_level'].eq('POSTAL_AREA')
    & sml_df['mcc'].ne('All')
)
all_mccs = sml_df[_mcc_level_rows].copy()

In [None]:
# French cardholder spend at each host area, one line per MCC.
for area in six_nation_areas:

    fig = px.line(
        all_mccs[(all_mccs['cardholder_issuing_country'] == 'FRANCE') & (all_mccs['merchant_location'] == area)],
        x="date_time",
        y="spend",
        color="mcc",
        # The lines are split by MCC, so the title says MCC rather than mcg.
        title=f"French MCC spend at {area}",
        template="simple_white",
        height=500
        )
    fig = add_nations_periods(fig)
    fig.show()

---------------------------------

## Outbound Welsh spending

In [None]:
# Monthly spend between cardholder and merchant postal areas.
# Every selected column is also listed in GROUP BY, so the grouping only
# collapses exact duplicate rows.
sql_ret = """SELECT time_period_value, cardholder_location, merchant_location, mcg, 
spend, transactions, cardholders
FROM ons-fintrans-data-prod.fintrans_visa.retail_performance_high_streets_towns
WHERE time_period = 'Month' AND 
cardholder_location_level = "POSTAL_AREA" AND
merchant_location_level = "POSTAL_AREA" AND
cardholder_location != 'All' AND
merchant_location != 'All' 
GROUP BY time_period_value, cardholder_location, merchant_location, mcg, spend, transactions, cardholders
ORDER BY time_period_value, cardholder_location, merchant_location, mcg, spend, transactions, cardholders"""

# Run the query, then pass the result through the toolbox date helper.
ret_df = t.create_date_time(client.query(sql_ret).to_dataframe())

In [None]:
# Spend at Edinburgh (EH) merchants for the selected MCG.
_at_edinburgh = ret_df['merchant_location'].eq('EH') & ret_df['mcg'].eq(mcg)
edi_df = ret_df.loc[_at_edinburgh].copy()

In [None]:
# Cardholder postal areas treated as Welsh in this analysis.
# NOTE(review): some postal areas straddle the England–Wales border — confirm this list.
welsh_areas = ['NP', 'CF', 'LL', 'SY', 'SA', 'LD']


In [None]:
# Spend at Twickenham (TW) merchants for the selected MCG ...
tw_df = ret_df.loc[ret_df['merchant_location'].eq('TW') & ret_df['mcg'].eq(mcg)].copy()
# ... restricted to Welsh cardholders and summed per month.
tw_df_welsh = (
    tw_df.loc[tw_df['cardholder_location'].isin(welsh_areas)]
    .groupby(['date_time', "mcg"])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)

In [None]:
# Welsh cardholders' spend at Edinburgh (EH) merchants, summed per month.
edi_df_welsh = (
    edi_df.loc[edi_df['cardholder_location'].isin(welsh_areas)]
    .groupby(['date_time', "mcg"])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)

In [None]:
# Restore the merchant_location label dropped by the groupby, so the two frames
# can be told apart once concatenated.
edi_df_welsh['merchant_location'] = 'EH'
tw_df_welsh['merchant_location'] = 'TW'

In [None]:
# Welsh cardholder spend at EH and TW stacked into one frame for plotting.
edi_tw_df = pd.concat([edi_df_welsh, tw_df_welsh])

In [None]:
# Welsh cardholder spend at the two away host areas, with Wales' away fixtures marked.
fig = px.line(
    edi_tw_df,
    x="date_time",
    y="spend",
    color='merchant_location',
    title="Welsh cardholder spend in EH and TW",
    template='simple_white',
    height=500,
    width=1000,
)

# Horizontal legend sitting just above the plot, right-aligned.
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
))

fig.update_layout(legend_title_text='Merchant location:',)

# Fixtures where Wales was the visiting side at one of the plotted areas.
matches_in_area = uk_matches_log[(uk_matches_log['area'].isin(edi_tw_df['merchant_location'].unique())) & (uk_matches_log['country'] == 'WALES')]

for _, match in matches_in_area.iterrows():
    # Dotted vertical line at the fixture month.
    fig.add_vline(
        x=match['date_time'],
        line_dash='dot',
        line_color='gray',
        opacity=0.6
    )

    # Welsh spend at the host area in the fixture month.
    match_spend = edi_tw_df.loc[
        (edi_tw_df['date_time'] == match['date_time'])
        & (edi_tw_df['merchant_location'] == match['area']),
        'spend',
    ]
    if match_spend.empty:
        # No row for this area and month: keep the vertical line but skip the
        # label rather than fail with an IndexError.
        continue
    spend_value = match_spend.iloc[0]

    # Label the fixture at the corresponding spend value.
    fig.add_annotation(
        x=match['date_time'],
        y=spend_value,
        text=match['host_name'] + ' v ' + match['game_name'][2:].title(),
        font=dict(size=8, color="black", style="italic"),
        align='center',
    )

fig = add_nations_periods(fig)
fig.show()

------------------------------

## Checking sum spend of Welsh postcodes across MCGs

In [None]:
# UK postal areas that host Six Nations matches: CF, EH and TW (Twickenham stadium, London).
# Same values as six_nation_areas defined earlier in the notebook.
six_nations_areas_uk = ['CF', 'EH', 'TW']

In [None]:
# Welsh cardholders' spend at the three host areas, summed per month, area and MCG.
_welsh_at_host_areas = (
    ret_df['cardholder_location'].isin(welsh_areas)
    & ret_df['merchant_location'].isin(six_nations_areas_uk)
)
wales_ret_df = (
    ret_df[_welsh_at_host_areas]
    .groupby(['date_time', "merchant_location", "mcg"])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)

In [None]:
# MCGs shown in the Welsh-cardholder plots below (a wider list than the one used for the inbound plots).
mcgs_of_interest = ['QSR', 'RESTAURANTS', 'TRANSPORTATION', 'FOOD & GROCERY', 'FUEL', 'LODGING', 'APPAREL & ACCESSORIES', 'ENTERTAINMENT',]

In [None]:
# Year-on-year and 2019-indexed columns, computed per host area and MCG.
_welsh_series_keys = ['merchant_location', 'mcg']
welsh_spend2 = calc_index_yoy(wales_ret_df, need_date_cols=True, group_list=_welsh_series_keys)

In [None]:
# Welsh cardholders' indexed spend by MCG at each host area after the first
# list entry ('CF' is skipped by the slice).
in_mcgs_of_interest = welsh_spend2['mcg'].isin(mcgs_of_interest)

for i in six_nations_areas_uk[1:]:
    area_rows = welsh_spend2[(welsh_spend2['merchant_location'] == i) & in_mcgs_of_interest]
    fig = px.line(
        area_rows,
        x="date_time",
        y="index_spend_2019",
        color="mcg",
        title=f"Sum spend of Welsh cardholders at {i}",
        template='simple_white',
        height=500,
    )

    fig.update_layout(legend_title_text='MCG:', title_subtitle_text='Indexed to 2019 average')

    fig = add_nations_periods(fig)

    # Fixtures where Wales was the visiting side at this area.
    matches_in_area = uk_matches_log[(uk_matches_log['area'] == i) & (uk_matches_log['country'] == 'WALES')]

    for _, match in matches_in_area.iterrows():
        # Opponent label at y=0 on the fixture month.
        fig.add_annotation(
            x=match['date_time'],
            y=0,
            text=match['game_name'],
            font=dict(size=8, color="black", style="italic"),
            align='center',
            bgcolor='lightgrey',
            showarrow=False,
        )

    fig.show()

# Welsh cardholder spend at EH vs rest of Scotland

In [None]:
# Postal areas treated as Scottish in this analysis (includes EH).
scottish_areas = ['AB', 'DD', 'DG', 'EH', 'FK', 'G', 'HS', 'IV', 'KA', 'KW', 'KY', 'ML', 'PA', 'PH', 'TD', 'ZE']

In [None]:
# Welsh cardholders' spend in Scotland, split into EH and every other Scottish area.
_welsh_cardholder = ret_df['cardholder_location'].isin(welsh_areas)
_at_eh = ret_df['merchant_location'].eq('EH')
_in_scotland = ret_df['merchant_location'].isin(scottish_areas)
_metric_sums = {"spend": "sum", "transactions": "sum", "cardholders": "sum"}

welsh_scotland = (
    ret_df[_welsh_cardholder & _in_scotland & ~_at_eh]
    .groupby(['date_time', "mcg"])
    .agg(_metric_sums)
    .reset_index()
)
welsh_eh = (
    ret_df[_welsh_cardholder & _at_eh]
    .groupby(['date_time', "mcg"])
    .agg(_metric_sums)
    .reset_index()
)

# Label each frame so the two series can be told apart after concatenation.
welsh_scotland['merchant_location'] = 'Rest of Scotland'
welsh_eh['merchant_location'] = 'EH'

welsh_spend_scotland = pd.concat([welsh_scotland, welsh_eh])

In [None]:
# Year-on-year and 2019-indexed columns, computed per location label and MCG.
_scotland_series_keys = ['merchant_location', 'mcg']
welsh_spend_scotland = calc_index_yoy(welsh_spend_scotland, need_date_cols=True, group_list=_scotland_series_keys)

In [None]:
# Welsh cardholders' indexed spend for the selected MCG: EH against the rest of Scotland.
selected_mcg_rows = welsh_spend_scotland[welsh_spend_scotland['mcg'] == mcg]
fig = px.line(
    selected_mcg_rows,
    x="date_time",
    y="index_spend_2019",
    color='merchant_location',
    title="Welsh cardholder spend in EH compared to rest of Scotland",
    template='simple_white',
    height=500,
    width=1000,
)

# Horizontal legend sitting just above the plot, right-aligned.
legend_above_plot = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
fig.update_layout(legend=legend_above_plot)

fig.update_layout(
    legend_title_text='Merchant location:',
    title_subtitle_text='Indexed to 2019 average',
    yaxis_title='Indexed spend',
)

# Fixtures where Wales was the visiting side at EH.
matches_in_area = uk_matches_log[(uk_matches_log['area'] == 'EH') & (uk_matches_log['country'] == 'WALES')]

for _, match in matches_in_area.iterrows():
    # Fixture label at y=0 on the fixture month.
    fig.add_annotation(
        x=match['date_time'],
        y=0,
        text=match['host_name'] + ' v ' + match['game_name'][2:].title(),
        font=dict(size=8, color="black", style="italic"),
        align='center',
        bgcolor='lightgrey',
        showarrow=False,
    )

fig = add_nations_periods(fig)
fig.show()

--------------------------------

## Scottish spend

In [None]:
# Postcode-area codes used to select Scottish cardholders in this section.
scottish_areas = [
    'AB', 'DD', 'DG', 'EH',
    'FK', 'G', 'HS', 'IV',
    'KA', 'KW', 'KY', 'ML',
    'PA', 'PH', 'TD', 'ZE',
]

In [None]:
# Rows where a Scottish cardholder spends at one of the areas in six_nations_areas_uk.
is_scottish_cardholder = ret_df['cardholder_location'].isin(scottish_areas)
is_host_area_merchant = ret_df['merchant_location'].isin(six_nations_areas_uk)
scot_ret_df = ret_df[is_scottish_cardholder & is_host_area_merchant].copy()

# Total the three metrics per date, merchant area and MCG.
scot_spend = (
    scot_ret_df
    .groupby(['date_time', "merchant_location", "mcg"])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)

In [None]:
# One chart per host area: Scottish cardholder spend over time, one line per MCG.
for host_area in six_nations_areas_uk:
    area_spend = scot_spend[scot_spend['merchant_location'] == host_area]
    fig = px.line(
        area_spend,
        x="date_time",
        y="spend",
        color="mcg",
        title=f"Spending of Scottish cardholders at {host_area}",
    )
    fig = add_nations_periods(fig)
    fig.show()

In [None]:
# Scottish cardholder spend at CF and TW merchants for the MCG held in `mcg`,
# totalled per date and merchant area.
scottish_in_cf_tw = (
    ret_df['cardholder_location'].isin(scottish_areas)
    & ret_df['merchant_location'].isin(['CF', 'TW'])
    & (ret_df['mcg'] == mcg)
)
scottish_df = (
    ret_df[scottish_in_cf_tw]
    .groupby(['date_time', "merchant_location"])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)

In [None]:
# Indexed / year-on-year version of scottish_df, grouped by merchant location.
# The plotting cell reads the `merchant_location` values from this frame.
scottish_df2 = calc_index_yoy(
    df=scottish_df,
    need_date_cols=True,
    group_list=['merchant_location'],
)

In [None]:
# Line chart of Scottish cardholder spend in CF and TW, with each Scotland fixture
# marked by a dotted vertical line and a label at that month's spend value.
fig = px.line(
    scottish_df,
    x="date_time",
    y="spend",
    color='merchant_location',
    title="Scottish cardholder spend in CF and TW",
    template='simple_white',
    height=500,
    width=1000,
)

# Horizontal legend above the plot area.
fig.update_layout(
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    legend_title_text='Merchant location:',
)

# Fixtures logged for country SCOTLAND in any of the plotted merchant areas.
matches_in_area = uk_matches_log[
    (uk_matches_log['area'].isin(scottish_df2['merchant_location'].unique()))
    & (uk_matches_log['country'] == 'SCOTLAND')
]

for _, match in matches_in_area.iterrows():
    # Vertical marker at the match date.
    fig.add_vline(
        x=match['date_time'],
        line_dash='dot',
        line_color='gray',
        opacity=0.6,
    )

    # Spend recorded at the match's area on the match date.
    spend_on_match_date = scottish_df.loc[
        (scottish_df['date_time'] == match['date_time'])
        & (scottish_df['merchant_location'] == match['area']),
        'spend',
    ]
    if spend_on_match_date.empty:
        # No spend row for this fixture: keep the marker line but skip the label
        # instead of failing on `.iloc[0]` and losing the whole chart.
        continue

    # Label the fixture at the height of the spend value.
    fig.add_annotation(
        x=match['date_time'],
        y=spend_on_match_date.iloc[0],
        text=match['host_name'] + ' v ' + match['game_name'][2:].title(),
        font=dict(size=8, color="black", style="italic"),
        align='center',
    )

fig = add_nations_periods(fig)
fig.show()

# Scottish spend in CF vs rest of Wales

In [None]:
# Postcode-area codes used to select Welsh merchants.
welsh_areas = ['NP', 'CF', 'LL', 'SY', 'SA', 'LD']

# Split Scottish cardholders' spend at Welsh merchants into two series:
# the CF area on its own, and every other Welsh area combined.
is_scottish_cardholder = ret_df['cardholder_location'].isin(scottish_areas)
is_welsh_merchant = ret_df['merchant_location'].isin(welsh_areas)
is_cf_merchant = ret_df['merchant_location'] == 'CF'
metric_sums = {"spend": "sum", "transactions": "sum", "cardholders": "sum"}

scotland_welsh = (
    ret_df[is_scottish_cardholder & is_welsh_merchant & ~is_cf_merchant]
    .groupby(['date_time', "mcg"])
    .agg(metric_sums)
    .reset_index()
    .assign(merchant_location='Rest of Wales')
)
scotland_cf = (
    ret_df[is_scottish_cardholder & is_cf_merchant]
    .groupby(['date_time', "mcg"])
    .agg(metric_sums)
    .reset_index()
    .assign(merchant_location='CF')
)

# Stack the two series so they can be plotted as separate lines.
scottish_spend_wales = pd.concat([scotland_welsh, scotland_cf])

In [None]:
# Pass the frame through calc_index_yoy, grouped by merchant location and MCG.
# The plotting cells read `index_spend_2019` from the result (presumably added here - confirm).
# NOTE(review): this overwrites its own input, so re-running the cell alone re-processes the output.
scottish_spend_wales = calc_index_yoy(
    df=scottish_spend_wales,
    need_date_cols=True,
    group_list=['merchant_location', 'mcg'],
)

In [None]:
# Line chart of indexed Scottish cardholder spend: CF versus the rest of Wales,
# restricted to the MCG currently held in `mcg`.
scottish_plot_data = scottish_spend_wales[scottish_spend_wales['mcg'] == mcg]

fig = px.line(
    scottish_plot_data,
    x="date_time",
    y="index_spend_2019",
    color='merchant_location',
    title="Scottish cardholder spend in CF compared to rest of Wales",
    template='simple_white',
    height=500,
    width=1000,
)

# Horizontal legend above the plot area, plus legend and subtitle text.
fig.update_layout(
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    legend_title_text='Merchant location:',
    title_subtitle_text='Indexed to 2019 average',
)

# Fixtures logged for area CF with country SCOTLAND.
matches_in_area = uk_matches_log[
    (uk_matches_log['area'] == 'CF') & (uk_matches_log['country'] == 'SCOTLAND')
]

# Label each fixture along the bottom of the chart (y=0) at its date.
for _, fixture in matches_in_area.iterrows():
    fixture_label = fixture['host_name'] + ' v ' + fixture['game_name'][2:].title()
    fig.add_annotation(
        x=fixture['date_time'],
        y=0,
        text=fixture_label,
        font=dict(size=8, color="black", style="italic"),
        align='center',
        bgcolor='lightgrey',
        showarrow=False,
    )

fig = add_nations_periods(fig)
fig.show()

In [None]:
# Line chart of indexed Scottish cardholder spend in CF versus the rest of Wales
# for one specific MCG.
#
# The filter value and the chart labels are derived from the same variable so
# they cannot disagree (the labels previously said "restaurant"/"Restaurants"
# while the data was filtered to TRANSPORTATION).
# NOTE(review): if the restaurant group was the intended subject, change
# `plot_mcg` to that group's exact `mcg` value - the labels will follow.
plot_mcg = 'TRANSPORTATION'

fig = px.line(
    scottish_spend_wales[scottish_spend_wales['mcg'] == plot_mcg],
    x="date_time",
    y="index_spend_2019",
    color='merchant_location',
    title=f"Scottish {plot_mcg.lower()} spend in CF compared to rest of Wales",
    template='simple_white',
    height=500,
    width=1000,
)

# Horizontal legend above the plot area, plus legend and subtitle text.
fig.update_layout(
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    legend_title_text='Merchant location:',
    title_subtitle_text=f'Indexed to 2019 average | MCG = {plot_mcg.title()}',
)

# Fixtures logged for area CF with country SCOTLAND.
matches_in_area = uk_matches_log[
    (uk_matches_log['area'] == 'CF') & (uk_matches_log['country'] == 'SCOTLAND')
]

# Label each fixture along the bottom of the chart (y=0) at its date.
for _, match in matches_in_area.iterrows():
    fig.add_annotation(
        x=match['date_time'],
        y=0,
        text=match['host_name'] + ' v ' + match['game_name'][2:].title(),
        font=dict(size=8, color="black", style="italic"),
        align='center',
        bgcolor='lightgrey',
        showarrow=False,
    )

fig = add_nations_periods(fig)
fig.show()

-------------------------------

## Postal district level

Murrayfield - EH12


Cardiff - CF10


Twickenham - TW2

In [None]:
# Postal districts analysed as the stadium locations.
stadium_districts = ['EH12', 'CF10', 'TW2']

# 'All'-MCG rows at those districts, totalled per date, district and issuing country.
at_stadium_district = sml_df['merchant_location'].isin(stadium_districts)
is_all_mcg = sml_df['mcg'] == 'All'
districts_df = (
    sml_df[at_stadium_district & is_all_mcg]
    .groupby(['date_time', 'merchant_location', 'cardholder_issuing_country'])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)

In [None]:
# UK comparison series: every postal-district row outside the stadium districts
# ('All' MCG only), summed per date and issuing country. Note this is a total, not a mean.
is_postal_district = sml_df['merchant_location_level'] == 'POSTAL_DISTRICT'
outside_stadiums = ~sml_df['merchant_location'].isin(stadium_districts)
is_all_mcg = sml_df['mcg'] == 'All'

uk_av_dis = (
    sml_df[is_postal_district & outside_stadiums & is_all_mcg]
    .groupby(['date_time', 'cardholder_issuing_country'])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)
uk_av_dis['merchant_location'] = 'UK'

# Stadium districts and the UK series in one frame.
districts_uk = pd.concat([districts_df, uk_av_dis])

In [None]:
# Drop rows whose issuing country is 'UNITED KINGDOM' or 'All', then total the
# remaining issuing countries per date and merchant location.
excluded_issuers = ['UNITED KINGDOM', 'All']
keep_issuer = ~districts_uk['cardholder_issuing_country'].isin(excluded_issuers)
districts_uk_allspend = (
    districts_uk[keep_issuer]
    .groupby(['date_time', 'merchant_location'])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)

In [None]:
# Add calendar columns, year-on-year changes and a 2019-based index for each metric.
districts_uk_allspend['year'] = districts_uk_allspend['date_time'].dt.year
districts_uk_allspend['month'] = districts_uk_allspend['date_time'].dt.month

metrics = ['spend', 'transactions', 'cardholders']

# Rows that make up the 2019 baseline. Selecting by year (rather than by the
# first N rows of each group) uses every 2019 row present and does not depend
# on row order or on which month the data happens to start in.
is_2019 = districts_uk_allspend['year'] == 2019

for metric in metrics:
    # Year-on-year difference: previous row for the same location and calendar month.
    # Relies on rows being in date order, which the upstream groupby on date_time provides.
    districts_uk_allspend[f'yoy_{metric}'] = (
        districts_uk_allspend.groupby(['merchant_location', 'month'])[metric].diff(periods=1)
    )

    # Year-on-year % change, same grouping.
    districts_uk_allspend[f'yoy_{metric}_perc'] = (
        districts_uk_allspend.groupby(['merchant_location', 'month'])[metric].pct_change(periods=1) * 100
    )

    # Index to each location's 2019 average (= 100). Locations with no 2019 rows get NaN.
    baseline_2019 = (
        districts_uk_allspend.loc[is_2019]
        .groupby('merchant_location')[metric]
        .mean()
    )
    districts_uk_allspend[f'index_{metric}_2019'] = (
        districts_uk_allspend[metric]
        / districts_uk_allspend['merchant_location'].map(baseline_2019)
        * 100
    )


In [None]:
# One chart per indexed metric column: stadium districts versus the UK series.
indexed_columns = [col for col in districts_uk_allspend.columns if col.startswith('index')]

for indexed_metric in indexed_columns:
    # Column names look like 'index_<metric>_2019'; the middle part names the metric.
    metric_name = indexed_metric.split('_')[1]

    fig = px.line(
        districts_uk_allspend,
        x="date_time",
        y=indexed_metric,
        color='merchant_location',
        title=f"Sum {metric_name} at UK host postal districts, indexed to 2019 average",
        height=500,
        template='simple_white',
        color_discrete_map=districts_uk_colours,
    )

    # Horizontal legend above the plot area.
    # The plotted frame is built from rows with mcg == 'All' only, so the subtitle
    # states that directly instead of echoing whatever the global `mcg` currently holds.
    # NOTE(review): the frame excludes 'UNITED KINGDOM' issuers upstream, so
    # "Includes international spend" may understate that - confirm the wording.
    fig.update_layout(
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        legend_title_text='Postal district:',
        title_subtitle_text='MCG = All | Includes international spend',
    )
    fig.update_traces(selector=dict(name='UK'), line=dict(dash='dash'))  # dashed line for UK

    fig = add_nations_periods(fig)
    fig.show()

# International spend at host districts

In [None]:
# Non-UK-issued card spend, split into the stadium districts and a 'UK' comparison series.
is_non_uk_issuer = sml_df['cardholder_issuing_country'] != 'UNITED KINGDOM'
at_stadium_district = sml_df['merchant_location'].isin(stadium_districts)
# The UK series is restricted to postal-district rows, matching the other UK
# aggregate in this notebook, so rows from any other merchant_location_level in
# sml_df are not added into the same total.
is_postal_district = sml_df['merchant_location_level'] == 'POSTAL_DISTRICT'
metric_sums = {"spend": "sum", "transactions": "sum", "cardholders": "sum"}

# Everything outside the stadium districts, totalled per date, issuing country and MCG.
uk_internationals = (
    sml_df[is_postal_district & ~at_stadium_district & is_non_uk_issuer]
    .groupby(['date_time', 'cardholder_issuing_country', 'mcg'])
    .agg(metric_sums)
    .reset_index()
)
uk_internationals['merchant_location'] = 'UK'

# The stadium districts themselves, keeping the district as a grouping key.
districts_internationals = (
    sml_df[at_stadium_district & is_non_uk_issuer]
    .groupby(['date_time', 'merchant_location', 'cardholder_issuing_country', 'mcg'])
    .agg(metric_sums)
    .reset_index()
)

full_internationals = pd.concat([districts_internationals, uk_internationals])

In [None]:
# Keep only rows whose issuing country is listed in six_nation_countries.
is_six_nations_issuer = full_internationals['cardholder_issuing_country'].isin(six_nation_countries)
sn_internationals = full_internationals.loc[is_six_nations_issuer].copy()

In [None]:
# Pass the frame through calc_index_yoy, grouped by merchant location, issuing country and MCG.
# NOTE(review): this overwrites its own input, so re-running the cell alone re-processes the output.
sn_internationals = calc_index_yoy(
    df=sn_internationals,
    need_date_cols=True,
    group_list=['merchant_location', 'cardholder_issuing_country', 'mcg'],
)

In [None]:
# One chart per stadium district: spend by issuing country for the MCG held in `mcg`,
# with each logged fixture labelled at the visiting country's spend value.
for district in stadium_districts:
    fig = px.line(
        sn_internationals[
            (sn_internationals['merchant_location'] == district)
            & (sn_internationals['mcg'] == mcg)
        ],
        x="date_time",
        y="spend",
        color="cardholder_issuing_country",
        title=f"Six Nation country sum spend at {district}",
        template="simple_white",
        height=500,
    )
    fig = add_nations_periods(fig)

    # Horizontal legend above the plot area, plus legend and subtitle text.
    fig.update_layout(
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        legend_title_text='Inbound spend country:',
        title_subtitle_text=f'MCG = {mcg}',
    )

    # Fixtures are logged by two-character area, so match on the district's first two characters.
    matches_in_area = matches_log[matches_log['area'] == district[:2]]

    for _, match in matches_in_area.iterrows():
        # Spend by the fixture's country at this area on the match date.
        match_spend = sn_internationals.loc[
            (sn_internationals['date_time'] == match['date_time'])
            & (sn_internationals['mcg'] == mcg)
            & (sn_internationals['cardholder_issuing_country'] == match['country'])
            & (sn_internationals['merchant_location'].str.startswith(match['area'])),
            'spend',
        ]

        if match_spend.empty:
            # No spend row for this country/date: flag it on the chart. This is an
            # explicit check rather than a bare `except`, so genuine errors
            # (e.g. a missing column) still surface instead of being relabelled.
            fig.add_annotation(
                x=match['date_time'],
                y=0,
                text='Data Missing for ' + match['country'],
                font=dict(size=8, color="black", style="italic"),
                align='center',
                bgcolor='lightgrey',
                showarrow=False,
            )
            continue

        # Label the fixture at the height of the spend value.
        fig.add_annotation(
            x=match['date_time'],
            y=match_spend.iloc[0],
            text=match['game_name'],
            font=dict(size=8, color="black", style="italic"),
            align='center',
        )

    # Show the plot for the current district
    fig.show()
    

# UK spending at host districts

In [None]:
# Postal districts analysed as the stadium locations.
stadium_districts = ['EH12', 'CF10', 'TW2']

# Monthly district-to-district spend at the stadium districts.
#
# The merchant filter is a single IN-list. Written as a chain of
# `... AND a = x OR a = y OR a = z`, AND binds tighter than OR, so rows for the
# second and third districts bypassed every other WHERE condition (time period,
# location levels, 'All' exclusions). The districts are passed as a query
# parameter so the list is defined in one place only.
sql_ret_dis = """SELECT time_period_value, cardholder_location, merchant_location, mcg, 
spend, transactions, cardholders
FROM ons-fintrans-data-prod.fintrans_visa.retail_performance_high_streets_towns
WHERE time_period = 'Month' AND 
cardholder_location_level = "POSTAL_DISTRICT" AND
merchant_location_level = "POSTAL_DISTRICT" AND
cardholder_location != 'All' AND
merchant_location != 'All' AND
merchant_location IN UNNEST(@stadium_districts)
GROUP BY time_period_value, cardholder_location, merchant_location, mcg, spend, transactions, cardholders
ORDER BY time_period_value, cardholder_location, merchant_location, mcg, spend, transactions, cardholders"""

district_job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ArrayQueryParameter("stadium_districts", "STRING", stadium_districts),
    ]
)

district_df = client.query(sql_ret_dis, job_config=district_job_config).to_dataframe()
# Derive the date_time column from the period value via the project helper.
district_df = t.create_date_time(district_df)

In [None]:
# Postcode-area codes used to pick out Welsh cardholder districts.
welsh_areas = ['NP', 'CF', 'LL', 'SY', 'SA', 'LD']

# Alternation pattern built from the list above: "NP|CF|LL|SY|SA|LD".
welsh_area_pattern = "|".join(welsh_areas)

cardholder_matches_welsh_area = district_df["cardholder_location"].str.contains(welsh_area_pattern)
merchant_not_cf10 = district_df['merchant_location'] != 'CF10'
period_has_no_q = ~district_df["time_period_value"].str.contains("Q")
# Codes longer than two characters only (drops bare two-letter area codes).
cardholder_code_is_long = district_df["cardholder_location"].apply(lambda code: len(code) > 2)

welsh_spend_district_df = district_df[
    cardholder_matches_welsh_area
    & merchant_not_cf10
    & period_has_no_q
    & cardholder_code_is_long
].copy()


In [None]:
# Total the three metrics per date, merchant district and MCG.
# NOTE(review): this overwrites its own input, so re-running the cell alone aggregates the aggregate.
welsh_spend_district_df = (
    welsh_spend_district_df
    .groupby(['date_time', 'merchant_location', "mcg"])
    .agg({"spend": "sum", "transactions": "sum", "cardholders": "sum"})
    .reset_index()
)