In [None]:
project_path = "/home/jupyter"
import os
import sys
sys.path.append(project_path)
sys.path.append(f'{project_path}/ft_events/src/utils')

from google.cloud import bigquery
from google.cloud import storage

import importlib

import numpy as np
import pandas as pd
from plotly import graph_objs as go
import seaborn as sns
import geopandas as gpd

import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, BoundaryNorm
from matplotlib.lines import Line2D
import plotly.express as px
import ipywidgets as widgets


from fintrans_toolbox.src import table_utils as t
from fintrans_toolbox.src import bq_utils as bq

client = bigquery.Client()

# Map of non-local district spend at TS venues

## Data preparation

In [None]:
# Postal districts of the TS venues covered by this notebook. The tuple is
# interpolated into the SQL IN-clause and used to select the venue shapes.
ts_pds = (
    'EH12',
    'L4',
    'CF10',
    'HA9',
)

def calc_index_yoy(df, need_date_cols, group_list):
    """Add year-on-year and index columns for spend, transactions and cardholders.

    All calculations are positional, so the rows of each group must already
    be in chronological order, one row per month.

    Parameters
    ----------
    df : pandas.DataFrame
        Must hold the columns ``spend``, ``transactions`` and ``cardholders``
        plus every column named in ``group_list``. It is not modified.
    need_date_cols : bool
        When truthy, ``year`` and ``month`` are derived from the datetime
        column ``date_time``. Otherwise a ``month`` column must already exist.
    group_list : list of str
        Columns identifying one series (any sequence of names is accepted).

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with, for each metric ``m``:

        * ``yoy_m``        - difference to the previous row of the same group
          and calendar month (the previous year when no year is missing);
        * ``yoy_m_perc``   - the same change as a percentage;
        * ``index_m_2019`` - value divided by the mean of the first twelve
          rows of the group;
        * ``index_m``      - value divided by the first row of the group.
    """
    result = df.copy()
    # Truthiness test so that numpy booleans are honoured too; the previous
    # `is True` identity check silently skipped them.
    if need_date_cols:
        result['year'] = result['date_time'].dt.year
        result['month'] = result['date_time'].dt.month

    metrics = ['spend', 'transactions', 'cardholders']
    group_cols = list(group_list)
    month_group = group_cols + ['month']

    for metric in metrics:
        # calc year-on-year differences
        result[f'yoy_{metric}'] = result.groupby(month_group)[metric].diff(periods=1)

        # calc year-on-year % change
        result[f'yoy_{metric}_perc'] = (
            result.groupby(month_group)[metric].pct_change(periods=1) * 100
        )

        # index to 2019 average: a full year is twelve monthly rows. The
        # former slice 0:11 left the twelfth month out of the baseline.
        # NOTE(review): assumes every group starts in January 2019 - confirm.
        result[f'index_{metric}_2019'] = result.groupby(group_cols)[metric].transform(
            lambda x: x / x.iloc[0:12].mean()
        )

        # index to the first row of the group (jan 2019 under the same assumption)
        result[f'index_{metric}'] = result.groupby(group_cols)[metric].transform(
            lambda x: x / x.iloc[0]
        )

    return result

In [None]:
# Monthly, all-category (mcg = 'All') figures for the venue districts in
# ts_pds, broken down by the cardholder's postal district.
ts_out_sql = f"""SELECT time_period_value,
merchant_location, cardholder_location,  mcg, spend, transactions, cardholders
FROM ons-fintrans-data-prod.fintrans_visa.retail_performance_high_streets_towns
WHERE time_period = 'Month' AND
mcg = 'All' AND
merchant_location IN  {ts_pds} AND
cardholder_location_level = 'POSTAL_DISTRICT'
ORDER BY time_period_value,  merchant_location, cardholder_location, mcg, spend, transactions, cardholders"""

# Run the query on BigQuery and pass the result through the toolbox helper
# (presumably this adds the date_time column used further down - confirm).
oc_df = t.create_date_time(client.query(ts_out_sql).to_dataframe())

In [None]:
# remove non-district locations

# remove codes with no numbers (rows with a missing code are dropped too)
has_digit = oc_df['cardholder_location'].str.contains(r"\d", na=False)
oc_df = oc_df.loc[has_digit]

# make sure no spaces in code: keep only the codes without any space.
# The previous version kept rows with MORE than one space (`number > 1`) and
# kept everything whenever all codes shared the same space count, which is the
# opposite of the stated intent. Filtering with boolean masks also avoids
# writing a temporary column onto a slice of the frame.
has_space = oc_df['cardholder_location'].str.contains(" ", regex=False, na=False)
oc_df = oc_df.loc[~has_space]

In [None]:
# identify and exclude nearby districts

# `gpd` (geopandas) is already imported in the first cell, so it is not
# re-imported here, and the BigQuery `client` created there is left untouched.

# Loading shapes
# The boundary shapefile is read from the working directory. The original
# workflow kept the download call commented out; it needs a storage client:
# bq.boundary_file_download(storage.Client(), postal_level = "postcode_district", output_location = '')
district_shape = gpd.read_file('postcode_district.shp')

# saving postal district name + geographies of the venue districts
venue_names = district_shape[district_shape['postdistri'].isin(ts_pds)]
if sorted(venue_names['postdistri']) != sorted(ts_pds):
    raise ValueError(
        "Expected exactly one shape per venue district "
        f"{sorted(ts_pds)}, found {sorted(venue_names['postdistri'])}"
    )
# saving geo shapes of the venue districts
venues = venue_names.geometry
# creating a 25km buffer around the venue geographies
# NOTE(review): buffer() works in the units of the layer's CRS, so 25000 is
# 25km only if the shapefile uses a metre-based projection - confirm.
buffer = venues.buffer(25000)

# done individually for each district - since the buffer around EH does not need to be excluded for CF etc
# For every venue keep the district geometries that do NOT intersect its 25km
# buffer, and tag them with the venue district they belong to. The result is
# keyed by district name so nothing depends on the row order of the shapefile.
non_local_shapes = {}
for venue_pd, venue_buffer in zip(venue_names['postdistri'], buffer):
    outside = district_shape[~district_shape.geometry.intersects(venue_buffer)].copy()
    outside['merchant_location'] = venue_pd
    non_local_shapes[venue_pd] = outside

# Positional names kept for any later cell that still refers to them
# (same order as venue_names / buffer).
buffered1, buffered2, buffered3, buffered4 = non_local_shapes.values()

# Saving unique postal districts for each of the buffered zones.
# Looked up by venue name: previously these were tied to buffered1..4 by
# position, which is only right if the shapefile lists the venues in
# CF10, EH12, HA9, L4 order.
cf10_buffered = non_local_shapes['CF10']['postdistri'].unique()
eh12_buffered = non_local_shapes['EH12']['postdistri'].unique()
ha9_buffered = non_local_shapes['HA9']['postdistri'].unique()
l4_buffered = non_local_shapes['L4']['postdistri'].unique()



In [None]:
# EH12 venue rows, restricted to cardholder districts listed in eh12_buffered
# (the districts that do not intersect the venue's 25km buffer)
at_eh12 = oc_df['merchant_location'] == 'EH12'
from_non_local = oc_df['cardholder_location'].isin(eh12_buffered)
eh_df = oc_df.loc[at_eh12 & from_non_local].copy()

In [None]:
# Total spend per month and venue, repeated on every row of that month
eh_df['total_spend'] = (
    eh_df.groupby(['date_time', 'merchant_location'])['spend'].transform('sum')
)
# Each cardholder district's share of that total, as a percentage (2 d.p.)
eh_df['percentage_spend'] = (
    eh_df['spend'].div(eh_df['total_spend']).mul(100).round(2)
)

# Give the shapes the same key name as the spend data.
# Note: this renames the column on district_shape itself (in place).
district_shape.rename(columns={'postdistri': "cardholder_location"}, inplace=True)

# Attach a geometry to every spend row (inner join on the district code)
merged_df = district_shape[['geometry', 'cardholder_location']].merge(
    eh_df, on='cardholder_location'
)

In [None]:
def plot_interactive_map (gdf):
    """Return a widget that maps ``percentage_spend`` for a selectable period.

    ``gdf`` needs the columns ``time_period_value`` and ``percentage_spend``
    and must support ``.plot(ax=..., column=...)``. A dropdown lists every
    distinct ``time_period_value`` in ascending order; choosing one redraws
    the map for that period with the default colour scale.

    Note: a later cell defines a function with this same name, which replaces
    this version once that cell has run.
    """
    # options for the dropdown: every distinct period, sorted
    available_dates = sorted(gdf['time_period_value'].unique())

    def plot_map(date):
        """Draw the map for the selected period."""
        fig, ax = plt.subplots(figsize=(10, 10))
        period_rows = gdf[gdf['time_period_value'] == date]
        period_rows.plot(ax=ax, column='percentage_spend', legend=True)
        plt.title('Edinburgh proportion of spend')
        plt.show()

    date_picker = widgets.Dropdown(options=available_dates, description="Select date")

    # widgets.interactive re-runs plot_map whenever the dropdown changes
    return widgets.interactive(plot_map, date=date_picker)

In [None]:
# Show the interactive map built by the plot_interactive_map defined in the cell above
plot_interactive_map(merged_df)

In [None]:

def plot_interactive_map(gdf):
    """Return a widget that maps ``percentage_spend`` in five colour classes.

    ``gdf`` needs the columns ``time_period_value`` and ``percentage_spend``
    and must support ``.plot(ax=..., column=...)``. A dropdown lists every
    distinct ``time_period_value`` in ascending order. For the chosen period
    the range between the smallest and largest ``percentage_spend`` is cut
    into five equal-width classes, one per colour, so the class edges are
    recomputed for every period.

    Note: this reuses the name of a function defined in an earlier cell and
    replaces it once this cell has run.
    """
    palette = ['#FFFFCC', '#CCFF99', '#66FFFF', '#3399FF', '#000066']

    def plot_map(date):
        """Draw the classed map for the selected period."""
        fig, ax = plt.subplots(figsize=(10, 10))
        period_rows = gdf[gdf['time_period_value'] == date]

        # equal-width class edges spanning this period's min..max share
        share = period_rows['percentage_spend']
        edges = np.linspace(share.min(), share.max(), len(palette) + 1)

        # Plot with the custom colour classes
        period_rows.plot(
            ax=ax,
            column='percentage_spend',
            cmap=ListedColormap(palette),
            norm=BoundaryNorm(edges, len(palette)),
            legend=True,
            legend_kwds={'label': "Percentage Spend", 'orientation': "vertical"},
        )
        ax.set_title(f'Edinburgh non-local proportion of spend - {date}')
        ax.set_axis_off()

        # Move the legend outside the plot, if the axes has one.
        # NOTE(review): get_legend() may return None here (the colour key may
        # be drawn as a colorbar instead), in which case nothing is moved.
        legend = ax.get_legend()
        if legend:
            legend.set_bbox_to_anchor((1.15, 0.5))

        plt.tight_layout()
        plt.show()

    # Dropdown with every distinct period, sorted
    available_dates = sorted(gdf['time_period_value'].unique())
    date_picker = widgets.Dropdown(options=available_dates, description="Select date")

    return widgets.interactive(plot_map, date=date_picker)





In [None]:
# Show the interactive map again, now using the redefined plot_interactive_map with custom colour classes
plot_interactive_map(merged_df)