In [None]:
import json
import os
import pandas
import pyarrow
import sys

from dotenv import load_dotenv
from sqlalchemy import create_engine
from os import path

load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')
engine = create_engine('bigquery://bespoke-financial/ProdMetrcData', credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH))

sys.path.append(path.realpath(path.join(os.getcwd(), "../core")))
import create_queries
import prepare_data
%load_ext autoreload
%autoreload 2

In [None]:
# Company identifier passed to the sales receipts query builder.
COMPANY_IDENTIFIER = 'CCC'
# Start date string passed to the sales receipts query builder.
# NOTE(review): presumably a lower bound on the sales datetime — confirm in create_queries.
SALES_TRANSACTIONS_START_DATE = '2020-01-01'

In [None]:
# Build the sales receipts SQL for the configured company and start date.
company_sales_receipts_query = create_queries.create_company_sales_receipts_query(
    COMPANY_IDENTIFIER,
    SALES_TRANSACTIONS_START_DATE,
)

# Run the query through the SQLAlchemy engine and load the result into a dataframe.
company_sales_receipts_dataframe = pandas.read_sql_query(
    sql=company_sales_receipts_query,
    con=engine,
)

In [None]:
# Show which columns the query returned before narrowing the frame down.
company_sales_receipts_dataframe.columns

In [None]:
# Keep only the columns used by the receipt ID analysis below.
# `.copy()` makes the result an independent frame, so a later column
# assignment on it does not trigger pandas' SettingWithCopyWarning.
company_sales_receipts_dataframe = company_sales_receipts_dataframe[[
    'receipt_id',
    'receipt_number',
    'sales_datetime',
    'license_number',
]].copy()

In [None]:
# Cast receipt IDs to integers so they sort and subtract numerically.
# `astype` is the vectorized equivalent of calling int() on every element, and
# `assign` returns a new frame rather than writing into one that was produced
# by column selection, which keeps this cell warning-free and safe to re-run.
company_sales_receipts_dataframe = company_sales_receipts_dataframe.assign(
    receipt_id=company_sales_receipts_dataframe['receipt_id'].astype('int64'),
)

In [None]:
# Order receipts by ascending receipt ID.
company_sales_receipts_dataframe = company_sales_receipts_dataframe.sort_values(
    by='receipt_id',
)

In [None]:
# Display the receipts frame for a visual check.
company_sales_receipts_dataframe

In [None]:
# Convert the frame into a list with one plain dict per receipt row.
sales_receipt_records = company_sales_receipts_dataframe.to_dict(orient='records')
# Peek at the first five records.
sales_receipt_records[:5]

In [None]:
# Walk the receipt records in list order and report every consecutive pair
# whose receipt IDs differ by more than THRESHOLD while their sales datetimes
# are also more than MINIMUM_TIME_GAP apart.
# Assumes sales_receipt_records is ordered by ascending receipt_id.
previous_receipt_id = None
previous_sales_datetime = None

# Minimum jump in receipt ID between consecutive receipts worth reporting.
THRESHOLD = 500
# Minimum elapsed time between consecutive receipts worth reporting;
# built once here instead of on every loop iteration.
MINIMUM_TIME_GAP = pandas.Timedelta('1 days')

for sales_receipt_record in sales_receipt_records:
    receipt_id = sales_receipt_record['receipt_id']
    sales_datetime = sales_receipt_record['sales_datetime']

    # Compare against None explicitly: a truthiness test would silently skip
    # the check whenever the previous receipt ID happens to be 0.
    if (
        previous_receipt_id is not None and
        receipt_id - previous_receipt_id > THRESHOLD and
        sales_datetime - previous_sales_datetime > MINIMUM_TIME_GAP
    ):
        print(f'Receipt ID {receipt_id} ({sales_datetime}) and previous receipt ID {previous_receipt_id} ({previous_sales_datetime}) are far apart')

    previous_receipt_id = receipt_id
    previous_sales_datetime = sales_datetime

In [None]:
# Display the receipts ordered by sales datetime (the stored frame is not modified).
company_sales_receipts_dataframe.sort_values(by='sales_datetime')