In [65]:
import json
import os
import pandas
import pyarrow
import sys

from dotenv import load_dotenv
from sqlalchemy import create_engine
from os import path

# Load variables from a .env file into the process environment, then read the
# location of the BigQuery credentials file (None when the variable is unset).
load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.getenv('BIGQUERY_CREDENTIALS_PATH')

In [83]:
sys.path.append(path.realpath(path.join(os.getcwd(), "../core")))
import create_queries
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [133]:
# Analysis parameters: both values are passed to the query builder in
# create_queries when the sales receipts query is created.
COMPANY_IDENTIFIER: str = 'PL'
# Start date for the sales transactions query, written as YYYY-MM-DD.
SALES_TRANSACTIONS_START_DATE: str = '2020-01-01'

In [134]:
# Fail fast with a readable message when the credentials path is missing.
# Without this check os.path.expanduser(None) raises an opaque TypeError.
if not BIGQUERY_CREDENTIALS_PATH:
    raise RuntimeError(
        'BIGQUERY_CREDENTIALS_PATH is not set; define it in the environment or in the .env file.'
    )

# Build the SQL text for the selected company and start date.
# Alternative query, kept for reference (not executed):
# company_sales_receipts_query = create_queries.create_company_sales_receipts_query(COMPANY_IDENTIFIER, SALES_TRANSACTIONS_START_DATE)
company_sales_receipts_with_transactions_query = create_queries.create_company_sales_receipts_with_transactions_query(
    COMPANY_IDENTIFIER,
    SALES_TRANSACTIONS_START_DATE,
)

# BigQuery connection; "~" in the credentials path is expanded to the home directory.
engine = create_engine(
    'bigquery://bespoke-financial/ProdMetrcData',
    credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH),
)

# Run the query and load the full result set into a dataframe.
# company_sales_receipts_dataframe = pandas.read_sql_query(company_sales_receipts_query, engine)
company_sales_receipts_with_transactions_dataframe = pandas.read_sql_query(
    company_sales_receipts_with_transactions_query,
    engine,
)

In [136]:
# Tag every row with its sales month as a 'YYYY-MM' string (e.g. '2021-10').
# The vectorized .dt accessor replaces a row-wise DataFrame.apply, which calls
# a Python lambda once per row and is much slower on a large result set.
# .assign returns a new frame, so the queried dataframe itself is not modified.
sales_receipts_dataframe = company_sales_receipts_with_transactions_dataframe.assign(
    sales_month=lambda dataframe: dataframe['sales_datetime'].dt.strftime('%Y-%m'),
)

In [137]:
# Convert the dataframe into a list of per-row dicts keyed by column name.
sales_receipts_records = sales_receipts_dataframe.to_dict(orient='records')

In [139]:
# Group the row dicts by receipt number: receipt_number -> list of that
# receipt's rows, in the order they appear in sales_receipts_records.
receipt_number_to_transactions = {}
for sales_receipt_record in sales_receipts_records:
    receipt_number = sales_receipt_record['receipt_number']
    # setdefault creates the list on first sight of a receipt number.
    receipt_number_to_transactions.setdefault(receipt_number, []).append(sales_receipt_record)

In [144]:
import math

def float_eq(a: float, b: float, abs_tol: float = 0.01) -> bool:
    """Return True when `a` and `b` are equal within an absolute tolerance.

    Args:
        a: First value to compare.
        b: Second value to compare.
        abs_tol: Largest absolute difference that still counts as equal.
            Defaults to 0.01.

    Returns:
        The result of math.isclose(a, b, abs_tol=abs_tol). math.isclose also
        applies its default relative tolerance (1e-09), and NaN is never
        considered close to any value, including itself.
    """
    return math.isclose(a, b, abs_tol=abs_tol)

def _percent(part: int, whole: int) -> float:
    """Return `part` as a percentage of `whole`, or 0.0 when `whole` is 0."""
    return part / whole * 100 if whole else 0.0


def _increment(counts: dict, key) -> None:
    """Add one to counts[key], starting from 0 for a key not seen before."""
    counts[key] = counts.get(key, 0) + 1


mismatch_count = 0 # Count of receipts where receipt total price does not match transactions total price.
missing_count = 0 # Count of receipts with no transactions.
total_count = 0 # Count of receipts (including those missing transactions).

mismatch_over_count = 0 # Mismatches where the transactions total exceeds the receipt total.
mismatch_under_count = 0 # All other mismatches.

# Per-sales-month breakdowns of the counters above.
month_to_mismatch_count = {}
month_to_missing_count = {}

month_to_mismatch_over_count = {}
month_to_mismatch_under_count = {}

# Every mismatched receipt as a (receipt_number, rows) tuple, split by direction.
example_mismatch_over_receipts = []
example_mismatch_under_receipts = []

for receipt_number, receipt_transactions in receipt_number_to_transactions.items():
    # Receipt total and sales month are taken from the first row of the group.
    first_row = receipt_transactions[0]
    receipt_total_price = first_row['rt_total_price']
    receipt_sales_month = first_row['sales_month']
    total_count += 1

    # A single row with no transaction id is counted as a receipt without transactions.
    if len(receipt_transactions) == 1 and first_row['tx_id'] is None:
        missing_count += 1
        _increment(month_to_missing_count, receipt_sales_month)
        continue

    transactions_total_price = sum(row['tx_total_price'] for row in receipt_transactions)
    if float_eq(receipt_total_price, transactions_total_price):
        continue

    mismatch_count += 1
    _increment(month_to_mismatch_count, receipt_sales_month)
    if receipt_total_price < transactions_total_price:
        mismatch_over_count += 1
        _increment(month_to_mismatch_over_count, receipt_sales_month)
        example_mismatch_over_receipts.append((receipt_number, receipt_transactions))
    else:
        mismatch_under_count += 1
        _increment(month_to_mismatch_under_count, receipt_sales_month)
        example_mismatch_under_receipts.append((receipt_number, receipt_transactions))

# _percent reports 0.0 instead of raising ZeroDivisionError when there are no
# receipts or no mismatches at all.
print(f'# mismatch receipt vs transactions: {mismatch_count} ({_percent(mismatch_count, total_count)}%)')
print(f'# missing transactions: {missing_count} ({_percent(missing_count, total_count)}%)')
print(f'# total: {total_count}')

print(f'# mismatch receipt vs transactions (transactions over): {mismatch_over_count} ({_percent(mismatch_over_count, mismatch_count)}%)')
print(f'# mismatch receipt vs transactions (transactions under): {mismatch_under_count} ({_percent(mismatch_under_count, mismatch_count)}%)')

# mismatch receipt vs transactions: 110369 (22.989949487059313%)
# missing transactions: 487 (0.10144248294537313%)
# total: 480075
# mismatch receipt vs transactions (transactions over): 109905 (99.57959209560656%)
# mismatch receipt vs transactions (transactions under): 464 (0.4204079043934438%)


In [145]:
# Number of receipts without any transactions, keyed by sales month ('YYYY-MM').
month_to_missing_count

{'2021-07': 31,
 '2020-12': 62,
 '2020-11': 93,
 '2020-10': 14,
 '2020-09': 24,
 '2020-08': 38,
 '2020-07': 3,
 '2020-06': 77,
 '2020-05': 47,
 '2020-04': 33,
 '2020-03': 6,
 '2020-02': 15,
 '2020-01': 44}

In [146]:
# Number of receipts whose total does not match the sum of their transactions,
# keyed by sales month ('YYYY-MM').
month_to_mismatch_count

{'2021-10': 404,
 '2021-07': 5118,
 '2021-06': 27293,
 '2021-05': 17605,
 '2021-04': 20073,
 '2021-03': 11961,
 '2021-02': 2405,
 '2021-01': 25493,
 '2020-12': 1,
 '2020-11': 2,
 '2020-08': 2,
 '2020-07': 3,
 '2020-05': 3,
 '2020-02': 2,
 '2020-01': 4}

In [147]:
# First recorded mismatch where the transactions sum to more than the receipt total.
example_mismatch_over_receipts[0]

('0135083778',
 [{'rt_id': '25f3d3ed-4407-479c-af8a-9abdc8e912a3',
   'license_number': 'C10-0000241-LIC',
   'receipt_number': '0135083778',
   'rt_type': 'active',
   'sales_customer_type': 'Consumer',
   'sales_datetime': Timestamp('2021-10-09 14:46:17+0000', tz='UTC'),
   'total_packages': 3,
   'rt_total_price': 75.97,
   'tx_id': 'dc8b69de-4d24-4a20-a2ea-eaec0938cbfa',
   'tx_type': 'active',
   'tx_package_id': '18180500',
   'tx_package_label': '1A4060300008410000001329',
   'tx_product_name': 'Maui wowie',
   'tx_product_category_name': 'Vape Cartridge (weight - each)',
   'tx_unit_of_measure': 'Each',
   'tx_quantity_sold': 1.0,
   'tx_total_price': 34.99,
   'sales_month': '2021-10'},
  {'rt_id': '25f3d3ed-4407-479c-af8a-9abdc8e912a3',
   'license_number': 'C10-0000241-LIC',
   'receipt_number': '0135083778',
   'rt_type': 'active',
   'sales_customer_type': 'Consumer',
   'sales_datetime': Timestamp('2021-10-09 14:46:17+0000', tz='UTC'),
   'total_packages': 3,
   'rt_total

In [148]:
# First recorded mismatch where the receipt total is not below the transactions sum.
example_mismatch_under_receipts[0]

('0135047120',
 [{'rt_id': '9457afc7-9124-4069-b145-2a3c79d9e2b8',
   'license_number': 'C10-0000241-LIC',
   'receipt_number': '0135047120',
   'rt_type': 'active',
   'sales_customer_type': 'Consumer',
   'sales_datetime': Timestamp('2021-10-09 12:23:14+0000', tz='UTC'),
   'total_packages': 1,
   'rt_total_price': -27.54,
   'tx_id': 'fad5ffd9-a858-4f3e-9bbd-66e21f6fc5e7',
   'tx_type': 'active',
   'tx_package_id': '17969853',
   'tx_package_label': '1A406030000A429000123686',
   'tx_product_name': 'LIIIL - Blue Dream Disposable - 0.5g',
   'tx_product_category_name': 'Vape Cartridge (weight - each)',
   'tx_unit_of_measure': 'Each',
   'tx_quantity_sold': -1.0,
   'tx_total_price': -27.54,
   'sales_month': '2021-10'},
  {'rt_id': '9457afc7-9124-4069-b145-2a3c79d9e2b8',
   'license_number': 'C10-0000241-LIC',
   'receipt_number': '0135047120',
   'rt_type': 'active',
   'sales_customer_type': 'Consumer',
   'sales_datetime': Timestamp('2021-10-09 12:23:14+0000', tz='UTC'),
   'tot