In [191]:
import json
import os
import pandas
import pyarrow
import sys

from dotenv import load_dotenv
from sqlalchemy import create_engine
from os import path

# Pull settings from a local .env file (if present) into the process environment,
# then look up where the BigQuery credentials file lives. The value is None when
# the variable is not set.
load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.getenv('BIGQUERY_CREDENTIALS_PATH')

In [192]:
# Make the "../core" directory (resolved against the current working directory)
# importable. Presumably create_queries and prepare_data live there - verify
# against the repository layout.
sys.path.append(path.realpath(path.join(os.getcwd(), "../core")))
import create_queries
import prepare_data
# Enable IPython's autoreload extension in mode 2, which reloads modules before
# each cell runs so edits to the modules above are picked up without a restart.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [158]:
# Identifier of the company to analyze; passed to the create_queries query builder.
COMPANY_IDENTIFIER = 'RA'
# Start date string passed to the same query builder.
# NOTE(review): presumably the earliest sales date included (YYYY-MM-DD) - confirm in create_queries.
SALES_TRANSACTIONS_START_DATE = '2020-01-01'

In [159]:
# Build the receipts-with-transactions query for the configured company and
# start date. (A receipts-only variant appears to be available as
# create_queries.create_company_sales_receipts_query.)
company_sales_receipts_with_transactions_query = create_queries.create_company_sales_receipts_with_transactions_query(
    COMPANY_IDENTIFIER,
    SALES_TRANSACTIONS_START_DATE,
)

# Fail fast with an actionable message: without this check an unset variable
# reaches os.path.expanduser(None) and surfaces as an opaque TypeError.
if not BIGQUERY_CREDENTIALS_PATH:
    raise RuntimeError(
        'BIGQUERY_CREDENTIALS_PATH is not set; define it in the environment or in a .env file.'
    )

engine = create_engine(
    'bigquery://bespoke-financial/ProdMetrcData',
    # expanduser lets the configured path start with "~".
    credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH),
)

# Run the query and load the full result set into a DataFrame.
company_sales_receipts_with_transactions_dataframe = pandas.read_sql_query(
    company_sales_receipts_with_transactions_query,
    engine,
)

In [194]:
# Add a 'YYYY-MM' sales_month column derived from sales_datetime.
# - Formatting is vectorized through the .dt accessor instead of a row-wise
#   apply with manual zero padding, which is much slower on large frames.
# - pandas.to_datetime is a no-op for a column that is already datetime-typed
#   and converts an object column of timestamps so that .dt is available.
# - assign() returns a new frame, so the frame loaded from BigQuery is untouched.
sales_receipts_dataframe = company_sales_receipts_with_transactions_dataframe.assign(
    sales_month=pandas.to_datetime(
        company_sales_receipts_with_transactions_dataframe['sales_datetime']
    ).dt.strftime('%Y-%m'),
)

In [161]:
# Flatten the frame into a list with one {column: value} dict per row.
sales_receipt_records = sales_receipts_dataframe.to_dict(orient='records')

In [162]:
# Group the flat row records by receipt number. Rows keep their original
# relative order inside each group, and groups are keyed in first-seen order.
receipt_number_to_transactions = {}
for sales_receipt_record in sales_receipt_records:
    receipt_number = sales_receipt_record['receipt_number']
    receipt_number_to_transactions.setdefault(receipt_number, []).append(sales_receipt_record)

In [163]:
import math

def float_eq(a: float, b: float, abs_tol: float = 0.01) -> bool:
    """Return True when ``a`` and ``b`` are equal within an absolute tolerance.

    Args:
        a: First value to compare.
        b: Second value to compare.
        abs_tol: Maximum absolute difference still treated as equal. Defaults
            to 0.01.

    Returns:
        The result of ``math.isclose(a, b, abs_tol=abs_tol)``. Note that
        ``math.isclose`` also applies its default relative tolerance (1e-09),
        so very large values may compare equal with a difference above
        ``abs_tol``.
    """
    return math.isclose(a, b, abs_tol=abs_tol)

mismatch_count = 0  # Count of receipts where receipt total price does not match transactions total price.
missing_count = 0  # Count of receipts with no transactions.
total_count = 0  # Count of receipts (including those missing transactions).

mismatch_over_count = 0  # Mismatched receipts whose transactions total is greater than the receipt total.
mismatch_under_count = 0  # All other mismatched receipts.

month_to_mismatch_count = {}  # sales_month -> count of mismatched receipts.
month_to_missing_count = {}  # sales_month -> count of receipts with no transactions.

# NOTE(review): these two mappings are declared here but never populated by this cell.
month_to_mismatch_over_count = {}
month_to_mismatch_under_count = {}

# (receipt_number, receipt_transactions) pairs kept for manual inspection.
example_mismatch_over_receipts = []
example_mismatch_under_receipts = []


def _percent(count, total):
    """Return ``count`` as a percentage of ``total``; 0.0 when ``total`` is zero.

    Guards the summary prints below against ZeroDivisionError when there are
    no receipts at all or no mismatched receipts.
    """
    if total == 0:
        return 0.0
    return count / total * 100


for receipt_number, receipt_transactions in receipt_number_to_transactions.items():
    # Receipt-level fields are read from the first row of the group.
    first_row = receipt_transactions[0]
    receipt_total_price = first_row['rt_total_price']
    receipt_sales_month = first_row['sales_month']
    total_count += 1

    # A single row with no transaction id is counted as a receipt with no transactions.
    if len(receipt_transactions) == 1 and first_row['tx_id'] is None:
        missing_count += 1
        month_to_missing_count[receipt_sales_month] = month_to_missing_count.get(receipt_sales_month, 0) + 1
        continue

    transactions_total_price = sum(receipt_transaction['tx_total_price'] for receipt_transaction in receipt_transactions)
    if float_eq(receipt_total_price, transactions_total_price):
        continue

    mismatch_count += 1
    if receipt_total_price < transactions_total_price:
        mismatch_over_count += 1
        example_mismatch_over_receipts.append((receipt_number, receipt_transactions))
    else:
        mismatch_under_count += 1
        example_mismatch_under_receipts.append((receipt_number, receipt_transactions))

    month_to_mismatch_count[receipt_sales_month] = month_to_mismatch_count.get(receipt_sales_month, 0) + 1

print(f'# mismatch receipt vs transactions: {mismatch_count} ({_percent(mismatch_count, total_count)}%)')
print(f'# missing transactions: {missing_count} ({_percent(missing_count, total_count)}%)')
print(f'# total: {total_count}')

print(f'# mismatch receipt vs transactions (transactions over): {mismatch_over_count} ({_percent(mismatch_over_count, mismatch_count)}%)')
print(f'# mismatch receipt vs transactions (transactions under): {mismatch_under_count} ({_percent(mismatch_under_count, mismatch_count)}%)')

# mismatch receipt vs transactions: 20494 (38.25577271284837%)
# missing transactions: 190 (0.3546695040226988%)
# total: 53571
# mismatch receipt vs transactions (transactions over): 20447 (99.77066458475652%)
# mismatch receipt vs transactions (transactions under): 47 (0.2293354152434859%)


In [164]:
# Per sales month: number of receipts that have no transactions.
month_to_missing_count

{'2021-08': 1, '2020-12': 189}

In [165]:
# Per sales month: number of receipts whose total price does not match the summed transaction prices.
month_to_mismatch_count

{'2021-10': 404,
 '2021-09': 10,
 '2021-08': 12,
 '2021-07': 10,
 '2021-06': 8,
 '2021-05': 18,
 '2021-04': 16,
 '2021-03': 18,
 '2021-02': 17,
 '2021-01': 11,
 '2020-12': 15,
 '2020-11': 2887,
 '2020-10': 3043,
 '2020-09': 2886,
 '2020-08': 2582,
 '2020-07': 2403,
 '2020-06': 2194,
 '2020-05': 1968,
 '2020-04': 1393,
 '2020-03': 599}

In [166]:
# First collected (receipt_number, rows) example where the transactions total exceeds the receipt total.
example_mismatch_over_receipts[0]

('0136074360',
 [{'rt_id': '3123c749-1207-4bf0-869c-39299b2735d6',
   'license_number': 'C10-0000596-LIC',
   'receipt_number': '0136074360',
   'rt_type': 'active',
   'sales_customer_type': 'Patient',
   'sales_datetime': Timestamp('2021-10-13 21:44:46.170000+0000', tz='UTC'),
   'total_packages': 2,
   'rt_total_price': 64.0,
   'tx_id': 'e3b12fd4-48f9-4b86-92ca-b92fc04454dd',
   'tx_type': 'active',
   'tx_package_id': '18395501',
   'tx_package_label': '1A406030002E7C2000000067',
   'tx_product_name': 'Extreme OG',
   'tx_product_category_name': 'Flower (packaged eighth - each)',
   'tx_unit_of_measure': 'Each',
   'tx_quantity_sold': 1.0,
   'tx_total_price': 32.0,
   'sales_month': '2021-10'},
  {'rt_id': '3123c749-1207-4bf0-869c-39299b2735d6',
   'license_number': 'C10-0000596-LIC',
   'receipt_number': '0136074360',
   'rt_type': 'active',
   'sales_customer_type': 'Patient',
   'sales_datetime': Timestamp('2021-10-13 21:44:46.170000+0000', tz='UTC'),
   'total_packages': 2,
 

In [167]:
# First collected (receipt_number, rows) example of a mismatch where the transactions total does not exceed the receipt total.
example_mismatch_under_receipts[0]

('0133900371',
 [{'rt_id': '11ae4a89-d946-4bca-8883-6e9327bbe709',
   'license_number': 'C10-0000596-LIC',
   'receipt_number': '0133900371',
   'rt_type': 'active',
   'sales_customer_type': 'Consumer',
   'sales_datetime': Timestamp('2021-10-04 14:20:34.470000+0000', tz='UTC'),
   'total_packages': 3,
   'rt_total_price': 108.0,
   'tx_id': '2008e4a0-6a01-40eb-b60d-e1e493275673',
   'tx_type': 'active',
   'tx_package_id': '17481359',
   'tx_package_label': '1A4060300003BC9000058381',
   'tx_product_name': 'LL Face Mints 3.5g Bag B060421R04FMT-LL',
   'tx_product_category_name': 'Flower (packaged eighth - each)',
   'tx_unit_of_measure': 'Each',
   'tx_quantity_sold': 1.0,
   'tx_total_price': 32.03,
   'sales_month': '2021-10'},
  {'rt_id': '11ae4a89-d946-4bca-8883-6e9327bbe709',
   'license_number': 'C10-0000596-LIC',
   'receipt_number': '0133900371',
   'rt_type': 'active',
   'sales_customer_type': 'Consumer',
   'sales_datetime': Timestamp('2021-10-04 14:20:34.470000+0000', tz=

In [168]:
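## Post-processing: de-duplicate transactions within each receipt (by package ID) to address the receipt vs. transactions mismatches found above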

In [169]:
# Rebuild the receipt number -> rows mapping from sales_receipt_records.
# This repeats the grouping done in an earlier cell of this notebook.
receipt_number_to_transactions = {}
for sales_receipt_record in sales_receipt_records:
    receipt_number = sales_receipt_record['receipt_number']
    if receipt_number not in receipt_number_to_transactions:
        receipt_number_to_transactions[receipt_number] = []
    receipt_number_to_transactions[receipt_number].append(sales_receipt_record)

In [175]:
# Keep, for every receipt, only the first row seen for each tx_package_id.
fixed_sales_receipt_records = []

# dict.fromkeys yields each receipt number once, in first-seen order, so the
# receipts are visited in the same order as they appear in the row records.
for receipt_number in dict.fromkeys(record['receipt_number'] for record in sales_receipt_records):
    receipt_transactions = receipt_number_to_transactions[receipt_number]
    receipt_package_ids_set = set()
    for receipt_transaction in receipt_transactions:
        package_id = receipt_transaction['tx_package_id']
        if package_id not in receipt_package_ids_set:
            receipt_package_ids_set.add(package_id)
            fixed_sales_receipt_records.append(receipt_transaction)

# Row counts before and after de-duplication.
len(sales_receipt_records), len(fixed_sales_receipt_records)

(153002, 110362)

In [176]:
# First row record before de-duplication.
sales_receipt_records[0]

{'rt_id': '6e1737df-814e-4cb6-82c1-969f866f4ac6',
 'license_number': 'C10-0000596-LIC',
 'receipt_number': '0136818143',
 'rt_type': 'active',
 'sales_customer_type': 'Consumer',
 'sales_datetime': Timestamp('2021-10-16 20:37:08.110000+0000', tz='UTC'),
 'total_packages': 1,
 'rt_total_price': 0.0,
 'tx_id': 'f10d5e90-3306-49ff-8d42-a125b0688610',
 'tx_type': 'active',
 'tx_package_id': '16853032',
 'tx_package_label': '1A40603000072DE000075450',
 'tx_product_name': '.7g Single - Sour Bubbles, Stone Road',
 'tx_product_category_name': 'Pre-Roll Flower',
 'tx_unit_of_measure': 'Each',
 'tx_quantity_sold': 1.0,
 'tx_total_price': 0.0,
 'sales_month': '2021-10'}

In [174]:
# First row record after de-duplication, for comparison with the raw record.
fixed_sales_receipt_records[0]

{'rt_id': '6e1737df-814e-4cb6-82c1-969f866f4ac6',
 'license_number': 'C10-0000596-LIC',
 'receipt_number': '0136818143',
 'rt_type': 'active',
 'sales_customer_type': 'Consumer',
 'sales_datetime': Timestamp('2021-10-16 20:37:08.110000+0000', tz='UTC'),
 'total_packages': 1,
 'rt_total_price': 0.0,
 'tx_id': 'f10d5e90-3306-49ff-8d42-a125b0688610',
 'tx_type': 'active',
 'tx_package_id': '16853032',
 'tx_package_label': '1A40603000072DE000075450',
 'tx_product_name': '.7g Single - Sour Bubbles, Stone Road',
 'tx_product_category_name': 'Pre-Roll Flower',
 'tx_unit_of_measure': 'Each',
 'tx_quantity_sold': 1.0,
 'tx_total_price': 0.0,
 'sales_month': '2021-10'}

In [178]:
# Turn the de-duplicated row records back into a DataFrame. Passing the source
# frame's columns keeps the same column set and order as sales_receipts_dataframe.
post_processed_sales_receipts_with_transactions_dataframe = pandas.DataFrame(data=fixed_sales_receipt_records, columns=sales_receipts_dataframe.columns)
post_processed_sales_receipts_with_transactions_dataframe

Unnamed: 0,rt_id,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,total_packages,rt_total_price,tx_id,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,sales_month
0,6e1737df-814e-4cb6-82c1-969f866f4ac6,C10-0000596-LIC,0136818143,active,Consumer,2021-10-16 20:37:08.110000+00:00,1,0.0,f10d5e90-3306-49ff-8d42-a125b0688610,active,16853032,1A40603000072DE000075450,".7g Single - Sour Bubbles, Stone Road",Pre-Roll Flower,Each,1.0,0.0,2021-10
1,40d7bd31-73a3-42b2-aeb5-cc39fe8779d0,C10-0000596-LIC,0136817748,active,Consumer,2021-10-16 20:35:06.170000+00:00,2,18.0,ce3b9cb4-81b4-47da-aa56-a60f169af181,active,18459735,1A406030000339A000013005,Wedding Crashers 1g,Flower (packaged gram - each),Each,1.0,9.0,2021-10
2,40d7bd31-73a3-42b2-aeb5-cc39fe8779d0,C10-0000596-LIC,0136817748,active,Consumer,2021-10-16 20:35:06.170000+00:00,2,18.0,d74d927a-169c-42b0-80c2-cc07d2bf15fe,active,18441370,1A406030000339A000013026,Blue Berry Muffin 1g,Flower (packaged gram - each),Each,1.0,9.0,2021-10
3,78426d50-4914-4edf-8952-cf384c5473dc,C10-0000596-LIC,0136585295,active,Consumer,2021-10-15 21:02:04.760000+00:00,4,43.5,8919910c-1c17-41c8-b658-ab57bc047584,active,16892484,1A406030002E7C2000000026,Beary OG .5g Vape Cartridge,Vape Cartridge (weight - each),Each,1.0,25.0,2021-10
4,78426d50-4914-4edf-8952-cf384c5473dc,C10-0000596-LIC,0136585295,active,Consumer,2021-10-15 21:02:04.760000+00:00,4,43.5,59cba90b-aa30-48c7-b388-5a2431d87334,active,16851983,1A40603000072DE000075449,"1g Hash Single - Acapulco Gold, Stone Road",Pre-Roll Infused,Each,1.0,8.0,2021-10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110357,b7d94a3f-b8b1-4066-9b2a-a0479d47e4a5,C10-0000596-LIC,0014210790,active,Consumer,2020-03-08 12:37:08.400000+00:00,1,13.5,0c728fcd-ddc3-475d-af9c-678372979610,active,3049581,1A406030000A3CF000000223,THC D- Dosi Dos 1g Pouch,Flower,Grams,1.0,13.5,2020-03
110358,417e4e96-75ae-40ff-a30f-d4192210af62,C10-0000596-LIC,0014210031,active,Consumer,2020-03-08 12:31:37.390000+00:00,1,27.0,89753fa5-b3f8-49f6-8030-14b11fcb56eb,active,3016868,1A4060300008D22000021556,STIIIZY - Blue Dream Pod - 0.5g,Vape Cartridge (volume - each),Each,1.0,27.0,2020-03
110359,9ccc7566-b22d-4381-88fe-607b2ac6c5f1,C10-0000596-LIC,0015284616,active,Consumer,2020-03-08 12:30:35.440000+00:00,3,117.0,093a0805-7961-4521-9500-2504d7e5e826,active,2634052,1A4060300007B40000003102,Cinn Indica 10 Pk,Edible (weight - each),Each,1.0,0.0,2020-03
110360,9ccc7566-b22d-4381-88fe-607b2ac6c5f1,C10-0000596-LIC,0015284616,active,Consumer,2020-03-08 12:30:35.440000+00:00,3,117.0,fdde2ddf-78b3-4114-acf9-8824bb9192ff,active,3016876,1A4060300008D22000021564,STIIIZY - OG Kush Pod - 0.5g,Vape Cartridge (volume - each),Each,2.0,54.0,2020-03


In [193]:
# Run the prepare_data.dedupe_sales_transactions helper on the same input frame.
# NOTE(review): presumably the module-level equivalent of the in-notebook
# de-duplication above, displayed here for comparison - confirm in prepare_data.
x = prepare_data.dedupe_sales_transactions(sales_receipts_dataframe)
x

Unnamed: 0,rt_id,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,total_packages,rt_total_price,tx_id,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,sales_month
0,6e1737df-814e-4cb6-82c1-969f866f4ac6,C10-0000596-LIC,0136818143,active,Consumer,2021-10-16 20:37:08.110000+00:00,1,0.0,f10d5e90-3306-49ff-8d42-a125b0688610,active,16853032,1A40603000072DE000075450,".7g Single - Sour Bubbles, Stone Road",Pre-Roll Flower,Each,1.0,0.0,2021-10
1,40d7bd31-73a3-42b2-aeb5-cc39fe8779d0,C10-0000596-LIC,0136817748,active,Consumer,2021-10-16 20:35:06.170000+00:00,2,18.0,ce3b9cb4-81b4-47da-aa56-a60f169af181,active,18459735,1A406030000339A000013005,Wedding Crashers 1g,Flower (packaged gram - each),Each,1.0,9.0,2021-10
2,40d7bd31-73a3-42b2-aeb5-cc39fe8779d0,C10-0000596-LIC,0136817748,active,Consumer,2021-10-16 20:35:06.170000+00:00,2,18.0,d74d927a-169c-42b0-80c2-cc07d2bf15fe,active,18441370,1A406030000339A000013026,Blue Berry Muffin 1g,Flower (packaged gram - each),Each,1.0,9.0,2021-10
3,78426d50-4914-4edf-8952-cf384c5473dc,C10-0000596-LIC,0136585295,active,Consumer,2021-10-15 21:02:04.760000+00:00,4,43.5,8919910c-1c17-41c8-b658-ab57bc047584,active,16892484,1A406030002E7C2000000026,Beary OG .5g Vape Cartridge,Vape Cartridge (weight - each),Each,1.0,25.0,2021-10
4,78426d50-4914-4edf-8952-cf384c5473dc,C10-0000596-LIC,0136585295,active,Consumer,2021-10-15 21:02:04.760000+00:00,4,43.5,59cba90b-aa30-48c7-b388-5a2431d87334,active,16851983,1A40603000072DE000075449,"1g Hash Single - Acapulco Gold, Stone Road",Pre-Roll Infused,Each,1.0,8.0,2021-10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110357,b7d94a3f-b8b1-4066-9b2a-a0479d47e4a5,C10-0000596-LIC,0014210790,active,Consumer,2020-03-08 12:37:08.400000+00:00,1,13.5,0c728fcd-ddc3-475d-af9c-678372979610,active,3049581,1A406030000A3CF000000223,THC D- Dosi Dos 1g Pouch,Flower,Grams,1.0,13.5,2020-03
110358,417e4e96-75ae-40ff-a30f-d4192210af62,C10-0000596-LIC,0014210031,active,Consumer,2020-03-08 12:31:37.390000+00:00,1,27.0,89753fa5-b3f8-49f6-8030-14b11fcb56eb,active,3016868,1A4060300008D22000021556,STIIIZY - Blue Dream Pod - 0.5g,Vape Cartridge (volume - each),Each,1.0,27.0,2020-03
110359,9ccc7566-b22d-4381-88fe-607b2ac6c5f1,C10-0000596-LIC,0015284616,active,Consumer,2020-03-08 12:30:35.440000+00:00,3,117.0,093a0805-7961-4521-9500-2504d7e5e826,active,2634052,1A4060300007B40000003102,Cinn Indica 10 Pk,Edible (weight - each),Each,1.0,0.0,2020-03
110360,9ccc7566-b22d-4381-88fe-607b2ac6c5f1,C10-0000596-LIC,0015284616,active,Consumer,2020-03-08 12:30:35.440000+00:00,3,117.0,fdde2ddf-78b3-4114-acf9-8824bb9192ff,active,3016876,1A4060300008D22000021564,STIIIZY - OG Kush Pod - 0.5g,Vape Cartridge (volume - each),Each,2.0,54.0,2020-03
