In [106]:
import json
import os
import numpy
import pandas
import pyarrow
import sys

from datetime import date
from os import path
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Steps to install
# 1. pip install sqlalchemy-bigquery google-cloud-bigquery-storage pyarrow
# 2. Copy the credentials file to wherever you set BIGQUERY_CREDENTIALS_PATH to
#
# BIGQUERY_CREDENTIALS_PATH is read from the process environment after
# load_dotenv() has had a chance to populate it from a .env file.
load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')
if not BIGQUERY_CREDENTIALS_PATH:
    # Fail fast with an actionable message; otherwise os.path.expanduser(None)
    # raises an opaque TypeError that does not mention the missing variable.
    raise RuntimeError(
        'BIGQUERY_CREDENTIALS_PATH is not set. Define it in the environment or in a .env file '
        'and point it at the BigQuery credentials file.'
    )

# SQLAlchemy engine used by every pandas.read_sql_query call in this notebook.
# expanduser lets the configured path start with "~".
engine = create_engine(
    'bigquery://bespoke-financial/ProdMetrcData',
    credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH),
)

In [107]:
# Make the project's helper packages importable. Each directory is resolved
# relative to the current working directory (os.getcwd()), so this cell assumes
# the notebook kernel was started from the notebook's own folder.
sys.path.append(path.realpath(path.join(os.getcwd(), "../../scripts/analysis")))
sys.path.append(path.realpath(path.join(os.getcwd(), "../../src")))

# Inventory analysis helpers, referred to as `util` throughout the notebook.
from util import active_inventory_util as util

# `create_queries` lives in ../core; its directory must be on sys.path before the import.
sys.path.append(path.realpath(path.join(os.getcwd(), "../core")))
import create_queries
# Load the IPython autoreload extension and set it to mode 2.
# %load_ext prints a notice (rather than failing) when the extension is already loaded.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [108]:
# --- Analysis configuration ---------------------------------------------

# Company under analysis: a display name plus the identifier handed to the
# query builders in create_queries.
COMPANY_NAME = 'HPCC'
COMPANY_IDENTIFIER = 'HPCC'

# Start dates passed to the transfer-package and sales-transaction query builders.
TRANSFER_PACKAGES_START_DATE = '2019-01-01'
SALES_TRANSACTIONS_START_DATE = '2019-01-01'

# Parameters forwarded to the inventory analysis helpers.
ANALYSIS_PARAMS = dict(sold_threshold=1.0)

# Today's date in the same MM/DD/YYYY format as the inventory dates below.
TODAY_DATE = '{:%m/%d/%Y}'.format(date.today())

# Month-end inventory snapshot dates (MM/DD/YYYY), with today's date as the final entry.
INVENTORY_DATES = [
    '09/30/2020',
    '10/31/2020',
    '11/30/2020',
    '12/31/2020',
    '01/31/2021',
    '02/28/2021',
    '03/31/2021',
    '04/30/2021',
    '05/31/2021',
    '06/30/2021',
    '07/31/2021',
    '08/31/2021',
    '09/30/2021',
] + [TODAY_DATE]

print(f'Today is {TODAY_DATE}')

Today is 10/31/2021


In [109]:
# Download packages, sales transactions, incoming / outgoing transfers

# Step 1: build one query per dataset from the company identifier and start dates.
company_licenses_query = create_queries.create_company_licenses_query(
    COMPANY_IDENTIFIER,
)
company_incoming_transfer_packages_query = create_queries.create_company_incoming_transfer_packages_query(
    COMPANY_IDENTIFIER,
    TRANSFER_PACKAGES_START_DATE,
)
company_outgoing_transfer_packages_query = create_queries.create_company_outgoing_transfer_packages_query(
    COMPANY_IDENTIFIER,
    TRANSFER_PACKAGES_START_DATE,
)
company_sales_transactions_query = create_queries.create_company_sales_transactions_query(
    COMPANY_IDENTIFIER,
    SALES_TRANSACTIONS_START_DATE,
)
company_inventory_packages_query = create_queries.create_company_inventory_packages_query(
    COMPANY_IDENTIFIER,
    include_quantity_zero=True,
)

# Step 2: execute each query through the shared engine and load the result into a DataFrame.
company_licenses_dataframe = pandas.read_sql_query(
    sql=company_licenses_query, con=engine)
company_incoming_transfer_packages_dataframe = pandas.read_sql_query(
    sql=company_incoming_transfer_packages_query, con=engine)
company_outgoing_transfer_packages_dataframe = pandas.read_sql_query(
    sql=company_outgoing_transfer_packages_query, con=engine)
company_sales_transactions_dataframe = pandas.read_sql_query(
    sql=company_sales_transactions_query, con=engine)
company_inventory_packages_dataframe = pandas.read_sql_query(
    sql=company_inventory_packages_query, con=engine)

In [110]:
# Display the licenses returned by the company licenses query.
company_licenses_dataframe

Unnamed: 0,us_state,license_number,license_category,legal_name,is_current,license_status,rollup_id,license_description
0,CA,C10-0000005-LIC,Retailer,"DPC SF, LLC",True,Active,CA-01004,Retailer
1,CA,C10-0000064-LIC,Retailer,"HUENEME PATIENT CONSUMER COLLECTIVE, LLC.",True,Active,CA-01805,Retailer


In [117]:
# Restrict incoming transfer packages to rows whose shipment_package_state is
# 'Accepted', printing the row count before and after. The filtered frame
# replaces the original under the same name, which later cells rely on.
print('Filtering incoming transfer packages to only "Accepted" shipment package state...')
num_before_filter = company_incoming_transfer_packages_dataframe.shape[0]
print(f'Before filter # of incoming transfer packages: {num_before_filter}')

is_accepted = company_incoming_transfer_packages_dataframe['shipment_package_state'].eq('Accepted')
company_incoming_transfer_packages_dataframe = company_incoming_transfer_packages_dataframe.loc[is_accepted]

num_after_filter = company_incoming_transfer_packages_dataframe.shape[0]
print(f'After filter # of incoming transfer packages: {num_after_filter}')

Filtering incoming transfer packages to only "Accepted" shipment package state...
Before filter # of incoming transfer packages: 48001
After filter # of incoming transfer packages: 47727


In [118]:
# Create the Download helper from active_inventory_util and hand it the three
# company dataframes (incoming transfers already filtered to "Accepted") plus the
# SQLAlchemy engine. `d` is the input to util.get_histories and
# util.analyze_specific_package_histories in later cells.
# NOTE(review): what download_dataframes does with `engine` is not visible here — confirm in util.
d = util.Download()
d.download_dataframes(
    incoming_transfer_packages_dataframe=company_incoming_transfer_packages_dataframe,
    outgoing_transfer_packages_dataframe=company_outgoing_transfer_packages_dataframe,
    sales_transactions_dataframe=company_sales_transactions_dataframe,
    engine=engine,
)

In [119]:
import importlib
importlib.reload(util)

# TODO(dlluncor): Just for debugging to make this faster.
# When True, only today's inventory is computed. The shortcut lives in its own
# flag/variable so the INVENTORY_DATES configuration is NOT overwritten; set the
# flag to False to compute every configured date.
DEBUG_ONLY_COMPUTE_TODAY = True
inventory_dates_to_compute = [TODAY_DATE] if DEBUG_ONLY_COMPUTE_TODAY else INVENTORY_DATES

# Maps inventory date string (MM/DD/YYYY) -> DataFrame of computed inventory packages.
date_to_inventory_packages_dataframe = {}

# Package histories built from the downloaded dataframes; computed once and
# reused for every inventory date.
id_to_history = util.get_histories(d)

for inventory_date in inventory_dates_to_compute:
    # Records describing the computed inventory as of `inventory_date`.
    computed_inventory_package_records = util.create_inventory_dataframe_by_date(
        id_to_history,
        inventory_date,
        params=ANALYSIS_PARAMS,
    )
    # Column names come from util so they stay in sync with the record layout.
    computed_inventory_packages_dataframe = pandas.DataFrame(
        computed_inventory_package_records,
        columns=util.get_inventory_column_names(),
    )
    date_to_inventory_packages_dataframe[inventory_date] = computed_inventory_packages_dataframe

WARN: package #5172609 does not have a shipped quantity
WARN: package #848108 does not have a shipped quantity


In [128]:
# Columns of the downloaded ("actual") inventory that are kept for the
# comparison against the computed inventory.
actual_inventory_columns = [
    'package_id',
    'packaged_date',
    'product_category_name',
    'product_name',
    'quantity',
    'unit_of_measure',
]

# Actual inventory restricted to those columns, ordered by package_id.
from_packages_inventory_dataframe = (
    company_inventory_packages_dataframe[actual_inventory_columns]
    .sort_values(by='package_id')
)

In [121]:
import importlib
importlib.reload(util)

# Options forwarded to the computed-vs-actual comparison.
comparison_options = dict(
    num_errors_to_show=20,
    accept_computed_when_sold_out=True,
)

# Compare today's computed inventory against the actual inventory packages.
# `res` is consulted later for its 'computed_extra_package_ids' entry.
res = util.compare_inventory_dataframes(
    computed=date_to_inventory_packages_dataframe[TODAY_DATE],
    actual=from_packages_inventory_dataframe,
    options=comparison_options,
)

# How to read the mismatch counts printed by the comparison. The figures quoted
# below (229 and 237) were written down by the author and may not match the
# counts in the current output.
#
# "Num actual packages not computed" (229) falls into two categories:
# 1. Actual has a package that we've never seen
# 2. Actual has a package that we've sold out of, and that's why it's in the inventory
#
# "Num computed packages not in actual" (237) falls into two categories:
# 1. We never saw the package in the actual inventory
# 2. Actual inventory is sold out, but we think it's not sold out in computed

Pct of # inventory matching: 89.15% (1856 / 2082)
Accuracy of quantities: 92.76%
Pct of # inventory packages over-estimated: 1.92%
Pct of # quantity over-estimated: 0.02%
Avg quantity delta: 0.91
Avg quantity: 12.51

Num matching packages: 1856
Num actual packages not computed: 226
  but computed at some point: 0, e.g., 0.00% of non-computed packages
  avg quantity from actual packages 0.00
Num computed packages not in actual: 40
  but in actual inventory at some point: 19

Computed has these extra package IDs; first 20
4465593: computed quantity 185 (Grams)
3987073: computed quantity 65 (Each)
4426074: computed quantity 46 (Each)
3463140: computed quantity 25 (Each)
3383175: computed quantity 21 (Each)
4381611: computed quantity 20 (Each)
19027319: computed quantity 20 (Each)
3707140: computed quantity 17 (Each)
3995488: computed quantity 15 (Each)
3992450: computed quantity 12 (Each)
3453980: computed quantity 7 (Each)
1875607: computed quantity 5 (Each)
12964622: computed quantity 3

In [130]:
# For debugging individual package histories.
# Run the inventory computation cell above first: it reloads util and rebuilds
# the id_to_history mapping.

import importlib
importlib.reload(util)

# Package IDs (as strings) whose histories should be analyzed.
PACKAGE_IDS = ['18420668']

# Look-up from stringified package_id to its row in the actual inventory.
# If a package_id occurs more than once, the last row wins.
package_id_to_actual_row = {
    str(actual_row['package_id']): actual_row
    for _, actual_row in company_inventory_packages_dataframe.iterrows()
}

util.analyze_specific_package_histories(
    d,
    package_id_to_actual_row,
    PACKAGE_IDS,
    params=ANALYSIS_PARAMS,
)

Matching metrc_package:
license_number                                         C10-0000005-LIC
package_id                                                    18420668
package_label                                 1A4060300024735000017552
type                                                            active
packaged_date                                               2021-10-04
package_type                                                   Product
product_name             Korova Preroll - M.A.C. One - 1g / Case of 50
product_category_name                                  Pre-Roll Flower
quantity                                                          67.0
unit_of_measure                                                   Each
is_testing_sample                                                False
is_trade_sample                                                  False
is_on_hold                                                       False
archived_date                                        

In [123]:
# Build a query from the package IDs that the comparison reported as present
# only in the computed inventory, then run it through the shared engine.
computed_extra_package_ids = res['computed_extra_package_ids']
inactive_packages_query = util.are_packages_inactive_query(computed_extra_package_ids)
inactive_packages_df = pandas.read_sql_query(sql=inactive_packages_query, con=engine)

In [124]:
# Spot-check a single package in the inactive-packages result.
# NOTE(review): package_id is compared against the string '107500'; confirm the
# column holds strings, otherwise this filter can never match.
inactive_packages_df[inactive_packages_df['package_id'] == '107500']

Unnamed: 0,identifier,license_number,type,package_id,package_label,product_category_name,product_name,archiveddate,finisheddate,quantity
