In [None]:
import json
import numpy
import os
import pandas
import pyarrow
import sys

from datetime import date
from dotenv import load_dotenv
from sqlalchemy import create_engine
from os import path

# Load variables from a .env file (if present) before reading the credentials path.
load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')
if BIGQUERY_CREDENTIALS_PATH is None:
    # Fail fast with an actionable message: without this check,
    # os.path.expanduser(None) raises an opaque TypeError.
    raise RuntimeError(
        'BIGQUERY_CREDENTIALS_PATH is not set; define it in the environment or in a .env file.'
    )
# SQLAlchemy engine used by every pandas.read_sql_query call in this notebook.
# A leading "~" in the credentials path is expanded to the user's home directory.
engine = create_engine(
    'bigquery://bespoke-financial/ProdMetrcData',
    credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH),
)

# Extend the import path with two directories resolved relative to the current
# working directory (presumably where create_queries / prepare_data and the
# bespoke package live — verify against the repository layout).
for relative_module_dir in ("../core", "../../src"):
    sys.path.append(path.realpath(path.join(os.getcwd(), relative_module_dir)))

import create_queries
import prepare_data

from bespoke.inventory.analysis.shared import download_util, inventory_types
from bespoke.inventory.analysis import active_inventory_util as util
from bespoke.inventory.analysis import inventory_valuations_util as valuations_util
from bespoke.inventory.analysis import stale_inventory_util

%load_ext autoreload
%autoreload 2

In [None]:
# CHANGE ME: per-run configuration.
# Company identifier passed to every create_queries call in this notebook.
COMPANY_IDENTIFIER = 'CSC'
# Start date (YYYY-MM-DD string) passed to the download summaries and transfer packages queries.
TRANSFER_PACKAGES_START_DATE = '2020-01-01'
# Start date (YYYY-MM-DD string) passed to the sales transactions and sales receipts queries.
SALES_TRANSACTIONS_START_DATE = '2020-01-01'

## Queries 1: company licenses and download summaries

In [None]:
# Build the two queries for the configured company.
company_licenses_query = create_queries.create_company_licenses_query(
    COMPANY_IDENTIFIER,
)
company_download_summaries_query = create_queries.create_company_download_summaries_query(
    COMPANY_IDENTIFIER,
    TRANSFER_PACKAGES_START_DATE,
)

# Run both queries on the shared engine and load the results as dataframes.
company_licenses_dataframe = pandas.read_sql_query(sql=company_licenses_query, con=engine)
company_download_summaries_dataframe = pandas.read_sql_query(sql=company_download_summaries_query, con=engine)

## Licenses

In [None]:
# Bare last expression: renders the licenses dataframe as the cell output.
company_licenses_dataframe

## Download summaries

In [None]:
# Verify that every download summary of every license has status 'completed'.
license_numbers = company_download_summaries_dataframe['license_number'].unique()
download_summary_records = company_download_summaries_dataframe.to_dict('records')

# Group the records by license number in a single pass instead of re-filtering
# the full record list once per license. Record order within a license is the
# dataframe's row order.
license_number_to_download_summary_records = {}
for download_summary_record in download_summary_records:
    license_number_to_download_summary_records.setdefault(
        download_summary_record['license_number'],
        [],
    ).append(download_summary_record)

bad_count = 0

for license_number in license_numbers:
    # Use a distinct name here so the full `download_summary_records` list
    # defined above is not overwritten by the per-license list.
    license_download_summary_records = license_number_to_download_summary_records[license_number]
    print(f'Verifying download summaries for license {license_number}...')
    # NOTE(review): treating the last record as the earliest and the first as the
    # latest assumes the query returns rows newest-first — confirm against
    # create_queries.create_company_download_summaries_query.
    print(f'Earliest download summary: {license_download_summary_records[-1]["date"]}')
    print(f'Latest download summary: {license_download_summary_records[0]["date"]}')
    for download_summary_record in license_download_summary_records:
        if download_summary_record['status'] != 'completed':
            bad_count += 1
            print(f'Found bad download summary for license {license_number} on date {download_summary_record["date"]}')
    print('')

if bad_count > 0:
    print(f'[FAILURE] Found a total of {bad_count} bad download summaries')
else:
    print('[SUCCESS] All download summaries look good!')

## Queries 2: transfer packages, sales, and inventory packages

In [None]:
company_license_records = company_licenses_dataframe.to_dict('records')
# License numbers used to filter the queries below: licenses whose category is
# 'Retailer' or 'Multiple'.
# TODO: figure out which license numbers to use based on which ones have sales transactions, not only based on license category.
license_numbers = [
    record['license_number']
    for record in company_license_records
    if record['license_category'] in ('Retailer', 'Multiple')
]
license_numbers

In [None]:
def _run_query(query):
    """Run `query` on the notebook's `engine` and return the result as a dataframe."""
    return pandas.read_sql_query(query, engine)


# Transfer package queries, starting at TRANSFER_PACKAGES_START_DATE.
company_incoming_transfer_packages_query = create_queries.create_company_incoming_transfer_packages_query(
    COMPANY_IDENTIFIER, TRANSFER_PACKAGES_START_DATE, license_numbers=license_numbers,
)
company_outgoing_transfer_packages_query = create_queries.create_company_outgoing_transfer_packages_query(
    COMPANY_IDENTIFIER, TRANSFER_PACKAGES_START_DATE, license_numbers=license_numbers,
)
# NOTE(review): unlike the other queries, no license_numbers filter is passed
# here — confirm this is intentional.
company_unknown_transfer_packages_query = create_queries.create_company_unknown_transfer_packages_query(
    COMPANY_IDENTIFIER, TRANSFER_PACKAGES_START_DATE,
)

# Sales queries, starting at SALES_TRANSACTIONS_START_DATE.
company_sales_transactions_query = create_queries.create_company_sales_transactions_query(
    COMPANY_IDENTIFIER, SALES_TRANSACTIONS_START_DATE, license_numbers=license_numbers,
)
company_sales_receipts_query = create_queries.create_company_sales_receipts_query(
    COMPANY_IDENTIFIER, SALES_TRANSACTIONS_START_DATE, license_numbers=license_numbers,
)
company_sales_receipts_with_transactions_query = create_queries.create_company_sales_receipts_with_transactions_query(
    COMPANY_IDENTIFIER, SALES_TRANSACTIONS_START_DATE, license_numbers=license_numbers,
)

# Inventory packages query; include_quantity_zero=True is passed through to the query builder.
company_inventory_packages_query = create_queries.create_company_inventory_packages_query(
    COMPANY_IDENTIFIER, include_quantity_zero=True, license_numbers=license_numbers,
)

# Execute the queries in the same order they were built.
company_incoming_transfer_packages_dataframe = _run_query(company_incoming_transfer_packages_query)
company_outgoing_transfer_packages_dataframe = _run_query(company_outgoing_transfer_packages_query)
company_unknown_transfer_packages_dataframe = _run_query(company_unknown_transfer_packages_query)
company_sales_transactions_dataframe = _run_query(company_sales_transactions_query)
company_sales_receipts_dataframe = _run_query(company_sales_receipts_query)
company_sales_receipts_with_transactions_dataframe = _run_query(company_sales_receipts_with_transactions_query)
company_inventory_packages_dataframe = _run_query(company_inventory_packages_query)

In [None]:
# Row counts of the main dataframes loaded above (shape[0] is the number of rows).
print(f'# incoming transfer packages: {company_incoming_transfer_packages_dataframe.shape[0]}')
print(f'# outgoing transfer packages: {company_outgoing_transfer_packages_dataframe.shape[0]}')
print(f'# sales transactions: {company_sales_transactions_dataframe.shape[0]}')
print(f'# inventory packages: {company_inventory_packages_dataframe.shape[0]}')

## Anonymize data

In [None]:
def anonymize_column(dataframe, column_name):
    """Return a copy of `dataframe` with the values of `column_name` anonymized.

    Each value is formatted as 'BF-<value>', hashed with SHA-256, and the first
    8 bytes of the digest are rendered as an unsigned 64-bit integer in decimal
    string form (the same output shape as before: a numeric string below 2**64).

    Unlike the built-in hash(), whose result for strings is salted per Python
    process, SHA-256 is deterministic: the same input value always maps to the
    same token, across columns, dataframes, and kernel restarts, so exports
    produced by different runs remain joinable.

    Args:
        dataframe: Source dataframe; it is not modified.
        column_name: Name of the column to anonymize (must be a string).

    Returns:
        A new dataframe in which `column_name` holds the anonymized strings.
    """
    import hashlib

    def anonymize_value(value):
        # NOTE(review): the 'BF-' prefix is a fixed, non-secret salt, so short or
        # guessable identifiers could be recovered by brute force — confirm this
        # level of anonymization is sufficient for the data's audience.
        digest = hashlib.sha256(f'BF-{value}'.encode('utf-8')).digest()
        return str(int.from_bytes(digest[:8], byteorder='big'))

    # Map over the single column instead of applying row-wise over the whole
    # frame: faster, and well-defined for an empty dataframe.
    anonymized_values = dataframe[column_name].map(anonymize_value)
    # Assign positionally (.values) so index alignment cannot reorder the values.
    return dataframe.assign(**{column_name: anonymized_values.values})

def anonymize_columns(dataframe, column_names):
    """Anonymize several columns of `dataframe`, one after another.

    Calls `anonymize_column` once per entry of `column_names`, feeding each
    result into the next call, and returns the final dataframe. When
    `column_names` is empty, the input dataframe is returned as is.
    """
    anonymized_dataframe = dataframe
    for name in column_names:
        anonymized_dataframe = anonymize_column(anonymized_dataframe, name)
    return anonymized_dataframe

In [None]:
# Columns kept in the anonymized incoming transfer packages dataset.
# Columns not included: shipper_facility_name, recipient_facility_name,
# shipment_transaction_type_1, package_label, source_package_labels,
# source_harvest_names, package_lab_results_status, receiver_wholesale_price_1.
incoming_transfer_packages_export_columns = [
    'delivery_type',
    'license_number',  # anonymized below
    'manifest_number',  # anonymized below
    'created_date',
    'received_datetime',
    'shipment_transaction_type',
    'shipper_facility_license_number',  # anonymized below
    'recipient_facility_license_number',  # anonymized below
    'shipment_type_name',
    'package_id',  # anonymized below
    'type',
    'shipment_package_state',
    'is_testing_sample',
    'is_trade_sample',
    'product_category_name',
    'product_name',
    'shipper_wholesale_price',
    'shipped_quantity',
    'shipped_unit_of_measure',
    'receiver_wholesale_price',
    'received_quantity',
    'received_unit_of_measure',
    'item_unit_weight',
    'item_unit_weight_unit_of_measure_name',
]
# Identifier columns whose values are replaced by anonymized tokens.
incoming_transfer_packages_anonymized_columns = [
    'license_number',
    'manifest_number',
    'shipper_facility_license_number',
    'recipient_facility_license_number',
    'package_id',
]
anonymized_company_incoming_transfer_packages_dataframe = anonymize_columns(
    company_incoming_transfer_packages_dataframe[incoming_transfer_packages_export_columns],
    incoming_transfer_packages_anonymized_columns,
)
# anonymized_company_incoming_transfer_packages_dataframe

In [None]:
# Columns kept in the anonymized outgoing transfer packages dataset.
# Columns not included: shipper_facility_name, recipient_facility_name,
# shipment_transaction_type_1, package_label, source_package_labels,
# source_harvest_names, package_lab_results_status, receiver_wholesale_price_1.
outgoing_transfer_packages_export_columns = [
    'delivery_type',
    'license_number',  # anonymized below
    'manifest_number',  # anonymized below
    'created_date',
    'received_datetime',
    'shipment_transaction_type',
    'shipper_facility_license_number',  # anonymized below
    'recipient_facility_license_number',  # anonymized below
    'shipment_type_name',
    'package_id',  # anonymized below
    'type',
    'shipment_package_state',
    'is_testing_sample',
    'is_trade_sample',
    'product_category_name',
    'product_name',
    'shipper_wholesale_price',
    'shipped_quantity',
    'shipped_unit_of_measure',
    'receiver_wholesale_price',
    'received_quantity',
    'received_unit_of_measure',
    'item_unit_weight',
    'item_unit_weight_unit_of_measure_name',
]
# Identifier columns whose values are replaced by anonymized tokens.
outgoing_transfer_packages_anonymized_columns = [
    'license_number',
    'manifest_number',
    'shipper_facility_license_number',
    'recipient_facility_license_number',
    'package_id',
]
anonymized_company_outgoing_transfer_packages_dataframe = anonymize_columns(
    company_outgoing_transfer_packages_dataframe[outgoing_transfer_packages_export_columns],
    outgoing_transfer_packages_anonymized_columns,
)
# anonymized_company_outgoing_transfer_packages_dataframe

In [None]:
# Columns kept in the anonymized sales transactions dataset.
# Columns not included: receipt_number, package_id, tx_package_label, tx_is_deleted.
sales_transactions_export_columns = [
    'license_number',  # anonymized below
    'receipt_id',  # anonymized below
    'rt_type',
    'sales_customer_type',
    'sales_datetime',
    'total_packages',
    'rt_total_price',
    'tx_type',
    'tx_package_id',  # anonymized below
    'tx_product_name',
    'tx_product_category_name',
    'tx_unit_of_measure',
    'tx_quantity_sold',
    'tx_total_price',
]
# Identifier columns whose values are replaced by anonymized tokens.
sales_transactions_anonymized_columns = [
    'license_number',
    'receipt_id',
    'tx_package_id',
]
anonymized_company_sales_transactions_dataframe = anonymize_columns(
    company_sales_transactions_dataframe[sales_transactions_export_columns],
    sales_transactions_anonymized_columns,
)
# anonymized_company_sales_transactions_dataframe

In [None]:
# Columns kept in the anonymized inventory packages dataset.
# Columns not included: package_label, last_modified_at, production_batch_number,
# source_production_batch_numbers, source_harvest_names.
inventory_packages_export_columns = [
    'license_number',  # anonymized below
    'package_id',  # anonymized below
    'type',
    'packaged_date',
    'package_type',
    'product_name',
    'product_category_name',
    'quantity',
    'unit_of_measure',
    'item_id',  # anonymized below
    'item_product_category_type',
    'is_testing_sample',
    'is_trade_sample',
    'is_on_hold',
    'archived_date',
    'finished_date',
]
# Identifier columns whose values are replaced by anonymized tokens.
inventory_packages_anonymized_columns = [
    'license_number',
    'package_id',
    'item_id',
]
anonymized_company_inventory_packages_dataframe = anonymize_columns(
    company_inventory_packages_dataframe[inventory_packages_export_columns],
    inventory_packages_anonymized_columns,
)
# anonymized_company_inventory_packages_dataframe

In [None]:
# Row counts of the anonymized dataframes (shape[0] is the number of rows).
print(f'# anonymized incoming transfer packages: {anonymized_company_incoming_transfer_packages_dataframe.shape[0]}')
print(f'# anonymized outgoing transfer packages: {anonymized_company_outgoing_transfer_packages_dataframe.shape[0]}')
print(f'# anonymized sales transactions: {anonymized_company_sales_transactions_dataframe.shape[0]}')
print(f'# anonymized inventory packages: {anonymized_company_inventory_packages_dataframe.shape[0]}')

## Export data

In [None]:
# Export switch: when True, the export cell writes the anonymized dataframes to
# .xlsx and .csv files. Set to False to skip writing files.
is_export_enabled = True

In [None]:
import time
from datetime import date

# Unix timestamp in whole seconds; used as the suffix of the export file names.
NOW = int(time.time())
# Today's date formatted as MM-DD-YYYY.
TODAY_DATE = f'{date.today():%m-%d-%Y}'

def format_export_dataframe(dataframe):
    """Return a copy of `dataframe` with timezone-aware datetime columns reduced to dates.

    Every column with a timezone-aware datetime dtype (any unit, any timezone)
    is replaced by its calendar date (`Series.dt.date`). All other columns,
    including timezone-naive datetime columns, are left unchanged.

    The input dataframe is not modified, so the caller's dataframe keeps its
    original datetime columns and re-running the calling cell is idempotent.

    Args:
        dataframe: Dataframe to prepare for export.

    Returns:
        A new dataframe with the converted columns.
    """
    # 'datetimetz' selects every timezone-aware datetime column rather than only
    # the exact 'datetime64[ns, UTC]' dtype.
    date_columns = dataframe.select_dtypes(include=['datetimetz']).columns
    # Work on a copy so the caller's dataframe is never mutated.
    formatted_dataframe = dataframe.copy()
    for date_column in date_columns:
        formatted_dataframe[date_column] = formatted_dataframe[date_column].dt.date
    return formatted_dataframe

def _export_dataframe_to_files(description, dataframe, file_name):
    """Write `dataframe` to `<file_name>.xlsx` and `<file_name>.csv`.

    Args:
        description: Plural label used in the progress messages
            (e.g. 'sales transactions').
        dataframe: Dataframe to export. Its index is converted into a regular
            column via reset_index() before writing.
        file_name: Destination path without the file extension.

    Returns:
        The number of rows exported.
    """
    num_rows = len(dataframe.index)
    print(f'Exporting {num_rows} {description} to files...')
    # reset_index() is computed once and reused for both output formats.
    indexed_dataframe = dataframe.reset_index()
    indexed_dataframe.to_excel(f'{file_name}.xlsx', index=False)
    indexed_dataframe.to_csv(f'{file_name}.csv', index=False)
    print(f'Exported {num_rows} {description} to files')
    return num_rows


# Prepare each anonymized dataframe for export and derive its destination path
# (without extension); NOW keeps the file names distinct between runs.
export_incoming_transfer_packages_dataframe = format_export_dataframe(anonymized_company_incoming_transfer_packages_dataframe)
incoming_transfer_packages_file_name = f'~/Downloads/anonymized_company_incoming_transfer_packages_{NOW}'

export_outgoing_transfer_packages_dataframe = format_export_dataframe(anonymized_company_outgoing_transfer_packages_dataframe)
outgoing_transfer_packages_file_name = f'~/Downloads/anonymized_company_outgoing_transfer_packages_{NOW}'

export_sales_transactions_dataframe = format_export_dataframe(anonymized_company_sales_transactions_dataframe)
sales_transactions_file_name = f'~/Downloads/anonymized_company_sales_transactions_{NOW}'

export_inventory_packages_dataframe = format_export_dataframe(anonymized_company_inventory_packages_dataframe)
inventory_packages_file_name = f'~/Downloads/anonymized_company_inventory_packages_{NOW}'

if is_export_enabled:
    num_incoming_transfer_packages = _export_dataframe_to_files(
        'incoming transfer packages',
        export_incoming_transfer_packages_dataframe,
        incoming_transfer_packages_file_name,
    )
    num_outgoing_transfer_packages = _export_dataframe_to_files(
        'outgoing transfer packages',
        export_outgoing_transfer_packages_dataframe,
        outgoing_transfer_packages_file_name,
    )
    num_sales_transactions = _export_dataframe_to_files(
        'sales transactions',
        export_sales_transactions_dataframe,
        sales_transactions_file_name,
    )
    num_inventory_packages = _export_dataframe_to_files(
        'inventory packages',
        export_inventory_packages_dataframe,
        inventory_packages_file_name,
    )