In [None]:
# Steps to install
# 1. pip install sqlalchemy-bigquery google-cloud-bigquery-storage pyarrow
# 2. Copy the credentials file to wherever you set BIGQUERY_CREDENTIALS_PATH to

In [1]:
import json
import os
import pandas as pd
import pyarrow
import numpy as np

import sys
from os import path
import numpy

from dotenv import load_dotenv
from sqlalchemy import create_engine


load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')

In [2]:
sys.path.append(path.realpath(path.join(os.getcwd(), "../core")))
import create_queries
%load_ext autoreload
%autoreload 2

In [3]:
import prepare_data

In [4]:
pd.options.display.float_format = '{:,.2f}'.format

In [5]:
from datetime import date

In [6]:
today = date.today()

In [7]:
COMPANY_NAME = 'CTE'
COMPANY_IDENTIFIER = 'CTE'
TRANSFER_PACKAGES_START_DATE = '2020-01-01'
SALES_TRANSACTIONS_START_DATE = '2020-01-01'

INVENTORY_DATE = '01/10/2022'

In [8]:
company_incoming_transfer_packages_query = create_queries.create_company_incoming_transfer_packages_query(COMPANY_IDENTIFIER, TRANSFER_PACKAGES_START_DATE)
company_outgoing_transfer_packages_query = create_queries.create_company_outgoing_transfer_packages_query(COMPANY_IDENTIFIER, TRANSFER_PACKAGES_START_DATE)
company_sales_transactions_query = create_queries.create_company_sales_transactions_query(COMPANY_IDENTIFIER, SALES_TRANSACTIONS_START_DATE)
company_inventory_packages_query = create_queries.create_company_inventory_packages_query(COMPANY_IDENTIFIER)

engine = create_engine('bigquery://bespoke-financial/ProdMetrcData', credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH))
company_incoming_transfer_packages_dataframe = pd.read_sql_query(company_incoming_transfer_packages_query, engine)
company_outgoing_transfer_packages_dataframe = pd.read_sql_query(company_outgoing_transfer_packages_query, engine)
company_sales_transactions_dataframe = pd.read_sql_query(company_sales_transactions_query, engine)
company_inventory_packages_dataframe = pd.read_sql_query(company_inventory_packages_query, engine)

In [9]:
deduped_sales_receipts_dataframe = prepare_data.dedupe_sales_transactions(company_sales_transactions_dataframe)

In [10]:
df_in_og = company_incoming_transfer_packages_dataframe
df_inventory_og = company_inventory_packages_dataframe
df_sales_og = deduped_sales_receipts_dataframe

In [11]:
df_in = df_in_og
df_inventory = df_inventory_og
df_sales = df_sales_og

In [16]:
company_inventory_packages_dataframe

Unnamed: 0,license_number,package_id,package_label,type,packaged_date,last_modified_at,package_type,product_name,product_category_name,quantity,...,item_id,item_product_category_type,production_batch_number,source_production_batch_numbers,source_harvest_names,is_testing_sample,is_trade_sample,is_on_hold,archived_date,finished_date


In [13]:
df_in.groupby('license_number').sum()

In [78]:
license_number = 'AU-R-000156'

In [79]:
df_license_specific = df_in[df_in['license_number'] == license_number]
df_license_specific.reset_index(drop=True, inplace=True)
legal_name = df_license_specific['recipient_facility_name'][0]

In [80]:
df_sales = df_sales[df_sales['license_number'] == license_number]
df_inventory = df_inventory[df_inventory['license_number'] == license_number]

In [158]:
# df_in.groupby('recipient_facility_name').sum()
# df_sales_licenses = df_sales.groupby('license_number').sum()
# df_sales_licenses.to_csv('./TL_licenses.csv')
# df_inventory.groupby('license_number').sum()
# df_sales = df_sales[(df_sales['license_number'] == '402-00473') | (df_sales['license_number'] == '402R-00536')]
# df_inventory = df_inventory[(df_inventory['license_number'] == '402-00473') | (df_inventory['license_number'] == '402R-00536')]

## Sales

In [81]:
df_sales['per_unit'] = df_sales['tx_total_price'] / df_sales['tx_quantity_sold']
df_sales['year_month'] = df_sales['sales_datetime'].dt.strftime("%Y-%m")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [82]:
s_revenue = df_sales.groupby('year_month')['tx_total_price'].sum()
df_revenue = pd.Series(s_revenue).to_frame()
df_revenue = df_revenue.reset_index()
df_revenue.rename(columns={'tx_total_price': 'revenue'}, inplace=True)

In [83]:
s_total_count = df_sales.groupby('year_month')['tx_total_price'].count()
df_total_count = pd.Series(s_total_count).to_frame()
df_total_count = df_total_count.reset_index()
df_total_count.rename(columns={'tx_total_price':'total_count'}, inplace=True)

In [29]:
# df_sales.to_csv('./WTH_sales.csv')

## Vendor Churn

In [22]:
df_in.to_csv('./verts_incoming.csv')

In [163]:
df_vendor_churn = df_in[df_in['license_number'] == license_number]

In [164]:
df_vendor_churn.to_csv('./' + license_number + '_incoming.csv')

In [165]:
# df_sales.to_csv('./' + license_number + '_sales.csv')

In [166]:
# df_vendor_churn = df_in[(df_in['license_number'] == '402-01142') | (df_in['license_number'] == '402R-00573')]

In [167]:
# df_vendor_churn.to_csv('./Tweedleaf_Jason_incoming.csv')

## Incoming - get the avg price per package id then product name

In [168]:
# df_in = df_in[df_in['shipper_wholesale_price'] > 1]

In [84]:
df_in['per_unit_incoming'] = df_in['shipper_wholesale_price'] / df_in['shipped_quantity']
df_in_price = df_in[df_in['shipper_wholesale_price'].notnull()]

In [85]:
average_incoming_package_id = df_in_price.groupby('package_id')['per_unit_incoming'].mean()
df_avg_incoming_price = pd.Series(average_incoming_package_id).to_frame()
df_avg_incoming_price = df_avg_incoming_price.reset_index()

In [86]:
average_incoming_product = df_in_price.groupby('product_name')['per_unit_incoming'].mean()
df_avg_product = pd.Series(average_incoming_product).to_frame()
df_avg_product = df_avg_product.reset_index()
df_avg_product.rename(columns={'per_unit_incoming':'per_unit_product'}, inplace=True)

## COGS

In [87]:
df_cogs_package_id = pd.merge(df_sales, df_avg_incoming_price, left_on='tx_package_id', right_on='package_id', how='left')
df_cogs_package_id['total_incoming'] = df_cogs_package_id['per_unit_incoming'] * df_cogs_package_id['tx_quantity_sold']
df_cogs_package_id.replace([np.inf], np.nan, inplace=True)
df_cogs_package_id_notnull = df_cogs_package_id[df_cogs_package_id['total_incoming'].notnull()]

In [88]:
s_cogs = df_cogs_package_id_notnull.groupby('year_month')['total_incoming'].sum()
df_cogs_id = pd.Series(s_cogs).to_frame()
df_cogs_id = df_cogs_id.reset_index()

In [89]:
s_cogs_count = df_cogs_package_id_notnull.groupby('year_month')['total_incoming'].count()
df_cogs_count = pd.Series(s_cogs_count).to_frame()
df_cogs_count = df_cogs_count.reset_index()
df_cogs_count.rename(columns={'total_incoming':'count_incoming'}, inplace=True)

In [90]:
df_cogs_average_product = pd.merge(df_cogs_package_id, df_avg_product, left_on='tx_product_name', right_on='product_name', how='left')
df_cogs_average_product['total_product'] = df_cogs_average_product['tx_quantity_sold'] * df_cogs_average_product['per_unit_product']
df_cogs_null = df_cogs_average_product[df_cogs_average_product['per_unit_incoming'].isnull()]
df_cogs_product = df_cogs_null[df_cogs_null['per_unit_product'].notnull()]

In [91]:
product_sum = df_cogs_product.groupby('year_month')['total_product'].sum()
df_product_sum = pd.Series(product_sum).to_frame()
df_product_sum = df_product_sum.reset_index()
df_product_sum.rename(columns={'total_product':'product_sum'}, inplace=True)

In [92]:
product_count = df_cogs_product.groupby('year_month')['total_product'].count()
df_product_count = pd.Series(product_count).to_frame()
df_product_count = df_product_count.reset_index()
df_product_count.rename(columns={'total_product':'product_count'}, inplace=True)

In [93]:
df_cogs_product = pd.merge(df_product_sum, df_product_count)

In [94]:
# df_cogs_package_id.to_csv('./UHHC_cogs.csv')

## Summary

In [95]:
df_summary = pd.merge(df_revenue, df_cogs_product, how='left')
df_summary = pd.merge(df_summary, df_cogs_id, how='left')
df_summary['product_sum'] = df_summary['product_sum'].fillna(0)
df_summary['product_count'] = df_summary['product_count'].fillna(0)
df_summary['cogs'] = df_summary['total_incoming'] + df_summary['product_sum']
df_summary = pd.merge(df_summary, df_cogs_count)
df_summary = pd.merge(df_summary, df_total_count)
df_summary['total_count_incoming'] = df_summary['product_count'] + df_summary['count_incoming']
df_summary['margin_$'] = df_summary['revenue'] - df_summary['cogs']
df_summary['margin_%'] = df_summary['margin_$'] / df_summary['revenue']
df_summary['coverage'] = df_summary['total_count_incoming'] / df_summary['total_count']

In [96]:
df_summary_simp = df_summary[['year_month', 'revenue', 'cogs', 'margin_$', 'margin_%', 'total_count_incoming', 'total_count', 'coverage']]

In [97]:
df_summary_simp

Unnamed: 0,year_month,revenue,cogs,margin_$,margin_%,total_count_incoming,total_count,coverage
0,2020-01,27652.06,2856.0,24796.06,0.9,77.0,581,0.13
1,2020-02,139241.2,34716.67,104524.53,0.75,1948.0,3607,0.54
2,2020-03,182948.84,177831.0,5117.84,0.03,3275.0,5260,0.62
3,2020-04,170204.18,61550.89,108653.29,0.64,3828.0,5389,0.71
4,2020-05,193976.07,74715.97,119260.1,0.61,3950.0,5316,0.74
5,2020-06,209691.07,95929.3,113761.77,0.54,5286.0,5906,0.9
6,2020-07,209833.14,73081.06,136752.08,0.65,4576.0,6195,0.74
7,2020-08,184908.18,97059.36,87848.82,0.48,5160.0,5161,1.0
8,2020-09,139216.47,74890.96,64325.51,0.46,3786.0,3814,0.99
9,2020-10,94740.27,35134.03,59606.24,0.63,1675.0,2411,0.69


In [56]:
df_summary_simp.to_excel('./' + license_number + '_analysis.xlsx')

In [57]:
df_summary_simp.to_excel('./verts_analysis.xlsx')

## Inventory

In [58]:
df_inventory_incoming = pd.merge(df_inventory, df_avg_incoming_price, on='package_id', how='left')
df_inventory_incoming.replace([np.inf], np.nan, inplace=True)
df_inv_null = df_inventory_incoming[df_inventory_incoming['per_unit_incoming'].isnull()]
df_inv_product = pd.merge(df_inv_null, df_avg_product, on='product_name', how='left')
df_inv_product.replace([np.inf], np.nan, inplace=True)
df_inv_product_price = df_inv_product[df_inv_product['per_unit_product'].notnull()]
df_inv_product_price['total_price'] = df_inv_product_price['quantity'] * df_inv_product_price['per_unit_product']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [59]:
inventory_product_value = df_inv_product_price['total_price'].sum()

In [60]:
df_inventory_incoming['total_price'] = df_inventory_incoming['quantity'] * df_inventory_incoming['per_unit_incoming']
inventory_value = df_inventory_incoming['total_price'].sum()

In [61]:
# inventory_product_value

In [62]:
total_inv_value = inventory_product_value + inventory_value


In [63]:
print(f'{total_inv_value:,.2f}')

36,930.99


In [64]:
inv_count_product = df_inv_product_price['per_unit_product'].count()
inv_count_incoming = df_inventory_incoming['per_unit_incoming'].count()
inv_count_total = df_inventory_incoming['quantity'].count()
inv_total_incoming = inv_count_product + inv_count_incoming

In [65]:
inv_count_total

223

In [66]:
# inv_count_total

In [67]:
inventory_coverage = inv_total_incoming / inv_count_total
print(f'{inventory_coverage:,.0%}')

93%


In [68]:
data = [['Date', today], 
        ['Value', total_inv_value], 
        ['Total Incoming', inv_total_incoming], 
        ['Total', inv_count_total], 
        ['Coverage', inventory_coverage],
        ['License', license_number],
        ['Legal Name', legal_name]]

In [69]:
df_inventory_license = pd.DataFrame(data, columns = ['0', '1'])

In [70]:
df_inventory_license

Unnamed: 0,0,1
0,Date,2022-01-04
1,Value,36930.99
2,Total Incoming,208
3,Total,223
4,Coverage,0.93
5,License,402R-00804
6,Legal Name,BUFFALO BIOTECH LLC


In [71]:
df_inventory_license.to_excel('./' + license_number + '_inventory_name.xlsx')

# Notes

In [None]:
# df_cogs[df_cogs['per_unit_incoming'].isnull() & df_cogs['per_unit_product'].notnull()]
# df_sales = df_sales[df_sales['license_number'] == 'C10-0000824-LIC']
# df_inventory = df_inventory[df_inventory['license_number'] == 'C10-0000824-LIC']
# df_sales['tx_total_price'].count()

In [None]:
# %autoreload 2

# sys.path.append(path.realpath(path.join(os.getcwd(), "../../scripts/analysis")))
# sys.path.append(path.realpath(path.join(os.getcwd(), "../../src")))

# from util import active_inventory_util as util

In [None]:
# d = util.Download()
# d.download_dataframes(
#     incoming_transfer_packages_dataframe=company_incoming_transfer_packages_dataframe,
#     outgoing_transfer_packages_dataframe=company_outgoing_transfer_packages_dataframe,
#     sales_transactions_dataframe=company_sales_transactions_dataframe,
# )

In [None]:
# q = util.Query()
# q.inventory_dates = INVENTORY_DATES
# q.company_name = COMPANY_NAME

# id_to_history = util.get_histories(d)
# util.print_counts(id_to_history)
# util.create_inventory_xlsx(id_to_history, q)

In [None]:
# computed_inventory_package_records = util.create_inventory_dataframe_by_date(id_to_history, INVENTORY_DATE)
# computed_inventory_packages_dataframe = pandas.DataFrame(
#     computed_inventory_package_records,
#     columns=[
#         'package_id',
#         'Arrived Date',
#         'Product Category',
#         'Product Name',
#         'Current Quantity',
#         'Sold Date',
#     ]
# )
# computed_inventory_packages_dataframe

In [None]:
# date_to_inventory_dataframe = {}
# for date, inventory_records in date_to_inventory_records.items():
#     date_to_inventory_dataframe[date] = pandas.DataFrame(
#         inventory_records,
#         columns=[
#             'package_id',
#             'Arrived Date',
#             'Product Category',
#             'Product Name',
#             'Current Quantity',
#             'Sold Date',
#         ]
#     )
    
# date_to_inventory_dataframe[list(date_to_inventory_dataframe.keys())[0]]

In [None]:
# raw_incoming_transfer_packages_dataframe = pandas.read_excel('data/20210930/royal_apothecary_incoming_transfer_packages_20200101_20210930.xlsx', header=0)
# len(raw_incoming_transfer_packages_dataframe.index), raw_incoming_transfer_packages_dataframe.columns

In [None]:
# for date, inventory_dataframe in date_to_inventory_dataframe.items():
#     print(date)
#     print(f'# of packages in inventory: {len(inventory_dataframe.index)}')

#     inventory_with_incoming_transfer_packages_dataframe = inventory_dataframe.astype({'package_id': 'int64'}).merge(incoming_transfer_packages_dataframe, on='package_id', how='inner', suffixes=('_l', '_r'))
# #     print(f'# of packages in inventory with incoming package: {len(inventory_with_incoming_transfer_packages_dataframe.index)}')
    
#     inventory_with_cost_records = inventory_with_incoming_transfer_packages_dataframe.to_dict('record')

#     total_valuation_cost = 0
#     import math
#     for inventory_with_cost_record in inventory_with_cost_records:
#         incoming_shipped_price = inventory_with_cost_record['shipper_wholesale_price']
#         if math.isnan(incoming_shipped_price):
#             incoming_shipped_price = 0
#         incoming_shipped_quantity = inventory_with_cost_record['shipped_quantity']
#         current_quantity = inventory_with_cost_record['Current Quantity']
#         total_valuation_cost += float(current_quantity) * (incoming_shipped_price / incoming_shipped_quantity)

#     print(f'Inventory valuation (based on COST): ${total_valuation_cost}')
#     print('')