In [None]:
# Steps to install
# 1. pip install sqlalchemy-bigquery google-cloud-bigquery-storage pyarrow
# 2. Copy the credentials file to wherever you set BIGQUERY_CREDENTIALS_PATH to

In [1]:
import json
import os
import pandas as pd
import pyarrow
import numpy as np

import sys
from os import path
import numpy

from dotenv import load_dotenv
from sqlalchemy import create_engine


load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')

In [2]:
sys.path.append(path.realpath(path.join(os.getcwd(), "../core")))
import create_queries
%load_ext autoreload
%autoreload 2

In [3]:
import prepare_data

In [4]:
COMPANY_NAME = 'BBF'
COMPANY_IDENTIFIER = 'BBF'
TRANSFER_PACKAGES_START_DATE = '2020-01-01'
SALES_TRANSACTIONS_START_DATE = '2020-01-01'

INVENTORY_DATE = '11/05/2021'

In [5]:
company_incoming_transfer_packages_query = create_queries.create_company_incoming_transfer_packages_query(COMPANY_IDENTIFIER, TRANSFER_PACKAGES_START_DATE)
company_outgoing_transfer_packages_query = create_queries.create_company_outgoing_transfer_packages_query(COMPANY_IDENTIFIER, TRANSFER_PACKAGES_START_DATE)
company_sales_transactions_query = create_queries.create_company_sales_transactions_query(COMPANY_IDENTIFIER, SALES_TRANSACTIONS_START_DATE)
company_inventory_packages_query = create_queries.create_company_inventory_packages_query(COMPANY_IDENTIFIER)

engine = create_engine('bigquery://bespoke-financial/ProdMetrcData', credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH))
company_incoming_transfer_packages_dataframe = pd.read_sql_query(company_incoming_transfer_packages_query, engine)
company_outgoing_transfer_packages_dataframe = pd.read_sql_query(company_outgoing_transfer_packages_query, engine)
company_sales_transactions_dataframe = pd.read_sql_query(company_sales_transactions_query, engine)
company_inventory_packages_dataframe = pd.read_sql_query(company_inventory_packages_query, engine)

AttributeError: module 'create_queries' has no attribute 'create_company_incoming_transfer_packages_query'

In [6]:
deduped_sales_receipts_dataframe = prepare_data.dedupe_sales_transactions(company_sales_transactions_dataframe)

In [7]:
df_in = company_incoming_transfer_packages_dataframe
df_out = company_outgoing_transfer_packages_dataframe

In [8]:
df_inventory = company_inventory_packages_dataframe

In [9]:
df_sales = deduped_sales_receipts_dataframe

In [10]:
df_sales.groupby('license_number').sum()

Unnamed: 0_level_0,total_packages,rt_total_price,tx_quantity_sold,tx_total_price
license_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MR281525,84950,3184667.99,56353.5558,1119179.63


In [48]:
df_sales.count()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30858 entries, 0 to 30857
Data columns (total 19 columns):
 #   Column                    Non-Null Count  Dtype              
---  ------                    --------------  -----              
 0   id                        30858 non-null  object             
 1   license_number            30858 non-null  object             
 2   receipt_number            30858 non-null  object             
 3   rt_type                   30858 non-null  object             
 4   sales_customer_type       30858 non-null  object             
 5   sales_datetime            30858 non-null  datetime64[ns, UTC]
 6   total_packages            30858 non-null  int64              
 7   rt_total_price            30858 non-null  float64            
 8   tx_type                   30858 non-null  object             
 9   tx_package_id             30858 non-null  object             
 10  tx_package_label          30858 non-null  object             
 11  tx_product_name

In [12]:
df_inventory.groupby('license_number').sum()

Unnamed: 0_level_0,quantity,is_testing_sample,is_trade_sample,is_on_hold
license_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MP281397,22459.65,0,0,0
MR281525,14793.4,0,0,0


In [11]:
# df_sales['per_unit'] = df_sales['tx_total_price'] / df_sales['tx_quantity_sold']
# df_sales['year_month'] = df_sales['sales_datetime'].dt.strftime("%Y-%m")

In [13]:
pd.options.display.float_format = '{:,.2f}'.format

In [504]:
# df_sales = df_sales[df_sales['license_number'] == 'C10-0000824-LIC']

In [505]:
# df_inventory = df_inventory[df_inventory['license_number'] == 'C10-0000824-LIC']

## Sales

In [14]:
df_sales['per_unit'] = df_sales['tx_total_price'] / df_sales['tx_quantity_sold']
df_sales['year_month'] = df_sales['sales_datetime'].dt.strftime("%Y-%m")

In [15]:
s_total_count = df_sales.groupby('year_month')['tx_total_price'].count()
df_total_count = pd.Series(s_total_count).to_frame()
df_total_count = df_total_count.reset_index()
df_total_count.rename(columns={'tx_total_price':'total_count'}, inplace=True)

In [16]:
s_revenue = df_sales.groupby('year_month')['tx_total_price'].sum()
df_revenue = pd.Series(s_revenue).to_frame()
df_revenue = df_revenue.reset_index()
df_revenue.rename(columns={'tx_total_price': 'revenue'}, inplace=True)

In [17]:
df_sales_oct = df_sales[df_sales['year_month'] == '2021-10']

In [564]:
# df_sales_oct.to_csv('./CCC_sales_2021_10.csv')

## Incoming

In [54]:
df_in

Unnamed: 0,package_row_id,delivery_type,license_number,manifest_number,created_date,received_datetime,shipper_facility_license_number,shipper_facility_name,recipient_facility_license_number,recipient_facility_name,...,product_name,package_lab_results_status,shipper_wholesale_price,shipped_quantity,shipped_unit_of_measure,received_quantity,received_unit_of_measure,item_unit_weight,item_unit_weight_unit_of_measure_name,per_unit_incoming
0,30e8168f-4e94-4ccb-8d28-ca984fae85df,INCOMING_FROM_VENDOR,MR281525,0000798113,2021-11-03,2021-11-04 16:58:10+00:00,MP281303,"SIRA NATURALS, INC.",MR281525,Boston Bud Factory Inc.,...,M00001057253: Sira MAC1 Preroll 1g,passed,136.50,21.00,Grams,21.00,Grams,,,6.50
1,ee5c5b4e-b42e-4c1e-906a-afe1e711083c,INCOMING_FROM_VENDOR,MR281525,0000798113,2021-11-03,2021-11-04 16:58:10+00:00,MP281303,"SIRA NATURALS, INC.",MR281525,Boston Bud Factory Inc.,...,M00001057253: Sira MAC1 Preroll 1g,passed,188.50,29.00,Grams,29.00,Grams,,,6.50
2,f6bf506d-50f5-4407-a0ea-8b0ff0a9014d,INCOMING_FROM_VENDOR,MR281525,0000798113,2021-11-03,2021-11-04 16:58:10+00:00,MP281303,"SIRA NATURALS, INC.",MR281525,Boston Bud Factory Inc.,...,M00000975801: Sira Dosido #22 Preroll 1g,passed,390.00,60.00,Grams,60.00,Grams,,,6.50
3,f9fcbfd1-15ec-4a76-ad29-35a86caece21,INCOMING_FROM_VENDOR,MR281525,0000798113,2021-11-03,2021-11-04 16:58:10+00:00,MP281303,"SIRA NATURALS, INC.",MR281525,Boston Bud Factory Inc.,...,M00001074741: Sira Blueberry Preroll 1g,passed,559.00,86.00,Grams,86.00,Grams,,,6.50
4,40cc2c14-b30d-4053-b39d-1c825523ce4a,INCOMING_FROM_VENDOR,MR281525,0000798113,2021-11-03,2021-11-04 16:58:10+00:00,MP281303,"SIRA NATURALS, INC.",MR281525,Boston Bud Factory Inc.,...,M00001057253: Sira MAC1 Preroll 1g,passed,162.50,25.00,Grams,25.00,Grams,,,6.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
708,f842972b-7322-449b-986b-7375c068c6b2,INCOMING_FROM_VENDOR,MR281525,0000115711,2020-06-04,2020-06-05 15:15:36+00:00,MC281252,"SIRA NATURALS, INC.",MR281525,Boston Bud Factory Inc.,...,M00000211450: Sira Mother of Grapes Preroll 1g,passed,80.00,10.00,Grams,10.00,Grams,,,8.00
709,87553ab7-8fe4-4b2a-941e-b5820189fd13,INCOMING_FROM_VENDOR,MR281525,0000115810,2020-06-04,2020-06-05 16:41:20+00:00,MP281303,"SIRA NATURALS, INC.",MR281525,Boston Bud Factory Inc.,...,M00000197846: Vape Product - EHO - Sira Headba...,passed,300.00,10.00,Each,10.00,Each,,,30.00
710,8650a5fa-3c68-4f46-b13c-57f18efe2743,INCOMING_FROM_VENDOR,MR281525,0000115810,2020-06-04,2020-06-05 16:41:20+00:00,MP281303,"SIRA NATURALS, INC.",MR281525,Boston Bud Factory Inc.,...,Sira Dosido #22 Wax 1g,passed,300.00,10.00,Grams,10.00,Grams,,,30.00
711,af583a5d-73b6-410a-aec1-cb1c3a6d7ae6,INCOMING_FROM_VENDOR,MR281525,0000115810,2020-06-04,2020-06-05 16:41:20+00:00,MP281303,"SIRA NATURALS, INC.",MR281525,Boston Bud Factory Inc.,...,M00000245541: Vape Product - EHO - Entourage S...,passed,185.00,10.00,Each,10.00,Each,,,18.50


In [49]:
df_in['shipper_wholesale_price'].sum()

501779.83999999997

In [51]:
df_sales['tx_total_price'].sum()

1119179.63

In [53]:
(1119179.63 - 501779.83999999997) / 1119179.63

0.5516538841937285

In [18]:
df_in['per_unit_incoming'] = df_in['shipper_wholesale_price'] / df_in['shipped_quantity']

In [19]:
df_in_price = df_in[df_in['shipper_wholesale_price'].notnull()]

#### getting the average price per package id

In [20]:
average_incoming_package_id = df_in_price.groupby('package_id')['per_unit_incoming'].mean()
df_avg_incoming_price = pd.Series(average_incoming_package_id).to_frame()
df_avg_incoming_price = df_avg_incoming_price.reset_index()

#### getting average price per product name

In [21]:
average_incoming_product = df_in_price.groupby('product_name')['per_unit_incoming'].mean()
df_avg_product = pd.Series(average_incoming_product).to_frame()
df_avg_product = df_avg_product.reset_index()
df_avg_product.rename(columns={'per_unit_incoming':'per_unit_product'}, inplace=True)

## COGS

#### Incoming Price - Package ID

In [22]:
df_cogs_package_id = pd.merge(df_sales, df_avg_incoming_price, left_on='tx_package_id', right_on='package_id', how='left')

In [23]:
df_cogs_package_id['total_incoming'] = df_cogs_package_id['per_unit_incoming'] * df_cogs_package_id['tx_quantity_sold']

In [24]:
df_cogs_package_id.replace([np.inf], np.nan, inplace=True)

In [25]:
df_cogs_package_id_notnull = df_cogs_package_id[df_cogs_package_id['total_incoming'].notnull()]

In [26]:
s_cogs = df_cogs_package_id_notnull.groupby('year_month')['total_incoming'].sum()
df_cogs_id = pd.Series(s_cogs).to_frame()
df_cogs_id = df_cogs_id.reset_index()

In [27]:
s_cogs_count = df_cogs_package_id_notnull.groupby('year_month')['total_incoming'].count()
df_cogs_count = pd.Series(s_cogs_count).to_frame()
df_cogs_count = df_cogs_count.reset_index()
df_cogs_count.rename(columns={'total_incoming':'count_incoming'}, inplace=True)

#### Incoming Price - Average Product Price

In [28]:
df_cogs_average_product = pd.merge(df_cogs_package_id, df_avg_product, left_on='tx_product_name', right_on='product_name', how='left')

In [29]:
df_cogs_average_product['total_product'] = df_cogs_average_product['tx_quantity_sold'] * df_cogs_average_product['per_unit_product']

In [30]:
df_cogs_null = df_cogs_average_product[df_cogs_average_product['per_unit_incoming'].isnull()]

In [31]:
df_cogs_product = df_cogs_null[df_cogs_null['per_unit_product'].notnull()]

In [32]:
product_sum = df_cogs_product.groupby('year_month')['total_product'].sum()
df_product_sum = pd.Series(product_sum).to_frame()
df_product_sum = df_product_sum.reset_index()
df_product_sum.rename(columns={'total_product':'product_sum'}, inplace=True)

In [33]:
product_count = df_cogs_product.groupby('year_month')['total_product'].count()
df_product_count = pd.Series(product_count).to_frame()
df_product_count = df_product_count.reset_index()
df_product_count.rename(columns={'total_product':'product_count'}, inplace=True)

In [34]:
df_cogs_product = pd.merge(df_product_sum, df_product_count)

In [35]:
# df_cogs[df_cogs['per_unit_incoming'].isnull() & df_cogs['per_unit_product'].notnull()]

## Summary

In [36]:
df_summary = pd.merge(df_revenue, df_cogs_product, how='left')

In [37]:
df_summary = pd.merge(df_summary, df_cogs_id, how='left')

In [38]:
df_summary['product_sum'] = df_summary['product_sum'].fillna(0)

In [39]:
df_summary['product_count'] = df_summary['product_count'].fillna(0)

In [40]:
df_summary['cogs'] = df_summary['total_incoming'] + df_summary['product_sum']

In [41]:
df_summary = pd.merge(df_summary, df_cogs_count)
df_summary = pd.merge(df_summary, df_total_count)

In [42]:
df_summary['total_count_incoming'] = df_summary['product_count'] + df_summary['count_incoming']

In [43]:
df_summary['margin_$'] = df_summary['revenue'] - df_summary['cogs']
df_summary['margin_%'] = df_summary['margin_$'] / df_summary['revenue']

In [44]:
df_summary['coverage'] = df_summary['total_count_incoming'] / df_summary['total_count']

In [45]:
df_summary_simp = df_summary[['year_month', 'revenue', 'cogs', 'margin_$', 'margin_%', 'total_count_incoming', 'total_count', 'coverage']]

In [46]:
df_summary_simp

Unnamed: 0,year_month,revenue,cogs,margin_$,margin_%,total_count_incoming,total_count,coverage
0,2020-07,18987.45,7915.67,11071.78,0.58,555,555,1.0
1,2020-08,48676.69,11197.32,37479.37,0.77,955,1234,0.77
2,2020-09,57498.43,12230.65,45267.78,0.79,830,1729,0.48
3,2020-10,54418.89,10922.78,43496.11,0.8,741,1600,0.46
4,2020-11,53743.43,8114.2,45629.23,0.85,707,1580,0.45
5,2020-12,70496.93,6469.26,64027.67,0.91,758,1796,0.42
6,2021-01,84761.2,8666.21,76094.99,0.9,1047,2259,0.46
7,2021-02,73902.73,8221.32,65681.41,0.89,850,1933,0.44
8,2021-03,84178.99,11398.77,72780.22,0.86,1448,2331,0.62
9,2021-04,79262.08,8701.31,70560.77,0.89,1278,2108,0.61


In [593]:
# df_summary_simp.to_excel('./CCC_analysis.xlsx')

## Inventory

In [55]:
df_inventory_incoming = pd.merge(df_inventory, df_avg_incoming_price, on='package_id', how='left')

In [56]:
df_inventory_incoming.replace([np.inf], np.nan, inplace=True)

In [57]:
df_inv_null = df_inventory_incoming[df_inventory_incoming['per_unit_incoming'].isnull()]

In [58]:
df_inv_product = pd.merge(df_inv_null, df_avg_product, on='product_name', how='left')

In [59]:
df_inv_product.replace([np.inf], np.nan, inplace=True)

In [60]:
df_inv_product_price = df_inv_product[df_inv_product['per_unit_product'].notnull()]

In [61]:
df_inv_product_price['total_price'] = df_inv_product_price['quantity'] * df_inv_product_price['per_unit_product']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [62]:
inventory_product_value = df_inv_product_price['total_price'].sum()
# print(f'{inventory_product_value:,.2f}')

94.49


In [63]:
df_inventory_incoming['total_price'] = df_inventory_incoming['quantity'] * df_inventory_incoming['per_unit_incoming']
inventory_value = df_inventory_incoming['total_price'].sum()
# print(f'{inventory_value:,.2f}')

68,357.27


In [64]:
total_inv_value = inventory_product_value + inventory_value
print(f'{total_inv_value:,.2f}')

68,451.76


In [65]:
inv_count_product = df_inv_product_price['per_unit_product'].count()

In [67]:
inv_count_incoming = df_inventory_incoming['per_unit_incoming'].count()
inv_count_total = df_inventory_incoming['quantity'].count()

In [68]:
inv_total_incoming = inv_count_product + inv_count_incoming

In [69]:
inv_total_incoming

180

In [70]:
inv_count_total

255

In [71]:
inventory_coverage = inv_total_incoming / inv_count_total
print(f'{inventory_coverage:,.0%}')

71%


In [68]:
# df_inventory_incoming.to_csv('./ccc_inventory.csv')

# Notes

In [None]:
# df_in[df_in.duplicated('package_id')]
# df_sales[df_sales.duplicated('tx_package_id')]

In [None]:
# pd.merge(df_sales, df_in[['package_id','per_unit_incoming']], left_on='tx_package_id', right_on='package_id', how='left')

In [None]:
# df_sales.merge(df_in['package_id','per_unit_incoming'], left_on='tx_package_id', right_on='package_id', how='left')

In [None]:
# incoming_transfer_simp

In [None]:
# company_sales_transactions_dataframe.to_csv('hpcc_all_sales_10_13_21.csv')

In [None]:
# company_sales_transactions_dataframe.info()

In [None]:
# sales_simp['year_month'] = sales_simp['sales_datetime'].dt.strftime("%Y-%m")

In [None]:
# sales_simp.groupby(by=['year_month'], as_index=False).count()

In [None]:
# sales_drop = sales_simp.drop(['sales_datetime', 'month_year'], axis=1)

In [None]:
# sales_simp.to_csv('./hpcc_sales_2021_10_13.csv')

In [None]:
# INVENTORY_DATES = [
#     '09/30/2020',
#     '10/31/2020',
#     '11/30/2020',
#     '12/31/2020',
#     '01/31/2021',
#     '02/28/2021',
#     '03/31/2021',
#     '04/30/2021',
#     '05/31/2021',
#     '06/30/2021',
#     '07/31/2021',
#     '08/31/2021',
#     '09/30/2021',
# ]

In [None]:
# %autoreload 2

# sys.path.append(path.realpath(path.join(os.getcwd(), "../../scripts/analysis")))
# sys.path.append(path.realpath(path.join(os.getcwd(), "../../src")))

# from util import active_inventory_util as util

In [None]:
# d = util.Download()
# d.download_dataframes(
#     incoming_transfer_packages_dataframe=company_incoming_transfer_packages_dataframe,
#     outgoing_transfer_packages_dataframe=company_outgoing_transfer_packages_dataframe,
#     sales_transactions_dataframe=company_sales_transactions_dataframe,
# )

In [None]:
# q = util.Query()
# q.inventory_dates = INVENTORY_DATES
# q.company_name = COMPANY_NAME

# id_to_history = util.get_histories(d)
# util.print_counts(id_to_history)
# util.create_inventory_xlsx(id_to_history, q)

In [None]:
# computed_inventory_package_records = util.create_inventory_dataframe_by_date(id_to_history, INVENTORY_DATE)
# computed_inventory_packages_dataframe = pandas.DataFrame(
#     computed_inventory_package_records,
#     columns=[
#         'package_id',
#         'Arrived Date',
#         'Product Category',
#         'Product Name',
#         'Current Quantity',
#         'Sold Date',
#     ]
# )
# computed_inventory_packages_dataframe

In [None]:
# date_to_inventory_dataframe = {}
# for date, inventory_records in date_to_inventory_records.items():
#     date_to_inventory_dataframe[date] = pandas.DataFrame(
#         inventory_records,
#         columns=[
#             'package_id',
#             'Arrived Date',
#             'Product Category',
#             'Product Name',
#             'Current Quantity',
#             'Sold Date',
#         ]
#     )
    
# date_to_inventory_dataframe[list(date_to_inventory_dataframe.keys())[0]]

In [None]:
# raw_incoming_transfer_packages_dataframe = pandas.read_excel('data/20210930/royal_apothecary_incoming_transfer_packages_20200101_20210930.xlsx', header=0)
# len(raw_incoming_transfer_packages_dataframe.index), raw_incoming_transfer_packages_dataframe.columns

In [None]:
# for date, inventory_dataframe in date_to_inventory_dataframe.items():
#     print(date)
#     print(f'# of packages in inventory: {len(inventory_dataframe.index)}')

#     inventory_with_incoming_transfer_packages_dataframe = inventory_dataframe.astype({'package_id': 'int64'}).merge(incoming_transfer_packages_dataframe, on='package_id', how='inner', suffixes=('_l', '_r'))
# #     print(f'# of packages in inventory with incoming package: {len(inventory_with_incoming_transfer_packages_dataframe.index)}')
    
#     inventory_with_cost_records = inventory_with_incoming_transfer_packages_dataframe.to_dict('record')

#     total_valuation_cost = 0
#     import math
#     for inventory_with_cost_record in inventory_with_cost_records:
#         incoming_shipped_price = inventory_with_cost_record['shipper_wholesale_price']
#         if math.isnan(incoming_shipped_price):
#             incoming_shipped_price = 0
#         incoming_shipped_quantity = inventory_with_cost_record['shipped_quantity']
#         current_quantity = inventory_with_cost_record['Current Quantity']
#         total_valuation_cost += float(current_quantity) * (incoming_shipped_price / incoming_shipped_quantity)

#     print(f'Inventory valuation (based on COST): ${total_valuation_cost}')
#     print('')

In [None]:
# import json
# import pandas
# import numpy
# import os
# import sys
# from os import path

In [None]:
# df_cogs_2020_04 = df_cogs_package_id_notnull[df_cogs_package_id_notnull['year_month'] == '2020-04']
# df_cogs_2020_04.sum()
# df_cogs_2020_04.to_csv('./gf_2020_04.csv')
# df_cogs_package_id_notnull.to_csv('./gf_cogs_not_null.csv')
# df_cogs_package_id_notnull[df_cogs_package_id_notnull['year_month'] == '2021-04']

In [None]:
Outgoing
# df_out.info()
# df_out['per_unit'] = df_out['shipper_wholesale_price'] / df_out['shipped_quantity']
# df_out['year_month'] = df_out['received_datetime'].dt.strftime("%Y-%m")
# df_out.groupby('year_month').sum()