In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import json
import numpy 
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline  
import pyarrow
import sys
from tqdm import tqdm

from datetime import date
from dotenv import load_dotenv
from sqlalchemy import create_engine
from os import path
from typing import List,Dict, Tuple
from collections import defaultdict
pd.set_option("display.max_columns", None)

# Load variables from a local .env file into the process environment.
load_dotenv(verbose=True)
# NOTE(review): os.environ.get returns None when the variable is unset, and
# os.path.expanduser(None) below then raises TypeError - make sure
# BIGQUERY_CREDENTIALS_PATH is defined before running this cell.
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')
# SQLAlchemy engine used by every pd.read_sql_query call in this notebook.
engine = create_engine('bigquery://bespoke-financial/ProdMetrcData', credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH))

# Make project-local modules importable. The paths are resolved relative to
# os.getcwd(), so they depend on the directory the kernel was started in.
sys.path.append(path.realpath(path.join(os.getcwd(), "../core")))
sys.path.append(path.realpath(path.join(os.getcwd(), "../../src")))
sys.path.append(path.realpath(path.join(os.getcwd(), "../../scripts")))

import create_queries
import prepare_data

from bespoke.inventory.analysis.shared import download_util, inventory_types
from bespoke.inventory.analysis import active_inventory_util as util
from bespoke.inventory.analysis import inventory_valuations_util as valuations_util

%load_ext autoreload
%autoreload 2

In [3]:
from underwriting import data_quality_checks

In [4]:
def cogs_analysis(df_in:pd.DataFrame,df_sales:pd.DataFrame,freq: str) -> Tuple[pd.DataFrame,pd.DataFrame]:
    """Estimate COGS, gross margin and cost coverage per period.

    Each sales transaction is costed in two passes:
      1. by package id: mean incoming per-unit price of the matching
         ``package_id`` times ``tx_quantity_sold``;
      2. fill-down by product name: transactions without a usable package-id
         cost use the mean incoming per-unit price of the matching
         ``product_name`` instead.

    Args:
        df_in: incoming packages; needs ``package_id``, ``product_name``,
            ``shipper_wholesale_price`` and ``shipped_quantity``.
        df_sales: sales transactions; needs ``sales_datetime`` (datetime),
            ``tx_total_price``, ``tx_quantity_sold``, ``tx_package_id`` and
            ``tx_product_name``.
        freq: ``'monthly'`` (periods labelled ``%Y-%m``) or ``'weekly'``
            (``%Y-%W``).

    Returns:
        ``(summary, detail)`` where ``summary`` has one row per period with
        revenue, cogs, margin and coverage columns, and ``detail`` is the
        transaction-level frame with both per-unit cost estimates attached.
        Periods in which no transaction could be costed by package id are
        absent from ``summary`` (inner merge on ``count_incoming``).

    Raises:
        ValueError: if ``freq`` is not ``'monthly'`` or ``'weekly'``.
    """
    period_formats = {'monthly': "%Y-%m", 'weekly': "%Y-%W"}
    if freq not in period_formats:
        # fail before any work is done instead of a late KeyError on 'date'
        raise ValueError("freq must be 'monthly' or 'weekly', got {!r}".format(freq))

    # work on copies so the caller's frames do not silently gain columns
    df_sales = df_sales.copy()
    df_in = df_in.copy()
    infinities = [numpy.inf, -numpy.inf]

    df_sales['per_unit'] = df_sales['tx_total_price'] / df_sales['tx_quantity_sold']
    # set frequency
    df_sales['date'] = df_sales['sales_datetime'].dt.strftime(period_formats[freq])
    if freq == 'weekly':
        df_sales['week'] = df_sales['sales_datetime'].dt.strftime("%W")

    # total # of trxns and revenue per period
    sales_by_date = df_sales.groupby('date')['tx_total_price']
    df_total_count = sales_by_date.count().rename('total_count').reset_index()
    df_revenue = sales_by_date.sum().rename('revenue').reset_index()

    df_in['per_unit_incoming'] = df_in['shipper_wholesale_price'] / df_in['shipped_quantity']

    # per unit price by package id / by product name (priced packages only)
    df_in_price = df_in[df_in['shipper_wholesale_price'].notnull()]
    df_avg_incoming_price = df_in_price.groupby('package_id')['per_unit_incoming'].mean().reset_index()
    df_avg_product = (
        df_in_price.groupby('product_name')['per_unit_incoming'].mean()
        .rename('per_unit_product')
        .reset_index()
    )

    # merge with (cogs by package id)
    df_cogs_package_id = pd.merge(df_sales, df_avg_incoming_price, left_on='tx_package_id', right_on='package_id', how='left')
    df_cogs_package_id['total_incoming'] = df_cogs_package_id['per_unit_incoming'] * df_cogs_package_id['tx_quantity_sold']
    # a zero shipped/sold quantity yields +/-inf; treat those as missing
    df_cogs_package_id = df_cogs_package_id.replace(infinities, numpy.nan)
    df_cogs_package_id_notnull = df_cogs_package_id[df_cogs_package_id['total_incoming'].notnull()]

    # sum cogs and count # of trxn by package id
    incoming_by_date = df_cogs_package_id_notnull.groupby('date')['total_incoming']
    df_cogs_id = incoming_by_date.sum().reset_index()
    df_cogs_count = incoming_by_date.count().rename('count_incoming').reset_index()

    # merge with (cogs by product name)
    df_cogs_average_product = pd.merge(df_cogs_package_id, df_avg_product, left_on='tx_product_name', right_on='product_name', how='left')
    df_cogs_average_product['total_product'] = df_cogs_average_product['tx_quantity_sold'] * df_cogs_average_product['per_unit_product']
    # same cleanup as the package-id pass: an infinite product-level price
    # must not leak into product_sum / cogs
    df_cogs_average_product = df_cogs_average_product.replace(infinities, numpy.nan)
    # only transactions the package-id pass could not cost are filled down
    df_cogs_null = df_cogs_average_product[df_cogs_average_product['per_unit_incoming'].isnull()]
    df_cogs_product = df_cogs_null[df_cogs_null['per_unit_product'].notnull()]

    # sum cogs and count # of trxn filldown by product name
    product_by_date = df_cogs_product.groupby('date')['total_product']
    df_product_sum = product_by_date.sum().rename('product_sum').reset_index()
    df_product_count = product_by_date.count().rename('product_count').reset_index()
    df_cogs_product_df = pd.merge(df_product_sum, df_product_count, on='date')

    # prepare summary
    df_summary = pd.merge(df_revenue, df_cogs_product_df, on='date', how='left')
    df_summary = pd.merge(df_summary, df_cogs_id, on='date', how='left')
    df_summary['product_sum'] = df_summary['product_sum'].fillna(0)
    df_summary['product_count'] = df_summary['product_count'].fillna(0)
    # total cogs = by package id cogs + by product name cogs
    df_summary['cogs'] = df_summary['total_incoming'] + df_summary['product_sum']
    # inner merges: periods without any package-id-costed transaction drop out
    df_summary = pd.merge(df_summary, df_cogs_count, on='date')
    df_summary = pd.merge(df_summary, df_total_count, on='date')
    # total count = by package id count + by product count
    df_summary['total_count_incoming'] = df_summary['count_incoming'] + df_summary['product_count']
    df_summary['margin_$'] = df_summary['revenue'] - df_summary['cogs']
    df_summary['margin_%'] = df_summary['margin_$'] / df_summary['revenue']
    # share of transactions for which some cost estimate was found
    df_summary['coverage'] = df_summary['total_count_incoming'] / df_summary['total_count']
    summary_columns = ['date', 'revenue', 'cogs', 'margin_$', 'margin_%', 'total_count_incoming','product_count','count_incoming', 'coverage']
    # return an independent frame so callers can add columns to it safely
    df_summary_simp = df_summary[summary_columns].copy()

    return df_summary_simp,df_cogs_average_product




In [5]:
# Lower date bounds passed to the incoming-transfer-package and sales-receipt
# query builders in read_df_in / read_df_sales.
TRANSFER_PACKAGES_START_DATE = '2020-01-01'
SALES_TRANSACTIONS_START_DATE = '2020-01-01'
#CURRENT_MONTH = '2022-02'

In [6]:
# Retailer master list: first CSV column is the index, the remaining four
# columns are renamed positionally and the identifier is forced to str.
ca_df = (
    pd.read_csv('all_retailer_company.csv', index_col=0)
    .set_axis(['company_name', 'company_identifier', 'state', 'licenses'], axis=1)
    .astype({'company_identifier': str})
)

In [7]:
# Drop the excluded company identifiers.
# NOTE(review): the reason these four are excluded is not recorded here.
# .copy() makes the result an independent frame, so adding columns to it
# later is not a chained assignment on a slice of ca_df.
ca_df_clean = ca_df[~ca_df['company_identifier'].isin(['PL','WHT','GF','UR'])].copy()
ca_df_clean.shape

(62, 4)

In [8]:
len(ca_df_clean['company_identifier'].unique())

62

In [9]:
# Split the ';'-separated license string into a list per company.
# Vectorised (the previous comprehension rebuilt the full column list on
# every row) and assigned via .assign so no slice is written to in place.
ca_df_clean = ca_df_clean.assign(lic_list=ca_df_clean['licenses'].str.split(';'))

In [10]:
ca_df_clean.head()

Unnamed: 0,company_name,company_identifier,state,licenses,lic_list
0,99 High Tide,99HT,CA,C10-0000279-LIC,[C10-0000279-LIC]
1,Altum LLC,ALT,MI,PC-000310;AU-R-000380,"[PC-000310, AU-R-000380]"
2,Bella Sol,BS,MI,PC-000185;AU-R-000156,"[PC-000185, AU-R-000156]"
3,Big Medicine Cannabissary,BMC,CO,402-00930,[402-00930]
4,Boston Bud Factory,BBF,MA,MR281525,[MR281525]


In [11]:
def read_df_in(COMPANY_IDENTIFIER,license_numbers):
    """Query incoming transfer packages for the given company and licenses.

    The query is built by ``create_queries`` from ``COMPANY_IDENTIFIER``, the
    module-level ``TRANSFER_PACKAGES_START_DATE`` and ``license_numbers``, then
    executed against the notebook's ``engine``.

    Returns:
        The query result as a DataFrame.
    """
    incoming_query = create_queries.create_company_incoming_transfer_packages_query(
        COMPANY_IDENTIFIER,
        TRANSFER_PACKAGES_START_DATE,
        license_numbers=license_numbers,
    )
    return pd.read_sql_query(incoming_query, engine)


In [12]:
def read_df_sales(COMPANY_IDENTIFIER,license_numbers):
    """Query sales receipts with transactions and return them deduplicated.

    The query is built by ``create_queries`` from ``COMPANY_IDENTIFIER``, the
    module-level ``SALES_TRANSACTIONS_START_DATE`` and ``license_numbers``. The
    result is passed through ``prepare_data.dedupe_sales_transactions``;
    ``sales_datetime`` is then parsed to datetime and a ``sales_month``
    (``%Y-%m``) column is added.

    Returns:
        The deduplicated sales DataFrame.
    """
    sales_query = create_queries.create_company_sales_receipts_with_transactions_query(
        COMPANY_IDENTIFIER,
        SALES_TRANSACTIONS_START_DATE,
        license_numbers=license_numbers,
    )
    raw_sales = pd.read_sql_query(sales_query, engine)
    deduped_sales = prepare_data.dedupe_sales_transactions(raw_sales)

    sales_timestamps = pd.to_datetime(deduped_sales['sales_datetime'])
    deduped_sales['sales_datetime'] = sales_timestamps
    deduped_sales['sales_month'] = sales_timestamps.dt.strftime('%Y-%m')
    return deduped_sales


In [13]:
def get_cogs_list(company_list,transfer_packages_start_date,sales_transactrions_start_date,map_df):
    """Build monthly COGS / margin summaries for every license of every company.

    Args:
        company_list: company identifiers to process.
        transfer_packages_start_date: currently unused - read_df_in reads the
            module-level TRANSFER_PACKAGES_START_DATE instead.
        sales_transactrions_start_date: currently unused - read_df_sales reads
            the module-level SALES_TRANSACTIONS_START_DATE instead.
        map_df: frame with ``company_identifier`` and ``lic_list`` (list of
            license numbers) columns.

    Returns:
        ``{company_identifier: {license_number: summary DataFrame}}`` where each
        summary is the monthly output of ``cogs_analysis`` indexed by ``date``
        and extended with after-tax and rolling-mean margin columns.

    Raises:
        IndexError: if a company identifier has no row in ``map_df``.
    """
    # NOTE(review): the cap and tax multipliers are hard-coded; their source
    # is not documented in this notebook - confirm before reuse.
    max_per_unit_incoming = 10000
    revenue_tax_multiplier = 1.15
    cogs_tax_multiplier = 1.27
    # (column label, window in months)
    rolling_windows = (('past_quarter', 3), ('past_2quarters', 6), ('past_3quarters', 9))
    # (source column, output column template); order fixes the column order
    rolling_targets = (
        ('margin_%', 'gm_{}'),
        ('margin_%_after_tax', 'gm_{}_after_tax'),
        ('margin_$_after_tax', 'gm$_{}_after_tax'),
    )

    cogs_df_list = {}
    for c in tqdm(company_list):
        df_summary_simp_list = {}
        print(c)
        license_numbers = map_df[map_df['company_identifier'] == c]['lic_list'].to_list()[0]
        for license_number in tqdm(license_numbers):
            print(license_number)
            df_in = read_df_in([c],[license_number])
            df_sales = read_df_sales([c],[license_number])
            # drop implausible unit prices (also removes inf from zero quantities);
            # .copy() so later column writes do not target a slice
            df_in['per_unit_incoming'] = df_in['shipper_wholesale_price'] / df_in['shipped_quantity']
            df_in = df_in[df_in['per_unit_incoming'] <= max_per_unit_incoming].copy()
            df_summary_simp, _ = cogs_analysis(df_in,df_sales,'monthly')
            # own the frame before adding columns to it
            df_summary_simp = df_summary_simp.copy()
            df_summary_simp.index = df_summary_simp['date']
            # tax treatment
            df_summary_simp['revenue_after_tax'] = df_summary_simp['revenue'] * revenue_tax_multiplier
            df_summary_simp['cogs_after_tax'] = df_summary_simp['cogs'] * cogs_tax_multiplier
            df_summary_simp['margin_$_after_tax'] = df_summary_simp['revenue_after_tax'] - df_summary_simp['cogs_after_tax']
            df_summary_simp['margin_%_after_tax'] = df_summary_simp['margin_$_after_tax'] / df_summary_simp['revenue_after_tax']

            # rolling gm % (pre/after tax) and rolling gm $ (after tax)
            for source_column, name_template in rolling_targets:
                for label, window in rolling_windows:
                    df_summary_simp[name_template.format(label)] = (
                        df_summary_simp[source_column].rolling(window).mean().values
                    )
            df_summary_simp_list[license_number] = df_summary_simp
        cogs_df_list[c] =  df_summary_simp_list
    return cogs_df_list

    

    

In [None]:
#list(ca_df_clean['company_identifier'])[0:3]

# cogs & coverage

In [14]:
# Build {company_identifier: {license_number: monthly summary}} for every
# retailer. This queries BigQuery once per license for incoming packages and
# once for sales; the recorded run below took about 24 minutes.
a = get_cogs_list(list(ca_df_clean['company_identifier']),TRANSFER_PACKAGES_START_DATE,SALES_TRANSACTIONS_START_DATE,ca_df_clean)

  0%|          | 0/62 [00:00<?, ?it/s]

99HT



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000279-LIC



100%|██████████| 1/1 [00:06<00:00,  6.40s/it][A
  2%|▏         | 1/62 [00:06<06:30,  6.41s/it]

ALT



  0%|          | 0/2 [00:00<?, ?it/s][A

PC-000310



 50%|█████     | 1/2 [00:02<00:02,  2.27s/it][A

AU-R-000380



100%|██████████| 2/2 [00:07<00:00,  3.81s/it][A
  3%|▎         | 2/62 [00:14<07:06,  7.12s/it]

BS



  0%|          | 0/2 [00:00<?, ?it/s][A

PC-000185



 50%|█████     | 1/2 [00:03<00:03,  3.38s/it][A

AU-R-000156



100%|██████████| 2/2 [00:08<00:00,  4.22s/it][A
  5%|▍         | 3/62 [00:22<07:35,  7.72s/it]

BMC



  0%|          | 0/1 [00:00<?, ?it/s][A

402-00930



100%|██████████| 1/1 [00:07<00:00,  7.04s/it][A
  6%|▋         | 4/62 [00:29<07:12,  7.45s/it]

BBF



  0%|          | 0/1 [00:00<?, ?it/s][A

MR281525



100%|██████████| 1/1 [00:07<00:00,  7.28s/it][A
  8%|▊         | 5/62 [00:36<07:01,  7.39s/it]

BUD



  0%|          | 0/5 [00:00<?, ?it/s][A

C9-0000464-LIC



 20%|██        | 1/5 [00:07<00:31,  7.86s/it][A

C9-0000467-LIC



 40%|████      | 2/5 [00:15<00:22,  7.49s/it][A

C9-0000444-LIC



 60%|██████    | 3/5 [00:22<00:14,  7.27s/it][A

C9-0000167-LIC



 80%|████████  | 4/5 [00:33<00:08,  8.80s/it][A

C9-0000399-LIC



100%|██████████| 5/5 [00:42<00:00,  8.44s/it][A
 10%|▉         | 6/62 [01:18<17:56, 19.22s/it]

CHO



  0%|          | 0/1 [00:00<?, ?it/s][A

AU-R-000575



100%|██████████| 1/1 [00:11<00:00, 11.31s/it][A
 11%|█▏        | 7/62 [01:30<15:14, 16.63s/it]

CG



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000157-LIC



100%|██████████| 1/1 [00:08<00:00,  8.80s/it][A
 13%|█▎        | 8/62 [01:39<12:43, 14.14s/it]

CPA



  0%|          | 0/2 [00:00<?, ?it/s][A

C10-0000461-LIC



 50%|█████     | 1/2 [00:18<00:18, 18.64s/it][A

C9-0000348-LIC



100%|██████████| 2/2 [00:28<00:00, 14.35s/it][A
 15%|█▍        | 9/62 [02:07<16:30, 18.69s/it]

CPC



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000056-LIC



100%|██████████| 1/1 [00:08<00:00,  8.63s/it][A
 16%|█▌        | 10/62 [02:16<13:30, 15.59s/it]

CSC



  0%|          | 0/2 [00:00<?, ?it/s][A

C10-0000670-LIC



 50%|█████     | 1/2 [00:16<00:16, 16.35s/it][A

C10-0000918-LIC



100%|██████████| 2/2 [00:23<00:00, 11.89s/it][A
 18%|█▊        | 11/62 [02:40<15:23, 18.10s/it]

CCC



  0%|          | 0/1 [00:00<?, ?it/s][A

C12-0000087-LIC



100%|██████████| 1/1 [00:18<00:00, 18.79s/it][A
 19%|█▉        | 12/62 [02:59<15:15, 18.31s/it]

CC



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000758-LIC



100%|██████████| 1/1 [00:07<00:00,  7.18s/it][A
 21%|██        | 13/62 [03:06<12:12, 14.94s/it]

DWF



  0%|          | 0/1 [00:00<?, ?it/s][A

402R-00804



100%|██████████| 1/1 [00:09<00:00,  9.74s/it][A
 23%|██▎       | 14/62 [03:15<10:41, 13.37s/it]

DCO



  0%|          | 0/2 [00:00<?, ?it/s][A

050-10052885D4C



 50%|█████     | 1/2 [00:11<00:11, 11.13s/it][A

050-10070593E9E



100%|██████████| 2/2 [00:17<00:00,  8.62s/it][A
 24%|██▍       | 15/62 [03:33<11:23, 14.54s/it]

DL



  0%|          | 0/1 [00:00<?, ?it/s][A

MR283369



100%|██████████| 1/1 [00:08<00:00,  8.26s/it][A
 26%|██▌       | 16/62 [03:41<09:41, 12.65s/it]

DGG



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000016-LIC



100%|██████████| 1/1 [00:07<00:00,  7.16s/it][A
 27%|██▋       | 17/62 [03:48<08:14, 11.00s/it]

DG



  0%|          | 0/6 [00:00<?, ?it/s][A

PC-000485



 17%|█▋        | 1/6 [00:06<00:31,  6.25s/it][A

AU-R-000287



 33%|███▎      | 2/6 [00:18<00:38,  9.75s/it][A

AU-R-000359



 50%|█████     | 3/6 [00:26<00:27,  9.01s/it][A

AU-R-000422



 67%|██████▋   | 4/6 [00:42<00:23, 11.67s/it][A

AU-R-000470



 83%|████████▎ | 5/6 [00:49<00:10, 10.16s/it][A

AU-R-000572



100%|██████████| 6/6 [00:56<00:00,  9.40s/it][A
 29%|██▉       | 18/62 [04:44<18:04, 24.64s/it]

EL



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000370-LIC



100%|██████████| 1/1 [00:06<00:00,  6.58s/it][A
 31%|███       | 19/62 [04:51<13:46, 19.22s/it]

EMA



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000774-LIC



100%|██████████| 1/1 [00:10<00:00, 10.25s/it][A
 32%|███▏      | 20/62 [05:01<11:34, 16.52s/it]

EMF



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000939-LIC



100%|██████████| 1/1 [00:06<00:00,  6.43s/it][A
 34%|███▍      | 21/62 [05:08<09:13, 13.50s/it]

EMM



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000786-LIC



100%|██████████| 1/1 [00:11<00:00, 11.09s/it][A
 35%|███▌      | 22/62 [05:19<08:30, 12.77s/it]

EMT



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000695-LIC



100%|██████████| 1/1 [00:15<00:00, 15.00s/it][A
 37%|███▋      | 23/62 [05:34<08:44, 13.44s/it]

EH



  0%|          | 0/1 [00:00<?, ?it/s][A

MR283073



100%|██████████| 1/1 [00:06<00:00,  6.44s/it][A
 39%|███▊      | 24/62 [05:40<07:11, 11.34s/it]

FW



  0%|          | 0/5 [00:00<?, ?it/s][A

402-00558



 20%|██        | 1/5 [00:09<00:36,  9.00s/it][A

402R-00017



 40%|████      | 2/5 [00:31<00:50, 16.77s/it][A

402-00891



 60%|██████    | 3/5 [00:42<00:28, 14.12s/it][A

402R-00020



 80%|████████  | 4/5 [01:12<00:20, 20.50s/it][A

402R-00747



100%|██████████| 5/5 [01:27<00:00, 17.53s/it][A
 40%|████      | 25/62 [07:08<21:06, 34.24s/it]

GT



  0%|          | 0/2 [00:00<?, ?it/s][A

C9-0000385-LIC



 50%|█████     | 1/2 [00:10<00:10, 10.57s/it][A

C9-0000170-LIC



100%|██████████| 2/2 [00:16<00:00,  8.48s/it][A
 42%|████▏     | 26/62 [07:25<17:25, 29.06s/it]

GLNR



  0%|          | 0/6 [00:00<?, ?it/s][A

PC-000334



 17%|█▋        | 1/6 [00:05<00:27,  5.47s/it][A

PC-000613



 33%|███▎      | 2/6 [00:10<00:19,  4.93s/it][A

PC-000722



 50%|█████     | 3/6 [00:14<00:14,  4.92s/it][A

AU-R-000182



 67%|██████▋   | 4/6 [00:22<00:12,  6.06s/it][A

AU-R-000461



 83%|████████▎ | 5/6 [00:27<00:05,  5.67s/it][A

AU-R-000559



100%|██████████| 6/6 [00:33<00:00,  5.61s/it][A
 44%|████▎     | 27/62 [07:59<17:45, 30.44s/it]

GRG



  0%|          | 0/2 [00:00<?, ?it/s][A

C10-0000169-LIC



 50%|█████     | 1/2 [00:25<00:25, 25.46s/it][A

C10-0000408-LIC



100%|██████████| 2/2 [00:37<00:00, 18.98s/it][A
 45%|████▌     | 28/62 [08:37<18:31, 32.70s/it]

GTR



  0%|          | 0/2 [00:00<?, ?it/s][A

PC-000225



 50%|█████     | 1/2 [00:07<00:07,  7.77s/it][A

AU-R-000163



100%|██████████| 2/2 [00:28<00:00, 14.05s/it][A
 47%|████▋     | 29/62 [09:05<17:13, 31.32s/it]

GHC



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000414-LIC



100%|██████████| 1/1 [00:15<00:00, 15.35s/it][A
 48%|████▊     | 30/62 [09:20<14:08, 26.53s/it]

HB



  0%|          | 0/3 [00:00<?, ?it/s][A

C10-0000339-LIC



 33%|███▎      | 1/3 [00:23<00:47, 23.64s/it][A

C10-0000624-LIC



 67%|██████▋   | 2/3 [00:36<00:17, 17.57s/it][A

C10-0000301-LIC



100%|██████████| 3/3 [00:43<00:00, 14.46s/it][A
 50%|█████     | 31/62 [10:03<16:19, 31.58s/it]

HPCC



  0%|          | 0/2 [00:00<?, ?it/s][A

C10-0000064-LIC



 50%|█████     | 1/2 [00:59<00:59, 59.84s/it][A

C10-0000005-LIC



100%|██████████| 2/2 [01:15<00:00, 37.78s/it][A
 52%|█████▏    | 32/62 [11:19<22:23, 44.78s/it]

HC



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000341-LIC



100%|██████████| 1/1 [00:06<00:00,  6.87s/it][A
 53%|█████▎    | 33/62 [11:26<16:08, 33.41s/it]

IDC



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000320-LIC



100%|██████████| 1/1 [00:10<00:00, 10.23s/it][A
 55%|█████▍    | 34/62 [11:36<12:20, 26.45s/it]

KC



  0%|          | 0/11 [00:00<?, ?it/s][A

402-00664



  9%|▉         | 1/11 [00:07<01:12,  7.27s/it][A

402R-00515



 18%|█▊        | 2/11 [00:18<01:24,  9.43s/it][A

402-01146



 27%|██▋       | 3/11 [00:23<01:01,  7.65s/it][A

402-00573



 36%|███▋      | 4/11 [00:30<00:50,  7.25s/it][A

402R-00544



 45%|████▌     | 5/11 [00:39<00:48,  8.04s/it][A

402-00323



 55%|█████▍    | 6/11 [00:47<00:39,  7.91s/it][A

402R-00900



 64%|██████▎   | 7/11 [00:55<00:31,  7.89s/it][A

402R-00002



 73%|███████▎  | 8/11 [01:02<00:22,  7.54s/it][A

402R-00035



 82%|████████▏ | 9/11 [01:09<00:14,  7.50s/it][A

402R-00574



 91%|█████████ | 10/11 [01:17<00:07,  7.53s/it][A

402-00037



100%|██████████| 11/11 [01:22<00:00,  7.54s/it][A
 56%|█████▋    | 35/62 [12:59<19:31, 43.41s/it]

LBC



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000154-LIC



100%|██████████| 1/1 [00:06<00:00,  6.73s/it][A
 58%|█████▊    | 36/62 [13:06<14:02, 32.40s/it]

ML



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000300-LIC



100%|██████████| 1/1 [00:12<00:00, 13.00s/it][A
 60%|█████▉    | 37/62 [13:19<11:04, 26.58s/it]

MW



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000332-LIC



100%|██████████| 1/1 [00:19<00:00, 19.12s/it][A
 61%|██████▏   | 38/62 [13:38<09:44, 24.34s/it]

MPW



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000547-LIC



100%|██████████| 1/1 [00:24<00:00, 24.25s/it][A
 63%|██████▎   | 39/62 [14:02<09:19, 24.32s/it]

MD



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000247-LIC



100%|██████████| 1/1 [00:09<00:00,  9.21s/it][A
 65%|██████▍   | 40/62 [14:11<07:15, 19.79s/it]

ND



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000427-LIC



100%|██████████| 1/1 [00:06<00:00,  6.58s/it][A
 66%|██████▌   | 41/62 [14:18<05:32, 15.82s/it]

PGM



  0%|          | 0/11 [00:00<?, ?it/s][A

PC-000160



  9%|▉         | 1/11 [00:06<01:03,  6.36s/it][A

PC-000167



 18%|█▊        | 2/11 [00:13<01:00,  6.71s/it][A

PC-000169



 27%|██▋       | 3/11 [00:19<00:52,  6.61s/it][A

PC-000415



 36%|███▋      | 4/11 [00:27<00:48,  6.90s/it][A

PC-000551



 45%|████▌     | 5/11 [00:32<00:37,  6.30s/it][A

PC-000725



 55%|█████▍    | 6/11 [00:38<00:30,  6.07s/it][A

AU-R-000196



 64%|██████▎   | 7/11 [00:51<00:34,  8.53s/it][A

AU-R-000197



 73%|███████▎  | 8/11 [01:03<00:28,  9.56s/it][A

AU-R-000198



 82%|████████▏ | 9/11 [01:14<00:20, 10.14s/it][A

AU-R-000366



 91%|█████████ | 10/11 [01:23<00:09,  9.74s/it][A

AU-R-000506



100%|██████████| 11/11 [01:36<00:00,  8.76s/it][A
 68%|██████▊   | 42/62 [15:54<13:20, 40.00s/it]

PRP



  0%|          | 0/3 [00:00<?, ?it/s][A

C10-0000403-LIC



 33%|███▎      | 1/3 [00:18<00:37, 18.57s/it][A

C10-0000155-LIC



 67%|██████▋   | 2/3 [00:32<00:15, 15.72s/it][A

C10-0000286-LIC



100%|██████████| 3/3 [00:47<00:00, 15.96s/it][A
 69%|██████▉   | 43/62 [16:42<13:24, 42.36s/it]

PRL



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000759-LIC



100%|██████████| 1/1 [00:16<00:00, 16.45s/it][A
 71%|███████   | 44/62 [16:59<10:22, 34.59s/it]

QR



  0%|          | 0/4 [00:00<?, ?it/s][A

PC-000356



 25%|██▌       | 1/4 [00:06<00:18,  6.15s/it][A

AU-R-000233



 50%|█████     | 2/4 [00:21<00:23, 11.65s/it][A

AU-R-000346



 75%|███████▌  | 3/4 [00:39<00:14, 14.30s/it][A

AU-R-000633



100%|██████████| 4/4 [00:46<00:00, 11.60s/it][A
 73%|███████▎  | 45/62 [17:45<10:48, 38.13s/it]

RA



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000596-LIC



100%|██████████| 1/1 [00:11<00:00, 11.96s/it][A
 74%|███████▍  | 46/62 [17:57<08:04, 30.28s/it]

RGD



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000057-LIC



100%|██████████| 1/1 [00:08<00:00,  8.09s/it][A
 76%|███████▌  | 47/62 [18:05<05:54, 23.63s/it]

SFV



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000041-LIC



100%|██████████| 1/1 [00:24<00:00, 24.00s/it][A
 77%|███████▋  | 48/62 [18:29<05:32, 23.74s/it]

SV



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000146-LIC



100%|██████████| 1/1 [00:11<00:00, 11.19s/it][A
 79%|███████▉  | 49/62 [18:40<04:19, 19.97s/it]

SLCC



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000474-LIC



100%|██████████| 1/1 [00:07<00:00,  7.95s/it][A
 81%|████████  | 50/62 [18:48<03:16, 16.37s/it]

SO



  0%|          | 0/3 [00:00<?, ?it/s][A

C10-0000271-LIC



 33%|███▎      | 1/3 [00:11<00:22, 11.43s/it][A

C12-0000328-LIC



 67%|██████▋   | 2/3 [00:15<00:07,  7.39s/it][A

C9-0000130-LIC



100%|██████████| 3/3 [00:24<00:00,  8.24s/it][A
 82%|████████▏ | 51/62 [19:13<03:27, 18.87s/it]

ST



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000323-LIC



100%|██████████| 1/1 [00:10<00:00, 10.44s/it][A
 84%|████████▍ | 52/62 [19:23<02:43, 16.34s/it]

DW



  0%|          | 0/1 [00:00<?, ?it/s][A

MR282376



100%|██████████| 1/1 [00:22<00:00, 22.85s/it][A
 85%|████████▌ | 53/62 [19:46<02:44, 18.29s/it]

TGL



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000412-LIC



100%|██████████| 1/1 [00:06<00:00,  6.59s/it][A
 87%|████████▋ | 54/62 [19:53<01:58, 14.78s/it]

TJR



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000105-LIC



100%|██████████| 1/1 [00:30<00:00, 30.58s/it][A
 89%|████████▊ | 55/62 [20:23<02:16, 19.52s/it]

TTS



  0%|          | 0/2 [00:00<?, ?it/s][A

PC-000718



 50%|█████     | 1/2 [00:04<00:04,  4.84s/it][A

AU-R-000546



100%|██████████| 2/2 [00:10<00:00,  5.21s/it][A
 90%|█████████ | 56/62 [20:34<01:40, 16.79s/it]

TT



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000451-LIC



100%|██████████| 1/1 [00:10<00:00, 10.15s/it][A
 92%|█████████▏| 57/62 [20:44<01:14, 14.80s/it]

T4L



  0%|          | 0/1 [00:00<?, ?it/s][A

C12-0000302-LIC



100%|██████████| 1/1 [00:06<00:00,  6.48s/it][A
 94%|█████████▎| 58/62 [20:50<00:49, 12.30s/it]

TL



  0%|          | 0/19 [00:00<?, ?it/s][A

402R-00808



  5%|▌         | 1/19 [00:03<01:06,  3.71s/it][A

402-00370



 11%|█         | 2/19 [00:09<01:26,  5.07s/it][A

402R-00602



 16%|█▌        | 3/19 [00:22<02:16,  8.51s/it][A

402R-00041



 21%|██        | 4/19 [00:32<02:18,  9.22s/it][A

402R-00807



 26%|██▋       | 5/19 [00:47<02:36, 11.21s/it][A

402-01142



 32%|███▏      | 6/19 [00:53<02:04,  9.58s/it][A

402R-00573



 37%|███▋      | 7/19 [01:03<01:53,  9.46s/it][A

402-00473



 42%|████▏     | 8/19 [01:09<01:34,  8.56s/it][A

402R-00536



 47%|████▋     | 9/19 [01:17<01:23,  8.33s/it][A

402R-00066



 53%|█████▎    | 10/19 [01:24<01:12,  8.05s/it][A

402R-00362



 58%|█████▊    | 11/19 [01:28<00:54,  6.77s/it][A

402-00390



 63%|██████▎   | 12/19 [01:44<01:06,  9.48s/it][A

402-01201



 68%|██████▊   | 13/19 [02:03<01:14, 12.48s/it][A

402R-00781



 74%|███████▎  | 14/19 [02:15<01:01, 12.38s/it][A

402R-00505



 79%|███████▉  | 15/19 [02:20<00:39,  9.86s/it][A

402R-00487



 84%|████████▍ | 16/19 [02:23<00:24,  8.00s/it][A

402R-00242



 89%|████████▉ | 17/19 [02:27<00:13,  6.69s/it][A

402R-00246



 95%|█████████▍| 18/19 [02:31<00:05,  5.97s/it][A

402R-00571



100%|██████████| 19/19 [02:35<00:00,  8.18s/it][A
 95%|█████████▌| 59/62 [23:26<02:45, 55.22s/it]

UHHC



  0%|          | 0/2 [00:00<?, ?it/s][A

C10-0000817-LIC



 50%|█████     | 1/2 [00:06<00:06,  6.16s/it][A

C9-0000082-LIC



100%|██████████| 2/2 [00:15<00:00,  7.97s/it][A
 97%|█████████▋| 60/62 [23:42<01:26, 43.44s/it]

VS



  0%|          | 0/2 [00:00<?, ?it/s][A

402-00840



 50%|█████     | 1/2 [00:06<00:06,  6.47s/it][A

402R-00545



100%|██████████| 2/2 [00:29<00:00, 14.83s/it][A
 98%|█████████▊| 61/62 [24:11<00:39, 39.31s/it]

VOY



  0%|          | 0/3 [00:00<?, ?it/s][A

C12-0000030-LIC



 33%|███▎      | 1/3 [00:05<00:11,  5.67s/it][A

C10-0000802-LIC



 67%|██████▋   | 2/3 [00:11<00:05,  5.95s/it][A

C12-0000159-LIC



100%|██████████| 3/3 [00:16<00:00,  5.44s/it][A
100%|██████████| 62/62 [24:28<00:00, 23.68s/it]


In [None]:
# def calculate_cogs(company_identifier,transfer_packages_start_date,sales_transactrions_start_date,map_df):
#     # setup
#     df_summary_simp_list = {}
#     # read data
#     df_in = 
#     df_in,df_sales_deduped,bad_dl,unknown_transfer,rwp,miss_incoming,miss_receipts = data_quality_checks.run(company_identifier,transfer_packages_start_date,sales_transactrions_start_date)    
#     licenses = map_df[map_df['company_identifier'] == company_identifier]['lic_list'].to_list()[0]
#     print(licenses)
#     # loop thru locations
#     for l in tqdm(licenses):
#         print(l)
#         df_in_l = df_in[df_in['license_number'] == l]
#         df_sales_deduped_l = df_sales_deduped[df_sales_deduped['license_number'] == l]
#         df_in_l['per_unit_incoming'] = df_in_l['shipper_wholesale_price'] / df_in_l['shipped_quantity']
#         df_in_l = df_in_l[df_in_l['per_unit_incoming'] <= 10000]
#         df_sales_l = df_sales_deduped_l
#         df_summary_simp,df_cogs_average_product = cogs_analysis(df_in_l,df_sales_l,'monthly')
#         df_summary_simp.index = df_summary_simp.date 
#         # tax treatment
#         df_summary_simp['revenue_after_tax'] = df_summary_simp['revenue'] * 1.15
#         df_summary_simp['cogs_after_tax'] = df_summary_simp['cogs'] * 1.27
#         df_summary_simp['margin_$_after_tax'] = df_summary_simp['revenue_after_tax'] - df_summary_simp['cogs_after_tax']
#         df_summary_simp['margin_%_after_tax'] = df_summary_simp['margin_$_after_tax'] / df_summary_simp['revenue_after_tax']
        
#         #rolling gm %
#         df_summary_simp['gm_past_quarter_after_tax'] = df_summary_simp[['margin_%_after_tax']].rolling(3).mean().values
#         df_summary_simp['gm_past_2quarters_after_tax'] = df_summary_simp[['margin_%_after_tax']].rolling(6).mean().values
#         df_summary_simp['gm_past_3quarters_after_tax'] = df_summary_simp[['margin_%_after_tax']].rolling(9).mean().values
        
#         #rolling gm $
#         df_summary_simp['gm$_past_quarter_after_tax'] = df_summary_simp[['margin_$_after_tax']].rolling(3).mean().values
#         df_summary_simp['gm$_past_2quarters_after_tax'] = df_summary_simp[['margin_$_after_tax']].rolling(6).mean().values
#         df_summary_simp['gm$_past_3quarters_after_tax'] = df_summary_simp[['margin_$_after_tax']].rolling(9).mean().values
#         df_summary_simp_list[l] = df_summary_simp
     
#     return df_summary_simp_list
    

In [None]:
# def get_ca_gmv_change_bm_list(company_list,transfer_packages_start_date,sales_transactrions_start_date):
#     cogs_df_list = {}
#     for c in tqdm(company_list):
#         c
#         cogs_df = calculate_ca_gmv_change([c],transfer_packages_start_date,sales_transactrions_start_date,ca_df_clean)
#         cogs_df_list[c] = cogs_df
#     return cogs_df_list
    

In [None]:
#test out
#a,b = get_ca_gmv_change_bm_list(list(ca_df_clean['company_identifier'])[0:2],TRANSFER_PACKAGES_START_DATE,SALES_TRANSACTIONS_START_DATE,CURRENT_MONTH)


# cogs & coverage

In [15]:
# Flatten the nested {company: {license: summary}} results into one frame.
# Frames are collected and concatenated once: DataFrame.append was removed in
# pandas 2.0 and appending inside a loop copies the accumulated frame each time.
frames = []
for k in a.keys():
    print(k)
    for l in a[k].keys():
        print(l)
        data = a[k][l]
        # tag every row with its company and license (writes into a[k][l] too)
        data['company'] = k
        data['location'] = l
        print(data.shape)
        frames.append(data)
        #data.to_excel('ca_location_analysis/'+'analysis_' + k + '_'+ l+'.xlsx')
df = pd.concat(frames) if frames else pd.DataFrame()

99HT
C10-0000279-LIC
(16, 24)
ALT
PC-000310
(15, 24)
AU-R-000380
(14, 24)
BS
PC-000185
(28, 24)
AU-R-000156
(28, 24)
BMC
402-00930
(28, 24)
BBF
MR281525
(22, 24)
BUD
C9-0000464-LIC
(5, 24)
C9-0000467-LIC
(4, 24)
C9-0000444-LIC
(4, 24)
C9-0000167-LIC
(11, 24)
C9-0000399-LIC
(12, 24)
CHO
AU-R-000575
(6, 24)
CG
C9-0000157-LIC
(26, 24)
CPA
C10-0000461-LIC
(27, 24)
C9-0000348-LIC
(17, 24)
CPC
C9-0000056-LIC
(19, 24)
CSC
C10-0000670-LIC
(28, 24)
C10-0000918-LIC
(5, 24)
CCC
C12-0000087-LIC
(28, 24)
CC
C10-0000758-LIC
(14, 24)
DWF
402R-00804
(20, 24)
DCO
050-10052885D4C
(26, 24)
050-10070593E9E
(27, 24)
DL
MR283369
(10, 24)
DGG
C9-0000016-LIC
(28, 24)
DG
PC-000485
(16, 24)
AU-R-000287
(16, 24)
AU-R-000359
(16, 24)
AU-R-000422
(13, 24)
AU-R-000470
(10, 24)
AU-R-000572
(6, 24)
EL
C9-0000370-LIC
(20, 24)
EMA
C10-0000774-LIC
(16, 24)
EMF
C10-0000939-LIC
(3, 24)
EMM
C10-0000786-LIC
(14, 24)
EMT
C10-0000695-LIC
(23, 24)
EH
MR283073
(6, 24)
FW
402-00558
(28, 24)
402R-00017
(28, 24)
402-00891
(28, 24)

In [16]:
len(df['company'].unique())

62

In [17]:
# One (company, location) tuple per row as a combined key, then replace the
# repeated per-license date index with a fresh 0..n-1 index.
company_location_pairs = list(zip(df['company'], df['location']))
df = df.assign(id=company_location_pairs).reset_index(drop=True)

In [18]:
len(df['id'].unique())

131

In [19]:
#df.to_csv('all_metrc_cogs_0316.csv')

In [20]:
df.tail()

Unnamed: 0,date,revenue,cogs,margin_$,margin_%,total_count_incoming,product_count,count_incoming,coverage,revenue_after_tax,cogs_after_tax,margin_$_after_tax,margin_%_after_tax,gm_past_quarter,gm_past_2quarters,gm_past_3quarters,gm_past_quarter_after_tax,gm_past_2quarters_after_tax,gm_past_3quarters_after_tax,gm$_past_quarter_after_tax,gm$_past_2quarters_after_tax,gm$_past_3quarters_after_tax,company,location,id
2446,2021-12,32715.38,12198.903894,20516.476106,0.62712,1062.0,129.0,933,0.919481,37622.687,15492.607945,22130.079055,0.588211,0.67801,,,0.644411,,,16567.283526,,,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)"
2447,2022-01,24339.01,11123.403847,13215.606153,0.54298,831.0,43.0,788,0.936866,27989.8615,14126.722885,13863.138615,0.495291,0.626175,,,0.587167,,,16873.76507,,,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)"
2448,2022-02,24450.82,10410.082937,14040.737063,0.574244,796.0,37.0,759,0.953293,28118.443,13220.805331,14897.637669,0.529817,0.581448,0.605722,,0.537773,0.56458,,16963.618446,14587.000533,,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)"
2449,2022-03,27920.68,11465.0723,16455.6077,0.58937,933.0,54.0,879,0.947208,32108.782,14560.641821,17548.140179,0.546522,0.568865,0.623437,,0.523877,0.584144,,15436.305488,16001.794507,,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)"
2450,2022-04,5013.7,1757.345833,3256.354167,0.649491,168.0,8.0,160,0.94382,5765.755,2231.829208,3533.925792,0.612916,0.604368,0.615272,,0.563085,0.575126,,11993.234547,14433.499808,,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)"


In [21]:
df_with_state = pd.merge(df,ca_df_clean[['company_identifier','state']],left_on = 'company',right_on = 'company_identifier',how = 'left')



In [22]:
df_with_state

Unnamed: 0,date,revenue,cogs,margin_$,margin_%,total_count_incoming,product_count,count_incoming,coverage,revenue_after_tax,cogs_after_tax,margin_$_after_tax,margin_%_after_tax,gm_past_quarter,gm_past_2quarters,gm_past_3quarters,gm_past_quarter_after_tax,gm_past_2quarters_after_tax,gm_past_3quarters_after_tax,gm$_past_quarter_after_tax,gm$_past_2quarters_after_tax,gm$_past_3quarters_after_tax,company,location,id,company_identifier,state
0,2021-01,291646.25,69798.326998,221847.923002,0.760675,4808.0,2003.0,2805,0.696005,335393.1875,88643.875287,246749.312213,0.735702,,,,,,,,,,99HT,C10-0000279-LIC,"(99HT, C10-0000279-LIC)",99HT,CA
1,2021-02,262906.73,85244.682175,177662.047825,0.675761,5464.0,377.0,5087,0.948611,302342.7395,108260.746362,194081.993138,0.641927,,,,,,,,,,99HT,C10-0000279-LIC,"(99HT, C10-0000279-LIC)",99HT,CA
2,2021-03,315154.72,99796.521792,215358.198208,0.683341,6902.0,159.0,6743,0.951475,362427.9280,126741.582676,235686.345324,0.650299,0.706592,,,0.675976,,,225505.883559,,,99HT,C10-0000279-LIC,"(99HT, C10-0000279-LIC)",99HT,CA
3,2021-04,329925.48,100825.215287,229100.264713,0.694400,7081.0,112.0,6969,0.931465,379414.3020,128048.023414,251366.278586,0.662511,0.684501,,,0.651579,,,227044.872350,,,99HT,C10-0000279-LIC,"(99HT, C10-0000279-LIC)",99HT,CA
4,2021-05,336793.80,98971.770002,237822.029998,0.706135,7286.0,131.0,7155,0.918326,387312.8700,125694.147903,261618.722097,0.675471,0.694626,,,0.662760,,,249557.115336,,,99HT,C10-0000279-LIC,"(99HT, C10-0000279-LIC)",99HT,CA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2446,2021-12,32715.38,12198.903894,20516.476106,0.627120,1062.0,129.0,933,0.919481,37622.6870,15492.607945,22130.079055,0.588211,0.678010,,,0.644411,,,16567.283526,,,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)",VOY,CA
2447,2022-01,24339.01,11123.403847,13215.606153,0.542980,831.0,43.0,788,0.936866,27989.8615,14126.722885,13863.138615,0.495291,0.626175,,,0.587167,,,16873.765070,,,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)",VOY,CA
2448,2022-02,24450.82,10410.082937,14040.737063,0.574244,796.0,37.0,759,0.953293,28118.4430,13220.805331,14897.637669,0.529817,0.581448,0.605722,,0.537773,0.564580,,16963.618446,14587.000533,,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)",VOY,CA
2449,2022-03,27920.68,11465.072300,16455.607700,0.589370,933.0,54.0,879,0.947208,32108.7820,14560.641821,17548.140179,0.546522,0.568865,0.623437,,0.523877,0.584144,,15436.305488,16001.794507,,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)",VOY,CA


In [23]:
df_with_state.to_csv('all_metrc_gm_0408.csv')

In [None]:
state_count = df_with_state[['state','id','date']].groupby(['state','date']).count().unstack().T.reset_index()

In [None]:
state_count

In [None]:
state_avg = df_with_state[['state','coverage','date']].groupby(['state','date']).mean().unstack().T.reset_index()
state_avg.index = pd.to_datetime(state_avg['date'])

ax = state_avg.plot(kind='bar', stacked=False,figsize = (12,9))
ax.legend(loc = 2)

In [None]:
state_avg

# inventory & coverage

In [None]:
today = date.today()
today

In [None]:
def read_inv_data(COMPANY_IDENTIFIER,license_numbers):
    """Load the inventory packages of a company for the given licenses.

    The SQL text comes from
    ``create_queries.create_company_inventory_packages_query`` (called with
    ``include_quantity_zero=True``) and is executed against the notebook-level
    ``engine``.

    Returns:
        The query result as a DataFrame.
    """
    inventory_query = create_queries.create_company_inventory_packages_query(
        COMPANY_IDENTIFIER,
        include_quantity_zero=True,
        license_numbers=license_numbers,
    )
    return pd.read_sql_query(inventory_query, engine)


In [None]:
def calculate_inventory_valuation(incoming_transfer_df,inventory_df,license_list,as_of_date=None):
    """Value the inventory with wholesale prices taken from incoming transfers.

    Every inventory package is priced with the mean per-unit incoming price of
    the same ``package_id``. Packages that get no price that way fall back to
    the mean per-unit incoming price of the same ``product_name``. Coverage is
    the number of packages priced by either route divided by the number of
    packages with a non-null ``quantity``.

    Args:
        incoming_transfer_df: incoming transfers with the columns
            ``license_number``, ``recipient_facility_name``,
            ``shipper_wholesale_price``, ``shipped_quantity``, ``package_id``
            and ``product_name``. The frame is not modified.
        inventory_df: inventory packages with the columns ``package_id``,
            ``product_name`` and ``quantity``.
        license_list: licenses used to look up the legal name; stored as-is in
            the ``license`` column of the result.
        as_of_date: value written to the ``date`` column. Defaults to
            ``date.today()`` so the function no longer depends on a ``today``
            variable defined in another cell.

    Returns:
        A one-row DataFrame with the columns ``date``, ``value``,
        ``total_incoming``, ``total``, ``coverage``, ``license`` and
        ``legal_name``. ``legal_name`` is NaN when no incoming transfer matches
        ``license_list``.
    """
    if as_of_date is None:
        as_of_date = date.today()

    # legal name: there may be no incoming transfer for these licenses even
    # though inventory exists, so do not index blindly into an empty array
    facility_names = incoming_transfer_df.loc[
        incoming_transfer_df['license_number'].isin(license_list), 'recipient_facility_name'
    ]
    legal_name = facility_names.values[0] if len(facility_names) > 0 else numpy.nan

    # per-unit incoming price, computed on a copy so the caller's frame is left untouched
    priced_transfers = incoming_transfer_df.assign(
        per_unit_incoming=incoming_transfer_df['shipper_wholesale_price'] / incoming_transfer_df['shipped_quantity']
    )
    priced_transfers = priced_transfers[priced_transfers['shipper_wholesale_price'].notnull()]

    # average price by package id
    df_avg_incoming_price = (
        priced_transfers.groupby(['package_id'])['per_unit_incoming'].mean().reset_index()
    )
    # average price by product
    df_avg_product = (
        priced_transfers.groupby(['product_name'])['per_unit_incoming'].mean()
        .reset_index()
        .rename(columns={'per_unit_incoming': 'per_unit_product'})
    )

    # price the inventory by package id; an infinite unit price (zero shipped
    # quantity) is treated as "no price" so the product fallback can apply
    df_inventory_incoming = pd.merge(inventory_df, df_avg_incoming_price, on='package_id', how='left')
    df_inventory_incoming = df_inventory_incoming.replace([numpy.inf, -numpy.inf], numpy.nan)

    # fallback: packages still unpriced are priced by product name
    df_inv_null = df_inventory_incoming[df_inventory_incoming['per_unit_incoming'].isnull()]
    df_inv_product = pd.merge(df_inv_null, df_avg_product, on='product_name', how='left')
    df_inv_product = df_inv_product.replace([numpy.inf, -numpy.inf], numpy.nan)
    df_inv_product_price = df_inv_product[df_inv_product['per_unit_product'].notnull()]

    # NaN prices are skipped by sum(), so the two totals never double count a package
    inventory_product_value = (df_inv_product_price['quantity'] * df_inv_product_price['per_unit_product']).sum()
    inventory_value = (df_inventory_incoming['quantity'] * df_inventory_incoming['per_unit_incoming']).sum()
    total_inv_value = inventory_product_value + inventory_value

    inv_count_product = df_inv_product_price['per_unit_product'].count()
    inv_count_incoming = df_inventory_incoming['per_unit_incoming'].count()
    inv_count_total = df_inventory_incoming['quantity'].count()
    inv_total_incoming = inv_count_product + inv_count_incoming
    inventory_coverage = inv_total_incoming / inv_count_total

    # prepare data: one single-element row per field, transposed into one record
    data = [[as_of_date],
            [total_inv_value],
            [inv_total_incoming],
            [inv_count_total],
            [inventory_coverage],
            [license_list],
            [legal_name]]
    df_inventory_license = pd.DataFrame(data).T
    df_inventory_license.columns = ['date','value','total_incoming','total','coverage','license','legal_name']
    return df_inventory_license
    

In [None]:
def get_inv_list(company_list,transfer_packages_start_date,sales_transactrions_start_date,map_df):
    """Compute an inventory valuation for every license of every company.

    For each company the licenses are read from the ``lic_list`` cell of the
    first ``map_df`` row whose ``company_identifier`` matches. Incoming
    transfers and inventory are then loaded per license.

    ``transfer_packages_start_date`` and ``sales_transactrions_start_date`` are
    accepted but not used by this function.

    Returns:
        ``{company: {license: valuation}}`` where the valuation is the frame
        returned by ``calculate_inventory_valuation``, or ``numpy.nan`` when
        the inventory query returned no rows.
    """
    valuations_by_company = {}
    for company in tqdm(company_list):
        print(company)
        company_rows = map_df[map_df['company_identifier'] == company]
        licenses = company_rows['lic_list'].to_list()[0]
        valuations_by_license = {}
        for license_number in tqdm(licenses):
            print(license_number)
            incoming = read_df_in([company],[license_number])
            inventory = read_inv_data([company],[license_number])
            if len(inventory) == 0:
                # nothing in stock for this license: keep a NaN placeholder
                valuations_by_license[license_number] = numpy.nan
            else:
                valuations_by_license[license_number] = calculate_inventory_valuation(
                    incoming, inventory, [license_number]
                )
        valuations_by_company[company] = valuations_by_license
    return valuations_by_company

    

    

In [None]:
b = get_inv_list(list(ca_df_clean['company_identifier']),TRANSFER_PACKAGES_START_DATE,SALES_TRANSACTIONS_START_DATE,ca_df_clean)




In [None]:
b.keys()

In [None]:
for k in b.keys():
    print(k)
    for l in b[k].keys():
        print(l)

In [None]:
str(b['QR']['AU-R-000233'])

In [None]:
# Collect the per-license valuation frames and concatenate once:
# DataFrame.append was removed in pandas 2.0 and is quadratic in a loop.
inventory_frames = []
for k in b.keys():
    print(k)
    for l in b[k].keys():
        print(l)
        data_inv = b[k][l]
        # get_inv_list stores numpy.nan for licenses without inventory rows;
        # test the type instead of rendering the whole frame to a string
        if not isinstance(data_inv, pd.DataFrame):
            continue
        data_inv['company'] = k
        data_inv['location'] = l
        print(data_inv.shape)
        inventory_frames.append(data_inv)
        #data.to_excel('ca_location_analysis/'+'analysis_' + k + '_'+ l+'.xlsx')
# pd.concat raises on an empty list, so fall back to an empty frame
df_inv = pd.concat(inventory_frames) if inventory_frames else pd.DataFrame()

In [None]:
#df_inv.to_csv('all_metrc_inv_0316.csv')



In [None]:
len(df_inv['company'].unique())

In [None]:
df_inv['id'] = list(zip(df_inv.company, df_inv.location))
df_inv = df_inv.reset_index(drop = True)

In [None]:
len(df_inv['id'].unique())

In [None]:
df_inv_with_state = pd.merge(df_inv,ca_df_clean[['company_identifier','state']],left_on = 'company',right_on = 'company_identifier',how = 'left')



In [None]:
#df_inv_with_state.to_csv('all_metrc_inv_0317.csv')

In [None]:
df_inv_with_state['coverage'] = df_inv_with_state['coverage'].astype('float')

In [None]:
state_count_inv = df_inv_with_state[['state','id']].groupby(['state']).count().unstack().T.reset_index()
state_avg_inv = df_inv_with_state[['state','coverage']].groupby(['state']).mean().unstack().T.reset_index()
state_med_inv = df_inv_with_state[['state','coverage']].groupby(['state']).median().unstack().T.reset_index()



In [None]:
state_count_inv

In [None]:
state_avg_inv

In [None]:
state_med_inv

In [None]:
co_inv = df_inv_with_state[df_inv_with_state['state'] == 'MI'][['id','coverage']]
co_inv.index = co_inv.id

In [None]:
co_inv

In [None]:
co_inv.plot(kind = 'bar',figsize = (15,7))

# Vendor Churn

In [None]:
WINDOW = 4
VC_START_DATE = '2020-01-01'
VC_END_DATE = '2022-03-01'
VC_MONTH_LIST = ['2021-11-01','2021-12-01','2022-01-01','2022-02-01']
VC_MONTH_END = VC_MONTH_LIST[-1]

In [None]:
def calculate_vendor_churn(incoming_transfer_df,license_list,vc_windown,vc_start_date,vc_end_date,vc_month_list,vc_month_end):
    """Monthly share of cumulative purchase volume held by inactive vendors.

    Steps:
      1. Keep the transfers of ``license_list`` and sum
         ``shipper_wholesale_price`` per calendar month (from
         ``created_date``) and per ``shipper_facility_name``.
      2. Lay the sums out on a full month grid from ``vc_start_date`` to
         ``vc_end_date`` (month starts); months without purchases count as 0
         and months outside the grid are ignored.
      3. Per vendor, take the rolling ``vc_windown``-month sum and the running
         total; per month, take each vendor's share of the running total of
         all vendors.
      4. A vendor is *significant* when its share exceeds 0.001 and *inactive*
         when it is significant but its rolling sum is not positive (this
         includes the first ``vc_windown - 1`` months, where the rolling sum
         is undefined).

    Args:
        incoming_transfer_df: incoming transfers with the columns
            ``license_number``, ``created_date``, ``shipper_facility_name``
            and ``shipper_wholesale_price``. The frame is not modified.
        license_list: licenses to keep.
        vc_windown: rolling window length in months.
        vc_start_date, vc_end_date: bounds of the month grid.
        vc_month_list, vc_month_end: kept for backward compatibility and not
            used. The previous implementation only fed their global namesakes
            into a vendor matrix that was built and then discarded.

    Returns:
        DataFrame with the columns ``year_month`` (month start) and
        ``%_inactive`` (sum of the shares of inactive vendors), indexed by
        ``year_month``, one row per month of the grid.
    """
    transfers = incoming_transfer_df[incoming_transfer_df['license_number'].isin(license_list)]
    month_labels = pd.to_datetime(transfers['created_date']).dt.strftime("%Y-%m")
    months = pd.date_range(start=vc_start_date, end=vc_end_date, freq='MS')

    # month x vendor matrix of purchased wholesale value
    spend = (
        transfers.assign(year_month=month_labels)
        .groupby(['year_month', 'shipper_facility_name'])['shipper_wholesale_price']
        .sum()
        .unstack('shipper_facility_name')
    )
    spend.index = pd.to_datetime(spend.index, format="%Y-%m")
    # reindex drops months outside the grid and adds the missing ones; a vendor
    # active only outside the grid becomes an all-zero column instead of an error
    spend = spend.reindex(months).fillna(0)

    rolling_total = spend.rolling(vc_windown).sum()
    running_total = spend.cumsum()
    monthly_running_total = running_total.sum(axis=1)

    # 0/0 (nothing purchased yet) yields NaN, which fails the threshold below
    pct_total = running_total.div(monthly_running_total, axis=0)
    significant = pct_total > 0.001
    last_window_active = rolling_total > 0  # NaN compares False
    inactive = significant & ~last_window_active

    #churn
    pct_inactive = pct_total.where(inactive, 0.0).sum(axis=1)
    churn = pd.DataFrame({'year_month': months, '%_inactive': pct_inactive.to_numpy()})
    churn.index = churn.year_month
    return churn
    


In [None]:
def get_vc_list(company_list,transfer_packages_start_date,sales_transactrions_start_date,map_df):
    """Compute the vendor-churn series for every license of every company.

    For each company the licenses are read from the ``lic_list`` cell of the
    first ``map_df`` row whose ``company_identifier`` matches. The churn
    settings come from the notebook constants ``WINDOW``, ``VC_START_DATE``,
    ``VC_END_DATE``, ``VC_MONTH_LIST`` and ``VC_MONTH_END``.

    ``transfer_packages_start_date`` and ``sales_transactrions_start_date`` are
    accepted but not used by this function.

    Returns:
        ``{company: {license: churn}}`` where churn is the frame returned by
        ``calculate_vendor_churn``, or ``numpy.nan`` when the license has no
        incoming transfers.
    """
    churn_by_company = {}
    for company in tqdm(company_list):
        print(company)
        company_rows = map_df[map_df['company_identifier'] == company]
        licenses = company_rows['lic_list'].to_list()[0]
        churn_by_license = {}
        for license_number in tqdm(licenses):
            print(license_number)
            incoming = read_df_in([company],[license_number])
            if len(incoming) == 0:
                # no incoming transfers for this license: keep a NaN placeholder
                churn_by_license[license_number] = numpy.nan
            else:
                churn_by_license[license_number] = calculate_vendor_churn(
                    incoming,[license_number],WINDOW,VC_START_DATE,VC_END_DATE,VC_MONTH_LIST,VC_MONTH_END
                )
        churn_by_company[company] = churn_by_license
    return churn_by_company

    

    

In [None]:
c = get_vc_list(list(ca_df_clean['company_identifier']),TRANSFER_PACKAGES_START_DATE,SALES_TRANSACTIONS_START_DATE,ca_df_clean)




In [None]:
# Collect the per-license churn frames and concatenate once:
# DataFrame.append was removed in pandas 2.0 and is quadratic in a loop.
churn_frames = []
for k in c.keys():
    print(k)
    for l in c[k].keys():
        print(l)
        data_vc = c[k][l]
        # get_vc_list stores numpy.nan for licenses without incoming transfers;
        # test the type instead of rendering the whole frame to a string
        if not isinstance(data_vc, pd.DataFrame):
            continue
        data_vc['company'] = k
        data_vc['location'] = l
        print(data_vc.shape)
        churn_frames.append(data_vc)
        #data.to_excel('ca_location_analysis/'+'analysis_' + k + '_'+ l+'.xlsx')
# pd.concat raises on an empty list, so fall back to an empty frame
df_vc = pd.concat(churn_frames) if churn_frames else pd.DataFrame()

In [None]:
df_vc['id'] = list(zip(df_vc.company, df_vc.location))
df_vc = df_vc.reset_index(drop = True)

In [None]:
len(df_vc['id'].unique())

In [None]:
df_vc_with_state = pd.merge(df_vc,ca_df_clean[['company_identifier','state']],left_on = 'company',right_on = 'company_identifier',how = 'left')

df_vc_with_state.to_csv('all_metrc_vc_0322.csv')




In [None]:
df_vc_with_state.to_csv('all_metrc_vc_0322.csv')

In [None]:
df_vc_with_state_feb = df_vc_with_state[df_vc_with_state['year_month'] == '2022-02-01']

In [None]:
state_count_vc = df_vc_with_state[['state','year_month','id']].groupby(['state','year_month']).count().unstack().T.reset_index()
state_avg_vc = df_vc_with_state[['state','year_month','%_inactive']].groupby(['state','year_month']).mean().unstack().T.reset_index()
state_med_vc = df_vc_with_state[['state','year_month','%_inactive']].groupby(['state','year_month']).median().unstack().T.reset_index()



In [None]:
state_count_vc


# flowhub

In [None]:
fh = pd.read_csv('flowhub_aftertax_cogs_0322.csv',index_col= 0)

In [None]:
fh

In [None]:
fh_clean = fh[['year_month','subtotalInDollars','location_id','location_state','margin_perc']]

In [None]:
fh_clean

In [None]:
df_with_state_clean = df_with_state[['date','revenue','id','state','margin_%']]

In [None]:
fh_clean.columns = df_with_state_clean.columns

In [None]:
joined = fh_clean.append(df_with_state_clean)

In [None]:
joined.head()

In [None]:
joined['date'] = pd.to_datetime(joined['date'])

In [None]:
df_mom_rev = joined.groupby('id').apply(lambda df: df.set_index('date').resample('1MS').first()['revenue'].pct_change()).reset_index()

In [None]:
df_mom_rev.columns = ['id','date','rev_change']

In [None]:
metrc_fh = df_mom_rev.merge(joined,on = ['id','date'],how = 'right')

In [None]:
fh_all = metrc_fh.loc[:3375]

In [None]:
fh_all.groupby(['state'])['revenue'].describe().reset_index()

In [None]:
fh_all[['date','state','id']].groupby(['date','state']).count().unstack().reset_index()

In [None]:
metrc_all  = metrc_fh.loc[3376:]

In [None]:
metrc_all[['date','state','id']].groupby(['date','state']).count().unstack().reset_index()

In [None]:
metrc_all.head()

In [None]:
metrc_all_trim = metrc_all[(metrc_all['margin_%'] > 0.2)&(metrc_all['margin_%'] < 0.8)]

In [None]:
metrc_all_trim.groupby(['state'])['margin_%'].describe().reset_index()

In [None]:
margin_distr = metrc_all_trim.groupby(['state','date'])['margin_%'].describe().reset_index()
margin_distr['date'] = pd.to_datetime(margin_distr['date'])

In [None]:
margin_distr 

In [None]:
import seaborn as sns

In [None]:
# trimmed pre tax
# Size the figure when it is created: the sns.set(rc=...) call at the end only
# applies to figures created afterwards, so the size used to depend on which
# plotting cell had been run before this one.
fig, ax = plt.subplots(1, 2, figsize=(20, 6))
for panel, (stat, title) in zip(ax, [("min", "min"), ("max", "max")]):
    sns.lineplot(data=margin_distr, x="date", y=stat, hue="state", marker='o', markersize=9, ax=panel)
    panel.legend(loc=4)
    panel.set_title(title, fontsize=15)
    panel.set_xlabel("year month")
    panel.set_ylabel("margin %")
#ax[1].set(ylim=(0.2, 1))
sns.set(rc={'figure.figsize':(20,6)})


In [None]:
# trimmed pre tax
# Size the figure when it is created: the sns.set(rc=...) call at the end only
# applies to figures created afterwards.
fig, ax = plt.subplots(1, 2, figsize=(20, 6))
for panel, (stat, title) in zip(ax, [("25%", "25%"), ("75%", "75%")]):
    sns.lineplot(data=margin_distr, x="date", y=stat, hue="state", marker='o', markersize=9, ax=panel)
    panel.legend(loc=4)
    panel.set_title(title, fontsize=15)
    panel.set_xlabel("year month")
    panel.set_ylabel("margin %")
#ax[1].set(ylim=(0.2, 1))
sns.set(rc={'figure.figsize':(20,6)})


In [None]:
# trimmed pre tax
# Size the figure when it is created: the sns.set(rc=...) call at the end only
# applies to figures created afterwards.
fig, ax = plt.subplots(1, 2, figsize=(20, 6))
# the "50%" column of describe() is the median
for panel, (stat, title) in zip(ax, [("mean", "mean"), ("50%", "median")]):
    sns.lineplot(data=margin_distr, x="date", y=stat, hue="state", marker='o', markersize=9, ax=panel)
    panel.legend(loc=4)
    panel.set_title(title, fontsize=15)
    panel.set_xlabel("year month")
    panel.set_ylabel("margin %")
#ax[1].set(ylim=(0.2, 1))
sns.set(rc={'figure.figsize':(20,6)})


In [None]:
margin_distr.to_csv('fh_margin_distr.csv')

In [None]:
#metrc_fh.to_csv('metrc_fh.csv')

In [None]:
pos = pd.read_csv('pos_csv - Sheet1.csv')

In [None]:
pos['date'] = pd.to_datetime(pos['date'])

In [None]:
pos['% Margin w/o Tax'] = pos['% Margin w/o Tax'].astype('float')
pos['Revenue w/o Tax'] = pos['Revenue w/o Tax'].astype('float')


In [None]:
pos[['date','US state','Company identifier']].groupby(['date','US state']).count().unstack().reset_index().fillna(0)

In [None]:
pos_Rev_change = pos.groupby('Company identifier').apply(lambda df: df.set_index('date').resample('1MS').first()['Revenue w/o Tax'].pct_change()).reset_index()

pos_Rev_change.columns = ['Company identifier','date','rev_change']


In [None]:
pos_Rev_change

In [None]:
pos_data_all = pos_Rev_change.merge(pos,on = ['Company identifier','date'],how = 'right')

In [None]:
pos_data_all.to_csv('pos_data_all.csv')

In [None]:
margin_distr_pos = pos.groupby(['US state'])['% Margin w/o Tax'].describe().reset_index()

In [None]:
margin_distr_pos


In [None]:
margin_distr_pos = pos.groupby(['US state','date'])['% Margin w/o Tax'].describe().reset_index()
margin_distr_pos['date'] = pd.to_datetime(margin_distr_pos['date'])

In [None]:
margin_distr_pos

In [None]:
# trimmed pre tax
# Size the figure when it is created: the sns.set(rc=...) call at the end only
# applies to figures created afterwards.
fig, ax = plt.subplots(1, 2, figsize=(20, 6))
for panel, (stat, title) in zip(ax, [("min", "min"), ("max", "max")]):
    sns.lineplot(data=margin_distr_pos, x="date", y=stat, hue="US state", marker='o', markersize=9, ax=panel)
    panel.legend(loc=4)
    panel.set_title(title, fontsize=15)
    panel.set_xlabel("year month")
    panel.set_ylabel("margin %")
#ax[1].set(ylim=(0.2, 1))
sns.set(rc={'figure.figsize':(20,6)})


In [None]:
# trimmed pre tax
# Size the figure when it is created: the sns.set(rc=...) call at the end only
# applies to figures created afterwards.
fig, ax = plt.subplots(1, 2, figsize=(20, 6))
for panel, (stat, title) in zip(ax, [("25%", "25%"), ("75%", "75%")]):
    sns.lineplot(data=margin_distr_pos, x="date", y=stat, hue="US state", marker='o', markersize=9, ax=panel)
    panel.legend(loc=4)
    panel.set_title(title, fontsize=15)
    panel.set_xlabel("year month")
    panel.set_ylabel("margin %")
#ax[1].set(ylim=(0.2, 1))
sns.set(rc={'figure.figsize':(20,6)})


In [None]:
# trimmed pre tax
# Size the figure when it is created: the sns.set(rc=...) call at the end only
# applies to figures created afterwards.
fig, ax = plt.subplots(1, 2, figsize=(20, 6))
# the "50%" column of describe() is the median
for panel, (stat, title) in zip(ax, [("mean", "mean"), ("50%", "median")]):
    sns.lineplot(data=margin_distr_pos, x="date", y=stat, hue="US state", marker='o', markersize=9, ax=panel)
    panel.legend(loc=4)
    panel.set_title(title, fontsize=15)
    panel.set_xlabel("year month")
    panel.set_ylabel("margin %")
#ax[1].set(ylim=(0.2, 1))
sns.set(rc={'figure.figsize':(20,6)})


# GM$

In [None]:
import seaborn as sns

## 3M

In [None]:
df_high_coverage_gm_trim_3m = df_high_coverage[(df_high_coverage['gm$_past_quarter_after_tax'] > 0)&(df_high_coverage['gm$_past_quarter_after_tax'] < 500000)]




In [None]:
df_high_coverage_gm_trim_3m[['date','gm$_past_quarter_after_tax']].groupby(['date']).quantile(.75)


## 6M

In [None]:
df_high_coverage_gm_trim_6m = df_high_coverage[(df_high_coverage['gm$_past_2quarters_after_tax'] > 0)&(df_high_coverage['gm$_past_2quarters_after_tax'] < 500000)]




In [None]:
# Report the two-quarter column this frame was trimmed on; the cell previously
# repeated the one-quarter column from the 3M section.
df_high_coverage_gm_trim_6m[['date','gm$_past_2quarters_after_tax']].groupby(['date']).quantile(.75)

## 9M

In [None]:
df_high_coverage_gm_trim_9m = df_high_coverage[(df_high_coverage['gm$_past_3quarters_after_tax'] > 0)&(df_high_coverage['gm$_past_3quarters_after_tax'] < 500000)]




In [None]:
# Report the three-quarter column this frame was trimmed on; the cell previously
# repeated the one-quarter column from the 3M section.
df_high_coverage_gm_trim_9m[['date','gm$_past_3quarters_after_tax']].groupby(['date']).quantile(.75)

In [None]:
df_high_coverage_gm_trim = df_high_coverage[(df_high_coverage['margin_$_after_tax'] > 0)&(df_high_coverage['margin_$_after_tax'] < 500000)]

In [None]:
df_high_coverage['margin_$_after_tax'].quantile(0.9)

In [None]:
df_high_coverage[df_high_coverage['margin_$_after_tax'] > 800000]

In [None]:
df_high_coverage_gm_trim.shape[0] / df_high_coverage.shape[0]

In [None]:
sns.boxplot(df_high_coverage['margin_$_after_tax'])

In [None]:
df_high_coverage['margin_$_after_tax'].describe()

In [None]:
df_high_coverage['margin_$_after_tax'].quantile(0.9)

In [None]:
stats.percentileofscore(df_high_coverage['margin_$_after_tax'],270000, kind='strict')

In [None]:
plt.hist(df_high_coverage['margin_$_after_tax'],bins = 100)

In [None]:
df_high_coverage_gm_trim[['date','margin_$_after_tax']].groupby(['date']).quantile(.9)


In [None]:
df_high_coverage[['date','margin_$_after_tax']].groupby(['date']).quantile(.5)