In [1]:
import warnings
# NOTE(review): with no category/module arguments this filter silences every warning
# raised later in the notebook, including pandas deprecation and chained-assignment
# warnings — consider narrowing it to the specific categories that are known noise.
warnings.filterwarnings('ignore')

In [2]:
import json
import numpy 
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline  
import pyarrow
import sys
from tqdm import tqdm

from datetime import date
from dotenv import load_dotenv
from sqlalchemy import create_engine
from os import path
from typing import List,Dict, Tuple
from collections import defaultdict
pd.set_option("display.max_columns", None)

# Load variables from a local .env file (if any) into the process environment.
load_dotenv(verbose=True)
# Path to the BigQuery credentials file; may start with "~", which is expanded below.
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')
if not BIGQUERY_CREDENTIALS_PATH:
    # Fail fast with an actionable message: os.path.expanduser(None) would otherwise
    # raise an opaque "expected str, bytes or os.PathLike object" TypeError.
    raise RuntimeError(
        "BIGQUERY_CREDENTIALS_PATH is not set; define it in the environment or in a .env file."
    )
# SQLAlchemy engine shared by every query cell in this notebook.
engine = create_engine(
    'bigquery://bespoke-financial/ProdMetrcData',
    credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH),
)

# Make the project's helper directories importable; each path is resolved relative
# to the notebook's current working directory and appended in this order.
for _relative_dir in ("../core", "../../src", "../../scripts"):
    sys.path.append(path.realpath(path.join(os.getcwd(), _relative_dir)))

import create_queries
import prepare_data

from bespoke.inventory.analysis.shared import download_util, inventory_types
from bespoke.inventory.analysis import active_inventory_util as util
from bespoke.inventory.analysis import inventory_valuations_util as valuations_util

%load_ext autoreload
%autoreload 2

In [3]:
from underwriting import data_quality_checks

In [4]:
from underwriting import client_surveillance_jupyter

In [None]:
# def cogs_analysis(df_in:pd.DataFrame,df_sales:pd.DataFrame,freq: str) -> Tuple[pd.DataFrame,pd.DataFrame]:
#     df_sales['per_unit'] = df_sales['tx_total_price'] / df_sales['tx_quantity_sold']
#     # set frequency
#     if freq == 'monthly':
#         df_sales['date'] = df_sales['sales_datetime'].dt.strftime("%Y-%m")
#     elif freq == 'weekly':
#         df_sales['date'] = df_sales['sales_datetime'].dt.strftime("%Y-%W")
#         df_sales['week'] = df_sales['sales_datetime'].dt.strftime("%W")
#     # total # of trxns
#     s_total_count = df_sales.groupby('date')['tx_total_price'].count()
#     df_total_count = pd.Series(s_total_count).to_frame()
#     df_total_count = df_total_count.reset_index()
#     df_total_count.rename(columns={'tx_total_price':'total_count'}, inplace=True)
#     # revenue
#     s_revenue = df_sales.groupby('date')['tx_total_price'].sum()
#     df_revenue = pd.Series(s_revenue).to_frame()
#     df_revenue = df_revenue.reset_index()
#     df_revenue.rename(columns={'tx_total_price': 'revenue'}, inplace=True)

#     df_in['per_unit_incoming'] = df_in['shipper_wholesale_price'] / df_in['shipped_quantity']
    
#     # per unit price by package id
#     df_in_price = df_in[df_in['shipper_wholesale_price'].notnull()]
#     average_incoming_package_id = df_in_price.groupby('package_id')['per_unit_incoming'].mean()
#     df_avg_incoming_price = pd.Series(average_incoming_package_id).to_frame()
#     df_avg_incoming_price = df_avg_incoming_price.reset_index()
#     # per unit price by product name
#     average_incoming_product = df_in_price.groupby('product_name')['per_unit_incoming'].mean()
#     df_avg_product = pd.Series(average_incoming_product).to_frame()
#     df_avg_product = df_avg_product.reset_index()
#     df_avg_product.rename(columns={'per_unit_incoming':'per_unit_product'}, inplace=True)

#     # merge with (cogs by package id)
#     df_cogs_package_id = pd.merge(df_sales, df_avg_incoming_price, left_on='tx_package_id', right_on='package_id', how='left')
#     df_cogs_package_id['total_incoming'] = df_cogs_package_id['per_unit_incoming'] * df_cogs_package_id['tx_quantity_sold']
#     df_cogs_package_id.replace([numpy.inf], numpy.nan, inplace=True)
#     df_cogs_package_id_notnull = df_cogs_package_id[df_cogs_package_id['total_incoming'].notnull()]

#     # sum cogs by package id
#     s_cogs = df_cogs_package_id_notnull.groupby('date')['total_incoming'].sum()
#     df_cogs_id = pd.Series(s_cogs).to_frame()
#     df_cogs_id = df_cogs_id.reset_index()
#     # count # of trxn by package id
#     s_cogs_count = df_cogs_package_id_notnull.groupby('date')['total_incoming'].count()
#     df_cogs_count = pd.Series(s_cogs_count).to_frame()
#     df_cogs_count = df_cogs_count.reset_index()
#     df_cogs_count.rename(columns={'total_incoming':'count_incoming'}, inplace=True)
    
#     # merge with (cogs by product name)
#     df_cogs_average_product = pd.merge(df_cogs_package_id, df_avg_product, left_on='tx_product_name', right_on='product_name', how='left')
#     df_cogs_average_product['total_product'] = df_cogs_average_product['tx_quantity_sold'] * df_cogs_average_product['per_unit_product']
#     df_cogs_null = df_cogs_average_product[df_cogs_average_product['per_unit_incoming'].isnull()]
#     df_cogs_product = df_cogs_null[df_cogs_null['per_unit_product'].notnull()]
#     # sum cogs filldown by product name
#     product_sum = df_cogs_product.groupby('date')['total_product'].sum()
#     df_product_sum = pd.Series(product_sum).to_frame()
#     df_product_sum = df_product_sum.reset_index()
#     df_product_sum.rename(columns={'total_product':'product_sum'}, inplace=True)
#     # count # of trxn filldown by product name
#     product_count = df_cogs_product.groupby('date')['total_product'].count()
#     df_product_count = pd.Series(product_count).to_frame()
#     df_product_count = df_product_count.reset_index()
#     df_product_count.rename(columns={'total_product':'product_count'}, inplace=True)
#     df_cogs_product_df = pd.merge(df_product_sum, df_product_count)
    
#     # prepare summary
#     df_summary = pd.merge(df_revenue, df_cogs_product_df, how='left')
#     df_summary = pd.merge(df_summary, df_cogs_id, how='left')
#     df_summary['product_sum'] = df_summary['product_sum'].fillna(0)
#     df_summary['product_count'] = df_summary['product_count'].fillna(0)
#     # total cogs = by product id cogs + by product name cogs
#     df_summary['cogs'] = df_summary['total_incoming'] + df_summary['product_sum']
#     df_summary = pd.merge(df_summary, df_cogs_count)
#     df_summary = pd.merge(df_summary, df_total_count)
#     # total count = by package id count + by product count
#     df_summary['total_count_incoming'] = df_summary['count_incoming'] + df_summary['product_count']
#     df_summary['margin_$'] = df_summary['revenue'] - df_summary['cogs']
#     df_summary['margin_%'] = df_summary['margin_$'] / df_summary['revenue']
#     df_summary['coverage'] = df_summary['total_count_incoming'] / df_summary['total_count']
#     df_summary_simp = df_summary[['date', 'revenue', 'cogs', 'margin_$', 'margin_%', 'total_count_incoming','product_count','count_incoming', 'coverage']]
    
#     return df_summary_simp,df_cogs_average_product




In [5]:
# Start dates handed to the query builders in read_df_in / read_df_sales.
# Both queries currently share the same start date.
_ANALYSIS_START_DATE = '2020-01-01'
TRANSFER_PACKAGES_START_DATE = _ANALYSIS_START_DATE
SALES_TRANSACTIONS_START_DATE = _ANALYSIS_START_DATE
#CURRENT_MONTH = '2022-02'

In [6]:
# Load the company -> licenses mapping; the CSV's first column is used as the index.
_company_columns = ['company_name', 'company_identifier', 'state', 'licenses']
ca_df = pd.read_csv('df_0720.csv', index_col=0)
ca_df.columns = _company_columns
# Force identifiers to strings so they compare cleanly against string literals later.
ca_df = ca_df.astype({'company_identifier': str})

In [7]:
# Company identifiers left out of the analysis.
EXCLUDED_COMPANY_IDENTIFIERS = ['PL', 'WHT', 'GF', 'UR']

# .copy() makes ca_df_clean an independent frame, so columns assigned to it in later
# cells are not written onto a slice of ca_df (chained assignment).
ca_df_clean = ca_df[~ca_df['company_identifier'].isin(EXCLUDED_COMPANY_IDENTIFIERS)].copy()
ca_df_clean.shape

(82, 4)

In [8]:
# Split each ";"-separated license string into a list of license numbers.
# Iterating the column once avoids rebuilding list(ca_df_clean['licenses']) for every
# row, and assign() returns a new frame instead of writing a column onto a filtered slice.
ca_df_clean = ca_df_clean.assign(
    lic_list=[licenses.split(";") for licenses in ca_df_clean['licenses']]
)

In [9]:
# Preview the first rows, including the derived lic_list column.
ca_df_clean.head(n=5)

Unnamed: 0,company_name,company_identifier,state,licenses,lic_list
0,99 High Tide,99HT,CA,C10-0000279-LIC,[C10-0000279-LIC]
1,A Green Alternative Inc.,AGA,CA,C10-0000341-LIC,[C10-0000341-LIC]
2,Altum LLC,ALT,MI,PC-000310;AU-R-000380,"[PC-000310, AU-R-000380]"
3,Bella Sol,BS,MI,AU-R-000156;PC-000185,"[AU-R-000156, PC-000185]"
4,Big Medicine Cannabissary,BMC,CO,402-00930,[402-00930]


In [10]:
def read_df_in(COMPANY_IDENTIFIER,license_numbers):
    """Query incoming transfer packages and return them as a DataFrame.

    The query is built by ``create_queries`` from ``COMPANY_IDENTIFIER``, the
    module-level ``TRANSFER_PACKAGES_START_DATE`` and ``license_numbers``, then
    executed against the module-level ``engine``.

    Args:
        COMPANY_IDENTIFIER: company identifier(s); callers in this notebook pass a
            one-element list.
        license_numbers: license numbers forwarded to the query builder.

    Returns:
        The query result as returned by ``pd.read_sql_query``.
    """
    transfer_packages_sql = create_queries.create_company_incoming_transfer_packages_query(
        COMPANY_IDENTIFIER,
        TRANSFER_PACKAGES_START_DATE,
        license_numbers=license_numbers,
    )
    return pd.read_sql_query(transfer_packages_sql, engine)


In [11]:
def read_df_sales(COMPANY_IDENTIFIER,license_numbers):
    """Query sales receipts with transactions, dedupe them, and add date columns.

    The query is built by ``create_queries`` from ``COMPANY_IDENTIFIER``, the
    module-level ``SALES_TRANSACTIONS_START_DATE`` and ``license_numbers``, then
    executed against the module-level ``engine``. The result is passed through
    ``prepare_data.dedupe_sales_transactions``.

    Args:
        COMPANY_IDENTIFIER: company identifier(s); callers in this notebook pass a
            one-element list.
        license_numbers: license numbers forwarded to the query builder.

    Returns:
        The deduplicated frame with ``sales_datetime`` converted via
        ``pd.to_datetime`` and an added ``sales_month`` column formatted ``'%Y-%m'``.
    """
    sales_sql = create_queries.create_company_sales_receipts_with_transactions_query(
        COMPANY_IDENTIFIER,
        SALES_TRANSACTIONS_START_DATE,
        license_numbers=license_numbers,
    )
    raw_sales = pd.read_sql_query(sales_sql, engine)
    sales = prepare_data.dedupe_sales_transactions(raw_sales)
    # Parse once, then derive the month label from the parsed timestamps.
    sales_timestamps = pd.to_datetime(sales['sales_datetime'])
    sales['sales_datetime'] = sales_timestamps
    sales['sales_month'] = sales_timestamps.dt.strftime('%Y-%m')
    return sales


In [22]:
def get_cogs_list(company_list,transfer_packages_start_date,sales_transactrions_start_date,map_df,max_per_unit_price=10000):
    """Run the monthly COGS/coverage analysis for every license of every company.

    Args:
        company_list: iterable of company identifiers to process.
        transfer_packages_start_date: currently unused — ``read_df_in`` is called
            without a date and reads the module-level constant instead. Kept for
            interface compatibility.
        sales_transactrions_start_date: currently unused, for the same reason
            (``read_df_sales`` reads the module-level constant).
        map_df: frame with ``company_identifier``, ``state`` and ``lic_list``
            columns; ``lic_list`` holds the list of license numbers per company.
        max_per_unit_price: incoming packages whose per-unit price
            (``shipper_wholesale_price / shipped_quantity``) exceeds this value are
            dropped before the analysis. Rows where the ratio is NaN or infinite
            fail the comparison and are dropped as well.

    Returns:
        ``{company_identifier: {license_number: summary_frame}}`` where each
        summary frame is the first element returned by
        ``client_surveillance_jupyter.cogs_analysis_covered``, indexed by its
        ``date`` column.

    Raises:
        IndexError: if a company identifier has no row in ``map_df``.
    """
    cogs_df_list = {}
    for c in tqdm(company_list):
        print(c)
        # Look the company up once instead of filtering map_df per attribute.
        company_rows = map_df[map_df['company_identifier'] == c]
        if company_rows.empty:
            raise IndexError(f"company identifier {c!r} not found in map_df")
        company_row = company_rows.iloc[0]
        c_state = company_row['state']
        l_list = company_row['lic_list']

        df_summary_simp_list = {}
        for l in tqdm(l_list):
            print(l)
            df_in = read_df_in([c],[l])
            df_sales = read_df_sales([c],[l])
            df_in['per_unit_incoming'] = df_in['shipper_wholesale_price'] / df_in['shipped_quantity']
            # Drop implausibly expensive packages; copy so downstream code receives an
            # independent frame rather than a filtered slice.
            df_in = df_in[df_in['per_unit_incoming'] <= max_per_unit_price].copy()
            # The second return value is not needed here.
            df_summary_simp, _ = client_surveillance_jupyter.cogs_analysis_covered(df_in,df_sales,'monthly',c_state)
            df_summary_simp.index = df_summary_simp['date']
            # NOTE(review): the tax-adjusted and rolling gross-margin columns that used to
            # be derived here (in code that was commented out) appear in the summary frame
            # without it — presumably cogs_analysis_covered now produces them; confirm.
            df_summary_simp_list[l] = df_summary_simp
        cogs_df_list[c] = df_summary_simp_list
    return cogs_df_list

    

    

In [None]:
#list(ca_df_clean['company_identifier'])[0:3]

In [20]:
# Spot-check the state lookup for one company identifier.
ca_df_clean.loc[ca_df_clean['company_identifier'] == 'ST', 'state'].values[0]


'CA'

# cogs & coverage

In [23]:
# Run the per-company, per-license analysis for every company kept in ca_df_clean.
a = get_cogs_list(
    ca_df_clean['company_identifier'].tolist(),
    TRANSFER_PACKAGES_START_DATE,
    SALES_TRANSACTIONS_START_DATE,
    ca_df_clean,
)

  0%|          | 0/82 [00:00<?, ?it/s]

99HT



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000279-LIC



100%|██████████| 1/1 [00:07<00:00,  7.95s/it][A
  1%|          | 1/82 [00:07<10:44,  7.95s/it]

AGA



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000341-LIC



100%|██████████| 1/1 [00:21<00:00, 21.50s/it][A
  2%|▏         | 2/82 [00:29<21:13, 15.92s/it]

ALT



  0%|          | 0/2 [00:00<?, ?it/s][A

PC-000310



 50%|█████     | 1/2 [00:04<00:04,  4.72s/it][A

AU-R-000380



100%|██████████| 2/2 [00:15<00:00,  7.77s/it][A
  4%|▎         | 3/82 [00:45<20:44, 15.75s/it]

BS



  0%|          | 0/2 [00:00<?, ?it/s][A

AU-R-000156



 50%|█████     | 1/2 [00:09<00:09,  9.38s/it][A

PC-000185



100%|██████████| 2/2 [00:16<00:00,  8.08s/it][A
  5%|▍         | 4/82 [01:01<20:41, 15.91s/it]

BMC



  0%|          | 0/1 [00:00<?, ?it/s][A

402-00930



100%|██████████| 1/1 [00:06<00:00,  6.26s/it][A
  6%|▌         | 5/82 [01:07<15:57, 12.43s/it]

BBF



  0%|          | 0/1 [00:00<?, ?it/s][A

MR281525



100%|██████████| 1/1 [00:06<00:00,  6.48s/it][A
  7%|▋         | 6/82 [01:13<13:11, 10.41s/it]

BUD



  0%|          | 0/5 [00:00<?, ?it/s][A

C9-0000444-LIC



 20%|██        | 1/5 [00:05<00:21,  5.44s/it][A

C9-0000464-LIC



 40%|████      | 2/5 [00:12<00:18,  6.19s/it][A

C9-0000467-LIC



 60%|██████    | 3/5 [00:18<00:12,  6.40s/it][A

C9-0000167-LIC



 80%|████████  | 4/5 [00:30<00:08,  8.35s/it][A

C9-0000399-LIC



100%|██████████| 5/5 [00:37<00:00,  7.54s/it][A
  9%|▊         | 7/82 [01:51<24:10, 19.34s/it]

BRC



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000603-LIC



100%|██████████| 1/1 [00:08<00:00,  8.95s/it][A
 10%|▉         | 8/82 [02:00<19:46, 16.03s/it]

BYN



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000528-LIC



100%|██████████| 1/1 [00:09<00:00,  9.61s/it][A
 11%|█         | 9/82 [02:10<17:03, 14.02s/it]

CHO



  0%|          | 0/1 [00:00<?, ?it/s][A

AU-R-000575



100%|██████████| 1/1 [00:05<00:00,  5.88s/it][A
 12%|█▏        | 10/82 [02:16<13:48, 11.51s/it]

CG



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000157-LIC



100%|██████████| 1/1 [00:05<00:00,  5.87s/it][A
 13%|█▎        | 11/82 [02:21<11:34,  9.78s/it]

CPA



  0%|          | 0/2 [00:00<?, ?it/s][A

C9-0000348-LIC



 50%|█████     | 1/2 [00:09<00:09,  9.85s/it][A

C10-0000461-LIC



100%|██████████| 2/2 [00:29<00:00, 14.52s/it][A
 15%|█▍        | 12/82 [02:50<18:15, 15.64s/it]

CPC



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000056-LIC



100%|██████████| 1/1 [00:09<00:00,  9.01s/it][A
 16%|█▌        | 13/82 [02:59<15:40, 13.63s/it]

CSC



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000670-LIC



100%|██████████| 1/1 [00:16<00:00, 16.31s/it][A
 17%|█▋        | 14/82 [03:16<16:22, 14.44s/it]

CSCC



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000918-LIC



100%|██████████| 1/1 [00:07<00:00,  7.24s/it][A
 18%|█▊        | 15/82 [03:23<13:42, 12.27s/it]

CCC



  0%|          | 0/1 [00:00<?, ?it/s][A

C12-0000087-LIC



100%|██████████| 1/1 [00:18<00:00, 18.69s/it][A
 20%|█▉        | 16/82 [03:42<15:37, 14.21s/it]

CLC



  0%|          | 0/2 [00:00<?, ?it/s][A

C10-0000986-LIC



 50%|█████     | 1/2 [00:03<00:03,  3.68s/it][A

C12-0000318-LIC



100%|██████████| 2/2 [00:06<00:00,  3.28s/it][A
 21%|██        | 17/82 [03:48<12:54, 11.91s/it]

CC



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000758-LIC



100%|██████████| 1/1 [00:07<00:00,  7.34s/it][A
 22%|██▏       | 18/82 [03:56<11:14, 10.54s/it]

CED



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000094-LIC



100%|██████████| 1/1 [00:45<00:00, 45.26s/it][A
 23%|██▎       | 19/82 [04:41<22:00, 20.97s/it]

DWF



  0%|          | 0/2 [00:00<?, ?it/s][A

402R-00296



 50%|█████     | 1/2 [00:04<00:04,  4.79s/it][A

402R-00804



100%|██████████| 2/2 [00:10<00:00,  5.33s/it][A
 24%|██▍       | 20/82 [04:52<18:28, 17.87s/it]

DCO



  0%|          | 0/2 [00:00<?, ?it/s][A

050-10052885D4C



 50%|█████     | 1/2 [00:09<00:09,  9.88s/it][A

050-10070593E9E



100%|██████████| 2/2 [00:16<00:00,  8.12s/it][A
 26%|██▌       | 21/82 [05:08<17:40, 17.38s/it]

DL



  0%|          | 0/1 [00:00<?, ?it/s][A

MR283369



100%|██████████| 1/1 [00:08<00:00,  8.80s/it][A
 27%|██▋       | 22/82 [05:17<14:48, 14.81s/it]

DGG



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000016-LIC



100%|██████████| 1/1 [00:06<00:00,  6.02s/it][A
 28%|██▊       | 23/82 [05:23<11:58, 12.17s/it]

DG



  0%|          | 0/6 [00:00<?, ?it/s][A

AU-R-000287



 17%|█▋        | 1/6 [00:11<00:57, 11.44s/it][A

AU-R-000359



 33%|███▎      | 2/6 [00:20<00:39, 10.00s/it][A

AU-R-000422



 50%|█████     | 3/6 [00:35<00:36, 12.30s/it][A

AU-R-000470



 67%|██████▋   | 4/6 [00:42<00:20, 10.23s/it][A

AU-R-000572



 83%|████████▎ | 5/6 [00:49<00:09,  9.02s/it][A

PC-000485



100%|██████████| 6/6 [00:53<00:00,  8.98s/it][A
 29%|██▉       | 24/82 [06:16<23:51, 24.68s/it]

EL



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000370-LIC



100%|██████████| 1/1 [00:05<00:00,  5.98s/it][A
 30%|███       | 25/82 [06:22<18:06, 19.07s/it]

EMA



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000774-LIC



100%|██████████| 1/1 [00:11<00:00, 12.00s/it][A
 32%|███▏      | 26/82 [06:34<15:49, 16.95s/it]

EMF



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000939-LIC



100%|██████████| 1/1 [00:07<00:00,  7.00s/it][A
 33%|███▎      | 27/82 [06:41<12:48, 13.97s/it]

EMM



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000786-LIC



100%|██████████| 1/1 [00:12<00:00, 12.12s/it][A
 34%|███▍      | 28/82 [06:54<12:04, 13.41s/it]

EMT



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000695-LIC



100%|██████████| 1/1 [00:15<00:00, 15.55s/it][A
 35%|███▌      | 29/82 [07:09<12:24, 14.05s/it]

EPC



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000034-LIC



100%|██████████| 1/1 [00:10<00:00, 10.64s/it][A
 37%|███▋      | 30/82 [07:20<11:17, 13.03s/it]

EH



  0%|          | 0/1 [00:00<?, ?it/s][A

MR283073



100%|██████████| 1/1 [00:04<00:00,  4.77s/it][A
 38%|███▊      | 31/82 [07:25<08:58, 10.55s/it]

FI



  0%|          | 0/3 [00:00<?, ?it/s][A

C10-0000606-LIC



 33%|███▎      | 1/3 [00:03<00:06,  3.44s/it][A

C10-0000753-LIC



 67%|██████▋   | 2/3 [00:10<00:05,  5.85s/it][A

C12-0000400-LIC



100%|██████████| 3/3 [00:16<00:00,  5.47s/it][A
 39%|███▉      | 32/82 [07:41<10:15, 12.31s/it]

FW



  0%|          | 0/5 [00:00<?, ?it/s][A

402-00558



 20%|██        | 1/5 [00:07<00:31,  7.91s/it][A

402-00891



 40%|████      | 2/5 [00:18<00:28,  9.57s/it][A

402R-00017



 60%|██████    | 3/5 [00:42<00:31, 15.87s/it][A

402R-00020



 80%|████████  | 4/5 [01:13<00:22, 22.12s/it][A

402R-00747



100%|██████████| 5/5 [01:29<00:00, 17.82s/it][A
 40%|████      | 33/82 [09:10<28:51, 35.34s/it]

GTC



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000849-LIC



100%|██████████| 1/1 [00:11<00:00, 11.50s/it][A
 41%|████▏     | 34/82 [09:22<22:33, 28.19s/it]

GT



  0%|          | 0/2 [00:00<?, ?it/s][A

C9-0000385-LIC



 50%|█████     | 1/2 [00:11<00:11, 11.41s/it][A

C9-0000170-LIC



100%|██████████| 2/2 [00:17<00:00,  8.50s/it][A
 43%|████▎     | 35/82 [09:39<19:27, 24.84s/it]

GLNR



  0%|          | 0/6 [00:00<?, ?it/s][A

PC-000334



 17%|█▋        | 1/6 [00:04<00:20,  4.06s/it][A

PC-000613



 33%|███▎      | 2/6 [00:07<00:15,  3.95s/it][A

PC-000722



 50%|█████     | 3/6 [00:11<00:11,  3.88s/it][A

AU-R-000182



 67%|██████▋   | 4/6 [00:18<00:10,  5.14s/it][A

AU-R-000461



 83%|████████▎ | 5/6 [00:23<00:05,  5.14s/it][A

AU-R-000559



100%|██████████| 6/6 [00:28<00:00,  4.82s/it][A
 44%|████▍     | 36/82 [10:07<19:58, 26.06s/it]

GRG



  0%|          | 0/2 [00:00<?, ?it/s][A

C10-0000169-LIC



 50%|█████     | 1/2 [00:27<00:27, 27.96s/it][A

C10-0000408-LIC



100%|██████████| 2/2 [00:42<00:00, 21.19s/it][A
 45%|████▌     | 37/82 [10:50<23:12, 30.95s/it]

GTR



  0%|          | 0/2 [00:00<?, ?it/s][A

PC-000225



 50%|█████     | 1/2 [00:07<00:07,  7.40s/it][A

AU-R-000163



100%|██████████| 2/2 [00:31<00:00, 15.92s/it][A
 46%|████▋     | 38/82 [11:22<22:53, 31.22s/it]

GHC



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000414-LIC



100%|██████████| 1/1 [00:16<00:00, 16.80s/it][A
 48%|████▊     | 39/82 [11:39<19:16, 26.90s/it]

GFEE



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000224-LIC



100%|██████████| 1/1 [02:02<00:00, 122.10s/it][A
 49%|████▉     | 40/82 [13:41<38:49, 55.46s/it]

GFWF



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000117-LIC



100%|██████████| 1/1 [01:23<00:00, 83.95s/it][A
 50%|█████     | 41/82 [15:05<43:44, 64.01s/it]

HB



  0%|          | 0/3 [00:00<?, ?it/s][A

C10-0000624-LIC



 33%|███▎      | 1/3 [00:14<00:28, 14.06s/it][A

C10-0000301-LIC



 67%|██████▋   | 2/3 [00:19<00:08,  8.97s/it][A

C10-0000339-LIC



100%|██████████| 3/3 [00:43<00:00, 14.41s/it][A
 51%|█████     | 42/82 [15:48<38:31, 57.78s/it]

HS



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000441-LIC



100%|██████████| 1/1 [00:06<00:00,  6.96s/it][A
 52%|█████▏    | 43/82 [15:55<27:38, 42.53s/it]

HPCC



  0%|          | 0/2 [00:00<?, ?it/s][A

C10-0000064-LIC



 50%|█████     | 1/2 [01:01<01:01, 61.98s/it][A

C10-0000005-LIC



100%|██████████| 2/2 [01:13<00:00, 36.96s/it][A
 54%|█████▎    | 44/82 [17:09<32:53, 51.95s/it]

HC



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000341-LIC



100%|██████████| 1/1 [00:05<00:00,  5.62s/it][A
 55%|█████▍    | 45/82 [17:14<23:27, 38.05s/it]

IDC



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000320-LIC



100%|██████████| 1/1 [00:10<00:00, 10.04s/it][A
 56%|█████▌    | 46/82 [17:24<17:47, 29.65s/it]

ID



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000166-LIC



100%|██████████| 1/1 [00:04<00:00,  4.68s/it][A
 57%|█████▋    | 47/82 [17:29<12:55, 22.16s/it]

KAR



  0%|          | 0/2 [00:00<?, ?it/s][A

402-00693



 50%|█████     | 1/2 [00:05<00:05,  5.34s/it][A

402R-00744



100%|██████████| 2/2 [00:10<00:00,  5.42s/it][A
 59%|█████▊    | 48/82 [17:40<10:37, 18.76s/it]

KC



  0%|          | 0/11 [00:00<?, ?it/s][A

402-00323



  9%|▉         | 1/11 [00:06<01:04,  6.43s/it][A

402-00573



 18%|█▊        | 2/11 [00:12<00:53,  5.99s/it][A

402-00664



 27%|██▋       | 3/11 [00:17<00:44,  5.56s/it][A

402R-00515



 36%|███▋      | 4/11 [00:26<00:49,  7.04s/it][A

402R-00544



 45%|████▌     | 5/11 [00:34<00:44,  7.39s/it][A

402R-00900



 55%|█████▍    | 6/11 [00:41<00:36,  7.31s/it][A

402R-00002



 64%|██████▎   | 7/11 [00:47<00:27,  6.87s/it][A

402-01146



 73%|███████▎  | 8/11 [00:51<00:17,  5.97s/it][A

402-00037



 82%|████████▏ | 9/11 [00:57<00:11,  5.79s/it][A

402R-00035



 91%|█████████ | 10/11 [01:03<00:06,  6.13s/it][A

402R-00574



100%|██████████| 11/11 [01:10<00:00,  6.45s/it][A
 60%|█████▉    | 49/82 [18:51<18:55, 34.41s/it]

LEG



  0%|          | 0/4 [00:00<?, ?it/s][A

PC-000231



 25%|██▌       | 1/4 [00:15<00:46, 15.53s/it][A

PC-000335



 50%|█████     | 2/4 [00:21<00:19,  9.80s/it][A

AU-R-000260



 75%|███████▌  | 3/4 [00:48<00:17, 17.53s/it][A

AU-R-000261



100%|██████████| 4/4 [01:24<00:00, 21.07s/it][A
 61%|██████    | 50/82 [20:15<26:19, 49.37s/it]

LBC



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000154-LIC



100%|██████████| 1/1 [00:06<00:00,  6.41s/it][A
 62%|██████▏   | 51/82 [20:21<18:50, 36.48s/it]

MSS



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000816-LIC



100%|██████████| 1/1 [00:07<00:00,  7.49s/it][A
 63%|██████▎   | 52/82 [20:29<13:53, 27.78s/it]

ML



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000300-LIC



100%|██████████| 1/1 [00:14<00:00, 14.35s/it][A
 65%|██████▍   | 53/82 [20:43<11:28, 23.75s/it]

MW



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000332-LIC



100%|██████████| 1/1 [00:20<00:00, 20.40s/it][A
 66%|██████▌   | 54/82 [21:04<10:36, 22.75s/it]

MPW



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000547-LIC



100%|██████████| 1/1 [00:29<00:00, 29.91s/it][A
 67%|██████▋   | 55/82 [21:34<11:12, 24.90s/it]

MD



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000247-LIC



100%|██████████| 1/1 [00:08<00:00,  8.40s/it][A
 68%|██████▊   | 56/82 [21:42<08:38, 19.95s/it]

ND



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000427-LIC



100%|██████████| 1/1 [00:06<00:00,  6.51s/it][A
 70%|██████▉   | 57/82 [21:49<06:37, 15.92s/it]

NECC



  0%|          | 0/1 [00:00<?, ?it/s][A

MR283416



100%|██████████| 1/1 [00:08<00:00,  8.89s/it][A
 71%|███████   | 58/82 [21:57<05:31, 13.81s/it]

PGM



  0%|          | 0/11 [00:00<?, ?it/s][A

PC-000415



  9%|▉         | 1/11 [00:06<01:09,  6.92s/it][A

PC-000551



 18%|█▊        | 2/11 [00:11<00:49,  5.49s/it][A

PC-000725



 27%|██▋       | 3/11 [00:16<00:43,  5.43s/it][A

PC-000167



 36%|███▋      | 4/11 [00:22<00:39,  5.63s/it][A

PC-000169



 45%|████▌     | 5/11 [00:27<00:32,  5.47s/it][A

AU-R-000196



 55%|█████▍    | 6/11 [00:45<00:48,  9.73s/it][A

AU-R-000197



 64%|██████▎   | 7/11 [00:59<00:43, 10.91s/it][A

AU-R-000198



 73%|███████▎  | 8/11 [01:12<00:34, 11.62s/it][A

AU-R-000366



 82%|████████▏ | 9/11 [01:22<00:22, 11.05s/it][A

AU-R-000506



 91%|█████████ | 10/11 [01:39<00:13, 13.07s/it][A

PC-000160



100%|██████████| 11/11 [01:44<00:00,  9.53s/it][A
 72%|███████▏  | 59/82 [23:42<15:46, 41.13s/it]

PRP



  0%|          | 0/3 [00:00<?, ?it/s][A

C10-0000155-LIC



 33%|███▎      | 1/3 [00:14<00:29, 14.54s/it][A

C10-0000286-LIC



 67%|██████▋   | 2/3 [00:30<00:15, 15.28s/it][A

C10-0000403-LIC



100%|██████████| 3/3 [00:52<00:00, 17.53s/it][A
 73%|███████▎  | 60/82 [24:35<16:20, 44.57s/it]

PRL



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000759-LIC



100%|██████████| 1/1 [00:16<00:00, 16.42s/it][A
 74%|███████▍  | 61/82 [24:51<12:38, 36.13s/it]

QR



  0%|          | 0/4 [00:00<?, ?it/s][A

AU-R-000233



 25%|██▌       | 1/4 [00:17<00:53, 18.00s/it][A

AU-R-000346



 50%|█████     | 2/4 [00:40<00:41, 20.81s/it][A

PC-000356



 75%|███████▌  | 3/4 [00:45<00:13, 13.33s/it][A

AU-R-000633



100%|██████████| 4/4 [00:56<00:00, 14.02s/it][A
 76%|███████▌  | 62/82 [25:47<14:02, 42.12s/it]

MC



  0%|          | 0/1 [00:00<?, ?it/s][A

050-10111574ADA



100%|██████████| 1/1 [00:07<00:00,  7.91s/it][A
 77%|███████▋  | 63/82 [25:55<10:05, 31.86s/it]

RA



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000596-LIC



100%|██████████| 1/1 [00:11<00:00, 11.18s/it][A
 78%|███████▊  | 64/82 [26:07<07:41, 25.66s/it]

RGD



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000057-LIC



100%|██████████| 1/1 [00:06<00:00,  6.66s/it][A
 79%|███████▉  | 65/82 [26:13<05:39, 19.96s/it]

SGF



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000454-LIC



100%|██████████| 1/1 [00:04<00:00,  4.98s/it][A
 80%|████████  | 66/82 [26:18<04:07, 15.46s/it]

SFV



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000041-LIC



100%|██████████| 1/1 [00:25<00:00, 25.53s/it][A
 82%|████████▏ | 67/82 [26:44<04:37, 18.48s/it]

SV



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000146-LIC



100%|██████████| 1/1 [00:10<00:00, 10.96s/it][A
 83%|████████▎ | 68/82 [26:55<03:47, 16.23s/it]

SLCC



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000474-LIC



100%|██████████| 1/1 [00:06<00:00,  6.98s/it][A
 84%|████████▍ | 69/82 [27:02<02:54, 13.45s/it]

SO



  0%|          | 0/3 [00:00<?, ?it/s][A

C12-0000328-LIC



 33%|███▎      | 1/3 [00:03<00:06,  3.25s/it][A

C9-0000130-LIC



 67%|██████▋   | 2/3 [00:10<00:05,  5.87s/it][A

C10-0000271-LIC



100%|██████████| 3/3 [00:20<00:00,  6.77s/it][A
 85%|████████▌ | 70/82 [27:22<03:06, 15.51s/it]

ST



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000323-LIC



100%|██████████| 1/1 [00:09<00:00,  9.66s/it][A
 87%|████████▋ | 71/82 [27:32<02:31, 13.76s/it]

DW



  0%|          | 0/1 [00:00<?, ?it/s][A

MR282376



100%|██████████| 1/1 [00:25<00:00, 25.52s/it][A
 88%|████████▊ | 72/82 [27:57<02:52, 17.29s/it]

FD



  0%|          | 0/1 [00:00<?, ?it/s][A

C10-0000916-LIC



100%|██████████| 1/1 [00:05<00:00,  5.95s/it][A
 89%|████████▉ | 73/82 [28:03<02:04, 13.88s/it]

TGL



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000412-LIC



100%|██████████| 1/1 [00:04<00:00,  4.64s/it][A
 90%|█████████ | 74/82 [28:08<01:28, 11.11s/it]

TJR



  0%|          | 0/1 [00:00<?, ?it/s][A

C9-0000105-LIC



100%|██████████| 1/1 [00:34<00:00, 34.16s/it][A
 91%|█████████▏| 75/82 [28:42<02:06, 18.03s/it]

TTS



  0%|          | 0/2 [00:00<?, ?it/s][A

PC-000718



 50%|█████     | 1/2 [00:03<00:03,  3.82s/it][A

AU-R-000546



100%|██████████| 2/2 [00:08<00:00,  4.41s/it][A
 93%|█████████▎| 76/82 [28:51<01:31, 15.27s/it]

TT



  0%|          | 0/2 [00:00<?, ?it/s][A

C9-0000551-LIC



 50%|█████     | 1/2 [00:07<00:07,  7.05s/it][A

C9-0000451-LIC



100%|██████████| 2/2 [00:16<00:00,  8.43s/it][A
 94%|█████████▍| 77/82 [29:08<01:18, 15.75s/it]

T4L



  0%|          | 0/1 [00:00<?, ?it/s][A

C12-0000302-LIC



100%|██████████| 1/1 [00:05<00:00,  5.67s/it][A
 95%|█████████▌| 78/82 [29:13<00:50, 12.72s/it]

TL



  0%|          | 0/19 [00:00<?, ?it/s][A

402-00390



  5%|▌         | 1/19 [00:14<04:17, 14.33s/it][A

402R-00807



 11%|█         | 2/19 [00:28<03:58, 14.01s/it][A

402-00473



 16%|█▌        | 3/19 [00:33<02:39,  9.99s/it][A

402-01201



 21%|██        | 4/19 [00:51<03:17, 13.14s/it][A

402R-00066



 26%|██▋       | 5/19 [00:58<02:31, 10.86s/it][A

402R-00487



 32%|███▏      | 6/19 [01:00<01:42,  7.90s/it][A

402R-00505



 37%|███▋      | 7/19 [01:02<01:12,  6.02s/it][A

402R-00536



 42%|████▏     | 8/19 [01:09<01:11,  6.50s/it][A

402R-00571



 47%|████▋     | 9/19 [01:12<00:51,  5.15s/it][A

402R-00602



 53%|█████▎    | 10/19 [01:22<01:01,  6.87s/it][A

402R-00808



 58%|█████▊    | 11/19 [01:25<00:44,  5.52s/it][A

402-00370



 63%|██████▎   | 12/19 [01:30<00:38,  5.52s/it][A

402-01142



 68%|██████▊   | 13/19 [01:36<00:33,  5.51s/it][A

402R-00041



 74%|███████▎  | 14/19 [01:45<00:32,  6.59s/it][A

402R-00242



 79%|███████▉  | 15/19 [01:47<00:21,  5.30s/it][A

402R-00246



 84%|████████▍ | 16/19 [01:51<00:14,  4.72s/it][A

402R-00362



 89%|████████▉ | 17/19 [01:53<00:08,  4.02s/it][A

402R-00573



 95%|█████████▍| 18/19 [02:01<00:05,  5.38s/it][A

402R-00781



100%|██████████| 19/19 [02:12<00:00,  6.97s/it][A
 96%|█████████▋| 79/82 [31:26<02:25, 48.63s/it]

UHHC



  0%|          | 0/2 [00:00<?, ?it/s][A

C10-0000817-LIC



 50%|█████     | 1/2 [00:06<00:06,  6.58s/it][A

C9-0000082-LIC



100%|██████████| 2/2 [00:16<00:00,  8.21s/it][A
 98%|█████████▊| 80/82 [31:42<01:17, 38.96s/it]

VS



  0%|          | 0/2 [00:00<?, ?it/s][A

402-00840



 50%|█████     | 1/2 [00:05<00:05,  5.46s/it][A

402R-00545



100%|██████████| 2/2 [00:30<00:00, 15.07s/it][A
 99%|█████████▉| 81/82 [32:12<00:36, 36.32s/it]

VOY



  0%|          | 0/3 [00:00<?, ?it/s][A

C12-0000030-LIC



 33%|███▎      | 1/3 [00:04<00:09,  4.54s/it][A

C10-0000802-LIC



 67%|██████▋   | 2/3 [00:10<00:05,  5.44s/it][A

C12-0000159-LIC



100%|██████████| 3/3 [00:13<00:00,  4.61s/it][A
100%|██████████| 82/82 [32:26<00:00, 23.74s/it]


In [None]:
# def calculate_cogs(company_identifier,transfer_packages_start_date,sales_transactrions_start_date,map_df):
#     # setup
#     df_summary_simp_list = {}
#     # read data
#     df_in = 
#     df_in,df_sales_deduped,bad_dl,unknown_transfer,rwp,miss_incoming,miss_receipts = data_quality_checks.run(company_identifier,transfer_packages_start_date,sales_transactrions_start_date)    
#     licenses = map_df[map_df['company_identifier'] == company_identifier]['lic_list'].to_list()[0]
#     print(licenses)
#     # loop thru locations
#     for l in tqdm(licenses):
#         print(l)
#         df_in_l = df_in[df_in['license_number'] == l]
#         df_sales_deduped_l = df_sales_deduped[df_sales_deduped['license_number'] == l]
#         df_in_l['per_unit_incoming'] = df_in_l['shipper_wholesale_price'] / df_in_l['shipped_quantity']
#         df_in_l = df_in_l[df_in_l['per_unit_incoming'] <= 10000]
#         df_sales_l = df_sales_deduped_l
#         df_summary_simp,df_cogs_average_product = cogs_analysis(df_in_l,df_sales_l,'monthly')
#         df_summary_simp.index = df_summary_simp.date 
#         # tax treatment
#         df_summary_simp['revenue_after_tax'] = df_summary_simp['revenue'] * 1.15
#         df_summary_simp['cogs_after_tax'] = df_summary_simp['cogs'] * 1.27
#         df_summary_simp['margin_$_after_tax'] = df_summary_simp['revenue_after_tax'] - df_summary_simp['cogs_after_tax']
#         df_summary_simp['margin_%_after_tax'] = df_summary_simp['margin_$_after_tax'] / df_summary_simp['revenue_after_tax']
        
#         #rolling gm %
#         df_summary_simp['gm_past_quarter_after_tax'] = df_summary_simp[['margin_%_after_tax']].rolling(3).mean().values
#         df_summary_simp['gm_past_2quarters_after_tax'] = df_summary_simp[['margin_%_after_tax']].rolling(6).mean().values
#         df_summary_simp['gm_past_3quarters_after_tax'] = df_summary_simp[['margin_%_after_tax']].rolling(9).mean().values
        
#         #rolling gm $
#         df_summary_simp['gm$_past_quarter_after_tax'] = df_summary_simp[['margin_$_after_tax']].rolling(3).mean().values
#         df_summary_simp['gm$_past_2quarters_after_tax'] = df_summary_simp[['margin_$_after_tax']].rolling(6).mean().values
#         df_summary_simp['gm$_past_3quarters_after_tax'] = df_summary_simp[['margin_$_after_tax']].rolling(9).mean().values
#         df_summary_simp_list[l] = df_summary_simp
     
#     return df_summary_simp_list
    

In [None]:
# def get_ca_gmv_change_bm_list(company_list,transfer_packages_start_date,sales_transactrions_start_date):
#     cogs_df_list = {}
#     for c in tqdm(company_list):
#         c
#         cogs_df = calculate_ca_gmv_change([c],transfer_packages_start_date,sales_transactrions_start_date,ca_df_clean)
#         cogs_df_list[c] = cogs_df
#     return cogs_df_list
    

In [None]:
#test out
#a,b = get_ca_gmv_change_bm_list(list(ca_df_clean['company_identifier'])[0:2],TRANSFER_PACKAGES_START_DATE,SALES_TRANSACTIONS_START_DATE,CURRENT_MONTH)


# cogs & coverage

In [24]:
# Stack every per-location summary frame held in `a` (indexed a[company][location])
# into one long frame, tagging each row with its company and location.
# Frames are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copies the accumulated frame on every iteration.
location_frames = []
for k in a.keys():
    print(k)
    for l in a[k].keys():
        print(l)
        data = a[k][l]
        data['company'] = k
        data['location'] = l
        print(data.shape)
        location_frames.append(data)
        #data.to_excel('ca_location_analysis/'+'analysis_' + k + '_'+ l+'.xlsx')
# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(location_frames) if location_frames else pd.DataFrame()

99HT
C10-0000279-LIC
(24, 31)
AGA
C10-0000341-LIC
(21, 31)
ALT
PC-000310
(19, 31)
AU-R-000380
(18, 31)
BS
AU-R-000156
(29, 31)
PC-000185
(29, 31)
BMC
402-00930
(31, 31)
BBF
MR281525
(26, 31)
BUD
C9-0000444-LIC
(4, 31)
C9-0000464-LIC
(5, 31)
C9-0000467-LIC
(4, 31)
C9-0000167-LIC
(15, 31)
C9-0000399-LIC
(12, 31)
BRC
C10-0000603-LIC
(20, 31)
BYN
C10-0000528-LIC
(21, 31)
CHO
AU-R-000575
(10, 31)
CG
C9-0000157-LIC
(26, 31)
CPA
C9-0000348-LIC
(21, 31)
C10-0000461-LIC
(31, 31)
CPC
C9-0000056-LIC
(19, 31)
CSC
C10-0000670-LIC
(32, 31)
CSCC
C10-0000918-LIC
(9, 31)
CCC
C12-0000087-LIC
(32, 31)
CLC
C10-0000986-LIC
(0, 31)
C12-0000318-LIC
(0, 31)
CC
C10-0000758-LIC
(18, 31)
CED
C9-0000094-LIC
(19, 31)
DWF
402R-00296
(0, 31)
402R-00804
(20, 31)
DCO
050-10052885D4C
(26, 31)
050-10070593E9E
(27, 31)
DL
MR283369
(14, 31)
DGG
C9-0000016-LIC
(32, 31)
DG
AU-R-000287
(20, 31)
AU-R-000359
(20, 31)
AU-R-000422
(17, 31)
AU-R-000470
(14, 31)
AU-R-000572
(10, 31)
PC-000485
(18, 31)
EL
C9-0000370-LIC
(24, 31)
EM

In [25]:
len(df['company'].unique())

81

In [26]:
df['id'] = list(zip(df.company, df.location))
df = df.reset_index(drop = True)

In [27]:
len(df['id'].unique())

155

In [None]:
#df.to_csv('all_metrc_cogs_0316.csv')

In [28]:
df.tail()

Unnamed: 0,date,revenue,revenue_old,cogs,margin_$,margin_$_old,margin_%,margin_%_old,total_count_incoming,product_count,count_incoming,coverage,total_count,revenue_after_tax,cogs_after_tax,margin_$_after_tax,margin_%_after_tax,gm_past_quarter,gm_past_2quarters,gm_past_3quarters,sum_cogs_past_3months,gm_past_quarter_after_tax,gm_past_2quarters_after_tax,gm_past_3quarters_after_tax,gm_final,gm_past_quarter_final,gm_past_2quarters_final,gm_past_3quarters_final,revenue_change,company,location,id
3199,2022-04,41904.76,42846.56,19041.6,22863.16,23804.96,0.55,0.56,2067.0,45.0,2022,0.99,2087,48190.47,24040.02,24150.46,0.5,0.55,0.57,,41191.0,0.51,0.52,,0.5,0.51,0.52,,0.53,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)"
3200,2022-05,67441.46,69440.47,30294.39,37147.07,39146.08,0.55,0.56,2551.0,36.0,2515,0.98,2596,77557.68,38246.66,39311.01,0.51,0.55,0.55,0.56,61030.06,0.51,0.5,0.52,0.51,0.51,0.5,0.52,0.62,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)"
3201,2022-06,84026.36,89339.83,35369.94,48656.42,53969.89,0.58,0.6,2938.0,152.0,2786,0.95,3107,96630.31,44654.55,51975.77,0.54,0.56,0.55,0.57,84705.92,0.52,0.5,0.53,0.54,0.52,0.5,0.53,0.29,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)"
3202,2022-07,95669.58,100820.83,38943.08,56726.5,61877.75,0.59,0.61,3551.0,248.0,3303,0.95,3739,110020.02,49165.64,60854.37,0.55,0.57,0.56,0.57,104607.41,0.53,0.52,0.53,0.55,0.53,0.52,0.53,0.13,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)"
3203,2022-08,84701.14,94151.47,35460.42,49240.72,58691.05,0.58,0.62,2935.0,287.0,2648,0.9,3263,97406.31,44768.78,52637.53,0.54,0.58,0.57,0.56,109773.44,0.54,0.53,0.52,0.54,0.54,0.53,0.52,-0.07,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)"


In [29]:
df_with_state = pd.merge(df,ca_df_clean[['company_identifier','state']],left_on = 'company',right_on = 'company_identifier',how = 'left')



In [30]:
df_with_state

Unnamed: 0,date,revenue,revenue_old,cogs,margin_$,margin_$_old,margin_%,margin_%_old,total_count_incoming,product_count,count_incoming,coverage,total_count,revenue_after_tax,cogs_after_tax,margin_$_after_tax,margin_%_after_tax,gm_past_quarter,gm_past_2quarters,gm_past_3quarters,sum_cogs_past_3months,gm_past_quarter_after_tax,gm_past_2quarters_after_tax,gm_past_3quarters_after_tax,gm_final,gm_past_quarter_final,gm_past_2quarters_final,gm_past_3quarters_final,revenue_change,company,location,id,company_identifier,state
0,2020-07,22.00,137.00,10.00,12.00,127.00,0.55,0.93,1.0,0.0,1,0.33,3,25.30,12.62,12.67,0.50,,,,,,,,0.50,,,,,99HT,C10-0000279-LIC,"(99HT, C10-0000279-LIC)",99HT,CA
1,2020-08,42.00,175.25,16.00,26.00,159.25,0.62,0.91,1.0,0.0,1,0.25,4,48.30,20.20,28.10,0.58,,,,,,,,0.58,,,,0.28,99HT,C10-0000279-LIC,"(99HT, C10-0000279-LIC)",99HT,CA
2,2020-11,223.00,381.00,74.12,148.88,306.88,0.67,0.81,7.0,3.0,4,0.70,10,256.45,93.58,162.87,0.64,0.61,,,100.12,0.57,,,0.64,0.57,,,1.17,99HT,C10-0000279-LIC,"(99HT, C10-0000279-LIC)",99HT,CA
3,2020-12,10007.66,15205.81,3067.13,6940.53,12138.68,0.69,0.80,225.0,184.0,41,0.66,343,11508.81,3872.25,7636.56,0.66,0.66,,,3157.25,0.63,,,0.66,0.63,,,38.91,99HT,C10-0000279-LIC,"(99HT, C10-0000279-LIC)",99HT,CA
4,2021-01,246883.53,291646.25,82189.05,164694.48,209457.20,0.67,0.72,5584.0,1979.0,3605,0.81,6908,283916.06,103763.68,180152.38,0.63,0.68,,,85330.30,0.64,,,0.63,0.64,,,18.18,99HT,C10-0000279-LIC,"(99HT, C10-0000279-LIC)",99HT,CA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3199,2022-04,41904.76,42846.56,19041.60,22863.16,23804.96,0.55,0.56,2067.0,45.0,2022,0.99,2087,48190.47,24040.02,24150.46,0.50,0.55,0.57,,41191.00,0.51,0.52,,0.50,0.51,0.52,,0.53,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)",VOY,CA
3200,2022-05,67441.46,69440.47,30294.39,37147.07,39146.08,0.55,0.56,2551.0,36.0,2515,0.98,2596,77557.68,38246.66,39311.01,0.51,0.55,0.55,0.56,61030.06,0.51,0.50,0.52,0.51,0.51,0.50,0.52,0.62,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)",VOY,CA
3201,2022-06,84026.36,89339.83,35369.94,48656.42,53969.89,0.58,0.60,2938.0,152.0,2786,0.95,3107,96630.31,44654.55,51975.77,0.54,0.56,0.55,0.57,84705.92,0.52,0.50,0.53,0.54,0.52,0.50,0.53,0.29,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)",VOY,CA
3202,2022-07,95669.58,100820.83,38943.08,56726.50,61877.75,0.59,0.61,3551.0,248.0,3303,0.95,3739,110020.02,49165.64,60854.37,0.55,0.57,0.56,0.57,104607.41,0.53,0.52,0.53,0.55,0.53,0.52,0.53,0.13,VOY,C10-0000802-LIC,"(VOY, C10-0000802-LIC)",VOY,CA


In [31]:
#df_with_state.to_csv('all_metrc_gm_0830.csv')

In [None]:
state_count = df_with_state[['state','id','date']].groupby(['state','date']).count().unstack().T.reset_index()

In [None]:
state_count

In [None]:
state_avg = df_with_state[['state','coverage','date']].groupby(['state','date']).mean().unstack().T.reset_index()
state_avg.index = pd.to_datetime(state_avg['date'])

ax = state_avg.plot(kind='bar', stacked=False,figsize = (12,9))
ax.legend(loc = 2)

In [None]:
state_avg

# inventory & coverage

In [None]:
today = date.today()
today

In [None]:
def read_inv_data(COMPANY_IDENTIFIER,license_numbers):
    """Load the inventory packages for a company / set of licenses.

    The query text comes from
    ``create_queries.create_company_inventory_packages_query`` (called with
    ``include_quantity_zero=True``) and is executed against the notebook-level
    ``engine``.

    Args:
        COMPANY_IDENTIFIER: forwarded unchanged to the query builder; the
            callers visible in this notebook pass a one-element list.
        license_numbers: forwarded to the query builder as ``license_numbers``.

    Returns:
        Whatever ``pd.read_sql_query`` returns for that query.
    """
    inventory_query = create_queries.create_company_inventory_packages_query(
        COMPANY_IDENTIFIER,
        include_quantity_zero=True,
        license_numbers=license_numbers,
    )
    return pd.read_sql_query(inventory_query, engine)


In [None]:
def calculate_inventory_valuation(incoming_transfer_df,inventory_df,license_list):
    """Value current inventory at incoming wholesale cost and report cost coverage.

    Each inventory row is priced with the average per-unit incoming cost of
    its ``package_id``; rows with no usable package-level cost fall back to the
    average per-unit cost of their ``product_name``. Rows with neither stay
    unpriced and only count against coverage.

    Args:
        incoming_transfer_df: incoming transfers; reads the columns
            ``license_number``, ``recipient_facility_name``,
            ``shipper_wholesale_price``, ``shipped_quantity``, ``package_id``
            and ``product_name``. The frame is not modified.
        inventory_df: inventory packages; reads ``package_id``,
            ``product_name`` and ``quantity``.
        license_list: license numbers, used to look up the legal name and
            echoed into the ``license`` column of the result.

    Returns:
        A one-row DataFrame with columns ``date`` (the notebook-level
        ``today``), ``value``, ``total_incoming``, ``total``, ``coverage``,
        ``license`` and ``legal_name``. ``legal_name`` is None when no transfer
        row matches ``license_list``; ``coverage`` is NaN when the inventory
        has no rows with a quantity.
    """
    # legal name: first recipient facility name recorded for these licenses
    licensed_names = incoming_transfer_df.loc[
        incoming_transfer_df['license_number'].isin(license_list), 'recipient_facility_name'
    ]
    legal_name = licensed_names.values[0] if len(licensed_names) > 0 else None

    # per-unit incoming cost, computed on a derived frame so the caller's
    # DataFrame does not silently gain a column
    transfers = incoming_transfer_df.assign(
        per_unit_incoming=incoming_transfer_df['shipper_wholesale_price'] / incoming_transfer_df['shipped_quantity']
    )
    priced_transfers = transfers[transfers['shipper_wholesale_price'].notnull()]

    # average unit cost by package id
    df_avg_incoming_price = priced_transfers.groupby(['package_id'])['per_unit_incoming'].mean().reset_index()
    # average unit cost by product name (fallback price)
    df_avg_product = (
        priced_transfers.groupby(['product_name'])['per_unit_incoming'].mean()
        .reset_index()
        .rename(columns={'per_unit_incoming': 'per_unit_product'})
    )

    # price inventory at package level; a zero shipped quantity yields +/-inf
    # unit costs, which are treated as "no price available"
    df_inventory_incoming = pd.merge(inventory_df, df_avg_incoming_price, on=['package_id'], how='left')
    df_inventory_incoming = df_inventory_incoming.replace([numpy.inf, -numpy.inf], numpy.nan)

    # rows without a package-level cost fall back to the product average
    df_inv_null = df_inventory_incoming[df_inventory_incoming['per_unit_incoming'].isnull()]
    df_inv_product = pd.merge(df_inv_null, df_avg_product, on=['product_name'], how='left')
    df_inv_product = df_inv_product.replace([numpy.inf, -numpy.inf], numpy.nan)
    # explicit copy: a new column is added to this filtered frame just below
    df_inv_product_price = df_inv_product[df_inv_product['per_unit_product'].notnull()].copy()
    df_inv_product_price['total_price'] = df_inv_product_price['quantity'] * df_inv_product_price['per_unit_product']

    inventory_product_value = df_inv_product_price['total_price'].sum()
    df_inventory_incoming['total_price'] = df_inventory_incoming['quantity'] * df_inventory_incoming['per_unit_incoming']
    inventory_value = df_inventory_incoming['total_price'].sum()
    total_inv_value = inventory_product_value + inventory_value

    # coverage = share of inventory rows that could be priced either way
    inv_count_product = df_inv_product_price['per_unit_product'].count()
    inv_count_incoming = df_inventory_incoming['per_unit_incoming'].count()
    inv_count_total = df_inventory_incoming['quantity'].count()
    inv_total_incoming = inv_count_product + inv_count_incoming
    inventory_coverage = inv_total_incoming / inv_count_total if inv_count_total else numpy.nan

    # one value per row, transposed into a single-row frame (columns stay object dtype)
    data = [[today],
            [total_inv_value],
            [inv_total_incoming],
            [inv_count_total],
            [inventory_coverage],
            [license_list],
            [legal_name]]
    df_inventory_license = pd.DataFrame(data).T
    df_inventory_license.columns = ['date','value','total_incoming','total','coverage','license','legal_name']
    return df_inventory_license
    

In [None]:
def get_inv_list(company_list,transfer_packages_start_date,sales_transactrions_start_date,map_df):
    """Build the inventory valuation of every license of every company.

    Args:
        company_list: company identifiers to process.
        transfer_packages_start_date: accepted but not used by this function.
        sales_transactrions_start_date: accepted but not used by this function.
        map_df: frame with a ``company_identifier`` column and a ``lic_list``
            column; the first ``lic_list`` entry matching a company is iterated
            as that company's licenses.

    Returns:
        ``{company: {license: valuation}}`` where the valuation is the result
        of ``calculate_inventory_valuation`` or ``numpy.nan`` when
        ``read_inv_data`` returned no rows for the license.
    """
    valuations_by_company = {}
    for company in tqdm(company_list):
        print(company)
        company_rows = map_df[map_df['company_identifier'] == company]
        licenses = company_rows['lic_list'].to_list()[0]
        valuations_by_license = {}
        for license_number in tqdm(licenses):
            print(license_number)
            incoming = read_df_in([company],[license_number])
            inventory = read_inv_data([company],[license_number])
            if inventory.shape[0] > 0:
                valuations_by_license[license_number] = calculate_inventory_valuation(
                    incoming, inventory, [license_number]
                )
            else:
                # no inventory rows for this license: keep a NaN placeholder
                valuations_by_license[license_number] = numpy.nan
        valuations_by_company[company] = valuations_by_license
    return valuations_by_company

    

    

In [None]:
b = get_inv_list(list(ca_df_clean['company_identifier']),TRANSFER_PACKAGES_START_DATE,SALES_TRANSACTIONS_START_DATE,ca_df_clean)




In [None]:
b.keys()

In [None]:
for k in b.keys():
    print(k)
    for l in b[k].keys():
        print(l)

In [None]:
str(b['QR']['AU-R-000233'])

In [None]:
# Stack the per-license inventory valuations held in `b` (indexed b[company][license])
# into one frame, tagging each row with its company and license.
# Frames are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copies the accumulated frame on every iteration.
inventory_frames = []
for k in b.keys():
    print(k)
    for l in b[k].keys():
        print(l)
        data_inv = b[k][l]
        # get_inv_list stores numpy.nan for licenses without inventory rows;
        # test the type instead of rendering a whole frame to a string
        if not isinstance(data_inv, pd.DataFrame):
            continue
        data_inv['company'] = k
        data_inv['location'] = l
        print(data_inv.shape)
        inventory_frames.append(data_inv)
        #data.to_excel('ca_location_analysis/'+'analysis_' + k + '_'+ l+'.xlsx')
# pd.concat raises on an empty list, so fall back to an empty frame.
df_inv = pd.concat(inventory_frames) if inventory_frames else pd.DataFrame()

In [None]:
#df_inv.to_csv('all_metrc_inv_0316.csv')



In [None]:
len(df_inv['company'].unique())

In [None]:
df_inv['id'] = list(zip(df_inv.company, df_inv.location))
df_inv = df_inv.reset_index(drop = True)

In [None]:
len(df_inv['id'].unique())

In [None]:
df_inv_with_state = pd.merge(df_inv,ca_df_clean[['company_identifier','state']],left_on = 'company',right_on = 'company_identifier',how = 'left')



In [None]:
#df_inv_with_state.to_csv('all_metrc_inv_0317.csv')

In [None]:
df_inv_with_state['coverage'] = df_inv_with_state['coverage'].astype('float')

In [None]:
state_count_inv = df_inv_with_state[['state','id']].groupby(['state']).count().unstack().T.reset_index()
state_avg_inv = df_inv_with_state[['state','coverage']].groupby(['state']).mean().unstack().T.reset_index()
state_med_inv = df_inv_with_state[['state','coverage']].groupby(['state']).median().unstack().T.reset_index()



In [None]:
state_count_inv

In [None]:
state_avg_inv

In [None]:
state_med_inv

In [None]:
co_inv = df_inv_with_state[df_inv_with_state['state'] == 'MI'][['id','coverage']]
co_inv.index = co_inv.id

In [None]:
co_inv

In [None]:
co_inv.plot(kind = 'bar',figsize = (15,7))

# Vendor Churn

In [None]:
WINDOW = 4
VC_START_DATE = '2020-01-01'
VC_END_DATE = '2022-03-01'
VC_MONTH_LIST = ['2021-11-01','2021-12-01','2022-01-01','2022-02-01']
VC_MONTH_END = VC_MONTH_LIST[-1]

In [None]:
def calculate_vendor_churn(incoming_transfer_df,license_list,vc_windown,vc_start_date,vc_end_date,vc_month_list,vc_month_end):
    """Monthly share of cumulative purchases that sits with inactive vendors.

    For every month between ``vc_start_date`` and ``vc_end_date`` each vendor
    (``shipper_facility_name``) is classified from purchases inside that range:

    * significant - its running total is more than 0.1% of the running total
      of all vendors;
    * recently active - its purchases over the trailing ``vc_windown`` months
      sum to more than 0 (the first ``vc_windown - 1`` months have no full
      window and therefore count as not active).

    A significant vendor that is not recently active is "inactive"; the result
    is the sum of the inactive vendors' shares per month.

    Args:
        incoming_transfer_df: incoming transfers; reads ``license_number``,
            ``created_date``, ``shipper_facility_name`` and
            ``shipper_wholesale_price``. The frame is not modified.
        license_list: only transfers for these license numbers are used.
        vc_windown: length of the trailing activity window, in months.
        vc_start_date: first month of the analysis range.
        vc_end_date: last month of the analysis range.
        vc_month_list: unused; kept so existing call sites keep working. It
            previously fed only a vendor matrix that was built and discarded.
        vc_month_end: unused; kept for the same reason.

    Returns:
        DataFrame with one row per month in the range, columns ``year_month``
        and ``%_inactive``, indexed by ``year_month``.
    """
    licensed_transfers = incoming_transfer_df[incoming_transfer_df['license_number'].isin(license_list)]

    # Bucket every transfer into the first day of its calendar month.
    purchase_month = pd.to_datetime(
        pd.to_datetime(licensed_transfers['created_date']).dt.strftime("%Y-%m"),
        format="%Y-%m",
    )
    purchases = licensed_transfers[['shipper_facility_name', 'shipper_wholesale_price']].assign(
        year_month=purchase_month
    )

    # Months x vendors spend grid. Reindexing onto the full month range adds
    # zero rows for quiet months and drops months outside the range; a vendor
    # that only traded outside the range becomes an all-zero column instead of
    # raising.
    month_grid = pd.date_range(start=vc_start_date, end=vc_end_date, freq='MS')
    monthly_spend = (
        purchases
        .groupby(['year_month', 'shipper_facility_name'])['shipper_wholesale_price']
        .sum()
        .unstack('shipper_facility_name')
        .reindex(month_grid)
        .fillna(0)
    )

    rolling_spend = monthly_spend.rolling(vc_windown).sum()
    running_total = monthly_spend.cumsum()
    # each vendor's share of everything bought so far (NaN while nothing has been bought)
    share_of_total = running_total.div(running_total.sum(axis=1), axis=0)

    recently_active = rolling_spend > 0
    significant = share_of_total > 0.001
    # "Inactive" = significant but no purchases in the trailing window
    inactive_share = share_of_total.where(significant & ~recently_active).sum(axis=1)

    churn = pd.DataFrame({'year_month': month_grid, '%_inactive': inactive_share.values})
    churn.index = churn.year_month
    return churn
    


In [None]:
def get_vc_list(company_list,transfer_packages_start_date,sales_transactrions_start_date,map_df):
    """Compute the vendor-churn table of every license of every company.

    Churn settings are taken from the notebook-level constants ``WINDOW``,
    ``VC_START_DATE``, ``VC_END_DATE``, ``VC_MONTH_LIST`` and ``VC_MONTH_END``.

    Args:
        company_list: company identifiers to process.
        transfer_packages_start_date: accepted but not used by this function.
        sales_transactrions_start_date: accepted but not used by this function.
        map_df: frame with a ``company_identifier`` column and a ``lic_list``
            column; the first ``lic_list`` entry matching a company is iterated
            as that company's licenses.

    Returns:
        ``{company: {license: churn}}`` where churn is the result of
        ``calculate_vendor_churn`` or ``numpy.nan`` when ``read_df_in``
        returned no rows for the license.
    """
    churn_by_company = {}
    for company in tqdm(company_list):
        print(company)
        company_rows = map_df[map_df['company_identifier'] == company]
        licenses = company_rows['lic_list'].to_list()[0]
        churn_by_license = {}
        for license_number in tqdm(licenses):
            print(license_number)
            incoming = read_df_in([company],[license_number])
            if incoming.shape[0] > 0:
                churn_by_license[license_number] = calculate_vendor_churn(
                    incoming,
                    [license_number],
                    WINDOW,
                    VC_START_DATE,
                    VC_END_DATE,
                    VC_MONTH_LIST,
                    VC_MONTH_END,
                )
            else:
                # no incoming transfers for this license: keep a NaN placeholder
                churn_by_license[license_number] = numpy.nan
        churn_by_company[company] = churn_by_license
    return churn_by_company

    

    

In [None]:
c = get_vc_list(list(ca_df_clean['company_identifier']),TRANSFER_PACKAGES_START_DATE,SALES_TRANSACTIONS_START_DATE,ca_df_clean)




In [None]:
# Stack the per-license vendor-churn tables held in `c` (indexed c[company][license])
# into one frame, tagging each row with its company and license.
# Frames are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copies the accumulated frame on every iteration.
churn_frames = []
for k in c.keys():
    print(k)
    for l in c[k].keys():
        print(l)
        data_vc = c[k][l]
        # get_vc_list stores numpy.nan for licenses without incoming transfers;
        # test the type instead of rendering a whole frame to a string
        if not isinstance(data_vc, pd.DataFrame):
            continue
        data_vc['company'] = k
        data_vc['location'] = l
        print(data_vc.shape)
        churn_frames.append(data_vc)
        #data.to_excel('ca_location_analysis/'+'analysis_' + k + '_'+ l+'.xlsx')
# pd.concat raises on an empty list, so fall back to an empty frame.
df_vc = pd.concat(churn_frames) if churn_frames else pd.DataFrame()

In [None]:
df_vc['id'] = list(zip(df_vc.company, df_vc.location))
df_vc = df_vc.reset_index(drop = True)

In [None]:
len(df_vc['id'].unique())

In [None]:
df_vc_with_state = pd.merge(df_vc,ca_df_clean[['company_identifier','state']],left_on = 'company',right_on = 'company_identifier',how = 'left')

df_vc_with_state.to_csv('all_metrc_vc_0322.csv')




In [None]:
df_vc_with_state.to_csv('all_metrc_vc_0322.csv')

In [None]:
df_vc_with_state_feb = df_vc_with_state[df_vc_with_state['year_month'] == '2022-02-01']

In [None]:
state_count_vc = df_vc_with_state[['state','year_month','id']].groupby(['state','year_month']).count().unstack().T.reset_index()
state_avg_vc = df_vc_with_state[['state','year_month','%_inactive']].groupby(['state','year_month']).mean().unstack().T.reset_index()
state_med_vc = df_vc_with_state[['state','year_month','%_inactive']].groupby(['state','year_month']).median().unstack().T.reset_index()



In [None]:
state_count_vc


# flowhub

In [None]:
fh = pd.read_csv('flowhub_aftertax_cogs_0322.csv',index_col= 0)

In [None]:
fh

In [None]:
fh_clean = fh[['year_month','subtotalInDollars','location_id','location_state','margin_perc']]

In [None]:
fh_clean

In [None]:
df_with_state_clean = df_with_state[['date','revenue','id','state','margin_%']]

In [None]:
fh_clean.columns = df_with_state_clean.columns

In [None]:
# Flowhub rows first, then Metrc rows (both frames share the same column names here).
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
joined = pd.concat([fh_clean, df_with_state_clean])

In [None]:
joined.head()

In [None]:
joined['date'] = pd.to_datetime(joined['date'])

In [None]:
df_mom_rev = joined.groupby('id').apply(lambda df: df.set_index('date').resample('1MS').first()['revenue'].pct_change()).reset_index()

In [None]:
df_mom_rev.columns = ['id','date','rev_change']

In [None]:
metrc_fh = df_mom_rev.merge(joined,on = ['id','date'],how = 'right')

In [None]:
# Leading rows of the merged frame, taken as the Flowhub subset.
# NOTE(review): 3375 is a hard-coded row label - presumably len(fh_clean) - 1, since
# `.loc` slices include the end label; confirm it still matches the Flowhub CSV.
fh_all = metrc_fh.loc[:3375]

In [None]:
fh_all.groupby(['state'])['revenue'].describe().reset_index()

In [None]:
fh_all[['date','state','id']].groupby(['date','state']).count().unstack().reset_index()

In [None]:
# Remaining rows of the merged frame, taken as the Metrc subset.
# NOTE(review): 3376 is a hard-coded row label - presumably len(fh_clean); it must
# stay in step with the cut-off used for `fh_all`. Confirm against the Flowhub CSV.
metrc_all  = metrc_fh.loc[3376:]

In [None]:
metrc_all[['date','state','id']].groupby(['date','state']).count().unstack().reset_index()

In [None]:
metrc_all.head()

In [None]:
metrc_all_trim = metrc_all[(metrc_all['margin_%'] > 0.2)&(metrc_all['margin_%'] < 0.8)]

In [None]:
metrc_all_trim.groupby(['state'])['margin_%'].describe().reset_index()

In [None]:
margin_distr = metrc_all_trim.groupby(['state','date'])['margin_%'].describe().reset_index()
margin_distr['date'] = pd.to_datetime(margin_distr['date'])

In [None]:
margin_distr 

In [None]:
import seaborn as sns

In [None]:
# trimmed pre tax: min (left) and max (right) margin % per state over time
# Apply the seaborn theme / figure size BEFORE creating the figure; when set
# afterwards it only affects figures created by later cells.
sns.set(rc={'figure.figsize':(20,6)})
fig, ax = plt.subplots(1, 2)
for panel, (stat_col, panel_title) in zip(ax, [("min", "min"), ("max", "max")]):
    sns.lineplot(data=margin_distr, x="date", y=stat_col, hue="state", marker='o', markersize=9, ax=panel)
    panel.legend(loc=4)
    panel.set_title(panel_title, fontsize=15)
    panel.set_xlabel("year month")
    panel.set_ylabel("margin %")
#ax[1].set(ylim=(0.2, 1))


In [None]:
# trimmed pre tax: 25th (left) and 75th (right) percentile of margin % per state over time
# Apply the seaborn theme / figure size BEFORE creating the figure; when set
# afterwards it only affects figures created by later cells.
sns.set(rc={'figure.figsize':(20,6)})
fig, ax = plt.subplots(1, 2)
for panel, (stat_col, panel_title) in zip(ax, [("25%", "25%"), ("75%", "75%")]):
    sns.lineplot(data=margin_distr, x="date", y=stat_col, hue="state", marker='o', markersize=9, ax=panel)
    panel.legend(loc=4)
    panel.set_title(panel_title, fontsize=15)
    panel.set_xlabel("year month")
    panel.set_ylabel("margin %")
#ax[1].set(ylim=(0.2, 1))


In [None]:
# trimmed pre tax: mean (left) and median (right) margin % per state over time
# Apply the seaborn theme / figure size BEFORE creating the figure; when set
# afterwards it only affects figures created by later cells.
sns.set(rc={'figure.figsize':(20,6)})
fig, ax = plt.subplots(1, 2)
for panel, (stat_col, panel_title) in zip(ax, [("mean", "mean"), ("50%", "median")]):
    sns.lineplot(data=margin_distr, x="date", y=stat_col, hue="state", marker='o', markersize=9, ax=panel)
    panel.legend(loc=4)
    panel.set_title(panel_title, fontsize=15)
    panel.set_xlabel("year month")
    panel.set_ylabel("margin %")
#ax[1].set(ylim=(0.2, 1))


In [None]:
margin_distr.to_csv('fh_margin_distr.csv')

In [None]:
#metrc_fh.to_csv('metrc_fh.csv')

In [None]:
pos = pd.read_csv('pos_csv - Sheet1.csv')

In [None]:
pos['date'] = pd.to_datetime(pos['date'])

In [None]:
pos['% Margin w/o Tax'] = pos['% Margin w/o Tax'].astype('float')
pos['Revenue w/o Tax'] = pos['Revenue w/o Tax'].astype('float')


In [None]:
pos[['date','US state','Company identifier']].groupby(['date','US state']).count().unstack().reset_index().fillna(0)

In [None]:
pos_Rev_change = pos.groupby('Company identifier').apply(lambda df: df.set_index('date').resample('1MS').first()['Revenue w/o Tax'].pct_change()).reset_index()

pos_Rev_change.columns = ['Company identifier','date','rev_change']


In [None]:
pos_Rev_change

In [None]:
pos_data_all = pos_Rev_change.merge(pos,on = ['Company identifier','date'],how = 'right')

In [None]:
pos_data_all.to_csv('pos_data_all.csv')

In [None]:
margin_distr_pos = pos.groupby(['US state'])['% Margin w/o Tax'].describe().reset_index()

In [None]:
margin_distr_pos


In [None]:
margin_distr_pos = pos.groupby(['US state','date'])['% Margin w/o Tax'].describe().reset_index()
margin_distr_pos['date'] = pd.to_datetime(margin_distr_pos['date'])

In [None]:
margin_distr_pos

In [None]:
# POS data, pre tax: min (left) and max (right) margin % per state over time
# Apply the seaborn theme / figure size BEFORE creating the figure; when set
# afterwards it only affects figures created by later cells.
sns.set(rc={'figure.figsize':(20,6)})
fig, ax = plt.subplots(1, 2)
for panel, (stat_col, panel_title) in zip(ax, [("min", "min"), ("max", "max")]):
    sns.lineplot(data=margin_distr_pos, x="date", y=stat_col, hue="US state", marker='o', markersize=9, ax=panel)
    panel.legend(loc=4)
    panel.set_title(panel_title, fontsize=15)
    panel.set_xlabel("year month")
    panel.set_ylabel("margin %")
#ax[1].set(ylim=(0.2, 1))


In [None]:
# POS data, pre tax: 25th (left) and 75th (right) percentile of margin % per state over time
# Apply the seaborn theme / figure size BEFORE creating the figure; when set
# afterwards it only affects figures created by later cells.
sns.set(rc={'figure.figsize':(20,6)})
fig, ax = plt.subplots(1, 2)
for panel, (stat_col, panel_title) in zip(ax, [("25%", "25%"), ("75%", "75%")]):
    sns.lineplot(data=margin_distr_pos, x="date", y=stat_col, hue="US state", marker='o', markersize=9, ax=panel)
    panel.legend(loc=4)
    panel.set_title(panel_title, fontsize=15)
    panel.set_xlabel("year month")
    panel.set_ylabel("margin %")
#ax[1].set(ylim=(0.2, 1))


In [None]:
# POS data, pre tax: mean (left) and median (right) margin % per state over time
# Apply the seaborn theme / figure size BEFORE creating the figure; when set
# afterwards it only affects figures created by later cells.
sns.set(rc={'figure.figsize':(20,6)})
fig, ax = plt.subplots(1, 2)
for panel, (stat_col, panel_title) in zip(ax, [("mean", "mean"), ("50%", "median")]):
    sns.lineplot(data=margin_distr_pos, x="date", y=stat_col, hue="US state", marker='o', markersize=9, ax=panel)
    panel.legend(loc=4)
    panel.set_title(panel_title, fontsize=15)
    panel.set_xlabel("year month")
    panel.set_ylabel("margin %")
#ax[1].set(ylim=(0.2, 1))


# GM$

In [None]:
import seaborn as sns

## 3M

In [None]:
df_high_coverage_gm_trim_3m = df_high_coverage[(df_high_coverage['gm$_past_quarter_after_tax'] > 0)&(df_high_coverage['gm$_past_quarter_after_tax'] < 500000)]




In [None]:
df_high_coverage_gm_trim_3m[['date','gm$_past_quarter_after_tax']].groupby(['date']).quantile(.75)


## 6M

In [None]:
df_high_coverage_gm_trim_6m = df_high_coverage[(df_high_coverage['gm$_past_2quarters_after_tax'] > 0)&(df_high_coverage['gm$_past_2quarters_after_tax'] < 500000)]




In [None]:
# 75th percentile per month of the 2-quarter GM$ column, i.e. the same column
# this frame was trimmed on (the 1-quarter column was a leftover from the 3M cell).
df_high_coverage_gm_trim_6m[['date','gm$_past_2quarters_after_tax']].groupby(['date']).quantile(.75)

## 9M

In [None]:
df_high_coverage_gm_trim_9m = df_high_coverage[(df_high_coverage['gm$_past_3quarters_after_tax'] > 0)&(df_high_coverage['gm$_past_3quarters_after_tax'] < 500000)]




In [None]:
# 75th percentile per month of the 3-quarter GM$ column, i.e. the same column
# this frame was trimmed on (the 1-quarter column was a leftover from the 3M cell).
df_high_coverage_gm_trim_9m[['date','gm$_past_3quarters_after_tax']].groupby(['date']).quantile(.75)

In [None]:
df_high_coverage_gm_trim = df_high_coverage[(df_high_coverage['margin_$_after_tax'] > 0)&(df_high_coverage['margin_$_after_tax'] < 500000)]

In [None]:
df_high_coverage['margin_$_after_tax'].quantile(0.9)

In [None]:
df_high_coverage[df_high_coverage['margin_$_after_tax'] > 800000]

In [None]:
df_high_coverage_gm_trim.shape[0] / df_high_coverage.shape[0]

In [None]:
sns.boxplot(df_high_coverage['margin_$_after_tax'])

In [None]:
df_high_coverage['margin_$_after_tax'].describe()

In [None]:
df_high_coverage['margin_$_after_tax'].quantile(0.9)

In [None]:
stats.percentileofscore(df_high_coverage['margin_$_after_tax'],270000, kind='strict')

In [None]:
plt.hist(df_high_coverage['margin_$_after_tax'],bins = 100)

In [None]:
df_high_coverage_gm_trim[['date','margin_$_after_tax']].groupby(['date']).quantile(.9)


In [None]:
df_high_coverage[['date','margin_$_after_tax']].groupby(['date']).quantile(.5)