In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import json
import numpy 
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline  
import pyarrow
import sys

from datetime import date
from dotenv import load_dotenv
from sqlalchemy import create_engine
from os import path
from typing import List,Dict, Tuple
from collections import defaultdict
# Do not cap the number of columns pandas shows when rendering a frame.
pd.set_option("display.max_columns", None)

# load_dotenv comes from python-dotenv; presumably it copies entries from a
# local .env file into the process environment — confirm against its docs.
load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')
if not BIGQUERY_CREDENTIALS_PATH:
    # Without this guard os.path.expanduser(None) fails with an opaque
    # TypeError that does not mention the missing setting.
    raise RuntimeError(
        "BIGQUERY_CREDENTIALS_PATH is not set; define it in the environment "
        "or in a .env file before creating the BigQuery engine."
    )
engine = create_engine('bigquery://bespoke-financial/ProdMetrcData', credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH))

# Extend the import path with ../core and ../../src (resolved against the
# current working directory); the project imports that follow appear to
# depend on these entries.
sys.path.append(path.realpath(path.join(os.getcwd(), "../core")))
sys.path.append(path.realpath(path.join(os.getcwd(), "../../src")))

import create_queries
import prepare_data

from bespoke.inventory.analysis.shared import download_util, inventory_types
from bespoke.inventory.analysis import active_inventory_util as util
from bespoke.inventory.analysis import inventory_valuations_util as valuations_util

%load_ext autoreload
%autoreload 2

In [3]:
# Load the gross-margin table from CSV; the file's first column becomes the index.
gm = pd.read_csv(filepath_or_buffer='all_metrc_gm_0408.csv', index_col=0)

In [4]:
# Show the loaded frame; the rendered output abbreviates the middle rows.
gm

Unnamed: 0,date,revenue,cogs,margin_$,margin_%,total_count_incoming,product_count,count_incoming,coverage,revenue_after_tax,cogs_after_tax,margin_$_after_tax,margin_%_after_tax,gm_past_quarter,gm_past_2quarters,gm_past_3quarters,gm_past_quarter_after_tax,gm_past_2quarters_after_tax,gm_past_3quarters_after_tax,gm$_past_quarter_after_tax,gm$_past_2quarters_after_tax,gm$_past_3quarters_after_tax,company,location,id,company_identifier,state
0,2021-01,291646.25,69798.326998,221847.923002,0.760675,4808.0,2003.0,2805,0.696005,335393.1875,88643.875287,246749.312213,0.735702,,,,,,,,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
1,2021-02,262906.73,85244.682175,177662.047825,0.675761,5464.0,377.0,5087,0.948611,302342.7395,108260.746362,194081.993138,0.641927,,,,,,,,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
2,2021-03,315154.72,99796.521792,215358.198208,0.683341,6902.0,159.0,6743,0.951475,362427.9280,126741.582676,235686.345324,0.650299,0.706592,,,0.675976,,,225505.883559,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
3,2021-04,329925.48,100825.215287,229100.264713,0.694400,7081.0,112.0,6969,0.931465,379414.3020,128048.023414,251366.278586,0.662511,0.684501,,,0.651579,,,227044.872350,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
4,2021-05,336793.80,98971.770002,237822.029998,0.706135,7286.0,131.0,7155,0.918326,387312.8700,125694.147903,261618.722097,0.675471,0.694626,,,0.662760,,,249557.115336,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2446,2021-12,32715.38,12198.903894,20516.476106,0.627120,1062.0,129.0,933,0.919481,37622.6870,15492.607945,22130.079055,0.588211,0.678010,,,0.644411,,,16567.283526,,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2447,2022-01,24339.01,11123.403847,13215.606153,0.542980,831.0,43.0,788,0.936866,27989.8615,14126.722885,13863.138615,0.495291,0.626175,,,0.587167,,,16873.765070,,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2448,2022-02,24450.82,10410.082937,14040.737063,0.574244,796.0,37.0,759,0.953293,28118.4430,13220.805331,14897.637669,0.529817,0.581448,0.605722,,0.537773,0.564580,,16963.618446,14587.000533,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2449,2022-03,27920.68,11465.072300,16455.607700,0.589370,933.0,54.0,879,0.947208,32108.7820,14560.641821,17548.140179,0.546522,0.568865,0.623437,,0.523877,0.584144,,15436.305488,16001.794507,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA


In [5]:
# Retain only the rows whose `coverage` is at least 0.7.
gm_high_coverage = gm.loc[gm['coverage'].ge(0.7)]

In [6]:
def _trim_margin_outliers(frame, margin_col, lower=0.2, upper=0.8, before_month='2022-04'):
    """Keep rows with a plausible margin that fall before a cutoff month.

    Args:
        frame: DataFrame holding `margin_col` and a `date` column of
            'YYYY-MM' strings.
        margin_col: name of the margin column to bound.
        lower: exclusive lower bound for `margin_col`.
        upper: exclusive upper bound for `margin_col`.
        before_month: exclusive cutoff, written in the same 'YYYY-MM' format
            as `date`.

    Returns:
        The rows of `frame` satisfying all three conditions. Rows whose
        margin is NaN are dropped because both comparisons evaluate False.
    """
    margin = frame[margin_col]
    # `date` is text, so the cutoff must use the same 'YYYY-MM' shape.
    # Compared as strings, '2022-04' < '2022-04-01' is True, so a
    # 'YYYY-MM-DD' cutoff would let the cutoff month itself through.
    keep = (margin > lower) & (margin < upper) & (frame['date'] < before_month)
    return frame[keep]


# Trailing 1-, 2- and 3-quarter margins, each limited to the open interval
# (0.2, 0.8) and to months before April 2022.
gm_high_coverage_3m_trim = _trim_margin_outliers(gm_high_coverage, 'gm_past_quarter')

gm_high_coverage_6m_trim = _trim_margin_outliers(gm_high_coverage, 'gm_past_2quarters')

gm_high_coverage_9m_trim = _trim_margin_outliers(gm_high_coverage, 'gm_past_3quarters')



In [37]:
# Count `id` entries per (state, month) in the 6m-trimmed rows, then reshape
# so months run down the rows and each state gets its own column.
state_count = (
    gm_high_coverage_6m_trim[['state', 'id', 'date']]
    .groupby(['state', 'date'])
    .count()
    .unstack()
    .transpose()
    .reset_index()
)
state_count



state,level_0,date,CA,CO,MA,MI,OR
0,id,2020-06,13.0,8.0,,1.0,
1,id,2020-07,15.0,9.0,,3.0,1.0
2,id,2020-08,17.0,9.0,,3.0,1.0
3,id,2020-09,18.0,10.0,,2.0,1.0
4,id,2020-10,19.0,9.0,,2.0,1.0
5,id,2020-11,20.0,9.0,1.0,1.0,1.0
6,id,2020-12,24.0,9.0,1.0,1.0,
7,id,2021-01,24.0,9.0,1.0,1.0,
8,id,2021-02,26.0,10.0,1.0,1.0,1.0
9,id,2021-03,25.0,10.0,1.0,1.0,


## CA

In [8]:
### 3m

In [9]:
# CA rows of the 3m-trimmed data.
gm_high_coverage_3m_trim_ca = gm_high_coverage_3m_trim.loc[gm_high_coverage_3m_trim['state'].eq('CA')]

In [10]:
# Per-month describe() summary of CA's `gm_past_quarter_after_tax`, with the
# month labels converted to timestamps.
margin_distr_3m_ca = (
    gm_high_coverage_3m_trim_ca
    .groupby(['date'])['gm_past_quarter_after_tax']
    .describe()
    .reset_index()
    .assign(date=lambda summary: pd.to_datetime(summary['date']))
)



In [None]:
#margin_distr_3m_ca[['25%','50%','75%']].mean().round(2)

In [11]:
# Average the monthly quartiles for months from 2021-01-01 onward (2 decimals).
margin_distr_3m_ca.loc[
    margin_distr_3m_ca['date'] >= '2021-01-01',
    ['25%', '50%', '75%'],
].mean().round(2)

25%    0.44
50%    0.50
75%    0.59
dtype: float64

In [12]:
### 6m

In [13]:
# CA rows of the 6m-trimmed data.
gm_high_coverage_6m_trim_ca = gm_high_coverage_6m_trim.loc[gm_high_coverage_6m_trim['state'].eq('CA')]

# Per-month describe() summary of CA's `gm_past_2quarters_after_tax`, with the
# month labels converted to timestamps.
margin_distr_6m_ca = (
    gm_high_coverage_6m_trim_ca
    .groupby(['date'])['gm_past_2quarters_after_tax']
    .describe()
    .reset_index()
    .assign(date=lambda summary: pd.to_datetime(summary['date']))
)

# All-month quartile average, currently disabled:
#margin_distr_6m_ca[['25%','50%','75%']].mean().round(2)



In [14]:
# Average the monthly quartiles for months from 2021-01-01 onward (2 decimals).
margin_distr_6m_ca.loc[
    margin_distr_6m_ca['date'] >= '2021-01-01',
    ['25%', '50%', '75%'],
].mean().round(2)

25%    0.44
50%    0.50
75%    0.59
dtype: float64

In [15]:
### 9m

In [16]:
# CA rows of the 9m-trimmed data.
gm_high_coverage_9m_trim_ca = gm_high_coverage_9m_trim.loc[gm_high_coverage_9m_trim['state'].eq('CA')]

# Per-month describe() summary of CA's `gm_past_3quarters_after_tax`, with the
# month labels converted to timestamps.
margin_distr_9m_ca = (
    gm_high_coverage_9m_trim_ca
    .groupby(['date'])['gm_past_3quarters_after_tax']
    .describe()
    .reset_index()
    .assign(date=lambda summary: pd.to_datetime(summary['date']))
)

# All-month quartile average, currently disabled:
#margin_distr_9m_ca[['25%','50%','75%']].mean().round(2)



In [17]:
# Average the monthly quartiles for months from 2021-01-01 onward (2 decimals).
margin_distr_9m_ca.loc[
    margin_distr_9m_ca['date'] >= '2021-01-01',
    ['25%', '50%', '75%'],
].mean().round(2)

25%    0.45
50%    0.51
75%    0.59
dtype: float64

## CO

In [18]:
### 3m

In [19]:
# CO rows of the 3m-trimmed data.
gm_high_coverage_3m_trim_co = gm_high_coverage_3m_trim.loc[gm_high_coverage_3m_trim['state'].eq('CO')]

In [20]:
# Per-month describe() summary of CO's `gm_past_quarter` (not the `_after_tax`
# variant), with the month labels converted to timestamps.
margin_distr_3m_co = (
    gm_high_coverage_3m_trim_co
    .groupby(['date'])['gm_past_quarter']
    .describe()
    .reset_index()
    .assign(date=lambda summary: pd.to_datetime(summary['date']))
)



In [21]:
# Average the monthly quartiles over every available month (2 decimals).
margin_distr_3m_co.loc[:, ['25%', '50%', '75%']].mean().round(2)

25%    0.49
50%    0.57
75%    0.62
dtype: float64

In [22]:
# Average the monthly quartiles for months from 2021-01-01 onward (2 decimals).
margin_distr_3m_co.loc[
    margin_distr_3m_co['date'] >= '2021-01-01',
    ['25%', '50%', '75%'],
].mean().round(2)

25%    0.48
50%    0.57
75%    0.63
dtype: float64

In [23]:
### 6m

In [24]:
# CO rows of the 6m-trimmed data.
gm_high_coverage_6m_trim_co = gm_high_coverage_6m_trim.loc[gm_high_coverage_6m_trim['state'].eq('CO')]

# Per-month describe() summary of CO's `gm_past_2quarters`, with the month
# labels converted to timestamps.
margin_distr_6m_co = (
    gm_high_coverage_6m_trim_co
    .groupby(['date'])['gm_past_2quarters']
    .describe()
    .reset_index()
    .assign(date=lambda summary: pd.to_datetime(summary['date']))
)

# Average the monthly quartiles over every available month (2 decimals).
margin_distr_6m_co.loc[:, ['25%', '50%', '75%']].mean().round(2)



25%    0.48
50%    0.57
75%    0.62
dtype: float64

In [25]:
# Average the monthly quartiles for months from 2021-01-01 onward (2 decimals).
margin_distr_6m_co.loc[
    margin_distr_6m_co['date'] >= '2021-01-01',
    ['25%', '50%', '75%'],
].mean().round(2)

25%    0.48
50%    0.57
75%    0.63
dtype: float64

In [26]:
### 9m

In [27]:
# CO rows of the 9m-trimmed data.
gm_high_coverage_9m_trim_co = gm_high_coverage_9m_trim.loc[gm_high_coverage_9m_trim['state'].eq('CO')]

# Per-month describe() summary of CO's `gm_past_3quarters`, with the month
# labels converted to timestamps.
margin_distr_9m_co = (
    gm_high_coverage_9m_trim_co
    .groupby(['date'])['gm_past_3quarters']
    .describe()
    .reset_index()
    .assign(date=lambda summary: pd.to_datetime(summary['date']))
)

# Average the monthly quartiles over every available month (2 decimals).
margin_distr_9m_co.loc[:, ['25%', '50%', '75%']].mean().round(2)



25%    0.48
50%    0.57
75%    0.62
dtype: float64

In [28]:
# Average the monthly quartiles for months from 2021-01-01 onward (2 decimals).
margin_distr_9m_co.loc[
    margin_distr_9m_co['date'] >= '2021-01-01',
    ['25%', '50%', '75%'],
].mean().round(2)

25%    0.49
50%    0.57
75%    0.62
dtype: float64

## MI

In [None]:
### 3m

In [29]:
# MI rows of the 3m-trimmed data.
gm_high_coverage_3m_trim_mi = gm_high_coverage_3m_trim.loc[gm_high_coverage_3m_trim['state'].eq('MI')]

# Per-month describe() summary of MI's `gm_past_quarter`, with the month
# labels converted to timestamps.
margin_distr_3m_mi = (
    gm_high_coverage_3m_trim_mi
    .groupby(['date'])['gm_past_quarter']
    .describe()
    .reset_index()
    .assign(date=lambda summary: pd.to_datetime(summary['date']))
)

# Average the monthly quartiles over every available month (2 decimals).
margin_distr_3m_mi.loc[:, ['25%', '50%', '75%']].mean().round(2)



25%    0.46
50%    0.51
75%    0.56
dtype: float64

In [30]:
# Average the monthly quartiles for months from 2021-01-01 onward (2 decimals).
margin_distr_3m_mi.loc[
    margin_distr_3m_mi['date'] >= '2021-01-01',
    ['25%', '50%', '75%'],
].mean().round(2)

25%    0.43
50%    0.48
75%    0.54
dtype: float64

In [31]:
### 6m

In [32]:
# MI rows of the 6m-trimmed data.
gm_high_coverage_6m_trim_mi = gm_high_coverage_6m_trim.loc[gm_high_coverage_6m_trim['state'].eq('MI')]

# Per-month describe() summary of MI's `gm_past_2quarters`, with the month
# labels converted to timestamps.
margin_distr_6m_mi = (
    gm_high_coverage_6m_trim_mi
    .groupby(['date'])['gm_past_2quarters']
    .describe()
    .reset_index()
    .assign(date=lambda summary: pd.to_datetime(summary['date']))
)

# Average the monthly quartiles over every available month (2 decimals).
margin_distr_6m_mi.loc[:, ['25%', '50%', '75%']].mean().round(2)



25%    0.53
50%    0.57
75%    0.60
dtype: float64

In [33]:
# Average the monthly quartiles for months from 2021-01-01 onward (2 decimals).
margin_distr_6m_mi.loc[
    margin_distr_6m_mi['date'] >= '2021-01-01',
    ['25%', '50%', '75%'],
].mean().round(2)

25%    0.49
50%    0.53
75%    0.57
dtype: float64

In [34]:
### 9m

In [35]:
# MI rows of the 9m-trimmed data.
gm_high_coverage_9m_trim_mi = gm_high_coverage_9m_trim.loc[gm_high_coverage_9m_trim['state'].eq('MI')]

# Per-month describe() summary of MI's `gm_past_3quarters`, with the month
# labels converted to timestamps.
margin_distr_9m_mi = (
    gm_high_coverage_9m_trim_mi
    .groupby(['date'])['gm_past_3quarters']
    .describe()
    .reset_index()
    .assign(date=lambda summary: pd.to_datetime(summary['date']))
)

# Average the monthly quartiles over every available month (2 decimals).
margin_distr_9m_mi.loc[:, ['25%', '50%', '75%']].mean().round(2)



25%    0.56
50%    0.59
75%    0.60
dtype: float64

In [36]:
# Average the monthly quartiles for months from 2021-01-01 onward (2 decimals).
margin_distr_9m_mi.loc[
    margin_distr_9m_mi['date'] >= '2021-01-01',
    ['25%', '50%', '75%'],
].mean().round(2)

25%    0.54
50%    0.56
75%    0.58
dtype: float64

## MA

In [38]:
### 3m

In [39]:
# MA rows of the 3m-trimmed data.
gm_high_coverage_3m_trim_ma = gm_high_coverage_3m_trim.loc[gm_high_coverage_3m_trim['state'].eq('MA')]

# Per-month describe() summary of MA's `gm_past_quarter`, with the month
# labels converted to timestamps.
margin_distr_3m_ma = (
    gm_high_coverage_3m_trim_ma
    .groupby(['date'])['gm_past_quarter']
    .describe()
    .reset_index()
    .assign(date=lambda summary: pd.to_datetime(summary['date']))
)

# Average the monthly quartiles over every available month (2 decimals).
margin_distr_3m_ma.loc[:, ['25%', '50%', '75%']].mean().round(2)



25%    0.53
50%    0.54
75%    0.55
dtype: float64

In [40]:
# Average the monthly quartiles for months from 2021-01-01 onward (2 decimals).
margin_distr_3m_ma.loc[
    margin_distr_3m_ma['date'] >= '2021-01-01',
    ['25%', '50%', '75%'],
].mean().round(2)

25%    0.53
50%    0.54
75%    0.56
dtype: float64

In [41]:
### 6m

In [42]:
# MA rows of the 6m-trimmed data.
gm_high_coverage_6m_trim_ma = gm_high_coverage_6m_trim.loc[gm_high_coverage_6m_trim['state'].eq('MA')]

# Per-month describe() summary of MA's `gm_past_2quarters`, with the month
# labels converted to timestamps.
margin_distr_6m_ma = (
    gm_high_coverage_6m_trim_ma
    .groupby(['date'])['gm_past_2quarters']
    .describe()
    .reset_index()
    .assign(date=lambda summary: pd.to_datetime(summary['date']))
)

# Average the monthly quartiles over every available month (2 decimals).
margin_distr_6m_ma.loc[:, ['25%', '50%', '75%']].mean().round(2)



25%    0.53
50%    0.54
75%    0.55
dtype: float64

In [43]:
# Average the monthly quartiles for months from 2021-01-01 onward (2 decimals).
margin_distr_6m_ma.loc[
    margin_distr_6m_ma['date'] >= '2021-01-01',
    ['25%', '50%', '75%'],
].mean().round(2)

25%    0.54
50%    0.54
75%    0.56
dtype: float64

In [44]:
### 9m

In [45]:
# MA rows of the 9m-trimmed data.
gm_high_coverage_9m_trim_ma = gm_high_coverage_9m_trim.loc[gm_high_coverage_9m_trim['state'].eq('MA')]

# Per-month describe() summary of MA's `gm_past_3quarters`, with the month
# labels converted to timestamps.
margin_distr_9m_ma = (
    gm_high_coverage_9m_trim_ma
    .groupby(['date'])['gm_past_3quarters']
    .describe()
    .reset_index()
    .assign(date=lambda summary: pd.to_datetime(summary['date']))
)

# Average the monthly quartiles over every available month (2 decimals).
margin_distr_9m_ma.loc[:, ['25%', '50%', '75%']].mean().round(2)



25%    0.53
50%    0.53
75%    0.55
dtype: float64

In [46]:
# Average the monthly quartiles for months from 2021-01-01 onward (2 decimals).
margin_distr_9m_ma.loc[
    margin_distr_9m_ma['date'] >= '2021-01-01',
    ['25%', '50%', '75%'],
].mean().round(2)

25%    0.53
50%    0.53
75%    0.55
dtype: float64