In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import json
import numpy 
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline  
import pyarrow
import sys

from datetime import date
from dotenv import load_dotenv
from sqlalchemy import create_engine
from os import path
from typing import List,Dict, Tuple
from collections import defaultdict
# Render every column of wide DataFrames instead of eliding the middle ones.
pd.set_option("display.max_columns", None)

# Load variables from a local .env file (if any), then read the path of the
# BigQuery credentials file from the environment.
load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')
if BIGQUERY_CREDENTIALS_PATH is None:
    # Without this guard os.path.expanduser(None) fails with an opaque
    # TypeError that does not mention the missing variable.
    raise RuntimeError(
        "BIGQUERY_CREDENTIALS_PATH is not set; define it in the environment "
        "or in a .env file before running this notebook."
    )
engine = create_engine(
    'bigquery://bespoke-financial/ProdMetrcData',
    credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH),
)

# Make the helper directories importable. Paths are resolved relative to the
# current working directory; entries already on sys.path are skipped so that
# re-running this cell does not keep appending duplicates.
for _relative_dir in ("../core", "../../src"):
    _module_dir = path.realpath(path.join(os.getcwd(), _relative_dir))
    if _module_dir not in sys.path:
        sys.path.append(_module_dir)

import create_queries
import prepare_data

from bespoke.inventory.analysis.shared import download_util, inventory_types
from bespoke.inventory.analysis import active_inventory_util as util
from bespoke.inventory.analysis import inventory_valuations_util as valuations_util

# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [5]:
# Load the margin metrics table from disk; the first CSV column is used as
# the row index.
data = pd.read_csv(
    'all_metrc_gm_0526.csv',
    index_col=0,
)

In [6]:
# Preview the full table as loaded from the CSV.
data

Unnamed: 0,date,revenue,cogs,margin_$,margin_%,total_count_incoming,product_count,count_incoming,coverage,revenue_after_tax,cogs_after_tax,margin_$_after_tax,margin_%_after_tax,gm_past_quarter,gm_past_2quarters,gm_past_3quarters,gm_past_quarter_after_tax,gm_past_2quarters_after_tax,gm_past_3quarters_after_tax,gm$_past_quarter_after_tax,gm$_past_2quarters_after_tax,gm$_past_3quarters_after_tax,company,location,id,company_identifier,state
0,2021-01,291646.25,69820.627134,221825.622866,0.760598,4810.0,2005.0,2805,0.696294,335393.1875,88672.196461,246720.991039,0.735617,,,,,,,,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
1,2021-02,262906.73,85237.307175,177669.422825,0.675789,5464.0,377.0,5087,0.948611,302342.7395,108251.380112,194091.359388,0.641958,,,,,,,,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
2,2021-03,315154.72,99796.271792,215358.448208,0.683342,6902.0,159.0,6743,0.951475,362427.9280,126741.265176,235686.662824,0.650299,0.706576,,,0.675958,,,225499.671084,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
3,2021-04,329925.48,100825.215287,229100.264713,0.694400,7081.0,112.0,6969,0.931465,379414.3020,128048.023414,251366.278586,0.662511,0.684510,,,0.651590,,,227048.100266,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
4,2021-05,336793.80,98971.770002,237822.029998,0.706135,7286.0,131.0,7155,0.918326,387312.8700,125694.147903,261618.722097,0.675471,0.694626,,,0.662761,,,249557.221169,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2676,2022-01,24339.01,11123.403847,13215.606153,0.542980,831.0,43.0,788,0.936866,27989.8615,14126.722885,13863.138615,0.495291,0.626026,,,0.587002,,,16870.717070,,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2677,2022-02,24450.82,10430.082937,14020.737063,0.573426,798.0,39.0,759,0.955689,28118.4430,13246.205331,14872.237669,0.528914,0.581204,0.604904,,0.537504,0.563676,,16956.337113,14568.225700,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2678,2022-03,27920.68,11679.072300,16241.607700,0.581705,950.0,71.0,879,0.964467,32108.7820,14832.421821,17276.360179,0.538057,0.566037,0.621341,,0.520754,0.581829,,15337.245488,15937.723007,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2679,2022-04,42846.56,19011.597583,23834.962417,0.556286,2064.0,42.0,2022,0.988979,49273.5440,24144.728931,25128.815069,0.509986,0.570473,0.598249,,0.525652,0.556327,,19092.470972,17981.594021,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA


In [7]:
# Company identifiers used to select a subset of rows from `data`.
# NOTE: despite the `df_` prefix this is a plain list of strings, not a DataFrame.
df_clients = [
    'DL', 'DW', 'EMA', 'EMM', 'EMT', 'EMF', 'ST',
    'GRG', 'EL', 'VS', 'TT', 'MD', 'DWF', 'GHC',
    'SV', '99HT', 'NECC', 'MPW', 'LBC',
]

In [8]:
# Keep only the rows whose company_identifier appears in df_clients.
data_client = data.loc[data['company_identifier'].isin(df_clients)]

In [9]:
# List the distinct company identifiers that survived the filter.
data_client.company_identifier.unique()

array(['99HT', 'DWF', 'DL', 'EL', 'EMA', 'EMF', 'EMM', 'EMT', 'GRG',
       'GHC', 'LBC', 'MPW', 'MD', 'NECC', 'SV', 'ST', 'DW', 'TT', 'VS'],
      dtype=object)

In [10]:
# Preview the rows retained for the selected companies.
data_client

Unnamed: 0,date,revenue,cogs,margin_$,margin_%,total_count_incoming,product_count,count_incoming,coverage,revenue_after_tax,cogs_after_tax,margin_$_after_tax,margin_%_after_tax,gm_past_quarter,gm_past_2quarters,gm_past_3quarters,gm_past_quarter_after_tax,gm_past_2quarters_after_tax,gm_past_3quarters_after_tax,gm$_past_quarter_after_tax,gm$_past_2quarters_after_tax,gm$_past_3quarters_after_tax,company,location,id,company_identifier,state
0,2021-01,291646.25,69820.627134,221825.622866,0.760598,4810.0,2005.0,2805,0.696294,335393.1875,88672.196461,246720.991039,0.735617,,,,,,,,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
1,2021-02,262906.73,85237.307175,177669.422825,0.675789,5464.0,377.0,5087,0.948611,302342.7395,108251.380112,194091.359388,0.641958,,,,,,,,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
2,2021-03,315154.72,99796.271792,215358.448208,0.683342,6902.0,159.0,6743,0.951475,362427.9280,126741.265176,235686.662824,0.650299,0.706576,,,0.675958,,,225499.671084,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
3,2021-04,329925.48,100825.215287,229100.264713,0.694400,7081.0,112.0,6969,0.931465,379414.3020,128048.023414,251366.278586,0.662511,0.684510,,,0.651590,,,227048.100266,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
4,2021-05,336793.80,98971.770002,237822.029998,0.706135,7286.0,131.0,7155,0.918326,387312.8700,125694.147903,261618.722097,0.675471,0.694626,,,0.662761,,,249557.221169,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2667,2022-01,435566.37,177725.054761,257841.315239,0.591968,12801.0,0.0,12801,0.922462,500901.3255,225710.819546,275190.505954,0.549391,0.548712,0.519920,0.486326,0.501621,0.469825,0.432725,267500.302738,273004.565680,256285.182191,VS,402R-00545,"('VS', '402R-00545')",VS,CO
2668,2022-02,393248.45,215992.402057,177256.047943,0.450748,12013.0,132.0,11881,0.934864,452235.7175,274310.350612,177925.366888,0.393435,0.520338,0.511786,0.490970,0.470286,0.460842,0.437854,230143.835160,254647.974957,252081.562221,VS,402R-00545,"('VS', '402R-00545')",VS,CO
2669,2022-03,420528.44,230621.392647,189907.047353,0.451591,13015.0,139.0,12876,0.959525,483607.7060,292889.168662,190718.537338,0.394366,0.498103,0.514072,0.496658,0.445731,0.463366,0.444135,214611.470060,245849.495440,250542.216130,VS,402R-00545,"('VS', '402R-00545')",VS,CO
2670,2022-04,417415.92,191172.626430,226243.293570,0.542009,12957.0,89.0,12868,0.905071,480028.3080,242789.235566,237239.072434,0.494219,0.481450,0.515081,0.507097,0.427340,0.464481,0.455663,201960.992220,234730.647479,249323.374526,VS,402R-00545,"('VS', '402R-00545')",VS,CO


In [15]:
# Summary statistics (count, mean, std, quartiles, ...) of margin_% for each
# value of `date`, with `date` converted from string to datetime afterwards.
margin_distr = (
    data_client
    .groupby(['date'])['margin_%']
    .describe()
    .reset_index()
    .assign(date=lambda stats: pd.to_datetime(stats['date']))
)

In [16]:
# Show only the periods from January 2021 onwards.
margin_distr.loc[margin_distr['date'] >= '2021-01-01']

Unnamed: 0,date,count,mean,std,min,25%,50%,75%,max
12,2021-01-01,15.0,0.588142,0.129039,0.37426,0.512239,0.625896,0.652314,0.810345
13,2021-02-01,15.0,0.573114,0.103838,0.393055,0.489807,0.599818,0.665724,0.69344
14,2021-03-01,16.0,0.556437,0.103607,0.375906,0.492289,0.559886,0.638797,0.700704
15,2021-04-01,16.0,0.559236,0.110142,0.341996,0.495757,0.576955,0.639611,0.722108
16,2021-05-01,16.0,0.568153,0.096955,0.388239,0.514654,0.580261,0.642722,0.706135
17,2021-06-01,16.0,0.559255,0.096536,0.379299,0.523839,0.56819,0.616269,0.69822
18,2021-07-01,17.0,0.555835,0.092869,0.36941,0.509048,0.568944,0.607189,0.703229
19,2021-08-01,18.0,0.541328,0.102685,0.286105,0.506818,0.540868,0.603454,0.701284
20,2021-09-01,19.0,0.544118,0.110898,0.314469,0.508234,0.558778,0.599427,0.741049
21,2021-10-01,19.0,0.557013,0.083183,0.371533,0.523709,0.588549,0.602744,0.696515


In [13]:
# Rows whose `coverage` value is below this threshold are excluded from the
# high-coverage subset. Named here so the cut-off is visible and easy to tune.
MIN_COVERAGE = 0.8

# Subset of the full table (all companies) meeting the coverage threshold.
data_high_coverage = data[data['coverage'] >= MIN_COVERAGE]

In [14]:
# Preview the rows that passed the coverage filter.
data_high_coverage

Unnamed: 0,date,revenue,cogs,margin_$,margin_%,total_count_incoming,product_count,count_incoming,coverage,revenue_after_tax,cogs_after_tax,margin_$_after_tax,margin_%_after_tax,gm_past_quarter,gm_past_2quarters,gm_past_3quarters,gm_past_quarter_after_tax,gm_past_2quarters_after_tax,gm_past_3quarters_after_tax,gm$_past_quarter_after_tax,gm$_past_2quarters_after_tax,gm$_past_3quarters_after_tax,company,location,id,company_identifier,state
1,2021-02,262906.73,85237.307175,177669.422825,0.675789,5464.0,377.0,5087,0.948611,302342.7395,108251.380112,194091.359388,0.641958,,,,,,,,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
2,2021-03,315154.72,99796.271792,215358.448208,0.683342,6902.0,159.0,6743,0.951475,362427.9280,126741.265176,235686.662824,0.650299,0.706576,,,0.675958,,,225499.671084,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
3,2021-04,329925.48,100825.215287,229100.264713,0.694400,7081.0,112.0,6969,0.931465,379414.3020,128048.023414,251366.278586,0.662511,0.684510,,,0.651590,,,227048.100266,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
4,2021-05,336793.80,98971.770002,237822.029998,0.706135,7286.0,131.0,7155,0.918326,387312.8700,125694.147903,261618.722097,0.675471,0.694626,,,0.662761,,,249557.221169,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
5,2021-06,324788.69,98014.813307,226773.876693,0.698220,6860.0,114.0,6746,0.926026,373506.9935,124478.812900,249028.180600,0.666730,0.699585,0.703081,,0.668237,0.672098,,254004.393761,239752.032422,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2676,2022-01,24339.01,11123.403847,13215.606153,0.542980,831.0,43.0,788,0.936866,27989.8615,14126.722885,13863.138615,0.495291,0.626026,,,0.587002,,,16870.717070,,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2677,2022-02,24450.82,10430.082937,14020.737063,0.573426,798.0,39.0,759,0.955689,28118.4430,13246.205331,14872.237669,0.528914,0.581204,0.604904,,0.537504,0.563676,,16956.337113,14568.225700,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2678,2022-03,27920.68,11679.072300,16241.607700,0.581705,950.0,71.0,879,0.964467,32108.7820,14832.421821,17276.360179,0.538057,0.566037,0.621341,,0.520754,0.581829,,15337.245488,15937.723007,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2679,2022-04,42846.56,19011.597583,23834.962417,0.556286,2064.0,42.0,2022,0.988979,49273.5440,24144.728931,25128.815069,0.509986,0.570473,0.598249,,0.525652,0.556327,,19092.470972,17981.594021,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA


In [None]:
# Summary statistics of margin_% for each value of `date`, restricted to the
# high-coverage rows, with `date` converted from string to datetime afterwards.
margin_distr_high = (
    data_high_coverage
    .groupby(['date'])['margin_%']
    .describe()
    .reset_index()
    .assign(date=lambda stats: pd.to_datetime(stats['date']))
)

In [None]:
# Show only the periods from January 2021 onwards.
margin_distr_high.loc[margin_distr_high['date'] >= '2021-01-01']