In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import json
import numpy 
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline  
import pyarrow
import sys

from datetime import date
from dotenv import load_dotenv
from sqlalchemy import create_engine
from os import path
from typing import List,Dict, Tuple
from collections import defaultdict
pd.set_option("display.max_columns", None)

load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')
engine = create_engine('bigquery://bespoke-financial/ProdMetrcData', credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH))

sys.path.append(path.realpath(path.join(os.getcwd(), "../core")))
sys.path.append(path.realpath(path.join(os.getcwd(), "../../src")))

import create_queries
import prepare_data

from bespoke.inventory.analysis.shared import download_util, inventory_types
from bespoke.inventory.analysis import active_inventory_util as util
from bespoke.inventory.analysis import inventory_valuations_util as valuations_util

%load_ext autoreload
%autoreload 2

In [3]:
gm = pd.read_csv('all_metrc_gm_0830.csv',index_col = 0)

In [4]:
gm['company_identifier'].unique()

array(['99HT', 'AGA', 'ALT', 'BS', 'BMC', 'BBF', 'BUD', 'BRC', 'BYN',
       'CHO', 'CG', 'CPA', 'CPC', 'CSC', 'CSCC', 'CCC', 'CC', 'CED',
       'DWF', 'DCO', 'DL', 'DGG', 'DG', 'EL', 'EMA', 'EMF', 'EMM', 'EMT',
       'EPC', 'EH', 'FI', 'FW', 'GTC', 'GT', 'GLNR', 'GRG', 'GTR', 'GHC',
       'GFEE', 'GFWF', 'HB', 'HS', 'HPCC', 'HC', 'IDC', 'ID', 'KAR', 'KC',
       'LEG', 'LBC', 'MSS', 'ML', 'MW', 'MPW', 'MD', 'ND', 'NECC', 'PGM',
       'PRP', 'PRL', 'QR', 'MC', 'RA', 'RGD', 'SGF', 'SFV', 'SV', 'SLCC',
       'SO', 'ST', 'DW', 'FD', 'TGL', 'TJR', 'TTS', 'TT', 'T4L', 'TL',
       'UHHC', 'VS', 'VOY'], dtype=object)

In [5]:
gm_high_coverage = gm[gm['coverage'] >= 0.7]

In [7]:
gm_high_coverage_3m_trim = gm_high_coverage[(gm_high_coverage['gm_past_quarter'] > 0.2) & (gm_high_coverage['gm_past_quarter'] < 0.8) & (gm_high_coverage['date'] < '2022-07-01')]

gm_high_coverage_6m_trim = gm_high_coverage[(gm_high_coverage['gm_past_2quarters'] > 0.2) & (gm_high_coverage['gm_past_2quarters'] < 0.8) & (gm_high_coverage['date'] < '2022-07-01')]

gm_high_coverage_9m_trim = gm_high_coverage[(gm_high_coverage['gm_past_3quarters'] > 0.2) & (gm_high_coverage['gm_past_3quarters'] < 0.8) & (gm_high_coverage['date'] < '2022-07-01')]



In [77]:
state_count = gm_high_coverage_3m_trim[['state','id','date']].groupby(['state','date']).count().unstack().T.reset_index()
state_count



state,level_0,date,CA,CO,MA,MI,OR
0,id,2020-03,15.0,8.0,,,
1,id,2020-04,16.0,10.0,,1.0,1.0
2,id,2020-05,19.0,9.0,,2.0,1.0
3,id,2020-06,20.0,8.0,,2.0,
4,id,2020-07,22.0,9.0,,4.0,1.0
5,id,2020-08,24.0,9.0,1.0,3.0,1.0
6,id,2020-09,26.0,9.0,1.0,2.0,1.0
7,id,2020-10,26.0,9.0,1.0,2.0,1.0
8,id,2020-11,28.0,10.0,1.0,1.0,1.0
9,id,2020-12,27.0,10.0,1.0,2.0,


In [144]:
state_count[state_count['date'] >=  '2021-01-01']

state,level_0,date,CA,CO,MA,MI,OR
11,id,2021-02,37.0,11.0,1.0,2.0,1.0
12,id,2021-03,43.0,12.0,1.0,6.0,
13,id,2021-04,44.0,14.0,1.0,6.0,
14,id,2021-05,48.0,14.0,1.0,9.0,
15,id,2021-06,49.0,15.0,1.0,11.0,1.0
16,id,2021-07,49.0,15.0,1.0,14.0,1.0
17,id,2021-08,50.0,14.0,1.0,13.0,1.0
18,id,2021-09,51.0,13.0,2.0,17.0,1.0
19,id,2021-10,54.0,12.0,2.0,18.0,1.0
20,id,2021-11,56.0,11.0,3.0,19.0,1.0


In [None]:
gm_high_coverage_3m_trim.sort_values(by ='gm_past_3quarters_after_tax',ascending = False )

## CA

In [None]:
### 3m

In [98]:
gm_high_coverage_3m_trim_ca = gm_high_coverage_3m_trim[gm_high_coverage_3m_trim['state'] == 'CA']

In [17]:
margin_distr_3m_ca = gm_high_coverage_3m_trim_ca.groupby(['date'])['gm_past_quarter_final'].describe().reset_index()
margin_distr_3m_ca['date'] = pd.to_datetime(margin_distr_3m_ca['date'])



In [18]:
#margin_distr_3m_ca[['25%','50%','75%']].mean().round(2)

In [78]:
margin_distr_3m_ca[margin_distr_3m_ca['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

25%    0.43
50%    0.49
75%    0.57
dtype: float64

In [86]:
gm_high_coverage_3m_trim_ca.groupby(['date'])['gm_past_quarter_final'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-03,15.0,0.498667,0.090385,0.34,0.44,0.51,0.555,0.64
2020-04,16.0,0.505,0.079498,0.36,0.445,0.505,0.5525,0.64
2020-05,19.0,0.503158,0.106669,0.25,0.46,0.5,0.545,0.75
2020-06,20.0,0.5015,0.093711,0.31,0.4575,0.495,0.54,0.72
2020-07,22.0,0.495455,0.098789,0.34,0.415,0.485,0.5575,0.69
2020-08,24.0,0.4825,0.12195,0.24,0.405,0.48,0.565,0.69
2020-09,26.0,0.483077,0.129082,0.14,0.425,0.49,0.5575,0.69
2020-10,26.0,0.503462,0.109542,0.24,0.445,0.49,0.56,0.7
2020-11,28.0,0.512143,0.099679,0.34,0.445,0.51,0.5725,0.73
2020-12,27.0,0.490741,0.09907,0.32,0.43,0.47,0.555,0.68


In [130]:
gm_high_coverage_3m_trim_ca['date'] = pd.to_datetime(gm_high_coverage_3m_trim_ca['date'])
(gm_high_coverage_3m_trim_ca[gm_high_coverage_3m_trim_ca['date'] >= '2021-01-01'].groupby(['date'])['gm_past_quarter_final'].quantile(0.2).mean(),\
gm_high_coverage_3m_trim_ca[gm_high_coverage_3m_trim_ca['date'] >= '2021-01-01'].groupby(['date'])['gm_past_quarter_final'].quantile(0.4).mean(),\
 gm_high_coverage_3m_trim_ca[gm_high_coverage_3m_trim_ca['date'] >= '2021-01-01'].groupby(['date'])['gm_past_quarter_final'].quantile(0.7).mean())




(0.4047368421052632, 0.47231578947368424, 0.5504736842105263)

In [None]:
### 6m

In [20]:
gm_high_coverage_6m_trim_ca = gm_high_coverage_6m_trim[gm_high_coverage_6m_trim['state'] == 'CA']
margin_distr_6m_ca = gm_high_coverage_6m_trim_ca.groupby(['date'])['gm_past_2quarters_final'].describe().reset_index()
margin_distr_6m_ca['date'] = pd.to_datetime(margin_distr_6m_ca['date'])

#margin_distr_6m_ca[['25%','50%','75%']].mean().round(2)



In [21]:
margin_distr_6m_ca[margin_distr_6m_ca['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

25%    0.43
50%    0.49
75%    0.57
dtype: float64

In [135]:
gm_high_coverage_6m_trim_ca['date'] = pd.to_datetime(gm_high_coverage_6m_trim_ca['date'])
(gm_high_coverage_6m_trim_ca[gm_high_coverage_6m_trim_ca['date'] >= '2021-01-01'].groupby(['date'])['gm_past_2quarters_final'].quantile(0.2).mean(),\
gm_high_coverage_6m_trim_ca[gm_high_coverage_6m_trim_ca['date'] >= '2021-01-01'].groupby(['date'])['gm_past_2quarters_final'].quantile(0.4).mean(),\
 gm_high_coverage_6m_trim_ca[gm_high_coverage_6m_trim_ca['date'] >= '2021-01-01'].groupby(['date'])['gm_past_2quarters_final'].quantile(0.7).mean())




(0.4081052631578947, 0.4708421052631579, 0.5500526315789473)

In [14]:
### 9m

In [22]:
gm_high_coverage_9m_trim_ca = gm_high_coverage_9m_trim[gm_high_coverage_9m_trim['state'] == 'CA']
margin_distr_9m_ca = gm_high_coverage_9m_trim_ca.groupby(['date'])['gm_past_3quarters_final'].describe().reset_index()
margin_distr_9m_ca['date'] = pd.to_datetime(margin_distr_9m_ca['date'])

#margin_distr_9m_ca[['25%','50%','75%']].mean().round(2)



In [23]:
margin_distr_9m_ca[margin_distr_9m_ca['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

25%    0.43
50%    0.49
75%    0.57
dtype: float64

In [136]:
gm_high_coverage_9m_trim_ca['date'] = pd.to_datetime(gm_high_coverage_9m_trim_ca['date'])
(gm_high_coverage_9m_trim_ca[gm_high_coverage_9m_trim_ca['date'] >= '2021-01-01'].groupby(['date'])['gm_past_3quarters_final'].quantile(0.2).mean(),\
gm_high_coverage_9m_trim_ca[gm_high_coverage_9m_trim_ca['date'] >= '2021-01-01'].groupby(['date'])['gm_past_3quarters_final'].quantile(0.4).mean(),\
 gm_high_coverage_9m_trim_ca[gm_high_coverage_9m_trim_ca['date'] >= '2021-01-01'].groupby(['date'])['gm_past_3quarters_final'].quantile(0.7).mean())




(0.4056842105263158, 0.4688421052631579, 0.5489473684210526)

## CO

In [None]:
### 3m

In [113]:
gm_high_coverage_3m_trim_co = gm_high_coverage_3m_trim[gm_high_coverage_3m_trim['state'] == 'CO']

In [105]:
margin_distr_3m_co = gm_high_coverage_3m_trim_co.groupby(['date'])['gm_past_quarter_final'].describe().reset_index()
margin_distr_3m_co['date'] = pd.to_datetime(margin_distr_3m_co['date'])



In [106]:
#margin_distr_3m_co[['25%','50%','75%']].mean().round(2)

In [107]:
margin_distr_3m_co[margin_distr_3m_co['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

25%    0.39
50%    0.51
75%    0.60
dtype: float64

In [112]:
gm_high_coverage_3m_trim_co.groupby(['date'])['gm_past_quarter_final'].quantile(0.25).mean()

0.4177586206896552

In [131]:
gm_high_coverage_3m_trim_co['date'] = pd.to_datetime(gm_high_coverage_3m_trim_co['date'])
(gm_high_coverage_3m_trim_co[gm_high_coverage_3m_trim_co['date'] >= '2021-01-01'].groupby(['date'])['gm_past_quarter_final'].quantile(0.2).mean(),\
gm_high_coverage_3m_trim_co[gm_high_coverage_3m_trim_co['date'] >= '2021-01-01'].groupby(['date'])['gm_past_quarter_final'].quantile(0.4).mean(),\
 gm_high_coverage_3m_trim_co[gm_high_coverage_3m_trim_co['date'] >= '2021-01-01'].groupby(['date'])['gm_past_quarter_final'].quantile(0.7).mean())




(0.3693684210526315, 0.466, 0.5780000000000002)

In [108]:
### 6m

In [109]:
gm_high_coverage_6m_trim_co = gm_high_coverage_6m_trim[gm_high_coverage_6m_trim['state'] == 'CO']
margin_distr_6m_co = gm_high_coverage_6m_trim_co.groupby(['date'])['gm_past_2quarters_final'].describe().reset_index()
margin_distr_6m_co['date'] = pd.to_datetime(margin_distr_6m_co['date'])

#margin_distr_6m_co[['25%','50%','75%']].mean().round(2)



In [110]:
margin_distr_6m_co[margin_distr_6m_co['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

25%    0.41
50%    0.52
75%    0.60
dtype: float64

In [137]:
gm_high_coverage_6m_trim_co['date'] = pd.to_datetime(gm_high_coverage_6m_trim_co['date'])
(gm_high_coverage_6m_trim_co[gm_high_coverage_6m_trim_co['date'] >= '2021-01-01'].groupby(['date'])['gm_past_2quarters_final'].quantile(0.2).mean(),\
gm_high_coverage_6m_trim_co[gm_high_coverage_6m_trim_co['date'] >= '2021-01-01'].groupby(['date'])['gm_past_2quarters_final'].quantile(0.4).mean(),\
 gm_high_coverage_6m_trim_co[gm_high_coverage_6m_trim_co['date'] >= '2021-01-01'].groupby(['date'])['gm_past_2quarters_final'].quantile(0.7).mean())




(0.3794736842105263, 0.4752631578947367, 0.5785789473684211)

In [59]:
### 9m

In [60]:
gm_high_coverage_9m_trim_co = gm_high_coverage_9m_trim[gm_high_coverage_9m_trim['state'] == 'CO']
margin_distr_9m_co = gm_high_coverage_9m_trim_co.groupby(['date'])['gm_past_3quarters_final'].describe().reset_index()
margin_distr_9m_co['date'] = pd.to_datetime(margin_distr_9m_co['date'])

margin_distr_9m_co[['25%','50%','75%']].mean().round(2)



25%    0.43
50%    0.52
75%    0.59
dtype: float64

In [61]:
margin_distr_9m_co[margin_distr_9m_co['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

25%    0.42
50%    0.52
75%    0.60
dtype: float64

In [138]:
gm_high_coverage_9m_trim_co['date'] = pd.to_datetime(gm_high_coverage_9m_trim_co['date'])
(gm_high_coverage_9m_trim_co[gm_high_coverage_9m_trim_co['date'] >= '2021-01-01'].groupby(['date'])['gm_past_3quarters_final'].quantile(0.2).mean(),\
gm_high_coverage_9m_trim_co[gm_high_coverage_9m_trim_co['date'] >= '2021-01-01'].groupby(['date'])['gm_past_3quarters_final'].quantile(0.4).mean(),\
 gm_high_coverage_9m_trim_co[gm_high_coverage_9m_trim_co['date'] >= '2021-01-01'].groupby(['date'])['gm_past_3quarters_final'].quantile(0.7).mean())




(0.3910526315789474, 0.48852631578947375, 0.5812631578947367)

## MI

In [None]:
### 3m

In [62]:
gm_high_coverage_3m_trim_mi = gm_high_coverage_3m_trim[gm_high_coverage_3m_trim['state'] == 'MI']
margin_distr_3m_mi = gm_high_coverage_3m_trim_mi.groupby(['date'])['gm_past_quarter_final'].describe().reset_index()
margin_distr_3m_mi['date'] = pd.to_datetime(margin_distr_3m_mi['date'])

margin_distr_3m_mi[['25%','50%','75%']].mean().round(2)



25%    0.37
50%    0.41
75%    0.46
dtype: float64

In [63]:
margin_distr_3m_mi[margin_distr_3m_mi['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

25%    0.34
50%    0.40
75%    0.45
dtype: float64

In [134]:
gm_high_coverage_3m_trim_mi['date'] = pd.to_datetime(gm_high_coverage_3m_trim_mi['date'])
(gm_high_coverage_3m_trim_mi[gm_high_coverage_3m_trim_mi['date'] >= '2021-01-01'].groupby(['date'])['gm_past_quarter_final'].quantile(0.2).mean(),\
gm_high_coverage_3m_trim_mi[gm_high_coverage_3m_trim_mi['date'] >= '2021-01-01'].groupby(['date'])['gm_past_quarter_final'].quantile(0.4).mean(),\
 gm_high_coverage_3m_trim_mi[gm_high_coverage_3m_trim_mi['date'] >= '2021-01-01'].groupby(['date'])['gm_past_quarter_final'].quantile(0.7).mean())




(0.3286315789473684, 0.3788421052631579, 0.44189473684210523)

In [None]:
### 6m

In [64]:
gm_high_coverage_6m_trim_mi = gm_high_coverage_6m_trim[gm_high_coverage_6m_trim['state'] == 'MI']
margin_distr_6m_mi = gm_high_coverage_6m_trim_mi.groupby(['date'])['gm_past_2quarters_final'].describe().reset_index()
margin_distr_6m_mi['date'] = pd.to_datetime(margin_distr_6m_mi['date'])

margin_distr_6m_mi[['25%','50%','75%']].mean().round(2)



25%    0.40
50%    0.43
75%    0.46
dtype: float64

In [65]:
margin_distr_6m_mi[margin_distr_6m_mi['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

25%    0.37
50%    0.41
75%    0.45
dtype: float64

In [139]:
gm_high_coverage_6m_trim_mi['date'] = pd.to_datetime(gm_high_coverage_6m_trim_mi['date'])
(gm_high_coverage_6m_trim_mi[gm_high_coverage_6m_trim_mi['date'] >= '2021-01-01'].groupby(['date'])['gm_past_2quarters_final'].quantile(0.2).mean(),\
gm_high_coverage_6m_trim_mi[gm_high_coverage_6m_trim_mi['date'] >= '2021-01-01'].groupby(['date'])['gm_past_2quarters_final'].quantile(0.4).mean(),\
 gm_high_coverage_6m_trim_mi[gm_high_coverage_6m_trim_mi['date'] >= '2021-01-01'].groupby(['date'])['gm_past_2quarters_final'].quantile(0.7).mean())




(0.3647368421052632, 0.39315789473684204, 0.44294736842105253)

In [None]:
### 9m

In [66]:
gm_high_coverage_9m_trim_mi = gm_high_coverage_9m_trim[gm_high_coverage_9m_trim['state'] == 'MI']
margin_distr_9m_mi = gm_high_coverage_9m_trim_mi.groupby(['date'])['gm_past_3quarters_final'].describe().reset_index()
margin_distr_9m_mi['date'] = pd.to_datetime(margin_distr_9m_mi['date'])

margin_distr_9m_mi[['25%','50%','75%']].mean().round(2)



25%    0.41
50%    0.43
75%    0.46
dtype: float64

In [67]:
margin_distr_9m_mi[margin_distr_9m_mi['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

25%    0.39
50%    0.41
75%    0.46
dtype: float64

In [140]:
gm_high_coverage_9m_trim_mi['date'] = pd.to_datetime(gm_high_coverage_9m_trim_mi['date'])
(gm_high_coverage_9m_trim_mi[gm_high_coverage_9m_trim_mi['date'] >= '2021-01-01'].groupby(['date'])['gm_past_3quarters_final'].quantile(0.2).mean(),\
gm_high_coverage_9m_trim_mi[gm_high_coverage_9m_trim_mi['date'] >= '2021-01-01'].groupby(['date'])['gm_past_3quarters_final'].quantile(0.4).mean(),\
 gm_high_coverage_9m_trim_mi[gm_high_coverage_9m_trim_mi['date'] >= '2021-01-01'].groupby(['date'])['gm_past_3quarters_final'].quantile(0.7).mean())




(0.3878947368421053, 0.4030526315789474, 0.4472631578947368)

## MA

In [68]:
### 3m

In [69]:
gm_high_coverage_3m_trim_ma = gm_high_coverage_3m_trim[gm_high_coverage_3m_trim['state'] == 'MA']
margin_distr_3m_ma = gm_high_coverage_3m_trim_ma.groupby(['date'])['gm_past_quarter_final'].describe().reset_index()
margin_distr_3m_ma['date'] = pd.to_datetime(margin_distr_3m_ma['date'])

margin_distr_3m_ma[['25%','50%','75%']].mean().round(2)



25%    0.53
50%    0.53
75%    0.54
dtype: float64

In [70]:
margin_distr_3m_ma[margin_distr_3m_ma['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

25%    0.53
50%    0.53
75%    0.55
dtype: float64

In [141]:
gm_high_coverage_3m_trim_ma['date'] = pd.to_datetime(gm_high_coverage_3m_trim_ma['date'])
(gm_high_coverage_3m_trim_ma[gm_high_coverage_3m_trim_ma['date'] >= '2021-01-01'].groupby(['date'])['gm_past_quarter_final'].quantile(0.2).mean(),\
gm_high_coverage_3m_trim_ma[gm_high_coverage_3m_trim_ma['date'] >= '2021-01-01'].groupby(['date'])['gm_past_quarter_final'].quantile(0.4).mean(),\
 gm_high_coverage_3m_trim_ma[gm_high_coverage_3m_trim_ma['date'] >= '2021-01-01'].groupby(['date'])['gm_past_quarter_final'].quantile(0.7).mean())




(0.526, 0.5313684210526317, 0.5423684210526316)

In [71]:
### 6m

In [72]:
gm_high_coverage_6m_trim_ma = gm_high_coverage_6m_trim[gm_high_coverage_6m_trim['state'] == 'MA']
margin_distr_6m_ma = gm_high_coverage_6m_trim_ma.groupby(['date'])['gm_past_2quarters_final'].describe().reset_index()
margin_distr_6m_ma['date'] = pd.to_datetime(margin_distr_6m_ma['date'])

margin_distr_6m_ma[['25%','50%','75%']].mean().round(2)



25%    0.53
50%    0.53
75%    0.54
dtype: float64

In [73]:
margin_distr_6m_ma[margin_distr_6m_ma['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

25%    0.53
50%    0.53
75%    0.55
dtype: float64

In [142]:
gm_high_coverage_6m_trim_ma['date'] = pd.to_datetime(gm_high_coverage_6m_trim_ma['date'])
(gm_high_coverage_6m_trim_ma[gm_high_coverage_6m_trim_ma['date'] >= '2021-01-01'].groupby(['date'])['gm_past_2quarters_final'].quantile(0.2).mean(),\
gm_high_coverage_6m_trim_ma[gm_high_coverage_6m_trim_ma['date'] >= '2021-01-01'].groupby(['date'])['gm_past_2quarters_final'].quantile(0.4).mean(),\
 gm_high_coverage_6m_trim_ma[gm_high_coverage_6m_trim_ma['date'] >= '2021-01-01'].groupby(['date'])['gm_past_2quarters_final'].quantile(0.7).mean())




(0.5276842105263159, 0.5312631578947368, 0.5418421052631579)

In [None]:
### 9m

In [74]:
gm_high_coverage_9m_trim_ma = gm_high_coverage_9m_trim[gm_high_coverage_9m_trim['state'] == 'MA']
margin_distr_9m_ma = gm_high_coverage_9m_trim_ma.groupby(['date'])['gm_past_3quarters_final'].describe().reset_index()
margin_distr_9m_ma['date'] = pd.to_datetime(margin_distr_9m_ma['date'])

margin_distr_9m_ma[['25%','50%','75%']].mean().round(2)



25%    0.53
50%    0.54
75%    0.55
dtype: float64

In [75]:
margin_distr_9m_ma[margin_distr_9m_ma['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

25%    0.53
50%    0.54
75%    0.55
dtype: float64

In [143]:
gm_high_coverage_9m_trim_ma['date'] = pd.to_datetime(gm_high_coverage_9m_trim_ma['date'])
(gm_high_coverage_9m_trim_ma[gm_high_coverage_9m_trim_ma['date'] >= '2021-01-01'].groupby(['date'])['gm_past_3quarters_final'].quantile(0.2).mean(),\
gm_high_coverage_9m_trim_ma[gm_high_coverage_9m_trim_ma['date'] >= '2021-01-01'].groupby(['date'])['gm_past_3quarters_final'].quantile(0.4).mean(),\
 gm_high_coverage_9m_trim_ma[gm_high_coverage_9m_trim_ma['date'] >= '2021-01-01'].groupby(['date'])['gm_past_3quarters_final'].quantile(0.7).mean())




(0.5323333333333333, 0.5357777777777778, 0.5487222222222222)

In [None]:
df_clients = [
'DL',
'DW',
'EMA',
'EMM',
'EMT',
'EMF',
'ST',
'GRG',
'EL',
'VS',
'TT',
'MD',
'DWF',
'GHC',
'SV',
'99HT',
'NECC',
'MPW',
'LBC'
]

data_client = gm[gm['company_identifier'].isin(df_clients)]

In [None]:
gm_high_coverage = data_client[data_client['coverage'] >= 0.7]

gm_high_coverage_3m_trim = gm_high_coverage[(gm_high_coverage['gm_past_quarter'] > 0.2) & (gm_high_coverage['gm_past_quarter'] < 0.8) & (gm_high_coverage['date'] < '2022-07-01')]

gm_high_coverage_6m_trim = gm_high_coverage[(gm_high_coverage['gm_past_2quarters'] > 0.2) & (gm_high_coverage['gm_past_2quarters'] < 0.8) & (gm_high_coverage['date'] < '2022-07-01')]

gm_high_coverage_9m_trim = gm_high_coverage[(gm_high_coverage['gm_past_3quarters'] > 0.2) & (gm_high_coverage['gm_past_3quarters'] < 0.8) & (gm_high_coverage['date'] < '2022-07-01')]



In [None]:
## CA

### 3m

gm_high_coverage_3m_trim_ca = gm_high_coverage_3m_trim[gm_high_coverage_3m_trim['state'] == 'CA']

margin_distr_3m_ca = gm_high_coverage_3m_trim_ca.groupby(['date'])['gm_past_quarter_after_tax'].describe().reset_index()
margin_distr_3m_ca['date'] = pd.to_datetime(margin_distr_3m_ca['date'])



#margin_distr_3m_ca[['25%','50%','75%']].mean().round(2)

margin_distr_3m_ca[margin_distr_3m_ca['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

In [None]:
### 6m

gm_high_coverage_6m_trim_ca = gm_high_coverage_6m_trim[gm_high_coverage_6m_trim['state'] == 'CA']
margin_distr_6m_ca = gm_high_coverage_6m_trim_ca.groupby(['date'])['gm_past_2quarters_after_tax'].describe().reset_index()
margin_distr_6m_ca['date'] = pd.to_datetime(margin_distr_6m_ca['date'])

#margin_distr_6m_ca[['25%','50%','75%']].mean().round(2)



margin_distr_6m_ca[margin_distr_6m_ca['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

In [None]:
### 9m

gm_high_coverage_9m_trim_ca = gm_high_coverage_9m_trim[gm_high_coverage_9m_trim['state'] == 'CA']
margin_distr_9m_ca = gm_high_coverage_9m_trim_ca.groupby(['date'])['gm_past_3quarters_after_tax'].describe().reset_index()
margin_distr_9m_ca['date'] = pd.to_datetime(margin_distr_9m_ca['date'])

#margin_distr_9m_ca[['25%','50%','75%']].mean().round(2)



margin_distr_9m_ca[margin_distr_9m_ca['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

In [None]:
## CO

### 3m

gm_high_coverage_3m_trim_co = gm_high_coverage_3m_trim[gm_high_coverage_3m_trim['state'] == 'CO']

margin_distr_3m_co = gm_high_coverage_3m_trim_co.groupby(['date'])['gm_past_quarter'].describe().reset_index()
margin_distr_3m_co['date'] = pd.to_datetime(margin_distr_3m_co['date'])
margin_distr_3m_co[['25%','50%','75%']].mean().round(2)

margin_distr_3m_co[margin_distr_3m_co['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

In [None]:


### 6m

gm_high_coverage_6m_trim_co = gm_high_coverage_6m_trim[gm_high_coverage_6m_trim['state'] == 'CO']
margin_distr_6m_co = gm_high_coverage_6m_trim_co.groupby(['date'])['gm_past_2quarters'].describe().reset_index()
margin_distr_6m_co['date'] = pd.to_datetime(margin_distr_6m_co['date'])

margin_distr_6m_co[['25%','50%','75%']].mean().round(2)



margin_distr_6m_co[margin_distr_6m_co['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

In [None]:
### 9m

gm_high_coverage_9m_trim_co = gm_high_coverage_9m_trim[gm_high_coverage_9m_trim['state'] == 'CO']
margin_distr_9m_co = gm_high_coverage_9m_trim_co.groupby(['date'])['gm_past_3quarters'].describe().reset_index()
margin_distr_9m_co['date'] = pd.to_datetime(margin_distr_9m_co['date'])

margin_distr_9m_co[['25%','50%','75%']].mean().round(2)



margin_distr_9m_co[margin_distr_9m_co['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

In [None]:
# ## MI

# ### 3m

# gm_high_coverage_3m_trim_mi = gm_high_coverage_3m_trim[gm_high_coverage_3m_trim['state'] == 'MI']
# margin_distr_3m_mi = gm_high_coverage_3m_trim_mi.groupby(['date'])['gm_past_quarter'].describe().reset_index()
# margin_distr_3m_mi['date'] = pd.to_datetime(margin_distr_3m_mi['date'])

# margin_distr_3m_mi[['25%','50%','75%']].mean().round(2)



# margin_distr_3m_mi[margin_distr_3m_mi['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

In [None]:
# ### 6m

# gm_high_coverage_6m_trim_mi = gm_high_coverage_6m_trim[gm_high_coverage_6m_trim['state'] == 'MI']
# margin_distr_6m_mi = gm_high_coverage_6m_trim_mi.groupby(['date'])['gm_past_2quarters'].describe().reset_index()
# margin_distr_6m_mi['date'] = pd.to_datetime(margin_distr_6m_mi['date'])

# margin_distr_6m_mi[['25%','50%','75%']].mean().round(2)



# margin_distr_6m_mi[margin_distr_6m_mi['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

In [None]:
# ### 9m

# gm_high_coverage_9m_trim_mi = gm_high_coverage_9m_trim[gm_high_coverage_9m_trim['state'] == 'MI']
# margin_distr_9m_mi = gm_high_coverage_9m_trim_mi.groupby(['date'])['gm_past_3quarters'].describe().reset_index()
# margin_distr_9m_mi['date'] = pd.to_datetime(margin_distr_9m_mi['date'])

# margin_distr_9m_mi[['25%','50%','75%']].mean().round(2)



# margin_distr_9m_mi[margin_distr_9m_mi['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

In [None]:
## MA

### 3m

gm_high_coverage_3m_trim_ma = gm_high_coverage_3m_trim[gm_high_coverage_3m_trim['state'] == 'MA']
margin_distr_3m_ma = gm_high_coverage_3m_trim_ma.groupby(['date'])['gm_past_quarter'].describe().reset_index()
margin_distr_3m_ma['date'] = pd.to_datetime(margin_distr_3m_ma['date'])

margin_distr_3m_ma[['25%','50%','75%']].mean().round(2)



margin_distr_3m_ma[margin_distr_3m_ma['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

In [None]:
### 6m

gm_high_coverage_6m_trim_ma = gm_high_coverage_6m_trim[gm_high_coverage_6m_trim['state'] == 'MA']
margin_distr_6m_ma = gm_high_coverage_6m_trim_ma.groupby(['date'])['gm_past_2quarters'].describe().reset_index()
margin_distr_6m_ma['date'] = pd.to_datetime(margin_distr_6m_ma['date'])

margin_distr_6m_ma[['25%','50%','75%']].mean().round(2)



margin_distr_6m_ma[margin_distr_6m_ma['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)

In [None]:
### 9m

gm_high_coverage_9m_trim_ma = gm_high_coverage_9m_trim[gm_high_coverage_9m_trim['state'] == 'MA']
margin_distr_9m_ma = gm_high_coverage_9m_trim_ma.groupby(['date'])['gm_past_3quarters'].describe().reset_index()
margin_distr_9m_ma['date'] = pd.to_datetime(margin_distr_9m_ma['date'])

margin_distr_9m_ma[['25%','50%','75%']].mean().round(2)



margin_distr_9m_ma[margin_distr_9m_ma['date'] >= '2021-01-01'][['25%','50%','75%']].mean().round(2)