In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import json
import numpy 
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline  
import pyarrow
import sys

from datetime import date
from dotenv import load_dotenv
from sqlalchemy import create_engine
from os import path
from typing import List,Dict, Tuple
from collections import defaultdict
# Render every column of wide frames instead of eliding the middle ones.
pd.set_option("display.max_columns", None)

# Load variables from a .env file, then read the BigQuery credentials location.
load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')
if BIGQUERY_CREDENTIALS_PATH is None:
    # Without this guard os.path.expanduser(None) raises an opaque TypeError;
    # fail here with a message that says what to fix.
    raise RuntimeError(
        "BIGQUERY_CREDENTIALS_PATH is not set; define it in the environment "
        "or in a .env file before running this notebook."
    )
engine = create_engine(
    'bigquery://bespoke-financial/ProdMetrcData',
    credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH),
)

# Make the project helper directories importable. Both paths are resolved
# relative to the kernel's current working directory.
sys.path.append(path.realpath(path.join(os.getcwd(), "../core")))
sys.path.append(path.realpath(path.join(os.getcwd(), "../../src")))

import create_queries
import prepare_data

from bespoke.inventory.analysis.shared import download_util, inventory_types
from bespoke.inventory.analysis import active_inventory_util as util
from bespoke.inventory.analysis import inventory_valuations_util as valuations_util

# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [3]:
# Read the margin-analysis CSV; the path is relative to the kernel's working directory.
data = pd.read_csv(filepath_or_buffer='finance_margin_analysis.csv')

In [4]:
# Display the loaded frame for a quick visual check of its columns and rows.
data

Unnamed: 0,date,revenue,cogs,margin_$,margin_%,total_count_incoming,product_count,count_incoming,coverage,revenue_after_tax,cogs_after_tax,margin_$_after_tax,margin_%_after_tax,gm_past_quarter_after_tax,gm_past_2quarters_after_tax,gm_past_3quarters_after_tax,gm$_past_quarter_after_tax,gm$_past_2quarters_after_tax,gm$_past_3quarters_after_tax,company,location,id,company_identifier,state
0,2021-01,291646.25,69944.725610,221701.52440,0.760173,4808,2003,2805,0.696005,335393.1875,88829.801520,246563.386000,0.735147,,,,,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
1,2021-02,262906.73,85322.341900,177584.38810,0.675465,5464,377,5087,0.948611,302342.7395,108359.374200,193983.365300,0.641601,,,,,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
2,2021-03,315154.72,99950.244010,215204.47600,0.682853,6902,159,6743,0.951475,362427.9280,126936.809900,235491.118100,0.649760,0.675503,,,225345.95650,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
3,2021-04,329925.48,100937.302800,228988.17720,0.694060,7081,112,6969,0.931465,379414.3020,128190.374500,251223.927500,0.662136,0.651166,,,226899.47030,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
4,2021-05,336793.80,99047.578800,237746.22120,0.705910,7286,131,7155,0.918326,387312.8700,125790.425100,261522.444900,0.675223,0.662373,,,249412.49680,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2345,2021-11,18761.21,5470.325953,13290.88405,0.708424,610,52,558,0.884058,21575.3915,6947.313960,14628.077540,0.677998,0.591387,,,12210.38262,,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2346,2021-12,32715.38,12198.903890,20516.47611,0.627120,1062,129,933,0.919481,37622.6870,15492.607940,22130.079060,0.588211,0.644411,,,16567.28353,,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2347,2022-01,24339.01,11123.403850,13215.60615,0.542980,831,43,788,0.936866,27989.8615,14126.722890,13863.138610,0.495291,0.587167,,,16873.76507,,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2348,2022-02,24450.82,10410.082940,14040.73706,0.574244,796,37,759,0.953293,28118.4430,13220.805330,14897.637670,0.529817,0.537773,0.564580,,16963.61845,14587.00053,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA


In [5]:
# Company identifiers to keep in the client subset.
# Despite the `df_` prefix this is a plain list of strings, not a DataFrame.
df_clients = [
    'DL', 'DW', 'EMA', 'EMM',
    'EMT', 'EMF', 'ST', 'GRG',
    'EL', 'VS', 'TT', 'MD',
    'DWF', 'GHC', 'SV', '99HT',
]

In [6]:
# Keep only the rows whose company_identifier appears in df_clients.
data_client = data.loc[data['company_identifier'].isin(df_clients)]

In [8]:
# Distinct company identifiers that survived the client filter.
data_client.loc[:, 'company_identifier'].unique()

array(['99HT', 'DWF', 'DL', 'EL', 'EMA', 'EMF', 'EMM', 'EMT', 'GRG',
       'GHC', 'MD', 'SV', 'ST', 'DW', 'TT', 'VS'], dtype=object)

In [16]:
# Display the client-only subset.
data_client

Unnamed: 0,date,revenue,cogs,margin_$,margin_%,total_count_incoming,product_count,count_incoming,coverage,revenue_after_tax,cogs_after_tax,margin_$_after_tax,margin_%_after_tax,gm_past_quarter_after_tax,gm_past_2quarters_after_tax,gm_past_3quarters_after_tax,gm$_past_quarter_after_tax,gm$_past_2quarters_after_tax,gm$_past_3quarters_after_tax,company,location,id,company_identifier,state
0,2021-01,291646.25,69944.72561,221701.52440,0.760173,4808,2003,2805,0.696005,335393.1875,88829.80152,246563.38600,0.735147,,,,,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
1,2021-02,262906.73,85322.34190,177584.38810,0.675465,5464,377,5087,0.948611,302342.7395,108359.37420,193983.36530,0.641601,,,,,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
2,2021-03,315154.72,99950.24401,215204.47600,0.682853,6902,159,6743,0.951475,362427.9280,126936.80990,235491.11810,0.649760,0.675503,,,225345.9565,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
3,2021-04,329925.48,100937.30280,228988.17720,0.694060,7081,112,6969,0.931465,379414.3020,128190.37450,251223.92750,0.662136,0.651166,,,226899.4703,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
4,2021-05,336793.80,99047.57880,237746.22120,0.705910,7286,131,7155,0.918326,387312.8700,125790.42510,261522.44490,0.675223,0.662373,,,249412.4968,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2338,2021-11,517332.55,240108.39600,277224.15400,0.535872,15731,380,15351,0.935033,594932.4325,304937.66290,289994.76960,0.487442,0.451398,0.421637,0.397003,279151.7113,263050.0522,248627.1309,VS,402R-00545,"('VS', '402R-00545')",VS,CO
2339,2021-12,440912.77,212389.01800,228523.75200,0.518297,13516,0,13516,0.936855,507049.6855,269734.05290,237315.63260,0.468032,0.481002,0.443337,0.414475,277087.5208,268507.2156,253221.0241,VS,402R-00545,"('VS', '402R-00545')",VS,CO
2340,2022-01,435566.37,177725.05480,257841.31520,0.591968,12801,0,12801,0.922462,500901.3255,225710.81950,275190.50600,0.549391,0.501621,0.469824,0.432725,267500.3027,273004.1922,256284.9332,VS,402R-00545,"('VS', '402R-00545')",VS,CO
2341,2022-02,393248.45,215992.40210,177256.04790,0.450748,12013,132,11881,0.934864,452235.7175,274310.35060,177925.36690,0.393435,0.470286,0.460842,0.437853,230143.8352,254647.7733,252081.3132,VS,402R-00545,"('VS', '402R-00545')",VS,CO


In [23]:
# Summary statistics of after-tax margin % per 'date' value across the client
# subset. 'date' is then converted to datetime so it can be compared against
# date strings when filtering.
margin_distr = (
    data_client
    .groupby(['date'])['margin_%_after_tax']
    .describe()
    .reset_index()
    .assign(date=lambda summary: pd.to_datetime(summary['date']))
)

In [24]:
# Show the summary rows dated 2021-01-01 or later.
margin_distr.loc[margin_distr['date'] >= '2021-01-01']

Unnamed: 0,date,count,mean,std,min,25%,50%,75%,max
12,2021-01-01,13.0,0.555652,0.141233,0.308966,0.46996,0.586887,0.624697,0.790569
13,2021-02-01,13.0,0.534143,0.106342,0.329721,0.462279,0.558735,0.629182,0.661451
14,2021-03-01,14.0,0.509222,0.10714,0.310783,0.455558,0.513871,0.590789,0.64976
15,2021-04-01,14.0,0.514971,0.106578,0.273334,0.469101,0.532922,0.590413,0.662136
16,2021-05-01,14.0,0.527344,0.093206,0.347279,0.472298,0.536462,0.598896,0.675223
17,2021-06-01,14.0,0.516664,0.091134,0.33783,0.482937,0.523131,0.57456,0.666557
18,2021-07-01,15.0,0.510289,0.089101,0.303609,0.463669,0.523964,0.56603,0.650712
19,2021-08-01,16.0,0.492922,0.102951,0.211764,0.456647,0.492706,0.555278,0.649505
20,2021-09-01,17.0,0.494927,0.115393,0.243394,0.465847,0.512738,0.547641,0.713748
21,2021-10-01,17.0,0.510976,0.080358,0.30626,0.476219,0.545595,0.555464,0.607659


In [19]:
# Keep rows from the full data set (all companies) whose coverage is at least 0.8.
data_high_coverage = data.loc[data['coverage'] >= 0.8]

In [20]:
# Display the coverage-filtered frame.
data_high_coverage

Unnamed: 0,date,revenue,cogs,margin_$,margin_%,total_count_incoming,product_count,count_incoming,coverage,revenue_after_tax,cogs_after_tax,margin_$_after_tax,margin_%_after_tax,gm_past_quarter_after_tax,gm_past_2quarters_after_tax,gm_past_3quarters_after_tax,gm$_past_quarter_after_tax,gm$_past_2quarters_after_tax,gm$_past_3quarters_after_tax,company,location,id,company_identifier,state
1,2021-02,262906.73,85322.341900,177584.38810,0.675465,5464,377,5087,0.948611,302342.7395,108359.374200,193983.365300,0.641601,,,,,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
2,2021-03,315154.72,99950.244010,215204.47600,0.682853,6902,159,6743,0.951475,362427.9280,126936.809900,235491.118100,0.649760,0.675503,,,225345.95650,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
3,2021-04,329925.48,100937.302800,228988.17720,0.694060,7081,112,6969,0.931465,379414.3020,128190.374500,251223.927500,0.662136,0.651166,,,226899.47030,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
4,2021-05,336793.80,99047.578800,237746.22120,0.705910,7286,131,7155,0.918326,387312.8700,125790.425100,261522.444900,0.675223,0.662373,,,249412.49680,,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
5,2021-06,324788.69,98065.517010,226723.17300,0.698064,6860,114,6746,0.926026,373506.9935,124543.206600,248963.786900,0.666557,0.667972,0.671737,,253903.38640,239624.67140,,99HT,C10-0000279-LIC,"('99HT', 'C10-0000279-LIC')",99HT,CA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2345,2021-11,18761.21,5470.325953,13290.88405,0.708424,610,52,558,0.884058,21575.3915,6947.313960,14628.077540,0.677998,0.591387,,,12210.38262,,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2346,2021-12,32715.38,12198.903890,20516.47611,0.627120,1062,129,933,0.919481,37622.6870,15492.607940,22130.079060,0.588211,0.644411,,,16567.28353,,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2347,2022-01,24339.01,11123.403850,13215.60615,0.542980,831,43,788,0.936866,27989.8615,14126.722890,13863.138610,0.495291,0.587167,,,16873.76507,,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA
2348,2022-02,24450.82,10410.082940,14040.73706,0.574244,796,37,759,0.953293,28118.4430,13220.805330,14897.637670,0.529817,0.537773,0.564580,,16963.61845,14587.00053,,VOY,C10-0000802-LIC,"('VOY', 'C10-0000802-LIC')",VOY,CA


In [21]:
# Summary statistics of margin % per 'date' value across the high-coverage rows,
# with 'date' converted to datetime for the date filter that follows.
# NOTE(review): this summarises 'margin_%' whereas the client summary
# (margin_distr) uses 'margin_%_after_tax' - confirm the difference is intended
# before comparing the two tables.
margin_distr_high = (
    data_high_coverage
    .groupby(['date'])['margin_%']
    .describe()
    .reset_index()
    .assign(date=lambda summary: pd.to_datetime(summary['date']))
)

In [22]:
# Show the high-coverage summary rows dated 2021-01-01 or later.
margin_distr_high.loc[margin_distr_high['date'] >= '2021-01-01']

Unnamed: 0,date,count,mean,std,min,25%,50%,75%,max
12,2021-01-01,43.0,0.380038,0.979454,-5.823773,0.454279,0.538247,0.630613,0.749541
13,2021-02-01,53.0,0.169221,1.589999,-9.13275,0.449937,0.524454,0.600203,0.714314
14,2021-03-01,57.0,0.29704,0.900478,-5.139432,0.426126,0.511427,0.598422,0.721043
15,2021-04-01,64.0,-0.171296,2.320035,-10.460928,0.394705,0.511804,0.586286,0.722108
16,2021-05-01,66.0,-0.358163,3.643938,-23.562108,0.428563,0.534181,0.605583,0.70591
17,2021-06-01,67.0,-0.21976,4.242715,-33.408254,0.420821,0.532507,0.606989,0.700282
18,2021-07-01,69.0,0.039624,3.42715,-27.663804,0.448063,0.533427,0.607178,0.733125
19,2021-08-01,73.0,-34.297957,294.389706,-2515.012432,0.435221,0.524525,0.594221,0.741826
20,2021-09-01,75.0,0.072047,3.374918,-28.564076,0.434799,0.517214,0.568832,0.736499
21,2021-10-01,81.0,0.058284,3.880241,-34.353985,0.42104,0.521561,0.600372,0.700892
