In [1]:
import json
import numpy 
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline  
import pyarrow
import sys
from tqdm import tqdm
import seaborn as sns
from datetime import date
from dotenv import load_dotenv
from sqlalchemy import create_engine
from os import path
from typing import List,Dict, Tuple
from collections import defaultdict
# Show every column when a wide DataFrame is displayed.
pd.set_option("display.max_columns", None)

# Load variables from a local .env file into the process environment.
load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')
if BIGQUERY_CREDENTIALS_PATH is None:
    # Fail with an actionable message instead of the opaque TypeError that
    # os.path.expanduser(None) would raise on the next line.
    raise RuntimeError(
        "BIGQUERY_CREDENTIALS_PATH is not set; define it in the environment or in the .env file."
    )
# SQLAlchemy engine for the BigQuery dataset; `~` in the credentials path is expanded.
engine = create_engine('bigquery://bespoke-financial/ProdMetrcData', credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH))

# Make the project-local packages importable; the directories are resolved
# relative to the current working directory and appended in this order.
for _relative_dir in ("../core", "../../src", "../../scripts"):
    sys.path.append(path.realpath(path.join(os.getcwd(), _relative_dir)))

import create_queries
import prepare_data

from bespoke.inventory.analysis.shared import download_util, inventory_types
from bespoke.inventory.analysis import active_inventory_util as util
from bespoke.inventory.analysis import inventory_valuations_util as valuations_util
from underwriting import msrp_band_analyzer

%load_ext autoreload
%autoreload 2



In [2]:
# Identifiers of the companies whose data is loaded into the analyzer.
COMPANY_IDENTIFIER_LIST = ['DL', 'DW', 'EMA', 'EMM', 'EMT', 'EMF', 'ST', 'GRG', 'EL', 'VS']

In [3]:
# Create the MSRP band analyzer with default arguments; company data is loaded
# separately through update_company_data.
mba = msrp_band_analyzer.MSRPBand()

In [4]:
# Load data for the selected companies into the analyzer. Judging by the cell
# output, this also verifies download summaries per license and derives the
# measurement / brand columns — NOTE(review): confirm against msrp_band_analyzer.
mba.update_company_data(COMPANY_IDENTIFIER_LIST)


Verifying download summaries for license 402R-00545...
Earliest download summary: 2020-01-01
Latest download summary: 2022-03-30

Verifying download summaries for license 402-00840...
Earliest download summary: 2020-01-01
Latest download summary: 2022-03-30

Verifying download summaries for license MR282376...
Earliest download summary: 2020-01-01
Latest download summary: 2022-03-30
Found bad download summary for license MR282376 on date 2022-03-24

Verifying download summaries for license C10-0000786-LIC...
Earliest download summary: 2021-10-12
Latest download summary: 2022-03-30

Verifying download summaries for license C12-0000359-LIC...
Earliest download summary: 2022-01-03
Latest download summary: 2022-03-30

Verifying download summaries for license MR283369...
Earliest download summary: 2020-01-01
Latest download summary: 2022-03-30
Found bad download summary for license MR283369 on date 2022-03-10
Found bad download summary for license MR283369 on date 2022-03-05

Verifying down

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['extracted_units'][idx] = df[measure_column][idx].apply(mba_util.EXTRACTED_MEASUREMENT_COLUMNS[measure_column])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['extracted_units'][idx] = df[measure_column][idx].apply(mba_util.EXTRACTED_MEASUREMENT_COLUMNS[measure_column])


### Combining same product categories with different measurements ### 	


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[measure_ratio_column_name][includes_measurement] = self.unit_conversion_ratio(measurement, measurement_unit)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[measure_ratio_column_name][includes_measurement] = self.unit_conversion_ratio(measurement, measurement_unit)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[measure_ratio_column_name][extracted_units_idx] = df['extracted_units'][extracted_units_idx]
A value is trying to be set on a copy of a slice from a 

### Breaking down product names into different brands ### 	


  (df[product_name].str.contains("^(1 ml)") == False) &
  (df[product_name].str.contains("^(1 ml)") == False) &


### Available combined product category name in costs dataframe ### 	
['concentrate' 'flower' 'vape cartridge' 'other concentrate' 'infused'
 'shake/trim' 'edible' 'extract' 'pre-roll infused' 'pre-roll flower'
 'pre-roll leaf' 'buds' 'tincture' 'topical' 'capsule' 'shake'
 'wet whole plants' 'vape product' 'raw pre-rolls' 'kief' 'seeds'
 'shake/trim allocated for extraction' 'infused butter/oil'
 'infused pre-rolls' 'infused beverage' 'clone - cutting' 'immature plant']
### Available combined product category name in sales dataframe ### 	
['other concentrate' 'buds' 'shake/trim' 'flower' 'pre-roll flower'
 'edible' 'concentrate' 'infused' 'extract' 'vape cartridge'
 'pre-roll infused' 'pre-roll leaf' 'topical' 'immature plants' 'capsule'
 'tincture' 'shake' 'vape product' 'raw pre-rolls' 'kief'
 'shake/trim allocated for extraction' 'infused butter/oil'
 'infused pre-rolls' 'seeds' 'infused beverage' 'clone - cutting']


In [5]:
# Work on a copy so exploration in this notebook cannot modify the analyzer's own sales frame.
df_sales = mba.company_sales_df.copy()

In [9]:
# Preview the first rows to see the raw columns next to the derived ones.
df_sales.head()

Unnamed: 0,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,sales_month,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,tx_price_per_unit,date_in_month,bad_numbers_from_product_name,original_product_name,extracted_units,letter_gram_measure_from_product_name,gram_measure_from_product_name,oz_measure_from_product_name,adjusted_tx_price_per_unit,measurement_ratio_vs_gram,combined_product_category,brand_breakable_by_dash_boolean,brands,brands_by_category
2289060,C10-0000169-LIC,5181809,inactive,Consumer,2020-01-02 08:16:34+00:00,2020-01,2,33.76,inactive,1782399,1A4060300004D62000000167,VVS - Vape - 0.3g - Gold - Hybrid - GSC,Other Concentrate (weight - each),Each,1.0,16.88,16.88,2020-01,,VVS - Vape - 0.3g - Gold - Hybrid - GSC,0.3,0.3g,,,56.266667,0.3,other concentrate,True,VVS,VVS (Other Concentrate (weight - each))
2268971,402-00840,52954312,inactive,Patient,2020-01-22 13:43:28+00:00,2020-01,3,209.24,inactive,17994538,1A4000500266F2ED00002227,Durban Ghost #2,Buds,Grams,14.0,60.44,4.317143,2020-01,,Durban Ghost #2,,,,,4.317143,1.0,buds,False,Durban Ghost #2,Durban Ghost #2
2268972,402R-00545,52954311,inactive,Consumer,2020-01-22 13:43:23+00:00,2020-01,1,10.03,inactive,18017431,1A400031266EE9B000047163,WR Jenny Kush - 2 Pack Ready Roll,Shake/Trim (by strain),Grams,1.0,10.03,10.03,2020-01,,WR Jenny Kush - 2 Pack Ready Roll,,,,,10.03,1.0,shake/trim,True,WR Jenny Kush,WR Jenny Kush (Shake/Trim (by strain))
2268973,C10-0000169-LIC,7521891,inactive,Consumer,2020-01-22 13:41:39+00:00,2020-01,5,36.58,inactive,2111948,1A4060300004B03000010682,THC Design - Flower - 3.5g - Sativa - Crescendo,Flower,Grams,3.5,1.0,0.285714,2020-01,,THC Design - Flower - 3.5g - Sativa - Crescendo,,3.5g,,,0.285714,1.0,flower,True,THC Design,THC Design (Flower)
2268974,C10-0000169-LIC,7521891,inactive,Consumer,2020-01-22 13:41:39+00:00,2020-01,5,36.58,inactive,1782532,1A4060300004D62000000231,Puffy Delivery - Pre-roll - 0.7g - Jack's Poison,Pre-Roll Flower,Each,1.0,8.68,8.68,2020-01,,Puffy Delivery - Pre-roll - 0.7g - Jack's Poison,0.7,0.7g,,,12.4,0.7,pre-roll flower,True,Puffy Delivery,Puffy Delivery (Pre-Roll Flower)


In [34]:
# Non-null counts of every column per (product category, unit of measure) pair,
# ordered by unit of measure descending.
(
    df_sales
    .groupby(['tx_product_category_name', 'tx_unit_of_measure'])
    .count()
    .sort_values(by='tx_unit_of_measure', ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,sales_month,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_quantity_sold,tx_total_price,tx_price_per_unit,date_in_month,bad_numbers_from_product_name,original_product_name,extracted_units,letter_gram_measure_from_product_name,gram_measure_from_product_name,oz_measure_from_product_name,adjusted_tx_price_per_unit,measurement_ratio_vs_gram,combined_product_category,brand_breakable_by_dash_boolean,brands,brands_by_category
tx_product_category_name,tx_unit_of_measure,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
Buds,Grams,346535,346535,346535,346535,346535,346535,346535,346535,346535,346535,346535,346535,346535,346535,346535,346535,81293,346535,0,130112,1010,1998,346535,346535,346535,346535,346535,346535
Extract (weight),Grams,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,0,11,0,1,10,0,11,11,11,11,11,11
Shake/Trim allocated for extraction,Grams,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,0,20,0,0,0,0,20,20,20,20,20,20
Shake/Trim (by strain),Grams,28236,28236,28236,28236,28236,28236,28236,28236,28236,28236,28236,28236,28236,28236,28236,28236,6007,28236,0,7035,488,0,28236,28236,28236,28236,28236,28236
Shake/Trim,Grams,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,0,120,0,12,12,0,120,120,120,120,120,120
Raw Pre-Rolls,Grams,169773,169773,169773,169773,169773,169773,169773,169773,169773,169773,169773,169773,169773,169773,169773,169773,135967,169773,0,134990,1081,0,169773,169773,169773,169773,169773,169773
Infused Pre-Rolls,Grams,3957,3957,3957,3957,3957,3957,3957,3957,3957,3957,3957,3957,3957,3957,3957,3957,3337,3957,0,3957,0,0,3957,3957,3957,3957,3957,3957
Flower,Grams,38110,38110,38110,38110,38110,38110,38110,38110,38110,38110,38110,38110,38110,38110,38110,38110,9551,38110,0,35075,2,233,38110,38110,38110,38110,38110,38110
Kief,Grams,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,15,16,0,16,0,0,16,16,16,16,16,16
Concentrate,Grams,115401,115401,115401,115401,115401,115401,115401,115401,115401,115401,115401,115401,115401,115401,115401,115401,1876,115401,0,18603,10919,0,115401,115401,115401,115401,115401,115401


# 1. Unit of measure "Each":
- Same underlying unit (e.g. gram), but packages come in different quantities (e.g. 0.5g vs. 1g)
- Different underlying units within one category (e.g. a mix of grams and ounces)

In [11]:
## examples

In [36]:
# Example: a 0.5g cartridge sold with unit of measure "Each".
df_sales.loc[
    (df_sales['tx_product_category_name'] == 'Other Concentrate (weight - each)')
    & (df_sales['tx_product_name'] == 'Wave - Cartridge - 0.5g - Hybrid - Lemon'),
    ['tx_product_category_name', 'tx_product_name', 'tx_unit_of_measure', 'tx_quantity_sold'],
].head()





Unnamed: 0,tx_product_category_name,tx_product_name,tx_unit_of_measure,tx_quantity_sold
2269218,Other Concentrate (weight - each),Wave - Cartridge - 0.5g - Hybrid - Lemon,Each,1.0
2269171,Other Concentrate (weight - each),Wave - Cartridge - 0.5g - Hybrid - Lemon,Each,1.0
2268553,Other Concentrate (weight - each),Wave - Cartridge - 0.5g - Hybrid - Lemon,Each,1.0
2267213,Other Concentrate (weight - each),Wave - Cartridge - 0.5g - Hybrid - Lemon,Each,1.0
2268055,Other Concentrate (weight - each),Wave - Cartridge - 0.5g - Hybrid - Lemon,Each,1.0


In [37]:
# Example: a 1g product in the same category, also sold as "Each".
df_sales.loc[
    (df_sales['tx_product_category_name'] == 'Other Concentrate (weight - each)')
    & (df_sales['tx_product_name'] == 'Thclear - 1g - Honey Pot - Skywalker OG'),
    ['tx_product_category_name', 'tx_product_name', 'tx_unit_of_measure', 'tx_quantity_sold'],
].head()





Unnamed: 0,tx_product_category_name,tx_product_name,tx_unit_of_measure,tx_quantity_sold
2269094,Other Concentrate (weight - each),Thclear - 1g - Honey Pot - Skywalker OG,Each,1.0
2270801,Other Concentrate (weight - each),Thclear - 1g - Honey Pot - Skywalker OG,Each,1.0
2272006,Other Concentrate (weight - each),Thclear - 1g - Honey Pot - Skywalker OG,Each,1.0
2284661,Other Concentrate (weight - each),Thclear - 1g - Honey Pot - Skywalker OG,Each,1.0
2281942,Other Concentrate (weight - each),Thclear - 1g - Honey Pot - Skywalker OG,Each,1.0


In [None]:
## examples

In [38]:

# Example: an edible whose size is stated in ounces in the product name.
df_sales.loc[
    (df_sales['tx_product_category_name'] == 'Edible (volume - each)')
    & (df_sales['tx_product_name'] == "Uncle Arnie's Iced Tea Lemonade Beverage 8oz Glass"),
    ['tx_product_category_name', 'tx_product_name', 'tx_unit_of_measure', 'tx_quantity_sold'],
].head()

Unnamed: 0,tx_product_category_name,tx_product_name,tx_unit_of_measure,tx_quantity_sold
1722334,Edible (volume - each),Uncle Arnie's Iced Tea Lemonade Beverage 8oz G...,Each,1.0
1722228,Edible (volume - each),Uncle Arnie's Iced Tea Lemonade Beverage 8oz G...,Each,1.0
1724791,Edible (volume - each),Uncle Arnie's Iced Tea Lemonade Beverage 8oz G...,Each,1.0
1726344,Edible (volume - each),Uncle Arnie's Iced Tea Lemonade Beverage 8oz G...,Each,1.0
1706804,Edible (volume - each),Uncle Arnie's Iced Tea Lemonade Beverage 8oz G...,Each,1.0


In [39]:

# Example: an edible in the same category whose size is stated in milligrams.
df_sales.loc[
    (df_sales['tx_product_category_name'] == 'Edible (volume - each)')
    & (df_sales['tx_product_name'] == "Hapy - Edible - 100mg - Syrup Shot - Tropical Mix"),
    ['tx_product_category_name', 'tx_product_name', 'tx_unit_of_measure', 'tx_quantity_sold'],
].head()

Unnamed: 0,tx_product_category_name,tx_product_name,tx_unit_of_measure,tx_quantity_sold
2268695,Edible (volume - each),Hapy - Edible - 100mg - Syrup Shot - Tropical Mix,Each,1.0
2267487,Edible (volume - each),Hapy - Edible - 100mg - Syrup Shot - Tropical Mix,Each,1.0
2272401,Edible (volume - each),Hapy - Edible - 100mg - Syrup Shot - Tropical Mix,Each,1.0
2271109,Edible (volume - each),Hapy - Edible - 100mg - Syrup Shot - Tropical Mix,Each,1.0
2270641,Edible (volume - each),Hapy - Edible - 100mg - Syrup Shot - Tropical Mix,Each,1.0


# 2. How the regex extraction works

## 1. Remove bad numbers (leading numeric codes in product names)

In [40]:
# Rows where a "bad number" was found: original name, the number, and the resulting name.
df_sales.loc[
    df_sales['bad_numbers_from_product_name'].notna(),
    ['original_product_name', 'bad_numbers_from_product_name', 'tx_product_name'],
].head()

Unnamed: 0,original_product_name,bad_numbers_from_product_name,tx_product_name
2269275,10496 Pacific Stone Flower 7.0g Pouch Indica P...,10496,Pacific Stone Flower 7.0g Pouch Indica Privat...
2269280,10928 Pacific Stone Flower 7.0g Pouch Hybrid 8...,10928,Pacific Stone Flower 7.0g Pouch Hybrid 805 Gl...
2269281,10496 Pacific Stone Flower 7.0g Pouch Indica P...,10496,Pacific Stone Flower 7.0g Pouch Indica Privat...
2269282,10938 Pacific Stone Flower 7.0g Pouch Sativa B...,10938,Pacific Stone Flower 7.0g Pouch Sativa Banjo ...
2268480,10928 Pacific Stone Flower 7.0g Pouch Hybrid 8...,10928,Pacific Stone Flower 7.0g Pouch Hybrid 805 Gl...


In [43]:
# Fraction of sales rows for which a bad number was found.
len(df_sales.dropna(subset=['bad_numbers_from_product_name'])) / len(df_sales)

0.24784748520522268

## 2. grab letter grams / gram / oz

In [50]:
# Rows where a letter-gram measure (e.g. "0.3g", "100mg") was captured from the product name.
df_sales.loc[
    df_sales['letter_gram_measure_from_product_name'].notna(),
    ['tx_product_name', 'letter_gram_measure_from_product_name'],
].head()




Unnamed: 0,tx_product_name,letter_gram_measure_from_product_name
2289060,VVS - Vape - 0.3g - Gold - Hybrid - GSC,0.3g
2268973,THC Design - Flower - 3.5g - Sativa - Crescendo,3.5g
2268974,Puffy Delivery - Pre-roll - 0.7g - Jack's Poison,0.7g
2268975,Puffy Delivery - Pre Roll - 0.8g - Sativa - Fa...,0.8g
2268976,LOL Edibles - Edible - 100mg - Doob Cube - Blu...,100mg


In [56]:
# Fraction of sales rows with a letter-gram measure.
len(df_sales.dropna(subset=['letter_gram_measure_from_product_name'])) / len(df_sales)

0.6261035071906623

In [70]:

# Rows whose product name contains the spelled-out word "gram".
df_sales.loc[
    df_sales['gram_measure_from_product_name'].notna()
    & (df_sales['gram_measure_from_product_name'] == 'gram'),
    ['tx_product_name', 'gram_measure_from_product_name'],
].head()




Unnamed: 0,tx_product_name,gram_measure_from_product_name
2273303,1 gram Pre-Rolls 3-pack - Jack - 3g,gram
2266022,1/2 gram Wedding Crasher,gram
2266023,1 gram Blue Zkittles Sample,gram
2266024,1 gram Doc OG Sample,gram
2266368,1 gram Pre-Rolls 3-pack - Jack - 3g,gram


In [72]:
# Fraction of sales rows with a value in gram_measure_from_product_name.
len(df_sales.dropna(subset=['gram_measure_from_product_name'])) / len(df_sales)

0.018168279839836876

In [71]:


# Rows where an ounce measure (e.g. "1/8 oz", "10oz") was captured from the product name.
df_sales.loc[
    df_sales['oz_measure_from_product_name'].notna(),
    ['tx_product_name', 'oz_measure_from_product_name'],
].head()




Unnamed: 0,tx_product_name,oz_measure_from_product_name
2272038,GF Hybrid 1/8 oz. Jar - Trifi Cookies,1/8 oz
2262161,10oz Cherry Cola 100mg,10oz
2259549,GF Hybrid 1/8 oz. Jar - Trifi Cookies,1/8 oz
2258992,GF Indica 1/8 oz. Jar : Wedding Cake,1/8 oz
2260776,GF Hybrid 1/8 oz. Jar - Trifi Cookies,1/8 oz


In [73]:
# Fraction of sales rows with an ounce measure.
len(df_sales.dropna(subset=['oz_measure_from_product_name'])) / len(df_sales)

0.01606884195163676

# 3. combine 

In [75]:
# Compare the captured letter-gram text with the numeric extracted_units value.
df_sales.loc[
    df_sales['letter_gram_measure_from_product_name'].notna(),
    ['tx_product_name', 'letter_gram_measure_from_product_name', 'extracted_units'],
].head()




Unnamed: 0,tx_product_name,letter_gram_measure_from_product_name,extracted_units
2289060,VVS - Vape - 0.3g - Gold - Hybrid - GSC,0.3g,0.3
2268973,THC Design - Flower - 3.5g - Sativa - Crescendo,3.5g,
2268974,Puffy Delivery - Pre-roll - 0.7g - Jack's Poison,0.7g,0.7
2268975,Puffy Delivery - Pre Roll - 0.8g - Sativa - Fa...,0.8g,0.8
2268976,LOL Edibles - Edible - 100mg - Doob Cube - Blu...,100mg,0.1


In [77]:

# Compare the spelled-out "gram" matches with the numeric extracted_units value.
df_sales.loc[
    df_sales['gram_measure_from_product_name'].notna()
    & (df_sales['gram_measure_from_product_name'] == 'gram'),
    ['tx_product_name', 'gram_measure_from_product_name', 'extracted_units'],
].head()




Unnamed: 0,tx_product_name,gram_measure_from_product_name,extracted_units
2273303,1 gram Pre-Rolls 3-pack - Jack - 3g,gram,1.0
2266022,1/2 gram Wedding Crasher,gram,1.0
2266023,1 gram Blue Zkittles Sample,gram,
2266024,1 gram Doc OG Sample,gram,
2266368,1 gram Pre-Rolls 3-pack - Jack - 3g,gram,1.0


In [79]:


# All columns for rows with an ounce measure (pandas truncates the rendering).
df_sales.loc[df_sales['oz_measure_from_product_name'].notna()]




Unnamed: 0,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,sales_month,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,tx_price_per_unit,date_in_month,bad_numbers_from_product_name,original_product_name,extracted_units,letter_gram_measure_from_product_name,gram_measure_from_product_name,oz_measure_from_product_name,adjusted_tx_price_per_unit,measurement_ratio_vs_gram,combined_product_category,brand_breakable_by_dash_boolean,brands,brands_by_category
2272038,C9-0000323-LIC,0007109676,active,Consumer,2020-01-18 23:28:03+00:00,2020-01,3,79.29,active,1718011,1A40603000044D3000009524,GF Hybrid 1/8 oz. Jar - Trifi Cookies,Flower,Grams,3.54,35.24,9.954802,2020-01,,GF Hybrid 1/8 oz. Jar - Trifi Cookies,,,,1/8 oz,9.954802,1.0,flower,True,GF Hybrid 1/8 oz. Jar,GF Hybrid 1/8 oz. Jar (Flower)
2262161,402-00840,0053610743,inactive,Patient,2020-01-29 10:45:03+00:00,2020-01,3,114.00,inactive,18150757,1A400050026875EA00036381,10oz Cherry Cola 100mg,Infused (edible),Each,1.00,13.95,13.950000,2020-01,,10oz Cherry Cola 100mg,280.0,100mg,,10oz,0.049821,280.0,infused,False,10oz Cherry Cola 100mg,10oz Cherry Cola 100mg
2259549,C9-0000323-LIC,0008708254,active,Consumer,2020-01-31 16:08:41+00:00,2020-01,2,69.72,active,1718011,1A40603000044D3000009524,GF Hybrid 1/8 oz. Jar - Trifi Cookies,Flower,Grams,3.54,34.86,9.847458,2020-01,,GF Hybrid 1/8 oz. Jar - Trifi Cookies,,,,1/8 oz,9.847458,1.0,flower,True,GF Hybrid 1/8 oz. Jar,GF Hybrid 1/8 oz. Jar (Flower)
2258992,C9-0000323-LIC,0008756066,active,Consumer,2020-01-31 20:13:49+00:00,2020-01,1,34.86,active,1718009,1A40603000044D3000009522,GF Indica 1/8 oz. Jar : Wedding Cake,Flower,Grams,3.54,34.86,9.847458,2020-01,,GF Indica 1/8 oz. Jar : Wedding Cake,,,,1/8 oz,9.847458,1.0,flower,False,GF Indica 1/8 oz. Jar : Wedding Cake,GF Indica 1/8 oz. Jar : Wedding Cake
2260776,C9-0000323-LIC,0008563737,active,Consumer,2020-01-30 16:06:13+00:00,2020-01,2,69.72,active,1718011,1A40603000044D3000009524,GF Hybrid 1/8 oz. Jar - Trifi Cookies,Flower,Grams,3.54,34.86,9.847458,2020-01,,GF Hybrid 1/8 oz. Jar - Trifi Cookies,,,,1/8 oz,9.847458,1.0,flower,True,GF Hybrid 1/8 oz. Jar,GF Hybrid 1/8 oz. Jar (Flower)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96391,C10-0000169-LIC,0170799408,active,Consumer,2022-03-10 19:38:44.650000+00:00,2022-03,1,58.23,active,21850519,1A4060300003782000020543,Speedy Weedy - Gorilla Glue 1/2oz,Flower (packaged half ounce - each),Each,1.00,58.23,58.230000,2022-03,,Speedy Weedy - Gorilla Glue 1/2oz,14.0,,,1/2oz,4.159286,14.0,flower,True,Speedy Weedy,Speedy Weedy (Flower (packaged half ounce - ea...
96207,C10-0000169-LIC,0170804594,active,Consumer,2022-03-10 20:05:13.480000+00:00,2022-03,1,68.23,active,21847654,1A4060300003782000020533,Speedy Weedy Dosilato Truffle 1/2oz,Flower (packaged half ounce - each),Each,1.00,68.23,68.230000,2022-03,,Speedy Weedy Dosilato Truffle 1/2oz,14.0,,,1/2oz,4.873571,14.0,flower,False,Speedy Weedy Dosilato Truffle 1/2oz,Speedy Weedy Dosilato Truffle 1/2oz
96178,C10-0000169-LIC,0170805826,active,Consumer,2022-03-10 20:10:25.290000+00:00,2022-03,1,59.70,active,21848397,1A4060300003782000020541,Speedy Weedy Sweet Diesel OG 1/2oz,Flower (packaged half ounce - each),Each,1.00,59.70,59.700000,2022-03,,Speedy Weedy Sweet Diesel OG 1/2oz,14.0,,,1/2oz,4.264286,14.0,flower,False,Speedy Weedy Sweet Diesel OG 1/2oz,Speedy Weedy Sweet Diesel OG 1/2oz
96279,C10-0000939-LIC,0170802190,active,Consumer,2022-03-10 19:56:07+00:00,2022-03,2,51.98,active,22463306,1A406030000A616000005324,Uncle Arnie's Iced Tea Lemonade Beverage 8oz G...,Edible (volume - each),Each,1.00,8.51,8.510000,2022-03,,Uncle Arnie's Iced Tea Lemonade Beverage 8oz G...,224.0,,,8oz,0.037991,224.0,edible,False,Uncle Arnie's Iced Tea Lemonade Beverage 8oz G...,Uncle Arnie's Iced Tea Lemonade Beverage 8oz G...


# 4. coverage

In [None]:
# NOTE(review): displays the whole sales frame (rendering is truncated by pandas);
# df_sales.head() or df_sales.shape is usually enough here.
df_sales

In [81]:
## Coverage of `extracted_units` per product category, restricted to categories
## that have at least one sale whose unit of measure is not 'Grams'.
# Result: one row per category with the covered fraction in column 0 and the
# category name in column 'category' (same layout as before).
# The per-category frames are collected in a list and concatenated once:
# DataFrame.append in a loop copies the accumulated frame on every iteration
# and is not available in pandas >= 2.0.
has_units = mba.company_sales_df['extracted_units'].notna()  # loop-invariant
coverage_frames = []
for p in df_sales[df_sales['tx_unit_of_measure'] != 'Grams']['tx_product_category_name'].unique():
    print(p)
    in_category = mba.company_sales_df['tx_product_category_name'] == p
    # Plain Python ints keep the original semantics (ZeroDivisionError on an empty category).
    df_temp = pd.DataFrame([int((in_category & has_units).sum()) / int(in_category.sum())])
    df_temp['category'] = p
    coverage_frames.append(df_temp)
# pd.concat rejects an empty list; fall back to the empty frame the original produced.
df_coverage = pd.concat(coverage_frames) if coverage_frames else pd.DataFrame()




Other Concentrate (weight - each)
Pre-Roll Flower
Edible (weight - each)
Infused (edible)
Extract (weight - each)
Concentrate (Each)
Vape Cartridge (volume - each)
Pre-Roll Infused
Infused (non-edible)
Pre-Roll Leaf
Edible (volume - each)
Topical (weight - each)
Immature Plants
Capsule (weight - each)
Flower (packaged eighth - each)
Tincture (volume - each)
Flower (packaged quarter - each)
Shake (Packaged Quarter - each)
Vape Cartridge (weight - each)
Topical (volume - each)
Flower (packaged gram - each)
Shake (Packaged Half Ounce - each)
Other Concentrate (volume - each)
Vape Product
Flower (packaged half ounce - each)
Extract (volume - each)
Shake (Packaged Eighth - each)
Flower (packaged ounce - each)
Shake (Packaged Gram - each)
Flower (packaged - each)
Tincture (weight - each)
Infused Butter/Oil (weight - each)
Shake (Packaged Ounce - each)
Seeds
Seeds (each)
Infused Beverage
Clone - Cutting
Infused Butter/Oil (volume - each)


# 5. sanity checks

In [83]:
# Sanity check: inspect the rows of the packaged-quarter flower category.
df_sales.loc[df_sales['tx_product_category_name'] == 'Flower (packaged quarter - each)']

Unnamed: 0,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,sales_month,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,tx_price_per_unit,date_in_month,bad_numbers_from_product_name,original_product_name,extracted_units,letter_gram_measure_from_product_name,gram_measure_from_product_name,oz_measure_from_product_name,adjusted_tx_price_per_unit,measurement_ratio_vs_gram,combined_product_category,brand_breakable_by_dash_boolean,brands,brands_by_category
2173666,C10-0000169-LIC,0021303234,inactive,Consumer,2020-04-20 17:50:17+00:00,2020-04,1,27.63,inactive,3816313,1A4060300004B03000017159,Puffy Delivery - Flower - Littles - 7g - Chem ...,Flower (packaged quarter - each),Each,1.0,27.63,27.63,2020-04,,Puffy Delivery - Flower - Littles - 7g - Chem ...,7.0,7g,,,3.947143,7.0,flower,True,Puffy Delivery,Puffy Delivery (Flower (packaged quarter - each))
2173672,C10-0000169-LIC,0021092976,inactive,Consumer,2020-04-20 17:47:38+00:00,2020-04,2,61.40,inactive,3815924,1A4060300004B03000017151,Puffy Delivery - Flower - Littles - 7g - Superman,Flower (packaged quarter - each),Each,1.0,30.70,30.70,2020-04,,Puffy Delivery - Flower - Littles - 7g - Superman,7.0,7g,,,4.385714,7.0,flower,True,Puffy Delivery,Puffy Delivery (Flower (packaged quarter - each))
2173673,C10-0000169-LIC,0021092976,inactive,Consumer,2020-04-20 17:47:38+00:00,2020-04,2,61.40,inactive,3816313,1A4060300004B03000017159,Puffy Delivery - Flower - Littles - 7g - Chem ...,Flower (packaged quarter - each),Each,1.0,30.70,30.70,2020-04,,Puffy Delivery - Flower - Littles - 7g - Chem ...,7.0,7g,,,4.385714,7.0,flower,True,Puffy Delivery,Puffy Delivery (Flower (packaged quarter - each))
2173680,C10-0000169-LIC,0021092946,inactive,Consumer,2020-04-20 17:43:37+00:00,2020-04,1,27.63,inactive,3815924,1A4060300004B03000017151,Puffy Delivery - Flower - Littles - 7g - Superman,Flower (packaged quarter - each),Each,1.0,27.63,27.63,2020-04,,Puffy Delivery - Flower - Littles - 7g - Superman,7.0,7g,,,3.947143,7.0,flower,True,Puffy Delivery,Puffy Delivery (Flower (packaged quarter - each))
2173662,C10-0000169-LIC,0021105613,inactive,Consumer,2020-04-20 17:50:51+00:00,2020-04,1,54.26,inactive,3816313,1A4060300004B03000017159,Puffy Delivery - Flower - Littles - 7g - Chem ...,Flower (packaged quarter - each),Each,2.0,54.26,27.13,2020-04,,Puffy Delivery - Flower - Littles - 7g - Chem ...,7.0,7g,,,3.875714,7.0,flower,True,Puffy Delivery,Puffy Delivery (Flower (packaged quarter - each))
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95945,C9-0000323-LIC,0170813244,active,Consumer,2022-03-10 20:42:52+00:00,2022-03,2,55.06,active,22443911,1A4060300006D0A000639954,Pacific Stone Flower 7.0g Pouch Hybrid Kush M...,Flower (packaged quarter - each),Each,1.0,32.18,32.18,2022-03,17230,17230 Pacific Stone Flower 7.0g Pouch Hybrid K...,7.0,7.0g,,,4.597143,7.0,flower,False,Pacific Stone Flower 7.0g Pouch Hybrid Kush M...,Pacific Stone Flower 7.0g Pouch Hybrid Kush M...
96318,C9-0000323-LIC,0170800054,active,Consumer,2022-03-10 19:48:02+00:00,2022-03,3,69.14,active,22799762,1A4060300006D0A000646015,Pacific Stone Flower 7.0g Pouch Indica Weddin...,Flower (packaged quarter - each),Each,1.0,33.80,33.80,2022-03,14171,14171 Pacific Stone Flower 7.0g Pouch Indica W...,7.0,7.0g,,,4.828571,7.0,flower,False,Pacific Stone Flower 7.0g Pouch Indica Weddin...,Pacific Stone Flower 7.0g Pouch Indica Weddin...
96189,C9-0000370-LIC,0170831472,active,Consumer,2022-03-10 20:08:47+00:00,2022-03,2,95.00,active,21841366,1A406030000A429000324117,Honeyleaf Smalls - indica - 7g,Flower (packaged quarter - each),Each,1.0,60.00,60.00,2022-03,,Honeyleaf Smalls - indica - 7g,7.0,7g,,,8.571429,7.0,flower,True,Honeyleaf Smalls,Honeyleaf Smalls (Flower (packaged quarter - e...
96201,C10-0000169-LIC,0170804881,active,Consumer,2022-03-10 20:06:15.280000+00:00,2022-03,1,72.49,active,22638676,1A4060300004A9D000001627,"Zombie Cookies Smalls 7g, Ember Valley",Flower (packaged quarter - each),Each,1.0,72.49,72.49,2022-03,,"Zombie Cookies Smalls 7g, Ember Valley",7.0,7g,,,10.355714,7.0,flower,False,"Zombie Cookies Smalls 7g, Ember Valley","Zombie Cookies Smalls 7g, Ember Valley"


In [None]:
###
#mba.company_costs_df.groupby(['product_category_name','shipped_unit_of_measure']).count()

In [None]:
###
#mba.company_sales_df[mba.company_sales_df['tx_product_category_name'] == 'Concentrate']

In [None]:
# Fraction of packaged-quarter flower rows that have a value in extracted_units,
# wrapped in a one-cell DataFrame.
pd.DataFrame([
    len(mba.company_sales_df.loc[
        (mba.company_sales_df['tx_product_category_name'] == 'Flower (packaged quarter - each)')
        & mba.company_sales_df['extracted_units'].notna()
    ])
    / len(mba.company_sales_df.loc[
        mba.company_sales_df['tx_product_category_name'] == 'Flower (packaged quarter - each)'
    ])
])





In [None]:

# Absolute frequency of each extracted_units value within packaged-quarter flower.
mba.company_sales_df.loc[
    mba.company_sales_df['tx_product_category_name'] == 'Flower (packaged quarter - each)',
    'extracted_units',
].value_counts(normalize=False)

In [None]:
# Inspect the rows of the capsule category.
mba.company_sales_df.loc[mba.company_sales_df['tx_product_category_name'] == 'Capsule (weight - each)']



In [None]:
## Relative frequency of each extracted_units value, per product category.
# The per-category frames are collected in a list and concatenated once:
# DataFrame.append in a loop copies the accumulated frame on every iteration
# and is not available in pandas >= 2.0.
# NOTE(review): the column produced by value_counts(...).to_frame() is named
# 'extracted_units' only on pandas < 2; on newer versions `by='extracted_units'`
# presumably resolves to the index level instead — confirm the pinned pandas version.
unit_distribution_frames = []
for p in mba.company_sales_df['tx_product_category_name'].unique():
    print(p)
    df_temp = mba.company_sales_df[mba.company_sales_df['tx_product_category_name'] == p].extracted_units.value_counts(normalize = True).to_frame().sort_values(by ='extracted_units',ascending = False )
    df_temp['category'] = p
    unit_distribution_frames.append(df_temp)
# pd.concat rejects an empty list; fall back to the empty frame the original produced.
df = pd.concat(unit_distribution_frames) if unit_distribution_frames else pd.DataFrame()




In [None]:
## Coverage of `extracted_units` for every product category in the sales frame.
# Result: one row per category with the covered fraction in column 0 and the
# category name in column 'category' (same layout as before).
# The per-category frames are collected in a list and concatenated once:
# DataFrame.append in a loop copies the accumulated frame on every iteration
# and is not available in pandas >= 2.0.
has_units = mba.company_sales_df['extracted_units'].notna()  # loop-invariant
coverage_frames = []
for p in mba.company_sales_df['tx_product_category_name'].unique():
    print(p)
    in_category = mba.company_sales_df['tx_product_category_name'] == p
    # Plain Python ints keep the original semantics (ZeroDivisionError on an empty category).
    df_temp = pd.DataFrame([int((in_category & has_units).sum()) / int(in_category.sum())])
    df_temp['category'] = p
    coverage_frames.append(df_temp)
# pd.concat rejects an empty list; fall back to the empty frame the original produced.
df_coverage = pd.concat(coverage_frames) if coverage_frames else pd.DataFrame()




In [None]:
# Per-category coverage: fraction in column 0, category name in 'category'.
df_coverage

In [None]:
#df.to_csv('dominant_unit.csv')

In [None]:
# 'Concentrate (Each)' rows for which no unit could be extracted.
mba.company_sales_df.loc[
    (mba.company_sales_df['tx_product_category_name'] == 'Concentrate (Each)')
    & mba.company_sales_df['extracted_units'].isna()
]



In [None]:
# Monthly median of the adjusted unit price for packaged-quarter flower,
# using only rows that have a value in extracted_units.
mba.company_sales_df.loc[
    (mba.company_sales_df['tx_product_category_name'] == 'Flower (packaged quarter - each)')
    & mba.company_sales_df['extracted_units'].notna(),
    ['adjusted_tx_price_per_unit', 'sales_month'],
].groupby(['sales_month']).median()



In [None]:
# Number of rows with a missing adjusted unit price.
mba.company_sales_df['adjusted_tx_price_per_unit'].isna().sum()

In [None]:
# Shape of the adjusted unit price column (i.e. the total row count).
mba.company_sales_df['adjusted_tx_price_per_unit'].shape

In [None]:
# Scratch copy of the analyzer's sales frame, used by the checks in the following cells.
temp = mba.company_sales_df.copy()

In [None]:
# Number of rows with a letter-gram measure.
temp['letter_gram_measure_from_product_name'].notna().sum()

In [None]:
# Preview the scratch copy.
temp.head()

In [None]:
# Number of 'Each' rows with no extracted unit whose ratio versus gram is exactly 1.
(
    temp['tx_unit_of_measure'].eq('Each')
    & temp['extracted_units'].isna()
    & temp['measurement_ratio_vs_gram'].eq(1)
).sum()

In [None]:
## Coverage %
# NOTE(review): both numbers are hard-coded from an earlier run — presumably
# 299024 is the uncovered 'Each' row count from the previous cell and 1405912
# the total number of sales rows. Confirm, or compute them from `temp` so the
# figure cannot go stale.
1 - (299024/1405912)

In [None]:
# Run the analysis for 'Pre-Roll Flower' on the sales-side category column.
# 'S' presumably selects the sales frame — TODO confirm in msrp_band_analyzer.
mba.run_analysis('tx_product_category_name', 'Pre-Roll Flower', 'S')

In [None]:
# Same category on the `product_category_name` column.
# 'C' presumably selects the costs frame — TODO confirm in msrp_band_analyzer.
mba.run_analysis('product_category_name', 'Pre-Roll Flower', 'C')

In [None]:
# Run the analysis on the combined 'flower' category.
# 'S' presumably selects the sales frame — TODO confirm.
mba.run_analysis('combined_product_category', 'flower', 'S')

In [None]:
# Combined 'flower' category again.
# 'C' presumably selects the costs frame — TODO confirm.
mba.run_analysis('combined_product_category', 'flower', 'C')

In [None]:
###
#mba.company_sales_df[mba.company_sales_df.tx_product_category_name == 'Flower (packaged quarter - each)'][['adjusted_tx_price_per_unit','sales_month']].groupby(['sales_month']).mean()



In [None]:
# Median adjusted unit price over the whole combined 'flower' category.
combined_median_price = mba.company_sales_df.loc[
    mba.company_sales_df['combined_product_category'] == 'flower',
    'adjusted_tx_price_per_unit',
].median()

# Median raw unit price for each packaged flower size.
# NOTE: "once" in two of the names below is a typo for "ounce"; the names are
# kept because later cells reference them.
eighth_median_price = mba.company_sales_df.loc[
    mba.company_sales_df['tx_product_category_name'] == 'Flower (packaged eighth - each)',
    'tx_price_per_unit',
].median()
quarter_median_price = mba.company_sales_df.loc[
    mba.company_sales_df['tx_product_category_name'] == 'Flower (packaged quarter - each)',
    'tx_price_per_unit',
].median()
half_once_median_price = mba.company_sales_df.loc[
    mba.company_sales_df['tx_product_category_name'] == 'Flower (packaged half ounce - each)',
    'tx_price_per_unit',
].median()
once_median_price = mba.company_sales_df.loc[
    mba.company_sales_df['tx_product_category_name'] == 'Flower (packaged ounce - each)',
    'tx_price_per_unit',
].median()

In [None]:
# Median adjusted unit price for the plain 'Flower' category (no package size in the category name).
combined_median_price_flower_non_labeled = mba.company_sales_df.loc[
    mba.company_sales_df['tx_product_category_name'] == 'Flower',
    'adjusted_tx_price_per_unit',
].median()


In [None]:
# Median adjusted unit price of the combined 'flower' category.
combined_median_price

In [None]:
# Median adjusted unit price of the plain 'Flower' category.
combined_median_price_flower_non_labeled

In [None]:
# Per-gram price of an eighth: an eighth of an ounce is taken as 3.5 g.
eighth_median_price / 3.5

In [None]:
# Per-gram price of a quarter: a quarter of an ounce is taken as 7 g.
quarter_median_price / 7

In [None]:
# Per-gram price of a half ounce (taken as 14 g); "once" in the name means "ounce".
half_once_median_price / 14

In [None]:
# Per-gram price of an ounce (taken as 28 g); "once" in the name means "ounce".
once_median_price / 28

In [None]:
# Summary statistics of raw vs. adjusted unit price for every category whose
# name contains "flower" (case-insensitive).
mba.company_sales_df.loc[
    mba.company_sales_df['tx_product_category_name'].str.contains('Flower', case=False),
    ['tx_price_per_unit', 'adjusted_tx_price_per_unit', 'tx_product_category_name'],
].groupby('tx_product_category_name').describe()

We see that `adjusted_tx_price_per_unit` is on a smaller scale overall than `tx_price_per_unit` for these flower categories, because package prices are converted down to a price per gram.

In [None]:
# NOTE(review): presumably builds the time-series summary shown later via
# mba.msrp_summary_table_by_time — confirm in msrp_band_analyzer.
mba.output_time_series_metadata()

In [None]:
# Persist the analyzer. NOTE(review): presumably writes the
# 'msrp_band_analyzer_training_object' pickle that is read back below — confirm the path.
mba.save()

In [None]:
import pickle

# Read back the previously saved analyzer object. The context manager closes
# the file handle, which the original left open.
# SECURITY: pickle.load can execute arbitrary code; only load files produced by
# a trusted run of this project.
with open('msrp_band_analyzer_training_object', 'rb') as picklefile:
    old_mba = pickle.load(picklefile)

In [None]:
# The loaded object is indexed like a mapping; preview its saved sales frame.
old_mba['company_sales_df'].head()

In [None]:
# Rebuild an analyzer from the saved cost and sales frames.
new_mba = msrp_band_analyzer.MSRPBand(
    company_costs_df=old_mba['company_costs_df'],
    company_sales_df=old_mba['company_sales_df'],
)

In [None]:
# Check that the rebuilt analyzer exposes the restored sales frame.
new_mba.company_sales_df.head()

In [None]:
# Time-based MSRP summary held on the analyzer.
# NOTE(review): presumably populated by an earlier analysis call — confirm which one.
mba.msrp_summary_table_by_time

# Testing

In [None]:
from underwriting import msrp_band_analyzer_testing


In [None]:
# Test-set analysis for 'Flower' on the `product_category_name` column.
# 'C' presumably selects the costs data — TODO confirm in msrp_band_analyzer_testing.
result = msrp_band_analyzer_testing.run_test_set_analysis('product_category_name', 'Flower', 'C')

In [None]:
# First element of the test-set result. NOTE(review): its meaning is not visible here.
result[0]

In [None]:
# Second element of the test-set result. NOTE(review): its meaning is not visible here.
result[1]

In [None]:
# Test-set analysis for 'Pre-Roll Flower' on the sales-side category column.
# The function lives in the msrp_band_analyzer_testing module; only the module
# is imported, so the unqualified name raised NameError.
result = msrp_band_analyzer_testing.run_test_set_analysis('tx_product_category_name', 'Pre-Roll Flower', 'S')