In [1]:
import json
import numpy 
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline  
import pyarrow
import sys
from tqdm import tqdm
import seaborn as sns
from datetime import date
from dotenv import load_dotenv
from sqlalchemy import create_engine
from os import path
from typing import List,Dict, Tuple
from collections import defaultdict
pd.set_option("display.max_columns", None)

# Load variables from a local .env file so the BigQuery credentials path can be
# read from the environment.
load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')
if not BIGQUERY_CREDENTIALS_PATH:
    # Without this guard os.path.expanduser(None) fails with an opaque TypeError.
    raise EnvironmentError(
        "BIGQUERY_CREDENTIALS_PATH is not set; define it in the environment or in a .env file."
    )
engine = create_engine(
    'bigquery://bespoke-financial/ProdMetrcData',
    credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH),
)

# Append the project source directories (resolved relative to the current working
# directory) to sys.path, in the same order as before.
for _relative_dir in ("../core", "../../src", "../../scripts"):
    sys.path.append(path.realpath(path.join(os.getcwd(), _relative_dir)))

import create_queries
import prepare_data

from bespoke.inventory.analysis.shared import download_util, inventory_types
from bespoke.inventory.analysis import active_inventory_util as util
from bespoke.inventory.analysis import inventory_valuations_util as valuations_util
from underwriting import msrp_band_analyzer

%load_ext autoreload
%autoreload 2



In [2]:
# Company identifiers whose data is loaded into the MSRP band analyzer
# (passed to mba.update_company_data in a later cell).
COMPANY_IDENTIFIER_LIST = [
    'DL',
    'DW',
    'EMA',
    'EMM',
    'EMT',
    'EMF',
    'ST',
    'GRG',
    'EL',
    'VS',
]

In [3]:
mba = msrp_band_analyzer.MSRPBand()

In [4]:
# Load data for every company in COMPANY_IDENTIFIER_LIST into the analyzer.
# NOTE(review): the recorded output contains pandas SettingWithCopyWarning messages
# raised by chained assignment inside the analyzer's helper code
# (e.g. df['extracted_units'][idx] = ...); that code should use
# df.loc[idx, 'extracted_units'] = ... instead. The fix belongs in the library.
mba.update_company_data(COMPANY_IDENTIFIER_LIST)


Verifying download summaries for license 402R-00545...
Earliest download summary: 2020-01-01
Latest download summary: 2022-04-05

Verifying download summaries for license C9-0000323-LIC...
Earliest download summary: 2020-01-01
Latest download summary: 2022-04-05

Verifying download summaries for license C9-0000370-LIC...
Earliest download summary: 2020-01-01
Latest download summary: 2022-04-05

Verifying download summaries for license 402-00840...
Earliest download summary: 2020-01-01
Latest download summary: 2022-04-05

Verifying download summaries for license MR282376...
Earliest download summary: 2020-01-01
Latest download summary: 2022-04-04
Found bad download summary for license MR282376 on date 2022-03-31
Found bad download summary for license MR282376 on date 2022-03-24

Verifying download summaries for license MR283369...
Earliest download summary: 2020-01-01
Latest download summary: 2022-04-04
Found bad download summary for license MR283369 on date 2022-03-10
Found bad downloa

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['extracted_units'][idx] = df[measure_column][idx].apply(mba_util.EXTRACTED_MEASUREMENT_COLUMNS[measure_column])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['extracted_units'][idx] = df[measure_column][idx].apply(mba_util.EXTRACTED_MEASUREMENT_COLUMNS[measure_column])


### Combining same product categories with different measurements ### 	


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[measure_ratio_column_name][includes_measurement] = self.unit_conversion_ratio(measurement, measurement_unit)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[measure_ratio_column_name][includes_measurement] = self.unit_conversion_ratio(measurement, measurement_unit)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[measure_ratio_column_name][extracted_units_idx] = df['extracted_units'][extracted_units_idx]
A value is trying to be set on a copy of a slice from a 

### Breaking down product names into different brands ### 	


  (df[product_name].str.contains("^(1 ml)") == False) &
  (df[product_name].str.contains("^(1 ml)") == False) &


### Available combined product category name in costs dataframe ### 	
['buds' 'other concentrate' 'edible' 'flower' 'concentrate'
 'vape cartridge' 'shake/trim' 'infused' 'pre-roll infused' 'extract'
 'pre-roll flower' 'pre-roll leaf' 'topical' 'tincture' 'capsule' 'shake'
 'wet whole plants' 'vape product' 'raw pre-rolls' 'kief' 'seeds'
 'shake/trim allocated for extraction' 'infused butter/oil'
 'infused pre-rolls' 'infused beverage' 'clone - cutting' 'immature plant']
### Available combined product category name in sales dataframe ### 	
['other concentrate' 'buds' 'shake/trim' 'pre-roll flower' 'edible'
 'flower' 'concentrate' 'infused' 'extract' 'vape cartridge'
 'pre-roll infused' 'pre-roll leaf' 'topical' 'immature plants' 'capsule'
 'tincture' 'shake' 'vape product' 'raw pre-rolls' 'kief'
 'shake/trim allocated for extraction' 'infused butter/oil'
 'infused pre-rolls' 'seeds' 'infused beverage' 'clone - cutting']


In [5]:
# Work on a copy so the exploratory cells below do not modify mba.company_sales_df.
df_sales = mba.company_sales_df.copy()

In [6]:
df_sales.head()

Unnamed: 0,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,sales_month,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,tx_price_per_unit,date_in_month,bad_numbers_from_product_name,extracted_units,letter_gram_measure_from_product_name,gram_measure_from_product_name,oz_measure_from_product_name,adjusted_tx_price_per_unit,measurement_ratio_vs_gram,combined_product_category,brand_breakable_by_dash_boolean,brands,brands_by_category
2320571,C10-0000169-LIC,5181809,inactive,Consumer,2020-01-02 08:16:34+00:00,2020-01,2,33.76,inactive,1782399,1A4060300004D62000000167,VVS - Vape - 0.3g - Gold - Hybrid - GSC,Other Concentrate (weight - each),Each,1.0,16.88,16.88,2020-01,,0.3,0.3g,,,56.266667,0.3,other concentrate,True,VVS,VVS (Other Concentrate (weight - each))
2300482,402-00840,52954312,inactive,Patient,2020-01-22 13:43:28+00:00,2020-01,3,209.24,inactive,17994538,1A4000500266F2ED00002227,Durban Ghost #2,Buds,Grams,14.0,60.44,4.317143,2020-01,,,,,,4.317143,1.0,buds,False,Durban Ghost #2,Durban Ghost #2
2300483,402R-00545,52954311,inactive,Consumer,2020-01-22 13:43:23+00:00,2020-01,1,10.03,inactive,18017431,1A400031266EE9B000047163,WR Jenny Kush - 2 Pack Ready Roll,Shake/Trim (by strain),Grams,1.0,10.03,10.03,2020-01,,,,,,10.03,1.0,shake/trim,True,WR Jenny Kush,WR Jenny Kush (Shake/Trim (by strain))
2300484,C10-0000169-LIC,7521891,inactive,Consumer,2020-01-22 13:41:39+00:00,2020-01,5,36.58,inactive,1782532,1A4060300004D62000000231,Puffy Delivery - Pre-roll - 0.7g - Jack's Poison,Pre-Roll Flower,Each,1.0,8.68,8.68,2020-01,,0.7,0.7g,,,12.4,0.7,pre-roll flower,True,Puffy Delivery,Puffy Delivery (Pre-Roll Flower)
2300485,C10-0000169-LIC,7521891,inactive,Consumer,2020-01-22 13:41:39+00:00,2020-01,5,36.58,inactive,1803899,1A4060300004B03000009717,LOL Edibles - Edible - 100mg - Doob Cube - Blu...,Edible (weight - each),Each,1.0,17.22,17.22,2020-01,,0.1,100mg,,,172.2,0.1,edible,True,LOL Edibles,LOL Edibles (Edible (weight - each))


In [7]:
# Non-null counts of every column per (product category, unit of measure) pair,
# ordered by unit of measure in descending order.
category_uom_keys = ['tx_product_category_name', 'tx_unit_of_measure']
rows_per_category_uom = df_sales.groupby(category_uom_keys).count()
rows_per_category_uom.sort_values(by='tx_unit_of_measure', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,sales_month,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_quantity_sold,tx_total_price,tx_price_per_unit,date_in_month,bad_numbers_from_product_name,extracted_units,letter_gram_measure_from_product_name,gram_measure_from_product_name,oz_measure_from_product_name,adjusted_tx_price_per_unit,measurement_ratio_vs_gram,combined_product_category,brand_breakable_by_dash_boolean,brands,brands_by_category
tx_product_category_name,tx_unit_of_measure,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
Buds,Grams,349284,349284,349284,349284,349284,349284,349284,349284,349284,349284,349284,349284,349284,349284,349284,349284,82881,0,131684,1010,1998,349284,349284,349284,349284,349284,349284
Extract (weight),Grams,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,0,0,1,10,0,11,11,11,11,11,11
Shake/Trim allocated for extraction,Grams,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,0,0,0,0,0,20,20,20,20,20,20
Shake/Trim (by strain),Grams,28416,28416,28416,28416,28416,28416,28416,28416,28416,28416,28416,28416,28416,28416,28416,28416,6007,0,7173,508,0,28416,28416,28416,28416,28416,28416
Shake/Trim,Grams,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,0,0,12,12,0,120,120,120,120,120,120
Raw Pre-Rolls,Grams,172723,172723,172723,172723,172723,172723,172723,172723,172723,172723,172723,172723,172723,172723,172723,172723,138796,0,136921,1131,0,172723,172723,172723,172723,172723,172723
Infused Pre-Rolls,Grams,4078,4078,4078,4078,4078,4078,4078,4078,4078,4078,4078,4078,4078,4078,4078,4078,3458,0,4078,0,0,4078,4078,4078,4078,4078,4078
Flower,Grams,38110,38110,38110,38110,38110,38110,38110,38110,38110,38110,38110,38110,38110,38110,38110,38110,9551,0,35075,2,233,38110,38110,38110,38110,38110,38110
Kief,Grams,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,15,0,16,0,0,16,16,16,16,16,16
Concentrate,Grams,115926,115926,115926,115926,115926,115926,115926,115926,115926,115926,115926,115926,115926,115926,115926,115926,1895,0,18720,10980,0,115926,115926,115926,115926,115926,115926


# 1. Unit of measure (UOM) = "Each"
- Products using the same UOM (e.g. gram) come in different quantities
- Products using different UOMs (e.g. a blend of gram and oz)

In [None]:
## examples

In [None]:
# Example rows: an "Other Concentrate (weight - each)" product sold by the unit.
example_rows = (
    (df_sales['tx_product_category_name'] == 'Other Concentrate (weight - each)')
    & (df_sales['tx_product_name'] == 'Wave - Cartridge - 0.5g - Hybrid - Lemon')
)
example_columns = ['tx_product_category_name', 'tx_product_name', 'tx_unit_of_measure', 'tx_quantity_sold']
df_sales.loc[example_rows, example_columns].head()





In [None]:
# Example rows: a second "Other Concentrate (weight - each)" product, with a 1g name.
example_rows = (
    (df_sales['tx_product_category_name'] == 'Other Concentrate (weight - each)')
    & (df_sales['tx_product_name'] == 'Thclear - 1g - Honey Pot - Skywalker OG')
)
example_columns = ['tx_product_category_name', 'tx_product_name', 'tx_unit_of_measure', 'tx_quantity_sold']
df_sales.loc[example_rows, example_columns].head()





In [None]:
## examples

In [None]:

# Example rows: an "Edible (volume - each)" product whose name carries an oz measure.
example_rows = (
    (df_sales['tx_product_category_name'] == 'Edible (volume - each)')
    & (df_sales['tx_product_name'] == "Uncle Arnie's Iced Tea Lemonade Beverage 8oz Glass")
)
example_columns = ['tx_product_category_name', 'tx_product_name', 'tx_unit_of_measure', 'tx_quantity_sold']
df_sales.loc[example_rows, example_columns].head()

In [None]:

# Example rows: an "Edible (volume - each)" product whose name carries a mg measure.
example_rows = (
    (df_sales['tx_product_category_name'] == 'Edible (volume - each)')
    & (df_sales['tx_product_name'] == "Hapy - Edible - 100mg - Syrup Shot - Tropical Mix")
)
example_columns = ['tx_product_category_name', 'tx_product_name', 'tx_unit_of_measure', 'tx_quantity_sold']
df_sales.loc[example_rows, example_columns].head()

# 2. How the regex extraction works

## 1. remove bad numbers

In [None]:
# Show rows where a "bad number" was found in the product name.
# 'original_product_name' is not among the df_sales columns shown in this notebook's
# recorded outputs, so selecting it unconditionally raises KeyError. Keep it only
# when the column is actually present.
bad_number_columns = [
    column
    for column in ['original_product_name', 'bad_numbers_from_product_name', 'tx_product_name']
    if column in df_sales.columns
]
df_sales.loc[df_sales['bad_numbers_from_product_name'].notna(), bad_number_columns].head()

In [None]:
# Share of sales rows that have a value in bad_numbers_from_product_name.
has_bad_numbers = df_sales['bad_numbers_from_product_name'].notna()
int(has_bad_numbers.sum()) / len(df_sales)

## 2. grab letter grams / gram / oz

In [None]:
# Rows where a letter-gram measure (e.g. "0.3g") was extracted from the product name.
has_letter_gram = df_sales['letter_gram_measure_from_product_name'].notna()
df_sales.loc[has_letter_gram, ['tx_product_name', 'letter_gram_measure_from_product_name']].head()




In [None]:
# Share of sales rows that have a value in letter_gram_measure_from_product_name.
has_letter_gram = df_sales['letter_gram_measure_from_product_name'].notna()
int(has_letter_gram.sum()) / len(df_sales)

In [None]:

# Rows whose extracted gram measure is exactly the word 'gram'.
is_gram_word = (
    df_sales['gram_measure_from_product_name'].notna()
    & (df_sales['gram_measure_from_product_name'] == 'gram')
)
df_sales.loc[is_gram_word, ['tx_product_name', 'gram_measure_from_product_name']].head()




In [None]:
# Share of sales rows that have a value in gram_measure_from_product_name.
has_gram_measure = df_sales['gram_measure_from_product_name'].notna()
int(has_gram_measure.sum()) / len(df_sales)

In [None]:


# Rows where an oz measure was extracted from the product name.
has_oz_measure = df_sales['oz_measure_from_product_name'].notna()
df_sales.loc[has_oz_measure, ['tx_product_name', 'oz_measure_from_product_name']].head()




In [None]:
# Share of sales rows that have a value in oz_measure_from_product_name.
has_oz_measure = df_sales['oz_measure_from_product_name'].notna()
int(has_oz_measure.sum()) / len(df_sales)

# 3. combine 

In [None]:
# Compare the raw letter-gram measure with the combined extracted_units value.
has_letter_gram = df_sales['letter_gram_measure_from_product_name'].notna()
letter_gram_columns = ['tx_product_name', 'letter_gram_measure_from_product_name', 'extracted_units']
df_sales.loc[has_letter_gram, letter_gram_columns].head()




In [None]:

# Compare the raw 'gram' measure with the combined extracted_units value.
is_gram_word = (
    df_sales['gram_measure_from_product_name'].notna()
    & (df_sales['gram_measure_from_product_name'] == 'gram')
)
gram_columns = ['tx_product_name', 'gram_measure_from_product_name', 'extracted_units']
df_sales.loc[is_gram_word, gram_columns].head()




In [None]:


df_sales[(df_sales['oz_measure_from_product_name'].notna())]




# 4. coverage

In [None]:
df_sales

In [None]:
## Coverage: share of rows with a non-null extracted_units value, for every product
## category that has at least one sale whose unit of measure is not 'Grams'.
non_gram_categories = df_sales.loc[
    df_sales['tx_unit_of_measure'] != 'Grams', 'tx_product_category_name'
].unique()

# One grouped pass replaces the per-category filtering loop and DataFrame.append
# (deprecated in pandas 1.4, removed in 2.0). count() is the number of non-null
# extracted_units per category; size() is the number of rows per category.
units_by_category = mba.company_sales_df.groupby('tx_product_category_name')['extracted_units']
coverage_by_category = units_by_category.count() / units_by_category.size()

# One row per category, in the order the categories first appear in df_sales.
# The ratio is stored in a named 'coverage' column instead of the unnamed column 0.
df_coverage = (
    coverage_by_category
    .reindex(non_gram_categories)
    .rename('coverage')
    .rename_axis('category')
    .reset_index()
)
df_coverage




# 5. sanity checks

In [None]:
df_sales[df_sales['tx_product_category_name'] == 'Flower (packaged quarter - each)']

In [None]:
###
#mba.company_costs_df.groupby(['product_category_name','shipped_unit_of_measure']).count()

In [None]:
###
#mba.company_sales_df[mba.company_sales_df['tx_product_category_name'] == 'Concentrate']

In [None]:
# Share of 'Flower (packaged quarter - each)' rows that have extracted_units.
quarter_rows = mba.company_sales_df['tx_product_category_name'] == 'Flower (packaged quarter - each)'
quarter_rows_with_units = quarter_rows & mba.company_sales_df['extracted_units'].notna()
pd.DataFrame([int(quarter_rows_with_units.sum()) / int(quarter_rows.sum())])





In [None]:

# Absolute frequency of each extracted_units value for quarter-packaged flower.
quarter_rows = mba.company_sales_df['tx_product_category_name'] == 'Flower (packaged quarter - each)'
mba.company_sales_df.loc[quarter_rows, 'extracted_units'].value_counts(normalize = False)

In [None]:
mba.company_sales_df[(mba.company_sales_df['tx_product_category_name'] == 'Capsule (weight - each)')]



In [None]:
## Relative frequency of each extracted_units value within every product category.
unit_share_frames = []
for category in mba.company_sales_df['tx_product_category_name'].unique():
    in_category = mba.company_sales_df['tx_product_category_name'] == category
    # value_counts already returns the shares in descending order, so no extra
    # sort is needed.
    unit_shares = mba.company_sales_df.loc[in_category, 'extracted_units'].value_counts(normalize=True)
    # Keep the share column named 'extracted_units' and the index unnamed on every
    # pandas version (pandas >= 2.0 names them 'proportion' / 'extracted_units').
    unit_share_frame = unit_shares.rename('extracted_units').rename_axis(None).to_frame()
    unit_share_frame['category'] = category
    unit_share_frames.append(unit_share_frame)

# Concatenate once: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0,
# and appending inside the loop copies the accumulated frame on every iteration.
# The generic name `df` is kept because a later (commented-out) export refers to it.
df = pd.concat(unit_share_frames) if unit_share_frames else pd.DataFrame()




In [None]:
## Coverage: share of rows with a non-null extracted_units value, per product category.
# One grouped pass replaces the per-category filtering loop and DataFrame.append
# (deprecated in pandas 1.4, removed in 2.0). sort=False keeps the categories in
# order of first appearance, matching the order of .unique().
units_by_category = mba.company_sales_df.groupby(
    'tx_product_category_name', sort=False
)['extracted_units']

# count() = non-null extracted_units per category; size() = rows per category.
# The ratio is stored in a named 'coverage' column instead of the unnamed column 0.
df_coverage = (
    (units_by_category.count() / units_by_category.size())
    .rename('coverage')
    .rename_axis('category')
    .reset_index()
)




In [None]:
df_coverage

In [None]:
#df.to_csv('dominant_unit.csv')

In [None]:
# 'Concentrate (Each)' rows for which no unit could be extracted.
concentrate_without_units = (
    (mba.company_sales_df['tx_product_category_name'] == 'Concentrate (Each)')
    & mba.company_sales_df['extracted_units'].isna()
)
mba.company_sales_df.loc[concentrate_without_units]



In [None]:
# Monthly median adjusted price for quarter-packaged flower rows that have extracted_units.
quarter_with_units = (
    (mba.company_sales_df.tx_product_category_name == 'Flower (packaged quarter - each)')
    & mba.company_sales_df['extracted_units'].notna()
)
monthly_price_columns = ['adjusted_tx_price_per_unit', 'sales_month']
mba.company_sales_df.loc[quarter_with_units, monthly_price_columns].groupby(['sales_month']).median()



In [None]:
mba.company_sales_df.adjusted_tx_price_per_unit.isna().sum()

In [None]:
mba.company_sales_df.adjusted_tx_price_per_unit.shape

In [None]:
temp = mba.company_sales_df.copy()

In [None]:
# Number of rows with a letter-gram measure extracted from the product name.
temp.letter_gram_measure_from_product_name.notna().sum()

In [None]:
temp.head()

In [None]:
# Rows sold by 'Each' with no extracted unit and a measurement ratio of exactly 1.
sold_by_each = temp.tx_unit_of_measure == 'Each'
missing_units = temp.extracted_units.isna()
ratio_is_one = temp.measurement_ratio_vs_gram == 1
(sold_by_each & missing_units & ratio_is_one).sum()

In [None]:
## Coverage %
# NOTE(review): 299024 and 1405912 are hard-coded. Presumably they are the count from
# the preceding cell (Each rows without extracted units) and the total row count of
# mba.company_sales_df — confirm, and compute them from the data so the figure
# cannot go stale.
1 - (299024/1405912)

In [None]:
mba.run_analysis('tx_product_category_name', 'Pre-Roll Flower', 'S')

In [None]:
mba.run_analysis('product_category_name', 'Pre-Roll Flower', 'C')

In [None]:
mba.run_analysis('combined_product_category', 'flower', 'S')

In [None]:
mba.run_analysis('combined_product_category', 'flower', 'C')

In [None]:
###
#mba.company_sales_df[mba.company_sales_df.tx_product_category_name == 'Flower (packaged quarter - each)'][['adjusted_tx_price_per_unit','sales_month']].groupby(['sales_month']).mean()



In [None]:
def _median_price(category_column, category, price_column):
    """Median of `price_column` over sales rows where `category_column` equals `category`."""
    matching_rows = mba.company_sales_df[category_column] == category
    return mba.company_sales_df.loc[matching_rows, price_column].median()


# Median adjusted price across the combined 'flower' category.
combined_median_price = _median_price('combined_product_category', 'flower', 'adjusted_tx_price_per_unit')

# Median unadjusted unit price per packaged flower category.
# The "once" spelling in two names is kept because later cells read these variables.
eighth_median_price = _median_price('tx_product_category_name', 'Flower (packaged eighth - each)', 'tx_price_per_unit')
quarter_median_price = _median_price('tx_product_category_name', 'Flower (packaged quarter - each)', 'tx_price_per_unit')
half_once_median_price = _median_price('tx_product_category_name', 'Flower (packaged half ounce - each)', 'tx_price_per_unit')
once_median_price = _median_price('tx_product_category_name', 'Flower (packaged ounce - each)', 'tx_price_per_unit')

In [None]:
# Median adjusted price for rows in the plain 'Flower' category.
plain_flower_rows = mba.company_sales_df.tx_product_category_name == 'Flower'
combined_median_price_flower_non_labeled = mba.company_sales_df.loc[plain_flower_rows, 'adjusted_tx_price_per_unit'].median()


In [None]:
combined_median_price

In [None]:
combined_median_price_flower_non_labeled

In [None]:
eighth_median_price / 3.5

In [None]:
quarter_median_price / 7

In [None]:
half_once_median_price / 14

In [None]:
once_median_price / 28

In [None]:
# Summary statistics of raw vs adjusted unit price for every category whose name
# contains 'Flower' (case-insensitive).
flower_rows = mba.company_sales_df.tx_product_category_name.str.contains('Flower', case = False)
flower_price_columns = ['tx_price_per_unit', 'adjusted_tx_price_per_unit', 'tx_product_category_name']
mba.company_sales_df.loc[flower_rows, flower_price_columns].groupby('tx_product_category_name').describe()

We see that, for the Flower categories above, `adjusted_tx_price_per_unit` is overall on a smaller scale than `tx_price_per_unit` because units are converted down to grams.

In [None]:
mba.output_time_series_metadata()

In [None]:
mba.save()

In [None]:
import pickle

# Load the pickled analyzer state from the working directory (presumably written by
# mba.save() — confirm). The context manager closes the file handle, which the
# previous bare open() never did.
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by
# a trusted run of this notebook.
with open('msrp_band_analyzer_training_object', 'rb') as picklefile:
    old_mba = pickle.load(picklefile)

In [None]:
old_mba['company_sales_df'].head()

In [None]:
new_mba = msrp_band_analyzer.MSRPBand(company_costs_df = old_mba['company_costs_df'], company_sales_df = old_mba['company_sales_df'])

In [None]:
new_mba.company_sales_df.head()

In [None]:
mba.msrp_summary_table_by_time

# edible

In [19]:
df_sales.head()

Unnamed: 0,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,sales_month,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,tx_price_per_unit,date_in_month,bad_numbers_from_product_name,extracted_units,letter_gram_measure_from_product_name,gram_measure_from_product_name,oz_measure_from_product_name,adjusted_tx_price_per_unit,measurement_ratio_vs_gram,combined_product_category,brand_breakable_by_dash_boolean,brands,brands_by_category
2320571,C10-0000169-LIC,5181809,inactive,Consumer,2020-01-02 08:16:34+00:00,2020-01,2,33.76,inactive,1782399,1A4060300004D62000000167,VVS - Vape - 0.3g - Gold - Hybrid - GSC,Other Concentrate (weight - each),Each,1.0,16.88,16.88,2020-01,,0.3,0.3g,,,56.266667,0.3,other concentrate,True,VVS,VVS (Other Concentrate (weight - each))
2300482,402-00840,52954312,inactive,Patient,2020-01-22 13:43:28+00:00,2020-01,3,209.24,inactive,17994538,1A4000500266F2ED00002227,Durban Ghost #2,Buds,Grams,14.0,60.44,4.317143,2020-01,,,,,,4.317143,1.0,buds,False,Durban Ghost #2,Durban Ghost #2
2300483,402R-00545,52954311,inactive,Consumer,2020-01-22 13:43:23+00:00,2020-01,1,10.03,inactive,18017431,1A400031266EE9B000047163,WR Jenny Kush - 2 Pack Ready Roll,Shake/Trim (by strain),Grams,1.0,10.03,10.03,2020-01,,,,,,10.03,1.0,shake/trim,True,WR Jenny Kush,WR Jenny Kush (Shake/Trim (by strain))
2300484,C10-0000169-LIC,7521891,inactive,Consumer,2020-01-22 13:41:39+00:00,2020-01,5,36.58,inactive,1782532,1A4060300004D62000000231,Puffy Delivery - Pre-roll - 0.7g - Jack's Poison,Pre-Roll Flower,Each,1.0,8.68,8.68,2020-01,,0.7,0.7g,,,12.4,0.7,pre-roll flower,True,Puffy Delivery,Puffy Delivery (Pre-Roll Flower)
2300485,C10-0000169-LIC,7521891,inactive,Consumer,2020-01-22 13:41:39+00:00,2020-01,5,36.58,inactive,1803899,1A4060300004B03000009717,LOL Edibles - Edible - 100mg - Doob Cube - Blu...,Edible (weight - each),Each,1.0,17.22,17.22,2020-01,,0.1,100mg,,,172.2,0.1,edible,True,LOL Edibles,LOL Edibles (Edible (weight - each))


In [29]:
df_sales['tx_product_category_name'].unique()

array(['Other Concentrate (weight - each)', 'Buds',
       'Shake/Trim (by strain)', 'Pre-Roll Flower',
       'Edible (weight - each)', 'Flower', 'Concentrate',
       'Infused (edible)', 'Extract (weight - each)',
       'Concentrate (Each)', 'Vape Cartridge (volume - each)',
       'Pre-Roll Infused', 'Infused (non-edible)', 'Pre-Roll Leaf',
       'Edible (volume - each)', 'Topical (weight - each)',
       'Immature Plants', 'Capsule (weight - each)', 'Extract (weight)',
       'Flower (packaged eighth - each)', 'Tincture (volume - each)',
       'Flower (packaged quarter - each)',
       'Shake (Packaged Quarter - each)',
       'Vape Cartridge (weight - each)', 'Topical (volume - each)',
       'Flower (packaged gram - each)',
       'Shake (Packaged Half Ounce - each)',
       'Other Concentrate (volume - each)', 'Shake/Trim', 'Vape Product',
       'Raw Pre-Rolls', 'Flower (packaged half ounce - each)', 'Kief',
       'Extract (volume - each)', 'Shake (Packaged Eighth - each)',

In [47]:
'Shake/Trim' in ['Shake/Trim']

True

In [73]:
# For each product category, write the grouped letter-gram measures (lower-cased)
# to "<category>.csv". Categories in skipped_categories are printed but not exported.
skipped_categories = ['Shake/Trim (by strain)','Shake/Trim','Infused Butter/Oil (weight - each)','Infused Butter/Oil (volume - each)',
                      'Shake/Trim allocated for extraction']
measure_column = 'letter_gram_measure_from_product_name'

for p in df_sales['tx_product_category_name'].unique():
    print(p)
    if p in skipped_categories:
        continue
    temp = df_sales[df_sales['tx_product_category_name'] == p]
    # Grouping yields one row per measure value; only the group keys are kept.
    measure_counts = temp[[measure_column, 'tx_price_per_unit']].groupby([measure_column]).count()
    temp_gb = measure_counts.reset_index()[measure_column].str.lower()
    temp_gb.to_csv(str(p) + '.csv')

Other Concentrate (weight - each)
Buds
Shake/Trim (by strain)
Pre-Roll Flower
Edible (weight - each)
Flower
Concentrate
Infused (edible)
Extract (weight - each)
Concentrate (Each)
Vape Cartridge (volume - each)
Pre-Roll Infused
Infused (non-edible)
Pre-Roll Leaf
Edible (volume - each)
Topical (weight - each)
Immature Plants
Capsule (weight - each)
Extract (weight)
Flower (packaged eighth - each)
Tincture (volume - each)
Flower (packaged quarter - each)
Shake (Packaged Quarter - each)
Vape Cartridge (weight - each)
Topical (volume - each)
Flower (packaged gram - each)
Shake (Packaged Half Ounce - each)
Other Concentrate (volume - each)
Shake/Trim
Vape Product
Raw Pre-Rolls
Flower (packaged half ounce - each)
Kief
Extract (volume - each)
Shake (Packaged Eighth - each)
Flower (packaged ounce - each)
Shake (Packaged Gram - each)
Flower (packaged - each)
Tincture (weight - each)
Shake/Trim allocated for extraction
Infused Butter/Oil (weight - each)
Shake (Packaged Ounce - each)
Infused Pre-

In [123]:

df_sales[df_sales['tx_product_name'] == ' Pacific Stone Preroll 0.5g Sativa Banjo 14-Pack 7.0g (16ct)']

Unnamed: 0,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,sales_month,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,tx_price_per_unit,date_in_month,bad_numbers_from_product_name,extracted_units,letter_gram_measure_from_product_name,gram_measure_from_product_name,oz_measure_from_product_name,adjusted_tx_price_per_unit,measurement_ratio_vs_gram,combined_product_category,brand_breakable_by_dash_boolean,brands,brands_by_category
2228476,C9-0000323-LIC,0017288007,active,Consumer,2020-03-28 17:52:54+00:00,2020-03,5,173.29,active,2976895,1A4060300006D0A000103479,Pacific Stone Preroll 0.5g Sativa Banjo 14-Pa...,Pre-Roll Flower,Each,1.0,43.76,43.76,2020-03,12185,0.5,0.5g,,,87.52,0.5,pre-roll flower,True,Pacific Stone Preroll 0.5g Sativa Banjo 14,Pacific Stone Preroll 0.5g Sativa Banjo 14 (Pr...
2228781,C9-0000323-LIC,0017247788,active,Consumer,2020-03-28 14:08:41+00:00,2020-03,1,39.39,active,2976895,1A4060300006D0A000103479,Pacific Stone Preroll 0.5g Sativa Banjo 14-Pa...,Pre-Roll Flower,Each,1.0,39.39,39.39,2020-03,12185,0.5,0.5g,,,78.78,0.5,pre-roll flower,True,Pacific Stone Preroll 0.5g Sativa Banjo 14,Pacific Stone Preroll 0.5g Sativa Banjo 14 (Pr...
2227845,C9-0000323-LIC,0017400896,active,Consumer,2020-03-29 17:15:56+00:00,2020-03,2,82.79,active,2976895,1A4060300006D0A000103479,Pacific Stone Preroll 0.5g Sativa Banjo 14-Pa...,Pre-Roll Flower,Each,1.0,43.57,43.57,2020-03,12185,0.5,0.5g,,,87.14,0.5,pre-roll flower,True,Pacific Stone Preroll 0.5g Sativa Banjo 14,Pacific Stone Preroll 0.5g Sativa Banjo 14 (Pr...
2227226,C9-0000323-LIC,0017481505,active,Consumer,2020-03-30 16:17:58+00:00,2020-03,2,87.52,active,2976895,1A4060300006D0A000103479,Pacific Stone Preroll 0.5g Sativa Banjo 14-Pa...,Pre-Roll Flower,Each,1.0,43.76,43.76,2020-03,12185,0.5,0.5g,,,87.52,0.5,pre-roll flower,True,Pacific Stone Preroll 0.5g Sativa Banjo 14,Pacific Stone Preroll 0.5g Sativa Banjo 14 (Pr...
2230816,C9-0000323-LIC,0016882846,active,Consumer,2020-03-25 22:51:59+00:00,2020-03,2,61.27,active,2976895,1A4060300006D0A000103479,Pacific Stone Preroll 0.5g Sativa Banjo 14-Pa...,Pre-Roll Flower,Each,1.0,43.76,43.76,2020-03,12185,0.5,0.5g,,,87.52,0.5,pre-roll flower,True,Pacific Stone Preroll 0.5g Sativa Banjo 14,Pacific Stone Preroll 0.5g Sativa Banjo 14 (Pr...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834209,C9-0000323-LIC,0136254481,active,Consumer,2021-10-14 18:56:28+00:00,2021-10,2,66.15,active,17399289,1A4060300006D0A000510361,Pacific Stone Preroll 0.5g Sativa Banjo 14-Pa...,Pre-Roll Flower,Each,1.0,37.74,37.74,2021-10,12185,0.5,0.5g,,,75.48,0.5,pre-roll flower,True,Pacific Stone Preroll 0.5g Sativa Banjo 14,Pacific Stone Preroll 0.5g Sativa Banjo 14 (Pr...
826068,C9-0000323-LIC,0136683555,active,Consumer,2021-10-16 12:12:35+00:00,2021-10,2,75.31,active,17399289,1A4060300006D0A000510361,Pacific Stone Preroll 0.5g Sativa Banjo 14-Pa...,Pre-Roll Flower,Each,1.0,31.01,31.01,2021-10,12185,0.5,0.5g,,,62.02,0.5,pre-roll flower,True,Pacific Stone Preroll 0.5g Sativa Banjo 14,Pacific Stone Preroll 0.5g Sativa Banjo 14 (Pr...
834006,C9-0000323-LIC,0136262586,active,Consumer,2021-10-14 19:24:22+00:00,2021-10,3,115.17,active,17399289,1A4060300006D0A000510361,Pacific Stone Preroll 0.5g Sativa Banjo 14-Pa...,Pre-Roll Flower,Each,1.0,37.65,37.65,2021-10,12185,0.5,0.5g,,,75.30,0.5,pre-roll flower,True,Pacific Stone Preroll 0.5g Sativa Banjo 14,Pacific Stone Preroll 0.5g Sativa Banjo 14 (Pr...
855014,C9-0000323-LIC,0135315780,active,Consumer,2021-10-10 15:04:58+00:00,2021-10,1,44.40,active,17399289,1A4060300006D0A000510361,Pacific Stone Preroll 0.5g Sativa Banjo 14-Pa...,Pre-Roll Flower,Each,1.0,44.40,44.40,2021-10,12185,0.5,0.5g,,,88.80,0.5,pre-roll flower,True,Pacific Stone Preroll 0.5g Sativa Banjo 14,Pacific Stone Preroll 0.5g Sativa Banjo 14 (Pr...


In [118]:
# Rows in the 'Concentrate (Each)' category whose product name carries a 1oz measure.
# 'Concentrate (Each)' is a value of tx_product_category_name (see the recorded output
# rows), so the previous comparison against tx_product_name selected the wrong column.
# NOTE(review): the name `edible` is misleading for concentrate rows; it is kept
# because a later cell reads it.
edible = df_sales[(df_sales['tx_product_category_name'] == 'Concentrate (Each)')&(df_sales['oz_measure_from_product_name'] == '1oz')]
edible

Unnamed: 0,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,sales_month,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,tx_price_per_unit,date_in_month,bad_numbers_from_product_name,extracted_units,letter_gram_measure_from_product_name,gram_measure_from_product_name,oz_measure_from_product_name,adjusted_tx_price_per_unit,measurement_ratio_vs_gram,combined_product_category,brand_breakable_by_dash_boolean,brands,brands_by_category
1628714,MR282376,0017487226,active,Consumer,2021-02-22 09:42:57.557000+00:00,2021-02,8,440.33,active,1501801,1A40A03000002BF000013152,M: AU: 1oz Tincture RSO Indica 150mg,Concentrate (Each),Each,1.0,45.0,45.0,2021-02,00000788517,28.0,150mg,,1oz,1.607143,28.0,concentrate,False,M: AU: 1oz Tincture RSO Indica 150mg,M: AU: 1oz Tincture RSO Indica 150mg
1619985,MR282376,0017649822,active,Consumer,2021-02-26 14:04:48.180000+00:00,2021-02,3,180.98,active,1501801,1A40A03000002BF000013152,M: AU: 1oz Tincture RSO Indica 150mg,Concentrate (Each),Each,1.0,45.0,45.0,2021-02,00000788517,28.0,150mg,,1oz,1.607143,28.0,concentrate,False,M: AU: 1oz Tincture RSO Indica 150mg,M: AU: 1oz Tincture RSO Indica 150mg
1613369,MR282376,0017739670,active,Consumer,2021-02-28 18:45:51.513000+00:00,2021-02,3,140.00,active,1501801,1A40A03000002BF000013152,M: AU: 1oz Tincture RSO Indica 150mg,Concentrate (Each),Each,1.0,45.0,45.0,2021-02,00000788517,28.0,150mg,,1oz,1.607143,28.0,concentrate,False,M: AU: 1oz Tincture RSO Indica 150mg,M: AU: 1oz Tincture RSO Indica 150mg
1615982,MR282376,0017699160,active,Consumer,2021-02-27 17:31:38.933000+00:00,2021-02,1,45.00,active,1501801,1A40A03000002BF000013152,M: AU: 1oz Tincture RSO Indica 150mg,Concentrate (Each),Each,1.0,45.0,45.0,2021-02,00000788517,28.0,150mg,,1oz,1.607143,28.0,concentrate,False,M: AU: 1oz Tincture RSO Indica 150mg,M: AU: 1oz Tincture RSO Indica 150mg
1627628,MR282376,0017487420,active,Consumer,2021-02-22 17:31:18+00:00,2021-02,7,420.89,active,1501801,1A40A03000002BF000013152,M: AU: 1oz Tincture RSO Indica 150mg,Concentrate (Each),Each,2.0,90.0,45.0,2021-02,00000788517,28.0,150mg,,1oz,1.607143,28.0,concentrate,False,M: AU: 1oz Tincture RSO Indica 150mg,M: AU: 1oz Tincture RSO Indica 150mg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136813,MR282376,0037552498,active,Consumer,2022-03-08 21:14:11.800000+00:00,2022-03,3,84.38,active,3218774,1A40A03000002BF000041158,M: AU: 1oz Tincture RSO Hybrid 150mg,Concentrate (Each),Each,1.0,35.0,35.0,2022-03,00000788629,28.0,150mg,,1oz,1.250000,28.0,concentrate,False,M: AU: 1oz Tincture RSO Hybrid 150mg,M: AU: 1oz Tincture RSO Hybrid 150mg
117968,MR282376,0037801561,active,Consumer,2022-03-12 16:08:59.950000+00:00,2022-03,2,45.00,active,3134269,1A40A03000002BF000033666,M: AU: 1oz Tincture RSO Indica 150mg,Concentrate (Each),Each,1.0,35.0,35.0,2022-03,00000788517,28.0,150mg,,1oz,1.250000,28.0,concentrate,False,M: AU: 1oz Tincture RSO Indica 150mg,M: AU: 1oz Tincture RSO Indica 150mg
125990,MR282376,0037741781,active,Consumer,2022-03-11 11:56:54.417000+00:00,2022-03,2,51.99,active,3134269,1A40A03000002BF000033666,M: AU: 1oz Tincture RSO Indica 150mg,Concentrate (Each),Each,1.0,35.0,35.0,2022-03,00000788517,28.0,150mg,,1oz,1.250000,28.0,concentrate,False,M: AU: 1oz Tincture RSO Indica 150mg,M: AU: 1oz Tincture RSO Indica 150mg
699,MR282376,0039177659,active,Consumer,2022-04-04 19:27:07.800000+00:00,2022-04,1,22.50,active,3218774,1A40A03000002BF000041158,M: AU: 1oz Tincture RSO Hybrid 150mg,Concentrate (Each),Each,1.0,22.5,22.5,2022-04,00000788629,28.0,150mg,,1oz,0.803571,28.0,concentrate,False,M: AU: 1oz Tincture RSO Hybrid 150mg,M: AU: 1oz Tincture RSO Hybrid 150mg


In [162]:
# Grouped oz measures (lower-cased) for 'Shake/Trim allocated for extraction' rows.
shake_trim_extraction = df_sales.loc[
    df_sales['tx_product_category_name'] == 'Shake/Trim allocated for extraction',
    ['oz_measure_from_product_name', 'tx_price_per_unit'],
]
oz_measure_counts = shake_trim_extraction.groupby(['oz_measure_from_product_name']).count()
oz_measure_counts.reset_index()['oz_measure_from_product_name'].str.lower()




Series([], Name: oz_measure_from_product_name, dtype: object)

In [43]:
# NOTE(review): this looks like a leftover fragment of a product-category
# list. On its own the cell only evaluates to a tuple of five strings, which
# does not match the Series output recorded below it — confirm whether the
# cell can be deleted or should be restored to its original filter.
(
    '',
    '',
    'Infused Butter/Oil (weight - each)',
    'Infused Butter/Oil (volume - each)',
    'Shake/Trim allocated for extraction',
)

2300483    Shake/Trim (by strain)
2300471    Shake/Trim (by strain)
2300476    Shake/Trim (by strain)
2300557    Shake/Trim (by strain)
2300519    Shake/Trim (by strain)
                    ...          
14384      Shake/Trim (by strain)
14315      Shake/Trim (by strain)
14036      Shake/Trim (by strain)
14275      Shake/Trim (by strain)
14289      Shake/Trim (by strain)
Name: tx_product_category_name, Length: 28416, dtype: object

In [25]:
# Show every row whose letter-gram measure extracted from the product name is
# exactly '.1G'.
# NOTE(review): the recorded output contains 'Pre-Roll Flower' rows, so
# `edible` may not hold the category its name suggests — confirm upstream.
edible.loc[edible['letter_gram_measure_from_product_name'].eq('.1G')]

Unnamed: 0,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,sales_month,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,tx_price_per_unit,date_in_month,bad_numbers_from_product_name,extracted_units,letter_gram_measure_from_product_name,gram_measure_from_product_name,oz_measure_from_product_name,adjusted_tx_price_per_unit,measurement_ratio_vs_gram,combined_product_category,brand_breakable_by_dash_boolean,brands,brands_by_category
628364,C10-0000169-LIC,0146386635,active,Consumer,2021-11-26 20:34:14.670000+00:00,2021-11,17,177.36,active,19998060,1A4060300005DD3000034709,Herbarium Zerbert .1G Pre-Roll,Pre-Roll Flower,Each,1.0,12.30,12.30,2021-11,,0.1,.1G,,,123.0,0.1,pre-roll flower,False,Herbarium Zerbert .1G Pre-Roll,Herbarium Zerbert .1G Pre-Roll
611181,C10-0000169-LIC,0147388616,active,Consumer,2021-11-30 21:22:11.120000+00:00,2021-11,1,10.23,active,19998060,1A4060300005DD3000034709,Herbarium Zerbert .1G Pre-Roll,Pre-Roll Flower,Each,1.0,10.23,10.23,2021-11,,0.1,.1G,,,102.3,0.1,pre-roll flower,False,Herbarium Zerbert .1G Pre-Roll,Herbarium Zerbert .1G Pre-Roll
531231,C10-0000169-LIC,0151448229,inactive,Consumer,2021-12-18 10:41:22.910000+00:00,2021-12,17,209.29,inactive,19998060,1A4060300005DD3000034709,Herbarium Zerbert .1G Pre-Roll,Pre-Roll Flower,Each,1.0,11.50,11.50,2021-12,,0.1,.1G,,,115.0,0.1,pre-roll flower,False,Herbarium Zerbert .1G Pre-Roll,Herbarium Zerbert .1G Pre-Roll
528151,C10-0000169-LIC,0151555127,inactive,Consumer,2021-12-18 17:24:37.880000+00:00,2021-12,4,46.40,inactive,19998060,1A4060300005DD3000034709,Herbarium Zerbert .1G Pre-Roll,Pre-Roll Flower,Each,1.0,13.44,13.44,2021-12,,0.1,.1G,,,134.4,0.1,pre-roll flower,False,Herbarium Zerbert .1G Pre-Roll,Herbarium Zerbert .1G Pre-Roll
518894,C10-0000169-LIC,0151974770,inactive,Consumer,2021-12-20 16:29:41.210000+00:00,2021-12,6,129.34,inactive,19998060,1A4060300005DD3000034709,Herbarium Zerbert .1G Pre-Roll,Pre-Roll Flower,Each,1.0,8.50,8.50,2021-12,,0.1,.1G,,,85.0,0.1,pre-roll flower,False,Herbarium Zerbert .1G Pre-Roll,Herbarium Zerbert .1G Pre-Roll
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127180,C10-0000169-LIC,0170824201,active,Patient,2022-03-10 21:42:56.990000+00:00,2022-03,5,191.90,active,22785354,1A406030000A803000002864,Herbarium Zerbert .1G Pre-Roll 5 Pack,Pre-Roll Flower,Each,1.0,51.26,51.26,2022-03,,0.1,.1G,,,512.6,0.1,pre-roll flower,False,Herbarium Zerbert .1G Pre-Roll 5 Pack,Herbarium Zerbert .1G Pre-Roll 5 Pack
124622,C10-0000169-LIC,0170969604,active,Consumer,2022-03-11 15:12:36.900000+00:00,2022-03,3,85.93,active,22785354,1A406030000A803000002864,Herbarium Zerbert .1G Pre-Roll 5 Pack,Pre-Roll Flower,Each,1.0,28.64,28.64,2022-03,,0.1,.1G,,,286.4,0.1,pre-roll flower,False,Herbarium Zerbert .1G Pre-Roll 5 Pack,Herbarium Zerbert .1G Pre-Roll 5 Pack
3938,C10-0000169-LIC,0177503207,active,Consumer,2022-04-04 10:03:59.330000+00:00,2022-04,10,280.05,active,22785354,1A406030000A803000002864,Herbarium Zerbert .1G Pre-Roll 5 Pack,Pre-Roll Flower,Each,1.0,-3.21,-3.21,2022-04,,0.1,.1G,,,-32.1,0.1,pre-roll flower,False,Herbarium Zerbert .1G Pre-Roll 5 Pack,Herbarium Zerbert .1G Pre-Roll 5 Pack
16211,C10-0000169-LIC,0176929107,active,Patient,2022-04-01 19:27:19.340000+00:00,2022-04,7,307.04,active,22785354,1A406030000A803000002864,Herbarium Zerbert .1G Pre-Roll 5 Pack,Pre-Roll Flower,Each,1.0,51.26,51.26,2022-04,,0.1,.1G,,,512.6,0.1,pre-roll flower,False,Herbarium Zerbert .1G Pre-Roll 5 Pack,Herbarium Zerbert .1G Pre-Roll 5 Pack


In [23]:
# Count non-null unit prices per raw gram-measure string. Values are grouped
# exactly as extracted, so capitalisation variants stay separate (the recorded
# output lists both 'Gram' and 'gram').
(
    edible
    .groupby(['gram_measure_from_product_name'])[['tx_price_per_unit']]
    .count()
)




Unnamed: 0_level_0,tx_price_per_unit
gram_measure_from_product_name,Unnamed: 1_level_1
Gram,2066
Half Gram,17
gram,1333


In [24]:
# Count non-null unit prices per raw oz-measure string extracted from the
# product name (values are grouped exactly as extracted, no normalisation).
(
    edible
    .groupby(['oz_measure_from_product_name'])[['tx_price_per_unit']]
    .count()
)




Unnamed: 0_level_0,tx_price_per_unit
oz_measure_from_product_name,Unnamed: 1_level_1
.09oz,65


# Testing

In [None]:
from underwriting import msrp_band_analyzer_testing


In [None]:
# Run the test-set analysis helper for the 'Flower' category; the result is
# inspected element by element in the following cells.
# NOTE(review): the last cell of this section passes 'tx_product_category_name'
# as the first argument instead — confirm which column name the helper expects.
result = msrp_band_analyzer_testing.run_test_set_analysis(
    'product_category_name',
    'Flower',
    'C',
)

In [None]:
# Display the first element of `result` (what it holds is defined by
# run_test_set_analysis, which is not visible here — TODO describe it).
result[0]

In [None]:
# Display the second element of `result` (what it holds is defined by
# run_test_set_analysis, which is not visible here — TODO describe it).
result[1]

In [None]:
# Run the test-set analysis helper for the 'Pre-Roll Flower' category.
# The call is qualified with the module name: only the module
# `msrp_band_analyzer_testing` is imported in this notebook, so the bare
# function name would raise NameError on a fresh kernel.
# NOTE(review): this overwrites `result` from the 'Flower' run above, and uses
# 'tx_product_category_name' where that run used 'product_category_name' —
# confirm which column name the helper expects.
result = msrp_band_analyzer_testing.run_test_set_analysis(
    'tx_product_category_name',
    'Pre-Roll Flower',
    'S',
)