In [1]:
import json
import numpy 
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline  
import pyarrow
import sys
from tqdm import tqdm
import seaborn as sns
from datetime import date
from dotenv import load_dotenv
from sqlalchemy import create_engine
from os import path
from typing import List,Dict, Tuple
from collections import defaultdict
# Show every column when a DataFrame is displayed.
pd.set_option("display.max_columns", None)

# Read settings from a local .env file (if present) into the process environment.
load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')
# Fail fast with a readable message: an unset variable would otherwise reach
# os.path.expanduser(None) and raise an opaque TypeError.
if not BIGQUERY_CREDENTIALS_PATH:
    raise RuntimeError(
        "BIGQUERY_CREDENTIALS_PATH is not set; define it in the environment or in the .env file"
    )
engine = create_engine('bigquery://bespoke-financial/ProdMetrcData', credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH))

# Extend the import search path with three directories resolved relative to the
# current working directory, so the project imports that follow can be found.
for _relative_dir in ("../core", "../../src", "../../scripts"):
    sys.path.append(path.realpath(path.join(os.getcwd(), _relative_dir)))

import create_queries
import prepare_data

from bespoke.inventory.analysis.shared import download_util, inventory_types
from bespoke.inventory.analysis import active_inventory_util as util
from bespoke.inventory.analysis import inventory_valuations_util as valuations_util
from underwriting import msrp_band_analyzer

%load_ext autoreload
%autoreload 2



In [2]:
# Company identifiers handed to the analyzer's update_company_data call.
COMPANY_IDENTIFIER_LIST = [
    'DL', 'DW', 'EMA', 'EMM', 'EMT',
    'EMF', 'ST', 'GRG', 'EL', 'VS',
]

In [3]:
# Create the MSRP band analyzer with its default constructor arguments.
mba = msrp_band_analyzer.MSRPBand()

In [4]:
# Load data for the listed companies into the analyzer. The logged output below
# shows per-license download-summary verification and the preparation steps.
mba.update_company_data(COMPANY_IDENTIFIER_LIST)


Verifying download summaries for license MR282376...
Earliest download summary: 2020-01-01
Latest download summary: 2022-04-06
Found bad download summary for license MR282376 on date 2022-03-31
Found bad download summary for license MR282376 on date 2022-03-24

Verifying download summaries for license MR283369...
Earliest download summary: 2020-01-01
Latest download summary: 2022-04-06
Found bad download summary for license MR283369 on date 2022-03-10
Found bad download summary for license MR283369 on date 2022-03-05

Verifying download summaries for license 402-00840...
Earliest download summary: 2020-01-01
Latest download summary: 2022-04-06

Verifying download summaries for license 403-01293...
Earliest download summary: 2020-01-01
Latest download summary: 2022-04-06

Verifying download summaries for license 402R-00545...
Earliest download summary: 2020-01-01
Latest download summary: 2022-04-06

Verifying download summaries for license 403R-01201...
Earliest download summary: 2020-0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['count_measure_from_product_name'][count_measure_non_na_index] = df['count_measure_from_product_name'][count_measure_non_na_index].apply(mba_util.extract_count_units)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['extracted_units'][idx] = df[measure_column][idx].apply(mba_util.EXTRACTED_MEASUREMENT_COLUMNS[measure_column])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['count_measure_from_product_name'][count_measure_non_na_index] = df['count_measure_from_

### Combining same product categories with different measurements ### 	


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[measure_ratio_column_name][includes_measurement] = self.unit_conversion_ratio(measurement, measurement_unit)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[measure_ratio_column_name][includes_measurement] = self.unit_conversion_ratio(measurement, measurement_unit)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[measure_ratio_column_name][extracted_units_idx] = df['extracted_units'][extracted_units_idx]
A value is trying to be set on a copy of a slice from a 

### Breaking down product names into different brands ### 	


  (df[product_name].str.contains("^(1 ml)") == False) &
  (df[product_name].str.contains("^(1 ml)") == False) &


### Available combined product category name in costs dataframe ### 	
['concentrate' 'flower' 'edible' 'extract' 'pre-roll infused' 'shake/trim'
 'infused' 'vape cartridge' 'other concentrate' 'buds' 'pre-roll leaf'
 'pre-roll flower' 'topical' 'tincture' 'capsule' 'shake'
 'wet whole plants' 'vape product' 'raw pre-rolls' 'seeds'
 'infused butter/oil' 'infused pre-rolls' 'kief' 'infused beverage'
 'shake/trim allocated for extraction' 'clone - cutting']
### Available combined product category name in sales dataframe ### 	
['other concentrate' 'edible' 'flower' 'pre-roll flower' 'buds'
 'concentrate' 'infused' 'shake/trim' 'extract' 'vape cartridge'
 'pre-roll infused' 'pre-roll leaf' 'topical' 'immature plants' 'capsule'
 'tincture' 'shake' 'vape product' 'raw pre-rolls'
 'shake/trim allocated for extraction' 'infused pre-rolls'
 'infused butter/oil' 'seeds' 'infused beverage' 'kief' 'clone - cutting']


In [5]:
### Work on a copy of the analyzer's sales DataFrame for the exploration below.
df_sales = mba.company_sales_df.copy()

In [6]:
# Preview the first rows to see which columns are available.
df_sales.head()

Unnamed: 0,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,sales_month,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,tx_price_per_unit,date_in_month,bad_numbers_from_product_name,extracted_units,letter_gram_measure_from_product_name,count_measure_from_product_name,gram_measure_from_product_name,oz_measure_from_product_name,adjusted_tx_price_per_unit,measurement_ratio_vs_gram,combined_product_category,brand_breakable_by_dash_boolean,brands,brands_by_category
2326073,C10-0000169-LIC,5181809,inactive,Consumer,2020-01-02 08:16:34+00:00,2020-01,2,33.76,inactive,1782428,1A4060300004D62000000186,VVS - Vape - 0.3g - Rose Gold - Indica - XXX OG,Other Concentrate (weight - each),Each,1.0,16.88,16.88,2020-01,,0.3,0.3g,,,,56.266667,0.3,other concentrate,True,VVS,VVS (Other Concentrate (weight - each))
2305986,C10-0000169-LIC,7521891,inactive,Consumer,2020-01-22 13:41:39+00:00,2020-01,5,36.58,inactive,1803899,1A4060300004B03000009717,LOL Edibles - Edible - 100mg - Doob Cube - Blu...,Edible (weight - each),Each,1.0,17.22,17.22,2020-01,,0.1,100mg,,,,172.2,0.1,edible,True,LOL Edibles,LOL Edibles (Edible (weight - each))
2305987,C10-0000169-LIC,7521891,inactive,Consumer,2020-01-22 13:41:39+00:00,2020-01,5,36.58,inactive,1804087,1A4060300004B03000009761,LOL Edibles - Edible - 100mg - Doob Cube - Ras...,Edible (weight - each),Each,1.0,1.0,1.0,2020-01,,0.1,100mg,,,,10.0,0.1,edible,True,LOL Edibles,LOL Edibles (Edible (weight - each))
2305988,C10-0000169-LIC,7521891,inactive,Consumer,2020-01-22 13:41:39+00:00,2020-01,5,36.58,inactive,2111948,1A4060300004B03000010682,THC Design - Flower - 3.5g - Sativa - Crescendo,Flower,Grams,3.5,1.0,0.285714,2020-01,,,3.5g,,,,0.285714,1.0,flower,True,THC Design,THC Design (Flower)
2305989,C10-0000169-LIC,7521891,inactive,Consumer,2020-01-22 13:41:39+00:00,2020-01,5,36.58,inactive,1782339,1A4060300004D62000000057,Puffy Delivery - Pre Roll - 0.8g - Sativa - Fa...,Pre-Roll Flower,Each,1.0,8.68,8.68,2020-01,,0.8,0.8g,,,,10.85,0.8,pre-roll flower,True,Puffy Delivery,Puffy Delivery (Pre-Roll Flower)


In [None]:
### Non-null counts per (product category, unit of measure) pair, ordered by unit of measure descending.
(
    df_sales
    .groupby(['tx_product_category_name', 'tx_unit_of_measure'])
    .count()
    .sort_values(by='tx_unit_of_measure', ascending=False)
)

# 1. uom - each:
- Same underlying UOM (e.g. gram), but products come in different quantities
- Different underlying UOMs (e.g. a mix of gram and oz)

In [None]:
## examples

In [None]:
# Example rows for one weight-based "each" product, limited to the unit-of-measure columns.
df_sales.loc[
    df_sales['tx_product_category_name'].eq('Other Concentrate (weight - each)')
    & df_sales['tx_product_name'].eq('Wave - Cartridge - 0.5g - Hybrid - Lemon'),
    ['tx_product_category_name', 'tx_product_name', 'tx_unit_of_measure', 'tx_quantity_sold'],
].head()





In [None]:
# Second example from the same category, for a product labelled 1g instead of 0.5g.
df_sales.loc[
    df_sales['tx_product_category_name'].eq('Other Concentrate (weight - each)')
    & df_sales['tx_product_name'].eq('Thclear - 1g - Honey Pot - Skywalker OG'),
    ['tx_product_category_name', 'tx_product_name', 'tx_unit_of_measure', 'tx_quantity_sold'],
].head()





In [None]:
## examples

In [None]:

# Example rows for a volume-based edible whose product name carries an ounce measure.
df_sales.loc[
    df_sales['tx_product_category_name'].eq('Edible (volume - each)')
    & df_sales['tx_product_name'].eq("Uncle Arnie's Iced Tea Lemonade Beverage 8oz Glass"),
    ['tx_product_category_name', 'tx_product_name', 'tx_unit_of_measure', 'tx_quantity_sold'],
].head()

In [None]:

# Same category, but the product name carries a milligram measure instead.
df_sales.loc[
    df_sales['tx_product_category_name'].eq('Edible (volume - each)')
    & df_sales['tx_product_name'].eq("Hapy - Edible - 100mg - Syrup Shot - Tropical Mix"),
    ['tx_product_category_name', 'tx_product_name', 'tx_unit_of_measure', 'tx_quantity_sold'],
].head()

# 2. how do we do regex

## 1. remove bad numbers

In [None]:
# Preview rows where a "bad number" was found in the product name.
# 'original_product_name' is not among the columns shown by df_sales.head() above,
# so only the requested columns that actually exist are selected; a hard-coded
# selection would raise KeyError on the missing one.
df_sales.loc[
    df_sales['bad_numbers_from_product_name'].notna(),
    [
        column
        for column in ['original_product_name', 'bad_numbers_from_product_name', 'tx_product_name']
        if column in df_sales.columns
    ],
].head()

In [None]:
# Fraction of all sales rows that have a bad number recorded.
len(df_sales.loc[df_sales['bad_numbers_from_product_name'].notna()]) / len(df_sales)

## 2. grab letter grams / gram / oz

In [None]:
# Product names next to the letter-gram measure parsed from them.
df_sales.loc[
    df_sales['letter_gram_measure_from_product_name'].notna(),
    ['tx_product_name', 'letter_gram_measure_from_product_name'],
].head()




In [None]:
# Fraction of all sales rows with a letter-gram measure.
len(df_sales.loc[df_sales['letter_gram_measure_from_product_name'].notna()]) / len(df_sales)

In [None]:

# Rows whose gram measure is the literal word 'gram'.
df_sales.loc[
    df_sales['gram_measure_from_product_name'].notna()
    & df_sales['gram_measure_from_product_name'].eq('gram'),
    ['tx_product_name', 'gram_measure_from_product_name'],
].head()




In [None]:
# Fraction of all sales rows with a gram measure.
len(df_sales.loc[df_sales['gram_measure_from_product_name'].notna()]) / len(df_sales)

In [None]:


# Product names next to the ounce measure parsed from them.
df_sales.loc[
    df_sales['oz_measure_from_product_name'].notna(),
    ['tx_product_name', 'oz_measure_from_product_name'],
].head()




In [None]:
# Fraction of all sales rows with an ounce measure.
len(df_sales.loc[df_sales['oz_measure_from_product_name'].notna()]) / len(df_sales)

# 3. combine 

In [None]:
# Letter-gram measures side by side with the combined extracted_units value.
df_sales.loc[
    df_sales['letter_gram_measure_from_product_name'].notna(),
    ['tx_product_name', 'letter_gram_measure_from_product_name', 'extracted_units'],
].head()




In [None]:

# 'gram' measures side by side with the combined extracted_units value.
df_sales.loc[
    df_sales['gram_measure_from_product_name'].notna()
    & df_sales['gram_measure_from_product_name'].eq('gram'),
    ['tx_product_name', 'gram_measure_from_product_name', 'extracted_units'],
].head()




In [None]:


# Ounce measures side by side with the combined extracted_units value.
# Matches the two sibling cells in this section: select the comparison columns
# and preview with .head() instead of rendering every matching row and column.
df_sales.loc[
    df_sales['oz_measure_from_product_name'].notna(),
    ['tx_product_name', 'oz_measure_from_product_name', 'extracted_units'],
].head()




# 4. coverage

In [None]:
# Display the sales frame (the notebook truncates long frames when rendering).
df_sales

In [None]:
## Per-category share of rows with a parsed `extracted_units`, for categories
## that have at least one row not sold with the 'Grams' unit of measure.
# Collect one single-row frame per category and concatenate once at the end:
# DataFrame.append inside a loop is quadratic and was removed in pandas 2.0.
coverage_frames = []
# dropna(): a NaN category matches no rows and would make the ratio divide by zero.
for p in df_sales.loc[df_sales['tx_unit_of_measure'] != 'Grams', 'tx_product_category_name'].dropna().unique():
    print(p)
    in_category = mba.company_sales_df['tx_product_category_name'] == p
    has_units = mba.company_sales_df['extracted_units'].notna()
    df_temp = pd.DataFrame([int((in_category & has_units).sum()) / int(in_category.sum())])
    df_temp['category'] = p
    coverage_frames.append(df_temp)
# Same layout as before: column 0 holds the ratio, 'category' holds the label.
df_coverage = pd.concat(coverage_frames) if coverage_frames else pd.DataFrame()




# 5. sanity checks

In [None]:
# All sales rows in the packaged-quarter flower category.
df_sales.loc[df_sales['tx_product_category_name'].eq('Flower (packaged quarter - each)')]

In [None]:
###
#mba.company_costs_df.groupby(['product_category_name','shipped_unit_of_measure']).count()

In [None]:
###
#mba.company_sales_df[mba.company_sales_df['tx_product_category_name'] == 'Concentrate']

In [None]:
# Share of packaged-quarter flower rows that have a parsed extracted_units value.
pd.DataFrame([
    len(mba.company_sales_df.loc[
        mba.company_sales_df['tx_product_category_name'].eq('Flower (packaged quarter - each)')
        & mba.company_sales_df['extracted_units'].notna()
    ])
    / len(mba.company_sales_df.loc[
        mba.company_sales_df['tx_product_category_name'].eq('Flower (packaged quarter - each)')
    ])
])





In [None]:

# Absolute frequency of each extracted unit within packaged-quarter flower.
mba.company_sales_df.loc[
    mba.company_sales_df['tx_product_category_name'].eq('Flower (packaged quarter - each)'),
    'extracted_units',
].value_counts()

In [None]:
# All sales rows in the weight-based capsule category.
mba.company_sales_df.loc[
    mba.company_sales_df['tx_product_category_name'].eq('Capsule (weight - each)')
]



In [None]:
## Relative frequency of each extracted unit within every product category.
# Collect one frame per category and concatenate once at the end:
# DataFrame.append inside a loop is quadratic and was removed in pandas 2.0.
unit_share_frames = []
for p in mba.company_sales_df['tx_product_category_name'].dropna().unique():
    print(p)
    df_temp = (
        mba.company_sales_df.loc[mba.company_sales_df['tx_product_category_name'] == p, 'extracted_units']
        .value_counts(normalize=True)
        # pandas 2.x names this Series 'proportion' and its index 'extracted_units';
        # normalise both so the share column is 'extracted_units' on every version
        # and the sort below targets the shares, not the index.
        .rename('extracted_units')
        .rename_axis(None)
        .to_frame()
        .sort_values(by='extracted_units', ascending=False)
    )
    df_temp['category'] = p
    unit_share_frames.append(df_temp)
df = pd.concat(unit_share_frames) if unit_share_frames else pd.DataFrame()




In [None]:
## Per-category share of rows with a parsed `extracted_units`, for every category.
# Collect one single-row frame per category and concatenate once at the end:
# DataFrame.append inside a loop is quadratic and was removed in pandas 2.0.
coverage_frames = []
# dropna(): a NaN category matches no rows and would make the ratio divide by zero.
for p in mba.company_sales_df['tx_product_category_name'].dropna().unique():
    print(p)
    in_category = mba.company_sales_df['tx_product_category_name'] == p
    has_units = mba.company_sales_df['extracted_units'].notna()
    df_temp = pd.DataFrame([int((in_category & has_units).sum()) / int(in_category.sum())])
    df_temp['category'] = p
    coverage_frames.append(df_temp)
# Same layout as before: column 0 holds the ratio, 'category' holds the label.
df_coverage = pd.concat(coverage_frames) if coverage_frames else pd.DataFrame()




In [None]:
# Show the per-category coverage table built in the previous cell.
df_coverage

In [None]:
#df.to_csv('dominant_unit.csv')

In [None]:
# 'Concentrate (Each)' rows for which no unit could be extracted.
mba.company_sales_df.loc[
    mba.company_sales_df['tx_product_category_name'].eq('Concentrate (Each)')
    & mba.company_sales_df['extracted_units'].isna()
]



In [None]:
### Monthly median adjusted price for packaged-quarter flower rows that have extracted units.
(
    mba.company_sales_df
    .loc[
        mba.company_sales_df.tx_product_category_name.eq('Flower (packaged quarter - each)')
        & mba.company_sales_df['extracted_units'].notna()
    ]
    .groupby(['sales_month'])[['adjusted_tx_price_per_unit']]
    .median()
)



In [None]:
# Number of rows with no adjusted price per unit.
mba.company_sales_df.adjusted_tx_price_per_unit.isna().sum()

In [None]:
# Total row count of the column, for comparison with the missing-value count above.
mba.company_sales_df.adjusted_tx_price_per_unit.shape

In [None]:
# Scratch copy of the analyzer's sales frame for the checks below.
temp = mba.company_sales_df.copy()

In [None]:
# Number of rows that carry a letter-gram measure.
temp.letter_gram_measure_from_product_name.notna().sum()

In [None]:
# Preview the scratch copy.
temp.head()

In [None]:
# Count 'Each' rows with no extracted units whose gram ratio is exactly 1.
(
    temp.tx_unit_of_measure.eq('Each')
    & temp.extracted_units.isna()
    & temp.measurement_ratio_vs_gram.eq(1)
).sum()

In [None]:
## Coverage %
# NOTE(review): both figures are hard-coded; presumably they were copied from the
# outputs of earlier cells (uncovered row count and total row count) - confirm and
# recompute if the underlying data changes.
_numerator = 299024
_denominator = 1405912
1 - (_numerator / _denominator)

In [None]:
# Run the analysis for 'Pre-Roll Flower' keyed on tx_product_category_name.
# NOTE(review): 'S' presumably selects the sales data - confirm against run_analysis.
mba.run_analysis('tx_product_category_name', 'Pre-Roll Flower', 'S')

In [None]:
# Run the analysis for 'Pre-Roll Flower' keyed on product_category_name.
# NOTE(review): 'C' presumably selects the costs data - confirm against run_analysis.
mba.run_analysis('product_category_name', 'Pre-Roll Flower', 'C')

In [None]:
# Run the analysis on the combined 'flower' category.
# NOTE(review): 'S' presumably selects the sales data - confirm against run_analysis.
mba.run_analysis('combined_product_category', 'flower', 'S')

In [None]:
# Run the analysis on the combined 'flower' category.
# NOTE(review): 'C' presumably selects the costs data - confirm against run_analysis.
mba.run_analysis('combined_product_category', 'flower', 'C')

In [None]:
###
#mba.company_sales_df[mba.company_sales_df.tx_product_category_name == 'Flower (packaged quarter - each)'][['adjusted_tx_price_per_unit','sales_month']].groupby(['sales_month']).mean()



In [None]:
# Median adjusted price across every row in the combined 'flower' category.
combined_median_price = mba.company_sales_df.loc[
    mba.company_sales_df.combined_product_category == 'flower', 'adjusted_tx_price_per_unit'
].median()

# Median unadjusted price per unit for each pre-packaged flower size.
eighth_median_price = mba.company_sales_df.loc[
    mba.company_sales_df.tx_product_category_name == 'Flower (packaged eighth - each)', 'tx_price_per_unit'
].median()
quarter_median_price = mba.company_sales_df.loc[
    mba.company_sales_df.tx_product_category_name == 'Flower (packaged quarter - each)', 'tx_price_per_unit'
].median()
half_once_median_price = mba.company_sales_df.loc[
    mba.company_sales_df.tx_product_category_name == 'Flower (packaged half ounce - each)', 'tx_price_per_unit'
].median()
once_median_price = mba.company_sales_df.loc[
    mba.company_sales_df.tx_product_category_name == 'Flower (packaged ounce - each)', 'tx_price_per_unit'
].median()

In [None]:
# Median adjusted price for rows in the plain 'Flower' category (no package size in the name).
combined_median_price_flower_non_labeled = mba.company_sales_df.loc[
    mba.company_sales_df.tx_product_category_name == 'Flower', 'adjusted_tx_price_per_unit'
].median()


In [None]:
# Median adjusted price for the combined 'flower' category.
combined_median_price

In [None]:
# Median adjusted price for the plain 'Flower' category.
combined_median_price_flower_non_labeled

In [None]:
# Per-gram equivalent of the eighth package median, taking an eighth as 3.5 g.
eighth_median_price / 3.5

In [None]:
# Per-gram equivalent of the quarter package median, taking a quarter as 7 g.
quarter_median_price / 7

In [None]:
# Per-gram equivalent of the half-ounce package median, taking a half ounce as 14 g.
half_once_median_price / 14

In [None]:
# Per-gram equivalent of the ounce package median, taking an ounce as 28 g.
once_median_price / 28

In [None]:
# Summary statistics of raw vs adjusted prices for every category whose name
# contains 'flower' (case-insensitive), one row per category.
(
    mba.company_sales_df
    .loc[mba.company_sales_df.tx_product_category_name.str.contains('Flower', case=False)]
    .groupby('tx_product_category_name')[['tx_price_per_unit', 'adjusted_tx_price_per_unit']]
    .describe()
)

For these flower categories, `adjusted_tx_price_per_unit` is on a smaller scale overall than `tx_price_per_unit`, because package-level prices are converted down to a per-gram basis.

In [None]:
# Ask the analyzer to output its time-series metadata.
mba.output_time_series_metadata()

In [None]:
# Persist the analyzer via its save() method.
# NOTE(review): presumably this writes the pickle file read back below - confirm.
mba.save()

In [None]:
import pickle

# Read back a previously saved analyzer object from the working directory.
# NOTE(review): pickle.load can execute arbitrary code, so only load files that
# this project wrote itself.
# The context manager closes the file handle, which the bare open() never did.
with open('msrp_band_analyzer_training_object', 'rb') as picklefile:
    old_mba = pickle.load(picklefile)

In [None]:
# The unpickled object is indexed by key; preview its stored sales frame.
old_mba['company_sales_df'].head()

In [None]:
# Rebuild an analyzer from the two DataFrames stored in the unpickled object.
new_mba = msrp_band_analyzer.MSRPBand(
    company_costs_df=old_mba['company_costs_df'],
    company_sales_df=old_mba['company_sales_df'],
)

In [None]:
# Preview the sales frame held by the rebuilt analyzer.
new_mba.company_sales_df.head()

In [None]:
# Display the analyzer's msrp_summary_table_by_time attribute.
# NOTE(review): presumably populated by the run_analysis calls above - confirm.
mba.msrp_summary_table_by_time

# edible

In [7]:
# Re-display the first rows of the sales copy before the per-category exploration.
df_sales.head()

Unnamed: 0,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,sales_month,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,tx_price_per_unit,date_in_month,bad_numbers_from_product_name,extracted_units,letter_gram_measure_from_product_name,count_measure_from_product_name,gram_measure_from_product_name,oz_measure_from_product_name,adjusted_tx_price_per_unit,measurement_ratio_vs_gram,combined_product_category,brand_breakable_by_dash_boolean,brands,brands_by_category
2326073,C10-0000169-LIC,5181809,inactive,Consumer,2020-01-02 08:16:34+00:00,2020-01,2,33.76,inactive,1782428,1A4060300004D62000000186,VVS - Vape - 0.3g - Rose Gold - Indica - XXX OG,Other Concentrate (weight - each),Each,1.0,16.88,16.88,2020-01,,0.3,0.3g,,,,56.266667,0.3,other concentrate,True,VVS,VVS (Other Concentrate (weight - each))
2305986,C10-0000169-LIC,7521891,inactive,Consumer,2020-01-22 13:41:39+00:00,2020-01,5,36.58,inactive,1803899,1A4060300004B03000009717,LOL Edibles - Edible - 100mg - Doob Cube - Blu...,Edible (weight - each),Each,1.0,17.22,17.22,2020-01,,0.1,100mg,,,,172.2,0.1,edible,True,LOL Edibles,LOL Edibles (Edible (weight - each))
2305987,C10-0000169-LIC,7521891,inactive,Consumer,2020-01-22 13:41:39+00:00,2020-01,5,36.58,inactive,1804087,1A4060300004B03000009761,LOL Edibles - Edible - 100mg - Doob Cube - Ras...,Edible (weight - each),Each,1.0,1.0,1.0,2020-01,,0.1,100mg,,,,10.0,0.1,edible,True,LOL Edibles,LOL Edibles (Edible (weight - each))
2305988,C10-0000169-LIC,7521891,inactive,Consumer,2020-01-22 13:41:39+00:00,2020-01,5,36.58,inactive,2111948,1A4060300004B03000010682,THC Design - Flower - 3.5g - Sativa - Crescendo,Flower,Grams,3.5,1.0,0.285714,2020-01,,,3.5g,,,,0.285714,1.0,flower,True,THC Design,THC Design (Flower)
2305989,C10-0000169-LIC,7521891,inactive,Consumer,2020-01-22 13:41:39+00:00,2020-01,5,36.58,inactive,1782339,1A4060300004D62000000057,Puffy Delivery - Pre Roll - 0.8g - Sativa - Fa...,Pre-Roll Flower,Each,1.0,8.68,8.68,2020-01,,0.8,0.8g,,,,10.85,0.8,pre-roll flower,True,Puffy Delivery,Puffy Delivery (Pre-Roll Flower)


In [None]:
#df_sales[df_sales['count_measure_from_product_name'].notna()]['tx_product_category_name'].unique()

In [None]:
# for p in df_sales['tx_product_category_name'].unique():
#     print(p)
#     if p in ['Shake/Trim (by strain)','Shake/Trim','Infused Butter/Oil (weight - each)','Infused Butter/Oil (volume - each)',\
#             'Shake/Trim allocated for extraction']:
#         continue
#     else:
#         temp = df_sales[df_sales['tx_product_category_name'] == p]
#         temp_gb = temp[['letter_gram_measure_from_product_name','tx_price_per_unit']].groupby(['letter_gram_measure_from_product_name']).count().\
#         reset_index()['letter_gram_measure_from_product_name'].str.lower()
#         #print(temp_gb)
#         temp_gb.to_csv(str(p) + '.csv')

In [245]:
# Restrict the exploration below to the 'Pre-Roll Infused' category.
data = df_sales.loc[df_sales['tx_product_category_name'].eq('Pre-Roll Infused')]




In [246]:
# Number of rows in the selected category.
data.shape[0]

116274

In [247]:
# Row count for each distinct letter-gram measure.
(
    data
    .groupby(['letter_gram_measure_from_product_name'])[['tx_price_per_unit']]
    .count()
    .reset_index()
)



Unnamed: 0,letter_gram_measure_from_product_name,tx_price_per_unit
0,.5 g,47
1,.5G,19
2,.5g,6076
3,.7 g,49
4,.7g,682
5,0.35g,183
6,0.3g,3
7,0.59g,56
8,0.5G,86
9,0.5g,15935


In [248]:
# Row count for each distinct ounce measure.
(
    data
    .groupby(['oz_measure_from_product_name'])[['tx_price_per_unit']]
    .count()
    .reset_index()
)



Unnamed: 0,oz_measure_from_product_name,tx_price_per_unit


In [249]:
# Row count for each distinct count/piece measure.
(
    data
    .groupby(['count_measure_from_product_name'])[['tx_price_per_unit']]
    .count()
    .reset_index()
)



Unnamed: 0,count_measure_from_product_name,tx_price_per_unit
0,2.0,129
1,3.0,22
2,4.0,700
3,5.0,787
4,6.0,1241
5,7.0,149
6,12.0,662
7,15.0,231
8,16.0,149
9,20.0,406


In [251]:
# Row count for each (letter-gram measure, count measure) combination.
(
    data
    .groupby(['letter_gram_measure_from_product_name', 'count_measure_from_product_name'])[['tx_price_per_unit']]
    .count()
    .reset_index()
)



Unnamed: 0,letter_gram_measure_from_product_name,count_measure_from_product_name,tx_price_per_unit
0,.5g,3.0,13
1,.5g,5.0,773
2,.5g,6.0,288
3,.5g,12.0,83
4,0.5g,2.0,6
5,0.5g,5.0,14
6,0.5g,6.0,953
7,0.5g,15.0,231
8,0.5g,16.0,149
9,0.5g,20.0,406


In [252]:
# Row count for each (letter-gram measure, ounce measure) combination.
(
    data
    .groupby(['letter_gram_measure_from_product_name', 'oz_measure_from_product_name'])[['tx_price_per_unit']]
    .count()
    .reset_index()
)



Unnamed: 0,letter_gram_measure_from_product_name,oz_measure_from_product_name,tx_price_per_unit


In [253]:
# Row count for each (count measure, ounce measure) combination.
(
    data
    .groupby(['count_measure_from_product_name', 'oz_measure_from_product_name'])[['tx_price_per_unit']]
    .count()
    .reset_index()
)



Unnamed: 0,count_measure_from_product_name,oz_measure_from_product_name,tx_price_per_unit


In [254]:
# Distinct product names that carry an ounce measure.
data.loc[data['oz_measure_from_product_name'].notna(), 'tx_product_name'].unique()




array([], dtype=object)

In [255]:
# Distinct product names that carry a count/piece measure.
data.loc[data['count_measure_from_product_name'].notna(), 'tx_product_name'].unique()




array([' Space Coyote x Utopia Infused Preroll 1.0g Indica (24ct)',
       ' Space Coyote x Utopia Infused Preroll 1.0g Sativa (24ct)',
       'Highrize Venom OG HI 4pk', 'Highrize 12pk mimosa',
       'High Rize HI 12pk Tins Moon Drop',
       'highrize 12pk infused wedding cake',
       'Highrize Hash Infused Cherry AK 4pk',
       'Highrize Hash Infused 12pk Cookies',
       'Highrize Hash Infused 12pk White Widow',
       'Highrize Hash Infused 12pk Skunk Dawg',
       ' Garden Society Infused Preroll Rosettes Sativa Sample Case (2ct)',
       'Highrize Peanut Butter Breath HI 4pk Tube',
       'Highrize Peanut Butter Breath HI 12pk Tins',
       'CLSICS Peanut Butter Breath x Ice Queen Rosin (2.8g) Infused 4ct Pre-roll',
       'CLSICS Gorilla Glue x Oak OG Rosin (2.8g) Infused 4ct Pre-roll',
       ' Garden Society Infused Preroll Rosettes .5g Hash Indica 4-Pack 2.0g (12ct)',
       ' Space Coyote Infused Preroll 0.5g Sativa Hash 5-Pack (15ct)',
       ' Space Coyote Infused Prer

In [266]:
# Price-per-unit distribution for rows labelled '3.5g'.
data.loc[
    data['letter_gram_measure_from_product_name'].eq('3.5g'), 'tx_price_per_unit'
].describe()



count    5104.00000
mean       33.30683
std         9.81491
min         0.36000
25%        25.85000
50%        31.97000
75%        41.74000
max        55.44000
Name: tx_price_per_unit, dtype: float64

In [259]:
# Price-per-unit distribution for rows without a count/piece measure.
data.loc[data['count_measure_from_product_name'].isna(), 'tx_price_per_unit'].describe()



count    111248.000000
mean         19.422700
std          11.298787
min           0.011667
25%          10.930000
50%          17.060000
75%          26.020000
max         202.000000
Name: tx_price_per_unit, dtype: float64

In [261]:
# Rows whose parsed count measure equals 20.
data.loc[data['count_measure_from_product_name'].eq(20)]

Unnamed: 0,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,sales_month,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,tx_price_per_unit,date_in_month,bad_numbers_from_product_name,extracted_units,letter_gram_measure_from_product_name,count_measure_from_product_name,gram_measure_from_product_name,oz_measure_from_product_name,adjusted_tx_price_per_unit,measurement_ratio_vs_gram,combined_product_category,brand_breakable_by_dash_boolean,brands,brands_by_category
765962,C10-0000774-LIC,0139964956,active,Consumer,2021-10-30 13:22:49+00:00,2021-10,5,82.92,active,18939303,1A4060300006D0A000556216,Glass House Farms x F/ELD Infused Preroll 0.5...,Pre-Roll Infused,Each,1.0,42.82,42.82,2021-10,17300,0.5,0.5g,20.0,,,85.64,0.5,pre-roll infused,True,Glass House Farms x F/ELD Infused Preroll 0.5g...,Glass House Farms x F/ELD Infused Preroll 0.5g...
661420,C10-0000786-LIC,0145132227,active,Consumer,2021-11-21 15:44:30+00:00,2021-11,1,37.67,active,19696524,1A4060300006D0A000580574,Glass House Farms X F/ELD Infused Preroll 0.5...,Pre-Roll Infused,Each,1.0,37.67,37.67,2021-11,17701,0.5,0.5g,20.0,,,75.34,0.5,pre-roll infused,True,Glass House Farms X F/ELD Infused Preroll 0.5g...,Glass House Farms X F/ELD Infused Preroll 0.5g...
660858,C10-0000786-LIC,0145153975,active,Consumer,2021-11-21 17:07:24+00:00,2021-11,1,29.30,active,19696521,1A4060300006D0A000580572,Glass House Farms x F/ELD Infused Preroll 0.5...,Pre-Roll Infused,Each,1.0,29.30,29.30,2021-11,17301,0.5,0.5g,20.0,,,58.60,0.5,pre-roll infused,True,Glass House Farms x F/ELD Infused Preroll 0.5g...,Glass House Farms x F/ELD Infused Preroll 0.5g...
662191,C10-0000786-LIC,0145101882,active,Consumer,2021-11-21 13:42:00+00:00,2021-11,7,279.62,active,19696523,1A4060300006D0A000580573,Glass House Farms X F/ELD Infused Preroll 0.5...,Pre-Roll Infused,Each,1.0,38.42,38.42,2021-11,17700,0.5,0.5g,20.0,,,76.84,0.5,pre-roll infused,True,Glass House Farms X F/ELD Infused Preroll 0.5g...,Glass House Farms X F/ELD Infused Preroll 0.5g...
662195,C10-0000786-LIC,0145101882,active,Consumer,2021-11-21 13:42:00+00:00,2021-11,7,279.62,active,19696521,1A4060300006D0A000580572,Glass House Farms x F/ELD Infused Preroll 0.5...,Pre-Roll Infused,Each,1.0,38.42,38.42,2021-11,17301,0.5,0.5g,20.0,,,76.84,0.5,pre-roll infused,True,Glass House Farms x F/ELD Infused Preroll 0.5g...,Glass House Farms x F/ELD Infused Preroll 0.5g...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16587,C10-0000774-LIC,0177153297,active,Consumer,2022-04-02 17:17:09+00:00,2022-04,6,186.77,active,23496094,1A4060300006D0A000659171,Glass House Farms x F/ELD Infused Preroll 0.5...,Pre-Roll Infused,Each,1.0,37.72,37.72,2022-04,17301,0.5,0.5g,20.0,,,75.44,0.5,pre-roll infused,True,Glass House Farms x F/ELD Infused Preroll 0.5g...,Glass House Farms x F/ELD Infused Preroll 0.5g...
14311,C10-0000939-LIC,0177270517,active,Consumer,2022-04-03 09:13:18+00:00,2022-04,1,38.22,active,21955914,1A4060300006D0A000633868,Glass House Farms x F/ELD Infused Preroll 0.5...,Pre-Roll Infused,Each,1.0,38.22,38.22,2022-04,17301,0.5,0.5g,20.0,,,76.44,0.5,pre-roll infused,True,Glass House Farms x F/ELD Infused Preroll 0.5g...,Glass House Farms x F/ELD Infused Preroll 0.5g...
19288,C10-0000786-LIC,0177060882,active,Consumer,2022-04-02 11:51:45+00:00,2022-04,13,370.48,active,21664559,1A4060300006D0A000626041,Glass House Farms x F/ELD Infused Preroll 0.5...,Pre-Roll Infused,Each,1.0,38.80,38.80,2022-04,17937,0.5,0.5g,20.0,,,77.60,0.5,pre-roll infused,True,Glass House Farms x F/ELD Infused Preroll 0.5g...,Glass House Farms x F/ELD Infused Preroll 0.5g...
19342,C10-0000774-LIC,0177058740,active,Consumer,2022-04-02 11:43:24+00:00,2022-04,2,71.22,active,23496094,1A4060300006D0A000659171,Glass House Farms x F/ELD Infused Preroll 0.5...,Pre-Roll Infused,Each,1.0,37.72,37.72,2022-04,17301,0.5,0.5g,20.0,,,75.44,0.5,pre-roll infused,True,Glass House Farms x F/ELD Infused Preroll 0.5g...,Glass House Farms x F/ELD Infused Preroll 0.5g...


In [207]:
# Look up one specific product by its exact name.
# Filter the full sales copy rather than `data`: top to bottom, `data` holds only
# 'Pre-Roll Infused' rows, whereas the recorded output shows rows in
# 'Vape Cartridge (volume - each)', so the lookup on `data` would come back empty
# on a fresh run.
df_sales[df_sales['tx_product_name'] == ' Raw Garden Cartridge 1.0g Indica Fire Walker (10ct)']



Unnamed: 0,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,sales_month,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,tx_price_per_unit,date_in_month,bad_numbers_from_product_name,extracted_units,letter_gram_measure_from_product_name,count_measure_from_product_name,gram_measure_from_product_name,oz_measure_from_product_name,adjusted_tx_price_per_unit,measurement_ratio_vs_gram,combined_product_category,brand_breakable_by_dash_boolean,brands,brands_by_category
2304244,C9-0000323-LIC,7718326,active,Consumer,2020-01-23 21:41:03+00:00,2020-01,1,56.64,active,1904692,1A4060300006D0A000050045,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Vape Cartridge (volume - each),Each,1.0,56.64,56.64,2020-01,11890,1.0,1.0g,10.0,,,56.64,1.0,vape cartridge,False,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Raw Garden Cartridge 1.0g Indica Fire Walker ...
2305315,C9-0000323-LIC,7594035,active,Consumer,2020-01-22 21:25:43+00:00,2020-01,1,56.64,active,1904692,1A4060300006D0A000050045,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Vape Cartridge (volume - each),Each,1.0,56.64,56.64,2020-01,11890,1.0,1.0g,10.0,,,56.64,1.0,vape cartridge,False,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Raw Garden Cartridge 1.0g Indica Fire Walker ...
2310369,C9-0000323-LIC,6973740,active,Patient,2020-01-17 20:18:54+00:00,2020-01,2,105.73,active,1904692,1A4060300006D0A000050045,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Vape Cartridge (volume - each),Each,1.0,57.27,57.27,2020-01,11890,1.0,1.0g,10.0,,,57.27,1.0,vape cartridge,False,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Raw Garden Cartridge 1.0g Indica Fire Walker ...
2308110,C9-0000323-LIC,7227932,active,Consumer,2020-01-20 01:14:03+00:00,2020-01,2,143.79,active,1904692,1A4060300006D0A000050045,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Vape Cartridge (volume - each),Each,2.0,92.81,46.405,2020-01,11890,1.0,1.0g,10.0,,,46.405,1.0,vape cartridge,False,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Raw Garden Cartridge 1.0g Indica Fire Walker ...
2309038,C9-0000323-LIC,7112859,active,Consumer,2020-01-19 01:35:08+00:00,2020-01,4,128.11,active,1904692,1A4060300006D0A000050045,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Vape Cartridge (volume - each),Each,1.0,33.99,33.99,2020-01,11890,1.0,1.0g,10.0,,,33.99,1.0,vape cartridge,False,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Raw Garden Cartridge 1.0g Indica Fire Walker ...
2298594,C9-0000323-LIC,8457842,active,Consumer,2020-01-29 17:49:28+00:00,2020-01,2,91.5,active,1904692,1A4060300006D0A000050045,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Vape Cartridge (volume - each),Each,1.0,56.64,56.64,2020-01,11890,1.0,1.0g,10.0,,,56.64,1.0,vape cartridge,False,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Raw Garden Cartridge 1.0g Indica Fire Walker ...
2299295,C9-0000323-LIC,8367108,active,Patient,2020-01-28 21:32:27+00:00,2020-01,1,61.9,active,1904692,1A4060300006D0A000050045,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Vape Cartridge (volume - each),Each,1.0,61.9,61.9,2020-01,11890,1.0,1.0g,10.0,,,61.9,1.0,vape cartridge,False,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Raw Garden Cartridge 1.0g Indica Fire Walker ...
2296414,C9-0000323-LIC,8719008,active,Consumer,2020-01-31 17:04:59+00:00,2020-01,1,56.64,active,1904692,1A4060300006D0A000050045,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Vape Cartridge (volume - each),Each,1.0,56.64,56.64,2020-01,11890,1.0,1.0g,10.0,,,56.64,1.0,vape cartridge,False,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Raw Garden Cartridge 1.0g Indica Fire Walker ...
2303053,C9-0000323-LIC,7857701,active,Consumer,2020-01-24 20:28:19+00:00,2020-01,2,78.78,active,1904692,1A4060300006D0A000050045,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Vape Cartridge (volume - each),Each,1.0,52.52,52.52,2020-01,11890,1.0,1.0g,10.0,,,52.52,1.0,vape cartridge,False,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Raw Garden Cartridge 1.0g Indica Fire Walker ...
2299791,C9-0000323-LIC,8312040,active,Consumer,2020-01-28 15:11:04+00:00,2020-01,2,108.93,active,1904692,1A4060300006D0A000050045,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Vape Cartridge (volume - each),Each,1.0,56.64,56.64,2020-01,11890,1.0,1.0g,10.0,,,56.64,1.0,vape cartridge,False,Raw Garden Cartridge 1.0g Indica Fire Walker ...,Raw Garden Cartridge 1.0g Indica Fire Walker ...


In [None]:
# Row count for each (count measure, letter-gram measure) combination within 'Infused (edible)'.
(
    df_sales
    .loc[df_sales['tx_product_category_name'].eq('Infused (edible)')]
    .groupby(['count_measure_from_product_name', 'letter_gram_measure_from_product_name'])[['tx_price_per_unit']]
    .count()
    .reset_index()
)




In [None]:
# NOTE(review): despite the variable name, this selects the
# 'Shake (Packaged Eighth - each)' category, not an edible category.
edible = df_sales[df_sales['tx_product_category_name'] == 'Shake (Packaged Eighth - each)']






In [None]:
# Distinct product names in the selection that carry a letter-gram measure.
edible.loc[edible['letter_gram_measure_from_product_name'].notna(), 'tx_product_name'].unique()

In [None]:
# Same distinct names, wrapped in a DataFrame for tabular display.
pd.DataFrame(
    edible.loc[edible['letter_gram_measure_from_product_name'].notna(), 'tx_product_name'].unique()
)







In [None]:
# Row count for each distinct ounce measure in the selection.
edible.groupby(['oz_measure_from_product_name'])[['tx_price_per_unit']].count()




# Testing

In [None]:
from underwriting import msrp_band_analyzer_testing


In [None]:
# Run the test-set analysis for the 'Flower' category keyed on product_category_name.
# NOTE(review): 'C' presumably selects the costs data - confirm against the testing module.
result = msrp_band_analyzer_testing.run_test_set_analysis('product_category_name', 'Flower', 'C')

In [None]:
# First element of the value returned by run_test_set_analysis.
result[0]

In [None]:
# Second element of the value returned by run_test_set_analysis.
result[1]

In [None]:
# Run the test-set analysis for 'Pre-Roll Flower' keyed on tx_product_category_name.
# Only the module is imported above (`from underwriting import msrp_band_analyzer_testing`),
# so the function must be called through it; the bare name raises NameError.
result = msrp_band_analyzer_testing.run_test_set_analysis('tx_product_category_name', 'Pre-Roll Flower', 'S')