In [None]:
# Steps to install
# 1. pip install sqlalchemy-bigquery google-cloud-bigquery-storage pyarrow
# 2. Copy the credentials file to wherever you set BIGQUERY_CREDENTIALS_PATH to

In [1]:
import json
import os
import pandas
import pyarrow

import sys
from os import path
import numpy

from dotenv import load_dotenv
from sqlalchemy import create_engine


load_dotenv(verbose=True)
BIGQUERY_CREDENTIALS_PATH = os.environ.get('BIGQUERY_CREDENTIALS_PATH')

In [2]:
sys.path.append(path.realpath(path.join(os.getcwd(), "../core")))
import create_queries
%load_ext autoreload
%autoreload 2

In [7]:
COMPANY_NAME = 'RA'
COMPANY_IDENTIFIER = 'RA'
TRANSFER_PACKAGES_START_DATE = '2020-01-01'
SALES_TRANSACTIONS_START_DATE = '2020-01-01'

INVENTORY_DATE = '10/17/2021'

In [8]:
company_incoming_transfer_packages_query = create_queries.create_company_incoming_transfer_packages_query(COMPANY_IDENTIFIER, TRANSFER_PACKAGES_START_DATE)
company_outgoing_transfer_packages_query = create_queries.create_company_outgoing_transfer_packages_query(COMPANY_IDENTIFIER, TRANSFER_PACKAGES_START_DATE)
company_sales_transactions_query = create_queries.create_company_sales_transactions_query(COMPANY_IDENTIFIER, SALES_TRANSACTIONS_START_DATE)
company_inventory_packages_query = create_queries.create_company_inventory_packages_query(COMPANY_IDENTIFIER)

engine = create_engine('bigquery://bespoke-financial/ProdMetrcData', credentials_path=os.path.expanduser(BIGQUERY_CREDENTIALS_PATH))
company_incoming_transfer_packages_dataframe = pandas.read_sql_query(company_incoming_transfer_packages_query, engine)
company_outgoing_transfer_packages_dataframe = pandas.read_sql_query(company_outgoing_transfer_packages_query, engine)
company_sales_transactions_dataframe = pandas.read_sql_query(company_sales_transactions_query, engine)
company_inventory_packages_dataframe = pandas.read_sql_query(company_inventory_packages_query, engine)

In [None]:
deduped_sales_receipts_dataframe = prepare_data.dedupe_sales_transactions(company_sales_transactions_dataframe)

In [9]:
df_in = company_incoming_transfer_packages_dataframe
df_out = company_outgoing_transfer_packages_dataframe
df_sales = company_sales_transactions_dataframe
df_inventory = company_inventory_packages_dataframe

In [16]:
pandas.options.display.float_format = '{:,.2f}'.format

In [10]:
df_sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152812 entries, 0 to 152811
Data columns (total 16 columns):
 #   Column                    Non-Null Count   Dtype              
---  ------                    --------------   -----              
 0   id                        152812 non-null  object             
 1   license_number            151033 non-null  object             
 2   receipt_number            152812 non-null  object             
 3   rt_type                   152812 non-null  object             
 4   sales_customer_type       152812 non-null  object             
 5   sales_datetime            152812 non-null  datetime64[ns, UTC]
 6   total_packages            152812 non-null  int64              
 7   rt_total_price            152812 non-null  float64            
 8   tx_type                   152812 non-null  object             
 9   tx_package_id             152812 non-null  object             
 10  tx_package_label          152812 non-null  object             
 11  

In [13]:
# df_sales[df_sales['tx_quantity_sold'] <1]

In [12]:
df_sales['per_unit'] = df_sales['tx_total_price'] / df_sales['tx_quantity_sold']

In [14]:
df_sales['year_month'] = df_sales['sales_datetime'].dt.strftime("%Y-%m")

In [18]:
df_sales.groupby('year_month')['tx_total_price'].sum()

year_month
2020-03    67,772.94
2020-04   177,318.76
2020-05   270,761.16
2020-06   296,527.40
2020-07   340,849.56
2020-08   366,741.80
2020-09   399,734.10
2020-10   415,668.86
2020-11   395,166.86
2020-12   206,846.51
2021-01   216,208.39
2021-02   189,439.47
2021-03   212,136.35
2021-04   238,125.01
2021-05   248,644.78
2021-06   220,601.60
2021-07   219,837.98
2021-08   214,482.58
2021-09   180,901.37
2021-10   110,363.87
Name: tx_total_price, dtype: float64

In [19]:
df_in.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5621 entries, 0 to 5620
Data columns (total 28 columns):
 #   Column                                 Non-Null Count  Dtype              
---  ------                                 --------------  -----              
 0   package_row_id                         5621 non-null   object             
 1   delivery_type                          5621 non-null   object             
 2   license_number                         5621 non-null   object             
 3   manifest_number                        5621 non-null   object             
 4   created_date                           5621 non-null   object             
 5   received_datetime                      5612 non-null   datetime64[ns, UTC]
 6   shipper_facility_license_number        5621 non-null   object             
 7   shipper_facility_name                  5621 non-null   object             
 8   recipient_facility_license_number      5621 non-null   object             
 9   recipien

In [20]:
df_in['per_unit_incoming'] = df_in['shipper_wholesale_price'] / df_in['shipped_quantity']

In [None]:
df = pd.merge(df,df2[['Key_Column','Target_Column']],on='Key_Column', how='left')

In [30]:
df_sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152812 entries, 0 to 152811
Data columns (total 18 columns):
 #   Column                    Non-Null Count   Dtype              
---  ------                    --------------   -----              
 0   id                        152812 non-null  object             
 1   license_number            151033 non-null  object             
 2   receipt_number            152812 non-null  object             
 3   rt_type                   152812 non-null  object             
 4   sales_customer_type       152812 non-null  object             
 5   sales_datetime            152812 non-null  datetime64[ns, UTC]
 6   total_packages            152812 non-null  int64              
 7   rt_total_price            152812 non-null  float64            
 8   tx_type                   152812 non-null  object             
 9   tx_package_id             152812 non-null  object             
 10  tx_package_label          152812 non-null  object             
 11  

In [29]:
pandas.merge(df_sales, df_in[['package_id','per_unit_incoming']], left_on='tx_package_id', right_on='package_id', how='left')

Unnamed: 0,id,license_number,receipt_number,rt_type,sales_customer_type,sales_datetime,total_packages,rt_total_price,tx_type,tx_package_id,tx_package_label,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,per_unit,year_month,package_id,per_unit_incoming
0,6e1737df-814e-4cb6-82c1-969f866f4ac6,C10-0000596-LIC,0136818143,active,Consumer,2021-10-16 20:37:08.110000+00:00,1,0.00,active,16853032,1A40603000072DE000075450,".7g Single - Sour Bubbles, Stone Road",Pre-Roll Flower,Each,1.00,0.00,0.00,2021-10,16853032,1.00
1,40d7bd31-73a3-42b2-aeb5-cc39fe8779d0,C10-0000596-LIC,0136817748,active,Consumer,2021-10-16 20:35:06.170000+00:00,2,18.00,active,18459735,1A406030000339A000013005,Wedding Crashers 1g,Flower (packaged gram - each),Each,1.00,9.00,9.00,2021-10,18459735,0.01
2,40d7bd31-73a3-42b2-aeb5-cc39fe8779d0,C10-0000596-LIC,0136817748,active,Consumer,2021-10-16 20:35:06.170000+00:00,2,18.00,active,18441370,1A406030000339A000013026,Blue Berry Muffin 1g,Flower (packaged gram - each),Each,1.00,9.00,9.00,2021-10,18441370,0.01
3,78426d50-4914-4edf-8952-cf384c5473dc,C10-0000596-LIC,0136585295,active,Consumer,2021-10-15 21:02:04.760000+00:00,4,43.50,active,16892484,1A406030002E7C2000000026,Beary OG .5g Vape Cartridge,Vape Cartridge (weight - each),Each,1.00,25.00,25.00,2021-10,,
4,78426d50-4914-4edf-8952-cf384c5473dc,C10-0000596-LIC,0136585295,active,Consumer,2021-10-15 21:02:04.760000+00:00,4,43.50,active,16851983,1A40603000072DE000075449,"1g Hash Single - Acapulco Gold, Stone Road",Pre-Roll Infused,Each,1.00,8.00,8.00,2021-10,16851983,3.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153169,9ccc7566-b22d-4381-88fe-607b2ac6c5f1,C10-0000596-LIC,0015284616,active,Consumer,2020-03-08 12:30:35.440000+00:00,3,117.00,active,3016876,1A4060300008D22000021564,STIIIZY - OG Kush Pod - 0.5g,Vape Cartridge (volume - each),Each,2.00,54.00,27.00,2020-03,3016876,
153170,9ccc7566-b22d-4381-88fe-607b2ac6c5f1,C10-0000596-LIC,0015284616,active,Consumer,2020-03-08 12:30:35.440000+00:00,3,117.00,active,3050501,1A406030000A3CF000000232,Clonbar King Louis 3.5g,Flower (packaged eighth - each),Each,2.00,63.00,31.50,2020-03,3050501,
153171,9ccc7566-b22d-4381-88fe-607b2ac6c5f1,C10-0000596-LIC,0015284616,active,Consumer,2020-03-08 12:30:35.440000+00:00,3,117.00,active,3016876,1A4060300008D22000021564,STIIIZY - OG Kush Pod - 0.5g,Vape Cartridge (volume - each),Each,2.00,54.00,27.00,2020-03,3016876,
153172,9ccc7566-b22d-4381-88fe-607b2ac6c5f1,C10-0000596-LIC,0015284616,active,Consumer,2020-03-08 12:30:35.440000+00:00,3,117.00,active,2634052,1A4060300007B40000003102,Cinn Indica 10 Pk,Edible (weight - each),Each,1.00,0.00,0.00,2020-03,2634052,1.46


In [25]:
df_sales.merge(df_in['package_id','per_unit_incoming'], left_on='tx_package_id', right_on='package_id', how='left')

KeyError: ('package_id', 'per_unit_incoming')

In [None]:
print (df1.set_index('ISIN')['Security'])
ISIN
I1    ABC
I2    DEF
I3    JHK
I4    LMN
I5    OPQ
Name: Security, dtype: object

#create new df by copy of df2
df3 = df2.copy()
df3['Security'] = df3.ISIN.map(df1.set_index('ISIN')['Security'])
#remove column ISIN
df3.drop('ISIN', axis=1, inplace=True)
#change order of columns
df3 = df3[['Security','Value']]
print (df3)
  Security  Value
0      DEF    100
1      JHK    200
2      OPQ    300

In [None]:
df_cogs = df_in.copy()
df_cogs['per_unit_incoming'] = df_cogs.

In [58]:
incoming_transfer_simp = company_incoming_transfer_packages_dataframe[[
'license_number',
'created_date',
'package_id',
'product_category_name',
'product_name',
'shipper_wholesale_price',
'shipped_quantity',
'shipped_unit_of_measure',
'item_unit_weight',
'item_unit_weight_unit_of_measure_name']]

In [30]:
# incoming_transfer_simp

In [59]:
outgoing_transfer_simp = company_outgoing_transfer_packages_dataframe[[
'license_number',
'created_date',
'package_id',
'product_category_name',
'product_name',
'shipper_wholesale_price',
'shipped_quantity',
'shipped_unit_of_measure',
'item_unit_weight',
'item_unit_weight_unit_of_measure_name']]

In [53]:
company_sales_transactions_dataframe.to_csv('hpcc_all_sales_10_13_21.csv')

In [11]:
company_sales_transactions_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 971609 entries, 0 to 971608
Data columns (total 16 columns):
 #   Column                    Non-Null Count   Dtype              
---  ------                    --------------   -----              
 0   id                        971609 non-null  object             
 1   license_number            971609 non-null  object             
 2   receipt_number            971609 non-null  object             
 3   rt_type                   971609 non-null  object             
 4   sales_customer_type       971609 non-null  object             
 5   sales_datetime            971609 non-null  datetime64[ns, UTC]
 6   total_packages            971609 non-null  int64              
 7   rt_total_price            971609 non-null  float64            
 8   tx_type                   971609 non-null  object             
 9   tx_package_id             971609 non-null  object             
 10  tx_package_label          971609 non-null  object             
 11  

In [60]:
sales_simp = company_sales_transactions_dataframe[[
'license_number',
'sales_datetime',
'tx_package_id',
'tx_product_name',
'tx_product_category_name',
'tx_unit_of_measure',
'tx_quantity_sold',
'tx_total_price']]

In [61]:
inventory_simp = company_inventory_packages_dataframe[[
'license_number',
'package_id',
'packaged_date',
'product_name',
'product_category_name',
'quantity',
'unit_of_measure'
]]

In [62]:
sales_simp['year_month'] = sales_simp['sales_datetime'].dt.strftime("%Y-%m")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [40]:
sales_simp.head(2)

Unnamed: 0,license_number,sales_datetime,tx_package_id,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price,month_year
0,C10-0000064-LIC,2021-10-12 20:58:15+00:00,18426625,PR THCa-Infused Preroll Lemon Crasher,Pre-Roll Infused,Each,1.0,14.71,10-2021
1,C10-0000064-LIC,2021-10-12 20:57:35+00:00,18551375,STIIIZY - Grape Pie Live Resin Pod - 1g,Vape Cartridge (weight - each),Each,1.0,54.25,10-2021


In [64]:
sales_simp.groupby(by=['year_month'], as_index=False).count()

Unnamed: 0,year_month,license_number,sales_datetime,tx_package_id,tx_product_name,tx_product_category_name,tx_unit_of_measure,tx_quantity_sold,tx_total_price
0,2020-03,1304,1304,1304,1304,1304,1304,1304,1304
1,2020-04,2960,2960,2960,2960,2960,2960,2960,2960
2,2020-05,4228,4228,4228,4228,4228,4228,4228,4228
3,2020-06,4575,4575,4575,4575,4575,4575,4575,4575
4,2020-07,5207,5207,5207,5207,5207,5207,5207,5207
5,2020-08,5567,5567,5567,5567,5567,5567,5567,5567
6,2020-09,6034,6034,6034,6034,6034,6034,6034,6034
7,2020-10,6322,6322,6322,6322,6322,6322,6322,6322
8,2020-11,6312,6312,6312,6312,6312,6312,6312,6312
9,2020-12,4741,6520,6520,6520,6520,6520,6520,6520


In [51]:
sales_drop = sales_simp.drop(['sales_datetime', 'month_year'], axis=1)

In [52]:
sales_simp.to_csv('./hpcc_sales_2021_10_13.csv')

In [None]:
# INVENTORY_DATES = [
#     '09/30/2020',
#     '10/31/2020',
#     '11/30/2020',
#     '12/31/2020',
#     '01/31/2021',
#     '02/28/2021',
#     '03/31/2021',
#     '04/30/2021',
#     '05/31/2021',
#     '06/30/2021',
#     '07/31/2021',
#     '08/31/2021',
#     '09/30/2021',
# ]

In [12]:
# %autoreload 2

# sys.path.append(path.realpath(path.join(os.getcwd(), "../../scripts/analysis")))
# sys.path.append(path.realpath(path.join(os.getcwd(), "../../src")))

# from util import active_inventory_util as util

In [13]:
# d = util.Download()
# d.download_dataframes(
#     incoming_transfer_packages_dataframe=company_incoming_transfer_packages_dataframe,
#     outgoing_transfer_packages_dataframe=company_outgoing_transfer_packages_dataframe,
#     sales_transactions_dataframe=company_sales_transactions_dataframe,
# )

In [19]:
# q = util.Query()
# q.inventory_dates = INVENTORY_DATES
# q.company_name = COMPANY_NAME

# id_to_history = util.get_histories(d)
# util.print_counts(id_to_history)
# util.create_inventory_xlsx(id_to_history, q)

In [20]:
# computed_inventory_package_records = util.create_inventory_dataframe_by_date(id_to_history, INVENTORY_DATE)
# computed_inventory_packages_dataframe = pandas.DataFrame(
#     computed_inventory_package_records,
#     columns=[
#         'package_id',
#         'Arrived Date',
#         'Product Category',
#         'Product Name',
#         'Current Quantity',
#         'Sold Date',
#     ]
# )
# computed_inventory_packages_dataframe

In [21]:
# date_to_inventory_dataframe = {}
# for date, inventory_records in date_to_inventory_records.items():
#     date_to_inventory_dataframe[date] = pandas.DataFrame(
#         inventory_records,
#         columns=[
#             'package_id',
#             'Arrived Date',
#             'Product Category',
#             'Product Name',
#             'Current Quantity',
#             'Sold Date',
#         ]
#     )
    
# date_to_inventory_dataframe[list(date_to_inventory_dataframe.keys())[0]]

In [17]:
# raw_incoming_transfer_packages_dataframe = pandas.read_excel('data/20210930/royal_apothecary_incoming_transfer_packages_20200101_20210930.xlsx', header=0)
# len(raw_incoming_transfer_packages_dataframe.index), raw_incoming_transfer_packages_dataframe.columns

In [88]:
# incoming_transfer_packages_dataframe = raw_incoming_transfer_packages_dataframe[[
#     'date_type',
#     'transfer_row_id',
#     'delivery_row_id',
#     'package_row_id',
#     'delivery_type',
#     'manifest_number',
#     'created_date',
#     'received_datetime',
#     'shipper_facility_license_number',
#     'shipper_facility_name',
#     'recipient_facility_license_number',
#     'recipient_facility_name',
#     'shipment_type_name',
#     'shipment_transaction_type',
#     'package_id',
#     'package_label',
#     'type',
#     'product_category_name',
#     'product_name',
#     'shipper_wholesale_price',
#     'shipped_quantity',
#     'package_lab_results_status',
#     'shipment_package_state',
#     'is_testing_sample',
#     'is_trade_sample'
# ]]

In [18]:
# for date, inventory_dataframe in date_to_inventory_dataframe.items():
#     print(date)
#     print(f'# of packages in inventory: {len(inventory_dataframe.index)}')

#     inventory_with_incoming_transfer_packages_dataframe = inventory_dataframe.astype({'package_id': 'int64'}).merge(incoming_transfer_packages_dataframe, on='package_id', how='inner', suffixes=('_l', '_r'))
# #     print(f'# of packages in inventory with incoming package: {len(inventory_with_incoming_transfer_packages_dataframe.index)}')
    
#     inventory_with_cost_records = inventory_with_incoming_transfer_packages_dataframe.to_dict('record')

#     total_valuation_cost = 0
#     import math
#     for inventory_with_cost_record in inventory_with_cost_records:
#         incoming_shipped_price = inventory_with_cost_record['shipper_wholesale_price']
#         if math.isnan(incoming_shipped_price):
#             incoming_shipped_price = 0
#         incoming_shipped_quantity = inventory_with_cost_record['shipped_quantity']
#         current_quantity = inventory_with_cost_record['Current Quantity']
#         total_valuation_cost += float(current_quantity) * (incoming_shipped_price / incoming_shipped_quantity)

#     print(f'Inventory valuation (based on COST): ${total_valuation_cost}')
#     print('')

In [None]:
# import json
# import pandas
# import numpy
# import os
# import sys
# from os import path