In [152]:
import json
import pandas
import numpy

company_name = 'Royal_Apothecary'
INCOMING_TRANSFERS_FILE = 'data/royal/royal_apothecary_incoming_transfer_packages_20210101_20210905.xlsx'
OUTGOING_TRANSFERS_FILE = 'data/royal/royal_apothecary_outgoing_transfer_packages_20210101_20210905.xlsx'
PACKAGES_FILE = 'data/royal/royal_apothecary_active_inventory_20210906.xlsx'
SALES_TRANSACTIONS_FILE = 'data/royal/royal_apothecary_sales_transactions_20210101_20210905.xlsx'

inventory_dates = [
    '05/31/2021',
    '06/30/2021',
    '07/31/2021',
    '08/31/2021',
]

In [134]:
import xlwt

from typing import Dict, List, BinaryIO
from xlwt.Worksheet import Worksheet as xlwt_Worksheet

class Worksheet(object):
	"""A single sheet inside a spreadsheet."""

	MAX_ALLOWED_XL_CELL_CHARS = 32767
	TRUNCATION_MSG = '[truncated]'
	TRUNCATION_MSG_LEN = len(TRUNCATION_MSG)
	ALLOWED_VALUE_LEN = MAX_ALLOWED_XL_CELL_CHARS - TRUNCATION_MSG_LEN

	def __init__(self, ws: xlwt_Worksheet) -> None:
		self._ws = ws
		self._rowx = 0

	def add_row(self, values: List[str]) -> None:
		for colx, value in enumerate(values):
			# Excel cell doesn't support more than 32767 characters.
			if len(value) > self.MAX_ALLOWED_XL_CELL_CHARS:
				value = value[:self.ALLOWED_VALUE_LEN] + self.TRUNCATION_MSG

			self._ws.write(self._rowx, colx, value[:self.MAX_ALLOWED_XL_CELL_CHARS])
		self._rowx += 1

class WorkbookWriter(object):
	"""A wrapper around a xlwt Workbook"""

	def __init__(self, wb: xlwt.Workbook) -> None:
		self._wb = wb
		self._sheet_map: Dict[str, Worksheet] = {}

	def add_sheet(self, sheet_name: str) -> Worksheet:
		orig_ws = self._wb.add_sheet(sheet_name)
		ws = Worksheet(orig_ws)
		self._sheet_map[sheet_name] = ws
		return ws

	def write_records(self, sheet_name: str, records: List[str]) -> None:
		ws = self._sheet_map[sheet_name]
		ws.add_row(records)

	def save(self, f: BinaryIO) -> None:
		self._wb.save(f)



In [98]:
class Download(object):
    
    def __init__(self, 
                 incoming_file, outgoing_file, packages_file,
                 sales_transactions_file):
        self.incoming_records = self._file_as_dict_records(incoming_file)
        self.outgoing_records = self._file_as_dict_records(outgoing_file)
        self.packages_records = self._file_as_dict_records(packages_file)
        self.sales_tx_records = self._file_as_dict_records(sales_transactions_file)
    
    def _file_as_dict_records(self, filepath):
        df = pandas.read_excel(filepath, converters={
            'package_id': str,
            'tx_package_id': str
        })
        print('Opening file {} with columns {}'.format(filepath, df.columns))
        return df.to_dict('records')

d = Download(
    incoming_file=INCOMING_TRANSFERS_FILE,
    outgoing_file=OUTGOING_TRANSFERS_FILE,
    packages_file=PACKAGES_FILE,
    sales_transactions_file=SALES_TRANSACTIONS_FILE,
)

  warn("Workbook contains no default style, apply openpyxl's default")


Opening file data/royal/royal_apothecary_incoming_transfer_packages_20210101_20210905.xlsx with columns Index(['date_type', 'transfer_row_id', 'delivery_row_id', 'package_row_id',
       'delivery_type', 'manifest_number', 'created_date', 'received_datetime',
       'shipper_facility_license_number', 'shipper_facility_name',
       'recipient_facility_license_number', 'recipient_facility_name',
       'shipment_type_name', 'shipment_transaction_type', 'package_id',
       'package_label', 'type', 'product_category_name', 'product_name',
       'shipper_wholesale_price', 'shipped_quantity', 'lab_results_status'],
      dtype='object')
Opening file data/royal/royal_apothecary_outgoing_transfer_packages_20210101_20210905.xlsx with columns Index(['date_type', 'transfer_row_id', 'delivery_row_id', 'package_row_id',
       'delivery_type', 'manifest_number', 'created_date',
       'shipper_facility_license_number', 'shipper_facility_name',
       'recipient_facility_license_number', 'recipie

  warn("Workbook contains no default style, apply openpyxl's default")
  warn("Workbook contains no default style, apply openpyxl's default")


Opening file data/royal/royal_apothecary_active_inventory_20210906.xlsx with columns Index(['identifier', 'license_number', 'last_modified_at', 'package_id',
       'package_label', 'type', 'packaged_date', 'package_type',
       'product_name', 'product_category_name', 'quantity', 'unit_of_measure'],
      dtype='object')


  warn("Workbook contains no default style, apply openpyxl's default")


Opening file data/royal/royal_apothecary_sales_transactions_20210101_20210905.xlsx with columns Index(['date_type', 'id', 'last_modified_at', 'receipt_id', 'receipt_number',
       'receipt_type', 'sales_customer_type', 'sales_datetime',
       'total_packages', 'total_price', 'tx_type', 'tx_package_id',
       'tx_package_label', 'tx_product_name', 'tx_product_category_name',
       'tx_unit_of_measure', 'tx_quantity_sold', 'tx_total_price'],
      dtype='object')


In [154]:
from typing import Dict, List, Tuple
from dateutil import parser

def date_to_str(dt):
    return dt.strftime('%m/%d/%Y')

def parse_to_date(cur_date):
    if not cur_date:
        return None
    
    if type(cur_date) == str:
        return parser.parse(cur_date)
    
    return cur_date
        
class Printer():
     
    def __init__(self, verbose, show_info):
        self.verbose = verbose
        self.show_info = show_info
        self.packages_with_warnings = set([])
        
    def warn(self, msg, package_id: str = None):
        print('WARN: {}'.format(msg))
        if package_id:
            self.packages_with_warnings.add(package_id)
        
    def debug(self, msg):
        if self.verbose:
            print(msg)
            
    def info(self, msg):
        if self.show_info:
            print(msg)
            
class PackageHistory():
    """
        Grab all the information we know about this package, and then compute multiple fields on it
    """
    
    def __init__(self, package_id):
        self.incomings = []
        self.outgoings = []
        self.sales_txs = []
        self.pkg = None
        self.package_id = package_id
        self.computed_info = {}
        self.should_exclude = False
        
    def in_inventory_at_date(self, cur_date):
        # Was this package in the company's possession at this date?
        cur_date = parse_to_date(cur_date)    
        arrived_date = parse_to_date(self.computed_info['arrived']['date'])
        if cur_date < arrived_date:
            return False
        
        sold_date = parse_to_date(self.computed_info.get('sold', {}).get('date'))
        if sold_date and cur_date > sold_date:
            # We know it's not in your possession after the sales date
            return False
        
        # Knowing nothing else, assume you have the package at this date
        return True
        
    def get_inventory_column_names(self):
        return [
            'Package ID',
            'Arrived Date',
            'Sold Date',
        ]
        
    def get_inventory_output_row(self):
        incoming_pkg = self.incomings[-1] if self.incomings else None 
        sold_date = parse_to_date(self.computed_info.get('sold', {}).get('date'))
        
        if incoming_pkg:
            return [
                self.package_id,
                date_to_str(self.computed_info['arrived']['date']),
                date_to_str(sold_date) if sold_date else '',
            ]
        else:
            return [
                self.package_id,
                date_to_str(self.computed_info['arrived']['date']),
                date_to_str(sold_date) if sold_date else '',
            ]
        
    def filter_out_unhandled_packages(self, p: Printer):
        if len(self.incomings) > 1:
            p.info(f'Excluding package {self.package_id} because it has multiple incoming packages')
            self.should_exclude = True
            return
        
        if not self.incomings and not self.pkg:
            p.info(f'Excluding package {self.package_id} because it doesnt have an incoming or regular MetrcPackage')
            self.should_exclude = True
            return
        
    def when_it_arrived(self, p: Printer) -> bool:
        # Fills in the 'arrived' value for self.computed_info
        if self.incomings:
            incoming_pkg = self.incomings[-1]
            arrived_date = incoming_pkg['created_date']
            self.computed_info['arrived'] = {
                'reason': 'incoming',
                'date': arrived_date
            }
            return True
        
        if not self.pkg:
            p.warn(f'package {self.package_id} neither has an incoming package nor a regular "inventory" package')
            return False
        
        self.computed_info['arrived'] = {
            'reason': 'ownership',
            'date': self.pkg['packaged_date']
        }
        return True 
        
    def run_is_sold_logic(self, p: Printer) -> bool:
        # Fills in the 'sold' value for self.computed_info
        #
        # Tells us when a package was sold
        sold_threshold = 0.9
        
        # It's only considered sold if it was an incoming package
        # and we see there are sales transactions.
        
        if not self.incomings:
            return False
        
        if not self.sales_txs:
            return False
        
        if len(self.incomings) > 1:
            p.warn(f'package #{self.package_id} has multiple incoming transfers', package_id=self.package_id)
            
        incoming_pkg = self.incomings[-1]
        arrived_date = incoming_pkg['created_date']
        if not incoming_pkg['shipped_quantity'] or numpy.isnan(incoming_pkg['shipped_quantity']):
            p.warn(f'package #{self.package_id} does not have a shipped quantity', package_id=self.package_id)
            return False
            
        shipped_quantity = int(incoming_pkg['shipped_quantity'])
        price_of_pkg = incoming_pkg['shipper_wholesale_price']
        
        lines = []
        verbose = p.verbose
        
        if verbose:
            lines.append(f'Arrived {date_to_str(arrived_date)} with quantity {shipped_quantity}')
        
        self.sales_txs.sort(key = lambda x: x['sales_datetime'])
        amount_sold = 0
        is_sold = False
        is_sold_datetime = None
        revenue_from_pkg = 0
        
        for tx in self.sales_txs:
            if verbose:
                lines.append(f"On {date_to_str(tx['sales_datetime'])} sold {tx['tx_quantity_sold']} ({tx['tx_unit_of_measure']}) for ${tx['total_price']}")
            amount_sold += tx['tx_quantity_sold']
            revenue_from_pkg += tx['total_price']
            
            if not is_sold and (amount_sold / shipped_quantity) > sold_threshold:
                if verbose:
                    lines.append(f'More than {sold_threshold * 100}% was sold, therefore we consider it sold')
                is_sold = True
                is_sold_date = tx['sales_datetime']
        
        profit_margin = '{:.2f}'.format((revenue_from_pkg - price_of_pkg) / revenue_from_pkg * 100)
            
        if is_sold:
            days_delta = (is_sold_date - arrived_date).days
            
            # (Revenue - Expenses) / Revenue
            #print(f'Revenue {revenue_from_pkg}')
            #print(f'Price {price_of_pkg}')
            lines.insert(0, f'Package #{self.package_id} took {days_delta} days to sell with profit margin {profit_margin}%')
            self.computed_info['sold'] = {
                'date': is_sold_date
            }
        else:
            lines.insert(0, f'Package #{self.package_id} has current profit margin {profit_margin}%')
        
        p.info('\n'.join(lines))
            
        return is_sold
    
    def compute_additional_fields(self, run_filter: bool, p: Printer) -> None:
        if run_filter:
            self.filter_out_unhandled_packages(p)
            
        if self.should_exclude:
            return
        
        self.when_it_arrived(p)
        self.run_is_sold_logic(p)
        
    
def get_histories(d: Download) -> Dict[str, PackageHistory]:
    package_id_to_history = {}
    
    for in_r in d.incoming_records:
        package_id = in_r['package_id']
        if package_id not in package_id_to_history:
            package_id_to_history[package_id] = PackageHistory(package_id)
            
        history = package_id_to_history[package_id]
        history.incomings.append(in_r)

    for out_r in d.outgoing_records:
        package_id = out_r['package_id']
        if package_id not in package_id_to_history:
            package_id_to_history[package_id] = PackageHistory(package_id)
            
        history = package_id_to_history[package_id]
        history.outgoings.append(out_r)
        
    
    for pkg_r in d.packages_records:
        package_id = pkg_r['package_id']
        if package_id not in package_id_to_history:
            package_id_to_history[package_id] = PackageHistory(package_id)
            
        history = package_id_to_history[package_id]
        history.pkg = pkg_r
        
    for tx_r in d.sales_tx_records:
        package_id = tx_r['tx_package_id']
        if package_id not in package_id_to_history:
            package_id_to_history[package_id] = PackageHistory(package_id)
            
        history = package_id_to_history[package_id]
        history.sales_txs.append(tx_r)
        
    return package_id_to_history
    
id_to_history = get_histories(d)

In [155]:
def create_inventory_xlsx(id_to_history):
    
    i = 0
    num_excluded = 0
    num_total = 0
    max_to_see = -1
    p = Printer(verbose=False, show_info=False)
    
    for package_id, history in id_to_history.items():
        history.compute_additional_fields(run_filter=True, p=p)
        num_total += 1
        if history.should_exclude:
            num_excluded += 1
            
        if max_to_see > 0 and i >= max_to_see:
            # NOTE: remove this break, using this so I can debug 1 package
            # at a time
            break
            
        i += 1

    wb = WorkbookWriter(xlwt.Workbook())
    
    for inventory_date in inventory_dates:
        sheet_name = inventory_date.replace('/', '-')
        sheet = wb.add_sheet(sheet_name)
        
        # Determine whether this package belongs in the inventory for this date
        first = True
        
        for package_id, history in id_to_history.items():
            if history.should_exclude:
                continue
                
            if not history.in_inventory_at_date(inventory_date):
                continue
                
            if first:
                sheet.add_row(history.get_inventory_column_names())
                first = False
            
            row = history.get_inventory_output_row()
            sheet.add_row(row)
     
    filepath = f'{company_name}_inventory_by_month.xls'
    with open(filepath, 'wb') as f:
        wb.save(f)
        print('Wrote result to {}'.format(filepath))
        
    pct_excluded = '{:.2f}'.format(num_excluded / num_total * 100)
    print(f'Excluded {num_excluded} / {num_total} packages from consideration ({pct_excluded}%)')

create_inventory_xlsx(id_to_history)

Wrote result to Royal_Apothecary_inventory_by_month.xls
Excluded 795 / 3958 packages from consideration (20.09%)


In [100]:
##### DEBUG ######

def print_counts(id_to_history):
    only_incoming = 0
    only_outgoing = 0
    outgoing_and_incoming = 0
    in_and_sold_at_least_once = 0
    in_and_sold_many_times = 0
    current_inventory = 0
    inventory_with_no_transfers = 0
    total_seen = 0

    for package_id, history in id_to_history.items():
        if history.outgoings and not history.incomings:
            only_outgoing += 1

        if history.incomings and not history.outgoings and not history.sales_txs:
            only_incoming += 1

        if history.pkg:
            current_inventory += 1
            
        if history.incomings and history.sales_txs:
            in_and_sold_at_least_once += 1
            
        if history.incomings and len(history.sales_txs) > 1:
            #print(f'Package ID {package_id} was sold multiple times')
            in_and_sold_many_times += 1
            
        if history.outgoings and history.incomings:
            outgoing_and_incoming += 1
            
        if history.pkg and not history.outgoings and not history.incomings:
            inventory_with_no_transfers += 1

        total_seen += 1

    print(f'Only outgoing: {only_outgoing}')
    print(f'Only incoming: {only_incoming}')
    print(f'In and out: {outgoing_and_incoming}')
    print(f'In and sold at least once {in_and_sold_at_least_once}')
    print(f'In and sold many times {in_and_sold_many_times}')
    print(f'Inventory no transfers: {inventory_with_no_transfers}')
    print(f'Cur inventory: {current_inventory}')
    print(f'Total pkgs: {total_seen}')
    
print_counts(id_to_history)

Only outgoing: 33
Only incoming: 793
In and out: 1
In and sold at least once 1463
In and sold many times 1363
Inventory no transfers: 908
Cur inventory: 1471
Total pkgs: 3958


In [101]:
def determine_is_sold(id_to_history):
    
    num_sold = 0
    max_to_see = 10
    
    for package_id, history in id_to_history.items():
        was_sold = history.run_is_sold_logic()
        #print('')
        if was_sold:
            num_sold += 1
            
        if num_sold >= max_to_see:
            # NOTE: remove this break, using this so I can debug 1 package
            # at a time
            break

determine_is_sold(id_to_history)

Package #10548772 took 57 days to sell with profit margin nan%
Package #10548777 took 49 days to sell with profit margin nan%
Package #10549184 took 22 days to sell with profit margin nan%
Package #10772696 has current profit margin 87.34%
Package #10772063 has current profit margin 91.58%
Package #10696279 took 30 days to sell with profit margin 81.91%
Package #10696288 took 18 days to sell with profit margin 86.35%
Package #10696276 took 40 days to sell with profit margin 82.63%
Package #10696287 took 22 days to sell with profit margin 84.97%
Package #10696285 took 39 days to sell with profit margin 82.68%
Package #10696277 took 36 days to sell with profit margin 82.11%
Package #11047017 took 31 days to sell with profit margin 81.02%
