In [2]:
import json
import pandas

In [5]:
class Download(object):
    
    def __init__(self, 
                 incoming_file, outgoing_file, packages_file,
                 sales_transactions_file):
        self.incoming_records = self._file_as_dict_records(incoming_file)
        self.outgoing_records = self._file_as_dict_records(outgoing_file)
        self.packages_records = self._file_as_dict_records(packages_file)
        self.sales_tx_records = self._file_as_dict_records(sales_transactions_file)
    
    def _file_as_dict_records(self, filepath):
        df = pandas.read_excel(filepath, converters={
            'package_id': str,
            'tx_package_id': str
        })
        print('Opening file {} with columns {}'.format(filepath, df.columns))
        return df.to_dict('records')
    
d = Download(
    incoming_file='data/royal/royal_apothecary_incoming_transfer_packages_20210101_20210905.xlsx',
    outgoing_file='data/royal/royal_apothecary_outgoing_transfer_packages_20210101_20210905.xlsx',
    packages_file='data/royal/royal_apothecary_active_inventory_20210906.xlsx',
    sales_transactions_file='data/royal/royal_apothecary_sales_transactions_20210101_20210905.xlsx',
)

Opening file data/royal/royal_apothecary_incoming_transfer_packages_20210101_20210905.xlsx with columns Index(['date_type', 'transfer_row_id', 'delivery_row_id', 'package_row_id',
       'delivery_type', 'manifest_number', 'created_date', 'received_datetime',
       'shipper_facility_license_number', 'shipper_facility_name',
       'recipient_facility_license_number', 'recipient_facility_name',
       'shipment_type_name', 'shipment_transaction_type', 'package_id',
       'package_label', 'type', 'product_category_name', 'product_name',
       'shipper_wholesale_price', 'shipped_quantity', 'lab_results_status'],
      dtype='object')
Opening file data/royal/royal_apothecary_outgoing_transfer_packages_20210101_20210905.xlsx with columns Index(['date_type', 'transfer_row_id', 'delivery_row_id', 'package_row_id',
       'delivery_type', 'manifest_number', 'created_date',
       'shipper_facility_license_number', 'shipper_facility_name',
       'recipient_facility_license_number', 'recipie

In [6]:
from typing import Dict, List, Tuple

def date_to_str(dt):
    return dt.strftime('%m/%d/%Y')
    
class PackageHistory():
    
    def __init__(self, package_id):
        self.incomings = []
        self.outgoings = []
        self.sales_txs = []
        self.pkg = None
        self.package_id = package_id
        
    def run_is_sold_logic(self) -> bool:
        sold_threshold = 0.9
        
        # It's only considered sold if it was an incoming package
        # and we see there are sales transactions.
        
        if not self.incomings:
            return False
        
        if not self.sales_txs:
            return False
        
        if len(self.incomings) > 1:
            print(f'WARN: package #{package_id} has multiple incomings')
            
        incoming_pkg = self.incomings[-1]
        arrived_date = incoming_pkg['created_date']
        shipped_quantity = int(incoming_pkg['shipped_quantity'])
        price_of_pkg = incoming_pkg['shipper_wholesale_price']
        
        lines = []
        verbose = False
        
        if verbose:
            lines.append(f'Arrived {date_to_str(arrived_date)} with quantity {shipped_quantity}')
        
        self.sales_txs.sort(key = lambda x: x['sales_datetime'])
        amount_sold = 0
        is_sold = False
        is_sold_datetime = None
        revenue_from_pkg = 0
        
        for tx in self.sales_txs:
            if verbose:
                lines.append(f"On {date_to_str(tx['sales_datetime'])} sold {tx['tx_quantity_sold']} ({tx['tx_unit_of_measure']}) for ${tx['total_price']}")
            amount_sold += tx['tx_quantity_sold']
            revenue_from_pkg += tx['total_price']
            
            if not is_sold and (amount_sold / shipped_quantity) > sold_threshold:
                if verbose:
                    lines.append(f'More than {sold_threshold * 100}% was sold, therefore we consider it sold')
                is_sold = True
                is_sold_date = tx['sales_datetime']
        
        profit_margin = '{:.2f}'.format((revenue_from_pkg - price_of_pkg) / revenue_from_pkg * 100)
            
        if is_sold:
            days_delta = (is_sold_date - arrived_date).days
            # (Revenue - Expenses) / Revenue
            #print(f'Revenue {revenue_from_pkg}')
            #print(f'Price {price_of_pkg}')
            lines.insert(0, f'Package #{self.package_id} took {days_delta} days to sell with profit margin {profit_margin}%')
        else:
            lines.insert(0, f'Package #{self.package_id} has current profit margin {profit_margin}%')
        
        print('\n'.join(lines))
            
        return is_sold
        
    
def get_histories(d: Download) -> Dict[str, PackageHistory]:
    package_id_to_history = {}
    
    for in_r in d.incoming_records:
        package_id = in_r['package_id']
        if package_id not in package_id_to_history:
            package_id_to_history[package_id] = PackageHistory(package_id)
            
        history = package_id_to_history[package_id]
        history.incomings.append(in_r)

    for out_r in d.outgoing_records:
        package_id = out_r['package_id']
        if package_id not in package_id_to_history:
            package_id_to_history[package_id] = PackageHistory(package_id)
            
        history = package_id_to_history[package_id]
        history.outgoings.append(out_r)
        
    
    for pkg_r in d.packages_records:
        package_id = pkg_r['package_id']
        if package_id not in package_id_to_history:
            package_id_to_history[package_id] = PackageHistory(package_id)
            
        history = package_id_to_history[package_id]
        history.pkg = pkg_r
        
    for tx_r in d.sales_tx_records:
        package_id = tx_r['tx_package_id']
        if package_id not in package_id_to_history:
            package_id_to_history[package_id] = PackageHistory(package_id)
            
        history = package_id_to_history[package_id]
        history.sales_txs.append(tx_r)
        
    return package_id_to_history
    
id_to_history = get_histories(d)

In [7]:
def print_counts(id_to_history):
    only_incoming = 0
    only_outgoing = 0
    outgoing_and_incoming = 0
    in_and_sold_at_least_once = 0
    in_and_sold_many_times = 0
    current_inventory = 0
    inventory_with_no_transfers = 0
    total_seen = 0

    for package_id, history in id_to_history.items():
        if history.outgoings and not history.incomings:
            only_outgoing += 1

        if history.incomings and not history.outgoings and not history.sales_txs:
            only_incoming += 1

        if history.pkg:
            current_inventory += 1
            
        if history.incomings and history.sales_txs:
            in_and_sold_at_least_once += 1
            
        if history.incomings and len(history.sales_txs) > 1:
            #print(f'Package ID {package_id} was sold multiple times')
            in_and_sold_many_times += 1
            
        if history.outgoings and history.incomings:
            outgoing_and_incoming += 1
            
        if history.pkg and not history.outgoings and not history.incomings:
            inventory_with_no_transfers += 1

        total_seen += 1

    print(f'Only outgoing: {only_outgoing}')
    print(f'Only incoming: {only_incoming}')
    print(f'In and out: {outgoing_and_incoming}')
    print(f'In and sold at least once {in_and_sold_at_least_once}')
    print(f'In and sold many times {in_and_sold_many_times}')
    print(f'Inventory no transfers: {inventory_with_no_transfers}')
    print(f'Cur inventory: {current_inventory}')
    print(f'Total pkgs: {total_seen}')
    
print_counts(id_to_history)

Only outgoing: 33
Only incoming: 793
In and out: 1
In and sold at least once 1463
In and sold many times 1363
Inventory no transfers: 908
Cur inventory: 1471
Total pkgs: 3958


In [8]:
def determine_is_sold(id_to_history):
    
    num_sold = 0
    max_to_see = 10
    
    for package_id, history in id_to_history.items():
        was_sold = history.run_is_sold_logic()
        #print('')
        if was_sold:
            num_sold += 1
            
        if num_sold >= max_to_see:
            # NOTE: remove this break, using this so I can debug 1 package
            # at a time
            break

determine_is_sold(id_to_history)

Package #10548772 took 57 days to sell with profit margin nan%
Package #10548777 took 49 days to sell with profit margin nan%
Package #10549184 took 22 days to sell with profit margin nan%
Package #10772696 has current profit margin 87.34%
Package #10772063 has current profit margin 91.58%
Package #10696279 took 30 days to sell with profit margin 81.91%
Package #10696288 took 18 days to sell with profit margin 86.35%
Package #10696276 took 40 days to sell with profit margin 82.63%
Package #10696287 took 22 days to sell with profit margin 84.97%
Package #10696285 took 39 days to sell with profit margin 82.68%
Package #10696277 took 36 days to sell with profit margin 82.11%
Package #11047017 took 31 days to sell with profit margin 81.02%


In [9]:
import time

xlsx_file_name = f'~/Downloads/export_data_{round(time.time())}.xlsx'
raw_incoming_transfers_dataframe.to_excel(xlsx_file_name, index=False)

NameError: name 'raw_incoming_transfers_dataframe' is not defined