# Identify sales

In [1]:
import pandas as pd
from pathlib import Path
import csd as c

## <span style="color:red"> Update the month in parentheses below each time the auxiliary folder is refreshed (monthly)</span>

In [2]:
# In-scope invoice lines, read from the monthly auxiliary folder.
loc = Path.cwd().parent.parent / 'auxiliary (january)' / 'data' / 'generated' / 'invoice_data_2_in-scope.csv'
# 'priceorigcd' mixes letter codes ('O') with digit codes ('7', '2'). Read it as
# text so the later comparisons against 'O' and '7' cannot miss rows that the
# parser would otherwise have inferred as integers.
inv_df = pd.read_csv(loc, dtype={'priceorigcd': str})

## All invoice lines of sales

## <span style="color:red"> Change vendor name in last part of file path</span>

In [3]:
# Product info file for the vendor under review (vendor name is part of the file name).
loc = Path.cwd().parent.joinpath('data', 'merfish_product_info.csv')
prod_info_df = pd.read_csv(loc)

In [4]:
# One row per distinct product code; only the 'prod' column is kept.
unique_prod_df = prod_info_df.drop_duplicates(subset=['prod'])[['prod']]

In [5]:
# Keep only the invoice lines whose 'item' is one of the products listed above.
# After the join 'prod' duplicates 'item', so it is dropped again.
sales_df = pd.merge(
    unique_prod_df,
    inv_df,
    how='inner',
    left_on='prod',
    right_on='item',
).drop(columns=['prod'])

In [6]:
# Copy the sales lines to the system clipboard (index column omitted).
sales_df.to_clipboard(index=False)

In [7]:
sales_df.head()

Unnamed: 0,whse,orderno,ordersuf,lineno,invoicedt,custno,custname,item,itemdesc,unit,...,priceorigcd,pdrecno,vendno,xcost_adj,GP$,Margin,month,Exclusion,whse_name,division
0,801,981007725,0,3,2022-06-21,112441,MECHANICAL INSTALLATION CORP.,CPF3114,"5/8""X50' REFRIG. COIL COPPER",RL,...,O,0,86125.0,110.758,194.162,0.636764,2022-06,,All County,HVAC
1,801,981008070,0,6,2022-07-01,109730,PERFECT AIR SOLUTIONS,CPF3114,"5/8""X50' REFRIG. COIL COPPER",RL,...,7,2496,86125.0,55.379,108.971,0.663042,2022-07,,All County,HVAC
2,801,981012717,0,1,2022-12-22,109631,"A & M HEATING & A/C,",CPF3114,"5/8""X50' REFRIG. COIL COPPER",RL,...,7,2496,86125.0,55.379,108.971,0.663042,2022-12,,All County,HVAC
3,801,981006017,0,3,2022-04-21,101013,BELL MECHANICAL LLC,CPF3012A,"1""X10' ""M"" COPPER PIPE",LG,...,2,63025,86125.0,186.15,59.6,0.242523,2022-04,,All County,HVAC
4,801,981007484,0,4,2022-06-13,101013,BELL MECHANICAL LLC,CPF3012A,"1""X10' ""M"" COPPER PIPE",LG,...,2,63025,86125.0,186.15,59.6,0.242523,2022-06,,All County,HVAC


In [8]:
sales_df.columns

Index(['whse', 'orderno', 'ordersuf', 'lineno', 'invoicedt', 'custno',
       'custname', 'item', 'itemdesc', 'unit', 'unitconv', 'units', 'unitcost',
       'replcost', 'unitprice', 'netamt', 'returnfl', 'transtype', 'prod_type',
       'prodcat', 'cat_descrip', 'rowpointer', 'priceorigcd', 'pdrecno',
       'vendno', 'xcost_adj', 'GP$', 'Margin', 'month', 'Exclusion',
       'whse_name', 'division'],
      dtype='object')

## Total sales by item and total customers

In [9]:
# Stage 1 of a two-stage grouping: total units, net amount and adjusted cost for
# every (item, description, customer) combination. Because this leaves one row
# per customer, the second stage can count customers per item by counting rows
# while summing the same totals up to item level.
agg_sales_pre_df = (
    sales_df
    .groupby(['item', 'itemdesc', 'custno'], as_index=False)[['units', 'netamt', 'xcost_adj']]
    .sum()
)

In [10]:
# Copy the per-item, per-customer totals to the system clipboard (index column omitted).
agg_sales_pre_df.to_clipboard(index=False)

In [11]:
agg_sales_pre_df

Unnamed: 0,item,itemdesc,custno,units,netamt,xcost_adj
0,CPF3000A,"1/2"" X 10' ""L"" COPPER",101013,18.0,464.76,360.35100
1,CPF3000A,"1/2"" X 10' ""L"" COPPER",101046,1.0,25.82,20.92825
2,CPF3001,"3/4""X20' ""L"" COPPER PIPE",101013,10.0,991.40,702.41000
3,CPF3001A,"3/4""X10' ""L"" COPPER PIPE",101013,118.0,5493.16,4005.09945
4,CPF3011,"3/4""X20' ""M"" COPPER PIPE",101013,5.0,313.15,220.42715
...,...,...,...,...,...,...
349,PVC50A,"2""X10' PVC SCH40 PIPE",111261,2.0,62.12,42.65762
350,PVC50A,"2""X10' PVC SCH40 PIPE",111965,2.0,66.40,38.69048
351,PVC50A,"2""X10' PVC SCH40 PIPE",112441,3.0,91.80,57.20244
352,PVC50A,"2""X10' PVC SCH40 PIPE",112601,1.0,28.56,19.06748


In [12]:
# Stage 2: roll the per-customer rows up to item level. Counting 'custno' rows
# gives the number of customers; the numeric columns are summed.
agg_sales_df = agg_sales_pre_df.groupby(['item', 'itemdesc'], as_index=False).agg(**{
    "total cust's": ('custno', 'count'),
    'total units': ('units', 'sum'),
    'netamt': ('netamt', 'sum'),
    'xcost_adj': ('xcost_adj', 'sum'),
})

In [13]:
# Percentage margin per item: 100 * (net amount - adjusted cost) / net amount,
# rounded to two decimals. When an item's net amount sums to zero the ratio is
# undefined; mask the zero denominator so the result is NaN instead of +/-inf.
agg_sales_df['total margin'] = (
    100 * (agg_sales_df['netamt'] - agg_sales_df['xcost_adj'])
    / agg_sales_df['netamt'].where(agg_sales_df['netamt'] != 0)
).round(2)

In [14]:
# The dollar totals were only needed to derive 'total margin'.
agg_sales_df = agg_sales_df.drop(['netamt', 'xcost_adj'], axis=1)

In [15]:
# Copy the per-item summary to the system clipboard (index column omitted).
agg_sales_df.to_clipboard(index=False)

## Sales on overrides

In [16]:
# Sales lines whose price-origin code is 'O' (the override lines of this section).
overrides_df = sales_df[sales_df['priceorigcd'].eq('O')]

In [17]:
# Display the override lines with a handful of columns hidden; nothing is assigned.
overrides_df.drop(
    ['ordersuf', 'lineno', 'unit', 'unitconv', 'month', 'Exclusion', 'cat_descrip', 'whse_name', 'division'],
    axis=1,
)

Unnamed: 0,whse,orderno,invoicedt,custno,custname,item,itemdesc,units,unitcost,replcost,...,transtype,prod_type,prodcat,rowpointer,priceorigcd,pdrecno,vendno,xcost_adj,GP$,Margin
0,801,981007725,2022-06-21,112441,MECHANICAL INSTALLATION CORP.,CPF3114,"5/8""X50' REFRIG. COIL COPPER",2.0,55.37900,106.72,...,CS,stocked,6380,a09b5422-d97d-878a-9814-c288587bced4,O,0,86125.0,110.75800,194.16200,0.636764
14,801,981007673,2022-06-20,112579,RAUL RIVERA,CPF3110,"1/4"" X 50' REFRIG. COIL COPPER",1.0,40.06200,39.65,...,CS,stocked,6380,92b84891-26aa-d99e-9814-0e78a8aa975b,O,0,86125.0,40.06200,47.93800,0.544750
16,801,981008572,2022-07-21,101354,TOTAL COMFORT,CPF3110,"1/4"" X 50' REFRIG. COIL COPPER",1.0,40.06200,39.65,...,SO,stocked,6380,be00e090-2c26-f096-9a14-a95d709fd015,O,0,86125.0,40.06200,16.58800,0.292816
17,801,981010608,2022-10-03,101354,TOTAL COMFORT,CPF3110,"1/4"" X 50' REFRIG. COIL COPPER",1.0,40.06200,39.65,...,SO,stocked,6380,946f0448-d2f6-8d8a-9e14-7be8a00bb0f1,O,0,86125.0,40.06200,16.58800,0.292816
24,801,981007671,2022-06-20,109698,MERCURY ENERGY MGMT. INC.,CPF3116,"7/8""X50' REFRIG. COIL COPPER",10.0,194.20428,188.99,...,SO,stocked,6380,91553ec0-4c02-8fb5-9814-7b771873e7cf,O,0,86125.0,1942.04280,289.35720,0.129675
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1512,801,981004013,2022-02-03,109648,ALL SEASONS HVAC REF LIC FILE,PVC26,"3/4""X10' SCH40 PVC PIPE",10.0,6.03045,6.12,...,SO,stocked,7000,f24f5533-1480-56ab-9014-1a02b87aeb7b,O,0,86125.0,60.30450,27.19550,0.310806
1545,801,981005316,2022-03-31,101062,SOUNDVIEW SHEETMETAL LLC,PVC26,"3/4""X10' SCH40 PVC PIPE",1.0,6.54419,6.12,...,SO,stocked,7000,f5df97dc-5b21-afbb-9314-8624508dc718,O,9348,86125.0,6.54419,2.87581,0.305288
1549,801,981005617,2022-04-12,101062,SOUNDVIEW SHEETMETAL LLC,PVC26,"3/4""X10' SCH40 PVC PIPE",2.0,6.54419,6.12,...,SO,stocked,7000,9823b1d2-5018-27b4-9314-9edd883a7275,O,9348,86125.0,13.08838,5.75162,0.305288
1566,801,981006434,2022-05-06,101015,BOTTINI FUEL,PVC26,"3/4""X10' SCH40 PVC PIPE",10.0,6.71920,6.12,...,SO,stocked,7000,f2d59bd7-e57f-baa4-9514-2aa8d80ba91f,O,0,86125.0,67.19200,24.20800,0.264858


In [18]:
# Total units sold on override, per item.
agg_overrides_df = (
    overrides_df
    .groupby('item', as_index=False)[['units']]
    .sum()
    .rename(columns={'units': 'units overriden'})
)

In [19]:
agg_overrides_df

Unnamed: 0,item,units overriden
0,CPF3000A,10.0
1,CPF3001,5.0
2,CPF3001A,98.0
3,CPF3011A,27.0
4,CPF3023,50.0
5,CPF3110,3.0
6,CPF3112,80.0
7,CPF3114,2.0
8,CPF3115,36.0
9,CPF3116,39.0


## Total PT 7 sales (no overrides)

## <span style="color:red"> Change vendor name in last part of file path

In [20]:
# PT 7 price records for the vendor (vendor name is part of the file name).
loc = Path.cwd().parent.joinpath('data', 'merfish_PT_7_PRs.csv')
PT_7_PRs = pd.read_csv(loc)

In [21]:
# Remove rows that are exact duplicates across every column.
PT_7_PRs = PT_7_PRs.drop_duplicates()

In [22]:
# Keep the sales lines that match a PT 7 price record on (product, pdrecno).
# PT_7_PRs is only de-duplicated on whole rows, so the same (prod, pdrecno) pair
# can still appear more than once; de-duplicate the join keys so that no sales
# line is repeated by the merge (which would inflate unit totals downstream).
PT_7_sales_by_PR_df = sales_df.merge(
    PT_7_PRs[['prod', 'pdrecno']].drop_duplicates(),
    how='inner',
    left_on=['item', 'pdrecno'],
    right_on=['prod', 'pdrecno'],
)

In [23]:
PT_7_sales_by_PR_df.head()

Unnamed: 0,whse,orderno,ordersuf,lineno,invoicedt,custno,custname,item,itemdesc,unit,...,pdrecno,vendno,xcost_adj,GP$,Margin,month,Exclusion,whse_name,division,prod
0,801,981008070,0,6,2022-07-01,109730,PERFECT AIR SOLUTIONS,CPF3114,"5/8""X50' REFRIG. COIL COPPER",RL,...,2496,86125.0,55.379,108.971,0.663042,2022-07,,All County,HVAC,CPF3114
1,801,981012717,0,1,2022-12-22,109631,"A & M HEATING & A/C,",CPF3114,"5/8""X50' REFRIG. COIL COPPER",RL,...,2496,86125.0,55.379,108.971,0.663042,2022-12,,All County,HVAC,CPF3114
2,801,981003354,0,1,2022-01-06,101006,AMX COOLING & HEATING LLC,CPF3110,"1/4"" X 50' REFRIG. COIL COPPER",EA,...,2490,86125.0,25.794,29.146,0.530506,2022-01,,All County,HVAC,CPF3110
3,801,981003395,0,2,2022-01-10,109698,MERCURY ENERGY MGMT. INC.,CPF3116,"7/8""X50' REFRIG. COIL COPPER",RL,...,2498,86125.0,1889.9,168.3,0.08177,2022-01,,All County,HVAC,CPF3116
4,801,981004869,0,12,2022-03-09,101013,BELL MECHANICAL LLC,CPF3011,"3/4""X20' ""M"" COPPER PIPE",LG,...,2472,86125.0,220.42715,92.72285,0.296097,2022-03,,All County,HVAC,CPF3011


In [24]:
PT_7_sales_by_PR_df.shape

(572, 33)

In [25]:
# Margin measured against replacement cost: (net amount - replcost * units) / net amount.
# A line with a zero net amount has no defined margin; mask the zero denominator
# so the result is NaN instead of +/-inf.
PT_7_sales_by_PR_df['margin by replcost'] = (
    PT_7_sales_by_PR_df['netamt'] - PT_7_sales_by_PR_df['replcost'] * PT_7_sales_by_PR_df['units']
) / PT_7_sales_by_PR_df['netamt'].where(PT_7_sales_by_PR_df['netamt'] != 0)

In [26]:
# Trim the columns that are not needed for the PT 7 review.
PT_7_sales_by_PR_df = PT_7_sales_by_PR_df.drop(
    ['orderno', 'ordersuf', 'lineno', 'unit', 'unitconv', 'returnfl', 'prod_type', 'prodcat', 'cat_descrip', 'rowpointer', 'Exclusion'],
    axis=1,
)

In [27]:
# Inspect output: copy the PT 7 sales lines to the system clipboard (index column omitted).
PT_7_sales_by_PR_df.to_clipboard(index=False)

In [28]:
# Keep only the lines whose price-origin code is '7', which leaves out the 'O' override lines.
PT_7_no_overs_df = PT_7_sales_by_PR_df[PT_7_sales_by_PR_df['priceorigcd'].eq('7')]

In [29]:
# Inspect output: copy the non-override PT 7 lines to the system clipboard (index column omitted).
PT_7_no_overs_df.to_clipboard(index=False)

In [30]:
# Stage 1 of a two-stage grouping: total units per (item, customer).
# Stage 2 (a later cell) groups by item only, counting the customer rows and
# summing units, which yields both the number of customers and the total units
# for each item.
PT_7_no_overs_agg_pre_df = (
    PT_7_no_overs_df
    .groupby(['item', 'custno'], as_index=False)[['units']]
    .sum()
)

In [31]:
# Copy the per-item, per-customer PT 7 units to the system clipboard (index column omitted).
PT_7_no_overs_agg_pre_df.to_clipboard(index=False)

In [32]:
# Sanity check: show any repeated (item, customer) pair; an empty frame means there are none.
PT_7_no_overs_agg_pre_df[PT_7_no_overs_agg_pre_df.duplicated(subset=['item', 'custno'])]

Unnamed: 0,item,custno,units


In [33]:
# Stage 2: per item, count the customer rows and total the units.
PT_7_no_overs_agg_df = PT_7_no_overs_agg_pre_df.groupby('item', as_index=False).agg(**{
    "PT 7 cust's": ('custno', 'count'),
    'PT 7 units': ('units', 'sum'),
})

In [34]:
PT_7_no_overs_agg_df

Unnamed: 0,item,PT 7 cust's,PT 7 units
0,CPF3011,1,5.0
1,CPF3024,1,1.0
2,CPF3110,1,1.0
3,CPF3112,1,1.0
4,CPF3114,2,2.0
5,CPF3116,1,10.0
6,PVC251,95,1106.0
7,PVC26,8,39.0
8,PVC401,1,24.0
9,PVC5,2,16.0


In [35]:
# Per item, the 0.5 quantile (median) and 1.0 quantile (maximum) of the units
# bought per customer, using midpoint interpolation.
PT_7_no_overs_quartiles_df = (
    PT_7_no_overs_agg_pre_df
    .groupby('item')[['units']]
    .quantile(q=[.5, 1], interpolation='midpoint')
)

In [36]:
PT_7_no_overs_quartiles_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,units
item,Unnamed: 1_level_1,Unnamed: 2_level_1
CPF3011,0.5,5.0
CPF3011,1.0,5.0
CPF3024,0.5,1.0
CPF3024,1.0,1.0
CPF3110,0.5,1.0


In [37]:
# Move the index levels (item and quantile) back into ordinary columns.
PT_7_no_overs_quartiles_df = PT_7_no_overs_quartiles_df.reset_index()

In [38]:
# One row per item, holding a tuple of that item's quantile values of units per
# customer. Items keep the order in which they first appear.
quartile_df = pd.DataFrame(
    [
        [prod, tuple(rows['units'].tolist())]
        for prod, rows in PT_7_no_overs_quartiles_df.groupby('item', sort=False)
    ],
    columns=['item', 'PT 7 quartiles'],
)

In [39]:
quartile_df

Unnamed: 0,item,PT 7 quartiles
0,CPF3011,"(5.0, 5.0)"
1,CPF3024,"(1.0, 1.0)"
2,CPF3110,"(1.0, 1.0)"
3,CPF3112,"(1.0, 1.0)"
4,CPF3114,"(1.0, 1.0)"
5,CPF3116,"(10.0, 10.0)"
6,PVC251,"(4.0, 199.0)"
7,PVC26,"(2.5, 23.0)"
8,PVC401,"(24.0, 24.0)"
9,PVC5,"(8.0, 11.0)"


# Standard cost, multiplier, and standard price

## <span style="color:red"> Change the vendor name at end of file path

In [69]:
# Cost, multiplier and standard-price columns come from the vendor's product info file.
loc = Path.cwd().parent.joinpath('data', 'merfish_product_info.csv')
cost_data_df = pd.read_csv(loc)

In [70]:
cost_data_df.columns

Index(['whse', 'vendno', 'name', 'prod', 'descrip_1', 'descrip_2', 'mfgprod',
       'vendprod', 'pricetype', 'prodcat', 'prodcat_descrip', 'unitstock',
       'unitsell', 'unitconv', 'listprice', 'avgcost', 'lastcost', 'replcost',
       'replcostdt', 'stndcost', 'stndcostdt', 'priceonty', 'prcmult_1',
       't12m_sales', 'qtyonhand', 'qtyonorder', 'stndprice'],
      dtype='object')

## <span style="color:red"> Change vendor number(s)

In [71]:
# Restrict the cost data to the vendor numbers under review.
specific_cost_data_df = cost_data_df[cost_data_df['vendno'].isin([18821, 86125])]

In [92]:
# Keep one row per product: the one with the latest 'stndcostdt' (ties broken by
# the highest 'stndcost'). Rows are sorted ascending and the last one per product
# is kept. NaNs sort last by default, so a row with a missing date or cost would
# otherwise be picked as the "latest"; na_position='first' puts them ahead of the
# real values instead.
specific_cost_data_df_cleaned = (
    specific_cost_data_df[['prod', 'descrip_1', 'descrip_2', 'stndcostdt', 'stndcost', 'prcmult_1', 'stndprice', 'vendno']]
    .sort_values(by=['prod', 'stndcostdt', 'stndcost'], na_position='first')
    .drop_duplicates(subset=['prod'], keep='last')
)

In [93]:
# Percentage margin implied by the price multiplier m: (100*m - 100) / m, rounded to two decimals.
specific_cost_data_df_cleaned['stndmargin'] = (
    (specific_cost_data_df_cleaned['prcmult_1'] * 100 - 100)
    / specific_cost_data_df_cleaned['prcmult_1']
).round(2)

In [94]:
specific_cost_data_df_cleaned

Unnamed: 0,prod,descrip_1,descrip_2,stndcostdt,stndcost,prcmult_1,stndprice,vendno,stndmargin
38,BLK2100,"1/2"" X 21' TC BLACK PIPE",,2021-09-17,17.24,4.5,77.58,86125,77.78
37,BLK2100A,"1/2"" X 10' TC BLACK PIPE",,2021-08-16,17.24,4.55,78.44,86125,78.02
39,BLK2101,"3/4""X21' TC BLACK PIPE",,2021-08-16,34.98,2.46,86.05,86125,59.35
40,BLK2101A,"3/4""X10' TC BLACK PIPE",,2021-08-16,18.09,1.91,34.55,86125,47.64
36,BLK2102A,"1""X10' TC BLACK PIPE",,2021-10-13,23.14,1.0,23.14,86125,0.0
2,BLK2103,"1-1/4""X21' TC BLACK",PIPE,2021-08-16,54.87,2.03,111.39,86125,50.74
11,CPF3000,"1/2 ""X 20' ""L"" COPPER",PIPE,2021-11-08,41.79,1.0,41.79,86125,0.0
33,CPF3000A,"1/2"" X 10' ""L"" COPPER",,2021-08-16,19.88,1.612903,32.06,86125,38.0
20,CPF3001,"3/4""X20' ""L"" COPPER",PIPE,2021-11-08,68.09,1.612903,109.82,86125,38.0
22,CPF3001A,"3/4""X10' ""L"" COPPER",PIPE,2021-08-16,30.49,1.612903,49.18,86125,38.0


## Put everything together

In [95]:
# Add the overridden-unit totals; items with no overrides get NaN.
merge_1_df = pd.merge(agg_sales_df, agg_overrides_df, how='left', on='item')

In [96]:
# Add the PT 7 customer and unit counts; items with no PT 7 sales get NaN.
merge_2_df = pd.merge(merge_1_df, PT_7_no_overs_agg_df, how='left', on='item')

In [97]:
# Add the per-item quantile tuples; items without them get NaN.
merge_3_df = pd.merge(merge_2_df, quartile_df, how='left', on='item')

In [98]:
# Outer join with the cost data, so products without sales and sold items
# without a cost row are both retained.
merge_4_df = pd.merge(
    merge_3_df,
    specific_cost_data_df_cleaned,
    how='outer',
    left_on='item',
    right_on='prod',
)

In [99]:
# The outer merge leaves 'prod' and 'descrip_1' empty for items that have sales
# but no matching row in the cost data. Back-fill them from the sales-side
# 'item' and 'itemdesc' before 'item' is dropped, so those rows remain
# identifiable in the final record.
final_df = merge_4_df.assign(
    prod=merge_4_df['prod'].fillna(merge_4_df['item']),
    descrip_1=merge_4_df['descrip_1'].fillna(merge_4_df['itemdesc']),
).drop(columns='item')

In [100]:
final_df.columns

Index(['itemdesc', 'total cust's', 'total units', 'total margin',
       'units overriden', 'PT 7 cust's', 'PT 7 units', 'PT 7 quartiles',
       'prod', 'descrip_1', 'descrip_2', 'stndcostdt', 'stndcost', 'prcmult_1',
       'stndprice', 'vendno', 'stndmargin'],
      dtype='object')

In [101]:
final_df.head()

Unnamed: 0,itemdesc,total cust's,total units,total margin,units overriden,PT 7 cust's,PT 7 units,PT 7 quartiles,prod,descrip_1,descrip_2,stndcostdt,stndcost,prcmult_1,stndprice,vendno,stndmargin
0,"1/2"" X 10' ""L"" COPPER",2.0,19.0,22.28,10.0,,,,CPF3000A,"1/2"" X 10' ""L"" COPPER",,2021-08-16,19.88,1.612903,32.06,86125,38.0
1,"3/4""X20' ""L"" COPPER PIPE",1.0,10.0,29.15,5.0,,,,CPF3001,"3/4""X20' ""L"" COPPER",PIPE,2021-11-08,68.09,1.612903,109.82,86125,38.0
2,"3/4""X10' ""L"" COPPER PIPE",1.0,118.0,27.09,98.0,,,,CPF3001A,"3/4""X10' ""L"" COPPER",PIPE,2021-08-16,30.49,1.612903,49.18,86125,38.0
3,"3/4""X20' ""M"" COPPER PIPE",1.0,5.0,29.61,,1.0,5.0,"(5.0, 5.0)",CPF3011,"3/4""X20' ""M"" COPPER",PIPE,2021-11-08,49.52,1.612903,79.87,86125,38.0
4,"3/4""X10' ""M"" COPPER PIPE",2.0,44.0,19.7,27.0,,,,CPF3011A,"3/4""X10' ""M"" COPPER",PIPE,2021-08-16,22.17,1.612903,35.76,86125,38.0


In [120]:
# Append empty placeholder columns (all None), in this order.
final_df = final_df.assign(**dict.fromkeys(
    [
        'new stndcost',
        'stndcost no adjust.',
        'new stndprice',
        'new stndmargin',
        'move rate',
        'margin change',
    ],
    None,
))

In [124]:
# Select and order the columns for the saved record; columns not listed are discarded.
final_df = final_df.loc[:, [
    'prod',
    'descrip_1',
    'descrip_2',
    "total cust's",
    'total units',
    'total margin',
    'units overriden',
    "PT 7 cust's",
    'PT 7 units',
    'PT 7 quartiles',
    'stndcost',
    'stndprice',
    'stndmargin',
    'new stndcost',
    'stndcost no adjust.',
    'new stndprice',
    'new stndmargin',
    'move rate',
    'margin change',
]]

## Save records of sales

## <span style="color:red"> Change vendor name at end of file path

In [123]:
# Write the final table to the vendor's sales-record CSV (index column omitted).
loc = Path.cwd().parent.joinpath('data', 'merfish_sales_record.csv')
final_df.to_csv(loc, index=False)