### Preprocess JD data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime

In [2]:
order = pd.read_csv("./JD_data/JD_order_data.csv")
order.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 549989 entries, 0 to 549988
Data columns (total 17 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   order_ID                    549989 non-null  object 
 1   user_ID                     549989 non-null  object 
 2   sku_ID                      549989 non-null  object 
 3   order_date                  549989 non-null  object 
 4   order_time                  549989 non-null  object 
 5   quantity                    549989 non-null  int64  
 6   type                        549989 non-null  int64  
 7   promise                     549989 non-null  object 
 8   original_unit_price         549989 non-null  float64
 9   final_unit_price            549989 non-null  float64
 10  direct_discount_per_unit    549989 non-null  float64
 11  quantity_discount_per_unit  549989 non-null  float64
 12  bundle_discount_per_unit    549989 non-null  float64
 13  coupon_discoun

In [None]:
# drop duplicate
order = order.drop_duplicates(subset=['order_ID', 'sku_ID'], keep='first')
# 1P
order_1p = order[order['type'] == 1]
# timestamp --- datetime
order_1p['order_date'] = pd.to_datetime(order_1p['order_date'])
order_1p['order_time'] = pd.to_datetime(order_1p['order_time'])
# order time
order_1p.sort_values(by=['order_time'], ascending=True, inplace=True)
# save
order_1p.to_csv('./pre_data/order_1p.csv', index=False)

### Orders fulfilled by FDC and its RDC

In [4]:
network = pd.read_csv("./JD_data/JD_network_data.csv")
order = pd.read_csv("./pre_data/order_1p.csv")

In [5]:
# RDC
rdc_list = fdc_list = network['region_ID'].unique().tolist()
print(rdc_list)
# RDC orders
rdc_order_num = {}
for rdc in rdc_list:
    tmp_rdc_order = order[order['dc_des'] == rdc]
    rdc_order_num[rdc] = len(tmp_rdc_order['order_ID'].unique().tolist())
print(rdc_order_num)

[2, 3, 4, 5, 7, 9, 10, 24]
{2: 21842, 3: 1274, 4: 18197, 5: 29222, 7: 7279, 9: 20637, 10: 5131, 24: 10944}


In [6]:
# FDC
fdc_list = network['dc_ID'].unique().tolist()
print(fdc_list)
# FDC orders
dc_order = pd.DataFrame(columns=['DC', 'RDC', 'total_order_num', 'order_by_DC', 'order_by_RDC', 'sum_DC_RDC'], dtype='Int64')
for fdc in fdc_list:
    rdc = int(network[network['dc_ID'] == fdc]['region_ID'])
    tmp = []
    tmp_fdc_order = order[order['dc_des'] == fdc]
    tmp1 = tmp_fdc_order[tmp_fdc_order['dc_ori'] == fdc]['order_ID'].unique().tolist()
    tmp2 = tmp_fdc_order[tmp_fdc_order['dc_ori'] == rdc]['order_ID'].unique().tolist()
    if fdc == rdc:
        tmp = [fdc, rdc, rdc_order_num[fdc], len(tmp1), 0, len(tmp1)]
    else:
        tmp = [fdc, rdc, len(tmp_fdc_order), len(tmp1), len(tmp2), len(tmp1) + len(tmp2)]
    dc_order.loc[len(dc_order.index)] = tmp

[57, 43, 42, 66, 20, 58, 15, 6, 2, 64, 14, 33, 34, 35, 3, 45, 44, 51, 63, 19, 4, 12, 28, 5, 13, 55, 54, 26, 65, 53, 39, 8, 46, 31, 67, 1, 7, 59, 9, 56, 25, 27, 32, 61, 36, 50, 37, 11, 10, 38, 47, 40, 41, 52, 24, 21]


In [7]:
# calculate ratio
def get_ratio(total_num, sum_num):
    if total_num == 0:
        return 0
    else:
        return sum_num / total_num
dc_order['ratio'] = dc_order.apply(lambda x: get_ratio(x['total_order_num'], x['sum_DC_RDC']), axis=1)

In [8]:
dc_order.sort_values(by=['sum_DC_RDC'], ascending=False, inplace=True)
dc_order.to_csv('./pre_data/dc_order.csv', index=False)
print(dc_order)

    DC  RDC  total_order_num  order_by_DC  order_by_RDC  sum_DC_RDC     ratio
23   5    5            29222        27276             0       27276  0.933406
8    2    2            21842        20214             0       20214  0.925465
38   9    9            20637        19069             0       19069  0.924020
20   4    4            18197        17070             0       17070  0.938067
54  24   24            10944        10214             0       10214  0.933297
41  27    9            10759         1842          7078        8920  0.829073
36   7    7             7279         6606             0        6606  0.907542
4   20    2             7765         4144          1992        6136  0.790212
33  31    7             7098         4033          1792        5825  0.820654
7    6    2             7247         3881          1891        5772  0.796468
22  28    4             6374         3309          1897        5206  0.816756
27  26    5             6911         2345          2771        5