In [1]:
import pandas as pd
import os
import warnings

pd.set_option('display.expand_frame_repr', False)
warnings.filterwarnings("ignore", message="Workbook contains no default style, apply openpyxl's default")

shop = 'Nestle'
sc_start_date = 'March 16'
sc_end_date = 'April 6'

# Order Data cannot be extracted from Lazada Seller Center in one go, so we had to break it down into multiple excel files
# All excel files from the same shop are stored in one folder
folder = f'Lazada Seller Center/{shop}'
excel_files = os.listdir(folder)

# Loop over all the excel files in the folder and import only the "orderNumber" and "status" columns
dfs = {}
for file in excel_files:
    file_path = os.path.join(folder, file)
    df = pd.read_excel(file_path, usecols=["orderNumber", "status", "createTime"])
    dfs[file] = df

# The result will be one DataFrame with "orderNumber" and "status" columns for each excel file
for file, df in dfs.items():
    print(file)
    print("Number of rows:", len(df))
    print(df.head(),'\n')

Lazada Nestle (April 5 - April 6).xlsx
Number of rows: 35452
          createTime      orderNumber     status
0  06 Apr 2024 23:59  815632621032626  delivered
1  06 Apr 2024 23:59  815632621032626  delivered
2  06 Apr 2024 23:59  815632621032626  delivered
3  06 Apr 2024 23:58  815615071012948  delivered
4  06 Apr 2024 23:58  821590341526793  confirmed 

Lazada Nestle (March 16- March 21).xlsx
Number of rows: 92556
          createTime      orderNumber    status
0  21 Mar 2024 23:59  813018543764322  canceled
1  21 Mar 2024 23:59  813018543764322  canceled
2  21 Mar 2024 23:59  813018543764322  canceled
3  21 Mar 2024 23:59  813018543764322  canceled
4  21 Mar 2024 23:59  813018543764322  canceled 

Lazada Nestle (March 22 - March 24).xlsx
Number of rows: 71663
          createTime      orderNumber            status
0  24 Mar 2024 23:59  814571149431252  Package Returned
1  24 Mar 2024 23:59  814571149431252  Package Returned
2  24 Mar 2024 23:59  814571149431252          canceled
3  2

In [9]:
# Merge into one DataFrame
seller_center = pd.concat(dfs.values(), keys=dfs.keys(), ignore_index=True)
print(f"Seller Center Lazada {shop} ({sc_start_date} - {sc_end_date})")
print("Number of rows:", len(seller_center))

Seller Center Lazada Nestle (March 16 - April 6)
Number of rows: 461449


In [10]:
# Remove duplicate entries
seller_center = seller_center.drop_duplicates(subset=['orderNumber'])
print(f"Seller Center Lazada {shop} {shop} ({sc_start_date} - {sc_end_date})")
print("Number of unique rows:", len(seller_center))

Seller Center Lazada Nestle Nestle (March 16 - April 6)
Number of unique rows: 137834


In [11]:
# Import Snowflake data into a DataFrame
snowflake = pd.read_csv(f'Snowflake/Snowflake Lazada {shop}.csv', usecols=['PLFM_ORD_ID','ORD_STAT_END', 'ORD_STAT_PLFM', 'ORDER_DATE'])

# Determine date range of the dataset
date_range = pd.date_range(start=snowflake.loc[:, 'ORDER_DATE'].min(), end=snowflake.loc[:, 'ORDER_DATE'].max())
sf_start_date = date_range[0].date()
sf_end_date = date_range[-1].date()

print(f"Snowflake Lazada {shop} ({sf_start_date} - {sf_end_date})")
print("Number of rows:", len(snowflake))

# Remove duplicates
snowflake.drop_duplicates(subset=['PLFM_ORD_ID'], inplace=True, ignore_index=True)
print("Number of unique rows:", len(snowflake), '\n')
print(snowflake.head())

Snowflake Lazada Nestle (2024-03-16 - 2024-04-06)
Number of rows: 529104
Number of unique rows: 152828 

       PLFM_ORD_ID ORD_STAT_END ORD_STAT_PLFM  ORDER_DATE
0  815578285042883    delivered     delivered  2024-04-06
1  810486785429988    delivered     delivered  2024-03-17
2  804229679871645    delivered     delivered  2024-03-16
3  814956760800080    delivered     delivered  2024-03-25
4  811872068558404      shipped       shipped  2024-03-30


In [12]:
# Merge the Seller Center and Snowflake DataFrames so we could compare them
merged_df = pd.merge(seller_center, snowflake, left_on="orderNumber", right_on="PLFM_ORD_ID", how="inner")
print(f"Lazada {shop} Seller Center vs Snowflake ({sf_start_date} - {sf_end_date})")
print(merged_df.head(), '\n')
total = len(merged_df)
print("Number of rows:", total)

# Count the number of mismatched status
mismatched_status = merged_df[merged_df["status"] != merged_df["ORD_STAT_PLFM"]]
count_mismatched_status = len(mismatched_status)
print("Number of mismatched status:", count_mismatched_status)

# Calculate percent mismatched status
percent_mismatched_status = (count_mismatched_status / total) * 100
print("Percent mismatched status:", round(percent_mismatched_status, 2), "%")


Lazada Nestle Seller Center vs Snowflake (2024-03-16 - 2024-04-06)
          createTime      orderNumber     status      PLFM_ORD_ID ORD_STAT_END ORD_STAT_PLFM  ORDER_DATE
0  06 Apr 2024 23:59  815632621032626  delivered  815632621032626    delivered     delivered  2024-04-06
1  06 Apr 2024 23:58  815615071012948  delivered  815615071012948    delivered     delivered  2024-04-06
2  06 Apr 2024 23:58  821590341526793  confirmed  821590341526793    delivered     confirmed  2024-04-06
3  06 Apr 2024 23:58  815637221592412    shipped  815637221592412    delivered     delivered  2024-04-06
4  06 Apr 2024 23:58  815633419064003  delivered  815633419064003    delivered     delivered  2024-04-06 

Number of rows: 136870
Number of mismatched status: 59135
Percent mismatched status: 43.21 %


In [13]:
# Check possible values of status column and count 
print(f"Seller Center Lazada {shop}")
print("Number of unique rows:", len(seller_center), '\n')
seller_center_status = seller_center['status'].value_counts()
print(seller_center_status, '\n')
print()
print(f"Snowflake Lazada {shop}")
print("Number of unique rows:", len(snowflake), '\n')
snowflake_ORD_STAT_PLFM = snowflake['ORD_STAT_PLFM'].value_counts()
print(snowflake_ORD_STAT_PLFM, '\n')
print()
print(f"Snowflake Lazada {shop}")
print("Number of unique rows:", len(snowflake), '\n')
snowflake_ORD_STAT_END = snowflake['ORD_STAT_END'].value_counts()
print(snowflake_ORD_STAT_END)

Seller Center Lazada Nestle
Number of unique rows: 137834 

status
confirmed                          84963
delivered                          36064
canceled                           15192
Package Returned                     967
shipped                              346
ready_to_ship                        116
In Transit: Returning to seller      112
Lost by 3PL                           33
Buyer Delivery Failed                 15
Package scrapped                      14
Damaged by 3PL                         9
Seller Delivery Failed                 2
returned                               1
Name: count, dtype: int64 


Snowflake Lazada Nestle
Number of unique rows: 152828 

ORD_STAT_PLFM
delivered               78995
confirmed               39249
canceled                16010
shipped                 15951
shipped_back_success      902
ready_to_ship             901
packed                    349
shipped_back              278
unpaid                     82
lost_by_3pl                47
p

In [14]:
# Get non-end status rows (unpaid, packed, shipped, shipped back, ready tp ship)
non_end_status = ['unpaid', 'packed', 'shipped','shipped_back', 'ready_to_ship']

seller_center = seller_center[seller_center['status'].isin(non_end_status)]
snowflake = snowflake[snowflake['ORD_STAT_PLFM'].isin(non_end_status)]

print(f"Seller Center Lazada {shop} Non-End Status")
print("Number of rows:", len(seller_center), '\n')
seller_center_status = seller_center['status'].value_counts()
print(seller_center_status, '\n')
print()
print(f"Snowflake Lazada {shop} Non-End Status")
print("Number of rows:", len(snowflake), '\n')
snowflake_status = snowflake['ORD_STAT_PLFM'].value_counts()
print(snowflake_status)

Seller Center Lazada Nestle Non-End Status
Number of rows: 462 

status
shipped          346
ready_to_ship    116
Name: count, dtype: int64 


Snowflake Lazada Nestle Non-End Status
Number of rows: 17561 

ORD_STAT_PLFM
shipped          15951
ready_to_ship      901
packed             349
shipped_back       278
unpaid              82
Name: count, dtype: int64


In [15]:
# Merge the Seller Center and Snowflake DataFrames so we could compare them
final_df = pd.merge(seller_center, snowflake, left_on="orderNumber", right_on="PLFM_ORD_ID", how="inner")
print(f"Lazada {shop} Seller Center vs Snowflake ({sf_start_date} - {sf_end_date})")
print(final_df.head(), '\n')
print("Number of rows:", total)

# Count the number of mismatched status
mismatched_status = final_df[final_df["status"] != final_df["ORD_STAT_PLFM"]]
count_mismatched_status = len(mismatched_status)
print("Number of mismatched status:", count_mismatched_status)

# Calculate percent mismatched status
percent_mismatched_status = (count_mismatched_status / total) * 100
print("Percent mismatched status:", round(percent_mismatched_status, 2), "%")


Lazada Nestle Seller Center vs Snowflake (2024-03-16 - 2024-04-06)
          createTime      orderNumber   status      PLFM_ORD_ID ORD_STAT_END ORD_STAT_PLFM  ORDER_DATE
0  06 Apr 2024 22:19  821565901372571  shipped  821565901372571      shipped       shipped  2024-04-06
1  06 Apr 2024 21:40  821537330912502  shipped  821537330912502    cancelled  shipped_back  2024-04-06
2  06 Apr 2024 21:36  821527749240152  shipped  821527749240152      shipped       shipped  2024-04-06
3  06 Apr 2024 21:33  815565275040152  shipped  815565275040152      shipped       shipped  2024-04-06
4  06 Apr 2024 21:24  815564067748548  shipped  815564067748548      shipped       shipped  2024-04-06 

Number of rows: 136870
Number of mismatched status: 12
Percent mismatched status: 0.01 %
