In [2]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
df = pd.read_csv('Delivery_Logistics.csv')
df.head()

Unnamed: 0,delivery_id,delivery_partner,package_type,vehicle_type,delivery_mode,region,weather_condition,distance_km,package_weight_kg,delivery_time_hours,expected_time_hours,delayed,delivery_status,delivery_rating,delivery_cost
0,250.99,delhivery,automobile parts,bike,same day,west,clear,297.0,46.96,1970-01-01 00:00:00.000000008,1970-01-01 00:00:00.000000008,no,delivered,3,1632.7206
1,250.99,xpressbees,cosmetics,ev van,express,central,cold,89.6,47.39,1970-01-01 00:00:00.000000002,1970-01-01 00:00:00.000000003,no,delivered,5,640.17
2,250.99,shadowfax,groceries,truck,two day,east,rainy,273.5,26.89,1970-01-01 00:00:00.000000010,1970-01-01 00:00:00.000000016,no,delivered,4,1448.17
3,250.99,dhl,electronics,ev van,same day,east,cold,269.7,12.69,1970-01-01 00:00:00.000000006,1970-01-01 00:00:00.000000008,no,delivered,3,1486.57
4,250.99,dhl,clothing,van,two day,north,foggy,256.7,37.02,1970-01-01 00:00:00.000000009,1970-01-01 00:00:00.000000016,no,delivered,4,1394.56


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25000 entries, 0 to 24999
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   delivery_id          25000 non-null  float64
 1   delivery_partner     25000 non-null  object 
 2   package_type         25000 non-null  object 
 3   vehicle_type         25000 non-null  object 
 4   delivery_mode        25000 non-null  object 
 5   region               25000 non-null  object 
 6   weather_condition    25000 non-null  object 
 7   distance_km          25000 non-null  float64
 8   package_weight_kg    25000 non-null  float64
 9   delivery_time_hours  25000 non-null  object 
 10  expected_time_hours  25000 non-null  object 
 11  delayed              25000 non-null  object 
 12  delivery_status      25000 non-null  object 
 13  delivery_rating      25000 non-null  int64  
 14  delivery_cost        25000 non-null  float64
dtypes: float64(4), int64(1), object(10)


In [5]:
# PERBAIKAN FORMAT WAKTU
# =========================================================
# ============================
# 1. PAKSA KONVERSI STRING â†’ DATETIME
# ============================
df['delivery_time_hours'] = pd.to_datetime(df['delivery_time_hours'], errors='coerce')
df['expected_time_hours'] = pd.to_datetime(df['expected_time_hours'], errors='coerce')

# ============================
# 2. PISAH TANGGAL & JAM
# ============================
df['delivery_date'] = df['delivery_time_hours'].dt.date
df['delivery_clock'] = df['delivery_time_hours'].dt.time

df['expected_date'] = df['expected_time_hours'].dt.date
df['expected_clock'] = df['expected_time_hours'].dt.time

# ============================
# 3. HITUNG SELISIH WAKTU (delay)
# ============================
df['delay_timedelta'] = df['delivery_time_hours'] - df['expected_time_hours']

# total detik
df['total_delay_seconds'] = df['delay_timedelta'].dt.total_seconds()

# pisah jam / menit / detik
df['delay_hours'] = df['total_delay_seconds'] // 3600
df['delay_minutes'] = (df['total_delay_seconds'] % 3600) // 60
df['delay_seconds'] = df['total_delay_seconds'] % 60

# ============================
# 4. LABEL TERLAMBAT / TIDAK
# ============================
df['is_delayed'] = df['total_delay_seconds'].apply(lambda x: "yes" if x > 0 else "no")

# Lihat hasil
df[['delivery_date','delivery_clock','expected_date','expected_clock',
    'delay_hours','delay_minutes','delay_seconds','is_delayed']].head()

Unnamed: 0,delivery_date,delivery_clock,expected_date,expected_clock,delay_hours,delay_minutes,delay_seconds,is_delayed
0,1970-01-01,00:00:00,1970-01-01,00:00:00,0.0,0.0,0.0,no
1,1970-01-01,00:00:00,1970-01-01,00:00:00,-1.0,59.0,60.0,no
2,1970-01-01,00:00:00,1970-01-01,00:00:00,-1.0,59.0,60.0,no
3,1970-01-01,00:00:00,1970-01-01,00:00:00,-1.0,59.0,60.0,no
4,1970-01-01,00:00:00,1970-01-01,00:00:00,-1.0,59.0,60.0,no


In [6]:

# =========================================================
# ANALISIS RATA-RATA WAKTU PENGIRIMAN PER KATEGORI
# =========================================================
# Rata-rata waktu per partner
df.groupby('delivery_partner')['delivery_time_hours'].mean()

# Rata-rata waktu per kendaraan
df.groupby('vehicle_type')['delivery_time_hours'].mean()

# Rata-rata waktu per region
df.groupby('region')['delivery_time_hours'].mean()



region
central   1970-01-01 00:00:00.000000006
east      1970-01-01 00:00:00.000000006
north     1970-01-01 00:00:00.000000006
south     1970-01-01 00:00:00.000000006
west      1970-01-01 00:00:00.000000006
Name: delivery_time_hours, dtype: datetime64[ns]

In [7]:
# =========================================================
# ANALISIS PERSENTASE KETERLAMBATAN
# =========================================================
df['is_delayed'].value_counts(normalize=True) * 100


is_delayed
no     78.136
yes    21.864
Name: proportion, dtype: float64

In [8]:

# =========================================================
# ANALISIS BIAYA PENGIRIMAN PER KATEGORI
# =========================================================
df.groupby('vehicle_type')['delivery_cost'].mean()



vehicle_type
bike       868.917038
ev bike    869.235484
ev van     866.181463
scooter    862.914003
truck      864.768182
van        857.658050
Name: delivery_cost, dtype: float64

In [9]:

# =========================================================
# PENGARUH JARAK TERHADAP WAKTU
# =========================================================
df[['distance_km', 'delivery_time_hours']].corr()

Unnamed: 0,distance_km,delivery_time_hours
distance_km,1.0,0.685883
delivery_time_hours,0.685883,1.0


In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25000 entries, 0 to 24999
Data columns (total 25 columns):
 #   Column               Non-Null Count  Dtype          
---  ------               --------------  -----          
 0   delivery_id          25000 non-null  float64        
 1   delivery_partner     25000 non-null  object         
 2   package_type         25000 non-null  object         
 3   vehicle_type         25000 non-null  object         
 4   delivery_mode        25000 non-null  object         
 5   region               25000 non-null  object         
 6   weather_condition    25000 non-null  object         
 7   distance_km          25000 non-null  float64        
 8   package_weight_kg    25000 non-null  float64        
 9   delivery_time_hours  25000 non-null  datetime64[ns] 
 10  expected_time_hours  25000 non-null  datetime64[ns] 
 11  delayed              25000 non-null  object         
 12  delivery_status      25000 non-null  object         
 13  delivery_rating 

In [11]:
df.head()

Unnamed: 0,delivery_id,delivery_partner,package_type,vehicle_type,delivery_mode,region,weather_condition,distance_km,package_weight_kg,delivery_time_hours,expected_time_hours,delayed,delivery_status,delivery_rating,delivery_cost,delivery_date,delivery_clock,expected_date,expected_clock,delay_timedelta,total_delay_seconds,delay_hours,delay_minutes,delay_seconds,is_delayed
0,250.99,delhivery,automobile parts,bike,same day,west,clear,297.0,46.96,1970-01-01 00:00:00.000000008,1970-01-01 00:00:00.000000008,no,delivered,3,1632.7206,1970-01-01,00:00:00,1970-01-01,00:00:00,0 days 00:00:00,0.0,0.0,0.0,0.0,no
1,250.99,xpressbees,cosmetics,ev van,express,central,cold,89.6,47.39,1970-01-01 00:00:00.000000002,1970-01-01 00:00:00.000000003,no,delivered,5,640.17,1970-01-01,00:00:00,1970-01-01,00:00:00,-1 days +23:59:59.999999999,-1e-09,-1.0,59.0,60.0,no
2,250.99,shadowfax,groceries,truck,two day,east,rainy,273.5,26.89,1970-01-01 00:00:00.000000010,1970-01-01 00:00:00.000000016,no,delivered,4,1448.17,1970-01-01,00:00:00,1970-01-01,00:00:00,-1 days +23:59:59.999999994,-6e-09,-1.0,59.0,60.0,no
3,250.99,dhl,electronics,ev van,same day,east,cold,269.7,12.69,1970-01-01 00:00:00.000000006,1970-01-01 00:00:00.000000008,no,delivered,3,1486.57,1970-01-01,00:00:00,1970-01-01,00:00:00,-1 days +23:59:59.999999998,-2e-09,-1.0,59.0,60.0,no
4,250.99,dhl,clothing,van,two day,north,foggy,256.7,37.02,1970-01-01 00:00:00.000000009,1970-01-01 00:00:00.000000016,no,delivered,4,1394.56,1970-01-01,00:00:00,1970-01-01,00:00:00,-1 days +23:59:59.999999993,-7e-09,-1.0,59.0,60.0,no


In [12]:
df.to_csv('Delivery_Logistics_Cleaned_Dashboard.csv')