In [7]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.optimize import minimize
from pymoo.core.problem import Problem

# Optimization problem definition
class MultiObjectiveLogisticsProblem(Problem):
    """Two-objective pymoo problem for placing one logistics center.

    The two decision variables are a candidate (latitude, longitude) in
    degrees, bounded to [-90, 90] x [-180, 180].

    Objectives (both minimized):
        f1: sum of haversine distances from every row of ``cluster_data``
            to the candidate location.
        f2: sum of the ``delay_time`` column of ``cluster_data``.

    NOTE(review): f2 is computed only from ``cluster_data`` and never from
    the candidate location, so it is the same constant for every candidate
    and cannot influence which location is selected. Confirm whether the
    delay objective was meant to depend on the location.
    """

    def __init__(self, cluster_data):
        """Store the cluster rows and declare a 2-variable, 2-objective problem.

        Args:
            cluster_data: DataFrame with 'Geolocation_lat', 'Geolocation_lng'
                and 'delay_time' columns (the columns read by ``_evaluate``).
        """
        super().__init__(n_var=2, n_obj=2, n_constr=0,
                         xl=np.array([-90.0, -180.0]),
                         xu=np.array([90.0, 180.0]))
        self.cluster_data = cluster_data

    def _evaluate(self, X, out, *args, **kwargs):
        """Evaluate all candidates in ``X`` at once and write ``out["F"]``.

        Args:
            X: 2-D array with one candidate per row, column 0 = latitude and
                column 1 = longitude.
            out: dict that receives the objective matrix under key "F"
                (one row per candidate, columns f1 and f2).
        """
        X = np.asarray(X, dtype=float)

        # Pull the columns out once instead of doing per-row .iloc lookups
        # for every candidate.
        lats = self.cluster_data['Geolocation_lat'].to_numpy(dtype=float)
        lngs = self.cluster_data['Geolocation_lng'].to_numpy(dtype=float)
        delays = self.cluster_data['delay_time'].to_numpy(dtype=float)

        # Broadcast (1, n_points) against (n_candidates, 1) to get the full
        # candidate-by-point distance matrix in one haversine call.
        distances = haversine(lats[np.newaxis, :], lngs[np.newaxis, :],
                              X[:, 0:1], X[:, 1:2])
        f1 = distances.sum(axis=1)  # delivery distance

        # The delay total does not depend on the candidate, so compute it once
        # and repeat it for every row.
        f2 = np.full(X.shape[0], delays.sum())  # delay value

        out["F"] = np.column_stack([f1, f2])

# Distance calculation function
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    All four arguments are in degrees and may be scalars or NumPy arrays
    that broadcast against each other; the result has the broadcast shape.

    Args:
        lat1, lon1: latitude and longitude of the first point(s).
        lat2, lon2: latitude and longitude of the second point(s).

    Returns:
        Distance(s) in km, using an Earth radius of 6371 km.
    """
    R = 6371  # Earth radius (km)
    d_lat = np.radians(lat2 - lat1)
    d_lon = np.radians(lon2 - lon1)
    a = np.sin(d_lat / 2) ** 2 + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(d_lon / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) NaN; clamp before taking roots.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c


In [10]:

# Load and preprocess the data
data = pd.read_csv("delivery_location.csv")

# Convert both timestamp columns to datetime
data[['Order_purchase_timestamp', 'Order_delivered_customer_date']] = (
    data[['Order_purchase_timestamp', 'Order_delivered_customer_date']].apply(pd.to_datetime)
)

# Delay time: elapsed time from purchase to actual customer delivery, in hours
data['delay_time'] = (
    data['Order_delivered_customer_date']
    .sub(data['Order_purchase_timestamp'])
    .dt.total_seconds()
    .div(3600)
)

In [11]:
# Run clustering (about 90k rows were too many and made execution too slow,
# so the analysis proceeds on 1000 clusters instead)
num_final_clusters = 1000
coords = data[['Geolocation_lat', 'Geolocation_lng']].to_numpy()
kmeans = KMeans(n_clusters=num_final_clusters, random_state=0)
kmeans.fit(coords)
# Tag every row with the cluster it was assigned to
data['cluster'] = kmeans.labels_

In [14]:
# Collapse each cluster to the mean of its coordinates and delay time;
# the cluster label is kept as an ordinary column.
data1 = data.groupby("cluster", as_index=False)[
    ['Geolocation_lat', 'Geolocation_lng', 'delay_time']
].mean()

In [18]:
# Remove the cluster label column from data1 in place
del data1['cluster']

In [19]:
# Run clustering again, this time on the per-cluster mean coordinates
num_final_clusters = 5
coords1 = data1[['Geolocation_lat', 'Geolocation_lng']].to_numpy()
final_kmeans = KMeans(n_clusters=num_final_clusters, random_state=0)
final_kmeans.fit(coords1)
# Label each aggregated row with its final cluster
data1['cluster'] = final_kmeans.labels_

In [21]:
# Number of aggregated rows assigned to each final cluster
data1.cluster.value_counts()

cluster
4    433
1    228
0    169
3     95
2     75
Name: count, dtype: int64

In [22]:
# Find the optimal logistics-center location for each cluster
optimal_centers = []
termination = ('n_gen', 200)  # same termination setting for every run
for label in range(num_final_clusters):
    cluster_data = data1.loc[data1['cluster'] == label]
    problem = MultiObjectiveLogisticsProblem(cluster_data)
    algorithm = NSGA2(pop_size=100)  # new algorithm object for each cluster
    # NOTE(review): save_history=True is requested but nothing in this cell
    # reads res.history; confirm it is still needed.
    res = minimize(problem, algorithm, termination, seed=1, save_history=True)
    # Keep the decision variables and the objective values of the result
    optimal_centers.append((res.X, res.F))

# Print the results (only the first returned solution's coordinates are shown)
for i, (solution, function_values) in enumerate(optimal_centers):
    print(f"Cluster {i+1} - Optimal Location: Latitude {solution[0][0]}, Longitude {solution[0][1]}, Function Values: {function_values}")

Cluster 1 - Optimal Location: Latitude -9.35254615165369, Longitude -38.70385475428203, Function Values: [[71893.22497137 83460.46793053]]
Cluster 2 - Optimal Location: Latitude -26.316037120127657, Longitude -51.587051934975584, Function Values: [[83006.39060077 77206.31913531]]
Cluster 3 - Optimal Location: Latitude -12.186938225549543, Longitude -58.16540041365623, Function Values: [[41461.65743177 36664.99227984]]
Cluster 4 - Optimal Location: Latitude -5.073766581459029, Longitude -46.66793689625047, Function Values: [[48109.5860728  52826.51772666]
 [48109.5860728  52826.51772666]
 [48109.5860728  52826.51772666]
 [48109.5860728  52826.51772666]]
Cluster 5 - Optimal Location: Latitude -20.98257694944194, Longitude -45.90343151948893, Function Values: [[170125.21141388 135095.94135103]
 [170125.21141388 135095.94135103]]


In [25]:
import folium
from IPython.display import display

# Optimal location coordinates, taken from the optimizer results instead of
# being copied by hand from printed output, so the map always matches the run.
# For each cluster the first returned solution is used, matching what the
# result-printing loop reports.
optimal_locations = [
    (float(solution[0][0]), float(solution[0][1]))
    for solution, _objectives in optimal_centers
]

# Center the map on the mean of the plotted locations
latitudes = [loc[0] for loc in optimal_locations]
longitudes = [loc[1] for loc in optimal_locations]
map_center = [np.mean(latitudes), np.mean(longitudes)]

mymap = folium.Map(location=map_center, zoom_start=2)

# One marker per cluster's chosen location
for location in optimal_locations:
    folium.Marker(location=location).add_to(mymap)


display(mymap)