In [1]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score


In [2]:
# Read both input tables; the paths are relative to the notebook's working directory.
pickers, orders = (
    pd.read_csv(csv_path)
    for csv_path in ("../data/pickers.csv", "../data/orders.csv")
)


In [9]:
# Synthetic augmentation: replicate the loaded orders 30 times and attach a
# random distance and zone to every replicated row.
#
# Rebinding `orders` to a 30x copy of itself would multiply the frame again on
# every re-run of this cell (30x -> 900x -> ...). To keep the cell idempotent,
# the frame produced by the load cell is snapshotted in `orders_raw`; the
# snapshot is refreshed only when `orders` is not the object this cell built
# last time (fresh kernel, or the load cell was re-run).
if "_orders_augmented" not in globals() or orders is not _orders_augmented:
    orders_raw = orders.copy()

# Legacy global seed, set before any draw so the generated values are reproducible.
np.random.seed(42)

orders = pd.concat([orders_raw] * 30, ignore_index=True)

# randint's upper bound is exclusive, so distances fall in 40..119.
orders["order_distance"] = np.random.randint(40, 120, size=len(orders))
orders["order_zone"] = np.random.choice(["A", "B"], size=len(orders))

# Identity marker consumed by the guard at the top of this cell.
_orders_augmented = orders



In [10]:
# Build one training row per (order, picker) pair.
#
# A cross join replaces the nested `iterrows()` loops: it yields the same
# pairs in the same order (all pickers for the first order, then all pickers
# for the second, ...) without a Python-level loop per pair. Only the columns
# that are actually used are selected before joining, so unrelated columns
# that happen to share a name in both tables cannot collide.
order_picker_pairs = orders[["order_distance", "order_zone"]].merge(
    pickers[["avg_speed", "error_rate", "experience_years", "current_zone"]],
    how="cross",
)

df = pd.DataFrame({
    "order_distance": order_picker_pairs["order_distance"],
    "avg_speed": order_picker_pairs["avg_speed"],
    "error_rate": order_picker_pairs["error_rate"],
    "experience_years": order_picker_pairs["experience_years"],
    # 1 when the order's zone equals the picker's current zone, else 0.
    "zone_match": (
        order_picker_pairs["order_zone"] == order_picker_pairs["current_zone"]
    ).astype("int64"),
    # Synthetic target: distance / speed, inflated by the picker's error rate.
    "completion_time": (
        order_picker_pairs["order_distance"] / order_picker_pairs["avg_speed"]
    ) * (1 + order_picker_pairs["error_rate"]),
})
df.head()


Unnamed: 0,order_distance,avg_speed,error_rate,experience_years,zone_match,completion_time
0,91,1.2,0.01,3,0,76.591667
1,91,1.0,0.03,1,1,93.73
2,54,1.2,0.01,3,0,45.45
3,54,1.0,0.03,1,1,55.62
4,111,1.2,0.01,3,0,93.425


In [11]:
# Target column on one side, every remaining column as a predictor on the other;
# 20% of the rows are held out for evaluation with a fixed shuffle seed.
y = df["completion_time"]
X = df.drop(columns=["completion_time"])

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [12]:
# Fit a 100-tree random forest with a fixed seed. `fit` returns the estimator
# itself, so `model` is the fitted forest and the trailing expression renders it.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

model


0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [13]:
# Score the held-out rows and report both metrics rounded to two decimals.
predictions = model.predict(X_test)

mae, r2 = (
    metric(y_test, predictions)
    for metric in (mean_absolute_error, r2_score)
)

print(f"Mean Absolute Error: {mae:.2f}")
print(f"R² Score: {r2:.2f}")


Mean Absolute Error: 1.10
R² Score: 1.00


In [14]:
# Pair every training column with the importance the fitted forest assigned to
# it, then rank from most to least important.
feature_importance = pd.Series(
    dict(zip(X.columns, model.feature_importances_))
).sort_values(ascending=False)

feature_importance


order_distance      0.932288
avg_speed           0.022515
error_rate          0.022459
experience_years    0.021643
zone_match          0.001094
dtype: float64

In [15]:
def recommend_picker(order_distance, order_zone, pickers_df, model):
    """Return the picker with the lowest predicted completion time for one order.

    Args:
        order_distance: Distance value of the order; passed to the model as
            the ``order_distance`` feature for every picker.
        order_zone: Zone label of the order; compared with each picker's
            ``current_zone`` to derive the 0/1 ``zone_match`` feature.
        pickers_df: DataFrame with the columns ``picker_id``, ``avg_speed``,
            ``error_rate``, ``experience_years`` and ``current_zone``.
        model: Object exposing ``predict(features)`` that returns one
            prediction per feature row.

    Returns:
        A tuple ``(best_picker_id, scores)``. ``scores`` maps each
        ``picker_id`` to its prediction; ``best_picker_id`` is the key with
        the smallest prediction (the first one wins on ties).

    Raises:
        ValueError: If ``pickers_df`` has no rows.
    """
    # Fail with a clear message instead of the opaque
    # "min() arg is an empty sequence" the final min() would raise.
    if len(pickers_df) == 0:
        raise ValueError("pickers_df must contain at least one picker")

    # One feature row per picker, with columns in the same order the training
    # frame used. Plain arrays are used so the frame does not depend on the
    # shape of pickers_df's index; the scalar order_distance is broadcast.
    features = pd.DataFrame({
        "order_distance": order_distance,
        "avg_speed": pickers_df["avg_speed"].to_numpy(),
        "error_rate": pickers_df["error_rate"].to_numpy(),
        "experience_years": pickers_df["experience_years"].to_numpy(),
        "zone_match": (pickers_df["current_zone"] == order_zone)
        .to_numpy()
        .astype("int64"),
    })

    # Score every picker in one batched call rather than one predict() per row.
    predictions = model.predict(features)

    scores = dict(zip(pickers_df["picker_id"], predictions))
    return min(scores, key=scores.get), scores


In [16]:
best_picker, all_scores = recommend_picker(
    order_distance=90,
    order_zone="A",
    pickers_df=pickers,
    model=model
)

best_picker, all_scores


('P1', {'P1': np.float64(75.82575000000006), 'P2': np.float64(91.7833)})