In [11]:
import pandas as pd
import numpy as np

In [13]:
# Load the processed station table. The path is relative, so it resolves against
# the working directory the notebook kernel was started in.
df = pd.read_csv('../data/processed/stations_processed.csv')

In [15]:
from scipy.optimize import minimize

# --- 1. Normalização ---
def normalize(series):
    """
    Min-max scale ``series`` so its smallest value maps to 0.

    A tiny constant (1e-9) is added to the range so that a constant series
    yields zeros instead of a division by zero; as a consequence the largest
    value maps to slightly less than 1.
    """
    lowest = series.min()
    value_range = series.max() - lowest
    return (series - lowest) / (value_range + 1e-9)

# --- 2. Função que calcula gain e alocação dado um vetor de pesos ---
def compute_gain(df, weights, exponent=5, cabinets_to_add=10, saturate=True):
    """
    Share out extra cabinets across stations and estimate the resulting gain.

    A priority score is computed per row as a weighted sum of five score
    columns, floored at zero and raised to ``exponent``. ``cabinets_to_add``
    cabinets are then distributed proportionally to that score, and the
    expected increase in ``swaps_per_day_mean`` is estimated per row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``score_swaps_cabinet``,
        ``score_swaps_per_obs``, ``score_obs_per_swaps``, ``score_distance``,
        ``score_cabinet_number``, ``swaps_per_day_mean``, ``swaps_per_cabinet``
        and ``cabinet_number``. The input frame is not modified.
    weights : sequence of 5 numbers
        ``[w_swaps_cabinet, w_swaps_per_obs, w_score_obs_per_swaps,
        w_score_distance, w_score_cabinet_number]``. Individual weights may be
        negative.
    exponent : number, default 5
        Power applied to the (non-negative) priority score to increase the
        contrast between rows.
    cabinets_to_add : int, default 10
        Total number of cabinets to distribute.
    saturate : bool, default True
        If True the per-row gain is damped logarithmically:
        ``swaps_per_cabinet * log1p(allocation) / log1p(cabinet_number + 1)``.
        If False the gain is linear: ``swaps_per_cabinet * allocation``.

    Returns
    -------
    (total_gain, annotated_df)
        ``total_gain`` is the sum of the ``gain`` column. ``annotated_df`` is a
        copy of ``df`` with the added columns ``priority_score_raw``,
        ``priority_score``, ``allocation_float``, ``allocation``,
        ``new_swaps_per_day_mean`` and ``gain``.
    """
    w1, w2, w3, w4, w5 = weights
    df = df.copy()

    # --- Priority score ---
    df['priority_score_raw'] = (
          w1 * df['score_swaps_cabinet']
        + w2 * df['score_swaps_per_obs']     # weight may be positive or negative
        + w3 * df['score_obs_per_swaps']     # weight may be positive or negative
        + w4 * df['score_distance']
        + w5 * df['score_cabinet_number']
    )
    # Negative weights can push the raw score below zero. Without the clip an
    # odd exponent yields negative shares (negative allocations, then NaN/-inf
    # from log1p), and an even exponent would rank the most negative rows
    # highest. Rows with a non-positive raw score simply get no priority.
    df['priority_score'] = df['priority_score_raw'].clip(lower=0) ** exponent

    # --- Proportional distribution ---
    total_priority = df['priority_score'].sum()
    has_priority = bool(np.isfinite(total_priority) and total_priority > 0)
    if has_priority:
        df['allocation_float'] = (df['priority_score'] / total_priority) * cabinets_to_add
    else:
        # No row has a positive score: there is no basis for ranking, so
        # nothing is allocated (avoids a 0/0 division producing NaN shares).
        df['allocation_float'] = np.zeros(len(df))
    df['allocation'] = np.floor(df['allocation_float'])

    # Hand out the cabinets lost to flooring, one each, to the rows with the
    # largest proportional share.
    # NOTE(review): this ranks by the full share, not by the fractional
    # remainder (largest-remainder method) -- confirm this is intended.
    remaining = cabinets_to_add - int(df['allocation'].sum())
    if has_priority and remaining > 0:
        top_up = df.sort_values('allocation_float', ascending=False).head(remaining).index
        df.loc[top_up, 'allocation'] += 1

    # --- Estimate of the new swaps_per_day_mean ---
    if saturate:
        # log1p(cabinet_number + 1) == log(cabinet_number + 2), which is
        # positive for any non-negative cabinet count.
        df['new_swaps_per_day_mean'] = df['swaps_per_day_mean'] + \
            df['swaps_per_cabinet'] * np.log1p(df['allocation']) / np.log1p(df['cabinet_number'] + 1)
    else:
        df['new_swaps_per_day_mean'] = df['swaps_per_day_mean'] + df['swaps_per_cabinet'] * df['allocation']

    df['gain'] = df['new_swaps_per_day_mean'] - df['swaps_per_day_mean']

    return df['gain'].sum(), df

# --- 3. Build the candidate frame with scores and penalties ---
# Only stations with fewer than 8 cabinets are kept as candidates.
df_alloc = df.loc[df['cabinet_number'] < 8].assign(
    # Scores (higher is better)
    score_swaps_cabinet=lambda d: normalize(d['swaps_per_cabinet']),
    score_swaps_per_obs=lambda d: normalize(d['swaps_per_observation']),
    # Terms the author labels "penalties" (also noted as higher is better)
    score_obs_per_swaps=lambda d: normalize(d['observations_per_swaps']),
    score_distance=lambda d: normalize(d['nearest_station_distance_km']),
    score_cabinet_number=lambda d: 1 / (1 + d['cabinet_number']),
)

# --- 4. Try both signs for the swaps_per_obs and observations_per_swaps weights ---
# Starting weight magnitudes; only the signs of w2 and w3 are varied below.
w1, w2, w3, w4, w5 = 0.35, 0.15, 0.3, 0.1, 0.1
best_gain, best_weights = -np.inf, None

for sign2, sign3 in [(-1, -1), (-1, 1), (1, -1), (1, 1)]:
    weights_test = [w1, sign2 * w2, sign3 * w3, w4, w5]
    total_gain, _ = compute_gain(df_alloc, weights_test)

    # Keep the first combination that strictly improves on the best so far.
    if total_gain > best_gain:
        best_gain, best_weights = total_gain, weights_test

print("Optimal weights with signal testing:", best_weights)
print("Best total gain:", best_gain)

# --- 5. Recompute the final allocation with the selected weights ---
_, df_final_alloc = compute_gain(df_alloc, best_weights)

# --- 6. Map each station id to its allocated number of cabinets ---
allocation_dict = (
    df_final_alloc
    .set_index('swap_station_id')['allocation']
    .to_dict()
)

# --- 7. Show the 10 rows with the largest estimated gain ---
(
    df_final_alloc
    .loc[:, ['swap_station_id', 'cabinet_number', 'allocation', 'new_swaps_per_day_mean', 'gain']]
    .sort_values('gain', ascending=False)
    .head(10)
)

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


Optimal weights with signal testing: [0.35, 0.15, 0.3, 0.1, 0.1]
Best total gain: 192.48493515801792


Unnamed: 0,swap_station_id,cabinet_number,allocation,new_swaps_per_day_mean,gain
7,446,1,2.0,81.79661,40.898305
46,1451,2,3.0,89.357143,29.785714
99,376,1,1.0,74.364674,28.768183
101,450,2,1.0,143.707627,28.741525
85,555,2,1.0,115.487288,23.097458
108,1413,1,1.0,53.755445,20.795445
132,1192,2,1.0,101.991525,20.398305
0,1411,4,0.0,37.724138,0.0
103,516,2,0.0,38.779661,0.0
98,621,2,0.0,55.559322,0.0


In [19]:
# Total estimated gain over every row of the final allocation.
df_final_alloc['gain'].sum()

192.48493515801792