In [38]:
import pandas as pd

from utils.employee_utils import *
from utils.pdf_utils import scrap_pdf
from utils.time_utils import strfdelta
from datetime import timedelta, datetime


In [39]:
# Relative path of the schedule PDF handed to `scrap_pdf` in the next cell.
pdf_path = "./horarios/Horario 01-07.24.pdf"

In [40]:
# Parse the PDF with the project helper `scrap_pdf`; the result is iterated
# below as a collection of employees exposing a `category` attribute.
employee_list = scrap_pdf(pdf_path)

In [41]:
# Keep only the employees whose category is exactly "CAJERO".
cajeros_list: list[Employee] = [
    emp
    for emp in employee_list
    if emp.category == "CAJERO"
]

In [42]:
def get_day_dict(day: Day) -> dict:
    """Build the "Entrada"/"Salida" pair for one day of a schedule.

    Args:
        day: Object exposing ``day_type`` and, for regular days,
            ``interval.start`` / ``interval.end``.

    Returns:
        A dict with the keys ``"Entrada"`` and ``"Salida"`` (in that order).
        For a day whose ``day_type`` is ``"REGULAR"`` the values are the
        interval's start and end; for any other day type both values are the
        ``day_type`` itself.
    """
    if day.day_type == "REGULAR":
        entrada, salida = day.interval.start, day.interval.end
    else:
        # Non-regular days carry their type label in both cells.
        entrada = salida = day.day_type
    return {"Entrada": entrada, "Salida": salida}

def get_cajeros_dataframe(cajeros_list: list[Employee]) -> pd.DataFrame:
    """Build a one-row-per-cashier table of the weekly schedule.

    The columns form a two-level MultiIndex: ``("Nombre", "")`` followed by
    ``(<day>, "Entrada")`` and ``(<day>, "Salida")`` for each of the seven
    Spanish weekday labels, Monday through Sunday.

    Args:
        cajeros_list: Employees exposing ``name`` and a ``schedule`` object
            with one attribute per English weekday name (``monday`` ...
            ``sunday``), each accepted by ``get_day_dict``.

    Returns:
        The schedule DataFrame. An empty ``cajeros_list`` yields an empty
        frame that still carries the full column layout, so downstream
        column selections keep working.
    """
    week_days = (
        ("Lunes", "monday"),
        ("Martes", "tuesday"),
        ("Miércoles", "wednesday"),
        ("Jueves", "thursday"),
        ("Viernes", "friday"),
        ("Sábado", "saturday"),
        ("Domingo", "sunday"),
    )
    periods = ("Entrada", "Salida")

    # The column layout is fixed, so build it up front instead of inferring it
    # from the rows; inferring fails when there are no rows at all.
    columns = pd.MultiIndex.from_tuples(
        [("Nombre", "")]
        + [(day_name, period) for day_name, _ in week_days for period in periods]
    )

    rows = []
    for cajero in cajeros_list:
        row = [cajero.name]
        for _, day_attr in week_days:
            day_dict = get_day_dict(getattr(cajero.schedule, day_attr))
            row.extend(day_dict[period] for period in periods)
        rows.append(row)

    return pd.DataFrame(rows, columns=columns)

# Formats the "Entrada" and "Salida" columns of every weekday in the weekly table.
# NOTE(review): a later `def format_schedule` in this same cell rebinds the name,
# so this weekly variant is not the one reached through `format_schedule(...)`.
def format_schedule(df):
    """Return a copy of the weekly schedule with times rendered as text.

    Every ``(<day>, "Entrada")`` / ``(<day>, "Salida")`` column that exists is
    rewritten so that non-string values are passed through ``pd.to_datetime``
    and formatted with ``'%I:%M%p'``; string values are left untouched.

    Args:
        df: DataFrame whose columns are a two-level MultiIndex of strings.

    Returns:
        A new DataFrame with the same MultiIndex columns; ``df`` is not
        modified.
    """
    week_days = ["Lunes", "Martes", "Miércoles", "Jueves", "Viernes", "Sábado", "Domingo"]
    periods = ["Entrada", "Salida"]

    def _as_clock_text(value):
        if isinstance(value, str):
            return value
        return pd.to_datetime(value).strftime('%I:%M%p')

    # Temporarily flatten the MultiIndex into "<level0>_<level1>" labels.
    flat = df.copy()
    flat.columns = ['_'.join(levels).strip() for levels in flat.columns.values]

    # Format each day's entry/exit column when it is present.
    wanted = [f"{day}_{period}" for day in week_days for period in periods]
    for label in wanted:
        if label in flat.columns:
            flat[label] = flat[label].apply(_as_clock_text)

    # Rebuild the MultiIndex from the flattened labels.
    restored = []
    for label in flat.columns:
        restored.append(tuple(label.split('_')) if '_' in label else (label, ''))
    flat.columns = pd.MultiIndex.from_tuples(restored)

    return flat

def get_day_schedule(df, day):
    """Extract a single day's schedule from the weekly table.

    Args:
        df: Weekly schedule with two-level columns that include the top-level
            labels ``"Nombre"`` and ``day``.
        day: Top-level column label of the day to extract.

    Returns:
        A DataFrame with the columns ``"Nombre"``, ``"Entrada"`` and
        ``"Salida"``, in that order.
    """
    # Cross-section on the first column level drops it, leaving the day's
    # second-level labels as plain column names.
    day_view = df[["Nombre", day]].xs(day, axis=1, level=0)
    day_view["Nombre"] = df["Nombre"]
    ordered_columns = ["Nombre", "Entrada", "Salida"]
    return day_view[ordered_columns]

def format_schedule(df):
    """Return a copy of a single-day schedule with times rendered as text.

    In the ``"Entrada"`` and ``"Salida"`` columns every non-string value is
    passed through ``pd.to_datetime`` and formatted with ``'%I:%M%p'``;
    string values are kept as they are.

    Args:
        df: DataFrame containing ``"Entrada"`` and ``"Salida"`` columns.

    Returns:
        A new DataFrame; ``df`` itself is left unchanged.
    """
    def _as_clock_text(value):
        if isinstance(value, str):
            return value
        return pd.to_datetime(value).strftime('%I:%M%p')

    formatted = df.copy()
    for column in ("Entrada", "Salida"):
        formatted[column] = formatted[column].apply(_as_clock_text)
    return formatted

In [43]:
class DaySchedule:
    """Single-day view over the weekly cashier schedule.

    On construction the day's table is extracted once with the module-level
    ``get_day_schedule`` and kept in ``day_schedule_df``.
    """

    def __init__(self, cajeros_df: pd.DataFrame, day: str):
        self.day = day
        self.cajeros_df = cajeros_df
        self.day_schedule_df = self.get_day_schedule()

    def get_day_schedule(self):
        """Extract this instance's day from the weekly table."""
        # Inside a method the bare name resolves to the module-level function.
        return get_day_schedule(self.cajeros_df, self.day)

    def format_schedule(self):
        """Return the day's table as produced by the module-level ``format_schedule``."""
        return format_schedule(self.day_schedule_df)

    def get_available_employees(self) -> pd.DataFrame:
        """Return the rows whose "Entrada" is none of the non-working markers.

        Rows with an "Entrada" equal to "DIA DE DESCANSO", "VACACIONES" or
        "PAGO HORAS FERIADO" are removed; ``day_schedule_df`` is not modified.
        """
        non_working_markers = ("DIA DE DESCANSO", "VACACIONES", "PAGO HORAS FERIADO")
        available = self.day_schedule_df.copy()
        for marker in non_working_markers:
            available = available[available["Entrada"] != marker]
        return available

In [44]:
# def get_final_matrix(day_schedule_df, min_cajas, max_cajas):
#     if (day_schedule_df.empty):
#         return []

#     if (max_cajas > len(day_schedule_df)):
#         max_cajas = len(day_schedule_df)

#     n_cajas_optimo = 0
#     min_caja_matrix = []
#     min_cajeros_no_asignados = []

#     min_tm = pd.to_datetime("00:00AM") - pd.to_datetime("00:00AM")

#     for nro_cajas in range(min_cajas, max_cajas+1):
#         seg1_res = get_caja_matrix(nro_cajas, day_schedule_df)
#         tm = calcular_promedio_tiempos_muertos_matrix(seg1_res[0])

#         if (nro_cajas == min_cajas):
#             min_tm = tm
#             n_cajas_optimo = nro_cajas
#             min_caja_matrix = seg1_res[0]
#             min_cajeros_no_asignados = seg1_res[1]
#         else:
#             if (tm < min_tm):
#                 min_tm = tm
#                 n_cajas_optimo = nro_cajas
#                 min_caja_matrix = seg1_res[0]
#                 min_cajeros_no_asignados = seg1_res[1]

#     segmento1_matrix = sort_by_n_cajeros(min_caja_matrix)

#     nro_cajas_restantes = max_cajas - n_cajas_optimo

#     seg2_df = pd.DataFrame(min_cajeros_no_asignados, columns=['Nombres', 'Entrada', 'Salida'])

#     seg2_df.sort_values(by=["Entrada"], inplace=True)
#     seg2_res = get_caja_matrix(nro_cajas_restantes, seg2_df)
#     segmento2_matrix = seg2_res[0]
#     segmento2_matrix = sort_by_n_cajeros(segmento2_matrix)

#     no_asignados = seg2_res[1]
#     seg3_df = pd.DataFrame(no_asignados, columns=['Nombres', 'Entrada', 'Salida'])
    
#     seg3_df.sort_values(by=["Entrada"], inplace=True)
#     seg3_df["Entrada"] = seg3_df["Entrada"].dt.strftime('%I:%M%p')
#     seg3_df["Salida"] = seg3_df["Salida"].dt.strftime('%I:%M%p')

#     seg1_assig = sort_by_tiempo_atencion(segmento1_matrix)
#     seg2_assig = sort_by_tiempo_atencion(segmento2_matrix)

#     final_matrix = []
#     final_matrix.append(seg1_assig[1])
#     final_matrix.append(seg1_assig[2])
#     final_matrix.append(seg2_assig[0])
#     final_matrix.append(seg1_assig[0])

#     for caja in seg1_assig[3:]:
#         final_matrix.append(caja)
#     for caja in seg2_assig[1:]:
#         final_matrix.append(caja)

#     return final_matrix    

In [45]:
class CheckoutTurn:
    """One employee's turn at a given checkout.

    Holds the employee, the start and end of the turn, the checkout number,
    and ``duration`` computed as ``end_time - start_time``.
    """

    def __init__(self, employee: Employee, start_time: datetime, end_time: datetime, nro_caja: int):
        self.employee, self.start_time, self.end_time = employee, start_time, end_time
        self.nro_caja = nro_caja
        # Derived once at construction; not refreshed if the bounds change later.
        self.duration = self.end_time - self.start_time

class Checkout():
    """A checkout lane together with the turns scheduled on it."""

    def __init__(self, nro_caja: int):
        self.nro_caja = nro_caja
        # Lane category label; new lanes start as "Regular".
        self.type = "Regular"
        # Turns on this lane, kept sorted by start time.
        self.turns : list[CheckoutTurn] = []
        # Sum of the durations of all turns.
        self.attention_time = timedelta(0)
        # Total uncovered time between turns (the lane's idle time).
        self.death_time = timedelta(0)

    def add_turn(self, employee: Employee, start_time: datetime, end_time: datetime):
        """Schedule a new turn on this lane and refresh the lane totals.

        Turns may be added in any order. ``turns`` is re-sorted by start time
        and ``death_time`` is recomputed from the sorted turns, so it never
        depends on insertion order and never goes negative when turns overlap.

        Args:
            employee: Employee (or identifier) working the turn.
            start_time: Start of the turn.
            end_time: End of the turn.
        """
        turn = CheckoutTurn(employee, start_time, end_time, self.nro_caja)
        self.turns.append(turn)
        # Sort before measuring gaps: the gaps are only meaningful between
        # chronologically adjacent turns.
        self.turns.sort(key=lambda x: x.start_time)
        self.attention_time += turn.duration
        self.death_time = self._idle_time()

    def _idle_time(self) -> timedelta:
        """Sum the gaps during which no turn covers the lane."""
        idle = timedelta(0)
        covered_until = None
        for turn in self.turns:
            if covered_until is None:
                covered_until = turn.end_time
                continue
            if turn.start_time > covered_until:
                idle += turn.start_time - covered_until
            # Track the furthest end seen so far so that a short turn nested
            # inside a longer one does not create a phantom gap.
            if turn.end_time > covered_until:
                covered_until = turn.end_time
        return idle


#def create_checkouts_distribution(daySchedule: pd.DataFrame, nro_cajas: int):
        

class CheckoutsDistribution():
    """Greedy assignment of a day's employees to at most ``nro_cajas`` checkouts.

    Rows of ``daySchedule`` are processed in their given order. Each employee
    goes to the first existing checkout whose last turn has already ended at
    the employee's "Entrada"; otherwise a new checkout is opened while fewer
    than ``nro_cajas`` exist; otherwise the employee is left unassigned.

    After construction:
        cajas: the checkouts that were opened, in creation order.
        non_assigned_df: the rows of ``daySchedule`` that could not be placed.
            Always a DataFrame (empty when everyone was placed).
        average_attention_time / average_death_time: mean of the per-checkout
            totals, or ``timedelta(0)`` when no checkout was opened.
    """

    def __init__(self, daySchedule: pd.DataFrame, nro_cajas: int):
        self.daySchedule: pd.DataFrame = daySchedule
        self.nro_cajeros = len(self.daySchedule)
        self._nro_cajas = None
        self.nro_cajas = nro_cajas
        self.cajas : list[Checkout] = []
        # Empty slice: same columns as the schedule, no rows.
        self.non_assigned_df = self.daySchedule.iloc[0:0]
        self.set_cajas()
        self.average_attention_time = self._average([caja.attention_time for caja in self.cajas])
        self.average_death_time = self._average([caja.death_time for caja in self.cajas])

    @staticmethod
    def _average(durations):
        """Mean of a list of durations; ``timedelta(0)`` for an empty list."""
        if not durations:
            return timedelta(0)
        return sum(durations, timedelta(0)) / len(durations)

    @property
    def nro_cajas(self):
        return self._nro_cajas
    
    @nro_cajas.setter
    def nro_cajas(self, value):
        if not isinstance(value, int):
            raise ValueError("nro_cajas must be an integer")
        if value < 1:
            raise ValueError("nro_cajas must be greater than 0")
        if value > self.nro_cajeros:
            raise ValueError("nro_cajas must be less than or equal to nro_cajeros")
        self._nro_cajas = value
        
    def set_cajas(self):
        """Run the greedy assignment, filling ``cajas`` and ``non_assigned_df``."""
        # Start from a clean slate so a repeated call does not duplicate turns.
        self.cajas = []
        self.non_assigned_df = self.daySchedule.iloc[0:0]

        if self.daySchedule.empty:
            return []

        # Row positions (not names) of the employees that found no checkout;
        # positions stay correct even when two employees share a name.
        unassigned_positions = []

        for position, (_, cajero) in enumerate(self.daySchedule.iterrows()):
            nombre, entrada, salida = cajero["Nombre"], cajero["Entrada"], cajero["Salida"]
            for caja in self.cajas:
                if entrada >= caja.turns[-1].end_time:
                    caja.add_turn(nombre, entrada, salida)
                    break
            else:
                if len(self.cajas) < self.nro_cajas:
                    nueva_caja = Checkout(len(self.cajas) + 1)
                    nueva_caja.add_turn(nombre, entrada, salida)
                    self.cajas.append(nueva_caja)
                else:
                    unassigned_positions.append(position)

        # Resolve the leftovers only once every row has been processed, so that
        # employees placed later are not reported as unassigned.
        self.non_assigned_df = self.daySchedule.iloc[unassigned_positions]

    def get_distribution_matrix(self):
        """Return one list per checkout: a "Caja N" label followed by its turns as text."""
        distribution_matrix = []
        for caja in self.cajas:
            fila = [f"Caja {caja.nro_caja}"]
            for turn in caja.turns:
                fila.append(f"{turn.employee} - {turn.start_time.strftime('%I:%M%p')} - {turn.end_time.strftime('%I:%M%p')}")
            distribution_matrix.append(fila)
        return distribution_matrix


    def get_sorted_checkouts(self):
        """Return a new list of the checkouts, longest total attention time first."""
        return sorted(self.cajas, key=lambda x: x.attention_time, reverse=True)

In [46]:
def get_segments(day_schedule, min_cajas, max_cajas):
    """Split a day's available employees into two checkout segments plus leftovers.

    Segment 1 is the ``CheckoutsDistribution`` with the lowest
    ``average_death_time`` among checkout counts from ``min_cajas`` to
    ``max_cajas`` (the smallest count wins ties). Segment 2 distributes the
    employees segment 1 left unassigned over the checkouts that remain up to
    ``max_cajas``. Segment 3 is whatever segment 2 could not place.

    Args:
        day_schedule: Object whose ``get_available_employees()`` returns a
            DataFrame with an "Entrada" column.
        min_cajas: Smallest number of checkouts to try for segment 1.
        max_cajas: Largest total number of checkouts.

    Returns:
        ``[]`` when no employee is available. Otherwise a tuple
        ``(seg1_distribution, seg2_distribution, seg3)``, where
        ``seg2_distribution`` is ``None`` when there is no checkout or no
        employee left for a second segment; ``seg3`` is then segment 1's
        leftovers.

    Raises:
        ValueError: If ``max_cajas`` is less than 1.
    """
    available_employees = day_schedule.get_available_employees().sort_values(by=["Entrada"])

    if available_employees.empty:
        return []

    if max_cajas < 1:
        raise ValueError("max_cajas must be greater than 0")

    # A distribution cannot have more checkouts than employees, and the lower
    # bound must stay within [1, max_cajas] or no candidate would be built.
    max_cajas = min(max_cajas, len(available_employees))
    min_cajas = max(1, min(min_cajas, max_cajas))

    seg1_distribution = None
    for nro_cajas in range(min_cajas, max_cajas + 1):
        candidate = CheckoutsDistribution(available_employees, nro_cajas)
        # Strict comparison keeps the earliest (smallest) count on ties.
        if (seg1_distribution is None
                or candidate.average_death_time < seg1_distribution.average_death_time):
            seg1_distribution = candidate

    leftover_df = pd.DataFrame(seg1_distribution.non_assigned_df)

    # The second segment can use neither more checkouts than remain nor more
    # checkouts than there are leftover employees.
    rest_cajas = min(max_cajas - seg1_distribution.nro_cajas, len(leftover_df))
    if rest_cajas < 1:
        return seg1_distribution, None, leftover_df

    seg2_distribution = CheckoutsDistribution(leftover_df, rest_cajas)
    seg3 = seg2_distribution.non_assigned_df

    return seg1_distribution, seg2_distribution, seg3

In [47]:
def get_define_checkouts(seg1_dist, seg2_dist):
    """Label and number the checkouts of both segments and return them in order.

    Each segment's checkouts are taken in the order given by its
    ``get_sorted_checkouts()``. The special lanes come first:

    * segment 1, first checkout  -> type "Preferencial", number 0
    * segment 1, next two        -> type "Rápida", numbers 1 and 2
    * segment 2, first checkout  -> type "Rápida", number 3

    Every remaining checkout keeps its type, is ordered by the start time of
    its first turn, and is numbered consecutively. A special lane is skipped
    when its segment has no checkout left for it.

    Args:
        seg1_dist: Object exposing ``get_sorted_checkouts()``.
        seg2_dist: Same, or ``None`` when there is no second segment.

    Returns:
        The list of checkouts, special lanes first. The checkout objects are
        modified in place (``type`` and ``nro_caja``).
    """
    # Work on copies so that popping never alters a list owned by the caller.
    seg1_pool = list(seg1_dist.get_sorted_checkouts())
    seg2_pool = list(seg2_dist.get_sorted_checkouts()) if seg2_dist is not None else []

    defined_checkouts = []

    def _take_special(pool, checkout_type, nro_caja):
        # Label the checkout that is actually removed from the pool and kept.
        if not pool:
            return
        caja = pool.pop(0)
        caja.type = checkout_type
        caja.nro_caja = nro_caja
        defined_checkouts.append(caja)

    _take_special(seg1_pool, "Preferencial", 0)
    _take_special(seg1_pool, "Rápida", 1)
    _take_special(seg1_pool, "Rápida", 2)
    _take_special(seg2_pool, "Rápida", 3)

    remaining = sorted(seg1_pool + seg2_pool, key=lambda x: x.turns[0].start_time)

    # NOTE(review): numbering restarts at 1 here, as it did originally, which
    # overlaps the numbers 1-3 given to the "Rápida" lanes. Confirm whether
    # numbers are meant to be per type or whether this should continue from 4.
    for n, caja in enumerate(remaining, start=1):
        caja.nro_caja = n
        defined_checkouts.append(caja)

    return defined_checkouts
    

In [48]:
# Weekly schedule table for the cashiers selected above.
cajeros_df = get_cajeros_dataframe(cajeros_list)

# Day to plan; it is used as a top-level column label of cajeros_df.
day = "Sábado"
day_schedule = DaySchedule(cajeros_df, day)

# Range of checkout counts explored by get_segments.
min_cajas = 5
max_cajas = 15

# NOTE(review): get_segments returns [] when no employee is available, in
# which case this three-way unpacking raises ValueError.
seg1_dist, seg2_dist, seg3 = get_segments(day_schedule, min_cajas, max_cajas)

In [49]:
# Display the third value returned by get_segments (rows left without a checkout).
seg3

Unnamed: 0,Nombre,Entrada,Salida
28,"HUAMAN HUAMANI, ALEXIS JAVIER",1900-01-01 10:45:00,1900-01-01 14:30:00
14,"ILDEFONSO MOTTA, JHOSSEP ANGELO",1900-01-01 11:00:00,1900-01-01 14:45:00
10,"BRICEÑO LUNA, JESSICA ARACELI",1900-01-01 11:00:00,1900-01-01 22:00:00
33,"POBLETE SAIRE, FIORELLA ESTHER",1900-01-01 11:00:00,1900-01-01 14:45:00
18,"SUAREZ JARA, YENNIFER YUSSARA",1900-01-01 11:15:00,1900-01-01 15:00:00
3,"HINOSTROZA MARIN, CAMILA MARIA",1900-01-01 11:15:00,1900-01-01 20:15:00
2,"ERIQUE CALLE, MARIA ANTONIETA",1900-01-01 12:15:00,1900-01-01 21:15:00
1,"CHAVEZ ONOFRE, CAMILA GERALDINE",1900-01-01 13:00:00,1900-01-01 16:45:00
44,"CAPCHA YARANGO, DAVID",1900-01-01 17:00:00,1900-01-01 20:45:00
41,"AYALA TAPIA, DARCIE SOL",1900-01-01 18:00:00,1900-01-01 21:45:00
