In [1]:
import pandas as pd

from utils.employee_utils import *
from utils.pdf_utils import scrap_pdf
from utils.time_utils import strfdelta
from datetime import timedelta, datetime


In [2]:
pdf_path = "./horarios/Horario 01-07.24.pdf"

In [3]:
employee_list = scrap_pdf(pdf_path)

In [4]:
cajeros_list : list[Employee] = [employee for employee in employee_list if employee.category == "CAJERO"]

In [5]:
def get_day_dict(day: Day) -> dict:
    return {
        "Entrada": day.interval.start if day.day_type == "REGULAR" else day.day_type,
        "Salida": day.interval.end if day.day_type == "REGULAR" else day.day_type,
    }

def get_cajeros_dataframe(cajeros_list: list[Employee]) -> pd.DataFrame:
    days_of_week_names = ["Lunes", "Martes", "Miércoles", "Jueves", "Viernes", "Sábado", "Domingo"]
    days_of_week_atr_name = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]
    
    # Collect dictionaries first
    dict_list = []
    for cajero in cajeros_list:
        cajero_dict = {("Nombre", ""): cajero.name}
        for day_name, day_attr in zip(days_of_week_names, days_of_week_atr_name):
            day_dict = get_day_dict(getattr(cajero.schedule, day_attr))
            for key, value in day_dict.items():
                cajero_dict[(day_name, key)] = value
        cajero_dict["CajeroObj"] = cajero
        dict_list.append(cajero_dict)
    
    # Create the DataFrame from the list of dictionaries
    cajeros_df = pd.DataFrame(dict_list)
    
    # Set MultiIndex columns
    cajeros_df.columns = pd.MultiIndex.from_tuples(cajeros_df.columns)
    

    return cajeros_df

# Función para formatear las columnas de "Entrada" y "Salida"
def format_schedule(df):
    days_of_week_names = ["Lunes", "Martes", "Miércoles", "Jueves", "Viernes", "Sábado", "Domingo"]
    # Aplanar temporalmente el MultiIndex
    df_flat = df.copy()
    df_flat.columns = ['_'.join(col).strip() for col in df_flat.columns.values]
    
    # Formatear las columnas de "Entrada" y "Salida"
    for day in days_of_week_names:
        for period in ["Entrada", "Salida"]:
            col_name = f"{day}_{period}"
            if col_name in df_flat.columns:
                df_flat[col_name] = df_flat[col_name].apply(
                    lambda x: pd.to_datetime(x).strftime('%I:%M%p') if not isinstance(x, str) else x
                )
    
    # Restaurar el MultiIndex
    new_columns = [tuple(col.split('_')) if '_' in col else (col, '') for col in df_flat.columns]
    df_flat.columns = pd.MultiIndex.from_tuples(new_columns)
    
    return df_flat

def get_day_schedule(df, day):
    output_df = df[["Nombre", day]].xs(day, axis=1, level=0)
    output_df["Nombre"] = df["Nombre"]
    return output_df[["Nombre", "Entrada", "Salida"]]

def format_schedule(df):
    formated_df = df.copy()
    formated_df["Entrada"] = formated_df["Entrada"].apply(
        lambda x: pd.to_datetime(x).strftime('%I:%M%p') if not isinstance(x, str) else x
    )
    formated_df["Salida"] = formated_df["Salida"].apply(
        lambda x: pd.to_datetime(x).strftime('%I:%M%p') if not isinstance(x, str) else x
    )
    return formated_df

In [6]:
class DaySchedule():
    def __init__(self, cajeros_df: pd.DataFrame, day: str):
        self.day = day
        self.cajeros_df = cajeros_df
        self.day_schedule_df = self.get_day_schedule()

    def get_day_schedule(self):
        return get_day_schedule(self.cajeros_df, self.day)

    def format_schedule(self):
        return format_schedule(self.day_schedule_df)
    
    def get_available_employees(self) -> pd.DataFrame:
        out_df = self.day_schedule_df.copy()
        out_df = out_df[out_df["Entrada"] != "DIA DE DESCANSO"]
        out_df = out_df[out_df["Entrada"] != "VACACIONES"]
        out_df = out_df[out_df["Entrada"] != "PAGO HORAS FERIADO"]

        return out_df

In [7]:
class CheckoutTurn():
    def __init__(self, employee: Employee, start_time: datetime, end_time: datetime, nro_caja: int):
        self.employee = employee
        self.start_time = start_time
        self.end_time = end_time
        self.nro_caja = nro_caja
        self.duration = end_time - start_time

class Checkout():
    def __init__(self, nro_caja: int):
        self.nro_caja = nro_caja
        self.type = "Regular"
        self.turns : list[CheckoutTurn] = []
        self.attention_time = timedelta(0)
        self.death_time = timedelta(0)

    def add_turn(self, employee: Employee, start_time: datetime, end_time: datetime):
        ct = CheckoutTurn(employee, start_time, end_time, self.nro_caja)
        self.turns.append(ct)
        self.attention_time += ct.duration
        self.death_time += ct.start_time - self.turns[-2].end_time if len(self.turns) > 1 else timedelta(0)
        self.turns.sort(key=lambda x: x.start_time)
    
    def __str__(self) -> str:
        return f"Caja {self.nro_caja} - {self.type}" + "\n" + "\n".join([f"{turn.employee} - {turn.start_time.strftime('%I:%M%p')} - {turn.end_time.strftime('%I:%M%p')}" for turn in self.turns])

class CheckoutsDistribution():
    def __init__(self, available_employees_df: pd.DataFrame, nro_cajas: int):
        self.daySchedule: pd.DataFrame = available_employees_df
        self.nro_cajeros = len(self.daySchedule)
        self._nro_cajas = None
        self.nro_cajas = nro_cajas
        self.cajas : list[Checkout] = []
        self.non_assigned_df = []
        self.average_attention_time = timedelta(0)
        self.average_death_time = timedelta(0)

    @property
    def nro_cajas(self):
        return self._nro_cajas
    
    @nro_cajas.setter
    def nro_cajas(self, value):
        if not isinstance(value, int):
            raise ValueError("nro_cajas must be an integer")
        if value < 1:
            raise ValueError("nro_cajas must be greater than 0")
        if value > self.nro_cajeros:
            raise ValueError("nro_cajas must be less than or equal to nro_cajeros")
        self._nro_cajas = value

    def get_distribution_matrix(self):
        distribution_matrix = []
        for caja in self.cajas:
            distribution_matrix.append([f"Caja {caja.nro_caja}"])
            for turn in caja.turns:
                distribution_matrix[-1].append(f"{turn.employee} - {turn.start_time.strftime('%I:%M%p')} - {turn.end_time.strftime('%I:%M%p')}")
        return distribution_matrix

    def get_sorted_checkouts(self):
        sorted_checkouts = sorted(self.cajas, key=lambda x: x.attention_time, reverse=True)
        return sorted_checkouts


def create_checkouts_distribution(available_employees_df: pd.DataFrame, nro_cajas: int):
    cd = CheckoutsDistribution(available_employees_df, nro_cajas)

    if available_employees_df.empty:
        return []
    
    if nro_cajas > len(available_employees_df):
        nro_cajas = len(available_employees_df)

    for i, cajero in enumerate(available_employees_df.iterrows()):
        if i == 0:
            cd.cajas.append(Checkout(1))
            cd.cajas[0].add_turn(cajero[1]["Nombre"], cajero[1]["Entrada"], cajero[1]["Salida"])
            continue
        for caja in cd.cajas:
            if cajero[1]["Entrada"] >= caja.turns[-1].end_time:
                caja.add_turn(cajero[1]["Nombre"], cajero[1]["Entrada"], cajero[1]["Salida"])
                break
        else:
            if len(cd.cajas) < nro_cajas:
                cj = Checkout(len(cd.cajas) + 1)
                cj.add_turn(cajero[1]["Nombre"], cajero[1]["Entrada"], cajero[1]["Salida"])
                cd.cajas.append(cj)
            else:
                cd.non_assigned_df = available_employees_df[~available_employees_df["Nombre"].isin([turn.employee for caja in cd.cajas for turn in caja.turns])]
    
    cd.average_attention_time = sum([caja.attention_time for caja in cd.cajas], timedelta(0)) / len(cd.cajas)
    cd.average_death_time = sum([caja.death_time for caja in cd.cajas], timedelta(0)) / len(cd.cajas)

    return cd


def create_preferential_checkout_distribution(available_employees_df: pd.DataFrame):
    cd = CheckoutsDistribution(available_employees_df, 1)
    for i, cajero in enumerate(available_employees_df.iterrows()):
        if (i==0):
            cd.cajas.append(Checkout(1))
            cd.cajas[0].type = "Preferencial"
            cd.cajas[0].add_turn(cajero[1]["Nombre"], cajero[1]["Entrada"], cajero[1]["Salida"])
            # TODO: Handle employees that arrives before 07:00
            continue
        else:
            if cajero[1]["Entrada"] >= cd.cajas[0].turns[-1].end_time:
                cd.cajas[0].add_turn(cajero[1]["Nombre"], cajero[1]["Entrada"], cajero[1]["Salida"])
            else:
                cd.non_assigned_df = available_employees_df[~available_employees_df["Nombre"].isin([turn.employee for caja in cd.cajas for turn in caja.turns])]
    
    # Complete empty intervals
    # if cd.cajas[0].turns[0].start_time > available_employees_df["Entrada"].min():
    #     cd.cajas[0].add_turn("VACIO", available_employees_df["Entrada"].min(), cd.cajas[0].turns[0].start_time)

    for i, turn in enumerate(cd.cajas[0].turns):
        if i == 0:
            continue
        if turn.start_time > cd.cajas[0].turns[i-1].end_time:
            for cajero in cd.non_assigned_df.iterrows():
                if cajero[1]["Entrada"] <= turn.start_time and cajero[1]["Salida"] >= turn.end_time:
                    cd.cajas[0].add_turn(cajero[1]["Nombre"], turn.start_time, turn.end_time)
                    cd.non_assigned_df = cd.non_assigned_df[cd.non_assigned_df["Nombre"] != cajero[1]["Nombre"]]
                    break

In [8]:
def get_segments(day_schedule, min_cajas, max_cajas):
    available_employees_df = day_schedule.get_available_employees().sort_values(by=["Entrada"])
    
    if (available_employees_df.empty):
        return []

    if (max_cajas > len(available_employees_df)):
        max_cajas = len(available_employees_df)

    seg0_distribution = create_checkouts_distribution(available_employees_df, 1)

    min_average_death_time = timedelta(0)
    seg1_distribution = None

    for nro_cajas in range(min_cajas, max_cajas+1-1):
        cd = create_checkouts_distribution(pd.DataFrame(seg0_distribution.non_assigned_df), nro_cajas)

        if (nro_cajas == min_cajas):
            seg1_distribution = cd
            min_average_death_time = cd.average_death_time
        elif cd.average_death_time < min_average_death_time:
            seg1_distribution = cd
            min_average_death_time = cd.average_death_time

    rest_cajas = max_cajas - seg1_distribution.nro_cajas

    seg2_distribution = create_checkouts_distribution(pd.DataFrame(seg1_distribution.non_assigned_df), rest_cajas)
    seg3 = seg2_distribution.non_assigned_df

    return seg0_distribution, seg1_distribution, seg2_distribution, seg3

In [9]:
def get_define_checkouts(seg0_dist, seg1_dist, seg2_dist):
    defined_checkouts = []

    sorted_checkouts_seg0 = seg0_dist.get_sorted_checkouts()
    sorted_checkouts_seg1 = seg1_dist.get_sorted_checkouts()
    sorted_checkouts_seg2 = seg2_dist.get_sorted_checkouts()

    sorted_checkouts_seg0[0].type = "Preferencial"
    sorted_checkouts_seg0[0].nro_caja = 1
    defined_checkouts.append(sorted_checkouts_seg0.pop(0))

    sorted_checkouts_seg1[0].type = "Rápida"
    sorted_checkouts_seg1[0].nro_caja = 1
    defined_checkouts.append(sorted_checkouts_seg1.pop(0))

    sorted_checkouts_seg1[0].type = "Rápida"
    sorted_checkouts_seg1[0].nro_caja = 2
    defined_checkouts.append(sorted_checkouts_seg1.pop(0))

    sorted_checkouts_seg2[0].type = "Rápida"
    sorted_checkouts_seg2[0].nro_caja = 3
    defined_checkouts.append(sorted_checkouts_seg2.pop(0))

    n = 2
    sorted_checkouts = sorted_checkouts_seg1 + sorted_checkouts_seg2
    sorted_checkouts = sorted(sorted_checkouts, key=lambda x: x.turns[0].start_time)

    for caja in sorted_checkouts:
        caja.nro_caja = n
        defined_checkouts.append(caja)
        n += 1
    
    return defined_checkouts
    

In [10]:

cajeros_df = get_cajeros_dataframe(cajeros_list)

day = "Sábado"
day_schedule = DaySchedule(cajeros_df, day)

min_cajas = 5
max_cajas = 15

seg0_dist, seg1_dist, seg2_dist, seg3 = get_segments(day_schedule, min_cajas, max_cajas)

TypeError: Expected tuple, got str

In [None]:
for checkout in (get_define_checkouts(seg0_dist, seg1_dist, seg2_dist)):
    print(checkout)

Caja 1 - Preferencial
HUAMANI TORRES, LUIS RODRIGO - 06:30AM - 10:15AM
MORENO CANCHANYA, ROSMERY - 10:30AM - 02:15PM
YACILA GRANDEZ, RODRIGO ANDRE - 03:00PM - 06:45PM
LA ROSA EUSEBIO, SHADIA SHAMIRA - 07:00PM - 10:45PM
Caja 1 - Rápida
AGUILAR SCHLAEFLI, STEPHANIE XIMENA - 07:00AM - 10:45AM
Del Aguila Murayari, Darla - 10:45AM - 02:30PM
HUAYNATES ALTAMIRANO, JIM HANS - 03:00PM - 06:45PM
ARIAS MACHACUAY, SADELITH SORAGGI - 07:00PM - 10:45PM
Caja 2 - Rápida
MARTINEZ PAZ, ROCIO ESPERANZA - 08:30AM - 05:30PM
ZAVALA SOSA, NICOLE - 05:30PM - 09:15PM
Caja 3 - Rápida
VEGA RIVAS, ANDREA FERNANDA - 09:30AM - 01:15PM
TITO LAURA, NANCY FIORELLA - 02:00PM - 11:00PM
Caja 2 - Regular
VEGA CARDENAS, ANGELICA LOURDES - 08:00AM - 11:45AM
ALVITE CORNEJO, ANGIE LUCERO - 12:00PM - 03:45PM
VILCAPOMA CHILIN, JULISSA JAZMIN - 04:00PM - 07:45PM
Caja 3 - Regular
CHIARA LIMA, AUGUSTO SEBASTIAN - 08:00AM - 11:45AM
FLORES PAREDES, LOURDES - 12:00PM - 02:45PM
RIVERA CARREÑO, DIANA DESIRÉE - 03:30PM - 07:15PM
Caja 4 

In [None]:
seg3

Unnamed: 0,Nombre,Entrada,Salida
14,"ILDEFONSO MOTTA, JHOSSEP ANGELO",1900-01-01 11:00:00,1900-01-01 14:45:00
10,"BRICEÑO LUNA, JESSICA ARACELI",1900-01-01 11:00:00,1900-01-01 22:00:00
33,"POBLETE SAIRE, FIORELLA ESTHER",1900-01-01 11:00:00,1900-01-01 14:45:00
18,"SUAREZ JARA, YENNIFER YUSSARA",1900-01-01 11:15:00,1900-01-01 15:00:00
3,"HINOSTROZA MARIN, CAMILA MARIA",1900-01-01 11:15:00,1900-01-01 20:15:00
2,"ERIQUE CALLE, MARIA ANTONIETA",1900-01-01 12:15:00,1900-01-01 21:15:00
1,"CHAVEZ ONOFRE, CAMILA GERALDINE",1900-01-01 13:00:00,1900-01-01 16:45:00
41,"AYALA TAPIA, DARCIE SOL",1900-01-01 18:00:00,1900-01-01 21:45:00
50,"MARTICORENA LOPEZ, DAVID CARLOS",1900-01-01 18:00:00,1900-01-01 21:45:00
51,"MEDINA MARCELO, NAOMI ARIADNA",1900-01-01 18:15:00,1900-01-01 22:00:00


In [None]:
day_schedule.get_available_employees().sort_values(by=["Entrada"])

Unnamed: 0,Nombre,Entrada,Salida
4,"HUAMANI TORRES, LUIS RODRIGO",1900-01-01 06:30:00,1900-01-01 10:15:00
22,"AGUILAR SCHLAEFLI, STEPHANIE XIMENA",1900-01-01 07:00:00,1900-01-01 10:45:00
12,"CHIARA LIMA, AUGUSTO SEBASTIAN",1900-01-01 08:00:00,1900-01-01 11:45:00
37,"VEGA CARDENAS, ANGELICA LOURDES",1900-01-01 08:00:00,1900-01-01 11:45:00
15,"MARTINEZ PAZ, ROCIO ESPERANZA",1900-01-01 08:30:00,1900-01-01 17:30:00
5,"MENDOZA DIEGO, ZAIDA VANESSA",1900-01-01 08:45:00,1900-01-01 12:30:00
6,"MEZA MELO, NORMA FERNANDA",1900-01-01 08:45:00,1900-01-01 17:45:00
39,"YOVERA ROBLES, VICTOR EDUARDO",1900-01-01 09:00:00,1900-01-01 12:45:00
23,"AYALA MORA, CECILIA ROSARIO",1900-01-01 09:00:00,1900-01-01 12:45:00
38,"VEGA RIVAS, ANDREA FERNANDA",1900-01-01 09:30:00,1900-01-01 13:15:00
