Cargar librerías

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Select seaborn's "whitegrid" style; the call configures seaborn globally,
# so it applies to every plot drawn after this cell runs.
sns.set(style="whitegrid")

Cargar el dataset

In [9]:
import pandas as pd
import re
from pathlib import Path

file_path = Path("TL-20240723-202614 WA1200 #05.csv")

# Lines 19 and 20 of the export (1-indexed) hold the variable names and their
# units; everything above them is skipped by this notebook.
VARS_ROW_IDX = 18
UNITS_ROW_IDX = 19

# The file is split manually on ";" instead of going through pd.read_csv.
# NOTE(review): undecodable bytes are replaced with U+FFFD here and patched up
# later by fix_text; the file is probably cp1252/latin-1 rather than UTF-8 --
# confirm before changing the encoding, since column names depend on it.
with open(file_path, "r", encoding="utf-8", errors="replace") as f:
    rows = [line.rstrip("\n\r").split(";") for line in f]

# Fail with a readable message instead of a bare IndexError when the file is
# too short to contain the two header lines.
if len(rows) <= UNITS_ROW_IDX:
    raise ValueError(
        f"{file_path} has only {len(rows)} lines; expected variable names and "
        f"units on lines {VARS_ROW_IDX + 1} and {UNITS_ROW_IDX + 1}"
    )

vars_row = [cell.strip() for cell in rows[VARS_ROW_IDX]]
units_row = [cell.strip() for cell in rows[UNITS_ROW_IDX]]

# Pad the shorter header line with blanks so both have the same width n.
n = max(len(vars_row), len(units_row))
vars_row += [""] * (n - len(vars_row))
units_row += [""] * (n - len(units_row))

def fix_text(s):
    """Return ``s`` as a cleaned-up label with known encoding damage repaired.

    The input is converted to ``str``, a fixed list of mis-decoded Spanish
    words is replaced by unaccented spellings, the Fahrenheit unit variants
    are reduced to ``"F"``, and runs of whitespace are collapsed to a single
    space with leading/trailing whitespace removed.

    The function is idempotent: running it on its own output returns the same
    text.

    Args:
        s: Any value; it is passed through ``str()`` first.

    Returns:
        The cleaned string.
    """
    s = str(s)

    # Whole words whose accented letter was lost when the file was decoded.
    word_fixes = {
        "Mi¿½ltiple": "Multiple",
        "Mï¿½ltiple": "Multiple",
        "M�ltiple": "Multiple",
        "Operaci�n": "Operacion",
        "Reconstrucci�n": "Reconstruccion",
        "Presi�n": "Presion",
        "C�rter": "Carter",
        "Barom�trica": "Barometrica",
        "Admisi�n": "Admision",
        "�ndice": "Indice",
    }
    for bad, good in word_fixes.items():
        s = s.replace(bad, good)

    # These two complete a word that may arrive cut short. The optional group
    # swallows a suffix that is already present, so complete words are not
    # corrupted ("Bater�a" -> "Bateria", not "Bateriaa"; "Sincronizacion"
    # stays as is instead of becoming "Sincronizacionon").
    s = re.sub(r"Bater�a?", "Bateria", s)
    s = re.sub(r"Sincronizaci(?:�n|ón|on)?", "Sincronizacion", s)

    # Damaged degree sign in front of the Fahrenheit unit.
    for bad in ("�F", "Â°F"):
        s = s.replace(bad, "F")

    s = re.sub(r"\s+", " ", s).strip()
    return s

def merge_name_unit(var, unit):
    """Build a column label from a variable name and its unit.

    Both pieces are cleaned with ``fix_text`` first. A blank unit, or the
    placeholder ``"unidades"`` (any casing), leaves the name on its own;
    otherwise the unit is appended in square brackets. Any label mentioning
    "temperatura" always ends up tagged ``[F]``: existing bracketed parts are
    rewritten to ``[F]``, or ``[F]`` is appended when there are none.

    Args:
        var: Raw variable name from the header line.
        unit: Raw unit text from the units line.

    Returns:
        The combined column label.
    """
    name = fix_text(var)
    unit_label = fix_text(unit)

    has_unit = unit_label != "" and unit_label.lower() != "unidades"
    if not has_unit:
        col = name
    else:
        col = f"{name} [{unit_label}]" if name != "" else f"[{unit_label}]"

    # Non-temperature labels are returned untouched.
    if re.search(r"temperatura", col, flags=re.IGNORECASE) is None:
        return col

    # Temperature rule: every bracketed part becomes [F], or [F] is appended.
    bracketed = re.compile(r"\[[^\]]*\]")
    if bracketed.search(col):
        return bracketed.sub("[F]", col)
    return f"{col} [F]"

new_cols = [merge_name_unit(v, u) for v, u in zip(vars_row, units_row)]

# Data records start on line 21 of the file (index 20). Each record is
# stripped cell by cell, then padded with blanks or cut to exactly n cells so
# it lines up with the header.
data_rows = [
    ([str(cell).strip() for cell in record] + [""] * n)[:n]
    for record in rows[20:]
]

df = pd.DataFrame(data_rows, columns=new_cols)

# Drop columns that have neither a name nor any data. Columns are picked by
# position rather than by label: with repeated labels (e.g. several blank
# header cells) df[label] returns a DataFrame, which has no .str accessor.
# Every cell is already a stripped string, so comparing with "" is enough.
column_is_blank = df.eq("").all(axis=0).tolist()
keep_positions = [
    pos
    for pos, (label, blank) in enumerate(zip(df.columns, column_is_blank))
    if not (str(label).strip() == "" and blank)
]
df = df.iloc[:, keep_positions]

# Drop rows in which every remaining cell is blank (vectorized, no row-wise apply).
df = df[df.ne("").any(axis=1)].reset_index(drop=True)

# All values are still strings at this point; no dtype conversion is done here.
print("Tamanio:", df.shape)
display(df.head(20))


Tamanio: (1245, 53)


Unnamed: 0,Evento,Tiempo Real,Tiempo del Motor,Tiempo de Operacion de Reconstruccion del M [HHHHHH:MM],Velocidad del Motor [rpm],Por Ciento de Pedal o Palanca del Acelerador [Percent],Temperatura del Refrigerante del Motor [F],Sensor de Presion del Refrigeran [in Hg],Presion de Aceite del Mot [psi],Temperatura de Aceite del Motor [F],...,Sensor 1 de Temperatura del Multiple de Admisi [F],Sensor 2 de Temperatura del Multiple de Admisi [F],Sensor 3 de Temperatura del Multiple de Admisi [F],Sensor 4 de Temperatura del Multiple de Admisi [F],Sensor de Temperatura 1 Delta del Multiple de Admision (Calcul [F],Sensor de Temperatura 2 Delta del Multiple de Admision (Calcul [F],Sensor de Temperatura 3 Delta del Multiple de Admision (Calcul [F],Sensor de Temperatura 4 Delta del Multiple de Admision (Calcul [F],Carga Neta Porcentual [Percent],Estado del Motor
0,1,Sin Preparar,002053:24:49,0002053:24,1820,90,183.2,30,67.9,199.4,...,124,127,125,129,32,33,33,33,66,Funcionando
1,2,Sin Preparar,002053:26:31,0002053:26,1829,100,185.0,27,68.5,201.8,...,129,131,129,133,34,33,33,33,68,Funcionando
2,3,Sin Preparar,002053:27:35,0002053:27,1852,100,188.6,27,69.6,205.4,...,135,136,134,138,34,33,33,33,63,Funcionando
3,4,Sin Preparar,002053:28:47,0002053:28,1824,100,190.4,34,66.7,208.0,...,140,141,139,143,34,33,33,33,73,Funcionando
4,5,Sin Preparar,002053:29:55,0002053:29,1819,100,194.0,32,69.0,210.3,...,142,143,141,145,34,33,33,33,100,Funcionando
5,6,Sin Preparar,002053:31:06,0002053:31,1855,100,195.8,33,66.7,212.2,...,144,145,143,146,34,33,33,33,61,Funcionando
6,7,Sin Preparar,002053:33:03,0002053:33,1852,100,195.8,32,67.3,211.9,...,140,141,139,142,34,33,33,33,64,Funcionando
7,8,Sin Preparar,002053:34:43,0002053:34,1660,100,195.8,33,62.7,214.1,...,140,140,139,143,34,32,33,33,92,Funcionando
8,9,Sin Preparar,002053:35:10,0002053:35,1852,100,195.8,33,66.1,214.8,...,142,143,142,145,34,33,33,33,61,Funcionando
9,10,Sin Preparar,002053:35:51,0002053:35,1817,100,197.6,38,66.1,214.8,...,144,146,144,147,32,33,33,33,55,Funcionando


In [11]:
# Print the column list with non-null counts and dtypes. The columns visible
# in the captured output are all "object", i.e. the values are still text.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1245 entries, 0 to 1244
Data columns (total 53 columns):
 #   Column                                                                Non-Null Count  Dtype 
---  ------                                                                --------------  ----- 
 0   Evento                                                                1245 non-null   object
 1   Tiempo Real                                                           1245 non-null   object
 2   Tiempo del Motor                                                      1245 non-null   object
 3   Tiempo de Operacion de Reconstruccion del M [HHHHHH:MM]               1245 non-null   object
 4   Velocidad del Motor [rpm]                                             1245 non-null   object
 5   Por Ciento de Pedal o Palanca del Acelerador [Percent]                1245 non-null   object
 6   Temperatura del Refrigerante del Motor [F]                            1245 non-null   object
 7   Sensor

In [12]:
# Summarize every column. In the captured output the summary rows are
# count / unique / top / freq rather than mean / std / quantiles.
# NOTE(review): presumably because the values are still strings; convert the
# measurement columns with pd.to_numeric first to get numeric statistics.
df.describe()

Unnamed: 0,Evento,Tiempo Real,Tiempo del Motor,Tiempo de Operacion de Reconstruccion del M [HHHHHH:MM],Velocidad del Motor [rpm],Por Ciento de Pedal o Palanca del Acelerador [Percent],Temperatura del Refrigerante del Motor [F],Sensor de Presion del Refrigeran [in Hg],Presion de Aceite del Mot [psi],Temperatura de Aceite del Motor [F],...,Sensor 1 de Temperatura del Multiple de Admisi [F],Sensor 2 de Temperatura del Multiple de Admisi [F],Sensor 3 de Temperatura del Multiple de Admisi [F],Sensor 4 de Temperatura del Multiple de Admisi [F],Sensor de Temperatura 1 Delta del Multiple de Admision (Calcul [F],Sensor de Temperatura 2 Delta del Multiple de Admision (Calcul [F],Sensor de Temperatura 3 Delta del Multiple de Admision (Calcul [F],Sensor de Temperatura 4 Delta del Multiple de Admision (Calcul [F],Carga Neta Porcentual [Percent],Estado del Motor
count,1245,1245,1245,1245,1245,1245,1245.0,1245,1245.0,1245.0,...,1245,1245,1245,1245,1245,1245,1245,1245,1245,1245
unique,1245,1,1245,967,349,31,22.0,43,40.0,328.0,...,28,46,46,47,4,5,5,5,71,1
top,1245,Sin Preparar,002096:07:27,0002081:26,1846,100,188.6,38,67.9,210.6,...,140,136,140,144,32,33,32,32,94,Funcionando
freq,1,1245,1,4,14,736,131.0,110,102.0,17.0,...,136,65,69,76,798,601,680,638,48,1245
