In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from scipy import signal
from sklearn.preprocessing import StandardScaler

2025-05-24 15:26:13.286117: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-24 15:26:13.525907: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748121973.610949    1596 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748121973.633445    1596 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748121973.799888    1596 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [5]:
from tensorflow.keras.layers import Layer
from tensorflow.keras.models import load_model
import tensorflow.keras.backend as K

class FeatureWeightingLayer(Layer):
    """Keras layer that multiplies its input by a stored set of scaling factors.

    The factors are wrapped in a backend variable at construction time and
    applied with a plain ``*`` in ``call``.

    Args:
        scaling_factors: Values handed to ``K.variable``. Presumably one factor
            per input feature -- TODO confirm against the training code.
        **kwargs: Forwarded unchanged to ``Layer.__init__``.
    """

    def __init__(self, scaling_factors, **kwargs):
        # Zero-argument super(): the explicit form looks up the *global* name
        # `FeatureWeightingLayer` at call time, which breaks for existing
        # instances once a notebook cell re-defines the class.
        super().__init__(**kwargs)
        self.scaling_factors = K.variable(scaling_factors, name='scaling_factors')

    def call(self, inputs):
        """Return ``inputs`` multiplied by the stored scaling factors."""
        return inputs * self.scaling_factors

    def get_config(self):
        """Return the base layer config extended with the current scaling factors."""
        config = super().get_config()
        config.update({"scaling_factors": self.scaling_factors.numpy()})
        return config

# Keras needs the custom layer class registered by name to rebuild it from the HDF5 file.
model = load_model(
    './model.h5',
    custom_objects={'FeatureWeightingLayer': FeatureWeightingLayer},
)


I0000 00:00:1748122328.117254    1596 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5520 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


In [16]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
import re
from tensorflow.keras.layers import Layer
from tensorflow.keras.models import load_model
import tensorflow.keras.backend as K

class FeatureWeightingLayer(Layer):
    """Keras layer that multiplies its input by a stored set of scaling factors.

    The factors are wrapped in a backend variable at construction time and
    applied with a plain ``*`` in ``call``.

    Args:
        scaling_factors: Values handed to ``K.variable``. Presumably one factor
            per input feature -- TODO confirm against the training code.
        **kwargs: Forwarded unchanged to ``Layer.__init__``.
    """

    def __init__(self, scaling_factors, **kwargs):
        # Zero-argument super(): the explicit form looks up the *global* name
        # `FeatureWeightingLayer` at call time, which breaks for existing
        # instances once a notebook cell re-defines the class.
        super().__init__(**kwargs)
        self.scaling_factors = K.variable(scaling_factors, name='scaling_factors')

    def call(self, inputs):
        """Return ``inputs`` multiplied by the stored scaling factors."""
        return inputs * self.scaling_factors

    def get_config(self):
        """Return the base layer config extended with the current scaling factors."""
        config = super().get_config()
        config.update({"scaling_factors": self.scaling_factors.numpy()})
        return config

def clean_sensor_data(csv_path):
    """Read a raw sensor log and keep only the rows that are fully numeric.

    A line is kept when it has at least 11 comma-separated fields and the first
    11 (ten sensor values plus the timestamp) all parse as ``float``. Blank
    lines, the ``angle1,angle2...`` header and known informational banner lines
    are skipped. Any fields after the eleventh are ignored.

    Args:
        csv_path: Path to the CSV/log file to read.

    Returns:
        pandas.DataFrame with the ten sensor columns followed by ``timestamp``,
        one row per valid line, in file order.

    Raises:
        ValueError: If the file contains no valid data line.
    """
    column_names = ['angle1', 'angle2', 'angle3', 'angle4', 'angle5',
                    'rolldeg', 'pitchdeg', 'anglegx', 'anglegy', 'anglegz', 'timestamp']
    n_fields = len(column_names)
    info_markers = ('Iniciando', 'Smart Glove', 'Formato:', 'Flex1')

    # errors='replace' keeps a stray undecodable byte in a banner/garbled line
    # from aborting the whole read; numeric rows are plain ASCII and unaffected.
    with open(csv_path, 'r', encoding='utf-8', errors='replace') as file:
        lines = file.readlines()

    data_rows = []
    for raw_line in lines:
        line = raw_line.strip()

        # Skip blank lines, the header, and informational banner text.
        if not line:
            continue
        if line.startswith('angle1,angle2'):
            continue
        if any(marker in line for marker in info_markers):
            continue

        parts = line.split(',')
        if len(parts) < n_fields:
            continue

        try:
            # Validate the timestamp together with the sensor values so that a
            # malformed timestamp drops the row instead of raising later.
            row = [float(x) for x in parts[:n_fields]]
        except ValueError:
            continue
        data_rows.append(row)

    print(f"Líneas originales: {len(lines)}")
    print(f"Líneas válidas después de limpieza: {len(data_rows)}")

    if not data_rows:
        raise ValueError("No se encontraron datos válidos en el archivo")

    df = pd.DataFrame(data_rows, columns=column_names)

    print(f"DataFrame creado con forma: {df.shape}")
    print("\nPrimeras 5 filas:")
    print(df.head())
    print("\nÚltimas 5 filas:")
    print(df.tail())

    return df

def predict_continuous_data(model, df, label_map, strategy='sliding_window', window_size=68, step_size=10,
                            interval_duration=3.0, random_state=None):
    """Classify a continuous sensor recording that has no Start/End markers.

    The recording is cut into fixed-length sequences of ``window_size`` rows,
    all sequences are sent to ``model.predict`` in a single batch, and one
    result dict is produced per sequence.

    Strategies:
        * ``'sliding_window'``: consecutive row windows of ``window_size``
          rows, advancing ``step_size`` rows each time.
        * ``'fixed_intervals'``: consecutive time spans of
          ``interval_duration`` (in the units of the ``timestamp`` column,
          presumably seconds -- TODO confirm). Each span with at least 10 rows
          is resampled to ``window_size`` rows by drawing sorted random row
          indices (with replacement only when the span is shorter than
          ``window_size``).

    Args:
        model: Object exposing ``predict(batch, verbose=0)`` that returns one
            row of class scores per input sequence.
        df: DataFrame holding the ten sensor columns and a ``timestamp`` column.
        label_map: Mapping from label to class index; it is inverted to
            translate the arg-max class index back to a label.
        strategy: ``'sliding_window'`` or ``'fixed_intervals'``.
        window_size: Number of rows per sequence fed to the model.
        step_size: Row stride between windows (``'sliding_window'`` only).
        interval_duration: Span length for ``'fixed_intervals'``; must be > 0.
        random_state: Seed for the resampling in ``'fixed_intervals'``. With
            ``None`` the global ``np.random`` state is used, as before.

    Returns:
        List of dicts with keys ``start_frame``, ``end_frame``, ``start_time``,
        ``end_time``, ``duration_seconds``, ``predicted_label``, ``confidence``
        and ``probabilities``. Empty when no sequence could be built.

    Raises:
        ValueError: On an unknown ``strategy`` or a non-positive
            ``interval_duration`` for ``'fixed_intervals'``.
        KeyError: If the predicted class index is missing from ``label_map``.
    """
    sensor_columns = ['angle1', 'angle2', 'angle3', 'angle4', 'angle5',
                      'rolldeg', 'pitchdeg', 'anglegx', 'anglegy', 'anglegz']
    min_interval_samples = 10  # spans with fewer rows are skipped

    # Fail loudly instead of silently returning no predictions.
    if strategy not in ('sliding_window', 'fixed_intervals'):
        raise ValueError(
            f"Estrategia desconocida: {strategy!r}. Use 'sliding_window' o 'fixed_intervals'."
        )
    # A non-positive span would never advance the time cursor below.
    if strategy == 'fixed_intervals' and interval_duration <= 0:
        raise ValueError("interval_duration debe ser mayor que 0")

    # Sensor columns only; the timestamp is kept separately.
    sensor_data = df[sensor_columns].values
    timestamps = df['timestamp'].values

    print(f"Datos de sensores: {sensor_data.shape}")

    reverse_label_map = {v: k for k, v in label_map.items()}

    sequences = []  # model inputs, each of window_size rows
    spans = []      # (start_frame, end_frame, start_time, end_time, duration) per sequence

    if strategy == 'sliding_window':
        print("Usando ventana deslizante...")

        for i in range(0, len(sensor_data) - window_size + 1, step_size):
            window_timestamps = timestamps[i:i + window_size]
            sequences.append(sensor_data[i:i + window_size])
            spans.append((
                i,
                i + window_size - 1,
                window_timestamps[0],
                window_timestamps[-1],
                window_timestamps[-1] - window_timestamps[0],
            ))

    else:  # 'fixed_intervals' (validated above)
        print("Usando intervalos fijos...")

        rng = np.random if random_state is None else np.random.default_rng(random_state)

        if len(timestamps) > 0:
            current_time = timestamps[0]

            while current_time + interval_duration <= timestamps[-1]:
                # First row at or after the span start / span end.
                start_idx = np.argmax(timestamps >= current_time)
                end_time = current_time + interval_duration
                end_idx = np.argmax(timestamps >= end_time)

                # argmax yields 0 when nothing matches: fall back to the last row.
                if end_idx == 0:
                    end_idx = len(timestamps) - 1

                interval_data = sensor_data[start_idx:end_idx]
                n_samples = len(interval_data)

                if n_samples >= min_interval_samples:
                    # Resample to window_size rows; sorting keeps temporal order.
                    indices = np.sort(
                        rng.choice(n_samples, size=window_size, replace=n_samples < window_size)
                    )
                    sequences.append(interval_data[indices])
                    spans.append((start_idx, end_idx - 1, current_time, end_time, interval_duration))

                current_time += interval_duration

    if not sequences:
        return []

    # One batched call instead of one predict() per window.
    batch = np.stack(sequences)
    all_probabilities = np.asarray(model.predict(batch, verbose=0))

    predictions = []
    for (start_frame, end_frame, start_time, end_time, duration), probs in zip(spans, all_probabilities):
        predicted_class = np.argmax(probs)
        predictions.append({
            'start_frame': start_frame,
            'end_frame': end_frame,
            'start_time': start_time,
            'end_time': end_time,
            'duration_seconds': duration,
            'predicted_label': reverse_label_map[predicted_class],
            'confidence': np.max(probs),
            'probabilities': probs.tolist()
        })

    return predictions

def analyze_predictions(predictions, confidence_threshold=0.7):
    """Print a summary of the predictions and return the confident ones.

    Args:
        predictions: List of prediction dicts; the keys read here are
            ``confidence``, ``predicted_label``, ``start_time`` and ``end_time``.
        confidence_threshold: Minimum confidence (inclusive) for a prediction
            to count as high-confidence.

    Returns:
        The subset of ``predictions`` whose confidence reaches the threshold,
        in their original order.
    """
    print("\n=== ANÁLISIS DE PREDICCIONES ===")
    print(f"Total de predicciones: {len(predictions)}")

    # Keep only the predictions at or above the threshold.
    confident = []
    for item in predictions:
        if item['confidence'] >= confidence_threshold:
            confident.append(item)
    print(f"Predicciones con confianza >= {confidence_threshold}: {len(confident)}")

    # Tally how often each label appears among the confident predictions.
    tally = {}
    for item in confident:
        name = item['predicted_label']
        tally[name] = tally.get(name, 0) + 1

    print(f"\nDistribución de predicciones (confianza >= {confidence_threshold}):")
    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    for name, total in ranked:
        print(f"  {name}: {total} predicciones")

    # Show at most the first ten confident predictions as examples.
    print("\nEjemplo de predicciones con alta confianza:")
    for item in confident[:10]:
        print(f"  {item['start_time']:.3f}s - {item['end_time']:.3f}s: {item['predicted_label']} (confianza: {item['confidence']:.3f})")

    return confident

def save_predictions_to_csv(predictions, output_path):
    """Write the predictions to ``output_path`` as a CSV file without an index column.

    Args:
        predictions: Records accepted by ``pd.DataFrame`` (e.g. a list of dicts).
        output_path: Destination file path.
    """
    pd.DataFrame(predictions).to_csv(output_path, index=False)
    print(f"\nPredicciones guardadas en: {output_path}")

# FUNCIÓN PRINCIPAL
def process_and_predict(csv_path, model_path, label_map, strategy='sliding_window'):
    """Run the whole pipeline: clean the CSV, load the model, predict, summarize, save.

    Args:
        csv_path: Path of the raw sensor CSV to process.
        model_path: Path of the saved Keras model to load.
        label_map: Mapping from label to class index, passed through to
            ``predict_continuous_data``.
        strategy: Prediction strategy, passed through to
            ``predict_continuous_data``.

    Returns:
        Tuple ``(predictions, high_conf_preds)``: every prediction, and the
        subset returned by ``analyze_predictions``.

    Side effects:
        Writes all predictions next to the input file as
        ``<input name without extension>_predictions.csv``.
    """
    import os  # local import: only needed to derive the output file name

    print("=== PROCESANDO DATOS DE SENSOR ===")

    # 1. Clean the data
    print("\n1. Limpiando datos...")
    df = clean_sensor_data(csv_path)

    # 2. Load the model from the path the caller supplied (previously ignored).
    print("\n2. Cargando modelo...")
    model = load_model(model_path, custom_objects={'FeatureWeightingLayer': FeatureWeightingLayer})

    # 3. Predict
    print("\n3. Haciendo predicciones...")
    predictions = predict_continuous_data(model, df, label_map, strategy)

    # 4. Summarize
    print("\n4. Analizando resultados...")
    high_conf_preds = analyze_predictions(predictions)

    # 5. Save. Only the final extension is swapped, so an input without a
    #    lowercase '.csv' suffix can no longer be overwritten by the results.
    root, _ext = os.path.splitext(csv_path)
    output_path = f"{root}_predictions.csv"
    save_predictions_to_csv(predictions, output_path)

    return predictions, high_conf_preds

# USAGE
# Label -> class-index mapping; it must be the one used when the model was trained.
label_map = {'A': 0, 'L': 1, 'H': 2, 'O': 3}

# Process the recording and predict.
predictions, high_conf = process_and_predict(
    csv_path='./sensor_data_20250524_144255.csv',
    model_path='./model.h5',
    label_map=label_map,
    strategy='fixed_intervals'  # or 'sliding_window'
)

# Show the first five high-confidence predictions.
for pred in high_conf[:5]:
    print(f"Tiempo {pred['start_time']:.1f}-{pred['end_time']:.1f}s: {pred['predicted_label']} ({pred['confidence']:.3f})")




=== PROCESANDO DATOS DE SENSOR ===

1. Limpiando datos...
Líneas originales: 389
Líneas válidas después de limpieza: 386
DataFrame creado con forma: (386, 11)

Primeras 5 filas:
   angle1  angle2  angle3  angle4  angle5  rolldeg  pitchdeg  anglegx  \
0   27.00   27.00   27.00   27.00   10.20    -6.83     -90.0    -0.02   
1   45.90   45.90   45.90   45.90   19.74   -14.16     -90.0    -0.03   
2   59.13   59.13   59.13   59.13   27.62    17.43     -90.0     0.09   
3   68.39   68.39   68.39   68.39   33.13    16.88     -90.0     0.41   
4   74.87   74.87   74.87   74.87   36.99    21.30     -90.0     0.50   

   anglegy  anglegz  timestamp  
0     0.09    -0.07     10.244  
1     1.15    -1.20     10.296  
2     2.07    -2.67     10.347  
3     3.06    -4.25     10.399  
4     3.86    -5.68     10.450  

Últimas 5 filas:
     angle1  angle2  angle3  angle4  angle5  rolldeg  pitchdeg  anglegx  \
381     0.0   71.04   59.47    6.28    2.10   -64.25    -16.94    43.27   
382     0.0   76.