In [31]:
# local_biplot.py
import subprocess
import sys
import os
import warnings

warnings.filterwarnings("ignore")

def install_and_import(package, alias=None):
    """
    Import a module, installing its distribution with pip first if it is missing.

    Parameters:
    -----------
    package : str
        The name of the distribution to ``pip install``.
    alias : str, optional
        The importable module name when it differs from the distribution name
        (for example ``package='umap-learn'`` with ``alias='umap'``).

    Returns:
    --------
    module
        The imported module.

    Raises:
    -------
    subprocess.CalledProcessError
        If the pip installation command fails.
    ImportError
        If the module still cannot be imported after the installation.
    """
    import importlib

    module_name = alias if alias else package
    try:
        return importlib.import_module(module_name)
    except ImportError:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"])
        # The package was installed while this interpreter is running, so the
        # import system's cached directory listings must be refreshed first.
        importlib.invalidate_caches()
        return importlib.import_module(module_name)

# Third-party distributions to bootstrap, mapped to their importable module names
libraries_to_install = {'umap-learn': 'umap'}
for library in libraries_to_install:
    alias = libraries_to_install[library]
    install_and_import(library, alias)

# Import libraries
import numpy as np
import seaborn as sns
from seaborn import kdeplot
from scipy.special import softmax
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from umap import UMAP
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from scipy.optimize import minimize
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import Normalize
import pandas as pd

class LocalBiplot():
    """
    A class to perform local biplot analysis using various dimensionality reduction techniques
    and affine transformations.

    The data are embedded in 2D (UMAP or t-SNE), split into groups, and a PCA biplot is
    computed per group. Each group's PCA scores are then aligned to the group's embedded
    points with a fitted affine transformation, and the same transformation is applied to
    the group's loadings.

    Attributes:
    -----------
    redm : str
        The dimensionality reduction method to use ('umap'; any other value selects t-SNE).
    affine_ : str
        Type of affine transformation ('rotation'; any other value selects the full transformation).
    perplexity : str or int
        Perplexity parameter for t-SNE (also used as n_neighbors for UMAP). 'auto' derives
        it from the number of samples on every call.
    min_dist : float
        Minimum distance parameter for UMAP.
    bounds : tuple of (low, high) pairs
        Bounds for the seven affine parameters (sx, sy, hx, hy, theta, tx, ty).
    perplexity_ : float or int
        The perplexity actually used by the last call to dim_red.
    red_ : object
        The fitted reducer created by the last call to dim_red.
    y, loadings_l, Zr, rel_l :
        Group labels, transformed loadings, embedding and relevances stored by local_biplot2D.

    Methods:
    --------
    dim_red(X):
        Performs dimensionality reduction on the input data X.
    biplot2D(X, plot_=True, labels=None, loading_labels=None):
        Creates a 2D PCA biplot of the input data X.
    local_biplot2D(X, y, plot_=True, loading_labels=None):
        Performs local biplot analysis on the input data X with labels y.
    affine_transformation(params, points):
        Applies an affine transformation to the input points using the given parameters.
    objective_function(params, source_points, target_points):
        Objective function to minimize the mean squared error between transformed source points and target points.
    affine_transformation_obj(source_points, target_points, initial_guess=np.array([1, 1, 0, 0, 0, 0, 0])):
        Optimizes the affine transformation parameters to match source points to target points.
    plot_arrows(means_, points, head_width=0.025, color='b', linestyle='-'):
        Plots arrows starting at means_ with displacements given by points.
    biplot_global(score, loading, rel_, axbiplot, axrel, mean_=None, labels=None, loading_labels=None, score_labels=None, bar_c='b'):
        Draws a biplot (scores plus loading arrows) and a relevance bar chart on the given axes.
    """
    def __init__(self,redm = 'umap',affine_='full',perplexity='auto',min_dist=0.75):
        self.affine_ = affine_
        if affine_ == 'rotation':
            # Pin scale to 1 and shear/translation to 0 so that only theta is free
            self.bounds = ((1,1),(1,1),(0,0),(0,0),(-np.pi,np.pi),(0,0),(0,0))
        else:
            # Only the rotation angle is bounded; every other parameter is free
            self.bounds = ((None,None),(None,None),(None,None),(None,None),(-np.pi,np.pi),(None,None),(None,None))

        self.perplexity = perplexity
        self.min_dist = min_dist
        self.redm = redm

    def _resolve_perplexity(self, n_samples):
        """
        Returns the perplexity to use for a data set with n_samples rows.

        'auto' maps to round(0.5 * sqrt(n_samples)). The configured value in
        self.perplexity is left untouched so that 'auto' is re-evaluated for
        every data set instead of being frozen by the first call.
        """
        if isinstance(self.perplexity, str) and self.perplexity == 'auto':
            return np.round(0.5*np.sqrt(n_samples))
        return self.perplexity

    def dim_red(self,X):
        """
        Performs dimensionality reduction on the input data X using UMAP or t-SNE.

        The input is min-max scaled to [-1, 1] before the reduction and the
        embedding is min-max scaled to [-1, 1] afterwards.

        Parameters:
        -----------
        X : array-like, shape (n_samples, n_features)
            The input data.

        Returns:
        --------
        array-like, shape (n_samples, 2)
            The reduced dimensionality data.
        """
        perplexity = self._resolve_perplexity(X.shape[0])
        self.perplexity_ = perplexity
        if self.redm == 'umap':
            # Never pass fewer than 2 neighbours, which 'auto' would yield on tiny inputs
            n_neighbors = max(2, int(perplexity))
            self.red_ = UMAP(n_components=2,n_neighbors=n_neighbors,random_state=42, min_dist=self.min_dist)
        else:
            self.red_ = TSNE(n_components=2,perplexity=perplexity,random_state=42, init='pca')
        X_scaled = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)
        return MinMaxScaler(feature_range=(-1, 1)).fit_transform(self.red_.fit_transform(X_scaled))

    def biplot2D(self,X,plot_=True,labels=None,loading_labels=None):
        """
        Creates a 2D PCA biplot of the input data X.

        Parameters:
        -----------
        X : array-like, shape (n_samples, n_features)
            The input data.
        plot_ : bool, optional, default=True
            Whether to plot the biplot.
        labels : array-like, shape (n_samples,), optional
            Labels for the data points.
        loading_labels : list of str, optional
            Labels for the loadings.

        Returns:
        --------
        loading : array-like, shape (n_features, 2)
            The loadings for the first two principal components.
        rel_ : array-like, shape (n_features,)
            The relevance of each loading.
        score : array-like, shape (n_samples, 2)
            The PCA scores for the first two principal components.
        """
        pca = PCA(random_state = 42)
        X_scaled = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)
        score = MinMaxScaler(feature_range=(-1, 1)).fit_transform(pca.fit_transform(X_scaled))
        loading = pca.components_.T
        # Relevance: softmax over features of the variance-weighted absolute loadings
        rel_ = softmax(np.abs(loading.dot(np.diag(pca.explained_variance_))).sum(axis=1))

        if plot_:
            fig,ax = plt.subplots(1,2,figsize=(20, 7))
            self.biplot_global(score, loading, rel_,labels=labels, loading_labels=loading_labels,axbiplot=ax[0],axrel=ax[1])
            ax[0].set_title('2D PCA Global Biplot')
            plt.show()

        return loading[:,:2],rel_,score[:,:2]

    def local_biplot2D(self, X, y, plot_=True, loading_labels=None):
        """
        Performs local biplot analysis on the input data X with labels y.

        Parameters:
        -----------
        X : array-like, shape (n_samples, n_features)
            The input data.
        y : array-like, shape (n_samples,) or int
            The labels for the data points, or the number of clusters for k-means clustering
            (run on the 2D embedding). Labels may be of any type accepted by np.unique.
        plot_ : bool, optional, default=True
            Whether to plot the biplot.
        loading_labels : list of str, optional
            Labels for the loadings.

        Returns:
        --------
        results : list of dict
            One dictionary per group, in the order of np.unique(y), with the keys
            'score', 'loading', 'rel_', 'mean_', 'labels', 'loading_labels',
            'opt_params' and 'bar_c'.
        """
        X_ = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)  # min-max scaling to [-1, 1]
        Z = self.dim_red(X_)  # nonlinear dimensionality reduction
        if isinstance(y, (int, np.integer)):  # no labels -> clustering
            self.y = KMeans(n_clusters=int(y), random_state=42).fit_predict(Z)
        else:
            # An array is required so that `self.y == c` yields a boolean mask
            self.y = np.asarray(y)

        classes = np.unique(self.y)
        C_ = len(classes)
        n_features = X_.shape[1]
        Zl = np.zeros(Z.shape)
        loading_ = np.zeros((C_, n_features, 2))
        loading_r = np.zeros((C_, n_features, 2))
        rel_ = np.zeros((C_, n_features))
        opt_params = np.zeros((C_, 7))  # affine transformation parameters

        results = []

        if plot_:
            fig, ax = plt.subplots(1, 2, figsize=(20, 7))
        cmap_ = mpl.colormaps['jet'].resampled(C_)(range(C_))  # one RGBA row per group

        # Arrays are indexed by the group's position k, not by its label value c,
        # so labels need not be the integers 0..C_-1.
        for k, c in enumerate(classes):
            mask = self.y == c
            loading_[k], rel_[k], Zl[mask] = self.biplot2D(X_[mask], plot_=False)  # PCA biplot on this group
            Zl[mask], opt_params[k], _ = self.affine_transformation_obj(Zl[mask], Z[mask])  # fit PCA scores to the embedding
            # NOTE(review): the translation part is applied to the loadings as well — confirm this is intended
            loading_r[k] = self.affine_transformation(opt_params[k], loading_[k])

            # Group centroid in the embedding, repeated once per sample of the group
            mean_ = np.tile(Z[mask].mean(axis=0), (int(mask.sum()), 1))
            results.append({
                'score': Z[mask],
                'loading': loading_r[k],
                'rel_': rel_[k],
                'mean_': mean_,
                'labels': cmap_[k],
                'loading_labels': loading_labels,
                'opt_params': opt_params[k],
                'bar_c': cmap_[k]
            })

            if plot_:
                # A (1, 4) array is unambiguously a single RGBA colour for scatter,
                # even when the group happens to contain exactly 4 points.
                self.biplot_global(Z[mask], loading_r[k], rel_[k], labels=cmap_[k].reshape(1, -1), mean_=mean_,
                                loading_labels=loading_labels, axbiplot=ax[0], axrel=ax[1], bar_c=cmap_[k])

        if plot_:
            ax[0].set_xlabel('Emb. 1')
            ax[0].set_ylabel('Emb. 2')
            ax[0].set_title(f'2D Local Biplot ({self.redm})')
            plt.show()

        # Keep the main outputs on the instance
        self.loadings_l = loading_r
        self.Zr = Z
        self.rel_l = rel_

        return results

    def affine_transformation(self,params,points):
        """
        Applies an affine transformation to the input points using the given parameters.

        The linear part is M = R(theta) . H(hx, hy) . S(sx, sy) with
        S = diag(sx, sy), H = [[1, hx], [hy, 1]] and R the 2D rotation matrix,
        followed by the translation (tx, ty). With params = (1, 1, 0, 0, 0, 0, 0)
        the transformation is the identity.

        Parameters:
        -----------
        params : array-like, shape (7,)
            The parameters (sx, sy, hx, hy, theta, tx, ty).
        points : array-like, shape (n_samples, 2)
            The points to transform.

        Returns:
        --------
        array-like, shape (n_samples, 2)
            The transformed points.
        """
        sx, sy, hx, hy, theta, tx, ty = np.asarray(params, dtype=float)[:7]
        points = np.asarray(points, dtype=float)
        S = np.array([[sx, 0.0], [0.0, sy]])
        H = np.array([[1.0, hx], [hy, 1.0]])  # unit diagonal: zero shear leaves points unchanged
        cos_t, sin_t = np.cos(theta), np.sin(theta)
        R = np.array([[cos_t, -sin_t], [sin_t, cos_t]])
        M = R.dot(H).dot(S)
        return points.dot(M.T) + np.array([tx, ty])

    def objective_function(self, params, source_points, target_points):
        """
        The objective function to minimize: the mean squared error between the
        transformed source points and the target points.

        Parameters:
        -----------
        - params: Parameters of the affine transformation.
        - source_points: Source points to transform. N x 2
        - target_points: Target points to match. N x 2

        Returns:
        --------
        - Mean over the points of the squared Euclidean distance between each
          transformed source point and its target point.
        """
        transformed_points = self.affine_transformation(params, source_points)
        return np.mean(np.sum((transformed_points - target_points)**2, axis=1))

    def affine_transformation_obj(self, source_points,target_points,initial_guess = np.array([1, 1, 0, 0, 0, 0,0])):
        """
        Optimizes the affine transformation parameters to match source points to target points.

        Parameters:
        -----------
        source_points : array-like, shape (n_samples, 2)
            The source points to transform.
        target_points : array-like, shape (n_samples, 2)
            The target points to match.
        initial_guess : array-like, shape (7,), optional
            Initial guess for the affine transformation parameters. The default is
            the identity transformation (unit scale, no shear, no rotation, no translation).

        Returns:
        --------
        array-like, shape (n_samples, 2)
            The transformed source points.
        array-like, shape (7,)
            The optimized affine transformation parameters.
        scipy.optimize.OptimizeResult
            The result of the optimization.
        """
        # The search is restricted by self.bounds, which depend on the affine_ mode
        result = minimize(self.objective_function, x0=initial_guess, bounds=self.bounds, args=(source_points, target_points))

        optimized_params = result.x
        transformed_points = self.affine_transformation(optimized_params,source_points)
        return transformed_points, optimized_params, result

    def plot_arrows(self,means_,points,head_width=0.025,color='b',linestyle ='-'):
        """
        Plots one arrow per row on the current matplotlib axes.

        Parameters:
        -----------
        means_ : array-like, shape (n_samples, 2)
            The starting points of the arrows.
        points : array-like, shape (n_samples, 2)
            The arrow displacements (dx, dy); each arrow ends at means_ + points.
        head_width : float, optional
            The width of the arrow heads.
        color : str, optional
            The color of the arrows.
        linestyle : str, optional
            The line style of the arrows.

        Returns:
        --------
        None
        """
        for n in range(points.shape[0]):
            plt.arrow(means_[n,0],means_[n,1],points[n,0],points[n,1],head_width=head_width,color=color,linestyle=linestyle)
        return

    def biplot_global(self,score, loading, rel_,axbiplot,axrel,mean_ = None,labels=None, loading_labels=None, score_labels=None,bar_c='b'):
        """
        Draws a biplot (scores plus loading arrows) and a relevance bar chart.

        Parameters:
        -----------
        score : array-like, shape (n_samples, >=2)
            The scores; only the first two columns are plotted.
        loading : array-like, shape (n_features, >=2)
            The loadings; only the first two columns are plotted.
        rel_ : array-like, shape (n_features,)
            The relevance of each loading.
        axbiplot : matplotlib.axes.Axes
            The axes for the biplot.
        axrel : matplotlib.axes.Axes
            The axes for the relevance plot.
        mean_ : array-like, shape (n_rows, 2), optional
            Arrow origins; row i is the origin of the i-th loading arrow. If it has
            fewer rows than there are loadings, its last row is repeated. Defaults
            to the origin (0, 0) for every arrow.
        labels : array-like, optional
            Passed to scatter as the point colours.
        loading_labels : list of str, optional
            The labels for the loadings.
        score_labels : list of str, optional
            The labels for the scores.
        bar_c : str, optional
            The color of the relevance bars.

        Returns:
        --------
        None
        """
        xs = score[:, 0]
        ys = score[:, 1]
        n = loading.shape[0]
        if mean_ is None:
            mean_ = np.zeros((n, 2))
        else:
            mean_ = np.asarray(mean_)
            # One origin per loading is needed: pad by repeating the last row
            if mean_.shape[0] < n:
                padding = np.tile(mean_[-1, :], (n - mean_.shape[0], 1))
                mean_ = np.vstack((mean_, padding))

        # Plot scores
        if labels is not None:
            axbiplot.scatter(xs, ys, alpha=0.5,c=labels)
        else:
            axbiplot.scatter(xs, ys, alpha=0.5)

        if score_labels is not None:
            for i, txt in enumerate(score_labels):
                axbiplot.annotate(txt, (xs[i], ys[i]), fontsize=8)

        # Loadings are stretched to the extent of the scores along each axis
        x_span = np.abs(xs).max()
        y_span = np.abs(ys).max()

        # Plot loading vectors
        for i in range(n):
            dx = loading[i, 0]*x_span
            dy = loading[i, 1]*y_span
            axbiplot.arrow(mean_[i,0], mean_[i,1], dx, dy,
                        color='r', alpha=0.5, head_width=0.025, head_length=0.05)
            if loading_labels is not None:
                axbiplot.text(mean_[i,0]+dx*1.15, mean_[i,1]+dy*1.15,
                        loading_labels[i], color='g', ha='center', va='center')

        axbiplot.set_xlabel("PC1")
        axbiplot.set_ylabel("PC2")

        ticks = np.arange(1,n+1)
        axrel.bar(ticks,rel_,color=bar_c)
        if loading_labels is not None:
            axrel.set_xticks(ticks,loading_labels,rotation=90)
        else:
            # Text properties may only be passed to set_xticks together with labels
            axrel.set_xticks(ticks)
            axrel.tick_params(axis='x', labelrotation=90)
        axrel.set_ylabel("Normalized Relevance")
        return

import plotly.graph_objects as go
import numpy as np

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

In [2]:
# Mount Google Drive at /content/drive; the CSV read in the cells below lives under this path.
# NOTE(review): presumably this only works inside a Google Colab runtime — confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [36]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Load the DataFrame
df = pd.read_csv('/content/drive/MyDrive/IA_CHEC/trafos_red.csv')

# A single encoder instance is re-fitted for every column
label_encoder = LabelEncoder()

# Encode every column as numbers
for column in df.columns:
    # Datetime columns: convert to integer seconds since the epoch
    if pd.api.types.is_datetime64_any_dtype(df[column]):
        df[column] = df[column].astype('int64') // 10**9

    # Categorical, mixed (str and numeric) and numeric columns are all label-encoded
    # NOTE(review): numeric columns are encoded through their string form, so the
    # codes follow lexicographic rather than numeric order — confirm this is intended
    elif df[column].dtype == 'object' or pd.api.types.is_numeric_dtype(df[column]):
        # Remember where the original NaNs are
        nan_mask = df[column].isna()

        # Encode the string form of the column; missing entries get a placeholder code
        codes = pd.Series(
            label_encoder.fit_transform(df[column].astype(str).fillna('MISSING_VALUE')),
            index=df.index,
        )

        # Put the NaNs back. The column is rebuilt and assigned in one step rather
        # than through chained indexing (df[column][mask] = ...), which may write
        # to a temporary copy and leave df unchanged.
        if nan_mask.any():
            codes = codes.astype('float64').mask(nan_mask)
        df[column] = codes

# Option 1: drop the rows that contain NaN before passing the data to the biplot
#df_clean = df.dropna()

# Option 2: impute NaNs with a numeric value (here 0)
df_clean = df.fillna(0)

# Example usage of local_biplot2D
localbiplot = LocalBiplot(affine_='rotation', redm='umap')

# Use the cleaned DataFrame (no NaNs); y=3 asks for 3 k-means clusters
results = localbiplot.local_biplot2D(X=df_clean.values, y=3, plot_=False, loading_labels=df_clean.columns)


In [37]:
# Two side-by-side panels: the biplot (left) and the stacked relevance bars (right)
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=("Interactive 2D Biplot", "Normalized Relevance"),
    column_widths=[0.6, 0.6],
    horizontal_spacing=0.1
)

# Colour palette for the groups; the same colour is used for a group in both panels
colors = ['red', 'blue', 'green', 'purple', 'orange', 'yellow', 'pink']  # Extend if there are more groups

# Per-label list of relevances, one slot per group, used to build the stacked bars
loading_labels_all = results[0]['loading_labels']
stacked_relevances = {label: [0] * len(results) for label in loading_labels_all}

# Add each group's scores and loading vectors to the biplot panel
for idx, result in enumerate(results):
    score = result['score']
    loading = result['loading']
    rel_ = result['rel_']
    mean_ = result['mean_']
    labels = result['labels']
    loading_labels = result['loading_labels']
    bar_c = colors[idx % len(colors)]  # Cycle through the palette

    # Record this group's relevance for every label
    for i, label in enumerate(loading_labels):
        stacked_relevances[label][idx] = rel_[i]

    # Interactive scatter of the group's scores (kept out of the legend)
    scatter_trace = go.Scatter(
        x=score[:, 0],
        y=score[:, 1],
        mode='markers',
        marker=dict(color=bar_c, size=10),
        name=f'Group {idx + 1} Scores',
        showlegend=False
    )
    fig.add_trace(scatter_trace, row=1, col=1)

    magnitudes = np.linalg.norm(loading, axis=1)

    # Indices of the 5 longest loading vectors
    top_5_indices = np.argsort(magnitudes)[-5:]

    # mean_ has one row per SAMPLE of the group, each holding the same group
    # centroid, whereas i below is a FEATURE index; use the first row as the
    # arrow origin so that small groups cannot cause an out-of-range index.
    origin_x, origin_y = mean_[0, 0], mean_[0, 1]
    x_span = max(abs(score[:, 0]))
    y_span = max(abs(score[:, 1]))

    # Add the loading vectors to the biplot panel
    for i in top_5_indices:
        loading_trace = go.Scatter(
            x=[origin_x, origin_x + loading[i, 0] * x_span],
            y=[origin_y, origin_y + loading[i, 1] * y_span],
            mode='lines+markers+text',
            line=dict(color=bar_c, width=2),
            marker=dict(size=5),
            text=['', loading_labels[i]],  # Show the text only at the tip
            textposition='top right',
            textfont=dict(size=12),
            showlegend=False
        )
        fig.add_trace(loading_trace, row=1, col=1)

# Build one stacked-bar trace per group, cycling the palette exactly as above
for idx in range(len(results)):
    color = colors[idx % len(colors)]
    bar_trace = go.Bar(
        x=loading_labels_all,
        y=[stacked_relevances[label][idx] for label in loading_labels_all],
        marker=dict(color=color),
        name=f'Group {idx + 1}',
        showlegend=False
    )
    fig.add_trace(bar_trace, row=1, col=2)

# Update the overall layout
fig.update_layout(
    title_text="Combined Interactive 2D Biplot and Stacked Normalized Relevance",
    xaxis_title="PC1",
    yaxis_title="PC2",
    xaxis2_title="Loadings",
    yaxis2_title="Relevance",
    barmode='stack',  # Stacked bars
    showlegend=True
)

# Show the interactive figure
fig.show()
# Acortar distancias

In [29]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from umap import UMAP
from sklearn.preprocessing import MinMaxScaler
from scipy.optimize import minimize
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Clase LocalBiplot3D ajustada
class LocalBiplot3D:
    """
    Local biplot analysis on a 3D nonlinear embedding.

    The data are embedded in 3D (UMAP or t-SNE), split into groups, and a PCA
    with up to three components is computed per group. Each group's PCA scores
    are aligned to the group's embedded points with a fitted transformation
    (per-axis scaling, rotation about the Z axis, translation).

    Attributes:
    -----------
    redm : str
        The dimensionality reduction method ('umap'; any other value selects t-SNE).
    affine_ : str
        Stored as given. NOTE(review): no method of this class reads it, so it
        currently does not change the fitted transformation.
    perplexity : str or int
        Perplexity for t-SNE (also used as n_neighbors for UMAP). 'auto' derives
        it from the number of samples on every call.
    min_dist : float
        Minimum distance parameter for UMAP.
    perplexity_ : float or int
        The perplexity actually used by the last call to dim_red_3D.
    red_ : object
        The fitted reducer created by the last call to dim_red_3D.
    y, Zr, Zl :
        Group labels, embedding and aligned PCA scores stored by local_biplot3D.
    """
    def __init__(self, redm='umap', affine_='full', perplexity='auto', min_dist=0.05):
        self.affine_ = affine_
        self.perplexity = perplexity
        self.min_dist = min_dist
        self.redm = redm

    def _resolve_perplexity(self, n_samples):
        """
        Returns the perplexity to use for a data set with n_samples rows.

        'auto' maps to round(0.5 * sqrt(n_samples)). The configured value in
        self.perplexity is left untouched so that 'auto' is re-evaluated for
        every data set instead of being frozen by the first call.
        """
        if isinstance(self.perplexity, str) and self.perplexity == 'auto':
            return np.round(0.5 * np.sqrt(n_samples))
        return self.perplexity

    def dim_red_3D(self, X):
        """
        Performs 3D dimensionality reduction on the input data X using UMAP or t-SNE.

        The input is min-max scaled to [-1, 1] before the reduction and the
        embedding is min-max scaled to [-1, 1] afterwards.

        Parameters:
        -----------
        X : array-like, shape (n_samples, n_features)
            The input data.

        Returns:
        --------
        array-like, shape (n_samples, 3)
            The embedded data.
        """
        perplexity = self._resolve_perplexity(X.shape[0])
        self.perplexity_ = perplexity

        if self.redm == 'umap':
            # Never pass fewer than 2 neighbours
            n_neighbors = max(2, int(perplexity))
            self.red_ = UMAP(n_components=3, n_neighbors=n_neighbors, random_state=42, min_dist=self.min_dist)
        else:
            self.red_ = TSNE(n_components=3, perplexity=perplexity, random_state=42, init='pca')

        X_scaled = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)
        return MinMaxScaler(feature_range=(-1, 1)).fit_transform(self.red_.fit_transform(X_scaled))

    def biplot3D(self, X, plot_=True, labels=None, loading_labels=None):
        """
        Creates a PCA biplot of the input data X with up to three components.

        Parameters:
        -----------
        X : array-like, shape (n_samples, n_features)
            The input data.
        plot_ : bool, optional, default=True
            Whether to show a scatter of the scores (3D with plotly, 2D/1D with matplotlib).
        labels, loading_labels : optional
            Accepted for signature compatibility; not used by this method.

        Returns:
        --------
        loading : array-like, shape (n_features, n_components)
            The PCA loadings, with n_components = min(3, n_samples, n_features).
        rel_ : array-like, shape (n_features,)
            Sum of the absolute loadings of each feature.
        score : array-like, shape (n_samples, n_components)
            The PCA scores, min-max scaled to [-1, 1].

        Raises:
        -------
        ValueError
            If X has no samples or no features.
        """
        n_samples, n_features = X.shape
        n_components = min(3, n_samples, n_features)  # cannot exceed the data's size
        if n_components < 1:
            raise ValueError("El número de componentes debe ser al menos 1.")

        pca = PCA(n_components=n_components, random_state=42)
        X_scaled = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)
        score = MinMaxScaler(feature_range=(-1, 1)).fit_transform(pca.fit_transform(X_scaled))
        loading = pca.components_.T
        rel_ = np.sum(np.abs(loading), axis=1)

        if plot_:
            # Points are coloured by their distance from the origin in score space
            point_color = np.linalg.norm(score, axis=1)
            if n_components == 3:
                fig = go.Figure()
                scatter_trace = go.Scatter3d(
                    x=score[:, 0],
                    y=score[:, 1],
                    z=score[:, 2],
                    mode='markers',
                    marker=dict(size=5, color=point_color),
                    name='3D Scores'
                )
                fig.add_trace(scatter_trace)
                fig.update_layout(
                    scene=dict(
                        xaxis_title="PC1",
                        yaxis_title="PC2",
                        zaxis_title="PC3"
                    ),
                    title="3D PCA Biplot"
                )
                fig.show()
            elif n_components == 2:
                plt.scatter(score[:, 0], score[:, 1], c=point_color)
                plt.xlabel("PC1")
                plt.ylabel("PC2")
                plt.title("2D PCA Biplot")
                plt.show()
            else:
                plt.scatter(score[:, 0], np.zeros_like(score[:, 0]), c=point_color)
                plt.xlabel("PC1")
                plt.title("1D PCA Biplot")
                plt.show()

        return loading, rel_, score

    def generate_initial_guess_and_bounds(self, n_components_c):
        """
        Returns the starting parameters and bounds for the optimizer.

        Parameters:
        -----------
        n_components_c : int
            Dimensionality of the points to align (1, 2 or 3).

        Returns:
        --------
        initial_guess : ndarray
            Identity transformation: unit scales, zero angle, zero translation.
        bounds : list of (low, high)
            Only the rotation angle is bounded, to [-pi, pi].

        Raises:
        -------
        ValueError
            If n_components_c is not 1, 2 or 3.
        """
        if n_components_c == 1:
            initial_guess = np.array([1.0, 0.0])  # scaling, translation
            bounds = [(None, None), (None, None)]
        elif n_components_c == 2:
            initial_guess = np.array([1.0, 1.0, 0.0, 0.0, 0.0])  # scaling x2, rotation angle, translation x2
            bounds = [(None, None), (None, None), (-np.pi, np.pi), (None, None), (None, None)]
        elif n_components_c == 3:
            initial_guess = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0])  # scaling x3, rotation angle, translation x3
            bounds = [(None, None), (None, None), (None, None), (-np.pi, np.pi), (None, None), (None, None), (None, None)]
        else:
            raise ValueError("n_components_c > 3 no está soportado")
        return initial_guess, bounds

    def affine_transformation(self, params, points):
        """
        Applies scaling, rotation and translation to the input points.

        The parameter layout depends on the number of columns of points:
        1 -> (s, t); 2 -> (sx, sy, angle, tx, ty);
        3 -> (sx, sy, sz, angle, tx, ty, tz), rotating about the Z axis.

        Parameters:
        -----------
        params : array-like
            The transformation parameters, laid out as described above.
        points : array-like, shape (n_samples, 1..3)
            The points to transform.

        Returns:
        --------
        array-like, same shape as points
            The transformed points, (R . S) applied to each point plus the translation.

        Raises:
        -------
        ValueError
            If points has a number of columns other than 1, 2 or 3.
        """
        params = np.asarray(params)
        n_components_c = points.shape[1]
        if n_components_c == 1:
            # 1D: scaling and translation only
            S = np.array([[params[0]]])
            R = np.array([[1]])
            tr_ = np.array([params[1]])
        elif n_components_c == 2:
            S = np.diag(params[0:2])
            angle = params[2]
            R = np.array([[np.cos(angle), -np.sin(angle)],
                          [np.sin(angle),  np.cos(angle)]])
            tr_ = params[3:5]
        elif n_components_c == 3:
            S = np.diag(params[0:3])
            angle = params[3]
            R = np.array([[np.cos(angle), -np.sin(angle), 0],
                          [np.sin(angle),  np.cos(angle), 0],
                          [0,              0,             1]])  # rotation about the Z axis
            tr_ = params[4:7]
        else:
            raise ValueError("n_components_c > 3 no está soportado")

        M = R.dot(S)
        return (M.dot(points.T) + tr_.reshape(-1, 1)).T

    def objective_function(self, params, source_points, target_points):
        """
        Objective function to minimize: the mean squared error between the
        transformed source points and the target points.

        Only the first source_points.shape[1] columns of target_points are
        compared, so a lower-dimensional source can be fitted to a 3D target.
        """
        transformed_points = self.affine_transformation(params, source_points)
        n_components_c = source_points.shape[1]
        target_points_c = target_points[:, :n_components_c]
        return np.mean(np.sum((transformed_points - target_points_c)**2, axis=1))

    def affine_transformation_obj(self, source_points, target_points):
        """
        Optimizes the transformation parameters to match source points to target points.

        Returns:
        --------
        transformed_points : array-like, same shape as source_points
            The source points under the optimized transformation.
        optimized_params : ndarray
            The optimized parameters.
        result : scipy.optimize.OptimizeResult
            The result of the optimization.
        """
        n_components_c = source_points.shape[1]
        initial_guess, bounds = self.generate_initial_guess_and_bounds(n_components_c)
        result = minimize(self.objective_function, x0=initial_guess, bounds=bounds, args=(source_points, target_points))
        optimized_params = result.x
        transformed_points = self.affine_transformation(optimized_params, source_points)
        return transformed_points, optimized_params, result

    def local_biplot3D(self, X, y, plot_=True, loading_labels=None):
        """
        Performs local biplot analysis in 3D.

        Parameters:
        -----------
        X : array-like, shape (n_samples, n_features)
            The input data.
        y : array-like, shape (n_samples,) or int
            The labels for the data points, or the number of clusters for k-means
            clustering (run on the 3D embedding).
        plot_ : bool, optional
            Accepted for signature compatibility; this method does not plot.
        loading_labels : list of str, optional
            Labels for the loadings, copied into every result.

        Returns:
        --------
        results : list of dict
            One dictionary per successfully processed group with the keys
            'score', 'loading', 'rel_', 'mean_', 'labels', 'loading_labels'
            and 'opt_params'. Groups that raise ValueError are reported with
            print and skipped.
        """
        X_ = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)  # normalize to [-1, 1]
        Z = self.dim_red_3D(X_)  # 3D nonlinear dimensionality reduction

        if isinstance(y, (int, np.integer)):  # no labels -> clustering
            self.y = KMeans(n_clusters=int(y), random_state=42).fit_predict(Z)
        else:
            # An array is required so that `self.y == c` yields a boolean mask
            self.y = np.asarray(y)

        classes = np.unique(self.y)
        Zl = np.zeros(Z.shape)
        results = []

        cmap_ = plt.get_cmap('jet', len(classes))

        for idx, c in enumerate(classes):
            mask = self.y == c
            X_c = X_[mask]
            n_samples, n_features = X_c.shape
            if min(n_samples, n_features) < 1:
                print(f"Clase {c} omitida debido a datos insuficientes (muestras: {n_samples}, características: {n_features})")
                continue
            try:
                loading_c, rel_c, Zl_c = self.biplot3D(X_c, plot_=False)
                Zl_c_transformed, opt_params, _ = self.affine_transformation_obj(Zl_c, Z[mask])
                # Small groups yield fewer than 3 components: zero-pad to 3 columns
                n_components_c = Zl_c_transformed.shape[1]
                Zl_c_padded = np.zeros((Zl_c_transformed.shape[0], 3))
                Zl_c_padded[:, :n_components_c] = Zl_c_transformed
                Zl[mask] = Zl_c_padded

                # Group centroid in the embedding, repeated once per sample of the group
                mean_ = np.tile(Z[mask].mean(axis=0), (int(mask.sum()), 1))

                results.append({
                    'score': Z[mask],
                    'loading': loading_c,
                    'rel_': rel_c,
                    'mean_': mean_,
                    'labels': cmap_(idx),
                    'loading_labels': loading_labels,
                    'opt_params': opt_params
                })
            except ValueError as e:
                print(f"Error al procesar la clase {c}: {e}")
                continue

        # Keep the embedding and the aligned PCA scores on the instance
        self.Zr = Z
        self.Zl = Zl

        return results




In [38]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import LabelEncoder

# Cell purpose: load the "trafos" CSV, turn it into an all-numeric matrix,
# run the 3D local biplot on it with 3 KMeans groups, and draw an interactive
# figure (3D scores + loading arrows on the left, stacked per-group relevance
# bars on the right).
df = pd.read_csv('/content/drive/MyDrive/IA_CHEC/trafos_red.csv')

# --- Preprocessing -----------------------------------------------------------
# Only text (object) columns are label-encoded. Numeric columns are passed
# through unchanged: encoding them via their string form would rank the values
# lexicographically ("10.0" sorts before "9.0") and discard their magnitude.
label_encoder = LabelEncoder()
for column in df.columns:
    if pd.api.types.is_datetime64_any_dtype(df[column]):
        # Datetimes become integer seconds (assumes nanosecond resolution).
        df[column] = df[column].astype('int64') // 10**9
    elif df[column].dtype == 'object':
        nan_mask = df[column].isna()
        # astype(str) already turns NaN into the string 'nan', so no fillna is
        # needed before fitting; the missing entries are restored just below.
        codes = pd.Series(
            label_encoder.fit_transform(df[column].astype(str)),
            index=df.index,
        )
        if nan_mask.any():
            # Integer codes cannot hold NaN: widen to float explicitly instead
            # of relying on an implicit (deprecated) upcast during assignment.
            codes = codes.astype('float64')
            codes.loc[nan_mask] = np.nan
        df[column] = codes

# Rows with any missing value are dropped before the biplot.
df_clean = df.dropna()

# --- Local biplot ------------------------------------------------------------
localbiplot3D = LocalBiplot3D(affine_='rotation', redm='umap')

# y=3 is an int, so the groups come from clustering rather than given labels.
results = localbiplot3D.local_biplot3D(
    X=df_clean.values,
    y=3,
    plot_=False,
    loading_labels=df_clean.columns,
)

# --- Figure skeleton ---------------------------------------------------------
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{'type': 'scatter3d'}, {'type': 'bar'}]],
    subplot_titles=("Interactive 3D Biplot", "Normalized Relevance"),
    column_widths=[0.7, 0.3],
    horizontal_spacing=0.1
)

# Group palette; it is cycled with modulo everywhere so the scatter and the
# bar chart always agree, even when there are more groups than colors.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'yellow', 'pink']

# Union of the loading labels reported by every group.
all_loading_labels = set().union(*(result['loading_labels'] for result in results))
loading_labels_all = list(all_loading_labels)

# stacked_relevances[label][g] is the relevance of `label` in group g
# (stays 0 when a group does not report that label).
stacked_relevances = {label: [0] * len(results) for label in loading_labels_all}

for idx, result in enumerate(results):
    score = result['score']
    loading = result['loading']
    rel_ = result['rel_']
    # 'mean_' repeats the group mean once per sample, so row 0 is enough.
    mean_vector = result['mean_'][0]
    loading_labels = result['loading_labels']
    bar_c = colors[idx % len(colors)]

    # Record this group's relevance for each of its labels.
    for i, label in enumerate(loading_labels):
        stacked_relevances[label][idx] = rel_[i]

    # Interactive 3D scatter of the group's scores.
    scatter_trace = go.Scatter3d(
        x=score[:, 0],
        y=score[:, 1],
        z=score[:, 2],
        mode='markers',
        marker=dict(color=bar_c, size=5),
        name=f'Group {idx + 1} Scores',
        showlegend=False
    )
    fig.add_trace(scatter_trace, row=1, col=1)

    # Indices of the (at most 5) loading vectors with the largest norm.
    magnitudes = np.linalg.norm(loading, axis=1)
    num_top_loadings = min(5, loading.shape[0])
    top_indices = np.argsort(magnitudes)[-num_top_loadings:]

    # Loop invariants: arrow origin (group mean, zero-padded to 3 components)
    # and the per-axis scale (largest absolute score on each axis).
    origin = mean_vector
    if origin.shape[0] < 3:
        origin = np.pad(origin, (0, 3 - origin.shape[0]), 'constant')
    axis_scale = np.abs(score[:, :3]).max(axis=0)

    # One arrow per selected loading, drawn from the group mean.
    for i in top_indices:
        loading_vector = loading[i]
        # Zero-pad loadings that have fewer than 3 components.
        if loading_vector.shape[0] < 3:
            loading_vector = np.pad(loading_vector, (0, 3 - loading_vector.shape[0]), 'constant')
        tip = origin[:3] + loading_vector[:3] * axis_scale

        loading_trace = go.Scatter3d(
            x=[origin[0], tip[0]],
            y=[origin[1], tip[1]],
            z=[origin[2], tip[2]],
            mode='lines+markers+text',
            line=dict(color=bar_c, width=2),
            marker=dict(size=3),
            text=['', loading_labels[i]],  # label only at the arrow tip
            textposition='top right',
            showlegend=False
        )
        fig.add_trace(loading_trace, row=1, col=1)

# Sort the labels so the bar order is stable across runs.
loading_labels_all_sorted = sorted(loading_labels_all)

# One stacked bar trace per group, using the same color as its scatter trace.
for idx in range(len(results)):
    bar_trace = go.Bar(
        x=loading_labels_all_sorted,
        y=[stacked_relevances[label][idx] for label in loading_labels_all_sorted],
        marker=dict(color=colors[idx % len(colors)]),
        name=f'Group {idx + 1}',
        showlegend=True
    )
    fig.add_trace(bar_trace, row=1, col=2)

# Global layout.
fig.update_layout(
    title_text="Combined Interactive 3D Biplot and Stacked Normalized Relevance",
    scene=dict(
        xaxis_title="Dim 1",
        yaxis_title="Dim 2",
        zaxis_title="Dim 3"
    ),
    barmode='stack',
    showlegend=True
)

# Render the interactive figure.
fig.show()



In [47]:
# Cell purpose: load the "interruptores" CSV, turn it into an all-numeric
# matrix, run the 3D local biplot on it with 3 KMeans groups, and draw an
# interactive figure (3D scores + loading arrows on the left, stacked
# per-group relevance bars on the right).
df = pd.read_csv('/content/drive/MyDrive/IA_CHEC/interruptores_red.csv')

# --- Preprocessing -----------------------------------------------------------
# Only text (object) columns are label-encoded. Numeric columns are passed
# through unchanged: encoding them via their string form would rank the values
# lexicographically ("10.0" sorts before "9.0") and discard their magnitude.
label_encoder = LabelEncoder()
for column in df.columns:
    if pd.api.types.is_datetime64_any_dtype(df[column]):
        # Datetimes become integer seconds (assumes nanosecond resolution).
        df[column] = df[column].astype('int64') // 10**9
    elif df[column].dtype == 'object':
        nan_mask = df[column].isna()
        # astype(str) already turns NaN into the string 'nan', so no fillna is
        # needed before fitting; the missing entries are restored just below.
        codes = pd.Series(
            label_encoder.fit_transform(df[column].astype(str)),
            index=df.index,
        )
        if nan_mask.any():
            # Integer codes cannot hold NaN: widen to float explicitly instead
            # of relying on an implicit (deprecated) upcast during assignment.
            codes = codes.astype('float64')
            codes.loc[nan_mask] = np.nan
        df[column] = codes

# Rows with any missing value are dropped before the biplot.
df_clean = df.dropna()

# --- Local biplot ------------------------------------------------------------
localbiplot3D = LocalBiplot3D(affine_='rotation', redm='umap')

# y=3 is an int, so the groups come from clustering rather than given labels.
results = localbiplot3D.local_biplot3D(
    X=df_clean.values,
    y=3,
    plot_=False,
    loading_labels=df_clean.columns,
)

# --- Figure skeleton ---------------------------------------------------------
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{'type': 'scatter3d'}, {'type': 'bar'}]],
    subplot_titles=("Interactive 3D Biplot", "Normalized Relevance"),
    column_widths=[0.7, 0.3],
    horizontal_spacing=0.1
)

# Group palette; it is cycled with modulo everywhere so the scatter and the
# bar chart always agree, even when there are more groups than colors.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'yellow', 'pink']

# Union of the loading labels reported by every group.
all_loading_labels = set().union(*(result['loading_labels'] for result in results))
loading_labels_all = list(all_loading_labels)

# stacked_relevances[label][g] is the relevance of `label` in group g
# (stays 0 when a group does not report that label).
stacked_relevances = {label: [0] * len(results) for label in loading_labels_all}

for idx, result in enumerate(results):
    score = result['score']
    loading = result['loading']
    rel_ = result['rel_']
    # 'mean_' repeats the group mean once per sample, so row 0 is enough.
    mean_vector = result['mean_'][0]
    loading_labels = result['loading_labels']
    bar_c = colors[idx % len(colors)]

    # Record this group's relevance for each of its labels.
    for i, label in enumerate(loading_labels):
        stacked_relevances[label][idx] = rel_[i]

    # Interactive 3D scatter of the group's scores.
    scatter_trace = go.Scatter3d(
        x=score[:, 0],
        y=score[:, 1],
        z=score[:, 2],
        mode='markers',
        marker=dict(color=bar_c, size=5),
        name=f'Group {idx + 1} Scores',
        showlegend=False
    )
    fig.add_trace(scatter_trace, row=1, col=1)

    # Indices of the (at most 5) loading vectors with the largest norm.
    magnitudes = np.linalg.norm(loading, axis=1)
    num_top_loadings = min(5, loading.shape[0])
    top_indices = np.argsort(magnitudes)[-num_top_loadings:]

    # Loop invariants: arrow origin (group mean, zero-padded to 3 components)
    # and the per-axis scale (largest absolute score on each axis).
    origin = mean_vector
    if origin.shape[0] < 3:
        origin = np.pad(origin, (0, 3 - origin.shape[0]), 'constant')
    axis_scale = np.abs(score[:, :3]).max(axis=0)

    # One arrow per selected loading, drawn from the group mean.
    for i in top_indices:
        loading_vector = loading[i]
        # Zero-pad loadings that have fewer than 3 components.
        if loading_vector.shape[0] < 3:
            loading_vector = np.pad(loading_vector, (0, 3 - loading_vector.shape[0]), 'constant')
        tip = origin[:3] + loading_vector[:3] * axis_scale

        loading_trace = go.Scatter3d(
            x=[origin[0], tip[0]],
            y=[origin[1], tip[1]],
            z=[origin[2], tip[2]],
            mode='lines+markers+text',
            line=dict(color=bar_c, width=2),
            marker=dict(size=3),
            text=['', loading_labels[i]],  # label only at the arrow tip
            textposition='top right',
            showlegend=False
        )
        fig.add_trace(loading_trace, row=1, col=1)

# Sort the labels so the bar order is stable across runs.
loading_labels_all_sorted = sorted(loading_labels_all)

# One stacked bar trace per group, using the same color as its scatter trace.
for idx in range(len(results)):
    bar_trace = go.Bar(
        x=loading_labels_all_sorted,
        y=[stacked_relevances[label][idx] for label in loading_labels_all_sorted],
        marker=dict(color=colors[idx % len(colors)]),
        name=f'Group {idx + 1}',
        showlegend=True
    )
    fig.add_trace(bar_trace, row=1, col=2)

# Global layout.
fig.update_layout(
    title_text="Combined Interactive 3D Biplot and Stacked Normalized Relevance",
    scene=dict(
        xaxis_title="Dim 1",
        yaxis_title="Dim 2",
        zaxis_title="Dim 3"
    ),
    barmode='stack',
    showlegend=True
)

# Render the interactive figure.
fig.show()

In [41]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Cell purpose: load the "tramo" CSV, turn it into an all-numeric matrix, run
# the 3D local biplot on it with 3 KMeans groups, and draw an interactive
# figure (3D scores + loading arrows on the left, stacked per-group relevance
# bars on the right).
df = pd.read_csv('/content/drive/MyDrive/IA_CHEC/tramo_red.csv')

# --- Preprocessing -----------------------------------------------------------
# Only text (object) columns are label-encoded. Numeric columns are passed
# through unchanged: encoding them via their string form would rank the values
# lexicographically ("10.0" sorts before "9.0") and discard their magnitude.
label_encoder = LabelEncoder()
for column in df.columns:
    if pd.api.types.is_datetime64_any_dtype(df[column]):
        # Datetimes become integer seconds (assumes nanosecond resolution).
        df[column] = df[column].astype('int64') // 10**9
    elif df[column].dtype == 'object':
        nan_mask = df[column].isna()
        # astype(str) already turns NaN into the string 'nan', so no fillna is
        # needed before fitting; the missing entries are restored just below.
        codes = pd.Series(
            label_encoder.fit_transform(df[column].astype(str)),
            index=df.index,
        )
        if nan_mask.any():
            # Integer codes cannot hold NaN: widen to float explicitly instead
            # of relying on an implicit (deprecated) upcast during assignment.
            codes = codes.astype('float64')
            codes.loc[nan_mask] = np.nan
        df[column] = codes

# Rows with any missing value are dropped before the biplot.
df_clean = df.dropna()

# --- Local biplot ------------------------------------------------------------
localbiplot3D = LocalBiplot3D(affine_='rotation', redm='umap')

# y=3 is an int, so the groups come from clustering rather than given labels.
results = localbiplot3D.local_biplot3D(
    X=df_clean.values,
    y=3,
    plot_=False,
    loading_labels=df_clean.columns,
)

# --- Figure skeleton ---------------------------------------------------------
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{'type': 'scatter3d'}, {'type': 'bar'}]],
    subplot_titles=("Interactive 3D Biplot", "Normalized Relevance"),
    column_widths=[0.7, 0.3],
    horizontal_spacing=0.1
)

# Group palette; it is cycled with modulo everywhere so the scatter and the
# bar chart always agree, even when there are more groups than colors.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'yellow', 'pink']

# Union of the loading labels reported by every group.
all_loading_labels = set().union(*(result['loading_labels'] for result in results))
loading_labels_all = list(all_loading_labels)

# stacked_relevances[label][g] is the relevance of `label` in group g
# (stays 0 when a group does not report that label).
stacked_relevances = {label: [0] * len(results) for label in loading_labels_all}

for idx, result in enumerate(results):
    score = result['score']
    loading = result['loading']
    rel_ = result['rel_']
    # 'mean_' repeats the group mean once per sample, so row 0 is enough.
    mean_vector = result['mean_'][0]
    loading_labels = result['loading_labels']
    bar_c = colors[idx % len(colors)]

    # Record this group's relevance for each of its labels.
    for i, label in enumerate(loading_labels):
        stacked_relevances[label][idx] = rel_[i]

    # Interactive 3D scatter of the group's scores.
    scatter_trace = go.Scatter3d(
        x=score[:, 0],
        y=score[:, 1],
        z=score[:, 2],
        mode='markers',
        marker=dict(color=bar_c, size=5),
        name=f'Group {idx + 1} Scores',
        showlegend=False
    )
    fig.add_trace(scatter_trace, row=1, col=1)

    # Indices of the (at most 5) loading vectors with the largest norm.
    magnitudes = np.linalg.norm(loading, axis=1)
    num_top_loadings = min(5, loading.shape[0])
    top_indices = np.argsort(magnitudes)[-num_top_loadings:]

    # Loop invariants: arrow origin (group mean, zero-padded to 3 components)
    # and the per-axis scale (largest absolute score on each axis).
    origin = mean_vector
    if origin.shape[0] < 3:
        origin = np.pad(origin, (0, 3 - origin.shape[0]), 'constant')
    axis_scale = np.abs(score[:, :3]).max(axis=0)

    # One arrow per selected loading, drawn from the group mean.
    for i in top_indices:
        loading_vector = loading[i]
        # Zero-pad loadings that have fewer than 3 components.
        if loading_vector.shape[0] < 3:
            loading_vector = np.pad(loading_vector, (0, 3 - loading_vector.shape[0]), 'constant')
        tip = origin[:3] + loading_vector[:3] * axis_scale

        loading_trace = go.Scatter3d(
            x=[origin[0], tip[0]],
            y=[origin[1], tip[1]],
            z=[origin[2], tip[2]],
            mode='lines+markers+text',
            line=dict(color=bar_c, width=2),
            marker=dict(size=3),
            text=['', loading_labels[i]],  # label only at the arrow tip
            textposition='top right',
            showlegend=False
        )
        fig.add_trace(loading_trace, row=1, col=1)

# Sort the labels so the bar order is stable across runs.
loading_labels_all_sorted = sorted(loading_labels_all)

# One stacked bar trace per group, using the same color as its scatter trace.
for idx in range(len(results)):
    bar_trace = go.Bar(
        x=loading_labels_all_sorted,
        y=[stacked_relevances[label][idx] for label in loading_labels_all_sorted],
        marker=dict(color=colors[idx % len(colors)]),
        name=f'Group {idx + 1}',
        showlegend=True
    )
    fig.add_trace(bar_trace, row=1, col=2)

# Global layout.
fig.update_layout(
    title_text="Combined Interactive 3D Biplot and Stacked Normalized Relevance",
    scene=dict(
        xaxis_title="Dim 1",
        yaxis_title="Dim 2",
        zaxis_title="Dim 3"
    ),
    barmode='stack',
    showlegend=True
)

# Render the interactive figure.
fig.show()

In [42]:
# Keep the raw (un-encoded) "trafos" table around for inspection.
trafos = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/IA_CHEC/trafos_red.csv',
)

In [44]:
# Keep the raw (un-encoded) "tramo" table around for inspection.
tramos = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/IA_CHEC/tramo_red.csv',
)

In [45]:
# Keep the raw (un-encoded) "interruptores" table around for inspection.
interrupciones = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/IA_CHEC/interruptores_red.csv',
)

In [46]:
# Print the frame summary (index range, per-column non-null counts and dtypes)
# for the raw "trafos" table; the captured output follows this cell.
trafos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2859 entries, 0 to 2858
Data columns (total 30 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   evento           2859 non-null   int64  
 1   equipo_ope       2859 non-null   object 
 2   tipo_equi_ope    2859 non-null   object 
 3   cto_equi_ope     2859 non-null   object 
 4   tipo_elemento    2859 non-null   object 
 5   inicio           2859 non-null   object 
 6   fin              2859 non-null   object 
 7   duracion_h       2859 non-null   float64
 8   tipo_duracion    2859 non-null   object 
 9   causa            2857 non-null   float64
 10  tipo_causa       2859 non-null   object 
 11  CNT_TRAFOS_AFEC  2859 non-null   int64  
 12  cnt_usus         2859 non-null   int64  
 13  SAIDI            2859 non-null   float64
 14  SAIFI            2859 non-null   float64
 15  PHASES           2859 non-null   int64  
 16  FPARENT          2859 non-null   object 
 17  XPOS          