# Note

* Import
* Filesys
* Manually Cluster

# Import

In [1]:
# from ..HETSFileHelper import gatherCSV, readChannel, EIS_recal_ver02
import os
import re
import gc
import sys
from loguru import logger

import matplotlib.pyplot as plt 
from matplotlib.collections import LineCollection
from matplotlib.widgets import LassoSelector
from matplotlib.path import Path
from matplotlib.colors import ListedColormap

from datetime import datetime

from sklearn.decomposition import PCA

from sklearn.cluster import MiniBatchKMeans
from sklearn.preprocessing import StandardScaler
import hdbscan

import numpy as np
import torch


sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))
from Outlier import OutlierDetection
from EISGPR import Interpolation


%matplotlib qt

# Filesys

In [2]:
def SearchELE(rootPath, ele_pattern = re.compile(r"(.+?)_归档")):
    '''==================================================
        Recursively search rootPath for electrode directories.
        A directory whose name matches ele_pattern (anchored at the
        start of the name) is recorded and not searched any deeper;
        every other directory is descended into. Files are ignored.
        Parameter: 
            rootPath: current search path
            ele_pattern: compiled electrode dir name pattern;
                         group(1) is used as the electrode name
        Return:
            ele_list: list of [directory path, electrode name],
                      in sorted name order at every directory level
        ==================================================
    '''
    ele_list = []
    # os.listdir() returns entries in arbitrary order. Sort them so the
    # position of an electrode in the result is reproducible between runs
    # (positions are used as electrode ids elsewhere in this file).
    for entry in sorted(os.listdir(rootPath)):
        _path = os.path.join(rootPath, entry)
        if not os.path.isdir(_path):
            continue
        match_ele = ele_pattern.match(entry)
        if match_ele:
            ele_list.append([_path, match_ele.group(1)])
        else:
            ele_list.extend(SearchELE(_path, ele_pattern))

    return ele_list

# Manually Cluster

## Almost Electrode

### Input data

In [5]:
# Pick the data root to scan; the commented lines are alternative data sets.
# rootPath = "D:/Baihm/EISNN/Archive/"
# rootPath = "D:/Baihm/EISNN/Archive_New/"
rootPath = "D:/Baihm/EISNN/Invivo"
# ele_list = SearchELE(rootPath)
# Electrode directories under this root are matched by the "_Ver02" suffix
# instead of SearchELE's default pattern.
ele_list = SearchELE(rootPath, ele_pattern = re.compile(r"(.+?)_Ver02"))
# Each ele_list entry is [directory path, electrode name].
n_ele = len(ele_list)
logger.info(f"Search in {rootPath} and find {n_ele:03d} electrodes")

[32m2025-05-16 22:57:44.475[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mSearch in D:/Baihm/EISNN/Invivo and find 006 electrodes[0m


In [None]:
# Start with the electrodes that look completely fine (128/128) and try
# clustering on them first.
# That subset is also fairly small, so it should run faster.

# DATASET_SUFFIX = "Outlier_Ver03"
DATASET_SUFFIX = "Outlier_Ver04"

# Accumulators, one entry per channel:
#   almost_data_list      feature rows of every selected measurement
#   almost_id_list        matching [electrode index, channel, eis_cluster] rows
#   almost_start_list     first feature row of the channel
#   almost_start_id_list  id row of that first measurement
almost_start_list = []
almost_start_id_list = []
almost_data_list = []
almost_id_list = []

n_avaliable = 0

# 101 evenly spaced integer indices used to subsample 5000-point spectra
freq_list = np.linspace(0,5000-1,101,dtype=int, endpoint=True)
# freq_list = freq_list[::-1]

for i in range(n_ele):
# for i in range(3):
    fd_pt = os.path.join(ele_list[i][0], DATASET_SUFFIX, f"{ele_list[i][1]}_{DATASET_SUFFIX}.pt")
    if not os.path.exists(fd_pt):
        # logger.warning(f"{fd_pt} does not exist")
        continue
    # NOTE(review): weights_only=False unpickles arbitrary Python objects;
    # only load files from a trusted source.
    data_pt = torch.load(fd_pt, weights_only=False)
    _meta_group = data_pt["meta_group"]
    _data_group = data_pt["data_group"]

    n_day       = _meta_group["n_day"]
    n_ch        = _meta_group["n_ch"]
    n_valid_ch  = len(_data_group["Channels"])


    logger.info(f"ELE [{i}/{n_ele}]: {ele_list[i][0]}")

    n_avaliable = n_avaliable + 1

    # Iteration by channel
    for j in _data_group['Channels']:
        eis_seq = _data_group[j]["eis_seq"]
        _ch_data = _data_group[j]["chData"]
        _ch_data = _ch_data[eis_seq,:,:]
        if _ch_data.shape[0] == 0:
            # No measurement selected: taking the first row below would
            # raise IndexError, so skip the channel explicitly.
            logger.warning(f"ELE [{i}] channel {j}: eis_seq selects no data, skipped")
            continue

        # Index 1 / index 2 on axis 1 are combined as real / imaginary parts.
        # After the complex log they hold log-magnitude and phase angle.
        _ch_data_log = np.log(_ch_data[:,1,:] + 1j*_ch_data[:,2,:])
        _ch_data[:,1,:] = np.real(_ch_data_log)
        _ch_data[:,2,:] = np.imag(_ch_data_log)
        # Feature row = [log-magnitude | phase]; 5000-point spectra are
        # subsampled with freq_list, other lengths are used as they are.
        if _ch_data.shape[2] == 5000:
            _ch_data = np.hstack((_ch_data[:,1,freq_list],_ch_data[:,2,freq_list]))
        else:
            _ch_data = np.hstack((_ch_data[:,1,:],_ch_data[:,2,:]))
        almost_data_list.append(_ch_data)
        almost_start_list.append(_ch_data[0,:])

        _ch_id = j

        # One [electrode index, channel] pair per feature row ...
        _id = [i, _ch_id] * np.shape(_ch_data)[0]
        _id = np.array(_id).reshape(-1,2)

        # ... extended with the stored cluster id as a third column
        eis_cluster = _data_group[j]['eis_cluster']
        _id = np.hstack((_id, eis_cluster.reshape(-1,1)))
        almost_id_list.append(_id)
        almost_start_id_list.append(_id[0,:])

if not almost_data_list:
    # np.vstack([]) would fail with an unrelated-looking ValueError and the
    # `del` below would hit undefined names; report the real cause instead.
    raise RuntimeError(
        f"No '{DATASET_SUFFIX}' dataset with usable channels found for any of the {n_ele} electrodes"
    )

almost_data_list = np.vstack(almost_data_list)
almost_id_list = np.vstack(almost_id_list)
almost_start_list = np.vstack(almost_start_list)
almost_start_id_list = np.vstack(almost_start_id_list)

logger.info(f"Total {almost_data_list.shape[0]} data points from {n_avaliable} electrodes")

# Drop the references to the last loaded file before collecting garbage
del data_pt, _meta_group, _data_group, _ch_data
gc.collect()



[32m2025-05-16 22:57:49.300[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m32[0m - [1mELE [0/6]: D:/Baihm/EISNN/Invivo\S5877_Ver02[0m
[32m2025-05-16 22:57:51.210[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m32[0m - [1mELE [1/6]: D:/Baihm/EISNN/Invivo\S6005_Ver02[0m
[32m2025-05-16 22:57:53.012[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m32[0m - [1mELE [2/6]: D:/Baihm/EISNN/Invivo\S6006_Ver02[0m
[32m2025-05-16 22:57:54.506[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m32[0m - [1mELE [3/6]: D:/Baihm/EISNN/Invivo\S6072_Ver02[0m
[32m2025-05-16 22:57:56.219[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m32[0m - [1mELE [4/6]: D:/Baihm/EISNN/Invivo\S6106_Ver02[0m
[32m2025-05-16 22:57:56.758[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m32[0m - [1mELE [5/6]: D:/Baihm/EISNN/Invivo\S6175_Ver02[0m
[32m2025-05-16 22:57:56.805[0m | [1mINFO    [0m | [36

59

### PCA

In [17]:
# Standardize every feature column, then fit a 10-component PCA on all rows
# and keep the projected data.
_pca_m = PCA(n_components = 10)
_scale = StandardScaler()
_data_norm = _scale.fit_transform(almost_data_list)
_pca_data = _pca_m.fit_transform(_data_norm)

# Project the per-channel first rows with the scaler and PCA fitted above
# (transform only, no refit). Note that _data_norm is overwritten here.
_data_norm = _scale.transform(almost_start_list)
_pca_start = _pca_m.transform(_data_norm)

In [18]:

# cmap and _id_max are only needed by the commented-out per-electrode
# colouring below; the active scatter uses the default colour.
cmap = plt.colormaps.get_cmap("rainbow_r")
_id_max = almost_id_list[:,0].max()

# Scatter of every sample on the first two principal components
plt.figure(figsize=(9,9))
# plt.scatter(_pca_data[:,0],_pca_data[:,1], color = cmap(almost_id_list[:,0]/_id_max),s=0.1)
plt.scatter(_pca_data[:,0],_pca_data[:,1],s=0.01)
# Same data scale on both axes so distances are not visually distorted
plt.gca().set_aspect('equal', adjustable='box')
plt.title('PCA')


Text(0.5, 1.0, 'PCA')

In [7]:



# cmap and _id_max are only needed by the commented-out per-electrode
# colouring below.
cmap = plt.colormaps.get_cmap("rainbow_r")
_id_max = almost_id_list[:,0].max()

# All samples in light gray as background, with the first sample of every
# channel (_pca_start) drawn on top.
plt.figure()
plt.scatter(_pca_data[:,0],_pca_data[:,1], color = 'lightgray', s=0.1)
# plt.scatter(_pca_start[:,0],_pca_start[:,1], color = cmap(almost_start_id_list[:,0]/_id_max),s=0.1)
plt.scatter(_pca_start[:,0],_pca_start[:,1],s=0.001)
plt.title('PCA')


Text(0.5, 1.0, 'PCA')

#### PC eigenValue

In [8]:
# Explained variance ratio (i.e. contribution) of every principal component
explained_var = _pca_m.explained_variance_ratio_
# 1-based component numbers for the x axis
components = np.arange(1, len(explained_var) + 1)

# Plot
plt.figure(figsize=(6, 6))
bars = plt.bar(components, explained_var, color='skyblue')

# Annotate every bar with its value as a percentage
for bar, var in zip(bars, explained_var):
    yval = bar.get_height()
    # Centered horizontally on the bar, slightly above its top
    plt.text(bar.get_x() + bar.get_width()/2, yval + 0.005, f'{var*100:.2f}%', 
             ha='center', va='bottom', fontsize=10)

plt.xticks(components)
plt.xlabel("Principal Component")
plt.ylabel("Explained Variance Ratio")
plt.title("PCA Explained Variance per Component")
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()

### LassoSelector

In [9]:


# === Data preparation ===
# One label per PCA sample; -1 means "not assigned to any cluster yet".
lasso_labels = np.full(_pca_data.shape[0], -1)
# Id that the next newly drawn cluster will receive
current_label = 0
# Undo stack of earlier label states
label_history = []




In [21]:
# Manual clustering on the first two principal components.
# Keys: n = new cluster per lasso, a = add to cluster <digit>, e = erase,
#       z = undo, 0-9 (while in add mode) = choose the target cluster.
lasso_data = _pca_data[:,:2]

# === Palette: index 0 is "unlabelled", the rest are cluster colours ===
# NOTE: update_colors() maps cluster k to index k % 8 + 1, so only the
# entries 1..8 are ever used and clusters 8+ reuse earlier colours.
color_list = ['lightgray', 'red', 'blue', 'green', 'orange', 'purple', 'cyan', 'magenta', 'brown', 'yellow']
cmap = ListedColormap(color_list)
# cmap = plt.colormaps.get_cmap('tab20c_r')

# === Interaction state ===
mode = "new"  # default mode: new/add/erase
add_target = 0  # cluster id that "add" mode writes into

# === Figure ===
fig, ax = plt.subplots()
pts = ax.scatter(lasso_data[:, 0], lasso_data[:, 1], c='lightgray', s=0.05)
plt.title("Lasso Cluster")

def update_colors():
    """Recolour the scatter from lasso_labels and request a redraw."""
    color_indices = np.where(lasso_labels == -1, 0, lasso_labels%8 + 1)
    pts.set_facecolor(cmap(color_indices))
    fig.canvas.draw_idle()

def on_select(verts):
    """Apply the current mode to the points enclosed by the lasso path."""
    global current_label, lasso_labels, label_history
    path = Path(verts)
    ind = np.nonzero(path.contains_points(lasso_data))[0]
    if ind.size == 0:
        # An empty selection changes nothing: do not burn a cluster id in
        # "new" mode and do not push a useless undo step.
        return

    # Save labels AND the id counter so that undo restores both
    label_history.append((lasso_labels.copy(), current_label))

    # Modify the labels according to the mode
    if mode == "new":
        lasso_labels[ind] = current_label
        current_label += 1
    elif mode == "add":
        lasso_labels[ind] = add_target
    elif mode == "erase":
        lasso_labels[ind] = -1

    update_colors()
update_colors()

# === Lasso binding ===
# Keep the selector referenced, otherwise it may be garbage collected and
# stop responding.
lasso = LassoSelector(ax, on_select)

# === Key binding ===
def on_key(event):
    """Switch mode, undo the last selection, or pick the add-mode target."""
    global mode, add_target, current_label, lasso_labels
    key = event.key
    if key is None:
        # Some key presses are reported without a key name
        return
    if key == 'n':
        mode = 'new'
    elif key == 'a':
        mode = 'add'
    elif key == 'e':
        mode = 'erase'
    elif key == 'z':
        if label_history:
            snapshot = label_history.pop()
            if isinstance(snapshot, tuple):
                _labels, current_label = snapshot
            else:
                # Bare array pushed by an earlier version of on_select
                _labels = snapshot
            lasso_labels[:] = _labels
            update_colors()
    elif mode == 'add' and len(key) == 1 and key in "0123456789":
        add_target = int(key)


fig.canvas.mpl_connect('key_press_event', on_key)

plt.show()

print(np.unique(lasso_labels))


[-1  0  1  4  5  6  7]


### Cluster Plot

In [20]:
# One panel per lasso label showing randomly drawn spectra (first 101
# feature columns, exponentiated back from the log domain).
_labels_uq = np.unique(lasso_labels)
_n = int((len(_labels_uq)-1) / 5) + 1

# Single row: one column per label, up to 500 traces per panel.
# Several rows: fixed 5 columns, up to 100 traces per panel.
if _n == 1:
    _n_col, _n_trace = len(_labels_uq), 500
else:
    _n_col, _n_trace = 5, 100

# squeeze=False always returns a 2-D axes array, so indexing also works
# when there is only a single label (1x1 grid).
fig, axis = plt.subplots(_n, _n_col, squeeze=False)
_ax_ref = axis[0,0]

for i, _id in enumerate(_labels_uq):
    _ax = axis[i // _n_col, i % _n_col]
    _data_mask = almost_data_list[lasso_labels == _id,:]

    # Same colour rule as the lasso scatter: unlabelled (-1) uses palette
    # index 0 instead of colliding with cluster 7 at index 8.
    _color = cmap(0 if _id == -1 else _id%8+1)

    # Random row indices (drawn with replacement)
    _rand_ch = np.floor(np.random.rand(_n_trace)*_data_mask.shape[0]).astype(int)
    for j in range(min(_n_trace, _data_mask.shape[0])):
        _ax.semilogy(np.exp(_data_mask[_rand_ch[j],:101]), color = _color)
        # _ax.plot(np.exp(_data_mask[_rand_ch[j],101:]), color = _color)

    _ax.xaxis.set_visible(False)
    _ax.yaxis.set_visible(False)
    if _ax is not _ax_ref:
        # Link every panel to the first one so all share the same limits
        _ax.sharex(_ax_ref)
        _ax.sharey(_ax_ref)

fig.show()


In [48]:
# Overlay every sample of lasso cluster 0: left panel shows the first 101
# feature columns (exponentiated, log y axis), right panel the remaining
# columns on a linear axis. The very low alpha turns overlap into density.
_data_mask = almost_data_list[lasso_labels == 0,:]
fig = plt.figure()
ax0, ax1 = fig.add_subplot(121), fig.add_subplot(122)
for _trace in _data_mask:
    ax0.semilogy(np.exp(_trace[:101]), alpha = 0.005)
    ax1.plot(_trace[101:], alpha = 0.005)
    
    

In [None]:
# np.save("D:\Baihm\EISNN\Dataset\Anomaly\Open\Archive_Weird_cluster.npy",_data_mask)

### Save Feature Data

In [None]:
# Draw 1000 random rows (with replacement) from lasso cluster 0.
_in_cluster0 = lasso_labels == 0
_id_mask = almost_id_list[_in_cluster0,:]

# _data_mask = almost_data_list[lasso_labels == _id,:]
# Uniform random positions inside the cluster, floored to integer indices
_n_cluster0 = _id_mask.shape[0]
_rand_ch = np.floor(np.random.rand(1000)*_n_cluster0).astype(int)

open_data = almost_data_list[_in_cluster0,:][_rand_ch,:]
open_data.shape
# np.save("D:\Baihm\EISNN\Dataset\Anomaly\Open\EIS_Open.npy",open_data)

### Black List

In [None]:


# Three panels; lasso label number i is drawn into panel i % 3, so labels
# overlay each other once there are more than three of them.
fig, axis = plt.subplots(1,3, figsize = (12,4))
_labels_uq = np.unique(lasso_labels)
for i, _id in enumerate(_labels_uq):
    _ax = axis[i%3]
    _data_mask = almost_data_list[lasso_labels == _id,:]

    # Same colour rule as the lasso scatter: unlabelled (-1) uses palette
    # index 0 instead of colliding with cluster 7 at index 8.
    _color = cmap(0 if _id == -1 else _id%8+1)

    # Up to 100 random rows (drawn with replacement) per label
    _rand_ch = np.floor(np.random.rand(100)*_data_mask.shape[0]).astype(int)
    for j in range(min(100, _data_mask.shape[0])):
        _ax.semilogy(np.exp(_data_mask[_rand_ch[j],:101]), color = _color)

    _ax.xaxis.set_visible(False)
    _ax.yaxis.set_visible(False)
    if _ax is not axis[0]:
        # Link the panel to the first one so all share the same limits
        _ax.sharex(axis[0])
        _ax.sharey(axis[0])

fig.show()


# Manifold

## Input Data

In [6]:
# Pick the data root to scan; the commented lines are alternative data sets.
rootPath = "D:/Baihm/EISNN/Archive/"
# rootPath = "D:/Baihm/EISNN/Archive_New/"
# rootPath = "D:/Baihm/EISNN/Invivo"
# ele_list = SearchELE(rootPath)
# Electrode directories under this root are matched by the "_归档" suffix.
ele_list = SearchELE(rootPath, ele_pattern = re.compile(r"(.+?)_归档"))
# ele_list = SearchELE(rootPath, ele_pattern = re.compile(r"(.+?)_Ver02"))
# Each ele_list entry is [directory path, electrode name].
n_ele = len(ele_list)
logger.info(f"Search in {rootPath} and find {n_ele:03d} electrodes")

[32m2025-05-25 19:05:26.181[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mSearch in D:/Baihm/EISNN/Archive/ and find 218 electrodes[0m


In [7]:
# Start with the electrodes that look completely fine (128/128) and try
# clustering on them first.
# That subset is also fairly small, so it should run faster.

# DATASET_SUFFIX = "Outlier_Ver02"
DATASET_SUFFIX = "Outlier_Ver03"
# DATASET_SUFFIX = "Outlier_Ver04"

# Accumulators, one entry per channel:
#   almost_data_list      feature rows of every selected measurement
#   almost_id_list        matching [electrode index, channel, eis_cluster] rows
#   almost_start_list     first feature row of the channel
#   almost_start_id_list  id row of that first measurement
almost_start_list = []
almost_start_id_list = []
almost_data_list = []
almost_id_list = []

n_avaliable = 0

# 101 evenly spaced integer indices used to subsample 5000-point spectra:
# generated from high to low, then reversed into ascending order
freq_list = np.linspace(5000-1,0,101,dtype=int, endpoint=True)
freq_list = freq_list[::-1]

for i in range(n_ele):
# for i in range(3):
    fd_pt = os.path.join(ele_list[i][0], DATASET_SUFFIX, f"{ele_list[i][1]}_{DATASET_SUFFIX}.pt")
    if not os.path.exists(fd_pt):
        # logger.warning(f"{fd_pt} does not exist")
        continue
    # NOTE(review): weights_only=False unpickles arbitrary Python objects;
    # only load files from a trusted source.
    data_pt = torch.load(fd_pt, weights_only=False)
    _meta_group = data_pt["meta_group"]
    _data_group = data_pt["data_group"]

    n_day       = _meta_group["n_day"]
    n_ch        = _meta_group["n_ch"]
    n_valid_ch  = len(_data_group["Channels"])


    logger.info(f"ELE [{i}/{n_ele}]: {ele_list[i][0]}")

    n_avaliable = n_avaliable + 1

    # Iteration by channel
    for j in _data_group['Channels']:
        eis_seq = _data_group[j]["eis_seq"]
        _ch_data = _data_group[j]["chData"]
        _ch_data = _ch_data[eis_seq,:,:]
        if _ch_data.shape[0] == 0:
            # No measurement selected: taking the first row below would
            # raise IndexError, so skip the channel explicitly.
            logger.warning(f"ELE [{i}] channel {j}: eis_seq selects no data, skipped")
            continue

        # Index 1 / index 2 on axis 1 are combined as real / imaginary parts.
        # After the complex log they hold log-magnitude and phase angle.
        _ch_data_log = np.log(_ch_data[:,1,:] + 1j*_ch_data[:,2,:])
        _ch_data[:,1,:] = np.real(_ch_data_log)
        _ch_data[:,2,:] = np.imag(_ch_data_log)
        # Feature row = [log-magnitude | phase]; 5000-point spectra are
        # subsampled with freq_list, other lengths are used as they are.
        if _ch_data.shape[2] == 5000:
            _ch_data = np.hstack((_ch_data[:,1,freq_list],_ch_data[:,2,freq_list]))
        else:
            _ch_data = np.hstack((_ch_data[:,1,:],_ch_data[:,2,:]))
        almost_data_list.append(_ch_data)
        almost_start_list.append(_ch_data[0,:])

        _ch_id = j

        # One [electrode index, channel] pair per feature row ...
        _id = [i, _ch_id] * np.shape(_ch_data)[0]
        _id = np.array(_id).reshape(-1,2)

        # ... extended with the stored cluster id as a third column
        eis_cluster = _data_group[j]['eis_cluster']
        _id = np.hstack((_id, eis_cluster.reshape(-1,1)))
        almost_id_list.append(_id)
        almost_start_id_list.append(_id[0,:])

if not almost_data_list:
    # np.vstack([]) would fail with an unrelated-looking ValueError and the
    # `del` below would hit undefined names; report the real cause instead.
    raise RuntimeError(
        f"No '{DATASET_SUFFIX}' dataset with usable channels found for any of the {n_ele} electrodes"
    )

almost_data_list = np.vstack(almost_data_list)
almost_id_list = np.vstack(almost_id_list)
almost_start_list = np.vstack(almost_start_list)
almost_start_id_list = np.vstack(almost_start_id_list)

logger.info(f"Total {almost_data_list.shape[0]} data points from {n_avaliable} electrodes")

# Drop the references to the last loaded file before collecting garbage
del data_pt, _meta_group, _data_group, _ch_data
gc.collect()



[32m2025-05-25 19:05:46.554[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m33[0m - [1mELE [0/218]: D:/Baihm/EISNN/Archive/01037160_归档[0m
[32m2025-05-25 19:05:46.647[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m33[0m - [1mELE [1/218]: D:/Baihm/EISNN/Archive/01037161_归档[0m
[32m2025-05-25 19:05:46.727[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m33[0m - [1mELE [2/218]: D:/Baihm/EISNN/Archive/01037162_归档[0m
[32m2025-05-25 19:05:46.799[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m33[0m - [1mELE [3/218]: D:/Baihm/EISNN/Archive/01067093_归档[0m
[32m2025-05-25 19:05:46.864[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m33[0m - [1mELE [4/218]: D:/Baihm/EISNN/Archive/01067094_归档[0m
[32m2025-05-25 19:05:46.929[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m33[0m - [1mELE [5/218]: D:/Baihm/EISNN/Archive/01067095_归档[0m
[32m2025-05-25 19:05:46.967[0m | [1mI

461

## PCA

In [8]:
# Standardize every feature column, then fit a 10-component PCA on all rows
# and keep the projected data.
_pca_m = PCA(n_components = 10)
_scale = StandardScaler()
_data_norm = _scale.fit_transform(almost_data_list)
_pca_data = _pca_m.fit_transform(_data_norm)

# Project the per-channel first rows with the scaler and PCA fitted above
# (transform only, no refit). Note that _data_norm is overwritten here.
_data_norm = _scale.transform(almost_start_list)
_pca_start = _pca_m.transform(_data_norm)

In [9]:


# Scatter of every sample on the first two principal components.
# The plt.* calls below draw into the axes created by plt.subplots, because
# it is the current axes.
fig, axis = plt.subplots(1,1, figsize = (16,9))
# axis.scatter(_pca_data[:,0],_pca_data[:,1], color = 'lightgray', s=0.05)
plt.scatter(_pca_data[:,0],_pca_data[:,1],s=0.1)
# plt.scatter(_pca_start[:,0],_pca_start[:,1],s=0.5)
plt.title('PCA')


Text(0.5, 1.0, 'PCA')

In [16]:
# Explained variance ratio (i.e. contribution) of every principal component
explained_var = _pca_m.explained_variance_ratio_
# 1-based component numbers for the x axis
components = np.arange(1, len(explained_var) + 1)

# Plot
plt.figure(figsize=(6, 6))
bars = plt.bar(components, explained_var, color='skyblue')

# Annotate every bar with its value as a percentage
for bar, var in zip(bars, explained_var):
    yval = bar.get_height()
    # Centered horizontally on the bar, slightly above its top
    plt.text(bar.get_x() + bar.get_width()/2, yval + 0.005, f'{var*100:.2f}%', 
             ha='center', va='bottom', fontsize=10)

plt.xticks(components)
plt.xlabel("Principal Component")
plt.ylabel("Explained Variance Ratio")
plt.title("PCA Explained Variance per Component")
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()

## Plot & Save

In [None]:

# When True, the manifold figures are written to manifold_fig_save_path
# instead of being shown.
SAVE_FLAG = False
# NOTE(review): the active path points at the Invivo tree while the Archive
# variant is commented out - confirm it matches the data set that is loaded.
# manifold_fig_save_path = f"D:/Baihm/EISNN/Archive/{DATASET_SUFFIX}/Manifold"
manifold_fig_save_path = f"D:/Baihm/EISNN/Invivo/{DATASET_SUFFIX}/Manifold"
if SAVE_FLAG:
    # Create the output directory on first use
    if not os.path.exists(manifold_fig_save_path):
        os.makedirs(manifold_fig_save_path)



# Distinct electrode indices present in the loaded data (column 0 of the ids)
uq_id_list = np.unique(almost_id_list[:,0])
uq_id_max = np.max(uq_id_list)



In [85]:
# One figure per electrode: all samples in gray as background, with each
# channel's samples drawn on top as a poly-line whose colour runs along the
# colormap with the position inside the channel's sequence.

# Work on a copy so the plotted axes can be swapped (see the commented
# lines) without touching _pca_data.
_pca_data_plot = np.array(_pca_data[:,:])
# _pca_data_plot[:,0] = _pca_data[:,1]
# _pca_data_plot[:,1] = _pca_data[:,2]

cmap = plt.colormaps.get_cmap("rainbow_r")

for i, _ele_id in enumerate(uq_id_list):

    fig, axis = plt.subplots(1,1, figsize = (16,9))
    axis.scatter(_pca_data_plot[:,0],_pca_data_plot[:,1], color = 'lightgray', s=0.5)
    # plt.scatter(_pca_start[:,0],_pca_start[:,1],s=0.1)

    ele_mask = almost_id_list[:,0] == _ele_id
    _ch_list = np.unique(almost_id_list[ele_mask,1])

    for j in _ch_list:
        # Rows of this (electrode, channel) pair, in stored order
        _ch_mask = ele_mask & (almost_id_list[:,1] == j)
        _ch_data = _pca_data_plot[_ch_mask,:2]
        _ch_cluster = almost_id_list[_ch_mask,2]

        # _c = cmap(_ele_id / uq_id_max)
        # axis.plot(_ch_data[:,0],_ch_data[:,1], color = _c, alpha = 0.5)

        _cluster_list = np.unique(_ch_cluster)

        _seq_all_len = _ch_cluster.shape[0]
        _seg_poi = 0

        for k in _cluster_list:
            # Select the cluster inside the channel subset: the same rows in
            # the same order as a mask over the whole id array, without
            # rescanning every sample for each cluster.
            _cluster_data = _ch_data[_ch_cluster == k]

            # Consecutive point pairs -> line segments of shape (n-1, 2, 2)
            _seg_data = _cluster_data.reshape(-1,1,2)
            _seg_data = np.concatenate([_seg_data[:-1], _seg_data[1:]], axis=1)

            _seg_len = _cluster_data.shape[0]
            
            # Colour positions of this cluster inside the channel's 0..1 range
            color_range = np.linspace(_seg_poi/_seq_all_len, (_seg_poi+_seg_len)/_seq_all_len, _seg_len - 1)
            colors = cmap(color_range)

            _seg_poi = _seg_poi+_seg_len
            lc = LineCollection(_seg_data, colors=colors, linewidth=2,alpha=0.5)
            axis.add_collection(lc)

    axis.set_title(f"{ele_list[int(_ele_id)][1]}_Manifold")
    if SAVE_FLAG:
        _fig_name = f"{ele_list[int(_ele_id)][1]}_Manifold.png"
        _fig_save_path = os.path.join(manifold_fig_save_path, _fig_name)

        fig.savefig(_fig_save_path)
        # Close after saving so figures do not pile up in memory
        plt.close(fig) 

        logger.info(f"{i}/{len(uq_id_list)} Saved")
    else:
        fig.show()



[32m2025-05-16 22:20:27.575[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m59[0m - [1m0/6 Saved[0m
[32m2025-05-16 22:20:27.802[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m59[0m - [1m1/6 Saved[0m
[32m2025-05-16 22:20:28.046[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m59[0m - [1m2/6 Saved[0m
[32m2025-05-16 22:20:28.469[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m59[0m - [1m3/6 Saved[0m
[32m2025-05-16 22:20:28.733[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m59[0m - [1m4/6 Saved[0m
[32m2025-05-16 22:20:28.894[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m59[0m - [1m5/6 Saved[0m


In [64]:
# Sanity check: (number of samples, 3 id columns)
almost_id_list.shape

(98690, 3)

## Plot All Manifold

In [15]:




# Single figure with the trajectories of ALL electrodes: samples in gray as
# background, every channel drawn as a very faint poly-line (alpha 0.01) so
# that overlapping paths build up as density.
fig, axis = plt.subplots(1,1, figsize = (16,9))
axis.scatter(_pca_data[:,0],_pca_data[:,1], color = 'lightgray', s=0.05)
# plt.scatter(_pca_start[:,0],_pca_start[:,1],s=0.1)

# Distinct electrode indices present in the loaded data
uq_id_list = np.unique(almost_id_list[:,0])
uq_id_max = np.max(uq_id_list)

cmap = plt.colormaps.get_cmap("rainbow_r")

for i, _ele_id in enumerate(uq_id_list):

    ele_mask = almost_id_list[:,0] == _ele_id
    _ch_list = np.unique(almost_id_list[ele_mask,1])

    for j in _ch_list:
        # Rows of this (electrode, channel) pair, in stored order
        _ch_mask = ele_mask & (almost_id_list[:,1] == j)
        _ch_data = _pca_data[_ch_mask,:2]
        _ch_cluster = almost_id_list[_ch_mask,2]

        # _c = cmap(_ele_id / uq_id_max)
        # axis.plot(_ch_data[:,0],_ch_data[:,1], color = _c, alpha = 0.5)

        _cluster_list = np.unique(_ch_cluster)

        _seq_all_len = _ch_cluster.shape[0]
        _seg_poi = 0

        for k in _cluster_list:
            # Select the cluster inside the channel subset: the same rows in
            # the same order as a mask over the whole id array, without
            # rescanning every sample for each cluster.
            _cluster_data = _ch_data[_ch_cluster == k]

            # Consecutive point pairs -> line segments of shape (n-1, 2, 2)
            _seg_data = _cluster_data.reshape(-1,1,2)
            _seg_data = np.concatenate([_seg_data[:-1], _seg_data[1:]], axis=1)

            _seg_len = _cluster_data.shape[0]
            
            # Colour positions of this cluster inside the channel's 0..1 range
            color_range = np.linspace(_seg_poi/_seq_all_len, (_seg_poi+_seg_len)/_seq_all_len, _seg_len - 1)
            colors = cmap(color_range)

            _seg_poi = _seg_poi+_seg_len
            lc = LineCollection(_seg_data, colors=colors, linewidth=1, alpha = 0.01)
            axis.add_collection(lc)

fig.show()



# Dimensionality

## DD

In [24]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding, MDS
# import umap.umap_ as umap  # requires the umap-learn package

# Compare several dimensionality-reduction methods on the same standardized
# data and keep, per method, the embedding and a pairwise distance matrix.

# np.random.seed(42)
data_dd = np.array(almost_data_list)

_order = 3
methods = {
    'PCA': PCA(n_components=_order),
    't-SNE': TSNE(n_components=_order, perplexity=25, random_state=42),
    'Isomap': Isomap(n_neighbors=10, n_components=_order),                    
    'LLE': LocallyLinearEmbedding(n_components=_order, random_state=42),
    # 'MDS': MDS(n_components=_order, random_state=42),         # disabled: too costly
    # 'UMAP': umap.UMAP(n_components=_order, random_state=42)   # disabled: too costly
}

# The standardization does not depend on the method, so fit it once
# instead of refitting the same scaler on every loop iteration.
_scale = StandardScaler()
_data_norm = _scale.fit_transform(data_dd)

embeddings = {}
emb_dist = {}
for name, method in methods.items():
    embedding = method.fit_transform(_data_norm)
    embeddings[name] = embedding
    _x = embedding[:,0].flatten()
    _y = embedding[:,1].flatten()

    # Euclidean distance between every pair of samples, using only the
    # first two embedding dimensions.
    # NOTE: this is an N x N matrix per method, so memory grows
    # quadratically with the number of samples.
    emb_dist[name] = np.sqrt((_x[:, np.newaxis] - _x[np.newaxis, :])**2 + 
                         (_y[:, np.newaxis] - _y[np.newaxis, :])**2)
    logger.info(f"{name} distance matrix shape: {emb_dist[name].shape}")



[32m2025-05-16 23:12:31.571[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m32[0m - [1mPCA distance matrix shape: (9406, 9406)[0m
[32m2025-05-16 23:13:02.672[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m32[0m - [1mt-SNE distance matrix shape: (9406, 9406)[0m
[32m2025-05-16 23:13:27.094[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m32[0m - [1mIsomap distance matrix shape: (9406, 9406)[0m
[32m2025-05-16 23:13:28.863[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m32[0m - [1mLLE distance matrix shape: (9406, 9406)[0m


## DD Plot

In [25]:

# Two panels per method, two methods per row: the embedding scatter
# (coloured by sample order) and the pairwise-distance image next to it.
# Rows are derived from the number of embeddings so that enabling more
# methods does not run past a fixed 2x4 grid; four methods still give the
# original 2x4 layout at 12x6 inches.
_n_row = max(1, (len(embeddings) + 1) // 2)
fig, axis = plt.subplots(_n_row,4,figsize=(12,3*_n_row), squeeze=False)


for i, (name, emb) in enumerate(embeddings.items()):
    _row, _col = i // 2, (i % 2) * 2
    _x = emb[:,0].flatten()
    _y = emb[:,1].flatten()

    # Euclidean distance between every pair of samples in the first two
    # embedding dimensions (N x N matrix)
    _dist = np.sqrt((_x[:, np.newaxis] - _x[np.newaxis, :])**2 + 
                         (_y[:, np.newaxis] - _y[np.newaxis, :])**2)
    # _dist = emb_dist[name]

    # Colour by row index of the embedding itself, so the colour array always
    # matches the points even if almost_data_list was reloaded afterwards.
    axis[_row,_col].scatter(emb[:, 0], emb[:, 1], c=np.arange(emb.shape[0]), cmap='rainbow_r', s=1)
    axis[_row,_col].set_title(name)

    s = axis[_row,_col+1].imshow(_dist, cmap='coolwarm', interpolation='nearest')
    fig.colorbar(s, ax=axis[_row,_col+1])
plt.tight_layout()
plt.show()


## Manifold Plot

In [36]:

# One figure per electrode on a chosen embedding: all samples in gray as
# background, with each channel's samples drawn on top as a poly-line whose
# colour runs along the colormap with the position inside the sequence.

# Distinct electrode indices present in the loaded data
uq_id_list = np.unique(almost_id_list[:,0])
uq_id_max = np.max(uq_id_list)

# Select which embedding to draw (copied so it can be modified freely)
# _pca_data_plot = np.array(embeddings['PCA'])
_pca_data_plot = np.array(embeddings['t-SNE'])
# _pca_data_plot = np.array(embeddings['Isomap'])
# _pca_data_plot = np.array(embeddings['LLE'])
# _pca_data_plot[:,0] = _pca_data[:,1]
# _pca_data_plot[:,1] = _pca_data[:,2]

cmap = plt.colormaps.get_cmap("rainbow_r")

for i, _ele_id in enumerate(uq_id_list):

    fig, axis = plt.subplots(1,1, figsize = (16,9))
    axis.scatter(_pca_data_plot[:,0],_pca_data_plot[:,1], color = 'lightgray', s=0.5)
    # plt.scatter(_pca_start[:,0],_pca_start[:,1],s=0.1)

    ele_mask = almost_id_list[:,0] == _ele_id
    _ch_list = np.unique(almost_id_list[ele_mask,1])

    for j in _ch_list:
        # Rows of this (electrode, channel) pair, in stored order
        _ch_mask = ele_mask & (almost_id_list[:,1] == j)
        _ch_data = _pca_data_plot[_ch_mask,:2]
        _ch_cluster = almost_id_list[_ch_mask,2]

        # _c = cmap(_ele_id / uq_id_max)
        # axis.plot(_ch_data[:,0],_ch_data[:,1], color = _c, alpha = 0.5)

        _cluster_list = np.unique(_ch_cluster)

        _seq_all_len = _ch_cluster.shape[0]
        _seg_poi = 0

        for k in _cluster_list:
            # Select the cluster inside the channel subset: the same rows in
            # the same order as a mask over the whole id array, without
            # rescanning every sample for each cluster.
            _cluster_data = _ch_data[_ch_cluster == k]

            # Consecutive point pairs -> line segments of shape (n-1, 2, 2)
            _seg_data = _cluster_data.reshape(-1,1,2)
            _seg_data = np.concatenate([_seg_data[:-1], _seg_data[1:]], axis=1)

            _seg_len = _cluster_data.shape[0]
            
            # Colour positions of this cluster inside the channel's 0..1 range
            color_range = np.linspace(_seg_poi/_seq_all_len, (_seg_poi+_seg_len)/_seq_all_len, _seg_len - 1)
            colors = cmap(color_range)

            _seg_poi = _seg_poi+_seg_len
            lc = LineCollection(_seg_data, colors=colors, linewidth=2,alpha=0.5)
            axis.add_collection(lc)

    axis.set_title(f"{ele_list[int(_ele_id)][1]}_Manifold")
    fig.show()
    # break

