# Note

* Import
* Filesys
* Manually Cluster

# Import

In [1]:
# from ..HETSFileHelper import gatherCSV, readChannel, EIS_recal_ver02
import os
import re
import gc
import sys
from loguru import logger

import matplotlib.pyplot as plt 
from matplotlib.collections import LineCollection
from matplotlib.widgets import LassoSelector
from matplotlib.path import Path
from matplotlib.colors import ListedColormap

from datetime import datetime

from sklearn.decomposition import PCA

from sklearn.cluster import MiniBatchKMeans
from sklearn.preprocessing import StandardScaler
import hdbscan

import numpy as np
import torch


sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))
from Outlier import OutlierDetection
from EISGPR import Interpolation


%matplotlib qt

# Filesys

In [2]:
def SearchELE(rootPath, ele_pattern = re.compile(r"(.+?)_归档")):
    '''==================================================
        Recursively search rootPath for electrode directories.

        A directory whose name matches ele_pattern (anchored at the
        start of the name) is reported as an electrode and is not
        descended into; every other directory is searched recursively.
        Entries are visited in sorted name order, because os.listdir
        returns them in arbitrary order and callers use the position
        in the returned list as the electrode index.

        Parameter:
            rootPath: directory to search
            ele_pattern: compiled regex for electrode directory names;
                group(1) of the match is used as the electrode name
        Return:
            ele_list: list of [directory path, electrode name] pairs
        Raises:
            OSError (e.g. FileNotFoundError) from os.listdir when
            rootPath cannot be listed
        ==================================================
    '''
    ele_list = []
    for entry in sorted(os.listdir(rootPath)):
        entry_path = os.path.join(rootPath, entry)
        if not os.path.isdir(entry_path):
            # Plain files are never electrodes and cannot be descended into.
            continue
        match_ele = ele_pattern.match(entry)
        if match_ele:
            ele_list.append([entry_path, match_ele.group(1)])
        else:
            ele_list.extend(SearchELE(entry_path, ele_pattern))

    return ele_list

In [None]:
# Root directory that is searched for electrode archives (absolute, machine-specific path).
rootPath = "D:/Baihm/EISNN/Archive/"
# Every electrode directory below rootPath, as [directory path, electrode name] pairs.
ele_list = SearchELE(rootPath)
n_ele = len(ele_list)
logger.info(f"Search in {rootPath} and find {n_ele:03d} electrodes")

[32m2025-05-09 13:55:20.148[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mSearch in D:/Baihm/EISNN/Archive_New/ and find 187 electrodes[0m


# Manually Cluster

## Normal Electrodes [128/128] Cluster

### Input data

In [None]:
# First, take the electrodes that look completely fine (128/128 valid channels)
# and try clustering them.
# The data volume is also fairly small, so this should run faster.

MODEL_SUFFIX = "Matern12_Ver01"

# Per-channel arrays are collected in lists and stacked once after the loop.
all_data_list = []
all_id_list = []
_ch_pattern = re.compile(r"ch_(\d{3})")

for i in range(n_ele):
# for i in range(3):
    # File layout: <electrode dir>/<MODEL_SUFFIX>/<electrode name>_<MODEL_SUFFIX>.pt
    fd_pt = os.path.join(ele_list[i][0], MODEL_SUFFIX, f"{ele_list[i][1]}_{MODEL_SUFFIX}.pt")
    if not os.path.exists(fd_pt):
        # logger.warning(f"{fd_pt} does not exist")
        continue
    # NOTE(review): weights_only=False makes torch.load unpickle arbitrary objects;
    # only load files from a trusted source.
    data_pt = torch.load(fd_pt, weights_only=False)
    _meta_group = data_pt["meta_group"]
    _data_group = data_pt["data_group"]

    n_day       = _meta_group["n_day"]
    n_ch        = _meta_group["n_ch"]
    n_valid_ch  = len(_data_group["Channels"])

    # ignore abnormal ele: keep only electrodes with 128 channels that are all valid
    if n_ch != 128 or n_valid_ch != n_ch:
        continue

    logger.info(f"ELE [{i}/{n_ele}]: {ele_list[i][0]}")


    # Iteration by channel
    for j in _data_group['Channels']:
        _ch_data = _data_group[j]["y_eval"]
        # _ch_data_log = np.log(_ch_data[:,:,0] + 1j*_ch_data[:,:,1])
        # _ch_data[:,:,0] = np.real(_ch_data_log)
        # _ch_data[:,:,1] = np.imag(_ch_data_log)
        # Put the two slices of the last axis side by side: one row per entry of
        # the first axis (presumably one row per day/sample -- TODO confirm).
        _ch_data = np.hstack((_ch_data[:,:,0],_ch_data[:,:,1]))
        all_data_list.append(_ch_data)

        # Channel key is expected to start with "ch_" followed by three digits;
        # a key that does not match makes .group() fail on None.
        _ch_id = _ch_pattern.match(j)
        _ch_id = int(_ch_id.group(1))

        # One [electrode index, channel number] row per data row.
        _id = [i, _ch_id] * np.shape(_ch_data)[0]
        _id = np.array(_id).reshape(-1,2)
        all_id_list.append(_id)

all_data_list = np.vstack(all_data_list)
all_id_list = np.vstack(all_id_list)


# Drop the references to the last loaded file and reclaim memory.
# NOTE(review): this del raises NameError if no file was loaded in the loop.
del data_pt, _meta_group, _data_group, _ch_data
gc.collect()



[32m2025-04-25 11:37:30.233[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m31[0m - [1mELE [0/218]: D:/Baihm/EISNN/Archive/01037160_归档[0m
[32m2025-04-25 11:37:30.272[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m31[0m - [1mELE [1/218]: D:/Baihm/EISNN/Archive/01037161_归档[0m
[32m2025-04-25 11:37:30.321[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m31[0m - [1mELE [2/218]: D:/Baihm/EISNN/Archive/01037162_归档[0m
[32m2025-04-25 11:37:30.360[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m31[0m - [1mELE [5/218]: D:/Baihm/EISNN/Archive/01067095_归档[0m
[32m2025-04-25 11:37:30.400[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m31[0m - [1mELE [9/218]: D:/Baihm/EISNN/Archive/02027373_归档[0m
[32m2025-04-25 11:37:30.416[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m31[0m - [1mELE [10/218]: D:/Baihm/EISNN/Archive/02027390_归档[0m
[32m2025-04-25 11:37:30.448[0m | [1m

516

In [72]:
# plt.figure()
# for i in range(10):
#     plt.plot(all_data_list[i,:101])

### PCA

In [84]:
# Standardize every feature column, then project onto the first 10 principal components.
_pca_m = PCA(n_components = 10)
_scale = StandardScaler()
_data_norm = _scale.fit_transform(all_data_list)
_pca_data = _pca_m.fit_transform(_data_norm)

cmap = plt.colormaps.get_cmap("rainbow_r")
_id_max = all_id_list[:,0].max()




# Scatter of PC1 against PC2; each point is colored by its electrode index
# (column 0 of all_id_list) divided by the largest index.
plt.figure()
plt.scatter(_pca_data[:,0],_pca_data[:,1], color = cmap(all_id_list[:,0]/_id_max),s=0.1)
# plt.scatter(_pca_data[:,0],_pca_data[:,1], s=0.1)
plt.title('PCA')


Text(0.5, 1.0, 'PCA')

In [9]:
# Share of the retained variance carried by each fitted principal component
# (explained variance normalized by its sum over the fitted components).
a = _pca_m.explained_variance_
plt.figure()
plt.plot(a/np.sum(a))

[<matplotlib.lines.Line2D at 0x2990d1f5790>]

### Kmeans

In [233]:

# Cluster the first two principal components into 15 groups with mini-batch k-means.
# NOTE(review): no random_state is passed, so the assignment may differ between runs.
kmeans = MiniBatchKMeans(n_clusters=15)
kmeans.fit(_pca_data[:,:2])
kmeans_labels = kmeans.labels_

fig = plt.figure()
cmap = plt.colormaps.get_cmap('tab20')
# plt.scatter(ipca_reduced_list[:,0],ipca_reduced_list[:,1])
# The integer labels are passed straight to the colormap to pick one color per cluster.
plt.scatter(_pca_data[:,0],_pca_data[:,1], color = cmap(kmeans_labels[:]), s = 0.05)


np.shape(kmeans_labels)




(204098,)

### DBSCAN

In [243]:
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

# Dimension-reduced data (first two principal components)
X_reduced = np.vstack(_pca_data[:,:2])  # shape: [N, dim]

# Standardize
X_scaled = StandardScaler().fit_transform(X_reduced)

# DBSCAN clustering
dbscan = DBSCAN(eps=0.1, min_samples=1000)
labels = dbscan.fit_predict(X_scaled)


In [245]:
fig     = plt.figure()
cmap    = plt.colormaps.get_cmap('rainbow_r')

n_cluster = len(np.unique(labels))
# DBSCAN marks noise with label -1. Dividing the raw labels by n_cluster sends
# -1 below 0, where the colormap clips it to the same color as cluster 0.
# Shift the labels so the smallest one sits at 0 and the largest at 1.
_label_span = max(labels.max() - labels.min(), 1)
_label_pos = (labels - labels.min()) / _label_span
plt.scatter(_pca_data[:,0],_pca_data[:,1], color = cmap(_label_pos), s=0.05)

np.shape(labels)


(204098,)

### HDBSCAN

In [262]:
# Density-based clustering of the first two principal components.
_hdb = hdbscan.HDBSCAN(
    min_cluster_size=1000,
    min_samples=500,
    metric='euclidean',
    cluster_selection_method='eom',
    )
_hdb_labels = _hdb.fit_predict(_pca_data[:,:2])

fig     = plt.figure()
cmap    = plt.colormaps.get_cmap('tab10')
# Wrap the labels around the palette so every cluster gets a palette color
# (out-of-range integers would otherwise all collapse to a single color),
# then paint the noise label -1 light gray so it is not confused with a cluster.
_hdb_colors = cmap(_hdb_labels % cmap.N)
_hdb_colors[_hdb_labels == -1] = (0.83, 0.83, 0.83, 1.0)
plt.scatter(_pca_data[:,0],_pca_data[:,1], color = _hdb_colors, s=0.05)
# Report the shape of this cell's own labels (previously the DBSCAN labels).
np.shape(_hdb_labels)




(204098,)

### OPTICS

In [None]:
from sklearn.cluster import OPTICS

# OPTICS clustering of the first two principal components.
optics = OPTICS(min_samples=20, xi=0.05, min_cluster_size=0.1)
optics.fit(_pca_data[:,:2])
optics_labels = optics.labels_


fig     = plt.figure()
cmap    = plt.colormaps.get_cmap('tab10')
# Wrap the labels around the palette and paint the noise label -1 light gray,
# so noise is not drawn in the same color as a real cluster.
_optics_colors = cmap(optics_labels % cmap.N)
_optics_colors[optics_labels == -1] = (0.83, 0.83, 0.83, 1.0)
plt.scatter(_pca_data[:,0],_pca_data[:,1], color = _optics_colors, s=0.05)
# Report the shape of this cell's own labels (previously the DBSCAN labels).
np.shape(optics_labels)

### LassoSelector

In [46]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import LassoSelector
from matplotlib.path import Path
from matplotlib.colors import ListedColormap

# === Data preparation ===
# One label per PCA row; -1 means "not assigned to any cluster yet".
lasso_labels = np.full(_pca_data.shape[0], -1)
current_label = 0
label_history = []

# Only the first two principal components are used for the manual selection.
lasso_data = _pca_data[:,:2]

# === Adjustable palette (supports up to 10 classes) ===
color_list = ['lightgray', 'red', 'blue', 'green', 'orange', 'purple', 'cyan', 'magenta', 'brown', 'yellow']
cmap = ListedColormap(color_list)
# cmap = plt.colormaps.get_cmap('tab20c_r')


In [64]:

# === Interaction state ===
mode = "new"  # current mode: new / add / erase ("new" by default)
add_target = 0  # cluster id that lassoed points are assigned to in "add" mode

# === Figure ===
fig, ax = plt.subplots()
pts = ax.scatter(lasso_data[:, 0], lasso_data[:, 1], c='lightgray', s=0.5)
plt.title("Lasso Cluster")

def update_colors():
    """Recolor the scatter from lasso_labels and request a redraw.

    Unlabelled points (-1) use palette entry 0; cluster k uses entry
    k % 8 + 1, so colors repeat every 8 clusters.
    """
    color_indices = np.where(lasso_labels == -1, 0, lasso_labels%8 + 1)
    pts.set_facecolor(cmap(color_indices))
    fig.canvas.draw_idle()

def on_select(verts):
    """Lasso callback: relabel the points enclosed by the drawn polygon.

    verts: polygon vertices handed over by LassoSelector.
    The previous labels are pushed onto label_history first so the action
    can be undone with the 'z' key.
    """
    global current_label, lasso_labels, label_history
    path = Path(verts)
    ind = np.nonzero(path.contains_points(lasso_data))[0]

    # Save the current state so this selection can be undone.
    label_history.append(lasso_labels.copy())

    # Update the labels according to the active mode.
    if mode == "new":
        # Start a new cluster and advance the running cluster id.
        lasso_labels[ind] = current_label
        current_label += 1
    elif mode == "add":
        # Merge the selection into the chosen target cluster.
        lasso_labels[ind] = add_target
    elif mode == "erase":
        # Put the selection back into the "unassigned" state.
        lasso_labels[ind] = -1

    update_colors()
update_colors()

# === Lasso binding ===
# Keep a reference to the selector; it stops responding once garbage-collected.
lasso = LassoSelector(ax, on_select)

# === Key binding ===
def on_key(event):
    """Keyboard callback.

    n / a / e switch to new / add / erase mode, z undoes the last lasso
    action, and in add mode a digit key 0-9 selects the target cluster.
    """
    global mode, add_target, current_label, lasso_labels
    key = event.key
    if key == 'n':
        mode = 'new'
    elif key == 'a':
        mode = 'add'
    elif key == 'e':
        mode = 'erase'
    elif key == 'z':
        if label_history:
            lasso_labels[:] = label_history.pop()
            update_colors()
    elif mode == 'add' and isinstance(key, str) and len(key) == 1 and key in "0123456789":
        # event.key may be None or a multi-character name such as "shift";
        # accept plain ASCII digits only so int() cannot fail.
        add_target = int(key)


fig.canvas.mpl_connect('key_press_event', on_key)

plt.show()

print(np.unique(lasso_labels))


[-1  0  2  3  4  5  6  7  8  9 11 14 15 16 17 18 19 20 21]


#### Lasso Plot

In [67]:
# One panel per lasso label (including -1 = unassigned), five panels per row.
_uq_labels = np.unique(lasso_labels)
_n = (len(_uq_labels) - 1) // 5 + 1   # number of rows, rounded up

# squeeze=False keeps `axis` two-dimensional even when there is a single row,
# so the [row, column] indexing below always works.
fig, axis = plt.subplots(_n, 5, squeeze=False)
for i, _id in enumerate(_uq_labels):
    _ax = axis[i // 5, i % 5]
    _data_mask = all_data_list[lasso_labels == _id,:]
    # Draw at most the first 51 rows of the cluster to keep the panel readable.
    # Only the first 101 columns are shown, after np.exp (presumably the first
    # stacked component, stored as logarithms -- TODO confirm).
    for _row in _data_mask[:51]:
        _ax.semilogy(np.exp(_row[:101]), color = cmap(_id%8+1))

    # Use the axis limits of the first panel for every panel.
    _ax.sharex(axis[0,0])
    _ax.sharey(axis[0,0])

fig.show()


## Almost Electrode

### Input data

In [6]:
# Load the "almost normal" electrodes: every electrode with 128 channels that
# are all valid, plus incomplete ones that still have at least 5 days and more
# than 100 valid channels.

MODEL_SUFFIX = "Matern12_Ver01"

# Per-channel arrays are collected in lists and stacked once after the loop.
# The *_start_* lists keep only the first row of every channel.
almost_start_list = []
almost_start_id_list = []
almost_data_list = []
almost_id_list = []
_ch_pattern = re.compile(r"ch_(\d{3})")

for i in range(n_ele):
# for i in range(3):
    # File layout: <electrode dir>/<MODEL_SUFFIX>/<electrode name>_<MODEL_SUFFIX>.pt
    fd_pt = os.path.join(ele_list[i][0], MODEL_SUFFIX, f"{ele_list[i][1]}_{MODEL_SUFFIX}.pt")
    if not os.path.exists(fd_pt):
        # logger.warning(f"{fd_pt} does not exist")
        continue
    # NOTE(review): weights_only=False makes torch.load unpickle arbitrary objects;
    # only load files from a trusted source.
    data_pt = torch.load(fd_pt, weights_only=False)
    _meta_group = data_pt["meta_group"]
    _data_group = data_pt["data_group"]

    n_day       = _meta_group["n_day"]
    n_ch        = _meta_group["n_ch"]
    n_valid_ch  = len(_data_group["Channels"])

    # ignore abnormal ele: an incomplete electrode is skipped only when it has
    # fewer than 5 days or no more than 100 valid channels
    if n_ch != 128 or n_valid_ch != n_ch:
        if n_day < 5 or n_valid_ch <= 100:
            continue

    logger.info(f"ELE [{i}/{n_ele}]: {ele_list[i][0]}")


    # Iteration by channel
    for j in _data_group['Channels']:
        _ch_data = _data_group[j]["y_eval"]
        # _ch_data_log = np.log(_ch_data[:,:,0] + 1j*_ch_data[:,:,1])
        # _ch_data[:,:,0] = np.real(_ch_data_log)
        # _ch_data[:,:,1] = np.imag(_ch_data_log)
        # Put the two slices of the last axis side by side: one row per entry of
        # the first axis (presumably one row per day/sample -- TODO confirm).
        _ch_data = np.hstack((_ch_data[:,:,0],_ch_data[:,:,1]))
        almost_data_list.append(_ch_data)
        almost_start_list.append(_ch_data[0,:])


        # Channel key is expected to start with "ch_" followed by three digits;
        # a key that does not match makes .group() fail on None.
        _ch_id = _ch_pattern.match(j)
        _ch_id = int(_ch_id.group(1))

        # One [electrode index, channel number] row per data row.
        _id = [i, _ch_id] * np.shape(_ch_data)[0]
        _id = np.array(_id).reshape(-1,2)
        almost_id_list.append(_id)
        almost_start_id_list.append(_id[0,:])

almost_data_list = np.vstack(almost_data_list)
almost_id_list = np.vstack(almost_id_list)
almost_start_list = np.vstack(almost_start_list)
almost_start_id_list = np.vstack(almost_start_id_list)


# Drop the references to the last loaded file and reclaim memory.
# NOTE(review): this del raises NameError if no file was loaded in the loop.
del data_pt, _meta_group, _data_group, _ch_data
gc.collect()



[32m2025-05-07 18:46:50.993[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m31[0m - [1mELE [0/218]: D:/Baihm/EISNN/Archive/01037160_归档[0m
[32m2025-05-07 18:46:51.029[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m31[0m - [1mELE [1/218]: D:/Baihm/EISNN/Archive/01037161_归档[0m
[32m2025-05-07 18:46:51.077[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m31[0m - [1mELE [2/218]: D:/Baihm/EISNN/Archive/01037162_归档[0m
[32m2025-05-07 18:46:51.131[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m31[0m - [1mELE [3/218]: D:/Baihm/EISNN/Archive/01067093_归档[0m
[32m2025-05-07 18:46:51.187[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m31[0m - [1mELE [4/218]: D:/Baihm/EISNN/Archive/01067094_归档[0m
[32m2025-05-07 18:46:51.227[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m31[0m - [1mELE [5/218]: D:/Baihm/EISNN/Archive/01067095_归档[0m
[32m2025-05-07 18:46:51.261[0m | [1mI

165

### PCA

In [7]:
# Fit the scaler and a 10-component PCA on all rows.
_pca_m = PCA(n_components = 10)
_scale = StandardScaler()
_data_norm = _scale.fit_transform(almost_data_list)
_pca_data = _pca_m.fit_transform(_data_norm)

# Project the first row of every channel with the already fitted scaler and PCA
# (transform only, no refit), so both point sets share the same coordinates.
_data_norm = _scale.transform(almost_start_list)
_pca_start = _pca_m.transform(_data_norm)

In [8]:

cmap = plt.colormaps.get_cmap("rainbow_r")
_id_max = almost_id_list[:,0].max()

# Scatter of PC1 against PC2 for all rows, in a single default color.
plt.figure(figsize=(9,9))
# plt.scatter(_pca_data[:,0],_pca_data[:,1], color = cmap(almost_id_list[:,0]/_id_max),s=0.1)
plt.scatter(_pca_data[:,0],_pca_data[:,1],s=0.01)
# plt.gca().set_aspect('equal', adjustable='box')
plt.title('PCA')


Text(0.5, 1.0, 'PCA')

In [9]:



cmap = plt.colormaps.get_cmap("rainbow_r")
_id_max = almost_id_list[:,0].max()

# All rows in light gray as background; the first row of every channel is drawn
# on top, colored by electrode index divided by the largest index.
plt.figure()
plt.scatter(_pca_data[:,0],_pca_data[:,1], color = 'lightgray', s=0.1)
plt.scatter(_pca_start[:,0],_pca_start[:,1], color = cmap(almost_start_id_list[:,0]/_id_max),s=0.1)
# plt.scatter(_pca_start[:,0],_pca_start[:,1],s=0.1)
plt.title('PCA')


Text(0.5, 1.0, 'PCA')

#### PC eigenValue

In [10]:
# Explained-variance ratio (i.e. contribution) of each principal component
explained_var = _pca_m.explained_variance_ratio_
components = np.arange(1, len(explained_var) + 1)

# Plot
plt.figure(figsize=(6, 6))
bars = plt.bar(components, explained_var, color='skyblue')

# Annotate every bar with its value (as a percentage)
for bar, var in zip(bars, explained_var):
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2, yval + 0.005, f'{var*100:.2f}%', 
             ha='center', va='bottom', fontsize=10)

plt.xticks(components)
plt.xlabel("Principal Component")
plt.ylabel("Explained Variance Ratio")
plt.title("PCA Explained Variance per Component")
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()

### LassoSelector

In [36]:


# === Data preparation ===
# One label per PCA row; -1 means "not assigned to any cluster yet".
# Re-running this cell discards all manual selections and the undo history.
lasso_labels = np.full(_pca_data.shape[0], -1)
current_label = 0
label_history = []




In [53]:
# Only the first two principal components are used for the manual selection.
lasso_data = _pca_data[:,:2]

# === Adjustable palette (supports up to 10 classes) ===
color_list = ['lightgray', 'red', 'blue', 'green', 'orange', 'purple', 'cyan', 'magenta', 'brown', 'yellow']
cmap = ListedColormap(color_list)

# === Interaction state ===
mode = "new"  # current mode: new / add / erase ("new" by default)
add_target = 0  # cluster id that lassoed points are assigned to in "add" mode

# === Figure ===
fig, ax = plt.subplots()
pts = ax.scatter(lasso_data[:, 0], lasso_data[:, 1], c='lightgray', s=0.01)
plt.title("Lasso Cluster")

def update_colors():
    """Recolor the scatter from lasso_labels and request a redraw.

    Unlabelled points (-1) use palette entry 0; cluster k uses entry
    k % 8 + 1, so colors repeat every 8 clusters.
    """
    color_indices = np.where(lasso_labels == -1, 0, lasso_labels%8 + 1)
    pts.set_facecolor(cmap(color_indices))
    fig.canvas.draw_idle()

def on_select(verts):
    """Lasso callback: relabel the points enclosed by the drawn polygon.

    verts: polygon vertices handed over by LassoSelector.
    The previous labels are pushed onto label_history first so the action
    can be undone with the 'z' key.
    """
    global current_label, lasso_labels, label_history
    path = Path(verts)
    ind = np.nonzero(path.contains_points(lasso_data))[0]

    # Save the current state so this selection can be undone.
    label_history.append(lasso_labels.copy())

    # Update the labels according to the active mode.
    if mode == "new":
        # Start a new cluster and advance the running cluster id.
        lasso_labels[ind] = current_label
        current_label += 1
    elif mode == "add":
        # Merge the selection into the chosen target cluster.
        lasso_labels[ind] = add_target
    elif mode == "erase":
        # Put the selection back into the "unassigned" state.
        lasso_labels[ind] = -1

    update_colors()
update_colors()

# === Lasso binding ===
# Keep a reference to the selector; it stops responding once garbage-collected.
lasso = LassoSelector(ax, on_select)

# === Key binding ===
def on_key(event):
    """Keyboard callback.

    n / a / e switch to new / add / erase mode, z undoes the last lasso
    action, and in add mode a digit key 0-9 selects the target cluster.
    """
    global mode, add_target, current_label, lasso_labels
    key = event.key
    if key == 'n':
        mode = 'new'
    elif key == 'a':
        mode = 'add'
    elif key == 'e':
        mode = 'erase'
    elif key == 'z':
        if label_history:
            lasso_labels[:] = label_history.pop()
            update_colors()
    elif mode == 'add' and isinstance(key, str) and len(key) == 1 and key in "0123456789":
        # event.key may be None or a multi-character name such as "shift";
        # accept plain ASCII digits only so int() cannot fail.
        add_target = int(key)


fig.canvas.mpl_connect('key_press_event', on_key)

plt.show()

print(np.unique(lasso_labels))


[-1  0]


### Cluster Plot

In [None]:
# One panel per lasso label (including -1 = unassigned).
_uq_labels = np.unique(lasso_labels)
_n_label = len(_uq_labels)
_n = int((_n_label-1) / 5) + 1   # number of rows when five panels share a row

if _n == 1:
    # Few labels: a single row with one column per label, 500 curves per panel.
    _n_col, _n_curve = _n_label, 500
else:
    # Many labels: a grid with five columns, 100 curves per panel.
    _n_col, _n_curve = 5, 100

# squeeze=False keeps `axis` two-dimensional for every layout, including the
# single-label case where subplots would otherwise return a bare Axes object.
fig, axis = plt.subplots(_n, _n_col, squeeze=False)
for i, _id in enumerate(_uq_labels):
    _ax = axis[i // _n_col, i % _n_col]
    _data_mask = almost_data_list[lasso_labels == _id,:]

    # Random row indices into this cluster, drawn with replacement.
    _rand_ch = np.floor(np.random.rand(_n_curve)*_data_mask.shape[0]).astype(int)
    # Only the first 101 columns are shown, after np.exp (presumably the first
    # stacked component, stored as logarithms -- TODO confirm).
    for j in range(min(_n_curve, _data_mask.shape[0])):
        _ax.semilogy(np.exp(_data_mask[_rand_ch[j],:101]), color = cmap(_id%8+1))

    _ax.xaxis.set_visible(False)
    _ax.yaxis.set_visible(False)
    # Use the axis limits of the first panel for every panel.
    _ax.sharex(axis[0,0])
    _ax.sharey(axis[0,0])

fig.show()


### Save Feature Data

In [None]:
# Id rows of lasso cluster 0; only the row count is used below.
_id_mask = almost_id_list[lasso_labels == 0,:]

# _data_mask = almost_data_list[lasso_labels == _id,:]
# 1000 random row indices into cluster 0, drawn with replacement.
# NOTE(review): unseeded, so a different subset is drawn on every run, and an
# empty cluster 0 makes the indexing below fail.
_rand_ch = np.floor(np.random.rand(1000)*_id_mask.shape[0]).astype(int)
        
# Feature rows of cluster 0, reduced to the random subset
# (presumably the open-circuit spectra, judging by the file name below -- confirm).
open_data = almost_data_list[lasso_labels == 0,:]
open_data = open_data[_rand_ch,:]
open_data.shape
# np.save("D:\Baihm\EISNN\Dataset\Anomaly\Open\EIS_Open.npy",open_data)

### Black List

In [None]:


# Three panels side by side; label number i is drawn into panel i % 3, so with
# more than three labels several of them share a panel.
fig, axis = plt.subplots(1,3, figsize = (12,4))
for i in range(0,len(np.unique(lasso_labels))):
    _id = np.unique(lasso_labels)[i]
    _data_mask = almost_data_list[lasso_labels == _id,:]

    # Up to 100 random rows of this cluster, drawn with replacement.
    _rand_ch = np.floor(np.random.rand(100)*_data_mask.shape[0]).astype(int)
    for j in range(_data_mask.shape[0]):
        if j >= 100: break
        # Only the first 101 columns are shown, after np.exp.
        axis[int(i%3)].semilogy(np.exp(_data_mask[_rand_ch[j],:101]), color = cmap(_id%8+1)) 

    axis[int(i%3)].xaxis.set_visible(False)
    axis[int(i%3)].yaxis.set_visible(False)
    # Use the axis limits of the first panel for every panel.
    axis[int(i%3)].sharex(axis[0])
    axis[int(i%3)].sharey(axis[0])

fig.show()


# Manifold

In [1]:
# Electrode names judged unusable, each with the reason.
# The same lists are defined again in the "Input Data" cell below.
Blacklist = [
    '01067093',     # Not look like EIS
    '01067094',     # Connection Error
    '02017385',     # Connection Error
    '05127177',     # Open to Short
    '06047729',     # Open to Short
    '06047730',     # Open to Short
    '06047731',     # Open to Short
    '09207024',     # Connection Error
    '10017038',     # Connection Error
    '10037050',     # Connection Error
    '10047056',     # Connection Error
    '10057069',     # Connection Error
    '10057083',     # Always Open
    '10057084',     # Chaos
    '10057087',     # Connection Error
    '22017367',     # Connection Error
    '22017371',     # Chaos
]

# Electrode names judged doubtful, each with the reason.
GrayList = [
    '10037051',     # Connection Error
    '10037052',     # Connection Error
    '10057071',     # Connection Error
    '10067077',     # Weird shape, like a connection error
    '10150201',     # Weird Shape
    '10150202',     # Weird Shape
    '10150203',     # Weird Shape
    '20037515',     # Weird Shape
    '20037516',     # Weird Shape
    '20037517',     # Weird Shape
    '22037378',     # Connection Error
    '22037380',     # Connection Error
    '22047376',     # Connection Error

]

## Input Data

In [24]:
# Electrode names marked as good.
Whitelist = [
    '06017758',
    '06017760',
    '01037162',
    '10080601',
    '22017368',
    '01067095',
    '02027373',
    '05087164',
]


# Electrode names judged unusable, each with the reason.
Blacklist = [
    '01067093',     # Not look like EIS
    '01067094',     # Connection Error
    '02017385',     # Connection Error
    '05127177',     # Open to Short
    '06047729',     # Open to Short
    '06047730',     # Open to Short
    '06047731',     # Open to Short
    '09207024',     # Connection Error
    '10017038',     # Connection Error
    '10037050',     # Connection Error
    '10047056',     # Connection Error
    '10057069',     # Connection Error
    '10057083',     # Always Open
    '10057084',     # Chaos
    '10057087',     # Connection Error
    '22017367',     # Connection Error
    '22017371',     # Chaos
]

# Electrode names judged doubtful, each with the reason.
GrayList = [
    '10037051',     # Connection Error
    '10037052',     # Connection Error
    '10057071',     # Connection Error
    '10067077',     # Weird shape, like a connection error
    '10150201',     # Weird Shape
    '10150202',     # Weird Shape
    '10150203',     # Weird Shape
    '20037515',     # Weird Shape
    '20037516',     # Weird Shape
    '20037517',     # Weird Shape
    '22037378',     # Connection Error
    '22037380',     # Connection Error
    '22047376',     # Connection Error

]


In [25]:
# Load every electrode that is not on the Blacklist and passes the channel/day
# filter below; the id table additionally carries the per-row cluster id.

MODEL_SUFFIX = "Matern12_Ver01"

# Per-channel arrays are collected in lists and stacked once after the loop.
# The *_start_* lists keep only the first row of every channel.
almost_start_list = []
almost_start_id_list = []
almost_data_list = []
almost_id_list = []
_ch_pattern = re.compile(r"ch_(\d{3})")

for i in range(n_ele):
# for i in range(3):
    # Skip electrodes whose name is on the Blacklist.
    if ele_list[i][1] in Blacklist:
        continue
    # File layout: <electrode dir>/<MODEL_SUFFIX>/<electrode name>_<MODEL_SUFFIX>.pt
    fd_pt = os.path.join(ele_list[i][0], MODEL_SUFFIX, f"{ele_list[i][1]}_{MODEL_SUFFIX}.pt")
    if not os.path.exists(fd_pt):
        # logger.warning(f"{fd_pt} does not exist")
        continue
    # NOTE(review): weights_only=False makes torch.load unpickle arbitrary objects;
    # only load files from a trusted source.
    data_pt = torch.load(fd_pt, weights_only=False)
    _meta_group = data_pt["meta_group"]
    _data_group = data_pt["data_group"]

    n_day       = _meta_group["n_day"]
    n_ch        = _meta_group["n_ch"]
    n_valid_ch  = len(_data_group["Channels"])

    # ignore abnormal ele: an incomplete electrode is skipped only when it has
    # fewer than 5 days or no more than 100 valid channels
    if n_ch != 128 or n_valid_ch != n_ch:
        if n_day < 5 or n_valid_ch <= 100:
            continue

    logger.info(f"ELE [{i}/{n_ele}]: {ele_list[i][0]}")


    # Iteration by channel
    for j in _data_group['Channels']:
        _ch_data = _data_group[j]["y_eval"]
        # _ch_data_log = np.log(_ch_data[:,:,0] + 1j*_ch_data[:,:,1])
        # _ch_data[:,:,0] = np.real(_ch_data_log)
        # _ch_data[:,:,1] = np.imag(_ch_data_log)
        # Put the two slices of the last axis side by side: one row per entry of
        # the first axis (presumably one row per day/sample -- TODO confirm).
        _ch_data = np.hstack((_ch_data[:,:,0],_ch_data[:,:,1]))
        almost_data_list.append(_ch_data)
        almost_start_list.append(_ch_data[0,:])


        # Channel key is expected to start with "ch_" followed by three digits;
        # a key that does not match makes .group() fail on None.
        _ch_id = _ch_pattern.match(j)
        _ch_id = int(_ch_id.group(1))

        # One [electrode index, channel number] row per data row.
        _id = [i, _ch_id] * np.shape(_ch_data)[0]
        _id = np.array(_id).reshape(-1,2)

        # Append the stored cluster id as a third column
        # (assumes one cluster id per data row -- TODO confirm).
        _cluster_id = _data_group[j]['eis_cluster_eval']
        _id = np.hstack((_id, _cluster_id.reshape(-1,1)))
        almost_id_list.append(_id)
        almost_start_id_list.append(_id[0,:])

almost_data_list = np.vstack(almost_data_list)
almost_id_list = np.vstack(almost_id_list)
almost_start_list = np.vstack(almost_start_list)
almost_start_id_list = np.vstack(almost_start_id_list)


# Drop the references to the last loaded file and reclaim memory.
# NOTE(review): this del raises NameError if no file was loaded in the loop.
del data_pt, _meta_group, _data_group, _ch_data
gc.collect()



[32m2025-04-25 11:38:29.037[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m33[0m - [1mELE [0/218]: D:/Baihm/EISNN/Archive/01037160_归档[0m
[32m2025-04-25 11:38:29.076[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m33[0m - [1mELE [1/218]: D:/Baihm/EISNN/Archive/01037161_归档[0m
[32m2025-04-25 11:38:29.124[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m33[0m - [1mELE [2/218]: D:/Baihm/EISNN/Archive/01037162_归档[0m
[32m2025-04-25 11:38:29.165[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m33[0m - [1mELE [5/218]: D:/Baihm/EISNN/Archive/01067095_归档[0m
[32m2025-04-25 11:38:29.205[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m33[0m - [1mELE [9/218]: D:/Baihm/EISNN/Archive/02027373_归档[0m
[32m2025-04-25 11:38:29.221[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m33[0m - [1mELE [10/218]: D:/Baihm/EISNN/Archive/02027390_归档[0m
[32m2025-04-25 11:38:29.254[0m | [1m

256

## PCA

In [26]:
# Fit the scaler and a 10-component PCA on all rows.
_pca_m = PCA(n_components = 10)
_scale = StandardScaler()
_data_norm = _scale.fit_transform(almost_data_list)
_pca_data = _pca_m.fit_transform(_data_norm)

# Project the first row of every channel with the already fitted scaler and PCA
# (transform only, no refit), so both point sets share the same coordinates.
_data_norm = _scale.transform(almost_start_list)
_pca_start = _pca_m.transform(_data_norm)

In [33]:


# Scatter of PC1 against PC2 for all rows, in a single default color.
fig, axis = plt.subplots(1,1, figsize = (16,9))
# axis.scatter(_pca_data[:,0],_pca_data[:,1], color = 'lightgray', s=0.05)
plt.scatter(_pca_data[:,0],_pca_data[:,1],s=0.01)
# plt.scatter(_pca_start[:,0],_pca_start[:,1],s=0.5)
plt.title('PCA')


Text(0.5, 1.0, 'PCA')

## Plot & Save

In [13]:

# True: write every per-electrode figure to manifold_fig_save_path and close it;
# False: show the figures instead.
SAVE_FLAG = False
# Output directory for the figures (absolute, machine-specific path).
manifold_fig_save_path = "D:/Baihm/EISNN/Archive/Matern12_Ver01/Manifold"





# Distinct electrode indices present in the id table (column 0) and the largest one.
uq_id_list = np.unique(almost_id_list[:,0])
uq_id_max = np.max(uq_id_list)



In [16]:
# Display the electrode indices that made it into the data set.
uq_id_list

array([  0.,   1.,   2.,   5.,   9.,  10.,  11.,  12.,  13.,  14.,  15.,
        16.,  18.,  19.,  21.,  22.,  23.,  24.,  25.,  26.,  27.,  28.,
        29.,  30.,  32.,  33.,  34.,  35.,  36.,  40.,  41.,  42.,  50.,
        55.,  60.,  62.,  63.,  64.,  65.,  66.,  69.,  72.,  73.,  74.,
        75.,  76.,  77.,  78.,  79.,  80.,  81.,  82.,  94.,  95.,  96.,
        97.,  98.,  99., 100., 101., 102., 104., 107., 108., 110., 111.,
       114., 118., 119., 120., 121., 122., 124., 126., 127., 128., 132.,
       133., 134., 135., 136., 138., 139., 140., 146., 149., 150., 151.,
       152., 153., 154., 155., 156., 157., 158., 159., 160., 162., 163.,
       164., 179., 180., 181., 182., 183., 184., 185., 186., 187., 188.,
       189., 191., 192., 193., 195., 197., 198., 199., 200., 201., 202.,
       203., 204., 210., 217.])

In [None]:

cmap = plt.colormaps.get_cmap("rainbow_r")

if SAVE_FLAG:
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(manifold_fig_save_path, exist_ok=True)

# One figure per electrode: all rows in light gray as background, and on top
# the trajectory of every channel of that electrode in the PC1/PC2 plane.
# NOTE(review): with SAVE_FLAG False every figure stays open, which can use a
# lot of memory when there are many electrodes.
for i in range(len(uq_id_list)):

    fig, axis = plt.subplots(1,1, figsize = (16,9))
    axis.scatter(_pca_data[:,0],_pca_data[:,1], color = 'lightgray', s=0.05)

    _ele_id = uq_id_list[i]

    ele_mask = almost_id_list[:,0] == _ele_id
    _ch_list = np.unique(almost_id_list[ele_mask,1])

    for j in _ch_list:
        # Rows of this electrode AND this channel (reuses the electrode mask
        # instead of comparing the whole id table again).
        _ch_mask = ele_mask & (almost_id_list[:,1] == j)

        _cluster_list = np.unique(almost_id_list[_ch_mask,2])

        _seq_all_len = almost_id_list[_ch_mask,2].shape[0]
        _seg_poi = 0   # number of rows of this channel already drawn

        for k in _cluster_list:
            # Rows of this channel that carry cluster id k.
            _cluster_mask = _ch_mask & (almost_id_list[:,2] == k)
            _cluster_data = _pca_data[_cluster_mask,:2]
            _seg_len = _cluster_data.shape[0]

            # A line needs at least two points; smaller clusters are skipped
            # but still advance the color position below.
            if _seg_len >= 2:
                # Consecutive point pairs -> array of line segments (n-1, 2, 2).
                _seg_data = _cluster_data.reshape(-1,1,2)
                _seg_data = np.concatenate([_seg_data[:-1], _seg_data[1:]], axis=1)

                # Color encodes the position inside the channel's sequence,
                # running from 0 to 1 over all clusters of the channel
                # (presumably the rows are in time order -- TODO confirm).
                color_range = np.linspace(_seg_poi/_seq_all_len, (_seg_poi+_seg_len)/_seq_all_len, _seg_len - 1)
                colors = cmap(color_range)

                lc = LineCollection(_seg_data, colors=colors, linewidth=2)
                axis.add_collection(lc)

            _seg_poi = _seg_poi+_seg_len

    # The electrode index is stored as a float in the id table, hence int().
    axis.set_title(f"{ele_list[int(_ele_id)][1]}_Manifold")
    if SAVE_FLAG:
        _fig_name = f"{ele_list[int(_ele_id)][1]}_Manifold.png"
        _fig_save_path = os.path.join(manifold_fig_save_path, _fig_name)

        fig.savefig(_fig_save_path)
        # Close the figure right away so the loop does not accumulate open figures.
        plt.close(fig)

        logger.info(f"{i}/{len(uq_id_list)} Saved")
    else:
        fig.show()



## Plot All Manifold

In [15]:




# Single figure: all rows in light gray as background, with the channel
# trajectories of the first few electrodes drawn on top.
fig, axis = plt.subplots(1,1, figsize = (16,9))
axis.scatter(_pca_data[:,0],_pca_data[:,1], color = 'lightgray', s=0.05)

uq_id_list = np.unique(almost_id_list[:,0])
uq_id_max = np.max(uq_id_list)


cmap = plt.colormaps.get_cmap("rainbow_r")

# Only the first 6 electrodes are drawn; the bound is clamped so that a data
# set with fewer electrodes does not index past the end of uq_id_list.
_n_preview = min(6, len(uq_id_list))
for i in range(_n_preview):
    _ele_id = uq_id_list[i]

    ele_mask = almost_id_list[:,0] == _ele_id
    _ch_list = np.unique(almost_id_list[ele_mask,1])

    for j in _ch_list:
        # Rows of this electrode AND this channel (reuses the electrode mask
        # instead of comparing the whole id table again).
        _ch_mask = ele_mask & (almost_id_list[:,1] == j)

        _cluster_list = np.unique(almost_id_list[_ch_mask,2])

        _seq_all_len = almost_id_list[_ch_mask,2].shape[0]
        _seg_poi = 0   # number of rows of this channel already drawn

        for k in _cluster_list:
            # Rows of this channel that carry cluster id k.
            _cluster_mask = _ch_mask & (almost_id_list[:,2] == k)
            _cluster_data = _pca_data[_cluster_mask,:2]
            _seg_len = _cluster_data.shape[0]

            # A line needs at least two points; smaller clusters are skipped
            # but still advance the color position below.
            if _seg_len >= 2:
                # Consecutive point pairs -> array of line segments (n-1, 2, 2).
                _seg_data = _cluster_data.reshape(-1,1,2)
                _seg_data = np.concatenate([_seg_data[:-1], _seg_data[1:]], axis=1)

                # Color encodes the position inside the channel's sequence,
                # running from 0 to 1 over all clusters of the channel
                # (presumably the rows are in time order -- TODO confirm).
                color_range = np.linspace(_seg_poi/_seq_all_len, (_seg_poi+_seg_len)/_seq_all_len, _seg_len - 1)
                colors = cmap(color_range)

                lc = LineCollection(_seg_data, colors=colors, linewidth=1, alpha = 0.1)
                axis.add_collection(lc)

            _seg_poi = _seg_poi+_seg_len

fig.show()



In [311]:
# Bring up the most recently created figure again.
fig.show()