# Import

In [1]:
import os
import re
import gc
import sys
from loguru import logger

import matplotlib.pyplot as plt 

from datetime import datetime

import numpy as np
import torch


sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))
from HETSFileHelper import gatherCSV, readChannel, EIS_recal_ver02
from Outlier import OutlierDetection
from EISGPR import Interpolation


# %matplotlib qt

# File System

In [2]:
def SearchELE(rootPath, ele_pattern = re.compile(r"(.+?)_归档")):
    '''==================================================
        Recursively search rootPath for electrode directories.

        A sub-directory whose name matches ele_pattern (tested with
        re.match, i.e. anchored at the start of the name) is recorded
        and is NOT descended into. Any other sub-directory is searched
        recursively. Plain files are ignored.

        Parameter:
            rootPath: directory in which the search starts
            ele_pattern: compiled regex for electrode directory names;
                         its group(1) is stored alongside the path
        Return:
            ele_list: list of [directory path, group(1) of the match],
                      in sorted-name, depth-first order
        ==================================================
    '''
    ele_list = []
    # os.listdir yields entries in arbitrary order; sorting keeps the result
    # (and any index derived from it) reproducible across runs and platforms.
    for entry in sorted(os.listdir(rootPath)):
        entry_path = os.path.join(rootPath, entry)
        if not os.path.isdir(entry_path):
            continue

        match_ele = ele_pattern.match(entry)
        if match_ele:
            ele_list.append([entry_path, match_ele.group(1)])
        else:
            # Not an electrode directory itself: look one level deeper.
            ele_list.extend(SearchELE(entry_path, ele_pattern))

    return ele_list

In [3]:
# Root directory that is scanned recursively for electrode folders,
# using SearchELE's default directory-name pattern.
rootPath = "D:/Baihm/EISNN/Archive_New/"
ele_list = SearchELE(rootPath)

# Alternative root, matched with a "<name>_Ver01" directory pattern instead:
# rootPath = "D:/Baihm/EISNN/Invivo/"
# ele_list = SearchELE(rootPath, re.compile(r"(.+?)_Ver01"))

# Each ele_list entry is [directory path, captured electrode name].
n_ele = len(ele_list)
logger.info(f"Search in {rootPath} and find {n_ele:03d} electrodes")

[32m2025-05-09 11:15:38.115[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mSearch in D:/Baihm/EISNN/Archive_New/ and find 187 electrodes[0m


# Error Processing Statistics

In [6]:
# We observed that the final clustering step uses AP + silhouette_score,
# and silhouette_score requires a minimum number of samples.
# This produced many errors that were previously skipped with try/except,
# which may cause normal electrodes to be misjudged.
# Here we log, for every .pt file, the number of valid electrodes and the
# number of tracked days.
# Rule of thumb: if the number of valid electrodes is < 128 - 10 while the
# number of tracked days is fairly large, treat the file as problematic.

DATASET_SUFFIX = "Outlier_Ver02"

n_miss_ele      = 0   # entries of ele_list whose .pt file does not exist
n_avaliable_ele = 0   # running total of len(ch_good) over all loaded files

# Per-file tallies: one value is appended for every .pt file that is loaded.
n_all_days      = []
n_few_error     = []
n_open_error    = []
n_nan_error     = []
n_good          = []

for i, (ele_dir, ele_name) in enumerate(ele_list):
    fd_pt = os.path.join(ele_dir, DATASET_SUFFIX, f"{ele_name}_{DATASET_SUFFIX}.pt")
    if not os.path.exists(fd_pt):
        n_miss_ele += 1
        logger.warning(f"{fd_pt} does not exist")
        continue

    # SECURITY NOTE: with weights_only=False, torch.load unpickles arbitrary
    # Python objects, so only load .pt files from a trusted source. The flag
    # is passed explicitly so the behaviour does not depend on the installed
    # torch version's default (and the FutureWarning is not emitted).
    data_pt = torch.load(fd_pt, weights_only=False)
    _meta_group = data_pt["meta_group"]
    _data_group = data_pt["data_group"]   # not used by this cell

    n_day = _meta_group["n_day"]
    n_ch = _meta_group["n_ch"]

    # Channel collections per category; only their lengths are used here.
    ch_few_error = _meta_group["ch_few_error"]
    ch_open_error = _meta_group["ch_open_error"]
    ch_nan_error = _meta_group["ch_nan_error"]
    ch_good = _meta_group["ch_good"]

    n_avaliable_ele += len(ch_good)
    n_all_days.append(n_day)
    n_few_error.append(len(ch_few_error))
    n_open_error.append(len(ch_open_error))
    n_nan_error.append(len(ch_nan_error))
    n_good.append(len(ch_good))

    logger.info(f"{ele_name}[{i:03d}] - [{n_day}]: Error:{len(ch_few_error)} Open:{len(ch_open_error)} Nan:{len(ch_nan_error)} Good:{len(ch_good)}/{n_ch} ")


  data_pt = torch.load(fd_pt)
[32m2025-05-09 11:34:41.822[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m51[0m - [1m02027452[000] - [27]: Error:0 Open:54 Nan:0 Good:74/128 [0m
[32m2025-05-09 11:34:41.854[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m51[0m - [1m02027453[001] - [27]: Error:0 Open:59 Nan:0 Good:69/128 [0m
[32m2025-05-09 11:34:41.946[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m51[0m - [1m11037287[006] - [9]: Error:13 Open:7 Nan:0 Good:108/128 [0m
[32m2025-05-09 11:34:42.055[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m51[0m - [1m16057219[010] - [15]: Error:0 Open:7 Nan:0 Good:121/128 [0m
[32m2025-05-09 11:34:42.205[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m51[0m - [1m16057220[011] - [22]: Error:0 Open:9 Nan:0 Good:119/128 [0m
[32m2025-05-09 11:34:42.313[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m51[0m - [1m16057221[012] - [

In [12]:
# Turn the per-file tallies into NumPy arrays so that later cells can
# combine them element-wise.
n_all_days, n_few_error, n_open_error, n_nan_error, n_good = (
    np.array(tally)
    for tally in (n_all_days, n_few_error, n_open_error, n_nan_error, n_good)
)

In [20]:
# Channel totals: every channel counts once per loaded file, regardless of
# how many days that file covers.
cnt_few_error  =  n_few_error
cnt_open_error =  n_open_error
cnt_nan_error  =  n_nan_error
cnt_good       =  n_good

# Sum each category exactly once and reuse the values for the grand total.
_totals = {
    "cnt_few_error": np.sum(cnt_few_error),
    "cnt_open_error": np.sum(cnt_open_error),
    "cnt_nan_error": np.sum(cnt_nan_error),
    "cnt_good": np.sum(cnt_good),
}

# Build the report line by line. The previous backslash continuations inside
# the f-string leaked the source indentation into the log message as trailing
# spaces after every value.
_report = "\n".join(f"{label}:{total}" for label, total in _totals.items())
logger.info(f"\n{_report}\nsum:{sum(_totals.values())}")

[32m2025-05-09 11:47:37.456[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1m
 cnt_few_error:2348            
cnt_open_error:4181            
cnt_nan_error:0            
cnt_good:9708            
sum:16237[0m


In [21]:
# Day-weighted totals: each file's channel counts are multiplied by that
# file's n_day value (element-wise, so the tallies must already be NumPy
# arrays of equal length).
# NOTE(review): assumes n_day is a scalar day count per file - confirm.
cnt_few_error  =  n_all_days * n_few_error
cnt_open_error =  n_all_days * n_open_error
cnt_nan_error  =  n_all_days * n_nan_error
cnt_good       =  n_all_days * n_good

# Sum each category exactly once and reuse the values for the grand total.
_totals = {
    "cnt_few_error": np.sum(cnt_few_error),
    "cnt_open_error": np.sum(cnt_open_error),
    "cnt_nan_error": np.sum(cnt_nan_error),
    "cnt_good": np.sum(cnt_good),
}

# Build the report line by line. The previous backslash continuations inside
# the f-string leaked the source indentation into the log message as trailing
# spaces after every value.
_report = "\n".join(f"{label}:{total}" for label, total in _totals.items())
logger.info(f"\n{_report}\nsum:{sum(_totals.values())}")

[32m2025-05-09 11:47:46.684[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1m
 cnt_few_error:13910            
cnt_open_error:54879            
cnt_nan_error:0            
cnt_good:101695            
sum:170484[0m


In [23]:
# Number of electrode directories found vs. how many of them had no
# "<name>_<DATASET_SUFFIX>.pt" file when the statistics loop ran.
print(n_ele,n_miss_ele)

187 58
