# Import

In [None]:
import numpy as np
import torch

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.svm import OneClassSVM
from scipy.special import expit  
import joblib

import re
import os
import gc
from loguru import logger

import matplotlib.pyplot as plt 
%matplotlib qt

from collections import defaultdict
# from datetime import datetime




In [None]:
def gatherCSV(rootPath, outsuffix = 'Tracking'):
    '''==================================================
        Collect all EIS_chNNN.csv files below rootPath
        Expected layout:
            rootPath/<name>_<8 digits>_01/<bank>/<outsuffix>/EIS_chNNN.csv
        Parameter: 
            rootPath: directory that contains the session directories
            outsuffix: name of the sub-directory inside every bank
                       directory that holds the EIS csv files
        Return:
            EISDict: a 2D-dict of EIS file paths
            Storage Frame: EISDict[_sessionIndex][_channelIndex] = "_filepath"
                _sessionIndex: the 8-digit string of the session name
                _channelIndex: (bank - 1) * 32 + NNN
        ==================================================
    '''
    EISDict = defaultdict(dict)

    session_pattern = re.compile(r"(.+?)_(\d{8})_01")
    bank_pattern    = re.compile(r"([1-4])")
    file_pattern    = re.compile(r"EIS_ch(\d{3})\.csv")

    ## RootDir
    # os.listdir() returns entries in arbitrary order; sort them so that the
    # insertion order of EISDict (and thus any positional session index used
    # by callers) is reproducible.
    for i in sorted(os.listdir(rootPath)):
        match_session = session_pattern.match(i)
        _sessionpath = f"{rootPath}/{i}"
        ## SessionDir
        # A plain file whose name happens to match the session pattern must
        # be skipped, otherwise os.listdir() below would raise.
        if not match_session or not os.path.isdir(_sessionpath):
            continue

        logger.info(f"Session Begin: {i}")
        _sessionIndex = match_session[2]
        for j in sorted(os.listdir(_sessionpath)):
            match_bank = bank_pattern.match(j)
            ## BankDir
            if not match_bank:
                continue

            logger.info(f"Bank Begin: {j}")
            _trackpath = f"{_sessionpath}/{j}/{outsuffix}"
            # isdir (not exists): a regular file named like outsuffix
            # cannot be listed.
            if not os.path.isdir(_trackpath):
                continue

            for k in sorted(os.listdir(_trackpath)):
                match_file = file_pattern.match(k)
                ## File
                if match_file:
                    _channelIndex = (int(match_bank[1])-1)*32+int(match_file[1])
                    EISDict[_sessionIndex][_channelIndex] = f"{_trackpath}/{k}"

    return EISDict




# Data Readout
def readChannel(chID, fileDict):
    '''==================================================
        Read the EIS.csv files of one channel for every session
        Parameter: 
            chID: channel index
            fileDict: EISDict[_sessionIndex][_channelIndex] = "_filepath"
        Return:
            array of shape [n_session, 3, n_row]; for every session
            the three rows are
                0: csv column 0 (frequency)
                1: csv column 3 (real part of impedance)
                2: csv column 4 (imaginary part of impedance)
        ==================================================
    '''
    per_session = []
    for session_files in fileDict.values():
        table = np.loadtxt(session_files[chID], delimiter=',')
        # pick the three csv columns and put them on the first axis
        per_session.append(table[:, [0, 3, 4]].T)

    return np.stack(per_session, axis=0)

In [None]:
def EIS_recal(data):
    '''==================================================
        Re-calibrate one EIS spectrum in the admittance domain
        Parameter: 
            data: array [3, n] = (frequency, Re(Z), Im(Z))
        Return:
            array [3, n] = (frequency, Re(Z_rec), Im(Z_rec))
        Note:
            The measured admittance 1/Z is corrected by
                - f^1.583 / Rg0        (real term)
                + 1j * Cp0 * f^0.911   (imaginary term)
            NOTE(review): Rg0 / Cp0 look like fitted fixture
            constants - confirm their origin.
        ==================================================
    '''
    Rg0 = 1.611e13
    Cp0 = 1.4e-9

    freq = data[0,:]
    z_meas = data[1,:] + 1j*data[2,:]

    # frequency dependent correction terms
    g_term = 1/Rg0*np.power(freq,1.583)
    c_term = Cp0*np.power(freq,0.911)

    y_corr = 1/z_meas - g_term + 1j*c_term
    z_corr = 1/y_corr

    # rebuild the impedance from amplitude and phase (no extra amplitude or
    # phase offset is applied in this version)
    magnitude = np.abs(z_corr)
    phase = np.angle(z_corr)
    z_rec = magnitude * np.exp(1j*phase)

    return np.array([freq, z_rec.real, z_rec.imag])


def EIS_recal_ver02(data, _phz_0 = None):
    '''==================================================
        Re-calibrate one EIS spectrum (version 02)
        Parameter: 
            data: array [3, n] = (frequency, Re(Z), Im(Z))
            _phz_0: phase offset (rad) subtracted from the spectrum;
                    loaded from "./phz_Calib.txt" when None
        Return:
            array [3, n] = (frequency, Re(Z_rec), Im(Z_rec))
        Steps:
            1. admittance correction (same as EIS_recal)
            2. subtract the phase offset _phz_0
            3. subtract 100 (series term)
            4. remove a parallel term 1j * 5e-10 * f in the admittance
            5. subtract a series term 1j * 5e-4 * f
            6. subtract 566 (series term)
            NOTE(review): the constants look like fitted fixture
            parasitics - confirm their origin and units.
        ==================================================
    '''
    freq = data[0,:]
    z_meas = data[1,:] + 1j*data[2,:]

    # step 1: admittance-domain correction
    Rg0 = 1.611e13
    Cp0 = 1.4e-9
    g_term = 1/Rg0*np.power(freq,1.583)
    c_term = Cp0*np.power(freq,0.911)
    y_corr = 1/z_meas - g_term + 1j*c_term
    z_corr = 1/y_corr

    # step 2: phase calibration
    if _phz_0 is None:
        _phz_0 = np.loadtxt("./phz_Calib.txt")
    magnitude = np.abs(z_corr)
    phase = np.angle(z_corr) - _phz_0
    z_rec = magnitude * np.exp(1j*phase)

    # step 3: first series term
    Rs0 = 100
    z_rec = z_rec - Rs0

    # step 4: parallel term, removed in the admittance domain
    Cp0 = 5e-10
    c_term = Cp0 * freq
    z_rec = 1/(1/z_rec - 1j * c_term)

    # step 5: series term proportional to frequency
    Ls0 = 5e-4
    l_term = Ls0 * freq
    z_rec = z_rec - 1j * l_term

    # step 6: second series term
    Rs0 = 566
    z_rec = z_rec - Rs0

    return np.stack([freq, z_rec.real, z_rec.imag], axis=0)
    

## Import Data

In [None]:
def SearchELE(rootPath, ele_pattern = re.compile(r"(.+?)_归档")):
    '''==================================================
        Search all electrode directories in the rootPath
        Parameter: 
            rootPath: current search path
            ele_pattern: electrode dir name pattern; group(1) is
                         used as the electrode name
        Return:
            ele_list: list of [directory path, electrode name]
        Note:
            Directories that do not match are searched recursively;
            a matching directory is NOT descended into.
            Entries are visited in sorted order so the position of
            an electrode in ele_list is reproducible (os.listdir
            itself returns entries in arbitrary order).
        ==================================================
    '''
    ele_list = []
    for i in sorted(os.listdir(rootPath)):
        _path = os.path.join(rootPath, i)
        if not os.path.isdir(_path):
            continue

        match_ele = ele_pattern.match(i)
        if match_ele:
            ele_list.append([_path, match_ele.group(1)])
        else:
            ele_list.extend(SearchELE(_path, ele_pattern))

    return ele_list

### Archive_Old

In [None]:
# Root directory of the older archive; SearchELE walks it recursively.
rootPath = "D:/Baihm/EISNN/Archive/"
# Every entry of ele_list is [directory path, electrode name].
ele_list = SearchELE(rootPath)
n_ele = len(ele_list)
logger.info(f"Search in {rootPath} and find {n_ele:03d} electrodes")

In [None]:
# First take the electrodes that look completely fine (128/128 channels)
# and try clustering on them.
# The data volume is also fairly small, so this should run faster.

MODEL_SUFFIX = "Matern12_Ver01"

almost_start_list = []
almost_start_id_list = []
almost_data_list = []
almost_id_list = []
_ch_pattern = re.compile(r"ch_(\d{3})")

# (use enumerate(ele_list[:3]) for a quick trial run)
for i, (_ele_dir, _ele_name) in enumerate(ele_list):
    fd_pt = os.path.join(_ele_dir, MODEL_SUFFIX, f"{_ele_name}_{MODEL_SUFFIX}.pt")
    if not os.path.exists(fd_pt):
        # electrodes without a model file are skipped silently
        continue
    # NOTE(review): weights_only=False unpickles arbitrary objects - only
    # load files from a trusted source.
    data_pt = torch.load(fd_pt, weights_only=False)
    _meta_group = data_pt["meta_group"]
    _data_group = data_pt["data_group"]

    n_day       = _meta_group["n_day"]
    n_ch        = _meta_group["n_ch"]
    n_valid_ch  = len(_data_group["Channels"])

    # ignore abnormal ele: an electrode that is not 128/128 is only kept
    # when it has at least 5 days and more than 100 valid channels
    _incomplete = n_ch != 128 or n_valid_ch != n_ch
    if _incomplete and (n_day < 5 or n_valid_ch <= 100):
        continue

    logger.info(f"ELE [{i}/{n_ele}]: {_ele_dir}")

    # Iteration by channel
    for j in _data_group['Channels']:
        _ch_data = _data_group[j]["y_eval"]
        # put [..., 0] and [..., 1] side by side -> one feature row per
        # first-axis entry
        _ch_data = np.hstack((_ch_data[:,:,0],_ch_data[:,:,1]))
        almost_data_list.append(_ch_data)
        almost_start_list.append(_ch_data[0,:])

        # channel number parsed from the "ch_NNN" key
        _ch_id = int(_ch_pattern.match(j).group(1))

        # one (electrode index, channel id) pair per data row
        _id = np.array([[i, _ch_id]] * np.shape(_ch_data)[0])
        almost_id_list.append(_id)
        almost_start_id_list.append(_id[0,:])

almost_data_list = np.vstack(almost_data_list)
almost_id_list = np.vstack(almost_id_list)
almost_start_list = np.vstack(almost_start_list)
almost_start_id_list = np.vstack(almost_start_id_list)

# release the last loaded file before continuing
del data_pt, _meta_group, _data_group, _ch_data
gc.collect()

almost_data_list.shape

In [None]:
# Columns [0, 101) and [101, end) are combined into one complex value per
# column pair and split again into [real | imag] blocks.
_z_all = almost_data_list[:,:101] + 1j*almost_data_list[:,101:]
# _z_all = np.log(almost_data_list[:,:101] + 1j*almost_data_list[:,101:])
test_data = np.concatenate([_z_all.real, _z_all.imag], axis=1)


### Archive_New

In [None]:
# Root directory of the newer archive; SearchELE walks it recursively.
rootPath = "D:/Baihm/EISNN/Archive_New/"
# Every entry of ele_list is [directory path, electrode name].
ele_list = SearchELE(rootPath)
n_ele = len(ele_list)
logger.info(f"Search in {rootPath} and find {n_ele:03d} electrodes")

In [None]:
# First take the electrodes that look completely fine (128/128 channels)
# and try clustering on them.
# The data volume is also fairly small, so this should run faster.
# NOTE: unlike the Archive_Old cell, this loop applies no channel/day
# filter - every electrode that has a dataset file is used.

DATASET_SUFFIX = "Outlier_Ver02"

# Accumulators; converted to stacked arrays after the loop.
almost_start_list = []
almost_start_id_list = []
almost_data_list = []
almost_id_list = []

for i in range(n_ele):
# for i in range(3):
    fd_pt = os.path.join(ele_list[i][0], DATASET_SUFFIX, f"{ele_list[i][1]}_{DATASET_SUFFIX}.pt")
    if not os.path.exists(fd_pt):
        # logger.warning(f"{fd_pt} does not exist")
        continue
    # NOTE(review): weights_only=False unpickles arbitrary objects - only
    # load files from a trusted source.
    data_pt = torch.load(fd_pt, weights_only=False)
    _meta_group = data_pt["meta_group"]
    _data_group = data_pt["data_group"]

    # Read but not used further in this cell.
    n_day       = _meta_group["n_day"]
    n_ch        = _meta_group["n_ch"]
    n_valid_ch  = len(_data_group["Channels"])


    logger.info(f"ELE [{i}/{n_ele}]: {ele_list[i][0]}")


    # Iteration by channel
    for j in _data_group['Channels']:
        _ch_data = _data_group[j]["chData"]
        # Complex log of index 1 + 1j * index 2 along the second axis.
        # presumably index 1/2 are Re(Z)/Im(Z) as in readChannel - TODO confirm
        _ch_data_log = np.log(_ch_data[:,1,:] + 1j*_ch_data[:,2,:])
        # NOTE(review): these two lines write into the loaded "chData"
        # object in place; the result depends on its dtype - confirm it is
        # a floating point array.
        _ch_data[:,1,:] = np.real(_ch_data_log)
        _ch_data[:,2,:] = np.imag(_ch_data_log)
        # Feature row = [real(log Z) | imag(log Z)]
        _ch_data = np.hstack((_ch_data[:,1,:],_ch_data[:,2,:]))
        almost_data_list.append(_ch_data)
        almost_start_list.append(_ch_data[0,:])


        # Here the key of the channel is used directly as channel id.
        _ch_id = j

        # One (electrode index, channel id) pair per data row.
        _id = [i, _ch_id] * np.shape(_ch_data)[0]
        _id = np.array(_id).reshape(-1,2)
        almost_id_list.append(_id)
        almost_start_id_list.append(_id[0,:])

almost_data_list = np.vstack(almost_data_list)
almost_id_list = np.vstack(almost_id_list)
almost_start_list = np.vstack(almost_start_list)
almost_start_id_list = np.vstack(almost_start_id_list)


# Release the last loaded file.
del data_pt, _meta_group, _data_group, _ch_data
gc.collect()



# Load Testdata

In [None]:
# Columns [0, 101) and [101, end) are combined into one complex value per
# column pair and split again into [real | imag] blocks.
_z_all = almost_data_list[:,:101] + 1j*almost_data_list[:,101:]
# _z_all = np.log(almost_data_list[:,:101] + 1j*almost_data_list[:,101:])
test_data = np.concatenate([_z_all.real, _z_all.imag], axis=1)


# Weird model Training

## Feature Extraction

In [None]:

# Electrode archive to read the raw EIS csv files from; switch electrodes by
# un-commenting another line.
# Raw strings are used because "\B", "\E" and "\A" are not valid escape
# sequences (Python >= 3.12 warns about them); the resulting path text is
# unchanged.
# rootPath = r"D:\Baihm\EISNN\Archive_New/2025/2月/20047596_归档"
# rootPath = r"D:\Baihm\EISNN\Archive_New/2025/2月/20047597_归档"
# rootPath = r"D:\Baihm\EISNN\Archive/01067094_归档"
# rootPath = r"D:\Baihm\EISNN\Archive/22037380_归档"
rootPath = r"D:\Baihm\EISNN\Archive/10067077_归档"


# EISDict[session][channel] -> csv file path
EISDict = gatherCSV(rootPath)

In [None]:
# Hand-picked selections, one group per electrode archive.
#   ch_white_list : channels that are read in the next cell
#   day_black_list: session positions that are removed (np.delete, axis 0)
#   day_white_list: the complementary selection, kept for reference only
# Only the group matching the active rootPath should be un-commented.

# 20047596_归档
# ch_white_list = np.array([0,1,2,3,8,9,10,15,16,17,18,19,20,24,25,26,27,39,47,48,49,56,57,63,69,70,71,85,87,89,90,91,96,100,101,102,104,105,106,107])
# day_black_list = [0,5, 9,15]

# 20047597_归档
# ch_white_list = np.array([1,3,8,10,15,17,24,26,27,39,55,65,68,70,71,79,80,85,86,87,90,95,101,103,106,107,111,120,121])
# day_black_list = [0,5,9,10,15,16]

# 01067094_归档
# ch_white_list = np.array([0,1,2,3,7,8,10,15,16,17,18,19,20,24,25,26,27,28,39,47,48,50,51,52,55,56,57,59,63,65,68,69,70,71,85,86,87,95,96,97,100,101,102,103,104,105,107,111,122,123,124])
# day_black_list = [1,4,5,6,8, 11,13,14,15,16]

# day_white_list = [0,2,3,7,9,10]

# 22037380_归档
# ch_white_list = np.array([16,17,18,19,20,21,24,26,27,28])
# ch_white_list = np.arange(128)
# day_black_list = [1,8,9,10,11,12]


# 10067077_归档 (active)
ch_white_list = np.array([16,17,18,19,20,21,24,26,27,28])
# ch_white_list = np.arange(128)
day_black_list = [0,1,2,3,7,10]
# day_white_list = [4,5,6,8,9,11,12,13,14]





In [None]:
all_data, _data = [], []
for ch_id in ch_white_list:
    chData = readChannel(ch_id, EISDict)
    # alternative: keep only white-listed sessions
    # _data = chData[day_white_list]
    # remove the black-listed sessions
    _data = np.delete(chData, day_black_list, axis=0)

    # keep a session only if |Z| < 1e6 for every sample from index 2500 on
    _eis = np.abs(_data[:,1,2500:] + 1j*_data[:,2,2500:])
    _keep = (_eis<1e6).all(axis=1)
    _data = _data[_keep,:,:]

    all_data.append(_data)

all_data = np.concatenate(all_data, axis=0)
all_data.shape

# |Z| of every kept spectrum on a log axis
chEIS = np.abs(all_data[:,1,:] + 1j*all_data[:,2,:])
plt.figure()
for i, _trace in enumerate(chEIS):
    plt.semilogy(_trace, label = f"{i}")
    # plt.plot(_trace, label = f"{i}")

In [None]:
# np.save("./Weird/EIS_10067077_weird.npy", all_data)

## Feature Import

In [None]:
# Previously exported "weird" spectra, one file per electrode; they are
# concatenated along the first axis.
_weird_files = (
    "./Weird/EIS_20047596_weird.npy",
    "./Weird/EIS_20047597_weird.npy",
    "./Weird/EIS_01067094_weird.npy",
    "./Weird/EIS_22037380_weird.npy",
    "./Weird/EIS_10067077_weird.npy",
)
all_data = np.concatenate([np.load(_f) for _f in _weird_files], axis=0)

In [None]:
# Left: |Z| (log axis), right: phase in rad, one faint line per spectrum
fig,axis = plt.subplots(1,2, figsize=(12,6))
_ax_mag, _ax_phz = axis
for i, _spec in enumerate(all_data):
    chEIS = _spec[1,:] + 1j*_spec[2,:]
    _ax_mag.semilogy(np.abs(chEIS), label = f"{i}", alpha = 0.1)
    _ax_phz.plot(np.angle(chEIS), label = f"{i}", alpha = 0.1)

## SVM for failure mode detection

### Input layer

In [None]:
# 101 integer sample positions between 0 and 4999 (linspace values are
# truncated to int)
freq_list = np.linspace(0, 5000-1,101,dtype=int, endpoint=True)

# complex log of the selected samples, stored as [real | imag]
_log_z = np.log(all_data[:,1,freq_list] + 1j*all_data[:,2,freq_list])
weird_data = np.concatenate([_log_z.real, _log_z.imag], axis=1)

print(f"weird_data shape: {weird_data.shape}")
# print(f"test_data shape: {test_data.shape}")

In [None]:
# Additional "weird" samples exported from both archives are appended to
# the training set.
# Raw strings: "\B", "\E", "\D", "\A" and "\W" are not valid escape
# sequences (Python >= 3.12 warns about them); the path text is unchanged.
weird_cluster_Archive_New = np.load(r"D:\Baihm\EISNN\Dataset\Anomaly\Weird\Archive_New_cluster.npy")
weird_cluster_Archive     = np.load(r"D:\Baihm\EISNN\Dataset\Anomaly\Weird\Archive_Weird_cluster.npy")
weird_data = np.vstack([weird_data, weird_cluster_Archive_New, weird_cluster_Archive])

In [None]:
# Overview of the training set: exp() of the first 101 columns on a log
# axis (left) and the remaining columns converted rad -> deg (right)
fig = plt.figure()
ax0 = fig.add_subplot(121)
ax1 = fig.add_subplot(122)

_left = np.exp(weird_data[:,:101])
_right = np.rad2deg(weird_data[:,101:])
for i, (_l, _r) in enumerate(zip(_left, _right)):
    ax0.semilogy(_l, label = f"{i}", alpha=0.005)
    ax1.plot(_r, label = f"{i}", alpha=0.005)

#### Calib

In [None]:
# Disabled experiment (never executed because of `if False`): re-calibrate
# every spectrum in all_data with EIS_recal_ver02 and plot the real part of
# the complex log of the result.
if False:
    _poi_data = np.zeros_like(all_data)
    # Phase offsets, loaded once and passed to every call.
    phz_calibration = np.loadtxt("./phz_Calib.txt")
    for i in range(np.shape(all_data)[0]):
        # ch_eis = EIS_recal(chData[i,:,:])
        ch_eis = EIS_recal_ver02(all_data[i,:,:], phz_calibration)
        _poi_data[i,:,:] = ch_eis

    plt.figure()
    for i in range(_poi_data.shape[0]):
        _poi_eis = np.log(_poi_data[i,1,:] + 1j*_poi_data[i,2,:])
        plt.plot(_poi_eis.real, label = f"{i}",alpha=0.1)

### SVM Fit

In [None]:
# One-class model fitted on the "weird" samples only.
weirdSVMmodel = OneClassSVM(kernel='rbf', gamma='auto', nu=0.01)  # nu adjusts how permissive the model is
weirdSVMmodel.fit(weird_data)

# Un-comment one of the lines below to persist the fitted model.
# joblib.dump(weirdSVMmodel, "weirdSVMmodel.pkl")
# joblib.dump(weirdSVMmodel, "../../Outlier/weirdSVMmodel.pkl")
# joblib.dump(weirdSVMmodel, "weirdSVMmodel_20250516_01.pkl")
# joblib.dump(weirdSVMmodel, "../../Outlier/weirdSVMmodel_20250516_01.pkl")

In [None]:
# The larger the score, the more similar a row is to the training data
_scores = weirdSVMmodel.decision_function(test_data)

# Squash the scores into (0, 1); the factor 5 can be tuned to control how
# confident the output looks
_probs = expit(_scores * 5)

# Step 5: build the m x 2 output [p, 1 - p]
_not_probs = 1 - _probs
weirdProbs = np.stack([_probs, _not_probs], axis=1)
print(weirdProbs.shape)  # (m, 2)

plt.figure()
plt.plot(_scores[:])
# plt.plot(weirdProbs[:,0])

# rows classified as "weird"
weird_test_data = test_data[_probs>0.5]

fig = plt.figure()
ax0 = fig.add_subplot(121)
ax1 = fig.add_subplot(122)

for i, _row in enumerate(weird_test_data):
    ax0.semilogy(np.exp(_row[:101]), label = f"{i}", alpha=0.005)
    ax1.plot(np.rad2deg(_row[101:]), label = f"{i}", alpha=0.005)


### PCA

In [None]:

# Standardise every feature, then project onto 10 principal components
_scale = StandardScaler()
_data_norm = _scale.fit_transform(test_data)
# _data_norm = _scale.fit_transform(almost_data_list)
_pca_m = PCA(n_components = 10)
_pca_data = _pca_m.fit_transform(_data_norm)

cmap = plt.colormaps.get_cmap("rainbow_r")
# cmap = plt.colormaps.get_cmap("Set1")

# first two components, coloured by the SVM probability
_point_colors = cmap(_probs)
# _point_colors = cmap(_probs>0.5)
# _point_colors = cmap(weirdProbs[:,0]>0.45)
plt.figure(figsize=(9,9))
plt.scatter(_pca_data[:,0],_pca_data[:,1],color=_point_colors, s=0.01)
# plt.gca().set_aspect('equal', adjustable='box')
plt.title('PCA')


## Weird Criterion

In [None]:
def weirdCriterion(model:OneClassSVM, test_data, threshold=0.5):
    '''==================================================
        Define the criterion of weird data
        Parameter: 
            model: trained OneClassSVM model
            test_data: data to be tested [n x 202] - (logZ)
            threshold: rows whose probability exceeds this value
                       are flagged
        Return:
            weird_mask: True for weird data
        Note:
            probability = expit(5 * decision_function score)
        ==================================================
    '''
    # sigmoid of the scaled decision score, compared to the threshold
    # (debug alternative: (_probs > 0.4) & (_probs < 0.41))
    return expit(model.decision_function(test_data) * 5) > threshold


In [None]:
# Apply the stored model and show the rows it flags
_model = joblib.load("weirdSVMmodel.pkl")
weird_mask = weirdCriterion(_model, test_data, threshold=0.5)

weird_test_data = test_data[weird_mask]

plt.figure()

for i, _row in enumerate(weird_test_data):
    plt.plot(_row[:101], label = f"{i}", alpha=0.01)
    # plt.plot(_row[101:], label = f"{i}", alpha=0.1)

### PCA


In [None]:

# Standardise every feature, then project onto 10 principal components
_scale = StandardScaler()
_data_norm = _scale.fit_transform(almost_data_list)
_pca_m = PCA(n_components = 10)
_pca_data = _pca_m.fit_transform(_data_norm)

# cmap = plt.colormaps.get_cmap("rainbow_r")
cmap = plt.colormaps.get_cmap("Set1")

# first two components, coloured by the weird mask
_point_colors = cmap(weird_mask)
# _point_colors = cmap(weirdProbs[:,0]>0.45)
plt.figure(figsize=(9,9))
plt.scatter(_pca_data[:,0],_pca_data[:,1],color=_point_colors, s=0.01)
# plt.gca().set_aspect('equal', adjustable='box')
plt.title('PCA')


# Short Criterion

In [None]:
def shortCriterion(freq, test_data, threshold = np.log(1e4)):
    '''==================================================
        Define the criterion of short data
        Parameter: 
            freq: frequency of EIS data [101,]
            test_data: data to be tested [n x 202] - (logZ)
            threshold: upper limit for the first-half columns
        Return:
            short_mask: True for shorted data
        Note:
            A row is flagged when every first-half column whose
            frequency is above 1e4 stays below threshold.
        ==================================================
    '''
    n_feature = test_data.shape[1]

    # column selector: only the first half can be selected, the second
    # half stays False
    selected = np.zeros(n_feature, dtype=bool)
    selected[:n_feature//2] = freq > 1e4

    below = test_data[:,selected] < threshold
    return np.all(below, axis=1)


In [None]:
# 101 integer sample positions between 0 and 4999
freq_list = np.linspace(0,5000-1,101,dtype=int, endpoint=True)
# NOTE(review): chData is whatever the last readChannel() call of the
# "Feature Extraction" cell left behind - that cell must have been run.
_freq_all = chData[0,0,freq_list]

short_mask = shortCriterion(_freq_all, test_data, threshold=np.log(1e4))

short_test_data = test_data[short_mask]

# Left: exp(first 101 columns) vs frequency, right: remaining columns in deg
fig, axis = plt.subplots(1,2)

for i in range(short_test_data.shape[0]):
    axis[0].loglog(_freq_all, np.exp(short_test_data[i,:101]), label = f"{i}", alpha=0.005)
    axis[1].semilogx(_freq_all, np.rad2deg(short_test_data[i,101:]), label = f"{i}", alpha=0.005)

# grid() without arguments TOGGLES the grid, so calling it once per plotted
# row left the grid on or off depending on the number of rows. Enable it
# explicitly, once.
axis[0].grid(True)

## PCA

In [None]:
# Standardise every feature, then project onto 10 principal components
_scale = StandardScaler()
_data_norm = _scale.fit_transform(almost_data_list)
_pca_m = PCA(n_components = 10)
_pca_data = _pca_m.fit_transform(_data_norm)


# cmap = plt.colormaps.get_cmap("rainbow_r")
cmap = plt.colormaps.get_cmap("Set1")

# first two components; colour index 2 = not short, 3 = short
_point_colors = cmap(short_mask.astype(int)+2)
plt.figure(figsize=(9,9))
plt.scatter(_pca_data[:,0],_pca_data[:,1],color=_point_colors, s=0.01)
plt.title('PCA')


# Open Model

## Input Layer

In [None]:
# Training samples for the "open" model: the base set plus the cluster
# exported from Archive_New.
# Raw strings: "\B", "\E", "\D", "\A" and "\O" are not valid escape
# sequences (Python >= 3.12 warns about them); the path text is unchanged.
open_data = np.load(r"D:\Baihm\EISNN\Dataset\Anomaly\Open\EIS_Open.npy")
open_data_Archive_New = np.load(r"D:\Baihm\EISNN\Dataset\Anomaly\Open\Archive_New_Open_cluster.npy")
open_data = np.vstack([open_data, open_data_Archive_New])
# the label used to say "weird_data" although open_data is printed
print(f"open_data shape: {open_data.shape}")

## SVM Training

In [None]:
# One-class model fitted on the "open" samples only.
openSVMmodel = OneClassSVM(kernel='rbf', gamma='auto', nu=0.005)  # nu adjusts how permissive the model is
openSVMmodel.fit(open_data)

# Un-comment one of the lines below to persist the fitted model.
# joblib.dump(openSVMmodel, "openSVMmodel.pkl")
# joblib.dump(openSVMmodel, "../../Outlier/openSVMmodel.pkl")

In [None]:
# The larger the score, the more similar a row is to the training data
_scores = openSVMmodel.decision_function(test_data)

# Squash the scores into (0, 1); the factor 5 can be tuned to control how
# confident the output looks
_probs = expit(_scores * 5)

# Step 5: build the m x 2 output [p, 1 - p]
_not_probs = 1 - _probs
openProbs = np.stack([_probs, _not_probs], axis=1)
print(openProbs.shape)  # (m, 2)

plt.figure()
plt.plot(_scores[:])
# plt.plot(weirdProbs[:,0])


### PCA

In [None]:

# Standardise every feature, then project onto 10 principal components
_scale = StandardScaler()
_data_norm = _scale.fit_transform(almost_data_list)
_pca_m = PCA(n_components = 10)
_pca_data = _pca_m.fit_transform(_data_norm)

# cmap = plt.colormaps.get_cmap("rainbow_r")
cmap = plt.colormaps.get_cmap("Set1")

# first two components, coloured by "probability above 0.5"
_point_colors = cmap(_probs>0.5)
# _point_colors = cmap((_probs>0.1) & (_probs<0.2))
plt.figure(figsize=(9,9))
plt.scatter(_pca_data[:,0],_pca_data[:,1],color=_point_colors, s=0.01)
# plt.gca().set_aspect('equal', adjustable='box')
plt.title('PCA')


## Open Criterion

In [None]:
def openCriterion_threshold(freq, test_data, threshold = np.log(2e6)):
    '''==================================================
        Define the criterion of open data
        Parameter: 
            freq: frequency of EIS data [101,]
            test_data: data to be tested [n x 202] - (logZ)
            threshold: lower limit for the first-half columns
                       (default log(2e6), the value that was
                       previously hard-coded in the body)
        Return:
            open_mask: True for open data
        Note:
            A row is flagged when every first-half column whose
            frequency is below 1e3 exceeds threshold.
            The threshold argument used to be ignored; it is now
            honoured, and the default was set to the formerly
            hard-coded value so default calls behave as before.
        ==================================================
    '''
    # column selector: only the first half can be selected, the second
    # half stays False
    _freq_open_mask = np.zeros(test_data.shape[1])
    _freq_open_mask[:_freq_open_mask.shape[0]//2] = freq < 1e3
    _freq_open_mask = _freq_open_mask.astype(bool)

    open_mask = np.all(test_data[:,_freq_open_mask] > threshold, axis=1)

    return open_mask

def openCriterion(model:OneClassSVM, test_data, threshold=0.5):
    '''==================================================
        Define the criterion of open data
        Parameter: 
            model: trained OneClassSVM model
            test_data: data to be tested [n x 202] - (logZ)
            threshold: rows whose probability exceeds this value
                       are flagged
        Return:
            open_mask: True for open data
        Note:
            probability = expit(5 * decision_function score)
        ==================================================
    '''
    # sigmoid of the scaled decision score, compared to the threshold
    return expit(model.decision_function(test_data) * 5) > threshold



In [None]:
# Apply the stored model and show a random sample of the rows it flags
_model = joblib.load("openSVMmodel.pkl")
open_mask = openCriterion(_model, test_data, threshold=0.5)

open_test_data = test_data[open_mask]


freq_list = np.linspace(0,5000-1,101,dtype=int, endpoint=True)
# _freq_all = chData[0,0,freq_list]
# x axis: 101 log-spaced points from 1e0 to 1e6
_freq_all = np.logspace(0,6,101, endpoint=True)
# 1000 random row positions (drawn with replacement)
_rand_ch = np.floor(np.random.rand(1000)*open_test_data.shape[0]).astype(int)
  
plt.figure()
for i, _row in enumerate(_rand_ch):
    plt.loglog(_freq_all, np.exp(open_test_data[_row,:101]), label = f"{i}", alpha=0.05)
plt.grid(True)

In [None]:
# freq_list = np.linspace(0,5000-1,101,dtype=int, endpoint=True)
# _freq_all = chData[0,0,freq_list]

# # open_mask = openCriterion(_freq_all, test_data, threshold=np.log(3e6))
# open_mask = openCriterion(_freq_all, test_data)

# open_test_data = test_data[open_mask]

# open_test_data.shape


# fig, axis = plt.subplots(1,2)

# for i in range(open_test_data.shape[0]):
#     axis[0].loglog(_freq_all, np.exp(open_test_data[i,:101]), label = f"{i}", alpha=0.005)
#     axis[1].semilogx(_freq_all, np.rad2deg(open_test_data[i,101:]), label = f"{i}", alpha=0.005)
#     axis[0].grid()

## PCA

In [None]:
# Standardise every feature, then project onto 10 principal components
_scale = StandardScaler()
_data_norm = _scale.fit_transform(almost_data_list)
_pca_m = PCA(n_components = 10)
_pca_data = _pca_m.fit_transform(_data_norm)


# cmap = plt.colormaps.get_cmap("rainbow_r")
cmap = plt.colormaps.get_cmap("Set1")

# first two components; colour index 0 = not open, 1 = open
_point_colors = cmap(open_mask.astype(int))
plt.figure(figsize=(9,9))
plt.scatter(_pca_data[:,0],_pca_data[:,1],color=_point_colors, s=0.01)
plt.title('PCA')
