# Import

## Import

In [1]:
import numpy as np

from scipy.cluster.hierarchy import linkage, dendrogram
from scipy.spatial.distance import squareform
from scipy import stats
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import OPTICS
from sklearn.mixture import BayesianGaussianMixture
from sklearn.metrics import silhouette_score

import re
import os
from loguru import logger

import matplotlib.pyplot as plt 
import matplotlib.patches as mpatches
%matplotlib qt

from collections import defaultdict
# from datetime import datetime




In [2]:
def gatherCSV(rootPath, outsuffix = 'Tracking'):
    '''==================================================
        Collect all EIS.csv files in the rootPath
        Parameter: 
            rootPath: current search path
            outsuffix: Saving path of EIS.csv files
        Returen:
            EISDict: a 2D-dict of EIS data
            Storage Frame: EISDict[_sessionIndex][_channelIndex] = "_filepath"
        ==================================================
    '''
    _filename       = None
    _filepath       = None
    _trackpath      = None
    _csvpath        = None
    _sessionIndex   = None
    _channelIndex   = None
    _processed      = None

    EISDict = defaultdict(dict)

    ## Iterate session
    session_pattern = re.compile(r"(.+?)_(\d{8})_01")
    bank_pattern    = re.compile(r"([1-4])")
    file_pattern    = re.compile(r"EIS_ch(\d{3})\.csv")

    ## RootDir
    for i in os.listdir(rootPath):
        match_session = session_pattern.match(i)
        ## SessionDir
        if match_session:
            logger.info(f"Session Begin: {i}")
            _sessionIndex = match_session[2]
            for j in os.listdir(f"{rootPath}/{i}"):
                match_bank = bank_pattern.match(j)
                ## BankDir
                if match_bank:
                    logger.info(f"Bank Begin: {j}")
                    _trackpath = f"{rootPath}/{i}/{j}/{outsuffix}"
                    if not os.path.exists(_trackpath):
                        continue

                    for k in os.listdir(f"{rootPath}/{i}/{j}/{outsuffix}"):
                        match_file = file_pattern.match(k)
                        ## File
                        if match_file:
                            _filename = k
                            _filepath = f"{rootPath}/{i}/{j}/{outsuffix}/{k}"
                            _channelIndex = (int(match_bank[1])-1)*32+int(match_file[1])
                            
                            EISDict[_sessionIndex][_channelIndex] = f"{rootPath}/{i}/{j}/{outsuffix}/{k}"
                            
    return EISDict

In [3]:
# Data Readout
def readChannel(chID, fileDict):
    '''==================================================
        Read EIS.csv file by Channel
        Parameter: 
            chID: channel index
            fileDict: EISDict[_sessionIndex][_channelIndex] = "_filepath"
        Returen:
            freq: frequency
            Zreal: real part of impedance
            Zimag: imaginary part of impedance
        ==================================================
    '''
    chData = []
    for ssID in fileDict.keys():
        _data   = np.loadtxt(fileDict[ssID][chID], delimiter=',')
        _freq   = _data[:,0]
        _Zreal  = _data[:,3]
        _Zimag  = _data[:,4]
        chData.append(np.stack((_freq, _Zreal, _Zimag),axis=0))

    return np.stack(chData, axis=0)

In [4]:
def EIS_recal(data):
    f_poi = data[0,:]
    # Z_poi = data[1,:] * np.exp(1j*np.deg2rad(data[2,:]))
    Z_poi = data[1,:] + 1j*data[2,:]
    Y_poi = 1/Z_poi

    Rg0 = 1.611e13
    Cp0 = 1.4e-9
    
    _Rg0_rescale = 1/Rg0*np.power(f_poi,1.583)
    _Cp0_rescale = Cp0*np.power(f_poi,0.911)
    Y_org = Y_poi - _Rg0_rescale + 1j*_Cp0_rescale
    Z_org = 1/Y_org

    # Amp Calibration
    Z_ampC = np.abs(Z_org)

    # Phz Calibration
    Z_phzC = np.angle(Z_org)
    
    Z_rec = Z_ampC * np.exp(1j*Z_phzC)

    
    return np.transpose(np.array([f_poi, np.real(Z_rec), np.imag(Z_rec)])).T


def EIS_recal_ver02(data, _phz_0 = None):
    f_poi = data[0,:]
    # Z_poi = data[1,:] * np.exp(1j*np.deg2rad(data[2,:]))
    Z_poi = data[1,:] + 1j*data[2,:]
    Y_poi = 1/Z_poi

    Rg0 = 1.611e13
    Cp0 = 1.4e-9
    
    _Rg0_rescale = 1/Rg0*np.power(f_poi,1.583)
    _Cp0_rescale = Cp0*np.power(f_poi,0.911)
    Y_org = Y_poi - _Rg0_rescale + 1j*_Cp0_rescale
    Z_org = 1/Y_org

    # Phz Calibration
    if _phz_0 is None:
        _phz_0 = np.loadtxt("./phz_Calib.txt")
    
    Z_ampC = np.abs(Z_org)
    # Z_phzC = np.angle(Z_org) - _phz_0
    Z_phzC = np.angle(Z_org) - _phz_0

    Z_rec = Z_ampC * np.exp(1j*Z_phzC)

    # C = 5e-10
    Rs0 = 100
    Z_rec = Z_rec - Rs0



    Cp0 = 5e-10
    _Cp0_rescale = Cp0 * f_poi
    Z_rec = 1/(1/Z_rec - 1j * _Cp0_rescale)

    

    # Ls0 = 1.7e-4
    Ls0 = 5e-4
    _Ls0_rescale = Ls0 * f_poi
    Z_rec = Z_rec - 1j * _Ls0_rescale

    # C = 5e-10
    Rs0 = 566
    Z_rec = Z_rec - Rs0
    
    return np.stack([f_poi, np.real(Z_rec), np.imag(Z_rec)], axis=1).T
    

## Data Loading

In [5]:
PLOT_FLAG = False


# rootPath = "D:/Baihm/EISNN/Archive/01037160_归档"  
# ch_id = 0   # Default  
# ch_id = 1   # 居然分成一类了？？？
# ch_id = 9   # 异常值没被筛出去，类似还有3
# ch_id = 7  # Normal Example
# ch_id = 20  # Normal to Short, Same to GPR  
# ch_id = 52    
# ch_id = 89  


# rootPath = "D:/Baihm/EISNN/Archive/01037161_归档"   # #2
# ch_id = 10  # Error - Ap结果不连续

# rootPath = "D:/Baihm/EISNN/Dataset/05087163_归档"
# ch_id = 7   # one outlier
# ch_id = 50  # No outlier but in two Phases
# ch_id = 55  # One outlier &wired end point
# ch_id = 114 # Open Circuit with on outpler


# rootPath = "D:\Baihm\EISNN\Archive/06047730_归档"
# ch_id = 41  # outlier detection error but has 20 samples

# rootPath = "D:/Baihm/EISNN/Archive/02067447_归档"
# ch_id = 68  # Short all the time

# rootPath = "D:/Baihm/EISNN/Archive/01067095_归档"
# ch_id = 19    # First Sample is outlier

# rootPath = "D:/Baihm/EISNN/Archive/09290511_归档"
# ch_id = 13    # Up & Down, 2 outliers
# ch_id = 21    # Normal + 2 outlier
# ch_id = 41    # Normal + 2 outlier - *(Hard To Tell)
# ch_id = 79    # 3-class, What a mess

# rootPath = "D:/Baihm/EISNN/Archive/11057712_归档"
# ch_id = 106    # Very Good Electrode with 1 hidden outlier, and one phase shift

# rootPath = "D:\Baihm\EISNN\Archive/10057084_归档"
# ch_id = 16    # Totaly Mess
# ch_id = 18    # Totaly Mess

# rootPath = "D:\Baihm\EISNN\Archive/11067223_归档"
# ch_id = 124     # Perfect but two phase with one outlier


# rootPath = "D:\Baihm\EISNN\Archive/15361101_归档"
# ch_id = 0     # Only One Sample - Run With Error


# rootPath = "D:\Baihm\EISNN\Archive/11207147_归档"
# ch_id = 0     # Only Three Sample - Run With Error


# rootPath = "D:\Baihm\EISNN\Archive/06017758_归档"
# ch_id = 96     # Perfect of Perfect


rootPath = "D:\Baihm\EISNN\Archive/22037380_归档"
ch_id = 20     # Connection Error


EISDict = gatherCSV(rootPath)
chData_full = readChannel(ch_id, EISDict)
freq_list = np.linspace(1000,np.shape(chData_full)[2]-1,101,dtype=int, endpoint=True)

if False:
    phz_calibration = np.loadtxt("./phz_Calib.txt")
    for i in range(np.shape(chData)[0]):
        # ch_eis = EIS_recal(chData[i,:,:])
        ch_eis = EIS_recal_ver02(chData[i,:,:], phz_calibration)
        chData[i,:,:] = ch_eis
chData = chData_full[:,:,freq_list]

[32m2025-05-08 18:08:19.309[0m | [1mINFO    [0m | [36m__main__[0m:[36mgatherCSV[0m:[36m32[0m - [1mSession Begin: 22037380_20241205_01[0m
[32m2025-05-08 18:08:19.310[0m | [1mINFO    [0m | [36m__main__[0m:[36mgatherCSV[0m:[36m38[0m - [1mBank Begin: 1[0m
[32m2025-05-08 18:08:19.311[0m | [1mINFO    [0m | [36m__main__[0m:[36mgatherCSV[0m:[36m38[0m - [1mBank Begin: 2[0m
[32m2025-05-08 18:08:19.311[0m | [1mINFO    [0m | [36m__main__[0m:[36mgatherCSV[0m:[36m38[0m - [1mBank Begin: 3[0m
[32m2025-05-08 18:08:19.312[0m | [1mINFO    [0m | [36m__main__[0m:[36mgatherCSV[0m:[36m38[0m - [1mBank Begin: 4[0m
[32m2025-05-08 18:08:19.313[0m | [1mINFO    [0m | [36m__main__[0m:[36mgatherCSV[0m:[36m32[0m - [1mSession Begin: 22037380_20241206_01[0m
[32m2025-05-08 18:08:19.314[0m | [1mINFO    [0m | [36m__main__[0m:[36mgatherCSV[0m:[36m38[0m - [1mBank Begin: 1[0m
[32m2025-05-08 18:08:19.315[0m | [1mINFO    [0m | [36m__main__

## Data Plot

In [6]:
if PLOT_FLAG:
    fig, axis = plt.subplots(1,4,figsize=(15,6))
    cmap = plt.colormaps.get_cmap('rainbow_r')
    for i in range(np.shape(chData)[0]):
    # for i in [0,4,2,11]:
        ch_eis = chData[i,:,:]
        # ch_eis = EIS_recal(chData[i,:,:])[:,freq_list]
        _color = cmap(i/np.shape(chData)[0])
        axis[0].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"Session {i}")
        axis[1].semilogx(ch_eis[0,:], np.rad2deg(np.angle(ch_eis[1,:]+1j*ch_eis[2,:])), color = _color, linewidth=2, label=f"Session {i}")
        axis[2].plot(ch_eis[1,:], -ch_eis[2,:], color = _color, linewidth=2, label=f"Session {i}")
        # axis[4].loglog(ch_eis[1,:], -ch_eis[2,:], color = _color, linewidth=2, label=f"Session {i}")
    
        # _poi_Z = np.log(np.abs(ch_eis[1,:]+1j*ch_eis[2,:]))
        # _poi_P = np.angle(ch_eis[1,:]+1j*ch_eis[2,:])
        # _poi_eis = _poi_Z * np.exp(1j*_poi_P)
        # axis[3].plot(np.real(_poi_eis), -np.imag(_poi_eis), color = _color, linewidth=2, label=f"Session {i}")
        _poi_Z = np.log(ch_eis[1,:]+1j*ch_eis[2,:])
        axis[3].plot(np.real(_poi_Z), -np.imag(_poi_Z), color = _color, linewidth=2, label=f"Session {i}")
        

    axis[0].legend(frameon=False, loc='upper left')

# Outlier_Detection

## DTW Distance

In [13]:

REVERSED_DTW_FLAG = True

if not REVERSED_DTW_FLAG:
    # # From 0,0 -> n-1,n-1
    def dtw_calculation(s1, s2):
        len_s1, len_s2 = np.shape(s1)[0], np.shape(s2)[0]
        dtw_matrix = np.full((len_s1 + 1, len_s2 + 1), np.inf)
        dtw_trace = np.zeros((len_s1 + 1, len_s2 + 1, 2))
        dtw_matrix[0, 0] = 0
        for i in range(1, len_s1 + 1):
            for j in range(1, len_s2 + 1):
                cost = np.abs(s1[i - 1] - s2[j - 1])
                penalty = [ dtw_matrix[i - 1, j],
                            dtw_matrix[i, j - 1],
                            dtw_matrix[i - 1, j - 1]]
                i_p = np.argmin(penalty)
                dtw_matrix[i, j] = cost + penalty[i_p]
                if i_p == 0: dtw_trace[i, j] = [i - 1, j]  
                elif i_p==1: dtw_trace[i, j] = [i, j - 1]
                elif i_p==2: dtw_trace[i, j] = [i - 1, j - 1]
        dtw_sequence = []
        i,j = int(len_s1), int(len_s2)
        while  i!=0 and j!=0:
            dtw_sequence.append([i-1, j-1])     # Have to be i-1 & j-1, because matrix range from 1 to len + 1 
            i,j = int(dtw_trace[i, j, 0]), int(dtw_trace[i, j, 1])


        return [dtw_matrix[1:,1:], np.array(dtw_sequence)]


else: 
    # From n-1,n-1 -> 0,0
    def dtw_calculation(s1, s2):
        len_s1, len_s2 = np.shape(s1)[0], np.shape(s2)[0]
        dtw_matrix = np.full((len_s1 + 1, len_s2 + 1), np.inf)
        dtw_trace = np.zeros((len_s1 + 1, len_s2 + 1, 2))
        dtw_matrix[len_s1, len_s1] = 0
        for i in reversed(range(0, len_s1)):
            for j in reversed(range(0, len_s2)):
                cost = np.abs(s1[i] - s2[j])
                penalty = [ dtw_matrix[i + 1, j],
                            dtw_matrix[i, j + 1],
                            dtw_matrix[i + 1, j + 1]]
                i_p = np.argmin(penalty)
                dtw_matrix[i, j] = cost + penalty[i_p]
                if i_p == 0: dtw_trace[i, j] = [i + 1, j]  
                elif i_p==1: dtw_trace[i, j] = [i, j + 1]
                elif i_p==2: dtw_trace[i, j] = [i + 1, j + 1]
        dtw_sequence = []
        i,j = 0, 0
        while  i!=len_s1-1 and j!=len_s1-1:
            dtw_sequence.append([i, j])     # Have to be i-1 & j-1, because matrix range from 1 to len + 1 
            i,j = int(dtw_trace[i, j, 0]), int(dtw_trace[i, j, 1])


        return [dtw_matrix[:-1,:-1], np.array(dtw_sequence)]

### DTW - Calculation

In [14]:
ch_EIS = chData[:,1,:] + 1j*chData[:,2,:]

# 计算DTW距离矩阵
# data = ch_DRT
# data = ch_EIS
data = np.log(ch_EIS)

num_samples = np.shape(data)[0]
num_data = np.shape(data)[1]
dtw_dist_value = np.zeros((num_samples,num_samples))
dtw_dist_matrix = defaultdict(lambda: defaultdict(list))
dtw_dist_trace = defaultdict(lambda: defaultdict(list))


# plt.figure()
for i in range(num_samples):
    dtw_dist_matrix[i][i] = np.zeros((np.shape(data[i])[0],np.shape(data[i])[0]))
    dtw_dist_value[i,i] = 0
    for j in range(i + 1, num_samples):
        distance, dtw_sequence = dtw_calculation(data[i], data[j])
        dtw_dist_matrix[i][j] = distance
        dtw_dist_matrix[j][i] = distance
        dtw_dist_trace[i][j] = [dtw_sequence[:,0],dtw_sequence[:,1]]
        dtw_dist_trace[j][i] = [dtw_sequence[:,1],dtw_sequence[:,0]]

        if REVERSED_DTW_FLAG:
            dtw_dist_value[i,j] = distance[0,0]
            dtw_dist_value[j,i] = distance[0,0]
        else: 
            dtw_dist_value[i,j] = distance[-1,-1]
            dtw_dist_value[j,i] = distance[-1,-1]

        # plt.plot(dtw_sequence[:,0],dtw_sequence[:,1])




#### Plot DTW Dist

In [15]:
if False:
    _n = num_samples
    _m = num_samples

    fig, axis = plt.subplots(_n,_m)
    # cmap = plt.colormaps.get_cmap('rainbow_r')
    # colors = cmap(np.linspace(0,1,num_samples))


    for i in range(_n):
        for j in range(_m):
            # if i==j: continue
            axis[i,j].imshow(dtw_dist_matrix[i][j], cmap='coolwarm', interpolation='nearest')
        # axis[int(i/_m),int(i%_m)].legend()

if False:
    _n = num_samples
    _m = num_samples
    plt.figure()
    for i in range(_n):
        for j in range(_m):
            plt.imshow(dtw_dist_value, cmap='coolwarm', interpolation='nearest')
        # axis[int(i/_m),int(i%_m)].legend()


#### Plot DTW Path

In [16]:
if False:
    _n = 3
    _m = int(num_samples/3)+1

    fig, axis = plt.subplots(_n,_m)
    cmap = plt.colormaps.get_cmap('rainbow_r')
    colors = cmap(np.linspace(0,1,num_samples))


    for i in range(num_samples):
        for j in range(num_samples):
            if i==j: continue
            axis[int(i/_m),int(i%_m)].plot(dtw_dist_trace[i][j][0][:],dtw_dist_trace[i][j][1][:],color = colors[j])
        # axis[int(i/_m),int(i%_m)].legend()




### DTW Phase Space

In [17]:
# Extract Distance & Phase from DTW Data
def DTW_Amp(dtw_dist_inst, reversed = False):
    # dtw_dist_avg = dtw_dist_inst[-1][-1]/(np.shape(dtw_dist_inst)[0]-1)
    if reversed:
        dtw_dist_avg = dtw_dist_inst[0][0]
    else:
        dtw_dist_avg = dtw_dist_inst[-1][-1]
    return dtw_dist_avg

def DTW_Phz(dtw_trace_inst, reversed = False):
    if reversed:
        _x = dtw_trace_inst[0]
        _y = dtw_trace_inst[1]

    else:
        _x = dtw_trace_inst[0][::-1]
        _y = dtw_trace_inst[1][::-1]

    _x_diff = np.diff(_x[:])
    _y_diff = np.diff(_y[:])

    phz_scale = _x_diff + _y_diff
    phz_value = np.array(_x[1:] - _y[1:])

    dtw_phz_norm = np.sum(phz_scale * phz_value) / (_x[-1]**2)      # _x[-1] & _y[-1] = len(curve)-1

    return dtw_phz_norm



In [18]:
dtw_temporal_imag = np.zeros((num_samples,num_samples,2))

for i in range(num_samples):
    for j in range(i+1, num_samples):
        _amp = DTW_Amp(dtw_dist_matrix[i][j], REVERSED_DTW_FLAG)
        _phz = DTW_Phz(dtw_dist_trace[i][j], REVERSED_DTW_FLAG)
        dtw_temporal_imag[i, j] = [_amp,_phz]
        dtw_temporal_imag[j, i] = [-_amp,_phz]  # Flip 

dtw_temporal_vec = dtw_temporal_imag[:,:,0] * np.exp(1j * dtw_temporal_imag[:,:,1] * np.pi / 2)

dtw_temporal_dist = np.zeros((num_samples,num_samples))
for i in range(num_samples):
    for j in range(i+1, num_samples):
        dtw_temporal_dist[i,j] = np.sum(np.abs(dtw_temporal_vec[i,:] - dtw_temporal_vec[j,:]))
        dtw_temporal_dist[j,i] = dtw_temporal_dist[i,j]

####  Plot Phase Space Dist

In [19]:
if PLOT_FLAG:
    plt.figure()
    plt.imshow(dtw_temporal_dist, cmap='coolwarm', interpolation='nearest')

#### Plot Phase Space By Time [i,j]

In [20]:
if PLOT_FLAG:
    _n = 3
    _m = int(num_samples/3)+1

    fig, axis = plt.subplots(_n,_m, sharex=True, sharey=True)
    cmap = plt.colormaps.get_cmap('rainbow_r')
    colors = cmap(np.linspace(0,1,num_samples))


    for i in range(num_samples):
        for j in range(num_samples):
            if i==j: 
                axis[int(i/_m),int(i%_m)].scatter(0,0,color = colors[j])
            else:
                _poi = dtw_temporal_vec[i,j]
                axis[int(i/_m),int(i%_m)].scatter(np.real(_poi),np.imag(_poi),color = colors[j])
        # axis[int(i/_m),int(i%_m)].legend()


#### Plot Phase Space By Sample [j,i]

In [21]:
if False:
    _n = 3
    _m = int(num_samples/3)+1

    fig, axis = plt.subplots(_n,_m, sharex=True, sharey=True)
    cmap = plt.colormaps.get_cmap('rainbow_r')
    colors = cmap(np.linspace(0,1,num_samples))


    for i in range(num_samples):
        for j in range(num_samples):
            if i==j: 
                axis[int(i/_m),int(i%_m)].scatter(0,0,color = colors[j])
            else:
                _poi = dtw_temporal_vec[j,i]
                axis[int(i/_m),int(i%_m)].scatter(np.real(_poi),np.imag(_poi),color = colors[j])
        # axis[int(i/_m),int(i%_m)].legend()


#### Plot Dimensionality Reduction

In [22]:
if False:
    from sklearn.decomposition import PCA
    from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding, MDS
    import umap.umap_ as umap  # 请确保安装了 umap-learn

    dtw_manifold_stack_vec = np.zeros((num_samples , num_samples*2))
    for i in range(num_samples):
        dtw_manifold_stack_vec[i,:] = np.hstack((dtw_temporal_vec[i,:].real, dtw_temporal_vec[i,:].imag))


    data = dtw_manifold_stack_vec

    _order = 3
    methods = {
        'PCA': PCA(n_components=_order),
        't-SNE': TSNE(n_components=_order, perplexity=5, random_state=42),  # perplexity 设置为 5
        'Isomap': Isomap(n_components=_order),
        'LLE': LocallyLinearEmbedding(n_components=_order, random_state=42),
        'MDS': MDS(n_components=_order, random_state=42),
        'UMAP': umap.UMAP(n_components=_order, random_state=42)
    }

    embeddings = {}
    emb_dist = {}
    for name, method in methods.items():
        embedding = method.fit_transform(data)
        embeddings[name] = embedding
        
        _x = embedding[:,0].flatten()
        _y = embedding[:,1].flatten()

        emb_dist[name] = np.sqrt((_x[:, np.newaxis] - _x[np.newaxis, :])**2 + 
                            (_y[:, np.newaxis] - _y[np.newaxis, :])**2)
        


    fig, axis = plt.subplots(3,4,figsize=(12,6))
    for i, (name, emb) in enumerate(embeddings.items()):
        _x = emb[:,0].flatten()
        _y = emb[:,1].flatten()

        _dist = np.sqrt((_x[:, np.newaxis] - _x[np.newaxis, :])**2 + 
                            (_y[:, np.newaxis] - _y[np.newaxis, :])**2)
        # _dist = emb_dist[name]

        axis[np.int16(i/2),(i%2)*2].scatter(emb[:, 0], emb[:, 1], c=np.arange(np.shape(data)[0]), cmap='rainbow_r', edgecolor='k', s=100)
        axis[np.int16(i/2),(i%2)*2].set_title(name)

        s = axis[np.int16(i/2),(i%2)*2+1].imshow(_dist, cmap='coolwarm', interpolation='nearest')
        fig.colorbar(s, ax=axis[np.int16(i/2),(i%2)*2+1])
    plt.tight_layout()
    plt.show()


### DTW Order Tree

#### Linkage Method

In [23]:
a = squareform(dtw_temporal_dist)
b = dtw_temporal_dist

b.shape

(13, 13)

In [24]:
Z = linkage(squareform(dtw_temporal_dist), method='single', optimal_ordering=True)  # 使用 Ward 方式进行层次聚类
# Z = linkage(squareform(dtw_dist_value), method='ward', optimal_ordering=True)  # 使用 Ward 方式进行层次聚类


##### Plot Linkage Tree

In [25]:
if PLOT_FLAG:
    plt.figure(figsize=(8,4))
    _ = dendrogram(Z, labels=np.arange(num_samples))


#### Construct DTW Tree

In [26]:
class DTW_TREE:
    def __init__(self, id, value = np.inf):
        self.id = id
        self.value = value
        self.l = None
        self.r = None
        self.p = None

    def display(self):
        lines, *_ = self._display_aux()
        for line in lines: print(line)
    def _display_aux(self):
        if self.l is None and self.r is None:
            # _s = f"{self.id:03d}:{self.value:03d}"
            _s = f"{self.id:03d}"
            _w = len(_s)
            _h = 1
            _m = _w//2
            return [_s], _w, _h, _m
        elif self.r is None:
            _sr, _wr, _hr, _mr = self.l._display_aux()
            # _s = f"{self.id:03d}:{self.value:03d}"
            _s = f"{self.id:03d}"
            _w = len(_s)
            
            _line_0 = (_mr + 1) * ' ' + (_wr - _mr - 1) * '_' + _s
            _line_1 = _mr * ' ' + '/' + (_wr - _mr - 1 + _w) * ' '
            _lins_s = [line + _w * ' ' for line in _sr]
            return [_line_0, _line_1] + _lins_s, _wr + _w, _hr + 2, _wr + _w // 2
        elif self.l is None:
            _sl, _wl, _hl, _ml = self.r._display_aux()
            # _s = f"{self.id:03d}:{self.value:03d}"
            _s = f"{self.id:03d}"
            _w = len(_s)
            _line_0 = _s + _ml * '_' + (_wl - _ml) * ' '
            _line_1 = (_w + _ml) * ' ' + '\\' + (_wl - _ml - 1) * ' '
            _lins_s = [_w * ' ' + line for line in _sl]
            return [_line_0, _line_1] + _lins_s, _wl + _w, _hl + 2, _w // 2
        else:
            _sl, _wl, _hl, _ml = self.l._display_aux()
            _sr, _wr, _hr, _mr = self.r._display_aux()
            # _s = f"{self.id:03d}:{self.value:03d}"
            _s = f"{self.id:03d}"
            _w = len(_s)
            first_line = (_ml + 1) * ' ' + (_wl - _ml - 1) * '_' + _s + _mr * '_' + (_wr - _mr) * ' '
            second_line = _ml * ' ' + '/' + (_wl - _ml - 1 + _w + _mr) * ' ' + '\\' + (_wr - _mr - 1) * ' '
            if _hl < _hr:
                _sl += [_wl * ' '] * (_hr - _hl)
            elif _hr < _hl:
                _sr += [_wr * ' '] * (_hl - _hr)
            zipped_lines = zip(_sl, _sr)
            lines = [first_line, second_line] + [a + _w * ' ' + b for a, b in zipped_lines]
            return lines, _wl + _wr + _w, max(_hl, _hr) + 2, _wl + _w // 2


def dtw_tree_merge(dtw_tree_A, dtw_tree_B, id):
    if dtw_tree_A.value < dtw_tree_B.value:
        dtw_tree_root = DTW_TREE(id, dtw_tree_A.value)
        dtw_tree_root.l = dtw_tree_A
        dtw_tree_root.r = dtw_tree_B
    else:
        dtw_tree_root = DTW_TREE(id, dtw_tree_B.value)
        dtw_tree_root.l = dtw_tree_B
        dtw_tree_root.r = dtw_tree_A
    dtw_tree_A.p = dtw_tree_root
    dtw_tree_B.p = dtw_tree_root
    return dtw_tree_root


def dtw_leaf_ordering(dtw_tree):
    if dtw_tree is None: return []
    if dtw_tree.l is None and dtw_tree.r is None: return [dtw_tree.value]
    return dtw_leaf_ordering(dtw_tree.l) + dtw_leaf_ordering(dtw_tree.r)




In [27]:
Z

array([[1.00000000e+01, 1.10000000e+01, 6.22011937e+01, 2.00000000e+00],
       [7.00000000e+00, 8.00000000e+00, 6.59984316e+01, 2.00000000e+00],
       [9.00000000e+00, 1.30000000e+01, 1.05099121e+02, 3.00000000e+00],
       [2.00000000e+00, 3.00000000e+00, 1.52244376e+02, 2.00000000e+00],
       [6.00000000e+00, 5.00000000e+00, 1.83287163e+02, 2.00000000e+00],
       [1.50000000e+01, 1.20000000e+01, 2.33033063e+02, 4.00000000e+00],
       [1.60000000e+01, 1.70000000e+01, 4.04212154e+02, 4.00000000e+00],
       [0.00000000e+00, 1.90000000e+01, 4.31296430e+02, 5.00000000e+00],
       [2.00000000e+01, 4.00000000e+00, 6.20542277e+02, 6.00000000e+00],
       [2.10000000e+01, 1.00000000e+00, 6.88872071e+02, 7.00000000e+00],
       [2.20000000e+01, 1.80000000e+01, 1.45105963e+03, 1.10000000e+01],
       [1.40000000e+01, 2.30000000e+01, 2.25101994e+03, 1.30000000e+01]])

In [28]:
_node_cnt = num_samples 
dtw_node_list = []
for i in range(num_samples):
    dtw_node_list.append(DTW_TREE(i,i))

for i in Z:
    dtw_node_list.append(dtw_tree_merge(dtw_node_list[int(i[0])], dtw_node_list[int(i[1])],_node_cnt))
    _node_cnt = _node_cnt + 1

dtw_node_list[-1].display()


                                         ______________________024____     
                                        /                             \    
                                   ____023________________          _014_  
                                  /                       \        /     \ 
                             ____022_          __________018_     007   008
                            /        \        /              \             
     ______________________021_     001     _015____        012            
    /                          \           /        \                      
  _020__________              004         009     _013_                    
 /              \                                /     \                   
000        ____019____                          010   011                  
          /           \                                                    
        _016_       _017_                                                  
       /    

#### Construct DTW Chain

In [29]:
def all_longest_increasing_subsequences(seq):
    if not seq:
        return []
    
    n = len(seq)
    dp = [1] * n
    prev = [[] for _ in range(n)]
    
    for i in range(n):
        for j in range(i):
            if seq[j] < seq[i]:
                if dp[j] + 1 > dp[i]:
                    dp[i] = dp[j] + 1
                    prev[i] = [j]
                elif dp[j] + 1 == dp[i]:
                    prev[i].append(j)
    
    max_len = max(dp)
    end_indices = [i for i in range(n) if dp[i] == max_len]
    
    def backtrack(i):
        if not prev[i]:
            return [[seq[i]]]
        res = []
        for j in prev[i]:
            for subseq in backtrack(j):
                res.append(subseq + [seq[i]])
        return res

    result = []
    for i in end_indices:
        result.extend(backtrack(i))
    
    return result


def longest_sequence_dist(dist_matrix, sequence):
    _dist = 0
    for i in range(1,len(sequence)):
        _dist = _dist + dist_matrix[i-1,i]
    return _dist



In [30]:
leaf_full = dtw_leaf_ordering(dtw_node_list[-1])
# Find All longest_increasing_subsequences
leaf_ordering = all_longest_increasing_subsequences(leaf_full)

optimal_seq_dist = longest_sequence_dist(dtw_temporal_dist, leaf_ordering[0])
optimal_seq_id = 0
for i in range(1, len(leaf_ordering)):
    _dist = longest_sequence_dist(dtw_temporal_dist, leaf_ordering[i])
    if _dist < optimal_seq_dist:
        optimal_seq_dist = _dist
        optimal_seq_id = i

leaf_optimal_seq = np.array(leaf_ordering[optimal_seq_id])
leaf_optimal_len = np.shape(leaf_optimal_seq)[0]

leaf_anomaly = np.array([poi for poi in leaf_full if poi not in leaf_optimal_seq])
logger.info(f"\n Ordered Linkage Tree: \t\t\t{leaf_full}\n Longest Increasing Subsequence: \t{leaf_ordering}\n Min-Dist Subsequence: \t\t\t{leaf_optimal_seq}\n Outliers: \t\t\t\t{leaf_anomaly}")


[32m2025-05-08 17:03:19.180[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m17[0m - [1m
 Ordered Linkage Tree: 			[0, 2, 3, 5, 6, 4, 1, 9, 10, 11, 12, 7, 8]
 Longest Increasing Subsequence: 	[[0, 2, 3, 5, 6, 9, 10, 11, 12]]
 Min-Dist Subsequence: 			[ 0  2  3  5  6  9 10 11 12]
 Outliers: 				[4 1 7 8][0m


In [31]:
dtw_seq_vec = np.zeros((leaf_optimal_len, leaf_optimal_len*2))
for i in range(leaf_optimal_len):
    dtw_seq_vec[i,:] = np.hstack((dtw_temporal_vec[leaf_optimal_seq[i],leaf_optimal_seq].real, dtw_temporal_vec[leaf_optimal_seq[i],leaf_optimal_seq].imag))

# # ———— Affinity Clustering ————
best_model = None
best_score = -np.inf
for seed in [7,42,1999]:
    aff_prop = AffinityPropagation()
    aff_prop.fit(dtw_seq_vec)
    _score = silhouette_score(dtw_seq_vec, aff_prop.labels_)
    if _score > best_score:
        best_score = _score
        best_model = aff_prop.labels_

dtw_cluster = best_model
n_clusters = len(np.unique(dtw_cluster))

logger.info(f"\n [Affinity] #Cluster: {n_clusters}, #Score: {best_score} \n Cluster: {dtw_cluster}")

# ———— OPTICS ————
optics = OPTICS(
    min_samples=2,        # 核心点的最小邻居数，默认5
    xi=0.05,              # 用于提取簇的最小斜率，默认0.05
    metric='euclidean',   # 在特征空间上用欧氏距离
)
optics.fit(dtw_seq_vec)
labels_opt = optics.labels_   # 噪声点标记为 -1
mask = labels_opt >= 0
if mask.sum() > 1:
    score_opt = silhouette_score(dtw_seq_vec[mask], labels_opt[mask])


dtw_cluster = labels_opt
n_clusters = len(np.unique(dtw_cluster))
logger.info(f"\n [OPTICS] #Cluster: {n_clusters}, #Score: {score_opt} \n Cluster: {dtw_cluster}")





[32m2025-05-08 17:03:19.203[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1m
 [Affinity] #Cluster: 2, #Score: 0.7844332508826075 
 Cluster: [0 0 0 0 0 1 1 1 1][0m
[32m2025-05-08 17:03:19.332[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m36[0m - [1m
 [OPTICS] #Cluster: 4, #Score: 0.6670633359337405 
 Cluster: [-1  0  0  1  1  2  2  2  2][0m


##### Plot Anomaly & Ordering

In [32]:
if PLOT_FLAG:
# fig, axis = plt.subplots(1,3, figsize=(15,5), constrained_layout=True)
    fig= plt.figure(figsize=(12,6))
    cmap = plt.colormaps.get_cmap('rainbow_r')
    axis = [1,2,3,4,5,6]
    axis[0] = fig.add_subplot(2,3,1)
    axis[1] = fig.add_subplot(2,3,2)
    axis[2] = fig.add_subplot(2,3,3)
    axis[3] = fig.add_subplot(2,3,4, projection='3d')
    axis[4] = fig.add_subplot(2,3,5, projection='3d')
    # axis[5] = fig.add_subplot(2,3,6, projection='3d')

    axis[0].set_title("All Samples")
    axis[1].set_title("Ordering Samples")
    axis[2].set_title("Anomaly Samples")

    for i in leaf_optimal_seq:
        ch_eis = chData[i,:,:]
        _color = cmap(i/num_samples)
        axis[0].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")
        axis[1].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")
    axis[1].legend()
    axis[1].sharex(axis[0])
    axis[1].sharey(axis[0])


    for i in leaf_anomaly:
        ch_eis = chData[i,:,:]
        _color = cmap(i/num_samples)
        axis[0].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")
        axis[2].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")
    axis[2].legend()
    axis[2].sharex(axis[0])
    axis[2].sharey(axis[0])



    init_elev = 21  # 仰角
    init_azim = 55  # 方位角
    axis[3].view_init(elev=init_elev, azim=init_azim)
    axis[4].view_init(elev=init_elev, azim=init_azim)
    # axis[5].view_init(elev=init_elev, azim=init_azim)

    _x = np.arange(num_samples)
    _y = np.log10(chData[0,0,:]).flatten()
    X, Y = np.meshgrid(_x, _y, indexing='ij')
    axis[3].plot_surface(X, Y, np.log10(np.abs(chData[:,1,:]+1j*chData[:,2,:])), cmap='viridis_r', alpha=0.8)

    _x = np.arange(num_samples)[leaf_optimal_seq]
    _y = np.log10(chData[0,0,:]).flatten()
    X, Y = np.meshgrid(_x, _y, indexing='ij')
    axis[4].plot_surface(X, Y, np.log10(np.abs(chData[leaf_optimal_seq,1,:]+1j*chData[leaf_optimal_seq,2,:])), cmap='viridis_r', alpha=0.8)

    # _x = np.arange(num_samples)
    # _y = np.log10(chData[0,0,:]).flatten()
    # X, Y = np.meshgrid(_x, _y, indexing='ij')
    # axis[3].plot_surface(X, Y, np.log10(np.abs(chData[:,1,:]+1j*chData[:,2,:])), cmap='viridis_r', alpha=0.8)



### State Transition Detection 

#### Affinity Propagation Clustering

In [33]:
dtw_seq_vec = np.zeros((leaf_optimal_len, leaf_optimal_len*2))
for i in range(leaf_optimal_len):
    dtw_seq_vec[i,:] = np.hstack((dtw_temporal_vec[leaf_optimal_seq[i],leaf_optimal_seq].real, dtw_temporal_vec[leaf_optimal_seq[i],leaf_optimal_seq].imag))

# # ———— Affinity Clustering ————
best_model = None
best_score = -np.inf
for seed in [7,42,1999]:
    aff_prop = AffinityPropagation()
    aff_prop.fit(dtw_seq_vec)
    _score = silhouette_score(dtw_seq_vec, aff_prop.labels_)
    if _score > best_score:
        best_score = _score
        best_model = aff_prop.labels_

dtw_cluster = best_model
n_clusters = len(np.unique(dtw_cluster))

logger.info(f"\n [Affinity] #Cluster: {n_clusters}, #Score: {best_score} \n Cluster: {dtw_cluster}")

# ———— OPTICS ————
optics = OPTICS(
    min_samples=2,        # 核心点的最小邻居数，默认5
    xi=0.01,             # 用于提取簇的最小斜率，默认0.05
    metric='euclidean',   # 在特征空间上用欧氏距离
)
optics.fit(dtw_seq_vec)
labels_opt = optics.labels_   # 噪声点标记为 -1
mask = labels_opt >= 0
if mask.sum() > 1:
    score_opt = silhouette_score(dtw_seq_vec[mask], labels_opt[mask])


dtw_cluster = labels_opt
n_clusters = len(np.unique(dtw_cluster))
logger.info(f"\n [OPTICS] #Cluster: {n_clusters}, #Score: {score_opt} \n Cluster: {dtw_cluster}")

if False:
    plt.figure()
    
    plt.plot(np.arange(optics.reachability_.shape[0]),optics.reachability_[optics.ordering_])
    plt.title('Reachability Plot')
    



[32m2025-05-08 17:03:19.380[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1m
 [Affinity] #Cluster: 2, #Score: 0.7844332508826075 
 Cluster: [0 0 0 0 0 1 1 1 1][0m
[32m2025-05-08 17:03:19.394[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m36[0m - [1m
 [OPTICS] #Cluster: 4, #Score: 0.6670633359337405 
 Cluster: [-1  0  0  1  1  2  2  2  2][0m


In [34]:
def errro_cluster_remover(dtw_cluster, leaf_optimal_seq):
    A_org = np.array(leaf_optimal_seq)
    B_org = np.array(dtw_cluster)
    
    # Remove -1
    A = A_org[B_org != -1]
    B = B_org[B_org != -1]

    # Maintain ascending sereis
    C = np.append(np.diff(B),0)
    
    mask = C < 0
    bad_classes = np.unique(B[mask])

    keep_mask = ~np.isin(B, bad_classes)

    A_filtered = A[keep_mask]
    B_filtered = B[keep_mask]

    unique_vals = np.unique(B_filtered)
    new_labels = np.arange(len(unique_vals))
    mapping = dict(zip(unique_vals, new_labels))

    B_remapped = np.vectorize(mapping.get)(B_filtered)

    return A_filtered,B_remapped

def errro_cluster_remover_ver02(dtw_cluster, leaf_optimal_seq):
    A_org = np.array(leaf_optimal_seq)
    B_org = np.array(dtw_cluster)
    
    # Remove -1
    A = A_org[B_org != -1]
    B = B_org[B_org != -1]

    # Maintain ascending sereis
    C = np.append(np.diff(B),0)
    
    mask = C < 0
    bad_classes = np.unique(B[mask])

    keep_mask = ~np.isin(B, bad_classes)

    A_filtered = A[keep_mask]
    B_filtered = B[keep_mask]

    unique_vals = np.unique(B_filtered)
    new_labels = np.arange(len(unique_vals))
    mapping = dict(zip(unique_vals, new_labels))

    B_remapped = np.vectorize(mapping.get)(B_filtered)

    return A_filtered,B_remapped


leaf_optimal_seq, dtw_cluster = errro_cluster_remover(dtw_cluster, leaf_optimal_seq)
leaf_optimal_len = np.shape(leaf_optimal_seq)[0]
n_clusters = len(np.unique(dtw_cluster))

dtw_seq_vec = np.zeros((leaf_optimal_len, leaf_optimal_len*2))
for i in range(leaf_optimal_len):
    dtw_seq_vec[i,:] = np.hstack((dtw_temporal_vec[leaf_optimal_seq[i],leaf_optimal_seq].real, dtw_temporal_vec[leaf_optimal_seq[i],leaf_optimal_seq].imag))


##### Plot AP result

In [35]:
if False:
    from sklearn.manifold import Isomap
    # 使用Isomap将数据降至2维（用于可视化）
    isomap = Isomap(n_components=2)
    X_iso = isomap.fit_transform(dtw_seq_vec)

    # 绘制二维散点图，不同颜色代表不同聚类
    plt.figure(figsize=(8, 6))
    scatter = plt.scatter(X_iso[:, 0], X_iso[:, 1], c=dtw_cluster, cmap='Set1', s=50)
    plt.title(f'Affinity Propagation Clusters (n_clusters={n_clusters})')
    plt.xlabel('Isomap Component 1')
    plt.ylabel('Isomap Component 2')
    plt.colorbar(scatter, label='Cluster Label')
    plt.show()

#### Confidence of Cluster

In [36]:
def Unilateral_T_test(data, x):
    n = len(data)
    if n < 2: return [-1]
    mean = np.mean(data)
    std = np.std(data, ddof=1)      # ddof = 1 for small sample

    # t_stat = (x - mean) / (std)
    t_stat = (x - mean) / (std / np.sqrt(n))
    # H0： x > μ
    p_value = 1 - stats.t.cdf(t_stat, df=n-1)


    return p_value

In [37]:
# Validation of cluster
inner_dist = []
for i in range(n_clusters):
    _cluster = dtw_seq_vec[dtw_cluster[:]==i,:]
    if np.shape(_cluster)[0] < 2:
        inner_dist.append([0])
    else: 
        inner_dist.append(np.sum(np.diff(dtw_seq_vec[dtw_cluster[:]==i,:],axis=0)**2, axis=1))


global_dist = []
for i in range(np.shape(dtw_seq_vec)[0]-1):
    global_dist.append(np.sum((dtw_seq_vec[i+1,:]-dtw_seq_vec[i,:])**2))
global_dist = np.array(global_dist)

cluster_connect = [] 

# # Average Criterion
for i in range(n_clusters-1):
    _pre = dtw_seq_vec[dtw_cluster[:]==i,:][-1]
    _poi = dtw_seq_vec[dtw_cluster[:]==i+1,:][0]
    intp_dst = np.sum(np.diff([_pre,_poi], axis=0)**2, axis=1)
    _p_value = Unilateral_T_test(np.hstack((inner_dist[i],inner_dist[i+1])), intp_dst)
    print(_p_value)
    if _p_value > 0.005:
        cluster_connect.append(i)


# Global Criterion
# for i in range(n_clusters-1):
#     _pre = dtw_seq_vec[dtw_cluster[:]==i,:][-1]
#     _poi = dtw_seq_vec[dtw_cluster[:]==i+1,:][0]
#     intp_dst = np.sum(np.diff([_pre,_poi], axis=0)**2, axis=1)
#     _p_value = Unilateral_T_test(global_dist, intp_dst)
#     print(_p_value)
#     if _p_value > 0.01:
#         cluster_connect.append(i)

# Closest Criterion
# for i in range(n_clusters-1):
#     _pre = dtw_seq_vec[dtw_cluster[:]==i,:][-1]
#     _poi = dtw_seq_vec[dtw_cluster[:]==i+1,:][0]
#     intp_dst = np.sum(np.diff([_pre,_poi], axis=0)**2, axis=1)

#     p_pre = Unilateral_T_test(inner_dist[i], intp_dst)
#     p_poi = Unilateral_T_test(inner_dist[i+1], intp_dst)
#     print(p_pre, p_poi)
#     if np.max([p_pre,p_poi]) > 0.01 and np.min([p_pre,p_poi]) > 0:    # merge
#         cluster_connect.append(i)


# Merge from high to low
dtw_cluster_revision = np.array(dtw_cluster)
for i in cluster_connect[::-1]:
    for j in range(np.shape(dtw_cluster_revision)[0]):
        _x = dtw_cluster_revision[j]
        if _x == i + 1:
            dtw_cluster_revision[j] = i
        elif _x > i + 1:
            dtw_cluster_revision[j] = _x - 1


n_clusters_revision = len(np.unique(dtw_cluster_revision))
dtw_cluster_revision



[0.00123629]
[1.32356555e-06]


array([0, 0, 1, 1, 2, 2, 2, 2])

In [38]:
# Empirical 
if False:
    _cluster_0 = np.sum(np.diff(dtw_seq_vec[dtw_cluster[:]==0,:],axis=0)**2, axis=1)
    _cluster_1 = np.sum(np.diff(dtw_seq_vec[dtw_cluster[:]==1,:],axis=0)**2, axis=1)
    _cluster_2 = np.sum(np.diff(dtw_seq_vec[dtw_cluster[:]==2,:],axis=0)**2, axis=1)
    _cluster_3 = np.sum(np.diff(dtw_seq_vec[dtw_cluster[:]==3,:],axis=0)**2, axis=1)

    print(f"\n{_cluster_0}\n{_cluster_1}\n{_cluster_2}\n{_cluster_3}")

    _intp_01 = np.sum(np.diff([dtw_seq_vec[dtw_cluster[:]==0,:][-1],dtw_seq_vec[dtw_cluster[:]==1,:][0]],axis=0)**2, axis=1)
    _intp_12 = np.sum(np.diff([dtw_seq_vec[dtw_cluster[:]==1,:][-1],dtw_seq_vec[dtw_cluster[:]==2,:][0]],axis=0)**2, axis=1)
    _intp_23 = np.sum(np.diff([dtw_seq_vec[dtw_cluster[:]==2,:][-1],dtw_seq_vec[dtw_cluster[:]==3,:][0]],axis=0)**2, axis=1)

    print(f"\n{_intp_01}\n{_intp_12}\n{_intp_23}")

    _std_0 = np.std(_cluster_0)
    _std_1 = np.std(_cluster_1)
    _std_2 = np.std(_cluster_2)
    _std_3 = np.std(_cluster_3)
    _mean_0 = np.mean(_cluster_0)
    _mean_1 = np.mean(_cluster_1)
    _mean_2 = np.mean(_cluster_2)
    _mean_3 = np.mean(_cluster_3)

    print(f"\n{_mean_0}±{3*_std_0}\n{_mean_1}±{3*_std_1}\n{_mean_2}±{3*_std_2}\n{_mean_3}±{3*_std_3}")

#### Single Cluster Outlier Detection

In [39]:
eis_seq = np.array(leaf_optimal_seq)
eis_cluster = np.array(dtw_cluster_revision)
eis_cluster_n = len(np.unique(eis_cluster))

for i in reversed(range(len(eis_seq))):
    print(i)
    if len(eis_cluster[eis_cluster == eis_cluster[i]]) == 1:
        eis_cluster[i:] = eis_cluster[i:] - 1
        eis_cluster = np.delete(eis_cluster, i)
        eis_seq = np.delete(eis_seq, i)
        eis_cluster_n = eis_cluster_n-1
    # elif len(eis_cluster[eis_cluster == eis_cluster[i]]) == 2:
    #     if eis_cluster[i] != 0 and eis_cluster[i] != eis_cluster_n-1:
    #         eis_cluster[i:] = eis_cluster[i:] - 1
            
    #         eis_cluster = np.delete(eis_cluster, i)
    #         eis_seq = np.delete(eis_seq, i)
    #         i = i-1
    #         eis_cluster = np.delete(eis_cluster, i)
    #         eis_seq = np.delete(eis_seq, i)

    #         eis_cluster_n = eis_cluster_n-1

eis_anomaly = np.array([poi for poi in leaf_full if poi not in eis_seq])
eis_cluster_n = len(np.unique(eis_cluster))


# Old Version: Only remove Single Cluster Outlier at Terminal
# if dtw_cluster_revision[0] != dtw_cluster_revision[1]:
#     eis_anomaly = np.append(eis_anomaly[::-1],eis_seq[0])[::-1]
#     eis_seq = eis_seq[1:]
#     eis_cluster = eis_cluster[1:] - 1
# if dtw_cluster_revision[-1] != dtw_cluster_revision[-2]:
#     eis_anomaly = np.append(eis_anomaly,eis_seq[-1])
#     eis_seq = eis_seq[:-1]
#     eis_cluster = eis_cluster[:-1]


7
6
5
4
3
2
1
0


#### Plot Cluster Before/After

In [40]:
if PLOT_FLAG:
# if True:
    # fig= plt.figure(figsize=(12,6), constrained_layout=True)
    fig= plt.figure(figsize=(15,8), constrained_layout=False)
    axis = [0] * 9
    axis[0] = fig.add_subplot(2,3,1)        # Origin Sequence
    axis[1] = fig.add_subplot(2,3,2)        # Origin Anomaly
    axis[2] = fig.add_subplot(2,3,3)        # Cluster Sequence
    axis[3] = fig.add_subplot(2,3,4)        # Cluster Anomaly
    axis[4] = fig.add_subplot(2,3,5)        # Merge Sequence
    axis[5] = fig.add_subplot(2,3,6)        # Merge Anomaly



    axis[0].set_title("W/O Cluster")
    axis[1].set_title("Cluster Original")
    axis[2].set_title("Cluster Merged")


    ## W/O Cluster
    cmap = plt.colormaps.get_cmap('rainbow_r')
    for i in range(leaf_optimal_len):
        _x = leaf_optimal_seq[i]
        ch_eis = chData[_x,:,:]
        _color = cmap(_x/num_samples)
        axis[0].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")


    cmap = plt.colormaps.get_cmap('rainbow_r')
    for i in range(len(leaf_anomaly)):
        _x = leaf_anomaly[i]
        ch_eis = chData[_x,:,:]
        _color = cmap(_x/num_samples)
        axis[3].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{_x:02d}")
    axis[3].legend()
    axis[3].sharex(axis[0])
    axis[3].sharey(axis[0])


    ## Cluster Original
    cmap = plt.colormaps.get_cmap('Set1')
    for i in range(leaf_optimal_len):
        _x = leaf_optimal_seq[i]
        ch_eis = chData[_x,:,:]
        _color = cmap(dtw_cluster[i])
        axis[1].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"{chr(ord('A')+dtw_cluster[i])}")

    _legend_handle = []
    for i in range(n_clusters):
        _legend_handle.append(mpatches.Patch(color = cmap(i), label = f"{chr(ord('A')+i)}:{len(dtw_cluster[dtw_cluster==i])}"))
    axis[1].legend(handles=_legend_handle)



    cmap = plt.colormaps.get_cmap('rainbow_r')
    for i in range(len(leaf_anomaly)):
        _x = leaf_anomaly[i]
        ch_eis = chData[_x,:,:]
        _color = cmap(_x/num_samples)
        axis[4].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{_x:02d}")
    axis[4].legend()
    axis[4].sharex(axis[0])
    axis[4].sharey(axis[0])



    ## Merged
    cmap = plt.colormaps.get_cmap('Set1')
    for i in range(len(eis_seq)):
        _x = eis_seq[i]
        ch_eis = chData[_x,:,:]
        _color = cmap(eis_cluster[i])
        axis[2].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"{chr(ord('A')+eis_cluster[i])}")

    _legend_handle = []
    for i in range(eis_cluster_n):
        _legend_handle.append(mpatches.Patch(color = cmap(i), label = f"{chr(ord('A')+i)}:{len(eis_cluster[eis_cluster==i])}"))
    axis[2].legend(handles=_legend_handle)

    axis[2].sharex(axis[0])
    axis[2].sharey(axis[0])


    cmap = plt.colormaps.get_cmap('rainbow_r')
    for i in range(len(eis_anomaly)):
        _x = eis_anomaly[i]
        ch_eis = chData[_x,:,:]
        _color = cmap(_x/num_samples)
        axis[5].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{_x:02d}")
    axis[5].legend()
    axis[5].sharex(axis[0])
    axis[5].sharey(axis[0])



### Plot Summary

In [41]:
# fig= plt.figure(figsize=(12,6), constrained_layout=True)
fig= plt.figure(figsize=(15,8), constrained_layout=False)
axis = [0] * 9
axis[0] = fig.add_subplot(3,3,1, projection='3d')   # Original 3D
axis[1] = fig.add_subplot(3,3,2)                    # Original 2D

axis[3] = fig.add_subplot(3,3,4, projection='3d')   # Linkage 3D
axis[4] = fig.add_subplot(3,3,5)                    # Linkage Sequence
axis[5] = fig.add_subplot(3,3,6)                    # Linkage Anomaly

axis[6] = fig.add_subplot(3,3,7, projection='3d')   # AP 3D
axis[7] = fig.add_subplot(3,3,8)                    # AP Sequence
axis[8] = fig.add_subplot(3,3,9)                    # AP Anomaly


init_elev = 21  # 仰角
init_azim = 55  # 方位角
axis[0].view_init(elev=init_elev, azim=init_azim)
axis[3].view_init(elev=init_elev, azim=init_azim)
axis[6].view_init(elev=init_elev, azim=init_azim)


axis[0].set_title("Original")
axis[3].set_title("Anomaly Detection")
axis[6].set_title("Cluster Analysis")


## Original
_x = np.arange(num_samples)
_y = np.log10(chData[0,0,:]).flatten()
X, Y = np.meshgrid(_x, _y, indexing='ij')
axis[0].plot_surface(X, Y, np.log10(np.abs(chData[:,1,:]+1j*chData[:,2,:])), cmap='viridis_r', alpha=0.8)


cmap = plt.colormaps.get_cmap('rainbow_r')
for i in range(num_samples):
    ch_eis = chData[i,:,:]
    _color = cmap(i/num_samples)
    axis[1].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")
    # axis[1].semilogx(ch_eis[0,:], np.angle(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")


## Anomaly Detection

_x = np.arange(num_samples)[leaf_optimal_seq]
_y = np.log10(chData[0,0,:]).flatten()
X, Y = np.meshgrid(_x, _y, indexing='ij')
axis[3].plot_surface(X, Y, np.log10(np.abs(chData[leaf_optimal_seq,1,:]+1j*chData[leaf_optimal_seq,2,:])), cmap='viridis_r', alpha=0.8)


cmap = plt.colormaps.get_cmap('rainbow_r')
for i in leaf_optimal_seq:
    ch_eis = chData[i,:,:]
    _color = cmap(i/num_samples)
    axis[4].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")
    # axis[4].semilogy(ch_eis[0,:], np.angle(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")
axis[4].sharex(axis[1])
axis[4].sharey(axis[1])


for i in leaf_anomaly:
    ch_eis = chData[i,:,:]
    _color = cmap(i/num_samples)
    axis[5].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")
axis[5].legend()
axis[5].sharex(axis[1])
axis[5].sharey(axis[1])


## Cluster Analysis

_x = np.arange(num_samples)[eis_seq]
_y = np.log10(chData[0,0,:]).flatten()
X, Y = np.meshgrid(_x, _y, indexing='ij')
axis[6].plot_surface(X, Y, np.log10(np.abs(chData[eis_seq,1,:]+1j*chData[eis_seq,2,:])), cmap='viridis_r', alpha=0.8)


cmap = plt.colormaps.get_cmap('Set1')
for i in range(len(eis_seq)):
    _x = eis_seq[i]
    ch_eis = chData[_x,:,:]
    _color = cmap(eis_cluster[i])
    axis[7].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"{chr(ord('A')+eis_cluster[i])}")

_legend_handle = []
for i in range(eis_cluster_n):
    _legend_handle.append(mpatches.Patch(color = cmap(i), label = f"{chr(ord('A')+i)}:{len(eis_cluster[eis_cluster==i])}"))
axis[7].legend(handles=_legend_handle)

axis[7].sharex(axis[1])
axis[7].sharey(axis[1])


cmap = plt.colormaps.get_cmap('rainbow_r')
for i in range(len(eis_anomaly)):
    _x = eis_anomaly[i]
    ch_eis = chData[_x,:,:]
    _color = cmap(_x/num_samples)
    axis[8].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{_x:02d}")
    # axis[8].semilogx(ch_eis[0,:], np.angle(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{_x:02d}")
axis[8].legend()
axis[8].sharex(axis[1])
axis[8].sharey(axis[1])








# Pipeline Test Bench

In [42]:
PIPELINE_FLAG = True
# if PIPELINE_FLAG:
if False:
    PLOT_FLAG = True


    # rootPath = "D:/Baihm/EISNN/Archive/01037160_归档"  
    # ch_id = 0   # Default  
    # ch_id = 1   # 居然分成一类了？？？
    # ch_id = 9   # 异常值没被筛出去，类似还有3
    # ch_id = 7  # Normal Example
    # ch_id = 20  # Normal to Short, Same to GPR  
    # ch_id = 52    
    # ch_id = 89  


    # rootPath = "D:/Baihm/EISNN/Archive/01037161_归档"   # #2
    # ch_id = 10  # Error - Ap结果不连续

    # rootPath = "D:/Baihm/EISNN/Dataset/05087163_归档"
    # ch_id = 7   # one outlier
    # ch_id = 50  # No outlier but in two Phases
    # ch_id = 55  # One outlier &wired end point
    # ch_id = 114 # Open Circuit with on outpler


    # rootPath = "D:\Baihm\EISNN\Archive/06047730_归档"
    # ch_id = 41  # outlier detection error but has 20 samples

    # rootPath = "D:/Baihm/EISNN/Archive/02067447_归档"
    # ch_id = 68  # Short all the time

    # rootPath = "D:/Baihm/EISNN/Archive/01067095_归档"
    # ch_id = 19    # First Sample is outlier

    rootPath = "D:/Baihm/EISNN/Archive/09290511_归档"
    ch_id = 13    # Up & Down, 2 outliers
    # ch_id = 21    # Normal + 2 outlier
    # ch_id = 41    # Normal + 2 outlier - *(Hard To Tell)
    # ch_id = 79    # 3-class, What a mess

    # rootPath = "D:/Baihm/EISNN/Archive/11057712_归档"
    # ch_id = 106    # Very Good Electrode with 1 hidden outlier, and one phase shift

    # rootPath = "D:\Baihm\EISNN\Archive/10057084_归档"
    # ch_id = 16    # Totaly Mess
    # ch_id = 18    # Totaly Mess

    # rootPath = "D:\Baihm\EISNN\Archive/11067223_归档"
    # ch_id = 124     # Perfect but two phase with one outlier


    # rootPath = "D:\Baihm\EISNN\Archive/15361101_归档"
    # ch_id = 0     # Only One Sample - Run With Error


    # rootPath = "D:\Baihm\EISNN\Archive/11207147_归档"
    # ch_id = 0     # Only Three Sample - Run With Error


    # rootPath = "D:\Baihm\EISNN\Archive/06017758_归档"
    # ch_id = 96     # Perfect of Perfect

    
    # rootPath = "D:\Baihm\EISNN\Archive/22037380_归档"
    # ch_id = 20     # Connection Error
    
    # rootPath = "D:\Baihm\EISNN\Archive/22027365_归档"
    # ch_id = 27     # Connection Error

    
    # rootPath = "D:\Baihm\EISNN\Archive/10067077_归档"
    # ch_id = 20     # Connection Error

    # rootPath = "D:\Baihm\EISNN\Archive/01067094_归档"
    # ch_id = 105     # Connection Error




    EISDict = gatherCSV(rootPath)
    chData_full = readChannel(ch_id, EISDict)
    freq_list = np.linspace(1000,np.shape(chData_full)[2]-1,101,dtype=int, endpoint=True)

    if False:
        phz_calibration = np.loadtxt("./phz_Calib.txt")
        for i in range(np.shape(chData)[0]):
            # ch_eis = EIS_recal(chData[i,:,:])
            ch_eis = EIS_recal_ver02(chData[i,:,:], phz_calibration)
            chData[i,:,:] = ch_eis
    chData = chData_full[:,:,freq_list]

In [None]:
if PIPELINE_FLAG:

    import OutlierDetection

    seq_weird = None
    # eis_seq, eis_cluster, eis_anomaly, leaf_anomaly = OutlierDetection.OutlierDetection(chData)
    eis_seq, eis_cluster, eis_anomaly, leaf_anomaly, seq_weird = OutlierDetection.OutlierDetection_Ver02(chData_full)
    num_samples = chData.shape[0]
    num_cluster = len(np.unique(eis_cluster))


    ## Plot

    # fig= plt.figure(figsize=(12,6), constrained_layout=True)
    fig= plt.figure(figsize=(15,8), constrained_layout=False)
    axis = [0] * 9
    axis[0] = fig.add_subplot(3,3,1, projection='3d')   # Original 3D
    axis[1] = fig.add_subplot(3,3,2)                    # Original 2D
    if seq_weird is not None:
        axis[2] = fig.add_subplot(3,3,3)                    # Original 2D


    axis[3] = fig.add_subplot(3,3,4, projection='3d')   # Linkage 3D
    axis[4] = fig.add_subplot(3,3,5)                    # Linkage Sequence
    axis[5] = fig.add_subplot(3,3,6)                    # Linkage Anomaly

    axis[6] = fig.add_subplot(3,3,7, projection='3d')   # AP 3D
    axis[7] = fig.add_subplot(3,3,8)                    # AP Sequence
    axis[8] = fig.add_subplot(3,3,9)                    # AP Anomaly


    init_elev = 21  # 仰角
    init_azim = 55  # 方位角
    axis[0].view_init(elev=init_elev, azim=init_azim)
    axis[3].view_init(elev=init_elev, azim=init_azim)
    axis[6].view_init(elev=init_elev, azim=init_azim)


    axis[0].set_title("Original")
    axis[3].set_title("Anomaly Detection")
    axis[6].set_title("Cluster Analysis")


    ## Original
    _x = np.arange(num_samples)
    _y = np.log10(chData[0,0,:]).flatten()
    X, Y = np.meshgrid(_x, _y, indexing='ij')
    axis[0].plot_surface(X, Y, np.log10(np.abs(chData[:,1,:]+1j*chData[:,2,:])), cmap='viridis_r', alpha=0.8)


    cmap = plt.colormaps.get_cmap('rainbow_r')
    for i in range(num_samples):
        ch_eis = chData[i,:,:]
        _color = cmap(i/num_samples)
        axis[1].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")

    if seq_weird is not None:
        for i in seq_weird:
            ch_eis = chData[i,:,:]
            _color = cmap(i/num_samples)
            axis[2].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")
        axis[2].legend()
        axis[2].sharex(axis[1])
        axis[2].sharey(axis[1])

    ## Anomaly Detection

    _x = np.arange(num_samples)[eis_seq]
    _y = np.log10(chData[0,0,:]).flatten()
    X, Y = np.meshgrid(_x, _y, indexing='ij')
    axis[3].plot_surface(X, Y, np.log10(np.abs(chData[eis_seq,1,:]+1j*chData[eis_seq,2,:])), cmap='viridis_r', alpha=0.8)


    cmap = plt.colormaps.get_cmap('rainbow_r')
    for i in eis_seq:
        ch_eis = chData[i,:,:]
        _color = cmap(i/num_samples)
        axis[4].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")
    axis[4].sharex(axis[1])
    axis[4].sharey(axis[1])


    for i in leaf_anomaly:
        ch_eis = chData[i,:,:]
        _color = cmap(i/num_samples)
        axis[5].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")
    axis[5].legend()
    axis[5].sharex(axis[1])
    axis[5].sharey(axis[1])


    ## Cluster Analysis

    _x = np.arange(num_samples)[eis_seq]
    _y = np.log10(chData[0,0,:]).flatten()
    X, Y = np.meshgrid(_x, _y, indexing='ij')
    axis[6].plot_surface(X, Y, np.log10(np.abs(chData[eis_seq,1,:]+1j*chData[eis_seq,2,:])), cmap='viridis_r', alpha=0.8)


    cmap = plt.colormaps.get_cmap('Set1')
    for i in range(len(eis_seq)):
        _x = eis_seq[i]
        ch_eis = chData[_x,:,:]
        _color = cmap(eis_cluster[i])
        axis[7].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"{chr(ord('A')+eis_cluster[i])}")

    _legend_handle = []
    for i in range(num_cluster):
        _legend_handle.append(mpatches.Patch(color = cmap(i), label = f"{chr(ord('A')+i)}:{len(eis_cluster[eis_cluster==i])}"))
    axis[7].legend(handles=_legend_handle)

    axis[7].sharex(axis[1])
    axis[7].sharey(axis[1])


    cmap = plt.colormaps.get_cmap('rainbow_r')
    for i in range(len(eis_anomaly)):
        _x = eis_anomaly[i]
        ch_eis = chData[_x,:,:]
        _color = cmap(_x/num_samples)
        axis[8].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{_x:02d}")
    axis[8].legend()
    axis[8].sharex(axis[1])
    axis[8].sharey(axis[1])





# Pipeline + Short & Open

In [60]:
import OutlierDetection
# eis_seq, eis_cluster, eis_anomaly, leaf_anomaly, weird_mask = OutlierDetection.OutlierDetection_Ver02(chData_full, mask_flag=False)
# open_mask, short_mask = OutlierDetection.OpenShortDetection(chData, mask_flag = False)
eis_seq, eis_cluster, eis_anomaly, leaf_anomaly, seq_weird = OutlierDetection.OutlierDetection_Ver02(chData_full, mask_flag=False)
seq_open, seq_short = OutlierDetection.OpenShortDetection(chData_full, mask_flag = False)

In [61]:
num_samples = chData.shape[0]
num_cluster = len(np.unique(eis_cluster))

# seq_full    = np.arange(num_samples)
# seq_weird   = seq_full[weird_mask]
# seq_open    = seq_full[open_mask]
# seq_short   = seq_full[short_mask]

In [None]:
def OutlierDetectionPlot(fig, chData, eis_seq, eis_cluster, eis_anomaly, leaf_anomaly, seq_weird, seq_open, seq_short):
    # fig= plt.figure(figsize=(15,8), constrained_layout=False)
    num_samples = chData.shape[0]
    num_cluster = len(np.unique(eis_cluster))

    seq_open_eis    = np.intersect1d(eis_seq, seq_open)
    seq_short_eis   = np.intersect1d(eis_seq, seq_short)


    axis    = [0] * 11
    axis[0] = fig.add_subplot(3,3,1, projection='3d')   # Original 3D
    axis[1] = fig.add_subplot(3,3,2)                    # Original 2D
    axis[2] = fig.add_subplot(3,3,3)                    # Original 2D
    axis[3] = fig.add_subplot(3,3,4, projection='3d')   # Linkage 3D
    axis[4] = fig.add_subplot(3,3,5)                    # Linkage Sequence
    axis[5] = fig.add_subplot(3,3,6)                    # Linkage Anomaly
    axis[7] = fig.add_subplot(3,3,8)                    # AP Sequence
    axis[8] = fig.add_subplot(3,3,9)                    # AP Anomaly
    
    
    axis[6] = fig.add_subplot(3,3,7)   # Text
    text_axis = axis[6]
    text_axis.axis('off')

    init_elev = 21  # 仰角
    init_azim = 55  # 方位角
    axis[0].view_init(elev=init_elev, azim=init_azim)
    axis[3].view_init(elev=init_elev, azim=init_azim)
    # axis[6].view_init(elev=init_elev, azim=init_azim)


    axis[0].set_title("Original")
    axis[3].set_title("Anomaly Detection")
    # axis[6].set_title("Cluster Analysis")

    
    axis[1].set_title("Original Data")
    axis[4].set_title("After Outlier Detection")
    axis[7].set_title("After Cluster")

    
    axis[2].set_title("Type I Outlier")
    axis[5].set_title("Type II Outlier")
    axis[8].set_title("Open-Short")


    ## Original
    _x = np.arange(num_samples)
    _y = np.log10(chData[0,0,:]).flatten()
    X, Y = np.meshgrid(_x, _y, indexing='ij')
    axis[0].plot_surface(X, Y, np.log10(np.abs(chData[:,1,:]+1j*chData[:,2,:])), cmap='viridis_r', alpha=0.8)


    cmap = plt.colormaps.get_cmap('rainbow_r')
    for i in range(num_samples):
        ch_eis = chData[i,:,:]
        _color = cmap(i/num_samples)
        axis[1].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")

    for i in seq_weird:
        ch_eis = chData[i,:,:]
        _color = cmap(i/num_samples)
        axis[2].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")
    axis[2].legend()
    axis[2].sharex(axis[1])
    axis[2].sharey(axis[1])

    ## Anomaly Detection

    _x = np.arange(num_samples)[eis_seq]
    _y = np.log10(chData[0,0,:]).flatten()
    X, Y = np.meshgrid(_x, _y, indexing='ij')
    axis[3].plot_surface(X, Y, np.log10(np.abs(chData[eis_seq,1,:]+1j*chData[eis_seq,2,:])), cmap='viridis_r', alpha=0.8)


    cmap = plt.colormaps.get_cmap('rainbow_r')
    for i in eis_seq:
        ch_eis = chData[i,:,:]
        _color = cmap(i/num_samples)
        axis[4].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")
    axis[4].sharex(axis[1])
    axis[4].sharey(axis[1])


    for i in leaf_anomaly:
        ch_eis = chData[i,:,:]
        _color = cmap(i/num_samples)
        axis[5].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"S{i:02d}")
    axis[5].legend()
    axis[5].sharex(axis[1])
    axis[5].sharey(axis[1])


    ## Cluster Analysis

    # _x = np.arange(num_samples)[eis_seq]
    # _y = np.log10(chData[0,0,:]).flatten()
    # X, Y = np.meshgrid(_x, _y, indexing='ij')
    # axis[6].plot_surface(X, Y, np.log10(np.abs(chData[eis_seq,1,:]+1j*chData[eis_seq,2,:])), cmap='viridis_r', alpha=0.8)


    cmap = plt.colormaps.get_cmap('Set1')
    for i in range(len(eis_seq)):
        _x = eis_seq[i]
        ch_eis = chData[_x,:,:]
        _color = cmap(eis_cluster[i])
        axis[7].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, label=f"{chr(ord('A')+eis_cluster[i])}")

    _legend_handle = []
    for i in range(num_cluster):
        _legend_handle.append(mpatches.Patch(color = cmap(i), label = f"{chr(ord('A')+i)}:{len(eis_cluster[eis_cluster==i])}"))
    axis[7].legend(handles=_legend_handle)

    axis[7].sharex(axis[1])
    axis[7].sharey(axis[1])

    # Open Short

    cmap = plt.colormaps.get_cmap('managua')
    for i in range(len(eis_seq)):
        _x = eis_seq[i]
        if _x in seq_open_eis:     
            _color = cmap(0.0)
            alpha = 1
        elif _x in seq_short_eis:   
            _color = cmap(1.0)
            alpha = 1
        else:                       
            _color = cmap(0.5)
            alpha = 0.2
        ch_eis = chData[_x,:,:]
        axis[8].loglog(ch_eis[0,:], np.abs(ch_eis[1,:]+1j*ch_eis[2,:]), color = _color, linewidth=2, alpha = alpha)

    _legend_handle = []
    _legend_handle.append(mpatches.Patch(color = cmap(0.5), label = f"Norm:{len(eis_seq) - len(seq_open_eis) - len(seq_short_eis)}"))
    _legend_handle.append(mpatches.Patch(color = cmap(0.0), label = f"Open:{len(seq_open_eis)}"))
    _legend_handle.append(mpatches.Patch(color = cmap(1.0), label = f"Short:{len(seq_short_eis)}"))
    axis[8].legend(handles=_legend_handle)
    axis[8].sharex(axis[1])
    axis[8].sharey(axis[1])

    return text_axis
 

In [63]:
fig = plt.figure(figsize=(16, 9), constrained_layout=True)
axis = OutlierDetectionPlot(fig, chData, eis_seq, eis_cluster, eis_anomaly, leaf_anomaly, seq_weird, seq_open, seq_short)
    