# Import

In [4]:
%matplotlib qt
%gui qt

import re
import os
import sys

import numpy as np
from loguru import logger

import matplotlib.pyplot as plt 
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import seaborn


import pyqtgraph as pg
import pyqtgraph.opengl as gl

from collections import defaultdict
from datetime import datetime

import torch
import gpytorch
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [5]:
def gatherCSV(rootPath, outsuffix = 'Tracking'):
    '''==================================================
        Collect all EIS.csv files in the rootPath
        Parameter: 
            rootPath: current search path
            outsuffix: Saving path of EIS.csv files
        Returen:
            EISDict: a 2D-dict of EIS data
            Storage Frame: EISDict[_sessionIndex][_channelIndex] = "_filepath"
        ==================================================
    '''
    _filename       = None
    _filepath       = None
    _trackpath      = None
    _csvpath        = None
    _sessionIndex   = None
    _channelIndex   = None
    _processed      = None

    EISDict = defaultdict(dict)

    ## Iterate session
    session_pattern = re.compile(r"(.+?)_(\d{8})_01")
    bank_pattern    = re.compile(r"([1-4])")
    file_pattern    = re.compile(r"EIS_ch(\d{3})\.csv")

    ## RootDir
    for i in os.listdir(rootPath):
        match_session = session_pattern.match(i)
        ## SessionDir
        if match_session:
            logger.info(f"Session Begin: {i}")
            _sessionIndex = match_session[2]
            for j in os.listdir(f"{rootPath}/{i}"):
                match_bank = bank_pattern.match(j)
                ## BankDir
                if match_bank:
                    logger.info(f"Bank Begin: {j}")
                    _trackpath = f"{rootPath}/{i}/{j}/{outsuffix}"
                    if not os.path.exists(_trackpath):
                        continue

                    for k in os.listdir(f"{rootPath}/{i}/{j}/{outsuffix}"):
                        match_file = file_pattern.match(k)
                        ## File
                        if match_file:
                            _filename = k
                            _filepath = f"{rootPath}/{i}/{j}/{outsuffix}/{k}"
                            _channelIndex = (int(match_bank[1])-1)*32+int(match_file[1])
                            
                            EISDict[_sessionIndex][_channelIndex] = f"{rootPath}/{i}/{j}/{outsuffix}/{k}"
                            
    return EISDict

In [6]:
# Data Readout
def readChannel(chID, fileDict):
    '''==================================================
        Read EIS.csv file by Channel
        Parameter: 
            chID: channel index
            fileDict: EISDict[_sessionIndex][_channelIndex] = "_filepath"
        Returen:
            freq: frequency
            Zreal: real part of impedance
            Zimag: imaginary part of impedance
        ==================================================
    '''
    chData = []
    for ssID in fileDict.keys():
        _data   = np.loadtxt(fileDict[ssID][chID], delimiter=',')
        _freq   = _data[:,0]
        _Zreal  = _data[:,1] * np.cos(np.deg2rad(_data[:,2])) 
        _Zimag  = _data[:,1] * np.sin(np.deg2rad(_data[:,2])) 
        chData.append(np.stack((_freq, _Zreal, _Zimag),axis=0))

    return np.stack(chData, axis=0)

In [7]:
rootPath = "D:/Baihm/EISNN/Dataset/01037160_归档"
ch_id = 20  # Normal to Short, Same to GPR  
# ch_id = 89  # Same to GPR  
# ch_id = 7  # Normal Example

# rootPath = "D:/Baihm/EISNN/Dataset/05087163_归档"
# ch_id = 7   # one outlier
# ch_id = 50  # No outlier but in two Phases
# ch_id = 55  # One outlier &wired end point
# ch_id = 114 # Open Circuit with on outpler

# rootPath = "D:/Baihm/EISNN/Archive/02067447_归档"
# ch_id = 68  # Short all the time

# rootPath = "D:/Baihm/EISNN/Archive/01067095_归档"
# ch_id = 19    # First Sample is outlier

# rootPath = "D:/Baihm/EISNN/Archive/09290511_归档"
# ch_id = 13    # Up & Down, 2 outliers
# ch_id = 21    # Normal + 2 outlier
# ch_id = 41    # Normal + 2 outlier - *(Hard To Tell)
# ch_id = 79    # 3-class, What a mess

# rootPath = "D:/Baihm/EISNN/Archive/11057712_归档"
# ch_id = 106    # Very Good Electrode with 1 hidden outlier, and one phase shift

# rootPath = "D:\Baihm\EISNN\Archive/10057084_归档"
# ch_id = 16    # Totaly Mess
# ch_id = 18    # Totaly Mess

# rootPath = "D:\Baihm\EISNN\Archive/11067223_归档"
# ch_id = 124     # Perfect with one outlier


# freq_list = np.linspace(0,np.shape(chData)[2]-1,101,dtype=int)
freq_list = np.linspace(0,5000-1,101,dtype=int, endpoint=True)
EISDict = gatherCSV(rootPath)
chData = readChannel(ch_id, EISDict)[:,:,freq_list]

[32m2025-03-29 13:19:54.238[0m | [1mINFO    [0m | [36m__main__[0m:[36mgatherCSV[0m:[36m32[0m - [1mSession Begin: 01037160_20241124_01[0m
[32m2025-03-29 13:19:54.238[0m | [1mINFO    [0m | [36m__main__[0m:[36mgatherCSV[0m:[36m38[0m - [1mBank Begin: 1[0m
[32m2025-03-29 13:19:54.239[0m | [1mINFO    [0m | [36m__main__[0m:[36mgatherCSV[0m:[36m38[0m - [1mBank Begin: 2[0m
[32m2025-03-29 13:19:54.239[0m | [1mINFO    [0m | [36m__main__[0m:[36mgatherCSV[0m:[36m38[0m - [1mBank Begin: 3[0m
[32m2025-03-29 13:19:54.240[0m | [1mINFO    [0m | [36m__main__[0m:[36mgatherCSV[0m:[36m38[0m - [1mBank Begin: 4[0m
[32m2025-03-29 13:19:54.240[0m | [1mINFO    [0m | [36m__main__[0m:[36mgatherCSV[0m:[36m32[0m - [1mSession Begin: 01037160_20241125_01[0m
[32m2025-03-29 13:19:54.241[0m | [1mINFO    [0m | [36m__main__[0m:[36mgatherCSV[0m:[36m38[0m - [1mBank Begin: 1[0m
[32m2025-03-29 13:19:54.241[0m | [1mINFO    [0m | [36m__main__

# Data Cleaning

In [None]:
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))
from  Outlier import OutlierDetection

CLEAN_FLAG = True
if CLEAN_FLAG:
    eis_seq, eis_cluster, eis_anomaly, leaf_anomaly = OutlierDetection.OutlierDetection(chData)
else: 
    eis_seq = np.arange(np.shape(chData)[0])

# EISGPModel 

In [9]:
# Single Point  Gaussian Process Regression
# 这个脚本中，我们把不同频率视为相互独立的变量进行考察
# 由于阻抗仍是一个复数，所以这里使用实部虚部两个task进行GP

class EISGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, num_tasks):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(
            gpytorch.means.ConstantMean(), num_tasks=num_tasks
        )
        self.covar_module = gpytorch.kernels.MultitaskKernel(
            gpytorch.kernels.RBFKernel(),num_tasks=num_tasks, rank=0
            # gpytorch.kernels.MaternKernel(nu=1.5), num_tasks=num_tasks, rank=50

        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)


def EISGPTrain(x_train, y_train, x_eval, device, training_iter = 50):
    num_tasks = y_train.shape[1]
    # Initialize likelihood and model
    likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=num_tasks).to(device)
    model = EISGPModel(x_train, y_train, likelihood, num_tasks=num_tasks).to(device)

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.5)  # Includes GaussianLikelihood parameters

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    # logger.info(f"Training for {training_iter} iterations...")
    loss_inst = []
    length_inst = []
    noise_inst = []
    for i in range(training_iter):
        optimizer.zero_grad()
        output = model(x_train)
        loss = -mll(output, y_train)
        loss.backward()
        optimizer.step()

        poi_noise = model.likelihood.noise.item()
        poi_length = model.covar_module.data_covar_module.lengthscale.item()
        loss_inst.append(loss.item())
        length_inst.append(poi_length)
        noise_inst.append(poi_noise)
        logger.info(f"Iter {i+1}/{training_iter} - Loss: {loss.item()} - Length: {poi_length} - Noise: {poi_noise}")
        
    # logger.info("Model Training Finished.")

    # Get into evaluation (predictive posterior) mode
    # Make predictions
    # logger.info("Model Evaluation Begin.")
    model.eval()
    likelihood.eval()
    with torch.no_grad(), gpytorch.settings.cholesky_jitter(1e-4):
        pred = likelihood(model(x_eval))
    # logger.info("Model Evaluation Finished.")

    return [pred, loss_inst, length_inst, noise_inst]


In [10]:
# Gaussian Process Regression
# Norm & Log Flag
NormFlag = True
LogFlag = False
EISFlag = False

# 根据EISDict的key确定日期范围，然后把日期范围映射到0~days
# Speed Rate = 10 means 1 day = 10 points
SPEED_RATE = 2
x_day = [datetime.strptime(date, '%Y%m%d') for date in EISDict.keys()]
x_day = [x_day[i] for i in eis_seq]

x_train = np.array([(poi - x_day[0]).days for poi in x_day])
x_eval = np.linspace(0,max(x_train),max(x_train)*SPEED_RATE+1)

y_train = np.hstack([chData[eis_seq,1,:],chData[eis_seq,2,:]])

n_freq = np.shape(freq_list)[0]

logger.info(f"\nx: {np.shape(x_train)} \ny: {np.shape(y_train)} \nx_pred{np.shape(x_eval)}")


[32m2025-03-29 13:19:58.403[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m20[0m - [1m
x: (12,) 
y: (12, 202) 
x_pred(33,)[0m


In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
x_train_tensor = torch.from_numpy(x_train).float().to(device)
x_eval_tensor = torch.from_numpy(x_eval).float().to(device)

# LogTransform - only log(-X), not -log(-X) for computable reverse calculation
if LogFlag:
    if EISFlag:
        _y_train = np.concatenate([np.log(np.abs(y_train[:,:n_freq]+1j*y_train[:,n_freq:])),-np.angle(y_train[:,:n_freq]+1j*y_train[:,n_freq:])], axis=1)
        y_train_log = _y_train
    else:
        y_pm_scale = np.sign(y_train)
        y_train_log = np.log(y_pm_scale * y_train)
else:
    y_train_log = y_train

# Normalization
if NormFlag:
    y_train_norm = np.zeros(np.shape(y_train))
    y_scaler = []
    for i in range(np.shape(y_train_log)[1]):
        scaler = StandardScaler()
        # scaler = MinMaxScaler()
        y_train_norm[:,i] = scaler.fit_transform(y_train_log[:,i].reshape(-1,1)).flatten()
        y_scaler.append(scaler)

# Without Normalization
else:
    y_train_norm = y_train_log

y_train_tensor = torch.from_numpy(y_train_norm).float().to(device)


## Run GPR

In [12]:
y_eval_tensor, loss_inst, length_inst, noise_inst = EISGPTrain(x_train_tensor, y_train_tensor, x_eval_tensor, device, training_iter=500)


[32m2025-03-29 13:19:59.368[0m | [1mINFO    [0m | [36m__main__[0m:[36mEISGPTrain[0m:[36m55[0m - [1mIter 1/500 - Loss: 1.5135364532470703 - Length: 0.9740769267082214 - Noise: 0.4741770327091217[0m
[32m2025-03-29 13:19:59.390[0m | [1mINFO    [0m | [36m__main__[0m:[36mEISGPTrain[0m:[36m55[0m - [1mIter 2/500 - Loss: 1.4528857469558716 - Length: 1.305506706237793 - Noise: 0.3153437674045563[0m
[32m2025-03-29 13:19:59.413[0m | [1mINFO    [0m | [36m__main__[0m:[36mEISGPTrain[0m:[36m55[0m - [1mIter 3/500 - Loss: 1.2874385118484497 - Length: 1.6886745691299438 - Noise: 0.2035830020904541[0m
[32m2025-03-29 13:19:59.437[0m | [1mINFO    [0m | [36m__main__[0m:[36mEISGPTrain[0m:[36m55[0m - [1mIter 4/500 - Loss: 1.182301640510559 - Length: 2.1121268272399902 - Noise: 0.12852269411087036[0m
[32m2025-03-29 13:19:59.460[0m | [1mINFO    [0m | [36m__main__[0m:[36mEISGPTrain[0m:[36m55[0m - [1mIter 5/500 - Loss: 1.0530176162719727 - Length: 2.56329

In [13]:
if True:
    fig = plt.figure()
    ax1 = fig.add_subplot(311)
    ax2 = fig.add_subplot(312)
    ax3 = fig.add_subplot(313)
    ax1.plot(loss_inst)
    ax1.set_title("Loss")
    ax2.plot(noise_inst)
    ax2.set_yscale('log')
    ax2.set_title("Noise")
    ax3.plot(length_inst)
    ax3.set_yscale('linear')
    ax3.set_title("LengthScale")
    fig.set_tight_layout(True)

In [17]:
# if torch.isnan(y_train_tensor).any():
#     logger.info(f"Freq: {i} - NaN Detected")
#     _poi_zero = np.zeros((n_RI))

y_eval_mean_norm = y_eval_tensor.mean.cpu().numpy()
y_eval_cov_norm = y_eval_tensor.covariance_matrix.cpu().detach().numpy()
y_eval_var_norm = y_eval_tensor.variance.cpu().numpy()


y_eval_mean = np.zeros(np.shape(y_eval_mean_norm))
y_eval_cov = np.zeros(np.shape(y_eval_cov_norm))
y_eval_var = np.zeros(np.shape(y_eval_var_norm))

# Denormalization
if NormFlag:
    for i in range(np.shape(y_train)[1]):
        y_eval_mean[:,i] = y_scaler[i].inverse_transform(y_eval_mean_norm[:,i].reshape(-1,1)).flatten()
    y_eval_scale = y_eval_mean.T.reshape(-1,1)
    y_eval_cov = (y_eval_scale@y_eval_scale.T) * y_eval_cov_norm
    y_eval_var = (y_eval_mean*y_eval_mean) * y_eval_var_norm


# Without Normalization
else:
    y_eval_mean = y_eval_mean_norm
    y_eval_cov = y_eval_cov_norm
    y_eval_var = y_eval_var_norm


if LogFlag:
    if EISFlag:
        # _y_eval_mean[:,:n_freq] = np.exp(y_eval_mean[:,:n_freq] + y_eval_var[:,:n_freq]/2)
        _y_eval_mean = np.concatenate((np.exp(y_eval_mean[:,:n_freq]),y_eval_mean[:,n_freq:]),axis=1)
        _y_eval_var = np.concatenate((np.exp(2*y_eval_mean[:,:n_freq]+y_eval_var[:,:n_freq])*(np.exp(y_eval_var[:,:n_freq])-1),+y_eval_var[:,:n_freq]),axis=1)
        
        # y_eval_mean     = _y_eval_mean
        # y_eval_var      = _y_eval_var
        
        _amp_mean = y_eval_mean[:,:n_freq]
        _phz_mean = -y_eval_mean[:,n_freq:]
        _meanR = _amp_mean*np.cos(_phz_mean)
        _meanI = _amp_mean*np.sin(_phz_mean)
    
    else:
        y_append_index = np.where([not (i in x_train) for i in x_eval])
        y_pm_append = y_pm_scale
        for i in y_append_index[0]:
            y_pm_append = np.insert(y_pm_append, i-1, y_pm_append[i-1,:], axis=0)

        # _y_eval_mean    = np.exp(y_eval_mean+y_eval_var/2) * y_pm_append
        _y_eval_mean    = np.exp(y_eval_mean) * y_pm_append
        _y_eval_var     = np.exp(2*y_eval_mean+y_eval_var)*(np.exp(y_eval_var)-1)
        _y_eval_scaler  = _y_eval_mean.T.reshape(-1,1)
        _y_eval_cov     = (_y_eval_scaler@_y_eval_scaler.T) * (np.exp(y_eval_cov)-1)

        y_eval_mean     = _y_eval_mean
        y_eval_var      = _y_eval_var
        y_eval_cov      = _y_eval_cov

        
        _meanR = y_eval_mean[:,:n_freq]
        _meanI = y_eval_mean[:,n_freq:]
        _amp_mean = np.abs(_meanR+1j*_meanI)
        _phz_mean = np.angle(_meanR+1j*_meanI)
        # y_train = np.log(y_train)
    

_poi_point = 50


fig = plt.figure(figsize=(8,8))
axis0 = fig.add_subplot(2,1,1)
axis1 = fig.add_subplot(2,1,2)

axis0.plot(x_eval, y_eval_mean[:,_poi_point], 'b.', label='Mean Prediction')
axis0.fill_between(x_eval, y_eval_mean[:,_poi_point] - 2*np.sqrt(y_eval_var[:,_poi_point]), y_eval_mean[:,_poi_point] + 2*np.sqrt(y_eval_var[:,_poi_point]), 
                 alpha=0.3, color='blue', label='95% CI')
axis0.plot(x_train, y_train[:,_poi_point], 'r.', label='Mean Train')


axis1.plot(x_eval, y_eval_mean[:,_poi_point], 'b.', label='Mean Prediction')
axis1.plot(x_train, y_train[:,_poi_point], 'r.', label='Mean Train')

# plt.plot(x_train, np.log(y_train[:,_poi_point]), 'r.', label='Mean Train')

axis0.set_xlabel('x')
axis0.set_ylabel('y')
axis0.set_title("Single-Point GPR: Mean and Variance")
axis0.legend()




<matplotlib.legend.Legend at 0x1cf654a7b50>

In [15]:
if False:
    fig, axis = plt.subplots(1,1,figsize=(12,6))
    cax = axis.imshow(np.log10(y_eval_cov), interpolation='none', cmap='coolwarm')
    fig.colorbar(cax,ax=axis)

    _poi = np.shape(y_eval_mean)[0]
    for i in range(1, _poi*2):  # 遍历 1~16 (不包含边界)
        axis.axhline(i*n_freq - 0.5, color='black', linewidth=0.5)  # 横线
        axis.axvline(i*n_freq - 0.5, color='black', linewidth=0.5)  # 竖线

    # for i in range(1, n_freq*2):  # 遍历 1~16 (不包含边界)
    #     axis.axhline(i*_poi, color='black', linewidth=0.5)  # 横线
    #     axis.axvline(i*_poi, color='black', linewidth=0.5)  # 竖线




In [None]:
# _amp_mean = np.abs(y_train[:,:n_freq]+1j*y_train[:,n_freq:])
# _phz_mean = np.angle(y_train[:,:n_freq]+1j*y_train[:,n_freq:])
amp = _amp_mean
phz = _phz_mean

# logger.info(f"amp: {np.shape(amp)}, phz: {np.shape(phz)}")


fig = plt.figure(figsize=(12, 6))

ax1 = fig.add_subplot(121, projection='3d')
ax2 = fig.add_subplot(122, projection='3d')
# ax3 = fig.add_subplot(223, projection='3d')
# ax4 = fig.add_subplot(224, projection='3d')
init_elev = 21  # 仰角
init_azim = 55  # 方位角
ax1.view_init(elev=init_elev, azim=init_azim)
ax2.view_init(elev=init_elev, azim=init_azim)


x = np.array(x_eval).flatten()
y = np.log10(chData[0,0,:]).flatten()
X, Y = np.meshgrid(x, y, indexing='ij')
ax1.plot_surface(X, Y, np.log10(amp), cmap='viridis_r', alpha=0.8)
ax2.plot_surface(X, Y, np.rad2deg(phz), cmap='viridis', alpha=0.8)


amp_train = np.abs(y_train[:,:n_freq]+1j*y_train[:,n_freq:])
phz_train = np.angle(y_train[:,:n_freq]+1j*y_train[:,n_freq:])
x = np.array(x_train).flatten()
y = np.log10(chData[0,0,:]).flatten()
X, Y = np.meshgrid(x, y, indexing='ij')
ax1.plot_surface(X, Y, np.log10(amp_train), cmap='inferno_r', alpha=0.8)
ax2.plot_surface(X, Y, np.rad2deg(phz_train), cmap='inferno', alpha=0.8)


# amp_varSpace = plotMeanVar(np.log10(amp[0]+2*amp[1]), np.log10(amp[0]-2*amp[1]), X, Y)
# phz_varSpace = plotMeanVar(phz[0]+2*phz[1],phz[0]-2*phz[1], X, Y)

# ax1.add_collection3d(amp_varSpace)
# ax2.add_collection3d(phz_varSpace)

ax1.set_zlim([2,8.5])
ax2.set_zlim([-120,30])



NameError: name 'amp' is not defined