# 01 - 特征提取与可视化（q_norm + baseline10 + delta）

本 notebook 用于：
1) 可视化电池的原始充电曲线（电压/电流随时间）
2) 可视化 **delta 序列**（`v_delta/i_delta` 随 `q_norm`）
3) 可视化三通道 heatmap（`[v_delta, i_delta, q_norm]`，即 image 输入）
4) 可视化数据集中：**每个电池的循环数与健康度（SOH）** 的关系
5) 可视化若干电池的 SOH 随循环的衰减轨迹

当前主线输入定义：
- 横轴：`q_norm ∈ [0,1]`（由 `q(t)=∫|I|dt` 得到）
- baseline：每块电池前10个有效循环的均值序列（只对 v/i）
- delta：`v_delta = v_norm - baseline_v`，`i_delta = i_norm - baseline_i`；`q_norm` 不扣除
- 三通道序列/ViT输入：`[v_delta, i_delta, q_norm]`


In [None]:
import sys
# Put the parent directory at the front of the module search path;
# presumably that is where the `src` package imported below lives.
sys.path.insert(0, '..')

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# Sans-serif font fallback order: SimHei first, then DejaVu Sans.
# NOTE(review): SimHei is presumably listed first so that the Chinese plot
# titles used later render correctly -- confirm the font is installed.
matplotlib.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']
# Per the matplotlib rcParams docs this selects the ASCII hyphen instead of
# the Unicode minus sign for negative tick labels.
matplotlib.rcParams['axes.unicode_minus'] = False

from src.data import load_processed_batteries
from src.data.feature import _compute_baseline_sequence, _cycle_sequence_qnorm
from src.data import (
    FEATURE_NAMES,
    extract_features_from_cycle,
    extract_all_features_from_battery,
    create_sequence,
    create_heatmap,
)

# Dataset to inspect; also used below to build the processed-data path and
# later in plot titles.
DATASET_NAME = 'MATR'
# Load every processed battery of the chosen dataset. The result must support
# len() and integer indexing, as used on the following lines.
batteries = load_processed_batteries(f'../data/processed/{DATASET_NAME}')
print('加载电池数:', len(batteries))
# The first battery serves as the running example for the per-battery plots.
# NOTE: raises IndexError if nothing was loaded (e.g. wrong relative path).
battery = batteries[0]
print('示例电池:', battery.cell_id, 'cycles=', len(battery), 'dataset=', battery.dataset)


## A) 原始充电曲线可视化（电压/电流随时间）
从同一电池挑选若干个循环，画出 `V(t)` 与 `I(t)`。
这一步用于观察协议变化/异常循环（仅用于数据理解，不代表模型输入横轴）。

In [None]:
# Pick up to five cycle positions (first, the three quartiles, last) and
# compare their raw charging curves. Out-of-range and duplicate positions are
# dropped, which matters for very short batteries.
n_cycles = len(battery)
candidates = (0, n_cycles // 4, n_cycles // 2, 3 * n_cycles // 4, n_cycles - 1)
idxs = sorted({pos for pos in candidates if 0 <= pos < n_cycles})

fig, axes = plt.subplots(2, 1, figsize=(12, 7), sharex=False)

# One panel per signal: (cycle attribute to plot, panel title, y-axis label).
panels = (
    ('voltage', '原始充电电压曲线 V(t)', 'voltage (V)'),
    ('current', '原始充电电流曲线 I(t)', 'current (A)'),
)

for ax, (signal, title, ylabel) in zip(axes, panels):
    for ci in idxs:
        c = battery.cycles[ci]
        ax.plot(
            c.time,
            getattr(c, signal),
            linewidth=1,
            label=f'cycle_idx={ci}, cycle_no={c.cycle_number}',
        )
    ax.set_title(title)
    ax.set_xlabel('time (s)')
    ax.set_ylabel(ylabel)
    ax.grid(True, alpha=0.3)
    ax.legend(fontsize=8)

plt.tight_layout()
plt.show()


## B) delta 序列可视化（v_delta/i_delta 随 q_norm）
对同一电池选择多个循环，分别画 `v_delta(q_norm)` 和 `i_delta(q_norm)`，以观察退化导致的形状变化。

In [None]:
# Number of resampled points per sequence; reused by later cells.
num_samples = 200
# Per-battery baseline sequence that the delta channels are computed against.
baseline = _compute_baseline_sequence(battery, num_samples=num_samples)

# Cycle positions whose delta curves are compared: index 10 plus the three
# quartiles of the battery's life, restricted to valid, unique indices.
n_cycles = len(battery)
pick = sorted(
    {pos for pos in (10, n_cycles // 4, n_cycles // 2, 3 * n_cycles // 4) if 0 <= pos < n_cycles}
)

fig, axes = plt.subplots(2, 1, figsize=(12, 7), sharex=True)
ax_v, ax_i = axes

for ci in pick:
    c = battery.cycles[ci]
    delta_seq = create_sequence(c, num_samples=num_samples, battery=battery, baseline=baseline)
    # Columns 0, 1 and 2 of the sequence are read as v_delta, i_delta and q_norm.
    v_delta, i_delta, q_norm = delta_seq[:, 0], delta_seq[:, 1], delta_seq[:, 2]
    ax_v.plot(q_norm, v_delta, linewidth=1, label=f'cycle_idx={ci}')
    ax_i.plot(q_norm, i_delta, linewidth=1, label=f'cycle_idx={ci}')

# Shared formatting for both panels.
for ax, title, ylabel in zip(axes, ('v_delta(q_norm)', 'i_delta(q_norm)'), ('v_delta', 'i_delta')):
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.grid(True, alpha=0.3)
    ax.legend(fontsize=8)

# The x axis is shared, so only the bottom panel carries the label.
axes[1].set_xlabel('q_norm')

plt.tight_layout()
plt.show()


## C) heatmap 可视化（image 输入）
heatmap 的三通道语义为：`[v_delta, i_delta, q_norm]`。

In [None]:
# Build a heatmap for the example battery from (at most) its first 200 cycles.
# `num_samples` is not set here; it must already be defined by an earlier cell.
window_size = 80
heatmap = create_heatmap(
    battery,
    window_size=window_size,
    num_samples=num_samples,
    start_cycle_idx=0,
    end_cycle_idx=min(len(battery), 200),
)
print('heatmap shape:', heatmap.shape)

# Show each slice along the last axis of the heatmap as its own image.
titles = ['v_delta', 'i_delta', 'q_norm']
fig, axes = plt.subplots(1, 3, figsize=(16, 4))
for i, (ax, title) in enumerate(zip(axes, titles)):
    im = ax.imshow(heatmap[:, :, i], aspect='auto', cmap='viridis')
    ax.set_title(title)
    ax.set_xlabel('q_norm sample index')
    ax.set_ylabel('history (resampled cycles)')
    plt.colorbar(im, ax=ax)

plt.tight_layout()
plt.show()


## D) 数据集层面：每个电池的 SOH 与循环数关系
我们统计每个电池：
- 循环总数 `total_cycles`
- 最小 SOH（退化到的最低健康度）
- 末端 SOH（最后一个有效 SOH）
并可视化它们与循环数的关系。

In [None]:
# Per-battery summary statistics, collected in parallel lists (one entry per
# battery that has at least one valid SOH value).
total_cycles = []
min_soh = []
end_soh = []
cell_ids = []

for b in batteries:
    soh_arr = b.get_soh_array()
    if soh_arr is None or len(soh_arr) == 0:
        continue
    # Keep only finite SOH values: a single NaN would otherwise turn the
    # minimum into NaN, and a trailing NaN would be reported as the end SOH
    # even though that statistic is meant to be the last *valid* SOH.
    soh_arr = np.asarray(soh_arr, dtype=float)
    valid_soh = soh_arr[np.isfinite(soh_arr)]
    if valid_soh.size == 0:
        # Nothing usable for this battery; skip it like an empty array.
        continue
    total_cycles.append(len(b))
    min_soh.append(float(valid_soh.min()))
    end_soh.append(float(valid_soh[-1]))
    cell_ids.append(b.cell_id)

# Convert to arrays for plotting.
total_cycles = np.array(total_cycles)
min_soh = np.array(min_soh)
end_soh = np.array(end_soh)

print('统计到的电池数:', len(total_cycles))

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: lowest SOH reached versus number of cycles.
axes[0].scatter(total_cycles, min_soh, s=12, alpha=0.6)
axes[0].set_title('total_cycles vs min_soh')
axes[0].set_xlabel('total_cycles')
axes[0].set_ylabel('min_soh')
axes[0].grid(True, alpha=0.3)

# Right panel: last valid SOH versus number of cycles.
axes[1].scatter(total_cycles, end_soh, s=12, alpha=0.6, color='tab:orange')
axes[1].set_title('total_cycles vs end_soh')
axes[1].set_xlabel('total_cycles')
axes[1].set_ylabel('end_soh')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()


## E) 电池 SOH 随循环变化
从数据集中按固定步长抽取至多 20 块电池，绘制其 SOH 随循环的变化曲线，用于直观检查 SOH 衰减轨迹。

In [None]:
# Sample at most `n_plot` batteries at a fixed stride across the dataset.
n_plot = 20
step = max(1, len(batteries) // n_plot)
idxs = list(range(0, len(batteries), step))[:n_plot]

plt.figure(figsize=(12, 5))
# Count the curves actually drawn: batteries without SOH data are skipped, so
# len(idxs) can overstate what the figure shows.
n_plotted = 0
for bi in idxs:
    b = batteries[bi]
    soh = b.get_soh_array()
    if soh is None or len(soh) == 0:
        continue
    plt.plot(soh, linewidth=1, alpha=0.6)
    n_plotted += 1

# Horizontal reference line at SOH = 0.8.
plt.axhline(0.8, linestyle='--', color='r', linewidth=1, label='0.8')
plt.title(f'SOH degradation trajectories ({DATASET_NAME}) - {n_plotted} batteries')
plt.xlabel('cycle index')
plt.ylabel('SOH')
plt.ylim(0, 1.05)
plt.grid(True, alpha=0.3)
plt.legend()
plt.tight_layout()
plt.show()
