### 人类活动识别——条件扩散

In [None]:
import os

# ---- Experiment output layout ------------------------------------------
parent_dir = "./output/HAR_CD"
task_name = "base_MSE_Exp"
task_dir = os.path.join(parent_dir, task_name)

# Checkpoints and generated samples live in sibling folders of the task
# directory; both are created up front (no error if they already exist).
weights_dir, samples_dir = (
    os.path.join(task_dir, leaf) for leaf in ("weights", "samples")
)
for _out_dir in (weights_dir, samples_dir):
    os.makedirs(_out_dir, exist_ok=True)

# JSON files that accumulate the per-epoch loss / FID records.
loss_json_path, fid_json_path = (
    os.path.join(task_dir, f"{task_name}_{metric}.json")
    for metric in ("loss", "fid")
)

# ---- Input data layout -------------------------------------------------
data_dir = "./data/RFID_multi_628"
classes_path = os.path.join(data_dir, "data.yml")
train_dir = os.path.join(data_dir, "dataset/train")
eval_dir = os.path.join(data_dir, "dataset/eval")

# ---- Training schedule -------------------------------------------------
total_batch = 4       # number of train/evaluate rounds run by the pipeline
epoch_interval = 50   # epochs trained in each round
current_epoch = 0     # running epoch counter, advanced once per round

In [None]:
# Load the class definitions

from utils.ConfigUtils import get_classes

# get_classes() returns a mapping; only its values are kept, in the
# mapping's iteration order.
_class_map = get_classes(classes_path)
classes = [*_class_map.values()]
num_classes = len(classes)

print(classes)

In [None]:
# Data preprocessing
from torchvision import transforms
from model.Normalization import RobustNorm


def _image_pipeline(low, high):
    """Build the 3-step pipeline shared by train_transform and eval_transform.

    Steps, in order: RobustNorm(low, high), resize to 299x299, then tile
    the tensor three times along its first dimension.
    """
    return transforms.Compose([
        RobustNorm(low, high),
        transforms.Resize(size=(299, 299)),
        transforms.Lambda(lambda x: x.repeat(3, 1, 1)),
    ])


# Normalisation only: used by the datasets that feed the training loaders.
transform = transforms.Compose([RobustNorm(-68.0, 68.0)])

# Pipelines for the datasets handed to the FID computation.
# NOTE(review): eval_transform normalises with (-1, 1) while the others use
# (-68, 68) -- presumably because generated samples are already in the
# normalised range; confirm against RobustNorm / save_samples.
train_transform = _image_pipeline(-68.0, 68.0)
eval_transform = _image_pipeline(-1.0, 1.0)

In [None]:
# Load the datasets
from model.RFID_Dataset import RFID_Dataset,build_class_datasets

# The train and eval splits are built with identical settings; only the
# source directory differs.
_split_kwargs = dict(T=32, step=1, num_channels=3, transform=transform)

train_dataset = RFID_Dataset(train_dir, **_split_kwargs)
eval_dataset = RFID_Dataset(eval_dir, **_split_kwargs)

# Report the size of each split.
print(f"训练集的数据个数: {len(train_dataset)}")
print(f"验证集的数据个数: {len(eval_dataset)}")

In [None]:
# Settings shared by the full and the per-class training sets below.
_train_window = dict(T=32, step=1, num_channels=3)

# Full training set (all classes together), using train_transform.
all_train_dataset = RFID_Dataset(
    train_dir, transform=train_transform, **_train_window
)

# Per-class training sets. Note that this helper takes the keyword
# `transforms` (plural), unlike RFID_Dataset.
train_datasets_dict = build_class_datasets(
    train_dir, transforms=train_transform, **_train_window
)

# The full set is stored alongside the per-class sets under key -1.
train_datasets_dict[-1] = all_train_dataset

# Report how many items each entry holds.
for label, dataset in train_datasets_dict.items():
    print(f"label {label} 数据个数为: {len(dataset)}")

In [None]:
# Build the model

from model.base.UNet import UNet
# from model.v1.UNet import UNet
# from model.v2.UNet import UNet
# from model.v3.UNet import UNet
# from model.v4.UNet import UNet

from model.BetaScheduler import LinearBetaScheduler
from model.CD_Model import CD_Model
from model.ModelWorker.CDModelWorker import CDModelWorker
import torch
from torchkeras import summary

input_shape = (3, 32, 12)

# CD_Model wraps the UNet backbone together with a linear beta schedule.
model = CD_Model(
    UNet(
        input_shape=input_shape,
        init_features=64,
        embed_dim=128,
        num_heads=1,
        num_groups=16,
    ),
    LinearBetaScheduler(timesteps=1000,beta_end=0.02),
    # Taken from the class list loaded from data.yml rather than a
    # hard-coded 6, so the model agrees with the range(num_classes)
    # sampling loops used later in this notebook.
    num_classes=num_classes,
    # NOTE(review): same value as the UNet's embed_dim above -- presumably
    # the two must match; confirm against CD_Model.
    embed_dim=128,
    enable_guidance=True,
)

model_worker = CDModelWorker(model)

print(f"{input_shape=}")

# Single-element timestep / class-label tensors passed to summary()
# together with the input shape.
time = torch.tensor([0], dtype=torch.long)
condition = torch.tensor([0], dtype=torch.long)
model_info = summary(model, input_shape=input_shape, time=time, condition=condition)

In [None]:
# Training setup: data loaders, criterion, optimizer, LR schedule
from torch.utils.data import DataLoader
from torch import nn
from torch import optim
from model.Loss import *

# Both loaders share batch size and worker count; only the training
# loader shuffles.
_loader_kwargs = dict(batch_size=256, num_workers=4)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
eval_loader = DataLoader(eval_dataset, **_loader_kwargs)

# Training criterion. Alternatives kept for experimentation:
# loss = nn.MSELoss()
# loss = MinSNRLoss()
# loss=SigmoidLoss()
loss = InverseSigmoidLoss()

optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

# The schedule multiplies the learning rate by gamma on each scheduler step.
# Alternative schedule:
# scheduler=optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)

In [None]:
# Load a saved checkpoint

load_epoch = 200
print(f"{load_epoch=}")
# Build the path the same way the training pipeline builds its save path.
model_worker.load(
    os.path.join(weights_dir, f"HAR_CD_{load_epoch}.pth")
)

# Keep the epoch counter in step with the restored weights. Without this,
# training resumed from this checkpoint would label its checkpoints and
# sample folders from 0 again and could overwrite existing files.
# NOTE(review): optimizer / LR-scheduler state is not restored here unless
# model_worker.load does so internally -- confirm.
current_epoch = load_epoch

In [None]:
# Training pipeline. Each round: train -> save checkpoint -> per-timestep
# loss -> generate samples per class -> FID against the training data.

from utils.JsonUtils import *
from model.RFID_Dataset import save_samples
from utils.SimilarityUtils import *

# Timesteps at which the loss is evaluated: 0, 100, ..., 1000. Identical
# for every round, so it is built once.
# NOTE(review): the upper endpoint 1000 equals the scheduler's `timesteps`
# -- confirm evaluate_sequence accepts it.
sequence=torch.linspace(0, 1000,10+1,dtype=torch.long).tolist()

# Settings shared by the datasets built from the generated samples.
sample_window=dict(T=32, step=32, num_channels=3)

for _ in range(total_batch):
    print("="*30)
    current_epoch+=epoch_interval
    print(f"Next target epoch:{current_epoch}")

    # Train for one interval
    model_worker.train(
        criterion=loss,
        optimizer=optimizer,
        train_loader=train_loader,
        # eval_loader=eval_loader,
        epochs=epoch_interval,
        scheduler=scheduler,
        cond_dropout_rate=0.1,
        step_range=None,
        enable_board=False,
        verbose=1
    )

    # Save the checkpoint, tagged with the epoch just reached
    weight_path=os.path.join(weights_dir, f"HAR_CD_{current_epoch}.pth")
    model_worker.save(weight_path)

    # Per-timestep evaluation with plain MSE.
    # NOTE(review): this is measured on train_loader, not eval_loader --
    # confirm that is intended.
    loss_group=model_worker.evaluate_sequence(
        eval_loader=train_loader,
        criterion=nn.MSELoss(),
        time=sequence,
        verbose=1
    )
    loss_info={
        "epoch":current_epoch,
        "loss":loss_group
    }
    append_data(loss_info, loss_json_path)

    # Directory that receives this epoch's generated samples
    samples_subdir=os.path.join(samples_dir, f"epoch_{current_epoch}")

    # DDPM sampling. The loop variable has its own name so it no longer
    # shadows the outer loop variable; the printed text is unchanged.
    for sample_round in range(1):
        print(f"batch={sample_round}")
        for condition in range(num_classes):
            # Generate data for this class label (first argument is
            # presumably the number of samples -- confirm).
            datas = model_worker.generate_sample_batch(
                100,
                condition,
                guidance_scale=2,
            )

            # Save under a sub-folder named after the class label
            save_samples(
                datas,
                output_dir=f"{samples_subdir}/{condition}",
                merge=True,
            )
        print("-" * 20)

    # Full generated set
    all_test_dataset=RFID_Dataset(
        samples_subdir,
        transform=eval_transform,
        **sample_window,
    )

    # Per-class generated sets, plus the full set under key -1
    test_datasets_dict=build_class_datasets(
        samples_subdir,
        transforms=eval_transform,
        **sample_window,
    )
    test_datasets_dict[-1]=all_test_dataset

    # Compute FID and append it to the FID log
    fid_group=execute_fid_pipeline(
        train_datasets_dict,
        test_datasets_dict,
        classes=classes
    )
    for label,fid in fid_group.items():
        print(f"label {label} : {fid}")
    fid_info={
        "epoch":current_epoch,
        "fid":fid_group
    }
    append_data(fid_info, fid_json_path)

In [None]:
# Evaluation pipeline: for every saved checkpoint, recompute the
# per-timestep loss and the FID of that epoch's previously saved samples.

from utils.JsonUtils import *
from utils.SimilarityUtils import *

# Collect checkpoints as {epoch: path}. The epoch is the integer after the
# last "_" in the file name (e.g. HAR_CD_50.pth -> 50).
weights_dict={}
for weight_file in os.listdir(weights_dir):
    if not weight_file.endswith('.pth'):
        continue
    try:
        epoch = int(weight_file.split('.')[0].split('_')[-1])
    except ValueError:
        # A stray .pth file without a numeric suffix must not abort the
        # whole evaluation run; report it and move on.
        print(f"skip weight file without epoch suffix: {weight_file}")
        continue
    weights_dict[epoch]=os.path.join(weights_dir, weight_file)
# Process checkpoints in ascending epoch order
weights_dict=dict(sorted(weights_dict.items(), key=lambda item: item[0]))

# Timesteps at which the loss is evaluated: 0, 100, ..., 1000. Identical
# for every checkpoint, so it is built once.
sequence=torch.linspace(0, 1000,10+1,dtype=torch.long).tolist()

# Settings shared by the datasets built from the generated samples.
sample_window=dict(T=32, step=32, num_channels=3)

for current_epoch, weight_path in weights_dict.items():
    print("="*30)
    model_worker.load(weight_path)
    print(f"weight: {weight_path}")

    # Per-timestep evaluation with plain MSE.
    # NOTE(review): measured on train_loader, not eval_loader -- confirm.
    loss_group=model_worker.evaluate_sequence(
        eval_loader=train_loader,
        criterion=nn.MSELoss(),
        time=sequence,
        verbose=1
    )
    loss_info={
        "epoch":current_epoch,
        "loss":loss_group
    }
    append_data(loss_info, loss_json_path)

    # Directory holding the samples generated for this epoch
    samples_subdir=os.path.join(samples_dir, f"epoch_{current_epoch}")

    # Full generated set
    all_test_dataset=RFID_Dataset(
        samples_subdir,
        transform=eval_transform,
        **sample_window,
    )

    # Per-class generated sets, plus the full set under key -1
    test_datasets_dict=build_class_datasets(
        samples_subdir,
        transforms=eval_transform,
        **sample_window,
    )
    test_datasets_dict[-1]=all_test_dataset

    # Compute FID and append it to the FID log
    fid_group=execute_fid_pipeline(
        train_datasets_dict,
        test_datasets_dict,
        classes=classes,
    )
    for label,fid in fid_group.items():
        print(f"label {label} : {fid}")
    fid_info={
        "epoch":current_epoch,
        "fid":fid_group
    }
    append_data(fid_info, fid_json_path)

In [None]:
# Plot the loss curves

from utils.JsonUtils import load_data
from utils.DataUtils.Visualization import plot_curves

# Re-key the logged records by epoch; if an epoch was logged more than
# once, the last record wins.
data = load_data(loss_json_path)
loss_data = {record["epoch"]: record["loss"] for record in data}

plot_curves(loss_data, f"The loss of {task_name}")

In [None]:
# Plot the FID metric

from utils.JsonUtils import load_data
from utils.DataUtils.Visualization import plot_curves

# Re-key the logged records by epoch; if an epoch was logged more than
# once, the last record wins. Stored under its own name so the loss
# mapping built by the loss-plot cell is not overwritten with FID values.
data=load_data(fid_json_path)
fid_data = {}
for item in data:
    fid_data[item["epoch"]] = item["fid"]

plot_curves(
    fid_data,
    f"FID of {task_name}",
    show_points=True,
)


In [None]:
# DDPM sampling (extra set for the currently loaded weights)
from model.RFID_Dataset import save_samples

# The extra samples go to their own "<epoch>_extra" folder, separate from
# the folder the training pipeline writes for the same epoch.
samples_subdir=os.path.join(samples_dir, f"epoch_{current_epoch}_extra")

for batch in range(1):
    print(f"{batch=}")
    for i in range(num_classes):
        condition = i
        class_dir = f"{samples_subdir}/{condition}"

        # Generate data for this class label (first argument is presumably
        # the number of samples -- confirm), then save it under the
        # class's sub-folder.
        datas = model_worker.generate_sample_batch(200, condition, guidance_scale=2)
        save_samples(datas, output_dir=class_dir, merge=True)
    print("=" * 20)

In [None]:
# Compute FID for the samples in samples_subdir

from utils.SimilarityUtils import *

# Settings shared by the full and per-class generated sets.
_sample_window = dict(T=32, step=32, num_channels=3)

# Full generated set
all_test_dataset = RFID_Dataset(
    samples_subdir, transform=eval_transform, **_sample_window
)

# Per-class generated sets, plus the full set under key -1
test_datasets_dict = build_class_datasets(
    samples_subdir, transforms=eval_transform, **_sample_window
)
test_datasets_dict[-1] = all_test_dataset

# FID of the generated sets against the training sets
fid_group = execute_fid_pipeline(
    train_datasets_dict,
    test_datasets_dict,
    classes=classes,
)
for label, fid in fid_group.items():
    print(f"label {label} : {fid}")

# NOTE(review): unlike the pipeline cells, the visible part of this cell
# builds the record but does not append it to the FID log -- confirm
# whether that is intentional.
fid_info = {"epoch": current_epoch, "fid": fid_group}