In [1]:
import torch
import pandas as pd
import os
from pathlib import Path
from timm.models import create_model
import numpy as np
# from csvdata import CSVMAE
import utils
import modeling_pretrain


In [2]:
# 參數設定（手動設定，不需要 argparse）
csv_folder = "/mnt/d/mae/bigclassroom/foundation_dataset/456"   # 輸入 CSV 檔案
save_path = "output_csv"  # 結果輸出的資料夾
model_path = "ckpt_output_dir/checkpoint-1999.pth"  # 模型檔案
device = "cuda" if torch.cuda.is_available() else "cpu"  # 自動選擇 GPU 或 CPU
model_name = "pretrain_videomae_base_patch1_4"  # 你的 VideoMAE 模型
drop_path = 0.0  # Drop Path Rate   
mask_ratio = 0.75  # 設定 Masking 比例 (例如 75%)
num_frames = 16
decoder_depth = 8
patch_size = 1
window_size = (num_frames // 2, 4, 4)  # (T, H, W)
# 確保輸出資料夾存在
Path(save_path).mkdir(parents=True, exist_ok=True)
csv_files = sorted([f for f in os.listdir(csv_folder) if f.endswith(".csv")])


In [3]:
import os
import torch
import numpy as np
from torch.utils.data import Dataset
from masking_generator import TubeMaskingGenerator
import re
from collections import defaultdict

class CSVMAETEST(Dataset):
    def __init__(self, root_dir, num_frames=16, mask_ratio=0.75, window_size=None):
        self.root_dir = root_dir
        self.num_frames = num_frames
        self.window_size = window_size
        self.mask_generator = TubeMaskingGenerator(window_size, mask_ratio=mask_ratio)

        # 遞迴地獲取所有 CSV 檔案
        self.file_paths = []
        for subdir, _, files in os.walk(root_dir):
            for file in files:
                if file.endswith('.csv'):
                    self.file_paths.append(os.path.join(subdir, file))

        # 自訂排序函數: 先按類別名稱排序，再按數字時間戳排序
        def extract_sort_key(path):
            filename = os.path.basename(path)
            match = re.match(r"(.+?)_(\d+)\.csv", filename)  # 擷取類別名稱 & 數字
            if match:
                category = match.group(1)  # 取得類別名稱 (如 1119-1in3out-mv-mc-mid)
                time_value = int(match.group(2))  # 取得數字時間戳
                return (category, time_value)
            return (filename, 0)  # 若無法匹配則放最後

        # 排序後按類別分組
        self.file_paths.sort(key=extract_sort_key)
        self.groups = defaultdict(list)  # 每個類別對應的 CSV 檔案
        for path in self.file_paths:
            category = extract_sort_key(path)[0]  # 取得類別名稱
            self.groups[category].append(path)

        # 預先切割成 num_frames 時間段，確保分組不重複
        self.sequence_groups = []
        for category, files in self.groups.items():
            if len(files) >= num_frames:
                for i in range(0, len(files) - num_frames + 1, num_frames):  # 修改這裡，使每次分組間隔num_frames
                    self.sequence_groups.append(files[i: i + num_frames])

        # 檢查是否有可用數據
        if not self.sequence_groups:
            raise ValueError(f"CSV 檔案數量不足 {num_frames}，請檢查 `{root_dir}` 資料夾！")

        # 顯示前 5 組數據確認順序
        print("= 加載的 CSV 時間序列組: (顯示前 5 組) =")
        for i, seq in enumerate(self.sequence_groups[:5]):
            print(f"組 {i+1}: {[os.path.basename(f) for f in seq]}")

    def __len__(self):
        return len(self.sequence_groups)

    def __getitem__(self, idx):
        file_list = self.sequence_groups[idx]  # 取得對應的一組 num_frames CSV 檔案
        frames = []
        for csv_path in file_list:
            data = np.genfromtxt(csv_path, delimiter=',')
            frames.append(data)

        frames = np.stack(frames)  # (T, H, W)

        # 增加 Channel 維度 -> (C=1, T, H, W)
        frames = np.expand_dims(frames, axis=0)
        frames = torch.tensor(frames, dtype=torch.float32)

        # 生成 mask
        mask = self.mask_generator()

        return (frames, mask)


In [4]:
dataset = CSVMAETEST(root_dir=csv_folder, num_frames=num_frames, mask_ratio=mask_ratio, window_size=window_size)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)

print(f"Loading model: {model_name}")

model = create_model(
        model_name,
        pretrained=False,
        drop_path_rate=0,
        drop_block_rate=None,
        decoder_depth=decoder_depth,
        use_checkpoint=model_path
    )
checkpoint = torch.load(model_path, map_location="cpu")
model.load_state_dict(checkpoint["model"])
model.to(device)
model.eval()

= 加載的 CSV 時間序列組: (顯示前 5 組) =
組 1: ['1119-1in3out-mv-mc-mid_5.csv', '1119-1in3out-mv-mc-mid_10.csv', '1119-1in3out-mv-mc-mid_15.csv', '1119-1in3out-mv-mc-mid_20.csv', '1119-1in3out-mv-mc-mid_25.csv', '1119-1in3out-mv-mc-mid_30.csv', '1119-1in3out-mv-mc-mid_35.csv', '1119-1in3out-mv-mc-mid_40.csv', '1119-1in3out-mv-mc-mid_45.csv', '1119-1in3out-mv-mc-mid_50.csv', '1119-1in3out-mv-mc-mid_55.csv', '1119-1in3out-mv-mc-mid_60.csv', '1119-1in3out-mv-mc-mid_65.csv', '1119-1in3out-mv-mc-mid_70.csv', '1119-1in3out-mv-mc-mid_75.csv', '1119-1in3out-mv-mc-mid_80.csv']
組 2: ['1119-1in3out-mv-mc-mid_85.csv', '1119-1in3out-mv-mc-mid_90.csv', '1119-1in3out-mv-mc-mid_95.csv', '1119-1in3out-mv-mc-mid_100.csv', '1119-1in3out-mv-mc-mid_105.csv', '1119-1in3out-mv-mc-mid_110.csv', '1119-1in3out-mv-mc-mid_115.csv', '1119-1in3out-mv-mc-mid_120.csv', '1119-1in3out-mv-mc-mid_125.csv', '1119-1in3out-mv-mc-mid_130.csv', '1119-1in3out-mv-mc-mid_135.csv', '1119-1in3out-mv-mc-mid_140.csv', '1119-1in3out-mv-mc-mid_145

  checkpoint = torch.load(model_path, map_location="cpu")


PretrainVisionTransformer(
  (encoder): PretrainVisionTransformerEncoder(
    (patch_embed): PatchEmbed(
      (proj): Conv3d(1, 768, kernel_size=(2, 1, 1), stride=(2, 1, 1))
    )
    (blocks): ModuleList(
      (0-11): 12 x Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (norm): LayerNorm((768,), e

In [5]:
def inverse_normalize(log_normalized_values):
    # 將 0~1 的正規化數據反轉回 log 範圍，然後再轉回線性空間的原始範圍
    original_min = 300
    original_max = 1600
    log_values = log_normalized_values * (np.log(original_max) - np.log(original_min)) + np.log(original_min)
    original_values = np.exp(log_values)
    return original_values

In [12]:
from einops import rearrange
import torch.nn as nn
import numpy as np
import pandas as pd

loss_func = nn.MSELoss()

# 逐個 video 處理並儲存
for idx, (videos, bool_masked_pos) in enumerate(dataloader):
    videos = videos.to(device, non_blocking=True)
    bool_masked_pos = bool_masked_pos.to(device, non_blocking=True).flatten(1).to(torch.bool)

    # 進行推理
    with torch.no_grad():
        # Rearrange and process the frames
        videos_patch = rearrange(videos, 'b c (t p0) (h p1) (w p2) -> b (t h w) (p0 p1 p2 c)', p0=2, p1=patch_size, p2=patch_size)
        B, _, C = videos_patch.shape

        labels = videos_patch[bool_masked_pos].reshape(B, -1, C)
        outputs = model(videos, bool_masked_pos)
        loss = loss_func(input=outputs, target=labels)
        print(f"Loss for video {idx}: {loss.item()}")

    # 轉換為 NumPy 格式
    outputs_np = outputs.cpu().detach().numpy().reshape(-1, 2)  # (N_mask, C)
    mask_np = bool_masked_pos.cpu().detach().numpy().reshape(-1).astype(int)  # (N,)

    # ✅ **即時儲存結果**
    output_list = [outputs_np]
    mask_list = [mask_np]

    # 儲存為 CSV (攤平成一維)
    df_outputs = pd.DataFrame(np.concatenate(output_list, axis=0))  # (N, C)
    df_masks = pd.DataFrame(np.concatenate(mask_list, axis=0))  # (N,)

    print("🔍 原始 mask 形狀:", df_masks.shape)
    print("🔍 原始 output 形狀:", df_outputs.shape)

    # **還原完整的感測器數據**
    sensor_data = videos.cpu().detach().numpy().squeeze(0)  # (C=1, T, H, W) → (T, H, W)
    sensor_data = sensor_data.reshape(num_frames, -1)  # 轉換為 (T, sensors)

    # **mask=1 的地方用 `df_outputs` 預測值替換**
    predicted_values = df_outputs.to_numpy().reshape(num_frames, -1)  # (T, 預測的 sensor 值)
    mask_reshaped = df_masks.to_numpy().reshape(-1, num_frames)  # (H × W, T) → (T, H × W)
    mask_reshaped = np.tile(mask_reshaped, (2, 1))

    print(predicted_values.shape)
    print(sensor_data.shape)
    print(mask_reshaped.shape)

    # 創建與 sensor_data 相同形狀的空矩陣 (16, 16)
    predicted_values_padded = np.zeros_like(sensor_data)

    # 只填充 mask=1 的部分
    predicted_values_padded[mask_reshaped == 1] = predicted_values.flatten()

    sensor_data[mask_reshaped == 1] = predicted_values_padded[mask_reshaped == 1]  # 只替換 mask=1 的值
    sensor_data = inverse_normalize(sensor_data)

    # 先建立 DataFrame，只有 sensor_data
    final_df = pd.DataFrame(sensor_data)  # 先只加入 sensor_data

    # **命名欄位**
    sensor_columns = [f"sensor{i+1}" for i in range(sensor_data.shape[1])]

    final_df.columns = sensor_columns  # 先設定 sensor 欄位名稱

    mask_reshaped = mask_reshaped.reshape(sensor_data.shape)  # (T, H×W)

    final_df["mask"] = mask_reshaped[0].flatten()  # 直接在 DataFrame 內加入 mask

    # **即時儲存最終結果**
    final_df.to_csv(f"{save_path}/final_output_{idx}.csv", index=False)

    print(f"✅ Video {idx} 結果已儲存至 {save_path}/final_output_{idx}.csv")


Loss for video 0: 0.011690905317664146
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 0 結果已儲存至 output_csv/final_output_0.csv
Loss for video 1: 0.008535642176866531
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 1 結果已儲存至 output_csv/final_output_1.csv
Loss for video 2: 0.004539229907095432
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 2 結果已儲存至 output_csv/final_output_2.csv
Loss for video 3: 0.003322185017168522
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 3 結果已儲存至 output_csv/final_output_3.csv


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Loss for video 4: 0.003004766535013914
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 4 結果已儲存至 output_csv/final_output_4.csv
Loss for video 5: 0.0028815343976020813
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 5 結果已儲存至 output_csv/final_output_5.csv
Loss for video 6: 0.0030954480171203613
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 6 結果已儲存至 output_csv/final_output_6.csv
Loss for video 7: 0.0028788677882403135
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 7 結果已儲存至 output_csv/final_output_7.csv


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Loss for video 8: 0.0035263828467577696
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 8 結果已儲存至 output_csv/final_output_8.csv
Loss for video 9: 0.001438829000107944
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 9 結果已儲存至 output_csv/final_output_9.csv
Loss for video 10: 0.007211784832179546
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 10 結果已儲存至 output_csv/final_output_10.csv
Loss for video 11: 0.0030758732464164495
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 11 結果已儲存至 output_csv/final_output_11.csv
Loss for video 12: 0.003920829854905605
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


(16, 12)
(16, 16)
(16, 16)
✅ Video 12 結果已儲存至 output_csv/final_output_12.csv
Loss for video 13: 0.004776531830430031
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 13 結果已儲存至 output_csv/final_output_13.csv
Loss for video 14: 0.0020118195097893476
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 14 結果已儲存至 output_csv/final_output_14.csv
Loss for video 15: 0.008160900324583054
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 15 結果已儲存至 output_csv/final_output_15.csv


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Loss for video 16: 0.0026139100082218647
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 16 結果已儲存至 output_csv/final_output_16.csv
Loss for video 17: 0.004893605597317219
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 17 結果已儲存至 output_csv/final_output_17.csv
Loss for video 18: 0.006873867940157652
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 18 結果已儲存至 output_csv/final_output_18.csv
Loss for video 19: 0.00727284699678421
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 19 結果已儲存至 output_csv/final_output_19.csv


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Loss for video 20: 0.0036284083034843206
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 20 結果已儲存至 output_csv/final_output_20.csv
Loss for video 21: 0.005734368227422237
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 21 結果已儲存至 output_csv/final_output_21.csv
Loss for video 22: 0.004891693592071533
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 22 結果已儲存至 output_csv/final_output_22.csv
Loss for video 23: 0.003918604925274849
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 23 結果已儲存至 output_csv/final_output_23.csv


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Loss for video 24: 0.008214897476136684
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 24 結果已儲存至 output_csv/final_output_24.csv
Loss for video 25: 0.004668105393648148
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 25 結果已儲存至 output_csv/final_output_25.csv
Loss for video 26: 0.006916258484125137
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 26 結果已儲存至 output_csv/final_output_26.csv
Loss for video 27: 0.003576758084818721
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 27 結果已儲存至 output_csv/final_output_27.csv
Loss for video 28: 0.006916931830346584


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 28 結果已儲存至 output_csv/final_output_28.csv
Loss for video 29: 0.008237301371991634
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 29 結果已儲存至 output_csv/final_output_29.csv
Loss for video 30: 0.009567607194185257
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 30 結果已儲存至 output_csv/final_output_30.csv
Loss for video 31: 0.0034799929708242416
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 31 結果已儲存至 output_csv/final_output_31.csv


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Loss for video 32: 0.009279794991016388
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 32 結果已儲存至 output_csv/final_output_32.csv
Loss for video 33: 0.007987565360963345
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 33 結果已儲存至 output_csv/final_output_33.csv
Loss for video 34: 0.007460962515324354
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 34 結果已儲存至 output_csv/final_output_34.csv
Loss for video 35: 0.004833603743463755
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 35 結果已儲存至 output_csv/final_output_35.csv
Loss for video 36: 0.007086483296006918
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 36 結果已儲存至 output_csv/final_output_36.csv


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Loss for video 37: 0.0024790153838694096
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 37 結果已儲存至 output_csv/final_output_37.csv
Loss for video 38: 0.008927753195166588
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 38 結果已儲存至 output_csv/final_output_38.csv
Loss for video 39: 0.004233098588883877
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 39 結果已儲存至 output_csv/final_output_39.csv
Loss for video 40: 0.002145943697541952
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 40 結果已儲存至 output_csv/final_output_40.csv
Loss for video 41: 0.0071341292932629585
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 41 結果已儲存至 output_csv/final_output_41.csv


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Loss for video 42: 0.008114620111882687
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 42 結果已儲存至 output_csv/final_output_42.csv
Loss for video 43: 0.007708774879574776
🔍 原始 mask 形狀: (128, 1)
🔍 原始 output 形狀: (96, 2)
(16, 12)
(16, 16)
(16, 16)
✅ Video 43 結果已儲存至 output_csv/final_output_43.csv


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


In [None]:
import os
import torch
import numpy as np
from torch.utils.data import DataLoader
from timm.models import create_model
from utils import NativeScalerWithGradNormCount as NativeScaler
import utils
from datasets import CSVMAE
from einops import rearrange

def load_model(model_name, weights_path, device):
    print(f"Loading model: {model_name}")
    model = create_model(
        model_name,
        pretrained=False,
        drop_path_rate=0.0,
        drop_block_rate=None,
        decoder_depth=4,
        use_checkpoint=False
    )
    checkpoint = torch.load(weights_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'], strict=False)
    model.to(device)
    model.eval()
    return model

def validate(model, dataloader, device, output_file):
    loss_func = torch.nn.MSELoss()
    results = []
    total_loss = 0.0
    num_samples = 0
    
    with torch.no_grad():
        for step, (videos, bool_masked_pos) in enumerate(dataloader):

            videos = videos.to(device, non_blocking=True)
            bool_masked_pos = bool_masked_pos.to(device, non_blocking=True).flatten(1).to(torch.bool)
            
            videos_patch = rearrange(videos, 'b c (t p0) (h p1) (w p2) -> b (t h w) (p0 p1 p2 c)', p0=2, p1=1, p2=1)
            B, _, C = videos_patch.shape
            labels = videos_patch[bool_masked_pos].reshape(B, -1, C)
            outputs = model(videos, bool_masked_pos)
            # **計算 Loss**
            loss = loss_func(outputs, labels)
            loss_value = loss.item()
            total_loss += loss_value
            num_samples += 1

            print(f"Step {step + 1}/{len(dataloader)} - Loss: {loss_value:.6f}")

    # 計算平均 Loss
    avg_loss = total_loss / num_samples if num_samples > 0 else float('nan')
    print(f"✅ 平均 Loss: {avg_loss:.6f}")

    # **儲存結果**
    with open(output_file, 'w') as f:
        f.write(f"Average Loss: {avg_loss:.6f}\n")
        for res in results:
            f.write(str(res) + '\n')

    print(f"Validation results and loss saved to {output_file}")


def run_validation(model_name, weights_path, data_path, batch_size, device, output_file):
    if not os.path.exists(data_path):
        print("Error: Data path does not exist!")
        return
    
    model = load_model(model_name, weights_path, device)
    patch_size = model.encoder.patch_embed.patch_size
    window_size = (16 // 2, 4 // patch_size[0], 4 // patch_size[1])
    dataset = CSVMAE(root_dir=data_path, num_frames=16, window_size=window_size)
    dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=4, pin_memory=True)
    validate(model, dataloader, device, output_file)

model_name = 'pretrain_videomae_base_patch1_4'
weights_path = '/mnt/d/haoxiangbiya/VideoMAE/ckpt_output_dir/checkpoint-2499.pth'
data_path = '/mnt/d/mae/bigclassroom/foundation_dataset/train'
batch_size = 8
device = 'cuda'
output_file = 'validation_results.txt'

run_validation(model_name, weights_path, data_path, batch_size, device, output_file)


In [None]:
import os
import torch
import numpy as np
from torch.utils.data import Dataset
from masking_generator import TubeMaskingGenerator  # 假設你有這個mask生成器
from collections import defaultdict
import re

class CustomCSVMAE(Dataset):
    def __init__(self, root_dir, num_frames=16, mask_ratio=0.75, patch_size=16, window_size=None):
        self.root_dir = root_dir
        self.num_frames = num_frames
        self.mask_ratio = mask_ratio
        self.patch_size = patch_size
        self.mask_generator = TubeMaskingGenerator(window_size, mask_ratio=mask_ratio)

        # 遞迴地獲取所有 CSV 檔案
        self.file_paths = []
        for subdir, _, files in os.walk(root_dir):
            for file in files:
                if file.endswith('.csv'):
                    self.file_paths.append(os.path.join(subdir, file))

        # 自訂排序函數：按檔案名中的數字時間戳排序
        def extract_sort_key(path):
            filename = os.path.basename(path)
            match = re.match(r"(.+?)_(\d+)\.csv", filename)  # 擷取類別名稱 & 數字
            if match:
                category = match.group(1)  # 取得類別名稱 (如 1119-1in3out-mv-mc-mid)
                time_value = int(match.group(2))  # 取得數字時間戳
                return (category, time_value)
            return (filename, 0)  # 若無法匹配則放最後

        # 排序後按類別分組
        self.file_paths.sort(key=extract_sort_key)
        self.groups = defaultdict(list)  # 每個類別對應的 CSV 檔案
        for path in self.file_paths:
            category = extract_sort_key(path)[0]  # 取得類別名稱
            self.groups[category].append(path)

        # 按照 num_frames 切割成時間段
        self.sequence_groups = []
        for category, files in self.groups.items():
            if len(files) >= num_frames:
                for i in range(len(files) - num_frames + 1):
                    self.sequence_groups.append(files[i: i + num_frames])

        if not self.sequence_groups:
            raise ValueError(f"CSV 檔案數量不足 {num_frames}，請檢查 `{root_dir}` 資料夾！")

        # 顯示前 5 組數據
        print("= 加載的 CSV 時間序列組: (顯示前 5 組) =")
        for i, seq in enumerate(self.sequence_groups[:5]):
            print(f"組 {i+1}: {[os.path.basename(f) for f in seq]}")

    def __len__(self):
        return len(self.sequence_groups)

    def __getitem__(self, idx):
        file_list = self.sequence_groups[idx]  # 取得對應的一組 num_frames CSV 檔案
        frames = []
        bool_masked_pos = []  # 用來儲存每個 frame 的 mask 訊息

        # 讀取每個影片的 frames 和對應的 mask
        for csv_path in file_list:
            data = np.genfromtxt(csv_path, delimiter=',')
            frames.append(data)

            # 生成 mask，這部分可以使用您先前的 `TubeMaskingGenerator`
            mask = np.random.binomial(1, self.mask_ratio, data.shape)  # 隨機生成的 mask
            bool_masked_pos.append(mask)

        frames = np.stack(frames)  # (T, H, W)
        frames = np.expand_dims(frames, axis=0)  # 增加 Channel 維度 (C=1, T, H, W)
        frames = torch.tensor(frames, dtype=torch.float32)

        bool_masked_pos = np.stack(bool_masked_pos)  # (T, H, W)
        bool_masked_pos = torch.tensor(bool_masked_pos, dtype=torch.bool)

        return (frames, bool_masked_pos)

# 使用示例：
csv_folder = "/path/to/your/csv/folder"
dataset = CustomCSVMAE(root_dir=csv_folder, num_frames=16, mask_ratio=0.75, patch_size=16)

# 打印某一組資料，檢查資料是否正確
sample_frames, sample_mask = dataset[0]  # 顯示第一組資料
print("Sample Frames Shape:", sample_frames.shape)
print("Sample Mask Shape:", sample_mask.shape)
