In [1]:

import os
import sys
import os.path as osp
import argparse
import cv2
from typing import List, Dict, Any
import numpy as np
from glob import glob
import random
import time
import csv
import h5py
import copy
import pandas as pd
import importlib
from functools import partial
from omegaconf import OmegaConf
from tqdm import tqdm
from rich.progress import track
from datetime import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau

import torchvision
from torchvision import transforms
from torchsummary import summary
from torch.utils.tensorboard import SummaryWriter


from dataset.mpiigaze import MPIIGazeDataset

from losses.stereo_loss import IterationLoss, StereoL1Loss
from losses.gaze_loss import GazeLoss
from utils.gaze import rotation_matrix_2d, pitchyaw_to_vector, vector_to_pitchyaw
from utils.helper import AverageMeter

def str2bool(v):
		if isinstance(v, bool):
			return v
		if v.lower() in ("yes", "true", "t", "y", "1"):
			return True
		elif v.lower() in ("no", "false", "f", "n", "0"):
			return False
		else:
			raise argparse.ArgumentTypeError("Boolean value expected.")
def set_seed(seed_value=42):
	random.seed(seed_value)
	np.random.seed(seed_value)
	torch.manual_seed(seed_value)
	if torch.cuda.is_available():
		torch.cuda.manual_seed(seed_value)
		torch.cuda.manual_seed_all(seed_value)
		torch.backends.cudnn.deterministic = True
		torch.backends.cudnn.benchmark = False

from dataset.xgaze import XGazeDataset
from utils.augment import RandomMultiErasing

2024-03-30 16:23:12.332534: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-30 16:23:12.354114: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
## train
"""

- type: ToPILImage
- type: ColorJitter
    brightness: 1.0
    contrast: 0.1
    saturation: 0.1
- type: RandomAffine
    degrees: 0.0
    scale: [0.99, 1.01]
    translate: [0.01, 0.01]
- type: ToTensor
- type: Normalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]

- type: RandomMultiErasing
    p: 0.5
    proportion: [0.5, 0.6]
    dot_size: [0.05, 0.3]        
"""

## test
"""
- type: ToPILImage
- type: ToTensor
- type: Normalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
 
"""



'\n- type: ToPILImage\n- type: ToTensor\n- type: Normalize\n    mean: [0.485, 0.456, 0.406]\n    std: [0.229, 0.224, 0.225]\n \n'

In [5]:


mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

augment_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ColorJitter(brightness=1.0, contrast=0.1, saturation=0.1),
    transforms.RandomAffine(degrees=0.0, scale=[0.99, 1.01], translate=[0.01, 0.01]),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
	transforms.Resize((128, 128)),
    RandomMultiErasing(p=0.5, proportion=[0.5, 0.6], dot_size=[0.05, 0.3]),
])

preprocess = transforms.Compose([
                transforms.ToPILImage(),
                transforms.ToTensor(),
                transforms.Resize((128, 128), antialias=True),
                transforms.Normalize(mean=mean, std=std)
            ])

xgaze_dataset = XGazeDataset(dataset_path="/home/jqin/wk/Datasets/xgaze_v2_128",
								color_type='bgr',
								image_transform=augment_transform,
								keys_to_use=['subject0000.h5', 'subject0003.h5', 'subject0005.h5'],
								camera_tag='novel_train',
								)

print( xgaze_dataset.idx_to_kv)
# print( xgaze_dataset.key_idx_dict)
# xgaze_dataloader = DataLoader(xgaze_dataset, batch_size=100, shuffle=True, num_workers=4,)
# for i, data in enumerate(xgaze_dataloader):
# 	print(' data: ', data)
# 	if i > 0:
# 		break
	

Output()

read file:  /home/jqin/wk/Datasets/xgaze_v2_128/subject0000.h5
read file:  /home/jqin/wk/Datasets/xgaze_v2_128/subject0003.h5
read file:  /home/jqin/wk/Datasets/xgaze_v2_128/subject0005.h5


[(0, 0, 6), (0, 1, 3), (0, 3, 10), (0, 4, 9), (0, 6, 10), (0, 7, 6), (0, 9, 6), (0, 10, 7), (0, 12, 7), (0, 13, 16), (0, 15, 13), (0, 16, 3), (0, 18, 31), (0, 19, 33), (0, 21, 27), (0, 22, 31), (0, 24, 28), (0, 25, 18), (0, 27, 30), (0, 28, 24), (0, 30, 33), (0, 31, 19), (0, 33, 30), (0, 34, 33), (0, 36, 52), (0, 37, 39), (0, 39, 36), (0, 40, 46), (0, 42, 36), (0, 43, 46), (0, 45, 42), (0, 46, 40), (0, 48, 43), (0, 49, 48), (0, 51, 45), (0, 52, 51), (0, 54, 61), (0, 55, 66), (0, 57, 54), (0, 58, 69), (0, 60, 63), (0, 61, 63), (0, 63, 55), (0, 64, 67), (0, 66, 55), (0, 67, 60), (0, 69, 64), (0, 70, 58), (0, 72, 73), (0, 73, 82), (0, 75, 84), (0, 76, 75), (0, 78, 79), (0, 79, 82), (0, 81, 79), (0, 82, 78), (0, 84, 78), (0, 85, 82), (0, 87, 88), (0, 88, 81), (0, 90, 96), (0, 91, 94), (0, 93, 94), (0, 94, 102), (0, 96, 100), (0, 97, 90), (0, 99, 105), (0, 100, 99), (0, 102, 106), (0, 103, 100), (0, 105, 90), (0, 106, 102), (0, 108, 123), (0, 109, 115), (0, 111, 118), (0, 112, 109), (0, 114

In [44]:
from utils.math import rotation_matrix_2d


rot_my:  torch.Size([10, 3, 3])
 rot_my:  tensor([[ 0.9073, -0.1042, -0.4073],
        [ 0.0000,  0.9688, -0.2480],
        [ 0.4204,  0.2250,  0.8790]])
rot_hisadome:  torch.Size([10, 3, 3])
 rot_hisadome:  tensor([[ 0.9073, -0.1042, -0.4073],
        [ 0.0000,  0.9688, -0.2480],
        [ 0.4204,  0.2250,  0.8790]])


In [None]:
# import logging
# import torch
# import numpy as np
# from rich.progress import track
# from torch.utils.data import Dataset
# from typing import List, Optional

# import abc


# class AbstractIndex(metaclass=abc.ABCMeta):
#     def __init__(self):
#         self.indices = self.build_index()

#     @abc.abstractmethod
#     def build_index(self):
#         pass

#     def __getitem__(self, idx):/python3.8/site-packages/torchvision/transforms/functional.py:1603: 
# UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) 
# will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. 
# To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None 
# (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). 
# This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True).
#   warnings.warn(


# class XGazeIndex(AbstractIndex):
#     def __init__(
#         self,
#         root: str,
#         subject_keys: Optional[List[str]] = None,
#         cam_indices: Optional[List[int]] = None,
#     ) -> None:
#         self.root = root
#         print(" >>>>>>>>>>>>>> root: ", root  )
#         if subject_keys is None:
#             subject_paths = glob.glob(os.path.join(self.root, "*.h5"))
#             subject_keys = [os.path.basename(subj_path) for subj_path in subject_paths]
#             subject_keys.sort()
#         self.subject_keys = subject_keys

#         if cam_indices is None:
#             cam_indices = list(range(18))
#         self.cam_indices = cam_indices

#         logger = logging.getLogger(__class__.__name__)
#         logger.info(f"Number of subjects = {len(self.subject_keys)}")
#         print(" subject_keys: ", self.subject_keys)
#         logger.info(f"Camera index to use = {self.cam_indices}")

#         super().__init__()

#     def build_index(self):
#         indices = []
#         for subject in track(self.subject_keys, description="Building xgaze index"):
#             with h5py.File(os.path.join(self.root, subject), swmr=True) as subject_hdf:

#                 for img_idx, (frame_idx, cam_idx) in enumerate(
#                     zip(
#                         # subject_hdf["frame_index"][:, 0], subject_hdf["cam_index"][:, 0]
#                         subject_hdf["frame_index"][:], subject_hdf["cam_index"][:]
#                     )
#                 ):
#                     cam_idx -= 1
#                     if cam_idx in self.cam_indices:
#                         data = {
#                             "hdf_path": os.path.join(self.root, subject),
#                             "subject_index": subject,
#                             "frame_index": frame_idx,
#                             "cam_index": cam_idx,
#                             "img_index": img_idx,
#                             "id": f"{subject}-{frame_idx}-{cam_idx}",
#                         }
#                         indices.append(data)
                
#         return indices

# class BaseGazeDataset(Dataset):
#     def __init__(self, indices, preprocess):
#         super().__init__()
#         self.indices = indices
#         self.logger = logging.getLogger(self.__class__.__name__)
#         print(" preprocess: ", preprocess)
#         self.preprocess = preprocess

#     def __getitem__(self, idx):
#         idx_data = self.indices[idx]
#         data = self.preprocess(idx_data)
#         return data

#     def __len__(self):
#         return len(self.indices)


# class StereoGazeDataset(BaseGazeDataset):
#     def __init__(
#         self,
#         indices,
#         preprocess,
#         img_per_frame,
#         stereo_preprocess, 
#         cam_pairs=None,
#     ):
#         super().__init__(indices, preprocess)
#         self.frame_pool = self.build_pair_pool()
#         self.cam_pairs = cam_pairs
#         self.img_per_frame = img_per_frame
#         self.stereo_preprocess = stereo_preprocess

#     def __getitem__(self, idx):
#         idx_data = self.indices[idx]
#         idx_data = self.generate_stereo_pair(idx_data)
#         stereo_data = {}

#         for i, data in enumerate(idx_data):
#             # data = self.preprocess(data)
#             for key in data.keys():
#                 new_key = f"{key}_{i}"
#                 stereo_data[new_key] = data[key]

#         stereo_data = self.stereo_preprocess(stereo_data)
#         return stereo_data

#     def generate_stereo_pair(self, idx_data):
#         stereo_data = [idx_data]

#         subject_idx = idx_data["subject_index"]
#         frame_idx = idx_data["frame_index"]
#         same_scene = self.frame_pool[subject_idx][frame_idx]
#         others = []
#         src_candidates = self.cam_pairs[idx_data["cam_index"]]
#         for data in same_scene:
#             if src_candidates[data["cam_index"]]:
#                 others.append(data)

#         others = np.random.choice(others, self.img_per_frame - 1, replace=False)
#         stereo_data += others.tolist()
#         return stereo_data

#     def build_pair_pool(self):
#         pool = {}
#         for idx_data in track(
#             self.indices, total=len(self.indices), description="Building frame pool"
#         ):
#             subject_idx = idx_data["subject_index"]
#             frame_idx = idx_data["frame_index"]

#             if subject_idx not in pool:
#                 pool[subject_idx] = {}

#             if frame_idx not in pool[subject_idx]:
#                 pool[subject_idx][frame_idx] = []

#             pool[subject_idx][frame_idx].append(idx_data)
#         return pool

# all_cam_indices = list(range(18))

# novel_cam_val_indices = list(range(2, 18, 3))
# novel_cam_train_indices = [ x for x in all_cam_indices if x not in novel_cam_val_indices]

# xgaze_index = XGazeIndex(root="/home/jqin/wk/Datasets/xgaze_v2_128", subject_keys=['subject0000.h5', 'subject0003.h5'], cam_indices=novel_cam_train_indices)
# print(' xgaze_index: ', len(xgaze_index.indices))
# for i in range(10):
#     print(' xgaze_index: ', xgaze_index.indices[i])
# # print(' xgaze_index: ', xgaze_index.indices[0:10])
    
# image_per_frame = 2

# mean = [0.485, 0.456, 0.406]
# std = [0.229, 0.224, 0.225]
# preprocess = transforms.Compose([
#                 transforms.ToPILImage(),
#                 transforms.ToTensor(),
#                 transforms.Resize((128, 128)),
#                 transforms.Normalize(mean=mean, std=std)
#             ])

# stereo_dataset = StereoGazeDataset(
#     indices=xgaze_index.indices,
#     preprocess=preprocess,
#     img_per_frame=image_per_frame,
#     stereo_preprocess=RenameRef(),
#     cam_pairs=np.eye(18, dtype=int) != 1,
# )

# stereo_dataloader = DataLoader(
#     stereo_dataset,
#     batch_size=12,
#     shuffle=False,
#     num_workers=4,
# )

# for i, data in enumerate(stereo_dataloader):
#     print(' data: ', data)
#     if i > 10:
#         break