In [1]:
import argparse
import datetime
import os
import random
import time
from re import X
from xxlimited import Str

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks.lr_monitor import LearningRateMonitor
from pytorch_lightning.profiler import SimpleProfiler
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.loggers import TensorBoardLogger

In [2]:
from dataset.mo2cap2 import Mo2Cap2DataModule
from dataset.mocap import MocapDataModule
from dataset.mo2cap2 import Mo2Cap2DataModule
from dataset.mocap_distance import MocapDistanceDataModule
from dataset.mocap_h36m_cropped_hm import MocapH36MCropHMDataModule
from dataset.mocap_h36m_hm import MocapH36MHMDataModule
from dataset.mocap_transformer import MocapSeqDataModule
from dataset.mo2cap2_transformer import Mo2Cap2SeqDataModule
from dataset.mocap_h36m import MocapH36MDataModule
from dataset.mocap_h36m_cropped import MocapH36MCropDataModule
from dataset.mocap_h36m_transformer import MocapH36MSeqDataModule
from dataset.mocap_h36m_cropped_transformer import MocapH36MCropSeqDataModule
from dataset.mocap_h36m_2d import Mocap2DH36MDataModule

from net.DirectRegression import DirectRegression
from net.HRNetBaseline import HRNetBaseline
from net.HRNetEgo import HRNetEgoSTAN
from net.Mo2Cap2BaselineL1 import Mo2Cap2BaselineL1
from net.Mo2Cap2Direct import Mo2Cap2Direct
from net.Mo2Cap2GlobalTrans import Mo2Cap2GlobalTrans
from net.Mo2Cap2Seq import Mo2Cap2Seq
from net.Mo2Cap2SeqHMDirect import Mo2Cap2SeqHMDirect
from net.Mo2Cap2SeqHMDirectAvg import Mo2Cap2SeqHMDirectAvg
from net.Mo2Cap2SeqHMDirectSlice import Mo2Cap2SeqHMDirectSlice
from net.xRNetBaseLine2D import xREgoPose2D
from net.xRNetBaseLineL1 import xREgoPoseL1
from net.xRNetDirect import xREgoPoseDirect
from net.Mo2Cap2Baseline import Mo2Cap2Baseline
from net.xRNetPosterior2D import xREgoPosePosterior2D
from net.xRNetPosteriorLinear import xREgoPosePosteriorLinear
from net.xRNetSeq import xREgoPoseSeq
from net.xRNetBaseLine import xREgoPose
from net.xRNetConcat import xRNetConcat
from net.xRNetHeatmap import xREgoPoseHeatMap
from net.xRNetSeqHM import xREgoPoseSeqHM
from net.xRNetPosterior import xREgoPosePosterior
from net.xRNetPosteriorDist import xREgoPosePosteriorDist
from net.xRNetSeqDirect import xREgoPoseSeqDirect
from net.xRNetSeqHMDirect import xREgoPoseSeqHMDirect
from net.xRNetGlobalTrans import xREgoPoseGlobalTrans
from net.xRNetDist import xREgoPoseDist
from net.xRNetSeqHMDirectAvg import xREgoPoseSeqHMDirectAvg
from net.xRNetSeqHMDirectED import xREgoPoseSeqHMDirectED
from net.xRNetSeqHMDirectEDExp import xREgoPoseSeqHMDirectEDExp
from net.xRNetSeqHMDirectRevPos import xREgoPoseSeqHMDirectRevPos
from net.xRNetSeqHMDirectSlice import xREgoPoseSeqHMDirectSlice
from net.xRNetUNet import xREgoPoseUNet
from utils.evaluate import create_results_csv

In [3]:

# Registry: value of dict_args['model'] -> model class to instantiate.
# Entry order is preserved from the original definition.
MODEL_DIRECTORY = {
    # Direct regression
    'direct_regression': DirectRegression,

    # Keys prefixed "xregopose"
    'xregopose': xREgoPose,
    'xregopose_l1': xREgoPoseL1,
    'xregopose_seq': xREgoPoseSeq,
    'xregopose_concat': xRNetConcat,
    'xregopose_heatmap': xREgoPoseHeatMap,
    'xregopose_seq_hm': xREgoPoseSeqHM,
    'xregopose_posterior': xREgoPosePosterior,
    'xregopose_posterior_2d': xREgoPosePosterior2D,
    'xregopose_posterior_dist': xREgoPosePosteriorDist,
    'xregopose_posterior_linear': xREgoPosePosteriorLinear,
    'xregopose_seq_hm_direct': xREgoPoseSeqHMDirect,
    'xregopose_seq_hm_direct_ed': xREgoPoseSeqHMDirectED,
    'xregopose_seq_hm_direct_ed_exp': xREgoPoseSeqHMDirectEDExp,
    'xregopose_seq_direct': xREgoPoseSeqDirect,
    'xregopose_global_trans': xREgoPoseGlobalTrans,
    'xregopose_dist': xREgoPoseDist,
    'xregopose_unet': xREgoPoseUNet,
    'xregopose_direct': xREgoPoseDirect,
    'xregopose_seq_hm_direct_rev_pos': xREgoPoseSeqHMDirectRevPos,
    'xregopose_seq_hm_direct_avg': xREgoPoseSeqHMDirectAvg,
    'xregopose_seq_hm_direct_slice': xREgoPoseSeqHMDirectSlice,

    # Keys prefixed "mo2cap2"
    'mo2cap2': Mo2Cap2Baseline,
    'mo2cap2_l1': Mo2Cap2BaselineL1,
    'mo2cap2_direct': Mo2Cap2Direct,
    'mo2cap2_global_trans': Mo2Cap2GlobalTrans,
    'mo2cap2_seq': Mo2Cap2Seq,
    'mo2cap2_slice': Mo2Cap2SeqHMDirectSlice,
    'mo2cap2_avg': Mo2Cap2SeqHMDirectAvg,
    'mo2cap2_ego': Mo2Cap2SeqHMDirect,

    # 2D variant and HRNet-based models
    'xregopose_2d': xREgoPose2D,
    'HRNetBaseline': HRNetBaseline,
    'HRNetEgoSTAN': HRNetEgoSTAN,
}
# Registry: value of dict_args['dataloader'] -> data module class to instantiate.
# Entry order is preserved from the original definition.
DATALOADER_DIRECTORY = {
    # Keys without a dataset prefix
    'baseline': MocapDataModule,
    'sequential': MocapSeqDataModule,
    'distance': MocapDistanceDataModule,

    # Keys prefixed "mo2cap2"
    'mo2cap2': Mo2Cap2DataModule,
    'mo2cap2_seq': Mo2Cap2SeqDataModule,

    # Keys prefixed "h36m"
    'h36m_static': MocapH36MDataModule,
    'h36m_seq': MocapH36MSeqDataModule,
    'h36m_2d': Mocap2DH36MDataModule,
    'h36m_hm': MocapH36MHMDataModule,
    'h36m_crop': MocapH36MCropDataModule,
    'h36m_crop_hm': MocapH36MCropHMDataModule,
    'h36m_seq_crop': MocapH36MCropSeqDataModule,
}

# ann_file_val = r'F:\extracted_mo2cap2_dataset\TestSet'
# ann_file_train = r'F:\extracted_mo2cap2_dataset\TrainSet'


In [4]:
# Run configuration for this notebook.
# The whole dict is splatted (**dict_args) into the model and data module
# constructors; individual keys are also read when the callbacks, the logger
# and the Trainer are built. Key order matches the original definition.
dict_args = {
    # --- Selection / checkpoints -------------------------------------------
    'model': 'mo2cap2_l1',            # must be a key of MODEL_DIRECTORY
    'eval': False,                    # gates the test pass after training
    'dataloader': 'mo2cap2',          # must be a key of DATALOADER_DIRECTORY
    'load': None,                     # checkpoint for load_from_checkpoint, or None
    'resume_from_checkpoint': None,   # forwarded to pl.Trainer

    # --- Dataset locations (machine-specific absolute paths) ---------------
    # Alternative kept from the original cell:
    #   'dataset_tr': r'F:\extracted_mo2cap2_dataset\TrainSet'
    'dataset_tr': r'F:\mo2cap2_data_half\TrainSet',
    'dataset_val': r'F:\mo2cap2_data_small\TrainSet',
    'dataset_test': r'F:\extracted_mo2cap2_dataset\TestSet',

    # --- Hardware / loop settings ------------------------------------------
    'cuda': 'cuda',
    'gpus': 1,                        # forwarded to pl.Trainer; also picks the accelerator
    'batch_size': 48,
    'epoch': 10,                      # pl.Trainer max_epochs
    'num_workers': 4,
    'val_freq': 0.1,                  # pl.Trainer val_check_interval
    'es_patience': 5,                 # EarlyStopping patience
    'logdir': r'C:\Users\user\Documents\GitHub\Ego-STAN\temp_res',
    'lr': 0.001,
    'load_resnet': r'C:\Users\user\.cache\torch\hub\checkpoints\resnet101-63fe2227.pth',

    # --- Model hyper-parameters (consumed via **dict_args) -----------------
    # Alternative kept from the original cell: 'hm_train_steps': 100000
    'hm_train_steps': 10000,
    'seq_len': 5,
    'skip': 0,
    'encoder_type': 'branch_concat',
    'heatmap_type': 'baseline',
    'heatmap_resolution': [47, 47],
    'image_resolution': [368, 368],
    'seed': 42,                       # passed to pl.seed_everything
    'clip_grad_norm': 0.0,            # pl.Trainer gradient_clip_val
    'dropout': 0.0,
    'dropout_linear': 0.0,
    'protocol': 'p2',
    'w2c': False,
    'weight_regularization': 0.01,
    'monitor_metric': 'val_mpjpe_full_body',  # watched by EarlyStopping and ModelCheckpoint
    'sigma': 3,
    'h36m_sample_rate': 1,
    'csv_mode': '3D',                 # passed to create_results_csv
}

In [5]:
# Seed every RNG Lightning knows about before any model/data object is built.
pl.seed_everything(dict_args['seed'])

# Initialize model to train: either restore from `load` or build from scratch.
assert dict_args['model'] in MODEL_DIRECTORY, (
    f"unknown model {dict_args['model']!r}; expected one of {sorted(MODEL_DIRECTORY)}"
)
model_cls = MODEL_DIRECTORY[dict_args['model']]
if dict_args['load'] is not None:
    model = model_cls.load_from_checkpoint(dict_args['load'], **dict_args)
else:
    model = model_cls(**dict_args)

# Initialize logging paths.
# The run id `now` is a month-day-hour-minute-second timestamp and doubles as
# the checkpoint directory name (and later as the logger version).
#
# The directory is created atomically (exist_ok=False) so that two runs started
# in the same second can never end up sharing it; the loser of the race waits a
# random 1-20 s and retries with a fresh timestamp. The jitter comes from an
# OS-entropy RNG on purpose: the global `random` module was just seeded above,
# so it would hand every run with the same seed the exact same delay, and
# drawing from it would also shift the seeded stream used for training.
jitter_rng = random.SystemRandom()
while True:
    now = datetime.datetime.now().strftime('%m%d%H%M%S')
    weight_save_dir = os.path.join(dict_args["logdir"], 'models', 'state_dict', now)
    try:
        os.makedirs(weight_save_dir, exist_ok=False)
        break
    except FileExistsError:
        time.sleep(jitter_rng.randint(1, 20))

# Callback: early stopping parameters (lower monitored value is better).
early_stopping_callback = EarlyStopping(
    monitor=dict_args['monitor_metric'],
    mode="min",
    verbose=True,
    patience=dict_args["es_patience"],
)

# Callback: model checkpoint strategy (keep the 5 best checkpoints by the monitored metric).
checkpoint_callback = ModelCheckpoint(
    dirpath=weight_save_dir,
    save_top_k=5,
    verbose=True,
    monitor=dict_args['monitor_metric'],
    mode="min",
)

# Data: load data module
assert dict_args['dataloader'] in DATALOADER_DIRECTORY, (
    f"unknown dataloader {dict_args['dataloader']!r}; expected one of {sorted(DATALOADER_DIRECTORY)}"
)
data_module = DATALOADER_DIRECTORY[dict_args['dataloader']](**dict_args)


Global seed set to 42
  self.heatmap.resnet101.load_state_dict(torch.load(self.load_resnet))


In [6]:
import torch
# Sanity check: the cell's displayed value reports whether PyTorch can use CUDA.
torch.cuda.is_available()

True

In [7]:

# Trainer: initialize training behaviour
profiler = SimpleProfiler()
lr_monitor = LearningRateMonitor(logging_interval='step')

# Experiment logger. `version=now` ties the logged run to the checkpoint
# directory created for the same timestamp.
# Alternative logger kept from the original cell:
#   logger = TensorBoardLogger(save_dir=dict_args['logdir'], version=now, name='lightning_logs', log_graph=True)
logger = WandbLogger(
    project='egostanrepo_mo2cap2',  # Replace with your project name
    # name='orgin_mo2cap2',  # Replace with your experiment name
    save_dir=dict_args['logdir'],
    version=now,
)

# Map the requested GPU count onto an accelerator string.
# Any other value (e.g. a negative count) used to fall through every branch and
# leave `accelerator` unbound, surfacing as a NameError at the Trainer call;
# reject it here with an explicit message instead.
# NOTE(review): newer Lightning releases appear to select DataParallel through
# `strategy='dp'` rather than `accelerator='dp'` — confirm against the installed version.
num_gpus = dict_args['gpus']
if num_gpus > 1:
    accelerator = 'dp'
elif num_gpus == 1:
    accelerator = 'gpu'
elif num_gpus == 0:
    accelerator = 'cpu'
else:
    raise ValueError(
        f"dict_args['gpus'] must be a non-negative integer, got {num_gpus!r}"
    )

trainer = pl.Trainer(
    callbacks=[early_stopping_callback, checkpoint_callback, lr_monitor],
    val_check_interval=dict_args['val_freq'],
    deterministic=True,
    gpus=dict_args['gpus'],
    profiler=profiler,
    logger=logger,
    max_epochs=dict_args["epoch"],
    log_every_n_steps=10,
    gradient_clip_val=dict_args['clip_grad_norm'],
    resume_from_checkpoint=dict_args['resume_from_checkpoint'],
    accelerator=accelerator,
)



GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [8]:
# NOTE(review): a bare `raise` with no active exception fails with
# "RuntimeError: No active exception to reraise" (see this cell's output), so
# the notebook cannot complete a Restart & Run All. Presumably this is a manual
# stop that halts execution before training starts — confirm, then delete the
# cell or replace it with an explicit flag.
raise

RuntimeError: No active exception to reraise

In [9]:
# Trainer: train model
trainer.fit(model, data_module)

# Evaluation runs only when it is switched on and a test set is configured.
run_evaluation = bool(dict_args['eval'] and dict_args['dataset_test'])

if not run_evaluation:
    print("Evaluation skipped")
else:
    # Test using the best checkpoint (as tracked by the 'ModelCheckpoint' callback).
    trainer.test(model, ckpt_path='best', datamodule=data_module)

    # Store mpjpe test results as a csv next to the saved weights.
    mpjpe_csv_path = os.path.join(weight_save_dir, f'{now}_eval.csv')
    test_mpjpe_dict = model.test_results
    create_results_csv(
        test_mpjpe_dict,
        mpjpe_csv_path,
        dict_args['dataloader'],
        dict_args['csv_mode'],
    )

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mvhehduatks[0m ([33mcv04[0m). Use [1m`wandb login --relogin`[0m to force relogin



  | Name            | Type           | Params | In sizes         | Out sizes      
----------------------------------------------------------------------------------------
0 | heatmap         | HeatMap        | 61.5 M | [1, 3, 368, 368] | [1, 15, 47, 47]
1 | encoder         | Encoder        | 40.0 M | [1, 15, 47, 47]  | [1, 20]        
2 | pose_decoder    | PoseDecoder    | 3.2 K  | [1, 20]          | [1, 15, 3]     
3 | heatmap_decoder | HeatmapDecoder | 40.0 M | [1, 20]          | [1, 15, 47, 47]
----------------------------------------------------------------------------------------
141 M     Trainable params
0         Non-trainable params
141 M     Total params
566.168   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Global seed set to 42


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Metric val_mpjpe_full_body improved. New best score: 0.294
Epoch 0, global step 65: val_mpjpe_full_body reached 0.29366 (best 0.29366), saving model to "C:\Users\user\Documents\GitHub\Ego-STAN\temp_res\models\state_dict\0819142329\epoch=0-step=65.ckpt" as top 5


Validating: 0it [00:00, ?it/s]

Epoch 0, global step 131: val_mpjpe_full_body reached 207.44667 (best 0.29366), saving model to "C:\Users\user\Documents\GitHub\Ego-STAN\temp_res\models\state_dict\0819142329\epoch=0-step=131.ckpt" as top 5


Validating: 0it [00:00, ?it/s]

Epoch 0, global step 197: val_mpjpe_full_body reached 243.10242 (best 0.29366), saving model to "C:\Users\user\Documents\GitHub\Ego-STAN\temp_res\models\state_dict\0819142329\epoch=0-step=197.ckpt" as top 5


Validating: 0it [00:00, ?it/s]

Epoch 0, global step 263: val_mpjpe_full_body reached 115.01862 (best 0.29366), saving model to "C:\Users\user\Documents\GitHub\Ego-STAN\temp_res\models\state_dict\0819142329\epoch=0-step=263.ckpt" as top 5


Validating: 0it [00:00, ?it/s]

Epoch 0, global step 329: val_mpjpe_full_body reached 103.39344 (best 0.29366), saving model to "C:\Users\user\Documents\GitHub\Ego-STAN\temp_res\models\state_dict\0819142329\epoch=0-step=329.ckpt" as top 5


Validating: 0it [00:00, ?it/s]

Monitored metric val_mpjpe_full_body did not improve in the last 5 records. Best score: 0.294. Signaling Trainer to stop.
Epoch 0, global step 395: val_mpjpe_full_body reached 102.05797 (best 0.29366), saving model to "C:\Users\user\Documents\GitHub\Ego-STAN\temp_res\models\state_dict\0819142329\epoch=0-step=395.ckpt" as top 5
FIT Profiler Report

Action                             	|  Mean duration (s)	|Num calls      	|  Total time (s) 	|  Percentage %   	|
----------------------------------------------------------------------------------------------------------------------------------------
Total                              	|  -              	|_              	|  739.05         	|  100 %          	|
----------------------------------------------------------------------------------------------------------------------------------------
run_training_epoch                 	|  677.56         	|1              	|  677.56         	|  91.681         	|
run_training_batch                 	| 

Evaluation skipped
