# HSTL - Hierarchical Spatio-Temporal Representation Learning for Gait Recognition
## Google Colab Notebook

This notebook supports both training and inference phases for HSTL on Google Colab.

**Datasets Supported:**
- CASIA-B
- OUMVLP
- Gait3D

## 1. Setup Environment

In [None]:
# Check GPU availability (runs the `nvidia-smi` shell command in the notebook runtime)
!nvidia-smi

In [None]:
# Install required packages.
# NOTE(review): both pip commands below run unconditionally, even when the
# packages are already present in the runtime — confirm that reinstalling the
# cu118 PyTorch wheels is intended.
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install pyyaml tensorboard opencv-python tqdm

# Verify PyTorch installation: print the version and whether CUDA is usable;
# the CUDA version and the name of device 0 are printed only when CUDA is available.
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# Working-directory setup (adjust if needed)
import os
import sys
from pathlib import Path

# If your HSTL folder lives on a mounted Google Drive, uncomment these lines
# to mount the drive and switch into the folder first:
# from google.colab import drive
# drive.mount('/content/drive')
# os.chdir('/content/drive/MyDrive/path/to/HSTL')

# Show where we are so a wrong directory is obvious immediately
cwd = os.getcwd()
print(f"Current directory: {cwd}")
print(f"Files in current directory: {os.listdir('.')}")

# Put <cwd>/lib at the front of sys.path (only once) so its modules can be imported
lib_path = os.path.join(cwd, 'lib')
if sys.path.count(lib_path) == 0:
    sys.path.insert(0, lib_path)

print(f"\nPython path includes lib: {lib_path in sys.path}")

## 2. Configuration Selection

Choose your dataset and configuration file:

In [None]:
# ============= CONFIGURATION =============
# Choose your dataset: 'CASIA-B', 'OUMVLP', or 'Gait3D'
DATASET = 'CASIA-B'  # Change this to your dataset

# Map dataset to config file
CONFIG_MAP = {
    'CASIA-B': './config/hstl.yaml',
    'OUMVLP': './config/hstl_oumvlp.yaml',
    'Gait3D': './config/hstl_gait3d.yaml',
}

# Config used when DATASET is not a key of CONFIG_MAP
DEFAULT_CONFIG_FILE = './config/hstl.yaml'

if DATASET not in CONFIG_MAP:
    # Make the fallback visible: a typo in DATASET would otherwise silently
    # select the default config while later cells still use the mistyped name.
    print(f"⚠️ WARNING: Unknown dataset '{DATASET}'; expected one of {sorted(CONFIG_MAP)}. "
          f"Falling back to {DEFAULT_CONFIG_FILE}")

CONFIG_FILE = CONFIG_MAP.get(DATASET, DEFAULT_CONFIG_FILE)

# Phase: 'train' or 'test'
PHASE = 'train'  # Change to 'test' for inference

# Checkpoint iteration (for testing or resuming training)
ITER = 0  # Set to checkpoint iteration number (e.g., 80000) or 0 for training from scratch

# Log to file
LOG_TO_FILE = True

# Echo the effective settings so the run is self-documenting in the notebook output
print("Configuration:")
print(f"  Dataset: {DATASET}")
print(f"  Config file: {CONFIG_FILE}")
print(f"  Phase: {PHASE}")
print(f"  Checkpoint iteration: {ITER}")
print(f"  Log to file: {LOG_TO_FILE}")

## 3. Load Configuration and Verify Paths

In [None]:
import yaml

# Parse the selected YAML config and echo it back for a visual check
with open(CONFIG_FILE, 'r') as cfg_stream:
    config = yaml.safe_load(cfg_stream)

print("Configuration loaded:")
print(yaml.dump(config, default_flow_style=False))

# Check that the dataset location named in the config is present on disk
dataset_root = config['data_cfg']['dataset_root']
dataset_found = os.path.exists(dataset_root)
print(f"\nDataset root: {dataset_root}")
print(f"Dataset exists: {dataset_found}")

if not dataset_found:
    print("\n⚠️ WARNING: Dataset path does not exist!")
    print("Please update the 'dataset_root' in your config file.")

## 4. Prepare Single-GPU Training/Testing Function

Google Colab typically provides a single GPU, so we'll adapt the DDP setup to work with a single GPU:

In [None]:
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
import os

def setup_single_gpu():
    """Initialize a one-process torch.distributed group for single-GPU runs.

    Sets the rendezvous environment variables (MASTER_ADDR, MASTER_PORT,
    WORLD_SIZE, RANK, LOCAL_RANK) for a world of size 1 and creates the
    default process group via the ``env://`` init method. The 'nccl' backend
    is used when CUDA is available, otherwise 'gloo'.

    The function is safe to call repeatedly: if a default process group
    already exists it is reused instead of being initialized again, because
    torch.distributed refuses to initialize the default group twice (which
    would otherwise break re-running a notebook cell).
    """
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    os.environ['WORLD_SIZE'] = '1'
    os.environ['RANK'] = '0'
    os.environ['LOCAL_RANK'] = '0'

    if dist.is_initialized():
        print("Process group already initialized; reusing it")
    else:
        # Prefer NCCL when CUDA is usable; fall back to gloo for CPU-only runtimes
        backend = 'nccl' if torch.cuda.is_available() else 'gloo'
        dist.init_process_group(backend=backend, init_method='env://', world_size=1, rank=0)
        print(f"Process group initialized with {backend} backend")

    print(f"World size: {dist.get_world_size()}")
    print(f"Rank: {dist.get_rank()}")

print("Single GPU setup function defined")

## 5. Run Training or Testing

In [None]:
# Import necessary modules
import sys
import argparse

# Setup single GPU environment. Skipped when a default process group already
# exists (e.g. this cell is being re-run), because torch.distributed cannot
# initialize the default group twice.
if not dist.is_initialized():
    setup_single_gpu()

# Prepare arguments for main.py: the parser below reads them back from sys.argv
sys.argv = [
    'main.py',
    '--local_rank', '0',
    '--cfgs', CONFIG_FILE,
    '--phase', PHASE,
]

# Only pass --iter when a checkpoint iteration was actually requested
if ITER != 0:
    sys.argv.extend(['--iter', str(ITER)])

if LOG_TO_FILE:
    sys.argv.append('--log_to_file')

print(f"Running with arguments: {' '.join(sys.argv[1:])}\n")

# Import and run main
from modeling import models
from utils import config_loader, get_ddp_module, init_seeds, params_count, get_msg_mgr
import torch.nn as nn

# Parse arguments
parser = argparse.ArgumentParser(description='Main program')
parser.add_argument('--local_rank', type=int, default=0)
parser.add_argument('--cfgs', type=str, default='config/default.yaml')
parser.add_argument('--phase', default='train', choices=['train', 'test'])
parser.add_argument('--log_to_file', action='store_true')
# type=int so the value parsed from the command line is an int, not a string;
# without it the `opt.iter != 0` check below would be true for any passed value.
parser.add_argument('--iter', type=int, default=0)
opt = parser.parse_args()

# Load config; a non-zero --iter overrides restore_hint for both engines
cfgs = config_loader(opt.cfgs)
if opt.iter != 0:
    cfgs['evaluator_cfg']['restore_hint'] = opt.iter
    cfgs['trainer_cfg']['restore_hint'] = opt.iter

training = (opt.phase == 'train')

# Initialization
def initialization(cfgs, training):
    """Set up the message manager and seeding before the model is built.

    Selects ``cfgs['trainer_cfg']`` when *training* is true and
    ``cfgs['evaluator_cfg']`` otherwise, initializes the message manager
    under ``output/<dataset_name>/<model>/<save_name>``, logs the selected
    engine config, and calls ``init_seeds`` with the current distributed rank.

    Reads the module-level ``opt`` for its ``log_to_file`` flag.
    """
    manager = get_msg_mgr()
    engine_cfg = cfgs['trainer_cfg' if training else 'evaluator_cfg']
    save_dir = os.path.join(
        'output/',
        cfgs['data_cfg']['dataset_name'],
        cfgs['model_cfg']['model'],
        engine_cfg['save_name'],
    )

    if not training:
        manager.init_logger(save_dir, opt.log_to_file)
    else:
        to_file = opt.log_to_file
        log_iter = engine_cfg['log_iter']
        hint = engine_cfg['restore_hint']
        # Only an integer restore_hint is forwarded; anything else becomes 0
        hint_or_zero = hint if isinstance(hint, int) else 0
        manager.init_manager(save_dir, to_file, log_iter, hint_or_zero)

    manager.log_info(engine_cfg)

    # The seed is the process rank within the distributed group
    init_seeds(torch.distributed.get_rank())

# Run model
def run_model(cfgs, training):
    """Build the configured model, wrap it with get_ddp_module, and run it.

    The model class is looked up by name (``cfgs['model_cfg']['model']``) on
    the ``models`` module and constructed with ``(cfgs, training)``. When
    training with ``cfgs['trainer_cfg']['sync_BN']`` set, batch-norm layers
    are converted with ``nn.SyncBatchNorm.convert_sync_batchnorm`` first.
    Finally ``run_train`` or ``run_test`` of the model class is invoked,
    depending on *training*.
    """
    logger = get_msg_mgr()
    model_cfg = cfgs['model_cfg']
    logger.log_info(model_cfg)

    # Resolve the model class from its configured name
    model_cls = getattr(models, model_cfg['model'])
    net = model_cls(cfgs, training)
    if training and cfgs['trainer_cfg']['sync_BN']:
        net = nn.SyncBatchNorm.convert_sync_batchnorm(net)
    net = get_ddp_module(net)

    logger.log_info(params_count(net))
    logger.log_info("Model Initialization Finished!")

    # Dispatch to the phase-specific entry point on the model class
    runner = model_cls.run_train if training else model_cls.run_test
    runner(net)

# Execute: initialize the message manager and seeds first, then build and run
# the model for the phase selected by `training`
print("Starting initialization...")
initialization(cfgs, training)
print("\nRunning model...")
run_model(cfgs, training)
print("\nCompleted!")

## 6. View Results and Logs

In [None]:
# Print the output directory as an indented tree of folders and files.
# NOTE(review): the last two path segments are hard-coded to HSTL/HSTL —
# confirm they match the model name and save_name of the config in use.
output_dir = f"output/{DATASET}/HSTL/HSTL"
print(f"Output directory: {output_dir}\n")

if not os.path.exists(output_dir):
    print("Output directory not found yet.")
else:
    for current, _subdirs, filenames in os.walk(output_dir):
        # Depth below output_dir = number of path separators left after removing the prefix
        depth = current.replace(output_dir, '').count(os.sep)
        dir_indent = ' ' * 2 * depth
        file_indent = ' ' * 2 * (depth + 1)
        print(f"{dir_indent}{os.path.basename(current)}/")
        for filename in filenames:
            print(f"{file_indent}{filename}")

In [None]:
# View latest log file
from collections import deque

log_dir = f"output/{DATASET}/HSTL/HSTL/logs"

if os.path.exists(log_dir):
    # "Latest" is the last .txt file in name order.
    # NOTE(review): assumes log file names sort chronologically — confirm.
    log_files = sorted(name for name in os.listdir(log_dir) if name.endswith('.txt'))
    if log_files:
        latest_log = os.path.join(log_dir, log_files[-1])
        print(f"Latest log file: {latest_log}\n")
        print("Last 50 lines:")
        print("=" * 80)
        with open(latest_log, 'r') as f:
            # Stream the file through a bounded deque so only the last 50
            # lines are kept in memory, instead of reading the whole log.
            for line in deque(f, maxlen=50):
                print(line.rstrip())
    else:
        print("No log files found yet.")
else:
    print("Log directory not found yet.")

## 7. TensorBoard Visualization (Optional)

In [None]:
# Load TensorBoard in Colab (loads the notebook extension that provides the
# %tensorboard magic used below)
%load_ext tensorboard

# Point to your output directory
# NOTE(review): the last two path segments are hard-coded to HSTL/HSTL —
# confirm they match the model name and save_name of the config in use.
tensorboard_dir = f"output/{DATASET}/HSTL/HSTL"

if os.path.exists(tensorboard_dir):
    # {tensorboard_dir} is expanded by IPython to the value of the Python variable
    %tensorboard --logdir {tensorboard_dir}
else:
    print("TensorBoard directory not found. Train the model first.")

## 8. Quick Test with Specific Checkpoint

In [None]:
# Quick test with a specific checkpoint
# Run this after training. It is NOT self-contained: it reuses names created
# by earlier cells in this session (CONFIG_FILE, LOG_TO_FILE, dist, sys,
# argparse, setup_single_gpu, config_loader, initialization, run_model), so
# those cells must have been executed first.

PHASE = 'test'
ITER = 80000  # Specify your checkpoint iteration

print(f"Testing with checkpoint iteration: {ITER}")

# Cleanup previous process group if exists
if dist.is_initialized():
    dist.destroy_process_group()

# Re-setup and run
setup_single_gpu()

# Update sys.argv and run (similar to cell 5); the parser below reads it back
sys.argv = [
    'main.py',
    '--local_rank', '0',
    '--cfgs', CONFIG_FILE,
    '--phase', PHASE,
    '--iter', str(ITER),
]

if LOG_TO_FILE:
    sys.argv.append('--log_to_file')

# Reload and run
import importlib
import modeling.models
importlib.reload(modeling.models)

# Parse and execute
parser = argparse.ArgumentParser(description='Main program')
parser.add_argument('--local_rank', type=int, default=0)
parser.add_argument('--cfgs', type=str, default='config/default.yaml')
parser.add_argument('--phase', default='train', choices=['train', 'test'])
parser.add_argument('--log_to_file', action='store_true')
# type=int so the parsed value is an int; as a string, even '0' would compare
# unequal to 0 below and override restore_hint.
parser.add_argument('--iter', type=int, default=0)
opt = parser.parse_args()

# A non-zero --iter overrides restore_hint for both engines
cfgs = config_loader(opt.cfgs)
if opt.iter != 0:
    cfgs['evaluator_cfg']['restore_hint'] = opt.iter
    cfgs['trainer_cfg']['restore_hint'] = opt.iter

training = (opt.phase == 'train')
initialization(cfgs, training)
run_model(cfgs, training)
print("\nTesting completed!")

## 9. Cleanup

In [None]:
# Tear down the distributed process group, if one is active
group_active = dist.is_initialized()
if group_active:
    dist.destroy_process_group()
    print("Process group destroyed")

# Empty the CUDA cache when a CUDA device is available
cuda_ready = torch.cuda.is_available()
if cuda_ready:
    torch.cuda.empty_cache()
    print("GPU cache cleared")

print("Cleanup completed")