# PhysiX inference on PhysGen

### Settings

In [1]:
# --- Dataset selection ---
# variation: sound_baseline, sound_reflection, sound_diffraction or sound_combined
variation = "sound_reflection"
# input_type: 'osm' or 'base_simulation'
input_type = "osm"
# output_type: 'standard' or 'complex_only'
output_type = "standard"

# --- Data export ---
# When True, the data-loading cell clears the output folders and re-exports the dataset.
load_data = True
root_path = "/home/tippolit/src/PhysiX/data/raw_data/physgen/data"
# Both export folders currently point at the same directory (root_path).
output_osm_path = output_real_path = root_path
# output_pred_path = f"{root_path}/pred"

### Imports

In [2]:
import os
import shutil
import re
import random

import torch

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2

import prime_printer as prime

from physgen_dataset import save_dataset

ALSA lib confmisc.c:855:(parse_card) cannot find card '0'
ALSA lib conf.c:5204:(_snd_config_evaluate) function snd_func_card_inum returned error: No such file or directory
ALSA lib confmisc.c:422:(snd_func_concat) error evaluating strings
ALSA lib conf.c:5204:(_snd_config_evaluate) function snd_func_concat returned error: No such file or directory
ALSA lib confmisc.c:1342:(snd_func_refer) error evaluating name
ALSA lib conf.c:5204:(_snd_config_evaluate) function snd_func_refer returned error: No such file or directory
ALSA lib conf.c:5727:(snd_config_expand) Evaluate error: No such file or directory
ALSA lib pcm.c:2721:(snd_pcm_open_noupdate) Unknown PCM default
  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Print the version of the `python` executable that the shell resolves.
# NOTE(review): this is the shell's `python`, which may differ from the kernel's interpreter — confirm if it matters.
!python --version

Python 3.10.18


In [4]:
# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
# Last expression of the cell: display the selected device.
DEVICE

'cuda'

In [5]:
# Fetch the hardware summary from prime_printer, then print it.
_hardware_report = prime.get_hardware()
print(_hardware_report)


-------------------------------- 
Your Hardware:

    ---> General <---
Operatingsystem: Linux
Version: #65-Ubuntu SMP PREEMPT_DYNAMIC Mon May 19 17:15:03 UTC 2025
Architecture: ('64bit', 'ELF')
Processor: x86_64

    ---> GPU <---
GPU Name: NVIDIA GeForce RTX 4090
VRAM Total: 24564 MB
VRAM Used: 3 MB
Utilization: 0.0 %
PyTorch Support: True (NVIDIA GeForce RTX 4090)
TensorFlow Support: False -> not installed

    ---> CPU <---
CPU-Name: 13th Gen Intel(R) Core(TM) i7-13700
CPU Kernels: 16
Logical CPU-Kernels: 24
CPU-Frequence: 4783 MHz
CPU-Utilization: 0.3 %

    ---> RAM <---
RAM Total: 62 GB
RAM Available: 58 GB
RAM-Utilization: 5.8 %

--------------------------------


In [6]:
# Display the current working directory. Later cells rely on it: they use
# relative paths (e.g. ./well_utils/data_processing) and `python -m` module lookups.
os.getcwd()

'/home/tippolit/src/PhysiX'

### Setting Up Folders

In [17]:
# Working folders: create them if missing, keep whatever they already contain.
for cur_dir in ['/home/tippolit/src/PhysiX/checkpoints', 
                '/home/tippolit/src/PhysiX/embeddings', 
                '/home/tippolit/src/PhysiX/results', 
                '/home/tippolit/src/PhysiX/tokenizer', 
                '/home/tippolit/src/PhysiX/cache']:
    os.makedirs(cur_dir, exist_ok=True) 

# Data folders: wipe any previous contents so each run starts from empty folders.
for cur_dir in ['/home/tippolit/src/PhysiX/data/raw_data/physgen/data', 
                '/home/tippolit/src/PhysiX/data/cleaned_data/physgen', 
                '/home/tippolit/src/PhysiX/data/normalized/physgen']:
    if os.path.exists(cur_dir):
        shutil.rmtree(cur_dir)
    # (Re)create unconditionally. Previously this only ran when the folder had
    # already existed, so on a fresh checkout the data folders were never created.
    os.makedirs(cur_dir, exist_ok=True)

### Data Loading

In [18]:
# Check data loading + path existence
def _prepare_output_dir(path, label, reload):
    """Make sure `path` can be used as an export folder.

    Args:
        path: Directory to check.
        label: Name of the corresponding setting; used in error messages.
        reload: If True, the directory is emptied (or created) so fresh data
            can be written. If False, the directory only has to exist already.

    Raises:
        Exception: If `path` exists but is not a directory while `reload` is
            True, or if `path` is not an existing directory while `reload`
            is False.
    """
    if reload:
        if os.path.isdir(path):
            shutil.rmtree(path)
            os.makedirs(path)
            print(f"Cleared {path}.")
        elif not os.path.exists(path):
            os.makedirs(path)
            print(f"Created {path}.")
        else:
            # Previously this case fell through silently and only failed later.
            raise Exception(f"{label} exists but is not a directory ({path}).")
    elif not os.path.isdir(path):
        raise Exception(f"{label} does not exist ({path}) and data is set to not be loaded.")


def _collect_files(directory, keyword, expected, kind):
    """Return the file names in `directory` containing `keyword`, checking their count.

    Args:
        directory: Folder to list.
        keyword: Substring a file name must contain to be counted.
        expected: Exact number of matching files required.
        kind: Human-readable file kind ("target" / "input") for error messages.

    Returns:
        List of matching file names, in `os.listdir` order.

    Raises:
        Exception: If the number of matching files differs from `expected`.
    """
    files = [name for name in os.listdir(directory) if keyword in name]
    found = len(files)
    if found < expected:
        raise Exception(f"Only found {found} Physgen {kind} files, {expected-found} files are missing.")
    if found > expected:
        # The old message reported a negative number of "missing" files here.
        raise Exception(f"Found {found} Physgen {kind} files, {found-expected} more than the expected {expected}.")
    return files


# Both settings may point at the same folder; prepare each distinct folder only
# once so that it is not cleared twice.
_prepared_dirs = set()
for _label, _path in (("output_osm_path", output_osm_path),
                      ("output_real_path", output_real_path)):
    _key = os.path.abspath(_path)
    if _key not in _prepared_dirs:
        _prepare_output_dir(_path, _label, load_data)
        _prepared_dirs.add(_key)

# Save Physgen data
if load_data:
    save_dataset(output_real_path=output_real_path, 
                 output_osm_path=output_osm_path, 
                 variation=variation, 
                 input_type=input_type, 
                 output_type=output_type, 
                 info_print=False, 
                 progress_print=True)
    print("Loaded data.")
else:
    print("Did not load data.")

# Check amount of data
max_amount = 1244  # number of target files and of input files this cell requires
target_files = _collect_files(output_real_path, "target", max_amount, "target")
n_target_files = len(target_files)

input_files = _collect_files(output_osm_path, "input", max_amount, "input")
n_input_files = len(input_files)

Cleared /home/tippolit/src/PhysiX/data/raw_data/physgen/data.
Cleared /home/tippolit/src/PhysiX/data/raw_data/physgen/data.
PhysGen (sound_reflection) Dataset for test got created
Physgen Data Loading |          | 0.08%
Physgen Data Loading |          | 0.16%
Physgen Data Loading |          | 0.24%
Physgen Data Loading |          | 0.32%
Physgen Data Loading |          | 0.40%
Physgen Data Loading |          | 0.48%
Physgen Data Loading |          | 0.56%
Physgen Data Loading |          | 0.64%
Physgen Data Loading |          | 0.72%
Physgen Data Loading |          | 0.80%
Physgen Data Loading |          | 0.88%
Physgen Data Loading |          | 0.96%
Physgen Data Loading |          | 1.05%
Physgen Data Loading |          | 1.13%
Physgen Data Loading |          | 1.21%
Physgen Data Loading |          | 1.29%
Physgen Data Loading |          | 1.37%
Physgen Data Loading |          | 1.45%
Physgen Data Loading |          | 1.53%
Physgen Data Loading |          | 1.61%
Physgen Data Loading

In [19]:
# Pull the first number out of every file name and sort the results ascending.
input_idxs = sorted(int(re.findall(r"\d+", name)[0]) for name in input_files)
target_idxs = sorted(int(re.findall(r"\d+", name)[0]) for name in target_files)

# Expected numbering is 0 .. N-1, where N is the number of input files.
goal_idxs = list(range(len(input_idxs)))

# Each line prints True when the corresponding indices match the expected numbering.
print(input_idxs == goal_idxs)
print(target_idxs == goal_idxs)

True
True


### Preprocessing

In [20]:
# Run the project's `process_dataset` module on the raw PhysGen data; results go to
# the path passed as --cleaned_data_path. The module is invoked by its bare name,
# so the command is executed from inside ./well_utils/data_processing.
# os.makedirs("./data/cleaned_data/")
original_cwd = os.getcwd()  # remembered so the working directory can be restored below
try:
  new_cwd = "./well_utils/data_processing"
  os.chdir(new_cwd)
  print("Set working env to:", os.getcwd())
  # Absolute paths are used because the working directory was just changed.
  !python -m process_dataset \
    physgen \
    --raw_data_path    /home/tippolit/src/PhysiX/data/raw_data \
    --cleaned_data_path /home/tippolit/src/PhysiX/data/cleaned_data/physgen
finally:
  # Always switch back, even if the step above raised.
  os.chdir(original_cwd)
  print("Set working env to:", os.getcwd())

Set working env to: /home/tippolit/src/PhysiX/well_utils/data_processing


/home/tippolit/src/PhysiX/data/raw_data/physgen/data
Processing files:  50%|██████████          | 1244/2488 [00:04<00:04, 299.15it/s]
Set working env to: /home/tippolit/src/PhysiX


In [21]:
# Count the entries in the cleaned-data folder whose names end in .hdf5.
!ls /home/tippolit/src/PhysiX/data/cleaned_data/physgen | grep '\.hdf5$' | wc -l

1244


In [22]:
# Compute stats
# Step 1 writes normalization_stats.json from the cleaned files; step 2 reads that
# file via --stats_path and writes to the normalized-data folder.
original_cwd = os.getcwd()
try:
  !python -m well_utils.data_processing.normalization.calculate_stats \
    --input_dir  /home/tippolit/src/PhysiX/data/cleaned_data/physgen \
    --output_path /home/tippolit/src/PhysiX/data/normalized/physgen/normalization_stats.json

  # Normalize (standard or minmax)
  # NOTE(review): `--delete` presumably removes the un-normalized input files — confirm before re-running.
  !python -m well_utils.data_processing.normalization.normalize \
    --input_dir  /home/tippolit/src/PhysiX/data/cleaned_data/physgen \
    --output_dir  /home/tippolit/src/PhysiX/data/normalized/physgen/ \
    --stats_path  /home/tippolit/src/PhysiX/data/normalized/physgen/normalization_stats.json \
    --normalization_type standard --delete
finally:
  # This cell never changes directory itself, so this only re-selects the
  # directory that was current when the cell started.
  os.chdir(original_cwd)
  print("Set working env to:", os.getcwd())

Found 1244 HDF5 files in /home/tippolit/src/PhysiX/data/cleaned_data/physgen
Analyzing files: 100%|█████████████████████| 1244/1244 [00:07<00:00, 177.07it/s]
Statistics saved to /home/tippolit/src/PhysiX/data/normalized/physgen/normalization_stats.json
Found 1244 HDF5 files in /home/tippolit/src/PhysiX/data/cleaned_data/physgen
Normalizing 1244 files...
Processing:   0%|                                      | 0/1244 [00:00<?, ?it/s]
Normalizing: 100%|███████████████████████████████| 1/1 [00:00<00:00, 249.04it/s][A

Normalizing: 100%|███████████████████████████████| 1/1 [00:00<00:00, 295.73it/s][A

Normalizing: 100%|███████████████████████████████| 1/1 [00:00<00:00, 282.37it/s][A

Normalizing: 100%|███████████████████████████████| 1/1 [00:00<00:00, 288.67it/s][A

Normalizing: 100%|███████████████████████████████| 1/1 [00:00<00:00, 292.67it/s][A

Normalizing: 100%|███████████████████████████████| 1/1 [00:00<00:00, 292.57it/s][A

Normalizing: 100%|███████████████████████████████| 1/

### Tokenization

In [23]:
# Run the project's inflate_channels_continuous script on the Cosmos tokenizer weights
# (--original_channels 1 -> --new_channels 2, 1 frame, 256x256).
# NOTE(review): the recorded run of this cell failed with ModuleNotFoundError for
# 'cosmos1.models.autoregressive.tokenizer.lobotomy', raised by an import inside the script itself.
# NOTE(review): --weights points at /checkpoints under the filesystem root, while the folder-setup
# cell creates /home/tippolit/src/PhysiX/checkpoints — confirm which location is intended.
!python -m cosmos1.models.tokenizer.lobotomize.inflate_channels_continuous \
  --weights            /checkpoints/Cosmos-1.0-Tokenizer-CV8x8x8/autoencoder.jit \
  --original_channels 1 --new_channels 2 \
  --frames 1 --height 256 --width 256

Traceback (most recent call last):
  File "/home/tippolit/anaconda3/envs/cosmos/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/tippolit/anaconda3/envs/cosmos/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/tippolit/src/PhysiX/cosmos1/models/tokenizer/lobotomize/inflate_channels_continuous.py", line 19, in <module>
    from cosmos1.models.autoregressive.tokenizer.lobotomy.helpers import inflate_channel_weights
ModuleNotFoundError: No module named 'cosmos1.models.autoregressive.tokenizer.lobotomy'


### Fine-Tune

### Inference

In [24]:
!PYTHONPATH=$(pwd) python cosmos1/models/autoregressive/evaluation/general.py \
  --batch_input_path    /home/tippolit/src/PhysiX/data/raw_data/physgen/data \
  --checkpoint_dir      /checkpoints/finetuned/ \
  --ar_model_dir        Cosmos-1.0-Autoregressive-4B \
  --tokenizer_path      /checkpoints/tokenizers/<DATASET>/last.pth \
  --channel_stats_path  /data/normalized/<DATASET>/normalization_stats.json \
  --dimensions          256 256 \
  --context_len         9 \
  --random_eval_samples 10 \
  --visualize_interval  1 \
  --output_dir          results/<DATASET>/ \
  --compression_ratio   4 8 8

/bin/bash: line 1: DATASET: No such file or directory


### Calculate Evaluation Metrics

In [25]:
!python eval_metrics.py \
    --data_dir ./eval/{model_name}/real \
    --pred_dir ./eval/{model_name}/pred \
    --osm_dir ./eval/{model_name}/osm \
    --output ./eval_results/evaluation_{model_name}.csv

Traceback (most recent call last):
  File "/home/tippolit/src/PhysiX/eval_metrics.py", line 210, in <module>
    file_names = os.listdir(data_dir)
FileNotFoundError: [Errno 2] No such file or directory: './eval/{model_name}/real'


### Show Results

In [26]:
# TODO: visualize the results — see eval script (depth anything)