In [None]:
# 2025-05-15: check tensor shapes in the tar file

In [3]:
import tarfile
import torch
import io
import numpy as np

# Load and extract contents from tar file
def load_and_check_tensors(tar_path, expected_channels=4):
    """Print the shape of every serialized tensor in a tar archive and validate it.

    Every archive member whose name ends in ``.pt`` or ``.pth`` is read into
    memory and deserialized with ``torch.load``. For each such member one of
    the following is printed:

    * a single tensor: its shape (and its channel count is validated);
    * a dictionary: each key with the tensor shape, or the Python type for
      non-tensor values;
    * anything else: the Python type of the loaded object.

    Args:
        tar_path: Path of the tar archive to inspect.
        expected_channels: Required size of dimension 1 (the latent-channel
            dimension) of every single-tensor member. Defaults to 4.

    Returns:
        None. Results are reported on stdout.

    Raises:
        AssertionError: If a single-tensor member has fewer than two
            dimensions or its dimension 1 differs from ``expected_channels``.
    """
    with tarfile.open(tar_path, 'r') as tar:
        for member in tar.getmembers():
            # str.endswith accepts a tuple, so one call covers both suffixes.
            if not member.name.endswith(('.pt', '.pth')):
                continue

            # extractfile() returns None for members that are not regular
            # files or links (e.g. directories); there is nothing to load.
            f = tar.extractfile(member)
            if f is None:
                continue

            # Copy the payload into a seekable in-memory buffer and release
            # the archive-backed reader right away.
            with f:
                tensor_data = io.BytesIO(f.read())

            # NOTE(security): torch.load is pickle-based; only run this on
            # archives from a trusted source.
            # map_location='cpu' lets tensors that were saved on a GPU be
            # inspected on a machine without that device.
            tensor = torch.load(tensor_data, map_location='cpu')

            if isinstance(tensor, torch.Tensor):
                print(f"{member.name}: Single tensor with shape {tensor.shape}")
                # Raise explicitly instead of using `assert`, so the check is
                # not stripped under `python -O`, and guard the rank so a
                # 0-D/1-D tensor yields the same clear error instead of an
                # IndexError.
                if tensor.dim() < 2 or tensor.shape[1] != expected_channels:
                    raise AssertionError(
                        f"{member.name}: expected {expected_channels} latent "
                        f"channels in dim 1, got shape {tuple(tensor.shape)}"
                    )
            elif isinstance(tensor, dict):
                print(f"{member.name}: Dictionary with keys:")
                for key, value in tensor.items():
                    if isinstance(value, torch.Tensor):
                        print(f"  - {key}: {value.shape}")
                    else:
                        print(f"  - {key}: {type(value)}")
            else:
                print(f"{member.name}: Not a tensor, type: {type(tensor)}")

            
# Example usage: run the shape check on the polyhaven_tiny training archive.
tar_path = (
    "/home/jupyter/mluser/git/llm-cv-pano-cubediff/cl/data/dataspace/polyhaven_tiny/cubediff_train.tar"
)
load_and_check_tensors(tar_path=tar_path)

quarry_01_puresky.quarry_01_puresky.pt: Single tensor with shape torch.Size([6, 4, 64, 64])
gray_pier.gray_pier.pt: Single tensor with shape torch.Size([6, 4, 64, 64])
neuer_zollhof.neuer_zollhof.pt: Single tensor with shape torch.Size([6, 4, 64, 64])
lakeside.lakeside.pt: Single tensor with shape torch.Size([6, 4, 64, 64])
bergen.bergen.pt: Single tensor with shape torch.Size([6, 4, 64, 64])
autumn_forest_02.autumn_forest_02.pt: Single tensor with shape torch.Size([6, 4, 64, 64])
quarry_01.quarry_01.pt: Single tensor with shape torch.Size([6, 4, 64, 64])
belfast_sunset_puresky.belfast_sunset_puresky.pt: Single tensor with shape torch.Size([6, 4, 64, 64])
bambanani_sunset.bambanani_sunset.pt: Single tensor with shape torch.Size([6, 4, 64, 64])
abandoned_workshop_02.abandoned_workshop_02.pt: Single tensor with shape torch.Size([6, 4, 64, 64])
cloudy_vondelpark.cloudy_vondelpark.pt: Single tensor with shape torch.Size([6, 4, 64, 64])
brown_photostudio_01.brown_photostudio_01.pt: Single t