In [55]:
import numpy as np
import random
import time
import sys

from tinytorch.core.dataloader import Dataset, TensorDataset, DataLoader, RandomHorizontalFlip, RandomCrop, Compose
from tinytorch.core.tensor import Tensor


## Unit Test - Dataset Abstract Base Class

In [10]:
def test_unit_dataset():
    """Check that Dataset is abstract and that a concrete subclass is usable.

    Raises:
        AssertionError: if ``Dataset()`` can be instantiated directly, or if
            the minimal subclass reports a wrong length or wrong items.
    """
    print("üî¨ Unit Test: Dataset Abstract Base Class...")

    # Keep only the constructor call inside the try body so nothing else can
    # accidentally satisfy (or be swallowed by) the TypeError handler.
    try:
        Dataset()
    except TypeError:
        print("‚úÖ Dataset is properly abstract")
    else:
        raise AssertionError('Should not be able to instantiate abstract Dataset')

    class TestDataset(Dataset):
        """Minimal concrete Dataset that yields 'item_<idx>' strings."""

        def __init__(self, size):
            self.size = size

        def __len__(self) -> int:
            return self.size

        def __getitem__(self, idx: int):
            return f'item_{idx}'

    dataset = TestDataset(10)
    assert len(dataset) == 10, f'Expected length 10, got {len(dataset)}'
    assert dataset[0] == 'item_0', f'Wrong first item: {dataset[0]}'
    assert dataset[9] == 'item_9', f'Wrong last item: {dataset[9]}'

    # Report success only after the subclass checks above have actually run.
    print("‚úÖ Dataset interface works correctly!")

if __name__=='__main__':
    test_unit_dataset()

üî¨ Unit Test: Dataset Abstract Base Class...
‚úÖ Dataset is properly abstract
‚úÖ Dataset interface works correctly!


## Unit Test - Tensor Dataset Class

In [11]:
def test_unit_tensordataset():
    """Check TensorDataset length, indexing and its two error paths.

    Raises:
        AssertionError: if the length or first sample is wrong, if an
            out-of-range index does not raise IndexError, or if tensors with
            different first dimensions do not raise ValueError.
    """
    print("üî¨ Unit Test: TensorDataset...")

    features = Tensor([[1, 2], [3, 4], [5, 6]])
    labels = Tensor([0, 1, 0])
    dataset = TensorDataset(features, labels)

    # Length follows the first dimension of the wrapped tensors.
    assert len(dataset) == 3, f"Expected length 3, got {len(dataset)}"

    # Indexing yields one entry per wrapped tensor.
    sample = dataset[0]
    assert len(sample) == 2, "Should return tuple with 2 tensors"
    assert np.array_equal(sample[0].data, [1, 2]), f'Wrong features: {sample[0].data}'

    # Out-of-bounds access must raise IndexError.
    try:
        dataset[10]
    except IndexError:
        pass
    else:
        raise AssertionError("Should raise IndexError for out of bounds access")

    # Build the mismatched inputs outside the try block so that only the
    # TensorDataset constructor can satisfy the expected ValueError.
    bad_features = Tensor([[1, 2], [3, 4]])
    bad_labels = Tensor([0, 1, 0])
    try:
        TensorDataset(bad_features, bad_labels)
    except ValueError:
        pass
    else:
        raise AssertionError("Should raise error for mismatched tensor sizes")

    print("‚úÖ TensorDataset works correctly!")

if __name__ =='__main__':
    test_unit_tensordataset()

üî¨ Unit Test: TensorDataset...
‚úÖ TensorDataset works correctly!


## Unit Test: Data Augmentation Transforms

In [39]:
def test_unit_augmentation():
    """Exercise RandomHorizontalFlip, RandomCrop and Compose.

    Covers: deterministic flip behaviour at p=1.0 and p=0.0, output-shape
    preservation of padded random crops for (C, H, W) and (H, W) inputs,
    a two-step Compose pipeline, Tensor-in/Tensor-out behaviour, and a coarse
    statistical check that p=0.5 produces both outcomes.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    print("üî¨ Unit Test: Data Augmentation...")

    # p=1.0 must reverse every row of the image.
    print("   Testing RandomHorizontalFlip...")
    flip = RandomHorizontalFlip(p=1.0)

    img = np.array([[1, 2, 3], [4, 5, 6]])
    flipped = flip(img)
    expected = np.array([[3, 2, 1], [6, 5, 4]])
    assert np.array_equal(flipped, expected), f'Flip failed: {flipped} vs {expected}'

    # p=0.0 must leave the image untouched.
    no_flip = RandomHorizontalFlip(p=0.0)
    unchanged = no_flip(img)
    assert np.array_equal(unchanged, img), 'p=0 should never flip'

    # RandomCrop with padding must return the requested spatial size.
    crop = RandomCrop(32, padding=4)

    # (C, H, W) input, CIFAR-10 style.
    img_chw = np.random.randn(3, 32, 32)
    cropped = crop(img_chw)
    assert cropped.shape == (3, 32, 32), f'CHW crop shape wrong: {cropped.shape}'

    # (H, W) input.
    img_hw = np.random.randn(28, 28)
    crop_hw = RandomCrop(28, padding=4)
    cropped_hw = crop_hw(img_hw)
    assert cropped_hw.shape == (28, 28), f'HW crop shape wrong: {cropped_hw.shape}'

    # Compose should chain the transforms and keep the shape.
    print("   Testing Compose")
    transforms = Compose([
        RandomHorizontalFlip(p=0.5),
        RandomCrop(32, padding=4)
    ])

    img = np.random.randn(3, 32, 32)
    augmented = transforms(img)
    assert augmented.shape == (3, 32, 32), f'Compose output shape wrong: {augmented.shape}'

    # Transforms given a Tensor should hand back a Tensor.
    print("   Tesing Tensor compatibility")
    tensor_img = Tensor(np.random.randn(3, 32, 32))

    flip_result = RandomHorizontalFlip(p=1.0)(tensor_img)
    assert isinstance(flip_result, Tensor), 'Flip should return Tensor given Tensor'

    crop_result = RandomCrop(32, padding=4)(tensor_img)
    assert isinstance(crop_result, Tensor), 'Crop should return Tensor given Tensor'

    # Randomness check: with p=0.5 both outcomes should show up regularly.
    print ("   Testing randomness...")

    flip_random = RandomHorizontalFlip(p=0.5)

    flips = 0
    no_flips = 0
    test_img = np.array([[1, 2]])
    mirrored = np.array([[2, 1]])  # built once instead of on every iteration

    for _ in range(100):
        result = flip_random(test_img)
        if np.array_equal(result, mirrored):
            flips += 1
        else:
            no_flips += 1

    # Require more than 20 of each outcome. For a fair coin over 100 trials,
    # 20 or fewer of either outcome is vanishingly unlikely, so the symmetric
    # bound also catches an implementation that almost always flips.
    assert flips > 20 and no_flips > 20, f'flip randomness seems broken: {flips} flips, {no_flips} no-flips'

    print("‚úÖ Data Augmentation works correctly!")

if __name__=='__main__':
    test_unit_augmentation()

üî¨ Unit Test: Data Augmentation...
   Testing RandomHorizontalFlip...
   Testing Compose
   Tesing Tensor compatibility
   Testing randomness...
‚úÖ Data Augmentation works correctly!


## Unit Test - Data Loader

In [117]:
def test_unit_dataloader():
    """Check DataLoader batching, batch shapes, data order and shuffling.

    Uses a five-sample dataset so that batch_size=2 yields two full batches
    and one single-sample batch.

    Raises:
        AssertionError: if the batch count, batch shapes, first sample, or
            the set of rows returned with/without shuffling is wrong.
    """
    print("üî¨ Unit Test: DataLoader...")

    # Five samples, two features each.
    features = Tensor([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]])
    labels = Tensor([0, 1, 0, 1, 0])
    dataset = TensorDataset(features, labels)

    # Basic batching without shuffling: 5 samples / batch_size 2 -> 3 batches.
    loader = DataLoader(dataset, batch_size=2, shuffle=False)
    assert len(loader) == 3, f'Expected 3 batches, got {len(loader)}'

    batches = list(loader)
    assert len(batches) == 3, f'Expected 3 batches, got {len(batches)}'

    # First batch is full.
    batch_features, batch_labels = batches[0]
    assert batch_features.data.shape == (2, 2), f'Wrong batch feature shape: {batch_features.data.shape}'
    assert batch_labels.data.shape == (2,), f'Wrong batch label shape: {batch_labels.data.shape}'

    # Last batch holds the single leftover sample.
    batch_features, batch_labels = batches[2]
    assert batch_features.data.shape == (1, 2), f"Wrong last batch features shape: {batch_features.data.shape}"
    assert batch_labels.data.shape == (1,), f"Wrong last batch labels shape: {batch_labels.data.shape}"

    # Without shuffling the original order is preserved.
    assert np.array_equal(batches[0][0].data[0], [1, 2]), "First sample should be [1, 2]"
    assert batches[0][1].data[0] == 0, "First label should be 0"

    # One batch covering the whole dataset, with and without shuffling.
    loader_shuffle = DataLoader(dataset, batch_size=5, shuffle=True)
    loader_no_shuffle = DataLoader(dataset, batch_size=5, shuffle=False)

    batch_shuffle = list(loader_shuffle)[0]
    batch_no_shuffle = list(loader_no_shuffle)[0]

    # Row order is deliberately not compared, so this check cannot fail by
    # random chance: it only verifies that both loaders return exactly the
    # original rows, in whatever order.
    shuffle_features = {tuple(row) for row in batch_shuffle[0].data}
    no_shuffle_features = {tuple(row) for row in batch_no_shuffle[0].data}
    expected_features = {(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)}

    # A set hides duplicated rows, so pin the row count as well.
    assert len(batch_shuffle[0].data) == len(expected_features), "Shuffle should preserve all data"
    assert shuffle_features == expected_features, "Shuffle should preserve all data"
    assert no_shuffle_features == expected_features, "No shuffle should preserve all data"

    print("‚úÖ DataLoader works correctly!")

if __name__=='__main__':
    test_unit_dataloader() 

üî¨ Unit Test: DataLoader...
‚úÖ DataLoader works correctly!
‚úÖ DataLoader works correctly!


In [53]:
def test_unit_dataloader_deterministic():
    """Check that DataLoader shuffling is reproducible under a fixed seed.

    Seeds the standard-library ``random`` generator, collects the shuffled
    batches, and verifies that the same seed reproduces them while a
    different seed changes them. The caller's RNG state is saved up front
    and restored afterwards so this test does not make later cells
    deterministic as a side effect.

    Raises:
        AssertionError: if equal seeds give different batches, or different
            seeds give identical batches.
    """
    print("üî¨ Unit Test: DataLoader Deterministic Shuffling...")

    # Four samples, two features each.
    features = Tensor([[1, 2], [3, 4], [5, 6], [7, 8]])
    labels = Tensor([0, 1, 0, 1])
    dataset = TensorDataset(features, labels)

    def _shuffled_batches(seed):
        """Seed ``random`` and return every batch of a fresh shuffling loader."""
        random.seed(seed)
        return list(DataLoader(dataset, batch_size=2, shuffle=True))

    saved_state = random.getstate()
    try:
        # Same seed -> identical batches.
        batches1 = _shuffled_batches(42)
        batches2 = _shuffled_batches(42)

        # zip() below would silently drop trailing batches on a length mismatch.
        assert len(batches1) == len(batches2), \
            "Same seed should produce the same number of batches"

        for i, (batch1, batch2) in enumerate(zip(batches1, batches2)):
            assert np.array_equal(batch1[0].data, batch2[0].data), \
                f"Batch {i} features should be identical with same seed"
            assert np.array_equal(batch1[1].data, batch2[1].data), \
                f"Batch {i} labels should be identical with same seed"

        # Different seeds -> at least one batch differs (for these two seeds).
        batches3 = _shuffled_batches(42)
        batches4 = _shuffled_batches(123)

        different = any(
            not np.array_equal(batch3[0].data, batch4[0].data)
            for batch3, batch4 in zip(batches3, batches4)
        )
        assert different, "Different seeds should produce different shuffles"
    finally:
        # Hand the global generator back exactly as we found it.
        random.setstate(saved_state)

    print("‚úÖ Deterministic shuffling works correctly!")

if __name__ == "__main__":
    test_unit_dataloader_deterministic()

üî¨ Unit Test: DataLoader Deterministic Shuffling...
‚úÖ Deterministic shuffling works correctly!


## Systems Analysis: DataLoader Performance on Synthetic Data

In [59]:
def analyze_dataloader_performance():
    """Time full DataLoader passes over synthetic data and print the results.

    Two experiments are run:

    1. For each dataset size and batch size, one unshuffled pass is timed and
       reported as elapsed seconds and samples per second.
    2. For one 10,000-sample dataset, an unshuffled and a shuffled pass are
       timed and the relative overhead of shuffling is reported.

    Timings are single, un-averaged measurements taken with
    ``time.perf_counter`` and will vary from run to run. Nothing is returned.
    """
    print("üìä Analyzing DataLoader Performance...")

    def _seconds_per_pass(loader):
        """Return the wall-clock seconds needed to iterate `loader` once."""
        started = time.perf_counter()
        for _ in loader:
            pass
        return time.perf_counter() - started

    # Dataset sizes and batch sizes to sweep.
    sizes = [1000, 5000, 10000]
    batch_sizes = [16, 64, 256]

    print("\nüîç Batch Size vs Loading Time:")

    for size in sizes:
        # Synthetic dataset: `size` rows of 100 features, labels in [0, 10).
        features = Tensor(np.random.randn(size, 100))  # 100 features
        labels = Tensor(np.random.randint(0, 10, size))
        dataset = TensorDataset(features, labels)

        print(f"\nDataset size: {size} samples")

        for batch_size in batch_sizes:
            loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
            elapsed = _seconds_per_pass(loader)

            # A zero reading would otherwise divide by zero.
            throughput = size / elapsed if elapsed > 0 else float('inf')

            print(f"  Batch size {batch_size:3d}: {elapsed:.3f}s ({throughput:,.0f} samples/sec)")

    # Analyze shuffle overhead
    print("\nüîÑ Shuffle Overhead Analysis:")

    dataset_size = 10000
    features = Tensor(np.random.randn(dataset_size, 50))
    labels = Tensor(np.random.randint(0, 5, dataset_size))
    dataset = TensorDataset(features, labels)

    batch_size = 64

    # Baseline: no shuffle.
    time_no_shuffle = _seconds_per_pass(
        DataLoader(dataset, batch_size=batch_size, shuffle=False)
    )

    # Same pass with shuffling enabled.
    time_shuffle = _seconds_per_pass(
        DataLoader(dataset, batch_size=batch_size, shuffle=True)
    )

    # Guard the baseline the same way throughput is guarded above; a zero
    # baseline makes the percentage undefined, so report NaN instead of raising.
    if time_no_shuffle > 0:
        shuffle_overhead = ((time_shuffle - time_no_shuffle) / time_no_shuffle) * 100
    else:
        shuffle_overhead = float('nan')

    print(f"  No shuffle: {time_no_shuffle:.3f}s")
    print(f"  With shuffle: {time_shuffle:.3f}s")
    print(f"  Shuffle overhead: {shuffle_overhead:.1f}%")

    print("\nüí° Key Insights:")
    print("‚Ä¢ Larger batch sizes reduce per-sample overhead")
    print("‚Ä¢ Shuffle adds minimal overhead for reasonable dataset sizes")
    print("‚Ä¢ Memory usage scales linearly with batch size")
    print("üöÄ Production tip: Balance batch size with GPU memory limits")


def analyze_memory_usage():
    """Print estimated and measured memory footprints of data batches.

    First prints a table of estimated batch sizes in MB (4 bytes per element)
    for three flattened feature widths and four batch sizes. Then builds two
    random Tensors (32x784 and 512x784) and prints their array byte counts,
    the ``sys.getsizeof`` totals, and the large/small data ratio. Nothing is
    returned.
    """
    print("\nüìä Analyzing Memory Usage Patterns...")

    bytes_per_mb = 1024 * 1024

    def batch_megabytes(rows, cols, item_bytes=4):
        """Return the size in MB of `rows` x `cols` elements of `item_bytes` each."""
        return (rows * cols * item_bytes) / bytes_per_mb

    print("\nüíæ Memory Usage by Batch Configuration:")

    # (display name, flattened feature count) -- MNIST, CIFAR-10, ImageNet-like
    workloads = (
        ("MNIST (28√ó28)", 784),
        ("CIFAR-10 (32√ó32√ó3)", 3072),
        ("ImageNet (224√ó224√ó1)", 50176),
    )

    for title, width in workloads:
        print(f"\n{title}:")
        for rows in (1, 32, 128, 512):
            megabytes = batch_megabytes(rows, width)
            print(f"  Batch {rows:3d}: {megabytes:6.1f} MB")

    print("\nüéØ Memory Trade-offs:")
    print("‚Ä¢ Larger batches: More memory, better GPU utilization")
    print("‚Ä¢ Smaller batches: Less memory, more noisy gradients")
    print("‚Ä¢ Sweet spot: Usually 32-128 depending on model size")

    # Compare the estimates with what real tensors report.
    print("\nüî¨ Actual Tensor Memory Usage:")

    def footprint(tensor):
        """Return (array bytes, getsizeof(array) + getsizeof(tensor)) for `tensor`."""
        payload = tensor.data.nbytes
        # sys.getsizeof is shallow: it reports each object's own size only.
        with_overhead = sys.getsizeof(tensor.data) + sys.getsizeof(tensor)
        return payload, with_overhead

    small_batch = Tensor(np.random.randn(32, 784))    # small batch
    large_batch = Tensor(np.random.randn(512, 784))   # large batch

    small_payload, small_overall = footprint(small_batch)
    large_payload, large_overall = footprint(large_batch)

    print(f"  Small batch (32√ó784):")
    print(f"    - Data only: {small_payload / 1024:.1f} KB")
    print(f"    - With object overhead: {small_overall / 1024:.1f} KB")
    print(f"  Large batch (512√ó784):")
    print(f"    - Data only: {large_payload / 1024:.1f} KB")
    print(f"    - With object overhead: {large_overall / 1024:.1f} KB")
    print(f"  Ratio: {large_payload / small_payload:.1f}√ó (data scales linearly)")

    print("\nüéØ Memory Optimization Tips:")
    print("‚Ä¢ Object overhead becomes negligible with larger batches")
    print("‚Ä¢ Use float32 instead of float64 to halve memory usage")
    print("‚Ä¢ Consider gradient accumulation for effective larger batches")


def analyze_collation_overhead():
    """Time one unshuffled DataLoader pass per batch size and print ms per batch.

    A 1,000-sample, 100-feature synthetic dataset is iterated once for each
    batch size in (8, 32, 128, 512). The reported figure is the total pass
    time divided by ``len(loader)``, so it covers everything the loader does
    per batch, not collation in isolation. Timings are single measurements
    taken with ``time.perf_counter``. Nothing is returned.
    """
    print("\nüìä Analyzing Collation Overhead...")

    # Fixed synthetic dataset shared by every batch-size run.
    dataset_size = 1000
    feature_size = 100
    features = Tensor(np.random.randn(dataset_size, feature_size))
    labels = Tensor(np.random.randint(0, 10, dataset_size))
    dataset = TensorDataset(features, labels)

    print("\n‚ö° Collation Time by Batch Size:")

    for batch_size in [8, 32, 128, 512]:
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

        # perf_counter is the high-resolution clock intended for short
        # intervals; these passes take only milliseconds.
        start_time = time.perf_counter()
        for _ in loader:
            pass  # Just iterate; the loader does all the work
        total_time = time.perf_counter() - start_time

        batches = len(loader)
        time_per_batch = (total_time / batches) * 1000  # Convert to ms

        print(f"  Batch size {batch_size:3d}: {time_per_batch:.2f}ms per batch ({batches} batches total)")

    print("\nüí° Collation Insights:")
    print("‚Ä¢ Larger batches take longer to collate (more np.stack operations)")
    print("‚Ä¢ But fewer large batches are more efficient than many small ones")
    print("‚Ä¢ Optimal: Balance between batch size and iteration overhead")

# Run the three analyses in order when this cell executes as the main module;
# each one prints its own report and returns nothing.
if __name__=='__main__':
    analyze_dataloader_performance()
    analyze_memory_usage()
    analyze_collation_overhead()

üìä Analyzing DataLoader Performance...

üîç Batch Size vs Loading Time:

Dataset size: 1000 samples
  Batch size  16: 0.028s (35,537 samples/sec)
  Batch size  64: 0.032s (31,695 samples/sec)
  Batch size 256: 0.015s (65,848 samples/sec)

Dataset size: 5000 samples
  Batch size  16: 0.073s (68,077 samples/sec)
  Batch size  64: 0.056s (89,695 samples/sec)
  Batch size 256: 0.058s (86,436 samples/sec)

Dataset size: 10000 samples
  Batch size  16: 0.133s (75,181 samples/sec)
  Batch size  64: 0.112s (89,506 samples/sec)
  Batch size 256: 0.128s (78,277 samples/sec)

üîÑ Shuffle Overhead Analysis:
  No shuffle: 0.108s
  With shuffle: 0.122s
  Shuffle overhead: 13.7%

üí° Key Insights:
‚Ä¢ Larger batch sizes reduce per-sample overhead
‚Ä¢ Shuffle adds minimal overhead for reasonable dataset sizes
‚Ä¢ Memory usage scales linearly with batch size
üöÄ Production tip: Balance batch size with GPU memory limits

üìä Analyzing Memory Usage Patterns...

üíæ Memory Usage by Batch Configura

In [110]:
# Build a small synthetic dataset, split it 80/20, and wrap each half in a
# DataLoader.
features = np.random.randn(32, 765)
targets = np.random.randint(0, 10, 32)

# Ordered (unshuffled) split: the first 80% of rows train, the rest test.
train_size = int(0.8 * len(features))
x_train, x_test = features[:train_size], features[train_size:]
y_train, y_test = targets[:train_size], targets[train_size:]

# Wrap the arrays in Tensor before handing them to TensorDataset, as every
# other cell in this notebook does; the tests above rely on samples exposing
# `.data`. NOTE(review): raw ndarrays were passed here before and only len()
# was exercised -- confirm whether TensorDataset is meant to accept them.
train_dataset = TensorDataset(Tensor(x_train), Tensor(y_train))
test_dataset = TensorDataset(Tensor(x_test), Tensor(y_test))

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

# Notes:
# - 25 training rows with batch_size=8 leave a final batch of 1, so a model
#   consuming these loaders should handle variable batch sizes.
# - In production, monitor GPU utilization when choosing the batch size.
len(train_loader), len(test_loader)

(4, 1)

## Integration Testing 

In [111]:
def test_training_integration():
    """Run DataLoader through a simulated train/validation workflow.

    Builds a 1,000-sample synthetic classification dataset, splits it 80/20
    by index, wraps both halves in DataLoaders, and walks one epoch over each.
    Batch shapes are checked on the training pass, and both passes must see
    exactly as many samples as their split contains.

    Raises:
        AssertionError: if a batch is larger than `batch_size`, has the wrong
            feature width or mismatched label count, or if either loader
            yields a different number of samples than its dataset holds.
    """
    print("üî¨ Integration Test: Training Workflow...")

    # Dataset dimensions.
    num_samples = 1000
    num_features = 20
    num_classes = 5

    # Synthetic classification data.
    features = Tensor(np.random.randn(num_samples, num_features))
    labels = Tensor(np.random.randint(0, num_classes, num_samples))

    dataset = TensorDataset(features, labels)

    # 80/20 train/validation split by position.
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size

    def _subset(indices):
        """Return a TensorDataset holding `dataset[i]` for each i in `indices`.

        Samples are fetched one at a time through dataset indexing and then
        re-stacked, so the split also exercises per-sample access.
        """
        samples = [dataset[i] for i in indices]
        subset_features = Tensor(np.stack([sample[0].data for sample in samples]))
        subset_labels = Tensor(np.stack([sample[1].data for sample in samples]))
        return TensorDataset(subset_features, subset_labels)

    # Manual split (in production, you'd use proper splitting utilities).
    train_dataset = _subset(range(train_size))
    val_dataset = _subset(range(train_size, len(dataset)))

    # Create DataLoaders.
    batch_size = 32
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    print("üìä Dataset splits:")
    print(f"  Training: {len(train_dataset)} samples, {len(train_loader)} batches")
    print(f"  Validation: {len(val_dataset)} samples, {len(val_loader)} batches")

    # Simulate training loop.
    print("\nüèÉ Simulated Training Loop:")

    epoch_samples = 0
    batch_count = 0

    for batch_idx, (batch_features, batch_labels) in enumerate(train_loader):
        batch_count = batch_idx + 1
        epoch_samples += len(batch_features.data)

        # Stand-in for a forward pass: only the shapes are checked.
        assert batch_features.data.shape[0] <= batch_size, "Batch size exceeded"
        assert batch_features.data.shape[1] == num_features, "Wrong feature count"
        assert len(batch_labels.data) == len(batch_features.data), "Mismatched batch sizes"

        if batch_idx < 3:  # Show first few batches
            print(f"  Batch {batch_idx + 1}: {batch_features.data.shape[0]} samples")

    print(f"  Total: {batch_count} batches, {epoch_samples} samples processed")

    # Every training sample must have been seen exactly once in the epoch.
    assert epoch_samples == len(train_dataset), f"Expected {len(train_dataset)}, processed {epoch_samples}"

    # The validation loader was previously built but never iterated; walk it
    # too so that half of the split is verified as well.
    val_samples_seen = sum(len(val_features.data) for val_features, _ in val_loader)
    assert val_samples_seen == val_size, f"Expected {val_size} validation samples, processed {val_samples_seen}"

    print("‚úÖ Training integration works correctly!")

if __name__ == "__main__":
    test_training_integration()

üî¨ Integration Test: Training Workflow...
üìä Dataset splits:
  Training: 800 samples, 25 batches
  Validation: 200 samples, 7 batches

üèÉ Simulated Training Loop:
  Batch 1: 32 samples
  Batch 2: 32 samples
  Batch 3: 32 samples
  Total: 25 batches, 800 samples processed
‚úÖ Training integration works correctly!


## Module Integration Testing

In [118]:
def test_module():
    """Run every test in this notebook, then an augmentation + DataLoader check.

    Order of execution:
    1. The five unit tests defined in earlier cells.
    2. The training-workflow integration test.
    3. An inline scenario that augments 100 random (3, 8, 8) images, loads
       them through a shuffling DataLoader, and checks each batch's
       per-sample shape.

    Any failing assertion propagates to the caller; the final banner is
    printed only when everything passed.
    """
    banner = "=" * 50

    print("üß™ RUNNING MODULE INTEGRATION TEST")
    print(banner)

    # Unit tests, in the same order as before.
    print("Running unit tests...")
    unit_tests = (
        test_unit_dataset,
        test_unit_tensordataset,
        test_unit_dataloader,
        test_unit_dataloader_deterministic,
        test_unit_augmentation,
    )
    for unit_test in unit_tests:
        unit_test()

    print("\nRunning integration scenarios...")

    # End-to-end training workflow.
    test_training_integration()

    # Augmentation feeding a DataLoader.
    print("üî¨ Integration Test: Augmentation with DataLoader...")

    # Flip half the time, then pad-and-crop back to 8x8 (small images for test).
    pipeline = Compose([
        RandomHorizontalFlip(0.5),
        RandomCrop(8, padding=2)
    ])

    # Simulated CIFAR-style images in (C, H, W) layout, with class labels.
    raw_images = np.random.randn(100, 3, 8, 8)
    class_ids = np.random.randint(0, 10, 100)

    # Augment each image up front, then load the results as an ordinary dataset.
    augmented = np.array([pipeline(image) for image in raw_images])

    augmented_dataset = TensorDataset(Tensor(augmented), Tensor(class_ids))
    augmented_loader = DataLoader(augmented_dataset, batch_size=16, shuffle=True)

    # enumerate(start=1) leaves batch_count equal to the number of batches seen.
    batch_count = 0
    for batch_count, (batch_x, _batch_y) in enumerate(augmented_loader, start=1):
        assert batch_x.shape[1:] == (3, 8, 8), f"Augmented batch shape wrong: {batch_x.shape}"

    assert batch_count > 0, "DataLoader should produce batches"
    print("‚úÖ Augmentation + DataLoader integration works!")

    print("\n" + banner)
    print("üéâ ALL TESTS PASSED! Module ready for export.")
    print("Run: tito module complete 08")

if __name__=='__main__':
    test_module()

üß™ RUNNING MODULE INTEGRATION TEST
Running unit tests...
üî¨ Unit Test: Dataset Abstract Base Class...
‚úÖ Dataset is properly abstract
‚úÖ Dataset interface works correctly!
üî¨ Unit Test: TensorDataset...
‚úÖ TensorDataset works correctly!
üî¨ Unit Test: DataLoader...
‚úÖ DataLoader works correctly!
‚úÖ DataLoader works correctly!
üî¨ Unit Test: DataLoader Deterministic Shuffling...
‚úÖ Deterministic shuffling works correctly!
üî¨ Unit Test: Data Augmentation...
   Testing RandomHorizontalFlip...
   Testing Compose
   Tesing Tensor compatibility
   Testing randomness...
‚úÖ Data Augmentation works correctly!

Running integration scenarios...
üî¨ Integration Test: Training Workflow...
üìä Dataset splits:
  Training: 800 samples, 25 batches
  Validation: 200 samples, 7 batches

üèÉ Simulated Training Loop:
  Batch 1: 32 samples
  Batch 2: 32 samples
  Batch 3: 32 samples
  Total: 25 batches, 800 samples processed
‚úÖ Training integration works correctly!
üî¨ Integration Test