In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import h5py
from scipy.io import loadmat
import gc

# ============ GPU Setup ============
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ============ Enhanced Data Loading ============
def load_mat_file(file_path, var_name):
    try:
        with h5py.File(file_path, 'r') as f:
            data = f[var_name][:]
        print(f"Loaded {file_path} using h5py.")
    except OSError:
        print(f"h5py failed, using scipy.io.loadmat for {file_path}")
        data = loadmat(file_path)[var_name]
    return np.array(data)

def load_in_batches(file_path, var_name, batch_size=10000):
    data = []
    with h5py.File(file_path, 'r') as f:
        dataset = f[var_name]
        total = dataset.shape[0]
        for i in range(0, total, batch_size):
            batch = np.array(dataset[i:i+batch_size]).T
            data.append(batch)
    return np.concatenate(data, axis=0)

# ============ Batch Preprocessing ============
def batch_preprocess(y, psi, h, batch_size=5000):
    num_samples = y.shape[0]
    X_batches = []
    h_target_batches = []
    
    for i in range(0, num_samples, batch_size):
        # Process current batch
        batch_end = min(i+batch_size, num_samples)
        
        # Convert to complex
        y_batch = y[i:batch_end,...,0] + 1j*y[i:batch_end,...,1]
        h_batch = h[i:batch_end,...,0] + 1j*h[i:batch_end,...,1]
        
        # Process Psi with proper reshaping
        psi_real = psi[0,:,:,i:batch_end].transpose(2, 1, 0)
        psi_imag = psi[1,:,:,i:batch_end].transpose(2, 1, 0)
        psi_batch = psi_real + 1j*psi_imag
        
        # Create input features
        X_real = np.concatenate([
            y_batch.real,
            psi_batch.reshape(len(y_batch), -1).real
        ], axis=1)
        
        X_imag = np.concatenate([
            y_batch.imag,
            psi_batch.reshape(len(y_batch), -1).imag
        ], axis=1)
        
        X_stack = np.stack([X_real, X_imag], axis=1)
        X_batches.append(torch.tensor(X_stack, dtype=torch.float32))
        
        # Create target
        h_target = torch.stack([
            torch.tensor(h_batch.real, dtype=torch.float32),
            torch.tensor(h_batch.imag, dtype=torch.float32)
        ], dim=1)
        h_target_batches.append(h_target)
        
        # Cleanup
        del y_batch, h_batch, psi_real, psi_imag, psi_batch
        gc.collect()
        if device.type == 'cuda':
            torch.cuda.empty_cache()
    
    return torch.cat(X_batches), torch.cat(h_target_batches)

# ============ Main Data Pipeline ============
try:
    y = load_mat_file('yDL_10dB_40k_150pilots_ipjp.mat', 'yDL')
    psi = load_mat_file('PsiDL_10dB_40k_150pilots_ipjp.mat', 'PsiDL')
    h = load_mat_file('hDL_10dB_40k_150pilots_ipjp.mat', 'hDL')
except MemoryError:
    print("Using batch loading for large files")
    y = load_in_batches('yDL_10dB_40k_150pilots_ipjp.mat', 'yDL')
    psi = load_in_batches('PsiDL_10dB_40k_150pilots_ipjp.mat', 'PsiDL')
    h = load_in_batches('hDL_10dB_40k_150pilots_ipjp.mat', 'hDL')

# Process data in batches
X, h_target = batch_preprocess(y, psi, h)

# Cleanup original data
del y, psi, h
gc.collect()

# ============ Optimized Data Loading ============
def create_datasets(X, h_target):
    train_size = 32000
    val_size = 4000
    
    train_dataset = TensorDataset(X[:train_size], h_target[:train_size])
    val_dataset = TensorDataset(X[train_size:train_size+val_size], 
                               h_target[train_size:train_size+val_size])
    test_dataset = TensorDataset(X[train_size+val_size:], 
                                h_target[train_size+val_size:])
    return train_dataset, val_dataset, test_dataset

train_dataset, val_dataset, test_dataset = create_datasets(X, h_target)

# Memory-friendly data loaders
def create_loaders(batch_size=32):
    return (
        DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                 pin_memory=device.type == 'cuda'),
        DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                 pin_memory=device.type == 'cuda'),
        DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                 pin_memory=device.type == 'cuda')
    )

train_loader, val_loader, test_loader = create_loaders()

# ============ Lightweight Model ============
class Autoencoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv1d(2, 16, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv1d(16, 32, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv1d(32, 64, 3, stride=2, padding=1),
            nn.AdaptiveAvgPool1d(512),
            nn.Conv1d(64, 2, 1)
        )
        
    def forward(self, x):
        return self.encoder(x)

# ============ Training with Memory Management ============
def train_model():
    model = Autoencoder().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    criterion = nn.MSELoss()
    
    print("Starting training...")
    for epoch in range(1, 21):
        model.train()
        train_loss = 0.0
        
        for batch_idx, (xb, yb) in enumerate(train_loader):
            xb, yb = xb.to(device, non_blocking=True), yb.to(device, non_blocking=True)
            
            optimizer.zero_grad(set_to_none=True)
            preds = model(xb)
            loss = criterion(preds, yb)
            loss.backward()
            
            # Gradient accumulation every 4 batches
            if (batch_idx + 1) % 4 == 0:
                optimizer.step()
                optimizer.zero_grad()
            
            train_loss += loss.item()
            
            # Cleanup
            del xb, yb, preds
            if device.type == 'cuda':
                torch.cuda.empty_cache()
        
        # Validation
        val_loss = 0.0
        model.eval()
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                preds = model(xb)
                val_loss += criterion(preds, yb).item()
                
                del xb, yb, preds
                if device.type == 'cuda':
                    torch.cuda.empty_cache()
        
        print(f"Epoch {epoch:2d} | Train Loss: {train_loss/len(train_loader):.6f} | "
              f"Val Loss: {val_loss/len(val_loader):.6f}")
    
    return model

# ============ Execution ============
model = train_model()

# Cleanup before testing
gc.collect()
if device.type == 'cuda':
    torch.cuda.empty_cache()

# Final evaluation
def evaluate(model, loader):
    model.eval()
    total_loss = 0.0
    criterion = nn.MSELoss()
    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(device), yb.to(device)
            preds = model(xb)
            total_loss += criterion(preds, yb).item()
    return total_loss / len(loader)

print("\nTesting...")
test_loss = evaluate(model, test_loader)
print(f"Final Test Loss: {test_loss:.6f}")

Using device: cuda
h5py failed, using scipy.io.loadmat for yDL_10dB_40k_150pilots_ipjp.mat
Loaded PsiDL_10dB_40k_150pilots_ipjp.mat using h5py.
h5py failed, using scipy.io.loadmat for hDL_10dB_40k_150pilots_ipjp.mat
Starting training...
Epoch  1 | Train Loss: 0.447845 | Val Loss: 0.446302
Epoch  2 | Train Loss: 0.447822 | Val Loss: 0.446206
Epoch  3 | Train Loss: 0.447812 | Val Loss: 0.446235
Epoch  4 | Train Loss: 0.447814 | Val Loss: 0.446206
Epoch  5 | Train Loss: 0.447809 | Val Loss: 0.446211
Epoch  6 | Train Loss: 0.447811 | Val Loss: 0.446203
Epoch  7 | Train Loss: 0.447810 | Val Loss: 0.446207
Epoch  8 | Train Loss: 0.447807 | Val Loss: 0.446209
Epoch  9 | Train Loss: 0.447812 | Val Loss: 0.446217
Epoch 10 | Train Loss: 0.447812 | Val Loss: 0.446214
Epoch 11 | Train Loss: 0.447805 | Val Loss: 0.446204
Epoch 12 | Train Loss: 0.447809 | Val Loss: 0.446206
Epoch 13 | Train Loss: 0.447806 | Val Loss: 0.446214
Epoch 14 | Train Loss: 0.447810 | Val Loss: 0.446232
Epoch 15 | Train Loss