In [None]:
import time
import ray
from ray.train.torch import TorchTrainer
from ray.air import ScalingConfig

# Initialize Ray
ray.init(ignore_reinit_error=True)

# Define the training function and use the @ray.remote decorator to parallelize it
@ray.remote
def train_fn(config):
    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torch.utils.data import DataLoader, TensorDataset

    # Simulate data size
    data_size = config["data_size"]  # Data size passed from the main loop
    batch_size = config.get("batch_size", 64)  # Default batch size

    # Generate synthetic data (X: features, y: labels)
    X = torch.randn(data_size, 10)  # 10 features
    y = torch.randint(0, 2, (data_size,))  # Binary classification (0 or 1)

    # Create a simple dataset and data loader
    dataset = TensorDataset(X, y)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Define a simple model
    model = nn.Sequential(
        nn.Linear(10, 16),
        nn.ReLU(),
        nn.Linear(16, 2)  # Output layer for 2 classes
    )
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # Training loop
    epochs = 5
    for epoch in range(epochs):
        total_loss = 0.0
        for batch_X, batch_y in dataloader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")

    return {"final_loss": total_loss}

# Dataset sizes for testing scalability
datasets = [10_000, 50_000, 100_000, 1_000_000]  # Simulated dataset sizes

# Loop through datasets and train
for size in datasets:
    start_time = time.time()  # Start timer

    # Run the remote task and get the result
    future = train_fn.remote({"data_size": size})  # Call the remote function
    
    # Get the result from the remote function
    result = ray.get(future)  # Wait for the result of the remote task
    
    end_time = time.time()  # End timer
    
    training_time = end_time - start_time  # Calculate elapsed time
    print(f"Dataset size: {size}, Training time: {training_time:.2f} seconds")
    print(f"Results for dataset size {size}: {result}")

# Shut down Ray
ray.shutdown()


2025-01-01 22:45:47,769	INFO worker.py:1819 -- Started a local Ray instance.


[36m(train_fn pid=14552)[0m Epoch 1/5, Loss: 109.2871
[36m(train_fn pid=14552)[0m Epoch 2/5, Loss: 109.1031
[36m(train_fn pid=14552)[0m Epoch 3/5, Loss: 108.8427
[36m(train_fn pid=14552)[0m Epoch 4/5, Loss: 108.9449
Dataset size: 10000, Training time: 7.00 seconds
Results for dataset size 10000: {'final_loss': 108.96069884300232}
[36m(train_fn pid=14552)[0m Epoch 5/5, Loss: 108.9607
[36m(train_fn pid=14552)[0m Epoch 1/5, Loss: 543.0476
[36m(train_fn pid=14552)[0m Epoch 2/5, Loss: 542.3775
[36m(train_fn pid=14552)[0m Epoch 3/5, Loss: 542.4362
[36m(train_fn pid=14552)[0m Epoch 4/5, Loss: 542.2909
Dataset size: 50000, Training time: 7.93 seconds
Results for dataset size 50000: {'final_loss': 542.3334870934486}
[36m(train_fn pid=14552)[0m Epoch 5/5, Loss: 542.3335
[36m(train_fn pid=14552)[0m Epoch 1/5, Loss: 1084.6758
[36m(train_fn pid=14552)[0m Epoch 2/5, Loss: 1084.0439
[36m(train_fn pid=14552)[0m Epoch 3/5, Loss: 1083.7928
[36m(train_fn pid=14552)[0m Epoch 4/

In [6]:
!git --version


git version 2.44.0.windows.1


In [7]:
!git init


Initialized empty Git repository in C:/Users/white/Desktop/PDC JUypter/.git/


In [None]:
!git remote add origin https://github.com/your-username/your-repository.git
