In [1]:
import torch
from sklearn.model_selection import train_test_split

In [2]:
# 1. MANUAL DATASET GENERATION
# Seed PyTorch's global RNG so the random features are identical on every
# Restart & Run All (42 mirrors the random_state used by the splits).
torch.manual_seed(42)
# Creating 100 samples with 5 features each
features = torch.randn(100, 5)
# Labels: 0 (Normal) and 1 (Fraud) -- 80 normal samples followed by 20 fraud
labels = torch.cat([torch.zeros(80), torch.ones(20)])

In [3]:
# 2. FIRST SPLIT: Train vs (Validation + Test)
# 80% of the samples go to training; the other 20% form a temporary pool
# that is divided into validation and test sets afterwards.
features_np = features.numpy()
labels_np = labels.numpy()
# Stratifying on the labels keeps the class ratio the same in both parts
x_train, x_temp, y_train, y_temp = train_test_split(
    features_np, labels_np, test_size=0.20, random_state=42, stratify=labels_np
)

In [4]:
# 3. SECOND SPLIT: Validation vs Test
# Halve the 20% pool: each half ends up being 10% of the full dataset
holdout_split = dict(test_size=0.50, random_state=42, stratify=y_temp)
x_val, x_test, y_val, y_test = train_test_split(x_temp, y_temp, **holdout_split)

In [5]:
# Converting back to PyTorch Tensors
# Features AND labels are converted so every split is a matching (x, y)
# tensor pair. torch.as_tensor accepts NumPy arrays as well as tensors, so
# re-running this cell does not fail the way torch.from_numpy would on an
# already-converted tensor.
x_train, y_train = torch.as_tensor(x_train), torch.as_tensor(y_train)
x_val, y_val = torch.as_tensor(x_val), torch.as_tensor(y_val)
x_test, y_test = torch.as_tensor(x_test), torch.as_tensor(y_test)

In [6]:
# Report the size of the full dataset and of each partition, one per line
summary_lines = [
    f"Total Samples: {len(features)}",
    f"Training Set: {len(x_train)} samples",
    f"Validation Set: {len(x_val)} samples",
    f"Testing Set: {len(x_test)} samples",
]
print("\n".join(summary_lines))

Total Samples: 100
Training Set: 80 samples
Validation Set: 10 samples
Testing Set: 10 samples
