In [31]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, Dataset

In [32]:
class MyCustomDataset(Dataset):
    """Map-style dataset that pairs each sample with the label at the same index.

    Args:
        data: Indexable, sized container of samples.
        labels: Indexable container of labels, addressed with the same
            indices as ``data``.
    """

    def __init__(self, data, labels):
        self.data, self.labels = data, labels

    def __len__(self):
        """Return the number of samples, i.e. ``len(data)``."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` tuple stored at position ``idx``."""
        # idx is forwarded as-is, so whatever indexing the containers support
        # (negative indices, slices, ...) is supported here as well.
        return self.data[idx], self.labels[idx]
        

In [33]:
# Features: 10 samples, row i holds the consecutive pair (i, i + 1).
# A (10, 1) column of 0..9 is broadcast against the per-column offsets [0, 1].
features = torch.arange(10, dtype=torch.float32).unsqueeze(1) + torch.tensor([0.0, 1.0])

# Labels: one number per sample (the sum of its two features, for demonstration)
labels = features.sum(dim=1)

print(features, labels, sep="\n")

tensor([[ 0.,  1.],
        [ 1.,  2.],
        [ 2.,  3.],
        [ 3.,  4.],
        [ 4.,  5.],
        [ 5.,  6.],
        [ 6.,  7.],
        [ 7.,  8.],
        [ 8.,  9.],
        [ 9., 10.]])
tensor([ 1.,  3.,  5.,  7.,  9., 11., 13., 15., 17., 19.])


In [34]:
# Wrap the demo tensors so that sample i and label i are served together.
dataset = MyCustomDataset(features, labels)

In [35]:
# Number of samples, as reported by MyCustomDataset.__len__ (the length of the feature tensor).
len(dataset)

10

In [36]:
# The index is passed straight through to the underlying tensors, so -1
# returns the last (sample, label) pair.
dataset[-1]

(tensor([ 9., 10.]), tensor(19.))

In [39]:
# Batches of 4 drawn in shuffled order; with 10 samples the last batch holds
# only 2. The shuffle uses a dedicated, seeded generator so that a fresh
# "Restart & Run All" yields the same batch order every time.
dataloader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    generator=torch.Generator().manual_seed(0),
)

In [65]:
# One pass over the loader: show each batch of features followed by its labels.
for x_batch, y_batch in dataloader:
    print(x_batch, y_batch, sep="\n")

tensor([[ 9., 10.],
        [ 6.,  7.],
        [ 7.,  8.],
        [ 5.,  6.]])
tensor([19., 13., 15., 11.])
tensor([[1., 2.],
        [8., 9.],
        [0., 1.],
        [2., 3.]])
tensor([ 3., 17.,  1.,  5.])
tensor([[4., 5.],
        [3., 4.]])
tensor([9., 7.])


In [66]:
class DatasetPT(Dataset):
    """Map-style dataset that serves NumPy-backed samples as float32 tensors.

    Args:
        X: NumPy array of features, indexed along its first axis.
        Y: NumPy array of targets, indexed along its first axis. May be 1-D
            (one scalar per sample) or carry trailing dimensions.
    """

    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        """Return the number of samples, taken from the first axis of ``Y``."""
        return self.Y.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair at ``idx`` as float32 tensors.

        Only the first axis is indexed, so 1-D arrays (one scalar per sample)
        work as well as arrays with trailing dimensions.
        """
        # Indexing a 1-D array yields a NumPy scalar; np.asarray turns it into
        # a 0-d array, which torch.from_numpy accepts. For arrays that already
        # return an ndarray view this is a no-op.
        X_out = torch.from_numpy(np.asarray(self.X[idx])).float()
        Y_out = torch.from_numpy(np.asarray(self.Y[idx])).float()
        return X_out, Y_out