In [1]:
import os

import torch
from torch.utils.data import Dataset
import pandas as pd

In [2]:
data = {'Title': ['Homo Sapiens', 
                 'Homo Deus', 
                 'Octopus']}

# Create a Pandas DataFrame with the data
df = pd.DataFrame(data)

In [3]:
df

Unnamed: 0,Title
0,Homo Sapiens
1,Homo Deus
2,Octopus


In [14]:
df.iloc[0]

Title    Homo Sapiens
Name: 0, dtype: object

In [4]:
class PetDataset(Dataset):
    def __init__(self, dataframe):
        self.dataframe = self.dataframe
    
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, index):
        title = self.dataframe.iloc[index]

IndentationError: expected an indented block (1363826637.py, line 10)

##### Example

In [30]:
books = {
    0: {
        'title': 'Sapiens',
        'year_published': 2011,
    },
    1: {
        'title': 'Homo Deus',
        'year_published': 2015,
    },
    2: {
        'title': '21 Lessons for the 21st Century',
        'year_published': 2018,
    }
}


In [44]:
from torch.utils.data import Dataset

In [45]:
books

{0: {'title': 'Sapiens', 'year_published': 2011},
 1: {'title': 'Homo Deus', 'year_published': 2015},
 2: {'title': '21 Lessons for the 21st Century', 'year_published': 2018}}

Create `PetDataset` using PyTorch's `Dataset`

In [46]:
class PetDataset(Dataset):
    def __init__(self, data):
        self.data = data
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        title = self.data[index]['title']
        year_published = self.data[index]['year_published']
        
        return (title, year_published)

In [47]:
dataset = PetDataset(data=books)

In [48]:
len(dataset)

3

In [49]:
dataset[0]

('Sapiens', 2011)

In [50]:
dataset[2]

('21 Lessons for the 21st Century', 2018)

### DataLoader

##### Example 1

In [73]:
books = {
    0: {
        'title': 'Sapiens',
        'year_published': 2011,
    },
    1: {
        'title': 'Homo Deus',
        'year_published': 2015,
    },
    2: {
        'title': 'Homo Deus',
        'year_published': 2015,
    },
    3: {
        'title': '21 Lessons for the 21st Century',
        'year_published': 2018,
    },
    4: {
        'title': 'Homo Deus',
        'year_published': 2015,
    },
    5: {
        'title': '21 Lessons for the 21st Century',
        'year_published': 2018,
    },
    6: {
        'title': 'Homo Deus',
        'year_published': 2015,
    },
    7: {
        'title': '21 Lessons for the 21st Century',
        'year_published': 2018,
    },
    8: {
        'title': 'Homo Deus',
        'year_published': 2015,
    },
    9: {
        'title': '21 Lessons for the 21st Century',
        'year_published': 2018,
    }
}


In [97]:
dataset = PetDataset(data=books)

In [112]:
from torch.utils.data import DataLoader, random_split

In [113]:
len(dataset)

10

Given `dataset`, create dataloaders for training set and validation set
- Randomly select `6` items in `dataset` for training set and `4` items for validation set
- Batch size of both dataloaders is `2`

In [114]:
train_set, test_set = random_split(dataset, lengths=[6, 4])

In [115]:
train_loader = DataLoader(dataset=train_set, batch_size=2)

In [116]:
valid_loader = DataLoader(dataset=test_set, batch_size=2)

In [117]:
train_loader, valid_loader

(<torch.utils.data.dataloader.DataLoader at 0x7ffb2f23ea90>,
 <torch.utils.data.dataloader.DataLoader at 0x7ffb2f0bae80>)