# Batch sizes
### Epochs
Epoch = 1: One Forward and Backward pass of ALL training samples

### Batch
Number of training samples used for one forward and backward pass

### Number of iterations
Number of passes needed to complete one epoch, where each pass (one forward and one backward pass) uses 'batch size' number of samples

E.g. 
- 100 samples
- Batch size = 20
- => 100/20 = **5** iterations for **1** epoch

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

In [3]:
import pandas as pd
# Load the wine CSV into a DataFrame purely to inspect the columns and a few rows.
# NOTE(review): this cell reads '../datasets/wine.csv' while WineDataset reads
# 'datasets/wine.csv'; unless the file exists at both locations, only one of the two
# will resolve on a fresh "Restart & Run All" — confirm the intended working directory.
df=pd.read_csv('../datasets/wine.csv')
df  # bare last expression: rendered as a rich table by the notebook



Unnamed: 0,Wine,Alcohol,Malic.acid,Ash,Acl,Mg,Phenols,Flavanoids,Nonflavanoid.phenols,Proanth,Color.int,Hue,OD,Proline
0,1,14.23,1.71,2.43,15.6,127,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.20,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050
2,1,13.16,2.36,2.67,18.6,101,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.50,16.8,113,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,3,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740
174,3,13.40,3.91,2.48,23.0,102,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750
175,3,13.27,4.28,2.26,20.0,120,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835
176,3,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840


In [12]:
class WineDataset(Dataset):
    """Map-style dataset backed by a wine CSV file.

    The file is read once, in ``__init__``. It must contain one header row
    followed by purely numeric, comma-separated rows; the first column is used
    as the label and every remaining column as a feature.

    Args:
        path: Location of the CSV file. Defaults to ``'datasets/wine.csv'``,
            so ``WineDataset()`` keeps its previous behaviour.
    """

    def __init__(self, path='datasets/wine.csv'):
        # skiprows=1 skips the column-name header.
        # ndmin=2 keeps the array 2-D even when the file holds a single data
        # row; without it loadtxt returns a 1-D array and the column slicing
        # below fails.
        xy = np.loadtxt(path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)
        self.X = torch.from_numpy(xy[:, 1:])    # features, shape (n_samples, n_features)
        self.y = torch.from_numpy(xy[:, [0]])   # [0] keeps the labels as (n_samples, 1)
        self.n_samples = self.X.shape[0]
        self.n_features = self.X.shape[1]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.X[index], self.y[index]

    def __len__(self):
        """Return the number of samples (rows) in the dataset."""
        return self.n_samples

In [13]:
# Instantiate the dataset (the CSV is read inside __init__) and print the sample at index 1.
# Indexing goes through __getitem__, which returns a (features, label) tuple of tensors.
dataset = WineDataset()
print(dataset[1])

(tensor([1.3200e+01, 1.7800e+00, 2.1400e+00, 1.1200e+01, 1.0000e+02, 2.6500e+00,
        2.7600e+00, 2.6000e-01, 1.2800e+00, 4.3800e+00, 1.0500e+00, 3.4000e+00,
        1.0500e+03]), tensor([1.]))


In [29]:
# Wrap the dataset in a DataLoader that yields shuffled mini-batches of `batch_size` samples.
# NOTE(review): shuffle=True with no seed set in the visible cells means the batch contents
# differ from run to run.
batch_size = 4
dataloader = DataLoader(dataset=dataset, batch_size = batch_size, shuffle = True, num_workers=0) # num_workers = number of subprocesses used to load data; 0 means loading happens in the main process
# Both prints only show object reprs: neither the DataLoader nor the enumerate object is iterated here.
print(dataloader)
print(enumerate(dataloader))

<torch.utils.data.dataloader.DataLoader object at 0x7fe62e72f100>
<enumerate object at 0x7fe62e6cee80>


In [24]:
# Pull a single mini-batch out of the DataLoader to inspect what one batch looks like.
data_iter = iter(dataloader)
# Use the built-in next(): the iterator's `.next()` method was a Python 2-style alias that
# newer PyTorch releases no longer provide, so calling it raises AttributeError there.
data = next(data_iter)  # first batch; each further next() call yields the following batch
features, labels = data  # each batch is a (features, labels) pair
print(features, labels)  # prints one batch of training data

tensor([[1.3830e+01, 1.6500e+00, 2.6000e+00, 1.7200e+01, 9.4000e+01, 2.4500e+00,
         2.9900e+00, 2.2000e-01, 2.2900e+00, 5.6000e+00, 1.2400e+00, 3.3700e+00,
         1.2650e+03],
        [1.2470e+01, 1.5200e+00, 2.2000e+00, 1.9000e+01, 1.6200e+02, 2.5000e+00,
         2.2700e+00, 3.2000e-01, 3.2800e+00, 2.6000e+00, 1.1600e+00, 2.6300e+00,
         9.3700e+02],
        [1.2600e+01, 1.3400e+00, 1.9000e+00, 1.8500e+01, 8.8000e+01, 1.4500e+00,
         1.3600e+00, 2.9000e-01, 1.3500e+00, 2.4500e+00, 1.0400e+00, 2.7700e+00,
         5.6200e+02],
        [1.3160e+01, 2.3600e+00, 2.6700e+00, 1.8600e+01, 1.0100e+02, 2.8000e+00,
         3.2400e+00, 3.0000e-01, 2.8100e+00, 5.6800e+00, 1.0300e+00, 3.1700e+00,
         1.1850e+03]]) tensor([[1.],
        [2.],
        [2.],
        [1.]])


In [26]:
# Iterating over whole dataloader
# Bookkeeping for the loop: number of epochs, dataset size, and iterations per epoch.
num_epochs = 2
total_samples = len(dataset)
n_iterations = math.ceil(total_samples/batch_size)  # ceil: a final, smaller batch still counts as one iteration
print(total_samples, n_iterations)

178 45


In [31]:
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(dataloader):
        # A real training step would go here:
        # forward pass -> backward pass -> parameter update
        if (i + 1) % 9 != 0:
            continue  # only report every 9th step
        print(f'Epoch {epoch+1}/{num_epochs}, Step {i+1}/{n_iterations}, Input shape: {inputs.shape}')  # shape is [samples in batch, n_features]
    print()  # blank line between epochs
# Why the last reported step has only 2 samples instead of 4:
# the dataset holds 178 samples, and 44 full batches of 4 consume 176 of them,
# so the 45th (final) batch of each epoch contains the remaining 2 -> shape (2, 13).

Epoch 1/2, Step 9/45, Input shape: torch.Size([4, 13])
Epoch 1/2, Step 18/45, Input shape: torch.Size([4, 13])
Epoch 1/2, Step 27/45, Input shape: torch.Size([4, 13])
Epoch 1/2, Step 36/45, Input shape: torch.Size([4, 13])
Epoch 1/2, Step 45/45, Input shape: torch.Size([2, 13])

Epoch 2/2, Step 9/45, Input shape: torch.Size([4, 13])
Epoch 2/2, Step 18/45, Input shape: torch.Size([4, 13])
Epoch 2/2, Step 27/45, Input shape: torch.Size([4, 13])
Epoch 2/2, Step 36/45, Input shape: torch.Size([4, 13])
Epoch 2/2, Step 45/45, Input shape: torch.Size([2, 13])



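Other ready-made datasets are available in `torchvision.datasets`, e.g. the MNIST dataset (`torchvision.datasets.MNIST`), the CIFAR datasets (`torchvision.datasets.CIFAR10` / `CIFAR100`), the Fashion-MNIST dataset (`torchvision.datasets.FashionMNIST`), etc.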