In [1]:
import numpy as np
import torch
from torch.utils.data import DataLoader,TensorDataset

In [2]:
# create some data in numpy
nObservations = 100
nFeatures = 20

# Draw from a seeded generator so that "Restart Kernel -> Run All" rebuilds
# exactly the same matrix. The generator is a local object, so NumPy's global
# random state (used by np.random.* calls elsewhere) is left untouched.
SEED = 0
rng = np.random.default_rng(SEED)

# (nObservations x nFeatures) matrix of standard-normal samples, float64
data = rng.standard_normal((nObservations,nFeatures))

In [5]:
# convert to pytorch tensor (the dtype of the numpy array is carried over)
dataT = torch.tensor(data)

# print out some information: container type, shape and dtype,
# once for the numpy array and once for the tensor built from it
print('Numpy Data')
print(type(data))
print(data.shape)
print(data.dtype)
print(' ')

print('Tensor Data')
print(type(dataT))
print(dataT.shape)
print(dataT.dtype)
print(' ')


Numpy Data
<class 'numpy.ndarray'>
(100, 20)
float64
 
Tensor Data
<class 'torch.Tensor'>
<class 'torch.Tensor'>
torch.Size([100, 20])
torch.float64
 


In [6]:
# sometimes you need to convert datatypes:
# .to(torch.float32) is the explicit spelling of .float()
dataT2 = torch.tensor(data).to(torch.float32)
print(dataT2.dtype)

# torch.int64 ("long") is the integer dtype, i.e. what .long() produces
dataT3 = torch.tensor(data).to(torch.int64)
print(dataT3.dtype)


torch.float32
torch.int64


In [12]:
# wrap the tensor in a PyTorch TensorDataset
 
dataset = TensorDataset(dataT)

# dataset.tensors is a tuple holding the tensors handed to TensorDataset.
# Only dataT was passed here, so index 0 is the data and there are no labels yet.
dataset.tensors[0]


tensor([[-2.6462,  0.9421, -0.7378,  ..., -1.3899, -1.8895, -1.8208],
        [-0.2757,  1.8786, -3.0606,  ..., -0.5944, -1.2844,  0.0763],
        [ 0.6499,  1.0307, -1.8771,  ..., -1.6208,  1.4059,  1.1239],
        ...,
        [ 0.2137,  0.1354, -1.5676,  ...,  0.6966, -1.8372, -0.7226],
        [-0.5949, -2.4285, -0.5333,  ...,  0.2796,  0.0276, -0.0726],
        [-1.9132,  1.0349,  1.7274,  ..., -0.3699, -1.6858, -1.1412]],
       dtype=torch.float64)

In [16]:
# let's try with labels: evenly spaced values in [.01, 4] rounded up,
# which gives one of the class labels 1., 2., 3., 4. per observation
labels = torch.linspace(.01,4,nObservations).ceil()

# turn the 1-D vector into an actual matrix: nObservations rows, 1 column
labels = labels.unsqueeze(1)

# now make another dataset that pairs every data row with its label
dataset = TensorDataset(dataT,labels)
print(dataset.tensors[1].shape)

# for comparison: a plain 1-D numpy vector has shape (n,), not (n,1)
print(np.random.randint(5,size=nObservations).shape)


torch.Size([100, 1])
(100,)


In [17]:
# Data Loaders
# wrap the dataset in a DataLoader that hands out mini-batches of `batchsize` rows
# (shuffle=True and drop_last=True are optional extras that are left off here)
batchsize = 25
dataloader = DataLoader(dataset=dataset, batch_size=batchsize)

# the loader keeps a reference to the complete dataset
dataloader.dataset.tensors[0].shape

torch.Size([100, 20])

In [18]:
# size of each batch: iterate once over the loader and report the
# shape of the data block and of the label block it yields
for dat,labs in dataloader:
    print('Batch Info:', dat.shape, labs.shape, ' ', sep='\n')



Batch Info:
torch.Size([25, 20])
torch.Size([25, 1])
 
Batch Info:
torch.Size([25, 20])
torch.Size([25, 1])
 
Batch Info:
torch.Size([25, 20])
torch.Size([25, 1])
 
Batch Info:
torch.Size([25, 20])
torch.Size([25, 1])
 


In [23]:
# try again with shuffling (the order is re-drawn each time the loader is iterated)
# The loader has to be rebuilt with shuffle=True here: the earlier loader was
# created without it and would return the labels in their original order.
dataloader = DataLoader(dataset,batch_size=batchsize,shuffle=True)

# print the labels of every batch as a row vector to make the mixing visible
for dat,labs in dataloader:
  print(labs.T)
  print(' ')

tensor([[2., 1., 2., 4., 4., 4., 2., 2., 3., 1., 3., 2., 2., 2., 3., 3., 2., 4.,
         2., 3., 4., 2., 1., 4., 1.]])
 
tensor([[2., 2., 1., 4., 2., 3., 3., 3., 2., 1., 1., 4., 3., 4., 1., 4., 3., 1.,
         1., 3., 1., 1., 2., 1., 2.]])
 
tensor([[2., 4., 1., 2., 4., 3., 1., 1., 1., 3., 4., 2., 3., 4., 4., 3., 1., 2.,
         4., 3., 3., 3., 3., 2., 1.]])
 
tensor([[4., 4., 3., 1., 4., 1., 3., 3., 4., 4., 2., 1., 4., 4., 3., 2., 1., 3.,
         1., 4., 1., 2., 3., 2., 4.]])
 


In [24]:
# to get only one batch for testing: build an iterator over the loader
# and pull its first (data, labels) pair
# NOTE: this rebinds `data`, which held the NumPy array created near the top
# of the notebook; cells that read `data` will see this tensor batch afterwards.
data,labs = next(iter(dataloader))

In [26]:
# display the contents of `data` (at this point the single batch drawn from the loader)
data

tensor([[-0.0540, -0.1668, -0.3710,  0.3518, -1.1568,  1.9112,  0.6247, -0.6548,
          0.5692,  0.1223, -0.7024, -1.3775,  1.9176, -0.2915,  2.0003, -0.4923,
          1.7067,  0.0269, -0.5868,  1.1203],
        [-0.4031,  1.1707,  1.2123, -1.0396, -0.4703,  0.5002, -1.4157,  0.1948,
          0.7521, -0.9700,  0.9442,  0.0240, -0.5965,  1.8777, -0.7752,  0.5430,
         -0.3497,  0.4210, -1.4835,  0.5306],
        [-0.2757,  1.8786, -3.0606,  0.5848, -1.0003,  0.4833,  0.6812, -0.2442,
         -1.1700, -0.6941, -0.1619, -0.3195, -0.5373, -0.8063, -0.5489,  0.2186,
         -1.4679, -0.5944, -1.2844,  0.0763],
        [ 0.3345, -0.6035,  0.2050,  0.0538,  0.0399,  1.5953, -0.1440,  0.3620,
         -0.5283, -0.8597, -0.0874,  0.5251,  1.0547,  0.3092,  1.2819, -1.0935,
         -0.3373, -1.7743,  0.2227, -0.3382],
        [ 1.9237, -2.2175,  0.1781, -0.8935,  0.5635,  0.9976, -0.6286,  1.6740,
          2.6551, -0.8948, -0.8318,  1.2519,  1.2650,  1.6932,  0.9640,  1.6846,
      