# DataLoader

Reference: https://github.com/pytorch/pytorch/blob/master/torch/csrc/api/include/torch/data/dataloader.h

In [1]:
#pragma cling add_include_path("../../libtorch/include")
#pragma cling add_include_path("../../libtorch/include/torch/csrc/api/include")
#pragma cling add_library_path("../../libtorch/lib")
#pragma cling load("libtorch")

In [2]:
#include <iostream>
#include <cstddef>
#include <torch/torch.h>

## Step 1: Create a custom dataset

In [3]:
int sample_size = 5;      // number of samples (rows)
int number_features = 3;  // number of features per sample (columns)

// Consecutive values 0, 1, 2, ... laid out row by row. Multiplying by 1.0
// promotes the integer range to a floating-point tensor before it is shaped
// into a (sample_size x number_features) matrix.
torch::Tensor features =
    (torch::arange(sample_size * number_features) * 1.0)
        .reshape({sample_size, number_features});
std::cout << features << std::endl;


  0   1   2
  3   4   5
  6   7   8
  9  10  11
 12  13  14
[ CPUFloatType{5,3} ]


In [4]:
// One random integer-valued label per sample, drawn from [0, 3).
torch::Tensor label = torch::randint(
    /*low=*/0,
    /*high=*/3,
    /*size=*/{sample_size});
std::cout << label << std::endl;

 2
 0
 1
 0
 2
[ CPUFloatType{5} ]


In [5]:
/// In-memory dataset that pairs the i-th entry of a feature tensor with the
/// i-th entry of a label tensor.
///
/// Derives from torch::data::Dataset<CustomDataset> so that it can be
/// transformed with `.map(...)` and handed to `torch::data::make_data_loader`.
class CustomDataset : public torch::data::Dataset<CustomDataset>{
    private:
        // Trailing-underscore member names: identifiers containing a double
        // underscore are reserved for the C++ implementation.
        torch::Tensor features_;
        torch::Tensor labels_;

    public:
    /// @param features tensor whose first dimension indexes the samples.
    /// @param labels   tensor whose first dimension indexes the same samples.
    /// @throws c10::Error if the two tensors disagree on the number of samples.
    CustomDataset(torch::Tensor features, torch::Tensor labels)
        : features_(std::move(features)), labels_(std::move(labels)){
        // Fail at construction rather than on the first out-of-range label lookup.
        TORCH_CHECK(
            features_.size(0) == labels_.size(0),
            "CustomDataset: features has ", features_.size(0),
            " samples but labels has ", labels_.size(0));
    }

    /// Returns the sample at `index` as an Example {data, target}.
    ///
    /// Both tensors are cloned so the returned example does not share storage
    /// with the tensors held by the dataset.
    torch::data::Example<> get(std::size_t index) override{
        // Tensor indexing takes a signed 64-bit index; convert explicitly.
        const auto row = static_cast<int64_t>(index);
        return {features_[row].clone(), labels_[row].clone()};
    }

    /// Number of samples, i.e. the length of the first dimension of the features.
    torch::optional<std::size_t> size() const override{
        return static_cast<std::size_t>(features_.size(0));
    }
};

In [6]:
// Length of the first dimension, i.e. the number of samples.
std::cout << features.sizes().front();

5

In [7]:
//std::cout << features.size(0);

In [8]:
auto dataset = CustomDataset(features, label);
//auto stacked_dataset = dataset.map(torch::data::transforms::Stack<>());

In [9]:
// Feature vector of the sample at index 0.
std::cout << dataset.get(0).data;

 0
 1
 2
[ CPUFloatType{3} ]

In [10]:
// Label of the sample at index 0.
std::cout << dataset.get(0).target;

2
[ CPUFloatType{} ]

In [11]:
// Feature vector of the sample at index 1.
std::cout << dataset.get(1).data;

 3
 4
 5
[ CPUFloatType{3} ]

In [12]:
auto stacked_dataset = dataset.map(torch::data::transforms::Stack<>());

## Step 2: Create a data loader

In [13]:
int batch_size = 2;
torch::data::DataLoaderOptions options = torch::data::DataLoaderOptions().batch_size(batch_size).drop_last(false);

In [14]:
auto data_loader = torch::data::make_data_loader<torch::data::samplers::SequentialSampler>(std::move(stacked_dataset), options);

In [18]:
// Print every batch: the stacked features, a separator, the stacked labels,
// and a closing separator.
for (auto& batch : *data_loader) {
    std::cout << batch.data << std::endl
              << "---------" << std::endl
              << batch.target << std::endl
              << "---------" << std::endl;
}

 0  1  2
 3  4  5
[ CPUFloatType{2,3} ]
---------
 2
 0
[ CPUFloatType{2} ]
---------
  6   7   8
  9  10  11
[ CPUFloatType{2,3} ]
---------
 1
 0
[ CPUFloatType{2} ]
---------
 12  13  14
[ CPUFloatType{1,3} ]
---------
 2
[ CPUFloatType{1} ]
---------
