# Linear regression

In [1]:
#pragma cling add_include_path("../../libtorch/include")
#pragma cling add_include_path("../../libtorch/include/torch/csrc/api/include")
#pragma cling add_library_path("../../libtorch/lib")
#pragma cling load("libtorch")

In [2]:
#include <iostream>
#include <cstddef>
#include <torch/torch.h>

In [3]:
//torch::manual_seed(1);

## step 1: create mock data x and y

In [4]:
/// Generate a synthetic linear-regression dataset: y = X * true_w + true_b + noise.
///
/// @param true_w       Ground-truth weights; `true_w.size(0)` is used as the number of features.
/// @param true_b       Ground-truth bias added to every sample.
/// @param num_samples  Number of samples (rows of X) to generate.
/// @return {X, y}, where X has shape {num_samples, true_w.size(0)} and y is
///         reshaped to a single column ({-1, 1}).
std::pair<torch::Tensor, torch::Tensor> synthetic_data(torch::Tensor true_w, float true_b, int64_t num_samples) {
    // Sample with true_w's dtype/device so the matmul below also works when
    // true_w is not a default (float, CPU) tensor.
    auto X = torch::randn({num_samples, true_w.size(0)}, true_w.options());
    auto y = torch::matmul(X, true_w) + true_b;

    // Additive Gaussian noise with standard deviation 0.01.
    y += torch::randn(y.sizes(), y.options()) * 0.01;
    y = torch::reshape(y, {-1, 1});

    return {X, y};
}

In [5]:
// Tensors built with these options are float tensors on the CPU.
auto options = torch::TensorOptions().device(torch::kCPU).dtype(torch::kFloat);

// Ground-truth parameters of the linear model used to generate the dataset.
float true_b = 4.2;
int64_t num_samples = 3000;
torch::Tensor true_w = torch::tensor({2.0, -3.4}, options);
// Print the number of features (length of the weight vector).
std::cout << true_w.size(0) << std::endl;

// first = features X, second = labels y.
auto data_and_label = synthetic_data(true_w, true_b, num_samples);

2


## step 2: create dataset and dataloader

In [6]:
/// Dataset over an in-memory feature tensor and label tensor.
/// Sample `i` is the pair (features[i], labels[i]), indexed along dimension 0.
class CustomDataset : public torch::data::Dataset<CustomDataset> {
    public:
    /// @param features  Tensor whose first dimension enumerates the samples.
    /// @param labels    Tensor indexed by the same sample index as `features`.
    CustomDataset(torch::Tensor features, torch::Tensor labels)
        : features_(std::move(features)), labels_(std::move(labels)) {}

    /// Return sample `index` as {data, target}. Both tensors are cloned, so
    /// the returned example does not share memory with the dataset's tensors.
    torch::data::Example<> get(size_t index) override {
        const auto row = static_cast<int64_t>(index);
        return {features_[row].clone(), labels_[row].clone()};
    }

    /// Number of samples: the size of the feature tensor along dimension 0.
    torch::optional<size_t> size() const override {
        return static_cast<size_t>(features_.size(0));
    }

    private:
    // Trailing-underscore names: identifiers containing a double underscore
    // are reserved for the implementation in C++.
    torch::Tensor features_;
    torch::Tensor labels_;
};

In [7]:
auto dataset = CustomDataset(data_and_label.first, data_and_label.second);

In [8]:
// print the first sample
torch::data::Example<> sample = dataset.get(0);
std::cout << sample.data << std::endl;
std::cout << sample.target << std::endl;

 0.0283
 2.1235
[ CPUFloatType{2} ]
-2.9579
[ CPUFloatType{1} ]


In [9]:
auto stacked_dataset = dataset.map(torch::data::transforms::Stack<>());

In [10]:
// Mini-batch size used by the data loader.
int batch_size = 10;
// Named `loader_options` (not `options`) so this cell does not redeclare the
// TensorOptions variable `options` from the data-generation cell with a
// different type. drop_last(false) keeps a trailing partial batch, if any.
auto loader_options = torch::data::DataLoaderOptions().batch_size(batch_size).drop_last(false);
// RandomSampler visits the samples in a shuffled order; the loader takes
// ownership of the stacked dataset.
auto data_loader = torch::data::make_data_loader<torch::data::samplers::RandomSampler>(std::move(stacked_dataset), loader_options);

## step 3: create model

In [11]:
// Single linear layer: true_w.size(0) input features -> 1 output.
torch::nn::Linear model(true_w.size(0), 1);

// Initialise weights ~ N(0, 0.01^2) and bias = 0 through torch::nn::init,
// which updates the parameters in place without recording autograd history,
// instead of going through the legacy `.data()` accessor.
torch::nn::init::normal_(model->weight, 0.0, 0.01);
torch::nn::init::zeros_(model->bias);

## step 4: create optimizer

In [12]:
auto optimizer = torch::optim::SGD(model->parameters(), 0.03);

## step 5: define loss function

In [None]:
// Mean-squared-error loss, averaged over all elements (kMean reduction).
// Named `loss_options` (not `options`) so this cell does not redeclare the
// `options` variables of earlier cells with yet another type.
auto loss_options = torch::nn::MSELossOptions(torch::kMean);
torch::nn::MSELoss loss_function(loss_options);

## step 6: train the model to find the optimal parameters

In [14]:
auto features = std::move(data_and_label.first);
auto labels = std::move(data_and_label.second);

// Training
size_t num_epochs = 3;
for( size_t i =0 ; i < num_epochs; i++ ) {

    for (auto &batch : *data_loader) {
        //std::cout << batch.data()->data.sizes() << '\n';
        auto x = batch.data;
        auto y = batch.target;

        //std::cout << x.sizes() << std::endl;
        //std::cout << y.sizes() << std::endl;

        auto output = model->forward(x);
        auto loss = loss_function(output, y);
        //std::cout << loss << std::endl;

        optimizer.zero_grad();
        loss.backward();
        optimizer.step();
    }

    auto epoch_output = model->forward(features);
    auto epoch_loss = torch::mse_loss(epoch_output, labels);
    std::cout << "Epoch: " << i << " loss: " << epoch_loss << std::endl;
}

Epoch: 0 loss: 0.000100087
[ CPUFloatType{} ]
Epoch: 1 loss: 0.000100172
[ CPUFloatType{} ]
Epoch: 2 loss: 9.97453e-05
[ CPUFloatType{} ]


## step 7: evaluate the performance

In [16]:
// Compare the model parameters learned by training on finite data with the
// actual parameters used to generate that data.

// Learned parameters and their deviation from the ground truth. The learned
// weight is reshaped to true_w's shape before subtracting.
auto w = model->weight.data();
auto w_dif = true_w - w.reshape(true_w.sizes());
auto b = model->bias.data().item<float>();

// Weight: estimate, both shapes, then the element-wise error.
std::cout << "estimated parameter is " << w << std::endl;
std::cout << w.sizes() << "\n";
std::cout << true_w.sizes() << "\n";
std::cout << "error in estimating w:\n" << w_dif << std::endl;

// Bias: estimate and error.
std::cout << "estimated bias is " << b << std::endl;
std::cout << "error in estimating b: " << (true_b - b) << std::endl;

std::cout << "Done\n";

estimated parameter is  1.9999 -3.3995
[ CPUFloatType{1,2} ]
[1, 2]
[2]
error in estimating w:
1e-05 *
 5.7101
-52.2375
[ CPUFloatType{2} ]
estimated bias is 4.19985
error in estimating b: 0.000148773
Done
