# Package Installs

In [1]:
# Update package lists and install Armadillo library
!apt-get update
!apt-get install -y libarmadillo-dev

# Install packages for C++ integration
!pip install --upgrade git+https://github.com/pybind/pybind11.git
!pip install cython

0% [Working]            Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
0% [Connecting to archive.ubuntu.com (185.125.190.81)] [1 InRelease 9,828 B/129 kB 8%] [Connected to                                                                                                    Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:7 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,230 kB]
Get:8 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [2,606 kB]
Get:9 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:10 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRel

# PyBind11 Workflow

## Armadillo.h Header

In [2]:
%%bash
mkdir -p pybind11
cat > pybind11/armadillo.h << 'EOF'
#ifndef PYBIND11_ARMADILLO_H
#define PYBIND11_ARMADILLO_H

#include <armadillo>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>

namespace pybind11 { namespace detail {

// Minimal type caster for arma::Col<T> (commonly used as arma::vec)
template <typename T>
struct type_caster< arma::Col<T> > {
public:
    PYBIND11_TYPE_CASTER(arma::Col<T>, _("numpy.ndarray"));

    bool load(handle src, bool) {
        auto buf = reinterpret_borrow<array_t<T>>(src);
        buffer_info info = buf.request();
        if (info.ndim != 1)
            return false;
        // Create an arma::Col without copying memory
        value = arma::Col<T>(static_cast<T*>(info.ptr), info.shape[0], false);
        return true;
    }

    static handle cast(const arma::Col<T>& src, return_value_policy, handle) {
        return array_t<T>(
            { src.n_elem },
            { sizeof(T) },
            src.memptr()
        ).release();
    }
};

// Minimal type caster for arma::Mat<T>
template <typename T>
struct type_caster< arma::Mat<T> > {
public:
    PYBIND11_TYPE_CASTER(arma::Mat<T>, _("numpy.ndarray"));

    bool load(handle src, bool) {
        auto buf = reinterpret_borrow<array_t<T>>(src);
        buffer_info info = buf.request();
        if (info.ndim != 2)
            return false;
        value = arma::Mat<T>(static_cast<T*>(info.ptr), info.shape[0], info.shape[1], false);
        return true;
    }

    static handle cast(const arma::Mat<T>& src, return_value_policy, handle) {
        return array_t<T>(
            { src.n_rows, src.n_cols },
            { sizeof(T)*src.n_cols, sizeof(T) },
            src.memptr()
        ).release();
    }
};

}} // namespace pybind11::detail

#endif
EOF

## Main Script

In [3]:
%%bash
cat > regression.cpp << 'EOF'
#include <pybind11/pybind11.h>
#include <pybind11/armadillo.h>  // Uses our minimal header in the local pybind11 folder
#include <armadillo>
#include <stdexcept>

namespace py = pybind11;  // semicolon added

// Define a LinearRegression class with a sklearn-style API
class LinearRegression {
public:
    arma::vec theta;  // Learned parameters (intercept first)

    LinearRegression() {}  // Default constructor

    // Fit the model using feature matrix X (n_samples x n_features) and target vector y (n_samples)
    void fit(const arma::mat & X, const arma::vec & y) {
        if (X.n_rows != y.n_elem)
            throw std::runtime_error("X rows must equal length of y");
        // Augment X with a column of ones for the intercept
        arma::mat X_aug = arma::join_horiz(arma::ones<arma::vec>(X.n_rows), X);
        theta = arma::solve(X_aug, y);
    }

    // Predict target values for a new feature matrix X (n_samples x n_features)
    arma::vec predict(const arma::mat & X) {
        if (theta.n_elem == 0)
            throw std::runtime_error("Model is not fitted yet");
        arma::mat X_aug = arma::join_horiz(arma::ones<arma::vec>(X.n_rows), X);
        if (theta.n_elem != X_aug.n_cols)
            throw std::runtime_error("Mismatch between theta and number of features");
        return X_aug * theta;
    }
};

PYBIND11_MODULE(regression, m) {
    m.doc() = "LinearRegression model (PyBind11 + Armadillo) with sklearn-style API";
    py::class_<LinearRegression>(m, "LinearRegression")
        .def(py::init<>(), "Create a new LinearRegression model")
        .def("fit", &LinearRegression::fit, "Fit the model using X and y")
        .def("predict", &LinearRegression::predict, "Predict target values for new X")
        .def_readonly("theta", &LinearRegression::theta, "Learned parameters (intercept first)");
}
EOF

## Compile the Module

In [4]:
%%bash
# Remove any old shared libraries
rm -f regression*.so

# Compile the module; the -I. flag ensures local "pybind11" folder is searched for the header
c++ -O3 -Wall -shared -std=c++11 -fPIC $(python3 -m pybind11 --includes) -I. regression.cpp -o regression$(python3.11-config --extension-suffix) -larmadillo

# Verify the shared library exists
ls -l regression*.so

-rwxr-xr-x 1 root root 305472 Feb  6 05:54 regression.cpython-311-x86_64-linux-gnu.so


## Test the Module

In [5]:
import sys
print(sys.version)

3.11.11 (main, Dec  4 2024, 08:55:07) [GCC 11.4.0]


In [6]:
import sys
sys.path.insert(0, ".")  # Ensure the current directory is in sys.path
import numpy as np
import regression  # Import the PyBind11 module

# Create a small dataset where y = 2 * x
X_train = np.array([[1], [2], [3], [4], [5]], dtype=np.float64)
y_train = np.array([2, 4, 6, 8, 10], dtype=np.float64)

# Instantiate and fit the model
model_pybind = regression.LinearRegression()
model_pybind.fit(X_train, y_train)

# Get and print the model parameters (θ)
theta_pybind = model_pybind.theta
print("PyBind11 learned theta:", theta_pybind)

# Predict new values
X_test = np.array([[6], [7], [8]], dtype=np.float64)
y_pred_pybind = model_pybind.predict(X_test)
print("PyBind11 predictions:", y_pred_pybind)

PyBind11 learned theta: [-2.38323279e-15  2.00000000e+00]
PyBind11 predictions: [12. 14. 16.]


# Cython Workflow

## Header File

In [7]:
%%bash
cat > regression.hpp << 'EOF'
#ifndef REGRESSION_HPP
#define REGRESSION_HPP

#include <armadillo>
#include <stdexcept>

// Define a LinearRegression class with a sklearn-style API
class LinearRegression {
public:
    arma::vec theta;  // Learned parameters (intercept first)
    LinearRegression();
    void fit(const arma::mat & X, const arma::vec & y);
    arma::vec predict(const arma::mat & X);
};

#endif
EOF

In [8]:
%%bash
cat > regression_api.h << 'EOF'
#ifndef REGRESSION_API_H
#define REGRESSION_API_H

#include "regression.hpp"

#ifdef __cplusplus
extern "C" {
#endif

LinearRegression* LinearRegression_new();
void LinearRegression_fit(LinearRegression* model, const double* X_data, int n_rows, int n_cols, const double* y_data);
double* LinearRegression_predict(LinearRegression* model, const double* X_data, int n_rows, int n_cols, int* out_size);
const double* LinearRegression_get_theta(LinearRegression* model, int* len);
void LinearRegression_free(LinearRegression* model);
void free_array(double* ptr);

#ifdef __cplusplus
}
#endif

#endif
EOF

## Main Script

In [9]:
%%bash
cat > regression_impl.cpp << 'EOF'
#include "regression_api.h"
#include <armadillo>
#include <stdexcept>
#include <algorithm>

LinearRegression::LinearRegression() { }

void LinearRegression::fit(const arma::mat & X, const arma::vec & y) {
    if (X.n_rows != y.n_elem)
        throw std::runtime_error("X rows must equal length of y");
    arma::mat X_aug = arma::join_horiz(arma::ones<arma::vec>(X.n_rows), X);
    theta = arma::solve(X_aug, y);
}

arma::vec LinearRegression::predict(const arma::mat & X) {
    if (theta.n_elem == 0)
        throw std::runtime_error("Model is not fitted yet");
    arma::mat X_aug = arma::join_horiz(arma::ones<arma::vec>(X.n_rows), X);
    if (theta.n_elem != X_aug.n_cols)
        throw std::runtime_error("Mismatch between theta and number of features");
    return X_aug * theta;
}

extern "C" {

LinearRegression* LinearRegression_new() {
    return new LinearRegression();
}

void LinearRegression_fit(LinearRegression* model, const double* X_data, int n_rows, int n_cols, const double* y_data) {
    arma::mat X(const_cast<double*>(X_data), n_rows, n_cols, false);
    arma::vec y(const_cast<double*>(y_data), n_rows, false);
    model->fit(X, y);
}

double* LinearRegression_predict(LinearRegression* model, const double* X_data, int n_rows, int n_cols, int* out_size) {
    arma::mat X(const_cast<double*>(X_data), n_rows, n_cols, false);
    arma::vec y_pred = model->predict(X);
    *out_size = y_pred.n_elem;
    double* result = new double[y_pred.n_elem];
    std::copy(y_pred.memptr(), y_pred.memptr() + y_pred.n_elem, result);
    return result;
}

const double* LinearRegression_get_theta(LinearRegression* model, int* len) {
    *len = model->theta.n_elem;
    return model->theta.memptr();
}

void LinearRegression_free(LinearRegression* model) {
    delete model;
}

void free_array(double* ptr) {
    delete[] ptr;
}

} // extern "C"
EOF

## Cython Wrapper File

In [10]:
%%bash
cat > regression_wrapper.pyx << 'EOF'
# cython: language_level=3
import numpy as np
cimport numpy as np

# Declare the external C functions from our API header
cdef extern from "regression_api.h":
    cdef struct LinearRegression  # opaque type

    LinearRegression* LinearRegression_new()
    void LinearRegression_fit(LinearRegression* model, const double* X_data, int n_rows, int n_cols, const double* y_data)
    double* LinearRegression_predict(LinearRegression* model, const double* X_data, int n_rows, int n_cols, int* out_size)
    const double* LinearRegression_get_theta(LinearRegression* model, int* len)
    void LinearRegression_free(LinearRegression* model)
    void free_array(double* ptr)

cdef class PyLinearRegression:
    cdef LinearRegression* thisptr

    def __cinit__(self):
        self.thisptr = LinearRegression_new()

    def fit(self, np.ndarray[np.float64_t, ndim=2] X, np.ndarray[np.float64_t, ndim=1] y):
        if X.shape[0] != y.shape[0]:
            raise ValueError("X rows must equal length of y")
        LinearRegression_fit(self.thisptr, <double*>X.data, X.shape[0], X.shape[1], <double*>y.data)

    def predict(self, np.ndarray[np.float64_t, ndim=2] X):
        cdef int n_rows = X.shape[0]
        cdef int n_cols = X.shape[1]
        cdef int out_size
        cdef double* res_ptr = LinearRegression_predict(self.thisptr, <double*>X.data, n_rows, n_cols, &out_size)
        cdef np.ndarray[np.float64_t, ndim=1] result = np.empty(out_size, dtype=np.float64)
        for i in range(out_size):
            result[i] = res_ptr[i]
        free_array(res_ptr)
        return result

    def get_theta(self):
        cdef int len_theta
        cdef const double* ptr = LinearRegression_get_theta(self.thisptr, &len_theta)
        # Convert the returned pointer into a Python list and then a numpy array
        return np.array([ptr[i] for i in range(len_theta)], dtype=np.float64)

    def __dealloc__(self):
        if self.thisptr is not NULL:
            LinearRegression_free(self.thisptr)
            self.thisptr = NULL
EOF

## Cython Module Setup

In [11]:
%%bash
cat > setup.py << 'EOF'
from setuptools import setup, Extension
from Cython.Build import cythonize
import numpy

ext = Extension(
    name="regression_wrapper",
    sources=["regression_wrapper.pyx", "regression_impl.cpp"],
    language="c++",
    include_dirs=[numpy.get_include(), "."],
    extra_compile_args=["-O3", "-std=c++11", "-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION"],
    extra_link_args=["-larmadillo"],
)

setup(
    name="regression_wrapper",
    ext_modules=cythonize(ext),
)
EOF

In [12]:
!python setup.py build_ext --inplace

Compiling regression_wrapper.pyx because it changed.
[1/1] Cythonizing regression_wrapper.pyx
running build_ext
building 'regression_wrapper' extension
creating build/temp.linux-x86_64-cpython-311
x86_64-linux-gnu-g++ -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC -I/usr/local/lib/python3.11/dist-packages/numpy/core/include -I. -I/usr/include/python3.11 -c regression_impl.cpp -o build/temp.linux-x86_64-cpython-311/regression_impl.o -O3 -std=c++11 -DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION
x86_64-linux-gnu-g++ -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC -I/usr/local/lib/python3.11/dist-packages/numpy/core/include -I. -I/usr/include/python3.11 -c regression_wrapper.cpp -o build/temp.linux-x86_64-cpython-311/regression_wrapper.o -O3 -std=c++11 -DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION
creating build/lib.linux-x86_64-cpython-3

## Test the Module

In [13]:
import numpy as np
from regression_wrapper import PyLinearRegression  # Import the Cython wrapper class

# Create a small dataset where y = 2 * x
X_train = np.array([[1], [2], [3], [4], [5]], dtype=np.float64)
y_train = np.array([2, 4, 6, 8, 10], dtype=np.float64)

# Instantiate and fit the model
model_cython = PyLinearRegression()
model_cython.fit(X_train, y_train)

# Get and print the model parameters (θ)
theta_cython = model_cython.get_theta()
print("Cython learned theta:", theta_cython)

# Predict new values
X_test = np.array([[6], [7], [8]], dtype=np.float64)
y_pred_cython = model_cython.predict(X_test)
print("Cython predictions:", y_pred_cython)

Cython learned theta: [-2.38323279e-15  2.00000000e+00]
Cython predictions: [12. 14. 16.]


# Pure Python

In [14]:
class LinearRegressionPurePython:
    def __init__(self):
        self.theta = []  # model parameters: first element is intercept, others are coefficients

    def transpose(self, A):
        return list(map(list, zip(*A)))

    def mat_mult(self, A, B):
        # Multiply matrix A (m x n) by matrix B (n x p)
        m = len(A)
        n = len(A[0])
        p = len(B[0])
        result = [[0 for _ in range(p)] for _ in range(m)]
        for i in range(m):
            for j in range(p):
                for k in range(n):
                    result[i][j] += A[i][k] * B[k][j]
        return result

    def invert_matrix(self, A):
        # Invert a square matrix A using Gauss-Jordan elimination
        n = len(A)
        # create augmented matrix [A | I]
        aug = [row + [1 if i == j else 0 for j in range(n)] for i, row in enumerate(A)]
        # forward elimination
        for i in range(n):
            # find pivot
            pivot = aug[i][i]
            if pivot == 0:
                raise ValueError("Matrix is singular")
            # normalize pivot row
            aug[i] = [x / pivot for x in aug[i]]
            for j in range(n):
                if j != i:
                    factor = aug[j][i]
                    aug[j] = [aj - factor * ai for aj, ai in zip(aug[j], aug[i])]
        # extract inverse from augmented matrix
        inv = [row[n:] for row in aug]
        return inv

    def fit(self, X, y):
        # X: list of lists (each inner list is a feature vector)
        # y: list of target values
        m = len(X)
        # add intercept column
        X_aug = [[1] + row for row in X]
        # compute X^T * X
        X_trans = self.transpose(X_aug)
        XtX = self.mat_mult(X_trans, X_aug)
        # compute X^T * y
        # treat y as a column vector
        y_col = [[val] for val in y]
        Xty = self.mat_mult(X_trans, y_col)
        # invert XtX
        XtX_inv = self.invert_matrix(XtX)
        # theta = (X^T * X)^-1 * (X^T * y)
        theta_mat = self.mat_mult(XtX_inv, Xty)
        self.theta = [row[0] for row in theta_mat]

    def predict(self, X):
        # X: list of lists (each inner list is a feature vector)
        X_aug = [[1] + row for row in X]
        preds = []
        for row in X_aug:
            pred = sum(a * b for a, b in zip(row, self.theta))
            preds.append(pred)
        return preds

In [15]:
# Test the pure Python model on a small dataset
model_py = LinearRegressionPurePython()
X_train_py = [[1], [2], [3], [4], [5]]
y_train_py = [2, 4, 6, 8, 10]
model_py.fit(X_train_py, y_train_py)

print("Pure Python learned theta:", model_py.theta)
print("Pure Python predictions:", model_py.predict([[6], [7], [8]]))

Pure Python learned theta: [-1.4210854715202004e-14, 2.0]
Pure Python predictions: [11.999999999999986, 13.999999999999986, 15.999999999999986]


# Performance Comparison

In [16]:
import timeit
import numpy as np
import regression
from regression_wrapper import PyLinearRegression

# Use a moderately sized dataset for all models
n_samples = 1000
n_features = 10
X_train_mod = np.random.rand(n_samples, n_features).astype(np.float64)
true_theta = np.random.rand(n_features + 1).astype(np.float64)
X_train_aug = np.hstack((np.ones((n_samples, 1)), X_train_mod))
y_train_mod = X_train_aug.dot(true_theta)

n_runs = 50

# PyBind11 model timing
def pybind_train():
    model = regression.LinearRegression()
    model.fit(X_train_mod, y_train_mod)

pybind_train_time = timeit.timeit(pybind_train, number=n_runs)
print(f"PyBind11 training time ({n_runs} runs): {pybind_train_time:.6f} sec")

def pybind_predict():
    model = regression.LinearRegression()
    model.fit(X_train_mod, y_train_mod)
    model.predict(X_train_mod)

pybind_predict_time = timeit.timeit(pybind_predict, number=n_runs)
print(f"PyBind11 prediction time ({n_runs} runs): {pybind_predict_time:.6f} sec")

# Cython model timing
def cython_train():
    model = PyLinearRegression()
    model.fit(X_train_mod, y_train_mod)

cython_train_time = timeit.timeit(cython_train, number=n_runs)
print(f"Cython training time ({n_runs} runs): {cython_train_time:.6f} sec")

def cython_predict():
    model = PyLinearRegression()
    model.fit(X_train_mod, y_train_mod)
    model.predict(X_train_mod)

cython_predict_time = timeit.timeit(cython_predict, number=n_runs)
print(f"Cython prediction time ({n_runs} runs): {cython_predict_time:.6f} sec")

# Pure Python model timing
def purepython_train():
    model = LinearRegressionPurePython()
    # Convert NumPy dataset to built-in lists
    X_list = X_train_mod.tolist()
    y_list = y_train_mod.tolist()
    model.fit(X_list, y_list)

purepython_train_time = timeit.timeit(purepython_train, number=n_runs)
print(f"Pure Python training time ({n_runs} runs): {purepython_train_time:.6f} sec")

def purepython_predict():
    model = LinearRegressionPurePython()
    X_list = X_train_mod.tolist()
    y_list = y_train_mod.tolist()
    model.fit(X_list, y_list)
    model.predict(X_list)

purepython_predict_time = timeit.timeit(purepython_predict, number=n_runs)
print(f"Pure Python prediction time ({n_runs} runs): {purepython_predict_time:.6f} sec")

PyBind11 training time (50 runs): 0.010261 sec
PyBind11 prediction time (50 runs): 0.010696 sec
Cython training time (50 runs): 0.013633 sec
Cython prediction time (50 runs): 0.010836 sec
Pure Python training time (50 runs): 0.829785 sec
Pure Python prediction time (50 runs): 0.784235 sec


In [17]:
import pandas as pd

# Create DataFrame
time_df = pd.DataFrame({
    "Model Type": ["PyBind11", "Cython", "Pure Python"],
    "Training Time (s)": [pybind_train_time, cython_train_time, purepython_train_time],
    "Prediction Time (s)": [pybind_predict_time, cython_predict_time, purepython_predict_time]
})

display(time_df)

Unnamed: 0,Model Type,Training Time (s),Prediction Time (s)
0,PyBind11,0.010261,0.010696
1,Cython,0.013633,0.010836
2,Pure Python,0.829785,0.784235
