In [2]:
%%bash
# Update package lists and install mlpack (and Armadillo) if not already installed
apt-get update && apt-get install -y libmlpack-dev

# Write the C++ code to a file named ml_cpp.cpp
cat << 'EOF' > ml_cpp.cpp
#include <mlpack/core.hpp>
#include <mlpack/methods/decision_tree/decision_tree.hpp>
#include <armadillo>
#include <iostream>

using namespace mlpack;
using namespace mlpack::tree;
using namespace arma;
using namespace std;

int main()
{
  // Create a small synthetic dataset with 2 features and 10 samples
  // First 5 samples belong to class 0 and the last 5 to class 1
  mat X = {
    {1.0, 2.0, 1.5, 2.1, 1.8, 3.0, 3.1, 3.2, 2.9, 3.3},
    {1.1, 1.9, 1.4, 2.2, 1.7, 3.2, 3.0, 3.3, 2.8, 3.4}
  };
  Row<size_t> y = {0, 0, 0, 0, 0, 1, 1, 1, 1, 1};

  // Split the dataset into training (80%) and testing (20%)
  const uword nSamples = X.n_cols;
  const uword nTrain = static_cast<uword>(0.8 * nSamples); // 8 training samples
  mat X_train = X.cols(0, nTrain - 1);
  Row<size_t> y_train = y.subvec(0, nTrain - 1);

  // Use the remaining samples for testing
  mat X_test = X.cols(nTrain, nSamples - 1);
  Row<size_t> y_test = y.subvec(nTrain, nSamples - 1);

  // Train a decision tree classifier with 2 classes
  DecisionTree<> dt(X_train, y_train, 2);

  // Predict the classes of the test set
  Row<size_t> predictions;
  dt.Classify(X_test, predictions);

  // Print predictions and true labels
  cout << "Predictions: " << predictions << endl;
  cout << "True Labels: " << y_test << endl;

  // Compute a simple confusion matrix
  size_t TP = 0, TN = 0, FP = 0, FN = 0;
  // Consider class '1' as positive
  for (size_t i = 0; i < y_test.n_elem; ++i)
  {
    if (y_test[i] == 1 && predictions[i] == 1)
      TP++;
    else if (y_test[i] == 0 && predictions[i] == 0)
      TN++;
    else if (y_test[i] == 0 && predictions[i] == 1)
      FP++;
    else if (y_test[i] == 1 && predictions[i] == 0)
      FN++;
  }

  cout << "\nConfusion Matrix:" << endl;
  cout << "TP: " << TP << "   FP: " << FP << endl;
  cout << "FN: " << FN << "   TN: " << TN << endl;

  // Calculate and print accuracy
  double accuracy = double(TP + TN) / double(TP + TN + FP + FN);
  cout << "\nAccuracy: " << accuracy << endl;

  return 0;
}
EOF

# Compile the C++ code using g++ with OpenMP support and link with mlpack and Armadillo
g++ -std=c++11 ml_cpp.cpp -o ml_cpp -fopenmp -lmlpack -larmadillo

# Run the compiled executable
./ml_cpp

Hit:1 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists...
Reading package lists...
Building dependency tree...
Reading state information...
libmlpack-dev is already the newest version (3.4.2-5ubuntu1).
0 upgraded, 0 newly installed, 0 to remove and 30 not upgraded.
Predictions:         0        0

True Labels:         1 

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


In [4]:
%%bash
# Download and preprocess the Iris dataset if not already present
if [ ! -f iris.csv ]; then
  wget -O iris.data https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data
  # Remove blank lines and convert species to numeric (Iris-setosa -> 0, Iris-versicolor -> 1, Iris-virginica -> 2)
  awk -F, 'NF==5 {gsub("Iris-setosa", "0", $5); gsub("Iris-versicolor", "1", $5); gsub("Iris-virginica", "2", $5); print $1","$2","$3","$4","$5}' iris.data > iris.csv
fi

# Update package lists and install mlpack, Armadillo, and ensmallen (for LinearSVM)
apt-get update && apt-get install -y libmlpack-dev libensmallen-dev

# Write the C++ code to a file named ml_cpp.cpp
cat << 'EOF' > ml_cpp.cpp
#include <mlpack/core.hpp>
#include <mlpack/methods/random_forest/random_forest.hpp>
#include <mlpack/methods/linear_svm/linear_svm.hpp>
#include <armadillo>
#include <iostream>

using namespace mlpack;
using namespace mlpack::tree; // for RandomForest
using namespace mlpack::svm;  // for LinearSVM
using namespace arma;
using namespace std;

int main()
{
  // Load the Iris dataset from iris.csv
  // Expecting 5 rows (4 features + 1 label) and 150 columns (samples)
  arma::mat dataset;
  if (!data::Load("iris.csv", dataset, true))
  {
    cerr << "Could not load iris.csv" << endl;
    return -1;
  }

  // Split dataset into features (first 4 rows) and labels (5th row)
  arma::mat X = dataset.submat(0, 0, 3, dataset.n_cols - 1);
  arma::Row<size_t> y;
  y.set_size(dataset.n_cols);
  for (size_t i = 0; i < dataset.n_cols; i++)
  {
    y[i] = static_cast<size_t>(dataset(4, i));
  }

  // Shuffle and split the dataset into training (80%) and testing (20%)
  arma::uvec indices = arma::randperm(dataset.n_cols);
  size_t nTrain = static_cast<size_t>(0.8 * dataset.n_cols);
  arma::uvec trainIndices = indices.subvec(0, nTrain - 1);
  arma::uvec testIndices = indices.subvec(nTrain, dataset.n_cols - 1);

  arma::mat X_train = X.cols(trainIndices);
  arma::Row<size_t> y_train = y.cols(trainIndices);
  arma::mat X_test = X.cols(testIndices);
  arma::Row<size_t> y_test = y.cols(testIndices);

  // Number of classes in the Iris dataset (0, 1, 2)
  size_t numClasses = 3;

  // Train a Random Forest model using 10 trees and minimum leaf size 1
  RandomForest<> rf(X_train, y_train, numClasses, 10, 1);
  arma::Row<size_t> predictions_rf;
  rf.Classify(X_test, predictions_rf);

  // Evaluate the Random Forest model by computing a confusion matrix and accuracy
  arma::Mat<size_t> confMatrix_rf(numClasses, numClasses, fill::zeros);
  for (size_t i = 0; i < y_test.n_elem; i++)
  {
    confMatrix_rf(predictions_rf[i], y_test[i])++;
  }
  double accuracy_rf = double(arma::accu(confMatrix_rf.diag())) / double(y_test.n_elem);
  cout << "\nRandom Forest Confusion Matrix (rows: predicted, columns: actual):" << endl;
  cout << confMatrix_rf << endl;
  cout << "Random Forest Accuracy: " << accuracy_rf << endl;

  // Train a Linear SVM model with lambda = 0.1
  LinearSVM<> svm(X_train, y_train, numClasses, 0.1);
  arma::Row<size_t> predictions_svm;
  svm.Classify(X_test, predictions_svm);

  // Evaluate the Linear SVM model by computing a confusion matrix and accuracy
  arma::Mat<size_t> confMatrix_svm(numClasses, numClasses, fill::zeros);
  for (size_t i = 0; i < y_test.n_elem; i++)
  {
    confMatrix_svm(predictions_svm[i], y_test[i])++;
  }
  double accuracy_svm = double(arma::accu(confMatrix_svm.diag())) / double(y_test.n_elem);
  cout << "\nLinear SVM Confusion Matrix (rows: predicted, columns: actual):" << endl;
  cout << confMatrix_svm << endl;
  cout << "Linear SVM Accuracy: " << accuracy_svm << endl;

  return 0;
}
EOF

# Compile the C++ code using g++ with OpenMP support and link with mlpack and Armadillo
g++ -std=c++11 ml_cpp.cpp -o ml_cpp -fopenmp -lmlpack -larmadillo

# Run the compiled executable
./ml_cpp

Hit:1 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists...
Reading package lists...
Building dependency tree...
Reading state information...
libmlpack-dev is already the newest version (3.4.2-5ubuntu1).
The following NEW packages will be installed:
  libensmallen-dev
0 upgraded, 1 newly installed, 0 to remove and 30 not u

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


In [6]:
%%bash
# Download and preprocess the Iris dataset if not already present
if [ ! -f iris.csv ]; then
  wget -O iris.data https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data
  # Remove blank lines and convert species to numeric (Iris-setosa -> 0, Iris-versicolor -> 1, Iris-virginica -> 2)
  awk -F, 'NF==5 {gsub("Iris-setosa", "0", $5); gsub("Iris-versicolor", "1", $5); gsub("Iris-virginica", "2", $5); print $1","$2","$3","$4","$5}' iris.data > iris.csv
fi

# Update package lists and install mlpack, Armadillo, ensmallen (for LinearSVM), clang, and libomp-dev for OpenMP support
apt-get update && apt-get install -y libmlpack-dev libensmallen-dev clang libomp-dev

# Write the C++ code to a file named ml_cpp.cpp
cat << 'EOF' > ml_cpp.cpp
#include <mlpack/core.hpp>
#include <mlpack/methods/random_forest/random_forest.hpp>
#include <mlpack/methods/linear_svm/linear_svm.hpp>
#include <armadillo>
#include <iostream>

using namespace mlpack;
using namespace mlpack::tree; // for RandomForest
using namespace mlpack::svm;  // for LinearSVM
using namespace arma;
using namespace std;

int main()
{
  // Load the Iris dataset from iris.csv
  // Expecting 5 rows (4 features + 1 label) and 150 columns (samples)
  arma::mat dataset;
  if (!data::Load("iris.csv", dataset, true))
  {
    cerr << "Could not load iris.csv" << endl;
    return -1;
  }

  // Split dataset into features (first 4 rows) and labels (5th row)
  arma::mat X = dataset.submat(0, 0, 3, dataset.n_cols - 1);
  arma::Row<size_t> y;
  y.set_size(dataset.n_cols);
  for (size_t i = 0; i < dataset.n_cols; i++)
  {
    y[i] = static_cast<size_t>(dataset(4, i));
  }

  // Shuffle and split the dataset into training (80%) and testing (20%)
  arma::uvec indices = arma::randperm(dataset.n_cols);
  size_t nTrain = static_cast<size_t>(0.8 * dataset.n_cols);
  arma::uvec trainIndices = indices.subvec(0, nTrain - 1);
  arma::uvec testIndices = indices.subvec(nTrain, dataset.n_cols - 1);

  arma::mat X_train = X.cols(trainIndices);
  arma::Row<size_t> y_train = y.cols(trainIndices);
  arma::mat X_test = X.cols(testIndices);
  arma::Row<size_t> y_test = y.cols(testIndices);

  // Number of classes in the Iris dataset (0, 1, 2)
  size_t numClasses = 3;

  // Train a Random Forest model using 10 trees and minimum leaf size 1
  RandomForest<> rf(X_train, y_train, numClasses, 10, 1);
  arma::Row<size_t> predictions_rf;
  rf.Classify(X_test, predictions_rf);

  // Evaluate the Random Forest model by computing a confusion matrix and accuracy
  arma::Mat<size_t> confMatrix_rf(numClasses, numClasses, fill::zeros);
  for (size_t i = 0; i < y_test.n_elem; i++)
  {
    confMatrix_rf(predictions_rf[i], y_test[i])++;
  }
  double accuracy_rf = double(arma::accu(confMatrix_rf.diag())) / double(y_test.n_elem);
  cout << "\nRandom Forest Confusion Matrix (rows: predicted, columns: actual):" << endl;
  cout << confMatrix_rf << endl;
  cout << "Random Forest Accuracy: " << accuracy_rf << endl;

  // Train a Linear SVM model with lambda = 0.1
  LinearSVM<> svm(X_train, y_train, numClasses, 0.1);
  arma::Row<size_t> predictions_svm;
  svm.Classify(X_test, predictions_svm);

  // Evaluate the Linear SVM model by computing a confusion matrix and accuracy
  arma::Mat<size_t> confMatrix_svm(numClasses, numClasses, fill::zeros);
  for (size_t i = 0; i < y_test.n_elem; i++)
  {
    confMatrix_svm(predictions_svm[i], y_test[i])++;
  }
  double accuracy_svm = double(arma::accu(confMatrix_svm.diag())) / double(y_test.n_elem);
  cout << "\nLinear SVM Confusion Matrix (rows: predicted, columns: actual):" << endl;
  cout << confMatrix_svm << endl;
  cout << "Linear SVM Accuracy: " << accuracy_svm << endl;

  return 0;
}
EOF

# Compile the C++ code using clang++ with OpenMP support
# The include flag (-I) points to the directory where omp.h is located
clang++ -std=c++11 ml_cpp.cpp -o ml_cpp -Xpreprocessor -fopenmp -I/usr/lib/llvm-14/lib/clang/14.0.0/include -lomp -lmlpack -larmadillo

# Run the compiled executable
./ml_cpp

Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:2 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:3 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:6 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists...
Reading package lists...
Building dependency tree...
Reading state information...
clang is already the newest version (1:14.0-55~exp2).
libensmallen-dev is already the newest version (2.18.2-1).
libmlpack-dev is already the newest version (3.4.2-5ubuntu1).
The fol

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


In [7]:
%%bash
# Download and preprocess the Iris dataset if not already present
if [ ! -f iris.csv ]; then
  wget -O iris.data https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data
  # Remove blank lines and convert species to numeric (Iris-setosa -> 0, Iris-versicolor -> 1, Iris-virginica -> 2)
  awk -F, 'NF==5 {gsub("Iris-setosa", "0", $5); gsub("Iris-versicolor", "1", $5); gsub("Iris-virginica", "2", $5); print $1","$2","$3","$4","$5}' iris.data > iris.csv
fi

In [8]:
%%writefile ml_cpp.cpp
#include <mlpack/core.hpp>
#include <mlpack/methods/random_forest/random_forest.hpp>
#include <mlpack/methods/linear_svm/linear_svm.hpp>
#include <armadillo>
#include <iostream>

using namespace mlpack;
using namespace mlpack::tree; // for RandomForest
using namespace mlpack::svm;  // for LinearSVM
using namespace arma;
using namespace std;

int main()
{
  // Load the Iris dataset from iris.csv
  // Expecting 5 rows (4 features + 1 label) and 150 columns (samples)
  arma::mat dataset;
  if (!data::Load("iris.csv", dataset, true))
  {
    cerr << "Could not load iris.csv" << endl;
    return -1;
  }

  // Split dataset into features (first 4 rows) and labels (5th row)
  arma::mat X = dataset.submat(0, 0, 3, dataset.n_cols - 1);
  arma::Row<size_t> y;
  y.set_size(dataset.n_cols);
  for (size_t i = 0; i < dataset.n_cols; i++)
  {
    y[i] = static_cast<size_t>(dataset(4, i));
  }

  // Shuffle and split the dataset into training (80%) and testing (20%)
  arma::uvec indices = arma::randperm(dataset.n_cols);
  size_t nTrain = static_cast<size_t>(0.8 * dataset.n_cols);
  arma::uvec trainIndices = indices.subvec(0, nTrain - 1);
  arma::uvec testIndices = indices.subvec(nTrain, dataset.n_cols - 1);

  arma::mat X_train = X.cols(trainIndices);
  arma::Row<size_t> y_train = y.cols(trainIndices);
  arma::mat X_test = X.cols(testIndices);
  arma::Row<size_t> y_test = y.cols(testIndices);

  // Number of classes in the Iris dataset (0, 1, 2)
  size_t numClasses = 3;

  // Train a Random Forest model using 10 trees and minimum leaf size 1
  RandomForest<> rf(X_train, y_train, numClasses, 10, 1);
  arma::Row<size_t> predictions_rf;
  rf.Classify(X_test, predictions_rf);

  // Evaluate the Random Forest model by computing a confusion matrix and accuracy
  arma::Mat<size_t> confMatrix_rf(numClasses, numClasses, fill::zeros);
  for (size_t i = 0; i < y_test.n_elem; i++)
  {
    confMatrix_rf(predictions_rf[i], y_test[i])++;
  }
  double accuracy_rf = double(arma::accu(confMatrix_rf.diag())) / double(y_test.n_elem);
  cout << "\nRandom Forest Confusion Matrix (rows: predicted, columns: actual):" << endl;
  cout << confMatrix_rf << endl;
  cout << "Random Forest Accuracy: " << accuracy_rf << endl;

  // Train a Linear SVM model with lambda = 0.1
  LinearSVM<> svm(X_train, y_train, numClasses, 0.1);
  arma::Row<size_t> predictions_svm;
  svm.Classify(X_test, predictions_svm);

  // Evaluate the Linear SVM model by computing a confusion matrix and accuracy
  arma::Mat<size_t> confMatrix_svm(numClasses, numClasses, fill::zeros);
  for (size_t i = 0; i < y_test.n_elem; i++)
  {
    confMatrix_svm(predictions_svm[i], y_test[i])++;
  }
  double accuracy_svm = double(arma::accu(confMatrix_svm.diag())) / double(y_test.n_elem);
  cout << "\nLinear SVM Confusion Matrix (rows: predicted, columns: actual):" << endl;
  cout << confMatrix_svm << endl;
  cout << "Linear SVM Accuracy: " << accuracy_svm << endl;

  return 0;
}

Overwriting ml_cpp.cpp


In [9]:
%%bash
# Update package lists and install mlpack, Armadillo, and ensmallen (for LinearSVM)
apt-get update && apt-get install -y libmlpack-dev libensmallen-dev

# Compile the C++ code using g++ with OpenMP support and link with mlpack and Armadillo
g++ -std=c++11 ml_cpp.cpp -o ml_cpp -fopenmp -lmlpack -larmadillo

# Run the compiled executable
./ml_cpp

Hit:1 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:4 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists...
Reading package lists...
Building dependency tree...
Reading state information...
libensmallen-dev is already the newest version (2.18.2-1).
libmlpack-dev is already the newest version (3.4.2-5ubuntu1).
0 upgraded, 0 newly installed, 0 to remove and 30 not upgrade

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
