In [None]:
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <map>
#include <numeric>
#include <regex>
#include <set>
#include <string>
#include <vector>

// Define the Neuron class
// A single sigmoid neuron. The weight vector stores one weight per input
// followed by the bias weight in the last slot.
class Neuron {
public:
    // Creates a neuron without weights; activate() on it yields an activation of 0.
    Neuron() = default;

    // Creates a neuron with n_weights weights (inputs + 1 bias), each drawn
    // from std::rand() and scaled to [0, 1]. Non-positive counts give no weights.
    Neuron(int n_weights) {
        initWeights(n_weights);
    }

    // Computes the weighted sum: bias + sum(weights[i] * inputs[i]).
    // Only the first (weight count - 1) inputs are read, so trailing elements
    // such as a class label at the end of a data row are ignored. If fewer
    // inputs than weights are supplied, the missing inputs contribute nothing.
    void activate(const std::vector<float>& inputs) {
        if (m_weights.empty()) {
            m_activation = 0.0f;
            return;
        }
        const size_t n_inputs = std::min(m_weights.size() - 1, inputs.size());
        float sum = m_weights.back(); // Bias weight
        for (size_t i = 0; i < n_inputs; ++i) {
            sum += m_weights[i] * inputs[i];
        }
        m_activation = sum;
    }

    // Applies the logistic sigmoid to the stored activation.
    void transfer() {
        m_output = 1.0f / (1.0f + std::exp(-m_activation));
    }

    // Derivative of the sigmoid expressed through the stored output.
    float transfer_derivative() const {
        return m_output * (1.0f - m_output);
    }

    std::vector<float>& get_weights() { return m_weights; }
    // Read-only access so const neurons (e.g. inside const Network methods) can be inspected.
    const std::vector<float>& get_weights() const { return m_weights; }
    float get_output() const { return m_output; }
    float get_activation() const { return m_activation; }
    float get_delta() const { return m_delta; }
    void set_delta(float delta) { m_delta = delta; }

private:
    std::vector<float> m_weights;
    float m_activation = 0.0f;
    float m_output = 0.0f;
    float m_delta = 0.0f;

    void initWeights(int n_weights) {
        m_weights.clear();
        if (n_weights <= 0) {
            return;
        }
        m_weights.reserve(static_cast<size_t>(n_weights));
        for (int w = 0; w < n_weights; ++w) {
            m_weights.push_back(static_cast<float>(std::rand()) / static_cast<float>(RAND_MAX));
        }
    }
};

// Define the Layer class
// A layer is simply an ordered collection of neurons.
class Layer {
public:
    Layer() = default;

    // Creates n_neurons neurons, each with n_weights independently drawn weights.
    Layer(int n_neurons, int n_weights) {
        initNeurons(n_neurons, n_weights);
    }

    std::vector<Neuron>& get_neurons() { return m_neurons; }
    const std::vector<Neuron>& get_neurons() const { return m_neurons; }

private:
    void initNeurons(int n_neurons, int n_weights) {
        if (n_neurons <= 0) {
            return;
        }
        m_neurons.reserve(m_neurons.size() + static_cast<size_t>(n_neurons));
        // Construct every neuron separately: copying one prototype would give
        // all neurons of the layer identical weights, and identical neurons
        // receive identical updates, so the layer would never differentiate.
        for (int n = 0; n < n_neurons; ++n) {
            m_neurons.emplace_back(n_weights);
        }
    }

    std::vector<Neuron> m_neurons;
};

// Define the Network class
// A fully connected feed-forward network trained with per-sample backpropagation.
class Network {
public:
    // Seeds std::rand() with the current time so weight initialisation differs between runs.
    Network() {
        std::srand(static_cast<unsigned int>(std::time(nullptr)));
    }

    // Builds two hidden layers of n_hidden neurons and one output layer of
    // n_outputs neurons on top of n_inputs input values.
    void initialize_network(int n_inputs, int n_hidden, int n_outputs) {
        add_layer(n_hidden, n_inputs + 1); // +1 for bias weight
        add_layer(n_hidden, n_hidden + 1); // +1 for bias weight
        add_layer(n_outputs, n_hidden + 1); // +1 for bias weight
    }

    // Appends a layer of n_neurons neurons with n_weights weights each
    // (n_weights = number of inputs to the layer + 1 bias weight).
    void add_layer(int n_neurons, int n_weights) {
        m_layers.emplace_back(n_neurons, n_weights);
    }

    // Feeds inputs through every layer and returns the outputs of the last
    // layer (or the inputs themselves if the network has no layers).
    // Each neuron stores its activation and output for the later backward pass.
    std::vector<float> forward_propagate(const std::vector<float>& inputs) {
        std::vector<float> signal = inputs;
        for (auto& layer : m_layers) {
            auto& neurons = layer.get_neurons();
            std::vector<float> layer_outputs;
            layer_outputs.reserve(neurons.size());
            for (auto& neuron : neurons) {
                neuron.activate(signal);
                neuron.transfer();
                layer_outputs.push_back(neuron.get_output());
            }
            signal.swap(layer_outputs);
        }
        return signal;
    }

    // Computes and stores the delta of every neuron, walking from the output
    // layer back to the first layer. Must be called after forward_propagate().
    // expected needs one value per output neuron; std::out_of_range is thrown
    // (by vector::at) if it is shorter.
    void backward_propagate_error(const std::vector<float>& expected) {
        for (size_t i = m_layers.size(); i-- > 0;) {
            auto& layer_neurons = m_layers[i].get_neurons();
            const bool is_output_layer = (i + 1 == m_layers.size());
            for (size_t n = 0; n < layer_neurons.size(); ++n) {
                float error = 0.0f;
                if (is_output_layer) {
                    error = expected.at(n) - layer_neurons[n].get_output();
                } else {
                    // Error of a hidden neuron: deltas of the next layer weighted
                    // by the connection from this neuron (weight index n).
                    for (const auto& next_neuron : m_layers[i + 1].get_neurons()) {
                        error += (next_neuron.get_weights()[n] * next_neuron.get_delta());
                    }
                }
                layer_neurons[n].set_delta(error * layer_neurons[n].transfer_derivative());
            }
        }
    }

    // Applies one gradient step with learning rate l_rate using the deltas
    // stored by backward_propagate_error(). inputs is the row that was fed to
    // forward_propagate(); for the first layer only as many values as there
    // are non-bias weights are used, so a trailing class label in the row is
    // never treated as an input.
    void update_weights(const std::vector<float>& inputs, float l_rate) {
        std::vector<float> layer_inputs;
        for (size_t i = 0; i < m_layers.size(); ++i) {
            if (i == 0) {
                layer_inputs = inputs;
            } else {
                layer_inputs.clear();
                for (const auto& neuron : m_layers[i - 1].get_neurons()) {
                    layer_inputs.push_back(neuron.get_output());
                }
            }

            for (auto& neuron : m_layers[i].get_neurons()) {
                auto& weights = neuron.get_weights();
                if (weights.empty()) {
                    continue;
                }
                // The last weight is the bias and has no matching input.
                const size_t n_inputs = std::min(layer_inputs.size(), weights.size() - 1);
                const float step = l_rate * neuron.get_delta();
                for (size_t j = 0; j < n_inputs; ++j) {
                    weights[j] += step * layer_inputs[j];
                }
                weights.back() += step; // Update bias weight
            }
        }
    }

    // Trains for n_epoch epochs, updating the weights after every row.
    // The last element of each row is the class label and is used as the
    // index of the one-hot target, so it must lie in [0, n_outputs);
    // otherwise std::out_of_range is thrown. Empty rows are skipped.
    // The summed squared error of each epoch is printed to std::cout.
    void train(const std::vector<std::vector<float>>& training_data, float l_rate, size_t n_epoch, size_t n_outputs) {
        for (size_t e = 0; e < n_epoch; ++e) {
            float sum_error = 0;
            for (const auto& row : training_data) {
                if (row.empty()) {
                    continue;
                }
                const std::vector<float> outputs = forward_propagate(row);
                std::vector<float> expected(n_outputs, 0.0f);
                expected.at(static_cast<size_t>(static_cast<int>(row.back()))) = 1.0f;
                const size_t n_compared = std::min(n_outputs, outputs.size());
                for (size_t x = 0; x < n_compared; ++x) {
                    const float diff = expected[x] - outputs[x];
                    sum_error += diff * diff;
                }
                backward_propagate_error(expected);
                update_weights(row, l_rate);
            }
            // std::endl on purpose: flush so training progress is visible immediately.
            std::cout << "[>] epoch=" << e << ", l_rate=" << l_rate << ", error=" << sum_error << std::endl;
        }
    }

    // Returns the index of the output neuron with the highest output.
    int predict(const std::vector<float>& input) {
        const std::vector<float> outputs = forward_propagate(input);
        return static_cast<int>(std::max_element(outputs.begin(), outputs.end()) - outputs.begin());
    }

    // Prints every layer, neuron, weight and the last stored neuron state to std::cout.
    void display_human() const {
        std::cout << "[Network] (Layers: " << m_layers.size() << ")" << '\n';
        std::cout << "{" << '\n';
        for (size_t l = 0; l < m_layers.size(); ++l) {
            const auto& neurons = m_layers[l].get_neurons();
            std::cout << "\t(Layer " << l << "): {";
            for (size_t i = 0; i < neurons.size(); ++i) {
                const auto& neuron = neurons[i];
                std::cout << "<(Neuron " << i << "): [ weights={";
                const auto& weights = neuron.get_weights();
                for (size_t w = 0; w < weights.size(); ++w) {
                    std::cout << weights[w];
                    if (w + 1 < weights.size()) {
                        std::cout << ", ";
                    }
                }
                std::cout << "}, output=" << neuron.get_output()
                          << ", activation=" << neuron.get_activation()
                          << ", delta=" << neuron.get_delta()
                          << "]>";
                if (i + 1 < neurons.size()) {
                    std::cout << ", ";
                }
            }
            std::cout << "}";
            if (l + 1 < m_layers.size()) {
                std::cout << ", ";
            }
            std::cout << '\n';
        }
        std::cout << "}" << std::endl;
    }

private:
    std::vector<Layer> m_layers;
};


In [None]:

// accuracy_metric is defined further down in this file; declare it before use.
float accuracy_metric(std::vector<int> expect, std::vector<int> predict);

/*
* Evaluate the network with k-fold cross validation.
*
* dataset  - rows of input values followed by the class label in the last
*            column; the label is used as a one-hot index during training,
*            so labels are expected to be non-negative whole numbers
* n_folds  - number of equally sized, randomly drawn folds (rows that do not
*            fit evenly are left out)
* l_rate   - learning rate passed to Network::train
* n_epoch  - number of training epochs per fold (negative values train 0 epochs)
* n_hidden - number of neurons per hidden layer
*
* Returns one accuracy percentage per evaluated fold. The result is empty when
* n_folds is not positive, the dataset has no usable rows, or no fold has any
* training data (e.g. n_folds == 1).
*/
std::vector<float> evaluate_network(std::vector<std::vector<float>> dataset, int n_folds, float l_rate, int n_epoch, int n_hidden) {

	std::vector<float> scores;

	// rows without any column carry neither inputs nor a label
	dataset.erase(std::remove_if(dataset.begin(), dataset.end(),
		[](const std::vector<float>& row) { return row.empty(); }), dataset.end());

	if (n_folds <= 0 || dataset.empty()) {
		return scores;
	}

	/* Split dataset into k folds */

	// initialize prng
	std::srand(static_cast<unsigned int>(std::time(nullptr)));

	const size_t fold_size = dataset.size() / static_cast<size_t>(n_folds);

	std::vector<std::vector<std::vector<float>>> dataset_splits;
	dataset_splits.reserve(static_cast<size_t>(n_folds));
	for (int f = 0; f < n_folds; f++)
	{
		std::vector<std::vector<float>> fold;
		fold.reserve(fold_size);
		while (fold.size() < fold_size) {
			const size_t n = static_cast<size_t>(std::rand()) % dataset.size(); // get a random index

			// add the chosen element to the fold and remove it from the dataset
			std::swap(dataset[n], dataset.back());
			fold.push_back(dataset.back());
			dataset.pop_back();
		}

		dataset_splits.push_back(fold);
	}

	// One output neuron per class index. Derived from the highest label over
	// ALL folds so that a class missing from one training split still has a
	// valid one-hot slot.
	int max_label = -1;
	for (const auto& fold : dataset_splits) {
		for (const auto& row : fold) {
			max_label = std::max(max_label, static_cast<int>(row.back()));
		}
	}
	const int n_outputs = max_label + 1;
	const size_t epochs = static_cast<size_t>(std::max(n_epoch, 0));

	/* Iterate over folds */
	// choose one as test and the rest as training sets
	for (size_t i = 0; i < dataset_splits.size(); i++)
	{
		// merge all other folds into one train set
		std::vector<std::vector<float>> train_set;
		for (size_t j = 0; j < dataset_splits.size(); j++)
		{
			if (j == i) {
				continue;
			}
			train_set.insert(train_set.end(), dataset_splits[j].begin(), dataset_splits[j].end());
		}

		// nothing to train on (single fold, or fewer rows than folds)
		if (train_set.empty()) {
			continue;
		}

		std::vector<std::vector<float>> test_set = dataset_splits[i];

		// store the expected results
		std::vector<int> expected;
		expected.reserve(test_set.size());
		for (auto& row: test_set)
		{
			expected.push_back(static_cast<int>(row.back()));
			// just ensure that the actual result is not saved in the test data
			row.back() = 42;
		}

		const int n_inputs = static_cast<int>(train_set[0].size()) - 1;

		/* Backpropagation with stochastic gradient descent */
		// automatic storage: the network is released at the end of each fold
		Network network;
		network.initialize_network(n_inputs, n_hidden, n_outputs);
		network.train(train_set, l_rate, epochs, n_outputs);

		std::vector<int> predicted;
		predicted.reserve(test_set.size());
		for (const auto& row: test_set)
		{
			predicted.push_back(network.predict(row));
		}

		scores.push_back(accuracy_metric(expected, predicted));
	}

	return scores;
}

/*
* Percentage (0-100) of predictions that match the expected values.
*
* Entries are compared position by position. The percentage is relative to the
* number of predictions; predictions without a matching expected entry count
* as wrong. Returns 0 when there are no predictions.
*/
float accuracy_metric(std::vector<int> expect, std::vector<int> predict) {
	if (predict.empty()) {
		return 0.0f; // avoids 0/0 (NaN)
	}

	const size_t comparable = std::min(expect.size(), predict.size());
	size_t correct = 0;

	for (size_t i = 0; i < comparable; i++)
	{
		if (predict[i] == expect[i]) {
			correct++;
		}
	}
	return static_cast<float>(correct) * 100.0f / static_cast<float>(predict.size());
}

/*
* Load comma separated values from file and normalize the values
*
* Every non-blank line becomes one row of floats. All columns except the last
* one (the output) are min-max scaled to [0, 1]; a column whose values are all
* equal is set to 0. Blank lines and a trailing '\r' (Windows line endings)
* are ignored.
*
* Returns an empty vector if the file cannot be opened or contains no rows.
* A field that is not a number makes std::stof throw (std::invalid_argument /
* std::out_of_range); the exception is passed on to the caller.
*/
std::vector<std::vector<float>> load_csv_data(std::string filename) {
	std::ifstream csv_file(filename);

	std::vector<std::vector<float>> data;

	// per-column extremes, grown on demand so rows of differing length are safe
	std::vector<float> mins;
	std::vector<float> maxs;

	std::string line;
	while (std::getline(csv_file, line)) {
		if (!line.empty() && line.back() == '\r') {
			line.pop_back();
		}
		if (line.empty()) {
			continue;
		}

		// split line by commas and transform the fields to floats
		std::vector<float> row;
		std::string::size_type start = 0;
		for (;;) {
			const std::string::size_type comma = line.find(',', start);
			const bool is_last = (comma == std::string::npos);
			const std::string::size_type end = is_last ? line.size() : comma;
			if (is_last && end == start) {
				break; // line ended with a comma: no trailing empty field
			}
			row.push_back(std::stof(line.substr(start, end - start)));
			if (is_last) {
				break;
			}
			start = comma + 1;
		}
		if (row.empty()) {
			continue;
		}

		// keep track of the min and max value for each column for subsequent normalization
		for (size_t t = 0; t < row.size(); t++)
		{
			if (t >= mins.size()) {
				mins.push_back(row[t]);
				maxs.push_back(row[t]);
			}
			else if (row[t] > maxs[t]) {
				maxs[t] = row[t];
			}
			else if (row[t] < mins[t]) {
				mins[t] = row[t];
			}
		}

		data.push_back(row);
	}

	// normalize values
	for (auto& vec : data) {
		// ignore the last column (the output)
		for (size_t i = 0; i + 1 < vec.size(); i++)
		{
			const float range = maxs[i] - mins[i];
			// constant column: dividing by a zero range would produce NaN
			vec[i] = (range > 0.0f) ? (vec[i] - mins[i]) / range : 0.0f;
		}
	}

	return data;
}

In [None]:
/*
* Load the dataset, remap the class labels to 0..k-1, run the k-fold
* evaluation and print the mean accuracy.
* Returns 0 on success and 1 if no data or no scores are available.
*/
int main() {

	std::vector<std::vector<float>> csv_data = load_csv_data("../data/data.csv");
	if (csv_data.empty()) {
		std::cerr << "No data could be loaded from ../data/data.csv" << std::endl;
		return 1;
	}

	/*
	* Normalize the last column (turning the outputs into values starting from 0 for the one-hot encoding in the end)
	*/
	std::map<int, int> lookup;
	for (auto& vec : csv_data) {
		const int label = static_cast<int>(vec.back());
		// insert unique values; the next free index equals the number of labels seen so far
		const std::pair<std::map<int, int>::iterator, bool> ret =
			lookup.insert(std::pair<int, int>(label, static_cast<int>(lookup.size())));
		// update the vector with the (new or already known) index
		vec.back() = static_cast<float>(ret.first->second);
	}

	int n_folds = 5;		// how many folds you want to create from the given dataset
	float l_rate = 0.3f;	// how much of an impact shall an error have on a weight
	int n_epoch = 500;		// how many times should weights be updated
	int n_hidden = 5;		// how many neurons you want in the first layer

	// test the implemented neural network
	std::vector<float> scores = evaluate_network(csv_data, n_folds, l_rate, n_epoch, n_hidden);
	if (scores.empty()) {
		// dividing by scores.size() == 0 would print NaN
		std::cerr << "No folds could be evaluated" << std::endl;
		return 1;
	}

	// calculate the mean average of the scores across each cross validation
	float mean = std::accumulate(scores.begin(), scores.end(), decltype(scores)::value_type(0)) / static_cast<float>(scores.size());

	std::cout << "Mean accuracy: " << mean << std::endl;

	return 0;
}

In [None]:
main()