From 2c1a6a280bb8bf46357c92f85d40addc3fe446d3 Mon Sep 17 00:00:00 2001 From: giovastabile Date: Wed, 10 Apr 2019 09:09:39 +0200 Subject: [PATCH] added method to read and write matrices and tensors, Assert Functions, and tools to format the code --- code_formatter.sh | 79 + include/Activation/Identity.h | 44 +- include/Activation/ReLU.h | 44 +- include/Activation/Sigmoid.h | 44 +- include/Activation/Softmax.h | 52 +- include/Callback.h | 55 +- include/Callback/VerboseCallback.h | 30 +- include/Config.h | 3 +- include/EigenStream.h | 239 + include/Layer.h | 252 +- include/Layer/Convolutional.h | 370 +- include/Layer/FullyConnected.h | 249 +- include/Layer/MaxPooling.h | 243 +- include/M_Assert.h | 25 + include/MiniDNN.h | 1 + include/Network.h | 783 +-- include/Optimizer.h | 53 +- include/Optimizer/AdaGrad.h | 70 +- include/Optimizer/RMSProp.h | 73 +- include/Optimizer/SGD.h | 37 +- include/Output.h | 86 +- include/Output/BinaryClassEntropy.h | 164 +- include/Output/MultiClassEntropy.h | 200 +- include/Output/RegressionMSE.h | 74 +- include/RNG.h | 82 +- include/Utils/Convolution.h | 116 +- include/Utils/FindMax.h | 81 +- include/Utils/Random.h | 33 +- include/Utils/sparsepp.h | 7915 +++++++++++++++------------ remove_orig_files.sh | 3 + 30 files changed, 6614 insertions(+), 4886 deletions(-) create mode 100755 code_formatter.sh create mode 100644 include/EigenStream.h create mode 100755 include/M_Assert.h create mode 100755 remove_orig_files.sh diff --git a/code_formatter.sh b/code_formatter.sh new file mode 100755 index 0000000..33e5be7 --- /dev/null +++ b/code_formatter.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +####################################### + +required_command="astyle" +code_directory="include/" +tutorial_directory="tutorials/" + +####################################### + +usage() { + echo + echo -e "\tUsage: $(basename $0) [files]" + echo + echo -e "\tIf files are not specified, $(basename $0) formats all ".C" and ".H" files" + echo -e "\tin source directory; otherwise, it formats all given files." + echo + echo -e "\tRequired command: $required_command" + echo + exit 0 +} + + +[[ $1 == "-h" ]] && usage + +# Test for required program +for comm in $required_command; do + command -v $comm >/dev/null 2>&1 || { + echo "I require $comm but it's not installed. Aborting." >&2; + exit 1 + } +done + +# Set the files to format +[[ $# != 0 ]] && src_files=$@ || src_files="--recursive $code_directory**.h,**.H" +[[ $# != 0 ]] && tutorial_files=$@ || tutorial_files="--recursive $tutorial_directory**.cpp,**.H" + +echo $tutorial_files +echo $src_files + +# Here the important part: astyle formats the src files. +astyle --style=bsd\ + --indent=spaces=4\ + --indent-classes\ + --indent-switches\ + --indent-col1-comments\ + --break-blocks\ + --pad-oper\ + --pad-comma\ + --pad-header\ + --delete-empty-lines\ + --align-pointer=type\ + --align-reference=type\ + --add-braces\ + --convert-tabs\ + --close-templates\ + --max-code-length=80\ + --mode=c\ + $src_files + +# Here the important part: astyle formats the tutorial files. 
+astyle --style=bsd\ + --indent=spaces=4\ + --indent-classes\ + --indent-switches\ + --indent-col1-comments\ + --break-blocks\ + --pad-oper\ + --pad-comma\ + --pad-header\ + --delete-empty-lines\ + --align-pointer=type\ + --align-reference=type\ + --add-braces\ + --convert-tabs\ + --close-templates\ + --max-code-length=80\ + --mode=c\ + $tutorial_files diff --git a/include/Activation/Identity.h b/include/Activation/Identity.h index 170cf0c..dd94f60 100644 --- a/include/Activation/Identity.h +++ b/include/Activation/Identity.h @@ -4,7 +4,8 @@ #include #include "../Config.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -18,26 +19,27 @@ namespace MiniDNN { /// class Identity { -private: - typedef Eigen::Matrix Matrix; - -public: - // a = activation(z) = z - // Z = [z1, ..., zn], A = [a1, ..., an], n observations - static inline void activate(const Matrix& Z, Matrix& A) - { - A.noalias() = Z; - } - - // Apply the Jacobian matrix J to a vector f - // J = d_a / d_z = I - // g = J * f = f - // Z = [z1, ..., zn], G = [g1, ..., gn], F = [f1, ..., fn] - // Note: When entering this function, Z and G may point to the same matrix - static inline void apply_jacobian(const Matrix& Z, const Matrix& A, const Matrix& F, Matrix& G) - { - G.noalias() = F; - } + private: + typedef Eigen::Matrix Matrix; + + public: + // a = activation(z) = z + // Z = [z1, ..., zn], A = [a1, ..., an], n observations + static inline void activate(const Matrix& Z, Matrix& A) + { + A.noalias() = Z; + } + + // Apply the Jacobian matrix J to a vector f + // J = d_a / d_z = I + // g = J * f = f + // Z = [z1, ..., zn], G = [g1, ..., gn], F = [f1, ..., fn] + // Note: When entering this function, Z and G may point to the same matrix + static inline void apply_jacobian(const Matrix& Z, const Matrix& A, + const Matrix& F, Matrix& G) + { + G.noalias() = F; + } }; diff --git a/include/Activation/ReLU.h b/include/Activation/ReLU.h index 658cee4..686d157 100644 --- a/include/Activation/ReLU.h +++ b/include/Activation/ReLU.h @@ -4,7 +4,8 @@ #include #include "../Config.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -14,26 +15,27 @@ namespace MiniDNN { /// class ReLU { -private: - typedef Eigen::Matrix Matrix; - -public: - // a = activation(z) = max(z, 0) - // Z = [z1, ..., zn], A = [a1, ..., an], n observations - static inline void activate(const Matrix& Z, Matrix& A) - { - A.array() = Z.array().cwiseMax(Scalar(0)); - } - - // Apply the Jacobian matrix J to a vector f - // J = d_a / d_z = diag(sign(a)) = diag(a > 0) - // g = J * f = (a > 0) .* f - // Z = [z1, ..., zn], G = [g1, ..., gn], F = [f1, ..., fn] - // Note: When entering this function, Z and G may point to the same matrix - static inline void apply_jacobian(const Matrix& Z, const Matrix& A, const Matrix& F, Matrix& G) - { - G.array() = (A.array() > Scalar(0)).select(F, Scalar(0)); - } + private: + typedef Eigen::Matrix Matrix; + + public: + // a = activation(z) = max(z, 0) + // Z = [z1, ..., zn], A = [a1, ..., an], n observations + static inline void activate(const Matrix& Z, Matrix& A) + { + A.array() = Z.array().cwiseMax(Scalar(0)); + } + + // Apply the Jacobian matrix J to a vector f + // J = d_a / d_z = diag(sign(a)) = diag(a > 0) + // g = J * f = (a > 0) .* f + // Z = [z1, ..., zn], G = [g1, ..., gn], F = [f1, ..., fn] + // Note: When entering this function, Z and G may point to the same matrix + static inline void apply_jacobian(const Matrix& Z, const Matrix& A, + const Matrix& F, Matrix& G) + { + G.array() = (A.array() > Scalar(0)).select(F, Scalar(0)); + } }; diff 
--git a/include/Activation/Sigmoid.h b/include/Activation/Sigmoid.h index 370997a..b9f2db5 100644 --- a/include/Activation/Sigmoid.h +++ b/include/Activation/Sigmoid.h @@ -4,7 +4,8 @@ #include #include "../Config.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -14,26 +15,27 @@ namespace MiniDNN { /// class Sigmoid { -private: - typedef Eigen::Matrix Matrix; - -public: - // a = activation(z) = 1 / (1 + exp(-z)) - // Z = [z1, ..., zn], A = [a1, ..., an], n observations - static inline void activate(const Matrix& Z, Matrix& A) - { - A.array() = Scalar(1) / (Scalar(1) + (-Z.array()).exp()); - } - - // Apply the Jacobian matrix J to a vector f - // J = d_a / d_z = diag(a .* (1 - a)) - // g = J * f = a .* (1 - a) .* f - // Z = [z1, ..., zn], G = [g1, ..., gn], F = [f1, ..., fn] - // Note: When entering this function, Z and G may point to the same matrix - static inline void apply_jacobian(const Matrix& Z, const Matrix& A, const Matrix& F, Matrix& G) - { - G.array() = A.array() * (Scalar(1) - A.array()) * F.array(); - } + private: + typedef Eigen::Matrix Matrix; + + public: + // a = activation(z) = 1 / (1 + exp(-z)) + // Z = [z1, ..., zn], A = [a1, ..., an], n observations + static inline void activate(const Matrix& Z, Matrix& A) + { + A.array() = Scalar(1) / (Scalar(1) + (-Z.array()).exp()); + } + + // Apply the Jacobian matrix J to a vector f + // J = d_a / d_z = diag(a .* (1 - a)) + // g = J * f = a .* (1 - a) .* f + // Z = [z1, ..., zn], G = [g1, ..., gn], F = [f1, ..., fn] + // Note: When entering this function, Z and G may point to the same matrix + static inline void apply_jacobian(const Matrix& Z, const Matrix& A, + const Matrix& F, Matrix& G) + { + G.array() = A.array() * (Scalar(1) - A.array()) * F.array(); + } }; diff --git a/include/Activation/Softmax.h b/include/Activation/Softmax.h index e3ced2f..6d39913 100644 --- a/include/Activation/Softmax.h +++ b/include/Activation/Softmax.h @@ -1,7 +1,8 @@ #ifndef ACTIVATION_SOFTMAX_H_ #define ACTIVATION_SOFTMAX_H_ -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -11,30 +12,31 @@ namespace MiniDNN { /// class Softmax { -private: - typedef Eigen::Matrix Matrix; - typedef Eigen::Array RowArray; - -public: - // a = activation(z) = softmax(z) - // Z = [z1, ..., zn], A = [a1, ..., an], n observations - static inline void activate(const Matrix& Z, Matrix& A) - { - A.array() = (Z.rowwise() - Z.colwise().maxCoeff()).array().exp(); - RowArray colsums = A.colwise().sum(); - A.array().rowwise() /= colsums; - } - - // Apply the Jacobian matrix J to a vector f - // J = d_a / d_z = diag(a) - a * a' - // g = J * f = a .* f - a * (a' * f) = a .* (f - a'f) - // Z = [z1, ..., zn], G = [g1, ..., gn], F = [f1, ..., fn] - // Note: When entering this function, Z and G may point to the same matrix - static inline void apply_jacobian(const Matrix& Z, const Matrix& A, const Matrix& F, Matrix& G) - { - RowArray a_dot_f = A.cwiseProduct(F).colwise().sum(); - G.array() = A.array() * (F.array().rowwise() - a_dot_f); - } + private: + typedef Eigen::Matrix Matrix; + typedef Eigen::Array RowArray; + + public: + // a = activation(z) = softmax(z) + // Z = [z1, ..., zn], A = [a1, ..., an], n observations + static inline void activate(const Matrix& Z, Matrix& A) + { + A.array() = (Z.rowwise() - Z.colwise().maxCoeff()).array().exp(); + RowArray colsums = A.colwise().sum(); + A.array().rowwise() /= colsums; + } + + // Apply the Jacobian matrix J to a vector f + // J = d_a / d_z = diag(a) - a * a' + // g = J * f = a .* f - a * (a' * f) = a .* (f - a'f) + // Z = [z1, ..., zn], 
G = [g1, ..., gn], F = [f1, ..., fn] + // Note: When entering this function, Z and G may point to the same matrix + static inline void apply_jacobian(const Matrix& Z, const Matrix& A, + const Matrix& F, Matrix& G) + { + RowArray a_dot_f = A.cwiseProduct(F).colwise().sum(); + G.array() = A.array() * (F.array().rowwise() - a_dot_f); + } }; diff --git a/include/Callback.h b/include/Callback.h index 90af852..93d82bb 100644 --- a/include/Callback.h +++ b/include/Callback.h @@ -4,7 +4,8 @@ #include #include "Config.h" -namespace MiniDNN { +namespace MiniDNN +{ class Network; @@ -27,30 +28,34 @@ class Network; /// class Callback { -protected: - typedef Eigen::Matrix Matrix; - typedef Eigen::RowVectorXi IntegerVector; - -public: - // Public members that will be set by the network during the training process - int m_nbatch; // Number of total batches - int m_batch_id; // The index for the current mini-batch (0, 1, ..., m_nbatch-1) - int m_nepoch; // Total number of epochs (one run on the whole data set) in the training process - int m_epoch_id; // The index for the current epoch (0, 1, ..., m_nepoch-1) - - Callback() : - m_nbatch(0), m_batch_id(0), m_nepoch(0), m_epoch_id(0) - {} - - virtual ~Callback() {} - - // Before training a mini-batch - virtual void pre_training_batch(const Network* net, const Matrix& x, const Matrix& y) {} - virtual void pre_training_batch(const Network* net, const Matrix& x, const IntegerVector& y) {} - - // After a mini-batch is trained - virtual void post_training_batch(const Network* net, const Matrix& x, const Matrix& y) {} - virtual void post_training_batch(const Network* net, const Matrix& x, const IntegerVector& y) {} + protected: + typedef Eigen::Matrix Matrix; + typedef Eigen::RowVectorXi IntegerVector; + + public: + // Public members that will be set by the network during the training process + int m_nbatch; // Number of total batches + int m_batch_id; // The index for the current mini-batch (0, 1, ..., m_nbatch-1) + int m_nepoch; // Total number of epochs (one run on the whole data set) in the training process + int m_epoch_id; // The index for the current epoch (0, 1, ..., m_nepoch-1) + + Callback() : + m_nbatch(0), m_batch_id(0), m_nepoch(0), m_epoch_id(0) + {} + + virtual ~Callback() {} + + // Before training a mini-batch + virtual void pre_training_batch(const Network* net, const Matrix& x, + const Matrix& y) {} + virtual void pre_training_batch(const Network* net, const Matrix& x, + const IntegerVector& y) {} + + // After a mini-batch is trained + virtual void post_training_batch(const Network* net, const Matrix& x, + const Matrix& y) {} + virtual void post_training_batch(const Network* net, const Matrix& x, + const IntegerVector& y) {} }; diff --git a/include/Callback/VerboseCallback.h b/include/Callback/VerboseCallback.h index 94a82ea..ffdec7b 100644 --- a/include/Callback/VerboseCallback.h +++ b/include/Callback/VerboseCallback.h @@ -7,7 +7,8 @@ #include "../Callback.h" #include "../Network.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -17,18 +18,21 @@ namespace MiniDNN { /// class VerboseCallback: public Callback { -public: - void post_training_batch(const Network* net, const Matrix& x, const Matrix& y) - { - const Scalar loss = net->get_output()->loss(); - std::cout << "[Epoch " << m_epoch_id << ", batch " << m_batch_id << "] Loss = " << loss << std::endl; - } - - void post_training_batch(const Network* net, const Matrix& x, const IntegerVector& y) - { - Scalar loss = net->get_output()->loss(); - std::cout << "[Epoch " << m_epoch_id << ", batch 
" << m_batch_id << "] Loss = " << loss << std::endl; - } + public: + void post_training_batch(const Network* net, const Matrix& x, const Matrix& y) + { + const Scalar loss = net->get_output()->loss(); + std::cout << "[Epoch " << m_epoch_id << ", batch " << m_batch_id << "] Loss = " + << loss << std::endl; + } + + void post_training_batch(const Network* net, const Matrix& x, + const IntegerVector& y) + { + Scalar loss = net->get_output()->loss(); + std::cout << "[Epoch " << m_epoch_id << ", batch " << m_batch_id << "] Loss = " + << loss << std::endl; + } }; diff --git a/include/Config.h b/include/Config.h index e5691b2..d316a7f 100644 --- a/include/Config.h +++ b/include/Config.h @@ -1,7 +1,8 @@ #ifndef CONFIG_H_ #define CONFIG_H_ -namespace MiniDNN { +namespace MiniDNN +{ // Floating-point number type diff --git a/include/EigenStream.h b/include/EigenStream.h new file mode 100644 index 0000000..966d53a --- /dev/null +++ b/include/EigenStream.h @@ -0,0 +1,239 @@ +/// \file +/// Source code file of the EigenStream class, it contains the implementation of +/// several methods for input output operations. + +#ifndef EigenStream_H +#define EigenStream_H + +#include +#include +#include +#include +#include +#include "M_Assert.h" +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wold-style-cast" +#include +#include +#pragma GCC diagnostic pop +#define MAXBUFSIZE (static_cast (1e6)) + + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // + +/// Class for input-output manipulation +class EigenStream +{ + private: + + public: + + //-------------------------------------------------------------------------- + /// Export the matrices in numpy (type=python), matlab (type=matlab) format and txt (type=eigen) format + /* In this case the function is implemented for a second order matrix */ + /// + /// @param[in] matrice Eigen::MatrixXd that you want to export. + /// @param[in] Name string to identify the name you want to use to save the file. + /// @param[in] type string to identify format to export the matrix if numpy (type="python"), if matlab (type="matlab") if txt (type="eigen"). + /// @param[in] folder string to identify the folder where you want to save the file. 
+ /// + static void exportMatrix(Eigen::MatrixXd& matrix, std::string Name, + std::string type = "python", std::string folder = "./Model") + { + mkdir(folder.c_str(), ACCESSPERMS); + std::string est; + + if (type == "python") + { + est = ".py"; + std::string filename(folder + "/" + Name + "_mat" + est); + std::ofstream str(filename.c_str()); + str << Name << "=np.array(["; + + for (int i = 0; i < matrix.rows(); i++) + { + for (int j = 0; j < matrix.cols(); j++) + { + if (j == 0) + { + str << "[" << matrix(i, j); + } + else + { + str << "," << matrix(i, j); + } + } + + if (i != (matrix.rows() - 1)) + { + str << "]," << std::endl; + } + } + + str << "]])" << std::endl; + } + + if (type == "matlab") + { + est = ".m"; + std::string filename(folder + "/" + Name + "_mat" + est); + std::ofstream str(filename.c_str()); + str << Name << "=["; + + for (int i = 0; i < matrix.rows(); i++) + { + for (int j = 0; j < matrix.cols(); j++) + { + str << " " << matrix(i, j); + } + + if (i != (matrix.rows() - 1)) + { + str << ";" << std::endl; + } + } + + str << "];" << std::endl; + } + + if (type == "eigen") + { + std::ofstream ofs; + std::string filename(folder + "/" + Name + "_mat.txt"); + ofs.open(filename.c_str()); + ofs << matrix << std::endl; + ofs.close(); + } + } + + //-------------------------------------------------------------------------- + /// @brief Saves a dense matrix to a binary format file + /// + /// @param[in] Matrix The Eigen dense matrix + /// @param[in] folder the folder where you want to save the matrix + /// @param[in] MatrixName The matrix name for the output file + /// + /// @tparam MatrixType type of the matrix, i.e. double, float, ... + /// + template + static void SaveDenseMatrix(MatrixType& Matrix, std::string folder, + std::string MatrixName) + { + mkdir(folder.c_str(), ACCESSPERMS); + std::ofstream out(folder + MatrixName, + std::ios::out | std::ios::binary | std::ios::trunc); + typename MatrixType::Index rows = Matrix.rows(), cols = Matrix.cols(); + out.write(reinterpret_cast (&rows), sizeof(typename MatrixType::Index)); + out.write(reinterpret_cast (&cols), sizeof(typename MatrixType::Index)); + out.write(reinterpret_cast (Matrix.data()), + rows * cols * sizeof(typename MatrixType::Scalar) ); + out.close(); + } + + //-------------------------------------------------------------------------- + /// @brief Reads a dense matrix from a binary format file + /// + /// @param[in,out] Matrix The Eigen dense matrix + /// @param[in] folder The folder from where you want to read the matrix + /// @param[in] MatrixName The matrix name of the input file + /// + /// @tparam MatrixType type of the matrix, i.e. double, float, ... + /// + template + void ReadDenseMatrix(MatrixType& Matrix, std::string folder, + std::string MatrixName) + { + std::ifstream in; + in.open((folder + MatrixName).c_str(), std::ios::in | std::ios::binary); + std::string message(folder + MatrixName + + " file does not exist, Check if the file is existing"); + M_Assert(in.good(), message.c_str()); + + if (in.is_open()) + { + typename MatrixType::Index rows = 0, cols = 0; + in.read(reinterpret_cast (&rows), sizeof(typename MatrixType::Index)); + in.read(reinterpret_cast (&cols), sizeof(typename MatrixType::Index)); + Matrix.resize(rows, cols); + in.read( reinterpret_cast(Matrix.data()), + rows * cols * sizeof(typename MatrixType::Scalar) ); + in.close(); + } + } + + //---------------------------------------------------------------------- + /// @brief Saves a dense tensor. 
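Before the tensor counterparts that follow, here is a minimal round-trip sketch (hypothetical example, not part of the patch; the folder and file name are illustrative) for the two dense-matrix routines above. In this patch SaveDenseMatrix is static while ReadDenseMatrix is a non-static member, so the latter needs an EigenStream instance:

#include <Eigen/Dense>
#include "EigenStream.h"

int main()
{
    Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 4);
    // Writes rows, cols and the raw data buffer to ./Model/A.bin
    EigenStream::SaveDenseMatrix(A, "./Model/", "A.bin");
    Eigen::MatrixXd B;
    EigenStream stream;
    // M_Assert aborts with a readable message if the file does not exist
    stream.ReadDenseMatrix(B, "./Model/", "A.bin");
    return A.isApprox(B) ? 0 : 1;
}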
+ /// + /// @param Tensor The tensor + /// @param[in] folder The folder + /// @param[in] MatrixName The matrix name + /// + /// @tparam TensorType type of the tensor, i.e. double, float, ... + /// + template + static void SaveDenseTensor(TensorType& Tensor, std::string folder, + std::string MatrixName) + { + std::ofstream out(folder + MatrixName, + std::ios::out | std::ios::binary | std::ios::trunc); + typename TensorType::Dimensions dim = Tensor.dimensions(); + int tot = 1; + + for (unsigned int k = 0; k < dim.size(); k++) + { + tot *= dim[k]; + } + + out.write(reinterpret_cast (&dim), + sizeof(typename TensorType::Dimensions)); + out.write(reinterpret_cast (Tensor.data()), + tot * sizeof(typename TensorType::Scalar) ); + out.close(); + } + + //---------------------------------------------------------------------- + /// @brief Reads a dense tensor. + /// + /// @param Tensor The tensor + /// @param[in] folder The folder + /// @param[in] MatrixName The matrix name + /// + /// @tparam TensorType type of the tensor, i.e. double, float, ... + /// + template + static void ReadDenseTensor(TensorType& Tensor, std::string folder, + std::string MatrixName) + { + std::ifstream in; + in.open((folder + MatrixName).c_str(), std::ios::in | std::ios::binary); + typename TensorType::Dimensions dim; + in.read(reinterpret_cast (&dim), + sizeof(typename TensorType::Dimensions)); + auto dims = Tensor.dimensions(); + M_Assert(dims.size() == dim.size(), + "The rank of the tensor you want to fill does not coincide with the rank of the tensor you are reading"); + int tot = 1; + + for (unsigned int k = 0; k < dim.size(); k++) + { + tot *= dim[k]; + } + + Tensor.resize(dim); + in.read( reinterpret_cast(Tensor.data()), + tot * sizeof(typename TensorType::Scalar) ); + in.close(); + } + + + + +}; + +#endif + + + + + + diff --git a/include/Layer.h b/include/Layer.h index 618238a..95f9b25 100644 --- a/include/Layer.h +++ b/include/Layer.h @@ -7,7 +7,8 @@ #include "RNG.h" #include "Optimizer.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -23,127 +24,134 @@ namespace MiniDNN { /// class Layer { -protected: - typedef Eigen::Matrix Matrix; - typedef Eigen::Matrix Vector; - - const int m_in_size; // Size of input units - const int m_out_size; // Size of output units - -public: - /// - /// Constructor - /// - /// \param in_size Number of input units of this hidden Layer. It must be - /// equal to the number of output units of the previous layer. - /// \param out_size Number of output units of this hidden layer. It must be - /// equal to the number of input units of the next layer. - /// - Layer(const int in_size, const int out_size) : - m_in_size(in_size), m_out_size(out_size) - {} - - /// - /// Virtual destructor - /// - virtual ~Layer() {} - - /// - /// Get the number of input units of this hidden layer. - /// - int in_size() const { return m_in_size; } - /// - /// Get the number of output units of this hidden layer. - /// - int out_size() const { return m_out_size; } - - /// - /// Initialize layer parameters using \f$N(\mu, \sigma^2)\f$ distribution - /// - /// \param mu Mean of the normal distribution. - /// \param sigma Standard deviation of the normal distribution. - /// \param rng The random number generator of type RNG. - virtual void init(const Scalar& mu, const Scalar& sigma, RNG& rng) = 0; - - /// - /// Compute the output of this layer - /// - /// The purpose of this function is to let the hidden layer compute information - /// that will be passed to the next layer as the input. 
The concrete behavior - /// of this function is subject to the implementation, with the only - /// requirement that after calling this function, the Layer::output() member - /// function will return a reference to the output values. - /// - /// \param prev_layer_data The output of previous layer, which is also the - /// input of this layer. `prev_layer_data` should have - /// `in_size` rows as in the constructor, and each - /// column of `prev_layer_data` is an observation. - /// - virtual void forward(const Matrix& prev_layer_data) = 0; - - /// - /// Obtain the output values of this layer - /// - /// This function is assumed to be called after Layer::forward() in each iteration. - /// The output are the values of output hidden units after applying activation function. - /// The main usage of this function is to provide the `prev_layer_data` parameter - /// in Layer::forward() of the next layer. - /// - /// \return A reference to the matrix that contains the output values. The - /// matrix should have `out_size` rows as in the constructor, - /// and have number of columns equal to that of `prev_layer_data` in the - /// Layer::forward() function. Each column represents an observation. - /// - virtual const Matrix& output() const = 0; - - /// - /// Compute the gradients of parameters and input units using back-propagation - /// - /// The purpose of this function is to compute the gradient of input units, - /// which can be retrieved by Layer::backprop_data(), and the gradient of - /// layer parameters, which could later be used by the Layer::update() function. - /// - /// \param prev_layer_data The output of previous layer, which is also the - /// input of this layer. `prev_layer_data` should have - /// `in_size` rows as in the constructor, and each - /// column of `prev_layer_data` is an observation. - /// \param next_layer_data The gradients of the input units of the next layer, - /// which is also the gradients of the output units of - /// this layer. `next_layer_data` should have - /// `out_size` rows as in the constructor, and the same - /// number of columns as `prev_layer_data`. - /// - virtual void backprop(const Matrix& prev_layer_data, const Matrix& next_layer_data) = 0; - - /// - /// Obtain the gradient of input units of this layer - /// - /// This function provides the `next_layer_data` parameter in Layer::backprop() - /// of the previous layer, since the derivative of the input of this layer is also the derivative - /// of the output of previous layer. - /// - virtual const Matrix& backprop_data() const = 0; - - /// - /// Update parameters after back-propagation - /// - /// \param opt The optimization algorithm to be used. See the Optimizer class. - /// - virtual void update(Optimizer& opt) = 0; - - /// - /// Get serialized values of parameters - /// - virtual std::vector get_parameters() const = 0; - /// - /// Set the values of layer parameters from serialized data - /// - virtual void set_parameters(const std::vector& param) {}; - - /// - /// Get serialized values of the gradient of parameters - /// - virtual std::vector get_derivatives() const = 0; + protected: + typedef Eigen::Matrix Matrix; + typedef Eigen::Matrix Vector; + + const int m_in_size; // Size of input units + const int m_out_size; // Size of output units + + public: + /// + /// Constructor + /// + /// \param in_size Number of input units of this hidden Layer. It must be + /// equal to the number of output units of the previous layer. + /// \param out_size Number of output units of this hidden layer. 
It must be + /// equal to the number of input units of the next layer. + /// + Layer(const int in_size, const int out_size) : + m_in_size(in_size), m_out_size(out_size) + {} + + /// + /// Virtual destructor + /// + virtual ~Layer() {} + + /// + /// Get the number of input units of this hidden layer. + /// + int in_size() const + { + return m_in_size; + } + /// + /// Get the number of output units of this hidden layer. + /// + int out_size() const + { + return m_out_size; + } + + /// + /// Initialize layer parameters using \f$N(\mu, \sigma^2)\f$ distribution + /// + /// \param mu Mean of the normal distribution. + /// \param sigma Standard deviation of the normal distribution. + /// \param rng The random number generator of type RNG. + virtual void init(const Scalar& mu, const Scalar& sigma, RNG& rng) = 0; + + /// + /// Compute the output of this layer + /// + /// The purpose of this function is to let the hidden layer compute information + /// that will be passed to the next layer as the input. The concrete behavior + /// of this function is subject to the implementation, with the only + /// requirement that after calling this function, the Layer::output() member + /// function will return a reference to the output values. + /// + /// \param prev_layer_data The output of previous layer, which is also the + /// input of this layer. `prev_layer_data` should have + /// `in_size` rows as in the constructor, and each + /// column of `prev_layer_data` is an observation. + /// + virtual void forward(const Matrix& prev_layer_data) = 0; + + /// + /// Obtain the output values of this layer + /// + /// This function is assumed to be called after Layer::forward() in each iteration. + /// The output are the values of output hidden units after applying activation function. + /// The main usage of this function is to provide the `prev_layer_data` parameter + /// in Layer::forward() of the next layer. + /// + /// \return A reference to the matrix that contains the output values. The + /// matrix should have `out_size` rows as in the constructor, + /// and have number of columns equal to that of `prev_layer_data` in the + /// Layer::forward() function. Each column represents an observation. + /// + virtual const Matrix& output() const = 0; + + /// + /// Compute the gradients of parameters and input units using back-propagation + /// + /// The purpose of this function is to compute the gradient of input units, + /// which can be retrieved by Layer::backprop_data(), and the gradient of + /// layer parameters, which could later be used by the Layer::update() function. + /// + /// \param prev_layer_data The output of previous layer, which is also the + /// input of this layer. `prev_layer_data` should have + /// `in_size` rows as in the constructor, and each + /// column of `prev_layer_data` is an observation. + /// \param next_layer_data The gradients of the input units of the next layer, + /// which is also the gradients of the output units of + /// this layer. `next_layer_data` should have + /// `out_size` rows as in the constructor, and the same + /// number of columns as `prev_layer_data`. + /// + virtual void backprop(const Matrix& prev_layer_data, + const Matrix& next_layer_data) = 0; + + /// + /// Obtain the gradient of input units of this layer + /// + /// This function provides the `next_layer_data` parameter in Layer::backprop() + /// of the previous layer, since the derivative of the input of this layer is also the derivative + /// of the output of previous layer. 
+ /// + virtual const Matrix& backprop_data() const = 0; + + /// + /// Update parameters after back-propagation + /// + /// \param opt The optimization algorithm to be used. See the Optimizer class. + /// + virtual void update(Optimizer& opt) = 0; + + /// + /// Get serialized values of parameters + /// + virtual std::vector get_parameters() const = 0; + /// + /// Set the values of layer parameters from serialized data + /// + virtual void set_parameters(const std::vector& param) {}; + + /// + /// Get serialized values of the gradient of parameters + /// + virtual std::vector get_derivatives() const = 0; }; diff --git a/include/Layer/Convolutional.h b/include/Layer/Convolutional.h index f7502e7..eb317c8 100644 --- a/include/Layer/Convolutional.h +++ b/include/Layer/Convolutional.h @@ -9,7 +9,8 @@ #include "../Utils/Convolution.h" #include "../Utils/Random.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -22,191 +23,194 @@ namespace MiniDNN { template class Convolutional: public Layer { -private: - typedef Eigen::Matrix Matrix; - typedef Eigen::Matrix Vector; - typedef Matrix::ConstAlignedMapType ConstAlignedMapMat; - typedef Vector::ConstAlignedMapType ConstAlignedMapVec; - typedef Vector::AlignedMapType AlignedMapVec; - - const internal::ConvDims m_dim; // Various dimensions of convolution - - Vector m_filter_data; // Filter parameters. Total length is - // (in_channels x out_channels x filter_rows x filter_cols) - // See Utils/Convolution.h for its layout - - Vector m_df_data; // Derivative of filters, same dimension as m_filter_data - - Vector m_bias; // Bias term for the output channels, out_channels x 1. (One bias term per channel) - Vector m_db; // Derivative of bias, same dimension as m_bias - - Matrix m_z; // Linear term, z = conv(in, w) + b. Each column is an observation - Matrix m_a; // Output of this layer, a = act(z) - Matrix m_din; // Derivative of the input of this layer - // Note that input of this layer is also the output of previous layer - -public: - /// - /// Constructor - /// - /// \param in_width Width of the input image in each channel. - /// \param in_height Height of the input image in each channel. - /// \param in_channels Number of input channels. - /// \param out_channels Number of output channels. - /// \param window_width Width of the filter. - /// \param window_height Height of the filter. 
- /// - Convolutional(const int in_width, const int in_height, - const int in_channels, const int out_channels, - const int window_width, const int window_height) : - Layer(in_width * in_height * in_channels, - (in_width - window_width + 1) * (in_height - window_height + 1) * out_channels), - m_dim(in_channels, out_channels, in_height, in_width, window_height, window_width) - {} - - void init(const Scalar& mu, const Scalar& sigma, RNG& rng) - { - // Set data dimension - const int filter_data_size = m_dim.in_channels * m_dim.out_channels * m_dim.filter_rows * m_dim.filter_cols; - m_filter_data.resize(filter_data_size); - m_df_data.resize(filter_data_size); - - // Random initialization of filter parameters - internal::set_normal_random(m_filter_data.data(), filter_data_size, rng, mu, sigma); - - // Bias term - m_bias.resize(m_dim.out_channels); - m_db.resize(m_dim.out_channels); - internal::set_normal_random(m_bias.data(), m_dim.out_channels, rng, mu, sigma); - } - - // http://cs231n.github.io/convolutional-networks/ - void forward(const Matrix& prev_layer_data) - { - // Each column is an observation - const int nobs = prev_layer_data.cols(); - - // Linear term, z = conv(in, w) + b - m_z.resize(this->m_out_size, nobs); - // Convolution - internal::convolve_valid(m_dim, prev_layer_data.data(), true, nobs, - m_filter_data.data(), m_z.data() - ); - // Add bias terms - // Each column of m_z contains m_dim.out_channels channels, and each channel has - // m_dim.conv_rows * m_dim.conv_cols elements - int channel_start_row = 0; - const int channel_nelem = m_dim.conv_rows * m_dim.conv_cols; - for(int i = 0; i < m_dim.out_channels; i++, channel_start_row += channel_nelem) + private: + typedef Eigen::Matrix Matrix; + typedef Eigen::Matrix Vector; + typedef Matrix::ConstAlignedMapType ConstAlignedMapMat; + typedef Vector::ConstAlignedMapType ConstAlignedMapVec; + typedef Vector::AlignedMapType AlignedMapVec; + + const internal::ConvDims m_dim; // Various dimensions of convolution + + Vector m_filter_data; // Filter parameters. Total length is + // (in_channels x out_channels x filter_rows x filter_cols) + // See Utils/Convolution.h for its layout + + Vector m_df_data; // Derivative of filters, same dimension as m_filter_data + + Vector m_bias; // Bias term for the output channels, out_channels x 1. (One bias term per channel) + Vector m_db; // Derivative of bias, same dimension as m_bias + + Matrix m_z; // Linear term, z = conv(in, w) + b. Each column is an observation + Matrix m_a; // Output of this layer, a = act(z) + Matrix m_din; // Derivative of the input of this layer + // Note that input of this layer is also the output of previous layer + + public: + /// + /// Constructor + /// + /// \param in_width Width of the input image in each channel. + /// \param in_height Height of the input image in each channel. + /// \param in_channels Number of input channels. + /// \param out_channels Number of output channels. + /// \param window_width Width of the filter. + /// \param window_height Height of the filter. 
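The constructor follows below. As a worked example of its output size (the numbers are illustrative, not taken from the patch): for 28 x 28 single-channel inputs and eight 5 x 5 filters, the "valid" convolution produces (28 - 5 + 1) * (28 - 5 + 1) * 8 = 24 * 24 * 8 = 4608 output units, which is exactly the out_size passed to the Layer base constructor. A hypothetical instantiation checking this:

#include "MiniDNN.h"
using namespace MiniDNN;

int main()
{
    // ReLU is just one possible Activation template argument
    Convolutional<ReLU> conv(28, 28, 1, 8, 5, 5);
    return conv.out_size() == 4608 ? 0 : 1;
}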
+ /// + Convolutional(const int in_width, const int in_height, + const int in_channels, const int out_channels, + const int window_width, const int window_height) : + Layer(in_width * in_height * in_channels, + (in_width - window_width + 1) * (in_height - window_height + 1) * out_channels), + m_dim(in_channels, out_channels, in_height, in_width, window_height, + window_width) + {} + + void init(const Scalar& mu, const Scalar& sigma, RNG& rng) + { + // Set data dimension + const int filter_data_size = m_dim.in_channels * m_dim.out_channels * + m_dim.filter_rows * m_dim.filter_cols; + m_filter_data.resize(filter_data_size); + m_df_data.resize(filter_data_size); + // Random initialization of filter parameters + internal::set_normal_random(m_filter_data.data(), filter_data_size, rng, mu, + sigma); + // Bias term + m_bias.resize(m_dim.out_channels); + m_db.resize(m_dim.out_channels); + internal::set_normal_random(m_bias.data(), m_dim.out_channels, rng, mu, sigma); + } + + // http://cs231n.github.io/convolutional-networks/ + void forward(const Matrix& prev_layer_data) + { + // Each column is an observation + const int nobs = prev_layer_data.cols(); + // Linear term, z = conv(in, w) + b + m_z.resize(this->m_out_size, nobs); + // Convolution + internal::convolve_valid(m_dim, prev_layer_data.data(), true, nobs, + m_filter_data.data(), m_z.data() + ); + // Add bias terms + // Each column of m_z contains m_dim.out_channels channels, and each channel has + // m_dim.conv_rows * m_dim.conv_cols elements + int channel_start_row = 0; + const int channel_nelem = m_dim.conv_rows * m_dim.conv_cols; + + for (int i = 0; i < m_dim.out_channels; i++, channel_start_row += channel_nelem) + { + m_z.block(channel_start_row, 0, channel_nelem, nobs).array() += m_bias[i]; + } + + // Apply activation function + m_a.resize(this->m_out_size, nobs); + Activation::activate(m_z, m_a); + } + + const Matrix& output() const + { + return m_a; + } + + // prev_layer_data: in_size x nobs + // next_layer_data: out_size x nobs + // https://grzegorzgwardys.wordpress.com/2016/04/22/8/ + void backprop(const Matrix& prev_layer_data, const Matrix& next_layer_data) + { + const int nobs = prev_layer_data.cols(); + // After forward stage, m_z contains z = conv(in, w) + b + // Now we need to calculate d(L) / d(z) = [d(a) / d(z)] * [d(L) / d(a)] + // d(L) / d(a) is computed in the next layer, contained in next_layer_data + // The Jacobian matrix J = d(a) / d(z) is determined by the activation function + Matrix& dLz = m_z; + Activation::apply_jacobian(m_z, m_a, next_layer_data, dLz); + // z_j = sum_i(conv(in_i, w_ij)) + b_j + // + // d(z_k) / d(w_ij) = 0, if k != j + // d(L) / d(w_ij) = [d(z_j) / d(w_ij)] * [d(L) / d(z_j)] = sum_i{ [d(z_j) / d(w_ij)] * [d(L) / d(z_j)] } + // = sum_i(conv(in_i, d(L) / d(z_j))) + // + // z_j is an image (matrix), b_j is a scalar + // d(z_j) / d(b_j) = a matrix of the same size of d(z_j) filled with 1 + // d(L) / d(b_j) = (d(L) / d(z_j)).sum() + // + // d(z_j) / d(in_i) = conv_full_op(w_ij_rotate) + // d(L) / d(in_i) = sum_j((d(z_j) / d(in_i)) * (d(L) / d(z_j))) = sum_j(conv_full(d(L) / d(z_j), w_ij_rotate)) + // Derivative for weights + internal::ConvDims back_conv_dim(nobs, m_dim.out_channels, m_dim.channel_rows, + m_dim.channel_cols, + m_dim.conv_rows, m_dim.conv_cols); + internal::convolve_valid(back_conv_dim, prev_layer_data.data(), false, + m_dim.in_channels, + dLz.data(), m_df_data.data() + ); + m_df_data /= nobs; + // Derivative for bias + // Aggregate d(L) / d(z) in each output channel + 
ConstAlignedMapMat dLz_by_channel(dLz.data(), m_dim.conv_rows * m_dim.conv_cols, + m_dim.out_channels * nobs); + Vector dLb = dLz_by_channel.colwise().sum(); + // Average over observations + ConstAlignedMapMat dLb_by_obs(dLb.data(), m_dim.out_channels, nobs); + m_db.noalias() = dLb_by_obs.rowwise().mean(); + // Compute d(L) / d_in = conv_full(d(L) / d(z), w_rotate) + m_din.resize(this->m_in_size, nobs); + internal::ConvDims conv_full_dim(m_dim.out_channels, m_dim.in_channels, + m_dim.conv_rows, m_dim.conv_cols, m_dim.filter_rows, m_dim.filter_cols); + internal::convolve_full(conv_full_dim, dLz.data(), nobs, + m_filter_data.data(), m_din.data() + ); + } + + const Matrix& backprop_data() const + { + return m_din; + } + + void update(Optimizer& opt) + { + ConstAlignedMapVec dw(m_df_data.data(), m_df_data.size()); + ConstAlignedMapVec db(m_db.data(), m_db.size()); + AlignedMapVec w(m_filter_data.data(), m_filter_data.size()); + AlignedMapVec b(m_bias.data(), m_bias.size()); + opt.update(dw, w); + opt.update(db, b); + } + + std::vector get_parameters() const + { + std::vector res(m_filter_data.size() + m_bias.size()); + // Copy the data of filters and bias to a long vector + std::copy(m_filter_data.data(), m_filter_data.data() + m_filter_data.size(), + res.begin()); + std::copy(m_bias.data(), m_bias.data() + m_bias.size(), + res.begin() + m_filter_data.size()); + return res; + } + + void set_parameters(const std::vector& param) { - m_z.block(channel_start_row, 0, channel_nelem, nobs).array() += m_bias[i]; + if (static_cast(param.size()) != m_filter_data.size() + m_bias.size()) + { + throw std::invalid_argument("Parameter size does not match"); + } + + std::copy(param.begin(), param.begin() + m_filter_data.size(), + m_filter_data.data()); + std::copy(param.begin() + m_filter_data.size(), param.end(), m_bias.data()); } - // Apply activation function - m_a.resize(this->m_out_size, nobs); - Activation::activate(m_z, m_a); - } - - const Matrix& output() const - { - return m_a; - } - - // prev_layer_data: in_size x nobs - // next_layer_data: out_size x nobs - // https://grzegorzgwardys.wordpress.com/2016/04/22/8/ - void backprop(const Matrix& prev_layer_data, const Matrix& next_layer_data) - { - const int nobs = prev_layer_data.cols(); - - // After forward stage, m_z contains z = conv(in, w) + b - // Now we need to calculate d(L) / d(z) = [d(a) / d(z)] * [d(L) / d(a)] - // d(L) / d(a) is computed in the next layer, contained in next_layer_data - // The Jacobian matrix J = d(a) / d(z) is determined by the activation function - Matrix& dLz = m_z; - Activation::apply_jacobian(m_z, m_a, next_layer_data, dLz); - - // z_j = sum_i(conv(in_i, w_ij)) + b_j - // - // d(z_k) / d(w_ij) = 0, if k != j - // d(L) / d(w_ij) = [d(z_j) / d(w_ij)] * [d(L) / d(z_j)] = sum_i{ [d(z_j) / d(w_ij)] * [d(L) / d(z_j)] } - // = sum_i(conv(in_i, d(L) / d(z_j))) - // - // z_j is an image (matrix), b_j is a scalar - // d(z_j) / d(b_j) = a matrix of the same size of d(z_j) filled with 1 - // d(L) / d(b_j) = (d(L) / d(z_j)).sum() - // - // d(z_j) / d(in_i) = conv_full_op(w_ij_rotate) - // d(L) / d(in_i) = sum_j((d(z_j) / d(in_i)) * (d(L) / d(z_j))) = sum_j(conv_full(d(L) / d(z_j), w_ij_rotate)) - - // Derivative for weights - internal::ConvDims back_conv_dim(nobs, m_dim.out_channels, m_dim.channel_rows, m_dim.channel_cols, - m_dim.conv_rows, m_dim.conv_cols); - internal::convolve_valid(back_conv_dim, prev_layer_data.data(), false, m_dim.in_channels, - dLz.data(), m_df_data.data() - ); - m_df_data /= nobs; - - // Derivative for bias 
- // Aggregate d(L) / d(z) in each output channel - ConstAlignedMapMat dLz_by_channel(dLz.data(), m_dim.conv_rows * m_dim.conv_cols, m_dim.out_channels * nobs); - Vector dLb = dLz_by_channel.colwise().sum(); - // Average over observations - ConstAlignedMapMat dLb_by_obs(dLb.data(), m_dim.out_channels, nobs); - m_db.noalias() = dLb_by_obs.rowwise().mean(); - - // Compute d(L) / d_in = conv_full(d(L) / d(z), w_rotate) - m_din.resize(this->m_in_size, nobs); - internal::ConvDims conv_full_dim(m_dim.out_channels, m_dim.in_channels, m_dim.conv_rows, m_dim.conv_cols, m_dim.filter_rows, m_dim.filter_cols); - internal::convolve_full(conv_full_dim, dLz.data(), nobs, - m_filter_data.data(), m_din.data() - ); - } - - const Matrix& backprop_data() const - { - return m_din; - } - - void update(Optimizer& opt) - { - ConstAlignedMapVec dw(m_df_data.data(), m_df_data.size()); - ConstAlignedMapVec db(m_db.data(), m_db.size()); - AlignedMapVec w(m_filter_data.data(), m_filter_data.size()); - AlignedMapVec b(m_bias.data(), m_bias.size()); - - opt.update(dw, w); - opt.update(db, b); - } - - std::vector get_parameters() const - { - std::vector res(m_filter_data.size() + m_bias.size()); - // Copy the data of filters and bias to a long vector - std::copy(m_filter_data.data(), m_filter_data.data() + m_filter_data.size(), res.begin()); - std::copy(m_bias.data(), m_bias.data() + m_bias.size(), res.begin() + m_filter_data.size()); - - return res; - } - - void set_parameters(const std::vector& param) - { - if(static_cast(param.size()) != m_filter_data.size() + m_bias.size()) - throw std::invalid_argument("Parameter size does not match"); - - std::copy(param.begin(), param.begin() + m_filter_data.size(), m_filter_data.data()); - std::copy(param.begin() + m_filter_data.size(), param.end(), m_bias.data()); - } - - std::vector get_derivatives() const - { - std::vector res(m_df_data.size() + m_db.size()); - // Copy the data of filters and bias to a long vector - std::copy(m_df_data.data(), m_df_data.data() + m_df_data.size(), res.begin()); - std::copy(m_db.data(), m_db.data() + m_db.size(), res.begin() + m_df_data.size()); - - return res; - } + std::vector get_derivatives() const + { + std::vector res(m_df_data.size() + m_db.size()); + // Copy the data of filters and bias to a long vector + std::copy(m_df_data.data(), m_df_data.data() + m_df_data.size(), res.begin()); + std::copy(m_db.data(), m_db.data() + m_db.size(), + res.begin() + m_df_data.size()); + return res; + } }; diff --git a/include/Layer/FullyConnected.h b/include/Layer/FullyConnected.h index 39ed2a8..9a549a9 100644 --- a/include/Layer/FullyConnected.h +++ b/include/Layer/FullyConnected.h @@ -8,7 +8,8 @@ #include "../Layer.h" #include "../Utils/Random.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -19,132 +20,126 @@ namespace MiniDNN { template class FullyConnected: public Layer { -private: - typedef Eigen::Matrix Matrix; - typedef Eigen::Matrix Vector; - typedef Vector::ConstAlignedMapType ConstAlignedMapVec; - typedef Vector::AlignedMapType AlignedMapVec; - - Matrix m_weight; // Weight parameters, W(in_size x out_size) - Vector m_bias; // Bias parameters, b(out_size x 1) - Matrix m_dw; // Derivative of weights - Vector m_db; // Derivative of bias - Matrix m_z; // Linear term, z = W' * in + b - Matrix m_a; // Output of this layer, a = act(z) - Matrix m_din; // Derivative of the input of this layer. - // Note that input of this layer is also the output of previous layer - -public: - /// - /// Constructor - /// - /// \param in_size Number of input units. 
- /// \param out_size Number of output units. - /// - FullyConnected(const int in_size, const int out_size) : - Layer(in_size, out_size) - {} - - void init(const Scalar& mu, const Scalar& sigma, RNG& rng) - { - m_weight.resize(this->m_in_size, this->m_out_size); - m_bias.resize(this->m_out_size); - m_dw.resize(this->m_in_size, this->m_out_size); - m_db.resize(this->m_out_size); - - // Set random coefficients - internal::set_normal_random(m_weight.data(), m_weight.size(), rng, mu, sigma); - internal::set_normal_random(m_bias.data(), m_bias.size(), rng, mu, sigma); - } - - // prev_layer_data: in_size x nobs - void forward(const Matrix& prev_layer_data) - { - const int nobs = prev_layer_data.cols(); - // Linear term z = W' * in + b - m_z.resize(this->m_out_size, nobs); - m_z.noalias() = m_weight.transpose() * prev_layer_data; - m_z.colwise() += m_bias; - - // Apply activation function - m_a.resize(this->m_out_size, nobs); - Activation::activate(m_z, m_a); - } - - const Matrix& output() const - { - return m_a; - } - - // prev_layer_data: in_size x nobs - // next_layer_data: out_size x nobs - void backprop(const Matrix& prev_layer_data, const Matrix& next_layer_data) - { - const int nobs = prev_layer_data.cols(); - - // After forward stage, m_z contains z = W' * in + b - // Now we need to calculate d(L) / d(z) = [d(a) / d(z)] * [d(L) / d(a)] - // d(L) / d(a) is computed in the next layer, contained in next_layer_data - // The Jacobian matrix J = d(a) / d(z) is determined by the activation function - Matrix& dLz = m_z; - Activation::apply_jacobian(m_z, m_a, next_layer_data, dLz); - - // Now dLz contains d(L) / d(z) - // Derivative for weights, d(L) / d(W) = [d(L) / d(z)] * in' - m_dw.noalias() = prev_layer_data * dLz.transpose() / nobs; - - // Derivative for bias, d(L) / d(b) = d(L) / d(z) - m_db.noalias() = dLz.rowwise().mean(); - - // Compute d(L) / d_in = W * [d(L) / d(z)] - m_din.resize(this->m_in_size, nobs); - m_din.noalias() = m_weight * dLz; - } - - const Matrix& backprop_data() const - { - return m_din; - } - - void update(Optimizer& opt) - { - ConstAlignedMapVec dw(m_dw.data(), m_dw.size()); - ConstAlignedMapVec db(m_db.data(), m_db.size()); - AlignedMapVec w(m_weight.data(), m_weight.size()); - AlignedMapVec b(m_bias.data(), m_bias.size()); - - opt.update(dw, w); - opt.update(db, b); - } - - std::vector get_parameters() const - { - std::vector res(m_weight.size() + m_bias.size()); - // Copy the data of weights and bias to a long vector - std::copy(m_weight.data(), m_weight.data() + m_weight.size(), res.begin()); - std::copy(m_bias.data(), m_bias.data() + m_bias.size(), res.begin() + m_weight.size()); - - return res; - } - - void set_parameters(const std::vector& param) - { - if(static_cast(param.size()) != m_weight.size() + m_bias.size()) - throw std::invalid_argument("Parameter size does not match"); - - std::copy(param.begin(), param.begin() + m_weight.size(), m_weight.data()); - std::copy(param.begin() + m_weight.size(), param.end(), m_bias.data()); - } - - std::vector get_derivatives() const - { - std::vector res(m_dw.size() + m_db.size()); - // Copy the data of weights and bias to a long vector - std::copy(m_dw.data(), m_dw.data() + m_dw.size(), res.begin()); - std::copy(m_db.data(), m_db.data() + m_db.size(), res.begin() + m_dw.size()); - - return res; - } + private: + typedef Eigen::Matrix Matrix; + typedef Eigen::Matrix Vector; + typedef Vector::ConstAlignedMapType ConstAlignedMapVec; + typedef Vector::AlignedMapType AlignedMapVec; + + Matrix m_weight; // Weight parameters, 
W(in_size x out_size) + Vector m_bias; // Bias parameters, b(out_size x 1) + Matrix m_dw; // Derivative of weights + Vector m_db; // Derivative of bias + Matrix m_z; // Linear term, z = W' * in + b + Matrix m_a; // Output of this layer, a = act(z) + Matrix m_din; // Derivative of the input of this layer. + // Note that input of this layer is also the output of previous layer + + public: + /// + /// Constructor + /// + /// \param in_size Number of input units. + /// \param out_size Number of output units. + /// + FullyConnected(const int in_size, const int out_size) : + Layer(in_size, out_size) + {} + + void init(const Scalar& mu, const Scalar& sigma, RNG& rng) + { + m_weight.resize(this->m_in_size, this->m_out_size); + m_bias.resize(this->m_out_size); + m_dw.resize(this->m_in_size, this->m_out_size); + m_db.resize(this->m_out_size); + // Set random coefficients + internal::set_normal_random(m_weight.data(), m_weight.size(), rng, mu, sigma); + internal::set_normal_random(m_bias.data(), m_bias.size(), rng, mu, sigma); + } + + // prev_layer_data: in_size x nobs + void forward(const Matrix& prev_layer_data) + { + const int nobs = prev_layer_data.cols(); + // Linear term z = W' * in + b + m_z.resize(this->m_out_size, nobs); + m_z.noalias() = m_weight.transpose() * prev_layer_data; + m_z.colwise() += m_bias; + // Apply activation function + m_a.resize(this->m_out_size, nobs); + Activation::activate(m_z, m_a); + } + + const Matrix& output() const + { + return m_a; + } + + // prev_layer_data: in_size x nobs + // next_layer_data: out_size x nobs + void backprop(const Matrix& prev_layer_data, const Matrix& next_layer_data) + { + const int nobs = prev_layer_data.cols(); + // After forward stage, m_z contains z = W' * in + b + // Now we need to calculate d(L) / d(z) = [d(a) / d(z)] * [d(L) / d(a)] + // d(L) / d(a) is computed in the next layer, contained in next_layer_data + // The Jacobian matrix J = d(a) / d(z) is determined by the activation function + Matrix& dLz = m_z; + Activation::apply_jacobian(m_z, m_a, next_layer_data, dLz); + // Now dLz contains d(L) / d(z) + // Derivative for weights, d(L) / d(W) = [d(L) / d(z)] * in' + m_dw.noalias() = prev_layer_data * dLz.transpose() / nobs; + // Derivative for bias, d(L) / d(b) = d(L) / d(z) + m_db.noalias() = dLz.rowwise().mean(); + // Compute d(L) / d_in = W * [d(L) / d(z)] + m_din.resize(this->m_in_size, nobs); + m_din.noalias() = m_weight * dLz; + } + + const Matrix& backprop_data() const + { + return m_din; + } + + void update(Optimizer& opt) + { + ConstAlignedMapVec dw(m_dw.data(), m_dw.size()); + ConstAlignedMapVec db(m_db.data(), m_db.size()); + AlignedMapVec w(m_weight.data(), m_weight.size()); + AlignedMapVec b(m_bias.data(), m_bias.size()); + opt.update(dw, w); + opt.update(db, b); + } + + std::vector get_parameters() const + { + std::vector res(m_weight.size() + m_bias.size()); + // Copy the data of weights and bias to a long vector + std::copy(m_weight.data(), m_weight.data() + m_weight.size(), res.begin()); + std::copy(m_bias.data(), m_bias.data() + m_bias.size(), + res.begin() + m_weight.size()); + return res; + } + + void set_parameters(const std::vector& param) + { + if (static_cast(param.size()) != m_weight.size() + m_bias.size()) + { + throw std::invalid_argument("Parameter size does not match"); + } + + std::copy(param.begin(), param.begin() + m_weight.size(), m_weight.data()); + std::copy(param.begin() + m_weight.size(), param.end(), m_bias.data()); + } + + std::vector get_derivatives() const + { + std::vector res(m_dw.size() + 
m_db.size()); + // Copy the data of weights and bias to a long vector + std::copy(m_dw.data(), m_dw.data() + m_dw.size(), res.begin()); + std::copy(m_db.data(), m_db.data() + m_db.size(), res.begin() + m_dw.size()); + return res; + } }; diff --git a/include/Layer/MaxPooling.h b/include/Layer/MaxPooling.h index 99fc692..402658f 100644 --- a/include/Layer/MaxPooling.h +++ b/include/Layer/MaxPooling.h @@ -8,7 +8,8 @@ #include "../Layer.h" #include "../Utils/FindMax.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -21,128 +22,150 @@ namespace MiniDNN { template class MaxPooling: public Layer { -private: - typedef Eigen::Matrix Matrix; - typedef Eigen::MatrixXi IntMatrix; - - const int m_channel_rows; - const int m_channel_cols; - const int m_in_channels; - const int m_pool_rows; - const int m_pool_cols; - - const int m_out_rows; - const int m_out_cols; - - IntMatrix m_loc; // Record the locations of maximums - Matrix m_z; // Max pooling results - Matrix m_a; // Output of this layer, a = act(z) - Matrix m_din; // Derivative of the input of this layer. - // Note that input of this layer is also the output of previous layer - -public: - // Currently we only implement the "valid" rule - // https://stackoverflow.com/q/37674306 - /// - /// Constructor - /// - /// \param in_width Width of the input image in each channel. - /// \param in_height Height of the input image in each channel. - /// \param in_channels Number of input channels. - /// \param pooling_width Width of the pooling window. - /// \param pooling_height Height of the pooling window. - /// - MaxPooling(const int in_width, const int in_height, const int in_channels, - const int pooling_width, const int pooling_height) : - Layer(in_width * in_height * in_channels, - (in_width / pooling_width) * (in_height / pooling_height) * in_channels), - m_channel_rows(in_height), m_channel_cols(in_width), m_in_channels(in_channels), - m_pool_rows(pooling_height), m_pool_cols(pooling_width), - m_out_rows(m_channel_rows / m_pool_rows), m_out_cols(m_channel_cols / m_pool_cols) - {} - - void init(const Scalar& mu, const Scalar& sigma, RNG& rng) {} - - void forward(const Matrix& prev_layer_data) - { - // Each column is an observation - const int nobs = prev_layer_data.cols(); - m_loc.resize(this->m_out_size, nobs); - m_z.resize(this->m_out_size, nobs); - - // Use m_loc to store the address of each pooling block relative to the beginning of the data - int* loc_data = m_loc.data(); - const int channel_end = prev_layer_data.size(); - const int channel_stride = m_channel_rows * m_channel_cols; - const int col_end_gap = m_channel_rows * m_pool_cols * m_out_cols; - const int col_stride = m_channel_rows * m_pool_cols; - const int row_end_gap = m_out_rows * m_pool_rows; - for(int channel_start = 0; channel_start < channel_end; channel_start += channel_stride) + private: + typedef Eigen::Matrix Matrix; + typedef Eigen::MatrixXi IntMatrix; + + const int m_channel_rows; + const int m_channel_cols; + const int m_in_channels; + const int m_pool_rows; + const int m_pool_cols; + + const int m_out_rows; + const int m_out_cols; + + IntMatrix m_loc; // Record the locations of maximums + Matrix m_z; // Max pooling results + Matrix m_a; // Output of this layer, a = act(z) + Matrix m_din; // Derivative of the input of this layer. 
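The member comment continues below. Before the constructor, a worked example of the pooled output size (illustrative numbers, not from the patch): a 24 x 24 input with 8 channels and 2 x 2 pooling windows yields (24 / 2) * (24 / 2) * 8 = 12 * 12 * 8 = 1152 output units under the "valid" rule used here. A hypothetical instantiation checking this:

#include "MiniDNN.h"
using namespace MiniDNN;

int main()
{
    // The Activation argument only affects a = act(z), not the output size
    MaxPooling<ReLU> pool(24, 24, 8, 2, 2);
    return pool.out_size() == 1152 ? 0 : 1;
}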
+ // Note that input of this layer is also the output of previous layer + + public: + // Currently we only implement the "valid" rule + // https://stackoverflow.com/q/37674306 + /// + /// Constructor + /// + /// \param in_width Width of the input image in each channel. + /// \param in_height Height of the input image in each channel. + /// \param in_channels Number of input channels. + /// \param pooling_width Width of the pooling window. + /// \param pooling_height Height of the pooling window. + /// + MaxPooling(const int in_width, const int in_height, const int in_channels, + const int pooling_width, const int pooling_height) : + Layer(in_width * in_height * in_channels, + (in_width / pooling_width) * (in_height / pooling_height) * in_channels), + m_channel_rows(in_height), m_channel_cols(in_width), m_in_channels(in_channels), + m_pool_rows(pooling_height), m_pool_cols(pooling_width), + m_out_rows(m_channel_rows / m_pool_rows), + m_out_cols(m_channel_cols / m_pool_cols) + {} + + void init(const Scalar& mu, const Scalar& sigma, RNG& rng) {} + + void forward(const Matrix& prev_layer_data) { - const int col_end = channel_start + col_end_gap; - for(int col_start = channel_start; col_start < col_end; col_start += col_stride) + // Each column is an observation + const int nobs = prev_layer_data.cols(); + m_loc.resize(this->m_out_size, nobs); + m_z.resize(this->m_out_size, nobs); + // Use m_loc to store the address of each pooling block relative to the beginning of the data + int* loc_data = m_loc.data(); + const int channel_end = prev_layer_data.size(); + const int channel_stride = m_channel_rows * m_channel_cols; + const int col_end_gap = m_channel_rows * m_pool_cols * m_out_cols; + const int col_stride = m_channel_rows * m_pool_cols; + const int row_end_gap = m_out_rows * m_pool_rows; + + for (int channel_start = 0; channel_start < channel_end; + channel_start += channel_stride) { - const int row_end = col_start + row_end_gap; - for(int row_start = col_start; row_start < row_end; row_start += m_pool_rows, loc_data++) - *loc_data = row_start; + const int col_end = channel_start + col_end_gap; + + for (int col_start = channel_start; col_start < col_end; + col_start += col_stride) + { + const int row_end = col_start + row_end_gap; + + for (int row_start = col_start; row_start < row_end; + row_start += m_pool_rows, loc_data++) + { + *loc_data = row_start; + } + } } - } - - // Find the location of the max value in each block - loc_data = m_loc.data(); - const int* const loc_end = loc_data + m_loc.size(); - Scalar* z_data = m_z.data(); - const Scalar* src = prev_layer_data.data(); - for(; loc_data < loc_end; loc_data++, z_data++) - { - const int offset = *loc_data; - *z_data = internal::find_block_max(src + offset, m_pool_rows, m_pool_cols, m_channel_rows, *loc_data); - *loc_data += offset; - } - - // Apply activation function - m_a.resize(this->m_out_size, nobs); - Activation::activate(m_z, m_a); - } - const Matrix& output() const { return m_a; } + // Find the location of the max value in each block + loc_data = m_loc.data(); + const int* const loc_end = loc_data + m_loc.size(); + Scalar* z_data = m_z.data(); + const Scalar* src = prev_layer_data.data(); - // prev_layer_data: in_size x nobs - // next_layer_data: out_size x nobs - void backprop(const Matrix& prev_layer_data, const Matrix& next_layer_data) - { - const int nobs = prev_layer_data.cols(); + for (; loc_data < loc_end; loc_data++, z_data++) + { + const int offset = *loc_data; + *z_data = internal::find_block_max(src + offset, 
m_pool_rows, m_pool_cols, + m_channel_rows, *loc_data); + *loc_data += offset; + } - // After forward stage, m_z contains z = max_pooling(in) - // Now we need to calculate d(L) / d(z) = [d(a) / d(z)] * [d(L) / d(a)] - // d(L) / d(z) is computed in the next layer, contained in next_layer_data - // The Jacobian matrix J = d(a) / d(z) is determined by the activation function - Matrix& dLz = m_z; - Activation::apply_jacobian(m_z, m_a, next_layer_data, dLz); + // Apply activation function + m_a.resize(this->m_out_size, nobs); + Activation::activate(m_z, m_a); + } - // d(L) / d(in_i) = sum_j{ [d(z_j) / d(in_i)] * [d(L) / d(z_j)] } - // d(z_j) / d(in_i) = 1 if in_i is used to compute z_j and is the maximum - // = 0 otherwise - m_din.resize(this->m_in_size, nobs); - m_din.setZero(); - const int dLz_size = dLz.size(); + const Matrix& output() const + { + return m_a; + } - const Scalar* dLz_data = dLz.data(); - const int* loc_data = m_loc.data(); - Scalar* din_data = m_din.data(); - for(int i = 0; i < dLz_size; i++) - din_data[loc_data[i]] += dLz_data[i]; - } + // prev_layer_data: in_size x nobs + // next_layer_data: out_size x nobs + void backprop(const Matrix& prev_layer_data, const Matrix& next_layer_data) + { + const int nobs = prev_layer_data.cols(); + // After forward stage, m_z contains z = max_pooling(in) + // Now we need to calculate d(L) / d(z) = [d(a) / d(z)] * [d(L) / d(a)] + // d(L) / d(z) is computed in the next layer, contained in next_layer_data + // The Jacobian matrix J = d(a) / d(z) is determined by the activation function + Matrix& dLz = m_z; + Activation::apply_jacobian(m_z, m_a, next_layer_data, dLz); + // d(L) / d(in_i) = sum_j{ [d(z_j) / d(in_i)] * [d(L) / d(z_j)] } + // d(z_j) / d(in_i) = 1 if in_i is used to compute z_j and is the maximum + // = 0 otherwise + m_din.resize(this->m_in_size, nobs); + m_din.setZero(); + const int dLz_size = dLz.size(); + const Scalar* dLz_data = dLz.data(); + const int* loc_data = m_loc.data(); + Scalar* din_data = m_din.data(); + + for (int i = 0; i < dLz_size; i++) + { + din_data[loc_data[i]] += dLz_data[i]; + } + } - const Matrix& backprop_data() const { return m_din; } + const Matrix& backprop_data() const + { + return m_din; + } - void update(Optimizer& opt) {} + void update(Optimizer& opt) {} - std::vector get_parameters() const { return std::vector(); } + std::vector get_parameters() const + { + return std::vector(); + } - void set_parameters(const std::vector& param) {} + void set_parameters(const std::vector& param) {} - std::vector get_derivatives() const { return std::vector(); } + std::vector get_derivatives() const + { + return std::vector(); + } }; diff --git a/include/M_Assert.h b/include/M_Assert.h new file mode 100755 index 0000000..aed5541 --- /dev/null +++ b/include/M_Assert.h @@ -0,0 +1,25 @@ +#include + +/// \file +/// Implementation of the assert function for MiniDNN +#pragma once + +inline void __M_Assert(const char* expr_str, bool expr, const char* file, + int line, const char* msg) +{ + if (!expr) + { + std::cerr << "Assert failed:\t" << msg << "\n" + << "Expected:\t" << expr_str << "\n" + << "Source:\t\t" << file << ", line " << line << "\n"; + abort(); + } +} + +#ifndef NDEBUG +#define M_Assert(Expr, Msg) \ +__M_Assert(#Expr, Expr, __FILE__, __LINE__, Msg) +#else +#define M_Assert(Expr, Msg) ; +#endif + diff --git a/include/MiniDNN.h b/include/MiniDNN.h index 231445d..9cf2c67 100644 --- a/include/MiniDNN.h +++ b/include/MiniDNN.h @@ -11,6 +11,7 @@ #include "Layer/FullyConnected.h" #include "Layer/Convolutional.h" 
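As a side note on the new M_Assert.h header added above, a minimal usage sketch follows. The function, the checked condition, and the message are hypothetical and only illustrate the call pattern: in debug builds (NDEBUG undefined) a failed check prints the message, the stringified expression, and the source location before calling abort(), while in release builds the macro expands to an empty statement.

#include <Eigen/Core>
#include "M_Assert.h"

// Hypothetical helper: abort (in debug builds only) if two data sets
// disagree on the number of observations (columns).
void check_shapes(const Eigen::MatrixXd& x, const Eigen::MatrixXd& y)
{
    M_Assert(x.cols() == y.cols(),
             "x and y must contain the same number of observations");
}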
#include "Layer/MaxPooling.h" +#include "EigenStream.h" #include "Activation/ReLU.h" #include "Activation/Identity.h" diff --git a/include/Network.h b/include/Network.h index 7af186d..275523d 100644 --- a/include/Network.h +++ b/include/Network.h @@ -11,7 +11,8 @@ #include "Callback.h" #include "Utils/Random.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -27,410 +28,462 @@ namespace MiniDNN { /// class Network { -private: - typedef Eigen::Matrix Matrix; - typedef Eigen::RowVectorXi IntegerVector; - - RNG m_default_rng; // Built-in RNG - RNG& m_rng; // Reference to the RNG provided by the user, - // otherwise reference to m_default_rng - std::vector m_layers; // Pointers to hidden layers - Output* m_output; // The output layer - Callback m_default_callback; // Default callback function - Callback* m_callback; // Points to user-provided callback function, - // otherwise points to m_default_callback - - // Check dimensions of layers - void check_unit_sizes() const - { - const int nlayer = num_layers(); - if(nlayer <= 1) - return; - - for(int i = 1; i < nlayer; i++) + private: + typedef Eigen::Matrix Matrix; + typedef Eigen::RowVectorXi IntegerVector; + + RNG m_default_rng; // Built-in RNG + RNG& + m_rng; // Reference to the RNG provided by the user, + // otherwise reference to m_default_rng + std::vector m_layers; // Pointers to hidden layers + Output* m_output; // The output layer + Callback m_default_callback; // Default callback function + Callback* + m_callback; // Points to user-provided callback function, + // otherwise points to m_default_callback + + // Check dimensions of layers + void check_unit_sizes() const { - if(m_layers[i]->in_size() != m_layers[i - 1]->out_size()) - throw std::invalid_argument("Unit sizes do not match"); + const int nlayer = num_layers(); + + if (nlayer <= 1) + { + return; + } + + for (int i = 1; i < nlayer; i++) + { + if (m_layers[i]->in_size() != m_layers[i - 1]->out_size()) + { + throw std::invalid_argument("Unit sizes do not match"); + } + } } - } - - // Let each layer compute its output - void forward(const Matrix& input) - { - const int nlayer = num_layers(); - if(nlayer <= 0) - return; - - // First layer - if(input.rows() != m_layers[0]->in_size()) - throw std::invalid_argument("Input data have incorrect dimension"); - m_layers[0]->forward(input); - - // The following layers - for(int i = 1; i < nlayer; i++) + + // Let each layer compute its output + void forward(const Matrix& input) { - m_layers[i]->forward(m_layers[i - 1]->output()); + const int nlayer = num_layers(); + + if (nlayer <= 0) + { + return; + } + + // First layer + if (input.rows() != m_layers[0]->in_size()) + { + throw std::invalid_argument("Input data have incorrect dimension"); + } + + m_layers[0]->forward(input); + + // The following layers + for (int i = 1; i < nlayer; i++) + { + m_layers[i]->forward(m_layers[i - 1]->output()); + } } - } - - // Let each layer compute its gradients of the parameters - // target has two versions: Matrix and RowVectorXi - // The RowVectorXi version is used in classification problems where each - // element is a class label - template - void backprop(const Matrix& input, const TargetType& target) - { - const int nlayer = num_layers(); - if(nlayer <= 0) - return; - - Layer* first_layer = m_layers[0]; - Layer* last_layer = m_layers[nlayer - 1]; - - // Let output layer compute back-propagation data - m_output->check_target_data(target); - m_output->evaluate(last_layer->output(), target); - - // If there is only one hidden layer, "prev_layer_data" will 
be the input data - if(nlayer == 1) + + // Let each layer compute its gradients of the parameters + // target has two versions: Matrix and RowVectorXi + // The RowVectorXi version is used in classification problems where each + // element is a class label + template + void backprop(const Matrix& input, const TargetType& target) { - first_layer->backprop(input, m_output->backprop_data()); - return; + const int nlayer = num_layers(); + + if (nlayer <= 0) + { + return; + } + + Layer* first_layer = m_layers[0]; + Layer* last_layer = m_layers[nlayer - 1]; + // Let output layer compute back-propagation data + m_output->check_target_data(target); + m_output->evaluate(last_layer->output(), target); + + // If there is only one hidden layer, "prev_layer_data" will be the input data + if (nlayer == 1) + { + first_layer->backprop(input, m_output->backprop_data()); + return; + } + + // Compute gradients for the last hidden layer + last_layer->backprop(m_layers[nlayer - 2]->output(), m_output->backprop_data()); + + // Compute gradients for all the hidden layers except for the first one and the last one + for (int i = nlayer - 2; i > 0; i--) + { + m_layers[i]->backprop(m_layers[i - 1]->output(), + m_layers[i + 1]->backprop_data()); + } + + // Compute gradients for the first layer + first_layer->backprop(input, m_layers[1]->backprop_data()); } - // Compute gradients for the last hidden layer - last_layer->backprop(m_layers[nlayer - 2]->output(), m_output->backprop_data()); - // Compute gradients for all the hidden layers except for the first one and the last one - for(int i = nlayer - 2; i > 0; i--) + // Update parameters + void update(Optimizer& opt) { - m_layers[i]->backprop(m_layers[i - 1]->output(), m_layers[i + 1]->backprop_data()); + const int nlayer = num_layers(); + + if (nlayer <= 0) + { + return; + } + + for (int i = 0; i < nlayer; i++) + { + m_layers[i]->update(opt); + } } - // Compute gradients for the first layer - first_layer->backprop(input, m_layers[1]->backprop_data()); - } - - // Update parameters - void update(Optimizer& opt) - { - const int nlayer = num_layers(); - if(nlayer <= 0) - return; - - for(int i = 0; i < nlayer; i++) + + public: + /// + /// Default constructor that creates an empty neural network + /// + Network() : + m_default_rng(1), + m_rng(m_default_rng), + m_output(NULL), + m_default_callback(), + m_callback(&m_default_callback) + {} + + /// + /// Constructor with a user-provided random number generator + /// + /// \param rng A user-provided random number generator object that inherits + /// from the default RNG class. + /// + Network(RNG& rng) : + m_default_rng(1), + m_rng(rng), + m_output(NULL), + m_default_callback(), + m_callback(&m_default_callback) + {} + + /// + /// Destructor that frees the added hidden layers and output layer + /// + ~Network() { - m_layers[i]->update(opt); + const int nlayer = num_layers(); + + for (int i = 0; i < nlayer; i++) + { + delete m_layers[i]; + } + + if (m_output) + { + delete m_output; + } } - } - -public: - /// - /// Default constructor that creates an empty neural network - /// - Network() : - m_default_rng(1), - m_rng(m_default_rng), - m_output(NULL), - m_default_callback(), - m_callback(&m_default_callback) - {} - - /// - /// Constructor with a user-provided random number generator - /// - /// \param rng A user-provided random number generator object that inherits - /// from the default RNG class. 
- /// - Network(RNG& rng) : - m_default_rng(1), - m_rng(rng), - m_output(NULL), - m_default_callback(), - m_callback(&m_default_callback) - {} - - /// - /// Destructor that frees the added hidden layers and output layer - /// - ~Network() - { - const int nlayer = num_layers(); - for(int i = 0; i < nlayer; i++) + + /// + /// Add a hidden layer to the neural network + /// + /// \param layer A pointer to a Layer object, typically constructed from + /// layer classes such as FullyConnected and Convolutional. + /// **NOTE**: the pointer will be handled and freed by the + /// network object, so do not delete it manually. + /// + void add_layer(Layer* layer) { - delete m_layers[i]; + m_layers.push_back(layer); } - if(m_output) - delete m_output; - } - - /// - /// Add a hidden layer to the neural network - /// - /// \param layer A pointer to a Layer object, typically constructed from - /// layer classes such as FullyConnected and Convolutional. - /// **NOTE**: the pointer will be handled and freed by the - /// network object, so do not delete it manually. - /// - void add_layer(Layer* layer) - { - m_layers.push_back(layer); - } - - /// - /// Set the output layer of the neural network - /// - /// \param output A pointer to an Output object, typically constructed from - /// output layer classes such as RegressionMSE and MultiClassEntropy. - /// **NOTE**: the pointer will be handled and freed by the - /// network object, so do not delete it manually. - /// - void set_output(Output* output) - { - if(m_output) - delete m_output; - - m_output = output; - } - - /// - /// Number of hidden layers in the network - /// - int num_layers() const { return m_layers.size(); } - - /// - /// Get the list of hidden layers of the network - /// - std::vector get_layers() const - { - const int nlayer = num_layers(); - std::vector layers(nlayer); - std::copy(m_layers.begin(), m_layers.end(), layers.begin()); - return layers; - } - - /// - /// Get the output layer - /// - const Output* get_output() const { return m_output; } - - /// - /// Set the callback function that can be called during model fitting - /// - /// \param callback A user-provided callback function object that inherits - /// from the default Callback class. - /// - void set_callback(Callback& callback) - { - m_callback = &callback; - } - /// - /// Set the default silent callback function - /// - void set_default_callback() - { - m_callback = &m_default_callback; - } - - /// - /// Initialize layer parameters in the network using normal distribution - /// - /// \param mu Mean of the normal distribution. - /// \param sigma Standard deviation of the normal distribution. - /// \param seed Set the random seed of the %RNG if `seed > 0`, otherwise - /// use the current random state. - /// - void init(const Scalar& mu = Scalar(0), const Scalar& sigma = Scalar(0.01), int seed = -1) - { - check_unit_sizes(); - - if(seed > 0) - m_rng.seed(seed); - - const int nlayer = num_layers(); - for(int i = 0; i < nlayer; i++) + /// + /// Set the output layer of the neural network + /// + /// \param output A pointer to an Output object, typically constructed from + /// output layer classes such as RegressionMSE and MultiClassEntropy. + /// **NOTE**: the pointer will be handled and freed by the + /// network object, so do not delete it manually. 
+ /// + void set_output(Output* output) { - m_layers[i]->init(mu, sigma, m_rng); + if (m_output) + { + delete m_output; + } + + m_output = output; } - } - - /// - /// Get the serialized layer parameters - /// - std::vector< std::vector > get_parameters() const - { - const int nlayer = num_layers(); - std::vector< std::vector > res; - res.reserve(nlayer); - for(int i = 0; i < nlayer; i++) + + /// + /// Number of hidden layers in the network + /// + int num_layers() const { - res.push_back(m_layers[i]->get_parameters()); + return m_layers.size(); } - return res; - } - - /// - /// Set the layer parameters - /// - /// \param param Serialized layer parameters - /// - void set_parameters(const std::vector< std::vector >& param) - { - const int nlayer = num_layers(); - if(static_cast(param.size()) != nlayer) - throw std::invalid_argument("Parameter size does not match"); - - for(int i = 0; i < nlayer; i++) + /// + /// Get the list of hidden layers of the network + /// + std::vector get_layers() const { - m_layers[i]->set_parameters(param[i]); + const int nlayer = num_layers(); + std::vector layers(nlayer); + std::copy(m_layers.begin(), m_layers.end(), layers.begin()); + return layers; } - } - - /// - /// Get the serialized derivatives of layer parameters - /// - std::vector< std::vector > get_derivatives() const - { - const int nlayer = num_layers(); - std::vector< std::vector > res; - res.reserve(nlayer); - for(int i = 0; i < nlayer; i++) + + /// + /// Get the output layer + /// + const Output* get_output() const { - res.push_back(m_layers[i]->get_derivatives()); + return m_output; } - return res; - } - - /// - /// Debugging tool to check parameter gradients - /// - template - void check_gradient(const Matrix& input, const TargetType& target, int npoints, int seed = -1) - { - if(seed > 0) - m_rng.seed(seed); - - this->forward(input); - this->backprop(input, target); - std::vector< std::vector > param = this->get_parameters(); - std::vector< std::vector > deriv = this->get_derivatives(); - - const Scalar eps = 1e-5; - const int nlayer = deriv.size(); - for(int i = 0; i < npoints; i++) + /// + /// Set the callback function that can be called during model fitting + /// + /// \param callback A user-provided callback function object that inherits + /// from the default Callback class. + /// + void set_callback(Callback& callback) { - // Randomly select a layer - const int layer_id = int(m_rng.rand() * nlayer); - // Randomly pick a parameter, note that some layers may have no parameters - const int nparam = deriv[layer_id].size(); - if(nparam < 1) continue; - const int param_id = int(m_rng.rand() * nparam); - // Turbulate the parameter a little bit - const Scalar old = param[layer_id][param_id]; - - param[layer_id][param_id] -= eps; - this->set_parameters(param); - this->forward(input); - this->backprop(input, target); - const Scalar loss_pre = m_output->loss(); + m_callback = &callback; + } + /// + /// Set the default silent callback function + /// + void set_default_callback() + { + m_callback = &m_default_callback; + } + + /// + /// Initialize layer parameters in the network using normal distribution + /// + /// \param mu Mean of the normal distribution. + /// \param sigma Standard deviation of the normal distribution. + /// \param seed Set the random seed of the %RNG if `seed > 0`, otherwise + /// use the current random state. 
+ /// + void init(const Scalar& mu = Scalar(0), const Scalar& sigma = Scalar(0.01), + int seed = -1) + { + check_unit_sizes(); + + if (seed > 0) + { + m_rng.seed(seed); + } + + const int nlayer = num_layers(); + + for (int i = 0; i < nlayer; i++) + { + m_layers[i]->init(mu, sigma, m_rng); + } + } + + /// + /// Get the serialized layer parameters + /// + std::vector< std::vector> get_parameters() const + { + const int nlayer = num_layers(); + std::vector< std::vector> res; + res.reserve(nlayer); + + for (int i = 0; i < nlayer; i++) + { + res.push_back(m_layers[i]->get_parameters()); + } + + return res; + } + + /// + /// Set the layer parameters + /// + /// \param param Serialized layer parameters + /// + void set_parameters(const std::vector< std::vector>& param) + { + const int nlayer = num_layers(); + + if (static_cast(param.size()) != nlayer) + { + throw std::invalid_argument("Parameter size does not match"); + } + + for (int i = 0; i < nlayer; i++) + { + m_layers[i]->set_parameters(param[i]); + } + } + + /// + /// Get the serialized derivatives of layer parameters + /// + std::vector< std::vector> get_derivatives() const + { + const int nlayer = num_layers(); + std::vector< std::vector> res; + res.reserve(nlayer); + + for (int i = 0; i < nlayer; i++) + { + res.push_back(m_layers[i]->get_derivatives()); + } + + return res; + } + + /// + /// Debugging tool to check parameter gradients + /// + template + void check_gradient(const Matrix& input, const TargetType& target, int npoints, + int seed = -1) + { + if (seed > 0) + { + m_rng.seed(seed); + } - param[layer_id][param_id] += eps * 2; - this->set_parameters(param); this->forward(input); this->backprop(input, target); - const Scalar loss_post = m_output->loss(); + std::vector< std::vector> param = this->get_parameters(); + std::vector< std::vector> deriv = this->get_derivatives(); + const Scalar eps = 1e-5; + const int nlayer = deriv.size(); - const Scalar deriv_est = (loss_post - loss_pre) / eps / 2; - - std::cout << "[layer " << layer_id << ", param " << param_id << - "] deriv = " << deriv[layer_id][param_id] << ", est = " << deriv_est << - ", diff = " << deriv_est - deriv[layer_id][param_id] << std::endl; + for (int i = 0; i < npoints; i++) + { + // Randomly select a layer + const int layer_id = int(m_rng.rand() * nlayer); + // Randomly pick a parameter, note that some layers may have no parameters + const int nparam = deriv[layer_id].size(); + + if (nparam < 1) + { + continue; + } + + const int param_id = int(m_rng.rand() * nparam); + // Turbulate the parameter a little bit + const Scalar old = param[layer_id][param_id]; + param[layer_id][param_id] -= eps; + this->set_parameters(param); + this->forward(input); + this->backprop(input, target); + const Scalar loss_pre = m_output->loss(); + param[layer_id][param_id] += eps * 2; + this->set_parameters(param); + this->forward(input); + this->backprop(input, target); + const Scalar loss_post = m_output->loss(); + const Scalar deriv_est = (loss_post - loss_pre) / eps / 2; + std::cout << "[layer " << layer_id << ", param " << param_id << + "] deriv = " << deriv[layer_id][param_id] << ", est = " << deriv_est << + ", diff = " << deriv_est - deriv[layer_id][param_id] << std::endl; + param[layer_id][param_id] = old; + } - param[layer_id][param_id] = old; + // Restore original parameters + this->set_parameters(param); } - // Restore original parameters - this->set_parameters(param); - } - - /// - /// Fit the model based on the given data - /// - /// \param opt An object that inherits from the 
Optimizer class, indicating the optimization algorithm to use. - /// \param x The predictors. Each column is an observation. - /// \param y The response variable. Each column is an observation. - /// \param batch_size Mini-batch size. - /// \param epoch Number of epochs of training. - /// \param seed Set the random seed of the %RNG if `seed > 0`, otherwise - /// use the current random state. - /// - template - bool fit(Optimizer& opt, const Eigen::MatrixBase& x, const Eigen::MatrixBase& y, - int batch_size, int epoch, int seed = -1) - { - // We do not directly use PlainObjectX since it may be row-majored if x is passed as mat.transpose() - // We want to force XType and YType to be column-majored - typedef typename Eigen::MatrixBase::PlainObject PlainObjectX; - typedef typename Eigen::MatrixBase::PlainObject PlainObjectY; - typedef Eigen::Matrix XType; - typedef Eigen::Matrix YType; - - const int nlayer = num_layers(); - if(nlayer <= 0) - return false; - - // Reset optimizer - opt.reset(); - - // Create shuffled mini-batches - if(seed > 0) - m_rng.seed(seed); - - std::vector x_batches; - std::vector y_batches; - const int nbatch = internal::create_shuffled_batches(x, y, batch_size, m_rng, x_batches, y_batches); - - // Set up callback parameters - m_callback->m_nbatch = nbatch; - m_callback->m_nepoch = epoch; - - // Iterations on the whole data set - for(int k = 0; k < epoch; k++) + /// + /// Fit the model based on the given data + /// + /// \param opt An object that inherits from the Optimizer class, indicating the optimization algorithm to use. + /// \param x The predictors. Each column is an observation. + /// \param y The response variable. Each column is an observation. + /// \param batch_size Mini-batch size. + /// \param epoch Number of epochs of training. + /// \param seed Set the random seed of the %RNG if `seed > 0`, otherwise + /// use the current random state. 
+ /// + template + bool fit(Optimizer& opt, const Eigen::MatrixBase& x, + const Eigen::MatrixBase& y, + int batch_size, int epoch, int seed = -1) { - m_callback->m_epoch_id = k; + // We do not directly use PlainObjectX since it may be row-majored if x is passed as mat.transpose() + // We want to force XType and YType to be column-majored + typedef typename Eigen::MatrixBase::PlainObject PlainObjectX; + typedef typename Eigen::MatrixBase::PlainObject PlainObjectY; + typedef Eigen::Matrix + XType; + typedef Eigen::Matrix + YType; + const int nlayer = num_layers(); + + if (nlayer <= 0) + { + return false; + } + + // Reset optimizer + opt.reset(); - // Train on each mini-batch - for(int i = 0; i < nbatch; i++) + // Create shuffled mini-batches + if (seed > 0) { - m_callback->m_batch_id = i; - m_callback->pre_training_batch(this, x_batches[i], y_batches[i]); + m_rng.seed(seed); + } - this->forward(x_batches[i]); - this->backprop(x_batches[i], y_batches[i]); - this->update(opt); + std::vector x_batches; + std::vector y_batches; + const int nbatch = internal::create_shuffled_batches(x, y, batch_size, m_rng, + x_batches, y_batches); + // Set up callback parameters + m_callback->m_nbatch = nbatch; + m_callback->m_nepoch = epoch; - m_callback->post_training_batch(this, x_batches[i], y_batches[i]); + // Iterations on the whole data set + for (int k = 0; k < epoch; k++) + { + m_callback->m_epoch_id = k; + + // Train on each mini-batch + for (int i = 0; i < nbatch; i++) + { + m_callback->m_batch_id = i; + m_callback->pre_training_batch(this, x_batches[i], y_batches[i]); + this->forward(x_batches[i]); + this->backprop(x_batches[i], y_batches[i]); + this->update(opt); + m_callback->post_training_batch(this, x_batches[i], y_batches[i]); + } } + + return true; } - return true; - } - - /// - /// Use the fitted model to make predictions - /// - /// \param x The predictors. Each column is an observation. - /// - Matrix predict(const Matrix& x) - { - const int nlayer = num_layers(); - if(nlayer <= 0) - return Matrix(); - - this->forward(x); - return m_layers[nlayer - 1]->output(); - } + /// + /// Use the fitted model to make predictions + /// + /// \param x The predictors. Each column is an observation. + /// + Matrix predict(const Matrix& x) + { + const int nlayer = num_layers(); + + if (nlayer <= 0) + { + return Matrix(); + } + + this->forward(x); + return m_layers[nlayer - 1]->output(); + } }; diff --git a/include/Optimizer.h b/include/Optimizer.h index ecc6091..bc1c5a7 100644 --- a/include/Optimizer.h +++ b/include/Optimizer.h @@ -4,7 +4,8 @@ #include #include "Config.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -18,31 +19,31 @@ namespace MiniDNN { /// class Optimizer { -protected: - typedef Eigen::Matrix Vector; - typedef Vector::ConstAlignedMapType ConstAlignedMapVec; - typedef Vector::AlignedMapType AlignedMapVec; - -public: - virtual ~Optimizer() {} - - /// - /// Reset the optimizer to clear all historical information - /// - virtual void reset() {}; - - /// - /// Update the parameter vector using its gradient - /// - /// It is assumed that the memory addresses of `dvec` and `vec` do not - /// change during the training process. This is used to implement optimization - /// algorithms that have "memories". See the AdaGrad algorithm for an example. - /// - /// \param dvec The gradient of the parameter. Read-only - /// \param vec On entering, the current parameter vector. On exit, the - /// updated parameters. 
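For orientation, the reformatted Network class above is typically driven as in the following minimal sketch. It is illustrative only and not part of the patch: it assumes the default Scalar is double, that FullyConnected<Activation> is constructed with (in_size, out_size), and it trains on random toy data.

#include <Eigen/Core>
#include <MiniDNN.h>

using namespace MiniDNN;

typedef Eigen::MatrixXd Matrix;   // assumes Scalar == double

int main()
{
    // Toy data: 4 predictors, 1 response, 100 observations (one per column)
    Matrix x = Matrix::Random(4, 100);
    Matrix y = Matrix::Random(1, 100);

    Network net;
    net.add_layer(new FullyConnected<ReLU>(4, 8));      // hidden layer, 4 -> 8
    net.add_layer(new FullyConnected<Identity>(8, 1));  // output mapping, 8 -> 1
    net.set_output(new RegressionMSE());                // MSE output layer

    SGD opt;
    opt.m_lrate = Scalar(0.01);

    net.init(0, 0.01, 123);            // N(mu = 0, sigma = 0.01) init, seed 123
    net.fit(opt, x, y, 10, 50, 123);   // batch size 10, 50 epochs, seed 123

    Matrix pred = net.predict(x);      // forward pass through the fitted model
    return 0;
}

The pointers passed to add_layer() and set_output() are owned and freed by the Network object, as noted in the documentation above, so they are not deleted manually here.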
- /// - virtual void update(ConstAlignedMapVec& dvec, AlignedMapVec& vec) = 0; + protected: + typedef Eigen::Matrix Vector; + typedef Vector::ConstAlignedMapType ConstAlignedMapVec; + typedef Vector::AlignedMapType AlignedMapVec; + + public: + virtual ~Optimizer() {} + + /// + /// Reset the optimizer to clear all historical information + /// + virtual void reset() {}; + + /// + /// Update the parameter vector using its gradient + /// + /// It is assumed that the memory addresses of `dvec` and `vec` do not + /// change during the training process. This is used to implement optimization + /// algorithms that have "memories". See the AdaGrad algorithm for an example. + /// + /// \param dvec The gradient of the parameter. Read-only + /// \param vec On entering, the current parameter vector. On exit, the + /// updated parameters. + /// + virtual void update(ConstAlignedMapVec& dvec, AlignedMapVec& vec) = 0; }; diff --git a/include/Optimizer/AdaGrad.h b/include/Optimizer/AdaGrad.h index 1201c8a..6e23f33 100644 --- a/include/Optimizer/AdaGrad.h +++ b/include/Optimizer/AdaGrad.h @@ -6,7 +6,8 @@ #include "../Optimizer.h" #include "../Utils/sparsepp.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -16,39 +17,44 @@ namespace MiniDNN { /// class AdaGrad: public Optimizer { -private: - typedef Eigen::Matrix Vector; - typedef Eigen::Array Array; - typedef Vector::ConstAlignedMapType ConstAlignedMapVec; - typedef Vector::AlignedMapType AlignedMapVec; - - spp::sparse_hash_map m_history; - -public: - Scalar m_lrate; - Scalar m_eps; - - AdaGrad() : - m_lrate(Scalar(0.01)), m_eps(Scalar(1e-7)) - {} - - void reset() { m_history.clear(); } - - void update(ConstAlignedMapVec& dvec, AlignedMapVec& vec) - { - // Get the accumulated squared gradient associated with this gradient - Array& grad_square = m_history[dvec.data()]; - // If length is zero, initialize it - if(grad_square.size() == 0) + private: + typedef Eigen::Matrix Vector; + typedef Eigen::Array Array; + typedef Vector::ConstAlignedMapType ConstAlignedMapVec; + typedef Vector::AlignedMapType AlignedMapVec; + + spp::sparse_hash_map m_history; + + public: + Scalar m_lrate; + Scalar m_eps; + + AdaGrad() : + m_lrate(Scalar(0.01)), m_eps(Scalar(1e-7)) + {} + + void reset() { - grad_square.resize(dvec.size()); - grad_square.setZero(); + m_history.clear(); + } + + void update(ConstAlignedMapVec& dvec, AlignedMapVec& vec) + { + // Get the accumulated squared gradient associated with this gradient + Array& grad_square = m_history[dvec.data()]; + + // If length is zero, initialize it + if (grad_square.size() == 0) + { + grad_square.resize(dvec.size()); + grad_square.setZero(); + } + + // Update accumulated squared gradient + grad_square += dvec.array().square(); + // Update parameters + vec.array() -= m_lrate * dvec.array() / (grad_square.sqrt() + m_eps); } - // Update accumulated squared gradient - grad_square += dvec.array().square(); - // Update parameters - vec.array() -= m_lrate * dvec.array() / (grad_square.sqrt() + m_eps); - } }; diff --git a/include/Optimizer/RMSProp.h b/include/Optimizer/RMSProp.h index 31c0256..5d1c2b4 100644 --- a/include/Optimizer/RMSProp.h +++ b/include/Optimizer/RMSProp.h @@ -6,7 +6,8 @@ #include "../Optimizer.h" #include "../Utils/sparsepp.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -16,40 +17,46 @@ namespace MiniDNN { /// class RMSProp: public Optimizer { -private: - typedef Eigen::Matrix Vector; - typedef Eigen::Array Array; - typedef Vector::ConstAlignedMapType ConstAlignedMapVec; - typedef Vector::AlignedMapType 
AlignedMapVec; - - spp::sparse_hash_map m_history; - -public: - Scalar m_lrate; - Scalar m_eps; - Scalar m_decay; - - RMSProp() : - m_lrate(Scalar(0.001)), m_eps(Scalar(1e-6)), m_decay(Scalar(0.9)) - {} - - void reset() { m_history.clear(); } - - void update(ConstAlignedMapVec& dvec, AlignedMapVec& vec) - { - // Get the accumulated squared gradient associated with this gradient - Array& grad_square = m_history[dvec.data()]; - // If length is zero, initialize it - if(grad_square.size() == 0) + private: + typedef Eigen::Matrix Vector; + typedef Eigen::Array Array; + typedef Vector::ConstAlignedMapType ConstAlignedMapVec; + typedef Vector::AlignedMapType AlignedMapVec; + + spp::sparse_hash_map m_history; + + public: + Scalar m_lrate; + Scalar m_eps; + Scalar m_decay; + + RMSProp() : + m_lrate(Scalar(0.001)), m_eps(Scalar(1e-6)), m_decay(Scalar(0.9)) + {} + + void reset() { - grad_square.resize(dvec.size()); - grad_square.setZero(); + m_history.clear(); + } + + void update(ConstAlignedMapVec& dvec, AlignedMapVec& vec) + { + // Get the accumulated squared gradient associated with this gradient + Array& grad_square = m_history[dvec.data()]; + + // If length is zero, initialize it + if (grad_square.size() == 0) + { + grad_square.resize(dvec.size()); + grad_square.setZero(); + } + + // Update accumulated squared gradient + grad_square = m_decay * grad_square + (Scalar(1) - m_decay) * + dvec.array().square(); + // Update parameters + vec.array() -= m_lrate * dvec.array() / (grad_square + m_eps).sqrt(); } - // Update accumulated squared gradient - grad_square = m_decay * grad_square + (Scalar(1) - m_decay) * dvec.array().square(); - // Update parameters - vec.array() -= m_lrate * dvec.array() / (grad_square + m_eps).sqrt(); - } }; diff --git a/include/Optimizer/SGD.h b/include/Optimizer/SGD.h index ed72fd4..07e0a5f 100644 --- a/include/Optimizer/SGD.h +++ b/include/Optimizer/SGD.h @@ -5,7 +5,8 @@ #include "../Config.h" #include "../Optimizer.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -15,23 +16,23 @@ namespace MiniDNN { /// class SGD: public Optimizer { -private: - typedef Eigen::Matrix Vector; - typedef Vector::ConstAlignedMapType ConstAlignedMapVec; - typedef Vector::AlignedMapType AlignedMapVec; - -public: - Scalar m_lrate; - Scalar m_decay; - - SGD() : - m_lrate(Scalar(0.01)), m_decay(Scalar(0)) - {} - - void update(ConstAlignedMapVec& dvec, AlignedMapVec& vec) - { - vec.noalias() -= m_lrate * (dvec + m_decay * vec); - } + private: + typedef Eigen::Matrix Vector; + typedef Vector::ConstAlignedMapType ConstAlignedMapVec; + typedef Vector::AlignedMapType AlignedMapVec; + + public: + Scalar m_lrate; + Scalar m_decay; + + SGD() : + m_lrate(Scalar(0.01)), m_decay(Scalar(0)) + {} + + void update(ConstAlignedMapVec& dvec, AlignedMapVec& vec) + { + vec.noalias() -= m_lrate * (dvec + m_decay * vec); + } }; diff --git a/include/Output.h b/include/Output.h index 9f580be..0b04bd4 100644 --- a/include/Output.h +++ b/include/Output.h @@ -5,7 +5,8 @@ #include #include "Config.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -21,47 +22,48 @@ namespace MiniDNN { /// class Output { -protected: - typedef Eigen::Matrix Matrix; - typedef Eigen::Matrix Vector; - typedef Eigen::RowVectorXi IntegerVector; - -public: - virtual ~Output() {} - - // Check the format of target data, e.g. 
in classification problems the - // target data should be binary (either 0 or 1) - virtual void check_target_data(const Matrix& target) {} - - // Another type of target data where each element is a class label - // This version may not be sensible for regression tasks, so by default - // we raise an exception - virtual void check_target_data(const IntegerVector& target) - { - throw std::invalid_argument("[class Output]: This output type cannot take class labels as target data"); - } - - // A combination of the forward stage and the back-propagation stage for the output layer - // The computed derivative of the input should be stored in this layer, and can be retrieved by - // the backprop_data() function - virtual void evaluate(const Matrix& prev_layer_data, const Matrix& target) = 0; - - // Another type of target data where each element is a class label - // This version may not be sensible for regression tasks, so by default - // we raise an exception - virtual void evaluate(const Matrix& prev_layer_data, const IntegerVector& target) - { - throw std::invalid_argument("[class Output]: This output type cannot take class labels as target data"); - } - - // The derivative of the input of this layer, which is also the derivative - // of the output of previous layer - virtual const Matrix& backprop_data() const = 0; - - // Return the loss function value after the evaluation - // This function can be assumed to be called after evaluate(), so that it can make use of the - // intermediate result to save some computation - virtual Scalar loss() const = 0; + protected: + typedef Eigen::Matrix Matrix; + typedef Eigen::Matrix Vector; + typedef Eigen::RowVectorXi IntegerVector; + + public: + virtual ~Output() {} + + // Check the format of target data, e.g. in classification problems the + // target data should be binary (either 0 or 1) + virtual void check_target_data(const Matrix& target) {} + + // Another type of target data where each element is a class label + // This version may not be sensible for regression tasks, so by default + // we raise an exception + virtual void check_target_data(const IntegerVector& target) + { + throw std::invalid_argument("[class Output]: This output type cannot take class labels as target data"); + } + + // A combination of the forward stage and the back-propagation stage for the output layer + // The computed derivative of the input should be stored in this layer, and can be retrieved by + // the backprop_data() function + virtual void evaluate(const Matrix& prev_layer_data, const Matrix& target) = 0; + + // Another type of target data where each element is a class label + // This version may not be sensible for regression tasks, so by default + // we raise an exception + virtual void evaluate(const Matrix& prev_layer_data, + const IntegerVector& target) + { + throw std::invalid_argument("[class Output]: This output type cannot take class labels as target data"); + } + + // The derivative of the input of this layer, which is also the derivative + // of the output of previous layer + virtual const Matrix& backprop_data() const = 0; + + // Return the loss function value after the evaluation + // This function can be assumed to be called after evaluate(), so that it can make use of the + // intermediate result to save some computation + virtual Scalar loss() const = 0; }; diff --git a/include/Output/BinaryClassEntropy.h b/include/Output/BinaryClassEntropy.h index 57df104..f3ea0b3 100644 --- a/include/Output/BinaryClassEntropy.h +++ b/include/Output/BinaryClassEntropy.h 
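To make the Output interface above concrete, here is a hedged sketch of what a user-defined output layer could look like, written against the virtual functions shown (evaluate, backprop_data, loss). The class name RegressionMAE and the mean-absolute-error loss are illustrative additions, not part of this patch, and Array::sign() assumes Eigen 3.3 or later. The loss value is cached in evaluate(), which is consistent with the base-class note that loss() may assume evaluate() has already been called.

#include <Eigen/Core>
#include <stdexcept>
#include "../Config.h"
#include "../Output.h"

namespace MiniDNN
{

// Hypothetical output layer: mean absolute error over observations,
// L = (1 / nobs) * sum_j ||yhat_j - y_j||_1
class RegressionMAE: public Output
{
    private:
        Matrix m_din;   // d(L) / d(yhat) = sign(yhat - y)
        Scalar m_loss;  // Loss value cached by evaluate()

    public:
        void evaluate(const Matrix& prev_layer_data, const Matrix& target)
        {
            // Check dimension
            const int nobs = prev_layer_data.cols();
            const int nvar = prev_layer_data.rows();

            if ((target.cols() != nobs) || (target.rows() != nvar))
            {
                throw std::invalid_argument("[class RegressionMAE]: Target data have incorrect dimension");
            }

            // Derivative of the input of this layer
            m_din.resize(nvar, nobs);
            m_din.array() = (prev_layer_data - target).array().sign();
            // Cache the loss for later retrieval by loss()
            m_loss = (prev_layer_data - target).array().abs().sum() / nobs;
        }

        const Matrix& backprop_data() const
        {
            return m_din;
        }

        Scalar loss() const
        {
            return m_loss;
        }
};

} // namespace MiniDNN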
@@ -5,7 +5,8 @@ #include #include "../Config.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -15,83 +16,100 @@ namespace MiniDNN { /// class BinaryClassEntropy: public Output { -private: - Matrix m_din; // Derivative of the input of this layer. - // Note that input of this layer is also the output of previous layer - -public: - void check_target_data(const Matrix& target) - { - // Each element should be either 0 or 1 - const int nelem = target.size(); - const Scalar* target_data = target.data(); - for(int i = 0; i < nelem; i++) + private: + Matrix m_din; // Derivative of the input of this layer. + // Note that input of this layer is also the output of previous layer + + public: + void check_target_data(const Matrix& target) + { + // Each element should be either 0 or 1 + const int nelem = target.size(); + const Scalar* target_data = target.data(); + + for (int i = 0; i < nelem; i++) + { + if ((target_data[i] != Scalar(0)) && (target_data[i] != Scalar(1))) + { + throw std::invalid_argument("[class BinaryClassEntropy]: Target data should only contain zero or one"); + } + } + } + + void check_target_data(const IntegerVector& target) + { + // Each element should be either 0 or 1 + const int nobs = target.size(); + + for (int i = 0; i < nobs; i++) + { + if ((target[i] != 0) && (target[i] != 1)) + { + throw std::invalid_argument("[class BinaryClassEntropy]: Target data should only contain zero or one"); + } + } + } + + void evaluate(const Matrix& prev_layer_data, const Matrix& target) + { + // Check dimension + const int nobs = prev_layer_data.cols(); + const int nvar = prev_layer_data.rows(); + + if ((target.cols() != nobs) || (target.rows() != nvar)) + { + throw std::invalid_argument("[class BinaryClassEntropy]: Target data have incorrect dimension"); + } + + // Compute the derivative of the input of this layer + // L = -y * log(phat) - (1 - y) * log(1 - phat) + // in = phat + // d(L) / d(in) = -y / phat + (1 - y) / (1 - phat), y is either 0 or 1 + m_din.resize(nvar, nobs); + m_din.array() = (target.array() < Scalar(0.5)).select((Scalar( + 1) - prev_layer_data.array()).cwiseInverse(), + -prev_layer_data.cwiseInverse()); + } + + void evaluate(const Matrix& prev_layer_data, const IntegerVector& target) + { + // Only when the last hidden layer has only one unit can we use this version + const int nvar = prev_layer_data.rows(); + + if (nvar != 1) + { + throw std::invalid_argument("[class BinaryClassEntropy]: Only one response variable is allowed when class labels are used as target data"); + } + + // Check dimension + const int nobs = prev_layer_data.cols(); + + if (target.size() != nobs) + { + throw std::invalid_argument("[class BinaryClassEntropy]: Target data have incorrect dimension"); + } + + // Same as above + m_din.resize(1, nobs); + m_din.array() = (target.array() == 0).select((Scalar(1) - + prev_layer_data.array()).cwiseInverse(), + -prev_layer_data.cwiseInverse()); + } + + const Matrix& backprop_data() const { - if((target_data[i] != Scalar(0)) && (target_data[i] != Scalar(1))) - throw std::invalid_argument("[class BinaryClassEntropy]: Target data should only contain zero or one"); + return m_din; } - } - void check_target_data(const IntegerVector& target) - { - // Each element should be either 0 or 1 - const int nobs = target.size(); - for(int i = 0; i < nobs; i++) + Scalar loss() const { - if((target[i] != 0) && (target[i] != 1)) - throw std::invalid_argument("[class BinaryClassEntropy]: Target data should only contain zero or one"); + // L = -y * log(phat) - (1 - y) * log(1 - 
phat) + // y = 0 => L = -log(1 - phat) + // y = 1 => L = -log(phat) + // m_din contains 1/(1 - phat) if y = 0, and -1/phat if y = 1, so + // L = log(abs(m_din)).sum() + return m_din.array().abs().log().sum() / m_din.cols(); } - } - - void evaluate(const Matrix& prev_layer_data, const Matrix& target) - { - // Check dimension - const int nobs = prev_layer_data.cols(); - const int nvar = prev_layer_data.rows(); - if((target.cols() != nobs) || (target.rows() != nvar)) - throw std::invalid_argument("[class BinaryClassEntropy]: Target data have incorrect dimension"); - - // Compute the derivative of the input of this layer - // L = -y * log(phat) - (1 - y) * log(1 - phat) - // in = phat - // d(L) / d(in) = -y / phat + (1 - y) / (1 - phat), y is either 0 or 1 - m_din.resize(nvar, nobs); - m_din.array() = (target.array() < Scalar(0.5)).select((Scalar(1) - prev_layer_data.array()).cwiseInverse(), - -prev_layer_data.cwiseInverse()); - } - - void evaluate(const Matrix& prev_layer_data, const IntegerVector& target) - { - // Only when the last hidden layer has only one unit can we use this version - const int nvar = prev_layer_data.rows(); - if(nvar != 1) - throw std::invalid_argument("[class BinaryClassEntropy]: Only one response variable is allowed when class labels are used as target data"); - - // Check dimension - const int nobs = prev_layer_data.cols(); - if(target.size() != nobs) - throw std::invalid_argument("[class BinaryClassEntropy]: Target data have incorrect dimension"); - - // Same as above - m_din.resize(1, nobs); - m_din.array() = (target.array() == 0).select((Scalar(1) - prev_layer_data.array()).cwiseInverse(), - -prev_layer_data.cwiseInverse()); - } - - const Matrix& backprop_data() const - { - return m_din; - } - - Scalar loss() const - { - // L = -y * log(phat) - (1 - y) * log(1 - phat) - // y = 0 => L = -log(1 - phat) - // y = 1 => L = -log(phat) - // m_din contains 1/(1 - phat) if y = 0, and -1/phat if y = 1, so - // L = log(abs(m_din)).sum() - return m_din.array().abs().log().sum() / m_din.cols(); - } }; diff --git a/include/Output/MultiClassEntropy.h b/include/Output/MultiClassEntropy.h index d5ac7a8..b7d7ee4 100644 --- a/include/Output/MultiClassEntropy.h +++ b/include/Output/MultiClassEntropy.h @@ -5,7 +5,8 @@ #include #include "../Config.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -15,108 +16,129 @@ namespace MiniDNN { /// class MultiClassEntropy: public Output { -private: - Matrix m_din; // Derivative of the input of this layer. - // Note that input of this layer is also the output of previous layer - -public: - void check_target_data(const Matrix& target) - { - // Each element should be either 0 or 1 - // Each column has and only has one 1 - const int nobs = target.cols(); - const int nclass = target.rows(); - for(int i = 0; i < nobs; i++) + private: + Matrix m_din; // Derivative of the input of this layer. 
+ // Note that input of this layer is also the output of previous layer + + public: + void check_target_data(const Matrix& target) { - int one = 0; - for(int j = 0; j < nclass; j++) + // Each element should be either 0 or 1 + // Each column has and only has one 1 + const int nobs = target.cols(); + const int nclass = target.rows(); + + for (int i = 0; i < nobs; i++) { - if(target(j, i) == Scalar(1)) + int one = 0; + + for (int j = 0; j < nclass; j++) { - one++; - continue; + if (target(j, i) == Scalar(1)) + { + one++; + continue; + } + + if (target(j, i) != Scalar(0)) + { + throw std::invalid_argument("[class MultiClassEntropy]: Target data should only contain zero or one"); + } + } + + if (one != 1) + { + throw std::invalid_argument("[class MultiClassEntropy]: Each column of target data should only contain one \"1\""); } - if(target(j, i) != Scalar(0)) - throw std::invalid_argument("[class MultiClassEntropy]: Target data should only contain zero or one"); } - if(one != 1) - throw std::invalid_argument("[class MultiClassEntropy]: Each column of target data should only contain one \"1\""); } - } - void check_target_data(const IntegerVector& target) - { - // All elements must be non-negative - const int nobs = target.size(); - for(int i = 0; i < nobs; i++) + void check_target_data(const IntegerVector& target) { - if(target[i] < 0) - throw std::invalid_argument("[class MultiClassEntropy]: Target data must be non-negative"); + // All elements must be non-negative + const int nobs = target.size(); + + for (int i = 0; i < nobs; i++) + { + if (target[i] < 0) + { + throw std::invalid_argument("[class MultiClassEntropy]: Target data must be non-negative"); + } + } } - } - - // target is a matrix with each column representing an observation - // Each column is a vector that has a one at some location and has zeros elsewhere - void evaluate(const Matrix& prev_layer_data, const Matrix& target) - { - // Check dimension - const int nobs = prev_layer_data.cols(); - const int nclass = prev_layer_data.rows(); - if((target.cols() != nobs) || (target.rows() != nclass)) - throw std::invalid_argument("[class MultiClassEntropy]: Target data have incorrect dimension"); - - // Compute the derivative of the input of this layer - // L = -sum(log(phat) * y) - // in = phat - // d(L) / d(in) = -y / phat - m_din.resize(nclass, nobs); - m_din.noalias() = -target.cwiseQuotient(prev_layer_data); - } - - // target is a vector of class labels that take values from [0, 1, ..., nclass - 1] - // The i-th element of target is the class label for observation i - void evaluate(const Matrix& prev_layer_data, const IntegerVector& target) - { - // Check dimension - const int nobs = prev_layer_data.cols(); - const int nclass = prev_layer_data.rows(); - if(target.size() != nobs) - throw std::invalid_argument("[class MultiClassEntropy]: Target data have incorrect dimension"); - - // Compute the derivative of the input of this layer - // L = -log(phat[y]) - // in = phat - // d(L) / d(in) = [0, 0, ..., -1/phat[y], 0, ..., 0] - m_din.resize(nclass, nobs); - m_din.setZero(); - for(int i = 0; i < nobs; i++) + + // target is a matrix with each column representing an observation + // Each column is a vector that has a one at some location and has zeros elsewhere + void evaluate(const Matrix& prev_layer_data, const Matrix& target) { - m_din(target[i], i) = -Scalar(1) / prev_layer_data(target[i], i); + // Check dimension + const int nobs = prev_layer_data.cols(); + const int nclass = prev_layer_data.rows(); + + if ((target.cols() != nobs) || 
(target.rows() != nclass)) + { + throw std::invalid_argument("[class MultiClassEntropy]: Target data have incorrect dimension"); + } + + // Compute the derivative of the input of this layer + // L = -sum(log(phat) * y) + // in = phat + // d(L) / d(in) = -y / phat + m_din.resize(nclass, nobs); + m_din.noalias() = -target.cwiseQuotient(prev_layer_data); } - } - - const Matrix& backprop_data() const - { - return m_din; - } - - Scalar loss() const - { - // L = -sum(log(phat) * y) - // in = phat - // d(L) / d(in) = -y / phat - // m_din contains 0 if y = 0, and -1/phat if y = 1 - Scalar res = Scalar(0); - const int nelem = m_din.size(); - const Scalar* din_data = m_din.data(); - for(int i = 0; i < nelem; i++) + + // target is a vector of class labels that take values from [0, 1, ..., nclass - 1] + // The i-th element of target is the class label for observation i + void evaluate(const Matrix& prev_layer_data, const IntegerVector& target) { - if(din_data[i] < Scalar(0)) - res += std::log(-din_data[i]); + // Check dimension + const int nobs = prev_layer_data.cols(); + const int nclass = prev_layer_data.rows(); + + if (target.size() != nobs) + { + throw std::invalid_argument("[class MultiClassEntropy]: Target data have incorrect dimension"); + } + + // Compute the derivative of the input of this layer + // L = -log(phat[y]) + // in = phat + // d(L) / d(in) = [0, 0, ..., -1/phat[y], 0, ..., 0] + m_din.resize(nclass, nobs); + m_din.setZero(); + + for (int i = 0; i < nobs; i++) + { + m_din(target[i], i) = -Scalar(1) / prev_layer_data(target[i], i); + } } - return res / m_din.cols(); - } + const Matrix& backprop_data() const + { + return m_din; + } + + Scalar loss() const + { + // L = -sum(log(phat) * y) + // in = phat + // d(L) / d(in) = -y / phat + // m_din contains 0 if y = 0, and -1/phat if y = 1 + Scalar res = Scalar(0); + const int nelem = m_din.size(); + const Scalar* din_data = m_din.data(); + + for (int i = 0; i < nelem; i++) + { + if (din_data[i] < Scalar(0)) + { + res += std::log(-din_data[i]); + } + } + + return res / m_din.cols(); + } }; diff --git a/include/Output/RegressionMSE.h b/include/Output/RegressionMSE.h index 2080d67..15dc5f4 100644 --- a/include/Output/RegressionMSE.h +++ b/include/Output/RegressionMSE.h @@ -5,7 +5,8 @@ #include #include "../Config.h" -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -15,40 +16,43 @@ namespace MiniDNN { /// class RegressionMSE: public Output { -private: - typedef Eigen::Matrix Matrix; - typedef Eigen::Matrix Vector; - - Matrix m_din; // Derivative of the input of this layer. - // Note that input of this layer is also the output of previous layer - -public: - void evaluate(const Matrix& prev_layer_data, const Matrix& target) - { - // Check dimension - const int nobs = prev_layer_data.cols(); - const int nvar = prev_layer_data.rows(); - if((target.cols() != nobs) || (target.rows() != nvar)) - throw std::invalid_argument("[class RegressionMSE]: Target data have incorrect dimension"); - - // Compute the derivative of the input of this layer - // L = 0.5 * ||yhat - y||^2 - // in = yhat - // d(L) / d(in) = yhat - y - m_din.resize(nvar, nobs); - m_din.noalias() = prev_layer_data - target; - } - - const Matrix& backprop_data() const - { - return m_din; - } - - Scalar loss() const - { - // L = 0.5 * ||yhat - y||^2 - return m_din.squaredNorm() / m_din.cols() * Scalar(0.5); - } + private: + typedef Eigen::Matrix Matrix; + typedef Eigen::Matrix Vector; + + Matrix m_din; // Derivative of the input of this layer. 
+ // Note that input of this layer is also the output of previous layer + + public: + void evaluate(const Matrix& prev_layer_data, const Matrix& target) + { + // Check dimension + const int nobs = prev_layer_data.cols(); + const int nvar = prev_layer_data.rows(); + + if ((target.cols() != nobs) || (target.rows() != nvar)) + { + throw std::invalid_argument("[class RegressionMSE]: Target data have incorrect dimension"); + } + + // Compute the derivative of the input of this layer + // L = 0.5 * ||yhat - y||^2 + // in = yhat + // d(L) / d(in) = yhat - y + m_din.resize(nvar, nobs); + m_din.noalias() = prev_layer_data - target; + } + + const Matrix& backprop_data() const + { + return m_din; + } + + Scalar loss() const + { + // L = 0.5 * ||yhat - y||^2 + return m_din.squaredNorm() / m_din.cols() * Scalar(0.5); + } }; diff --git a/include/RNG.h b/include/RNG.h index 44f25bf..db572eb 100644 --- a/include/RNG.h +++ b/include/RNG.h @@ -1,7 +1,8 @@ #ifndef RNG_H_ #define RNG_H_ -namespace MiniDNN { +namespace MiniDNN +{ /// @@ -12,50 +13,53 @@ namespace MiniDNN { /// class RNG { -private: - const unsigned int m_a; // multiplier - const unsigned long m_max; // 2^31 - 1 - long m_rand; - - inline long next_long_rand(long seed) - { - unsigned long lo, hi; + private: + const unsigned int m_a; // multiplier + const unsigned long m_max; // 2^31 - 1 + long m_rand; - lo = m_a * (long)(seed & 0xFFFF); - hi = m_a * (long)((unsigned long)seed >> 16); - lo += (hi & 0x7FFF) << 16; - if(lo > m_max) - { - lo &= m_max; - ++lo; - } - lo += hi >> 15; - if(lo > m_max) + inline long next_long_rand(long seed) { - lo &= m_max; - ++lo; + unsigned long lo, hi; + lo = m_a * (long)(seed & 0xFFFF); + hi = m_a * (long)((unsigned long)seed >> 16); + lo += (hi & 0x7FFF) << 16; + + if (lo > m_max) + { + lo &= m_max; + ++lo; + } + + lo += hi >> 15; + + if (lo > m_max) + { + lo &= m_max; + ++lo; + } + + return (long)lo; } - return (long)lo; - } -public: - RNG(unsigned long init_seed) : - m_a(16807), - m_max(2147483647L), - m_rand(init_seed ? (init_seed & m_max) : 1) - {} + public: + RNG(unsigned long init_seed) : + m_a(16807), + m_max(2147483647L), + m_rand(init_seed ? (init_seed & m_max) : 1) + {} - virtual ~RNG() {} + virtual ~RNG() {} - virtual void seed(unsigned long seed) - { - m_rand = (seed ? (seed & m_max) : 1); - } + virtual void seed(unsigned long seed) + { + m_rand = (seed ? 
(seed & m_max) : 1); + } - virtual double rand() - { - m_rand = next_long_rand(m_rand); - return double(m_rand) / double(m_max); - } + virtual double rand() + { + m_rand = next_long_rand(m_rand); + return double(m_rand) / double(m_max); + } }; diff --git a/include/Utils/Convolution.h b/include/Utils/Convolution.h index 4d7dade..f00a3b3 100644 --- a/include/Utils/Convolution.h +++ b/include/Utils/Convolution.h @@ -4,9 +4,11 @@ #include #include "../Config.h" -namespace MiniDNN { +namespace MiniDNN +{ -namespace internal { +namespace internal +{ // We assume the following memory layout: @@ -85,7 +87,8 @@ struct ConvDims channel_rows(channel_rows_), channel_cols(channel_cols_), filter_rows(filter_rows_), filter_cols(filter_cols_), img_rows(channel_rows_), img_cols(in_channels_ * channel_cols_), - conv_rows(channel_rows_ - filter_rows_ + 1), conv_cols(channel_cols_ - filter_cols_ + 1) + conv_rows(channel_rows_ - filter_rows_ + 1), + conv_cols(channel_cols_ - filter_cols_ + 1) {} }; // Transform original matrix to "lower" form as described in the MEC paper @@ -102,19 +105,23 @@ inline void flatten_mat( // Number of bytes in the segment that will be copied at one time const int& segment_size = dim.filter_rows; const std::size_t copy_bytes = sizeof(Scalar) * segment_size; - Scalar* writer = flat_mat.data(); const int channel_size = dim.channel_rows * dim.channel_cols; - for(int i = 0; i < n_obs; i++, src += stride) + + for (int i = 0; i < n_obs; i++, src += stride) { const Scalar* reader_row = src; const Scalar* const reader_row_end = src + dim.conv_rows; - for(; reader_row < reader_row_end; reader_row++) + + for (; reader_row < reader_row_end; reader_row++) { const Scalar* reader = reader_row; const Scalar* const reader_end = reader + channel_size; - for(; reader < reader_end; reader += dim.channel_rows, writer += segment_size) + + for (; reader < reader_end; reader += dim.channel_rows, writer += segment_size) + { std::memcpy(writer, reader, copy_bytes); + } } } } @@ -122,8 +129,9 @@ inline void flatten_mat( // and progressively move the window to the right inline void moving_product( const int step, - const Eigen::Matrix& mat1, - Eigen::Map< const Eigen::Matrix >& mat2, + const Eigen::Matrix& + mat1, + Eigen::Map< const Eigen::Matrix>& mat2, Eigen::Matrix& res ) { @@ -133,41 +141,46 @@ inline void moving_product( const int col2 = mat2.cols(); const int col_end = col1 - row2; int res_start_col = 0; - for(int left_end = 0; left_end <= col_end; left_end += step, res_start_col += col2) + + for (int left_end = 0; left_end <= col_end; + left_end += step, res_start_col += col2) { - res.block(0, res_start_col, row1, col2).noalias() += mat1.block(0, left_end, row1, row2) * mat2; + res.block(0, res_start_col, row1, col2).noalias() += mat1.block(0, left_end, + row1, row2) * mat2; } } // The main convolution function using the "valid" rule inline void convolve_valid( const ConvDims& dim, - const Scalar* src, const bool image_outer_loop, const int n_obs, const Scalar* filter_data, + const Scalar* src, const bool image_outer_loop, const int n_obs, + const Scalar* filter_data, Scalar* dest) { typedef Eigen::Matrix Matrix; - typedef Eigen::Matrix RMatrix; + typedef Eigen::Matrix + RMatrix; typedef Eigen::Map ConstMapMat; - // Flat matrix const int flat_rows = dim.conv_rows * n_obs; const int flat_cols = dim.filter_rows * dim.channel_cols; const int channel_size = dim.channel_rows * dim.channel_cols; // Distance between two images - const int img_stride = image_outer_loop ? 
(dim.img_rows * dim.img_cols) : channel_size; + const int img_stride = image_outer_loop ? (dim.img_rows * dim.img_cols) : + channel_size; // Distance between two channels - const int channel_stride = image_outer_loop ? channel_size : (channel_size * n_obs); + const int channel_stride = image_outer_loop ? channel_size : + (channel_size * n_obs); RMatrix flat_mat(flat_rows, flat_cols); - // Convolution results const int& res_rows = flat_rows; const int res_cols = dim.conv_cols * dim.out_channels; Matrix res = Matrix::Zero(res_rows, res_cols); - const int& step = dim.filter_rows; const int filter_size = dim.filter_rows * dim.filter_cols; const int filter_stride = filter_size * dim.out_channels; - for(int i = 0; i < dim.in_channels; i++, src += channel_stride, filter_data += filter_stride) + for (int i = 0; i < dim.in_channels; + i++, src += channel_stride, filter_data += filter_stride) { // Flatten source image flatten_mat(dim, src, img_stride, n_obs, flat_mat); @@ -189,7 +202,6 @@ inline void convolve_valid( */ // obs_out means the convolution result of the k-th image on the l-th output channel // [i, j] gives the matrix indices - // The destination has the layout /* * obs0_out0[0, 0] obs0_out0[0, 1] obs0_out0[0, 2] obs0_out1[0, 0] obs0_out1[0, 1] obs0_out1[0, 2] ... @@ -199,7 +211,6 @@ inline void convolve_valid( */ // which in a larger scale looks like // [obs0_out0 obs0_out1 obs0_out2 obs1_out0 obs1_out1 obs1_out2 obs2_out0 ...] - // Copy data to destination // dest[a, b] corresponds to obs_out[i, j] // where k = b / (conv_cols * out_channels), @@ -213,7 +224,8 @@ inline void convolve_valid( const int dest_cols = res_cols * n_obs; const Scalar* res_data = res.data(); const std::size_t copy_bytes = sizeof(Scalar) * dest_rows; - for(int b = 0; b < dest_cols; b++, dest += dest_rows) + + for (int b = 0; b < dest_cols; b++, dest += dest_rows) { const int k = b / res_cols; const int l = (b % res_cols) / dim.conv_cols; @@ -229,7 +241,8 @@ inline void convolve_valid( // The moving_product() function for the "full" rule inline void moving_product( const int padding, const int step, - const Eigen::Matrix& mat1, + const Eigen::Matrix& + mat1, const Eigen::Matrix& mat2, Eigen::Matrix& res ) @@ -239,30 +252,39 @@ inline void moving_product( const int row2 = mat2.rows(); const int col2 = mat2.cols(); int res_start_col = 0; - // Left padding int left_end = -padding; int right_end = step; - for(; left_end < 0 && right_end <= col1; left_end += step, right_end += step, res_start_col += col2) + + for (; left_end < 0 + && right_end <= col1; + left_end += step, right_end += step, res_start_col += col2) { res.block(0, res_start_col, row1, col2).noalias() += mat1.leftCols(right_end) * - mat2.bottomRows(right_end); + mat2.bottomRows(right_end); } + // Main part - for(; right_end <= col1; left_end += step, right_end += step, res_start_col += col2) + for (; right_end <= col1; + left_end += step, right_end += step, res_start_col += col2) { - res.block(0, res_start_col, row1, col2).noalias() += mat1.block(0, left_end, row1, row2) * mat2; + res.block(0, res_start_col, row1, col2).noalias() += mat1.block(0, left_end, + row1, row2) * mat2; } + // Right padding - for(; left_end < col1; left_end += step, res_start_col += col2) + for (; left_end < col1; left_end += step, res_start_col += col2) { - if(left_end <= 0) + if (left_end <= 0) + { + res.block(0, res_start_col, row1, col2).noalias() += mat1 * mat2.block(0, + -left_end, col1, row2); + } + else { - res.block(0, res_start_col, row1, col2).noalias() += mat1 * 
mat2.block(0, -left_end, col1, row2); - } else { const int overlap = col1 - left_end; res.block(0, res_start_col, row1, col2).noalias() += mat1.rightCols(overlap) * - mat2.topRows(overlap); + mat2.topRows(overlap); } } } @@ -273,17 +295,15 @@ inline void convolve_full( Scalar* dest) { typedef Eigen::Matrix Matrix; - typedef Eigen::Matrix RMatrix; + typedef Eigen::Matrix + RMatrix; typedef Eigen::Map ConstMapMat; - // Padding sizes const int padding_top = dim.filter_rows - 1; const int padding_left = dim.filter_cols - 1; - // Dimension of convolution result using "full" rule const int conv_rows = dim.channel_rows + padding_top; const int conv_cols = dim.channel_cols + padding_left; - // Add (top and bottom) padding to source images const int pad_rows = dim.img_rows + padding_top * 2; const int pad_cols = dim.img_cols * n_obs; @@ -293,15 +313,14 @@ inline void convolve_full( pad_mat.bottomRows(padding_top).setZero(); pad_mat.block(padding_top, 0, dim.img_rows, pad_cols).noalias() = src_mat; src = pad_mat.data(); - ConvDims pad_dim(dim.in_channels, dim.out_channels, pad_rows, dim.channel_cols, dim.filter_rows, dim.filter_cols); - + ConvDims pad_dim(dim.in_channels, dim.out_channels, pad_rows, dim.channel_cols, + dim.filter_rows, dim.filter_cols); // Flat matrix const int flat_rows = conv_rows * n_obs; const int flat_cols = dim.filter_rows * dim.channel_cols; const int img_stride = pad_rows * dim.img_cols; const int channel_stride = pad_rows * dim.channel_cols; RMatrix flat_mat(flat_rows, flat_cols); - // The processing of filters are different from the "valid" rule in two ways: // 1. The layout of input channels and output channels are switched // 2. The filters need to be rotated, which is equivalent to reversing the vector of each filter @@ -309,14 +328,18 @@ inline void convolve_full( std::vector filters_in(dim.in_channels); const int filter_size = dim.filter_rows * dim.filter_cols; const int nfilter = dim.in_channels * dim.out_channels; - for(int i = 0; i < dim.in_channels; i++) + + for (int i = 0; i < dim.in_channels; i++) { filters_in[i].resize(filter_size, dim.out_channels); } + const Scalar* reader = filter_data; - for(int i = 0; i < nfilter; i++, reader += filter_size) + + for (int i = 0; i < nfilter; i++, reader += filter_size) { - Scalar* writer = filters_in[i % dim.in_channels].data() + (i / dim.in_channels) * filter_size; + Scalar* writer = filters_in[i % dim.in_channels].data() + + (i / dim.in_channels) * filter_size; std::reverse_copy(reader, reader + filter_size, writer); } @@ -324,10 +347,10 @@ inline void convolve_full( const int& res_rows = flat_rows; const int res_cols = conv_cols * dim.out_channels; Matrix res = Matrix::Zero(res_rows, res_cols); - const int& step = dim.filter_rows; const int filter_padding = padding_left * dim.filter_rows; - for(int i = 0; i < dim.in_channels; i++, src += channel_stride) + + for (int i = 0; i < dim.in_channels; i++, src += channel_stride) { // Flatten source image flatten_mat(pad_dim, src, img_stride, n_obs, flat_mat); @@ -340,7 +363,8 @@ inline void convolve_full( const int dest_cols = res_cols * n_obs; const Scalar* res_data = res.data(); const std::size_t copy_bytes = sizeof(Scalar) * dest_rows; - for(int b = 0; b < dest_cols; b++, dest += dest_rows) + + for (int b = 0; b < dest_cols; b++, dest += dest_rows) { const int k = b / res_cols; const int l = (b % res_cols) / conv_cols; diff --git a/include/Utils/FindMax.h b/include/Utils/FindMax.h index 86343bc..f967fa7 100644 --- a/include/Utils/FindMax.h +++ b/include/Utils/FindMax.h @@ 
-3,9 +3,11 @@ #include "../Config.h" -namespace MiniDNN { +namespace MiniDNN +{ -namespace internal { +namespace internal +{ // Find the location of the maximum element in x[0], x[1], ..., x[n-1] @@ -14,7 +16,7 @@ namespace internal { template inline int find_max(const Scalar* x) { - const int loc = find_max(x); + const int loc = find_max < N - 1 > (x); return (x[N - 1] > x[loc]) ? (N - 1) : loc; } @@ -27,20 +29,24 @@ inline int find_max<2>(const Scalar* x) // n is assumed be >= 2 inline int find_max(const Scalar* x, const int n) { - switch(n) + switch (n) { case 2: return find_max<2>(x); + case 3: return find_max<3>(x); + case 4: return find_max<4>(x); + case 5: return find_max<5>(x); } int loc = find_max<6>(x); - for(int i = 6; i < n; i++) + + for (int i = 6; i < n; i++) { loc = (x[i] > x[loc]) ? i : loc; } @@ -51,47 +57,88 @@ inline int find_max(const Scalar* x, const int n) // Find the maximum element in the block x[0:(nrow-1), 0:(ncol-1)] // col_stride is the distance between x[0, 0] and x[0, 1] // Special cases for small n -inline Scalar find_block_max(const Scalar* x, const int nrow, const int ncol, const int col_stride, int& loc) +inline Scalar find_block_max(const Scalar* x, const int nrow, const int ncol, + const int col_stride, int& loc) { // Max element in the first column loc = find_max(x, nrow); Scalar val = x[loc]; - // 2nd column x += col_stride; int loc_next = find_max(x, nrow); Scalar val_next = x[loc_next]; - if(val_next > val) { loc = col_stride + loc_next; val = val_next; } - if(ncol == 2) return val; + + if (val_next > val) + { + loc = col_stride + loc_next; + val = val_next; + } + + if (ncol == 2) + { + return val; + } // 3rd column x += col_stride; loc_next = find_max(x, nrow); val_next = x[loc_next]; - if(val_next > val) { loc = 2 * col_stride + loc_next; val = val_next; } - if(ncol == 3) return val; + + if (val_next > val) + { + loc = 2 * col_stride + loc_next; + val = val_next; + } + + if (ncol == 3) + { + return val; + } // 4th column x += col_stride; loc_next = find_max(x, nrow); val_next = x[loc_next]; - if(val_next > val) { loc = 3 * col_stride + loc_next; val = val_next; } - if(ncol == 4) return val; + + if (val_next > val) + { + loc = 3 * col_stride + loc_next; + val = val_next; + } + + if (ncol == 4) + { + return val; + } // 5th column x += col_stride; loc_next = find_max(x, nrow); val_next = x[loc_next]; - if(val_next > val) { loc = 4 * col_stride + loc_next; val = val_next; } - if(ncol == 5) return val; + + if (val_next > val) + { + loc = 4 * col_stride + loc_next; + val = val_next; + } + + if (ncol == 5) + { + return val; + } // Other columns - for(int i = 5; i < ncol; i++) + for (int i = 5; i < ncol; i++) { x += col_stride; loc_next = find_max(x, nrow); val_next = x[loc_next]; - if(val_next > val) { loc = i * col_stride + loc_next; val = val_next; } + + if (val_next > val) + { + loc = i * col_stride + loc_next; + val = val_next; + } } return val; diff --git a/include/Utils/Random.h b/include/Utils/Random.h index 85563de..c099e27 100644 --- a/include/Utils/Random.h +++ b/include/Utils/Random.h @@ -5,15 +5,17 @@ #include "../Config.h" #include "../RNG.h" -namespace MiniDNN { +namespace MiniDNN +{ -namespace internal { +namespace internal +{ // Shuffle the integer array inline void shuffle(int* arr, const int n, RNG& rng) { - for(int i = n - 1; i > 0; i--) + for (int i = n - 1; i > 0; i--) { // A random non-negative integer <= i const int j = int(rng.rand() * (i + 1)); @@ -26,7 +28,8 @@ inline void shuffle(int* arr, const int n, RNG& rng) template int 
create_shuffled_batches( - const Eigen::MatrixBase& x, const Eigen::MatrixBase& y, int batch_size, RNG& rng, + const Eigen::MatrixBase& x, const Eigen::MatrixBase& y, + int batch_size, RNG& rng, std::vector& x_batches, std::vector& y_batches ) { @@ -34,32 +37,38 @@ int create_shuffled_batches( const int dimx = x.rows(); const int dimy = y.rows(); - if(y.cols() != nobs) + if (y.cols() != nobs) + { throw std::invalid_argument("Input X and Y have different number of observations"); + } // Randomly shuffle the IDs Eigen::VectorXi id = Eigen::VectorXi::LinSpaced(nobs, 0, nobs - 1); shuffle(id.data(), id.size(), rng); // Compute batch size - if(batch_size > nobs) + if (batch_size > nobs) + { batch_size = nobs; + } + const int nbatch = (nobs - 1) / batch_size + 1; const int last_batch_size = nobs - (nbatch - 1) * batch_size; - // Create shuffled data x_batches.clear(); y_batches.clear(); x_batches.reserve(nbatch); y_batches.reserve(nbatch); - for(int i = 0; i < nbatch; i++) + + for (int i = 0; i < nbatch; i++) { const int bsize = (i == nbatch - 1) ? last_batch_size : batch_size; x_batches.push_back(XType(dimx, bsize)); y_batches.push_back(YType(dimy, bsize)); // Copy data const int offset = i * batch_size; - for(int j = 0; j < bsize; j++) + + for (int j = 0; j < bsize; j++) { x_batches[i].col(j).noalias() = x.col(id[offset + j]); y_batches[i].col(j).noalias() = y.col(id[offset + j]); @@ -76,14 +85,16 @@ inline void set_normal_random(Scalar* arr, const int n, RNG& rng, { // For simplicity we use Box-Muller transform to generate normal random variates const double two_pi = 6.283185307179586476925286766559; - for(int i = 0; i < n - 1; i += 2) + + for (int i = 0; i < n - 1; i += 2) { const double t1 = sigma * std::sqrt(-2 * std::log(rng.rand())); const double t2 = two_pi * rng.rand(); arr[i] = t1 * std::cos(t2) + mu; arr[i + 1] = t1 * std::sin(t2) + mu; } - if(n % 2 == 1) + + if (n % 2 == 1) { const double t1 = sigma * std::sqrt(-2 * std::log(rng.rand())); const double t2 = two_pi * rng.rand(); diff --git a/include/Utils/sparsepp.h b/include/Utils/sparsepp.h index 4c5c972..8ce79d2 100644 --- a/include/Utils/sparsepp.h +++ b/include/Utils/sparsepp.h @@ -80,758 +80,760 @@ #define SPP_STORE_NUM_ITEMS 1 // little bit more memory, but faster!! 
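The Random.h hunk above reformats set_normal_random(), whose comment only names the Box-Muller transform it uses. For reference, here is a minimal standalone sketch of that same recipe; std::mt19937 stands in for the library's RNG class and the mu/sigma values are arbitrary example inputs, both assumptions made purely for illustration.

// Sketch of the Box-Muller transform described in set_normal_random().
// Not the library's code: the RNG and parameters are placeholders.
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <random>
#include <vector>

int main()
{
    std::mt19937 gen(42);
    // Uniforms kept strictly above 0 so std::log() stays finite.
    std::uniform_real_distribution<double> unif(1e-12, 1.0);
    const double two_pi = 6.283185307179586476925286766559;
    const double mu = 0.0, sigma = 0.01;   // arbitrary example values
    std::vector<double> arr(7);            // odd length exercises the tail case

    std::size_t i = 0;
    for (; i + 1 < arr.size(); i += 2)
    {
        const double t1 = sigma * std::sqrt(-2.0 * std::log(unif(gen)));
        const double t2 = two_pi * unif(gen);
        arr[i]     = t1 * std::cos(t2) + mu;   // first normal variate
        arr[i + 1] = t1 * std::sin(t2) + mu;   // second, from the same uniform pair
    }

    if (i < arr.size())                        // odd n: one extra draw, keep the cos term
    {
        const double t1 = sigma * std::sqrt(-2.0 * std::log(unif(gen)));
        arr[i] = t1 * std::cos(two_pi * unif(gen)) + mu;
    }

    for (double v : arr)
    {
        std::printf("%g\n", v);
    }

    return 0;
}

The same pairing of cos/sin terms, plus the separate handling of an odd count, is what the loop in set_normal_random() implements over the caller-supplied array.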
#if (SPP_GROUP_SIZE == 32) - #define SPP_SHIFT_ 5 - #define SPP_MASK_ 0x1F +#define SPP_SHIFT_ 5 +#define SPP_MASK_ 0x1F #elif (SPP_GROUP_SIZE == 64) - #define SPP_SHIFT_ 6 - #define SPP_MASK_ 0x3F +#define SPP_SHIFT_ 6 +#define SPP_MASK_ 0x3F #else - #error "SPP_GROUP_SIZE must be either 32 or 64" +#error "SPP_GROUP_SIZE must be either 32 or 64" #endif // Boost like configuration // ------------------------ #if defined __clang__ - #if defined(i386) - #include - inline void spp_cpuid(int info[4], int InfoType) { - __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]); - } - #endif +#if defined(i386) +#include +inline void spp_cpuid(int info[4], int InfoType) +{ + __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]); +} +#endif - #define SPP_POPCNT __builtin_popcount - #define SPP_POPCNT64 __builtin_popcountll +#define SPP_POPCNT __builtin_popcount +#define SPP_POPCNT64 __builtin_popcountll - #define SPP_HAS_CSTDINT +#define SPP_HAS_CSTDINT - #ifndef __has_extension - #define __has_extension __has_feature - #endif +#ifndef __has_extension +#define __has_extension __has_feature +#endif - #if !__has_feature(cxx_exceptions) && !defined(SPP_NO_EXCEPTIONS) - #define SPP_NO_EXCEPTIONS - #endif +#if !__has_feature(cxx_exceptions) && !defined(SPP_NO_EXCEPTIONS) +#define SPP_NO_EXCEPTIONS +#endif - #if !__has_feature(cxx_rtti) && !defined(SPP_NO_RTTI) - #define SPP_NO_RTTI - #endif +#if !__has_feature(cxx_rtti) && !defined(SPP_NO_RTTI) +#define SPP_NO_RTTI +#endif - #if !__has_feature(cxx_rtti) && !defined(SPP_NO_TYPEID) - #define SPP_NO_TYPEID - #endif +#if !__has_feature(cxx_rtti) && !defined(SPP_NO_TYPEID) +#define SPP_NO_TYPEID +#endif - #if defined(__int64) && !defined(__GNUC__) - #define SPP_HAS_MS_INT64 - #endif +#if defined(__int64) && !defined(__GNUC__) +#define SPP_HAS_MS_INT64 +#endif - #define SPP_HAS_NRVO +#define SPP_HAS_NRVO - // Branch prediction hints - #if defined(__has_builtin) - #if __has_builtin(__builtin_expect) - #define SPP_LIKELY(x) __builtin_expect(x, 1) - #define SPP_UNLIKELY(x) __builtin_expect(x, 0) - #endif - #endif +// Branch prediction hints +#if defined(__has_builtin) +#if __has_builtin(__builtin_expect) +#define SPP_LIKELY(x) __builtin_expect(x, 1) +#define SPP_UNLIKELY(x) __builtin_expect(x, 0) +#endif +#endif - // Clang supports "long long" in all compilation modes. - #define SPP_HAS_LONG_LONG +// Clang supports "long long" in all compilation modes. 
+#define SPP_HAS_LONG_LONG - #if !__has_feature(cxx_constexpr) - #define SPP_NO_CXX11_CONSTEXPR - #endif +#if !__has_feature(cxx_constexpr) +#define SPP_NO_CXX11_CONSTEXPR +#endif - #if !__has_feature(cxx_decltype) - #define SPP_NO_CXX11_DECLTYPE - #endif +#if !__has_feature(cxx_decltype) +#define SPP_NO_CXX11_DECLTYPE +#endif - #if !__has_feature(cxx_decltype_incomplete_return_types) - #define SPP_NO_CXX11_DECLTYPE_N3276 - #endif +#if !__has_feature(cxx_decltype_incomplete_return_types) +#define SPP_NO_CXX11_DECLTYPE_N3276 +#endif - #if !__has_feature(cxx_defaulted_functions) - #define SPP_NO_CXX11_DEFAULTED_FUNCTIONS - #endif +#if !__has_feature(cxx_defaulted_functions) +#define SPP_NO_CXX11_DEFAULTED_FUNCTIONS +#endif - #if !__has_feature(cxx_deleted_functions) - #define SPP_NO_CXX11_DELETED_FUNCTIONS - #endif +#if !__has_feature(cxx_deleted_functions) +#define SPP_NO_CXX11_DELETED_FUNCTIONS +#endif - #if !__has_feature(cxx_explicit_conversions) - #define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS - #endif +#if !__has_feature(cxx_explicit_conversions) +#define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS +#endif - #if !__has_feature(cxx_default_function_template_args) - #define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS - #endif +#if !__has_feature(cxx_default_function_template_args) +#define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS +#endif - #if !__has_feature(cxx_generalized_initializers) - #define SPP_NO_CXX11_HDR_INITIALIZER_LIST - #endif +#if !__has_feature(cxx_generalized_initializers) +#define SPP_NO_CXX11_HDR_INITIALIZER_LIST +#endif - #if !__has_feature(cxx_lambdas) - #define SPP_NO_CXX11_LAMBDAS - #endif +#if !__has_feature(cxx_lambdas) +#define SPP_NO_CXX11_LAMBDAS +#endif - #if !__has_feature(cxx_local_type_template_args) - #define SPP_NO_CXX11_LOCAL_CLASS_TEMPLATE_PARAMETERS - #endif +#if !__has_feature(cxx_local_type_template_args) +#define SPP_NO_CXX11_LOCAL_CLASS_TEMPLATE_PARAMETERS +#endif - #if !__has_feature(cxx_nullptr) - #define SPP_NO_CXX11_NULLPTR - #endif +#if !__has_feature(cxx_nullptr) +#define SPP_NO_CXX11_NULLPTR +#endif - #if !__has_feature(cxx_range_for) - #define SPP_NO_CXX11_RANGE_BASED_FOR - #endif +#if !__has_feature(cxx_range_for) +#define SPP_NO_CXX11_RANGE_BASED_FOR +#endif - #if !__has_feature(cxx_raw_string_literals) - #define SPP_NO_CXX11_RAW_LITERALS - #endif +#if !__has_feature(cxx_raw_string_literals) +#define SPP_NO_CXX11_RAW_LITERALS +#endif - #if !__has_feature(cxx_reference_qualified_functions) - #define SPP_NO_CXX11_REF_QUALIFIERS - #endif +#if !__has_feature(cxx_reference_qualified_functions) +#define SPP_NO_CXX11_REF_QUALIFIERS +#endif - #if !__has_feature(cxx_generalized_initializers) - #define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX - #endif +#if !__has_feature(cxx_generalized_initializers) +#define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX +#endif - #if !__has_feature(cxx_rvalue_references) - #define SPP_NO_CXX11_RVALUE_REFERENCES - #endif +#if !__has_feature(cxx_rvalue_references) +#define SPP_NO_CXX11_RVALUE_REFERENCES +#endif - #if !__has_feature(cxx_strong_enums) - #define SPP_NO_CXX11_SCOPED_ENUMS - #endif +#if !__has_feature(cxx_strong_enums) +#define SPP_NO_CXX11_SCOPED_ENUMS +#endif - #if !__has_feature(cxx_static_assert) - #define SPP_NO_CXX11_STATIC_ASSERT - #endif +#if !__has_feature(cxx_static_assert) +#define SPP_NO_CXX11_STATIC_ASSERT +#endif - #if !__has_feature(cxx_alias_templates) - #define SPP_NO_CXX11_TEMPLATE_ALIASES - #endif +#if !__has_feature(cxx_alias_templates) +#define SPP_NO_CXX11_TEMPLATE_ALIASES 
+#endif - #if !__has_feature(cxx_unicode_literals) - #define SPP_NO_CXX11_UNICODE_LITERALS - #endif +#if !__has_feature(cxx_unicode_literals) +#define SPP_NO_CXX11_UNICODE_LITERALS +#endif - #if !__has_feature(cxx_variadic_templates) - #define SPP_NO_CXX11_VARIADIC_TEMPLATES - #endif +#if !__has_feature(cxx_variadic_templates) +#define SPP_NO_CXX11_VARIADIC_TEMPLATES +#endif - #if !__has_feature(cxx_user_literals) - #define SPP_NO_CXX11_USER_DEFINED_LITERALS - #endif +#if !__has_feature(cxx_user_literals) +#define SPP_NO_CXX11_USER_DEFINED_LITERALS +#endif - #if !__has_feature(cxx_alignas) - #define SPP_NO_CXX11_ALIGNAS - #endif +#if !__has_feature(cxx_alignas) +#define SPP_NO_CXX11_ALIGNAS +#endif - #if !__has_feature(cxx_trailing_return) - #define SPP_NO_CXX11_TRAILING_RESULT_TYPES - #endif +#if !__has_feature(cxx_trailing_return) +#define SPP_NO_CXX11_TRAILING_RESULT_TYPES +#endif - #if !__has_feature(cxx_inline_namespaces) - #define SPP_NO_CXX11_INLINE_NAMESPACES - #endif +#if !__has_feature(cxx_inline_namespaces) +#define SPP_NO_CXX11_INLINE_NAMESPACES +#endif - #if !__has_feature(cxx_override_control) - #define SPP_NO_CXX11_FINAL - #endif +#if !__has_feature(cxx_override_control) +#define SPP_NO_CXX11_FINAL +#endif - #if !(__has_feature(__cxx_binary_literals__) || __has_extension(__cxx_binary_literals__)) - #define SPP_NO_CXX14_BINARY_LITERALS - #endif +#if !(__has_feature(__cxx_binary_literals__) || __has_extension(__cxx_binary_literals__)) +#define SPP_NO_CXX14_BINARY_LITERALS +#endif - #if !__has_feature(__cxx_decltype_auto__) - #define SPP_NO_CXX14_DECLTYPE_AUTO - #endif +#if !__has_feature(__cxx_decltype_auto__) +#define SPP_NO_CXX14_DECLTYPE_AUTO +#endif - #if !__has_feature(__cxx_aggregate_nsdmi__) - #define SPP_NO_CXX14_AGGREGATE_NSDMI - #endif +#if !__has_feature(__cxx_aggregate_nsdmi__) +#define SPP_NO_CXX14_AGGREGATE_NSDMI +#endif - #if !__has_feature(__cxx_init_captures__) - #define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES - #endif +#if !__has_feature(__cxx_init_captures__) +#define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES +#endif - #if !__has_feature(__cxx_generic_lambdas__) - #define SPP_NO_CXX14_GENERIC_LAMBDAS - #endif +#if !__has_feature(__cxx_generic_lambdas__) +#define SPP_NO_CXX14_GENERIC_LAMBDAS +#endif - #if !__has_feature(__cxx_generic_lambdas__) || !__has_feature(__cxx_relaxed_constexpr__) - #define SPP_NO_CXX14_CONSTEXPR - #endif +#if !__has_feature(__cxx_generic_lambdas__) || !__has_feature(__cxx_relaxed_constexpr__) +#define SPP_NO_CXX14_CONSTEXPR +#endif - #if !__has_feature(__cxx_return_type_deduction__) - #define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION - #endif +#if !__has_feature(__cxx_return_type_deduction__) +#define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION +#endif - #if !__has_feature(__cxx_variable_templates__) - #define SPP_NO_CXX14_VARIABLE_TEMPLATES - #endif +#if !__has_feature(__cxx_variable_templates__) +#define SPP_NO_CXX14_VARIABLE_TEMPLATES +#endif - #if __cplusplus < 201400 - #define SPP_NO_CXX14_DIGIT_SEPARATORS - #endif +#if __cplusplus < 201400 +#define SPP_NO_CXX14_DIGIT_SEPARATORS +#endif - #if defined(__has_builtin) && __has_builtin(__builtin_unreachable) - #define SPP_UNREACHABLE_RETURN(x) __builtin_unreachable(); - #endif +#if defined(__has_builtin) && __has_builtin(__builtin_unreachable) +#define SPP_UNREACHABLE_RETURN(x) __builtin_unreachable(); +#endif - #define SPP_ATTRIBUTE_UNUSED __attribute__((__unused__)) +#define SPP_ATTRIBUTE_UNUSED __attribute__((__unused__)) - #ifndef SPP_COMPILER - #define SPP_COMPILER "Clang version " 
__clang_version__ - #endif +#ifndef SPP_COMPILER +#define SPP_COMPILER "Clang version " __clang_version__ +#endif - #define SPP_CLANG 1 +#define SPP_CLANG 1 #elif defined __GNUC__ - #define SPP_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#define SPP_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) - // definition to expand macro then apply to pragma message - // #define VALUE_TO_STRING(x) #x - // #define VALUE(x) VALUE_TO_STRING(x) - // #define VAR_NAME_VALUE(var) #var "=" VALUE(var) - // #pragma message(VAR_NAME_VALUE(SPP_GCC_VERSION)) +// definition to expand macro then apply to pragma message +// #define VALUE_TO_STRING(x) #x +// #define VALUE(x) VALUE_TO_STRING(x) +// #define VAR_NAME_VALUE(var) #var "=" VALUE(var) +// #pragma message(VAR_NAME_VALUE(SPP_GCC_VERSION)) - #if defined(i386) - #include - inline void spp_cpuid(int info[4], int InfoType) { - __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]); - } - #endif +#if defined(i386) +#include +inline void spp_cpuid(int info[4], int InfoType) +{ + __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]); +} +#endif - // __POPCNT__ defined when the compiled with popcount support - // (-mpopcnt compiler option is given for example) - #ifdef __POPCNT__ - // slower unless compiled iwith -mpopcnt - #define SPP_POPCNT __builtin_popcount - #define SPP_POPCNT64 __builtin_popcountll - #endif +// __POPCNT__ defined when the compiled with popcount support +// (-mpopcnt compiler option is given for example) +#ifdef __POPCNT__ +// slower unless compiled iwith -mpopcnt +#define SPP_POPCNT __builtin_popcount +#define SPP_POPCNT64 __builtin_popcountll +#endif - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (__cplusplus >= 201103L) - #define SPP_GCC_CXX11 - #endif +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || (__cplusplus >= 201103L) +#define SPP_GCC_CXX11 +#endif - #if __GNUC__ == 3 - #if defined (__PATHSCALE__) - #define SPP_NO_TWO_PHASE_NAME_LOOKUP - #define SPP_NO_IS_ABSTRACT - #endif +#if __GNUC__ == 3 +#if defined (__PATHSCALE__) +#define SPP_NO_TWO_PHASE_NAME_LOOKUP +#define SPP_NO_IS_ABSTRACT +#endif - #if __GNUC_MINOR__ < 4 - #define SPP_NO_IS_ABSTRACT - #endif +#if __GNUC_MINOR__ < 4 +#define SPP_NO_IS_ABSTRACT +#endif - #define SPP_NO_CXX11_EXTERN_TEMPLATE - #endif +#define SPP_NO_CXX11_EXTERN_TEMPLATE +#endif - #if __GNUC__ < 4 - // - // All problems to gcc-3.x and earlier here: - // - #define SPP_NO_TWO_PHASE_NAME_LOOKUP - #ifdef __OPEN64__ - #define SPP_NO_IS_ABSTRACT - #endif - #endif - - // GCC prior to 3.4 had #pragma once too but it didn't work well with filesystem links - #if SPP_GCC_VERSION >= 30400 - #define SPP_HAS_PRAGMA_ONCE - #endif - - #if SPP_GCC_VERSION < 40400 - // Previous versions of GCC did not completely implement value-initialization: - // GCC Bug 30111, "Value-initialization of POD base class doesn't initialize - // members", reported by Jonathan Wakely in 2006, - // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=30111 (fixed for GCC 4.4) - // GCC Bug 33916, "Default constructor fails to initialize array members", - // reported by Michael Elizabeth Chastain in 2007, - // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33916 (fixed for GCC 4.2.4) - // See also: http://www.boost.org/libs/utility/value_init.htm #compiler_issues - #define SPP_NO_COMPLETE_VALUE_INITIALIZATION - #endif - - #if !defined(__EXCEPTIONS) && !defined(SPP_NO_EXCEPTIONS) - #define SPP_NO_EXCEPTIONS - #endif +#if __GNUC__ < 4 +// +// All problems to gcc-3.x and earlier here: +// 
+#define SPP_NO_TWO_PHASE_NAME_LOOKUP +#ifdef __OPEN64__ +#define SPP_NO_IS_ABSTRACT +#endif +#endif - // - // Threading support: Turn this on unconditionally here (except for - // those platforms where we can know for sure). It will get turned off again - // later if no threading API is detected. - // - #if !defined(__MINGW32__) && !defined(linux) && !defined(__linux) && !defined(__linux__) - #define SPP_HAS_THREADS - #endif +// GCC prior to 3.4 had #pragma once too but it didn't work well with filesystem links +#if SPP_GCC_VERSION >= 30400 +#define SPP_HAS_PRAGMA_ONCE +#endif - // - // gcc has "long long" - // Except on Darwin with standard compliance enabled (-pedantic) - // Apple gcc helpfully defines this macro we can query - // - #if !defined(__DARWIN_NO_LONG_LONG) - #define SPP_HAS_LONG_LONG - #endif +#if SPP_GCC_VERSION < 40400 +// Previous versions of GCC did not completely implement value-initialization: +// GCC Bug 30111, "Value-initialization of POD base class doesn't initialize +// members", reported by Jonathan Wakely in 2006, +// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=30111 (fixed for GCC 4.4) +// GCC Bug 33916, "Default constructor fails to initialize array members", +// reported by Michael Elizabeth Chastain in 2007, +// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33916 (fixed for GCC 4.2.4) +// See also: http://www.boost.org/libs/utility/value_init.htm #compiler_issues +#define SPP_NO_COMPLETE_VALUE_INITIALIZATION +#endif - // - // gcc implements the named return value optimization since version 3.1 - // - #define SPP_HAS_NRVO +#if !defined(__EXCEPTIONS) && !defined(SPP_NO_EXCEPTIONS) +#define SPP_NO_EXCEPTIONS +#endif - // Branch prediction hints - #define SPP_LIKELY(x) __builtin_expect(x, 1) - #define SPP_UNLIKELY(x) __builtin_expect(x, 0) +// +// Threading support: Turn this on unconditionally here (except for +// those platforms where we can know for sure). It will get turned off again +// later if no threading API is detected. +// +#if !defined(__MINGW32__) && !defined(linux) && !defined(__linux) && !defined(__linux__) +#define SPP_HAS_THREADS +#endif - // - // Dynamic shared object (DSO) and dynamic-link library (DLL) support - // - #if __GNUC__ >= 4 - #if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32)) && !defined(__CYGWIN__) - // All Win32 development environments, including 64-bit Windows and MinGW, define - // _WIN32 or one of its variant spellings. Note that Cygwin is a POSIX environment, - // so does not define _WIN32 or its variants. 
- #define SPP_HAS_DECLSPEC - #define SPP_SYMBOL_EXPORT __attribute__((__dllexport__)) - #define SPP_SYMBOL_IMPORT __attribute__((__dllimport__)) - #else - #define SPP_SYMBOL_EXPORT __attribute__((__visibility__("default"))) - #define SPP_SYMBOL_IMPORT - #endif - - #define SPP_SYMBOL_VISIBLE __attribute__((__visibility__("default"))) - #else - // config/platform/win32.hpp will define SPP_SYMBOL_EXPORT, etc., unless already defined - #define SPP_SYMBOL_EXPORT - #endif +// +// gcc has "long long" +// Except on Darwin with standard compliance enabled (-pedantic) +// Apple gcc helpfully defines this macro we can query +// +#if !defined(__DARWIN_NO_LONG_LONG) +#define SPP_HAS_LONG_LONG +#endif - // - // RTTI and typeinfo detection is possible post gcc-4.3: - // - #if SPP_GCC_VERSION > 40300 - #ifndef __GXX_RTTI - #ifndef SPP_NO_TYPEID - #define SPP_NO_TYPEID - #endif - #ifndef SPP_NO_RTTI - #define SPP_NO_RTTI - #endif - #endif - #endif +// +// gcc implements the named return value optimization since version 3.1 +// +#define SPP_HAS_NRVO - // - // Recent GCC versions have __int128 when in 64-bit mode. - // - // We disable this if the compiler is really nvcc with C++03 as it - // doesn't actually support __int128 as of CUDA_VERSION=7500 - // even though it defines __SIZEOF_INT128__. - // See https://svn.boost.org/trac/boost/ticket/8048 - // https://svn.boost.org/trac/boost/ticket/11852 - // Only re-enable this for nvcc if you're absolutely sure - // of the circumstances under which it's supported: - // - #if defined(__CUDACC__) - #if defined(SPP_GCC_CXX11) - #define SPP_NVCC_CXX11 - #else - #define SPP_NVCC_CXX03 - #endif - #endif - - #if defined(__SIZEOF_INT128__) && !defined(SPP_NVCC_CXX03) - #define SPP_HAS_INT128 - #endif - // - // Recent GCC versions have a __float128 native type, we need to - // include a std lib header to detect this - not ideal, but we'll - // be including later anyway when we select the std lib. - // - // Nevertheless, as of CUDA 7.5, using __float128 with the host - // compiler in pre-C++11 mode is still not supported. - // See https://svn.boost.org/trac/boost/ticket/11852 - // - #ifdef __cplusplus - #include - #else - #include - #endif +// Branch prediction hints +#define SPP_LIKELY(x) __builtin_expect(x, 1) +#define SPP_UNLIKELY(x) __builtin_expect(x, 0) - #if defined(_GLIBCXX_USE_FLOAT128) && !defined(__STRICT_ANSI__) && !defined(SPP_NVCC_CXX03) - #define SPP_HAS_FLOAT128 - #endif +// +// Dynamic shared object (DSO) and dynamic-link library (DLL) support +// +#if __GNUC__ >= 4 +#if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32)) && !defined(__CYGWIN__) +// All Win32 development environments, including 64-bit Windows and MinGW, define +// _WIN32 or one of its variant spellings. Note that Cygwin is a POSIX environment, +// so does not define _WIN32 or its variants. +#define SPP_HAS_DECLSPEC +#define SPP_SYMBOL_EXPORT __attribute__((__dllexport__)) +#define SPP_SYMBOL_IMPORT __attribute__((__dllimport__)) +#else +#define SPP_SYMBOL_EXPORT __attribute__((__visibility__("default"))) +#define SPP_SYMBOL_IMPORT +#endif - // C++0x features in 4.3.n and later - // - #if (SPP_GCC_VERSION >= 40300) && defined(SPP_GCC_CXX11) - // C++0x features are only enabled when -std=c++0x or -std=gnu++0x are - // passed on the command line, which in turn defines - // __GXX_EXPERIMENTAL_CXX0X__. 
- #define SPP_HAS_DECLTYPE - #define SPP_HAS_RVALUE_REFS - #define SPP_HAS_STATIC_ASSERT - #define SPP_HAS_VARIADIC_TMPL - #define SPP_HAS_CSTDINT - #else - #define SPP_NO_CXX11_DECLTYPE - #define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS - #define SPP_NO_CXX11_RVALUE_REFERENCES - #define SPP_NO_CXX11_STATIC_ASSERT - #endif - - // C++0x features in 4.4.n and later - // - #if (SPP_GCC_VERSION < 40400) || !defined(SPP_GCC_CXX11) - #define SPP_NO_CXX11_AUTO_DECLARATIONS - #define SPP_NO_CXX11_AUTO_MULTIDECLARATIONS - #define SPP_NO_CXX11_CHAR16_T - #define SPP_NO_CXX11_CHAR32_T - #define SPP_NO_CXX11_HDR_INITIALIZER_LIST - #define SPP_NO_CXX11_DEFAULTED_FUNCTIONS - #define SPP_NO_CXX11_DELETED_FUNCTIONS - #define SPP_NO_CXX11_TRAILING_RESULT_TYPES - #define SPP_NO_CXX11_INLINE_NAMESPACES - #define SPP_NO_CXX11_VARIADIC_TEMPLATES - #endif - - #if SPP_GCC_VERSION < 40500 - #define SPP_NO_SFINAE_EXPR - #endif - - // GCC 4.5 forbids declaration of defaulted functions in private or protected sections - #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 5) || !defined(SPP_GCC_CXX11) - #define SPP_NO_CXX11_NON_PUBLIC_DEFAULTED_FUNCTIONS - #endif - - // C++0x features in 4.5.0 and later - // - #if (SPP_GCC_VERSION < 40500) || !defined(SPP_GCC_CXX11) - #define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS - #define SPP_NO_CXX11_LAMBDAS - #define SPP_NO_CXX11_LOCAL_CLASS_TEMPLATE_PARAMETERS - #define SPP_NO_CXX11_RAW_LITERALS - #define SPP_NO_CXX11_UNICODE_LITERALS - #endif - - // C++0x features in 4.5.1 and later - // - #if (SPP_GCC_VERSION < 40501) || !defined(SPP_GCC_CXX11) - // scoped enums have a serious bug in 4.4.0, so define SPP_NO_CXX11_SCOPED_ENUMS before 4.5.1 - // See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38064 - #define SPP_NO_CXX11_SCOPED_ENUMS - #endif +#define SPP_SYMBOL_VISIBLE __attribute__((__visibility__("default"))) +#else +// config/platform/win32.hpp will define SPP_SYMBOL_EXPORT, etc., unless already defined +#define SPP_SYMBOL_EXPORT +#endif - // C++0x features in 4.6.n and later - // - #if (SPP_GCC_VERSION < 40600) || !defined(SPP_GCC_CXX11) - #define SPP_NO_CXX11_CONSTEXPR - #define SPP_NO_CXX11_NULLPTR - #define SPP_NO_CXX11_RANGE_BASED_FOR - #define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX - #endif - - // C++0x features in 4.7.n and later - // - #if (SPP_GCC_VERSION < 40700) || !defined(SPP_GCC_CXX11) - #define SPP_NO_CXX11_FINAL - #define SPP_NO_CXX11_TEMPLATE_ALIASES - #define SPP_NO_CXX11_USER_DEFINED_LITERALS - #define SPP_NO_CXX11_FIXED_LENGTH_VARIADIC_TEMPLATE_EXPANSION_PACKS - #endif - - // C++0x features in 4.8.n and later - // - #if (SPP_GCC_VERSION < 40800) || !defined(SPP_GCC_CXX11) - #define SPP_NO_CXX11_ALIGNAS - #endif +// +// RTTI and typeinfo detection is possible post gcc-4.3: +// +#if SPP_GCC_VERSION > 40300 +#ifndef __GXX_RTTI +#ifndef SPP_NO_TYPEID +#define SPP_NO_TYPEID +#endif +#ifndef SPP_NO_RTTI +#define SPP_NO_RTTI +#endif +#endif +#endif - // C++0x features in 4.8.1 and later - // - #if (SPP_GCC_VERSION < 40801) || !defined(SPP_GCC_CXX11) - #define SPP_NO_CXX11_DECLTYPE_N3276 - #define SPP_NO_CXX11_REF_QUALIFIERS - #define SPP_NO_CXX14_BINARY_LITERALS - #endif +// +// Recent GCC versions have __int128 when in 64-bit mode. +// +// We disable this if the compiler is really nvcc with C++03 as it +// doesn't actually support __int128 as of CUDA_VERSION=7500 +// even though it defines __SIZEOF_INT128__. 
+// See https://svn.boost.org/trac/boost/ticket/8048 +// https://svn.boost.org/trac/boost/ticket/11852 +// Only re-enable this for nvcc if you're absolutely sure +// of the circumstances under which it's supported: +// +#if defined(__CUDACC__) +#if defined(SPP_GCC_CXX11) +#define SPP_NVCC_CXX11 +#else +#define SPP_NVCC_CXX03 +#endif +#endif - // C++14 features in 4.9.0 and later - // - #if (SPP_GCC_VERSION < 40900) || (__cplusplus < 201300) - #define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION - #define SPP_NO_CXX14_GENERIC_LAMBDAS - #define SPP_NO_CXX14_DIGIT_SEPARATORS - #define SPP_NO_CXX14_DECLTYPE_AUTO - #if !((SPP_GCC_VERSION >= 40801) && (SPP_GCC_VERSION < 40900) && defined(SPP_GCC_CXX11)) - #define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES - #endif - #endif - - - // C++ 14: - #if !defined(__cpp_aggregate_nsdmi) || (__cpp_aggregate_nsdmi < 201304) - #define SPP_NO_CXX14_AGGREGATE_NSDMI - #endif - #if !defined(__cpp_constexpr) || (__cpp_constexpr < 201304) - #define SPP_NO_CXX14_CONSTEXPR - #endif - #if !defined(__cpp_variable_templates) || (__cpp_variable_templates < 201304) - #define SPP_NO_CXX14_VARIABLE_TEMPLATES - #endif +#if defined(__SIZEOF_INT128__) && !defined(SPP_NVCC_CXX03) +#define SPP_HAS_INT128 +#endif +// +// Recent GCC versions have a __float128 native type, we need to +// include a std lib header to detect this - not ideal, but we'll +// be including later anyway when we select the std lib. +// +// Nevertheless, as of CUDA 7.5, using __float128 with the host +// compiler in pre-C++11 mode is still not supported. +// See https://svn.boost.org/trac/boost/ticket/11852 +// +#ifdef __cplusplus +#include +#else +#include +#endif + +#if defined(_GLIBCXX_USE_FLOAT128) && !defined(__STRICT_ANSI__) && !defined(SPP_NVCC_CXX03) +#define SPP_HAS_FLOAT128 +#endif + +// C++0x features in 4.3.n and later +// +#if (SPP_GCC_VERSION >= 40300) && defined(SPP_GCC_CXX11) +// C++0x features are only enabled when -std=c++0x or -std=gnu++0x are +// passed on the command line, which in turn defines +// __GXX_EXPERIMENTAL_CXX0X__. 
+#define SPP_HAS_DECLTYPE +#define SPP_HAS_RVALUE_REFS +#define SPP_HAS_STATIC_ASSERT +#define SPP_HAS_VARIADIC_TMPL +#define SPP_HAS_CSTDINT +#else +#define SPP_NO_CXX11_DECLTYPE +#define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS +#define SPP_NO_CXX11_RVALUE_REFERENCES +#define SPP_NO_CXX11_STATIC_ASSERT +#endif + +// C++0x features in 4.4.n and later +// +#if (SPP_GCC_VERSION < 40400) || !defined(SPP_GCC_CXX11) +#define SPP_NO_CXX11_AUTO_DECLARATIONS +#define SPP_NO_CXX11_AUTO_MULTIDECLARATIONS +#define SPP_NO_CXX11_CHAR16_T +#define SPP_NO_CXX11_CHAR32_T +#define SPP_NO_CXX11_HDR_INITIALIZER_LIST +#define SPP_NO_CXX11_DEFAULTED_FUNCTIONS +#define SPP_NO_CXX11_DELETED_FUNCTIONS +#define SPP_NO_CXX11_TRAILING_RESULT_TYPES +#define SPP_NO_CXX11_INLINE_NAMESPACES +#define SPP_NO_CXX11_VARIADIC_TEMPLATES +#endif + +#if SPP_GCC_VERSION < 40500 +#define SPP_NO_SFINAE_EXPR +#endif + +// GCC 4.5 forbids declaration of defaulted functions in private or protected sections +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 5) || !defined(SPP_GCC_CXX11) +#define SPP_NO_CXX11_NON_PUBLIC_DEFAULTED_FUNCTIONS +#endif + +// C++0x features in 4.5.0 and later +// +#if (SPP_GCC_VERSION < 40500) || !defined(SPP_GCC_CXX11) +#define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS +#define SPP_NO_CXX11_LAMBDAS +#define SPP_NO_CXX11_LOCAL_CLASS_TEMPLATE_PARAMETERS +#define SPP_NO_CXX11_RAW_LITERALS +#define SPP_NO_CXX11_UNICODE_LITERALS +#endif + +// C++0x features in 4.5.1 and later +// +#if (SPP_GCC_VERSION < 40501) || !defined(SPP_GCC_CXX11) +// scoped enums have a serious bug in 4.4.0, so define SPP_NO_CXX11_SCOPED_ENUMS before 4.5.1 +// See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38064 +#define SPP_NO_CXX11_SCOPED_ENUMS +#endif + +// C++0x features in 4.6.n and later +// +#if (SPP_GCC_VERSION < 40600) || !defined(SPP_GCC_CXX11) +#define SPP_NO_CXX11_CONSTEXPR +#define SPP_NO_CXX11_NULLPTR +#define SPP_NO_CXX11_RANGE_BASED_FOR +#define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX +#endif + +// C++0x features in 4.7.n and later +// +#if (SPP_GCC_VERSION < 40700) || !defined(SPP_GCC_CXX11) +#define SPP_NO_CXX11_FINAL +#define SPP_NO_CXX11_TEMPLATE_ALIASES +#define SPP_NO_CXX11_USER_DEFINED_LITERALS +#define SPP_NO_CXX11_FIXED_LENGTH_VARIADIC_TEMPLATE_EXPANSION_PACKS +#endif + +// C++0x features in 4.8.n and later +// +#if (SPP_GCC_VERSION < 40800) || !defined(SPP_GCC_CXX11) +#define SPP_NO_CXX11_ALIGNAS +#endif + +// C++0x features in 4.8.1 and later +// +#if (SPP_GCC_VERSION < 40801) || !defined(SPP_GCC_CXX11) +#define SPP_NO_CXX11_DECLTYPE_N3276 +#define SPP_NO_CXX11_REF_QUALIFIERS +#define SPP_NO_CXX14_BINARY_LITERALS +#endif + +// C++14 features in 4.9.0 and later +// +#if (SPP_GCC_VERSION < 40900) || (__cplusplus < 201300) +#define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION +#define SPP_NO_CXX14_GENERIC_LAMBDAS +#define SPP_NO_CXX14_DIGIT_SEPARATORS +#define SPP_NO_CXX14_DECLTYPE_AUTO +#if !((SPP_GCC_VERSION >= 40801) && (SPP_GCC_VERSION < 40900) && defined(SPP_GCC_CXX11)) +#define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES +#endif +#endif - // - // Unused attribute: - #if __GNUC__ >= 4 - #define SPP_ATTRIBUTE_UNUSED __attribute__((__unused__)) - #endif - // - // __builtin_unreachable: - #if SPP_GCC_VERSION >= 40800 - #define SPP_UNREACHABLE_RETURN(x) __builtin_unreachable(); - #endif - #ifndef SPP_COMPILER - #define SPP_COMPILER "GNU C++ version " __VERSION__ - #endif +// C++ 14: +#if !defined(__cpp_aggregate_nsdmi) || (__cpp_aggregate_nsdmi < 201304) +#define SPP_NO_CXX14_AGGREGATE_NSDMI +#endif +#if 
!defined(__cpp_constexpr) || (__cpp_constexpr < 201304) +#define SPP_NO_CXX14_CONSTEXPR +#endif +#if !defined(__cpp_variable_templates) || (__cpp_variable_templates < 201304) +#define SPP_NO_CXX14_VARIABLE_TEMPLATES +#endif + +// +// Unused attribute: +#if __GNUC__ >= 4 +#define SPP_ATTRIBUTE_UNUSED __attribute__((__unused__)) +#endif +// +// __builtin_unreachable: +#if SPP_GCC_VERSION >= 40800 +#define SPP_UNREACHABLE_RETURN(x) __builtin_unreachable(); +#endif - // ConceptGCC compiler: - // http://www.generic-programming.org/software/ConceptGCC/ - #ifdef __GXX_CONCEPTS__ - #define SPP_HAS_CONCEPTS - #define SPP_COMPILER "ConceptGCC version " __VERSION__ - #endif +#ifndef SPP_COMPILER +#define SPP_COMPILER "GNU C++ version " __VERSION__ +#endif + +// ConceptGCC compiler: +// http://www.generic-programming.org/software/ConceptGCC/ +#ifdef __GXX_CONCEPTS__ +#define SPP_HAS_CONCEPTS +#define SPP_COMPILER "ConceptGCC version " __VERSION__ +#endif #elif defined _MSC_VER - #include // for __popcnt() - - #define SPP_POPCNT_CHECK // slower when defined, but we have to check! - #define spp_cpuid(info, x) __cpuid(info, x) - - #define SPP_POPCNT __popcnt - #if (SPP_GROUP_SIZE == 64 && INTPTR_MAX == INT64_MAX) - #define SPP_POPCNT64 __popcnt64 - #endif - - // Attempt to suppress VC6 warnings about the length of decorated names (obsolete): - #pragma warning( disable : 4503 ) // warning: decorated name length exceeded - - #define SPP_HAS_PRAGMA_ONCE - #define SPP_HAS_CSTDINT - - // - // versions check: - // we don't support Visual C++ prior to version 7.1: - #if _MSC_VER < 1310 - #error "Antique compiler not supported" - #endif - - #if _MSC_FULL_VER < 180020827 - #define SPP_NO_FENV_H - #endif - - #if _MSC_VER < 1400 - // although a conforming signature for swprint exists in VC7.1 - // it appears not to actually work: - #define SPP_NO_SWPRINTF - - // Our extern template tests also fail for this compiler: - #define SPP_NO_CXX11_EXTERN_TEMPLATE - - // Variadic macros do not exist for VC7.1 and lower - #define SPP_NO_CXX11_VARIADIC_MACROS - #endif - - #if _MSC_VER < 1500 // 140X == VC++ 8.0 - #undef SPP_HAS_CSTDINT - #define SPP_NO_MEMBER_TEMPLATE_FRIENDS - #endif - - #if _MSC_VER < 1600 // 150X == VC++ 9.0 - // A bug in VC9: - #define SPP_NO_ADL_BARRIER - #endif - - - // MSVC (including the latest checked version) has not yet completely - // implemented value-initialization, as is reported: - // "VC++ does not value-initialize members of derived classes without - // user-declared constructor", reported in 2009 by Sylvester Hesp: - // https: //connect.microsoft.com/VisualStudio/feedback/details/484295 - // "Presence of copy constructor breaks member class initialization", - // reported in 2009 by Alex Vakulenko: - // https: //connect.microsoft.com/VisualStudio/feedback/details/499606 - // "Value-initialization in new-expression", reported in 2005 by - // Pavel Kuznetsov (MetaCommunications Engineering): - // https: //connect.microsoft.com/VisualStudio/feedback/details/100744 - // See also: http: //www.boost.org/libs/utility/value_init.htm #compiler_issues - // (Niels Dekker, LKEB, May 2010) - #define SPP_NO_COMPLETE_VALUE_INITIALIZATION - - #ifndef _NATIVE_WCHAR_T_DEFINED - #define SPP_NO_INTRINSIC_WCHAR_T - #endif +#include // for __popcnt() - // - // check for exception handling support: - #if !defined(_CPPUNWIND) && !defined(SPP_NO_EXCEPTIONS) - #define SPP_NO_EXCEPTIONS - #endif +#define SPP_POPCNT_CHECK // slower when defined, but we have to check! 
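The SPP_POPCNT / SPP_POPCNT64 macros selected throughout this compiler-detection block exist so a sparse group can turn its bitmap of occupied slots into a dense storage offset in one instruction. The snippet below is a from-scratch sketch of that bitmap-plus-popcount lookup, not code taken from this header; the function name and the portable fallback are illustrative only.

// Sketch: popcount maps a logical slot index to a dense storage offset.
// Real builds would use __builtin_popcount / __popcnt via SPP_POPCNT.
#include <cstdint>
#include <cstdio>

static int popcount32(uint32_t x)
{
    int n = 0;
    for (; x; x &= x - 1)   // clear the lowest set bit each iteration
    {
        ++n;
    }
    return n;
}

int main()
{
    // Bit i set  <=>  logical slot i of this 32-slot group is occupied.
    const uint32_t bitmap = 0x96u;          // bits 1, 2, 4 and 7 set

    // Dense position of logical slot 7 = number of occupied slots before it.
    const int slot = 7;
    const uint32_t below = bitmap & ((1u << slot) - 1u);
    std::printf("slot %d stored at dense offset %d\n", slot, popcount32(below));
    return 0;
}

With SPP_POPCNT available this count is a single instruction, which is why the header spends so much effort detecting it per compiler.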
+#define spp_cpuid(info, x) __cpuid(info, x) - // - // __int64 support: - // - #define SPP_HAS_MS_INT64 - #if defined(_MSC_EXTENSIONS) || (_MSC_VER >= 1400) - #define SPP_HAS_LONG_LONG - #else - #define SPP_NO_LONG_LONG - #endif +#define SPP_POPCNT __popcnt +#if (SPP_GROUP_SIZE == 64 && INTPTR_MAX == INT64_MAX) +#define SPP_POPCNT64 __popcnt64 +#endif - #if (_MSC_VER >= 1400) && !defined(_DEBUG) - #define SPP_HAS_NRVO - #endif +// Attempt to suppress VC6 warnings about the length of decorated names (obsolete): +#pragma warning( disable : 4503 ) // warning: decorated name length exceeded - #if _MSC_VER >= 1500 // 150X == VC++ 9.0 - #define SPP_HAS_PRAGMA_DETECT_MISMATCH - #endif +#define SPP_HAS_PRAGMA_ONCE +#define SPP_HAS_CSTDINT - // - // disable Win32 API's if compiler extensions are - // turned off: - // - #if !defined(_MSC_EXTENSIONS) && !defined(SPP_DISABLE_WIN32) - #define SPP_DISABLE_WIN32 - #endif +// +// versions check: +// we don't support Visual C++ prior to version 7.1: +#if _MSC_VER < 1310 +#error "Antique compiler not supported" +#endif - #if !defined(_CPPRTTI) && !defined(SPP_NO_RTTI) - #define SPP_NO_RTTI - #endif +#if _MSC_FULL_VER < 180020827 +#define SPP_NO_FENV_H +#endif - // - // TR1 features: - // - #if _MSC_VER >= 1700 - // #define SPP_HAS_TR1_HASH // don't know if this is true yet. - // #define SPP_HAS_TR1_TYPE_TRAITS // don't know if this is true yet. - #define SPP_HAS_TR1_UNORDERED_MAP - #define SPP_HAS_TR1_UNORDERED_SET - #endif +#if _MSC_VER < 1400 +// although a conforming signature for swprint exists in VC7.1 +// it appears not to actually work: +#define SPP_NO_SWPRINTF - // - // C++0x features - // - // See above for SPP_NO_LONG_LONG +// Our extern template tests also fail for this compiler: +#define SPP_NO_CXX11_EXTERN_TEMPLATE - // C++ features supported by VC++ 10 (aka 2010) - // - #if _MSC_VER < 1600 - #define SPP_NO_CXX11_AUTO_DECLARATIONS - #define SPP_NO_CXX11_AUTO_MULTIDECLARATIONS - #define SPP_NO_CXX11_LAMBDAS - #define SPP_NO_CXX11_RVALUE_REFERENCES - #define SPP_NO_CXX11_STATIC_ASSERT - #define SPP_NO_CXX11_NULLPTR - #define SPP_NO_CXX11_DECLTYPE - #endif // _MSC_VER < 1600 - - #if _MSC_VER >= 1600 - #define SPP_HAS_STDINT_H - #endif - - // C++11 features supported by VC++ 11 (aka 2012) - // - #if _MSC_VER < 1700 - #define SPP_NO_CXX11_FINAL - #define SPP_NO_CXX11_RANGE_BASED_FOR - #define SPP_NO_CXX11_SCOPED_ENUMS - #endif // _MSC_VER < 1700 +// Variadic macros do not exist for VC7.1 and lower +#define SPP_NO_CXX11_VARIADIC_MACROS +#endif + +#if _MSC_VER < 1500 // 140X == VC++ 8.0 +#undef SPP_HAS_CSTDINT +#define SPP_NO_MEMBER_TEMPLATE_FRIENDS +#endif + +#if _MSC_VER < 1600 // 150X == VC++ 9.0 +// A bug in VC9: +#define SPP_NO_ADL_BARRIER +#endif - // C++11 features supported by VC++ 12 (aka 2013). 
- // - #if _MSC_FULL_VER < 180020827 - #define SPP_NO_CXX11_DEFAULTED_FUNCTIONS - #define SPP_NO_CXX11_DELETED_FUNCTIONS - #define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS - #define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS - #define SPP_NO_CXX11_RAW_LITERALS - #define SPP_NO_CXX11_TEMPLATE_ALIASES - #define SPP_NO_CXX11_TRAILING_RESULT_TYPES - #define SPP_NO_CXX11_VARIADIC_TEMPLATES - #define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX - #define SPP_NO_CXX11_DECLTYPE_N3276 - #endif - - // C++11 features supported by VC++ 14 (aka 2014) CTP1 - #if (_MSC_FULL_VER < 190021730) - #define SPP_NO_CXX11_REF_QUALIFIERS - #define SPP_NO_CXX11_USER_DEFINED_LITERALS - #define SPP_NO_CXX11_ALIGNAS - #define SPP_NO_CXX11_INLINE_NAMESPACES - #define SPP_NO_CXX14_DECLTYPE_AUTO - #define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES - #define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION - #define SPP_NO_CXX11_HDR_INITIALIZER_LIST - #endif - - // C++11 features not supported by any versions - #define SPP_NO_CXX11_CHAR16_T - #define SPP_NO_CXX11_CHAR32_T - #define SPP_NO_CXX11_CONSTEXPR - #define SPP_NO_CXX11_UNICODE_LITERALS - #define SPP_NO_SFINAE_EXPR - #define SPP_NO_TWO_PHASE_NAME_LOOKUP - - // C++ 14: - #if !defined(__cpp_aggregate_nsdmi) || (__cpp_aggregate_nsdmi < 201304) - #define SPP_NO_CXX14_AGGREGATE_NSDMI - #endif - - #if !defined(__cpp_binary_literals) || (__cpp_binary_literals < 201304) - #define SPP_NO_CXX14_BINARY_LITERALS - #endif - - #if !defined(__cpp_constexpr) || (__cpp_constexpr < 201304) - #define SPP_NO_CXX14_CONSTEXPR - #endif - - #if (__cplusplus < 201304) // There's no SD6 check for this.... - #define SPP_NO_CXX14_DIGIT_SEPARATORS - #endif - - #if !defined(__cpp_generic_lambdas) || (__cpp_generic_lambdas < 201304) - #define SPP_NO_CXX14_GENERIC_LAMBDAS - #endif - - #if !defined(__cpp_variable_templates) || (__cpp_variable_templates < 201304) - #define SPP_NO_CXX14_VARIABLE_TEMPLATES - #endif + +// MSVC (including the latest checked version) has not yet completely +// implemented value-initialization, as is reported: +// "VC++ does not value-initialize members of derived classes without +// user-declared constructor", reported in 2009 by Sylvester Hesp: +// https: //connect.microsoft.com/VisualStudio/feedback/details/484295 +// "Presence of copy constructor breaks member class initialization", +// reported in 2009 by Alex Vakulenko: +// https: //connect.microsoft.com/VisualStudio/feedback/details/499606 +// "Value-initialization in new-expression", reported in 2005 by +// Pavel Kuznetsov (MetaCommunications Engineering): +// https: //connect.microsoft.com/VisualStudio/feedback/details/100744 +// See also: http: //www.boost.org/libs/utility/value_init.htm #compiler_issues +// (Niels Dekker, LKEB, May 2010) +#define SPP_NO_COMPLETE_VALUE_INITIALIZATION + +#ifndef _NATIVE_WCHAR_T_DEFINED +#define SPP_NO_INTRINSIC_WCHAR_T +#endif + +// +// check for exception handling support: +#if !defined(_CPPUNWIND) && !defined(SPP_NO_EXCEPTIONS) +#define SPP_NO_EXCEPTIONS +#endif + +// +// __int64 support: +// +#define SPP_HAS_MS_INT64 +#if defined(_MSC_EXTENSIONS) || (_MSC_VER >= 1400) +#define SPP_HAS_LONG_LONG +#else +#define SPP_NO_LONG_LONG +#endif + +#if (_MSC_VER >= 1400) && !defined(_DEBUG) +#define SPP_HAS_NRVO +#endif + +#if _MSC_VER >= 1500 // 150X == VC++ 9.0 +#define SPP_HAS_PRAGMA_DETECT_MISMATCH +#endif + +// +// disable Win32 API's if compiler extensions are +// turned off: +// +#if !defined(_MSC_EXTENSIONS) && !defined(SPP_DISABLE_WIN32) +#define SPP_DISABLE_WIN32 +#endif + +#if 
!defined(_CPPRTTI) && !defined(SPP_NO_RTTI) +#define SPP_NO_RTTI +#endif + +// +// TR1 features: +// +#if _MSC_VER >= 1700 +// #define SPP_HAS_TR1_HASH // don't know if this is true yet. +// #define SPP_HAS_TR1_TYPE_TRAITS // don't know if this is true yet. +#define SPP_HAS_TR1_UNORDERED_MAP +#define SPP_HAS_TR1_UNORDERED_SET +#endif + +// +// C++0x features +// +// See above for SPP_NO_LONG_LONG + +// C++ features supported by VC++ 10 (aka 2010) +// +#if _MSC_VER < 1600 +#define SPP_NO_CXX11_AUTO_DECLARATIONS +#define SPP_NO_CXX11_AUTO_MULTIDECLARATIONS +#define SPP_NO_CXX11_LAMBDAS +#define SPP_NO_CXX11_RVALUE_REFERENCES +#define SPP_NO_CXX11_STATIC_ASSERT +#define SPP_NO_CXX11_NULLPTR +#define SPP_NO_CXX11_DECLTYPE +#endif // _MSC_VER < 1600 + +#if _MSC_VER >= 1600 +#define SPP_HAS_STDINT_H +#endif + +// C++11 features supported by VC++ 11 (aka 2012) +// +#if _MSC_VER < 1700 +#define SPP_NO_CXX11_FINAL +#define SPP_NO_CXX11_RANGE_BASED_FOR +#define SPP_NO_CXX11_SCOPED_ENUMS +#endif // _MSC_VER < 1700 + +// C++11 features supported by VC++ 12 (aka 2013). +// +#if _MSC_FULL_VER < 180020827 +#define SPP_NO_CXX11_DEFAULTED_FUNCTIONS +#define SPP_NO_CXX11_DELETED_FUNCTIONS +#define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS +#define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS +#define SPP_NO_CXX11_RAW_LITERALS +#define SPP_NO_CXX11_TEMPLATE_ALIASES +#define SPP_NO_CXX11_TRAILING_RESULT_TYPES +#define SPP_NO_CXX11_VARIADIC_TEMPLATES +#define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX +#define SPP_NO_CXX11_DECLTYPE_N3276 +#endif + +// C++11 features supported by VC++ 14 (aka 2014) CTP1 +#if (_MSC_FULL_VER < 190021730) +#define SPP_NO_CXX11_REF_QUALIFIERS +#define SPP_NO_CXX11_USER_DEFINED_LITERALS +#define SPP_NO_CXX11_ALIGNAS +#define SPP_NO_CXX11_INLINE_NAMESPACES +#define SPP_NO_CXX14_DECLTYPE_AUTO +#define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES +#define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION +#define SPP_NO_CXX11_HDR_INITIALIZER_LIST +#endif + +// C++11 features not supported by any versions +#define SPP_NO_CXX11_CHAR16_T +#define SPP_NO_CXX11_CHAR32_T +#define SPP_NO_CXX11_CONSTEXPR +#define SPP_NO_CXX11_UNICODE_LITERALS +#define SPP_NO_SFINAE_EXPR +#define SPP_NO_TWO_PHASE_NAME_LOOKUP + +// C++ 14: +#if !defined(__cpp_aggregate_nsdmi) || (__cpp_aggregate_nsdmi < 201304) +#define SPP_NO_CXX14_AGGREGATE_NSDMI +#endif + +#if !defined(__cpp_binary_literals) || (__cpp_binary_literals < 201304) +#define SPP_NO_CXX14_BINARY_LITERALS +#endif + +#if !defined(__cpp_constexpr) || (__cpp_constexpr < 201304) +#define SPP_NO_CXX14_CONSTEXPR +#endif + +#if (__cplusplus < 201304) // There's no SD6 check for this.... 
+#define SPP_NO_CXX14_DIGIT_SEPARATORS +#endif + +#if !defined(__cpp_generic_lambdas) || (__cpp_generic_lambdas < 201304) +#define SPP_NO_CXX14_GENERIC_LAMBDAS +#endif + +#if !defined(__cpp_variable_templates) || (__cpp_variable_templates < 201304) +#define SPP_NO_CXX14_VARIABLE_TEMPLATES +#endif #endif // from boost/config/suffix.hpp // ---------------------------- #ifndef SPP_ATTRIBUTE_UNUSED - #define SPP_ATTRIBUTE_UNUSED +#define SPP_ATTRIBUTE_UNUSED #endif // includes // -------- #if defined(SPP_HAS_CSTDINT) && (__cplusplus >= 201103) - #include +#include +#else +#if defined(__FreeBSD__) || defined(__IBMCPP__) || defined(_AIX) +#include #else - #if defined(__FreeBSD__) || defined(__IBMCPP__) || defined(_AIX) - #include - #else - #include - #endif +#include +#endif #endif #include @@ -852,16 +854,17 @@ #include #if !defined(SPP_NO_CXX11_HDR_INITIALIZER_LIST) - #include +#include #endif #if (SPP_GROUP_SIZE == 32) - typedef uint32_t group_bm_type; +typedef uint32_t group_bm_type; #else - typedef uint64_t group_bm_type; +typedef uint64_t group_bm_type; #endif -template class HashObject; // for Google's benchmark, not in spp namespace! +template class + HashObject; // for Google's benchmark, not in spp namespace! // ---------------------------------------------------------------------- // H A S H F U N C T I O N S @@ -879,62 +882,62 @@ template class HashObject; // for Google's benchmark, not in spp n #define spp_utils_h_guard_ #if defined(_MSC_VER) - #if (_MSC_VER >= 1600 ) // vs2010 (1900 is vs2015) - #include - #define SPP_HASH_CLASS std::hash - #else - #include - #define SPP_HASH_CLASS stdext::hash_compare - #endif - #if (_MSC_FULL_VER < 190021730) - #define SPP_NO_CXX11_NOEXCEPT - #endif +#if (_MSC_VER >= 1600 ) // vs2010 (1900 is vs2015) +#include +#define SPP_HASH_CLASS std::hash +#else +#include +#define SPP_HASH_CLASS stdext::hash_compare +#endif +#if (_MSC_FULL_VER < 190021730) +#define SPP_NO_CXX11_NOEXCEPT +#endif #elif defined __clang__ - #if __has_feature(cxx_noexcept) // what to use here? - #include - #define SPP_HASH_CLASS std::hash - #else - #include - #define SPP_HASH_CLASS std::tr1::hash - #endif - - #if !__has_feature(cxx_noexcept) - #define SPP_NO_CXX11_NOEXCEPT - #endif +#if __has_feature(cxx_noexcept) // what to use here? 
+#include +#define SPP_HASH_CLASS std::hash +#else +#include +#define SPP_HASH_CLASS std::tr1::hash +#endif + +#if !__has_feature(cxx_noexcept) +#define SPP_NO_CXX11_NOEXCEPT +#endif #elif defined(__GNUC__) - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (__cplusplus >= 201103L) - #include - #define SPP_HASH_CLASS std::hash - - #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) < 40600 - #define SPP_NO_CXX11_NOEXCEPT - #endif - #else - #include - #define SPP_HASH_CLASS std::tr1::hash - #define SPP_NO_CXX11_NOEXCEPT - #endif +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || (__cplusplus >= 201103L) +#include +#define SPP_HASH_CLASS std::hash + +#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) < 40600 +#define SPP_NO_CXX11_NOEXCEPT +#endif #else - #include - #define SPP_HASH_CLASS std::hash +#include +#define SPP_HASH_CLASS std::tr1::hash +#define SPP_NO_CXX11_NOEXCEPT +#endif +#else +#include +#define SPP_HASH_CLASS std::hash #endif #ifdef SPP_NO_CXX11_NOEXCEPT - #define SPP_NOEXCEPT +#define SPP_NOEXCEPT #else - #define SPP_NOEXCEPT noexcept +#define SPP_NOEXCEPT noexcept #endif #ifdef SPP_NO_CXX11_CONSTEXPR - #define SPP_CONSTEXPR +#define SPP_CONSTEXPR #else - #define SPP_CONSTEXPR constexpr +#define SPP_CONSTEXPR constexpr #endif #define SPP_INLINE #ifndef SPP_NAMESPACE - #define SPP_NAMESPACE spp +#define SPP_NAMESPACE spp #endif namespace SPP_NAMESPACE @@ -943,7 +946,7 @@ namespace SPP_NAMESPACE template struct spp_hash { - SPP_INLINE size_t operator()(const T &__v) const SPP_NOEXCEPT + SPP_INLINE size_t operator()(const T& __v) const SPP_NOEXCEPT { SPP_HASH_CLASS hasher; return hasher(__v); @@ -951,22 +954,25 @@ struct spp_hash }; template -struct spp_hash +struct spp_hash { static size_t spp_log2 (size_t val) SPP_NOEXCEPT { size_t res = 0; + while (val > 1) { val >>= 1; res++; } + return res; } - SPP_INLINE size_t operator()(const T *__v) const SPP_NOEXCEPT + SPP_INLINE size_t operator()(const T* __v) const SPP_NOEXCEPT { - static const size_t shift = 3; // spp_log2(1 + sizeof(T)); // T might be incomplete! + static const size_t shift = + 3; // spp_log2(1 + sizeof(T)); // T might be incomplete! 
const uintptr_t i = (const uintptr_t)__v; return static_cast(i >> shift); } @@ -999,77 +1005,100 @@ template <> struct spp_hash : public std::unary_function { SPP_INLINE size_t operator()(bool __v) const SPP_NOEXCEPT - { return static_cast(__v); } + { + return static_cast(__v); + } }; template <> struct spp_hash : public std::unary_function { SPP_INLINE size_t operator()(char __v) const SPP_NOEXCEPT - { return static_cast(__v); } + { + return static_cast(__v); + } }; template <> struct spp_hash : public std::unary_function { SPP_INLINE size_t operator()(signed char __v) const SPP_NOEXCEPT - { return static_cast(__v); } + { + return static_cast(__v); + } }; template <> -struct spp_hash : public std::unary_function +struct spp_hash : public + std::unary_function { SPP_INLINE size_t operator()(unsigned char __v) const SPP_NOEXCEPT - { return static_cast(__v); } + { + return static_cast(__v); + } }; template <> struct spp_hash : public std::unary_function { SPP_INLINE size_t operator()(wchar_t __v) const SPP_NOEXCEPT - { return static_cast(__v); } + { + return static_cast(__v); + } }; template <> struct spp_hash : public std::unary_function { SPP_INLINE size_t operator()(int16_t __v) const SPP_NOEXCEPT - { return spp_mix_32(static_cast(__v)); } + { + return spp_mix_32(static_cast(__v)); + } }; template <> struct spp_hash : public std::unary_function { SPP_INLINE size_t operator()(uint16_t __v) const SPP_NOEXCEPT - { return spp_mix_32(static_cast(__v)); } + { + return spp_mix_32(static_cast(__v)); + } }; template <> struct spp_hash : public std::unary_function { SPP_INLINE size_t operator()(int32_t __v) const SPP_NOEXCEPT - { return spp_mix_32(static_cast(__v)); } + { + return spp_mix_32(static_cast(__v)); + } }; template <> struct spp_hash : public std::unary_function { SPP_INLINE size_t operator()(uint32_t __v) const SPP_NOEXCEPT - { return spp_mix_32(static_cast(__v)); } + { + return spp_mix_32(static_cast(__v)); + } }; template <> struct spp_hash : public std::unary_function { SPP_INLINE size_t operator()(int64_t __v) const SPP_NOEXCEPT - { return spp_mix_64(static_cast(__v)); } + { + return spp_mix_64(static_cast(__v)); + } }; template <> struct spp_hash : public std::unary_function { SPP_INLINE size_t operator()(uint64_t __v) const SPP_NOEXCEPT - { return spp_mix_64(static_cast(__v)); } + { + return spp_mix_64(static_cast(__v)); + } }; template <> @@ -1078,7 +1107,7 @@ struct spp_hash : public std::unary_function SPP_INLINE size_t operator()(float __v) const SPP_NOEXCEPT { // -0.0 and 0.0 should return same hash - uint32_t *as_int = reinterpret_cast(&__v); + uint32_t* as_int = reinterpret_cast(&__v); return (__v == 0) ? static_cast(0) : spp_mix_32(*as_int); } }; @@ -1089,7 +1118,7 @@ struct spp_hash : public std::unary_function SPP_INLINE size_t operator()(double __v) const SPP_NOEXCEPT { // -0.0 and 0.0 should return same hash - uint64_t *as_int = reinterpret_cast(&__v); + uint64_t* as_int = reinterpret_cast(&__v); return (__v == 0) ? 
static_cast(0) : spp_mix_64(*as_int); } }; @@ -1120,7 +1149,6 @@ inline void hash_combine(std::size_t& seed, T const& v) { spp::spp_hash hasher; Combiner combiner; - combiner(seed, hasher(v)); } @@ -1155,13 +1183,13 @@ struct cvt }; template -struct cvt > +struct cvt> { typedef std::pair type; }; template -struct cvt > +struct cvt> { typedef const std::pair type; }; @@ -1170,9 +1198,9 @@ struct cvt > // M O V E I T E R A T O R // ---------------------------------------------------------------------- #ifdef SPP_NO_CXX11_RVALUE_REFERENCES - #define MK_MOVE_IT(p) (p) +#define MK_MOVE_IT(p) (p) #else - #define MK_MOVE_IT(p) std::make_move_iterator(p) +#define MK_MOVE_IT(p) std::make_move_iterator(p) #endif @@ -1182,58 +1210,68 @@ struct cvt > template class libc_allocator_with_realloc { -public: - typedef T value_type; - typedef size_t size_type; - typedef ptrdiff_t difference_type; + public: + typedef T value_type; + typedef size_t size_type; + typedef ptrdiff_t difference_type; - typedef T* pointer; - typedef const T* const_pointer; - typedef T& reference; - typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; - libc_allocator_with_realloc() {} - libc_allocator_with_realloc(const libc_allocator_with_realloc& /*unused*/) {} - ~libc_allocator_with_realloc() {} + libc_allocator_with_realloc() {} + libc_allocator_with_realloc(const libc_allocator_with_realloc& /*unused*/) {} + ~libc_allocator_with_realloc() {} - pointer address(reference r) const { return &r; } - const_pointer address(const_reference r) const { return &r; } + pointer address(reference r) const + { + return &r; + } + const_pointer address(const_reference r) const + { + return &r; + } - pointer allocate(size_type n, const_pointer /*unused*/= 0) - { - return static_cast(malloc(n * sizeof(value_type))); - } + pointer allocate(size_type n, const_pointer /*unused*/ = 0) + { + return static_cast(malloc(n * sizeof(value_type))); + } - void deallocate(pointer p, size_type /*unused*/) - { - free(p); - } + void deallocate(pointer p, size_type /*unused*/) + { + free(p); + } - pointer reallocate(pointer p, size_type n) - { - return static_cast(realloc(p, n * sizeof(value_type))); - } + pointer reallocate(pointer p, size_type n) + { + return static_cast(realloc(p, n * sizeof(value_type))); + } - size_type max_size() const - { - return static_cast(-1) / sizeof(value_type); - } + size_type max_size() const + { + return static_cast(-1) / sizeof(value_type); + } - void construct(pointer p, const value_type& val) - { - new(p) value_type(val); - } + void construct(pointer p, const value_type& val) + { + new (p) value_type(val); + } - void destroy(pointer p) { p->~value_type(); } + void destroy(pointer p) + { + p->~value_type(); + } - template - explicit libc_allocator_with_realloc(const libc_allocator_with_realloc& /*unused*/) {} + template + explicit libc_allocator_with_realloc(const + libc_allocator_with_realloc& /*unused*/) {} - template - struct rebind - { - typedef libc_allocator_with_realloc other; - }; + template + struct rebind + { + typedef libc_allocator_with_realloc other; + }; }; // ---------------------------------------------------------------------- @@ -1242,18 +1280,18 @@ class libc_allocator_with_realloc template<> class libc_allocator_with_realloc { -public: - typedef void value_type; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef void* pointer; - typedef const void* const_pointer; - - template - struct 
rebind - { - typedef libc_allocator_with_realloc other; - }; + public: + typedef void value_type; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef void* pointer; + typedef const void* const_pointer; + + template + struct rebind + { + typedef libc_allocator_with_realloc other; + }; }; template @@ -1274,11 +1312,11 @@ inline bool operator!=(const libc_allocator_with_realloc& /*unused*/, // I N T E R N A L S T U F F // ---------------------------------------------------------------------- #ifdef SPP_NO_CXX11_STATIC_ASSERT - template struct SppCompileAssert { }; - #define SPP_COMPILE_ASSERT(expr, msg) \ +template struct SppCompileAssert { }; +#define SPP_COMPILE_ASSERT(expr, msg) \ SPP_ATTRIBUTE_UNUSED typedef SppCompileAssert<(bool(expr))> spp_bogus_[bool(expr) ? 1 : -1] #else - #define SPP_COMPILE_ASSERT static_assert +#define SPP_COMPILE_ASSERT static_assert #endif namespace sparsehash_internal @@ -1310,163 +1348,180 @@ namespace sparsehash_internal // the type, and use a second, void* arg to achieve the desired // 'catch-all' semantics. - // ----- low-level I/O for FILE* ---- +// ----- low-level I/O for FILE* ---- - template - inline bool read_data_internal(Ignored* /*unused*/, FILE* fp, - void* data, size_t length) - { - return fread(data, length, 1, fp) == 1; - } +template +inline bool read_data_internal(Ignored* /*unused*/, FILE* fp, + void* data, size_t length) +{ + return fread(data, length, 1, fp) == 1; +} - template - inline bool write_data_internal(Ignored* /*unused*/, FILE* fp, - const void* data, size_t length) - { - return fwrite(data, length, 1, fp) == 1; - } +template +inline bool write_data_internal(Ignored* /*unused*/, FILE* fp, + const void* data, size_t length) +{ + return fwrite(data, length, 1, fp) == 1; +} - // ----- low-level I/O for iostream ---- +// ----- low-level I/O for iostream ---- - // We want the caller to be responsible for #including , not - // us, because iostream is a big header! According to the standard, - // it's only legal to delay the instantiation the way we want to if - // the istream/ostream is a template type. So we jump through hoops. - template - inline bool read_data_internal_for_istream(ISTREAM* fp, - void* data, size_t length) - { - return fp->read(reinterpret_cast(data), - static_cast(length)).good(); - } - template - inline bool read_data_internal(Ignored* /*unused*/, std::istream* fp, - void* data, size_t length) - { - return read_data_internal_for_istream(fp, data, length); - } +// We want the caller to be responsible for #including , not +// us, because iostream is a big header! According to the standard, +// it's only legal to delay the instantiation the way we want to if +// the istream/ostream is a template type. So we jump through hoops. 
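A quick illustration of the "jump through hoops" above: because the stream is a
template parameter, the function body is only instantiated, and therefore only
needs the stream's definition, at an actual call site. A minimal standalone
sketch, not part of the patch; the name write_text is purely illustrative:

    #include <cstddef>
    #include <cstdio>

    // OSTREAM is a template parameter, so this body is only type-checked when
    // write_text() is actually called with a concrete stream type; a file
    // that never calls it never needs <ostream>.
    template <typename OSTREAM>
    bool write_text(OSTREAM* os, const char* data, std::size_t len)
    {
        return os->write(data, len).good();
    }

    int main()
    {
        // write_text() is never instantiated here, so this translation unit
        // builds without including <ostream>.
        std::puts("ok");
        return 0;
    }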
+template +inline bool read_data_internal_for_istream(ISTREAM* fp, + void* data, size_t length) +{ + return fp->read(reinterpret_cast(data), + static_cast(length)).good(); +} +template +inline bool read_data_internal(Ignored* /*unused*/, std::istream* fp, + void* data, size_t length) +{ + return read_data_internal_for_istream(fp, data, length); +} - template - inline bool write_data_internal_for_ostream(OSTREAM* fp, - const void* data, size_t length) - { - return fp->write(reinterpret_cast(data), - static_cast(length)).good(); - } - template - inline bool write_data_internal(Ignored* /*unused*/, std::ostream* fp, - const void* data, size_t length) - { - return write_data_internal_for_ostream(fp, data, length); - } +template +inline bool write_data_internal_for_ostream(OSTREAM* fp, + const void* data, size_t length) +{ + return fp->write(reinterpret_cast(data), + static_cast(length)).good(); +} +template +inline bool write_data_internal(Ignored* /*unused*/, std::ostream* fp, + const void* data, size_t length) +{ + return write_data_internal_for_ostream(fp, data, length); +} - // ----- low-level I/O for custom streams ---- +// ----- low-level I/O for custom streams ---- - // The INPUT type needs to support a Read() method that takes a - // buffer and a length and returns the number of bytes read. - template - inline bool read_data_internal(INPUT* fp, void* /*unused*/, - void* data, size_t length) - { - return static_cast(fp->Read(data, length)) == length; - } +// The INPUT type needs to support a Read() method that takes a +// buffer and a length and returns the number of bytes read. +template +inline bool read_data_internal(INPUT* fp, void* /*unused*/, + void* data, size_t length) +{ + return static_cast(fp->Read(data, length)) == length; +} - // The OUTPUT type needs to support a Write() operation that takes - // a buffer and a length and returns the number of bytes written. - template - inline bool write_data_internal(OUTPUT* fp, void* /*unused*/, - const void* data, size_t length) - { - return static_cast(fp->Write(data, length)) == length; - } +// The OUTPUT type needs to support a Write() operation that takes +// a buffer and a length and returns the number of bytes written. +template +inline bool write_data_internal(OUTPUT* fp, void* /*unused*/, + const void* data, size_t length) +{ + return static_cast(fp->Write(data, length)) == length; +} - // ----- low-level I/O: the public API ---- +// ----- low-level I/O: the public API ---- - template - inline bool read_data(INPUT* fp, void* data, size_t length) - { - return read_data_internal(fp, fp, data, length); - } +template +inline bool read_data(INPUT* fp, void* data, size_t length) +{ + return read_data_internal(fp, fp, data, length); +} - template - inline bool write_data(OUTPUT* fp, const void* data, size_t length) - { - return write_data_internal(fp, fp, data, length); - } +template +inline bool write_data(OUTPUT* fp, const void* data, size_t length) +{ + return write_data_internal(fp, fp, data, length); +} + +// Uses read_data() and write_data() to read/write an integer. +// length is the number of bytes to read/write (which may differ +// from sizeof(IntType), allowing us to save on a 32-bit system +// and load on a 64-bit system). Excess bytes are taken to be 0. +// INPUT and OUTPUT must match legal inputs to read/write_data (above). 
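Spelled out for a plain FILE*, the fixed-length big-endian encoding described
above looks roughly as follows; this is a standalone sketch of the same byte
layout, not the library routines themselves:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Write `value` as `length` big-endian bytes; byte positions that do not
    // fit in sizeof(value) are emitted as 0, so a value saved with length 4
    // on a 32-bit build reloads with length 4 on a 64-bit build.
    bool put_bigendian(std::FILE* fp, std::uint64_t value, std::size_t length)
    {
        for (std::size_t i = 0; i < length; ++i)
        {
            unsigned char byte =
                (length - 1 - i >= sizeof(value))
                    ? static_cast<unsigned char>(0)
                    : static_cast<unsigned char>(
                          (value >> ((length - 1 - i) * 8)) & 255);

            if (std::fwrite(&byte, 1, 1, fp) != 1)
            {
                return false;
            }
        }

        return true;
    }

    // Read `length` big-endian bytes back into an unsigned integer.
    bool get_bigendian(std::FILE* fp, std::uint64_t* value, std::size_t length)
    {
        *value = 0;

        for (std::size_t i = 0; i < length; ++i)
        {
            unsigned char byte;

            if (std::fread(&byte, 1, 1, fp) != 1)
            {
                return false;
            }

            *value |= static_cast<std::uint64_t>(byte) << ((length - 1 - i) * 8);
        }

        return true;
    }

    int main()
    {
        std::FILE* fp = std::tmpfile();
        std::uint64_t out = 0;
        put_bigendian(fp, 123456u, 4);   // stored in exactly 4 bytes
        std::rewind(fp);
        get_bigendian(fp, &out, 4);      // recovered regardless of word size
        std::printf("%llu\n", static_cast<unsigned long long>(out));
        std::fclose(fp);
        return 0;
    }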
+// -------------------------------------------------------------------- +template +bool read_bigendian_number(INPUT* fp, IntType* value, size_t length) +{ + *value = 0; + unsigned char byte; + // We require IntType to be unsigned or else the shifting gets all screwy. + SPP_COMPILE_ASSERT(static_cast(-1) > static_cast(0), + "serializing_int_requires_an_unsigned_type"); - // Uses read_data() and write_data() to read/write an integer. - // length is the number of bytes to read/write (which may differ - // from sizeof(IntType), allowing us to save on a 32-bit system - // and load on a 64-bit system). Excess bytes are taken to be 0. - // INPUT and OUTPUT must match legal inputs to read/write_data (above). - // -------------------------------------------------------------------- - template - bool read_bigendian_number(INPUT* fp, IntType* value, size_t length) + for (size_t i = 0; i < length; ++i) { - *value = 0; - unsigned char byte; - // We require IntType to be unsigned or else the shifting gets all screwy. - SPP_COMPILE_ASSERT(static_cast(-1) > static_cast(0), "serializing_int_requires_an_unsigned_type"); - for (size_t i = 0; i < length; ++i) + if (!read_data(fp, &byte, sizeof(byte))) { - if (!read_data(fp, &byte, sizeof(byte))) - return false; - *value |= static_cast(byte) << ((length - 1 - i) * 8); + return false; } - return true; + + *value |= static_cast(byte) << ((length - 1 - i) * 8); } - template - bool write_bigendian_number(OUTPUT* fp, IntType value, size_t length) + return true; +} + +template +bool write_bigendian_number(OUTPUT* fp, IntType value, size_t length) +{ + unsigned char byte; + // We require IntType to be unsigned or else the shifting gets all screwy. + SPP_COMPILE_ASSERT(static_cast(-1) > static_cast(0), + "serializing_int_requires_an_unsigned_type"); + + for (size_t i = 0; i < length; ++i) { - unsigned char byte; - // We require IntType to be unsigned or else the shifting gets all screwy. - SPP_COMPILE_ASSERT(static_cast(-1) > static_cast(0), "serializing_int_requires_an_unsigned_type"); - for (size_t i = 0; i < length; ++i) + byte = (sizeof(value) <= length - 1 - i) + ? static_cast(0) : static_cast((value >> (( + length - 1 - i) * 8)) & 255); + + if (!write_data(fp, &byte, sizeof(byte))) { - byte = (sizeof(value) <= length-1 - i) - ? static_cast(0) : static_cast((value >> ((length-1 - i) * 8)) & 255); - if (!write_data(fp, &byte, sizeof(byte))) return false; + return false; } - return true; } - // If your keys and values are simple enough, you can pass this - // serializer to serialize()/unserialize(). "Simple enough" means - // value_type is a POD type that contains no pointers. Note, - // however, we don't try to normalize endianness. - // This is the type used for NopointerSerializer. - // --------------------------------------------------------------- - template struct pod_serializer + return true; +} + +// If your keys and values are simple enough, you can pass this +// serializer to serialize()/unserialize(). "Simple enough" means +// value_type is a POD type that contains no pointers. Note, +// however, we don't try to normalize endianness. +// This is the type used for NopointerSerializer. 
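In practice the "serializer" the comment refers to is just a functor with one
overload that writes a value_type and one that reads it back, the same call
shape as pod_serializer below. A small sketch under that assumption, with an
illustrative POD and a FILE*; neither Point nor point_serializer is part of
the patch:

    #include <cstdio>

    struct Point          // POD with no pointers, so raw bytes round-trip
    {
        int x;
        int y;
    };

    // Same call shape as pod_serializer: (stream, value_type*) reads,
    // (stream, const value_type&) writes. No endianness normalization,
    // so files are only portable between machines of the same layout.
    struct point_serializer
    {
        bool operator()(std::FILE* fp, Point* p) const
        {
            return std::fread(p, sizeof(*p), 1, fp) == 1;
        }

        bool operator()(std::FILE* fp, const Point& p) const
        {
            return std::fwrite(&p, sizeof(p), 1, fp) == 1;
        }
    };

    int main()
    {
        std::FILE* fp = std::tmpfile();
        point_serializer io;
        Point a = {3, 4};
        Point b = {0, 0};
        io(fp, a);           // write overload
        std::rewind(fp);
        io(fp, &b);          // read overload
        std::printf("%d %d\n", b.x, b.y);
        std::fclose(fp);
        return 0;
    }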
+// --------------------------------------------------------------- +template struct pod_serializer +{ + template + bool operator()(INPUT* fp, value_type* value) const { - template - bool operator()(INPUT* fp, value_type* value) const - { - return read_data(fp, value, sizeof(*value)); - } + return read_data(fp, value, sizeof(*value)); + } - template - bool operator()(OUTPUT* fp, const value_type& value) const - { - return write_data(fp, &value, sizeof(value)); - } - }; + template + bool operator()(OUTPUT* fp, const value_type& value) const + { + return write_data(fp, &value, sizeof(value)); + } +}; - // Settings contains parameters for growing and shrinking the table. - // It also packages zero-size functor (ie. hasher). - // - // It does some munging of the hash value for the cases where - // the original hash function is not be very good. - // --------------------------------------------------------------- - template - class sh_hashtable_settings : public HashFunc - { +// Settings contains parameters for growing and shrinking the table. +// It also packages zero-size functor (ie. hasher). +// +// It does some munging of the hash value for the cases where +// the original hash function is not be very good. +// --------------------------------------------------------------- +template +class sh_hashtable_settings : public HashFunc +{ private: #ifndef SPP_MIX_HASH template struct Mixer { - inline T operator()(T h) const { return h; } + inline T operator()(T h) const + { + return h; + } }; #else template struct Mixer @@ -1474,7 +1529,7 @@ namespace sparsehash_internal inline T operator()(T h) const; }; - template struct Mixer + template struct Mixer { inline T operator()(T h) const { @@ -1530,28 +1585,69 @@ namespace sparsehash_internal { size_t h = hasher::operator()(v); Mixer mixer; - return mixer(h); } - float enlarge_factor() const { return enlarge_factor_; } - void set_enlarge_factor(float f) { enlarge_factor_ = f; } - float shrink_factor() const { return shrink_factor_; } - void set_shrink_factor(float f) { shrink_factor_ = f; } + float enlarge_factor() const + { + return enlarge_factor_; + } + void set_enlarge_factor(float f) + { + enlarge_factor_ = f; + } + float shrink_factor() const + { + return shrink_factor_; + } + void set_shrink_factor(float f) + { + shrink_factor_ = f; + } - size_type enlarge_threshold() const { return enlarge_threshold_; } - void set_enlarge_threshold(size_type t) { enlarge_threshold_ = t; } - size_type shrink_threshold() const { return shrink_threshold_; } - void set_shrink_threshold(size_type t) { shrink_threshold_ = t; } + size_type enlarge_threshold() const + { + return enlarge_threshold_; + } + void set_enlarge_threshold(size_type t) + { + enlarge_threshold_ = t; + } + size_type shrink_threshold() const + { + return shrink_threshold_; + } + void set_shrink_threshold(size_type t) + { + shrink_threshold_ = t; + } - size_type enlarge_size(size_type x) const { return static_cast(x * enlarge_factor_); } - size_type shrink_size(size_type x) const { return static_cast(x * shrink_factor_); } + size_type enlarge_size(size_type x) const + { + return static_cast(x * enlarge_factor_); + } + size_type shrink_size(size_type x) const + { + return static_cast(x * shrink_factor_); + } - bool consider_shrink() const { return consider_shrink_; } - void set_consider_shrink(bool t) { consider_shrink_ = t; } + bool consider_shrink() const + { + return consider_shrink_; + } + void set_consider_shrink(bool t) + { + consider_shrink_ = t; + } - unsigned int num_ht_copies() const { 
return num_ht_copies_; } - void inc_num_ht_copies() { ++num_ht_copies_; } + unsigned int num_ht_copies() const + { + return num_ht_copies_; + } + void inc_num_ht_copies() + { + ++num_ht_copies_; + } // Reset the enlarge and shrink thresholds void reset_thresholds(size_type num_buckets) @@ -1569,8 +1665,12 @@ namespace sparsehash_internal { assert(shrink >= 0); assert(grow <= 1); - if (shrink > grow/2.0f) - shrink = grow / 2.0f; // otherwise we thrash hashtable size + + if (shrink > grow / 2.0f) + { + shrink = grow / 2.0f; // otherwise we thrash hashtable size + } + set_shrink_factor(shrink); set_enlarge_factor(grow); } @@ -1582,16 +1682,22 @@ namespace sparsehash_internal { float enlarge = enlarge_factor(); size_type sz = HT_MIN_BUCKETS; // min buckets allowed + while (sz < min_buckets_wanted || - num_elts >= static_cast(sz * enlarge)) + num_elts >= static_cast(sz * enlarge)) { // This just prevents overflowing size_type, since sz can exceed // max_size() here. // ------------------------------------------------------------- if (static_cast(sz * 2) < sz) - throw_exception(std::length_error("resize overflow")); // protect against overflow + { + throw_exception( + std::length_error("resize overflow")); // protect against overflow + } + sz *= 2; } + return sz; } @@ -1602,8 +1708,9 @@ namespace sparsehash_internal float shrink_factor_; // how empty before resize bool consider_shrink_; // if we should try to shrink before next insert - unsigned int num_ht_copies_; // num_ht_copies is a counter incremented every Copy/Move - }; + unsigned int + num_ht_copies_; // num_ht_copies is a counter incremented every Copy/Move +}; } // namespace sparsehash_internal @@ -1679,7 +1786,10 @@ namespace sparsehash_internal // type_traits we need // --------------------------------------------------------------------------- template -struct integral_constant { static const T value = v; }; +struct integral_constant +{ + static const T value = v; +}; template const T integral_constant::value; @@ -1689,13 +1799,25 @@ typedef integral_constant false_type; template struct is_same : public false_type { }; template struct is_same : public true_type { }; -template struct remove_const { typedef T type; }; -template struct remove_const { typedef T type; }; +template struct remove_const +{ + typedef T type; +}; +template struct remove_const +{ + typedef T type; +}; -template struct remove_volatile { typedef T type; }; -template struct remove_volatile { typedef T type; }; +template struct remove_volatile +{ + typedef T type; +}; +template struct remove_volatile +{ + typedef T type; +}; -template struct remove_cv +template struct remove_cv { typedef typename remove_const::type>::type type; }; @@ -1714,8 +1836,8 @@ template<> struct is_integral : true_type { }; template<> struct is_integral : true_type { }; template<> struct is_integral : true_type { }; #ifdef SPP_HAS_LONG_LONG - template<> struct is_integral : true_type { }; - template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; #endif template struct is_integral : is_integral { }; template struct is_integral : is_integral { }; @@ -1727,9 +1849,12 @@ template struct is_floating_point : false_type { }; template<> struct is_floating_point : true_type { }; template<> struct is_floating_point : true_type { }; template<> struct is_floating_point : true_type { }; -template struct is_floating_point : is_floating_point { }; -template struct is_floating_point : is_floating_point { }; -template 
struct is_floating_point : is_floating_point { }; +template struct is_floating_point : + is_floating_point { }; +template struct is_floating_point : + is_floating_point { }; +template struct is_floating_point : + is_floating_point { }; // ---------------- is_pointer ---------------------------------------- template struct is_pointer; @@ -1752,17 +1877,23 @@ template struct is_reference : true_type {}; // ------------------------------------------------------------------------ template struct is_relocatable; template struct is_relocatable : - integral_constant::value || is_floating_point::value)> +integral_constant < bool, (is_integral::value + || is_floating_point::value) > { }; -template struct is_relocatable > : true_type { }; - -template struct is_relocatable : is_relocatable { }; -template struct is_relocatable : is_relocatable { }; -template struct is_relocatable : is_relocatable { }; -template struct is_relocatable : is_relocatable { }; -template struct is_relocatable > : - integral_constant::value && is_relocatable::value)> +template struct is_relocatable> : true_type { }; + +template struct is_relocatable : + is_relocatable { }; +template struct is_relocatable : + is_relocatable { }; +template struct is_relocatable : + is_relocatable { }; +template struct is_relocatable : + is_relocatable { }; +template struct is_relocatable> : + integral_constant < bool, + (is_relocatable::value&& is_relocatable::value) > { }; // --------------------------------------------------------------------------- @@ -1800,187 +1931,265 @@ template struct is_relocatable > : template class table_iterator { -public: - typedef table_iterator iterator; - - typedef std::random_access_iterator_tag iterator_category; - typedef typename tabletype::value_type value_type; - typedef typename tabletype::difference_type difference_type; - typedef typename tabletype::size_type size_type; - - explicit table_iterator(tabletype *tbl = 0, size_type p = 0) : - table(tbl), pos(p) - { } + public: + typedef table_iterator iterator; - // Helper function to assert things are ok; eg pos is still in range - void check() const - { - assert(table); - assert(pos <= table->size()); - } + typedef std::random_access_iterator_tag iterator_category; + typedef typename tabletype::value_type value_type; + typedef typename tabletype::difference_type difference_type; + typedef typename tabletype::size_type size_type; - // Arithmetic: we just do arithmetic on pos. We don't even need to - // do bounds checking, since STL doesn't consider that its job. :-) - iterator& operator+=(size_type t) { pos += t; check(); return *this; } - iterator& operator-=(size_type t) { pos -= t; check(); return *this; } - iterator& operator++() { ++pos; check(); return *this; } - iterator& operator--() { --pos; check(); return *this; } - iterator operator++(int) - { - iterator tmp(*this); // for x++ - ++pos; check(); return tmp; - } + explicit table_iterator(tabletype* tbl = 0, size_type p = 0) : + table(tbl), pos(p) + { } - iterator operator--(int) - { - iterator tmp(*this); // for x-- - --pos; check(); return tmp; - } + // Helper function to assert things are ok; eg pos is still in range + void check() const + { + assert(table); + assert(pos <= table->size()); + } - iterator operator+(difference_type i) const - { - iterator tmp(*this); - tmp += i; return tmp; - } + // Arithmetic: we just do arithmetic on pos. We don't even need to + // do bounds checking, since STL doesn't consider that its job. 
:-) + iterator& operator+=(size_type t) + { + pos += t; + check(); + return *this; + } + iterator& operator-=(size_type t) + { + pos -= t; + check(); + return *this; + } + iterator& operator++() + { + ++pos; + check(); + return *this; + } + iterator& operator--() + { + --pos; + check(); + return *this; + } + iterator operator++(int) + { + iterator tmp(*this); // for x++ + ++pos; + check(); + return tmp; + } - iterator operator-(difference_type i) const - { - iterator tmp(*this); - tmp -= i; return tmp; - } + iterator operator--(int) + { + iterator tmp(*this); // for x-- + --pos; + check(); + return tmp; + } - difference_type operator-(iterator it) const - { - // for "x = it2 - it" - assert(table == it.table); - return pos - it.pos; - } + iterator operator+(difference_type i) const + { + iterator tmp(*this); + tmp += i; + return tmp; + } - // Comparisons. - bool operator==(const iterator& it) const - { - return table == it.table && pos == it.pos; - } + iterator operator-(difference_type i) const + { + iterator tmp(*this); + tmp -= i; + return tmp; + } - bool operator<(const iterator& it) const - { - assert(table == it.table); // life is bad bad bad otherwise - return pos < it.pos; - } + difference_type operator-(iterator it) const + { + // for "x = it2 - it" + assert(table == it.table); + return pos - it.pos; + } - bool operator!=(const iterator& it) const { return !(*this == it); } - bool operator<=(const iterator& it) const { return !(it < *this); } - bool operator>(const iterator& it) const { return it < *this; } - bool operator>=(const iterator& it) const { return !(*this < it); } + // Comparisons. + bool operator==(const iterator& it) const + { + return table == it.table && pos == it.pos; + } - // Here's the info we actually need to be an iterator - tabletype *table; // so we can dereference and bounds-check - size_type pos; // index into the table -}; + bool operator<(const iterator& it) const + { + assert(table == it.table); // life is bad bad bad otherwise + return pos < it.pos; + } -// --------------------------------------------------------------------------- -// --------------------------------------------------------------------------- -template -class const_table_iterator -{ -public: - typedef table_iterator iterator; - typedef const_table_iterator const_iterator; - - typedef std::random_access_iterator_tag iterator_category; - typedef typename tabletype::value_type value_type; - typedef typename tabletype::difference_type difference_type; - typedef typename tabletype::size_type size_type; - typedef typename tabletype::const_reference reference; // we're const-only - typedef typename tabletype::const_pointer pointer; - - // The "real" constructor - const_table_iterator(const tabletype *tbl, size_type p) - : table(tbl), pos(p) { } - - // The default constructor, used when I define vars of type table::iterator - const_table_iterator() : table(NULL), pos(0) { } - - // The copy constructor, for when I say table::iterator foo = tbl.begin() - // Also converts normal iterators to const iterators // not explicit on purpose - const_table_iterator(const iterator &from) - : table(from.table), pos(from.pos) { } + bool operator!=(const iterator& it) const + { + return !(*this == it); + } + bool operator<=(const iterator& it) const + { + return !(it < *this); + } + bool operator>(const iterator& it) const + { + return it < *this; + } + bool operator>=(const iterator& it) const + { + return !(*this < it); + } - // The default destructor is fine; we don't define one - // The default operator= 
is fine; we don't define one + // Here's the info we actually need to be an iterator + tabletype* table; // so we can dereference and bounds-check + size_type pos; // index into the table +}; - // The main thing our iterator does is dereference. If the table entry - // we point to is empty, we return the default value type. - reference operator*() const { return (*table)[pos]; } - pointer operator->() const { return &(operator*()); } +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +template +class const_table_iterator +{ + public: + typedef table_iterator iterator; + typedef const_table_iterator const_iterator; + + typedef std::random_access_iterator_tag iterator_category; + typedef typename tabletype::value_type value_type; + typedef typename tabletype::difference_type difference_type; + typedef typename tabletype::size_type size_type; + typedef typename tabletype::const_reference reference; // we're const-only + typedef typename tabletype::const_pointer pointer; + + // The "real" constructor + const_table_iterator(const tabletype* tbl, size_type p) + : table(tbl), pos(p) { } + + // The default constructor, used when I define vars of type table::iterator + const_table_iterator() : table(NULL), pos(0) { } + + // The copy constructor, for when I say table::iterator foo = tbl.begin() + // Also converts normal iterators to const iterators // not explicit on purpose + const_table_iterator(const iterator& from) + : table(from.table), pos(from.pos) { } + + // The default destructor is fine; we don't define one + // The default operator= is fine; we don't define one + + // The main thing our iterator does is dereference. If the table entry + // we point to is empty, we return the default value type. + reference operator*() const + { + return (*table)[pos]; + } + pointer operator->() const + { + return &(operator*()); + } - // Helper function to assert things are ok; eg pos is still in range - void check() const - { - assert(table); - assert(pos <= table->size()); - } + // Helper function to assert things are ok; eg pos is still in range + void check() const + { + assert(table); + assert(pos <= table->size()); + } - // Arithmetic: we just do arithmetic on pos. We don't even need to - // do bounds checking, since STL doesn't consider that its job. :-) - const_iterator& operator+=(size_type t) { pos += t; check(); return *this; } - const_iterator& operator-=(size_type t) { pos -= t; check(); return *this; } - const_iterator& operator++() { ++pos; check(); return *this; } - const_iterator& operator--() { --pos; check(); return *this; } - const_iterator operator++(int) - { - const_iterator tmp(*this); // for x++ - ++pos; check(); - return tmp; - } - const_iterator operator--(int) - { - const_iterator tmp(*this); // for x-- - --pos; check(); - return tmp; - } - const_iterator operator+(difference_type i) const - { - const_iterator tmp(*this); - tmp += i; - return tmp; - } - const_iterator operator-(difference_type i) const - { - const_iterator tmp(*this); - tmp -= i; - return tmp; - } - difference_type operator-(const_iterator it) const - { - // for "x = it2 - it" - assert(table == it.table); - return pos - it.pos; - } - reference operator[](difference_type n) const - { - return *(*this + n); // simple though not totally efficient - } + // Arithmetic: we just do arithmetic on pos. We don't even need to + // do bounds checking, since STL doesn't consider that its job. 
:-) + const_iterator& operator+=(size_type t) + { + pos += t; + check(); + return *this; + } + const_iterator& operator-=(size_type t) + { + pos -= t; + check(); + return *this; + } + const_iterator& operator++() + { + ++pos; + check(); + return *this; + } + const_iterator& operator--() + { + --pos; + check(); + return *this; + } + const_iterator operator++(int) + { + const_iterator tmp(*this); // for x++ + ++pos; + check(); + return tmp; + } + const_iterator operator--(int) + { + const_iterator tmp(*this); // for x-- + --pos; + check(); + return tmp; + } + const_iterator operator+(difference_type i) const + { + const_iterator tmp(*this); + tmp += i; + return tmp; + } + const_iterator operator-(difference_type i) const + { + const_iterator tmp(*this); + tmp -= i; + return tmp; + } + difference_type operator-(const_iterator it) const + { + // for "x = it2 - it" + assert(table == it.table); + return pos - it.pos; + } + reference operator[](difference_type n) const + { + return *(*this + n); // simple though not totally efficient + } - // Comparisons. - bool operator==(const const_iterator& it) const - { - return table == it.table && pos == it.pos; - } + // Comparisons. + bool operator==(const const_iterator& it) const + { + return table == it.table && pos == it.pos; + } - bool operator<(const const_iterator& it) const - { - assert(table == it.table); // life is bad bad bad otherwise - return pos < it.pos; - } - bool operator!=(const const_iterator& it) const { return !(*this == it); } - bool operator<=(const const_iterator& it) const { return !(it < *this); } - bool operator>(const const_iterator& it) const { return it < *this; } - bool operator>=(const const_iterator& it) const { return !(*this < it); } + bool operator<(const const_iterator& it) const + { + assert(table == it.table); // life is bad bad bad otherwise + return pos < it.pos; + } + bool operator!=(const const_iterator& it) const + { + return !(*this == it); + } + bool operator<=(const const_iterator& it) const + { + return !(it < *this); + } + bool operator>(const const_iterator& it) const + { + return it < *this; + } + bool operator>=(const const_iterator& it) const + { + return !(*this < it); + } - // Here's the info we actually need to be an iterator - const tabletype *table; // so we can dereference and bounds-check - size_type pos; // index into the table + // Here's the info we actually need to be an iterator + const tabletype* table; // so we can dereference and bounds-check + size_type pos; // index into the table }; // --------------------------------------------------------------------------- @@ -2007,174 +2216,212 @@ class const_table_iterator template class Two_d_iterator : public std::iterator { -public: - typedef Two_d_iterator iterator; - typedef T value_type; + public: + typedef Two_d_iterator iterator; + typedef T value_type; - explicit Two_d_iterator(row_it curr) : row_current(curr), col_current(0) - { - if (row_current && !row_current->is_marked()) + explicit Two_d_iterator(row_it curr) : row_current(curr), col_current(0) { - col_current = row_current->ne_begin(); - advance_past_end(); // in case cur->begin() == cur->end() + if (row_current && !row_current->is_marked()) + { + col_current = row_current->ne_begin(); + advance_past_end(); // in case cur->begin() == cur->end() + } } - } - explicit Two_d_iterator(row_it curr, col_it col) : row_current(curr), col_current(col) - { - assert(col); - } + explicit Two_d_iterator(row_it curr, col_it col) : row_current(curr), + col_current(col) + { + assert(col); + } - // 
The default constructor - Two_d_iterator() : row_current(0), col_current(0) { } + // The default constructor + Two_d_iterator() : row_current(0), col_current(0) { } - // Need this explicitly so we can convert normal iterators <=> const iterators - // not explicit on purpose - // --------------------------------------------------------------------------- - template - Two_d_iterator(const Two_d_iterator& it) : - row_current (*(row_it *)&it.row_current), - col_current (*(col_it *)&it.col_current) - { } + // Need this explicitly so we can convert normal iterators <=> const iterators + // not explicit on purpose + // --------------------------------------------------------------------------- + template + Two_d_iterator(const Two_d_iterator& it) : + row_current (*(row_it*) & it.row_current), + col_current (*(col_it*) & it.col_current) + { } - // The default destructor is fine; we don't define one - // The default operator= is fine; we don't define one + // The default destructor is fine; we don't define one + // The default operator= is fine; we don't define one - value_type& operator*() const { return *(col_current); } - value_type* operator->() const { return &(operator*()); } + value_type& operator*() const + { + return *(col_current); + } + value_type* operator->() const + { + return &(operator*()); + } - // Arithmetic: we just do arithmetic on pos. We don't even need to - // do bounds checking, since STL doesn't consider that its job. :-) - // NOTE: this is not amortized constant time! What do we do about it? - // ------------------------------------------------------------------ - void advance_past_end() - { - // used when col_current points to end() - while (col_current == row_current->ne_end()) + // Arithmetic: we just do arithmetic on pos. We don't even need to + // do bounds checking, since STL doesn't consider that its job. :-) + // NOTE: this is not amortized constant time! What do we do about it? + // ------------------------------------------------------------------ + void advance_past_end() { - // end of current row - // ------------------ - ++row_current; // go to beginning of next - if (!row_current->is_marked()) // col is irrelevant at end - col_current = row_current->ne_begin(); - else - break; // don't go past row_end + // used when col_current points to end() + while (col_current == row_current->ne_end()) + { + // end of current row + // ------------------ + ++row_current; // go to beginning of next + + if (!row_current->is_marked()) // col is irrelevant at end + { + col_current = row_current->ne_begin(); + } + else + { + break; // don't go past row_end + } + } } - } - friend size_t operator-(iterator l, iterator f) - { - if (f.row_current->is_marked()) - return 0; + friend size_t operator-(iterator l, iterator f) + { + if (f.row_current->is_marked()) + { + return 0; + } + + size_t diff(0); - size_t diff(0); - while (f != l) + while (f != l) + { + ++diff; + ++f; + } + + return diff; + } + + iterator& operator++() { - ++diff; - ++f; + // assert(!row_current->is_marked()); // how to ++ from there? + ++col_current; + advance_past_end(); // in case col_current is at end() + return *this; } - return diff; - } - iterator& operator++() - { - // assert(!row_current->is_marked()); // how to ++ from there? 
- ++col_current; - advance_past_end(); // in case col_current is at end() - return *this; - } + iterator& operator--() + { + while (row_current->is_marked() || + col_current == row_current->ne_begin()) + { + --row_current; + col_current = row_current->ne_end(); // this is 1 too far + } - iterator& operator--() - { - while (row_current->is_marked() || - col_current == row_current->ne_begin()) + --col_current; + return *this; + } + iterator operator++(int) { - --row_current; - col_current = row_current->ne_end(); // this is 1 too far + iterator tmp(*this); + ++*this; + return tmp; + } + iterator operator--(int) + { + iterator tmp(*this); + --*this; + return tmp; } - --col_current; - return *this; - } - iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; } - iterator operator--(int) { iterator tmp(*this); --*this; return tmp; } - // Comparisons. - bool operator==(const iterator& it) const - { - return (row_current == it.row_current && - (!row_current || row_current->is_marked() || col_current == it.col_current)); - } + // Comparisons. + bool operator==(const iterator& it) const + { + return (row_current == it.row_current && + (!row_current || row_current->is_marked() || col_current == it.col_current)); + } - bool operator!=(const iterator& it) const { return !(*this == it); } + bool operator!=(const iterator& it) const + { + return !(*this == it); + } - // Here's the info we actually need to be an iterator - // These need to be public so we convert from iterator to const_iterator - // --------------------------------------------------------------------- - row_it row_current; - col_it col_current; + // Here's the info we actually need to be an iterator + // These need to be public so we convert from iterator to const_iterator + // --------------------------------------------------------------------- + row_it row_current; + col_it col_current; }; // --------------------------------------------------------------------------- // --------------------------------------------------------------------------- template -class Two_d_destructive_iterator : public Two_d_iterator +class Two_d_destructive_iterator : public + Two_d_iterator { -public: - typedef Two_d_destructive_iterator iterator; + public: + typedef Two_d_destructive_iterator iterator; - Two_d_destructive_iterator(Alloc &alloc, row_it curr) : - _alloc(alloc) - { - this->row_current = curr; - this->col_current = 0; - if (this->row_current && !this->row_current->is_marked()) + Two_d_destructive_iterator(Alloc& alloc, row_it curr) : + _alloc(alloc) { - this->col_current = this->row_current->ne_begin(); - advance_past_end(); // in case cur->begin() == cur->end() + this->row_current = curr; + this->col_current = 0; + + if (this->row_current && !this->row_current->is_marked()) + { + this->col_current = this->row_current->ne_begin(); + advance_past_end(); // in case cur->begin() == cur->end() + } } - } - // Arithmetic: we just do arithmetic on pos. We don't even need to - // do bounds checking, since STL doesn't consider that its job. :-) - // NOTE: this is not amortized constant time! What do we do about it? - // ------------------------------------------------------------------ - void advance_past_end() - { - // used when col_current points to end() - while (this->col_current == this->row_current->ne_end()) + // Arithmetic: we just do arithmetic on pos. We don't even need to + // do bounds checking, since STL doesn't consider that its job. :-) + // NOTE: this is not amortized constant time! What do we do about it? 
+ // ------------------------------------------------------------------ + void advance_past_end() { - this->row_current->clear(_alloc, true); // This is what differs from non-destructive iterators above + // used when col_current points to end() + while (this->col_current == this->row_current->ne_end()) + { + this->row_current->clear(_alloc, + true); // This is what differs from non-destructive iterators above + // end of current row + // ------------------ + ++this->row_current; // go to beginning of next - // end of current row - // ------------------ - ++this->row_current; // go to beginning of next - if (!this->row_current->is_marked()) // col is irrelevant at end - this->col_current = this->row_current->ne_begin(); - else - break; // don't go past row_end + if (!this->row_current->is_marked()) // col is irrelevant at end + { + this->col_current = this->row_current->ne_begin(); + } + else + { + break; // don't go past row_end + } + } } - } - iterator& operator++() - { - // assert(!this->row_current->is_marked()); // how to ++ from there? - ++this->col_current; - advance_past_end(); // in case col_current is at end() - return *this; - } + iterator& operator++() + { + // assert(!this->row_current->is_marked()); // how to ++ from there? + ++this->col_current; + advance_past_end(); // in case col_current is at end() + return *this; + } -private: - Two_d_destructive_iterator& operator=(const Two_d_destructive_iterator &o); + private: + Two_d_destructive_iterator& operator=(const Two_d_destructive_iterator& o); - Alloc &_alloc; + Alloc& _alloc; }; // --------------------------------------------------------------------------- // --------------------------------------------------------------------------- -static const char spp_bits_in[256] = { +static const char spp_bits_in[256] = +{ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, @@ -2230,13 +2477,15 @@ static inline uint32_t s_spp_popcount_default(uint64_t x) { const uint64_t m1 = uint64_t(0x5555555555555555); // binary: 0101... const uint64_t m2 = uint64_t(0x3333333333333333); // binary: 00110011.. - const uint64_t m4 = uint64_t(0x0f0f0f0f0f0f0f0f); // binary: 4 zeros, 4 ones ... - const uint64_t h01 = uint64_t(0x0101010101010101); // the sum of 256 to the power of 0,1,2,3... - + const uint64_t m4 = uint64_t( + 0x0f0f0f0f0f0f0f0f); // binary: 4 zeros, 4 ones ... + const uint64_t h01 = uint64_t( + 0x0101010101010101); // the sum of 256 to the power of 0,1,2,3... x -= (x >> 1) & m1; // put count of each 2 bits into those 2 bits x = (x & m2) + ((x >> 2) & m2); // put count of each 4 bits into those 4 bits x = (x + (x >> 4)) & m4; // put count of each 8 bits into those 8 bits - return (x * h01)>>56; // returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24)+... + return (x * h01) >> + 56; // returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24)+... 
} #if defined(SPP_POPCNT_CHECK) @@ -2244,8 +2493,12 @@ static inline bool spp_popcount_check() { int cpuInfo[4] = { -1 }; spp_cpuid(cpuInfo, 1); + if (cpuInfo[2] & (1 << 23)) - return true; // means SPP_POPCNT supported + { + return true; // means SPP_POPCNT supported + } + return false; } #endif @@ -2318,2484 +2571,3035 @@ static inline uint32_t spp_popcount(uint64_t i) template class sparsegroup { -public: - // Basic types - typedef T value_type; - typedef Alloc allocator_type; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef value_type* pointer; - typedef const value_type* const_pointer; - - typedef uint8_t size_type; // max # of buckets - - // These are our special iterators, that go over non-empty buckets in a - // group. These aren't const-only because you can change non-empty bcks. - // --------------------------------------------------------------------- - typedef pointer ne_iterator; - typedef const_pointer const_ne_iterator; - typedef std::reverse_iterator reverse_ne_iterator; - typedef std::reverse_iterator const_reverse_ne_iterator; - - // We'll have versions for our special non-empty iterator too - // ---------------------------------------------------------- - ne_iterator ne_begin() { return reinterpret_cast(_group); } - const_ne_iterator ne_begin() const { return reinterpret_cast(_group); } - const_ne_iterator ne_cbegin() const { return reinterpret_cast(_group); } - ne_iterator ne_end() { return reinterpret_cast(_group + _num_items()); } - const_ne_iterator ne_end() const { return reinterpret_cast(_group + _num_items()); } - const_ne_iterator ne_cend() const { return reinterpret_cast(_group + _num_items()); } - reverse_ne_iterator ne_rbegin() { return reverse_ne_iterator(ne_end()); } - const_reverse_ne_iterator ne_rbegin() const { return const_reverse_ne_iterator(ne_cend()); } - const_reverse_ne_iterator ne_crbegin() const { return const_reverse_ne_iterator(ne_cend()); } - reverse_ne_iterator ne_rend() { return reverse_ne_iterator(ne_begin()); } - const_reverse_ne_iterator ne_rend() const { return const_reverse_ne_iterator(ne_cbegin()); } - const_reverse_ne_iterator ne_crend() const { return const_reverse_ne_iterator(ne_cbegin()); } + public: + // Basic types + typedef T value_type; + typedef Alloc allocator_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef value_type* pointer; + typedef const value_type* const_pointer; + + typedef uint8_t + size_type; // max # of buckets + + // These are our special iterators, that go over non-empty buckets in a + // group. These aren't const-only because you can change non-empty bcks. 
+ // --------------------------------------------------------------------- + typedef pointer ne_iterator; + typedef const_pointer const_ne_iterator; + typedef std::reverse_iterator reverse_ne_iterator; + typedef std::reverse_iterator + const_reverse_ne_iterator; + + // We'll have versions for our special non-empty iterator too + // ---------------------------------------------------------- + ne_iterator ne_begin() + { + return reinterpret_cast(_group); + } + const_ne_iterator ne_begin() const + { + return reinterpret_cast(_group); + } + const_ne_iterator ne_cbegin() const + { + return reinterpret_cast(_group); + } + ne_iterator ne_end() + { + return reinterpret_cast(_group + _num_items()); + } + const_ne_iterator ne_end() const + { + return reinterpret_cast(_group + _num_items()); + } + const_ne_iterator ne_cend() const + { + return reinterpret_cast(_group + _num_items()); + } + reverse_ne_iterator ne_rbegin() + { + return reverse_ne_iterator(ne_end()); + } + const_reverse_ne_iterator ne_rbegin() const + { + return const_reverse_ne_iterator(ne_cend()); + } + const_reverse_ne_iterator ne_crbegin() const + { + return const_reverse_ne_iterator(ne_cend()); + } + reverse_ne_iterator ne_rend() + { + return reverse_ne_iterator(ne_begin()); + } + const_reverse_ne_iterator ne_rend() const + { + return const_reverse_ne_iterator(ne_cbegin()); + } + const_reverse_ne_iterator ne_crend() const + { + return const_reverse_ne_iterator(ne_cbegin()); + } -private: - // T can be std::pair, but sometime we need to cast to a mutable type - // ------------------------------------------------------------------------------ - typedef typename spp::cvt::type mutable_value_type; - typedef mutable_value_type * mutable_pointer; - typedef const mutable_value_type * const_mutable_pointer; + private: + // T can be std::pair, but sometime we need to cast to a mutable type + // ------------------------------------------------------------------------------ + typedef typename spp::cvt::type mutable_value_type; + typedef mutable_value_type* mutable_pointer; + typedef const mutable_value_type* const_mutable_pointer; - bool _bmtest(size_type i) const { return !!(_bitmap & (static_cast(1) << i)); } - void _bmset(size_type i) { _bitmap |= static_cast(1) << i; } - void _bmclear(size_type i) { _bitmap &= ~(static_cast(1) << i); } + bool _bmtest(size_type i) const + { + return !!(_bitmap & (static_cast(1) << i)); + } + void _bmset(size_type i) + { + _bitmap |= static_cast(1) << i; + } + void _bmclear(size_type i) + { + _bitmap &= ~(static_cast(1) << i); + } - bool _bme_test(size_type i) const { return !!(_bm_erased & (static_cast(1) << i)); } - void _bme_set(size_type i) { _bm_erased |= static_cast(1) << i; } - void _bme_clear(size_type i) { _bm_erased &= ~(static_cast(1) << i); } + bool _bme_test(size_type i) const + { + return !!(_bm_erased & (static_cast(1) << i)); + } + void _bme_set(size_type i) + { + _bm_erased |= static_cast(1) << i; + } + void _bme_clear(size_type i) + { + _bm_erased &= ~(static_cast(1) << i); + } - bool _bmtest_strict(size_type i) const - { return !!((_bitmap | _bm_erased) & (static_cast(1) << i)); } + bool _bmtest_strict(size_type i) const + { + return !!((_bitmap | _bm_erased) & (static_cast(1) << i)); + } - static uint32_t _sizing(uint32_t n) - { + static uint32_t _sizing(uint32_t n) + { #if !defined(SPP_ALLOC_SZ) || (SPP_ALLOC_SZ == 0) - // aggressive allocation first, then decreasing as sparsegroups fill up - // -------------------------------------------------------------------- - static uint8_t 
s_alloc_batch_sz[SPP_GROUP_SIZE] = { 0 }; - if (!s_alloc_batch_sz[0]) - { - // 32 bit bitmap - // ........ .... .... .. .. .. .. . . . . . . . . - // 8 12 16 18 20 22 24 25 26 ... 32 - // ------------------------------------------------------ - uint8_t group_sz = SPP_GROUP_SIZE / 4; - uint8_t group_start_alloc = SPP_GROUP_SIZE / 8; //4; - uint8_t alloc_sz = group_start_alloc; - for (int i=0; i<4; ++i) + // aggressive allocation first, then decreasing as sparsegroups fill up + // -------------------------------------------------------------------- + static uint8_t s_alloc_batch_sz[SPP_GROUP_SIZE] = { 0 }; + + if (!s_alloc_batch_sz[0]) { - for (int j=0; j 2) + { + group_start_alloc /= 2; + } + + alloc_sz += group_start_alloc; } - if (group_start_alloc > 2) - group_start_alloc /= 2; - alloc_sz += group_start_alloc; } - } - - return n ? static_cast(s_alloc_batch_sz[n-1]) : 0; // more aggressive alloc at the beginning + return n ? static_cast(s_alloc_batch_sz[n - 1]) : + 0; // more aggressive alloc at the beginning #elif (SPP_ALLOC_SZ == 1) - // use as little memory as possible - slowest insert/delete in table - // ----------------------------------------------------------------- - return n; + // use as little memory as possible - slowest insert/delete in table + // ----------------------------------------------------------------- + return n; #else - // decent compromise when SPP_ALLOC_SZ == 2 - // ---------------------------------------- - static size_type sz_minus_1 = SPP_ALLOC_SZ - 1; - return (n + sz_minus_1) & ~sz_minus_1; + // decent compromise when SPP_ALLOC_SZ == 2 + // ---------------------------------------- + static size_type sz_minus_1 = SPP_ALLOC_SZ - 1; + return (n + sz_minus_1) & ~sz_minus_1; #endif - } - - pointer _allocate_group(allocator_type &alloc, uint32_t n /* , bool tight = false */) - { - // ignore tight since we don't store num_alloc - // num_alloc = (uint8_t)(tight ? n : _sizing(n)); + } - uint32_t num_alloc = (uint8_t)_sizing(n); - _set_num_alloc(num_alloc); - pointer retval = alloc.allocate(static_cast(num_alloc)); - if (retval == NULL) + pointer _allocate_group(allocator_type& alloc, + uint32_t n /* , bool tight = false */) { - // the allocator is supposed to throw an exception if the allocation fails. - fprintf(stderr, "sparsehash FATAL ERROR: failed to allocate %d groups\n", num_alloc); - exit(1); + // ignore tight since we don't store num_alloc + // num_alloc = (uint8_t)(tight ? n : _sizing(n)); + uint32_t num_alloc = (uint8_t)_sizing(n); + _set_num_alloc(num_alloc); + pointer retval = alloc.allocate(static_cast(num_alloc)); + + if (retval == NULL) + { + // the allocator is supposed to throw an exception if the allocation fails. 
+ fprintf(stderr, "sparsehash FATAL ERROR: failed to allocate %d groups\n", + num_alloc); + exit(1); + } + + return retval; } - return retval; - } - void _free_group(allocator_type &alloc, uint32_t num_alloc) - { - if (_group) + void _free_group(allocator_type& alloc, uint32_t num_alloc) { - uint32_t num_buckets = _num_items(); - if (num_buckets) + if (_group) { - mutable_pointer end_it = (mutable_pointer)(_group + num_buckets); - for (mutable_pointer p = (mutable_pointer)_group; p != end_it; ++p) - p->~mutable_value_type(); + uint32_t num_buckets = _num_items(); + + if (num_buckets) + { + mutable_pointer end_it = (mutable_pointer)(_group + num_buckets); + + for (mutable_pointer p = (mutable_pointer)_group; p != end_it; ++p) + { + p->~mutable_value_type(); + } + } + + alloc.deallocate(_group, (typename allocator_type::size_type)num_alloc); + _group = NULL; } - alloc.deallocate(_group, (typename allocator_type::size_type)num_alloc); - _group = NULL; } - } - // private because should not be called - no allocator! - sparsegroup &operator=(const sparsegroup& x); + // private because should not be called - no allocator! + sparsegroup& operator=(const sparsegroup& x); - static size_type _pos_to_offset(group_bm_type bm, size_type pos) - { - //return (size_type)((uint32_t)~((int32_t(-1) + pos) >> 31) & spp_popcount(bm << (SPP_GROUP_SIZE - pos))); - //return (size_type)(pos ? spp_popcount(bm << (SPP_GROUP_SIZE - pos)) : 0); - return static_cast(spp_popcount(bm & ((static_cast(1) << pos) - 1))); - } + static size_type _pos_to_offset(group_bm_type bm, size_type pos) + { + //return (size_type)((uint32_t)~((int32_t(-1) + pos) >> 31) & spp_popcount(bm << (SPP_GROUP_SIZE - pos))); + //return (size_type)(pos ? spp_popcount(bm << (SPP_GROUP_SIZE - pos)) : 0); + return static_cast(spp_popcount(bm & ((static_cast + (1) << pos) - 1))); + } -public: + public: - // get_iter() in sparsetable needs it - size_type pos_to_offset(size_type pos) const - { - return _pos_to_offset(_bitmap, pos); - } + // get_iter() in sparsetable needs it + size_type pos_to_offset(size_type pos) const + { + return _pos_to_offset(_bitmap, pos); + } #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable : 4146) #endif - // Returns the (logical) position in the bm[] array, i, such that - // bm[i] is the offset-th set bit in the array. It is the inverse - // of pos_to_offset. get_pos() uses this function to find the index - // of an ne_iterator in the table. Bit-twiddling from - // http://hackersdelight.org/basics.pdf - // ----------------------------------------------------------------- - static size_type offset_to_pos(group_bm_type bm, size_type offset) - { - for (; offset > 0; offset--) - bm &= (bm-1); // remove right-most set bit + // Returns the (logical) position in the bm[] array, i, such that + // bm[i] is the offset-th set bit in the array. It is the inverse + // of pos_to_offset. get_pos() uses this function to find the index + // of an ne_iterator in the table. Bit-twiddling from + // http://hackersdelight.org/basics.pdf + // ----------------------------------------------------------------- + static size_type offset_to_pos(group_bm_type bm, size_type offset) + { + for (; offset > 0; offset--) + { + bm &= (bm - 1); // remove right-most set bit + } - // Clear all bits to the left of the rightmost bit (the &), - // and then clear the rightmost bit but set all bits to the - // right of it (the -1). 
- // -------------------------------------------------------- - bm = (bm & -bm) - 1; - return static_cast(spp_popcount(bm)); - } + // Clear all bits to the left of the rightmost bit (the &), + // and then clear the rightmost bit but set all bits to the + // right of it (the -1). + // -------------------------------------------------------- + bm = (bm & -bm) - 1; + return static_cast(spp_popcount(bm)); + } #ifdef _MSC_VER #pragma warning(pop) #endif - size_type offset_to_pos(size_type offset) const - { - return offset_to_pos(_bitmap, offset); - } + size_type offset_to_pos(size_type offset) const + { + return offset_to_pos(_bitmap, offset); + } -public: - // Constructors -- default and copy -- and destructor - explicit sparsegroup() : - _group(0), _bitmap(0), _bm_erased(0) - { - _set_num_items(0); - _set_num_alloc(0); - } + public: + // Constructors -- default and copy -- and destructor + explicit sparsegroup() : + _group(0), _bitmap(0), _bm_erased(0) + { + _set_num_items(0); + _set_num_alloc(0); + } - sparsegroup(const sparsegroup& x) : - _group(0), _bitmap(x._bitmap), _bm_erased(x._bm_erased) - { - _set_num_items(0); - _set_num_alloc(0); - assert(_group == 0); if (_group) exit(1); - } + sparsegroup(const sparsegroup& x) : + _group(0), _bitmap(x._bitmap), _bm_erased(x._bm_erased) + { + _set_num_items(0); + _set_num_alloc(0); + assert(_group == 0); - sparsegroup(const sparsegroup& x, allocator_type& a) : - _group(0), _bitmap(x._bitmap), _bm_erased(x._bm_erased) - { - _set_num_items(0); - _set_num_alloc(0); + if (_group) + { + exit(1); + } + } - uint32_t num_items = x._num_items(); - if (num_items) + sparsegroup(const sparsegroup& x, allocator_type& a) : + _group(0), _bitmap(x._bitmap), _bm_erased(x._bm_erased) { - _group = _allocate_group(a, num_items /* , true */); - _set_num_items(num_items); - std::uninitialized_copy(x._group, x._group + num_items, _group); + _set_num_items(0); + _set_num_alloc(0); + uint32_t num_items = x._num_items(); + + if (num_items) + { + _group = _allocate_group(a, num_items /* , true */); + _set_num_items(num_items); + std::uninitialized_copy(x._group, x._group + num_items, _group); + } } - } - ~sparsegroup() { assert(_group == 0); if (_group) exit(1); } + ~sparsegroup() + { + assert(_group == 0); - void destruct(allocator_type& a) { _free_group(a, _num_alloc()); } + if (_group) + { + exit(1); + } + } - // Many STL algorithms use swap instead of copy constructors - void swap(sparsegroup& x) - { - using std::swap; + void destruct(allocator_type& a) + { + _free_group(a, _num_alloc()); + } - swap(_group, x._group); - swap(_bitmap, x._bitmap); - swap(_bm_erased, x._bm_erased); + // Many STL algorithms use swap instead of copy constructors + void swap(sparsegroup& x) + { + using std::swap; + swap(_group, x._group); + swap(_bitmap, x._bitmap); + swap(_bm_erased, x._bm_erased); #ifdef SPP_STORE_NUM_ITEMS - swap(_num_buckets, x._num_buckets); - swap(_num_allocated, x._num_allocated); + swap(_num_buckets, x._num_buckets); + swap(_num_allocated, x._num_allocated); #endif - } + } - // It's always nice to be able to clear a table without deallocating it - void clear(allocator_type &alloc, bool erased) - { - _free_group(alloc, _num_alloc()); - _bitmap = 0; - if (erased) - _bm_erased = 0; - _set_num_items(0); - _set_num_alloc(0); - } + // It's always nice to be able to clear a table without deallocating it + void clear(allocator_type& alloc, bool erased) + { + _free_group(alloc, _num_alloc()); + _bitmap = 0; - // Functions that tell you about size. 
Alas, these aren't so useful - // because our table is always fixed size. - size_type size() const { return static_cast(SPP_GROUP_SIZE); } - size_type max_size() const { return static_cast(SPP_GROUP_SIZE); } + if (erased) + { + _bm_erased = 0; + } - bool empty() const { return false; } + _set_num_items(0); + _set_num_alloc(0); + } - // We also may want to know how many *used* buckets there are - size_type num_nonempty() const { return (size_type)_num_items(); } + // Functions that tell you about size. Alas, these aren't so useful + // because our table is always fixed size. + size_type size() const + { + return static_cast(SPP_GROUP_SIZE); + } + size_type max_size() const + { + return static_cast(SPP_GROUP_SIZE); + } - // TODO(csilvers): make protected + friend - // This is used by sparse_hashtable to get an element from the table - // when we know it exists. - reference unsafe_get(size_type i) const - { - // assert(_bmtest(i)); - return (reference)_group[pos_to_offset(i)]; - } + bool empty() const + { + return false; + } - typedef std::pair SetResult; + // We also may want to know how many *used* buckets there are + size_type num_nonempty() const + { + return (size_type)_num_items(); + } -private: - typedef spp_::integral_constant::value && - spp_::is_same >::value)> - realloc_and_memmove_ok; + // TODO(csilvers): make protected + friend + // This is used by sparse_hashtable to get an element from the table + // when we know it exists. + reference unsafe_get(size_type i) const + { + // assert(_bmtest(i)); + return (reference)_group[pos_to_offset(i)]; + } - // ------------------------- memory at *p is uninitialized => need to construct - void _init_val(mutable_value_type *p, reference val) - { + typedef std::pair SetResult; + + private: + typedef spp_::integral_constant < bool, + (spp_::is_relocatable::value&& + spp_::is_same>::value) > + realloc_and_memmove_ok; + + // ------------------------- memory at *p is uninitialized => need to construct + void _init_val(mutable_value_type* p, reference val) + { #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) - ::new (p) value_type(std::move(val)); + ::new (p) value_type(std::move(val)); #else - ::new (p) value_type(val); + ::new (p) value_type(val); #endif - } + } - // ------------------------- memory at *p is uninitialized => need to construct - void _init_val(mutable_value_type *p, const_reference val) - { - ::new (p) value_type(val); - } + // ------------------------- memory at *p is uninitialized => need to construct + void _init_val(mutable_value_type* p, const_reference val) + { + ::new (p) value_type(val); + } - // ------------------------------------------------ memory at *p is initialized - void _set_val(value_type *p, reference val) - { + // ------------------------------------------------ memory at *p is initialized + void _set_val(value_type* p, reference val) + { #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) - *(mutable_pointer)p = std::move(val); + *(mutable_pointer)p = std::move(val); #else - using std::swap; - swap(*(mutable_pointer)p, *(mutable_pointer)&val); + using std::swap; + swap(*(mutable_pointer)p, *(mutable_pointer)&val); #endif - } - - // ------------------------------------------------ memory at *p is initialized - void _set_val(value_type *p, const_reference val) - { - *(mutable_pointer)p = *(const_mutable_pointer)&val; - } - - // Our default allocator - try to merge memory buffers - // right now it uses Google's traits, but we should use something like folly::IsRelocatable - // return true if the slot was constructed (i.e. 
contains a valid value_type - // --------------------------------------------------------------------------------- - template - void _set_aux(allocator_type &alloc, size_type offset, Val &val, spp_::true_type) - { - //static int x=0; if (++x < 10) printf("x\n"); // check we are getting here - - uint32_t num_items = _num_items(); - uint32_t num_alloc = _sizing(num_items); + } - if (num_items == num_alloc) + // ------------------------------------------------ memory at *p is initialized + void _set_val(value_type* p, const_reference val) { - num_alloc = _sizing(num_items + 1); - _group = alloc.reallocate(_group, num_alloc); - _set_num_alloc(num_alloc); + *(mutable_pointer)p = *(const_mutable_pointer)&val; } - for (uint32_t i = num_items; i > offset; --i) - memcpy(_group + i, _group + i-1, sizeof(*_group)); + // Our default allocator - try to merge memory buffers + // right now it uses Google's traits, but we should use something like folly::IsRelocatable + // return true if the slot was constructed (i.e. contains a valid value_type + // --------------------------------------------------------------------------------- + template + void _set_aux(allocator_type& alloc, size_type offset, Val& val, + spp_::true_type) + { + //static int x=0; if (++x < 10) printf("x\n"); // check we are getting here + uint32_t num_items = _num_items(); + uint32_t num_alloc = _sizing(num_items); - _init_val((mutable_pointer)(_group + offset), val); - } + if (num_items == num_alloc) + { + num_alloc = _sizing(num_items + 1); + _group = alloc.reallocate(_group, num_alloc); + _set_num_alloc(num_alloc); + } - // Create space at _group[offset], without special assumptions about value_type - // and allocator_type, with a default value - // return true if the slot was constructed (i.e. contains a valid value_type - // --------------------------------------------------------------------------------- - template - void _set_aux(allocator_type &alloc, size_type offset, Val &val, spp_::false_type) - { - uint32_t num_items = _num_items(); - uint32_t num_alloc = _sizing(num_items); + for (uint32_t i = num_items; i > offset; --i) + { + memcpy(_group + i, _group + i - 1, sizeof(*_group)); + } - //assert(num_alloc == (uint32_t)_num_allocated); - if (num_items < num_alloc) - { - // create new object at end and rotate it to position - _init_val((mutable_pointer)&_group[num_items], val); - std::rotate((mutable_pointer)(_group + offset), - (mutable_pointer)(_group + num_items), - (mutable_pointer)(_group + num_items + 1)); - return; + _init_val((mutable_pointer)(_group + offset), val); } - // This is valid because 0 <= offset <= num_items - pointer p = _allocate_group(alloc, _sizing(num_items + 1)); - if (offset) - std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)_group), - MK_MOVE_IT((mutable_pointer)(_group + offset)), - (mutable_pointer)p); - if (num_items > offset) - std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group + offset)), - MK_MOVE_IT((mutable_pointer)(_group + num_items)), - (mutable_pointer)(p + offset + 1)); - _init_val((mutable_pointer)(p + offset), val); - _free_group(alloc, num_alloc); - _group = p; - } - - // ---------------------------------------------------------------------------------- - template - void _set(allocator_type &alloc, size_type i, size_type offset, Val &val) - { - if (!_bmtest(i)) + // Create space at _group[offset], without special assumptions about value_type + // and allocator_type, with a default value + // return true if the slot was constructed (i.e. 
contains a valid value_type + // --------------------------------------------------------------------------------- + template + void _set_aux(allocator_type& alloc, size_type offset, Val& val, + spp_::false_type) { - _set_aux(alloc, offset, val, realloc_and_memmove_ok()); - _incr_num_items(); - _bmset(i); - } - else - _set_val(&_group[offset], val); - } - -public: + uint32_t num_items = _num_items(); + uint32_t num_alloc = _sizing(num_items); - // This returns the pointer to the inserted item - // --------------------------------------------- - template - pointer set(allocator_type &alloc, size_type i, Val &val) - { - _bme_clear(i); // in case this was an "erased" location + //assert(num_alloc == (uint32_t)_num_allocated); + if (num_items < num_alloc) + { + // create new object at end and rotate it to position + _init_val((mutable_pointer)&_group[num_items], val); + std::rotate((mutable_pointer)(_group + offset), + (mutable_pointer)(_group + num_items), + (mutable_pointer)(_group + num_items + 1)); + return; + } - size_type offset = pos_to_offset(i); - _set(alloc, i, offset, val); // may change _group pointer - return (pointer)(_group + offset); - } + // This is valid because 0 <= offset <= num_items + pointer p = _allocate_group(alloc, _sizing(num_items + 1)); - // We let you see if a bucket is non-empty without retrieving it - // ------------------------------------------------------------- - bool test(size_type i) const - { - return _bmtest(i); - } + if (offset) + std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)_group), + MK_MOVE_IT((mutable_pointer)(_group + offset)), + (mutable_pointer)p); - // also tests for erased values - // ---------------------------- - bool test_strict(size_type i) const - { - return _bmtest_strict(i); - } + if (num_items > offset) + std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group + offset)), + MK_MOVE_IT((mutable_pointer)(_group + num_items)), + (mutable_pointer)(p + offset + 1)); -private: - // Shrink the array, assuming value_type has trivial copy - // constructor and destructor, and the allocator_type is the default - // libc_allocator_with_alloc. 
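Both _set_aux() overloads above, and the _group_erase_aux() overloads that follow, are chosen at compile time through the realloc_and_memmove_ok trait: a realloc/memmove path when elements can be byte-copied, and a constructor-aware path otherwise. Here is a reduced sketch of that tag-dispatch pattern; it is not taken from the patch and uses std::is_trivially_copyable and std::vector as stand-ins for the library's is_relocatable trait and its group buffer.

#include <cstddef>
#include <cstring>
#include <type_traits>
#include <vector>

template <class T>
void insert_at(std::vector<T>& v, std::size_t offset, const T& val,
               std::true_type)
{
    v.resize(v.size() + 1);
    // Shift the tail with one memmove; only valid for byte-copyable types.
    std::memmove(v.data() + offset + 1, v.data() + offset,
                 (v.size() - offset - 1) * sizeof(T));
    v[offset] = val;
}

template <class T>
void insert_at(std::vector<T>& v, std::size_t offset, const T& val,
               std::false_type)
{
    // Element-wise path that respects constructors and assignment.
    v.insert(v.begin() + offset, val);
}

template <class T>
void insert_at(std::vector<T>& v, std::size_t offset, const T& val)
{
    insert_at(v, offset, val, std::is_trivially_copyable<T>());
}

Overload resolution slices the trait down to std::true_type or std::false_type, which mirrors how _set_aux picks its path.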
- // ----------------------------------------------------------------------- - void _group_erase_aux(allocator_type &alloc, size_type offset, spp_::true_type) - { - // static int x=0; if (++x < 10) printf("Y\n"); // check we are getting here - uint32_t num_items = _num_items(); - uint32_t num_alloc = _sizing(num_items); + _init_val((mutable_pointer)(p + offset), val); + _free_group(alloc, num_alloc); + _group = p; + } - if (num_items == 1) + // ---------------------------------------------------------------------------------- + template + void _set(allocator_type& alloc, size_type i, size_type offset, Val& val) { - assert(offset == 0); - _free_group(alloc, num_alloc); - _set_num_alloc(0); - return; + if (!_bmtest(i)) + { + _set_aux(alloc, offset, val, realloc_and_memmove_ok()); + _incr_num_items(); + _bmset(i); + } + else + { + _set_val(&_group[offset], val); + } } - _group[offset].~value_type(); + public: - for (size_type i = offset; i < num_items - 1; ++i) - memcpy(_group + i, _group + i + 1, sizeof(*_group)); + // This returns the pointer to the inserted item + // --------------------------------------------- + template + pointer set(allocator_type& alloc, size_type i, Val& val) + { + _bme_clear(i); // in case this was an "erased" location + size_type offset = pos_to_offset(i); + _set(alloc, i, offset, val); // may change _group pointer + return (pointer)(_group + offset); + } - if (_sizing(num_items - 1) != num_alloc) + // We let you see if a bucket is non-empty without retrieving it + // ------------------------------------------------------------- + bool test(size_type i) const { - num_alloc = _sizing(num_items - 1); - assert(num_alloc); // because we have at least 1 item left - _set_num_alloc(num_alloc); - _group = alloc.reallocate(_group, num_alloc); + return _bmtest(i); } - } - // Shrink the array, without any special assumptions about value_type and - // allocator_type. - // -------------------------------------------------------------------------- - void _group_erase_aux(allocator_type &alloc, size_type offset, spp_::false_type) - { - uint32_t num_items = _num_items(); - uint32_t num_alloc = _sizing(num_items); + // also tests for erased values + // ---------------------------- + bool test_strict(size_type i) const + { + return _bmtest_strict(i); + } - if (_sizing(num_items - 1) != num_alloc) + private: + // Shrink the array, assuming value_type has trivial copy + // constructor and destructor, and the allocator_type is the default + // libc_allocator_with_alloc. 
+ // ----------------------------------------------------------------------- + void _group_erase_aux(allocator_type& alloc, size_type offset, spp_::true_type) { - pointer p = 0; - if (num_items > 1) - { - p = _allocate_group(alloc, num_items - 1); - if (offset) - std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group)), - MK_MOVE_IT((mutable_pointer)(_group + offset)), - (mutable_pointer)(p)); - if (static_cast(offset + 1) < num_items) - std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group + offset + 1)), - MK_MOVE_IT((mutable_pointer)(_group + num_items)), - (mutable_pointer)(p + offset)); - } - else + // static int x=0; if (++x < 10) printf("Y\n"); // check we are getting here + uint32_t num_items = _num_items(); + uint32_t num_alloc = _sizing(num_items); + + if (num_items == 1) { assert(offset == 0); + _free_group(alloc, num_alloc); _set_num_alloc(0); + return; } - _free_group(alloc, num_alloc); - _group = p; - } - else - { - std::rotate((mutable_pointer)(_group + offset), - (mutable_pointer)(_group + offset + 1), - (mutable_pointer)(_group + num_items)); - ((mutable_pointer)(_group + num_items - 1))->~mutable_value_type(); - } - } - void _group_erase(allocator_type &alloc, size_type offset) - { - _group_erase_aux(alloc, offset, realloc_and_memmove_ok()); - } + _group[offset].~value_type(); -public: - template - bool erase_ne(allocator_type &alloc, twod_iter &it) - { - assert(_group && it.col_current != ne_end()); - size_type offset = (size_type)(it.col_current - ne_begin()); - size_type pos = offset_to_pos(offset); + for (size_type i = offset; i < num_items - 1; ++i) + { + memcpy(_group + i, _group + i + 1, sizeof(*_group)); + } - if (_num_items() <= 1) - { - clear(alloc, false); - it.col_current = 0; + if (_sizing(num_items - 1) != num_alloc) + { + num_alloc = _sizing(num_items - 1); + assert(num_alloc); // because we have at least 1 item left + _set_num_alloc(num_alloc); + _group = alloc.reallocate(_group, num_alloc); + } } - else + + // Shrink the array, without any special assumptions about value_type and + // allocator_type. 
+ // -------------------------------------------------------------------------- + void _group_erase_aux(allocator_type& alloc, size_type offset, spp_::false_type) { - _group_erase(alloc, offset); - _decr_num_items(); - _bmclear(pos); + uint32_t num_items = _num_items(); + uint32_t num_alloc = _sizing(num_items); + + if (_sizing(num_items - 1) != num_alloc) + { + pointer p = 0; - // in case _group_erase reallocated the buffer - it.col_current = reinterpret_cast(_group) + offset; + if (num_items > 1) + { + p = _allocate_group(alloc, num_items - 1); + + if (offset) + std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group)), + MK_MOVE_IT((mutable_pointer)(_group + offset)), + (mutable_pointer)(p)); + + if (static_cast(offset + 1) < num_items) + std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group + offset + 1)), + MK_MOVE_IT((mutable_pointer)(_group + num_items)), + (mutable_pointer)(p + offset)); + } + else + { + assert(offset == 0); + _set_num_alloc(0); + } + + _free_group(alloc, num_alloc); + _group = p; + } + else + { + std::rotate((mutable_pointer)(_group + offset), + (mutable_pointer)(_group + offset + 1), + (mutable_pointer)(_group + num_items)); + ((mutable_pointer)(_group + num_items - 1))->~mutable_value_type(); + } } - _bme_set(pos); // remember that this position has been erased - it.advance_past_end(); - return true; - } + void _group_erase(allocator_type& alloc, size_type offset) + { + _group_erase_aux(alloc, offset, realloc_and_memmove_ok()); + } - // This takes the specified elements out of the group. This is - // "undefining", rather than "clearing". - // TODO(austern): Make this exception safe: handle exceptions from - // value_type's copy constructor. - // --------------------------------------------------------------- - void erase(allocator_type &alloc, size_type i) - { - if (_bmtest(i)) + public: + template + bool erase_ne(allocator_type& alloc, twod_iter& it) { - // trivial to erase empty bucket - if (_num_items() == 1) + assert(_group && it.col_current != ne_end()); + size_type offset = (size_type)(it.col_current - ne_begin()); + size_type pos = offset_to_pos(offset); + + if (_num_items() <= 1) + { clear(alloc, false); + it.col_current = 0; + } else { - _group_erase(alloc, pos_to_offset(i)); + _group_erase(alloc, offset); _decr_num_items(); - _bmclear(i); + _bmclear(pos); + // in case _group_erase reallocated the buffer + it.col_current = reinterpret_cast(_group) + offset; } - _bme_set(i); // remember that this position has been erased + + _bme_set(pos); // remember that this position has been erased + it.advance_past_end(); + return true; } - } - // I/O - // We support reading and writing groups to disk. We don't store - // the actual array contents (which we don't know how to store), - // just the bitmap and size. Meant to be used with table I/O. - // -------------------------------------------------------------- - template bool write_metadata(OUTPUT *fp) const - { - // warning: we write 4 or 8 bytes for the bitmap, instead of 6 in the - // original google sparsehash - // ------------------------------------------------------------------ - if (!sparsehash_internal::write_data(fp, &_bitmap, sizeof(_bitmap))) - return false; - return true; - } + // This takes the specified elements out of the group. This is + // "undefining", rather than "clearing". + // TODO(austern): Make this exception safe: handle exceptions from + // value_type's copy constructor. 
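The erase paths in this hunk are "undefining" rather than "clearing": the occupied bit is removed from _bitmap while _bm_erased remembers that the slot once held a value, which is why test() and test_strict() can disagree after an erase. A conceptual two-bitmap model, not from the patch and with illustrative names:

#include <cassert>
#include <cstdint>

struct GroupBits
{
    uint32_t bitmap = 0;    // slot currently holds a value
    uint32_t erased = 0;    // slot held a value at some point

    void set(uint32_t i)
    {
        bitmap |= (1u << i);
        erased &= ~(1u << i);   // setting a slot forgets its "erased" state
    }
    void erase(uint32_t i)
    {
        bitmap &= ~(1u << i);
        erased |= (1u << i);    // remember that this position was erased
    }
    bool test(uint32_t i) const
    {
        return (bitmap >> i) & 1u;
    }
    bool test_strict(uint32_t i) const
    {
        return ((bitmap | erased) >> i) & 1u;
    }
};

int main()
{
    GroupBits g;
    g.set(7);
    g.erase(7);
    assert(!g.test(7));         // no longer occupied
    assert(g.test_strict(7));   // but still reported by the strict test
    return 0;
}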
+ // --------------------------------------------------------------- + void erase(allocator_type& alloc, size_type i) + { + if (_bmtest(i)) + { + // trivial to erase empty bucket + if (_num_items() == 1) + { + clear(alloc, false); + } + else + { + _group_erase(alloc, pos_to_offset(i)); + _decr_num_items(); + _bmclear(i); + } - // Reading destroys the old group contents! Returns true if all was ok. - template bool read_metadata(allocator_type &alloc, INPUT *fp) - { - clear(alloc, true); + _bme_set(i); // remember that this position has been erased + } + } - if (!sparsehash_internal::read_data(fp, &_bitmap, sizeof(_bitmap))) - return false; + // I/O + // We support reading and writing groups to disk. We don't store + // the actual array contents (which we don't know how to store), + // just the bitmap and size. Meant to be used with table I/O. + // -------------------------------------------------------------- + template bool write_metadata(OUTPUT* fp) const + { + // warning: we write 4 or 8 bytes for the bitmap, instead of 6 in the + // original google sparsehash + // ------------------------------------------------------------------ + if (!sparsehash_internal::write_data(fp, &_bitmap, sizeof(_bitmap))) + { + return false; + } - // We'll allocate the space, but we won't fill it: it will be - // left as uninitialized raw memory. - uint32_t num_items = spp_popcount(_bitmap); // yes, _num_buckets not set - _set_num_items(num_items); - _group = num_items ? _allocate_group(alloc, num_items/* , true */) : 0; - return true; - } + return true; + } - // Again, only meaningful if value_type is a POD. - template bool read_nopointer_data(INPUT *fp) - { - for (ne_iterator it = ne_begin(); it != ne_end(); ++it) - if (!sparsehash_internal::read_data(fp, &(*it), sizeof(*it))) - return false; - return true; - } + // Reading destroys the old group contents! Returns true if all was ok. + template bool read_metadata(allocator_type& alloc, INPUT* fp) + { + clear(alloc, true); - // If your keys and values are simple enough, we can write them - // to disk for you. "simple enough" means POD and no pointers. - // However, we don't try to normalize endianness. - // ------------------------------------------------------------ - template bool write_nopointer_data(OUTPUT *fp) const - { - for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it) - if (!sparsehash_internal::write_data(fp, &(*it), sizeof(*it))) + if (!sparsehash_internal::read_data(fp, &_bitmap, sizeof(_bitmap))) + { return false; - return true; - } - + } - // Comparisons. We only need to define == and < -- we get - // != > <= >= via relops.h (which we happily included above). - // Note the comparisons are pretty arbitrary: we compare - // values of the first index that isn't equal (using default - // value for empty buckets). - // --------------------------------------------------------- - bool operator==(const sparsegroup& x) const - { - return (_bitmap == x._bitmap && - _bm_erased == x._bm_erased && - std::equal(_group, _group + _num_items(), x._group)); - } + // We'll allocate the space, but we won't fill it: it will be + // left as uninitialized raw memory. + uint32_t num_items = spp_popcount(_bitmap); // yes, _num_buckets not set + _set_num_items(num_items); + _group = num_items ? 
_allocate_group(alloc, num_items/* , true */) : 0; + return true; + } - bool operator<(const sparsegroup& x) const - { - // also from - return std::lexicographical_compare(_group, _group + _num_items(), - x._group, x._group + x._num_items()); - } + // Again, only meaningful if value_type is a POD. + template bool read_nopointer_data(INPUT* fp) + { + for (ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!sparsehash_internal::read_data(fp, &(*it), sizeof(*it))) + { + return false; + } - bool operator!=(const sparsegroup& x) const { return !(*this == x); } - bool operator<=(const sparsegroup& x) const { return !(x < *this); } - bool operator> (const sparsegroup& x) const { return x < *this; } - bool operator>=(const sparsegroup& x) const { return !(*this < x); } + return true; + } - void mark() { _group = (value_type *)static_cast(-1); } - bool is_marked() const { return _group == (value_type *)static_cast(-1); } + // If your keys and values are simple enough, we can write them + // to disk for you. "simple enough" means POD and no pointers. + // However, we don't try to normalize endianness. + // ------------------------------------------------------------ + template bool write_nopointer_data(OUTPUT* fp) const + { + for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!sparsehash_internal::write_data(fp, &(*it), sizeof(*it))) + { + return false; + } -private: - // --------------------------------------------------------------------------- - template - class alloc_impl : public A - { - public: - typedef typename A::pointer pointer; - typedef typename A::size_type size_type; + return true; + } - // Convert a normal allocator to one that has realloc_or_die() - explicit alloc_impl(const A& a) : A(a) { } - // realloc_or_die should only be used when using the default - // allocator (libc_allocator_with_realloc). - pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) + // Comparisons. We only need to define == and < -- we get + // != > <= >= via relops.h (which we happily included above). + // Note the comparisons are pretty arbitrary: we compare + // values of the first index that isn't equal (using default + // value for empty buckets). + // --------------------------------------------------------- + bool operator==(const sparsegroup& x) const { - fprintf(stderr, "realloc_or_die is only supported for " - "libc_allocator_with_realloc\n"); - exit(1); - return NULL; + return (_bitmap == x._bitmap && + _bm_erased == x._bm_erased && + std::equal(_group, _group + _num_items(), x._group)); } - }; - // A template specialization of alloc_impl for - // libc_allocator_with_realloc that can handle realloc_or_die. 
- // ----------------------------------------------------------- - template - class alloc_impl > - : public libc_allocator_with_realloc - { - public: - typedef typename libc_allocator_with_realloc::pointer pointer; - typedef typename libc_allocator_with_realloc::size_type size_type; + bool operator<(const sparsegroup& x) const + { + // also from + return std::lexicographical_compare(_group, _group + _num_items(), + x._group, x._group + x._num_items()); + } - explicit alloc_impl(const libc_allocator_with_realloc& a) - : libc_allocator_with_realloc(a) - { } + bool operator!=(const sparsegroup& x) const + { + return !(*this == x); + } + bool operator<=(const sparsegroup& x) const + { + return !(x < *this); + } + bool operator> (const sparsegroup& x) const + { + return x < *this; + } + bool operator>=(const sparsegroup& x) const + { + return !(*this < x); + } - pointer realloc_or_die(pointer ptr, size_type n) + void mark() { - pointer retval = this->reallocate(ptr, n); - if (retval == NULL) - { - fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate " - "%lu elements for ptr %p", static_cast(n), ptr); - exit(1); - } - return retval; + _group = (value_type*)static_cast(-1); + } + bool is_marked() const + { + return _group == (value_type*)static_cast(-1); } - }; -#ifdef SPP_STORE_NUM_ITEMS - uint32_t _num_items() const { return (uint32_t)_num_buckets; } - void _set_num_items(uint32_t val) { _num_buckets = static_cast(val); } - void _incr_num_items() { ++_num_buckets; } - void _decr_num_items() { --_num_buckets; } - uint32_t _num_alloc() const { return (uint32_t)_num_allocated; } - void _set_num_alloc(uint32_t val) { _num_allocated = static_cast(val); } -#else - uint32_t _num_items() const { return spp_popcount(_bitmap); } - void _set_num_items(uint32_t ) { } - void _incr_num_items() { } - void _decr_num_items() { } - uint32_t _num_alloc() const { return _sizing(_num_items()); } - void _set_num_alloc(uint32_t val) { } -#endif + private: + // --------------------------------------------------------------------------- + template + class alloc_impl : public A + { + public: + typedef typename A::pointer pointer; + typedef typename A::size_type size_type; - // The actual data - // --------------- - value_type * _group; // (small) array of T's - group_bm_type _bitmap; - group_bm_type _bm_erased; // ones where items have been erased + // Convert a normal allocator to one that has realloc_or_die() + explicit alloc_impl(const A& a) : A(a) { } -#ifdef SPP_STORE_NUM_ITEMS - size_type _num_buckets; - size_type _num_allocated; -#endif -}; + // realloc_or_die should only be used when using the default + // allocator (libc_allocator_with_realloc). + pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) + { + fprintf(stderr, "realloc_or_die is only supported for " + "libc_allocator_with_realloc\n"); + exit(1); + return NULL; + } + }; -// --------------------------------------------------------------------------- + // A template specialization of alloc_impl for + // libc_allocator_with_realloc that can handle realloc_or_die. 
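alloc_impl relies on a primary template plus a partial specialization: the generic realloc_or_die() simply aborts, and only the specialization for libc_allocator_with_realloc, shown next, actually grows the buffer in place. A reduced sketch of the same pattern, not from the patch, with libc_realloc_alloc standing in for the library's allocator:

#include <cstddef>
#include <cstdio>
#include <cstdlib>

template <class A>
struct realloc_support              // generic allocators cannot realloc
{
    static void* realloc_or_die(void*, std::size_t)
    {
        std::fprintf(stderr, "realloc_or_die needs the realloc-capable allocator\n");
        std::exit(1);
        return NULL;
    }
};

template <class T>
struct libc_realloc_alloc { };      // stand-in for libc_allocator_with_realloc

template <class T>
struct realloc_support<libc_realloc_alloc<T> >  // the one that can realloc
{
    static void* realloc_or_die(void* p, std::size_t n)
    {
        void* q = std::realloc(p, n * sizeof(T));

        if (q == NULL)
        {
            std::fprintf(stderr, "failed to reallocate %lu elements\n",
                         static_cast<unsigned long>(n));
            std::exit(1);
        }

        return q;
    }
};

The real class also forwards the allocator's pointer and size_type typedefs; the sketch keeps only the dispatch that matters here.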
+ // ----------------------------------------------------------- + template + class alloc_impl> + : public libc_allocator_with_realloc + { + public: + typedef typename libc_allocator_with_realloc::pointer pointer; + typedef typename libc_allocator_with_realloc::size_type size_type; + + explicit alloc_impl(const libc_allocator_with_realloc& a) + : libc_allocator_with_realloc(a) + { } + + pointer realloc_or_die(pointer ptr, size_type n) + { + pointer retval = this->reallocate(ptr, n); + + if (retval == NULL) + { + fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate " + "%lu elements for ptr %p", static_cast(n), ptr); + exit(1); + } + + return retval; + } + }; + +#ifdef SPP_STORE_NUM_ITEMS + uint32_t _num_items() const + { + return (uint32_t)_num_buckets; + } + void _set_num_items(uint32_t val) + { + _num_buckets = static_cast(val); + } + void _incr_num_items() + { + ++_num_buckets; + } + void _decr_num_items() + { + --_num_buckets; + } + uint32_t _num_alloc() const + { + return (uint32_t)_num_allocated; + } + void _set_num_alloc(uint32_t val) + { + _num_allocated = static_cast(val); + } +#else + uint32_t _num_items() const + { + return spp_popcount(_bitmap); + } + void _set_num_items(uint32_t ) { } + void _incr_num_items() { } + void _decr_num_items() { } + uint32_t _num_alloc() const + { + return _sizing(_num_items()); + } + void _set_num_alloc(uint32_t val) { } +#endif + + // The actual data + // --------------- + value_type* _group; // (small) array of T's + group_bm_type _bitmap; + group_bm_type + _bm_erased; // ones where items have been erased + +#ifdef SPP_STORE_NUM_ITEMS + size_type _num_buckets; + size_type _num_allocated; +#endif +}; + +// --------------------------------------------------------------------------- // We need a global swap as well // --------------------------------------------------------------------------- template -inline void swap(sparsegroup &x, sparsegroup &y) +inline void swap(sparsegroup& x, sparsegroup& y) { x.swap(y); } // --------------------------------------------------------------------------- // --------------------------------------------------------------------------- -template > +template > class sparsetable { -public: - typedef T value_type; - typedef Alloc allocator_type; - typedef sparsegroup group_type; + public: + typedef T value_type; + typedef Alloc allocator_type; + typedef sparsegroup group_type; -private: - typedef typename Alloc::template rebind::other group_alloc_type; - typedef typename group_alloc_type::size_type group_size_type; + private: + typedef typename Alloc::template rebind::other group_alloc_type; + typedef typename group_alloc_type::size_type group_size_type; -public: - // Basic types - // ----------- - typedef typename allocator_type::size_type size_type; - typedef typename allocator_type::difference_type difference_type; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef value_type* pointer; - typedef const value_type* const_pointer; - - typedef group_type& GroupsReference; - typedef const group_type& GroupsConstReference; - - typedef typename group_type::ne_iterator ColIterator; - typedef typename group_type::const_ne_iterator ColConstIterator; - - typedef table_iterator > iterator; // defined with index - typedef const_table_iterator > const_iterator; // defined with index - typedef std::reverse_iterator const_reverse_iterator; - typedef std::reverse_iterator reverse_iterator; - - // These are our special iterators, that go over non-empty buckets in a - // table. 
These aren't const only because you can change non-empty bcks. - // ---------------------------------------------------------------------- - typedef Two_d_iterator ne_iterator; - - typedef Two_d_iterator const_ne_iterator; - - // Another special iterator: it frees memory as it iterates (used to resize). - // Obviously, you can only iterate over it once, which is why it's an input iterator - // --------------------------------------------------------------------------------- - typedef Two_d_destructive_iterator destructive_iterator; - - typedef std::reverse_iterator reverse_ne_iterator; - typedef std::reverse_iterator const_reverse_ne_iterator; + public: + // Basic types + // ----------- + typedef typename allocator_type::size_type size_type; + typedef typename allocator_type::difference_type difference_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef value_type* pointer; + typedef const value_type* const_pointer; + + typedef group_type& GroupsReference; + typedef const group_type& GroupsConstReference; + + typedef typename group_type::ne_iterator ColIterator; + typedef typename group_type::const_ne_iterator ColConstIterator; + + typedef table_iterator> + iterator; // defined with index + typedef const_table_iterator> + const_iterator; // defined with index + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + + // These are our special iterators, that go over non-empty buckets in a + // table. These aren't const only because you can change non-empty bcks. + // ---------------------------------------------------------------------- + typedef Two_d_iterator ne_iterator; + + typedef Two_d_iterator const_ne_iterator; + + // Another special iterator: it frees memory as it iterates (used to resize). 
+ // Obviously, you can only iterate over it once, which is why it's an input iterator + // --------------------------------------------------------------------------------- + typedef Two_d_destructive_iterator destructive_iterator; + + typedef std::reverse_iterator reverse_ne_iterator; + typedef std::reverse_iterator + const_reverse_ne_iterator; + + + // Iterator functions + // ------------------ + iterator begin() + { + return iterator(this, 0); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator cbegin() const + { + return const_iterator(this, 0); + } + iterator end() + { + return iterator(this, size()); + } + const_iterator end() const + { + return const_iterator(this, size()); + } + const_iterator cend() const + { + return const_iterator(this, size()); + } + reverse_iterator rbegin() + { + return reverse_iterator(end()); + } + const_reverse_iterator rbegin() const + { + return const_reverse_iterator(cend()); + } + const_reverse_iterator crbegin() const + { + return const_reverse_iterator(cend()); + } + reverse_iterator rend() + { + return reverse_iterator(begin()); + } + const_reverse_iterator rend() const + { + return const_reverse_iterator(cbegin()); + } + const_reverse_iterator crend() const + { + return const_reverse_iterator(cbegin()); + } + // Versions for our special non-empty iterator + // ------------------------------------------ + ne_iterator ne_begin() + { + return ne_iterator (_first_group); + } + const_ne_iterator ne_begin() const + { + return const_ne_iterator(_first_group); + } + const_ne_iterator ne_cbegin() const + { + return const_ne_iterator(_first_group); + } + ne_iterator ne_end() + { + return ne_iterator (_last_group); + } + const_ne_iterator ne_end() const + { + return const_ne_iterator(_last_group); + } + const_ne_iterator ne_cend() const + { + return const_ne_iterator(_last_group); + } - // Iterator functions - // ------------------ - iterator begin() { return iterator(this, 0); } - const_iterator begin() const { return const_iterator(this, 0); } - const_iterator cbegin() const { return const_iterator(this, 0); } - iterator end() { return iterator(this, size()); } - const_iterator end() const { return const_iterator(this, size()); } - const_iterator cend() const { return const_iterator(this, size()); } - reverse_iterator rbegin() { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const { return const_reverse_iterator(cend()); } - const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rend() const { return const_reverse_iterator(cbegin()); } - const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } + reverse_ne_iterator ne_rbegin() + { + return reverse_ne_iterator(ne_end()); + } + const_reverse_ne_iterator ne_rbegin() const + { + return const_reverse_ne_iterator(ne_end()); + } + const_reverse_ne_iterator ne_crbegin() const + { + return const_reverse_ne_iterator(ne_end()); + } + reverse_ne_iterator ne_rend() + { + return reverse_ne_iterator(ne_begin()); + } + const_reverse_ne_iterator ne_rend() const + { + return const_reverse_ne_iterator(ne_begin()); + } + const_reverse_ne_iterator ne_crend() const + { + return const_reverse_ne_iterator(ne_begin()); + } - // Versions for our special non-empty iterator - // ------------------------------------------ - ne_iterator ne_begin() { return ne_iterator (_first_group); } - const_ne_iterator ne_begin() const { return 
const_ne_iterator(_first_group); } - const_ne_iterator ne_cbegin() const { return const_ne_iterator(_first_group); } - ne_iterator ne_end() { return ne_iterator (_last_group); } - const_ne_iterator ne_end() const { return const_ne_iterator(_last_group); } - const_ne_iterator ne_cend() const { return const_ne_iterator(_last_group); } + destructive_iterator destructive_begin() + { + return destructive_iterator(_alloc, _first_group); + } - reverse_ne_iterator ne_rbegin() { return reverse_ne_iterator(ne_end()); } - const_reverse_ne_iterator ne_rbegin() const { return const_reverse_ne_iterator(ne_end()); } - const_reverse_ne_iterator ne_crbegin() const { return const_reverse_ne_iterator(ne_end()); } - reverse_ne_iterator ne_rend() { return reverse_ne_iterator(ne_begin()); } - const_reverse_ne_iterator ne_rend() const { return const_reverse_ne_iterator(ne_begin()); } - const_reverse_ne_iterator ne_crend() const { return const_reverse_ne_iterator(ne_begin()); } + destructive_iterator destructive_end() + { + return destructive_iterator(_alloc, _last_group); + } - destructive_iterator destructive_begin() - { - return destructive_iterator(_alloc, _first_group); - } + // How to deal with the proper group + static group_size_type num_groups(group_size_type num) + { + // how many to hold num buckets + return num == 0 ? (group_size_type)0 : + (group_size_type)(((num - 1) / SPP_GROUP_SIZE) + 1); + } - destructive_iterator destructive_end() - { - return destructive_iterator(_alloc, _last_group); - } + typename group_type::size_type pos_in_group(size_type i) const + { + return static_cast(i & SPP_MASK_); + } - // How to deal with the proper group - static group_size_type num_groups(group_size_type num) - { - // how many to hold num buckets - return num == 0 ? (group_size_type)0 : - (group_size_type)(((num-1) / SPP_GROUP_SIZE) + 1); - } + size_type group_num(size_type i) const + { + return (size_type)(i >> SPP_SHIFT_); + } - typename group_type::size_type pos_in_group(size_type i) const - { - return static_cast(i & SPP_MASK_); - } + GroupsReference which_group(size_type i) + { + return _first_group[group_num(i)]; + } - size_type group_num(size_type i) const - { - return (size_type)(i >> SPP_SHIFT_); - } + GroupsConstReference which_group(size_type i) const + { + return _first_group[group_num(i)]; + } - GroupsReference which_group(size_type i) - { - return _first_group[group_num(i)]; - } + void _alloc_group_array(group_size_type sz, group_type*& first, + group_type*& last) + { + if (sz) + { + first = _group_alloc.allocate((size_type)(sz + 1)); // + 1 for end marker + first[sz].mark(); // for the ne_iterator + last = first + sz; + } + } - GroupsConstReference which_group(size_type i) const - { - return _first_group[group_num(i)]; - } + void _free_group_array(group_type*& first, group_type*& last) + { + if (first) + { + _group_alloc.deallocate(first, + (group_size_type)(last - first + 1)); // + 1 for end marker + first = last = 0; + } + } - void _alloc_group_array(group_size_type sz, group_type *&first, group_type *&last) - { - if (sz) + void _allocate_groups(size_type sz) { - first = _group_alloc.allocate((size_type)(sz + 1)); // + 1 for end marker - first[sz].mark(); // for the ne_iterator - last = first + sz; + if (sz) + { + _alloc_group_array(sz, _first_group, _last_group); + std::uninitialized_fill(_first_group, _last_group, group_type()); + } } - } - void _free_group_array(group_type *&first, group_type *&last) - { - if (first) + void _free_groups() { - _group_alloc.deallocate(first, 
(group_size_type)(last - first + 1)); // + 1 for end marker - first = last = 0; + if (_first_group) + { + for (group_type* g = _first_group; g != _last_group; ++g) + { + g->destruct(_alloc); + } + + _free_group_array(_first_group, _last_group); + } } - } - void _allocate_groups(size_type sz) - { - if (sz) + void _cleanup() { - _alloc_group_array(sz, _first_group, _last_group); - std::uninitialized_fill(_first_group, _last_group, group_type()); + _free_groups(); // sets _first_group = _last_group = 0 + _table_size = 0; + _num_buckets = 0; } - } - void _free_groups() - { - if (_first_group) + void _init() { - for (group_type *g = _first_group; g != _last_group; ++g) - g->destruct(_alloc); - _free_group_array(_first_group, _last_group); + _first_group = 0; + _last_group = 0; + _table_size = 0; + _num_buckets = 0; } - } - void _cleanup() - { - _free_groups(); // sets _first_group = _last_group = 0 - _table_size = 0; - _num_buckets = 0; - } + void _copy(const sparsetable& o) + { + _table_size = o._table_size; + _num_buckets = o._num_buckets; + _alloc = o._alloc; // todo - copy or move allocator according to... + _group_alloc = + o._group_alloc; // http://en.cppreference.com/w/cpp/container/unordered_map/unordered_map + group_size_type sz = (group_size_type)(o._last_group - o._first_group); - void _init() - { - _first_group = 0; - _last_group = 0; - _table_size = 0; - _num_buckets = 0; - } + if (sz) + { + _alloc_group_array(sz, _first_group, _last_group); - void _copy(const sparsetable &o) - { - _table_size = o._table_size; - _num_buckets = o._num_buckets; - _alloc = o._alloc; // todo - copy or move allocator according to... - _group_alloc = o._group_alloc; // http://en.cppreference.com/w/cpp/container/unordered_map/unordered_map + for (group_size_type i = 0; i < sz; ++i) + { + new (_first_group + i) group_type(o._first_group[i], _alloc); + } + } + } - group_size_type sz = (group_size_type)(o._last_group - o._first_group); - if (sz) + public: + // Constructors -- default, normal (when you specify size), and copy + explicit sparsetable(size_type sz = 0, + const allocator_type& alloc = allocator_type()) : + _first_group(0), + _last_group(0), + _table_size(sz), + _num_buckets(0), + _alloc(alloc) // todo - copy or move allocator according to + // http://en.cppreference.com/w/cpp/container/unordered_map/unordered_map { - _alloc_group_array(sz, _first_group, _last_group); - for (group_size_type i=0; iswap(o); - } + sparsetable(sparsetable&& o) + { + _init(); + this->swap(o); + } - sparsetable(sparsetable&& o, const allocator_type &alloc) - { - _init(); - this->swap(o); - _alloc = alloc; // [gp todo] is this correct? - } + sparsetable(sparsetable&& o, const allocator_type& alloc) + { + _init(); + this->swap(o); + _alloc = alloc; // [gp todo] is this correct? 
+ } - sparsetable& operator=(sparsetable&& o) - { - _cleanup(); - this->swap(o); - return *this; - } + sparsetable& operator=(sparsetable&& o) + { + _cleanup(); + this->swap(o); + return *this; + } #endif - // Many STL algorithms use swap instead of copy constructors - void swap(sparsetable& o) - { - using std::swap; - - swap(_first_group, o._first_group); - swap(_last_group, o._last_group); - swap(_table_size, o._table_size); - swap(_num_buckets, o._num_buckets); - if (_alloc != o._alloc) - swap(_alloc, o._alloc); - if (_group_alloc != o._group_alloc) - swap(_group_alloc, o._group_alloc); - } - - // It's always nice to be able to clear a table without deallocating it - void clear() - { - _free_groups(); - _num_buckets = 0; - _table_size = 0; - } - - inline allocator_type get_allocator() const - { - return _alloc; - } - - - // Functions that tell you about size. - // NOTE: empty() is non-intuitive! It does not tell you the number - // of not-empty buckets (use num_nonempty() for that). Instead - // it says whether you've allocated any buckets or not. - // ---------------------------------------------------------------- - size_type size() const { return _table_size; } - size_type max_size() const { return _alloc.max_size(); } - bool empty() const { return _table_size == 0; } - size_type num_nonempty() const { return _num_buckets; } - - // OK, we'll let you resize one of these puppies - void resize(size_type new_size) - { - group_size_type sz = num_groups(new_size); - group_size_type old_sz = (group_size_type)(_last_group - _first_group); - - if (sz != old_sz) + // Many STL algorithms use swap instead of copy constructors + void swap(sparsetable& o) { - // resize group array - // ------------------ - group_type *first = 0, *last = 0; - if (sz) + using std::swap; + swap(_first_group, o._first_group); + swap(_last_group, o._last_group); + swap(_table_size, o._table_size); + swap(_num_buckets, o._num_buckets); + + if (_alloc != o._alloc) { - _alloc_group_array(sz, first, last); - memcpy(first, _first_group, sizeof(*first) * (std::min)(sz, old_sz)); + swap(_alloc, o._alloc); } - if (sz < old_sz) + if (_group_alloc != o._group_alloc) { - for (group_type *g = _first_group + sz; g != _last_group; ++g) - g->destruct(_alloc); + swap(_group_alloc, o._group_alloc); } - else - std::uninitialized_fill(first + old_sz, last, group_type()); - - _free_group_array(_first_group, _last_group); - _first_group = first; - _last_group = last; } -#if 0 - // used only in test program - // todo: fix if sparsetable to be used directly - // -------------------------------------------- - if (new_size < _table_size) + + // It's always nice to be able to clear a table without deallocating it + void clear() { - // lower num_buckets, clear last group - if (pos_in_group(new_size) > 0) // need to clear inside last group - groups.back().erase(_alloc, groups.back().begin() + pos_in_group(new_size), - groups.back().end()); - _num_buckets = 0; // refigure # of used buckets - for (const group_type *group = _first_group; group != _last_group; ++group) - _num_buckets += group->num_nonempty(); + _free_groups(); + _num_buckets = 0; + _table_size = 0; } -#endif - _table_size = new_size; - } - - // We let you see if a bucket is non-empty without retrieving it - // ------------------------------------------------------------- - bool test(size_type i) const - { - // assert(i < _table_size); - return which_group(i).test(pos_in_group(i)); - } - - // also tests for erased values - // ---------------------------- - bool test_strict(size_type 
i) const - { - // assert(i < _table_size); - return which_group(i).test_strict(pos_in_group(i)); - } - friend struct GrpPos; + inline allocator_type get_allocator() const + { + return _alloc; + } - struct GrpPos - { - typedef typename sparsetable::ne_iterator ne_iter; - GrpPos(const sparsetable &table, size_type i) : - grp(table.which_group(i)), pos(table.pos_in_group(i)) {} - bool test_strict() const { return grp.test_strict(pos); } - bool test() const { return grp.test(pos); } - typename sparsetable::reference unsafe_get() const { return grp.unsafe_get(pos); } - ne_iter get_iter(typename sparsetable::reference ref) + // Functions that tell you about size. + // NOTE: empty() is non-intuitive! It does not tell you the number + // of not-empty buckets (use num_nonempty() for that). Instead + // it says whether you've allocated any buckets or not. + // ---------------------------------------------------------------- + size_type size() const { - return ne_iter((group_type *)&grp, &ref); + return _table_size; } - - void erase(sparsetable &table) // item *must* be present + size_type max_size() const + { + return _alloc.max_size(); + } + bool empty() const + { + return _table_size == 0; + } + size_type num_nonempty() const { - assert(table._num_buckets); - ((group_type &)grp).erase(table._alloc, pos); - --table._num_buckets; + return _num_buckets; } - private: - GrpPos* operator=(const GrpPos&); + // OK, we'll let you resize one of these puppies + void resize(size_type new_size) + { + group_size_type sz = num_groups(new_size); + group_size_type old_sz = (group_size_type)(_last_group - _first_group); - const group_type &grp; - typename group_type::size_type pos; - }; + if (sz != old_sz) + { + // resize group array + // ------------------ + group_type* first = 0, *last = 0; - bool test(iterator pos) const - { - return which_group(pos.pos).test(pos_in_group(pos.pos)); - } + if (sz) + { + _alloc_group_array(sz, first, last); + memcpy(first, _first_group, sizeof(*first) * (std::min)(sz, old_sz)); + } - bool test(const_iterator pos) const - { - return which_group(pos.pos).test(pos_in_group(pos.pos)); - } + if (sz < old_sz) + { + for (group_type* g = _first_group + sz; g != _last_group; ++g) + { + g->destruct(_alloc); + } + } + else + { + std::uninitialized_fill(first + old_sz, last, group_type()); + } - // TODO(csilvers): make protected + friend - // This is used by sparse_hashtable to get an element from the table - // when we know it exists (because the caller has called test(i)). - // ----------------------------------------------------------------- - reference unsafe_get(size_type i) const - { - assert(i < _table_size); - // assert(test(i)); - return which_group(i).unsafe_get(pos_in_group(i)); - } + _free_group_array(_first_group, _last_group); + _first_group = first; + _last_group = last; + } - // Needed for hashtables, gets as a ne_iterator. Crashes for empty bcks - const_ne_iterator get_iter(size_type i) const - { - //assert(test(i)); // how can a ne_iterator point to an empty bucket? 
+#if 0 - size_type grp_idx = group_num(i); + // used only in test program + // todo: fix if sparsetable to be used directly + // -------------------------------------------- + if (new_size < _table_size) + { + // lower num_buckets, clear last group + if (pos_in_group(new_size) > 0) // need to clear inside last group + groups.back().erase(_alloc, groups.back().begin() + pos_in_group(new_size), + groups.back().end()); - return const_ne_iterator(_first_group + grp_idx, - (_first_group[grp_idx].ne_begin() + - _first_group[grp_idx].pos_to_offset(pos_in_group(i)))); - } + _num_buckets = 0; // refigure # of used buckets - const_ne_iterator get_iter(size_type i, ColIterator col_it) const - { - return const_ne_iterator(_first_group + group_num(i), col_it); - } + for (const group_type* group = _first_group; group != _last_group; ++group) + { + _num_buckets += group->num_nonempty(); + } + } - // For nonempty we can return a non-const version - ne_iterator get_iter(size_type i) - { - //assert(test(i)); // how can a nonempty_iterator point to an empty bucket? +#endif + _table_size = new_size; + } - size_type grp_idx = group_num(i); + // We let you see if a bucket is non-empty without retrieving it + // ------------------------------------------------------------- + bool test(size_type i) const + { + // assert(i < _table_size); + return which_group(i).test(pos_in_group(i)); + } - return ne_iterator(_first_group + grp_idx, - (_first_group[grp_idx].ne_begin() + - _first_group[grp_idx].pos_to_offset(pos_in_group(i)))); - } + // also tests for erased values + // ---------------------------- + bool test_strict(size_type i) const + { + // assert(i < _table_size); + return which_group(i).test_strict(pos_in_group(i)); + } - ne_iterator get_iter(size_type i, ColIterator col_it) - { - return ne_iterator(_first_group + group_num(i), col_it); - } + friend struct GrpPos; - // And the reverse transformation. 
- size_type get_pos(const const_ne_iterator& it) const - { - difference_type current_row = it.row_current - _first_group; - difference_type current_col = (it.col_current - _first_group[current_row].ne_begin()); - return ((current_row * SPP_GROUP_SIZE) + - _first_group[current_row].offset_to_pos(current_col)); - } + struct GrpPos + { + typedef typename sparsetable::ne_iterator ne_iter; + GrpPos(const sparsetable& table, size_type i) : + grp(table.which_group(i)), pos(table.pos_in_group(i)) {} - // Val can be reference or const_reference - // --------------------------------------- - template - reference set(size_type i, Val &val) - { - assert(i < _table_size); - group_type &group = which_group(i); - typename group_type::size_type old_numbuckets = group.num_nonempty(); - pointer p(group.set(_alloc, pos_in_group(i), val)); - _num_buckets += group.num_nonempty() - old_numbuckets; - return *p; - } + bool test_strict() const + { + return grp.test_strict(pos); + } + bool test() const + { + return grp.test(pos); + } + typename sparsetable::reference unsafe_get() const + { + return grp.unsafe_get(pos); + } + ne_iter get_iter(typename sparsetable::reference ref) + { + return ne_iter((group_type*)&grp, &ref); + } - // used in _move_from (where we can move the old value instead of copying it - void move(size_type i, reference val) - { - assert(i < _table_size); - which_group(i).set(_alloc, pos_in_group(i), val); - ++_num_buckets; - } + void erase(sparsetable& table) // item *must* be present + { + assert(table._num_buckets); + ((group_type&)grp).erase(table._alloc, pos); + --table._num_buckets; + } - // This takes the specified elements out of the table. - // -------------------------------------------------- - void erase(size_type i) - { - assert(i < _table_size); + private: + GrpPos* operator=(const GrpPos&); - GroupsReference grp(which_group(i)); - typename group_type::size_type old_numbuckets = grp.num_nonempty(); - grp.erase(_alloc, pos_in_group(i)); - _num_buckets += grp.num_nonempty() - old_numbuckets; - } + const group_type& grp; + typename group_type::size_type pos; + }; - void erase(iterator pos) - { - erase(pos.pos); - } + bool test(iterator pos) const + { + return which_group(pos.pos).test(pos_in_group(pos.pos)); + } - void erase(iterator start_it, iterator end_it) - { - // This could be more efficient, but then we'd need to figure - // out if we spanned groups or not. Doesn't seem worth it. - for (; start_it != end_it; ++start_it) - erase(start_it); - } + bool test(const_iterator pos) const + { + return which_group(pos.pos).test(pos_in_group(pos.pos)); + } - const_ne_iterator erase(const_ne_iterator it) - { - ne_iterator res(it); - if (res.row_current->erase_ne(_alloc, res)) - _num_buckets--; - return res; - } + // TODO(csilvers): make protected + friend + // This is used by sparse_hashtable to get an element from the table + // when we know it exists (because the caller has called test(i)). + // ----------------------------------------------------------------- + reference unsafe_get(size_type i) const + { + assert(i < _table_size); + // assert(test(i)); + return which_group(i).unsafe_get(pos_in_group(i)); + } - const_ne_iterator erase(const_ne_iterator f, const_ne_iterator l) - { - size_t diff = l - f; - while (diff--) - f = erase(f); - return f; - } + // Needed for hashtables, gets as a ne_iterator. Crashes for empty bcks + const_ne_iterator get_iter(size_type i) const + { + //assert(test(i)); // how can a ne_iterator point to an empty bucket? 
+ size_type grp_idx = group_num(i); + return const_ne_iterator(_first_group + grp_idx, + (_first_group[grp_idx].ne_begin() + + _first_group[grp_idx].pos_to_offset(pos_in_group(i)))); + } - // We support reading and writing tables to disk. We don't store - // the actual array contents (which we don't know how to store), - // just the groups and sizes. Returns true if all went ok. + const_ne_iterator get_iter(size_type i, ColIterator col_it) const + { + return const_ne_iterator(_first_group + group_num(i), col_it); + } -private: - // Every time the disk format changes, this should probably change too - typedef unsigned long MagicNumberType; - static const MagicNumberType MAGIC_NUMBER = 0x24687531; - - // Old versions of this code write all data in 32 bits. We need to - // support these files as well as having support for 64-bit systems. - // So we use the following encoding scheme: for values < 2^32-1, we - // store in 4 bytes in big-endian order. For values > 2^32, we - // store 0xFFFFFFF followed by 8 bytes in big-endian order. This - // causes us to mis-read old-version code that stores exactly - // 0xFFFFFFF, but I don't think that is likely to have happened for - // these particular values. - template - static bool write_32_or_64(OUTPUT* fp, IntType value) - { - if (value < 0xFFFFFFFFULL) // fits in 4 bytes - { - if (!sparsehash_internal::write_bigendian_number(fp, value, 4)) + // For nonempty we can return a non-const version + ne_iterator get_iter(size_type i) + { + //assert(test(i)); // how can a nonempty_iterator point to an empty bucket? + size_type grp_idx = group_num(i); + return ne_iterator(_first_group + grp_idx, + (_first_group[grp_idx].ne_begin() + + _first_group[grp_idx].pos_to_offset(pos_in_group(i)))); + } + + ne_iterator get_iter(size_type i, ColIterator col_it) + { + return ne_iterator(_first_group + group_num(i), col_it); + } + + // And the reverse transformation. + size_type get_pos(const const_ne_iterator& it) const + { + difference_type current_row = it.row_current - _first_group; + difference_type current_col = (it.col_current - + _first_group[current_row].ne_begin()); + return ((current_row * SPP_GROUP_SIZE) + + _first_group[current_row].offset_to_pos(current_col)); + } + + // Val can be reference or const_reference + // --------------------------------------- + template + reference set(size_type i, Val& val) + { + assert(i < _table_size); + group_type& group = which_group(i); + typename group_type::size_type old_numbuckets = group.num_nonempty(); + pointer p(group.set(_alloc, pos_in_group(i), val)); + _num_buckets += group.num_nonempty() - old_numbuckets; + return *p; + } + + // used in _move_from (where we can move the old value instead of copying it + void move(size_type i, reference val) + { + assert(i < _table_size); + which_group(i).set(_alloc, pos_in_group(i), val); + ++_num_buckets; + } + + // This takes the specified elements out of the table. + // -------------------------------------------------- + void erase(size_type i) + { + assert(i < _table_size); + GroupsReference grp(which_group(i)); + typename group_type::size_type old_numbuckets = grp.num_nonempty(); + grp.erase(_alloc, pos_in_group(i)); + _num_buckets += grp.num_nonempty() - old_numbuckets; + } + + void erase(iterator pos) + { + erase(pos.pos); + } + + void erase(iterator start_it, iterator end_it) + { + // This could be more efficient, but then we'd need to figure + // out if we spanned groups or not. Doesn't seem worth it. 
+ for (; start_it != end_it; ++start_it) + { + erase(start_it); + } + } + + const_ne_iterator erase(const_ne_iterator it) + { + ne_iterator res(it); + + if (res.row_current->erase_ne(_alloc, res)) + { + _num_buckets--; + } + + return res; + } + + const_ne_iterator erase(const_ne_iterator f, const_ne_iterator l) + { + size_t diff = l - f; + + while (diff--) + { + f = erase(f); + } + + return f; + } + + // We support reading and writing tables to disk. We don't store + // the actual array contents (which we don't know how to store), + // just the groups and sizes. Returns true if all went ok. + + private: + // Every time the disk format changes, this should probably change too + typedef unsigned long MagicNumberType; + static const MagicNumberType MAGIC_NUMBER = 0x24687531; + + // Old versions of this code write all data in 32 bits. We need to + // support these files as well as having support for 64-bit systems. + // So we use the following encoding scheme: for values < 2^32-1, we + // store in 4 bytes in big-endian order. For values > 2^32, we + // store 0xFFFFFFF followed by 8 bytes in big-endian order. This + // causes us to mis-read old-version code that stores exactly + // 0xFFFFFFF, but I don't think that is likely to have happened for + // these particular values. + template + static bool write_32_or_64(OUTPUT* fp, IntType value) + { + if (value < 0xFFFFFFFFULL) // fits in 4 bytes + { + if (!sparsehash_internal::write_bigendian_number(fp, value, 4)) + { + return false; + } + } + else + { + if (!sparsehash_internal::write_bigendian_number(fp, 0xFFFFFFFFUL, 4)) + { + return false; + } + + if (!sparsehash_internal::write_bigendian_number(fp, value, 8)) + { + return false; + } + } + + return true; + } + + template + static bool read_32_or_64(INPUT* fp, IntType* value) + { + // reads into value + MagicNumberType first4 = 0; // a convenient 32-bit unsigned type + + if (!sparsehash_internal::read_bigendian_number(fp, &first4, 4)) + { return false; + } + + if (first4 < 0xFFFFFFFFULL) + { + *value = first4; + } + else + { + if (!sparsehash_internal::read_bigendian_number(fp, value, 8)) + { + return false; + } + } + + return true; } - else + + public: + // read/write_metadata() and read_write/nopointer_data() are DEPRECATED. + // Use serialize() and unserialize(), below, for new code. + + template + bool write_metadata(OUTPUT* fp) const { - if (!sparsehash_internal::write_bigendian_number(fp, 0xFFFFFFFFUL, 4)) + if (!write_32_or_64(fp, MAGIC_NUMBER)) + { return false; - if (!sparsehash_internal::write_bigendian_number(fp, value, 8)) + } + + if (!write_32_or_64(fp, _table_size)) + { return false; + } + + if (!write_32_or_64(fp, _num_buckets)) + { + return false; + } + + for (const group_type* group = _first_group; group != _last_group; ++group) + if (group->write_metadata(fp) == false) + { + return false; + } + + return true; } - return true; - } - template - static bool read_32_or_64(INPUT* fp, IntType *value) - { - // reads into value - MagicNumberType first4 = 0; // a convenient 32-bit unsigned type - if (!sparsehash_internal::read_bigendian_number(fp, &first4, 4)) - return false; + // Reading destroys the old table contents! Returns true if read ok. 
+ template + bool read_metadata(INPUT* fp) + { + size_type magic_read = 0; - if (first4 < 0xFFFFFFFFULL) + if (!read_32_or_64(fp, &magic_read)) + { + return false; + } + + if (magic_read != MAGIC_NUMBER) + { + clear(); // just to be consistent + return false; + } + + if (!read_32_or_64(fp, &_table_size)) + { + return false; + } + + if (!read_32_or_64(fp, &_num_buckets)) + { + return false; + } + + resize(_table_size); // so the vector's sized ok + + for (group_type* group = _first_group; group != _last_group; ++group) + if (group->read_metadata(_alloc, fp) == false) + { + return false; + } + + return true; + } + + // This code is identical to that for SparseGroup + // If your keys and values are simple enough, we can write them + // to disk for you. "simple enough" means no pointers. + // However, we don't try to normalize endianness + bool write_nopointer_data(FILE* fp) const { - *value = first4; + for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!fwrite(&*it, sizeof(*it), 1, fp)) + { + return false; + } + + return true; } - else + + // When reading, we have to override the potential const-ness of *it + bool read_nopointer_data(FILE* fp) { - if (!sparsehash_internal::read_bigendian_number(fp, value, 8)) - return false; + for (ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!fread(reinterpret_cast(&(*it)), sizeof(*it), 1, fp)) + { + return false; + } + + return true; } - return true; - } -public: - // read/write_metadata() and read_write/nopointer_data() are DEPRECATED. - // Use serialize() and unserialize(), below, for new code. + // INPUT and OUTPUT must be either a FILE, *or* a C++ stream + // (istream, ostream, etc) *or* a class providing + // Read(void*, size_t) and Write(const void*, size_t) + // (respectively), which writes a buffer into a stream + // (which the INPUT/OUTPUT instance presumably owns). - template - bool write_metadata(OUTPUT *fp) const - { - if (!write_32_or_64(fp, MAGIC_NUMBER)) return false; - if (!write_32_or_64(fp, _table_size)) return false; - if (!write_32_or_64(fp, _num_buckets)) return false; + typedef sparsehash_internal::pod_serializer NopointerSerializer; - for (const group_type *group = _first_group; group != _last_group; ++group) - if (group->write_metadata(fp) == false) + // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&) + template + bool serialize(ValueSerializer serializer, OUTPUT* fp) + { + if (!write_metadata(fp)) + { return false; - return true; - } + } - // Reading destroys the old table contents! Returns true if read ok. - template - bool read_metadata(INPUT *fp) - { - size_type magic_read = 0; - if (!read_32_or_64(fp, &magic_read)) return false; - if (magic_read != MAGIC_NUMBER) + for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!serializer(fp, *it)) + { + return false; + } + + return true; + } + + // ValueSerializer: a functor. operator()(INPUT*, value_type*) + template + bool unserialize(ValueSerializer serializer, INPUT* fp) + { + clear(); + + if (!read_metadata(fp)) + { + return false; + } + + for (ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!serializer(fp, &*it)) + { + return false; + } + + return true; + } + + // Comparisons. Note the comparisons are pretty arbitrary: we + // compare values of the first index that isn't equal (using default + // value for empty buckets). 
+ bool operator==(const sparsetable& x) const + { + return (_table_size == x._table_size && + _num_buckets == x._num_buckets && + _first_group == x._first_group); + } + + bool operator<(const sparsetable& x) const + { + return std::lexicographical_compare(begin(), end(), x.begin(), x.end()); + } + bool operator!=(const sparsetable& x) const + { + return !(*this == x); + } + bool operator<=(const sparsetable& x) const + { + return !(x < *this); + } + bool operator>(const sparsetable& x) const + { + return x < *this; + } + bool operator>=(const sparsetable& x) const + { + return !(*this < x); + } + + + private: + // The actual data + // --------------- + group_type* _first_group; + group_type* _last_group; + size_type _table_size; // how many buckets they want + size_type _num_buckets; // number of non-empty buckets + group_alloc_type _group_alloc; + allocator_type _alloc; +}; + +// We need a global swap as well +// --------------------------------------------------------------------------- +template +inline void swap(sparsetable& x, sparsetable& y) +{ + x.swap(y); +} + + +// ---------------------------------------------------------------------- +// S P A R S E _ H A S H T A B L E +// ---------------------------------------------------------------------- +// Hashtable class, used to implement the hashed associative containers +// hash_set and hash_map. +// +// Value: what is stored in the table (each bucket is a Value). +// Key: something in a 1-to-1 correspondence to a Value, that can be used +// to search for a Value in the table (find() takes a Key). +// HashFcn: Takes a Key and returns an integer, the more unique the better. +// ExtractKey: given a Value, returns the unique Key associated with it. +// Must inherit from unary_function, or at least have a +// result_type enum indicating the return type of operator(). +// EqualKey: Given two Keys, says whether they are the same (that is, +// if they are both associated with the same Value). +// Alloc: STL allocator to use to allocate memory. +// +// ---------------------------------------------------------------------- + +// The probing method +// ------------------ +// Linear probing +// #define JUMP_(key, num_probes) ( 1 ) +// Quadratic probing +#define JUMP_(key, num_probes) ( num_probes ) + + +// ------------------------------------------------------------------- +// ------------------------------------------------------------------- +template +class sparse_hashtable +{ + public: + typedef Key key_type; + typedef Value value_type; + typedef HashFcn + hasher; // user provided or spp_hash + typedef EqualKey key_equal; + typedef Alloc allocator_type; + + typedef typename allocator_type::size_type size_type; + typedef typename allocator_type::difference_type difference_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef value_type* pointer; + typedef const value_type* const_pointer; + + // Table is the main storage class. + typedef sparsetable Table; + typedef typename Table::ne_iterator ne_it; + typedef typename Table::const_ne_iterator cne_it; + typedef typename Table::destructive_iterator dest_it; + typedef typename Table::ColIterator ColIterator; + + typedef ne_it iterator; + typedef cne_it const_iterator; + typedef dest_it destructive_iterator; + + // These come from tr1. For us they're the same as regular iterators. 
+ // ------------------------------------------------------------------- + typedef iterator local_iterator; + typedef const_iterator const_local_iterator; + + // How full we let the table get before we resize + // ---------------------------------------------- + static const int HT_OCCUPANCY_PCT; // = 80 (out of 100); + + // How empty we let the table get before we resize lower, by default. + // (0.0 means never resize lower.) + // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing + // ------------------------------------------------------------------ + static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT; + + // Minimum size we're willing to let hashtables be. + // Must be a power of two, and at least 4. + // Note, however, that for a given hashtable, the initial size is a + // function of the first constructor arg, and may be >HT_MIN_BUCKETS. + // ------------------------------------------------------------------ + static const size_type HT_MIN_BUCKETS = 4; + + // By default, if you don't specify a hashtable size at + // construction-time, we use this size. Must be a power of two, and + // at least HT_MIN_BUCKETS. + // ----------------------------------------------------------------- + static const size_type HT_DEFAULT_STARTING_BUCKETS = 32; + + // iterators + // --------- + iterator begin() + { + return _mk_iterator(table.ne_begin()); + } + iterator end() + { + return _mk_iterator(table.ne_end()); + } + const_iterator begin() const + { + return _mk_const_iterator(table.ne_cbegin()); + } + const_iterator end() const + { + return _mk_const_iterator(table.ne_cend()); + } + const_iterator cbegin() const + { + return _mk_const_iterator(table.ne_cbegin()); + } + const_iterator cend() const + { + return _mk_const_iterator(table.ne_cend()); + } + + // These come from tr1 unordered_map. They iterate over 'bucket' n. + // For sparsehashtable, we could consider each 'group' to be a bucket, + // I guess, but I don't really see the point. We'll just consider + // bucket n to be the n-th element of the sparsetable, if it's occupied, + // or some empty element, otherwise. + // --------------------------------------------------------------------- + local_iterator begin(size_type i) + { + return _mk_iterator(table.test(i) ? table.get_iter(i) : table.ne_end()); + } + + local_iterator end(size_type i) + { + local_iterator it = begin(i); + + if (table.test(i)) + { + ++it; + } + + return _mk_iterator(it); + } + + const_local_iterator begin(size_type i) const + { + return _mk_const_iterator(table.test(i) ? 
table.get_iter(i) : table.ne_cend()); + } + + const_local_iterator end(size_type i) const + { + const_local_iterator it = begin(i); + + if (table.test(i)) + { + ++it; + } + + return _mk_const_iterator(it); + } + + const_local_iterator cbegin(size_type i) const + { + return begin(i); + } + const_local_iterator cend(size_type i) const + { + return end(i); + } + + // This is used when resizing + // -------------------------- + destructive_iterator destructive_begin() + { + return _mk_destructive_iterator(table.destructive_begin()); + } + destructive_iterator destructive_end() + { + return _mk_destructive_iterator(table.destructive_end()); + } + + + // accessor functions for the things we templatize on, basically + // ------------------------------------------------------------- + hasher hash_funct() const + { + return settings; + } + key_equal key_eq() const + { + return key_info; + } + allocator_type get_allocator() const + { + return table.get_allocator(); + } + + // Accessor function for statistics gathering. + unsigned int num_table_copies() const + { + return settings.num_ht_copies(); + } + + private: + // This is used as a tag for the copy constructor, saying to destroy its + // arg We have two ways of destructively copying: with potentially growing + // the hashtable as we copy, and without. To make sure the outside world + // can't do a destructive copy, we make the typename private. + // ----------------------------------------------------------------------- + enum MoveDontCopyT {MoveDontCopy, MoveDontGrow}; + + void _squash_deleted() + { + // gets rid of any deleted entries we have + // --------------------------------------- + if (num_deleted) + { + // get rid of deleted before writing + sparse_hashtable tmp(MoveDontGrow, *this); + swap(tmp); // now we are tmp + } + + assert(num_deleted == 0); + } + + // creating iterators from sparsetable::ne_iterators + // ------------------------------------------------- + iterator _mk_iterator(ne_it it) const + { + return it; + } + const_iterator _mk_const_iterator(cne_it it) const + { + return it; + } + destructive_iterator _mk_destructive_iterator(dest_it it) const + { + return it; + } + + public: + size_type size() const { - clear(); // just to be consistent - return false; + return table.num_nonempty(); + } + size_type max_size() const + { + return table.max_size(); + } + bool empty() const + { + return size() == 0; + } + size_type bucket_count() const + { + return table.size(); + } + size_type max_bucket_count() const + { + return max_size(); + } + // These are tr1 methods. Their idea of 'bucket' doesn't map well to + // what we do. We just say every bucket has 0 or 1 items in it. + size_type bucket_size(size_type i) const + { + return (size_type)(begin(i) == end(i) ? 0 : 1); } - if (!read_32_or_64(fp, &_table_size)) return false; - if (!read_32_or_64(fp, &_num_buckets)) return false; + private: + // Because of the above, size_type(-1) is never legal; use it for errors + // --------------------------------------------------------------------- + static const size_type ILLEGAL_BUCKET = size_type(-1); - resize(_table_size); // so the vector's sized ok - for (group_type *group = _first_group; group != _last_group; ++group) - if (group->read_metadata(_alloc, fp) == false) - return false; - return true; - } + // Used after a string of deletes. Returns true if we actually shrunk. + // TODO(csilvers): take a delta so we can take into account inserts + // done after shrinking. Maybe make part of the Settings class? 
+ // -------------------------------------------------------------------- + bool _maybe_shrink() + { + assert((bucket_count() & (bucket_count() - 1)) == 0); // is a power of two + assert(bucket_count() >= HT_MIN_BUCKETS); + bool retval = false; + // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS, + // we'll never shrink until you get relatively big, and we'll never + // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something + // like "dense_hash_set x; x.insert(4); x.erase(4);" will + // shrink us down to HT_MIN_BUCKETS buckets, which is too small. + // --------------------------------------------------------------- + const size_type num_remain = table.num_nonempty(); + const size_type shrink_threshold = settings.shrink_threshold(); + + if (shrink_threshold > 0 && num_remain < shrink_threshold && + bucket_count() > HT_DEFAULT_STARTING_BUCKETS) + { + const float shrink_factor = settings.shrink_factor(); + size_type sz = (size_type)(bucket_count() / + 2); // find how much we should shrink - // This code is identical to that for SparseGroup - // If your keys and values are simple enough, we can write them - // to disk for you. "simple enough" means no pointers. - // However, we don't try to normalize endianness - bool write_nopointer_data(FILE *fp) const - { - for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it) - if (!fwrite(&*it, sizeof(*it), 1, fp)) - return false; - return true; - } + while (sz > HT_DEFAULT_STARTING_BUCKETS && + num_remain < static_cast(sz * shrink_factor)) + { + sz /= 2; // stay a power of 2 + } - // When reading, we have to override the potential const-ness of *it - bool read_nopointer_data(FILE *fp) - { - for (ne_iterator it = ne_begin(); it != ne_end(); ++it) - if (!fread(reinterpret_cast(&(*it)), sizeof(*it), 1, fp)) - return false; - return true; - } + sparse_hashtable tmp(MoveDontCopy, *this, sz); + swap(tmp); // now we are tmp + retval = true; + } - // INPUT and OUTPUT must be either a FILE, *or* a C++ stream - // (istream, ostream, etc) *or* a class providing - // Read(void*, size_t) and Write(const void*, size_t) - // (respectively), which writes a buffer into a stream - // (which the INPUT/OUTPUT instance presumably owns). + settings.set_consider_shrink(false); // because we just considered it + return retval; + } - typedef sparsehash_internal::pod_serializer NopointerSerializer; + // We'll let you resize a hashtable -- though this makes us copy all! + // When you resize, you say, "make it big enough for this many more elements" + // Returns true if we actually resized, false if size was already ok. + // -------------------------------------------------------------------------- + bool _resize_delta(size_type delta) + { + bool did_resize = false; - // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&) - template - bool serialize(ValueSerializer serializer, OUTPUT *fp) - { - if (!write_metadata(fp)) - return false; - for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it) - if (!serializer(fp, *it)) - return false; - return true; - } + if (settings.consider_shrink()) + { + // see if lots of deletes happened + if (_maybe_shrink()) + { + did_resize = true; + } + } - // ValueSerializer: a functor. 
operator()(INPUT*, value_type*) - template - bool unserialize(ValueSerializer serializer, INPUT *fp) - { - clear(); - if (!read_metadata(fp)) - return false; - for (ne_iterator it = ne_begin(); it != ne_end(); ++it) - if (!serializer(fp, &*it)) - return false; - return true; - } + if (table.num_nonempty() >= + (std::numeric_limits::max)() - delta) + { + throw_exception(std::length_error("resize overflow")); + } - // Comparisons. Note the comparisons are pretty arbitrary: we - // compare values of the first index that isn't equal (using default - // value for empty buckets). - bool operator==(const sparsetable& x) const - { - return (_table_size == x._table_size && - _num_buckets == x._num_buckets && - _first_group == x._first_group); - } + size_type num_occupied = (size_type)(table.num_nonempty() + num_deleted); - bool operator<(const sparsetable& x) const - { - return std::lexicographical_compare(begin(), end(), x.begin(), x.end()); - } - bool operator!=(const sparsetable& x) const { return !(*this == x); } - bool operator<=(const sparsetable& x) const { return !(x < *this); } - bool operator>(const sparsetable& x) const { return x < *this; } - bool operator>=(const sparsetable& x) const { return !(*this < x); } + if (bucket_count() >= HT_MIN_BUCKETS && + (num_occupied + delta) <= settings.enlarge_threshold()) + { + return did_resize; // we're ok as we are + } + // Sometimes, we need to resize just to get rid of all the + // "deleted" buckets that are clogging up the hashtable. So when + // deciding whether to resize, count the deleted buckets (which + // are currently taking up room). + // ------------------------------------------------------------- + const size_type needed_size = + settings.min_buckets((size_type)(num_occupied + delta), (size_type)0); -private: - // The actual data - // --------------- - group_type * _first_group; - group_type * _last_group; - size_type _table_size; // how many buckets they want - size_type _num_buckets; // number of non-empty buckets - group_alloc_type _group_alloc; - allocator_type _alloc; -}; + if (needed_size <= bucket_count()) // we have enough buckets + { + return did_resize; + } -// We need a global swap as well -// --------------------------------------------------------------------------- -template -inline void swap(sparsetable &x, sparsetable &y) -{ - x.swap(y); -} + size_type resize_to = settings.min_buckets((size_type)(num_occupied + delta), + bucket_count()); + if (resize_to < needed_size && // may double resize_to + resize_to < (std::numeric_limits::max)() / 2) + { + // This situation means that we have enough deleted elements, + // that once we purge them, we won't actually have needed to + // grow. But we may want to grow anyway: if we just purge one + // element, say, we'll have to grow anyway next time we + // insert. Might as well grow now, since we're already going + // through the trouble of copying (in order to purge the + // deleted elements). + const size_type target = + static_cast(settings.shrink_size((size_type)(resize_to * 2))); + + if (table.num_nonempty() + delta >= target) + { + // Good, we won't be below the shrink threshhold even if we double. + resize_to *= 2; + } + } -// ---------------------------------------------------------------------- -// S P A R S E _ H A S H T A B L E -// ---------------------------------------------------------------------- -// Hashtable class, used to implement the hashed associative containers -// hash_set and hash_map. -// -// Value: what is stored in the table (each bucket is a Value). 
-// Key: something in a 1-to-1 correspondence to a Value, that can be used -// to search for a Value in the table (find() takes a Key). -// HashFcn: Takes a Key and returns an integer, the more unique the better. -// ExtractKey: given a Value, returns the unique Key associated with it. -// Must inherit from unary_function, or at least have a -// result_type enum indicating the return type of operator(). -// EqualKey: Given two Keys, says whether they are the same (that is, -// if they are both associated with the same Value). -// Alloc: STL allocator to use to allocate memory. -// -// ---------------------------------------------------------------------- + sparse_hashtable tmp(MoveDontCopy, *this, resize_to); + swap(tmp); // now we are tmp + return true; + } -// The probing method -// ------------------ -// Linear probing -// #define JUMP_(key, num_probes) ( 1 ) -// Quadratic probing -#define JUMP_(key, num_probes) ( num_probes ) + // Used to actually do the rehashing when we grow/shrink a hashtable + // ----------------------------------------------------------------- + void _copy_from(const sparse_hashtable& ht, size_type min_buckets_wanted) + { + clear(); // clear table, set num_deleted to 0 + // If we need to change the size of our table, do it now + const size_type resize_to = settings.min_buckets(ht.size(), min_buckets_wanted); + if (resize_to > bucket_count()) + { + // we don't have enough buckets + table.resize(resize_to); // sets the number of buckets + settings.reset_thresholds(bucket_count()); + } -// ------------------------------------------------------------------- -// ------------------------------------------------------------------- -template -class sparse_hashtable -{ -public: - typedef Key key_type; - typedef Value value_type; - typedef HashFcn hasher; // user provided or spp_hash - typedef EqualKey key_equal; - typedef Alloc allocator_type; - - typedef typename allocator_type::size_type size_type; - typedef typename allocator_type::difference_type difference_type; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef value_type* pointer; - typedef const value_type* const_pointer; - - // Table is the main storage class. - typedef sparsetable Table; - typedef typename Table::ne_iterator ne_it; - typedef typename Table::const_ne_iterator cne_it; - typedef typename Table::destructive_iterator dest_it; - typedef typename Table::ColIterator ColIterator; - - typedef ne_it iterator; - typedef cne_it const_iterator; - typedef dest_it destructive_iterator; - - // These come from tr1. For us they're the same as regular iterators. - // ------------------------------------------------------------------- - typedef iterator local_iterator; - typedef const_iterator const_local_iterator; - - // How full we let the table get before we resize - // ---------------------------------------------- - static const int HT_OCCUPANCY_PCT; // = 80 (out of 100); - - // How empty we let the table get before we resize lower, by default. - // (0.0 means never resize lower.) - // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing - // ------------------------------------------------------------------ - static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT; - - // Minimum size we're willing to let hashtables be. - // Must be a power of two, and at least 4. - // Note, however, that for a given hashtable, the initial size is a - // function of the first constructor arg, and may be >HT_MIN_BUCKETS. 
- // ------------------------------------------------------------------ - static const size_type HT_MIN_BUCKETS = 4; - - // By default, if you don't specify a hashtable size at - // construction-time, we use this size. Must be a power of two, and - // at least HT_MIN_BUCKETS. - // ----------------------------------------------------------------- - static const size_type HT_DEFAULT_STARTING_BUCKETS = 32; - - // iterators - // --------- - iterator begin() { return _mk_iterator(table.ne_begin()); } - iterator end() { return _mk_iterator(table.ne_end()); } - const_iterator begin() const { return _mk_const_iterator(table.ne_cbegin()); } - const_iterator end() const { return _mk_const_iterator(table.ne_cend()); } - const_iterator cbegin() const { return _mk_const_iterator(table.ne_cbegin()); } - const_iterator cend() const { return _mk_const_iterator(table.ne_cend()); } - - // These come from tr1 unordered_map. They iterate over 'bucket' n. - // For sparsehashtable, we could consider each 'group' to be a bucket, - // I guess, but I don't really see the point. We'll just consider - // bucket n to be the n-th element of the sparsetable, if it's occupied, - // or some empty element, otherwise. - // --------------------------------------------------------------------- - local_iterator begin(size_type i) - { - return _mk_iterator(table.test(i) ? table.get_iter(i) : table.ne_end()); - } + // We use a normal iterator to get bcks from ht + // We could use insert() here, but since we know there are + // no duplicates, we can be more efficient + assert((bucket_count() & (bucket_count() - 1)) == 0); // a power of two - local_iterator end(size_type i) - { - local_iterator it = begin(i); - if (table.test(i)) - ++it; - return _mk_iterator(it); - } + for (const_iterator it = ht.begin(); it != ht.end(); ++it) + { + size_type num_probes = 0; // how many times we've probed + size_type bucknum; + const size_type bucket_count_minus_one = bucket_count() - 1; + + for (bucknum = hash(get_key(*it)) & bucket_count_minus_one; + table.test( + bucknum); // table.test() OK since no erase() + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) + { + ++num_probes; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } - const_local_iterator begin(size_type i) const - { - return _mk_const_iterator(table.test(i) ? table.get_iter(i) : table.ne_cend()); - } + table.set(bucknum, *it); // copies the value to here + } - const_local_iterator end(size_type i) const - { - const_local_iterator it = begin(i); - if (table.test(i)) - ++it; - return _mk_const_iterator(it); - } + settings.inc_num_ht_copies(); + } - const_local_iterator cbegin(size_type i) const { return begin(i); } - const_local_iterator cend(size_type i) const { return end(i); } + // Implementation is like _copy_from, but it destroys the table of the + // "from" guy by freeing sparsetable memory as we iterate. This is + // useful in resizing, since we're throwing away the "from" guy anyway. 
+ // -------------------------------------------------------------------- + void _move_from(MoveDontCopyT mover, sparse_hashtable& ht, + size_type min_buckets_wanted) + { + clear(); + // If we need to change the size of our table, do it now + size_type resize_to; - // This is used when resizing - // -------------------------- - destructive_iterator destructive_begin() { return _mk_destructive_iterator(table.destructive_begin()); } - destructive_iterator destructive_end() { return _mk_destructive_iterator(table.destructive_end()); } + if (mover == MoveDontGrow) + { + resize_to = ht.bucket_count(); // keep same size as old ht + } + else // MoveDontCopy + { + resize_to = settings.min_buckets(ht.size(), min_buckets_wanted); + } + if (resize_to > bucket_count()) + { + // we don't have enough buckets + table.resize(resize_to); // sets the number of buckets + settings.reset_thresholds(bucket_count()); + } - // accessor functions for the things we templatize on, basically - // ------------------------------------------------------------- - hasher hash_funct() const { return settings; } - key_equal key_eq() const { return key_info; } - allocator_type get_allocator() const { return table.get_allocator(); } + // We use a normal iterator to get bcks from ht + // We could use insert() here, but since we know there are + // no duplicates, we can be more efficient + assert((bucket_count() & (bucket_count() - 1)) == 0); // a power of two + const size_type bucket_count_minus_one = (const size_type)(bucket_count() - 1); - // Accessor function for statistics gathering. - unsigned int num_table_copies() const { return settings.num_ht_copies(); } + // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM(): + for (destructive_iterator it = ht.destructive_begin(); + it != ht.destructive_end(); ++it) + { + size_type num_probes = 0; + size_type bucknum; + + for (bucknum = hash(get_key(*it)) & bucket_count_minus_one; + table.test( + bucknum); // table.test() OK since no erase() + bucknum = (size_type)((bucknum + JUMP_(key, + num_probes)) & (bucket_count() - 1))) + { + ++num_probes; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } -private: - // This is used as a tag for the copy constructor, saying to destroy its - // arg We have two ways of destructively copying: with potentially growing - // the hashtable as we copy, and without. To make sure the outside world - // can't do a destructive copy, we make the typename private. 
- // ----------------------------------------------------------------------- - enum MoveDontCopyT {MoveDontCopy, MoveDontGrow}; + table.move(bucknum, *it); // moves the value to here + } - void _squash_deleted() - { - // gets rid of any deleted entries we have - // --------------------------------------- - if (num_deleted) - { - // get rid of deleted before writing - sparse_hashtable tmp(MoveDontGrow, *this); - swap(tmp); // now we are tmp + settings.inc_num_ht_copies(); } - assert(num_deleted == 0); - } - // creating iterators from sparsetable::ne_iterators - // ------------------------------------------------- - iterator _mk_iterator(ne_it it) const { return it; } - const_iterator _mk_const_iterator(cne_it it) const { return it; } - destructive_iterator _mk_destructive_iterator(dest_it it) const { return it; } -public: - size_type size() const { return table.num_nonempty(); } - size_type max_size() const { return table.max_size(); } - bool empty() const { return size() == 0; } - size_type bucket_count() const { return table.size(); } - size_type max_bucket_count() const { return max_size(); } - // These are tr1 methods. Their idea of 'bucket' doesn't map well to - // what we do. We just say every bucket has 0 or 1 items in it. - size_type bucket_size(size_type i) const - { - return (size_type)(begin(i) == end(i) ? 0 : 1); - } + // Required by the spec for hashed associative container + public: + // Though the docs say this should be num_buckets, I think it's much + // more useful as num_elements. As a special feature, calling with + // req_elements==0 will cause us to shrink if we can, saving space. + // ----------------------------------------------------------------- + void resize(size_type req_elements) + { + // resize to this or larger + if (settings.consider_shrink() || req_elements == 0) + { + _maybe_shrink(); + } -private: - // Because of the above, size_type(-1) is never legal; use it for errors - // --------------------------------------------------------------------- - static const size_type ILLEGAL_BUCKET = size_type(-1); - - // Used after a string of deletes. Returns true if we actually shrunk. - // TODO(csilvers): take a delta so we can take into account inserts - // done after shrinking. Maybe make part of the Settings class? - // -------------------------------------------------------------------- - bool _maybe_shrink() - { - assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two - assert(bucket_count() >= HT_MIN_BUCKETS); - bool retval = false; - - // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS, - // we'll never shrink until you get relatively big, and we'll never - // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something - // like "dense_hash_set x; x.insert(4); x.erase(4);" will - // shrink us down to HT_MIN_BUCKETS buckets, which is too small. 
- // --------------------------------------------------------------- - const size_type num_remain = table.num_nonempty(); - const size_type shrink_threshold = settings.shrink_threshold(); - if (shrink_threshold > 0 && num_remain < shrink_threshold && - bucket_count() > HT_DEFAULT_STARTING_BUCKETS) - { - const float shrink_factor = settings.shrink_factor(); - size_type sz = (size_type)(bucket_count() / 2); // find how much we should shrink - while (sz > HT_DEFAULT_STARTING_BUCKETS && - num_remain < static_cast(sz * shrink_factor)) + if (req_elements > table.num_nonempty()) // we only grow { - sz /= 2; // stay a power of 2 + _resize_delta((size_type)(req_elements - table.num_nonempty())); } - sparse_hashtable tmp(MoveDontCopy, *this, sz); - swap(tmp); // now we are tmp - retval = true; } - settings.set_consider_shrink(false); // because we just considered it - return retval; - } - // We'll let you resize a hashtable -- though this makes us copy all! - // When you resize, you say, "make it big enough for this many more elements" - // Returns true if we actually resized, false if size was already ok. - // -------------------------------------------------------------------------- - bool _resize_delta(size_type delta) - { - bool did_resize = false; - if (settings.consider_shrink()) + // Get and change the value of shrink_factor and enlarge_factor. The + // description at the beginning of this file explains how to choose + // the values. Setting the shrink parameter to 0.0 ensures that the + // table never shrinks. + // ------------------------------------------------------------------ + void get_resizing_parameters(float* shrink, float* grow) const + { + *shrink = settings.shrink_factor(); + *grow = settings.enlarge_factor(); + } + + float get_shrink_factor() const { - // see if lots of deletes happened - if (_maybe_shrink()) - did_resize = true; + return settings.shrink_factor(); } - if (table.num_nonempty() >= - (std::numeric_limits::max)() - delta) + float get_enlarge_factor() const { - throw_exception(std::length_error("resize overflow")); + return settings.enlarge_factor(); } - size_type num_occupied = (size_type)(table.num_nonempty() + num_deleted); + void set_resizing_parameters(float shrink, float grow) + { + settings.set_resizing_parameters(shrink, grow); + settings.reset_thresholds(bucket_count()); + } - if (bucket_count() >= HT_MIN_BUCKETS && - (num_occupied + delta) <= settings.enlarge_threshold()) - return did_resize; // we're ok as we are + void set_shrink_factor(float shrink) + { + set_resizing_parameters(shrink, get_enlarge_factor()); + } - // Sometimes, we need to resize just to get rid of all the - // "deleted" buckets that are clogging up the hashtable. So when - // deciding whether to resize, count the deleted buckets (which - // are currently taking up room). - // ------------------------------------------------------------- - const size_type needed_size = - settings.min_buckets((size_type)(num_occupied + delta), (size_type)0); - - if (needed_size <= bucket_count()) // we have enough buckets - return did_resize; - - size_type resize_to = settings.min_buckets((size_type)(num_occupied + delta), bucket_count()); - - if (resize_to < needed_size && // may double resize_to - resize_to < (std::numeric_limits::max)() / 2) - { - // This situation means that we have enough deleted elements, - // that once we purge them, we won't actually have needed to - // grow. But we may want to grow anyway: if we just purge one - // element, say, we'll have to grow anyway next time we - // insert. 
Might as well grow now, since we're already going - // through the trouble of copying (in order to purge the - // deleted elements). - const size_type target = - static_cast(settings.shrink_size((size_type)(resize_to*2))); - if (table.num_nonempty() + delta >= target) - { - // Good, we won't be below the shrink threshhold even if we double. - resize_to *= 2; - } + void set_enlarge_factor(float grow) + { + set_resizing_parameters(get_shrink_factor(), grow); } - sparse_hashtable tmp(MoveDontCopy, *this, resize_to); - swap(tmp); // now we are tmp - return true; - } + // CONSTRUCTORS -- as required by the specs, we take a size, + // but also let you specify a hashfunction, key comparator, + // and key extractor. We also define a copy constructor and =. + // DESTRUCTOR -- the default is fine, surprisingly. + // ------------------------------------------------------------ + explicit sparse_hashtable(size_type expected_max_items_in_table = 0, + const HashFcn& hf = HashFcn(), + const EqualKey& eql = EqualKey(), + const ExtractKey& ext = ExtractKey(), + const SetKey& set = SetKey(), + const allocator_type& alloc = allocator_type()) + : settings(hf), + key_info(ext, set, eql), + num_deleted(0), + table((expected_max_items_in_table == 0 + ? HT_DEFAULT_STARTING_BUCKETS + : settings.min_buckets(expected_max_items_in_table, 0)), + allocator_type(alloc)) + { + settings.reset_thresholds(bucket_count()); + } - // Used to actually do the rehashing when we grow/shrink a hashtable - // ----------------------------------------------------------------- - void _copy_from(const sparse_hashtable &ht, size_type min_buckets_wanted) - { - clear(); // clear table, set num_deleted to 0 + // As a convenience for resize(), we allow an optional second argument + // which lets you make this new hashtable a different size than ht. + // We also provide a mechanism of saying you want to "move" the ht argument + // into us instead of copying. 
+ // ------------------------------------------------------------------------ + sparse_hashtable(const sparse_hashtable& ht, + size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) + : settings(ht.settings), + key_info(ht.key_info), + num_deleted(0), + table(0) + { + settings.reset_thresholds(bucket_count()); + _copy_from(ht, min_buckets_wanted); + } - // If we need to change the size of our table, do it now - const size_type resize_to = settings.min_buckets(ht.size(), min_buckets_wanted); +#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) - if (resize_to > bucket_count()) + sparse_hashtable(sparse_hashtable&& o) : + settings(std::move(o.settings)), + key_info(std::move(o.key_info)), + num_deleted(o.num_deleted), + table(std::move(o.table)) { - // we don't have enough buckets - table.resize(resize_to); // sets the number of buckets - settings.reset_thresholds(bucket_count()); } - // We use a normal iterator to get bcks from ht - // We could use insert() here, but since we know there are - // no duplicates, we can be more efficient - assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two - for (const_iterator it = ht.begin(); it != ht.end(); ++it) + sparse_hashtable(sparse_hashtable&& o, const allocator_type& alloc) : + settings(std::move(o.settings)), + key_info(std::move(o.key_info)), + num_deleted(o.num_deleted), + table(std::move(o.table), alloc) { - size_type num_probes = 0; // how many times we've probed - size_type bucknum; - const size_type bucket_count_minus_one = bucket_count() - 1; - for (bucknum = hash(get_key(*it)) & bucket_count_minus_one; - table.test(bucknum); // table.test() OK since no erase() - bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) - { - ++num_probes; - assert(num_probes < bucket_count() - && "Hashtable is full: an error in key_equal<> or hash<>"); - } - table.set(bucknum, *it); // copies the value to here } - settings.inc_num_ht_copies(); - } - // Implementation is like _copy_from, but it destroys the table of the - // "from" guy by freeing sparsetable memory as we iterate. This is - // useful in resizing, since we're throwing away the "from" guy anyway. 
- // -------------------------------------------------------------------- - void _move_from(MoveDontCopyT mover, sparse_hashtable &ht, - size_type min_buckets_wanted) - { - clear(); + sparse_hashtable& operator=(sparse_hashtable&& o) + { + using std::swap; + sparse_hashtable tmp(std::move(o)); + swap(tmp, *this); + return *this; + } +#endif - // If we need to change the size of our table, do it now - size_type resize_to; - if (mover == MoveDontGrow) - resize_to = ht.bucket_count(); // keep same size as old ht - else // MoveDontCopy - resize_to = settings.min_buckets(ht.size(), min_buckets_wanted); - if (resize_to > bucket_count()) + sparse_hashtable(MoveDontCopyT mover, + sparse_hashtable& ht, + size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) + : settings(ht.settings), + key_info(ht.key_info), + num_deleted(0), + table(min_buckets_wanted, ht.table.get_allocator()) { - // we don't have enough buckets - table.resize(resize_to); // sets the number of buckets settings.reset_thresholds(bucket_count()); + _move_from(mover, ht, min_buckets_wanted); } - // We use a normal iterator to get bcks from ht - // We could use insert() here, but since we know there are - // no duplicates, we can be more efficient - assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two - const size_type bucket_count_minus_one = (const size_type)(bucket_count() - 1); - - // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM(): - for (destructive_iterator it = ht.destructive_begin(); - it != ht.destructive_end(); ++it) + sparse_hashtable& operator=(const sparse_hashtable& ht) { - size_type num_probes = 0; - size_type bucknum; - for (bucknum = hash(get_key(*it)) & bucket_count_minus_one; - table.test(bucknum); // table.test() OK since no erase() - bucknum = (size_type)((bucknum + JUMP_(key, num_probes)) & (bucket_count()-1))) + if (&ht == this) { - ++num_probes; - assert(num_probes < bucket_count() - && "Hashtable is full: an error in key_equal<> or hash<>"); + return *this; // don't copy onto ourselves } - table.move(bucknum, *it); // moves the value to here - } - settings.inc_num_ht_copies(); - } + settings = ht.settings; + key_info = ht.key_info; + num_deleted = ht.num_deleted; + // _copy_from() calls clear and sets num_deleted to 0 too + _copy_from(ht, HT_MIN_BUCKETS); + // we purposefully don't copy the allocator, which may not be copyable + return *this; + } - // Required by the spec for hashed associative container -public: - // Though the docs say this should be num_buckets, I think it's much - // more useful as num_elements. As a special feature, calling with - // req_elements==0 will cause us to shrink if we can, saving space. - // ----------------------------------------------------------------- - void resize(size_type req_elements) - { - // resize to this or larger - if (settings.consider_shrink() || req_elements == 0) - _maybe_shrink(); - if (req_elements > table.num_nonempty()) // we only grow - _resize_delta((size_type)(req_elements - table.num_nonempty())); - } - - // Get and change the value of shrink_factor and enlarge_factor. The - // description at the beginning of this file explains how to choose - // the values. Setting the shrink parameter to 0.0 ensures that the - // table never shrinks. 
- // ------------------------------------------------------------------ - void get_resizing_parameters(float* shrink, float* grow) const - { - *shrink = settings.shrink_factor(); - *grow = settings.enlarge_factor(); - } + // Many STL algorithms use swap instead of copy constructors + void swap(sparse_hashtable& ht) + { + using std::swap; + swap(settings, ht.settings); + swap(key_info, ht.key_info); + swap(num_deleted, ht.num_deleted); + table.swap(ht.table); + settings.reset_thresholds(bucket_count()); // also resets consider_shrink + ht.settings.reset_thresholds(ht.bucket_count()); + // we purposefully don't swap the allocator, which may not be swap-able + } - float get_shrink_factor() const { return settings.shrink_factor(); } - float get_enlarge_factor() const { return settings.enlarge_factor(); } + // It's always nice to be able to clear a table without deallocating it + void clear() + { + if (!empty() || num_deleted != 0) + { + table.clear(); + table = Table(HT_DEFAULT_STARTING_BUCKETS); + } - void set_resizing_parameters(float shrink, float grow) - { - settings.set_resizing_parameters(shrink, grow); - settings.reset_thresholds(bucket_count()); - } + settings.reset_thresholds(bucket_count()); + num_deleted = 0; + } - void set_shrink_factor(float shrink) - { - set_resizing_parameters(shrink, get_enlarge_factor()); - } + // LOOKUP ROUTINES + private: - void set_enlarge_factor(float grow) - { - set_resizing_parameters(get_shrink_factor(), grow); - } + enum pos_type { pt_empty = 0, pt_erased, pt_full }; + // ------------------------------------------------------------------- + class Position + { + public: - // CONSTRUCTORS -- as required by the specs, we take a size, - // but also let you specify a hashfunction, key comparator, - // and key extractor. We also define a copy constructor and =. - // DESTRUCTOR -- the default is fine, surprisingly. - // ------------------------------------------------------------ - explicit sparse_hashtable(size_type expected_max_items_in_table = 0, - const HashFcn& hf = HashFcn(), - const EqualKey& eql = EqualKey(), - const ExtractKey& ext = ExtractKey(), - const SetKey& set = SetKey(), - const allocator_type& alloc = allocator_type()) - : settings(hf), - key_info(ext, set, eql), - num_deleted(0), - table((expected_max_items_in_table == 0 - ? HT_DEFAULT_STARTING_BUCKETS - : settings.min_buckets(expected_max_items_in_table, 0)), - allocator_type(alloc)) - { - settings.reset_thresholds(bucket_count()); - } + Position() : _t(pt_empty) {} + Position(pos_type t, size_type idx) : _t(t), _idx(idx) {} - // As a convenience for resize(), we allow an optional second argument - // which lets you make this new hashtable a different size than ht. - // We also provide a mechanism of saying you want to "move" the ht argument - // into us instead of copying. - // ------------------------------------------------------------------------ - sparse_hashtable(const sparse_hashtable& ht, - size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) - : settings(ht.settings), - key_info(ht.key_info), - num_deleted(0), - table(0) - { - settings.reset_thresholds(bucket_count()); - _copy_from(ht, min_buckets_wanted); - } + pos_type _t; + size_type _idx; + }; -#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) + // Returns a pair: + // - 'first' is a code, 2 if key already present, 0 or 1 otherwise. 
+ // - 'second' is a position, where the key should go + // Note: because of deletions where-to-insert is not trivial: it's the + // first deleted bucket we see, as long as we don't find the key later + // ------------------------------------------------------------------- + Position _find_position(const key_type& key) const + { + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = (const size_type)(bucket_count() - 1); + size_type bucknum = hash(key) & bucket_count_minus_one; + Position pos; - sparse_hashtable(sparse_hashtable&& o) : - settings(std::move(o.settings)), - key_info(std::move(o.key_info)), - num_deleted(o.num_deleted), - table(std::move(o.table)) - { - } + while (1) + { + // probe until something happens + // ----------------------------- + typename Table::GrpPos grp_pos(table, bucknum); - sparse_hashtable(sparse_hashtable&& o, const allocator_type& alloc) : - settings(std::move(o.settings)), - key_info(std::move(o.key_info)), - num_deleted(o.num_deleted), - table(std::move(o.table), alloc) - { - } + if (!grp_pos.test_strict()) + { + // bucket is empty => key not present + return pos._t ? pos : Position(pt_empty, bucknum); + } + else if (grp_pos.test()) + { + reference ref(grp_pos.unsafe_get()); - sparse_hashtable& operator=(sparse_hashtable&& o) - { - using std::swap; + if (equals(key, get_key(ref))) + { + return Position(pt_full, bucknum); + } + } + else if (pos._t == pt_empty) + { + // first erased position + pos._t = pt_erased; + pos._idx = bucknum; + } - sparse_hashtable tmp(std::move(o)); - swap(tmp, *this); - return *this; - } -#endif + ++num_probes; // we're doing another probe + bucknum = (size_type)((bucknum + JUMP_(key, + num_probes)) & bucket_count_minus_one); + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } - sparse_hashtable(MoveDontCopyT mover, - sparse_hashtable& ht, - size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) - : settings(ht.settings), - key_info(ht.key_info), - num_deleted(0), - table(min_buckets_wanted, ht.table.get_allocator()) - { - settings.reset_thresholds(bucket_count()); - _move_from(mover, ht, min_buckets_wanted); - } + public: + // I hate to duplicate find() like that, but it is + // significantly faster to not have the intermediate pair + // ------------------------------------------------------------------ + iterator find(const key_type& key) + { + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = bucket_count() - 1; + size_type bucknum = hash(key) & bucket_count_minus_one; - sparse_hashtable& operator=(const sparse_hashtable& ht) - { - if (&ht == this) - return *this; // don't copy onto ourselves - settings = ht.settings; - key_info = ht.key_info; - num_deleted = ht.num_deleted; + while (1) // probe until something happens + { + typename Table::GrpPos grp_pos(table, bucknum); - // _copy_from() calls clear and sets num_deleted to 0 too - _copy_from(ht, HT_MIN_BUCKETS); + if (!grp_pos.test_strict()) + { + return end(); // bucket is empty + } - // we purposefully don't copy the allocator, which may not be copyable - return *this; - } + if (grp_pos.test()) + { + reference ref(grp_pos.unsafe_get()); - // Many STL algorithms use swap instead of copy constructors - void swap(sparse_hashtable& ht) - { - using std::swap; + if (equals(key, get_key(ref))) + { + return grp_pos.get_iter(ref); + } + } - swap(settings, ht.settings); - swap(key_info, ht.key_info); - swap(num_deleted, 
ht.num_deleted); - table.swap(ht.table); - settings.reset_thresholds(bucket_count()); // also resets consider_shrink - ht.settings.reset_thresholds(ht.bucket_count()); - // we purposefully don't swap the allocator, which may not be swap-able - } + ++num_probes; // we're doing another probe + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } - // It's always nice to be able to clear a table without deallocating it - void clear() - { - if (!empty() || num_deleted != 0) + // Wish I could avoid the duplicate find() const and non-const. + // ------------------------------------------------------------ + const_iterator find(const key_type& key) const { - table.clear(); - table = Table(HT_DEFAULT_STARTING_BUCKETS); - } - settings.reset_thresholds(bucket_count()); - num_deleted = 0; - } + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = bucket_count() - 1; + size_type bucknum = hash(key) & bucket_count_minus_one; - // LOOKUP ROUTINES -private: + while (1) // probe until something happens + { + typename Table::GrpPos grp_pos(table, bucknum); - enum pos_type { pt_empty = 0, pt_erased, pt_full }; - // ------------------------------------------------------------------- - class Position - { - public: + if (!grp_pos.test_strict()) + { + return end(); // bucket is empty + } + else if (grp_pos.test()) + { + reference ref(grp_pos.unsafe_get()); - Position() : _t(pt_empty) {} - Position(pos_type t, size_type idx) : _t(t), _idx(idx) {} + if (equals(key, get_key(ref))) + { + return _mk_const_iterator(table.get_iter(bucknum, &ref)); + } + } - pos_type _t; - size_type _idx; - }; + ++num_probes; // we're doing another probe + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } - // Returns a pair: - // - 'first' is a code, 2 if key already present, 0 or 1 otherwise. - // - 'second' is a position, where the key should go - // Note: because of deletions where-to-insert is not trivial: it's the - // first deleted bucket we see, as long as we don't find the key later - // ------------------------------------------------------------------- - Position _find_position(const key_type &key) const - { - size_type num_probes = 0; // how many times we've probed - const size_type bucket_count_minus_one = (const size_type)(bucket_count() - 1); - size_type bucknum = hash(key) & bucket_count_minus_one; - Position pos; + // This is a tr1 method: the bucket a given key is in, or what bucket + // it would be put in, if it were to be inserted. Shrug. + // ------------------------------------------------------------------ + size_type bucket(const key_type& key) const + { + Position pos = _find_position(key); + return pos._idx; + } + + // Counts how many elements have key key. For maps, it's either 0 or 1. + // --------------------------------------------------------------------- + size_type count(const key_type& key) const + { + Position pos = _find_position(key); + return (size_type)(pos._t == pt_full ? 1 : 0); + } - while (1) + // Likewise, equal_range doesn't really make sense for us. Oh well. 
+ // ----------------------------------------------------------------- + std::pair equal_range(const key_type& key) { - // probe until something happens - // ----------------------------- - typename Table::GrpPos grp_pos(table, bucknum); + iterator pos = find(key); // either an iterator or end - if (!grp_pos.test_strict()) + if (pos == end()) { - // bucket is empty => key not present - return pos._t ? pos : Position(pt_empty, bucknum); + return std::pair(pos, pos); } - else if (grp_pos.test()) + else { - reference ref(grp_pos.unsafe_get()); + const iterator startpos = pos++; + return std::pair(startpos, pos); + } + } - if (equals(key, get_key(ref))) - return Position(pt_full, bucknum); + std::pair equal_range(const key_type& key) const + { + const_iterator pos = find(key); // either an iterator or end + + if (pos == end()) + { + return std::pair(pos, pos); } - else if (pos._t == pt_empty) + else { - // first erased position - pos._t = pt_erased; - pos._idx = bucknum; + const const_iterator startpos = pos++; + return std::pair(startpos, pos); } - - ++num_probes; // we're doing another probe - bucknum = (size_type)((bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one); - assert(num_probes < bucket_count() - && "Hashtable is full: an error in key_equal<> or hash<>"); } - } -public: - // I hate to duplicate find() like that, but it is - // significantly faster to not have the intermediate pair - // ------------------------------------------------------------------ - iterator find(const key_type& key) - { - size_type num_probes = 0; // how many times we've probed - const size_type bucket_count_minus_one = bucket_count() - 1; - size_type bucknum = hash(key) & bucket_count_minus_one; - while (1) // probe until something happens + // INSERTION ROUTINES + private: + // Private method used by insert_noresize and find_or_insert. + template + reference _insert_at(T& obj, size_type pos, bool erased) { - typename Table::GrpPos grp_pos(table, bucknum); - - if (!grp_pos.test_strict()) - return end(); // bucket is empty - if (grp_pos.test()) + if (size() >= max_size()) { - reference ref(grp_pos.unsafe_get()); + throw_exception(std::length_error("insert overflow")); + } - if (equals(key, get_key(ref))) - return grp_pos.get_iter(ref); + if (erased) + { + assert(num_deleted); + --num_deleted; } - ++num_probes; // we're doing another probe - bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; - assert(num_probes < bucket_count() - && "Hashtable is full: an error in key_equal<> or hash<>"); - } - } - // Wish I could avoid the duplicate find() const and non-const. 
- // ------------------------------------------------------------ - const_iterator find(const key_type& key) const - { - size_type num_probes = 0; // how many times we've probed - const size_type bucket_count_minus_one = bucket_count() - 1; - size_type bucknum = hash(key) & bucket_count_minus_one; + return table.set(pos, obj); + } - while (1) // probe until something happens + // If you know *this is big enough to hold obj, use this routine + template + std::pair _insert_noresize(T& obj) { - typename Table::GrpPos grp_pos(table, bucknum); + Position pos = _find_position(get_key(obj)); + bool already_there = (pos._t == pt_full); - if (!grp_pos.test_strict()) - return end(); // bucket is empty - else if (grp_pos.test()) + if (!already_there) { - reference ref(grp_pos.unsafe_get()); - - if (equals(key, get_key(ref))) - return _mk_const_iterator(table.get_iter(bucknum, &ref)); + reference ref(_insert_at(obj, pos._idx, pos._t == pt_erased)); + return std::pair(_mk_iterator(table.get_iter(pos._idx, &ref)), + true); } - ++num_probes; // we're doing another probe - bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; - assert(num_probes < bucket_count() - && "Hashtable is full: an error in key_equal<> or hash<>"); + + return std::pair(_mk_iterator(table.get_iter(pos._idx)), false); } - } - // This is a tr1 method: the bucket a given key is in, or what bucket - // it would be put in, if it were to be inserted. Shrug. - // ------------------------------------------------------------------ - size_type bucket(const key_type& key) const - { - Position pos = _find_position(key); - return pos._idx; - } + // Specializations of insert(it, it) depending on the power of the iterator: + // (1) Iterator supports operator-, resize before inserting + template + void _insert(ForwardIterator f, ForwardIterator l, + std::forward_iterator_tag /*unused*/) + { + int64_t dist = std::distance(f, l); - // Counts how many elements have key key. For maps, it's either 0 or 1. - // --------------------------------------------------------------------- - size_type count(const key_type &key) const - { - Position pos = _find_position(key); - return (size_type)(pos._t == pt_full ? 1 : 0); - } + if (dist < 0 + || static_cast(dist) >= (std::numeric_limits::max)()) + { + throw_exception(std::length_error("insert-range overflow")); + } - // Likewise, equal_range doesn't really make sense for us. Oh well. - // ----------------------------------------------------------------- - std::pair equal_range(const key_type& key) - { - iterator pos = find(key); // either an iterator or end - if (pos == end()) - return std::pair(pos, pos); - else - { - const iterator startpos = pos++; - return std::pair(startpos, pos); + _resize_delta(static_cast(dist)); + + for (; dist > 0; --dist, ++f) + { + _insert_noresize(*f); + } } - } - std::pair equal_range(const key_type& key) const - { - const_iterator pos = find(key); // either an iterator or end - if (pos == end()) - return std::pair(pos, pos); - else + // (2) Arbitrary iterator, can't tell how much to resize + template + void _insert(InputIterator f, InputIterator l, + std::input_iterator_tag /*unused*/) { - const const_iterator startpos = pos++; - return std::pair(startpos, pos); + for (; f != l; ++f) + { + _insert(*f); + } } - } + public: - // INSERTION ROUTINES -private: - // Private method used by insert_noresize and find_or_insert. 
- template - reference _insert_at(T& obj, size_type pos, bool erased) - { - if (size() >= max_size()) +#if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES) + template + std::pair emplace(Args&& ... args) { - throw_exception(std::length_error("insert overflow")); + _resize_delta(1); + value_type obj(std::forward(args)...); + return _insert_noresize(obj); } - if (erased) +#endif + + // This is the normal insert routine, used by the outside world + std::pair insert(const_reference obj) { - assert(num_deleted); - --num_deleted; + _resize_delta(1); // adding an object, grow if need be + return _insert_noresize(obj); } - return table.set(pos, obj); - } - - // If you know *this is big enough to hold obj, use this routine - template - std::pair _insert_noresize(T& obj) - { - Position pos = _find_position(get_key(obj)); - bool already_there = (pos._t == pt_full); - if (!already_there) +#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) + template< class P > + std::pair insert(P&& obj) { - reference ref(_insert_at(obj, pos._idx, pos._t == pt_erased)); - return std::pair(_mk_iterator(table.get_iter(pos._idx, &ref)), true); + _resize_delta(1); // adding an object, grow if need be + value_type val(std::forward(obj)); + return _insert_noresize(val); } - return std::pair(_mk_iterator(table.get_iter(pos._idx)), false); - } - - // Specializations of insert(it, it) depending on the power of the iterator: - // (1) Iterator supports operator-, resize before inserting - template - void _insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag /*unused*/) - { - int64_t dist = std::distance(f, l); - if (dist < 0 || static_cast(dist) >= (std::numeric_limits::max)()) - throw_exception(std::length_error("insert-range overflow")); - - _resize_delta(static_cast(dist)); - - for (; dist > 0; --dist, ++f) - _insert_noresize(*f); - } +#endif - // (2) Arbitrary iterator, can't tell how much to resize - template - void _insert(InputIterator f, InputIterator l, std::input_iterator_tag /*unused*/) - { - for (; f != l; ++f) - _insert(*f); - } + // When inserting a lot at a time, we specialize on the type of iterator + template + void insert(InputIterator f, InputIterator l) + { + // specializes on iterator type + _insert(f, l, + typename std::iterator_traits::iterator_category()); + } -public: + // DefaultValue is a functor that takes a key and returns a value_type + // representing the default value to be inserted if none is found. + template + value_type& find_or_insert(const key_type& key) + { + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = bucket_count() - 1; + size_type bucknum = hash(key) & bucket_count_minus_one; + DefaultValue default_value; + size_type erased_pos = 0; + bool erased = false; -#if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES) - template - std::pair emplace(Args&&... args) - { - _resize_delta(1); - value_type obj(std::forward(args)...); - return _insert_noresize(obj); - } -#endif + while (1) // probe until something happens + { + typename Table::GrpPos grp_pos(table, bucknum); - // This is the normal insert routine, used by the outside world - std::pair insert(const_reference obj) - { - _resize_delta(1); // adding an object, grow if need be - return _insert_noresize(obj); - } + if (!grp_pos.test_strict()) + { + // not found + if (_resize_delta(1)) + { + // needed to rehash to make room + // Since we resized, we can't use pos, so recalculate where to insert. 
+ value_type def(default_value(key)); + return *(_insert_noresize(def).first); + } + else + { + // no need to rehash, insert right here + value_type def(default_value(key)); + return _insert_at(def, erased ? erased_pos : bucknum, erased); + } + } -#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) - template< class P > - std::pair insert(P &&obj) - { - _resize_delta(1); // adding an object, grow if need be - value_type val(std::forward(obj)); - return _insert_noresize(val); - } -#endif + if (grp_pos.test()) + { + reference ref(grp_pos.unsafe_get()); - // When inserting a lot at a time, we specialize on the type of iterator - template - void insert(InputIterator f, InputIterator l) - { - // specializes on iterator type - _insert(f, l, - typename std::iterator_traits::iterator_category()); - } + if (equals(key, get_key(ref))) + { + return ref; + } + } + else if (!erased) + { + // first erased position + erased_pos = bucknum; + erased = true; + } - // DefaultValue is a functor that takes a key and returns a value_type - // representing the default value to be inserted if none is found. - template - value_type& find_or_insert(const key_type& key) - { - size_type num_probes = 0; // how many times we've probed - const size_type bucket_count_minus_one = bucket_count() - 1; - size_type bucknum = hash(key) & bucket_count_minus_one; - DefaultValue default_value; - size_type erased_pos = 0; - bool erased = false; + ++num_probes; // we're doing another probe + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } - while (1) // probe until something happens + size_type erase(const key_type& key) { - typename Table::GrpPos grp_pos(table, bucknum); + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = bucket_count() - 1; + size_type bucknum = hash(key) & bucket_count_minus_one; - if (!grp_pos.test_strict()) + while (1) // probe until something happens { - // not found - if (_resize_delta(1)) + typename Table::GrpPos grp_pos(table, bucknum); + + if (!grp_pos.test_strict()) { - // needed to rehash to make room - // Since we resized, we can't use pos, so recalculate where to insert. - value_type def(default_value(key)); - return *(_insert_noresize(def).first); + return 0; // bucket is empty, we deleted nothing } - else + + if (grp_pos.test()) { - // no need to rehash, insert right here - value_type def(default_value(key)); - return _insert_at(def, erased ? 
erased_pos : bucknum, erased); + reference ref(grp_pos.unsafe_get()); + + if (equals(key, get_key(ref))) + { + grp_pos.erase(table); + ++num_deleted; + settings.set_consider_shrink(true); // will think about shrink after next insert + return 1; // because we deleted one thing + } } - } - if (grp_pos.test()) - { - reference ref(grp_pos.unsafe_get()); - if (equals(key, get_key(ref))) - return ref; + ++num_probes; // we're doing another probe + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); } - else if (!erased) + } + + const_iterator erase(const_iterator pos) + { + if (pos == cend()) { - // first erased position - erased_pos = bucknum; - erased = true; + return cend(); // sanity check } - ++num_probes; // we're doing another probe - bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; - assert(num_probes < bucket_count() - && "Hashtable is full: an error in key_equal<> or hash<>"); + const_iterator nextpos = table.erase(pos); + ++num_deleted; + settings.set_consider_shrink(true); + return nextpos; } - } - - size_type erase(const key_type& key) - { - size_type num_probes = 0; // how many times we've probed - const size_type bucket_count_minus_one = bucket_count() - 1; - size_type bucknum = hash(key) & bucket_count_minus_one; - while (1) // probe until something happens + const_iterator erase(const_iterator f, const_iterator l) { - typename Table::GrpPos grp_pos(table, bucknum); - - if (!grp_pos.test_strict()) - return 0; // bucket is empty, we deleted nothing - if (grp_pos.test()) + if (f == cend()) { - reference ref(grp_pos.unsafe_get()); - - if (equals(key, get_key(ref))) - { - grp_pos.erase(table); - ++num_deleted; - settings.set_consider_shrink(true); // will think about shrink after next insert - return 1; // because we deleted one thing - } + return cend(); // sanity check } - ++num_probes; // we're doing another probe - bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; - assert(num_probes < bucket_count() - && "Hashtable is full: an error in key_equal<> or hash<>"); + + size_type num_before = table.num_nonempty(); + const_iterator nextpos = table.erase(f, l); + num_deleted += num_before - table.num_nonempty(); + settings.set_consider_shrink(true); + return nextpos; } - } - const_iterator erase(const_iterator pos) - { - if (pos == cend()) - return cend(); // sanity check + // Deleted key routines - just to keep google test framework happy + // we don't actually use the deleted key + // --------------------------------------------------------------- + void set_deleted_key(const key_type&) + { + } - const_iterator nextpos = table.erase(pos); - ++num_deleted; - settings.set_consider_shrink(true); - return nextpos; - } + void clear_deleted_key() + { + } - const_iterator erase(const_iterator f, const_iterator l) - { - if (f == cend()) - return cend(); // sanity check + bool operator==(const sparse_hashtable& ht) const + { + if (this == &ht) + { + return true; + } - size_type num_before = table.num_nonempty(); - const_iterator nextpos = table.erase(f, l); - num_deleted += num_before - table.num_nonempty(); - settings.set_consider_shrink(true); - return nextpos; - } + if (size() != ht.size()) + { + return false; + } - // Deleted key routines - just to keep google test framework happy - // we don't actually use the deleted key - // --------------------------------------------------------------- - void set_deleted_key(const key_type&) - 
{ - } + for (const_iterator it = begin(); it != end(); ++it) + { + const_iterator it2 = ht.find(get_key(*it)); - void clear_deleted_key() - { - } + if ((it2 == ht.end()) || (*it != *it2)) + { + return false; + } + } - bool operator==(const sparse_hashtable& ht) const - { - if (this == &ht) return true; + } - if (size() != ht.size()) - return false; - - for (const_iterator it = begin(); it != end(); ++it) + bool operator!=(const sparse_hashtable& ht) const { - const_iterator it2 = ht.find(get_key(*it)); - if ((it2 == ht.end()) || (*it != *it2)) - return false; + return !(*this == ht); } - return true; - } - bool operator!=(const sparse_hashtable& ht) const - { - return !(*this == ht); - } + // I/O + // We support reading and writing hashtables to disk. NOTE that + // this only stores the hashtable metadata, not the stuff you've + // actually put in the hashtable! Alas, since I don't know how to + // write a hasher or key_equal, you have to make sure everything + // but the table is the same. We compact before writing. + // + // The OUTPUT type needs to support a Write() operation. File and + // OutputBuffer are appropriate types to pass in. + // + // The INPUT type needs to support a Read() operation. File and + // InputBuffer are appropriate types to pass in. + // ------------------------------------------------------------- + template + bool write_metadata(OUTPUT* fp) + { + return table.write_metadata(fp); + } + template + bool read_metadata(INPUT* fp) + { + num_deleted = 0; // since we got rid before writing + const bool result = table.read_metadata(fp); + settings.reset_thresholds(bucket_count()); + return result; + } - // I/O - // We support reading and writing hashtables to disk. NOTE that - // this only stores the hashtable metadata, not the stuff you've - // actually put in the hashtable! Alas, since I don't know how to - // write a hasher or key_equal, you have to make sure everything - // but the table is the same. We compact before writing. - // - // The OUTPUT type needs to support a Write() operation. File and - // OutputBuffer are appropriate types to pass in. - // - // The INPUT type needs to support a Read() operation. File and - // InputBuffer are appropriate types to pass in. - // ------------------------------------------------------------- - template - bool write_metadata(OUTPUT *fp) - { - return table.write_metadata(fp); - } + // Only meaningful if value_type is a POD. + template + bool write_nopointer_data(OUTPUT* fp) + { + return table.write_nopointer_data(fp); + } - template - bool read_metadata(INPUT *fp) - { - num_deleted = 0; // since we got rid before writing - const bool result = table.read_metadata(fp); - settings.reset_thresholds(bucket_count()); - return result; - } + // Only meaningful if value_type is a POD. + template + bool read_nopointer_data(INPUT* fp) + { + return table.read_nopointer_data(fp); + } - // Only meaningful if value_type is a POD. - template - bool write_nopointer_data(OUTPUT *fp) - { - return table.write_nopointer_data(fp); - } + // INPUT and OUTPUT must be either a FILE, *or* a C++ stream + // (istream, ostream, etc) *or* a class providing + // Read(void*, size_t) and Write(const void*, size_t) + // (respectively), which writes a buffer into a stream + // (which the INPUT/OUTPUT instance presumably owns). - // Only meaningful if value_type is a POD. 
- template - bool read_nopointer_data(INPUT *fp) - { - return table.read_nopointer_data(fp); - } + typedef sparsehash_internal::pod_serializer NopointerSerializer; - // INPUT and OUTPUT must be either a FILE, *or* a C++ stream - // (istream, ostream, etc) *or* a class providing - // Read(void*, size_t) and Write(const void*, size_t) - // (respectively), which writes a buffer into a stream - // (which the INPUT/OUTPUT instance presumably owns). + // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&) + template + bool serialize(ValueSerializer serializer, OUTPUT* fp) + { + return table.serialize(serializer, fp); + } - typedef sparsehash_internal::pod_serializer NopointerSerializer; + // ValueSerializer: a functor. operator()(INPUT*, value_type*) + template + bool unserialize(ValueSerializer serializer, INPUT* fp) + { + num_deleted = 0; // since we got rid before writing + const bool result = table.unserialize(serializer, fp); + settings.reset_thresholds(bucket_count()); + return result; + } - // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&) - template - bool serialize(ValueSerializer serializer, OUTPUT *fp) - { - return table.serialize(serializer, fp); - } + private: - // ValueSerializer: a functor. operator()(INPUT*, value_type*) - template - bool unserialize(ValueSerializer serializer, INPUT *fp) - { - num_deleted = 0; // since we got rid before writing - const bool result = table.unserialize(serializer, fp); - settings.reset_thresholds(bucket_count()); - return result; - } + // Package templated functors with the other types to eliminate memory + // needed for storing these zero-size operators. Since ExtractKey and + // hasher's operator() might have the same function signature, they + // must be packaged in different classes. + // ------------------------------------------------------------------------- + struct Settings : + sparsehash_internal::sh_hashtable_settings + { + explicit Settings(const hasher& hf) + : sparsehash_internal::sh_hashtable_settings + (hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {} + }; -private: + // KeyInfo stores delete key and packages zero-size functors: + // ExtractKey and SetKey. + // --------------------------------------------------------- + class KeyInfo : public ExtractKey, public SetKey, public EqualKey + { + public: + KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq) + : ExtractKey(ek), SetKey(sk), EqualKey(eq) + { + } - // Package templated functors with the other types to eliminate memory - // needed for storing these zero-size operators. Since ExtractKey and - // hasher's operator() might have the same function signature, they - // must be packaged in different classes. - // ------------------------------------------------------------------------- - struct Settings : - sparsehash_internal::sh_hashtable_settings - { - explicit Settings(const hasher& hf) - : sparsehash_internal::sh_hashtable_settings - (hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {} - }; + // We want to return the exact same type as ExtractKey: Key or const Key& + typename ExtractKey::result_type get_key(const_reference v) const + { + return ExtractKey::operator()(v); + } - // KeyInfo stores delete key and packages zero-size functors: - // ExtractKey and SetKey. 
- // --------------------------------------------------------- - class KeyInfo : public ExtractKey, public SetKey, public EqualKey - { - public: - KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq) - : ExtractKey(ek), SetKey(sk), EqualKey(eq) - { - } + bool equals(const key_type& a, const key_type& b) const + { + return EqualKey::operator()(a, b); + } + }; - // We want to return the exact same type as ExtractKey: Key or const Key& - typename ExtractKey::result_type get_key(const_reference v) const + // Utility functions to access the templated operators + size_t hash(const key_type& v) const { - return ExtractKey::operator()(v); + return settings.hash(v); } bool equals(const key_type& a, const key_type& b) const { - return EqualKey::operator()(a, b); - } - }; - - // Utility functions to access the templated operators - size_t hash(const key_type& v) const - { - return settings.hash(v); - } - - bool equals(const key_type& a, const key_type& b) const - { - return key_info.equals(a, b); - } + return key_info.equals(a, b); + } - typename ExtractKey::result_type get_key(const_reference v) const - { - return key_info.get_key(v); - } + typename ExtractKey::result_type get_key(const_reference v) const + { + return key_info.get_key(v); + } -private: - // Actual data - // ----------- - Settings settings; - KeyInfo key_info; - size_type num_deleted; - Table table; // holds num_buckets and num_elements too + private: + // Actual data + // ----------- + Settings settings; + KeyInfo key_info; + size_type num_deleted; + Table table; // holds num_buckets and num_elements too }; // We need a global swap as well // ----------------------------- template -inline void swap(sparse_hashtable &x, - sparse_hashtable &y) +inline void swap(sparse_hashtable& x, + sparse_hashtable& y) { x.swap(y); } @@ -4804,22 +5608,22 @@ inline void swap(sparse_hashtable &x, // ----------------------------------------------------------------------------- template -const typename sparse_hashtable::size_type -sparse_hashtable::ILLEGAL_BUCKET; +const typename sparse_hashtable::size_type +sparse_hashtable::ILLEGAL_BUCKET; // How full we let the table get before we resize. Knuth says .8 is // good -- higher causes us to probe too much, though saves memory // ----------------------------------------------------------------------------- template -const int sparse_hashtable::HT_OCCUPANCY_PCT = 50; +const int sparse_hashtable::HT_OCCUPANCY_PCT = 50; // How empty we let the table get before we resize lower. 
// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing // ----------------------------------------------------------------------------- template -const int sparse_hashtable::HT_EMPTY_PCT -= static_cast(0.4 * - sparse_hashtable::HT_OCCUPANCY_PCT); +const int sparse_hashtable::HT_EMPTY_PCT + = static_cast(0.4 * + sparse_hashtable::HT_OCCUPANCY_PCT); // ---------------------------------------------------------------------- @@ -4828,8 +5632,8 @@ const int sparse_hashtable::HT_EMPTY_PCT template , class EqualKey = std::equal_to, - class Alloc = libc_allocator_with_realloc > > -class sparse_hash_map + class Alloc = libc_allocator_with_realloc>> + class sparse_hash_map { public: typedef typename std::pair value_type; @@ -4865,7 +5669,7 @@ class sparse_hash_map // The actual data typedef sparse_hashtable ht; + SetKey, EqualKey, Alloc> ht; public: typedef typename ht::key_type key_type; @@ -4888,27 +5692,75 @@ class sparse_hash_map typedef typename ht::const_local_iterator const_local_iterator; // Iterator functions - iterator begin() { return rep.begin(); } - iterator end() { return rep.end(); } - const_iterator begin() const { return rep.cbegin(); } - const_iterator end() const { return rep.cend(); } - const_iterator cbegin() const { return rep.cbegin(); } - const_iterator cend() const { return rep.cend(); } + iterator begin() + { + return rep.begin(); + } + iterator end() + { + return rep.end(); + } + const_iterator begin() const + { + return rep.cbegin(); + } + const_iterator end() const + { + return rep.cend(); + } + const_iterator cbegin() const + { + return rep.cbegin(); + } + const_iterator cend() const + { + return rep.cend(); + } // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements. - local_iterator begin(size_type i) { return rep.begin(i); } - local_iterator end(size_type i) { return rep.end(i); } - const_local_iterator begin(size_type i) const { return rep.begin(i); } - const_local_iterator end(size_type i) const { return rep.end(i); } - const_local_iterator cbegin(size_type i) const { return rep.cbegin(i); } - const_local_iterator cend(size_type i) const { return rep.cend(i); } + local_iterator begin(size_type i) + { + return rep.begin(i); + } + local_iterator end(size_type i) + { + return rep.end(i); + } + const_local_iterator begin(size_type i) const + { + return rep.begin(i); + } + const_local_iterator end(size_type i) const + { + return rep.end(i); + } + const_local_iterator cbegin(size_type i) const + { + return rep.cbegin(i); + } + const_local_iterator cend(size_type i) const + { + return rep.cend(i); + } // Accessor functions // ------------------ - allocator_type get_allocator() const { return rep.get_allocator(); } - hasher hash_funct() const { return rep.hash_funct(); } - hasher hash_function() const { return hash_funct(); } - key_equal key_eq() const { return rep.key_eq(); } + allocator_type get_allocator() const + { + return rep.get_allocator(); + } + hasher hash_funct() const + { + return rep.hash_funct(); + } + hasher hash_function() const + { + return hash_funct(); + } + key_equal key_eq() const + { + return rep.key_eq(); + } // Constructors @@ -4963,21 +5815,21 @@ class sparse_hash_map rep.insert(f, l); } - sparse_hash_map(const sparse_hash_map &o) : + sparse_hash_map(const sparse_hash_map& o) : rep(o.rep) {} - sparse_hash_map(const sparse_hash_map &o, + sparse_hash_map(const sparse_hash_map& o, const allocator_type& alloc) : rep(o.rep, alloc) {} #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) - sparse_hash_map(sparse_hash_map &&o) : + 
sparse_hash_map(sparse_hash_map&& o) : rep(std::move(o.rep)) {} - sparse_hash_map(sparse_hash_map &&o, + sparse_hash_map(sparse_hash_map&& o, const allocator_type& alloc) : rep(std::move(o.rep), alloc) {} @@ -5021,85 +5873,159 @@ class sparse_hash_map } #endif - sparse_hash_map& operator=(const sparse_hash_map &o) + sparse_hash_map& operator=(const sparse_hash_map& o) { rep = o.rep; return *this; } - void clear() { rep.clear(); } - void swap(sparse_hash_map& hs) { rep.swap(hs.rep); } + void clear() + { + rep.clear(); + } + void swap(sparse_hash_map& hs) + { + rep.swap(hs.rep); + } // Functions concerning size // ------------------------- - size_type size() const { return rep.size(); } - size_type max_size() const { return rep.max_size(); } - bool empty() const { return rep.empty(); } - size_type bucket_count() const { return rep.bucket_count(); } - size_type max_bucket_count() const { return rep.max_bucket_count(); } + size_type size() const + { + return rep.size(); + } + size_type max_size() const + { + return rep.max_size(); + } + bool empty() const + { + return rep.empty(); + } + size_type bucket_count() const + { + return rep.bucket_count(); + } + size_type max_bucket_count() const + { + return rep.max_bucket_count(); + } - size_type bucket_size(size_type i) const { return rep.bucket_size(i); } - size_type bucket(const key_type& key) const { return rep.bucket(key); } - float load_factor() const { return size() * 1.0f / bucket_count(); } + size_type bucket_size(size_type i) const + { + return rep.bucket_size(i); + } + size_type bucket(const key_type& key) const + { + return rep.bucket(key); + } + float load_factor() const + { + return size() * 1.0f / bucket_count(); + } - float max_load_factor() const { return rep.get_enlarge_factor(); } - void max_load_factor(float grow) { rep.set_enlarge_factor(grow); } + float max_load_factor() const + { + return rep.get_enlarge_factor(); + } + void max_load_factor(float grow) + { + rep.set_enlarge_factor(grow); + } - float min_load_factor() const { return rep.get_shrink_factor(); } - void min_load_factor(float shrink){ rep.set_shrink_factor(shrink); } + float min_load_factor() const + { + return rep.get_shrink_factor(); + } + void min_load_factor(float shrink) + { + rep.set_shrink_factor(shrink); + } void set_resizing_parameters(float shrink, float grow) { rep.set_resizing_parameters(shrink, grow); } - void resize(size_type cnt) { rep.resize(cnt); } - void rehash(size_type cnt) { resize(cnt); } // c++11 name - void reserve(size_type cnt) { resize(cnt); } // c++11 + void resize(size_type cnt) + { + rep.resize(cnt); + } + void rehash(size_type cnt) + { + resize(cnt); // c++11 name + } + void reserve(size_type cnt) + { + resize(cnt); // c++11 + } // Lookup // ------ - iterator find(const key_type& key) { return rep.find(key); } - const_iterator find(const key_type& key) const { return rep.find(key); } + iterator find(const key_type& key) + { + return rep.find(key); + } + const_iterator find(const key_type& key) const + { + return rep.find(key); + } mapped_type& operator[](const key_type& key) { return rep.template find_or_insert(key).second; } - size_type count(const key_type& key) const { return rep.count(key); } + size_type count(const key_type& key) const + { + return rep.count(key); + } std::pair - equal_range(const key_type& key) { return rep.equal_range(key); } + equal_range(const key_type& key) + { + return rep.equal_range(key); + } std::pair - equal_range(const key_type& key) const { return rep.equal_range(key); } + equal_range(const key_type& 
key) const + { + return rep.equal_range(key); + } mapped_type& at(const key_type& key) { iterator it = rep.find(key); + if (it == rep.end()) + { throw_exception(std::out_of_range("at: key not present")); + } + return it->second; } const mapped_type& at(const key_type& key) const { const_iterator it = rep.find(key); + if (it == rep.cend()) + { throw_exception(std::out_of_range("at: key not present")); + } + return it->second; } #if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES) template - std::pair emplace(Args&&... args) + std::pair emplace(Args&& ... args) { return rep.emplace(std::forward(args)...); } template - iterator emplace_hint(const_iterator , Args&&... args) + iterator emplace_hint(const_iterator, Args&& ... args) { return rep.emplace(std::forward(args)...).first; } @@ -5108,40 +6034,88 @@ class sparse_hash_map // Insert // ------ std::pair - insert(const value_type& obj) { return rep.insert(obj); } + insert(const value_type& obj) + { + return rep.insert(obj); + } #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) template< class P > - std::pair insert(P&& obj) { return rep.insert(std::forward
<P>(obj)); } + std::pair insert(P&& obj) + { + return rep.insert(std::forward<P>
(obj)); + } #endif template - void insert(InputIterator f, InputIterator l) { rep.insert(f, l); } + void insert(InputIterator f, InputIterator l) + { + rep.insert(f, l); + } - void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } + void insert(const_iterator f, const_iterator l) + { + rep.insert(f, l); + } - iterator insert(iterator /*unused*/, const value_type& obj) { return insert(obj).first; } - iterator insert(const_iterator /*unused*/, const value_type& obj) { return insert(obj).first; } + iterator insert(iterator /*unused*/, const value_type& obj) + { + return insert(obj).first; + } + iterator insert(const_iterator /*unused*/, const value_type& obj) + { + return insert(obj).first; + } // Deleted key routines - just to keep google test framework happy // we don't actually use the deleted key // --------------------------------------------------------------- - void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } - void clear_deleted_key() { rep.clear_deleted_key(); } - key_type deleted_key() const { return rep.deleted_key(); } + void set_deleted_key(const key_type& key) + { + rep.set_deleted_key(key); + } + void clear_deleted_key() + { + rep.clear_deleted_key(); + } + key_type deleted_key() const + { + return rep.deleted_key(); + } // Erase // ----- - size_type erase(const key_type& key) { return rep.erase(key); } - iterator erase(iterator it) { return rep.erase(it); } - iterator erase(iterator f, iterator l) { return rep.erase(f, l); } - iterator erase(const_iterator it) { return rep.erase(it); } - iterator erase(const_iterator f, const_iterator l){ return rep.erase(f, l); } + size_type erase(const key_type& key) + { + return rep.erase(key); + } + iterator erase(iterator it) + { + return rep.erase(it); + } + iterator erase(iterator f, iterator l) + { + return rep.erase(f, l); + } + iterator erase(const_iterator it) + { + return rep.erase(it); + } + iterator erase(const_iterator f, const_iterator l) + { + return rep.erase(f, l); + } // Comparison // ---------- - bool operator==(const sparse_hash_map& hs) const { return rep == hs.rep; } - bool operator!=(const sparse_hash_map& hs) const { return rep != hs.rep; } + bool operator==(const sparse_hash_map& hs) const + { + return rep == hs.rep; + } + bool operator!=(const sparse_hash_map& hs) const + { + return rep != hs.rep; + } // I/O -- this is an add-on for writing metainformation to disk @@ -5197,16 +6171,28 @@ class sparse_hash_map // Use serialize() and unserialize() for new code. 
// ----------------------------------------------- template - bool write_metadata(OUTPUT *fp) { return rep.write_metadata(fp); } + bool write_metadata(OUTPUT* fp) + { + return rep.write_metadata(fp); + } template - bool read_metadata(INPUT *fp) { return rep.read_metadata(fp); } + bool read_metadata(INPUT* fp) + { + return rep.read_metadata(fp); + } template - bool write_nopointer_data(OUTPUT *fp) { return rep.write_nopointer_data(fp); } + bool write_nopointer_data(OUTPUT* fp) + { + return rep.write_nopointer_data(fp); + } template - bool read_nopointer_data(INPUT *fp) { return rep.read_nopointer_data(fp); } + bool read_nopointer_data(INPUT* fp) + { + return rep.read_nopointer_data(fp); + } private: @@ -5230,7 +6216,7 @@ inline void swap(sparse_hash_map& hm1, template , class EqualKey = std::equal_to, - class Alloc = libc_allocator_with_realloc > + class Alloc = libc_allocator_with_realloc> class sparse_hash_set { private: @@ -5238,7 +6224,10 @@ class sparse_hash_set struct Identity { typedef const Value& result_type; - inline const Value& operator()(const Value& v) const { return v; } + inline const Value& operator()(const Value& v) const + { + return v; + } }; struct SetKey @@ -5250,7 +6239,7 @@ class sparse_hash_set }; typedef sparse_hashtable ht; + EqualKey, Alloc> ht; public: typedef typename ht::key_type key_type; @@ -5273,24 +6262,60 @@ class sparse_hash_set // Iterator functions -- recall all iterators are const - iterator begin() const { return rep.begin(); } - iterator end() const { return rep.end(); } - const_iterator cbegin() const { return rep.cbegin(); } - const_iterator cend() const { return rep.cend(); } + iterator begin() const + { + return rep.begin(); + } + iterator end() const + { + return rep.end(); + } + const_iterator cbegin() const + { + return rep.cbegin(); + } + const_iterator cend() const + { + return rep.cend(); + } // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements. 
- local_iterator begin(size_type i) const { return rep.begin(i); } - local_iterator end(size_type i) const { return rep.end(i); } - local_iterator cbegin(size_type i) const { return rep.cbegin(i); } - local_iterator cend(size_type i) const { return rep.cend(i); } + local_iterator begin(size_type i) const + { + return rep.begin(i); + } + local_iterator end(size_type i) const + { + return rep.end(i); + } + local_iterator cbegin(size_type i) const + { + return rep.cbegin(i); + } + local_iterator cend(size_type i) const + { + return rep.cend(i); + } // Accessor functions // ------------------ - allocator_type get_allocator() const { return rep.get_allocator(); } - hasher hash_funct() const { return rep.hash_funct(); } - hasher hash_function() const { return hash_funct(); } // tr1 name - key_equal key_eq() const { return rep.key_eq(); } + allocator_type get_allocator() const + { + return rep.get_allocator(); + } + hasher hash_funct() const + { + return rep.hash_funct(); + } + hasher hash_function() const + { + return hash_funct(); // tr1 name + } + key_equal key_eq() const + { + return rep.key_eq(); + } // Constructors @@ -5346,21 +6371,21 @@ class sparse_hash_set rep.insert(f, l); } - sparse_hash_set(const sparse_hash_set &o) : + sparse_hash_set(const sparse_hash_set& o) : rep(o.rep) {} - sparse_hash_set(const sparse_hash_set &o, + sparse_hash_set(const sparse_hash_set& o, const allocator_type& alloc) : rep(o.rep, alloc) {} #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) - sparse_hash_set(sparse_hash_set &&o) : + sparse_hash_set(sparse_hash_set&& o) : rep(std::move(o.rep)) {} - sparse_hash_set(sparse_hash_set &&o, + sparse_hash_set(sparse_hash_set&& o, const allocator_type& alloc) : rep(std::move(o.rep), alloc) {} @@ -5406,62 +6431,122 @@ class sparse_hash_set #endif - sparse_hash_set& operator=(const sparse_hash_set &o) + sparse_hash_set& operator=(const sparse_hash_set& o) { rep = o.rep; return *this; } - void clear() { rep.clear(); } - void swap(sparse_hash_set& hs) { rep.swap(hs.rep); } + void clear() + { + rep.clear(); + } + void swap(sparse_hash_set& hs) + { + rep.swap(hs.rep); + } // Functions concerning size // ------------------------- - size_type size() const { return rep.size(); } - size_type max_size() const { return rep.max_size(); } - bool empty() const { return rep.empty(); } - size_type bucket_count() const { return rep.bucket_count(); } - size_type max_bucket_count() const { return rep.max_bucket_count(); } + size_type size() const + { + return rep.size(); + } + size_type max_size() const + { + return rep.max_size(); + } + bool empty() const + { + return rep.empty(); + } + size_type bucket_count() const + { + return rep.bucket_count(); + } + size_type max_bucket_count() const + { + return rep.max_bucket_count(); + } - size_type bucket_size(size_type i) const { return rep.bucket_size(i); } - size_type bucket(const key_type& key) const { return rep.bucket(key); } + size_type bucket_size(size_type i) const + { + return rep.bucket_size(i); + } + size_type bucket(const key_type& key) const + { + return rep.bucket(key); + } - float load_factor() const { return size() * 1.0f / bucket_count(); } + float load_factor() const + { + return size() * 1.0f / bucket_count(); + } - float max_load_factor() const { return rep.get_enlarge_factor(); } - void max_load_factor(float grow) { rep.set_enlarge_factor(grow); } + float max_load_factor() const + { + return rep.get_enlarge_factor(); + } + void max_load_factor(float grow) + { + rep.set_enlarge_factor(grow); + } - float min_load_factor() const { 
return rep.get_shrink_factor(); } - void min_load_factor(float shrink){ rep.set_shrink_factor(shrink); } + float min_load_factor() const + { + return rep.get_shrink_factor(); + } + void min_load_factor(float shrink) + { + rep.set_shrink_factor(shrink); + } void set_resizing_parameters(float shrink, float grow) { rep.set_resizing_parameters(shrink, grow); } - void resize(size_type cnt) { rep.resize(cnt); } - void rehash(size_type cnt) { resize(cnt); } // c++11 name - void reserve(size_type cnt) { resize(cnt); } // c++11 + void resize(size_type cnt) + { + rep.resize(cnt); + } + void rehash(size_type cnt) + { + resize(cnt); // c++11 name + } + void reserve(size_type cnt) + { + resize(cnt); // c++11 + } // Lookup // ------ - iterator find(const key_type& key) const { return rep.find(key); } + iterator find(const key_type& key) const + { + return rep.find(key); + } - size_type count(const key_type& key) const { return rep.count(key); } + size_type count(const key_type& key) const + { + return rep.count(key); + } std::pair - equal_range(const key_type& key) const { return rep.equal_range(key); } + equal_range(const key_type& key) const + { + return rep.equal_range(key); + } #if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES) template - std::pair emplace(Args&&... args) + std::pair emplace(Args&& ... args) { return rep.emplace(std::forward(args)...); } template - iterator emplace_hint(const_iterator , Args&&... args) + iterator emplace_hint(const_iterator, Args&& ... args) { return rep.emplace(std::forward(args)...).first; } @@ -5477,32 +6562,68 @@ class sparse_hash_set #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) template - std::pair insert(P&& obj) { return rep.insert(std::forward
<P>(obj)); } + std::pair insert(P&& obj) + { + return rep.insert(std::forward<P>
(obj)); + } #endif template - void insert(InputIterator f, InputIterator l) { rep.insert(f, l); } + void insert(InputIterator f, InputIterator l) + { + rep.insert(f, l); + } - void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } + void insert(const_iterator f, const_iterator l) + { + rep.insert(f, l); + } - iterator insert(iterator /*unused*/, const value_type& obj) { return insert(obj).first; } + iterator insert(iterator /*unused*/, const value_type& obj) + { + return insert(obj).first; + } // Deleted key - do nothing - just to keep google test framework happy // ------------------------------------------------------------------- - void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } - void clear_deleted_key() { rep.clear_deleted_key(); } - key_type deleted_key() const { return rep.deleted_key(); } + void set_deleted_key(const key_type& key) + { + rep.set_deleted_key(key); + } + void clear_deleted_key() + { + rep.clear_deleted_key(); + } + key_type deleted_key() const + { + return rep.deleted_key(); + } // Erase // ----- - size_type erase(const key_type& key) { return rep.erase(key); } - iterator erase(iterator it) { return rep.erase(it); } - iterator erase(iterator f, iterator l) { return rep.erase(f, l); } + size_type erase(const key_type& key) + { + return rep.erase(key); + } + iterator erase(iterator it) + { + return rep.erase(it); + } + iterator erase(iterator f, iterator l) + { + return rep.erase(f, l); + } // Comparison // ---------- - bool operator==(const sparse_hash_set& hs) const { return rep == hs.rep; } - bool operator!=(const sparse_hash_set& hs) const { return rep != hs.rep; } + bool operator==(const sparse_hash_set& hs) const + { + return rep == hs.rep; + } + bool operator!=(const sparse_hash_set& hs) const + { + return rep != hs.rep; + } // I/O -- this is an add-on for writing metainformation to disk @@ -5558,16 +6679,28 @@ class sparse_hash_set // Use serialize() and unserialize() for new code. // ----------------------------------------------- template - bool write_metadata(OUTPUT *fp) { return rep.write_metadata(fp); } + bool write_metadata(OUTPUT* fp) + { + return rep.write_metadata(fp); + } template - bool read_metadata(INPUT *fp) { return rep.read_metadata(fp); } + bool read_metadata(INPUT* fp) + { + return rep.read_metadata(fp); + } template - bool write_nopointer_data(OUTPUT *fp) { return rep.write_nopointer_data(fp); } + bool write_nopointer_data(OUTPUT* fp) + { + return rep.write_nopointer_data(fp); + } template - bool read_nopointer_data(INPUT *fp) { return rep.read_nopointer_data(fp); } + bool read_nopointer_data(INPUT* fp) + { + return rep.read_nopointer_data(fp); + } private: // The actual data diff --git a/remove_orig_files.sh b/remove_orig_files.sh new file mode 100755 index 0000000..aee82d0 --- /dev/null +++ b/remove_orig_files.sh @@ -0,0 +1,3 @@ +#!/bin/bash +find . -name "*.orig" +find . -name "*.orig" -delete
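
For reference, a minimal usage sketch of the two helper scripts introduced above (this assumes they are invoked from the repository root, and that astyle leaves *.orig backups of every file it reformats, which is what remove_orig_files.sh cleans up):

    # Format everything under include/ and tutorials/, then drop the astyle backups
    ./code_formatter.sh
    ./remove_orig_files.sh

    # Or format only the files passed on the command line, e.g.
    ./code_formatter.sh include/Network.h include/Layer.h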