Skip to content

Commit

Permalink
[ML] Convert std::shared_ptrs to std::unique_ptrs where possible (ela…
Browse files Browse the repository at this point in the history
…stic#115)

Many of our current uses of shared pointer date from when not all our target platforms were
C++11 compliant and in particular move semantics weren't available. Now that they all are, we can
switch these to std::unique_ptr. This has a few advantages: i) it saves us 16 bytes (8 for the extra
pointer to the reference count and 8 for the reference count) per instance, ii) it avoids atomic
updates of the reference count, iii) it forces one to have the correct copy semantics in such
cases. For example, this showed up that we had an implicit shallow copy (not appropriate) on our
naive Bayes implementation and fixing this showed up a bug in restore. This also fixes an error in
memory accounting for shared pointers, which wasn't including the extra memory for the
reference count.
  • Loading branch information
tveasey committed Jun 8, 2018
1 parent 50f57e2 commit 6013ad0
Show file tree
Hide file tree
Showing 81 changed files with 881 additions and 623 deletions.
3 changes: 3 additions & 0 deletions docs/CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ Secure the ML processes by preventing system calls such as fork and exec. The Li
Seccomp BPF to intercept system calls and is available in kernels since 3.5. On Windows Job Objects prevent
new processes being created and macOS uses the sandbox functionality ({pull}98[#98])

Fix a bug causing us to under estimate the memory used by shared pointers and reduce the memory consumed
by unnecessary reference counting ({pull}108[#108])

=== Bug Fixes

Age seasonal components in proportion to the fraction of values with which they're updated ({pull}88[#88])
Expand Down
2 changes: 1 addition & 1 deletion include/api/CForecastRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ class API_EXPORT CForecastRunner final : private core::CNonCopyable {
using TForecastModelWrapper = model::CForecastDataSink::SForecastModelWrapper;
using TForecastResultSeries = model::CForecastDataSink::SForecastResultSeries;
using TForecastResultSeriesVec = std::vector<TForecastResultSeries>;
using TMathsModelPtr = std::shared_ptr<maths::CModel>;
using TMathsModelPtr = std::unique_ptr<maths::CModel>;

using TStrUSet = boost::unordered_set<std::string>;

Expand Down
41 changes: 30 additions & 11 deletions include/core/CMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,21 +287,25 @@ class CORE_EXPORT CMemory : private CNonInstantiatable {
static std::size_t
dynamicSize(const T& t,
typename boost::enable_if<typename boost::is_pointer<T>>::type* = nullptr) {
if (t == nullptr) {
return 0;
}
return staticSize(*t) + dynamicSize(*t);
return t == nullptr ? 0 : staticSize(*t) + dynamicSize(*t);
}

//! Overload for std::shared_ptr.
template<typename T>
static std::size_t dynamicSize(const std::shared_ptr<T>& t) {
if (!t) {
if (t == nullptr) {
return 0;
}
long uc = t.use_count();
// Round up
return (staticSize(*t) + dynamicSize(*t) + std::size_t(uc - 1)) / uc;
// Note we add on sizeof(long) here to account for the memory
// used by the shared reference count. Also, round up.
return (sizeof(long) + staticSize(*t) + dynamicSize(*t) + std::size_t(uc - 1)) / uc;
}

//! Overload for std::unique_ptr.
template<typename T>
static std::size_t dynamicSize(const std::unique_ptr<T>& t) {
return t == nullptr ? 0 : staticSize(*t) + dynamicSize(*t);
}

//! Overload for boost::array.
Expand Down Expand Up @@ -703,25 +707,40 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable {
static void dynamicSize(const char* name,
const std::shared_ptr<T>& t,
CMemoryUsage::TMemoryUsagePtr mem) {
if (t) {
if (t != nullptr) {
long uc = t.use_count();
// If the pointer is shared by multiple users, each one
// might count it, so divide by the number of users.
// However, if only 1 user has it, do a full debug.
if (uc == 1) {
mem->addItem("shared_ptr", CMemory::staticSize(*t));
// Note we add on sizeof(long) here to account for
// the memory used by the shared reference count.
mem->addItem("shared_ptr", sizeof(long) + CMemory::staticSize(*t));
dynamicSize(name, *t, mem);
} else {
std::ostringstream ss;
ss << "shared_ptr (x" << uc << ')';
// Round up
mem->addItem(ss.str(), (CMemory::staticSize(*t) +
// Note we add on sizeof(long) here to account for
// the memory used by the shared reference count.
// Also, round up.
mem->addItem(ss.str(), (sizeof(long) + CMemory::staticSize(*t) +
CMemory::dynamicSize(*t) + std::size_t(uc - 1)) /
uc);
}
}
}

//! Overload for std::unique_ptr.
template<typename T>
static void dynamicSize(const char* name,
const std::unique_ptr<T>& t,
CMemoryUsage::TMemoryUsagePtr mem) {
if (t != nullptr) {
mem->addItem("ptr", CMemory::staticSize(*t));
memory_detail::SDebugMemoryDynamicSize<T>::dispatch(name, *t, mem);
}
}

//! Overload for boost::array.
template<typename T, std::size_t N>
static void dynamicSize(const char* name,
Expand Down
9 changes: 8 additions & 1 deletion include/maths/CChecksum.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,13 +158,20 @@ class CChecksumImpl<BasicChecksum> {
: CChecksumImpl<typename selector<T>::value>::dispatch(seed, *target);
}

//! Checksum a pointer.
//! Checksum a shared pointer.
template<typename T>
static uint64_t dispatch(uint64_t seed, const std::shared_ptr<T>& target) {
return !target ? seed
: CChecksumImpl<typename selector<T>::value>::dispatch(seed, *target);
}

//! Checksum a unique pointer.
template<typename T>
static uint64_t dispatch(uint64_t seed, const std::unique_ptr<T>& target) {
return !target ? seed
: CChecksumImpl<typename selector<T>::value>::dispatch(seed, *target);
}

//! Checksum a pair.
template<typename U, typename V>
static uint64_t dispatch(uint64_t seed, const std::pair<U, V>& target) {
Expand Down
1 change: 0 additions & 1 deletion include/maths/CClusterer.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ class MATHS_EXPORT CClustererTypes {
template<typename POINT>
class CClusterer : public CClustererTypes {
public:
using TClustererPtr = std::shared_ptr<CClusterer>;
using TPointVec = std::vector<POINT>;
using TPointPrecise = typename SPromoted<POINT>::Type;
using TPointPreciseVec = std::vector<TPointPrecise>;
Expand Down
6 changes: 3 additions & 3 deletions include/maths/CClustererStateSerialiser.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ struct SDistributionRestoreParams;
//! to potential competitors.
class MATHS_EXPORT CClustererStateSerialiser {
public:
using TClusterer1dPtr = std::shared_ptr<CClusterer1d>;
using TClusterer1dPtr = std::unique_ptr<CClusterer1d>;

public:
//! Construct the appropriate CClusterer sub-class from its state
Expand Down Expand Up @@ -73,7 +73,7 @@ class MATHS_EXPORT CClustererStateSerialiser {
//! \note Sets \p ptr to NULL on failure.
template<typename T, std::size_t N>
bool operator()(const SDistributionRestoreParams& params,
std::shared_ptr<CClusterer<CVectorNx1<T, N>>>& ptr,
std::unique_ptr<CClusterer<CVectorNx1<T, N>>>& ptr,
core::CStateRestoreTraverser& traverser) {
return this->operator()(params, CClustererTypes::CDoNothing(),
CClustererTypes::CDoNothing(), ptr, traverser);
Expand All @@ -87,7 +87,7 @@ class MATHS_EXPORT CClustererStateSerialiser {
bool operator()(const SDistributionRestoreParams& params,
const CClustererTypes::TSplitFunc& splitFunc,
const CClustererTypes::TMergeFunc& mergeFunc,
std::shared_ptr<CClusterer<CVectorNx1<T, N>>>& ptr,
std::unique_ptr<CClusterer<CVectorNx1<T, N>>>& ptr,
core::CStateRestoreTraverser& traverser) {
std::size_t numResults(0);

Expand Down
2 changes: 1 addition & 1 deletion include/maths/CModelStateSerialiser.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ struct SModelRestoreParams;
//! compatibility in the future as the classes evolve.
class MATHS_EXPORT CModelStateSerialiser {
public:
using TModelPtr = std::shared_ptr<CModel>;
using TModelPtr = std::unique_ptr<CModel>;

public:
//! Construct the appropriate CPrior sub-class from its state
Expand Down
11 changes: 6 additions & 5 deletions include/maths/CMultimodalPrior.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,9 @@ namespace maths {
//! point of view this is the composite pattern.
class MATHS_EXPORT CMultimodalPrior : public CPrior {
public:
using TClustererPtr = std::shared_ptr<CClusterer1d>;
using TPriorPtr = std::shared_ptr<CPrior>;
using TClustererPtr = std::unique_ptr<CClusterer1d>;
using TPriorPtr = std::unique_ptr<CPrior>;
using TPriorPtrVec = std::vector<TPriorPtr>;
using TPriorPtrVecItr = TPriorPtrVec::iterator;
using TPriorPtrVecCItr = TPriorPtrVec::const_iterator;
using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
using TMeanVarAccumulatorVec = std::vector<TMeanVarAccumulator>;

Expand All @@ -80,6 +78,9 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior {
double decayRate = 0.0);

//! Create from a collection of weights and priors.
//!
//! \note The priors are moved into place clearing the values in \p priors.
//! \note This constructor doesn't support subsequent update of the prior.
CMultimodalPrior(maths_t::EDataType dataType, double decayRate, TPriorPtrVec& priors);

//! Construct from part of a state document.
Expand Down Expand Up @@ -320,7 +321,7 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior {
CMultimodalPrior* m_Prior;
};

using TMode = SMultimodalPriorMode<std::shared_ptr<CPrior>>;
using TMode = SMultimodalPriorMode<TPriorPtr>;
using TModeVec = std::vector<TMode>;

private:
Expand Down
2 changes: 2 additions & 0 deletions include/maths/CMultimodalPriorMode.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ struct SMultimodalPriorMode {
SMultimodalPriorMode() : s_Index(0), s_Prior() {}
SMultimodalPriorMode(std::size_t index, const PRIOR_PTR& prior)
: s_Index(index), s_Prior(prior->clone()) {}
SMultimodalPriorMode(std::size_t index, PRIOR_PTR&& prior)
: s_Index(index), s_Prior(std::move(prior)) {}

//! Get the weight of this sample.
double weight() const { return s_Prior->numberSamples(); }
Expand Down
31 changes: 16 additions & 15 deletions include/maths/CMultivariateMultimodalPrior.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <maths/MathsTypes.h>

#include <boost/bind.hpp>
#include <boost/make_unique.hpp>
#include <boost/numeric/conversion/bounds.hpp>
#include <boost/ref.hpp>

Expand All @@ -50,10 +51,10 @@ namespace multivariate_multimodal_prior_detail {

using TSizeDoublePr = std::pair<size_t, double>;
using TSizeDoublePr3Vec = core::CSmallVector<TSizeDoublePr, 3>;
using TPriorPtr = std::shared_ptr<CMultivariatePrior>;
using TDouble10Vec1Vec = CMultivariatePrior::TDouble10Vec1Vec;
using TDouble10VecWeightsAry1Vec = CMultivariatePrior::TDouble10VecWeightsAry1Vec;
using TMode = SMultimodalPriorMode<std::shared_ptr<CMultivariatePrior>>;
using TPriorPtr = std::unique_ptr<CMultivariatePrior>;
using TMode = SMultimodalPriorMode<TPriorPtr>;
using TModeVec = std::vector<TMode>;

//! Implementation of a sample joint log marginal likelihood calculation.
Expand Down Expand Up @@ -134,7 +135,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior {
using TMatrix = CSymmetricMatrixNxN<double, N>;
using TMatrixVec = std::vector<TMatrix>;
using TClusterer = CClusterer<TFloatPoint>;
using TClustererPtr = std::shared_ptr<TClusterer>;
using TClustererPtr = std::unique_ptr<TClusterer>;
using TPriorPtrVec = std::vector<TPriorPtr>;

// Lift all overloads of into scope.
Expand Down Expand Up @@ -162,13 +163,13 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior {

//! Create from a collection of priors.
//!
//! \note The priors are shallow copied.
//! \note The priors are moved into place clearing the values in \p priors.
//! \note This constructor doesn't support subsequent update of the prior.
CMultivariateMultimodalPrior(maths_t::EDataType dataType, TPriorPtrVec& priors)
: CMultivariatePrior(dataType, 0.0) {
m_Modes.reserve(priors.size());
for (std::size_t i = 0u; i < priors.size(); ++i) {
m_Modes.emplace_back(i, priors[i]);
m_Modes.emplace_back(i, std::move(priors[i]));
}
}

Expand Down Expand Up @@ -425,12 +426,12 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior {
for (const auto& mode : m_Modes) {
TUnivariatePriorPtrDoublePr prior(mode.s_Prior->univariate(marginalize, condition));
if (prior.first == nullptr) {
return TUnivariatePriorPtrDoublePr();
return {};
}
if (prior.first->isNonInformative()) {
continue;
}
modes.push_back(prior.first);
modes.push_back(std::move(prior.first));
weights.push_back(prior.second);
maxWeight.add(weights.back());
}
Expand All @@ -444,8 +445,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior {
modes[i]->numberSamples(weights[i] / Z * modes[i]->numberSamples());
}

return {TUnivariatePriorPtr(new CMultimodalPrior(this->dataType(),
this->decayRate(), modes)),
return {boost::make_unique<CMultimodalPrior>(this->dataType(),
this->decayRate(), modes),
Z > 0.0 ? maxWeight[0] + std::log(Z) : 0.0};
}

Expand All @@ -465,7 +466,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior {
const TSizeDoublePr10Vec& condition) const {

if (N == 2) {
return TPriorPtrDoublePr(TPriorPtr(this->clone()), 0.0);
return {TPriorPtr(this->clone()), 0.0};
}

std::size_t n = m_Modes.size();
Expand All @@ -484,7 +485,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior {
if (prior.first->isNonInformative()) {
continue;
}
modes.push_back(prior.first);
modes.push_back(std::move(prior.first));
weights.push_back(prior.second);
maxWeight.add(weights.back());
}
Expand All @@ -498,7 +499,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior {
modes[i]->numberSamples(weights[i] / Z * modes[i]->numberSamples());
}

return {TPriorPtr(new CMultivariateMultimodalPrior<2>(this->dataType(), modes)),
return {boost::make_unique<CMultivariateMultimodalPrior<2>>(this->dataType(), modes),
Z > 0.0 ? maxWeight[0] + std::log(Z) : 0.0};
}

Expand Down Expand Up @@ -905,7 +906,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior {

// Create the child modes.
LOG_TRACE(<< "Creating mode with index " << leftSplitIndex);
modes.emplace_back(leftSplitIndex, m_Prior->m_SeedPrior);
modes.emplace_back(leftSplitIndex, TPriorPtr(m_Prior->m_SeedPrior->clone()));
{
TPointVec samples;
if (!m_Prior->m_Clusterer->sample(
Expand Down Expand Up @@ -935,7 +936,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior {
}

LOG_TRACE(<< "Creating mode with index " << rightSplitIndex);
modes.emplace_back(rightSplitIndex, m_Prior->m_SeedPrior);
modes.emplace_back(rightSplitIndex, TPriorPtr(m_Prior->m_SeedPrior->clone()));
{
TPointVec samples;
if (!m_Prior->m_Clusterer->sample(
Expand Down Expand Up @@ -1025,7 +1026,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior {
MODE_TAG, TMode mode,
traverser.traverseSubLevel(boost::bind(
&TMode::acceptRestoreTraverser, &mode, boost::cref(params), _1)),
m_Modes.push_back(mode))
m_Modes.push_back(std::move(mode)))
RESTORE_SETUP_TEARDOWN(
NUMBER_SAMPLES_TAG, double numberSamples,
core::CStringUtils::stringToType(traverser.value(), numberSamples),
Expand Down
2 changes: 1 addition & 1 deletion include/maths/CMultivariateMultimodalPriorFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ struct SDistributionRestoreParams;
//! \brief Factory for multivariate multimodal priors.
class MATHS_EXPORT CMultivariateMultimodalPriorFactory {
public:
using TPriorPtr = std::shared_ptr<CMultivariatePrior>;
using TPriorPtr = std::unique_ptr<CMultivariatePrior>;

public:
//! Create a new non-informative multivariate normal prior.
Expand Down
Loading

0 comments on commit 6013ad0

Please sign in to comment.