Skip to content

Commit

Permalink
[ML] Ensure we have sufficient resolution when we reinitialise component models (elastic#2167)
Browse files Browse the repository at this point in the history

When we retest whether a modelled seasonal component is still appropriate, or may contain bias, we now require higher
resolution if shorter time windows are available. This is important because, if the time series has a mixture of fast and
slow seasonality, we will detect the fast seasonality first and then potentially reinitialise it when we detect the slow
seasonality. We should only do this if the window provides us with sufficient resolution to initialise it properly.

Closes elastic#2166.
  • Loading branch information
tveasey committed Jan 6, 2022
1 parent b3f53f1 commit e3d3d04
Show file tree
Hide file tree
Showing 9 changed files with 182 additions and 73 deletions.
11 changes: 11 additions & 0 deletions docs/CHANGELOG.asciidoc
Expand Up @@ -55,6 +55,17 @@
- the C++ front end to PyTorch - and performs inference on models stored in the
TorchScript format. (See {ml-pull}1902[#1902].)


== {es} version 7.17.0

=== Bug Fixes

* Avoid transiently poor time series modelling after detecting new seasonal components.
This can affect cases where the data contain both fast and slow repeats, for example
30 minutes and 1 day, and the job uses a short bucket length. The outcome can be transiently
poor predictions and model bounds, and sometimes false positive anomalies. (See
{ml-pull}2167[#2167].)

== {es} version 7.16.0

=== Enhancements
Expand Down
5 changes: 4 additions & 1 deletion include/maths/time_series/CExpandingWindow.h
Expand Up @@ -88,6 +88,9 @@ class MATHS_TIME_SERIES_EXPORT CExpandingWindow {
//! Get the number of bucket values.
std::size_t size() const;

//! Check if there are shorter windows.
bool haveShorterWindows() const;

//! Get the mean time offset of the data points added with respect to the start
//! of the sample interval.
core_t::TTime sampleAverageOffset() const;
Expand Down Expand Up @@ -132,7 +135,7 @@ class MATHS_TIME_SERIES_EXPORT CExpandingWindow {
bool needToCompress(core_t::TTime time) const;

//! Get a checksum for this object.
uint64_t checksum(uint64_t seed = 0) const;
std::uint64_t checksum(std::uint64_t seed = 0) const;

//! Debug the memory used by this object.
void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const;
Expand Down
1 change: 1 addition & 0 deletions include/maths/time_series/CTimeSeriesDecompositionDetail.h
Expand Up @@ -59,6 +59,7 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesDecompositionDetail
using TMakeTestForSeasonality =
std::function<CTimeSeriesTestForSeasonality(const CExpandingWindow&,
core_t::TTime,
std::size_t,
const TFilteredPredictor&)>;
// clang-format on

Expand Down
15 changes: 9 additions & 6 deletions include/maths/time_series/CTimeSeriesTestForSeasonality.h
Expand Up @@ -224,14 +224,17 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesTestForSeasonality {
double outlierFraction = OUTLIER_FRACTION);

//! Check if it is possible to test for \p component given the window \p values.
static bool canTestComponent(const TFloatMeanAccumulatorVec& values,
core_t::TTime bucketsStartTime,
core_t::TTime bucketLength,
core_t::TTime minimumPeriod,
const CSeasonalTime& component);
static bool canTestModelledComponent(const TFloatMeanAccumulatorVec& values,
core_t::TTime bucketsStartTime,
core_t::TTime bucketLength,
core_t::TTime minimumPeriod,
std::size_t minimumResolution,
const CSeasonalTime& component);

//! Register a seasonal component which is already being modelled.
void addModelledSeasonality(const CSeasonalTime& period, std::size_t size);
void addModelledSeasonality(const CSeasonalTime& period,
std::size_t minimumResolution,
std::size_t size);

//! Add a predictor for the currently modelled seasonal components.
void modelledSeasonalityPredictor(const TPredictor& predictor);
Expand Down
6 changes: 5 additions & 1 deletion lib/maths/time_series/CExpandingWindow.cc
Expand Up @@ -104,6 +104,10 @@ std::size_t CExpandingWindow::size() const {
return m_Size;
}

bool CExpandingWindow::haveShorterWindows() const {
    // There are shorter windows exactly when the current bucket length index
    // is greater than zero.
    // NOTE(review): presumably index zero selects the shortest configured
    // bucket length - confirm against the bucket lengths this window holds.
    const bool beyondFirstBucketLength{0 < m_BucketLengthIndex};
    return beyondFirstBucketLength;
}

core_t::TTime CExpandingWindow::sampleAverageOffset() const {
    // Shift the mean offset by a half before the truncating cast to the time
    // type.
    // NOTE(review): this rounds to nearest provided the mean is a non-negative
    // floating point value - confirm the accumulator's value type.
    const auto shiftedMeanOffset = common::CBasicStatistics::mean(m_MeanOffset) + 0.5;
    return static_cast<core_t::TTime>(shiftedMeanOffset);
}
Expand Down Expand Up @@ -246,7 +250,7 @@ bool CExpandingWindow::needToCompress(core_t::TTime time) const {
return time >= this->endTime();
}

uint64_t CExpandingWindow::checksum(uint64_t seed) const {
std::uint64_t CExpandingWindow::checksum(std::uint64_t seed) const {
CScopeInflate inflate(*this, false);
seed = common::CChecksum::calculate(seed, m_BucketLengthIndex);
seed = common::CChecksum::calculate(seed, m_StartTime);
Expand Down
125 changes: 72 additions & 53 deletions lib/maths/time_series/CTimeSeriesDecompositionDetail.cc
Expand Up @@ -973,27 +973,35 @@ class CSeasonalityTestParameters {
}

static std::size_t numberBuckets(int window, core_t::TTime bucketLength) {
auto result = windowParameters(window, bucketLength);
return result != nullptr ? result->s_NumberBuckets : 0;
const auto* params = windowParameters(window, bucketLength);
return params != nullptr ? params->s_NumberBuckets : 0;
}

static core_t::TTime maxBucketLength(int window, core_t::TTime bucketLength) {
return bucketLengths(window, bucketLength)
? bucketLengths(window, bucketLength)->back()
: 0;
const auto* bucketLengths_ = bucketLengths(window, bucketLength);
return bucketLengths_ != nullptr ? bucketLengths_->back() : 0;
}

static const TTimeVec* bucketLengths(int window, core_t::TTime bucketLength) {
auto result = windowParameters(window, bucketLength);
return result != nullptr ? &result->s_BucketLengths : nullptr;
const auto* params = windowParameters(window, bucketLength);
return params != nullptr ? &params->s_BucketLengths : nullptr;
}

static const TTimeVec& testSchedule(int window, core_t::TTime bucketLength) {
return windowParameters(window, bucketLength)->s_TestSchedule;
const auto* params = windowParameters(window, bucketLength);
return params != nullptr ? params->s_TestSchedule : EMPTY_TEST_SCHEDULE;
}

static core_t::TTime shortestComponent(int window, core_t::TTime bucketLength) {
return windowParameters(window, bucketLength)->s_ShortestComponent;
const auto* params = windowParameters(window, bucketLength);
return params != nullptr ? params->s_ShortestComponent : 0;
}

//! Get the minimum resolution required to test a component which is already
//! being modelled.
//!
//! \param[in] window The index of the window whose parameters to look up.
//! \param[in] bucketLength The bucket length used to look up the parameters.
//! \param[in] shorterWindowAvailable True if the window being tested has
//! shorter windows available.
//! \return The configured minimum resolution if parameters exist for \p window
//! and \p bucketLength and a shorter window is available, otherwise 2.
static std::size_t minimumResolutionToTestModelledComponent(int window,
                                                            core_t::TTime bucketLength,
                                                            bool shorterWindowAvailable) {
    const auto* parameters = windowParameters(window, bucketLength);
    // Only demand the configured resolution when the parameters are known and
    // a shorter window exists; in every other case fall back to 2.
    if (parameters == nullptr || shorterWindowAvailable == false) {
        return 2;
    }
    return parameters->s_MinimumResolution;
}

private:
Expand All @@ -1002,16 +1010,18 @@ class CSeasonalityTestParameters {
SParameters(core_t::TTime bucketLength,
core_t::TTime shortestComponent,
std::size_t numberBuckets,
std::size_t minimumResolution,
const std::initializer_list<core_t::TTime>& bucketLengths,
const std::initializer_list<core_t::TTime>& testSchedule)
: s_BucketLength{bucketLength}, s_ShortestComponent{shortestComponent},
s_NumberBuckets{numberBuckets}, s_BucketLengths{bucketLengths}, s_TestSchedule{testSchedule} {
}
s_NumberBuckets{numberBuckets}, s_MinimumResolution{minimumResolution},
s_BucketLengths{bucketLengths}, s_TestSchedule{testSchedule} {}
bool operator<(core_t::TTime rhs) const { return s_BucketLength < rhs; }

core_t::TTime s_BucketLength = 0;
core_t::TTime s_ShortestComponent = 0;
std::size_t s_NumberBuckets = 0;
core_t::TTime s_BucketLength{0};
core_t::TTime s_ShortestComponent{0};
std::size_t s_NumberBuckets{0};
std::size_t s_MinimumResolution{0};
TTimeVec s_BucketLengths;
TTimeVec s_TestSchedule;
};
Expand All @@ -1027,6 +1037,7 @@ class CSeasonalityTestParameters {

private:
static const TParametersVecVec WINDOW_PARAMETERS;
static const TTimeVec EMPTY_TEST_SCHEDULE;
};

// These parameterise the windows used to test for periodic components. From
Expand All @@ -1040,40 +1051,42 @@ class CSeasonalityTestParameters {
// when we'll test for seasonal components.
const CSeasonalityTestParameters::TParametersVecVec CSeasonalityTestParameters::WINDOW_PARAMETERS{
/* SHORT WINDOW */
{{1, 1, 180, {1, 5, 10, 30, 60, 300, 600}, {}},
{5, 1, 180, {5, 10, 30, 60, 300, 600}, {}},
{10, 1, 180, {10, 30, 60, 300, 600}, {}},
{30, 1, 180, {30, 60, 300, 600}, {}},
{60, 1, 336, {60, 300, 900, 3600, 7200}, {3 * 604800}},
{300, 1, 336, {300, 900, 3600, 7200}, {3 * 604800}},
{600, 1, 336, {600, 3600, 7200}, {3 * 604800}},
{900, 1, 336, {900, 3600, 7200}, {3 * 604800}},
{1200, 1, 336, {1200, 3600, 7200}, {3 * 86400, 3 * 604800}},
{1800, 1, 336, {1800, 3600, 7200}, {3 * 86400, 3 * 604800}},
{3600, 1, 336, {3600, 7200}, {3 * 86400, 604800, 3 * 604800}},
{7200, 1, 336, {7200, 14400}, {3 * 86400, 604800, 3 * 604800}},
{14400, 1, 336, {14400}, {604800, 3 * 604800}},
{21600, 1, 224, {21600}, {604800, 3 * 604800}},
{28800, 1, 168, {28800}, {3 * 604800}},
{43200, 1, 112, {43200}, {4 * 604800}},
{86400, 1, 56, {86400}, {}}},
{{1, 1, 180, 10, {1, 5, 10, 30, 60, 300, 600}, {}},
{5, 1, 180, 10, {5, 10, 30, 60, 300, 600}, {}},
{10, 1, 180, 10, {10, 30, 60, 300, 600}, {}},
{30, 1, 180, 10, {30, 60, 300, 600}, {}},
{60, 1, 336, 12, {60, 300, 900, 3600, 7200}, {3 * 604800}},
{300, 1, 336, 12, {300, 900, 3600, 7200}, {3 * 604800}},
{600, 1, 336, 12, {600, 3600, 7200}, {3 * 604800}},
{900, 1, 336, 12, {900, 3600, 7200}, {3 * 604800}},
{1200, 1, 336, 12, {1200, 3600, 7200}, {3 * 86400, 3 * 604800}},
{1800, 1, 336, 12, {1800, 3600, 7200}, {3 * 86400, 3 * 604800}},
{3600, 1, 336, 12, {3600, 7200}, {3 * 86400, 604800, 3 * 604800}},
{7200, 1, 336, 12, {7200, 14400}, {3 * 86400, 604800, 3 * 604800}},
{14400, 1, 336, 6, {14400}, {604800, 3 * 604800}},
{21600, 1, 224, 6, {21600}, {604800, 3 * 604800}},
{28800, 1, 168, 6, {28800}, {3 * 604800}},
{43200, 1, 112, 6, {43200}, {4 * 604800}},
{86400, 1, 56, 6, {86400}, {}}},
/* LONG WINDOW */
{{1, 30601, 336, {900, 3600, 7200}, {3 * 604800}},
{5, 30601, 336, {900, 3600, 7200}, {3 * 604800}},
{10, 30601, 336, {900, 3600, 7200}, {3 * 604800}},
{30, 30601, 336, {900, 3600, 7200}, {3 * 604800}},
{60, 648001, 156, {43200, 86400, 604800}, {104 * 604800}},
{300, 648001, 156, {43200, 86400, 604800}, {104 * 604800}},
{600, 648001, 156, {43200, 86400, 604800}, {104 * 604800}},
{900, 648001, 156, {43200, 86400, 604800}, {104 * 604800}},
{1200, 648001, 156, {43200, 86400, 604800}, {104 * 604800}},
{1800, 648001, 156, {43200, 86400, 604800}, {104 * 604800}},
{3600, 648001, 156, {43200, 86400, 604800}, {104 * 604800}},
{7200, 648001, 156, {43200, 86400, 604800}, {104 * 604800}},
{14400, 648001, 156, {43200, 86400, 604800}, {104 * 604800}},
{86400, 648001, 156, {43200, 86400, 604800}, {104 * 604800}},
{604800, 648001, 156, {43200, 86400, 604800}, {104 * 604800}}}};
}
{{1, 30601, 336, 12, {900, 3600, 7200}, {3 * 604800}},
{5, 30601, 336, 12, {900, 3600, 7200}, {3 * 604800}},
{10, 30601, 336, 12, {900, 3600, 7200}, {3 * 604800}},
{30, 30601, 336, 12, {900, 3600, 7200}, {3 * 604800}},
{60, 648001, 156, 6, {43200, 86400, 604800}, {104 * 604800}},
{300, 648001, 156, 6, {43200, 86400, 604800}, {104 * 604800}},
{600, 648001, 156, 6, {43200, 86400, 604800}, {104 * 604800}},
{900, 648001, 156, 6, {43200, 86400, 604800}, {104 * 604800}},
{1200, 648001, 156, 6, {43200, 86400, 604800}, {104 * 604800}},
{1800, 648001, 156, 6, {43200, 86400, 604800}, {104 * 604800}},
{3600, 648001, 156, 6, {43200, 86400, 604800}, {104 * 604800}},
{7200, 648001, 156, 6, {43200, 86400, 604800}, {104 * 604800}},
{14400, 648001, 156, 6, {43200, 86400, 604800}, {104 * 604800}},
{86400, 648001, 156, 6, {43200, 86400, 604800}, {104 * 604800}},
{604800, 648001, 156, 6, {43200, 86400, 604800}, {104 * 604800}}}};
}

// The default constructed test schedule which testSchedule returns when there
// are no window parameters for the requested window and bucket length.
const TTimeVec CSeasonalityTestParameters::EMPTY_TEST_SCHEDULE;

CTimeSeriesDecompositionDetail::CSeasonalityTest::CSeasonalityTest(double decayRate,
core_t::TTime bucketLength)
Expand Down Expand Up @@ -1203,8 +1216,12 @@ void CTimeSeriesDecompositionDetail::CSeasonalityTest::test(const SAddValue& mes
const auto& window = m_Windows[i];
core_t::TTime minimumPeriod{
CSeasonalityTestParameters::shortestComponent(i, m_BucketLength)};
auto seasonalityTest =
makeTest(*window, minimumPeriod, makePreconditioner());
std::size_t minimumResolutionToTestModelledComponent{
CSeasonalityTestParameters::minimumResolutionToTestModelledComponent(
i, m_BucketLength, window->haveShorterWindows())};
auto seasonalityTest = makeTest(*window, minimumPeriod,
minimumResolutionToTestModelledComponent,
makePreconditioner());
seasonalityTest.fitAndRemoveUntestableModelledComponents();

auto decomposition = seasonalityTest.decompose();
Expand Down Expand Up @@ -1974,16 +1991,17 @@ void CTimeSeriesDecompositionDetail::CComponents::useTrendForPrediction() {
CTimeSeriesDecompositionDetail::TMakeTestForSeasonality
CTimeSeriesDecompositionDetail::CComponents::makeTestForSeasonality(const TFilteredPredictor& predictor) const {
return [predictor, this](const CExpandingWindow& window, core_t::TTime minimumPeriod,
std::size_t minimumResolutionToTestModelledComponent,
const TFilteredPredictor& preconditioner) {
core_t::TTime valuesStartTime{window.beginValuesTime()};
core_t::TTime windowBucketStartTime{window.bucketStartTime()};
core_t::TTime windowBucketLength{window.bucketLength()};
auto values = window.values();
TBoolVec testableMask;
for (const auto& component : this->seasonal()) {
testableMask.push_back(CTimeSeriesTestForSeasonality::canTestComponent(
values, windowBucketStartTime, windowBucketLength,
minimumPeriod, component.time()));
testableMask.push_back(CTimeSeriesTestForSeasonality::canTestModelledComponent(
values, windowBucketStartTime, windowBucketLength, minimumPeriod,
minimumResolutionToTestModelledComponent, component.time()));
}
values = window.valuesMinusPrediction(std::move(values), [&](core_t::TTime time) {
return preconditioner(time, testableMask);
Expand All @@ -1997,7 +2015,8 @@ CTimeSeriesDecompositionDetail::CComponents::makeTestForSeasonality(const TFilte
.modelledSeasonalityPredictor(predictor);
std::ptrdiff_t maximumNumberComponents{MAXIMUM_COMPONENTS};
for (const auto& component : this->seasonal()) {
test.addModelledSeasonality(component.time(), component.size());
test.addModelledSeasonality(component.time(), minimumResolutionToTestModelledComponent,
component.size());
--maximumNumberComponents;
}
test.maximumNumberOfComponents(maximumNumberComponents);
Expand Down
22 changes: 14 additions & 8 deletions lib/maths/time_series/CTimeSeriesTestForSeasonality.cc
Expand Up @@ -271,23 +271,29 @@ CTimeSeriesTestForSeasonality::CTimeSeriesTestForSeasonality(core_t::TTime value
LOG_TRACE(<< "eps variance = " << m_EpsVariance);
}

bool CTimeSeriesTestForSeasonality::canTestComponent(const TFloatMeanAccumulatorVec& values,
core_t::TTime bucketsStartTime,
core_t::TTime bucketLength,
core_t::TTime minimumPeriod,
const CSeasonalTime& component) {
bool CTimeSeriesTestForSeasonality::canTestModelledComponent(
const TFloatMeanAccumulatorVec& values,
core_t::TTime bucketsStartTime,
core_t::TTime bucketLength,
core_t::TTime minimumPeriod,
std::size_t minimumResolution,
const CSeasonalTime& component) {
std::size_t minimumPeriodInBuckets{
std::max(buckets(bucketLength, minimumPeriod), minimumResolution)};
return 10 * (component.period() % bucketLength) < component.period() &&
canTestPeriod(values, buckets(bucketLength, minimumPeriod),
canTestPeriod(values, minimumPeriodInBuckets,
toPeriod(bucketsStartTime, bucketLength, component));
}

void CTimeSeriesTestForSeasonality::addModelledSeasonality(const CSeasonalTime& component,
std::size_t minimumResolution,
std::size_t size) {
auto period = toPeriod(m_BucketsStartTime, m_BucketLength, component);
m_ModelledPeriods.push_back(period);
m_ModelledPeriodsSizes.push_back(size);
m_ModelledPeriodsTestable.push_back(canTestComponent(
m_Values, m_BucketsStartTime, m_BucketLength, m_MinimumPeriod, component));
m_ModelledPeriodsTestable.push_back(
canTestModelledComponent(m_Values, m_BucketsStartTime, m_BucketLength,
m_MinimumPeriod, minimumResolution, component));
if (period.windowed()) {
m_StartOfWeekOverride = period.s_StartOfWeek;
// We need the actual time in case it isn't a multiple of the bucket length
Expand Down

0 comments on commit e3d3d04

Please sign in to comment.