1
1
// TODO include the event._id (and define a JSON encoding for ObjectId?)
2
2
// TODO allow the event time to change when updating (fix invalidation)
3
- // TODO fix race condition between cache invalidation and metric computation
4
3
5
4
var mongodb = require ( "mongodb" ) ,
6
5
parser = require ( "./event-expression" ) ,
7
6
tiers = require ( "./tiers" ) ,
8
- types = require ( "./types" ) ;
7
+ types = require ( "./types" ) ,
8
+ bisect = require ( "./bisect" ) ;
9
9
10
10
var type_re = / ^ [ a - z ] [ a - z A - Z 0 - 9 _ ] + $ / ,
11
11
invalidate = { $set : { i : true } } ,
@@ -18,10 +18,14 @@ var type_re = /^[a-z][a-zA-Z0-9_]+$/,
18
18
var streamDelayDefault = 5000 ,
19
19
streamInterval = 1000 ;
20
20
21
+ // How frequently to invalidate metrics after receiving events.
22
+ var invalidateInterval = 5000 ;
23
+
21
24
exports . putter = function ( db ) {
22
25
var collection = types ( db ) ,
23
26
knownByType = { } ,
24
- eventsToSaveByType = { } ;
27
+ eventsToSaveByType = { } ,
28
+ timesToInvalidateByTierByType = { } ;
25
29
26
30
function putter ( request , callback ) {
27
31
var time = new Date ( request . time ) ,
@@ -78,20 +82,59 @@ exports.putter = function(db) {
78
82
} ) ;
79
83
}
80
84
81
- // Save the event of the specified type, and invalidate any cached metrics
82
- // associated with this event type and time.
85
+ // Save the event of the specified type, and queue invalidation of any cached
86
+ // metrics associated with this event type and time.
87
+ //
88
+ // We don't invalidate the events immediately. This would cause many redundant
89
+ // updates when many events are received simultaneously. Also, having a short
90
+ // delay between saving the event and invalidating the metrics reduces the
91
+ // likelihood of a race condition between when the events are read by the
92
+ // evaluator and when the newly-computed metrics are saved.
83
93
function save ( type , event ) {
84
- type = collection ( type ) ;
85
- type . events . save ( event , handle ) ;
86
- for ( var tier in tiers ) {
87
- type . metrics . update ( {
88
- i : false ,
89
- "_id.l" : + tier ,
90
- "_id.t" : tiers [ tier ] . floor ( event . t )
91
- } , invalidate , handle ) ;
94
+ collection ( type ) . events . save ( event , handle ) ;
95
+ queueInvalidation ( type , event ) ;
96
+ }
97
+
98
+ // Schedule deferred invalidation of metrics for this type.
99
+ // For each type and tier, track the metric times to invalidate.
100
+ // The times are kept in sorted order for bisection.
101
+ function queueInvalidation ( type , event ) {
102
+ var timesToInvalidateByTier = timesToInvalidateByTierByType [ type ] ,
103
+ time = event . t ;
104
+ if ( timesToInvalidateByTier ) {
105
+ for ( var tier in tiers ) {
106
+ var tierTimes = timesToInvalidateByTier [ tier ] ,
107
+ tierTime = tiers [ tier ] . floor ( time ) ,
108
+ i = bisect ( tierTimes , tierTime ) ;
109
+ if ( tierTimes [ i ] > tierTime ) tierTimes . splice ( i , 0 , tierTime ) ;
110
+ }
111
+ } else {
112
+ timesToInvalidateByTier = timesToInvalidateByTierByType [ type ] = { } ;
113
+ for ( var tier in tiers ) {
114
+ timesToInvalidateByTier [ tier ] = [ tiers [ tier ] . floor ( time ) ] ;
115
+ }
92
116
}
93
117
}
94
118
119
+ // Process any deferred metric invalidations, flushing the queues. Note that
120
+ // the queue (timesToInvalidateByTierByType) is copied-on-write, so while the
121
+ // previous batch of events are being invalidated, new events can arrive.
122
+ setInterval ( function ( ) {
123
+ for ( var type in timesToInvalidateByTierByType ) {
124
+ var metrics = collection ( type ) . metrics ,
125
+ timesToInvalidateByTier = timesToInvalidateByTierByType [ type ] ;
126
+ for ( var tier in tiers ) {
127
+ metrics . update ( {
128
+ i : false ,
129
+ "_id.l" : + tier ,
130
+ "_id.t" : { $in : timesToInvalidateByTier [ tier ] }
131
+ } , invalidate , multi ) ;
132
+ }
133
+ flushed = true ;
134
+ }
135
+ timesToInvalidateByTierByType = { } ; // copy-on-write
136
+ } , invalidateInterval ) ;
137
+
95
138
return putter ;
96
139
} ;
97
140
0 commit comments