Asynchronous cache misses in flight


The number of asynchronous cache misses in flight affects performance, depending on the cache size, the latency of the backing-store algorithm, and the number of cache hits. Here is a simple benchmark that tests how performance scales with the number of cache misses in flight:

  • allocates a cache big enough to hold 100x100 pixels of an image
  • continuously increases the image size up to 105x105
  • hides cache-miss latencies to keep re-computation time at a minimum
"use strict";

let Lru = require("./lrucache.js").Lru;


let cache = new Lru(10000 /* enough to hold 100x100 pixels */, async function(key,callback){

    	// cache-miss data-load algorithm
	// Mandelbrot Set Generator

 	let xy=key.split(",");
	const MAX_ITERATION = 100
		
	    let z = { x: 0, y: 0 }, n = 0, p, d;
	    do {
		p = {
		    x: Math.pow(z.x, 2) - Math.pow(z.y, 2),
		    y: 2 * z.x * z.y
		};
		z = {
		    x: p.x + (xy[0]-200)/200,
		    y: p.y + (xy[1]-200)/200
		};
		d = Math.sqrt(Math.pow(z.x, 2) + Math.pow(z.y, 2));
		n += 1;
	    } while (d <= 2 && n < MAX_ITERATION);
	    
	
     	callback(n);

},100000000 /* cache element lifetime milliseconds */);


// avoiding callback-hell when multiple values are needed at once
// (all values are gathered asynchronously and joined into a result array)
let N = 1;
function test()
{
	let ctr = 0;
	let t = Date.now();
	for(let x = 0; x < N; x++)
	{
		for(let y = 0; y < N; y++)
		{
			// fetch the pixel and its four diagonal neighbors in a single call
			cache.getMultiple(function(results){
				let smoothedPixel = results.reduce((a, b) => a + b) / 5.0; // smoothing result (unused; only timing matters here)
				ctr++;
				if(ctr == N * N)
				{
					console.log("benchmark for image-smoothing for " + N + "x" + N + " sized image:");
					console.log((Date.now() - t) + " milliseconds for " + (N * N * 5) + " pixel accesses ==> " + (1000 * (Date.now() - t) / (N * N * 5)) + " microseconds per pixel");
					console.log("  ");
					if(N < 105){ N++; test(); }
				}
			}, x + "," + y, (x + 1) + "," + (y + 1), (x - 1) + "," + (y - 1), (x + 1) + "," + (y - 1), (x - 1) + "," + (y + 1));
		}
	}
}

test();

Output starts like this:

benchmark for image-smoothing for 1x1 sized image:
11 milliseconds for 5 pixel accesses ==> 2200 microseconds per pixel

benchmark for image-smoothing for 2x2 sized image:
12 milliseconds for 20 pixel accesses ==> 600 microseconds per pixel

benchmark for image-smoothing for 3x3 sized image:
3 milliseconds for 45 pixel accesses ==> 66.66666666666667 microseconds per pixel

and ends like this:

benchmark for image-smoothing for 97x97 sized image:
63 milliseconds for 47045 pixel accesses ==> 1.3391433733659261 microseconds per pixel

benchmark for image-smoothing for 98x98 sized image:
64 milliseconds for 48020 pixel accesses ==> 1.3327780091628487 microseconds per pixel

benchmark for image-smoothing for 99x99 sized image:
80 milliseconds for 49005 pixel accesses ==> 1.6324864809713295 microseconds per pixel

benchmark for image-smoothing for 100x100 sized image:
85 milliseconds for 50000 pixel accesses ==> 1.7 microseconds per pixel

benchmark for image-smoothing for 101x101 sized image:
93 milliseconds for 51005 pixel accesses ==> 1.823350651896873 microseconds per pixel

benchmark for image-smoothing for 102x102 sized image:
107 milliseconds for 52020 pixel accesses ==> 2.0569011918492888 microseconds per pixel

benchmark for image-smoothing for 103x103 sized image:
128 milliseconds for 53045 pixel accesses ==> 2.413045527382411 microseconds per pixel

benchmark for image-smoothing for 104x104 sized image:
123 milliseconds for 54080 pixel accesses ==> 2.274408284023669 microseconds per pixel

benchmark for image-smoothing for 105x105 sized image:
136 milliseconds for 55125 pixel accesses ==> 2.4671201814058956 microseconds per pixel

There is a sudden increase in timings at 100x100, since that is exactly the size of the cache. At 100x100 the cache starts handling cache-miss operations, but each miss here is a synchronous computation that cannot be latency-hidden, so the timing keeps increasing.
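
A minimal standalone sketch of why this happens (independent of the cache; the busyWait helper below is a hypothetical stand-in for the Mandelbrot computation): on Node's single-threaded event loop, two CPU-bound "async" tasks run back-to-back, while two timer-based tasks overlap:

"use strict";

// CPU-bound work: blocks the event loop for the given duration
function busyWait(ms){
	let end = Date.now() + ms;
	while(Date.now() < end){ /* spin */ }
}

let t1 = Date.now();
Promise.all([
	new Promise(resolve => { busyWait(100); resolve(); }),
	new Promise(resolve => { busyWait(100); resolve(); })
]).then(() => {
	console.log("two CPU-bound tasks: " + (Date.now() - t1) + " ms"); // ~200 ms: serialized
	let t2 = Date.now();
	Promise.all([
		new Promise(resolve => setTimeout(resolve, 100)),
		new Promise(resolve => setTimeout(resolve, 100))
	]).then(() => {
		console.log("two timer-based tasks: " + (Date.now() - t2) + " ms"); // ~100 ms: overlapped
	});
});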

To really hide latencies, the miss operations must be truly asynchronous themselves. To simulate this, the Mandelbrot computation can be replaced with a simple setTimeout that waits 150 milliseconds:

"use strict";

let Lru = require("./lrucache.js").Lru;


let cache = new Lru(10000 /* enough to hold 100x100 pixels */, async function(key,callback){

    	// cache-miss data-load algorithm
	let randomResult = 5;
	setTimeout(function(){
	     	callback(randomResult);
	},150);

},100000000 /* cache element lifetime milliseconds */);


// avoiding callback-hell when multiple values are needed at once
// (all values are gathered asynchronously and joined into a result array)
let N = 1;
function test()
{
	let ctr = 0;
	let t = Date.now();
	for(let x = 0; x < N; x++)
	{
		for(let y = 0; y < N; y++)
		{
			// fetch the pixel and its four diagonal neighbors in a single call
			cache.getMultiple(function(results){
				let smoothedPixel = results.reduce((a, b) => a + b) / 5.0; // smoothing result (unused; only timing matters here)
				ctr++;
				if(ctr == N * N)
				{
					console.log("benchmark for image-smoothing for " + N + "x" + N + " sized image:");
					console.log((Date.now() - t) + " milliseconds for " + (N * N * 5) + " pixel accesses ==> " + (1000 * (Date.now() - t) / (N * N * 5)) + " microseconds per pixel");
					console.log("  ");
					if(N < 105){ N++; test(); }
				}
			}, x + "," + y, (x + 1) + "," + (y + 1), (x - 1) + "," + (y - 1), (x + 1) + "," + (y - 1), (x - 1) + "," + (y + 1));
		}
	}
}

test();

Output:

benchmark for image-smoothing for 1x1 sized image:
163 milliseconds for 5 pixel accesses ==> 32600 microseconds per pixel

benchmark for image-smoothing for 2x2 sized image:
151 milliseconds for 20 pixel accesses ==> 7550 microseconds per pixel

benchmark for image-smoothing for 3x3 sized image:
152 milliseconds for 45 pixel accesses ==> 3377.777777777778 microseconds per pixel

benchmark for image-smoothing for 4x4 sized image:
152 milliseconds for 80 pixel accesses ==> 1900 microseconds per pixel

benchmark for image-smoothing for 5x5 sized image:
153 milliseconds for 125 pixel accesses ==> 1224 microseconds per pixel
...
...
...
benchmark for image-smoothing for 98x98 sized image:
220 milliseconds for 48020 pixel accesses ==> 4.581424406497293 microseconds per pixel

benchmark for image-smoothing for 99x99 sized image:
223 milliseconds for 49005 pixel accesses ==> 4.550556065707581 microseconds per pixel

benchmark for image-smoothing for 100x100 sized image:
227 milliseconds for 50000 pixel accesses ==> 4.54 microseconds per pixel

benchmark for image-smoothing for 101x101 sized image:
228 milliseconds for 51005 pixel accesses ==> 4.470149985295559 microseconds per pixel

benchmark for image-smoothing for 102x102 sized image:
232 milliseconds for 52020 pixel accesses ==> 4.459823144944252 microseconds per pixel

benchmark for image-smoothing for 103x103 sized image:
233 milliseconds for 53045 pixel accesses ==> 4.392496936563295 microseconds per pixel

benchmark for image-smoothing for 104x104 sized image:
239 milliseconds for 54080 pixel accesses ==> 4.419378698224852 microseconds per pixel

benchmark for image-smoothing for 105x105 sized image:
239 milliseconds for 55125 pixel accesses ==> 4.3356009070294785 microseconds per pixel

There is no performance drop at all, even at 105x105, because all of the cache-misses are hidden behind the other item accesses, including the cache-hits. In this particular example, the ~50k cache-hits effectively hid all 5125 cache-misses; and even where the hits cannot cover them, those 5125 misses still hide each other, costing roughly 1x extra latency instead of 5125x.
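
A minimal sketch of that last point, assuming each miss costs the same 150 ms wait as above: thousands of in-flight timers complete in roughly one latency period, not one latency period each:

"use strict";

// simulate 5125 in-flight cache-misses, each with 150 ms latency
let t = Date.now();
let pending = 5125;
for(let i = 0; i < 5125; i++){
	setTimeout(function(){
		pending--;
		if(pending == 0){
			// all misses overlap: total time is ~150 ms, not 5125 * 150 ms
			console.log("5125 overlapped misses took " + (Date.now() - t) + " ms");
		}
	}, 150);
}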
