forked from redis/redis
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
718 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,364 @@ | ||
/* Maxmemory directive handling (LRU eviction and other policies). | ||
* | ||
* ---------------------------------------------------------------------------- | ||
* | ||
* Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com> | ||
* All rights reserved. | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
* | ||
* * Redistributions of source code must retain the above copyright notice, | ||
* this list of conditions and the following disclaimer. | ||
* * Redistributions in binary form must reproduce the above copyright | ||
* notice, this list of conditions and the following disclaimer in the | ||
* documentation and/or other materials provided with the distribution. | ||
* * Neither the name of Redis nor the names of its contributors may be used | ||
* to endorse or promote products derived from this software without | ||
* specific prior written permission. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
* POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
|
||
#include "server.h" | ||
#include "bio.h" | ||
|
||
/* Return the LRU clock, based on the clock resolution. This is a time | ||
* in a reduced-bits format that can be used to set and check the | ||
* object->lru field of redisObject structures. */ | ||
unsigned int getLRUClock(void) { | ||
return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX; | ||
} | ||
|
||
/* Given an object returns the min number of milliseconds the object was never | ||
* requested, using an approximated LRU algorithm. */ | ||
unsigned long long estimateObjectIdleTime(robj *o) { | ||
unsigned long long lruclock = LRU_CLOCK(); | ||
if (lruclock >= o->lru) { | ||
return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION; | ||
} else { | ||
return (lruclock + (LRU_CLOCK_MAX - o->lru)) * | ||
LRU_CLOCK_RESOLUTION; | ||
} | ||
} | ||
|
||
/* freeMemoryIfNeeded() gets called when 'maxmemory' is set on the config | ||
* file to limit the max memory used by the server, before processing a | ||
* command. | ||
* | ||
* The goal of the function is to free enough memory to keep Redis under the | ||
* configured memory limit. | ||
* | ||
* The function starts calculating how many bytes should be freed to keep | ||
* Redis under the limit, and enters a loop selecting the best keys to | ||
* evict accordingly to the configured policy. | ||
* | ||
* If all the bytes needed to return back under the limit were freed the | ||
* function returns C_OK, otherwise C_ERR is returned, and the caller | ||
* should block the execution of commands that will result in more memory | ||
* used by the server. | ||
* | ||
* ------------------------------------------------------------------------ | ||
* | ||
* LRU approximation algorithm | ||
* | ||
* Redis uses an approximation of the LRU algorithm that runs in constant | ||
* memory. Every time there is a key to expire, we sample N keys (with | ||
* N very small, usually around 5) to populate a pool of best keys to | ||
* evict of M keys (the pool size is defined by MAXMEMORY_EVICTION_POOL_SIZE). | ||
* | ||
* The N keys sampled are added in the pool of good keys to expire (the one | ||
* with an old access time) if they are better than one of the current keys | ||
* in the pool. | ||
* | ||
* After the pool is populated, the best key we have in the pool is expired. | ||
* However note that we don't remove keys from the pool when they are deleted | ||
* so the pool may contain keys that no longer exist. | ||
* | ||
* When we try to evict a key, and all the entries in the pool don't exist | ||
* we populate it again. This time we'll be sure that the pool has at least | ||
* one key that can be evicted, if there is at least one key that can be | ||
* evicted in the whole database. */ | ||
|
||
/* Create a new eviction pool. */ | ||
struct evictionPoolEntry *evictionPoolAlloc(void) { | ||
struct evictionPoolEntry *ep; | ||
int j; | ||
|
||
ep = zmalloc(sizeof(*ep)*MAXMEMORY_EVICTION_POOL_SIZE); | ||
for (j = 0; j < MAXMEMORY_EVICTION_POOL_SIZE; j++) { | ||
ep[j].idle = 0; | ||
ep[j].key = NULL; | ||
} | ||
return ep; | ||
} | ||
|
||
/* This is an helper function for freeMemoryIfNeeded(), it is used in order | ||
* to populate the evictionPool with a few entries every time we want to | ||
* expire a key. Keys with idle time smaller than one of the current | ||
* keys are added. Keys are always added if there are free entries. | ||
* | ||
* We insert keys on place in ascending order, so keys with the smaller | ||
* idle time are on the left, and keys with the higher idle time on the | ||
* right. */ | ||
|
||
#define EVICTION_SAMPLES_ARRAY_SIZE 16 | ||
void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) { | ||
int j, k, count; | ||
dictEntry *_samples[EVICTION_SAMPLES_ARRAY_SIZE]; | ||
dictEntry **samples; | ||
|
||
/* Try to use a static buffer: this function is a big hit... | ||
* Note: it was actually measured that this helps. */ | ||
if (server.maxmemory_samples <= EVICTION_SAMPLES_ARRAY_SIZE) { | ||
samples = _samples; | ||
} else { | ||
samples = zmalloc(sizeof(samples[0])*server.maxmemory_samples); | ||
} | ||
|
||
count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples); | ||
for (j = 0; j < count; j++) { | ||
unsigned long long idle; | ||
sds key; | ||
robj *o; | ||
dictEntry *de; | ||
|
||
de = samples[j]; | ||
key = dictGetKey(de); | ||
/* If the dictionary we are sampling from is not the main | ||
* dictionary (but the expires one) we need to lookup the key | ||
* again in the key dictionary to obtain the value object. */ | ||
if (sampledict != keydict) de = dictFind(keydict, key); | ||
o = dictGetVal(de); | ||
idle = estimateObjectIdleTime(o); | ||
|
||
/* Insert the element inside the pool. | ||
* First, find the first empty bucket or the first populated | ||
* bucket that has an idle time smaller than our idle time. */ | ||
k = 0; | ||
while (k < MAXMEMORY_EVICTION_POOL_SIZE && | ||
pool[k].key && | ||
pool[k].idle < idle) k++; | ||
if (k == 0 && pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key != NULL) { | ||
/* Can't insert if the element is < the worst element we have | ||
* and there are no empty buckets. */ | ||
continue; | ||
} else if (k < MAXMEMORY_EVICTION_POOL_SIZE && pool[k].key == NULL) { | ||
/* Inserting into empty position. No setup needed before insert. */ | ||
} else { | ||
/* Inserting in the middle. Now k points to the first element | ||
* greater than the element to insert. */ | ||
if (pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key == NULL) { | ||
/* Free space on the right? Insert at k shifting | ||
* all the elements from k to end to the right. */ | ||
memmove(pool+k+1,pool+k, | ||
sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1)); | ||
} else { | ||
/* No free space on right? Insert at k-1 */ | ||
k--; | ||
/* Shift all elements on the left of k (included) to the | ||
* left, so we discard the element with smaller idle time. */ | ||
sdsfree(pool[0].key); | ||
memmove(pool,pool+1,sizeof(pool[0])*k); | ||
} | ||
} | ||
pool[k].key = sdsdup(key); | ||
pool[k].idle = idle; | ||
} | ||
if (samples != _samples) zfree(samples); | ||
} | ||
|
||
int freeMemoryIfNeeded(void) { | ||
size_t mem_reported, mem_used, mem_tofree, mem_freed; | ||
int slaves = listLength(server.slaves); | ||
mstime_t latency, eviction_latency; | ||
long long delta; | ||
|
||
/* Check if we are over the memory usage limit. If we are not, no need | ||
* to subtract the slaves output buffers. We can just return ASAP. */ | ||
mem_reported = zmalloc_used_memory(); | ||
if (mem_reported <= server.maxmemory) return C_OK; | ||
|
||
/* Remove the size of slaves output buffers and AOF buffer from the | ||
* count of used memory. */ | ||
mem_used = mem_reported; | ||
if (slaves) { | ||
listIter li; | ||
listNode *ln; | ||
|
||
listRewind(server.slaves,&li); | ||
while((ln = listNext(&li))) { | ||
client *slave = listNodeValue(ln); | ||
unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave); | ||
if (obuf_bytes > mem_used) | ||
mem_used = 0; | ||
else | ||
mem_used -= obuf_bytes; | ||
} | ||
} | ||
if (server.aof_state != AOF_OFF) { | ||
mem_used -= sdslen(server.aof_buf); | ||
mem_used -= aofRewriteBufferSize(); | ||
} | ||
|
||
/* Check if we are still over the memory limit. */ | ||
if (mem_used <= server.maxmemory) return C_OK; | ||
|
||
/* Compute how much memory we need to free. */ | ||
mem_tofree = mem_used - server.maxmemory; | ||
mem_freed = 0; | ||
|
||
if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION) | ||
goto cant_free; /* We need to free memory, but policy forbids. */ | ||
|
||
latencyStartMonitor(latency); | ||
while (mem_freed < mem_tofree) { | ||
int j, k, keys_freed = 0; | ||
|
||
for (j = 0; j < server.dbnum; j++) { | ||
long bestval = 0; /* just to prevent warning */ | ||
sds bestkey = NULL; | ||
dictEntry *de; | ||
redisDb *db = server.db+j; | ||
dict *dict; | ||
|
||
if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || | ||
server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) | ||
{ | ||
dict = server.db[j].dict; | ||
} else { | ||
dict = server.db[j].expires; | ||
} | ||
if (dictSize(dict) == 0) continue; | ||
|
||
/* volatile-random and allkeys-random policy */ | ||
if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM || | ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM) | ||
{ | ||
de = dictGetRandomKey(dict); | ||
bestkey = dictGetKey(de); | ||
} | ||
|
||
/* volatile-lru and allkeys-lru policy */ | ||
else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || | ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU) | ||
{ | ||
struct evictionPoolEntry *pool = db->eviction_pool; | ||
|
||
while(bestkey == NULL) { | ||
evictionPoolPopulate(dict, db->dict, db->eviction_pool); | ||
/* Go backward from best to worst element to evict. */ | ||
for (k = MAXMEMORY_EVICTION_POOL_SIZE-1; k >= 0; k--) { | ||
if (pool[k].key == NULL) continue; | ||
de = dictFind(dict,pool[k].key); | ||
|
||
/* Remove the entry from the pool. */ | ||
sdsfree(pool[k].key); | ||
/* Shift all elements on its right to left. */ | ||
memmove(pool+k,pool+k+1, | ||
sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1)); | ||
/* Clear the element on the right which is empty | ||
* since we shifted one position to the left. */ | ||
pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key = NULL; | ||
pool[MAXMEMORY_EVICTION_POOL_SIZE-1].idle = 0; | ||
|
||
/* If the key exists, is our pick. Otherwise it is | ||
* a ghost and we need to try the next element. */ | ||
if (de) { | ||
bestkey = dictGetKey(de); | ||
break; | ||
} else { | ||
/* Ghost... */ | ||
continue; | ||
} | ||
} | ||
} | ||
} | ||
|
||
/* volatile-ttl */ | ||
else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { | ||
for (k = 0; k < server.maxmemory_samples; k++) { | ||
sds thiskey; | ||
long thisval; | ||
|
||
de = dictGetRandomKey(dict); | ||
thiskey = dictGetKey(de); | ||
thisval = (long) dictGetVal(de); | ||
|
||
/* Expire sooner (minor expire unix timestamp) is better | ||
* candidate for deletion */ | ||
if (bestkey == NULL || thisval < bestval) { | ||
bestkey = thiskey; | ||
bestval = thisval; | ||
} | ||
} | ||
} | ||
|
||
/* Finally remove the selected key. */ | ||
if (bestkey) { | ||
robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); | ||
propagateExpire(db,keyobj,server.lazyfree_lazy_eviction); | ||
/* We compute the amount of memory freed by db*Delete() alone. | ||
* It is possible that actually the memory needed to propagate | ||
* the DEL in AOF and replication link is greater than the one | ||
* we are freeing removing the key, but we can't account for | ||
* that otherwise we would never exit the loop. | ||
* | ||
* AOF and Output buffer memory will be freed eventually so | ||
* we only care about memory used by the key space. */ | ||
delta = (long long) zmalloc_used_memory(); | ||
latencyStartMonitor(eviction_latency); | ||
if (server.lazyfree_lazy_eviction) | ||
dbAsyncDelete(db,keyobj); | ||
else | ||
dbSyncDelete(db,keyobj); | ||
latencyEndMonitor(eviction_latency); | ||
latencyAddSampleIfNeeded("eviction-del",eviction_latency); | ||
latencyRemoveNestedEvent(latency,eviction_latency); | ||
delta -= (long long) zmalloc_used_memory(); | ||
mem_freed += delta; | ||
server.stat_evictedkeys++; | ||
notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted", | ||
keyobj, db->id); | ||
decrRefCount(keyobj); | ||
keys_freed++; | ||
|
||
/* When the memory to free starts to be big enough, we may | ||
* start spending so much time here that is impossible to | ||
* deliver data to the slaves fast enough, so we force the | ||
* transmission here inside the loop. */ | ||
if (slaves) flushSlavesOutputBuffers(); | ||
} | ||
} | ||
if (!keys_freed) { | ||
latencyEndMonitor(latency); | ||
latencyAddSampleIfNeeded("eviction-cycle",latency); | ||
goto cant_free; /* nothing to free... */ | ||
} | ||
} | ||
latencyEndMonitor(latency); | ||
latencyAddSampleIfNeeded("eviction-cycle",latency); | ||
return C_OK; | ||
|
||
cant_free: | ||
/* We are here if we are not able to reclaim memory. There is only one | ||
* last thing we can try: check if the lazyfree thread has jobs in queue | ||
* and wait... */ | ||
while(bioPendingJobsOfType(BIO_LAZY_FREE)) { | ||
if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree) | ||
break; | ||
usleep(1000); | ||
} | ||
return C_ERR; | ||
} | ||
|
Oops, something went wrong.