这里主要是讲述activeExpireCycle的实现,不是非常核心的内容。
- /* Helper function for the activeExpireCycle() function.
- * This function will try to expire the key that is stored in the hash table
- * entry 'de' of the 'expires' hash table of a Redis database.
- *
- * If the key is found to be expired, it is removed from the database and
- * 1 is returned. Otherwise no operation is performed and 0 is returned.
- *
- * When a key is expired, server.stat_expiredkeys is incremented.
- *
- * The parameter 'now' is the current time in milliseconds as is passed
- * to the function to avoid too many gettimeofday() syscalls. */
- int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
- long long t = dictGetSignedIntegerVal(de);
- if (now > t) {
- sds key = dictGetKey(de);
- robj *keyobj = createStringObject(key,sdslen(key));
- deleteExpiredKeyAndPropagate(db,keyobj);
- decrRefCount(keyobj);
- return 1;
- } else {
- return 0;
- }
- }
-
- /* Try to expire a few timed out keys. The algorithm used is adaptive and
- * will use few CPU cycles if there are few expiring keys, otherwise
- * it will get more aggressive to avoid that too much memory is used by
- * keys that can be removed from the keyspace.
- *
- * Every expire cycle tests multiple databases: the next call will start
- * again from the next db. No more than CRON_DBS_PER_CALL databases are
- * tested at every iteration.
- *
- * The function can perform more or less work, depending on the "type"
- * argument. It can execute a "fast cycle" or a "slow cycle". The slow
- * cycle is the main way we collect expired cycles: this happens with
- * the "server.hz" frequency (usually 10 hertz).
- *
- * However the slow cycle can exit for timeout, since it used too much time.
- * For this reason the function is also invoked to perform a fast cycle
- * at every event loop cycle, in the beforeSleep() function. The fast cycle
- * will try to perform less work, but will do it much more often.
- *
- * The following are the details of the two expire cycles and their stop
- * conditions:
- *
- * If type is ACTIVE_EXPIRE_CYCLE_FAST the function will try to run a
- * "fast" expire cycle that takes no longer than ACTIVE_EXPIRE_CYCLE_FAST_DURATION
- * microseconds, and is not repeated again before the same amount of time.
- * The cycle will also refuse to run at all if the latest slow cycle did not
- * terminate because of a time limit condition.
- *
- * If type is ACTIVE_EXPIRE_CYCLE_SLOW, that normal expire cycle is
- * executed, where the time limit is a percentage of the REDIS_HZ period
- * as specified by the ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC define. In the
- * fast cycle, the check of every database is interrupted once the number
- * of already expired keys in the database is estimated to be lower than
- * a given percentage, in order to avoid doing too much work to gain too
- * little memory.
- *
- * The configured expire "effort" will modify the baseline parameters in
- * order to do more work in both the fast and slow expire cycles.
- */
-
- #define ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP 20 /* Keys for each DB loop. */
- #define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds. */
- #define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* Max % of CPU to use. */
- #define ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE 10 /* % of stale keys after which
- we do extra efforts. */
-
- void activeExpireCycle(int type) {
- /* Adjust the running parameters according to the configured expire
- * effort. The default effort is 1, and the maximum configurable effort
- * is 10. */
- unsigned long
- effort = server.active_expire_effort-1, /* Rescale from 0 to 9. */
- config_keys_per_loop = ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP +
- ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP/4*effort,
- config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION +
- ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort,
- config_cycle_slow_time_perc = ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC +
- 2*effort,
- config_cycle_acceptable_stale = ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE-
- effort;
-
- /* This function has some global state in order to continue the work
- * incrementally across calls. */
- static unsigned int current_db = 0; /* Next DB to test. */
- static int timelimit_exit = 0; /* Time limit hit in previous call? */
- static long long last_fast_cycle = 0; /* When last fast cycle ran. */
-
- int j, iteration = 0;
- int dbs_per_call = CRON_DBS_PER_CALL;
- long long start = ustime(), timelimit, elapsed;
-
- /* When clients are paused the dataset should be static not just from the
- * POV of clients not being able to write, but also from the POV of
- * expires and evictions of keys not being performed. */
- if (checkClientPauseTimeoutAndReturnIfPaused()) return;
-
- if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
- /* Don't start a fast cycle if the previous cycle did not exit
- * for time limit, unless the percentage of estimated stale keys is
- * too high. Also never repeat a fast cycle for the same period
- * as the fast cycle total duration itself. */
- if (!timelimit_exit &&
- server.stat_expired_stale_perc < config_cycle_acceptable_stale)
- return;
-
- if (start < last_fast_cycle + (long long)config_cycle_fast_duration*2)
- return;
-
- last_fast_cycle = start;
- }
-
- /* We usually should test CRON_DBS_PER_CALL per iteration, with
- * two exceptions:
- *
- * 1) Don't test more DBs than we have.
- * 2) If last time we hit the time limit, we want to scan all DBs
- * in this iteration, as there is work to do in some DB and we don't want
- * expired keys to use memory for too much time. */
- if (dbs_per_call > server.dbnum || timelimit_exit)
- dbs_per_call = server.dbnum;
-
- /* We can use at max 'config_cycle_slow_time_perc' percentage of CPU
- * time per iteration. Since this function gets called with a frequency of
- * server.hz times per second, the following is the max amount of
- * microseconds we can spend in this function. */
- timelimit = config_cycle_slow_time_perc*1000000/server.hz/100;
- timelimit_exit = 0;
- if (timelimit <= 0) timelimit = 1;
-
- if (type == ACTIVE_EXPIRE_CYCLE_FAST)
- timelimit = config_cycle_fast_duration; /* in microseconds. */
-
- /* Accumulate some global stats as we expire keys, to have some idea
- * about the number of keys that are already logically expired, but still
- * existing inside the database. */
- long total_sampled = 0;
- long total_expired = 0;
-
- for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
- /* Expired and checked in a single loop. */
- unsigned long expired, sampled;
-
- redisDb *db = server.db+(current_db % server.dbnum);
-
- /* Increment the DB now so we are sure if we run out of time
- * in the current DB we'll restart from the next. This allows to
- * distribute the time evenly across DBs. */
- current_db++;
-
- /* Continue to expire if at the end of the cycle there are still
- * a big percentage of keys to expire, compared to the number of keys
- * we scanned. The percentage, stored in config_cycle_acceptable_stale
- * is not fixed, but depends on the Redis configured "expire effort". */
- do {
- unsigned long num, slots;
- long long now, ttl_sum;
- int ttl_samples;
- iteration++;
-
- /* If there is nothing to expire try next DB ASAP. */
- if ((num = dictSize(db->expires)) == 0) {
- db->avg_ttl = 0;
- break;
- }
- slots = dictSlots(db->expires);
- now = mstime();
-
- /* When there are less than 1% filled slots, sampling the key
- * space is expensive, so stop here waiting for better times...
- * The dictionary will be resized asap. */
- if (slots > DICT_HT_INITIAL_SIZE &&
- (num*100/slots < 1)) break;
-
- /* The main collection cycle. Sample random keys among keys
- * with an expire set, checking for expired ones. */
- expired = 0;
- sampled = 0;
- ttl_sum = 0;
- ttl_samples = 0;
-
- if (num > config_keys_per_loop)
- num = config_keys_per_loop;
-
- /* Here we access the low level representation of the hash table
- * for speed concerns: this makes this code coupled with dict.c,
- * but it hardly changed in ten years.
- *
- * Note that certain places of the hash table may be empty,
- * so we want also a stop condition about the number of
- * buckets that we scanned. However scanning for free buckets
- * is very fast: we are in the cache line scanning a sequential
- * array of NULL pointers, so we can scan a lot more buckets
- * than keys in the same time. */
- long max_buckets = num*20;
- long checked_buckets = 0;
-
- while (sampled < num && checked_buckets < max_buckets) {
- for (int table = 0; table < 2; table++) {
- if (table == 1 && !dictIsRehashing(db->expires)) break;
-
- unsigned long idx = db->expires_cursor;
- idx &= db->expires->ht[table].sizemask;
- dictEntry *de = db->expires->ht[table].table[idx];
- long long ttl;
-
- /* Scan the current bucket of the current table. */
- checked_buckets++;
- while(de) {
- /* Get the next entry now since this entry may get
- * deleted. */
- dictEntry *e = de;
- de = de->next;
-
- ttl = dictGetSignedIntegerVal(e)-now;
- if (activeExpireCycleTryExpire(db,e,now)) expired++;
- if (ttl > 0) {
- /* We want the average TTL of keys yet
- * not expired. */
- ttl_sum += ttl;
- ttl_samples++;
- }
- sampled++;
- }
- }
- db->expires_cursor++;
- }
- total_expired += expired;
- total_sampled += sampled;
-
- /* Update the average TTL stats for this database. */
- if (ttl_samples) {
- long long avg_ttl = ttl_sum/ttl_samples;
-
- /* Do a simple running average with a few samples.
- * We just use the current estimate with a weight of 2%
- * and the previous estimate with a weight of 98%. */
- if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
- db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50);
- }
-
- /* We can't block forever here even if there are many keys to
- * expire. So after a given amount of milliseconds return to the
- * caller waiting for the other active expire cycle. */
- if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */
- elapsed = ustime()-start;
- if (elapsed > timelimit) {
- timelimit_exit = 1;
- server.stat_expired_time_cap_reached_count++;
- break;
- }
- }
- /* We don't repeat the cycle for the current database if there are
- * an acceptable amount of stale keys (logically expired but yet
- * not reclaimed). */
- } while (sampled == 0 ||
- (expired*100/sampled) > config_cycle_acceptable_stale);
- }
-
- elapsed = ustime()-start;
- server.stat_expire_cycle_time_used += elapsed;
- latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);
-
- /* Update our estimate of keys existing but yet to be expired.
- * Running average with this sample accounting for 5%. */
- double current_perc;
- if (total_sampled) {
- current_perc = (double)total_expired/total_sampled;
- } else
- current_perc = 0;
- server.stat_expired_stale_perc = (current_perc*0.05)+
- (server.stat_expired_stale_perc*0.95);
- }
-