关键词搜索

源码搜索 ×
×

漫话Redis源码之五十七

发布2022-01-09浏览470次

详情内容

这个文件主要讲述一些宏和哈希函数

关于宏,其实不用细看。

关于哈希函数,看懂用途即可。其转换的细节不用看,不然肯定受不了。

  1. /* The cached cardinality MSB is used to signal validity of the cached value. */
  2. #define HLL_INVALIDATE_CACHE(hdr) (hdr)->card[7] |= (1<<7)
  3. #define HLL_VALID_CACHE(hdr) (((hdr)->card[7] & (1<<7)) == 0)
  4. #define HLL_P 14 /* The greater is P, the smaller the error. */
  5. #define HLL_Q (64-HLL_P) /* The number of bits of the hash value used for
  6. determining the number of leading zeros. */
  7. #define HLL_REGISTERS (1<<HLL_P) /* With P=14, 16384 registers. */
  8. #define HLL_P_MASK (HLL_REGISTERS-1) /* Mask to index register. */
  9. #define HLL_BITS 6 /* Enough to count up to 63 leading zeroes. */
  10. #define HLL_REGISTER_MAX ((1<<HLL_BITS)-1)
  11. #define HLL_HDR_SIZE sizeof(struct hllhdr)
  12. #define HLL_DENSE_SIZE (HLL_HDR_SIZE+((HLL_REGISTERS*HLL_BITS+7)/8))
  13. #define HLL_DENSE 0 /* Dense encoding. */
  14. #define HLL_SPARSE 1 /* Sparse encoding. */
  15. #define HLL_RAW 255 /* Only used internally, never exposed. */
  16. #define HLL_MAX_ENCODING 1
  17. static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected";
  18. /* =========================== Low level bit macros ========================= */
  19. /* Macros to access the dense representation.
  20. *
  21. * We need to get and set 6 bit counters in an array of 8 bit bytes.
  22. * We use macros to make sure the code is inlined since speed is critical
  23. * especially in order to compute the approximated cardinality in
  24. * HLLCOUNT where we need to access all the registers at once.
  25. * For the same reason we also want to avoid conditionals in this code path.
  26. *
  27. * +--------+--------+--------+------//
  28. * |11000000|22221111|33333322|55444444
  29. * +--------+--------+--------+------//
  30. *
  31. * Note: in the above representation the most significant bit (MSB)
  32. * of every byte is on the left. We start using bits from the LSB to MSB,
  33. * and so forth passing to the next byte.
  34. *
  35. * Example, we want to access to counter at pos = 1 ("111111" in the
  36. * illustration above).
  37. *
  38. * The index of the first byte b0 containing our data is:
  39. *
  40. * b0 = 6 * pos / 8 = 0
  41. *
  42. * +--------+
  43. * |11000000| <- Our byte at b0
  44. * +--------+
  45. *
  46. * The position of the first bit (counting from the LSB = 0) in the byte
  47. * is given by:
  48. *
  49. * fb = 6 * pos % 8 -> 6
  50. *
  51. * Right shift b0 of 'fb' bits.
  52. *
  53. * +--------+
  54. * |11000000| <- Initial value of b0
  55. * |00000011| <- After right shift of 6 pos.
  56. * +--------+
  57. *
  58. * Left shift b1 of bits 8-fb bits (2 bits)
  59. *
  60. * +--------+
  61. * |22221111| <- Initial value of b1
  62. * |22111100| <- After left shift of 2 bits.
  63. * +--------+
  64. *
  65. * OR the two bits, and finally AND with 111111 (63 in decimal) to
  66. * clean the higher order bits we are not interested in:
  67. *
  68. * +--------+
  69. * |00000011| <- b0 right shifted
  70. * |22111100| <- b1 left shifted
  71. * |22111111| <- b0 OR b1
  72. * | 111111| <- (b0 OR b1) AND 63, our value.
  73. * +--------+
  74. *
  75. * We can try with a different example, like pos = 0. In this case
  76. * the 6-bit counter is actually contained in a single byte.
  77. *
  78. * b0 = 6 * pos / 8 = 0
  79. *
  80. * +--------+
  81. * |11000000| <- Our byte at b0
  82. * +--------+
  83. *
  84. * fb = 6 * pos % 8 = 0
  85. *
  86. * So we right shift of 0 bits (no shift in practice) and
  87. * left shift the next byte of 8 bits, even if we don't use it,
  88. * but this has the effect of clearing the bits so the result
  89. * will not be affected after the OR.
  90. *
  91. * -------------------------------------------------------------------------
  92. *
  93. * Setting the register is a bit more complex, let's assume that 'val'
  94. * is the value we want to set, already in the right range.
  95. *
  96. * We need two steps, in one we need to clear the bits, and in the other
  97. * we need to bitwise-OR the new bits.
  98. *
  99. * Let's try with 'pos' = 1, so our first byte at 'b' is 0,
  100. *
  101. * "fb" is 6 in this case.
  102. *
  103. * +--------+
  104. * |11000000| <- Our byte at b0
  105. * +--------+
  106. *
  107. * To create an AND-mask to clear the bits about this position, we just
  108. * initialize the mask with the value 63, left shift it of "fs" bits,
  109. * and finally invert the result.
  110. *
  111. * +--------+
  112. * |00111111| <- "mask" starts at 63
  113. * |11000000| <- "mask" after left shift of "ls" bits.
  114. * |00111111| <- "mask" after invert.
  115. * +--------+
  116. *
  117. * Now we can bitwise-AND the byte at "b" with the mask, and bitwise-OR
  118. * it with "val" left-shifted of "ls" bits to set the new bits.
  119. *
  120. * Now let's focus on the next byte b1:
  121. *
  122. * +--------+
  123. * |22221111| <- Initial value of b1
  124. * +--------+
  125. *
  126. * To build the AND mask we start again with the 63 value, right shift
  127. * it by 8-fb bits, and invert it.
  128. *
  129. * +--------+
  130. * |00111111| <- "mask" set at 2&6-1
  131. * |00001111| <- "mask" after the right shift by 8-fb = 2 bits
  132. * |11110000| <- "mask" after bitwise not.
  133. * +--------+
  134. *
  135. * Now we can mask it with b+1 to clear the old bits, and bitwise-OR
  136. * with "val" left-shifted by "rs" bits to set the new value.
  137. */
  138. /* Note: if we access the last counter, we will also access the b+1 byte
  139. * that is out of the array, but sds strings always have an implicit null
  140. * term, so the byte exists, and we can skip the conditional (or the need
  141. * to allocate 1 byte more explicitly). */
  142. /* Store the value of the register at position 'regnum' into variable 'target'.
  143. * 'p' is an array of unsigned bytes. */
  144. #define HLL_DENSE_GET_REGISTER(target,p,regnum) do { \
  145. uint8_t *_p = (uint8_t*) p; \
  146. unsigned long _byte = regnum*HLL_BITS/8; \
  147. unsigned long _fb = regnum*HLL_BITS&7; \
  148. unsigned long _fb8 = 8 - _fb; \
  149. unsigned long b0 = _p[_byte]; \
  150. unsigned long b1 = _p[_byte+1]; \
  151. target = ((b0 >> _fb) | (b1 << _fb8)) & HLL_REGISTER_MAX; \
  152. } while(0)
  153. /* Set the value of the register at position 'regnum' to 'val'.
  154. * 'p' is an array of unsigned bytes. */
  155. #define HLL_DENSE_SET_REGISTER(p,regnum,val) do { \
  156. uint8_t *_p = (uint8_t*) p; \
  157. unsigned long _byte = regnum*HLL_BITS/8; \
  158. unsigned long _fb = regnum*HLL_BITS&7; \
  159. unsigned long _fb8 = 8 - _fb; \
  160. unsigned long _v = val; \
  161. _p[_byte] &= ~(HLL_REGISTER_MAX << _fb); \
  162. _p[_byte] |= _v << _fb; \
  163. _p[_byte+1] &= ~(HLL_REGISTER_MAX >> _fb8); \
  164. _p[_byte+1] |= _v >> _fb8; \
  165. } while(0)
  166. /* Macros to access the sparse representation.
  167. * The macros parameter is expected to be an uint8_t pointer. */
  168. #define HLL_SPARSE_XZERO_BIT 0x40 /* 01xxxxxx */
  169. #define HLL_SPARSE_VAL_BIT 0x80 /* 1vvvvvxx */
  170. #define HLL_SPARSE_IS_ZERO(p) (((*(p)) & 0xc0) == 0) /* 00xxxxxx */
  171. #define HLL_SPARSE_IS_XZERO(p) (((*(p)) & 0xc0) == HLL_SPARSE_XZERO_BIT)
  172. #define HLL_SPARSE_IS_VAL(p) ((*(p)) & HLL_SPARSE_VAL_BIT)
  173. #define HLL_SPARSE_ZERO_LEN(p) (((*(p)) & 0x3f)+1)
  174. #define HLL_SPARSE_XZERO_LEN(p) (((((*(p)) & 0x3f) << 8) | (*((p)+1)))+1)
  175. #define HLL_SPARSE_VAL_VALUE(p) ((((*(p)) >> 2) & 0x1f)+1)
  176. #define HLL_SPARSE_VAL_LEN(p) (((*(p)) & 0x3)+1)
  177. #define HLL_SPARSE_VAL_MAX_VALUE 32
  178. #define HLL_SPARSE_VAL_MAX_LEN 4
  179. #define HLL_SPARSE_ZERO_MAX_LEN 64
  180. #define HLL_SPARSE_XZERO_MAX_LEN 16384
  181. #define HLL_SPARSE_VAL_SET(p,val,len) do { \
  182. *(p) = (((val)-1)<<2|((len)-1))|HLL_SPARSE_VAL_BIT; \
  183. } while(0)
  184. #define HLL_SPARSE_ZERO_SET(p,len) do { \
  185. *(p) = (len)-1; \
  186. } while(0)
  187. #define HLL_SPARSE_XZERO_SET(p,len) do { \
  188. int _l = (len)-1; \
  189. *(p) = (_l>>8) | HLL_SPARSE_XZERO_BIT; \
  190. *((p)+1) = (_l&0xff); \
  191. } while(0)
  192. #define HLL_ALPHA_INF 0.721347520444481703680 /* constant for 0.5/ln(2) */
  193. /* ========================= HyperLogLog algorithm ========================= */
  194. /* Our hash function is MurmurHash2, 64 bit version.
  195. * It was modified for Redis in order to provide the same result in
  196. * big and little endian archs (endian neutral). */
  197. uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
  198. const uint64_t m = 0xc6a4a7935bd1e995;
  199. const int r = 47;
  200. uint64_t h = seed ^ (len * m);
  201. const uint8_t *data = (const uint8_t *)key;
  202. const uint8_t *end = data + (len-(len&7));
  203. while(data != end) {
  204. uint64_t k;
  205. #if (BYTE_ORDER == LITTLE_ENDIAN)
  206. #ifdef USE_ALIGNED_ACCESS
  207. memcpy(&k,data,sizeof(uint64_t));
  208. #else
  209. k = *((uint64_t*)data);
  210. #endif
  211. #else
  212. k = (uint64_t) data[0];
  213. k |= (uint64_t) data[1] << 8;
  214. k |= (uint64_t) data[2] << 16;
  215. k |= (uint64_t) data[3] << 24;
  216. k |= (uint64_t) data[4] << 32;
  217. k |= (uint64_t) data[5] << 40;
  218. k |= (uint64_t) data[6] << 48;
  219. k |= (uint64_t) data[7] << 56;
  220. #endif
  221. k *= m;
  222. k ^= k >> r;
  223. k *= m;
  224. h ^= k;
  225. h *= m;
  226. data += 8;
  227. }
  228. switch(len & 7) {
  229. case 7: h ^= (uint64_t)data[6] << 48; /* fall-thru */
  230. case 6: h ^= (uint64_t)data[5] << 40; /* fall-thru */
  231. case 5: h ^= (uint64_t)data[4] << 32; /* fall-thru */
  232. case 4: h ^= (uint64_t)data[3] << 24; /* fall-thru */
  233. case 3: h ^= (uint64_t)data[2] << 16; /* fall-thru */
  234. case 2: h ^= (uint64_t)data[1] << 8; /* fall-thru */
  235. case 1: h ^= (uint64_t)data[0];
  236. h *= m; /* fall-thru */
  237. };
  238. h ^= h >> r;
  239. h *= m;
  240. h ^= h >> r;
  241. return h;
  242. }
  243. /* Given a string element to add to the HyperLogLog, returns the length
  244. * of the pattern 000..1 of the element hash. As a side effect 'regp' is
  245. * set to the register index this element hashes to. */
  246. int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
  247. uint64_t hash, bit, index;
  248. int count;
  249. /* Count the number of zeroes starting from bit HLL_REGISTERS
  250. * (that is a power of two corresponding to the first bit we don't use
  251. * as index). The max run can be 64-P+1 = Q+1 bits.
  252. *
  253. * Note that the final "1" ending the sequence of zeroes must be
  254. * included in the count, so if we find "001" the count is 3, and
  255. * the smallest count possible is no zeroes at all, just a 1 bit
  256. * at the first position, that is a count of 1.
  257. *
  258. * This may sound like inefficient, but actually in the average case
  259. * there are high probabilities to find a 1 after a few iterations. */
  260. hash = MurmurHash64A(ele,elesize,0xadc83b19ULL);
  261. index = hash & HLL_P_MASK; /* Register index. */
  262. hash >>= HLL_P; /* Remove bits used to address the register. */
  263. hash |= ((uint64_t)1<<HLL_Q); /* Make sure the loop terminates
  264. and count will be <= Q+1. */
  265. bit = 1;
  266. count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */
  267. while((hash & bit) == 0) {
  268. count++;
  269. bit <<= 1;
  270. }
  271. *regp = (int) index;
  272. return count;
  273. }

相关技术文章

点击QQ咨询
开通会员
返回顶部
×
微信扫码支付
微信扫码支付
确定支付下载
请使用微信描二维码支付
×

提示信息

×

选择支付方式

  • 微信支付
  • 支付宝付款
确定支付下载