关键词搜索

源码搜索 ×
×

漫话Redis源码之五十三

发布2022-01-09浏览408次

详情内容

这里主要讲一些列的字符串序列化,其实很简单。

  1. /* Listpack -- A lists of strings serialization format
  2. *
  3. * This file implements the specification you can find at:
  4. *
  5. * https://github.com/antirez/listpack
  6. *
  7. * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
  8. * Copyright (c) 2020, Redis Labs, Inc
  9. * All rights reserved.
  10. *
  11. * Redistribution and use in source and binary forms, with or without
  12. * modification, are permitted provided that the following conditions are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright notice,
  15. * this list of conditions and the following disclaimer.
  16. * * Redistributions in binary form must reproduce the above copyright
  17. * notice, this list of conditions and the following disclaimer in the
  18. * documentation and/or other materials provided with the distribution.
  19. * * Neither the name of Redis nor the names of its contributors may be used
  20. * to endorse or promote products derived from this software without
  21. * specific prior written permission.
  22. *
  23. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  24. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  25. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  27. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  28. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  29. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  30. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  31. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  32. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. * POSSIBILITY OF SUCH DAMAGE.
  34. */
  35. #include <stdint.h>
  36. #include <limits.h>
  37. #include <sys/types.h>
  38. #include <stdlib.h>
  39. #include <string.h>
  40. #include <stdio.h>
  41. #include "listpack.h"
  42. #include "listpack_malloc.h"
  43. #include "redisassert.h"
  44. #define LP_HDR_SIZE 6 /* 32 bit total len + 16 bit number of elements. */
  45. #define LP_HDR_NUMELE_UNKNOWN UINT16_MAX
  46. #define LP_MAX_INT_ENCODING_LEN 9
  47. #define LP_MAX_BACKLEN_SIZE 5
  48. #define LP_MAX_ENTRY_BACKLEN 34359738367ULL
  49. #define LP_ENCODING_INT 0
  50. #define LP_ENCODING_STRING 1
  51. #define LP_ENCODING_7BIT_UINT 0
  52. #define LP_ENCODING_7BIT_UINT_MASK 0x80
  53. #define LP_ENCODING_IS_7BIT_UINT(byte) (((byte)&LP_ENCODING_7BIT_UINT_MASK)==LP_ENCODING_7BIT_UINT)
  54. #define LP_ENCODING_6BIT_STR 0x80
  55. #define LP_ENCODING_6BIT_STR_MASK 0xC0
  56. #define LP_ENCODING_IS_6BIT_STR(byte) (((byte)&LP_ENCODING_6BIT_STR_MASK)==LP_ENCODING_6BIT_STR)
  57. #define LP_ENCODING_13BIT_INT 0xC0
  58. #define LP_ENCODING_13BIT_INT_MASK 0xE0
  59. #define LP_ENCODING_IS_13BIT_INT(byte) (((byte)&LP_ENCODING_13BIT_INT_MASK)==LP_ENCODING_13BIT_INT)
  60. #define LP_ENCODING_12BIT_STR 0xE0
  61. #define LP_ENCODING_12BIT_STR_MASK 0xF0
  62. #define LP_ENCODING_IS_12BIT_STR(byte) (((byte)&LP_ENCODING_12BIT_STR_MASK)==LP_ENCODING_12BIT_STR)
  63. #define LP_ENCODING_16BIT_INT 0xF1
  64. #define LP_ENCODING_16BIT_INT_MASK 0xFF
  65. #define LP_ENCODING_IS_16BIT_INT(byte) (((byte)&LP_ENCODING_16BIT_INT_MASK)==LP_ENCODING_16BIT_INT)
  66. #define LP_ENCODING_24BIT_INT 0xF2
  67. #define LP_ENCODING_24BIT_INT_MASK 0xFF
  68. #define LP_ENCODING_IS_24BIT_INT(byte) (((byte)&LP_ENCODING_24BIT_INT_MASK)==LP_ENCODING_24BIT_INT)
  69. #define LP_ENCODING_32BIT_INT 0xF3
  70. #define LP_ENCODING_32BIT_INT_MASK 0xFF
  71. #define LP_ENCODING_IS_32BIT_INT(byte) (((byte)&LP_ENCODING_32BIT_INT_MASK)==LP_ENCODING_32BIT_INT)
  72. #define LP_ENCODING_64BIT_INT 0xF4
  73. #define LP_ENCODING_64BIT_INT_MASK 0xFF
  74. #define LP_ENCODING_IS_64BIT_INT(byte) (((byte)&LP_ENCODING_64BIT_INT_MASK)==LP_ENCODING_64BIT_INT)
  75. #define LP_ENCODING_32BIT_STR 0xF0
  76. #define LP_ENCODING_32BIT_STR_MASK 0xFF
  77. #define LP_ENCODING_IS_32BIT_STR(byte) (((byte)&LP_ENCODING_32BIT_STR_MASK)==LP_ENCODING_32BIT_STR)
  78. #define LP_EOF 0xFF
  79. #define LP_ENCODING_6BIT_STR_LEN(p) ((p)[0] & 0x3F)
  80. #define LP_ENCODING_12BIT_STR_LEN(p) ((((p)[0] & 0xF) << 8) | (p)[1])
  81. #define LP_ENCODING_32BIT_STR_LEN(p) (((uint32_t)(p)[1]<<0) | \
  82. ((uint32_t)(p)[2]<<8) | \
  83. ((uint32_t)(p)[3]<<16) | \
  84. ((uint32_t)(p)[4]<<24))
  85. #define lpGetTotalBytes(p) (((uint32_t)(p)[0]<<0) | \
  86. ((uint32_t)(p)[1]<<8) | \
  87. ((uint32_t)(p)[2]<<16) | \
  88. ((uint32_t)(p)[3]<<24))
  89. #define lpGetNumElements(p) (((uint32_t)(p)[4]<<0) | \
  90. ((uint32_t)(p)[5]<<8))
  91. #define lpSetTotalBytes(p,v) do { \
  92. (p)[0] = (v)&0xff; \
  93. (p)[1] = ((v)>>8)&0xff; \
  94. (p)[2] = ((v)>>16)&0xff; \
  95. (p)[3] = ((v)>>24)&0xff; \
  96. } while(0)
  97. #define lpSetNumElements(p,v) do { \
  98. (p)[4] = (v)&0xff; \
  99. (p)[5] = ((v)>>8)&0xff; \
  100. } while(0)
  101. /* Validates that 'p' is not ouside the listpack.
  102. * All function that return a pointer to an element in the listpack will assert
  103. * that this element is valid, so it can be freely used.
  104. * Generally functions such lpNext and lpDelete assume the input pointer is
  105. * already validated (since it's the return value of another function). */
  106. #define ASSERT_INTEGRITY(lp, p) do { \
  107. assert((p) >= (lp)+LP_HDR_SIZE && (p) < (lp)+lpGetTotalBytes((lp))); \
  108. } while (0)
  109. /* Similar to the above, but validates the entire element lenth rather than just
  110. * it's pointer. */
  111. #define ASSERT_INTEGRITY_LEN(lp, p, len) do { \
  112. assert((p) >= (lp)+LP_HDR_SIZE && (p)+(len) < (lp)+lpGetTotalBytes((lp))); \
  113. } while (0)
  114. static inline void lpAssertValidEntry(unsigned char* lp, size_t lpbytes, unsigned char *p);
  115. /* Convert a string into a signed 64 bit integer.
  116. * The function returns 1 if the string could be parsed into a (non-overflowing)
  117. * signed 64 bit int, 0 otherwise. The 'value' will be set to the parsed value
  118. * when the function returns success.
  119. *
  120. * Note that this function demands that the string strictly represents
  121. * a int64 value: no spaces or other characters before or after the string
  122. * representing the number are accepted, nor zeroes at the start if not
  123. * for the string "0" representing the zero number.
  124. *
  125. * Because of its strictness, it is safe to use this function to check if
  126. * you can convert a string into a long long, and obtain back the string
  127. * from the number without any loss in the string representation. *
  128. *
  129. * -----------------------------------------------------------------------------
  130. *
  131. * Credits: this function was adapted from the Redis source code, file
  132. * "utils.c", function string2ll(), and is copyright:
  133. *
  134. * Copyright(C) 2011, Pieter Noordhuis
  135. * Copyright(C) 2011, Salvatore Sanfilippo
  136. *
  137. * The function is released under the BSD 3-clause license.
  138. */
  139. int lpStringToInt64(const char *s, unsigned long slen, int64_t *value) {
  140. const char *p = s;
  141. unsigned long plen = 0;
  142. int negative = 0;
  143. uint64_t v;
  144. if (plen == slen)
  145. return 0;
  146. /* Special case: first and only digit is 0. */
  147. if (slen == 1 && p[0] == '0') {
  148. if (value != NULL) *value = 0;
  149. return 1;
  150. }
  151. if (p[0] == '-') {
  152. negative = 1;
  153. p++; plen++;
  154. /* Abort on only a negative sign. */
  155. if (plen == slen)
  156. return 0;
  157. }
  158. /* First digit should be 1-9, otherwise the string should just be 0. */
  159. if (p[0] >= '1' && p[0] <= '9') {
  160. v = p[0]-'0';
  161. p++; plen++;
  162. } else if (p[0] == '0' && slen == 1) {
  163. *value = 0;
  164. return 1;
  165. } else {
  166. return 0;
  167. }
  168. while (plen < slen && p[0] >= '0' && p[0] <= '9') {
  169. if (v > (UINT64_MAX / 10)) /* Overflow. */
  170. return 0;
  171. v *= 10;
  172. if (v > (UINT64_MAX - (p[0]-'0'))) /* Overflow. */
  173. return 0;
  174. v += p[0]-'0';
  175. p++; plen++;
  176. }
  177. /* Return if not all bytes were used. */
  178. if (plen < slen)
  179. return 0;
  180. if (negative) {
  181. if (v > ((uint64_t)(-(INT64_MIN+1))+1)) /* Overflow. */
  182. return 0;
  183. if (value != NULL) *value = -v;
  184. } else {
  185. if (v > INT64_MAX) /* Overflow. */
  186. return 0;
  187. if (value != NULL) *value = v;
  188. }
  189. return 1;
  190. }
  191. /* Create a new, empty listpack.
  192. * On success the new listpack is returned, otherwise an error is returned.
  193. * Pre-allocate at least `capacity` bytes of memory,
  194. * over-allocated memory can be shrinked by `lpShrinkToFit`.
  195. * */
  196. unsigned char *lpNew(size_t capacity) {
  197. unsigned char *lp = lp_malloc(capacity > LP_HDR_SIZE+1 ? capacity : LP_HDR_SIZE+1);
  198. if (lp == NULL) return NULL;
  199. lpSetTotalBytes(lp,LP_HDR_SIZE+1);
  200. lpSetNumElements(lp,0);
  201. lp[LP_HDR_SIZE] = LP_EOF;
  202. return lp;
  203. }
  204. /* Free the specified listpack. */
  205. void lpFree(unsigned char *lp) {
  206. lp_free(lp);
  207. }
  208. /* Shrink the memory to fit. */
  209. unsigned char* lpShrinkToFit(unsigned char *lp) {
  210. size_t size = lpGetTotalBytes(lp);
  211. if (size < lp_malloc_size(lp)) {
  212. return lp_realloc(lp, size);
  213. } else {
  214. return lp;
  215. }
  216. }
  217. /* Given an element 'ele' of size 'size', determine if the element can be
  218. * represented inside the listpack encoded as integer, and returns
  219. * LP_ENCODING_INT if so. Otherwise returns LP_ENCODING_STR if no integer
  220. * encoding is possible.
  221. *
  222. * If the LP_ENCODING_INT is returned, the function stores the integer encoded
  223. * representation of the element in the 'intenc' buffer.
  224. *
  225. * Regardless of the returned encoding, 'enclen' is populated by reference to
  226. * the number of bytes that the string or integer encoded element will require
  227. * in order to be represented. */
  228. int lpEncodeGetType(unsigned char *ele, uint32_t size, unsigned char *intenc, uint64_t *enclen) {
  229. int64_t v;
  230. if (lpStringToInt64((const char*)ele, size, &v)) {
  231. if (v >= 0 && v <= 127) {
  232. /* Single byte 0-127 integer. */
  233. intenc[0] = v;
  234. *enclen = 1;
  235. } else if (v >= -4096 && v <= 4095) {
  236. /* 13 bit integer. */
  237. if (v < 0) v = ((int64_t)1<<13)+v;
  238. intenc[0] = (v>>8)|LP_ENCODING_13BIT_INT;
  239. intenc[1] = v&0xff;
  240. *enclen = 2;
  241. } else if (v >= -32768 && v <= 32767) {
  242. /* 16 bit integer. */
  243. if (v < 0) v = ((int64_t)1<<16)+v;
  244. intenc[0] = LP_ENCODING_16BIT_INT;
  245. intenc[1] = v&0xff;
  246. intenc[2] = v>>8;
  247. *enclen = 3;
  248. } else if (v >= -8388608 && v <= 8388607) {
  249. /* 24 bit integer. */
  250. if (v < 0) v = ((int64_t)1<<24)+v;
  251. intenc[0] = LP_ENCODING_24BIT_INT;
  252. intenc[1] = v&0xff;
  253. intenc[2] = (v>>8)&0xff;
  254. intenc[3] = v>>16;
  255. *enclen = 4;
  256. } else if (v >= -2147483648 && v <= 2147483647) {
  257. /* 32 bit integer. */
  258. if (v < 0) v = ((int64_t)1<<32)+v;
  259. intenc[0] = LP_ENCODING_32BIT_INT;
  260. intenc[1] = v&0xff;
  261. intenc[2] = (v>>8)&0xff;
  262. intenc[3] = (v>>16)&0xff;
  263. intenc[4] = v>>24;
  264. *enclen = 5;
  265. } else {
  266. /* 64 bit integer. */
  267. uint64_t uv = v;
  268. intenc[0] = LP_ENCODING_64BIT_INT;
  269. intenc[1] = uv&0xff;
  270. intenc[2] = (uv>>8)&0xff;
  271. intenc[3] = (uv>>16)&0xff;
  272. intenc[4] = (uv>>24)&0xff;
  273. intenc[5] = (uv>>32)&0xff;
  274. intenc[6] = (uv>>40)&0xff;
  275. intenc[7] = (uv>>48)&0xff;
  276. intenc[8] = uv>>56;
  277. *enclen = 9;
  278. }
  279. return LP_ENCODING_INT;
  280. } else {
  281. if (size < 64) *enclen = 1+size;
  282. else if (size < 4096) *enclen = 2+size;
  283. else *enclen = 5+(uint64_t)size;
  284. return LP_ENCODING_STRING;
  285. }
  286. }
  287. /* Store a reverse-encoded variable length field, representing the length
  288. * of the previous element of size 'l', in the target buffer 'buf'.
  289. * The function returns the number of bytes used to encode it, from
  290. * 1 to 5. If 'buf' is NULL the function just returns the number of bytes
  291. * needed in order to encode the backlen. */
  292. unsigned long lpEncodeBacklen(unsigned char *buf, uint64_t l) {
  293. if (l <= 127) {
  294. if (buf) buf[0] = l;
  295. return 1;
  296. } else if (l < 16383) {
  297. if (buf) {
  298. buf[0] = l>>7;
  299. buf[1] = (l&127)|128;
  300. }
  301. return 2;
  302. } else if (l < 2097151) {
  303. if (buf) {
  304. buf[0] = l>>14;
  305. buf[1] = ((l>>7)&127)|128;
  306. buf[2] = (l&127)|128;
  307. }
  308. return 3;
  309. } else if (l < 268435455) {
  310. if (buf) {
  311. buf[0] = l>>21;
  312. buf[1] = ((l>>14)&127)|128;
  313. buf[2] = ((l>>7)&127)|128;
  314. buf[3] = (l&127)|128;
  315. }
  316. return 4;
  317. } else {
  318. if (buf) {
  319. buf[0] = l>>28;
  320. buf[1] = ((l>>21)&127)|128;
  321. buf[2] = ((l>>14)&127)|128;
  322. buf[3] = ((l>>7)&127)|128;
  323. buf[4] = (l&127)|128;
  324. }
  325. return 5;
  326. }
  327. }
  328. /* Decode the backlen and returns it. If the encoding looks invalid (more than
  329. * 5 bytes are used), UINT64_MAX is returned to report the problem. */
  330. uint64_t lpDecodeBacklen(unsigned char *p) {
  331. uint64_t val = 0;
  332. uint64_t shift = 0;
  333. do {
  334. val |= (uint64_t)(p[0] & 127) << shift;
  335. if (!(p[0] & 128)) break;
  336. shift += 7;
  337. p--;
  338. if (shift > 28) return UINT64_MAX;
  339. } while(1);
  340. return val;
  341. }
  342. /* Encode the string element pointed by 's' of size 'len' in the target
  343. * buffer 's'. The function should be called with 'buf' having always enough
  344. * space for encoding the string. This is done by calling lpEncodeGetType()
  345. * before calling this function. */
  346. void lpEncodeString(unsigned char *buf, unsigned char *s, uint32_t len) {
  347. if (len < 64) {
  348. buf[0] = len | LP_ENCODING_6BIT_STR;
  349. memcpy(buf+1,s,len);
  350. } else if (len < 4096) {
  351. buf[0] = (len >> 8) | LP_ENCODING_12BIT_STR;
  352. buf[1] = len & 0xff;
  353. memcpy(buf+2,s,len);
  354. } else {
  355. buf[0] = LP_ENCODING_32BIT_STR;
  356. buf[1] = len & 0xff;
  357. buf[2] = (len >> 8) & 0xff;
  358. buf[3] = (len >> 16) & 0xff;
  359. buf[4] = (len >> 24) & 0xff;
  360. memcpy(buf+5,s,len);
  361. }
  362. }
  363. /* Return the encoded length of the listpack element pointed by 'p'.
  364. * This includes the encoding byte, length bytes, and the element data itself.
  365. * If the element encoding is wrong then 0 is returned.
  366. * Note that this method may access additional bytes (in case of 12 and 32 bit
  367. * str), so should only be called when we know 'p' was already validated by
  368. * lpCurrentEncodedSizeBytes or ASSERT_INTEGRITY_LEN (possibly since 'p' is
  369. * a return value of another function that validated its return. */
  370. uint32_t lpCurrentEncodedSizeUnsafe(unsigned char *p) {
  371. if (LP_ENCODING_IS_7BIT_UINT(p[0])) return 1;
  372. if (LP_ENCODING_IS_6BIT_STR(p[0])) return 1+LP_ENCODING_6BIT_STR_LEN(p);
  373. if (LP_ENCODING_IS_13BIT_INT(p[0])) return 2;
  374. if (LP_ENCODING_IS_16BIT_INT(p[0])) return 3;
  375. if (LP_ENCODING_IS_24BIT_INT(p[0])) return 4;
  376. if (LP_ENCODING_IS_32BIT_INT(p[0])) return 5;
  377. if (LP_ENCODING_IS_64BIT_INT(p[0])) return 9;
  378. if (LP_ENCODING_IS_12BIT_STR(p[0])) return 2+LP_ENCODING_12BIT_STR_LEN(p);
  379. if (LP_ENCODING_IS_32BIT_STR(p[0])) return 5+LP_ENCODING_32BIT_STR_LEN(p);
  380. if (p[0] == LP_EOF) return 1;
  381. return 0;
  382. }
  383. /* Return bytes needed to encode the length of the listpack element pointed by 'p'.
  384. * This includes just the encodign byte, and the bytes needed to encode the length
  385. * of the element (excluding the element data itself)
  386. * If the element encoding is wrong then 0 is returned. */
  387. uint32_t lpCurrentEncodedSizeBytes(unsigned char *p) {
  388. if (LP_ENCODING_IS_7BIT_UINT(p[0])) return 1;
  389. if (LP_ENCODING_IS_6BIT_STR(p[0])) return 1;
  390. if (LP_ENCODING_IS_13BIT_INT(p[0])) return 1;
  391. if (LP_ENCODING_IS_16BIT_INT(p[0])) return 1;
  392. if (LP_ENCODING_IS_24BIT_INT(p[0])) return 1;
  393. if (LP_ENCODING_IS_32BIT_INT(p[0])) return 1;
  394. if (LP_ENCODING_IS_64BIT_INT(p[0])) return 1;
  395. if (LP_ENCODING_IS_12BIT_STR(p[0])) return 2;
  396. if (LP_ENCODING_IS_32BIT_STR(p[0])) return 5;
  397. if (p[0] == LP_EOF) return 1;
  398. return 0;
  399. }
  400. /* Skip the current entry returning the next. It is invalid to call this
  401. * function if the current element is the EOF element at the end of the
  402. * listpack, however, while this function is used to implement lpNext(),
  403. * it does not return NULL when the EOF element is encountered. */
  404. unsigned char *lpSkip(unsigned char *p) {
  405. unsigned long entrylen = lpCurrentEncodedSizeUnsafe(p);
  406. entrylen += lpEncodeBacklen(NULL,entrylen);
  407. p += entrylen;
  408. return p;
  409. }
  410. /* If 'p' points to an element of the listpack, calling lpNext() will return
  411. * the pointer to the next element (the one on the right), or NULL if 'p'
  412. * already pointed to the last element of the listpack. */
  413. unsigned char *lpNext(unsigned char *lp, unsigned char *p) {
  414. assert(p);
  415. p = lpSkip(p);
  416. if (p[0] == LP_EOF) return NULL;
  417. lpAssertValidEntry(lp, lpBytes(lp), p);
  418. return p;
  419. }
  420. /* If 'p' points to an element of the listpack, calling lpPrev() will return
  421. * the pointer to the previous element (the one on the left), or NULL if 'p'
  422. * already pointed to the first element of the listpack. */
  423. unsigned char *lpPrev(unsigned char *lp, unsigned char *p) {
  424. assert(p);
  425. if (p-lp == LP_HDR_SIZE) return NULL;
  426. p--; /* Seek the first backlen byte of the last element. */
  427. uint64_t prevlen = lpDecodeBacklen(p);
  428. prevlen += lpEncodeBacklen(NULL,prevlen);
  429. p -= prevlen-1; /* Seek the first byte of the previous entry. */
  430. lpAssertValidEntry(lp, lpBytes(lp), p);
  431. return p;
  432. }
  433. /* Return a pointer to the first element of the listpack, or NULL if the
  434. * listpack has no elements. */
  435. unsigned char *lpFirst(unsigned char *lp) {
  436. unsigned char *p = lp + LP_HDR_SIZE; /* Skip the header. */
  437. if (p[0] == LP_EOF) return NULL;
  438. lpAssertValidEntry(lp, lpBytes(lp), p);
  439. return p;
  440. }
  441. /* Return a pointer to the last element of the listpack, or NULL if the
  442. * listpack has no elements. */
  443. unsigned char *lpLast(unsigned char *lp) {
  444. unsigned char *p = lp+lpGetTotalBytes(lp)-1; /* Seek EOF element. */
  445. return lpPrev(lp,p); /* Will return NULL if EOF is the only element. */
  446. }
  447. /* Return the number of elements inside the listpack. This function attempts
  448. * to use the cached value when within range, otherwise a full scan is
  449. * needed. As a side effect of calling this function, the listpack header
  450. * could be modified, because if the count is found to be already within
  451. * the 'numele' header field range, the new value is set. */
  452. uint32_t lpLength(unsigned char *lp) {
  453. uint32_t numele = lpGetNumElements(lp);
  454. if (numele != LP_HDR_NUMELE_UNKNOWN) return numele;
  455. /* Too many elements inside the listpack. We need to scan in order
  456. * to get the total number. */
  457. uint32_t count = 0;
  458. unsigned char *p = lpFirst(lp);
  459. while(p) {
  460. count++;
  461. p = lpNext(lp,p);
  462. }
  463. /* If the count is again within range of the header numele field,
  464. * set it. */
  465. if (count < LP_HDR_NUMELE_UNKNOWN) lpSetNumElements(lp,count);
  466. return count;
  467. }
  468. /* Return the listpack element pointed by 'p'.
  469. *
  470. * The function changes behavior depending on the passed 'intbuf' value.
  471. * Specifically, if 'intbuf' is NULL:
  472. *
  473. * If the element is internally encoded as an integer, the function returns
  474. * NULL and populates the integer value by reference in 'count'. Otherwise if
  475. * the element is encoded as a string a pointer to the string (pointing inside
  476. * the listpack itself) is returned, and 'count' is set to the length of the
  477. * string.
  478. *
  479. * If instead 'intbuf' points to a buffer passed by the caller, that must be
  480. * at least LP_INTBUF_SIZE bytes, the function always returns the element as
  481. * it was a string (returning the pointer to the string and setting the
  482. * 'count' argument to the string length by reference). However if the element
  483. * is encoded as an integer, the 'intbuf' buffer is used in order to store
  484. * the string representation.
  485. *
  486. * The user should use one or the other form depending on what the value will
  487. * be used for. If there is immediate usage for an integer value returned
  488. * by the function, than to pass a buffer (and convert it back to a number)
  489. * is of course useless.
  490. *
  491. * If the function is called against a badly encoded ziplist, so that there
  492. * is no valid way to parse it, the function returns like if there was an
  493. * integer encoded with value 12345678900000000 + <unrecognized byte>, this may
  494. * be an hint to understand that something is wrong. To crash in this case is
  495. * not sensible because of the different requirements of the application using
  496. * this lib.
  497. *
  498. * Similarly, there is no error returned since the listpack normally can be
  499. * assumed to be valid, so that would be a very high API cost. However a function
  500. * in order to check the integrity of the listpack at load time is provided,
  501. * check lpIsValid(). */
  502. unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf) {
  503. int64_t val;
  504. uint64_t uval, negstart, negmax;
  505. assert(p); /* assertion for valgrind (avoid NPD) */
  506. if (LP_ENCODING_IS_7BIT_UINT(p[0])) {
  507. negstart = UINT64_MAX; /* 7 bit ints are always positive. */
  508. negmax = 0;
  509. uval = p[0] & 0x7f;
  510. } else if (LP_ENCODING_IS_6BIT_STR(p[0])) {
  511. *count = LP_ENCODING_6BIT_STR_LEN(p);
  512. return p+1;
  513. } else if (LP_ENCODING_IS_13BIT_INT(p[0])) {
  514. uval = ((p[0]&0x1f)<<8) | p[1];
  515. negstart = (uint64_t)1<<12;
  516. negmax = 8191;
  517. } else if (LP_ENCODING_IS_16BIT_INT(p[0])) {
  518. uval = (uint64_t)p[1] |
  519. (uint64_t)p[2]<<8;
  520. negstart = (uint64_t)1<<15;
  521. negmax = UINT16_MAX;
  522. } else if (LP_ENCODING_IS_24BIT_INT(p[0])) {
  523. uval = (uint64_t)p[1] |
  524. (uint64_t)p[2]<<8 |
  525. (uint64_t)p[3]<<16;
  526. negstart = (uint64_t)1<<23;
  527. negmax = UINT32_MAX>>8;
  528. } else if (LP_ENCODING_IS_32BIT_INT(p[0])) {
  529. uval = (uint64_t)p[1] |
  530. (uint64_t)p[2]<<8 |
  531. (uint64_t)p[3]<<16 |
  532. (uint64_t)p[4]<<24;
  533. negstart = (uint64_t)1<<31;
  534. negmax = UINT32_MAX;
  535. } else if (LP_ENCODING_IS_64BIT_INT(p[0])) {
  536. uval = (uint64_t)p[1] |
  537. (uint64_t)p[2]<<8 |
  538. (uint64_t)p[3]<<16 |
  539. (uint64_t)p[4]<<24 |
  540. (uint64_t)p[5]<<32 |
  541. (uint64_t)p[6]<<40 |
  542. (uint64_t)p[7]<<48 |
  543. (uint64_t)p[8]<<56;
  544. negstart = (uint64_t)1<<63;
  545. negmax = UINT64_MAX;
  546. } else if (LP_ENCODING_IS_12BIT_STR(p[0])) {
  547. *count = LP_ENCODING_12BIT_STR_LEN(p);
  548. return p+2;
  549. } else if (LP_ENCODING_IS_32BIT_STR(p[0])) {
  550. *count = LP_ENCODING_32BIT_STR_LEN(p);
  551. return p+5;
  552. } else {
  553. uval = 12345678900000000ULL + p[0];
  554. negstart = UINT64_MAX;
  555. negmax = 0;
  556. }
  557. /* We reach this code path only for integer encodings.
  558. * Convert the unsigned value to the signed one using two's complement
  559. * rule. */
  560. if (uval >= negstart) {
  561. /* This three steps conversion should avoid undefined behaviors
  562. * in the unsigned -> signed conversion. */
  563. uval = negmax-uval;
  564. val = uval;
  565. val = -val-1;
  566. } else {
  567. val = uval;
  568. }
  569. /* Return the string representation of the integer or the value itself
  570. * depending on intbuf being NULL or not. */
  571. if (intbuf) {
  572. *count = snprintf((char*)intbuf,LP_INTBUF_SIZE,"%lld",(long long)val);
  573. return intbuf;
  574. } else {
  575. *count = val;
  576. return NULL;
  577. }
  578. }
  579. /* Insert, delete or replace the specified element 'ele' of length 'len' at
  580. * the specified position 'p', with 'p' being a listpack element pointer
  581. * obtained with lpFirst(), lpLast(), lpNext(), lpPrev() or lpSeek().
  582. *
  583. * The element is inserted before, after, or replaces the element pointed
  584. * by 'p' depending on the 'where' argument, that can be LP_BEFORE, LP_AFTER
  585. * or LP_REPLACE.
  586. *
  587. * If 'ele' is set to NULL, the function removes the element pointed by 'p'
  588. * instead of inserting one.
  589. *
  590. * Returns NULL on out of memory or when the listpack total length would exceed
  591. * the max allowed size of 2^32-1, otherwise the new pointer to the listpack
  592. * holding the new element is returned (and the old pointer passed is no longer
  593. * considered valid)
  594. *
  595. * If 'newp' is not NULL, at the end of a successful call '*newp' will be set
  596. * to the address of the element just added, so that it will be possible to
  597. * continue an interation with lpNext() and lpPrev().
  598. *
  599. * For deletion operations ('ele' set to NULL) 'newp' is set to the next
  600. * element, on the right of the deleted one, or to NULL if the deleted element
  601. * was the last one. */
  602. unsigned char *lpInsert(unsigned char *lp, unsigned char *ele, uint32_t size, unsigned char *p, int where, unsigned char **newp) {
  603. unsigned char intenc[LP_MAX_INT_ENCODING_LEN];
  604. unsigned char backlen[LP_MAX_BACKLEN_SIZE];
  605. uint64_t enclen; /* The length of the encoded element. */
  606. /* An element pointer set to NULL means deletion, which is conceptually
  607. * replacing the element with a zero-length element. So whatever we
  608. * get passed as 'where', set it to LP_REPLACE. */
  609. if (ele == NULL) where = LP_REPLACE;
  610. /* If we need to insert after the current element, we just jump to the
  611. * next element (that could be the EOF one) and handle the case of
  612. * inserting before. So the function will actually deal with just two
  613. * cases: LP_BEFORE and LP_REPLACE. */
  614. if (where == LP_AFTER) {
  615. p = lpSkip(p);
  616. where = LP_BEFORE;
  617. ASSERT_INTEGRITY(lp, p);
  618. }
  619. /* Store the offset of the element 'p', so that we can obtain its
  620. * address again after a reallocation. */
  621. unsigned long poff = p-lp;
  622. /* Calling lpEncodeGetType() results into the encoded version of the
  623. * element to be stored into 'intenc' in case it is representable as
  624. * an integer: in that case, the function returns LP_ENCODING_INT.
  625. * Otherwise if LP_ENCODING_STR is returned, we'll have to call
  626. * lpEncodeString() to actually write the encoded string on place later.
  627. *
  628. * Whatever the returned encoding is, 'enclen' is populated with the
  629. * length of the encoded element. */
  630. int enctype;
  631. if (ele) {
  632. enctype = lpEncodeGetType(ele,size,intenc,&enclen);
  633. } else {
  634. enctype = -1;
  635. enclen = 0;
  636. }
  637. /* We need to also encode the backward-parsable length of the element
  638. * and append it to the end: this allows to traverse the listpack from
  639. * the end to the start. */
  640. unsigned long backlen_size = ele ? lpEncodeBacklen(backlen,enclen) : 0;
  641. uint64_t old_listpack_bytes = lpGetTotalBytes(lp);
  642. uint32_t replaced_len = 0;
  643. if (where == LP_REPLACE) {
  644. replaced_len = lpCurrentEncodedSizeUnsafe(p);
  645. replaced_len += lpEncodeBacklen(NULL,replaced_len);
  646. ASSERT_INTEGRITY_LEN(lp, p, replaced_len);
  647. }
  648. uint64_t new_listpack_bytes = old_listpack_bytes + enclen + backlen_size
  649. - replaced_len;
  650. if (new_listpack_bytes > UINT32_MAX) return NULL;
  651. /* We now need to reallocate in order to make space or shrink the
  652. * allocation (in case 'when' value is LP_REPLACE and the new element is
  653. * smaller). However we do that before memmoving the memory to
  654. * make room for the new element if the final allocation will get
  655. * larger, or we do it after if the final allocation will get smaller. */
  656. unsigned char *dst = lp + poff; /* May be updated after reallocation. */
  657. /* Realloc before: we need more room. */
  658. if (new_listpack_bytes > old_listpack_bytes &&
  659. new_listpack_bytes > lp_malloc_size(lp)) {
  660. if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL;
  661. dst = lp + poff;
  662. }
  663. /* Setup the listpack relocating the elements to make the exact room
  664. * we need to store the new one. */
  665. if (where == LP_BEFORE) {
  666. memmove(dst+enclen+backlen_size,dst,old_listpack_bytes-poff);
  667. } else { /* LP_REPLACE. */
  668. long lendiff = (enclen+backlen_size)-replaced_len;
  669. memmove(dst+replaced_len+lendiff,
  670. dst+replaced_len,
  671. old_listpack_bytes-poff-replaced_len);
  672. }
  673. /* Realloc after: we need to free space. */
  674. if (new_listpack_bytes < old_listpack_bytes) {
  675. if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL;
  676. dst = lp + poff;
  677. }
  678. /* Store the entry. */
  679. if (newp) {
  680. *newp = dst;
  681. /* In case of deletion, set 'newp' to NULL if the next element is
  682. * the EOF element. */
  683. if (!ele && dst[0] == LP_EOF) *newp = NULL;
  684. }
  685. if (ele) {
  686. if (enctype == LP_ENCODING_INT) {
  687. memcpy(dst,intenc,enclen);
  688. } else {
  689. lpEncodeString(dst,ele,size);
  690. }
  691. dst += enclen;
  692. memcpy(dst,backlen,backlen_size);
  693. dst += backlen_size;
  694. }
  695. /* Update header. */
  696. if (where != LP_REPLACE || ele == NULL) {
  697. uint32_t num_elements = lpGetNumElements(lp);
  698. if (num_elements != LP_HDR_NUMELE_UNKNOWN) {
  699. if (ele)
  700. lpSetNumElements(lp,num_elements+1);
  701. else
  702. lpSetNumElements(lp,num_elements-1);
  703. }
  704. }
  705. lpSetTotalBytes(lp,new_listpack_bytes);
  706. #if 0
  707. /* This code path is normally disabled: what it does is to force listpack
  708. * to return *always* a new pointer after performing some modification to
  709. * the listpack, even if the previous allocation was enough. This is useful
  710. * in order to spot bugs in code using listpacks: by doing so we can find
  711. * if the caller forgets to set the new pointer where the listpack reference
  712. * is stored, after an update. */
  713. unsigned char *oldlp = lp;
  714. lp = lp_malloc(new_listpack_bytes);
  715. memcpy(lp,oldlp,new_listpack_bytes);
  716. if (newp) {
  717. unsigned long offset = (*newp)-oldlp;
  718. *newp = lp + offset;
  719. }
  720. /* Make sure the old allocation contains garbage. */
  721. memset(oldlp,'A',new_listpack_bytes);
  722. lp_free(oldlp);
  723. #endif
  724. return lp;
  725. }
  726. /* Append the specified element 'ele' of length 'len' at the end of the
  727. * listpack. It is implemented in terms of lpInsert(), so the return value is
  728. * the same as lpInsert(). */
  729. unsigned char *lpAppend(unsigned char *lp, unsigned char *ele, uint32_t size) {
  730. uint64_t listpack_bytes = lpGetTotalBytes(lp);
  731. unsigned char *eofptr = lp + listpack_bytes - 1;
  732. return lpInsert(lp,ele,size,eofptr,LP_BEFORE,NULL);
  733. }
  734. /* Remove the element pointed by 'p', and return the resulting listpack.
  735. * If 'newp' is not NULL, the next element pointer (to the right of the
  736. * deleted one) is returned by reference. If the deleted element was the
  737. * last one, '*newp' is set to NULL. */
  738. unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp) {
  739. return lpInsert(lp,NULL,0,p,LP_REPLACE,newp);
  740. }
  741. /* Return the total number of bytes the listpack is composed of. */
  742. uint32_t lpBytes(unsigned char *lp) {
  743. return lpGetTotalBytes(lp);
  744. }
  745. /* Seek the specified element and returns the pointer to the seeked element.
  746. * Positive indexes specify the zero-based element to seek from the head to
  747. * the tail, negative indexes specify elements starting from the tail, where
  748. * -1 means the last element, -2 the penultimate and so forth. If the index
  749. * is out of range, NULL is returned. */
  750. unsigned char *lpSeek(unsigned char *lp, long index) {
  751. int forward = 1; /* Seek forward by default. */
  752. /* We want to seek from left to right or the other way around
  753. * depending on the listpack length and the element position.
  754. * However if the listpack length cannot be obtained in constant time,
  755. * we always seek from left to right. */
  756. uint32_t numele = lpGetNumElements(lp);
  757. if (numele != LP_HDR_NUMELE_UNKNOWN) {
  758. if (index < 0) index = (long)numele+index;
  759. if (index < 0) return NULL; /* Index still < 0 means out of range. */
  760. if (index >= (long)numele) return NULL; /* Out of range the other side. */
  761. /* We want to scan right-to-left if the element we are looking for
  762. * is past the half of the listpack. */
  763. if (index > (long)numele/2) {
  764. forward = 0;
  765. /* Right to left scanning always expects a negative index. Convert
  766. * our index to negative form. */
  767. index -= numele;
  768. }
  769. } else {
  770. /* If the listpack length is unspecified, for negative indexes we
  771. * want to always scan right-to-left. */
  772. if (index < 0) forward = 0;
  773. }
  774. /* Forward and backward scanning is trivially based on lpNext()/lpPrev(). */
  775. if (forward) {
  776. unsigned char *ele = lpFirst(lp);
  777. while (index > 0 && ele) {
  778. ele = lpNext(lp,ele);
  779. index--;
  780. }
  781. return ele;
  782. } else {
  783. unsigned char *ele = lpLast(lp);
  784. while (index < -1 && ele) {
  785. ele = lpPrev(lp,ele);
  786. index++;
  787. }
  788. return ele;
  789. }
  790. }
  791. /* Same as lpFirst but without validation assert, to be used right before lpValidateNext. */
  792. unsigned char *lpValidateFirst(unsigned char *lp) {
  793. unsigned char *p = lp + LP_HDR_SIZE; /* Skip the header. */
  794. if (p[0] == LP_EOF) return NULL;
  795. return p;
  796. }
  797. /* Validate the integrity of a single listpack entry and move to the next one.
  798. * The input argument 'pp' is a reference to the current record and is advanced on exit.
  799. * Returns 1 if valid, 0 if invalid. */
  800. int lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes) {
  801. #define OUT_OF_RANGE(p) ( \
  802. (p) < lp + LP_HDR_SIZE || \
  803. (p) > lp + lpbytes - 1)
  804. unsigned char *p = *pp;
  805. if (!p)
  806. return 0;
  807. /* Before accessing p, make sure it's valid. */
  808. if (OUT_OF_RANGE(p))
  809. return 0;
  810. if (*p == LP_EOF) {
  811. *pp = NULL;
  812. return 1;
  813. }
  814. /* check that we can read the encoded size */
  815. uint32_t lenbytes = lpCurrentEncodedSizeBytes(p);
  816. if (!lenbytes)
  817. return 0;
  818. /* make sure the encoded entry length doesn't rech outside the edge of the listpack */
  819. if (OUT_OF_RANGE(p + lenbytes))
  820. return 0;
  821. /* get the entry length and encoded backlen. */
  822. unsigned long entrylen = lpCurrentEncodedSizeUnsafe(p);
  823. unsigned long encodedBacklen = lpEncodeBacklen(NULL,entrylen);
  824. entrylen += encodedBacklen;
  825. /* make sure the entry doesn't rech outside the edge of the listpack */
  826. if (OUT_OF_RANGE(p + entrylen))
  827. return 0;
  828. /* move to the next entry */
  829. p += entrylen;
  830. /* make sure the encoded length at the end patches the one at the beginning. */
  831. uint64_t prevlen = lpDecodeBacklen(p-1);
  832. if (prevlen + encodedBacklen != entrylen)
  833. return 0;
  834. *pp = p;
  835. return 1;
  836. #undef OUT_OF_RANGE
  837. }
  838. /* Validate that the entry doesn't reach outside the listpack allocation. */
  839. static inline void lpAssertValidEntry(unsigned char* lp, size_t lpbytes, unsigned char *p) {
  840. assert(lpValidateNext(lp, &p, lpbytes));
  841. }
  842. /* Validate the integrity of the data structure.
  843. * when `deep` is 0, only the integrity of the header is validated.
  844. * when `deep` is 1, we scan all the entries one by one. */
  845. int lpValidateIntegrity(unsigned char *lp, size_t size, int deep){
  846. /* Check that we can actually read the header. (and EOF) */
  847. if (size < LP_HDR_SIZE + 1)
  848. return 0;
  849. /* Check that the encoded size in the header must match the allocated size. */
  850. size_t bytes = lpGetTotalBytes(lp);
  851. if (bytes != size)
  852. return 0;
  853. /* The last byte must be the terminator. */
  854. if (lp[size-1] != LP_EOF)
  855. return 0;
  856. if (!deep)
  857. return 1;
  858. /* Validate the invividual entries. */
  859. uint32_t count = 0;
  860. unsigned char *p = lp + LP_HDR_SIZE;
  861. while(p && p[0] != LP_EOF) {
  862. if (!lpValidateNext(lp, &p, bytes))
  863. return 0;
  864. count++;
  865. }
  866. /* Check that the count in the header is correct */
  867. uint32_t numele = lpGetNumElements(lp);
  868. if (numele != LP_HDR_NUMELE_UNKNOWN && numele != count)
  869. return 0;
  870. return 1;
  871. }

相关技术文章

点击QQ咨询
开通会员
返回顶部
×
微信扫码支付
微信扫码支付
确定支付下载
请使用微信描二维码支付
×

提示信息

×

选择支付方式

  • 微信支付
  • 支付宝付款
确定支付下载