五、Redis源码数据结构之跳表skiplist(5、 Skip list of redis source code data structure)

一、前言:

有序集合Sorted Set:底层数据结构跳表+哈希表

typedef struct zset {
    dict *dict; 哈希表  --哈希表高效支持单点查询
    zskiplist *zsl; 跳表 --跳表高效支持范围查询
} zset;

源码文件:t_zset.c-各种操作实现   sercver.h-相关的结构定义

可阅读上一文章讲述了哈希表的数据结构,了解哈希表的相关内容

四、Redis源码数据结构之哈希表Hash – chch213 – 博客园 (cnblogs.com)

接下来我们重点了解下跳表数据结构。

二、跳表分析

源码跳表定义:

/* ZSETs use a specialized version of Skiplists */
typedef struct zskiplistNode {
    sds ele; // 元素
    double score; // 权重
    struct zskiplistNode *backward; // 后向指针
    struct zskiplistLevel {
        struct zskiplistNode *forward; // 前向指针
        unsigned long span; // 跨度
    } level[]; // 节点的level数组
} zskiplistNode;

typedef struct zskiplist {
    struct zskiplistNode *header, *tail; // 头节点 尾节点
    unsigned long length; // 跳表长度
    int level; // 跳表最大层数
} zskiplist;  在理解上面定义的时候我一般就把当成一个父子节点树信息来记忆理解:父节点(父元素 父元素级别 祖父节点指向 子节点信息。。-多个子节点

跳表的查询和层级数重点源码分析:

double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) {
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x, *newx;
    ....
    x = zsl->header; // 获取跳表的头
    for (i = zsl->level-1; i >= 0; i--) { // 从最大层数开始逐一遍历
        while (x->level[i].forward &&
            x->level[i].forward->ele != oldele && /* make sure not to access the
                                                     ->obj pointer if it matches
                                                     oldele */
            (x->level[i].forward->score < score ||
                (x->level[i].forward->score == score &&
                sdscmp(x->level[i].forward->ele,ele) < 0)))
            x = x->level[i].forward;
        update[i] = x;
    }
    .......
}
ZSKIPLIST_MAXLEVEL 最大层数64 ZSKIPLIST_P 随机数的值0.25
int zslRandomLevel(void) {
    int level = 1;
    while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
        level += 1;
    return (level<ZSKIPLIST_MAXLEVEL) ? level : ZSKIPLIST_MAXLEVEL;
}

创建一个zset源码分析:

void zsetConvert(robj *zobj, int encoding) {
    zset *zs;
    zskiplistNode *node, *next;
    sds ele;
    double score;
    .....

        zs = zmalloc(sizeof(*zs));
        zs->dict = dictCreate(&zsetDictType,NULL); 创建哈希表
        zs->zsl = zslCreate(); 创建跳表    ......
int zsetAdd(robj *zobj, double score, sds ele, int *flags, double *newscore) {
    /* Turn options into simple to check vars. */
    .....
    double curscore;

    /* NaN as input is an error regardless of all the other parameters. */
    ...../* Update the sorted set according to its encoding. */ 采用ziplist编码方式
    if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
    .....
                zsetConvert(zobj,OBJ_ENCODING_SKIPLIST);
    .....

    /* Note that the above block handling ziplist would have either returned or
     * converted the key to skiplist. */
    if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {  // 采用skiplist编码方式
        zset *zs = zobj->ptr;
        zskiplistNode *znode;
        dictEntry *de;

        de = dictFind(zs->dict,ele); // 哈希表函数判断元素是否存在
        if (de != NULL) { // 能查到元素
            /* NX? Return, same element already exists. */
            if (nx) {
                *flags |= ZADD_NOP;
                return 1;
            }
            curscore = *(double*)dictGetVal(de); // 获取权重信息

            /* Prepare the score for the increment if needed. */
            if (incr) { // 更新元素权重信息 
                score += curscore;
                if (isnan(score)) {
                    *flags |= ZADD_NAN;
                    return 0;
                }
                if (newscore) *newscore = score;
            }

            /* Remove and re-insert when score changes. */
            if (score != curscore) { // 权重信息发生变化 
                znode = zslUpdateScore(zs->zsl,curscore,ele,score); // 更新跳表信息
                /* Note that we did not removed the original element from
                 * the hash table representing the sorted set, so we just
                 * update the score. */
                dictGetVal(de) = &znode->score; /* Update score ptr. */ 哈希表权重信息更新
                *flags |= ZADD_UPDATED;
            }
            return 1;
        } else if (!xx) {       // 不存在元素 
            ele = sdsdup(ele);
            znode = zslInsert(zs->zsl,score,ele); // 跳表元素插入函数
            serverAssert(dictAdd(zs->dict,ele,&znode->score) == DICT_OK); // 哈希表元素插入函数
            *flags |= ZADD_ADDED;
            if (newscore) *newscore = score;
            return 1;
        } else {
            *flags |= ZADD_NOP;
            return 1;
        }
    } else {
        serverPanic("Unknown sorted set encoding");
    }
    return 0; /* Never reached. */
}
/*-----------------------------------------------------------------------------
 * Sorted set commands
 *----------------------------------------------------------------------------*/

/* This generic command implements both ZADD and ZINCRBY. */ 这个通用命令同时实现了ZADD和ZINCRBY
void zaddGenericCommand(client *c, int flags) {
    static char *nanerr = "resulting score is not a number (NaN)";
    robj *key = c->argv[1];
    robj *zobj;
    sds ele;
    double score = 0, *scores = NULL;
    int j, elements;
    int scoreidx = 0;
    /* The following vars are used in order to track what the command actually
     * did during the execution, to reply to the client and to trigger the
     * notification of keyspace change. */
    int added = 0;      /* Number of new elements added. */
    int updated = 0;    /* Number of elements with updated score. */
    int processed = 0;  /* Number of elements processed, may remain zero with
                           options like XX. */

    /* Parse options. At the end 'scoreidx' is set to the argument position
     * of the score of the first score-element pair. */
    scoreidx = 2;
    while(scoreidx < c->argc) {
        char *opt = c->argv[scoreidx]->ptr;
        if (!strcasecmp(opt,"nx")) flags |= ZADD_NX;
        else if (!strcasecmp(opt,"xx")) flags |= ZADD_XX;
        else if (!strcasecmp(opt,"ch")) flags |= ZADD_CH;
        else if (!strcasecmp(opt,"incr")) flags |= ZADD_INCR;
        else break;
        scoreidx++;
    }

    /* Turn options into simple to check vars. */
    int incr = (flags & ZADD_INCR) != 0;
    int nx = (flags & ZADD_NX) != 0;
    int xx = (flags & ZADD_XX) != 0;
    int ch = (flags & ZADD_CH) != 0;

    /* After the options, we expect to have an even number of args, since
     * we expect any number of score-element pairs. */
    elements = c->argc-scoreidx;
    if (elements % 2 || !elements) {
        addReply(c,shared.syntaxerr);
        return;
    }
    elements /= 2; /* Now this holds the number of score-element pairs. */

    /* Check for incompatible options. */
    if (nx && xx) {
        addReplyError(c,
            "XX and NX options at the same time are not compatible");
        return;
    }

    if (incr && elements > 1) {
        addReplyError(c,
            "INCR option supports a single increment-element pair");
        return;
    }

    /* Start parsing all the scores, we need to emit any syntax error
     * before executing additions to the sorted set, as the command should
     * either execute fully or nothing at all. */
    scores = zmalloc(sizeof(double)*elements);
    for (j = 0; j < elements; j++) {
        if (getDoubleFromObjectOrReply(c,c->argv[scoreidx+j*2],&scores[j],NULL)
            != C_OK) goto cleanup;
    }

    /* Lookup the key and create the sorted set if does not exist. */
    zobj = lookupKeyWrite(c->db,key);
    if (zobj == NULL) {
        if (xx) goto reply_to_client; /* No key + XX option: nothing to do. */
        if (server.zset_max_ziplist_entries == 0 ||  // 最大值128
            server.zset_max_ziplist_value < sdslen(c->argv[scoreidx+1]->ptr)) // 最大值64
        {
            zobj = createZsetObject(); // 数据较大时采用哈希表和跳表存储,降低查询的时间复杂度
        } else {
            zobj = createZsetZiplistObject(); // 数据较小时采用ziplist,节省内存
        }
        dbAdd(c->db,key,zobj);
    } else {
        if (zobj->type != OBJ_ZSET) {
            addReply(c,shared.wrongtypeerr);
            goto cleanup;
        }
    }

    for (j = 0; j < elements; j++) {
        double newscore;
        score = scores[j];
        int retflags = flags;

        ele = c->argv[scoreidx+1+j*2]->ptr;
        int retval = zsetAdd(zobj, score, ele, &retflags, &newscore);
        if (retval == 0) {
            addReplyError(c,nanerr);
            goto cleanup;
        }
        if (retflags & ZADD_ADDED) added++;
        if (retflags & ZADD_UPDATED) updated++;
        if (!(retflags & ZADD_NOP)) processed++;
        score = newscore;
    }
    server.dirty += (added+updated);
.....
}

个人只从源码分析下跳表的数据结构使用情况,比较核心细节可能没有完全看透彻,有兴趣的可再加深理解下,谢谢

————————

1、 Preface:

Ordered set sorted set: underlying data structure jump table + hash table

typedef struct zset {
    dict *dict; 哈希表  --哈希表高效支持单点查询
    zskiplist *zsl; 跳表 --跳表高效支持范围查询
} zset;

Source file: t_ zset. C – various operations to achieve sercver H-related structure definition

You can read the previous article about the data structure of hash table and understand the relevant contents of hash table

4、 Hash table of redis source code data structure hash – chch213 – blog Garden (cnblogs. Com)

Next, let’s focus on the data structure of the next hop table.

2、 Jump table analysis

Source code jump table definition:

/* ZSETs use a specialized version of Skiplists */
typedef struct zskiplistNode {
    sds ele; // 元素
    double score; // 权重
    struct zskiplistNode *backward; // 后向指针
    struct zskiplistLevel {
        struct zskiplistNode *forward; // 前向指针
        unsigned long span; // 跨度
    } level[]; // 节点的level数组
} zskiplistNode;

typedef struct zskiplist {
    struct zskiplistNode *header, *tail; // 头节点 尾节点
    unsigned long length; // 跳表长度
    int level; // 跳表最大层数
} zskiplist;  在理解上面定义的时候我一般就把当成一个父子节点树信息来记忆理解:父节点(父元素 父元素级别 祖父节点指向 子节点信息。。-多个子节点

Key source code analysis of skip table query and layer series:

double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) {
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x, *newx;
    ....
    x = zsl->header; // 获取跳表的头
    for (i = zsl->level-1; i >= 0; i--) { // 从最大层数开始逐一遍历
        while (x->level[i].forward &&
            x->level[i].forward->ele != oldele && /* make sure not to access the
                                                     ->obj pointer if it matches
                                                     oldele */
            (x->level[i].forward->score < score ||
                (x->level[i].forward->score == score &&
                sdscmp(x->level[i].forward->ele,ele) < 0)))
            x = x->level[i].forward;
        update[i] = x;
    }
    .......
}
ZSKIPLIST_MAXLEVEL 最大层数64 ZSKIPLIST_P 随机数的值0.25
int zslRandomLevel(void) {
    int level = 1;
    while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
        level += 1;
    return (level<ZSKIPLIST_MAXLEVEL) ? level : ZSKIPLIST_MAXLEVEL;
}

Create a Zset source code analysis:

void zsetConvert(robj *zobj, int encoding) {
    zset *zs;
    zskiplistNode *node, *next;
    sds ele;
    double score;
    .....

        zs = zmalloc(sizeof(*zs));
        zs->dict = dictCreate(&zsetDictType,NULL); 创建哈希表
        zs->zsl = zslCreate(); 创建跳表    ......
int zsetAdd(robj *zobj, double score, sds ele, int *flags, double *newscore) {
    /* Turn options into simple to check vars. */
    .....
    double curscore;

    /* NaN as input is an error regardless of all the other parameters. */
    ...../* Update the sorted set according to its encoding. */ 采用ziplist编码方式
    if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
    .....
                zsetConvert(zobj,OBJ_ENCODING_SKIPLIST);
    .....

    /* Note that the above block handling ziplist would have either returned or
     * converted the key to skiplist. */
    if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {  // 采用skiplist编码方式
        zset *zs = zobj->ptr;
        zskiplistNode *znode;
        dictEntry *de;

        de = dictFind(zs->dict,ele); // 哈希表函数判断元素是否存在
        if (de != NULL) { // 能查到元素
            /* NX? Return, same element already exists. */
            if (nx) {
                *flags |= ZADD_NOP;
                return 1;
            }
            curscore = *(double*)dictGetVal(de); // 获取权重信息

            /* Prepare the score for the increment if needed. */
            if (incr) { // 更新元素权重信息 
                score += curscore;
                if (isnan(score)) {
                    *flags |= ZADD_NAN;
                    return 0;
                }
                if (newscore) *newscore = score;
            }

            /* Remove and re-insert when score changes. */
            if (score != curscore) { // 权重信息发生变化 
                znode = zslUpdateScore(zs->zsl,curscore,ele,score); // 更新跳表信息
                /* Note that we did not removed the original element from
                 * the hash table representing the sorted set, so we just
                 * update the score. */
                dictGetVal(de) = &znode->score; /* Update score ptr. */ 哈希表权重信息更新
                *flags |= ZADD_UPDATED;
            }
            return 1;
        } else if (!xx) {       // 不存在元素 
            ele = sdsdup(ele);
            znode = zslInsert(zs->zsl,score,ele); // 跳表元素插入函数
            serverAssert(dictAdd(zs->dict,ele,&znode->score) == DICT_OK); // 哈希表元素插入函数
            *flags |= ZADD_ADDED;
            if (newscore) *newscore = score;
            return 1;
        } else {
            *flags |= ZADD_NOP;
            return 1;
        }
    } else {
        serverPanic("Unknown sorted set encoding");
    }
    return 0; /* Never reached. */
}
/*-----------------------------------------------------------------------------
 * Sorted set commands
 *----------------------------------------------------------------------------*/

/* This generic command implements both ZADD and ZINCRBY. */ 这个通用命令同时实现了ZADD和ZINCRBY
void zaddGenericCommand(client *c, int flags) {
    static char *nanerr = "resulting score is not a number (NaN)";
    robj *key = c->argv[1];
    robj *zobj;
    sds ele;
    double score = 0, *scores = NULL;
    int j, elements;
    int scoreidx = 0;
    /* The following vars are used in order to track what the command actually
     * did during the execution, to reply to the client and to trigger the
     * notification of keyspace change. */
    int added = 0;      /* Number of new elements added. */
    int updated = 0;    /* Number of elements with updated score. */
    int processed = 0;  /* Number of elements processed, may remain zero with
                           options like XX. */

    /* Parse options. At the end 'scoreidx' is set to the argument position
     * of the score of the first score-element pair. */
    scoreidx = 2;
    while(scoreidx < c->argc) {
        char *opt = c->argv[scoreidx]->ptr;
        if (!strcasecmp(opt,"nx")) flags |= ZADD_NX;
        else if (!strcasecmp(opt,"xx")) flags |= ZADD_XX;
        else if (!strcasecmp(opt,"ch")) flags |= ZADD_CH;
        else if (!strcasecmp(opt,"incr")) flags |= ZADD_INCR;
        else break;
        scoreidx++;
    }

    /* Turn options into simple to check vars. */
    int incr = (flags & ZADD_INCR) != 0;
    int nx = (flags & ZADD_NX) != 0;
    int xx = (flags & ZADD_XX) != 0;
    int ch = (flags & ZADD_CH) != 0;

    /* After the options, we expect to have an even number of args, since
     * we expect any number of score-element pairs. */
    elements = c->argc-scoreidx;
    if (elements % 2 || !elements) {
        addReply(c,shared.syntaxerr);
        return;
    }
    elements /= 2; /* Now this holds the number of score-element pairs. */

    /* Check for incompatible options. */
    if (nx && xx) {
        addReplyError(c,
            "XX and NX options at the same time are not compatible");
        return;
    }

    if (incr && elements > 1) {
        addReplyError(c,
            "INCR option supports a single increment-element pair");
        return;
    }

    /* Start parsing all the scores, we need to emit any syntax error
     * before executing additions to the sorted set, as the command should
     * either execute fully or nothing at all. */
    scores = zmalloc(sizeof(double)*elements);
    for (j = 0; j < elements; j++) {
        if (getDoubleFromObjectOrReply(c,c->argv[scoreidx+j*2],&scores[j],NULL)
            != C_OK) goto cleanup;
    }

    /* Lookup the key and create the sorted set if does not exist. */
    zobj = lookupKeyWrite(c->db,key);
    if (zobj == NULL) {
        if (xx) goto reply_to_client; /* No key + XX option: nothing to do. */
        if (server.zset_max_ziplist_entries == 0 ||  // 最大值128
            server.zset_max_ziplist_value < sdslen(c->argv[scoreidx+1]->ptr)) // 最大值64
        {
            zobj = createZsetObject(); // 数据较大时采用哈希表和跳表存储,降低查询的时间复杂度
        } else {
            zobj = createZsetZiplistObject(); // 数据较小时采用ziplist,节省内存
        }
        dbAdd(c->db,key,zobj);
    } else {
        if (zobj->type != OBJ_ZSET) {
            addReply(c,shared.wrongtypeerr);
            goto cleanup;
        }
    }

    for (j = 0; j < elements; j++) {
        double newscore;
        score = scores[j];
        int retflags = flags;

        ele = c->argv[scoreidx+1+j*2]->ptr;
        int retval = zsetAdd(zobj, score, ele, &retflags, &newscore);
        if (retval == 0) {
            addReplyError(c,nanerr);
            goto cleanup;
        }
        if (retflags & ZADD_ADDED) added++;
        if (retflags & ZADD_UPDATED) updated++;
        if (!(retflags & ZADD_NOP)) processed++;
        score = newscore;
    }
    server.dirty += (added+updated);
.....
}

I only analyze the use of the data structure of the jump table from the source code. I may not have seen the core details thoroughly. Those who are interested can further understand it. Thank you