2010-04-15

C语言高级数据结构设计与算法实现

摘要

数据结构是计算机程序的核心组成部分，合适的数据结构选择直接影响程序的性能和可维护性。本文将深入探讨C语言中高级数据结构的设计与实现，包括红黑树、跳表、布隆过滤器、LRU缓存等复杂结构，以及相关的插入、删除、查找算法的优化技巧。通过详细的代码实现和性能分析，展示如何在实际项目中选择和使用合适的数据结构。

1. 引言

1.1 数据结构选择原则

在C语言编程中，数据结构的选择需要考虑以下因素：

时间复杂度：查找O(1) vs O(log n) vs O(n)，插入删除的效率
空间复杂度：内存占用、缓存友好性、内存碎片
应用场景：只读查询、频繁更新、范围查询、并发访问

1.2 通用接口设计

// 通用比较函数和访问函数
typedef int (*compare_func_t)(const void *a, const void *b);
typedef void (*visit_func_t)(void *data);
typedef unsigned long (*hash_func_t)(const void *key);

// 通用节点结构
typedef struct node {
    void *key;
    void *value;
    struct node *next;
} node_t;

2. 红黑树实现

2.1 红黑树基础结构

typedef enum { RB_RED = 0, RB_BLACK = 1 } rb_color_t;

typedef struct rb_node {
    void *key;
    void *value;
    rb_color_t color;
    struct rb_node *left, *right, *parent;
} rb_node_t;

typedef struct rb_tree {
    rb_node_t *root;
    rb_node_t *nil;     // 哨兵节点
    compare_func_t compare;
    size_t size;
} rb_tree_t;

rb_tree_t* rb_tree_create(compare_func_t compare) {
    rb_tree_t *tree = calloc(1, sizeof(rb_tree_t));
    if (!tree) return NULL;
    
    tree->nil = calloc(1, sizeof(rb_node_t));
    if (!tree->nil) {
        free(tree);
        return NULL;
    }
    
    tree->nil->color = RB_BLACK;
    tree->nil->left = tree->nil->right = tree->nil->parent = tree->nil;
    tree->root = tree->nil;
    tree->compare = compare;
    
    return tree;
}

2.2 旋转和插入操作

// 右旋转
static void rb_right_rotate(rb_tree_t *tree, rb_node_t *y) {
    rb_node_t *x = y->left;
    y->left = x->right;
    
    if (x->right != tree->nil) {
        x->right->parent = y;
    }
    
    x->parent = y->parent;
    if (y->parent == tree->nil) {
        tree->root = x;
    } else if (y == y->parent->right) {
        y->parent->right = x;
    } else {
        y->parent->left = x;
    }
    
    x->right = y;
    y->parent = x;
}

// 左旋转
static void rb_left_rotate(rb_tree_t *tree, rb_node_t *x) {
    rb_node_t *y = x->right;
    x->right = y->left;
    
    if (y->left != tree->nil) {
        y->left->parent = x;
    }
    
    y->parent = x->parent;
    if (x->parent == tree->nil) {
        tree->root = y;
    } else if (x == x->parent->left) {
        x->parent->left = y;
    } else {
        x->parent->right = y;
    }
    
    y->left = x;
    x->parent = y;
}

// 插入修复
static void rb_insert_fixup(rb_tree_t *tree, rb_node_t *z) {
    while (z->parent->color == RB_RED) {
        if (z->parent == z->parent->parent->left) {
            rb_node_t *y = z->parent->parent->right;
            
            if (y->color == RB_RED) {
                // 情况1：叔叔是红色
                z->parent->color = RB_BLACK;
                y->color = RB_BLACK;
                z->parent->parent->color = RB_RED;
                z = z->parent->parent;
            } else {
                if (z == z->parent->right) {
                    // 情况2：叔叔是黑色，z是右孩子
                    z = z->parent;
                    rb_left_rotate(tree, z);
                }
                // 情况3：叔叔是黑色，z是左孩子
                z->parent->color = RB_BLACK;
                z->parent->parent->color = RB_RED;
                rb_right_rotate(tree, z->parent->parent);
            }
        } else {
            // 对称情况处理
            // ... 省略对称代码
        }
    }
    tree->root->color = RB_BLACK;
}

// 插入节点
int rb_tree_insert(rb_tree_t *tree, void *key, void *value) {
    if (!tree || !key) return -1;
    
    rb_node_t *new_node = calloc(1, sizeof(rb_node_t));
    if (!new_node) return -1;
    
    new_node->key = key;
    new_node->value = value;
    new_node->color = RB_RED;
    new_node->left = new_node->right = new_node->parent = tree->nil;
    
    rb_node_t *y = tree->nil;
    rb_node_t *x = tree->root;
    
    // 找到插入位置
    while (x != tree->nil) {
        y = x;
        int cmp = tree->compare(new_node->key, x->key);
        if (cmp < 0) {
            x = x->left;
        } else if (cmp > 0) {
            x = x->right;
        } else {
            // 键已存在，更新值
            x->value = value;
            free(new_node);
            return 0;
        }
    }
    
    new_node->parent = y;
    if (y == tree->nil) {
        tree->root = new_node;
    } else if (tree->compare(new_node->key, y->key) < 0) {
        y->left = new_node;
    } else {
        y->right = new_node;
    }
    
    tree->size++;
    rb_insert_fixup(tree, new_node);
    
    return 0;
}

3. 跳表实现

3.1 跳表结构

#define SKIPLIST_MAX_LEVEL 16
#define SKIPLIST_P 0.25

typedef struct skiplist_node {
    void *key;
    void *value;
    struct skiplist_node *forward[1];  // 柔性数组
} skiplist_node_t;

typedef struct skiplist {
    skiplist_node_t *header;
    int level;
    size_t size;
    compare_func_t compare;
} skiplist_t;

static int random_level(void) {
    int level = 1;
    while (((double)rand() / RAND_MAX) < SKIPLIST_P && 
           level < SKIPLIST_MAX_LEVEL) {
        level++;
    }
    return level;
}

skiplist_t* skiplist_create(compare_func_t compare) {
    skiplist_t *list = calloc(1, sizeof(skiplist_t));
    if (!list) return NULL;
    
    size_t header_size = sizeof(skiplist_node_t) + 
                        SKIPLIST_MAX_LEVEL * sizeof(skiplist_node_t*);
    list->header = calloc(1, header_size);
    if (!list->header) {
        free(list);
        return NULL;
    }
    
    list->level = 1;
    list->compare = compare;
    return list;
}

3.2 跳表操作

// 插入操作
int skiplist_insert(skiplist_t *list, void *key, void *value) {
    if (!list || !key) return -1;
    
    skiplist_node_t *update[SKIPLIST_MAX_LEVEL];
    skiplist_node_t *current = list->header;
    
    // 从最高层开始搜索
    for (int i = list->level - 1; i >= 0; i--) {
        while (current->forward[i] && 
               list->compare(current->forward[i]->key, key) < 0) {
            current = current->forward[i];
        }
        update[i] = current;
    }
    
    current = current->forward[0];
    
    // 检查是否已存在
    if (current && list->compare(current->key, key) == 0) {
        current->value = value;
        return 0;
    }
    
    // 生成新节点层数
    int new_level = random_level();
    if (new_level > list->level) {
        for (int i = list->level; i < new_level; i++) {
            update[i] = list->header;
        }
        list->level = new_level;
    }
    
// 创建新节点（正确计算柔性数组大小）
size_t node_size = sizeof(skiplist_node_t) + 
                  (new_level - 1) * sizeof(skiplist_node_t*);
skiplist_node_t *new_node = calloc(1, node_size);
if (!new_node) return -1;

new_node->key = key;
new_node->value = value;

// 初始化所有前向指针
for (int i = 0; i < new_level; i++) {
    new_node->forward[i] = NULL;
}
    
    // 更新前向指针
    for (int i = 0; i < new_level; i++) {
        new_node->forward[i] = update[i]->forward[i];
        update[i]->forward[i] = new_node;
    }
    
    list->size++;
    return 0;
}

// 查找操作
void* skiplist_search(skiplist_t *list, void *key) {
    if (!list || !key) return NULL;
    
    skiplist_node_t *current = list->header;
    
    for (int i = list->level - 1; i >= 0; i--) {
        while (current->forward[i] && 
               list->compare(current->forward[i]->key, key) < 0) {
            current = current->forward[i];
        }
    }
    
    current = current->forward[0];
    
    if (current && list->compare(current->key, key) == 0) {
        return current->value;
    }
    
    return NULL;
}

4. 布隆过滤器

4.1 布隆过滤器实现

typedef struct bloom_filter {
    uint8_t *bits;
    size_t size;        // 位数组大小
    size_t hash_count;  // 哈希函数数量
    size_t element_count;
} bloom_filter_t;

bloom_filter_t* bloom_create(size_t expected_elements, double false_positive_rate) {
    bloom_filter_t *bloom = calloc(1, sizeof(bloom_filter_t));
    if (!bloom) return NULL;
    
    // 计算最优参数
    bloom->size = (size_t)(-expected_elements * log(false_positive_rate) / 
                          (log(2) * log(2)));
    bloom->hash_count = (size_t)(bloom->size * log(2) / expected_elements);
    
    size_t byte_size = (bloom->size + 7) / 8;
    bloom->bits = calloc(byte_size, sizeof(uint8_t));
    if (!bloom->bits) {
        free(bloom);
        return NULL;
    }
    
    return bloom;
}

// 简单哈希函数
static uint32_t hash_func(const void *data, size_t len, uint32_t seed) {
    const uint8_t *bytes = (const uint8_t*)data;
    uint32_t hash = seed;
    
    for (size_t i = 0; i < len; i++) {
        hash = hash * 31 + bytes[i];
    }
    
    return hash;
}

void bloom_add(bloom_filter_t *bloom, const void *data, size_t len) {
    if (!bloom || !data) return;
    
    for (size_t i = 0; i < bloom->hash_count; i++) {
        uint32_t hash = hash_func(data, len, i);
        size_t index = hash % bloom->size;
        
        size_t byte_index = index / 8;
        size_t bit_index = index % 8;
        bloom->bits[byte_index] |= (1 << bit_index);
    }
    
    bloom->element_count++;
}

int bloom_test(bloom_filter_t *bloom, const void *data, size_t len) {
    if (!bloom || !data) return 0;
    
    for (size_t i = 0; i < bloom->hash_count; i++) {
        uint32_t hash = hash_func(data, len, i);
        size_t index = hash % bloom->size;
        
        size_t byte_index = index / 8;
        size_t bit_index = index % 8;
        
        if (!(bloom->bits[byte_index] & (1 << bit_index))) {
            return 0;  // 确定不存在
        }
    }
    
    return 1;  // 可能存在
}

5. LRU缓存实现

5.1 基于哈希表和双向链表的LRU

typedef struct lru_node {
    void *key;
    void *value;
    struct lru_node *prev;
    struct lru_node *next;
} lru_node_t;

typedef struct lru_cache {
    lru_node_t *head;       // 虚拟头节点
    lru_node_t *tail;       // 虚拟尾节点
    node_t **hash_table;    // 哈希表
    size_t capacity;
    size_t size;
    size_t hash_size;
    hash_func_t hash;
    compare_func_t compare;
} lru_cache_t;

lru_cache_t* lru_create(size_t capacity, hash_func_t hash, compare_func_t compare) {
    lru_cache_t *cache = calloc(1, sizeof(lru_cache_t));
    if (!cache) return NULL;
    
    cache->capacity = capacity;
    cache->hash_size = capacity * 2;  // 减少哈希冲突
    cache->hash = hash;
    cache->compare = compare;
    
    // 创建虚拟头尾节点
    cache->head = calloc(1, sizeof(lru_node_t));
    cache->tail = calloc(1, sizeof(lru_node_t));
    if (!cache->head || !cache->tail) {
        lru_destroy(cache);
        return NULL;
    }
    
    cache->head->next = cache->tail;
    cache->tail->prev = cache->head;
    
    // 创建哈希表
    cache->hash_table = calloc(cache->hash_size, sizeof(node_t*));
    if (!cache->hash_table) {
        lru_destroy(cache);
        return NULL;
    }
    
    return cache;
}

// 移动节点到头部
static void move_to_head(lru_cache_t *cache, lru_node_t *node) {
    // 从当前位置移除
    node->prev->next = node->next;
    node->next->prev = node->prev;
    
    // 插入到头部
    node->next = cache->head->next;
    node->prev = cache->head;
    cache->head->next->prev = node;
    cache->head->next = node;
}

// 移除尾部节点
static lru_node_t* remove_tail(lru_cache_t *cache) {
    lru_node_t *last = cache->tail->prev;
    last->prev->next = cache->tail;
    cache->tail->prev = last->prev;
    return last;
}

// 获取值
void* lru_get(lru_cache_t *cache, void *key) {
    if (!cache || !key) return NULL;
    
    // 在哈希表中查找
    size_t hash_index = cache->hash(key) % cache->hash_size;
    node_t *hash_node = cache->hash_table[hash_index];
    
    while (hash_node) {
        if (cache->compare(hash_node->key, key) == 0) {
            lru_node_t *lru_node = (lru_node_t*)hash_node->value;
            move_to_head(cache, lru_node);
            return lru_node->value;
        }
        hash_node = hash_node->next;
    }
    
    return NULL;  // 未找到
}

// 设置值
int lru_put(lru_cache_t *cache, void *key, void *value) {
    if (!cache || !key) return -1;
    
    // 检查是否已存在
    void *existing = lru_get(cache, key);
    if (existing) {
        // 更新现有值
        size_t hash_index = cache->hash(key) % cache->hash_size;
        node_t *hash_node = cache->hash_table[hash_index];
        
        while (hash_node) {
            if (cache->compare(hash_node->key, key) == 0) {
                lru_node_t *lru_node = (lru_node_t*)hash_node->value;
                lru_node->value = value;
                return 0;
            }
            hash_node = hash_node->next;
        }
    }
    
    // 创建新节点
    lru_node_t *new_lru_node = calloc(1, sizeof(lru_node_t));
    if (!new_lru_node) return -1;
    
    new_lru_node->key = key;
    new_lru_node->value = value;
    
    // 添加到哈希表
    size_t hash_index = cache->hash(key) % cache->hash_size;
    node_t *new_hash_node = calloc(1, sizeof(node_t));
    if (!new_hash_node) {
        free(new_lru_node);
        return -1;
    }
    
    new_hash_node->key = key;
    new_hash_node->value = new_lru_node;
    new_hash_node->next = cache->hash_table[hash_index];
    cache->hash_table[hash_index] = new_hash_node;
    
    // 添加到链表头部
    new_lru_node->next = cache->head->next;
    new_lru_node->prev = cache->head;
    cache->head->next->prev = new_lru_node;
    cache->head->next = new_lru_node;
    
    cache->size++;
    
    // 检查容量限制
    if (cache->size > cache->capacity) {
        lru_node_t *tail_node = remove_tail(cache);
        // 从哈希表中移除
        // ... 移除逻辑
        free(tail_node);
        cache->size--;
    }
    
    return 0;
}

6. 性能分析与应用选择

6.1 数据结构性能对比

数据结构	查找	插入	删除	空间复杂度	特点
红黑树	O(log n)	O(log n)	O(log n)	O(n)	严格平衡，有序
跳表	O(log n)	O(log n)	O(log n)	O(n)	概率平衡，并发友好
布隆过滤器	O(k)	O(k)	不支持	O(m)	快速判断存在性
LRU缓存	O(1)	O(1)	O(1)	O(n)	缓存淘汰策略

6.2 应用场景选择

// 根据需求选择数据结构
typedef struct data_structure_selector {
    int need_ordering;       // 是否需要有序
    int frequent_range_query; // 是否频繁范围查询
    int memory_constrained;   // 是否内存受限
    int concurrent_access;    // 是否并发访问
    int approximate_ok;       // 是否允许近似结果
} ds_selector_t;

const char* recommend_data_structure(ds_selector_t *req) {
    if (req->approximate_ok && req->memory_constrained) {
        return "Bloom Filter";
    }
    
    if (req->need_ordering && req->frequent_range_query) {
        return "B+ Tree";
    }
    
    if (req->concurrent_access) {
        return "Skip List";
    }
    
    if (req->need_ordering) {
        return "Red-Black Tree";
    }
    
    return "Hash Table";
}

7. 总结

本文介绍了C语言中几种重要的高级数据结构：

红黑树：自平衡二叉搜索树，适用于需要有序访问的场景
跳表：概率性平衡结构，并发友好，实现简单
布隆过滤器：空间效率极高的概率性数据结构
LRU缓存：基于哈希表和双向链表的缓存淘汰策略

选择合适的数据结构需要综合考虑时间复杂度、空间复杂度、并发性能和实现复杂度等因素。

编外计划 - 日志

To be or not to be,--that is question.