2011-09-28

C语言多线程编程与并发控制最佳实践

摘要

随着多核处理器的普及，多线程编程已成为C语言开发中不可或缺的核心技能。正确的并发控制不仅能充分利用硬件资源，还能显著提升程序性能。本文系统介绍C语言多线程编程的高级技术，包括POSIX线程API、同步原语、无锁数据结构、线程池设计、性能优化策略以及常见的并发陷阱和解决方案。通过深入的理论分析和实用的代码示例，为开发者提供构建高性能、线程安全应用程序的完整指南。

1. 多线程编程基础

1.1 POSIX线程API概述

#include <pthread.h>

// Thread creation and management
// (prototypes listed for reference; the real declarations come from <pthread.h>)
int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
                   void *(*start_routine)(void*), void *arg);
int pthread_join(pthread_t thread, void **retval);
int pthread_detach(pthread_t thread);
void pthread_exit(void *retval);

// Thread attribute configuration
int pthread_attr_init(pthread_attr_t *attr);
int pthread_attr_destroy(pthread_attr_t *attr);
int pthread_attr_setdetachstate(pthread_attr_t *attr, int detachstate);

1.2 线程安全基础概念

竞态条件：多个线程同时访问共享资源导致不可预测结果
数据竞争：多个线程并发访问同一共享变量，其中至少一个是写操作，且没有正确同步
原子性：操作要么完全执行，要么完全不执行

2. 同步原语深入分析

2.1 互斥锁高级用法

#include <pthread.h>

// Recursive mutex, statically initialized.
// NOTE(review): the _NP suffix marks this initializer as non-portable. Portable
// code sets PTHREAD_MUTEX_RECURSIVE with pthread_mutexattr_settype() and then
// calls pthread_mutex_init().
pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;

// Spinlock (suited to short critical sections).
// NOTE(review): no pthread_spin_init() call is visible in this snippet; the
// spinlock has to be initialized before its first use.
pthread_spinlock_t spinlock;

// Integer counter bundled with the mutex that counter_increment() locks
// around every update.
typedef struct ThreadSafeCounter {
    pthread_mutex_t mutex;  // guards count; initialization is not shown in this snippet
    int count;              // value modified while mutex is held
} ThreadSafeCounter;

// Demonstrates nested locking: the calling thread acquires counter->mutex
// twice, bumps the count once per acquisition (net effect: count += 2), and
// then releases the mutex twice.
//
// NOTE(review): the second acquisition only succeeds if counter->mutex was
// created as a recursive mutex. The initialization is not shown here, so
// confirm it at the call site.
void counter_increment(ThreadSafeCounter* counter) {
    pthread_mutex_t* guard = &counter->mutex;

    pthread_mutex_lock(guard);      // outer acquisition
    counter->count += 1;

    pthread_mutex_lock(guard);      // nested acquisition by the same thread
    counter->count += 1;            // still runs while the lock is held twice

    // One unlock per lock, innermost first.
    pthread_mutex_unlock(guard);
    pthread_mutex_unlock(guard);
}

// Condition-variable pattern: bounded circular buffer of ints.
typedef struct BoundedBuffer {
    pthread_mutex_t mutex;      // held by buffer_put() for the whole operation
    pthread_cond_t not_empty;   // signalled by buffer_put() after storing a value
    pthread_cond_t not_full;    // buffer_put() waits on this while the buffer is full
    int* buffer;                // storage with room for `capacity` ints
    int capacity;               // number of slots; also the wrap-around modulus
    int count;                  // number of values currently stored
    int front;                  // not touched by buffer_put(); presumably the consumer index
    int rear;                   // slot the next buffer_put() writes to
} BoundedBuffer;

// Appends `value` to the buffer, blocking while the buffer is full.
//
// buf   - buffer whose mutex and condition variables are already initialized
// value - integer to store at the current rear slot
//
// After storing, rear advances with wrap-around, count grows by one, and one
// waiter on not_empty is woken.
void buffer_put(BoundedBuffer* buf, int value) {
    pthread_mutex_t* guard = &buf->mutex;

    pthread_mutex_lock(guard);

    // Re-test the predicate after every wake-up; the wait releases the mutex
    // while blocked and re-acquires it before returning.
    for (;;) {
        if (buf->count != buf->capacity) {
            break;
        }
        pthread_cond_wait(&buf->not_full, guard);
    }

    const int slot = buf->rear;
    buf->buffer[slot] = value;
    buf->rear = (slot + 1) % buf->capacity;
    buf->count += 1;

    pthread_cond_signal(&buf->not_empty);
    pthread_mutex_unlock(guard);
}

2.2 读写锁优化策略

#include <pthread.h>

// Wrapper pairing a POSIX read-write lock with two counters.
typedef struct ReadWriteLock {
    pthread_rwlock_t rwlock;  // the underlying POSIX read-write lock
    int reader_count;         // not used by the code shown; presumably active readers — TODO confirm
    int writer_count;         // not used by the code shown; presumably active writers — TODO confirm
} ReadWriteLock;

// Hand-written read-write lock (writer preference): new readers are held back
// as soon as any writer is active or waiting.
typedef struct FairRWLock {
    pthread_mutex_t mutex;      // protects every counter below
    pthread_cond_t read_cond;   // readers block here
    pthread_cond_t write_cond;  // writers block here
    int readers;                // readers currently holding the lock
    int writers;                // writers currently holding the lock
    int waiting_writers;        // writers blocked inside fair_rwlock_wrlock()
} FairRWLock;

// Acquires the lock for reading.
// Blocks on read_cond while a writer holds the lock or is waiting for it,
// then registers the caller as a reader.
void fair_rwlock_rdlock(FairRWLock* lock) {
    pthread_mutex_t* guard = &lock->mutex;

    pthread_mutex_lock(guard);

    for (;;) {
        const int writer_pending = lock->writers > 0 || lock->waiting_writers > 0;
        if (!writer_pending) {
            break;
        }
        pthread_cond_wait(&lock->read_cond, guard);
    }

    lock->readers += 1;
    pthread_mutex_unlock(guard);
}

// Acquires the lock for writing.
// The caller is counted in waiting_writers while it blocks on write_cond,
// which is what keeps new readers out; once no reader or writer holds the
// lock, the caller moves from "waiting" to "active".
void fair_rwlock_wrlock(FairRWLock* lock) {
    pthread_mutex_t* guard = &lock->mutex;

    pthread_mutex_lock(guard);
    lock->waiting_writers += 1;

    for (;;) {
        const int in_use = lock->readers > 0 || lock->writers > 0;
        if (!in_use) {
            break;
        }
        pthread_cond_wait(&lock->write_cond, guard);
    }

    lock->waiting_writers -= 1;
    lock->writers += 1;
    pthread_mutex_unlock(guard);
}

3. 无锁编程技术

3.1 原子操作与内存顺序

#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>

// Atomic types and operations.
// The counter uses plain initialization: ATOMIC_VAR_INIT was deprecated in C17
// and removed in C23, and is not needed to initialize an atomic object.
atomic_int atomic_counter = 0;

// Adds one to atomic_counter.
// memory_order_relaxed makes the increment itself atomic but imposes no
// ordering on surrounding memory accesses.
void increment_atomic(void) {
    atomic_fetch_add_explicit(&atomic_counter, 1, memory_order_relaxed);
}

// Lock-free stack: a singly linked list whose head is swapped with
// compare-and-swap.
typedef struct LockFreeNode {
    void* data;                  // caller-owned payload; never dereferenced here
    struct LockFreeNode* next;   // node below this one, or NULL at the bottom
} LockFreeNode;

typedef struct LockFreeStack {
    // Must be _Atomic: the <stdatomic.h> generic functions are only defined
    // for atomic objects. Initialize to NULL for an empty stack.
    _Atomic(LockFreeNode*) head;
} LockFreeStack;

// Pushes `data` onto the stack.
//
// stack - stack to push onto
// data  - payload pointer stored as-is
//
// Allocation failure is treated as fatal (abort) because the void return type
// gives no way to report it.
void lock_free_push(LockFreeStack* stack, void* data) {
    LockFreeNode* new_node = malloc(sizeof *new_node);
    if (new_node == NULL) {
        abort();
    }
    new_node->data = data;

    // A failed compare-exchange writes the current head back into old_head,
    // so the loop only has to re-link the new node before retrying.
    LockFreeNode* old_head = atomic_load(&stack->head);
    do {
        new_node->next = old_head;
    } while (!atomic_compare_exchange_weak(&stack->head, &old_head, new_node));
}

// Pops the most recently pushed payload.
//
// Returns the payload pointer, or NULL when the stack is empty (a stored NULL
// payload is therefore indistinguishable from "empty").
//
// NOTE(review): with concurrent poppers, old_head->next can be read after
// another thread has already freed that node, and a recycled node address can
// make the compare-exchange succeed on stale data (ABA). Safe reclamation
// (hazard pointers, epochs, tagged pointers) is required before using this
// with more than one consumer.
void* lock_free_pop(LockFreeStack* stack) {
    LockFreeNode* old_head = atomic_load(&stack->head);

    do {
        if (old_head == NULL) {
            return NULL;
        }
        // On failure old_head is refreshed, so old_head->next is re-read.
    } while (!atomic_compare_exchange_weak(&stack->head, &old_head, old_head->next));

    void* data = old_head->data;
    free(old_head);
    return data;
}

3.2 无锁队列实现

#include <stdatomic.h>

// Lock-free FIFO queue built from a linked list with atomic head/tail pointers.
// lock_free_enqueue() dereferences tail unconditionally, so the queue must be
// set up with head and tail both pointing at one dummy node whose next is NULL.
typedef struct LockFreeQueue {
    _Atomic(struct Node*) head;  // not touched by enqueue; presumably the dequeue end
    _Atomic(struct Node*) tail;  // last node, or the one just before it while an enqueue is in flight
} LockFreeQueue;

typedef struct Node {
    void* data;                  // caller-owned payload
    _Atomic(struct Node*) next;  // successor, or NULL for the last node
} Node;

// Appends `data` to the tail of the queue.
//
// queue - queue initialized with a dummy node (see LockFreeQueue)
// data  - payload pointer stored as-is
//
// Allocation failure is treated as fatal (abort) because the void return type
// gives no way to report it.
void lock_free_enqueue(LockFreeQueue* queue, void* data) {
    Node* new_node = malloc(sizeof *new_node);
    if (new_node == NULL) {
        abort();
    }
    new_node->data = data;
    atomic_init(&new_node->next, NULL);

    Node* old_tail;

    for (;;) {
        old_tail = atomic_load(&queue->tail);
        Node* old_next = atomic_load(&old_tail->next);

        // Tail moved between the two loads: the snapshot is inconsistent.
        if (old_tail != atomic_load(&queue->tail)) {
            continue;
        }

        if (old_next == NULL) {
            // Tail really is the last node: try to link the new node after it.
            // A weak CAS is fine here because a failure simply retries.
            if (atomic_compare_exchange_weak(&old_tail->next, &old_next, new_node)) {
                break;
            }
        } else {
            // Tail is lagging behind a node another thread already linked;
            // help by swinging it forward. Strong CAS so that it cannot fail
            // spuriously and waste a full retry.
            atomic_compare_exchange_strong(&queue->tail, &old_tail, old_next);
        }
    }

    // Publish the new node as tail. Failure means another thread already
    // advanced the tail for us, so the result is deliberately ignored; strong
    // CAS rules out a spurious failure that would leave the tail lagging.
    atomic_compare_exchange_strong(&queue->tail, &old_tail, new_node);
}

4. 线程池设计与实现

4.1 高性能线程池

#include <pthread.h>
#include <semaphore.h>

// State of a thread pool; a pointer to it is what worker_thread() receives as
// its argument.
typedef struct ThreadPool {
    pthread_t* threads;   // not referenced by the code shown; presumably the worker handles
    int thread_count;     // presumably the number of entries in threads — TODO confirm
    
    // Task queue (circular buffer of Task pointers)
    struct Task** task_queue;  // slots; worker_thread() reads the one at queue_front
    int queue_size;            // number of queued tasks; 0 means empty
    int queue_capacity;        // modulus used when queue_front wraps around
    int queue_front;           // index of the next task to dequeue
    int queue_rear;            // not used by worker_thread(); presumably the enqueue index
    
    pthread_mutex_t queue_mutex;     // held while the queue fields and shutdown are read or changed
    pthread_cond_t queue_not_empty;  // workers wait here while the queue is empty
    pthread_cond_t queue_not_full;   // signalled by a worker after it removes a task
    
    sem_t available_workers;  // waited on before, and posted after, each task execution
    int shutdown;             // non-zero makes workers exit once the queue is empty
} ThreadPool;

// One unit of work: a callback plus the argument it is invoked with.
// worker_thread() calls function(argument) and then free()s the Task itself.
typedef struct Task {
    void (*function)(void*);  // callback to run
    void* argument;           // passed to function unchanged; never freed by the worker
} Task;

// Thread entry point for one pool worker.
//
// arg - the ThreadPool this worker serves
//
// Repeatedly takes the task at queue_front, runs it, and frees it. The thread
// exits through pthread_exit(NULL) once shutdown is set and the queue has
// drained.
void* worker_thread(void* arg) {
    ThreadPool* pool = arg;

    for (;;) {
        pthread_mutex_lock(&pool->queue_mutex);

        // Sleep until there is work or the pool is shutting down; the
        // predicate is re-checked after every wake-up.
        while (!pool->shutdown && pool->queue_size == 0) {
            pthread_cond_wait(&pool->queue_not_empty, &pool->queue_mutex);
        }

        // Queued tasks are still drained after shutdown has been requested.
        if (pool->queue_size == 0 && pool->shutdown) {
            pthread_mutex_unlock(&pool->queue_mutex);
            pthread_exit(NULL);
        }

        const int slot = pool->queue_front;
        Task* job = pool->task_queue[slot];
        pool->queue_front = (slot + 1) % pool->queue_capacity;
        pool->queue_size -= 1;

        // A slot was just freed: wake one waiter on queue_not_full.
        pthread_cond_signal(&pool->queue_not_full);
        pthread_mutex_unlock(&pool->queue_mutex);

        // Run the task outside the queue lock, bracketed by the semaphore.
        sem_wait(&pool->available_workers);
        job->function(job->argument);
        sem_post(&pool->available_workers);

        free(job);
    }

    return NULL;
}

4.2 工作窃取线程池

// Forward declaration so this snippet compiles on its own; Task is defined
// with the thread pool, and C11 allows repeating an identical typedef.
typedef struct Task Task;

// Work-stealing deque: work_stealing_pop() takes from the `bottom` end and
// races on `top` for the last element.
typedef struct WorkStealingQueue {
    Task** tasks;         // circular array of task pointers
    int capacity;         // number of slots in tasks; used as the index modulus
    _Atomic int top;      // index advanced by compare-and-swap when the last element is taken
    _Atomic int bottom;   // one past the most recently added element
} WorkStealingQueue;

// Pops a task from the bottom end of the deque.
//
// Returns the task, or NULL when the deque is empty or the last remaining
// element was taken by another thread first.
//
// `bottom` is restored to its pre-call value on every path that does not keep
// the reserved slot — including a lost race for the last element. Skipping
// the restore there would leave bottom < top and corrupt the deque.
Task* work_stealing_pop(WorkStealingQueue* queue) {
    // Reserve the bottom slot before looking at top.
    int bottom = atomic_load(&queue->bottom) - 1;
    atomic_store(&queue->bottom, bottom);

    int top = atomic_load(&queue->top);
    if (top > bottom) {
        // Deque was already empty: undo the reservation.
        atomic_store(&queue->bottom, bottom + 1);
        return NULL;
    }

    Task* task = queue->tasks[bottom % queue->capacity];
    if (top == bottom) {
        // Last element: compete for it by advancing top. Strong CAS so that a
        // spurious failure cannot be mistaken for a lost race.
        if (!atomic_compare_exchange_strong(&queue->top, &top, top + 1)) {
            task = NULL;
        }
        // Won or lost, the deque is now empty: bottom must equal the new top.
        atomic_store(&queue->bottom, bottom + 1);
    }
    return task;
}

5. 并发性能优化

5.1 避免虚假共享

#include <stdalign.h>

// Cache-line-aligned counter.
// The alignment specifier sits on the first member: C does not accept
// `struct alignas(N) Tag { ... }`. Aligning the member raises the alignment,
// and therefore the size, of the whole struct to 64 bytes.
// NOTE(review): 64 is an assumed cache-line size — verify for the target CPU.
typedef struct CacheAlignedCounter {
    alignas(64) _Atomic long value;
    char padding[64 - sizeof(_Atomic long)];  // fills the rest of the 64-byte line
} CacheAlignedCounter;

// Thread-local storage: every thread gets its own copy, initially 0.
// _Thread_local is the standard C11 spelling of the __thread extension.
_Thread_local int thread_local_counter = 0;

// Sharded counter: increments are spread over several cache-aligned cells to
// reduce contention; the total is the sum of all cells.
typedef struct ShardedCounter {
    CacheAlignedCounter* counters;  // array of num_shards cells
    int num_shards;                 // number of cells in counters
} ShardedCounter;

// Adds one to the shard selected by thread_id.
//
// counter   - sharded counter to update
// thread_id - any integer; reduced modulo num_shards, with negative values
//             wrapped into range instead of indexing before the array
//
// Does nothing when num_shards is not positive (no shard exists to update).
void sharded_increment(ShardedCounter* counter, int thread_id) {
    const int shards = counter->num_shards;
    if (shards <= 0) {
        return;
    }

    int shard = thread_id % shards;
    if (shard < 0) {
        shard += shards;  // C's % keeps the sign of the dividend
    }
    atomic_fetch_add(&counter->counters[shard].value, 1);
}

// Returns the sum of all shards.
// Each shard is loaded atomically, but the sum is not a single atomic
// snapshot: increments that land during the loop may or may not be included.
long sharded_get_total(ShardedCounter* counter) {
    long total = 0;
    for (int i = 0; i < counter->num_shards; i++) {
        total += atomic_load(&counter->counters[i].value);
    }
    return total;
}

5.2 并发数据结构性能测试

#include <time.h>

// Runs worker_function on num_threads threads and prints the aggregate
// throughput.
//
// num_threads           - number of threads to start; values <= 0 make the
//                         function return without printing anything
// operations_per_thread - passed to every worker, smuggled through the void*
//                         argument as an intptr_t
//
// Only threads that were actually created are joined and counted, so a failed
// pthread_create() shortens the run instead of joining an uninitialized
// handle.
void benchmark_concurrent(int num_threads, int operations_per_thread) {
    if (num_threads <= 0) {
        return;  // a zero- or negative-length VLA would be undefined behavior
    }

    pthread_t threads[num_threads];
    struct timespec start, end;

    clock_gettime(CLOCK_MONOTONIC, &start);

    int started = 0;
    while (started < num_threads) {
        if (pthread_create(&threads[started], NULL, worker_function,
                           (void*)(intptr_t)operations_per_thread) != 0) {
            break;
        }
        started++;
    }

    for (int i = 0; i < started; i++) {
        pthread_join(threads[i], NULL);
    }

    clock_gettime(CLOCK_MONOTONIC, &end);

    double elapsed = (double)(end.tv_sec - start.tv_sec) +
                     (double)(end.tv_nsec - start.tv_nsec) / 1e9;

    // Multiply in double: the int product can overflow for large runs.
    double total_ops = (double)started * (double)operations_per_thread;

    printf("Throughput: %.2f ops/sec\n", total_ops / elapsed);
}

6. 调试与故障排除

6.1 死锁检测工具

#include <pthread.h>

// Minimal deadlock-detection scaffold: each thread keeps a linked list of the
// mutexes it has acquired through tracked_lock().
typedef struct LockTracker {
    pthread_mutex_t* mutex;     // mutex that was acquired
    const char* file;           // call-site file name (pointer stored, not copied)
    int line;                   // call-site line number
    struct LockTracker* next;   // previously recorded lock of the same thread
} LockTracker;

// Head of the calling thread's list; most recent acquisition first.
// _Thread_local is the standard C11 spelling of the __thread extension.
_Thread_local LockTracker* thread_locks = NULL;

// Locks `mutex` and records the acquisition for the calling thread.
//
// mutex - mutex to acquire
// file  - call-site file name, typically __FILE__; must outlive the record
// line  - call-site line number, typically __LINE__
//
// If this thread already has a record for `mutex`, a warning is printed first
// (simplified check: it only catches re-locking by the same thread). The lock
// is attempted regardless. No matching unlock/untrack function exists in this
// snippet, so records are never removed here.
void tracked_lock(pthread_mutex_t* mutex, const char* file, int line) {
    // Check for a potential deadlock (simplified).
    for (const LockTracker* held = thread_locks; held != NULL; held = held->next) {
        if (held->mutex == mutex) {
            fprintf(stderr, "Potential deadlock at %s:%d\n", file, line);
            break;
        }
    }

    // Allocate before locking so no allocation happens inside the critical
    // section.
    LockTracker* new_lock = malloc(sizeof *new_lock);

    if (pthread_mutex_lock(mutex) != 0) {
        // The mutex was not acquired, so it must not be recorded as held.
        fprintf(stderr, "Lock failed at %s:%d\n", file, line);
        free(new_lock);
        return;
    }

    if (new_lock == NULL) {
        // Out of memory: the mutex is held but cannot be tracked.
        fprintf(stderr, "Lock tracking unavailable at %s:%d\n", file, line);
        return;
    }

    new_lock->mutex = mutex;
    new_lock->file = file;
    new_lock->line = line;
    new_lock->next = thread_locks;
    thread_locks = new_lock;
}

6.2 线程安全分析工具

// Prefer tools such as ThreadSanitizer.
// Enable at compile time with: -fsanitize=thread

// Home-grown overlap detector.
//
// With THREAD_SAFETY_CHECK defined, ACCESS_SHARED(var) bumps a counter,
// evaluates `var`, and drops the counter again; finding the counter already
// non-zero means another access through the same call site is in progress.
// The counter is a static _Atomic int — one per expansion site, shared by all
// threads. It must not be thread-local: a per-thread counter can never observe
// an access made by a different thread.
//
// The checked form is a statement (do/while) and needs <stdio.h>; the
// unchecked form is a plain expression. Code that has to build both ways
// should use the macro as a statement only.
#ifdef THREAD_SAFETY_CHECK
#define ACCESS_SHARED(var) \
    do { \
        static _Atomic int access_count = 0; \
        if (access_count++ > 0) { \
            fprintf(stderr, "Concurrent access detected at %s:%d\n", \
                   __FILE__, __LINE__); \
        } \
        (var); \
        access_count--; \
    } while (0)
#else
#define ACCESS_SHARED(var) (var)
#endif

7. 现代并发编程最佳实践

7.1 C11标准并发支持

#include <threads.h>
#include <stdatomic.h>

// C11 standard thread API
// (prototypes listed for reference; the real declarations come from <threads.h>)
int thrd_create(thrd_t *thr, thrd_start_t func, void *arg);
int thrd_join(thrd_t thr, int *res);
void thrd_exit(int res);

// Standard C11 synchronization primitives.
// NOTE(review): no mtx_init()/cnd_init() call is visible in this snippet; both
// objects have to be initialized before use.
mtx_t mutex;
cnd_t condition;

7.2 异步编程模式

// No <future.h> header exists in standard C or POSIX; the pattern below is
// built on pthreads and the C allocator.
#include <pthread.h>
#include <stdlib.h>

// Promise/Future-style handle for a computation running on another thread.
typedef struct AsyncResult {
    void* result;           // value returned by the function; valid once completed != 0
    int completed;          // 0 while running, 1 once result has been stored
    pthread_mutex_t mutex;  // protects result and completed
    pthread_cond_t cond;    // signalled when completed becomes 1
} AsyncResult;

// Everything the worker thread needs. Heap-allocated by async_execute() and
// freed by function_wrapper(), so it outlives the creating call frame.
struct AsyncWrapper {
    void* (*function)(void*);
    void* arg;
    AsyncResult* result;
};

// Thread entry point: runs the user function and publishes its return value.
static void* function_wrapper(void* opaque) {
    struct AsyncWrapper* wrapper = opaque;
    void* (*function)(void*) = wrapper->function;
    void* arg = wrapper->arg;
    AsyncResult* result = wrapper->result;
    free(wrapper);

    void* value = function(arg);

    // Publish under the mutex so a waiter cannot miss the wake-up.
    pthread_mutex_lock(&result->mutex);
    result->result = value;
    result->completed = 1;
    pthread_cond_broadcast(&result->cond);
    pthread_mutex_unlock(&result->mutex);
    return NULL;
}

// Starts function(arg) on a new detached thread.
//
// function - work to run; must not be NULL
// arg      - passed to function unchanged
//
// Returns a handle to pass to async_get(), or NULL if function is NULL, memory
// is exhausted, or the thread could not be created. The caller owns the
// handle; this snippet provides no destroy function, and the handle must stay
// alive until the worker has finished publishing its result.
AsyncResult* async_execute(void* (*function)(void*), void* arg) {
    if (function == NULL) {
        return NULL;
    }

    AsyncResult* result = malloc(sizeof *result);
    struct AsyncWrapper* wrapper = malloc(sizeof *wrapper);
    if (result == NULL || wrapper == NULL) {
        free(wrapper);
        free(result);
        return NULL;
    }

    result->result = NULL;
    result->completed = 0;
    pthread_mutex_init(&result->mutex, NULL);
    pthread_cond_init(&result->cond, NULL);

    *wrapper = (struct AsyncWrapper){function, arg, result};

    pthread_t thread;
    if (pthread_create(&thread, NULL, function_wrapper, wrapper) != 0) {
        // No thread exists, so nobody would ever complete the result.
        pthread_cond_destroy(&result->cond);
        pthread_mutex_destroy(&result->mutex);
        free(wrapper);
        free(result);
        return NULL;
    }
    pthread_detach(thread);

    return result;
}

// Blocks until the computation behind `result` has finished and returns its
// value. Returns NULL when `result` is NULL (for example after a failed
// async_execute()).
void* async_get(AsyncResult* result) {
    if (result == NULL) {
        return NULL;
    }

    pthread_mutex_lock(&result->mutex);
    while (!result->completed) {
        pthread_cond_wait(&result->cond, &result->mutex);
    }
    void* value = result->result;  // read while still holding the mutex
    pthread_mutex_unlock(&result->mutex);
    return value;
}

8. 结论

多线程编程是C语言开发中的高级技能，需要深入理解同步机制、内存模型和性能特性。关键最佳实践包括：

正确使用同步原语：根据场景选择合适的锁类型
优先考虑无锁编程：在性能关键路径使用原子操作
设计线程安全接口：从开始就考虑并发安全性
性能监控与优化：使用工具分析并优化并发性能
全面的测试覆盖：包括压力测试和竞态条件测试

掌握这些高级并发技术将使您能够构建出高性能、高可靠性的多线程应用程序。

本文代码示例需要根据具体平台和编译器支持进行调整，建议在实际项目中使用成熟的并发库。

编外计划 - 日志

To be, or not to be, that is the question.