Nginx Load Balancing Configuration: Building a Highly Available Web Service Architecture in Practice

Introduction

As a high-performance web server and reverse proxy, Nginx provides load balancing capabilities that form a core building block of highly available, high-concurrency web architectures. This article walks through Nginx load balancing configuration, algorithm selection, and optimization strategies.

Load Balancing Basics

1. Load Balancing Algorithms

# nginx.conf
http {
    # Round robin (default)
    upstream backend_round_robin {
        server 192.168.1.10:8080;
        server 192.168.1.11:8080;
        server 192.168.1.12:8080;
    }

    # Weighted round robin
    upstream backend_weighted {
        server 192.168.1.10:8080 weight=3;
        server 192.168.1.11:8080 weight=2;
        server 192.168.1.12:8080 weight=1;
    }

    # IP hash
    upstream backend_ip_hash {
        ip_hash;
        server 192.168.1.10:8080;
        server 192.168.1.11:8080;
        server 192.168.1.12:8080;
    }

    # Least connections
    upstream backend_least_conn {
        least_conn;
        server 192.168.1.10:8080;
        server 192.168.1.11:8080;
        server 192.168.1.12:8080;
    }

    # Consistent hash (requires a third-party module)
    upstream backend_consistent_hash {
        consistent_hash $request_uri;
        server 192.168.1.10:8080;
        server 192.168.1.11:8080;
        server 192.168.1.12:8080;
    }
}
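
Any of these groups is attached to a virtual server simply by pointing proxy_pass at the upstream name. A minimal sketch, assuming the backend_least_conn group defined above and a server block placed inside the same http context; after changing upstreams, validate and reload rather than restarting:

server {
    listen 80;
    server_name example.com;

    location / {
        # Forward requests to the least-connections group defined above
        proxy_pass http://backend_least_conn;
    }
}

# Validate the configuration, then reload without dropping existing connections:
#   nginx -t && nginx -s reload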

2. Server State Management

upstream backend_with_backup {
    # Primary servers
    server 192.168.1.10:8080 max_fails=3 fail_timeout=30s;
    server 192.168.1.11:8080 max_fails=3 fail_timeout=30s;

    # Backup server (used only when the primary servers are unavailable)
    server 192.168.1.12:8080 backup;

    # Temporarily taken out of rotation
    server 192.168.1.13:8080 down;

    # Slow start after recovery (Nginx Plus only)
    server 192.168.1.14:8080 slow_start=30s;

    # Limit on concurrent connections to this server
    server 192.168.1.15:8080 max_conns=100;
}

Advanced Load Balancing Configuration

1. URI-Based Load Balancing

http {
    # API server group
    upstream api_servers {
        server 192.168.1.20:8080;
        server 192.168.1.21:8080;
    }

    # Static asset server group
    upstream static_servers {
        server 192.168.1.30:8080;
        server 192.168.1.31:8080;
    }

    # Image server group
    upstream image_servers {
        server 192.168.1.40:8080;
        server 192.168.1.41:8080;
    }

    # Note: the static_cache and image_cache zones referenced below must be
    # declared at this level with proxy_cache_path before they can be used.

    server {
        listen 80;
        server_name example.com;

        # API requests
        location /api/ {
            proxy_pass http://api_servers;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;

            # API-specific timeouts
            proxy_connect_timeout 5s;
            proxy_send_timeout 10s;
            proxy_read_timeout 10s;
        }

        # Static assets
        location ~* \.(css|js|html)$ {
            proxy_pass http://static_servers;
            proxy_cache static_cache;
            proxy_cache_valid 200 1h;
            proxy_cache_use_stale error timeout updating;
            add_header X-Cache-Status $upstream_cache_status;
        }

        # Images
        location ~* \.(jpg|jpeg|png|gif|ico|svg)$ {
            proxy_pass http://image_servers;
            proxy_cache image_cache;
            proxy_cache_valid 200 24h;
            proxy_cache_use_stale error timeout updating;
            expires 30d;
            add_header Cache-Control "public, immutable";
        }

        # Everything else
        location / {
            proxy_pass http://api_servers;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        }
    }
}
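
A few curl requests make it easy to confirm that each path class reaches the intended group. The commands below are a sketch: the concrete paths are placeholders, and example.com is assumed to resolve to this Nginx instance (use --resolve or an /etc/hosts entry in a test environment):

# API traffic should go to api_servers
curl -sI http://example.com/api/health | head -n 1

# Static assets should come back with the X-Cache-Status header set by the static location
curl -sI http://example.com/app.css | grep -i x-cache-status

# Images should carry the long-lived Cache-Control header from the image location
curl -sI http://example.com/logo.png | grep -i cache-control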

2. Geo-Based Load Balancing

http {
    # Map client address ranges to a region label
    # (the public CIDR blocks below are placeholders; replace them with real ranges or a GeoIP database)
    geo $geo_region {
        default        global;
        127.0.0.0/8    local;
        10.0.0.0/8     internal;
        192.168.0.0/16 internal;
        1.2.3.0/24     asia;
        4.5.6.0/24     europe;
        7.8.9.0/24     america;
    }

    # Server groups per region
    upstream asia_servers {
        server 192.168.1.50:8080;
        server 192.168.1.51:8080;
    }

    upstream europe_servers {
        server 192.168.1.60:8080;
        server 192.168.1.61:8080;
    }

    upstream america_servers {
        server 192.168.1.70:8080;
        server 192.168.1.71:8080;
    }

    upstream global_servers {
        server 192.168.1.80:8080;
        server 192.168.1.81:8080;
    }

    server {
        listen 80;
        server_name example.com;

        location / {
            # Pick a server group by region. A chain of if blocks works,
            # but a map is usually cleaner (see the sketch after this example).
            if ($geo_region = "asia") {
                proxy_pass http://asia_servers;
            }
            if ($geo_region = "europe") {
                proxy_pass http://europe_servers;
            }
            if ($geo_region = "america") {
                proxy_pass http://america_servers;
            }

            # Default: global servers
            proxy_pass http://global_servers;

            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Geo-Region $geo_region;
        }
    }
}
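
The if chain above works because proxy_pass is allowed inside if-in-location, but a map keeps the selection logic in one place and avoids if entirely. A minimal alternative sketch, reusing the $geo_region variable and the upstream groups defined above:

# Map the region label straight to an upstream group name
map $geo_region $geo_upstream {
    asia     asia_servers;
    europe   europe_servers;
    america  america_servers;
    default  global_servers;
}

server {
    listen 80;
    server_name example.com;

    location / {
        # A variable in proxy_pass is resolved against the configured upstream groups
        proxy_pass http://$geo_upstream;
        proxy_set_header Host $host;
        proxy_set_header X-Geo-Region $geo_region;
    }
}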

3. Dynamic Load Balancing Configuration

# Dynamic reconfiguration with Nginx Plus
http {
    upstream backend {
        # A shared memory zone is required for the API and for active health checks
        zone backend 64k;

        # Initial servers
        server 192.168.1.10:8080;
        server 192.168.1.11:8080;
    }

    server {
        listen 80;
        server_name example.com;

        location / {
            proxy_pass http://backend;
            health_check;    # active health checks (Nginx Plus)
        }

        # REST API for managing upstreams (Nginx Plus);
        # the scripts later in this article assume the API lives under /api
        location /api {
            api write=on;
            allow 127.0.0.1;
            allow 192.168.1.0/24;
            deny all;
        }
    }
}
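
With api write=on enabled, upstream members can be inspected and changed at runtime over HTTP. A few illustrative calls, assuming Nginx Plus and API version 6 (the version segment and server IDs depend on your installation):

# List the current servers in the backend group
curl -s http://localhost/api/6/http/upstreams/backend/servers

# Add a new server to the group without reloading
curl -s -X POST -H "Content-Type: application/json" \
    -d '{"server": "192.168.1.12:8080"}' \
    http://localhost/api/6/http/upstreams/backend/servers

# Drain an existing server (ID 0 here) before maintenance
curl -s -X PATCH -H "Content-Type: application/json" \
    -d '{"drain": true}' \
    http://localhost/api/6/http/upstreams/backend/servers/0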

Health Check Configuration

1. Passive Health Checks

upstream backend {
    server 192.168.1.10:8080 max_fails=3 fail_timeout=30s;
    server 192.168.1.11:8080 max_fails=3 fail_timeout=30s;
    server 192.168.1.12:8080 max_fails=3 fail_timeout=30s;
}

server {
    listen 80;
    server_name example.com;

    location / {
        proxy_pass http://backend;
        proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
        proxy_next_upstream_tries 3;
        proxy_next_upstream_timeout 10s;

        proxy_connect_timeout 5s;
        proxy_send_timeout 10s;
        proxy_read_timeout 10s;
    }
}
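
Passive checks leave no explicit health log, so it helps to record which peer actually served each request. A small addition at the http level, using the standard upstream log variables:

# Log the peer that handled the request, its status, and the upstream response time
log_format upstream_log '$remote_addr -> $upstream_addr '
                        '[$time_local] "$request" $status '
                        'upstream_status=$upstream_status '
                        'rt=$request_time urt=$upstream_response_time';

access_log /var/log/nginx/upstream_access.log upstream_log;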

2. Active Health Checks (Nginx Plus)

upstream backend {
    zone backend 64k;
    server 192.168.1.10:8080;
    server 192.168.1.11:8080;
    server 192.168.1.12:8080;
}

server {
    listen 80;
    server_name example.com;

    location / {
        proxy_pass http://backend;

        # Active health check settings (Nginx Plus)
        health_check interval=5s
                     fails=3
                     passes=2
                     uri=/health
                     match=server_ok;
    }
}

# Health check match conditions (defined at the http level)
match server_ok {
    status 200;
    header Content-Type ~ "application/json";
    body ~ '"status"\s*:\s*"ok"';
}
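
The match block only passes when the backend's /health endpoint really returns the expected status, header, and body. A quick manual check against one backend, with the response shape the rules above expect:

# The health endpoint should answer 200 with a JSON body containing "status":"ok"
curl -i http://192.168.1.10:8080/health

# Expected shape of the response body:
#   {"status": "ok"}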

3. Custom Health Check Script

#!/bin/bash

# Nginx health check script

UPSTREAM_SERVERS=(
    "192.168.1.10:8080"
    "192.168.1.11:8080"
    "192.168.1.12:8080"
)

HEALTH_CHECK_URL="/health"
TIMEOUT=5
LOG_FILE="/var/log/nginx/health_check.log"

function check_server() {
    local server=$1
    local url="http://${server}${HEALTH_CHECK_URL}"

    # Send a health check request and capture only the HTTP status code
    response=$(curl -s -w "%{http_code}" -o /dev/null --connect-timeout "$TIMEOUT" "$url")

    if [ "$response" = "200" ]; then
        echo "$(date): Server $server is healthy" >> "$LOG_FILE"
        return 0
    else
        echo "$(date): Server $server is unhealthy (HTTP $response)" >> "$LOG_FILE"
        return 1
    fi
}

function update_upstream() {
    local server=$1
    local action=$2  # "enable" or "disable"

    # Mark the server up/down through the Nginx Plus API.
    # Note: the .../servers/{id} path expects the numeric server ID, not the
    # address:port pair; look the ID up via GET .../servers first, and adjust
    # the API version (6 here) to match your Nginx Plus release.
    if [ "$action" = "disable" ]; then
        curl -s -X PATCH -d '{"down":true}' \
            "http://localhost/api/6/http/upstreams/backend/servers/${server}"
    else
        curl -s -X PATCH -d '{"down":false}' \
            "http://localhost/api/6/http/upstreams/backend/servers/${server}"
    fi
}

# Main loop
while true; do
    for server in "${UPSTREAM_SERVERS[@]}"; do
        if check_server "$server"; then
            update_upstream "$server" "enable"
        else
            update_upstream "$server" "disable"
        fi
    done

    sleep 10
done
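
To keep this script running and restarted on failure, it can be wrapped in a small systemd unit. A sketch, assuming the script is installed as /usr/local/bin/nginx_health_check.sh (path and unit name are placeholders):

# /etc/systemd/system/nginx-health-check.service
[Unit]
Description=Custom Nginx upstream health check
After=network-online.target nginx.service

[Service]
ExecStart=/usr/local/bin/nginx_health_check.sh
Restart=always
RestartSec=5

[Install]
WantedBy=multi-user.target

# Enable and start it:
#   systemctl daemon-reload && systemctl enable --now nginx-health-check.service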

Session Persistence Configuration

1. IP Hash Session Persistence

upstream backend {
    ip_hash;
    server 192.168.1.10:8080;
    server 192.168.1.11:8080;
    server 192.168.1.12:8080;
}

server {
    listen 80;
    server_name example.com;

    location / {
        proxy_pass http://backend;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    }
}
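
ip_hash keys on the client address, so when Nginx sits behind another proxy or a NAT gateway most traffic can hash onto a single backend. In open-source Nginx the generic hash directive is an alternative that can key on any variable; a sketch that hashes the client address forwarded by an upstream edge proxy:

upstream backend {
    # Consistent hashing on the forwarded client address instead of the direct peer address
    hash $http_x_forwarded_for consistent;
    server 192.168.1.10:8080;
    server 192.168.1.11:8080;
    server 192.168.1.12:8080;
}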

2. Cookie-Based Session Persistence

upstream backend {
    # route= tags each server with an ID; it takes effect together with the
    # sticky route directive below (both are Nginx Plus features)
    server 192.168.1.10:8080 route=server1;
    server 192.168.1.11:8080 route=server2;
    server 192.168.1.12:8080 route=server3;

    # Route requests by the value of the server_route cookie (Nginx Plus)
    sticky route $cookie_server_route;
}

server {
    listen 80;
    server_name example.com;

    location / {
        proxy_pass http://backend;

        # Harden session cookies set by the backend
        proxy_cookie_path / "/; Secure; HttpOnly";

        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    }
}
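
If the application does not want to manage the routing cookie itself, Nginx Plus can also issue the cookie automatically with the sticky cookie variant; a minimal sketch (the cookie name srv_id is arbitrary):

upstream backend {
    server 192.168.1.10:8080;
    server 192.168.1.11:8080;
    server 192.168.1.12:8080;

    # Nginx Plus sets and reads the srv_id cookie automatically
    sticky cookie srv_id expires=1h path=/;
}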

3. Custom Session Persistence

http {
    # Take the first two characters of the session ID as a pool key
    map $cookie_sessionid $backend_pool {
        ~^(..).*$ $1;
    }

    upstream backend_00 {
        server 192.168.1.10:8080;
    }

    upstream backend_01 {
        server 192.168.1.11:8080;
    }

    upstream backend_02 {
        server 192.168.1.12:8080;
    }

    upstream backend_default {
        server 192.168.1.10:8080;
        server 192.168.1.11:8080;
        server 192.168.1.12:8080;
    }

    server {
        listen 80;
        server_name example.com;

        location / {
            # Choose the backend pool from the session ID prefix;
            # anything that does not match falls back to the default pool
            set $backend "backend_default";

            if ($backend_pool = "00") {
                set $backend "backend_00";
            }
            if ($backend_pool = "01") {
                set $backend "backend_01";
            }
            if ($backend_pool = "02") {
                set $backend "backend_02";
            }

            proxy_pass http://$backend;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        }
    }
}
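
To verify the routing, temporarily expose $upstream_addr as a response header inside the location and send requests with different session prefixes (the cookie value below is made up):

# Temporarily add inside the location block above:
#   add_header X-Upstream-Addr $upstream_addr always;

# A sessionid starting with "01" should be routed to backend_01 (192.168.1.11)
curl -sI -H "Cookie: sessionid=01f3a9c2" http://example.com/ | grep -i x-upstream-addr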

Load Balancer Monitoring

1. Status Monitoring Configuration

http {
    upstream backend {
        zone backend 64k;
        server 192.168.1.10:8080;
        server 192.168.1.11:8080;
        server 192.168.1.12:8080;
    }

    server {
        listen 80;
        server_name example.com;

        location / {
            proxy_pass http://backend;
        }

        # Basic status page (stub_status module)
        location /nginx_status {
            stub_status on;
            access_log off;
            allow 127.0.0.1;        # needed if the monitoring script below runs on this host
            allow 192.168.1.0/24;
            deny all;
        }

        # Upstream status via the REST API (Nginx Plus)
        location /upstream_status {
            api;
            access_log off;
            allow 127.0.0.1;
            allow 192.168.1.0/24;
            deny all;
        }
    }
}
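
The stub_status page returns a short plain-text report; the monitoring script in the next section parses exactly this format:

# curl http://127.0.0.1/nginx_status
Active connections: 291
server accepts handled requests
 16630948 16630948 31070465
Reading: 6 Writing: 179 Waiting: 106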

2. Monitoring Script

#!/bin/bash

# Nginx load balancer monitoring script

NGINX_STATUS_URL="http://localhost/nginx_status"
# Assumes the REST API is exposed at /api as in the dynamic configuration example (Nginx Plus)
UPSTREAM_API_URL="http://localhost/api/6/http/upstreams"
LOG_FILE="/var/log/nginx/lb_monitor.log"
ALERT_EMAIL="admin@example.com"

function get_nginx_stats() {
    # Parse the stub_status output shown above
    curl -s "$NGINX_STATUS_URL" | awk '
        /Active connections/ { active = $3 }
        /Reading/ { reading = $2; writing = $4; waiting = $6 }
        END {
            print "active_connections:" active
            print "reading:" reading
            print "writing:" writing
            print "waiting:" waiting
        }
    '
}

function get_upstream_stats() {
    # Summarize each upstream: total servers and how many are up.
    # Depending on the API version, the response may be keyed by upstream name
    # with a "peers" array rather than "servers"; adjust the jq filter to match.
    curl -s "$UPSTREAM_API_URL" | jq -r '
        .[] |
        "upstream:" + .name +
        " servers:" + (.servers | length | tostring) +
        " active:" + ([.servers[] | select(.state == "up")] | length | tostring)
    '
}

function check_thresholds() {
    local active_connections=$1
    local waiting_connections=$2

    # Alert when connection counts exceed the thresholds
    if [ "$active_connections" -gt 1000 ]; then
        echo "$(date): High active connections: $active_connections" >> "$LOG_FILE"
        send_alert "High active connections: $active_connections"
    fi

    if [ "$waiting_connections" -gt 500 ]; then
        echo "$(date): High waiting connections: $waiting_connections" >> "$LOG_FILE"
        send_alert "High waiting connections: $waiting_connections"
    fi
}

function send_alert() {
    local message=$1
    echo "$message" | mail -s "Nginx Load Balancer Alert" "$ALERT_EMAIL"
}

# Main monitoring loop
while true; do
    echo "$(date): Checking Nginx status..." >> "$LOG_FILE"

    # Nginx connection statistics
    nginx_stats=$(get_nginx_stats)
    echo "$nginx_stats" >> "$LOG_FILE"

    # Upstream statistics
    upstream_stats=$(get_upstream_stats)
    echo "$upstream_stats" >> "$LOG_FILE"

    # Extract connection counts
    active_connections=$(echo "$nginx_stats" | grep "active_connections" | cut -d: -f2)
    waiting_connections=$(echo "$nginx_stats" | grep "waiting" | cut -d: -f2)

    # Check thresholds
    check_thresholds "$active_connections" "$waiting_connections"

    sleep 60
done

Performance Optimization

1. Connection Optimization

http {
    # Upstream connection pooling
    upstream backend {
        server 192.168.1.10:8080;
        server 192.168.1.11:8080;

        # Keep idle connections to the backends open
        # (keepalive_requests and keepalive_timeout in upstream context require a reasonably recent nginx)
        keepalive 32;
        keepalive_requests 100;
        keepalive_timeout 60s;
    }

    server {
        listen 80;
        server_name example.com;

        location / {
            proxy_pass http://backend;

            # Required for upstream keepalive: HTTP/1.1 and an empty Connection header
            proxy_http_version 1.1;
            proxy_set_header Connection "";
            proxy_connect_timeout 5s;
            proxy_send_timeout 10s;
            proxy_read_timeout 10s;

            # Buffering
            proxy_buffering on;
            proxy_buffer_size 4k;
            proxy_buffers 8 4k;
            proxy_busy_buffers_size 8k;
        }
    }
}

2. Cache Optimization

http {
    # Cache storage
    proxy_cache_path /var/cache/nginx/proxy
        levels=1:2
        keys_zone=proxy_cache:10m
        max_size=1g
        inactive=60m
        use_temp_path=off;

    upstream backend {
        server 192.168.1.10:8080;
        server 192.168.1.11:8080;
    }

    server {
        listen 80;
        server_name example.com;

        location / {
            proxy_pass http://backend;

            # Cache settings
            proxy_cache proxy_cache;
            proxy_cache_key $scheme$proxy_host$request_uri;
            proxy_cache_valid 200 302 10m;
            proxy_cache_valid 404 1m;
            proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
            proxy_cache_lock on;
            proxy_cache_lock_timeout 5s;

            # Expose cache status and ignore backend cache headers
            add_header X-Cache-Status $upstream_cache_status;
            proxy_ignore_headers Cache-Control Expires;
        }
    }
}
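
The X-Cache-Status header makes cache behaviour easy to observe: the first request for a URL should report MISS, and a repeat within the validity window should report HIT (EXPIRED, STALE, and UPDATING are also possible values):

# First request populates the cache
curl -sI http://example.com/ | grep -i x-cache-status     # X-Cache-Status: MISS

# Second request is served from the cache
curl -sI http://example.com/ | grep -i x-cache-status     # X-Cache-Status: HIT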

Failover Configuration

1. Automatic Failover

upstream primary {
    server 192.168.1.10:8080 max_fails=3 fail_timeout=30s;
    server 192.168.1.11:8080 max_fails=3 fail_timeout=30s;
}

upstream backup {
    server 192.168.1.20:8080;
    server 192.168.1.21:8080;
}

server {
    listen 80;
    server_name example.com;

    location / {
        # Primary upstream
        proxy_pass http://primary;

        # Fail over to the backup upstream when the primary group cannot be reached.
        # error_page catches the 502/503/504 that nginx itself generates; to also
        # intercept error codes returned by the backends, enable proxy_intercept_errors.
        error_page 502 503 504 = @fallback;

        proxy_connect_timeout 5s;
        proxy_send_timeout 10s;
        proxy_read_timeout 10s;
    }

    location @fallback {
        proxy_pass http://backup;
        proxy_connect_timeout 5s;
        proxy_send_timeout 10s;
        proxy_read_timeout 10s;
    }
}
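
A simple way to exercise the fallback path in a test environment is to stop the primary backends and watch the status code stay healthy while @fallback takes over (assumes a throwaway test setup):

# Repeatedly request the site while the primary backends are being stopped;
# responses should keep returning 200 once @fallback kicks in
while true; do
    curl -s -o /dev/null -w "%{http_code} %{time_total}s\n" http://example.com/
    sleep 1
done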

2. Failover Script

#!/bin/bash

# Nginx failover script

PRIMARY_UPSTREAM="primary"
BACKUP_UPSTREAM="backup"
NGINX_CONFIG="/etc/nginx/nginx.conf"
NGINX_PID="/var/run/nginx.pid"
LOG_FILE="/var/log/nginx/failover.log"

function check_upstream() {
    local upstream=$1
    local health_url="http://localhost/api/6/http/upstreams/$upstream"

    # Count healthy servers in the upstream via the Nginx Plus API.
    # Depending on the API version the field may be "peers" instead of "servers".
    healthy_servers=$(curl -s "$health_url" | jq '[.servers[] | select(.state == "up")] | length')

    echo "$healthy_servers"
}

function switch_to_backup() {
    echo "$(date): Switching to backup upstream" >> "$LOG_FILE"

    # Rewrite the configuration (use | as the sed delimiter so the // in the URLs does not break the expression)
    sed -i 's|proxy_pass http://primary|proxy_pass http://backup|g' "$NGINX_CONFIG"

    # Reload the configuration
    nginx -s reload

    echo "$(date): Switched to backup upstream" >> "$LOG_FILE"
}

function switch_to_primary() {
    echo "$(date): Switching back to primary upstream" >> "$LOG_FILE"

    # Rewrite the configuration
    sed -i 's|proxy_pass http://backup|proxy_pass http://primary|g' "$NGINX_CONFIG"

    # Reload the configuration
    nginx -s reload

    echo "$(date): Switched back to primary upstream" >> "$LOG_FILE"
}

# Main monitoring loop
while true; do
    primary_healthy=$(check_upstream "$PRIMARY_UPSTREAM")
    backup_healthy=$(check_upstream "$BACKUP_UPSTREAM")

    # Which upstream is the first proxy_pass currently pointing at?
    current_upstream=$(grep -o 'proxy_pass http://[^;]*' "$NGINX_CONFIG" | head -1 | cut -d'/' -f3)

    if [ "$current_upstream" = "primary" ] && [ "$primary_healthy" -eq 0 ] && [ "$backup_healthy" -gt 0 ]; then
        switch_to_backup
    elif [ "$current_upstream" = "backup" ] && [ "$primary_healthy" -gt 0 ]; then
        switch_to_primary
    fi

    sleep 30
done

Best Practices and Summary

Load Balancing Design Principles

  1. Algorithm selection: choose a load balancing algorithm that matches the characteristics of the application
  2. Health checks: configure thorough health checking
  3. Session persistence: pick a session persistence strategy based on business requirements
  4. Failover: design automatic failover mechanisms

Performance Optimization Essentials

  1. Connection reuse: enable keepalive connection pooling
  2. Caching strategy: configure proxy caching appropriately
  3. Timeout settings: tune the various timeout parameters
  4. Monitoring and alerting: build a solid monitoring system

Common Problems and Solutions

  1. Lost sessions: use IP hash or cookie-based session persistence
  2. Uneven load: adjust weights or switch load balancing algorithms
  3. Single point of failure: configure multiple backup servers
  4. Performance bottlenecks: tune the connection pool and cache configuration

Conclusion

Nginx load balancing is a key technology for building highly available web services. With careful configuration and tuning, it delivers a high-performance, highly available load balancing solution that provides stable and reliable support for business systems.
