Nginx负载均衡配置:高可用Web服务架构实战

引言

Nginx作为高性能的Web服务器和反向代理服务器,其负载均衡功能是构建高可用、高并发Web服务架构的核心组件。本文将深入探讨Nginx负载均衡的配置方法、算法选择和优化策略。

负载均衡基础

1. 负载均衡算法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# nginx.conf
# Upstream groups demonstrating the built-in load-balancing algorithms.
http {
# Round robin (default): each request goes to the next server in turn.
upstream backend_round_robin {
server 192.168.1.10:8080;
server 192.168.1.11:8080;
server 192.168.1.12:8080;
}

# Weighted round robin: a server with weight=3 receives ~3x the requests
# of a weight=1 server.
upstream backend_weighted {
server 192.168.1.10:8080 weight=3;
server 192.168.1.11:8080 weight=2;
server 192.168.1.12:8080 weight=1;
}

# IP hash: requests from the same client IP always reach the same server
# (simple session affinity; skewed when clients share a NAT address).
upstream backend_ip_hash {
ip_hash;
server 192.168.1.10:8080;
server 192.168.1.11:8080;
server 192.168.1.12:8080;
}

# Least connections: pick the server with the fewest active connections.
upstream backend_least_conn {
least_conn;
server 192.168.1.10:8080;
server 192.168.1.11:8080;
server 192.168.1.12:8080;
}

# Consistent hash keyed on the request URI.
# NOTE(review): consistent_hash is NOT in stock nginx; it requires a
# third-party module (e.g. ngx_http_upstream_consistent_hash), or use the
# built-in "hash $request_uri consistent;" on nginx >= 1.7.2.
upstream backend_consistent_hash {
consistent_hash $request_uri;
server 192.168.1.10:8080;
server 192.168.1.11:8080;
server 192.168.1.12:8080;
}
}

2. 服务器状态管理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Upstream server state flags: passive failure detection, backups,
# administrative down, and capacity limits.
upstream backend_with_backup {
# Primary servers: after max_fails failed attempts within fail_timeout the
# server is considered unavailable for the next fail_timeout window.
server 192.168.1.10:8080 max_fails=3 fail_timeout=30s;
server 192.168.1.11:8080 max_fails=3 fail_timeout=30s;

# Backup: only receives traffic when all non-backup servers are down.
server 192.168.1.12:8080 backup;

# Administratively offline; kept in the config for quick re-enabling.
server 192.168.1.13:8080 down;

# Gradually ramp traffic back up after recovery.
# NOTE(review): slow_start is an NGINX Plus (commercial) parameter;
# open-source nginx rejects it.
server 192.168.1.14:8080 slow_start=30s;

# Cap simultaneous connections to this server.
server 192.168.1.15:8080 max_conns=100;
}

高级负载均衡配置

1. 基于URI的负载均衡

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Route by URI/extension to dedicated upstream pools (API, static, images).
http {
# API server pool
upstream api_servers {
server 192.168.1.20:8080;
server 192.168.1.21:8080;
}

# Static-asset server pool
upstream static_servers {
server 192.168.1.30:8080;
server 192.168.1.31:8080;
}

# Image server pool
upstream image_servers {
server 192.168.1.40:8080;
server 192.168.1.41:8080;
}

server {
listen 80;
server_name example.com;

# API requests: forward client identity headers to the backend.
location /api/ {
proxy_pass http://api_servers;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;

# Tighter timeouts for the latency-sensitive API path.
proxy_connect_timeout 5s;
proxy_send_timeout 10s;
proxy_read_timeout 10s;
}

# Static assets, cached for 1h.
# NOTE(review): the "static_cache" and "image_cache" zones used below must
# be declared elsewhere with proxy_cache_path; they are not defined in this
# snippet, so nginx -t will fail without those declarations.
location ~* \.(css|js|html)$ {
proxy_pass http://static_servers;
proxy_cache static_cache;
proxy_cache_valid 200 1h;
proxy_cache_use_stale error timeout updating;
add_header X-Cache-Status $upstream_cache_status;
}

# Images: long cache lifetime plus client-side caching headers.
location ~* \.(jpg|jpeg|png|gif|ico|svg)$ {
proxy_pass http://image_servers;
proxy_cache image_cache;
proxy_cache_valid 200 24h;
proxy_cache_use_stale error timeout updating;
expires 30d;
add_header Cache-Control "public, immutable";
}

# Everything else falls through to the API pool.
location / {
proxy_pass http://api_servers;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
}
}
}

2. 基于地理位置的负载均衡

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
http {
    # Map client address ranges to a region label. Ranges here are examples;
    # real deployments usually load them from a GeoIP database instead.
    geo $geo_region {
        default global;
        127.0.0.0/8 local;
        10.0.0.0/8 internal;
        192.168.0.0/16 internal;
        1.2.3.0/24 asia;
        4.5.6.0/24 europe;
        7.8.9.0/24 america;
    }

    # Translate the region label into an upstream name ONCE, instead of the
    # original chain of "if" blocks inside the location. Multiple proxy_pass
    # directives guarded by "if" are fragile (see nginx's "If is Evil" page);
    # a map plus a variable proxy_pass selects exactly one pool per request.
    # Regions without a dedicated pool (local/internal/global) fall back to
    # the global servers.
    map $geo_region $region_backend {
        asia    asia_servers;
        europe  europe_servers;
        america america_servers;
        default global_servers;
    }

    upstream asia_servers {
        server 192.168.1.50:8080;
        server 192.168.1.51:8080;
    }

    upstream europe_servers {
        server 192.168.1.60:8080;
        server 192.168.1.61:8080;
    }

    upstream america_servers {
        server 192.168.1.70:8080;
        server 192.168.1.71:8080;
    }

    upstream global_servers {
        server 192.168.1.80:8080;
        server 192.168.1.81:8080;
    }

    server {
        listen 80;
        server_name example.com;

        location / {
            # A variable in proxy_pass is resolved at request time; since the
            # value always names a defined upstream, no resolver is needed.
            proxy_pass http://$region_backend;

            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Geo-Region $geo_region;
        }
    }
}

3. 动态负载均衡配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Dynamic upstream reconfiguration via the NGINX Plus REST API.
# NOTE(review): "health_check" and "api write=on" are NGINX Plus (commercial)
# features; this block does not work on open-source nginx.
http {
upstream backend {
# Shared-memory zone so all worker processes share (dynamically updated)
# group state.
zone backend 64k;

# Initial servers; more can be added/removed at runtime through the API.
server 192.168.1.10:8080;
server 192.168.1.11:8080;
}

server {
listen 80;
server_name example.com;

location / {
proxy_pass http://backend;
health_check;
}

# Management API endpoint; write access restricted to the admin subnet.
location /api/upstream {
api write=on;
allow 192.168.1.0/24;
deny all;
}
}
}

健康检查配置

1. 被动健康检查

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Passive health checking: nginx marks a server failed after max_fails
# failed attempts and skips it for fail_timeout.
upstream backend {
server 192.168.1.10:8080 max_fails=3 fail_timeout=30s;
server 192.168.1.11:8080 max_fails=3 fail_timeout=30s;
server 192.168.1.12:8080 max_fails=3 fail_timeout=30s;
}

server {
listen 80;
server_name example.com;

location / {
proxy_pass http://backend;
# Retry the next server on connection errors, timeouts, bad headers,
# or 5xx responses -- at most 3 tries within 10s total.
proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
proxy_next_upstream_tries 3;
proxy_next_upstream_timeout 10s;

proxy_connect_timeout 5s;
proxy_send_timeout 10s;
proxy_read_timeout 10s;
}
}

2. 主动健康检查(Nginx Plus)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Active health checking.
# NOTE(review): the health_check and match directives below are NGINX Plus
# (commercial) features; open-source nginx needs a third-party module.
upstream backend {
zone backend 64k;
server 192.168.1.10:8080;
server 192.168.1.11:8080;
server 192.168.1.12:8080;
}

server {
listen 80;
server_name example.com;

location / {
proxy_pass http://backend;

# Probe /health every 5s; 3 failures mark a server down, 2 passes
# bring it back; responses must satisfy the "server_ok" match block.
health_check interval=5s
fails=3
passes=2
uri=/health
match=server_ok;
}
}

# A probe response is healthy only if all three conditions hold:
# HTTP 200, a JSON content type, and a body containing "status": "ok".
match server_ok {
status 200;
header Content-Type ~ "application/json";
body ~ '"status"\s*:\s*"ok"';
}

3. 自定义健康检查脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/bin/bash

# Nginx health-check sidecar: poll each upstream server's /health endpoint
# every 10s and flip its "down" flag through the upstream API accordingly.
# NOTE(review): the /api/6/... endpoints require NGINX Plus with "api write=on".
# Dependencies: curl.

UPSTREAM_SERVERS=(
  "192.168.1.10:8080"
  "192.168.1.11:8080"
  "192.168.1.12:8080"
)

readonly HEALTH_CHECK_URL="/health"
readonly TIMEOUT=5
readonly LOG_FILE="/var/log/nginx/health_check.log"
readonly API_BASE="http://localhost/api/6/http/upstreams/backend/servers"

# check_server <host:port> -- return 0 when /health answers HTTP 200.
check_server() {
  local server=$1
  local url="http://${server}${HEALTH_CHECK_URL}"
  local response

  # --max-time bounds the whole transfer, not just the connect phase,
  # so a hung backend cannot stall the loop indefinitely.
  response=$(curl -s -w "%{http_code}" -o /dev/null \
    --connect-timeout "$TIMEOUT" --max-time "$TIMEOUT" "$url")

  if [ "$response" = "200" ]; then
    echo "$(date): Server $server is healthy" >> "$LOG_FILE"
    return 0
  else
    echo "$(date): Server $server is unhealthy (HTTP $response)" >> "$LOG_FILE"
    return 1
  fi
}

# update_upstream <host:port> <enable|disable> -- toggle the "down" flag.
update_upstream() {
  local server=$1
  local action=$2
  local down=false

  [ "$action" = "disable" ] && down=true

  # -s -o /dev/null: the API response body is not useful on stdout.
  curl -s -o /dev/null -X PATCH -d "{\"down\":${down}}" \
    --max-time "$TIMEOUT" \
    "${API_BASE}/${server}"
}

# Main loop: probe every server, then sleep.
while true; do
  for server in "${UPSTREAM_SERVERS[@]}"; do
    if check_server "$server"; then
      update_upstream "$server" "enable"
    else
      update_upstream "$server" "disable"
    fi
  done

  sleep 10
done

会话保持配置

1. IP哈希会话保持

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Session affinity via client-IP hashing: the same source IP consistently
# maps to the same backend (until the server set changes).
upstream backend {
ip_hash;
server 192.168.1.10:8080;
server 192.168.1.11:8080;
server 192.168.1.12:8080;
}

server {
listen 80;
server_name example.com;

location / {
proxy_pass http://backend;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
}
}

2. Cookie会话保持

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
upstream backend {
    # "route=" tags each server for routing-based session persistence.
    # NOTE(review): route= only takes effect together with the commercial
    # "sticky route" directive (NGINX Plus); open-source nginx ignores it.
    server 192.168.1.10:8080 route=server1;
    server 192.168.1.11:8080 route=server2;
    server 192.168.1.12:8080 route=server3;
}

server {
    listen 80;
    server_name example.com;

    location / {
        proxy_pass http://backend;

        # Harden session cookies issued by the backend: rewriting the path
        # attribute appends Secure and HttpOnly flags.
        proxy_cookie_path / "/; Secure; HttpOnly";

        # (The original additionally had
        #   if ($cookie_server_route) { proxy_pass http://backend; }
        # which re-issued the identical proxy_pass and therefore changed
        # nothing; the dead branch was removed.)

        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    }
}

3. 自定义会话保持

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
http {
    # Route straight from the first two characters of the session cookie to
    # an upstream NAME. This replaces the original two-step scheme (map to a
    # prefix, then a set/"if" chain in the location): the map now has an
    # explicit default, which also covers requests without a session cookie.
    map $cookie_sessionid $backend {
        ~^00    backend_00;
        ~^01    backend_01;
        ~^02    backend_02;
        default backend_default;
    }

    upstream backend_00 {
        server 192.168.1.10:8080;
    }

    upstream backend_01 {
        server 192.168.1.11:8080;
    }

    upstream backend_02 {
        server 192.168.1.12:8080;
    }

    # Fallback pool for unrecognized or absent session IDs.
    upstream backend_default {
        server 192.168.1.10:8080;
        server 192.168.1.11:8080;
        server 192.168.1.12:8080;
    }

    server {
        listen 80;
        server_name example.com;

        location / {
            # $backend always names a defined upstream, so the variable
            # proxy_pass needs no resolver.
            proxy_pass http://$backend;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        }
    }
}

负载均衡监控

1. 状态监控配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Expose nginx status endpoints for monitoring, restricted to the admin subnet.
http {
upstream backend {
zone backend 64k;
server 192.168.1.10:8080;
server 192.168.1.11:8080;
server 192.168.1.12:8080;
}

server {
listen 80;
server_name example.com;

location / {
proxy_pass http://backend;
}

# Basic counters (active/reading/writing/waiting connections).
# Requires nginx built with --with-http_stub_status_module.
location /nginx_status {
stub_status on;
access_log off;
allow 192.168.1.0/24;
deny all;
}

# Detailed per-upstream JSON stats.
# NOTE(review): the "api" directive is NGINX Plus only.
location /upstream_status {
api;
access_log off;
allow 192.168.1.0/24;
deny all;
}
}
}

2. 监控脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/bin/bash

# Nginx load-balancer monitor: every 60s sample stub_status and the NGINX
# Plus upstream API, append the figures to a log, and mail an alert when
# connection counts cross fixed thresholds.
# Dependencies: curl, jq (upstream stats), mail (alerts).

NGINX_STATUS_URL="http://localhost/nginx_status"
UPSTREAM_API_URL="http://localhost/api/6/http/upstreams"
LOG_FILE="/var/log/nginx/lb_monitor.log"
ALERT_EMAIL="admin@example.com"

# get_nginx_stats -- parse stub_status output into "key:value" lines.
# stub_status format: "Active connections: N" then "Reading: a Writing: b
# Waiting: c", hence the $3 / $2,$4,$6 field positions.
get_nginx_stats() {
  curl -s "$NGINX_STATUS_URL" | awk '
    /Active connections/ { active = $3 }
    /Reading/ { reading = $2; writing = $4; waiting = $6 }
    END {
      print "active_connections:" active
      print "reading:" reading
      print "writing:" writing
      print "waiting:" waiting
    }
  '
}

# get_upstream_stats -- one summary line per upstream (NGINX Plus API only).
get_upstream_stats() {
  curl -s "$UPSTREAM_API_URL" | jq -r '
    .[] |
    "upstream:" + .name +
    " servers:" + (.servers | length | tostring) +
    " active:" + ([.servers[] | select(.state == "up")] | length | tostring)
  '
}

# check_thresholds <active> <waiting> -- log and mail when limits are crossed.
# Empty samples (nginx unreachable) default to 0 so the integer comparisons
# below never error out ("[ "" -gt 1000 ]" would in the original).
check_thresholds() {
  local active_connections=${1:-0}
  local waiting_connections=${2:-0}

  if [ "$active_connections" -gt 1000 ]; then
    echo "$(date): High active connections: $active_connections" >> "$LOG_FILE"
    send_alert "High active connections: $active_connections"
  fi

  if [ "$waiting_connections" -gt 500 ]; then
    echo "$(date): High waiting connections: $waiting_connections" >> "$LOG_FILE"
    send_alert "High waiting connections: $waiting_connections"
  fi
}

# send_alert <message> -- deliver an alert via the local MTA.
send_alert() {
  local message=$1
  echo "$message" | mail -s "Nginx Load Balancer Alert" "$ALERT_EMAIL"
}

# Main monitoring loop.
while true; do
  echo "$(date): Checking Nginx status..." >> "$LOG_FILE"

  nginx_stats=$(get_nginx_stats)
  echo "$nginx_stats" >> "$LOG_FILE"

  upstream_stats=$(get_upstream_stats)
  echo "$upstream_stats" >> "$LOG_FILE"

  active_connections=$(echo "$nginx_stats" | grep "active_connections" | cut -d: -f2)
  waiting_connections=$(echo "$nginx_stats" | grep "waiting" | cut -d: -f2)

  check_thresholds "$active_connections" "$waiting_connections"

  sleep 60
done

性能优化

1. 连接优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Upstream connection reuse and proxy buffering tuning.
http {
# Connection-pool configuration
upstream backend {
server 192.168.1.10:8080;
server 192.168.1.11:8080;

# Keep up to 32 idle connections per worker to the upstream; each may
# serve up to 100 requests and live 60s idle before being closed.
keepalive 32;
keepalive_requests 100;
keepalive_timeout 60s;
}

server {
listen 80;
server_name example.com;

location / {
proxy_pass http://backend;

# Keepalive to upstreams only works with HTTP/1.1 and an empty
# Connection header -- both settings below are required.
proxy_http_version 1.1;
proxy_set_header Connection "";
proxy_connect_timeout 5s;
proxy_send_timeout 10s;
proxy_read_timeout 10s;

# Buffer upstream responses so slow clients do not tie up backends.
proxy_buffering on;
proxy_buffer_size 4k;
proxy_buffers 8 4k;
proxy_busy_buffers_size 8k;
}
}
}

2. 缓存优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Proxy cache configuration: on-disk cache keyed by scheme+host+URI.
http {
# Cache storage: 2-level directory hashing, 10MB key zone (~80k keys),
# 1GB max, entries idle for 60m are evicted, temp files written in place.
proxy_cache_path /var/cache/nginx/proxy
levels=1:2
keys_zone=proxy_cache:10m
max_size=1g
inactive=60m
use_temp_path=off;

upstream backend {
server 192.168.1.10:8080;
server 192.168.1.11:8080;
}

server {
listen 80;
server_name example.com;

location / {
proxy_pass http://backend;

# Cache policy: 10m for 200/302, 1m for 404; serve stale copies while
# the backend is erroring or the entry is being refreshed.
proxy_cache proxy_cache;
proxy_cache_key $scheme$proxy_host$request_uri;
proxy_cache_valid 200 302 10m;
proxy_cache_valid 404 1m;
proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
# Collapse concurrent misses for the same key into one upstream fetch.
proxy_cache_lock on;
proxy_cache_lock_timeout 5s;

# Surface HIT/MISS/STALE for debugging.
add_header X-Cache-Status $upstream_cache_status;
# NOTE(review): ignoring upstream Cache-Control/Expires forces the
# proxy_cache_valid policy even when the backend opts out of caching.
proxy_ignore_headers Cache-Control Expires;
}
}
}

故障转移配置

1. 自动故障转移

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Two-tier failover: a primary pool with passive health checks, and a named
# location that falls back to a separate backup pool on gateway errors.
upstream primary {
server 192.168.1.10:8080 max_fails=3 fail_timeout=30s;
server 192.168.1.11:8080 max_fails=3 fail_timeout=30s;
}

upstream backup {
server 192.168.1.20:8080;
server 192.168.1.21:8080;
}

server {
listen 80;
server_name example.com;

location / {
# Primary pool
proxy_pass http://primary;

# Re-dispatch gateway errors to the @fallback location.
# NOTE(review): this catches 502/504 that nginx itself generates when the
# upstream is unreachable; for a 503 *returned by* the upstream to reach
# error_page, "proxy_intercept_errors on;" is also required -- verify.
error_page 502 503 504 = @fallback;

proxy_connect_timeout 5s;
proxy_send_timeout 10s;
proxy_read_timeout 10s;
}

# Fallback target: replay the request against the backup pool.
location @fallback {
proxy_pass http://backup;
proxy_connect_timeout 5s;
proxy_send_timeout 10s;
proxy_read_timeout 10s;
}
}

2. 故障转移脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/bin/bash

# Nginx failover controller: every 30s query the NGINX Plus upstream API for
# the number of healthy servers in the "primary" and "backup" groups and
# rewrite nginx.conf (then reload) to point proxy_pass at whichever group
# can actually serve traffic.
# Dependencies: curl, jq, NGINX Plus API enabled on localhost.

PRIMARY_UPSTREAM="primary"
BACKUP_UPSTREAM="backup"
NGINX_CONFIG="/etc/nginx/nginx.conf"
NGINX_PID="/var/run/nginx.pid"
LOG_FILE="/var/log/nginx/failover.log"

# check_upstream <name> -- print the count of servers whose state is "up".
check_upstream() {
  local upstream=$1
  local health_url="http://localhost/api/6/http/upstreams/$upstream"
  local healthy_servers

  healthy_servers=$(curl -s "$health_url" | jq '[.servers[] | select(.state == "up")] | length')

  # Default to 0 when the API is unreachable so the -eq/-gt tests below
  # never operate on an empty string.
  echo "${healthy_servers:-0}"
}

# switch_to_backup -- rewrite proxy_pass primary -> backup and reload.
switch_to_backup() {
  echo "$(date): Switching to backup upstream" >> "$LOG_FILE"

  # BUG FIX: the replacement text contains "/" (http://), so the original
  # "s/.../.../" form was a sed syntax error; use "|" as the delimiter.
  sed -i 's|proxy_pass http://primary|proxy_pass http://backup|g' "$NGINX_CONFIG"

  nginx -s reload

  echo "$(date): Switched to backup upstream" >> "$LOG_FILE"
}

# switch_to_primary -- rewrite proxy_pass backup -> primary and reload.
switch_to_primary() {
  echo "$(date): Switching back to primary upstream" >> "$LOG_FILE"

  sed -i 's|proxy_pass http://backup|proxy_pass http://primary|g' "$NGINX_CONFIG"

  nginx -s reload

  echo "$(date): Switched back to primary upstream" >> "$LOG_FILE"
}

# Main loop.
while true; do
  primary_healthy=$(check_upstream "$PRIMARY_UPSTREAM")
  backup_healthy=$(check_upstream "$BACKUP_UPSTREAM")

  # Host part of the first proxy_pass tells us which group is currently live.
  current_upstream=$(grep -o 'proxy_pass http://[^;]*' "$NGINX_CONFIG" | head -1 | cut -d'/' -f3)

  if [ "$current_upstream" = "primary" ] && [ "$primary_healthy" -eq 0 ] && [ "$backup_healthy" -gt 0 ]; then
    switch_to_backup
  elif [ "$current_upstream" = "backup" ] && [ "$primary_healthy" -gt 0 ]; then
    switch_to_primary
  fi

  sleep 30
done

最佳实践与总结

负载均衡设计原则

  1. 算法选择:根据应用特性选择合适的负载均衡算法
  2. 健康检查:配置完善的健康检查机制
  3. 会话保持:根据业务需求选择会话保持策略
  4. 故障转移:设计自动故障转移机制

性能优化要点

  1. 连接复用:启用keepalive连接池
  2. 缓存策略:合理配置代理缓存
  3. 超时设置:优化各种超时参数
  4. 监控告警:建立完善的监控体系

常见问题解决

  1. 会话丢失:使用IP哈希或Cookie会话保持
  2. 负载不均:调整权重或更换负载均衡算法
  3. 单点故障:配置多个备用服务器
  4. 性能瓶颈:优化连接池和缓存配置

结语

Nginx负载均衡是构建高可用Web服务的关键技术。通过合理的配置和优化,可以实现高性能、高可用的负载均衡方案,为业务系统提供稳定可靠的服务支撑。

版权所有,如有侵权请联系我

Linux性能调优:系统优化与性能提升实战指南

引言

Linux性能调优是系统管理员和运维工程师必备的核心技能。通过系统性的性能分析和优化,可以显著提升系统的响应速度、吞吐量和稳定性。本文将深入探讨Linux性能调优的方法论、工具使用和实战技巧。

CPU性能调优

1. CPU使用率分析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/bin/bash

# CPU performance snapshot: model/topology, utilization, load average,
# top CPU consumers, and interrupt counters. Read-only diagnostics.
# NOTE(review): the top/uptime parsing below is locale- and version-
# dependent (field positions differ across procps versions) -- verify on
# the target distribution.

echo "=== CPU性能分析报告 ==="
echo "时间: $(date)"
echo ""

# CPU model and core/thread topology
echo "CPU信息:"
lscpu | grep -E "Model name|CPU\(s\)|Thread|Core"
echo ""

# User/system/idle percentages from top's one-shot summary line
echo "CPU使用率:"
top -bn1 | grep "Cpu(s)" | awk '{print "用户态: "$2", 系统态: "$4", 空闲: "$8}'
echo ""

# 1/5/15-minute load averages
echo "系统负载:"
uptime | awk -F'load average:' '{print "负载平均值:"$2}'
echo ""

# Top 10 CPU consumers (head -11 keeps the ps header row)
echo "CPU使用率最高的10个进程:"
ps aux --sort=-%cpu | head -11
echo ""

# Per-CPU hardware interrupt counters (first 10 lines only)
echo "中断统计:"
cat /proc/interrupts | head -10

2. CPU调度优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/bin/bash

# CPU tuning helper: show the CPU frequency governor, pin well-known services
# to CPU cores, and lower the priority of background jobs.

# BUG FIX: the original read /sys/block/sda/queue/scheduler -- that is the
# *disk I/O* scheduler, not anything CPU-related -- under a "current CPU
# scheduler" label. Show the cpufreq scaling governor instead, which is the
# closest per-CPU tunable.
echo "当前CPU调度器:"
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor 2>/dev/null || echo "N/A"

# optimize_cpu_affinity <process-name> <cpu-list> -- pin every PID matching
# the name to the given comma-separated core list via taskset.
optimize_cpu_affinity() {
  local process_name=$1
  local cpu_cores=$2
  local pids pid

  echo "优化 $process_name 的CPU亲和性..."

  pids=$(pgrep "$process_name")

  for pid in $pids; do
    if [ -n "$pid" ]; then
      taskset -cp "$cpu_cores" "$pid"
      echo "进程 $pid 绑定到CPU核心 $cpu_cores"
    fi
  done
}

# Example: dedicate cores 0-1 to nginx and 2-3 to mysql.
optimize_cpu_affinity "nginx" "0,1"
optimize_cpu_affinity "mysql" "2,3"

# set_process_priority <process-name> <nice> -- renice all matching PIDs.
set_process_priority() {
  local process_name=$1
  local nice_value=$2
  local pids pid

  echo "设置 $process_name 进程优先级为 $nice_value"

  pids=$(pgrep "$process_name")
  for pid in $pids; do
    if [ -n "$pid" ]; then
      # Explicit -n/-p: the bare "renice VALUE PID" form is legacy syntax.
      renice -n "$nice_value" -p "$pid"
    fi
  done
}

# Deprioritize backup jobs so they yield CPU to服务 processes.
set_process_priority "backup" "10"

内存性能调优

1. 内存使用分析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/bin/bash

# 内存性能分析脚本

echo "=== 内存性能分析 ==="

# 内存使用概况
echo "内存使用情况:"
free -h
echo ""

# 内存使用率最高的进程
echo "内存使用率最高的10个进程:"
ps aux --sort=-%mem | head -11
echo ""

# 交换分区使用情况
echo "交换分区使用:"
swapon --show
echo ""

# 内存碎片化分析
echo "内存碎片化信息:"
cat /proc/buddyinfo
echo ""

# 缓存和缓冲区
echo "缓存统计:"
cat /proc/meminfo | grep -E "Cached|Buffers|Dirty"

2. 内存优化配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/bin/bash

# Virtual-memory tuning: persist sysctl settings and (optionally, manually)
# drop kernel caches. Must run as root.

# append_sysctl <line> -- append to /etc/sysctl.conf only when the exact
# line is not already present, so re-running the script does not pile up
# duplicates (the original appended unconditionally on every run).
append_sysctl() {
  local line=$1
  grep -qxF "$line" /etc/sysctl.conf || echo "$line" >> /etc/sysctl.conf
}

# optimize_vm_parameters -- persist and apply VM sysctl tuning.
optimize_vm_parameters() {
  echo "优化虚拟内存参数..."

  # Prefer reclaiming page cache over swapping application memory.
  append_sysctl "vm.swappiness = 10"

  # Dirty-page writeback thresholds (percent of RAM).
  append_sysctl "vm.dirty_ratio = 15"
  append_sysctl "vm.dirty_background_ratio = 5"

  # Always allow overcommit (helps fork-heavy workloads, e.g. Redis BGSAVE).
  # NOTE(review): overcommit_memory=1 also makes OOM kills more likely --
  # confirm this suits the workload.
  append_sysctl "vm.overcommit_memory = 1"

  # Apply immediately.
  sysctl -p

  echo "虚拟内存参数优化完成"
}

# clear_memory_cache -- flush dirty pages, then drop kernel caches.
# Defined but (as in the original) never invoked automatically: dropping
# caches degrades performance and should be a deliberate manual action.
clear_memory_cache() {
  echo "清理内存缓存..."

  # Flush dirty pages first so nothing is lost.
  sync

  # echo 3 drops the page cache AND dentries/inodes in one step; the
  # original's preceding "echo 1" and "echo 2" writes were redundant.
  echo 3 > /proc/sys/vm/drop_caches

  echo "内存缓存清理完成"
}

optimize_vm_parameters

磁盘I/O性能调优

1. 磁盘性能测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/bin/bash

# Disk benchmark: sequential read/write with dd (O_DIRECT) and, when fio is
# available, a mixed random-I/O test.
# Changes vs. original: a private mktemp directory replaces the fixed,
# predictable /tmp path (symlink-attack surface); cleanup is guaranteed by a
# trap even if the script is interrupted; the unused TEST_SIZE variable was
# removed.

TEST_DIR=$(mktemp -d /tmp/disk_test.XXXXXX) || exit 1
TEST_FILE="$TEST_DIR/test_file"

cleanup() { rm -rf -- "$TEST_DIR"; }
trap cleanup EXIT

echo "=== 磁盘性能测试 ==="

# Sequential write, 1 GiB, O_DIRECT bypasses the page cache so the figure
# reflects the device, not RAM.
echo "顺序写测试:"
dd if=/dev/zero of="$TEST_FILE" bs=1M count=1024 oflag=direct 2>&1 | grep -E "copied|MB/s"

# Sequential read of the file just written.
echo "顺序读测试:"
dd if="$TEST_FILE" of=/dev/null bs=1M iflag=direct 2>&1 | grep -E "copied|MB/s"

# Mixed 4k random read/write for 60s; skipped silently when fio is absent.
if command -v fio &> /dev/null; then
echo "随机读写测试:"
fio --name=random-rw --ioengine=libaio --iodepth=4 --rw=randrw --bs=4k --direct=1 --size=100M --numjobs=1 --runtime=60 --group_reporting --filename="$TEST_FILE"
fi

2. 磁盘I/O优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/bin/bash

# Disk I/O tuning: choose an I/O scheduler per device type and persist
# filesystem/writeback settings. Must run as root.

# optimize_disk_scheduler -- deadline for SSDs, cfq for rotational disks.
# NOTE(review): "deadline"/"cfq" are legacy single-queue elevator names;
# blk-mq kernels (most modern distros) expose "mq-deadline"/"bfq"/"none"
# instead, so each write is allowed to fail quietly and is skipped when the
# sysfs file is absent or read-only.
optimize_disk_scheduler() {
  echo "优化磁盘调度器..."

  local disk sched_file
  for disk in $(lsblk -dn -o NAME); do
    sched_file="/sys/block/$disk/queue/scheduler"
    [ -w "$sched_file" ] || continue

    # rotational=0 identifies SSD/NVMe devices.
    if [ "$(cat "/sys/block/$disk/queue/rotational")" = "0" ]; then
      echo "deadline" > "$sched_file" 2>/dev/null \
        && echo "SSD $disk 设置为 deadline 调度器"
    else
      echo "cfq" > "$sched_file" 2>/dev/null \
        && echo "HDD $disk 设置为 cfq 调度器"
    fi
  done
}

# optimize_filesystem -- persist mount-option hints and writeback sysctls.
# Each append is guarded by a marker grep so repeated runs stay idempotent
# (the original duplicated the blocks on every invocation).
optimize_filesystem() {
  echo "优化文件系统参数..."

  if ! grep -q "性能优化挂载选项" /etc/fstab; then
cat >> /etc/fstab << 'EOF'
# 性能优化挂载选项
# /dev/sdb1 /data ext4 defaults,noatime,nodiratime 0 2
EOF
  fi

  if ! grep -q "磁盘I/O优化" /etc/sysctl.conf; then
cat >> /etc/sysctl.conf << 'EOF'
# 磁盘I/O优化
vm.dirty_ratio = 20
vm.dirty_background_ratio = 10
vm.dirty_expire_centisecs = 3000
vm.dirty_writeback_centisecs = 500
EOF
  fi

  sysctl -p
}

optimize_disk_scheduler
optimize_filesystem

网络性能调优

1. 网络性能分析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/bin/bash

# Network snapshot: interface counters, protocol statistics, TCP state
# distribution, and (when iftop is installed) live bandwidth usage.
# NOTE(review): netstat is legacy (net-tools); "ss -s" / "ss -ant" are the
# modern equivalents and may be the only option on minimal systems.

echo "=== 网络性能分析 ==="

# Per-interface RX/TX byte, packet, and error counters
echo "网络接口统计:"
cat /proc/net/dev
echo ""

# Kernel protocol counters filtered to connections/packets/errors
echo "网络连接统计:"
netstat -s | grep -E "connections|packets|errors"
echo ""

# Count sockets per TCP state (ESTABLISHED, TIME_WAIT, ...)
echo "TCP连接状态分布:"
netstat -an | awk '/^tcp/ {++state[$NF]} END {for(key in state) print key"\t"state[key]}'
echo ""

# 10-second live bandwidth sample; skipped when iftop is not installed
echo "网络带宽使用:"
if command -v iftop &> /dev/null; then
timeout 10 iftop -t -s 10
fi

2. 网络参数优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/bin/bash

# TCP/network sysctl tuning. Must run as root.
# NOTE(review): these appends are not idempotent -- re-running duplicates
# the block in /etc/sysctl.conf; consider a dedicated /etc/sysctl.d file.

# optimize_network_parameters -- persist and apply TCP tuning.
optimize_network_parameters() {
  echo "优化网络参数..."

  cat >> /etc/sysctl.conf << 'EOF'
# 网络性能优化
# TCP缓冲区大小
net.core.rmem_default = 262144
net.core.rmem_max = 16777216
net.core.wmem_default = 262144
net.core.wmem_max = 16777216

# TCP窗口缩放
net.ipv4.tcp_window_scaling = 1

# TCP时间戳
net.ipv4.tcp_timestamps = 1

# TCP SACK
net.ipv4.tcp_sack = 1

# TCP快速回收
# NOTE: net.ipv4.tcp_tw_recycle was removed here - it breaks clients behind
# NAT and the sysctl itself was deleted in Linux 4.12. tcp_tw_reuse is safe.
net.ipv4.tcp_tw_reuse = 1

# TCP keepalive
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_intvl = 30
net.ipv4.tcp_keepalive_probes = 3

# 连接队列大小
net.core.somaxconn = 65535
net.core.netdev_max_backlog = 5000

# TCP SYN队列
net.ipv4.tcp_max_syn_backlog = 8192
EOF

  sysctl -p
  echo "网络参数优化完成"
}

optimize_network_parameters

系统整体优化

1. 系统资源限制优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/bin/bash

# Raise per-user resource limits (PAM limits.conf) and the systemd manager
# defaults so services can open many files/processes. Must run as root.
# NOTE(review): the limits.conf append is not idempotent -- re-running the
# script duplicates the block; daemons need a restart (and interactive
# users a re-login) before new limits apply.

# optimize_system_limits -- back up, append limits, and configure systemd.
function optimize_system_limits() {
echo "优化系统资源限制..."

# Keep a backup of the original configuration.
cp /etc/security/limits.conf /etc/security/limits.conf.backup

# Raise open-file/process counts and allow unlimited locked memory
# for all users.
cat >> /etc/security/limits.conf << 'EOF'
# 系统性能优化
* soft nofile 65535
* hard nofile 65535
* soft nproc 65535
* hard nproc 65535
* soft memlock unlimited
* hard memlock unlimited
EOF

# systemd services ignore limits.conf; set manager-level defaults via a
# drop-in instead.
mkdir -p /etc/systemd/system.conf.d
cat > /etc/systemd/system.conf.d/limits.conf << 'EOF'
[Manager]
DefaultLimitNOFILE=65535
DefaultLimitNPROC=65535
EOF

systemctl daemon-reload
echo "系统资源限制优化完成"
}

optimize_system_limits

2. 性能监控脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/bin/bash

# One-shot system performance sampler: log CPU/memory/disk/load figures and
# append ALERT lines when fixed thresholds are exceeded (intended to be run
# from cron).

LOG_FILE="/var/log/performance-monitor.log"
ALERT_THRESHOLD_CPU=80
ALERT_THRESHOLD_MEM=90
ALERT_THRESHOLD_DISK=85

# float_gt <a> <b> -- exit 0 when a > b (floating point). Replaces the
# original's "bc" pipeline: bc is frequently absent on minimal installs,
# and empty samples default to 0 instead of crashing the comparison.
float_gt() {
  awk -v a="${1:-0}" -v b="${2:-0}" 'BEGIN { exit !(a > b) }'
}

# monitor_system_performance -- take one sample and log it.
monitor_system_performance() {
  local timestamp
  timestamp=$(date +"%Y-%m-%d %H:%M:%S")

  # User-space CPU% from top's one-shot summary line.
  # NOTE(review): field positions are procps-version and locale dependent --
  # verify the parse on the target distribution.
  local cpu_usage
  cpu_usage=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | sed 's/%us,//')

  # Used/total memory as a percentage.
  local mem_usage
  mem_usage=$(free | awk 'NR==2{printf "%.2f", $3*100/$2}')

  # Root-filesystem usage percentage (numeric part only).
  local disk_usage
  disk_usage=$(df -h / | awk 'NR==2{print $5}' | sed 's/%//')

  # 1-minute load average.
  local load_avg
  load_avg=$(uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | sed 's/,//')

  echo "[$timestamp] CPU: ${cpu_usage}%, MEM: ${mem_usage}%, DISK: ${disk_usage}%, LOAD: $load_avg" >> "$LOG_FILE"

  if float_gt "$cpu_usage" "$ALERT_THRESHOLD_CPU"; then
    echo "[$timestamp] ALERT: CPU使用率过高 ($cpu_usage%)" >> "$LOG_FILE"
  fi

  if float_gt "$mem_usage" "$ALERT_THRESHOLD_MEM"; then
    echo "[$timestamp] ALERT: 内存使用率过高 ($mem_usage%)" >> "$LOG_FILE"
  fi

  if [ "${disk_usage:-0}" -gt "$ALERT_THRESHOLD_DISK" ]; then
    echo "[$timestamp] ALERT: 磁盘使用率过高 ($disk_usage%)" >> "$LOG_FILE"
  fi
}

monitor_system_performance

最佳实践与总结

性能调优原则

  1. 测量先行:在优化前建立基准测试
  2. 逐步优化:一次只调整一个参数
  3. 持续监控:优化后持续观察系统表现
  4. 文档记录:记录所有配置变更

常见性能瓶颈

  1. CPU瓶颈:进程调度、中断处理
  2. 内存瓶颈:内存不足、频繁交换
  3. 磁盘I/O瓶颈:随机读写、磁盘队列
  4. 网络瓶颈:带宽限制、连接数限制

优化建议

  1. 硬件层面:选择合适的CPU、内存、存储
  2. 系统层面:优化内核参数、文件系统
  3. 应用层面:优化应用配置、代码逻辑
  4. 监控层面:建立完善的性能监控体系

结语

Linux性能调优是一个系统性工程,需要从硬件、操作系统、应用等多个层面进行综合优化。通过科学的方法论和实用的工具,可以显著提升系统性能,为业务发展提供强有力的技术支撑。

版权所有,如有侵权请联系我