Docker Compose Performance Optimization
Overview
Performance optimization is a key part of deploying applications with Docker Compose. With sensible resource allocation, network tuning, storage tuning, and application-level tuning, you can significantly improve the performance and stability of containerized applications. This article covers the main areas of Docker Compose performance optimization and the corresponding best practices.
Resource Limits and Allocation
1. CPU Resource Limits
version: '3.8'
services:
  web:
    image: nginx:alpine
    deploy:
      resources:
        limits:
          cpus: '2.0'        # use at most 2 CPU cores
          memory: 1G         # use at most 1 GB of memory
        reservations:
          cpus: '0.5'        # guarantee at least 0.5 CPU cores
          memory: 512M       # guarantee at least 512 MB of memory
    # legacy-style limits (compatibility with non-Swarm deployments)
    cpus: 1.5
    mem_limit: 1g
    mem_reservation: 512m
  app:
    build: .
    deploy:
      resources:
        limits:
          cpus: '4.0'
          memory: 2G
        reservations:
          cpus: '1.0'
          memory: 1G
    # CPU weighting
    cpu_shares: 1024         # default weight
    cpu_quota: 150000        # CPU quota (microseconds)
    cpu_period: 100000       # CPU period (microseconds)
  database:
    image: postgres:13-alpine
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 4G
        reservations:
          cpus: '1.0'
          memory: 2G
    environment:
      - POSTGRES_DB=myapp
      - POSTGRES_USER=user
      - POSTGRES_PASSWORD=password
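After starting the stack, it is worth confirming that the limits were actually applied to the containers. A minimal sketch, assuming the service names from the example above and a standard Docker Compose v2 installation:

# verify the effective CPU/memory limits (reported in nanocpus and bytes)
docker compose up -d
for id in $(docker compose ps -q); do
  docker inspect --format '{{.Name}}: cpus={{.HostConfig.NanoCpus}} memory={{.HostConfig.Memory}}' "$id"
done
# current usage per container
docker stats --no-stream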
2. Memory Optimization
services:
  # memory-intensive application
  analytics:
    image: analytics-app
    deploy:
      resources:
        limits:
          memory: 8G
        reservations:
          memory: 4G
    # disable swap (memswap_limit equal to the memory limit means no swap)
    memswap_limit: 8G
    mem_swappiness: 0
    # OOM handling
    oom_kill_disable: false
    oom_score_adj: 100
  # lightweight application
  api:
    image: api-app
    deploy:
      resources:
        limits:
          memory: 512M
        reservations:
          memory: 256M
    # legacy-style memory limits (non-Swarm compatibility)
    mem_limit: 512m
    memswap_limit: 512m
  # cache service
  redis:
    image: redis:6-alpine
    deploy:
      resources:
        limits:
          memory: 2G
        reservations:
          memory: 1G
    command: |
      redis-server
      --maxmemory 1536mb
      --maxmemory-policy allkeys-lru
      --save ""
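A quick way to confirm that the memory settings behave as intended is to check a container's OOM status and ask Redis what limit it is actually enforcing. A sketch, assuming the analytics and redis services from the example above:

# check whether a container has been OOM-killed
docker inspect --format '{{.State.OOMKilled}}' "$(docker compose ps -q analytics)"
# confirm the limit Redis is enforcing and its current usage
docker compose exec redis redis-cli config get maxmemory
docker compose exec redis redis-cli info memory | grep used_memory_human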
3. Disk I/O Optimization
services:
  database:
    image: postgres:13-alpine
    volumes:
      - postgres_data:/var/lib/postgresql/data
    # disk I/O limits
    blkio_config:
      weight: 300
      weight_device:
        - path: /dev/sda
          weight: 400
      device_read_bps:
        - path: /dev/sda
          rate: '50mb'
      device_write_bps:
        - path: /dev/sda
          rate: '20mb'
      device_read_iops:
        - path: /dev/sda
          rate: 1000
      device_write_iops:
        - path: /dev/sda
          rate: 500
  # high-performance storage configuration
  high_io_app:
    image: myapp
    volumes:
      - type: volume
        source: fast_storage
        target: /data
        volume:
          nocopy: true
    # use tmpfs for better performance
    tmpfs:
      - /tmp:size=1G,noexec,nosuid,nodev
      - /var/cache:size=500M
volumes:
  postgres_data:
    driver: local
    driver_opts:
      type: ext4
      device: /dev/nvme0n1p1    # NVMe SSD
      o: noatime,nodiratime     # do not update access times
  fast_storage:
    driver: local
    driver_opts:
      type: ext4
      device: /dev/nvme0n1p2
      o: noatime,data=writeback,barrier=0
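To sanity-check that the fast volume actually delivers the expected write throughput, a rough test can be run inside the container. A sketch, assuming the high_io_app service above and that the image ships a dd build that supports oflag=direct (GNU coreutils; minimal BusyBox images may not):

# rough sequential write test against the fast volume
docker compose exec high_io_app dd if=/dev/zero of=/data/iotest bs=1M count=512 oflag=direct conv=fsync
docker compose exec high_io_app rm -f /data/iotest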
Network Performance Optimization
1. Network Mode Optimization
version: '3.8'
services:
  # high-performance network configuration
  high_performance_app:
    image: myapp
    network_mode: host   # use the host network for maximum throughput
    # alternatively, attach to a tuned custom network instead
    # (host mode and custom networks are mutually exclusive):
    # networks:
    #   - high_performance_net
  # standard network configuration
  standard_app:
    image: myapp
    networks:
      - app_network
    ports:
      - "3000:3000"
  # internal service (no external access required)
  internal_service:
    image: internal-app
    networks:
      - internal_network
    # no published ports, which reduces networking overhead
networks:
  high_performance_net:
    driver: bridge
    driver_opts:
      com.docker.network.bridge.name: br-perf
      com.docker.network.driver.mtu: 9000   # jumbo frames
  app_network:
    driver: bridge
    ipam:
      config:
        - subnet: 172.20.0.0/16
    driver_opts:
      com.docker.network.bridge.enable_icc: "true"
      com.docker.network.bridge.enable_ip_masquerade: "true"
  internal_network:
    driver: bridge
    internal: true   # internal-only network, better isolation and less overhead
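Whether a driver option such as the MTU was actually applied can be read back from the created network. A sketch, assuming the network names above; Compose prefixes them with the project name, so replace <project> accordingly:

# list and inspect the tuned network
docker network ls --filter name=high_performance_net
docker network inspect <project>_high_performance_net --format '{{json .Options}}'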
2. Load Balancing Optimization
version: '3.8'
services:
  # load balancer
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 512M
    networks:
      - frontend
      - backend
  # application service (multiple replicas)
  app:
    image: myapp
    deploy:
      replicas: 4
      resources:
        limits:
          cpus: '1.0'
          memory: 1G
        reservations:
          cpus: '0.25'
          memory: 512M
      update_config:
        parallelism: 2
        delay: 10s
        failure_action: rollback
    networks:
      - backend
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
networks:
  frontend:
  backend:
# nginx/nginx.conf
worker_processes auto;
worker_rlimit_nofile 65535;

events {
    worker_connections 4096;
    use epoll;
    multi_accept on;
}

http {
    # performance tuning
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;
    keepalive_requests 1000;

    # compression
    gzip on;
    gzip_vary on;
    gzip_min_length 1024;
    gzip_types text/plain text/css application/json application/javascript text/xml application/xml;

    # file handle caching
    open_file_cache max=10000 inactive=20s;
    open_file_cache_valid 30s;
    open_file_cache_min_uses 2;
    open_file_cache_errors on;

    upstream app_backend {
        least_conn;   # load-balancing algorithm
        server app:3000 max_fails=3 fail_timeout=30s;
        keepalive 32;
    }

    server {
        listen 80;

        location / {
            proxy_pass http://app_backend;
            proxy_http_version 1.1;
            proxy_set_header Connection "";
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            # upstream connection handling
            proxy_connect_timeout 5s;
            proxy_send_timeout 60s;
            proxy_read_timeout 60s;
            # buffering
            proxy_buffering on;
            proxy_buffer_size 4k;
            proxy_buffers 8 4k;
        }

        # static file caching
        location ~* \.(jpg|jpeg|png|gif|ico|css|js)$ {
            expires 1y;
            add_header Cache-Control "public, immutable";
        }
    }
}
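Before reloading the proxy with a changed configuration, it is worth validating it inside the running container. A sketch, assuming the nginx service from the Compose file above:

# validate and reload the nginx configuration without downtime
docker compose exec nginx nginx -t
docker compose exec nginx nginx -s reload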
Storage Performance Optimization
1. Volume Performance Optimization
version: '3.8'
services:
  database:
    image: postgres:13-alpine
    volumes:
      # high-performance data volume
      - type: volume
        source: db_data
        target: /var/lib/postgresql/data
        volume:
          nocopy: true
      # separate volume for WAL logs
      - type: volume
        source: db_wal
        target: /var/lib/postgresql/wal
        volume:
          nocopy: true
      # temporary files on tmpfs
      - type: tmpfs
        target: /tmp
        tmpfs:
          size: 1G
    environment:
      - POSTGRES_DB=myapp
      - POSTGRES_USER=user
      - POSTGRES_PASSWORD=password
    command: |
      postgres
      -c shared_buffers=256MB
      -c effective_cache_size=1GB
      -c maintenance_work_mem=64MB
      -c checkpoint_completion_target=0.9
      -c wal_buffers=16MB
      -c default_statistics_target=100
      -c random_page_cost=1.1
      -c effective_io_concurrency=200
  # cache service
  redis:
    image: redis:6-alpine
    volumes:
      - redis_data:/data
    # use an in-memory filesystem for temporary files
    tmpfs:
      - /tmp:size=100M
    command: |
      redis-server
      --save ""
      --appendonly yes
      --appendfsync everysec
      --no-appendfsync-on-rewrite yes
      --auto-aof-rewrite-percentage 100
      --auto-aof-rewrite-min-size 64mb
volumes:
  db_data:
    driver: local
    driver_opts:
      type: ext4
      device: /dev/nvme0n1p1
      o: noatime,nodiratime,data=writeback
  db_wal:
    driver: local
    driver_opts:
      type: ext4
      device: /dev/nvme0n1p2
      o: noatime,nodiratime,data=writeback,barrier=0
  redis_data:
    driver: local
    driver_opts:
      type: ext4
      device: /dev/nvme0n1p3
      o: noatime,nodiratime
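Whether the PostgreSQL tuning flags actually took effect can be verified by querying the running server. A sketch using the example credentials above:

# confirm the tuned settings inside the running database
docker compose exec database psql -U user -d myapp -c "SHOW shared_buffers;"
docker compose exec database psql -U user -d myapp -c "SHOW effective_cache_size;"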
2. Caching Strategy
services:
  # application-level cache configuration
  app:
    image: myapp
    environment:
      - CACHE_TYPE=redis
      - CACHE_URL=redis://redis:6379
      - CACHE_TTL=3600
    depends_on:
      - redis
      - memcached
    networks:
      - app_network
  # Redis cache
  redis:
    image: redis:6-alpine
    command: |
      redis-server
      --maxmemory 1gb
      --maxmemory-policy allkeys-lru
      --tcp-keepalive 60
      --timeout 300
    volumes:
      - redis_cache:/data
    networks:
      - app_network
  # Memcached cache
  memcached:
    image: memcached:alpine
    command: memcached -m 512 -c 1024 -t 4
    deploy:
      resources:
        limits:
          memory: 600M
        reservations:
          memory: 512M
    networks:
      - app_network
  # CDN-style caching proxy
  varnish:
    image: varnish:6.0
    ports:
      - "8080:80"
    volumes:
      - ./varnish/default.vcl:/etc/varnish/default.vcl:ro
    environment:
      - VARNISH_SIZE=1G
    command: varnishd -F -f /etc/varnish/default.vcl -s malloc,1G -a :80
    depends_on:
      - nginx
    networks:
      - frontend
volumes:
  redis_cache:
    driver: local
    driver_opts:
      type: tmpfs
      device: tmpfs
      o: size=1G
networks:
  app_network:
  frontend:
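Cache effectiveness is easiest to judge from the hit/miss counters that Redis exposes. A sketch, assuming the redis service defined above:

# check the eviction policy and hit/miss ratio of the Redis cache
docker compose exec redis redis-cli config get maxmemory-policy
docker compose exec redis redis-cli info stats | grep -E 'keyspace_(hits|misses)'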
Application Performance Optimization
1. Multi-Stage Build Optimization
# Dockerfile.optimized
# build stage
FROM node:16-alpine AS builder
WORKDIR /app
COPY package*.json ./
RUN npm ci && npm cache clean --force
COPY . .
RUN npm run build && npm prune --production
# runtime stage
FROM node:16-alpine AS runtime
WORKDIR /app
# install dumb-init for proper signal handling
RUN apk add --no-cache dumb-init
# create a non-root user
RUN addgroup -g 1001 -S nodejs && \
    adduser -S nextjs -u 1001
# copy build artifacts
COPY --from=builder --chown=nextjs:nodejs /app/dist ./dist
COPY --from=builder --chown=nextjs:nodejs /app/node_modules ./node_modules
COPY --from=builder --chown=nextjs:nodejs /app/package.json ./package.json
USER nextjs
EXPOSE 3000
# use dumb-init as PID 1
ENTRYPOINT ["dumb-init", "--"]
CMD ["node", "dist/server.js"]
services:
  app:
    build:
      context: .
      dockerfile: Dockerfile.optimized
      cache_from:
        - myapp:builder
        - myapp:runtime
      target: runtime
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 1G
        reservations:
          cpus: '0.25'
          memory: 512M
    environment:
      - NODE_ENV=production
      - NODE_OPTIONS=--max-old-space-size=768
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
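Build time and image size are worth measuring as well: BuildKit gives layer-level caching and parallel stages, and comparing the image before and after the multi-stage split shows what it saves. A sketch, assuming the app service above:

# build with BuildKit and inspect the resulting image
DOCKER_BUILDKIT=1 docker compose build app
docker compose images app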
2. Process Management Optimization
services:
  # Node.js application tuning
  node_app:
    image: node:16-alpine
    working_dir: /app
    volumes:
      - ./app:/app
    environment:
      - NODE_ENV=production
      - NODE_OPTIONS=--max-old-space-size=1024 --optimize-for-size
      - UV_THREADPOOL_SIZE=16
    command: |
      sh -c '
      # use PM2 for process management
      npm install -g pm2
      pm2-runtime start ecosystem.config.js
      '
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 1.5G
  # Python application tuning
  python_app:
    image: python:3.9-slim
    working_dir: /app
    volumes:
      - ./python-app:/app
    environment:
      - PYTHONUNBUFFERED=1
      - PYTHONDONTWRITEBYTECODE=1
      - WORKERS=4
    command: |
      sh -c '
      # use Gunicorn for process management
      pip install gunicorn gevent
      gunicorn --workers $$WORKERS --worker-class gevent --worker-connections 1000 --bind 0.0.0.0:8000 app:app
      '
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 2G
  # Java application tuning
  java_app:
    image: openjdk:11-jre-slim
    working_dir: /app
    volumes:
      - ./java-app:/app
    environment:
      - JAVA_OPTS=-Xms512m -Xmx1g -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:+UseStringDeduplication
    command: sh -c 'exec java $$JAVA_OPTS -jar app.jar'
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 1.5G
// ecosystem.config.js (PM2 configuration)
module.exports = {
  apps: [{
    name: 'myapp',
    script: 'server.js',
    instances: 'max',          // use all CPU cores
    exec_mode: 'cluster',
    max_memory_restart: '1G',
    node_args: '--max-old-space-size=1024',
    env: {
      NODE_ENV: 'production',
      PORT: 3000
    },
    // performance monitoring
    pmx: true,
    // logging
    log_file: '/var/log/app/combined.log',
    out_file: '/var/log/app/out.log',
    error_file: '/var/log/app/error.log',
    log_date_format: 'YYYY-MM-DD HH:mm:ss Z',
    // automatic restart behaviour
    watch: false,
    ignore_watch: ['node_modules', 'logs'],
    // graceful shutdown
    kill_timeout: 5000,
    wait_ready: true,
    listen_timeout: 10000
  }]
};
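If PM2 is used as configured above, the state of the cluster workers can be checked from outside the container. A sketch, assuming the node_app service and that pm2 was installed by its startup command:

# list PM2 processes, their mode and restart counts inside the container
docker compose exec node_app pm2 list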
Monitoring and Performance Analysis
1. Performance Monitoring Configuration
version: '3.8'
services:
  # application service
  app:
    image: myapp
    ports:
      - "3000:3000"
    environment:
      - ENABLE_METRICS=true
      - METRICS_PORT=9090
    labels:
      - "prometheus.io/scrape=true"
      - "prometheus.io/port=9090"
      - "prometheus.io/path=/metrics"
    networks:
      - app_network
      - monitoring
  # Prometheus
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=15d'
      - '--web.enable-lifecycle'
    networks:
      - monitoring
  # Grafana dashboards
  grafana:
    image: grafana/grafana:latest
    ports:
      - "3001:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin
    volumes:
      - grafana_data:/var/lib/grafana
      - ./monitoring/grafana/dashboards:/etc/grafana/provisioning/dashboards:ro
      - ./monitoring/grafana/datasources:/etc/grafana/provisioning/datasources:ro
    networks:
      - monitoring
  # Node Exporter (host metrics)
  node_exporter:
    image: prom/node-exporter:latest
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
    networks:
      - monitoring
  # cAdvisor (container metrics)
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    ports:
      - "8080:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    networks:
      - monitoring
volumes:
  prometheus_data:
  grafana_data:
networks:
  app_network:
  monitoring:
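Once the monitoring stack is up, the quickest health check is asking Prometheus which scrape targets it actually sees. A sketch using the Prometheus HTTP API on the port published above:

# list scrape targets and their health as seen by Prometheus
curl -s http://localhost:9090/api/v1/targets | grep -o '"health":"[^"]*"' | sort | uniq -c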
2. Performance Analysis Tools
services:
  # Jaeger distributed tracing
  jaeger:
    image: jaegertracing/all-in-one:latest
    ports:
      - "16686:16686"
      - "14268:14268"
    environment:
      - COLLECTOR_ZIPKIN_HTTP_PORT=9411
    networks:
      - monitoring
  # APM server
  apm_server:
    image: docker.elastic.co/apm/apm-server:7.15.0
    ports:
      - "8200:8200"
    volumes:
      - ./apm/apm-server.yml:/usr/share/apm-server/apm-server.yml:ro
    networks:
      - monitoring
  # log aggregation
  fluentd:
    image: fluent/fluentd:v1.14-1
    ports:
      - "24224:24224"
    volumes:
      - ./fluentd/fluent.conf:/fluentd/etc/fluent.conf:ro
      - fluentd_data:/fluentd/log
    networks:
      - monitoring
  # application with profiling enabled
  app_with_profiling:
    image: myapp
    environment:
      - ENABLE_PROFILING=true
      - JAEGER_AGENT_HOST=jaeger
      - APM_SERVER_URL=http://apm_server:8200
    logging:
      driver: fluentd
      options:
        fluentd-address: localhost:24224
        tag: myapp
    networks:
      - app_network
      - monitoring
volumes:
  fluentd_data:
Scaling and Load Balancing
1. Horizontal Scaling Configuration
version: '3.8'
services:
  # load balancer
  haproxy:
    image: haproxy:2.4-alpine
    ports:
      - "80:80"
      - "8404:8404"   # statistics page
    volumes:
      - ./haproxy/haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg:ro
    depends_on:
      - app
    networks:
      - frontend
      - backend
  # application service (scalable)
  app:
    image: myapp
    deploy:
      replicas: 4
      update_config:
        parallelism: 2
        delay: 10s
        failure_action: rollback
        monitor: 60s
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        window: 120s
      resources:
        limits:
          cpus: '1.0'
          memory: 1G
        reservations:
          cpus: '0.25'
          memory: 512M
    environment:
      - NODE_ENV=production
      - INSTANCE_ID={{.Task.Slot}}
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    networks:
      - backend
  # database (primary/replica replication)
  db_master:
    image: postgres:13-alpine
    environment:
      - POSTGRES_DB=myapp
      - POSTGRES_USER=user
      - POSTGRES_PASSWORD=password
      - POSTGRES_REPLICATION_USER=replicator
      - POSTGRES_REPLICATION_PASSWORD=replicator_password
    volumes:
      - db_master_data:/var/lib/postgresql/data
      - ./postgres/master.conf:/etc/postgresql/postgresql.conf:ro
    command: postgres -c config_file=/etc/postgresql/postgresql.conf
    networks:
      - backend
  db_slave:
    image: postgres:13-alpine
    environment:
      - POSTGRES_MASTER_SERVICE=db_master
      - POSTGRES_REPLICATION_USER=replicator
      - POSTGRES_REPLICATION_PASSWORD=replicator_password
    volumes:
      - db_slave_data:/var/lib/postgresql/data
    depends_on:
      - db_master
    networks:
      - backend
volumes:
  db_master_data:
  db_slave_data:
networks:
  frontend:
  backend:
# haproxy/haproxy.cfg
global
    daemon
    maxconn 4096
    log stdout local0

defaults
    mode http
    timeout connect 5000ms
    timeout client 50000ms
    timeout server 50000ms
    option httplog
    option dontlognull
    option redispatch
    retries 3

frontend web_frontend
    bind *:80
    default_backend web_servers

backend web_servers
    balance roundrobin
    option httpchk GET /health
    http-check expect status 200
    # dynamic service discovery (requires an additional resolvers configuration)
    server-template app 4 app:3000 check inter 2000ms rise 2 fall 3

frontend stats
    bind *:8404
    stats enable
    stats uri /stats
    stats refresh 30s
    stats admin if TRUE
2. Auto-Scaling Script
#!/bin/bash
# autoscale.sh
SERVICE_NAME="myapp_app"
MIN_REPLICAS=2
MAX_REPLICAS=10
CPU_THRESHOLD=70
MEMORY_THRESHOLD=80

while true; do
    # current replica count
    CURRENT_REPLICAS=$(docker service ls --filter name=$SERVICE_NAME --format "{{.Replicas}}" | cut -d'/' -f1)
    # average CPU and memory usage across running containers
    CPU_USAGE=$(docker stats --no-stream --format "table {{.CPUPerc}}" | grep -v CPU | sed 's/%//' | awk '{sum+=$1} END {print sum/NR}')
    MEMORY_USAGE=$(docker stats --no-stream --format "table {{.MemPerc}}" | grep -v MEM | sed 's/%//' | awk '{sum+=$1} END {print sum/NR}')
    echo "Current replicas: $CURRENT_REPLICAS, CPU: $CPU_USAGE%, Memory: $MEMORY_USAGE%"
    # scale-up condition
    if (( $(echo "$CPU_USAGE > $CPU_THRESHOLD" | bc -l) )) || (( $(echo "$MEMORY_USAGE > $MEMORY_THRESHOLD" | bc -l) )); then
        if [ $CURRENT_REPLICAS -lt $MAX_REPLICAS ]; then
            NEW_REPLICAS=$((CURRENT_REPLICAS + 1))
            echo "Scaling up to $NEW_REPLICAS replicas"
            docker service scale $SERVICE_NAME=$NEW_REPLICAS
        fi
    # scale-down condition
    elif (( $(echo "$CPU_USAGE < 30" | bc -l) )) && (( $(echo "$MEMORY_USAGE < 40" | bc -l) )); then
        if [ $CURRENT_REPLICAS -gt $MIN_REPLICAS ]; then
            NEW_REPLICAS=$((CURRENT_REPLICAS - 1))
            echo "Scaling down to $NEW_REPLICAS replicas"
            docker service scale $SERVICE_NAME=$NEW_REPLICAS
        fi
    fi
    sleep 60
done
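The script above relies on docker service and therefore on Swarm mode. For a plain (non-Swarm) Compose deployment, the closest manual equivalent is the --scale flag; a minimal sketch, assuming the app service name used in the examples:

# scale a plain Compose service without Swarm
docker compose up -d --scale app=6
docker compose ps app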
Performance and Benchmark Testing
1. Load Test Configuration
version: '3.8'
services:
  # application under test
  app:
    image: myapp
    deploy:
      replicas: 3
      resources:
        limits:
          cpus: '1.0'
          memory: 1G
    networks:
      - test_network
  # Apache Bench load test
  ab_test:
    image: alpine
    command: |
      sh -c '
      apk add --no-cache apache2-utils curl
      echo "Starting load test..."
      ab -n 10000 -c 100 -k http://app:3000/
      echo "Load test completed"
      '
    depends_on:
      - app
    networks:
      - test_network
  # wrk load test
  wrk_test:
    image: williamyeh/wrk
    command: wrk -t12 -c400 -d30s --latency http://app:3000/
    depends_on:
      - app
    networks:
      - test_network
  # JMeter load test
  jmeter:
    image: justb4/jmeter:latest
    volumes:
      - ./jmeter:/test
    command: |
      sh -c '
      jmeter -n -t /test/load_test.jmx -l /test/results.jtl
      jmeter -g /test/results.jtl -o /test/report
      '
    depends_on:
      - app
    networks:
      - test_network
  # performance monitoring
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
    networks:
      - test_network
networks:
  test_network:
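Rather than starting every load generator together with the stack, the test services can be run one at a time and removed afterwards. A sketch, assuming the service names above:

# start only the application, then run individual load generators on demand
docker compose up -d app
docker compose run --rm ab_test
docker compose run --rm wrk_test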
2. Performance Benchmark Script
#!/bin/bash
# benchmark.sh
APP_URL="http://localhost:3000"
TEST_DURATION="60s"
CONCURRENCY_LEVELS=(10 50 100 200 500)
RESULTS_DIR="./benchmark_results"

mkdir -p $RESULTS_DIR
echo "Starting performance benchmark tests..."
echo "Target: $APP_URL"
echo "Duration: $TEST_DURATION"
echo "Results will be saved to: $RESULTS_DIR"

# warm-up
echo "Warming up..."
curl -s $APP_URL > /dev/null
sleep 5

for concurrency in "${CONCURRENCY_LEVELS[@]}"; do
    echo "Testing with $concurrency concurrent connections..."
    # wrk test
    wrk -t12 -c$concurrency -d$TEST_DURATION --latency $APP_URL > "$RESULTS_DIR/wrk_c${concurrency}.txt"
    # Apache Bench test
    ab -n $((concurrency * 100)) -c $concurrency -k $APP_URL/ > "$RESULTS_DIR/ab_c${concurrency}.txt"
    echo "Completed test with $concurrency connections"
    sleep 10   # cool-down period
done

# generate report
echo "Generating performance report..."
python3 << EOF
import os
import re
import json

results = {}
results_dir = '$RESULTS_DIR'

for filename in os.listdir(results_dir):
    if filename.startswith('wrk_c'):
        concurrency = re.search(r'c(\d+)', filename).group(1)
        with open(os.path.join(results_dir, filename), 'r') as f:
            content = f.read()
        # parse wrk output
        rps_match = re.search(r'Requests/sec:\s+(\d+\.\d+)', content)
        latency_match = re.search(r'Latency\s+(\d+\.\d+)ms', content)
        if rps_match and latency_match:
            results[concurrency] = {
                'rps': float(rps_match.group(1)),
                'latency_ms': float(latency_match.group(1))
            }

# save results
with open(os.path.join(results_dir, 'summary.json'), 'w') as f:
    json.dump(results, f, indent=2)

print("Performance Summary:")
print("Concurrency\tRPS\t\tLatency (ms)")
for concurrency in sorted(results.keys(), key=int):
    data = results[concurrency]
    print(f"{concurrency}\t\t{data['rps']:.2f}\t\t{data['latency_ms']:.2f}")
EOF

echo "Benchmark completed. Results saved to $RESULTS_DIR"
Best Practice Summary
1. Performance Optimization Checklist
# performance optimization checklist
version: '3.8'
services:
  optimized_app:
    image: myapp:optimized
    # ✓ resource limits
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 2G
        reservations:
          cpus: '0.5'
          memory: 1G
    # ✓ health check
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    # ✓ environment tuning
    environment:
      - NODE_ENV=production
      - NODE_OPTIONS=--max-old-space-size=1536
      - UV_THREADPOOL_SIZE=16
    # ✓ network tuning
    networks:
      - optimized_network
    # ✓ storage and security hardening: /tmp on a locked-down tmpfs
    tmpfs:
      - /tmp:noexec,nosuid,size=100m
    # ✓ logging limits
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"
    # ✓ run as non-root with a read-only root filesystem
    user: "1001:1001"
    read_only: true
networks:
  optimized_network:
    driver: bridge
    driver_opts:
      com.docker.network.driver.mtu: 1500
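A checklist is only useful if the file still parses after every change, so validating the merged configuration should be part of the routine. A sketch, assuming a standard Compose file in the current directory:

# validate the configuration and list the resolved services
docker compose config --quiet && echo "compose file OK"
docker compose config --services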
2. Key Performance Metrics
# monitoring of key performance indicators
services:
  app:
    image: myapp
    environment:
      # enable metrics collection
      - METRICS_ENABLED=true
      - METRICS_PORT=9090
    labels:
      # Prometheus scrape labels
      - "prometheus.io/scrape=true"
      - "prometheus.io/port=9090"
      - "prometheus.io/path=/metrics"
      # performance target labels
      - "performance.cpu.limit=2.0"
      - "performance.memory.limit=2G"
      - "performance.rps.target=1000"
      - "performance.latency.target=100ms"
Summary
Optimizing Docker Compose performance is a systematic effort that spans several dimensions:
- Resource optimization: configure sensible CPU, memory, and disk I/O limits
- Network optimization: choose the right network mode and tune load balancing
- Storage optimization: use high-performance storage and a sound caching strategy
- Application optimization: tune the application itself and use an appropriate process manager
- Monitoring and analysis: build a solid monitoring setup and keep optimizing based on it
- Scaling strategy: support horizontal scaling and automate it where possible
- Performance testing: run load tests and benchmarks regularly
Key points:
- Choose optimization strategies that match the characteristics of your application
- Put solid monitoring and alerting in place
- Test and optimize performance continuously
- Balance performance, stability, and cost
- Follow best practices and security principles
With systematic performance optimization, you can significantly improve the performance and user experience of Docker Compose applications.