Tomcat 故障排除与调优
Troubleshooting and Tuning
概述
Tomcat故障排除与调优是运维工程师的核心技能。本文详细介绍常见问题的诊断方法、性能瓶颈分析、故障处理流程和调优策略,帮助你快速定位和解决Tomcat运行中的各种问题。
1. 常见启动问题
1.1 端口占用检查
#!/bin/bash
# check-ports.sh
#
# Report, for each default Tomcat port, whether something is already listening
# on it and which process(es) own the socket.

TOMCAT_PORTS=(8080 8443 8009 8005)

# Print the listening TCP/UDP sockets with whichever tool is installed.
# netstat is tried first; ss is the fallback for hosts without net-tools.
# Returns: 1 when neither tool is available (otherwise every port would be
#          silently reported as free).
list_listening_sockets() {
  if command -v netstat > /dev/null 2>&1; then
    netstat -tuln
  elif command -v ss > /dev/null 2>&1; then
    ss -tuln
  else
    echo "❌ 未找到 netstat 或 ss,无法检查端口" >&2
    return 1
  fi
}

# Check a single port and describe every process bound to it.
# Arguments: $1 - port number
# Returns:   0 when the check ran (port free or busy), 1 when no socket
#            listing tool is available.
check_port() {
  local port=$1
  local sockets pid

  echo "检查端口 $port:"
  sockets=$(list_listening_sockets) || return 1

  if ! grep -q -- ":${port} " <<< "$sockets"; then
    echo " ✅ 端口 $port 可用"
    return 0
  fi

  echo " ❌ 端口 $port 已被占用"
  # lsof -t prints one PID per line; several processes may share the port,
  # so each PID is handled separately.
  while IFS= read -r pid; do
    [ -n "$pid" ] || continue
    echo " 占用进程: $(ps -p "$pid" -o cmd --no-headers)"
    # Suggest a graceful TERM first; SIGKILL only as a fallback.
    echo " 解决方案: kill $pid (若进程未退出,再执行 kill -9 $pid)"
  done < <(lsof -ti:"$port" 2> /dev/null)
  return 0
}

# Check every port listed in TOMCAT_PORTS, stopping early only when the host
# has no tool to list sockets.
main() {
  local port
  for port in "${TOMCAT_PORTS[@]}"; do
    check_port "$port" || return 1
  done
}

main "$@"
1.2 JVM问题诊断
#!/bin/bash
# diagnose-jvm.sh
#
# Locations used by the diagnostics below. Both may be overridden from the
# environment; the defaults are the previously hard-coded values.
TOMCAT_HOME="${TOMCAT_HOME:-/opt/tomcat9}"
CATALINA_OUT="${CATALINA_OUT:-$TOMCAT_HOME/logs/catalina.out}"
# Check the Java runtime.
# Verifies that `java` is on PATH and that its major version is at least 8.
# Outputs: the detected version string and a verdict on stdout.
# Returns: 0 when Java >= 8 is available, 1 otherwise.
check_java() {
  local java_version major_version

  if ! command -v java &> /dev/null; then
    echo "❌ Java未安装"
    return 1
  fi

  # `java -version` writes to stderr. Pick the first line that carries a
  # quoted version instead of blindly using line 1, which may be a
  # "Picked up JAVA_TOOL_OPTIONS" notice.
  java_version=$(java -version 2>&1 | awk -F'"' '/version/ { print $2; exit }')
  echo "Java版本: $java_version"

  major_version=${java_version%%.*}
  # Legacy numbering (Java 8 and older) is "1.<major>.<minor>", e.g. 1.8.0_292.
  if [ "$major_version" = "1" ]; then
    major_version=${java_version#1.}
    major_version=${major_version%%.*}
  fi
  # Drop non-numeric suffixes such as "-ea".
  major_version=${major_version%%[!0-9]*}

  if [ -z "$major_version" ]; then
    echo "❌ 无法识别Java版本"
    return 1
  fi
  if [ "$major_version" -lt 8 ]; then
    echo "❌ Java版本过低"
    return 1
  fi
  echo "✅ Java版本兼容"
}
# Check memory settings.
# Prints the total system memory and, when bin/setenv.sh exists, the first
# -Xmx value configured there.
# Globals: TOMCAT_HOME (read)
check_memory() {
  local setenv="$TOMCAT_HOME/bin/setenv.sh"
  local total_memory heap_size

  # Second line of `free -m` is the "Mem:" row; column 2 is the total in MB.
  total_memory=$(free -m | awk 'NR==2{print $2}')
  echo "系统内存: ${total_memory}MB"

  if [ -f "$setenv" ]; then
    # Accept lower/upper-case unit suffixes (k, m, g) as well as a bare byte
    # count, e.g. -Xmx2G or -Xmx536870912.
    heap_size=$(grep -oE 'Xmx[0-9]+[kKmMgG]?' "$setenv" | head -1)
    echo "堆内存设置: ${heap_size:-未设置}"
  fi
}
# Check for startup errors.
# Scans the tail of catalina.out for a few well-known exception names and
# then reports whether the "Server startup in" banner is present.
# Globals: CATALINA_OUT (read)
# Returns: 1 when the log file does not exist.
check_errors() {
  [ -f "$CATALINA_OUT" ] || {
    echo "❌ 找不到catalina.out"
    return 1
  }

  # Known failure signatures, searched in the last 1000 log lines.
  local signature
  for signature in "OutOfMemoryError" "ClassNotFoundException" "BindException"; do
    if tail -1000 "$CATALINA_OUT" | grep -q "$signature"; then
      echo "❌ 发现错误: $signature"
      # Show the match plus two lines of context, capped at five lines.
      tail -1000 "$CATALINA_OUT" | grep -A 2 "$signature" | tail -5
    fi
  done

  # The startup banner is only looked for in the last 100 lines.
  local verdict="❌ 服务器启动失败"
  if tail -100 "$CATALINA_OUT" | grep -q "Server startup in"; then
    verdict="✅ 服务器启动成功"
  fi
  echo "$verdict"
}
# Run the diagnostics in order: Java runtime, memory settings, log errors.
for diagnostic in check_java check_memory check_errors; do
  "$diagnostic"
done
2. 性能问题诊断
2.1 性能分析工具
#!/bin/bash
# performance-analyzer.sh
#
# PID of the Tomcat JVM. A value already present in the environment wins;
# otherwise the first JVM whose `jps -l` line mentions "Bootstrap" is used, so
# the variable never holds more than one PID.
TOMCAT_PID="${TOMCAT_PID:-$(jps -l | awk '/Bootstrap/ { print $1; exit }')}"
# Sample the CPU usage of the Tomcat process with `top` and print the average.
# Globals:   TOMCAT_PID (read)
# Arguments: $1 - number of samples (default 10)
#            $2 - seconds to wait between samples (default 2)
# Outputs:   average CPU usage, plus a warning when it exceeds 80%.
# Returns:   1 when TOMCAT_PID is empty or no sample could be read.
analyze_cpu() {
  local samples=${1:-10}
  local interval=${2:-2}
  local -a readings=()
  local cpu avg i

  echo "=== CPU分析 ==="
  if [ -z "$TOMCAT_PID" ]; then
    echo "❌ Tomcat进程未找到"
    return 1
  fi

  # Collect CPU samples.
  for ((i = 1; i <= samples; i++)); do
    # Select the row by PID (column 1) and read column 9, which the original
    # code treats as the %CPU column of batch-mode top.
    cpu=$(top -p "$TOMCAT_PID" -n 1 -b \
      | awk -v pid="$TOMCAT_PID" '$1 == pid { sub(/%/, "", $9); print $9; exit }')
    # Some locales print a decimal comma.
    cpu=${cpu/,/.}
    # Skip missing or non-numeric samples instead of corrupting the total.
    if [[ "$cpu" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
      readings+=("$cpu")
    fi
    if ((i < samples)); then
      sleep "$interval"
    fi
  done

  if [ "${#readings[@]}" -eq 0 ]; then
    echo "❌ 无法获取CPU使用率" >&2
    return 1
  fi

  avg=$(printf '%s\n' "${readings[@]}" \
    | awk '{ sum += $1 } END { printf "%.1f", sum / NR }')
  echo "平均CPU使用率: ${avg}%"

  if awk -v v="$avg" 'BEGIN { exit !(v > 80) }'; then
    echo "⚠️ CPU使用率过高"
  fi
}
# Summarise heap usage and GC counts from the last line of `jstat -gc`.
# Globals: TOMCAT_PID (read)
# Outputs: heap usage percentage, young/full GC counts and threshold warnings.
# Returns: 1 when TOMCAT_PID is empty or the jstat line holds no capacity data.
# NOTE(review): the column positions assume the Java 8+ `jstat -gc` layout
# (S0C S1C S0U S1U EC EU OC OU ... YGC=$13 ... FGC=$15) — confirm for the JDK
# in use.
analyze_memory() {
  echo "=== 内存分析 ==="
  if [ -z "$TOMCAT_PID" ]; then
    echo "❌ Tomcat进程未找到"
    return 1
  fi

  jstat -gc "$TOMCAT_PID" | tail -1 | awk '{
    total_heap = ($1 + $2 + $5 + $7);
    used_heap = ($3 + $4 + $6 + $8);
    # A header-only or malformed line sums to zero; avoid dividing by it.
    if (total_heap <= 0) {
      print "❌ 无法读取堆内存数据";
      exit 1;
    }
    heap_usage = used_heap * 100 / total_heap;
    print "堆内存使用率: " heap_usage "%";
    print "Young GC次数: " $13;
    print "Full GC次数: " $15;
    if (heap_usage > 85) print "⚠️ 堆内存使用率过高";
    if ($15 > 10) print "⚠️ Full GC次数过多";
  }'
}
# Count total and BLOCKED threads from one jstack dump and, when more than 10
# are blocked, show any deadlock report contained in that same dump.
# Globals: TOMCAT_PID (read)
# Returns: 1 when TOMCAT_PID is empty or jstack fails.
analyze_threads() {
  local thread_dump total_threads blocked_threads

  echo "=== 线程分析 ==="
  if [ -z "$TOMCAT_PID" ]; then
    echo "❌ Tomcat进程未找到"
    return 1
  fi

  if ! thread_dump=$(jstack "$TOMCAT_PID"); then
    echo "❌ 无法获取线程转储" >&2
    return 1
  fi

  # Thread header lines start with the quoted thread name.
  total_threads=$(grep -c '^"' <<< "$thread_dump")
  blocked_threads=$(grep -c "BLOCKED" <<< "$thread_dump")

  echo "总线程数: $total_threads"
  echo "阻塞线程: $blocked_threads"

  if [ "$blocked_threads" -gt 10 ]; then
    echo "⚠️ 阻塞线程过多,检查死锁"
    # Search the dump already taken so the counts and the deadlock report
    # describe the same moment; no match is not an error.
    grep -A 5 "deadlock" <<< "$thread_dump" || true
  fi
}
# Command-line dispatch: "cpu", "memory" or "threads" runs that single
# analysis; any other (or no) argument runs all three, separated by blank
# lines.
if [[ "$1" == "cpu" ]]; then
  analyze_cpu
elif [[ "$1" == "memory" ]]; then
  analyze_memory
elif [[ "$1" == "threads" ]]; then
  analyze_threads
else
  analyze_cpu
  echo
  analyze_memory
  echo
  analyze_threads
fi
3. 内存问题处理
3.1 内存泄漏检测
#!/bin/bash
# memory-leak-detector.sh
#
# PID of the Tomcat JVM: an environment value wins, otherwise the first JVM
# whose `jps -l` line mentions "Bootstrap" is used (never more than one PID).
TOMCAT_PID="${TOMCAT_PID:-$(jps -l | awk '/Bootstrap/ { print $1; exit }')}"
# Directory that receives heap dumps; overridable from the environment.
HEAP_DUMP_DIR="${HEAP_DUMP_DIR:-/opt/tomcat/heapdumps}"
# Print the used heap as a whole number of KB, taken from the last line of
# `jstat -gc` (columns 3, 4, 6 and 8, as summed by the original code).
# The value is truncated to an integer because jstat reports decimals and
# bash arithmetic cannot handle them.
_heap_used_kb() {
  jstat -gc "$TOMCAT_PID" | tail -1 | awk '{ printf "%d", $3 + $4 + $6 + $8 }'
}

# Compare used heap before and after a waiting period and trigger a heap dump
# when it grew by more than the threshold.
# Globals:   TOMCAT_PID, HEAP_DUMP_DIR (read)
# Arguments: $1 - observation time in seconds (default 60)
#            $2 - growth threshold in KB (default 10240)
# Returns:   1 when TOMCAT_PID is empty or jstat yields no usable number.
detect_leak() {
  local duration=${1:-60}
  local threshold_kb=${2:-10240}
  local initial_memory final_memory growth

  echo "=== 内存泄漏检测 ==="
  if [ -z "$TOMCAT_PID" ]; then
    echo "❌ Tomcat进程未找到"
    return 1
  fi

  mkdir -p "$HEAP_DUMP_DIR"

  # Watch heap growth over the observation window.
  echo "监控内存增长(${duration}秒)..."
  initial_memory=$(_heap_used_kb)
  sleep "$duration"
  final_memory=$(_heap_used_kb)

  if ! [[ "$initial_memory" =~ ^[0-9]+$ && "$final_memory" =~ ^[0-9]+$ ]]; then
    echo "❌ 无法读取堆内存数据" >&2
    return 1
  fi

  growth=$((final_memory - initial_memory))
  echo "内存增长: ${growth}KB"

  if [ "$growth" -gt "$threshold_kb" ]; then
    echo "⚠️ 检测到内存快速增长"
    generate_heap_dump
  fi
}
# Write a timestamped heap dump of the Tomcat JVM and print the first lines
# of its class histogram.
# Globals: TOMCAT_PID, HEAP_DUMP_DIR (read)
# Returns: 1 when TOMCAT_PID is empty, the dump directory cannot be created,
#          or jcmd fails to write the dump.
generate_heap_dump() {
  local dump_file

  echo "生成堆转储..."
  if [ -z "$TOMCAT_PID" ]; then
    echo "❌ Tomcat进程未找到"
    return 1
  fi

  # The directory must exist even when this function is called directly
  # (the "dump" mode), not only after detect_leak created it.
  if ! mkdir -p "$HEAP_DUMP_DIR"; then
    echo "❌ 堆转储生成失败"
    return 1
  fi

  dump_file="$HEAP_DUMP_DIR/heap_$(date +%Y%m%d_%H%M%S).hprof"
  if jcmd "$TOMCAT_PID" GC.dump_heap "$dump_file"; then
    echo "堆转储已生成: $dump_file"

    # Show the top of the class histogram (first 10 lines of jcmd output).
    echo "类实例统计(前10):"
    jcmd "$TOMCAT_PID" GC.class_histogram | head -10
  else
    echo "❌ 堆转储生成失败"
    return 1
  fi
}
# Print a timestamped heap-usage line at a fixed interval until interrupted
# or until jstat can no longer read the process.
# Globals:   TOMCAT_PID (read)
# Arguments: $1 - seconds between samples (default 30)
# Returns:   1 when TOMCAT_PID is empty or jstat fails.
monitor_memory() {
  local interval=${1:-30}
  local timestamp sample

  if [ -z "$TOMCAT_PID" ]; then
    echo "❌ Tomcat进程未找到"
    return 1
  fi

  echo "开始内存监控..."
  while true; do
    # Stop instead of looping forever once the JVM is gone.
    if ! sample=$(jstat -gc "$TOMCAT_PID"); then
      echo "❌ Tomcat进程未找到"
      return 1
    fi

    timestamp=$(date '+%H:%M:%S')
    tail -1 <<< "$sample" | awk -v ts="$timestamp" '{
      used = ($3 + $4 + $6 + $8);
      total = ($1 + $2 + $5 + $7);
      # Guard the division against a header-only or malformed line.
      if (total <= 0) {
        print ts " - 无法读取堆内存数据";
        next;
      }
      usage = used * 100 / total;
      print ts " - 内存使用: " usage "% (" used "KB/" total "KB)";
    }'

    sleep "$interval"
  done
}
# Command-line dispatch: "dump" and "monitor" select their functions; every
# other argument (including "detect" and none at all) runs leak detection.
case "$1" in
  dump)
    generate_heap_dump
    ;;
  monitor)
    monitor_memory
    ;;
  *)
    detect_leak
    ;;
esac
4. 网络连接问题
4.1 连接诊断工具
#!/bin/bash
# connection-diagnostics.sh
#
# Port of the Tomcat HTTP connector to probe; overridable from the
# environment, defaulting to the previously hard-coded 8080.
TOMCAT_PORT="${TOMCAT_PORT:-8080}"
# Probe the Tomcat port and issue one HTTP request against the root URL.
# Globals: TOMCAT_PORT (read)
# Outputs: reachability verdict, HTTP status code and total response time.
# Returns: 1 when the TCP port cannot be opened or the HTTP request fails.
test_connectivity() {
  local url="http://localhost:$TOMCAT_PORT/"
  local probe response_code response_time
  local curl_status=0

  echo "=== 连接测试 ==="

  # Port reachability: opening /dev/tcp/<host>/<port> makes bash attempt a
  # TCP connection; `timeout` bounds the attempt to 5 seconds.
  if timeout 5 bash -c "</dev/tcp/localhost/$TOMCAT_PORT"; then
    echo "✅ 端口可访问"
  else
    echo "❌ 端口不可访问"
    return 1
  fi

  # HTTP response test: a single request yields both the status code and the
  # timing, so the two figures describe the same response. --max-time keeps
  # a hung server from blocking the script.
  probe=$(curl -s -o /dev/null --max-time 10 \
    -w '%{http_code} %{time_total}' "$url") || curl_status=$?
  read -r response_code response_time <<< "$probe"

  echo "HTTP响应码: $response_code"
  echo "响应时间: ${response_time}秒"

  if [ "$curl_status" -ne 0 ]; then
    echo "❌ HTTP请求失败" >&2
    return 1
  fi
}
# Summarise the sockets whose local address uses the Tomcat port.
# Globals: TOMCAT_PORT (read)
# Outputs: per-state counts, then total / ESTABLISHED / TIME_WAIT figures and
#          a warning when TIME_WAIT exceeds 1000.
analyze_connections() {
  local snapshot total established time_wait

  echo "=== 连接分析 ==="

  # Take one netstat snapshot so every figure below describes the same
  # moment. Only the local-address column ($4) is matched, so outbound
  # connections to a remote port of the same number are not counted.
  snapshot=$(netstat -an | awk -v port="$TOMCAT_PORT" '$4 ~ (":" port "$")')

  # Per-state statistics ($6 is the state column).
  echo "连接状态统计:"
  if [ -n "$snapshot" ]; then
    awk '{ print $6 }' <<< "$snapshot" | sort | uniq -c
  fi

  # Connection counts.
  total=$(awk 'NF { n++ } END { print n + 0 }' <<< "$snapshot")
  established=$(awk '$6 == "ESTABLISHED" { n++ } END { print n + 0 }' <<< "$snapshot")
  time_wait=$(awk '$6 == "TIME_WAIT" { n++ } END { print n + 0 }' <<< "$snapshot")

  echo "总连接数: $total"
  echo "已建立: $established"
  echo "TIME_WAIT: $time_wait"

  if [ "$time_wait" -gt 1000 ]; then
    echo "⚠️ TIME_WAIT连接过多"
  fi
}
# Run an ApacheBench load test against the root URL and print the throughput
# and failure lines of its report.
# Globals:   TOMCAT_PORT (read)
# Arguments: $1 - total number of requests (default 1000)
#            $2 - concurrent requests (default 50)
# Returns:   1 when `ab` is not installed or the arguments are not positive
#            integers; otherwise the status of the final grep.
load_test() {
  local requests=${1:-1000}
  local concurrency=${2:-50}

  echo "=== 负载测试 ==="

  if ! command -v ab &> /dev/null; then
    echo "需要安装apache2-utils"
    return 1
  fi

  if ! [[ "$requests" =~ ^[1-9][0-9]*$ && "$concurrency" =~ ^[1-9][0-9]*$ ]]; then
    echo "❌ 请求数和并发数必须为正整数" >&2
    return 1
  fi

  echo "执行负载测试..."
  ab -n "$requests" -c "$concurrency" "http://localhost:$TOMCAT_PORT/" \
    | grep -E "(Requests per second|Failed requests)"
}
# Command-line dispatch: "test", "analyze" or "load" runs that single step;
# anything else runs the connectivity test followed by the connection
# analysis (the load test is only run on request).
if [[ "$1" == "test" ]]; then
  test_connectivity
elif [[ "$1" == "analyze" ]]; then
  analyze_connections
elif [[ "$1" == "load" ]]; then
  load_test
else
  test_connectivity
  echo
  analyze_connections
fi
5. 自动调优工具
5.1 系统调优脚本
#!/bin/bash
# auto-tuning.sh
#
# Tomcat installation to tune; overridable from the environment, defaulting
# to the previously hard-coded /opt/tomcat9.
TOMCAT_HOME="${TOMCAT_HOME:-/opt/tomcat9}"
# Report the CPU core count and total memory of the host.
# Outputs: two human-readable lines followed by one machine-readable line of
#          the form "<cores>,<memory in MB>".
# Globals: cpu_cores, total_memory (written)
system_analysis() {
  cpu_cores=$(nproc)
  # Row 2 of `free -m` is the memory row; column 2 is the total in MB.
  total_memory=$(free -m | awk 'NR == 2 { print $2 }')

  printf '%s\n' \
    "CPU核心: $cpu_cores" \
    "总内存: ${total_memory}MB" \
    "$cpu_cores,$total_memory"
}
# Generate bin/setenv.sh with heap and GC settings derived from system memory.
# Globals:   TOMCAT_HOME (read)
# Arguments: $1 - CPU core count (accepted for interface symmetry; unused)
#            $2 - total system memory in MB
# Returns:   1 when the memory argument is not a positive integer or the
#            file cannot be written.
tune_jvm() {
  local cpu_cores=$1
  local total_memory=$2
  local setenv="$TOMCAT_HOME/bin/setenv.sh"
  local heap_size gc_params

  echo "=== JVM调优 ==="

  if ! [[ "$total_memory" =~ ^[1-9][0-9]*$ ]]; then
    echo "❌ 无效的内存大小: $total_memory" >&2
    return 1
  fi

  # Heap size: 70% of total memory.
  heap_size=$((total_memory * 70 / 100))

  # Collector choice: G1 for heaps above 4 GB, Parallel otherwise.
  if [ "$heap_size" -gt 4096 ]; then
    gc_params="-XX:+UseG1GC -XX:MaxGCPauseMillis=200"
  else
    gc_params="-XX:+UseParallelGC"
  fi

  # Keep the first pre-existing setenv.sh as a backup; later runs must not
  # replace it with an auto-generated version.
  if [ -f "$setenv" ] && [ ! -e "$setenv.backup" ]; then
    cp -p "$setenv" "$setenv.backup" || return 1
  fi

  # Write setenv.sh. Heap and GC flags go into CATALINA_OPTS rather than
  # JAVA_OPTS so they apply to the server JVM only and not to the short-lived
  # JVM started by the stop command.
  # NOTE(review): -Xloggc / -XX:+PrintGCTimeStamps are Java 8 style GC logging
  # flags; JDK 9+ uses -Xlog:gc* instead — confirm the target JDK.
  if ! cat > "$setenv" << EOF
#!/bin/bash
# Auto-generated JVM parameters
export CATALINA_OPTS="-Xms${heap_size}m -Xmx${heap_size}m $gc_params"
export CATALINA_OPTS="\$CATALINA_OPTS -Dfile.encoding=UTF-8 -Djava.security.egd=file:/dev/./urandom"
# GC日志
export CATALINA_OPTS="\$CATALINA_OPTS -Xloggc:\$CATALINA_HOME/logs/gc.log"
export CATALINA_OPTS="\$CATALINA_OPTS -XX:+PrintGCDetails -XX:+PrintGCTimeStamps"
EOF
  then
    echo "❌ 无法写入 $setenv" >&2
    return 1
  fi

  chmod +x "$setenv"
  echo "JVM参数已优化: 堆内存=${heap_size}MB"
}
# Rewrite the maxThreads / acceptCount attributes in conf/server.xml based on
# the CPU core count (maxThreads = cores * 50, acceptCount = maxThreads / 2).
# Globals:   TOMCAT_HOME (read)
# Arguments: $1 - CPU core count
# Returns:   1 when the argument is not a positive integer, server.xml is
#            missing, the backup fails, or neither attribute is present.
tune_connector() {
  local cpu_cores=$1
  local server_xml="$TOMCAT_HOME/conf/server.xml"
  local max_threads accept_count attr
  local missing=0

  echo "=== 连接器调优 ==="

  if ! [[ "$cpu_cores" =~ ^[1-9][0-9]*$ ]]; then
    echo "❌ 无效的CPU核心数: $cpu_cores" >&2
    return 1
  fi
  if [ ! -f "$server_xml" ]; then
    echo "❌ 找不到配置文件: $server_xml" >&2
    return 1
  fi

  max_threads=$((cpu_cores * 50))
  accept_count=$((max_threads / 2))

  # Back up the original configuration once; repeated runs must not replace
  # the pristine copy with an already-modified file.
  if [ ! -e "$server_xml.backup" ]; then
    cp -p "$server_xml" "$server_xml.backup" || return 1
  fi

  # sed only rewrites attributes that already exist, so report the ones that
  # are absent instead of claiming success for a no-op.
  for attr in maxThreads acceptCount; do
    if ! grep -q "${attr}=\"[0-9]*\"" "$server_xml"; then
      echo "⚠️ server.xml 中未找到 ${attr} 属性,未作修改" >&2
      missing=$((missing + 1))
    fi
  done
  if [ "$missing" -eq 2 ]; then
    return 1
  fi

  # Update the connector attributes in place.
  sed -i \
    -e "s/maxThreads=\"[0-9]*\"/maxThreads=\"$max_threads\"/" \
    -e "s/acceptCount=\"[0-9]*\"/acceptCount=\"$accept_count\"/" \
    "$server_xml" || return 1

  echo "连接器已优化: maxThreads=$max_threads, acceptCount=$accept_count"
}
# Append a line to a file unless an identical line is already present.
# Arguments: $1 - file path, $2 - exact line to ensure
_append_line_once() {
  local file=$1
  local line=$2

  if grep -qxF -- "$line" "$file" 2> /dev/null; then
    return 0
  fi
  # Terminate an unterminated last line first so the new entry does not get
  # glued onto it.
  if [ -s "$file" ] && [ -n "$(tail -c 1 "$file")" ]; then
    printf '\n' >> "$file" || return 1
  fi
  printf '%s\n' "$line" >> "$file"
}

# Raise the open-file limits and apply TCP tuning via sysctl.
# Running it repeatedly leaves exactly one copy of each setting in the files.
# Arguments: $1 - limits file (default /etc/security/limits.conf)
#            $2 - sysctl file (default /etc/sysctl.conf)
# Returns:   1 when a file cannot be written or `sysctl -p` fails.
tune_system() {
  local limits_conf=${1:-/etc/security/limits.conf}
  local sysctl_conf=${2:-/etc/sysctl.conf}
  local line

  echo "=== 系统参数调优 ==="

  # File descriptor limits.
  for line in "* soft nofile 65536" "* hard nofile 65536"; do
    _append_line_once "$limits_conf" "$line" || return 1
  done

  # TCP parameters.
  for line in \
    "net.ipv4.tcp_tw_reuse = 1" \
    "net.ipv4.tcp_fin_timeout = 30" \
    "net.core.somaxconn = 32768"; do
    _append_line_once "$sysctl_conf" "$line" || return 1
  done

  if ! sysctl -p "$sysctl_conf"; then
    echo "❌ sysctl 参数加载失败" >&2
    return 1
  fi
  echo "系统参数已优化"
}
# Print a tuning report: system facts, the exported *OPTS lines of setenv.sh
# (when that file exists), the connector thread attributes found in
# server.xml, and a fixed list of follow-up recommendations.
# Globals: TOMCAT_HOME (read)
generate_report() {
  local setenv_file="$TOMCAT_HOME/bin/setenv.sh"
  local server_xml="$TOMCAT_HOME/conf/server.xml"

  printf '%s\n' "=== 调优报告 ===" "系统信息:"
  system_analysis

  printf '\n%s\n' "当前JVM参数:"
  if [ -f "$setenv_file" ]; then
    grep "export.*OPTS" "$setenv_file"
  fi

  printf '\n%s\n' "连接器配置:"
  grep -E "(maxThreads|acceptCount)" "$server_xml"

  printf '\n'
  printf '%s\n' \
    "调优建议:" \
    "1. 重启Tomcat使配置生效" \
    "2. 监控应用性能" \
    "3. 根据实际负载调整参数"
}
# Entry point of the tuning script: requires root, gathers system facts and
# dispatches to the requested tuning step (or runs all of them).
# Arguments: $1 - "jvm", "connector", "system", "report"; anything else runs
#                 every step followed by the report.
# Exits:     1 when not run as root or when the system facts cannot be parsed.
main() {
  local analysis cpu_cores total_memory

  if [ "$EUID" -ne 0 ]; then
    echo "需要root权限运行"
    exit 1
  fi

  # system_analysis prints human-readable lines followed by a final
  # "<cores>,<memory>" line; only that last line is machine-readable, so the
  # others must be dropped before splitting on the comma.
  analysis=$(system_analysis | tail -n 1)
  IFS=, read -r cpu_cores total_memory <<< "$analysis"

  if ! [[ "$cpu_cores" =~ ^[0-9]+$ && "$total_memory" =~ ^[0-9]+$ ]]; then
    echo "❌ 无法解析系统信息: $analysis" >&2
    exit 1
  fi

  case "$1" in
    "jvm") tune_jvm "$cpu_cores" "$total_memory" ;;
    "connector") tune_connector "$cpu_cores" ;;
    "system") tune_system ;;
    "report") generate_report ;;
    *)
      tune_jvm "$cpu_cores" "$total_memory"
      tune_connector "$cpu_cores"
      tune_system
      generate_report
      ;;
  esac
}
# Script entry: forward all command-line arguments to main unchanged.
main "$@"
6. 故障处理检查清单
6.1 故障处理流程
#!/bin/bash
# troubleshoot-checklist.sh
#
# Print a static troubleshooting checklist, a quick-diagnosis cheat sheet and
# the recommended monitoring thresholds. Takes no arguments.

printf '%s\n' "=== Tomcat故障处理检查清单 ==="

checklist=(
  "检查Tomcat进程是否运行"
  "验证端口是否可访问"
  "检查Java版本兼容性"
  "分析启动日志错误"
  "监控内存使用情况"
  "检查磁盘空间"
  "验证配置文件语法"
  "测试应用响应"
  "检查网络连接"
  "分析性能指标"
)

# Number the checklist entries starting from 1.
step=1
for item in "${checklist[@]}"; do
  printf '%s\n' "${step}. ${item}"
  step=$((step + 1))
done

# The remaining output is fixed text; the quoted delimiter keeps it literal.
cat << 'EOF'

常见问题快速诊断:
- 启动失败: 检查端口占用和Java环境
- 内存不足: 调整堆内存大小和GC参数
- 响应缓慢: 分析线程池和数据库连接
- 连接超时: 检查网络和防火墙设置
- 应用无响应: 检查死锁和资源竞争

监控指标建议:
- CPU使用率 < 80%
- 内存使用率 < 85%
- 响应时间 < 5秒
- 错误率 < 1%
- GC暂停时间 < 200ms
EOF
小结
通过本文学习,你应该掌握:
- 常见启动问题的诊断和解决方法
- 系统性能瓶颈的分析技术
- 内存泄漏的检测和处理
- 网络连接问题的排查技巧
- 自动化调优工具的使用
- 故障处理的标准流程
- 关键性能指标的监控
本系列20篇Tomcat文章涵盖了从基础安装到高级调优的全部内容,为你提供了完整的Tomcat使用和管理指南。