Tomcat 监控与诊断
Monitoring and Diagnostics
概述
有效的监控和诊断是确保Tomcat稳定运行的关键。本文介绍JMX监控、性能指标收集、APM工具集成和故障诊断方法。
1. JMX监控配置
1.1 启用JMX
# setenv.sh - JMX settings
# Appends the remote-JMX system properties to CATALINA_OPTS one flag at a
# time, keeping whatever the variable already contained.
# NOTE(review): authenticate=false together with ssl=false means any client
# that can reach port 9999 may connect - presumably intended for trusted
# networks only; confirm before using outside a lab.
for jmx_flag in \
  -Dcom.sun.management.jmxremote \
  -Dcom.sun.management.jmxremote.port=9999 \
  -Dcom.sun.management.jmxremote.authenticate=false \
  -Dcom.sun.management.jmxremote.ssl=false \
  -Djava.rmi.server.hostname=192.168.1.10; do
  CATALINA_OPTS="$CATALINA_OPTS $jmx_flag"
done
unset jmx_flag
export CATALINA_OPTS
1.2 JMX安全配置
# Turn on JMX authentication.
# The password and access files are both read from $CATALINA_HOME/conf and
# follow the same naming pattern, so the two flags are generated in a loop.
CATALINA_OPTS="$CATALINA_OPTS -Dcom.sun.management.jmxremote.authenticate=true"
for jmx_file_kind in password access; do
  CATALINA_OPTS="$CATALINA_OPTS -Dcom.sun.management.jmxremote.${jmx_file_kind}.file=$CATALINA_HOME/conf/jmxremote.${jmx_file_kind}"
done
unset jmx_file_kind
export CATALINA_OPTS
2. 性能监控工具
2.1 JVM监控脚本
#!/bin/bash
# jvm-monitor.sh
# Prints a one-shot snapshot of the Tomcat JVM: heap usage, generation
# capacities, a thread-state histogram and process CPU/memory figures.
# Requires the JDK tools jps, jstat and jstack on PATH.
# Exits 1 when no matching JVM is found.

# Use the first JVM whose main class contains "Bootstrap". Several matches
# would otherwise produce a multi-line value the tools below cannot accept.
JAVA_PID=$(jps -l | awk '/Bootstrap/ { print $1; exit }')
if [ -z "$JAVA_PID" ]; then
  echo "Tomcat进程未找到"
  exit 1
fi
echo "=== JVM监控 $(date) ==="

# Memory usage.
# jstat reports sizes in KB. Columns are located by their header name
# (EU, S0U, S1U, OU) instead of by position, so the values printed are the
# "used" figures the labels promise.
echo "内存使用:"
jstat -gc "$JAVA_PID" | awk '
  function field(name) { return (name in col) ? $(col[name]) + 0 : 0 }
  NR == 1 { for (i = 1; i <= NF; i++) col[$i] = i }
  NR == 2 {
    printf "Eden: %.2fMB, Survivor: %.2fMB, Old: %.2fMB\n",
      field("EU") / 1024, (field("S0U") + field("S1U")) / 1024, field("OU") / 1024
  }'

# GC capacity: NGC is the current young-generation capacity and OGC the
# current old-generation capacity, both in KB.
echo "GC统计:"
jstat -gccapacity "$JAVA_PID" | awk '
  function field(name) { return (name in col) ? $(col[name]) + 0 : 0 }
  NR == 1 { for (i = 1; i <= NF; i++) col[$i] = i }
  NR == 2 {
    printf "新生代容量: %.2fMB, 老年代容量: %.2fMB\n",
      field("NGC") / 1024, field("OGC") / 1024
  }'

# Thread states: count how many threads are in each java.lang.Thread.State.
echo "线程信息:"
jstack "$JAVA_PID" | grep "java.lang.Thread.State" | sort | uniq -c

# Process-level CPU and memory share as reported by ps.
echo "进程CPU使用率:"
ps -p "$JAVA_PID" -o pid,ppid,pcpu,pmem,cmd
2.2 应用监控
// TomcatMonitor.java
package com.example.monitor;
import javax.management.*;
import java.lang.management.ManagementFactory;
/**
 * Prints a point-in-time Tomcat report (connectors, sessions, executors) by
 * reading Tomcat's MBeans from the platform MBean server. It must therefore
 * run inside the same JVM as Tomcat.
 */
public class TomcatMonitor {
    private final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();

    /**
     * Writes the full report to standard output.
     *
     * @throws Exception if an MBean query or attribute read fails
     */
    public void monitorTomcat() throws Exception {
        System.out.println("=== Tomcat监控报告 ===");
        // Connector statistics
        monitorConnectors();
        // Session statistics
        monitorSessions();
        // Shared executor statistics
        monitorThreadPools();
    }

    /**
     * Reports thread usage and request totals per connector.
     *
     * Thread counters are exposed by the ThreadPool MBean and request totals
     * by the GlobalRequestProcessor MBean of the same name; the Connector
     * MBean itself carries neither currentThreadCount nor requestCount.
     */
    private void monitorConnectors() throws Exception {
        for (ObjectName pool : mbs.queryNames(new ObjectName("Catalina:type=ThreadPool,*"), null)) {
            String name = pool.getKeyProperty("name");
            // Child MBeans (e.g. subType=SocketProperties) share the type but
            // have no thread counters.
            if (name == null || pool.getKeyProperty("subType") != null) {
                continue;
            }
            int currentThreads = intAttribute(pool, "currentThreadCount");
            int maxThreads = intAttribute(pool, "maxThreads");

            long requestCount = 0L;
            // 'name' keeps its ObjectName quoting, so it can be reused verbatim.
            ObjectName stats = new ObjectName("Catalina:type=GlobalRequestProcessor,name=" + name);
            if (mbs.isRegistered(stats)) {
                // Read through Number: the attribute's boxed type is not
                // guaranteed to be Long.
                requestCount = ((Number) mbs.getAttribute(stats, "requestCount")).longValue();
            }
            System.out.printf("连接器 %s: 线程 %d/%d, 请求数 %d%n",
                    displayName(name), currentThreads, maxThreads, requestCount);
        }
    }

    /** Reports the number of active sessions for every web application. */
    private void monitorSessions() throws Exception {
        for (ObjectName manager : mbs.queryNames(new ObjectName("Catalina:type=Manager,*"), null)) {
            // Tomcat 7+ names the key "context"; older releases used "path".
            String path = manager.getKeyProperty("context");
            if (path == null) {
                path = manager.getKeyProperty("path");
            }
            if (path == null || path.isEmpty()) {
                path = "/";
            }
            int activeSessions = intAttribute(manager, "activeSessions");
            System.out.printf("应用 %s: 活跃会话 %d%n", path, activeSessions);
        }
    }

    /** Reports active and total thread counts for every shared Executor. */
    private void monitorThreadPools() throws Exception {
        for (ObjectName executor : mbs.queryNames(new ObjectName("Catalina:type=Executor,*"), null)) {
            String name = executor.getKeyProperty("name");
            int activeCount = intAttribute(executor, "activeCount");
            int poolSize = intAttribute(executor, "poolSize");
            System.out.printf("线程池 %s: 活跃 %d, 总数 %d%n", name, activeCount, poolSize);
        }
    }

    /** Reads a numeric attribute without assuming its exact boxed type. */
    private int intAttribute(ObjectName name, String attribute) throws Exception {
        return ((Number) mbs.getAttribute(name, attribute)).intValue();
    }

    /** Strips ObjectName quoting (e.g. "http-nio-8080") for display. */
    private static String displayName(String keyValue) {
        if (keyValue.length() >= 2 && keyValue.startsWith("\"") && keyValue.endsWith("\"")) {
            try {
                return ObjectName.unquote(keyValue);
            } catch (IllegalArgumentException e) {
                // Not a well-formed quoted value; show it unchanged.
                return keyValue;
            }
        }
        return keyValue;
    }
}
3. APM工具集成
3.1 Prometheus + Grafana
<!-- Add JMX Exporter -->
<!-- NOTE(review): this Context only sets defaultTldScan="false" on the
     JarScanFilter; nothing in it refers to the exporter. Confirm that it
     belongs to this step. -->
<Context>
<JarScanner>
<JarScanFilter defaultTldScan="false"/>
</JarScanner>
</Context>
# Start the application with the JMX Exporter attached as a Java agent.
# NOTE(review): 8080 is presumably the port the exporter listens on; other
# snippets in this document address Tomcat at localhost:8080 - verify the
# two do not collide on the same host.
jvm_args=(
  "-javaagent:jmx_prometheus_javaagent-0.17.0.jar=8080:config.yaml"
  -jar your-application.jar
)
java "${jvm_args[@]}"
3.2 Micrometer集成
// MetricsConfiguration.java
/**
 * Spring configuration that exposes a Micrometer registry and binds Tomcat
 * metrics to it.
 */
@Configuration
public class MetricsConfiguration {

    /**
     * Exposes Micrometer's static global registry as the application's
     * MeterRegistry bean.
     */
    @Bean
    public MeterRegistry meterRegistry() {
        return Metrics.globalRegistry;
    }

    /**
     * Registers the Tomcat metrics binder.
     *
     * The binder needs the registry it should publish to and has no no-arg
     * constructor, so the registry bean is passed in explicitly.
     * NOTE(review): assumes TomcatMetricsBinder is Spring Boot Actuator's
     * class - confirm against the file's imports.
     */
    @Bean
    public TomcatMetricsBinder tomcatMetricsBinder() {
        return new TomcatMetricsBinder(meterRegistry());
    }
}
4. 健康检查
4.1 健康检查端点
// HealthCheckServlet.java
/**
 * Health endpoint served at /health.
 *
 * Responds with a JSON document containing an overall status, the result of
 * each individual check and a timestamp. The HTTP status is 200 when every
 * check passes and 503 otherwise.
 */
@WebServlet("/health")
public class HealthCheckServlet extends HttpServlet {
    /** Heap usage ratio above which the instance reports itself unhealthy. */
    private static final double MEMORY_USAGE_LIMIT = 0.9;

    /** One shared serializer instead of a new one per request. */
    private static final Gson GSON = new Gson();

    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        boolean healthy = true;
        // Insertion-ordered so the JSON lists checks in a stable order.
        Map<String, String> checks = new java.util.LinkedHashMap<>();

        // Database check
        try {
            // Database connectivity test goes here
            checks.put("database", "UP");
        } catch (Exception e) {
            healthy = false;
            checks.put("database", "DOWN: " + e.getMessage());
        }

        // Memory check: used heap as a fraction of the maximum heap
        Runtime runtime = Runtime.getRuntime();
        long usedMemory = runtime.totalMemory() - runtime.freeMemory();
        double memoryUsage = (double) usedMemory / runtime.maxMemory();
        // Locale.ROOT keeps the decimal separator a '.' on every host.
        String memoryPercent = String.format(java.util.Locale.ROOT, "%.1f%%", memoryUsage * 100);
        if (memoryUsage > MEMORY_USAGE_LIMIT) {
            healthy = false;
            checks.put("memory", "HIGH: " + memoryPercent);
        } else {
            checks.put("memory", "OK: " + memoryPercent);
        }

        // Map.of has no defined iteration order; build the body explicitly so
        // the keys always appear as status, checks, timestamp.
        Map<String, Object> body = new java.util.LinkedHashMap<>();
        body.put("status", healthy ? "UP" : "DOWN");
        body.put("checks", checks);
        body.put("timestamp", System.currentTimeMillis());

        response.setContentType("application/json");
        // Without an explicit charset the writer falls back to ISO-8859-1 and
        // non-ASCII text in an error message would be mangled.
        response.setCharacterEncoding("UTF-8");
        response.setStatus(healthy ? 200 : 503);
        response.getWriter().write(GSON.toJson(body));
    }
}
4.2 自动健康检查脚本
#!/bin/bash
# health-check.sh
# Base URL of the Tomcat instance to probe and the alert recipient.
# Both keep their previous values by default and can be overridden from the
# environment, e.g. TOMCAT_URL=http://app01:8080 ./health-check.sh
TOMCAT_URL="${TOMCAT_URL:-http://localhost:8080}"
ALERT_EMAIL="${ALERT_EMAIL:-admin@example.com}"
#######################################
# Probes $TOMCAT_URL/health and mails an alert when it is not HTTP 200.
# Globals:   TOMCAT_URL (read), ALERT_EMAIL (read),
#            HEALTH_CHECK_TIMEOUT (read, optional, seconds, default 10)
# Outputs:   one status line on stdout
# Returns:   0 when the endpoint answered 200, 1 otherwise
#######################################
check_tomcat_health() {
  local timeout="${HEALTH_CHECK_TIMEOUT:-10}"
  local status_code
  # --max-time bounds the whole request so a hung Tomcat cannot hang the
  # checker itself. If curl fails without printing a code, fall back to 000.
  status_code=$(curl -s -o /dev/null --max-time "$timeout" \
    -w "%{http_code}" "$TOMCAT_URL/health") || status_code=${status_code:-000}

  if [[ "$status_code" == "200" ]]; then
    echo "✓ Tomcat健康检查通过"
    return 0
  fi

  echo "✗ Tomcat健康检查失败 (状态码: $status_code)"
  echo "Tomcat健康检查失败" | mail -s "Tomcat告警" "$ALERT_EMAIL"
  return 1
}
#######################################
# Measures how long $TOMCAT_URL/ takes to answer and compares it with a
# threshold.
# Globals:   TOMCAT_URL (read),
#            HEALTH_CHECK_TIMEOUT (read, optional, seconds, default 10),
#            RESPONSE_TIME_THRESHOLD_MS (read, optional, default 5000)
# Outputs:   one status line on stdout
# Returns:   0 when the response arrived within the threshold, 1 when it
#            was slower or the request did not complete
#######################################
check_response_time() {
  local threshold_ms="${RESPONSE_TIME_THRESHOLD_MS:-5000}"
  local response_time time_ms

  # A failed request finishes quickly and would otherwise be reported as a
  # healthy response time, so curl's exit status is checked first.
  if ! response_time=$(curl -s -o /dev/null \
      --max-time "${HEALTH_CHECK_TIMEOUT:-10}" \
      -w "%{time_total}" "$TOMCAT_URL/"); then
    echo "✗ 响应时间检查失败: 请求未完成"
    return 1
  fi

  # Convert seconds to whole milliseconds. awk replaces the former bc
  # dependency; LC_ALL=C pins '.' as the decimal separator.
  time_ms=$(LC_ALL=C awk -v t="$response_time" 'BEGIN { printf "%.0f", t * 1000 }')

  if (( time_ms > threshold_ms )); then
    echo "⚠ 响应时间过长: ${time_ms}ms"
    return 1
  fi
  echo "✓ 响应时间正常: ${time_ms}ms"
  return 0
}
#######################################
# Runs both health checks and exits with the combined result.
# Every check is executed even when an earlier one has already failed.
# Outputs:   banner, per-check lines and a summary on stdout
# Exits:     0 when all checks passed, 1 when any check failed
#######################################
main() {
  echo "=== Tomcat健康检查 $(date) ==="
  local rc=0
  if ! check_tomcat_health; then
    rc=1
  fi
  if ! check_response_time; then
    rc=1
  fi

  if [ "$rc" -ne 1 ]; then
    echo "所有检查通过"
    exit 0
  fi
  echo "健康检查失败"
  exit 1
}
main
5. 性能分析工具
5.1 线程分析
#!/bin/bash
# thread-analyzer.sh
# Takes one thread dump of the Tomcat JVM, keeps it on disk and summarises
# it: thread-state histogram, deadlock warning and blocked threads.

# Use the first JVM whose main class contains "Bootstrap"; several matches
# would otherwise give a multi-line value jstack cannot accept.
JAVA_PID=$(jps -l | awk '/Bootstrap/ { print $1; exit }')

#######################################
# Dumps the threads of $JAVA_PID once and analyses that single dump.
# Globals:   JAVA_PID (read),
#            THREAD_DUMP_DIR (read, optional, default /tmp)
# Outputs:   analysis on stdout, errors on stderr; writes
#            thread_dump_<timestamp>.txt into the dump directory
# Returns:   0 on success, 1 when no PID is known or jstack fails
#######################################
analyze_threads() {
  echo "=== 线程分析 ==="
  if [[ -z "$JAVA_PID" ]]; then
    echo "Tomcat进程未找到" >&2
    return 1
  fi

  # Take the dump once and reuse the file. Separate jstack runs would each
  # see a different moment in time, and the saved file would not be the one
  # that was analysed.
  local dump_file
  dump_file="${THREAD_DUMP_DIR:-/tmp}/thread_dump_$(date +%Y%m%d_%H%M%S).txt"
  if ! jstack "$JAVA_PID" > "$dump_file"; then
    echo "jstack执行失败 (PID: $JAVA_PID)" >&2
    return 1
  fi
  echo "线程转储已保存: $dump_file"

  # Thread-state distribution, most frequent state first
  echo "线程状态分布:"
  grep "java.lang.Thread.State" "$dump_file" | sort | uniq -c | sort -nr

  # Deadlock check. jstack announces a deadlock as
  # "Found one Java-level deadlock:", so match on the stable part.
  if grep -q "Java-level deadlock" "$dump_file"; then
    echo "⚠️ 检测到死锁!"
  fi

  # Blocked threads with the first frames that follow each one
  echo "阻塞的线程:"
  grep -A 5 "BLOCKED" "$dump_file"
  # grep exits 1 when nothing is blocked; that is not an analysis failure.
  return 0
}
analyze_threads
5.2 内存分析
#!/bin/bash
# memory-analyzer.sh
# Prints a heap summary and a class histogram for the Tomcat JVM and warns
# when the old generation is more than 80% full.

# Use the first JVM whose main class contains "Bootstrap"; several matches
# would otherwise give a multi-line value the tools cannot accept.
JAVA_PID=$(jps -l | awk '/Bootstrap/ { print $1; exit }')

#######################################
# Runs the memory analysis against $JAVA_PID.
# Globals:   JAVA_PID (read)
# Outputs:   analysis on stdout, errors on stderr
# Returns:   0 on success, 1 when no PID is known
#######################################
analyze_memory() {
  echo "=== 内存分析 ==="
  if [[ -z "$JAVA_PID" ]]; then
    echo "Tomcat进程未找到" >&2
    return 1
  fi

  # Heap summary. "jmap -heap" exists only up to JDK 8; newer JDKs provide
  # the same report through jhsdb. stderr of the first attempt is dropped on
  # purpose because the fallback reports its own errors.
  if ! jmap -heap "$JAVA_PID" 2>/dev/null; then
    jhsdb jmap --heap --pid "$JAVA_PID"
  fi

  # Class histogram, largest consumers first
  echo "对象使用排行:"
  jmap -histo "$JAVA_PID" | head -20

  # Leak-risk indicator: old generation used (OU) as a percentage of its
  # capacity (OC). Columns are found by header name so the right values are
  # read whatever the column layout.
  local old_gen_usage
  old_gen_usage=$(jstat -gc "$JAVA_PID" | awk '
    NR == 1 { for (i = 1; i <= NF; i++) col[$i] = i }
    NR == 2 && ("OU" in col) && ("OC" in col) && $(col["OC"]) > 0 {
      print int($(col["OU"]) * 100 / $(col["OC"]))
    }')
  # Skip the comparison when jstat gave no usable figure.
  if [[ "$old_gen_usage" =~ ^[0-9]+$ ]] && (( old_gen_usage > 80 )); then
    echo "⚠️ 老年代使用率过高: ${old_gen_usage}%"
  fi
  return 0
}
analyze_memory
6. 监控告警
6.1 告警规则
#!/bin/bash
# alert-rules.sh
# Use the first JVM whose main class contains "Bootstrap"; several matches
# would otherwise give a multi-line value jstat/jstack cannot accept.
JAVA_PID=$(jps -l | awk '/Bootstrap/ { print $1; exit }')
# Alert recipient; keeps its previous value by default and can be
# overridden from the environment.
ALERT_EMAIL="${ALERT_EMAIL:-admin@example.com}"
#######################################
# Memory alert: mails a warning when heap usage exceeds 85%.
# Usage is (S0U + S1U + EU + OU) / (S0C + S1C + EC + OC) from "jstat -gc",
# i.e. used survivor + eden + old space over their capacities.
# Globals:   JAVA_PID (read), ALERT_EMAIL (read)
# Returns:   0 when no alert was needed or the mail was sent,
#            1 when jstat produced no usable reading
#######################################
check_memory_alert() {
  local memory_usage
  # Columns are located by header name, not by position, so "used" and
  # "capacity" values cannot be mixed up.
  memory_usage=$(jstat -gc "$JAVA_PID" | awk '
    function field(name) { return (name in col) ? $(col[name]) + 0 : 0 }
    NR == 1 { for (i = 1; i <= NF; i++) col[$i] = i }
    NR == 2 {
      used = field("S0U") + field("S1U") + field("EU") + field("OU")
      total = field("S0C") + field("S1C") + field("EC") + field("OC")
      if (total > 0) print int(used * 100 / total)
    }')

  if [[ ! "$memory_usage" =~ ^[0-9]+$ ]]; then
    echo "check_memory_alert: no usable jstat reading for PID '$JAVA_PID'" >&2
    return 1
  fi
  if (( memory_usage > 85 )); then
    echo "内存使用率告警: ${memory_usage}%" \
      | mail -s "Tomcat内存告警" "$ALERT_EMAIL"
  fi
}
#######################################
# Thread alert: mails a warning when the JVM has more than 500 threads.
# Every thread entry in a jstack dump begins with a double-quoted name, so
# the number of lines starting with '"' is the thread count.
# Globals:   JAVA_PID (read), ALERT_EMAIL (read)
#######################################
check_thread_alert() {
  local thread_count
  thread_count=$(jstack "$JAVA_PID" | grep -c '^"')
  # Nothing to report at or below the limit.
  (( thread_count > 500 )) || return 0
  echo "线程数量告警: $thread_count" \
    | mail -s "Tomcat线程告警" "$ALERT_EMAIL"
}
#######################################
# GC alert: mails a warning when more than 10 full collections have run.
# The value compared is the FGC column of "jstat -gc".
# NOTE(review): FGC is presumably a running total, not a rate; a true
# "frequency" check would compare two samples - confirm the intent.
# Globals:   JAVA_PID (read), ALERT_EMAIL (read)
# Returns:   0 when no alert was needed or the mail was sent,
#            1 when jstat produced no usable reading
#######################################
check_gc_alert() {
  local full_gc_count
  # Find FGC by header name; a fixed position picks up an unrelated,
  # non-integer column and breaks the numeric comparison.
  full_gc_count=$(jstat -gc "$JAVA_PID" | awk '
    NR == 1 { for (i = 1; i <= NF; i++) if ($i == "FGC") fgc = i }
    NR == 2 && fgc { print int($fgc) }')

  if [[ ! "$full_gc_count" =~ ^[0-9]+$ ]]; then
    echo "check_gc_alert: no usable jstat reading for PID '$JAVA_PID'" >&2
    return 1
  fi
  # Full GC count check
  if (( full_gc_count > 10 )); then
    echo "Full GC频率过高: $full_gc_count" \
      | mail -s "Tomcat GC告警" "$ALERT_EMAIL"
  fi
}
# Run every alert check in order: memory, threads, then GC.
for alert_check in check_memory_alert check_thread_alert check_gc_alert; do
  "$alert_check"
done
6.2 监控仪表板
#!/bin/bash
# dashboard.sh
# Generates a static HTML dashboard page that polls /metrics for data.

#######################################
# Writes the dashboard page to disk.
# Arguments: $1 - output file (optional, default /tmp/tomcat_dashboard.html)
# Outputs:   the path of the generated file on stdout, errors on stderr
# Returns:   0 on success, 1 when the file could not be written
#######################################
generate_dashboard() {
  local html_file="${1:-/tmp/tomcat_dashboard.html}"

  # The quoted delimiter keeps the page literal: nothing inside is expanded
  # by the shell.
  if ! cat > "$html_file" << 'EOF'
<!DOCTYPE html>
<html>
<head>
  <title>Tomcat监控仪表板</title>
  <meta http-equiv="refresh" content="30">
</head>
<body>
  <h1>Tomcat监控仪表板</h1>
  <div id="metrics">
    <h2>系统指标</h2>
    <pre id="system-info"></pre>
    <h2>JVM指标</h2>
    <pre id="jvm-info"></pre>
    <h2>应用指标</h2>
    <pre id="app-info"></pre>
  </div>
  <script>
    // Refresh the data periodically
    function updateMetrics() {
      fetch('/metrics')
        .then(response => {
          // fetch only rejects on network errors; surface HTTP errors too
          if (!response.ok) {
            throw new Error('HTTP ' + response.status);
          }
          return response.json();
        })
        .then(data => {
          document.getElementById('system-info').textContent = JSON.stringify(data.system, null, 2);
          document.getElementById('jvm-info').textContent = JSON.stringify(data.jvm, null, 2);
          document.getElementById('app-info').textContent = JSON.stringify(data.application, null, 2);
        })
        .catch(error => {
          // Show the failure on the page instead of leaving stale or empty panels
          document.getElementById('system-info').textContent = '指标加载失败: ' + error.message;
        });
    }
    setInterval(updateMetrics, 5000);
    updateMetrics();
  </script>
</body>
</html>
EOF
  then
    echo "无法写入仪表板文件: $html_file" >&2
    return 1
  fi
  echo "仪表板已生成: $html_file"
}
generate_dashboard
小结
通过本文学习,你应该掌握:
- JMX监控配置和安全设置
- 性能监控脚本和工具使用
- APM工具的集成方法
- 健康检查端点的实现
- 线程和内存分析技术
- 监控告警规则配置
- 监控仪表板的构建
下一篇文章将介绍Tomcat代理配置技术。