Tomcat 监控与诊断

Monitoring and Diagnostics

概述

有效的监控和诊断是确保Tomcat稳定运行的关键。本文介绍JMX监控、性能指标收集、APM工具集成和故障诊断方法。

1. JMX监控配置

1.1 启用JMX

# setenv.sh - JMX configuration
# Appends the remote-JMX system properties to CATALINA_OPTS in one assignment
# (adjacent quoted strings joined by line continuations), then exports it.
# As written, these settings leave JMX authentication and SSL switched off.
CATALINA_OPTS="$CATALINA_OPTS"\
" -Dcom.sun.management.jmxremote"\
" -Dcom.sun.management.jmxremote.port=9999"\
" -Dcom.sun.management.jmxremote.authenticate=false"\
" -Dcom.sun.management.jmxremote.ssl=false"\
" -Djava.rmi.server.hostname=192.168.1.10"
export CATALINA_OPTS

1.2 JMX安全配置

# Enable JMX authentication
# Turns authentication on and points the JVM at the password and access
# files under $CATALINA_HOME/conf, then exports the combined option string.
CATALINA_OPTS="$CATALINA_OPTS"\
" -Dcom.sun.management.jmxremote.authenticate=true"\
" -Dcom.sun.management.jmxremote.password.file=$CATALINA_HOME/conf/jmxremote.password"\
" -Dcom.sun.management.jmxremote.access.file=$CATALINA_HOME/conf/jmxremote.access"
export CATALINA_OPTS

2. 性能监控工具

2.1 JVM监控脚本

#!/bin/bash
# jvm-monitor.sh
#
# Prints a one-shot report for the running Tomcat JVM: heap usage, generation
# capacities, thread-state distribution and process CPU/memory usage.
# Uses the JDK tools jps, jstat and jstack plus ps.
# Exits 1 when no matching JVM is found.

# PID of the first JVM whose jps line contains "Bootstrap". Stopping at the
# first match keeps JAVA_PID a single value when several instances are running.
JAVA_PID=$(jps -l | awk '/Bootstrap/ {print $1; exit}')

if [ -z "$JAVA_PID" ]; then
    echo "Tomcat进程未找到"
    exit 1
fi

echo "=== JVM监控 $(date) ==="

# Memory usage.
# jstat -gc columns (KB): 1=S0C 2=S1C 3=S0U 4=S1U 5=EC 6=EU 7=OC 8=OU ...
# Used space is therefore EU for Eden, S0U+S1U for Survivor and OU for Old.
echo "内存使用:"
jstat -gc "$JAVA_PID" | awk 'NR==2 {
    printf "Eden: %.2fMB, Survivor: %.2fMB, Old: %.2fMB\n",
           $6/1024, ($3+$4)/1024, $8/1024
}'

# Generation capacities.
# jstat -gccapacity columns (KB): 1=NGCMN 2=NGCMX 3=NGC 4=S0C 5=S1C 6=EC
#                                 7=OGCMN 8=OGCMX 9=OGC 10=OC ...
# NGC / OGC are the current new / old generation capacities.
echo "GC统计:"
jstat -gccapacity "$JAVA_PID" | awk 'NR==2 {
    printf "新生代容量: %.2fMB, 老年代容量: %.2fMB\n", $3/1024, $9/1024
}'

# Thread information: number of threads per java.lang.Thread.State value
echo "线程信息:"
jstack "$JAVA_PID" | grep "java.lang.Thread.State" | sort | uniq -c

# CPU usage of the JVM process
echo "进程CPU使用率:"
ps -p "$JAVA_PID" -o pid,ppid,pcpu,pmem,cmd

2.2 应用监控

// TomcatMonitor.java
package com.example.monitor;

import javax.management.*;
import java.lang.management.ManagementFactory;

/**
 * Prints a snapshot of Tomcat runtime metrics read from this JVM's platform
 * MBean server (connectors, sessions and shared executors).
 *
 * <p>All lookups use the "Catalina" JMX domain.
 */
public class TomcatMonitor {

    private final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();

    /**
     * Prints the full report to standard output.
     *
     * @throws Exception if an MBean query or attribute read fails
     */
    public void monitorTomcat() throws Exception {
        System.out.println("=== Tomcat监控报告 ===");

        // Connector monitoring
        monitorConnectors();

        // Session monitoring
        monitorSessions();

        // Thread pool monitoring
        monitorThreadPools();
    }

    /**
     * Prints thread usage and request count per connector.
     *
     * <p>Thread figures are read from the connector's ThreadPool MBean and the
     * request counter from the GlobalRequestProcessor MBean of the same name;
     * the Connector MBean itself does not expose currentThreadCount or
     * requestCount.
     */
    private void monitorConnectors() throws Exception {
        for (ObjectName pool : mbs.queryNames(new ObjectName("Catalina:type=ThreadPool,*"), null)) {
            // Skip nested beans registered under the same type (they carry a subType key).
            if (pool.getKeyProperty("subType") != null) {
                continue;
            }
            String name = pool.getKeyProperty("name");
            if (name == null) {
                continue;
            }

            // Read through Number: the concrete numeric type differs between attributes.
            long currentThreads = ((Number) mbs.getAttribute(pool, "currentThreadCount")).longValue();
            long maxThreads = ((Number) mbs.getAttribute(pool, "maxThreads")).longValue();

            // The name value keeps its quotes, so it can be reused verbatim in the lookup.
            long requestCount = 0L;
            ObjectName processor = new ObjectName("Catalina:type=GlobalRequestProcessor,name=" + name);
            for (ObjectName registered : mbs.queryNames(processor, null)) {
                requestCount = ((Number) mbs.getAttribute(registered, "requestCount")).longValue();
            }

            System.out.printf("连接器 %s: 线程 %d/%d, 请求数 %d%n",
                              name.replace("\"", ""), currentThreads, maxThreads, requestCount);
        }
    }

    /**
     * Prints the active session count per web application.
     *
     * <p>The application is identified by the "context" key, falling back to
     * "path" and finally to "/" so a missing key cannot cause a
     * NullPointerException.
     */
    private void monitorSessions() throws Exception {
        for (ObjectName manager : mbs.queryNames(new ObjectName("Catalina:type=Manager,*"), null)) {
            String context = manager.getKeyProperty("context");
            if (context == null) {
                context = manager.getKeyProperty("path");
            }
            if (context == null || context.isEmpty()) {
                context = "/";
            }
            long activeSessions = ((Number) mbs.getAttribute(manager, "activeSessions")).longValue();
            System.out.printf("应用 %s: 活跃会话 %d%n", context, activeSessions);
        }
    }

    /** Prints active and total thread counts for each Executor MBean. */
    private void monitorThreadPools() throws Exception {
        for (ObjectName executor : mbs.queryNames(new ObjectName("Catalina:type=Executor,*"), null)) {
            String name = executor.getKeyProperty("name");
            long activeCount = ((Number) mbs.getAttribute(executor, "activeCount")).longValue();
            long poolSize = ((Number) mbs.getAttribute(executor, "poolSize")).longValue();
            System.out.printf("线程池 %s: 活跃 %d, 总数 %d%n", name, activeCount, poolSize);
        }
    }
}

3. APM工具集成

3.1 Prometheus + Grafana

<!-- Add JMX Exporter (translated from the original comment).
     NOTE(review): this fragment only sets defaultTldScan="false" on the
     JarScanFilter; nothing in it references the exporter itself. Confirm
     that it belongs in this section. -->
<Context>
    <JarScanner>
        <JarScanFilter defaultTldScan="false"/>
    </JarScanner>
</Context>
# Start the JMX Exporter as a Java agent.
# The agent argument after "=" is written here as <port>:<config file>
# (8080 and config.yaml).
# NOTE(review): 8080 is also the Tomcat port used by TOMCAT_URL in
# health-check.sh in this document - confirm the two do not collide.
java -javaagent:jmx_prometheus_javaagent-0.17.0.jar=8080:config.yaml \
     -jar your-application.jar

3.2 Micrometer集成

// MetricsConfiguration.java
/**
 * Spring configuration that exposes Micrometer's global registry and a Tomcat
 * metrics binder as beans.
 */
@Configuration
public class MetricsConfiguration {

    /** Exposes Micrometer's static global registry as the MeterRegistry bean. */
    @Bean
    public MeterRegistry meterRegistry() {
        return Metrics.globalRegistry;
    }

    /**
     * Creates the Tomcat metrics binder for the given registry.
     *
     * <p>NOTE(review): assumes TomcatMetricsBinder is the Spring Boot Actuator
     * class, which has no no-argument constructor and must be given the
     * registry it binds to; confirm against this file's imports.
     *
     * @param meterRegistry registry injected by Spring
     */
    @Bean
    public TomcatMetricsBinder tomcatMetricsBinder(MeterRegistry meterRegistry) {
        return new TomcatMetricsBinder(meterRegistry);
    }
}

4. 健康检查

4.1 健康检查端点

// HealthCheckServlet.java
/**
 * Health endpoint mapped to /health.
 *
 * <p>Responds with HTTP 200 and status "UP" when every check passes, otherwise
 * HTTP 503 and status "DOWN". The JSON body carries the overall status, the
 * individual check results and a millisecond timestamp.
 */
@WebServlet("/health")
public class HealthCheckServlet extends HttpServlet {

    /** Heap usage ratio (used / max) above which the memory check fails. */
    private static final double MEMORY_THRESHOLD = 0.9;

    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {

        boolean healthy = true;
        // Insertion-ordered so the checks are serialized in a stable order.
        Map<String, String> checks = new java.util.LinkedHashMap<>();

        // Database check
        try {
            // Database connection test goes here (placeholder in this example)
            checks.put("database", "UP");
        } catch (Exception e) {
            healthy = false;
            checks.put("database", "DOWN: " + e.getMessage());
        }

        // Memory check: heap currently in use relative to the JVM maximum
        Runtime runtime = Runtime.getRuntime();
        long usedMemory = runtime.totalMemory() - runtime.freeMemory();
        double memoryUsage = (double) usedMemory / runtime.maxMemory();
        String memoryPercent = String.format("%.1f%%", memoryUsage * 100);

        if (memoryUsage > MEMORY_THRESHOLD) {
            healthy = false;
            checks.put("memory", "HIGH: " + memoryPercent);
        } else {
            checks.put("memory", "OK: " + memoryPercent);
        }

        // Declare the charset before getWriter(): without it the writer falls
        // back to ISO-8859-1 and non-ASCII text in check messages is corrupted.
        response.setContentType("application/json; charset=UTF-8");
        response.setStatus(healthy ? 200 : 503);

        // Insertion-ordered map keeps the JSON field order deterministic.
        Map<String, Object> body = new java.util.LinkedHashMap<>();
        body.put("status", healthy ? "UP" : "DOWN");
        body.put("checks", checks);
        body.put("timestamp", System.currentTimeMillis());

        Gson gson = new Gson();
        response.getWriter().write(gson.toJson(body));
    }
}

4.2 自动健康检查脚本

#!/bin/bash
# health-check.sh
#
# Probes the Tomcat /health endpoint and measures the response time of the
# root page. Exits 0 when both checks pass and 1 otherwise.
#
# Optional environment overrides (defaults match the original script):
#   TOMCAT_URL       base URL of the Tomcat instance
#   ALERT_EMAIL      recipient of the failure mail
#   CURL_TIMEOUT     per-request timeout in seconds
#   MAX_RESPONSE_MS  response-time threshold in milliseconds

TOMCAT_URL="${TOMCAT_URL:-http://localhost:8080}"
ALERT_EMAIL="${ALERT_EMAIL:-admin@example.com}"
CURL_TIMEOUT="${CURL_TIMEOUT:-10}"
MAX_RESPONSE_MS="${MAX_RESPONSE_MS:-5000}"

# Requests $TOMCAT_URL/health and expects HTTP 200.
# Outputs: result line on stdout; sends an alert mail on failure.
# Returns: 0 when healthy, 1 otherwise.
check_tomcat_health() {
    local status_code
    # --max-time keeps a hung server from blocking the script forever. When
    # the request itself fails, curl reports 000 as the status code.
    status_code=$(curl -s -o /dev/null --max-time "$CURL_TIMEOUT" \
        -w "%{http_code}" "$TOMCAT_URL/health")

    if [ "$status_code" = "200" ]; then
        echo "✓ Tomcat健康检查通过"
        return 0
    else
        echo "✗ Tomcat健康检查失败 (状态码: $status_code)"
        echo "Tomcat健康检查失败" | mail -s "Tomcat告警" "$ALERT_EMAIL"
        return 1
    fi
}

# Measures the total time of a request to $TOMCAT_URL/.
# Returns: 0 when the request succeeds within MAX_RESPONSE_MS, 1 otherwise.
check_response_time() {
    local response_time time_ms

    # A failed request (refused connection, timeout) must not be reported as
    # a fast response, so check curl's exit status before using the timing.
    if ! response_time=$(curl -s -o /dev/null --max-time "$CURL_TIMEOUT" \
        -w "%{time_total}" "$TOMCAT_URL/"); then
        echo "⚠ 响应时间检查失败: 无法访问 $TOMCAT_URL/"
        return 1
    fi

    # Seconds (fractional) to whole milliseconds; awk avoids depending on bc.
    time_ms=$(awk -v t="$response_time" 'BEGIN { printf "%.0f", t * 1000 }')

    if (( time_ms > MAX_RESPONSE_MS )); then
        echo "⚠ 响应时间过长: ${time_ms}ms"
        return 1
    else
        echo "✓ 响应时间正常: ${time_ms}ms"
        return 0
    fi
}

# Runs both checks and exits with the combined result.
main() {
    echo "=== Tomcat健康检查 $(date) ==="

    local failed=0

    check_tomcat_health || failed=1
    check_response_time || failed=1

    if [ "$failed" -eq 1 ]; then
        echo "健康检查失败"
        exit 1
    else
        echo "所有检查通过"
        exit 0
    fi
}

main

5. 性能分析工具

5.1 线程分析

#!/bin/bash
# thread-analyzer.sh
#
# Takes one thread dump of the Tomcat JVM, saves it under /tmp and reports
# the thread-state distribution, deadlocks and blocked threads from that
# same dump.

# PID of the first JVM whose jps line contains "Bootstrap".
JAVA_PID=$(jps -l | awk '/Bootstrap/ {print $1; exit}')

# Reads a thread dump on stdin and prints the number of threads per state,
# most frequent first.
summarize_thread_states() {
    grep "java.lang.Thread.State" | sort | uniq -c | sort -nr
}

# Reads a thread dump on stdin; succeeds when it contains a deadlock report.
# jstack words the line as "Found one Java-level deadlock:" or
# "Found N Java-level deadlocks:", so match on the invariant part only.
has_deadlock() {
    grep -q "Java-level deadlock"
}

# Dumps the threads of $JAVA_PID once and analyses that single snapshot.
# Globals: JAVA_PID (read)
# Returns: 1 when no Tomcat PID is known or the dump cannot be taken.
analyze_threads() {
    echo "=== 线程分析 ==="

    if [[ -z "$JAVA_PID" ]]; then
        echo "Tomcat进程未找到" >&2
        return 1
    fi

    # Generate the thread dump once; every check below reads this file so all
    # results describe the same moment in time.
    local dump_file
    dump_file="/tmp/thread_dump_$(date +%Y%m%d_%H%M%S).txt"
    if ! jstack "$JAVA_PID" > "$dump_file"; then
        echo "jstack failed for PID $JAVA_PID" >&2
        return 1
    fi

    # Thread state distribution
    echo "线程状态分布:"
    summarize_thread_states < "$dump_file"

    # Deadlock check
    if has_deadlock < "$dump_file"; then
        echo "⚠️ 检测到死锁!"
    fi

    # Blocked threads with a few lines of stack context
    echo "阻塞的线程:"
    grep -A 5 "BLOCKED" "$dump_file"
}

analyze_threads

5.2 内存分析

#!/bin/bash
# memory-analyzer.sh
#
# Prints the heap summary and the top of the object histogram of the Tomcat
# JVM, and warns when old-generation usage exceeds 80%.

# PID of the first JVM whose jps line contains "Bootstrap".
JAVA_PID=$(jps -l | awk '/Bootstrap/ {print $1; exit}')

# Reads `jstat -gc` output (header line + value line) on stdin and prints
# old-generation usage as an integer percentage: OU * 100 / OC.
# Columns are located by header name. Prints nothing when a column is
# missing or the capacity is zero.
old_gen_usage_percent() {
    awk '
        NR == 1 { for (i = 1; i <= NF; i++) col[$i] = i }
        NR == 2 && ("OU" in col) && ("OC" in col) {
            capacity = $(col["OC"]) + 0
            used = $(col["OU"]) + 0
            if (capacity > 0) print int(used * 100 / capacity)
        }'
}

# Runs the memory analysis for $JAVA_PID.
# Globals: JAVA_PID (read)
# Returns: 1 when no Tomcat PID is known.
analyze_memory() {
    echo "=== 内存分析 ==="

    if [[ -z "$JAVA_PID" ]]; then
        echo "Tomcat进程未找到" >&2
        return 1
    fi

    # Heap summary. `jmap -heap` only exists up to JDK 8; newer JDKs provide
    # the same report through `jhsdb jmap --heap`, so fall back to that.
    jmap -heap "$JAVA_PID" 2>/dev/null || jhsdb jmap --heap --pid "$JAVA_PID"

    # Object histogram (header plus the largest classes)
    echo "对象使用排行:"
    jmap -histo "$JAVA_PID" | head -20

    # Memory-leak risk: sustained high old-generation occupancy
    local old_gen_usage
    old_gen_usage=$(jstat -gc "$JAVA_PID" | old_gen_usage_percent)

    # Guard against an empty value so a failed jstat cannot break the test.
    if [[ -n "$old_gen_usage" ]] && (( old_gen_usage > 80 )); then
        echo "⚠️ 老年代使用率过高: ${old_gen_usage}%"
    fi
}

analyze_memory

6. 监控告警

6.1 告警规则

#!/bin/bash
# alert-rules.sh
#
# Checks heap usage, thread count and Full GC count of the Tomcat JVM and
# mails an alert to ALERT_EMAIL for every threshold that is exceeded.

# PID of the first JVM whose jps line contains "Bootstrap".
JAVA_PID=$(jps -l | awk '/Bootstrap/ {print $1; exit}')
ALERT_EMAIL="admin@example.com"

# Reads `jstat -gc` output (header line + value line) on stdin and prints
# heap usage as an integer percentage:
#   (S0U + S1U + EU + OU) * 100 / (S0C + S1C + EC + OC)
# Columns are located by header name. Prints nothing when the total
# capacity is zero.
heap_usage_percent() {
    awk '
        NR == 1 { for (i = 1; i <= NF; i++) col[$i] = i }
        NR == 2 {
            used  = $(col["S0U"]) + $(col["S1U"]) + $(col["EU"]) + $(col["OU"])
            total = $(col["S0C"]) + $(col["S1C"]) + $(col["EC"]) + $(col["OC"])
            if (total > 0) print int(used * 100 / total)
        }'
}

# Reads jstat output on stdin and prints the value of the column whose
# header equals $1. Looking the column up by name keeps the script correct
# when the column layout differs between JDK versions.
jstat_column() {
    awk -v name="$1" '
        NR == 1 { for (i = 1; i <= NF; i++) if ($i == name) idx = i }
        NR == 2 && idx { print $idx }'
}

# Memory alert: mails when heap usage is above 85%.
check_memory_alert() {
    local memory_usage
    memory_usage=$(jstat -gc "$JAVA_PID" | heap_usage_percent)

    if [[ -n "$memory_usage" ]] && (( memory_usage > 85 )); then
        echo "内存使用率告警: ${memory_usage}%" | \
        mail -s "Tomcat内存告警" "$ALERT_EMAIL"
    fi
}

# Thread alert: mails when the dump lists more than 500 threads.
check_thread_alert() {
    local thread_count
    # Every thread entry in a jstack dump starts with its quoted name.
    thread_count=$(jstack "$JAVA_PID" | grep -c '^"')

    if (( thread_count > 500 )); then
        echo "线程数量告警: $thread_count" | \
        mail -s "Tomcat线程告警" "$ALERT_EMAIL"
    fi
}

# GC alert: mails when the FGC counter is above 10.
# NOTE(review): the value compared is the FGC column as reported by jstat,
# not a rate per time interval.
check_gc_alert() {
    local full_gc_count
    full_gc_count=$(jstat -gc "$JAVA_PID" | jstat_column FGC)

    # Only compare when jstat returned a plain integer.
    if [[ "$full_gc_count" =~ ^[0-9]+$ ]] && (( full_gc_count > 10 )); then
        echo "Full GC频率过高: $full_gc_count" | \
        mail -s "Tomcat GC告警" "$ALERT_EMAIL"
    fi
}

# Run all checks; without a PID none of the JDK tools can be queried.
if [[ -z "$JAVA_PID" ]]; then
    echo "Tomcat进程未找到" >&2
else
    check_memory_alert
    check_thread_alert
    check_gc_alert
fi

6.2 监控仪表板

#!/bin/bash
# dashboard.sh
#
# Generates a static HTML dashboard page that polls /metrics and renders the
# returned JSON sections.

# Writes the dashboard page.
# Arguments: $1 - output file (optional, default /tmp/tomcat_dashboard.html)
# Outputs:   confirmation line on stdout, error message on stderr
# Returns:   0 on success, 1 when the file cannot be written
generate_dashboard() {
    local html_file="${1:-/tmp/tomcat_dashboard.html}"

    # Quoted delimiter: the page is written literally, without shell expansion.
    cat > "$html_file" << 'EOF' || { echo "无法写入仪表板文件: $html_file" >&2; return 1; }
<!DOCTYPE html>
<html>
<head>
    <title>Tomcat监控仪表板</title>
    <meta http-equiv="refresh" content="30">
</head>
<body>
    <h1>Tomcat监控仪表板</h1>
    <div id="metrics">
        <h2>系统指标</h2>
        <pre id="system-info"></pre>

        <h2>JVM指标</h2>
        <pre id="jvm-info"></pre>

        <h2>应用指标</h2>
        <pre id="app-info"></pre>
    </div>

    <script>
        // Refresh the data periodically
        function updateMetrics() {
            fetch('/metrics')
                .then(response => {
                    // Treat HTTP error statuses as failures instead of parsing them
                    if (!response.ok) {
                        throw new Error('HTTP ' + response.status);
                    }
                    return response.json();
                })
                .then(data => {
                    document.getElementById('system-info').textContent = JSON.stringify(data.system, null, 2);
                    document.getElementById('jvm-info').textContent = JSON.stringify(data.jvm, null, 2);
                    document.getElementById('app-info').textContent = JSON.stringify(data.application, null, 2);
                })
                .catch(error => {
                    // Keep polling; just record why this refresh failed
                    console.error('updateMetrics failed:', error);
                });
        }

        setInterval(updateMetrics, 5000);
        updateMetrics();
    </script>
</body>
</html>
EOF

    echo "仪表板已生成: $html_file"
}

generate_dashboard

小结

通过本文学习,你应该掌握:

  1. JMX监控配置和安全设置
  2. 性能监控脚本和工具使用
  3. APM工具的集成方法
  4. 健康检查端点的实现
  5. 线程和内存分析技术
  6. 监控告警规则配置
  7. 监控仪表板的构建

下一篇文章将介绍Tomcat代理配置技术。

Powered by GitBook | © 2025 编外计划 | 最后修改: 2025-08-29 15:40:15

results matching ""

    No results matching ""