> 技术文档 > 【HBase】HBaseJMX 接口监控信息实现钉钉告警_hbase jmx

【HBase】HBaseJMX 接口监控信息实现钉钉告警_hbase jmx

目录

一、JMX 简介

二、JMX监控信息钉钉告警实现


一、JMX 简介


官网:Apache HBase ™ Reference Guide

JMX (Java管理扩展)提供了内置的工具,使您能够监视和管理Java VM。要启用远程系统的监视和管理,需要在启动Java VM时设置系统属性com.sun.management.jmxremote.port(希望通过该端口号启用JMX RMI连接)。

除了上述 RMI 方式之外,HBase 还会通过各进程的 Web UI 端口(RegionServer 默认为 16030)以 HTTP/JSON 的形式暴露 JMX 指标,可以直接用 curl 访问:

curl 'http://hdp-node2:16030/jmx?qry=Hadoop:service=HBase,name=RegionServer,sub=Server'

输出的指标如下:

{ \"beans\" : [ { \"name\" : \"Hadoop:service=HBase,name=RegionServer,sub=Server\", \"modelerType\" : \"RegionServer,sub=Server\", \"tag.zookeeperQuorum\" : \"hdp-node2:2181,hdp-node3:2181,hdp-node1:2181\", \"tag.serverName\" : \"hdp-node2,16020,1738720067137\", \"tag.clusterId\" : \"85aa06c7-b28c-41fd-aa17-a49376641751\", \"tag.Context\" : \"regionserver\", \"tag.Hostname\" : \"hdp-node2\", \"regionCount\" : 34, \"storeCount\" : 51, \"hlogFileCount\" : 1, \"hlogFileSize\" : 0, \"storeFileCount\" : 31, \"memStoreSize\" : 0, \"storeFileSize\" : 212110208, \"maxStoreFileAge\" : 23910274739, \"minStoreFileAge\" : 595075791, \"avgStoreFileAge\" : 12083745007, \"numReferenceFiles\" : 0, \"regionServerStartTime\" : 1738720067137, \"averageRegionSize\" : 6238535, \"storeFileIndexSize\" : 529736, \"staticIndexSize\" : 1348988, \"staticBloomSize\" : 2438222, \"mutationsWithoutWALCount\" : 0, \"mutationsWithoutWALSize\" : 0, \"percentFilesLocal\" : 100.0, \"percentFilesLocalSecondaryRegions\" : 0.0, \"splitQueueLength\" : 0, \"compactionQueueLength\" : 0, \"smallCompactionQueueLength\" : 0, \"largeCompactionQueueLength\" : 0, \"flushQueueLength\" : 0, \"blockCacheFreeSize\" : 1716727624, \"blockCacheCount\" : 0, \"blockCacheSize\" : 1259320, \"blockCacheCountHitPercent\" : 0.0, \"blockCacheExpressHitPercent\" : 0.0, \"l1CacheHitCount\" : 0, \"l1CacheMissCount\" : 0, \"l1CacheHitRatio\" : 0.0, \"l1CacheMissRatio\" : 0.0, \"l2CacheHitCount\" : 0, \"l2CacheMissCount\" : 0, \"l2CacheHitRatio\" : 0.0, \"l2CacheMissRatio\" : 0.0, \"mobFileCacheCount\" : 0, \"mobFileCacheHitPercent\" : 0.0, \"totalRequestCount\" : 2, \"totalRowActionRequestCount\" : 0, \"readRequestCount\" : 0, \"filteredReadRequestCount\" : 0, \"writeRequestCount\" : 0, \"rpcGetRequestCount\" : 0, \"rpcScanRequestCount\" : 0, \"rpcMultiRequestCount\" : 0, \"rpcMutateRequestCount\" : 0, \"checkMutateFailedCount\" : 0, \"checkMutatePassedCount\" : 0, \"blockCacheHitCount\" : 0, \"blockCacheHitCountPrimary\" : 0, 
\"blockCacheMissCount\" : 0, \"blockCacheMissCountPrimary\" : 0, \"blockCacheEvictionCount\" : 0, \"blockCacheEvictionCountPrimary\" : 0, \"blockCacheFailedInsertionCount\" : 0, \"blockCacheDataMissCount\" : 0, \"blockCacheLeafIndexMissCount\" : 0, \"blockCacheBloomChunkMissCount\" : 0, \"blockCacheMetaMissCount\" : 0, \"blockCacheRootIndexMissCount\" : 0, \"blockCacheIntermediateIndexMissCount\" : 0, \"blockCacheFileInfoMissCount\" : 0, \"blockCacheGeneralBloomMetaMissCount\" : 0, \"blockCacheDeleteFamilyBloomMissCount\" : 0, \"blockCacheTrailerMissCount\" : 0, \"blockCacheDataHitCount\" : 0, \"blockCacheLeafIndexHitCount\" : 0, \"blockCacheBloomChunkHitCount\" : 0, \"blockCacheMetaHitCount\" : 0, \"blockCacheRootIndexHitCount\" : 0, \"blockCacheIntermediateIndexHitCount\" : 0, \"blockCacheFileInfoHitCount\" : 0, \"blockCacheGeneralBloomMetaHitCount\" : 0, \"blockCacheDeleteFamilyBloomHitCount\" : 0, \"blockCacheTrailerHitCount\" : 0, \"updatesBlockedTime\" : 0, \"flushedCellsCount\" : 0, \"compactedCellsCount\" : 0, \"majorCompactedCellsCount\" : 0, \"flushedCellsSize\" : 0, \"compactedCellsSize\" : 0, \"majorCompactedCellsSize\" : 0, \"cellsCountCompactedFromMob\" : 0, \"cellsCountCompactedToMob\" : 0, \"cellsSizeCompactedFromMob\" : 0, \"cellsSizeCompactedToMob\" : 0, \"mobFlushCount\" : 0, \"mobFlushedCellsCount\" : 0, \"mobFlushedCellsSize\" : 0, \"mobScanCellsCount\" : 0, \"mobScanCellsSize\" : 0, \"mobFileCacheAccessCount\" : 0, \"mobFileCacheMissCount\" : 0, \"mobFileCacheEvictedCount\" : 0, \"hedgedReads\" : 0, \"hedgedReadWins\" : 0, \"blockedRequestCount\" : 0, \"MajorCompactionTime_num_ops\" : 2, \"MajorCompactionTime_min\" : 0, \"MajorCompactionTime_max\" : 0, \"MajorCompactionTime_mean\" : 0, \"MajorCompactionTime_25th_percentile\" : 0, \"MajorCompactionTime_median\" : 0, \"MajorCompactionTime_75th_percentile\" : 0, \"MajorCompactionTime_90th_percentile\" : 0, \"MajorCompactionTime_95th_percentile\" : 0, \"MajorCompactionTime_98th_percentile\" : 0, 
\"MajorCompactionTime_99th_percentile\" : 0, \"MajorCompactionTime_99.9th_percentile\" : 0, \"MajorCompactionTime_TimeRangeCount_600000-inf\" : 2, \"PauseTimeWithGc_num_ops\" : 0, \"PauseTimeWithGc_min\" : 0, \"PauseTimeWithGc_max\" : 0, \"PauseTimeWithGc_mean\" : 0, \"PauseTimeWithGc_25th_percentile\" : 0, \"PauseTimeWithGc_median\" : 0, \"PauseTimeWithGc_75th_percentile\" : 0, \"PauseTimeWithGc_90th_percentile\" : 0, \"PauseTimeWithGc_95th_percentile\" : 0, \"PauseTimeWithGc_98th_percentile\" : 0, \"PauseTimeWithGc_99th_percentile\" : 0, \"PauseTimeWithGc_99.9th_percentile\" : 0, \"compactedOutputBytes\" : 8924, \"pauseWarnThresholdExceeded\" : 0, \"ScanTime_num_ops\" : 0, \"ScanTime_min\" : 0, \"ScanTime_max\" : 0, \"ScanTime_mean\" : 0, \"ScanTime_25th_percentile\" : 0, \"ScanTime_median\" : 0, \"ScanTime_75th_percentile\" : 0, \"ScanTime_90th_percentile\" : 0, \"ScanTime_95th_percentile\" : 0, \"ScanTime_98th_percentile\" : 0, \"ScanTime_99th_percentile\" : 0, \"ScanTime_99.9th_percentile\" : 0, \"Increment_num_ops\" : 0, \"Increment_min\" : 0, \"Increment_max\" : 0, \"Increment_mean\" : 0, \"Increment_25th_percentile\" : 0, \"Increment_median\" : 0, \"Increment_75th_percentile\" : 0, \"Increment_90th_percentile\" : 0, \"Increment_95th_percentile\" : 0, \"Increment_98th_percentile\" : 0, \"Increment_99th_percentile\" : 0, \"Increment_99.9th_percentile\" : 0, \"Delete_num_ops\" : 0, \"Delete_min\" : 0, \"Delete_max\" : 0, \"Delete_mean\" : 0, \"Delete_25th_percentile\" : 0, \"Delete_median\" : 0, \"Delete_75th_percentile\" : 0, \"Delete_90th_percentile\" : 0, \"Delete_95th_percentile\" : 0, \"Delete_98th_percentile\" : 0, \"Delete_99th_percentile\" : 0, \"Delete_99.9th_percentile\" : 0, \"Put_num_ops\" : 0, \"Put_min\" : 0, \"Put_max\" : 0, \"Put_mean\" : 0, \"Put_25th_percentile\" : 0, \"Put_median\" : 0, \"Put_75th_percentile\" : 0, \"Put_90th_percentile\" : 0, \"Put_95th_percentile\" : 0, \"Put_98th_percentile\" : 0, \"Put_99th_percentile\" : 0, 
\"Put_99.9th_percentile\" : 0, \"DeleteBatch_num_ops\" : 0, \"DeleteBatch_min\" : 0, \"DeleteBatch_max\" : 0, \"DeleteBatch_mean\" : 0, \"DeleteBatch_25th_percentile\" : 0, \"DeleteBatch_median\" : 0, \"DeleteBatch_75th_percentile\" : 0, \"DeleteBatch_90th_percentile\" : 0, \"DeleteBatch_95th_percentile\" : 0, \"DeleteBatch_98th_percentile\" : 0, \"DeleteBatch_99th_percentile\" : 0, \"DeleteBatch_99.9th_percentile\" : 0, \"splitRequestCount\" : 0, \"FlushMemstoreSize_num_ops\" : 0, \"FlushMemstoreSize_min\" : 0, \"FlushMemstoreSize_max\" : 0, \"FlushMemstoreSize_mean\" : 0, \"FlushMemstoreSize_25th_percentile\" : 0, \"FlushMemstoreSize_median\" : 0, \"FlushMemstoreSize_75th_percentile\" : 0, \"FlushMemstoreSize_90th_percentile\" : 0, \"FlushMemstoreSize_95th_percentile\" : 0, \"FlushMemstoreSize_98th_percentile\" : 0, \"FlushMemstoreSize_99th_percentile\" : 0, \"FlushMemstoreSize_99.9th_percentile\" : 0, \"CompactionInputFileCount_num_ops\" : 2, \"CompactionInputFileCount_min\" : 0, \"CompactionInputFileCount_max\" : 0, \"CompactionInputFileCount_mean\" : 0, \"CompactionInputFileCount_25th_percentile\" : 0, \"CompactionInputFileCount_median\" : 0, \"CompactionInputFileCount_75th_percentile\" : 0, \"CompactionInputFileCount_90th_percentile\" : 0, \"CompactionInputFileCount_95th_percentile\" : 0, \"CompactionInputFileCount_98th_percentile\" : 0, \"CompactionInputFileCount_99th_percentile\" : 0, \"CompactionInputFileCount_99.9th_percentile\" : 0, \"PutBatch_num_ops\" : 0, \"PutBatch_min\" : 0, \"PutBatch_max\" : 0, \"PutBatch_mean\" : 0, \"PutBatch_25th_percentile\" : 0, \"PutBatch_median\" : 0, \"PutBatch_75th_percentile\" : 0, \"PutBatch_90th_percentile\" : 0, \"PutBatch_95th_percentile\" : 0, \"PutBatch_98th_percentile\" : 0, \"PutBatch_99th_percentile\" : 0, \"PutBatch_99.9th_percentile\" : 0, \"CompactionTime_num_ops\" : 2, \"CompactionTime_min\" : 0, \"CompactionTime_max\" : 0, \"CompactionTime_mean\" : 0, \"CompactionTime_25th_percentile\" : 0, 
\"CompactionTime_median\" : 0, \"CompactionTime_75th_percentile\" : 0, \"CompactionTime_90th_percentile\" : 0, \"CompactionTime_95th_percentile\" : 0, \"CompactionTime_98th_percentile\" : 0, \"CompactionTime_99th_percentile\" : 0, \"CompactionTime_99.9th_percentile\" : 0, \"CompactionTime_TimeRangeCount_600000-inf\" : 2, \"Get_num_ops\" : 0, \"Get_min\" : 0, \"Get_max\" : 0, \"Get_mean\" : 0, \"Get_25th_percentile\" : 0, \"Get_median\" : 0, \"Get_75th_percentile\" : 0, \"Get_90th_percentile\" : 0, \"Get_95th_percentile\" : 0, \"Get_98th_percentile\" : 0, \"Get_99th_percentile\" : 0, \"Get_99.9th_percentile\" : 0, \"MajorCompactionInputFileCount_num_ops\" : 2, \"MajorCompactionInputFileCount_min\" : 0, \"MajorCompactionInputFileCount_max\" : 0, \"MajorCompactionInputFileCount_mean\" : 0, \"MajorCompactionInputFileCount_25th_percentile\" : 0, \"MajorCompactionInputFileCount_median\" : 0, \"MajorCompactionInputFileCount_75th_percentile\" : 0, \"MajorCompactionInputFileCount_90th_percentile\" : 0, \"MajorCompactionInputFileCount_95th_percentile\" : 0, \"MajorCompactionInputFileCount_98th_percentile\" : 0, \"MajorCompactionInputFileCount_99th_percentile\" : 0, \"MajorCompactionInputFileCount_99.9th_percentile\" : 0, \"CheckAndPut_num_ops\" : 0, \"CheckAndPut_min\" : 0, \"CheckAndPut_max\" : 0, \"CheckAndPut_mean\" : 0, \"CheckAndPut_25th_percentile\" : 0, \"CheckAndPut_median\" : 0, \"CheckAndPut_75th_percentile\" : 0, \"CheckAndPut_90th_percentile\" : 0, \"CheckAndPut_95th_percentile\" : 0, \"CheckAndPut_98th_percentile\" : 0, \"CheckAndPut_99th_percentile\" : 0, \"CheckAndPut_99.9th_percentile\" : 0, \"SplitTime_num_ops\" : 0, \"SplitTime_min\" : 0, \"SplitTime_max\" : 0, \"SplitTime_mean\" : 0, \"SplitTime_25th_percentile\" : 0, \"SplitTime_median\" : 0, \"SplitTime_75th_percentile\" : 0, \"SplitTime_90th_percentile\" : 0, \"SplitTime_95th_percentile\" : 0, \"SplitTime_98th_percentile\" : 0, \"SplitTime_99th_percentile\" : 0, \"SplitTime_99.9th_percentile\" : 0, 
\"MajorCompactionOutputSize_num_ops\" : 2, \"MajorCompactionOutputSize_min\" : 0, \"MajorCompactionOutputSize_max\" : 0, \"MajorCompactionOutputSize_mean\" : 0, \"MajorCompactionOutputSize_25th_percentile\" : 0, \"MajorCompactionOutputSize_median\" : 0, \"MajorCompactionOutputSize_75th_percentile\" : 0, \"MajorCompactionOutputSize_90th_percentile\" : 0, \"MajorCompactionOutputSize_95th_percentile\" : 0, \"MajorCompactionOutputSize_98th_percentile\" : 0, \"MajorCompactionOutputSize_99th_percentile\" : 0, \"MajorCompactionOutputSize_99.9th_percentile\" : 0, \"MajorCompactionOutputSize_SizeRangeCount_100000000-inf\" : 2, \"majorCompactedInputBytes\" : 8924, \"slowAppendCount\" : 0, \"flushedOutputBytes\" : 0, \"CompactionOutputFileCount_num_ops\" : 2, \"CompactionOutputFileCount_min\" : 0, \"CompactionOutputFileCount_max\" : 0, \"CompactionOutputFileCount_mean\" : 0, \"CompactionOutputFileCount_25th_percentile\" : 0, \"CompactionOutputFileCount_median\" : 0, \"CompactionOutputFileCount_75th_percentile\" : 0, \"CompactionOutputFileCount_90th_percentile\" : 0, \"CompactionOutputFileCount_95th_percentile\" : 0, \"CompactionOutputFileCount_98th_percentile\" : 0, \"CompactionOutputFileCount_99th_percentile\" : 0, \"CompactionOutputFileCount_99.9th_percentile\" : 0, \"slowDeleteCount\" : 0, \"Replay_num_ops\" : 0, \"Replay_min\" : 0, \"Replay_max\" : 0, \"Replay_mean\" : 0, \"Replay_25th_percentile\" : 0, \"Replay_median\" : 0, \"Replay_75th_percentile\" : 0, \"Replay_90th_percentile\" : 0, \"Replay_95th_percentile\" : 0, \"Replay_98th_percentile\" : 0, \"Replay_99th_percentile\" : 0, \"Replay_99.9th_percentile\" : 0, \"FlushTime_num_ops\" : 0, \"FlushTime_min\" : 0, \"FlushTime_max\" : 0, \"FlushTime_mean\" : 0, \"FlushTime_25th_percentile\" : 0, \"FlushTime_median\" : 0, \"FlushTime_75th_percentile\" : 0, \"FlushTime_90th_percentile\" : 0, \"FlushTime_95th_percentile\" : 0, \"FlushTime_98th_percentile\" : 0, \"FlushTime_99th_percentile\" : 0, 
\"FlushTime_99.9th_percentile\" : 0, \"MajorCompactionInputSize_num_ops\" : 2, \"MajorCompactionInputSize_min\" : 0, \"MajorCompactionInputSize_max\" : 0, \"MajorCompactionInputSize_mean\" : 0, \"MajorCompactionInputSize_25th_percentile\" : 0, \"MajorCompactionInputSize_median\" : 0, \"MajorCompactionInputSize_75th_percentile\" : 0, \"MajorCompactionInputSize_90th_percentile\" : 0, \"MajorCompactionInputSize_95th_percentile\" : 0, \"MajorCompactionInputSize_98th_percentile\" : 0, \"MajorCompactionInputSize_99th_percentile\" : 0, \"MajorCompactionInputSize_99.9th_percentile\" : 0, \"MajorCompactionInputSize_SizeRangeCount_100000000-inf\" : 2, \"pauseInfoThresholdExceeded\" : 0, \"splitSuccessCount\" : 0, \"CheckAndDelete_num_ops\" : 0, \"CheckAndDelete_min\" : 0, \"CheckAndDelete_max\" : 0, \"CheckAndDelete_mean\" : 0, \"CheckAndDelete_25th_percentile\" : 0, \"CheckAndDelete_median\" : 0, \"CheckAndDelete_75th_percentile\" : 0, \"CheckAndDelete_90th_percentile\" : 0, \"CheckAndDelete_95th_percentile\" : 0, \"CheckAndDelete_98th_percentile\" : 0, \"CheckAndDelete_99th_percentile\" : 0, \"CheckAndDelete_99.9th_percentile\" : 0, \"CompactionInputSize_num_ops\" : 2, \"CompactionInputSize_min\" : 0, \"CompactionInputSize_max\" : 0, \"CompactionInputSize_mean\" : 0, \"CompactionInputSize_25th_percentile\" : 0, \"CompactionInputSize_median\" : 0, \"CompactionInputSize_75th_percentile\" : 0, \"CompactionInputSize_90th_percentile\" : 0, \"CompactionInputSize_95th_percentile\" : 0, \"CompactionInputSize_98th_percentile\" : 0, \"CompactionInputSize_99th_percentile\" : 0, \"CompactionInputSize_99.9th_percentile\" : 0, \"CompactionInputSize_SizeRangeCount_100000000-inf\" : 2, \"MajorCompactionOutputFileCount_num_ops\" : 2, \"MajorCompactionOutputFileCount_min\" : 0, \"MajorCompactionOutputFileCount_max\" : 0, \"MajorCompactionOutputFileCount_mean\" : 0, \"MajorCompactionOutputFileCount_25th_percentile\" : 0, \"MajorCompactionOutputFileCount_median\" : 0, 
\"MajorCompactionOutputFileCount_75th_percentile\" : 0, \"MajorCompactionOutputFileCount_90th_percentile\" : 0, \"MajorCompactionOutputFileCount_95th_percentile\" : 0, \"MajorCompactionOutputFileCount_98th_percentile\" : 0, \"MajorCompactionOutputFileCount_99th_percentile\" : 0, \"MajorCompactionOutputFileCount_99.9th_percentile\" : 0, \"ScanSize_num_ops\" : 0, \"ScanSize_min\" : 0, \"ScanSize_max\" : 0, \"ScanSize_mean\" : 0, \"ScanSize_25th_percentile\" : 0, \"ScanSize_median\" : 0, \"ScanSize_75th_percentile\" : 0, \"ScanSize_90th_percentile\" : 0, \"ScanSize_95th_percentile\" : 0, \"ScanSize_98th_percentile\" : 0, \"ScanSize_99th_percentile\" : 0, \"ScanSize_99.9th_percentile\" : 0, \"slowGetCount\" : 0, \"flushedMemstoreBytes\" : 0, \"CompactionOutputSize_num_ops\" : 2, \"CompactionOutputSize_min\" : 0, \"CompactionOutputSize_max\" : 0, \"CompactionOutputSize_mean\" : 0, \"CompactionOutputSize_25th_percentile\" : 0, \"CompactionOutputSize_median\" : 0, \"CompactionOutputSize_75th_percentile\" : 0, \"CompactionOutputSize_90th_percentile\" : 0, \"CompactionOutputSize_95th_percentile\" : 0, \"CompactionOutputSize_98th_percentile\" : 0, \"CompactionOutputSize_99th_percentile\" : 0, \"CompactionOutputSize_99.9th_percentile\" : 0, \"CompactionOutputSize_SizeRangeCount_100000000-inf\" : 2, \"majorCompactedOutputBytes\" : 8924, \"PauseTimeWithoutGc_num_ops\" : 0, \"PauseTimeWithoutGc_min\" : 0, \"PauseTimeWithoutGc_max\" : 0, \"PauseTimeWithoutGc_mean\" : 0, \"PauseTimeWithoutGc_25th_percentile\" : 0, \"PauseTimeWithoutGc_median\" : 0, \"PauseTimeWithoutGc_75th_percentile\" : 0, \"PauseTimeWithoutGc_90th_percentile\" : 0, \"PauseTimeWithoutGc_95th_percentile\" : 0, \"PauseTimeWithoutGc_98th_percentile\" : 0, \"PauseTimeWithoutGc_99th_percentile\" : 0, \"PauseTimeWithoutGc_99.9th_percentile\" : 0, \"slowPutCount\" : 0, \"slowIncrementCount\" : 0, \"compactedInputBytes\" : 8924, \"Append_num_ops\" : 0, \"Append_min\" : 0, \"Append_max\" : 0, \"Append_mean\" : 0, 
\"Append_25th_percentile\" : 0, \"Append_median\" : 0, \"Append_75th_percentile\" : 0, \"Append_90th_percentile\" : 0, \"Append_95th_percentile\" : 0, \"Append_98th_percentile\" : 0, \"Append_99th_percentile\" : 0, \"Append_99.9th_percentile\" : 0, \"FlushOutputSize_num_ops\" : 0, \"FlushOutputSize_min\" : 0, \"FlushOutputSize_max\" : 0, \"FlushOutputSize_mean\" : 0, \"FlushOutputSize_25th_percentile\" : 0, \"FlushOutputSize_median\" : 0, \"FlushOutputSize_75th_percentile\" : 0, \"FlushOutputSize_90th_percentile\" : 0, \"FlushOutputSize_95th_percentile\" : 0, \"FlushOutputSize_98th_percentile\" : 0, \"FlushOutputSize_99th_percentile\" : 0, \"FlushOutputSize_99.9th_percentile\" : 0, \"Bulkload_count\" : 0, \"Bulkload_mean_rate\" : 0.0, \"Bulkload_1min_rate\" : 0.0, \"Bulkload_5min_rate\" : 0.0, \"Bulkload_15min_rate\" : 0.0, \"Bulkload_num_ops\" : 0, \"Bulkload_min\" : 0, \"Bulkload_max\" : 0, \"Bulkload_mean\" : 0, \"Bulkload_25th_percentile\" : 0, \"Bulkload_median\" : 0, \"Bulkload_75th_percentile\" : 0, \"Bulkload_90th_percentile\" : 0, \"Bulkload_95th_percentile\" : 0, \"Bulkload_98th_percentile\" : 0, \"Bulkload_99th_percentile\" : 0, \"Bulkload_99.9th_percentile\" : 0 } ]}

以上输出是 HBase 中某个 RegionServer 的详细监控信息,具体包含 GC、scan、flush、block cache、compaction 等细粒度的监控指标。


二、JMX监控信息钉钉告警实现


下面我们实现一个脚本:每个整点通过钉钉发送各 RegionServer 运行时长的通知消息,并在检测到 RegionServer 重启(运行时长小于 10 分钟)时立即发出告警。

# -*- coding: utf-8 -*-
"""Monitor HBase RegionServer uptime through the JMX HTTP endpoint and notify DingTalk.

author: kangll
date: 2025/02/11 11:50

Metrics are read from the RegionServer info port, e.g.::

    curl 'http://hdp-node2:16030/jmx?qry=Hadoop:service=HBase,name=RegionServer,sub=Server'

Behaviour:

* At the top of every hour a summary with the uptime of every RegionServer
  is sent to the DingTalk robot.
* Every 10 seconds each RegionServer is polled; when its uptime is below
  10 minutes (i.e. it was just restarted) a restart alert is sent, at most
  once per host per clock hour.

NOTE(review): the original header said the summary goes out every 2 hours,
but the schedule below fires hourly -- confirm which one is intended.
"""
import json
import os
import time
from typing import Dict, Optional

import requests
import schedule

__author__ = 'kanglilong '

headers = {'Content-Type': 'application/json;charset=utf-8'}
hostArr = {"hdp-node1", "hdp-node2", "hdp-node3"}

# NOTE(review): a webhook access token is a secret and should not live in
# source control. It can be overridden through the DINGDING_WEBHOOK_URL
# environment variable; the literal below is kept only as the fallback so the
# script behaves as before when the variable is not set.
dingding_url = os.environ.get(
    "DINGDING_WEBHOOK_URL",
    "https://oapi.dingtalk.com/robot/send?access_token=ba7693ae5a1a5a4cda1358f35b19785a6d8a7659da92ba3685d6532994a6d82c",
)

# RegionServer info (web UI) port that serves the /jmx servlet.
JMX_PORT = 16030
# Upper bound for every HTTP call so one dead host cannot stall the loop.
REQUEST_TIMEOUT_SECONDS = 5
# An uptime below this value is treated as "the RegionServer just restarted".
RESTART_UPTIME_THRESHOLD_MS = 10 * 60 * 1000
# The alarm check ignores a bean that does not have more than this many
# attributes (threshold taken from the original code).
MIN_BEAN_ATTRIBUTE_COUNT = 400

# Hour-of-day of the most recent restart alert (kept for backward compatibility).
last_less_than_10mins_alert_hour = None
# Per-host "YYYY-mm-dd HH" slot of the last restart alert. Tracking it per
# host (and with the date) stops an alert for one host from suppressing the
# alert for another host, or for the same hour on a later day.
_last_restart_alert_slot: Dict[str, str] = {}


def _fetch_server_bean(regionserver_host):
    """Return the ``sub=Server`` JMX bean (a dict) of one RegionServer.

    Raises ``requests.exceptions.RequestException`` on connection/HTTP errors
    and ``KeyError`` / ``IndexError`` / ``ValueError`` on an unexpected payload.
    """
    jmx_url = f'http://{regionserver_host}:{JMX_PORT}/jmx?qry=Hadoop:service=HBase,name=RegionServer,sub=Server'
    response = requests.get(jmx_url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    return response.json()['beans'][0]


def _uptime_ms(bean):
    """Return the RegionServer uptime in milliseconds computed from *bean*."""
    # regionServerStartTime is the start timestamp in epoch milliseconds.
    start_ms = bean['regionServerStartTime']
    now_ms = int(time.time() * 1000)
    # Clamp at zero so clock skew between hosts never yields a negative uptime.
    return max(0, now_ms - start_ms)


def jmxGetHBaseStatus(regionserver_host) -> Optional[str]:
    """Build the uptime line of one RegionServer for the hourly summary.

    :param regionserver_host: host name of the RegionServer
    :return: the status text, or None when the JMX data could not be
        fetched or parsed (the error is printed)
    """
    try:
        uptime = _uptime_ms(_fetch_server_bean(regionserver_host))
    except requests.exceptions.RequestException as e:
        print(f'请求出错: {e}')
        return None
    except (KeyError, IndexError, ValueError) as e:
        print(f'解析 JMX 数据出错: {e}')
        return None
    uptime_hms = convert_milliseconds_to_hms(uptime)
    return f"hostname: {regionserver_host}, RegionServer uptime: {uptime_hms}"


def jmxGetHBaseAlarmStatus(regionserver_host) -> Optional[str]:
    """Send a restart alert when a RegionServer has been up for under 10 minutes.

    At most one alert is sent per host per clock hour.

    :param regionserver_host: host name of the RegionServer
    :return: the alert text when an alert was sent, "" when nothing had to be
        sent, None when the JMX data could not be fetched/parsed or the alert
        could not be delivered (the error is printed)
    """
    global last_less_than_10mins_alert_hour
    try:
        bean = _fetch_server_bean(regionserver_host)
        if len(bean) <= MIN_BEAN_ATTRIBUTE_COUNT:
            return ""

        uptime = _uptime_ms(bean)
        if uptime >= RESTART_UPTIME_THRESHOLD_MS:
            return ""

        now_time = time.localtime()
        hour_slot = time.strftime('%Y-%m-%d %H', now_time)
        if _last_restart_alert_slot.get(regionserver_host) == hour_slot:
            # Already alerted for this host during the current hour.
            return ""

        uptime_hms = convert_milliseconds_to_hms(uptime)
        formatted_time = time.strftime('%Y-%m-%d %H:%M:%S', now_time)
        text = "告警类型: reid 集群HBase 重启告警通知 \n" + "告警信息: \n" + f"hostname: {regionserver_host} ,RegionServer uptime: {uptime_hms} " + "\n告警时间:" + formatted_time
        msg(text, dingding_url)
        # Record the slot only after a successful send so that a failed
        # delivery is retried on the next poll.
        _last_restart_alert_slot[regionserver_host] = hour_slot
        last_less_than_10mins_alert_hour = now_time.tm_hour
        return text
    except requests.exceptions.RequestException as e:
        print(f'请求出错: {e}')
    except (KeyError, IndexError, ValueError) as e:
        print(f'解析 JMX 数据出错: {e}')
    return None


def convert_milliseconds_to_hms(milliseconds):
    """Format a duration given in milliseconds as hours / minutes / seconds.

    :param milliseconds: duration in milliseconds
    :return: the formatted string
    """
    total_seconds = milliseconds // 1000
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours}小时 {minutes}分钟 {seconds}秒."


def getAllHostsHBase(alert_message=""):
    """Send the uptime summary of every RegionServer to DingTalk.

    :param alert_message: optional text placed in front of the summary
    :return: None
    """
    formatted_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    lines = []
    # hostArr is a set; sort it so the numbering is stable between runs.
    for count, host in enumerate(sorted(hostArr), start=1):
        line_alarm = jmxGetHBaseStatus(host)
        if line_alarm is None:
            # Report the failure explicitly instead of the literal "None".
            line_alarm = f"hostname: {host}, RegionServer JMX 信息获取失败"
        lines.append("\t" + str(count) + "." + line_alarm + "\n")

    alert_message += "告警类型: reid 集群HBase告警通知 \n" + "告警信息: \n"
    alert_message += "".join(lines)
    alert_message += "\n告警时间:" + formatted_time
    print(alert_message)
    try:
        notify_msg(alert_message, dingding_url)
    except requests.exceptions.RequestException as e:
        # A failed delivery must not propagate out of the scheduler loop.
        print(f'请求出错: {e}')


def check_and_alert():
    """Poll every RegionServer once and log the outcome for each host."""
    formatted_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    for host in sorted(hostArr):
        alarm_str = jmxGetHBaseAlarmStatus(host)
        if alarm_str:
            print(alarm_str)
            print("时间: ", formatted_time, "主机:", host, 'RegionServer 重启告警发出!')
        elif alarm_str is None:
            # Do not report a host as healthy when it could not be checked.
            print("时间: ", formatted_time, "主机:", host, 'RegionServer JMX 信息获取失败!')
        else:
            print("时间: ", formatted_time, "主机:", host, 'RegionServer 状 态 正 常!')


def _post_text(text, api_url):
    """POST a DingTalk "text" message to *api_url*.

    Raises ``requests.exceptions.RequestException`` when the request fails or
    the webhook answers with an HTTP error status.
    """
    json_text = {
        "msgtype": "text",
        "text": {"content": text},
        "at": {"atMobiles": [""]},
    }
    response = requests.post(
        api_url,
        json.dumps(json_text),
        headers=headers,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()


def msg(text, api_url):
    """Send an alert to DingTalk (intended for notifying the person in charge).

    :param text: alert text
    :param api_url: DingTalk robot webhook URL
    :return: None
    """
    _post_text(text, api_url)


def notify_msg(text, api_url):
    """Send a notification to DingTalk.

    :param text: alert text
    :param api_url: DingTalk robot webhook URL
    :return: None
    """
    _post_text(text, api_url)


def correct_msg(text, api_url):
    """Send an "everything is fine" message to DingTalk.

    :param text: alert text
    :param api_url: DingTalk robot webhook URL
    :return: None
    """
    _post_text(text, api_url)


if __name__ == '__main__':
    # Regular summary at the top of every hour.
    schedule.every().hour.at(":00").do(getAllHostsHBase)
    while True:
        try:
            check_and_alert()
            schedule.run_pending()
        except Exception as e:
            # Top-level boundary: keep the monitor alive on unexpected errors.
            print(f'监控循环异常: {e}')
        time.sleep(10)

钉钉告警通知: