说明:刘宏缔的架构森林是一个专注架构的博客,地址:https://www.cnblogs.com/architectforest
对应的源码可以访问这里获取: https://github.com/liuhongdi/
说明:作者:刘宏缔 邮箱: 371125307@qq.com
https://github.com/google/mtail
[root@SearchCacheServer ~]# cd /usr/local/source/ [root@SearchCacheServer source]# mkdir mtail [root@SearchCacheServer source]# cd mtail/ [root@SearchCacheServer mtail]# wget https://github.com/google/mtail/releases/download/v3.0.0-rc35/mtail_v3.0.0-rc35_linux_amd64
[root@SearchCacheServer mtail]# chmod 744 mtail_v3.0.0-rc35_linux_amd64
[root@SearchCacheServer mtail]# mkdir /usr/local/soft/mtail_v3.0.0-rc35_linux_amd64 [root@SearchCacheServer mtail]# mv mtail_v3.0.0-rc35_linux_amd64 /usr/local/soft/mtail_v3.0.0-rc35_linux_amd64/mtail
[root@SearchCacheServer mtail]# /usr/local/soft/mtail_v3.0.0-rc35_linux_amd64/mtail --version mtail version v3.0.0-rc35 git revision a33283598c4b7a70fc2f113680318f29d5826cca go version go1.14 go arch amd64 go os linux
[root@SearchCacheServer phplogs]# mkdir -p /data/mtail/prog [root@SearchCacheServer phplogs]# cd /data/mtail/prog/ [root@SearchCacheServer prog]# vi error_count.mtail
counter error_count
/ERROR/ {
  error_count++
}
说明:error_count 是一个计数器,日志中每出现一行包含 ERROR 字符串的内容,它的值就加 1,即统计包含 ERROR 的行数
https://github.com/google/mtail/tree/master/examples
[root@SearchCacheServer prog]# nohup /usr/local/soft/mtail_v3.0.0-rc35_linux_amd64/mtail -logtostderr -progs /data/mtail/prog/error_count.mtail -logs /data/logs/phplogs/prd_mobile_php_errors.log &
[root@SearchCacheServer prog]# ss -lntp | grep mtail LISTEN 0 128 *:3903 *:* users:(("mtail",27642,11))
http://121.122.123.118:3903/
http://121.122.123.118:3903/metrics
[root@blog ~]# cd /usr/local/soft/prometheus-2.18.1.linux-amd64/ [root@blog prometheus-2.18.1.linux-amd64]# vi prometheus.yml
  - job_name: '118mtail'
    static_configs:
    - targets: ['121.122.123.118:3903']
[root@blog prometheus-2.18.1.linux-amd64]# systemctl restart prometheus.service
http://121.122.123.47:9090/graph
[root@blog rules]# pwd /data/prometheus/rules [root@blog rules]# vi rule.yml
- alert: ErrorlogStatus    # alert 名字
  expr: error_count{job="118mtail"} > 0    # 判断条件
  for: 10s    # 条件保持 10s 才会发出 alert
  labels:    # 设置 alert 的标签
    severity: "critical"
  annotations:    # alert 的其他标签,但不用于标识 alert
    description: php log error more than 20s
    summary: php have error
说明:error_count>0是触发报警的条件
[root@blog prometheus-2.18.1.linux-amd64]# ./promtool check config prometheus.yml Checking prometheus.yml SUCCESS: 1 rule files found Checking /data/prometheus/rules/rule.yml SUCCESS: 2 rules found
[root@blog prometheus-2.18.1.linux-amd64]# ./promtool check rules /data/prometheus/rules/rule.yml Checking /data/prometheus/rules/rule.yml SUCCESS: 2 rules found
[root@blog rules]# systemctl restart prometheus.service
https://www.cnblogs.com/architectforest/p/13065262.html
[root@blog ~]# /usr/local/soft/prometheus-2.18.1.linux-amd64/prometheus --version prometheus, version 2.18.1 (branch: HEAD, revision: ecee9c8abfd118f139014cb1b174b08db3f342cf) build user: root@2117a9e64a7e build date: 20200507-16:51:47 go version: go1.14.2
[root@blog ~]# cat /etc/redhat-release CentOS Linux release 8.0.1905 (Core)
linux(centos8):prometheus使用mtail监控错误日志
原文:https://www.cnblogs.com/architectforest/p/13067671.html