一、zabbix-agent配置
安装:
[root@jumpserver script]#easy_install pip
[root@jumpserver script]#pip install docker-py
[root@jumpserver script]# yum install python-simplejson -y
放入脚本并授权
https://pan.baidu.com/s/1wdRctyexC8ug0yz5O5yxXg
[root@jumpserver script]# mkdir -p /etc/zabbix/zabbix_agentd.d/script
[root@jumpserver script]# ls
docker_discovery.py docker_monitor.py docker_processmonitor.sh
[root@jumpserver script]# chmod +x *
[root@jumpserver script]# chown zabbix:zabbix *
编辑配置文件
[root@jumpserver script]# tail -3 /etc/zabbix/zabbix_agentd.d/userparameter_mysql.conf
UserParameter=docker_discovery[*], python /etc/zabbix/zabbix_agentd.d/script/docker_discovery.py $1
UserParameter=docker_stats[*], sudo /usr/bin/python /etc/zabbix/zabbix_agentd.d/script/docker_monitor.py $1 $2
UserParameter=docker_process[*], /bin/bash /etc/zabbix/zabbix_agentd.d/script/docker_processmonitor.sh $1 $2 $3
修改sudo
[root@jumpserver script]# chmod 700 /etc/sudoers
添加
[root@jumpserver script]# grep zabbix /etc/sudoers
zabbix ALL=(root) NOPASSWD:/bin/docker,/usr/bin/python,/etc/zabbix/zabbix_agentd.d/script/docker_monitor.py,/etc/zabbix/zabbix_agentd.d/script/docker_discovery.py,/etc/zabbix/zabbix_agentd.d/script/docker_processmonitor.sh
恢复权限
[root@jumpserver script]# chmod 400 /etc/sudoers
二、zabbix-server抓取数据测试
[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_discovery[docker]
{
"data":[
{
"{#CONTAINERNAME}":"jms_guacamole"
}
]
}
内存
[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_stats[jms_guacamole,mem_usage]
457662464
cpu
[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_stats[jms_guacamole,cpu_total_usage]
1829946
百分比cpu
[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_stats[jms_guacamole,cpu_percent]
0.19
[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_stats[jms_guacamole,mem_limit]
8369680384
百分比内存
[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_stats[jms_guacamole,mem_percent]
5.47
[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_stats[jms_guacamole,network_rx_bytes]
415161
[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_stats[jms_guacamole,network_tx_bytes]
458163
zabbix_get -s 172.16.2.115 -k docker_stats[tomcat,mem_usage]
zabbix_get -s 172.16.2.115 -k docker_stats[tomcat,cpu_total_usage]
zabbix_get -s 172.16.2.115 -k docker_stats[tomcat,cpu_percent]
zabbix_get -s 172.16.2.115 -k docker_stats[tomcat,mem_limit]
zabbix_get -s 172.16.2.115 -k docker_stats[tomcat,mem_percent]
zabbix_get -s 172.16.2.115 -k docker_stats[tomcat,network_rx_bytes]
zabbix_get -s 172.16.2.115 -k docker_stats[tomcat,network_tx_bytes]
三、web-zabbix配置
1、创建模板
1、创建自动发现规则
2、创建监控原型
容器:{#CONTAINERNAME}cpu使用率
docker_stats[{#CONTAINERNAME},cpu_percent]
容器:{#CONTAINERNAME}:进入流量
docker_stats[{#CONTAINERNAME},network_rx_bytes]
容器:{#CONTAINERNAME}:流出流量
docker_stats[{#CONTAINERNAME},network_tx_bytes]
容器:{#CONTAINERNAME}:内存使用率
docker_stats[{#CONTAINERNAME},mem_percent]
要一直创建这么多
3、创建图形
添加原型
查看图形
继续创建
4、配置触发器
容器:{#CONTAINERNAME}:内存使用率超过80%
{Docker-discover:docker_stats[{#CONTAINERNAME},mem_percent].last()}>80
同时也配置下CPU触发器
......................................
这回就可以关联主机了。
=-=====
二、监控容器状态
上面的模板没有监控容器的状态,接下来我们配置一个监控容器的状态的。
1、修改zabbix-agent配置文件
UserParameter=docker.discovery,/data/zabbix/script/docker_ping.py
UserParameter=docker.[*],/data/zabbix/script/docker_ping.py $1 $2
2、py脚本
原本文件是叫docker.py的,但是加入docker.py之后,会和上面的py脚本有冲突(文件名与docker-py提供的docker模块同名),会自动生成一个docker.pyc文件,而且还导致上面的监控脚本不能用了,所以这里改名为docker_ping.py。
cat /data/zabbix/script/docker_ping.py
#!/usr/bin/python
import sys
import os
import json
def discover():
    """Print Zabbix low-level-discovery JSON listing every container name.

    Runs ``docker ps -a`` (running and stopped containers) and writes
    ``{"data": [{"{#CONTAINERNAME}": "<name>"}, ...]}`` to stdout.
    """
    d = {'data': []}
    with os.popen("docker ps -a --format {{.Names}}") as pipe:
        for line in pipe:
            name = line.strip()
            if not name:
                # Skip blank lines so no empty-named container is reported.
                continue
            d['data'].append({'{#CONTAINERNAME}': name})
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(json.dumps(d))
def status(name, action):
    """Print one metric of a container for the Zabbix agent.

    name   -- container name, as reported by discover().
    action -- "ping" prints 1 if the container is running, otherwise 0.
              Any other value is used as a ``docker stats`` format field
              (the template in this document uses CPUPerc, MemPerc, NetIO
              and BlockIO). Values containing "%" are printed as a float
              with the "%" removed; everything else is printed verbatim.
    """
    # shlex.quote exists on Python 3, pipes.quote on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    if action == "ping":
        # Quote the name so it cannot be interpreted by the shell.
        cmd = 'docker inspect --format="{{.State.Running}}" %s' % quote(name)
        result = os.popen(cmd).read().strip()
        print(1 if result == "true" else 0)
    else:
        cmd = 'docker stats %s --no-stream --format %s' % (
            quote(name), quote('{{.%s}}' % action))
        result = os.popen(cmd).read().strip()
        if "%" in result:
            print(float(result.replace("%", "")))
        else:
            print(result)
if __name__ == '__main__':
    # Two arguments (container name, action) -> report a single metric;
    # fewer arguments -> emit the discovery JSON.
    # Dispatching on the argument count, rather than catching IndexError,
    # keeps an IndexError raised inside status() from silently triggering
    # discovery output.
    if len(sys.argv) >= 3:
        status(sys.argv[1], sys.argv[2])
    else:
        discover()
3、先导入zabbix.xml模板
<?xml version="1.0" encoding="UTF-8"?>
<zabbix_export>
<version>3.2</version>
<date>2018-06-04T04:12:36Z</date>
<groups>
<group>
<name>Templates</name>
</group>
</groups>
<templates>
<template>
<template>docker-status</template>
<name>docker-status</name>
<description/>
<groups>
<group>
<name>Templates</name>
</group>
</groups>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<items/>
<discovery_rules>
<discovery_rule>
<name>docker.discovery</name>
<type>0</type>
<snmp_community/>
<snmp_oid/>
<key>docker.discovery</key>
<delay>60</delay>
<status>0</status>
<allowed_hosts/>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<delay_flex/>
<params/>
<ipmi_sensor/>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<filter>
<evaltype>0</evaltype>
<formula/>
<conditions>
<condition>
<macro>{#CONTAINERNAME}</macro>
<value>@ CONTAINER NAME</value>
<operator>8</operator>
<formulaid>A</formulaid>
</condition>
</conditions>
</filter>
<lifetime>30</lifetime>
<description/>
<item_prototypes>
<item_prototype>
<name>Container {#CONTAINERNAME} Diskio usage:</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME} ,BlockIO]</key>
<delay>60</delay>
<history>90</history>
<trends>0</trends>
<status>0</status>
<value_type>1</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
<item_prototype>
<name>Container{#CONTAINERNAME} CPU usage:</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME},CPUPerc]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units>%</units>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
<item_prototype>
<name>Container {#CONTAINERNAME} mem usage:</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME},MemPerc]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units>%</units>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
<item_prototype>
<name>Container {#CONTAINERNAME} NETio usage:</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME},NetIO]</key>
<delay>60</delay>
<history>90</history>
<trends>0</trends>
<status>0</status>
<value_type>1</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
<item_prototype>
<name>Container{#CONTAINERNAME} is_run :</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME} ,ping]</key>
<delay>30</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
</item_prototypes>
<trigger_prototypes>
<trigger_prototype>
<expression>{docker-status:docker.[{#CONTAINERNAME} ,ping].last()}=0</expression>
<recovery_mode>0</recovery_mode>
<recoveryexpression/>
<name>docker{#CONTAINERNAME}_down</name>
<correlation_mode>0</correlation_mode>
<correlation_tag/>
<url/>
<status>0</status>
<priority>5</priority>
<description/>
<type>0</type>
<manual_close>0</manual_close>
<dependencies/>
<tags/>
</trigger_prototype>
</trigger_prototypes>
<graph_prototypes/>
<host_prototypes/>
</discovery_rule>
</discovery_rules>
<httptests/>
<macros/>
<templates/>
<screens/>
</template>
</templates>
</zabbix_export>
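4、清理坑,首先py脚本是通过docker.sock文件读取值的,所以需要授权。(注意:chmod 777 会让本机所有用户都能控制Docker,生产环境建议改为把zabbix用户加入docker组:usermod -aG docker zabbix,然后重启zabbix-agent。)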
chmod 777 /var/run/docker.sock
5、清理模板坑
导入模板的时候你会发现监控项中,多了一个空格,导致你无法取值。
docker.[{#CONTAINERNAME} ,ping]
docker.[{#CONTAINERNAME} ,BlockIO]
修改为
docker.[{#CONTAINERNAME},ping]
docker.[{#CONTAINERNAME},BlockIO]
这样就可以取值了。
。。。。。。。。。。。。。。。。。。
三、当然啦2个模板有监控内容有冲突的地方哈,可以把后面新增加的模板删一删
有问题请加博主微信进行沟通!
全部评论