zabbix自动发现监控docker

1720人浏览 / 0人评论

一、zabbix-agent配置

安装:

[root@jumpserver script]#easy_install pip

[root@jumpserver script]#pip install docker-py

[root@jumpserver script]# yum install python-simplejson -y

放入脚本并授权

https://pan.baidu.com/s/1wdRctyexC8ug0yz5O5yxXg

[root@jumpserver script]# mkdir -p /etc/zabbix/zabbix_agentd.d/script

[root@jumpserver script]# ls
 docker_discovery.py  docker_monitor.py  docker_processmonitor.sh
 [root@jumpserver script]# chmod +x *

[root@jumpserver script]# chown zabbix:zabbix *
 

 编辑配置文件

[root@jumpserver script]# tail -3  /etc/zabbix/zabbix_agentd.d/userparameter_mysql.conf 
UserParameter=docker_discovery[*], python /etc/zabbix/zabbix_agentd.d/script/docker_discovery.py $1
UserParameter=docker_stats[*], sudo /usr/bin/python  /etc/zabbix/zabbix_agentd.d/script/docker_monitor.py  $1 $2
UserParameter=docker_process[*], /bin/bash  /etc/zabbix/zabbix_agentd.d/script/docker_processmonitor.sh   $1  $2  $3
 

修改sudo

[root@jumpserver script]# chmod 700 /etc/sudoers

添加

[root@jumpserver script]# grep zabbix /etc/sudoers
zabbix ALL=(root) NOPASSWD:/bin/docker,/usr/bin/python,/etc/zabbix/zabbix_agentd.d/script/docker_monitor.py,/etc/zabbix/zabbix_agentd.d/script/docker_discovery.py,/etc/zabbix/zabbix_agentd.d/script/docker_processmonitor.sh

 

恢复权限

[root@jumpserver script]# chmod 400 /etc/sudoers 

二、zabbix-server抓取数据测试

[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_discovery[docker]
{
    "data":[
        {
            "{#CONTAINERNAME}":"jms_guacamole"
        }
    ]
}
 

内存

[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_stats[jms_guacamole,mem_usage]
457662464
 

cpu

[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_stats[jms_guacamole,cpu_total_usage]
1829946
 

百分比cpu

[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_stats[jms_guacamole,cpu_percent]
0.19
 

[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_stats[jms_guacamole,mem_limit]
8369680384
 

百分比内存

[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_stats[jms_guacamole,mem_percent]
5.47
 

[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_stats[jms_guacamole,network_rx_bytes]
415161
[root@zabbix02 ~]# zabbix_get -s 192.168.30.136 -k docker_stats[jms_guacamole,network_tx_bytes]
458163
 

zabbix_get -s 172.16.2.115 -k docker_stats[tomcat,mem_usage]

zabbix_get -s 172.16.2.115 -k docker_stats[tomcat,cpu_total_usage]

zabbix_get -s 172.16.2.115 -k docker_stats[tomcat,cpu_percent]

zabbix_get -s 172.16.2.115 -k docker_stats[tomcat,mem_limit]

zabbix_get -s 172.16.2.115 -k docker_stats[tomcat,mem_percent]

zabbix_get -s 172.16.2.115 -k docker_stats[tomcat,network_rx_bytes]

zabbix_get -s 172.16.2.115 -k docker_stats[tomcat,network_tx_bytes]

三、web-zabbix配置

1、创建模板

1、创建自动发现规则

 

2、创建监控原型

容器:{#CONTAINERNAME}cpu使用率

docker_stats[{#CONTAINERNAME},cpu_percent]

容器:{#CONTAINERNAME}:进入流量

docker_stats[{#CONTAINERNAME},network_rx_bytes]

容器:{#CONTAINERNAME}:流出流量

docker_stats[{#CONTAINERNAME},network_tx_bytes]

容器:{#CONTAINERNAME}:内存使用率

docker_stats[{#CONTAINERNAME},mem_percent]

要一直创建这么多

3、创建图形

添加原型

查看图形

继续创建

4、配置触发器

容器:{#CONTAINERNAME}:内存使用率超过80%

{Docker-discover:docker_stats[{#CONTAINERNAME},mem_percent].last()}>80

同时也配置下CPU触发器

......................................

这回就可以关联主机了。

=-=====

二、监控容器状态

上面的模板没有监控容器的状态,接下来我们配置一个监控容器的状态的。

1、修改zabbix-agent配置文件

# Low-level discovery: called with no arguments, the script prints the container list as JSON.
UserParameter=docker.discovery,/data/zabbix/script/docker_ping.py
# Per-container items: $1 = container name, $2 = "ping" or a `docker stats` format field.
UserParameter=docker.[*],/data/zabbix/script/docker_ping.py $1 $2

2、py脚本

原本文件是叫docker.py的,但是加入docker.py之后,会和上面的py脚本有冲突:会自动生成一个docker.pyc文件,而且还会导致上面的监控脚本不能用了,所以改名为docker_ping.py。

cat /data/zabbix/script/docker_ping.py

#!/usr/bin/python
import sys
import os
import json


def discover():
    """Print a Zabbix low-level-discovery JSON document listing containers.

    Runs ``docker ps -a --format {{.Names}}`` and writes one JSON object to
    stdout of the form ``{"data": [{"{#CONTAINERNAME}": "<name>"}, ...]}``,
    with one entry per line of output produced by the command.
    """
    entries = []
    with os.popen("docker ps -a --format {{.Names}}") as pipe:
        for line in pipe:
            container = line.replace("\n", "")
            # Skip blank lines so an empty macro value is never reported.
            if container:
                entries.append({'{#CONTAINERNAME}': container})

    # print() with a single argument behaves the same on Python 2 and 3.
    print(json.dumps({'data': entries}))


def status(name,action):
    """Print one monitoring value for a single container.

    Args:
        name: container name, passed to the docker CLI as one argument.
        action: ``"ping"`` to report the running state, otherwise the name
            of a ``docker stats`` format field (it is inserted into
            ``{{.<action>}}``).

    Output (stdout):
        * ``ping``: ``1`` when ``docker inspect`` reports ``true`` for
          ``.State.Running``, ``0`` for anything else (including no output).
        * other actions: the field value; if it contains ``%`` the percent
          sign is removed and the value is printed as a float, otherwise the
          raw text is printed.
    """
    # Local import keeps this function self-contained.
    import subprocess

    def _docker_output(argv):
        # The argv list is executed without a shell, so characters such as
        # ';' or '$(...)' in the container name cannot inject commands.
        try:
            proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError:
            # docker binary missing or not executable: treat as no output.
            return ""
        return proc.communicate()[0].replace("\n", "")

    if action == "ping":
        result = _docker_output(
            ['docker', 'inspect', '--format={{.State.Running}}', name])
        # print() with a single argument behaves the same on Python 2 and 3.
        print(1 if result == "true" else 0)
    else:
        result = _docker_output(
            ['docker', 'stats', name, '--no-stream',
             '--format', '{{.%s}}' % action])
        if "%" in result:
            print(float(result.replace("%", "")))
        else:
            print(result)


if __name__ == '__main__':
    # With a container name and an action, report that item's value; with
    # fewer arguments, emit the discovery JSON. Checking the argument count
    # explicitly (rather than catching IndexError around the call) ensures an
    # IndexError raised inside status() is not mistaken for "no arguments".
    if len(sys.argv) >= 3:
        status(sys.argv[1], sys.argv[2])
    else:
        discover()
 

3、先导入zabbix.xml模板

<?xml version="1.0" encoding="UTF-8"?>
<zabbix_export>
<version>3.2</version>
<date>2018-06-04T04:12:36Z</date>
<groups>
<group>
<name>Templates</name>
</group>
</groups>
<templates>
<template>
<template>docker-status</template>
<name>docker-status</name>
<description/>
<groups>
<group>
<name>Templates</name>
</group>
</groups>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<items/>
<discovery_rules>
<discovery_rule>
<name>docker.discovery</name>
<type>0</type>
<snmp_community/>
<snmp_oid/>
<key>docker.discovery</key>
<delay>60</delay>
<status>0</status>
<allowed_hosts/>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<delay_flex/>
<params/>
<ipmi_sensor/>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<filter>
<evaltype>0</evaltype>
<formula/>
<conditions>
<condition>
<macro>{#CONTAINERNAME}</macro>
<value>@ CONTAINER NAME</value>
<operator>8</operator>
<formulaid>A</formulaid>
</condition>
</conditions>
</filter>
<lifetime>30</lifetime>
<description/>
<item_prototypes>
<item_prototype>
<name>Container {#CONTAINERNAME} Diskio usage:</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME} ,BlockIO]</key>
<delay>60</delay>
<history>90</history>
<trends>0</trends>
<status>0</status>
<value_type>1</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
<item_prototype>
<name>Container{#CONTAINERNAME} CPU usage:</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME},CPUPerc]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units>%</units>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
<item_prototype>
<name>Container {#CONTAINERNAME} mem usage:</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME},MemPerc]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units>%</units>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
<item_prototype>
<name>Container {#CONTAINERNAME} NETio usage:</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME},NetIO]</key>
<delay>60</delay>
<history>90</history>
<trends>0</trends>
<status>0</status>
<value_type>1</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
<item_prototype>
<name>Container{#CONTAINERNAME} is_run :</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME} ,ping]</key>
<delay>30</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
</item_prototypes>
<trigger_prototypes>
<trigger_prototype>
<expression>{docker-status:docker.[{#CONTAINERNAME} ,ping].last()}=0</expression>
<recovery_mode>0</recovery_mode>
<recoveryexpression/>
<name>docker {#CONTAINERNAME}_down</name>
<correlation_mode>0</correlation_mode>
<correlation_tag/>
<url/>
<status>0</status>
<priority>5</priority>
<description/>
<type>0</type>
<manual_close>0</manual_close>
<dependencies/>
<tags/>
</trigger_prototype>
</trigger_prototypes>
<graph_prototypes/>
<host_prototypes/>
</discovery_rule>
</discovery_rules>
<httptests/>
<macros/>
<templates/>
<screens/>
</template>
</templates>
</zabbix_export>

 

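4、清理坑,首先py脚本是通过docker.sock文件读取值的,所以需要给zabbix用户授权。

chmod 777 /var/run/docker.sock

注意:chmod 777 会让本机所有用户都能控制docker(相当于拥有root权限),而且docker重启后该权限会被重置。更安全的做法是把zabbix用户加入docker组,然后重启zabbix-agent:

usermod -aG docker zabbix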

5、清理模板坑

导入模板的时候你会发现监控项中,多了一个空格,导致你无法取值。

docker.[{#CONTAINERNAME} ,ping]

 

docker.[{#CONTAINERNAME} ,BlockIO]

 

 

修改为

docker.[{#CONTAINERNAME},ping]

docker.[{#CONTAINERNAME},BlockIO]

这样就可以取值了。

。。。。。。。。。。。。。。。。。。

三、当然啦2个模板有监控内容有冲突的地方哈,可以把后面新增加的模板删一删 

全部评论