Категории

вторник, 17 марта 2015 г.

Zabbix: supervisord автообнаружение программ и мониторинг статуса.

Скрипт поддерживает обнаружение (discovery) всех программ, которые сконфигурированы для запуска.
Отправка данных осуществляется через zabbix_sender.
Установка довольно тривиальна:
mkdir /etc/zabbix/scripts
chown root:zabbix -R /etc/zabbix/scripts
chmod 750 /etc/zabbix/scripts
Код скрипта /etc/zabbix/scripts/lsimegaraid_discovery_trapper.sh:
#!/bin/sh

ZABBIX_SENDER='/usr/bin/env zabbix_sender'
ZBX_CONFIG='/etc/zabbix/zabbix_agentd.conf'
ZBX_HOSTNAME=$(awk -F'=' '/^Hostname=/{ print $2 }' /etc/zabbix/zabbix_agentd.d/*.conf)

supervisorctl status | \
 awk 'match($0, /^([^:]+):[^[:blank:]]+[[:blank:]]+([^[:blank:]]+).*$/, line) {
  SUBSEP=","
  # Sadly, we can not initialize all the worker,status couples in the BEGIN section.
  # We have to run these almost useless lines during the input parsing
  if (status[line[1],"STOPPED"]  == 0) {status[line[1],"STOPPED"]  = 0}
  if (status[line[1],"STARTING"] == 0) {status[line[1],"STARTING"] = 0}
  if (status[line[1],"RUNNING"]  == 0) {status[line[1],"RUNNING"]  = 0}
  if (status[line[1],"BACKOFF"]  == 0) {status[line[1],"BACKOFF"]  = 0}
  if (status[line[1],"STOPPING"] == 0) {status[line[1],"STOPPING"] = 0}
  if (status[line[1],"EXITED"]   == 0) {status[line[1],"EXITED"]   = 0}
  if (status[line[1],"FATAL"]    == 0) {status[line[1],"FATAL"]    = 0}
  if (status[line[1],"UNKNOWN"]  == 0) {status[line[1],"UNKNOWN"]  = 0}
  status[line[1],line[2]]++
 }
 END {
  for(i in status) {
   print "- supervisord.workerState[" i "] " status[i]
  }
 }' | /usr/bin/env zabbix_sender --config $ZBX_CONFIG \
  --input-file - --host $ZBX_HOSTNAME >/dev/null 2>&1
echo $?
exit 0
Установка прав на скрипт:
chown root:zabbix /etc/zabbix/scripts/supervisord_discovery_trapper.sh
chmod 750 /etc/zabbix/scripts/supervisord_discovery_trapper.sh
Не забываем про настройки в скрипте, где нужно указать полные пути до необходимых программ и конфигурационных файлов:
ZABBIX_SENDER='/usr/bin/env zabbix_sender'
ZBX_CONFIG='/etc/zabbix/zabbix_agentd.conf'
ZBX_HOSTNAME=$(awk -F'=' '/^Hostname=/{ print $2 }' /etc/zabbix/zabbix_agentd.d/*.conf)
Конфигурационный файл: /etc/zabbix/zabbix_agentd.conf.d/lsimegaraid.conf
AllowRoot=1
UserParameter=supervisord.discovery,supervisorctl status | awk 'BEGIN {out="{\n\t\"data\":[\n"; f=0;} {match($0, /^([^:]+):[^[:blank:]]+[[:blank:]]+([^[:blank:]]+).*$/, line);if (!seen[line[1]]++ && line[1] != NULL) {if (f == 1) out=out",\n";out=out "\t\t{\"{#PROGRAMM}\":\""line[1]"\"}";f=1}} END{ if(f == 0){print "{}"}else{print out"\n\t]\n}"}}'
UserParameter=supervisord.trapper,/etc/zabbix/scripts/supervisord_trapper.sh
Шаблон для zabbix:
<?xml version="1.0" encoding="UTF-8"?>
<zabbix_export>
    <version>2.0</version>
    <date>2015-03-17T09:01:18Z</date>
    <groups>
        <group>
            <name>Templates</name>
        </group>
    </groups>
    <templates>
        <template>
            <template>Template_APP supervisor</template>
            <name>Template_APP supervisor</name>
            <description/>
            <groups>
                <group>
                    <name>Templates</name>
                </group>
            </groups>
            <applications>
                <application>
                    <name>App supervisord</name>
                </application>
            </applications>
            <items>
                <item>
                    <name>Controller</name>
                    <type>0</type>
                    <snmp_community/>
                    <multiplier>0</multiplier>
                    <snmp_oid/>
                    <key>supervisord.trapper</key>
                    <delay>30</delay>
                    <history>90</history>
                    <trends>365</trends>
                    <status>0</status>
                    <value_type>3</value_type>
                    <allowed_hosts/>
                    <units/>
                    <delta>0</delta>
                    <snmpv3_contextname/>
                    <snmpv3_securityname/>
                    <snmpv3_securitylevel>0</snmpv3_securitylevel>
                    <snmpv3_authprotocol>0</snmpv3_authprotocol>
                    <snmpv3_authpassphrase/>
                    <snmpv3_privprotocol>0</snmpv3_privprotocol>
                    <snmpv3_privpassphrase/>
                    <formula>1</formula>
                    <delay_flex/>
                    <params/>
                    <ipmi_sensor/>
                    <data_type>0</data_type>
                    <authtype>0</authtype>
                    <username/>
                    <password/>
                    <publickey/>
                    <privatekey/>
                    <port/>
                    <description/>
                    <inventory_link>0</inventory_link>
                    <applications>
                        <application>
                            <name>App supervisord</name>
                        </application>
                    </applications>
                    <valuemap/>
                    <logtimefmt/>
                </item>
                <item>
                    <name>Supervisord process count</name>
                    <type>0</type>
                    <snmp_community/>
                    <multiplier>0</multiplier>
                    <snmp_oid/>
                    <key>proc.num[supervisord]</key>
                    <delay>30</delay>
                    <history>90</history>
                    <trends>365</trends>
                    <status>0</status>
                    <value_type>3</value_type>
                    <allowed_hosts/>
                    <units/>
                    <delta>0</delta>
                    <snmpv3_contextname/>
                    <snmpv3_securityname/>
                    <snmpv3_securitylevel>0</snmpv3_securitylevel>
                    <snmpv3_authprotocol>0</snmpv3_authprotocol>
                    <snmpv3_authpassphrase/>
                    <snmpv3_privprotocol>0</snmpv3_privprotocol>
                    <snmpv3_privpassphrase/>
                    <formula>1</formula>
                    <delay_flex/>
                    <params/>
                    <ipmi_sensor/>
                    <data_type>0</data_type>
                    <authtype>0</authtype>
                    <username/>
                    <password/>
                    <publickey/>
                    <privatekey/>
                    <port/>
                    <description/>
                    <inventory_link>0</inventory_link>
                    <applications>
                        <application>
                            <name>App supervisord</name>
                        </application>
                    </applications>
                    <valuemap/>
                    <logtimefmt/>
                </item>
            </items>
            <discovery_rules>
                <discovery_rule>
                    <name>supervisord State discovery</name>
                    <type>0</type>
                    <snmp_community/>
                    <snmp_oid/>
                    <key>supervisord.discovery</key>
                    <delay>3600</delay>
                    <status>0</status>
                    <allowed_hosts/>
                    <snmpv3_contextname/>
                    <snmpv3_securityname/>
                    <snmpv3_securitylevel>0</snmpv3_securitylevel>
                    <snmpv3_authprotocol>0</snmpv3_authprotocol>
                    <snmpv3_authpassphrase/>
                    <snmpv3_privprotocol>0</snmpv3_privprotocol>
                    <snmpv3_privpassphrase/>
                    <delay_flex/>
                    <params/>
                    <ipmi_sensor/>
                    <authtype>0</authtype>
                    <username/>
                    <password/>
                    <publickey/>
                    <privatekey/>
                    <port/>
                    <filter>
                        <evaltype>0</evaltype>
                        <formula/>
                        <conditions/>
                    </filter>
                    <lifetime>30</lifetime>
                    <description/>
                    <item_prototypes>
                        <item_prototype>
                            <name>supervisord BACKOFF State of worker $1</name>
                            <type>2</type>
                            <snmp_community/>
                            <multiplier>0</multiplier>
                            <snmp_oid/>
                            <key>supervisord.workerState[{#PROGRAMM},BACKOFF]</key>
                            <delay>0</delay>
                            <history>7</history>
                            <trends>365</trends>
                            <status>0</status>
                            <value_type>3</value_type>
                            <allowed_hosts/>
                            <units/>
                            <delta>0</delta>
                            <snmpv3_contextname/>
                            <snmpv3_securityname/>
                            <snmpv3_securitylevel>0</snmpv3_securitylevel>
                            <snmpv3_authprotocol>0</snmpv3_authprotocol>
                            <snmpv3_authpassphrase/>
                            <snmpv3_privprotocol>0</snmpv3_privprotocol>
                            <snmpv3_privpassphrase/>
                            <formula>1</formula>
                            <delay_flex/>
                            <params/>
                            <ipmi_sensor/>
                            <data_type>0</data_type>
                            <authtype>0</authtype>
                            <username/>
                            <password/>
                            <publickey/>
                            <privatekey/>
                            <port/>
                            <description/>
                            <inventory_link>0</inventory_link>
                            <applications>
                                <application>
                                    <name>App supervisord</name>
                                </application>
                            </applications>
                            <valuemap/>
                            <logtimefmt/>
                        </item_prototype>
                        <item_prototype>
                            <name>supervisord EXITED State of worker $1</name>
                            <type>2</type>
                            <snmp_community/>
                            <multiplier>0</multiplier>
                            <snmp_oid/>
                            <key>supervisord.workerState[{#PROGRAMM},EXITED]</key>
                            <delay>0</delay>
                            <history>7</history>
                            <trends>365</trends>
                            <status>0</status>
                            <value_type>3</value_type>
                            <allowed_hosts/>
                            <units/>
                            <delta>0</delta>
                            <snmpv3_contextname/>
                            <snmpv3_securityname/>
                            <snmpv3_securitylevel>0</snmpv3_securitylevel>
                            <snmpv3_authprotocol>0</snmpv3_authprotocol>
                            <snmpv3_authpassphrase/>
                            <snmpv3_privprotocol>0</snmpv3_privprotocol>
                            <snmpv3_privpassphrase/>
                            <formula>1</formula>
                            <delay_flex/>
                            <params/>
                            <ipmi_sensor/>
                            <data_type>0</data_type>
                            <authtype>0</authtype>
                            <username/>
                            <password/>
                            <publickey/>
                            <privatekey/>
                            <port/>
                            <description/>
                            <inventory_link>0</inventory_link>
                            <applications>
                                <application>
                                    <name>App supervisord</name>
                                </application>
                            </applications>
                            <valuemap/>
                            <logtimefmt/>
                        </item_prototype>
                        <item_prototype>
                            <name>supervisord FATAL State of worker $1</name>
                            <type>2</type>
                            <snmp_community/>
                            <multiplier>0</multiplier>
                            <snmp_oid/>
                            <key>supervisord.workerState[{#PROGRAMM},FATAL]</key>
                            <delay>0</delay>
                            <history>7</history>
                            <trends>365</trends>
                            <status>0</status>
                            <value_type>3</value_type>
                            <allowed_hosts/>
                            <units/>
                            <delta>0</delta>
                            <snmpv3_contextname/>
                            <snmpv3_securityname/>
                            <snmpv3_securitylevel>0</snmpv3_securitylevel>
                            <snmpv3_authprotocol>0</snmpv3_authprotocol>
                            <snmpv3_authpassphrase/>
                            <snmpv3_privprotocol>0</snmpv3_privprotocol>
                            <snmpv3_privpassphrase/>
                            <formula>1</formula>
                            <delay_flex/>
                            <params/>
                            <ipmi_sensor/>
                            <data_type>0</data_type>
                            <authtype>0</authtype>
                            <username/>
                            <password/>
                            <publickey/>
                            <privatekey/>
                            <port/>
                            <description/>
                            <inventory_link>0</inventory_link>
                            <applications>
                                <application>
                                    <name>App supervisord</name>
                                </application>
                            </applications>
                            <valuemap/>
                            <logtimefmt/>
                        </item_prototype>
                        <item_prototype>
                            <name>supervisord RUNNING State of worker $1</name>
                            <type>2</type>
                            <snmp_community/>
                            <multiplier>0</multiplier>
                            <snmp_oid/>
                            <key>supervisord.workerState[{#PROGRAMM},RUNNING]</key>
                            <delay>0</delay>
                            <history>7</history>
                            <trends>365</trends>
                            <status>0</status>
                            <value_type>3</value_type>
                            <allowed_hosts/>
                            <units/>
                            <delta>0</delta>
                            <snmpv3_contextname/>
                            <snmpv3_securityname/>
                            <snmpv3_securitylevel>0</snmpv3_securitylevel>
                            <snmpv3_authprotocol>0</snmpv3_authprotocol>
                            <snmpv3_authpassphrase/>
                            <snmpv3_privprotocol>0</snmpv3_privprotocol>
                            <snmpv3_privpassphrase/>
                            <formula>1</formula>
                            <delay_flex/>
                            <params/>
                            <ipmi_sensor/>
                            <data_type>0</data_type>
                            <authtype>0</authtype>
                            <username/>
                            <password/>
                            <publickey/>
                            <privatekey/>
                            <port/>
                            <description/>
                            <inventory_link>0</inventory_link>
                            <applications>
                                <application>
                                    <name>App supervisord</name>
                                </application>
                            </applications>
                            <valuemap/>
                            <logtimefmt/>
                        </item_prototype>
                        <item_prototype>
                            <name>supervisord STARTING State of worker $1</name>
                            <type>2</type>
                            <snmp_community/>
                            <multiplier>0</multiplier>
                            <snmp_oid/>
                            <key>supervisord.workerState[{#PROGRAMM},STARTING]</key>
                            <delay>0</delay>
                            <history>7</history>
                            <trends>365</trends>
                            <status>0</status>
                            <value_type>3</value_type>
                            <allowed_hosts/>
                            <units/>
                            <delta>0</delta>
                            <snmpv3_contextname/>
                            <snmpv3_securityname/>
                            <snmpv3_securitylevel>0</snmpv3_securitylevel>
                            <snmpv3_authprotocol>0</snmpv3_authprotocol>
                            <snmpv3_authpassphrase/>
                            <snmpv3_privprotocol>0</snmpv3_privprotocol>
                            <snmpv3_privpassphrase/>
                            <formula>1</formula>
                            <delay_flex/>
                            <params/>
                            <ipmi_sensor/>
                            <data_type>0</data_type>
                            <authtype>0</authtype>
                            <username/>
                            <password/>
                            <publickey/>
                            <privatekey/>
                            <port/>
                            <description/>
                            <inventory_link>0</inventory_link>
                            <applications>
                                <application>
                                    <name>App supervisord</name>
                                </application>
                            </applications>
                            <valuemap/>
                            <logtimefmt/>
                        </item_prototype>
                        <item_prototype>
                            <name>supervisord STOPPED State of worker $1</name>
                            <type>2</type>
                            <snmp_community/>
                            <multiplier>0</multiplier>
                            <snmp_oid/>
                            <key>supervisord.workerState[{#PROGRAMM},STOPPED]</key>
                            <delay>0</delay>
                            <history>7</history>
                            <trends>365</trends>
                            <status>0</status>
                            <value_type>3</value_type>
                            <allowed_hosts/>
                            <units/>
                            <delta>0</delta>
                            <snmpv3_contextname/>
                            <snmpv3_securityname/>
                            <snmpv3_securitylevel>0</snmpv3_securitylevel>
                            <snmpv3_authprotocol>0</snmpv3_authprotocol>
                            <snmpv3_authpassphrase/>
                            <snmpv3_privprotocol>0</snmpv3_privprotocol>
                            <snmpv3_privpassphrase/>
                            <formula>1</formula>
                            <delay_flex/>
                            <params/>
                            <ipmi_sensor/>
                            <data_type>0</data_type>
                            <authtype>0</authtype>
                            <username/>
                            <password/>
                            <publickey/>
                            <privatekey/>
                            <port/>
                            <description/>
                            <inventory_link>0</inventory_link>
                            <applications>
                                <application>
                                    <name>App supervisord</name>
                                </application>
                            </applications>
                            <valuemap/>
                            <logtimefmt/>
                        </item_prototype>
                        <item_prototype>
                            <name>supervisord STOPPING State of worker $1</name>
                            <type>2</type>
                            <snmp_community/>
                            <multiplier>0</multiplier>
                            <snmp_oid/>
                            <key>supervisord.workerState[{#PROGRAMM},STOPPING]</key>
                            <delay>0</delay>
                            <history>7</history>
                            <trends>365</trends>
                            <status>0</status>
                            <value_type>3</value_type>
                            <allowed_hosts/>
                            <units/>
                            <delta>0</delta>
                            <snmpv3_contextname/>
                            <snmpv3_securityname/>
                            <snmpv3_securitylevel>0</snmpv3_securitylevel>
                            <snmpv3_authprotocol>0</snmpv3_authprotocol>
                            <snmpv3_authpassphrase/>
                            <snmpv3_privprotocol>0</snmpv3_privprotocol>
                            <snmpv3_privpassphrase/>
                            <formula>1</formula>
                            <delay_flex/>
                            <params/>
                            <ipmi_sensor/>
                            <data_type>0</data_type>
                            <authtype>0</authtype>
                            <username/>
                            <password/>
                            <publickey/>
                            <privatekey/>
                            <port/>
                            <description/>
                            <inventory_link>0</inventory_link>
                            <applications>
                                <application>
                                    <name>App supervisord</name>
                                </application>
                            </applications>
                            <valuemap/>
                            <logtimefmt/>
                        </item_prototype>
                        <item_prototype>
                            <name>supervisord UNKNOWN State of worker $1</name>
                            <type>2</type>
                            <snmp_community/>
                            <multiplier>0</multiplier>
                            <snmp_oid/>
                            <key>supervisord.workerState[{#PROGRAMM},UNKNOWN]</key>
                            <delay>0</delay>
                            <history>7</history>
                            <trends>365</trends>
                            <status>0</status>
                            <value_type>3</value_type>
                            <allowed_hosts/>
                            <units/>
                            <delta>0</delta>
                            <snmpv3_contextname/>
                            <snmpv3_securityname/>
                            <snmpv3_securitylevel>0</snmpv3_securitylevel>
                            <snmpv3_authprotocol>0</snmpv3_authprotocol>
                            <snmpv3_authpassphrase/>
                            <snmpv3_privprotocol>0</snmpv3_privprotocol>
                            <snmpv3_privpassphrase/>
                            <formula>1</formula>
                            <delay_flex/>
                            <params/>
                            <ipmi_sensor/>
                            <data_type>0</data_type>
                            <authtype>0</authtype>
                            <username/>
                            <password/>
                            <publickey/>
                            <privatekey/>
                            <port/>
                            <description/>
                            <inventory_link>0</inventory_link>
                            <applications>
                                <application>
                                    <name>App supervisord</name>
                                </application>
                            </applications>
                            <valuemap/>
                            <logtimefmt/>
                        </item_prototype>
                    </item_prototypes>
                    <trigger_prototypes>
                        <trigger_prototype>
                            <expression>{Template_APP supervisor:supervisord.workerState[{#PROGRAMM},BACKOFF].last(0)}&gt;0</expression>
                            <name>Supervisord BACKOFF State of worker {#PROGRAMM} on host {HOST.NAME}</name>
                            <url/>
                            <status>0</status>
                            <priority>4</priority>
                            <description/>
                            <type>0</type>
                        </trigger_prototype>
                        <trigger_prototype>
                            <expression>{Template_APP supervisor:supervisord.workerState[{#PROGRAMM},EXITED].last(0)}&gt;0</expression>
                            <name>Supervisord EXITED State of worker {#PROGRAMM} on host {HOST.NAME}</name>
                            <url/>
                            <status>0</status>
                            <priority>4</priority>
                            <description/>
                            <type>0</type>
                        </trigger_prototype>
                        <trigger_prototype>
                            <expression>{Template_APP supervisor:supervisord.workerState[{#PROGRAMM},FATAL].last(0)}&gt;0</expression>
                            <name>Supervisord FATAL State of worker {#PROGRAMM} on host {HOST.NAME}</name>
                            <url/>
                            <status>0</status>
                            <priority>4</priority>
                            <description/>
                            <type>0</type>
                        </trigger_prototype>
                        <trigger_prototype>
                            <expression>{Template_APP supervisor:supervisord.workerState[{#PROGRAMM},RUNNING].last(0)}=0</expression>
                            <name>Supervisord RUNNING State of worker {#PROGRAMM} on host {HOST.NAME}</name>
                            <url/>
                            <status>0</status>
                            <priority>4</priority>
                            <description/>
                            <type>0</type>
                        </trigger_prototype>
                        <trigger_prototype>
                            <expression>{Template_APP supervisor:supervisord.workerState[{#PROGRAMM},STOPPED].last(0)}&gt;0</expression>
                            <name>Supervisord STOPPED State of worker {#PROGRAMM} on host {HOST.NAME}</name>
                            <url/>
                            <status>0</status>
                            <priority>4</priority>
                            <description/>
                            <type>0</type>
                        </trigger_prototype>
                        <trigger_prototype>
                            <expression>{Template_APP supervisor:supervisord.workerState[{#PROGRAMM},UNKNOWN].last(0)}&gt;0</expression>
                            <name>Supervisord UNKNOWN State of worker {#PROGRAMM} on host {HOST.NAME}</name>
                            <url/>
                            <status>0</status>
                            <priority>4</priority>
                            <description/>
                            <type>0</type>
                        </trigger_prototype>
                    </trigger_prototypes>
                    <graph_prototypes/>
                    <host_prototypes/>
                </discovery_rule>
            </discovery_rules>
            <macros/>
            <templates/>
            <screens/>
        </template>
    </templates>
    <triggers>
        <trigger>
            <expression>{Template_APP supervisor:proc.num[supervisord].last(0)}=0</expression>
            <name>Supervisord is down on {HOST.NAME}</name>
            <url/>
            <status>0</status>
            <priority>4</priority>
            <description/>
            <type>0</type>
            <dependencies/>
        </trigger>
    </triggers>
</zabbix_export>
Не забываем перезапустить агент, чтобы агент прочитал новый конфигурационный файл:
service zabbix-agentd restart
Проверка:
zabbix_get -s HOST -k "supervisord.discovery"

Комментариев нет:

Отправить комментарий