标签:linux nagios
nagios
服务端
# Server side: install Apache, Nagios, the plugin bundle and the NRPE packages.
yum install -y \
  httpd \
  nagios \
  nagios-plugins \
  nagios-plugins-all \
  nrpe \
  nagios-plugins-nrpe
# Create the password file holding the user (nagiosadmin) for the Nagios web UI.
htpasswd -c /etc/nagios/passwd nagiosadmin
vim /etc/nagios/nagios.cfg
# Validate the main configuration file.
nagios -v /etc/nagios/nagios.cfg
# Start the web server, then Nagios itself.
service httpd start
service nagios start
#浏览器访问: http://192.168.177.145/nagios (服务端ip,使用上面创建的nagiosadmin用户登录)
客户端
# Client side: install the plugins and the NRPE daemon, then edit the NRPE configuration.
yum install -y \
  nagios-plugins \
  nagios-plugins-all \
  nrpe \
  nagios-plugins-nrpe
vim /etc/nagios/nrpe.cfg
修改“allowed_hosts=127.0.0.1”为“allowed_hosts=127.0.0.1,192.168.177.145” 后面的ip为服务端ip;
修改“dont_blame_nrpe=0”为“dont_blame_nrpe=1”(该选项允许服务端通过check_nrpe向客户端命令传递参数,存在安全风险;本文下面定义的check_nrpe命令只传命令名、不带参数,保持默认值0即可)
# Start the NRPE daemon on the client
/etc/init.d/nrpe start
#服务器端添加需要监控的客户端配置(如客户端192.168.177.140)
vim /etc/nagios/conf.d/192.168.177.140.cfg
添加配置如下
# Host object for the monitored client 192.168.177.140.
# Settings not listed here come from the "linux-server" template named by "use".
define host {
    use        linux-server
    host_name  192.168.177.140
    alias      177.140
    address    192.168.177.140
}
# check_ping
# The two "!"-separated arguments are handed to the check_ping command
# (presumably warning then critical thresholds as <rta ms>,<loss %> - confirm
# against the check_ping definition in the server's command configuration).
# Alerts only after 5 failing attempts; re-checked at an interval of 1.
define service {
    use                    generic-service
    host_name              192.168.177.140
    service_description    check_ping
    check_command          check_ping!100.0,20%!200.0,50%
    max_check_attempts     5
    normal_check_interval  1
}
# check_ssh
# Runs the check_ssh command with no extra arguments for this host.
# Alerts only after 5 failing attempts; re-checked at an interval of 1.
define service {
    use                    generic-service
    host_name              192.168.177.140
    service_description    check_ssh
    check_command          check_ssh
    max_check_attempts     5
    normal_check_interval  1
}
# check_http
# Runs the check_http command with no extra arguments for this host.
# Alerts only after 5 failing attempts; re-checked at an interval of 1.
define service {
    use                    generic-service
    host_name              192.168.177.140
    service_description    check_http
    check_command          check_http
    max_check_attempts     5
    normal_check_interval  1
}
# check_load
# System load of the client, fetched through NRPE like the other remote checks.
# A bare "check_load" command is not among the commands defined in the sample
# server command file, and a locally executed load check would measure the
# Nagios server instead of 192.168.177.140.
# NOTE(review): the client's nrpe.cfg must contain a command[check_load] entry
# (the stock nrpe.cfg normally ships one - confirm on the client).
# Alerts only after 5 failing attempts; re-checked at an interval of 1.
define service {
    use                    generic-service
    host_name              192.168.177.140
    service_description    check_load
    check_command          check_nrpe!check_load
    max_check_attempts     5
    normal_check_interval  1
}
# check_hda1
# Executed on the client through NRPE. Using check_nrpe requires a check_nrpe
# command definition in the server-side command configuration file.
# NOTE(review): the client's nrpe.cfg must define command[check_hda1] - confirm.
# Alerts only after 5 failing attempts; re-checked at an interval of 1.
define service {
    use                    generic-service
    host_name              192.168.177.140
    service_description    check_hda1
    check_command          check_nrpe!check_hda1
    max_check_attempts     5
    normal_check_interval  1
}
# check_hda3
# Executed on the client through NRPE (check_nrpe asks the client to run "check_hda3").
# NOTE(review): the client's nrpe.cfg must define command[check_hda3]; that
# entry is not shown in this document - confirm it exists.
# Alerts only after 5 failing attempts; re-checked at an interval of 1.
define service {
    use                    generic-service
    host_name              192.168.177.140
    service_description    check_hda3
    check_command          check_nrpe!check_hda3
    max_check_attempts     5
    normal_check_interval  1
}
# check_mysql
# Executed on the client through NRPE (check_nrpe asks the client to run "check_mysql").
# NOTE(review): the client's nrpe.cfg must define command[check_mysql]; that
# entry is not shown in this document - confirm it exists.
# Alerts only after 5 failing attempts; re-checked at an interval of 1.
define service {
    use                    generic-service
    host_name              192.168.177.140
    service_description    check_mysql
    check_command          check_nrpe!check_mysql
    max_check_attempts     5
    normal_check_interval  1
}
#当nagios检测到问题时,一共尝试检测5次都有问题才会告警,如果该数值为1,那么检测到问题立即告警
max_check_attempts 5
#重新检测的时间间隔,单位是分钟,默认是3分钟
normal_check_interval 1
#在服务出现异常后,故障一直没有解决,nagios再次对使用者发出通知的时间。单位是分钟。如果你认为,所有的事件只需要一次通知就够了,可以把这里的选项设为0
notification_interval 60
#服务器端/etc/nagios/objects/commands.cfg
vim /etc/nagios/objects/commands.cfg
添加
# check_nrpe command: runs the check_nrpe plugin against the host's address and
# asks the remote NRPE daemon to execute the command named by the first
# "!"-argument of check_command ($ARG1$). No further arguments are forwarded.
# $USER1$ is presumably the plugin directory set in resource.cfg - confirm.
define command {
    command_name  check_nrpe
    command_line  $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
#check_mysql脚本(注意:下面的脚本实际检测的是空闲内存百分比,并不检测MySQL)
#!/bin/bash
#
# Nagios/NRPE plugin: report free memory as a percentage of total memory.
#
# Usage:
#   <script> [-w|--warning] <percent free> [-c|--critical] <percent free>
#
# "Free" is total minus used, both read in MB from the "Mem:" row of `free -m`.
# One status line is written to stdout. Exit status follows the Nagios plugin
# convention: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.
#
# `set -e` is intentionally not enabled: an unexpected command failure would
# terminate the plugin with status 1, which Nagios would display as WARNING.
# Every failure path below picks its exit status explicitly instead.

readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

readonly PROGNAME=${0##*/}
readonly USAGE="${PROGNAME} [-w|--warning]<percent free> [-c|--critical]<percent free>"

#######################################
# Test whether a value is a non-empty string of decimal digits.
# Arguments: $1 - value to test
# Returns:   0 if it is, 1 otherwise
#######################################
is_uint() {
  [[ ${1-} =~ ^[0-9]+$ ]]
}

#######################################
# Print a headline followed by the usage text, framed by blank lines.
# Arguments: $1 - headline describing what was wrong
# Outputs:   writes to stdout (the stream Nagios displays)
#######################################
print_usage_error() {
  printf '\n%s\n\nUsage: %s\n\n' "$1" "$USAGE"
}

#######################################
# Read total and used memory in MB.
# LC_ALL=C keeps the "Mem:" row label untranslated.
# Outputs:   "<total> <used>" on stdout
# Returns:   non-zero if no "Mem:" row was found
#######################################
read_memory_mb() {
  LC_ALL=C free -m \
    | awk '/^Mem:/ { print $2, $3; found = 1; exit } END { if (!found) exit 1 }'
}

main() {
  # Remember the command line as typed; parsing below consumes the arguments.
  local invocation="${PROGNAME} $*"
  local warning="" critical=""

  if (( $# < 4 )); then
    print_usage_error "Wrong Syntax: ${invocation}"
    exit "$STATE_UNKNOWN"
  fi

  while (( $# > 0 )); do
    case "$1" in
      -w|--warning)
        warning=${2-}
        shift
        ;;
      -c|--critical)
        critical=${2-}
        shift
        ;;
    esac
    if (( $# > 0 )); then
      shift
    fi
  done

  if ! is_uint "$warning" || ! is_uint "$critical"; then
    print_usage_error "Thresholds must be whole-number percentages: ${invocation}"
    exit "$STATE_UNKNOWN"
  fi
  # Force base 10 so a value such as "08" is not parsed as invalid octal.
  warning=$(( 10#$warning ))
  critical=$(( 10#$critical ))

  # Thresholds are "percent free", so WARNING must sit above CRITICAL.
  if (( warning <= critical )); then
    print_usage_error "WARNING threshold must be greater than CRITICAL: ${invocation}"
    exit "$STATE_UNKNOWN"
  fi

  local mem_stats total="" used=""
  if mem_stats=$(read_memory_mb); then
    read -r total used <<<"$mem_stats"
  fi
  # A zero total is rejected here so the division below cannot fail.
  if ! is_uint "$total" || ! is_uint "$used" || (( total == 0 )); then
    echo "UNKNOWN - unable to read memory statistics from free"
    exit "$STATE_UNKNOWN"
  fi

  local free_mb=$(( total - used ))
  # Integer percentage, truncated: (free / total) * 100.
  local percent=$(( free_mb * 100 / total ))

  if (( percent <= critical )); then
    echo "CRITICAL - ${free_mb} MB (${percent}%) Free Memory"
    exit "$STATE_CRITICAL"
  fi
  if (( percent <= warning )); then
    echo "WARNING - ${free_mb} MB (${percent}%) Free Memory"
    exit "$STATE_WARNING"
  fi
  echo "OK - ${free_mb} MB (${percent}%) Free Memory"
  exit "$STATE_OK"
}

main "$@"
# Source: the "ubuntu" blog; the author asks that this article not be reposted.
标签:linux nagios
原文地址:http://thankinglove.blog.51cto.com/2311485/1721519