Nagios server

Nagios monitors servers and detects unexpected behavior, provided a probe has been set up to detect it. Here we will monitor disk space, services, and the ib network.

Add the nagios user, then the nagcmd group (used to launch nagios commands), and put both the nagios and apache users in that group.

groupadd nagios
useradd -m -g nagios nagios
passwd nagios
groupadd nagcmd
usermod -g nagcmd nagios
usermod -g nagcmd apache

Install needed packages (built previously, see preparing install).

yum install nagios nagios-contrib nagios-debuginfo nagios-devel nagios-plugins nagios-plugins-debuginfo

Edit /usr/local/nagios/etc/objects/contacts.cfg: set your email address so that nagios knows where to send alerts, and set the admin user to nagiosadmin.

define contact{
       contact_name  nagiosadmin	; Short name of user
       use             generic-contact	; Inherit default values from generic-contact template (defined above)
       alias         Administrateur Nagios	; Full name of user
       email         root@localhost	; Adresse Email pour les notifications
       }
define contactgroup{
       contactgroup_name       admins
       alias                   Administrateurs Nagios
       members                 nagiosadmin
       }

Then generate a password for the nagiosadmin user, which will be used in the web interface, and restart httpd:

htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
service httpd restart

Now check the configuration; it should report no warnings and no errors:

/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
[…]
Total Warnings: 0
Total Errors:   0

And start nagios:

/etc/init.d/nagios start

You can now log in to the nagios web interface at http://localhost/nagios

Time now to configure nagios to monitor our servers.

mkdir /usr/local/nagios/etc/linux_servers/

Edit /usr/local/nagios/etc/objects/commands.cfg and add check_nrpe command:

###############################################################################
# COMMANDS.CFG - SAMPLE COMMAND DEFINITIONS FOR NAGIOS 4.1.1
#
#
# NOTES: This config file provides you with some example command definitions
#        that you can reference in host, service, and contact definitions.
#
#        You don't need to keep commands in a separate file from your other
#        object definitions.  This has been done just to make things easier to
#        understand.
#
###############################################################################

# 'check_nrpe' command definition
define command{
       command_name    check_nrpe
       command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -u -c $ARG1$ -a $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$
       }

Then, in /usr/local/nagios/etc/nagios.cfg, add our linux_servers directory. This is the directory where we will store the configuration files of the monitored servers.

# You can also tell Nagios to process all config files (with a .cfg
# extension) in a particular directory by using the cfg_dir
# directive as shown below:

#cfg_dir=/usr/local/nagios/etc/servers
#cfg_dir=/usr/local/nagios/etc/printers
#cfg_dir=/usr/local/nagios/etc/switches
#cfg_dir=/usr/local/nagios/etc/routers

cfg_dir=/usr/local/nagios/etc/linux_servers

Then create a group for Linux servers in /usr/local/nagios/etc/linux_servers/groupe_linux_servers.cfg:

# Define a hostgroup for Linux machines
# All hosts that use the linux-server template will automatically be a member of this group

define hostgroup{
        hostgroup_name  linux-servers   ; The name of the hostgroup
        alias           Serveurs Linux  ; Long name of the group
        members         compute1,compute2        ; separes par des virgules
        }

And add servers and services in /usr/local/nagios/etc/linux_servers/servprod.cfg:

# Host definition

define host{
    use        linux-server ;
    host_name    compute1         ;
    alias        Serveur compute1 ;
    address    compute1               ;
    }

define host{
    use        linux-server ;
    host_name    compute2         ;
    alias        Serveur compute2 ;
    address    compute2               ;
    }

# Check disc space

define service{
    use            generic-service
    hostgroup_name linux-servers
    service_description    Espace disque /
    check_command        check_nrpe!check_disk!80%!90%!/
    }

# Check cpu load

define service{
   use                     generic-service
    hostgroup_name linux-servers
   service_description     Charge CPU
   check_command           check_nrpe!check_load!80!90
   }

# Check number of users logged in

define service{
   use                     generic-service
    hostgroup_name linux-servers
   service_description     Nombre utilisateurs
   check_command           check_nrpe!check_users!2!10
   }

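Then comment out the following lines in /usr/local/nagios/etc/objects/localhost.cfg (the linux-servers hostgroup is now defined in our own linux_servers directory, and it must not be defined twice):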

# Define an optional hostgroup for Linux machines

#define hostgroup{
#        hostgroup_name  linux-servers ; The name of the hostgroup
#        alias           Linux Servers ; Long name of the group
#        members         localhost     ; Comma separated list of hosts that belong to this group
#        }

Set rights, test configuration, and restart nagios:

chown -R nagios.nagios /usr/local/nagios/etc/linux_servers
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
/etc/init.d/nagios restart

We will add specific probes to nagios later. For now, nagios lets you know which hosts are up, along with their basic information.

Nagios client

Add nagios group and user:

groupadd nagios && useradd nagios -g nagios -p nagios

Then install nagios-plugins and nrpe:

yum install nagios-plugins nagios-plugins-debuginfo nrpe nrpe-debuginfo nrpe-plugin

Edit /etc/xinetd.d/nrpe and add the nagios server's IP address to the only_from line, so that the server is allowed to query this client for information.

# default: on
# description: NRPE (Nagios Remote Plugin Executor)
service nrpe
{
        flags           = REUSE
        socket_type     = stream
        port            = 5666
        wait            = no
        user            = nagios
        group           = nagios
        server          = /usr/local/nagios/bin/nrpe
        server_args     = -c /usr/local/nagios/etc/nrpe.cfg --inetd
        log_on_failure  += USERID
        disable         = no
        only_from       = 127.0.0.1 10.1.0.1
}

Then edit /etc/services and add (or uncomment) the following line at the right place:

nrpe 5666/tcp # NRPE

Then restart xinetd, and check it is listening using netstat:

systemctl restart xinetd
yum install net-tools
 [root@compute1 ~]#  netstat -at | grep nrpe
tcp6       0      0 [::]:nrpe               [::]:*                  LISTEN
[root@compute1 ~]#

Edit /usr/local/nagios/etc/nrpe.cfg, and uncomment the following lines:

command[check_users]=/usr/local/nagios/libexec/check_users -w $ARG1$ -c $ARG2$
command[check_load]=/usr/local/nagios/libexec/check_load -w $ARG1$ -c $ARG2$
command[check_disk]=/usr/local/nagios/libexec/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
command[check_procs]=/usr/local/nagios/libexec/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$

Now test locally:

[root@compute1 ~]#  /usr/local/nagios/libexec/check_nrpe -H 127.0.0.1
NRPE v2.15
[root@compute1 ~]#  /usr/local/nagios/libexec/check_disk -w 10% -c 5% -u GB
DISK OK - free space: / 2 GB (70% inode=85%); /dev 0 GB (100% inode=99%); /dev/shm 0 GB (100% inode=99%); /run 0 GB (98% inode=99%); /sys/fs/cgroup 0 GB (100% inode=99%); /boot 3 GB (97% inode=99%); /run/user/0 0 GB (100% inode=99%);| /=1GB;2;2;0;3 /dev=0GB;0;0;0;0 /dev/shm=0GB;0;0;0;0 /run=0GB;0;0;0;0 /sys/fs/cgroup=0GB;0;0;0;0 /boot=0GB;2;2;0;3 /run/user/0=0GB;0;0;0;0
[root@compute1 ~]#

Nagios server should now be able to reach the client.