nagios_check_zfs_linux/check_zfs.py

479 lines
15 KiB
Python
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Get zfs pool status."""
########################################################################
##
## Written by Zachary LaCelle
## Migration to Python3 by Boris Tassou
## Copyright 2016
## Licensed under GPL (see below)
##
## Nagios script to monitor ZFS pools/filesystems
## in Linux.
##
## Tested operating systems/ZFS versions:
## * See README.md
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
##
########################################################################
import subprocess
import argparse
import logging
import sys
# from sys import exit
# from array import *
# from types import *
# import array
# import types
# from os import geteuid
##
# Commands to run
# CHANGE THESE IF YOU NEED TO
##
# Paths of the ZFS binaries that the script runs through sudo.
# Edit these if the tools live somewhere else on your system.
ZPOOLCOMMANDE = "/sbin/zpool"
ZFSCOMMANDE = "/sbin/zfs"

# Result accumulated while the script runs and emitted at the very end.
# STATENUM indexes NAGIOSSTATUS and is also used as the process exit code.
NAGIOSSTATUS = ("OK", "WARNING", "CRITICAL", "UNKNOWN")
STATENUM = 0
MSG = ""
PERFDATA = ""

# Defaults for the optional checks; replaced by command line arguments.
CHECKCAPACITY = False
CAPWARNTHRESHOLD, CAPCRITTHRESHOLD = 50, 80
CHECKFRAGMENTATION = False
FRAGWARNTHRESHOLD, FRAGCRITTHRESHOLD = 50, 80

# Every message, including the final status line, is written with
# logging.warning, so the logger is pointed at stdout with a bare format.
logging.basicConfig(level=logging.WARNING, format="%(message)s", stream=sys.stdout)
def checkargbounds(valuearr, minval, maxval):
    """Tell whether every value of valuearr lies inside [minval, maxval].

    Returns False as soon as one value is below minval or above maxval,
    and True otherwise (including for an empty sequence).
    """
    return not any(value < minval or value > maxval for value in valuearr)
def converttogb(valuestr):
    """Convert a size string with a unit suffix to gigabytes.

    The last character of valuestr selects the unit: 'K', 'M', 'G', 'T'
    or 'P'. Everything before it is parsed as a number, and a decimal
    comma is accepted in place of a decimal point.

    Returns the size in GB as a float, or None when the suffix is not one
    of the supported units (an empty string included).

    Raises ValueError when the suffix is supported but the numeric part
    cannot be parsed.
    """
    # Multipliers from each unit to gigabytes (binary, 1024-based).
    gbperunit = {
        'K': 1.0 / (1024.0 * 1024.0),
        'M': 1.0 / 1024.0,
        'G': 1.0,
        'T': 1024.0,
        'P': 1024.0 * 1024.0,
    }
    # Slicing rather than indexing keeps an empty string from raising.
    factor = gbperunit.get(valuestr[-1:])
    if factor is None:
        return None
    number = valuestr[:-1].replace(',', '.')
    return float(number) * factor
def raisestatenum(statenumin, statenum):
    """Return statenumin when it exceeds statenum, otherwise statenum.

    Used so that a state number can only ever be raised, never lowered.
    """
    return statenumin if statenumin > statenum else statenum
###################################################################################
##
# Parse command line args
##
# Command line: one positional pool name plus two optional checks, each of
# which takes a warning and a critical threshold.
PARSER = argparse.ArgumentParser(
    prog='check_zfs',
    description='Check the ZFS pool specified by an argument.',
    epilog='Note that monitor flags (e.g. capacity) require 2 arguments: '
           'warning threshold, and critical threshold')
PARSER.add_argument('--capacity', help="monitor utilization of zpool (%%, int [0-100])",
                    type=int, nargs=2)
PARSER.add_argument('--fragmentation', help="monitor fragmentation of zpool (%%, int [0-100])",
                    type=int, nargs=2)
PARSER.add_argument('pool', help="name of the zpool to check", type=str)
ARGS = PARSER.parse_args()

RETVAL = True
if ARGS.capacity is not None:
    CHECKCAPACITY = True
    CAPWARNTHRESHOLD, CAPCRITTHRESHOLD = ARGS.capacity
    # checkargbounds compares every element against the bounds, so it must
    # receive a flat list of ints. The former ['i', [...]] value made it
    # compare a str (and then a list) with an int, which raises TypeError.
    CAPARR = [CAPWARNTHRESHOLD, CAPCRITTHRESHOLD]
    RETVAL = checkargbounds(CAPARR, 0, 100)
    if not RETVAL:
        STATENUM = raisestatenum(3, STATENUM)
        logging.warning("%s : Capacity thresholds must be between 0 and 100 (as a percent).",
                        NAGIOSSTATUS[STATENUM])
        PARSER.print_help()
        sys.exit(STATENUM)

RETVAL = True
if ARGS.fragmentation is not None:
    CHECKFRAGMENTATION = True
    FRAGWARNTHRESHOLD, FRAGCRITTHRESHOLD = ARGS.fragmentation
    # Flat list of ints, for the same reason as the capacity thresholds.
    FRAGARR = [FRAGWARNTHRESHOLD, FRAGCRITTHRESHOLD]
    RETVAL = checkargbounds(FRAGARR, 0, 100)
    if not RETVAL:
        STATENUM = raisestatenum(3, STATENUM)
        logging.warning("%s : Fragmentation thresholds must be between 0 and 100 (as a percent).",
                        NAGIOSSTATUS[STATENUM])
        PARSER.print_help()
        sys.exit(STATENUM)
###################################################################################
##
# Get generic info about the ZFS environment
# Run "zfs list" through sudo and collect one entry (a list of raw byte
# columns) per dataset line of its output.
ZFSENTRIES = []
FULLCOMMAND = ['/usr/bin/sudo', '-n', ZFSCOMMANDE, 'list']
try:
    CHILDPROCESS = subprocess.Popen(FULLCOMMAND, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
except OSError:
    STATENUM = raisestatenum(3, STATENUM)
    # Adjacent literals keep the message on one line; a backslash inside
    # the string would also embed the next source line's leading text.
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zfs", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
ZFSSTRING = CHILDPROCESS.communicate()[0]
ZFSRETVAL = CHILDPROCESS.returncode
# Compare by value, and treat every non-zero exit status as a failure so
# the parsing below never runs on the output of a failed command.
if ZFSRETVAL != 0:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zfs", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
ZFSLINES = ZFSSTRING.splitlines()
# The first line is the column header. Blank lines are skipped so that
# every entry has at least one column.
ZFSENTRIES = [LINE.split() for LINE in ZFSLINES[1:] if LINE.strip()]
# Make sure the pool we specified is valid
VALIDPOOL = any(ENTRY[0].decode() == ARGS.pool for ENTRY in ZFSENTRIES)
if not VALIDPOOL:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Pool %s is invalid. Please select a valid pool.",
                    NAGIOSSTATUS[STATENUM], ARGS.pool)
    sys.exit(STATENUM)
###################################################################################
##
# Get info on zpool
# Run "zpool list <pool>" through sudo and pick the columns of interest
# out of its header line and its single data line.
FULLCOMMAND = ['/usr/bin/sudo', '-n', ZPOOLCOMMANDE, 'list', ARGS.pool]
try:
    CHILDPROCESS = subprocess.Popen(FULLCOMMAND, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except OSError:
    STATENUM = raisestatenum(3, STATENUM)
    # Adjacent literals keep the message on one line; a backslash inside
    # the string would also embed the next source line's leading text.
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zpool", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
ZPOOLSTRING = CHILDPROCESS.communicate()[0]
ZPOOLRETVAL = CHILDPROCESS.returncode
# Compare by value, and treat every non-zero exit status as a failure.
if ZPOOLRETVAL != 0:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zpool", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
ZPOOLLINES = ZPOOLSTRING.splitlines()
# A header line and a data line are both required by the parsing below.
if len(ZPOOLLINES) < 2:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Unexpected output from zpool list for pool %s",
                    NAGIOSSTATUS[STATENUM], ARGS.pool)
    sys.exit(STATENUM)
ZPOOLMETA = ZPOOLLINES[0].decode().split()
ZPOOLMETASTR = ','.join(ZPOOLMETA)
ZPOOLENTRY = ZPOOLLINES[1].decode().split()
ZPOOLENTRYSTR = ','.join(ZPOOLENTRY)
# Pair each header name with the value in the same column. zip() stops at
# the shorter line, so a short data line cannot raise IndexError.
ZPOOLFIELDS = dict(zip(ZPOOLMETA, ZPOOLENTRY))
# A column that is absent from the output is represented by ''.
NAME = ZPOOLFIELDS.get('NAME', '')
SIZE = ZPOOLFIELDS.get('SIZE', '')
ALLOC = ZPOOLFIELDS.get('ALLOC', '')
FREE = ZPOOLFIELDS.get('FREE', '')
EXPANDSZ = ZPOOLFIELDS.get('EXPANDSZ', '')
FRAG = ZPOOLFIELDS.get('FRAG', '')
CAP = ZPOOLFIELDS.get('CAP', '')
DEDUP = ZPOOLFIELDS.get('DEDUP', '')
HEALTH = ZPOOLFIELDS.get('HEALTH', '')
ALTROOT = ZPOOLFIELDS.get('ALTROOT', '')
if NAME == '':
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Missing required field in zpool output: NAME", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
if HEALTH == '':
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Missing required field in zpool output: HEALTH", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
# The optional checks cannot run without their column.
if CHECKCAPACITY and CAP == '':
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Cannot monitor capacity without zpool output: CAP. "
                    "Outputs are %s", NAGIOSSTATUS[STATENUM], ZPOOLMETASTR)
    sys.exit(STATENUM)
if CHECKFRAGMENTATION and FRAG == '':
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Cannot monitor fragmentation without zpool output: FRAG. "
                    "Outputs are %s", NAGIOSSTATUS[STATENUM], ZPOOLMETASTR)
    sys.exit(STATENUM)
# Get compressratio on zpool
# Ask zfs for the pool's "compression" property, and when compression is
# enabled also fetch "compressratio". COMPRESSRATIOVALUE stays '' when no
# ratio could be read.
CHECKFORCOMPRESSION = ['/usr/bin/sudo', '-n', ZFSCOMMANDE, 'get', 'compression', ARGS.pool]
try:
    CHILDPROCESS = subprocess.Popen(CHECKFORCOMPRESSION, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except OSError:
    STATENUM = raisestatenum(3, STATENUM)
    # The command being run is zfs, so that is the binary to name here.
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zfs", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
ZPOOLSTRING = CHILDPROCESS.communicate()[0]
ZPOOLRETVAL = CHILDPROCESS.returncode
# Compare by value, and treat every non-zero exit status as a failure.
if ZPOOLRETVAL != 0:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zfs", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
ZPOOLLINES = ZPOOLSTRING.splitlines()
# A header line and a data line are both required by the parsing below.
if len(ZPOOLLINES) < 2:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Unexpected output from zfs get compression for pool %s",
                    NAGIOSSTATUS[STATENUM], ARGS.pool)
    sys.exit(STATENUM)
ZPOOLMETA = ZPOOLLINES[0].decode().split()
ZPOOLMETASTR = ','.join(ZPOOLMETA)
ZPOOLENTRY = ZPOOLLINES[1].decode().split()
ZPOOLENTRYSTR = ','.join(ZPOOLENTRY)
COMPRESSNAME = ''
COMPRESSVALUE = ''
COMPRESSRATIONAME = ''
COMPRESSRATIOVALUE = ''
# zip() pairs each header with its column and tolerates a short data line.
for FIELDNAME, FIELDVALUE in zip(ZPOOLMETA, ZPOOLENTRY):
    if FIELDNAME == 'NAME':
        COMPRESSNAME = FIELDVALUE
    elif FIELDNAME == 'VALUE':
        COMPRESSVALUE = FIELDVALUE
if COMPRESSNAME == '':
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Missing required field in zpool output: NAME", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
# Any value other than 'off' names an active compression algorithm
# ('on', 'lz4', ...), so the ratio is fetched for all of them.
if COMPRESSVALUE not in ('', 'off'):
    GETCOMPRESSRATIOCOMMAND = ['/usr/bin/sudo', '-n', ZFSCOMMANDE, 'get',
                               'compressratio', ARGS.pool]
    try:
        CHILDPROCESS = subprocess.Popen(GETCOMPRESSRATIOCOMMAND, stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        STATENUM = raisestatenum(3, STATENUM)
        logging.warning("%s : process must be run as root. Possible solution: add the following "
                        "to your visudo: nagios ALL=NOPASSWD: /sbin/zfs",
                        NAGIOSSTATUS[STATENUM])
        sys.exit(STATENUM)
    ZPOOLSTRING = CHILDPROCESS.communicate()[0]
    ZPOOLRETVAL = CHILDPROCESS.returncode
    if ZPOOLRETVAL != 0:
        STATENUM = raisestatenum(3, STATENUM)
        logging.warning("%s : process must be run as root. Possible solution: add the following "
                        "to your visudo: nagios ALL=NOPASSWD: /sbin/zfs",
                        NAGIOSSTATUS[STATENUM])
        sys.exit(STATENUM)
    ZPOOLLINES = ZPOOLSTRING.splitlines()
    # The ratio is optional: without a data line it is simply left empty.
    if len(ZPOOLLINES) >= 2:
        ZPOOLMETA = ZPOOLLINES[0].decode().split()
        ZPOOLMETASTR = ','.join(ZPOOLMETA)
        ZPOOLENTRY = ZPOOLLINES[1].decode().split()
        ZPOOLENTRYSTR = ','.join(ZPOOLENTRY)
        # The loop variable must be the name that is tested. Testing a
        # stale name from the earlier loop left the ratio permanently ''.
        for FIELDNAME, FIELDVALUE in zip(ZPOOLMETA, ZPOOLENTRY):
            if FIELDNAME == 'NAME':
                COMPRESSRATIONAME = FIELDVALUE
            elif FIELDNAME == 'VALUE':
                COMPRESSRATIOVALUE = FIELDVALUE
###################################################################################
##
# OK, finally in the actual status checking of the zpool
# Let's build up our perfdata, regardless of what we're checking
# Fragmentation: "frag=<n>%;<warn>;<crit>;" with the thresholds left empty
# when the fragmentation check was not requested.
FRAGPERCENT = ''
if FRAG != '':
    FRAGPERCENT = FRAG.replace("%", "")
    FRAGPERFSTR = "frag=" + str(FRAGPERCENT) + "%;"
    if CHECKFRAGMENTATION:
        FRAGPERFSTR += str(FRAGWARNTHRESHOLD) + ";" + str(FRAGCRITTHRESHOLD) + ";"
    else:
        FRAGPERFSTR += ";;"
    PERFDATA += FRAGPERFSTR + " "
# Capacity: same layout as fragmentation.
CAPPERCENT = ''
if CAP != '':
    CAPPERCENT = CAP.replace("%", "")
    CAPPERFSTR = "cap=" + str(CAPPERCENT) + "%;"
    if CHECKCAPACITY:
        CAPPERFSTR += str(CAPWARNTHRESHOLD) + ";" + str(CAPCRITTHRESHOLD) + ";"
    else:
        CAPPERFSTR += ";;"
    PERFDATA += CAPPERFSTR + " "
# Perfdata for dedup & compression factor (the trailing 'x' is dropped)
if DEDUP != '':
    DEDUP_NO_X = DEDUP.rstrip('x')
    PERFDATA += "DEDUP=" + str(DEDUP_NO_X) + " "
if COMPRESSRATIOVALUE != '':
    COMPRESSRATIONOX = COMPRESSRATIOVALUE.rstrip('x')
    PERFDATA += "compress_ratio=" + str(COMPRESSRATIONOX) + " "
# Sizes are reported in GB. converttogb returns None for a unit suffix it
# does not know; such a value is left out rather than printed as "NoneGB".
if SIZE != '':
    SIZEGB = converttogb(SIZE)
    if SIZEGB is not None:
        PERFDATA += "size=" + str(SIZEGB) + "GB;;; "
if ALLOC != '':
    ALLOCGB = converttogb(ALLOC)
    if ALLOCGB is not None:
        PERFDATA += "alloc=" + str(ALLOCGB) + "GB;;; "
if FREE != '':
    FREEGB = converttogb(FREE)
    if FREEGB is not None:
        PERFDATA += "free=" + str(FREEGB) + "GB;;; "
##
# Do mandatory checks
# Map the pool health string to (HEALTHNUM reported in perfdata, state
# number to raise STATENUM to): 0 = OK, 1 = WARNING, 2 = CRITICAL.
HEALTHTABLE = {
    'ONLINE': (0, 0),
    'OFFLINE': (1, 1),
    'REMOVED': (2, 1),
    'UNAVAIL': (3, 1),
    'DEGRADED': (4, 2),
    'FAULTED': (5, 2),
}
# A health value missing from the table keeps HEALTHNUM at -1 and raises
# the state to UNKNOWN (3), so an unrecognised state is never shown as OK.
HEALTHNUM, HEALTHSTATE = HEALTHTABLE.get(HEALTH, (-1, 3))
STATENUM = raisestatenum(HEALTHSTATE, STATENUM)
PERFDATA += "health=" + str(HEALTHNUM) + ";1;3;"
PERFDATA += " "
##
# Initial part of msg
# The health status goes right after the pool name when HEALTHNUM is
# positive; HEALTHMSGFILLED records that it has already been written.
HEALTHMSGFILLED = HEALTHNUM > 0
MSG = "POOL: {}".format(NAME)
if HEALTHMSGFILLED:
    MSG = "{}, STATUS: {}".format(MSG, HEALTH)
##
# Do optional checks
# Each *MSGFILLED flag records that the value was already added to MSG as
# a WARN/CRIT item, so the summary built afterwards does not repeat it.
FRAGMSGFILLED = False
CAPMSGFILLED = False
# Compare the bare percentage (FRAGPERCENT), not the formatted perfdata
# string: the latter is never all digits, so the check could never fire.
# isdigit() also skips values that are not a plain number.
if CHECKFRAGMENTATION and FRAGPERCENT.isdigit():
    if int(FRAGPERCENT) > int(FRAGCRITTHRESHOLD):
        FRAGMSGFILLED = True
        STATENUM = raisestatenum(2, STATENUM)
        MSG += ", FRAG CRIT: " + str(FRAG)
    elif int(FRAGPERCENT) > int(FRAGWARNTHRESHOLD):
        FRAGMSGFILLED = True
        STATENUM = raisestatenum(1, STATENUM)
        MSG += ", FRAG WARN: " + str(FRAG)
# Same guard for capacity, so a non-numeric value cannot make int() raise.
if CHECKCAPACITY and CAPPERCENT.isdigit():
    if int(CAPPERCENT) > int(CAPCRITTHRESHOLD):
        CAPMSGFILLED = True
        STATENUM = raisestatenum(2, STATENUM)
        MSG += ", CAP CRIT: " + str(CAP)
    elif int(CAPPERCENT) > int(CAPWARNTHRESHOLD):
        CAPMSGFILLED = True
        STATENUM = raisestatenum(1, STATENUM)
        MSG += ", CAP WARN: " + str(CAP)
##
# Build up rest of message
# Append whatever has not been reported yet, in a fixed order, then print
# "<status>: <message> | <perfdata>" and exit with the state number.
if not HEALTHMSGFILLED:
    MSG += ", STATUS: " + str(HEALTH)
# Plain informational fields: each is added only when it has a value.
for LABEL, VALUE in (("SIZE", SIZE), ("ALLOC", ALLOC), ("FREE", FREE),
                     ("DEDUP", DEDUP), ("COMPRESS", COMPRESSRATIOVALUE)):
    if VALUE != '':
        MSG += ", " + LABEL + ": " + str(VALUE)
# FRAG and CAP are skipped when a WARN/CRIT item already mentioned them.
if not (FRAG == '' or FRAGMSGFILLED):
    MSG += ", FRAG: " + str(FRAG)
if not (CAP == '' or CAPMSGFILLED):
    MSG += ", CAP: " + str(CAP)
##
# Print our output and return
logging.warning("%s: %s | %s", NAGIOSSTATUS[STATENUM], MSG, PERFDATA)
sys.exit(STATENUM)