nagios_check_zfs_linux/check_zfs.py

479 lines
15 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Get zfs pool status."""
########################################################################
##
## Written by Zachary LaCelle
## Migration to Python3 by Boris Tassou
## Copyright 2016
## Licensed under GPL (see below)
##
## Nagios script to monitor ZFS pools/filesystems
## in Linux.
##
## Tested operating systems/ZFS versions:
## * See README.md
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
##
########################################################################
import subprocess
import argparse
import logging
import sys
# from sys import exit
# from array import *
# from types import *
# import array
# import types
# from os import geteuid
##
# Commands to run
# CHANGE THESE IF YOU NEED TO
##
ZPOOLCOMMANDE = '/sbin/zpool'
ZFSCOMMANDE = '/sbin/zfs'

##
# Variables to print at the end
##
# Status labels; STATENUM is used both as the index into this tuple and as
# the process exit code.
NAGIOSSTATUS = ('OK', 'WARNING', 'CRITICAL', 'UNKNOWN')
STATENUM = 0
MSG = ''
PERFDATA = ''

##
# Filled from command line arguments
##
CHECKCAPACITY = False
CAPWARNTHRESHOLD = 50
CAPCRITTHRESHOLD = 80
CHECKFRAGMENTATION = False
FRAGWARNTHRESHOLD = 50
FRAGCRITTHRESHOLD = 80

# The script reports its result through logging.warning(), so emit the bare
# message text on stdout.
logging.basicConfig(stream=sys.stdout, format='%(message)s', level=logging.WARN)
def checkargbounds(valuearr, minval, maxval):
    """Check that every value lies within the inclusive range [minval, maxval].

    ``valuearr`` may be a flat sequence of numbers, or the array-style
    ``[typecode, [values...]]`` layout, where the leading type-code string
    (for example ``'i'``) is ignored and the nested sequence is checked.

    Returns True when all numeric values are within bounds, False otherwise.
    """
    for value in valuearr:
        if isinstance(value, str):
            # Type-code marker: comparing it with a number raises TypeError
            # on Python 3, so skip it.
            continue
        if isinstance(value, (list, tuple)):
            if not checkargbounds(value, minval, maxval):
                return False
            continue
        if value < minval or value > maxval:
            return False
    return True
def converttogb(valuestr):
    """Convert a size string with a one-letter unit suffix to gigabytes.

    The last character of ``valuestr`` selects the unit (B, K, M, G, T, P or
    E, in 1024-based steps); the rest is the number, where a decimal comma is
    accepted in place of a decimal point.

    Returns the size in GB as a float, or None when the suffix is not a
    recognised unit (including an empty string).

    Raises ValueError when the suffix is recognised but the numeric part
    cannot be parsed.
    """
    # Multiplier that turns a value in the given unit into gigabytes.
    gb_per_unit = {
        'B': 1.0 / (1024.0 * 1024.0 * 1024.0),
        'K': 1.0 / (1024.0 * 1024.0),
        'M': 1.0 / 1024.0,
        'G': 1.0,
        'T': 1024.0,
        'P': 1024.0 * 1024.0,
        'E': 1024.0 * 1024.0 * 1024.0,
    }
    factor = gb_per_unit.get(valuestr[-1:])
    if factor is None:
        return None
    number = valuestr[:-1].replace(',', '.')
    return float(number) * factor
def raisestatenum(statenumin, statenum):
    """Return the higher of the proposed state and the current state.

    ``statenumin`` is the state a check wants to report and ``statenum`` is
    the state accumulated so far; the state is never lowered.
    """
    # max() returns its first argument on a tie, so the current state is kept
    # when both are equal.
    return max(statenum, statenumin)
###################################################################################
##
# Parse command line args
##
PARSER = argparse.ArgumentParser(
    prog='check_zfs',
    description='Check the ZFS pool specified by an argument.',
    epilog='Note that monitor flags (e.g. capacity) require 2 arguments: '
           'warning threshold, and critical threshold')
PARSER.add_argument('--capacity', help="monitor utilization of zpool (%%, int [0-100])",
                    type=int, nargs=2)
PARSER.add_argument('--fragmentation', help="monitor fragmentation of zpool (%%, int [0-100])",
                    type=int, nargs=2)
PARSER.add_argument('pool', help="name of the zpool to check", type=str)
ARGS = PARSER.parse_args()

# Each monitor flag carries two integers: warning threshold, then critical
# threshold. Both must be valid percentages.
RETVAL = True
if ARGS.capacity is not None:
    CHECKCAPACITY = True
    CAPWARNTHRESHOLD = ARGS.capacity[0]
    CAPCRITTHRESHOLD = ARGS.capacity[1]
    # Pass a flat list of ints: a leading 'i' type code cannot be compared
    # with a number on Python 3.
    CAPARR = [CAPWARNTHRESHOLD, CAPCRITTHRESHOLD]
    RETVAL = checkargbounds(CAPARR, 0, 100)
    if RETVAL is False:
        STATENUM = raisestatenum(3, STATENUM)
        logging.warning("%s : Capacity thresholds must be between 0 and 100 (as a percent).",
                        NAGIOSSTATUS[STATENUM])
        PARSER.print_help()
        sys.exit(STATENUM)

RETVAL = True
if ARGS.fragmentation is not None:
    CHECKFRAGMENTATION = True
    FRAGWARNTHRESHOLD = ARGS.fragmentation[0]
    FRAGCRITTHRESHOLD = ARGS.fragmentation[1]
    FRAGARR = [FRAGWARNTHRESHOLD, FRAGCRITTHRESHOLD]
    RETVAL = checkargbounds(FRAGARR, 0, 100)
    if RETVAL is False:
        STATENUM = raisestatenum(3, STATENUM)
        logging.warning("%s : Fragmentation thresholds must be between 0 and 100 (as a percent).",
                        NAGIOSSTATUS[STATENUM])
        PARSER.print_help()
        sys.exit(STATENUM)
###################################################################################
##
# Get generic info about the ZFS environment
ZFSENTRIES = []
FULLCOMMAND = ['/usr/bin/sudo', '-n', ZFSCOMMANDE, 'list']
try:
    CHILDPROCESS = subprocess.Popen(FULLCOMMAND, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
except OSError:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zfs", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
ZFSSTRING = CHILDPROCESS.communicate()[0]
ZFSRETVAL = CHILDPROCESS.returncode
# Compare by value, not identity, and treat every non-zero exit code as a
# failure so the parsing below never runs on the output of a failed command.
if ZFSRETVAL != 0:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zfs", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)

# The first output line is the column header; every later line is split into
# its whitespace-separated fields (still bytes at this point).
ZFSLINES = ZFSSTRING.splitlines()
ZFSENTRIES = [LINE.split() for LINE in ZFSLINES[1:]]

# Make sure the pool we specified is valid
# (the first field of each row is compared with the requested pool name;
# blank rows are ignored).
VALIDPOOL = any(ENTRY and ENTRY[0].decode() == ARGS.pool for ENTRY in ZFSENTRIES)
if not VALIDPOOL:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Pool %s is invalid. Please select a valid pool.",
                    NAGIOSSTATUS[STATENUM], ARGS.pool)
    sys.exit(STATENUM)
###################################################################################
##
# Get info on zpool
FULLCOMMAND = ['/usr/bin/sudo', '-n', ZPOOLCOMMANDE, 'list', ARGS.pool]
try:
    CHILDPROCESS = subprocess.Popen(FULLCOMMAND, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except OSError:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zpool", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
ZPOOLSTRING = CHILDPROCESS.communicate()[0]
ZPOOLRETVAL = CHILDPROCESS.returncode
# Compare by value, not identity, and treat every non-zero exit code as a
# failure.
if ZPOOLRETVAL != 0:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zpool", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)

ZPOOLLINES = ZPOOLSTRING.splitlines()
# A header line and one data line are needed; bail out as UNKNOWN instead of
# dying with an IndexError traceback.
if len(ZPOOLLINES) < 2:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Unexpected output from zpool list", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
ZPOOLMETA = ZPOOLLINES[0].decode().split()
ZPOOLMETASTR = ','.join(ZPOOLMETA)
ZPOOLENTRY = ZPOOLLINES[1].decode().split()
ZPOOLENTRYSTR = ','.join(ZPOOLENTRY)

# Pair each header column with the value in the same position; columns that
# are absent from the output stay as empty strings.
ZPOOLFIELDS = dict(zip(ZPOOLMETA, ZPOOLENTRY))
NAME = ZPOOLFIELDS.get('NAME', '')
SIZE = ZPOOLFIELDS.get('SIZE', '')
ALLOC = ZPOOLFIELDS.get('ALLOC', '')
FREE = ZPOOLFIELDS.get('FREE', '')
EXPANDSZ = ZPOOLFIELDS.get('EXPANDSZ', '')
FRAG = ZPOOLFIELDS.get('FRAG', '')
CAP = ZPOOLFIELDS.get('CAP', '')
DEDUP = ZPOOLFIELDS.get('DEDUP', '')
HEALTH = ZPOOLFIELDS.get('HEALTH', '')
ALTROOT = ZPOOLFIELDS.get('ALTROOT', '')

if NAME == '':
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Missing required field in zpool output: NAME", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
if HEALTH == '':
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Missing required field in zpool output: HEALTH", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
if CHECKCAPACITY and CAP == '':
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Cannot monitor capacity without zpool output: CAP. "
                    "Outputs are %s", NAGIOSSTATUS[STATENUM], ZPOOLMETASTR)
    sys.exit(STATENUM)
if CHECKFRAGMENTATION and FRAG == '':
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Cannot monitor fragmentation without zpool output: FRAG. "
                    "Outputs are %s", NAGIOSSTATUS[STATENUM], ZPOOLMETASTR)
    sys.exit(STATENUM)
# Get compressratio on zpool
# First ask whether compression is enabled; the ratio is only queried when the
# 'compression' property reads 'on'.
CHECKFORCOMPRESSION = ['/usr/bin/sudo', '-n', ZFSCOMMANDE, 'get', 'compression', ARGS.pool]
try:
    CHILDPROCESS = subprocess.Popen(CHECKFORCOMPRESSION, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except OSError:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zfs", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
ZPOOLSTRING = CHILDPROCESS.communicate()[0]
ZPOOLRETVAL = CHILDPROCESS.returncode
# Compare by value, not identity, and treat every non-zero exit code as a
# failure.
if ZPOOLRETVAL != 0:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zfs", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)

ZPOOLLINES = ZPOOLSTRING.splitlines()
# A header line and one data line are needed; bail out as UNKNOWN instead of
# dying with an IndexError traceback.
if len(ZPOOLLINES) < 2:
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Unexpected output from zfs get compression", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)
ZPOOLMETA = ZPOOLLINES[0].decode().split()
ZPOOLMETASTR = ','.join(ZPOOLMETA)
ZPOOLENTRY = ZPOOLLINES[1].decode().split()
ZPOOLENTRYSTR = ','.join(ZPOOLENTRY)

COMPRESSRATIONAME = ''
COMPRESSRATIOVALUE = ''
# Pair each header column with the value in the same position.
COMPRESSFIELDS = dict(zip(ZPOOLMETA, ZPOOLENTRY))
COMPRESSNAME = COMPRESSFIELDS.get('NAME', '')
COMPRESSVALUE = COMPRESSFIELDS.get('VALUE', '')
if COMPRESSNAME == '':
    STATENUM = raisestatenum(3, STATENUM)
    logging.warning("%s : Missing required field in zpool output: NAME", NAGIOSSTATUS[STATENUM])
    sys.exit(STATENUM)

if COMPRESSVALUE == 'on':
    GETCOMPRESSRATIOCOMMAND = ['/usr/bin/sudo', '-n', ZFSCOMMANDE, 'get',
                               'compressratio', ARGS.pool]
    try:
        CHILDPROCESS = subprocess.Popen(GETCOMPRESSRATIOCOMMAND, stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        STATENUM = raisestatenum(3, STATENUM)
        logging.warning("%s : process must be run as root. Possible solution: add the following "
                        "to your visudo: nagios ALL=NOPASSWD: /sbin/zfs", NAGIOSSTATUS[STATENUM])
        sys.exit(STATENUM)
    ZPOOLSTRING = CHILDPROCESS.communicate()[0]
    ZPOOLRETVAL = CHILDPROCESS.returncode
    if ZPOOLRETVAL != 0:
        STATENUM = raisestatenum(3, STATENUM)
        logging.warning("%s : process must be run as root. Possible solution: add the following "
                        "to your visudo: nagios ALL=NOPASSWD: /sbin/zfs", NAGIOSSTATUS[STATENUM])
        sys.exit(STATENUM)

    ZPOOLLINES = ZPOOLSTRING.splitlines()
    if len(ZPOOLLINES) < 2:
        STATENUM = raisestatenum(3, STATENUM)
        logging.warning("%s : Unexpected output from zfs get compressratio",
                        NAGIOSSTATUS[STATENUM])
        sys.exit(STATENUM)
    ZPOOLMETA = ZPOOLLINES[0].decode().split()
    ZPOOLMETASTR = ','.join(ZPOOLMETA)
    ZPOOLENTRY = ZPOOLLINES[1].decode().split()
    ZPOOLENTRYSTR = ','.join(ZPOOLENTRY)

    # Look the columns up by header name. The previous loop bound
    # 'fieldName' but tested the stale 'FIELDNAME', so the ratio was never
    # captured.
    COMPRESSRATIOFIELDS = dict(zip(ZPOOLMETA, ZPOOLENTRY))
    COMPRESSRATIONAME = COMPRESSRATIOFIELDS.get('NAME', '')
    COMPRESSRATIOVALUE = COMPRESSRATIOFIELDS.get('VALUE', '')
###################################################################################
##
# OK, finally in the actual status checking of the zpool
# Let's build up our perfdata, regardless of what we're checking
# Each perfdata item is "label=value;warn;crit;" followed by a space; the
# threshold slots are left empty when that check was not requested.
FRAGPERCENT = ''
FRAGPERFSTR = ''
if FRAG != '':
    FRAGPERCENT = FRAG.replace("%", "")
    FRAGPERFSTR = "frag=" + str(FRAGPERCENT) + "%;"
    if CHECKFRAGMENTATION:
        FRAGPERFSTR = FRAGPERFSTR + str(FRAGWARNTHRESHOLD) + ";" + str(FRAGCRITTHRESHOLD) + ";"
    else:
        FRAGPERFSTR += ";;"
    PERFDATA += FRAGPERFSTR
    PERFDATA += " "

CAPPERCENT = ''
CAPPERFSTR = ''
if CAP != '':
    CAPPERCENT = CAP.replace("%", "")
    CAPPERFSTR = "cap=" + str(CAPPERCENT) + "%;"
    if CHECKCAPACITY:
        CAPPERFSTR = CAPPERFSTR + str(CAPWARNTHRESHOLD) + ";" + str(CAPCRITTHRESHOLD) + ";"
    else:
        CAPPERFSTR += ";;"
    PERFDATA += CAPPERFSTR
    PERFDATA += " "

# Perfdata for dedup & compression factor
# (the trailing 'x' of the ratio is stripped so only the number remains)
if DEDUP != '':
    DEDUP_NO_X = DEDUP.rstrip('x')
    PERFDATA += "DEDUP=" + str(DEDUP_NO_X)
    PERFDATA += " "
if COMPRESSRATIOVALUE != '':
    COMPRESSRATIONOX = COMPRESSRATIOVALUE.rstrip('x')
    PERFDATA += "compress_ratio=" + str(COMPRESSRATIONOX)
    PERFDATA += " "

# Sizes are reported in GB. converttogb() returns None for a unit it does not
# recognise; such a value is left out rather than emitted as "NoneGB".
if SIZE != '':
    SIZEGB = converttogb(SIZE)
    if SIZEGB is not None:
        PERFDATA += "size=" + str(SIZEGB) + "GB;;;"
        PERFDATA += " "
if ALLOC != '':
    ALLOCGB = converttogb(ALLOC)
    if ALLOCGB is not None:
        PERFDATA += "alloc=" + str(ALLOCGB) + "GB;;;"
        PERFDATA += " "
if FREE != '':
    FREEGB = converttogb(FREE)
    if FREEGB is not None:
        PERFDATA += "free=" + str(FREEGB) + "GB;;;"
        PERFDATA += " "
##
# Do mandatory checks
# Map the HEALTH column to (numeric health code for perfdata, state to raise).
HEALTHSTATES = {
    'ONLINE': (0, 0),
    'OFFLINE': (1, 1),
    'REMOVED': (2, 1),
    'UNAVAIL': (3, 1),
    'DEGRADED': (4, 2),
    'FAULTED': (5, 2),
}
if HEALTH in HEALTHSTATES:
    HEALTHNUM, HEALTHSTATE = HEALTHSTATES[HEALTH]
else:
    # An unrecognised health string must not be reported as OK: keep the -1
    # perfdata marker and raise the state to UNKNOWN.
    HEALTHNUM, HEALTHSTATE = -1, 3
STATENUM = raisestatenum(HEALTHSTATE, STATENUM)
PERFDATA += "health=" + str(HEALTHNUM) + ";1;3;"
PERFDATA += " "
##
# Initial part of msg
MSG = "POOL: " + str(NAME)
# A non-healthy status is put right after the pool name; otherwise it is added
# with the rest of the message later on.
HEALTHMSGFILLED = False
if HEALTHNUM > 0:
    MSG += ", STATUS: " + str(HEALTH)
    HEALTHMSGFILLED = True

##
# Do optional checks
FRAGMSGFILLED = False
CAPMSGFILLED = False
# Compare the bare percentage (FRAGPERCENT), not the perfdata string: the
# latter is never all digits, which silently disabled this check. A
# non-numeric value is skipped.
if CHECKFRAGMENTATION and FRAGPERCENT != '':
    if FRAGPERCENT.isdigit():
        if int(FRAGPERCENT) > int(FRAGCRITTHRESHOLD):
            FRAGMSGFILLED = True
            STATENUM = raisestatenum(2, STATENUM)
            MSG += ", FRAG CRIT: " + str(FRAG)
        elif int(FRAGPERCENT) > int(FRAGWARNTHRESHOLD):
            FRAGMSGFILLED = True
            STATENUM = raisestatenum(1, STATENUM)
            MSG += ", FRAG WARN: " + str(FRAG)
if CHECKCAPACITY and CAPPERCENT != '':
    if int(CAPPERCENT) > int(CAPCRITTHRESHOLD):
        CAPMSGFILLED = True
        STATENUM = raisestatenum(2, STATENUM)
        MSG += ", CAP CRIT: " + str(CAP)
    elif int(CAPPERCENT) > int(CAPWARNTHRESHOLD):
        CAPMSGFILLED = True
        STATENUM = raisestatenum(1, STATENUM)
        MSG += ", CAP WARN: " + str(CAP)
##
# Build up rest of message
# Append every value that is available and was not already reported by one of
# the checks above.
if not HEALTHMSGFILLED:
    MSG += ", STATUS: " + str(HEALTH)
for LABEL, VALUE in (("SIZE", SIZE), ("ALLOC", ALLOC), ("FREE", FREE),
                     ("DEDUP", DEDUP), ("COMPRESS", COMPRESSRATIOVALUE)):
    if VALUE != '':
        MSG += ", " + LABEL + ": " + str(VALUE)
if FRAG != '' and not FRAGMSGFILLED:
    MSG += ", FRAG: " + str(FRAG)
if CAP != '' and not CAPMSGFILLED:
    MSG += ", CAP: " + str(CAP)

##
# Print our output and return
# Output line is "<status>: <message> | <perfdata>"; the numeric state is the
# exit code.
logging.warning("%s: %s | %s", NAGIOSSTATUS[STATENUM], MSG, PERFDATA)
sys.exit(STATENUM)