#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Get zfs pool status.

Nagios plugin: queries ``zfs``/``zpool`` through passwordless sudo, prints a
single status line (``STATUS: message | perfdata``) on stdout and exits with
the matching Nagios code (0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN).
"""
########################################################################
##
## Written by Zachary LaCelle
## Written by Boris Tassou
## Copyright 2020
## Licensed under GPL (see below)
##
## Nagios script to monitor ZFS pools/filesystems
## in Linux.
##
## Tested operating systems/ZFS versions:
## * See README.md
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.
##
########################################################################

import argparse
import logging
import subprocess
import sys

##
# Commands to run
# CHANGE THESE IF YOU NEED TO
##
sudoCommand = '/usr/bin/sudo'
zpoolCommand = '/sbin/zpool'
zfsCommand = '/sbin/zfs'

##
# Nagios status names, indexed by plugin exit code
##
nagiosStatus = ('OK', 'WARNING', 'CRITICAL', 'UNKNOWN')

##
# zpool HEALTH value -> (number published in perfdata, Nagios state raised)
##
healthStates = {
    'ONLINE': (0, 0),
    'OFFLINE': (1, 1),
    'REMOVED': (2, 1),
    'UNAVAIL': (3, 1),
    'DEGRADED': (4, 2),
    'FAULTED': (5, 2),
}

##
# Size suffix -> multiplier that converts the number to gigabytes.
# Every factor is a power of two, so multiplying is exact.
##
gbPerUnit = {
    'B': 1.0 / (1024.0 * 1024.0 * 1024.0),
    'K': 1.0 / (1024.0 * 1024.0),
    'M': 1.0 / 1024.0,
    'G': 1.0,
    'T': 1024.0,
    'P': 1024.0 * 1024.0,
    'E': 1024.0 * 1024.0 * 1024.0,
}

# The plugin output is emitted through logging so that it lands on stdout
# as a bare message line.
logging.basicConfig(stream=sys.stdout, format='%(message)s', level=logging.WARN)


def CheckArgBounds(valueArr, minVal, maxVal):
    """Check value bounds.

    Return True when every value in ``valueArr`` lies within the inclusive
    range [minVal, maxVal]; an empty sequence is considered in bounds.
    """
    return all(minVal <= value <= maxVal for value in valueArr)


def ConvertToGB(valueStr):
    """Convert to GB.

    ``valueStr`` is a number followed by a one-letter unit suffix
    (B, K, M, G, T, P or E); a decimal comma is accepted.  Returns the size
    in gigabytes as a float, or None when the string is empty, has no
    recognised suffix, or its numeric part cannot be parsed.
    """
    if not valueStr:
        return None
    factor = gbPerUnit.get(valueStr[-1])
    if factor is None:
        return None
    try:
        return float(valueStr[:-1].replace(',', '.')) * factor
    except ValueError:
        return None


def RaiseStateNum(stateNumIn, stateNum):
    """Raise state num.

    Return the higher of the two state numbers, so a state can only ever
    be escalated, never lowered.
    """
    return max(stateNumIn, stateNum)


def ExitUnknown(message):
    """Print ``message`` as a Nagios UNKNOWN line and exit with code 3."""
    logging.warning("%s : %s", nagiosStatus[3], message)
    sys.exit(3)


def RunCommand(command):
    """Run ``command`` through ``sudo -n`` and return its stdout lines.

    ``command`` is a list whose first item is the binary to execute.  If the
    process cannot be started or exits with a non-zero code, the plugin
    terminates as UNKNOWN with a sudoers hint naming that binary.
    """
    hint = ("process must be run as root. Possible solution: add the "
            "following to your visudo: nagios ALL=NOPASSWD: %s" % command[0])
    fullCommand = [sudoCommand, '-n'] + list(command)
    try:
        childProcess = subprocess.Popen(fullCommand,
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
    except OSError:
        ExitUnknown(hint)
    output = childProcess.communicate()[0]
    # Any non-zero exit leaves nothing reliable to parse.
    if childProcess.returncode != 0:
        ExitUnknown(hint)
    # Decode once at the I/O boundary; everything downstream works on str.
    return output.decode('utf-8', errors='replace').splitlines()


def ParseFirstRow(lines):
    """Map the header names of a tabular listing to its first data row.

    ``lines[0]`` is the whitespace-separated header and ``lines[1]`` the
    first row.  Returns an empty dict when fewer than two lines are given;
    columns without a matching value are left out.
    """
    if len(lines) < 2:
        return {}
    return dict(zip(lines[0].split(), lines[1].split()))


def EvaluatePool(fields, compressRatioValue='', capThresholds=None,
                 fragThresholds=None):
    """Turn parsed ``zpool list`` fields into a Nagios result.

    ``fields`` maps zpool column names (NAME, SIZE, ALLOC, FREE, FRAG, CAP,
    DEDUP, HEALTH) to their string values; missing columns are skipped.
    ``capThresholds`` / ``fragThresholds`` are ``(warning, critical)``
    percentages, or None when that check is not requested.

    Returns ``(stateNum, msg, perfdata)``.
    """
    name = fields.get('NAME', '')
    size = fields.get('SIZE', '')
    alloc = fields.get('ALLOC', '')
    free = fields.get('FREE', '')
    frag = fields.get('FRAG', '')
    cap = fields.get('CAP', '')
    dedup = fields.get('DEDUP', '')
    health = fields.get('HEALTH', '')

    fragPercent = frag.replace('%', '')
    capPercent = cap.replace('%', '')
    stateNum = 0

    ##
    # Perfdata is built regardless of which checks are enabled
    perfParts = []
    for label, percent, thresholds in (('frag', fragPercent, fragThresholds),
                                       ('cap', capPercent, capThresholds)):
        # zpool prints '-' when a value is unavailable; that is not valid
        # perfdata, so only numeric percentages are published.
        if not percent.isdigit():
            continue
        if thresholds is not None:
            limits = "%s;%s;" % (thresholds[0], thresholds[1])
        else:
            limits = ";;"
        perfParts.append("%s=%s%%;%s" % (label, percent, limits))

    if dedup:
        perfParts.append("dedup=" + dedup.rstrip('x'))
    if compressRatioValue:
        perfParts.append("compress_ratio=" + compressRatioValue.rstrip('x'))

    for label, raw in (('size', size), ('alloc', alloc), ('free', free)):
        gigs = ConvertToGB(raw)
        if gigs is not None:
            perfParts.append("%s=%sGB;;;" % (label, gigs))

    ##
    # Mandatory check: pool health.  A value missing from the table must
    # not be reported as OK, so it raises UNKNOWN.
    healthNum, healthState = healthStates.get(health, (-1, 3))
    stateNum = RaiseStateNum(healthState, stateNum)
    perfParts.append("health=%d;1;3;" % healthNum)
    perfdata = ''.join(part + ' ' for part in perfParts)

    ##
    # Message: problems first, then the informational fields
    msgParts = ["POOL: " + name]
    healthMsgFilled = healthNum > 0
    if healthMsgFilled:
        msgParts.append("STATUS: " + health)

    ##
    # Optional threshold checks
    alerted = {'FRAG': False, 'CAP': False}
    for label, raw, percent, thresholds in (
            ('FRAG', frag, fragPercent, fragThresholds),
            ('CAP', cap, capPercent, capThresholds)):
        if thresholds is None or not percent.isdigit():
            continue
        warnThreshold, critThreshold = thresholds
        if int(percent) > int(critThreshold):
            stateNum = RaiseStateNum(2, stateNum)
            msgParts.append("%s CRIT: %s" % (label, raw))
            alerted[label] = True
        elif int(percent) > int(warnThreshold):
            stateNum = RaiseStateNum(1, stateNum)
            msgParts.append("%s WARN: %s" % (label, raw))
            alerted[label] = True

    if not healthMsgFilled:
        msgParts.append("STATUS: " + health)
    for label, value in (('SIZE', size), ('ALLOC', alloc), ('FREE', free),
                         ('DEDUP', dedup), ('COMPRESS', compressRatioValue)):
        if value:
            msgParts.append("%s: %s" % (label, value))
    if frag and not alerted['FRAG']:
        msgParts.append("FRAG: " + frag)
    if cap and not alerted['CAP']:
        msgParts.append("CAP: " + cap)

    return stateNum, ', '.join(msgParts), perfdata


def main(argv=None):
    """Parse arguments, query ZFS, print the Nagios line and exit.

    ``argv`` is the argument list to parse; None means ``sys.argv[1:]``.
    Always terminates through ``sys.exit`` with the Nagios state number.
    """
    ##
    # Parse command line args
    parser = argparse.ArgumentParser(
        prog='check_zfs',
        description='Check the ZFS pool specified by an argument.',
        epilog='Note that monitor flags (e.g. capacity) require 2 arguments: '
               'warning threshold, and critical threshold')
    parser.add_argument('--capacity',
                        help="monitor utilization of zpool (%%, int [0-100])",
                        type=int, nargs=2)
    parser.add_argument('--fragmentation',
                        help="monitor fragmentation of zpool (%%, int [0-100])",
                        type=int, nargs=2)
    parser.add_argument('pool', help="name of the zpool to check", type=str)
    args = parser.parse_args(argv)

    for label, thresholds in (('Capacity', args.capacity),
                              ('Fragmentation', args.fragmentation)):
        if thresholds is not None and not CheckArgBounds(thresholds, 0, 100):
            logging.warning(
                "%s : %s thresholds must be between 0 and 100 (as a percent).",
                nagiosStatus[3], label)
            parser.print_help()
            sys.exit(3)

    ##
    # Make sure the pool we specified is valid
    zfsLines = RunCommand([zfsCommand, 'list'])
    # First line is the column header; blank lines carry no dataset name.
    datasetNames = {columns[0]
                    for columns in (line.split() for line in zfsLines[1:])
                    if columns}
    if args.pool not in datasetNames:
        ExitUnknown("Pool %s is invalid. Please select a valid pool."
                    % args.pool)

    ##
    # Get info on zpool
    zpoolLines = RunCommand([zpoolCommand, 'list', args.pool])
    fields = ParseFirstRow(zpoolLines)
    zpoolMetaStr = ','.join(zpoolLines[0].split()) if zpoolLines else ''

    if not fields.get('NAME'):
        ExitUnknown("Missing required field in zpool output: NAME")
    if not fields.get('HEALTH'):
        ExitUnknown("Missing required field in zpool output: HEALTH")
    if args.capacity is not None and not fields.get('CAP'):
        ExitUnknown("Cannot monitor capacity without zpool output: CAP. "
                    "Outputs are %s" % zpoolMetaStr)
    if args.fragmentation is not None and not fields.get('FRAG'):
        ExitUnknown("Cannot monitor fragmentation without zpool output: FRAG. "
                    "Outputs are %s" % zpoolMetaStr)

    ##
    # Get compressratio on zpool
    compression = ParseFirstRow(
        RunCommand([zfsCommand, 'get', 'compression', args.pool]))
    if not compression.get('NAME'):
        ExitUnknown("Missing required field in zfs output: NAME")

    compressRatioValue = ''
    # Any algorithm name (lz4, zstd, gzip-9, ...) means compression is
    # enabled, not only the literal value 'on'.
    if compression.get('VALUE', '') not in ('', '-', 'off'):
        ratio = ParseFirstRow(
            RunCommand([zfsCommand, 'get', 'compressratio', args.pool]))
        compressRatioValue = ratio.get('VALUE', '')

    ##
    # Print our output and return
    stateNum, msg, perfdata = EvaluatePool(
        fields,
        compressRatioValue,
        capThresholds=args.capacity,
        fragThresholds=args.fragmentation)
    logging.warning("%s: %s | %s", nagiosStatus[stateNum], msg, perfdata)
    sys.exit(stateNum)


if __name__ == "__main__":
    main()