This commit is contained in:
Alarig Le Lay 2020-06-14 16:07:49 +02:00
parent e5cdfee69d
commit 007ccd5b15
Signed by: alarig
GPG key ID: 7AFE62C6DF8BCDEC

187
check_zfs.py Executable file → Normal file
View file

@ -1,4 +1,6 @@
#!/usr/bin/python #!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Get zfs pool status."""
######################################################################## ########################################################################
## ##
@ -10,8 +12,7 @@
## in Linux. ## in Linux.
## ##
## Tested operating systems/ZFS versions: ## Tested operating systems/ZFS versions:
## * Ubuntu 14.04 LTS, ZFS v5 ## * See README.md
## * CentOS 7, ZFS v5
## ##
## This program is free software: you can redistribute it and/or modify ## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by ## it under the terms of the GNU General Public License as published by
@ -28,12 +29,16 @@
## ##
######################################################################## ########################################################################
from sys import exit
import subprocess import subprocess
import argparse import argparse
from array import * import logging
from types import * import sys
from os import geteuid # from sys import exit
# from array import *
# from types import *
# import array
# import types
# from os import geteuid
## ##
# Commands to run # Commands to run
@ -60,7 +65,11 @@ checkFragmentation=False
fragWarnThreshold = 50 fragWarnThreshold = 50
fragCritThreshold = 80 fragCritThreshold = 80
logging.basicConfig(stream=sys.stdout, format='%(message)s', level=logging.WARN)
def CheckArgBounds(valueArr, minVal, maxVal): def CheckArgBounds(valueArr, minVal, maxVal):
"""Check value bounds."""
for value in valueArr: for value in valueArr:
if value < minVal: if value < minVal:
return False return False
@ -68,7 +77,9 @@ def CheckArgBounds( valueArr, minVal, maxVal ):
return False return False
return True return True
def ConvertToGB(valueStr): def ConvertToGB(valueStr):
"""Convert to GB."""
value = valueStr[:-1] value = valueStr[:-1]
value = value.replace(',', '.') value = value.replace(',', '.')
if valueStr.endswith('G'): if valueStr.endswith('G'):
@ -83,11 +94,14 @@ def ConvertToGB( valueStr ):
gigs = float(value) / (1024.0 * 1024.0) gigs = float(value) / (1024.0 * 1024.0)
return float(gigs) return float(gigs)
def RaiseStateNum(stateNumIn, stateNum): def RaiseStateNum(stateNumIn, stateNum):
"""Raise state num."""
if stateNumIn > stateNum: if stateNumIn > stateNum:
return stateNumIn return stateNumIn
return stateNum return stateNum
################################################################################### ###################################################################################
## ##
# Parse command line args # Parse command line args
@ -95,9 +109,12 @@ def RaiseStateNum( stateNumIn, stateNum ):
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog='check_zfs', prog='check_zfs',
description='Check the ZFS pool specified by an argument.', description='Check the ZFS pool specified by an argument.',
epilog='Note that monitor flags (e.g. capacity) require 2 arguments: warning threshold, and critical threshold') epilog='Note that monitor flags (e.g. capacity) require 2 arguments:\
parser.add_argument('--capacity', help="monitor utilization of zpool (%%, int [0-100])", type=int, nargs=2) warning threshold, and critical threshold')
parser.add_argument('--fragmentation', help="monitor fragmentation of zpool (%%, int [0-100])", type=int, nargs=2) parser.add_argument('--capacity', help="monitor utilization of zpool (%%, int [0-100])",
type=int, nargs=2)
parser.add_argument('--fragmentation', help="monitor fragmentation of zpool (%%, int [0-100])",
type=int, nargs=2)
parser.add_argument('pool', help="name of the zpool to check", type=str) parser.add_argument('pool', help="name of the zpool to check", type=str)
args = parser.parse_args() args = parser.parse_args()
@ -107,11 +124,12 @@ if args.capacity is not None:
checkCapacity = True checkCapacity = True
capWarnThreshold = args.capacity[0] capWarnThreshold = args.capacity[0]
capCritThreshold = args.capacity[1] capCritThreshold = args.capacity[1]
capArr = array('i', [capWarnThreshold, capCritThreshold]) capArr = ['i', [capWarnThreshold, capCritThreshold]]
retVal = CheckArgBounds(capArr, 0, 100) retVal = CheckArgBounds(capArr, 0, 100)
if retVal is False: if retVal is False:
stateNum = RaiseStateNum(3, stateNum) stateNum = RaiseStateNum(3, stateNum)
print nagiosStatus[stateNum] + ": Capacity thresholds must be between 0 and 100 (as a percent)." logging.warning("%s : Capacity thresholds must be between 0 and 100 (as a percent).",
nagiosStatus[stateNum])
parser.print_help() parser.print_help()
exit(stateNum) exit(stateNum)
retVal = True retVal = True
@ -119,22 +137,14 @@ if args.fragmentation is not None:
checkFragmentation = True checkFragmentation = True
fragWarnThreshold = args.fragmentation[0] fragWarnThreshold = args.fragmentation[0]
fragCritThreshold = args.fragmentation[1] fragCritThreshold = args.fragmentation[1]
fragArr = array('i', [fragWarnThreshold, fragCritThreshold]) fragArr = ['i', [fragWarnThreshold, fragCritThreshold]]
retVal = CheckArgBounds(fragArr, 0, 100) retVal = CheckArgBounds(fragArr, 0, 100)
if retVal is False: if retVal is False:
stateNum = RaiseStateNum(3, stateNum) stateNum = RaiseStateNum(3, stateNum)
print nagiosStatus[stateNum] + ": Fragmentation thresholds must be between 0 and 100 (as a percent)." logging.warning("%s : Fragmentation thresholds must be between 0 and 100 (as a percent).",
nagiosStatus[stateNum])
parser.print_help() parser.print_help()
exit(stateNum) exit(stateNum)
###################################################################################
###################################################################################
##
# Verify that we're running as root. This should render redundant some checks
# below, but we'll leave them there in case of bugs and to make this more readable.
#if geteuid() != 0:
# stateNum = RaiseStateNum(3, stateNum)
# print nagiosStatus[stateNum] + ": process must be run as root. Did you for get sudo? If not, possible solution: add the following toyour visudo: nagios ALL=NOPASSWD: /sbin/zfs"
# exit(stateNum)
################################################################################### ###################################################################################
## ##
@ -142,10 +152,12 @@ if args.fragmentation is not None:
zfsEntries = [] zfsEntries = []
fullCommand = ['/usr/bin/sudo', '-n', zfsCommand, 'list'] fullCommand = ['/usr/bin/sudo', '-n', zfsCommand, 'list']
try: try:
childProcess = subprocess.Popen(fullCommand, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) childProcess = subprocess.Popen(fullCommand, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
except OSError: except OSError:
stateNum = RaiseStateNum(3, stateNum) stateNum = RaiseStateNum(3, stateNum)
print nagiosStatus[stateNum] + ": process must be run as root. Possible solution: add the following to your visudo: nagios ALL=NOPASSWD: /sbin/zfs" logging.warning("%s : process must be run as root. Possible solution: add the following to your\
visudo: nagios ALL=NOPASSWD: /sbin/zfs", nagiosStatus[stateNum])
exit(stateNum) exit(stateNum)
zfsString = childProcess.communicate()[0] zfsString = childProcess.communicate()[0]
@ -153,7 +165,8 @@ zfsRetval = childProcess.returncode
if zfsRetval is 1: if zfsRetval is 1:
stateNum = RaiseStateNum(3, stateNum) stateNum = RaiseStateNum(3, stateNum)
print nagiosStatus[stateNum] + ": process must be run as root. Possible solution: add the following to your visudo: nagios ALL=NOPASSWD: /sbin/zfs" logging.warning("%s : process must be run as root. Possible solution: add the following to\
your visudo: nagios ALL=NOPASSWD: /sbin/zfs", nagiosStatus[stateNum])
exit(stateNum) exit(stateNum)
zfsLines = zfsString.splitlines() zfsLines = zfsString.splitlines()
@ -165,11 +178,12 @@ for idx, line in enumerate(zfsLines):
# Make sure the pool we specified is valid # Make sure the pool we specified is valid
validPool = False validPool = False
for entry in zfsEntries: for entry in zfsEntries:
if entry[0] == args.pool: if entry[0].decode() == args.pool:
validPool = True validPool = True
if not validPool: if not validPool:
stateNum = RaiseStateNum(3, stateNum) stateNum = RaiseStateNum(3, stateNum)
print nagiosStatus[stateNum] + ": Pool " + args.pool + " is invalid. Please select a valid pool." logging.warning("%s : Pool %s is invalid. Please select a valid pool.",
nagiosStatus[stateNum], args.pool)
exit(stateNum) exit(stateNum)
################################################################################### ###################################################################################
@ -183,20 +197,22 @@ try:
stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except OSError: except OSError:
stateNum = RaiseStateNum(3, stateNum) stateNum = RaiseStateNum(3, stateNum)
print nagiosStatus[stateNum] + ": process must be run as root. Possible solution: add the following to your visudo: nagios ALL=NOPASSWD: /sbin/zpool" logging.warning("%s : process must be run as root. Possible solution: add the following to\
your visudo: nagios ALL=NOPASSWD: /sbin/zpool", nagiosStatus[stateNum])
exit(stateNum) exit(stateNum)
zpoolString = childProcess.communicate()[0] zpoolString = childProcess.communicate()[0]
zpoolRetval = childProcess.returncode zpoolRetval = childProcess.returncode
if zpoolRetval is 1: if zpoolRetval is 1:
stateNum = RaiseStateNum(3, stateNum) stateNum = RaiseStateNum(3, stateNum)
print nagiosStatus[stateNum] + ": process must be run as root. Possible solution: add the following to your visudo: nagios ALL=NOPASSWD: /sbin/zpool" logging.warning("%s : process must be run as root. Possible solution: add the following to\
your visudo: nagios ALL=NOPASSWD: /sbin/zpool", nagiosStatus[stateNum])
exit(stateNum) exit(stateNum)
zpoolLines = zpoolString.splitlines() zpoolLines = zpoolString.splitlines()
zpoolMeta=zpoolLines[0].split() zpoolMeta = zpoolLines[0].decode().split()
zpoolMetaStr = ','.join(zpoolMeta) zpoolMetaStr = ','.join(zpoolMeta)
zpoolEntry=zpoolLines[1].split() zpoolEntry = zpoolLines[1].decode().split()
zpoolEntryStr = ','.join(zpoolEntry) zpoolEntryStr = ','.join(zpoolEntry)
name = '' name = ''
@ -234,21 +250,98 @@ for idx, fieldName in enumerate(zpoolMeta):
if name == '': if name == '':
stateNum = RaiseStateNum(3, stateNum) stateNum = RaiseStateNum(3, stateNum)
print nagiosStatus[stateNum] + ": Missing required field in zpool output: NAME" logging.warning("%s : Missing required field in zpool output: NAME", nagiosStatus[stateNum])
exit(stateNum) exit(stateNum)
if health == '': if health == '':
stateNum = RaiseStateNum(3, stateNum) stateNum = RaiseStateNum(3, stateNum)
print nagiosStatus[stateNum] + ": Missing required field in zpool output: HEALTH" logging.warning("%s : Missing required field in zpool output: HEALTH", nagiosStatus[stateNum])
exit(stateNum) exit(stateNum)
if checkCapacity and cap == '': if checkCapacity and cap == '':
stateNum = RaiseStateNum(3, stateNum) stateNum = RaiseStateNum(3, stateNum)
print nagiosStatus[stateNum] + ": Cannot monitor capacity without zpool output: CAP. Outputs are" + zpoolMetaStr logging.warning("%s Cannot monitor capacity without zpool output: CAP.\
Outputs are %s", nagiosStatus[stateNum], zpoolMetaStr)
exit(stateNum) exit(stateNum)
if checkFragmentation and frag == '': if checkFragmentation and frag == '':
stateNum = RaiseStateNum(3, stateNum) stateNum = RaiseStateNum(3, stateNum)
print nagiosStatus[stateNum] + ": Cannot monitor fragmentation without zpool output: FRAG. Outputs are " + zpoolMetaStr logging.warning("%s : Cannot monitor fragmentation without zpool output: FRAG.\
Outputs are %s", nagiosStatus[stateNum], zpoolMetaStr)
exit(stateNum) exit(stateNum)
# Get compressratio on zpool
checkForCompression = ['/usr/bin/sudo', '-n', zfsCommand, 'get', 'compression', args.pool]
try:
childProcess = subprocess.Popen(checkForCompression, stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except OSError:
stateNum = RaiseStateNum(3, stateNum)
logging.warning("%s : process must be run as root. Possible solution: add the following to\
your visudo: nagios ALL=NOPASSWD: /sbin/zpool", nagiosStatus[stateNum])
exit(stateNum)
zpoolString = childProcess.communicate()[0]
zpoolRetval = childProcess.returncode
if zpoolRetval is 1:
stateNum = RaiseStateNum(3, stateNum)
logging.warning("%s : process must be run as root. Possible solution: add the following to\
your visudo: nagios ALL=NOPASSWD: /sbin/zpool", nagiosStatus[stateNum])
exit(stateNum)
zpoolLines = zpoolString.splitlines()
zpoolMeta = zpoolLines[0].decode().split()
zpoolMetaStr = ','.join(zpoolMeta)
zpoolEntry = zpoolLines[1].decode().split()
zpoolEntryStr = ','.join(zpoolEntry)
compressName = ''
compressValue = ''
compressRatioName = ''
compressRatioValue = ''
for idx, fieldName in enumerate(zpoolMeta):
if fieldName == 'NAME':
compressName = zpoolEntry[idx]
elif fieldName == 'VALUE':
compressValue = zpoolEntry[idx]
if compressName == '':
stateNum = RaiseStateNum(3, stateNum)
logging.warning("%s : Missing required field in zpool output: NAME", nagiosStatus[stateNum])
exit(stateNum)
if compressValue == 'on':
getCompressRatioCommand = ['/usr/bin/sudo', '-n', zfsCommand, 'get', 'compressratio', args.pool]
try:
childProcess = subprocess.Popen(getCompressRatioCommand, stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except OSError:
stateNum = RaiseStateNum(3, stateNum)
logging.warning("%s : process must be run as root. Possible solution: add the following to\
your visudo: nagios ALL=NOPASSWD: /sbin/zpool", nagiosStatus[stateNum])
exit(stateNum)
zpoolString = childProcess.communicate()[0]
zpoolRetval = childProcess.returncode
if zpoolRetval is 1:
stateNum = RaiseStateNum(3, stateNum)
logging.warning("%s : process must be run as root. Possible solution: add the following to\
your visudo: nagios ALL=NOPASSWD: /sbin/zpool", nagiosStatus[stateNum])
exit(stateNum)
zpoolLines = zpoolString.splitlines()
zpoolMeta = zpoolLines[0].decode().split()
zpoolMetaStr = ','.join(zpoolMeta)
zpoolEntry = zpoolLines[1].decode().split()
zpoolEntryStr = ','.join(zpoolEntry)
for idx, fieldName in enumerate(zpoolMeta):
if fieldName == 'NAME':
compressRatioName = zpoolEntry[idx]
elif fieldName == 'VALUE':
compressRatioValue = zpoolEntry[idx]
################################################################################### ###################################################################################
## ##
# OK, finally in the actual status checking of the zpool # OK, finally in the actual status checking of the zpool
@ -261,7 +354,7 @@ if frag!='':
if checkFragmentation: if checkFragmentation:
fragPerfStr = fragPerfStr+str(fragWarnThreshold)+";"+str(fragCritThreshold)+";" fragPerfStr = fragPerfStr+str(fragWarnThreshold)+";"+str(fragCritThreshold)+";"
else: else:
fragPerfStr+=(";;"); fragPerfStr += (";;")
perfdata += (fragPerfStr) perfdata += (fragPerfStr)
perfdata += " " perfdata += " "
@ -272,10 +365,21 @@ if cap!='':
if checkCapacity: if checkCapacity:
capPerfStr = capPerfStr+str(capWarnThreshold)+";"+str(capCritThreshold)+";" capPerfStr = capPerfStr+str(capWarnThreshold)+";"+str(capCritThreshold)+";"
else: else:
capPerfStr+=(";;"); capPerfStr += (";;")
perfdata += (capPerfStr) perfdata += (capPerfStr)
perfdata += " " perfdata += " "
# Perfdata for dedup & compression factor
if dedup != '':
dedup_no_x = dedup.rstrip('x')
perfdata += "dedup="+str(dedup_no_x)
perfdata += " "
if compressRatioValue != '':
compressRatioNoX = compressRatioValue.rstrip('x')
perfdata += "compress_ratio="+str(compressRatioNoX)
perfdata += " "
# Sizes can be in K, M, G, or T (maybe P, but I'm not doing this yet) # Sizes can be in K, M, G, or T (maybe P, but I'm not doing this yet)
if size != '': if size != '':
sizeGB = ConvertToGB(size) sizeGB = ConvertToGB(size)
@ -328,6 +432,7 @@ if healthNum > 0:
fragMsgFilled = False fragMsgFilled = False
capMsgFilled = False capMsgFilled = False
if checkFragmentation and fragPercent != '': if checkFragmentation and fragPercent != '':
if fragPercent.isdigit() is True:
if int(fragPercent) > int(fragCritThreshold): if int(fragPercent) > int(fragCritThreshold):
fragMsgFilled = True fragMsgFilled = True
stateNum = RaiseStateNum(2, stateNum) stateNum = RaiseStateNum(2, stateNum)
@ -356,6 +461,10 @@ if alloc!='':
msg += ", ALLOC: " + str(alloc) msg += ", ALLOC: " + str(alloc)
if free != '': if free != '':
msg += ", FREE: " + str(free) msg += ", FREE: " + str(free)
if dedup != '':
msg += ", DEDUP: " + str(dedup)
if compressRatioValue != '':
msg += ", COMPRESS: " + str(compressRatioValue)
if frag != '' and not fragMsgFilled: if frag != '' and not fragMsgFilled:
msg += ", FRAG: " + str(frag) msg += ", FRAG: " + str(frag)
if cap != '' and not capMsgFilled: if cap != '' and not capMsgFilled:
@ -363,5 +472,5 @@ if cap!='' and not capMsgFilled:
## ##
# Print our output and return # Print our output and return
print nagiosStatus[stateNum]+": "+msg+" | "+perfdata logging.warning("%s: %s | %s", nagiosStatus[stateNum], msg, perfdata)
exit(stateNum) exit(stateNum)