nagios_check_zfs_linux/check_zfs.py

478 lines
15 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Get zfs pool status."""
########################################################################
##
## Written by Zachary LaCelle
## 2020-06-14 16:09:01 +02:00
## Written by Boris Tassou
## Copyright 2020
## Licensed under GPL (see below)
##
## Nagios script to monitor ZFS pools/filesystems
## in Linux.
##
## Tested operating systems/ZFS versions:
## * See README.md
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
##
########################################################################
import subprocess
import argparse
import logging
import sys
# from sys import exit
# from array import *
# from types import *
# import array
# import types
# from os import geteuid
##
# External binaries invoked by this check.
# Adjust these paths if your system installs them elsewhere.
##
zpoolCommand = '/sbin/zpool'
zfsCommand = '/sbin/zfs'

##
# Result accumulators reported when the check finishes.
# nagiosStatus is indexed by stateNum, which is also used as the exit code.
##
nagiosStatus = ('OK', 'WARNING', 'CRITICAL', 'UNKNOWN')
stateNum = 0
msg = perfdata = ''

##
# Defaults for the optional checks; overwritten from the command line.
##
checkCapacity = checkFragmentation = False
capWarnThreshold, capCritThreshold = 50, 80
fragWarnThreshold, fragCritThreshold = 50, 80

# The plugin output is emitted through logging as bare messages on stdout.
logging.basicConfig(level=logging.WARN, format='%(message)s', stream=sys.stdout)
def CheckArgBounds(valueArr, minVal, maxVal):
    """Tell whether every item of valueArr lies within [minVal, maxVal].

    Returns False as soon as one item is below minVal or above maxVal,
    and True otherwise (an empty sequence is therefore accepted).
    """
    return not any(item < minVal or item > maxVal for item in valueArr)
def ConvertToGB(valueStr):
    """Convert a size string such as '1.5T' to gigabytes.

    valueStr is a number followed by a single unit letter (K, M, G, T, P
    or E). A decimal comma is accepted in place of a decimal point.

    Returns the size in GB as a float, or None when the trailing unit
    letter is not recognised (this includes the empty string).
    Raises ValueError when the numeric part cannot be parsed as a float.
    """
    # Power of 1024 of each unit relative to one gigabyte.
    exponents = {'K': -2, 'M': -1, 'G': 0, 'T': 1, 'P': 2, 'E': 3}
    # Slicing (rather than indexing) keeps the empty string from raising.
    exponent = exponents.get(valueStr[-1:])
    if exponent is None:
        return None
    number = float(valueStr[:-1].replace(',', '.'))
    # Scaling by a power of two is exact, so results match plain
    # multiplication/division by 1024.
    return number * 1024.0 ** exponent
def RaiseStateNum(stateNumIn, stateNum):
    """Return the higher of the proposed state and the current state.

    The current state is kept unless stateNumIn is strictly greater.
    """
    # max() keeps its first argument on ties, so stateNum wins when equal.
    return max(stateNum, stateNumIn)
###################################################################################
##
# Parse command line args
##
# Command-line interface: one positional pool name plus an optional
# (warning, critical) pair of integer percentages per monitored metric.
parser = argparse.ArgumentParser(
    prog='check_zfs',
    description='Check the ZFS pool specified by an argument.',
    epilog='Note that monitor flags (e.g. capacity) require 2 arguments: '
           'warning threshold, and critical threshold')
parser.add_argument('--capacity', help="monitor utilization of zpool (%%, int [0-100])",
                    type=int, nargs=2)
parser.add_argument('--fragmentation', help="monitor fragmentation of zpool (%%, int [0-100])",
                    type=int, nargs=2)
parser.add_argument('pool', help="name of the zpool to check", type=str)
args = parser.parse_args()

retVal = True
if args.capacity is not None:
    checkCapacity = True
    capWarnThreshold, capCritThreshold = args.capacity
    # CheckArgBounds compares every element against the bounds, so it must
    # receive a flat list of numbers (a leading type code would be compared
    # against an int and raise TypeError on Python 3).
    capArr = [capWarnThreshold, capCritThreshold]
    retVal = CheckArgBounds(capArr, 0, 100)
    if retVal is False:
        stateNum = RaiseStateNum(3, stateNum)
        logging.warning("%s : Capacity thresholds must be between 0 and 100 (as a percent).",
                        nagiosStatus[stateNum])
        parser.print_help()
        sys.exit(stateNum)

retVal = True
if args.fragmentation is not None:
    checkFragmentation = True
    fragWarnThreshold, fragCritThreshold = args.fragmentation
    fragArr = [fragWarnThreshold, fragCritThreshold]
    retVal = CheckArgBounds(fragArr, 0, 100)
    if retVal is False:
        stateNum = RaiseStateNum(3, stateNum)
        logging.warning("%s : Fragmentation thresholds must be between 0 and 100 (as a percent).",
                        nagiosStatus[stateNum])
        parser.print_help()
        sys.exit(stateNum)
###################################################################################
##
# Get generic info about the ZFS environment
# Run `zfs list` through sudo and collect one entry (list of columns) per
# dataset, so the requested pool name can be validated below.
zfsEntries = []
fullCommand = ['/usr/bin/sudo', '-n', zfsCommand, 'list']
try:
    childProcess = subprocess.Popen(fullCommand, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
except OSError:
    stateNum = RaiseStateNum(3, stateNum)
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zfs", nagiosStatus[stateNum])
    sys.exit(stateNum)
zfsString = childProcess.communicate()[0]
zfsRetval = childProcess.returncode
# Any non-zero exit status means the listing cannot be trusted.
if zfsRetval != 0:
    stateNum = RaiseStateNum(3, stateNum)
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zfs", nagiosStatus[stateNum])
    sys.exit(stateNum)
zfsLines = zfsString.splitlines()
# The first line is the column header. Blank lines are dropped so that
# every entry has at least one column.
zfsEntries = [line.split() for line in zfsLines[1:] if line.strip()]
# Make sure the pool we specified is valid. The command output is bytes,
# so the first column is decoded before comparing it to the pool name.
validPool = any(entry[0].decode() == args.pool for entry in zfsEntries)
if not validPool:
    stateNum = RaiseStateNum(3, stateNum)
    logging.warning("%s : Pool %s is invalid. Please select a valid pool.",
                    nagiosStatus[stateNum], args.pool)
    sys.exit(stateNum)
###################################################################################
##
# Get info on zpool
# Run `zpool list <pool>` through sudo and read the pool's properties from
# the header line and the single data line that follows it.
fullCommand = ['/usr/bin/sudo', '-n', zpoolCommand, 'list', args.pool]
try:
    childProcess = subprocess.Popen(fullCommand, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except OSError:
    stateNum = RaiseStateNum(3, stateNum)
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zpool", nagiosStatus[stateNum])
    sys.exit(stateNum)
zpoolString = childProcess.communicate()[0]
zpoolRetval = childProcess.returncode
# Any non-zero exit status means there is no usable output to parse.
if zpoolRetval != 0:
    stateNum = RaiseStateNum(3, stateNum)
    logging.warning("%s : process must be run as root. Possible solution: add the following to "
                    "your visudo: nagios ALL=NOPASSWD: /sbin/zpool", nagiosStatus[stateNum])
    sys.exit(stateNum)
zpoolLines = zpoolString.splitlines()
# A header line and a data line are both required below.
if len(zpoolLines) < 2:
    stateNum = RaiseStateNum(3, stateNum)
    logging.warning("%s : Unexpected output from zpool list for pool %s",
                    nagiosStatus[stateNum], args.pool)
    sys.exit(stateNum)
zpoolMeta = zpoolLines[0].decode().split()
zpoolMetaStr = ','.join(zpoolMeta)
zpoolEntry = zpoolLines[1].decode().split()
zpoolEntryStr = ','.join(zpoolEntry)
# Pair each header name with the value in the same column; a column that is
# absent from the output leaves its variable as the empty string.
zpoolFields = dict(zip(zpoolMeta, zpoolEntry))
name = zpoolFields.get('NAME', '')
size = zpoolFields.get('SIZE', '')
alloc = zpoolFields.get('ALLOC', '')
free = zpoolFields.get('FREE', '')
expandsz = zpoolFields.get('EXPANDSZ', '')
frag = zpoolFields.get('FRAG', '')
cap = zpoolFields.get('CAP', '')
dedup = zpoolFields.get('DEDUP', '')
health = zpoolFields.get('HEALTH', '')
altroot = zpoolFields.get('ALTROOT', '')
# NAME and HEALTH are always required.
for requiredField, requiredValue in (('NAME', name), ('HEALTH', health)):
    if requiredValue == '':
        stateNum = RaiseStateNum(3, stateNum)
        logging.warning("%s : Missing required field in zpool output: %s",
                        nagiosStatus[stateNum], requiredField)
        sys.exit(stateNum)
# CAP and FRAG are required only when the matching check was requested.
if checkCapacity and cap == '':
    stateNum = RaiseStateNum(3, stateNum)
    logging.warning("%s : Cannot monitor capacity without zpool output: CAP. "
                    "Outputs are %s", nagiosStatus[stateNum], zpoolMetaStr)
    sys.exit(stateNum)
if checkFragmentation and frag == '':
    stateNum = RaiseStateNum(3, stateNum)
    logging.warning("%s : Cannot monitor fragmentation without zpool output: FRAG. "
                    "Outputs are %s", nagiosStatus[stateNum], zpoolMetaStr)
    sys.exit(stateNum)
# Get the compression setting and, when compression is enabled, the
# compressratio of the pool.
def _GetZfsProperty(propertyName):
    """Run `zfs get <propertyName> <pool>` and return (NAME, VALUE) columns.

    The columns are located through the header line of the command output;
    a column missing from the header is returned as the empty string.
    Logs a message and exits with the UNKNOWN state when the command cannot
    be started, exits non-zero, or prints fewer than two lines.
    """
    command = ['/usr/bin/sudo', '-n', zfsCommand, 'get', propertyName, args.pool]
    failState = RaiseStateNum(3, stateNum)
    try:
        child = subprocess.Popen(command, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        logging.warning("%s : process must be run as root. Possible solution: add the following "
                        "to your visudo: nagios ALL=NOPASSWD: /sbin/zfs", nagiosStatus[failState])
        sys.exit(failState)
    outputLines = child.communicate()[0].splitlines()
    if child.returncode != 0:
        logging.warning("%s : process must be run as root. Possible solution: add the following "
                        "to your visudo: nagios ALL=NOPASSWD: /sbin/zfs", nagiosStatus[failState])
        sys.exit(failState)
    # A header line and a data line are both required.
    if len(outputLines) < 2:
        logging.warning("%s : Unexpected output from zfs get %s for pool %s",
                        nagiosStatus[failState], propertyName, args.pool)
        sys.exit(failState)
    header = outputLines[0].decode().split()
    row = outputLines[1].decode().split()
    fields = dict(zip(header, row))
    return fields.get('NAME', ''), fields.get('VALUE', '')


compressRatioName = ''
compressRatioValue = ''
compressName, compressValue = _GetZfsProperty('compression')
if compressName == '':
    stateNum = RaiseStateNum(3, stateNum)
    logging.warning("%s : Missing required field in zfs output: NAME", nagiosStatus[stateNum])
    sys.exit(stateNum)
# The property can name an algorithm (e.g. lz4) instead of plain 'on', so
# anything other than 'off' or a missing value is treated as enabled.
if compressValue not in ('', '-', 'off'):
    compressRatioName, compressRatioValue = _GetZfsProperty('compressratio')
###################################################################################
##
# OK, finally in the actual status checking of the zpool
# Let's build up our perfdata, regardless of what we're checking
# Build the perfdata string from every value that is available, whether or
# not the matching check was requested. Each item is followed by a space.
perfItems = []

# Percentages: strip the '%' sign and only report values that are plain
# digits, so a placeholder value never ends up in the perfdata.
fragPercent = frag.replace("%", "") if frag != '' else ''
capPercent = cap.replace("%", "") if cap != '' else ''
for perfLabel, percent, checked, warnAt, critAt in (
        ('frag', fragPercent, checkFragmentation, fragWarnThreshold, fragCritThreshold),
        ('cap', capPercent, checkCapacity, capWarnThreshold, capCritThreshold)):
    if not percent.isdigit():
        continue
    if checked:
        thresholds = str(warnAt) + ";" + str(critAt) + ";"
    else:
        thresholds = ";;"
    perfItems.append(perfLabel + "=" + percent + "%;" + thresholds)

# Perfdata for dedup & compression factor (reported without the 'x' suffix).
if dedup != '':
    dedup_no_x = dedup.rstrip('x')
    perfItems.append("dedup=" + str(dedup_no_x))
if compressRatioValue != '':
    compressRatioNoX = compressRatioValue.rstrip('x')
    perfItems.append("compress_ratio=" + str(compressRatioNoX))

# Sizes are reported in GB. A size that cannot be converted is left out
# instead of being printed as "NoneGB" or aborting the check.
for perfLabel, rawSize in (('size', size), ('alloc', alloc), ('free', free)):
    if rawSize == '':
        continue
    try:
        gigabytes = ConvertToGB(rawSize)
    except ValueError:
        gigabytes = None
    if gigabytes is not None:
        perfItems.append(perfLabel + "=" + str(gigabytes) + "GB;;;")

perfdata += ''.join(item + " " for item in perfItems)
##
# Do mandatory checks
# Map the pool health string to (healthNum reported in perfdata, state to
# raise). An unrecognised health string keeps healthNum at -1 and raises
# the UNKNOWN state so that it is never reported as OK.
healthStates = {
    'ONLINE': (0, 0),
    'OFFLINE': (1, 1),
    'REMOVED': (2, 1),
    'UNAVAIL': (3, 1),
    'DEGRADED': (4, 2),
    'FAULTED': (5, 2),
}
healthNum, healthState = healthStates.get(health, (-1, 3))
# RaiseStateNum never lowers the state, so ONLINE (0) leaves it untouched.
stateNum = RaiseStateNum(healthState, stateNum)
perfdata += "health=" + str(healthNum) + ";1;3;"
perfdata += " "
##
# Initial part of msg
# Start the message with the pool name; a non-healthy status is reported
# right after it so that it appears first.
msg = "POOL: " + str(name)
healthMsgFilled = False
if healthNum > 0:
    msg += ", STATUS: " + str(health)
    healthMsgFilled = True
##
# Do optional checks
# Both checks compare only when the percentage is a plain number, so a
# non-numeric value is skipped instead of raising ValueError.
fragMsgFilled = False
capMsgFilled = False
if checkFragmentation and fragPercent.isdigit():
    if int(fragPercent) > int(fragCritThreshold):
        fragMsgFilled = True
        stateNum = RaiseStateNum(2, stateNum)
        msg += ", FRAG CRIT: " + str(frag)
    elif int(fragPercent) > int(fragWarnThreshold):
        fragMsgFilled = True
        stateNum = RaiseStateNum(1, stateNum)
        msg += ", FRAG WARN: " + str(frag)
if checkCapacity and capPercent.isdigit():
    if int(capPercent) > int(capCritThreshold):
        capMsgFilled = True
        stateNum = RaiseStateNum(2, stateNum)
        msg += ", CAP CRIT: " + str(cap)
    elif int(capPercent) > int(capWarnThreshold):
        capMsgFilled = True
        stateNum = RaiseStateNum(1, stateNum)
        msg += ", CAP WARN: " + str(cap)
##
# Build up rest of message
# Append every remaining value that has not been reported yet.
if not healthMsgFilled:
    msg += ", STATUS: " + str(health)
for msgLabel, msgValue in (('SIZE', size), ('ALLOC', alloc), ('FREE', free),
                           ('DEDUP', dedup), ('COMPRESS', compressRatioValue)):
    if msgValue != '':
        msg += ", " + msgLabel + ": " + str(msgValue)
# FRAG and CAP are skipped when a threshold message already mentioned them.
if frag != '' and not fragMsgFilled:
    msg += ", FRAG: " + str(frag)
if cap != '' and not capMsgFilled:
    msg += ", CAP: " + str(cap)
##
# Print our output and return
# sys.exit is used because the bare exit() helper is only installed by the
# site module and is not guaranteed to exist.
logging.warning("%s: %s | %s", nagiosStatus[stateNum], msg, perfdata)
sys.exit(stateNum)