munin_ipmi_plugins

changeset 0:f2e7c91f5ea7 tip

initial release - code written by Ludovic Aubry <ludovic.aubry@logilab.fr>
author Arthur Lutz <arthur.lutz@logilab.fr>
date Wed, 10 Jun 2009 17:06:33 +0200
parents
children
files README ipmi_sensor_
diffstat 2 files changed, 307 insertions(+), 0 deletions(-) [+]
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/README	Wed Jun 10 17:06:33 2009 +0200
     1.3 @@ -0,0 +1,22 @@
     1.4 +munin_ipmi_plugins
     1.5 +------------------
     1.6 +
     1.7 +This is simply a Python rewrite of the ipmi_sensor_ plugin written in Ruby by Peter Palfrader.
     1.8 +
     1.9 +
    1.10 +
    1.11 +
    1.12 +Reason
    1.13 +------
    1.14 +
    1.15 +We had a machine on which the full "ipmitool -I open " command took a very long time to run, so we
    1.16 +had to change the way the plugin works. We prefer Python, so a quick rewrite was done.
    1.17 +
    1.18 +Improvements
    1.19 +------------
    1.20 +
    1.21 +The full ipmitool command is only run on "suggest"; after that, a specific "sensor get" is run
    1.22 +every time munin-node queries the plugin.
    1.23 +
    1.24 +The Ruby version can be found here : 
    1.25 +http://svn.noreply.org/cgi-bin/viewcvs.cgi/*checkout*/trunk/munin/ipmi_sensor_?root=WeaselUtils
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/ipmi_sensor_	Wed Jun 10 17:06:33 2009 +0200
     2.3 @@ -0,0 +1,285 @@
     2.4 +#!/usr/bin/python
     2.5 +#
     2.6 +# munin plugin for the sensors data provided by ipmi
     2.7 +# 
     2.8 +# Copyright (c) 2006 Logilab
     2.9 +#
    2.10 +# Inspired by code written by Peter Palfrader
    2.11 +#
    2.12 +# This program is free software; you can redistribute it and/or modify it under
    2.13 +# the terms of the GNU General Public License as published by the Free Software
    2.14 +# Foundation; either version 2 of the License, or (at your option) any later
    2.15 +# version.
    2.16 +#
    2.17 +# This program is distributed in the hope that it will be useful, but WITHOUT
    2.18 +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
    2.19 +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
    2.20 +#
    2.21 +# You should have received a copy of the GNU General Public License along with
    2.22 +# this program; if not, write to the Free Software Foundation, Inc.,
    2.23 +# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
    2.24 +#
    2.25 +#
    2.26 +# ipmitool probably needs to be run as root, and it may take more than 10 seconds on some hosts.
    2.27 +#
    2.28 +# Add the following to your /etc/munin/plugin-conf.d/munin-node:
    2.29 +# [ipmi_sensor_*]
    2.30 +# user root
    2.31 +# timeout 20
    2.32 +#
    2.33 +# Magic markers
    2.34 +#%# capabilities=autoconf suggest
    2.35 +#%# family=contrib
    2.36 +
    2.37 +from subprocess import Popen, PIPE
    2.38 +from os import stat, access, R_OK, F_OK
    2.39 +from os.path import join
    2.40 +from stat import ST_MTIME
    2.41 +from time import time
    2.42 +import sys
    2.43 +import re
    2.44 +
    2.45 +CACHEDIR = "/var/lib/munin/plugin-state"
    2.46 +CACHEFILE = "plugin-ipmi_sensor.cache"
    2.47 +CACHEAGE = 120
    2.48 +CONFIG = '/etc/munin/plugin-conf.d/ipmi'
    2.49 +
    2.50 +
    2.51 +
    2.52 +def normalize_sensor(name):
    2.53 +    name = name.lower().replace("-","M").replace("+","P")
    2.54 +    name = re.sub("[^a-z0-9A-Z]","_", name)
    2.55 +    return name
    2.56 +
    2.57 +def parse_data( data ):
    2.58 +    """
    2.59 +    Parse the data returned by ipmitool get which should be of the
    2.60 +    following form:
    2.61 +
    2.62 +    Sensor ID              : FAN 1 RPM (0x30)
    2.63 +     Entity ID             : 7.1
    2.64 +     Sensor Type (Analog)  : Fan
    2.65 +     Sensor Reading        : 6150 (+/- 75) RPM
    2.66 +     Status                : ok
    2.67 +     Lower Non-Recoverable : na
    2.68 +     Lower Critical        : 2025.000
    2.69 +     Lower Non-Critical    : na
    2.70 +     Upper Non-Critical    : na
    2.71 +     Upper Critical        : na
    2.72 +     Upper Non-Recoverable : na
    2.73 +     Assertion Events      : 
    2.74 +     Assertions Enabled    : lcr- 
    2.75 +     Deassertions Enabled  : lcr- 
    2.76 +    
    2.77 +    """
    2.78 +    sensors = {}
    2.79 +    cur_sensor = None
    2.80 +    for line in data.splitlines()[1:]:
    2.81 +        if not line.strip():
    2.82 +            cur_sensor = None
    2.83 +            continue
    2.84 +        if line.startswith("Sensor ID"):
    2.85 +            label, data = line.split(":", 1)
    2.86 +            idm = re.match("(.*) \((0x.*)\)", data)
    2.87 +            if not idm:
    2.88 +                continue
    2.89 +            id = idm.group(1).strip()
    2.90 +            cur_sensor = { "id" : idm.group(2) }
    2.91 +            sensors[id] = cur_sensor
    2.92 +        if not cur_sensor:
    2.93 +            continue
    2.94 +        label, data = line.split(":", 1)
    2.95 +        cur_sensor[label.strip()] = data.strip()
    2.96 +    return sensors
    2.97 +
    2.98 +def get_sensor_names():
    2.99 +    p = Popen(["ipmitool","-I","open","sensor"], shell=False, stdout=PIPE)
   2.100 +    data = p.stdout.readlines()
   2.101 +
   2.102 +    units = {}
   2.103 +    for k,u in UNITS_TO_SENSORS.items():
   2.104 +        units[u['vlabel'].lower()] = k
   2.105 +    sensors = {}
   2.106 +    for line in data:
   2.107 +        columns = [ s.strip() for s in line.split('|') ]
   2.108 +        key = units.get(columns[2].lower(), None)
   2.109 +        if key:
   2.110 +            lst = sensors.setdefault(key, [])
   2.111 +            lst.append( columns[0] )
   2.112 +    return sensors
   2.113 +
   2.114 +def get_sensors():
   2.115 +    cache_filename = join(CACHEDIR,CACHEFILE)
   2.116 +    try:
   2.117 +        mtime = stat(cache_filename)[ST_MTIME]
   2.118 +    except OSError:
   2.119 +        mtime = 0
   2.120 +    curtime = time()
   2.121 +
   2.122 +    if curtime-mtime>CACHEAGE:
   2.123 +	if not SENSORS:
   2.124 +            p = Popen(["ipmitool","-I","open","sensor"], shell=False, stdout=PIPE)
   2.125 +	else:
   2.126 +            p = Popen(["ipmitool","-I","open","sensor", "get"] + SENSORS, shell=False, stdout=PIPE)
   2.127 +        data = p.stdout.read()
   2.128 +        try:
   2.129 +            f = file(cache_filename,"w")
   2.130 +            f.write(data)
   2.131 +        except OSError:
   2.132 +            pass
   2.133 +    else:
   2.134 +        data = file(cache_filename).read()
   2.135 +    return parse_data(data)
   2.136 +
   2.137 +def query_unit(arg):
   2.138 +    m = re.search( '_u_(.*)$', arg)
   2.139 +    if not m:
   2.140 +        raise RuntimeError("Could not figure which unit you want based on executable name")
   2.141 +    return m.group(1)
   2.142 +
   2.143 +
   2.144 +UNITS_TO_SENSORS = {
   2.145 +    'volts' : { 'title' : "Voltages",
   2.146 +                'args' : '--base 1000',
   2.147 +                'vlabel' : 'Volts',
   2.148 +                'info' : "This graph shows the voltages as reported by IPMI",
   2.149 +                'sensors' : [ 'Voltage 2', ],
   2.150 +                },
   2.151 +    'degrees_c' : { 'title' : "Temperature",
   2.152 +                'args' : '--base 1000 -l 0',
   2.153 +                'vlabel' : 'Degrees C',
   2.154 +                'info' : "This graph shows the temperatures as reported by IPMI",
   2.155 +                'sensors' : [ 'Ambient Temp', ],
   2.156 +                },
   2.157 +    'rpm' : { 'title' : "RPMs",
   2.158 +                'args' : '--base 1000 -l 0',
   2.159 +                'vlabel' : 'RPM',
   2.160 +                'info' : "This graph shows the RPMs as reported by IPMI",
   2.161 +                'sensors' : ['FAN 1 RPM', 'FAN 2 RPM', 'FAN 3 RPM', 'FAN 4 RPM',],
   2.162 +                },
   2.163 +    'amps' : { 'title' : "Amperes",
   2.164 +                'args' : '--base 1000',
   2.165 +                'vlabel' : 'Amperes',
   2.166 +                'info' : "This graph shows the amperes as reported by IPMI",
   2.167 +                'sensors' : ['Current 2'],
   2.168 +                },
   2.169 +    'watts' : { 'title' : "Watts",
   2.170 +                'args' : '--base 1000',
   2.171 +                'vlabel' : 'Watts',
   2.172 +                'info' : "This graph shows the watts as reported by IPMI",
   2.173 +                'sensors' : ['System Level',],
   2.174 +                },
   2.175 +}
   2.176 +
   2.177 +
   2.178 +if access(CONFIG, R_OK):
   2.179 +    for line in file(CONFIG):
   2.180 +        if line.strip().startswith('#'):
   2.181 +            continue
   2.182 +        data = line.split('=',1)
   2.183 +        if len(data)!=2:
   2.184 +            continue
   2.185 +        unit,sensors = [ d.strip() for d in data ]
   2.186 +        if unit not in UNITS_TO_SENSORS:
   2.187 +            continue
   2.188 +        sensor_list = [ s.strip() for s in sensors.split(',') if s.strip() ]
   2.189 +        UNITS_TO_SENSORS[unit]['sensors'] = sensor_list
   2.190 +
   2.191 +SENSORS = []
   2.192 +for v in UNITS_TO_SENSORS.values():
   2.193 +    SENSORS += v['sensors']
   2.194 +
   2.195 +
   2.196 +def config_unit(unit):
   2.197 +    info = UNITS_TO_SENSORS[unit]
   2.198 +    data = get_sensors()
   2.199 +    print "graph_title IPMI Sensors:", info['title']
   2.200 +    print "graph_args", info['args']
   2.201 +    print "graph_vlabel", info['vlabel']
   2.202 +    print "graph_category sensors"
   2.203 +    print "graph_info", info['info']
   2.204 +    for lbl in info['sensors']:
   2.205 +        values = data[lbl]
   2.206 +        nname = normalize_sensor(lbl)
   2.207 +        
   2.208 +        print "%s.label %s" % (nname, lbl)
   2.209 +        assertions = values['Assertions Enabled'].split()
   2.210 +        warn_l = warn_u = crit_l = crit_u = ""
   2.211 +        if 'lcr-' in assertions:
   2.212 +            crit_l = values['Lower Critical'].replace("na","")
   2.213 +        if 'lnc-' in assertions:
   2.214 +            warn_l = values['Lower Non-Critical'].replace("na","")
   2.215 +        if 'ucr+' in assertions:
   2.216 +            crit_u = values['Upper Critical'].replace("na","")
   2.217 +        if 'unc+' in assertions:
   2.218 +            warn_u = values['Upper Non-Critical'].replace("na","")
   2.219 +        warn = "%s:%s" % (warn_l,warn_u)
   2.220 +        crit = "%s:%s" % (crit_l,crit_u)
   2.221 +        if warn!=":":
   2.222 +            print "%s.warning %s" % (nname, warn)
   2.223 +        if crit!=":":
   2.224 +            print "%s.critical %s" % (nname, crit)
   2.225 +        
   2.226 +        
   2.227 +
   2.228 +def config():
   2.229 +    unit = query_unit(sys.argv[0])
   2.230 +    config_unit(unit)
   2.231 +
   2.232 +def report_unit(unit):
   2.233 +    info = UNITS_TO_SENSORS[unit]
   2.234 +    data = get_sensors()
   2.235 +    for lbl in info['sensors']:
   2.236 +        nname = normalize_sensor(lbl)
   2.237 +        value = data[lbl]["Sensor Reading"].split()[0]
   2.238 +        print "%s.value %s" % (nname, value)
   2.239 +    
   2.240 +
   2.241 +def report():
   2.242 +    unit = query_unit(sys.argv[0])
   2.243 +    report_unit(unit)
   2.244 +
   2.245 +def autoconf():
   2.246 +    data = get_sensors()
   2.247 +    if data:
   2.248 +        print "yes"
   2.249 +    else:
   2.250 +        print "no (no ipmitool output)"
   2.251 +
   2.252 +def suggest():
   2.253 +    names = get_sensor_names()
   2.254 +    if not os.access(CONFIG, F_OK):
   2.255 +        f = file(CONFIG, "w")
   2.256 +        for key, sensors in names.items():
   2.257 +            f.write("%s = %s\n" % (key, ",".join(sensors)))
   2.258 +    for key in names.keys():
   2.259 +        print "u_%s" % key
   2.260 +
   2.261 +
   2.262 +def debug():
   2.263 +    print SENSORS
   2.264 +    data = get_sensors()
   2.265 +    for key, value in data.items():
   2.266 +        print "%s : %s (%s - %s) [%s - %s] %s" % (key, value['Sensor Reading'],
   2.267 +                                                  value['Lower Non-Critical'], value['Upper Non-Critical'],
   2.268 +                                                  value['Lower Critical'], value['Upper Critical'],
   2.269 +                                                  value['Assertions Enabled'],)
   2.270 +
   2.271 +def main():
   2.272 +    if len(sys.argv)>1:
   2.273 +        command = sys.argv[1]
   2.274 +    else:
   2.275 +        command = ""
   2.276 +    if command=="autoconf":
   2.277 +        autoconf()
   2.278 +    elif command=="suggest":
   2.279 +        suggest()
   2.280 +    elif command=='config':
   2.281 +        config()
   2.282 +    elif command=='debug':
   2.283 +        debug()
   2.284 +    else:
   2.285 +        report()
   2.286 +
   2.287 +if __name__ == "__main__":
   2.288 +    main()