#!/usr/bin/env python
"""
Munin plugin that graphs Tahoe-LAFS node statistics read from a pickle file.

You can essentially have munin graph every statistic that is obtainable
from http://localhost:3456/statistics . Some, but not all, of them, are
already preconfigured in a hopefully sensible way below.

To add a new statistic, simply create a new "block" with the appropriate
information. Depending on the type of statistic, different munin graph
configurations make sense. Some further pointers:

    http://munin-monitoring.org/wiki/HowToWritePlugins
    http://munin-monitoring.org/wiki/fieldname.type
    http://munin-monitoring.org/wiki/fieldname.draw
    http://munin-monitoring.org/wiki/fieldname.warning

You will need to experiment with the configuration until it suits your
needs; the below configuration may, or may not, be suitable for your setup
as it is.

An example: to have munin graph the (aggregated) disk usage and disk
available statistics of the entire grid:

    # ensure that /path/to/tahoe_stats is executable (chmod 755)

    cd /etc/munin/plugins/
    ln -s /path/to/tahoe_stats tahoe_storage_used
    ln -s /path/to/tahoe_stats tahoe_storage_avail

    # make sure that some file (it doesn't matter which)
    # contains the env.statsfile setting for each and every
    # plugin that you "enabled" above.

    cd /etc/munin/plugin-conf.d/
    cat tahoe_stats.conf

    [tahoe_storage_used]
    env.statsfile /tmp/tahoe-stats.pickle
    [tahoe_storage_avail]
    env.statsfile /tmp/tahoe-stats.pickle

The plugin filename (in /etc/munin/plugins/), the configuration section
name in the configuration file, and the key in the PLUGINS variable below
must match.

In this example, maybe tahoe_storage_avail is the most interesting one, as
it shows a few "advanced" features. First, it uses the AREASTACK draw
method (instead of LINE1), which makes the graph contain areas stacked on
top of each other, giving information about both individual nodes, and the
total grid capacity. Furthermore, it contains warning and critical settings
(plus a description), which will give visual alerts if any node goes below
50GB (warning) or 15GB (critical). See the 'tahoe_storage_avail' plugin
entry below for reference on how this is defined.
"""

import os
import pickle
import re
import sys
import time
import math

STAT_VALIDITY = 300  # 5min limit on reporting stats

# Maps the plugin name (the basename the script is invoked as) to its
# configuration:
#   statid       - key of the statistic inside the node's category dict
#   category     - which per-node dict to read ('stats' or 'counters')
#   configheader - graph-wide lines printed once for "config"
#   graph_config - per-node template printed for "config" (%(name)s)
#   graph_render - per-node template printed when values are fetched
#                  (%(name)s, %(value)s)
PLUGINS = {
    # LOAD AVERAGE
    'tahoe_runtime_load_avg': {
        'statid': 'load_monitor.avg_load',
        'category': 'stats',
        'configheader': '\n'.join([
            'graph_title Tahoe Runtime Load Average',
            'graph_vlabel load',
            'graph_category tahoe',
            'graph_info This graph shows average reactor delay',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_runtime_load_peak': {
        'statid': 'load_monitor.max_load',
        'category': 'stats',
        'configheader': '\n'.join([
            'graph_title Tahoe Runtime Load Peak',
            'graph_vlabel load',
            'graph_category tahoe',
            'graph_info This graph shows peak reactor delay',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    # STORAGE ALLOCATION (BYTES)
    'tahoe_storage_consumed': {
        'statid': 'storage_server.consumed',
        'category': 'stats',
        'configheader': '\n'.join([
            'graph_title Tahoe Storage Server Space Consumed',
            'graph_vlabel bytes',
            'graph_category tahoe_storage_server',
            'graph_info This graph shows space consumed',
            'graph_args --base 1024',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_storage_avail': {
        'statid': 'storage_server.disk_avail',
        'category': 'stats',
        'configheader': '\n'.join([
            'graph_title Tahoe Storage Server Disk Available',
            'graph_vlabel bytes',
            'graph_category tahoe_storage_server',
            'graph_info This graph shows disk available',
            'graph_args --base 1024',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            # '%(name)s.draw LINE1',
            '%(name)s.draw AREASTACK',
            # 50 GiB and 15 GiB expressed in bytes
            '%(name)s.warning 53687091200:',
            '%(name)s.critical 16106127360:',
            '%(name)s.info Warn < 50G, Critical < 15G',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_storage_used': {
        'statid': 'storage_server.disk_used',
        'category': 'stats',
        'configheader': '\n'.join([
            'graph_title Tahoe Storage Server Disk Used',
            'graph_vlabel bytes',
            'graph_category tahoe_storage_server',
            'graph_info This graph shows disk used',
            'graph_args --base 1024',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            # '%(name)s.draw LINE1',
            '%(name)s.draw AREASTACK',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_storage_allocated': {
        'statid': 'storage_server.allocated',
        'category': 'stats',
        'configheader': '\n'.join([
            'graph_title Tahoe Storage Server Space Allocated',
            'graph_vlabel bytes',
            'graph_category tahoe_storage_server',
            'graph_info This graph shows space allocated',
            'graph_args --base 1024',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_storage_bytes_added': {
        'statid': 'storage_server.bytes_added',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Storage Server Bytes Added',
            'graph_vlabel bytes',
            'graph_category tahoe_storage_server',
            'graph_info This graph shows cummulative bytes added',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            # '%(name)s.type DERIVE',
            '%(name)s.min 0',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_storage_bytes_freed': {
        'statid': 'storage_server.bytes_freed',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Storage Server Bytes Freed',
            'graph_vlabel bytes',
            'graph_category tahoe_storage_server',
            'graph_info This graph shows cummulative bytes freed',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_storage_buckets': {
        'statid': 'storage_server.total_bucket_count',
        'category': 'stats',
        'configheader': '\n'.join([
            'graph_title Tahoe Storage Server Total Bucket Count',
            'graph_vlabel buckets',
            'graph_category tahoe_storage_server',
            'graph_info This graph shows the total bucket count',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_storage_operations_allocate': {
        'statid': 'storage_server.allocate',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Storage Server Allocate_Bucket Operations',
            'graph_vlabel operations per second',
            'graph_category tahoe_storage_server',
            'graph_info This graph shows how many allocate_buckets operations occured per second. Each immutable file upload causes one such operation per server.',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.type DERIVE',
            '%(name)s.min 0',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_storage_operations_get': {
        'statid': 'storage_server.get',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Storage Server get_bucket Operations',
            'graph_vlabel operations per second',
            'graph_category tahoe_storage_server',
            'graph_info This graph shows how many get_bucket operations occured per second. Each immutable file download/check causes one such operation per server.',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.type DERIVE',
            '%(name)s.min 0',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_storage_operations_writev': {
        'statid': 'storage_server.writev',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Storage Server writev Operations',
            'graph_vlabel operations per second',
            'graph_category tahoe_storage_server',
            'graph_info This graph shows how many writev operations occured per second. Each mutable file / dirnode write causes one such operation per server.',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.type DERIVE',
            '%(name)s.min 0',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_storage_operations_readv': {
        'statid': 'storage_server.readv',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Storage Server readv Operations',
            'graph_vlabel operations per second',
            'graph_category tahoe_storage_server',
            'graph_info This graph shows how many readv operations occured per second. Each dirnode read causes one such operation per server.',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.type DERIVE',
            '%(name)s.min 0',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    # HELPER
    'tahoe_helper_incoming_files': {
        'statid': 'chk_upload_helper.incoming_count',
        'category': 'stats',
        'configheader': '\n'.join([
            'graph_title Tahoe Upload Helper Incoming File Count',
            'graph_vlabel n files',
            'graph_category tahoe_upload_helper',
            'graph_info This graph shows number of incoming files',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_helper_incoming_filesize': {
        'statid': 'chk_upload_helper.incoming_size',
        'category': 'stats',
        'configheader': '\n'.join([
            'graph_title Tahoe Upload Helper Incoming File Size',
            'graph_vlabel bytes',
            'graph_category tahoe_upload_helper',
            'graph_info This graph shows total size of incoming files',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.draw LINE2',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_helper_incoming_files_old': {
        'statid': 'chk_upload_helper.incoming_size_old',
        'category': 'stats',
        'configheader': '\n'.join([
            'graph_title Tahoe Upload Helper Incoming Old Files',
            'graph_vlabel bytes',
            'graph_category tahoe_upload_helper',
            'graph_info This graph shows total size of old incoming files',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_helper_encoding_files': {
        'statid': 'chk_upload_helper.encoding_count',
        'category': 'stats',
        'configheader': '\n'.join([
            'graph_title Tahoe Upload Helper Encoding File Count',
            'graph_vlabel n files',
            'graph_category tahoe_upload_helper',
            'graph_info This graph shows number of encoding files',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_helper_encoding_filesize': {
        'statid': 'chk_upload_helper.encoding_size',
        'category': 'stats',
        'configheader': '\n'.join([
            'graph_title Tahoe Upload Helper Encoding File Size',
            'graph_vlabel bytes',
            'graph_category tahoe_upload_helper',
            'graph_info This graph shows total size of encoding files',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.draw AREA',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_helper_encoding_files_old': {
        'statid': 'chk_upload_helper.encoding_size_old',
        'category': 'stats',
        'configheader': '\n'.join([
            'graph_title Tahoe Upload Helper Encoding Old Files',
            'graph_vlabel bytes',
            'graph_category tahoe_upload_helper',
            'graph_info This graph shows total size of old encoding files',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_helper_active_uploads': {
        'statid': 'chk_upload_helper.active_uploads',
        'category': 'stats',
        'configheader': '\n'.join([
            'graph_title Tahoe Upload Helper Active Files',
            'graph_vlabel n files',
            'graph_category tahoe_upload_helper',
            'graph_info This graph shows number of files actively being processed by the helper',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.draw AREASTACK',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_helper_upload_requests': {
        'statid': 'chk_upload_helper.upload_requests',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Upload Helper Upload Requests',
            'graph_vlabel requests',
            'graph_category tahoe_upload_helper',
            'graph_info This graph shows the number of upload requests arriving at the helper',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.type DERIVE',
            '%(name)s.min 0',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_helper_upload_already_present': {
        'statid': 'chk_upload_helper.upload_already_present',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Upload Helper Uploads Already Present',
            'graph_vlabel requests',
            'graph_category tahoe_upload_helper',
            'graph_info This graph shows the number of uploads whose files are already present in the grid',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.type DERIVE',
            '%(name)s.min 0',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_helper_upload_need_upload': {
        'statid': 'chk_upload_helper.upload_need_upload',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Upload Helper Uploads Needing Upload',
            'graph_vlabel requests',
            'graph_category tahoe_upload_helper',
            'graph_info This graph shows the number of uploads whose files are not already present in the grid',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.type DERIVE',
            '%(name)s.min 0',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_helper_encoded_bytes': {
        'statid': 'chk_upload_helper.encoded_bytes',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Upload Helper Encoded Bytes',
            'graph_vlabel bytes',
            'graph_category tahoe_upload_helper',
            'graph_info This graph shows the number of bytes encoded by the helper',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.type DERIVE',
            '%(name)s.min 0',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_helper_fetched_bytes': {
        'statid': 'chk_upload_helper.fetched_bytes',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Upload Helper Fetched Bytes',
            'graph_vlabel bytes',
            'graph_category tahoe_upload_helper',
            'graph_info This graph shows the number of bytes fetched by the helper',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.type DERIVE',
            '%(name)s.min 0',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    # WEBAPI
    'tahoe_uploader_bytes_uploaded': {
        'statid': 'uploader.bytes_uploaded',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Uploader Bytes Uploaded',
            'graph_vlabel bytes',
            'graph_category tahoe_traffic',
            'graph_info This graph shows the number of bytes uploaded',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.type DERIVE',
            # '%(name)s.min 0',
            # '%(name)s.draw LINE1',
            '%(name)s.draw AREASTACK',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_uploader_files_uploaded': {
        'statid': 'uploader.files_uploaded',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Uploader Files Uploaded',
            'graph_vlabel files',
            'graph_category tahoe_traffic',
            'graph_info This graph shows the number of files uploaded',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            # '%(name)s.type DERIVE',
            '%(name)s.min 0',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_downloader_bytes_downloaded': {
        'statid': 'downloader.bytes_downloaded',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Downloader Bytes Downloaded',
            'graph_vlabel bytes',
            'graph_category tahoe_traffic',
            'graph_info This graph shows the number of bytes downloaded',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.type DERIVE',
            # '%(name)s.min 0',
            # '%(name)s.draw LINE1',
            '%(name)s.draw AREASTACK',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_downloader_files_downloaded': {
        # The counter lives in the "downloader." namespace, like
        # downloader.bytes_downloaded above; the former
        # 'uploader.files_downloaded' never matched any statistic.
        'statid': 'downloader.files_downloaded',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Downloader Files Downloaded',
            'graph_vlabel files',
            'graph_category tahoe_traffic',
            'graph_info This graph shows the number of files downloaded',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            # '%(name)s.type DERIVE',
            '%(name)s.min 0',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_mutable_files_published': {
        'statid': 'mutable.files_published',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Mutable Files Published',
            'graph_vlabel files',
            'graph_category tahoe_traffic',
            'graph_info This graph shows the number of mutable files published',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.type DERIVE',
            '%(name)s.min 0',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },

    'tahoe_mutable_files_retrieved': {
        'statid': 'mutable.files_retrieved',
        'category': 'counters',
        'configheader': '\n'.join([
            'graph_title Tahoe Mutable Files Retrieved',
            'graph_vlabel files',
            'graph_category tahoe_traffic',
            'graph_info This graph shows the number of files retrieved',
        ]),
        'graph_config': '\n'.join([
            '%(name)s.label %(name)s',
            '%(name)s.type DERIVE',
            '%(name)s.min 0',
            '%(name)s.draw LINE1',
        ]),
        'graph_render': '\n'.join([
            '%(name)s.value %(value)s',
        ]),
    },
}


# This will slightly "obfuscate" node names that contain
# an email address (as is the convention on VolunteerGrid2)
def smash_name(name):
    """Return *name* with any e-mail domain masked and odd characters replaced.

    Everything between an '@' and the next '.' becomes '***'; afterwards
    every character other than ASCII letters, digits, '@', '*' and '-' is
    replaced by '_'.
    """
    name = re.sub(r'@([^\.])+', '@***', name)
    name = re.sub(r'[^a-zA-Z0-9@\*-]', '_', name)
    return name


def open_stats(fname):
    """Load and return the pickled statistics object stored in *fname*.

    Raises whatever ``open`` or ``pickle.load`` raise for a missing or
    corrupt file.
    """
    # NOTE(security): unpickling executes arbitrary code embedded in the
    # file, so the stats file must only be writable by trusted users.
    with open(fname, 'rb') as f:  # closed even when unpickling fails
        return pickle.load(f)


def main(argv):
    """Print munin configuration or current values for one plugin.

    argv[0] selects the plugin: its basename (minus a trailing '.py') must
    be a key of PLUGINS. If argv[1] is 'config', the graph header and the
    per-node configuration are printed and the process exits with status 0;
    otherwise the current per-node values are printed.

    The statistics file path is taken from the first environment variable
    whose name starts with 'statsfile'.

    Raises:
        RuntimeError: if the plugin name is not in PLUGINS, or if no
            'statsfile' environment variable is set.
    """
    graph_name = os.path.basename(argv[0])
    if graph_name.endswith('.py'):
        graph_name = graph_name[:-3]

    plugin_conf = PLUGINS.get(graph_name)
    if plugin_conf is None:
        # Fail with a readable message instead of a TypeError on None later.
        raise RuntimeError("No plugin configuration found for %r"
                           % (graph_name,))

    for k, v in os.environ.items():
        if k.startswith('statsfile'):
            stats_file = v
            break
    else:
        raise RuntimeError("No 'statsfile' env var found")

    stats = open_stats(stats_file)
    # presumably a dict: tubid -> {'nickname', 'timestamp', 'stats': {...}};
    # that is the only shape the code below reads -- verify against the
    # gatherer that writes the pickle.

    now = time.time()

    def output_nodes(output_section, check_time):
        """Print plugin_conf[output_section] once per node, sorted by name.

        With check_time true, nodes whose 'timestamp' is more than
        STAT_VALIDITY seconds away from now are skipped.
        """
        # Display name: first four characters of the key plus the nickname.
        pairs = dict((smash_name("%s-%s" % (k[:4], v['nickname'])), k)
                     for (k, v) in stats.items())
        category = plugin_conf['category']
        statid = plugin_conf['statid']
        for name in sorted(pairs):
            nodestats = stats[pairs[name]]
            age = math.fabs(now - nodestats.get('timestamp', 0))
            if check_time and age > STAT_VALIDITY:
                continue
            # A node that lacks the category (or 'stats' altogether) is
            # skipped like one that lacks the statistic, rather than
            # aborting the output for every other node.
            node_category = nodestats.get('stats', {}).get(category, {})
            value = node_category.get(statid)
            if value is not None:
                args = {'name': name, 'value': value}
                # Single-argument print(...) behaves the same on
                # Python 2 and Python 3.
                print(plugin_conf[output_section] % args)

    if len(argv) > 1 and argv[1] == 'config':
        print(plugin_conf['configheader'])
        output_nodes('graph_config', False)
        sys.exit(0)

    output_nodes('graph_render', True)


if __name__ == '__main__':
    main(sys.argv)