From 2254a769156fd0461e2fc5e33dedf17c16c53057 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Aur=C3=A9lien=20Reynaud?=
Date: Tue, 10 Aug 2010 20:37:53 +0200
Subject: [PATCH] New plugin - lpar
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Hello,

here is a patch against 4.10 adding a new plugin "lpar".

LPAR stands for Logical PARtitions, which is the virtualization
solution for IBM high-end power systems running AIX.

The standard cpu plugin shows cpu usage as a percentage of each cpu
available to the system, but in an LPAR the number of cpus and their
apparent power can vary according to the load of every LPAR sharing
the same hardware and to the policy set by the admin.

This new plugin allows to monitor real (physical) CPU usage of the
virtualized system, as well as some other metrics specific to IBM's
partitioning solution.

Regards,

Aurélien Reynaud

-- 
Love is like PI - natural, irrational, endless, and very important.

Signed-off-by: Florian Forster
---
 configure.in    |   2 +
 src/Makefile.am |   9 +++
 src/lpar.c      | 258 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/types.db    |   1 +
 4 files changed, 270 insertions(+)
 create mode 100644 src/lpar.c

diff --git a/configure.in b/configure.in
index 8ebf1244..fcbd57c0 100644
--- a/configure.in
+++ b/configure.in
@@ -4495,6 +4495,7 @@ AC_PLUGIN([java], [$with_java], [Embed the Java Virtual Machine])
 AC_PLUGIN([libvirt], [$plugin_libvirt], [Virtual machine statistics])
 AC_PLUGIN([load], [$plugin_load], [System load])
 AC_PLUGIN([logfile], [yes], [File logging plugin])
+AC_PLUGIN([lpar], [$with_perfstat], [AIX logical partitions statistics])
 AC_PLUGIN([madwifi], [$have_linux_wireless_h], [Madwifi wireless statistics])
 AC_PLUGIN([match_empty_counter], [yes], [The empty counter match])
 AC_PLUGIN([match_hashed], [yes], [The hashed match])
@@ -4818,6 +4819,7 @@ Configuration:
     libvirt . . . . . . . $enable_libvirt
     load  . . . . . . . . $enable_load
     logfile . . . . . . . $enable_logfile
+    lpar... . . . . . . . $enable_lpar
     madwifi . . . . . . . $enable_madwifi
     match_empty_counter . $enable_match_empty_counter
     match_hashed  . . . . $enable_match_hashed
diff --git a/src/Makefile.am b/src/Makefile.am
index 4bcc5ab2..74c64305 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -512,6 +512,15 @@ collectd_LDADD += "-dlopen" logfile.la
 collectd_DEPENDENCIES += logfile.la
 endif
 
+if BUILD_PLUGIN_LPAR
+pkglib_LTLIBRARIES += lpar.la
+lpar_la_SOURCES = lpar.c
+lpar_la_LDFLAGS = -module -avoid-version
+collectd_LDADD += "-dlopen" lpar.la
+collectd_DEPENDENCIES += lpar.la
+lpar_la_LIBADD = -lperfstat
+endif
+
 if BUILD_PLUGIN_MADWIFI
 pkglib_LTLIBRARIES += madwifi.la
 madwifi_la_SOURCES = madwifi.c madwifi.h
diff --git a/src/lpar.c b/src/lpar.c
new file mode 100644
index 00000000..cf9f94b7
--- /dev/null
+++ b/src/lpar.c
@@ -0,0 +1,258 @@
+/**
+ * collectd - src/lpar.c
+ * Copyright (C) 2010  Aurélien Reynaud
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; only version 2 of the License is applicable.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ * Authors:
+ *   Aurelien Reynaud
+ **/
+
+#include "collectd.h"
+#include "common.h"
+#include "plugin.h"
+#include <sys/protosw.h>
+#include <libperfstat.h>
+#include <sys/systemcfg.h>
+#include <sys/utsname.h>
+
+/* Ratio of the Xint and Xfrac fields of _system_configuration; only
+ * defined here when the system headers do not already provide it. */
+#ifndef XINTFRAC
+# define XINTFRAC ((double)(_system_configuration.Xint) / \
+                   (double)(_system_configuration.Xfrac))
+#endif
+
+/* Max length of the type instance string: "pool-<id>-total" with the
+ * pool id printed as at most 2*sizeof(int) hexadecimal digits. */
+#define TYPE_INST_LEN (sizeof("pool--total") + 2*sizeof(int) + 1)
+
+static const char *config_keys[] =
+{
+	"CpuPoolStats"
+};
+static int config_keys_num = STATIC_ARRAY_SIZE (config_keys);
+/* Non-zero when the "CpuPoolStats" option is enabled. */
+static int pool_stats = 0;
+
+/* As an LPAR can be moved transparently across physical systems
+ * through Live Partition Mobility (LPM), and the resources we are
+ * monitoring are tied to the underlying hardware, we need to keep
+ * track of which physical server we are currently on. This is done
+ * through the plugin instance which holds the chassis' serial.
+ */
+static char plugin_inst[SYS_NMLN];
+
+/* Counter values of the previous iteration, used to compute deltas. */
+static u_longlong_t last_time_base;
+static u_longlong_t last_pcpu_user,
+	last_pcpu_sys,
+	last_pcpu_idle,
+	last_pcpu_wait;
+static u_longlong_t last_pool_idle_time = 0;
+static u_longlong_t last_idle_donated_purr = 0,
+	last_busy_donated_purr = 0,
+	last_busy_stolen_purr = 0,
+	last_idle_stolen_purr = 0;
+/* Set by lpar_init() when the partition is not shared and donates. */
+static int donate_flag = 0;
+
+
+/* Save the current values for the next iteration */
+static void save_last_values (perfstat_partition_total_t *lparstats)
+{
+	last_time_base = lparstats->timebase_last;
+
+	last_pcpu_user = lparstats->puser;
+	last_pcpu_sys = lparstats->psys;
+	last_pcpu_idle = lparstats->pidle;
+	last_pcpu_wait = lparstats->pwait;
+
+	if (donate_flag)
+	{
+		last_idle_donated_purr = lparstats->idle_donated_purr;
+		last_busy_donated_purr = lparstats->busy_donated_purr;
+		last_busy_stolen_purr = lparstats->busy_stolen_purr;
+		last_idle_stolen_purr = lparstats->idle_stolen_purr;
+	}
+
+	last_pool_idle_time = lparstats->pool_idle_time;
+}
+
+/* Configuration callback: handles the boolean "CpuPoolStats" option.
+ * Returns 0 on success and -1 for an unknown key. */
+static int lpar_config (const char *key, const char *value)
+{
+	if (strcasecmp ("CpuPoolStats", key) == 0)
+	{
+		if (IS_TRUE (value))
+			pool_stats = 1;
+		else
+			pool_stats = 0;
+	}
+	else
+	{
+		return (-1);
+	}
+
+	return (0);
+} /* int lpar_config */
+
+/* Init callback: takes the first perfstat sample, decides whether the
+ * donation metrics apply and stores the initial counter values. */
+static int lpar_init (void)
+{
+	perfstat_partition_total_t lparstats;
+
+	/* retrieve the initial metrics; the call returns the number of
+	 * structures filled (1 here) or -1 on error */
+	if (perfstat_partition_total (NULL, &lparstats,
+				sizeof (perfstat_partition_total_t), 1) != 1)
+	{
+		ERROR ("lpar plugin: perfstat_partition_total failed.");
+		return (-1);
+	}
+
+	if (!lparstats.type.b.shared_enabled && lparstats.type.b.donate_enabled)
+	{
+		donate_flag = 1;
+	}
+
+	/* save the initial data */
+	save_last_values (&lparstats);
+
+	return (0);
+} /* int lpar_init */
+
+/* Dispatch one gauge of type "lpar_pcpu" under the given type instance,
+ * using the current plugin instance. */
+static void lpar_submit (const char *type_instance, double value)
+{
+	value_t values[1];
+	value_list_t vl = VALUE_LIST_INIT;
+
+	values[0].gauge = (gauge_t)value;
+
+	vl.values = values;
+	vl.values_len = 1;
+	sstrncpy (vl.host, hostname_g, sizeof (vl.host));
+	sstrncpy (vl.plugin, "lpar", sizeof (vl.plugin));
+	sstrncpy (vl.plugin_instance, plugin_inst, sizeof (vl.plugin_instance));
+	sstrncpy (vl.type, "lpar_pcpu", sizeof (vl.type));
+	sstrncpy (vl.type_instance, type_instance, sizeof (vl.type_instance));
+
+	plugin_dispatch_values (&vl);
+}
+
+/* Read callback: samples the partition statistics and submits the physical
+ * CPU usage since the previous iteration, the processor capacity settings
+ * and, when applicable, the donation and shared pool metrics. */
+static int lpar_read (void)
+{
+	u_longlong_t dlt_pcpu_user, dlt_pcpu_sys, dlt_pcpu_idle, dlt_pcpu_wait;
+	u_longlong_t delta_time_base;
+	perfstat_partition_total_t lparstats;
+	struct utsname name;
+
+	/* retrieve the current physical server's id and build the plugin
+	   instance's name */
+	if (uname (&name) != 0)
+	{
+		ERROR ("lpar plugin: uname failed.");
+		return (-1);
+	}
+	sstrncpy (plugin_inst, name.machine, sizeof (plugin_inst));
+
+	/* retrieve the current metrics */
+	if (perfstat_partition_total (NULL, &lparstats,
+				sizeof (perfstat_partition_total_t), 1) != 1)
+	{
+		ERROR ("lpar plugin: perfstat_partition_total failed.");
+		return (-1);
+	}
+
+	delta_time_base = lparstats.timebase_last - last_time_base;
+	if (delta_time_base == 0)
+	{
+		/* The system stats have not been updated since last time */
+		return (0);
+	}
+
+	dlt_pcpu_user = lparstats.puser - last_pcpu_user;
+	dlt_pcpu_sys = lparstats.psys - last_pcpu_sys;
+	dlt_pcpu_idle = lparstats.pidle - last_pcpu_idle;
+	dlt_pcpu_wait = lparstats.pwait - last_pcpu_wait;
+
+	lpar_submit ("user", (double)dlt_pcpu_user / delta_time_base);
+	lpar_submit ("sys", (double)dlt_pcpu_sys / delta_time_base);
+	lpar_submit ("wait", (double)dlt_pcpu_wait / delta_time_base);
+	lpar_submit ("idle", (double)dlt_pcpu_idle / delta_time_base);
+	lpar_submit ("ent", (double)lparstats.entitled_proc_capacity / 100.0);
+	lpar_submit ("max", (double)lparstats.max_proc_capacity / 100.0);
+	lpar_submit ("min", (double)lparstats.min_proc_capacity / 100.0);
+
+	if (donate_flag)
+	{
+		u_longlong_t dlt_busy_stolen, dlt_idle_stolen;
+		u_longlong_t dlt_idle_donated, dlt_busy_donated;
+
+		dlt_idle_donated = lparstats.idle_donated_purr - last_idle_donated_purr;
+		dlt_busy_donated = lparstats.busy_donated_purr - last_busy_donated_purr;
+		dlt_idle_stolen = lparstats.idle_stolen_purr - last_idle_stolen_purr;
+		dlt_busy_stolen = lparstats.busy_stolen_purr - last_busy_stolen_purr;
+
+		lpar_submit ("idle_donated", (double)dlt_idle_donated / delta_time_base);
+		lpar_submit ("busy_donated", (double)dlt_busy_donated / delta_time_base);
+		lpar_submit ("idle_stolen", (double)dlt_idle_stolen / delta_time_base);
+		lpar_submit ("busy_stolen", (double)dlt_busy_stolen / delta_time_base);
+	}
+
+	if (pool_stats)
+	{
+		if (!lparstats.type.b.pool_util_authority)
+		{
+			WARNING ("lpar plugin: this system does not have pool authority.");
+		}
+		else
+		{
+			u_longlong_t dlt_pit;
+			double total, idle;
+			char type[TYPE_INST_LEN];
+
+			dlt_pit = lparstats.pool_idle_time - last_pool_idle_time;
+			total = (double)lparstats.phys_cpus_pool;
+			idle = (double)dlt_pit / XINTFRAC / (double)delta_time_base;
+			ssnprintf (type, sizeof(type), "pool-%X-total", lparstats.pool_id);
+			lpar_submit (type, total);
+			ssnprintf (type, sizeof(type), "pool-%X-used", lparstats.pool_id);
+			lpar_submit (type, total - idle);
+		}
+	}
+
+	save_last_values (&lparstats);
+
+	return (0);
+} /* int lpar_read */
+
+/* Register the plugin's config, init and read callbacks with collectd. */
+void module_register (void)
+{
+	plugin_register_config ("lpar", lpar_config,
+			config_keys, config_keys_num);
+	plugin_register_init ("lpar", lpar_init);
+	plugin_register_read ("lpar", lpar_read);
+} /* void module_register */
+
+/* vim: set sw=2 sts=2 ts=8 : */
+
diff --git a/src/types.db b/src/types.db
index 1b0020f6..962109f4 100644
--- a/src/types.db
+++ b/src/types.db
@@ -88,6 +88,7 @@ irq			value:COUNTER:U:65535
 latency			value:GAUGE:0:65535
 links			value:GAUGE:0:U
 load			shortterm:GAUGE:0:100, midterm:GAUGE:0:100, longterm:GAUGE:0:100
+lpar_pcpu		value:GAUGE:0:U
 memcached_command	value:COUNTER:0:U
 memcached_connections	value:GAUGE:0:U
 memcached_items		value:GAUGE:0:U
-- 
2.11.0