X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=src%2Flpar.c;h=4d534476f62c76754433b2c73d313967595e5843;hb=584b130e51fc0e8214c8b4499b404e8728356fd9;hp=2267e03cadfbc5f56f650a45b58920c8ae9d1081;hpb=3e375cba6873e0e5d47de4b16058cbd5ceb96184;p=collectd.git diff --git a/src/lpar.c b/src/lpar.c index 2267e03c..4d534476 100644 --- a/src/lpar.c +++ b/src/lpar.c @@ -16,25 +16,25 @@ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * Authors: - * Aurelien Reynaud + * Aurélien Reynaud **/ #include "collectd.h" #include "common.h" #include "plugin.h" + #include #include #include +/* XINTFRAC was defined in libperfstat.h somewhere between AIX 5.3 and 6.1 */ #ifndef XINTFRAC # include # define XINTFRAC ((double)(_system_configuration.Xint) / \ (double)(_system_configuration.Xfrac)) #endif -#define HTIC2SEC(x) ((double)x * XINTFRAC / 1000000000.0) -/* Max length of the type instance string */ -#define TYPE_INST_LEN (sizeof("pool--total") + 2*sizeof(int) + 1) +#define CLOCKTICKS_TO_TICKS(cticks) ((cticks) / XINTFRAC) static const char *config_keys[] = { @@ -42,13 +42,15 @@ static const char *config_keys[] = "ReportBySerial" }; static int config_keys_num = STATIC_ARRAY_SIZE (config_keys); -static int pool_stats = 0, - report_by_serial = 0; -static u_longlong_t last_time_base; -static u_longlong_t ent_counter; -static int donate_flag = 0; +static _Bool pool_stats = 0; +static _Bool report_by_serial = 0; +#if PERFSTAT_SUPPORTS_DONATION +static _Bool donate_flag = 0; +#endif +static char serial[SYS_NMLN]; +static perfstat_partition_total_t lparstats_old; static int lpar_config (const char *key, const char *value) { @@ -56,11 +58,15 @@ static int lpar_config (const char *key, const char *value) { if (IS_TRUE (value)) pool_stats = 1; + else + pool_stats = 0; } else if (strcasecmp ("ReportBySerial", key) == 0) { if (IS_TRUE (value)) report_by_serial = 1; + else + report_by_serial = 0; } else { @@ -72,32 +78,36 @@ static int lpar_config (const char *key, const char *value) static int lpar_init (void) { - perfstat_partition_total_t lparstats; + int status; - /* Retrieve the initial metrics */ - if (!perfstat_partition_total (NULL, &lparstats, - sizeof (perfstat_partition_total_t), 1)) + /* Retrieve the initial metrics. Returns the number of structures filled. */ + status = perfstat_partition_total (/* name = */ NULL, /* (must be NULL) */ + &lparstats_old, sizeof (perfstat_partition_total_t), + /* number = */ 1 /* (must be 1) */); + if (status != 1) { - ERROR ("lpar plugin: perfstat_partition_total failed."); + char errbuf[1024]; + ERROR ("lpar plugin: perfstat_partition_total failed: %s (%i)", + sstrerror (errno, errbuf, sizeof (errbuf)), + status); return (-1); } - if (!lparstats.type.b.shared_enabled && lparstats.type.b.donate_enabled) +#if PERFSTAT_SUPPORTS_DONATION + if (!lparstats_old.type.b.shared_enabled + && lparstats_old.type.b.donate_enabled) { donate_flag = 1; } +#endif - if (pool_stats && !lparstats.type.b.pool_util_authority) + if (pool_stats && !lparstats_old.type.b.pool_util_authority) { - WARNING ("lpar plugin: this system does not have pool authority. " - "Disabling CPU pool statistics collection."); + WARNING ("lpar plugin: This partition does not have pool authority. " + "Disabling CPU pool statistics collection."); pool_stats = 0; } - /* Initialize the fake counter for entitled capacity */ - last_time_base = lparstats.timebase_last; - ent_counter = 0; - return (0); } /* int lpar_init */ @@ -106,28 +116,13 @@ static void lpar_submit (const char *type_instance, double value) value_t values[1]; value_list_t vl = VALUE_LIST_INIT; - /* Although it appears as a double, value is really a (scaled) counter, - expressed in CPU x seconds. At high collection rates (< 1 min), its - integer part is very small and the resulting graphs get blocky. We regain - some precision by applying a x100 factor before casting it to a counter, - turning the final value into CPU units instead of CPUs. */ - values[0].counter = (counter_t)(value * 100.0 + 0.5); + values[0].gauge = (gauge_t)value; vl.values = values; vl.values_len = 1; - - /* An LPAR has the same serial number as the physical system it is currently - running on. It is a convenient way of tracking LPARs as they are moved - from chassis to chassis through Live Partition Mobility (LPM). */ if (report_by_serial) { - struct utsname name; - if (uname (&name) != 0) - { - ERROR ("lpar plugin: uname failed."); - return; - } - sstrncpy (vl.host, name.machine, sizeof (vl.host)); + sstrncpy (vl.host, serial, sizeof (vl.host)); sstrncpy (vl.plugin_instance, hostname_g, sizeof (vl.plugin)); } else @@ -135,58 +130,134 @@ static void lpar_submit (const char *type_instance, double value) sstrncpy (vl.host, hostname_g, sizeof (vl.host)); } sstrncpy (vl.plugin, "lpar", sizeof (vl.plugin)); - sstrncpy (vl.type, "cpu", sizeof (vl.type)); + sstrncpy (vl.type, "vcpu", sizeof (vl.type)); sstrncpy (vl.type_instance, type_instance, sizeof (vl.type_instance)); plugin_dispatch_values (&vl); -} +} /* void lpar_submit */ static int lpar_read (void) { - u_longlong_t delta_time_base; perfstat_partition_total_t lparstats; + int status; + struct utsname name; + u_longlong_t ticks; + u_longlong_t user_ticks, syst_ticks, wait_ticks, idle_ticks; + u_longlong_t consumed_ticks; + double entitled_proc_capacity; + + /* An LPAR has the same serial number as the physical system it is currently + running on. It is a convenient way of tracking LPARs as they are moved + from chassis to chassis through Live Partition Mobility (LPM). */ + if (uname (&name) != 0) + { + ERROR ("lpar plugin: uname failed."); + return (-1); + } + sstrncpy (serial, name.machine, sizeof (serial)); - /* Retrieve the current metrics */ - if (!perfstat_partition_total (NULL, &lparstats, - sizeof (perfstat_partition_total_t), 1)) + /* Retrieve the current metrics. Returns the number of structures filled. */ + status = perfstat_partition_total (/* name = */ NULL, /* (must be NULL) */ + &lparstats, sizeof (perfstat_partition_total_t), + /* number = */ 1 /* (must be 1) */); + if (status != 1) { - ERROR ("lpar plugin: perfstat_partition_total failed."); + char errbuf[1024]; + ERROR ("lpar plugin: perfstat_partition_total failed: %s (%i)", + sstrerror (errno, errbuf, sizeof (errbuf)), + status); return (-1); } - delta_time_base = lparstats.timebase_last - last_time_base; - last_time_base = lparstats.timebase_last; + /* Number of ticks since we last run. */ + ticks = lparstats.timebase_last - lparstats_old.timebase_last; + if (ticks == 0) + { + /* The stats have not been updated. Return now to avoid + * dividing by zero */ + return (0); + } + + /* + * On a shared partition, we're "entitled" to a certain amount of + * processing power, for example 250/100 of a physical CPU. Processing + * capacity not used by the partition may be assigned to a different + * partition by the hypervisor, so "idle" is hopefully a very small + * number. + * + * A dedicated partition may donate its CPUs to another partition and + * may steal ticks from somewhere else (another partition or maybe the + * shared pool, I don't know --octo). + */ + + /* entitled_proc_capacity is in 1/100th of a CPU */ + entitled_proc_capacity = 0.01 * ((double) lparstats.entitled_proc_capacity); + lpar_submit ("entitled", entitled_proc_capacity); + + /* The number of ticks actually spent in the various states */ + user_ticks = lparstats.puser - lparstats_old.puser; + syst_ticks = lparstats.psys - lparstats_old.psys; + wait_ticks = lparstats.pwait - lparstats_old.pwait; + idle_ticks = lparstats.pidle - lparstats_old.pidle; + consumed_ticks = user_ticks + syst_ticks + wait_ticks + idle_ticks; - lpar_submit ("user", HTIC2SEC(lparstats.puser)); - lpar_submit ("sys", HTIC2SEC(lparstats.psys)); - lpar_submit ("wait", HTIC2SEC(lparstats.pwait)); - lpar_submit ("idle", HTIC2SEC(lparstats.pidle)); - /* Entitled capacity is reported as an absolute value instead of a counter, - so we fake one. It's also in CPU units, hence the division by 100 before - submission. */ - ent_counter += lparstats.entitled_proc_capacity * delta_time_base; - lpar_submit ("ent", HTIC2SEC(ent_counter) / 100.0); + lpar_submit ("user", (double) user_ticks / (double) ticks); + lpar_submit ("system", (double) syst_ticks / (double) ticks); + lpar_submit ("wait", (double) wait_ticks / (double) ticks); + lpar_submit ("idle", (double) idle_ticks / (double) ticks); +#if PERFSTAT_SUPPORTS_DONATION if (donate_flag) { - lpar_submit ("idle_donated", HTIC2SEC(lparstats.idle_donated_purr)); - lpar_submit ("busy_donated", HTIC2SEC(lparstats.busy_donated_purr)); - lpar_submit ("idle_stolen", HTIC2SEC(lparstats.idle_stolen_purr)); - lpar_submit ("busy_stolen", HTIC2SEC(lparstats.busy_stolen_purr)); + /* donated => ticks given to another partition + * stolen => ticks received from another partition */ + u_longlong_t idle_donated_ticks, busy_donated_ticks; + u_longlong_t idle_stolen_ticks, busy_stolen_ticks; + + /* FYI: PURR == Processor Utilization of Resources Register + * SPURR == Scaled PURR */ + idle_donated_ticks = lparstats.idle_donated_purr - lparstats_old.idle_donated_purr; + busy_donated_ticks = lparstats.busy_donated_purr - lparstats_old.busy_donated_purr; + idle_stolen_ticks = lparstats.idle_stolen_purr - lparstats_old.idle_stolen_purr; + busy_stolen_ticks = lparstats.busy_stolen_purr - lparstats_old.busy_stolen_purr; + + lpar_submit ("idle_donated", (double) idle_donated_ticks / (double) ticks); + lpar_submit ("busy_donated", (double) busy_donated_ticks / (double) ticks); + lpar_submit ("idle_stolen", (double) idle_stolen_ticks / (double) ticks); + lpar_submit ("busy_stolen", (double) busy_stolen_ticks / (double) ticks); + + /* Donated ticks will be accounted for as stolen ticks in other LPARs */ + consumed_ticks += idle_stolen_ticks + busy_stolen_ticks; } +#endif + + lpar_submit ("consumed", (double) consumed_ticks / (double) ticks); if (pool_stats) { - char typinst[TYPE_INST_LEN]; + char typinst[DATA_MAX_NAME_LEN]; + u_longlong_t pool_idle_cticks; + double pool_idle_cpus; + double pool_busy_cpus; + + /* We're calculating "busy" from "idle" and the total number of + * CPUs, because the "busy" member didn't exist in early versions + * of libperfstat. It was added somewhere between AIX 5.3 ML5 and ML9. */ + pool_idle_cticks = lparstats.pool_idle_time - lparstats_old.pool_idle_time; + pool_idle_cpus = CLOCKTICKS_TO_TICKS ((double) pool_idle_cticks) / (double) ticks; + pool_busy_cpus = ((double) lparstats.phys_cpus_pool) - pool_idle_cpus; + if (pool_busy_cpus < 0.0) + pool_busy_cpus = 0.0; - /* Pool stats are in CPU x ns */ - ssnprintf (typinst, sizeof(typinst), "pool-%X-busy", lparstats.pool_id); - lpar_submit (typinst, (double)lparstats.pool_busy_time / 1000000000.0); + ssnprintf (typinst, sizeof (typinst), "pool-%X-busy", lparstats.pool_id); + lpar_submit (typinst, pool_busy_cpus); - ssnprintf (typinst, sizeof(typinst), "pool-%X-total", lparstats.pool_id); - lpar_submit (typinst, (double)lparstats.pool_max_time / 1000000000.0); + ssnprintf (typinst, sizeof (typinst), "pool-%X-idle", lparstats.pool_id); + lpar_submit (typinst, pool_idle_cpus); } + memcpy (&lparstats_old, &lparstats, sizeof (lparstats_old)); + return (0); } /* int lpar_read */ @@ -198,5 +269,5 @@ void module_register (void) plugin_register_read ("lpar", lpar_read); } /* void module_register */ -/* vim: set sw=2 sts=2 ts=8 : */ +/* vim: set sw=8 noet : */