* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors:
- * Aurelien Reynaud <collectd at wattapower.net>
+ * Aurélien Reynaud <collectd at wattapower.net>
**/
#include "collectd.h"
#include "common.h"
#include "plugin.h"
+
#include <sys/protosw.h>
#include <libperfstat.h>
#include <sys/utsname.h>
+/* XINTFRAC was added to libperfstat.h somewhere between AIX 5.3 and 6.1;
+ * define it ourselves when the header does not provide it. */
#ifndef XINTFRAC
# include <sys/systemcfg.h>
# define XINTFRAC ((double)(_system_configuration.Xint) / \
(double)(_system_configuration.Xfrac))
#endif
-#define HTIC2SEC(x) ((double)x * XINTFRAC / 1000000000.0)
-/* Max length of the type instance string */
-#define TYPE_INST_LEN (sizeof("pool--total") + 2*sizeof(int) + 1)
+#define CLOCKTICKS_TO_TICKS(cticks) ((cticks) / XINTFRAC)
static const char *config_keys[] =
{
"ReportBySerial"
};
static int config_keys_num = STATIC_ARRAY_SIZE (config_keys);
-static int pool_stats = 0,
- report_by_serial = 0;
-static u_longlong_t last_time_base;
-static u_longlong_t ent_counter;
-static int donate_flag = 0;
+static _Bool pool_stats = 0;
+static _Bool report_by_serial = 0;
+#if PERFSTAT_SUPPORTS_DONATION
+static _Bool donate_flag = 0;
+#endif
+static char serial[SYS_NMLN];
+static perfstat_partition_total_t lparstats_old;
static int lpar_config (const char *key, const char *value)
{
{
if (IS_TRUE (value))
pool_stats = 1;
+ else
+ pool_stats = 0;
}
else if (strcasecmp ("ReportBySerial", key) == 0)
{
if (IS_TRUE (value))
report_by_serial = 1;
+ else
+ report_by_serial = 0;
}
else
{
static int lpar_init (void)
{
- perfstat_partition_total_t lparstats;
+ int status;
- /* Retrieve the initial metrics */
- if (!perfstat_partition_total (NULL, &lparstats,
- sizeof (perfstat_partition_total_t), 1))
+ /* Retrieve the initial metrics. Returns the number of structures filled. */
+ status = perfstat_partition_total (/* name = */ NULL, /* (must be NULL) */
+ &lparstats_old, sizeof (perfstat_partition_total_t),
+ /* number = */ 1 /* (must be 1) */);
+ if (status != 1)
{
- ERROR ("lpar plugin: perfstat_partition_total failed.");
+ char errbuf[1024];
+ ERROR ("lpar plugin: perfstat_partition_total failed: %s (%i)",
+ sstrerror (errno, errbuf, sizeof (errbuf)),
+ status);
return (-1);
}
- if (!lparstats.type.b.shared_enabled && lparstats.type.b.donate_enabled)
+#if PERFSTAT_SUPPORTS_DONATION
+ if (!lparstats_old.type.b.shared_enabled
+ && lparstats_old.type.b.donate_enabled)
{
donate_flag = 1;
}
+#endif
- if (pool_stats && !lparstats.type.b.pool_util_authority)
+ if (pool_stats && !lparstats_old.type.b.pool_util_authority)
{
- WARNING ("lpar plugin: this system does not have pool authority. "
- "Disabling CPU pool statistics collection.");
+ WARNING ("lpar plugin: This partition does not have pool authority. "
+ "Disabling CPU pool statistics collection.");
pool_stats = 0;
}
- /* Initialize the fake counter for entitled capacity */
- last_time_base = lparstats.timebase_last;
- ent_counter = 0;
-
return (0);
} /* int lpar_init */
value_t values[1];
value_list_t vl = VALUE_LIST_INIT;
- /* Although it appears as a double, value is really a (scaled) counter,
- expressed in CPU x seconds. At high collection rates (< 1 min), its
- integer part is very small and the resulting graphs get blocky. We regain
- some precision by applying a x100 factor before casting it to a counter,
- turning the final value into CPU units instead of CPUs. */
- values[0].counter = (counter_t)(value * 100.0 + 0.5);
+ values[0].gauge = (gauge_t)value;
vl.values = values;
vl.values_len = 1;
-
- /* An LPAR has the same serial number as the physical system it is currently
- running on. It is a convenient way of tracking LPARs as they are moved
- from chassis to chassis through Live Partition Mobility (LPM). */
if (report_by_serial)
{
- struct utsname name;
- if (uname (&name) != 0)
- {
- ERROR ("lpar plugin: uname failed.");
- return;
- }
- sstrncpy (vl.host, name.machine, sizeof (vl.host));
+ sstrncpy (vl.host, serial, sizeof (vl.host));
sstrncpy (vl.plugin_instance, hostname_g, sizeof (vl.plugin));
}
else
sstrncpy (vl.host, hostname_g, sizeof (vl.host));
}
sstrncpy (vl.plugin, "lpar", sizeof (vl.plugin));
- sstrncpy (vl.type, "cpu", sizeof (vl.type));
+ sstrncpy (vl.type, "vcpu", sizeof (vl.type));
sstrncpy (vl.type_instance, type_instance, sizeof (vl.type_instance));
plugin_dispatch_values (&vl);
-}
+} /* void lpar_submit */
static int lpar_read (void)
{
- u_longlong_t delta_time_base;
perfstat_partition_total_t lparstats;
+ int status;
+ struct utsname name;
+ u_longlong_t ticks;
+ u_longlong_t user_ticks, syst_ticks, wait_ticks, idle_ticks;
+ u_longlong_t consumed_ticks;
+ double entitled_proc_capacity;
+
+ /* An LPAR has the same serial number as the physical system it is currently
+ running on. It is a convenient way of tracking LPARs as they are moved
+ from chassis to chassis through Live Partition Mobility (LPM). */
+ if (uname (&name) != 0)
+ {
+ ERROR ("lpar plugin: uname failed.");
+ return (-1);
+ }
+ sstrncpy (serial, name.machine, sizeof (serial));
- /* Retrieve the current metrics */
- if (!perfstat_partition_total (NULL, &lparstats,
- sizeof (perfstat_partition_total_t), 1))
+ /* Retrieve the current metrics. Returns the number of structures filled. */
+ status = perfstat_partition_total (/* name = */ NULL, /* (must be NULL) */
+ &lparstats, sizeof (perfstat_partition_total_t),
+ /* number = */ 1 /* (must be 1) */);
+ if (status != 1)
{
- ERROR ("lpar plugin: perfstat_partition_total failed.");
+ char errbuf[1024];
+ ERROR ("lpar plugin: perfstat_partition_total failed: %s (%i)",
+ sstrerror (errno, errbuf, sizeof (errbuf)),
+ status);
return (-1);
}
- delta_time_base = lparstats.timebase_last - last_time_base;
- last_time_base = lparstats.timebase_last;
+ /* Number of ticks since the last run. */
+ ticks = lparstats.timebase_last - lparstats_old.timebase_last;
+ if (ticks == 0)
+ {
+ /* The stats have not been updated since the last read. Return now to
+ * avoid dividing by zero below. */
+ return (0);
+ }
+
+ /*
+ * On a shared partition, we're "entitled" to a certain amount of
+ * processing power, for example 250/100 of a physical CPU. Processing
+ * capacity not used by the partition may be assigned to a different
+ * partition by the hypervisor, so "idle" is hopefully a very small
+ * number.
+ *
+ * A dedicated partition may donate its CPUs to another partition and
+ * may steal ticks from somewhere else (another partition or maybe the
+ * shared pool, I don't know --octo).
+ */
+
+ /* lparstats.entitled_proc_capacity is in 1/100th of a CPU; scale it to CPUs */
+ entitled_proc_capacity = 0.01 * ((double) lparstats.entitled_proc_capacity);
+ lpar_submit ("entitled", entitled_proc_capacity);
+
+ /* The number of ticks actually spent in the various states */
+ user_ticks = lparstats.puser - lparstats_old.puser;
+ syst_ticks = lparstats.psys - lparstats_old.psys;
+ wait_ticks = lparstats.pwait - lparstats_old.pwait;
+ idle_ticks = lparstats.pidle - lparstats_old.pidle;
+ consumed_ticks = user_ticks + syst_ticks + wait_ticks + idle_ticks;
- lpar_submit ("user", HTIC2SEC(lparstats.puser));
- lpar_submit ("sys", HTIC2SEC(lparstats.psys));
- lpar_submit ("wait", HTIC2SEC(lparstats.pwait));
- lpar_submit ("idle", HTIC2SEC(lparstats.pidle));
- /* Entitled capacity is reported as an absolute value instead of a counter,
- so we fake one. It's also in CPU units, hence the division by 100 before
- submission. */
- ent_counter += lparstats.entitled_proc_capacity * delta_time_base;
- lpar_submit ("ent", HTIC2SEC(ent_counter) / 100.0);
+ lpar_submit ("user", (double) user_ticks / (double) ticks);
+ lpar_submit ("system", (double) syst_ticks / (double) ticks);
+ lpar_submit ("wait", (double) wait_ticks / (double) ticks);
+ lpar_submit ("idle", (double) idle_ticks / (double) ticks);
+#if PERFSTAT_SUPPORTS_DONATION
if (donate_flag)
{
- lpar_submit ("idle_donated", HTIC2SEC(lparstats.idle_donated_purr));
- lpar_submit ("busy_donated", HTIC2SEC(lparstats.busy_donated_purr));
- lpar_submit ("idle_stolen", HTIC2SEC(lparstats.idle_stolen_purr));
- lpar_submit ("busy_stolen", HTIC2SEC(lparstats.busy_stolen_purr));
+ /* donated => ticks given to another partition
+ * stolen => ticks received from another partition */
+ u_longlong_t idle_donated_ticks, busy_donated_ticks;
+ u_longlong_t idle_stolen_ticks, busy_stolen_ticks;
+
+ /* FYI: PURR == Processor Utilization of Resources Register
+ * SPURR == Scaled PURR */
+ idle_donated_ticks = lparstats.idle_donated_purr - lparstats_old.idle_donated_purr;
+ busy_donated_ticks = lparstats.busy_donated_purr - lparstats_old.busy_donated_purr;
+ idle_stolen_ticks = lparstats.idle_stolen_purr - lparstats_old.idle_stolen_purr;
+ busy_stolen_ticks = lparstats.busy_stolen_purr - lparstats_old.busy_stolen_purr;
+
+ lpar_submit ("idle_donated", (double) idle_donated_ticks / (double) ticks);
+ lpar_submit ("busy_donated", (double) busy_donated_ticks / (double) ticks);
+ lpar_submit ("idle_stolen", (double) idle_stolen_ticks / (double) ticks);
+ lpar_submit ("busy_stolen", (double) busy_stolen_ticks / (double) ticks);
+
+ /* Donated ticks will be accounted for as stolen ticks in other LPARs */
+ consumed_ticks += idle_stolen_ticks + busy_stolen_ticks;
}
+#endif
+
+ lpar_submit ("consumed", (double) consumed_ticks / (double) ticks);
if (pool_stats)
{
- char typinst[TYPE_INST_LEN];
+ char typinst[DATA_MAX_NAME_LEN];
+ u_longlong_t pool_idle_cticks;
+ double pool_idle_cpus;
+ double pool_busy_cpus;
+
+ /* We're calculating "busy" from "idle" and the total number of
+ * CPUs, because the "busy" member didn't exist in early versions
+ * of libperfstat. It was added somewhere between AIX 5.3 ML5 and ML9. */
+ pool_idle_cticks = lparstats.pool_idle_time - lparstats_old.pool_idle_time;
+ pool_idle_cpus = CLOCKTICKS_TO_TICKS ((double) pool_idle_cticks) / (double) ticks;
+ pool_busy_cpus = ((double) lparstats.phys_cpus_pool) - pool_idle_cpus;
+ if (pool_busy_cpus < 0.0)
+ pool_busy_cpus = 0.0;
- /* Pool stats are in CPU x ns */
- ssnprintf (typinst, sizeof(typinst), "pool-%X-busy", lparstats.pool_id);
- lpar_submit (typinst, (double)lparstats.pool_busy_time / 1000000000.0);
+ ssnprintf (typinst, sizeof (typinst), "pool-%X-busy", lparstats.pool_id);
+ lpar_submit (typinst, pool_busy_cpus);
- ssnprintf (typinst, sizeof(typinst), "pool-%X-total", lparstats.pool_id);
- lpar_submit (typinst, (double)lparstats.pool_max_time / 1000000000.0);
+ ssnprintf (typinst, sizeof (typinst), "pool-%X-idle", lparstats.pool_id);
+ lpar_submit (typinst, pool_idle_cpus);
}
+ memcpy (&lparstats_old, &lparstats, sizeof (lparstats_old));
+
return (0);
} /* int lpar_read */
plugin_register_read ("lpar", lpar_read);
} /* void module_register */
-/* vim: set sw=2 sts=2 ts=8 : */
+/* vim: set sw=8 noet : */