X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=src%2Fvirt.c;h=f3d7a51b349551eaf27d3eb87b8d0002e04a2dd8;hb=1ce900790385724fdef0baabed4b36e2dd630d8e;hp=06081097cb7e2116071ec3599d770d5a1eb39b59;hpb=3eebd68c6d41a672e64caecd82f57d08d0d0cf3d;p=collectd.git diff --git a/src/virt.c b/src/virt.c index 06081097..f3d7a51b 100644 --- a/src/virt.c +++ b/src/virt.c @@ -17,6 +17,7 @@ * * Authors: * Richard W.M. Jones + * Przemyslaw Szczerbik **/ #include "collectd.h" @@ -38,11 +39,57 @@ #define PLUGIN_NAME "virt" #ifdef LIBVIR_CHECK_VERSION + +#if LIBVIR_CHECK_VERSION(0, 9, 2) +#define HAVE_DOM_REASON 1 +#endif + #if LIBVIR_CHECK_VERSION(0, 9, 5) #define HAVE_BLOCK_STATS_FLAGS 1 +#define HAVE_DOM_REASON_PAUSED_SHUTTING_DOWN 1 +#endif + +#if LIBVIR_CHECK_VERSION(0, 9, 10) +#define HAVE_DISK_ERR 1 +#endif + +#if LIBVIR_CHECK_VERSION(0, 9, 11) +#define HAVE_CPU_STATS 1 +#define HAVE_DOM_STATE_PMSUSPENDED 1 +#define HAVE_DOM_REASON_RUNNING_WAKEUP 1 +#endif + +#if LIBVIR_CHECK_VERSION(1, 0, 1) +#define HAVE_DOM_REASON_PAUSED_SNAPSHOT 1 +#endif + +#if LIBVIR_CHECK_VERSION(1, 1, 1) +#define HAVE_DOM_REASON_PAUSED_CRASHED 1 +#endif + +#if LIBVIR_CHECK_VERSION(1, 2, 9) +#define HAVE_JOB_STATS 1 +#endif + +#if LIBVIR_CHECK_VERSION(1, 2, 10) +#define HAVE_DOM_REASON_CRASHED 1 +#endif + +#if LIBVIR_CHECK_VERSION(1, 2, 11) +#define HAVE_FS_INFO 1 +#endif + +#if LIBVIR_CHECK_VERSION(1, 2, 15) +#define HAVE_DOM_REASON_PAUSED_STARTING_UP 1 #endif + +#if LIBVIR_CHECK_VERSION(1, 3, 3) +#define HAVE_PERF_STATS 1 +#define HAVE_DOM_REASON_POSTCOPY 1 #endif +#endif /* LIBVIR_CHECK_VERSION */ + static const char *config_keys[] = {"Connection", "RefreshInterval", @@ -61,15 +108,323 @@ static const char *config_keys[] = {"Connection", "Instances", "ExtraStats", - + "PersistentNotification", NULL}; + +/* PersistentNotification is false by default */ +static _Bool persistent_notification = 0; + +/* libvirt event loop */ +static pthread_t event_loop_tid; + +static int domain_event_cb_id; + +const char *domain_states[] = { + [VIR_DOMAIN_NOSTATE] = "no state", + [VIR_DOMAIN_RUNNING] = "the domain is running", + [VIR_DOMAIN_BLOCKED] = "the domain is blocked on resource", + [VIR_DOMAIN_PAUSED] = "the domain is paused by user", + [VIR_DOMAIN_SHUTDOWN] = "the domain is being shut down", + [VIR_DOMAIN_SHUTOFF] = "the domain is shut off", + [VIR_DOMAIN_CRASHED] = "the domain is crashed", +#ifdef HAVE_DOM_STATE_PMSUSPENDED + [VIR_DOMAIN_PMSUSPENDED] = + "the domain is suspended by guest power management", +#endif +}; + +static int map_domain_event_to_state(int event) { + int ret; + switch (event) { + case VIR_DOMAIN_EVENT_STARTED: + ret = VIR_DOMAIN_RUNNING; + break; + case VIR_DOMAIN_EVENT_SUSPENDED: + ret = VIR_DOMAIN_PAUSED; + break; + case VIR_DOMAIN_EVENT_RESUMED: + ret = VIR_DOMAIN_RUNNING; + break; + case VIR_DOMAIN_EVENT_STOPPED: + ret = VIR_DOMAIN_SHUTOFF; + break; + case VIR_DOMAIN_EVENT_SHUTDOWN: + ret = VIR_DOMAIN_SHUTDOWN; + break; + case VIR_DOMAIN_EVENT_PMSUSPENDED: + ret = VIR_DOMAIN_PMSUSPENDED; + break; + case VIR_DOMAIN_EVENT_CRASHED: + ret = VIR_DOMAIN_CRASHED; + break; + default: + ret = VIR_DOMAIN_NOSTATE; + } + return ret; +} + +static int map_domain_event_detail_to_reason(int event, int detail) { + int ret; + switch (event) { + case VIR_DOMAIN_EVENT_STARTED: + switch (detail) { + case VIR_DOMAIN_EVENT_STARTED_BOOTED: /* Normal startup from boot */ + ret = VIR_DOMAIN_RUNNING_BOOTED; + break; + case VIR_DOMAIN_EVENT_STARTED_MIGRATED: /* Incoming migration from another host */ + ret = VIR_DOMAIN_RUNNING_MIGRATED; + break; + case VIR_DOMAIN_EVENT_STARTED_RESTORED: /* Restored from a state file */ + ret = VIR_DOMAIN_RUNNING_RESTORED; + break; + case VIR_DOMAIN_EVENT_STARTED_FROM_SNAPSHOT: /* Restored from snapshot */ + ret = VIR_DOMAIN_RUNNING_FROM_SNAPSHOT; + break; + case VIR_DOMAIN_EVENT_STARTED_WAKEUP: /* Started due to wakeup event */ + ret = VIR_DOMAIN_RUNNING_WAKEUP; + break; + default: + ret = VIR_DOMAIN_RUNNING_UNKNOWN; + } + break; + case VIR_DOMAIN_EVENT_SUSPENDED: + switch (detail) { + case VIR_DOMAIN_EVENT_SUSPENDED_PAUSED: /* Normal suspend due to admin pause */ + ret = VIR_DOMAIN_PAUSED_USER; + break; + case VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED: /* Suspended for offline migration */ + ret = VIR_DOMAIN_PAUSED_MIGRATION; + break; + case VIR_DOMAIN_EVENT_SUSPENDED_IOERROR: /* Suspended due to a disk I/O error */ + ret = VIR_DOMAIN_PAUSED_IOERROR; + break; + case VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG: /* Suspended due to a watchdog firing */ + ret = VIR_DOMAIN_PAUSED_WATCHDOG; + break; + case VIR_DOMAIN_EVENT_SUSPENDED_RESTORED: /* Restored from paused state file */ + ret = VIR_DOMAIN_PAUSED_UNKNOWN; + break; + case VIR_DOMAIN_EVENT_SUSPENDED_FROM_SNAPSHOT: /* Restored from paused snapshot */ + ret = VIR_DOMAIN_PAUSED_FROM_SNAPSHOT; + break; + case VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR: /* Suspended after failure during libvirt API call */ + ret = VIR_DOMAIN_PAUSED_UNKNOWN; + break; + case VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY: /* Suspended for post-copy migration */ + ret = VIR_DOMAIN_PAUSED_POSTCOPY; + break; + case VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY_FAILED: /* Suspended after failed post-copy */ + ret = VIR_DOMAIN_PAUSED_POSTCOPY_FAILED; + break; + default: + ret = VIR_DOMAIN_PAUSED_UNKNOWN; + } + break; + case VIR_DOMAIN_EVENT_RESUMED: + switch (detail) { + case VIR_DOMAIN_EVENT_RESUMED_UNPAUSED: /* Normal resume due to admin unpause */ + ret = VIR_DOMAIN_RUNNING_UNPAUSED; + break; + case VIR_DOMAIN_EVENT_RESUMED_MIGRATED: /* Resumed for completion of migration */ + ret = VIR_DOMAIN_RUNNING_MIGRATED; + break; + case VIR_DOMAIN_EVENT_RESUMED_FROM_SNAPSHOT: /* Resumed from snapshot */ + ret = VIR_DOMAIN_RUNNING_FROM_SNAPSHOT; + break; + case VIR_DOMAIN_EVENT_RESUMED_POSTCOPY: /* Resumed, but migration is still running in post-copy mode */ + ret = VIR_DOMAIN_RUNNING_POSTCOPY; + break; + default: + ret = VIR_DOMAIN_RUNNING_UNKNOWN; + } + break; + case VIR_DOMAIN_EVENT_STOPPED: + switch (detail) { + case VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN: /* Normal shutdown */ + ret = VIR_DOMAIN_SHUTOFF_SHUTDOWN; + break; + case VIR_DOMAIN_EVENT_STOPPED_DESTROYED: /* Forced poweroff from host */ + ret = VIR_DOMAIN_SHUTOFF_DESTROYED; + break; + case VIR_DOMAIN_EVENT_STOPPED_CRASHED: /* Guest crashed */ + ret = VIR_DOMAIN_SHUTOFF_CRASHED; + break; + case VIR_DOMAIN_EVENT_STOPPED_MIGRATED: /* Migrated off to another host */ + ret = VIR_DOMAIN_SHUTOFF_MIGRATED; + break; + case VIR_DOMAIN_EVENT_STOPPED_SAVED: /* Saved to a state file */ + ret = VIR_DOMAIN_SHUTOFF_SAVED; + break; + case VIR_DOMAIN_EVENT_STOPPED_FAILED: /* Host emulator/mgmt failed */ + ret = VIR_DOMAIN_SHUTOFF_FAILED; + break; + case VIR_DOMAIN_EVENT_STOPPED_FROM_SNAPSHOT: /* Offline snapshot loaded */ + ret = VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT; + break; + default: + ret = VIR_DOMAIN_SHUTOFF_UNKNOWN; + } + break; + case VIR_DOMAIN_EVENT_SHUTDOWN: + switch (detail) { + case VIR_DOMAIN_EVENT_SHUTDOWN_FINISHED: /* Guest finished shutdown sequence */ + ret = VIR_DOMAIN_SHUTDOWN_USER; + break; + default: + ret = VIR_DOMAIN_SHUTDOWN_UNKNOWN; + } + break; + case VIR_DOMAIN_EVENT_PMSUSPENDED: + switch (detail) { + case VIR_DOMAIN_EVENT_PMSUSPENDED_MEMORY: /* Guest was PM suspended to memory */ + ret = VIR_DOMAIN_PMSUSPENDED_UNKNOWN; + break; + case VIR_DOMAIN_EVENT_PMSUSPENDED_DISK: /* Guest was PM suspended to disk */ + ret = VIR_DOMAIN_PMSUSPENDED_DISK_UNKNOWN; + break; + default: + ret = VIR_DOMAIN_PMSUSPENDED_UNKNOWN; + } + break; + case VIR_DOMAIN_EVENT_CRASHED: + switch (detail) { + case VIR_DOMAIN_EVENT_CRASHED_PANICKED: /* Guest was panicked */ + ret = VIR_DOMAIN_CRASHED_PANICKED; + break; + default: + ret = VIR_DOMAIN_CRASHED_UNKNOWN; + } + break; + default: + ret = VIR_DOMAIN_NOSTATE_UNKNOWN; + } + return ret; +} + +#ifdef HAVE_DOM_REASON +#define DOMAIN_STATE_REASON_MAX_SIZE 20 +const char *domain_reasons[][DOMAIN_STATE_REASON_MAX_SIZE] = { + [VIR_DOMAIN_NOSTATE][VIR_DOMAIN_NOSTATE_UNKNOWN] = + "the reason is unknown", + + [VIR_DOMAIN_RUNNING][VIR_DOMAIN_RUNNING_UNKNOWN] = + "the reason is unknown", + [VIR_DOMAIN_RUNNING][VIR_DOMAIN_RUNNING_BOOTED] = + "normal startup from boot", + [VIR_DOMAIN_RUNNING][VIR_DOMAIN_RUNNING_MIGRATED] = + "migrated from another host", + [VIR_DOMAIN_RUNNING][VIR_DOMAIN_RUNNING_RESTORED] = + "restored from a state file", + [VIR_DOMAIN_RUNNING][VIR_DOMAIN_RUNNING_FROM_SNAPSHOT] = + "restored from snapshot", + [VIR_DOMAIN_RUNNING][VIR_DOMAIN_RUNNING_UNPAUSED] = + "returned from paused state", + [VIR_DOMAIN_RUNNING][VIR_DOMAIN_RUNNING_MIGRATION_CANCELED] = + "returned from migration", + [VIR_DOMAIN_RUNNING][VIR_DOMAIN_RUNNING_SAVE_CANCELED] = + "returned from failed save process", +#ifdef HAVE_DOM_REASON_RUNNING_WAKEUP + [VIR_DOMAIN_RUNNING][VIR_DOMAIN_RUNNING_WAKEUP] = + "returned from pmsuspended due to wakeup event", +#endif +#ifdef HAVE_DOM_REASON_CRASHED + [VIR_DOMAIN_RUNNING][VIR_DOMAIN_RUNNING_CRASHED] = + "resumed from crashed", +#endif +#ifdef HAVE_DOM_REASON_POSTCOPY + [VIR_DOMAIN_RUNNING][VIR_DOMAIN_RUNNING_POSTCOPY] = + "running in post-copy migration mode", +#endif + + [VIR_DOMAIN_BLOCKED][VIR_DOMAIN_BLOCKED_UNKNOWN] = + "the reason is unknown", + + [VIR_DOMAIN_PAUSED][VIR_DOMAIN_PAUSED_UNKNOWN] = + "the reason is unknown", + [VIR_DOMAIN_PAUSED][VIR_DOMAIN_PAUSED_USER] = "paused on user request", + [VIR_DOMAIN_PAUSED][VIR_DOMAIN_PAUSED_MIGRATION] = + "paused for offline migration", + [VIR_DOMAIN_PAUSED][VIR_DOMAIN_PAUSED_SAVE] = "paused for save", + [VIR_DOMAIN_PAUSED][VIR_DOMAIN_PAUSED_DUMP] = + "paused for offline core dump", + [VIR_DOMAIN_PAUSED][VIR_DOMAIN_PAUSED_IOERROR] = + "paused due to a disk I/O error", + [VIR_DOMAIN_PAUSED][VIR_DOMAIN_PAUSED_WATCHDOG] = + "paused due to a watchdog event", + [VIR_DOMAIN_PAUSED][VIR_DOMAIN_PAUSED_FROM_SNAPSHOT] = + "paused after restoring from snapshot", +#ifdef HAVE_DOM_REASON_PAUSED_SHUTTING_DOWN + [VIR_DOMAIN_PAUSED][VIR_DOMAIN_PAUSED_SHUTTING_DOWN] = + "paused during shutdown process", +#endif +#ifdef HAVE_DOM_REASON_PAUSED_SNAPSHOT + [VIR_DOMAIN_PAUSED][VIR_DOMAIN_PAUSED_SNAPSHOT] = + "paused while creating a snapshot", +#endif +#ifdef HAVE_DOM_REASON_PAUSED_CRASHED + [VIR_DOMAIN_PAUSED][VIR_DOMAIN_PAUSED_CRASHED] = + "paused due to a guest crash", +#endif +#ifdef HAVE_DOM_REASON_PAUSED_STARTING_UP + [VIR_DOMAIN_PAUSED][VIR_DOMAIN_PAUSED_STARTING_UP] = + "the domain is being started", +#endif +#ifdef HAVE_DOM_REASON_POSTCOPY + [VIR_DOMAIN_PAUSED][VIR_DOMAIN_PAUSED_POSTCOPY] = + "paused for post-copy migration", + [VIR_DOMAIN_PAUSED][VIR_DOMAIN_PAUSED_POSTCOPY_FAILED] = + "paused after failed post-copy", +#endif + + [VIR_DOMAIN_SHUTDOWN][VIR_DOMAIN_SHUTDOWN_UNKNOWN] = + "the reason is unknown", + [VIR_DOMAIN_SHUTDOWN][VIR_DOMAIN_SHUTDOWN_USER] = + "shutting down on user request", + + [VIR_DOMAIN_SHUTOFF][VIR_DOMAIN_SHUTOFF_UNKNOWN] = + "the reason is unknown", + [VIR_DOMAIN_SHUTOFF][VIR_DOMAIN_SHUTOFF_SHUTDOWN] = "normal shutdown", + [VIR_DOMAIN_SHUTOFF][VIR_DOMAIN_SHUTOFF_DESTROYED] = "forced poweroff", + [VIR_DOMAIN_SHUTOFF][VIR_DOMAIN_SHUTOFF_CRASHED] = "domain crashed", + [VIR_DOMAIN_SHUTOFF][VIR_DOMAIN_SHUTOFF_MIGRATED] = + "migrated to another host", + [VIR_DOMAIN_SHUTOFF][VIR_DOMAIN_SHUTOFF_SAVED] = "saved to a file", + [VIR_DOMAIN_SHUTOFF][VIR_DOMAIN_SHUTOFF_FAILED] = + "domain failed to start", + [VIR_DOMAIN_SHUTOFF][VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT] = + "restored from a snapshot which was taken while domain was shutoff", + + [VIR_DOMAIN_CRASHED][VIR_DOMAIN_CRASHED_UNKNOWN] = + "the reason is unknown", +#ifdef VIR_DOMAIN_CRASHED_PANICKED + [VIR_DOMAIN_CRASHED][VIR_DOMAIN_CRASHED_PANICKED] = "domain panicked", +#endif + +#ifdef HAVE_DOM_STATE_PMSUSPENDED + [VIR_DOMAIN_PMSUSPENDED][VIR_DOMAIN_PMSUSPENDED_UNKNOWN] = + "the reason is unknown", +#endif +}; +#endif /* HAVE_DOM_REASON */ + #define NR_CONFIG_KEYS ((sizeof config_keys / sizeof config_keys[0]) - 1) +#define NANOSEC_IN_SEC 1e9 + +#define GET_STATS(_f, _name, ...) \ + do { \ + status = _f(__VA_ARGS__); \ + if (status != 0) \ + ERROR(PLUGIN_NAME ": Failed to get " _name); \ + } while (0) /* Connection. */ static virConnectPtr conn = 0; static char *conn_string = NULL; static c_complain_t conn_complain = C_COMPLAIN_INIT_STATIC; +/* Node information required for %CPU */ +static virNodeInfo nodeinfo; + /* Seconds between list refreshes, 0 disables completely. */ static int interval = 60; @@ -97,9 +452,14 @@ struct interface_device { char *number; /* interface device number */ }; +typedef struct domain_s { + virDomainPtr ptr; + virDomainInfo info; +} domain_t; + struct lv_read_state { /* Actual list of domains found on last refresh. */ - virDomainPtr *domains; + domain_t *domains; int nr_domains; struct block_device *block_devices; @@ -166,10 +526,56 @@ enum bd_field { target, source }; enum if_field { if_address, if_name, if_number }; /* ExtraStats */ -#define EX_STATS_MAX_FIELDS 8 -#define EX_STATS_DISK "disk" -enum ex_stats { ex_stats_none = 0, ex_stats_disk = 1 }; -static enum ex_stats extra_stats = ex_stats_none; +#define EX_STATS_MAX_FIELDS 15 +enum ex_stats { + ex_stats_none = 0, + ex_stats_disk = 1 << 0, + ex_stats_pcpu = 1 << 1, + ex_stats_cpu_util = 1 << 2, + ex_stats_domain_state = 1 << 3, +#ifdef HAVE_PERF_STATS + ex_stats_perf = 1 << 4, +#endif + ex_stats_vcpupin = 1 << 5, +#ifdef HAVE_DISK_ERR + ex_stats_disk_err = 1 << 6, +#endif +#ifdef HAVE_FS_INFO + ex_stats_fs_info = 1 << 7, +#endif +#ifdef HAVE_JOB_STATS + ex_stats_job_stats_completed = 1 << 8, + ex_stats_job_stats_background = 1 << 9, +#endif +}; + +static unsigned int extra_stats = ex_stats_none; + +struct ex_stats_item { + const char *name; + enum ex_stats flag; +}; +static const struct ex_stats_item ex_stats_table[] = { + {"disk", ex_stats_disk}, + {"pcpu", ex_stats_pcpu}, + {"cpu_util", ex_stats_cpu_util}, + {"domain_state", ex_stats_domain_state}, +#ifdef HAVE_PERF_STATS + {"perf", ex_stats_perf}, +#endif + {"vcpupin", ex_stats_vcpupin}, +#ifdef HAVE_DISK_ERR + {"disk_err", ex_stats_disk_err}, +#endif +#ifdef HAVE_FS_INFO + {"fs_info", ex_stats_fs_info}, +#endif +#ifdef HAVE_JOB_STATS + {"job_stats_completed", ex_stats_job_stats_completed}, + {"job_stats_background", ex_stats_job_stats_background}, +#endif + {NULL, ex_stats_none}, +}; /* BlockDeviceFormatBasename */ _Bool blockdevice_format_basename = 0; @@ -181,6 +587,12 @@ static time_t last_refresh = (time_t)0; static int refresh_lists(struct lv_read_instance *inst); +struct lv_info { + virDomainInfo di; + unsigned long long total_user_cpu_time; + unsigned long long total_syst_cpu_time; +}; + struct lv_block_info { virDomainBlockStatsStruct bi; @@ -209,12 +621,10 @@ static void init_block_info(struct lv_block_info *binfo) { #ifdef HAVE_BLOCK_STATS_FLAGS #define GET_BLOCK_INFO_VALUE(NAME, FIELD) \ - do { \ - if (!strcmp(param[i].field, NAME)) { \ - binfo->FIELD = param[i].value.l; \ - continue; \ - } \ - } while (0) + if (!strcmp(param[i].field, NAME)) { \ + binfo->FIELD = param[i].value.l; \ + continue; \ + } static int get_block_info(struct lv_block_info *binfo, virTypedParameterPtr param, int nparams) { @@ -249,6 +659,56 @@ static int get_block_info(struct lv_block_info *binfo, ERROR("%s: %s", (s), err->message); \ } while (0) +static void init_lv_info(struct lv_info *info) { + if (info != NULL) + memset(info, 0, sizeof(*info)); +} + +static int lv_domain_info(virDomainPtr dom, struct lv_info *info) { +#ifdef HAVE_CPU_STATS + virTypedParameterPtr param = NULL; + int nparams = 0; +#endif /* HAVE_CPU_STATS */ + int ret = virDomainGetInfo(dom, &(info->di)); + if (ret != 0) { + return ret; + } + +#ifdef HAVE_CPU_STATS + nparams = virDomainGetCPUStats(dom, NULL, 0, -1, 1, 0); + if (nparams < 0) { + VIRT_ERROR(conn, "getting the CPU params count"); + return -1; + } + + param = calloc(nparams, sizeof(virTypedParameter)); + if (param == NULL) { + ERROR("virt plugin: alloc(%i) for cpu parameters failed.", nparams); + return -1; + } + + ret = virDomainGetCPUStats(dom, param, nparams, -1, 1, 0); // total stats. + if (ret < 0) { + virTypedParamsClear(param, nparams); + sfree(param); + VIRT_ERROR(conn, "getting the disk params values"); + return -1; + } + + for (int i = 0; i < nparams; ++i) { + if (!strcmp(param[i].field, "user_time")) + info->total_user_cpu_time = param[i].value.ul; + else if (!strcmp(param[i].field, "system_time")) + info->total_syst_cpu_time = param[i].value.ul; + } + + virTypedParamsClear(param, nparams); + sfree(param); +#endif /* HAVE_CPU_STATS */ + + return 0; +} + static void init_value_list(value_list_t *vl, virDomainPtr dom) { int n; const char *name; @@ -321,6 +781,34 @@ static void init_value_list(value_list_t *vl, virDomainPtr dom) { } /* void init_value_list */ +static int init_notif(notification_t *notif, const virDomainPtr domain, + int severity, const char *msg, const char *type, + const char *type_instance) { + value_list_t vl = VALUE_LIST_INIT; + + if (!notif) { + ERROR(PLUGIN_NAME ": init_notif: NULL pointer"); + return -1; + } + + init_value_list(&vl, domain); + notification_init(notif, severity, msg, vl.host, vl.plugin, + vl.plugin_instance, type, type_instance); + notif->time = cdtime(); + return 0; +} + +static void submit_notif(const virDomainPtr domain, int severity, + const char *msg, const char *type, + const char *type_instance) { + notification_t notif; + + init_notif(¬if, domain, severity, msg, type, type_instance); + plugin_dispatch_notification(¬if); + if (notif.meta) + plugin_notification_meta_free(notif.meta); +} + static void submit(virDomainPtr dom, char const *type, char const *type_instance, value_t *values, size_t values_len) { @@ -337,7 +825,7 @@ static void submit(virDomainPtr dom, char const *type, plugin_dispatch_values(&vl); } -static void memory_submit(gauge_t value, virDomainPtr dom) { +static void memory_submit(virDomainPtr dom, gauge_t value) { submit(dom, "memory", "total", &(value_t){.gauge = value}, 1); } @@ -345,9 +833,10 @@ static void memory_stats_submit(gauge_t value, virDomainPtr dom, int tag_index) { static const char *tags[] = {"swap_in", "swap_out", "major_fault", "minor_fault", "unused", "available", - "actual_balloon", "rss"}; + "actual_balloon", "rss", "usable", + "last_update"}; - if ((tag_index < 0) || (tag_index >= STATIC_ARRAY_SIZE(tags))) { + if ((tag_index < 0) || (tag_index >= (int)STATIC_ARRAY_SIZE(tags))) { ERROR("virt plugin: Array index out of bounds: tag_index = %d", tag_index); return; } @@ -355,35 +844,92 @@ static void memory_stats_submit(gauge_t value, virDomainPtr dom, submit(dom, "memory", tags[tag_index], &(value_t){.gauge = value}, 1); } -static void cpu_submit(unsigned long long value, virDomainPtr dom, - const char *type) { - submit(dom, type, NULL, &(value_t){.derive = (derive_t)value}, 1); +static void submit_derive2(const char *type, derive_t v0, derive_t v1, + virDomainPtr dom, const char *devname) { + value_t values[] = { + {.derive = v0}, {.derive = v1}, + }; + + submit(dom, type, devname, values, STATIC_ARRAY_SIZE(values)); +} /* void submit_derive2 */ + +static void pcpu_submit(virDomainPtr dom, struct lv_info *info) { +#ifdef HAVE_CPU_STATS + if (extra_stats & ex_stats_pcpu) + submit_derive2("ps_cputime", info->total_user_cpu_time, + info->total_syst_cpu_time, dom, NULL); +#endif /* HAVE_CPU_STATS */ +} + +static double cpu_ns_to_percent(unsigned int node_cpus, + unsigned long long cpu_time_old, + unsigned long long cpu_time_new) { + double percent = 0.0; + unsigned long long cpu_time_diff = 0; + double time_diff_sec = CDTIME_T_TO_DOUBLE(plugin_get_interval()); + + if (node_cpus != 0 && time_diff_sec != 0 && cpu_time_old != 0) { + cpu_time_diff = cpu_time_new - cpu_time_old; + percent = ((double)(100 * cpu_time_diff)) / + (time_diff_sec * node_cpus * NANOSEC_IN_SEC); + } + + DEBUG(PLUGIN_NAME ": node_cpus=%u cpu_time_old=%" PRIu64 + " cpu_time_new=%" PRIu64 "cpu_time_diff=%" PRIu64 + " time_diff_sec=%f percent=%f", + node_cpus, (uint64_t)cpu_time_old, (uint64_t)cpu_time_new, + (uint64_t)cpu_time_diff, time_diff_sec, percent); + + return percent; +} + +static void cpu_submit(const domain_t *dom, unsigned long long cpuTime_new) { + + if (!dom) + return; + + if (extra_stats & ex_stats_cpu_util) { + /* Computing %CPU requires 2 samples of cpuTime */ + if (dom->info.cpuTime != 0 && cpuTime_new != 0) { + + submit(dom->ptr, "percent", "virt_cpu_total", + &(value_t){.gauge = cpu_ns_to_percent( + nodeinfo.cpus, dom->info.cpuTime, cpuTime_new)}, + 1); + } + } + + submit(dom->ptr, "virt_cpu_total", NULL, &(value_t){.derive = cpuTime_new}, + 1); } static void vcpu_submit(derive_t value, virDomainPtr dom, int vcpu_nr, const char *type) { char type_instance[DATA_MAX_NAME_LEN]; - ssnprintf(type_instance, sizeof(type_instance), "%d", vcpu_nr); - + snprintf(type_instance, sizeof(type_instance), "%d", vcpu_nr); submit(dom, type, type_instance, &(value_t){.derive = value}, 1); } -static void submit_derive2(const char *type, derive_t v0, derive_t v1, - virDomainPtr dom, const char *devname) { - value_t values[] = { - {.derive = v0}, {.derive = v1}, - }; +static void disk_submit(struct lv_block_info *binfo, virDomainPtr dom, + const char *dev) { + char *dev_copy = strdup(dev); + const char *type_instance = dev_copy; - submit(dom, type, devname, values, STATIC_ARRAY_SIZE(values)); -} /* void submit_derive2 */ + if (!dev_copy) + return; -static void disk_submit(struct lv_block_info *binfo, virDomainPtr dom, - const char *type_instance) { - char flush_type_instance[DATA_MAX_NAME_LEN]; + if (blockdevice_format_basename && blockdevice_format == source) + type_instance = basename(dev_copy); - ssnprintf(flush_type_instance, sizeof(flush_type_instance), "flush-%s", - type_instance); + if (!type_instance) { + sfree(dev_copy); + return; + } + + char flush_type_instance[DATA_MAX_NAME_LEN]; + snprintf(flush_type_instance, sizeof(flush_type_instance), "flush-%s", + type_instance); if ((binfo->bi.rd_req != -1) && (binfo->bi.wr_req != -1)) submit_derive2("disk_ops", (derive_t)binfo->bi.rd_req, @@ -407,6 +953,82 @@ static void disk_submit(struct lv_block_info *binfo, virDomainPtr dom, &(value_t){.derive = value}, 1); } } + + sfree(dev_copy); +} + +static unsigned int parse_ex_stats_flags(char **exstats, int numexstats) { + unsigned int ex_stats_flags = ex_stats_none; + for (int i = 0; i < numexstats; i++) { + for (int j = 0; ex_stats_table[j].name != NULL; j++) { + if (strcasecmp(exstats[i], ex_stats_table[j].name) == 0) { + DEBUG(PLUGIN_NAME " plugin: enabling extra stats for '%s'", + ex_stats_table[j].name); + ex_stats_flags |= ex_stats_table[j].flag; + break; + } + + if (ex_stats_table[j + 1].name == NULL) { + ERROR(PLUGIN_NAME ": Unmatched ExtraStats option: %s", exstats[i]); + } + } + } + return ex_stats_flags; +} + +static void domain_state_submit(virDomainPtr dom, int state, int reason) { + if ((state < 0) || (state >= STATIC_ARRAY_SIZE(domain_states))) { + ERROR(PLUGIN_NAME ": Array index out of bounds: state=%d", state); + return; + } + + char msg[DATA_MAX_NAME_LEN]; + const char *state_str = domain_states[state]; +#ifdef HAVE_DOM_REASON + if ((reason < 0) || (reason >= STATIC_ARRAY_SIZE(domain_reasons[0]))) { + ERROR(PLUGIN_NAME ": Array index out of bounds: reason=%d", reason); + return; + } + + const char *reason_str = domain_reasons[state][reason]; + /* Array size for domain reasons is fixed, but different domain states can + * have different number of reasons. We need to check if reason was + * successfully parsed */ + if (!reason_str) { + ERROR(PLUGIN_NAME ": Invalid reason (%d) for domain state: %s", reason, + state_str); + return; + } +#else + const char *reason_str = "N/A"; +#endif + + snprintf(msg, sizeof(msg), "Domain state: %s. Reason: %s", state_str, + reason_str); + + int severity; + switch (state) { + case VIR_DOMAIN_NOSTATE: + case VIR_DOMAIN_RUNNING: + case VIR_DOMAIN_SHUTDOWN: + case VIR_DOMAIN_SHUTOFF: + severity = NOTIF_OKAY; + break; + case VIR_DOMAIN_BLOCKED: + case VIR_DOMAIN_PAUSED: +#ifdef DOM_STATE_PMSUSPENDED + case VIR_DOMAIN_PMSUSPENDED: +#endif + severity = NOTIF_WARNING; + break; + case VIR_DOMAIN_CRASHED: + severity = NOTIF_FAILURE; + break; + default: + ERROR(PLUGIN_NAME ": Unrecognized domain state (%d)", state); + return; + } + submit_notif(dom, severity, msg, "domain_state", NULL); } static int lv_config(const char *key, const char *value) { @@ -604,35 +1226,54 @@ static int lv_config(const char *key, const char *value) { } if (strcasecmp(key, "ExtraStats") == 0) { - char *localvalue = sstrdup(value); + char *localvalue = strdup(value); if (localvalue != NULL) { char *exstats[EX_STATS_MAX_FIELDS]; - int numexstats; - - numexstats = strsplit(localvalue, exstats, STATIC_ARRAY_SIZE(exstats)); - for (int i = 0; i < numexstats; i++) { - if (strcasecmp(exstats[i], EX_STATS_DISK) == 0) { - DEBUG(PLUGIN_NAME " plugin: enabling extra stats for '%s'", - EX_STATS_DISK); - extra_stats |= ex_stats_disk; - } - } + int numexstats = + strsplit(localvalue, exstats, STATIC_ARRAY_SIZE(exstats)); + extra_stats = parse_ex_stats_flags(exstats, numexstats); sfree(localvalue); + +#ifdef HAVE_JOB_STATS + if ((extra_stats & ex_stats_job_stats_completed) && + (extra_stats & ex_stats_job_stats_background)) { + ERROR(PLUGIN_NAME " plugin: Invalid job stats configuration. Only one " + "type of job statistics can be collected at the same " + "time"); + return 1; + } +#endif } } + if (strcasecmp(key, "PersistentNotification") == 0) { + persistent_notification = IS_TRUE(value); + return 0; + } + /* Unrecognised option. */ return -1; } static int lv_connect(void) { if (conn == NULL) { - /* `conn_string == NULL' is acceptable. */ - conn = virConnectOpenReadOnly(conn_string); +/* `conn_string == NULL' is acceptable */ +#ifdef HAVE_FS_INFO + /* virDomainGetFSInfo requires full read-write access connection */ + if (extra_stats & ex_stats_fs_info) + conn = virConnectOpen(conn_string); + else +#endif + conn = virConnectOpenReadOnly(conn_string); if (conn == NULL) { c_complain(LOG_ERR, &conn_complain, PLUGIN_NAME " plugin: Unable to connect: " - "virConnectOpenReadOnly failed."); + "virConnectOpen failed."); + return -1; + } + int status = virNodeGetInfo(conn, &nodeinfo); + if (status != 0) { + ERROR(PLUGIN_NAME ": virNodeGetInfo failed"); return -1; } } @@ -651,212 +1292,630 @@ static void lv_disconnect(void) { static int lv_domain_block_info(virDomainPtr dom, const char *path, struct lv_block_info *binfo) { #ifdef HAVE_BLOCK_STATS_FLAGS - virTypedParameterPtr params = NULL; int nparams = 0; - int rc = -1; - int ret; - - ret = virDomainBlockStatsFlags(dom, path, NULL, &nparams, 0); - if (ret < 0 || nparams == 0) { + if (virDomainBlockStatsFlags(dom, path, NULL, &nparams, 0) < 0 || + nparams <= 0) { VIRT_ERROR(conn, "getting the disk params count"); return -1; } - params = calloc(nparams, sizeof(virTypedParameter)); + virTypedParameterPtr params = calloc((size_t)nparams, sizeof(*params)); if (params == NULL) { ERROR("virt plugin: alloc(%i) for block=%s parameters failed.", nparams, path); return -1; } - ret = virDomainBlockStatsFlags(dom, path, params, &nparams, 0); - if (ret < 0) { + + int rc = -1; + if (virDomainBlockStatsFlags(dom, path, params, &nparams, 0) < 0) { VIRT_ERROR(conn, "getting the disk params values"); - goto done; + } else { + rc = get_block_info(binfo, params, nparams); } - rc = get_block_info(binfo, params, nparams); - -done: - virTypedParamsFree(params, nparams); + virTypedParamsClear(params, nparams); + sfree(params); return rc; #else return virDomainBlockStats(dom, path, &(binfo->bi), sizeof(binfo->bi)); #endif /* HAVE_BLOCK_STATS_FLAGS */ } -static int lv_read(user_data_t *ud) { - time_t t; - struct lv_read_instance *inst = NULL; - struct lv_read_state *state = NULL; - - if (ud->data == NULL) { - ERROR(PLUGIN_NAME " plugin: NULL userdata"); - return -1; +#ifdef HAVE_PERF_STATS +static void perf_submit(virDomainStatsRecordPtr stats) { + for (int i = 0; i < stats->nparams; ++i) { + /* Replace '.' with '_' in event field to match other metrics' naming + * convention */ + char *c = strchr(stats->params[i].field, '.'); + if (c) + *c = '_'; + submit(stats->dom, "perf", stats->params[i].field, + &(value_t){.derive = stats->params[i].value.ul}, 1); } +} - inst = ud->data; - state = &inst->read_state; +static int get_perf_events(virDomainPtr domain) { + virDomainStatsRecordPtr *stats = NULL; + /* virDomainListGetStats requires a NULL terminated list of domains */ + virDomainPtr domain_array[] = {domain, NULL}; - if (inst->id == 0) { - if (lv_connect() < 0) - return -1; + int status = + virDomainListGetStats(domain_array, VIR_DOMAIN_STATS_PERF, &stats, 0); + if (status == -1) { + ERROR("virt plugin: virDomainListGetStats failed with status %i.", status); + return status; } - time(&t); + for (int i = 0; i < status; ++i) + perf_submit(stats[i]); - /* Need to refresh domain or device lists? */ - if ((last_refresh == (time_t)0) || - ((interval > 0) && ((last_refresh + interval) <= t))) { - if (refresh_lists(inst) != 0) { - if (inst->id == 0) - lv_disconnect(); - return -1; + virDomainStatsRecordListFree(stats); + return 0; +} +#endif /* HAVE_PERF_STATS */ + +static void vcpu_pin_submit(virDomainPtr dom, int max_cpus, int vcpu, + unsigned char *cpu_maps, int cpu_map_len) { + for (int cpu = 0; cpu < max_cpus; ++cpu) { + char type_instance[DATA_MAX_NAME_LEN]; + _Bool is_set = VIR_CPU_USABLE(cpu_maps, cpu_map_len, vcpu, cpu) ? 1 : 0; + + snprintf(type_instance, sizeof(type_instance), "vcpu_%d-cpu_%d", vcpu, cpu); + submit(dom, "cpu_affinity", type_instance, &(value_t){.gauge = is_set}, 1); + } +} + +static int get_vcpu_stats(virDomainPtr domain, unsigned short nr_virt_cpu) { + int max_cpus = VIR_NODEINFO_MAXCPUS(nodeinfo); + int cpu_map_len = VIR_CPU_MAPLEN(max_cpus); + + virVcpuInfoPtr vinfo = calloc(nr_virt_cpu, sizeof(vinfo[0])); + if (vinfo == NULL) { + ERROR(PLUGIN_NAME " plugin: malloc failed."); + return -1; + } + + unsigned char *cpumaps = calloc(nr_virt_cpu, cpu_map_len); + if (cpumaps == NULL) { + ERROR(PLUGIN_NAME " plugin: malloc failed."); + sfree(vinfo); + return -1; + } + + int status = + virDomainGetVcpus(domain, vinfo, nr_virt_cpu, cpumaps, cpu_map_len); + if (status < 0) { + ERROR(PLUGIN_NAME " plugin: virDomainGetVcpus failed with status %i.", + status); + sfree(cpumaps); + sfree(vinfo); + return status; + } + + for (int i = 0; i < nr_virt_cpu; ++i) { + vcpu_submit(vinfo[i].cpuTime, domain, vinfo[i].number, "virt_vcpu"); + if (extra_stats & ex_stats_vcpupin) + vcpu_pin_submit(domain, max_cpus, i, cpumaps, cpu_map_len); + } + + sfree(cpumaps); + sfree(vinfo); + return 0; +} + +#ifdef HAVE_DOM_REASON +static int get_domain_state(virDomainPtr domain) { + int domain_state = 0; + int domain_reason = 0; + + int status = virDomainGetState(domain, &domain_state, &domain_reason, 0); + if (status != 0) { + ERROR(PLUGIN_NAME " plugin: virDomainGetState failed with status %i.", + status); + return status; + } + + if (persistent_notification) + domain_state_submit(domain, domain_state, domain_reason); + + return status; +} +#endif /* HAVE_DOM_REASON */ + +static int get_memory_stats(virDomainPtr domain) { + virDomainMemoryStatPtr minfo = + calloc(VIR_DOMAIN_MEMORY_STAT_NR, sizeof(virDomainMemoryStatStruct)); + if (minfo == NULL) { + ERROR("virt plugin: malloc failed."); + return -1; + } + + int mem_stats = + virDomainMemoryStats(domain, minfo, VIR_DOMAIN_MEMORY_STAT_NR, 0); + if (mem_stats < 0) { + ERROR("virt plugin: virDomainMemoryStats failed with mem_stats %i.", + mem_stats); + sfree(minfo); + return mem_stats; + } + + for (int i = 0; i < mem_stats; i++) + memory_stats_submit((gauge_t)minfo[i].val * 1024, domain, minfo[i].tag); + + sfree(minfo); + return 0; +} + +#ifdef HAVE_DISK_ERR +static void disk_err_submit(virDomainPtr domain, + virDomainDiskErrorPtr disk_err) { + submit(domain, "disk_error", disk_err->disk, + &(value_t){.gauge = disk_err->error}, 1); +} + +static int get_disk_err(virDomainPtr domain) { + /* Get preferred size of disk errors array */ + int disk_err_count = virDomainGetDiskErrors(domain, NULL, 0, 0); + if (disk_err_count == -1) { + ERROR(PLUGIN_NAME + " plugin: failed to get preferred size of disk errors array"); + return -1; + } + + DEBUG(PLUGIN_NAME + " plugin: preferred size of disk errors array: %d for domain %s", + disk_err_count, virDomainGetName(domain)); + virDomainDiskError disk_err[disk_err_count]; + + disk_err_count = virDomainGetDiskErrors(domain, disk_err, disk_err_count, 0); + if (disk_err_count == -1) { + ERROR(PLUGIN_NAME " plugin: virDomainGetDiskErrors failed with status %d", + disk_err_count); + return -1; + } + + DEBUG(PLUGIN_NAME " plugin: detected %d disk errors in domain %s", + disk_err_count, virDomainGetName(domain)); + + for (int i = 0; i < disk_err_count; ++i) { + disk_err_submit(domain, &disk_err[i]); + sfree(disk_err[i].disk); + } + + return 0; +} +#endif /* HAVE_DISK_ERR */ + +static int get_block_stats(struct block_device *block_dev) { + + if (!block_dev) { + ERROR(PLUGIN_NAME " plugin: get_block_stats NULL pointer"); + return -1; + } + + struct lv_block_info binfo; + init_block_info(&binfo); + + if (lv_domain_block_info(block_dev->dom, block_dev->path, &binfo) < 0) { + ERROR(PLUGIN_NAME " plugin: lv_domain_block_info failed"); + return -1; + } + + disk_submit(&binfo, block_dev->dom, block_dev->path); + return 0; +} + +#ifdef HAVE_FS_INFO + +#define NM_ADD_ITEM(_fun, _name, _val) \ + do { \ + ret = _fun(¬if, _name, _val); \ + if (ret != 0) { \ + ERROR(PLUGIN_NAME " plugin: failed to add notification metadata"); \ + goto cleanup; \ + } \ + } while (0) + +#define NM_ADD_STR_ITEMS(_items, _size) \ + do { \ + for (int _i = 0; _i < _size; ++_i) { \ + DEBUG(PLUGIN_NAME \ + " plugin: Adding notification metadata name=%s value=%s", \ + _items[_i].name, _items[_i].value); \ + NM_ADD_ITEM(plugin_notification_meta_add_string, _items[_i].name, \ + _items[_i].value); \ + } \ + } while (0) + +static int fs_info_notify(virDomainPtr domain, virDomainFSInfoPtr fs_info) { + notification_t notif; + int ret = 0; + + /* Local struct, just for the purpose of this function. */ + typedef struct nm_str_item_s { + const char *name; + const char *value; + } nm_str_item_t; + + nm_str_item_t fs_dev_alias[fs_info->ndevAlias]; + nm_str_item_t fs_str_items[] = { + {.name = "mountpoint", .value = fs_info->mountpoint}, + {.name = "name", .value = fs_info->name}, + {.name = "fstype", .value = fs_info->fstype}}; + + for (int i = 0; i < fs_info->ndevAlias; ++i) { + fs_dev_alias[i].name = "devAlias"; + fs_dev_alias[i].value = fs_info->devAlias[i]; + } + + init_notif(¬if, domain, NOTIF_OKAY, "File system information", + "file_system", NULL); + NM_ADD_STR_ITEMS(fs_str_items, STATIC_ARRAY_SIZE(fs_str_items)); + NM_ADD_ITEM(plugin_notification_meta_add_unsigned_int, "ndevAlias", + fs_info->ndevAlias); + NM_ADD_STR_ITEMS(fs_dev_alias, fs_info->ndevAlias); + + plugin_dispatch_notification(¬if); + +cleanup: + if (notif.meta) + plugin_notification_meta_free(notif.meta); + return ret; +} + +#undef RETURN_ON_ERR +#undef NM_ADD_STR_ITEMS + +static int get_fs_info(virDomainPtr domain) { + virDomainFSInfoPtr *fs_info = NULL; + int ret = 0; + + int mount_points_cnt = virDomainGetFSInfo(domain, &fs_info, 0); + if (mount_points_cnt == -1) { + ERROR(PLUGIN_NAME " plugin: virDomainGetFSInfo failed: %d", + mount_points_cnt); + return mount_points_cnt; + } + + for (int i = 0; i < mount_points_cnt; ++i) { + if (fs_info_notify(domain, fs_info[i]) != 0) { + ERROR(PLUGIN_NAME " plugin: failed to send file system notification " + "for mount point %s", + fs_info[i]->mountpoint); + ret = -1; } - last_refresh = t; + virDomainFSInfoFree(fs_info[i]); } -#if 0 - for (int i = 0; i < nr_domains; ++i) - fprintf (stderr, "domain %s\n", virDomainGetName (domains[i])); - for (int i = 0; i < nr_block_devices; ++i) - fprintf (stderr, "block device %d %s:%s\n", - i, virDomainGetName (block_devices[i].dom), - block_devices[i].path); - for (int i = 0; i < nr_interface_devices; ++i) - fprintf (stderr, "interface device %d %s:%s\n", - i, virDomainGetName (interface_devices[i].dom), - interface_devices[i].path); + sfree(fs_info); + return ret; +} + +#endif /* HAVE_FS_INFO */ + +#ifdef HAVE_JOB_STATS +static void job_stats_submit(virDomainPtr domain, virTypedParameterPtr param) { + value_t vl = {0}; + + if (param->type == VIR_TYPED_PARAM_INT) + vl.derive = param->value.i; + else if (param->type == VIR_TYPED_PARAM_UINT) + vl.derive = param->value.ui; + else if (param->type == VIR_TYPED_PARAM_LLONG) + vl.derive = param->value.l; + else if (param->type == VIR_TYPED_PARAM_ULLONG) + vl.derive = param->value.ul; + else if (param->type == VIR_TYPED_PARAM_DOUBLE) + vl.derive = param->value.d; + else if (param->type == VIR_TYPED_PARAM_BOOLEAN) + vl.derive = param->value.b; + else if (param->type == VIR_TYPED_PARAM_STRING) { + submit_notif(domain, NOTIF_OKAY, param->value.s, "job_stats", param->field); + return; + } else { + ERROR(PLUGIN_NAME " plugin: unrecognized virTypedParameterType"); + return; + } + + submit(domain, "job_stats", param->field, &vl, 1); +} + +static int get_job_stats(virDomainPtr domain) { + int ret = 0; + int job_type = 0; + int nparams = 0; + virTypedParameterPtr params = NULL; + int flags = (extra_stats & ex_stats_job_stats_completed) + ? VIR_DOMAIN_JOB_STATS_COMPLETED + : 0; + + ret = virDomainGetJobStats(domain, &job_type, ¶ms, &nparams, flags); + if (ret != 0) { + ERROR(PLUGIN_NAME " plugin: virDomainGetJobStats failed: %d", ret); + return ret; + } + + DEBUG(PLUGIN_NAME " plugin: job_type=%d nparams=%d", job_type, nparams); + + for (int i = 0; i < nparams; ++i) { + DEBUG(PLUGIN_NAME " plugin: param[%d] field=%s type=%d", i, params[i].field, + params[i].type); + job_stats_submit(domain, ¶ms[i]); + } + + virTypedParamsFree(params, nparams); + return ret; +} +#endif /* HAVE_JOB_STATS */ + +static int get_domain_metrics(domain_t *domain) { + struct lv_info info; + + if (!domain || !domain->ptr) { + ERROR(PLUGIN_NAME ": get_domain_metrics: NULL pointer"); + return -1; + } + + init_lv_info(&info); + int status = lv_domain_info(domain->ptr, &info); + if (status != 0) { + ERROR(PLUGIN_NAME " plugin: virDomainGetInfo failed with status %i.", + status); + return -1; + } + + if (extra_stats & ex_stats_domain_state) { +#ifdef HAVE_DOM_REASON + /* At this point we already know domain's state from virDomainGetInfo call, + * however it doesn't provide a reason for entering particular state. + * We need to get it from virDomainGetState. + */ + GET_STATS(get_domain_state, "domain reason", domain->ptr); +#else + /* virDomainGetState is not available. Submit 0, which corresponds to + * unknown reason. */ + if (persistent_notification) + domain_state_submit(domain->ptr, info.di.state, 0); #endif + } - /* Get CPU usage, memory, VCPU usage for each domain. */ - for (int i = 0; i < state->nr_domains; ++i) { - virDomainInfo info; - virVcpuInfoPtr vinfo = NULL; - virDomainMemoryStatPtr minfo = NULL; - int status; + /* Gather remaining stats only for running domains */ + if (info.di.state != VIR_DOMAIN_RUNNING) + return 0; - status = virDomainGetInfo(state->domains[i], &info); - if (status != 0) { - ERROR(PLUGIN_NAME " plugin: virDomainGetInfo failed with status %i.", - status); - continue; - } + pcpu_submit(domain->ptr, &info); + cpu_submit(domain, info.di.cpuTime); - if (info.state != VIR_DOMAIN_RUNNING) { - /* only gather stats for running domains */ - continue; - } + memory_submit(domain->ptr, (gauge_t)info.di.memory * 1024); - cpu_submit(info.cpuTime, state->domains[i], "virt_cpu_total"); - memory_submit((gauge_t)info.memory * 1024, state->domains[i]); + GET_STATS(get_vcpu_stats, "vcpu stats", domain->ptr, info.di.nrVirtCpu); + GET_STATS(get_memory_stats, "memory stats", domain->ptr); - vinfo = malloc(info.nrVirtCpu * sizeof(vinfo[0])); - if (vinfo == NULL) { - ERROR(PLUGIN_NAME " plugin: malloc failed."); - continue; - } +#ifdef HAVE_PERF_STATS + if (extra_stats & ex_stats_perf) + GET_STATS(get_perf_events, "performance monitoring events", domain->ptr); +#endif - status = virDomainGetVcpus(state->domains[i], vinfo, info.nrVirtCpu, - /* cpu map = */ NULL, /* cpu map length = */ 0); - if (status < 0) { - ERROR(PLUGIN_NAME " plugin: virDomainGetVcpus failed with status %i.", - status); - sfree(vinfo); - continue; - } +#ifdef HAVE_FS_INFO + if (extra_stats & ex_stats_fs_info) + GET_STATS(get_fs_info, "file system info", domain->ptr); +#endif - for (int j = 0; j < info.nrVirtCpu; ++j) - vcpu_submit(vinfo[j].cpuTime, state->domains[i], vinfo[j].number, - "virt_vcpu"); +#ifdef HAVE_DISK_ERR + if (extra_stats & ex_stats_disk_err) + GET_STATS(get_disk_err, "disk errors", domain->ptr); +#endif - sfree(vinfo); +#ifdef HAVE_JOB_STATS + if (extra_stats & + (ex_stats_job_stats_completed | ex_stats_job_stats_background)) + GET_STATS(get_job_stats, "job stats", domain->ptr); +#endif - minfo = - malloc(VIR_DOMAIN_MEMORY_STAT_NR * sizeof(virDomainMemoryStatStruct)); - if (minfo == NULL) { - ERROR("virt plugin: malloc failed."); - continue; - } + /* Update cached virDomainInfo. It has to be done after cpu_submit */ + memcpy(&domain->info, &info.di, sizeof(domain->info)); - status = virDomainMemoryStats(state->domains[i], minfo, - VIR_DOMAIN_MEMORY_STAT_NR, 0); + return 0; +} - if (status < 0) { - ERROR("virt plugin: virDomainMemoryStats failed with status %i.", status); - sfree(minfo); - continue; - } +static int get_if_dev_stats(struct interface_device *if_dev) { + virDomainInterfaceStatsStruct stats = {0}; + char *display_name = NULL; - for (int j = 0; j < status; j++) { - memory_stats_submit((gauge_t)minfo[j].val * 1024, state->domains[i], - minfo[j].tag); - } + if (!if_dev) { + ERROR(PLUGIN_NAME " plugin: get_if_dev_stats: NULL pointer"); + return -1; + } - sfree(minfo); + switch (interface_format) { + case if_address: + display_name = if_dev->address; + break; + case if_number: + display_name = if_dev->number; + break; + case if_name: + default: + display_name = if_dev->path; } - /* Get block device stats for each domain. */ - for (int i = 0; i < state->nr_block_devices; ++i) { - struct block_device *bdev = &(state->block_devices[i]); - struct lv_block_info binfo; - init_block_info(&binfo); + if (virDomainInterfaceStats(if_dev->dom, if_dev->path, &stats, + sizeof(stats)) != 0) { + ERROR(PLUGIN_NAME " plugin: virDomainInterfaceStats failed"); + return -1; + } - if (lv_domain_block_info(bdev->dom, bdev->path, &binfo) < 0) - continue; + if ((stats.rx_bytes != -1) && (stats.tx_bytes != -1)) + submit_derive2("if_octets", (derive_t)stats.rx_bytes, + (derive_t)stats.tx_bytes, if_dev->dom, display_name); - char *type_instance = NULL; - if (blockdevice_format_basename && blockdevice_format == source) - type_instance = strdup(basename(bdev->path)); - else - type_instance = strdup(bdev->path); + if ((stats.rx_packets != -1) && (stats.tx_packets != -1)) + submit_derive2("if_packets", (derive_t)stats.rx_packets, + (derive_t)stats.tx_packets, if_dev->dom, display_name); - disk_submit(&binfo, bdev->dom, type_instance); + if ((stats.rx_errs != -1) && (stats.tx_errs != -1)) + submit_derive2("if_errors", (derive_t)stats.rx_errs, + (derive_t)stats.tx_errs, if_dev->dom, display_name); - sfree(type_instance); - } /* for (nr_block_devices) */ + if ((stats.rx_drop != -1) && (stats.tx_drop != -1)) + submit_derive2("if_dropped", (derive_t)stats.rx_drop, + (derive_t)stats.tx_drop, if_dev->dom, display_name); + return 0; +} - /* Get interface stats for each domain. */ - for (int i = 0; i < state->nr_interface_devices; ++i) { - struct _virDomainInterfaceStats stats; - char *display_name = NULL; +static int domain_lifecycle_event_cb(__attribute__((unused)) virConnectPtr conn, + virDomainPtr dom, int event, int detail, + __attribute__((unused)) void *opaque) { + int domain_state = map_domain_event_to_state(event); + int domain_reason = map_domain_event_detail_to_reason(event, detail); + domain_state_submit(dom, domain_state, domain_reason); - switch (interface_format) { - case if_address: - display_name = state->interface_devices[i].address; - break; - case if_number: - display_name = state->interface_devices[i].number; - break; - case if_name: - default: - display_name = state->interface_devices[i].path; + return 0; +} + +static int register_event_impl(void) { + if (virEventRegisterDefaultImpl() < 0) { + virErrorPtr err = virGetLastError(); + ERROR(PLUGIN_NAME + " plugin: error while event implementation registering: %s", + err && err->message ? err->message : "Unknown error"); + return -1; + } + + return 0; +} + +/* worker function running default event implementation */ +static void *event_loop_worker(__attribute__((unused)) void *arg) { + while (1) { + if (virEventRunDefaultImpl() < 0) { + virErrorPtr err = virGetLastError(); + ERROR(PLUGIN_NAME " plugin: failed to run event loop: %s\n", + err && err->message ? err->message : "Unknown error"); } + } - if (virDomainInterfaceStats(state->interface_devices[i].dom, - state->interface_devices[i].path, &stats, - sizeof stats) != 0) - continue; + return NULL; +} + +/* register domain event callback and start event loop thread */ +static int start_event_loop(void) { + domain_event_cb_id = virConnectDomainEventRegisterAny( + conn, NULL, VIR_DOMAIN_EVENT_ID_LIFECYCLE, + VIR_DOMAIN_EVENT_CALLBACK(domain_lifecycle_event_cb), NULL, NULL); + if (domain_event_cb_id == -1) { + ERROR(PLUGIN_NAME " plugin: error while callback registering"); + return -1; + } + + if (pthread_create(&event_loop_tid, NULL, event_loop_worker, NULL)) { + ERROR(PLUGIN_NAME " plugin: failed event loop thread creation"); + virConnectDomainEventDeregisterAny(conn, domain_event_cb_id); + return -1; + } - if ((stats.rx_bytes != -1) && (stats.tx_bytes != -1)) - submit_derive2("if_octets", (derive_t)stats.rx_bytes, - (derive_t)stats.tx_bytes, state->interface_devices[i].dom, - display_name); + return 0; +} - if ((stats.rx_packets != -1) && (stats.tx_packets != -1)) - submit_derive2("if_packets", (derive_t)stats.rx_packets, - (derive_t)stats.tx_packets, - state->interface_devices[i].dom, display_name); +/* stop event loop thread and deregister callback */ +static void stop_event_loop(void) { + if (pthread_cancel(event_loop_tid) != 0) + ERROR(PLUGIN_NAME " plugin: cancelling thread %lu failed", + event_loop_tid); - if ((stats.rx_errs != -1) && (stats.tx_errs != -1)) - submit_derive2("if_errors", (derive_t)stats.rx_errs, - (derive_t)stats.tx_errs, state->interface_devices[i].dom, - display_name); + if (pthread_join(event_loop_tid, NULL) != 0) + ERROR(PLUGIN_NAME " plugin: stopping thread %lu failed", + event_loop_tid); - if ((stats.rx_drop != -1) && (stats.tx_drop != -1)) - submit_derive2("if_dropped", (derive_t)stats.rx_drop, - (derive_t)stats.tx_drop, state->interface_devices[i].dom, - display_name); - } /* for (nr_interface_devices) */ + if (conn != NULL && domain_event_cb_id != -1) + virConnectDomainEventDeregisterAny(conn, domain_event_cb_id); +} + +static int lv_read(user_data_t *ud) { + time_t t; + struct lv_read_instance *inst = NULL; + struct lv_read_state *state = NULL; + + if (ud->data == NULL) { + ERROR(PLUGIN_NAME " plugin: NULL userdata"); + return -1; + } + + inst = ud->data; + state = &inst->read_state; + + _Bool reconnect = conn == NULL ? 1 : 0; + /* event implementation must be registered before connection is opened */ + if (inst->id == 0) { + if (!persistent_notification && reconnect) + if (register_event_impl() != 0) + return -1; + + if (lv_connect() < 0) + return -1; + + if (!persistent_notification && reconnect && conn != NULL) + if (start_event_loop() != 0) + return -1; + } + + time(&t); + + /* Need to refresh domain or device lists? */ + if ((last_refresh == (time_t)0) || + ((interval > 0) && ((last_refresh + interval) <= t))) { + if (refresh_lists(inst) != 0) { + if (inst->id == 0) { + if (!persistent_notification) + stop_event_loop(); + lv_disconnect(); + } + return -1; + } + last_refresh = t; + } + + #if COLLECT_DEBUG + for (int i = 0; i < state->nr_domains; ++i) + DEBUG(PLUGIN_NAME " plugin: domain %s", + virDomainGetName(state->domains[i].ptr)); + for (int i = 0; i < state->nr_block_devices; ++i) + DEBUG(PLUGIN_NAME " plugin: block device %d %s:%s", + i, virDomainGetName(state->block_devices[i].dom), + state->block_devices[i].path); + for (int i = 0; i < state->nr_interface_devices; ++i) + DEBUG(PLUGIN_NAME " plugin: interface device %d %s:%s", + i, virDomainGetName(state->interface_devices[i].dom), + state->interface_devices[i].path); + #endif + + /* Get domains' metrics */ + for (int i = 0; i < state->nr_domains; ++i) { + int status = get_domain_metrics(&state->domains[i]); + if (status != 0) + ERROR(PLUGIN_NAME " failed to get metrics for domain=%s", + virDomainGetName(state->domains[i].ptr)); + } + + /* Get block device stats for each domain. */ + for (int i = 0; i < state->nr_block_devices; ++i) { + int status = get_block_stats(&state->block_devices[i]); + if (status != 0) + ERROR(PLUGIN_NAME + " failed to get stats for block device (%s) in domain %s", + state->block_devices[i].path, + virDomainGetName(state->domains[i].ptr)); + } + + /* Get interface stats for each domain. */ + for (int i = 0; i < state->nr_interface_devices; ++i) { + int status = get_if_dev_stats(&state->interface_devices[i]); + if (status != 0) + ERROR(PLUGIN_NAME + " failed to get interface stats for device (%s) in domain %s", + state->interface_devices[i].path, + virDomainGetName(state->interface_devices[i].dom)); + } return 0; } @@ -867,7 +1926,7 @@ static int lv_init_instance(size_t i, plugin_read_cb callback) { memset(lv_ud, 0, sizeof(*lv_ud)); - ssnprintf(inst->tag, sizeof(inst->tag), "%s-%zu", PLUGIN_NAME, i); + snprintf(inst->tag, sizeof(inst->tag), "%s-%" PRIsz, PLUGIN_NAME, i); inst->id = i; user_data_t *ud = &(lv_ud->ud); @@ -875,6 +1934,7 @@ static int lv_init_instance(size_t i, plugin_read_cb callback) { ud->free_func = NULL; INFO(PLUGIN_NAME " plugin: reader %s initialized", inst->tag); + return plugin_register_complex_read(NULL, inst->tag, callback, 0, ud); } @@ -889,6 +1949,7 @@ static void lv_fini_instance(size_t i) { struct lv_read_state *state = &(inst->read_state); lv_clean_read_state(state); + INFO(PLUGIN_NAME " plugin: reader %s finalized", inst->tag); } @@ -896,13 +1957,25 @@ static int lv_init(void) { if (virInitialize() != 0) return -1; + /* event implementation must be registered before connection is opened */ + if (!persistent_notification) + if (register_event_impl() != 0) + return -1; + if (lv_connect() != 0) return -1; + DEBUG(PLUGIN_NAME " plugin: starting event loop"); + + if (!persistent_notification) + if (start_event_loop() != 0) + return -1; + DEBUG(PLUGIN_NAME " plugin: starting %i instances", nr_instances); for (int i = 0; i < nr_instances; ++i) - lv_init_instance(i, lv_read); + if (lv_init_instance(i, lv_read) != 0) + return -1; return 0; } @@ -927,8 +2000,8 @@ static int lv_domain_get_tag(xmlXPathContextPtr xpath_ctx, const char *dom_name, goto done; } - ssnprintf(xpath_str, sizeof(xpath_str), "/domain/metadata/%s:%s/text()", - METADATA_VM_PARTITION_PREFIX, METADATA_VM_PARTITION_ELEMENT); + snprintf(xpath_str, sizeof(xpath_str), "/domain/metadata/%s:%s/text()", + METADATA_VM_PARTITION_PREFIX, METADATA_VM_PARTITION_ELEMENT); xpath_obj = xmlXPathEvalExpression((xmlChar *)xpath_str, xpath_ctx); if (xpath_obj == NULL) { ERROR(PLUGIN_NAME " plugin: xmlXPathEval(%s) failed on domain %s", @@ -1001,6 +2074,18 @@ static int lv_instance_include_domain(struct lv_read_instance *inst, return 0; } +/* + virConnectListAllDomains() appeared in 0.10.2 + Note that LIBVIR_CHECK_VERSION appeared a year later, so + in some systems which actually have virConnectListAllDomains() + we can't detect this. + */ +#ifdef LIBVIR_CHECK_VERSION +#if LIBVIR_CHECK_VERSION(0, 10, 2) +#define HAVE_LIST_ALL_DOMAINS 1 +#endif +#endif + static int refresh_lists(struct lv_read_instance *inst) { struct lv_read_state *state = &inst->read_state; int n; @@ -1014,6 +2099,11 @@ static int refresh_lists(struct lv_read_instance *inst) { lv_clean_read_state(state); if (n > 0) { +#ifdef HAVE_LIST_ALL_DOMAINS + virDomainPtr *domains; + n = virConnectListAllDomains(conn, &domains, + VIR_CONNECT_LIST_DOMAINS_ACTIVE); +#else int *domids; /* Get list of domains. */ @@ -1024,28 +2114,38 @@ static int refresh_lists(struct lv_read_instance *inst) { } n = virConnectListDomains(conn, domids, n); +#endif + if (n < 0) { VIRT_ERROR(conn, "reading list of domains"); +#ifndef HAVE_LIST_ALL_DOMAINS sfree(domids); +#endif return -1; } /* Fetch each domain and add it to the list, unless ignore. */ for (int i = 0; i < n; ++i) { - virDomainPtr dom = NULL; const char *name; char *xml = NULL; xmlDocPtr xml_doc = NULL; xmlXPathContextPtr xpath_ctx = NULL; xmlXPathObjectPtr xpath_obj = NULL; char tag[PARTITION_TAG_MAX_LEN] = {'\0'}; + virDomainInfo info; + int status; +#ifdef HAVE_LIST_ALL_DOMAINS + virDomainPtr dom = domains[i]; +#else + virDomainPtr dom = NULL; dom = virDomainLookupByID(conn, domids[i]); if (dom == NULL) { VIRT_ERROR(conn, "virDomainLookupByID"); /* Could be that the domain went away -- ignore it anyway. */ continue; } +#endif name = virDomainGetName(dom); if (name == NULL) { @@ -1053,6 +2153,18 @@ static int refresh_lists(struct lv_read_instance *inst) { goto cont; } + status = virDomainGetInfo(dom, &info); + if (status != 0) { + ERROR(PLUGIN_NAME " plugin: virDomainGetInfo failed with status %i.", + status); + continue; + } + + if (info.state != VIR_DOMAIN_RUNNING) { + DEBUG(PLUGIN_NAME " plugin: skipping inactive domain %s", name); + continue; + } + if (il_domains && ignorelist_match(il_domains, name) != 0) goto cont; @@ -1174,7 +2286,11 @@ static int refresh_lists(struct lv_read_instance *inst) { sfree(xml); } +#ifdef HAVE_LIST_ALL_DOMAINS + sfree(domains); +#else sfree(domids); +#endif } DEBUG(PLUGIN_NAME " plugin#%s: refreshing" @@ -1188,7 +2304,7 @@ static int refresh_lists(struct lv_read_instance *inst) { static void free_domains(struct lv_read_state *state) { if (state->domains) { for (int i = 0; i < state->nr_domains; ++i) - virDomainFree(state->domains[i]); + virDomainFree(state->domains[i].ptr); sfree(state->domains); } state->domains = NULL; @@ -1196,7 +2312,7 @@ static void free_domains(struct lv_read_state *state) { } static int add_domain(struct lv_read_state *state, virDomainPtr dom) { - virDomainPtr *new_ptr; + domain_t *new_ptr; int new_size = sizeof(state->domains[0]) * (state->nr_domains + 1); if (state->domains) @@ -1208,7 +2324,10 @@ static int add_domain(struct lv_read_state *state, virDomainPtr dom) { return -1; state->domains = new_ptr; - state->domains[state->nr_domains] = dom; + state->domains[state->nr_domains].ptr = dom; + memset(&state->domains[state->nr_domains].info, 0, + sizeof(state->domains[state->nr_domains].info)); + return state->nr_domains++; } @@ -1311,13 +2430,13 @@ static int ignore_device_match(ignorelist_t *il, const char *domname, if ((domname == NULL) || (devpath == NULL)) return 0; - n = sizeof(char) * (strlen(domname) + strlen(devpath) + 2); + n = strlen(domname) + strlen(devpath) + 2; name = malloc(n); if (name == NULL) { ERROR(PLUGIN_NAME " plugin: malloc failed."); return 0; } - ssnprintf(name, n, "%s:%s", domname, devpath); + snprintf(name, n, "%s:%s", domname, devpath); r = ignorelist_match(il, name); sfree(name); return r; @@ -1328,6 +2447,11 @@ static int lv_shutdown(void) { lv_fini_instance(i); } + DEBUG(PLUGIN_NAME " plugin: stopping event loop"); + + if (!persistent_notification) + stop_event_loop(); + lv_disconnect(); ignorelist_free(il_domains);