/**
* collectd - src/intel_pmu.c
*
- * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
*
* Authors:
* Serhiy Pshyk <serhiyx.pshyk@intel.com>
+ * Kamil Wiatrowski <kamilx.wiatrowski@intel.com>
**/
#include "collectd.h"
-#include "common.h"
+#include "utils/common/common.h"
-#include "jevents.h"
-#include "jsession.h"
+#include "utils/config_cores/config_cores.h"
+
+#include <jevents.h>
+#include <jsession.h>
#define PMU_PLUGIN "intel_pmu"
typedef struct event_info event_info_t;
struct intel_pmu_ctx_s {
- _Bool hw_cache_events;
- _Bool kernel_pmu_events;
- _Bool sw_events;
- char *hw_specific_events;
+ bool hw_cache_events;
+ bool kernel_pmu_events;
+ bool sw_events;
+ char event_list_fn[PATH_MAX];
+ char **hw_events;
+ size_t hw_events_count;
+ core_groups_list_t cores;
struct eventlist *event_list;
};
typedef struct intel_pmu_ctx_s intel_pmu_ctx_t;
DEBUG(PMU_PLUGIN ": event : %s", e->event);
DEBUG(PMU_PLUGIN ": group_lead: %d", e->group_leader);
DEBUG(PMU_PLUGIN ": end_group : %d", e->end_group);
- DEBUG(PMU_PLUGIN ": type : 0x%X", e->attr.type);
- DEBUG(PMU_PLUGIN ": config : 0x%X", (int)e->attr.config);
+ DEBUG(PMU_PLUGIN ": type : %#x", e->attr.type);
+ DEBUG(PMU_PLUGIN ": config : %#x", (unsigned)e->attr.config);
DEBUG(PMU_PLUGIN ": size : %d", e->attr.size);
}
-
- return;
}
static void pmu_dump_config(void) {
DEBUG(PMU_PLUGIN ": Config:");
DEBUG(PMU_PLUGIN ": hw_cache_events : %d", g_ctx.hw_cache_events);
DEBUG(PMU_PLUGIN ": kernel_pmu_events : %d", g_ctx.kernel_pmu_events);
- DEBUG(PMU_PLUGIN ": sw_events : %d", g_ctx.sw_events);
- DEBUG(PMU_PLUGIN ": hw_specific_events: %s", g_ctx.hw_specific_events);
+ DEBUG(PMU_PLUGIN ": software_events : %d", g_ctx.sw_events);
+
+ for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
+ DEBUG(PMU_PLUGIN ": hardware_events[%" PRIsz "]: %s", i,
+ g_ctx.hw_events[i]);
+ }
+}
+
+/* Log the configured core groups (debug builds only): for each group its
+ * description, core count and the list of core ids. */
+static void pmu_dump_cgroups(void) {
- return;
+ DEBUG(PMU_PLUGIN ": Core groups:");
+
+ for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
+ core_group_t *cgroup = g_ctx.cores.cgroups + i;
+ /* 4 chars per core (leading space + up to 3 digits) plus NUL; ids with
+ * more than 3 digits would be truncated silently since only snprintf < 0
+ * is checked below -- NOTE(review): confirm max expected core id. */
+ const size_t cores_size = cgroup->num_cores * 4 + 1;
+ char *cores = calloc(cores_size, sizeof(*cores));
+ if (cores == NULL) {
+ DEBUG(PMU_PLUGIN ": Failed to allocate string to list cores.");
+ return;
+ }
+ for (size_t j = 0; j < cgroup->num_cores; j++)
+ if (snprintf(cores + strlen(cores), cores_size - strlen(cores), " %d",
+ cgroup->cores[j]) < 0) {
+ DEBUG(PMU_PLUGIN ": Failed to write list of cores to string.");
+ sfree(cores);
+ return;
+ }
+
+ DEBUG(PMU_PLUGIN ": group[%" PRIsz "]", i);
+ DEBUG(PMU_PLUGIN ": description: %s", cgroup->desc);
+ DEBUG(PMU_PLUGIN ": cores count: %" PRIsz, cgroup->num_cores);
+ DEBUG(PMU_PLUGIN ": cores :%s", cores);
+ sfree(cores);
+ }
}
#endif /* COLLECT_DEBUG */
+/* Validate parsed core groups: every core id must be below max_cores and no
+ * core may be a member of more than one group (checked pairwise via
+ * config_cores_cmp_cgroups). Returns 0 on success, -1 on invalid config. */
+static int pmu_validate_cgroups(core_group_t *cgroups, size_t len,
+ int max_cores) {
+ /* i - group index, j - core index */
+ for (size_t i = 0; i < len; i++) {
+ for (size_t j = 0; j < cgroups[i].num_cores; j++) {
+ int core = (int)cgroups[i].cores[j];
+
+ /* Core index cannot exceed number of cores in system,
+ note that max_cores include both online and offline CPUs. */
+ if (core >= max_cores) {
+ ERROR(PMU_PLUGIN ": Core %d is not valid, max core index: %d.", core,
+ max_cores - 1);
+ return -1;
+ }
+ }
+ /* Check if cores are set in remaining groups */
+ for (size_t k = i + 1; k < len; k++)
+ if (config_cores_cmp_cgroups(&cgroups[i], &cgroups[k]) != 0) {
+ ERROR(PMU_PLUGIN ": Same cores cannot be set in different groups.");
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/* Parse the "HardwareEvents" config option: duplicate every string argument
+ * into g_ctx.hw_events, counting them in g_ctx.hw_events_count. Non-string
+ * arguments are skipped with a warning. Returns 0 on success, -EINVAL on a
+ * wrong/duplicate key, -ENOMEM on allocation failure. On a mid-loop -ENOMEM
+ * the already-duplicated entries stay in g_ctx -- presumably freed by the
+ * init-error/shutdown paths; NOTE(review): confirm caller cleanup. */
+static int pmu_config_hw_events(oconfig_item_t *ci) {
+
+ if (strcasecmp("HardwareEvents", ci->key) != 0) {
+ return -EINVAL;
+ }
+
+ if (g_ctx.hw_events) {
+ ERROR(PMU_PLUGIN ": Duplicate config for HardwareEvents.");
+ return -EINVAL;
+ }
+
+ /* one slot per config value; unused slots (non-string values) stay NULL */
+ g_ctx.hw_events = calloc(ci->values_num, sizeof(*g_ctx.hw_events));
+ if (g_ctx.hw_events == NULL) {
+ ERROR(PMU_PLUGIN ": Failed to allocate hw events.");
+ return -ENOMEM;
+ }
+
+ for (int i = 0; i < ci->values_num; i++) {
+ if (ci->values[i].type != OCONFIG_TYPE_STRING) {
+ WARNING(PMU_PLUGIN ": The %s option requires string arguments.", ci->key);
+ continue;
+ }
+
+ g_ctx.hw_events[g_ctx.hw_events_count] = strdup(ci->values[i].value.string);
+ if (g_ctx.hw_events[g_ctx.hw_events_count] == NULL) {
+ ERROR(PMU_PLUGIN ": Failed to allocate hw events entry.");
+ return -ENOMEM;
+ }
+
+ g_ctx.hw_events_count++;
+ }
+
+ return 0;
+}
+
static int pmu_config(oconfig_item_t *ci) {
- int ret = 0;
DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);
for (int i = 0; i < ci->children_num; i++) {
+ int ret = 0;
oconfig_item_t *child = ci->children + i;
- if (strcasecmp("HWCacheEvents", child->key) == 0) {
+ if (strcasecmp("ReportHardwareCacheEvents", child->key) == 0) {
ret = cf_util_get_boolean(child, &g_ctx.hw_cache_events);
- } else if (strcasecmp("KernelPMUEvents", child->key) == 0) {
+ } else if (strcasecmp("ReportKernelPMUEvents", child->key) == 0) {
ret = cf_util_get_boolean(child, &g_ctx.kernel_pmu_events);
- } else if (strcasecmp("HWSpecificEvents", child->key) == 0) {
- ret = cf_util_get_string(child, &g_ctx.hw_specific_events);
- } else if (strcasecmp("SWEvents", child->key) == 0) {
+ } else if (strcasecmp("EventList", child->key) == 0) {
+ ret = cf_util_get_string_buffer(child, g_ctx.event_list_fn,
+ sizeof(g_ctx.event_list_fn));
+ } else if (strcasecmp("HardwareEvents", child->key) == 0) {
+ ret = pmu_config_hw_events(child);
+ } else if (strcasecmp("ReportSoftwareEvents", child->key) == 0) {
ret = cf_util_get_boolean(child, &g_ctx.sw_events);
+ } else if (strcasecmp("Cores", child->key) == 0) {
+ ret = config_cores_parse(child, &g_ctx.cores);
} else {
ERROR(PMU_PLUGIN ": Unknown configuration parameter \"%s\".", child->key);
- ret = (-1);
+ ret = -1;
}
if (ret != 0) {
pmu_dump_config();
#endif
- return (0);
+ return 0;
}
+/* Submit one counter value to collectd: the core-group name becomes
+ * plugin_instance, the event name becomes type_instance; optional meta
+ * (counter scaling info) is attached when given. */
-static void pmu_submit_counter(int cpu, char *event, counter_t value) {
+static void pmu_submit_counter(const char *cgroup, const char *event,
+ counter_t value, meta_data_t *meta) {
value_list_t vl = VALUE_LIST_INIT;
vl.values = &(value_t){.counter = value};
vl.values_len = 1;
sstrncpy(vl.plugin, PMU_PLUGIN, sizeof(vl.plugin));
- if (cpu == -1) {
- snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "all");
- } else {
- snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%d", cpu);
- }
+ sstrncpy(vl.plugin_instance, cgroup, sizeof(vl.plugin_instance));
+ if (meta)
+ vl.meta = meta;
sstrncpy(vl.type, "counter", sizeof(vl.type));
sstrncpy(vl.type_instance, event, sizeof(vl.type_instance));
plugin_dispatch_values(&vl);
}
+/* Build meta data describing counter scaling for one event fd: raw count
+ * (val[0]), time enabled (val[1]) and time running (val[2]). Returns NULL
+ * when the value was not scaled (time_enabled == time_running), when
+ * time_running is zero, or on allocation failure.
+ * NOTE(review): not declared static although it looks file-local, and the
+ * meta_data_add_unsigned_int() return codes are not checked -- confirm. */
-static int pmu_dispatch_data(void) {
+meta_data_t *pmu_meta_data_create(const struct efd *efd) {
+ meta_data_t *meta = NULL;
- struct event *e;
+ /* create meta data only if value was scaled */
+ if (efd->val[1] == efd->val[2] || !efd->val[2]) {
+ return NULL;
+ }
- for (e = g_ctx.event_list->eventlist; e; e = e->next) {
- uint64_t all_value = 0;
- int event_enabled = 0;
- for (int i = 0; i < g_ctx.event_list->num_cpus; i++) {
+ meta = meta_data_create();
+ if (meta == NULL) {
+ ERROR(PMU_PLUGIN ": meta_data_create failed.");
+ return NULL;
+ }
- if (e->efd[i].fd < 0)
- continue;
+ meta_data_add_unsigned_int(meta, "intel_pmu:raw_count", efd->val[0]);
+ meta_data_add_unsigned_int(meta, "intel_pmu:time_enabled", efd->val[1]);
+ meta_data_add_unsigned_int(meta, "intel_pmu:time_running", efd->val[2]);
- event_enabled++;
+ return meta;
+}
- uint64_t value = event_scaled_value(e, i);
- all_value += value;
+/* For every event and every core group: sum the scaled per-core values of
+ * the group and dispatch one counter per (event, group). For single-core
+ * groups, scaling meta data is attached when the kernel multiplexed the
+ * counter. Groups where no core has a valid fd are skipped. */
+static void pmu_dispatch_data(void) {
- /* dispatch per CPU value */
- pmu_submit_counter(i, e->event, value);
- }
+ struct event *e;
+
+ for (e = g_ctx.event_list->eventlist; e; e = e->next) {
+ for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
+ core_group_t *cgroup = g_ctx.cores.cgroups + i;
+ uint64_t cgroup_value = 0;
+ int event_enabled_cgroup = 0;
+ meta_data_t *meta = NULL;
+
+ for (size_t j = 0; j < cgroup->num_cores; j++) {
+ int core = (int)cgroup->cores[j];
+ if (e->efd[core].fd < 0)
+ continue;
+
+ event_enabled_cgroup++;
+
+ /* If there are more events than counters, the kernel uses time
+ * multiplexing. With multiplexing, at the end of the run,
+ * the counter is scaled basing on total time enabled vs time running.
+ * final_count = raw_count * time_enabled/time_running
+ */
+ uint64_t value = event_scaled_value(e, core);
+ cgroup_value += value;
+
+ /* get meta data with information about scaling */
+ if (cgroup->num_cores == 1)
+ meta = pmu_meta_data_create(&e->efd[core]);
+ }
- if (event_enabled > 0) {
- DEBUG(PMU_PLUGIN ": %-20s %'10lu", e->event, all_value);
- /* dispatch all CPU value */
- pmu_submit_counter(-1, e->event, all_value);
+ if (event_enabled_cgroup > 0) {
+ DEBUG(PMU_PLUGIN ": %s/%s = %lu", e->event, cgroup->desc, cgroup_value);
+ /* dispatch per core group value */
+ pmu_submit_counter(cgroup->desc, e->event, cgroup_value, meta);
+ meta_data_destroy(meta);
+ }
}
}
-
- return (0);
}
+/* Plugin read callback: read every event on every configured core (skipping
+ * cores whose fd failed to open), then dispatch the aggregated values.
+ * A single failed read aborts the whole cycle before any dispatch and
+ * returns the error to the daemon. */
static int pmu_read(__attribute__((unused)) user_data_t *ud) {
int ret;
+ struct event *e;
DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);
- ret = read_all_events(g_ctx.event_list);
- if (ret != 0) {
- DEBUG(PMU_PLUGIN ": Failed to read values of all events.");
- return (0);
+ /* read all events only for configured cores */
+ for (e = g_ctx.event_list->eventlist; e; e = e->next) {
+ for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
+ core_group_t *cgroup = g_ctx.cores.cgroups + i;
+ for (size_t j = 0; j < cgroup->num_cores; j++) {
+ int core = (int)cgroup->cores[j];
+ if (e->efd[core].fd < 0)
+ continue;
+
+ ret = read_event(e, core);
+ if (ret != 0) {
+ ERROR(PMU_PLUGIN ": Failed to read value of %s/%d event.", e->event,
+ core);
+ return ret;
+ }
+ }
+ }
}
- ret = pmu_dispatch_data();
- if (ret != 0) {
- DEBUG(PMU_PLUGIN ": Failed to dispatch event values.");
- return (0);
- }
+ pmu_dispatch_data();
- return (0);
+ return 0;
}
static int pmu_add_events(struct eventlist *el, uint32_t type,
- event_info_t *events, int count) {
+ event_info_t *events, size_t count) {
- for (int i = 0; i < count; i++) {
+ for (size_t i = 0; i < count; i++) {
+ /* Allocate memory for event struct that contains array of efd structs
+ for all cores */
struct event *e =
- calloc(sizeof(struct event) + sizeof(struct efd) * el->num_cpus, 1);
+ calloc(1, sizeof(struct event) + sizeof(struct efd) * el->num_cpus);
if (e == NULL) {
ERROR(PMU_PLUGIN ": Failed to allocate event structure");
- return (-ENOMEM);
+ return -ENOMEM;
}
e->attr.type = type;
e->attr.config = events[i].config;
e->attr.size = PERF_ATTR_SIZE_VER0;
- e->group_leader = false;
- e->end_group = false;
- e->next = NULL;
if (!el->eventlist)
el->eventlist = e;
if (el->eventlist_last)
e->event = strdup(events[i].name);
}
- return (0);
+ return 0;
}
+/* Resolve each configured hardware-event string and append the resulting
+ * perf events to the event list. A single entry may contain several
+ * comma-separated event names; those form one perf event group (first
+ * resolved event is marked group leader, last one group end). Names that
+ * cannot be resolved are skipped with a warning. Returns 0 on success,
+ * -1 or -ENOMEM on allocation failure (entries added so far stay in the
+ * list -- presumably freed via pmu_free_events; TODO confirm). */
-static int pmu_parse_events(struct eventlist *el, char *events) {
- char *s, *tmp;
+static int pmu_add_hw_events(struct eventlist *el, char **e, size_t count) {
- events = strdup(events);
- if (!events)
- return -1;
+ for (size_t i = 0; i < count; i++) {
- for (s = strtok_r(events, ",", &tmp); s; s = strtok_r(NULL, ",", &tmp)) {
- bool group_leader = false, end_group = false;
- int len;
+ size_t group_events_count = 0;
- if (s[0] == '{') {
- s++;
- group_leader = true;
- } else if (len = strlen(s), len > 0 && s[len - 1] == '}') {
- s[len - 1] = 0;
- end_group = true;
- }
+ /* strtok_r modifies the string, so work on a private copy */
+ char *events = strdup(e[i]);
+ if (!events)
+ return -1;
- struct event *e =
- calloc(sizeof(struct event) + sizeof(struct efd) * el->num_cpus, 1);
- if (e == NULL) {
- free(events);
- return (-ENOMEM);
- }
+ char *s, *tmp = NULL;
+ for (s = strtok_r(events, ",", &tmp); s; s = strtok_r(NULL, ",", &tmp)) {
+
+ /* Allocate memory for event struct that contains array of efd structs
+ for all cores */
+ struct event *e =
+ calloc(1, sizeof(struct event) + sizeof(struct efd) * el->num_cpus);
+ if (e == NULL) {
+ free(events);
+ return -ENOMEM;
+ }
+
+ if (resolve_event(s, &e->attr) != 0) {
+ WARNING(PMU_PLUGIN ": Cannot resolve %s", s);
+ sfree(e);
+ continue;
+ }
+
+ /* Multiple events parsed in one entry */
+ if (group_events_count == 1) {
+ /* Mark previously added event as group leader */
+ el->eventlist_last->group_leader = 1;
+ }
e->next = NULL;
if (!el->eventlist)
el->eventlist = e;
el->eventlist_last->next = e;
el->eventlist_last = e;
e->event = strdup(s);
- } else {
- DEBUG(PMU_PLUGIN ": Cannot resolve %s", s);
- sfree(e);
+
+ group_events_count++;
+ }
+
+ /* Multiple events parsed in one entry */
+ if (group_events_count > 1) {
+ /* Mark last added event as group end */
+ el->eventlist_last->end_group = 1;
}
- }
- free(events);
+ free(events);
+ }
- return (0);
+ return 0;
}
static void pmu_free_events(struct eventlist *el) {
while (e) {
struct event *next = e->next;
+ sfree(e->event);
sfree(e);
e = next;
}
for (e = el->eventlist; e; e = e->next) {
- for (int i = 0; i < el->num_cpus; i++) {
- if (setup_event(e, i, leader, measure_all, measure_pid) < 0) {
- WARNING(PMU_PLUGIN ": perf event '%s' is not available (cpu=%d).",
- e->event, i);
- } else {
- /* success if at least one event was set */
- ret = 0;
+ for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
+ core_group_t *cgroup = g_ctx.cores.cgroups + i;
+ for (size_t j = 0; j < cgroup->num_cores; j++) {
+ int core = (int)cgroup->cores[j];
+
+ if (setup_event(e, core, leader, measure_all, measure_pid) < 0) {
+ WARNING(PMU_PLUGIN ": perf event '%s' is not available (cpu=%d).",
+ e->event, core);
+ } else {
+ /* success if at least one event was set */
+ ret = 0;
+ }
}
}
g_ctx.event_list = alloc_eventlist();
if (g_ctx.event_list == NULL) {
ERROR(PMU_PLUGIN ": Failed to allocate event list.");
- return (-ENOMEM);
+ return -ENOMEM;
+ }
+
+ if (g_ctx.cores.num_cgroups == 0) {
+ ret = config_cores_default(g_ctx.event_list->num_cpus, &g_ctx.cores);
+ if (ret != 0) {
+ ERROR(PMU_PLUGIN ": Failed to set default core groups.");
+ goto init_error;
+ }
+ } else {
+ ret = pmu_validate_cgroups(g_ctx.cores.cgroups, g_ctx.cores.num_cgroups,
+ g_ctx.event_list->num_cpus);
+ if (ret != 0) {
+ ERROR(PMU_PLUGIN ": Invalid core groups configuration.");
+ goto init_error;
+ }
}
+#if COLLECT_DEBUG
+ pmu_dump_cgroups();
+#endif
if (g_ctx.hw_cache_events) {
ret =
g_kernel_pmu_events,
STATIC_ARRAY_SIZE(g_kernel_pmu_events));
if (ret != 0) {
- ERROR(PMU_PLUGIN ": Failed to parse kernel PMU events.");
+ ERROR(PMU_PLUGIN ": Failed to add kernel PMU events.");
goto init_error;
}
}
/* parse events names if config option is present and is not empty */
- if (g_ctx.hw_specific_events && (strlen(g_ctx.hw_specific_events) != 0)) {
- ret = pmu_parse_events(g_ctx.event_list, g_ctx.hw_specific_events);
+ if (g_ctx.hw_events_count) {
+
+ ret = read_events(g_ctx.event_list_fn);
if (ret != 0) {
- ERROR(PMU_PLUGIN ": Failed to parse hw specific events.");
+ ERROR(PMU_PLUGIN ": Failed to read event list file '%s'.",
+ g_ctx.event_list_fn);
+ return ret;
+ }
+
+ ret = pmu_add_hw_events(g_ctx.event_list, g_ctx.hw_events,
+ g_ctx.hw_events_count);
+ if (ret != 0) {
+ ERROR(PMU_PLUGIN ": Failed to add hardware events.");
goto init_error;
}
}
": Events list is empty. No events were setup for monitoring.");
}
- return (0);
+ return 0;
init_error:
pmu_free_events(g_ctx.event_list);
sfree(g_ctx.event_list);
- sfree(g_ctx.hw_specific_events);
+ for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
+ sfree(g_ctx.hw_events[i]);
+ }
+ sfree(g_ctx.hw_events);
+ g_ctx.hw_events_count = 0;
+
+ config_cores_cleanup(&g_ctx.cores);
return ret;
}
pmu_free_events(g_ctx.event_list);
sfree(g_ctx.event_list);
- sfree(g_ctx.hw_specific_events);
+ for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
+ sfree(g_ctx.hw_events[i]);
+ }
+ sfree(g_ctx.hw_events);
+ g_ctx.hw_events_count = 0;
+
+ config_cores_cleanup(&g_ctx.cores);
- return (0);
+ return 0;
}
void module_register(void) {