2 * collectd - src/rdtmon.c
4 * Copyright(c) 2016 Intel Corporation. All rights reserved.
6 * Permission is hereby granted, free of charge, to any person obtaining a copy of
7 * this software and associated documentation files (the "Software"), to deal in
8 * the Software without restriction, including without limitation the rights to
9 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
10 * of the Software, and to permit persons to whom the Software is furnished to do
11 * so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * Serhiy Pshyk <serhiyx.pshyk@intel.com>
/*
 * Plugin-wide constants.
 * RDTMON_PLUGIN is the plugin name: it prefixes every log message in this file
 * and is the name passed to the plugin_register_* calls in module_register().
 * RDTMON_MAX_CORES (8 sockets * 64 cores per socket = 512) sizes the cgroups
 * and pgroups arrays of the plugin context and the scratch arrays used while
 * parsing and dumping core groups.
 */
32 #define RDTMON_PLUGIN "rdtmon"
34 #define RDTMON_MAX_SOCKETS 8
35 #define RDTMON_MAX_SOCKET_CORES 64
36 #define RDTMON_MAX_CORES (RDTMON_MAX_SOCKET_CORES * RDTMON_MAX_SOCKETS)
/*
 * Description of one monitored group of cores.
 * `events` holds the pqos_mon_event mask assigned to the group in
 * rdtmon_config_cgroups() and tested in rdtmon_read().
 * NOTE(review): only the `events` member is visible in this listing; other
 * code in this file also accesses `desc`, `cores` and `num_cores` members of
 * this type, so their declarations appear to be missing here.
 */
38 struct rdtmon_core_group_s {
42 enum pqos_mon_event events;
44 typedef struct rdtmon_core_group_s rdtmon_core_group_t;
/*
 * Plugin context.
 * cgroups[i] and pgroups[i] are used with the same index throughout the file:
 * the former describes a core group, the latter points to the pqos_mon_data
 * buffer allocated for it in rdtmon_config_cgroups().
 * pqos_cpu and pqos_cap are filled by pqos_cap_get() in rdtmon_preinit();
 * cap_mon is checked there after pqos_cap_get_type().
 * g_rdtmon is the single instance; it stays NULL until rdtmon_preinit()
 * allocates it.
 * NOTE(review): the opening line of the struct and the `num_groups` member
 * used elsewhere in the file are not visible in this listing.
 */
47 rdtmon_core_group_t cgroups[RDTMON_MAX_CORES];
48 struct pqos_mon_data *pgroups[RDTMON_MAX_CORES];
50 const struct pqos_cpuinfo *pqos_cpu;
51 const struct pqos_cap *pqos_cap;
52 const struct pqos_capability *cap_mon;
54 typedef struct rdtmon_ctx_s rdtmon_ctx_t;
56 static rdtmon_ctx_t *g_rdtmon = NULL;
/*
 * isdup - helper that scans the first `size` entries of `nums` for `val`.
 * strlisttonums() negates the result to decide whether a value still has to
 * be stored.
 * NOTE(review): the loop body and the return statements are not visible in
 * this listing; presumably the result is non-zero when `val` is already
 * present - confirm.
 */
58 static int isdup(const uint64_t *nums, unsigned size, uint64_t val) {
59 for (unsigned i = 0; i < size; i++)
/*
 * strtouint64 - converts the string `s` into `*n` with strtoull() and base 0,
 * so the prefix of the text selects decimal, hexadecimal or octal parsing.
 * The visible check treats an empty string, or any characters left unparsed
 * after the number, as a conversion failure and logs a DEBUG message.
 * NOTE(review): errno is not inspected in the visible lines, so an
 * out-of-range input (strtoull saturating with ERANGE) appears to be accepted
 * as a valid value - confirm.
 * NOTE(review): the return statements are not visible in this listing; callers
 * store the result in `ret` - confirm the error convention.
 */
65 static int strtouint64(const char *s, uint64_t *n) {
71 *n = strtoull(s, &endptr, 0);
73 if (!(*s != '\0' && *endptr == '\0')) {
74 DEBUG(RDTMON_PLUGIN ": Error converting '%s' to unsigned number.", s);
/*
 * strlisttonums - review summary of the visible logic:
 *  - returns early (statement not visible) when `s` or `nums` is NULL or
 *    `max` is 0;
 *  - splits `s` in place on commas with strtok_r(), so the caller must pass a
 *    writable buffer;
 *  - skips leading whitespace of every token;
 *  - a token containing a dash is parsed as a start-end range and every value
 *    of the range is considered; any other token is parsed as one number;
 *  - a value is only stored when isdup() does not report it as present.
 * NOTE(review): isspace() receives a plain char; cast the argument to
 * unsigned char to avoid undefined behaviour for negative values.
 * NOTE(review): the range loop uses a uint64_t counter with `n <= end`, which
 * cannot terminate on its own when end equals UINT64_MAX; the check against
 * `max` is not visible in this listing - confirm it exists.
 * NOTE(review): several lines of this function, including its comment
 * delimiters, are missing from this listing.
 */
86 * Converts string of characters representing list of numbers into array of
87 * numbers. Allowed formats are:
92 * Numbers can be in decimal or hexadecimal format.
95 * `s' String representing list of unsigned numbers.
96 * `nums' Array to put converted numeric values into.
97 * `max' Maximum number of elements that nums can accommodate.
100 * Number of elements placed into nums.
102 static unsigned strlisttonums(char *s, uint64_t *nums, unsigned max) {
105 char *saveptr = NULL;
107 if (s == NULL || nums == NULL || max == 0)
114 token = strtok_r(s, ",", &saveptr);
120 while (isspace(*token))
125 p = strchr(token, '-');
127 uint64_t n, start, end;
129 ret = strtouint64(token, &start);
132 ret = strtouint64(p + 1, &end);
140 for (n = start; n <= end; n++) {
141 if (!(isdup(nums, index, n))) {
151 ret = strtouint64(token, &val);
155 if (!(isdup(nums, index, val))) {
/*
 * cgroup_cmp - review summary of the visible logic:
 * both arguments are asserted non-NULL, then every core of cg_a is compared
 * with every core of cg_b in a nested loop (work grows with the product of
 * the two group sizes). The final test compares both sizes with a `found`
 * counter to detect identical groups.
 * NOTE(review): num_cores is copied into `const int`; if the member has an
 * unsigned or size type this narrows - confirm the member type.
 * NOTE(review): the update of `found` and all return statements are missing
 * from this listing; the return contract is the one stated in the original
 * comment text below.
 */
172 * Function to compare cores in 2 core groups.
175 * `cg_a' Pointer to core group a.
176 * `cg_b' Pointer to core group b.
179 * 1 if both groups contain the same cores
180 * 0 if none of their cores match
181 * -1 if some but not all cores match
183 static int cgroup_cmp(const rdtmon_core_group_t *cg_a,
184 const rdtmon_core_group_t *cg_b) {
187 assert(cg_a != NULL);
188 assert(cg_b != NULL);
190 const int sz_a = cg_a->num_cores;
191 const int sz_b = cg_b->num_cores;
192 const unsigned *tab_a = cg_a->cores;
193 const unsigned *tab_b = cg_b->cores;
195 for (int i = 0; i < sz_a; i++) {
196 for (int j = 0; j < sz_b; j++)
197 if (tab_a[i] == tab_b[j])
200 /* if no cores are the same */
203 /* if group contains same cores */
204 if (sz_a == sz_b && sz_b == found)
206 /* if not all cores are the same */
/*
 * cgroup_set - fills the core group `cg`.
 * Visible steps: `desc` and `cores` are asserted non-NULL and num_cores is
 * asserted to be positive; a table of num_cores unsigned entries is allocated
 * into cg->cores (an ERROR is logged when the allocation fails); num_cores is
 * recorded; every uint64_t core id is copied, narrowed to unsigned.
 * NOTE(review): the second line of the signature, the statement that stores
 * `desc` in the group and the return statements are not visible in this
 * listing. Callers pass heap strings (strdup / ssnprintf_alloc), so ownership
 * of `desc` presumably moves to the group - confirm.
 */
210 static int cgroup_set(rdtmon_core_group_t *cg, char *desc, uint64_t *cores,
213 assert(desc != NULL);
214 assert(cores != NULL);
215 assert(num_cores > 0);
217 cg->cores = malloc(sizeof(unsigned) * num_cores);
218 if (cg->cores == NULL) {
219 ERROR(RDTMON_PLUGIN ": Error allocating core group table");
222 cg->num_cores = num_cores;
225 for (int i = 0; i < num_cores; i++)
226 cg->cores[i] = (unsigned)cores[i];
/*
 * oconfig_to_cgroups - review summary of the visible logic:
 * for every non-empty string value of `item` the text is copied into a
 * DATA_MAX_NAME_LEN scratch buffer with sstrncpy(), the core array is zeroed,
 * the copy is parsed by strlisttonums() (which tokenizes its argument in
 * place, hence the scratch copy), and cgroup_set() is called on groups[index]
 * with a strdup() copy of the original string as description.
 * An ERROR is logged for a value that cannot be parsed and a WARNING when too
 * many groups are configured.
 * NOTE(review): no NULL check is visible between strdup() and cgroup_set(),
 * which only asserts that the description is non-NULL - confirm.
 * NOTE(review): a value longer than the scratch buffer is presumably truncated
 * by sstrncpy() before parsing - confirm.
 * NOTE(review): the third line of the signature and the return statements are
 * missing from this listing.
 */
236 * Function to set the descriptions and cores for each core group.
237 * Takes a config option containing list of strings that are used to set
241 * `item' Config option containing core groups.
242 * `groups' Table of core groups to set values in.
243 * `max' Maximum number of core groups allowed.
246 * On success, the number of core groups set up. On error, appropriate
247 * negative error value.
249 static int oconfig_to_cgroups(oconfig_item_t *item, rdtmon_core_group_t *groups,
252 unsigned n, index = 0;
253 uint64_t cores[RDTMON_MAX_CORES];
254 char value[DATA_MAX_NAME_LEN];
256 assert(groups != NULL);
258 assert(item != NULL);
260 for (int j = 0; j < item->values_num; j++) {
261 if (item->values[j].value.string != NULL &&
262 strlen(item->values[j].value.string)) {
265 sstrncpy(value, item->values[j].value.string, sizeof(value));
267 memset(cores, 0, sizeof(cores));
269 n = strlisttonums(value, cores, RDTMON_MAX_CORES);
271 ERROR(RDTMON_PLUGIN ": Error parsing core group (%s)", value);
275 desc = strdup(item->values[j].value.string);
277 /* set core group info */
278 ret = cgroup_set(&groups[index], desc, cores, n);
287 WARNING(RDTMON_PLUGIN ": Too many core groups configured");
/*
 * rdtmon_dump_cgroups - DEBUG-logs the number of configured core groups and,
 * for every group, its index, description, core list and event mask.
 * The core list is built in a stack buffer of RDTMON_MAX_CORES * 4 bytes by
 * appending with snprintf() at the current strlen() offset; the buffer is
 * zeroed before each group.
 * NOTE(review): the statement guarded by the g_rdtmon NULL check is not
 * visible in this listing - presumably an early return.
 * NOTE(review): core ids are read through `const unsigned *` in cgroup_cmp(),
 * so the %d conversion used for them should be %u.
 * NOTE(review): 4 bytes per core leave room for a space and three digits; a
 * full group of larger core ids would be cut short by the snprintf() bound.
 */
297 static void rdtmon_dump_cgroups(void) {
298 char cores[RDTMON_MAX_CORES * 4];
300 if (g_rdtmon == NULL)
303 DEBUG(RDTMON_PLUGIN ": Core Groups Dump");
304 DEBUG(RDTMON_PLUGIN ": groups count: %d", g_rdtmon->num_groups);
306 for (int i = 0; i < g_rdtmon->num_groups; i++) {
308 memset(cores, 0, sizeof(cores));
309 for (int j = 0; j < g_rdtmon->cgroups[i].num_cores; j++) {
310 snprintf(cores + strlen(cores), sizeof(cores) - strlen(cores) - 1, " %d",
311 g_rdtmon->cgroups[i].cores[j]);
314 DEBUG(RDTMON_PLUGIN ": group[%d]:", i);
315 DEBUG(RDTMON_PLUGIN ": description: %s", g_rdtmon->cgroups[i].desc);
316 DEBUG(RDTMON_PLUGIN ": cores: %s", cores);
317 DEBUG(RDTMON_PLUGIN ": events: 0x%X", g_rdtmon->cgroups[i].events);
/* bytes_to_kb - converts a byte count to units of 1024 bytes. */
323 static inline double bytes_to_kb(const double bytes) { return bytes / 1024.0; }
/* bytes_to_mb - converts a byte count to units of 1024 * 1024 bytes. */
325 static inline double bytes_to_mb(const double bytes) {
326 return bytes / (1024.0 * 1024.0);
/*
 * rdtmon_dump_data - DEBUG-logs a table with one row per core group: the group
 * description, the RMID taken from poll_ctx[0], the LLC occupancy converted
 * with bytes_to_kb() and the local and remote memory bandwidth deltas
 * converted with bytes_to_mb().
 * NOTE(review): unlike rdtmon_dump_cgroups(), no NULL check of g_rdtmon is
 * visible here - confirm that callers guarantee it.
 * NOTE(review): the trailing #endif suggests this helper is compiled only
 * when COLLECT_DEBUG is set; the matching #if is not visible in this listing.
 */
329 static void rdtmon_dump_data(void) {
331 * CORE - monitored group of cores
332 * RMID - Resource Monitoring ID associated with the monitored group
333 * LLC - last level cache occupancy
334 * MBL - local memory bandwidth
335 * MBR - remote memory bandwidth
337 DEBUG(" CORE RMID LLC[KB] MBL[MB] MBR[MB]");
338 for (int i = 0; i < g_rdtmon->num_groups; i++) {
340 const struct pqos_event_values *pv = &g_rdtmon->pgroups[i]->values;
342 double llc = bytes_to_kb(pv->llc);
343 double mbr = bytes_to_mb(pv->mbm_remote_delta);
344 double mbl = bytes_to_mb(pv->mbm_local_delta);
346 DEBUG(" [%s] %8u %10.1f %10.1f %10.1f", g_rdtmon->cgroups[i].desc,
347 g_rdtmon->pgroups[i]->poll_ctx[0].rmid, llc, mbl, mbr);
350 #endif /* COLLECT_DEBUG */
/*
 * rdtmon_free_cgroups - releases the per-group resources of every slot of the
 * context (all RDTMON_MAX_CORES slots, not only the first num_groups): the
 * description string, the core table (num_cores is reset to 0 with it) and
 * the pqos_mon_data buffer.
 * NOTE(review): g_rdtmon is dereferenced without a NULL check in the visible
 * lines - confirm that every caller runs after a successful preinit.
 * NOTE(review): the if-guards are redundant if sfree() tolerates NULL like
 * free() does - confirm against its definition.
 */
352 static void rdtmon_free_cgroups(void) {
353 for (int i = 0; i < RDTMON_MAX_CORES; i++) {
354 if (g_rdtmon->cgroups[i].desc) {
355 sfree(g_rdtmon->cgroups[i].desc);
358 if (g_rdtmon->cgroups[i].cores) {
359 sfree(g_rdtmon->cgroups[i].cores);
360 g_rdtmon->cgroups[i].num_cores = 0;
363 if (g_rdtmon->pgroups[i]) {
364 sfree(g_rdtmon->pgroups[i]);
/*
 * rdtmon_default_cgroups - creates one single-core group per core listed in
 * g_rdtmon->pqos_cpu. The description of each group is the logical core
 * number formatted as a decimal string by ssnprintf_alloc().
 * The visible return path yields pqos_cpu->num_cores.
 * NOTE(review): the loop is bounded only by pqos_cpu->num_cores; nothing
 * visible limits it to RDTMON_MAX_CORES, the size of g_rdtmon->cgroups -
 * confirm that a larger system cannot overrun the array.
 * NOTE(review): the assignment of `core` and the error handling after
 * ssnprintf_alloc() and cgroup_set() are not visible in this listing.
 */
369 static int rdtmon_default_cgroups(void) {
372 /* configure each core in separate group */
373 for (int i = 0; i < g_rdtmon->pqos_cpu->num_cores; i++) {
377 desc = ssnprintf_alloc("%d", g_rdtmon->pqos_cpu->cores[i].lcore);
381 /* set core group info */
382 ret = cgroup_set(&g_rdtmon->cgroups[i], desc, &core, 1);
389 return g_rdtmon->pqos_cpu->num_cores;
/*
 * rdtmon_config_cgroups - processes the core-group configuration item.
 * Visible steps:
 *  1. every value of `item` must have type OCONFIG_TYPE_STRING, otherwise an
 *     ERROR naming the index is logged;
 *  2. oconfig_to_cgroups() fills g_rdtmon->cgroups;
 *  3. rdtmon_default_cgroups() creates per-core groups (per the original
 *     comment, when the option is empty);
 *  4. the event mask is the OR of all event types reported by cap_mon, with
 *     PQOS_PERF_EVENT_LLC_MISS cleared;
 *  5. num_groups is stored, every group is compared with all earlier groups
 *     by cgroup_cmp(), the mask is assigned and a pqos_mon_data buffer is
 *     allocated for the group.
 * Every visible failure path calls rdtmon_free_cgroups() before logging.
 * NOTE(review): malloc() leaves the pqos_mon_data buffer uninitialised;
 * consider calloc() if the PQoS library reads any member before writing it -
 * confirm against the library documentation.
 * NOTE(review): the conditions around the error paths and all return
 * statements are missing from this listing.
 */
392 static int rdtmon_config_cgroups(oconfig_item_t *item) {
394 enum pqos_mon_event events = 0;
397 DEBUG(RDTMON_PLUGIN ": cgroups_config: Invalid argument.");
401 DEBUG(RDTMON_PLUGIN ": Core groups [%d]:", item->values_num);
402 for (int j = 0; j < item->values_num; j++) {
403 if (item->values[j].type != OCONFIG_TYPE_STRING) {
404 ERROR(RDTMON_PLUGIN ": given core group value is not a string [idx=%d]",
408 DEBUG(RDTMON_PLUGIN ": [%d]: %s", j, item->values[j].value.string);
411 n = oconfig_to_cgroups(item, g_rdtmon->cgroups, RDTMON_MAX_CORES);
413 rdtmon_free_cgroups();
414 ERROR(RDTMON_PLUGIN ": Error parsing core groups configuration.");
419 /* create default core groups if "Cores" config option is empty */
420 n = rdtmon_default_cgroups();
422 rdtmon_free_cgroups();
424 ": Error creating default core groups configuration.");
428 ": No core groups configured. Default core groups created.");
431 /* Get all available events on this platform */
432 for (int i = 0; i < g_rdtmon->cap_mon->u.mon->num_events; i++)
433 events |= g_rdtmon->cap_mon->u.mon->events[i].type;
435 events &= ~(PQOS_PERF_EVENT_LLC_MISS);
437 DEBUG(RDTMON_PLUGIN ": Available events to monitor [0x%X]", events);
439 g_rdtmon->num_groups = n;
440 for (int i = 0; i < n; i++) {
443 for (int j = 0; j < i; j++) {
444 found = cgroup_cmp(&g_rdtmon->cgroups[j], &g_rdtmon->cgroups[i]);
446 rdtmon_free_cgroups();
447 ERROR(RDTMON_PLUGIN ": Cannot monitor same cores in different groups.");
452 g_rdtmon->cgroups[i].events = events;
453 g_rdtmon->pgroups[i] = malloc(sizeof(struct pqos_mon_data));
454 if (g_rdtmon->pgroups[i] == NULL) {
455 rdtmon_free_cgroups();
456 ERROR(RDTMON_PLUGIN ": Failed to allocate memory for monitoring data.");
/*
 * rdtmon_preinit - sets up the plugin context and the PQoS library.
 * It is called from both rdtmon_config() and rdtmon_init(); the g_rdtmon NULL
 * test at the top detects that a previous call already did the work.
 * Visible steps: allocate and zero the context; zero a pqos_config, direct
 * the library log to STDOUT_FILENO with verbose set to 0; pqos_init();
 * pqos_cap_get() to obtain the capability and CPU information;
 * pqos_cap_get_type() for PQOS_CAP_TYPE_MON; fail when cap_mon is still NULL.
 * Failures jump to the rdtmon_preinit_error2 / rdtmon_preinit_error1 labels.
 * NOTE(review): malloc() followed by memset() could be a single calloc().
 * NOTE(review): after pqos_cap_get_type() only PQOS_RETVAL_PARAM is tested;
 * other failures rely on the cap_mon NULL check that follows.
 * NOTE(review): the cleanup statements under both labels, the call made
 * before pqos_init() and the return statements are not visible in this
 * listing.
 */
464 static int rdtmon_preinit(void) {
465 struct pqos_config pqos_cfg;
468 if (g_rdtmon != NULL) {
469 /* already initialized if config callback was called before init callback */
473 g_rdtmon = malloc(sizeof(rdtmon_ctx_t));
474 if (g_rdtmon == NULL) {
475 ERROR(RDTMON_PLUGIN ": Failed to allocate memory for rdtmon context.");
479 memset(g_rdtmon, 0, sizeof(rdtmon_ctx_t));
481 /* init PQoS library */
482 memset(&pqos_cfg, 0, sizeof(pqos_cfg));
484 * stdout should not be used here. Will be reworked when support of log
485 * callback is added to PQoS library.
487 pqos_cfg.fd_log = STDOUT_FILENO;
488 pqos_cfg.verbose = 0;
490 /* In case previous instance of the application was not closed properly
491 * call fini and ignore return code. */
494 ret = pqos_init(&pqos_cfg);
495 if (ret != PQOS_RETVAL_OK) {
496 ERROR(RDTMON_PLUGIN ": Error initializing PQoS library!");
497 goto rdtmon_preinit_error1;
500 ret = pqos_cap_get(&g_rdtmon->pqos_cap, &g_rdtmon->pqos_cpu);
501 if (ret != PQOS_RETVAL_OK) {
502 ERROR(RDTMON_PLUGIN ": Error retrieving PQoS capabilities.");
503 goto rdtmon_preinit_error2;
506 ret = pqos_cap_get_type(g_rdtmon->pqos_cap, PQOS_CAP_TYPE_MON,
508 if (ret == PQOS_RETVAL_PARAM) {
509 ERROR(RDTMON_PLUGIN ": Error retrieving monitoring capabilities.");
510 goto rdtmon_preinit_error2;
513 if (g_rdtmon->cap_mon == NULL) {
516 ": Monitoring capability not detected. Nothing to do for the plugin.");
517 goto rdtmon_preinit_error2;
522 rdtmon_preinit_error2:
525 rdtmon_preinit_error1:
/*
 * rdtmon_config - complex-config callback registered in module_register().
 * Runs rdtmon_preinit() first, then walks the children of `ci`: a child whose
 * key matches Cores (compared case-insensitively) is handed to
 * rdtmon_config_cgroups(), after which the groups are dumped in
 * COLLECT_DEBUG builds; any other key is reported with an ERROR.
 * NOTE(review): the handling of the `ret` values and the return statements
 * are not visible in this listing.
 */
532 static int rdtmon_config(oconfig_item_t *ci) {
535 ret = rdtmon_preinit();
539 for (int i = 0; i < ci->children_num; i++) {
540 oconfig_item_t *child = ci->children + i;
542 if (strcasecmp("Cores", child->key) == 0) {
544 ret = rdtmon_config_cgroups(child);
549 rdtmon_dump_cgroups();
550 #endif /* COLLECT_DEBUG */
553 ERROR(RDTMON_PLUGIN ": Unknown configuration parameter \"%s\".",
/*
 * rdtmon_submit_gauge - dispatches one gauge value to collectd.
 * The value list uses hostname_g as host, RDTMON_PLUGIN as plugin, the core
 * group description wrapped in square brackets as plugin instance and `type`
 * as the value type.
 * NOTE(review): the declaration of `values` and its assignment to the value
 * list are not visible in this listing.
 */
561 static void rdtmon_submit_gauge(char *cgroup, char *type, gauge_t value) {
563 value_list_t vl = VALUE_LIST_INIT;
565 values[0].gauge = value;
568 vl.values_len = STATIC_ARRAY_SIZE(values);
570 sstrncpy(vl.host, hostname_g, sizeof(vl.host));
571 sstrncpy(vl.plugin, RDTMON_PLUGIN, sizeof(vl.plugin));
572 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "[%s]", cgroup);
573 sstrncpy(vl.type, type, sizeof(vl.type));
575 plugin_dispatch_values(&vl);
/*
 * rdtmon_submit_mbm - dispatches the memory bandwidth counters of one core
 * group as a single value list of type mbm.
 * Value order: local, remote, total, local delta, remote delta, total delta.
 * Host, plugin and plugin instance are filled the same way as in
 * rdtmon_submit_gauge().
 * NOTE(review): the value order presumably has to match the data sources of
 * the mbm type definition used by collectd - confirm.
 * NOTE(review): the declaration of `values` and its assignment to the value
 * list are not visible in this listing.
 */
578 static void rdtmon_submit_mbm(char *cgroup,
579 const struct pqos_event_values *pv) {
581 value_list_t vl = VALUE_LIST_INIT;
583 values[0].gauge = pv->mbm_local;
584 values[1].gauge = pv->mbm_remote;
585 values[2].gauge = pv->mbm_total;
586 values[3].gauge = pv->mbm_local_delta;
587 values[4].gauge = pv->mbm_remote_delta;
588 values[5].gauge = pv->mbm_total_delta;
591 vl.values_len = STATIC_ARRAY_SIZE(values);
593 sstrncpy(vl.host, hostname_g, sizeof(vl.host));
594 sstrncpy(vl.plugin, RDTMON_PLUGIN, sizeof(vl.plugin));
595 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "[%s]", cgroup);
596 sstrncpy(vl.type, "mbm", sizeof(vl.type));
598 plugin_dispatch_values(&vl);
/*
 * rdtmon_read - read callback registered in module_register().
 * Logs an ERROR when the plugin context is missing, polls all groups at once
 * with pqos_mon_poll() and then, per group, submits only the data of events
 * present in the group event mask: llc for PQOS_MON_EVENT_L3_OCCUP, ipc for
 * PQOS_PERF_EVENT_IPC, and the mbm value list when any of the local, total
 * or remote memory bandwidth events is set.
 * NOTE(review): the statement inside the COLLECT_DEBUG section and the return
 * statements are not visible in this listing.
 */
601 static int rdtmon_read(user_data_t *ud) {
604 if (g_rdtmon == NULL) {
605 ERROR(RDTMON_PLUGIN ": rdtmon_read: plugin not initialized.");
609 ret = pqos_mon_poll(&g_rdtmon->pgroups[0], (unsigned)g_rdtmon->num_groups);
610 if (ret != PQOS_RETVAL_OK) {
611 ERROR(RDTMON_PLUGIN ": Failed to poll monitoring data.");
617 #endif /* COLLECT_DEBUG */
619 for (int i = 0; i < g_rdtmon->num_groups; i++) {
620 enum pqos_mon_event mbm_events =
621 (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_TMEM_BW |
622 PQOS_MON_EVENT_RMEM_BW);
624 const struct pqos_event_values *pv = &g_rdtmon->pgroups[i]->values;
626 /* Submit only monitored events data */
628 if (g_rdtmon->cgroups[i].events & PQOS_MON_EVENT_L3_OCCUP)
629 rdtmon_submit_gauge(g_rdtmon->cgroups[i].desc, "llc", pv->llc);
631 if (g_rdtmon->cgroups[i].events & PQOS_PERF_EVENT_IPC)
632 rdtmon_submit_gauge(g_rdtmon->cgroups[i].desc, "ipc", pv->ipc);
634 if (g_rdtmon->cgroups[i].events & mbm_events)
635 rdtmon_submit_mbm(g_rdtmon->cgroups[i].desc, pv);
/*
 * rdtmon_init - init callback registered in module_register().
 * Runs rdtmon_preinit() and then starts monitoring for every configured group
 * with pqos_mon_start(), passing the group cores and event mask, the group
 * description as the opaque context pointer and pgroups[i] as the monitoring
 * data buffer. A failing start is logged with the PQoS status code.
 * NOTE(review): whether a failed start aborts initialisation is not visible
 * in this listing (the statements after the ERROR and the returns are
 * missing).
 */
641 static int rdtmon_init(void) {
644 ret = rdtmon_preinit();
648 /* Start monitoring */
649 for (int i = 0; i < g_rdtmon->num_groups; i++) {
650 rdtmon_core_group_t *cg = &g_rdtmon->cgroups[i];
652 ret = pqos_mon_start(cg->num_cores, cg->cores, cg->events, (void *)cg->desc,
653 g_rdtmon->pgroups[i]);
655 if (ret != PQOS_RETVAL_OK) {
656 ERROR(RDTMON_PLUGIN ": Error starting monitoring (pqos status=%d)", ret);
/*
 * rdtmon_shutdown - shutdown callback registered in module_register().
 * Logs an ERROR when the plugin context is missing; otherwise stops
 * monitoring for every group (the result of pqos_mon_stop() is not used),
 * reports a failed PQoS library shutdown and releases the core groups with
 * rdtmon_free_cgroups().
 * NOTE(review): the call that produces `ret` before the shutdown error check
 * is not visible in this listing.
 * NOTE(review): no release of g_rdtmon itself is visible after
 * rdtmon_free_cgroups() - confirm the context is freed and reset to NULL.
 */
664 static int rdtmon_shutdown(void) {
667 DEBUG(RDTMON_PLUGIN ": rdtmon_shutdown.");
669 if (g_rdtmon == NULL) {
670 ERROR(RDTMON_PLUGIN ": rdtmon_shutdown: plugin not initialized.");
674 /* Stop monitoring */
675 for (int i = 0; i < g_rdtmon->num_groups; i++) {
676 pqos_mon_stop(g_rdtmon->pgroups[i]);
680 if (ret != PQOS_RETVAL_OK)
681 ERROR(RDTMON_PLUGIN ": Error shutting down PQoS library.");
683 rdtmon_free_cgroups();
/*
 * module_register - plugin entry point: registers the init, complex-config,
 * complex-read and shutdown callbacks under the RDTMON_PLUGIN name.
 * The read callback is registered with a NULL group, an interval argument of
 * 0 and no user data.
 */
689 void module_register(void) {
690 plugin_register_init(RDTMON_PLUGIN, rdtmon_init);
691 plugin_register_complex_config(RDTMON_PLUGIN, rdtmon_config);
692 plugin_register_complex_read(NULL, RDTMON_PLUGIN, rdtmon_read, 0, NULL);
693 plugin_register_shutdown(RDTMON_PLUGIN, rdtmon_shutdown);