X-Git-Url: https://git.octo.it/?p=collectd.git;a=blobdiff_plain;f=src%2Fintel_rdt.c;h=62848dbfe5e24046fae4a4f3ae76606720fb47b6;hp=f7d9a34166501562b80d794f596b0e333e3eaef2;hb=54619dc85fd308b21ed09a0271e5c7383c7921b9;hpb=3ec575ba1e7402a7477925621565cfb9b167af20 diff --git a/src/intel_rdt.c b/src/intel_rdt.c index f7d9a341..62848dbf 100644 --- a/src/intel_rdt.c +++ b/src/intel_rdt.c @@ -1,7 +1,7 @@ /** * collectd - src/intel_rdt.c * - * Copyright(c) 2016-2018 Intel Corporation. All rights reserved. + * Copyright(c) 2016-2019 Intel Corporation. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -25,11 +25,13 @@ * Serhiy Pshyk * Starzyk, Mateusz * Wojciech Andralojc + * Michał Aleksiński **/ #include "collectd.h" #include "utils/common/common.h" #include "utils/config_cores/config_cores.h" +#include "utils/proc_pids/proc_pids.h" #include #define RDT_PLUGIN "intel_rdt" @@ -51,9 +53,7 @@ * Process name inside comm file is limited to 16 chars. * More info here: http://man7.org/linux/man-pages/man5/proc.5.html */ -#define RDT_MAX_NAME_LEN 16 #define RDT_MAX_NAMES_GROUPS 64 - #define RDT_PROC_PATH "/proc" #endif /* LIBPQOS2 */ @@ -63,28 +63,11 @@ typedef enum { } rdt_config_status; #ifdef LIBPQOS2 -/* Helper typedef for process name array - * Extra 1 char is added for string null termination. - */ -typedef char proc_comm_t[RDT_MAX_NAME_LEN + 1]; - -/* Linked one-way list of pids. */ -typedef struct pids_list_s { - pid_t pid; - struct pids_list_s *next; -} pids_list_t; - -/* Holds process name and list of pids assigned to that name */ -typedef struct proc_pids_s { - proc_comm_t proccess_name; - pids_list_t *pids; -} proc_pids_t; - struct rdt_name_group_s { char *desc; size_t num_names; char **names; - proc_pids_t *proc_pids_array; + proc_pids_t **proc_pids; size_t monitored_pids_count; enum pqos_mon_event events; }; @@ -99,6 +82,8 @@ struct rdt_ctx_s { rdt_name_group_t ngroups[RDT_MAX_NAMES_GROUPS]; struct pqos_mon_data *pngroups[RDT_MAX_NAMES_GROUPS]; size_t num_ngroups; + proc_pids_t **proc_pids; + size_t num_proc_pids; #endif /* LIBPQOS2 */ const struct pqos_cpuinfo *pqos_cpu; const struct pqos_cap *pqos_cap; @@ -112,10 +97,184 @@ static rdt_config_status g_state = UNKNOWN; static int g_interface = -1; +static void rdt_submit_derive(const char *cgroup, const char *type, + const char *type_instance, derive_t value) { + value_list_t vl = VALUE_LIST_INIT; + + vl.values = &(value_t){.derive = value}; + vl.values_len = 1; + + sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin)); + ssnprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup); + sstrncpy(vl.type, type, sizeof(vl.type)); + if (type_instance) + sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance)); + + plugin_dispatch_values(&vl); +} + +static void rdt_submit_gauge(const char *cgroup, const char *type, + const char *type_instance, gauge_t value) { + value_list_t vl = VALUE_LIST_INIT; + + vl.values = &(value_t){.gauge = value}; + vl.values_len = 1; + + sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin)); + ssnprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup); + sstrncpy(vl.type, type, sizeof(vl.type)); + if (type_instance) + sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance)); + + plugin_dispatch_values(&vl); +} + +#if COLLECT_DEBUG +static void rdt_dump_cgroups(void) { + char cores[RDT_MAX_CORES * 4]; + + if (g_rdt == NULL) + return; + + DEBUG(RDT_PLUGIN ": Core Groups Dump"); + DEBUG(RDT_PLUGIN ": groups count: %" PRIsz, g_rdt->cores.num_cgroups); + + for (size_t i = 0; i < g_rdt->cores.num_cgroups; i++) { + core_group_t *cgroup = g_rdt->cores.cgroups + i; + + memset(cores, 0, sizeof(cores)); + for (size_t j = 0; j < cgroup->num_cores; j++) { + ssnprintf(cores + strlen(cores), sizeof(cores) - strlen(cores) - 1, " %d", + cgroup->cores[j]); + } + + DEBUG(RDT_PLUGIN ": group[%zu]:", i); + DEBUG(RDT_PLUGIN ": description: %s", cgroup->desc); + DEBUG(RDT_PLUGIN ": cores: %s", cores); + DEBUG(RDT_PLUGIN ": events: 0x%X", g_rdt->events[i]); + } + + return; +} + +#ifdef LIBPQOS2 +static void rdt_dump_ngroups(void) { + + char names[DATA_MAX_NAME_LEN]; + + if (g_rdt == NULL) + return; + + DEBUG(RDT_PLUGIN ": Process Names Groups Dump"); + DEBUG(RDT_PLUGIN ": groups count: %" PRIsz, g_rdt->num_ngroups); + + for (size_t i = 0; i < g_rdt->num_ngroups; i++) { + memset(names, 0, sizeof(names)); + for (size_t j = 0; j < g_rdt->ngroups[i].num_names; j++) + ssnprintf(names + strlen(names), sizeof(names) - strlen(names) - 1, " %s", + g_rdt->ngroups[i].names[j]); + + DEBUG(RDT_PLUGIN ": group[%d]:", (int)i); + DEBUG(RDT_PLUGIN ": description: %s", g_rdt->ngroups[i].desc); + DEBUG(RDT_PLUGIN ": process names:%s", names); + DEBUG(RDT_PLUGIN ": events: 0x%X", g_rdt->ngroups[i].events); + } + + return; +} +#endif /* LIBPQOS2 */ + +static inline double bytes_to_kb(const double bytes) { return bytes / 1024.0; } + +static inline double bytes_to_mb(const double bytes) { + return bytes / (1024.0 * 1024.0); +} + +static void rdt_dump_cores_data(void) { +/* + * CORE - monitored group of cores + * RMID - Resource Monitoring ID associated with the monitored group + * This is not available for monitoring with resource control + * LLC - last level cache occupancy + * MBL - local memory bandwidth + * MBR - remote memory bandwidth + */ +#ifdef LIBPQOS2 + if (g_interface == PQOS_INTER_OS_RESCTRL_MON) { + DEBUG(RDT_PLUGIN ": CORE LLC[KB] MBL[MB] MBR[MB]"); + } else { + DEBUG(RDT_PLUGIN ": CORE RMID LLC[KB] MBL[MB] MBR[MB]"); + } +#else + DEBUG(RDT_PLUGIN ": CORE RMID LLC[KB] MBL[MB] MBR[MB]"); +#endif /* LIBPQOS2 */ + + for (int i = 0; i < g_rdt->cores.num_cgroups; i++) { + const struct pqos_event_values *pv = &g_rdt->pcgroups[i]->values; + + double llc = bytes_to_kb(pv->llc); + double mbr = bytes_to_mb(pv->mbm_remote_delta); + double mbl = bytes_to_mb(pv->mbm_local_delta); +#ifdef LIBPQOS2 + if (g_interface == PQOS_INTER_OS_RESCTRL_MON) { + DEBUG(RDT_PLUGIN ": [%s] %10.1f %10.1f %10.1f", + g_rdt->cores.cgroups[i].desc, llc, mbl, mbr); + } else { + DEBUG(RDT_PLUGIN ": [%s] %8u %10.1f %10.1f %10.1f", + g_rdt->cores.cgroups[i].desc, g_rdt->pcgroups[i]->poll_ctx[0].rmid, + llc, mbl, mbr); + } +#else + DEBUG(RDT_PLUGIN ": [%s] %8u %10.1f %10.1f %10.1f", + g_rdt->cores.cgroups[i].desc, g_rdt->pcgroups[i]->poll_ctx[0].rmid, + llc, mbl, mbr); +#endif /* LIBPQOS2 */ + } +} + +#ifdef LIBPQOS2 +static void rdt_dump_pids_data(void) { + /* + * NAME - monitored group of processes + * PIDs - list of PID numbers in the NAME group + * LLC - last level cache occupancy + * MBL - local memory bandwidth + * MBR - remote memory bandwidth + */ + + DEBUG(RDT_PLUGIN ": NAME PIDs"); + char pids[DATA_MAX_NAME_LEN]; + for (size_t i = 0; i < g_rdt->num_ngroups; ++i) { + memset(pids, 0, sizeof(pids)); + for (size_t j = 0; j < g_rdt->ngroups[i].num_names; ++j) { + pids_list_t *list = g_rdt->ngroups[i].proc_pids[j]->curr; + for (size_t k = 0; k < list->size; k++) + ssnprintf(pids + strlen(pids), sizeof(pids) - strlen(pids) - 1, " %u", + list->pids[k]); + } + DEBUG(RDT_PLUGIN ": [%s] %s", g_rdt->ngroups[i].desc, pids); + } + + DEBUG(RDT_PLUGIN ": NAME LLC[KB] MBL[MB] MBR[MB]"); + for (size_t i = 0; i < g_rdt->num_ngroups; i++) { + + const struct pqos_event_values *pv = &g_rdt->pngroups[i]->values; + + double llc = bytes_to_kb(pv->llc); + double mbr = bytes_to_mb(pv->mbm_remote_delta); + double mbl = bytes_to_mb(pv->mbm_local_delta); + + DEBUG(RDT_PLUGIN ": [%s] %10.1f %10.1f %10.1f", g_rdt->ngroups[i].desc, + llc, mbl, mbr); + } +} +#endif /* LIBPQOS2 */ +#endif /* COLLECT_DEBUG */ + #ifdef LIBPQOS2 static int isdupstr(const char *names[], const size_t size, const char *name) { for (size_t i = 0; i < size; i++) - if (strncmp(names[i], name, (size_t)RDT_MAX_NAME_LEN) == 0) + if (strncmp(names[i], name, (size_t)MAX_PROC_NAME_LEN) == 0) return 1; return 0; @@ -144,6 +303,14 @@ static int strlisttoarray(char *str_list, char ***names, size_t *names_num) { if (str_list == NULL || names == NULL) return -EINVAL; + if (strstr(str_list, ",,")) { + /* strtok ignores empty words between separators. + * This condition handles that by rejecting strings + * with consecutive seprators */ + ERROR(RDT_PLUGIN ": Empty process name"); + return -EINVAL; + } + for (;;) { char *token = strtok_r(str_list, ",", &saveptr); if (token == NULL) @@ -157,11 +324,20 @@ static int strlisttoarray(char *str_list, char ***names, size_t *names_num) { if (*token == '\0') continue; - if (!(isdupstr((const char **)*names, *names_num, token))) + if ((isdupstr((const char **)*names, *names_num, token))) { + if (str_list != NULL) + ERROR(RDT_PLUGIN ": Duplicated process name \'%s\' in group \'%s\'", + token, str_list); + else + ERROR(RDT_PLUGIN ": Duplicated process name \'%s\'", token); + + return -EINVAL; + } else { if (0 != strarray_add(names, names_num, token)) { ERROR(RDT_PLUGIN ": Error allocating process name string"); return -ENOMEM; } + } } return 0; @@ -197,7 +373,7 @@ static int ngroup_cmp(const rdt_name_group_t *ng_a, for (size_t i = 0; i < sz_a; i++) { for (size_t j = 0; j < sz_b; j++) - if (strncmp(tab_a[i], tab_b[j], (size_t)RDT_MAX_NAME_LEN) == 0) + if (strncmp(tab_a[i], tab_b[j], (size_t)MAX_PROC_NAME_LEN) == 0) found++; } /* if no names are the same */ @@ -242,8 +418,10 @@ static int oconfig_to_ngroups(const oconfig_item_t *item, char value[DATA_MAX_NAME_LEN]; if ((item->values[j].value.string == NULL) || - (strlen(item->values[j].value.string) == 0)) - continue; + (strlen(item->values[j].value.string) == 0)) { + ERROR(RDT_PLUGIN ": Error - empty group"); + return -EINVAL; + } sstrncpy(value, item->values[j].value.string, sizeof(value)); @@ -261,7 +439,7 @@ static int oconfig_to_ngroups(const oconfig_item_t *item, return -ENOMEM; } - groups[index].proc_pids_array = NULL; + groups[index].proc_pids = NULL; groups[index].monitored_pids_count = 0; index++; @@ -274,403 +452,125 @@ static int oconfig_to_ngroups(const oconfig_item_t *item, return index; } -#endif /* LIBPQOS2 */ -#if COLLECT_DEBUG -static void rdt_dump_cgroups(void) { - char cores[RDT_MAX_CORES * 4]; +/* + * NAME + * rdt_free_ngroups + * + * DESCRIPTION + * Function to deallocate memory allocated for name groups. + * + * PARAMETERS + * `rdt' Pointer to rdt context + */ +static void rdt_free_ngroups(rdt_ctx_t *rdt) { + for (int i = 0; i < RDT_MAX_NAMES_GROUPS; i++) { + if (rdt->ngroups[i].desc) + DEBUG(RDT_PLUGIN ": Freeing pids \'%s\' group\'s data...", + rdt->ngroups[i].desc); + sfree(rdt->ngroups[i].desc); + strarray_free(rdt->ngroups[i].names, rdt->ngroups[i].num_names); - if (g_rdt == NULL) - return; + if (rdt->ngroups[i].proc_pids) + proc_pids_free(rdt->ngroups[i].proc_pids, rdt->ngroups[i].num_names); - DEBUG(RDT_PLUGIN ": Core Groups Dump"); - DEBUG(RDT_PLUGIN ": groups count: %" PRIsz, g_rdt->cores.num_cgroups); + rdt->ngroups[i].num_names = 0; + sfree(rdt->pngroups[i]); + } + if (rdt->proc_pids) + sfree(rdt->proc_pids); - for (size_t i = 0; i < g_rdt->cores.num_cgroups; i++) { - core_group_t *cgroup = g_rdt->cores.cgroups + i; + rdt->num_ngroups = 0; +} - memset(cores, 0, sizeof(cores)); - for (size_t j = 0; j < cgroup->num_cores; j++) { - snprintf(cores + strlen(cores), sizeof(cores) - strlen(cores) - 1, " %d", - cgroup->cores[j]); - } +/* + * NAME + * rdt_config_ngroups + * + * DESCRIPTION + * Reads name groups configuration. + * + * PARAMETERS + * `rdt` Pointer to rdt context + * `item' Config option containing process names groups. + * + * RETURN VALUE + * 0 on success. Negative number on error. + */ +static int rdt_config_ngroups(rdt_ctx_t *rdt, const oconfig_item_t *item) { + int n = 0; + enum pqos_mon_event events = 0; - DEBUG(RDT_PLUGIN ": group[%zu]:", i); - DEBUG(RDT_PLUGIN ": description: %s", cgroup->desc); - DEBUG(RDT_PLUGIN ": cores: %s", cores); - DEBUG(RDT_PLUGIN ": events: 0x%X", g_rdt->events[i]); + if (item == NULL) { + DEBUG(RDT_PLUGIN ": ngroups_config: Invalid argument."); + return -EINVAL; } - return; -} + DEBUG(RDT_PLUGIN ": Process names groups [%d]:", item->values_num); + for (int j = 0; j < item->values_num; j++) { + if (item->values[j].type != OCONFIG_TYPE_STRING) { + ERROR(RDT_PLUGIN + ": given process names group value is not a string [idx=%d]", + j); + return -EINVAL; + } + DEBUG(RDT_PLUGIN ": [%d]: %s", j, item->values[j].value.string); + } -#ifdef LIBPQOS2 -static void rdt_dump_ngroups(void) { + n = oconfig_to_ngroups(item, rdt->ngroups, RDT_MAX_NAMES_GROUPS); + if (n < 0) { + rdt_free_ngroups(rdt); + ERROR(RDT_PLUGIN ": Error parsing process name groups configuration."); + return -EINVAL; + } - char names[DATA_MAX_NAME_LEN]; + /* validate configured process name values */ + for (int group_idx = 0; group_idx < n; group_idx++) { + DEBUG(RDT_PLUGIN ": checking group [%d]: %s", group_idx, + rdt->ngroups[group_idx].desc); + for (size_t name_idx = 0; name_idx < rdt->ngroups[group_idx].num_names; + name_idx++) { + DEBUG(RDT_PLUGIN ": checking process name [%zu]: %s", name_idx, + rdt->ngroups[group_idx].names[name_idx]); + if (!proc_pids_is_name_valid(rdt->ngroups[group_idx].names[name_idx])) { + ERROR(RDT_PLUGIN ": Process name group '%s' contains invalid name '%s'", + rdt->ngroups[group_idx].desc, + rdt->ngroups[group_idx].names[name_idx]); + rdt_free_ngroups(rdt); + return -EINVAL; + } + } + } - if (g_rdt == NULL) - return; + if (n == 0) { + ERROR(RDT_PLUGIN ": Empty process name groups configured."); + return -EINVAL; + } - DEBUG(RDT_PLUGIN ": Process Names Groups Dump"); - DEBUG(RDT_PLUGIN ": groups count: %" PRIsz, g_rdt->num_ngroups); - - for (size_t i = 0; i < g_rdt->num_ngroups; i++) { - memset(names, 0, sizeof(names)); - for (size_t j = 0; j < g_rdt->ngroups[i].num_names; j++) - snprintf(names + strlen(names), sizeof(names) - strlen(names) - 1, " %s", - g_rdt->ngroups[i].names[j]); - - DEBUG(RDT_PLUGIN ": group[%d]:", (int)i); - DEBUG(RDT_PLUGIN ": description: %s", g_rdt->ngroups[i].desc); - DEBUG(RDT_PLUGIN ": process names:%s", names); - DEBUG(RDT_PLUGIN ": events: 0x%X", g_rdt->ngroups[i].events); - } - - return; -} -#endif /* LIBPQOS2 */ - -static inline double bytes_to_kb(const double bytes) { return bytes / 1024.0; } - -static inline double bytes_to_mb(const double bytes) { - return bytes / (1024.0 * 1024.0); -} - -static void rdt_dump_cores_data(void) { -/* - * CORE - monitored group of cores - * RMID - Resource Monitoring ID associated with the monitored group - * This is not available for monitoring with resource control - * LLC - last level cache occupancy - * MBL - local memory bandwidth - * MBR - remote memory bandwidth - */ -#ifdef LIBPQOS2 - if (g_interface == PQOS_INTER_OS_RESCTRL_MON) { - DEBUG(RDT_PLUGIN ": CORE LLC[KB] MBL[MB] MBR[MB]"); - } else { - DEBUG(RDT_PLUGIN ": CORE RMID LLC[KB] MBL[MB] MBR[MB]"); - } -#else - DEBUG(RDT_PLUGIN ": CORE RMID LLC[KB] MBL[MB] MBR[MB]"); -#endif /* LIBPQOS2 */ - - for (size_t i = 0; i < g_rdt->cores.num_cgroups; i++) { - const struct pqos_event_values *pv = &g_rdt->pcgroups[i]->values; - - double llc = bytes_to_kb(pv->llc); - double mbr = bytes_to_mb(pv->mbm_remote_delta); - double mbl = bytes_to_mb(pv->mbm_local_delta); -#ifdef LIBPQOS2 - if (g_interface == PQOS_INTER_OS_RESCTRL_MON) { - DEBUG(RDT_PLUGIN ": [%s] %10.1f %10.1f %10.1f", - g_rdt->cores.cgroups[i].desc, llc, mbl, mbr); - } else { - DEBUG(RDT_PLUGIN ": [%s] %8u %10.1f %10.1f %10.1f", - g_rdt->cores.cgroups[i].desc, g_rdt->pcgroups[i]->poll_ctx[0].rmid, - llc, mbl, mbr); - } -#else - DEBUG(RDT_PLUGIN ": [%s] %8u %10.1f %10.1f %10.1f", - g_rdt->cores.cgroups[i].desc, g_rdt->pcgroups[i]->poll_ctx[0].rmid, - llc, mbl, mbr); -#endif /* LIBPQOS2 */ - } -} - -#ifdef LIBPQOS2 -static void rdt_dump_pids_data(void) { - /* - * NAME - monitored group of processes - * PIDs - list of PID numbers in the NAME group - * LLC - last level cache occupancy - * MBL - local memory bandwidth - * MBR - remote memory bandwidth - */ - - DEBUG(RDT_PLUGIN ": NAME PIDs"); - char pids[DATA_MAX_NAME_LEN]; - for (size_t i = 0; i < g_rdt->num_ngroups; ++i) { - memset(pids, 0, sizeof(pids)); - for (size_t j = 0; j < g_rdt->ngroups[i].num_names; ++j) { - pids_list_t *list = g_rdt->ngroups[i].proc_pids_array[j].pids; - while (list != NULL) { - snprintf(pids + strlen(pids), sizeof(pids) - strlen(pids) - 1, " %u", - list->pid); - list = list->next; - } - } - DEBUG(RDT_PLUGIN ": [%s] %s", g_rdt->ngroups[i].desc, pids); - } - - DEBUG(RDT_PLUGIN ": NAME LLC[KB] MBL[MB] MBR[MB]"); - for (size_t i = 0; i < g_rdt->num_ngroups; i++) { - - const struct pqos_event_values *pv = &g_rdt->pngroups[i]->values; - - double llc = bytes_to_kb(pv->llc); - double mbr = bytes_to_mb(pv->mbm_remote_delta); - double mbl = bytes_to_mb(pv->mbm_local_delta); - - DEBUG(RDT_PLUGIN ": [%s] %10.1f %10.1f %10.1f", g_rdt->ngroups[i].desc, - llc, mbl, mbr); - } -} -#endif /* LIBPQOS2 */ -#endif /* COLLECT_DEBUG */ - -static void rdt_free_cgroups(void) { - config_cores_cleanup(&g_rdt->cores); - for (int i = 0; i < RDT_MAX_CORES; i++) { - sfree(g_rdt->pcgroups[i]); - } -} - -#ifdef LIBPQOS2 -static int pids_list_free(pids_list_t *list) { - assert(list); - - pids_list_t *current = list; - while (current != NULL) { - pids_list_t *previous = current; - current = current->next; - sfree(previous); - } - return 0; -} - -static void rdt_free_ngroups(void) { - for (int i = 0; i < RDT_MAX_NAMES_GROUPS; i++) { - if (g_rdt->ngroups[i].desc) - DEBUG(RDT_PLUGIN ": Freeing pids \'%s\' group\'s data...", - g_rdt->ngroups[i].desc); - sfree(g_rdt->ngroups[i].desc); - strarray_free(g_rdt->ngroups[i].names, g_rdt->ngroups[i].num_names); - - if (g_rdt->ngroups[i].proc_pids_array) { - for (size_t j = 0; j < g_rdt->ngroups[i].num_names; ++j) { - if (NULL == g_rdt->ngroups[i].proc_pids_array[j].pids) - continue; - pids_list_free(g_rdt->ngroups[i].proc_pids_array[j].pids); - } - - sfree(g_rdt->ngroups[i].proc_pids_array); - } - - g_rdt->ngroups[i].num_names = 0; - sfree(g_rdt->pngroups[i]); - } -} -#endif /* LIBPQOS2 */ - -static int rdt_default_cgroups(void) { - unsigned num_cores = g_rdt->pqos_cpu->num_cores; - - g_rdt->cores.cgroups = calloc(num_cores, sizeof(*g_rdt->cores.cgroups)); - if (g_rdt->cores.cgroups == NULL) { - ERROR(RDT_PLUGIN ": Error allocating core groups array"); - return -ENOMEM; - } - g_rdt->cores.num_cgroups = num_cores; - - /* configure each core in separate group */ - for (unsigned i = 0; i < num_cores; i++) { - core_group_t *cgroup = g_rdt->cores.cgroups + i; - char desc[DATA_MAX_NAME_LEN]; - - /* set core group info */ - cgroup->cores = calloc(1, sizeof(*cgroup->cores)); - if (cgroup->cores == NULL) { - ERROR(RDT_PLUGIN ": Error allocating cores array"); - rdt_free_cgroups(); - return -ENOMEM; - } - cgroup->num_cores = 1; - cgroup->cores[0] = i; - - snprintf(desc, sizeof(desc), "%d", g_rdt->pqos_cpu->cores[i].lcore); - cgroup->desc = strdup(desc); - if (cgroup->desc == NULL) { - ERROR(RDT_PLUGIN ": Error allocating core group description"); - rdt_free_cgroups(); - return -ENOMEM; - } - } - - return num_cores; -} - -static int rdt_is_core_id_valid(unsigned int core_id) { - - for (unsigned int i = 0; i < g_rdt->pqos_cpu->num_cores; i++) - if (core_id == g_rdt->pqos_cpu->cores[i].lcore) - return 1; - - return 0; -} - -#ifdef LIBPQOS2 -static int rdt_is_proc_name_valid(const char *name) { - - if (name != NULL) { - unsigned len = strlen(name); - if (len > 0 && len <= RDT_MAX_NAME_LEN) - return 1; - else { - DEBUG(RDT_PLUGIN - ": Process name \'%s\' is too long. Max supported len is %d chars.", - name, RDT_MAX_NAME_LEN); - } - } - - return 0; -} -#endif /* LIBPQOS2 */ - -static int rdt_config_cgroups(oconfig_item_t *item) { - size_t n = 0; - enum pqos_mon_event events = 0; - - if (config_cores_parse(item, &g_rdt->cores) < 0) { - rdt_free_cgroups(); - ERROR(RDT_PLUGIN ": Error parsing core groups configuration."); - return -EINVAL; - } - n = g_rdt->cores.num_cgroups; - - /* validate configured core id values */ - for (size_t group_idx = 0; group_idx < n; group_idx++) { - core_group_t *cgroup = g_rdt->cores.cgroups + group_idx; - for (size_t core_idx = 0; core_idx < cgroup->num_cores; core_idx++) { - if (!rdt_is_core_id_valid(cgroup->cores[core_idx])) { - ERROR(RDT_PLUGIN ": Core group '%s' contains invalid core id '%u'", - cgroup->desc, cgroup->cores[core_idx]); - rdt_free_cgroups(); - return -EINVAL; - } - } - } - - if (n == 0) { - /* create default core groups if "Cores" config option is empty */ - int ret = rdt_default_cgroups(); - if (ret < 0) { - rdt_free_cgroups(); - ERROR(RDT_PLUGIN ": Error creating default core groups configuration."); - return ret; - } - n = (size_t)ret; - INFO(RDT_PLUGIN - ": No core groups configured. Default core groups created."); - } - - /* Get all available events on this platform */ - for (unsigned int i = 0; i < g_rdt->cap_mon->u.mon->num_events; i++) - events |= g_rdt->cap_mon->u.mon->events[i].type; - - events &= ~(PQOS_PERF_EVENT_LLC_MISS); - - DEBUG(RDT_PLUGIN ": Number of cores in the system: %u", - g_rdt->pqos_cpu->num_cores); - DEBUG(RDT_PLUGIN ": Available events to monitor: %#x", events); - - g_rdt->cores.num_cgroups = n; - for (int i = 0; i < n; i++) { - for (int j = 0; j < i; j++) { - int found = 0; - found = config_cores_cmp_cgroups(&g_rdt->cores.cgroups[j], - &g_rdt->cores.cgroups[i]); - if (found != 0) { - rdt_free_cgroups(); - ERROR(RDT_PLUGIN ": Cannot monitor same cores in different groups."); - return -EINVAL; - } - } - - g_rdt->events[i] = events; - g_rdt->pcgroups[i] = calloc(1, sizeof(*g_rdt->pcgroups[i])); - if (g_rdt->pcgroups[i] == NULL) { - rdt_free_cgroups(); - ERROR(RDT_PLUGIN ": Failed to allocate memory for monitoring data."); - return -ENOMEM; - } - } - - return 0; -} - -#ifdef LIBPQOS2 -static int rdt_config_ngroups(const oconfig_item_t *item) { - int n = 0; - enum pqos_mon_event events = 0; - - if (item == NULL) { - DEBUG(RDT_PLUGIN ": ngroups_config: Invalid argument."); - return -EINVAL; - } - - DEBUG(RDT_PLUGIN ": Process names groups [%d]:", item->values_num); - for (int j = 0; j < item->values_num; j++) { - if (item->values[j].type != OCONFIG_TYPE_STRING) { - ERROR(RDT_PLUGIN - ": given process names group value is not a string [idx=%d]", - j); - return -EINVAL; - } - DEBUG(RDT_PLUGIN ": [%d]: %s", j, item->values[j].value.string); - } - - n = oconfig_to_ngroups(item, g_rdt->ngroups, RDT_MAX_NAMES_GROUPS); - if (n < 0) { - rdt_free_ngroups(); - ERROR(RDT_PLUGIN ": Error parsing process name groups configuration."); - return -EINVAL; - } - - /* validate configured process name values */ - for (int group_idx = 0; group_idx < n; group_idx++) { - for (size_t name_idx = 0; name_idx < g_rdt->ngroups[group_idx].num_names; - name_idx++) { - if (!rdt_is_proc_name_valid(g_rdt->ngroups[group_idx].names[name_idx])) { - ERROR(RDT_PLUGIN ": Process name group '%s' contains invalid name '%s'", - g_rdt->ngroups[group_idx].desc, - g_rdt->ngroups[group_idx].names[name_idx]); - rdt_free_ngroups(); - return -EINVAL; - } - } - } - - if (n == 0) { - ERROR(RDT_PLUGIN ": Empty process name groups configured."); - return -EINVAL; - } - - /* Get all available events on this platform */ - for (unsigned i = 0; i < g_rdt->cap_mon->u.mon->num_events; i++) - events |= g_rdt->cap_mon->u.mon->events[i].type; + /* Get all available events on this platform */ + for (unsigned i = 0; i < rdt->cap_mon->u.mon->num_events; i++) + events |= rdt->cap_mon->u.mon->events[i].type; events &= ~(PQOS_PERF_EVENT_LLC_MISS); DEBUG(RDT_PLUGIN ": Available events to monitor: %#x", events); - g_rdt->num_ngroups = n; + rdt->num_ngroups = n; for (int i = 0; i < n; i++) { for (int j = 0; j < i; j++) { - int found = ngroup_cmp(&g_rdt->ngroups[j], &g_rdt->ngroups[i]); + int found = ngroup_cmp(&rdt->ngroups[j], &rdt->ngroups[i]); if (found != 0) { - rdt_free_ngroups(); + rdt_free_ngroups(rdt); ERROR(RDT_PLUGIN ": Cannot monitor same process name in different groups."); return -EINVAL; } } - g_rdt->ngroups[i].events = events; - g_rdt->pngroups[i] = calloc(1, sizeof(*g_rdt->pngroups[i])); - if (g_rdt->pngroups[i] == NULL) { - rdt_free_ngroups(); + rdt->ngroups[i].events = events; + rdt->pngroups[i] = calloc(1, sizeof(*rdt->pngroups[i])); + if (rdt->pngroups[i] == NULL) { + rdt_free_ngroups(rdt); ERROR(RDT_PLUGIN ": Failed to allocate memory for process name monitoring data."); return -ENOMEM; @@ -682,852 +582,599 @@ static int rdt_config_ngroups(const oconfig_item_t *item) { /* * NAME - * pids_list_add_pid + * rdt_refresh_ngroup * * DESCRIPTION - * Adds pid at the end of the pids list. - * Allocates memory for new pid element, it is up to user to free it. + * Refresh pids monitored by name group. * * PARAMETERS - * `list' Head of target pids_list. - * `pid' Pid to be added. + * `ngroup` Pointer to name group. + * `group_mon_data' PQoS monitoring context. * * RETURN VALUE - * On success, returns 0. - * -1 on memory allocation error. + * 0 on success. Negative number on error. */ -static int pids_list_add_pid(pids_list_t **list, const pid_t pid) { - assert(list); +static int rdt_refresh_ngroup(rdt_name_group_t *ngroup, + struct pqos_mon_data *group_mon_data) { - pids_list_t *new_element = calloc(1, sizeof(*new_element)); + int result = 0; - if (new_element == NULL) { - ERROR(RDT_PLUGIN ": Alloc error\n"); + if (NULL == ngroup) return -1; - } - new_element->pid = pid; - new_element->next = NULL; - - pids_list_t **current = list; - while (*current != NULL) { - current = &((*current)->next); - } - *current = new_element; - return 0; -} -/* - * NAME - * pids_list_contains_pid - * - * DESCRIPTION - * Tests if pids list contains specific pid. - * - * PARAMETERS - * `list' Head of pids_list. - * `pid' Pid to be searched for. - * - * RETURN VALUE - * If PID found in list, returns 1, - * Otherwise returns 0. - */ -static int pids_list_contains_pid(pids_list_t *list, const pid_t pid) { - assert(list); + if (NULL == ngroup->proc_pids) { + ERROR(RDT_PLUGIN + ": rdt_refresh_ngroup: \'%s\' uninitialized process pids array.", + ngroup->desc); - pids_list_t *current = list; - while (current != NULL) { - if (current->pid == pid) - return 1; - current = current->next; + return -1; } - return 0; -} -/* - * NAME - * pids_list_add_pids_list - * - * DESCRIPTION - * Adds pids list at the end of the pids list. - * Allocates memory for new pid elements, it is up to user to free it. - * Increases dst_num by a number of added PIDs. - * - * PARAMETERS - * `dst' Head of target PIDs list. - * `src' Head of source PIDs list. - * `dst_num' Variable to be increased by a number of appended PIDs. - * - * RETURN VALUE - * On success, returns 0. - * -1 on memory allocation error. - */ -static int pids_list_add_pids_list(pids_list_t **dst, pids_list_t *src, - size_t *dst_num) { - assert(dst); - assert(src); - assert(dst_num); + DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' process names group.", + ngroup->desc); - pids_list_t *current = src; - int ret; + proc_pids_t **proc_pids = ngroup->proc_pids; + pids_list_t added_pids; + pids_list_t removed_pids; - while (current != NULL) { - ret = pids_list_add_pid(dst, current->pid); - if (0 != ret) - return ret; + memset(&added_pids, 0, sizeof(added_pids)); + memset(&removed_pids, 0, sizeof(removed_pids)); - ++(*dst_num); - current = current->next; + for (size_t i = 0; i < ngroup->num_names; ++i) { + int diff_result = pids_list_diff(proc_pids[i], &added_pids, &removed_pids); + if (0 != diff_result) { + ERROR(RDT_PLUGIN + ": rdt_refresh_ngroup: \'%s\'. Error [%d] during PID diff.", + ngroup->desc, diff_result); + result = -1; + goto cleanup; + } } - return 0; -} - -/* - * NAME - * read_proc_name - * - * DESCRIPTION - * Reads process name from given pid directory. - * Strips new-line character (\n). - * - * PARAMETERS - * `procfs_path` Path to systems proc directory (e.g. /proc) - * `pid_entry' Dirent for PID directory - * `name' Output buffer for process name, recommended proc_comm. - * `out_size' Output buffer size, recommended sizeof(proc_comm) - * - * RETURN VALUE - * On success, the number of read bytes (includes stripped \n). - * -1 on file open error - */ -static int read_proc_name(const char *procfs_path, - const struct dirent *pid_entry, char *name, - const size_t out_size) { - assert(procfs_path); - assert(pid_entry); - assert(name); - assert(out_size); - memset(name, 0, out_size); - - const char *comm_file_name = "comm"; - - char *path = ssnprintf_alloc("%s/%s/%s", procfs_path, pid_entry->d_name, - comm_file_name); - - FILE *f = fopen(path, "r"); - if (f == NULL) { - ERROR(RDT_PLUGIN ": Failed to open comm file, error: %d\n", errno); - sfree(path); - return -1; - } - size_t read_length = fread(name, sizeof(char), out_size, f); - fclose(f); - sfree(path); - /* strip new line ending */ - char *newline = strchr(name, '\n'); - if (newline) { - *newline = '\0'; - } + DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' process names group, added: " + "%u, removed: %u.", + ngroup->desc, (unsigned)added_pids.size, (unsigned)removed_pids.size); - return read_length; -} + if (added_pids.size > 0) { -/* - * NAME - * get_pid_number - * - * DESCRIPTION - * Gets pid number for given /proc/pid directory entry or - * returns error if input directory does not hold PID information. - * - * PARAMETERS - * `entry' Dirent for PID directory - * `pid' PID number to be filled - * - * RETURN VALUE - * 0 on success. -1 on error. - */ -static int get_pid_number(struct dirent *entry, pid_t *pid) { - char *tmp_end; /* used for strtoul error check*/ + /* no pids are monitored for this group yet: start monitoring */ + if (0 == ngroup->monitored_pids_count) { - if (pid == NULL || entry == NULL) - return -1; + int start_result = + pqos_mon_start_pids(added_pids.size, added_pids.pids, ngroup->events, + (void *)ngroup->desc, group_mon_data); + if (PQOS_RETVAL_OK == start_result) { + ngroup->monitored_pids_count = added_pids.size; + } else { + ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Error [%d] while " + "STARTING pids monitoring", + ngroup->desc, start_result); + result = -1; + goto pqos_error_recovery; + } - if (entry->d_type != DT_DIR) - return -1; + } else { - /* trying to get pid number from directory name*/ - *pid = strtoul(entry->d_name, &tmp_end, 10); - if (*tmp_end != '\0') { - return -1; /* conversion failed, not proc-pid */ + int add_result = + pqos_mon_add_pids(added_pids.size, added_pids.pids, group_mon_data); + if (PQOS_RETVAL_OK == add_result) + ngroup->monitored_pids_count += added_pids.size; + else { + ERROR(RDT_PLUGIN + ": rdt_refresh_ngroup: \'%s\'. Error [%d] while ADDING pids.", + ngroup->desc, add_result); + result = -1; + goto pqos_error_recovery; + } + } } - /* all checks passed, marking as success */ - return 0; -} - -/* - * NAME - * pids_list_to_array - * - * DESCRIPTION - * Copies element from list to array. Assumes the space for the array is - * allocated. - * - * PARAMETERS - * `array' First element of target array - * `list' Head of the list - * `array_length' Length (element count) of the target array - */ -static void pids_list_to_array(pid_t *array, pids_list_t *list, - const size_t array_length) { - - assert(list); - assert(array); - assert(array_length > 0); - size_t current = 0; + if (removed_pids.size > 0) { - while (list != NULL && current < array_length) { - array[current] = list->pid; - list = list->next; - ++current; + /* all pids are removed: stop monitoring */ + if (removed_pids.size == ngroup->monitored_pids_count) { + /* all pids for this group are lost: stop monitoring */ + int stop_result = pqos_mon_stop(group_mon_data); + if (PQOS_RETVAL_OK != stop_result) { + ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Error [%d] while " + "STOPPING monitoring", + ngroup->desc, stop_result); + result = -1; + goto pqos_error_recovery; + } + ngroup->monitored_pids_count = 0; + } else { + int remove_result = pqos_mon_remove_pids( + removed_pids.size, removed_pids.pids, group_mon_data); + if (PQOS_RETVAL_OK == remove_result) { + ngroup->monitored_pids_count -= removed_pids.size; + } else { + ERROR(RDT_PLUGIN + ": rdt_refresh_ngroup: \'%s\'. Error [%d] while REMOVING pids.", + ngroup->desc, remove_result); + result = -1; + goto pqos_error_recovery; + } + } } -} - -/* - * NAME - * initialize_proc_pids - * - * DESCRIPTION - * Helper function to properly initialize array of proc_pids. - * Allocates memory for proc_pids structs. - * - * PARAMETERS - * `procs_names_array' Array of null-terminated strings with - * process' names to be copied to new array - * `procs_names_array_size' procs_names_array element count - * `proc_pids_array' Address of pointer, under which new - * array of proc_pids will be allocated. - * Must be NULL. - * RETURN VALUE - * 0 on success. Negative number on error: - * -1: allocation error - */ -static int initialize_proc_pids(const char **procs_names_array, - const size_t procs_names_array_size, - proc_pids_t **proc_pids_array) { - assert(proc_pids_array); - assert(NULL == *proc_pids_array); + goto cleanup; - /* Copy procs names to output array. Initialize pids list with NULL value. */ - *proc_pids_array = calloc(procs_names_array_size, sizeof(**proc_pids_array)); +pqos_error_recovery: + /* Why? + * Resources might be temporary unavailable. + * + * How? + * Collectd will halt the reading thread for this + * plugin if it returns an error. + * Consecutive errors will be increasing the read period + * up to 1 day interval. + * On pqos error stop monitoring current group + * and reset the proc_pids array + * monitoring will be restarted on next collectd read cycle + */ + DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' group RESET after error.", + ngroup->desc); + pqos_mon_stop(group_mon_data); + for (size_t i = 0; i < ngroup->num_names; ++i) + if (ngroup->proc_pids[i]->curr) + ngroup->proc_pids[i]->curr->size = 0; - if (NULL == *proc_pids_array) - return -1; + ngroup->monitored_pids_count = 0; - for (size_t i = 0; i < procs_names_array_size; ++i) { - sstrncpy((*proc_pids_array)[i].proccess_name, procs_names_array[i], - STATIC_ARRAY_SIZE((*proc_pids_array)[i].proccess_name)); - (*proc_pids_array)[i].pids = NULL; - } +cleanup: + pids_list_clear(&added_pids); + pids_list_clear(&removed_pids); - return 0; + return result; } /* * NAME - * fetch_pids_for_procs + * read_pids_data * * DESCRIPTION - * Finds PIDs matching given process's names. - * Searches all PID directories in /proc fs and - * allocates memory for proc_pids structs, it is up to user to free it. - * Output array will have same element count as input array. - * - * PARAMETERS - * `procfs_path' Path to systems proc directory (e.g. /proc) - * `procs_names_array' Array of null-terminated strings with - * process' names to be copied to new array - * `procs_names_array_size' procs_names_array element count - * `proc_pids_array' Address of pointer, under which new - * array of proc_pids will be allocated. - * Must be NULL. + * Poll monitoring statistics for name groups * * RETURN VALUE - * 0 on success. -1 on error. + * 0 on success. Negative number on error. */ -static int fetch_pids_for_procs(const char *procfs_path, - const char **procs_names_array, - const size_t procs_names_array_size, - proc_pids_t **proc_pids_array) { - assert(procfs_path); - assert(procs_names_array); - assert(procs_names_array_size); - - DIR *proc_dir = opendir(procfs_path); - if (proc_dir == NULL) { - ERROR(RDT_PLUGIN ": Could not open %s directory, error: %d", procfs_path, - errno); - return -1; - } - - int init_result = initialize_proc_pids( - procs_names_array, procs_names_array_size, proc_pids_array); - if (0 != init_result) - return -1; - - /* Go through procfs and find PIDS and their comms */ - struct dirent *entry; - while ((entry = readdir(proc_dir)) != NULL) { - - pid_t pid; - int pid_conversion = get_pid_number(entry, &pid); - if (pid_conversion < 0) - continue; - - proc_comm_t comm; - int read_result = - read_proc_name(procfs_path, entry, comm, sizeof(proc_comm_t)); - if (read_result <= 0) { - ERROR(RDT_PLUGIN ": Comm file skipped. Read result: %d", read_result); - continue; - } - - /* Try to find comm in input procs array (proc_pids_array has same names) */ - for (size_t i = 0; i < procs_names_array_size; ++i) { - if (0 == strncmp(comm, (*proc_pids_array)[i].proccess_name, - STATIC_ARRAY_SIZE(comm))) - pids_list_add_pid(&((*proc_pids_array)[i].pids), pid); - } - } - - int close_result = closedir(proc_dir); - if (0 != close_result) { - ERROR(RDT_PLUGIN ": failed to close %s directory, error: %d", procfs_path, - errno); - sfree(*proc_pids_array); - return -1; - } - return 0; -} -#endif /* LIBPQOS2 */ - -static void rdt_pqos_log(void *context, const size_t size, const char *msg) { - DEBUG(RDT_PLUGIN ": %s", msg); -} - -static int rdt_preinit(void) { - int ret; +static int read_pids_data() { - if (g_rdt != NULL) { - /* already initialized if config callback was called before init callback */ + if (0 == g_rdt->num_ngroups) { + DEBUG(RDT_PLUGIN ": read_pids_data: not configured - PIDs read skipped"); return 0; } - g_rdt = calloc(1, sizeof(*g_rdt)); - if (g_rdt == NULL) { - ERROR(RDT_PLUGIN ": Failed to allocate memory for rdt context."); - return -ENOMEM; - } - - struct pqos_config pqos = {.fd_log = -1, - .callback_log = rdt_pqos_log, - .context_log = NULL, - .verbose = 0, -#ifdef LIBPQOS2 - .interface = PQOS_INTER_OS_RESCTRL_MON}; - DEBUG(RDT_PLUGIN ": Initializing PQoS with RESCTRL interface"); -#else - .interface = PQOS_INTER_MSR}; - DEBUG(RDT_PLUGIN ": Initializing PQoS with MSR interface"); -#endif - - ret = pqos_init(&pqos); - DEBUG(RDT_PLUGIN ": PQoS initialization result: [%d]", ret); - -#ifdef LIBPQOS2 - if (ret == PQOS_RETVAL_INTER) { - pqos.interface = PQOS_INTER_MSR; - DEBUG(RDT_PLUGIN ": Initializing PQoS with MSR interface"); - ret = pqos_init(&pqos); - DEBUG(RDT_PLUGIN ": PQoS initialization result: [%d]", ret); - } -#endif - - if (ret != PQOS_RETVAL_OK) { - ERROR(RDT_PLUGIN ": Error initializing PQoS library!"); - goto rdt_preinit_error1; - } - - g_interface = pqos.interface; + DEBUG(RDT_PLUGIN ": read_pids_data: Scanning active groups"); + struct pqos_mon_data *active_groups[RDT_MAX_NAMES_GROUPS] = {0}; + size_t active_group_idx = 0; + for (size_t pngroups_idx = 0; + pngroups_idx < STATIC_ARRAY_SIZE(g_rdt->pngroups); ++pngroups_idx) + if (0 != g_rdt->ngroups[pngroups_idx].monitored_pids_count) + active_groups[active_group_idx++] = g_rdt->pngroups[pngroups_idx]; - ret = pqos_cap_get(&g_rdt->pqos_cap, &g_rdt->pqos_cpu); - if (ret != PQOS_RETVAL_OK) { - ERROR(RDT_PLUGIN ": Error retrieving PQoS capabilities."); - goto rdt_preinit_error2; - } + int ret = 0; - ret = pqos_cap_get_type(g_rdt->pqos_cap, PQOS_CAP_TYPE_MON, &g_rdt->cap_mon); - if (ret == PQOS_RETVAL_PARAM) { - ERROR(RDT_PLUGIN ": Error retrieving monitoring capabilities."); - goto rdt_preinit_error2; + if (0 == active_group_idx) { + DEBUG(RDT_PLUGIN ": read_pids_data: no active groups - PIDs read skipped"); + goto groups_refresh; } - if (g_rdt->cap_mon == NULL) { - ERROR( - RDT_PLUGIN - ": Monitoring capability not detected. Nothing to do for the plugin."); - goto rdt_preinit_error2; - } + DEBUG(RDT_PLUGIN ": read_pids_data: PIDs data polling"); - /* Reset pqos monitoring groups registers */ - pqos_mon_reset(); + int poll_result = pqos_mon_poll(active_groups, active_group_idx); + if (poll_result != PQOS_RETVAL_OK) { + ERROR(RDT_PLUGIN ": read_pids_data: Failed to poll monitoring data for " + "pids. Error [%d].", + poll_result); + ret = -poll_result; + goto groups_refresh; + } - return 0; + for (size_t i = 0; i < g_rdt->num_ngroups; i++) { + enum pqos_mon_event mbm_events = + (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_TMEM_BW | + PQOS_MON_EVENT_RMEM_BW); -rdt_preinit_error2: - pqos_fini(); + if (g_rdt->pngroups[i] == NULL || + g_rdt->ngroups[i].monitored_pids_count == 0) + continue; -rdt_preinit_error1: - sfree(g_rdt); + const struct pqos_event_values *pv = &g_rdt->pngroups[i]->values; - return -1; -} + /* Submit only monitored events data */ -static int rdt_config(oconfig_item_t *ci) { - if (rdt_preinit() != 0) { - g_state = CONFIGURATION_ERROR; - /* if we return -1 at this point collectd - reports a failure in configuration and - aborts - */ - return (0); - } + if (g_rdt->ngroups[i].events & PQOS_MON_EVENT_L3_OCCUP) + rdt_submit_gauge(g_rdt->ngroups[i].desc, "bytes", "llc", pv->llc); - for (int i = 0; i < ci->children_num; i++) { - oconfig_item_t *child = ci->children + i; + if (g_rdt->ngroups[i].events & PQOS_PERF_EVENT_IPC) + rdt_submit_gauge(g_rdt->ngroups[i].desc, "ipc", NULL, pv->ipc); - if (strncasecmp("Cores", child->key, (size_t)strlen("Cores")) == 0) { - if (rdt_config_cgroups(child) != 0) { - g_state = CONFIGURATION_ERROR; - /* if we return -1 at this point collectd - reports a failure in configuration and - aborts - */ - return (0); - } + if (g_rdt->ngroups[i].events & mbm_events) { + rdt_submit_derive(g_rdt->ngroups[i].desc, "memory_bandwidth", "local", + pv->mbm_local_delta); + rdt_submit_derive(g_rdt->ngroups[i].desc, "memory_bandwidth", "remote", + pv->mbm_remote_delta); + } + } #if COLLECT_DEBUG - rdt_dump_cgroups(); + rdt_dump_pids_data(); #endif /* COLLECT_DEBUG */ - } else if (strncasecmp("Processes", child->key, - (size_t)strlen("Processes")) == 0) { -#ifdef LIBPQOS2 - if (g_interface != PQOS_INTER_OS_RESCTRL_MON) { - ERROR(RDT_PLUGIN ": Configuration parameter \"%s\" not supported. " - "Resctrl monitoring is needed for PIDs monitoring.", - child->key); - g_state = CONFIGURATION_ERROR; - /* if we return -1 at this point collectd - reports a failure in configuration and - aborts - */ - return 0; - } - if (rdt_config_ngroups(child) != 0) { - g_state = CONFIGURATION_ERROR; - /* if we return -1 at this point collectd - reports a failure in configuration and - aborts +groups_refresh: + ret = proc_pids_update(RDT_PROC_PATH, g_rdt->proc_pids, g_rdt->num_proc_pids); + if (0 != ret) { + ERROR(RDT_PLUGIN ": Initial update of proc pids failed"); + return ret; + } + + for (size_t i = 0; i < g_rdt->num_ngroups; i++) { + int refresh_result = + rdt_refresh_ngroup(&(g_rdt->ngroups[i]), g_rdt->pngroups[i]); + + if (0 != refresh_result) { + ERROR(RDT_PLUGIN ": read_pids_data: NGroup %zu refresh failed. Error: %d", + i, refresh_result); + if (0 == ret) { + /* refresh error will be escalated only if there were no + * errors before. */ - return 0; + ret = refresh_result; } - -#if COLLECT_DEBUG - rdt_dump_ngroups(); -#endif /* COLLECT_DEBUG */ -#else /* !LIBPQOS2 */ - ERROR(RDT_PLUGIN ": Configuration parameter \"%s\" not supported, please " - "recompile collectd with libpqos version 2.0 or newer.", - child->key); -#endif /* LIBPQOS2 */ - } else { - ERROR(RDT_PLUGIN ": Unknown configuration parameter \"%s\".", child->key); } } - return 0; + assert(ret <= 0); + return ret; } -static void rdt_submit_derive(const char *cgroup, const char *type, - const char *type_instance, derive_t value) { - value_list_t vl = VALUE_LIST_INIT; - - vl.values = &(value_t){.derive = value}; - vl.values_len = 1; - - sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin)); - snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup); - sstrncpy(vl.type, type, sizeof(vl.type)); - if (type_instance) - sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance)); +/* + * NAME + * rdt_init_pids_monitoring + * + * DESCRIPTION + * Initialize pids monitoring for all name groups + */ +static void rdt_init_pids_monitoring() { + for (size_t group_idx = 0; group_idx < g_rdt->num_ngroups; group_idx++) { + /* + * Each group must have not-null proc_pids array. + * Initial refresh is not mandatory for proper + * PIDs statistics detection. + */ + rdt_name_group_t *ng = &g_rdt->ngroups[group_idx]; + int init_result = + proc_pids_init((const char **)ng->names, ng->num_names, &ng->proc_pids); + if (0 != init_result) { + ERROR(RDT_PLUGIN + ": Initialization of proc_pids for group %zu failed. Error: %d", + group_idx, init_result); + continue; + } - plugin_dispatch_values(&vl); -} + /* update global proc_pids table */ + proc_pids_t **proc_pids = + realloc(g_rdt->proc_pids, (g_rdt->num_proc_pids + ng->num_names) * + sizeof(*g_rdt->proc_pids)); + if (NULL == proc_pids) { + ERROR(RDT_PLUGIN ": Alloc error\n"); + continue; + } -static void rdt_submit_gauge(const char *cgroup, const char *type, - const char *type_instance, gauge_t value) { - value_list_t vl = VALUE_LIST_INIT; + for (size_t i = 0; i < ng->num_names; i++) + proc_pids[g_rdt->num_proc_pids + i] = ng->proc_pids[i]; - vl.values = &(value_t){.gauge = value}; - vl.values_len = 1; + g_rdt->proc_pids = proc_pids; + g_rdt->num_proc_pids += ng->num_names; + } - sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin)); - snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup); - sstrncpy(vl.type, type, sizeof(vl.type)); - if (type_instance) - sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance)); + if (g_rdt->num_ngroups > 0) { + int update_result = + proc_pids_update(RDT_PROC_PATH, g_rdt->proc_pids, g_rdt->num_proc_pids); + if (0 != update_result) + ERROR(RDT_PLUGIN ": Initial update of proc pids failed"); + } - plugin_dispatch_values(&vl); + for (size_t group_idx = 0; group_idx < g_rdt->num_ngroups; group_idx++) { + int refresh_result = rdt_refresh_ngroup(&(g_rdt->ngroups[group_idx]), + g_rdt->pngroups[group_idx]); + if (0 != refresh_result) + ERROR(RDT_PLUGIN ": Initial refresh of group %zu failed. Error: %d", + group_idx, refresh_result); + } +} +#endif /* LIBPQOS2 */ +/* + * NAME + * rdt_free_cgroups + * + * DESCRIPTION + * Function to deallocate memory allocated for core groups. + */ +static void rdt_free_cgroups(void) { + config_cores_cleanup(&g_rdt->cores); + for (int i = 0; i < RDT_MAX_CORES; i++) { + sfree(g_rdt->pcgroups[i]); + } + g_rdt->cores.num_cgroups = 0; } -#ifdef LIBPQOS2 -static int rdt_pid_list_diff(pids_list_t *prev, pids_list_t *curr, - pids_list_t **added, size_t *added_num, - pids_list_t **removed, size_t *removed_num) { - assert(prev || curr); - assert(added); - assert(removed); - - if (NULL == prev) { - /* append all PIDs from curr to added*/ - return pids_list_add_pids_list(added, curr, added_num); - } else if (NULL == curr) { - /* append all PIDs from prev to removed*/ - return pids_list_add_pids_list(removed, prev, removed_num); +static int rdt_default_cgroups(void) { + unsigned num_cores = g_rdt->pqos_cpu->num_cores; + + g_rdt->cores.cgroups = calloc(num_cores, sizeof(*(g_rdt->cores.cgroups))); + if (g_rdt->cores.cgroups == NULL) { + ERROR(RDT_PLUGIN ": Error allocating core groups array"); + return -ENOMEM; } + g_rdt->cores.num_cgroups = num_cores; + + /* configure each core in separate group */ + for (unsigned i = 0; i < num_cores; i++) { + core_group_t *cgroup = g_rdt->cores.cgroups + i; + char desc[DATA_MAX_NAME_LEN]; - pids_list_t *item = prev; - while (item != NULL) { - if (0 == pids_list_contains_pid(curr, item->pid)) { - pids_list_add_pid(removed, item->pid); - ++(*removed_num); + /* set core group info */ + cgroup->cores = calloc(1, sizeof(*cgroup->cores)); + if (cgroup->cores == NULL) { + ERROR(RDT_PLUGIN ": Error allocating cores array"); + rdt_free_cgroups(); + return -ENOMEM; } - item = item->next; - } + cgroup->num_cores = 1; + cgroup->cores[0] = i; - item = curr; - while (item != NULL) { - if (0 == pids_list_contains_pid(prev, item->pid)) { - pids_list_add_pid(added, item->pid); - ++(*added_num); + ssnprintf(desc, sizeof(desc), "%d", g_rdt->pqos_cpu->cores[i].lcore); + cgroup->desc = strdup(desc); + if (cgroup->desc == NULL) { + ERROR(RDT_PLUGIN ": Error allocating core group description"); + rdt_free_cgroups(); + return -ENOMEM; } - item = item->next; } - return 0; + return num_cores; } -static int rdt_refresh_ngroup(rdt_name_group_t *ngroup, - struct pqos_mon_data *group_mon_data) { +static int rdt_is_core_id_valid(unsigned int core_id) { - int result = 0; + for (unsigned int i = 0; i < g_rdt->pqos_cpu->num_cores; i++) + if (core_id == g_rdt->pqos_cpu->cores[i].lcore) + return 1; - if (NULL == ngroup) - return -1; + return 0; +} - if (NULL == ngroup->proc_pids_array) { - ERROR(RDT_PLUGIN - ": rdt_refresh_ngroup: \'%s\' uninitialized process pids array.", - ngroup->desc); +static int rdt_config_cgroups(oconfig_item_t *item) { + size_t n = 0; + enum pqos_mon_event events = 0; - return -1; + if (config_cores_parse(item, &g_rdt->cores) < 0) { + rdt_free_cgroups(); + ERROR(RDT_PLUGIN ": Error parsing core groups configuration."); + return -EINVAL; } + n = g_rdt->cores.num_cgroups; - DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' process names group.", - ngroup->desc); - - proc_pids_t *proc_pids_array_prev = ngroup->proc_pids_array; - proc_pids_t *proc_pids_array_curr = NULL; - - int fetch_result = - fetch_pids_for_procs(RDT_PROC_PATH, (const char **)ngroup->names, - ngroup->num_names, &proc_pids_array_curr); - - if (0 != fetch_result) { - ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' failed to fetch PIDs.", - ngroup->desc); - return fetch_result; + /* validate configured core id values */ + for (size_t group_idx = 0; group_idx < n; group_idx++) { + core_group_t *cgroup = g_rdt->cores.cgroups + group_idx; + for (size_t core_idx = 0; core_idx < cgroup->num_cores; core_idx++) { + if (!rdt_is_core_id_valid(cgroup->cores[core_idx])) { + ERROR(RDT_PLUGIN ": Core group '%s' contains invalid core id '%u'", + cgroup->desc, cgroup->cores[core_idx]); + rdt_free_cgroups(); + return -EINVAL; + } + } } - pids_list_t *new_pids = NULL; - size_t new_pids_count = 0; - - pids_list_t *lost_pids = NULL; - size_t lost_pids_count = 0; - - for (size_t i = 0; i < ngroup->num_names; ++i) { - if (NULL == proc_pids_array_prev[i].pids && - NULL == proc_pids_array_curr[i].pids) - continue; - int diff_result = rdt_pid_list_diff( - proc_pids_array_prev[i].pids, proc_pids_array_curr[i].pids, &new_pids, - &new_pids_count, &lost_pids, &lost_pids_count); - if (0 != diff_result) { - ERROR(RDT_PLUGIN - ": rdt_refresh_ngroup: \'%s\'. Error [%d] during PID diff.", - ngroup->desc, diff_result); - result = -1; - goto cleanup; + if (n == 0) { + /* create default core groups if "Cores" config option is empty */ + int ret = rdt_default_cgroups(); + if (ret < 0) { + rdt_free_cgroups(); + ERROR(RDT_PLUGIN ": Error creating default core groups configuration."); + return ret; } + n = (size_t)ret; + INFO(RDT_PLUGIN + ": No core groups configured. Default core groups created."); } - DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' process names group, added: " - "%u, removed: %u.", - ngroup->desc, (unsigned)new_pids_count, (unsigned)lost_pids_count); - - if (new_pids_count != 0 || lost_pids_count != 0) { - - if (new_pids) { - pid_t new_pids_array[new_pids_count]; - pids_list_to_array(new_pids_array, new_pids, - STATIC_ARRAY_SIZE(new_pids_array)); - - /* no pids are monitored for this group yet: start monitoring */ - if (0 == ngroup->monitored_pids_count) { - - int start_result = - pqos_mon_start_pids(new_pids_count, new_pids_array, ngroup->events, - (void *)ngroup->desc, group_mon_data); - if (PQOS_RETVAL_OK == start_result) { - ngroup->monitored_pids_count = new_pids_count; - } else { - ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Error [%d] while " - "STARTING pids monitoring", - ngroup->desc, start_result); - result = -1; - goto pqos_error_recovery; - } + /* Get all available events on this platform */ + for (unsigned int i = 0; i < g_rdt->cap_mon->u.mon->num_events; i++) + events |= g_rdt->cap_mon->u.mon->events[i].type; - } else { + events &= ~(PQOS_PERF_EVENT_LLC_MISS); + + DEBUG(RDT_PLUGIN ": Number of cores in the system: %u", + g_rdt->pqos_cpu->num_cores); + DEBUG(RDT_PLUGIN ": Available events to monitor: %#x", events); - int add_result = - pqos_mon_add_pids(new_pids_count, new_pids_array, group_mon_data); - if (PQOS_RETVAL_OK == add_result) - ngroup->monitored_pids_count += new_pids_count; - else { - ERROR(RDT_PLUGIN - ": rdt_refresh_ngroup: \'%s\'. Error [%d] while ADDING pids.", - ngroup->desc, add_result); - result = -1; - goto pqos_error_recovery; - } + g_rdt->cores.num_cgroups = n; + for (int i = 0; i < n; i++) { + for (int j = 0; j < i; j++) { + int found = 0; + found = config_cores_cmp_cgroups(&g_rdt->cores.cgroups[j], + &g_rdt->cores.cgroups[i]); + if (found != 0) { + rdt_free_cgroups(); + ERROR(RDT_PLUGIN ": Cannot monitor same cores in different groups."); + return -EINVAL; } } - if (lost_pids) { - pid_t lost_pids_array[lost_pids_count]; - pids_list_to_array(lost_pids_array, lost_pids, - STATIC_ARRAY_SIZE(lost_pids_array)); - - if (lost_pids_count == ngroup->monitored_pids_count) { - /* all pids for this group are lost: stop monitoring */ - int stop_result = pqos_mon_stop(group_mon_data); - if (PQOS_RETVAL_OK != stop_result) { - ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Error [%d] while " - "STOPPING monitoring", - ngroup->desc, stop_result); - result = -1; - goto pqos_error_recovery; - } - ngroup->monitored_pids_count = 0; - } else { - assert(lost_pids_count < ngroup->monitored_pids_count); - int remove_result = pqos_mon_remove_pids( - lost_pids_count, lost_pids_array, group_mon_data); - if (PQOS_RETVAL_OK == remove_result) { - ngroup->monitored_pids_count -= lost_pids_count; - } else { - ERROR(RDT_PLUGIN - ": rdt_refresh_ngroup: \'%s\'. Error [%d] while REMOVING pids.", - ngroup->desc, remove_result); - result = -1; - goto pqos_error_recovery; - } - } + g_rdt->events[i] = events; + g_rdt->pcgroups[i] = calloc(1, sizeof(*g_rdt->pcgroups[i])); + if (g_rdt->pcgroups[i] == NULL) { + rdt_free_cgroups(); + ERROR(RDT_PLUGIN ": Failed to allocate memory for monitoring data."); + return -ENOMEM; } - - ngroup->proc_pids_array = proc_pids_array_curr; - } - - goto cleanup; - -pqos_error_recovery: - /* Why? - * Resources might be temporary unavailable. - * - * How? - * Collectd will halt the reading thread for this - * plugin if it returns an error. - * Consecutive errors will be increasing the read period - * up to 1 day interval. - * On pqos error stop monitoring current group - * and reset the proc_pids array - * monitoring will be restarted on next collectd read cycle - */ - DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' group RESET after error.", - ngroup->desc); - pqos_mon_stop(group_mon_data); - for (size_t i = 0; i < ngroup->num_names; ++i) { - if (ngroup->proc_pids_array[i].pids) - pids_list_free(ngroup->proc_pids_array[i].pids); } - sfree(ngroup->proc_pids_array); - - initialize_proc_pids((const char **)ngroup->names, ngroup->num_names, - &ngroup->proc_pids_array); - ngroup->monitored_pids_count = 0; -cleanup: - if (ngroup->proc_pids_array == proc_pids_array_curr) { - assert(proc_pids_array_curr); - /* new list was successfully saved, free the old one */ - for (size_t i = 0; i < ngroup->num_names; ++i) - if (proc_pids_array_prev[i].pids) - pids_list_free(proc_pids_array_prev[i].pids); + return 0; +} - sfree(proc_pids_array_prev); +static void rdt_pqos_log(void *context, const size_t size, const char *msg) { + DEBUG(RDT_PLUGIN ": %s", msg); +} - } else { - /* new list was not saved. Free the new list, keep the old one*/ - for (size_t i = 0; i < ngroup->num_names; ++i) - if (proc_pids_array_curr[i].pids) - pids_list_free(proc_pids_array_curr[i].pids); +static int rdt_preinit(void) { + int ret; - sfree(proc_pids_array_curr); + if (g_rdt != NULL) { + /* already initialized if config callback was called before init callback */ + return 0; } - if (new_pids) - pids_list_free(new_pids); - - if (lost_pids) - pids_list_free(lost_pids); + g_rdt = calloc(1, sizeof(*g_rdt)); + if (g_rdt == NULL) { + ERROR(RDT_PLUGIN ": Failed to allocate memory for rdt context."); + return -ENOMEM; + } - return result; -} + struct pqos_config pqos = {.fd_log = -1, + .callback_log = rdt_pqos_log, + .context_log = NULL, + .verbose = 0, +#ifdef LIBPQOS2 + .interface = PQOS_INTER_OS_RESCTRL_MON}; + DEBUG(RDT_PLUGIN ": Initializing PQoS with RESCTRL interface"); +#else + .interface = PQOS_INTER_MSR}; + DEBUG(RDT_PLUGIN ": Initializing PQoS with MSR interface"); +#endif -static int read_pids_data() { + ret = pqos_init(&pqos); + DEBUG(RDT_PLUGIN ": PQoS initialization result: [%d]", ret); - if (0 == g_rdt->num_ngroups) { - DEBUG(RDT_PLUGIN ": read_pids_data: not configured - PIDs read skipped"); - return 0; +#ifdef LIBPQOS2 + if (ret == PQOS_RETVAL_INTER) { + pqos.interface = PQOS_INTER_MSR; + DEBUG(RDT_PLUGIN ": Initializing PQoS with MSR interface"); + ret = pqos_init(&pqos); + DEBUG(RDT_PLUGIN ": PQoS initialization result: [%d]", ret); } +#endif - DEBUG(RDT_PLUGIN ": read_pids_data: Scanning active groups"); - struct pqos_mon_data *active_groups[RDT_MAX_NAMES_GROUPS] = {0}; - size_t active_group_idx = 0; - for (size_t pngroups_idx = 0; - pngroups_idx < STATIC_ARRAY_SIZE(g_rdt->pngroups); ++pngroups_idx) - if (0 != g_rdt->ngroups[pngroups_idx].monitored_pids_count) - active_groups[active_group_idx++] = g_rdt->pngroups[pngroups_idx]; + if (ret != PQOS_RETVAL_OK) { + ERROR(RDT_PLUGIN ": Error initializing PQoS library!"); + goto rdt_preinit_error1; + } - int ret = 0; + g_interface = pqos.interface; - if (0 == active_group_idx) { - DEBUG(RDT_PLUGIN ": read_pids_data: no active groups - PIDs read skipped"); - goto groups_refresh; + ret = pqos_cap_get(&g_rdt->pqos_cap, &g_rdt->pqos_cpu); + if (ret != PQOS_RETVAL_OK) { + ERROR(RDT_PLUGIN ": Error retrieving PQoS capabilities."); + goto rdt_preinit_error2; } - DEBUG(RDT_PLUGIN ": read_pids_data: PIDs data polling"); - - int poll_result = pqos_mon_poll(active_groups, active_group_idx); - if (poll_result != PQOS_RETVAL_OK) { - ERROR(RDT_PLUGIN ": read_pids_data: Failed to poll monitoring data for " - "pids. Error [%d].", - poll_result); - ret = -poll_result; - goto groups_refresh; + ret = pqos_cap_get_type(g_rdt->pqos_cap, PQOS_CAP_TYPE_MON, &g_rdt->cap_mon); + if (ret == PQOS_RETVAL_PARAM) { + ERROR(RDT_PLUGIN ": Error retrieving monitoring capabilities."); + goto rdt_preinit_error2; } - for (size_t i = 0; i < g_rdt->num_ngroups; i++) { - enum pqos_mon_event mbm_events = - (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_TMEM_BW | - PQOS_MON_EVENT_RMEM_BW); + if (g_rdt->cap_mon == NULL) { + ERROR( + RDT_PLUGIN + ": Monitoring capability not detected. Nothing to do for the plugin."); + goto rdt_preinit_error2; + } - if (g_rdt->pngroups[i] == NULL || - g_rdt->ngroups[i].monitored_pids_count == 0) - continue; + /* Reset pqos monitoring groups registers */ + pqos_mon_reset(); - const struct pqos_event_values *pv = &g_rdt->pngroups[i]->values; + return 0; - /* Submit only monitored events data */ +rdt_preinit_error2: + pqos_fini(); - if (g_rdt->ngroups[i].events & PQOS_MON_EVENT_L3_OCCUP) - rdt_submit_gauge(g_rdt->ngroups[i].desc, "bytes", "llc", pv->llc); +rdt_preinit_error1: + sfree(g_rdt); - if (g_rdt->ngroups[i].events & PQOS_PERF_EVENT_IPC) - rdt_submit_gauge(g_rdt->ngroups[i].desc, "ipc", NULL, pv->ipc); + return -1; +} - if (g_rdt->ngroups[i].events & mbm_events) { - rdt_submit_derive(g_rdt->ngroups[i].desc, "memory_bandwidth", "local", - pv->mbm_local_delta); - rdt_submit_derive(g_rdt->ngroups[i].desc, "memory_bandwidth", "remote", - pv->mbm_remote_delta); - } +static int rdt_config(oconfig_item_t *ci) { + if (rdt_preinit() != 0) { + g_state = CONFIGURATION_ERROR; + /* if we return -1 at this point collectd + reports a failure in configuration and + aborts + */ + return 0; } -#if COLLECT_DEBUG - rdt_dump_pids_data(); -#endif /* COLLECT_DEBUG */ + for (int i = 0; i < ci->children_num; i++) { + oconfig_item_t *child = ci->children + i; -groups_refresh: - for (size_t i = 0; i < g_rdt->num_ngroups; i++) { - int refresh_result = - rdt_refresh_ngroup(&(g_rdt->ngroups[i]), g_rdt->pngroups[i]); + if (strncasecmp("Cores", child->key, (size_t)strlen("Cores")) == 0) { + if (g_rdt->cores.num_cgroups > 0) { + ERROR(RDT_PLUGIN + ": Configuration parameter \"%s\" can be used only once.", + child->key); + g_state = CONFIGURATION_ERROR; + } else if (rdt_config_cgroups(child) != 0) + g_state = CONFIGURATION_ERROR; - if (0 != refresh_result) { - ERROR(RDT_PLUGIN ": read_pids_data: NGroup %zu refresh failed. Error: %d", - i, refresh_result); - if (0 == ret) { - /* refresh error will be escalated only if there were no - * errors before. + if (g_state == CONFIGURATION_ERROR) + /* if we return -1 at this point collectd + reports a failure in configuration and + aborts */ - ret = refresh_result; + return 0; + +#if COLLECT_DEBUG + rdt_dump_cgroups(); +#endif /* COLLECT_DEBUG */ + } else if (strncasecmp("Processes", child->key, + (size_t)strlen("Processes")) == 0) { +#ifdef LIBPQOS2 + if (g_interface != PQOS_INTER_OS_RESCTRL_MON) { + ERROR(RDT_PLUGIN ": Configuration parameter \"%s\" not supported. " + "Resctrl monitoring is needed for PIDs monitoring.", + child->key); + g_state = CONFIGURATION_ERROR; } - } - } - assert(ret <= 0); - return ret; -} + else if (g_rdt->num_ngroups > 0) { + ERROR(RDT_PLUGIN + ": Configuration parameter \"%s\" can be used only once.", + child->key); + g_state = CONFIGURATION_ERROR; + } -static void rdt_init_pids_monitoring() { - for (size_t group_idx = 0; group_idx < g_rdt->num_ngroups; group_idx++) { - /* - * Each group must have not-null proc_pids array. - * Initial refresh is not mandatory for proper - * PIDs statistics detection. - */ - rdt_name_group_t *ng = &g_rdt->ngroups[group_idx]; - int init_result = initialize_proc_pids((const char **)ng->names, - ng->num_names, &ng->proc_pids_array); - if (0 != init_result) { - ERROR(RDT_PLUGIN - ": Initialization of proc_pids for group %zu failed. Error: %d", - group_idx, init_result); - continue; - } + else if (rdt_config_ngroups(g_rdt, child) != 0) + g_state = CONFIGURATION_ERROR; - int refresh_result = rdt_refresh_ngroup(&(g_rdt->ngroups[group_idx]), - g_rdt->pngroups[group_idx]); - if (0 != refresh_result) - ERROR(RDT_PLUGIN ": Initial refresh of group %zu failed. Error: %d", - group_idx, refresh_result); + if (g_state == CONFIGURATION_ERROR) + /* if we return -1 at this point collectd + reports a failure in configuration and + aborts + */ + return 0; + +#if COLLECT_DEBUG + rdt_dump_ngroups(); +#endif /* COLLECT_DEBUG */ +#else /* !LIBPQOS2 */ + ERROR(RDT_PLUGIN ": Configuration parameter \"%s\" not supported, please " + "recompile collectd with libpqos version 2.0 or newer.", + child->key); +#endif /* LIBPQOS2 */ + } else { + ERROR(RDT_PLUGIN ": Unknown configuration parameter \"%s\".", child->key); + } } + + return 0; } -#endif /* LIBPQOS2 */ static int read_cores_data() { @@ -1618,8 +1265,17 @@ static void rdt_init_cores_monitoring() { static int rdt_init(void) { - if (g_state == CONFIGURATION_ERROR) + if (g_state == CONFIGURATION_ERROR) { + if (g_rdt != NULL) { + if (g_rdt->cores.num_cgroups > 0) + rdt_free_cgroups(); +#ifdef LIBPQOS2 + if (g_rdt->num_ngroups > 0) + rdt_free_ngroups(g_rdt); +#endif + } return -1; + } int rdt_preinint_result = rdt_preinit(); if (rdt_preinint_result != 0) @@ -1655,10 +1311,9 @@ static int rdt_shutdown(void) { ret = pqos_fini(); if (ret != PQOS_RETVAL_OK) ERROR(RDT_PLUGIN ": Error shutting down PQoS library."); - rdt_free_cgroups(); #ifdef LIBPQOS2 - rdt_free_ngroups(); + rdt_free_ngroups(g_rdt); #endif /* LIBPQOS2 */ sfree(g_rdt);