+static int rdt_refresh_ngroup(rdt_name_group_t *ngroup,
+ struct pqos_mon_data *group_mon_data) {
+
+ int result = 0;
+
+ if (NULL == ngroup)
+ return -1;
+
+ if (NULL == ngroup->proc_pids_array) {
+ ERROR(RDT_PLUGIN
+ ": rdt_refresh_ngroup: \'%s\' uninitialized process pids array.",
+ ngroup->desc);
+
+ return -1;
+ }
+
+ DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' process names group.",
+ ngroup->desc);
+
+ proc_pids_t *proc_pids_array_prev = ngroup->proc_pids_array;
+ proc_pids_t *proc_pids_array_curr = NULL;
+
+ int fetch_result =
+ fetch_pids_for_procs(RDT_PROC_PATH, (const char **)ngroup->names,
+ ngroup->num_names, &proc_pids_array_curr);
+
+ if (0 != fetch_result) {
+ ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' failed to fetch PIDs.",
+ ngroup->desc);
+ return fetch_result;
+ }
+
+ pids_list_t *new_pids = NULL;
+ pid_t *new_pids_array = NULL;
+ size_t new_pids_count = 0;
+
+ pids_list_t *lost_pids = NULL;
+ pid_t *lost_pids_array = NULL;
+ size_t lost_pids_count = 0;
+
+ for (size_t i = 0; i < ngroup->num_names; ++i) {
+ if (NULL == proc_pids_array_prev[i].pids &&
+ NULL == proc_pids_array_curr[i].pids)
+ continue;
+ int diff_result = pids_list_diff(
+ proc_pids_array_prev[i].pids, proc_pids_array_curr[i].pids, &new_pids,
+ &new_pids_count, &lost_pids, &lost_pids_count);
+ if (0 != diff_result) {
+ ERROR(RDT_PLUGIN
+ ": rdt_refresh_ngroup: \'%s\'. Error [%d] during PID diff.",
+ ngroup->desc, diff_result);
+ result = -1;
+ goto cleanup;
+ }
+ }
+
+ DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' process names group, added: "
+ "%u, removed: %u.",
+ ngroup->desc, (unsigned)new_pids_count, (unsigned)lost_pids_count);
+
+ if (new_pids && new_pids_count > 0) {
+ new_pids_array = malloc(new_pids_count * sizeof(pid_t));
+ if (new_pids_array == NULL) {
+ ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Memory "
+ "allocation failed",
+ ngroup->desc);
+ result = -1;
+ goto cleanup;
+ }
+ pids_list_to_array(new_pids_array, new_pids, new_pids_count);
+
+ /* no pids are monitored for this group yet: start monitoring */
+ if (0 == ngroup->monitored_pids_count) {
+
+ int start_result =
+ pqos_mon_start_pids(new_pids_count, new_pids_array, ngroup->events,
+ (void *)ngroup->desc, group_mon_data);
+ if (PQOS_RETVAL_OK == start_result) {
+ ngroup->monitored_pids_count = new_pids_count;
+ } else {
+ ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Error [%d] while "
+ "STARTING pids monitoring",
+ ngroup->desc, start_result);
+ result = -1;
+ goto pqos_error_recovery;
+ }
+
+ } else {
+
+ int add_result =
+ pqos_mon_add_pids(new_pids_count, new_pids_array, group_mon_data);
+ if (PQOS_RETVAL_OK == add_result)
+ ngroup->monitored_pids_count += new_pids_count;
+ else {
+ ERROR(RDT_PLUGIN
+ ": rdt_refresh_ngroup: \'%s\'. Error [%d] while ADDING pids.",
+ ngroup->desc, add_result);
+ result = -1;
+ goto pqos_error_recovery;
+ }
+ }
+ }
+
+ if (lost_pids && lost_pids_count > 0) {
+ lost_pids_array = malloc(lost_pids_count * sizeof(pid_t));
+ if (lost_pids_array == NULL) {
+ ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Memory "
+ "allocation failed",
+ ngroup->desc);
+ result = -1;
+ goto cleanup;
+ }
+ pids_list_to_array(lost_pids_array, lost_pids, lost_pids_count);
+
+ if (lost_pids_count == ngroup->monitored_pids_count) {
+ /* all pids for this group are lost: stop monitoring */
+ int stop_result = pqos_mon_stop(group_mon_data);
+ if (PQOS_RETVAL_OK != stop_result) {
+ ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Error [%d] while "
+ "STOPPING monitoring",
+ ngroup->desc, stop_result);
+ result = -1;
+ goto pqos_error_recovery;
+ }
+ ngroup->monitored_pids_count = 0;
+ } else {
+ assert(lost_pids_count < ngroup->monitored_pids_count);
+ int remove_result = pqos_mon_remove_pids(lost_pids_count, lost_pids_array,
+ group_mon_data);
+ if (PQOS_RETVAL_OK == remove_result) {
+ ngroup->monitored_pids_count -= lost_pids_count;
+ } else {
+ ERROR(RDT_PLUGIN
+ ": rdt_refresh_ngroup: \'%s\'. Error [%d] while REMOVING pids.",
+ ngroup->desc, remove_result);
+ result = -1;
+ goto pqos_error_recovery;
+ }
+ }
+ }
+
+ if (new_pids_count > 0 || lost_pids_count > 0)
+ ngroup->proc_pids_array = proc_pids_array_curr;
+
+ goto cleanup;
+
+pqos_error_recovery:
+ /* Why?
+ * Resources might be temporarily unavailable.
+ *
+ * How?
+ * Collectd will halt the reading thread for this
+ * plugin if it returns an error.
+ * Consecutive errors will increase the read period,
+ * up to a 1-day interval.
+ * On a pqos error, stop monitoring the current group
+ * and reset the proc_pids array;
+ * monitoring will be restarted on the next collectd read cycle.