+
+ /* Refresh pqos monitoring for one process-names group: diff each name's
+  * PID snapshot, then START / ADD / REMOVE / STOP monitoring as needed.
+  * NOTE(review): the function header, `result` and `group_mon_data` are
+  * declared above this hunk — presumably `result` starts at 0 and
+  * `group_mon_data` is this group's pqos monitoring context; confirm
+  * against the full function. */
+ DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' process names group.",
+ ngroup->desc);
+
+ proc_pids_t **proc_pids = ngroup->proc_pids;
+ /* Accumulators for PIDs that appeared/disappeared across ALL names in the
+  * group.  Zero-initialized so pids_list_clear() in `cleanup` is always
+  * safe, even when we jump there before any diff succeeds. */
+ pids_list_t added_pids;
+ pids_list_t removed_pids;
+
+ memset(&added_pids, 0, sizeof(added_pids));
+ memset(&removed_pids, 0, sizeof(removed_pids));
+
+ for (size_t i = 0; i < ngroup->num_names; ++i) {
+ /* presumably compares the previous vs current PID snapshot for name i
+  * and appends the differences to added_pids/removed_pids — confirm
+  * against the pids_list_diff() implementation. */
+ int diff_result = pids_list_diff(proc_pids[i], &added_pids, &removed_pids);
+ if (0 != diff_result) {
+ ERROR(RDT_PLUGIN
+ ": rdt_refresh_ngroup: \'%s\'. Error [%d] during PID diff.",
+ ngroup->desc, diff_result);
+ result = -1;
+ goto cleanup;
+ }
+ }
+
+ DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' process names group, added: "
+ "%u, removed: %u.",
+ ngroup->desc, (unsigned)added_pids.size, (unsigned)removed_pids.size);
+
+ if (added_pids.size > 0) {
+
+ /* no pids are monitored for this group yet: start monitoring */
+ if (0 == ngroup->monitored_pids_count) {
+
+ /* group desc doubles as the pqos context pointer so results can be
+  * mapped back to this group later. */
+ int start_result =
+ pqos_mon_start_pids(added_pids.size, added_pids.pids, ngroup->events,
+ (void *)ngroup->desc, group_mon_data);
+ if (PQOS_RETVAL_OK == start_result) {
+ ngroup->monitored_pids_count = added_pids.size;
+ } else {
+ ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Error [%d] while "
+ "STARTING pids monitoring",
+ ngroup->desc, start_result);
+ result = -1;
+ goto pqos_error_recovery;
+ }
+
+ } else {
+
+ /* group already monitored: attach the new PIDs to the existing
+  * monitoring context instead of restarting it. */
+ int add_result =
+ pqos_mon_add_pids(added_pids.size, added_pids.pids, group_mon_data);
+ if (PQOS_RETVAL_OK == add_result)
+ ngroup->monitored_pids_count += added_pids.size;
+ else {
+ ERROR(RDT_PLUGIN
+ ": rdt_refresh_ngroup: \'%s\'. Error [%d] while ADDING pids.",
+ ngroup->desc, add_result);
+ result = -1;
+ goto pqos_error_recovery;
+ }
+ }
+ }
+
+ if (removed_pids.size > 0) {
+
+ /* all pids are removed: stop monitoring */
+ if (removed_pids.size == ngroup->monitored_pids_count) {
+ /* all pids for this group are lost: stop monitoring */
+ int stop_result = pqos_mon_stop(group_mon_data);
+ if (PQOS_RETVAL_OK != stop_result) {
+ ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Error [%d] while "
+ "STOPPING monitoring",
+ ngroup->desc, stop_result);
+ result = -1;
+ goto pqos_error_recovery;
+ }
+ ngroup->monitored_pids_count = 0;
+ } else {
+ /* only some PIDs vanished: detach them, keep the group monitored. */
+ int remove_result = pqos_mon_remove_pids(
+ removed_pids.size, removed_pids.pids, group_mon_data);
+ if (PQOS_RETVAL_OK == remove_result) {
+ ngroup->monitored_pids_count -= removed_pids.size;
+ } else {
+ ERROR(RDT_PLUGIN
+ ": rdt_refresh_ngroup: \'%s\'. Error [%d] while REMOVING pids.",
+ ngroup->desc, remove_result);
+ result = -1;
+ goto pqos_error_recovery;
+ }
+ }
+ }
+
+ /* success path skips the recovery label and falls through to cleanup. */
+ goto cleanup;
+
+pqos_error_recovery:
+ /* Why?
+  * Resources might be temporarily unavailable.
+  *
+  * How?
+  * Collectd will halt the reading thread for this
+  * plugin if it returns an error.
+  * Consecutive errors increase the read period,
+  * up to a 1 day interval.
+  * On a pqos error, stop monitoring the current group
+  * and reset the proc_pids array;
+  * monitoring will be restarted on the next collectd read cycle.
+  */
+ DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' group RESET after error.",
+ ngroup->desc);
+ /* best-effort stop: the return value is intentionally ignored, we are
+  * already on the error path and `result` is set. */
+ pqos_mon_stop(group_mon_data);
+ /* presumably emptying each name's current snapshot makes the next
+  * pids_list_diff() report every live PID as "added", restarting
+  * monitoring from scratch — confirm against pids_list_diff(). */
+ for (size_t i = 0; i < ngroup->num_names; ++i)
+ if (ngroup->proc_pids[i]->curr)
+ ngroup->proc_pids[i]->curr->size = 0;
+
+ ngroup->monitored_pids_count = 0;
+
+cleanup:
+ /* frees the diff accumulators on every path (success and error). */
+ pids_list_clear(&added_pids);
+ pids_list_clear(&removed_pids);
+
+ return result;