intel-rdt: Changed memory allocation of new and lost pids tables from static to dynamic
[collectd.git] / src / intel_rdt.c
index f7d9a34..9f7e649 100644 (file)
@@ -144,6 +144,14 @@ static int strlisttoarray(char *str_list, char ***names, size_t *names_num) {
   if (str_list == NULL || names == NULL)
     return -EINVAL;
 
+  if (strstr(str_list, ",,")) {
+    /* strtok ignores empty words between separators.
+     * This condition handles that by rejecting strings
+     * with consecutive seprators */
+    ERROR(RDT_PLUGIN ": Empty process name");
+    return -EINVAL;
+  }
+
   for (;;) {
     char *token = strtok_r(str_list, ",", &saveptr);
     if (token == NULL)
@@ -157,11 +165,16 @@ static int strlisttoarray(char *str_list, char ***names, size_t *names_num) {
     if (*token == '\0')
       continue;
 
-    if (!(isdupstr((const char **)*names, *names_num, token)))
+    if ((isdupstr((const char **)*names, *names_num, token))) {
+      ERROR(RDT_PLUGIN ": Duplicated process name \'%s\' in group \'%s\'",
+            token, str_list);
+      return -EINVAL;
+    } else {
       if (0 != strarray_add(names, names_num, token)) {
         ERROR(RDT_PLUGIN ": Error allocating process name string");
         return -ENOMEM;
       }
+    }
   }
 
   return 0;
@@ -242,8 +255,10 @@ static int oconfig_to_ngroups(const oconfig_item_t *item,
     char value[DATA_MAX_NAME_LEN];
 
     if ((item->values[j].value.string == NULL) ||
-        (strlen(item->values[j].value.string) == 0))
-      continue;
+        (strlen(item->values[j].value.string) == 0)) {
+      ERROR(RDT_PLUGIN ": Error - empty group");
+      return -EINVAL;
+    }
 
     sstrncpy(value, item->values[j].value.string, sizeof(value));
 
@@ -440,26 +455,27 @@ static int pids_list_free(pids_list_t *list) {
   return 0;
 }
 
-static void rdt_free_ngroups(void) {
+static void rdt_free_ngroups(rdt_ctx_t *rdt) {
   for (int i = 0; i < RDT_MAX_NAMES_GROUPS; i++) {
-    if (g_rdt->ngroups[i].desc)
+    if (rdt->ngroups[i].desc)
       DEBUG(RDT_PLUGIN ": Freeing pids \'%s\' group\'s data...",
-            g_rdt->ngroups[i].desc);
-    sfree(g_rdt->ngroups[i].desc);
-    strarray_free(g_rdt->ngroups[i].names, g_rdt->ngroups[i].num_names);
+            rdt->ngroups[i].desc);
+    sfree(rdt->ngroups[i].desc);
+
+    strarray_free(rdt->ngroups[i].names, rdt->ngroups[i].num_names);
 
-    if (g_rdt->ngroups[i].proc_pids_array) {
-      for (size_t j = 0; j < g_rdt->ngroups[i].num_names; ++j) {
-        if (NULL == g_rdt->ngroups[i].proc_pids_array[j].pids)
+    if (rdt->ngroups[i].proc_pids_array) {
+      for (size_t j = 0; j < rdt->ngroups[i].num_names; ++j) {
+        if (NULL == rdt->ngroups[i].proc_pids_array[j].pids)
           continue;
-        pids_list_free(g_rdt->ngroups[i].proc_pids_array[j].pids);
+        pids_list_free(rdt->ngroups[i].proc_pids_array[j].pids);
       }
 
-      sfree(g_rdt->ngroups[i].proc_pids_array);
+      sfree(rdt->ngroups[i].proc_pids_array);
     }
 
-    g_rdt->ngroups[i].num_names = 0;
-    sfree(g_rdt->pngroups[i]);
+    rdt->ngroups[i].num_names = 0;
+    sfree(rdt->pngroups[i]);
   }
 }
 #endif /* LIBPQOS2 */
@@ -601,7 +617,7 @@ static int rdt_config_cgroups(oconfig_item_t *item) {
 }
 
 #ifdef LIBPQOS2
-static int rdt_config_ngroups(const oconfig_item_t *item) {
+static int rdt_config_ngroups(rdt_ctx_t *rdt, const oconfig_item_t *item) {
   int n = 0;
   enum pqos_mon_event events = 0;
 
@@ -621,22 +637,26 @@ static int rdt_config_ngroups(const oconfig_item_t *item) {
     DEBUG(RDT_PLUGIN ":  [%d]: %s", j, item->values[j].value.string);
   }
 
-  n = oconfig_to_ngroups(item, g_rdt->ngroups, RDT_MAX_NAMES_GROUPS);
+  n = oconfig_to_ngroups(item, rdt->ngroups, RDT_MAX_NAMES_GROUPS);
   if (n < 0) {
-    rdt_free_ngroups();
+    rdt_free_ngroups(rdt);
     ERROR(RDT_PLUGIN ": Error parsing process name groups configuration.");
     return -EINVAL;
   }
 
   /* validate configured process name values */
   for (int group_idx = 0; group_idx < n; group_idx++) {
-    for (size_t name_idx = 0; name_idx < g_rdt->ngroups[group_idx].num_names;
+    DEBUG(RDT_PLUGIN ":  checking group [%d]: %s", group_idx,
+          rdt->ngroups[group_idx].desc);
+    for (size_t name_idx = 0; name_idx < rdt->ngroups[group_idx].num_names;
          name_idx++) {
-      if (!rdt_is_proc_name_valid(g_rdt->ngroups[group_idx].names[name_idx])) {
+      DEBUG(RDT_PLUGIN ":    checking process name [%zu]: %s", name_idx,
+            rdt->ngroups[group_idx].names[name_idx]);
+      if (!rdt_is_proc_name_valid(rdt->ngroups[group_idx].names[name_idx])) {
         ERROR(RDT_PLUGIN ": Process name group '%s' contains invalid name '%s'",
-              g_rdt->ngroups[group_idx].desc,
-              g_rdt->ngroups[group_idx].names[name_idx]);
-        rdt_free_ngroups();
+              rdt->ngroups[group_idx].desc,
+              rdt->ngroups[group_idx].names[name_idx]);
+        rdt_free_ngroups(rdt);
         return -EINVAL;
       }
     }
@@ -648,29 +668,29 @@ static int rdt_config_ngroups(const oconfig_item_t *item) {
   }
 
   /* Get all available events on this platform */
-  for (unsigned i = 0; i < g_rdt->cap_mon->u.mon->num_events; i++)
-    events |= g_rdt->cap_mon->u.mon->events[i].type;
+  for (unsigned i = 0; i < rdt->cap_mon->u.mon->num_events; i++)
+    events |= rdt->cap_mon->u.mon->events[i].type;
 
   events &= ~(PQOS_PERF_EVENT_LLC_MISS);
 
   DEBUG(RDT_PLUGIN ": Available events to monitor: %#x", events);
 
-  g_rdt->num_ngroups = n;
+  rdt->num_ngroups = n;
   for (int i = 0; i < n; i++) {
     for (int j = 0; j < i; j++) {
-      int found = ngroup_cmp(&g_rdt->ngroups[j], &g_rdt->ngroups[i]);
+      int found = ngroup_cmp(&rdt->ngroups[j], &rdt->ngroups[i]);
       if (found != 0) {
-        rdt_free_ngroups();
+        rdt_free_ngroups(rdt);
         ERROR(RDT_PLUGIN
               ": Cannot monitor same process name in different groups.");
         return -EINVAL;
       }
     }
 
-    g_rdt->ngroups[i].events = events;
-    g_rdt->pngroups[i] = calloc(1, sizeof(*g_rdt->pngroups[i]));
-    if (g_rdt->pngroups[i] == NULL) {
-      rdt_free_ngroups();
+    rdt->ngroups[i].events = events;
+    rdt->pngroups[i] = calloc(1, sizeof(*rdt->pngroups[i]));
+    if (rdt->pngroups[i] == NULL) {
+      rdt_free_ngroups(rdt);
       ERROR(RDT_PLUGIN
             ": Failed to allocate memory for process name monitoring data.");
       return -ENOMEM;
@@ -813,7 +833,8 @@ static int read_proc_name(const char *procfs_path,
 
   char *path = ssnprintf_alloc("%s/%s/%s", procfs_path, pid_entry->d_name,
                                comm_file_name);
-
+  if (path == NULL)
+    return -1;
   FILE *f = fopen(path, "r");
   if (f == NULL) {
     ERROR(RDT_PLUGIN ": Failed to open comm file, error: %d\n", errno);
@@ -821,6 +842,7 @@ static int read_proc_name(const char *procfs_path,
     return -1;
   }
   size_t read_length = fread(name, sizeof(char), out_size, f);
+  name[out_size - 1] = '\0';
   fclose(f);
   sfree(path);
   /* strip new line ending */
@@ -1136,7 +1158,7 @@ static int rdt_config(oconfig_item_t *ci) {
         return 0;
       }
 
-      if (rdt_config_ngroups(child) != 0) {
+      if (rdt_config_ngroups(g_rdt, child) != 0) {
         g_state = CONFIGURATION_ERROR;
         /* if we return -1 at this point collectd
            reports a failure in configuration and
@@ -1263,9 +1285,11 @@ static int rdt_refresh_ngroup(rdt_name_group_t *ngroup,
   }
 
   pids_list_t *new_pids = NULL;
+  pid_t *new_pids_array = NULL;
   size_t new_pids_count = 0;
 
   pids_list_t *lost_pids = NULL;
+  pid_t *lost_pids_array = NULL;
   size_t lost_pids_count = 0;
 
   for (size_t i = 0; i < ngroup->num_names; ++i) {
@@ -1288,79 +1312,89 @@ static int rdt_refresh_ngroup(rdt_name_group_t *ngroup,
                    "%u, removed: %u.",
         ngroup->desc, (unsigned)new_pids_count, (unsigned)lost_pids_count);
 
-  if (new_pids_count != 0 || lost_pids_count != 0) {
-
-    if (new_pids) {
-      pid_t new_pids_array[new_pids_count];
-      pids_list_to_array(new_pids_array, new_pids,
-                         STATIC_ARRAY_SIZE(new_pids_array));
-
-      /* no pids are monitored for this group yet: start monitoring */
-      if (0 == ngroup->monitored_pids_count) {
-
-        int start_result =
-            pqos_mon_start_pids(new_pids_count, new_pids_array, ngroup->events,
-                                (void *)ngroup->desc, group_mon_data);
-        if (PQOS_RETVAL_OK == start_result) {
-          ngroup->monitored_pids_count = new_pids_count;
-        } else {
-          ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Error [%d] while "
-                           "STARTING pids monitoring",
-                ngroup->desc, start_result);
-          result = -1;
-          goto pqos_error_recovery;
-        }
+  if (new_pids && new_pids_count > 0) {
+    new_pids_array = malloc(new_pids_count * sizeof(pid_t));
+    if (new_pids_array == NULL) {
+      ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Memory "
+                       "allocation failed",
+            ngroup->desc);
+      result = -1;
+      goto cleanup;
+    }
+    pids_list_to_array(new_pids_array, new_pids, new_pids_count);
+
+    /* no pids are monitored for this group yet: start monitoring */
+    if (0 == ngroup->monitored_pids_count) {
 
+      int start_result =
+          pqos_mon_start_pids(new_pids_count, new_pids_array, ngroup->events,
+                              (void *)ngroup->desc, group_mon_data);
+      if (PQOS_RETVAL_OK == start_result) {
+        ngroup->monitored_pids_count = new_pids_count;
       } else {
+        ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Error [%d] while "
+                         "STARTING pids monitoring",
+              ngroup->desc, start_result);
+        result = -1;
+        goto pqos_error_recovery;
+      }
+
+    } else {
 
-        int add_result =
-            pqos_mon_add_pids(new_pids_count, new_pids_array, group_mon_data);
-        if (PQOS_RETVAL_OK == add_result)
-          ngroup->monitored_pids_count += new_pids_count;
-        else {
-          ERROR(RDT_PLUGIN
-                ": rdt_refresh_ngroup: \'%s\'. Error [%d] while ADDING pids.",
-                ngroup->desc, add_result);
-          result = -1;
-          goto pqos_error_recovery;
-        }
+      int add_result =
+          pqos_mon_add_pids(new_pids_count, new_pids_array, group_mon_data);
+      if (PQOS_RETVAL_OK == add_result)
+        ngroup->monitored_pids_count += new_pids_count;
+      else {
+        ERROR(RDT_PLUGIN
+              ": rdt_refresh_ngroup: \'%s\'. Error [%d] while ADDING pids.",
+              ngroup->desc, add_result);
+        result = -1;
+        goto pqos_error_recovery;
       }
     }
+  }
 
-    if (lost_pids) {
-      pid_t lost_pids_array[lost_pids_count];
-      pids_list_to_array(lost_pids_array, lost_pids,
-                         STATIC_ARRAY_SIZE(lost_pids_array));
-
-      if (lost_pids_count == ngroup->monitored_pids_count) {
-        /* all pids for this group are lost: stop monitoring */
-        int stop_result = pqos_mon_stop(group_mon_data);
-        if (PQOS_RETVAL_OK != stop_result) {
-          ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Error [%d] while "
-                           "STOPPING monitoring",
-                ngroup->desc, stop_result);
-          result = -1;
-          goto pqos_error_recovery;
-        }
-        ngroup->monitored_pids_count = 0;
+  if (lost_pids && lost_pids_count > 0) {
+    lost_pids_array = malloc(lost_pids_count * sizeof(pid_t));
+    if (lost_pids_array == NULL) {
+      ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Memory "
+                       "allocation failed",
+            ngroup->desc);
+      result = -1;
+      goto cleanup;
+    }
+    pids_list_to_array(lost_pids_array, lost_pids, lost_pids_count);
+
+    if (lost_pids_count == ngroup->monitored_pids_count) {
+      /* all pids for this group are lost: stop monitoring */
+      int stop_result = pqos_mon_stop(group_mon_data);
+      if (PQOS_RETVAL_OK != stop_result) {
+        ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Error [%d] while "
+                         "STOPPING monitoring",
+              ngroup->desc, stop_result);
+        result = -1;
+        goto pqos_error_recovery;
+      }
+      ngroup->monitored_pids_count = 0;
+    } else {
+      assert(lost_pids_count < ngroup->monitored_pids_count);
+      int remove_result = pqos_mon_remove_pids(lost_pids_count, lost_pids_array,
+                                               group_mon_data);
+      if (PQOS_RETVAL_OK == remove_result) {
+        ngroup->monitored_pids_count -= lost_pids_count;
       } else {
-        assert(lost_pids_count < ngroup->monitored_pids_count);
-        int remove_result = pqos_mon_remove_pids(
-            lost_pids_count, lost_pids_array, group_mon_data);
-        if (PQOS_RETVAL_OK == remove_result) {
-          ngroup->monitored_pids_count -= lost_pids_count;
-        } else {
-          ERROR(RDT_PLUGIN
-                ": rdt_refresh_ngroup: \'%s\'. Error [%d] while REMOVING pids.",
-                ngroup->desc, remove_result);
-          result = -1;
-          goto pqos_error_recovery;
-        }
+        ERROR(RDT_PLUGIN
+              ": rdt_refresh_ngroup: \'%s\'. Error [%d] while REMOVING pids.",
+              ngroup->desc, remove_result);
+        result = -1;
+        goto pqos_error_recovery;
       }
     }
+  }
 
+  if (new_pids_count > 0 || lost_pids_count > 0)
     ngroup->proc_pids_array = proc_pids_array_curr;
-  }
 
   goto cleanup;
 
@@ -1412,9 +1446,15 @@ cleanup:
   if (new_pids)
     pids_list_free(new_pids);
 
+  if (new_pids_array)
+    free(new_pids_array);
+
   if (lost_pids)
     pids_list_free(lost_pids);
 
+  if (lost_pids_array)
+    free(lost_pids_array);
+
   return result;
 }
 
@@ -1658,7 +1698,7 @@ static int rdt_shutdown(void) {
 
   rdt_free_cgroups();
 #ifdef LIBPQOS2
-  rdt_free_ngroups();
+  rdt_free_ngroups(g_rdt);
 #endif /* LIBPQOS2 */
   sfree(g_rdt);