Turbostat: Refactor topology probing
[collectd.git] / src / turbostat.c
index bd2b2fe..e35f3d2 100644 (file)
@@ -196,21 +196,30 @@ static _Bool allocated = 0;
 static _Bool initialized = 0;
 
 #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
-       (thread_base + (pkg_no) * topo.num_cores_per_pkg * \
-               topo.num_threads_per_core + \
-               (core_no) * topo.num_threads_per_core + (thread_no))
+       (thread_base + \
+               (pkg_no) * topology.num_cores * topology.num_threads + \
+               (core_no) * topology.num_threads + \
+               (thread_no))
 #define GET_CORE(core_base, core_no, pkg_no) \
-       (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no))
+       (core_base + \
+               (pkg_no) * topology.num_cores + \
+               (core_no))
 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
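+/*
+ * The counter arrays are flat, in package-major order:
+ *   index = pkg_no * cores_per_pkg * threads_per_core
+ *         + core_no * threads_per_core + thread_no
+ * e.g. with 2 packages, 4 cores per package and 2 threads per core,
+ * (pkg 1, core 2, thread 1) lands in slot 1*4*2 + 2*2 + 1 = 13.
+ */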
 
-struct topo_params {
+struct cpu_topology {
+       int package_id;
+       int core_id;
+       _Bool first_core_in_package;
+       _Bool first_thread_in_core;
+};
+
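+/*
+ * Global description of the CPU topology
+ *
+ * num_cores is the number of core slots per package (max core_id + 1),
+ * num_threads the number of thread slots per core.
+ */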
+struct topology {
+       int max_cpu_id;
        int num_packages;
-       int num_cpus;
        int num_cores;
-       int max_cpu_num;
-       int num_cores_per_pkg;
-       int num_threads_per_core;
-} topo;
+       int num_threads;
+       struct cpu_topology *cpus;
+} topology;
 
 struct timeval tv_even, tv_odd, tv_delta;
 
@@ -255,48 +264,10 @@ enum return_values {
        UNSUPPORTED_CPU,
 };
 
-static int setup_all_buffers(void);
-
-static int
-cpu_is_not_present(int cpu)
-{
-       return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
-}
-/*
- * run func(thread, core, package) in topology order
- * skip non-present cpus
- */
-
-static int __attribute__((warn_unused_result))
-for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
-       struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
-{
-       int retval, pkg_no, core_no, thread_no;
-
-       for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
-               for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
-                       for (thread_no = 0; thread_no <
-                               topo.num_threads_per_core; ++thread_no) {
-                               struct thread_data *t;
-                               struct core_data *c;
-                               struct pkg_data *p;
-
-                               t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
-
-                               if (cpu_is_not_present(t->cpu_id))
-                                       continue;
 
-                               c = GET_CORE(core_base, core_no, pkg_no);
-                               p = GET_PKG(pkg_base, pkg_no);
-
-                               retval = func(t, c, p);
-                               if (retval)
-                                       return retval;
-                       }
-               }
-       }
-       return 0;
-}
+/*****************************
+ *  MSR Manipulation helpers *
+ *****************************/
 
 /*
  * Open a MSR device for reading
@@ -366,6 +337,128 @@ get_msr(int cpu, off_t offset, unsigned long long *msr)
        return retval;
 }
 
+
+/********************************
+ * Raw data acquisition (1 CPU) *
+ ********************************/
+
+/*
+ * Read all data available for a single CPU
+ *
+ * Core data is shared by all threads of a core: extracted only for the first thread
+ * Package data is shared by all cores of a package: extracted only for the first thread of the first core
+ *
+ * Side effect: migrates to the targeted CPU
+ */
+static int __attribute__((warn_unused_result))
+get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       int cpu = t->cpu_id;
+       unsigned long long msr;
+       int msr_fd;
+       int retval = 0;
+
+       msr_fd = open_msr(cpu, 1);
+       if (msr_fd < 0)
+               return msr_fd;
+
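+/*
+ * Read one MSR through the already-open fd; on failure, record the
+ * matching -ERR_<MSR name> code in retval and bail out through 'out'.
+ */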
+#define READ_MSR(msr, dst)                     \
+do {                                           \
+       if (read_msr(msr_fd, msr, dst)) {       \
+               retval = -ERR_##msr;            \
+               goto out;                       \
+       }                                       \
+} while (0)
+
+       READ_MSR(MSR_IA32_TSC, &t->tsc);
+
+       READ_MSR(MSR_IA32_APERF, &t->aperf);
+       READ_MSR(MSR_IA32_MPERF, &t->mperf);
+
+       READ_MSR(MSR_SMI_COUNT, &msr);
+       t->smi_count = msr & 0xFFFFFFFF;
+
+       /* collect core counters only for 1st thread in core */
+       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) {
+               retval = 0;
+               goto out;
+       }
+
+       if (do_core_cstate & (1 << 3))
+               READ_MSR(MSR_CORE_C3_RESIDENCY, &c->c3);
+       if (do_core_cstate & (1 << 6))
+               READ_MSR(MSR_CORE_C6_RESIDENCY, &c->c6);
+       if (do_core_cstate & (1 << 7))
+               READ_MSR(MSR_CORE_C7_RESIDENCY, &c->c7);
+
+       if (do_dts) {
+               READ_MSR(MSR_IA32_THERM_STATUS, &msr);
+               c->core_temp_c = p->tcc_activation_temp - ((msr >> 16) & 0x7F);
+       }
+
+       /* collect package counters only for 1st core in package */
+       if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
+               retval = 0;
+               goto out;
+       }
+
+        if (do_pkg_cstate & (1 << 2))
+                READ_MSR(MSR_PKG_C2_RESIDENCY, &p->pc2);
+        if (do_pkg_cstate & (1 << 3))
+                READ_MSR(MSR_PKG_C3_RESIDENCY, &p->pc3);
+        if (do_pkg_cstate & (1 << 6))
+                READ_MSR(MSR_PKG_C6_RESIDENCY, &p->pc6);
+        if (do_pkg_cstate & (1 << 7))
+                READ_MSR(MSR_PKG_C7_RESIDENCY, &p->pc7);
+        if (do_pkg_cstate & (1 << 8))
+                READ_MSR(MSR_PKG_C8_RESIDENCY, &p->pc8);
+        if (do_pkg_cstate & (1 << 9))
+                READ_MSR(MSR_PKG_C9_RESIDENCY, &p->pc9);
+        if (do_pkg_cstate & (1 << 10))
+                READ_MSR(MSR_PKG_C10_RESIDENCY, &p->pc10);
+
+       if (do_rapl & RAPL_PKG) {
+               READ_MSR(MSR_PKG_ENERGY_STATUS, &msr);
+               p->energy_pkg = msr & 0xFFFFFFFF;
+       }
+       if (do_rapl & RAPL_CORES) {
+               READ_MSR(MSR_PP0_ENERGY_STATUS, &msr);
+               p->energy_cores = msr & 0xFFFFFFFF;
+       }
+       if (do_rapl & RAPL_DRAM) {
+               READ_MSR(MSR_DRAM_ENERGY_STATUS, &msr);
+               p->energy_dram = msr & 0xFFFFFFFF;
+       }
+       if (do_rapl & RAPL_GFX) {
+               READ_MSR(MSR_PP1_ENERGY_STATUS, &msr);
+               p->energy_gfx = msr & 0xFFFFFFFF;
+       }
+       if (do_rapl & RAPL_PKG_PERF_STATUS) {
+               READ_MSR(MSR_PKG_PERF_STATUS, &msr);
+               p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
+       }
+       if (do_rapl & RAPL_DRAM_PERF_STATUS) {
+               READ_MSR(MSR_DRAM_PERF_STATUS, &msr);
+               p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
+       }
+       if (do_ptm) {
+               READ_MSR(MSR_IA32_PACKAGE_THERM_STATUS, &msr);
+               p->pkg_temp_c = p->tcc_activation_temp - ((msr >> 16) & 0x7F);
+       }
+
+out:
+       close(msr_fd);
+       return retval;
+}
+
+
+/**********************************
+ * Evaluating the changes (1 CPU) *
+ **********************************/
+
+/*
+ * Compute delta = new - old on 32-bit counters that may wrap around
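+ * e.g. old = 0xFFFFFFF0, new = 0x00000010 gives delta = 0x20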
+ */
 #define DELTA_WRAP32(delta, new, old)                  \
        if (new > old) {                                \
                delta = new - old;                      \
@@ -373,7 +466,11 @@ get_msr(int cpu, off_t offset, unsigned long long *msr)
                delta = 0x100000000 + new - old;        \
        }
 
-static void
+/*
+ * Compute delta = new - old at the package level
+ * (some fields, e.g. temperature, are copied as-is rather than differenced)
+ */
+static inline void
 delta_package(struct pkg_data *delta, const struct pkg_data *new, const struct pkg_data *old)
 {
        delta->pc2 = new->pc2 - old->pc2;
@@ -393,7 +490,11 @@ delta_package(struct pkg_data *delta, const struct pkg_data *new, const struct p
        DELTA_WRAP32(delta->rapl_dram_perf_status, new->rapl_dram_perf_status, old->rapl_dram_perf_status);
 }
 
-static void
+/*
+ * Compute delta = new - old at the core level
+ * (some fields, e.g. temperature, are copied as-is rather than differenced)
+ */
+static inline void
 delta_core(struct core_data *delta, const struct core_data *new, const struct core_data *old)
 {
        delta->c3 = new->c3 - old->c3;
@@ -402,7 +503,11 @@ delta_core(struct core_data *delta, const struct core_data *new, const struct co
        delta->core_temp_c = new->core_temp_c;
 }
 
-static int __attribute__((warn_unused_result))
+/*
+ * Compute delta = new - old at the thread level
+ * core_delta is required for the c1 estimate (c1 = tsc - c0 - all core C-states)
+ */
+static inline int __attribute__((warn_unused_result))
 delta_thread(struct thread_data *delta, const struct thread_data *new, const struct thread_data *old,
        const struct core_data *core_delta)
 {
@@ -454,136 +559,234 @@ delta_thread(struct thread_data *delta, const struct thread_data *new, const str
        return 0;
 }
 
-static int __attribute__((warn_unused_result))
-delta_cpu(struct thread_data *t_delta, struct core_data *c_delta, struct pkg_data *p_delta,
-         const struct thread_data *t_new, const struct core_data *c_new, const struct pkg_data *p_new,
-         const struct thread_data *t_old, const struct core_data *c_old, const struct pkg_data *p_old)
-{
-       int ret;
+/**********************************
+ * Submitting the results (1 CPU) *
+ **********************************/
 
-       /* calculate core delta only for 1st thread in core */
-       if (t_new->flags & CPU_IS_FIRST_THREAD_IN_CORE)
-               delta_core(c_delta, c_new, c_old);
-
-       /* always calculate thread delta */
-       ret = delta_thread(t_delta, t_new, t_old, c_delta);
-       if (ret != 0)
-               return ret;
+/*
+ * Submit one gauge value
+ */
+static void
+turbostat_submit (const char *plugin_instance,
+       const char *type, const char *type_instance,
+       gauge_t value)
+{
+       value_list_t vl = VALUE_LIST_INIT;
+       value_t v;
 
-       /* calculate package delta only for 1st core in package */
-       if (t_new->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
-               delta_package(p_delta, p_new, p_old);
+       v.gauge = value;
+       vl.values = &v;
+       vl.values_len = 1;
+       sstrncpy (vl.host, hostname_g, sizeof (vl.host));
+       sstrncpy (vl.plugin, PLUGIN_NAME, sizeof (vl.plugin));
+       if (plugin_instance != NULL)
+               sstrncpy (vl.plugin_instance, plugin_instance, sizeof (vl.plugin_instance));
+       sstrncpy (vl.type, type, sizeof (vl.type));
+       if (type_instance != NULL)
+               sstrncpy (vl.type_instance, type_instance, sizeof (vl.type_instance));
 
-       return 0;
+       plugin_dispatch_values (&vl);
 }
 
-
 /*
- * get_counters(...)
- * migrate to cpu
- * acquire and record local counters for that cpu
+ * Submit all data for a single CPU
+ *
+ * Core data is shared by all threads of a core: submitted only for the first thread
+ * Package data is shared by all cores of a package: submitted only for the first thread of the first core
  */
-static int __attribute__((warn_unused_result))
-get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+static int
+submit_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
-       int cpu = t->cpu_id;
-       unsigned long long msr;
-       int msr_fd;
-       int retval = 0;
+       char name[12];
+       double interval_float;
 
-       msr_fd = open_msr(cpu, 1);
-       if (msr_fd < 0)
-               return msr_fd;
+       interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
 
-#define READ_MSR(msr, dst)                     \
-do {                                           \
-       if (read_msr(msr_fd, msr, dst)) {       \
-               retval = -ERR_##msr;            \
-               goto out;                       \
-       }                                       \
-} while (0)
+       ssnprintf(name, sizeof(name), "cpu%02d", t->cpu_id);
 
-       READ_MSR(MSR_IA32_TSC, &t->tsc);
+       if (!aperf_mperf_unstable)
+               turbostat_submit(name, "percent", "c0", 100.0 * t->mperf/t->tsc);
+       if (!aperf_mperf_unstable)
+               turbostat_submit(name, "percent", "c1", 100.0 * t->c1/t->tsc);
 
-       READ_MSR(MSR_IA32_APERF, &t->aperf);
-       READ_MSR(MSR_IA32_MPERF, &t->mperf);
+       /* GHz */
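+       /* average busy frequency = (TSC ticks / interval) * APERF/MPERF, reported in GHz */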
+       if ((!aperf_mperf_unstable) || (!(t->aperf > t->tsc || t->mperf > t->tsc)))
+               turbostat_submit(NULL, "frequency", name, 1.0 * t->tsc / 1000000000 * t->aperf / t->mperf / interval_float);
 
-       READ_MSR(MSR_SMI_COUNT, &msr);
-       t->smi_count = msr & 0xFFFFFFFF;
+       /* SMI */
+       turbostat_submit(NULL, "current", name, t->smi_count);
 
-       /* collect core counters only for 1st thread in core */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) {
-               retval = 0;
-               goto out;
-       }
+       /* submit per-core data only for 1st thread in core */
+       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+               goto done;
+
+       ssnprintf(name, sizeof(name), "core%02d", c->core_id);
 
        if (do_core_cstate & (1 << 3))
-               READ_MSR(MSR_CORE_C3_RESIDENCY, &c->c3);
+               turbostat_submit(name, "percent", "c3", 100.0 * c->c3/t->tsc);
        if (do_core_cstate & (1 << 6))
-               READ_MSR(MSR_CORE_C6_RESIDENCY, &c->c6);
+               turbostat_submit(name, "percent", "c6", 100.0 * c->c6/t->tsc);
        if (do_core_cstate & (1 << 7))
-               READ_MSR(MSR_CORE_C7_RESIDENCY, &c->c7);
+               turbostat_submit(name, "percent", "c7", 100.0 * c->c7/t->tsc);
 
-       if (do_dts) {
-               READ_MSR(MSR_IA32_THERM_STATUS, &msr);
-               c->core_temp_c = p->tcc_activation_temp - ((msr >> 16) & 0x7F);
-       }
+       if (do_dts)
+               turbostat_submit(NULL, "temperature", name, c->core_temp_c);
 
-       /* collect package counters only for 1st core in package */
-       if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
-               retval = 0;
-               goto out;
-       }
+       /* submit per-package data only for 1st core in package */
+       if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+               goto done;
 
-        if (do_pkg_cstate & (1 << 2))
-                READ_MSR(MSR_PKG_C2_RESIDENCY, &p->pc2);
-        if (do_pkg_cstate & (1 << 3))
-                READ_MSR(MSR_PKG_C3_RESIDENCY, &p->pc3);
-        if (do_pkg_cstate & (1 << 6))
-                READ_MSR(MSR_PKG_C6_RESIDENCY, &p->pc6);
-        if (do_pkg_cstate & (1 << 7))
-                READ_MSR(MSR_PKG_C7_RESIDENCY, &p->pc7);
-        if (do_pkg_cstate & (1 << 8))
-                READ_MSR(MSR_PKG_C8_RESIDENCY, &p->pc8);
-        if (do_pkg_cstate & (1 << 9))
-                READ_MSR(MSR_PKG_C9_RESIDENCY, &p->pc9);
-        if (do_pkg_cstate & (1 << 10))
-                READ_MSR(MSR_PKG_C10_RESIDENCY, &p->pc10);
+       ssnprintf(name, sizeof(name), "pkg%02d", p->package_id);
 
-       if (do_rapl & RAPL_PKG) {
-               READ_MSR(MSR_PKG_ENERGY_STATUS, &msr);
-               p->energy_pkg = msr & 0xFFFFFFFF;
-       }
-       if (do_rapl & RAPL_CORES) {
-               READ_MSR(MSR_PP0_ENERGY_STATUS, &msr);
-               p->energy_cores = msr & 0xFFFFFFFF;
-       }
-       if (do_rapl & RAPL_DRAM) {
-               READ_MSR(MSR_DRAM_ENERGY_STATUS, &msr);
-               p->energy_dram = msr & 0xFFFFFFFF;
-       }
-       if (do_rapl & RAPL_GFX) {
-               READ_MSR(MSR_PP1_ENERGY_STATUS, &msr);
-               p->energy_gfx = msr & 0xFFFFFFFF;
-       }
-       if (do_rapl & RAPL_PKG_PERF_STATUS) {
-               READ_MSR(MSR_PKG_PERF_STATUS, &msr);
-               p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
-       }
-       if (do_rapl & RAPL_DRAM_PERF_STATUS) {
-               READ_MSR(MSR_DRAM_PERF_STATUS, &msr);
-               p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
+       if (do_ptm)
+               turbostat_submit(NULL, "temperature", name, p->pkg_temp_c);
+
+       if (do_pkg_cstate & (1 << 2))
+               turbostat_submit(name, "percent", "pc2", 100.0 * p->pc2/t->tsc);
+       if (do_pkg_cstate & (1 << 3))
+               turbostat_submit(name, "percent", "pc3", 100.0 * p->pc3/t->tsc);
+       if (do_pkg_cstate & (1 << 6))
+               turbostat_submit(name, "percent", "pc6", 100.0 * p->pc6/t->tsc);
+       if (do_pkg_cstate & (1 << 7))
+               turbostat_submit(name, "percent", "pc7", 100.0 * p->pc7/t->tsc);
+       if (do_pkg_cstate & (1 << 8))
+               turbostat_submit(name, "percent", "pc8", 100.0 * p->pc8/t->tsc);
+       if (do_pkg_cstate & (1 << 9))
+               turbostat_submit(name, "percent", "pc9", 100.0 * p->pc9/t->tsc);
+       if (do_pkg_cstate & (1 << 10))
+               turbostat_submit(name, "percent", "pc10", 100.0 * p->pc10/t->tsc);
+
+       if (do_rapl) {
+               if (do_rapl & RAPL_PKG)
+                       turbostat_submit(name, "power", "Pkg_W", p->energy_pkg * rapl_energy_units / interval_float);
+               if (do_rapl & RAPL_CORES)
+                       turbostat_submit(name, "power", "Cor_W", p->energy_cores * rapl_energy_units / interval_float);
+               if (do_rapl & RAPL_GFX)
+                       turbostat_submit(name, "power", "GFX_W", p->energy_gfx * rapl_energy_units / interval_float);
+               if (do_rapl & RAPL_DRAM)
+                       turbostat_submit(name, "power", "RAM_W", p->energy_dram * rapl_energy_units / interval_float);
        }
-       if (do_ptm) {
-               READ_MSR(MSR_IA32_PACKAGE_THERM_STATUS, &msr);
-               p->pkg_temp_c = p->tcc_activation_temp - ((msr >> 16) & 0x7F);
+done:
+       return 0;
+}
+
+
+/**********************************
+ * Looping function over all CPUs *
+ **********************************/
+
+/*
+ * Return true if the given cpu id is not in the set of present CPUs
+ */
+static int
+cpu_is_not_present(int cpu)
+{
+       return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
+}
+
+/*
+ * Run func(thread, core, package) on every CPU, in topology order
+ *
+ * Skip non-present CPUs
+ * Return the first error encountered, or 0 on success
+ */
+static int __attribute__((warn_unused_result))
+for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
+       struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
+{
+       int retval, pkg_no, core_no, thread_no;
+
+       for (pkg_no = 0; pkg_no < topology.num_packages; ++pkg_no) {
+               for (core_no = 0; core_no < topology.num_cores; ++core_no) {
+                       for (thread_no = 0; thread_no < topology.num_threads; ++thread_no) {
+                               struct thread_data *t;
+                               struct core_data *c;
+                               struct pkg_data *p;
+
+                               t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
+
+                               if (cpu_is_not_present(t->cpu_id))
+                                       continue;
+
+                               c = GET_CORE(core_base, core_no, pkg_no);
+                               p = GET_PKG(pkg_base, pkg_no);
+
+                               retval = func(t, c, p);
+                               if (retval)
+                                       return retval;
+                       }
+               }
        }
+       return 0;
+}
 
-out:
-       close(msr_fd);
-       return retval;
+/*
+ * Dedicated loop: compute the deltas (new - old) for every CPU
+ *
+ * Skip non-present CPUs
+ * Return the first error encountered, or 0 on success
+ *
+ * Core deltas are shared by all threads of a core: computed only for the first thread
+ * Package deltas are shared by all cores of a package: computed only for the first thread of the first core
+ */
+static int __attribute__((warn_unused_result))
+for_all_cpus_delta(const struct thread_data *thread_new_base, const struct core_data *core_new_base, const struct pkg_data *pkg_new_base,
+                  const struct thread_data *thread_old_base, const struct core_data *core_old_base, const struct pkg_data *pkg_old_base)
+{
+       int retval, pkg_no, core_no, thread_no;
+
+       for (pkg_no = 0; pkg_no < topology.num_packages; ++pkg_no) {
+               for (core_no = 0; core_no < topology.num_cores; ++core_no) {
+                       for (thread_no = 0; thread_no < topology.num_threads; ++thread_no) {
+                               struct thread_data *t_delta;
+                               const struct thread_data *t_old, *t_new;
+                               struct core_data *c_delta;
+
+                               /* Get correct pointers for threads */
+                               t_delta = GET_THREAD(thread_delta, thread_no, core_no, pkg_no);
+                               t_new = GET_THREAD(thread_new_base, thread_no, core_no, pkg_no);
+                               t_old = GET_THREAD(thread_old_base, thread_no, core_no, pkg_no);
+
+                               /* Skip threads that disappeared */
+                               if (cpu_is_not_present(t_delta->cpu_id))
+                                       continue;
+
+                               /* c_delta is always required for delta_thread */
+                               c_delta = GET_CORE(core_delta, core_no, pkg_no);
+
+                               /* calculate core delta only for 1st thread in core */
+                               if (t_new->flags & CPU_IS_FIRST_THREAD_IN_CORE) {
+                                       const struct core_data *c_old, *c_new;
+
+                                       c_new = GET_CORE(core_new_base, core_no, pkg_no);
+                                       c_old = GET_CORE(core_old_base, core_no, pkg_no);
+
+                                       delta_core(c_delta, c_new, c_old);
+                               }
+
+                               /* Always calculate thread delta */
+                               retval = delta_thread(t_delta, t_new, t_old, c_delta);
+                               if (retval)
+                                       return retval;
+
+                               /* calculate package delta only for 1st core in package */
+                               if (t_new->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) {
+                                       struct pkg_data *p_delta;
+                                       const struct pkg_data *p_old, *p_new;
+
+                                       p_delta = GET_PKG(package_delta, pkg_no);
+                                       p_new = GET_PKG(pkg_new_base, pkg_no);
+                                       p_old = GET_PKG(pkg_old_base, pkg_no);
+
+                                       delta_package(p_delta, p_new, p_old);
+                               }
+                       }
+               }
+       }
+       return 0;
 }
 
+
 static void
 free_all_buffers(void)
 {
@@ -627,8 +830,13 @@ free_all_buffers(void)
        package_delta = NULL;
 }
 
+
+/****************
+ * File helpers *
+ ****************/
+
 /*
- * Parse a file containing a single int.
+ * Read a single int from a file.
  */
 static int __attribute__ ((format(printf,1,2)))
 parse_int_file(const char *fmt, ...)
@@ -654,40 +862,8 @@ parse_int_file(const char *fmt, ...)
        return value;
 }
 
-/*
- * cpu_is_first_sibling_in_core(cpu)
- * return 1 if given CPU is 1st HT sibling in the core
- */
-static int
-cpu_is_first_sibling_in_core(int cpu)
-{
-       return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
-}
-
-/*
- * cpu_is_first_core_in_package(cpu)
- * return 1 if given CPU is 1st core in package
- */
-static int
-cpu_is_first_core_in_package(int cpu)
-{
-       return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
-}
-
-static int
-get_physical_package_id(int cpu)
-{
-       return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
-}
-
-static int
-get_core_id(int cpu)
-{
-       return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
-}
-
 static int
-get_num_ht_siblings(int cpu)
+get_threads_on_core(int cpu)
 {
        char path[80];
        FILE *filep;
@@ -716,48 +892,6 @@ get_num_ht_siblings(int cpu)
                return 1;
 }
 
-static int __attribute__((warn_unused_result))
-for_all_cpus_delta(const struct thread_data *thread_new_base, const struct core_data *core_new_base, const struct pkg_data *pkg_new_base,
-                  const struct thread_data *thread_old_base, const struct core_data *core_old_base, const struct pkg_data *pkg_old_base)
-{
-       int retval, pkg_no, core_no, thread_no;
-
-       for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
-               for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
-                       for (thread_no = 0; thread_no <
-                               topo.num_threads_per_core; ++thread_no) {
-                               struct thread_data *t_delta;
-                               const struct thread_data *t_old, *t_new;
-                               struct core_data *c_delta;
-                               const struct core_data *c_old, *c_new;
-                               struct pkg_data *p_delta;
-                               const struct pkg_data *p_old, *p_new;
-
-                               t_delta = GET_THREAD(thread_delta, thread_no, core_no, pkg_no);
-                               t_new = GET_THREAD(thread_new_base, thread_no, core_no, pkg_no);
-                               t_old = GET_THREAD(thread_old_base, thread_no, core_no, pkg_no);
-                               if (cpu_is_not_present(t_delta->cpu_id))
-                                       continue;
-
-                               c_delta = GET_CORE(core_delta, core_no, pkg_no);
-                               c_new = GET_CORE(core_new_base, core_no, pkg_no);
-                               c_old = GET_CORE(core_old_base, core_no, pkg_no);
-
-                               p_delta = GET_PKG(package_delta, pkg_no);
-                               p_new = GET_PKG(pkg_new_base, pkg_no);
-                               p_old = GET_PKG(pkg_old_base, pkg_no);
-
-                               retval = delta_cpu(t_delta, c_delta, p_delta,
-                                                  t_new, c_new, p_new,
-                                                  t_old, c_old, p_old);
-                               if (retval)
-                                       return retval;
-                       }
-               }
-       }
-       return 0;
-}
-
 /*
  * run func(cpu) on every cpu in /proc/stat
  * return max_cpu number
@@ -778,6 +912,7 @@ for_all_proc_cpus(int (func)(int))
        retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
        if (retval != 0) {
                ERROR("Failed to parse /proc/stat");
+               fclose(fp);
                return -ERR_CANT_READ_PROC_STAT;
        }
 
@@ -797,18 +932,16 @@ for_all_proc_cpus(int (func)(int))
 }
 
 /*
- * count_cpus()
- * remember the last one seen, it will be the max
+ * Update the stored topology.max_cpu_id
  */
 static int
-count_cpus(int cpu)
+update_max_cpu_id(int cpu)
 {
-       if (topo.max_cpu_num < cpu)
-               topo.max_cpu_num = cpu;
-
-       topo.num_cpus += 1;
+       if (topology.max_cpu_id < cpu)
+               topology.max_cpu_id = cpu;
        return 0;
 }
+
 static int
 mark_cpu_present(int cpu)
 {
@@ -817,119 +950,7 @@ mark_cpu_present(int cpu)
 }
 
 
-static void
-turbostat_submit (const char *plugin_instance,
-       const char *type, const char *type_instance,
-       gauge_t value)
-{
-       value_list_t vl = VALUE_LIST_INIT;
-       value_t v;
-
-       v.gauge = value;
-       vl.values = &v;
-       vl.values_len = 1;
-       sstrncpy (vl.host, hostname_g, sizeof (vl.host));
-       sstrncpy (vl.plugin, PLUGIN_NAME, sizeof (vl.plugin));
-       if (plugin_instance != NULL)
-               sstrncpy (vl.plugin_instance, plugin_instance, sizeof (vl.plugin_instance));
-       sstrncpy (vl.type, type, sizeof (vl.type));
-       if (type_instance != NULL)
-               sstrncpy (vl.type_instance, type_instance, sizeof (vl.type_instance));
-
-       plugin_dispatch_values (&vl);
-}
-
-/*
- * column formatting convention & formats
- * package: "pk" 2 columns %2d
- * core: "cor" 3 columns %3d
- * CPU: "CPU" 3 columns %3d
- * Pkg_W: %6.2
- * Cor_W: %6.2
- * GFX_W: %5.2
- * RAM_W: %5.2
- * GHz: "GHz" 3 columns %3.2
- * TSC: "TSC" 3 columns %3.2
- * SMI: "SMI" 4 columns %4d
- * percentage " %pc3" %6.2
- * Perf Status percentage: %5.2
- * "CTMP" 4 columns %4d
- */
-static int
-submit_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
-{
-       char name[12];
-       double interval_float;
-
-       interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
-
-       ssnprintf(name, sizeof(name), "cpu%02d", t->cpu_id);
-
-       if (!aperf_mperf_unstable)
-               turbostat_submit(name, "percent", "c0", 100.0 * t->mperf/t->tsc);
-       if (!aperf_mperf_unstable)
-               turbostat_submit(name, "percent", "c1", 100.0 * t->c1/t->tsc);
-
-       /* GHz */
-       if ((!aperf_mperf_unstable) || (!(t->aperf > t->tsc || t->mperf > t->tsc)))
-               turbostat_submit(NULL, "frequency", name, 1.0 * t->tsc / 1000000000 * t->aperf / t->mperf / interval_float);
-
-       /* SMI */
-       turbostat_submit(NULL, "current", name, t->smi_count);
-
-       /* print per-core data only for 1st thread in core */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
-               goto done;
-
-       ssnprintf(name, sizeof(name), "core%02d", c->core_id);
-
-       if (do_core_cstate & (1 << 3))
-               turbostat_submit(name, "percent", "c3", 100.0 * c->c3/t->tsc);
-       if (do_core_cstate & (1 << 6))
-               turbostat_submit(name, "percent", "c6", 100.0 * c->c6/t->tsc);
-       if (do_core_cstate & (1 << 7))
-               turbostat_submit(name, "percent", "c7", 100.0 * c->c7/t->tsc);
-
-       if (do_dts)
-               turbostat_submit(NULL, "temperature", name, c->core_temp_c);
-
-       /* print per-package data only for 1st core in package */
-       if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
-               goto done;
-
-       ssnprintf(name, sizeof(name), "pkg%02d", p->package_id);
-
-       if (do_ptm)
-               turbostat_submit(NULL, "temperature", name, p->pkg_temp_c);
-
-       if (do_pkg_cstate & (1 << 2))
-               turbostat_submit(name, "percent", "pc2", 100.0 * p->pc2/t->tsc);
-       if (do_pkg_cstate & (1 << 3))
-               turbostat_submit(name, "percent", "pc3", 100.0 * p->pc3/t->tsc);
-       if (do_pkg_cstate & (1 << 6))
-               turbostat_submit(name, "percent", "pc6", 100.0 * p->pc6/t->tsc);
-       if (do_pkg_cstate & (1 << 7))
-               turbostat_submit(name, "percent", "pc7", 100.0 * p->pc7/t->tsc);
-       if (do_pkg_cstate & (1 << 8))
-               turbostat_submit(name, "percent", "pc8", 100.0 * p->pc8/t->tsc);
-       if (do_pkg_cstate & (1 << 9))
-               turbostat_submit(name, "percent", "pc9", 100.0 * p->pc9/t->tsc);
-       if (do_pkg_cstate & (1 << 10))
-               turbostat_submit(name, "percent", "pc10", 100.0 * p->pc10/t->tsc);
-
-       if (do_rapl) {
-               if (do_rapl & RAPL_PKG)
-                       turbostat_submit(name, "power", "Pkg_W", p->energy_pkg * rapl_energy_units / interval_float);
-               if (do_rapl & RAPL_CORES)
-                       turbostat_submit(name, "power", "Cor_W", p->energy_cores * rapl_energy_units / interval_float);
-               if (do_rapl & RAPL_GFX)
-                       turbostat_submit(name, "power", "GFX_W", p->energy_gfx * rapl_energy_units / interval_float);
-               if (do_rapl & RAPL_DRAM)
-                       turbostat_submit(name, "power", "RAM_W", p->energy_dram * rapl_energy_units / interval_float);
-       }
-done:
-       return 0;
-}
+static int setup_all_buffers(void);
 
 static int
 turbostat_read(user_data_t * not_used)
@@ -1050,9 +1071,9 @@ set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_da
        if (get_msr(t->cpu_id, MSR_IA32_TEMPERATURE_TARGET, &msr))
                goto guess;
 
-       target_c_local = (msr >> 16) & 0x7F;
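+       /* The temperature target is in bits 23:16 of IA32_TEMPERATURE_TARGET */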
+       target_c_local = (msr >> 16) & 0xFF;
 
-       if (target_c_local < 85 || target_c_local > 127)
+       if (!target_c_local)
                goto guess;
 
        p->tcc_activation_temp = target_c_local;
@@ -1134,6 +1155,7 @@ probe_cpu()
         *
         * This check is valid for both Intel and AMD
         */
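+       /* __get_cpuid() leaves its outputs untouched when the requested leaf is unsupported */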
+       eax = ebx = ecx = edx = 0;
        __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
        if (!(edx & (1 << 8))) {
                ERROR("No invariant TSC");
@@ -1152,6 +1174,7 @@ probe_cpu()
         *
         * This check is valid for both Intel and AMD
         */
+       eax = ebx = ecx = edx = 0;
        __get_cpuid(0x6, &eax, &ebx, &ecx, &edx);
        do_dts = eax & (1 << 0);
        do_ptm = eax & (1 << 6);
@@ -1204,28 +1227,28 @@ probe_cpu()
                case 0x3A: /* IVB */
                case 0x3E: /* IVB Xeon */
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
-                       do_pkg_cstate = (1 << 3) | (1 << 6) | (1 << 7);
+                       do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7);
                        break;
                /* Haswell Bridge */
                case 0x3C: /* HSW */
                case 0x3F: /* HSW */
                case 0x46: /* HSW */
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
-                       do_pkg_cstate = (1 << 3) | (1 << 6) | (1 << 7);
+                       do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7);
                        break;
                case 0x45: /* HSW */
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
-                       do_pkg_cstate = (1 << 3) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10);
+                       do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10);
                        break;
                /* Broadwel */
                case 0x4F: /* BDW */
                case 0x56: /* BDX-DE */
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
-                       do_pkg_cstate = (1 << 3) | (1 << 6) | (1 << 7);
+                       do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7);
                        break;
                case 0x3D: /* BDW */
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
-                       do_pkg_cstate = (1 << 3) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10);
+                       do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10);
                        break;
                default:
                        ERROR("Unsupported CPU");
@@ -1258,7 +1281,7 @@ probe_cpu()
        }
 
        if (do_rapl) {
-               unsigned long msr;
+               unsigned long long msr;
                if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr))
                        return 0;
 
@@ -1271,229 +1294,212 @@ probe_cpu()
        return 0;
 }
 
+/*
+ * Allocate and zero a CPU set large enough for every possible CPU id
+ */
+static int __attribute__((warn_unused_result))
+allocate_cpu_set(cpu_set_t **set, size_t *size) {
+       /* The set is passed by address so the CPU_ALLOC result reaches the caller */
+       *set = CPU_ALLOC(topology.max_cpu_id + 1);
+       if (*set == NULL) {
+               ERROR("Unable to allocate CPU state");
+               return -ERR_CPU_ALLOC;
+       }
+       *size = CPU_ALLOC_SIZE(topology.max_cpu_id + 1);
+       CPU_ZERO_S(*size, *set);
+       return 0;
+}
 
-
+/*
+ * Build a local representation of the cpu distribution
+ */
 static int __attribute__((warn_unused_result))
 topology_probe()
 {
        int i;
        int ret;
-       int max_core_id = 0;
-       int max_package_id = 0;
-       int max_siblings = 0;
-       struct cpu_topology {
-               int core_id;
-               int physical_package_id;
-       } *cpus;
-
-       /* Initialize num_cpus, max_cpu_num */
-       topo.num_cpus = 0;
-       topo.max_cpu_num = 0;
-       ret = for_all_proc_cpus(count_cpus);
-       if (ret < 0)
-               return ret;
-
-       DEBUG("num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
-
-       cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
-       if (cpus == NULL) {
-               ERROR("calloc cpus");
-               return -ERR_CALLOC;
-       }
-
-       /*
-        * Allocate and initialize cpu_present_set
-        */
-       cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
-       if (cpu_present_set == NULL) {
-               free(cpus);
-               ERROR("CPU_ALLOC");
-               return -ERR_CPU_ALLOC;
-       }
-       cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
-       CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
-       ret = for_all_proc_cpus(mark_cpu_present);
-       if (ret < 0) {
-               free(cpus);
-               return ret;
-       }
+       int max_package_id, max_core_id, max_num_threads;
+       max_package_id = max_core_id = max_num_threads = 0;
 
-       /*
-        * Allocate and initialize cpu_affinity_set
-        */
-       cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
-       if (cpu_affinity_set == NULL) {
-               free(cpus);
-               ERROR("CPU_ALLOC");
-               return -ERR_CPU_ALLOC;
-       }
-       cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
-       CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
+       /* Clean topology */
+       free(topology.cpus);
+       memset(&topology, 0, sizeof(topology));
 
+       /* Find the highest CPU id listed in /proc/stat */
+       ret = for_all_proc_cpus(update_max_cpu_id);
+       if (ret < 0)
+               return ret;
 
-       /*
-        * Allocate and initialize cpu_saved_affinity_set
-        */
-       cpu_saved_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
-       if (cpu_saved_affinity_set == NULL) {
-               free(cpus);
-               ERROR("CPU_ALLOC");
-               return -ERR_CPU_ALLOC;
+       topology.cpus = calloc(1, (topology.max_cpu_id  + 1) * sizeof(struct cpu_topology));
+       if (topology.cpus == NULL) {
+               ERROR("Unable to allocate memory for cpu topology");
+               return -ERR_CALLOC;
        }
-       cpu_saved_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
-       CPU_ZERO_S(cpu_saved_affinity_setsize, cpu_saved_affinity_set);
 
+       ret = allocate_cpu_set(&cpu_present_set, &cpu_present_setsize);
+       if (ret != 0)
+               goto err;
+       ret = allocate_cpu_set(&cpu_affinity_set, &cpu_affinity_setsize);
+       if (ret != 0)
+               goto err;
+       ret = allocate_cpu_set(&cpu_saved_affinity_set, &cpu_saved_affinity_setsize);
+       if (ret != 0)
+               goto err;
+
+       /* Record which CPUs are currently present */
+       ret = for_all_proc_cpus(mark_cpu_present);
+       if (ret < 0)
+               goto err;
 
        /*
         * For online cpus
         * find max_core_id, max_package_id
         */
-       for (i = 0; i <= topo.max_cpu_num; ++i) {
-               int siblings;
+       for (i = 0; i <= topology.max_cpu_id; ++i) {
+               int num_threads;
+               struct cpu_topology *cpu = &topology.cpus[i];
 
                if (cpu_is_not_present(i)) {
                        WARNING("cpu%d NOT PRESENT", i);
                        continue;
                }
-               cpus[i].core_id = get_core_id(i);
-               if (cpus[i].core_id < 0)
-                       return cpus[i].core_id;
-               if (cpus[i].core_id > max_core_id)
-                       max_core_id = cpus[i].core_id;
-
-               cpus[i].physical_package_id = get_physical_package_id(i);
-               if (cpus[i].physical_package_id < 0)
-                       return cpus[i].physical_package_id;
-               if (cpus[i].physical_package_id > max_package_id)
-                       max_package_id = cpus[i].physical_package_id;
-
-               siblings = get_num_ht_siblings(i);
-               if (siblings < 0)
-                       return siblings;
-               if (siblings > max_siblings)
-                       max_siblings = siblings;
-               DEBUG("cpu %d pkg %d core %d\n",
-                       i, cpus[i].physical_package_id, cpus[i].core_id);
-       }
-       topo.num_cores_per_pkg = max_core_id + 1;
-       DEBUG("max_core_id %d, sizing for %d cores per package\n",
-               max_core_id, topo.num_cores_per_pkg);
 
-       topo.num_packages = max_package_id + 1;
-       DEBUG("max_package_id %d, sizing for %d packages\n",
-               max_package_id, topo.num_packages);
+               ret = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", i);
+               if (ret < 0)
+                       goto err;
+               else
+                       cpu->package_id = ret;
+               if (cpu->package_id > max_package_id)
+                       max_package_id = cpu->package_id;
 
-       topo.num_threads_per_core = max_siblings;
-       DEBUG("max_siblings %d\n", max_siblings);
+               ret = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", i);
+               if (ret < 0)
+                       goto err;
+               else
+                       cpu->core_id = ret;
+               if (cpu->core_id > max_core_id)
+                       max_core_id = cpu->core_id;
+
+               /* The first CPU listed in core_siblings_list sits on the first core of the package */
+               ret = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", i);
+               if (ret < 0)
+                       goto err;
+               else if (ret == i)
+                       cpu->first_core_in_package = 1;
+
+               ret = get_threads_on_core(i);
+               if (ret < 0)
+                       goto err;
+               else
+                       num_threads = ret;
+               if (num_threads > max_num_threads)
+                       max_num_threads = num_threads;
+               if (num_threads > 1) {
+                       /* The first CPU listed in thread_siblings_list is the first thread of the core */
+                       ret = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", i);
+                       if (ret < 0)
+                               goto err;
+                       else if (ret == i)
+                               cpu->first_thread_in_core = 1;
+               } else {
+                       /* A single-threaded core is trivially its own first thread */
+                       cpu->first_thread_in_core = 1;
+               }
+               DEBUG("cpu %d pkg %d core %d\n",
+                       i, cpu->package_id, cpu->core_id);
+       }
+       /* IDs start at 0, so counts are max id + 1 */
+       topology.num_packages = max_package_id + 1;
+       topology.num_cores = max_core_id + 1;
+       /* max_num_threads is already a count (threads per core), not an id */
+       topology.num_threads = max_num_threads;
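+       /* Log a summary of the probed topology */
+       DEBUG("topology: %d packages, up to %d cores per package, up to %d threads per core\n",
+               topology.num_packages, topology.num_cores, topology.num_threads);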
 
-       free(cpus);
        return 0;
+err:
+       free(topology.cpus);
+       topology.cpus = NULL;
+       return ret;
 }
 
 static int
-allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
+allocate_counters(struct thread_data **threads, struct core_data **cores, struct pkg_data **packages)
 {
        int i;
+       int total_threads, total_cores;
 
-       *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
-               topo.num_packages, sizeof(struct thread_data));
-       if (*t == NULL)
-               goto error;
+       total_threads = topology.num_threads * topology.num_cores * topology.num_packages;
+       *threads = calloc(total_threads, sizeof(struct thread_data));
+       if (*threads == NULL)
+               goto err;
 
-       for (i = 0; i < topo.num_threads_per_core *
-               topo.num_cores_per_pkg * topo.num_packages; i++)
-               (*t)[i].cpu_id = -1;
+       for (i = 0; i < total_threads; ++i)
+               (*threads)[i].cpu_id = -1;
 
-       *c = calloc(topo.num_cores_per_pkg * topo.num_packages,
-               sizeof(struct core_data));
-       if (*c == NULL)
-               goto error;
+       total_cores = topology.num_cores * topology.num_packages;
+       *cores = calloc(total_cores, sizeof(struct core_data));
+       if (*cores == NULL)
+               goto err_clean_threads;
 
-       for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
-               (*c)[i].core_id = -1;
+       for (i = 0; i < total_cores; ++i)
+               (*cores)[i].core_id = -1;
 
-       *p = calloc(topo.num_packages, sizeof(struct pkg_data));
-       if (*p == NULL)
-               goto error;
+       *packages = calloc(topology.num_packages, sizeof(struct pkg_data));
+       if (*packages == NULL)
+               goto err_clean_cores;
 
-       for (i = 0; i < topo.num_packages; i++)
-               (*p)[i].package_id = i;
+       for (i = 0; i < topology.num_packages; i++)
+               (*packages)[i].package_id = i;
 
        return 0;
-error:
+
+err_clean_cores:
+       free(*cores);
+err_clean_threads:
+       free(*threads);
+err:
        ERROR("calloc counters");
        return -ERR_CALLOC;
 }
+
 /*
  * init_counter()
  *
- * set cpu_id, core_num, pkg_num
+ * set cpu_id, core_id, package_id
  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
  *
- * increment topo.num_cores when 1st core in pkg seen
  */
 static int
 init_counter(struct thread_data *thread_base, struct core_data *core_base,
-       struct pkg_data *pkg_base, int thread_num, int core_num,
-       int pkg_num, int cpu_id)
+       struct pkg_data *pkg_base, int cpu_id)
 {
-       int ret;
        struct thread_data *t;
        struct core_data *c;
        struct pkg_data *p;
+       struct cpu_topology *cpu = &topology.cpus[cpu_id];
 
-       t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
-       c = GET_CORE(core_base, core_num, pkg_num);
-       p = GET_PKG(pkg_base, pkg_num);
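+       /* Thread slot 0 holds the first sibling of the core, slot 1 the (at most one) other sibling */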
+       t = GET_THREAD(thread_base, !(cpu->first_thread_in_core), cpu->core_id, cpu->package_id);
+       c = GET_CORE(core_base, cpu->core_id, cpu->package_id);
+       p = GET_PKG(pkg_base, cpu->package_id);
 
        t->cpu_id = cpu_id;
-       if (thread_num == 0) {
+       if (cpu->first_thread_in_core)
                t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
-               if ((ret = cpu_is_first_core_in_package(cpu_id)) < 0) {
-                       return ret;
-               } else if (ret != 0) {
-                       t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
-               }
-       }
+       if (cpu->first_core_in_package)
+               t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
 
-       c->core_id = core_num;
-       p->package_id = pkg_num;
+       c->core_id = cpu->core_id;
+       p->package_id = cpu->package_id;
 
        return 0;
 }
 
 
 static int
-initialize_counters(int cpu_id)
+initialize_counters(void)
 {
-       int my_thread_id, my_core_id, my_package_id;
        int ret;
+       int cpu_id;
 
-       my_package_id = get_physical_package_id(cpu_id);
-       if (my_package_id < 0)
-               return my_package_id;
-       my_core_id = get_core_id(cpu_id);
-       if (my_core_id < 0)
-               return my_core_id;
-
-       if ((ret = cpu_is_first_sibling_in_core(cpu_id)) < 0) {
-               return ret;
-       } else if (ret != 0) {
-               my_thread_id = 0;
-               topo.num_cores++;
-       } else {
-               my_thread_id = 1;
-       }
+       for (cpu_id = 0; cpu_id <= topology.max_cpu_id; ++cpu_id) {
+               if (cpu_is_not_present(cpu_id)) {
+                       continue;
+               }
 
-       ret = init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
-       if (ret < 0)
-               return ret;
-       ret = init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
-       if (ret < 0)
-               return ret;
-       ret = init_counter(DELTA_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
-       if (ret < 0)
-               return ret;
+               ret = init_counter(EVEN_COUNTERS, cpu_id);
+               if (ret < 0)
+                       return ret;
+               ret = init_counter(ODD_COUNTERS, cpu_id);
+               if (ret < 0)
+                       return ret;
+               ret = init_counter(DELTA_COUNTERS, cpu_id);
+               if (ret < 0)
+                       return ret;
+       }
        return 0;
 }
 
@@ -1512,7 +1518,7 @@ static int setup_all_buffers(void)
        DO_OR_GOTO_ERR(allocate_counters(&thread_even, &core_even, &package_even));
        DO_OR_GOTO_ERR(allocate_counters(&thread_odd, &core_odd, &package_odd));
        DO_OR_GOTO_ERR(allocate_counters(&thread_delta, &core_delta, &package_delta));
-       DO_OR_GOTO_ERR(for_all_proc_cpus(initialize_counters));
+       DO_OR_GOTO_ERR(initialize_counters());
        DO_OR_GOTO_ERR(for_all_cpus(set_temperature_target, EVEN_COUNTERS));
        DO_OR_GOTO_ERR(for_all_cpus(set_temperature_target, ODD_COUNTERS));