Turbostat: simplify RAPL config and collection
[collectd.git] / src / turbostat.c
index 34c11b8..ec8757e 100644 (file)
@@ -57,6 +57,7 @@
 #include <ctype.h>
 #include <sched.h>
 #include <cpuid.h>
+#include <sys/capability.h>
 
 #define PLUGIN_NAME "turbostat"
 
@@ -88,6 +89,11 @@ static unsigned int do_core_cstate;
 static unsigned int do_pkg_cstate;
 
 /*
+ * Boolean indicating if the processor supports 'I/O System-Management Interrupt counter'
+ */
+static _Bool do_smi;
+
+/*
  * Boolean indicating if the processor supports 'Digital temperature sensor'
  * This feature enables the monitoring of the temperature of each core
  *
@@ -120,26 +126,15 @@ static double rapl_energy_units;
 #define RAPL_PKG               (1 << 0)
                                        /* 0x610 MSR_PKG_POWER_LIMIT */
                                        /* 0x611 MSR_PKG_ENERGY_STATUS */
-#define RAPL_PKG_PERF_STATUS   (1 << 1)
-                                       /* 0x613 MSR_PKG_PERF_STATUS */
-#define RAPL_PKG_POWER_INFO    (1 << 2)
-                                       /* 0x614 MSR_PKG_POWER_INFO */
-
-#define RAPL_DRAM              (1 << 3)
+#define RAPL_DRAM              (1 << 1)
                                        /* 0x618 MSR_DRAM_POWER_LIMIT */
                                        /* 0x619 MSR_DRAM_ENERGY_STATUS */
                                        /* 0x61c MSR_DRAM_POWER_INFO */
-#define RAPL_DRAM_PERF_STATUS  (1 << 4)
-                                       /* 0x61b MSR_DRAM_PERF_STATUS */
-
-#define RAPL_CORES             (1 << 5)
+#define RAPL_CORES             (1 << 2)
                                        /* 0x638 MSR_PP0_POWER_LIMIT */
                                        /* 0x639 MSR_PP0_ENERGY_STATUS */
-#define RAPL_CORE_POLICY       (1 << 6)
-                                       /* 0x63a MSR_PP0_POLICY */
-
 
-#define RAPL_GFX               (1 << 7)
+#define RAPL_GFX               (1 << 3)
                                        /* 0x640 MSR_PP1_POWER_LIMIT */
                                        /* 0x641 MSR_PP1_ENERGY_STATUS */
                                        /* 0x642 MSR_PP1_POLICY */
@@ -181,8 +176,6 @@ static struct pkg_data {
        unsigned int energy_dram;       /* MSR_DRAM_ENERGY_STATUS */
        unsigned int energy_cores;      /* MSR_PP0_ENERGY_STATUS */
        unsigned int energy_gfx;        /* MSR_PP1_ENERGY_STATUS */
-       unsigned int rapl_pkg_perf_status;      /* MSR_PKG_PERF_STATUS */
-       unsigned int rapl_dram_perf_status;     /* MSR_DRAM_PERF_STATUS */
        unsigned int tcc_activation_temp;
        unsigned int pkg_temp_c;
 } *package_delta, *package_even, *package_odd;
@@ -343,8 +336,10 @@ do {                                                                       \
        READ_MSR(MSR_IA32_APERF, &t->aperf);
        READ_MSR(MSR_IA32_MPERF, &t->mperf);
 
-       READ_MSR(MSR_SMI_COUNT, &msr);
-       t->smi_count = msr & 0xFFFFFFFF;
+       if (do_smi) {
+               READ_MSR(MSR_SMI_COUNT, &msr);
+               t->smi_count = msr & 0xFFFFFFFF;
+       }
 
        /* collect core counters only for 1st thread in core */
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) {
@@ -401,14 +396,6 @@ do {                                                                       \
                READ_MSR(MSR_PP1_ENERGY_STATUS, &msr);
                p->energy_gfx = msr & 0xFFFFFFFF;
        }
-       if (do_rapl & RAPL_PKG_PERF_STATUS) {
-               READ_MSR(MSR_PKG_PERF_STATUS, &msr);
-               p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
-       }
-       if (do_rapl & RAPL_DRAM_PERF_STATUS) {
-               READ_MSR(MSR_DRAM_PERF_STATUS, &msr);
-               p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
-       }
        if (do_ptm) {
                READ_MSR(MSR_IA32_PACKAGE_THERM_STATUS, &msr);
                p->pkg_temp_c = p->tcc_activation_temp - ((msr >> 16) & 0x7F);
@@ -454,8 +441,6 @@ delta_package(struct pkg_data *delta, const struct pkg_data *new, const struct p
        DELTA_WRAP32(delta->energy_cores, new->energy_cores, old->energy_cores);
        DELTA_WRAP32(delta->energy_gfx, new->energy_gfx, old->energy_gfx);
        DELTA_WRAP32(delta->energy_dram, new->energy_dram, old->energy_dram);
-       DELTA_WRAP32(delta->rapl_pkg_perf_status, new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
-       DELTA_WRAP32(delta->rapl_dram_perf_status, new->rapl_dram_perf_status, old->rapl_dram_perf_status);
 }
 
 /*
@@ -524,7 +509,8 @@ delta_thread(struct thread_data *delta, const struct thread_data *new, const str
                delta->mperf = 1;       /* divide by 0 protection */
        }
 
-       delta->smi_count = new->smi_count - old->smi_count;
+       if (do_smi)
+               delta->smi_count = new->smi_count - old->smi_count;
 
        return 0;
 }
@@ -579,12 +565,17 @@ submit_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        if (!aperf_mperf_unstable)
                turbostat_submit(name, "percent", "c1", 100.0 * t->c1/t->tsc);
 
-       /* GHz */
+       turbostat_submit("Average", "frequency", name, 1.0 / 1000000 * t->aperf / interval_float);
+
        if ((!aperf_mperf_unstable) || (!(t->aperf > t->tsc || t->mperf > t->tsc)))
-               turbostat_submit(NULL, "frequency", name, 1.0 * t->tsc / 1000000000 * t->aperf / t->mperf / interval_float);
+               turbostat_submit("Buzy", "frequency", name, 1.0 * t->tsc / 1000000 * t->aperf / t->mperf / interval_float);
+
+       /* Sanity check (should stay stable) */
+       turbostat_submit("TSC", "gauge", name, 1.0 * t->tsc / 1000000 / interval_float);
 
        /* SMI */
-       turbostat_submit(NULL, "current", name, t->smi_count);
+       if (do_smi)
+               turbostat_submit(NULL, "current", name, t->smi_count);
 
        /* submit per-core data only for 1st thread in core */
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
@@ -859,36 +850,6 @@ probe_cpu()
        }
 
        /*
-        * CPUID(0x80000000):
-        * - EAX: Maximum Input Value for Extended Function CPUID Information
-        *
-        * This allows us to verify if the CPUID(0x80000007) can be called
-        *
-        * This check is valid for both Intel and AMD.
-        */
-       max_level = ebx = ecx = edx = 0;
-       __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx);
-       if (max_level < 0x80000007) {
-               ERROR("Turbostat plugin: Unsupported CPU (no invariant TSC, "
-                     " Maximum Extended Function: 0x%x)", max_level);
-               return -1;
-       }
-
-       /*
-        * CPUID(0x80000007):
-        * - EDX:
-        *  + 8: Invariant TSC available if set
-        *
-        * This check is valid for both Intel and AMD
-        */
-       eax = ebx = ecx = edx = 0;
-       __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
-       if (!(edx & (1 << 8))) {
-               ERROR("Turbostat plugin: Unsupported CPU (No invariant TSC)");
-               return -1;
-       }
-
-       /*
         * CPUID(6):
         * - EAX:
         *  + 0: Digital temperature sensor is supported if set
@@ -916,15 +877,14 @@ probe_cpu()
                switch (model) {
                /* Atom (partial) */
                case 0x27:
+                       do_smi = 0;
                        do_core_cstate = 0;
                        do_pkg_cstate = (1 << 2) | (1 << 4) | (1 << 6);
                        break;
                /* Silvermont */
                case 0x37: /* BYT */
-               case 0x4A:
                case 0x4D: /* AVN */
-               case 0x5A:
-               case 0x5D:
+                       do_smi = 1;
                        do_core_cstate = (1 << 1) | (1 << 6);
                        do_pkg_cstate = (1 << 6);
                        break;
@@ -933,6 +893,7 @@ probe_cpu()
                case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
                case 0x1F: /* Core i7 and i5 Processor - Nehalem */
                case 0x2E: /* Nehalem-EX Xeon - Beckton */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6);
                        do_pkg_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        break;
@@ -940,18 +901,21 @@ probe_cpu()
                case 0x25: /* Westmere Client - Clarkdale, Arrandale */
                case 0x2C: /* Westmere EP - Gulftown */
                case 0x2F: /* Westmere-EX Xeon - Eagleton */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6);
                        do_pkg_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        break;
                /* Sandy Bridge */
                case 0x2A: /* SNB */
                case 0x2D: /* SNB Xeon */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7);
                        break;
                /* Ivy Bridge */
                case 0x3A: /* IVB */
                case 0x3E: /* IVB Xeon */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7);
                        break;
@@ -959,44 +923,53 @@ probe_cpu()
                case 0x3C: /* HSW */
                case 0x3F: /* HSW */
                case 0x46: /* HSW */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7);
                        break;
                case 0x45: /* HSW */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10);
                        break;
                /* Broadwel */
                case 0x4F: /* BDW */
                case 0x56: /* BDX-DE */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7);
                        break;
                case 0x3D: /* BDW */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10);
                        break;
                default:
-                       ERROR("Turbostat plugin: Unsupported CPU (family: %#x,"
-                             " model: %#x)", family, model);
+                       do_smi = 0;
+                       do_core_cstate = 0;
+                       do_pkg_cstate = 0;
+                       break;
                }
                switch (model) {
-               case 0x2A:
-               case 0x3A:
-               case 0x3C:
-               case 0x45:
-               case 0x46:
-                       do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_PKG_POWER_INFO | RAPL_GFX;
+               case 0x2A: /* SNB */
+               case 0x3A: /* IVB */
+               case 0x3C: /* HSW */
+               case 0x45: /* HSW */
+               case 0x46: /* HSW */
+               case 0x3D: /* BDW */
+                       do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
                        break;
-               case 0x3F:
-                       do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM | RAPL_DRAM_PERF_STATUS;
+               case 0x3F: /* HSX */
+               case 0x4F: /* BDX */
+               case 0x56: /* BDX-DE */
+                       do_rapl = RAPL_PKG | RAPL_DRAM ;
                        break;
-               case 0x2D:
-               case 0x3E:
-                       do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_PKG_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM | RAPL_DRAM_PERF_STATUS;
+               case 0x2D: /* SNB Xeon */
+               case 0x3E: /* IVB Xeon */
+                       do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM;
                        break;
-               case 0x37:
-               case 0x4D:
+               case 0x37: /* BYT */
+               case 0x4D: /* AVN */
                        do_rapl = RAPL_PKG | RAPL_CORES;
                        break;
                default:
@@ -1485,6 +1458,52 @@ out:
 }
 
+/*
+ * Verify MSR access rights: root, or CAP_SYS_RAWIO plus read access to
+ * /dev/cpu/0/msr. Returns 0 if satisfied, -1 (after logging why) if not.
+ */
 static int
+check_permissions(void)
+{
+       struct __user_cap_header_struct cap_header;
+       struct __user_cap_data_struct cap_data;
+       int ret = 0;
+
+       if (getuid() == 0)
+               return 0; /* root has everything we need */
+
+       /* check for CAP_SYS_RAWIO */
+       cap_header.pid = getpid();
+       cap_header.version = _LINUX_CAPABILITY_VERSION;
+       if (capget(&cap_header, &cap_data) < 0) {
+               ERROR("Turbostat plugin: capget failed");
+               return -1;
+       }
+
+       if ((cap_data.effective & (1U << CAP_SYS_RAWIO)) == 0) {
+               WARNING("Turbostat plugin: Collectd doesn't have the "
+                       "CAP_SYS_RAWIO capability. If you don't want to run "
+                       "collectd as root, try running \"setcap "
+                       "cap_sys_rawio=ep\" on collectd binary");
+               ret = -1;
+       }
+
+       if (euidaccess("/dev/cpu/0/msr", R_OK)) {
+               WARNING("Turbostat plugin: Collectd cannot open "
+                       "/dev/cpu/0/msr. If you don't want to run collectd as "
+                       "root, you need to change the ownership (chown) and "
+                       "permissions on /dev/cpu/*/msr to allow such access");
+               ret = -1;
+       }
+
+       if (ret != 0)
+               ERROR("Turbostat plugin: Initialization failed: this plugin "
+                     "requires collectd to either run as root or have the "
+                     "CAP_SYS_RAWIO capability and read access to "
+                     "/dev/cpu/*/msr (see previous warnings)");
+       return ret;
+}
+
+static int
 turbostat_init(void)
 {
        struct stat sb;
@@ -1498,12 +1517,7 @@ turbostat_init(void)
                return -1;
        }
 
-       if (getuid() != 0) {
-               ERROR("Turbostat plugin: Initialization failed: this plugin "
-                     "requires collectd to run as root in order to read "
-                     "special CPU registers");
-               return -1;
-       }
+       DO_OR_GOTO_ERR(check_permissions());
 
        DO_OR_GOTO_ERR(probe_cpu());