Turbostat: relax dependency on invariant TSC
[collectd.git] / src / turbostat.c
index 5236eee..4cd1e50 100644 (file)
  */
 #define _GNU_SOURCE
 
+#include "collectd.h"
+#include "common.h"
+#include "plugin.h"
+#include "utils_time.h"
+
 #include <asm/msr-index.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <sys/resource.h>
 #include <fcntl.h>
 #include <signal.h>
-#include <sys/time.h>
 #include <stdlib.h>
 #include <dirent.h>
 #include <string.h>
 #include <ctype.h>
 #include <sched.h>
 #include <cpuid.h>
-
-#include "collectd.h"
-#include "common.h"
-#include "plugin.h"
+#include <sys/capability.h>
 
 #define PLUGIN_NAME "turbostat"
 
@@ -88,6 +89,11 @@ static unsigned int do_core_cstate;
 static unsigned int do_pkg_cstate;
 
 /*
+ * Boolean indicating if the processor supports 'I/O System-Management Interrupt counter'
+ */
+static _Bool do_smi;
+
+/*
  * Boolean indicating if the processor supports 'Digital temperature sensor'
  * This feature enables the monitoring of the temperature of each core
  *
@@ -145,10 +151,10 @@ static double rapl_energy_units;
                                        /* 0x642 MSR_PP1_POLICY */
 #define        TJMAX_DEFAULT   100
 
-cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_saved_affinity_set;
-size_t cpu_present_setsize, cpu_affinity_setsize, cpu_saved_affinity_setsize;
+static cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_saved_affinity_set;
+static size_t cpu_present_setsize, cpu_affinity_setsize, cpu_saved_affinity_setsize;
 
-struct thread_data {
+static struct thread_data {
        unsigned long long tsc;
        unsigned long long aperf;
        unsigned long long mperf;
@@ -160,7 +166,7 @@ struct thread_data {
 #define CPU_IS_FIRST_CORE_IN_PACKAGE   0x4
 } *thread_delta, *thread_even, *thread_odd;
 
-struct core_data {
+static struct core_data {
        unsigned long long c3;
        unsigned long long c6;
        unsigned long long c7;
@@ -168,7 +174,7 @@ struct core_data {
        unsigned int core_id;
 } *core_delta, *core_even, *core_odd;
 
-struct pkg_data {
+static struct pkg_data {
        unsigned long long pc2;
        unsigned long long pc3;
        unsigned long long pc6;
@@ -213,7 +219,7 @@ struct cpu_topology {
        _Bool first_thread_in_core;
 };
 
-struct topology {
+static struct topology {
        int max_cpu_id;
        int num_packages;
        int num_cores;
@@ -221,7 +227,13 @@ struct topology {
        struct cpu_topology *cpus;
 } topology;
 
-struct timeval tv_even, tv_odd, tv_delta;
+static cdtime_t time_even, time_odd, time_delta;
+
+static const char *config_keys[] =
+{
+       "TCCActivationTemp",
+};
+static const int config_keys_num = STATIC_ARRAY_SIZE (config_keys);
 
 /*****************************
  *  MSR Manipulation helpers *
@@ -337,8 +349,10 @@ do {                                                                       \
        READ_MSR(MSR_IA32_APERF, &t->aperf);
        READ_MSR(MSR_IA32_MPERF, &t->mperf);
 
-       READ_MSR(MSR_SMI_COUNT, &msr);
-       t->smi_count = msr & 0xFFFFFFFF;
+       if (do_smi) {
+               READ_MSR(MSR_SMI_COUNT, &msr);
+               t->smi_count = msr & 0xFFFFFFFF;
+       }
 
        /* collect core counters only for 1st thread in core */
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) {
@@ -518,7 +532,8 @@ delta_thread(struct thread_data *delta, const struct thread_data *new, const str
                delta->mperf = 1;       /* divide by 0 protection */
        }
 
-       delta->smi_count = new->smi_count - old->smi_count;
+       if (do_smi)
+               delta->smi_count = new->smi_count - old->smi_count;
 
        return 0;
 }
@@ -564,7 +579,7 @@ submit_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        char name[DATA_MAX_NAME_LEN];
        double interval_float;
 
-       interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
+       interval_float = CDTIME_T_TO_DOUBLE(time_delta);
 
        ssnprintf(name, sizeof(name), "cpu%02d", t->cpu_id);
 
@@ -573,12 +588,17 @@ submit_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        if (!aperf_mperf_unstable)
                turbostat_submit(name, "percent", "c1", 100.0 * t->c1/t->tsc);
 
-       /* GHz */
+       turbostat_submit("Average", "frequency", name, 1.0 / 1000000 * t->aperf / interval_float);
+
        if ((!aperf_mperf_unstable) || (!(t->aperf > t->tsc || t->mperf > t->tsc)))
-               turbostat_submit(NULL, "frequency", name, 1.0 * t->tsc / 1000000000 * t->aperf / t->mperf / interval_float);
+               turbostat_submit("Buzy", "frequency", name, 1.0 * t->tsc / 1000000 * t->aperf / t->mperf / interval_float);
+
+       /* Sanity check (should stay stable) */
+       turbostat_submit("TSC", "gauge", name, 1.0 * t->tsc / 1000000 / interval_float);
 
        /* SMI */
-       turbostat_submit(NULL, "current", name, t->smi_count);
+       if (do_smi)
+               turbostat_submit(NULL, "current", name, t->smi_count);
 
        /* submit per-core data only for 1st thread in core */
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
@@ -853,36 +873,6 @@ probe_cpu()
        }
 
        /*
-        * CPUID(0x80000000):
-        * - EAX: Maximum Input Value for Extended Function CPUID Information
-        *
-        * This allows us to verify if the CPUID(0x80000007) can be called
-        *
-        * This check is valid for both Intel and AMD.
-        */
-       max_level = ebx = ecx = edx = 0;
-       __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx);
-       if (max_level < 0x80000007) {
-               ERROR("Turbostat plugin: Unsupported CPU (no invariant TSC, "
-                     " Maximum Extended Function: 0x%x)", max_level);
-               return -1;
-       }
-
-       /*
-        * CPUID(0x80000007):
-        * - EDX:
-        *  + 8: Invariant TSC available if set
-        *
-        * This check is valid for both Intel and AMD
-        */
-       eax = ebx = ecx = edx = 0;
-       __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
-       if (!(edx & (1 << 8))) {
-               ERROR("Turbostat plugin: Unsupported CPU (No invariant TSC)");
-               return -1;
-       }
-
-       /*
         * CPUID(6):
         * - EAX:
         *  + 0: Digital temperature sensor is supported if set
@@ -910,15 +900,14 @@ probe_cpu()
                switch (model) {
                /* Atom (partial) */
                case 0x27:
+                       do_smi = 0;
                        do_core_cstate = 0;
                        do_pkg_cstate = (1 << 2) | (1 << 4) | (1 << 6);
                        break;
                /* Silvermont */
                case 0x37: /* BYT */
-               case 0x4A:
                case 0x4D: /* AVN */
-               case 0x5A:
-               case 0x5D:
+                       do_smi = 1;
                        do_core_cstate = (1 << 1) | (1 << 6);
                        do_pkg_cstate = (1 << 6);
                        break;
@@ -927,6 +916,7 @@ probe_cpu()
                case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
                case 0x1F: /* Core i7 and i5 Processor - Nehalem */
                case 0x2E: /* Nehalem-EX Xeon - Beckton */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6);
                        do_pkg_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        break;
@@ -934,18 +924,21 @@ probe_cpu()
                case 0x25: /* Westmere Client - Clarkdale, Arrandale */
                case 0x2C: /* Westmere EP - Gulftown */
                case 0x2F: /* Westmere-EX Xeon - Eagleton */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6);
                        do_pkg_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        break;
                /* Sandy Bridge */
                case 0x2A: /* SNB */
                case 0x2D: /* SNB Xeon */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7);
                        break;
                /* Ivy Bridge */
                case 0x3A: /* IVB */
                case 0x3E: /* IVB Xeon */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7);
                        break;
@@ -953,44 +946,53 @@ probe_cpu()
                case 0x3C: /* HSW */
                case 0x3F: /* HSW */
                case 0x46: /* HSW */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7);
                        break;
                case 0x45: /* HSW */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10);
                        break;
                /* Broadwell */
                case 0x4F: /* BDW */
                case 0x56: /* BDX-DE */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7);
                        break;
                case 0x3D: /* BDW */
+                       do_smi = 1;
                        do_core_cstate = (1 << 3) | (1 << 6) | (1 << 7);
                        do_pkg_cstate = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10);
                        break;
                default:
-                       ERROR("Turbostat plugin: Unsupported CPU (family: %#x,"
-                             " model: %#x)", family, model);
+                       do_smi = 0;
+                       do_core_cstate = 0;
+                       do_pkg_cstate = 0;
+                       break;
                }
                switch (model) {
-               case 0x2A:
-               case 0x3A:
-               case 0x3C:
-               case 0x45:
-               case 0x46:
+               case 0x2A: /* SNB */
+               case 0x3A: /* IVB */
+               case 0x3C: /* HSW */
+               case 0x45: /* HSW */
+               case 0x46: /* HSW */
+               case 0x3D: /* BDW */
                        do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_PKG_POWER_INFO | RAPL_GFX;
                        break;
-               case 0x3F:
+               case 0x3F: /* HSX */
+               case 0x4F: /* BDX */
+               case 0x56: /* BDX-DE */
                        do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM | RAPL_DRAM_PERF_STATUS;
                        break;
-               case 0x2D:
-               case 0x3E:
+               case 0x2D: /* SNB Xeon */
+               case 0x3E: /* IVB Xeon */
                        do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_PKG_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM | RAPL_DRAM_PERF_STATUS;
                        break;
-               case 0x37:
-               case 0x4D:
+               case 0x37: /* BYT */
+               case 0x4D: /* AVN */
                        do_rapl = RAPL_PKG | RAPL_CORES;
                        break;
                default:
@@ -1440,7 +1442,7 @@ turbostat_read(void)
        if (!initialized) {
                if ((ret = for_all_cpus(get_counters, EVEN_COUNTERS)) < 0)
                        goto out;
-               gettimeofday(&tv_even, (struct timezone *)NULL);
+               time_even = cdtime();
                is_even = 1;
                initialized = 1;
                ret = 0;
@@ -1450,9 +1452,9 @@ turbostat_read(void)
        if (is_even) {
                if ((ret = for_all_cpus(get_counters, ODD_COUNTERS)) < 0)
                        goto out;
-               gettimeofday(&tv_odd, (struct timezone *)NULL);
+               time_odd = cdtime();
                is_even = 0;
-               timersub(&tv_odd, &tv_even, &tv_delta);
+               time_delta = time_odd - time_even;
                if ((ret = for_all_cpus_delta(ODD_COUNTERS, EVEN_COUNTERS)) < 0)
                        goto out;
                if ((ret = for_all_cpus(submit_counters, DELTA_COUNTERS)) < 0)
@@ -1460,9 +1462,9 @@ turbostat_read(void)
        } else {
                if ((ret = for_all_cpus(get_counters, EVEN_COUNTERS)) < 0)
                        goto out;
-               gettimeofday(&tv_even, (struct timezone *)NULL);
+               time_even = cdtime();
                is_even = 1;
-               timersub(&tv_even, &tv_odd, &tv_delta);
+               time_delta = time_even - time_odd;
                if ((ret = for_all_cpus_delta(EVEN_COUNTERS, ODD_COUNTERS)) < 0)
                        goto out;
                if ((ret = for_all_cpus(submit_counters, DELTA_COUNTERS)) < 0)
@@ -1479,19 +1481,56 @@ out:
 }
 
+/*
+ * Check that collectd is allowed to read the MSR device files.
+ *
+ * Running as root (getuid() == 0) is accepted immediately. Otherwise the
+ * process needs both the CAP_SYS_RAWIO capability in its effective set and
+ * read access to /dev/cpu/0/msr. Each missing prerequisite is reported with
+ * its own WARNING, followed by a single summarizing ERROR.
+ *
+ * Returns 0 when the requirements are met, -1 otherwise (including when
+ * capget() itself fails).
+ */
 static int
-turbostat_init(void)
+check_permissions(void)
 {
-       struct stat sb;
-       int ret;
+       struct __user_cap_header_struct cap_header_data;
+       cap_user_header_t cap_header = &cap_header_data;
+       struct __user_cap_data_struct cap_data_data;
+       cap_user_data_t cap_data = &cap_data_data;
+       int ret = 0;
+
+       if (getuid() == 0) {
+               /* We have everything we need */
+               return 0;
+       }
 
-       if (getuid() != 0) {
-               ERROR("Turbostat plugin: Initialization failed: this plugin "
-                     "requires collectd to run as root in order to read "
-                     "special CPU registers");
+       /* check for CAP_SYS_RAWIO in the effective set of this process */
+       cap_header->pid = getpid();
+       cap_header->version = _LINUX_CAPABILITY_VERSION;
+       if (capget(cap_header, cap_data) < 0) {
+               ERROR("Turbostat plugin: capget failed");
                return -1;
        }
 
-       DO_OR_GOTO_ERR(probe_cpu());
+       if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
+               WARNING("Turbostat plugin: Collectd doesn't have the "
+                       "CAP_SYS_RAWIO capability. If you don't want to run "
+                       "collectd as root, try running \"setcap "
+                       "cap_sys_rawio=ep\" on collectd binary");
+               ret = -1;
+       }
+
+       /* Keep checking even if the capability is missing, so that every
+        * unmet prerequisite gets reported in one run. */
+       if (euidaccess("/dev/cpu/0/msr", R_OK)) {
+               WARNING("Turbostat plugin: Collectd cannot open "
+                       "/dev/cpu/0/msr. If you don't want to run collectd as "
+                       "root, you need to change the ownership (chown) and "
+                       "permissions on /dev/cpu/*/msr to allow such access");
+               ret = -1;
+       }
+
+       if (ret != 0)
+               ERROR("Turbostat plugin: Initialization failed: this plugin "
+                     "requires collectd either to run as root or to be "
+                     "given a special capability (CAP_SYS_RAWIO) and read "
+                     "access to /dev/cpu/*/msr (see previous warnings)");
+       return ret;
+}
+
+static int
+turbostat_init(void)
+{
+       struct stat sb;
+       int ret;
 
        if (stat("/dev/cpu/0/msr", &sb)) {
                ERROR("Turbostat plugin: Initialization failed: /dev/cpu/0/msr"
@@ -1501,6 +1540,10 @@ turbostat_init(void)
                return -1;
        }
 
+       DO_OR_GOTO_ERR(check_permissions());
+
+       DO_OR_GOTO_ERR(probe_cpu());
+
        DO_OR_GOTO_ERR(setup_all_buffers());
 
        plugin_register_read(PLUGIN_NAME, turbostat_read);
@@ -1511,12 +1554,6 @@ err:
        return ret;
 }
 
-static const char *config_keys[] =
-{
-       "TCCActivationTemp",
-};
-static const int config_keys_num = STATIC_ARRAY_SIZE (config_keys);
-
 static int
 turbostat_config(const char *key, const char *value)
 {