From: Roman Korynkevych Date: Fri, 14 Jul 2017 14:26:44 +0000 (+0300) Subject: Merge branch 'master' into feat_mcelog_notification_bugfixes X-Git-Tag: collectd-5.8.0~109^2~1 X-Git-Url: https://git.octo.it/?p=collectd.git;a=commitdiff_plain;h=217ec252adf547f2dd048489e2498bc31e8f70db Merge branch 'master' into feat_mcelog_notification_bugfixes --- 217ec252adf547f2dd048489e2498bc31e8f70db diff --cc src/mcelog.c index e5764e43,fad18d50..8d6c0b4d --- a/src/mcelog.c +++ b/src/mcelog.c @@@ -108,129 -96,32 +108,129 @@@ static socket_adapter_t socket_adapter }; static _Bool mcelog_thread_running; +static _Bool mcelog_apply_defaults; + +static void mcelog_free_dimms_list_records(llist_t *dimms_list) { + + for (llentry_t *e = llist_head(dimms_list); e != NULL; e = e->next) { + sfree(e->key); + sfree(e->value); + } +} + +/* Create or get dimm by dimm name/location */ +static llentry_t *mcelog_dimm(const mcelog_memory_rec_t *rec, + llist_t *dimms_list) { + + char dimm_name[DATA_MAX_NAME_LEN]; + + if (strlen(rec->dimm_name) > 0) { + ssnprintf(dimm_name, sizeof(dimm_name), "%s_%s", rec->location, + rec->dimm_name); + } else + sstrncpy(dimm_name, rec->location, sizeof(dimm_name)); + + llentry_t *dimm_le = llist_search(g_mcelog_config.dimms_list, dimm_name); + + if (dimm_le == NULL) { + mcelog_memory_rec_t *dimm_mr = calloc(1, sizeof(*dimm_mr)); + if (dimm_mr == NULL) { + ERROR(MCELOG_PLUGIN ": Error allocating dimm memory item"); + return NULL; + } + char *p_name = strdup(dimm_name); + if (p_name == NULL) { + ERROR(MCELOG_PLUGIN ": strdup: error"); + free(dimm_mr); + return NULL; + } + + /* add new dimm */ + dimm_le = llentry_create(p_name, dimm_mr); + if (dimm_le == NULL) { + ERROR(MCELOG_PLUGIN ": llentry_create(): error"); + free(dimm_mr); + free(p_name); + return NULL; + } + pthread_mutex_lock(&g_mcelog_config.dimms_lock); + llist_append(g_mcelog_config.dimms_list, dimm_le); + pthread_mutex_unlock(&g_mcelog_config.dimms_lock); + } + + return dimm_le; +} + +static void mcelog_update_dimm_stats(llentry_t *dimm, + const mcelog_memory_rec_t *rec) { + pthread_mutex_lock(&g_mcelog_config.dimms_lock); + memcpy(dimm->value, rec, sizeof(mcelog_memory_rec_t)); + pthread_mutex_unlock(&g_mcelog_config.dimms_lock); +} static int mcelog_config(oconfig_item_t *ci) { + int use_logfile = 0, use_memory = 0; for (int i = 0; i < ci->children_num; i++) { oconfig_item_t *child = ci->children + i; - if (strcasecmp("McelogClientSocket", child->key) == 0) { - if (cf_util_get_string_buffer(child, socket_adapter.unix_sock.sun_path, - sizeof(socket_adapter.unix_sock.sun_path)) < - 0) { - ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".", + if (strcasecmp("McelogLogfile", child->key) == 0) { + use_logfile = 1; + if (use_memory) { + ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\", Memory " + "option is already configured.", child->key); - return (-1); + return -1; } - } else if (strcasecmp("McelogLogfile", child->key) == 0) { if (cf_util_get_string_buffer(child, g_mcelog_config.logfile, sizeof(g_mcelog_config.logfile)) < 0) { ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".", child->key); - return (-1); + return -1; } + memset(socket_adapter.unix_sock.sun_path, 0, + sizeof(socket_adapter.unix_sock.sun_path)); + } else if (strcasecmp("Memory", child->key) == 0) { + if (use_logfile) { + ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\", Logfile " + "option is already configured.", + child->key); + return (-1); + } + use_memory = 1; + oconfig_item_t *mem_child = child->children; + for (int j = 0; j < child->children_num; j++) { + mem_child += j; + if (strcasecmp("McelogClientSocket", mem_child->key) == 0) { + if (cf_util_get_string_buffer( + mem_child, socket_adapter.unix_sock.sun_path, + sizeof(socket_adapter.unix_sock.sun_path)) < 0) { + ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".", + mem_child->key); + return (-1); + } + } else if (strcasecmp("PersistentNotification", mem_child->key) == 0) { + if (cf_util_get_boolean(mem_child, &g_mcelog_config.persist) < 0) { + ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".", + mem_child->key); + return (-1); + } + } else { + ERROR(MCELOG_PLUGIN ": Invalid Memory configuration option: \"%s\".", + mem_child->key); + return (-1); + } + } + memset(g_mcelog_config.logfile, 0, sizeof(g_mcelog_config.logfile)); } else { ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".", child->key); - return (-1); + return -1; } } - return 0; + + if (!use_logfile && !use_memory) + mcelog_apply_defaults = 1; + + return (0); } static int socket_close(socket_adapter_t *self) { @@@ -318,104 -209,79 +318,104 @@@ static int socket_reinit(socket_adapter .type_instance = "mcelog_status"}); } pthread_rwlock_unlock(&self->lock); - return (ret); + return ret; } -static int mcelog_prepare_notification(notification_t *n, - const mcelog_memory_rec_t *mr) { - if (n == NULL || mr == NULL) - return -1; +static int mcelog_dispatch_mem_notifications(const mcelog_memory_rec_t *mr) { + notification_t n = {.severity = NOTIF_WARNING, + .time = cdtime(), + .plugin = MCELOG_PLUGIN, + .type = "errors"}; - if ((mr->location[0] != '\0') && - (plugin_notification_meta_add_string(n, MCELOG_SOCKET_STR, mr->location) < - 0)) { - ERROR(MCELOG_PLUGIN ": add memory location meta data failed"); - return -1; - } - if ((mr->dimm_name[0] != '\0') && - (plugin_notification_meta_add_string(n, MCELOG_DIMM_NAME, mr->dimm_name) < - 0)) { - ERROR(MCELOG_PLUGIN ": add DIMM name meta data failed"); - plugin_notification_meta_free(n->meta); - return -1; - } - if (plugin_notification_meta_add_signed_int(n, MCELOG_CORRECTED_ERR, - mr->corrected_err_total) < 0) { - ERROR(MCELOG_PLUGIN ": add corrected errors meta data failed"); - plugin_notification_meta_free(n->meta); - return -1; - } - if (plugin_notification_meta_add_signed_int( - n, "corrected memory timed errors", mr->corrected_err_timed) < 0) { - ERROR(MCELOG_PLUGIN ": add corrected timed errors meta data failed"); - plugin_notification_meta_free(n->meta); - return -1; - } - if ((mr->corrected_err_timed_period[0] != '\0') && - (plugin_notification_meta_add_string(n, "corrected errors time period", - mr->corrected_err_timed_period) < - 0)) { - ERROR(MCELOG_PLUGIN ": add corrected errors period meta data failed"); - plugin_notification_meta_free(n->meta); - return -1; + int dispatch_corrected_notifs = 0, dispatch_uncorrected_notifs = 0; + + if (mr == NULL) + return (-1); + + llentry_t *dimm = mcelog_dimm(mr, g_mcelog_config.dimms_list); + if (dimm == NULL) { + ERROR(MCELOG_PLUGIN + ": Error adding/getting dimm memory item to/from cache"); + return (-1); } - if (plugin_notification_meta_add_signed_int(n, MCELOG_UNCORRECTED_ERR, - mr->uncorrected_err_total) < 0) { - ERROR(MCELOG_PLUGIN ": add corrected errors meta data failed"); - plugin_notification_meta_free(n->meta); - return -1; + mcelog_memory_rec_t *mr_old = dimm->value; + if (!g_mcelog_config.persist) { + + if (mr_old->corrected_err_total != mr->corrected_err_total || + mr_old->corrected_err_timed != mr->corrected_err_timed) + dispatch_corrected_notifs = 1; + + if (mr_old->uncorrected_err_total != mr->uncorrected_err_total || + mr_old->uncorrected_err_timed != mr->uncorrected_err_timed) + dispatch_uncorrected_notifs = 1; + + if (!dispatch_corrected_notifs && !dispatch_uncorrected_notifs) { + DEBUG("%s: No new notifications to dispatch", MCELOG_PLUGIN); + return (0); + } + } else { + dispatch_corrected_notifs = 1; + dispatch_uncorrected_notifs = 1; } - if (plugin_notification_meta_add_signed_int(n, - "uncorrected memory timed errors", - mr->uncorrected_err_timed) < 0) { - ERROR(MCELOG_PLUGIN ": add corrected timed errors meta data failed"); - plugin_notification_meta_free(n->meta); - return -1; + + sstrncpy(n.host, hostname_g, sizeof(n.host)); + + if (mr->dimm_name[0] != '\0') + ssnprintf(n.plugin_instance, sizeof(n.plugin_instance), "%s_%s", + mr->location, mr->dimm_name); + else + sstrncpy(n.plugin_instance, mr->location, sizeof(n.plugin_instance)); + + if (dispatch_corrected_notifs && + (mr->corrected_err_total > 0 || mr->corrected_err_timed > 0)) { + /* Corrected Error Notifications */ + plugin_notification_meta_add_signed_int(&n, MCELOG_CORRECTED_ERR, + mr->corrected_err_total); + plugin_notification_meta_add_signed_int(&n, MCELOG_CORRECTED_ERR_TIMED, + mr->corrected_err_timed); + ssnprintf(n.message, sizeof(n.message), MCELOG_CORRECTED_ERR); + sstrncpy(n.type_instance, MCELOG_CORRECTED_ERR_TYPE_INS, + sizeof(n.type_instance)); + plugin_dispatch_notification(&n); + if (n.meta) + plugin_notification_meta_free(n.meta); + n.meta = NULL; } - if ((mr->uncorrected_err_timed_period[0] != '\0') && - (plugin_notification_meta_add_string(n, "uncorrected errors time period", - mr->uncorrected_err_timed_period) < - 0)) { - ERROR(MCELOG_PLUGIN ": add corrected errors period meta data failed"); - plugin_notification_meta_free(n->meta); - return -1; + + if (dispatch_uncorrected_notifs && + (mr->uncorrected_err_total > 0 || mr->uncorrected_err_timed > 0)) { + /* Uncorrected Error Notifications */ + plugin_notification_meta_add_signed_int(&n, MCELOG_UNCORRECTED_ERR, + mr->uncorrected_err_total); + plugin_notification_meta_add_signed_int(&n, MCELOG_UNCORRECTED_ERR_TIMED, + mr->uncorrected_err_timed); + ssnprintf(n.message, sizeof(n.message), MCELOG_UNCORRECTED_ERR); + sstrncpy(n.type_instance, MCELOG_UNCORRECTED_ERR_TYPE_INS, + sizeof(n.type_instance)); + n.severity = NOTIF_FAILURE; + plugin_dispatch_notification(&n); + if (n.meta) + plugin_notification_meta_free(n.meta); + n.meta = NULL; } - return (0); + return 0; } static int mcelog_submit(const mcelog_memory_rec_t *mr) { if (!mr) { ERROR(MCELOG_PLUGIN ": %s: NULL pointer", __FUNCTION__); - return (-1); + return -1; } + llentry_t *dimm = mcelog_dimm(mr, g_mcelog_config.dimms_list); + if (dimm == NULL) { + ERROR(MCELOG_PLUGIN + ": Error adding/getting dimm memory item to/from cache"); + return (-1); + } + value_list_t vl = { .values_len = 1, .values = &(value_t){.derive = (derive_t)mr->corrected_err_total}, @@@ -626,31 -496,17 +626,31 @@@ static void *poll_worker(__attribute__( } static int mcelog_init(void) { + if (mcelog_apply_defaults) { + INFO(MCELOG_PLUGIN + ": No configuration selected defaulting to memory errors."); + memset(g_mcelog_config.logfile, 0, sizeof(g_mcelog_config.logfile)); + } + g_mcelog_config.dimms_list = llist_create(); + int err = pthread_mutex_init(&g_mcelog_config.dimms_lock, NULL); + if (err < 0) { + ERROR(MCELOG_PLUGIN ": plugin: failed to initialize cache lock"); + return (-1); + } + if (socket_adapter.reinit(&socket_adapter) != 0) { ERROR(MCELOG_PLUGIN ": Cannot connect to client socket"); - return (-1); + return -1; } - if (plugin_thread_create(&g_mcelog_config.tid, NULL, poll_worker, NULL, - NULL) != 0) { - ERROR(MCELOG_PLUGIN ": Error creating poll thread."); - return -1; + if (strlen(socket_adapter.unix_sock.sun_path)) { + if (plugin_thread_create(&g_mcelog_config.tid, NULL, poll_worker, NULL, + NULL) != 0) { + ERROR(MCELOG_PLUGIN ": Error creating poll thread."); + return (-1); + } } - return (0); + return 0; } static int get_memory_machine_checks(void) { @@@ -681,15 -537,10 +681,15 @@@ static int mcelog_shutdown(void) ret = -1; } } - + pthread_mutex_lock(&g_mcelog_config.dimms_lock); + mcelog_free_dimms_list_records(g_mcelog_config.dimms_list); + llist_destroy(g_mcelog_config.dimms_list); + g_mcelog_config.dimms_list = NULL; + pthread_mutex_unlock(&g_mcelog_config.dimms_lock); + pthread_mutex_destroy(&g_mcelog_config.dimms_lock); ret = socket_adapter.close(&socket_adapter) || ret; pthread_rwlock_destroy(&(socket_adapter.lock)); - return (-ret); + return -ret; } void module_register(void) {