From 672ef0e5684048a00aeb923b10131275ea688543 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 23 Jan 2020 09:02:54 +0000 Subject: EDAC: Store error type in struct edac_raw_error_desc Store the error type in struct edac_raw_error_desc. This makes the type parameter of edac_raw_mc_handle_error() obsolete. [ kernel-doc typo ] Reported-by: kbuild test robot Signed-off-by: Robert Richter Signed-off-by: Borislav Petkov Reviewed-by: Mauro Carvalho Chehab Acked-by: Aristeu Rozanski Link: https://lkml.kernel.org/r/20200123090210.26933-4-rrichter@marvell.com --- include/linux/edac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index cc31b9742684..6703eb492cd2 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -442,6 +442,7 @@ struct errcount_attribute_data { * struct edac_raw_error_desc - Raw error report structure * @grain: minimum granularity for an error report, in bytes * @error_count: number of errors of the same type + * @type: severity of the error (CE/UE/Fatal) * @top_layer: top layer of the error (layer[0]) * @mid_layer: middle layer of the error (layer[1]) * @low_layer: low layer of the error (layer[2]) @@ -462,6 +463,7 @@ struct edac_raw_error_desc { long grain; u16 error_count; + enum hw_event_mc_err_type type; int top_layer; int mid_layer; int low_layer; -- cgit v1.2.3 From 67792cf9583c7816667c6b90007b5840f1b471f4 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 23 Jan 2020 09:03:02 +0000 Subject: EDAC/mc: Remove enable_per_layer_report function argument Many functions carry the enable_per_layer_report argument. This is a bool value indicating the error information contains some location data where the error occurred. This can easily being determined by checking the pos[] array for values. Negative values indicate there is no location available. So if the top layer is negative, the error location is unknown. Just check if the top layer is negative and remove enable_per_layer_report as function argument and also from struct edac_raw_error_desc. [ bp: Reflow comments to 80 columns, while at it. ] Signed-off-by: Robert Richter Signed-off-by: Borislav Petkov Acked-by: Aristeu Rozanski Link: https://lkml.kernel.org/r/20200123090210.26933-8-rrichter@marvell.com --- drivers/edac/edac_mc.c | 40 ++++++++++++++++++---------------------- drivers/edac/ghes_edac.c | 5 +---- include/linux/edac.h | 3 --- 3 files changed, 19 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 7a1445feb59d..1867f11a3438 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -946,7 +946,6 @@ const char *edac_layer_name[] = { EXPORT_SYMBOL_GPL(edac_layer_name); static void edac_inc_ce_error(struct mem_ctl_info *mci, - bool enable_per_layer_report, const int pos[EDAC_MAX_LAYERS], const u16 count) { @@ -954,7 +953,7 @@ static void edac_inc_ce_error(struct mem_ctl_info *mci, mci->ce_mc += count; - if (!enable_per_layer_report) { + if (pos[0] < 0) { mci->ce_noinfo_count += count; return; } @@ -971,7 +970,6 @@ static void edac_inc_ce_error(struct mem_ctl_info *mci, } static void edac_inc_ue_error(struct mem_ctl_info *mci, - bool enable_per_layer_report, const int pos[EDAC_MAX_LAYERS], const u16 count) { @@ -979,7 +977,7 @@ static void edac_inc_ue_error(struct mem_ctl_info *mci, mci->ue_mc += count; - if (!enable_per_layer_report) { + if (pos[0] < 0) { mci->ue_noinfo_count += count; return; } @@ -1003,7 +1001,6 @@ static void edac_ce_error(struct mem_ctl_info *mci, const char *label, const char *detail, const char *other_detail, - const bool enable_per_layer_report, const unsigned long page_frame_number, const unsigned long offset_in_page, long grain) @@ -1026,7 +1023,7 @@ static void edac_ce_error(struct mem_ctl_info *mci, error_count, msg, msg_aux, label, location, detail); } - edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count); + edac_inc_ce_error(mci, pos, error_count); if (mci->scrub_mode == SCRUB_SW_SRC) { /* @@ -1056,8 +1053,7 @@ static void edac_ue_error(struct mem_ctl_info *mci, const char *location, const char *label, const char *detail, - const char *other_detail, - const bool enable_per_layer_report) + const char *other_detail) { char *msg_aux = ""; @@ -1086,7 +1082,7 @@ static void edac_ue_error(struct mem_ctl_info *mci, msg, msg_aux, label, location, detail); } - edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); + edac_inc_ue_error(mci, pos, error_count); } static void edac_inc_csrow(struct edac_raw_error_desc *e, int row, int chan) @@ -1136,16 +1132,16 @@ void edac_raw_mc_handle_error(struct edac_raw_error_desc *e) "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", e->page_frame_number, e->offset_in_page, e->grain, e->syndrome); - edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label, - detail, e->other_detail, e->enable_per_layer_report, + edac_ce_error(mci, e->error_count, pos, e->msg, e->location, + e->label, detail, e->other_detail, e->page_frame_number, e->offset_in_page, e->grain); } else { snprintf(detail, sizeof(detail), "page:0x%lx offset:0x%lx grain:%ld", e->page_frame_number, e->offset_in_page, e->grain); - edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label, - detail, e->other_detail, e->enable_per_layer_report); + edac_ue_error(mci, e->error_count, pos, e->msg, e->location, + e->label, detail, e->other_detail); } @@ -1170,6 +1166,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; int i, n_labels = 0; struct edac_raw_error_desc *e = &mci->error_desc; + bool any_memory = true; edac_dbg(3, "MC%d\n", mci->mc_idx); @@ -1187,10 +1184,9 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, e->other_detail = other_detail; /* - * Check if the event report is consistent and if the memory - * location is known. If it is known, enable_per_layer_report will be - * true, the DIMM(s) label info will be filled and the per-layer - * error counters will be incremented. + * Check if the event report is consistent and if the memory location is + * known. If it is, the DIMM(s) label info will be filled and the + * per-layer error counters will be incremented. */ for (i = 0; i < mci->n_layers; i++) { if (pos[i] >= (int)mci->layers[i].size) { @@ -1208,7 +1204,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, pos[i] = -1; } if (pos[i] >= 0) - e->enable_per_layer_report = true; + any_memory = false; } /* @@ -1239,9 +1235,9 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, /* * If the error is memory-controller wide, there's no need to - * seek for the affected DIMMs because the whole - * channel/memory controller/... may be affected. - * Also, don't show errors for empty DIMM slots. + * seek for the affected DIMMs because the whole channel/memory + * controller/... may be affected. Also, don't show errors for + * empty DIMM slots. */ if (!dimm->nr_pages) continue; @@ -1277,7 +1273,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, chan = -2; } - if (!e->enable_per_layer_report) + if (any_memory) strcpy(e->label, "any memory"); else if (!*e->label) strcpy(e->label, "unknown memory"); diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index bef8a428c429..cb3dab56a875 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -355,11 +355,8 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) mem_err->mem_dev_handle); index = get_dimm_smbios_index(mci, mem_err->mem_dev_handle); - if (index >= 0) { + if (index >= 0) e->top_layer = index; - e->enable_per_layer_report = true; - } - } if (p > e->location) *(p - 1) = '\0'; diff --git a/include/linux/edac.h b/include/linux/edac.h index 6703eb492cd2..815f246e0abd 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -454,8 +454,6 @@ struct errcount_attribute_data { * @location: location of the error * @label: label of the affected DIMM(s) * @other_detail: other driver-specific detail about the error - * @enable_per_layer_report: if false, the error affects all layers - * (typically, a memory controller error) */ struct edac_raw_error_desc { char location[LOCATION_SIZE]; @@ -472,7 +470,6 @@ struct edac_raw_error_desc { unsigned long syndrome; const char *msg; const char *other_detail; - bool enable_per_layer_report; }; /* MEMORY controller information structure -- cgit v1.2.3 From 4aa92c86463273b673e4170c60cb78e2625781eb Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 17 Feb 2020 12:30:23 +0100 Subject: EDAC/mc: Remove per layer counters Looking at how mci->{ue,ce}_per_layer[EDAC_MAX_LAYERS] is used, it turns out that only the leaves in the memory hierarchy are consumed (in sysfs), but not the intermediate layers, e.g.: count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][dimm->idx]; These unused counters only add complexity, remove them. The error counter values are directly stored in struct dimm_info now. Signed-off-by: Robert Richter Signed-off-by: Borislav Petkov Acked-by: Aristeu Rozanski Link: https://lkml.kernel.org/r/20200123090210.26933-11-rrichter@marvell.com --- drivers/edac/edac_mc.c | 67 +++++++++++--------------------------------- drivers/edac/edac_mc_sysfs.c | 20 ++++++------- include/linux/edac.h | 4 ++- 3 files changed, 27 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 80629c702a4f..75ede27bdf6a 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -451,11 +451,9 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, { struct mem_ctl_info *mci; struct edac_mc_layer *layer; - u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS]; - unsigned int idx, size, tot_dimms = 1, count = 1; - unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0; + unsigned int idx, size, tot_dimms = 1; + unsigned int tot_csrows = 1, tot_channels = 1; void *pvt, *ptr = NULL; - int i; bool per_rank = false; if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0)) @@ -482,19 +480,10 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, * stringent as what the compiler would provide if we could simply * hardcode everything into a single struct. */ - mci = edac_align_ptr(&ptr, sizeof(*mci), 1); - layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers); - for (i = 0; i < n_layers; i++) { - count *= layers[i].size; - edac_dbg(4, "errcount layer %d size %d\n", i, count); - ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count); - ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count); - tot_errcount += 2 * count; - } - - edac_dbg(4, "allocating %d error counters\n", tot_errcount); - pvt = edac_align_ptr(&ptr, sz_pvt, 1); - size = ((unsigned long)pvt) + sz_pvt; + mci = edac_align_ptr(&ptr, sizeof(*mci), 1); + layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers); + pvt = edac_align_ptr(&ptr, sz_pvt, 1); + size = ((unsigned long)pvt) + sz_pvt; edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n", size, @@ -513,10 +502,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, * rather than an imaginary chunk of memory located at address 0. */ layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer)); - for (i = 0; i < n_layers; i++) { - mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i])); - mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i])); - } pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL; /* setup index and various internal pointers */ @@ -949,48 +934,28 @@ static void edac_inc_ce_error(struct edac_raw_error_desc *e) { int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer }; struct mem_ctl_info *mci = error_desc_to_mci(e); - int i, index = 0; + struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]); mci->ce_mc += e->error_count; - if (pos[0] < 0) { + if (dimm) + dimm->ce_count += e->error_count; + else mci->ce_noinfo_count += e->error_count; - return; - } - - for (i = 0; i < mci->n_layers; i++) { - if (pos[i] < 0) - break; - index += pos[i]; - mci->ce_per_layer[i][index] += e->error_count; - - if (i < mci->n_layers - 1) - index *= mci->layers[i + 1].size; - } } static void edac_inc_ue_error(struct edac_raw_error_desc *e) { int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer }; struct mem_ctl_info *mci = error_desc_to_mci(e); - int i, index = 0; + struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]); mci->ue_mc += e->error_count; - if (pos[0] < 0) { + if (dimm) + dimm->ue_count += e->error_count; + else mci->ue_noinfo_count += e->error_count; - return; - } - - for (i = 0; i < mci->n_layers; i++) { - if (pos[i] < 0) - break; - index += pos[i]; - mci->ue_per_layer[i][index] += e->error_count; - - if (i < mci->n_layers - 1) - index *= mci->layers[i + 1].size; - } } static void edac_ce_error(struct edac_raw_error_desc *e) @@ -1143,8 +1108,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, /* * Check if the event report is consistent and if the memory location is - * known. If it is, the DIMM(s) label info will be filled and the - * per-layer error counters will be incremented. + * known. If it is, the DIMM(s) label info will be filled and the DIMM's + * error counters will be incremented. */ for (i = 0; i < mci->n_layers; i++) { if (pos[i] >= (int)mci->layers[i].size) { diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 11e1b436f116..4e6aca595133 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -551,10 +551,8 @@ static ssize_t dimmdev_ce_count_show(struct device *dev, char *data) { struct dimm_info *dimm = to_dimm(dev); - u32 count; - count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][dimm->idx]; - return sprintf(data, "%u\n", count); + return sprintf(data, "%u\n", dimm->ce_count); } static ssize_t dimmdev_ue_count_show(struct device *dev, @@ -562,10 +560,8 @@ static ssize_t dimmdev_ue_count_show(struct device *dev, char *data) { struct dimm_info *dimm = to_dimm(dev); - u32 count; - count = dimm->mci->ue_per_layer[dimm->mci->n_layers-1][dimm->idx]; - return sprintf(data, "%u\n", count); + return sprintf(data, "%u\n", dimm->ue_count); } /* dimm/rank attribute files */ @@ -661,7 +657,9 @@ static ssize_t mci_reset_counters_store(struct device *dev, const char *data, size_t count) { struct mem_ctl_info *mci = to_mci(dev); - int cnt, row, chan, i; + struct dimm_info *dimm; + int row, chan; + mci->ue_mc = 0; mci->ce_mc = 0; mci->ue_noinfo_count = 0; @@ -677,11 +675,9 @@ static ssize_t mci_reset_counters_store(struct device *dev, ri->channels[chan]->ce_count = 0; } - cnt = 1; - for (i = 0; i < mci->n_layers; i++) { - cnt *= mci->layers[i].size; - memset(mci->ce_per_layer[i], 0, cnt * sizeof(u32)); - memset(mci->ue_per_layer[i], 0, cnt * sizeof(u32)); + mci_for_each_dimm(mci, dimm) { + dimm->ue_count = 0; + dimm->ce_count = 0; } mci->start_time = jiffies; diff --git a/include/linux/edac.h b/include/linux/edac.h index 815f246e0abd..0f20b986b0ab 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -383,6 +383,9 @@ struct dimm_info { unsigned int csrow, cschannel; /* Points to the old API data */ u16 smbios_handle; /* Handle for SMBIOS type 17 */ + + u32 ce_count; + u32 ue_count; }; /** @@ -559,7 +562,6 @@ struct mem_ctl_info { */ u32 ce_noinfo_count, ue_noinfo_count; u32 ue_mc, ce_mc; - u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS]; struct completion complete; -- cgit v1.2.3