diff mbox series

[RFC,2/2] cxl/memdev: Register for and process CPER events

Message ID 20230601-cxl-cper-v1-2-99ba43f8f770@intel.com
State New
Headers show
Series efi/cxl-cper: Report CPER CXL component events through trace events | expand

Commit Message

Ira Weiny Oct. 13, 2023, 6:55 a.m. UTC
If the firmware has configured CXL event support to be firmware first
the OS can process those events through CPER records.

Detect firmware first configuration and register a notifier callback to
process catch records for this memdev.  Process those records destined
for this memdev through the normal trace mechanism.

Not-Yet-Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
RFC comments:
The matching of the CPER event to the MDS is a bit hacky right now and
could probably be much more robust.  But the general approach seems
sound.  Simply register a notifier for each device and when that device
finds a record for itself call the normal trace mechanisms.
---
 drivers/cxl/core/mbox.c |  7 ++---
 drivers/cxl/cxlmem.h    |  5 ++++
 drivers/cxl/pci.c       | 70 ++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 78 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 4df4f614f490..3a8ce7801e04 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -860,9 +860,9 @@  static const uuid_t mem_mod_event_uuid =
 	UUID_INIT(0xfe927475, 0xdd59, 0x4339,
 		  0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74);
 
-static void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
-				   enum cxl_event_log_type type,
-				   struct cxl_event_record_raw *record)
+void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
+			    enum cxl_event_log_type type,
+			    struct cxl_event_record_raw *record)
 {
 	uuid_t *id = &record->hdr.id;
 
@@ -885,6 +885,7 @@  static void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
 		trace_cxl_generic_event(cxlmd, type, record);
 	}
 }
+EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, CXL);
 
 static int cxl_clear_event_record(struct cxl_memdev_state *mds,
 				  enum cxl_event_log_type log,
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 706f8a6d1ef4..2b4210c291b9 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -477,6 +477,8 @@  struct cxl_memdev_state {
 	struct cxl_security_state security;
 	struct cxl_fw_state fw;
 
+	struct notifier_block cxl_cper_nb;
+
 	struct rcuwait mbox_wait;
 	int (*mbox_send)(struct cxl_memdev_state *mds,
 			 struct cxl_mbox_cmd *cmd);
@@ -863,6 +865,9 @@  void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
 void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
 				  unsigned long *cmds);
 void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status);
+void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
+			    enum cxl_event_log_type type,
+			    struct cxl_event_record_raw *record);
 int cxl_set_timestamp(struct cxl_memdev_state *mds);
 int cxl_poison_state_init(struct cxl_memdev_state *mds);
 int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 44a21ab7add5..19922e32c098 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -1,5 +1,6 @@ 
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
+#include <asm-generic/unaligned.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/moduleparam.h>
 #include <linux/module.h>
@@ -10,6 +11,7 @@ 
 #include <linux/pci.h>
 #include <linux/aer.h>
 #include <linux/io.h>
+#include <linux/efi.h>
 #include "cxlmem.h"
 #include "cxlpci.h"
 #include "cxl.h"
@@ -748,6 +750,70 @@  static bool cxl_event_int_is_fw(u8 setting)
 	return mode == CXL_INT_FW;
 }
 
+#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
+int cxl_cper_event(struct notifier_block *nb, unsigned long action, void *data)
+{
+	struct cxl_cper_notifier_data *nd = data;
+	struct cxl_event_record_raw record;
+	enum cxl_event_log_type log_type;
+	struct cxl_memdev_state *mds;
+	u32 hdr_flags;
+
+	mds = container_of(nb, struct cxl_memdev_state, cxl_cper_nb);
+
+	/* Need serial number for device identification */
+	if (!(nd->rec->hdr.validation_bits & CPER_CXL_DEVICE_SN_VALID))
+		return NOTIFY_DONE;
+
+	/* FIXME endianess and bytes of serial number need verification */
+	/* FIXME Should other values be checked? */
+	if (memcmp(&mds->cxlds.serial, &nd->rec->hdr.dev_serial_num,
+		   sizeof(mds->cxlds.serial)))
+		return NOTIFY_DONE;
+
+	/*
+	 * UEFI v2.10 defines N.2.14 defines the CXL CPER record as not
+	 * including the uuid field from the CXL record.
+	 *
+	 * Build the record from the UUID passed.
+	 */
+	record = (struct cxl_event_record_raw) {
+		.hdr.id = nd->uuid,
+	};
+	memcpy(&record.hdr.length, &nd->rec->comp_event_log,
+		CPER_CXL_REC_LEN(nd->rec));
+
+	/* ensure record can always handle the full CPER provided data */
+	BUILD_BUG_ON(sizeof(record) <
+		(CPER_CXL_COMP_EVENT_LOG_SIZE + sizeof(record.hdr.id)));
+
+	hdr_flags = get_unaligned_le24(record.hdr.flags);
+	log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
+
+	cxl_event_trace_record(mds->cxlds.cxlmd, log_type, &record);
+
+	return NOTIFY_OK;
+}
+
+static void cxl_unregister_cper_events(void *_mds)
+{
+	struct cxl_memdev_state *mds = _mds;
+
+	unregister_cxl_cper_notifier(&mds->cxl_cper_nb);
+}
+
+static void register_cper_events(struct cxl_memdev_state *mds)
+{
+	mds->cxl_cper_nb.notifier_call = cxl_cper_event;
+
+	if (register_cxl_cper_notifier(&mds->cxl_cper_nb)) {
+		dev_err(mds->cxlds.dev, "CPER registration failed\n");
+		return;
+	}
+
+	devm_add_action_or_reset(mds->cxlds.dev, cxl_unregister_cper_events, mds);
+}
+
 static int cxl_event_config(struct pci_host_bridge *host_bridge,
 			    struct cxl_memdev_state *mds)
 {
@@ -758,8 +824,10 @@  static int cxl_event_config(struct pci_host_bridge *host_bridge,
 	 * When BIOS maintains CXL error reporting control, it will process
 	 * event records.  Only one agent can do so.
 	 */
-	if (!host_bridge->native_cxl_error)
+	if (!host_bridge->native_cxl_error) {
+		register_cper_events(mds);
 		return 0;
+	}
 
 	rc = cxl_mem_alloc_event_buf(mds);
 	if (rc)