diff mbox series

[v4,7/7] cxl/memdev: Register for and process CPER events

Message ID 20231215-cxl-cper-v4-7-01b6dab44fcd@intel.com
State New
Headers show
Series efi/cxl-cper: Report CPER CXL component events through trace events | expand

Commit Message

Ira Weiny Dec. 15, 2023, 11:26 p.m. UTC
If the firmware has configured CXL event support to be firmware first
the OS can process those events through CPER records.  The CXL layer has
unique DPA to HPA knowledge and standard event trace parsing in place.

CPER records contain Bus, Device, Function information which can be used
to identify the PCI device which is sending the event.

Change pci driver registration to include registration for a CXL CPER
notifier to process the events through the trace subsystem.

Define and use scoped based management to simplify the handling of the
pci device object.

Cc: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
NOTE this patch depends on Dan's addition of a device guard[1].

[1] https://lore.kernel.org/all/170250854466.1522182.17555361077409628655.stgit@dwillia2-xfh.jf.intel.com/

Changes for v3/v4:
[djbw: define a __free(pci_dev_put) to release the device automatically]
[djbw: use device guard from Vishal]
[iweiny: delete old notifier block structure]
[iweiny: adjust for new notifier interface]
---
 drivers/cxl/core/mbox.c | 31 +++++++++++++++++++++++-----
 drivers/cxl/cxlmem.h    |  4 ++++
 drivers/cxl/pci.c       | 55 ++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/pci.h     |  2 ++
 4 files changed, 86 insertions(+), 6 deletions(-)

Comments

Smita Koralahalli Dec. 18, 2023, 6:17 p.m. UTC | #1
On 12/15/2023 3:26 PM, Ira Weiny wrote:
> If the firmware has configured CXL event support to be firmware first
> the OS can process those events through CPER records.  The CXL layer has
> unique DPA to HPA knowledge and standard event trace parsing in place.
> 
> CPER records contain Bus, Device, Function information which can be used
> to identify the PCI device which is sending the event.
> 
> Change pci driver registration to include registration for a CXL CPER
> notifier to process the events through the trace subsystem.
> 
> Define and use scoped based management to simplify the handling of the
> pci device object.
> 
> Cc: Bjorn Helgaas <bhelgaas@google.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---

[snip]


> +	switch (event_type) {
> +	case CXL_CPER_EVENT_GEN_MEDIA:
> +		trace_cxl_general_media(cxlmd, type, &gen_media_event_uuid,
> +					&event->gen_media);
> +		break;
> +	case CXL_CPER_EVENT_DRAM:
> +		trace_cxl_dram(cxlmd, type, &dram_event_uuid, &event->dram);
> +		break;
> +	case CXL_CPER_EVENT_MEM_MODULE:
> +		trace_cxl_memory_module(cxlmd, type, &mem_mod_event_uuid,
> +					&event->mem_module);
> +		break;
> +	}
> +}

Is default case needed here?

> +EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, CXL);
> +
> +static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> +				     enum cxl_event_log_type type,
> +				     struct cxl_event_record_raw *record)
>   {
>   	union cxl_event *evt = &record->event;
>   	uuid_t *id = &record->id;
> @@ -965,8 +986,8 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
>   			break;
>   
>   		for (i = 0; i < nr_rec; i++)
> -			cxl_event_trace_record(cxlmd, type,
> -					       &payload->records[i]);
> +			__cxl_event_trace_record(cxlmd, type,
> +						 &payload->records[i]);
>   
>   		if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
>   			trace_cxl_overflow(cxlmd, type, payload);
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index e5d770e26e02..e7e9508fecac 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -802,6 +802,10 @@ void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
>   void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
>   				  unsigned long *cmds);
>   void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status);
> +void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> +			    enum cxl_event_log_type type,
> +			    enum cxl_event_type event_type,
> +			    union cxl_event *event);
>   int cxl_set_timestamp(struct cxl_memdev_state *mds);
>   int cxl_poison_state_init(struct cxl_memdev_state *mds);
>   int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 0155fb66b580..638275569d63 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -1,5 +1,6 @@
>   // SPDX-License-Identifier: GPL-2.0-only
>   /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
> +#include <asm-generic/unaligned.h>
>   #include <linux/io-64-nonatomic-lo-hi.h>
>   #include <linux/moduleparam.h>
>   #include <linux/module.h>
> @@ -969,6 +970,58 @@ static struct pci_driver cxl_pci_driver = {
>   	},
>   };
>   
> +#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
> +static void cxl_cper_event_call(enum cxl_event_type ev_type,
> +				struct cxl_cper_event_rec *rec)
> +{
> +	struct cper_cxl_event_devid *device_id = &rec->hdr.device_id;
> +	struct pci_dev *pdev __free(pci_dev_put) = NULL;
> +	struct cxl_dev_state *cxlds = NULL;
> +	enum cxl_event_log_type log_type;
> +	unsigned int devfn;
> +	u32 hdr_flags;
> +
> +	devfn = PCI_DEVFN(device_id->device_num, device_id->func_num);
> +	pdev = pci_get_domain_bus_and_slot(device_id->segment_num,
> +					   device_id->bus_num, devfn);
> +	if (!pdev)
> +		return;
> +
> +	guard(device)(&pdev->dev);
> +	if (pdev->driver == &cxl_pci_driver)
> +		cxlds = pci_get_drvdata(pdev);
> +	if (!cxlds)
> +		return;
> +
> +	/* Fabricate a log type */
> +	hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags);
> +	log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
> +
> +	cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type, &rec->event);

Currently, when I run this, I see two trace events printed. One from 
here, and another as a non_standard_event from ghes. I think both should 
be unified?

I remember Dan pointing out to me this when I sent decoding for protocol 
errors and its still pending on me for protocol errors.

Thanks,
Smita

> +}
> +
> +static int __init cxl_pci_driver_init(void)
> +{
> +	int rc;
> +
> +	rc = pci_register_driver(&cxl_pci_driver);
> +	if (rc)
> +		return rc;
> +
> +	rc = cxl_cper_register_notifier(cxl_cper_event_call);
> +	if (rc)
> +		pci_unregister_driver(&cxl_pci_driver);
> +
> +	return rc;
> +}
> +
> +static void __exit cxl_pci_driver_exit(void)
> +{
> +	cxl_cper_unregister_notifier(cxl_cper_event_call);
> +	pci_unregister_driver(&cxl_pci_driver);
> +}
> +
> +module_init(cxl_pci_driver_init);
> +module_exit(cxl_pci_driver_exit);
>   MODULE_LICENSE("GPL v2");
> -module_pci_driver(cxl_pci_driver);
>   MODULE_IMPORT_NS(CXL);
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 60ca768bc867..290d0a2651b2 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1170,6 +1170,7 @@ int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
>   u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp);
>   struct pci_dev *pci_dev_get(struct pci_dev *dev);
>   void pci_dev_put(struct pci_dev *dev);
> +DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T))
>   void pci_remove_bus(struct pci_bus *b);
>   void pci_stop_and_remove_bus_device(struct pci_dev *dev);
>   void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev);
> @@ -1871,6 +1872,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev);
>   void pci_dev_lock(struct pci_dev *dev);
>   int pci_dev_trylock(struct pci_dev *dev);
>   void pci_dev_unlock(struct pci_dev *dev);
> +DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T))
>   
>   /*
>    * PCI domain support.  Sometimes called PCI segment (eg by ACPI),
>
Dan Williams Dec. 18, 2023, 8:56 p.m. UTC | #2
Smita Koralahalli wrote:
> On 12/15/2023 3:26 PM, Ira Weiny wrote:
> > If the firmware has configured CXL event support to be firmware first
> > the OS can process those events through CPER records.  The CXL layer has
> > unique DPA to HPA knowledge and standard event trace parsing in place.
> > 
> > CPER records contain Bus, Device, Function information which can be used
> > to identify the PCI device which is sending the event.
> > 
> > Change pci driver registration to include registration for a CXL CPER
> > notifier to process the events through the trace subsystem.
> > 
> > Define and use scoped based management to simplify the handling of the
> > pci device object.
> > 
> > Cc: Bjorn Helgaas <bhelgaas@google.com>
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > 
> > ---
> 
> [snip]
> 
> 
> > +	switch (event_type) {
> > +	case CXL_CPER_EVENT_GEN_MEDIA:
> > +		trace_cxl_general_media(cxlmd, type, &gen_media_event_uuid,
> > +					&event->gen_media);
> > +		break;
> > +	case CXL_CPER_EVENT_DRAM:
> > +		trace_cxl_dram(cxlmd, type, &dram_event_uuid, &event->dram);
> > +		break;
> > +	case CXL_CPER_EVENT_MEM_MODULE:
> > +		trace_cxl_memory_module(cxlmd, type, &mem_mod_event_uuid,
> > +					&event->mem_module);
> > +		break;
> > +	}
> > +}
> 
> Is default case needed here?

Yeah, it looks like an uninitialized @type value can be passed through
the stack here.

[..]
> > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> > index 0155fb66b580..638275569d63 100644
> > --- a/drivers/cxl/pci.c
> > +++ b/drivers/cxl/pci.c
> > @@ -1,5 +1,6 @@
> >   // SPDX-License-Identifier: GPL-2.0-only
> >   /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
> > +#include <asm-generic/unaligned.h>
> >   #include <linux/io-64-nonatomic-lo-hi.h>
> >   #include <linux/moduleparam.h>
> >   #include <linux/module.h>
> > @@ -969,6 +970,58 @@ static struct pci_driver cxl_pci_driver = {
> >   	},
> >   };
> >   
> > +#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
> > +static void cxl_cper_event_call(enum cxl_event_type ev_type,
> > +				struct cxl_cper_event_rec *rec)
> > +{
> > +	struct cper_cxl_event_devid *device_id = &rec->hdr.device_id;
> > +	struct pci_dev *pdev __free(pci_dev_put) = NULL;
> > +	struct cxl_dev_state *cxlds = NULL;
> > +	enum cxl_event_log_type log_type;
> > +	unsigned int devfn;
> > +	u32 hdr_flags;
> > +
> > +	devfn = PCI_DEVFN(device_id->device_num, device_id->func_num);
> > +	pdev = pci_get_domain_bus_and_slot(device_id->segment_num,
> > +					   device_id->bus_num, devfn);
> > +	if (!pdev)
> > +		return;
> > +
> > +	guard(device)(&pdev->dev);
> > +	if (pdev->driver == &cxl_pci_driver)
> > +		cxlds = pci_get_drvdata(pdev);
> > +	if (!cxlds)
> > +		return;
> > +
> > +	/* Fabricate a log type */
> > +	hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags);
> > +	log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
> > +
> > +	cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type, &rec->event);
> 
> Currently, when I run this, I see two trace events printed. One from 
> here, and another as a non_standard_event from ghes. I think both should 
> be unified?
> 
> I remember Dan pointing out to me this when I sent decoding for protocol 
> errors and its still pending on me for protocol errors.

Good point, so I think the responsibility to trace CXL events should
belong to ghes_do_proc() and ghes_print_estatus() can just ignore CXL
events.

Notice how ghes_proc() sometimes skips ghes_print_estatus(), but
uncoditionally emits a trace event in ghes_do_proc()? To me that means
that the cper_estatus_print() inside ghes_print_estatus() can just defer
to the ghes code to do the hookup to the trace code.

For example, ras_userspace_consumers() was introduced to skip emitting
events to the kernel log when the trace event might be handled. My
assumption is that was for historical reasons, but since CXL events are
new, just never emit them to the kernel log and always require the trace
path.

I am open to other thoughts here, but it seems like ghes_do_proc() is
where the callback needs to be triggered.


> Thanks,
> Smita
> 
> > +}
> > +
> > +static int __init cxl_pci_driver_init(void)
> > +{
> > +	int rc;
> > +
> > +	rc = pci_register_driver(&cxl_pci_driver);
> > +	if (rc)
> > +		return rc;
> > +
> > +	rc = cxl_cper_register_notifier(cxl_cper_event_call);

Quick aside as I am reading through this, the "notifier" name is
misleading since this callback has nothing to do with the
include/linux/notifier.h API.
Jonathan Cameron Dec. 19, 2023, 2:37 p.m. UTC | #3
On Fri, 15 Dec 2023 15:26:33 -0800
Ira Weiny <ira.weiny@intel.com> wrote:

> If the firmware has configured CXL event support to be firmware first
> the OS can process those events through CPER records.  The CXL layer has
> unique DPA to HPA knowledge and standard event trace parsing in place.
> 
> CPER records contain Bus, Device, Function information which can be used
> to identify the PCI device which is sending the event.
> 
> Change pci driver registration to include registration for a CXL CPER
> notifier to process the events through the trace subsystem.
> 
> Define and use scoped based management to simplify the handling of the
> pci device object.
> 
> Cc: Bjorn Helgaas <bhelgaas@google.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>

I'd break out the pci guard stuff as a precursor patch.  That's likely
to be used elsewhere so it would help for backporting for other users
if it wasn't buried in a patch doing other stuff.

Not to mention that has a different set of likely reviewers to the rest
of this patch.

More generally maybe we should just hardcode the UUID in the tracepoint
definitions?  I think for everything other than the generic one we
only ever call trace_cxl_memory_module(... &mem_mod_event_uuid..)
etc.

It's a little ugly to match on the UUID to call a function where it
hard coded, but less so than inserting the UUID like this does.
Better I think to make it obvious that this isn't actually a variable
(for the ones we understand).

Jonathan

> 
> ---
> NOTE this patch depends on Dan's addition of a device guard[1].
> 
> [1] https://lore.kernel.org/all/170250854466.1522182.17555361077409628655.stgit@dwillia2-xfh.jf.intel.com/
> 
> Changes for v3/v4:
> [djbw: define a __free(pci_dev_put) to release the device automatically]
> [djbw: use device guard from Vishal]
> [iweiny: delete old notifier block structure]
> [iweiny: adjust for new notifier interface]
> ---
>  drivers/cxl/core/mbox.c | 31 +++++++++++++++++++++++-----
>  drivers/cxl/cxlmem.h    |  4 ++++
>  drivers/cxl/pci.c       | 55 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/pci.h     |  2 ++
>  4 files changed, 86 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index b7efa058a100..c9aa723e3391 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -840,9 +840,30 @@ static const uuid_t gen_media_event_uuid = CXL_EVENT_GEN_MEDIA_UUID;
>  static const uuid_t dram_event_uuid = CXL_EVENT_DRAM_UUID;
>  static const uuid_t mem_mod_event_uuid = CXL_EVENT_MEM_MODULE_UUID;
>  
> -static void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> -				   enum cxl_event_log_type type,
> -				   struct cxl_event_record_raw *record)
> +void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> +			    enum cxl_event_log_type type,
> +			    enum cxl_event_type event_type,
> +			    union cxl_event *event)
> +{
> +	switch (event_type) {
> +	case CXL_CPER_EVENT_GEN_MEDIA:
> +		trace_cxl_general_media(cxlmd, type, &gen_media_event_uuid,
> +					&event->gen_media);
> +		break;
> +	case CXL_CPER_EVENT_DRAM:
> +		trace_cxl_dram(cxlmd, type, &dram_event_uuid, &event->dram);
> +		break;
> +	case CXL_CPER_EVENT_MEM_MODULE:
> +		trace_cxl_memory_module(cxlmd, type, &mem_mod_event_uuid,
> +					&event->mem_module);
> +		break;
> +	}
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, CXL);
> +
> +static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> +				     enum cxl_event_log_type type,
> +				     struct cxl_event_record_raw *record)
>  {
>  	union cxl_event *evt = &record->event;
>  	uuid_t *id = &record->id;
> @@ -965,8 +986,8 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
>  			break;
>  
>  		for (i = 0; i < nr_rec; i++)
> -			cxl_event_trace_record(cxlmd, type,
> -					       &payload->records[i]);
> +			__cxl_event_trace_record(cxlmd, type,
> +						 &payload->records[i]);
>  
>  		if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
>  			trace_cxl_overflow(cxlmd, type, payload);

> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 0155fb66b580..638275569d63 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -1,5 +1,6 @@
>  // SPDX-License-Identifier: GPL-2.0-only
>  /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
> +#include <asm-generic/unaligned.h>
>  #include <linux/io-64-nonatomic-lo-hi.h>
>  #include <linux/moduleparam.h>
>  #include <linux/module.h>
> @@ -969,6 +970,58 @@ static struct pci_driver cxl_pci_driver = {
>  	},
>  };
>  
> +#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
> +static void cxl_cper_event_call(enum cxl_event_type ev_type,
> +				struct cxl_cper_event_rec *rec)
> +{
> +	struct cper_cxl_event_devid *device_id = &rec->hdr.device_id;
> +	struct pci_dev *pdev __free(pci_dev_put) = NULL;
> +	struct cxl_dev_state *cxlds = NULL;
> +	enum cxl_event_log_type log_type;
> +	unsigned int devfn;
> +	u32 hdr_flags;
> +
> +	devfn = PCI_DEVFN(device_id->device_num, device_id->func_num);
> +	pdev = pci_get_domain_bus_and_slot(device_id->segment_num,
> +					   device_id->bus_num, devfn);
> +	if (!pdev)
> +		return;
> +
> +	guard(device)(&pdev->dev);
> +	if (pdev->driver == &cxl_pci_driver)
> +		cxlds = pci_get_drvdata(pdev);
> +	if (!cxlds)
> +		return;

This is handling two conditions. I'd find it more readable split like:

	if (pdev->driver != &cxl_pci_driver)
		return;

	cxlds = pci_get_drvdata(pdev);
	if (!cxlds)
		return;

and drop the = NULL above.

> +
> +	/* Fabricate a log type */
> +	hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags);
> +	log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
> +
> +	cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type, &rec->event);
> +}
> +
Ira Weiny Dec. 19, 2023, 4:58 p.m. UTC | #4
Dan Williams wrote:
> Smita Koralahalli wrote:
> > On 12/15/2023 3:26 PM, Ira Weiny wrote:
> > > If the firmware has configured CXL event support to be firmware first
> > > the OS can process those events through CPER records.  The CXL layer has
> > > unique DPA to HPA knowledge and standard event trace parsing in place.
> > > 
> > > CPER records contain Bus, Device, Function information which can be used
> > > to identify the PCI device which is sending the event.
> > > 
> > > Change pci driver registration to include registration for a CXL CPER
> > > notifier to process the events through the trace subsystem.
> > > 
> > > Define and use scoped based management to simplify the handling of the
> > > pci device object.
> > > 
> > > Cc: Bjorn Helgaas <bhelgaas@google.com>
> > > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > > 
> > > ---
> > 
> > [snip]
> > 
> > 
> > > +	switch (event_type) {
> > > +	case CXL_CPER_EVENT_GEN_MEDIA:
> > > +		trace_cxl_general_media(cxlmd, type, &gen_media_event_uuid,
> > > +					&event->gen_media);
> > > +		break;
> > > +	case CXL_CPER_EVENT_DRAM:
> > > +		trace_cxl_dram(cxlmd, type, &dram_event_uuid, &event->dram);
> > > +		break;
> > > +	case CXL_CPER_EVENT_MEM_MODULE:
> > > +		trace_cxl_memory_module(cxlmd, type, &mem_mod_event_uuid,
> > > +					&event->mem_module);
> > > +		break;
> > > +	}
> > > +}
> > 
> > Is default case needed here?
> 
> Yeah, it looks like an uninitialized @type value can be passed through
> the stack here.

That was not my intention but yea.

Added a generic trace with a null UUID.

[snip]

> > > +#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
> > > +static void cxl_cper_event_call(enum cxl_event_type ev_type,
> > > +				struct cxl_cper_event_rec *rec)
> > > +{
> > > +	struct cper_cxl_event_devid *device_id = &rec->hdr.device_id;
> > > +	struct pci_dev *pdev __free(pci_dev_put) = NULL;
> > > +	struct cxl_dev_state *cxlds = NULL;
> > > +	enum cxl_event_log_type log_type;
> > > +	unsigned int devfn;
> > > +	u32 hdr_flags;
> > > +
> > > +	devfn = PCI_DEVFN(device_id->device_num, device_id->func_num);
> > > +	pdev = pci_get_domain_bus_and_slot(device_id->segment_num,
> > > +					   device_id->bus_num, devfn);
> > > +	if (!pdev)
> > > +		return;
> > > +
> > > +	guard(device)(&pdev->dev);
> > > +	if (pdev->driver == &cxl_pci_driver)
> > > +		cxlds = pci_get_drvdata(pdev);
> > > +	if (!cxlds)
> > > +		return;
> > > +
> > > +	/* Fabricate a log type */
> > > +	hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags);
> > > +	log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
> > > +
> > > +	cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type, &rec->event);
> > 
> > Currently, when I run this, I see two trace events printed. One from 
> > here, and another as a non_standard_event from ghes. I think both should 
> > be unified?

By the way, Smita,

Thanks for testing!  I really do appreciate it!

> > 
> > I remember Dan pointing out to me this when I sent decoding for protocol 
> > errors and its still pending on me for protocol errors.
> 
> Good point, so I think the responsibility to trace CXL events should
> belong to ghes_do_proc() and ghes_print_estatus() can just ignore CXL
> events.
> 
> Notice how ghes_proc() sometimes skips ghes_print_estatus(), but
> uncoditionally emits a trace event in ghes_do_proc()? To me that means
> that the cper_estatus_print() inside ghes_print_estatus() can just defer
> to the ghes code to do the hookup to the trace code.
> 
> For example, ras_userspace_consumers() was introduced to skip emitting
> events to the kernel log when the trace event might be handled. My
> assumption is that was for historical reasons, but since CXL events are
> new, just never emit them to the kernel log and always require the trace
> path.
> 
> I am open to other thoughts here, but it seems like ghes_do_proc() is
> where the callback needs to be triggered.

I see.

Ok.  I'll create a pre-patch which moves the protocol error first then
I'll put the events in the ghes_do_proc() well.


[snip]

> > > +	rc = cxl_cper_register_notifier(cxl_cper_event_call);
> 
> Quick aside as I am reading through this, the "notifier" name is
> misleading since this callback has nothing to do with the
> include/linux/notifier.h API.

Fair point.  I debated 'callback' vs 'notifier'.  I'll change it to
callback as I think that is equally correct and as you say does clarify
this is not a 'notifier'.

Ira
Ira Weiny Dec. 19, 2023, 5:17 p.m. UTC | #5
Ira Weiny wrote:
> Dan Williams wrote:
> > Smita Koralahalli wrote:
> > > On 12/15/2023 3:26 PM, Ira Weiny wrote:

[snip]

> > > I remember Dan pointing out to me this when I sent decoding for protocol 
> > > errors and its still pending on me for protocol errors.
> > 
> > Good point, so I think the responsibility to trace CXL events should
> > belong to ghes_do_proc() and ghes_print_estatus() can just ignore CXL
> > events.
> > 
> > Notice how ghes_proc() sometimes skips ghes_print_estatus(), but
> > uncoditionally emits a trace event in ghes_do_proc()? To me that means
> > that the cper_estatus_print() inside ghes_print_estatus() can just defer
> > to the ghes code to do the hookup to the trace code.
> > 
> > For example, ras_userspace_consumers() was introduced to skip emitting
> > events to the kernel log when the trace event might be handled. My
> > assumption is that was for historical reasons, but since CXL events are
> > new, just never emit them to the kernel log and always require the trace
> > path.
> > 
> > I am open to other thoughts here, but it seems like ghes_do_proc() is
> > where the callback needs to be triggered.
> 
> I see.
> 
> Ok.  I'll create a pre-patch which moves the protocol error first then
> I'll put the events in the ghes_do_proc() well.
> 

Apologies.  I really wanted to make this work a pre-cursor patch but I
see that there is not a trace point for the protocol errors yet.  So as
not to slow the progress of this work I'm going to skip moving the
protocol stuff right now.

Also, as part of this work I think moving the CXL specific defines into
the common linux/cper.h is appropriate at this time.

Unless I hear otherwise I'm going to land the event stuff in that common
header and we can move the protocol error defines later.

Thanks again for all the testing,
Ira
Ira Weiny Dec. 19, 2023, 11:27 p.m. UTC | #6
Jonathan Cameron wrote:
> On Fri, 15 Dec 2023 15:26:33 -0800
> Ira Weiny <ira.weiny@intel.com> wrote:
> 
> > If the firmware has configured CXL event support to be firmware first
> > the OS can process those events through CPER records.  The CXL layer has
> > unique DPA to HPA knowledge and standard event trace parsing in place.
> > 
> > CPER records contain Bus, Device, Function information which can be used
> > to identify the PCI device which is sending the event.
> > 
> > Change pci driver registration to include registration for a CXL CPER
> > notifier to process the events through the trace subsystem.
> > 
> > Define and use scoped based management to simplify the handling of the
> > pci device object.
> > 
> > Cc: Bjorn Helgaas <bhelgaas@google.com>
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> I'd break out the pci guard stuff as a precursor patch.  That's likely
> to be used elsewhere so it would help for backporting for other users
> if it wasn't buried in a patch doing other stuff.

That is good.  I've done that.

> 
> Not to mention that has a different set of likely reviewers to the rest
> of this patch.

Yep.

> 
> More generally maybe we should just hardcode the UUID in the tracepoint
> definitions?  I think for everything other than the generic one we
> only ever call trace_cxl_memory_module(... &mem_mod_event_uuid..)
> etc.
> 
> It's a little ugly to match on the UUID to call a function where it
> hard coded, but less so than inserting the UUID like this does.
> Better I think to make it obvious that this isn't actually a variable
> (for the ones we understand).

I thought about that a bit.  But I built the tracing code with generic
macros which contained the UUID.  That complicated my efforts.

I've reworked it again and it took a bit of time but I got it to work.  It
was not that hard but there is a caveat in the generic macros, which I
made a note of.

[snip]

> >  
> > +#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
> > +static void cxl_cper_event_call(enum cxl_event_type ev_type,
> > +				struct cxl_cper_event_rec *rec)
> > +{
> > +	struct cper_cxl_event_devid *device_id = &rec->hdr.device_id;
> > +	struct pci_dev *pdev __free(pci_dev_put) = NULL;
> > +	struct cxl_dev_state *cxlds = NULL;
> > +	enum cxl_event_log_type log_type;
> > +	unsigned int devfn;
> > +	u32 hdr_flags;
> > +
> > +	devfn = PCI_DEVFN(device_id->device_num, device_id->func_num);
> > +	pdev = pci_get_domain_bus_and_slot(device_id->segment_num,
> > +					   device_id->bus_num, devfn);
> > +	if (!pdev)
> > +		return;
> > +
> > +	guard(device)(&pdev->dev);
> > +	if (pdev->driver == &cxl_pci_driver)
> > +		cxlds = pci_get_drvdata(pdev);
> > +	if (!cxlds)
> > +		return;
> 
> This is handling two conditions. I'd find it more readable split like:
> 
> 	if (pdev->driver != &cxl_pci_driver)
> 		return;
> 
> 	cxlds = pci_get_drvdata(pdev);
> 	if (!cxlds)
> 		return;
> 
> and drop the = NULL above.

Done.

Ira
Dan Williams Dec. 19, 2023, 11:36 p.m. UTC | #7
Ira Weiny wrote:
[..]
> > and drop the = NULL above.
> 
> Done.

The NULL assignment was more about making it clear that
__free(pci_dev_put) will take no action until the pdev is acquired.
Otherwise, any future refactoring that introduces a 'return' before
@pdev is acquired needs to be careful to assign @pdev to NULL. So, just
include it in the declaration more as a __free() declaration style issue
than a correctness issue.
Ira Weiny Dec. 20, 2023, 12:29 a.m. UTC | #8
Dan Williams wrote:
> Ira Weiny wrote:
> [..]
> > > and drop the = NULL above.
> > 
> > Done.
> 
> The NULL assignment was more about making it clear that
> __free(pci_dev_put) will take no action until the pdev is acquired.
> Otherwise, any future refactoring that introduces a 'return' before
> @pdev is acquired needs to be careful to assign @pdev to NULL. So, just
> include it in the declaration more as a __free() declaration style issue
> than a correctness issue.

I think he meant the assignment to cxlds.  At least that is the NULL
assignment I took out.

Ira
Jonathan Cameron Jan. 3, 2024, 10:41 a.m. UTC | #9
On Tue, 19 Dec 2023 16:29:08 -0800
Ira Weiny <ira.weiny@intel.com> wrote:

> Dan Williams wrote:
> > Ira Weiny wrote:
> > [..]  
> > > > and drop the = NULL above.  
> > > 
> > > Done.  
> > 
> > The NULL assignment was more about making it clear that
> > __free(pci_dev_put) will take no action until the pdev is acquired.
> > Otherwise, any future refactoring that introduces a 'return' before
> > @pdev is acquired needs to be careful to assign @pdev to NULL. So, just
> > include it in the declaration more as a __free() declaration style issue
> > than a correctness issue.  
> 
> I think he meant the assignment to cxlds.  At least that is the NULL
> assignment I took out.
> 
> Ira

yup.  The other one is correct as Dan pointed out.
diff mbox series

Patch

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index b7efa058a100..c9aa723e3391 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -840,9 +840,30 @@  static const uuid_t gen_media_event_uuid = CXL_EVENT_GEN_MEDIA_UUID;
 static const uuid_t dram_event_uuid = CXL_EVENT_DRAM_UUID;
 static const uuid_t mem_mod_event_uuid = CXL_EVENT_MEM_MODULE_UUID;
 
-static void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
-				   enum cxl_event_log_type type,
-				   struct cxl_event_record_raw *record)
+void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
+			    enum cxl_event_log_type type,
+			    enum cxl_event_type event_type,
+			    union cxl_event *event)
+{
+	switch (event_type) {
+	case CXL_CPER_EVENT_GEN_MEDIA:
+		trace_cxl_general_media(cxlmd, type, &gen_media_event_uuid,
+					&event->gen_media);
+		break;
+	case CXL_CPER_EVENT_DRAM:
+		trace_cxl_dram(cxlmd, type, &dram_event_uuid, &event->dram);
+		break;
+	case CXL_CPER_EVENT_MEM_MODULE:
+		trace_cxl_memory_module(cxlmd, type, &mem_mod_event_uuid,
+					&event->mem_module);
+		break;
+	}
+}
+EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, CXL);
+
+static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd,
+				     enum cxl_event_log_type type,
+				     struct cxl_event_record_raw *record)
 {
 	union cxl_event *evt = &record->event;
 	uuid_t *id = &record->id;
@@ -965,8 +986,8 @@  static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
 			break;
 
 		for (i = 0; i < nr_rec; i++)
-			cxl_event_trace_record(cxlmd, type,
-					       &payload->records[i]);
+			__cxl_event_trace_record(cxlmd, type,
+						 &payload->records[i]);
 
 		if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
 			trace_cxl_overflow(cxlmd, type, payload);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index e5d770e26e02..e7e9508fecac 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -802,6 +802,10 @@  void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
 void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
 				  unsigned long *cmds);
 void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status);
+void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
+			    enum cxl_event_log_type type,
+			    enum cxl_event_type event_type,
+			    union cxl_event *event);
 int cxl_set_timestamp(struct cxl_memdev_state *mds);
 int cxl_poison_state_init(struct cxl_memdev_state *mds);
 int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 0155fb66b580..638275569d63 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -1,5 +1,6 @@ 
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
+#include <asm-generic/unaligned.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/moduleparam.h>
 #include <linux/module.h>
@@ -969,6 +970,58 @@  static struct pci_driver cxl_pci_driver = {
 	},
 };
 
+#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
+static void cxl_cper_event_call(enum cxl_event_type ev_type,
+				struct cxl_cper_event_rec *rec)
+{
+	struct cper_cxl_event_devid *device_id = &rec->hdr.device_id;
+	struct pci_dev *pdev __free(pci_dev_put) = NULL;
+	struct cxl_dev_state *cxlds = NULL;
+	enum cxl_event_log_type log_type;
+	unsigned int devfn;
+	u32 hdr_flags;
+
+	devfn = PCI_DEVFN(device_id->device_num, device_id->func_num);
+	pdev = pci_get_domain_bus_and_slot(device_id->segment_num,
+					   device_id->bus_num, devfn);
+	if (!pdev)
+		return;
+
+	guard(device)(&pdev->dev);
+	if (pdev->driver == &cxl_pci_driver)
+		cxlds = pci_get_drvdata(pdev);
+	if (!cxlds)
+		return;
+
+	/* Fabricate a log type */
+	hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags);
+	log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
+
+	cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type, &rec->event);
+}
+
+static int __init cxl_pci_driver_init(void)
+{
+	int rc;
+
+	rc = pci_register_driver(&cxl_pci_driver);
+	if (rc)
+		return rc;
+
+	rc = cxl_cper_register_notifier(cxl_cper_event_call);
+	if (rc)
+		pci_unregister_driver(&cxl_pci_driver);
+
+	return rc;
+}
+
+static void __exit cxl_pci_driver_exit(void)
+{
+	cxl_cper_unregister_notifier(cxl_cper_event_call);
+	pci_unregister_driver(&cxl_pci_driver);
+}
+
+module_init(cxl_pci_driver_init);
+module_exit(cxl_pci_driver_exit);
 MODULE_LICENSE("GPL v2");
-module_pci_driver(cxl_pci_driver);
 MODULE_IMPORT_NS(CXL);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 60ca768bc867..290d0a2651b2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1170,6 +1170,7 @@  int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
 u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp);
 struct pci_dev *pci_dev_get(struct pci_dev *dev);
 void pci_dev_put(struct pci_dev *dev);
+DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T))
 void pci_remove_bus(struct pci_bus *b);
 void pci_stop_and_remove_bus_device(struct pci_dev *dev);
 void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev);
@@ -1871,6 +1872,7 @@  void pci_cfg_access_unlock(struct pci_dev *dev);
 void pci_dev_lock(struct pci_dev *dev);
 int pci_dev_trylock(struct pci_dev *dev);
 void pci_dev_unlock(struct pci_dev *dev);
+DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T))
 
 /*
  * PCI domain support.  Sometimes called PCI segment (eg by ACPI),