
[03/14] iommufd: Replace the hwpt->devices list with iommufd_group

Message ID 3-v1-7612f88c19f5+2f21-iommufd_alloc_jgg@nvidia.com
State New
Series Add iommufd physical device operations for replace and alloc hwpt

Commit Message

Jason Gunthorpe Feb. 25, 2023, 12:27 a.m. UTC
The devices list was used as a simple way to avoid having per-group
information. Now that this seems to be unavoidable, just commit to
per-group information fully and remove the devices list.

The iommufd_group stores the currently assigned hwpt for the entire group
and we can manage the per-device attach/detach with a simple counter.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/device.c          | 75 ++++++++++++-------------
 drivers/iommu/iommufd/hw_pagetable.c    | 23 +++-----
 drivers/iommu/iommufd/iommufd_private.h | 12 ++--
 3 files changed, 47 insertions(+), 63 deletions(-)
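In sketch form, the new per-group scheme looks like this (condensed from the patch below; locking, refcounting, and error handling omitted):

	struct iommufd_group {
		...
		struct iommufd_hw_pagetable *hwpt;	/* hwpt for the whole group */
		unsigned int devices;			/* count of attached devices */
	};

	/* attach: only the first device in the group attaches the domain */
	if (!idev->igroup->devices) {
		rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
		idev->igroup->hwpt = hwpt;
	}
	idev->igroup->devices++;

	/* detach: only the last device out detaches the domain */
	idev->igroup->devices--;
	if (!idev->igroup->devices) {
		iommu_detach_group(hwpt->domain, idev->igroup->group);
		idev->igroup->hwpt = NULL;
	}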

Comments

Tian, Kevin March 2, 2023, 8:01 a.m. UTC | #1
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Saturday, February 25, 2023 8:28 AM
> 
> +struct iommufd_hw_pagetable *
> +iommufd_hw_pagetable_detach(struct iommufd_device *idev)
>  {
> -	if (!iommufd_hw_pagetable_has_group(hwpt, idev->igroup))
> +	struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;
> +
> +	lockdep_assert_held(&idev->igroup->lock);
> +
> +	idev->igroup->devices--;
> +	if (!idev->igroup->devices) {
>  		iommu_detach_group(hwpt->domain, idev->igroup->group);
> +		idev->igroup->hwpt = NULL;

hwpt->obj.users should be decremented here instead of leaving it
in iommufd_device_detach().

>  void iommufd_device_detach(struct iommufd_device *idev)
>  {
> -	struct iommufd_hw_pagetable *hwpt = idev->hwpt;
> +	struct iommufd_hw_pagetable *hwpt;
> 
> -	mutex_lock(&hwpt->devices_lock);
> -	list_del(&idev->devices_item);
> -	idev->hwpt = NULL;
> -	iommufd_hw_pagetable_detach(hwpt, idev);
> -	mutex_unlock(&hwpt->devices_lock);
> +	mutex_lock(&idev->igroup->lock);
> +	hwpt = iommufd_hw_pagetable_detach(idev);

The only parameter is idev, while the function is named hw_pagetable_xxx.

Is it cleaner to get hwpt here and then pass it into the detach function?
Jason Gunthorpe March 6, 2023, 8:22 p.m. UTC | #2
On Thu, Mar 02, 2023 at 08:01:00AM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Saturday, February 25, 2023 8:28 AM
> > 
> > +struct iommufd_hw_pagetable *
> > +iommufd_hw_pagetable_detach(struct iommufd_device *idev)
> >  {
> > -	if (!iommufd_hw_pagetable_has_group(hwpt, idev->igroup))
> > +	struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;
> > +
> > +	lockdep_assert_held(&idev->igroup->lock);
> > +
> > +	idev->igroup->devices--;
> > +	if (!idev->igroup->devices) {
> >  		iommu_detach_group(hwpt->domain, idev->igroup->group);
> > +		idev->igroup->hwpt = NULL;
> 
> hwpt->obj.users should be decremented here instead of leaving it
> in iommufd_device_detach().

It is like this because eventually we can't call
iommufd_object_destroy_user() while holding the locks.

So the lowest function returns the hwpt up the call chain, and once
everything is unlocked the caller invokes iommufd_hw_pagetable_put().
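Concretely, the caller in this patch ends up with that pattern, with the
object only released after the group lock is dropped (the
iommufd_hw_pagetable_put() mentioned above would presumably wrap the
auto_domain branch in a later patch):

	mutex_lock(&idev->igroup->lock);
	hwpt = iommufd_hw_pagetable_detach(idev);
	mutex_unlock(&idev->igroup->lock);

	/* no locks held here, so destroying the object is safe */
	if (hwpt->auto_domain)
		iommufd_object_destroy_user(idev->ictx, &hwpt->obj);
	else
		refcount_dec(&hwpt->obj.users);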

> >  void iommufd_device_detach(struct iommufd_device *idev)
> >  {
> > -	struct iommufd_hw_pagetable *hwpt = idev->hwpt;
> > +	struct iommufd_hw_pagetable *hwpt;
> > 
> > -	mutex_lock(&hwpt->devices_lock);
> > -	list_del(&idev->devices_item);
> > -	idev->hwpt = NULL;
> > -	iommufd_hw_pagetable_detach(hwpt, idev);
> > -	mutex_unlock(&hwpt->devices_lock);
> > +	mutex_lock(&idev->igroup->lock);
> > +	hwpt = iommufd_hw_pagetable_detach(idev);
> 
> The only parameter is idev, while the function is named hw_pagetable_xxx.
>
> Is it cleaner to get hwpt here and then pass it into the detach function?

Not really, the function needs three members of the idev to work.

The paired attach function is:

int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev)

So I think it would be very unclear to change the name, and this is
more a hw_pagetable operation than a device operation.

Jason
Tian, Kevin March 7, 2023, 2:38 a.m. UTC | #3
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, March 7, 2023 4:22 AM
> 
> On Thu, Mar 02, 2023 at 08:01:00AM +0000, Tian, Kevin wrote:
> > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > Sent: Saturday, February 25, 2023 8:28 AM
> > >
> > > +struct iommufd_hw_pagetable *
> > > +iommufd_hw_pagetable_detach(struct iommufd_device *idev)
> > >  {
> > > -	if (!iommufd_hw_pagetable_has_group(hwpt, idev->igroup))
> > > +	struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;
> > > +
> > > +	lockdep_assert_held(&idev->igroup->lock);
> > > +
> > > +	idev->igroup->devices--;
> > > +	if (!idev->igroup->devices) {
> > >  		iommu_detach_group(hwpt->domain, idev->igroup->group);
> > > +		idev->igroup->hwpt = NULL;
> >
> > hwpt->obj.users should be decremented here instead of leaving it
> > in iommufd_device_detach().
> 
> It is like this because eventually we can't call
> iommufd_object_destroy_user() while holding the locks.
> 
> So the lowest function returns the hwpt up the call chain, and once
> everything is unlocked the caller invokes iommufd_hw_pagetable_put().

but don't we have an unbalanced refcnt here?

device_attach:

	if (!idev->igroup->devices) {
 		rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
 		if (rc)
 			goto err_unresv;
		idev->igroup->hwpt = hwpt;
		refcount_inc(&hwpt->obj.users); //first device increases refcnt
 	}

device_detach:

	idev->igroup->devices--;
	if (!idev->igroup->devices) {
 		iommu_detach_group(hwpt->domain, idev->igroup->group);
		idev->igroup->hwpt = NULL;
	}

	...

	if (hwpt->auto_domain)
		iommufd_object_destroy_user(idev->ictx, &hwpt->obj);
	else
		refcount_dec(&hwpt->obj.users); //every device decreases refcnt
Jason Gunthorpe March 7, 2023, 1:53 p.m. UTC | #4
On Tue, Mar 07, 2023 at 02:38:47AM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Tuesday, March 7, 2023 4:22 AM
> > 
> > On Thu, Mar 02, 2023 at 08:01:00AM +0000, Tian, Kevin wrote:
> > > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > > Sent: Saturday, February 25, 2023 8:28 AM
> > > >
> > > > +struct iommufd_hw_pagetable *
> > > > +iommufd_hw_pagetable_detach(struct iommufd_device *idev)
> > > >  {
> > > > -	if (!iommufd_hw_pagetable_has_group(hwpt, idev->igroup))
> > > > +	struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;
> > > > +
> > > > +	lockdep_assert_held(&idev->igroup->lock);
> > > > +
> > > > +	idev->igroup->devices--;
> > > > +	if (!idev->igroup->devices) {
> > > >  		iommu_detach_group(hwpt->domain, idev->igroup->group);
> > > > +		idev->igroup->hwpt = NULL;
> > >
> > > hwpt->obj.users should be decremented here instead of leaving it
> > > in iommufd_device_detach().
> > 
> > It is like this because eventually we can't call
> > iommufd_object_destroy_user() while holding the locks.
> > 
> > So the lowest function returns the hwpt up the call chain and once
> > everything is unlocked then it calls iommufd_hw_pagetable_put()
> 
> but don't we have an unbalanced refcnt here?

Yes, the refcount should be incremented for every attached device

Jason
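For reference, the balanced version implied by that reply takes the
reference per device rather than per group. A sketch of the attach side
(an assumption about what the fix looks like, not code from this series):

	if (!idev->igroup->devices) {
		rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
		if (rc)
			goto err_unresv;
		idev->igroup->hwpt = hwpt;
	}
	/* every attached device holds a reference, matching the
	 * per-device refcount_dec() in the detach path */
	refcount_inc(&hwpt->obj.users);
	idev->igroup->devices++;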

Patch

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index d1e227f310e823..264bfa2212481f 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -20,9 +20,12 @@  static void iommufd_group_release(struct kref *kref)
 	struct iommufd_group *igroup =
 		container_of(kref, struct iommufd_group, ref);
 
+	WARN_ON(igroup->hwpt || igroup->devices);
+
 	xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
 		   NULL, GFP_KERNEL);
 	iommu_group_put(igroup->group);
+	mutex_destroy(&igroup->lock);
 	kfree(igroup);
 }
 
@@ -70,6 +73,7 @@  static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
 	}
 
 	kref_init(&new_igroup->ref);
+	mutex_init(&new_igroup->lock);
 	/* group reference moves into new_igroup */
 	new_igroup->group = group;
 
@@ -266,28 +270,15 @@  static int iommufd_device_setup_msi(struct iommufd_device *idev,
 	return 0;
 }
 
-static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt,
-					   struct iommufd_group *igroup)
-{
-	struct iommufd_device *cur_dev;
-
-	lockdep_assert_held(&hwpt->devices_lock);
-
-	list_for_each_entry(cur_dev, &hwpt->devices, devices_item)
-		if (cur_dev->igroup->group == igroup->group)
-			return true;
-	return false;
-}
-
 int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
 				struct iommufd_device *idev)
 {
 	phys_addr_t sw_msi_start = PHYS_ADDR_MAX;
 	int rc;
 
-	lockdep_assert_held(&hwpt->devices_lock);
+	lockdep_assert_held(&idev->igroup->lock);
 
-	if (WARN_ON(idev->hwpt))
+	if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt)
 		return -EINVAL;
 
 	/*
@@ -302,7 +293,7 @@  int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
 				hwpt->domain->ops->enforce_cache_coherency(
 					hwpt->domain);
 		if (!hwpt->enforce_cache_coherency) {
-			WARN_ON(list_empty(&hwpt->devices));
+			WARN_ON(!idev->igroup->devices);
 			return -EINVAL;
 		}
 	}
@@ -318,26 +309,38 @@  int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
 		goto err_unresv;
 
 	/*
-	 * FIXME: Hack around missing a device-centric iommu api, only attach to
-	 * the group once for the first device that is in the group.
+	 * Only attach to the group once for the first device that is in the
+	 * group. All the other devices will follow this attachment.
+	 * The user can attach every device individually as well.
 	 */
-	if (!iommufd_hw_pagetable_has_group(hwpt, idev->igroup)) {
+	if (!idev->igroup->devices) {
 		rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
 		if (rc)
 			goto err_unresv;
+		idev->igroup->hwpt = hwpt;
+		refcount_inc(&hwpt->obj.users);
 	}
+	idev->igroup->devices++;
 	return 0;
 err_unresv:
 	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
 	return rc;
 }
 
-void iommufd_hw_pagetable_detach(struct iommufd_hw_pagetable *hwpt,
-				 struct iommufd_device *idev)
+struct iommufd_hw_pagetable *
+iommufd_hw_pagetable_detach(struct iommufd_device *idev)
 {
-	if (!iommufd_hw_pagetable_has_group(hwpt, idev->igroup))
+	struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;
+
+	lockdep_assert_held(&idev->igroup->lock);
+
+	idev->igroup->devices--;
+	if (!idev->igroup->devices) {
 		iommu_detach_group(hwpt->domain, idev->igroup->group);
+		idev->igroup->hwpt = NULL;
+	}
 	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
+	return hwpt;
 }
 
 static int iommufd_device_do_attach(struct iommufd_device *idev,
@@ -345,16 +348,9 @@  static int iommufd_device_do_attach(struct iommufd_device *idev,
 {
 	int rc;
 
-	mutex_lock(&hwpt->devices_lock);
+	mutex_lock(&idev->igroup->lock);
 	rc = iommufd_hw_pagetable_attach(hwpt, idev);
-	if (rc)
-		goto out_unlock;
-
-	idev->hwpt = hwpt;
-	refcount_inc(&hwpt->obj.users);
-	list_add(&idev->devices_item, &hwpt->devices);
-out_unlock:
-	mutex_unlock(&hwpt->devices_lock);
+	mutex_unlock(&idev->igroup->lock);
 	return rc;
 }
 
@@ -364,7 +360,7 @@  static int iommufd_device_do_attach(struct iommufd_device *idev,
  * Automatic domain selection will never pick a manually created domain.
  */
 static int iommufd_device_auto_get_domain(struct iommufd_device *idev,
-					  struct iommufd_ioas *ioas)
+					  struct iommufd_ioas *ioas, u32 *pt_id)
 {
 	struct iommufd_hw_pagetable *hwpt;
 	int rc;
@@ -391,6 +387,7 @@  static int iommufd_device_auto_get_domain(struct iommufd_device *idev,
 		 */
 		if (rc == -EINVAL)
 			continue;
+		*pt_id = hwpt->obj.id;
 		goto out_unlock;
 	}
 
@@ -400,6 +397,7 @@  static int iommufd_device_auto_get_domain(struct iommufd_device *idev,
 		goto out_unlock;
 	}
 	hwpt->auto_domain = true;
+	*pt_id = hwpt->obj.id;
 
 	mutex_unlock(&ioas->mutex);
 	iommufd_object_finalize(idev->ictx, &hwpt->obj);
@@ -444,7 +442,7 @@  int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
 		struct iommufd_ioas *ioas =
 			container_of(pt_obj, struct iommufd_ioas, obj);
 
-		rc = iommufd_device_auto_get_domain(idev, ioas);
+		rc = iommufd_device_auto_get_domain(idev, ioas, pt_id);
 		if (rc)
 			goto out_put_pt_obj;
 		break;
@@ -455,7 +453,6 @@  int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
 	}
 
 	refcount_inc(&idev->obj.users);
-	*pt_id = idev->hwpt->obj.id;
 	rc = 0;
 
 out_put_pt_obj:
@@ -473,13 +470,11 @@  EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
  */
 void iommufd_device_detach(struct iommufd_device *idev)
 {
-	struct iommufd_hw_pagetable *hwpt = idev->hwpt;
+	struct iommufd_hw_pagetable *hwpt;
 
-	mutex_lock(&hwpt->devices_lock);
-	list_del(&idev->devices_item);
-	idev->hwpt = NULL;
-	iommufd_hw_pagetable_detach(hwpt, idev);
-	mutex_unlock(&hwpt->devices_lock);
+	mutex_lock(&idev->igroup->lock);
+	hwpt = iommufd_hw_pagetable_detach(idev);
+	mutex_unlock(&idev->igroup->lock);
 
 	if (hwpt->auto_domain)
 		iommufd_object_destroy_user(idev->ictx, &hwpt->obj);
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 6cdb6749d359f3..566eba0cd9b917 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -11,8 +11,6 @@  void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
 	struct iommufd_hw_pagetable *hwpt =
 		container_of(obj, struct iommufd_hw_pagetable, obj);
 
-	WARN_ON(!list_empty(&hwpt->devices));
-
 	if (!list_empty(&hwpt->hwpt_item)) {
 		mutex_lock(&hwpt->ioas->mutex);
 		list_del(&hwpt->hwpt_item);
@@ -25,7 +23,6 @@  void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
 		iommu_domain_free(hwpt->domain);
 
 	refcount_dec(&hwpt->ioas->obj.users);
-	mutex_destroy(&hwpt->devices_lock);
 }
 
 /**
@@ -52,9 +49,7 @@  iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 	if (IS_ERR(hwpt))
 		return hwpt;
 
-	INIT_LIST_HEAD(&hwpt->devices);
 	INIT_LIST_HEAD(&hwpt->hwpt_item);
-	mutex_init(&hwpt->devices_lock);
 	/* Pairs with iommufd_hw_pagetable_destroy() */
 	refcount_inc(&ioas->obj.users);
 	hwpt->ioas = ioas;
@@ -65,13 +60,16 @@  iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 		goto out_abort;
 	}
 
-	mutex_lock(&hwpt->devices_lock);
+	mutex_lock(&idev->igroup->lock);
 
 	/*
 	 * immediate_attach exists only to accommodate iommu drivers that cannot
 	 * directly allocate a domain. These drivers do not finish creating the
 	 * domain until attach is completed. Thus we must have this call
 	 * sequence. Once those drivers are fixed this should be removed.
+	 *
+	 * Note we hold the igroup->lock here which prevents any other thread
+	 * from observing igroup->hwpt until we finish setting it up.
 	 */
 	if (immediate_attach) {
 		rc = iommufd_hw_pagetable_attach(hwpt, idev);
@@ -84,21 +82,14 @@  iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 		goto out_detach;
 	list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list);
 
-	if (immediate_attach) {
-		/* See iommufd_device_do_attach() */
-		refcount_inc(&hwpt->obj.users);
-		idev->hwpt = hwpt;
-		list_add(&idev->devices_item, &hwpt->devices);
-	}
-
-	mutex_unlock(&hwpt->devices_lock);
+	mutex_unlock(&idev->igroup->lock);
 	return hwpt;
 
 out_detach:
 	if (immediate_attach)
-		iommufd_hw_pagetable_detach(hwpt, idev);
+		iommufd_hw_pagetable_detach(idev);
 out_unlock:
-	mutex_unlock(&hwpt->devices_lock);
+	mutex_unlock(&idev->igroup->lock);
 out_abort:
 	iommufd_object_abort_and_destroy(ictx, &hwpt->obj);
 	return ERR_PTR(rc);
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 2544f10dae9aef..5f3ad16da819e7 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -250,8 +250,6 @@  struct iommufd_hw_pagetable {
 	bool msi_cookie : 1;
 	/* Head at iommufd_ioas::hwpt_list */
 	struct list_head hwpt_item;
-	struct mutex devices_lock;
-	struct list_head devices;
 };
 
 struct iommufd_hw_pagetable *
@@ -259,14 +257,17 @@  iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 			   struct iommufd_device *idev, bool immediate_attach);
 int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
 				struct iommufd_device *idev);
-void iommufd_hw_pagetable_detach(struct iommufd_hw_pagetable *hwpt,
-				 struct iommufd_device *idev);
+struct iommufd_hw_pagetable *
+iommufd_hw_pagetable_detach(struct iommufd_device *idev);
 void iommufd_hw_pagetable_destroy(struct iommufd_object *obj);
 
 struct iommufd_group {
 	struct kref ref;
+	struct mutex lock;
 	struct iommufd_ctx *ictx;
 	struct iommu_group *group;
+	struct iommufd_hw_pagetable *hwpt;
+	unsigned int devices;
 };
 
 /*
@@ -278,9 +279,6 @@  struct iommufd_device {
 	struct iommufd_object obj;
 	struct iommufd_ctx *ictx;
 	struct iommufd_group *igroup;
-	struct iommufd_hw_pagetable *hwpt;
-	/* Head at iommufd_hw_pagetable::devices */
-	struct list_head devices_item;
 	/* always the physical device */
 	struct device *dev;
 	bool enforce_cache_coherency;