[PATCH 2/2] iommu/vt-d: Batch IOTLB/dev-IOTLB invalidation commands

From: Tina Zhang
Date: Thu May 16 2024 - 20:38:17 EST


Utilize batch command processing in IOTLB/dev-IOTLB invalidation
operations: instead of submitting each invalidation descriptor with its
own qi_submit_sync() call, accumulate up to QI_MAX_BATCH_DESC_COUNT
descriptors per IOMMU and submit them together. This cuts down the
number of invalidation queue submissions and completion waits.

Signed-off-by: Tina Zhang <tina.zhang@xxxxxxxxx>
---
drivers/iommu/intel/cache.c | 76 ++++++++++++++++++++++++++++++-------
1 file changed, 62 insertions(+), 14 deletions(-)
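
For reviewers: below is a minimal user-space sketch of the
accumulate-and-flush pattern the new helpers implement. The demo_*
names are made up for illustration only; the real code builds struct
qi_desc entries and submits them with qi_submit_sync(). The batch is
flushed whenever the target IOMMU changes because each IOMMU has its
own invalidation queue, so descriptors for different IOMMUs cannot
share one submission.

#include <stdio.h>
#include <string.h>

#define DEMO_MAX_BATCH 2        /* mirrors QI_MAX_BATCH_DESC_COUNT */

/* Stand-in for struct qi_desc: one queued-invalidation descriptor. */
struct demo_desc {
        unsigned long long qw0;
};

struct demo_batch {
        struct demo_desc desc[DEMO_MAX_BATCH];
        int num;
        int queue_id;           /* stand-in for the owning IOMMU */
};

/* Submit whatever has been accumulated, then reset the batch. */
static void demo_batch_submit(struct demo_batch *b)
{
        if (!b->num)
                return;
        printf("submit %d descriptor(s) to queue %d\n", b->num, b->queue_id);
        b->num = 0;
        memset(b->desc, 0, sizeof(b->desc));
}

/*
 * Account for the descriptor the caller just built in desc[num] and
 * flush once the batch is full, like cache_invalidate_cmd_batch_add().
 */
static void demo_batch_add(struct demo_batch *b)
{
        if (!b->desc[b->num].qw0)       /* nothing was built, skip */
                return;
        if (++b->num == DEMO_MAX_BATCH)
                demo_batch_submit(b);
}

int main(void)
{
        struct demo_batch b = { .queue_id = 0 };
        int queues[] = { 0, 0, 1 };     /* per-"cache tag" target queue */

        for (int i = 0; i < 3; i++) {
                /* Descriptors for different queues cannot share a batch. */
                if (b.queue_id != queues[i]) {
                        demo_batch_submit(&b);
                        b.queue_id = queues[i];
                }
                b.desc[b.num].qw0 = 0x100 + i;  /* "build" a descriptor */
                demo_batch_add(&b);
        }
        demo_batch_submit(&b);          /* flush the leftover descriptors */
        return 0;
}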

diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index dcf5e0e6af17..0a06e8565554 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -19,6 +19,14 @@
#include "pasid.h"
#include "trace.h"

+/* The maximum number of descriptors in a batch submission */
+#define QI_MAX_BATCH_DESC_COUNT 2
+
+struct qi_cmd_batch {
+ struct qi_desc desc[QI_MAX_BATCH_DESC_COUNT];
+ int num;
+};
+
/* Check if an existing cache tag can be reused for a new association. */
static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
struct intel_iommu *iommu, struct device *dev,
@@ -254,6 +262,26 @@ static unsigned long calculate_psi_aligned_address(unsigned long start,
return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
}

+static inline void cache_invalidate_cmd_batch_submit(struct intel_iommu *iommu,
+ struct qi_cmd_batch *cmds)
+{
+ if (!cmds->num)
+ return;
+
+ qi_submit_sync(iommu, cmds->desc, cmds->num, 0);
+ memset(cmds, 0, sizeof(struct qi_cmd_batch));
+}
+
+static inline void cache_invalidate_cmd_batch_add(struct intel_iommu *iommu,
+ struct qi_cmd_batch *cmds)
+{
+ if (!cmds->desc[cmds->num].qw0)
+ return;
+
+ if (++cmds->num == QI_MAX_BATCH_DESC_COUNT)
+ cache_invalidate_cmd_batch_submit(iommu, cmds);
+}
+
/*
* Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
* when the memory mappings in the target domain have been modified.
@@ -264,21 +292,28 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
unsigned long pages, mask, addr;
struct cache_tag *tag;
unsigned long flags;
+ struct intel_iommu *iommu = NULL;
+ struct qi_cmd_batch cmds = {0};

addr = calculate_psi_aligned_address(start, end, &pages, &mask);

spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
- struct intel_iommu *iommu = tag->iommu;
struct device_domain_info *info;
u16 sid;

+ if (iommu != tag->iommu) {
+ cache_invalidate_cmd_batch_submit(iommu, &cmds);
+ iommu = tag->iommu;
+ }
+
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
if (domain->use_first_level) {
qi_flush_piotlb(iommu, tag->domain_id,
- tag->pasid, addr, pages, ih, NULL);
+ tag->pasid, addr, pages,
+ ih, &cmds.desc[cmds.num]);
} else {
/*
* Fallback to domain selective flush if no
@@ -288,13 +323,14 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
mask > cap_max_amask_val(iommu->cap))
iommu->flush.flush_iotlb(iommu, tag->domain_id,
0, 0, DMA_TLB_DSI_FLUSH,
- NULL);
+ &cmds.desc[cmds.num]);
else
iommu->flush.flush_iotlb(iommu, tag->domain_id,
addr | ih, mask,
DMA_TLB_PSI_FLUSH,
- NULL);
+ &cmds.desc[cmds.num]);
}
+ cache_invalidate_cmd_batch_add(iommu, &cmds);
break;
case CACHE_TAG_NESTING_DEVTLB:
/*
@@ -310,23 +346,25 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
case CACHE_TAG_DEVTLB:
info = dev_iommu_priv_get(tag->dev);
sid = PCI_DEVID(info->bus, info->devfn);
-
if (tag->pasid == IOMMU_NO_PASID)
qi_flush_dev_iotlb(iommu, sid, info->pfsid,
info->ats_qdep, addr, mask,
- NULL);
+ &cmds.desc[cmds.num]);
else
qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
tag->pasid, info->ats_qdep,
- addr, mask, NULL);
+ addr, mask, &cmds.desc[cmds.num]);
+ cache_invalidate_cmd_batch_add(iommu, &cmds);

quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid,
- info->ats_qdep, NULL);
+ info->ats_qdep, &cmds.desc[cmds.num]);
+ cache_invalidate_cmd_batch_add(iommu, &cmds);
break;
}

trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
}
+ cache_invalidate_cmd_batch_submit(iommu, &cmds);
spin_unlock_irqrestore(&domain->cache_lock, flags);
}

@@ -338,40 +376,50 @@ void cache_tag_flush_all(struct dmar_domain *domain)
{
struct cache_tag *tag;
unsigned long flags;
+ struct intel_iommu *iommu = NULL;
+ struct qi_cmd_batch cmds = {0};

spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
- struct intel_iommu *iommu = tag->iommu;
struct device_domain_info *info;
u16 sid;

+ if (iommu != tag->iommu) {
+ cache_invalidate_cmd_batch_submit(iommu, &cmds);
+ iommu = tag->iommu;
+ }
+
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
if (domain->use_first_level)
qi_flush_piotlb(iommu, tag->domain_id,
tag->pasid, 0, -1, 0,
- NULL);
+ &cmds.desc[cmds.num]);
else
iommu->flush.flush_iotlb(iommu, tag->domain_id,
0, 0, DMA_TLB_DSI_FLUSH,
- NULL);
+ &cmds.desc[cmds.num]);
+ cache_invalidate_cmd_batch_add(iommu, &cmds);
break;
case CACHE_TAG_DEVTLB:
case CACHE_TAG_NESTING_DEVTLB:
info = dev_iommu_priv_get(tag->dev);
sid = PCI_DEVID(info->bus, info->devfn);
-
qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
- 0, MAX_AGAW_PFN_WIDTH, NULL);
+ 0, MAX_AGAW_PFN_WIDTH, &cmds.desc[cmds.num]);
+ cache_invalidate_cmd_batch_add(iommu, &cmds);
+
quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
IOMMU_NO_PASID, info->ats_qdep,
- NULL);
+ &cmds.desc[cmds.num]);
+ cache_invalidate_cmd_batch_add(iommu, &cmds);
break;
}

trace_cache_tag_flush_all(tag);
}
+ cache_invalidate_cmd_batch_submit(iommu, &cmds);
spin_unlock_irqrestore(&domain->cache_lock, flags);
}

--
2.39.3