[PATCH v3 2/2] drm/msm: Extend gpu devcore dumps with pgtbl info

From: Rob Clark
Date: Wed May 22 2024 - 17:50:57 EST


From: Rob Clark <robdclark@xxxxxxxxxxxx>

In the case of iova fault triggered devcore dumps, include additional
debug information based on what we think is the current page tables,
including the TTBR0 value (which should match what we have in
adreno_smmu_fault_info unless things have gone horribly wrong), and
the pagetable entries traversed in the process of resolving the
faulting iova.

Signed-off-by: Rob Clark <robdclark@xxxxxxxxxxxx>
---
drivers/gpu/drm/msm/adreno/adreno_gpu.c | 10 ++++++++++
drivers/gpu/drm/msm/msm_gpu.c | 22 ++++++++++++++++++++++
drivers/gpu/drm/msm/msm_gpu.h | 8 ++++++++
drivers/gpu/drm/msm/msm_iommu.c | 18 ++++++++++++++++++
drivers/gpu/drm/msm/msm_mmu.h | 5 ++++-
5 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index a00241e3373b..3b4c75df0a5f 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -861,6 +861,16 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
drm_printf(p, " - dir=%s\n", info->flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ");
drm_printf(p, " - type=%s\n", info->type);
drm_printf(p, " - source=%s\n", info->block);
+
+ /* Information extracted from what we think are the current
+ * pgtables. Hopefully the TTBR0 matches what we've extracted
+ * from the SMMU registers in smmu_info!
+ */
+ drm_puts(p, "pgtable-fault-info:\n");
+ drm_printf(p, " - ttbr0: %.16llx\n", (u64)info->pgtbl_ttbr0);
+ drm_printf(p, " - asid: %d\n", info->asid);
+ drm_printf(p, " - ptes: %.16llx %.16llx %.16llx %.16llx\n",
+ info->ptes[0], info->ptes[1], info->ptes[2], info->ptes[3]);
}

drm_printf(p, "rbbm-status: 0x%08x\n", state->rbbm_status);
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 43cde0590250..647bddc897f2 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -256,6 +256,18 @@ static void msm_gpu_crashstate_get_bo(struct msm_gpu_state *state,
state->nr_bos++;
}

+static int pgtable_walk_cb(void *cb_data, void *pte, int level)
+{
+ struct msm_gpu_fault_info *info = cb_data;
+
+ if (level > ARRAY_SIZE(info->ptes))
+ return -EINVAL;
+
+ info->ptes[level] = *(u64 *)pte;
+
+ return 0;
+}
+
static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
struct msm_gem_submit *submit, char *comm, char *cmd)
{
@@ -281,6 +293,16 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
if (submit) {
int i;

+ if (state->fault_info.ttbr0) {
+ struct msm_gpu_fault_info *info = &state->fault_info;
+ struct msm_mmu *mmu = submit->aspace->mmu;
+
+ msm_iommu_pagetable_params(mmu, &info->pgtbl_ttbr0,
+ &info->asid);
+ msm_iommu_pagetable_walk(mmu, info->iova,
+ pgtable_walk_cb, info);
+ }
+
state->bos = kcalloc(submit->nr_bos,
sizeof(struct msm_gpu_state_bo), GFP_KERNEL);

diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 04a696ac4626..82fbb626461a 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -101,6 +101,14 @@ struct msm_gpu_fault_info {
int flags;
const char *type;
const char *block;
+
+ /* Information about what we think/expect is the current SMMU state,
+ * for example expected_ttbr0 should match smmu_info.ttbr0 which
+ * was read back from SMMU registers.
+ */
+ phys_addr_t pgtbl_ttbr0;
+ u64 ptes[4];
+ int asid;
};

/**
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index d5512037c38b..f46ed4667475 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -195,6 +195,24 @@ struct iommu_domain_geometry *msm_iommu_get_geometry(struct msm_mmu *mmu)
return &iommu->domain->geometry;
}

+int msm_iommu_pagetable_walk(struct msm_mmu *mmu, unsigned long iova,
+ int (*cb)(void *cb_data, void *pte, int level),
+ void *cb_data)
+{
+ struct msm_iommu_pagetable *pagetable;
+
+ if (mmu->type != MSM_MMU_IOMMU_PAGETABLE)
+ return -EINVAL;
+
+ pagetable = to_pagetable(mmu);
+
+ if (!pagetable->pgtbl_ops->pgtable_walk)
+ return -EINVAL;
+
+ return pagetable->pgtbl_ops->pgtable_walk(pagetable->pgtbl_ops, iova,
+ cb, cb_data);
+}
+
static const struct msm_mmu_funcs pagetable_funcs = {
.map = msm_iommu_pagetable_map,
.unmap = msm_iommu_pagetable_unmap,
diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h
index 88af4f490881..46b2550b9b7a 100644
--- a/drivers/gpu/drm/msm/msm_mmu.h
+++ b/drivers/gpu/drm/msm/msm_mmu.h
@@ -53,7 +53,10 @@ static inline void msm_mmu_set_fault_handler(struct msm_mmu *mmu, void *arg,
struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent);

int msm_iommu_pagetable_params(struct msm_mmu *mmu, phys_addr_t *ttbr,
- int *asid);
+ int *asid);
+int msm_iommu_pagetable_walk(struct msm_mmu *mmu, unsigned long iova,
+ int (*cb)(void *cb_data, void *pte, int level),
+ void *cb_data);
struct iommu_domain_geometry *msm_iommu_get_geometry(struct msm_mmu *mmu);

#endif /* __MSM_MMU_H__ */
--
2.45.1