[PATCH v3 11/12] mm/hmm: add an helper function that fault pages and map them to a device v3
From: jglisse
Date: Wed Apr 03 2019 - 15:33:56 EST
From: JÃrÃme Glisse <jglisse@xxxxxxxxxx>
This is a all in one helper that fault pages in a range and map them to
a device so that every single device driver do not have to re-implement
this common pattern.
This is taken from ODP RDMA in preparation of ODP RDMA convertion. It
will be use by nouveau and other drivers.
Changes since v2:
- Improved function comment for kernel documentation.
Changes since v1:
- improved commit message
Signed-off-by: JÃrÃme Glisse <jglisse@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Ralph Campbell <rcampbell@xxxxxxxxxx>
Cc: John Hubbard <jhubbard@xxxxxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Souptick Joarder <jrdr.linux@xxxxxxxxx>
---
include/linux/hmm.h | 9 +++
mm/hmm.c | 152 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 161 insertions(+)
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index a79fcc6681f5..f81fe2c0f343 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -474,6 +474,15 @@ int hmm_range_register(struct hmm_range *range,
void hmm_range_unregister(struct hmm_range *range);
long hmm_range_snapshot(struct hmm_range *range);
long hmm_range_fault(struct hmm_range *range, bool block);
+long hmm_range_dma_map(struct hmm_range *range,
+ struct device *device,
+ dma_addr_t *daddrs,
+ bool block);
+long hmm_range_dma_unmap(struct hmm_range *range,
+ struct vm_area_struct *vma,
+ struct device *device,
+ dma_addr_t *daddrs,
+ bool dirty);
/*
* HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range
diff --git a/mm/hmm.c b/mm/hmm.c
index 39bc77d7e6e3..82fded7273d8 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -30,6 +30,7 @@
#include <linux/hugetlb.h>
#include <linux/memremap.h>
#include <linux/jump_label.h>
+#include <linux/dma-mapping.h>
#include <linux/mmu_notifier.h>
#include <linux/memory_hotplug.h>
@@ -1173,6 +1174,157 @@ long hmm_range_fault(struct hmm_range *range, bool block)
return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
}
EXPORT_SYMBOL(hmm_range_fault);
+
+/**
+ * hmm_range_dma_map() - hmm_range_fault() and dma map page all in one.
+ * @range: range being faulted
+ * @device: device against to dma map page to
+ * @daddrs: dma address of mapped pages
+ * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem)
+ * Returns: number of pages mapped on success, -EAGAIN if mmap_sem have been
+ * drop and you need to try again, some other error value otherwise
+ *
+ * Note same usage pattern as hmm_range_fault().
+ */
+long hmm_range_dma_map(struct hmm_range *range,
+ struct device *device,
+ dma_addr_t *daddrs,
+ bool block)
+{
+ unsigned long i, npages, mapped;
+ long ret;
+
+ ret = hmm_range_fault(range, block);
+ if (ret <= 0)
+ return ret ? ret : -EBUSY;
+
+ npages = (range->end - range->start) >> PAGE_SHIFT;
+ for (i = 0, mapped = 0; i < npages; ++i) {
+ enum dma_data_direction dir = DMA_FROM_DEVICE;
+ struct page *page;
+
+ /*
+ * FIXME need to update DMA API to provide invalid DMA address
+ * value instead of a function to test dma address value. This
+ * would remove lot of dumb code duplicated accross many arch.
+ *
+ * For now setting it to 0 here is good enough as the pfns[]
+ * value is what is use to check what is valid and what isn't.
+ */
+ daddrs[i] = 0;
+
+ page = hmm_pfn_to_page(range, range->pfns[i]);
+ if (page == NULL)
+ continue;
+
+ /* Check if range is being invalidated */
+ if (!range->valid) {
+ ret = -EBUSY;
+ goto unmap;
+ }
+
+ /* If it is read and write than map bi-directional. */
+ if (range->pfns[i] & range->values[HMM_PFN_WRITE])
+ dir = DMA_BIDIRECTIONAL;
+
+ daddrs[i] = dma_map_page(device, page, 0, PAGE_SIZE, dir);
+ if (dma_mapping_error(device, daddrs[i])) {
+ ret = -EFAULT;
+ goto unmap;
+ }
+
+ mapped++;
+ }
+
+ return mapped;
+
+unmap:
+ for (npages = i, i = 0; (i < npages) && mapped; ++i) {
+ enum dma_data_direction dir = DMA_FROM_DEVICE;
+ struct page *page;
+
+ page = hmm_pfn_to_page(range, range->pfns[i]);
+ if (page == NULL)
+ continue;
+
+ if (dma_mapping_error(device, daddrs[i]))
+ continue;
+
+ /* If it is read and write than map bi-directional. */
+ if (range->pfns[i] & range->values[HMM_PFN_WRITE])
+ dir = DMA_BIDIRECTIONAL;
+
+ dma_unmap_page(device, daddrs[i], PAGE_SIZE, dir);
+ mapped--;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(hmm_range_dma_map);
+
+/**
+ * hmm_range_dma_unmap() - unmap range of that was map with hmm_range_dma_map()
+ * @range: range being unmapped
+ * @vma: the vma against which the range (optional)
+ * @device: device against which dma map was done
+ * @daddrs: dma address of mapped pages
+ * @dirty: dirty page if it had the write flag set
+ * Returns: number of page unmapped on success, -EINVAL otherwise
+ *
+ * Note that caller MUST abide by mmu notifier or use HMM mirror and abide
+ * to the sync_cpu_device_pagetables() callback so that it is safe here to
+ * call set_page_dirty(). Caller must also take appropriate locks to avoid
+ * concurrent mmu notifier or sync_cpu_device_pagetables() to make progress.
+ */
+long hmm_range_dma_unmap(struct hmm_range *range,
+ struct vm_area_struct *vma,
+ struct device *device,
+ dma_addr_t *daddrs,
+ bool dirty)
+{
+ unsigned long i, npages;
+ long cpages = 0;
+
+ /* Sanity check. */
+ if (range->end <= range->start)
+ return -EINVAL;
+ if (!daddrs)
+ return -EINVAL;
+ if (!range->pfns)
+ return -EINVAL;
+
+ npages = (range->end - range->start) >> PAGE_SHIFT;
+ for (i = 0; i < npages; ++i) {
+ enum dma_data_direction dir = DMA_FROM_DEVICE;
+ struct page *page;
+
+ page = hmm_pfn_to_page(range, range->pfns[i]);
+ if (page == NULL)
+ continue;
+
+ /* If it is read and write than map bi-directional. */
+ if (range->pfns[i] & range->values[HMM_PFN_WRITE]) {
+ dir = DMA_BIDIRECTIONAL;
+
+ /*
+ * See comments in function description on why it is
+ * safe here to call set_page_dirty()
+ */
+ if (dirty)
+ set_page_dirty(page);
+ }
+
+ /* Unmap and clear pfns/dma address */
+ dma_unmap_page(device, daddrs[i], PAGE_SIZE, dir);
+ range->pfns[i] = range->values[HMM_PFN_NONE];
+ /* FIXME see comments in hmm_vma_dma_map() */
+ daddrs[i] = 0;
+ cpages++;
+ }
+
+ return cpages;
+}
+EXPORT_SYMBOL(hmm_range_dma_unmap);
#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
--
2.17.2