dma-mapping-fast.c

/* Copyright (c) 2016-2019, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/dma-contiguous.h>
#include <linux/dma-mapping.h>
#include <linux/dma-mapping-fast.h>
#include <linux/io-pgtable-fast.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
#include <asm/dma-iommu.h>
#include <linux/slab.h>
#include <trace/events/iommu.h>
#include <soc/qcom/secure_buffer.h>
#include <linux/arm-smmu-errata.h>

/* some redundant definitions... :( TODO: move to io-pgtable-fast.h */
#define FAST_PAGE_SHIFT		12
#define FAST_PAGE_SIZE		(1UL << FAST_PAGE_SHIFT)
#define FAST_PAGE_MASK		(~(FAST_PAGE_SIZE - 1))
#define FAST_PTE_ADDR_MASK	((av8l_fast_iopte)0xfffffffff000)
#define FAST_MAIR_ATTR_IDX_CACHE	1
#define FAST_PTE_ATTRINDX_SHIFT		2
#define FAST_PTE_ATTRINDX_MASK		0x7
#define FAST_PTE_SH_SHIFT	8
#define FAST_PTE_SH_MASK	(((av8l_fast_iopte)0x3) << FAST_PTE_SH_SHIFT)
#define FAST_PTE_SH_OS		(((av8l_fast_iopte)2) << FAST_PTE_SH_SHIFT)
#define FAST_PTE_SH_IS		(((av8l_fast_iopte)3) << FAST_PTE_SH_SHIFT)
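
/*
 * Helpers that translate DMA attributes into CPU page protections
 * (__get_dma_pgprot) and IOMMU mapping flags (__get_iommu_pgprot):
 * strongly-ordered requests become non-cached / IOMMU_MMIO mappings,
 * non-coherent or write-combine requests become write-combine, coherent
 * devices get IOMMU_CACHE, and IOMMU_NOEXEC is set unless the caller
 * explicitly asked for an executable mapping via DMA_ATTR_EXEC_MAPPING.
 */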
static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
				 bool coherent)
{
	if (attrs & DMA_ATTR_STRONGLY_ORDERED)
		return pgprot_noncached(prot);
	else if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
		return pgprot_writecombine(prot);
	return prot;
}

static int __get_iommu_pgprot(unsigned long attrs, int prot,
			      bool coherent)
{
	if (!(attrs & DMA_ATTR_EXEC_MAPPING))
		prot |= IOMMU_NOEXEC;
	if ((attrs & DMA_ATTR_STRONGLY_ORDERED))
		prot |= IOMMU_MMIO;
	if (coherent)
		prot |= IOMMU_CACHE;

	return prot;
}

static void fast_dmac_clean_range(struct dma_fast_smmu_mapping *mapping,
				  void *start, void *end)
{
	if (!mapping->is_smmu_pt_coherent)
		dmac_clean_range(start, end);
}
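
/*
 * A PTE is treated as coherent when it selects the cacheable MAIR index
 * (FAST_MAIR_ATTR_IDX_CACHE) and is marked inner- or outer-shareable;
 * such mappings don't need explicit CPU cache maintenance around DMA.
 */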
static bool __fast_is_pte_coherent(av8l_fast_iopte *ptep)
{
	int attr_idx = (*ptep & (FAST_PTE_ATTRINDX_MASK <<
				 FAST_PTE_ATTRINDX_SHIFT)) >>
			FAST_PTE_ATTRINDX_SHIFT;

	if ((attr_idx == FAST_MAIR_ATTR_IDX_CACHE) &&
	    (((*ptep & FAST_PTE_SH_MASK) == FAST_PTE_SH_IS) ||
	     (*ptep & FAST_PTE_SH_MASK) == FAST_PTE_SH_OS))
		return true;

	return false;
}
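
/*
 * Per-buffer coherency: DMA_ATTR_FORCE_COHERENT and
 * DMA_ATTR_FORCE_NON_COHERENT override whatever the device itself
 * advertises via is_device_dma_coherent().
 */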
static bool is_dma_coherent(struct device *dev, unsigned long attrs)
{
	bool is_coherent;

	if (attrs & DMA_ATTR_FORCE_COHERENT)
		is_coherent = true;
	else if (attrs & DMA_ATTR_FORCE_NON_COHERENT)
		is_coherent = false;
	else if (is_device_dma_coherent(dev))
		is_coherent = true;
	else
		is_coherent = false;

	return is_coherent;
}

/*
 * Checks if the allocated range (ending at @end) covered the upcoming
 * stale bit.  We don't need to know exactly where the range starts since
 * we already know where the candidate search range started.  If, starting
 * from the beginning of the candidate search range, we had to step over
 * (or landed directly on top of) the upcoming stale bit, then we return
 * true.
 *
 * Due to wrapping, there are two scenarios we'll need to check: (1) if the
 * range [search_start, upcoming_stale] spans 0 (i.e. search_start >
 * upcoming_stale), and (2) if the range [search_start, upcoming_stale]
 * does *not* span 0 (i.e. search_start <= upcoming_stale).  For each of
 * those two scenarios we then need to handle two cases: the allocation
 * ended before wrapping past the top of the bitmap, or it wrapped.
 */
static bool __bit_covered_stale(unsigned long upcoming_stale,
				unsigned long search_start,
				unsigned long end)
{
	if (search_start > upcoming_stale) {
		if (end >= search_start) {
			/*
			 * We started searching above upcoming_stale and we
			 * didn't wrap, so we couldn't have crossed
			 * upcoming_stale.
			 */
			return false;
		}
		/*
		 * We wrapped.  Did we cross (or land on top of)
		 * upcoming_stale?
		 */
		return end >= upcoming_stale;
	}

	if (search_start <= upcoming_stale) {
		if (end >= search_start) {
			/*
			 * We didn't wrap.  Did we cross (or land on top
			 * of) upcoming_stale?
			 */
			return end >= upcoming_stale;
		}
		/*
		 * We wrapped.  So we must have crossed upcoming_stale
		 * (since we started searching below it).
		 */
		return true;
	}

	/* we should have covered all logical combinations... */
	WARN_ON(1);
	return true;
}
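
/*
 * Allocate an IOVA range of @size bytes (plus an optional guard region
 * for the SMMU erratum workaround, see fast_smmu_errata_init()) from the
 * 4K-granule bitmap.  As an illustrative example, with
 * min_iova_align = 64K, force_guard_page_len = 4K and size = 20K,
 * guard_len = ALIGN(24K, 64K) - 20K = 44K, so 16 bitmap bits are taken
 * and the guard page is mapped at iova + size.  Called with
 * mapping->lock held.
 */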
static dma_addr_t __fast_smmu_alloc_iova(struct dma_fast_smmu_mapping *mapping,
					 unsigned long attrs,
					 size_t size)
{
	unsigned long bit, prev_search_start, nbits;
	unsigned long align;
	unsigned long guard_len;
	dma_addr_t iova;

	if (mapping->min_iova_align)
		guard_len = ALIGN(size + mapping->force_guard_page_len,
				  mapping->min_iova_align) - size;
	else
		guard_len = 0;

	nbits = (size + guard_len) >> FAST_PAGE_SHIFT;
	align = (1 << get_order(size + guard_len)) - 1;
	bit = bitmap_find_next_zero_area(
		mapping->bitmap, mapping->num_4k_pages, mapping->next_start,
		nbits, align);
	if (unlikely(bit > mapping->num_4k_pages)) {
		/* try wrapping */
		mapping->next_start = 0; /* TODO: SHOULD I REALLY DO THIS?!? */
		bit = bitmap_find_next_zero_area(
			mapping->bitmap, mapping->num_4k_pages, 0, nbits,
			align);
		if (unlikely(bit > mapping->num_4k_pages))
			return DMA_ERROR_CODE;
	}

	bitmap_set(mapping->bitmap, bit, nbits);
	prev_search_start = mapping->next_start;
	mapping->next_start = bit + nbits;
	if (unlikely(mapping->next_start >= mapping->num_4k_pages))
		mapping->next_start = 0;

	/*
	 * If we just re-allocated a VA whose TLB hasn't been invalidated
	 * since it was last used and unmapped, we need to invalidate it
	 * here.  We actually invalidate the entire TLB so that we don't
	 * have to invalidate the TLB again until we wrap back around.
	 */
	if (mapping->have_stale_tlbs &&
	    __bit_covered_stale(mapping->upcoming_stale_bit,
				prev_search_start,
				bit + nbits - 1)) {
		bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC);

		iommu_tlbiall(mapping->domain);
		mapping->have_stale_tlbs = false;
		av8l_fast_clear_stale_ptes(mapping->pgtbl_pmds, skip_sync);
	}

	iova = (bit << FAST_PAGE_SHIFT) + mapping->base;
	if (guard_len &&
	    iommu_map(mapping->domain, iova + size,
		      page_to_phys(mapping->guard_page),
		      guard_len, ARM_SMMU_GUARD_PROT)) {
		bitmap_clear(mapping->bitmap, bit, nbits);
		return DMA_ERROR_CODE;
	}
	return iova;
}

/*
 * Checks whether the candidate bit will be allocated sooner than the
 * current upcoming stale bit.  We can say candidate will be upcoming
 * sooner than the current upcoming stale bit if it lies between the
 * starting bit of the next search range and the upcoming stale bit
 * (allowing for wrap-around).
 *
 * Stated differently, we're checking the relative ordering of three
 * unsigned numbers.  So we need to check all 6 (i.e. 3!) permutations,
 * namely:
 *
 *     0 |---A---B---C---| TOP (Case 1)
 *     0 |---A---C---B---| TOP (Case 2)
 *     0 |---B---A---C---| TOP (Case 3)
 *     0 |---B---C---A---| TOP (Case 4)
 *     0 |---C---A---B---| TOP (Case 5)
 *     0 |---C---B---A---| TOP (Case 6)
 *
 * Note that since we're allowing numbers to wrap, the following three
 * scenarios are all equivalent for Case 1:
 *
 *     0 |---A---B---C---| TOP
 *     0 |---C---A---B---| TOP (C has wrapped. This is Case 5.)
 *     0 |---B---C---A---| TOP (C and B have wrapped. This is Case 4.)
 *
 * In any of these cases, if we start searching from A, we will find B
 * before we find C.
 *
 * We can also find two equivalent cases for Case 2:
 *
 *     0 |---A---C---B---| TOP
 *     0 |---B---A---C---| TOP (B has wrapped. This is Case 3.)
 *     0 |---C---B---A---| TOP (B and C have wrapped. This is Case 6.)
 *
 * In any of these cases, if we start searching from A, we will find C
 * before we find B.
 */
static bool __bit_is_sooner(unsigned long candidate,
			    struct dma_fast_smmu_mapping *mapping)
{
	unsigned long A = mapping->next_start;
	unsigned long B = candidate;
	unsigned long C = mapping->upcoming_stale_bit;

	if ((A < B && B < C) ||	/* Case 1 */
	    (C < A && A < B) ||	/* Case 5 */
	    (B < C && C < A))	/* Case 4 */
		return true;

	if ((A < C && C < B) ||	/* Case 2 */
	    (B < A && A < C) ||	/* Case 3 */
	    (C < B && B < A))	/* Case 6 */
		return false;

	/*
	 * For simplicity, we've been ignoring the possibility of any of
	 * our three numbers being equal.  Handle those cases here (they
	 * shouldn't happen very often, (I think?)).
	 */

	/*
	 * If candidate is the next bit to be searched then it's definitely
	 * sooner.
	 */
	if (A == B)
		return true;

	/*
	 * If candidate is the next upcoming stale bit we'll return false
	 * to avoid doing `upcoming = candidate' in the caller (which would
	 * be useless since they're already equal)
	 */
	if (B == C)
		return false;

	/*
	 * If next start is the upcoming stale bit then candidate can't
	 * possibly be sooner.  The "soonest" bit is already selected.
	 */
	if (A == C)
		return false;

	/* We should have covered all logical combinations. */
	WARN(1, "Well, that's awkward. A=%lu, B=%lu, C=%lu\n", A, B, C);
	return true;
}
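
/*
 * Return an IOVA range to the bitmap.  TLB invalidation is deferred: we
 * only note that stale TLB entries now exist and remember which freed
 * bit is closest to being handed out again, so __fast_smmu_alloc_iova()
 * can invalidate lazily right before that VA is reused.  Called with
 * mapping->lock held.
 */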
static void __fast_smmu_free_iova(struct dma_fast_smmu_mapping *mapping,
				  dma_addr_t iova, size_t size)
{
	unsigned long start_bit = (iova - mapping->base) >> FAST_PAGE_SHIFT;
	unsigned long nbits;
	unsigned long guard_len;

	if (mapping->min_iova_align)
		guard_len = ALIGN(size + mapping->force_guard_page_len,
				  mapping->min_iova_align) - size;
	else
		guard_len = 0;

	if (guard_len)
		iommu_unmap(mapping->domain, iova + size, guard_len);

	nbits = (size + guard_len) >> FAST_PAGE_SHIFT;

	/*
	 * We don't invalidate TLBs on unmap.  We invalidate TLBs on map
	 * when we're about to re-allocate a VA that was previously
	 * unmapped but hasn't yet been invalidated.  So we need to keep
	 * track of which bit is the closest to being re-allocated here.
	 */
	if (__bit_is_sooner(start_bit, mapping))
		mapping->upcoming_stale_bit = start_bit;

	bitmap_clear(mapping->bitmap, start_bit, nbits);
	mapping->have_stale_tlbs = true;
}
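
/*
 * CPU cache maintenance for non-coherent streaming DMA on the page's
 * kernel address: __fast_dma_page_cpu_to_dev() is used before handing a
 * buffer to the device, __fast_dma_page_dev_to_cpu() before handing it
 * back to the CPU.
 */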
static void __fast_dma_page_cpu_to_dev(struct page *page, unsigned long off,
				       size_t size, enum dma_data_direction dir)
{
	__dma_map_area(page_address(page) + off, size, dir);
}

static void __fast_dma_page_dev_to_cpu(struct page *page, unsigned long off,
				       size_t size, enum dma_data_direction dir)
{
	__dma_unmap_area(page_address(page) + off, size, dir);

	/* TODO: WHAT IS THIS? */
	/*
	 * Mark the D-cache clean for this page to avoid extra flushing.
	 */
	if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
		set_bit(PG_dcache_clean, &page->flags);
}

static int __fast_dma_direction_to_prot(enum dma_data_direction dir)
{
	switch (dir) {
	case DMA_BIDIRECTIONAL:
		return IOMMU_READ | IOMMU_WRITE;
	case DMA_TO_DEVICE:
		return IOMMU_READ;
	case DMA_FROM_DEVICE:
		return IOMMU_WRITE;
	default:
		return 0;
	}
}
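
/*
 * Map a single page (or sub-range of one) for streaming DMA.  The
 * physical address is rounded down to a 4K boundary and the length
 * rounded up; e.g. (illustrative numbers) mapping 0x1234 bytes starting
 * at offset 0x800 into a page covers two 4K PTEs and hands iova + 0x800
 * back to the caller.  For non-coherent devices the CPU cache is
 * maintained first, unless DMA_ATTR_SKIP_CPU_SYNC is set.
 */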
static dma_addr_t fast_smmu_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
	dma_addr_t iova;
	unsigned long flags;
	av8l_fast_iopte *pmd;
	phys_addr_t phys_plus_off = page_to_phys(page) + offset;
	phys_addr_t phys_to_map = round_down(phys_plus_off, FAST_PAGE_SIZE);
	unsigned long offset_from_phys_to_map = phys_plus_off & ~FAST_PAGE_MASK;
	size_t len = ALIGN(size + offset_from_phys_to_map, FAST_PAGE_SIZE);
	int nptes = len >> FAST_PAGE_SHIFT;
	bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC);
	int prot = __fast_dma_direction_to_prot(dir);
	bool is_coherent = is_dma_coherent(dev, attrs);

	prot = __get_iommu_pgprot(attrs, prot, is_coherent);

	if (!skip_sync && !is_coherent)
		__fast_dma_page_cpu_to_dev(phys_to_page(phys_to_map),
					   offset_from_phys_to_map, size, dir);

	spin_lock_irqsave(&mapping->lock, flags);

	iova = __fast_smmu_alloc_iova(mapping, attrs, len);
	if (unlikely(iova == DMA_ERROR_CODE))
		goto fail;

	pmd = iopte_pmd_offset(mapping->pgtbl_pmds, iova);

	if (unlikely(av8l_fast_map_public(pmd, phys_to_map, len, prot)))
		goto fail_free_iova;

	fast_dmac_clean_range(mapping, pmd, pmd + nptes);

	spin_unlock_irqrestore(&mapping->lock, flags);

	trace_map(mapping->domain, iova, phys_to_map, len, prot);
	return iova + offset_from_phys_to_map;

fail_free_iova:
	__fast_smmu_free_iova(mapping, iova, len);
fail:
	spin_unlock_irqrestore(&mapping->lock, flags);
	return DMA_ERROR_CODE;
}
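
/*
 * Tear down a mapping created by fast_smmu_map_page(): do any CPU cache
 * maintenance for non-coherent devices, clear the PTEs, and return the
 * page-aligned IOVA range to the allocator.  The TLB is not invalidated
 * here; see __fast_smmu_free_iova().
 */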
static void fast_smmu_unmap_page(struct device *dev, dma_addr_t iova,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
	unsigned long flags;
	av8l_fast_iopte *pmd = iopte_pmd_offset(mapping->pgtbl_pmds, iova);
	unsigned long offset = iova & ~FAST_PAGE_MASK;
	size_t len = ALIGN(size + offset, FAST_PAGE_SIZE);
	int nptes = len >> FAST_PAGE_SHIFT;
	struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK));
	bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC);
	bool is_coherent = is_dma_coherent(dev, attrs);

	if (!skip_sync && !is_coherent)
		__fast_dma_page_dev_to_cpu(page, offset, size, dir);

	spin_lock_irqsave(&mapping->lock, flags);
	av8l_fast_unmap_public(pmd, len);
	fast_dmac_clean_range(mapping, pmd, pmd + nptes);
	__fast_smmu_free_iova(mapping, iova - offset, len);
	spin_unlock_irqrestore(&mapping->lock, flags);

	trace_unmap(mapping->domain, iova - offset, len, len);
}

static void fast_smmu_sync_single_for_cpu(struct device *dev,
		dma_addr_t iova, size_t size, enum dma_data_direction dir)
{
	struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
	av8l_fast_iopte *pmd = iopte_pmd_offset(mapping->pgtbl_pmds, iova);
	unsigned long offset = iova & ~FAST_PAGE_MASK;
	struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK));

	if (!__fast_is_pte_coherent(pmd))
		__fast_dma_page_dev_to_cpu(page, offset, size, dir);
}

static void fast_smmu_sync_single_for_device(struct device *dev,
		dma_addr_t iova, size_t size, enum dma_data_direction dir)
{
	struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
	av8l_fast_iopte *pmd = iopte_pmd_offset(mapping->pgtbl_pmds, iova);
	unsigned long offset = iova & ~FAST_PAGE_MASK;
	struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK));

	if (!__fast_is_pte_coherent(pmd))
		__fast_dma_page_cpu_to_dev(page, offset, size, dir);
}

static int fast_smmu_map_sg(struct device *dev, struct scatterlist *sg,
			    int nents, enum dma_data_direction dir,
			    unsigned long attrs)
{
	/* 0 indicates error */
	return 0;
}

static void fast_smmu_unmap_sg(struct device *dev,
			       struct scatterlist *sg, int nents,
			       enum dma_data_direction dir,
			       unsigned long attrs)
{
	WARN_ON_ONCE(1);
}

static void fast_smmu_sync_sg_for_cpu(struct device *dev,
		struct scatterlist *sg, int nents, enum dma_data_direction dir)
{
	WARN_ON_ONCE(1);
}

static void fast_smmu_sync_sg_for_device(struct device *dev,
		struct scatterlist *sg, int nents, enum dma_data_direction dir)
{
	WARN_ON_ONCE(1);
}

static void __fast_smmu_free_pages(struct page **pages, int count)
{
	int i;

	for (i = 0; i < count; i++)
		__free_page(pages[i]);
	kvfree(pages);
}

static struct page **__fast_smmu_alloc_pages(unsigned int count, gfp_t gfp)
{
	struct page **pages;
	unsigned int i = 0, array_size = count * sizeof(*pages);

	if (array_size <= PAGE_SIZE)
		pages = kzalloc(array_size, GFP_KERNEL);
	else
		pages = vzalloc(array_size);
	if (!pages)
		return NULL;

	/* IOMMU can map any pages, so highmem can also be used here */
	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;

	for (i = 0; i < count; ++i) {
		struct page *page = alloc_page(gfp);

		if (!page) {
			__fast_smmu_free_pages(pages, i);
			return NULL;
		}
		pages[i] = page;
	}
	return pages;
}
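
/*
 * dma_alloc_attrs() path: allocate individual pages, flush them from the
 * CPU caches if the device is non-coherent, map them contiguously in
 * IOVA space with av8l_fast_map_public(), and stitch them into a
 * contiguous kernel VA with dma_common_pages_remap().
 */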
static void *fast_smmu_alloc(struct device *dev, size_t size,
			     dma_addr_t *handle, gfp_t gfp,
			     unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
	struct sg_table sgt;
	dma_addr_t dma_addr, iova_iter;
	void *addr;
	av8l_fast_iopte *ptep;
	unsigned long flags;
	struct sg_mapping_iter miter;
	size_t count = ALIGN(size, SZ_4K) >> PAGE_SHIFT;
	int prot = IOMMU_READ | IOMMU_WRITE; /* TODO: extract from attrs */
	bool is_coherent = is_dma_coherent(dev, attrs);
	pgprot_t remap_prot = __get_dma_pgprot(attrs, PAGE_KERNEL, is_coherent);
	struct page **pages;

	/*
	 * sg_alloc_table_from_pages accepts unsigned int value for count
	 * so check count doesn't exceed UINT_MAX.
	 */
	if (count > UINT_MAX) {
		dev_err(dev, "count: %zx exceeds UINT_MAX\n", count);
		return NULL;
	}

	prot = __get_iommu_pgprot(attrs, prot, is_coherent);

	*handle = DMA_ERROR_CODE;

	pages = __fast_smmu_alloc_pages(count, gfp);
	if (!pages) {
		dev_err(dev, "no pages\n");
		return NULL;
	}

	size = ALIGN(size, SZ_4K);
	if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, gfp)) {
		dev_err(dev, "no sg table\n");
		goto out_free_pages;
	}

	if (!is_coherent) {
		/*
		 * The CPU-centric flushing implied by SG_MITER_TO_SG isn't
		 * sufficient here, so skip it by using the "wrong" direction.
		 */
		sg_miter_start(&miter, sgt.sgl, sgt.orig_nents,
			       SG_MITER_FROM_SG);
		while (sg_miter_next(&miter))
			__dma_flush_area(miter.addr, miter.length);
		sg_miter_stop(&miter);
	}

	spin_lock_irqsave(&mapping->lock, flags);
	dma_addr = __fast_smmu_alloc_iova(mapping, attrs, size);
	if (dma_addr == DMA_ERROR_CODE) {
		dev_err(dev, "no iova\n");
		spin_unlock_irqrestore(&mapping->lock, flags);
		goto out_free_sg;
	}
	iova_iter = dma_addr;
	sg_miter_start(&miter, sgt.sgl, sgt.orig_nents,
		       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
	while (sg_miter_next(&miter)) {
		int nptes = miter.length >> FAST_PAGE_SHIFT;

		ptep = iopte_pmd_offset(mapping->pgtbl_pmds, iova_iter);
		if (unlikely(av8l_fast_map_public(
				     ptep, page_to_phys(miter.page),
				     miter.length, prot))) {
			dev_err(dev, "no map public\n");
			/* TODO: unwind previously successful mappings */
			goto out_free_iova;
		}
		fast_dmac_clean_range(mapping, ptep, ptep + nptes);
		iova_iter += miter.length;
	}
	sg_miter_stop(&miter);
	spin_unlock_irqrestore(&mapping->lock, flags);

	addr = dma_common_pages_remap(pages, size, VM_USERMAP, remap_prot,
				      __builtin_return_address(0));
	if (!addr) {
		dev_err(dev, "no common pages\n");
		goto out_unmap;
	}

	*handle = dma_addr;
	sg_free_table(&sgt);
	return addr;

out_unmap:
	/* need to take the lock again for page tables and iova */
	spin_lock_irqsave(&mapping->lock, flags);
	ptep = iopte_pmd_offset(mapping->pgtbl_pmds, dma_addr);
	av8l_fast_unmap_public(ptep, size);
	fast_dmac_clean_range(mapping, ptep, ptep + count);
out_free_iova:
	__fast_smmu_free_iova(mapping, dma_addr, size);
	spin_unlock_irqrestore(&mapping->lock, flags);
out_free_sg:
	sg_free_table(&sgt);
out_free_pages:
	__fast_smmu_free_pages(pages, count);
	return NULL;
}
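
/*
 * Undo fast_smmu_alloc(): unmap the kernel VA, clear the IOVA-space
 * PTEs, release the IOVA range and free the backing pages.
 */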
static void fast_smmu_free(struct device *dev, size_t size,
			   void *vaddr, dma_addr_t dma_handle,
			   unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
	struct vm_struct *area;
	struct page **pages;
	size_t count = ALIGN(size, SZ_4K) >> FAST_PAGE_SHIFT;
	av8l_fast_iopte *ptep;
	unsigned long flags;

	size = ALIGN(size, SZ_4K);

	area = find_vm_area(vaddr);
	if (WARN_ON_ONCE(!area))
		return;

	pages = area->pages;
	dma_common_free_remap(vaddr, size, VM_USERMAP, false);
	ptep = iopte_pmd_offset(mapping->pgtbl_pmds, dma_handle);
	spin_lock_irqsave(&mapping->lock, flags);
	av8l_fast_unmap_public(ptep, size);
	fast_dmac_clean_range(mapping, ptep, ptep + count);
	__fast_smmu_free_iova(mapping, dma_handle, size);
	spin_unlock_irqrestore(&mapping->lock, flags);
	__fast_smmu_free_pages(pages, count);
}

static int fast_smmu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
				void *cpu_addr, dma_addr_t dma_addr,
				size_t size, unsigned long attrs)
{
	struct vm_struct *area;
	unsigned long uaddr = vma->vm_start;
	struct page **pages;
	int i, nr_pages, ret = 0;
	bool coherent = is_dma_coherent(dev, attrs);

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     coherent);
	area = find_vm_area(cpu_addr);
	if (!area)
		return -EINVAL;

	pages = area->pages;
	nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	for (i = vma->vm_pgoff; i < nr_pages && uaddr < vma->vm_end; i++) {
		ret = vm_insert_page(vma, uaddr, pages[i]);
		if (ret)
			break;
		uaddr += PAGE_SIZE;
	}

	return ret;
}

static int fast_smmu_get_sgtable(struct device *dev, struct sg_table *sgt,
				 void *cpu_addr, dma_addr_t dma_addr,
				 size_t size, unsigned long attrs)
{
	unsigned int n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct vm_struct *area;

	area = find_vm_area(cpu_addr);
	if (!area || !area->pages)
		return -EINVAL;

	return sg_alloc_table_from_pages(sgt, area->pages, n_pages, 0, size,
					 GFP_KERNEL);
}

static dma_addr_t fast_smmu_dma_map_resource(
			struct device *dev, phys_addr_t phys_addr,
			size_t size, enum dma_data_direction dir,
			unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
	size_t offset = phys_addr & ~FAST_PAGE_MASK;
	size_t len = round_up(size + offset, FAST_PAGE_SIZE);
	dma_addr_t dma_addr;
	int prot;
	unsigned long flags;

	spin_lock_irqsave(&mapping->lock, flags);
	dma_addr = __fast_smmu_alloc_iova(mapping, attrs, len);
	spin_unlock_irqrestore(&mapping->lock, flags);

	if (dma_addr == DMA_ERROR_CODE)
		return dma_addr;

	prot = __fast_dma_direction_to_prot(dir);
	prot |= IOMMU_MMIO;

	if (iommu_map(mapping->domain, dma_addr, phys_addr - offset,
		      len, prot)) {
		spin_lock_irqsave(&mapping->lock, flags);
		__fast_smmu_free_iova(mapping, dma_addr, len);
		spin_unlock_irqrestore(&mapping->lock, flags);
		return DMA_ERROR_CODE;
	}
	return dma_addr + offset;
}

static void fast_smmu_dma_unmap_resource(
			struct device *dev, dma_addr_t addr,
			size_t size, enum dma_data_direction dir,
			unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
	size_t offset = addr & ~FAST_PAGE_MASK;
	size_t len = round_up(size + offset, FAST_PAGE_SIZE);
	unsigned long flags;

	iommu_unmap(mapping->domain, addr - offset, len);
	spin_lock_irqsave(&mapping->lock, flags);
	__fast_smmu_free_iova(mapping, addr - offset, len);
	spin_unlock_irqrestore(&mapping->lock, flags);
}

static int fast_smmu_mapping_error(struct device *dev,
				   dma_addr_t dma_addr)
{
	return dma_addr == DMA_ERROR_CODE;
}

static void __fast_smmu_mapped_over_stale(struct dma_fast_smmu_mapping *fast,
					  void *data)
{
	av8l_fast_iopte *ptep = data;
	dma_addr_t iova;
	unsigned long bitmap_idx;

	bitmap_idx = (unsigned long)(ptep - fast->pgtbl_pmds);
	iova = bitmap_idx << FAST_PAGE_SHIFT;
	dev_err(fast->dev, "Mapped over stale tlb at %pa\n", &iova);
	dev_err(fast->dev, "bitmap (failure at idx %lu):\n", bitmap_idx);
	dev_err(fast->dev, "ptep: %p pmds: %p diff: %lu\n", ptep,
		fast->pgtbl_pmds, bitmap_idx);
	print_hex_dump(KERN_ERR, "bmap: ", DUMP_PREFIX_ADDRESS,
		       32, 8, fast->bitmap, fast->bitmap_size, false);
}

static int fast_smmu_notify(struct notifier_block *self,
			    unsigned long action, void *data)
{
	struct dma_fast_smmu_mapping *fast = container_of(
		self, struct dma_fast_smmu_mapping, notifier);

	switch (action) {
	case MAPPED_OVER_STALE_TLB:
		__fast_smmu_mapped_over_stale(fast, data);
		return NOTIFY_OK;
	default:
		WARN(1, "Unhandled notifier action");
		return NOTIFY_DONE;
	}
}

static const struct dma_map_ops fast_smmu_dma_ops = {
	.alloc = fast_smmu_alloc,
	.free = fast_smmu_free,
	.mmap = fast_smmu_mmap_attrs,
	.get_sgtable = fast_smmu_get_sgtable,
	.map_page = fast_smmu_map_page,
	.unmap_page = fast_smmu_unmap_page,
	.sync_single_for_cpu = fast_smmu_sync_single_for_cpu,
	.sync_single_for_device = fast_smmu_sync_single_for_device,
	.map_sg = fast_smmu_map_sg,
	.unmap_sg = fast_smmu_unmap_sg,
	.sync_sg_for_cpu = fast_smmu_sync_sg_for_cpu,
	.sync_sg_for_device = fast_smmu_sync_sg_for_device,
	.map_resource = fast_smmu_dma_map_resource,
	.unmap_resource = fast_smmu_dma_unmap_resource,
	.mapping_error = fast_smmu_mapping_error,
};

/**
 * __fast_smmu_create_mapping_sized
 * @base: bottom of the VA range
 * @size: size of the VA range in bytes
 *
 * Creates a mapping structure which holds information about used/unused IO
 * address ranges, which is required to perform mapping with IOMMU aware
 * functions.  The only VA range supported is [0, 4GB).
 *
 * The client device needs to be attached to the mapping with the
 * fast_smmu_attach_device function.
 */
static struct dma_fast_smmu_mapping *__fast_smmu_create_mapping_sized(
	dma_addr_t base, u64 size)
{
	struct dma_fast_smmu_mapping *fast;

	fast = kzalloc(sizeof(struct dma_fast_smmu_mapping), GFP_KERNEL);
	if (!fast)
		goto err;

	fast->base = base;
	fast->size = size;
	fast->num_4k_pages = size >> FAST_PAGE_SHIFT;
	fast->bitmap_size = BITS_TO_LONGS(fast->num_4k_pages) * sizeof(long);

	fast->bitmap = kzalloc(fast->bitmap_size, GFP_KERNEL | __GFP_NOWARN |
			       __GFP_NORETRY);
	if (!fast->bitmap)
		fast->bitmap = vzalloc(fast->bitmap_size);

	if (!fast->bitmap)
		goto err2;

	spin_lock_init(&fast->lock);

	return fast;
err2:
	kfree(fast);
err:
	return ERR_PTR(-ENOMEM);
}
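
/*
 * Sizing note (illustrative): a full 4GB VA range covers
 * 4GB >> FAST_PAGE_SHIFT = 1M 4K-pages, so the allocation bitmap needs
 * 1M bits = 128KB, which is why the kzalloc() above is allowed to fail
 * quietly and fall back to vzalloc().
 */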

/*
 * Based on similar code from dma-iommu.c, but modified to use a different
 * iova allocator
 */
static void fast_smmu_reserve_pci_windows(struct device *dev,
					  struct dma_fast_smmu_mapping *mapping)
{
	struct pci_host_bridge *bridge;
	struct resource_entry *window;
	phys_addr_t start, end;
	struct pci_dev *pci_dev;
	unsigned long flags;

	if (!dev_is_pci(dev))
		return;

	pci_dev = to_pci_dev(dev);
	bridge = pci_find_host_bridge(pci_dev->bus);

	spin_lock_irqsave(&mapping->lock, flags);
	resource_list_for_each_entry(window, &bridge->windows) {
		if (resource_type(window->res) != IORESOURCE_MEM &&
		    resource_type(window->res) != IORESOURCE_IO)
			continue;

		start = round_down(window->res->start - window->offset,
				   FAST_PAGE_SIZE);
		end = round_up(window->res->end - window->offset,
			       FAST_PAGE_SIZE);
		start = max_t(unsigned long, mapping->base, start);
		end = min_t(unsigned long, mapping->base + mapping->size, end);
		if (start >= end)
			continue;

		dev_dbg(dev, "iova allocator reserved 0x%pa-0x%pa\n",
			&start, &end);

		start = (start - mapping->base) >> FAST_PAGE_SHIFT;
		end = (end - mapping->base) >> FAST_PAGE_SHIFT;
		bitmap_set(mapping->bitmap, start, end - start);
	}
	spin_unlock_irqrestore(&mapping->lock, flags);
}

static int fast_smmu_errata_init(struct dma_iommu_mapping *mapping)
{
	struct dma_fast_smmu_mapping *fast = mapping->fast;
	int vmid = VMID_HLOS;
	int min_iova_align = 0;
	int force_iova_guard_page = 0;

	iommu_domain_get_attr(mapping->domain,
			      DOMAIN_ATTR_MMU500_ERRATA_MIN_ALIGN,
			      &min_iova_align);
	iommu_domain_get_attr(mapping->domain, DOMAIN_ATTR_SECURE_VMID, &vmid);
	iommu_domain_get_attr(mapping->domain,
			      DOMAIN_ATTR_FORCE_IOVA_GUARD_PAGE,
			      &force_iova_guard_page);

	if (vmid >= VMID_LAST || vmid < 0)
		vmid = VMID_HLOS;

	fast->min_iova_align = (min_iova_align) ? ARM_SMMU_MIN_IOVA_ALIGN :
						  PAGE_SIZE;

	if (force_iova_guard_page)
		fast->force_guard_page_len = PAGE_SIZE;

	fast->guard_page = arm_smmu_errata_get_guard_page(vmid);
	if (!fast->guard_page)
		return -ENOMEM;

	return 0;
}

/**
 * fast_smmu_init_mapping
 * @dev: valid struct device pointer
 * @mapping: io address space mapping structure (returned from
 * arm_iommu_create_mapping)
 *
 * Called the first time a device is attached to this mapping.
 * Not for dma client use.
 */
int fast_smmu_init_mapping(struct device *dev,
			   struct dma_iommu_mapping *mapping)
{
	int err = 0;
	struct iommu_domain *domain = mapping->domain;
	struct iommu_pgtbl_info info;
	u64 size = (u64)mapping->bits << PAGE_SHIFT;

	if (mapping->base + size > (SZ_1G * 4ULL)) {
		dev_err(dev, "Iova end address too large\n");
		return -EINVAL;
	}

	mapping->fast = __fast_smmu_create_mapping_sized(mapping->base, size);
	if (IS_ERR(mapping->fast))
		return -ENOMEM;
	mapping->fast->domain = domain;
	mapping->fast->dev = dev;

	err = fast_smmu_errata_init(mapping);
	if (err)
		goto release_mapping;

	fast_smmu_reserve_pci_windows(dev, mapping->fast);

	if (iommu_domain_get_attr(domain, DOMAIN_ATTR_PGTBL_INFO,
				  &info)) {
		dev_err(dev, "Couldn't get page table info\n");
		err = -EINVAL;
		goto release_mapping;
	}
	mapping->fast->pgtbl_pmds = info.pmds;

	if (iommu_domain_get_attr(domain, DOMAIN_ATTR_PAGE_TABLE_IS_COHERENT,
				  &mapping->fast->is_smmu_pt_coherent)) {
		err = -EINVAL;
		goto release_mapping;
	}

	mapping->fast->notifier.notifier_call = fast_smmu_notify;
	av8l_register_notify(&mapping->fast->notifier);

	mapping->ops = &fast_smmu_dma_ops;
	return 0;

release_mapping:
	kvfree(mapping->fast->bitmap);
	kfree(mapping->fast);
	return err;
}
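
/*
 * Rough usage sketch (not a complete driver example): the IOMMU layer
 * creates a mapping with arm_iommu_create_mapping(), attaches the client
 * device, and calls fast_smmu_init_mapping() on first attach, which
 * installs fast_smmu_dma_ops as mapping->ops so that the client's
 * subsequent dma_map_*() calls are served by this allocator.
 */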

/**
 * fast_smmu_release_mapping
 * @kref: dma_iommu_mapping->kref
 *
 * Cleans up the given iommu mapping.
 */
void fast_smmu_release_mapping(struct kref *kref)
{
	struct dma_iommu_mapping *mapping =
		container_of(kref, struct dma_iommu_mapping, kref);

	kvfree(mapping->fast->bitmap);
	kfree(mapping->fast);
	iommu_domain_free(mapping->domain);
	kfree(mapping);
}