tsb.c 15 KB


  1. /* arch/sparc64/mm/tsb.c
  2. *
  3. * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
  4. */
  5. #include <linux/kernel.h>
  6. #include <linux/preempt.h>
  7. #include <linux/slab.h>
  8. #include <asm/page.h>
  9. #include <asm/pgtable.h>
  10. #include <asm/mmu_context.h>
  11. #include <asm/setup.h>
  12. #include <asm/tsb.h>
  13. #include <asm/tlb.h>
  14. #include <asm/oplib.h>
  15. extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
  /* Map a virtual address to a TSB slot index.
   *
   * The address is shifted right by @hash_shift and then masked with
   * (@nentries - 1).  The mask only selects a valid index when @nentries
   * is a power of two (assumed; not checked here).
   */
  static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
  {
      const unsigned long index_mask = nentries - 1;

      return (vaddr >> hash_shift) & index_mask;
  }
  /* Return non-zero when @tag equals the tag derived from @vaddr,
   * i.e. the address shifted right by 22 bits.
   */
  static inline int tag_compare(unsigned long tag, unsigned long vaddr)
  {
      unsigned long vaddr_tag = vaddr >> 22;

      return tag == vaddr_tag;
  }
  25. static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
  26. {
  27. unsigned long idx;
  28. for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {
  29. struct tsb *ent = &swapper_tsb[idx];
  30. unsigned long match = idx << 13;
  31. match |= (ent->tag << 22);
  32. if (match >= start && match < end)
  33. ent->tag = (1UL << TSB_TAG_INVALID_BIT);
  34. }
  35. }
  36. /* TSB flushes need only occur on the processor initiating the address
  37. * space modification, not on each cpu the address space has run on.
  38. * Only the TLB flush needs that treatment.
  39. */
  40. void flush_tsb_kernel_range(unsigned long start, unsigned long end)
  41. {
  42. unsigned long v;
  43. if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
  44. return flush_tsb_kernel_range_scan(start, end);
  45. for (v = start; v < end; v += PAGE_SIZE) {
  46. unsigned long hash = tsb_hash(v, PAGE_SHIFT,
  47. KERNEL_TSB_NENTRIES);
  48. struct tsb *ent = &swapper_tsb[hash];
  49. if (tag_compare(ent->tag, v))
  50. ent->tag = (1UL << TSB_TAG_INVALID_BIT);
  51. }
  52. }
  53. static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v,
  54. unsigned long hash_shift,
  55. unsigned long nentries)
  56. {
  57. unsigned long tag, ent, hash;
  58. v &= ~0x1UL;
  59. hash = tsb_hash(v, hash_shift, nentries);
  60. ent = tsb + (hash * sizeof(struct tsb));
  61. tag = (v >> 22UL);
  62. tsb_flush(ent, tag);
  63. }
  64. static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
  65. unsigned long tsb, unsigned long nentries)
  66. {
  67. unsigned long i;
  68. for (i = 0; i < tb->tlb_nr; i++)
  69. __flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries);
  70. }
  71. void flush_tsb_user(struct tlb_batch *tb)
  72. {
  73. struct mm_struct *mm = tb->mm;
  74. unsigned long nentries, base, flags;
  75. spin_lock_irqsave(&mm->context.lock, flags);
  76. if (!tb->huge) {
  77. base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
  78. nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
  79. if (tlb_type == cheetah_plus || tlb_type == hypervisor)
  80. base = __pa(base);
  81. __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
  82. }
  83. #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
  84. if (tb->huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
  85. base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
  86. nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
  87. if (tlb_type == cheetah_plus || tlb_type == hypervisor)
  88. base = __pa(base);
  89. __flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries);
  90. }
  91. #endif
  92. spin_unlock_irqrestore(&mm->context.lock, flags);
  93. }
  94. void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge)
  95. {
  96. unsigned long nentries, base, flags;
  97. spin_lock_irqsave(&mm->context.lock, flags);
  98. if (!huge) {
  99. base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
  100. nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
  101. if (tlb_type == cheetah_plus || tlb_type == hypervisor)
  102. base = __pa(base);
  103. __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);
  104. }
  105. #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
  106. if (huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
  107. base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
  108. nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
  109. if (tlb_type == cheetah_plus || tlb_type == hypervisor)
  110. base = __pa(base);
  111. __flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries);
  112. }
  113. #endif
  114. spin_unlock_irqrestore(&mm->context.lock, flags);
  115. }
  /* Hypervisor page-size index and mask used for the base TSB descriptor. */
  #define HV_PGSZ_IDX_BASE    HV_PGSZ_IDX_8K
  #define HV_PGSZ_MASK_BASE   HV_PGSZ_MASK_8K

  #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
  /* Hypervisor page-size index and mask used for the huge TSB descriptor. */
  #define HV_PGSZ_IDX_HUGE    HV_PGSZ_IDX_4MB
  #define HV_PGSZ_MASK_HUGE   HV_PGSZ_MASK_4MB
  #endif
  122. static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
  123. {
  124. unsigned long tsb_reg, base, tsb_paddr;
  125. unsigned long page_sz, tte;
  126. mm->context.tsb_block[tsb_idx].tsb_nentries =
  127. tsb_bytes / sizeof(struct tsb);
  128. switch (tsb_idx) {
  129. case MM_TSB_BASE:
  130. base = TSBMAP_8K_BASE;
  131. break;
  132. #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
  133. case MM_TSB_HUGE:
  134. base = TSBMAP_4M_BASE;
  135. break;
  136. #endif
  137. default:
  138. BUG();
  139. }
  140. tte = pgprot_val(PAGE_KERNEL_LOCKED);
  141. tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
  142. BUG_ON(tsb_paddr & (tsb_bytes - 1UL));
  143. /* Use the smallest page size that can map the whole TSB
  144. * in one TLB entry.
  145. */
  146. switch (tsb_bytes) {
  147. case 8192 << 0:
  148. tsb_reg = 0x0UL;
  149. #ifdef DCACHE_ALIASING_POSSIBLE
  150. base += (tsb_paddr & 8192);
  151. #endif
  152. page_sz = 8192;
  153. break;
  154. case 8192 << 1:
  155. tsb_reg = 0x1UL;
  156. page_sz = 64 * 1024;
  157. break;
  158. case 8192 << 2:
  159. tsb_reg = 0x2UL;
  160. page_sz = 64 * 1024;
  161. break;
  162. case 8192 << 3:
  163. tsb_reg = 0x3UL;
  164. page_sz = 64 * 1024;
  165. break;
  166. case 8192 << 4:
  167. tsb_reg = 0x4UL;
  168. page_sz = 512 * 1024;
  169. break;
  170. case 8192 << 5:
  171. tsb_reg = 0x5UL;
  172. page_sz = 512 * 1024;
  173. break;
  174. case 8192 << 6:
  175. tsb_reg = 0x6UL;
  176. page_sz = 512 * 1024;
  177. break;
  178. case 8192 << 7:
  179. tsb_reg = 0x7UL;
  180. page_sz = 4 * 1024 * 1024;
  181. break;
  182. default:
  183. printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
  184. current->comm, current->pid, tsb_bytes);
  185. do_exit(SIGSEGV);
  186. }
  187. tte |= pte_sz_bits(page_sz);
  188. if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
  189. /* Physical mapping, no locked TLB entry for TSB. */
  190. tsb_reg |= tsb_paddr;
  191. mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
  192. mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
  193. mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
  194. } else {
  195. tsb_reg |= base;
  196. tsb_reg |= (tsb_paddr & (page_sz - 1UL));
  197. tte |= (tsb_paddr & ~(page_sz - 1UL));
  198. mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
  199. mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
  200. mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
  201. }
  202. /* Setup the Hypervisor TSB descriptor. */
  203. if (tlb_type == hypervisor) {
  204. struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];
  205. switch (tsb_idx) {
  206. case MM_TSB_BASE:
  207. hp->pgsz_idx = HV_PGSZ_IDX_BASE;
  208. break;
  209. #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
  210. case MM_TSB_HUGE:
  211. hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
  212. break;
  213. #endif
  214. default:
  215. BUG();
  216. }
  217. hp->assoc = 1;
  218. hp->num_ttes = tsb_bytes / 16;
  219. hp->ctx_idx = 0;
  220. switch (tsb_idx) {
  221. case MM_TSB_BASE:
  222. hp->pgsz_mask = HV_PGSZ_MASK_BASE;
  223. break;
  224. #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
  225. case MM_TSB_HUGE:
  226. hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
  227. break;
  228. #endif
  229. default:
  230. BUG();
  231. }
  232. hp->tsb_base = tsb_paddr;
  233. hp->resv = 0;
  234. }
  235. }
  236. struct kmem_cache *pgtable_cache __read_mostly;
  237. static struct kmem_cache *tsb_caches[8] __read_mostly;
  238. static const char *tsb_cache_names[8] = {
  239. "tsb_8KB",
  240. "tsb_16KB",
  241. "tsb_32KB",
  242. "tsb_64KB",
  243. "tsb_128KB",
  244. "tsb_256KB",
  245. "tsb_512KB",
  246. "tsb_1MB",
  247. };
  248. void __init pgtable_cache_init(void)
  249. {
  250. unsigned long i;
  251. pgtable_cache = kmem_cache_create("pgtable_cache",
  252. PAGE_SIZE, PAGE_SIZE,
  253. 0,
  254. _clear_page);
  255. if (!pgtable_cache) {
  256. prom_printf("pgtable_cache_init(): Could not create!\n");
  257. prom_halt();
  258. }
  259. for (i = 0; i < ARRAY_SIZE(tsb_cache_names); i++) {
  260. unsigned long size = 8192 << i;
  261. const char *name = tsb_cache_names[i];
  262. tsb_caches[i] = kmem_cache_create(name,
  263. size, size,
  264. 0, NULL);
  265. if (!tsb_caches[i]) {
  266. prom_printf("Could not create %s cache\n", name);
  267. prom_halt();
  268. }
  269. }
  270. }
  /* Tuning knob read by tsb_size_to_rss_limit(): a negative value N
   * lowers the RSS limit by capacity >> -N, a non-negative value raises
   * it by capacity >> N.  The default of -2 yields 3/4 of capacity.
   */
  int sysctl_tsb_ratio = -2;
  272. static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
  273. {
  274. unsigned long num_ents = (new_size / sizeof(struct tsb));
  275. if (sysctl_tsb_ratio < 0)
  276. return num_ents - (num_ents >> -sysctl_tsb_ratio);
  277. else
  278. return num_ents + (num_ents >> sysctl_tsb_ratio);
  279. }
  280. /* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
  281. * do_sparc64_fault() invokes this routine to try and grow it.
  282. *
  283. * When we reach the maximum TSB size supported, we stick ~0UL into
  284. * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
  285. * will not trigger any longer.
  286. *
  287. * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
  288. * of two. The TSB must be aligned to it's size, so f.e. a 512K TSB
  289. * must be 512K aligned. It also must be physically contiguous, so we
  290. * cannot use vmalloc().
  291. *
  292. * The idea here is to grow the TSB when the RSS of the process approaches
  293. * the number of entries that the current TSB can hold at once. Currently,
  294. * we trigger when the RSS hits 3/4 of the TSB capacity.
  295. */
  296. void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
  297. {
  298. unsigned long max_tsb_size = 1 * 1024 * 1024;
  299. unsigned long new_size, old_size, flags;
  300. struct tsb *old_tsb, *new_tsb;
  301. unsigned long new_cache_index, old_cache_index;
  302. unsigned long new_rss_limit;
  303. gfp_t gfp_flags;
  304. if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
  305. max_tsb_size = (PAGE_SIZE << MAX_ORDER);
  306. new_cache_index = 0;
  307. for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
  308. new_rss_limit = tsb_size_to_rss_limit(new_size);
  309. if (new_rss_limit > rss)
  310. break;
  311. new_cache_index++;
  312. }
  313. if (new_size == max_tsb_size)
  314. new_rss_limit = ~0UL;
  315. retry_tsb_alloc:
  316. gfp_flags = GFP_KERNEL;
  317. if (new_size > (PAGE_SIZE * 2))
  318. gfp_flags |= __GFP_NOWARN | __GFP_NORETRY;
  319. new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
  320. gfp_flags, numa_node_id());
  321. if (unlikely(!new_tsb)) {
  322. /* Not being able to fork due to a high-order TSB
  323. * allocation failure is very bad behavior. Just back
  324. * down to a 0-order allocation and force no TSB
  325. * growing for this address space.
  326. */
  327. if (mm->context.tsb_block[tsb_index].tsb == NULL &&
  328. new_cache_index > 0) {
  329. new_cache_index = 0;
  330. new_size = 8192;
  331. new_rss_limit = ~0UL;
  332. goto retry_tsb_alloc;
  333. }
  334. /* If we failed on a TSB grow, we are under serious
  335. * memory pressure so don't try to grow any more.
  336. */
  337. if (mm->context.tsb_block[tsb_index].tsb != NULL)
  338. mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
  339. return;
  340. }
  341. /* Mark all tags as invalid. */
  342. tsb_init(new_tsb, new_size);
  343. /* Ok, we are about to commit the changes. If we are
  344. * growing an existing TSB the locking is very tricky,
  345. * so WATCH OUT!
  346. *
  347. * We have to hold mm->context.lock while committing to the
  348. * new TSB, this synchronizes us with processors in
  349. * flush_tsb_user() and switch_mm() for this address space.
  350. *
  351. * But even with that lock held, processors run asynchronously
  352. * accessing the old TSB via TLB miss handling. This is OK
  353. * because those actions are just propagating state from the
  354. * Linux page tables into the TSB, page table mappings are not
  355. * being changed. If a real fault occurs, the processor will
  356. * synchronize with us when it hits flush_tsb_user(), this is
  357. * also true for the case where vmscan is modifying the page
  358. * tables. The only thing we need to be careful with is to
  359. * skip any locked TSB entries during copy_tsb().
  360. *
  361. * When we finish committing to the new TSB, we have to drop
  362. * the lock and ask all other cpus running this address space
  363. * to run tsb_context_switch() to see the new TSB table.
  364. */
  365. spin_lock_irqsave(&mm->context.lock, flags);
  366. old_tsb = mm->context.tsb_block[tsb_index].tsb;
  367. old_cache_index =
  368. (mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
  369. old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
  370. sizeof(struct tsb));
  371. /* Handle multiple threads trying to grow the TSB at the same time.
  372. * One will get in here first, and bump the size and the RSS limit.
  373. * The others will get in here next and hit this check.
  374. */
  375. if (unlikely(old_tsb &&
  376. (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
  377. spin_unlock_irqrestore(&mm->context.lock, flags);
  378. kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
  379. return;
  380. }
  381. mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;
  382. if (old_tsb) {
  383. extern void copy_tsb(unsigned long old_tsb_base,
  384. unsigned long old_tsb_size,
  385. unsigned long new_tsb_base,
  386. unsigned long new_tsb_size,
  387. unsigned long page_size_shift);
  388. unsigned long old_tsb_base = (unsigned long) old_tsb;
  389. unsigned long new_tsb_base = (unsigned long) new_tsb;
  390. if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
  391. old_tsb_base = __pa(old_tsb_base);
  392. new_tsb_base = __pa(new_tsb_base);
  393. }
  394. copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size,
  395. tsb_index == MM_TSB_BASE ?
  396. PAGE_SHIFT : REAL_HPAGE_SHIFT);
  397. }
  398. mm->context.tsb_block[tsb_index].tsb = new_tsb;
  399. setup_tsb_params(mm, tsb_index, new_size);
  400. spin_unlock_irqrestore(&mm->context.lock, flags);
  401. /* If old_tsb is NULL, we're being invoked for the first time
  402. * from init_new_context().
  403. */
  404. if (old_tsb) {
  405. /* Reload it on the local cpu. */
  406. tsb_context_switch(mm);
  407. /* Now force other processors to do the same. */
  408. preempt_disable();
  409. smp_tsb_sync(mm);
  410. preempt_enable();
  411. /* Now it is safe to free the old tsb. */
  412. kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
  413. }
  414. }
  415. int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
  416. {
  417. unsigned long mm_rss = get_mm_rss(mm);
  418. #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
  419. unsigned long saved_hugetlb_pte_count;
  420. unsigned long saved_thp_pte_count;
  421. #endif
  422. unsigned int i;
  423. spin_lock_init(&mm->context.lock);
  424. mm->context.sparc64_ctx_val = 0UL;
  425. #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
  426. /* We reset them to zero because the fork() page copying
  427. * will re-increment the counters as the parent PTEs are
  428. * copied into the child address space.
  429. */
  430. saved_hugetlb_pte_count = mm->context.hugetlb_pte_count;
  431. saved_thp_pte_count = mm->context.thp_pte_count;
  432. mm->context.hugetlb_pte_count = 0;
  433. mm->context.thp_pte_count = 0;
  434. mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE);
  435. #endif
  436. /* copy_mm() copies over the parent's mm_struct before calling
  437. * us, so we need to zero out the TSB pointer or else tsb_grow()
  438. * will be confused and think there is an older TSB to free up.
  439. */
  440. for (i = 0; i < MM_NUM_TSBS; i++)
  441. mm->context.tsb_block[i].tsb = NULL;
  442. /* If this is fork, inherit the parent's TSB size. We would
  443. * grow it to that size on the first page fault anyways.
  444. */
  445. tsb_grow(mm, MM_TSB_BASE, mm_rss);
  446. #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
  447. if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count))
  448. tsb_grow(mm, MM_TSB_HUGE,
  449. (saved_hugetlb_pte_count + saved_thp_pte_count) *
  450. REAL_HPAGE_PER_HPAGE);
  451. #endif
  452. if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
  453. return -ENOMEM;
  454. return 0;
  455. }
  456. static void tsb_destroy_one(struct tsb_config *tp)
  457. {
  458. unsigned long cache_index;
  459. if (!tp->tsb)
  460. return;
  461. cache_index = tp->tsb_reg_val & 0x7UL;
  462. kmem_cache_free(tsb_caches[cache_index], tp->tsb);
  463. tp->tsb = NULL;
  464. tp->tsb_reg_val = 0UL;
  465. }
  466. void destroy_context(struct mm_struct *mm)
  467. {
  468. unsigned long flags, i;
  469. for (i = 0; i < MM_NUM_TSBS; i++)
  470. tsb_destroy_one(&mm->context.tsb_block[i]);
  471. spin_lock_irqsave(&ctx_alloc_lock, flags);
  472. if (CTX_VALID(mm->context)) {
  473. unsigned long nr = CTX_NRBITS(mm->context);
  474. mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
  475. }
  476. spin_unlock_irqrestore(&ctx_alloc_lock, flags);
  477. }