cpu_rmap.c 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. /*
  2. * cpu_rmap.c: CPU affinity reverse-map support
  3. * Copyright 2011 Solarflare Communications Inc.
  4. *
  5. * This program is free software; you can redistribute it and/or modify it
  6. * under the terms of the GNU General Public License version 2 as published
  7. * by the Free Software Foundation, incorporated herein by reference.
  8. */
  9. #include <linux/cpu_rmap.h>
  10. #include <linux/interrupt.h>
  11. #include <linux/export.h>
  12. /*
  13. * These functions maintain a mapping from CPUs to some ordered set of
  14. * objects with CPU affinities. This can be seen as a reverse-map of
  15. * CPU affinity. However, we do not assume that the object affinities
  16. * cover all CPUs in the system. For those CPUs not directly covered
  17. * by object affinities, we attempt to find a nearest object based on
  18. * CPU topology.
  19. */
  20. /**
  21. * alloc_cpu_rmap - allocate CPU affinity reverse-map
  22. * @size: Number of objects to be mapped
  23. * @flags: Allocation flags e.g. %GFP_KERNEL
  24. */
  25. struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
  26. {
  27. struct cpu_rmap *rmap;
  28. unsigned int cpu;
  29. size_t obj_offset;
  30. /* This is a silly number of objects, and we use u16 indices. */
  31. if (size > 0xffff)
  32. return NULL;
  33. /* Offset of object pointer array from base structure */
  34. obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]),
  35. sizeof(void *));
  36. rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags);
  37. if (!rmap)
  38. return NULL;
  39. kref_init(&rmap->refcount);
  40. rmap->obj = (void **)((char *)rmap + obj_offset);
  41. /* Initially assign CPUs to objects on a rota, since we have
  42. * no idea where the objects are. Use infinite distance, so
  43. * any object with known distance is preferable. Include the
  44. * CPUs that are not present/online, since we definitely want
  45. * any newly-hotplugged CPUs to have some object assigned.
  46. */
  47. for_each_possible_cpu(cpu) {
  48. rmap->near[cpu].index = cpu % size;
  49. rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
  50. }
  51. rmap->size = size;
  52. return rmap;
  53. }
  54. EXPORT_SYMBOL(alloc_cpu_rmap);
  55. /**
  56. * cpu_rmap_release - internal reclaiming helper called from kref_put
  57. * @ref: kref to struct cpu_rmap
  58. */
  59. static void cpu_rmap_release(struct kref *ref)
  60. {
  61. struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);
  62. kfree(rmap);
  63. }
  64. /**
  65. * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
  66. * @rmap: reverse-map allocated with alloc_cpu_rmap()
  67. */
  68. static inline void cpu_rmap_get(struct cpu_rmap *rmap)
  69. {
  70. kref_get(&rmap->refcount);
  71. }
  72. /**
  73. * cpu_rmap_put - release ref on a cpu_rmap
  74. * @rmap: reverse-map allocated with alloc_cpu_rmap()
  75. */
  76. int cpu_rmap_put(struct cpu_rmap *rmap)
  77. {
  78. return kref_put(&rmap->refcount, cpu_rmap_release);
  79. }
  80. EXPORT_SYMBOL(cpu_rmap_put);
  81. /* Reevaluate nearest object for given CPU, comparing with the given
  82. * neighbours at the given distance.
  83. */
  84. static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
  85. const struct cpumask *mask, u16 dist)
  86. {
  87. int neigh;
  88. for_each_cpu(neigh, mask) {
  89. if (rmap->near[cpu].dist > dist &&
  90. rmap->near[neigh].dist <= dist) {
  91. rmap->near[cpu].index = rmap->near[neigh].index;
  92. rmap->near[cpu].dist = dist;
  93. return true;
  94. }
  95. }
  96. return false;
  97. }
  #ifdef DEBUG
  /* Dump the CPU -> object index/distance table of @rmap for every possible
   * CPU, tagging the dump with @prefix.  Only built when DEBUG is defined.
   */
  static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
  {
      unsigned int index;
      unsigned int cpu;

      pr_info("cpu_rmap %p, %s:\n", rmap, prefix);

      for_each_possible_cpu(cpu) {
          index = rmap->near[cpu].index;
          /* cpu is unsigned, so it is printed with %u rather than %d */
          pr_info("cpu %u -> obj %u (distance %u)\n",
                  cpu, index, rmap->near[cpu].dist);
      }
  }
  #else
  /* No-op stand-in so callers need no #ifdef of their own. */
  static inline void
  debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
  {
  }
  #endif
  116. /**
  117. * cpu_rmap_add - add object to a rmap
  118. * @rmap: CPU rmap allocated with alloc_cpu_rmap()
  119. * @obj: Object to add to rmap
  120. *
  121. * Return index of object.
  122. */
  123. int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
  124. {
  125. u16 index;
  126. BUG_ON(rmap->used >= rmap->size);
  127. index = rmap->used++;
  128. rmap->obj[index] = obj;
  129. return index;
  130. }
  131. EXPORT_SYMBOL(cpu_rmap_add);
  132. /**
  133. * cpu_rmap_update - update CPU rmap following a change of object affinity
  134. * @rmap: CPU rmap to update
  135. * @index: Index of object whose affinity changed
  136. * @affinity: New CPU affinity of object
  137. */
  138. int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
  139. const struct cpumask *affinity)
  140. {
  141. cpumask_var_t update_mask;
  142. unsigned int cpu;
  143. if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
  144. return -ENOMEM;
  145. /* Invalidate distance for all CPUs for which this used to be
  146. * the nearest object. Mark those CPUs for update.
  147. */
  148. for_each_online_cpu(cpu) {
  149. if (rmap->near[cpu].index == index) {
  150. rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
  151. cpumask_set_cpu(cpu, update_mask);
  152. }
  153. }
  154. debug_print_rmap(rmap, "after invalidating old distances");
  155. /* Set distance to 0 for all CPUs in the new affinity mask.
  156. * Mark all CPUs within their NUMA nodes for update.
  157. */
  158. for_each_cpu(cpu, affinity) {
  159. rmap->near[cpu].index = index;
  160. rmap->near[cpu].dist = 0;
  161. cpumask_or(update_mask, update_mask,
  162. cpumask_of_node(cpu_to_node(cpu)));
  163. }
  164. debug_print_rmap(rmap, "after updating neighbours");
  165. /* Update distances based on topology */
  166. for_each_cpu(cpu, update_mask) {
  167. if (cpu_rmap_copy_neigh(rmap, cpu,
  168. topology_sibling_cpumask(cpu), 1))
  169. continue;
  170. if (cpu_rmap_copy_neigh(rmap, cpu,
  171. topology_core_cpumask(cpu), 2))
  172. continue;
  173. if (cpu_rmap_copy_neigh(rmap, cpu,
  174. cpumask_of_node(cpu_to_node(cpu)), 3))
  175. continue;
  176. /* We could continue into NUMA node distances, but for now
  177. * we give up.
  178. */
  179. }
  180. debug_print_rmap(rmap, "after copying neighbours");
  181. free_cpumask_var(update_mask);
  182. return 0;
  183. }
  184. EXPORT_SYMBOL(cpu_rmap_update);
  185. /* Glue between IRQ affinity notifiers and CPU rmaps */
  186. struct irq_glue {
  187. struct irq_affinity_notify notify;
  188. struct cpu_rmap *rmap;
  189. u16 index;
  190. };
  191. /**
  192. * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
  193. * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL
  194. *
  195. * Must be called in process context, before freeing the IRQs.
  196. */
  197. void free_irq_cpu_rmap(struct cpu_rmap *rmap)
  198. {
  199. struct irq_glue *glue;
  200. u16 index;
  201. if (!rmap)
  202. return;
  203. for (index = 0; index < rmap->used; index++) {
  204. glue = rmap->obj[index];
  205. irq_set_affinity_notifier(glue->notify.irq, NULL);
  206. }
  207. cpu_rmap_put(rmap);
  208. }
  209. EXPORT_SYMBOL(free_irq_cpu_rmap);
  210. /**
  211. * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
  212. * @notify: struct irq_affinity_notify passed by irq/manage.c
  213. * @mask: cpu mask for new SMP affinity
  214. *
  215. * This is executed in workqueue context.
  216. */
  217. static void
  218. irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
  219. {
  220. struct irq_glue *glue =
  221. container_of(notify, struct irq_glue, notify);
  222. int rc;
  223. rc = cpu_rmap_update(glue->rmap, glue->index, mask);
  224. if (rc)
  225. pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc);
  226. }
  227. /**
  228. * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
  229. * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
  230. */
  231. static void irq_cpu_rmap_release(struct kref *ref)
  232. {
  233. struct irq_glue *glue =
  234. container_of(ref, struct irq_glue, notify.kref);
  235. cpu_rmap_put(glue->rmap);
  236. kfree(glue);
  237. }
  238. /**
  239. * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
  240. * @rmap: The reverse-map
  241. * @irq: The IRQ number
  242. *
  243. * This adds an IRQ affinity notifier that will update the reverse-map
  244. * automatically.
  245. *
  246. * Must be called in process context, after the IRQ is allocated but
  247. * before it is bound with request_irq().
  248. */
  249. int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
  250. {
  251. struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
  252. int rc;
  253. if (!glue)
  254. return -ENOMEM;
  255. glue->notify.notify = irq_cpu_rmap_notify;
  256. glue->notify.release = irq_cpu_rmap_release;
  257. glue->rmap = rmap;
  258. cpu_rmap_get(rmap);
  259. glue->index = cpu_rmap_add(rmap, glue);
  260. rc = irq_set_affinity_notifier(irq, &glue->notify);
  261. if (rc) {
  262. cpu_rmap_put(glue->rmap);
  263. kfree(glue);
  264. }
  265. return rc;
  266. }
  267. EXPORT_SYMBOL(irq_cpu_rmap_add);