blk-mq-tag.c

/*
 * Tag allocation using scalable bitmaps. Uses active queue tracking to support
 * fairer distribution of tags between multiple submitters when a shared tag map
 * is used.
 *
 * Copyright (C) 2013-2014 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>

#include <linux/blk-mq.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"
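
/*
 * Return true if at least one unreserved tag is free, i.e. the normal tag
 * bitmap still has a clear bit. A NULL tag set counts as having room.
 */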
bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
{
	if (!tags)
		return true;

	return sbitmap_any_bit_clear(&tags->bitmap_tags.sb);
}

/*
 * If a previously inactive queue goes active, bump the active user count.
 */
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
	    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		atomic_inc(&hctx->tags->active_queues);

	return true;
}

/*
 * Wake up all waiters potentially sleeping on tags.
 */
void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
{
	sbitmap_queue_wake_all(&tags->bitmap_tags);
	if (include_reserve)
		sbitmap_queue_wake_all(&tags->breserved_tags);
}

/*
 * If a previously busy queue goes inactive, potential waiters could now
 * be allowed to queue. Wake them up and check.
 */
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
	struct blk_mq_tags *tags = hctx->tags;

	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		return;

	atomic_dec(&tags->active_queues);

	blk_mq_tag_wakeup_all(tags, false);
}

/*
 * For shared tag users, we track the number of currently active users
 * and attempt to provide a fair share of the tag depth for each of them.
 */
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
				  struct sbitmap_queue *bt)
{
	unsigned int depth, users;

	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
		return true;
	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		return true;

	/*
	 * Don't try dividing an ant
	 */
	if (bt->sb.depth == 1)
		return true;

	users = atomic_read(&hctx->tags->active_queues);
	if (!users)
		return true;

	/*
	 * Allow at least some tags
	 */
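	/*
	 * The per-queue limit below is the tag depth divided evenly (rounding
	 * up) among the active queues, with a floor of four tags. For
	 * example, a depth of 128 shared by three active queues allows each
	 * queue up to (128 + 3 - 1) / 3 = 43 tags.
	 */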
	depth = max((bt->sb.depth + users - 1) / users, 4U);
	return atomic_read(&hctx->nr_active) < depth;
}
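
/*
 * Try to grab a free tag without sleeping. Returns the tag index, or -1 if
 * no tag is available or this hctx has already used up its fair share.
 */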
static int __bt_get(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt)
{
	if (!hctx_may_queue(hctx, bt))
		return -1;

	return __sbitmap_queue_get(bt);
}
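
/*
 * Slow path for tag allocation: sleep on the bitmap's wait queue until a tag
 * becomes available, kicking the hardware queue between attempts. The ctx,
 * hctx and bitmap are re-resolved after sleeping, since the task may wake up
 * on a different CPU.
 */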
static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt,
		  struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags)
{
	struct sbq_wait_state *ws;
	DEFINE_WAIT(wait);
	int tag;

	tag = __bt_get(hctx, bt);
	if (tag != -1)
		return tag;

	if (data->flags & BLK_MQ_REQ_NOWAIT)
		return -1;

	ws = bt_wait_ptr(bt, hctx);
	do {
		prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);

		tag = __bt_get(hctx, bt);
		if (tag != -1)
			break;

		/*
		 * We're out of tags on this hardware queue, kick any
		 * pending IO submits before going to sleep waiting for
		 * some to complete. Note that hctx can be NULL here for
		 * reserved tag allocation.
		 */
		if (hctx)
			blk_mq_run_hw_queue(hctx, false);

		/*
		 * Retry tag allocation after running the hardware queue,
		 * as running the queue may also have found completions.
		 */
		tag = __bt_get(hctx, bt);
		if (tag != -1)
			break;

		blk_mq_put_ctx(data->ctx);

		io_schedule();

		data->ctx = blk_mq_get_ctx(data->q);
		data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
		if (data->flags & BLK_MQ_REQ_RESERVED) {
			bt = &data->hctx->tags->breserved_tags;
		} else {
			hctx = data->hctx;
			bt = &hctx->tags->bitmap_tags;
		}

		finish_wait(&ws->wait, &wait);
		ws = bt_wait_ptr(bt, hctx);
	} while (1);

	finish_wait(&ws->wait, &wait);
	return tag;
}
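
/*
 * Allocate a normal (unreserved) tag. The value returned to the caller is in
 * the global tag space, i.e. offset by the number of reserved tags.
 */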
static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
	int tag;

	tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
		     data->hctx->tags);
	if (tag >= 0)
		return tag + data->hctx->tags->nr_reserved_tags;

	return BLK_MQ_TAG_FAIL;
}

static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
{
	int tag;

	if (unlikely(!data->hctx->tags->nr_reserved_tags)) {
		WARN_ON_ONCE(1);
		return BLK_MQ_TAG_FAIL;
	}

	tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL,
		     data->hctx->tags);
	if (tag < 0)
		return BLK_MQ_TAG_FAIL;

	return tag;
}

unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
	if (data->flags & BLK_MQ_REQ_RESERVED)
		return __blk_mq_get_reserved_tag(data);
	return __blk_mq_get_tag(data);
}
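
/*
 * Release a tag. Tags at or above nr_reserved_tags belong to the normal
 * bitmap (after subtracting the reserved offset); anything below that is a
 * reserved tag.
 */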
void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
		    unsigned int tag)
{
	struct blk_mq_tags *tags = hctx->tags;

	if (tag >= tags->nr_reserved_tags) {
		const int real_tag = tag - tags->nr_reserved_tags;

		BUG_ON(real_tag >= tags->nr_tags);
		sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
	} else {
		BUG_ON(tag >= tags->nr_reserved_tags);
		sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
	}
}
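
/*
 * Helpers for walking busy tags: for every set bit in a tag bitmap, look up
 * the corresponding request and hand it to the caller's callback. Bits in the
 * normal bitmap are offset by nr_reserved_tags to recover the global tag.
 */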
struct bt_iter_data {
	struct blk_mq_hw_ctx *hctx;
	busy_iter_fn *fn;
	void *data;
	bool reserved;
};

static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
	struct bt_iter_data *iter_data = data;
	struct blk_mq_hw_ctx *hctx = iter_data->hctx;
	struct blk_mq_tags *tags = hctx->tags;
	bool reserved = iter_data->reserved;
	struct request *rq;

	if (!reserved)
		bitnr += tags->nr_reserved_tags;
	rq = tags->rqs[bitnr];

	if (rq->q == hctx->queue)
		iter_data->fn(hctx, rq, iter_data->data, reserved);
	return true;
}

static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
			busy_iter_fn *fn, void *data, bool reserved)
{
	struct bt_iter_data iter_data = {
		.hctx = hctx,
		.fn = fn,
		.data = data,
		.reserved = reserved,
	};

	sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
}

struct bt_tags_iter_data {
	struct blk_mq_tags *tags;
	busy_tag_iter_fn *fn;
	void *data;
	bool reserved;
};

static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
	struct bt_tags_iter_data *iter_data = data;
	struct blk_mq_tags *tags = iter_data->tags;
	bool reserved = iter_data->reserved;
	struct request *rq;

	if (!reserved)
		bitnr += tags->nr_reserved_tags;
	rq = tags->rqs[bitnr];

	iter_data->fn(rq, iter_data->data, reserved);
	return true;
}

static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
			     busy_tag_iter_fn *fn, void *data, bool reserved)
{
	struct bt_tags_iter_data iter_data = {
		.tags = tags,
		.fn = fn,
		.data = data,
		.reserved = reserved,
	};

	if (tags->rqs)
		sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
}

static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
		busy_tag_iter_fn *fn, void *priv)
{
	if (tags->nr_reserved_tags)
		bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, true);
	bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false);
}
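
/*
 * Iterate over every busy (allocated) tag in the set's hardware queues,
 * reserved tags first, invoking @fn on the associated request.
 */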
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
		busy_tag_iter_fn *fn, void *priv)
{
	int i;

	for (i = 0; i < tagset->nr_hw_queues; i++) {
		if (tagset->tags && tagset->tags[i])
			blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
	}
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
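
/*
 * Run the driver's ->reinit_request() hook on every allocated request in the
 * tag set. Stops at and returns the first error the hook reports; does
 * nothing if the driver has no such hook.
 */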
int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
{
	int i, j, ret = 0;

	if (!set->ops->reinit_request)
		goto out;

	for (i = 0; i < set->nr_hw_queues; i++) {
		struct blk_mq_tags *tags = set->tags[i];

		if (!tags)
			continue;

		for (j = 0; j < tags->nr_tags; j++) {
			if (!tags->rqs[j])
				continue;

			ret = set->ops->reinit_request(set->driver_data,
						tags->rqs[j]);
			if (ret)
				goto out;
		}
	}

out:
	return ret;
}
EXPORT_SYMBOL_GPL(blk_mq_reinit_tagset);
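
/*
 * Like blk_mq_tagset_busy_iter(), but restricted to a single request queue:
 * walk the busy tags of every mapped hardware context on @q.
 */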
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
		void *priv)
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		struct blk_mq_tags *tags = hctx->tags;

		/*
		 * If no software queues are currently mapped to this
		 * hardware queue, there's nothing to check
		 */
		if (!blk_mq_hw_queue_mapped(hctx))
			continue;

		if (tags->nr_reserved_tags)
			bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
		bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
	}
}

static unsigned int bt_unused_tags(const struct sbitmap_queue *bt)
{
	return bt->sb.depth - sbitmap_weight(&bt->sb);
}

static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
		    bool round_robin, int node)
{
	return sbitmap_queue_init_node(bt, depth, -1, round_robin, GFP_KERNEL,
				       node);
}

static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
						   int node, int alloc_policy)
{
	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
	bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;

	if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
		goto free_tags;
	if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin,
		     node))
		goto free_bitmap_tags;

	return tags;
free_bitmap_tags:
	sbitmap_queue_free(&tags->bitmap_tags);
free_tags:
	kfree(tags);
	return NULL;
}

struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
				     unsigned int reserved_tags,
				     int node, int alloc_policy)
{
	struct blk_mq_tags *tags;

	if (total_tags > BLK_MQ_TAG_MAX) {
		pr_err("blk-mq: tag depth too large\n");
		return NULL;
	}

	tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
	if (!tags)
		return NULL;

	tags->nr_tags = total_tags;
	tags->nr_reserved_tags = reserved_tags;

	return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
}

void blk_mq_free_tags(struct blk_mq_tags *tags)
{
	sbitmap_queue_free(&tags->bitmap_tags);
	sbitmap_queue_free(&tags->breserved_tags);
	kfree(tags);
}
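
/*
 * Resize the normal tag bitmap. The requested depth still includes the
 * reserved tags, which are never resized; growing past the originally
 * allocated nr_tags is rejected.
 */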
int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
{
	tdepth -= tags->nr_reserved_tags;
	if (tdepth > tags->nr_tags)
		return -EINVAL;

	/*
	 * We don't need to (and can't) update reserved tags here; they remain
	 * static and should never need resizing.
	 */
	sbitmap_queue_resize(&tags->bitmap_tags, tdepth);

	blk_mq_tag_wakeup_all(tags, false);
	return 0;
}

/**
 * blk_mq_unique_tag() - return a tag that is unique queue-wide
 * @rq: request for which to compute a unique tag
 *
 * The tag field in struct request is unique per hardware queue but not over
 * all hardware queues. Hence this function, which returns a tag with the
 * hardware context index in the upper bits and the per hardware queue tag in
 * the lower bits.
 *
 * Note: When called for a request that is queued on a non-multiqueue request
 * queue, the hardware context index is set to zero.
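 *
 * The two halves can be recovered with blk_mq_unique_tag_to_hwq() and
 * blk_mq_unique_tag_to_tag(), defined in blk-mq.h.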
 */
u32 blk_mq_unique_tag(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct blk_mq_hw_ctx *hctx;
	int hwq = 0;

	if (q->mq_ops) {
		hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
		hwq = hctx->queue_num;
	}

	return (hwq << BLK_MQ_UNIQUE_TAG_BITS) |
		(rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
}
EXPORT_SYMBOL(blk_mq_unique_tag);

ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
{
	char *orig_page = page;
	unsigned int free, res;

	if (!tags)
		return 0;

	page += sprintf(page, "nr_tags=%u, reserved_tags=%u, "
			"bits_per_word=%u\n",
			tags->nr_tags, tags->nr_reserved_tags,
			1U << tags->bitmap_tags.sb.shift);

	free = bt_unused_tags(&tags->bitmap_tags);
	res = bt_unused_tags(&tags->breserved_tags);

	page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res);
	page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues));

	return page - orig_page;
}