dir.c 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. /*
  2. * (C) 2001 Clemson University and The University of Chicago
  3. *
  4. * See COPYING in top-level directory.
  5. */
  6. #include "protocol.h"
  7. #include "orangefs-kernel.h"
  8. #include "orangefs-bufmap.h"
  9. /*
  10. * decode routine used by kmod to deal with the blob sent from
  11. * userspace for readdirs. The blob contains zero or more of these
  12. * sub-blobs:
  13. * __u32 - represents length of the character string that follows.
  14. * string - between 1 and ORANGEFS_NAME_MAX bytes long.
  15. * padding - (if needed) to cause the __u32 plus the string to be
  16. * eight byte aligned.
  17. * khandle - sizeof(khandle) bytes.
  18. */
  19. static long decode_dirents(char *ptr, size_t size,
  20. struct orangefs_readdir_response_s *readdir)
  21. {
  22. int i;
  23. struct orangefs_readdir_response_s *rd =
  24. (struct orangefs_readdir_response_s *) ptr;
  25. char *buf = ptr;
  26. int khandle_size = sizeof(struct orangefs_khandle);
  27. size_t offset = offsetof(struct orangefs_readdir_response_s,
  28. dirent_array);
  29. /* 8 reflects eight byte alignment */
  30. int smallest_blob = khandle_size + 8;
  31. __u32 len;
  32. int aligned_len;
  33. int sizeof_u32 = sizeof(__u32);
  34. long ret;
  35. gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size);
  36. /* size is = offset on empty dirs, > offset on non-empty dirs... */
  37. if (size < offset) {
  38. gossip_err("%s: size:%zu: offset:%zu:\n",
  39. __func__,
  40. size,
  41. offset);
  42. ret = -EINVAL;
  43. goto out;
  44. }
  45. if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) {
  46. gossip_err("%s: size:%zu: dirent_outcount:%d:\n",
  47. __func__,
  48. size,
  49. readdir->orangefs_dirent_outcount);
  50. ret = -EINVAL;
  51. goto out;
  52. }
  53. readdir->token = rd->token;
  54. readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount;
  55. readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount,
  56. sizeof(*readdir->dirent_array),
  57. GFP_KERNEL);
  58. if (readdir->dirent_array == NULL) {
  59. gossip_err("%s: kcalloc failed.\n", __func__);
  60. ret = -ENOMEM;
  61. goto out;
  62. }
  63. buf += offset;
  64. size -= offset;
  65. for (i = 0; i < readdir->orangefs_dirent_outcount; i++) {
  66. if (size < smallest_blob) {
  67. gossip_err("%s: size:%zu: smallest_blob:%d:\n",
  68. __func__,
  69. size,
  70. smallest_blob);
  71. ret = -EINVAL;
  72. goto free;
  73. }
  74. len = *(__u32 *)buf;
  75. if ((len < 1) || (len > ORANGEFS_NAME_MAX)) {
  76. gossip_err("%s: len:%d:\n", __func__, len);
  77. ret = -EINVAL;
  78. goto free;
  79. }
  80. gossip_debug(GOSSIP_DIR_DEBUG,
  81. "%s: size:%zu: len:%d:\n",
  82. __func__,
  83. size,
  84. len);
  85. readdir->dirent_array[i].d_name = buf + sizeof_u32;
  86. readdir->dirent_array[i].d_length = len;
  87. /*
  88. * Calculate "aligned" length of this string and its
  89. * associated __u32 descriptor.
  90. */
  91. aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7;
  92. gossip_debug(GOSSIP_DIR_DEBUG,
  93. "%s: aligned_len:%d:\n",
  94. __func__,
  95. aligned_len);
  96. /*
  97. * The end of the blob should coincide with the end
  98. * of the last sub-blob.
  99. */
  100. if (size < aligned_len + khandle_size) {
  101. gossip_err("%s: ran off the end of the blob.\n",
  102. __func__);
  103. ret = -EINVAL;
  104. goto free;
  105. }
  106. size -= aligned_len + khandle_size;
  107. buf += aligned_len;
  108. readdir->dirent_array[i].khandle =
  109. *(struct orangefs_khandle *) buf;
  110. buf += khandle_size;
  111. }
  112. ret = buf - ptr;
  113. gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret);
  114. goto out;
  115. free:
  116. kfree(readdir->dirent_array);
  117. readdir->dirent_array = NULL;
  118. out:
  119. return ret;
  120. }
  121. /*
  122. * Read directory entries from an instance of an open directory.
  123. */
  124. static int orangefs_readdir(struct file *file, struct dir_context *ctx)
  125. {
  126. int ret = 0;
  127. int buffer_index;
  128. /*
  129. * ptoken supports Orangefs' distributed directory logic, added
  130. * in 2.9.2.
  131. */
  132. __u64 *ptoken = file->private_data;
  133. __u64 pos = 0;
  134. ino_t ino = 0;
  135. struct dentry *dentry = file->f_path.dentry;
  136. struct orangefs_kernel_op_s *new_op = NULL;
  137. struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode);
  138. struct orangefs_readdir_response_s readdir_response;
  139. void *dents_buf;
  140. int i = 0;
  141. int len = 0;
  142. ino_t current_ino = 0;
  143. char *current_entry = NULL;
  144. long bytes_decoded;
  145. gossip_debug(GOSSIP_DIR_DEBUG,
  146. "%s: ctx->pos:%lld, ptoken = %llu\n",
  147. __func__,
  148. lld(ctx->pos),
  149. llu(*ptoken));
  150. pos = (__u64) ctx->pos;
  151. /* are we done? */
  152. if (pos == ORANGEFS_READDIR_END) {
  153. gossip_debug(GOSSIP_DIR_DEBUG,
  154. "Skipping to termination path\n");
  155. return 0;
  156. }
  157. gossip_debug(GOSSIP_DIR_DEBUG,
  158. "orangefs_readdir called on %pd (pos=%llu)\n",
  159. dentry, llu(pos));
  160. memset(&readdir_response, 0, sizeof(readdir_response));
  161. new_op = op_alloc(ORANGEFS_VFS_OP_READDIR);
  162. if (!new_op)
  163. return -ENOMEM;
  164. /*
  165. * Only the indices are shared. No memory is actually shared, but the
  166. * mechanism is used.
  167. */
  168. new_op->uses_shared_memory = 1;
  169. new_op->upcall.req.readdir.refn = orangefs_inode->refn;
  170. new_op->upcall.req.readdir.max_dirent_count =
  171. ORANGEFS_MAX_DIRENT_COUNT_READDIR;
  172. gossip_debug(GOSSIP_DIR_DEBUG,
  173. "%s: upcall.req.readdir.refn.khandle: %pU\n",
  174. __func__,
  175. &new_op->upcall.req.readdir.refn.khandle);
  176. new_op->upcall.req.readdir.token = *ptoken;
  177. get_new_buffer_index:
  178. buffer_index = orangefs_readdir_index_get();
  179. if (buffer_index < 0) {
  180. ret = buffer_index;
  181. gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n",
  182. ret);
  183. goto out_free_op;
  184. }
  185. new_op->upcall.req.readdir.buf_index = buffer_index;
  186. ret = service_operation(new_op,
  187. "orangefs_readdir",
  188. get_interruptible_flag(dentry->d_inode));
  189. gossip_debug(GOSSIP_DIR_DEBUG,
  190. "Readdir downcall status is %d. ret:%d\n",
  191. new_op->downcall.status,
  192. ret);
  193. orangefs_readdir_index_put(buffer_index);
  194. if (ret == -EAGAIN && op_state_purged(new_op)) {
  195. /* Client-core indices are invalid after it restarted. */
  196. gossip_debug(GOSSIP_DIR_DEBUG,
  197. "%s: Getting new buffer_index for retry of readdir..\n",
  198. __func__);
  199. goto get_new_buffer_index;
  200. }
  201. if (ret == -EIO && op_state_purged(new_op)) {
  202. gossip_err("%s: Client is down. Aborting readdir call.\n",
  203. __func__);
  204. goto out_free_op;
  205. }
  206. if (ret < 0 || new_op->downcall.status != 0) {
  207. gossip_debug(GOSSIP_DIR_DEBUG,
  208. "Readdir request failed. Status:%d\n",
  209. new_op->downcall.status);
  210. if (ret >= 0)
  211. ret = new_op->downcall.status;
  212. goto out_free_op;
  213. }
  214. dents_buf = new_op->downcall.trailer_buf;
  215. if (dents_buf == NULL) {
  216. gossip_err("Invalid NULL buffer in readdir response\n");
  217. ret = -ENOMEM;
  218. goto out_free_op;
  219. }
  220. bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size,
  221. &readdir_response);
  222. if (bytes_decoded < 0) {
  223. ret = bytes_decoded;
  224. gossip_err("Could not decode readdir from buffer %d\n", ret);
  225. goto out_vfree;
  226. }
  227. if (bytes_decoded != new_op->downcall.trailer_size) {
  228. gossip_err("orangefs_readdir: # bytes decoded (%ld) "
  229. "!= trailer size (%ld)\n",
  230. bytes_decoded,
  231. (long)new_op->downcall.trailer_size);
  232. ret = -EINVAL;
  233. goto out_destroy_handle;
  234. }
  235. /*
  236. * orangefs doesn't actually store dot and dot-dot, but
  237. * we need to have them represented.
  238. */
  239. if (pos == 0) {
  240. ino = get_ino_from_khandle(dentry->d_inode);
  241. gossip_debug(GOSSIP_DIR_DEBUG,
  242. "%s: calling dir_emit of \".\" with pos = %llu\n",
  243. __func__,
  244. llu(pos));
  245. ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
  246. pos += 1;
  247. }
  248. if (pos == 1) {
  249. ino = get_parent_ino_from_dentry(dentry);
  250. gossip_debug(GOSSIP_DIR_DEBUG,
  251. "%s: calling dir_emit of \"..\" with pos = %llu\n",
  252. __func__,
  253. llu(pos));
  254. ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
  255. pos += 1;
  256. }
  257. /*
  258. * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around
  259. * to prevent "finding" dot and dot-dot on any iteration
  260. * other than the first.
  261. */
  262. if (ctx->pos == ORANGEFS_ITERATE_NEXT)
  263. ctx->pos = 0;
  264. gossip_debug(GOSSIP_DIR_DEBUG,
  265. "%s: dirent_outcount:%d:\n",
  266. __func__,
  267. readdir_response.orangefs_dirent_outcount);
  268. for (i = ctx->pos;
  269. i < readdir_response.orangefs_dirent_outcount;
  270. i++) {
  271. len = readdir_response.dirent_array[i].d_length;
  272. current_entry = readdir_response.dirent_array[i].d_name;
  273. current_ino = orangefs_khandle_to_ino(
  274. &readdir_response.dirent_array[i].khandle);
  275. gossip_debug(GOSSIP_DIR_DEBUG,
  276. "calling dir_emit for %s with len %d"
  277. ", ctx->pos %ld\n",
  278. current_entry,
  279. len,
  280. (unsigned long)ctx->pos);
  281. /*
  282. * type is unknown. We don't return object type
  283. * in the dirent_array. This leaves getdents
  284. * clueless about type.
  285. */
  286. ret =
  287. dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
  288. if (!ret)
  289. break;
  290. ctx->pos++;
  291. gossip_debug(GOSSIP_DIR_DEBUG,
  292. "%s: ctx->pos:%lld\n",
  293. __func__,
  294. lld(ctx->pos));
  295. }
  296. /*
  297. * we ran all the way through the last batch, set up for
  298. * getting another batch...
  299. */
  300. if (ret) {
  301. *ptoken = readdir_response.token;
  302. ctx->pos = ORANGEFS_ITERATE_NEXT;
  303. }
  304. /*
  305. * Did we hit the end of the directory?
  306. */
  307. if (readdir_response.token == ORANGEFS_READDIR_END) {
  308. gossip_debug(GOSSIP_DIR_DEBUG,
  309. "End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n");
  310. ctx->pos = ORANGEFS_READDIR_END;
  311. }
  312. out_destroy_handle:
  313. /* kfree(NULL) is safe */
  314. kfree(readdir_response.dirent_array);
  315. out_vfree:
  316. gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf);
  317. vfree(dents_buf);
  318. out_free_op:
  319. op_release(new_op);
  320. gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret);
  321. return ret;
  322. }
  323. static int orangefs_dir_open(struct inode *inode, struct file *file)
  324. {
  325. __u64 *ptoken;
  326. file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
  327. if (!file->private_data)
  328. return -ENOMEM;
  329. ptoken = file->private_data;
  330. *ptoken = ORANGEFS_READDIR_START;
  331. return 0;
  332. }
  333. static int orangefs_dir_release(struct inode *inode, struct file *file)
  334. {
  335. orangefs_flush_inode(inode);
  336. kfree(file->private_data);
  337. return 0;
  338. }
  339. /** ORANGEFS implementation of VFS directory operations */
  340. const struct file_operations orangefs_dir_operations = {
  341. .read = generic_read_dir,
  342. .iterate = orangefs_readdir,
  343. .open = orangefs_dir_open,
  344. .release = orangefs_dir_release,
  345. };