stree.c 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262
  1. /*
  2. * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
  3. */
  4. /*
  5. * Written by Anatoly P. Pinchuk pap@namesys.botik.ru
  6. * Programm System Institute
  7. * Pereslavl-Zalessky Russia
  8. */
  9. #include <linux/time.h>
  10. #include <linux/string.h>
  11. #include <linux/pagemap.h>
  12. #include "reiserfs.h"
  13. #include <linux/buffer_head.h>
  14. #include <linux/quotaops.h>
  15. /* Does the buffer contain a disk block which is in the tree. */
  16. inline int B_IS_IN_TREE(const struct buffer_head *bh)
  17. {
  18. RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
  19. "PAP-1010: block (%b) has too big level (%z)", bh, bh);
  20. return (B_LEVEL(bh) != FREE_LEVEL);
  21. }
  22. /* to get item head in le form */
  23. inline void copy_item_head(struct item_head *to,
  24. const struct item_head *from)
  25. {
  26. memcpy(to, from, IH_SIZE);
  27. }
  28. /*
  29. * k1 is pointer to on-disk structure which is stored in little-endian
  30. * form. k2 is pointer to cpu variable. For key of items of the same
  31. * object this returns 0.
  32. * Returns: -1 if key1 < key2
  33. * 0 if key1 == key2
  34. * 1 if key1 > key2
  35. */
  36. inline int comp_short_keys(const struct reiserfs_key *le_key,
  37. const struct cpu_key *cpu_key)
  38. {
  39. __u32 n;
  40. n = le32_to_cpu(le_key->k_dir_id);
  41. if (n < cpu_key->on_disk_key.k_dir_id)
  42. return -1;
  43. if (n > cpu_key->on_disk_key.k_dir_id)
  44. return 1;
  45. n = le32_to_cpu(le_key->k_objectid);
  46. if (n < cpu_key->on_disk_key.k_objectid)
  47. return -1;
  48. if (n > cpu_key->on_disk_key.k_objectid)
  49. return 1;
  50. return 0;
  51. }
  52. /*
  53. * k1 is pointer to on-disk structure which is stored in little-endian
  54. * form. k2 is pointer to cpu variable.
  55. * Compare keys using all 4 key fields.
  56. * Returns: -1 if key1 < key2 0
  57. * if key1 = key2 1 if key1 > key2
  58. */
  59. static inline int comp_keys(const struct reiserfs_key *le_key,
  60. const struct cpu_key *cpu_key)
  61. {
  62. int retval;
  63. retval = comp_short_keys(le_key, cpu_key);
  64. if (retval)
  65. return retval;
  66. if (le_key_k_offset(le_key_version(le_key), le_key) <
  67. cpu_key_k_offset(cpu_key))
  68. return -1;
  69. if (le_key_k_offset(le_key_version(le_key), le_key) >
  70. cpu_key_k_offset(cpu_key))
  71. return 1;
  72. if (cpu_key->key_length == 3)
  73. return 0;
  74. /* this part is needed only when tail conversion is in progress */
  75. if (le_key_k_type(le_key_version(le_key), le_key) <
  76. cpu_key_k_type(cpu_key))
  77. return -1;
  78. if (le_key_k_type(le_key_version(le_key), le_key) >
  79. cpu_key_k_type(cpu_key))
  80. return 1;
  81. return 0;
  82. }
  83. inline int comp_short_le_keys(const struct reiserfs_key *key1,
  84. const struct reiserfs_key *key2)
  85. {
  86. __u32 *k1_u32, *k2_u32;
  87. int key_length = REISERFS_SHORT_KEY_LEN;
  88. k1_u32 = (__u32 *) key1;
  89. k2_u32 = (__u32 *) key2;
  90. for (; key_length--; ++k1_u32, ++k2_u32) {
  91. if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
  92. return -1;
  93. if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
  94. return 1;
  95. }
  96. return 0;
  97. }
  98. inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
  99. {
  100. int version;
  101. to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
  102. to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
  103. /* find out version of the key */
  104. version = le_key_version(from);
  105. to->version = version;
  106. to->on_disk_key.k_offset = le_key_k_offset(version, from);
  107. to->on_disk_key.k_type = le_key_k_type(version, from);
  108. }
  109. /*
  110. * this does not say which one is bigger, it only returns 1 if keys
  111. * are not equal, 0 otherwise
  112. */
  113. inline int comp_le_keys(const struct reiserfs_key *k1,
  114. const struct reiserfs_key *k2)
  115. {
  116. return memcmp(k1, k2, sizeof(struct reiserfs_key));
  117. }
  118. /**************************************************************************
  119. * Binary search toolkit function *
  120. * Search for an item in the array by the item key *
  121. * Returns: 1 if found, 0 if not found; *
  122. * *pos = number of the searched element if found, else the *
  123. * number of the first element that is larger than key. *
  124. **************************************************************************/
  125. /*
  126. * For those not familiar with binary search: lbound is the leftmost item
  127. * that it could be, rbound the rightmost item that it could be. We examine
  128. * the item halfway between lbound and rbound, and that tells us either
  129. * that we can increase lbound, or decrease rbound, or that we have found it,
  130. * or if lbound <= rbound that there are no possible items, and we have not
  131. * found it. With each examination we cut the number of possible items it
  132. * could be by one more than half rounded down, or we find it.
  133. */
  134. static inline int bin_search(const void *key, /* Key to search for. */
  135. const void *base, /* First item in the array. */
  136. int num, /* Number of items in the array. */
  137. /*
  138. * Item size in the array. searched. Lest the
  139. * reader be confused, note that this is crafted
  140. * as a general function, and when it is applied
  141. * specifically to the array of item headers in a
  142. * node, width is actually the item header size
  143. * not the item size.
  144. */
  145. int width,
  146. int *pos /* Number of the searched for element. */
  147. )
  148. {
  149. int rbound, lbound, j;
  150. for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
  151. lbound <= rbound; j = (rbound + lbound) / 2)
  152. switch (comp_keys
  153. ((struct reiserfs_key *)((char *)base + j * width),
  154. (struct cpu_key *)key)) {
  155. case -1:
  156. lbound = j + 1;
  157. continue;
  158. case 1:
  159. rbound = j - 1;
  160. continue;
  161. case 0:
  162. *pos = j;
  163. return ITEM_FOUND; /* Key found in the array. */
  164. }
  165. /*
  166. * bin_search did not find given key, it returns position of key,
  167. * that is minimal and greater than the given one.
  168. */
  169. *pos = lbound;
  170. return ITEM_NOT_FOUND;
  171. }
  172. /* Minimal possible key. It is never in the tree. */
  173. const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
  174. /* Maximal possible key. It is never in the tree. */
  175. static const struct reiserfs_key MAX_KEY = {
  176. cpu_to_le32(0xffffffff),
  177. cpu_to_le32(0xffffffff),
  178. {{cpu_to_le32(0xffffffff),
  179. cpu_to_le32(0xffffffff)},}
  180. };
  181. /*
  182. * Get delimiting key of the buffer by looking for it in the buffers in the
  183. * path, starting from the bottom of the path, and going upwards. We must
  184. * check the path's validity at each step. If the key is not in the path,
  185. * there is no delimiting key in the tree (buffer is first or last buffer
  186. * in tree), and in this case we return a special key, either MIN_KEY or
  187. * MAX_KEY.
  188. */
  189. static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
  190. const struct super_block *sb)
  191. {
  192. int position, path_offset = chk_path->path_length;
  193. struct buffer_head *parent;
  194. RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
  195. "PAP-5010: invalid offset in the path");
  196. /* While not higher in path than first element. */
  197. while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
  198. RFALSE(!buffer_uptodate
  199. (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
  200. "PAP-5020: parent is not uptodate");
  201. /* Parent at the path is not in the tree now. */
  202. if (!B_IS_IN_TREE
  203. (parent =
  204. PATH_OFFSET_PBUFFER(chk_path, path_offset)))
  205. return &MAX_KEY;
  206. /* Check whether position in the parent is correct. */
  207. if ((position =
  208. PATH_OFFSET_POSITION(chk_path,
  209. path_offset)) >
  210. B_NR_ITEMS(parent))
  211. return &MAX_KEY;
  212. /* Check whether parent at the path really points to the child. */
  213. if (B_N_CHILD_NUM(parent, position) !=
  214. PATH_OFFSET_PBUFFER(chk_path,
  215. path_offset + 1)->b_blocknr)
  216. return &MAX_KEY;
  217. /*
  218. * Return delimiting key if position in the parent
  219. * is not equal to zero.
  220. */
  221. if (position)
  222. return internal_key(parent, position - 1);
  223. }
  224. /* Return MIN_KEY if we are in the root of the buffer tree. */
  225. if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
  226. b_blocknr == SB_ROOT_BLOCK(sb))
  227. return &MIN_KEY;
  228. return &MAX_KEY;
  229. }
  230. /* Get delimiting key of the buffer at the path and its right neighbor. */
  231. inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
  232. const struct super_block *sb)
  233. {
  234. int position, path_offset = chk_path->path_length;
  235. struct buffer_head *parent;
  236. RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
  237. "PAP-5030: invalid offset in the path");
  238. while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
  239. RFALSE(!buffer_uptodate
  240. (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
  241. "PAP-5040: parent is not uptodate");
  242. /* Parent at the path is not in the tree now. */
  243. if (!B_IS_IN_TREE
  244. (parent =
  245. PATH_OFFSET_PBUFFER(chk_path, path_offset)))
  246. return &MIN_KEY;
  247. /* Check whether position in the parent is correct. */
  248. if ((position =
  249. PATH_OFFSET_POSITION(chk_path,
  250. path_offset)) >
  251. B_NR_ITEMS(parent))
  252. return &MIN_KEY;
  253. /*
  254. * Check whether parent at the path really points
  255. * to the child.
  256. */
  257. if (B_N_CHILD_NUM(parent, position) !=
  258. PATH_OFFSET_PBUFFER(chk_path,
  259. path_offset + 1)->b_blocknr)
  260. return &MIN_KEY;
  261. /*
  262. * Return delimiting key if position in the parent
  263. * is not the last one.
  264. */
  265. if (position != B_NR_ITEMS(parent))
  266. return internal_key(parent, position);
  267. }
  268. /* Return MAX_KEY if we are in the root of the buffer tree. */
  269. if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
  270. b_blocknr == SB_ROOT_BLOCK(sb))
  271. return &MAX_KEY;
  272. return &MIN_KEY;
  273. }
  274. /*
  275. * Check whether a key is contained in the tree rooted from a buffer at a path.
  276. * This works by looking at the left and right delimiting keys for the buffer
  277. * in the last path_element in the path. These delimiting keys are stored
  278. * at least one level above that buffer in the tree. If the buffer is the
  279. * first or last node in the tree order then one of the delimiting keys may
  280. * be absent, and in this case get_lkey and get_rkey return a special key
  281. * which is MIN_KEY or MAX_KEY.
  282. */
  283. static inline int key_in_buffer(
  284. /* Path which should be checked. */
  285. struct treepath *chk_path,
  286. /* Key which should be checked. */
  287. const struct cpu_key *key,
  288. struct super_block *sb
  289. )
  290. {
  291. RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
  292. || chk_path->path_length > MAX_HEIGHT,
  293. "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
  294. key, chk_path->path_length);
  295. RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
  296. "PAP-5060: device must not be NODEV");
  297. if (comp_keys(get_lkey(chk_path, sb), key) == 1)
  298. /* left delimiting key is bigger, that the key we look for */
  299. return 0;
  300. /* if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
  301. if (comp_keys(get_rkey(chk_path, sb), key) != 1)
  302. /* key must be less than right delimitiing key */
  303. return 0;
  304. return 1;
  305. }
  306. int reiserfs_check_path(struct treepath *p)
  307. {
  308. RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
  309. "path not properly relsed");
  310. return 0;
  311. }
  312. /*
  313. * Drop the reference to each buffer in a path and restore
  314. * dirty bits clean when preparing the buffer for the log.
  315. * This version should only be called from fix_nodes()
  316. */
  317. void pathrelse_and_restore(struct super_block *sb,
  318. struct treepath *search_path)
  319. {
  320. int path_offset = search_path->path_length;
  321. RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
  322. "clm-4000: invalid path offset");
  323. while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
  324. struct buffer_head *bh;
  325. bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
  326. reiserfs_restore_prepared_buffer(sb, bh);
  327. brelse(bh);
  328. }
  329. search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
  330. }
  331. /* Drop the reference to each buffer in a path */
  332. void pathrelse(struct treepath *search_path)
  333. {
  334. int path_offset = search_path->path_length;
  335. RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
  336. "PAP-5090: invalid path offset");
  337. while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
  338. brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
  339. search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
  340. }
  341. static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
  342. {
  343. struct block_head *blkh;
  344. struct item_head *ih;
  345. int used_space;
  346. int prev_location;
  347. int i;
  348. int nr;
  349. blkh = (struct block_head *)buf;
  350. if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
  351. reiserfs_warning(NULL, "reiserfs-5080",
  352. "this should be caught earlier");
  353. return 0;
  354. }
  355. nr = blkh_nr_item(blkh);
  356. if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
  357. /* item number is too big or too small */
  358. reiserfs_warning(NULL, "reiserfs-5081",
  359. "nr_item seems wrong: %z", bh);
  360. return 0;
  361. }
  362. ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
  363. used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
  364. /* free space does not match to calculated amount of use space */
  365. if (used_space != blocksize - blkh_free_space(blkh)) {
  366. reiserfs_warning(NULL, "reiserfs-5082",
  367. "free space seems wrong: %z", bh);
  368. return 0;
  369. }
  370. /*
  371. * FIXME: it is_leaf will hit performance too much - we may have
  372. * return 1 here
  373. */
  374. /* check tables of item heads */
  375. ih = (struct item_head *)(buf + BLKH_SIZE);
  376. prev_location = blocksize;
  377. for (i = 0; i < nr; i++, ih++) {
  378. if (le_ih_k_type(ih) == TYPE_ANY) {
  379. reiserfs_warning(NULL, "reiserfs-5083",
  380. "wrong item type for item %h",
  381. ih);
  382. return 0;
  383. }
  384. if (ih_location(ih) >= blocksize
  385. || ih_location(ih) < IH_SIZE * nr) {
  386. reiserfs_warning(NULL, "reiserfs-5084",
  387. "item location seems wrong: %h",
  388. ih);
  389. return 0;
  390. }
  391. if (ih_item_len(ih) < 1
  392. || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
  393. reiserfs_warning(NULL, "reiserfs-5085",
  394. "item length seems wrong: %h",
  395. ih);
  396. return 0;
  397. }
  398. if (prev_location - ih_location(ih) != ih_item_len(ih)) {
  399. reiserfs_warning(NULL, "reiserfs-5086",
  400. "item location seems wrong "
  401. "(second one): %h", ih);
  402. return 0;
  403. }
  404. prev_location = ih_location(ih);
  405. }
  406. /* one may imagine many more checks */
  407. return 1;
  408. }
  409. /* returns 1 if buf looks like an internal node, 0 otherwise */
  410. static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
  411. {
  412. struct block_head *blkh;
  413. int nr;
  414. int used_space;
  415. blkh = (struct block_head *)buf;
  416. nr = blkh_level(blkh);
  417. if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
  418. /* this level is not possible for internal nodes */
  419. reiserfs_warning(NULL, "reiserfs-5087",
  420. "this should be caught earlier");
  421. return 0;
  422. }
  423. nr = blkh_nr_item(blkh);
  424. /* for internal which is not root we might check min number of keys */
  425. if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
  426. reiserfs_warning(NULL, "reiserfs-5088",
  427. "number of key seems wrong: %z", bh);
  428. return 0;
  429. }
  430. used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
  431. if (used_space != blocksize - blkh_free_space(blkh)) {
  432. reiserfs_warning(NULL, "reiserfs-5089",
  433. "free space seems wrong: %z", bh);
  434. return 0;
  435. }
  436. /* one may imagine many more checks */
  437. return 1;
  438. }
  439. /*
  440. * make sure that bh contains formatted node of reiserfs tree of
  441. * 'level'-th level
  442. */
  443. static int is_tree_node(struct buffer_head *bh, int level)
  444. {
  445. if (B_LEVEL(bh) != level) {
  446. reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
  447. "not match to the expected one %d",
  448. B_LEVEL(bh), level);
  449. return 0;
  450. }
  451. if (level == DISK_LEAF_NODE_LEVEL)
  452. return is_leaf(bh->b_data, bh->b_size, bh);
  453. return is_internal(bh->b_data, bh->b_size, bh);
  454. }
  455. #define SEARCH_BY_KEY_READA 16
  456. /*
  457. * The function is NOT SCHEDULE-SAFE!
  458. * It might unlock the write lock if we needed to wait for a block
  459. * to be read. Note that in this case it won't recover the lock to avoid
  460. * high contention resulting from too much lock requests, especially
  461. * the caller (search_by_key) will perform other schedule-unsafe
  462. * operations just after calling this function.
  463. *
  464. * @return depth of lock to be restored after read completes
  465. */
  466. static int search_by_key_reada(struct super_block *s,
  467. struct buffer_head **bh,
  468. b_blocknr_t *b, int num)
  469. {
  470. int i, j;
  471. int depth = -1;
  472. for (i = 0; i < num; i++) {
  473. bh[i] = sb_getblk(s, b[i]);
  474. }
  475. /*
  476. * We are going to read some blocks on which we
  477. * have a reference. It's safe, though we might be
  478. * reading blocks concurrently changed if we release
  479. * the lock. But it's still fine because we check later
  480. * if the tree changed
  481. */
  482. for (j = 0; j < i; j++) {
  483. /*
  484. * note, this needs attention if we are getting rid of the BKL
  485. * you have to make sure the prepared bit isn't set on this
  486. * buffer
  487. */
  488. if (!buffer_uptodate(bh[j])) {
  489. if (depth == -1)
  490. depth = reiserfs_write_unlock_nested(s);
  491. ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, bh + j);
  492. }
  493. brelse(bh[j]);
  494. }
  495. return depth;
  496. }
  497. /*
  498. * This function fills up the path from the root to the leaf as it
  499. * descends the tree looking for the key. It uses reiserfs_bread to
  500. * try to find buffers in the cache given their block number. If it
  501. * does not find them in the cache it reads them from disk. For each
  502. * node search_by_key finds using reiserfs_bread it then uses
  503. * bin_search to look through that node. bin_search will find the
  504. * position of the block_number of the next node if it is looking
  505. * through an internal node. If it is looking through a leaf node
  506. * bin_search will find the position of the item which has key either
  507. * equal to given key, or which is the maximal key less than the given
  508. * key. search_by_key returns a path that must be checked for the
  509. * correctness of the top of the path but need not be checked for the
  510. * correctness of the bottom of the path
  511. */
  512. /*
  513. * search_by_key - search for key (and item) in stree
  514. * @sb: superblock
  515. * @key: pointer to key to search for
  516. * @search_path: Allocated and initialized struct treepath; Returned filled
  517. * on success.
  518. * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to
  519. * stop at leaf level.
  520. *
  521. * The function is NOT SCHEDULE-SAFE!
  522. */
  523. int search_by_key(struct super_block *sb, const struct cpu_key *key,
  524. struct treepath *search_path, int stop_level)
  525. {
  526. b_blocknr_t block_number;
  527. int expected_level;
  528. struct buffer_head *bh;
  529. struct path_element *last_element;
  530. int node_level, retval;
  531. int right_neighbor_of_leaf_node;
  532. int fs_gen;
  533. struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
  534. b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
  535. int reada_count = 0;
  536. #ifdef CONFIG_REISERFS_CHECK
  537. int repeat_counter = 0;
  538. #endif
  539. PROC_INFO_INC(sb, search_by_key);
  540. /*
  541. * As we add each node to a path we increase its count. This means
  542. * that we must be careful to release all nodes in a path before we
  543. * either discard the path struct or re-use the path struct, as we
  544. * do here.
  545. */
  546. pathrelse(search_path);
  547. right_neighbor_of_leaf_node = 0;
  548. /*
  549. * With each iteration of this loop we search through the items in the
  550. * current node, and calculate the next current node(next path element)
  551. * for the next iteration of this loop..
  552. */
  553. block_number = SB_ROOT_BLOCK(sb);
  554. expected_level = -1;
  555. while (1) {
  556. #ifdef CONFIG_REISERFS_CHECK
  557. if (!(++repeat_counter % 50000))
  558. reiserfs_warning(sb, "PAP-5100",
  559. "%s: there were %d iterations of "
  560. "while loop looking for key %K",
  561. current->comm, repeat_counter,
  562. key);
  563. #endif
  564. /* prep path to have another element added to it. */
  565. last_element =
  566. PATH_OFFSET_PELEMENT(search_path,
  567. ++search_path->path_length);
  568. fs_gen = get_generation(sb);
  569. /*
  570. * Read the next tree node, and set the last element
  571. * in the path to have a pointer to it.
  572. */
  573. if ((bh = last_element->pe_buffer =
  574. sb_getblk(sb, block_number))) {
  575. /*
  576. * We'll need to drop the lock if we encounter any
  577. * buffers that need to be read. If all of them are
  578. * already up to date, we don't need to drop the lock.
  579. */
  580. int depth = -1;
  581. if (!buffer_uptodate(bh) && reada_count > 1)
  582. depth = search_by_key_reada(sb, reada_bh,
  583. reada_blocks, reada_count);
  584. if (!buffer_uptodate(bh) && depth == -1)
  585. depth = reiserfs_write_unlock_nested(sb);
  586. ll_rw_block(REQ_OP_READ, 0, 1, &bh);
  587. wait_on_buffer(bh);
  588. if (depth != -1)
  589. reiserfs_write_lock_nested(sb, depth);
  590. if (!buffer_uptodate(bh))
  591. goto io_error;
  592. } else {
  593. io_error:
  594. search_path->path_length--;
  595. pathrelse(search_path);
  596. return IO_ERROR;
  597. }
  598. reada_count = 0;
  599. if (expected_level == -1)
  600. expected_level = SB_TREE_HEIGHT(sb);
  601. expected_level--;
  602. /*
  603. * It is possible that schedule occurred. We must check
  604. * whether the key to search is still in the tree rooted
  605. * from the current buffer. If not then repeat search
  606. * from the root.
  607. */
  608. if (fs_changed(fs_gen, sb) &&
  609. (!B_IS_IN_TREE(bh) ||
  610. B_LEVEL(bh) != expected_level ||
  611. !key_in_buffer(search_path, key, sb))) {
  612. PROC_INFO_INC(sb, search_by_key_fs_changed);
  613. PROC_INFO_INC(sb, search_by_key_restarted);
  614. PROC_INFO_INC(sb,
  615. sbk_restarted[expected_level - 1]);
  616. pathrelse(search_path);
  617. /*
  618. * Get the root block number so that we can
  619. * repeat the search starting from the root.
  620. */
  621. block_number = SB_ROOT_BLOCK(sb);
  622. expected_level = -1;
  623. right_neighbor_of_leaf_node = 0;
  624. /* repeat search from the root */
  625. continue;
  626. }
  627. /*
  628. * only check that the key is in the buffer if key is not
  629. * equal to the MAX_KEY. Latter case is only possible in
  630. * "finish_unfinished()" processing during mount.
  631. */
  632. RFALSE(comp_keys(&MAX_KEY, key) &&
  633. !key_in_buffer(search_path, key, sb),
  634. "PAP-5130: key is not in the buffer");
  635. #ifdef CONFIG_REISERFS_CHECK
  636. if (REISERFS_SB(sb)->cur_tb) {
  637. print_cur_tb("5140");
  638. reiserfs_panic(sb, "PAP-5140",
  639. "schedule occurred in do_balance!");
  640. }
  641. #endif
  642. /*
  643. * make sure, that the node contents look like a node of
  644. * certain level
  645. */
  646. if (!is_tree_node(bh, expected_level)) {
  647. reiserfs_error(sb, "vs-5150",
  648. "invalid format found in block %ld. "
  649. "Fsck?", bh->b_blocknr);
  650. pathrelse(search_path);
  651. return IO_ERROR;
  652. }
  653. /* ok, we have acquired next formatted node in the tree */
  654. node_level = B_LEVEL(bh);
  655. PROC_INFO_BH_STAT(sb, bh, node_level - 1);
  656. RFALSE(node_level < stop_level,
  657. "vs-5152: tree level (%d) is less than stop level (%d)",
  658. node_level, stop_level);
  659. retval = bin_search(key, item_head(bh, 0),
  660. B_NR_ITEMS(bh),
  661. (node_level ==
  662. DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
  663. KEY_SIZE,
  664. &last_element->pe_position);
  665. if (node_level == stop_level) {
  666. return retval;
  667. }
  668. /* we are not in the stop level */
  669. /*
  670. * item has been found, so we choose the pointer which
  671. * is to the right of the found one
  672. */
  673. if (retval == ITEM_FOUND)
  674. last_element->pe_position++;
  675. /*
  676. * if item was not found we choose the position which is to
  677. * the left of the found item. This requires no code,
  678. * bin_search did it already.
  679. */
  680. /*
  681. * So we have chosen a position in the current node which is
  682. * an internal node. Now we calculate child block number by
  683. * position in the node.
  684. */
  685. block_number =
  686. B_N_CHILD_NUM(bh, last_element->pe_position);
  687. /*
  688. * if we are going to read leaf nodes, try for read
  689. * ahead as well
  690. */
  691. if ((search_path->reada & PATH_READA) &&
  692. node_level == DISK_LEAF_NODE_LEVEL + 1) {
  693. int pos = last_element->pe_position;
  694. int limit = B_NR_ITEMS(bh);
  695. struct reiserfs_key *le_key;
  696. if (search_path->reada & PATH_READA_BACK)
  697. limit = 0;
  698. while (reada_count < SEARCH_BY_KEY_READA) {
  699. if (pos == limit)
  700. break;
  701. reada_blocks[reada_count++] =
  702. B_N_CHILD_NUM(bh, pos);
  703. if (search_path->reada & PATH_READA_BACK)
  704. pos--;
  705. else
  706. pos++;
  707. /*
  708. * check to make sure we're in the same object
  709. */
  710. le_key = internal_key(bh, pos);
  711. if (le32_to_cpu(le_key->k_objectid) !=
  712. key->on_disk_key.k_objectid) {
  713. break;
  714. }
  715. }
  716. }
  717. }
  718. }
  719. /*
  720. * Form the path to an item and position in this item which contains
  721. * file byte defined by key. If there is no such item
  722. * corresponding to the key, we point the path to the item with
  723. * maximal key less than key, and *pos_in_item is set to one
  724. * past the last entry/byte in the item. If searching for entry in a
  725. * directory item, and it is not found, *pos_in_item is set to one
  726. * entry more than the entry with maximal key which is less than the
  727. * sought key.
  728. *
  729. * Note that if there is no entry in this same node which is one more,
  730. * then we point to an imaginary entry. for direct items, the
  731. * position is in units of bytes, for indirect items the position is
  732. * in units of blocknr entries, for directory items the position is in
  733. * units of directory entries.
  734. */
  735. /* The function is NOT SCHEDULE-SAFE! */
  736. int search_for_position_by_key(struct super_block *sb,
  737. /* Key to search (cpu variable) */
  738. const struct cpu_key *p_cpu_key,
  739. /* Filled up by this function. */
  740. struct treepath *search_path)
  741. {
  742. struct item_head *p_le_ih; /* pointer to on-disk structure */
  743. int blk_size;
  744. loff_t item_offset, offset;
  745. struct reiserfs_dir_entry de;
  746. int retval;
  747. /* If searching for directory entry. */
  748. if (is_direntry_cpu_key(p_cpu_key))
  749. return search_by_entry_key(sb, p_cpu_key, search_path,
  750. &de);
  751. /* If not searching for directory entry. */
  752. /* If item is found. */
  753. retval = search_item(sb, p_cpu_key, search_path);
  754. if (retval == IO_ERROR)
  755. return retval;
  756. if (retval == ITEM_FOUND) {
  757. RFALSE(!ih_item_len
  758. (item_head
  759. (PATH_PLAST_BUFFER(search_path),
  760. PATH_LAST_POSITION(search_path))),
  761. "PAP-5165: item length equals zero");
  762. pos_in_item(search_path) = 0;
  763. return POSITION_FOUND;
  764. }
  765. RFALSE(!PATH_LAST_POSITION(search_path),
  766. "PAP-5170: position equals zero");
  767. /* Item is not found. Set path to the previous item. */
  768. p_le_ih =
  769. item_head(PATH_PLAST_BUFFER(search_path),
  770. --PATH_LAST_POSITION(search_path));
  771. blk_size = sb->s_blocksize;
  772. if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key))
  773. return FILE_NOT_FOUND;
  774. /* FIXME: quite ugly this far */
  775. item_offset = le_ih_k_offset(p_le_ih);
  776. offset = cpu_key_k_offset(p_cpu_key);
  777. /* Needed byte is contained in the item pointed to by the path. */
  778. if (item_offset <= offset &&
  779. item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
  780. pos_in_item(search_path) = offset - item_offset;
  781. if (is_indirect_le_ih(p_le_ih)) {
  782. pos_in_item(search_path) /= blk_size;
  783. }
  784. return POSITION_FOUND;
  785. }
  786. /*
  787. * Needed byte is not contained in the item pointed to by the
  788. * path. Set pos_in_item out of the item.
  789. */
  790. if (is_indirect_le_ih(p_le_ih))
  791. pos_in_item(search_path) =
  792. ih_item_len(p_le_ih) / UNFM_P_SIZE;
  793. else
  794. pos_in_item(search_path) = ih_item_len(p_le_ih);
  795. return POSITION_NOT_FOUND;
  796. }
  797. /* Compare given item and item pointed to by the path. */
  798. int comp_items(const struct item_head *stored_ih, const struct treepath *path)
  799. {
  800. struct buffer_head *bh = PATH_PLAST_BUFFER(path);
  801. struct item_head *ih;
  802. /* Last buffer at the path is not in the tree. */
  803. if (!B_IS_IN_TREE(bh))
  804. return 1;
  805. /* Last path position is invalid. */
  806. if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
  807. return 1;
  808. /* we need only to know, whether it is the same item */
  809. ih = tp_item_head(path);
  810. return memcmp(stored_ih, ih, IH_SIZE);
  811. }
  /* unformatted nodes are not logged anymore, ever. This is safe now */
  /* true when the buffer's b_count is above one */
  #define held_by_others(bh) (atomic_read(&(bh)->b_count) >= 2)

  /* block can not be forgotten as it is in I/O or held by someone */
  #define block_in_use(bh) (buffer_locked(bh) || held_by_others(bh))
  816. /* prepare for delete or cut of direct item */
  817. static inline int prepare_for_direct_item(struct treepath *path,
  818. struct item_head *le_ih,
  819. struct inode *inode,
  820. loff_t new_file_length, int *cut_size)
  821. {
  822. loff_t round_len;
  823. if (new_file_length == max_reiserfs_offset(inode)) {
  824. /* item has to be deleted */
  825. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  826. return M_DELETE;
  827. }
  828. /* new file gets truncated */
  829. if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
  830. round_len = ROUND_UP(new_file_length);
  831. /* this was new_file_length < le_ih ... */
  832. if (round_len < le_ih_k_offset(le_ih)) {
  833. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  834. return M_DELETE; /* Delete this item. */
  835. }
  836. /* Calculate first position and size for cutting from item. */
  837. pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
  838. *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
  839. return M_CUT; /* Cut from this item. */
  840. }
  841. /* old file: items may have any length */
  842. if (new_file_length < le_ih_k_offset(le_ih)) {
  843. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  844. return M_DELETE; /* Delete this item. */
  845. }
  846. /* Calculate first position and size for cutting from item. */
  847. *cut_size = -(ih_item_len(le_ih) -
  848. (pos_in_item(path) =
  849. new_file_length + 1 - le_ih_k_offset(le_ih)));
  850. return M_CUT; /* Cut from this item. */
  851. }
  852. static inline int prepare_for_direntry_item(struct treepath *path,
  853. struct item_head *le_ih,
  854. struct inode *inode,
  855. loff_t new_file_length,
  856. int *cut_size)
  857. {
  858. if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
  859. new_file_length == max_reiserfs_offset(inode)) {
  860. RFALSE(ih_entry_count(le_ih) != 2,
  861. "PAP-5220: incorrect empty directory item (%h)", le_ih);
  862. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  863. /* Delete the directory item containing "." and ".." entry. */
  864. return M_DELETE;
  865. }
  866. if (ih_entry_count(le_ih) == 1) {
  867. /*
  868. * Delete the directory item such as there is one record only
  869. * in this item
  870. */
  871. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  872. return M_DELETE;
  873. }
  874. /* Cut one record from the directory item. */
  875. *cut_size =
  876. -(DEH_SIZE +
  877. entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
  878. return M_CUT;
  879. }
  /*
   * Journal space threshold, in the units reported by
   * reiserfs_transaction_free_space(): twice JOURNAL_PER_BALANCE_CNT
   * plus one.
   */
  #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (1 + 2 * JOURNAL_PER_BALANCE_CNT)
  881. /*
  882. * If the path points to a directory or direct item, calculate mode
  883. * and the size cut, for balance.
  884. * If the path points to an indirect item, remove some number of its
  885. * unformatted nodes.
  886. * In case of file truncate calculate whether this item must be
  887. * deleted/truncated or last unformatted node of this item will be
  888. * converted to a direct item.
  889. * This function returns a determination of what balance mode the
  890. * calling function should employ.
  891. */
  892. static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th,
  893. struct inode *inode,
  894. struct treepath *path,
  895. const struct cpu_key *item_key,
  896. /*
  897. * Number of unformatted nodes
  898. * which were removed from end
  899. * of the file.
  900. */
  901. int *removed,
  902. int *cut_size,
  903. /* MAX_KEY_OFFSET in case of delete. */
  904. unsigned long long new_file_length
  905. )
  906. {
  907. struct super_block *sb = inode->i_sb;
  908. struct item_head *p_le_ih = tp_item_head(path);
  909. struct buffer_head *bh = PATH_PLAST_BUFFER(path);
  910. BUG_ON(!th->t_trans_id);
  911. /* Stat_data item. */
  912. if (is_statdata_le_ih(p_le_ih)) {
  913. RFALSE(new_file_length != max_reiserfs_offset(inode),
  914. "PAP-5210: mode must be M_DELETE");
  915. *cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
  916. return M_DELETE;
  917. }
  918. /* Directory item. */
  919. if (is_direntry_le_ih(p_le_ih))
  920. return prepare_for_direntry_item(path, p_le_ih, inode,
  921. new_file_length,
  922. cut_size);
  923. /* Direct item. */
  924. if (is_direct_le_ih(p_le_ih))
  925. return prepare_for_direct_item(path, p_le_ih, inode,
  926. new_file_length, cut_size);
  927. /* Case of an indirect item. */
  928. {
  929. int blk_size = sb->s_blocksize;
  930. struct item_head s_ih;
  931. int need_re_search;
  932. int delete = 0;
  933. int result = M_CUT;
  934. int pos = 0;
  935. if ( new_file_length == max_reiserfs_offset (inode) ) {
  936. /*
  937. * prepare_for_delete_or_cut() is called by
  938. * reiserfs_delete_item()
  939. */
  940. new_file_length = 0;
  941. delete = 1;
  942. }
  943. do {
  944. need_re_search = 0;
  945. *cut_size = 0;
  946. bh = PATH_PLAST_BUFFER(path);
  947. copy_item_head(&s_ih, tp_item_head(path));
  948. pos = I_UNFM_NUM(&s_ih);
  949. while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
  950. __le32 *unfm;
  951. __u32 block;
  952. /*
  953. * Each unformatted block deletion may involve
  954. * one additional bitmap block into the transaction,
  955. * thereby the initial journal space reservation
  956. * might not be enough.
  957. */
  958. if (!delete && (*cut_size) != 0 &&
  959. reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
  960. break;
  961. unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1;
  962. block = get_block_num(unfm, 0);
  963. if (block != 0) {
  964. reiserfs_prepare_for_journal(sb, bh, 1);
  965. put_block_num(unfm, 0, 0);
  966. journal_mark_dirty(th, bh);
  967. reiserfs_free_block(th, inode, block, 1);
  968. }
  969. reiserfs_cond_resched(sb);
  970. if (item_moved (&s_ih, path)) {
  971. need_re_search = 1;
  972. break;
  973. }
  974. pos --;
  975. (*removed)++;
  976. (*cut_size) -= UNFM_P_SIZE;
  977. if (pos == 0) {
  978. (*cut_size) -= IH_SIZE;
  979. result = M_DELETE;
  980. break;
  981. }
  982. }
  983. /*
  984. * a trick. If the buffer has been logged, this will
  985. * do nothing. If we've broken the loop without logging
  986. * it, it will restore the buffer
  987. */
  988. reiserfs_restore_prepared_buffer(sb, bh);
  989. } while (need_re_search &&
  990. search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
  991. pos_in_item(path) = pos * UNFM_P_SIZE;
  992. if (*cut_size == 0) {
  993. /*
  994. * Nothing was cut. maybe convert last unformatted node to the
  995. * direct item?
  996. */
  997. result = M_CONVERT;
  998. }
  999. return result;
  1000. }
  1001. }
  1002. /* Calculate number of bytes which will be deleted or cut during balance */
  1003. static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
  1004. {
  1005. int del_size;
  1006. struct item_head *p_le_ih = tp_item_head(tb->tb_path);
  1007. if (is_statdata_le_ih(p_le_ih))
  1008. return 0;
  1009. del_size =
  1010. (mode ==
  1011. M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
  1012. if (is_direntry_le_ih(p_le_ih)) {
  1013. /*
  1014. * return EMPTY_DIR_SIZE; We delete emty directories only.
  1015. * we can't use EMPTY_DIR_SIZE, as old format dirs have a
  1016. * different empty size. ick. FIXME, is this right?
  1017. */
  1018. return del_size;
  1019. }
  1020. if (is_indirect_le_ih(p_le_ih))
  1021. del_size = (del_size / UNFM_P_SIZE) *
  1022. (PATH_PLAST_BUFFER(tb->tb_path)->b_size);
  1023. return del_size;
  1024. }
  1025. static void init_tb_struct(struct reiserfs_transaction_handle *th,
  1026. struct tree_balance *tb,
  1027. struct super_block *sb,
  1028. struct treepath *path, int size)
  1029. {
  1030. BUG_ON(!th->t_trans_id);
  1031. memset(tb, '\0', sizeof(struct tree_balance));
  1032. tb->transaction_handle = th;
  1033. tb->tb_sb = sb;
  1034. tb->tb_path = path;
  1035. PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
  1036. PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
  1037. tb->insert_size[0] = size;
  1038. }
  /*
   * Zero the bytes item[length] .. item[total_length - 1].
   * Nothing is written when total_length is not greater than length.
   */
  void padd_item(char *item, int total_length, int length)
  {
      if (total_length > length)
          memset(item + length, 0, total_length - length);
  }
  #ifdef REISERQUOTA_DEBUG
  /*
   * Map a key to a one-character type tag for debug output:
   * 'd' directory entry, 'D' direct, 'i' indirect, 's' stat data,
   * 'u' when none of the checks match.
   * The literal 2 is passed as the first argument of the is_*_le_key()
   * helpers (presumably the key version - TODO confirm).
   */
  char key2type(struct reiserfs_key *ih)
  {
      return is_direntry_le_key(2, ih) ? 'd' :
             is_direct_le_key(2, ih) ? 'D' :
             is_indirect_le_key(2, ih) ? 'i' :
             is_statdata_le_key(2, ih) ? 's' : 'u';
  }

  /* Same mapping as key2type(), but driven by an item head. */
  char head2type(struct item_head *ih)
  {
      return is_direntry_le_ih(ih) ? 'd' :
             is_direct_le_ih(ih) ? 'D' :
             is_indirect_le_ih(ih) ? 'i' :
             is_statdata_le_ih(ih) ? 's' : 'u';
  }
  #endif
  1071. /*
  1072. * Delete object item.
  1073. * th - active transaction handle
  1074. * path - path to the deleted item
  1075. * item_key - key to search for the deleted item
  1076. * indode - used for updating i_blocks and quotas
  1077. * un_bh - NULL or unformatted node pointer
  1078. */
  1079. int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
  1080. struct treepath *path, const struct cpu_key *item_key,
  1081. struct inode *inode, struct buffer_head *un_bh)
  1082. {
  1083. struct super_block *sb = inode->i_sb;
  1084. struct tree_balance s_del_balance;
  1085. struct item_head s_ih;
  1086. struct item_head *q_ih;
  1087. int quota_cut_bytes;
  1088. int ret_value, del_size, removed;
  1089. int depth;
  1090. #ifdef CONFIG_REISERFS_CHECK
  1091. char mode;
  1092. int iter = 0;
  1093. #endif
  1094. BUG_ON(!th->t_trans_id);
  1095. init_tb_struct(th, &s_del_balance, sb, path,
  1096. 0 /*size is unknown */ );
  1097. while (1) {
  1098. removed = 0;
  1099. #ifdef CONFIG_REISERFS_CHECK
  1100. iter++;
  1101. mode =
  1102. #endif
  1103. prepare_for_delete_or_cut(th, inode, path,
  1104. item_key, &removed,
  1105. &del_size,
  1106. max_reiserfs_offset(inode));
  1107. RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
  1108. copy_item_head(&s_ih, tp_item_head(path));
  1109. s_del_balance.insert_size[0] = del_size;
  1110. ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
  1111. if (ret_value != REPEAT_SEARCH)
  1112. break;
  1113. PROC_INFO_INC(sb, delete_item_restarted);
  1114. /* file system changed, repeat search */
  1115. ret_value =
  1116. search_for_position_by_key(sb, item_key, path);
  1117. if (ret_value == IO_ERROR)
  1118. break;
  1119. if (ret_value == FILE_NOT_FOUND) {
  1120. reiserfs_warning(sb, "vs-5340",
  1121. "no items of the file %K found",
  1122. item_key);
  1123. break;
  1124. }
  1125. } /* while (1) */
  1126. if (ret_value != CARRY_ON) {
  1127. unfix_nodes(&s_del_balance);
  1128. return 0;
  1129. }
  1130. /* reiserfs_delete_item returns item length when success */
  1131. ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
  1132. q_ih = tp_item_head(path);
  1133. quota_cut_bytes = ih_item_len(q_ih);
  1134. /*
  1135. * hack so the quota code doesn't have to guess if the file has a
  1136. * tail. On tail insert, we allocate quota for 1 unformatted node.
  1137. * We test the offset because the tail might have been
  1138. * split into multiple items, and we only want to decrement for
  1139. * the unfm node once
  1140. */
  1141. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
  1142. if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
  1143. quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
  1144. } else {
  1145. quota_cut_bytes = 0;
  1146. }
  1147. }
  1148. if (un_bh) {
  1149. int off;
  1150. char *data;
  1151. /*
  1152. * We are in direct2indirect conversion, so move tail contents
  1153. * to the unformatted node
  1154. */
  1155. /*
  1156. * note, we do the copy before preparing the buffer because we
  1157. * don't care about the contents of the unformatted node yet.
  1158. * the only thing we really care about is the direct item's
  1159. * data is in the unformatted node.
  1160. *
  1161. * Otherwise, we would have to call
  1162. * reiserfs_prepare_for_journal on the unformatted node,
  1163. * which might schedule, meaning we'd have to loop all the
  1164. * way back up to the start of the while loop.
  1165. *
  1166. * The unformatted node must be dirtied later on. We can't be
  1167. * sure here if the entire tail has been deleted yet.
  1168. *
  1169. * un_bh is from the page cache (all unformatted nodes are
  1170. * from the page cache) and might be a highmem page. So, we
  1171. * can't use un_bh->b_data.
  1172. * -clm
  1173. */
  1174. data = kmap_atomic(un_bh->b_page);
  1175. off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1));
  1176. memcpy(data + off,
  1177. ih_item_body(PATH_PLAST_BUFFER(path), &s_ih),
  1178. ret_value);
  1179. kunmap_atomic(data);
  1180. }
  1181. /* Perform balancing after all resources have been collected at once. */
  1182. do_balance(&s_del_balance, NULL, NULL, M_DELETE);
  1183. #ifdef REISERQUOTA_DEBUG
  1184. reiserfs_debug(sb, REISERFS_DEBUG_CODE,
  1185. "reiserquota delete_item(): freeing %u, id=%u type=%c",
  1186. quota_cut_bytes, inode->i_uid, head2type(&s_ih));
  1187. #endif
  1188. depth = reiserfs_write_unlock_nested(inode->i_sb);
  1189. dquot_free_space_nodirty(inode, quota_cut_bytes);
  1190. reiserfs_write_lock_nested(inode->i_sb, depth);
  1191. /* Return deleted body length */
  1192. return ret_value;
  1193. }
  1194. /*
  1195. * Summary Of Mechanisms For Handling Collisions Between Processes:
  1196. *
  1197. * deletion of the body of the object is performed by iput(), with the
  1198. * result that if multiple processes are operating on a file, the
  1199. * deletion of the body of the file is deferred until the last process
  1200. * that has an open inode performs its iput().
  1201. *
  1202. * writes and truncates are protected from collisions by use of
  1203. * semaphores.
  1204. *
  1205. * creates, linking, and mknod are protected from collisions with other
  1206. * processes by making the reiserfs_add_entry() the last step in the
  1207. * creation, and then rolling back all changes if there was a collision.
  1208. * - Hans
  1209. */
  1210. /* this deletes item which never gets split */
  1211. void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
  1212. struct inode *inode, struct reiserfs_key *key)
  1213. {
  1214. struct super_block *sb = th->t_super;
  1215. struct tree_balance tb;
  1216. INITIALIZE_PATH(path);
  1217. int item_len = 0;
  1218. int tb_init = 0;
  1219. struct cpu_key cpu_key;
  1220. int retval;
  1221. int quota_cut_bytes = 0;
  1222. BUG_ON(!th->t_trans_id);
  1223. le_key2cpu_key(&cpu_key, key);
  1224. while (1) {
  1225. retval = search_item(th->t_super, &cpu_key, &path);
  1226. if (retval == IO_ERROR) {
  1227. reiserfs_error(th->t_super, "vs-5350",
  1228. "i/o failure occurred trying "
  1229. "to delete %K", &cpu_key);
  1230. break;
  1231. }
  1232. if (retval != ITEM_FOUND) {
  1233. pathrelse(&path);
  1234. /*
  1235. * No need for a warning, if there is just no free
  1236. * space to insert '..' item into the
  1237. * newly-created subdir
  1238. */
  1239. if (!
  1240. ((unsigned long long)
  1241. GET_HASH_VALUE(le_key_k_offset
  1242. (le_key_version(key), key)) == 0
  1243. && (unsigned long long)
  1244. GET_GENERATION_NUMBER(le_key_k_offset
  1245. (le_key_version(key),
  1246. key)) == 1))
  1247. reiserfs_warning(th->t_super, "vs-5355",
  1248. "%k not found", key);
  1249. break;
  1250. }
  1251. if (!tb_init) {
  1252. tb_init = 1;
  1253. item_len = ih_item_len(tp_item_head(&path));
  1254. init_tb_struct(th, &tb, th->t_super, &path,
  1255. -(IH_SIZE + item_len));
  1256. }
  1257. quota_cut_bytes = ih_item_len(tp_item_head(&path));
  1258. retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
  1259. if (retval == REPEAT_SEARCH) {
  1260. PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
  1261. continue;
  1262. }
  1263. if (retval == CARRY_ON) {
  1264. do_balance(&tb, NULL, NULL, M_DELETE);
  1265. /*
  1266. * Should we count quota for item? (we don't
  1267. * count quotas for save-links)
  1268. */
  1269. if (inode) {
  1270. int depth;
  1271. #ifdef REISERQUOTA_DEBUG
  1272. reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
  1273. "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
  1274. quota_cut_bytes, inode->i_uid,
  1275. key2type(key));
  1276. #endif
  1277. depth = reiserfs_write_unlock_nested(sb);
  1278. dquot_free_space_nodirty(inode,
  1279. quota_cut_bytes);
  1280. reiserfs_write_lock_nested(sb, depth);
  1281. }
  1282. break;
  1283. }
  1284. /* IO_ERROR, NO_DISK_SPACE, etc */
  1285. reiserfs_warning(th->t_super, "vs-5360",
  1286. "could not delete %K due to fix_nodes failure",
  1287. &cpu_key);
  1288. unfix_nodes(&tb);
  1289. break;
  1290. }
  1291. reiserfs_check_path(&path);
  1292. }
  1293. int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
  1294. struct inode *inode)
  1295. {
  1296. int err;
  1297. inode->i_size = 0;
  1298. BUG_ON(!th->t_trans_id);
  1299. /* for directory this deletes item containing "." and ".." */
  1300. err =
  1301. reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
  1302. if (err)
  1303. return err;
  1304. #if defined( USE_INODE_GENERATION_COUNTER )
  1305. if (!old_format_only(th->t_super)) {
  1306. __le32 *inode_generation;
  1307. inode_generation =
  1308. &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
  1309. le32_add_cpu(inode_generation, 1);
  1310. }
  1311. /* USE_INODE_GENERATION_COUNTER */
  1312. #endif
  1313. reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
  1314. return err;
  1315. }
  1316. static void unmap_buffers(struct page *page, loff_t pos)
  1317. {
  1318. struct buffer_head *bh;
  1319. struct buffer_head *head;
  1320. struct buffer_head *next;
  1321. unsigned long tail_index;
  1322. unsigned long cur_index;
  1323. if (page) {
  1324. if (page_has_buffers(page)) {
  1325. tail_index = pos & (PAGE_SIZE - 1);
  1326. cur_index = 0;
  1327. head = page_buffers(page);
  1328. bh = head;
  1329. do {
  1330. next = bh->b_this_page;
  1331. /*
  1332. * we want to unmap the buffers that contain
  1333. * the tail, and all the buffers after it
  1334. * (since the tail must be at the end of the
  1335. * file). We don't want to unmap file data
  1336. * before the tail, since it might be dirty
  1337. * and waiting to reach disk
  1338. */
  1339. cur_index += bh->b_size;
  1340. if (cur_index > tail_index) {
  1341. reiserfs_unmap_buffer(bh);
  1342. }
  1343. bh = next;
  1344. } while (bh != head);
  1345. }
  1346. }
  1347. }
  1348. static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
  1349. struct inode *inode,
  1350. struct page *page,
  1351. struct treepath *path,
  1352. const struct cpu_key *item_key,
  1353. loff_t new_file_size, char *mode)
  1354. {
  1355. struct super_block *sb = inode->i_sb;
  1356. int block_size = sb->s_blocksize;
  1357. int cut_bytes;
  1358. BUG_ON(!th->t_trans_id);
  1359. BUG_ON(new_file_size != inode->i_size);
  1360. /*
  1361. * the page being sent in could be NULL if there was an i/o error
  1362. * reading in the last block. The user will hit problems trying to
  1363. * read the file, but for now we just skip the indirect2direct
  1364. */
  1365. if (atomic_read(&inode->i_count) > 1 ||
  1366. !tail_has_to_be_packed(inode) ||
  1367. !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
  1368. /* leave tail in an unformatted node */
  1369. *mode = M_SKIP_BALANCING;
  1370. cut_bytes =
  1371. block_size - (new_file_size & (block_size - 1));
  1372. pathrelse(path);
  1373. return cut_bytes;
  1374. }
  1375. /* Perform the conversion to a direct_item. */
  1376. return indirect2direct(th, inode, page, path, item_key,
  1377. new_file_size, mode);
  1378. }
  1379. /*
  1380. * we did indirect_to_direct conversion. And we have inserted direct
  1381. * item successesfully, but there were no disk space to cut unfm
  1382. * pointer being converted. Therefore we have to delete inserted
  1383. * direct item(s)
  1384. */
  1385. static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
  1386. struct inode *inode, struct treepath *path)
  1387. {
  1388. struct cpu_key tail_key;
  1389. int tail_len;
  1390. int removed;
  1391. BUG_ON(!th->t_trans_id);
  1392. make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);
  1393. tail_key.key_length = 4;
  1394. tail_len =
  1395. (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
  1396. while (tail_len) {
  1397. /* look for the last byte of the tail */
  1398. if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
  1399. POSITION_NOT_FOUND)
  1400. reiserfs_panic(inode->i_sb, "vs-5615",
  1401. "found invalid item");
  1402. RFALSE(path->pos_in_item !=
  1403. ih_item_len(tp_item_head(path)) - 1,
  1404. "vs-5616: appended bytes found");
  1405. PATH_LAST_POSITION(path)--;
  1406. removed =
  1407. reiserfs_delete_item(th, path, &tail_key, inode,
  1408. NULL /*unbh not needed */ );
  1409. RFALSE(removed <= 0
  1410. || removed > tail_len,
  1411. "vs-5617: there was tail %d bytes, removed item length %d bytes",
  1412. tail_len, removed);
  1413. tail_len -= removed;
  1414. set_cpu_key_k_offset(&tail_key,
  1415. cpu_key_k_offset(&tail_key) - removed);
  1416. }
  1417. reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
  1418. "conversion has been rolled back due to "
  1419. "lack of disk space");
  1420. mark_inode_dirty(inode);
  1421. }
  1422. /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
  1423. int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
  1424. struct treepath *path,
  1425. struct cpu_key *item_key,
  1426. struct inode *inode,
  1427. struct page *page, loff_t new_file_size)
  1428. {
  1429. struct super_block *sb = inode->i_sb;
  1430. /*
  1431. * Every function which is going to call do_balance must first
  1432. * create a tree_balance structure. Then it must fill up this
  1433. * structure by using the init_tb_struct and fix_nodes functions.
  1434. * After that we can make tree balancing.
  1435. */
  1436. struct tree_balance s_cut_balance;
  1437. struct item_head *p_le_ih;
  1438. int cut_size = 0; /* Amount to be cut. */
  1439. int ret_value = CARRY_ON;
  1440. int removed = 0; /* Number of the removed unformatted nodes. */
  1441. int is_inode_locked = 0;
  1442. char mode; /* Mode of the balance. */
  1443. int retval2 = -1;
  1444. int quota_cut_bytes;
  1445. loff_t tail_pos = 0;
  1446. int depth;
  1447. BUG_ON(!th->t_trans_id);
  1448. init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
  1449. cut_size);
  1450. /*
  1451. * Repeat this loop until we either cut the item without needing
  1452. * to balance, or we fix_nodes without schedule occurring
  1453. */
  1454. while (1) {
  1455. /*
  1456. * Determine the balance mode, position of the first byte to
  1457. * be cut, and size to be cut. In case of the indirect item
  1458. * free unformatted nodes which are pointed to by the cut
  1459. * pointers.
  1460. */
  1461. mode =
  1462. prepare_for_delete_or_cut(th, inode, path,
  1463. item_key, &removed,
  1464. &cut_size, new_file_size);
  1465. if (mode == M_CONVERT) {
  1466. /*
  1467. * convert last unformatted node to direct item or
  1468. * leave tail in the unformatted node
  1469. */
  1470. RFALSE(ret_value != CARRY_ON,
  1471. "PAP-5570: can not convert twice");
  1472. ret_value =
  1473. maybe_indirect_to_direct(th, inode, page,
  1474. path, item_key,
  1475. new_file_size, &mode);
  1476. if (mode == M_SKIP_BALANCING)
  1477. /* tail has been left in the unformatted node */
  1478. return ret_value;
  1479. is_inode_locked = 1;
  1480. /*
  1481. * removing of last unformatted node will
  1482. * change value we have to return to truncate.
  1483. * Save it
  1484. */
  1485. retval2 = ret_value;
  1486. /*
  1487. * So, we have performed the first part of the
  1488. * conversion:
  1489. * inserting the new direct item. Now we are
  1490. * removing the last unformatted node pointer.
  1491. * Set key to search for it.
  1492. */
  1493. set_cpu_key_k_type(item_key, TYPE_INDIRECT);
  1494. item_key->key_length = 4;
  1495. new_file_size -=
  1496. (new_file_size & (sb->s_blocksize - 1));
  1497. tail_pos = new_file_size;
  1498. set_cpu_key_k_offset(item_key, new_file_size + 1);
  1499. if (search_for_position_by_key
  1500. (sb, item_key,
  1501. path) == POSITION_NOT_FOUND) {
  1502. print_block(PATH_PLAST_BUFFER(path), 3,
  1503. PATH_LAST_POSITION(path) - 1,
  1504. PATH_LAST_POSITION(path) + 1);
  1505. reiserfs_panic(sb, "PAP-5580", "item to "
  1506. "convert does not exist (%K)",
  1507. item_key);
  1508. }
  1509. continue;
  1510. }
  1511. if (cut_size == 0) {
  1512. pathrelse(path);
  1513. return 0;
  1514. }
  1515. s_cut_balance.insert_size[0] = cut_size;
  1516. ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
  1517. if (ret_value != REPEAT_SEARCH)
  1518. break;
  1519. PROC_INFO_INC(sb, cut_from_item_restarted);
  1520. ret_value =
  1521. search_for_position_by_key(sb, item_key, path);
  1522. if (ret_value == POSITION_FOUND)
  1523. continue;
  1524. reiserfs_warning(sb, "PAP-5610", "item %K not found",
  1525. item_key);
  1526. unfix_nodes(&s_cut_balance);
  1527. return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
  1528. } /* while */
  1529. /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */
  1530. if (ret_value != CARRY_ON) {
  1531. if (is_inode_locked) {
  1532. /*
  1533. * FIXME: this seems to be not needed: we are always
  1534. * able to cut item
  1535. */
  1536. indirect_to_direct_roll_back(th, inode, path);
  1537. }
  1538. if (ret_value == NO_DISK_SPACE)
  1539. reiserfs_warning(sb, "reiserfs-5092",
  1540. "NO_DISK_SPACE");
  1541. unfix_nodes(&s_cut_balance);
  1542. return -EIO;
  1543. }
  1544. /* go ahead and perform balancing */
  1545. RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
  1546. /* Calculate number of bytes that need to be cut from the item. */
  1547. quota_cut_bytes =
  1548. (mode ==
  1549. M_DELETE) ? ih_item_len(tp_item_head(path)) : -s_cut_balance.
  1550. insert_size[0];
  1551. if (retval2 == -1)
  1552. ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
  1553. else
  1554. ret_value = retval2;
  1555. /*
  1556. * For direct items, we only change the quota when deleting the last
  1557. * item.
  1558. */
  1559. p_le_ih = tp_item_head(s_cut_balance.tb_path);
  1560. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
  1561. if (mode == M_DELETE &&
  1562. (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
  1563. 1) {
  1564. /* FIXME: this is to keep 3.5 happy */
  1565. REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
  1566. quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
  1567. } else {
  1568. quota_cut_bytes = 0;
  1569. }
  1570. }
  1571. #ifdef CONFIG_REISERFS_CHECK
  1572. if (is_inode_locked) {
  1573. struct item_head *le_ih =
  1574. tp_item_head(s_cut_balance.tb_path);
  1575. /*
  1576. * we are going to complete indirect2direct conversion. Make
  1577. * sure, that we exactly remove last unformatted node pointer
  1578. * of the item
  1579. */
  1580. if (!is_indirect_le_ih(le_ih))
  1581. reiserfs_panic(sb, "vs-5652",
  1582. "item must be indirect %h", le_ih);
  1583. if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
  1584. reiserfs_panic(sb, "vs-5653", "completing "
  1585. "indirect2direct conversion indirect "
  1586. "item %h being deleted must be of "
  1587. "4 byte long", le_ih);
  1588. if (mode == M_CUT
  1589. && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
  1590. reiserfs_panic(sb, "vs-5654", "can not complete "
  1591. "indirect2direct conversion of %h "
  1592. "(CUT, insert_size==%d)",
  1593. le_ih, s_cut_balance.insert_size[0]);
  1594. }
  1595. /*
  1596. * it would be useful to make sure, that right neighboring
  1597. * item is direct item of this file
  1598. */
  1599. }
  1600. #endif
  1601. do_balance(&s_cut_balance, NULL, NULL, mode);
  1602. if (is_inode_locked) {
  1603. /*
  1604. * we've done an indirect->direct conversion. when the
  1605. * data block was freed, it was removed from the list of
  1606. * blocks that must be flushed before the transaction
  1607. * commits, make sure to unmap and invalidate it
  1608. */
  1609. unmap_buffers(page, tail_pos);
  1610. REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
  1611. }
  1612. #ifdef REISERQUOTA_DEBUG
  1613. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1614. "reiserquota cut_from_item(): freeing %u id=%u type=%c",
  1615. quota_cut_bytes, inode->i_uid, '?');
  1616. #endif
  1617. depth = reiserfs_write_unlock_nested(sb);
  1618. dquot_free_space_nodirty(inode, quota_cut_bytes);
  1619. reiserfs_write_lock_nested(sb, depth);
  1620. return ret_value;
  1621. }
  1622. static void truncate_directory(struct reiserfs_transaction_handle *th,
  1623. struct inode *inode)
  1624. {
  1625. BUG_ON(!th->t_trans_id);
  1626. if (inode->i_nlink)
  1627. reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
  1628. set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
  1629. set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
  1630. reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
  1631. reiserfs_update_sd(th, inode);
  1632. set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
  1633. set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
  1634. }
  1635. /*
  1636. * Truncate file to the new size. Note, this must be called with a
  1637. * transaction already started
  1638. */
  1639. int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
  1640. struct inode *inode, /* ->i_size contains new size */
  1641. struct page *page, /* up to date for last block */
  1642. /*
  1643. * when it is called by file_release to convert
  1644. * the tail - no timestamps should be updated
  1645. */
  1646. int update_timestamps
  1647. )
  1648. {
  1649. INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
  1650. struct item_head *p_le_ih; /* Pointer to an item header. */
  1651. /* Key to search for a previous file item. */
  1652. struct cpu_key s_item_key;
  1653. loff_t file_size, /* Old file size. */
  1654. new_file_size; /* New file size. */
  1655. int deleted; /* Number of deleted or truncated bytes. */
  1656. int retval;
  1657. int err = 0;
  1658. BUG_ON(!th->t_trans_id);
  1659. if (!
  1660. (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
  1661. || S_ISLNK(inode->i_mode)))
  1662. return 0;
  1663. /* deletion of directory - no need to update timestamps */
  1664. if (S_ISDIR(inode->i_mode)) {
  1665. truncate_directory(th, inode);
  1666. return 0;
  1667. }
  1668. /* Get new file size. */
  1669. new_file_size = inode->i_size;
  1670. /* FIXME: note, that key type is unimportant here */
  1671. make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
  1672. TYPE_DIRECT, 3);
  1673. retval =
  1674. search_for_position_by_key(inode->i_sb, &s_item_key,
  1675. &s_search_path);
  1676. if (retval == IO_ERROR) {
  1677. reiserfs_error(inode->i_sb, "vs-5657",
  1678. "i/o failure occurred trying to truncate %K",
  1679. &s_item_key);
  1680. err = -EIO;
  1681. goto out;
  1682. }
  1683. if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
  1684. reiserfs_error(inode->i_sb, "PAP-5660",
  1685. "wrong result %d of search for %K", retval,
  1686. &s_item_key);
  1687. err = -EIO;
  1688. goto out;
  1689. }
  1690. s_search_path.pos_in_item--;
  1691. /* Get real file size (total length of all file items) */
  1692. p_le_ih = tp_item_head(&s_search_path);
  1693. if (is_statdata_le_ih(p_le_ih))
  1694. file_size = 0;
  1695. else {
  1696. loff_t offset = le_ih_k_offset(p_le_ih);
  1697. int bytes =
  1698. op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
  1699. /*
  1700. * this may mismatch with real file size: if last direct item
  1701. * had no padding zeros and last unformatted node had no free
  1702. * space, this file would have this file size
  1703. */
  1704. file_size = offset + bytes - 1;
  1705. }
  1706. /*
  1707. * are we doing a full truncate or delete, if so
  1708. * kick in the reada code
  1709. */
  1710. if (new_file_size == 0)
  1711. s_search_path.reada = PATH_READA | PATH_READA_BACK;
  1712. if (file_size == 0 || file_size < new_file_size) {
  1713. goto update_and_out;
  1714. }
  1715. /* Update key to search for the last file item. */
  1716. set_cpu_key_k_offset(&s_item_key, file_size);
  1717. do {
  1718. /* Cut or delete file item. */
  1719. deleted =
  1720. reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
  1721. inode, page, new_file_size);
  1722. if (deleted < 0) {
  1723. reiserfs_warning(inode->i_sb, "vs-5665",
  1724. "reiserfs_cut_from_item failed");
  1725. reiserfs_check_path(&s_search_path);
  1726. return 0;
  1727. }
  1728. RFALSE(deleted > file_size,
  1729. "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
  1730. deleted, file_size, &s_item_key);
  1731. /* Change key to search the last file item. */
  1732. file_size -= deleted;
  1733. set_cpu_key_k_offset(&s_item_key, file_size);
  1734. /*
  1735. * While there are bytes to truncate and previous
  1736. * file item is presented in the tree.
  1737. */
  1738. /*
  1739. * This loop could take a really long time, and could log
  1740. * many more blocks than a transaction can hold. So, we do
  1741. * a polite journal end here, and if the transaction needs
  1742. * ending, we make sure the file is consistent before ending
  1743. * the current trans and starting a new one
  1744. */
  1745. if (journal_transaction_should_end(th, 0) ||
  1746. reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
  1747. pathrelse(&s_search_path);
  1748. if (update_timestamps) {
  1749. inode->i_mtime = current_time(inode);
  1750. inode->i_ctime = current_time(inode);
  1751. }
  1752. reiserfs_update_sd(th, inode);
  1753. err = journal_end(th);
  1754. if (err)
  1755. goto out;
  1756. err = journal_begin(th, inode->i_sb,
  1757. JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
  1758. if (err)
  1759. goto out;
  1760. reiserfs_update_inode_transaction(inode);
  1761. }
  1762. } while (file_size > ROUND_UP(new_file_size) &&
  1763. search_for_position_by_key(inode->i_sb, &s_item_key,
  1764. &s_search_path) == POSITION_FOUND);
  1765. RFALSE(file_size > ROUND_UP(new_file_size),
  1766. "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
  1767. new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
  1768. update_and_out:
  1769. if (update_timestamps) {
  1770. /* this is truncate, not file closing */
  1771. inode->i_mtime = current_time(inode);
  1772. inode->i_ctime = current_time(inode);
  1773. }
  1774. reiserfs_update_sd(th, inode);
  1775. out:
  1776. pathrelse(&s_search_path);
  1777. return err;
  1778. }
  1779. #ifdef CONFIG_REISERFS_CHECK
  1780. /* this makes sure, that we __append__, not overwrite or add holes */
  1781. static void check_research_for_paste(struct treepath *path,
  1782. const struct cpu_key *key)
  1783. {
  1784. struct item_head *found_ih = tp_item_head(path);
  1785. if (is_direct_le_ih(found_ih)) {
  1786. if (le_ih_k_offset(found_ih) +
  1787. op_bytes_number(found_ih,
  1788. get_last_bh(path)->b_size) !=
  1789. cpu_key_k_offset(key)
  1790. || op_bytes_number(found_ih,
  1791. get_last_bh(path)->b_size) !=
  1792. pos_in_item(path))
  1793. reiserfs_panic(NULL, "PAP-5720", "found direct item "
  1794. "%h or position (%d) does not match "
  1795. "to key %K", found_ih,
  1796. pos_in_item(path), key);
  1797. }
  1798. if (is_indirect_le_ih(found_ih)) {
  1799. if (le_ih_k_offset(found_ih) +
  1800. op_bytes_number(found_ih,
  1801. get_last_bh(path)->b_size) !=
  1802. cpu_key_k_offset(key)
  1803. || I_UNFM_NUM(found_ih) != pos_in_item(path)
  1804. || get_ih_free_space(found_ih) != 0)
  1805. reiserfs_panic(NULL, "PAP-5730", "found indirect "
  1806. "item (%h) or position (%d) does not "
  1807. "match to key (%K)",
  1808. found_ih, pos_in_item(path), key);
  1809. }
  1810. }
  1811. #endif /* config reiserfs check */
  1812. /*
  1813. * Paste bytes to the existing item.
  1814. * Returns bytes number pasted into the item.
  1815. */
  1816. int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
  1817. /* Path to the pasted item. */
  1818. struct treepath *search_path,
  1819. /* Key to search for the needed item. */
  1820. const struct cpu_key *key,
  1821. /* Inode item belongs to */
  1822. struct inode *inode,
  1823. /* Pointer to the bytes to paste. */
  1824. const char *body,
  1825. /* Size of pasted bytes. */
  1826. int pasted_size)
  1827. {
  1828. struct super_block *sb = inode->i_sb;
  1829. struct tree_balance s_paste_balance;
  1830. int retval;
  1831. int fs_gen;
  1832. int depth;
  1833. BUG_ON(!th->t_trans_id);
  1834. fs_gen = get_generation(inode->i_sb);
  1835. #ifdef REISERQUOTA_DEBUG
  1836. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1837. "reiserquota paste_into_item(): allocating %u id=%u type=%c",
  1838. pasted_size, inode->i_uid,
  1839. key2type(&key->on_disk_key));
  1840. #endif
  1841. depth = reiserfs_write_unlock_nested(sb);
  1842. retval = dquot_alloc_space_nodirty(inode, pasted_size);
  1843. reiserfs_write_lock_nested(sb, depth);
  1844. if (retval) {
  1845. pathrelse(search_path);
  1846. return retval;
  1847. }
  1848. init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
  1849. pasted_size);
  1850. #ifdef DISPLACE_NEW_PACKING_LOCALITIES
  1851. s_paste_balance.key = key->on_disk_key;
  1852. #endif
  1853. /* DQUOT_* can schedule, must check before the fix_nodes */
  1854. if (fs_changed(fs_gen, inode->i_sb)) {
  1855. goto search_again;
  1856. }
  1857. while ((retval =
  1858. fix_nodes(M_PASTE, &s_paste_balance, NULL,
  1859. body)) == REPEAT_SEARCH) {
  1860. search_again:
  1861. /* file system changed while we were in the fix_nodes */
  1862. PROC_INFO_INC(th->t_super, paste_into_item_restarted);
  1863. retval =
  1864. search_for_position_by_key(th->t_super, key,
  1865. search_path);
  1866. if (retval == IO_ERROR) {
  1867. retval = -EIO;
  1868. goto error_out;
  1869. }
  1870. if (retval == POSITION_FOUND) {
  1871. reiserfs_warning(inode->i_sb, "PAP-5710",
  1872. "entry or pasted byte (%K) exists",
  1873. key);
  1874. retval = -EEXIST;
  1875. goto error_out;
  1876. }
  1877. #ifdef CONFIG_REISERFS_CHECK
  1878. check_research_for_paste(search_path, key);
  1879. #endif
  1880. }
  1881. /*
  1882. * Perform balancing after all resources are collected by fix_nodes,
  1883. * and accessing them will not risk triggering schedule.
  1884. */
  1885. if (retval == CARRY_ON) {
  1886. do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
  1887. return 0;
  1888. }
  1889. retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
  1890. error_out:
  1891. /* this also releases the path */
  1892. unfix_nodes(&s_paste_balance);
  1893. #ifdef REISERQUOTA_DEBUG
  1894. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1895. "reiserquota paste_into_item(): freeing %u id=%u type=%c",
  1896. pasted_size, inode->i_uid,
  1897. key2type(&key->on_disk_key));
  1898. #endif
  1899. depth = reiserfs_write_unlock_nested(sb);
  1900. dquot_free_space_nodirty(inode, pasted_size);
  1901. reiserfs_write_lock_nested(sb, depth);
  1902. return retval;
  1903. }
  1904. /*
  1905. * Insert new item into the buffer at the path.
  1906. * th - active transaction handle
  1907. * path - path to the inserted item
  1908. * ih - pointer to the item header to insert
  1909. * body - pointer to the bytes to insert
  1910. */
  1911. int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
  1912. struct treepath *path, const struct cpu_key *key,
  1913. struct item_head *ih, struct inode *inode,
  1914. const char *body)
  1915. {
  1916. struct tree_balance s_ins_balance;
  1917. int retval;
  1918. int fs_gen = 0;
  1919. int quota_bytes = 0;
  1920. BUG_ON(!th->t_trans_id);
  1921. if (inode) { /* Do we count quotas for item? */
  1922. int depth;
  1923. fs_gen = get_generation(inode->i_sb);
  1924. quota_bytes = ih_item_len(ih);
  1925. /*
  1926. * hack so the quota code doesn't have to guess
  1927. * if the file has a tail, links are always tails,
  1928. * so there's no guessing needed
  1929. */
  1930. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
  1931. quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
  1932. #ifdef REISERQUOTA_DEBUG
  1933. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1934. "reiserquota insert_item(): allocating %u id=%u type=%c",
  1935. quota_bytes, inode->i_uid, head2type(ih));
  1936. #endif
  1937. /*
  1938. * We can't dirty inode here. It would be immediately
  1939. * written but appropriate stat item isn't inserted yet...
  1940. */
  1941. depth = reiserfs_write_unlock_nested(inode->i_sb);
  1942. retval = dquot_alloc_space_nodirty(inode, quota_bytes);
  1943. reiserfs_write_lock_nested(inode->i_sb, depth);
  1944. if (retval) {
  1945. pathrelse(path);
  1946. return retval;
  1947. }
  1948. }
  1949. init_tb_struct(th, &s_ins_balance, th->t_super, path,
  1950. IH_SIZE + ih_item_len(ih));
  1951. #ifdef DISPLACE_NEW_PACKING_LOCALITIES
  1952. s_ins_balance.key = key->on_disk_key;
  1953. #endif
  1954. /*
  1955. * DQUOT_* can schedule, must check to be sure calling
  1956. * fix_nodes is safe
  1957. */
  1958. if (inode && fs_changed(fs_gen, inode->i_sb)) {
  1959. goto search_again;
  1960. }
  1961. while ((retval =
  1962. fix_nodes(M_INSERT, &s_ins_balance, ih,
  1963. body)) == REPEAT_SEARCH) {
  1964. search_again:
  1965. /* file system changed while we were in the fix_nodes */
  1966. PROC_INFO_INC(th->t_super, insert_item_restarted);
  1967. retval = search_item(th->t_super, key, path);
  1968. if (retval == IO_ERROR) {
  1969. retval = -EIO;
  1970. goto error_out;
  1971. }
  1972. if (retval == ITEM_FOUND) {
  1973. reiserfs_warning(th->t_super, "PAP-5760",
  1974. "key %K already exists in the tree",
  1975. key);
  1976. retval = -EEXIST;
  1977. goto error_out;
  1978. }
  1979. }
  1980. /* make balancing after all resources will be collected at a time */
  1981. if (retval == CARRY_ON) {
  1982. do_balance(&s_ins_balance, ih, body, M_INSERT);
  1983. return 0;
  1984. }
  1985. retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
  1986. error_out:
  1987. /* also releases the path */
  1988. unfix_nodes(&s_ins_balance);
  1989. #ifdef REISERQUOTA_DEBUG
  1990. reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
  1991. "reiserquota insert_item(): freeing %u id=%u type=%c",
  1992. quota_bytes, inode->i_uid, head2type(ih));
  1993. #endif
  1994. if (inode) {
  1995. int depth = reiserfs_write_unlock_nested(inode->i_sb);
  1996. dquot_free_space_nodirty(inode, quota_bytes);
  1997. reiserfs_write_lock_nested(inode->i_sb, depth);
  1998. }
  1999. return retval;
  2000. }