gc.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732
  1. /*
  2. * fs/logfs/gc.c - garbage collection code
  3. *
  4. * As should be obvious for Linux kernel code, license is GPLv2
  5. *
  6. * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
  7. */
  8. #include "logfs.h"
  9. #include <linux/sched.h>
  10. #include <linux/slab.h>
  11. /*
  12. * Wear leveling needs to kick in when the difference between low erase
  13. * counts and high erase counts gets too big. A good value for "too big"
  14. * may be somewhat below 10% of maximum erase count for the device.
  15. * Why not 397, to pick a nice round number with no specific meaning? :)
  16. *
  17. * WL_RATELIMIT is the minimum time between two wear level events. A huge
  18. * number of segments may fulfil the requirements for wear leveling at the
  19. * same time. If that happens we don't want to cause a latency from hell,
  20. * but just gently pick one segment every so often and minimize overhead.
  21. */
  22. #define WL_DELTA 397
  23. #define WL_RATELIMIT 100
  24. #define MAX_OBJ_ALIASES 2600
  25. #define SCAN_RATIO 512 /* number of scanned segments per gc'd segment */
  26. #define LIST_SIZE 64 /* base size of candidate lists */
  27. #define SCAN_ROUNDS 128 /* maximum number of complete medium scans */
  28. #define SCAN_ROUNDS_HIGH 4 /* maximum number of higher-level scans */
  29. static int no_free_segments(struct super_block *sb)
  30. {
  31. struct logfs_super *super = logfs_super(sb);
  32. return super->s_free_list.count;
  33. }
  34. /* journal has distance -1, top-most ifile layer distance 0 */
  35. static u8 root_distance(struct super_block *sb, gc_level_t __gc_level)
  36. {
  37. struct logfs_super *super = logfs_super(sb);
  38. u8 gc_level = (__force u8)__gc_level;
  39. switch (gc_level) {
  40. case 0: /* fall through */
  41. case 1: /* fall through */
  42. case 2: /* fall through */
  43. case 3:
  44. /* file data or indirect blocks */
  45. return super->s_ifile_levels + super->s_iblock_levels - gc_level;
  46. case 6: /* fall through */
  47. case 7: /* fall through */
  48. case 8: /* fall through */
  49. case 9:
  50. /* inode file data or indirect blocks */
  51. return super->s_ifile_levels - (gc_level - 6);
  52. default:
  53. printk(KERN_ERR"LOGFS: segment of unknown level %x found\n",
  54. gc_level);
  55. WARN_ON(1);
  56. return super->s_ifile_levels + super->s_iblock_levels;
  57. }
  58. }
  59. static int segment_is_reserved(struct super_block *sb, u32 segno)
  60. {
  61. struct logfs_super *super = logfs_super(sb);
  62. struct logfs_area *area;
  63. void *reserved;
  64. int i;
  65. /* Some segments are reserved. Just pretend they were all valid */
  66. reserved = btree_lookup32(&super->s_reserved_segments, segno);
  67. if (reserved)
  68. return 1;
  69. /* Currently open segments */
  70. for_each_area(i) {
  71. area = super->s_area[i];
  72. if (area->a_is_open && area->a_segno == segno)
  73. return 1;
  74. }
  75. return 0;
  76. }
  77. static void logfs_mark_segment_bad(struct super_block *sb, u32 segno)
  78. {
  79. BUG();
  80. }
  81. /*
  82. * Returns the bytes consumed by valid objects in this segment. Object headers
  83. * are counted, the segment header is not.
  84. */
  85. static u32 logfs_valid_bytes(struct super_block *sb, u32 segno, u32 *ec,
  86. gc_level_t *gc_level)
  87. {
  88. struct logfs_segment_entry se;
  89. u32 ec_level;
  90. logfs_get_segment_entry(sb, segno, &se);
  91. if (se.ec_level == cpu_to_be32(BADSEG) ||
  92. se.valid == cpu_to_be32(RESERVED))
  93. return RESERVED;
  94. ec_level = be32_to_cpu(se.ec_level);
  95. *ec = ec_level >> 4;
  96. *gc_level = GC_LEVEL(ec_level & 0xf);
  97. return be32_to_cpu(se.valid);
  98. }
  99. static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
  100. u64 bix, gc_level_t gc_level)
  101. {
  102. struct inode *inode;
  103. int err, cookie;
  104. inode = logfs_safe_iget(sb, ino, &cookie);
  105. err = logfs_rewrite_block(inode, bix, ofs, gc_level, 0);
  106. BUG_ON(err);
  107. logfs_safe_iput(inode, cookie);
  108. }
  109. static u32 logfs_gc_segment(struct super_block *sb, u32 segno)
  110. {
  111. struct logfs_super *super = logfs_super(sb);
  112. struct logfs_segment_header sh;
  113. struct logfs_object_header oh;
  114. u64 ofs, ino, bix;
  115. u32 seg_ofs, logical_segno, cleaned = 0;
  116. int err, len, valid;
  117. gc_level_t gc_level;
  118. LOGFS_BUG_ON(segment_is_reserved(sb, segno), sb);
  119. btree_insert32(&super->s_reserved_segments, segno, (void *)1, GFP_NOFS);
  120. err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh);
  121. BUG_ON(err);
  122. gc_level = GC_LEVEL(sh.level);
  123. logical_segno = be32_to_cpu(sh.segno);
  124. if (sh.crc != logfs_crc32(&sh, sizeof(sh), 4)) {
  125. logfs_mark_segment_bad(sb, segno);
  126. cleaned = -1;
  127. goto out;
  128. }
  129. for (seg_ofs = LOGFS_SEGMENT_HEADERSIZE;
  130. seg_ofs + sizeof(oh) < super->s_segsize; ) {
  131. ofs = dev_ofs(sb, logical_segno, seg_ofs);
  132. err = wbuf_read(sb, dev_ofs(sb, segno, seg_ofs), sizeof(oh),
  133. &oh);
  134. BUG_ON(err);
  135. if (!memchr_inv(&oh, 0xff, sizeof(oh)))
  136. break;
  137. if (oh.crc != logfs_crc32(&oh, sizeof(oh) - 4, 4)) {
  138. logfs_mark_segment_bad(sb, segno);
  139. cleaned = super->s_segsize - 1;
  140. goto out;
  141. }
  142. ino = be64_to_cpu(oh.ino);
  143. bix = be64_to_cpu(oh.bix);
  144. len = sizeof(oh) + be16_to_cpu(oh.len);
  145. valid = logfs_is_valid_block(sb, ofs, ino, bix, gc_level);
  146. if (valid == 1) {
  147. logfs_cleanse_block(sb, ofs, ino, bix, gc_level);
  148. cleaned += len;
  149. } else if (valid == 2) {
  150. /* Will be invalid upon journal commit */
  151. cleaned += len;
  152. }
  153. seg_ofs += len;
  154. }
  155. out:
  156. btree_remove32(&super->s_reserved_segments, segno);
  157. return cleaned;
  158. }
  159. static struct gc_candidate *add_list(struct gc_candidate *cand,
  160. struct candidate_list *list)
  161. {
  162. struct rb_node **p = &list->rb_tree.rb_node;
  163. struct rb_node *parent = NULL;
  164. struct gc_candidate *cur;
  165. int comp;
  166. cand->list = list;
  167. while (*p) {
  168. parent = *p;
  169. cur = rb_entry(parent, struct gc_candidate, rb_node);
  170. if (list->sort_by_ec)
  171. comp = cand->erase_count < cur->erase_count;
  172. else
  173. comp = cand->valid < cur->valid;
  174. if (comp)
  175. p = &parent->rb_left;
  176. else
  177. p = &parent->rb_right;
  178. }
  179. rb_link_node(&cand->rb_node, parent, p);
  180. rb_insert_color(&cand->rb_node, &list->rb_tree);
  181. if (list->count <= list->maxcount) {
  182. list->count++;
  183. return NULL;
  184. }
  185. cand = rb_entry(rb_last(&list->rb_tree), struct gc_candidate, rb_node);
  186. rb_erase(&cand->rb_node, &list->rb_tree);
  187. cand->list = NULL;
  188. return cand;
  189. }
  190. static void remove_from_list(struct gc_candidate *cand)
  191. {
  192. struct candidate_list *list = cand->list;
  193. rb_erase(&cand->rb_node, &list->rb_tree);
  194. list->count--;
  195. }
  196. static void free_candidate(struct super_block *sb, struct gc_candidate *cand)
  197. {
  198. struct logfs_super *super = logfs_super(sb);
  199. btree_remove32(&super->s_cand_tree, cand->segno);
  200. kfree(cand);
  201. }
  202. u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec)
  203. {
  204. struct gc_candidate *cand;
  205. u32 segno;
  206. BUG_ON(list->count == 0);
  207. cand = rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
  208. remove_from_list(cand);
  209. segno = cand->segno;
  210. if (ec)
  211. *ec = cand->erase_count;
  212. free_candidate(sb, cand);
  213. return segno;
  214. }
  215. /*
  216. * We have several lists to manage segments with. The reserve_list is used to
  217. * deal with bad blocks. We try to keep the best (lowest ec) segments on this
  218. * list.
  219. * The free_list contains free segments for normal usage. It usually gets the
  220. * second pick after the reserve_list. But when the free_list is running short
  221. * it is more important to keep the free_list full than to keep a reserve.
  222. *
  223. * Segments that are not free are put onto a per-level low_list. If we have
  224. * to run garbage collection, we pick a candidate from there. All segments on
  225. * those lists should have at least some free space so GC will make progress.
  226. *
  227. * And last we have the ec_list, which is used to pick segments for wear
  228. * leveling.
  229. *
  230. * If all appropriate lists are full, we simply free the candidate and forget
  231. * about that segment for a while. We have better candidates for each purpose.
  232. */
  233. static void __add_candidate(struct super_block *sb, struct gc_candidate *cand)
  234. {
  235. struct logfs_super *super = logfs_super(sb);
  236. u32 full = super->s_segsize - LOGFS_SEGMENT_RESERVE;
  237. if (cand->valid == 0) {
  238. /* 100% free segments */
  239. log_gc_noisy("add reserve segment %x (ec %x) at %llx\n",
  240. cand->segno, cand->erase_count,
  241. dev_ofs(sb, cand->segno, 0));
  242. cand = add_list(cand, &super->s_reserve_list);
  243. if (cand) {
  244. log_gc_noisy("add free segment %x (ec %x) at %llx\n",
  245. cand->segno, cand->erase_count,
  246. dev_ofs(sb, cand->segno, 0));
  247. cand = add_list(cand, &super->s_free_list);
  248. }
  249. } else {
  250. /* good candidates for Garbage Collection */
  251. if (cand->valid < full)
  252. cand = add_list(cand, &super->s_low_list[cand->dist]);
  253. /* good candidates for wear leveling,
  254. * segments that were recently written get ignored */
  255. if (cand)
  256. cand = add_list(cand, &super->s_ec_list);
  257. }
  258. if (cand)
  259. free_candidate(sb, cand);
  260. }
  261. static int add_candidate(struct super_block *sb, u32 segno, u32 valid, u32 ec,
  262. u8 dist)
  263. {
  264. struct logfs_super *super = logfs_super(sb);
  265. struct gc_candidate *cand;
  266. cand = kmalloc(sizeof(*cand), GFP_NOFS);
  267. if (!cand)
  268. return -ENOMEM;
  269. cand->segno = segno;
  270. cand->valid = valid;
  271. cand->erase_count = ec;
  272. cand->dist = dist;
  273. btree_insert32(&super->s_cand_tree, segno, cand, GFP_NOFS);
  274. __add_candidate(sb, cand);
  275. return 0;
  276. }
  277. static void remove_segment_from_lists(struct super_block *sb, u32 segno)
  278. {
  279. struct logfs_super *super = logfs_super(sb);
  280. struct gc_candidate *cand;
  281. cand = btree_lookup32(&super->s_cand_tree, segno);
  282. if (cand) {
  283. remove_from_list(cand);
  284. free_candidate(sb, cand);
  285. }
  286. }
  287. static void scan_segment(struct super_block *sb, u32 segno)
  288. {
  289. u32 valid, ec = 0;
  290. gc_level_t gc_level = 0;
  291. u8 dist;
  292. if (segment_is_reserved(sb, segno))
  293. return;
  294. remove_segment_from_lists(sb, segno);
  295. valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
  296. if (valid == RESERVED)
  297. return;
  298. dist = root_distance(sb, gc_level);
  299. add_candidate(sb, segno, valid, ec, dist);
  300. }
  301. static struct gc_candidate *first_in_list(struct candidate_list *list)
  302. {
  303. if (list->count == 0)
  304. return NULL;
  305. return rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
  306. }
  307. /*
  308. * Find the best segment for garbage collection. Main criterion is
  309. * the segment requiring the least effort to clean. Secondary
  310. * criterion is to GC on the lowest level available.
  311. *
  312. * So we search the least effort segment on the lowest level first,
  313. * then move up and pick another segment iff is requires significantly
  314. * less effort. Hence the LOGFS_MAX_OBJECTSIZE in the comparison.
  315. */
  316. static struct gc_candidate *get_candidate(struct super_block *sb)
  317. {
  318. struct logfs_super *super = logfs_super(sb);
  319. int i, max_dist;
  320. struct gc_candidate *cand = NULL, *this;
  321. max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS - 1);
  322. for (i = max_dist; i >= 0; i--) {
  323. this = first_in_list(&super->s_low_list[i]);
  324. if (!this)
  325. continue;
  326. if (!cand)
  327. cand = this;
  328. if (this->valid + LOGFS_MAX_OBJECTSIZE <= cand->valid)
  329. cand = this;
  330. }
  331. return cand;
  332. }
  333. static int __logfs_gc_once(struct super_block *sb, struct gc_candidate *cand)
  334. {
  335. struct logfs_super *super = logfs_super(sb);
  336. gc_level_t gc_level;
  337. u32 cleaned, valid, segno, ec;
  338. u8 dist;
  339. if (!cand) {
  340. log_gc("GC attempted, but no candidate found\n");
  341. return 0;
  342. }
  343. segno = cand->segno;
  344. dist = cand->dist;
  345. valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
  346. free_candidate(sb, cand);
  347. log_gc("GC segment #%02x at %llx, %x required, %x free, %x valid, %llx free\n",
  348. segno, (u64)segno << super->s_segshift,
  349. dist, no_free_segments(sb), valid,
  350. super->s_free_bytes);
  351. cleaned = logfs_gc_segment(sb, segno);
  352. log_gc("GC segment #%02x complete - now %x valid\n", segno,
  353. valid - cleaned);
  354. BUG_ON(cleaned != valid);
  355. return 1;
  356. }
  357. static int logfs_gc_once(struct super_block *sb)
  358. {
  359. struct gc_candidate *cand;
  360. cand = get_candidate(sb);
  361. if (cand)
  362. remove_from_list(cand);
  363. return __logfs_gc_once(sb, cand);
  364. }
  365. /* returns 1 if a wrap occurs, 0 otherwise */
  366. static int logfs_scan_some(struct super_block *sb)
  367. {
  368. struct logfs_super *super = logfs_super(sb);
  369. u32 segno;
  370. int i, ret = 0;
  371. segno = super->s_sweeper;
  372. for (i = SCAN_RATIO; i > 0; i--) {
  373. segno++;
  374. if (segno >= super->s_no_segs) {
  375. segno = 0;
  376. ret = 1;
  377. /* Break out of the loop. We want to read a single
  378. * block from the segment size on next invocation if
  379. * SCAN_RATIO is set to match block size
  380. */
  381. break;
  382. }
  383. scan_segment(sb, segno);
  384. }
  385. super->s_sweeper = segno;
  386. return ret;
  387. }
  388. /*
  389. * In principle, this function should loop forever, looking for GC candidates
  390. * and moving data. LogFS is designed in such a way that this loop is
  391. * guaranteed to terminate.
  392. *
  393. * Limiting the loop to some iterations serves purely to catch cases when
  394. * these guarantees have failed. An actual endless loop is an obvious bug
  395. * and should be reported as such.
  396. */
  397. static void __logfs_gc_pass(struct super_block *sb, int target)
  398. {
  399. struct logfs_super *super = logfs_super(sb);
  400. struct logfs_block *block;
  401. int round, progress, last_progress = 0;
  402. /*
  403. * Doing too many changes to the segfile at once would result
  404. * in a large number of aliases. Write the journal before
  405. * things get out of hand.
  406. */
  407. if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES)
  408. logfs_write_anchor(sb);
  409. if (no_free_segments(sb) >= target &&
  410. super->s_no_object_aliases < MAX_OBJ_ALIASES)
  411. return;
  412. log_gc("__logfs_gc_pass(%x)\n", target);
  413. for (round = 0; round < SCAN_ROUNDS; ) {
  414. if (no_free_segments(sb) >= target)
  415. goto write_alias;
  416. /* Sync in-memory state with on-medium state in case they
  417. * diverged */
  418. logfs_write_anchor(sb);
  419. round += logfs_scan_some(sb);
  420. if (no_free_segments(sb) >= target)
  421. goto write_alias;
  422. progress = logfs_gc_once(sb);
  423. if (progress)
  424. last_progress = round;
  425. else if (round - last_progress > 2)
  426. break;
  427. continue;
  428. /*
  429. * The goto logic is nasty, I just don't know a better way to
  430. * code it. GC is supposed to ensure two things:
  431. * 1. Enough free segments are available.
  432. * 2. The number of aliases is bounded.
  433. * When 1. is achieved, we take a look at 2. and write back
  434. * some alias-containing blocks, if necessary. However, after
  435. * each such write we need to go back to 1., as writes can
  436. * consume free segments.
  437. */
  438. write_alias:
  439. if (super->s_no_object_aliases < MAX_OBJ_ALIASES)
  440. return;
  441. if (list_empty(&super->s_object_alias)) {
  442. /* All aliases are still in btree */
  443. return;
  444. }
  445. log_gc("Write back one alias\n");
  446. block = list_entry(super->s_object_alias.next,
  447. struct logfs_block, alias_list);
  448. block->ops->write_block(block);
  449. /*
  450. * To round off the nasty goto logic, we reset round here. It
  451. * is a safety-net for GC not making any progress and limited
  452. * to something reasonably small. If incremented it for every
  453. * single alias, the loop could terminate rather quickly.
  454. */
  455. round = 0;
  456. }
  457. LOGFS_BUG(sb);
  458. }
  459. static int wl_ratelimit(struct super_block *sb, u64 *next_event)
  460. {
  461. struct logfs_super *super = logfs_super(sb);
  462. if (*next_event < super->s_gec) {
  463. *next_event = super->s_gec + WL_RATELIMIT;
  464. return 0;
  465. }
  466. return 1;
  467. }
  468. static void logfs_wl_pass(struct super_block *sb)
  469. {
  470. struct logfs_super *super = logfs_super(sb);
  471. struct gc_candidate *wl_cand, *free_cand;
  472. if (wl_ratelimit(sb, &super->s_wl_gec_ostore))
  473. return;
  474. wl_cand = first_in_list(&super->s_ec_list);
  475. if (!wl_cand)
  476. return;
  477. free_cand = first_in_list(&super->s_free_list);
  478. if (!free_cand)
  479. return;
  480. if (wl_cand->erase_count < free_cand->erase_count + WL_DELTA) {
  481. remove_from_list(wl_cand);
  482. __logfs_gc_once(sb, wl_cand);
  483. }
  484. }
  485. /*
  486. * The journal needs wear leveling as well. But moving the journal is an
  487. * expensive operation so we try to avoid it as much as possible. And if we
  488. * have to do it, we move the whole journal, not individual segments.
  489. *
  490. * Ratelimiting is not strictly necessary here, it mainly serves to avoid the
  491. * calculations. First we check whether moving the journal would be a
  492. * significant improvement. That means that a) the current journal segments
  493. * have more wear than the future journal segments and b) the current journal
  494. * segments have more wear than normal ostore segments.
  495. * Rationale for b) is that we don't have to move the journal if it is aging
  496. * less than the ostore, even if the reserve segments age even less (they are
  497. * excluded from wear leveling, after all).
  498. * Next we check that the superblocks have less wear than the journal. Since
  499. * moving the journal requires writing the superblocks, we have to protect the
  500. * superblocks even more than the journal.
  501. *
  502. * Also we double the acceptable wear difference, compared to ostore wear
  503. * leveling. Journal data is read and rewritten rapidly, comparatively. So
  504. * soft errors have much less time to accumulate and we allow the journal to
  505. * be a bit worse than the ostore.
  506. */
  507. static void logfs_journal_wl_pass(struct super_block *sb)
  508. {
  509. struct logfs_super *super = logfs_super(sb);
  510. struct gc_candidate *cand;
  511. u32 min_journal_ec = -1, max_reserve_ec = 0;
  512. int i;
  513. if (wl_ratelimit(sb, &super->s_wl_gec_journal))
  514. return;
  515. if (super->s_reserve_list.count < super->s_no_journal_segs) {
  516. /* Reserve is not full enough to move complete journal */
  517. return;
  518. }
  519. journal_for_each(i)
  520. if (super->s_journal_seg[i])
  521. min_journal_ec = min(min_journal_ec,
  522. super->s_journal_ec[i]);
  523. cand = rb_entry(rb_first(&super->s_free_list.rb_tree),
  524. struct gc_candidate, rb_node);
  525. max_reserve_ec = cand->erase_count;
  526. for (i = 0; i < 2; i++) {
  527. struct logfs_segment_entry se;
  528. u32 segno = seg_no(sb, super->s_sb_ofs[i]);
  529. u32 ec;
  530. logfs_get_segment_entry(sb, segno, &se);
  531. ec = be32_to_cpu(se.ec_level) >> 4;
  532. max_reserve_ec = max(max_reserve_ec, ec);
  533. }
  534. if (min_journal_ec > max_reserve_ec + 2 * WL_DELTA) {
  535. do_logfs_journal_wl_pass(sb);
  536. }
  537. }
  538. void logfs_gc_pass(struct super_block *sb)
  539. {
  540. struct logfs_super *super = logfs_super(sb);
  541. //BUG_ON(mutex_trylock(&logfs_super(sb)->s_w_mutex));
  542. /* Write journal before free space is getting saturated with dirty
  543. * objects.
  544. */
  545. if (super->s_dirty_used_bytes + super->s_dirty_free_bytes
  546. + LOGFS_MAX_OBJECTSIZE >= super->s_free_bytes)
  547. logfs_write_anchor(sb);
  548. __logfs_gc_pass(sb, super->s_total_levels);
  549. logfs_wl_pass(sb);
  550. logfs_journal_wl_pass(sb);
  551. }
  552. static int check_area(struct super_block *sb, int i)
  553. {
  554. struct logfs_super *super = logfs_super(sb);
  555. struct logfs_area *area = super->s_area[i];
  556. gc_level_t gc_level;
  557. u32 cleaned, valid, ec;
  558. u32 segno = area->a_segno;
  559. u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
  560. if (!area->a_is_open)
  561. return 0;
  562. if (super->s_devops->can_write_buf(sb, ofs) == 0)
  563. return 0;
  564. printk(KERN_INFO"LogFS: Possibly incomplete write at %llx\n", ofs);
  565. /*
  566. * The device cannot write back the write buffer. Most likely the
  567. * wbuf was already written out and the system crashed at some point
  568. * before the journal commit happened. In that case we wouldn't have
  569. * to do anything. But if the crash happened before the wbuf was
  570. * written out correctly, we must GC this segment. So assume the
  571. * worst and always do the GC run.
  572. */
  573. area->a_is_open = 0;
  574. valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
  575. cleaned = logfs_gc_segment(sb, segno);
  576. if (cleaned != valid)
  577. return -EIO;
  578. return 0;
  579. }
  580. int logfs_check_areas(struct super_block *sb)
  581. {
  582. int i, err;
  583. for_each_area(i) {
  584. err = check_area(sb, i);
  585. if (err)
  586. return err;
  587. }
  588. return 0;
  589. }
  590. static void logfs_init_candlist(struct candidate_list *list, int maxcount,
  591. int sort_by_ec)
  592. {
  593. list->count = 0;
  594. list->maxcount = maxcount;
  595. list->sort_by_ec = sort_by_ec;
  596. list->rb_tree = RB_ROOT;
  597. }
  598. int logfs_init_gc(struct super_block *sb)
  599. {
  600. struct logfs_super *super = logfs_super(sb);
  601. int i;
  602. btree_init_mempool32(&super->s_cand_tree, super->s_btree_pool);
  603. logfs_init_candlist(&super->s_free_list, LIST_SIZE + SCAN_RATIO, 1);
  604. logfs_init_candlist(&super->s_reserve_list,
  605. super->s_bad_seg_reserve, 1);
  606. for_each_area(i)
  607. logfs_init_candlist(&super->s_low_list[i], LIST_SIZE, 0);
  608. logfs_init_candlist(&super->s_ec_list, LIST_SIZE, 1);
  609. return 0;
  610. }
  611. static void logfs_cleanup_list(struct super_block *sb,
  612. struct candidate_list *list)
  613. {
  614. struct gc_candidate *cand;
  615. while (list->count) {
  616. cand = rb_entry(list->rb_tree.rb_node, struct gc_candidate,
  617. rb_node);
  618. remove_from_list(cand);
  619. free_candidate(sb, cand);
  620. }
  621. BUG_ON(list->rb_tree.rb_node);
  622. }
  623. void logfs_cleanup_gc(struct super_block *sb)
  624. {
  625. struct logfs_super *super = logfs_super(sb);
  626. int i;
  627. if (!super->s_free_list.count)
  628. return;
  629. /*
  630. * FIXME: The btree may still contain a single empty node. So we
  631. * call the grim visitor to clean up that mess. Btree code should
  632. * do it for us, really.
  633. */
  634. btree_grim_visitor32(&super->s_cand_tree, 0, NULL);
  635. logfs_cleanup_list(sb, &super->s_free_list);
  636. logfs_cleanup_list(sb, &super->s_reserve_list);
  637. for_each_area(i)
  638. logfs_cleanup_list(sb, &super->s_low_list[i]);
  639. logfs_cleanup_list(sb, &super->s_ec_list);
  640. }