shrinker.c 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. /*
  2. * This file is part of UBIFS.
  3. *
  4. * Copyright (C) 2006-2008 Nokia Corporation.
  5. *
  6. * This program is free software; you can redistribute it and/or modify it
  7. * under the terms of the GNU General Public License version 2 as published by
  8. * the Free Software Foundation.
  9. *
  10. * This program is distributed in the hope that it will be useful, but WITHOUT
  11. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  13. * more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along with
  16. * this program; if not, write to the Free Software Foundation, Inc., 51
  17. * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  18. *
  19. * Authors: Artem Bityutskiy (Битюцкий Артём)
  20. * Adrian Hunter
  21. */
  22. /*
  23. * This file implements UBIFS shrinker which evicts clean znodes from the TNC
  24. * tree when Linux VM needs more RAM.
  25. *
  26. * We do not implement any LRU lists to find oldest znodes to free because it
  27. * would add additional overhead to the file system fast paths. So the shrinker
  28. * just walks the TNC tree when searching for znodes to free.
  29. *
  30. * If the root of a TNC sub-tree is clean and old enough, then the children are
  31. * also clean and old enough. So the shrinker walks the TNC in level order and
  32. * dumps entire sub-trees.
  33. *
  34. * The age of znodes is just the time-stamp when they were last looked at.
  35. * The current shrinker first tries to evict old znodes, then young ones.
  36. *
  37. * Since the shrinker is global, it has to protect against races with FS
  38. * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
  39. */
  40. #include "ubifs.h"
/* List of all UBIFS file-system instances */
LIST_HEAD(ubifs_infos);

/*
 * We number each shrinker run and record the number on the ubifs_info structure
 * so that we can easily work out which ubifs_info structures have already been
 * done by the current run. The counter is advanced in 'shrink_tnc_trees()'
 * while 'ubifs_infos_lock' is held, and the value zero is never used as a run
 * number.
 */
static unsigned int shrinker_run_no;

/* Protects 'ubifs_infos' list */
DEFINE_SPINLOCK(ubifs_infos_lock);

/*
 * Global clean znode counter (for all mounted UBIFS instances). It is
 * decreased by 'shrink_tnc()' when znodes are freed and is what
 * 'ubifs_shrink_count()' reports; it may temporarily be negative.
 */
atomic_long_t ubifs_clean_zn_cnt;
  53. /**
  54. * shrink_tnc - shrink TNC tree.
  55. * @c: UBIFS file-system description object
  56. * @nr: number of znodes to free
  57. * @age: the age of znodes to free
  58. * @contention: if any contention, this is set to %1
  59. *
  60. * This function traverses TNC tree and frees clean znodes. It does not free
  61. * clean znodes which younger then @age. Returns number of freed znodes.
  62. */
  63. static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
  64. {
  65. int total_freed = 0;
  66. struct ubifs_znode *znode, *zprev;
  67. int time = get_seconds();
  68. ubifs_assert(mutex_is_locked(&c->umount_mutex));
  69. ubifs_assert(mutex_is_locked(&c->tnc_mutex));
  70. if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
  71. return 0;
  72. /*
  73. * Traverse the TNC tree in levelorder manner, so that it is possible
  74. * to destroy large sub-trees. Indeed, if a znode is old, then all its
  75. * children are older or of the same age.
  76. *
  77. * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
  78. * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
  79. * changed only when the 'c->tnc_mutex' is held.
  80. */
  81. zprev = NULL;
  82. znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
  83. while (znode && total_freed < nr &&
  84. atomic_long_read(&c->clean_zn_cnt) > 0) {
  85. int freed;
  86. /*
  87. * If the znode is clean, but it is in the 'c->cnext' list, this
  88. * means that this znode has just been written to flash as a
  89. * part of commit and was marked clean. They will be removed
  90. * from the list at end commit. We cannot change the list,
  91. * because it is not protected by any mutex (design decision to
  92. * make commit really independent and parallel to main I/O). So
  93. * we just skip these znodes.
  94. *
  95. * Note, the 'clean_zn_cnt' counters are not updated until
  96. * after the commit, so the UBIFS shrinker does not report
  97. * the znodes which are in the 'c->cnext' list as freeable.
  98. *
  99. * Also note, if the root of a sub-tree is not in 'c->cnext',
  100. * then the whole sub-tree is not in 'c->cnext' as well, so it
  101. * is safe to dump whole sub-tree.
  102. */
  103. if (znode->cnext) {
  104. /*
  105. * Very soon these znodes will be removed from the list
  106. * and become freeable.
  107. */
  108. *contention = 1;
  109. } else if (!ubifs_zn_dirty(znode) &&
  110. abs(time - znode->time) >= age) {
  111. if (znode->parent)
  112. znode->parent->zbranch[znode->iip].znode = NULL;
  113. else
  114. c->zroot.znode = NULL;
  115. freed = ubifs_destroy_tnc_subtree(znode);
  116. atomic_long_sub(freed, &ubifs_clean_zn_cnt);
  117. atomic_long_sub(freed, &c->clean_zn_cnt);
  118. total_freed += freed;
  119. znode = zprev;
  120. }
  121. if (unlikely(!c->zroot.znode))
  122. break;
  123. zprev = znode;
  124. znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
  125. cond_resched();
  126. }
  127. return total_freed;
  128. }
  129. /**
  130. * shrink_tnc_trees - shrink UBIFS TNC trees.
  131. * @nr: number of znodes to free
  132. * @age: the age of znodes to free
  133. * @contention: if any contention, this is set to %1
  134. *
  135. * This function walks the list of mounted UBIFS file-systems and frees clean
  136. * znodes which are older than @age, until at least @nr znodes are freed.
  137. * Returns the number of freed znodes.
  138. */
  139. static int shrink_tnc_trees(int nr, int age, int *contention)
  140. {
  141. struct ubifs_info *c;
  142. struct list_head *p;
  143. unsigned int run_no;
  144. int freed = 0;
  145. spin_lock(&ubifs_infos_lock);
  146. do {
  147. run_no = ++shrinker_run_no;
  148. } while (run_no == 0);
  149. /* Iterate over all mounted UBIFS file-systems and try to shrink them */
  150. p = ubifs_infos.next;
  151. while (p != &ubifs_infos) {
  152. c = list_entry(p, struct ubifs_info, infos_list);
  153. /*
  154. * We move the ones we do to the end of the list, so we stop
  155. * when we see one we have already done.
  156. */
  157. if (c->shrinker_run_no == run_no)
  158. break;
  159. if (!mutex_trylock(&c->umount_mutex)) {
  160. /* Some un-mount is in progress, try next FS */
  161. *contention = 1;
  162. p = p->next;
  163. continue;
  164. }
  165. /*
  166. * We're holding 'c->umount_mutex', so the file-system won't go
  167. * away.
  168. */
  169. if (!mutex_trylock(&c->tnc_mutex)) {
  170. mutex_unlock(&c->umount_mutex);
  171. *contention = 1;
  172. p = p->next;
  173. continue;
  174. }
  175. spin_unlock(&ubifs_infos_lock);
  176. /*
  177. * OK, now we have TNC locked, the file-system cannot go away -
  178. * it is safe to reap the cache.
  179. */
  180. c->shrinker_run_no = run_no;
  181. freed += shrink_tnc(c, nr, age, contention);
  182. mutex_unlock(&c->tnc_mutex);
  183. spin_lock(&ubifs_infos_lock);
  184. /* Get the next list element before we move this one */
  185. p = p->next;
  186. /*
  187. * Move this one to the end of the list to provide some
  188. * fairness.
  189. */
  190. list_move_tail(&c->infos_list, &ubifs_infos);
  191. mutex_unlock(&c->umount_mutex);
  192. if (freed >= nr)
  193. break;
  194. }
  195. spin_unlock(&ubifs_infos_lock);
  196. return freed;
  197. }
  198. /**
  199. * kick_a_thread - kick a background thread to start commit.
  200. *
  201. * This function kicks a background thread to start background commit. Returns
  202. * %-1 if a thread was kicked or there is another reason to assume the memory
  203. * will soon be freed or become freeable. If there are no dirty znodes, returns
  204. * %0.
  205. */
  206. static int kick_a_thread(void)
  207. {
  208. int i;
  209. struct ubifs_info *c;
  210. /*
  211. * Iterate over all mounted UBIFS file-systems and find out if there is
  212. * already an ongoing commit operation there. If no, then iterate for
  213. * the second time and initiate background commit.
  214. */
  215. spin_lock(&ubifs_infos_lock);
  216. for (i = 0; i < 2; i++) {
  217. list_for_each_entry(c, &ubifs_infos, infos_list) {
  218. long dirty_zn_cnt;
  219. if (!mutex_trylock(&c->umount_mutex)) {
  220. /*
  221. * Some un-mount is in progress, it will
  222. * certainly free memory, so just return.
  223. */
  224. spin_unlock(&ubifs_infos_lock);
  225. return -1;
  226. }
  227. dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
  228. if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
  229. c->ro_mount || c->ro_error) {
  230. mutex_unlock(&c->umount_mutex);
  231. continue;
  232. }
  233. if (c->cmt_state != COMMIT_RESTING) {
  234. spin_unlock(&ubifs_infos_lock);
  235. mutex_unlock(&c->umount_mutex);
  236. return -1;
  237. }
  238. if (i == 1) {
  239. list_move_tail(&c->infos_list, &ubifs_infos);
  240. spin_unlock(&ubifs_infos_lock);
  241. ubifs_request_bg_commit(c);
  242. mutex_unlock(&c->umount_mutex);
  243. return -1;
  244. }
  245. mutex_unlock(&c->umount_mutex);
  246. }
  247. }
  248. spin_unlock(&ubifs_infos_lock);
  249. return 0;
  250. }
  251. unsigned long ubifs_shrink_count(struct shrinker *shrink,
  252. struct shrink_control *sc)
  253. {
  254. long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
  255. /*
  256. * Due to the way UBIFS updates the clean znode counter it may
  257. * temporarily be negative.
  258. */
  259. return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
  260. }
  261. unsigned long ubifs_shrink_scan(struct shrinker *shrink,
  262. struct shrink_control *sc)
  263. {
  264. unsigned long nr = sc->nr_to_scan;
  265. int contention = 0;
  266. unsigned long freed;
  267. long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
  268. if (!clean_zn_cnt) {
  269. /*
  270. * No clean znodes, nothing to reap. All we can do in this case
  271. * is to kick background threads to start commit, which will
  272. * probably make clean znodes which, in turn, will be freeable.
  273. * And we return -1 which means will make VM call us again
  274. * later.
  275. */
  276. dbg_tnc("no clean znodes, kick a thread");
  277. return kick_a_thread();
  278. }
  279. freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
  280. if (freed >= nr)
  281. goto out;
  282. dbg_tnc("not enough old znodes, try to free young ones");
  283. freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
  284. if (freed >= nr)
  285. goto out;
  286. dbg_tnc("not enough young znodes, free all");
  287. freed += shrink_tnc_trees(nr - freed, 0, &contention);
  288. if (!freed && contention) {
  289. dbg_tnc("freed nothing, but contention");
  290. return SHRINK_STOP;
  291. }
  292. out:
  293. dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
  294. return freed;
  295. }