dm-cache-policy-smq.c

  1. /*
  2. * Copyright (C) 2015 Red Hat. All rights reserved.
  3. *
  4. * This file is released under the GPL.
  5. */
  6. #include "dm-cache-policy.h"
  7. #include "dm-cache-policy-internal.h"
  8. #include "dm.h"
  9. #include <linux/hash.h>
  10. #include <linux/jiffies.h>
  11. #include <linux/module.h>
  12. #include <linux/mutex.h>
  13. #include <linux/vmalloc.h>
  14. #include <linux/math64.h>
  15. #define DM_MSG_PREFIX "cache-policy-smq"
  16. /*----------------------------------------------------------------*/
  17. /*
  18. * Safe division functions that return zero on divide by zero.
  19. */
  20. static unsigned safe_div(unsigned n, unsigned d)
  21. {
  22. return d ? n / d : 0u;
  23. }
  24. static unsigned safe_mod(unsigned n, unsigned d)
  25. {
  26. return d ? n % d : 0u;
  27. }
  28. /*----------------------------------------------------------------*/
  29. struct entry {
  30. unsigned hash_next:28;
  31. unsigned prev:28;
  32. unsigned next:28;
  33. unsigned level:7;
  34. bool dirty:1;
  35. bool allocated:1;
  36. bool sentinel:1;
  37. dm_oblock_t oblock;
  38. };
  39. /*----------------------------------------------------------------*/
  40. #define INDEXER_NULL ((1u << 28u) - 1u)
  41. /*
  42. * An entry_space manages a set of entries that we use for the queues.
  43. * The clean and dirty queues share entries, so this object is separate
  44. * from the queue itself.
  45. */
  46. struct entry_space {
  47. struct entry *begin;
  48. struct entry *end;
  49. };
  50. static int space_init(struct entry_space *es, unsigned nr_entries)
  51. {
  52. if (!nr_entries) {
  53. es->begin = es->end = NULL;
  54. return 0;
  55. }
  56. es->begin = vzalloc(sizeof(struct entry) * nr_entries);
  57. if (!es->begin)
  58. return -ENOMEM;
  59. es->end = es->begin + nr_entries;
  60. return 0;
  61. }
  62. static void space_exit(struct entry_space *es)
  63. {
  64. vfree(es->begin);
  65. }
  66. static struct entry *__get_entry(struct entry_space *es, unsigned block)
  67. {
  68. struct entry *e;
  69. e = es->begin + block;
  70. BUG_ON(e >= es->end);
  71. return e;
  72. }
  73. static unsigned to_index(struct entry_space *es, struct entry *e)
  74. {
  75. BUG_ON(e < es->begin || e >= es->end);
  76. return e - es->begin;
  77. }
  78. static struct entry *to_entry(struct entry_space *es, unsigned block)
  79. {
  80. if (block == INDEXER_NULL)
  81. return NULL;
  82. return __get_entry(es, block);
  83. }
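/*
 * Example (illustrative sketch, not used by the policy itself): the
 * entry_space hands entries out by index rather than by pointer, which is
 * why the 28-bit fields above can stand in for pointers, with INDEXER_NULL
 * playing the role of NULL.
 *
 *        struct entry_space es;
 *        struct entry *e;
 *
 *        if (space_init(&es, 1024))        // 1024 zeroed entries
 *                return -ENOMEM;
 *        e = __get_entry(&es, 7);          // index -> pointer
 *        BUG_ON(to_index(&es, e) != 7);    // pointer -> index round-trips
 *        BUG_ON(to_entry(&es, INDEXER_NULL) != NULL);
 *        space_exit(&es);
 */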
  84. /*----------------------------------------------------------------*/
  85. struct ilist {
  86. unsigned nr_elts; /* excluding sentinel entries */
  87. unsigned head, tail;
  88. };
  89. static void l_init(struct ilist *l)
  90. {
  91. l->nr_elts = 0;
  92. l->head = l->tail = INDEXER_NULL;
  93. }
  94. static struct entry *l_head(struct entry_space *es, struct ilist *l)
  95. {
  96. return to_entry(es, l->head);
  97. }
  98. static struct entry *l_tail(struct entry_space *es, struct ilist *l)
  99. {
  100. return to_entry(es, l->tail);
  101. }
  102. static struct entry *l_next(struct entry_space *es, struct entry *e)
  103. {
  104. return to_entry(es, e->next);
  105. }
  106. static struct entry *l_prev(struct entry_space *es, struct entry *e)
  107. {
  108. return to_entry(es, e->prev);
  109. }
  110. static bool l_empty(struct ilist *l)
  111. {
  112. return l->head == INDEXER_NULL;
  113. }
  114. static void l_add_head(struct entry_space *es, struct ilist *l, struct entry *e)
  115. {
  116. struct entry *head = l_head(es, l);
  117. e->next = l->head;
  118. e->prev = INDEXER_NULL;
  119. if (head)
  120. head->prev = l->head = to_index(es, e);
  121. else
  122. l->head = l->tail = to_index(es, e);
  123. if (!e->sentinel)
  124. l->nr_elts++;
  125. }
  126. static void l_add_tail(struct entry_space *es, struct ilist *l, struct entry *e)
  127. {
  128. struct entry *tail = l_tail(es, l);
  129. e->next = INDEXER_NULL;
  130. e->prev = l->tail;
  131. if (tail)
  132. tail->next = l->tail = to_index(es, e);
  133. else
  134. l->head = l->tail = to_index(es, e);
  135. if (!e->sentinel)
  136. l->nr_elts++;
  137. }
  138. static void l_add_before(struct entry_space *es, struct ilist *l,
  139. struct entry *old, struct entry *e)
  140. {
  141. struct entry *prev = l_prev(es, old);
  142. if (!prev)
  143. l_add_head(es, l, e);
  144. else {
  145. e->prev = old->prev;
  146. e->next = to_index(es, old);
  147. prev->next = old->prev = to_index(es, e);
  148. if (!e->sentinel)
  149. l->nr_elts++;
  150. }
  151. }
  152. static void l_del(struct entry_space *es, struct ilist *l, struct entry *e)
  153. {
  154. struct entry *prev = l_prev(es, e);
  155. struct entry *next = l_next(es, e);
  156. if (prev)
  157. prev->next = e->next;
  158. else
  159. l->head = e->next;
  160. if (next)
  161. next->prev = e->prev;
  162. else
  163. l->tail = e->prev;
  164. if (!e->sentinel)
  165. l->nr_elts--;
  166. }
  167. static struct entry *l_pop_tail(struct entry_space *es, struct ilist *l)
  168. {
  169. struct entry *e;
  170. for (e = l_tail(es, l); e; e = l_prev(es, e))
  171. if (!e->sentinel) {
  172. l_del(es, l, e);
  173. return e;
  174. }
  175. return NULL;
  176. }
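/*
 * Example (illustrative sketch, not used by the policy itself): the ilist
 * is an intrusive list linked by indexes.  Sentinel entries may sit on a
 * list, but they are never counted in nr_elts and l_pop_tail() skips them.
 *
 *        struct entry_space es;
 *        struct ilist l;
 *        struct entry *a, *s;
 *
 *        space_init(&es, 2);               // error handling omitted
 *        a = __get_entry(&es, 0);
 *        s = __get_entry(&es, 1);
 *        s->sentinel = true;
 *        l_init(&l);
 *        l_add_tail(&es, &l, a);
 *        l_add_tail(&es, &l, s);
 *        BUG_ON(l.nr_elts != 1);           // the sentinel isn't counted
 *        BUG_ON(l_pop_tail(&es, &l) != a); // ...and is skipped when popping
 *        space_exit(&es);
 */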
  177. /*----------------------------------------------------------------*/
  178. /*
  179. * The stochastic-multi-queue is a set of lru lists stacked into levels.
  180. * Entries are moved up levels when they are used, which loosely orders the
  181. * most accessed entries in the top levels and least in the bottom. This
  182. * structure is *much* better than a single lru list.
  183. */
  184. #define MAX_LEVELS 64u
  185. struct queue {
  186. struct entry_space *es;
  187. unsigned nr_elts;
  188. unsigned nr_levels;
  189. struct ilist qs[MAX_LEVELS];
  190. /*
  191. * We maintain a count of the number of entries we would like in each
  192. * level.
  193. */
  194. unsigned last_target_nr_elts;
  195. unsigned nr_top_levels;
  196. unsigned nr_in_top_levels;
  197. unsigned target_count[MAX_LEVELS];
  198. };
  199. static void q_init(struct queue *q, struct entry_space *es, unsigned nr_levels)
  200. {
  201. unsigned i;
  202. q->es = es;
  203. q->nr_elts = 0;
  204. q->nr_levels = nr_levels;
  205. for (i = 0; i < q->nr_levels; i++) {
  206. l_init(q->qs + i);
  207. q->target_count[i] = 0u;
  208. }
  209. q->last_target_nr_elts = 0u;
  210. q->nr_top_levels = 0u;
  211. q->nr_in_top_levels = 0u;
  212. }
  213. static unsigned q_size(struct queue *q)
  214. {
  215. return q->nr_elts;
  216. }
  217. /*
  218. * Insert an entry to the back of the given level.
  219. */
  220. static void q_push(struct queue *q, struct entry *e)
  221. {
  222. if (!e->sentinel)
  223. q->nr_elts++;
  224. l_add_tail(q->es, q->qs + e->level, e);
  225. }
  226. static void q_push_before(struct queue *q, struct entry *old, struct entry *e)
  227. {
  228. if (!e->sentinel)
  229. q->nr_elts++;
  230. l_add_before(q->es, q->qs + e->level, old, e);
  231. }
  232. static void q_del(struct queue *q, struct entry *e)
  233. {
  234. l_del(q->es, q->qs + e->level, e);
  235. if (!e->sentinel)
  236. q->nr_elts--;
  237. }
  238. /*
  239. * Return the oldest entry of the lowest populated level.
  240. */
  241. static struct entry *q_peek(struct queue *q, unsigned max_level, bool can_cross_sentinel)
  242. {
  243. unsigned level;
  244. struct entry *e;
  245. max_level = min(max_level, q->nr_levels);
  246. for (level = 0; level < max_level; level++)
  247. for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) {
  248. if (e->sentinel) {
  249. if (can_cross_sentinel)
  250. continue;
  251. else
  252. break;
  253. }
  254. return e;
  255. }
  256. return NULL;
  257. }
  258. static struct entry *q_pop(struct queue *q)
  259. {
  260. struct entry *e = q_peek(q, q->nr_levels, true);
  261. if (e)
  262. q_del(q, e);
  263. return e;
  264. }
  265. /*
  266. * Pops an entry from a level that is not past a sentinel.
  267. */
  268. static struct entry *q_pop_old(struct queue *q, unsigned max_level)
  269. {
  270. struct entry *e = q_peek(q, max_level, false);
  271. if (e)
  272. q_del(q, e);
  273. return e;
  274. }
  275. /*
  276. * This function assumes there is a non-sentinel entry to pop. It's only
  277. * used by redistribute, so we know this is true. It also doesn't adjust
  278. * the q->nr_elts count.
  279. */
  280. static struct entry *__redist_pop_from(struct queue *q, unsigned level)
  281. {
  282. struct entry *e;
  283. for (; level < q->nr_levels; level++)
  284. for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e))
  285. if (!e->sentinel) {
  286. l_del(q->es, q->qs + e->level, e);
  287. return e;
  288. }
  289. return NULL;
  290. }
  291. static void q_set_targets_subrange_(struct queue *q, unsigned nr_elts, unsigned lbegin, unsigned lend)
  292. {
  293. unsigned level, nr_levels, entries_per_level, remainder;
  294. BUG_ON(lbegin > lend);
  295. BUG_ON(lend > q->nr_levels);
  296. nr_levels = lend - lbegin;
  297. entries_per_level = safe_div(nr_elts, nr_levels);
  298. remainder = safe_mod(nr_elts, nr_levels);
  299. for (level = lbegin; level < lend; level++)
  300. q->target_count[level] =
  301. (level < (lbegin + remainder)) ? entries_per_level + 1u : entries_per_level;
  302. }
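/*
 * Worked example (illustrative): q_set_targets_subrange_(q, 10, 0, 4)
 * computes entries_per_level = 10 / 4 = 2 and remainder = 10 % 4 = 2, so
 * target_count[0..3] becomes 3, 3, 2, 2 - the remainder is handed to the
 * lowest levels of the range.
 */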
  303. /*
  304. * Typically we have fewer elements in the top few levels which allows us
  305. * to adjust the promote threshold nicely.
  306. */
  307. static void q_set_targets(struct queue *q)
  308. {
  309. if (q->last_target_nr_elts == q->nr_elts)
  310. return;
  311. q->last_target_nr_elts = q->nr_elts;
  312. if (q->nr_top_levels > q->nr_levels)
  313. q_set_targets_subrange_(q, q->nr_elts, 0, q->nr_levels);
  314. else {
  315. q_set_targets_subrange_(q, q->nr_in_top_levels,
  316. q->nr_levels - q->nr_top_levels, q->nr_levels);
  317. if (q->nr_in_top_levels < q->nr_elts)
  318. q_set_targets_subrange_(q, q->nr_elts - q->nr_in_top_levels,
  319. 0, q->nr_levels - q->nr_top_levels);
  320. else
  321. q_set_targets_subrange_(q, 0, 0, q->nr_levels - q->nr_top_levels);
  322. }
  323. }
  324. static void q_redistribute(struct queue *q)
  325. {
  326. unsigned target, level;
  327. struct ilist *l, *l_above;
  328. struct entry *e;
  329. q_set_targets(q);
  330. for (level = 0u; level < q->nr_levels - 1u; level++) {
  331. l = q->qs + level;
  332. target = q->target_count[level];
  333. /*
  334. * Pull down some entries from the level above.
  335. */
  336. while (l->nr_elts < target) {
  337. e = __redist_pop_from(q, level + 1u);
  338. if (!e) {
  339. /* bug in nr_elts */
  340. break;
  341. }
  342. e->level = level;
  343. l_add_tail(q->es, l, e);
  344. }
  345. /*
  346. * Push some entries up.
  347. */
  348. l_above = q->qs + level + 1u;
  349. while (l->nr_elts > target) {
  350. e = l_pop_tail(q->es, l);
  351. if (!e)
  352. /* bug in nr_elts */
  353. break;
  354. e->level = level + 1u;
  355. l_add_head(q->es, l_above, e);
  356. }
  357. }
  358. }
  359. static void q_requeue_before(struct queue *q, struct entry *dest, struct entry *e, unsigned extra_levels)
  360. {
  361. struct entry *de;
  362. unsigned new_level;
  363. q_del(q, e);
  364. if (extra_levels && (e->level < q->nr_levels - 1u)) {
  365. new_level = min(q->nr_levels - 1u, e->level + extra_levels);
  366. for (de = l_head(q->es, q->qs + new_level); de; de = l_next(q->es, de)) {
  367. if (de->sentinel)
  368. continue;
  369. q_del(q, de);
  370. de->level = e->level;
  371. if (dest)
  372. q_push_before(q, dest, de);
  373. else
  374. q_push(q, de);
  375. break;
  376. }
  377. e->level = new_level;
  378. }
  379. q_push(q, e);
  380. }
  381. static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels)
  382. {
  383. q_requeue_before(q, NULL, e, extra_levels);
  384. }
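/*
 * Worked example (illustrative): q_requeue(q, e, 2u) on an entry at level 3
 * of a 64-level queue lifts e to level min(63, 3 + 2) = 5 and, in exchange,
 * demotes the first non-sentinel entry found at level 5 down to level 3, so
 * the level populations stay roughly stable.  When a dest entry is given,
 * as in requeue() below, the demoted entry is placed just in front of dest.
 */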
  385. /*----------------------------------------------------------------*/
  386. #define FP_SHIFT 8
  387. #define SIXTEENTH (1u << (FP_SHIFT - 4u))
  388. #define EIGHTH (1u << (FP_SHIFT - 3u))
  389. struct stats {
  390. unsigned hit_threshold;
  391. unsigned hits;
  392. unsigned misses;
  393. };
  394. enum performance {
  395. Q_POOR,
  396. Q_FAIR,
  397. Q_WELL
  398. };
  399. static void stats_init(struct stats *s, unsigned nr_levels)
  400. {
  401. s->hit_threshold = (nr_levels * 3u) / 4u;
  402. s->hits = 0u;
  403. s->misses = 0u;
  404. }
  405. static void stats_reset(struct stats *s)
  406. {
  407. s->hits = s->misses = 0u;
  408. }
  409. static void stats_level_accessed(struct stats *s, unsigned level)
  410. {
  411. if (level >= s->hit_threshold)
  412. s->hits++;
  413. else
  414. s->misses++;
  415. }
  416. static void stats_miss(struct stats *s)
  417. {
  418. s->misses++;
  419. }
  420. /*
  421. * There are times when we don't have any confidence in the hotspot queue,
  422. * such as when a fresh cache is created and the blocks have been spread
  423. * out across the levels, or if the IO load changes.  We detect this by
  424. * seeing how often a lookup lands in the top levels of the hotspot queue.
  425. */
  426. static enum performance stats_assess(struct stats *s)
  427. {
  428. unsigned confidence = safe_div(s->hits << FP_SHIFT, s->hits + s->misses);
  429. if (confidence < SIXTEENTH)
  430. return Q_POOR;
  431. else if (confidence < EIGHTH)
  432. return Q_FAIR;
  433. else
  434. return Q_WELL;
  435. }
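/*
 * Worked example (illustrative): confidence is the hit ratio in 8-bit
 * fixed point, so SIXTEENTH (16) and EIGHTH (32) correspond to ratios of
 * 1/16 and 1/8.  With hits = 100 and misses = 900 the confidence is
 * (100 << 8) / 1000 = 25, which sits between the two thresholds and is
 * assessed as Q_FAIR.
 */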
  436. /*----------------------------------------------------------------*/
  437. struct hash_table {
  438. struct entry_space *es;
  439. unsigned long long hash_bits;
  440. unsigned *buckets;
  441. };
  442. /*
  443. * All cache entries are stored in a chained hash table. To save space we
  444. * use indexing again, and only store indexes to the next entry.
  445. */
  446. static int h_init(struct hash_table *ht, struct entry_space *es, unsigned nr_entries)
  447. {
  448. unsigned i, nr_buckets;
  449. ht->es = es;
  450. nr_buckets = roundup_pow_of_two(max(nr_entries / 4u, 16u));
  451. ht->hash_bits = __ffs(nr_buckets);
  452. ht->buckets = vmalloc(sizeof(*ht->buckets) * nr_buckets);
  453. if (!ht->buckets)
  454. return -ENOMEM;
  455. for (i = 0; i < nr_buckets; i++)
  456. ht->buckets[i] = INDEXER_NULL;
  457. return 0;
  458. }
  459. static void h_exit(struct hash_table *ht)
  460. {
  461. vfree(ht->buckets);
  462. }
  463. static struct entry *h_head(struct hash_table *ht, unsigned bucket)
  464. {
  465. return to_entry(ht->es, ht->buckets[bucket]);
  466. }
  467. static struct entry *h_next(struct hash_table *ht, struct entry *e)
  468. {
  469. return to_entry(ht->es, e->hash_next);
  470. }
  471. static void __h_insert(struct hash_table *ht, unsigned bucket, struct entry *e)
  472. {
  473. e->hash_next = ht->buckets[bucket];
  474. ht->buckets[bucket] = to_index(ht->es, e);
  475. }
  476. static void h_insert(struct hash_table *ht, struct entry *e)
  477. {
  478. unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
  479. __h_insert(ht, h, e);
  480. }
  481. static struct entry *__h_lookup(struct hash_table *ht, unsigned h, dm_oblock_t oblock,
  482. struct entry **prev)
  483. {
  484. struct entry *e;
  485. *prev = NULL;
  486. for (e = h_head(ht, h); e; e = h_next(ht, e)) {
  487. if (e->oblock == oblock)
  488. return e;
  489. *prev = e;
  490. }
  491. return NULL;
  492. }
  493. static void __h_unlink(struct hash_table *ht, unsigned h,
  494. struct entry *e, struct entry *prev)
  495. {
  496. if (prev)
  497. prev->hash_next = e->hash_next;
  498. else
  499. ht->buckets[h] = e->hash_next;
  500. }
  501. /*
  502. * Also moves the looked-up entry to the front of its bucket.
  503. */
  504. static struct entry *h_lookup(struct hash_table *ht, dm_oblock_t oblock)
  505. {
  506. struct entry *e, *prev;
  507. unsigned h = hash_64(from_oblock(oblock), ht->hash_bits);
  508. e = __h_lookup(ht, h, oblock, &prev);
  509. if (e && prev) {
  510. /*
  511. * Move to the front because this entry is likely
  512. * to be hit again.
  513. */
  514. __h_unlink(ht, h, e, prev);
  515. __h_insert(ht, h, e);
  516. }
  517. return e;
  518. }
  519. static void h_remove(struct hash_table *ht, struct entry *e)
  520. {
  521. unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
  522. struct entry *prev;
  523. /*
  524. * The down side of using a singly linked list is we have to
  525. * iterate the bucket to remove an item.
  526. */
  527. e = __h_lookup(ht, h, e->oblock, &prev);
  528. if (e)
  529. __h_unlink(ht, h, e, prev);
  530. }
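/*
 * Example (illustrative sketch, not used by the policy itself): entries
 * are found by origin block through the chained hash table, the chains
 * being built from the hash_next indexes.
 *
 *        struct entry_space es;
 *        struct hash_table ht;
 *        struct entry *e;
 *
 *        space_init(&es, 16);              // error handling omitted
 *        h_init(&ht, &es, 16);
 *        e = __get_entry(&es, 0);
 *        e->oblock = to_oblock(1234);
 *        h_insert(&ht, e);
 *        BUG_ON(h_lookup(&ht, to_oblock(1234)) != e);
 *        h_remove(&ht, e);
 *        h_exit(&ht);
 *        space_exit(&es);
 */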
  531. /*----------------------------------------------------------------*/
  532. struct entry_alloc {
  533. struct entry_space *es;
  534. unsigned begin;
  535. unsigned nr_allocated;
  536. struct ilist free;
  537. };
  538. static void init_allocator(struct entry_alloc *ea, struct entry_space *es,
  539. unsigned begin, unsigned end)
  540. {
  541. unsigned i;
  542. ea->es = es;
  543. ea->nr_allocated = 0u;
  544. ea->begin = begin;
  545. l_init(&ea->free);
  546. for (i = begin; i != end; i++)
  547. l_add_tail(ea->es, &ea->free, __get_entry(ea->es, i));
  548. }
  549. static void init_entry(struct entry *e)
  550. {
  551. /*
  552. * We can't memset because that would clear the sentinel bit, which
  553. * must remain constant.
  554. */
  555. e->hash_next = INDEXER_NULL;
  556. e->next = INDEXER_NULL;
  557. e->prev = INDEXER_NULL;
  558. e->level = 0u;
  559. e->allocated = true;
  560. }
  561. static struct entry *alloc_entry(struct entry_alloc *ea)
  562. {
  563. struct entry *e;
  564. if (l_empty(&ea->free))
  565. return NULL;
  566. e = l_pop_tail(ea->es, &ea->free);
  567. init_entry(e);
  568. ea->nr_allocated++;
  569. return e;
  570. }
  571. /*
  572. * This assumes the cblock hasn't already been allocated.
  573. */
  574. static struct entry *alloc_particular_entry(struct entry_alloc *ea, unsigned i)
  575. {
  576. struct entry *e = __get_entry(ea->es, ea->begin + i);
  577. BUG_ON(e->allocated);
  578. l_del(ea->es, &ea->free, e);
  579. init_entry(e);
  580. ea->nr_allocated++;
  581. return e;
  582. }
  583. static void free_entry(struct entry_alloc *ea, struct entry *e)
  584. {
  585. BUG_ON(!ea->nr_allocated);
  586. BUG_ON(!e->allocated);
  587. ea->nr_allocated--;
  588. e->allocated = false;
  589. l_add_tail(ea->es, &ea->free, e);
  590. }
  591. static bool allocator_empty(struct entry_alloc *ea)
  592. {
  593. return l_empty(&ea->free);
  594. }
  595. static unsigned get_index(struct entry_alloc *ea, struct entry *e)
  596. {
  597. return to_index(ea->es, e) - ea->begin;
  598. }
  599. static struct entry *get_entry(struct entry_alloc *ea, unsigned index)
  600. {
  601. return __get_entry(ea->es, ea->begin + index);
  602. }
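/*
 * Example (illustrative sketch, not used by the policy itself): an
 * entry_alloc manages a contiguous slice [begin, end) of the entry_space
 * and hands entries out from a free list.  get_index()/get_entry()
 * translate between an entry and its offset within that slice, which is
 * what infer_cblock() relies on further down.
 *
 *        struct entry_space es;
 *        struct entry_alloc ea;
 *        struct entry *e;
 *
 *        space_init(&es, 16);              // error handling omitted
 *        init_allocator(&ea, &es, 8, 16);  // manage entries 8..15
 *        e = alloc_entry(&ea);
 *        BUG_ON(get_index(&ea, e) >= 8);   // index is relative to 'begin'
 *        free_entry(&ea, e);
 *        space_exit(&es);
 *
 * alloc_particular_entry(&ea, 3) would instead claim entry 8 + 3 = 11,
 * which is how mappings are reloaded at a specific cblock.
 */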
  603. /*----------------------------------------------------------------*/
  604. #define NR_HOTSPOT_LEVELS 64u
  605. #define NR_CACHE_LEVELS 64u
  606. #define WRITEBACK_PERIOD (10 * HZ)
  607. #define DEMOTE_PERIOD (60 * HZ)
  608. #define HOTSPOT_UPDATE_PERIOD (HZ)
  609. #define CACHE_UPDATE_PERIOD (10u * HZ)
  610. struct smq_policy {
  611. struct dm_cache_policy policy;
  612. /* protects everything */
  613. spinlock_t lock;
  614. dm_cblock_t cache_size;
  615. sector_t cache_block_size;
  616. sector_t hotspot_block_size;
  617. unsigned nr_hotspot_blocks;
  618. unsigned cache_blocks_per_hotspot_block;
  619. unsigned hotspot_level_jump;
  620. struct entry_space es;
  621. struct entry_alloc writeback_sentinel_alloc;
  622. struct entry_alloc demote_sentinel_alloc;
  623. struct entry_alloc hotspot_alloc;
  624. struct entry_alloc cache_alloc;
  625. unsigned long *hotspot_hit_bits;
  626. unsigned long *cache_hit_bits;
  627. /*
  628. * We maintain three queues of entries.  The cache proper - a clean
  629. * queue and a dirty queue - holds the currently active mappings.  The
  630. * hotspot queue uses a larger block size to track blocks that are
  631. * being hit frequently, as potential candidates for promotion to the
  632. * cache.
  633. */
  634. struct queue hotspot;
  635. struct queue clean;
  636. struct queue dirty;
  637. struct stats hotspot_stats;
  638. struct stats cache_stats;
  639. /*
  640. * Keeps track of time, incremented by the core. We use this to
  641. * avoid attributing multiple hits within the same tick.
  642. */
  643. unsigned tick;
  644. /*
  645. * The hash tables allow us to quickly find an entry by origin
  646. * block.
  647. */
  648. struct hash_table table;
  649. struct hash_table hotspot_table;
  650. bool current_writeback_sentinels;
  651. unsigned long next_writeback_period;
  652. bool current_demote_sentinels;
  653. unsigned long next_demote_period;
  654. unsigned write_promote_level;
  655. unsigned read_promote_level;
  656. unsigned long next_hotspot_period;
  657. unsigned long next_cache_period;
  658. };
  659. /*----------------------------------------------------------------*/
  660. static struct entry *get_sentinel(struct entry_alloc *ea, unsigned level, bool which)
  661. {
  662. return get_entry(ea, which ? level : NR_CACHE_LEVELS + level);
  663. }
  664. static struct entry *writeback_sentinel(struct smq_policy *mq, unsigned level)
  665. {
  666. return get_sentinel(&mq->writeback_sentinel_alloc, level, mq->current_writeback_sentinels);
  667. }
  668. static struct entry *demote_sentinel(struct smq_policy *mq, unsigned level)
  669. {
  670. return get_sentinel(&mq->demote_sentinel_alloc, level, mq->current_demote_sentinels);
  671. }
  672. static void __update_writeback_sentinels(struct smq_policy *mq)
  673. {
  674. unsigned level;
  675. struct queue *q = &mq->dirty;
  676. struct entry *sentinel;
  677. for (level = 0; level < q->nr_levels; level++) {
  678. sentinel = writeback_sentinel(mq, level);
  679. q_del(q, sentinel);
  680. q_push(q, sentinel);
  681. }
  682. }
  683. static void __update_demote_sentinels(struct smq_policy *mq)
  684. {
  685. unsigned level;
  686. struct queue *q = &mq->clean;
  687. struct entry *sentinel;
  688. for (level = 0; level < q->nr_levels; level++) {
  689. sentinel = demote_sentinel(mq, level);
  690. q_del(q, sentinel);
  691. q_push(q, sentinel);
  692. }
  693. }
  694. static void update_sentinels(struct smq_policy *mq)
  695. {
  696. if (time_after(jiffies, mq->next_writeback_period)) {
  697. __update_writeback_sentinels(mq);
  698. mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
  699. mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
  700. }
  701. if (time_after(jiffies, mq->next_demote_period)) {
  702. __update_demote_sentinels(mq);
  703. mq->next_demote_period = jiffies + DEMOTE_PERIOD;
  704. mq->current_demote_sentinels = !mq->current_demote_sentinels;
  705. }
  706. }
  707. static void __sentinels_init(struct smq_policy *mq)
  708. {
  709. unsigned level;
  710. struct entry *sentinel;
  711. for (level = 0; level < NR_CACHE_LEVELS; level++) {
  712. sentinel = writeback_sentinel(mq, level);
  713. sentinel->level = level;
  714. q_push(&mq->dirty, sentinel);
  715. sentinel = demote_sentinel(mq, level);
  716. sentinel->level = level;
  717. q_push(&mq->clean, sentinel);
  718. }
  719. }
  720. static void sentinels_init(struct smq_policy *mq)
  721. {
  722. mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
  723. mq->next_demote_period = jiffies + DEMOTE_PERIOD;
  724. mq->current_writeback_sentinels = false;
  725. mq->current_demote_sentinels = false;
  726. __sentinels_init(mq);
  727. mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
  728. mq->current_demote_sentinels = !mq->current_demote_sentinels;
  729. __sentinels_init(mq);
  730. }
  731. /*----------------------------------------------------------------*/
  732. /*
  733. * These methods tie together the dirty queue, clean queue and hash table.
  734. */
  735. static void push_new(struct smq_policy *mq, struct entry *e)
  736. {
  737. struct queue *q = e->dirty ? &mq->dirty : &mq->clean;
  738. h_insert(&mq->table, e);
  739. q_push(q, e);
  740. }
  741. static void push(struct smq_policy *mq, struct entry *e)
  742. {
  743. struct entry *sentinel;
  744. h_insert(&mq->table, e);
  745. /*
  746. * Punch this into the queue just in front of the sentinel, to
  747. * ensure it's cleaned straight away.
  748. */
  749. if (e->dirty) {
  750. sentinel = writeback_sentinel(mq, e->level);
  751. q_push_before(&mq->dirty, sentinel, e);
  752. } else {
  753. sentinel = demote_sentinel(mq, e->level);
  754. q_push_before(&mq->clean, sentinel, e);
  755. }
  756. }
  757. /*
  758. * Removes an entry from its queue and from the hash table.
  759. */
  760. static void __del(struct smq_policy *mq, struct queue *q, struct entry *e)
  761. {
  762. q_del(q, e);
  763. h_remove(&mq->table, e);
  764. }
  765. static void del(struct smq_policy *mq, struct entry *e)
  766. {
  767. __del(mq, e->dirty ? &mq->dirty : &mq->clean, e);
  768. }
  769. static struct entry *pop_old(struct smq_policy *mq, struct queue *q, unsigned max_level)
  770. {
  771. struct entry *e = q_pop_old(q, max_level);
  772. if (e)
  773. h_remove(&mq->table, e);
  774. return e;
  775. }
  776. static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e)
  777. {
  778. return to_cblock(get_index(&mq->cache_alloc, e));
  779. }
  780. static void requeue(struct smq_policy *mq, struct entry *e)
  781. {
  782. struct entry *sentinel;
  783. if (!test_and_set_bit(from_cblock(infer_cblock(mq, e)), mq->cache_hit_bits)) {
  784. if (e->dirty) {
  785. sentinel = writeback_sentinel(mq, e->level);
  786. q_requeue_before(&mq->dirty, sentinel, e, 1u);
  787. } else {
  788. sentinel = demote_sentinel(mq, e->level);
  789. q_requeue_before(&mq->clean, sentinel, e, 1u);
  790. }
  791. }
  792. }
  793. static unsigned default_promote_level(struct smq_policy *mq)
  794. {
  795. /*
  796. * The promote level depends on the current performance of the
  797. * cache.
  798. *
  799. * If the cache is performing badly, then we can't afford
  800. * to promote much without causing performance to drop below that
  801. * of the origin device.
  802. *
  803. * If the cache is performing well, then we don't need to promote
  804. * much. If it isn't broken, don't fix it.
  805. *
  806. * If the cache is middling then we promote more.
  807. *
  808. * This scheme reminds me of a graph of entropy vs probability of a
  809. * binary variable.
  810. */
  811. static unsigned table[] = {1, 1, 1, 2, 4, 6, 7, 8, 7, 6, 4, 4, 3, 3, 2, 2, 1};
  812. unsigned hits = mq->cache_stats.hits;
  813. unsigned misses = mq->cache_stats.misses;
  814. unsigned index = safe_div(hits << 4u, hits + misses);
  815. return table[index];
  816. }
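/*
 * Worked example (illustrative): the index is the cache hit ratio in
 * 4-bit fixed point.  A 50% hit ratio gives index 8 and a level of 7,
 * just past the table's peak of 8 at index 7, while a 90% hit ratio
 * gives index 14 and a level of only 2.
 */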
  817. static void update_promote_levels(struct smq_policy *mq)
  818. {
  819. /*
  820. * If there are unused cache entries then we want to be really
  821. * eager to promote.
  822. */
  823. unsigned threshold_level = allocator_empty(&mq->cache_alloc) ?
  824. default_promote_level(mq) : (NR_HOTSPOT_LEVELS / 2u);
  825. /*
  826. * If the hotspot queue is performing badly then we have little
  827. * confidence that we know which blocks to promote. So we cut down
  828. * the amount of promotions.
  829. */
  830. switch (stats_assess(&mq->hotspot_stats)) {
  831. case Q_POOR:
  832. threshold_level /= 4u;
  833. break;
  834. case Q_FAIR:
  835. threshold_level /= 2u;
  836. break;
  837. case Q_WELL:
  838. break;
  839. }
  840. mq->read_promote_level = NR_HOTSPOT_LEVELS - threshold_level;
  841. mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level) + 2u;
  842. }
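/*
 * Worked example (illustrative): with unused cache entries and a hotspot
 * queue assessed as Q_WELL, threshold_level stays at 32, so reads promote
 * from hotspot level 32 upwards and writes from level 34.  Under Q_POOR
 * the threshold drops to 8 and the promote levels rise to 56 and 58, so
 * far fewer hotspot blocks qualify.
 */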
  843. /*
  844. * If the hotspot queue is performing badly, then we try and move entries
  845. * around more quickly.
  846. */
  847. static void update_level_jump(struct smq_policy *mq)
  848. {
  849. switch (stats_assess(&mq->hotspot_stats)) {
  850. case Q_POOR:
  851. mq->hotspot_level_jump = 4u;
  852. break;
  853. case Q_FAIR:
  854. mq->hotspot_level_jump = 2u;
  855. break;
  856. case Q_WELL:
  857. mq->hotspot_level_jump = 1u;
  858. break;
  859. }
  860. }
  861. static void end_hotspot_period(struct smq_policy *mq)
  862. {
  863. clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks);
  864. update_promote_levels(mq);
  865. if (time_after(jiffies, mq->next_hotspot_period)) {
  866. update_level_jump(mq);
  867. q_redistribute(&mq->hotspot);
  868. stats_reset(&mq->hotspot_stats);
  869. mq->next_hotspot_period = jiffies + HOTSPOT_UPDATE_PERIOD;
  870. }
  871. }
  872. static void end_cache_period(struct smq_policy *mq)
  873. {
  874. if (time_after(jiffies, mq->next_cache_period)) {
  875. clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size));
  876. q_redistribute(&mq->dirty);
  877. q_redistribute(&mq->clean);
  878. stats_reset(&mq->cache_stats);
  879. mq->next_cache_period = jiffies + CACHE_UPDATE_PERIOD;
  880. }
  881. }
  882. static int demote_cblock(struct smq_policy *mq,
  883. struct policy_locker *locker,
  884. dm_oblock_t *oblock)
  885. {
  886. struct entry *demoted = q_peek(&mq->clean, mq->clean.nr_levels, false);
  887. if (!demoted)
  888. /*
  889. * We could get a block from mq->dirty, but that
  890. * would add extra latency to the triggering bio as it
  891. * waits for the writeback. Better to not promote this
  892. * time and hope there's a clean block next time this block
  893. * is hit.
  894. */
  895. return -ENOSPC;
  896. if (locker->fn(locker, demoted->oblock))
  897. /*
  898. * We couldn't lock this block.
  899. */
  900. return -EBUSY;
  901. del(mq, demoted);
  902. *oblock = demoted->oblock;
  903. free_entry(&mq->cache_alloc, demoted);
  904. return 0;
  905. }
  906. enum promote_result {
  907. PROMOTE_NOT,
  908. PROMOTE_TEMPORARY,
  909. PROMOTE_PERMANENT
  910. };
  911. /*
  912. * Converts a boolean into a promote result.
  913. */
  914. static enum promote_result maybe_promote(bool promote)
  915. {
  916. return promote ? PROMOTE_PERMANENT : PROMOTE_NOT;
  917. }
  918. static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e, struct bio *bio,
  919. bool fast_promote)
  920. {
  921. if (bio_data_dir(bio) == WRITE) {
  922. if (!allocator_empty(&mq->cache_alloc) && fast_promote)
  923. return PROMOTE_TEMPORARY;
  924. else
  925. return maybe_promote(hs_e->level >= mq->write_promote_level);
  926. } else
  927. return maybe_promote(hs_e->level >= mq->read_promote_level);
  928. }
  929. static void insert_in_cache(struct smq_policy *mq, dm_oblock_t oblock,
  930. struct policy_locker *locker,
  931. struct policy_result *result, enum promote_result pr)
  932. {
  933. int r;
  934. struct entry *e;
  935. if (allocator_empty(&mq->cache_alloc)) {
  936. result->op = POLICY_REPLACE;
  937. r = demote_cblock(mq, locker, &result->old_oblock);
  938. if (r) {
  939. result->op = POLICY_MISS;
  940. return;
  941. }
  942. } else
  943. result->op = POLICY_NEW;
  944. e = alloc_entry(&mq->cache_alloc);
  945. BUG_ON(!e);
  946. e->oblock = oblock;
  947. if (pr == PROMOTE_TEMPORARY)
  948. push(mq, e);
  949. else
  950. push_new(mq, e);
  951. result->cblock = infer_cblock(mq, e);
  952. }
  953. static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b)
  954. {
  955. sector_t r = from_oblock(b);
  956. (void) sector_div(r, mq->cache_blocks_per_hotspot_block);
  957. return to_oblock(r);
  958. }
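/*
 * Worked example (illustrative): assuming cache_blocks_per_hotspot_block
 * is 16 (its initial value from calc_hotspot_params() below), origin
 * blocks 32..47 all map to hotspot block 2, so hits anywhere in that
 * range feed the same hotspot entry.
 */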
  959. static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b, struct bio *bio)
  960. {
  961. unsigned hi;
  962. dm_oblock_t hb = to_hblock(mq, b);
  963. struct entry *e = h_lookup(&mq->hotspot_table, hb);
  964. if (e) {
  965. stats_level_accessed(&mq->hotspot_stats, e->level);
  966. hi = get_index(&mq->hotspot_alloc, e);
  967. q_requeue(&mq->hotspot, e,
  968. test_and_set_bit(hi, mq->hotspot_hit_bits) ?
  969. 0u : mq->hotspot_level_jump);
  970. } else {
  971. stats_miss(&mq->hotspot_stats);
  972. e = alloc_entry(&mq->hotspot_alloc);
  973. if (!e) {
  974. e = q_pop(&mq->hotspot);
  975. if (e) {
  976. h_remove(&mq->hotspot_table, e);
  977. hi = get_index(&mq->hotspot_alloc, e);
  978. clear_bit(hi, mq->hotspot_hit_bits);
  979. }
  980. }
  981. if (e) {
  982. e->oblock = hb;
  983. q_push(&mq->hotspot, e);
  984. h_insert(&mq->hotspot_table, e);
  985. }
  986. }
  987. return e;
  988. }
  989. /*
  990. * Looks the oblock up in the hash table, then decides whether to record
  991. * a hit, promote the block into the cache, or leave it as a miss.
  992. */
  993. static int map(struct smq_policy *mq, struct bio *bio, dm_oblock_t oblock,
  994. bool can_migrate, bool fast_promote,
  995. struct policy_locker *locker, struct policy_result *result)
  996. {
  997. struct entry *e, *hs_e;
  998. enum promote_result pr;
  999. hs_e = update_hotspot_queue(mq, oblock, bio);
  1000. e = h_lookup(&mq->table, oblock);
  1001. if (e) {
  1002. stats_level_accessed(&mq->cache_stats, e->level);
  1003. requeue(mq, e);
  1004. result->op = POLICY_HIT;
  1005. result->cblock = infer_cblock(mq, e);
  1006. } else {
  1007. stats_miss(&mq->cache_stats);
  1008. pr = should_promote(mq, hs_e, bio, fast_promote);
  1009. if (pr == PROMOTE_NOT)
  1010. result->op = POLICY_MISS;
  1011. else {
  1012. if (!can_migrate) {
  1013. result->op = POLICY_MISS;
  1014. return -EWOULDBLOCK;
  1015. }
  1016. insert_in_cache(mq, oblock, locker, result, pr);
  1017. }
  1018. }
  1019. return 0;
  1020. }
  1021. /*----------------------------------------------------------------*/
  1022. /*
  1023. * Public interface, via the policy struct. See dm-cache-policy.h for a
  1024. * description of these.
  1025. */
  1026. static struct smq_policy *to_smq_policy(struct dm_cache_policy *p)
  1027. {
  1028. return container_of(p, struct smq_policy, policy);
  1029. }
  1030. static void smq_destroy(struct dm_cache_policy *p)
  1031. {
  1032. struct smq_policy *mq = to_smq_policy(p);
  1033. h_exit(&mq->hotspot_table);
  1034. h_exit(&mq->table);
  1035. free_bitset(mq->hotspot_hit_bits);
  1036. free_bitset(mq->cache_hit_bits);
  1037. space_exit(&mq->es);
  1038. kfree(mq);
  1039. }
  1040. static int smq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
  1041. bool can_block, bool can_migrate, bool fast_promote,
  1042. struct bio *bio, struct policy_locker *locker,
  1043. struct policy_result *result)
  1044. {
  1045. int r;
  1046. unsigned long flags;
  1047. struct smq_policy *mq = to_smq_policy(p);
  1048. result->op = POLICY_MISS;
  1049. spin_lock_irqsave(&mq->lock, flags);
  1050. r = map(mq, bio, oblock, can_migrate, fast_promote, locker, result);
  1051. spin_unlock_irqrestore(&mq->lock, flags);
  1052. return r;
  1053. }
  1054. static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
  1055. {
  1056. int r;
  1057. unsigned long flags;
  1058. struct smq_policy *mq = to_smq_policy(p);
  1059. struct entry *e;
  1060. spin_lock_irqsave(&mq->lock, flags);
  1061. e = h_lookup(&mq->table, oblock);
  1062. if (e) {
  1063. *cblock = infer_cblock(mq, e);
  1064. r = 0;
  1065. } else
  1066. r = -ENOENT;
  1067. spin_unlock_irqrestore(&mq->lock, flags);
  1068. return r;
  1069. }
  1070. static void __smq_set_clear_dirty(struct smq_policy *mq, dm_oblock_t oblock, bool set)
  1071. {
  1072. struct entry *e;
  1073. e = h_lookup(&mq->table, oblock);
  1074. BUG_ON(!e);
  1075. del(mq, e);
  1076. e->dirty = set;
  1077. push(mq, e);
  1078. }
  1079. static void smq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
  1080. {
  1081. unsigned long flags;
  1082. struct smq_policy *mq = to_smq_policy(p);
  1083. spin_lock_irqsave(&mq->lock, flags);
  1084. __smq_set_clear_dirty(mq, oblock, true);
  1085. spin_unlock_irqrestore(&mq->lock, flags);
  1086. }
  1087. static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
  1088. {
  1089. struct smq_policy *mq = to_smq_policy(p);
  1090. unsigned long flags;
  1091. spin_lock_irqsave(&mq->lock, flags);
  1092. __smq_set_clear_dirty(mq, oblock, false);
  1093. spin_unlock_irqrestore(&mq->lock, flags);
  1094. }
  1095. static int smq_load_mapping(struct dm_cache_policy *p,
  1096. dm_oblock_t oblock, dm_cblock_t cblock,
  1097. uint32_t hint, bool hint_valid)
  1098. {
  1099. struct smq_policy *mq = to_smq_policy(p);
  1100. struct entry *e;
  1101. e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
  1102. e->oblock = oblock;
  1103. e->dirty = false; /* this gets corrected in a minute */
  1104. e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : 1;
  1105. push(mq, e);
  1106. return 0;
  1107. }
  1108. static int smq_save_hints(struct smq_policy *mq, struct queue *q,
  1109. policy_walk_fn fn, void *context)
  1110. {
  1111. int r;
  1112. unsigned level;
  1113. struct entry *e;
  1114. for (level = 0; level < q->nr_levels; level++)
  1115. for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) {
  1116. if (!e->sentinel) {
  1117. r = fn(context, infer_cblock(mq, e),
  1118. e->oblock, e->level);
  1119. if (r)
  1120. return r;
  1121. }
  1122. }
  1123. return 0;
  1124. }
  1125. static int smq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
  1126. void *context)
  1127. {
  1128. struct smq_policy *mq = to_smq_policy(p);
  1129. int r = 0;
  1130. /*
  1131. * We don't need to lock here since this method is only called once
  1132. * the IO has stopped.
  1133. */
  1134. r = smq_save_hints(mq, &mq->clean, fn, context);
  1135. if (!r)
  1136. r = smq_save_hints(mq, &mq->dirty, fn, context);
  1137. return r;
  1138. }
  1139. static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock)
  1140. {
  1141. struct entry *e;
  1142. e = h_lookup(&mq->table, oblock);
  1143. BUG_ON(!e);
  1144. del(mq, e);
  1145. free_entry(&mq->cache_alloc, e);
  1146. }
  1147. static void smq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
  1148. {
  1149. struct smq_policy *mq = to_smq_policy(p);
  1150. unsigned long flags;
  1151. spin_lock_irqsave(&mq->lock, flags);
  1152. __remove_mapping(mq, oblock);
  1153. spin_unlock_irqrestore(&mq->lock, flags);
  1154. }
  1155. static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock)
  1156. {
  1157. struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
  1158. if (!e || !e->allocated)
  1159. return -ENODATA;
  1160. del(mq, e);
  1161. free_entry(&mq->cache_alloc, e);
  1162. return 0;
  1163. }
  1164. static int smq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
  1165. {
  1166. int r;
  1167. unsigned long flags;
  1168. struct smq_policy *mq = to_smq_policy(p);
  1169. spin_lock_irqsave(&mq->lock, flags);
  1170. r = __remove_cblock(mq, cblock);
  1171. spin_unlock_irqrestore(&mq->lock, flags);
  1172. return r;
  1173. }
  1174. #define CLEAN_TARGET_CRITICAL 5u /* percent */
  1175. static bool clean_target_met(struct smq_policy *mq, bool critical)
  1176. {
  1177. if (critical) {
  1178. /*
  1179. * Cache entries may not be populated. So we cannot rely on the
  1180. * size of the clean queue.
  1181. */
  1182. unsigned nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
  1183. unsigned target = from_cblock(mq->cache_size) * CLEAN_TARGET_CRITICAL / 100u;
  1184. return nr_clean >= target;
  1185. } else
  1186. return !q_size(&mq->dirty);
  1187. }
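/*
 * Worked example (illustrative): with a 1000-block cache of which 960
 * blocks are dirty, nr_clean = 40 while the critical target is
 * 1000 * 5 / 100 = 50, so clean_target_met() returns false and
 * __smq_writeback_work() below will pick dirty blocks from any level
 * rather than only the bottom one.
 */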
  1188. static int __smq_writeback_work(struct smq_policy *mq, dm_oblock_t *oblock,
  1189. dm_cblock_t *cblock, bool critical_only)
  1190. {
  1191. struct entry *e = NULL;
  1192. bool target_met = clean_target_met(mq, critical_only);
  1193. if (critical_only)
  1194. /*
  1195. * Always try and keep the bottom level clean.
  1196. */
  1197. e = pop_old(mq, &mq->dirty, target_met ? 1u : mq->dirty.nr_levels);
  1198. else
  1199. e = pop_old(mq, &mq->dirty, mq->dirty.nr_levels);
  1200. if (!e)
  1201. return -ENODATA;
  1202. *oblock = e->oblock;
  1203. *cblock = infer_cblock(mq, e);
  1204. e->dirty = false;
  1205. push_new(mq, e);
  1206. return 0;
  1207. }
  1208. static int smq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
  1209. dm_cblock_t *cblock, bool critical_only)
  1210. {
  1211. int r;
  1212. unsigned long flags;
  1213. struct smq_policy *mq = to_smq_policy(p);
  1214. spin_lock_irqsave(&mq->lock, flags);
  1215. r = __smq_writeback_work(mq, oblock, cblock, critical_only);
  1216. spin_unlock_irqrestore(&mq->lock, flags);
  1217. return r;
  1218. }
  1219. static void __force_mapping(struct smq_policy *mq,
  1220. dm_oblock_t current_oblock, dm_oblock_t new_oblock)
  1221. {
  1222. struct entry *e = h_lookup(&mq->table, current_oblock);
  1223. if (e) {
  1224. del(mq, e);
  1225. e->oblock = new_oblock;
  1226. e->dirty = true;
  1227. push(mq, e);
  1228. }
  1229. }
  1230. static void smq_force_mapping(struct dm_cache_policy *p,
  1231. dm_oblock_t current_oblock, dm_oblock_t new_oblock)
  1232. {
  1233. unsigned long flags;
  1234. struct smq_policy *mq = to_smq_policy(p);
  1235. spin_lock_irqsave(&mq->lock, flags);
  1236. __force_mapping(mq, current_oblock, new_oblock);
  1237. spin_unlock_irqrestore(&mq->lock, flags);
  1238. }
  1239. static dm_cblock_t smq_residency(struct dm_cache_policy *p)
  1240. {
  1241. dm_cblock_t r;
  1242. unsigned long flags;
  1243. struct smq_policy *mq = to_smq_policy(p);
  1244. spin_lock_irqsave(&mq->lock, flags);
  1245. r = to_cblock(mq->cache_alloc.nr_allocated);
  1246. spin_unlock_irqrestore(&mq->lock, flags);
  1247. return r;
  1248. }
  1249. static void smq_tick(struct dm_cache_policy *p, bool can_block)
  1250. {
  1251. struct smq_policy *mq = to_smq_policy(p);
  1252. unsigned long flags;
  1253. spin_lock_irqsave(&mq->lock, flags);
  1254. mq->tick++;
  1255. update_sentinels(mq);
  1256. end_hotspot_period(mq);
  1257. end_cache_period(mq);
  1258. spin_unlock_irqrestore(&mq->lock, flags);
  1259. }
  1260. /* Init the policy plugin interface function pointers. */
  1261. static void init_policy_functions(struct smq_policy *mq)
  1262. {
  1263. mq->policy.destroy = smq_destroy;
  1264. mq->policy.map = smq_map;
  1265. mq->policy.lookup = smq_lookup;
  1266. mq->policy.set_dirty = smq_set_dirty;
  1267. mq->policy.clear_dirty = smq_clear_dirty;
  1268. mq->policy.load_mapping = smq_load_mapping;
  1269. mq->policy.walk_mappings = smq_walk_mappings;
  1270. mq->policy.remove_mapping = smq_remove_mapping;
  1271. mq->policy.remove_cblock = smq_remove_cblock;
  1272. mq->policy.writeback_work = smq_writeback_work;
  1273. mq->policy.force_mapping = smq_force_mapping;
  1274. mq->policy.residency = smq_residency;
  1275. mq->policy.tick = smq_tick;
  1276. }
  1277. static bool too_many_hotspot_blocks(sector_t origin_size,
  1278. sector_t hotspot_block_size,
  1279. unsigned nr_hotspot_blocks)
  1280. {
  1281. return (hotspot_block_size * nr_hotspot_blocks) > origin_size;
  1282. }
  1283. static void calc_hotspot_params(sector_t origin_size,
  1284. sector_t cache_block_size,
  1285. unsigned nr_cache_blocks,
  1286. sector_t *hotspot_block_size,
  1287. unsigned *nr_hotspot_blocks)
  1288. {
  1289. *hotspot_block_size = cache_block_size * 16u;
  1290. *nr_hotspot_blocks = max(nr_cache_blocks / 4u, 1024u);
  1291. while ((*hotspot_block_size > cache_block_size) &&
  1292. too_many_hotspot_blocks(origin_size, *hotspot_block_size, *nr_hotspot_blocks))
  1293. *hotspot_block_size /= 2u;
  1294. }
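/*
 * Worked example (illustrative): with 64-sector (32KiB) cache blocks and
 * 16384 cache blocks, the hotspot block size starts at 1024 sectors and
 * nr_hotspot_blocks = max(16384 / 4, 1024) = 4096.  If the origin is
 * smaller than the 4096 * 1024 sectors those blocks would cover, the
 * hotspot block size is halved until it fits, but never below the cache
 * block size.
 */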
  1295. static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
  1296. sector_t origin_size,
  1297. sector_t cache_block_size)
  1298. {
  1299. unsigned i;
  1300. unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS;
  1301. unsigned total_sentinels = 2u * nr_sentinels_per_queue;
  1302. struct smq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL);
  1303. if (!mq)
  1304. return NULL;
  1305. init_policy_functions(mq);
  1306. mq->cache_size = cache_size;
  1307. mq->cache_block_size = cache_block_size;
  1308. calc_hotspot_params(origin_size, cache_block_size, from_cblock(cache_size),
  1309. &mq->hotspot_block_size, &mq->nr_hotspot_blocks);
  1310. mq->cache_blocks_per_hotspot_block = div64_u64(mq->hotspot_block_size, mq->cache_block_size);
  1311. mq->hotspot_level_jump = 1u;
  1312. if (space_init(&mq->es, total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size))) {
  1313. DMERR("couldn't initialize entry space");
  1314. goto bad_pool_init;
  1315. }
  1316. init_allocator(&mq->writeback_sentinel_alloc, &mq->es, 0, nr_sentinels_per_queue);
  1317. for (i = 0; i < nr_sentinels_per_queue; i++)
  1318. get_entry(&mq->writeback_sentinel_alloc, i)->sentinel = true;
  1319. init_allocator(&mq->demote_sentinel_alloc, &mq->es, nr_sentinels_per_queue, total_sentinels);
  1320. for (i = 0; i < nr_sentinels_per_queue; i++)
  1321. get_entry(&mq->demote_sentinel_alloc, i)->sentinel = true;
  1322. init_allocator(&mq->hotspot_alloc, &mq->es, total_sentinels,
  1323. total_sentinels + mq->nr_hotspot_blocks);
  1324. init_allocator(&mq->cache_alloc, &mq->es,
  1325. total_sentinels + mq->nr_hotspot_blocks,
  1326. total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size));
  1327. mq->hotspot_hit_bits = alloc_bitset(mq->nr_hotspot_blocks);
  1328. if (!mq->hotspot_hit_bits) {
  1329. DMERR("couldn't allocate hotspot hit bitset");
  1330. goto bad_hotspot_hit_bits;
  1331. }
  1332. clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks);
  1333. if (from_cblock(cache_size)) {
  1334. mq->cache_hit_bits = alloc_bitset(from_cblock(cache_size));
  1335. if (!mq->cache_hit_bits) {
  1336. DMERR("couldn't allocate cache hit bitset");
  1337. goto bad_cache_hit_bits;
  1338. }
  1339. clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size));
  1340. } else
  1341. mq->cache_hit_bits = NULL;
  1342. mq->tick = 0;
  1343. spin_lock_init(&mq->lock);
  1344. q_init(&mq->hotspot, &mq->es, NR_HOTSPOT_LEVELS);
  1345. mq->hotspot.nr_top_levels = 8;
  1346. mq->hotspot.nr_in_top_levels = min(mq->nr_hotspot_blocks / NR_HOTSPOT_LEVELS,
  1347. from_cblock(mq->cache_size) / mq->cache_blocks_per_hotspot_block);
  1348. q_init(&mq->clean, &mq->es, NR_CACHE_LEVELS);
  1349. q_init(&mq->dirty, &mq->es, NR_CACHE_LEVELS);
  1350. stats_init(&mq->hotspot_stats, NR_HOTSPOT_LEVELS);
  1351. stats_init(&mq->cache_stats, NR_CACHE_LEVELS);
  1352. if (h_init(&mq->table, &mq->es, from_cblock(cache_size)))
  1353. goto bad_alloc_table;
  1354. if (h_init(&mq->hotspot_table, &mq->es, mq->nr_hotspot_blocks))
  1355. goto bad_alloc_hotspot_table;
  1356. sentinels_init(mq);
  1357. mq->write_promote_level = mq->read_promote_level = NR_HOTSPOT_LEVELS;
  1358. mq->next_hotspot_period = jiffies;
  1359. mq->next_cache_period = jiffies;
  1360. return &mq->policy;
  1361. bad_alloc_hotspot_table:
  1362. h_exit(&mq->table);
  1363. bad_alloc_table:
  1364. free_bitset(mq->cache_hit_bits);
  1365. bad_cache_hit_bits:
  1366. free_bitset(mq->hotspot_hit_bits);
  1367. bad_hotspot_hit_bits:
  1368. space_exit(&mq->es);
  1369. bad_pool_init:
  1370. kfree(mq);
  1371. return NULL;
  1372. }
  1373. /*----------------------------------------------------------------*/
  1374. static struct dm_cache_policy_type smq_policy_type = {
  1375. .name = "smq",
  1376. .version = {1, 0, 0},
  1377. .hint_size = 4,
  1378. .owner = THIS_MODULE,
  1379. .create = smq_create
  1380. };
  1381. static struct dm_cache_policy_type default_policy_type = {
  1382. .name = "default",
  1383. .version = {1, 4, 0},
  1384. .hint_size = 4,
  1385. .owner = THIS_MODULE,
  1386. .create = smq_create,
  1387. .real = &smq_policy_type
  1388. };
  1389. static int __init smq_init(void)
  1390. {
  1391. int r;
  1392. r = dm_cache_policy_register(&smq_policy_type);
  1393. if (r) {
  1394. DMERR("register failed %d", r);
  1395. return -ENOMEM;
  1396. }
  1397. r = dm_cache_policy_register(&default_policy_type);
  1398. if (r) {
  1399. DMERR("register failed (as default) %d", r);
  1400. dm_cache_policy_unregister(&smq_policy_type);
  1401. return -ENOMEM;
  1402. }
  1403. return 0;
  1404. }
  1405. static void __exit smq_exit(void)
  1406. {
  1407. dm_cache_policy_unregister(&smq_policy_type);
  1408. dm_cache_policy_unregister(&default_policy_type);
  1409. }
  1410. module_init(smq_init);
  1411. module_exit(smq_exit);
  1412. MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
  1413. MODULE_LICENSE("GPL");
  1414. MODULE_DESCRIPTION("smq cache policy");
  1415. MODULE_ALIAS("dm-cache-default");