proportions.c 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. /*
  2. * Floating proportions
  3. *
  4. * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  5. *
  6. * Description:
  7. *
  8. * The floating proportion is a time derivative with an exponentially decaying
  9. * history:
  10. *
  11. * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
  12. *
  13. * Where j is an element from {prop_local}, x_{j} is j's number of events,
  14. * and i the time period over which the differential is taken. So d/dt_{-i} is
  15. * the differential over the i-th last period.
  16. *
  17. * The decaying history gives smooth transitions. The time differential carries
  18. * the notion of speed.
  19. *
  20. * The denominator is 2^(1+i) because we want the series to be normalised, ie.
  21. *
  22. * \Sum_{i=0} 1/2^(1+i) = 1
  23. *
  24. * Furthermore, if we measure time (t) in the same events as x, so that:
  25. *
  26. * t = \Sum_{j} x_{j}
  27. *
  28. * we get that:
  29. *
  30. * \Sum_{j} p_{j} = 1
  31. *
  32. * Writing this in an iterative fashion we get (dropping the 'd's):
  33. *
  34. * if (++x_{j}, ++t > period)
  35. * t /= 2;
  36. * for_each (j)
  37. * x_{j} /= 2;
  38. *
  39. * so that:
  40. *
  41. * p_{j} = x_{j} / t;
  42. *
  43. * We optimize away the '/= 2' for the global time delta by noting that:
  44. *
  45. * if (++t > period) t /= 2:
  46. *
  47. * Can be approximated by:
  48. *
  49. * period/2 + (++t % (period/2))
  50. *
  51. * [ Furthermore, when we choose period to be 2^n it can be written in terms of
  52. * binary operations and wraparound artefacts disappear. ]
  53. *
  54. * Also note that this yields a natural counter of the elapsed periods:
  55. *
  56. * c = t / (period/2)
  57. *
  58. * [ Its monotonically increasing property can be applied to mitigate the
  59. * wrap-around issue. ]
  60. *
  61. * This allows us to do away with the loop over all prop_locals on each period
  62. * expiration. By remembering the period count under which it was last accessed
  63. * as c_{j}, we can obtain the number of 'missed' cycles from:
  64. *
  65. * c - c_{j}
  66. *
  67. * We can then lazily catch up to the global period count every time we are
  68. * going to use x_{j}, by doing:
  69. *
  70. * x_{j} /= 2^(c - c_{j}), c_{j} = c
  71. */
  72. #include <linux/proportions.h>
  73. #include <linux/rcupdate.h>
  74. int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp)
  75. {
  76. int err;
  77. if (shift > PROP_MAX_SHIFT)
  78. shift = PROP_MAX_SHIFT;
  79. pd->index = 0;
  80. pd->pg[0].shift = shift;
  81. mutex_init(&pd->mutex);
  82. err = percpu_counter_init(&pd->pg[0].events, 0, gfp);
  83. if (err)
  84. goto out;
  85. err = percpu_counter_init(&pd->pg[1].events, 0, gfp);
  86. if (err)
  87. percpu_counter_destroy(&pd->pg[0].events);
  88. out:
  89. return err;
  90. }
  91. /*
  92. * We have two copies, and flip between them to make it seem like an atomic
  93. * update. The update is not really atomic wrt the events counter, but
  94. * it is internally consistent with the bit layout depending on shift.
  95. *
  96. * We copy the events count, move the bits around and flip the index.
  97. */
  98. void prop_change_shift(struct prop_descriptor *pd, int shift)
  99. {
  100. int index;
  101. int offset;
  102. u64 events;
  103. unsigned long flags;
  104. if (shift > PROP_MAX_SHIFT)
  105. shift = PROP_MAX_SHIFT;
  106. mutex_lock(&pd->mutex);
  107. index = pd->index ^ 1;
  108. offset = pd->pg[pd->index].shift - shift;
  109. if (!offset)
  110. goto out;
  111. pd->pg[index].shift = shift;
  112. local_irq_save(flags);
  113. events = percpu_counter_sum(&pd->pg[pd->index].events);
  114. if (offset < 0)
  115. events <<= -offset;
  116. else
  117. events >>= offset;
  118. percpu_counter_set(&pd->pg[index].events, events);
  119. /*
  120. * ensure the new pg is fully written before the switch
  121. */
  122. smp_wmb();
  123. pd->index = index;
  124. local_irq_restore(flags);
  125. synchronize_rcu();
  126. out:
  127. mutex_unlock(&pd->mutex);
  128. }
  129. /*
  130. * wrap the access to the data in an rcu_read_lock() section;
  131. * this is used to track the active references.
  132. */
  133. static struct prop_global *prop_get_global(struct prop_descriptor *pd)
  134. __acquires(RCU)
  135. {
  136. int index;
  137. rcu_read_lock();
  138. index = pd->index;
  139. /*
  140. * match the wmb from vcd_flip()
  141. */
  142. smp_rmb();
  143. return &pd->pg[index];
  144. }
  145. static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
  146. __releases(RCU)
  147. {
  148. rcu_read_unlock();
  149. }
  150. static void
  151. prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
  152. {
  153. int offset = *pl_shift - new_shift;
  154. if (!offset)
  155. return;
  156. if (offset < 0)
  157. *pl_period <<= -offset;
  158. else
  159. *pl_period >>= offset;
  160. *pl_shift = new_shift;
  161. }
  162. /*
  163. * PERCPU
  164. */
  165. #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
  166. int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp)
  167. {
  168. raw_spin_lock_init(&pl->lock);
  169. pl->shift = 0;
  170. pl->period = 0;
  171. return percpu_counter_init(&pl->events, 0, gfp);
  172. }
  173. void prop_local_destroy_percpu(struct prop_local_percpu *pl)
  174. {
  175. percpu_counter_destroy(&pl->events);
  176. }
  177. /*
  178. * Catch up with missed period expirations.
  179. *
  180. * until (c_{j} == c)
  181. * x_{j} -= x_{j}/2;
  182. * c_{j}++;
  183. */
  184. static
  185. void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
  186. {
  187. unsigned long period = 1UL << (pg->shift - 1);
  188. unsigned long period_mask = ~(period - 1);
  189. unsigned long global_period;
  190. unsigned long flags;
  191. global_period = percpu_counter_read(&pg->events);
  192. global_period &= period_mask;
  193. /*
  194. * Fast path - check if the local and global period count still match
  195. * outside of the lock.
  196. */
  197. if (pl->period == global_period)
  198. return;
  199. raw_spin_lock_irqsave(&pl->lock, flags);
  200. prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
  201. /*
  202. * For each missed period, we half the local counter.
  203. * basically:
  204. * pl->events >> (global_period - pl->period);
  205. */
  206. period = (global_period - pl->period) >> (pg->shift - 1);
  207. if (period < BITS_PER_LONG) {
  208. s64 val = percpu_counter_read(&pl->events);
  209. if (val < (nr_cpu_ids * PROP_BATCH))
  210. val = percpu_counter_sum(&pl->events);
  211. __percpu_counter_add(&pl->events, -val + (val >> period),
  212. PROP_BATCH);
  213. } else
  214. percpu_counter_set(&pl->events, 0);
  215. pl->period = global_period;
  216. raw_spin_unlock_irqrestore(&pl->lock, flags);
  217. }
  218. /*
  219. * ++x_{j}, ++t
  220. */
  221. void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
  222. {
  223. struct prop_global *pg = prop_get_global(pd);
  224. prop_norm_percpu(pg, pl);
  225. __percpu_counter_add(&pl->events, 1, PROP_BATCH);
  226. percpu_counter_add(&pg->events, 1);
  227. prop_put_global(pd, pg);
  228. }
  229. /*
  230. * identical to __prop_inc_percpu, except that it limits this pl's fraction to
  231. * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
  232. */
  233. void __prop_inc_percpu_max(struct prop_descriptor *pd,
  234. struct prop_local_percpu *pl, long frac)
  235. {
  236. struct prop_global *pg = prop_get_global(pd);
  237. prop_norm_percpu(pg, pl);
  238. if (unlikely(frac != PROP_FRAC_BASE)) {
  239. unsigned long period_2 = 1UL << (pg->shift - 1);
  240. unsigned long counter_mask = period_2 - 1;
  241. unsigned long global_count;
  242. long numerator, denominator;
  243. numerator = percpu_counter_read_positive(&pl->events);
  244. global_count = percpu_counter_read(&pg->events);
  245. denominator = period_2 + (global_count & counter_mask);
  246. if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
  247. goto out_put;
  248. }
  249. percpu_counter_add(&pl->events, 1);
  250. percpu_counter_add(&pg->events, 1);
  251. out_put:
  252. prop_put_global(pd, pg);
  253. }
  254. /*
  255. * Obtain a fraction of this proportion
  256. *
  257. * p_{j} = x_{j} / (period/2 + t % period/2)
  258. */
  259. void prop_fraction_percpu(struct prop_descriptor *pd,
  260. struct prop_local_percpu *pl,
  261. long *numerator, long *denominator)
  262. {
  263. struct prop_global *pg = prop_get_global(pd);
  264. unsigned long period_2 = 1UL << (pg->shift - 1);
  265. unsigned long counter_mask = period_2 - 1;
  266. unsigned long global_count;
  267. prop_norm_percpu(pg, pl);
  268. *numerator = percpu_counter_read_positive(&pl->events);
  269. global_count = percpu_counter_read(&pg->events);
  270. *denominator = period_2 + (global_count & counter_mask);
  271. prop_put_global(pd, pg);
  272. }
  273. /*
  274. * SINGLE
  275. */
  276. int prop_local_init_single(struct prop_local_single *pl)
  277. {
  278. raw_spin_lock_init(&pl->lock);
  279. pl->shift = 0;
  280. pl->period = 0;
  281. pl->events = 0;
  282. return 0;
  283. }
  284. void prop_local_destroy_single(struct prop_local_single *pl)
  285. {
  286. }
  287. /*
  288. * Catch up with missed period expirations.
  289. */
  290. static
  291. void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
  292. {
  293. unsigned long period = 1UL << (pg->shift - 1);
  294. unsigned long period_mask = ~(period - 1);
  295. unsigned long global_period;
  296. unsigned long flags;
  297. global_period = percpu_counter_read(&pg->events);
  298. global_period &= period_mask;
  299. /*
  300. * Fast path - check if the local and global period count still match
  301. * outside of the lock.
  302. */
  303. if (pl->period == global_period)
  304. return;
  305. raw_spin_lock_irqsave(&pl->lock, flags);
  306. prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
  307. /*
  308. * For each missed period, we half the local counter.
  309. */
  310. period = (global_period - pl->period) >> (pg->shift - 1);
  311. if (likely(period < BITS_PER_LONG))
  312. pl->events >>= period;
  313. else
  314. pl->events = 0;
  315. pl->period = global_period;
  316. raw_spin_unlock_irqrestore(&pl->lock, flags);
  317. }
  318. /*
  319. * ++x_{j}, ++t
  320. */
  321. void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
  322. {
  323. struct prop_global *pg = prop_get_global(pd);
  324. prop_norm_single(pg, pl);
  325. pl->events++;
  326. percpu_counter_add(&pg->events, 1);
  327. prop_put_global(pd, pg);
  328. }
  329. /*
  330. * Obtain a fraction of this proportion
  331. *
  332. * p_{j} = x_{j} / (period/2 + t % period/2)
  333. */
  334. void prop_fraction_single(struct prop_descriptor *pd,
  335. struct prop_local_single *pl,
  336. long *numerator, long *denominator)
  337. {
  338. struct prop_global *pg = prop_get_global(pd);
  339. unsigned long period_2 = 1UL << (pg->shift - 1);
  340. unsigned long counter_mask = period_2 - 1;
  341. unsigned long global_count;
  342. prop_norm_single(pg, pl);
  343. *numerator = pl->events;
  344. global_count = percpu_counter_read(&pg->events);
  345. *denominator = period_2 + (global_count & counter_mask);
  346. prop_put_global(pd, pg);
  347. }