123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407 |
- /*
- * Floating proportions
- *
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
- *
- * Description:
- *
- * The floating proportion is a time derivative with an exponentially decaying
- * history:
- *
- * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
- *
- * Where j is an element from {prop_local}, x_{j} is j's number of events,
- * and i the time period over which the differential is taken. So d/dt_{-i} is
- * the differential over the i-th last period.
- *
- * The decaying history gives smooth transitions. The time differential carries
- * the notion of speed.
- *
- * The denominator is 2^(1+i) because we want the series to be normalised, ie.
- *
- * \Sum_{i=0} 1/2^(1+i) = 1
- *
- * Furthermore, if we measure time (t) in the same events as x; so that:
- *
- * t = \Sum_{j} x_{j}
- *
- * we get that:
- *
- * \Sum_{j} p_{j} = 1
- *
- * Writing this in an iterative fashion we get (dropping the 'd's):
- *
- * if (++x_{j}, ++t > period)
- * t /= 2;
- * for_each (j)
- * x_{j} /= 2;
- *
- * so that:
- *
- * p_{j} = x_{j} / t;
- *
- * We optimize away the '/= 2' for the global time delta by noting that:
- *
- * if (++t > period) t /= 2:
- *
- * Can be approximated by:
- *
- * period/2 + (++t % period/2)
- *
- * [ Furthermore, when we choose period to be 2^n it can be written in terms of
- * binary operations and wraparound artefacts disappear. ]
- *
- * Also note that this yields a natural counter of the elapsed periods:
- *
- * c = t / (period/2)
- *
- * [ Its monotonically increasing property can be applied to mitigate the
- * wrap-around issue. ]
- *
- * This allows us to do away with the loop over all prop_locals on each period
- * expiration. By remembering the period count under which it was last accessed
- * as c_{j}, we can obtain the number of 'missed' cycles from:
- *
- * c - c_{j}
- *
- * We can then lazily catch up to the global period count every time we are
- * going to use x_{j}, by doing:
- *
- * x_{j} /= 2^(c - c_{j}), c_{j} = c
- */
- #include <linux/proportions.h>
- #include <linux/rcupdate.h>
- int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp)
- {
- int err;
- if (shift > PROP_MAX_SHIFT)
- shift = PROP_MAX_SHIFT;
- pd->index = 0;
- pd->pg[0].shift = shift;
- mutex_init(&pd->mutex);
- err = percpu_counter_init(&pd->pg[0].events, 0, gfp);
- if (err)
- goto out;
- err = percpu_counter_init(&pd->pg[1].events, 0, gfp);
- if (err)
- percpu_counter_destroy(&pd->pg[0].events);
- out:
- return err;
- }
- /*
- * We have two copies, and flip between them to make it seem like an atomic
- * update. The update is not really atomic wrt the events counter, but
- * it is internally consistent with the bit layout depending on shift.
- *
- * We copy the events count, move the bits around and flip the index.
- */
- void prop_change_shift(struct prop_descriptor *pd, int shift)
- {
- int index;
- int offset;
- u64 events;
- unsigned long flags;
- if (shift > PROP_MAX_SHIFT)
- shift = PROP_MAX_SHIFT;
- mutex_lock(&pd->mutex);
- index = pd->index ^ 1;
- offset = pd->pg[pd->index].shift - shift;
- if (!offset)
- goto out;
- pd->pg[index].shift = shift;
- local_irq_save(flags);
- events = percpu_counter_sum(&pd->pg[pd->index].events);
- if (offset < 0)
- events <<= -offset;
- else
- events >>= offset;
- percpu_counter_set(&pd->pg[index].events, events);
- /*
- * ensure the new pg is fully written before the switch
- */
- smp_wmb();
- pd->index = index;
- local_irq_restore(flags);
- synchronize_rcu();
- out:
- mutex_unlock(&pd->mutex);
- }
- /*
- * wrap the access to the data in an rcu_read_lock() section;
- * this is used to track the active references.
- */
- static struct prop_global *prop_get_global(struct prop_descriptor *pd)
- __acquires(RCU)
- {
- int index;
- rcu_read_lock();
- index = pd->index;
- /*
- * match the wmb from vcd_flip()
- */
- smp_rmb();
- return &pd->pg[index];
- }
- static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
- __releases(RCU)
- {
- rcu_read_unlock();
- }
/*
 * Rescale a local period value from its current shift to @new_shift.
 *
 * @pl_shift:  in/out, the shift *pl_period is currently expressed in
 * @pl_period: in/out, the period value to rescale
 * @new_shift: the shift to convert to
 *
 * A larger shift scales the period up (left shift), a smaller one scales it
 * down (right shift). Nothing is written when the shift is unchanged.
 */
static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
	int delta = new_shift - *pl_shift;

	if (delta == 0)
		return;

	if (delta > 0)
		*pl_period <<= delta;
	else
		*pl_period >>= -delta;

	*pl_shift = new_shift;
}
/*
 * PERCPU
 */

/*
 * Batch size handed to __percpu_counter_add() when updating a local per-cpu
 * events counter in this file; it grows with ilog2(nr_cpu_ids).
 */
#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
- int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp)
- {
- raw_spin_lock_init(&pl->lock);
- pl->shift = 0;
- pl->period = 0;
- return percpu_counter_init(&pl->events, 0, gfp);
- }
- void prop_local_destroy_percpu(struct prop_local_percpu *pl)
- {
- percpu_counter_destroy(&pl->events);
- }
- /*
- * Catch up with missed period expirations.
- *
- * until (c_{j} == c)
- * x_{j} -= x_{j}/2;
- * c_{j}++;
- */
- static
- void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
- {
- unsigned long period = 1UL << (pg->shift - 1);
- unsigned long period_mask = ~(period - 1);
- unsigned long global_period;
- unsigned long flags;
- global_period = percpu_counter_read(&pg->events);
- global_period &= period_mask;
- /*
- * Fast path - check if the local and global period count still match
- * outside of the lock.
- */
- if (pl->period == global_period)
- return;
- raw_spin_lock_irqsave(&pl->lock, flags);
- prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
- /*
- * For each missed period, we half the local counter.
- * basically:
- * pl->events >> (global_period - pl->period);
- */
- period = (global_period - pl->period) >> (pg->shift - 1);
- if (period < BITS_PER_LONG) {
- s64 val = percpu_counter_read(&pl->events);
- if (val < (nr_cpu_ids * PROP_BATCH))
- val = percpu_counter_sum(&pl->events);
- __percpu_counter_add(&pl->events, -val + (val >> period),
- PROP_BATCH);
- } else
- percpu_counter_set(&pl->events, 0);
- pl->period = global_period;
- raw_spin_unlock_irqrestore(&pl->lock, flags);
- }
- /*
- * ++x_{j}, ++t
- */
- void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
- {
- struct prop_global *pg = prop_get_global(pd);
- prop_norm_percpu(pg, pl);
- __percpu_counter_add(&pl->events, 1, PROP_BATCH);
- percpu_counter_add(&pg->events, 1);
- prop_put_global(pd, pg);
- }
- /*
- * identical to __prop_inc_percpu, except that it limits this pl's fraction to
- * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
- */
- void __prop_inc_percpu_max(struct prop_descriptor *pd,
- struct prop_local_percpu *pl, long frac)
- {
- struct prop_global *pg = prop_get_global(pd);
- prop_norm_percpu(pg, pl);
- if (unlikely(frac != PROP_FRAC_BASE)) {
- unsigned long period_2 = 1UL << (pg->shift - 1);
- unsigned long counter_mask = period_2 - 1;
- unsigned long global_count;
- long numerator, denominator;
- numerator = percpu_counter_read_positive(&pl->events);
- global_count = percpu_counter_read(&pg->events);
- denominator = period_2 + (global_count & counter_mask);
- if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
- goto out_put;
- }
- percpu_counter_add(&pl->events, 1);
- percpu_counter_add(&pg->events, 1);
- out_put:
- prop_put_global(pd, pg);
- }
- /*
- * Obtain a fraction of this proportion
- *
- * p_{j} = x_{j} / (period/2 + t % period/2)
- */
- void prop_fraction_percpu(struct prop_descriptor *pd,
- struct prop_local_percpu *pl,
- long *numerator, long *denominator)
- {
- struct prop_global *pg = prop_get_global(pd);
- unsigned long period_2 = 1UL << (pg->shift - 1);
- unsigned long counter_mask = period_2 - 1;
- unsigned long global_count;
- prop_norm_percpu(pg, pl);
- *numerator = percpu_counter_read_positive(&pl->events);
- global_count = percpu_counter_read(&pg->events);
- *denominator = period_2 + (global_count & counter_mask);
- prop_put_global(pd, pg);
- }
- /*
- * SINGLE
- */
- int prop_local_init_single(struct prop_local_single *pl)
- {
- raw_spin_lock_init(&pl->lock);
- pl->shift = 0;
- pl->period = 0;
- pl->events = 0;
- return 0;
- }
/*
 * Counterpart of prop_local_init_single(); there is nothing to release, so
 * this is intentionally empty.
 */
void prop_local_destroy_single(struct prop_local_single *pl)
{
	/* nothing to do */
}
- /*
- * Catch up with missed period expirations.
- */
- static
- void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
- {
- unsigned long period = 1UL << (pg->shift - 1);
- unsigned long period_mask = ~(period - 1);
- unsigned long global_period;
- unsigned long flags;
- global_period = percpu_counter_read(&pg->events);
- global_period &= period_mask;
- /*
- * Fast path - check if the local and global period count still match
- * outside of the lock.
- */
- if (pl->period == global_period)
- return;
- raw_spin_lock_irqsave(&pl->lock, flags);
- prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
- /*
- * For each missed period, we half the local counter.
- */
- period = (global_period - pl->period) >> (pg->shift - 1);
- if (likely(period < BITS_PER_LONG))
- pl->events >>= period;
- else
- pl->events = 0;
- pl->period = global_period;
- raw_spin_unlock_irqrestore(&pl->lock, flags);
- }
- /*
- * ++x_{j}, ++t
- */
- void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
- {
- struct prop_global *pg = prop_get_global(pd);
- prop_norm_single(pg, pl);
- pl->events++;
- percpu_counter_add(&pg->events, 1);
- prop_put_global(pd, pg);
- }
- /*
- * Obtain a fraction of this proportion
- *
- * p_{j} = x_{j} / (period/2 + t % period/2)
- */
- void prop_fraction_single(struct prop_descriptor *pd,
- struct prop_local_single *pl,
- long *numerator, long *denominator)
- {
- struct prop_global *pg = prop_get_global(pd);
- unsigned long period_2 = 1UL << (pg->shift - 1);
- unsigned long counter_mask = period_2 - 1;
- unsigned long global_count;
- prop_norm_single(pg, pl);
- *numerator = pl->events;
- global_count = percpu_counter_read(&pg->events);
- *denominator = period_2 + (global_count & counter_mask);
- prop_put_global(pd, pg);
- }
|