12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821 |
- /*
- * Copyright (C) 2012 Red Hat, Inc.
- *
- * This file is released under the GPL.
- */
- #include "dm-array.h"
- #include "dm-space-map.h"
- #include "dm-transaction-manager.h"
- #include <linux/export.h>
- #include <linux/device-mapper.h>
- #define DM_MSG_PREFIX "array"
- /*----------------------------------------------------------------*/
- /*
- * The array is implemented as a fully populated btree, which points to
- * blocks that contain the packed values. This is more space efficient
- * than just using a btree since we don't store 1 key per value.
- */
- struct array_block {
- __le32 csum;
- __le32 max_entries;
- __le32 nr_entries;
- __le32 value_size;
- __le64 blocknr; /* Block this node is supposed to live in. */
- } __packed;
- /*----------------------------------------------------------------*/
/*
 * Validator methods.  Every array block carries a checksum, plus the
 * location it was written to (paranoia about ssds remapping areas by
 * mistake).
 */

/* Constant passed to dm_bm_checksum() when checksumming array blocks. */
#define CSUM_XOR 595846735
- static void array_block_prepare_for_write(struct dm_block_validator *v,
- struct dm_block *b,
- size_t size_of_block)
- {
- struct array_block *bh_le = dm_block_data(b);
- bh_le->blocknr = cpu_to_le64(dm_block_location(b));
- bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
- size_of_block - sizeof(__le32),
- CSUM_XOR));
- }
- static int array_block_check(struct dm_block_validator *v,
- struct dm_block *b,
- size_t size_of_block)
- {
- struct array_block *bh_le = dm_block_data(b);
- __le32 csum_disk;
- if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) {
- DMERR_LIMIT("array_block_check failed: blocknr %llu != wanted %llu",
- (unsigned long long) le64_to_cpu(bh_le->blocknr),
- (unsigned long long) dm_block_location(b));
- return -ENOTBLK;
- }
- csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
- size_of_block - sizeof(__le32),
- CSUM_XOR));
- if (csum_disk != bh_le->csum) {
- DMERR_LIMIT("array_block_check failed: csum %u != wanted %u",
- (unsigned) le32_to_cpu(csum_disk),
- (unsigned) le32_to_cpu(bh_le->csum));
- return -EILSEQ;
- }
- return 0;
- }
- static struct dm_block_validator array_validator = {
- .name = "array",
- .prepare_for_write = array_block_prepare_for_write,
- .check = array_block_check
- };
- /*----------------------------------------------------------------*/
- /*
- * Functions for manipulating the array blocks.
- */
- /*
- * Returns a pointer to a value within an array block.
- *
- * index - The index into _this_ specific block.
- */
- static void *element_at(struct dm_array_info *info, struct array_block *ab,
- unsigned index)
- {
- unsigned char *entry = (unsigned char *) (ab + 1);
- entry += index * info->value_type.size;
- return entry;
- }
- /*
- * Utility function that calls one of the value_type methods on every value
- * in an array block.
- */
- static void on_entries(struct dm_array_info *info, struct array_block *ab,
- void (*fn)(void *, const void *))
- {
- unsigned i, nr_entries = le32_to_cpu(ab->nr_entries);
- for (i = 0; i < nr_entries; i++)
- fn(info->value_type.context, element_at(info, ab, i));
- }
- /*
- * Increment every value in an array block.
- */
- static void inc_ablock_entries(struct dm_array_info *info, struct array_block *ab)
- {
- struct dm_btree_value_type *vt = &info->value_type;
- if (vt->inc)
- on_entries(info, ab, vt->inc);
- }
- /*
- * Decrement every value in an array block.
- */
- static void dec_ablock_entries(struct dm_array_info *info, struct array_block *ab)
- {
- struct dm_btree_value_type *vt = &info->value_type;
- if (vt->dec)
- on_entries(info, ab, vt->dec);
- }
- /*
- * Each array block can hold this many values.
- */
- static uint32_t calc_max_entries(size_t value_size, size_t size_of_block)
- {
- return (size_of_block - sizeof(struct array_block)) / value_size;
- }
- /*
- * Allocate a new array block. The caller will need to unlock block.
- */
- static int alloc_ablock(struct dm_array_info *info, size_t size_of_block,
- uint32_t max_entries,
- struct dm_block **block, struct array_block **ab)
- {
- int r;
- r = dm_tm_new_block(info->btree_info.tm, &array_validator, block);
- if (r)
- return r;
- (*ab) = dm_block_data(*block);
- (*ab)->max_entries = cpu_to_le32(max_entries);
- (*ab)->nr_entries = cpu_to_le32(0);
- (*ab)->value_size = cpu_to_le32(info->value_type.size);
- return 0;
- }
- /*
- * Pad an array block out with a particular value. Every instance will
- * cause an increment of the value_type. new_nr must always be more than
- * the current number of entries.
- */
- static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
- const void *value, unsigned new_nr)
- {
- unsigned i;
- uint32_t nr_entries;
- struct dm_btree_value_type *vt = &info->value_type;
- BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
- BUG_ON(new_nr < le32_to_cpu(ab->nr_entries));
- nr_entries = le32_to_cpu(ab->nr_entries);
- for (i = nr_entries; i < new_nr; i++) {
- if (vt->inc)
- vt->inc(vt->context, value);
- memcpy(element_at(info, ab, i), value, vt->size);
- }
- ab->nr_entries = cpu_to_le32(new_nr);
- }
- /*
- * Remove some entries from the back of an array block. Every value
- * removed will be decremented. new_nr must be <= the current number of
- * entries.
- */
- static void trim_ablock(struct dm_array_info *info, struct array_block *ab,
- unsigned new_nr)
- {
- unsigned i;
- uint32_t nr_entries;
- struct dm_btree_value_type *vt = &info->value_type;
- BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
- BUG_ON(new_nr > le32_to_cpu(ab->nr_entries));
- nr_entries = le32_to_cpu(ab->nr_entries);
- for (i = nr_entries; i > new_nr; i--)
- if (vt->dec)
- vt->dec(vt->context, element_at(info, ab, i - 1));
- ab->nr_entries = cpu_to_le32(new_nr);
- }
- /*
- * Read locks a block, and coerces it to an array block. The caller must
- * unlock 'block' when finished.
- */
- static int get_ablock(struct dm_array_info *info, dm_block_t b,
- struct dm_block **block, struct array_block **ab)
- {
- int r;
- r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block);
- if (r)
- return r;
- *ab = dm_block_data(*block);
- return 0;
- }
- /*
- * Unlocks an array block.
- */
- static void unlock_ablock(struct dm_array_info *info, struct dm_block *block)
- {
- dm_tm_unlock(info->btree_info.tm, block);
- }
- /*----------------------------------------------------------------*/
- /*
- * Btree manipulation.
- */
- /*
- * Looks up an array block in the btree, and then read locks it.
- *
- * index is the index of the index of the array_block, (ie. the array index
- * / max_entries).
- */
- static int lookup_ablock(struct dm_array_info *info, dm_block_t root,
- unsigned index, struct dm_block **block,
- struct array_block **ab)
- {
- int r;
- uint64_t key = index;
- __le64 block_le;
- r = dm_btree_lookup(&info->btree_info, root, &key, &block_le);
- if (r)
- return r;
- return get_ablock(info, le64_to_cpu(block_le), block, ab);
- }
- /*
- * Insert an array block into the btree. The block is _not_ unlocked.
- */
- static int insert_ablock(struct dm_array_info *info, uint64_t index,
- struct dm_block *block, dm_block_t *root)
- {
- __le64 block_le = cpu_to_le64(dm_block_location(block));
- __dm_bless_for_disk(block_le);
- return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root);
- }
- /*
- * Looks up an array block in the btree. Then shadows it, and updates the
- * btree to point to this new shadow. 'root' is an input/output parameter
- * for both the current root block, and the new one.
- */
- static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
- unsigned index, struct dm_block **block,
- struct array_block **ab)
- {
- int r, inc;
- uint64_t key = index;
- dm_block_t b;
- __le64 block_le;
- /*
- * lookup
- */
- r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
- if (r)
- return r;
- b = le64_to_cpu(block_le);
- /*
- * shadow
- */
- r = dm_tm_shadow_block(info->btree_info.tm, b,
- &array_validator, block, &inc);
- if (r)
- return r;
- *ab = dm_block_data(*block);
- if (inc)
- inc_ablock_entries(info, *ab);
- /*
- * Reinsert.
- *
- * The shadow op will often be a noop. Only insert if it really
- * copied data.
- */
- if (dm_block_location(*block) != b) {
- /*
- * dm_tm_shadow_block will have already decremented the old
- * block, but it is still referenced by the btree. We
- * increment to stop the insert decrementing it below zero
- * when overwriting the old value.
- */
- dm_tm_inc(info->btree_info.tm, b);
- r = insert_ablock(info, index, *block, root);
- }
- return r;
- }
- /*
- * Allocate an new array block, and fill it with some values.
- */
- static int insert_new_ablock(struct dm_array_info *info, size_t size_of_block,
- uint32_t max_entries,
- unsigned block_index, uint32_t nr,
- const void *value, dm_block_t *root)
- {
- int r;
- struct dm_block *block;
- struct array_block *ab;
- r = alloc_ablock(info, size_of_block, max_entries, &block, &ab);
- if (r)
- return r;
- fill_ablock(info, ab, value, nr);
- r = insert_ablock(info, block_index, block, root);
- unlock_ablock(info, block);
- return r;
- }
- static int insert_full_ablocks(struct dm_array_info *info, size_t size_of_block,
- unsigned begin_block, unsigned end_block,
- unsigned max_entries, const void *value,
- dm_block_t *root)
- {
- int r = 0;
- for (; !r && begin_block != end_block; begin_block++)
- r = insert_new_ablock(info, size_of_block, max_entries, begin_block, max_entries, value, root);
- return r;
- }
- /*
- * There are a bunch of functions involved with resizing an array. This
- * structure holds information that commonly needed by them. Purely here
- * to reduce parameter count.
- */
- struct resize {
- /*
- * Describes the array.
- */
- struct dm_array_info *info;
- /*
- * The current root of the array. This gets updated.
- */
- dm_block_t root;
- /*
- * Metadata block size. Used to calculate the nr entries in an
- * array block.
- */
- size_t size_of_block;
- /*
- * Maximum nr entries in an array block.
- */
- unsigned max_entries;
- /*
- * nr of completely full blocks in the array.
- *
- * 'old' refers to before the resize, 'new' after.
- */
- unsigned old_nr_full_blocks, new_nr_full_blocks;
- /*
- * Number of entries in the final block. 0 iff only full blocks in
- * the array.
- */
- unsigned old_nr_entries_in_last_block, new_nr_entries_in_last_block;
- /*
- * The default value used when growing the array.
- */
- const void *value;
- };
- /*
- * Removes a consecutive set of array blocks from the btree. The values
- * in block are decremented as a side effect of the btree remove.
- *
- * begin_index - the index of the first array block to remove.
- * end_index - the one-past-the-end value. ie. this block is not removed.
- */
- static int drop_blocks(struct resize *resize, unsigned begin_index,
- unsigned end_index)
- {
- int r;
- while (begin_index != end_index) {
- uint64_t key = begin_index++;
- r = dm_btree_remove(&resize->info->btree_info, resize->root,
- &key, &resize->root);
- if (r)
- return r;
- }
- return 0;
- }
/*
 * Number of array blocks an array occupies: all of its full blocks, plus
 * one more if there is a partially filled block at the end.
 */
static unsigned total_nr_blocks_needed(unsigned nr_full_blocks,
				       unsigned nr_entries_in_last_block)
{
	unsigned nr_blocks = nr_full_blocks;

	if (nr_entries_in_last_block)
		nr_blocks++;

	return nr_blocks;
}
- /*
- * Shrink an array.
- */
- static int shrink(struct resize *resize)
- {
- int r;
- unsigned begin, end;
- struct dm_block *block;
- struct array_block *ab;
- /*
- * Lose some blocks from the back?
- */
- if (resize->new_nr_full_blocks < resize->old_nr_full_blocks) {
- begin = total_nr_blocks_needed(resize->new_nr_full_blocks,
- resize->new_nr_entries_in_last_block);
- end = total_nr_blocks_needed(resize->old_nr_full_blocks,
- resize->old_nr_entries_in_last_block);
- r = drop_blocks(resize, begin, end);
- if (r)
- return r;
- }
- /*
- * Trim the new tail block
- */
- if (resize->new_nr_entries_in_last_block) {
- r = shadow_ablock(resize->info, &resize->root,
- resize->new_nr_full_blocks, &block, &ab);
- if (r)
- return r;
- trim_ablock(resize->info, ab, resize->new_nr_entries_in_last_block);
- unlock_ablock(resize->info, block);
- }
- return 0;
- }
- /*
- * Grow an array.
- */
- static int grow_extend_tail_block(struct resize *resize, uint32_t new_nr_entries)
- {
- int r;
- struct dm_block *block;
- struct array_block *ab;
- r = shadow_ablock(resize->info, &resize->root,
- resize->old_nr_full_blocks, &block, &ab);
- if (r)
- return r;
- fill_ablock(resize->info, ab, resize->value, new_nr_entries);
- unlock_ablock(resize->info, block);
- return r;
- }
- static int grow_add_tail_block(struct resize *resize)
- {
- return insert_new_ablock(resize->info, resize->size_of_block,
- resize->max_entries,
- resize->new_nr_full_blocks,
- resize->new_nr_entries_in_last_block,
- resize->value, &resize->root);
- }
- static int grow_needs_more_blocks(struct resize *resize)
- {
- int r;
- unsigned old_nr_blocks = resize->old_nr_full_blocks;
- if (resize->old_nr_entries_in_last_block > 0) {
- old_nr_blocks++;
- r = grow_extend_tail_block(resize, resize->max_entries);
- if (r)
- return r;
- }
- r = insert_full_ablocks(resize->info, resize->size_of_block,
- old_nr_blocks,
- resize->new_nr_full_blocks,
- resize->max_entries, resize->value,
- &resize->root);
- if (r)
- return r;
- if (resize->new_nr_entries_in_last_block)
- r = grow_add_tail_block(resize);
- return r;
- }
- static int grow(struct resize *resize)
- {
- if (resize->new_nr_full_blocks > resize->old_nr_full_blocks)
- return grow_needs_more_blocks(resize);
- else if (resize->old_nr_entries_in_last_block)
- return grow_extend_tail_block(resize, resize->new_nr_entries_in_last_block);
- else
- return grow_add_tail_block(resize);
- }
- /*----------------------------------------------------------------*/
- /*
- * These are the value_type functions for the btree elements, which point
- * to array blocks.
- */
- static void block_inc(void *context, const void *value)
- {
- __le64 block_le;
- struct dm_array_info *info = context;
- memcpy(&block_le, value, sizeof(block_le));
- dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le));
- }
- static void block_dec(void *context, const void *value)
- {
- int r;
- uint64_t b;
- __le64 block_le;
- uint32_t ref_count;
- struct dm_block *block;
- struct array_block *ab;
- struct dm_array_info *info = context;
- memcpy(&block_le, value, sizeof(block_le));
- b = le64_to_cpu(block_le);
- r = dm_tm_ref(info->btree_info.tm, b, &ref_count);
- if (r) {
- DMERR_LIMIT("couldn't get reference count for block %llu",
- (unsigned long long) b);
- return;
- }
- if (ref_count == 1) {
- /*
- * We're about to drop the last reference to this ablock.
- * So we need to decrement the ref count of the contents.
- */
- r = get_ablock(info, b, &block, &ab);
- if (r) {
- DMERR_LIMIT("couldn't get array block %llu",
- (unsigned long long) b);
- return;
- }
- dec_ablock_entries(info, ab);
- unlock_ablock(info, block);
- }
- dm_tm_dec(info->btree_info.tm, b);
- }
- static int block_equal(void *context, const void *value1, const void *value2)
- {
- return !memcmp(value1, value2, sizeof(__le64));
- }
- /*----------------------------------------------------------------*/
- void dm_array_info_init(struct dm_array_info *info,
- struct dm_transaction_manager *tm,
- struct dm_btree_value_type *vt)
- {
- struct dm_btree_value_type *bvt = &info->btree_info.value_type;
- memcpy(&info->value_type, vt, sizeof(info->value_type));
- info->btree_info.tm = tm;
- info->btree_info.levels = 1;
- bvt->context = info;
- bvt->size = sizeof(__le64);
- bvt->inc = block_inc;
- bvt->dec = block_dec;
- bvt->equal = block_equal;
- }
- EXPORT_SYMBOL_GPL(dm_array_info_init);
- int dm_array_empty(struct dm_array_info *info, dm_block_t *root)
- {
- return dm_btree_empty(&info->btree_info, root);
- }
- EXPORT_SYMBOL_GPL(dm_array_empty);
- static int array_resize(struct dm_array_info *info, dm_block_t root,
- uint32_t old_size, uint32_t new_size,
- const void *value, dm_block_t *new_root)
- {
- int r;
- struct resize resize;
- if (old_size == new_size) {
- *new_root = root;
- return 0;
- }
- resize.info = info;
- resize.root = root;
- resize.size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
- resize.max_entries = calc_max_entries(info->value_type.size,
- resize.size_of_block);
- resize.old_nr_full_blocks = old_size / resize.max_entries;
- resize.old_nr_entries_in_last_block = old_size % resize.max_entries;
- resize.new_nr_full_blocks = new_size / resize.max_entries;
- resize.new_nr_entries_in_last_block = new_size % resize.max_entries;
- resize.value = value;
- r = ((new_size > old_size) ? grow : shrink)(&resize);
- if (r)
- return r;
- *new_root = resize.root;
- return 0;
- }
- int dm_array_resize(struct dm_array_info *info, dm_block_t root,
- uint32_t old_size, uint32_t new_size,
- const void *value, dm_block_t *new_root)
- __dm_written_to_disk(value)
- {
- int r = array_resize(info, root, old_size, new_size, value, new_root);
- __dm_unbless_for_disk(value);
- return r;
- }
- EXPORT_SYMBOL_GPL(dm_array_resize);
- int dm_array_del(struct dm_array_info *info, dm_block_t root)
- {
- return dm_btree_del(&info->btree_info, root);
- }
- EXPORT_SYMBOL_GPL(dm_array_del);
- int dm_array_get_value(struct dm_array_info *info, dm_block_t root,
- uint32_t index, void *value_le)
- {
- int r;
- struct dm_block *block;
- struct array_block *ab;
- size_t size_of_block;
- unsigned entry, max_entries;
- size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
- max_entries = calc_max_entries(info->value_type.size, size_of_block);
- r = lookup_ablock(info, root, index / max_entries, &block, &ab);
- if (r)
- return r;
- entry = index % max_entries;
- if (entry >= le32_to_cpu(ab->nr_entries))
- r = -ENODATA;
- else
- memcpy(value_le, element_at(info, ab, entry),
- info->value_type.size);
- unlock_ablock(info, block);
- return r;
- }
- EXPORT_SYMBOL_GPL(dm_array_get_value);
- static int array_set_value(struct dm_array_info *info, dm_block_t root,
- uint32_t index, const void *value, dm_block_t *new_root)
- {
- int r;
- struct dm_block *block;
- struct array_block *ab;
- size_t size_of_block;
- unsigned max_entries;
- unsigned entry;
- void *old_value;
- struct dm_btree_value_type *vt = &info->value_type;
- size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
- max_entries = calc_max_entries(info->value_type.size, size_of_block);
- r = shadow_ablock(info, &root, index / max_entries, &block, &ab);
- if (r)
- return r;
- *new_root = root;
- entry = index % max_entries;
- if (entry >= le32_to_cpu(ab->nr_entries)) {
- r = -ENODATA;
- goto out;
- }
- old_value = element_at(info, ab, entry);
- if (vt->dec &&
- (!vt->equal || !vt->equal(vt->context, old_value, value))) {
- vt->dec(vt->context, old_value);
- if (vt->inc)
- vt->inc(vt->context, value);
- }
- memcpy(old_value, value, info->value_type.size);
- out:
- unlock_ablock(info, block);
- return r;
- }
- int dm_array_set_value(struct dm_array_info *info, dm_block_t root,
- uint32_t index, const void *value, dm_block_t *new_root)
- __dm_written_to_disk(value)
- {
- int r;
- r = array_set_value(info, root, index, value, new_root);
- __dm_unbless_for_disk(value);
- return r;
- }
- EXPORT_SYMBOL_GPL(dm_array_set_value);
/* State passed through dm_btree_walk() to walk_ablock(). */
struct walk_info {
	/* The array being walked. */
	struct dm_array_info *info;

	/* Caller's callback, invoked for every value in the array. */
	int (*fn)(void *context, uint64_t key, void *leaf);

	/* Opaque pointer handed to fn. */
	void *context;
};
- static int walk_ablock(void *context, uint64_t *keys, void *leaf)
- {
- struct walk_info *wi = context;
- int r;
- unsigned i;
- __le64 block_le;
- unsigned nr_entries, max_entries;
- struct dm_block *block;
- struct array_block *ab;
- memcpy(&block_le, leaf, sizeof(block_le));
- r = get_ablock(wi->info, le64_to_cpu(block_le), &block, &ab);
- if (r)
- return r;
- max_entries = le32_to_cpu(ab->max_entries);
- nr_entries = le32_to_cpu(ab->nr_entries);
- for (i = 0; i < nr_entries; i++) {
- r = wi->fn(wi->context, keys[0] * max_entries + i,
- element_at(wi->info, ab, i));
- if (r)
- break;
- }
- unlock_ablock(wi->info, block);
- return r;
- }
- int dm_array_walk(struct dm_array_info *info, dm_block_t root,
- int (*fn)(void *, uint64_t key, void *leaf),
- void *context)
- {
- struct walk_info wi;
- wi.info = info;
- wi.fn = fn;
- wi.context = context;
- return dm_btree_walk(&info->btree_info, root, walk_ablock, &wi);
- }
- EXPORT_SYMBOL_GPL(dm_array_walk);
- /*----------------------------------------------------------------*/
|