123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610 |
- /*
- * Bad block management
- *
- * - Heavily based on MD badblocks code from Neil Brown
- *
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- */
- #include <linux/badblocks.h>
- #include <linux/seqlock.h>
- #include <linux/device.h>
- #include <linux/kernel.h>
- #include <linux/module.h>
- #include <linux/stddef.h>
- #include <linux/types.h>
- #include <linux/slab.h>
- /**
- * badblocks_check() - check a given range for bad sectors
- * @bb: the badblocks structure that holds all badblock information
- * @s: sector (start) at which to check for badblocks
- * @sectors: number of sectors to check for badblocks
- * @first_bad: pointer to store location of the first badblock
- * @bad_sectors: pointer to store number of badblocks after @first_bad
- *
- * We can record which blocks on each device are 'bad' and so just
- * fail those blocks, or that stripe, rather than the whole device.
- * Entries in the bad-block table are 64bits wide. This comprises:
- * Length of bad-range, in sectors: 0-511 for lengths 1-512
- * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
- * A 'shift' can be set so that larger blocks are tracked and
- * consequently larger devices can be covered.
- * 'Acknowledged' flag - 1 bit. - the most significant bit.
- *
- * Locking of the bad-block table uses a seqlock so badblocks_check
- * might need to retry if it is very unlucky.
- * We will sometimes want to check for bad blocks in a bi_end_io function,
- * so we use the write_seqlock_irq variant.
- *
- * When looking for a bad block we specify a range and want to
- * know if any block in the range is bad. So we binary-search
- * to the last range that starts at-or-before the given endpoint,
- * (or "before the sector after the target range")
- * then see if it ends after the given start.
- *
- * Return:
- * 0: there are no known bad blocks in the range
- * 1: there are known bad block which are all acknowledged
- * -1: there are bad blocks which have not yet been acknowledged in metadata.
- * plus the start/length of the first bad section we overlap.
- */
- int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
- sector_t *first_bad, int *bad_sectors)
- {
- int hi;
- int lo;
- u64 *p = bb->page;
- int rv;
- sector_t target = s + sectors;
- unsigned seq;
- if (bb->shift > 0) {
- /* round the start down, and the end up */
- s >>= bb->shift;
- target += (1<<bb->shift) - 1;
- target >>= bb->shift;
- sectors = target - s;
- }
- /* 'target' is now the first block after the bad range */
- retry:
- seq = read_seqbegin(&bb->lock);
- lo = 0;
- rv = 0;
- hi = bb->count;
- /* Binary search between lo and hi for 'target'
- * i.e. for the last range that starts before 'target'
- */
- /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
- * are known not to be the last range before target.
- * VARIANT: hi-lo is the number of possible
- * ranges, and decreases until it reaches 1
- */
- while (hi - lo > 1) {
- int mid = (lo + hi) / 2;
- sector_t a = BB_OFFSET(p[mid]);
- if (a < target)
- /* This could still be the one, earlier ranges
- * could not.
- */
- lo = mid;
- else
- /* This and later ranges are definitely out. */
- hi = mid;
- }
- /* 'lo' might be the last that started before target, but 'hi' isn't */
- if (hi > lo) {
- /* need to check all range that end after 's' to see if
- * any are unacknowledged.
- */
- while (lo >= 0 &&
- BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
- if (BB_OFFSET(p[lo]) < target) {
- /* starts before the end, and finishes after
- * the start, so they must overlap
- */
- if (rv != -1 && BB_ACK(p[lo]))
- rv = 1;
- else
- rv = -1;
- *first_bad = BB_OFFSET(p[lo]);
- *bad_sectors = BB_LEN(p[lo]);
- }
- lo--;
- }
- }
- if (read_seqretry(&bb->lock, seq))
- goto retry;
- return rv;
- }
- EXPORT_SYMBOL_GPL(badblocks_check);
- static void badblocks_update_acked(struct badblocks *bb)
- {
- u64 *p = bb->page;
- int i;
- bool unacked = false;
- if (!bb->unacked_exist)
- return;
- for (i = 0; i < bb->count ; i++) {
- if (!BB_ACK(p[i])) {
- unacked = true;
- break;
- }
- }
- if (!unacked)
- bb->unacked_exist = 0;
- }
- /**
- * badblocks_set() - Add a range of bad blocks to the table.
- * @bb: the badblocks structure that holds all badblock information
- * @s: first sector to mark as bad
- * @sectors: number of sectors to mark as bad
- * @acknowledged: weather to mark the bad sectors as acknowledged
- *
- * This might extend the table, or might contract it if two adjacent ranges
- * can be merged. We binary-search to find the 'insertion' point, then
- * decide how best to handle it.
- *
- * Return:
- * 0: success
- * 1: failed to set badblocks (out of space)
- */
- int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
- int acknowledged)
- {
- u64 *p;
- int lo, hi;
- int rv = 0;
- unsigned long flags;
- if (bb->shift < 0)
- /* badblocks are disabled */
- return 1;
- if (bb->shift) {
- /* round the start down, and the end up */
- sector_t next = s + sectors;
- s >>= bb->shift;
- next += (1<<bb->shift) - 1;
- next >>= bb->shift;
- sectors = next - s;
- }
- write_seqlock_irqsave(&bb->lock, flags);
- p = bb->page;
- lo = 0;
- hi = bb->count;
- /* Find the last range that starts at-or-before 's' */
- while (hi - lo > 1) {
- int mid = (lo + hi) / 2;
- sector_t a = BB_OFFSET(p[mid]);
- if (a <= s)
- lo = mid;
- else
- hi = mid;
- }
- if (hi > lo && BB_OFFSET(p[lo]) > s)
- hi = lo;
- if (hi > lo) {
- /* we found a range that might merge with the start
- * of our new range
- */
- sector_t a = BB_OFFSET(p[lo]);
- sector_t e = a + BB_LEN(p[lo]);
- int ack = BB_ACK(p[lo]);
- if (e >= s) {
- /* Yes, we can merge with a previous range */
- if (s == a && s + sectors >= e)
- /* new range covers old */
- ack = acknowledged;
- else
- ack = ack && acknowledged;
- if (e < s + sectors)
- e = s + sectors;
- if (e - a <= BB_MAX_LEN) {
- p[lo] = BB_MAKE(a, e-a, ack);
- s = e;
- } else {
- /* does not all fit in one range,
- * make p[lo] maximal
- */
- if (BB_LEN(p[lo]) != BB_MAX_LEN)
- p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
- s = a + BB_MAX_LEN;
- }
- sectors = e - s;
- }
- }
- if (sectors && hi < bb->count) {
- /* 'hi' points to the first range that starts after 's'.
- * Maybe we can merge with the start of that range
- */
- sector_t a = BB_OFFSET(p[hi]);
- sector_t e = a + BB_LEN(p[hi]);
- int ack = BB_ACK(p[hi]);
- if (a <= s + sectors) {
- /* merging is possible */
- if (e <= s + sectors) {
- /* full overlap */
- e = s + sectors;
- ack = acknowledged;
- } else
- ack = ack && acknowledged;
- a = s;
- if (e - a <= BB_MAX_LEN) {
- p[hi] = BB_MAKE(a, e-a, ack);
- s = e;
- } else {
- p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
- s = a + BB_MAX_LEN;
- }
- sectors = e - s;
- lo = hi;
- hi++;
- }
- }
- if (sectors == 0 && hi < bb->count) {
- /* we might be able to combine lo and hi */
- /* Note: 's' is at the end of 'lo' */
- sector_t a = BB_OFFSET(p[hi]);
- int lolen = BB_LEN(p[lo]);
- int hilen = BB_LEN(p[hi]);
- int newlen = lolen + hilen - (s - a);
- if (s >= a && newlen < BB_MAX_LEN) {
- /* yes, we can combine them */
- int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
- p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
- memmove(p + hi, p + hi + 1,
- (bb->count - hi - 1) * 8);
- bb->count--;
- }
- }
- while (sectors) {
- /* didn't merge (it all).
- * Need to add a range just before 'hi'
- */
- if (bb->count >= MAX_BADBLOCKS) {
- /* No room for more */
- rv = 1;
- break;
- } else {
- int this_sectors = sectors;
- memmove(p + hi + 1, p + hi,
- (bb->count - hi) * 8);
- bb->count++;
- if (this_sectors > BB_MAX_LEN)
- this_sectors = BB_MAX_LEN;
- p[hi] = BB_MAKE(s, this_sectors, acknowledged);
- sectors -= this_sectors;
- s += this_sectors;
- }
- }
- bb->changed = 1;
- if (!acknowledged)
- bb->unacked_exist = 1;
- else
- badblocks_update_acked(bb);
- write_sequnlock_irqrestore(&bb->lock, flags);
- return rv;
- }
- EXPORT_SYMBOL_GPL(badblocks_set);
- /**
- * badblocks_clear() - Remove a range of bad blocks to the table.
- * @bb: the badblocks structure that holds all badblock information
- * @s: first sector to mark as bad
- * @sectors: number of sectors to mark as bad
- *
- * This may involve extending the table if we spilt a region,
- * but it must not fail. So if the table becomes full, we just
- * drop the remove request.
- *
- * Return:
- * 0: success
- * 1: failed to clear badblocks
- */
- int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
- {
- u64 *p;
- int lo, hi;
- sector_t target = s + sectors;
- int rv = 0;
- if (bb->shift > 0) {
- /* When clearing we round the start up and the end down.
- * This should not matter as the shift should align with
- * the block size and no rounding should ever be needed.
- * However it is better the think a block is bad when it
- * isn't than to think a block is not bad when it is.
- */
- s += (1<<bb->shift) - 1;
- s >>= bb->shift;
- target >>= bb->shift;
- sectors = target - s;
- }
- write_seqlock_irq(&bb->lock);
- p = bb->page;
- lo = 0;
- hi = bb->count;
- /* Find the last range that starts before 'target' */
- while (hi - lo > 1) {
- int mid = (lo + hi) / 2;
- sector_t a = BB_OFFSET(p[mid]);
- if (a < target)
- lo = mid;
- else
- hi = mid;
- }
- if (hi > lo) {
- /* p[lo] is the last range that could overlap the
- * current range. Earlier ranges could also overlap,
- * but only this one can overlap the end of the range.
- */
- if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
- (BB_OFFSET(p[lo]) < target)) {
- /* Partial overlap, leave the tail of this range */
- int ack = BB_ACK(p[lo]);
- sector_t a = BB_OFFSET(p[lo]);
- sector_t end = a + BB_LEN(p[lo]);
- if (a < s) {
- /* we need to split this range */
- if (bb->count >= MAX_BADBLOCKS) {
- rv = -ENOSPC;
- goto out;
- }
- memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
- bb->count++;
- p[lo] = BB_MAKE(a, s-a, ack);
- lo++;
- }
- p[lo] = BB_MAKE(target, end - target, ack);
- /* there is no longer an overlap */
- hi = lo;
- lo--;
- }
- while (lo >= 0 &&
- (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
- (BB_OFFSET(p[lo]) < target)) {
- /* This range does overlap */
- if (BB_OFFSET(p[lo]) < s) {
- /* Keep the early parts of this range. */
- int ack = BB_ACK(p[lo]);
- sector_t start = BB_OFFSET(p[lo]);
- p[lo] = BB_MAKE(start, s - start, ack);
- /* now low doesn't overlap, so.. */
- break;
- }
- lo--;
- }
- /* 'lo' is strictly before, 'hi' is strictly after,
- * anything between needs to be discarded
- */
- if (hi - lo > 1) {
- memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
- bb->count -= (hi - lo - 1);
- }
- }
- badblocks_update_acked(bb);
- bb->changed = 1;
- out:
- write_sequnlock_irq(&bb->lock);
- return rv;
- }
- EXPORT_SYMBOL_GPL(badblocks_clear);
- /**
- * ack_all_badblocks() - Acknowledge all bad blocks in a list.
- * @bb: the badblocks structure that holds all badblock information
- *
- * This only succeeds if ->changed is clear. It is used by
- * in-kernel metadata updates
- */
- void ack_all_badblocks(struct badblocks *bb)
- {
- if (bb->page == NULL || bb->changed)
- /* no point even trying */
- return;
- write_seqlock_irq(&bb->lock);
- if (bb->changed == 0 && bb->unacked_exist) {
- u64 *p = bb->page;
- int i;
- for (i = 0; i < bb->count ; i++) {
- if (!BB_ACK(p[i])) {
- sector_t start = BB_OFFSET(p[i]);
- int len = BB_LEN(p[i]);
- p[i] = BB_MAKE(start, len, 1);
- }
- }
- bb->unacked_exist = 0;
- }
- write_sequnlock_irq(&bb->lock);
- }
- EXPORT_SYMBOL_GPL(ack_all_badblocks);
- /**
- * badblocks_show() - sysfs access to bad-blocks list
- * @bb: the badblocks structure that holds all badblock information
- * @page: buffer received from sysfs
- * @unack: weather to show unacknowledged badblocks
- *
- * Return:
- * Length of returned data
- */
- ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
- {
- size_t len;
- int i;
- u64 *p = bb->page;
- unsigned seq;
- if (bb->shift < 0)
- return 0;
- retry:
- seq = read_seqbegin(&bb->lock);
- len = 0;
- i = 0;
- while (len < PAGE_SIZE && i < bb->count) {
- sector_t s = BB_OFFSET(p[i]);
- unsigned int length = BB_LEN(p[i]);
- int ack = BB_ACK(p[i]);
- i++;
- if (unack && ack)
- continue;
- len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
- (unsigned long long)s << bb->shift,
- length << bb->shift);
- }
- if (unack && len == 0)
- bb->unacked_exist = 0;
- if (read_seqretry(&bb->lock, seq))
- goto retry;
- return len;
- }
- EXPORT_SYMBOL_GPL(badblocks_show);
- /**
- * badblocks_store() - sysfs access to bad-blocks list
- * @bb: the badblocks structure that holds all badblock information
- * @page: buffer received from sysfs
- * @len: length of data received from sysfs
- * @unack: weather to show unacknowledged badblocks
- *
- * Return:
- * Length of the buffer processed or -ve error.
- */
- ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
- int unack)
- {
- unsigned long long sector;
- int length;
- char newline;
- switch (sscanf(page, "%llu %d%c", §or, &length, &newline)) {
- case 3:
- if (newline != '\n')
- return -EINVAL;
- case 2:
- if (length <= 0)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
- if (badblocks_set(bb, sector, length, !unack))
- return -ENOSPC;
- else
- return len;
- }
- EXPORT_SYMBOL_GPL(badblocks_store);
- static int __badblocks_init(struct device *dev, struct badblocks *bb,
- int enable)
- {
- bb->dev = dev;
- bb->count = 0;
- if (enable)
- bb->shift = 0;
- else
- bb->shift = -1;
- if (dev)
- bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
- else
- bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (!bb->page) {
- bb->shift = -1;
- return -ENOMEM;
- }
- seqlock_init(&bb->lock);
- return 0;
- }
- /**
- * badblocks_init() - initialize the badblocks structure
- * @bb: the badblocks structure that holds all badblock information
- * @enable: weather to enable badblocks accounting
- *
- * Return:
- * 0: success
- * -ve errno: on error
- */
- int badblocks_init(struct badblocks *bb, int enable)
- {
- return __badblocks_init(NULL, bb, enable);
- }
- EXPORT_SYMBOL_GPL(badblocks_init);
- int devm_init_badblocks(struct device *dev, struct badblocks *bb)
- {
- if (!bb)
- return -EINVAL;
- return __badblocks_init(dev, bb, 1);
- }
- EXPORT_SYMBOL_GPL(devm_init_badblocks);
- /**
- * badblocks_exit() - free the badblocks structure
- * @bb: the badblocks structure that holds all badblock information
- */
- void badblocks_exit(struct badblocks *bb)
- {
- if (!bb)
- return;
- if (bb->dev)
- devm_kfree(bb->dev, bb->page);
- else
- kfree(bb->page);
- bb->page = NULL;
- }
- EXPORT_SYMBOL_GPL(badblocks_exit);
|