for-5.5/block-20191121
-----BEGIN PGP SIGNATURE-----
iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl3WxrEQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpuH5D/9qQKfIIuQDUNO4Xx+dIHimTDCrfiEOeO9e
CRaMuSj+yMxLDMwfX8RnDmR17H3ZVoiIY1CT24U9ZkA5iDjeAH4xmzkH30US7LR7
/64YVZTxB0OrWppRK8RiIhaJJZDQ6+HPUQsn6PRaLVuFHi2unMoTQnj/ZQKz03QA
Pl8Xx7qBtH1JwYCzQ21f/uryAcNg9eWabRLN2f1uiOXLmvRxOfh6Z/iaezlaZlmL
qeJdcdLjjvOgOPwEOfNjfS6pd+XBz3gdEhn0l+11nHITxWZmVBwsWTKyUQlCmKnl
yuCWDVyx5d6zCnlrLYG0l2Fn2lr9SwAkdkq3YAKV03hA/6s6P9q9bm31VvOf828x
7gmr4YVz68y7H9bM0QAHCvDpjll0aIEUw6XFzSOCDtZ9B6/pppYQWzMU71J05eyF
8DOKv2M2EVNLUjf6u0RDyolnWGU0kIjt5ryWE3OsGcezAVa2wYstgUJTKbrn1YgT
j+4KTpaI+sg8GKDFauvxcSa6gwoRp6jweFNW+7vC090/shXmrGmVLOnQZKRuHho/
O4W8y/1/deM8CCIAETpiNxA8RV5U/EZygrFGDFc7yzTtVDGHY356M/B4Bmm2qkVu
K3WgeZp8Fc0lH0QF6Pp9ZlBkZEpGNCAPVsPkXIsxQXbctftkn3KY//uIubfpFEB1
PpHSicvkww==
=HYYq
-----END PGP SIGNATURE-----
Merge tag 'for-5.5/block-20191121' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe:
"Due to more granular branches, this one is small and will be followed
with other core branches that add specific features. I meant to just
have a core and drivers branch, but due to external dependencies we ended up
adding a few more that are also core.
The changes are:
- Fixes and improvements for the zoned device support (Ajay, Damien)
- sed-opal table writing and datastore UID (Revanth)
- blk-cgroup (and bfq) stat fixes (Tejun)
- Improvements to the block stats tracking (Pavel)
- Fix for overrunning sysfs buffer for large number of CPUs (Ming)
- Optimization for small IO (Ming, Christoph)
- Fix typo in RWH lifetime hint (Eugene)
- Dead code removal and documentation (Bart)
- Reduction in memory usage for queue and tag set (Bart)
- Kerneldoc header documentation (André)
- Device/partition revalidation fixes (Jan)
- Stats tracking for flush requests (Konstantin)
- Various other little fixes here and there (et al)"
* tag 'for-5.5/block-20191121' of git://git.kernel.dk/linux-block: (48 commits)
Revert "block: split bio if the only bvec's length is > SZ_4K"
block: add iostat counters for flush requests
block,bfq: Skip tracing hooks if possible
block: sed-opal: Introduce SUM_SET_LIST parameter and append it using 'add_token_u64'
blk-cgroup: cgroup_rstat_updated() shouldn't be called on cgroup1
block: Don't disable interrupts in trigger_softirq()
sbitmap: Delete sbitmap_any_bit_clear()
blk-mq: Delete blk_mq_has_free_tags() and blk_mq_can_queue()
block: split bio if the only bvec's length is > SZ_4K
block: still try to split bio if the bvec crosses pages
blk-cgroup: separate out blkg_rwstat under CONFIG_BLK_CGROUP_RWSTAT
blk-cgroup: reimplement basic IO stats using cgroup rstat
blk-cgroup: remove now unused blkg_print_stat_{bytes|ios}_recursive()
blk-throtl: stop using blkg->stat_bytes and ->stat_ios
bfq-iosched: stop using blkg->stat_bytes and ->stat_ios
bfq-iosched: relocate bfqg_*rwstat*() helpers
block: add zone open, close and finish ioctl support
block: add zone open, close and finish operations
block: Simplify REQ_OP_ZONE_RESET_ALL handling
block: Remove REQ_OP_ZONE_RESET plugging
...
This commit is contained in:
commit
ff6814b078
51 changed files with 1404 additions and 806 deletions
|
|
@ -15,7 +15,9 @@
|
|||
*/
|
||||
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/percpu_counter.h>
|
||||
#include <linux/u64_stats_sync.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/radix-tree.h>
|
||||
#include <linux/blkdev.h>
|
||||
|
|
@ -31,15 +33,12 @@
|
|||
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
|
||||
enum blkg_rwstat_type {
|
||||
BLKG_RWSTAT_READ,
|
||||
BLKG_RWSTAT_WRITE,
|
||||
BLKG_RWSTAT_SYNC,
|
||||
BLKG_RWSTAT_ASYNC,
|
||||
BLKG_RWSTAT_DISCARD,
|
||||
enum blkg_iostat_type {
|
||||
BLKG_IOSTAT_READ,
|
||||
BLKG_IOSTAT_WRITE,
|
||||
BLKG_IOSTAT_DISCARD,
|
||||
|
||||
BLKG_RWSTAT_NR,
|
||||
BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
|
||||
BLKG_IOSTAT_NR,
|
||||
};
|
||||
|
||||
struct blkcg_gq;
|
||||
|
|
@ -61,17 +60,15 @@ struct blkcg {
|
|||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* blkg_[rw]stat->aux_cnt is excluded for local stats but included for
|
||||
* recursive. Used to carry stats of dead children.
|
||||
*/
|
||||
struct blkg_rwstat {
|
||||
struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR];
|
||||
atomic64_t aux_cnt[BLKG_RWSTAT_NR];
|
||||
struct blkg_iostat {
|
||||
u64 bytes[BLKG_IOSTAT_NR];
|
||||
u64 ios[BLKG_IOSTAT_NR];
|
||||
};
|
||||
|
||||
struct blkg_rwstat_sample {
|
||||
u64 cnt[BLKG_RWSTAT_NR];
|
||||
struct blkg_iostat_set {
|
||||
struct u64_stats_sync sync;
|
||||
struct blkg_iostat cur;
|
||||
struct blkg_iostat last;
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -127,8 +124,8 @@ struct blkcg_gq {
|
|||
/* is this blkg online? protected by both blkcg and q locks */
|
||||
bool online;
|
||||
|
||||
struct blkg_rwstat stat_bytes;
|
||||
struct blkg_rwstat stat_ios;
|
||||
struct blkg_iostat_set __percpu *iostat_cpu;
|
||||
struct blkg_iostat_set iostat;
|
||||
|
||||
struct blkg_policy_data *pd[BLKCG_MAX_POLS];
|
||||
|
||||
|
|
@ -202,13 +199,6 @@ int blkcg_activate_policy(struct request_queue *q,
|
|||
void blkcg_deactivate_policy(struct request_queue *q,
|
||||
const struct blkcg_policy *pol);
|
||||
|
||||
static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat,
|
||||
unsigned int idx)
|
||||
{
|
||||
return atomic64_read(&rwstat->aux_cnt[idx]) +
|
||||
percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]);
|
||||
}
|
||||
|
||||
const char *blkg_dev_name(struct blkcg_gq *blkg);
|
||||
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
|
||||
u64 (*prfill)(struct seq_file *,
|
||||
|
|
@ -216,17 +206,6 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
|
|||
const struct blkcg_policy *pol, int data,
|
||||
bool show_total);
|
||||
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
|
||||
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
const struct blkg_rwstat_sample *rwstat);
|
||||
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
int off);
|
||||
int blkg_print_stat_bytes(struct seq_file *sf, void *v);
|
||||
int blkg_print_stat_ios(struct seq_file *sf, void *v);
|
||||
int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
|
||||
int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);
|
||||
|
||||
void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
|
||||
int off, struct blkg_rwstat_sample *sum);
|
||||
|
||||
struct blkg_conf_ctx {
|
||||
struct gendisk *disk;
|
||||
|
|
@ -578,128 +557,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
|
|||
if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
|
||||
(p_blkg)->q, false)))
|
||||
|
||||
static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
|
||||
{
|
||||
int i, ret;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++) {
|
||||
ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
|
||||
if (ret) {
|
||||
while (--i >= 0)
|
||||
percpu_counter_destroy(&rwstat->cpu_cnt[i]);
|
||||
return ret;
|
||||
}
|
||||
atomic64_set(&rwstat->aux_cnt[i], 0);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
percpu_counter_destroy(&rwstat->cpu_cnt[i]);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_rwstat_add - add a value to a blkg_rwstat
|
||||
* @rwstat: target blkg_rwstat
|
||||
* @op: REQ_OP and flags
|
||||
* @val: value to add
|
||||
*
|
||||
* Add @val to @rwstat. The counters are chosen according to @rw. The
|
||||
* caller is responsible for synchronizing calls to this function.
|
||||
*/
|
||||
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
|
||||
unsigned int op, uint64_t val)
|
||||
{
|
||||
struct percpu_counter *cnt;
|
||||
|
||||
if (op_is_discard(op))
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
|
||||
else if (op_is_write(op))
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
|
||||
else
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
|
||||
|
||||
percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
|
||||
|
||||
if (op_is_sync(op))
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
|
||||
else
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
|
||||
|
||||
percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_rwstat_read - read the current values of a blkg_rwstat
|
||||
* @rwstat: blkg_rwstat to read
|
||||
*
|
||||
* Read the current snapshot of @rwstat and return it in the aux counts.
|
||||
*/
|
||||
static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat,
|
||||
struct blkg_rwstat_sample *result)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
result->cnt[i] =
|
||||
percpu_counter_sum_positive(&rwstat->cpu_cnt[i]);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_rwstat_total - read the total count of a blkg_rwstat
|
||||
* @rwstat: blkg_rwstat to read
|
||||
*
|
||||
* Return the total count of @rwstat regardless of the IO direction. This
|
||||
* function can be called without synchronization and takes care of u64
|
||||
* atomicity.
|
||||
*/
|
||||
static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
|
||||
{
|
||||
struct blkg_rwstat_sample tmp = { };
|
||||
|
||||
blkg_rwstat_read(rwstat, &tmp);
|
||||
return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_rwstat_reset - reset a blkg_rwstat
|
||||
* @rwstat: blkg_rwstat to reset
|
||||
*/
|
||||
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++) {
|
||||
percpu_counter_set(&rwstat->cpu_cnt[i], 0);
|
||||
atomic64_set(&rwstat->aux_cnt[i], 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
|
||||
* @to: the destination blkg_rwstat
|
||||
* @from: the source
|
||||
*
|
||||
* Add @from's count including the aux one to @to's aux count.
|
||||
*/
|
||||
static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
|
||||
struct blkg_rwstat *from)
|
||||
{
|
||||
u64 sum[BLKG_RWSTAT_NR];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
|
||||
&to->aux_cnt[i]);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING
|
||||
extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
|
||||
struct bio *bio);
|
||||
|
|
@ -745,15 +602,33 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
|
|||
throtl = blk_throtl_bio(q, blkg, bio);
|
||||
|
||||
if (!throtl) {
|
||||
struct blkg_iostat_set *bis;
|
||||
int rwd, cpu;
|
||||
|
||||
if (op_is_discard(bio->bi_opf))
|
||||
rwd = BLKG_IOSTAT_DISCARD;
|
||||
else if (op_is_write(bio->bi_opf))
|
||||
rwd = BLKG_IOSTAT_WRITE;
|
||||
else
|
||||
rwd = BLKG_IOSTAT_READ;
|
||||
|
||||
cpu = get_cpu();
|
||||
bis = per_cpu_ptr(blkg->iostat_cpu, cpu);
|
||||
u64_stats_update_begin(&bis->sync);
|
||||
|
||||
/*
|
||||
* If the bio is flagged with BIO_QUEUE_ENTERED it means this
|
||||
* is a split bio and we would have already accounted for the
|
||||
* size of the bio.
|
||||
*/
|
||||
if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
|
||||
blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
|
||||
bio->bi_iter.bi_size);
|
||||
blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
|
||||
bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
|
||||
bis->cur.ios[rwd]++;
|
||||
|
||||
u64_stats_update_end(&bis->sync);
|
||||
if (cgroup_subsys_on_dfl(io_cgrp_subsys))
|
||||
cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu);
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
blkcg_bio_issue_init(bio);
|
||||
|
|
|
|||
|
|
@ -10,103 +10,239 @@ struct blk_mq_tags;
|
|||
struct blk_flush_queue;
|
||||
|
||||
/**
|
||||
* struct blk_mq_hw_ctx - State for a hardware queue facing the hardware block device
|
||||
* struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
|
||||
* block device
|
||||
*/
|
||||
struct blk_mq_hw_ctx {
|
||||
struct {
|
||||
/** @lock: Protects the dispatch list. */
|
||||
spinlock_t lock;
|
||||
/**
|
||||
* @dispatch: Used for requests that are ready to be
|
||||
* dispatched to the hardware but for some reason (e.g. lack of
|
||||
* resources) could not be sent to the hardware. As soon as the
|
||||
* driver can send new requests, requests at this list will
|
||||
* be sent first for a fairer dispatch.
|
||||
*/
|
||||
struct list_head dispatch;
|
||||
unsigned long state; /* BLK_MQ_S_* flags */
|
||||
/**
|
||||
* @state: BLK_MQ_S_* flags. Defines the state of the hw
|
||||
* queue (active, scheduled to restart, stopped).
|
||||
*/
|
||||
unsigned long state;
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
/**
|
||||
* @run_work: Used for scheduling a hardware queue run at a later time.
|
||||
*/
|
||||
struct delayed_work run_work;
|
||||
/** @cpumask: Map of available CPUs where this hctx can run. */
|
||||
cpumask_var_t cpumask;
|
||||
/**
|
||||
* @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU
|
||||
* selection from @cpumask.
|
||||
*/
|
||||
int next_cpu;
|
||||
/**
|
||||
* @next_cpu_batch: Counter of how many works left in the batch before
|
||||
* changing to the next CPU.
|
||||
*/
|
||||
int next_cpu_batch;
|
||||
|
||||
unsigned long flags; /* BLK_MQ_F_* flags */
|
||||
/** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */
|
||||
unsigned long flags;
|
||||
|
||||
/**
|
||||
* @sched_data: Pointer owned by the IO scheduler attached to a request
|
||||
* queue. It's up to the IO scheduler how to use this pointer.
|
||||
*/
|
||||
void *sched_data;
|
||||
/**
|
||||
* @queue: Pointer to the request queue that owns this hardware context.
|
||||
*/
|
||||
struct request_queue *queue;
|
||||
/** @fq: Queue of requests that need to perform a flush operation. */
|
||||
struct blk_flush_queue *fq;
|
||||
|
||||
/**
|
||||
* @driver_data: Pointer to data owned by the block driver that created
|
||||
* this hctx
|
||||
*/
|
||||
void *driver_data;
|
||||
|
||||
/**
|
||||
* @ctx_map: Bitmap for each software queue. If bit is on, there is a
|
||||
* pending request in that software queue.
|
||||
*/
|
||||
struct sbitmap ctx_map;
|
||||
|
||||
/**
|
||||
* @dispatch_from: Software queue to be used when no scheduler was
|
||||
* selected.
|
||||
*/
|
||||
struct blk_mq_ctx *dispatch_from;
|
||||
/**
|
||||
* @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to
|
||||
* decide if the hw_queue is busy using Exponential Weighted Moving
|
||||
* Average algorithm.
|
||||
*/
|
||||
unsigned int dispatch_busy;
|
||||
|
||||
/** @type: HCTX_TYPE_* flags. Type of hardware queue. */
|
||||
unsigned short type;
|
||||
/** @nr_ctx: Number of software queues. */
|
||||
unsigned short nr_ctx;
|
||||
/** @ctxs: Array of software queues. */
|
||||
struct blk_mq_ctx **ctxs;
|
||||
|
||||
/** @dispatch_wait_lock: Lock for dispatch_wait queue. */
|
||||
spinlock_t dispatch_wait_lock;
|
||||
/**
|
||||
* @dispatch_wait: Waitqueue to put requests when there is no tag
|
||||
* available at the moment, to wait for another try in the future.
|
||||
*/
|
||||
wait_queue_entry_t dispatch_wait;
|
||||
|
||||
/**
|
||||
* @wait_index: Index of next available dispatch_wait queue to insert
|
||||
* requests.
|
||||
*/
|
||||
atomic_t wait_index;
|
||||
|
||||
/**
|
||||
* @tags: Tags owned by the block driver. A tag at this set is only
|
||||
* assigned when a request is dispatched from a hardware queue.
|
||||
*/
|
||||
struct blk_mq_tags *tags;
|
||||
/**
|
||||
* @sched_tags: Tags owned by I/O scheduler. If there is an I/O
|
||||
* scheduler associated with a request queue, a tag is assigned when
|
||||
* that request is allocated. Else, this member is not used.
|
||||
*/
|
||||
struct blk_mq_tags *sched_tags;
|
||||
|
||||
/** @queued: Number of queued requests. */
|
||||
unsigned long queued;
|
||||
/** @run: Number of dispatched requests. */
|
||||
unsigned long run;
|
||||
#define BLK_MQ_MAX_DISPATCH_ORDER 7
|
||||
/** @dispatched: Number of dispatch requests by queue. */
|
||||
unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
|
||||
|
||||
/** @numa_node: NUMA node the storage adapter has been connected to. */
|
||||
unsigned int numa_node;
|
||||
/** @queue_num: Index of this hardware queue. */
|
||||
unsigned int queue_num;
|
||||
|
||||
/**
|
||||
* @nr_active: Number of active requests. Only used when a tag set is
|
||||
* shared across request queues.
|
||||
*/
|
||||
atomic_t nr_active;
|
||||
|
||||
/** @cpuhp_dead: List to store requests if some CPU dies. */
|
||||
struct hlist_node cpuhp_dead;
|
||||
/** @kobj: Kernel object for sysfs. */
|
||||
struct kobject kobj;
|
||||
|
||||
/** @poll_considered: Count times blk_poll() was called. */
|
||||
unsigned long poll_considered;
|
||||
/** @poll_invoked: Count how many requests blk_poll() polled. */
|
||||
unsigned long poll_invoked;
|
||||
/** @poll_success: Count how many polled requests were completed. */
|
||||
unsigned long poll_success;
|
||||
|
||||
#ifdef CONFIG_BLK_DEBUG_FS
|
||||
/**
|
||||
* @debugfs_dir: debugfs directory for this hardware queue. Named
|
||||
* as cpu<cpu_number>.
|
||||
*/
|
||||
struct dentry *debugfs_dir;
|
||||
/** @sched_debugfs_dir: debugfs directory for the scheduler. */
|
||||
struct dentry *sched_debugfs_dir;
|
||||
#endif
|
||||
|
||||
/** @hctx_list: List of all hardware queues. */
|
||||
struct list_head hctx_list;
|
||||
|
||||
/* Must be the last member - see also blk_mq_hw_ctx_size(). */
|
||||
/**
|
||||
* @srcu: Sleepable RCU. Use as lock when type of the hardware queue is
|
||||
* blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also
|
||||
* blk_mq_hw_ctx_size().
|
||||
*/
|
||||
struct srcu_struct srcu[0];
|
||||
};
|
||||
|
||||
/**
|
||||
* struct blk_mq_queue_map - Map software queues to hardware queues
|
||||
* @mq_map: CPU ID to hardware queue index map. This is an array
|
||||
* with nr_cpu_ids elements. Each element has a value in the range
|
||||
* [@queue_offset, @queue_offset + @nr_queues).
|
||||
* @nr_queues: Number of hardware queues to map CPU IDs onto.
|
||||
* @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe
|
||||
* driver to map each hardware queue type (enum hctx_type) onto a distinct
|
||||
* set of hardware queues.
|
||||
*/
|
||||
struct blk_mq_queue_map {
|
||||
unsigned int *mq_map;
|
||||
unsigned int nr_queues;
|
||||
unsigned int queue_offset;
|
||||
};
|
||||
|
||||
/**
|
||||
* enum hctx_type - Type of hardware queue
|
||||
* @HCTX_TYPE_DEFAULT: All I/O not otherwise accounted for.
|
||||
* @HCTX_TYPE_READ: Just for READ I/O.
|
||||
* @HCTX_TYPE_POLL: Polled I/O of any kind.
|
||||
* @HCTX_MAX_TYPES: Number of types of hctx.
|
||||
*/
|
||||
enum hctx_type {
|
||||
HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */
|
||||
HCTX_TYPE_READ, /* just for READ I/O */
|
||||
HCTX_TYPE_POLL, /* polled I/O of any kind */
|
||||
HCTX_TYPE_DEFAULT,
|
||||
HCTX_TYPE_READ,
|
||||
HCTX_TYPE_POLL,
|
||||
|
||||
HCTX_MAX_TYPES,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct blk_mq_tag_set - tag set that can be shared between request queues
|
||||
* @map: One or more ctx -> hctx mappings. One map exists for each
|
||||
* hardware queue type (enum hctx_type) that the driver wishes
|
||||
* to support. There are no restrictions on maps being of the
|
||||
* same size, and it's perfectly legal to share maps between
|
||||
* types.
|
||||
* @nr_maps: Number of elements in the @map array. A number in the range
|
||||
* [1, HCTX_MAX_TYPES].
|
||||
* @ops: Pointers to functions that implement block driver behavior.
|
||||
* @nr_hw_queues: Number of hardware queues supported by the block driver that
|
||||
* owns this data structure.
|
||||
* @queue_depth: Number of tags per hardware queue, reserved tags included.
|
||||
* @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag
|
||||
* allocations.
|
||||
* @cmd_size: Number of additional bytes to allocate per request. The block
|
||||
* driver owns these additional bytes.
|
||||
* @numa_node: NUMA node the storage adapter has been connected to.
|
||||
* @timeout: Request processing timeout in jiffies.
|
||||
* @flags: Zero or more BLK_MQ_F_* flags.
|
||||
* @driver_data: Pointer to data owned by the block driver that created this
|
||||
* tag set.
|
||||
* @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues
|
||||
* elements.
|
||||
* @tag_list_lock: Serializes tag_list accesses.
|
||||
* @tag_list: List of the request queues that use this tag set. See also
|
||||
* request_queue.tag_set_list.
|
||||
*/
|
||||
struct blk_mq_tag_set {
|
||||
/*
|
||||
* map[] holds ctx -> hctx mappings, one map exists for each type
|
||||
* that the driver wishes to support. There are no restrictions
|
||||
* on maps being of the same size, and it's perfectly legal to
|
||||
* share maps between types.
|
||||
*/
|
||||
struct blk_mq_queue_map map[HCTX_MAX_TYPES];
|
||||
unsigned int nr_maps; /* nr entries in map[] */
|
||||
unsigned int nr_maps;
|
||||
const struct blk_mq_ops *ops;
|
||||
unsigned int nr_hw_queues; /* nr hw queues across maps */
|
||||
unsigned int queue_depth; /* max hw supported */
|
||||
unsigned int nr_hw_queues;
|
||||
unsigned int queue_depth;
|
||||
unsigned int reserved_tags;
|
||||
unsigned int cmd_size; /* per-request extra data */
|
||||
unsigned int cmd_size;
|
||||
int numa_node;
|
||||
unsigned int timeout;
|
||||
unsigned int flags; /* BLK_MQ_F_* */
|
||||
unsigned int flags;
|
||||
void *driver_data;
|
||||
|
||||
struct blk_mq_tags **tags;
|
||||
|
|
@ -115,6 +251,12 @@ struct blk_mq_tag_set {
|
|||
struct list_head tag_list;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct blk_mq_queue_data - Data about a request inserted in a queue
|
||||
*
|
||||
* @rq: Request pointer.
|
||||
* @last: If it is the last request in the queue.
|
||||
*/
|
||||
struct blk_mq_queue_data {
|
||||
struct request *rq;
|
||||
bool last;
|
||||
|
|
@ -142,81 +284,101 @@ typedef bool (busy_fn)(struct request_queue *);
|
|||
typedef void (complete_fn)(struct request *);
|
||||
typedef void (cleanup_rq_fn)(struct request *);
|
||||
|
||||
|
||||
/**
|
||||
* struct blk_mq_ops - Callback functions that implement block driver
|
||||
* behaviour.
|
||||
*/
|
||||
struct blk_mq_ops {
|
||||
/*
|
||||
* Queue request
|
||||
/**
|
||||
* @queue_rq: Queue a new request from block IO.
|
||||
*/
|
||||
queue_rq_fn *queue_rq;
|
||||
|
||||
/*
|
||||
* If a driver uses bd->last to judge when to submit requests to
|
||||
* hardware, it must define this function. In case of errors that
|
||||
* make us stop issuing further requests, this hook serves the
|
||||
/**
|
||||
* @commit_rqs: If a driver uses bd->last to judge when to submit
|
||||
* requests to hardware, it must define this function. In case of errors
|
||||
* that make us stop issuing further requests, this hook serves the
|
||||
* purpose of kicking the hardware (which the last request otherwise
|
||||
* would have done).
|
||||
*/
|
||||
commit_rqs_fn *commit_rqs;
|
||||
|
||||
/*
|
||||
* Reserve budget before queue request, once .queue_rq is
|
||||
/**
|
||||
* @get_budget: Reserve budget before queue request, once .queue_rq is
|
||||
* run, it is driver's responsibility to release the
|
||||
* reserved budget. Also we have to handle failure case
|
||||
* of .get_budget for avoiding I/O deadlock.
|
||||
*/
|
||||
get_budget_fn *get_budget;
|
||||
/**
|
||||
* @put_budget: Release the reserved budget.
|
||||
*/
|
||||
put_budget_fn *put_budget;
|
||||
|
||||
/*
|
||||
* Called on request timeout
|
||||
/**
|
||||
* @timeout: Called on request timeout.
|
||||
*/
|
||||
timeout_fn *timeout;
|
||||
|
||||
/*
|
||||
* Called to poll for completion of a specific tag.
|
||||
/**
|
||||
* @poll: Called to poll for completion of a specific tag.
|
||||
*/
|
||||
poll_fn *poll;
|
||||
|
||||
/**
|
||||
* @complete: Mark the request as complete.
|
||||
*/
|
||||
complete_fn *complete;
|
||||
|
||||
/*
|
||||
* Called when the block layer side of a hardware queue has been
|
||||
* set up, allowing the driver to allocate/init matching structures.
|
||||
* Ditto for exit/teardown.
|
||||
/**
|
||||
* @init_hctx: Called when the block layer side of a hardware queue has
|
||||
* been set up, allowing the driver to allocate/init matching
|
||||
* structures.
|
||||
*/
|
||||
init_hctx_fn *init_hctx;
|
||||
/**
|
||||
* @exit_hctx: Ditto for exit/teardown.
|
||||
*/
|
||||
exit_hctx_fn *exit_hctx;
|
||||
|
||||
/*
|
||||
* Called for every command allocated by the block layer to allow
|
||||
* the driver to set up driver specific data.
|
||||
/**
|
||||
* @init_request: Called for every command allocated by the block layer
|
||||
* to allow the driver to set up driver specific data.
|
||||
*
|
||||
* Tag greater than or equal to queue_depth is for setting up
|
||||
* flush request.
|
||||
*
|
||||
* Ditto for exit/teardown.
|
||||
*/
|
||||
init_request_fn *init_request;
|
||||
/**
|
||||
* @exit_request: Ditto for exit/teardown.
|
||||
*/
|
||||
exit_request_fn *exit_request;
|
||||
/* Called from inside blk_get_request() */
|
||||
|
||||
/**
|
||||
* @initialize_rq_fn: Called from inside blk_get_request().
|
||||
*/
|
||||
void (*initialize_rq_fn)(struct request *rq);
|
||||
|
||||
/*
|
||||
* Called before freeing one request which isn't completed yet,
|
||||
* and usually for freeing the driver private data
|
||||
/**
|
||||
* @cleanup_rq: Called before freeing one request which isn't completed
|
||||
* yet, and usually for freeing the driver private data.
|
||||
*/
|
||||
cleanup_rq_fn *cleanup_rq;
|
||||
|
||||
/*
|
||||
* If set, returns whether or not this queue currently is busy
|
||||
/**
|
||||
* @busy: If set, returns whether or not this queue currently is busy.
|
||||
*/
|
||||
busy_fn *busy;
|
||||
|
||||
/**
|
||||
* @map_queues: This allows drivers specify their own queue mapping by
|
||||
* overriding the setup-time function that builds the mq_map.
|
||||
*/
|
||||
map_queues_fn *map_queues;
|
||||
|
||||
#ifdef CONFIG_BLK_DEBUG_FS
|
||||
/*
|
||||
* Used by the debugfs implementation to show driver-specific
|
||||
/**
|
||||
* @show_rq: Used by the debugfs implementation to show driver-specific
|
||||
* information about a request.
|
||||
*/
|
||||
void (*show_rq)(struct seq_file *m, struct request *rq);
|
||||
|
|
@ -262,7 +424,6 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
|
|||
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
|
||||
|
||||
void blk_mq_free_request(struct request *rq);
|
||||
bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
|
||||
|
||||
bool blk_mq_queue_inflight(struct request_queue *q);
|
||||
|
||||
|
|
@ -301,9 +462,25 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
|
|||
return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_mq_rq_state() - read the current MQ_RQ_* state of a request
|
||||
* @rq: target request.
|
||||
*/
|
||||
static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
|
||||
{
|
||||
return READ_ONCE(rq->state);
|
||||
}
|
||||
|
||||
static inline int blk_mq_request_started(struct request *rq)
|
||||
{
|
||||
return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
|
||||
}
|
||||
|
||||
static inline int blk_mq_request_completed(struct request *rq)
|
||||
{
|
||||
return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
|
||||
}
|
||||
|
||||
int blk_mq_request_started(struct request *rq);
|
||||
int blk_mq_request_completed(struct request *rq);
|
||||
void blk_mq_start_request(struct request *rq);
|
||||
void blk_mq_end_request(struct request *rq, blk_status_t error);
|
||||
void __blk_mq_end_request(struct request *rq, blk_status_t error);
|
||||
|
|
@ -324,7 +501,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
|
|||
void blk_mq_quiesce_queue(struct request_queue *q);
|
||||
void blk_mq_unquiesce_queue(struct request_queue *q);
|
||||
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
|
||||
bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
|
||||
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
|
||||
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
|
||||
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
|
||||
busy_tag_iter_fn *fn, void *priv);
|
||||
|
|
@ -343,14 +520,29 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q);
|
|||
|
||||
unsigned int blk_mq_rq_cpu(struct request *rq);
|
||||
|
||||
/*
|
||||
/**
|
||||
* blk_mq_rq_from_pdu - cast a PDU to a request
|
||||
* @pdu: the PDU (Protocol Data Unit) to be cast
|
||||
*
|
||||
* Return: request
|
||||
*
|
||||
* Driver command data is immediately after the request. So subtract request
|
||||
* size to get back to the original request, add request size to get the PDU.
|
||||
* size to get back to the original request.
|
||||
*/
|
||||
static inline struct request *blk_mq_rq_from_pdu(void *pdu)
|
||||
{
|
||||
return pdu - sizeof(struct request);
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_mq_rq_to_pdu - cast a request to a PDU
|
||||
* @rq: the request to be cast
|
||||
*
|
||||
* Return: pointer to the PDU
|
||||
*
|
||||
* Driver command data is immediately after the request. So add request to get
|
||||
* the PDU.
|
||||
*/
|
||||
static inline void *blk_mq_rq_to_pdu(struct request *rq)
|
||||
{
|
||||
return rq + 1;
|
||||
|
|
|
|||
|
|
@ -153,10 +153,10 @@ struct bio {
|
|||
unsigned short bi_write_hint;
|
||||
blk_status_t bi_status;
|
||||
u8 bi_partno;
|
||||
atomic_t __bi_remaining;
|
||||
|
||||
struct bvec_iter bi_iter;
|
||||
|
||||
atomic_t __bi_remaining;
|
||||
bio_end_io_t *bi_end_io;
|
||||
|
||||
void *bi_private;
|
||||
|
|
@ -290,6 +290,12 @@ enum req_opf {
|
|||
REQ_OP_ZONE_RESET_ALL = 8,
|
||||
/* write the zero filled sector many times */
|
||||
REQ_OP_WRITE_ZEROES = 9,
|
||||
/* Open a zone */
|
||||
REQ_OP_ZONE_OPEN = 10,
|
||||
/* Close a zone */
|
||||
REQ_OP_ZONE_CLOSE = 11,
|
||||
/* Transition a zone to full */
|
||||
REQ_OP_ZONE_FINISH = 12,
|
||||
|
||||
/* SCSI passthrough using struct scsi_request */
|
||||
REQ_OP_SCSI_IN = 32,
|
||||
|
|
@ -371,6 +377,7 @@ enum stat_group {
|
|||
STAT_READ,
|
||||
STAT_WRITE,
|
||||
STAT_DISCARD,
|
||||
STAT_FLUSH,
|
||||
|
||||
NR_STAT_GROUPS
|
||||
};
|
||||
|
|
@ -417,6 +424,25 @@ static inline bool op_is_discard(unsigned int op)
|
|||
return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if a bio or request operation is a zone management operation, with
|
||||
* the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case
|
||||
* due to its different handling in the block layer and device response in
|
||||
* case of command failure.
|
||||
*/
|
||||
static inline bool op_is_zone_mgmt(enum req_opf op)
|
||||
{
|
||||
switch (op & REQ_OP_MASK) {
|
||||
case REQ_OP_ZONE_RESET:
|
||||
case REQ_OP_ZONE_OPEN:
|
||||
case REQ_OP_ZONE_CLOSE:
|
||||
case REQ_OP_ZONE_FINISH:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int op_stat_group(unsigned int op)
|
||||
{
|
||||
if (op_is_discard(op))
|
||||
|
|
|
|||
|
|
@ -360,14 +360,15 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev);
|
|||
extern int blkdev_report_zones(struct block_device *bdev,
|
||||
sector_t sector, struct blk_zone *zones,
|
||||
unsigned int *nr_zones);
|
||||
extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
|
||||
sector_t nr_sectors, gfp_t gfp_mask);
|
||||
extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
|
||||
sector_t sectors, sector_t nr_sectors,
|
||||
gfp_t gfp_mask);
|
||||
extern int blk_revalidate_disk_zones(struct gendisk *disk);
|
||||
|
||||
extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
unsigned int cmd, unsigned long arg);
|
||||
extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
unsigned int cmd, unsigned long arg);
|
||||
extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
unsigned int cmd, unsigned long arg);
|
||||
|
||||
#else /* CONFIG_BLK_DEV_ZONED */
|
||||
|
||||
|
|
@ -388,9 +389,9 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
|
|||
return -ENOTTY;
|
||||
}
|
||||
|
||||
static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
|
||||
fmode_t mode, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
|
||||
fmode_t mode, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
return -ENOTTY;
|
||||
}
|
||||
|
|
@ -411,7 +412,6 @@ struct request_queue {
|
|||
|
||||
/* sw queues */
|
||||
struct blk_mq_ctx __percpu *queue_ctx;
|
||||
unsigned int nr_queues;
|
||||
|
||||
unsigned int queue_depth;
|
||||
|
||||
|
|
|
|||
|
|
@ -216,15 +216,6 @@ int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
|
|||
*/
|
||||
bool sbitmap_any_bit_set(const struct sbitmap *sb);
|
||||
|
||||
/**
|
||||
* sbitmap_any_bit_clear() - Check for an unset bit in a &struct
|
||||
* sbitmap.
|
||||
* @sb: Bitmap to check.
|
||||
*
|
||||
* Return: true if any bit in the bitmap is clear, false otherwise.
|
||||
*/
|
||||
bool sbitmap_any_bit_clear(const struct sbitmap *sb);
|
||||
|
||||
#define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift)
|
||||
#define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U))
|
||||
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
|
|||
case IOC_OPAL_PSID_REVERT_TPR:
|
||||
case IOC_OPAL_MBR_DONE:
|
||||
case IOC_OPAL_WRITE_SHADOW_MBR:
|
||||
case IOC_OPAL_GENERIC_TABLE_RW:
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue