static const int read_expire = HZ / 2; /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
static const int writes_starved = 2; /* max times reads can starve a write */
static const int fifo_batch = 16; /* # of sequential requests treated as one
by the above parameters. For throughput. */
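Since HZ jiffies always make up one second, these defaults translate to fixed wall-clock values regardless of the kernel's HZ setting; restated for reference:
/*
 * read_expire    = HZ / 2  ->  500 ms
 * write_expire   = 5 * HZ  ->  5 s   (both are soft limits)
 * writes_starved = 2            read batches may run before a pending write batch
 * fifo_batch     = 16           consecutive sector-ordered requests per batch
 */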
struct deadline_data {
/*
* run time data
*/
/*
* requests (deadline_rq s) are present on both sort_list and fifo_list
*/
struct rb_root sort_list[2];/* requests ordered by start sector in a red-black tree, one per direction */
struct list_head fifo_list[2];/* requests ordered by expiry time in a FIFO list, one per direction */
/*
 * next in sort order. read, write or both are NULL
 */
struct request *next_rq[2];/* the next request in sort_list order */
/* number of requests submitted back to back; batching may continue while this stays below fifo_batch */
unsigned int batching; /* number of sequential requests made */
sector_t last_sector; /* head position */
unsigned int starved; /* times reads have starved writes */
/*
 * settings that change how the i/o scheduler behaves
 */
int fifo_expire[2];/* configurable expiry times for read and write requests */
int fifo_batch;/* max requests that may be submitted consecutively in one direction */
int writes_starved;/* max number of times reads may starve writes */
int front_merges;/* whether front merging is enabled; given typical file access patterns it rarely pays off */
};
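One indexing convention worth calling out (a sketch, relying on the block layer's READ == 0 and WRITE == 1): a request's data direction selects the matching tree and list directly.
const int dir = rq_data_dir(rq);           /* 0 for READ, 1 for WRITE */
struct rb_root *sort = &dd->sort_list[dir];
struct list_head *fifo = &dd->fifo_list[dir];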
static void deadline_move_request(struct deadline_data *, struct request *);
static inline struct rb_root *
deadline_rb_root(struct deadline_data *dd, struct request *rq)
{
return &dd->sort_list[rq_data_dir(rq)];
}
/*
 * get the request after 'rq' in sector-sorted order
*/
static inline struct request *
deadline_latter_request(struct request *rq)
{
struct rb_node *node = rb_next(&rq->rb_node);
if (node)
return rb_entry_rq(node);
/*
 * struct request embeds a struct rb_node (the usual rbtree usage
 * pattern); rb_entry_rq() recovers the request's address from the
 * rb_node's address and the offset of rb_node within struct request.
 */
return NULL;
}
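As the comment above says, rb_entry_rq() is the classic container_of pattern; a minimal sketch of the idea (the real macro lives in include/linux/elevator.h, built on rb_entry()):
/* Illustrative sketch only, mirroring rb_entry_rq() from
 * include/linux/elevator.h: recover the enclosing request from
 * the address of its embedded rb_node. */
#define sketch_rb_entry_rq(node) \
	container_of((node), struct request, rb_node)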
static void
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
struct rb_root *root = deadline_rb_root(dd, rq);/* rb-tree root for this request's direction */
struct request *__alias;
/*
 * Insert rq into the rb-tree for its direction.
 * elv_rb_add() returns NULL on a clean insert; a non-NULL return means a
 * request at the same sector (an alias) already exists in the tree, in
 * which case the alias is moved out of the scheduler onto the device's
 * dispatch queue to make room for rq.
 */
while (unlikely(__alias = elv_rb_add(root, rq)))
deadline_move_request(dd, __alias);
}
static inline void
deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
{
const int data_dir = rq_data_dir(rq);
/*
 * If the request being deleted is the cached next request for its
 * direction, advance next_rq to the following request in sort order
 * before removing it from the rb-tree; otherwise just remove it.
 */
if (dd->next_rq[data_dir] == rq)
dd->next_rq[data_dir] = deadline_latter_request(rq);
elv_rb_del(deadline_rb_root(dd, rq), rq);
}
/*
* add rq to rbtree and fifo
*/
static void
deadline_add_request(struct request_queue *q, struct request *rq)
{
struct deadline_data *dd = q->elevator->elevator_data;/* the scheduler's private data */
const int data_dir = rq_data_dir(rq);/* request direction: read or write */
deadline_add_rq_rb(dd, rq);
/*
 * deadline_add_rq_rb() covers two cases:
 * 1. the request is not yet in the scheduler queue: it is inserted into
 *    the rb-tree matching its read/write direction;
 * 2. an aliased request already sits at that sector: the alias is first
 *    dispatched from the scheduler to the device's request queue.
 */
/*
 * set expire time and add to fifo list
 */
rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);/* stamp the request's deadline */
list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);/* append to this direction's FIFO */
}
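A concrete illustration of the expiry stamping, with hypothetical numbers and assuming HZ == 1000:
/*
 * jiffies at enqueue     = 10000
 * dd->fifo_expire[READ]  = HZ / 2 = 500
 * rq_fifo_time(rq)       = 10500
 *
 * deadline_check_fifo() below starts flagging the request as expired
 * once jiffies moves past 10500, i.e. 500 ms after it was queued.
 */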
/*
* remove rq from rbtree and fifo.
*/
static void deadline_remove_request(struct request_queue *q, struct request *rq)
{
struct deadline_data *dd = q->elevator->elevator_data;/* the scheduler's private data */
rq_fifo_clear(rq);/* unlink the request from fifo_list */
deadline_del_rq_rb(dd, rq);/* and remove it from the rb-tree */
}
/*
 * deadline_merge() checks whether the bio can be merged onto the front of an
 * existing request. elv_rb_find() searches the sector-sorted sort_list (a
 * red-black tree) for a request that starts exactly where the bio ends; this
 * only locates a candidate -- whether the bio may really be merged into that
 * request is decided separately by elv_rq_merge_ok(). On success the function
 * returns ELEVATOR_FRONT_MERGE, otherwise ELEVATOR_NO_MERGE.
 */
static int
deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
struct deadline_data *dd = q->elevator->elevator_data;
struct request *__rq;
int ret;
/*
* check for front merge
*/
if (dd->front_merges) {/* front merging can be switched off via sysfs */
sector_t sector = bio->bi_sector + bio_sectors(bio);
/* the bio's end sector: its start sector plus its length */
__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
/*
 * elv_rb_find() checks for a request adjoining the bio:
 * 1. a NULL return means no request starts at the bio's end sector;
 * 2. otherwise the returned request is a candidate for a front merge.
 */
if (__rq) {
BUG_ON(sector != blk_rq_pos(__rq));
if (elv_rq_merge_ok(__rq, bio)) {/* safe to merge into this request? */
ret = ELEVATOR_FRONT_MERGE;/* a front merge is possible */
goto out;
}
}
}
return ELEVATOR_NO_MERGE;
/*
 * ELEVATOR_NO_MERGE covers three cases:
 * 1. front_merges is 0;
 * 2. front_merges is 1, but no request adjoins the bio;
 * 3. front_merges is 1 and an adjoining request exists, but merging
 *    with it would not be safe.
 */
out:
*req = __rq;/* hand back the request the bio will be merged into */
return ret;
}
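A small hypothetical example makes the geometry concrete (sector numbers invented for illustration):
/*
 * A bio covering sectors [100, 108):
 *   bio->bi_sector + bio_sectors(bio) == 108
 *   elv_rb_find(..., 108) returns a request with blk_rq_pos(rq) == 108,
 *   if one exists; merging the bio onto its front yields a single
 *   contiguous request that now starts at sector 100.
 */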
static void deadline_merged_request(struct request_queue *q,
struct request *req, int type)
{
struct deadline_data *dd = q->elevator->elevator_data;
/*
* if the merge was a front merge, we need to reposition request
*/
if (type == ELEVATOR_FRONT_MERGE) {
/*
 * A front merge changed the request's start sector, so its position in
 * the sector-sorted rb-tree is stale: delete and re-insert it, which
 * also keeps further front merges possible.
 */
elv_rb_del(deadline_rb_root(dd, req), req);
deadline_add_rq_rb(dd, req);
}
}
/* drop next, the request that was absorbed by the merge */
static void
deadline_merged_requests(struct request_queue *q, struct request *req,
struct request *next)
{
/*
* if next expires before rq, assign its expire time to rq
* and move into next position (next will be deleted) in fifo
*/
if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
list_move(&req->queuelist, &next->queuelist);
rq_set_fifo_time(req, rq_fifo_time(next));
}
}
/*
* kill knowledge of next, this one is a goner
*/
deadline_remove_request(q, next);
}
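A brief hypothetical case of the expiry inheritance above:
/*
 * req expires at jiffies 900, next at 700. Because next (which is
 * about to be deleted) held the earlier deadline, req takes over both
 * next's FIFO position and its expiry time, so the merged request is
 * still served by the earlier of the two deadlines.
 */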
/*
* move request from sort list to dispatch queue.
*/
static inline void
deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
{
struct request_queue *q = rq->q;
deadline_remove_request(q, rq);/* unlink rq from the deadline sort_list and fifo_list */
elv_dispatch_add_tail(q, rq);/* and append it to the tail of the device's request queue */
}
/*
* move an entry to dispatch queue
*/
static void
deadline_move_request(struct deadline_data *dd, struct request *rq)
{
const int data_dir = rq_data_dir(rq);
dd->next_rq[READ] = NULL;
dd->next_rq[WRITE] = NULL;
dd->next_rq[data_dir] = deadline_latter_request(rq);
dd->last_sector = rq_end_sector(rq);
/*
* take it off the sort and fifo list, move
* to dispatch queue
*/
deadline_move_to_dispatch(dd, rq);
}
/*
* deadline_check_fifo returns 0 if there are no expired requests on the fifo,
* 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
*/
/* check whether the first (oldest) request on fifo_list has expired */
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
{
struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
/*
* rq is expired!
*/
if (time_after(jiffies, rq_fifo_time(rq)))
return 1;
return 0;
}
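time_after() is the wrap-safe jiffies comparison from <linux/jiffies.h>; conceptually it reduces to a signed subtraction (a simplified sketch of the real macro):
/* Simplified sketch of time_after(a, b): true iff a is later than b,
 * correct even when the jiffies counter has wrapped around. */
#define sketch_time_after(a, b) ((long)((b) - (a)) < 0)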
/*
* deadline_dispatch_requests selects the best request according to
* read/write expire, fifo_batch, etc
*/
static int deadline_dispatch_requests(struct request_queue *q, int force)
{
struct deadline_data *dd = q->elevator->elevator_data;
const int reads = !list_empty(&dd->fifo_list[READ]);
const int writes = !list_empty(&dd->fifo_list[WRITE]);
struct request *rq;
int data_dir;
/*
* batches are currently reads XOR writes
*/
if (dd->next_rq[WRITE])
rq = dd->next_rq[WRITE];
else
rq = dd->next_rq[READ];
if (rq && dd->batching < dd->fifo_batch)
/* we have a next request and are still entitled to batch,
 * so keep dispatching in the current direction */
goto dispatch_request;
/*
 * at this point we are not running a batch. select the appropriate
 * data direction (read / write)
 */
if (reads) {/* reads are pending and are served preferentially */
BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));
if (writes && (dd->starved++ >= dd->writes_starved))
/* writes are pending and have already been starved the maximum
 * number of times, so serve them this round */
goto dispatch_writes;
data_dir = READ;
goto dispatch_find_request;
}
/*
 * there are either no reads or writes have been starved
 */
if (writes) {/* no reads, or writes have been starved long enough */
dispatch_writes:
BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));
dd->starved = 0;/* a write pass resets the starvation counter */
data_dir = WRITE;
goto dispatch_find_request;
}
return 0;/* neither reads nor writes are pending */
dispatch_find_request:
/*
* we are not running a batch, find best request for selected data_dir
*/
if (deadline_check_fifo(dd, data_dir) || !dd->next_rq[data_dir]) {
/*
 * A deadline has expired, the last request was in the other
 * direction, or we have run out of higher-sectored requests.
 * Start again from the longest-waiting request, the one with the
 * earliest expiry time, and resume the elevator scan from there.
 */
rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
} else {
/*
 * The last req was the same dir and we have a next request in
 * sort order. No expired requests, so simply continue the scan
 * in the direction of increasing sector numbers.
 */
rq = dd->next_rq[data_dir];
}
dd->batching = 0;/* a new direction starts a fresh batch count */
dispatch_request:
/*
* rq is the selected appropriate request.
*/
dd->batching++;
deadline_move_request(dd, rq);
return 1;
}
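Condensing the above, the dispatch decision runs roughly as follows (a restatement, not additional kernel code):
/*
 * 1. A batch is in progress and under fifo_batch?  Keep following
 *    next_rq in the current direction.
 * 2. Otherwise pick a direction: reads win, unless writes have already
 *    been starved writes_starved times.
 * 3. Within that direction: if the oldest FIFO entry has expired, or
 *    there is no next_rq to continue from, restart at the FIFO head;
 *    otherwise carry on the sector-ordered scan at next_rq.
 * 4. Dispatch the chosen request and count it against the batch.
 */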
/* report whether the deadline scheduler holds any requests */
static int deadline_queue_empty(struct request_queue *q)
{
struct deadline_data *dd = q->elevator->elevator_data;
return list_empty(&dd->fifo_list[WRITE])
&& list_empty(&dd->fifo_list[READ]);
}
static void deadline_exit_queue(struct elevator_queue *e)
{
struct deadline_data *dd = e->elevator_data;
BUG_ON(!list_empty(&dd->fifo_list[READ]));
BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
kfree(dd);
}
/*
* initialize elevator private data (deadline_data).
*/
static void *deadline_init_queue(struct request_queue *q)
{
struct deadline_data *dd;
dd = kmalloc_node(sizeof(*dd), GFP_KERNEL | __GFP_ZERO, q->node);
if (!dd)
return NULL;
INIT_LIST_HEAD(&dd->fifo_list[READ]);
INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
dd->sort_list[READ] = RB_ROOT;
dd->sort_list[WRITE] = RB_ROOT;
dd->fifo_expire[READ] = read_expire;
dd->fifo_expire[WRITE] = write_expire;
dd->writes_starved = writes_starved;
dd->front_merges = 1;
dd->fifo_batch = fifo_batch;
return dd;
}
/*
* sysfs parts below
*/
static ssize_t
deadline_var_show(int var, char *page)
{
return sprintf(page, "%d\n", var);
}
static ssize_t
deadline_var_store(int *var, const char *page, size_t count)
{
char *p = (char *) page;
*var = simple_strtol(p, &p, 10);
return count;
}
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
static ssize_t __FUNC(struct elevator_queue *e, char *page) \
{ \
	struct deadline_data *dd = e->elevator_data; \
	int __data = __VAR; \
	if (__CONV) \
		__data = jiffies_to_msecs(__data); \
	return deadline_var_show(__data, (page)); \
}
SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
	struct deadline_data *dd = e->elevator_data; \
	int __data; \
	int ret = deadline_var_store(&__data, (page), count); \
	if (__data < (MIN)) \
		__data = (MIN); \
	else if (__data > (MAX)) \
		__data = (MAX); \
	if (__CONV) \
		*(__PTR) = msecs_to_jiffies(__data); \
	else \
		*(__PTR) = __data; \
	return ret; \
}
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION
#define DD_ATTR(name) __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, deadline_##name##_store)
static struct elv_fs_entry deadline_attrs[] = {
DD_ATTR(read_expire),
DD_ATTR(write_expire),
DD_ATTR(writes_starved),
DD_ATTR(front_merges),
DD_ATTR(fifo_batch),
__ATTR_NULL
};
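When this scheduler is active these attributes show up under /sys/block/<dev>/queue/iosched/; a minimal user-space sketch (hypothetical device name sda) of tightening the read deadline, which the STORE_FUNCTION above converts from milliseconds to jiffies:
#include <stdio.h>

int main(void)
{
	/* Hypothetical path; assumes "deadline" is the active scheduler on sda. */
	FILE *f = fopen("/sys/block/sda/queue/iosched/read_expire", "w");

	if (!f)
		return 1;
	fprintf(f, "%d\n", 250);	/* read deadline, in milliseconds */
	return fclose(f) ? 1 : 0;
}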
static struct elevator_type iosched_deadline = {
.ops = {
.elevator_merge_fn = deadline_merge,
.elevator_merged_fn = deadline_merged_request,
.elevator_merge_req_fn = deadline_merged_requests,
.elevator_dispatch_fn = deadline_dispatch_requests,
.elevator_add_req_fn = deadline_add_request,
.elevator_queue_empty_fn = deadline_queue_empty,
.elevator_former_req_fn = elv_rb_former_request,
.elevator_latter_req_fn = elv_rb_latter_request,
.elevator_init_fn = deadline_init_queue,
.elevator_exit_fn = deadline_exit_queue,
},
.elevator_attrs = deadline_attrs,
.elevator_name = "deadline",
.elevator_owner = THIS_MODULE,
};
static int __init deadline_init(void)
{
elv_register(&iosched_deadline);/* register the elevator type with the block layer */
return 0;
}
static void __exit deadline_exit(void)
{
elv_unregister(&iosched_deadline);
}
module_init(deadline_init);
module_exit(deadline_exit);
Original article: http://blog.csdn.net/hs794502825/article/details/24664259