标签:
页缓冲在《Linux内核情景分析》一书第5.6节“文件的写与读”中说明得很详细,这里摘抄下来:
在文件系统层中有三个主要的数据结构:file结构、dentry结构和inode结构;
file结构:代表目标文件的一个上下文,不同进程可以在同一文件上建立不同的上下文,而且同一进程也可以通过打开一个文件多次而建立起多个上下文。因此不能在file结构上设置缓冲区队列,因为这些file结构体之间都不共享。
dentry结构体:该结构体是文件名结构体,通过软/硬链接可以得到多个dentry结构体对应一个文件,dentry结构体和文件也不是一对一关系,所以也不能在该结构体上建立缓冲区队列;
inode结构体:很显然就只有inode结构体了,inode结构体和文件是一对一的关系,可以这么说inode就是代表文件。在inode结构体上设置了i_mapping指针,该指针指向了一个address_space数据结构,一般来说该数据结构就是inode->i_data,缓冲区队列就是在该数据结构中;
挂在缓冲区队列中的不是记录块而是内存页面,因此当一个进程调用mmap()函数将一个文件映射到它的用户空间时,它只要设置相应的内存映射表,就可以很自然地把这些缓存页面映射到进程的用户空间。所以才又起名为i_mapping。
这里还要了解下基数树概念,先看看图(图片来自《深入linux内核架构》)
基数树不是平衡树。树本身由两种不同的数据结构组成:树根节点和非叶子节点。树根节点由简单的数据结构表示,其中包含了树的高度和指向组成树的第一个节点数据结构的指针。节点本质上是数组,count是该节点的指针计数,其他的都是指向下一层节点的指针。而叶子节点是指向page的指针;
其中节点上的数据结构还包含了搜索标记,比如脏页标记和回写标记,借助这些标记可以很快地确定树的哪些部分含有带标记的页;
块缓冲
块缓冲在结构上由两个部分组成:
1、缓冲头:包含与缓冲区状态相关的所有管理数据,如块号、块长度、访问计数器等。有用的数据并不直接存储在缓冲头之后,而是存储在由缓冲头中的指针所指向的物理内存独立区域中。
2、有用的数据保存在专门分配的页中,这些页也可能同时存在于页缓冲中。
缓冲头:
/*
* Historically, a buffer_head was used to map a single block
* within a page, and of course as the unit of I/O through the
* filesystem and block layers. Nowadays the basic I/O unit
* is the bio, and buffer_heads are used for extracting block
* mappings (via a get_block_t call), for tracking state within
* a page (via a page_mapping) and for wrapping bio submission
* for backward compatibility reasons (e.g. submit_bh).
*/
struct buffer_head {
	/* Buffer state bitmap (flag bits; see enum bh_state_bits). */
	unsigned long b_state;

	/* Next buffer head in the circular list of this page's buffers. */
	struct buffer_head *b_this_page;

	/* The page this buffer head is mapped to (the page owning the block). */
	struct page *b_page;

	/* Start block number: logical block number on the block device. */
	sector_t b_blocknr;

	/* Size of the mapping, i.e. the block size. */
	size_t b_size;

	/* Pointer to the block's data within the page. */
	char *b_data;

	/* Block device descriptor. */
	struct block_device *b_bdev;

	/* I/O completion callback. */
	bh_end_io_t *b_end_io;

	/* Reserved for b_end_io: data argument for the completion callback. */
	void *b_private;

	/* List linkage used when associated with another mapping. */
	struct list_head b_assoc_buffers;

	/* The mapping this buffer is associated with. */
	struct address_space *b_assoc_map;

	/* Usage counter: number of users of this buffer_head. */
	atomic_t b_count;
};
缓冲区头部的通用标志
/*
 * Generic buffer-head state flags. Enumerators are numbered implicitly
 * from 0 in declaration order, so the order below must not change.
 */
enum bh_state_bits {
	/* Contains valid data. */
	BH_Uptodate,
	/* Is dirty. */
	BH_Dirty,
	/* Is locked. */
	BH_Lock,
	/* Has been submitted for I/O. */
	BH_Req,
	/*
	 * Used by the first bh in a page, to serialise I/O completion
	 * of the other buffers in the page.
	 */
	BH_Uptodate_Lock,
	/* Has a disk mapping (b_bdev and b_blocknr are valid). */
	BH_Mapped,
	/* Disk mapping was newly created by get_block. */
	BH_New,
	/* Is under end_buffer_async_read I/O (asynchronous read). */
	BH_Async_Read,
	/* Is under end_buffer_async_write I/O (asynchronous write). */
	BH_Async_Write,
	/* Buffer is not yet allocated on disk. */
	BH_Delay,
	/* Block is followed by a discontiguity. */
	BH_Boundary,
	/* I/O error on write. */
	BH_Write_EIO,
	/* Buffer is allocated on disk but not written. */
	BH_Unwritten,
	/* Buffer error printks are to be quiet. */
	BH_Quiet,
	/* Buffer contains metadata. */
	BH_Meta,
	/* Buffer should be submitted with REQ_PRIO. */
	BH_Prio,
	/*
	 * Not a state bit, but the first bit available for private
	 * allocation by other entities.
	 */
	BH_PrivateStart,
};
从上图可以看出一个缓冲页对应了4个缓冲区,这就统一了page cache和buffer cache。修改缓冲区或者缓冲页,它们之间都会相互影响。
address_space结构体:
struct address_space {
	/* Owner: inode or block_device (the host file's inode). */
	struct inode *host;

	/* Radix tree of all pages (tree root). */
	struct radix_tree_root page_tree;

	/* Lock protecting the radix tree. */
	spinlock_t tree_lock;

	/* Count of VM_SHARED mappings. */
	unsigned int i_mmap_writable;

	/* Tree of private and shared mappings. */
	struct rb_root i_mmap;

	/* List of VM_NONLINEAR mappings. */
	struct list_head i_mmap_nonlinear;

	/* Protects the mapping tree, count and list above. */
	struct mutex i_mmap_mutex;

	/* Protected by tree_lock together with the radix tree. */
	/* Total number of pages. */
	unsigned long nrpages;

	/* Writeback starts here. */
	pgoff_t writeback_index;

	/* Methods (operations table). */
	const struct address_space_operations *a_ops;

	/* Error bits / gfp mask. */
	unsigned long flags;

	/* Device readahead, etc. */
	struct backing_dev_info *backing_dev_info;

	/* The three fields below are for use by the address_space. */
	spinlock_t private_lock;
	struct list_head private_list;
	void *private_data;
} __attribute__((aligned(sizeof(long))));
struct inode *host和struct radix_tree_root page_tree关联了文件和内存页。
346 struct address_space_operations {
347 int (*writepage)(struct page *page, struct writeback_control *wbc);//写操作,从页写到所有者的磁盘映像
348 int (*readpage)(struct file *, struct page *);//读操作,从所有者磁盘映像读取到页
349
350 /* Write back some dirty pages from this mapping. */
351 int (*writepages)(struct address_space *, struct writeback_control *);//指定数量的所有者脏页回写磁盘
352
353 /* Set a page dirty. Return true if this dirtied it */
354 int (*set_page_dirty)(struct page *page);//把所有者的页设置为脏页
355
356 int (*readpages)(struct file *filp, struct address_space *mapping,
357 struct list_head *pages, unsigned nr_pages);//从磁盘中读取所有者页的链表
358
359 int (*write_begin)(struct file *, struct address_space *mapping,
360 loff_t pos, unsigned len, unsigned flags,
361 struct page **pagep, void **fsdata);//
362 int (*write_end)(struct file *, struct address_space *mapping,
363 loff_t pos, unsigned len, unsigned copied,
364 struct page *page, void *fsdata);
365
366 /* Unfortunately this kludge is needed for FIBMAP. Don't use it */
367 sector_t (*bmap)(struct address_space *, sector_t);
368 void (*invalidatepage) (struct page *, unsigned long);
369 int (*releasepage) (struct page *, gfp_t);
370 void (*freepage)(struct page *);
371 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
372 loff_t offset, unsigned long nr_segs);
373 int (*get_xip_mem)(struct address_space *, pgoff_t, int,
374 void **, unsigned long *);
375 /*
376 * migrate the contents of a page to the specified target. If sync
377 * is false, it must not block.
378 */
379 int (*migratepage) (struct address_space *,
380 struct page *, struct page *, enum migrate_mode);
381 int (*launder_page) (struct page *);
382 int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
383 unsigned long);
384 int (*error_remove_page)(struct address_space *, struct page *);
385
386 /* swapfile support */
387 int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
388 sector_t *span);
389 void (*swap_deactivate)(struct file *file);
390 };
391
标签:
原文地址:http://blog.csdn.net/yuzhihui_no1/article/details/50951126