linux: files and file systems gregory kesden 15-412 fall ‘00

32
Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

Upload: aldous-anderson

Post on 14-Jan-2016

234 views

Category:

Documents


0 download

TRANSCRIPT

Page 1: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

Linux: Files and File Systems

Gregory Kesden 15-412 Fall ‘00

Page 2: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

Linux Virtual File System Overview

Rusling, David A, The Linux Kernel, V0.8-3, LDP, 1999, S.9.2.

Page 3: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

Major Data Structures (1/2)

• struct files_struct – per process table

• struct file_system_type – represents an entire file system

• struct super_block – represents super block (metadata) of file system

• struct super_operations – operations to manipulate the super-block (metadata)

• struct inode – represents a file

Page 4: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

Major Data Structures (2/2)

• struct inode_operations – operations to manipulate contents of an inode

• struct dentry – represents a name to inode mapping

• struct file – entry in open file table; represents state of an open file

• struct file_operations – collection of operations that can be performed on an open file (remember this from device drivers?)

Page 5: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

Per Process File Information

include/linux/sched.h:

struct files_struct { /* kept within task_struct (PCB) */

atomic_t count;

rwlock_t file_lock;

int max_fds;

int max_fdset;

int next_fd;

struct file ** fd; /* current fd array */

fd_set *close_on_exec;

fd_set *open_fds;

fd_set close_on_exec_init;

fd_set open_fds_init;

struct file * fd_array[NR_OPEN_DEFAULT];

};

Page 6: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

System-wide File Information

List of files in use: (struct list_head) sb->s_files

• One such list exists per file system. It holds the file structures for open files.

List of free files: struct list_head free_list

• This is a system-wide list. It holds file descriptors that are no longer used. Think of it as a big recycle bin.

List of newly created files: struct list_head anon_list

• This list holds newly created file structs. They are added to this list in response to an open that couldn’t be satisfied from the free_list.

Page 7: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct file (1/2)

struct file {

struct list_head f_list;

struct dentry *f_dentry;

struct file_operations *f_op;

atomic_t f_count;

unsigned int f_flags;

mode_t f_mode;

loff_t f_pos;

unsigned long f_reada, f_ramax, f_raend, f_ralen, f_rawin;

Page 8: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct file (2/2)

struct fown_struct f_owner;

unsigned int f_uid, f_gid;

int f_error;

unsigned long f_version;

/* needed for tty driver, and maybe others */

void *private_data;

};

Page 9: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct file_operations (1/2)

struct file_operations {

loff_t (*llseek) (struct file *, loff_t, int);

ssize_t (*read) (struct file *, char *, size_t, loff_t *);

ssize_t (*write) (struct file *, const char *, size_t, loff_t *);

int (*readdir) (struct file *, void *, filldir_t);

unsigned int (*poll) (struct file *, struct poll_table_struct *);

int (*ioctl) (struct inode *, struct file *, unsigned int,

unsigned long );

Page 10: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct file_operations (2/2)

int (*mmap) (struct file *, struct vm_area_struct *);

int (*open) (struct inode *, struct file *);

int (*flush) (struct file *);

int (*release) (struct inode *, struct file *);

int (*fsync) (struct file *, struct dentry *);

int (*fasync) (int, struct file *, int);

int (*check_media_change) (kdev_t dev);

int (*revalidate) (kdev_t dev);

int (*lock) (struct file *, int, struct file_lock *);

};

Page 11: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct inode (1/2)

struct inode {

struct list_head i_hash;

struct list_head i_list;

struct list_head i_dentry;

unsigned long i_ino;

kdev_t i_dev;

/* Usual metadata, such as might be seen with “ls -l” */

Page 12: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct inode (2/2)

struct inode_operations *i_op;

struct super_block *i_sb;

wait_queue_head_t i_wait;

struct vm_area_struct *i_mmap;

struct pipe_inode_info *i_pipe;

union {

struct minix_inode_info minix_i;

struct ext2_inode_info ext2_i;

} u;

}

Page 13: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

Memory Mapping (1/2)

struct vm_area_struct

{

struct mm_struct * vm_mm; /* VM area parameters */

/* linked list of VM areas per task, sorted by address */

unsigned long vm_start; unsigned long vm_end;

struct vm_area_struct *vm_next;

pgprot_t vm_page_prot;

/* AVL tree of VM areas per task, sorted by address */

unsigned short vm_flags;

Page 14: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

Memory Mapping (2/2)

short vm_avl_height;

struct vm_area_struct * vm_avl_left;

struct vm_area_struct * vm_avl_right;

/* For areas with inode, the list inode->i_mmap, for shm areas, the list of attaches, otherwise unused. */

struct vm_area_struct *vm_next_share;

struct vm_area_struct **vm_pprev_share;

struct vm_operations_struct * vm_ops;

unsigned long vm_offset;

struct file * vm_file; void * vm_private_data; /* was vm_pte (shared mem) */

};

Page 15: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

Memory Mapping Operations

struct vm_operations_struct {

void (*open)(struct vm_area_struct * area);

void (*close)(struct vm_area_struct * area);

void (*unmap)(struct vm_area_struct *area, unsigned long, size_t);

void (*protect)(struct vm_area_struct *area, unsigned long, size_t,

unsigned int newprot);

int (*sync)(struct vm_area_struct *area, unsigned long, size_t,

unsigned int flags);

void (*advise)(struct vm_area_struct *area, unsigned long, size_t,

unsigned int advise);

unsigned long (*nopage)(struct vm_area_struct * area,

unsigned long address, int write_access);

unsigned long (*wppage)(struct vm_area_struct * area,

unsigned long address, unsigned long page);

int (*swapout)(struct vm_area_struct *, struct page *);

};

Page 16: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

Dirty inodes

Inode_hashtable

sb->write_inode(sync one)

Fs storage

Used inodes

Unused inodes

Fs storage

sb->read_inode(iget)

sb->clear_inode(freeing inos) or sb->delete_inode(iput)

media fs only

(mark_inode_dirty)

Fs storage

Inode Cache

Adapted from Linux Virtual File System Presentation © Peter J. Braam/CMU

Page 17: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct dentry (1/2)

struct dentry {

int d_count;

unsigned int d_flags;

struct inode * d_inode; /* Where the name belongs to */

struct dentry * d_parent; /* parent directory */

struct dentry * d_mounts; /* mount information */

struct dentry * d_covers;

struct list_head d_hash; /* lookup hash list */

struct list_head d_lru; /* d_count = 0 LRU list */

struct list_head d_child; /* child of parent list */

struct list_head d_subdirs; /* our children */

Page 18: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct dentry (2/2)

struct list_head d_alias; /* inode alias list */

struct qstr d_name;

unsigned long d_time; /* used by d_revalidate */

struct dentry_operations *d_op;

struct super_block * d_sb; /* The root of the dentry tree */

unsigned long d_reftime; /* last time referenced */

void * d_fsdata; /* fs-specific data */

/* small names */

unsigned char d_iname[DNAME_INLINE_LEN];

};

Page 19: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

The Buffer Cache

Rusling, David A, The Linux Kernel, V0.8-3, LDP, 1999, S.9.3.

Page 20: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

dentry_operations

struct dentry_operations {

int (*d_revalidate)(struct dentry *, int);

int (*d_hash) (struct dentry *, struct qstr *);

int (*d_compare) (struct dentry *, struct qstr *,

struct qstr *);

void (*d_delete)(struct dentry *);

void (*d_release)(struct dentry *);

};

Page 21: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

Dcache

dentry_hashtable (hash chains)

unused dentries (dentry->lru chains)

namei inode->lookup dentry->add

prune dentry->invalidate dentry->drop

Replacement:

level1_cache/level1_head

• LRU list of recently translated entries. Entries added to the end may displace older entries if cache is full.

level2_cache/level2_head

• LRU list of recently accessed entries (moved from level 1 on second access).

Level 2 is safer – only displaced by repeatedly accessed entry, not just a new entry.

Dentry->hash(device #, name) list head

Adapted from Linux Virtual File System Presentation © Peter J. Braam/CMU

Page 22: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct file_system_type

struct file_system_type {

const char *name;

int fs_flags;

struct super_block * (*read_super)

(struct super_block *, void *, int);

struct file_system_type * next;

};

Page 23: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct super_block (1/2)

struct super_block {

struct list_head s_list; /* Keep this first */

kdev_t s_dev;

unsigned long s_blocksize;

unsigned char s_lock;

unsigned char s_rd_only;

unsigned char s_dirt;

struct inode *s_ibasket;

short int s_ibasket_count;

Page 24: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct super_block (2/2)

short int s_ibasket_max;

struct list_head s_dirty; /* dirty inodes */

struct list_head s_files;

union {

struct minix_sb_info minix_sb;

struct ext2_sb_info ext2_sb;

struct hpfs_sb_info hpfs_sb;

….

} u;

}

Page 25: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct super_operations

struct super_operations {

void (*read_inode) (struct inode *);

void (*write_inode) (struct inode *);

void (*put_inode) (struct inode *);

void (*delete_inode) (struct inode *);

int (*notify_change) (struct dentry *, struct iattr *);

void (*put_super) (struct super_block *);

void (*write_super) (struct super_block *);

int (*statfs) (struct super_block *, struct statfs *, int);

int (*remount_fs) (struct super_block *, int *, char *);

void (*clear_inode) (struct inode *);

void (*umount_begin) (struct super_block *);

};

Page 26: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

The Buffer Cache

Two main parts:

• Lists of empty buffers of several sizes: 512B, 1K, 2K, 4K, 8K

• Open-chaining hash table of block buffers: hash(device #, block #) is the index

Properties

• Block buffers are either in a free list or in the hash table

• All block buffers are also kept in an LRU list for replacement

Page 27: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

The Buffer Cache

Rusling, David A, The Linux Kernel, V0.8-3, LDP, 1999, S.9.3.

Page 28: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

Victim Selection

Each block buffer is maintained on one of the following LRU lists:

• BUF_CLEAN

• BUF_UNSHARED

• BUF_SHARED

• BUF_LOCKED – scheduled to be flushed

• BUF_LOCKED1 – super block and inode buffers that can’t be flushed

• BUF_DIRTY

The victim is the best clean buffer. If a victim can’t be found, the system will try to create more buffers. If that fails, it will try to free block buffers of other sizes and try again.

Page 29: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

The bdflush Kernel Daemon

The bdflush daemon flushes dirty blocks, creating clean blocks.

It normally sleeps, but wakes up:

• If the system runs out of clean buffers

• If more than 60% (configurable) of the buffers are dirty

Page 30: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct buffer_head (1/3)

struct buffer_head {

/* First cache line: */

struct buffer_head *b_next; /* Hash queue list */

unsigned long b_blocknr; /* block number */

unsigned short b_size; /* block size */

unsigned short b_list; /* List that this buffer appears */

kdev_t b_dev; /* device (B_FREE = free) */

atomic_t b_count; /* users using this block */

kdev_t b_rdev; /* Real device */

Page 31: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct buffer_head (2/3)

unsigned long b_state; /* buffer state bitmap (see above) */

unsigned long b_flushtime; /* Time to write (dirty) buffer */

struct buffer_head *b_next_free;/* lru/free list linkage */

struct buffer_head *b_prev_free;/* doubly linked list of buffers */

struct buffer_head *b_this_page;/* circular list of buffers in one

page */

struct buffer_head *b_reqnext; /* request queue */

Page 32: Linux: Files and File Systems Gregory Kesden 15-412 Fall ‘00

struct buffer_head (3/3)

struct buffer_head **b_pprev; /* 2x linked list of hash-queue */

char *b_data; /* pointer to data block (1024 bytes) */

void (*b_end_io)(struct buffer_head *bh, int uptodate);

/* I/O completion */

void *b_dev_id;

unsigned long b_rsector; /* Real buffer location on disk */

wait_queue_head_t b_wait;

struct kiobuf * b_kiobuf; /* kiobuf which owns this IO */

};