diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2000-07-09 23:29:35 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-07-09 23:29:35 +0000 |
commit | 35385d7a83b4cae6d5ea5f80f3b3377d94178344 (patch) | |
tree | 49494d95dfef31ba4f9a697d31e4028cf65a57bd /Documentation/filesystems | |
parent | d9d8062e7b49943b2a2fb034f817a9fc217fd40f (diff) |
Merge with 2.4.0-test3-pre7.
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- | Documentation/filesystems/Locking | 315 |
1 files changed, 315 insertions, 0 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking new file mode 100644 index 000000000..0dd2cc8eb --- /dev/null +++ b/Documentation/filesystems/Locking @@ -0,0 +1,315 @@ + The text below describes the locking rules for VFS-related methods. +It is (believed to be) up-to-date. *Please*, if you change anything in +prototypes or locking protocols - update this file. And update the relevant +instances in the tree, don't leave that to maintainers of filesystems/devices/ +etc. At the very least, put the list of dubious cases in the end of this file. +Don't turn it into a log - maintainers of out-of-the-tree code are supposed to +be able to use diff(1). + Things currently missing here: socket operations. Alexey? + +--------------------------- dentry_operations -------------------------- +prototypes: + int (*d_revalidate)(struct dentry *, int); + int (*d_hash) (struct dentry *, struct qstr *); + int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); + int (*d_delete)(struct dentry *); + void (*d_release)(struct dentry *); + void (*d_iput)(struct dentry *, struct inode *); + +locking rules: + none have BKL + dcache_lock may block +d_revalidate: no yes +d_hash: no yes +d_compare: yes no +d_delete: yes no +d_release: no yes +d_iput: no yes + +--------------------------- inode_operations --------------------------- +prototypes: + int (*create) (struct inode *,struct dentry *,int); + struct dentry * (*lookup) (struct inode *,struct dentry *); + int (*link) (struct dentry *,struct inode *,struct dentry *); + int (*unlink) (struct inode *,struct dentry *); + int (*symlink) (struct inode *,struct dentry *,const char *); + int (*mkdir) (struct inode *,struct dentry *,int); + int (*rmdir) (struct inode *,struct dentry *); + int (*mknod) (struct inode *,struct dentry *,int,int); + int (*rename) (struct inode *, struct dentry *, + struct inode *, struct dentry *); + int (*readlink) (struct dentry *, char *,int); + int (*follow_link) (struct 
dentry *, struct nameidata *); + void (*truncate) (struct inode *); + int (*permission) (struct inode *, int); + int (*revalidate) (struct dentry *); + int (*setattr) (struct dentry *, struct iattr *); + int (*getattr) (struct dentry *, struct iattr *); + +locking rules: + all may block + BKL i_sem(inode) i_zombie(inode) +lookup: yes yes no +create: yes yes yes +link: yes yes yes +mknod: yes yes yes +mkdir: yes yes yes +unlink: yes yes yes +rmdir: yes yes yes (see below) +rename: yes yes (both) yes (both) (see below) +readlink: no no no +follow_link: no no no +truncate: yes yes no (see below) +setattr: yes if ATTR_SIZE no +permission: yes no no +getattr: (see below) +revalidate: no (see below) + Additionally, ->rmdir() has i_zombie on victim and so does ->rename() +in case when target exists and is a directory. + ->revalidate() may be called both with and without the i_sem +on dentry->d_inode. VFS never calls it with i_zombie on dentry->d_inode, +but watch for other methods directly calling this one... + ->truncate() is never called directly - it's a callback, not a +method. It's called by vmtruncate() - library function normally used by +->setattr(). Locking information above applies to that call (i.e. is +inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been +passed). + ->getattr() is currently unused. + +--------------------------- super_operations --------------------------- +prototypes: + void (*read_inode) (struct inode *); + void (*write_inode) (struct inode *, int); + void (*put_inode) (struct inode *); + void (*delete_inode) (struct inode *); + void (*put_super) (struct super_block *); + void (*write_super) (struct super_block *); + int (*statfs) (struct super_block *, struct statfs *); + int (*remount_fs) (struct super_block *, int *, char *); + void (*clear_inode) (struct inode *); + void (*umount_begin) (struct super_block *); + +locking rules: + All may block. 
+ BKL s_lock mount_sem +read_inode: yes (see below) +write_inode: no +put_inode: no +delete_inode: no +clear_inode: no +put_super: yes yes maybe (see below) +write_super: yes yes maybe (see below) +statfs: yes no no +remount_fs: yes yes maybe (see below) +umount_begin: yes no maybe (see below) + +->read_inode() is not a method - it's a callback used in iget()/iget4(). +Rules for mount_sem are not too nice - it is going to die and be replaced +by a better scheme anyway. + +--------------------------- file_system_type --------------------------- +prototypes: + struct super_block *(*read_super) (struct super_block *, void *, int); +locking rules: +may block BKL ->s_lock mount_sem +yes yes yes maybe + +--------------------------- address_space_operations -------------------------- +prototypes: + int (*writepage)(struct file *, struct page *); + int (*readpage)(struct file *, struct page *); + int (*sync_page)(struct page *); + int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); + int (*commit_write)(struct file *, struct page *, unsigned, unsigned); + int (*bmap)(struct address_space *, long); +locking rules: + All may block + BKL PageLocked(page) +writepage: no yes +readpage: no yes +sync_page: no maybe +prepare_write: no yes +commit_write: no yes +bmap: yes + + ->prepare_write(), ->commit_write(), ->sync_page() and ->readpage() +may be called from the request handler (/dev/loop). + ->sync_page() locking rules are not well-defined - usually it is called +with lock on page, but that is not guaranteed. Considering the currently +existing instances of this method ->sync_page() itself doesn't look +well-defined... + ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some +filesystems and by the swapper. The latter will eventually go away. All +instances do not actually need the BKL. Please, keep it that way and don't +breed new callers. 
+ Note: currently almost all instances of address_space methods are +using BKL for internal serialization and that's one of the worst sources +of contention. Normally they are calling library functions (in fs/buffer.c) +and pass foo_get_block() as a callback (on local block-based filesystems, +indeed). BKL is not needed for library stuff and is usually taken by +foo_get_block(). It's an overkill, since block bitmaps can be protected by +internal fs locking and real critical areas are much smaller than the areas +filesystems protect now. + +--------------------------- file_lock ------------------------------------ +prototypes: + void (*fl_notify)(struct file_lock *); /* unblock callback */ + void (*fl_insert)(struct file_lock *); /* lock insertion callback */ + void (*fl_remove)(struct file_lock *); /* lock removal callback */ + +locking rules: + BKL may block +fl_notify: yes no +fl_insert: yes maybe +fl_remove: yes maybe + Currently only NLM provides instances of this class. None of +them block. If you have out-of-tree instances - please, show up. Locking +in that area will change. + +--------------------------- buffer_head ----------------------------------- +prototypes: + void (*b_end_io)(struct buffer_head *bh, int uptodate); + +locking rules: + called from interrupts. In other words, extreme care is needed here. +bh is locked, but that's all the guarantees we have here. Currently only RAID1, +highmem and fs/buffer.c are providing these. Block devices call this method +upon the IO completion. 
+ +--------------------------- block_device_operations ----------------------- +prototypes: + int (*open) (struct inode *, struct file *); + int (*release) (struct inode *, struct file *); + int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); + int (*check_media_change) (kdev_t); + int (*revalidate) (kdev_t); +locking rules: + BKL bd_sem +open: yes yes +release: yes yes +ioctl: yes no +check_media_change: yes no +revalidate: yes no + +The last two are called only from check_disk_change(). Prototypes are very +bad - as soon as we'll get disk_struct they will change (and methods will +become per-disk instead of per-partition). + +--------------------------- file_operations ------------------------------- +prototypes: + loff_t (*llseek) (struct file *, loff_t, int); + ssize_t (*read) (struct file *, char *, size_t, loff_t *); + ssize_t (*write) (struct file *, const char *, size_t, loff_t *); + int (*readdir) (struct file *, void *, filldir_t); + unsigned int (*poll) (struct file *, struct poll_table_struct *); + int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); + int (*mmap) (struct file *, struct vm_area_struct *); + int (*open) (struct inode *, struct file *); + int (*flush) (struct file *); + int (*release) (struct inode *, struct file *); + int (*fsync) (struct file *, struct dentry *, int datasync); + int (*fasync) (int, struct file *, int); + int (*lock) (struct file *, int, struct file_lock *); + ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *); + ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *); +}; + +locking rules: + All except ->poll() may block. 
+ BKL +llseek: yes +read: no +write: no +readdir: yes (see below) +poll: no +ioctl: yes (see below) +mmap: no +open: maybe (see below) +flush: yes +release: yes +fsync: yes (see below) +fasync: yes (see below) +lock: yes +readv: no +writev: no + +->open() locking is in-transit: big lock partially moved into the methods. +The only exception is ->open() in the instances of file_operations that never +end up in ->i_fop/->proc_fops, i.e. ones that belong to character devices +(chrdev_open() takes lock before replacing ->f_op and calling the secondary +method). As soon as we fix the handling of module reference counters all +instances of ->open() will be called without the BKL. At the same point +->release() will lose BKL. Currently ext2_release() is *the* source of +contention on fs-intensive loads and dropping BKL on ->release() will get +rid of that (we will still need some locking for cases when we close a file +that had been opened r/w, but that can be done using the internal locking with +smaller critical areas). sock_close() is also going to win from that change. + +->fasync() is a mess. This area needs a big cleanup and that will probably +affect locking. + +->readdir() and ->ioctl() on directories must be changed. Ideally we would +move ->readdir() to inode_operations and use a separate method for directory +->ioctl() or kill the latter completely. One of the problems is that for +anything that resembles union-mount we won't have a struct file for all +components. And there are other reasons why the current interface is a mess... + +->read() on directories probably must go away - we should just enforce -EISDIR +in sys_read() and friends. + +->fsync() has i_sem on inode. 
+ +--------------------------- dquot_operations ------------------------------- +prototypes: + void (*initialize) (struct inode *, short); + void (*drop) (struct inode *); + int (*alloc_block) (const struct inode *, unsigned long, char); + int (*alloc_inode) (const struct inode *, unsigned long); + void (*free_block) (const struct inode *, unsigned long); + void (*free_inode) (const struct inode *, unsigned long); + int (*transfer) (struct dentry *, struct iattr *); + +locking rules: + BKL +initialize: no +drop: no +alloc_block: yes +alloc_inode: yes +free_block: yes +free_inode: yes +transfer: no + +--------------------------- vm_operations_struct ----------------------------- +prototypes: + void (*open)(struct vm_area_struct*); + void (*close)(struct vm_area_struct*); + void (*unmap)(struct vm_area_struct*, unsigned long, size_t); + void (*protect)(struct vm_area_struct*, unsigned long, size_t, unsigned); + int (*sync)(struct vm_area_struct*, unsigned long, size_t, unsigned); + struct page *(*nopage)(struct vm_area_struct*, unsigned long, int); + struct page *(*wppage)(struct vm_area_struct*, unsigned long, struct page*); + int (*swapout)(struct page *, struct file *); + +locking rules: + BKL mmap_sem +open: no yes +close: no yes +sync: no yes +unmap: no yes +nopage: no yes +swapout: yes yes +wppage: (see below) +protect: (see below) + +->wppage() and ->protect() have no instances and nothing calls them; looks like +they must die... + +================================================================================ + Dubious stuff + +(if you break something or notice that it is broken and do not fix it yourself +- at least put it here) + +ipc/shm.c::shm_delete() - may need BKL. +->read() and ->write() in many drivers are (probably) missing BKL. +drivers/sgi/char/graphics.c::sgi_graphics_nopage() - may need BKL. |