summaryrefslogtreecommitdiffstats
path: root/fs/select.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/select.c')
-rw-r--r--fs/select.c258
1 files changed, 258 insertions, 0 deletions
diff --git a/fs/select.c b/fs/select.c
new file mode 100644
index 000000000..e87ae07eb
--- /dev/null
+++ b/fs/select.c
@@ -0,0 +1,258 @@
+/*
+ * This file contains the procedures for the handling of select
+ *
+ * Created for Linux based loosely upon Mathius Lattner's minix
+ * patches by Peter MacDonald. Heavily edited by Linus.
+ *
+ * 4 February 1994
+ * COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
+ * flag set in its personality we do *not* modify the given timeout
+ * parameter to reflect time remaining.
+ */
+
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/personality.h>
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#define ROUND_UP(x,y) (((x)+(y)-1)/(y))
+
+/*
+ * Ok, Peter made a complicated, but straightforward multiple_wait() function.
+ * I have rewritten this, taking some shortcuts: This code may not be easy to
+ * follow, but it should be free of race-conditions, and it's practical. If you
+ * understand what I'm doing here, then you understand how the linux
+ * sleep/wakeup mechanism works.
+ *
+ * Two very simple procedures, select_wait() and free_wait() make all the work.
+ * select_wait() is a inline-function defined in <linux/sched.h>, as all select
+ * functions have to call it to add an entry to the select table.
+ */
+
+/*
+ * I rewrote this again to make the select_table size variable, take some
+ * more shortcuts, improve responsiveness, and remove another race that
+ * Linus noticed. -- jrs
+ */
+
+static void free_wait(select_table * p)
+{
+ struct select_table_entry * entry = p->entry + p->nr;
+
+ while (p->nr > 0) {
+ p->nr--;
+ entry--;
+ remove_wait_queue(entry->wait_address,&entry->wait);
+ }
+}
+
+/*
+ * The check function checks the ready status of a file using the vfs layer.
+ *
+ * If the file was not ready we were added to its wait queue. But in
+ * case it became ready just after the check and just before it called
+ * select_wait, we call it again, knowing we are already on its
+ * wait queue this time. The second call is not necessary if the
+ * select_table is NULL indicating an earlier file check was ready
+ * and we aren't going to sleep on the select_table. -- jrs
+ */
+
+static int check(int flag, select_table * wait, struct file * file)
+{
+ struct inode * inode;
+ struct file_operations *fops;
+ int (*select) (struct inode *, struct file *, int, select_table *);
+
+ inode = file->f_inode;
+ if ((fops = file->f_op) && (select = fops->select))
+ return select(inode, file, flag, wait)
+ || (wait && select(inode, file, flag, NULL));
+ if (S_ISREG(inode->i_mode))
+ return 1;
+ return 0;
+}
+
+static int do_select(int n, fd_set *in, fd_set *out, fd_set *ex,
+ fd_set *res_in, fd_set *res_out, fd_set *res_ex)
+{
+ int count;
+ select_table wait_table, *wait;
+ struct select_table_entry *entry;
+ unsigned long set;
+ int i,j;
+ int max = -1;
+
+ for (j = 0 ; j < __FDSET_LONGS ; j++) {
+ i = j << 5;
+ if (i >= n)
+ break;
+ set = in->fds_bits[j] | out->fds_bits[j] | ex->fds_bits[j];
+ for ( ; set ; i++,set >>= 1) {
+ if (i >= n)
+ goto end_check;
+ if (!(set & 1))
+ continue;
+ if (!current->files->fd[i])
+ return -EBADF;
+ if (!current->files->fd[i]->f_inode)
+ return -EBADF;
+ max = i;
+ }
+ }
+end_check:
+ n = max + 1;
+ if(!(entry = (struct select_table_entry*) __get_free_page(GFP_KERNEL)))
+ return -ENOMEM;
+ FD_ZERO(res_in);
+ FD_ZERO(res_out);
+ FD_ZERO(res_ex);
+ count = 0;
+ wait_table.nr = 0;
+ wait_table.entry = entry;
+ wait = &wait_table;
+repeat:
+ current->state = TASK_INTERRUPTIBLE;
+ for (i = 0 ; i < n ; i++) {
+ if (FD_ISSET(i,in) && check(SEL_IN,wait,current->files->fd[i])) {
+ FD_SET(i, res_in);
+ count++;
+ wait = NULL;
+ }
+ if (FD_ISSET(i,out) && check(SEL_OUT,wait,current->files->fd[i])) {
+ FD_SET(i, res_out);
+ count++;
+ wait = NULL;
+ }
+ if (FD_ISSET(i,ex) && check(SEL_EX,wait,current->files->fd[i])) {
+ FD_SET(i, res_ex);
+ count++;
+ wait = NULL;
+ }
+ }
+ wait = NULL;
+ if (!count && current->timeout && !(current->signal & ~current->blocked)) {
+ schedule();
+ goto repeat;
+ }
+ free_wait(&wait_table);
+ free_page((unsigned long) entry);
+ current->state = TASK_RUNNING;
+ return count;
+}
+
+/*
+ * We do a VERIFY_WRITE here even though we are only reading this time:
+ * we'll write to it eventually..
+ */
+static int __get_fd_set(int nr, unsigned long * fs_pointer, unsigned long * fdset)
+{
+ int error;
+
+ FD_ZERO(fdset);
+ if (!fs_pointer)
+ return 0;
+ error = verify_area(VERIFY_WRITE,fs_pointer,sizeof(fd_set));
+ if (error)
+ return error;
+ while (nr > 0) {
+ *fdset = get_fs_long(fs_pointer);
+ fdset++;
+ fs_pointer++;
+ nr -= 32;
+ }
+ return 0;
+}
+
+static void __set_fd_set(int nr, unsigned long * fs_pointer, unsigned long * fdset)
+{
+ if (!fs_pointer)
+ return;
+ while (nr > 0) {
+ put_fs_long(*fdset, fs_pointer);
+ fdset++;
+ fs_pointer++;
+ nr -= 32;
+ }
+}
+
+#define get_fd_set(nr,fsp,fdp) \
+__get_fd_set(nr, (unsigned long *) (fsp), (unsigned long *) (fdp))
+
+#define set_fd_set(nr,fsp,fdp) \
+__set_fd_set(nr, (unsigned long *) (fsp), (unsigned long *) (fdp))
+
+/*
+ * We can actually return ERESTARTSYS instead of EINTR, but I'd
+ * like to be certain this leads to no problems. So I return
+ * EINTR just for safety.
+ *
+ * Update: ERESTARTSYS breaks at least the xview clock binary, so
+ * I'm trying ERESTARTNOHAND which restart only when you want to.
+ */
+asmlinkage int sys_select( unsigned long *buffer )
+{
+/* Perform the select(nd, in, out, ex, tv) system call. */
+ int i;
+ fd_set res_in, in, *inp;
+ fd_set res_out, out, *outp;
+ fd_set res_ex, ex, *exp;
+ int n;
+ struct timeval *tvp;
+ unsigned long timeout;
+
+ i = verify_area(VERIFY_READ, buffer, 20);
+ if (i)
+ return i;
+ n = get_fs_long(buffer++);
+ if (n < 0)
+ return -EINVAL;
+ if (n > NR_OPEN)
+ n = NR_OPEN;
+ inp = (fd_set *) get_fs_long(buffer++);
+ outp = (fd_set *) get_fs_long(buffer++);
+ exp = (fd_set *) get_fs_long(buffer++);
+ tvp = (struct timeval *) get_fs_long(buffer);
+ if ((i = get_fd_set(n, inp, &in)) ||
+ (i = get_fd_set(n, outp, &out)) ||
+ (i = get_fd_set(n, exp, &ex))) return i;
+ timeout = ~0UL;
+ if (tvp) {
+ i = verify_area(VERIFY_WRITE, tvp, sizeof(*tvp));
+ if (i)
+ return i;
+ timeout = ROUND_UP(get_fs_long((unsigned long *)&tvp->tv_usec),(1000000/HZ));
+ timeout += get_fs_long((unsigned long *)&tvp->tv_sec) * HZ;
+ if (timeout)
+ timeout += jiffies + 1;
+ }
+ current->timeout = timeout;
+ i = do_select(n, &in, &out, &ex, &res_in, &res_out, &res_ex);
+ if (current->timeout > jiffies)
+ timeout = current->timeout - jiffies;
+ else
+ timeout = 0;
+ current->timeout = 0;
+ if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
+ put_fs_long(timeout/HZ, (unsigned long *) &tvp->tv_sec);
+ timeout %= HZ;
+ timeout *= (1000000/HZ);
+ put_fs_long(timeout, (unsigned long *) &tvp->tv_usec);
+ }
+ if (i < 0)
+ return i;
+ if (!i && (current->signal & ~current->blocked))
+ return -ERESTARTNOHAND;
+ set_fd_set(n, inp, &res_in);
+ set_fd_set(n, outp, &res_out);
+ set_fd_set(n, exp, &res_ex);
+ return i;
+}