From 66c03dc3a92b84f0320b1c350238396cdf203cc7 Mon Sep 17 00:00:00 2001 From: Ingo Weinhold Date: Mon, 1 Oct 2007 01:37:28 +0000 Subject: [PATCH] * fd.c -> fd.cpp * Reworked the select support: - The io_context additionally stores a table of lists of select_infos, which enables it to deselect events of a pending select() when closing a FD. This prevents a race condition potentially causing a write to stale memory. - The opaque selectsync* passed to FSs is now actually a select_info*. This was necessary, since the FDs deselect() hook (unlike the select() hook) doesn't take a "ref" argument and deselecting a single info (e.g. caused by a premature close()) was not possible. The select() hook's "ref" argument has become superfluous. - It should now be relatively easy to implement a poll_on_steroids() that can also wait for objects other than FDs (e.g. semaphores, ports, threads etc.). * Set/reset the signal mask in common_select(). This makes pselect() work as required. * Reorganized vfs_resize_fd_table(). git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@22391 a95241bf-73f2-0310-859d-f6bbb57e9c96 --- headers/private/kernel/fs/fd.h | 13 +- headers/private/kernel/vfs.h | 2 + src/system/kernel/fs/Jamfile | 4 +- src/system/kernel/fs/{fd.c => fd.cpp} | 205 +++++++++++++----- src/system/kernel/fs/vfs.cpp | 100 +++++---- src/system/kernel/fs/vfs_select.cpp | 292 ++++++++++++++------------ src/system/kernel/fs/vfs_select.h | 24 ++- 7 files changed, 394 insertions(+), 246 deletions(-) rename src/system/kernel/fs/{fd.c => fd.cpp} (83%) diff --git a/headers/private/kernel/fs/fd.h b/headers/private/kernel/fs/fd.h index 8a31fc113f..ce2c6972d1 100644 --- a/headers/private/kernel/fs/fd.h +++ b/headers/private/kernel/fs/fd.h @@ -16,6 +16,7 @@ extern "C" { #endif struct file_descriptor; +struct selectsync; struct select_sync; struct fd_ops { @@ -23,8 +24,10 @@ struct fd_ops { status_t (*fd_write)(struct file_descriptor *, off_t pos, const void *buffer, size_t *length); off_t (*fd_seek)(struct file_descriptor *, off_t pos, int seekType); status_t (*fd_ioctl)(struct file_descriptor *, ulong op, void *buffer, size_t length); - status_t (*fd_select)(struct file_descriptor *, uint8 event, uint32 ref, struct select_sync *sync); - status_t (*fd_deselect)(struct file_descriptor *, uint8 event, struct select_sync *sync); + status_t (*fd_select)(struct file_descriptor *, uint8 event, uint32 ref, + struct selectsync *sync); + status_t (*fd_deselect)(struct file_descriptor *, uint8 event, + struct selectsync *sync); status_t (*fd_read_dir)(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count); status_t (*fd_rewind_dir)(struct file_descriptor *); status_t (*fd_read_stat)(struct file_descriptor *, struct stat *); @@ -74,9 +77,9 @@ extern void close_fd(struct file_descriptor *descriptor); extern void put_fd(struct file_descriptor *descriptor); extern void disconnect_fd(struct file_descriptor *descriptor); extern void inc_fd_ref_count(struct file_descriptor *descriptor); -extern status_t select_fd(int fd, uint8 event, uint32 ref, - struct select_sync *sync, bool kernel); -extern status_t deselect_fd(int fd, uint8 event, struct select_sync *sync, +extern status_t select_fd(int fd, struct select_sync *sync, uint32 ref, + bool kernel); +extern status_t deselect_fd(int fd, struct select_sync *sync, uint32 ref, bool kernel); extern bool fd_is_valid(int fd, bool kernel); extern struct vnode *fd_vnode(struct file_descriptor *descriptor); diff --git a/headers/private/kernel/vfs.h b/headers/private/kernel/vfs.h index 21e0c2bd6f..03f62adfbe 100644 --- a/headers/private/kernel/vfs.h +++ b/headers/private/kernel/vfs.h @@ -30,6 +30,7 @@ struct kernel_args; struct vm_cache; struct file_descriptor; struct selectsync; +struct select_info; struct pollfd; struct vnode; @@ -41,6 +42,7 @@ typedef struct io_context { uint32 table_size; uint32 num_used_fds; struct file_descriptor **fds; + struct select_info **select_infos; uint8 *fds_close_on_exec; struct list node_monitors; uint32 num_monitors; diff --git a/src/system/kernel/fs/Jamfile b/src/system/kernel/fs/Jamfile index 71d59d180d..1eaba4ec0d 100644 --- a/src/system/kernel/fs/Jamfile +++ b/src/system/kernel/fs/Jamfile @@ -9,7 +9,7 @@ KernelMergeObject kernel_fs.o : devfs.cpp rootfs.c pipefs.cpp - fd.c + fd.cpp vfs.cpp vfs_boot.cpp vfs_net_boot.cpp @@ -19,4 +19,4 @@ KernelMergeObject kernel_fs.o : KPath.cpp : $(TARGET_KERNEL_PIC_CCFLAGS) -Wno-unused - ; +; diff --git a/src/system/kernel/fs/fd.c b/src/system/kernel/fs/fd.cpp similarity index 83% rename from src/system/kernel/fs/fd.c rename to src/system/kernel/fs/fd.cpp index 2b5ca628c9..646dfa7182 100644 --- a/src/system/kernel/fs/fd.c +++ b/src/system/kernel/fs/fd.cpp @@ -5,14 +5,18 @@ */ +#include + +#include +#include + #include -#include -#include #include +#include +#include -#include -#include +#include "vfs_select.h" //#define TRACE_FD @@ -23,6 +27,10 @@ #endif +static void deselect_select_infos(file_descriptor* descriptor, + select_info* infos); + + /*** General fd routines ***/ @@ -44,9 +52,8 @@ dump_fd(int fd,struct file_descriptor *descriptor) struct file_descriptor * alloc_fd(void) { - struct file_descriptor *descriptor; - - descriptor = malloc(sizeof(struct file_descriptor)); + file_descriptor *descriptor + = (file_descriptor*)malloc(sizeof(struct file_descriptor)); if (descriptor == NULL) return NULL; @@ -199,18 +206,13 @@ inc_fd_ref_count(struct file_descriptor *descriptor) } -struct file_descriptor * -get_fd(struct io_context *context, int fd) +static struct file_descriptor * +get_fd_locked(struct io_context *context, int fd) { - struct file_descriptor *descriptor = NULL; - - if (fd < 0) + if (fd < 0 || (uint32)fd >= context->table_size) return NULL; - mutex_lock(&context->io_mutex); - - if ((uint32)fd < context->table_size) - descriptor = context->fds[fd]; + struct file_descriptor *descriptor = context->fds[fd]; if (descriptor != NULL) { // Disconnected descriptors cannot be accessed anymore @@ -220,12 +222,19 @@ get_fd(struct io_context *context, int fd) inc_fd_ref_count(descriptor); } - mutex_unlock(&context->io_mutex); - return descriptor; } +struct file_descriptor * +get_fd(struct io_context *context, int fd) +{ + MutexLocker(context->io_mutex); + + return get_fd_locked(context, fd); +} + + /** Removes the file descriptor from the specified slot. */ @@ -242,19 +251,27 @@ remove_fd(struct io_context *context, int fd) if ((uint32)fd < context->table_size) descriptor = context->fds[fd]; + select_info* selectInfos = NULL; + bool disconnected = false; + if (descriptor) { // fd is valid context->fds[fd] = NULL; fd_set_close_on_exec(context, fd, false); context->num_used_fds--; - if (descriptor->open_mode & O_DISCONNECTED) - descriptor = NULL; + selectInfos = context->select_infos[fd]; + context->select_infos[fd] = NULL; + + disconnected = (descriptor->open_mode & O_DISCONNECTED); } mutex_unlock(&context->io_mutex); - return descriptor; + if (selectInfos != NULL) + deselect_select_infos(descriptor, selectInfos); + + return disconnected ? NULL : descriptor; } @@ -292,7 +309,6 @@ dup_fd(int fd, bool kernel) * * We do dup2() directly to be thread-safe. */ - static int dup2_fd(int oldfd, int newfd, bool kernel) { @@ -321,9 +337,12 @@ dup2_fd(int oldfd, int newfd, bool kernel) // Check for identity, note that it cannot be made above // because we always want to return an error on invalid // handles + select_info* selectInfos = NULL; if (oldfd != newfd) { // Now do the work evicted = context->fds[newfd]; + selectInfos = context->select_infos[newfd]; + context->select_infos[newfd] = NULL; atomic_add(&context->fds[oldfd]->ref_count, 1); atomic_add(&context->fds[oldfd]->open_count, 1); context->fds[newfd] = context->fds[oldfd]; @@ -338,6 +357,7 @@ dup2_fd(int oldfd, int newfd, bool kernel) // Say bye bye to the evicted fd if (evicted) { + deselect_select_infos(evicted, selectInfos); close_fd(evicted); put_fd(evicted); } @@ -366,50 +386,133 @@ fd_ioctl(bool kernelFD, int fd, ulong op, void *buffer, size_t length) } -status_t -select_fd(int fd, uint8 event, uint32 ref, struct select_sync *sync, bool kernel) +static void +deselect_select_infos(file_descriptor* descriptor, select_info* infos) { - struct file_descriptor *descriptor; - status_t status; + TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos)); - TRACE(("select_fd(fd = %d, event = %u, ref = %lu, selectsync = %p)\n", fd, event, ref, sync)); + select_info* info = infos; + while (info != NULL) { + select_sync* sync = info->sync; - descriptor = get_fd(get_current_io_context(kernel), fd); - if (descriptor == NULL) - return B_FILE_ERROR; + // deselect the selected events + if (descriptor->ops->fd_deselect && info->selected_events) { + for (uint16 event = 1; event < 16; event++) { + if (info->selected_events & SELECT_FLAG(event)) { + descriptor->ops->fd_deselect(descriptor, event, + (selectsync*)info); + } + } + } - if (descriptor->ops->fd_select) { - status = descriptor->ops->fd_select(descriptor, event, ref, sync); - } else { - // if the I/O subsystem doesn't support select(), we will - // immediately notify the select call - status = notify_select_event((void *)sync, ref, event); + info = info->next; + put_select_sync(sync); } - - put_fd(descriptor); - return status; } status_t -deselect_fd(int fd, uint8 event, struct select_sync *sync, bool kernel) +select_fd(int fd, struct select_sync* sync, uint32 ref, bool kernel) { - struct file_descriptor *descriptor; - status_t status; + TRACE(("select_fd(fd = %d, selectsync = %p, ref = %lu, 0x%x)\n", fd, sync, ref, sync->set[ref].selected_events)); - TRACE(("deselect_fd(fd = %d, event = %u, selectsync = %p)\n", fd, event, sync)); + select_info* info = &sync->set[ref]; + if (info->selected_events == 0) + return B_OK; - descriptor = get_fd(get_current_io_context(kernel), fd); + io_context* context = get_current_io_context(kernel); + MutexLocker locker(context->io_mutex); + + struct file_descriptor* descriptor = get_fd_locked(context, fd); if (descriptor == NULL) return B_FILE_ERROR; - if (descriptor->ops->fd_deselect) - status = descriptor->ops->fd_deselect(descriptor, event, sync); - else - status = B_OK; + if (!descriptor->ops->fd_select) { + // if the I/O subsystem doesn't support select(), we will + // immediately notify the select call + locker.Unlock(); + put_fd(descriptor); + return notify_select_events(info, info->selected_events); + } + + // add the info to the IO context + info->next = context->select_infos[fd]; + context->select_infos[fd] = info; + + // as long as the info is in the list, we keep a reference to the sync + // object + atomic_add(&sync->ref_count, 1); + + locker.Unlock(); + + // select any events asked for + uint32 selectedEvents = 0; + + for (uint16 event = 1; event < 16; event++) { + if (info->selected_events & SELECT_FLAG(event) + && descriptor->ops->fd_select(descriptor, event, ref, + (selectsync*)info) == B_OK) { + selectedEvents |= SELECT_FLAG(event); + } + } + info->selected_events = selectedEvents; + + // if nothing has been selected, we deselect immediately + if (selectedEvents == 0) + deselect_fd(fd, sync, ref, kernel); put_fd(descriptor); - return status; + return B_OK; +} + + +status_t +deselect_fd(int fd, struct select_sync* sync, uint32 ref, bool kernel) +{ + TRACE(("deselect_fd(fd = %d, selectsync = %p, ref = %lu)\n", fd, sync, ref)); + + select_info* info = &sync->set[ref]; + if (info->selected_events == 0) + return B_OK; + + io_context* context = get_current_io_context(kernel); + MutexLocker locker(context->io_mutex); + + struct file_descriptor* descriptor = get_fd_locked(context, fd); + if (descriptor == NULL) + return B_FILE_ERROR; + + // remove the info from the IO context + + select_info** infoLocation = &context->select_infos[fd]; + while (*infoLocation != NULL && *infoLocation != info) + infoLocation = &(*infoLocation)->next; + + // If not found, someone else beat us to it. + if (*infoLocation != info) { + locker.Unlock(); + put_fd(descriptor); + return B_OK; + } + + *infoLocation = info->next; + + locker.Unlock(); + + // deselect the selected events + if (descriptor->ops->fd_deselect && info->selected_events) { + for (uint16 event = 1; event < 16; event++) { + if (info->selected_events & SELECT_FLAG(event)) { + descriptor->ops->fd_deselect(descriptor, event, + (selectsync*)info); + } + } + } + + put_select_sync(sync); + + put_fd(descriptor); + return B_OK; } @@ -541,7 +644,7 @@ _user_readv(int fd, off_t pos, const iovec *userVecs, size_t count) goto err1; } - vecs = malloc(sizeof(iovec) * count); + vecs = (iovec*)malloc(sizeof(iovec) * count); if (vecs == NULL) { status = B_NO_MEMORY; goto err1; @@ -654,7 +757,7 @@ _user_writev(int fd, off_t pos, const iovec *userVecs, size_t count) goto err1; } - vecs = malloc(sizeof(iovec) * count); + vecs = (iovec*)malloc(sizeof(iovec) * count); if (vecs == NULL) { status = B_NO_MEMORY; goto err1; diff --git a/src/system/kernel/fs/vfs.cpp b/src/system/kernel/fs/vfs.cpp index 5627c0c3a3..1a2dab74a9 100644 --- a/src/system/kernel/fs/vfs.cpp +++ b/src/system/kernel/fs/vfs.cpp @@ -202,9 +202,9 @@ static off_t file_seek(struct file_descriptor *, off_t pos, int seek_type); static void file_free_fd(struct file_descriptor *); static status_t file_close(struct file_descriptor *); static status_t file_select(struct file_descriptor *, uint8 event, uint32 ref, - struct select_sync *sync); + struct selectsync *sync); static status_t file_deselect(struct file_descriptor *, uint8 event, - struct select_sync *sync); + struct selectsync *sync); static status_t dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count); static status_t dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count); static status_t dir_rewind(struct file_descriptor *); @@ -3272,16 +3272,21 @@ vfs_new_io_context(void *_parentContext) tableSize = DEFAULT_FD_TABLE_SIZE; // allocate space for FDs and their close-on-exec flag - context->fds = (file_descriptor **)malloc(sizeof(struct file_descriptor *) * tableSize + context->fds = (file_descriptor**)malloc( + sizeof(struct file_descriptor*) * tableSize + + sizeof(struct select_sync*) * tableSize + (tableSize + 7) / 8); if (context->fds == NULL) { free(context); return NULL; } - memset(context->fds, 0, sizeof(struct file_descriptor *) * tableSize + context->select_infos = (select_info**)(context->fds + tableSize); + context->fds_close_on_exec = (uint8 *)(context->select_infos + tableSize); + + memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize + + sizeof(struct select_sync*) * tableSize + (tableSize + 7) / 8); - context->fds_close_on_exec = (uint8 *)(context->fds + tableSize); if (mutex_init(&context->io_mutex, "I/O context") < 0) { free(context->fds); @@ -3360,69 +3365,62 @@ static status_t vfs_resize_fd_table(struct io_context *context, const int newSize) { struct file_descriptor **fds; - int status = B_OK; if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE) return EINVAL; - mutex_lock(&context->io_mutex); + MutexLocker(context->io_mutex); int oldSize = context->table_size; int oldCloseOnExitBitmapSize = (oldSize + 7) / 8; int newCloseOnExitBitmapSize = (newSize + 7) / 8; + // If the tables shrink, make sure none of the fds being dropped are in use. if (newSize < oldSize) { - // shrink the fd table - - // Make sure none of the fds being dropped are in use for (int i = oldSize; i-- > newSize;) { - if (context->fds[i]) { - status = EBUSY; - goto out; - } + if (context->fds[i]) + return EBUSY; } + } - fds = (struct file_descriptor **)malloc( - sizeof(struct file_descriptor *) * newSize - + newCloseOnExitBitmapSize); - if (fds == NULL) { - status = ENOMEM; - goto out; - } + // store pointers to the old tables + file_descriptor** oldFDs = context->fds; + select_info** oldSelectInfos = context->select_infos; + uint8* oldCloseOnExecTable = context->fds_close_on_exec; - memcpy(fds, context->fds, sizeof(struct file_descriptor *) * newSize); + // allocate new tables + file_descriptor** newFDs = (file_descriptor**)malloc( + sizeof(struct file_descriptor*) * newSize + + sizeof(struct select_sync*) * newSize + + newCloseOnExitBitmapSize); + if (newFDs == NULL) + return ENOMEM; - // copy close-on-exit bitmap - memcpy(fds + newSize, context->fds + oldSize, newCloseOnExitBitmapSize); - } else { - // enlarge the fd table + context->fds = newFDs; + context->select_infos = (select_info**)(context->fds + newSize); + context->fds_close_on_exec = (uint8 *)(context->select_infos + newSize); + context->table_size = newSize; - fds = (struct file_descriptor **)malloc( - sizeof(struct file_descriptor *) * newSize - + newCloseOnExitBitmapSize); - if (fds == NULL) { - status = ENOMEM; - goto out; - } + // copy entries from old tables + int toCopy = min_c(oldSize, newSize); - // copy the fd array, and zero the additional slots - memcpy(fds, context->fds, sizeof(void *) * oldSize); - memset(fds + oldSize, 0, sizeof(void *) * (newSize - oldSize)); + memcpy(context->fds, oldFDs, sizeof(void*) * toCopy); + memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy); + memcpy(context->fds_close_on_exec, oldCloseOnExecTable, + min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize)); - // copy close-on-exit bitmap, and zero out additional bytes - memcpy(fds + newSize, context->fds + oldSize, oldCloseOnExitBitmapSize); - memset((uint8*)(fds + newSize) + oldCloseOnExitBitmapSize, 0, + // clear additional entries, if the tables grow + if (newSize > oldSize) { + memset(context->fds + oldSize, 0, sizeof(void *) * (newSize - oldSize)); + memset(context->select_infos + oldSize, 0, + sizeof(void *) * (newSize - oldSize)); + memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0, newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize); } - free(context->fds); - context->fds = fds; - context->fds_close_on_exec = (uint8 *)(context->fds + newSize); - context->table_size = newSize; + free(oldFDs); -out: - mutex_unlock(&context->io_mutex); - return status; + return B_OK; } @@ -3897,7 +3895,7 @@ file_seek(struct file_descriptor *descriptor, off_t pos, int seekType) static status_t file_select(struct file_descriptor *descriptor, uint8 event, uint32 ref, - struct select_sync *sync) + struct selectsync *sync) { FUNCTION(("file_select(%p, %u, %lu, %p)\n", descriptor, event, ref, sync)); @@ -3905,16 +3903,16 @@ file_select(struct file_descriptor *descriptor, uint8 event, uint32 ref, // If the FS has no select() hook, notify select() now. if (FS_CALL(vnode, select) == NULL) - return notify_select_event((selectsync*)sync, ref, event); + return notify_select_event(sync, ref, event); return FS_CALL(vnode, select)(vnode->mount->cookie, vnode->private_node, - descriptor->cookie, event, ref, (selectsync*)sync); + descriptor->cookie, event, ref, sync); } static status_t file_deselect(struct file_descriptor *descriptor, uint8 event, - struct select_sync *sync) + struct selectsync *sync) { struct vnode *vnode = descriptor->u.vnode; @@ -3922,7 +3920,7 @@ file_deselect(struct file_descriptor *descriptor, uint8 event, return B_OK; return FS_CALL(vnode, deselect)(vnode->mount->cookie, vnode->private_node, - descriptor->cookie, event, (selectsync*)sync); + descriptor->cookie, event, sync); } diff --git a/src/system/kernel/fs/vfs_select.cpp b/src/system/kernel/fs/vfs_select.cpp index fd7113bc7a..b25db2fe2d 100644 --- a/src/system/kernel/fs/vfs_select.cpp +++ b/src/system/kernel/fs/vfs_select.cpp @@ -1,4 +1,5 @@ /* + * Copyright 2007, Ingo Weinhold, bonefish@cs.tu-berlin.de. All rights reserved. * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved. * Distributed under the terms of the MIT License. */ @@ -6,17 +7,22 @@ #include "vfs_select.h" -#include -#include -#include -#include - #include + #include +#include #include #include #include +#include + +#include +#include +#include +#include +#include + //#define TRACE_VFS_SELECT #ifdef TRACE_VFS_SELECT # define PRINT(x) dprintf x @@ -27,37 +33,7 @@ #endif -/*! Selects all events in the mask on the specified file descriptor */ -static int -select_events(struct select_sync *sync, int fd, int ref, uint16 selectedEvents, - bool kernel) -{ - uint32 count = 0; - uint16 event = 1; - - // select any events asked for - for (; event < 16; event++) { - if (selectedEvents & SELECT_FLAG(event) - && select_fd(fd, event, ref, sync, kernel) == B_OK) - count++; - } - return count; -} - - -/*! Deselects all events in the mask on the specified file descriptor */ -static void -deselect_events(struct select_sync *sync, int fd, uint16 selectedEvents, - bool kernel) -{ - uint16 event = 1; - - // deselect any events previously asked for - for (; event < 16; event++) { - if (selectedEvents & SELECT_FLAG(event)) - deselect_fd(fd, event, sync, kernel); - } -} +using std::nothrow; /*! @@ -74,18 +50,73 @@ fd_zero(fd_set *set, int numFDs) } +static status_t +create_select_sync(int numFDs, select_sync*& _sync) +{ + // create sync structure + select_sync* sync = new(nothrow) select_sync; + if (sync == NULL) + return B_NO_MEMORY; + ObjectDeleter syncDeleter(sync); + + // create info set + sync->set = new(nothrow) select_info[numFDs]; + if (sync->set == NULL) + return B_NO_MEMORY; + ArrayDeleter setDeleter(sync->set); + + // create select event semaphore + sync->sem = create_sem(0, "select"); + if (sync->sem < 0) + return sync->sem; + + // create lock + status_t error = benaphore_init(&sync->lock, "select sync"); + if (error != B_OK) { + delete_sem(sync->sem); + return error; + } + + sync->count = numFDs; + sync->ref_count = 1; + + for (int i = 0; i < numFDs; i++) { + sync->set[i].next = NULL; + sync->set[i].sync = sync; + } + + setDeleter.Detach(); + syncDeleter.Detach(); + _sync = sync; + + return B_OK; +} + + +void +put_select_sync(select_sync* sync) +{ + FUNCTION(("put_select_sync(%p): -> %ld\n", sync, sync->ref_count - 1)); + + if (atomic_add(&sync->ref_count, -1) == 1) { + delete_sem(sync->sem); + benaphore_destroy(&sync->lock); + delete[] sync->set; + delete sync; + } +} + + static int common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet, bigtime_t timeout, const sigset_t *sigMask, bool kernel) { - struct select_sync sync; status_t status = B_OK; int fd; - FUNCTION(("common_select(%d, %p, %p, %p, %lld, %p, %d)\n", numFDs, readSet, - writeSet, errorSet, timeout, sigMask, kernel)); - - // TODO: set sigMask to make pselect() functional different from select() + FUNCTION(("[%ld] common_select(%d, %p, %p, %p, %lld, %p, %d)\n", + find_thread(NULL), numFDs, readSet, writeSet, errorSet, timeout, + sigMask, kernel)); // check if fds are valid before doing anything @@ -97,62 +128,54 @@ common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet, return B_FILE_ERROR; } - // allocate resources - - sync.sem = create_sem(0, "select"); - if (sync.sem < B_OK) - return sync.sem; - - set_sem_owner(sync.sem, B_SYSTEM_TEAM); - - sync.set = (select_info *)malloc(sizeof(select_info) * numFDs); - if (sync.set == NULL) { - delete_sem(sync.sem); - return B_NO_MEMORY; - } - sync.count = numFDs; + // allocate sync object + select_sync* sync; + status = create_select_sync(numFDs, sync); + if (status != B_OK) + return status; // start selecting file descriptors + BenaphoreLocker locker(sync->lock); + for (fd = 0; fd < numFDs; fd++) { - sync.set[fd].selected_events = 0; - sync.set[fd].events = 0; + sync->set[fd].selected_events = 0; + sync->set[fd].events = 0; if (readSet && FD_ISSET(fd, readSet)) - sync.set[fd].selected_events = SELECT_FLAG(B_SELECT_READ); + sync->set[fd].selected_events = SELECT_FLAG(B_SELECT_READ); if (writeSet && FD_ISSET(fd, writeSet)) - sync.set[fd].selected_events |= SELECT_FLAG(B_SELECT_WRITE); + sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_WRITE); if (errorSet && FD_ISSET(fd, errorSet)) - sync.set[fd].selected_events |= SELECT_FLAG(B_SELECT_ERROR); + sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_ERROR); - select_events(&sync, fd, fd, sync.set[fd].selected_events, kernel); + select_fd(fd, sync, fd, kernel); // array position is the same as the fd for select() } - status = acquire_sem_etc(sync.sem, 1, + locker.Unlock(); + + // set new signal mask + sigset_t oldSigMask; + if (sigMask != NULL) + sigprocmask(SIG_SETMASK, sigMask, &oldSigMask); + + // wait for something to happen + status = acquire_sem_etc(sync->sem, 1, B_CAN_INTERRUPT | (timeout != -1 ? B_RELATIVE_TIMEOUT : 0), timeout); + // restore the old signal mask + if (sigMask != NULL) + sigprocmask(SIG_SETMASK, &oldSigMask, NULL); + PRINT(("common_select(): acquire_sem_etc() returned: %lx\n", status)); // deselect file descriptors - for (fd = 0; fd < numFDs; fd++) { - deselect_events(&sync, fd, sync.set[fd].selected_events, kernel); - // TODO: Since we're using FD indices instead of FDs (file_descriptors), - // it can happen that the FD index (even the FD) on which we invoked - // the select() FS hook is already closed at this point. - // This has two main consequences: - // 1) deselect() is not invoked, if a currently selected FD index is - // closed. Thus on close of a FD the FS would need to cleanup the - // select resources it had acquired. Harmless. - // 2) Caused by 1): If the FS close hook invokes notify_select_event() - // (which is a nice gesture, though actually undefined by the - // POSIX standard), it has no means of synchronization with the - // pending select() (since deselect() won't be invoked), i.e. a - // second call to notify_select_event() might be too late, since - // select() might already be finished. Dangerous! - // notify_select_event() would operate on already free()d memory! - } + locker.Lock(); + + for (fd = 0; fd < numFDs; fd++) + deselect_fd(fd, sync, fd, kernel); PRINT(("common_select(): events deselected\n")); @@ -163,8 +186,9 @@ common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet, if (status == B_INTERRUPTED) { // We must not clear the sets in this case, as applications may // rely on the contents of them. - count = B_INTERRUPTED; - goto err; + locker.Unlock(); + put_select_sync(sync); + return B_INTERRUPTED; } // Clear sets to store the received events @@ -176,15 +200,17 @@ common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet, if (status == B_OK) { for (count = 0, fd = 0;fd < numFDs; fd++) { - if (readSet && sync.set[fd].events & SELECT_FLAG(B_SELECT_READ)) { + if (readSet && sync->set[fd].events & SELECT_FLAG(B_SELECT_READ)) { FD_SET(fd, readSet); count++; } - if (writeSet && sync.set[fd].events & SELECT_FLAG(B_SELECT_WRITE)) { + if (writeSet + && sync->set[fd].events & SELECT_FLAG(B_SELECT_WRITE)) { FD_SET(fd, writeSet); count++; } - if (errorSet && sync.set[fd].events & SELECT_FLAG(B_SELECT_ERROR)) { + if (errorSet + && sync->set[fd].events & SELECT_FLAG(B_SELECT_ERROR)) { FD_SET(fd, errorSet); count++; } @@ -193,9 +219,8 @@ common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet, // B_TIMED_OUT and B_WOULD_BLOCK are supposed to return 0 -err: - delete_sem(sync.sem); - free(sync.set); + locker.Unlock(); + put_select_sync(sync); return count; } @@ -208,24 +233,16 @@ common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout, bool kernel) int count = 0; uint32 i; - // allocate resources - - select_sync sync; - sync.sem = create_sem(0, "poll"); - if (sync.sem < B_OK) - return sync.sem; - - set_sem_owner(sync.sem, B_SYSTEM_TEAM); - - sync.set = (select_info*)malloc(numFDs * sizeof(select_info)); - if (sync.set == NULL) { - delete_sem(sync.sem); - return B_NO_MEMORY; - } - sync.count = numFDs; + // allocate sync object + select_sync* sync; + status = create_select_sync(numFDs, sync); + if (status != B_OK) + return status; // start polling file descriptors (by selecting them) + BenaphoreLocker locker(sync->lock); + for (i = 0; i < numFDs; i++) { int fd = fds[i].fd; @@ -238,10 +255,12 @@ common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout, bool kernel) // initialize events masks fds[i].events &= ~POLLNVAL; fds[i].revents = 0; - sync.set[i].selected_events = fds[i].events; - sync.set[i].events = 0; + sync->set[i].selected_events = fds[i].events; + sync->set[i].events = 0; - count += select_events(&sync, fd, i, fds[i].events, kernel); + select_fd(fd, sync, i, kernel); + if (sync->set[i].selected_events != 0) + count++; } if (count < 1) { @@ -249,15 +268,17 @@ common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout, bool kernel) goto err; } - status = acquire_sem_etc(sync.sem, 1, + locker.Unlock(); + + status = acquire_sem_etc(sync->sem, 1, B_CAN_INTERRUPT | (timeout != -1 ? B_RELATIVE_TIMEOUT : 0), timeout); // deselect file descriptors - for (i = 0; i < numFDs; i++) { - deselect_events(&sync, fds[i].fd, sync.set[i].selected_events, kernel); - // TODO: same comments apply as on common_select() - } + locker.Lock(); + + for (i = 0; i < numFDs; i++) + deselect_fd(fds[i].fd, sync, i, kernel); // collect the events that are happened in the meantime @@ -268,7 +289,7 @@ common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout, bool kernel) continue; // POLLxxx flags and B_SELECT_xxx flags are compatible - fds[i].revents = sync.set[i].events; + fds[i].revents = sync->set[i].events; if (fds[i].revents != 0) count++; } @@ -282,36 +303,45 @@ common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout, bool kernel) } err: - delete_sem(sync.sem); - free(sync.set); + locker.Unlock(); + put_select_sync(sync); return count; } +// #pragma mark - VFS private + + +status_t +notify_select_events(select_info* info, uint16 events) +{ + FUNCTION(("notify_select_events(%p (%p), 0x%x)\n", info, info->sync, + events)); + + if (info == NULL + || info->sync == NULL + || info->sync->sem < B_OK) + return B_BAD_VALUE; + + info->events |= events; + + // only wake up the waiting select()/poll() call if the events + // match one of the selected ones + if (info->selected_events & events) + return release_sem(info->sync->sem); + + return B_OK; +} + + // #pragma mark - public kernel API status_t -notify_select_event(struct selectsync *_sync, uint32 ref, uint8 event) +notify_select_event(struct selectsync *sync, uint32 /*ref*/, uint8 event) { - select_sync *sync = (select_sync *)_sync; - - FUNCTION(("notify_select_event(%p, %lu, %u)\n", _sync, ref, event)); - - if (sync == NULL - || sync->sem < B_OK - || ref > sync->count) - return B_BAD_VALUE; - - sync->set[ref].events |= SELECT_FLAG(event); - - // only wake up the waiting select()/poll() call if the event - // match the ones selected - if (sync->set[ref].selected_events & SELECT_FLAG(event)) - return release_sem(sync->sem); - - return B_OK; + return notify_select_events((select_info*)sync, SELECT_FLAG(event)); } diff --git a/src/system/kernel/fs/vfs_select.h b/src/system/kernel/fs/vfs_select.h index 50d41da54d..407eaa5298 100644 --- a/src/system/kernel/fs/vfs_select.h +++ b/src/system/kernel/fs/vfs_select.h @@ -6,21 +6,29 @@ #define VFS_SELECT_H +#include + #include #include -#include +#include +struct select_sync; + typedef struct select_info { - uint16 selected_events; - uint16 events; + select_info* next; // next in the IO context's list + select_sync* sync; + uint16 selected_events; + uint16 events; } select_info; typedef struct select_sync { - sem_id sem; - uint32 count; - select_info *set; + vint32 ref_count; + benaphore lock; + sem_id sem; + uint32 count; + select_info* set; } select_sync; #define SELECT_FLAG(type) (1L << (type - 1)) @@ -38,4 +46,8 @@ struct select_sync_pool { SelectSyncPoolEntryList entries; }; +void put_select_sync(select_sync* sync); + +status_t notify_select_events(select_info* info, uint16 events); + #endif /* VFS_SELECT_H */