* fd.c -> fd.cpp

* Reworked the select support:
  - The io_context additionally stores a table of lists of select_infos,
    which enables it to deselect events of a pending select() when
    closing a FD. This prevents a race condition potentially causing a
    write to stale memory.
  - The opaque selectsync* passed to FSs is now actually a select_info*.
    This was necessary, since the FD's deselect() hook (unlike the
    select() hook) doesn't take a "ref" argument, so deselecting a
    single info (e.g. caused by a premature close()) was not possible.
    The select() hook's "ref" argument has become superfluous.
  - It should now be relatively easy to implement a poll_on_steroids()
    that can also wait for objects other than FDs (e.g. semaphores,
    ports, threads etc.). 
* Set/reset the signal mask in common_select(). This makes pselect()
  work as required.
* Reorganized vfs_resize_fd_table().



git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@22391 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Ingo Weinhold 2007-10-01 01:37:28 +00:00
parent f5ede4ec02
commit 66c03dc3a9
7 changed files with 394 additions and 246 deletions

View File

@ -16,6 +16,7 @@ extern "C" {
#endif #endif
struct file_descriptor; struct file_descriptor;
struct selectsync;
struct select_sync; struct select_sync;
struct fd_ops { struct fd_ops {
@ -23,8 +24,10 @@ struct fd_ops {
status_t (*fd_write)(struct file_descriptor *, off_t pos, const void *buffer, size_t *length); status_t (*fd_write)(struct file_descriptor *, off_t pos, const void *buffer, size_t *length);
off_t (*fd_seek)(struct file_descriptor *, off_t pos, int seekType); off_t (*fd_seek)(struct file_descriptor *, off_t pos, int seekType);
status_t (*fd_ioctl)(struct file_descriptor *, ulong op, void *buffer, size_t length); status_t (*fd_ioctl)(struct file_descriptor *, ulong op, void *buffer, size_t length);
status_t (*fd_select)(struct file_descriptor *, uint8 event, uint32 ref, struct select_sync *sync); status_t (*fd_select)(struct file_descriptor *, uint8 event, uint32 ref,
status_t (*fd_deselect)(struct file_descriptor *, uint8 event, struct select_sync *sync); struct selectsync *sync);
status_t (*fd_deselect)(struct file_descriptor *, uint8 event,
struct selectsync *sync);
status_t (*fd_read_dir)(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count); status_t (*fd_read_dir)(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
status_t (*fd_rewind_dir)(struct file_descriptor *); status_t (*fd_rewind_dir)(struct file_descriptor *);
status_t (*fd_read_stat)(struct file_descriptor *, struct stat *); status_t (*fd_read_stat)(struct file_descriptor *, struct stat *);
@ -74,9 +77,9 @@ extern void close_fd(struct file_descriptor *descriptor);
extern void put_fd(struct file_descriptor *descriptor); extern void put_fd(struct file_descriptor *descriptor);
extern void disconnect_fd(struct file_descriptor *descriptor); extern void disconnect_fd(struct file_descriptor *descriptor);
extern void inc_fd_ref_count(struct file_descriptor *descriptor); extern void inc_fd_ref_count(struct file_descriptor *descriptor);
extern status_t select_fd(int fd, uint8 event, uint32 ref, extern status_t select_fd(int fd, struct select_sync *sync, uint32 ref,
struct select_sync *sync, bool kernel); bool kernel);
extern status_t deselect_fd(int fd, uint8 event, struct select_sync *sync, extern status_t deselect_fd(int fd, struct select_sync *sync, uint32 ref,
bool kernel); bool kernel);
extern bool fd_is_valid(int fd, bool kernel); extern bool fd_is_valid(int fd, bool kernel);
extern struct vnode *fd_vnode(struct file_descriptor *descriptor); extern struct vnode *fd_vnode(struct file_descriptor *descriptor);

View File

@ -30,6 +30,7 @@ struct kernel_args;
struct vm_cache; struct vm_cache;
struct file_descriptor; struct file_descriptor;
struct selectsync; struct selectsync;
struct select_info;
struct pollfd; struct pollfd;
struct vnode; struct vnode;
@ -41,6 +42,7 @@ typedef struct io_context {
uint32 table_size; uint32 table_size;
uint32 num_used_fds; uint32 num_used_fds;
struct file_descriptor **fds; struct file_descriptor **fds;
struct select_info **select_infos;
uint8 *fds_close_on_exec; uint8 *fds_close_on_exec;
struct list node_monitors; struct list node_monitors;
uint32 num_monitors; uint32 num_monitors;

View File

@ -9,7 +9,7 @@ KernelMergeObject kernel_fs.o :
devfs.cpp devfs.cpp
rootfs.c rootfs.c
pipefs.cpp pipefs.cpp
fd.c fd.cpp
vfs.cpp vfs.cpp
vfs_boot.cpp vfs_boot.cpp
vfs_net_boot.cpp vfs_net_boot.cpp
@ -19,4 +19,4 @@ KernelMergeObject kernel_fs.o :
KPath.cpp KPath.cpp
: $(TARGET_KERNEL_PIC_CCFLAGS) -Wno-unused : $(TARGET_KERNEL_PIC_CCFLAGS) -Wno-unused
; ;

View File

@ -5,14 +5,18 @@
*/ */
#include <fd.h>
#include <stdlib.h>
#include <string.h>
#include <OS.h> #include <OS.h>
#include <fd.h>
#include <vfs.h>
#include <syscalls.h> #include <syscalls.h>
#include <util/AutoLock.h>
#include <vfs.h>
#include <malloc.h> #include "vfs_select.h"
#include <string.h>
//#define TRACE_FD //#define TRACE_FD
@ -23,6 +27,10 @@
#endif #endif
static void deselect_select_infos(file_descriptor* descriptor,
select_info* infos);
/*** General fd routines ***/ /*** General fd routines ***/
@ -44,9 +52,8 @@ dump_fd(int fd,struct file_descriptor *descriptor)
struct file_descriptor * struct file_descriptor *
alloc_fd(void) alloc_fd(void)
{ {
struct file_descriptor *descriptor; file_descriptor *descriptor
= (file_descriptor*)malloc(sizeof(struct file_descriptor));
descriptor = malloc(sizeof(struct file_descriptor));
if (descriptor == NULL) if (descriptor == NULL)
return NULL; return NULL;
@ -199,18 +206,13 @@ inc_fd_ref_count(struct file_descriptor *descriptor)
} }
struct file_descriptor * static struct file_descriptor *
get_fd(struct io_context *context, int fd) get_fd_locked(struct io_context *context, int fd)
{ {
struct file_descriptor *descriptor = NULL; if (fd < 0 || (uint32)fd >= context->table_size)
if (fd < 0)
return NULL; return NULL;
mutex_lock(&context->io_mutex); struct file_descriptor *descriptor = context->fds[fd];
if ((uint32)fd < context->table_size)
descriptor = context->fds[fd];
if (descriptor != NULL) { if (descriptor != NULL) {
// Disconnected descriptors cannot be accessed anymore // Disconnected descriptors cannot be accessed anymore
@ -220,12 +222,19 @@ get_fd(struct io_context *context, int fd)
inc_fd_ref_count(descriptor); inc_fd_ref_count(descriptor);
} }
mutex_unlock(&context->io_mutex);
return descriptor; return descriptor;
} }
struct file_descriptor *
get_fd(struct io_context *context, int fd)
{
MutexLocker(context->io_mutex);
return get_fd_locked(context, fd);
}
/** Removes the file descriptor from the specified slot. /** Removes the file descriptor from the specified slot.
*/ */
@ -242,19 +251,27 @@ remove_fd(struct io_context *context, int fd)
if ((uint32)fd < context->table_size) if ((uint32)fd < context->table_size)
descriptor = context->fds[fd]; descriptor = context->fds[fd];
select_info* selectInfos = NULL;
bool disconnected = false;
if (descriptor) { if (descriptor) {
// fd is valid // fd is valid
context->fds[fd] = NULL; context->fds[fd] = NULL;
fd_set_close_on_exec(context, fd, false); fd_set_close_on_exec(context, fd, false);
context->num_used_fds--; context->num_used_fds--;
if (descriptor->open_mode & O_DISCONNECTED) selectInfos = context->select_infos[fd];
descriptor = NULL; context->select_infos[fd] = NULL;
disconnected = (descriptor->open_mode & O_DISCONNECTED);
} }
mutex_unlock(&context->io_mutex); mutex_unlock(&context->io_mutex);
return descriptor; if (selectInfos != NULL)
deselect_select_infos(descriptor, selectInfos);
return disconnected ? NULL : descriptor;
} }
@ -292,7 +309,6 @@ dup_fd(int fd, bool kernel)
* *
* We do dup2() directly to be thread-safe. * We do dup2() directly to be thread-safe.
*/ */
static int static int
dup2_fd(int oldfd, int newfd, bool kernel) dup2_fd(int oldfd, int newfd, bool kernel)
{ {
@ -321,9 +337,12 @@ dup2_fd(int oldfd, int newfd, bool kernel)
// Check for identity, note that it cannot be made above // Check for identity, note that it cannot be made above
// because we always want to return an error on invalid // because we always want to return an error on invalid
// handles // handles
select_info* selectInfos = NULL;
if (oldfd != newfd) { if (oldfd != newfd) {
// Now do the work // Now do the work
evicted = context->fds[newfd]; evicted = context->fds[newfd];
selectInfos = context->select_infos[newfd];
context->select_infos[newfd] = NULL;
atomic_add(&context->fds[oldfd]->ref_count, 1); atomic_add(&context->fds[oldfd]->ref_count, 1);
atomic_add(&context->fds[oldfd]->open_count, 1); atomic_add(&context->fds[oldfd]->open_count, 1);
context->fds[newfd] = context->fds[oldfd]; context->fds[newfd] = context->fds[oldfd];
@ -338,6 +357,7 @@ dup2_fd(int oldfd, int newfd, bool kernel)
// Say bye bye to the evicted fd // Say bye bye to the evicted fd
if (evicted) { if (evicted) {
deselect_select_infos(evicted, selectInfos);
close_fd(evicted); close_fd(evicted);
put_fd(evicted); put_fd(evicted);
} }
@ -366,50 +386,133 @@ fd_ioctl(bool kernelFD, int fd, ulong op, void *buffer, size_t length)
} }
status_t static void
select_fd(int fd, uint8 event, uint32 ref, struct select_sync *sync, bool kernel) deselect_select_infos(file_descriptor* descriptor, select_info* infos)
{ {
struct file_descriptor *descriptor; TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));
status_t status;
TRACE(("select_fd(fd = %d, event = %u, ref = %lu, selectsync = %p)\n", fd, event, ref, sync)); select_info* info = infos;
while (info != NULL) {
select_sync* sync = info->sync;
descriptor = get_fd(get_current_io_context(kernel), fd); // deselect the selected events
if (descriptor == NULL) if (descriptor->ops->fd_deselect && info->selected_events) {
return B_FILE_ERROR; for (uint16 event = 1; event < 16; event++) {
if (info->selected_events & SELECT_FLAG(event)) {
descriptor->ops->fd_deselect(descriptor, event,
(selectsync*)info);
}
}
}
if (descriptor->ops->fd_select) { info = info->next;
status = descriptor->ops->fd_select(descriptor, event, ref, sync); put_select_sync(sync);
} else {
// if the I/O subsystem doesn't support select(), we will
// immediately notify the select call
status = notify_select_event((void *)sync, ref, event);
} }
put_fd(descriptor);
return status;
} }
status_t status_t
deselect_fd(int fd, uint8 event, struct select_sync *sync, bool kernel) select_fd(int fd, struct select_sync* sync, uint32 ref, bool kernel)
{ {
struct file_descriptor *descriptor; TRACE(("select_fd(fd = %d, selectsync = %p, ref = %lu, 0x%x)\n", fd, sync, ref, sync->set[ref].selected_events));
status_t status;
TRACE(("deselect_fd(fd = %d, event = %u, selectsync = %p)\n", fd, event, sync)); select_info* info = &sync->set[ref];
if (info->selected_events == 0)
return B_OK;
descriptor = get_fd(get_current_io_context(kernel), fd); io_context* context = get_current_io_context(kernel);
MutexLocker locker(context->io_mutex);
struct file_descriptor* descriptor = get_fd_locked(context, fd);
if (descriptor == NULL) if (descriptor == NULL)
return B_FILE_ERROR; return B_FILE_ERROR;
if (descriptor->ops->fd_deselect) if (!descriptor->ops->fd_select) {
status = descriptor->ops->fd_deselect(descriptor, event, sync); // if the I/O subsystem doesn't support select(), we will
else // immediately notify the select call
status = B_OK; locker.Unlock();
put_fd(descriptor);
return notify_select_events(info, info->selected_events);
}
// add the info to the IO context
info->next = context->select_infos[fd];
context->select_infos[fd] = info;
// as long as the info is in the list, we keep a reference to the sync
// object
atomic_add(&sync->ref_count, 1);
locker.Unlock();
// select any events asked for
uint32 selectedEvents = 0;
for (uint16 event = 1; event < 16; event++) {
if (info->selected_events & SELECT_FLAG(event)
&& descriptor->ops->fd_select(descriptor, event, ref,
(selectsync*)info) == B_OK) {
selectedEvents |= SELECT_FLAG(event);
}
}
info->selected_events = selectedEvents;
// if nothing has been selected, we deselect immediately
if (selectedEvents == 0)
deselect_fd(fd, sync, ref, kernel);
put_fd(descriptor); put_fd(descriptor);
return status; return B_OK;
}
status_t
deselect_fd(int fd, struct select_sync* sync, uint32 ref, bool kernel)
{
TRACE(("deselect_fd(fd = %d, selectsync = %p, ref = %lu)\n", fd, sync, ref));
select_info* info = &sync->set[ref];
if (info->selected_events == 0)
return B_OK;
io_context* context = get_current_io_context(kernel);
MutexLocker locker(context->io_mutex);
struct file_descriptor* descriptor = get_fd_locked(context, fd);
if (descriptor == NULL)
return B_FILE_ERROR;
// remove the info from the IO context
select_info** infoLocation = &context->select_infos[fd];
while (*infoLocation != NULL && *infoLocation != info)
infoLocation = &(*infoLocation)->next;
// If not found, someone else beat us to it.
if (*infoLocation != info) {
locker.Unlock();
put_fd(descriptor);
return B_OK;
}
*infoLocation = info->next;
locker.Unlock();
// deselect the selected events
if (descriptor->ops->fd_deselect && info->selected_events) {
for (uint16 event = 1; event < 16; event++) {
if (info->selected_events & SELECT_FLAG(event)) {
descriptor->ops->fd_deselect(descriptor, event,
(selectsync*)info);
}
}
}
put_select_sync(sync);
put_fd(descriptor);
return B_OK;
} }
@ -541,7 +644,7 @@ _user_readv(int fd, off_t pos, const iovec *userVecs, size_t count)
goto err1; goto err1;
} }
vecs = malloc(sizeof(iovec) * count); vecs = (iovec*)malloc(sizeof(iovec) * count);
if (vecs == NULL) { if (vecs == NULL) {
status = B_NO_MEMORY; status = B_NO_MEMORY;
goto err1; goto err1;
@ -654,7 +757,7 @@ _user_writev(int fd, off_t pos, const iovec *userVecs, size_t count)
goto err1; goto err1;
} }
vecs = malloc(sizeof(iovec) * count); vecs = (iovec*)malloc(sizeof(iovec) * count);
if (vecs == NULL) { if (vecs == NULL) {
status = B_NO_MEMORY; status = B_NO_MEMORY;
goto err1; goto err1;

View File

@ -202,9 +202,9 @@ static off_t file_seek(struct file_descriptor *, off_t pos, int seek_type);
static void file_free_fd(struct file_descriptor *); static void file_free_fd(struct file_descriptor *);
static status_t file_close(struct file_descriptor *); static status_t file_close(struct file_descriptor *);
static status_t file_select(struct file_descriptor *, uint8 event, uint32 ref, static status_t file_select(struct file_descriptor *, uint8 event, uint32 ref,
struct select_sync *sync); struct selectsync *sync);
static status_t file_deselect(struct file_descriptor *, uint8 event, static status_t file_deselect(struct file_descriptor *, uint8 event,
struct select_sync *sync); struct selectsync *sync);
static status_t dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count); static status_t dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
static status_t dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count); static status_t dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count);
static status_t dir_rewind(struct file_descriptor *); static status_t dir_rewind(struct file_descriptor *);
@ -3272,16 +3272,21 @@ vfs_new_io_context(void *_parentContext)
tableSize = DEFAULT_FD_TABLE_SIZE; tableSize = DEFAULT_FD_TABLE_SIZE;
// allocate space for FDs and their close-on-exec flag // allocate space for FDs and their close-on-exec flag
context->fds = (file_descriptor **)malloc(sizeof(struct file_descriptor *) * tableSize context->fds = (file_descriptor**)malloc(
sizeof(struct file_descriptor*) * tableSize
+ sizeof(struct select_sync*) * tableSize
+ (tableSize + 7) / 8); + (tableSize + 7) / 8);
if (context->fds == NULL) { if (context->fds == NULL) {
free(context); free(context);
return NULL; return NULL;
} }
memset(context->fds, 0, sizeof(struct file_descriptor *) * tableSize context->select_infos = (select_info**)(context->fds + tableSize);
context->fds_close_on_exec = (uint8 *)(context->select_infos + tableSize);
memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
+ sizeof(struct select_sync*) * tableSize
+ (tableSize + 7) / 8); + (tableSize + 7) / 8);
context->fds_close_on_exec = (uint8 *)(context->fds + tableSize);
if (mutex_init(&context->io_mutex, "I/O context") < 0) { if (mutex_init(&context->io_mutex, "I/O context") < 0) {
free(context->fds); free(context->fds);
@ -3360,69 +3365,62 @@ static status_t
vfs_resize_fd_table(struct io_context *context, const int newSize) vfs_resize_fd_table(struct io_context *context, const int newSize)
{ {
struct file_descriptor **fds; struct file_descriptor **fds;
int status = B_OK;
if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE) if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
return EINVAL; return EINVAL;
mutex_lock(&context->io_mutex); MutexLocker(context->io_mutex);
int oldSize = context->table_size; int oldSize = context->table_size;
int oldCloseOnExitBitmapSize = (oldSize + 7) / 8; int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
int newCloseOnExitBitmapSize = (newSize + 7) / 8; int newCloseOnExitBitmapSize = (newSize + 7) / 8;
// If the tables shrink, make sure none of the fds being dropped are in use.
if (newSize < oldSize) { if (newSize < oldSize) {
// shrink the fd table
// Make sure none of the fds being dropped are in use
for (int i = oldSize; i-- > newSize;) { for (int i = oldSize; i-- > newSize;) {
if (context->fds[i]) { if (context->fds[i])
status = EBUSY; return EBUSY;
goto out;
}
} }
}
fds = (struct file_descriptor **)malloc( // store pointers to the old tables
sizeof(struct file_descriptor *) * newSize file_descriptor** oldFDs = context->fds;
+ newCloseOnExitBitmapSize); select_info** oldSelectInfos = context->select_infos;
if (fds == NULL) { uint8* oldCloseOnExecTable = context->fds_close_on_exec;
status = ENOMEM;
goto out;
}
memcpy(fds, context->fds, sizeof(struct file_descriptor *) * newSize); // allocate new tables
file_descriptor** newFDs = (file_descriptor**)malloc(
sizeof(struct file_descriptor*) * newSize
+ sizeof(struct select_sync*) * newSize
+ newCloseOnExitBitmapSize);
if (newFDs == NULL)
return ENOMEM;
// copy close-on-exit bitmap context->fds = newFDs;
memcpy(fds + newSize, context->fds + oldSize, newCloseOnExitBitmapSize); context->select_infos = (select_info**)(context->fds + newSize);
} else { context->fds_close_on_exec = (uint8 *)(context->select_infos + newSize);
// enlarge the fd table context->table_size = newSize;
fds = (struct file_descriptor **)malloc( // copy entries from old tables
sizeof(struct file_descriptor *) * newSize int toCopy = min_c(oldSize, newSize);
+ newCloseOnExitBitmapSize);
if (fds == NULL) {
status = ENOMEM;
goto out;
}
// copy the fd array, and zero the additional slots memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
memcpy(fds, context->fds, sizeof(void *) * oldSize); memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
memset(fds + oldSize, 0, sizeof(void *) * (newSize - oldSize)); memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
// copy close-on-exit bitmap, and zero out additional bytes // clear additional entries, if the tables grow
memcpy(fds + newSize, context->fds + oldSize, oldCloseOnExitBitmapSize); if (newSize > oldSize) {
memset((uint8*)(fds + newSize) + oldCloseOnExitBitmapSize, 0, memset(context->fds + oldSize, 0, sizeof(void *) * (newSize - oldSize));
memset(context->select_infos + oldSize, 0,
sizeof(void *) * (newSize - oldSize));
memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize); newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
} }
free(context->fds); free(oldFDs);
context->fds = fds;
context->fds_close_on_exec = (uint8 *)(context->fds + newSize);
context->table_size = newSize;
out: return B_OK;
mutex_unlock(&context->io_mutex);
return status;
} }
@ -3897,7 +3895,7 @@ file_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
static status_t static status_t
file_select(struct file_descriptor *descriptor, uint8 event, uint32 ref, file_select(struct file_descriptor *descriptor, uint8 event, uint32 ref,
struct select_sync *sync) struct selectsync *sync)
{ {
FUNCTION(("file_select(%p, %u, %lu, %p)\n", descriptor, event, ref, sync)); FUNCTION(("file_select(%p, %u, %lu, %p)\n", descriptor, event, ref, sync));
@ -3905,16 +3903,16 @@ file_select(struct file_descriptor *descriptor, uint8 event, uint32 ref,
// If the FS has no select() hook, notify select() now. // If the FS has no select() hook, notify select() now.
if (FS_CALL(vnode, select) == NULL) if (FS_CALL(vnode, select) == NULL)
return notify_select_event((selectsync*)sync, ref, event); return notify_select_event(sync, ref, event);
return FS_CALL(vnode, select)(vnode->mount->cookie, vnode->private_node, return FS_CALL(vnode, select)(vnode->mount->cookie, vnode->private_node,
descriptor->cookie, event, ref, (selectsync*)sync); descriptor->cookie, event, ref, sync);
} }
static status_t static status_t
file_deselect(struct file_descriptor *descriptor, uint8 event, file_deselect(struct file_descriptor *descriptor, uint8 event,
struct select_sync *sync) struct selectsync *sync)
{ {
struct vnode *vnode = descriptor->u.vnode; struct vnode *vnode = descriptor->u.vnode;
@ -3922,7 +3920,7 @@ file_deselect(struct file_descriptor *descriptor, uint8 event,
return B_OK; return B_OK;
return FS_CALL(vnode, deselect)(vnode->mount->cookie, vnode->private_node, return FS_CALL(vnode, deselect)(vnode->mount->cookie, vnode->private_node,
descriptor->cookie, event, (selectsync*)sync); descriptor->cookie, event, sync);
} }

View File

@ -1,4 +1,5 @@
/* /*
* Copyright 2007, Ingo Weinhold, bonefish@cs.tu-berlin.de. All rights reserved.
* Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved. * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
* Distributed under the terms of the MIT License. * Distributed under the terms of the MIT License.
*/ */
@ -6,17 +7,22 @@
#include "vfs_select.h" #include "vfs_select.h"
#include <vfs.h>
#include <fd.h>
#include <syscalls.h>
#include <fs/select_sync_pool.h>
#include <new> #include <new>
#include <poll.h> #include <poll.h>
#include <signal.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <sys/select.h> #include <sys/select.h>
#include <AutoDeleter.h>
#include <fd.h>
#include <fs/select_sync_pool.h>
#include <syscalls.h>
#include <util/AutoLock.h>
#include <vfs.h>
//#define TRACE_VFS_SELECT //#define TRACE_VFS_SELECT
#ifdef TRACE_VFS_SELECT #ifdef TRACE_VFS_SELECT
# define PRINT(x) dprintf x # define PRINT(x) dprintf x
@ -27,37 +33,7 @@
#endif #endif
/*! Selects all events in the mask on the specified file descriptor */ using std::nothrow;
static int
select_events(struct select_sync *sync, int fd, int ref, uint16 selectedEvents,
bool kernel)
{
uint32 count = 0;
uint16 event = 1;
// select any events asked for
for (; event < 16; event++) {
if (selectedEvents & SELECT_FLAG(event)
&& select_fd(fd, event, ref, sync, kernel) == B_OK)
count++;
}
return count;
}
/*! Deselects all events in the mask on the specified file descriptor */
static void
deselect_events(struct select_sync *sync, int fd, uint16 selectedEvents,
bool kernel)
{
uint16 event = 1;
// deselect any events previously asked for
for (; event < 16; event++) {
if (selectedEvents & SELECT_FLAG(event))
deselect_fd(fd, event, sync, kernel);
}
}
/*! /*!
@ -74,18 +50,73 @@ fd_zero(fd_set *set, int numFDs)
} }
static status_t
create_select_sync(int numFDs, select_sync*& _sync)
{
// create sync structure
select_sync* sync = new(nothrow) select_sync;
if (sync == NULL)
return B_NO_MEMORY;
ObjectDeleter<select_sync> syncDeleter(sync);
// create info set
sync->set = new(nothrow) select_info[numFDs];
if (sync->set == NULL)
return B_NO_MEMORY;
ArrayDeleter<select_info> setDeleter(sync->set);
// create select event semaphore
sync->sem = create_sem(0, "select");
if (sync->sem < 0)
return sync->sem;
// create lock
status_t error = benaphore_init(&sync->lock, "select sync");
if (error != B_OK) {
delete_sem(sync->sem);
return error;
}
sync->count = numFDs;
sync->ref_count = 1;
for (int i = 0; i < numFDs; i++) {
sync->set[i].next = NULL;
sync->set[i].sync = sync;
}
setDeleter.Detach();
syncDeleter.Detach();
_sync = sync;
return B_OK;
}
void
put_select_sync(select_sync* sync)
{
FUNCTION(("put_select_sync(%p): -> %ld\n", sync, sync->ref_count - 1));
if (atomic_add(&sync->ref_count, -1) == 1) {
delete_sem(sync->sem);
benaphore_destroy(&sync->lock);
delete[] sync->set;
delete sync;
}
}
static int static int
common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet, common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet,
bigtime_t timeout, const sigset_t *sigMask, bool kernel) bigtime_t timeout, const sigset_t *sigMask, bool kernel)
{ {
struct select_sync sync;
status_t status = B_OK; status_t status = B_OK;
int fd; int fd;
FUNCTION(("common_select(%d, %p, %p, %p, %lld, %p, %d)\n", numFDs, readSet, FUNCTION(("[%ld] common_select(%d, %p, %p, %p, %lld, %p, %d)\n",
writeSet, errorSet, timeout, sigMask, kernel)); find_thread(NULL), numFDs, readSet, writeSet, errorSet, timeout,
sigMask, kernel));
// TODO: set sigMask to make pselect() functional different from select()
// check if fds are valid before doing anything // check if fds are valid before doing anything
@ -97,62 +128,54 @@ common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet,
return B_FILE_ERROR; return B_FILE_ERROR;
} }
// allocate resources // allocate sync object
select_sync* sync;
sync.sem = create_sem(0, "select"); status = create_select_sync(numFDs, sync);
if (sync.sem < B_OK) if (status != B_OK)
return sync.sem; return status;
set_sem_owner(sync.sem, B_SYSTEM_TEAM);
sync.set = (select_info *)malloc(sizeof(select_info) * numFDs);
if (sync.set == NULL) {
delete_sem(sync.sem);
return B_NO_MEMORY;
}
sync.count = numFDs;
// start selecting file descriptors // start selecting file descriptors
BenaphoreLocker locker(sync->lock);
for (fd = 0; fd < numFDs; fd++) { for (fd = 0; fd < numFDs; fd++) {
sync.set[fd].selected_events = 0; sync->set[fd].selected_events = 0;
sync.set[fd].events = 0; sync->set[fd].events = 0;
if (readSet && FD_ISSET(fd, readSet)) if (readSet && FD_ISSET(fd, readSet))
sync.set[fd].selected_events = SELECT_FLAG(B_SELECT_READ); sync->set[fd].selected_events = SELECT_FLAG(B_SELECT_READ);
if (writeSet && FD_ISSET(fd, writeSet)) if (writeSet && FD_ISSET(fd, writeSet))
sync.set[fd].selected_events |= SELECT_FLAG(B_SELECT_WRITE); sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_WRITE);
if (errorSet && FD_ISSET(fd, errorSet)) if (errorSet && FD_ISSET(fd, errorSet))
sync.set[fd].selected_events |= SELECT_FLAG(B_SELECT_ERROR); sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_ERROR);
select_events(&sync, fd, fd, sync.set[fd].selected_events, kernel); select_fd(fd, sync, fd, kernel);
// array position is the same as the fd for select() // array position is the same as the fd for select()
} }
status = acquire_sem_etc(sync.sem, 1, locker.Unlock();
// set new signal mask
sigset_t oldSigMask;
if (sigMask != NULL)
sigprocmask(SIG_SETMASK, sigMask, &oldSigMask);
// wait for something to happen
status = acquire_sem_etc(sync->sem, 1,
B_CAN_INTERRUPT | (timeout != -1 ? B_RELATIVE_TIMEOUT : 0), timeout); B_CAN_INTERRUPT | (timeout != -1 ? B_RELATIVE_TIMEOUT : 0), timeout);
// restore the old signal mask
if (sigMask != NULL)
sigprocmask(SIG_SETMASK, &oldSigMask, NULL);
PRINT(("common_select(): acquire_sem_etc() returned: %lx\n", status)); PRINT(("common_select(): acquire_sem_etc() returned: %lx\n", status));
// deselect file descriptors // deselect file descriptors
for (fd = 0; fd < numFDs; fd++) { locker.Lock();
deselect_events(&sync, fd, sync.set[fd].selected_events, kernel);
// TODO: Since we're using FD indices instead of FDs (file_descriptors), for (fd = 0; fd < numFDs; fd++)
// it can happen that the FD index (even the FD) on which we invoked deselect_fd(fd, sync, fd, kernel);
// the select() FS hook is already closed at this point.
// This has two main consequences:
// 1) deselect() is not invoked, if a currently selected FD index is
// closed. Thus on close of a FD the FS would need to cleanup the
// select resources it had acquired. Harmless.
// 2) Caused by 1): If the FS close hook invokes notify_select_event()
// (which is a nice gesture, though actually undefined by the
// POSIX standard), it has no means of synchronization with the
// pending select() (since deselect() won't be invoked), i.e. a
// second call to notify_select_event() might be too late, since
// select() might already be finished. Dangerous!
// notify_select_event() would operate on already free()d memory!
}
PRINT(("common_select(): events deselected\n")); PRINT(("common_select(): events deselected\n"));
@ -163,8 +186,9 @@ common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet,
if (status == B_INTERRUPTED) { if (status == B_INTERRUPTED) {
// We must not clear the sets in this case, as applications may // We must not clear the sets in this case, as applications may
// rely on the contents of them. // rely on the contents of them.
count = B_INTERRUPTED; locker.Unlock();
goto err; put_select_sync(sync);
return B_INTERRUPTED;
} }
// Clear sets to store the received events // Clear sets to store the received events
@ -176,15 +200,17 @@ common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet,
if (status == B_OK) { if (status == B_OK) {
for (count = 0, fd = 0;fd < numFDs; fd++) { for (count = 0, fd = 0;fd < numFDs; fd++) {
if (readSet && sync.set[fd].events & SELECT_FLAG(B_SELECT_READ)) { if (readSet && sync->set[fd].events & SELECT_FLAG(B_SELECT_READ)) {
FD_SET(fd, readSet); FD_SET(fd, readSet);
count++; count++;
} }
if (writeSet && sync.set[fd].events & SELECT_FLAG(B_SELECT_WRITE)) { if (writeSet
&& sync->set[fd].events & SELECT_FLAG(B_SELECT_WRITE)) {
FD_SET(fd, writeSet); FD_SET(fd, writeSet);
count++; count++;
} }
if (errorSet && sync.set[fd].events & SELECT_FLAG(B_SELECT_ERROR)) { if (errorSet
&& sync->set[fd].events & SELECT_FLAG(B_SELECT_ERROR)) {
FD_SET(fd, errorSet); FD_SET(fd, errorSet);
count++; count++;
} }
@ -193,9 +219,8 @@ common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet,
// B_TIMED_OUT and B_WOULD_BLOCK are supposed to return 0 // B_TIMED_OUT and B_WOULD_BLOCK are supposed to return 0
err: locker.Unlock();
delete_sem(sync.sem); put_select_sync(sync);
free(sync.set);
return count; return count;
} }
@ -208,24 +233,16 @@ common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout, bool kernel)
int count = 0; int count = 0;
uint32 i; uint32 i;
// allocate resources // allocate sync object
select_sync* sync;
select_sync sync; status = create_select_sync(numFDs, sync);
sync.sem = create_sem(0, "poll"); if (status != B_OK)
if (sync.sem < B_OK) return status;
return sync.sem;
set_sem_owner(sync.sem, B_SYSTEM_TEAM);
sync.set = (select_info*)malloc(numFDs * sizeof(select_info));
if (sync.set == NULL) {
delete_sem(sync.sem);
return B_NO_MEMORY;
}
sync.count = numFDs;
// start polling file descriptors (by selecting them) // start polling file descriptors (by selecting them)
BenaphoreLocker locker(sync->lock);
for (i = 0; i < numFDs; i++) { for (i = 0; i < numFDs; i++) {
int fd = fds[i].fd; int fd = fds[i].fd;
@ -238,10 +255,12 @@ common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout, bool kernel)
// initialize events masks // initialize events masks
fds[i].events &= ~POLLNVAL; fds[i].events &= ~POLLNVAL;
fds[i].revents = 0; fds[i].revents = 0;
sync.set[i].selected_events = fds[i].events; sync->set[i].selected_events = fds[i].events;
sync.set[i].events = 0; sync->set[i].events = 0;
count += select_events(&sync, fd, i, fds[i].events, kernel); select_fd(fd, sync, i, kernel);
if (sync->set[i].selected_events != 0)
count++;
} }
if (count < 1) { if (count < 1) {
@ -249,15 +268,17 @@ common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout, bool kernel)
goto err; goto err;
} }
status = acquire_sem_etc(sync.sem, 1, locker.Unlock();
status = acquire_sem_etc(sync->sem, 1,
B_CAN_INTERRUPT | (timeout != -1 ? B_RELATIVE_TIMEOUT : 0), timeout); B_CAN_INTERRUPT | (timeout != -1 ? B_RELATIVE_TIMEOUT : 0), timeout);
// deselect file descriptors // deselect file descriptors
for (i = 0; i < numFDs; i++) { locker.Lock();
deselect_events(&sync, fds[i].fd, sync.set[i].selected_events, kernel);
// TODO: same comments apply as on common_select() for (i = 0; i < numFDs; i++)
} deselect_fd(fds[i].fd, sync, i, kernel);
// collect the events that are happened in the meantime // collect the events that are happened in the meantime
@ -268,7 +289,7 @@ common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout, bool kernel)
continue; continue;
// POLLxxx flags and B_SELECT_xxx flags are compatible // POLLxxx flags and B_SELECT_xxx flags are compatible
fds[i].revents = sync.set[i].events; fds[i].revents = sync->set[i].events;
if (fds[i].revents != 0) if (fds[i].revents != 0)
count++; count++;
} }
@ -282,36 +303,45 @@ common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout, bool kernel)
} }
err: err:
delete_sem(sync.sem); locker.Unlock();
free(sync.set); put_select_sync(sync);
return count; return count;
} }
// #pragma mark - VFS private
status_t
notify_select_events(select_info* info, uint16 events)
{
FUNCTION(("notify_select_events(%p (%p), 0x%x)\n", info, info->sync,
events));
if (info == NULL
|| info->sync == NULL
|| info->sync->sem < B_OK)
return B_BAD_VALUE;
info->events |= events;
// only wake up the waiting select()/poll() call if the events
// match one of the selected ones
if (info->selected_events & events)
return release_sem(info->sync->sem);
return B_OK;
}
// #pragma mark - public kernel API // #pragma mark - public kernel API
status_t status_t
notify_select_event(struct selectsync *_sync, uint32 ref, uint8 event) notify_select_event(struct selectsync *sync, uint32 /*ref*/, uint8 event)
{ {
select_sync *sync = (select_sync *)_sync; return notify_select_events((select_info*)sync, SELECT_FLAG(event));
FUNCTION(("notify_select_event(%p, %lu, %u)\n", _sync, ref, event));
if (sync == NULL
|| sync->sem < B_OK
|| ref > sync->count)
return B_BAD_VALUE;
sync->set[ref].events |= SELECT_FLAG(event);
// only wake up the waiting select()/poll() call if the event
// match the ones selected
if (sync->set[ref].selected_events & SELECT_FLAG(event))
return release_sem(sync->sem);
return B_OK;
} }

View File

@ -6,21 +6,29 @@
#define VFS_SELECT_H #define VFS_SELECT_H
#include <OS.h>
#include <Select.h> #include <Select.h>
#include <util/DoublyLinkedList.h> #include <util/DoublyLinkedList.h>
#include <OS.h> #include <lock.h>
struct select_sync;
typedef struct select_info { typedef struct select_info {
uint16 selected_events; select_info* next; // next in the IO context's list
uint16 events; select_sync* sync;
uint16 selected_events;
uint16 events;
} select_info; } select_info;
typedef struct select_sync { typedef struct select_sync {
sem_id sem; vint32 ref_count;
uint32 count; benaphore lock;
select_info *set; sem_id sem;
uint32 count;
select_info* set;
} select_sync; } select_sync;
#define SELECT_FLAG(type) (1L << (type - 1)) #define SELECT_FLAG(type) (1L << (type - 1))
@ -38,4 +46,8 @@ struct select_sync_pool {
SelectSyncPoolEntryList entries; SelectSyncPoolEntryList entries;
}; };
void put_select_sync(select_sync* sync);
status_t notify_select_events(select_info* info, uint16 events);
#endif /* VFS_SELECT_H */ #endif /* VFS_SELECT_H */