haiku/src/system/kernel/wait_for_objects.cpp


/*
 * Copyright 2007, Ingo Weinhold, bonefish@cs.tu-berlin.de. All rights reserved.
 * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
 * Distributed under the terms of the MIT License.
 */

#include <fs/select_sync_pool.h>
#include <wait_for_objects.h>

#include <new>

#include <poll.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>

#include <OS.h>
#include <Select.h>

#include <AutoDeleter.h>
#include <fs/fd.h>
#include <port.h>
#include <sem.h>
#include <syscalls.h>
#include <thread.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <vfs.h>


//#define TRACE_WAIT_FOR_OBJECTS
#ifdef TRACE_WAIT_FOR_OBJECTS
#	define PRINT(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define PRINT(x) ;
#	define FUNCTION(x) ;
#endif


using std::nothrow;


struct select_sync_pool_entry
	: DoublyLinkedListLinkImpl<select_sync_pool_entry> {
	selectsync	*sync;
	uint16		events;
};

typedef DoublyLinkedList<select_sync_pool_entry> SelectSyncPoolEntryList;

struct select_sync_pool {
	SelectSyncPoolEntryList entries;
};


struct select_ops {
	status_t (*select)(int32 object, struct select_info* info, bool kernel);
	status_t (*deselect)(int32 object, struct select_info* info, bool kernel);
};

static const select_ops kSelectOps[] = {
	// B_OBJECT_TYPE_FD
	{
		select_fd,
		deselect_fd
	},
	// B_OBJECT_TYPE_SEMAPHORE
	{
		select_sem,
		deselect_sem
	},
	// B_OBJECT_TYPE_PORT
	{
		select_port,
		deselect_port
	},
	// B_OBJECT_TYPE_THREAD
	{
		select_thread,
		deselect_thread
	}
};

static const uint32 kSelectOpsCount = sizeof(kSelectOps) / sizeof(select_ops);


/*!
	Clears all bits in the fd_set - since we are using variable-sized
	arrays in the kernel, we can't use the FD_ZERO() macro provided by
	sys/select.h for this task.
	All other FD_xxx() macros are safe to use, though.
*/
static inline void
fd_zero(fd_set *set, int numFDs)
{
	if (set != NULL)
		memset(set, 0, _howmany(numFDs, NFDBITS) * sizeof(fd_mask));
}
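

/*!	Allocates a select_sync object with \a numFDs select_info slots, creates
	its wakeup semaphore and lock, and initializes the reference count to 1.
	On success the caller owns that reference and must release it via
	put_select_sync().
*/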
static status_t
create_select_sync(int numFDs, select_sync*& _sync)
{
	// create sync structure
	select_sync* sync = new(nothrow) select_sync;
	if (sync == NULL)
		return B_NO_MEMORY;
	ObjectDeleter<select_sync> syncDeleter(sync);

	// create info set
	sync->set = new(nothrow) select_info[numFDs];
	if (sync->set == NULL)
		return B_NO_MEMORY;
	ArrayDeleter<select_info> setDeleter(sync->set);

	// create select event semaphore
	sync->sem = create_sem(0, "select");
	if (sync->sem < 0)
		return sync->sem;

	// create lock
	status_t error = benaphore_init(&sync->lock, "select sync");
	if (error != B_OK) {
		delete_sem(sync->sem);
		return error;
	}

	sync->count = numFDs;
	sync->ref_count = 1;

	for (int i = 0; i < numFDs; i++) {
		sync->set[i].next = NULL;
		sync->set[i].sync = sync;
	}

	setDeleter.Detach();
	syncDeleter.Detach();

	_sync = sync;
	return B_OK;
}
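

/*!	Releases a reference to \a sync. Once the last reference is gone, the
	semaphore, the lock, the info array, and the object itself are destroyed.
*/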
void
put_select_sync(select_sync* sync)
{
	FUNCTION(("put_select_sync(%p): -> %ld\n", sync, sync->ref_count - 1));

	if (atomic_add(&sync->ref_count, -1) == 1) {
		delete_sem(sync->sem);
		benaphore_destroy(&sync->lock);
		delete[] sync->set;
		delete sync;
	}
}
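

/*!	Backend for both the kernel and the userland select(): selects all
	requested file descriptors, blocks on the sync semaphore until an event
	arrives, the timeout expires, or a signal interrupts the wait, then
	deselects everything and reports the events back in the given sets.
*/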
static int
common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet,
	bigtime_t timeout, const sigset_t *sigMask, bool kernel)
{
	status_t status = B_OK;
	int fd;

	FUNCTION(("[%ld] common_select(%d, %p, %p, %p, %lld, %p, %d)\n",
		find_thread(NULL), numFDs, readSet, writeSet, errorSet, timeout,
		sigMask, kernel));

	// check if fds are valid before doing anything
	for (fd = 0; fd < numFDs; fd++) {
		if (((readSet && FD_ISSET(fd, readSet))
				|| (writeSet && FD_ISSET(fd, writeSet))
				|| (errorSet && FD_ISSET(fd, errorSet)))
			&& !fd_is_valid(fd, kernel))
			return B_FILE_ERROR;
	}

	// allocate sync object
	select_sync* sync;
	status = create_select_sync(numFDs, sync);
	if (status != B_OK)
		return status;

	// start selecting file descriptors
	BenaphoreLocker locker(sync->lock);

	for (fd = 0; fd < numFDs; fd++) {
		sync->set[fd].selected_events = 0;
		sync->set[fd].events = 0;

		if (readSet && FD_ISSET(fd, readSet))
			sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_READ);
		if (writeSet && FD_ISSET(fd, writeSet))
			sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_WRITE);
		if (errorSet && FD_ISSET(fd, errorSet))
			sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_ERROR);

		if (sync->set[fd].selected_events != 0) {
			select_fd(fd, sync->set + fd, kernel);
				// array position is the same as the fd for select()
		}
	}

	locker.Unlock();

	// set new signal mask
	sigset_t oldSigMask;
	if (sigMask != NULL)
		sigprocmask(SIG_SETMASK, sigMask, &oldSigMask);

	// wait for something to happen
	status = acquire_sem_etc(sync->sem, 1,
		B_CAN_INTERRUPT | (timeout != -1 ? B_RELATIVE_TIMEOUT : 0), timeout);

	// restore the old signal mask
	if (sigMask != NULL)
		sigprocmask(SIG_SETMASK, &oldSigMask, NULL);

	PRINT(("common_select(): acquire_sem_etc() returned: %lx\n", status));

	// deselect file descriptors
	locker.Lock();

	for (fd = 0; fd < numFDs; fd++)
		deselect_fd(fd, sync->set + fd, kernel);

	PRINT(("common_select(): events deselected\n"));

	// collect the events that happened in the meantime
	int count = 0;

	if (status == B_INTERRUPTED) {
		// We must not clear the sets in this case, as applications may
		// rely on their contents.
		locker.Unlock();
		put_select_sync(sync);
		return B_INTERRUPTED;
	}

	// Clear the sets to store the received events
	// (we can't use FD_ZERO() here, because we have variable-sized arrays;
	// the other FD_xxx() macros are safe, though).
	fd_zero(readSet, numFDs);
	fd_zero(writeSet, numFDs);
	fd_zero(errorSet, numFDs);

	if (status == B_OK) {
		for (count = 0, fd = 0; fd < numFDs; fd++) {
			if (readSet && sync->set[fd].events & SELECT_FLAG(B_SELECT_READ)) {
				FD_SET(fd, readSet);
				count++;
			}
			if (writeSet
				&& sync->set[fd].events & SELECT_FLAG(B_SELECT_WRITE)) {
				FD_SET(fd, writeSet);
				count++;
			}
			if (errorSet
				&& sync->set[fd].events & SELECT_FLAG(B_SELECT_ERROR)) {
				FD_SET(fd, errorSet);
				count++;
			}
		}
	}

	// B_TIMED_OUT and B_WOULD_BLOCK are supposed to return 0
	locker.Unlock();
	put_select_sync(sync);

	return count;
}
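

/*!	Backend for both the kernel and the userland poll(): selects the given
	file descriptors (flagging invalid ones with POLLNVAL), waits for events
	or the timeout, then deselects and stores the occurred events in the
	revents fields. Returns the number of descriptors with events, 0 on
	timeout, or an error code.
*/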
static int
common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout, bool kernel)
{
	status_t status = B_OK;
	int count = 0;
	uint32 i;

	// allocate sync object
	select_sync* sync;
	status = create_select_sync(numFDs, sync);
	if (status != B_OK)
		return status;

	// start polling file descriptors (by selecting them)
	BenaphoreLocker locker(sync->lock);

	for (i = 0; i < numFDs; i++) {
		int fd = fds[i].fd;

		// initialize the event masks; POLLERR and POLLHUP are always
		// monitored, POLLNVAL cannot be requested
		sync->set[i].selected_events = (fds[i].events & ~POLLNVAL)
			| POLLERR | POLLHUP;
		sync->set[i].events = 0;

		if (select_fd(fd, sync->set + i, kernel) == B_OK) {
			fds[i].revents = 0;
			if (sync->set[i].selected_events != 0)
				count++;
		} else
			fds[i].revents = POLLNVAL;
	}

	if (count < 1) {
		count = B_BAD_VALUE;
		goto err;
	}

	locker.Unlock();

	status = acquire_sem_etc(sync->sem, 1,
		B_CAN_INTERRUPT | (timeout != -1 ? B_RELATIVE_TIMEOUT : 0), timeout);

	// deselect file descriptors
	locker.Lock();

	for (i = 0; i < numFDs; i++)
		deselect_fd(fds[i].fd, sync->set + i, kernel);

	// collect the events that happened in the meantime
	switch (status) {
		case B_OK:
			for (count = 0, i = 0; i < numFDs; i++) {
				if (fds[i].revents == POLLNVAL)
					continue;

				// POLLxxx flags and B_SELECT_xxx flags are compatible
				fds[i].revents = sync->set[i].events
					& sync->set[i].selected_events;
				if (fds[i].revents != 0)
					count++;
			}
			break;
		case B_INTERRUPTED:
			count = B_INTERRUPTED;
			break;
		default:
			// B_TIMED_OUT and B_WOULD_BLOCK
			count = 0;
	}

err:
	locker.Unlock();
	put_select_sync(sync);

	return count;
}
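

/*!	Backend for wait_for_objects(): selects each object via the kSelectOps
	table according to its type, waits for events (unless an object was
	invalid), deselects, and writes the occurred events back into the infos
	array. Returns the number of objects with events, or an error code.
*/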
static ssize_t
common_wait_for_objects(object_wait_info* infos, int numInfos, uint32 flags,
	bigtime_t timeout, bool kernel)
{
	status_t status = B_OK;

	// allocate sync object
	select_sync* sync;
	status = create_select_sync(numInfos, sync);
	if (status != B_OK)
		return status;

	// start selecting objects
	BenaphoreLocker locker(sync->lock);

	ssize_t count = 0;
	bool invalid = false;
	for (int i = 0; i < numInfos; i++) {
		uint16 type = infos[i].type;
		int32 object = infos[i].object;

		// initialize the event masks; the error events are always monitored
		sync->set[i].selected_events = infos[i].events
			| B_EVENT_INVALID | B_EVENT_ERROR | B_EVENT_DISCONNECTED;
		sync->set[i].events = 0;

		if (type < kSelectOpsCount
			&& kSelectOps[type].select(object, sync->set + i, kernel) == B_OK) {
			infos[i].events = 0;
			if (sync->set[i].selected_events != 0)
				count++;
		} else {
			sync->set[i].selected_events = B_EVENT_INVALID;
			infos[i].events = B_EVENT_INVALID;
			invalid = true;
		}
	}

	locker.Unlock();

	if (count < 1) {
		put_select_sync(sync);
		return B_BAD_VALUE;
	}

	if (!invalid) {
		status = acquire_sem_etc(sync->sem, 1, B_CAN_INTERRUPT | flags,
			timeout);
	}

	// deselect objects
	locker.Lock();

	for (int i = 0; i < numInfos; i++) {
		uint16 type = infos[i].type;
		if (type < kSelectOpsCount && (infos[i].events & B_EVENT_INVALID) == 0)
			kSelectOps[type].deselect(infos[i].object, sync->set + i, kernel);
	}

	// collect the events that happened in the meantime
	switch (status) {
		case B_OK:
			count = 0;
			for (int i = 0; i < numInfos; i++) {
				infos[i].events = sync->set[i].events
					& sync->set[i].selected_events;
				if (infos[i].events != 0)
					count++;
			}
			break;
		default:
			// B_INTERRUPTED, B_TIMED_OUT, and B_WOULD_BLOCK
			count = status;
			break;
	}

	locker.Unlock();
	put_select_sync(sync);

	return count;
}


// #pragma mark - kernel private
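

/*!	Records the \a events that occurred for the given select_info and, if
	any of them were actually selected, wakes up the waiting
	select()/poll()/wait_for_objects() call by releasing the sync semaphore.
*/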
status_t
notify_select_events(select_info* info, uint16 events)
{
	FUNCTION(("notify_select_events(%p (%p), 0x%x)\n", info, info->sync,
		events));

	if (info == NULL
		|| info->sync == NULL
		|| info->sync->sem < B_OK)
		return B_BAD_VALUE;

	info->events |= events;

	// only wake up the waiting select()/poll() call if the events
	// match one of the selected ones
	if (info->selected_events & events)
		return release_sem_etc(info->sync->sem, 1, B_DO_NOT_RESCHEDULE);

	return B_OK;
}
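

/*!	Notifies each select_info in the singly linked \a list of the given
	\a events by calling notify_select_events() on it.
*/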
void
notify_select_events_list(select_info* list, uint16 events)
{
	struct select_info* info = list;
	while (info != NULL) {
		notify_select_events(info, events);
		info = info->next;
	}
}


// #pragma mark - public kernel API
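

/*!	Public counterpart of notify_select_events() for drivers and file
	systems: translates the single \a event into its flag representation and
	notifies the waiter behind \a sync.
*/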
status_t
notify_select_event(struct selectsync *sync, uint8 event)
{
	return notify_select_events((select_info*)sync, SELECT_FLAG(event));
}


// #pragma mark - private kernel exported API
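

/*	The select sync pool API below lets a driver or file system track several
	concurrent select() requests on the same object. A rough usage sketch --
	the "cookie" structure and the chosen event are hypothetical; only the
	pool calls are the ones defined below:

		// in the driver's select hook
		status_t error = add_select_sync_pool_entry(&cookie->select_pool,
			sync, event);

		// in the matching deselect hook
		remove_select_sync_pool_entry(&cookie->select_pool, sync, event);

		// when the object becomes readable
		notify_select_event_pool(cookie->select_pool, B_SELECT_READ);
*/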


static select_sync_pool_entry *
find_select_sync_pool_entry(select_sync_pool *pool, selectsync *sync)
{
	for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator();
		 it.HasNext();) {
		select_sync_pool_entry *entry = it.Next();
		if (entry->sync == sync)
			return entry;
	}

	return NULL;
}


static status_t
add_select_sync_pool_entry(select_sync_pool *pool, selectsync *sync,
	uint8 event)
{
	// check whether an entry for the sync object already exists
	select_sync_pool_entry *entry = find_select_sync_pool_entry(pool, sync);
	if (!entry) {
		// it doesn't exist yet -- create a new one
		entry = new (std::nothrow) select_sync_pool_entry;
		if (!entry)
			return B_NO_MEMORY;

		entry->sync = sync;
		entry->events = 0;
		pool->entries.Add(entry);
	}

	entry->events |= SELECT_FLAG(event);

	return B_OK;
}
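

/*!	Adds an entry for \a sync and \a event to the pool, creating the pool
	itself first, if necessary. Should adding the entry fail and leave a
	freshly created pool empty, the pool is deleted again.
*/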
status_t
add_select_sync_pool_entry(select_sync_pool **_pool, selectsync *sync,
	uint8 event)
{
	// create the pool, if necessary
	select_sync_pool *pool = *_pool;
	if (!pool) {
		pool = new (std::nothrow) select_sync_pool;
		if (!pool)
			return B_NO_MEMORY;

		*_pool = pool;
	}

	// add the entry
	status_t error = add_select_sync_pool_entry(pool, sync, event);

	// cleanup
	if (pool->entries.IsEmpty()) {
		delete pool;
		*_pool = NULL;
	}

	return error;
}
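

/*!	Clears the \a event flag of all entries matching \a sync, removes entries
	that no longer wait for any event, and deletes the pool once it becomes
	empty. Returns B_ENTRY_NOT_FOUND if there is no entry for \a sync.
*/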
status_t
remove_select_sync_pool_entry(select_sync_pool **_pool, selectsync *sync,
	uint8 event)
{
	select_sync_pool *pool = *_pool;
	if (!pool)
		return B_ENTRY_NOT_FOUND;

	// clear the event flag of the concerned entries
	bool found = false;
	for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator();
		 it.HasNext();) {
		select_sync_pool_entry *entry = it.Next();
		if (entry->sync == sync) {
			found = true;
			entry->events &= ~SELECT_FLAG(event);

			// remove the entry, if no longer needed
			if (entry->events == 0) {
				it.Remove();
				delete entry;
			}
		}
	}

	if (!found)
		return B_ENTRY_NOT_FOUND;

	// delete the pool, if no longer needed
	if (pool->entries.IsEmpty()) {
		delete pool;
		*_pool = NULL;
	}

	return B_OK;
}


void
delete_select_sync_pool(select_sync_pool *pool)
{
	if (!pool)
		return;

	while (select_sync_pool_entry *entry = pool->entries.Head()) {
		pool->entries.Remove(entry);
		delete entry;
	}

	delete pool;
}


void
notify_select_event_pool(select_sync_pool *pool, uint8 event)
{
	if (!pool)
		return;

	FUNCTION(("notify_select_event_pool(%p, %u)\n", pool, event));

	for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator();
		 it.HasNext();) {
		select_sync_pool_entry *entry = it.Next();
		if (entry->events & SELECT_FLAG(event))
			notify_select_event(entry->sync, event);
	}
}


// #pragma mark - Kernel POSIX layer


ssize_t
_kern_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet,
	bigtime_t timeout, const sigset_t *sigMask)
{
	return common_select(numFDs, readSet, writeSet, errorSet, timeout,
		sigMask, true);
}


ssize_t
_kern_poll(struct pollfd *fds, int numFDs, bigtime_t timeout)
{
	return common_poll(fds, numFDs, timeout, true);
}


ssize_t
_kern_wait_for_objects(object_wait_info* infos, int numInfos, uint32 flags,
	bigtime_t timeout)
{
	return common_wait_for_objects(infos, numInfos, flags, timeout, true);
}


// #pragma mark - User syscalls
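

/*!	select() syscall entry: validates the user addresses, copies the sets and
	the signal mask into kernel memory, invokes common_select(), and copies
	the resulting sets back to userland.
*/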
ssize_t
_user_select(int numFDs, fd_set *userReadSet, fd_set *userWriteSet,
	fd_set *userErrorSet, bigtime_t timeout, const sigset_t *userSigMask)
{
	fd_set *readSet = NULL, *writeSet = NULL, *errorSet = NULL;
	uint32 bytes = _howmany(numFDs, NFDBITS) * sizeof(fd_mask);
	sigset_t sigMask;
	int result;

	if (numFDs < 0)
		return B_BAD_VALUE;

	if ((userReadSet != NULL && !IS_USER_ADDRESS(userReadSet))
		|| (userWriteSet != NULL && !IS_USER_ADDRESS(userWriteSet))
		|| (userErrorSet != NULL && !IS_USER_ADDRESS(userErrorSet))
		|| (userSigMask != NULL && !IS_USER_ADDRESS(userSigMask)))
		return B_BAD_ADDRESS;

	// copy parameters

	if (userReadSet != NULL) {
		readSet = (fd_set *)malloc(bytes);
		if (readSet == NULL)
			return B_NO_MEMORY;

		if (user_memcpy(readSet, userReadSet, bytes) < B_OK) {
			result = B_BAD_ADDRESS;
			goto err;
		}
	}

	if (userWriteSet != NULL) {
		writeSet = (fd_set *)malloc(bytes);
		if (writeSet == NULL) {
			result = B_NO_MEMORY;
			goto err;
		}
		if (user_memcpy(writeSet, userWriteSet, bytes) < B_OK) {
			result = B_BAD_ADDRESS;
			goto err;
		}
	}

	if (userErrorSet != NULL) {
		errorSet = (fd_set *)malloc(bytes);
		if (errorSet == NULL) {
			result = B_NO_MEMORY;
			goto err;
		}
		if (user_memcpy(errorSet, userErrorSet, bytes) < B_OK) {
			result = B_BAD_ADDRESS;
			goto err;
		}
	}

	// copy the signal mask via user_memcpy(), too -- dereferencing the user
	// pointer directly could fault
	if (userSigMask != NULL
		&& user_memcpy(&sigMask, userSigMask, sizeof(sigMask)) < B_OK) {
		result = B_BAD_ADDRESS;
		goto err;
	}

	result = common_select(numFDs, readSet, writeSet, errorSet, timeout,
		userSigMask != NULL ? &sigMask : NULL, false);

	// copy back results

	if (result >= B_OK
		&& ((readSet != NULL
				&& user_memcpy(userReadSet, readSet, bytes) < B_OK)
			|| (writeSet != NULL
				&& user_memcpy(userWriteSet, writeSet, bytes) < B_OK)
			|| (errorSet != NULL
				&& user_memcpy(userErrorSet, errorSet, bytes) < B_OK)))
		result = B_BAD_ADDRESS;

err:
	free(readSet);
	free(writeSet);
	free(errorSet);

	return result;
}
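

/*!	poll() syscall entry: copies the pollfd array into kernel memory, calls
	common_poll(), and copies the updated revents fields back to userland.
*/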
ssize_t
_user_poll(struct pollfd *userfds, int numFDs, bigtime_t timeout)
{
	struct pollfd *fds;
	size_t bytes;
	int result;

	if (numFDs < 0)
		return B_BAD_VALUE;

	if (userfds == NULL || !IS_USER_ADDRESS(userfds))
		return B_BAD_ADDRESS;

	// copy parameters
	bytes = numFDs * sizeof(struct pollfd);
	fds = (struct pollfd *)malloc(bytes);
	if (fds == NULL)
		return B_NO_MEMORY;

	if (user_memcpy(fds, userfds, bytes) < B_OK) {
		result = B_BAD_ADDRESS;
		goto err;
	}

	result = common_poll(fds, numFDs, timeout, false);

	// copy back results
	if (result >= B_OK && user_memcpy(userfds, fds, bytes) < B_OK)
		result = B_BAD_ADDRESS;

err:
	free(fds);
	return result;
}
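

/*!	wait_for_objects() syscall entry: copies the object_wait_info array into
	kernel memory, calls common_wait_for_objects(), and copies the updated
	events back to userland.
*/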
ssize_t
_user_wait_for_objects(object_wait_info* userInfos, int numInfos, uint32 flags,
	bigtime_t timeout)
{
	if (numInfos < 0)
		return B_BAD_VALUE;

	if (userInfos == NULL || !IS_USER_ADDRESS(userInfos))
		return B_BAD_ADDRESS;

	int bytes = sizeof(object_wait_info) * numInfos;
	object_wait_info* infos = (object_wait_info*)malloc(bytes);
	if (infos == NULL)
		return B_NO_MEMORY;

	// copy parameters to kernel space, call the function, and copy the
	// results back
	ssize_t result;
	if (user_memcpy(infos, userInfos, bytes) == B_OK) {
		result = common_wait_for_objects(infos, numInfos, flags, timeout,
			false);

		if (user_memcpy(userInfos, infos, bytes) != B_OK)
			result = B_BAD_ADDRESS;
	} else
		result = B_BAD_ADDRESS;

	free(infos);

	return result;
}