/* * Copyright 2007, Ingo Weinhold, bonefish@cs.tu-berlin.de. All rights reserved. * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved. * Distributed under the terms of the MIT License. */ #include "vfs_select.h" #include #include #include #include #include #include #include #include #include #include #include #include //#define TRACE_VFS_SELECT #ifdef TRACE_VFS_SELECT # define PRINT(x) dprintf x # define FUNCTION(x) dprintf x #else # define PRINT(x) ; # define FUNCTION(x) ; #endif using std::nothrow; /*! Clears all bits in the fd_set - since we are using variable sized arrays in the kernel, we can't use the FD_ZERO() macro provided by sys/select.h for this task. All other FD_xxx() macros are safe to use, though. */ static inline void fd_zero(fd_set *set, int numFDs) { if (set != NULL) memset(set, 0, _howmany(numFDs, NFDBITS) * sizeof(fd_mask)); } static status_t create_select_sync(int numFDs, select_sync*& _sync) { // create sync structure select_sync* sync = new(nothrow) select_sync; if (sync == NULL) return B_NO_MEMORY; ObjectDeleter syncDeleter(sync); // create info set sync->set = new(nothrow) select_info[numFDs]; if (sync->set == NULL) return B_NO_MEMORY; ArrayDeleter setDeleter(sync->set); // create select event semaphore sync->sem = create_sem(0, "select"); if (sync->sem < 0) return sync->sem; // create lock status_t error = benaphore_init(&sync->lock, "select sync"); if (error != B_OK) { delete_sem(sync->sem); return error; } sync->count = numFDs; sync->ref_count = 1; for (int i = 0; i < numFDs; i++) { sync->set[i].next = NULL; sync->set[i].sync = sync; } setDeleter.Detach(); syncDeleter.Detach(); _sync = sync; return B_OK; } void put_select_sync(select_sync* sync) { FUNCTION(("put_select_sync(%p): -> %ld\n", sync, sync->ref_count - 1)); if (atomic_add(&sync->ref_count, -1) == 1) { delete_sem(sync->sem); benaphore_destroy(&sync->lock); delete[] sync->set; delete sync; } } static int common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet, bigtime_t timeout, const sigset_t *sigMask, bool kernel) { status_t status = B_OK; int fd; FUNCTION(("[%ld] common_select(%d, %p, %p, %p, %lld, %p, %d)\n", find_thread(NULL), numFDs, readSet, writeSet, errorSet, timeout, sigMask, kernel)); // check if fds are valid before doing anything for (fd = 0; fd < numFDs; fd++) { if (((readSet && FD_ISSET(fd, readSet)) || (writeSet && FD_ISSET(fd, writeSet)) || (errorSet && FD_ISSET(fd, errorSet))) && !fd_is_valid(fd, kernel)) return B_FILE_ERROR; } // allocate sync object select_sync* sync; status = create_select_sync(numFDs, sync); if (status != B_OK) return status; // start selecting file descriptors BenaphoreLocker locker(sync->lock); for (fd = 0; fd < numFDs; fd++) { sync->set[fd].selected_events = 0; sync->set[fd].events = 0; if (readSet && FD_ISSET(fd, readSet)) sync->set[fd].selected_events = SELECT_FLAG(B_SELECT_READ); if (writeSet && FD_ISSET(fd, writeSet)) sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_WRITE); if (errorSet && FD_ISSET(fd, errorSet)) sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_ERROR); if (sync->set[fd].selected_events != 0) { select_fd(fd, sync, fd, kernel); // array position is the same as the fd for select() } } locker.Unlock(); // set new signal mask sigset_t oldSigMask; if (sigMask != NULL) sigprocmask(SIG_SETMASK, sigMask, &oldSigMask); // wait for something to happen status = acquire_sem_etc(sync->sem, 1, B_CAN_INTERRUPT | (timeout != -1 ? B_RELATIVE_TIMEOUT : 0), timeout); // restore the old signal mask if (sigMask != NULL) sigprocmask(SIG_SETMASK, &oldSigMask, NULL); PRINT(("common_select(): acquire_sem_etc() returned: %lx\n", status)); // deselect file descriptors locker.Lock(); for (fd = 0; fd < numFDs; fd++) deselect_fd(fd, sync, fd, kernel); PRINT(("common_select(): events deselected\n")); // collect the events that happened in the meantime int count = 0; if (status == B_INTERRUPTED) { // We must not clear the sets in this case, as applications may // rely on the contents of them. locker.Unlock(); put_select_sync(sync); return B_INTERRUPTED; } // Clear sets to store the received events // (we can't use the macros, because we have variable sized arrays; // the other FD_xxx() macros are safe, though). fd_zero(readSet, numFDs); fd_zero(writeSet, numFDs); fd_zero(errorSet, numFDs); if (status == B_OK) { for (count = 0, fd = 0;fd < numFDs; fd++) { if (readSet && sync->set[fd].events & SELECT_FLAG(B_SELECT_READ)) { FD_SET(fd, readSet); count++; } if (writeSet && sync->set[fd].events & SELECT_FLAG(B_SELECT_WRITE)) { FD_SET(fd, writeSet); count++; } if (errorSet && sync->set[fd].events & SELECT_FLAG(B_SELECT_ERROR)) { FD_SET(fd, errorSet); count++; } } } // B_TIMED_OUT and B_WOULD_BLOCK are supposed to return 0 locker.Unlock(); put_select_sync(sync); return count; } static int common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout, bool kernel) { status_t status = B_OK; int count = 0; uint32 i; // allocate sync object select_sync* sync; status = create_select_sync(numFDs, sync); if (status != B_OK) return status; // start polling file descriptors (by selecting them) BenaphoreLocker locker(sync->lock); for (i = 0; i < numFDs; i++) { int fd = fds[i].fd; // initialize events masks sync->set[i].selected_events = fds[i].events & ~POLLNVAL | POLLERR | POLLHUP; sync->set[i].events = 0; if (select_fd(fd, sync, i, kernel) == B_OK) { fds[i].revents = 0; if (sync->set[i].selected_events != 0) count++; } else fds[i].revents = POLLNVAL; } if (count < 1) { count = B_BAD_VALUE; goto err; } locker.Unlock(); status = acquire_sem_etc(sync->sem, 1, B_CAN_INTERRUPT | (timeout != -1 ? B_RELATIVE_TIMEOUT : 0), timeout); // deselect file descriptors locker.Lock(); for (i = 0; i < numFDs; i++) deselect_fd(fds[i].fd, sync, i, kernel); // collect the events that are happened in the meantime switch (status) { case B_OK: for (count = 0, i = 0; i < numFDs; i++) { if (fds[i].revents == POLLNVAL) continue; // POLLxxx flags and B_SELECT_xxx flags are compatible fds[i].revents = sync->set[i].events & sync->set[i].selected_events; if (fds[i].revents != 0) count++; } break; case B_INTERRUPTED: count = B_INTERRUPTED; break; default: // B_TIMED_OUT, and B_WOULD_BLOCK count = 0; } err: locker.Unlock(); put_select_sync(sync); return count; } // #pragma mark - VFS private status_t notify_select_events(select_info* info, uint16 events) { FUNCTION(("notify_select_events(%p (%p), 0x%x)\n", info, info->sync, events)); if (info == NULL || info->sync == NULL || info->sync->sem < B_OK) return B_BAD_VALUE; info->events |= events; // only wake up the waiting select()/poll() call if the events // match one of the selected ones if (info->selected_events & events) return release_sem(info->sync->sem); return B_OK; } // #pragma mark - public kernel API status_t notify_select_event(struct selectsync *sync, uint32 /*ref*/, uint8 event) { return notify_select_events((select_info*)sync, SELECT_FLAG(event)); } // #pragma mark - private kernel exported API static select_sync_pool_entry * find_select_sync_pool_entry(select_sync_pool *pool, selectsync *sync, uint32 ref) { for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator(); it.HasNext();) { select_sync_pool_entry *entry = it.Next(); if (entry->sync == sync && entry->ref == ref) return entry; } return NULL; } static status_t add_select_sync_pool_entry(select_sync_pool *pool, selectsync *sync, uint32 ref, uint8 event) { // check, whether the entry does already exist select_sync_pool_entry *entry = find_select_sync_pool_entry(pool, sync, ref); if (!entry) { entry = new (std::nothrow) select_sync_pool_entry; if (!entry) return B_NO_MEMORY; entry->sync = sync; entry->ref = ref; entry->events = 0; pool->entries.Add(entry); } entry->events |= SELECT_FLAG(event); return B_OK; } status_t add_select_sync_pool_entry(select_sync_pool **_pool, selectsync *sync, uint32 ref, uint8 event) { // create the pool, if necessary select_sync_pool *pool = *_pool; if (!pool) { pool = new (std::nothrow) select_sync_pool; if (!pool) return B_NO_MEMORY; *_pool = pool; } // add the entry status_t error = add_select_sync_pool_entry(pool, sync, ref, event); // cleanup if (pool->entries.IsEmpty()) { delete pool; *_pool = NULL; } return error; } status_t remove_select_sync_pool_entry(select_sync_pool **_pool, selectsync *sync, uint8 event) { select_sync_pool *pool = *_pool; if (!pool) return B_ENTRY_NOT_FOUND; // clear the event flag of the concerned entries bool found = false; for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator(); it.HasNext();) { select_sync_pool_entry *entry = it.Next(); if (entry->sync == sync) { found = true; entry->events &= ~SELECT_FLAG(event); // remove the entry, if no longer needed if (entry->events == 0) { it.Remove(); delete entry; } } } if (!found) return B_ENTRY_NOT_FOUND; // delete the pool, if no longer needed if (pool->entries.IsEmpty()) { delete pool; *_pool = NULL; } return B_OK; } void delete_select_sync_pool(select_sync_pool *pool) { if (!pool) return; while (select_sync_pool_entry *entry = pool->entries.Head()) { pool->entries.Remove(entry); delete entry; } delete pool; } void notify_select_event_pool(select_sync_pool *pool, uint8 event) { if (!pool) return; FUNCTION(("notify_select_event_pool(%p, %u)\n", pool, event)); for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator(); it.HasNext();) { select_sync_pool_entry *entry = it.Next(); if (entry->events & SELECT_FLAG(event)) notify_select_event(entry->sync, entry->ref, event); } } // #pragma mark - Kernel POSIX layer ssize_t _kern_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet, bigtime_t timeout, const sigset_t *sigMask) { return common_select(numFDs, readSet, writeSet, errorSet, timeout, sigMask, true); } ssize_t _kern_poll(struct pollfd *fds, int numFDs, bigtime_t timeout) { return common_poll(fds, numFDs, timeout, true); } // #pragma mark - User syscalls ssize_t _user_select(int numFDs, fd_set *userReadSet, fd_set *userWriteSet, fd_set *userErrorSet, bigtime_t timeout, const sigset_t *userSigMask) { fd_set *readSet = NULL, *writeSet = NULL, *errorSet = NULL; uint32 bytes = _howmany(numFDs, NFDBITS) * sizeof(fd_mask); sigset_t sigMask; int result; if (numFDs < 0) return B_BAD_VALUE; if ((userReadSet != NULL && !IS_USER_ADDRESS(userReadSet)) || (userWriteSet != NULL && !IS_USER_ADDRESS(userWriteSet)) || (userErrorSet != NULL && !IS_USER_ADDRESS(userErrorSet)) || (userSigMask != NULL && !IS_USER_ADDRESS(userSigMask))) return B_BAD_ADDRESS; // copy parameters if (userReadSet != NULL) { readSet = (fd_set *)malloc(bytes); if (readSet == NULL) return B_NO_MEMORY; if (user_memcpy(readSet, userReadSet, bytes) < B_OK) { result = B_BAD_ADDRESS; goto err; } } if (userWriteSet != NULL) { writeSet = (fd_set *)malloc(bytes); if (writeSet == NULL) { result = B_NO_MEMORY; goto err; } if (user_memcpy(writeSet, userWriteSet, bytes) < B_OK) { result = B_BAD_ADDRESS; goto err; } } if (userErrorSet != NULL) { errorSet = (fd_set *)malloc(bytes); if (errorSet == NULL) { result = B_NO_MEMORY; goto err; } if (user_memcpy(errorSet, userErrorSet, bytes) < B_OK) { result = B_BAD_ADDRESS; goto err; } } if (userSigMask != NULL) sigMask = *userSigMask; result = common_select(numFDs, readSet, writeSet, errorSet, timeout, userSigMask ? &sigMask : NULL, false); // copy back results if (result >= B_OK && ((readSet != NULL && user_memcpy(userReadSet, readSet, bytes) < B_OK) || (writeSet != NULL && user_memcpy(userWriteSet, writeSet, bytes) < B_OK) || (errorSet != NULL && user_memcpy(userErrorSet, errorSet, bytes) < B_OK))) result = B_BAD_ADDRESS; err: free(readSet); free(writeSet); free(errorSet); return result; } ssize_t _user_poll(struct pollfd *userfds, int numFDs, bigtime_t timeout) { struct pollfd *fds; size_t bytes; int result; if (numFDs < 0) return B_BAD_VALUE; if (userfds == NULL || !IS_USER_ADDRESS(userfds)) return B_BAD_ADDRESS; // copy parameters fds = (struct pollfd *)malloc(bytes = numFDs * sizeof(struct pollfd)); if (fds == NULL) return B_NO_MEMORY; if (user_memcpy(fds, userfds, bytes) < B_OK) { result = B_BAD_ADDRESS; goto err; } result = common_poll(fds, numFDs, timeout, false); // copy back results if (result >= B_OK && user_memcpy(userfds, fds, bytes) < B_OK) result = B_BAD_ADDRESS; err: free(fds); return result; }