void uv__io_poll(uv_loop_t* loop, int timeout) {
  /* A bug in kernels < 2.6.37 makes timeouts larger than ~30 minutes
   * effectively infinite on 32 bits architectures.  To avoid blocking
   * indefinitely, we cap the timeout and poll again if necessary.
   *
   * Note that "30 minutes" is a simplification because it depends on
   * the value of CONFIG_HZ.  The magic constant assumes CONFIG_HZ=1200,
   * that being the largest value I have seen in the wild (and only once.)
   */
  static const int max_safe_timeout = 1789569;
  static int no_epoll_pwait;
  static int no_epoll_wait;
  struct uv__epoll_event events[1024];
  struct uv__epoll_event* pe;
  struct uv__epoll_event e;
  int real_timeout;
  QUEUE* q;
  uv__io_t* w;
  sigset_t sigset;
  uint64_t sigmask;
  uint64_t base;
  int have_signals;
  int nevents;
  int count;
  int nfds;
  int fd;
  int op;
  int i;
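  /* Annotation: 1789569 == INT32_MAX / 1200, i.e. the largest millisecond
   * timeout whose product with CONFIG_HZ=1200 still fits in a signed 32-bit
   * value; that is roughly 29.8 minutes, which is where the "~30 minutes"
   * figure above comes from.
   *
   * The excerpt then jumps into the middle of the loop that drains
   * loop->watcher_queue; that omitted loop is where q, w and e are picked up,
   * with e.events set to w->pevents and e.data to w->fd before the
   * epoll_ctl() calls below (assumed from libuv's linux-core.c). */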
  if (w->events == 0)
    op = UV__EPOLL_CTL_ADD;
  else
    op = UV__EPOLL_CTL_MOD;
  /* XXX Future optimization: do EPOLL_CTL_MOD lazily if we stop watching
   * events, skip the syscall and squelch the events after epoll_wait(). */
  if (uv__epoll_ctl(loop->backend_fd, op, w->fd, &e)) {
    if (errno != EEXIST)
      abort();

    assert(op == UV__EPOLL_CTL_ADD);

    /* We've reactivated a file descriptor that's been watched before. */
    if (uv__epoll_ctl(loop->backend_fd, UV__EPOLL_CTL_MOD, w->fd, &e))
      abort();
  }
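  /* Annotation: the excerpt skips the sigmask setup that the
   * pthread_sigmask() calls further down rely on.  A sketch of what libuv's
   * linux-core.c does here (assumed; verify against the version you are
   * reading):
   *
   *   sigmask = 0;
   *   if (loop->flags & UV_LOOP_BLOCK_SIGPROF) {
   *     sigemptyset(&sigset);
   *     sigaddset(&sigset, SIGPROF);
   *     sigmask |= 1 << (SIGPROF - 1);
   *   }
   */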
  assert(timeout >= -1);
  base = loop->time;
  count = 48; /* Benchmarks suggest this gives the best throughput. */
  real_timeout = timeout;
  for (;;) {
    /* See the comment for max_safe_timeout for an explanation of why
     * this is necessary.  Executive summary: kernel bug workaround. */
    if (sizeof(int32_t) == sizeof(long) && timeout >= max_safe_timeout)
      timeout = max_safe_timeout;
    if (sigmask != 0 && no_epoll_pwait != 0)
      if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
        abort();
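    /* Annotation: the excerpt omits the wait itself, but nfds is used below.
     * The sketch that follows is assumed from libuv's linux-core.c (verify
     * against the version you are reading): epoll_pwait() is preferred when a
     * signal mask is in play, with a fall back to epoll_wait() on kernels
     * that lack it, and ENOSYS is recorded so the next iteration picks the
     * other syscall. */
    if (no_epoll_wait != 0 || (sigmask != 0 && no_epoll_pwait == 0)) {
      nfds = uv__epoll_pwait(loop->backend_fd,
                             events,
                             ARRAY_SIZE(events),
                             timeout,
                             sigmask);
      if (nfds == -1 && errno == ENOSYS)
        no_epoll_pwait = 1;
    } else {
      nfds = uv__epoll_wait(loop->backend_fd,
                            events,
                            ARRAY_SIZE(events),
                            timeout);
      if (nfds == -1 && errno == ENOSYS)
        no_epoll_wait = 1;
    }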
    if (sigmask != 0 && no_epoll_pwait != 0)
      if (pthread_sigmask(SIG_UNBLOCK, &sigset, NULL))
        abort();
    /* Update loop->time unconditionally.  It's tempting to skip the update
     * when timeout == 0 (i.e. non-blocking poll) but there is no guarantee
     * that the operating system didn't reschedule our process while in the
     * syscall. */
    SAVE_ERRNO(uv__update_time(loop));
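    /* Annotation: SAVE_ERRNO is a libuv-internal macro that saves errno,
     * evaluates the wrapped expression, then restores errno, so the error
     * code from the epoll call is still intact for the nfds == -1 checks
     * below. */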
    if (nfds == 0) {
      assert(timeout != -1);

      if (timeout == 0)
        return;

      /* We may have been inside the system call for longer than |timeout|
       * milliseconds so we need to update the timestamp to avoid drift. */
      goto update_timeout;
    }
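    /* Annotation: the update_timeout label lives past the end of this
     * excerpt.  Assumed from libuv's linux-core.c, it subtracts the time
     * already spent from the caller's budget and re-enters the for (;;)
     * loop, roughly:
     *
     *   update_timeout:
     *     assert(timeout > 0);
     *     real_timeout -= (loop->time - base);
     *     if (real_timeout <= 0)
     *       return;
     *     timeout = real_timeout;
     */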
    if (nfds == -1) {
      if (errno == ENOSYS) {
        /* epoll_wait() or epoll_pwait() failed, try the other system call. */
        assert(no_epoll_wait == 0 || no_epoll_pwait == 0);
        continue;
      }

      if (errno != EINTR)
        abort();

      if (timeout == -1)
        continue;

      if (timeout == 0)
        return;

      /* Interrupted by a signal. Update timeout and poll again. */
      goto update_timeout;
    }
    have_signals = 0;
    nevents = 0;
    /* Stash the polled events array and the event count in the last two
     * slots of the watchers array. */
    assert(loop->watchers != NULL);
    loop->watchers[loop->nwatchers] = (void*) events;
    loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds;
    for (i = 0; i < nfds; i++) {
      pe = events + i;
      fd = pe->data;
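      /* Annotation: stashing the events array and count in loop->watchers is
       * what lets uv__platform_invalidate_fd() find pending events for a
       * descriptor that a callback closes mid-loop and blank them to -1,
       * which the check below then skips. */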
      /* Skip invalidated events, see uv__platform_invalidate_fd */
      if (fd == -1)
        continue;
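      /* Annotation (omitted in the excerpt, assumed from libuv): look up the
       * watcher registered for this descriptor; a NULL slot means the
       * watcher was stopped after the events were collected. */
      w = loop->watchers[fd];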
      if (w == NULL) {
        /* File descriptor that we've stopped watching, disarm it.
         *
         * Ignore all errors because we may be racing with another thread
         * when the file descriptor is closed. */
        uv__epoll_ctl(loop->backend_fd, UV__EPOLL_CTL_DEL, fd, pe);
        continue;
      }
      /* Give users only events they're interested in.  Prevents spurious
       * callbacks when previous callback invocation in this loop has stopped
       * the current watcher.  Also, filters out events that the user has not
       * requested us to watch. */
      pe->events &= w->pevents | POLLERR | POLLHUP;
      /* Work around an epoll quirk where it sometimes reports just the
       * EPOLLERR or EPOLLHUP event.  In order to force the event loop to
       * move forward, we merge in the read/write events that the watcher
       * is interested in; uv__read() and uv__write() will then deal with
       * the error or hangup in the usual fashion.
       *
       * Note to self: happens when epoll reports EPOLLIN|EPOLLHUP, the user
       * reads the available data, calls uv_read_stop(), then sometime later
       * calls uv_read_start() again.  By then, libuv has forgotten about the
       * hangup and the kernel won't report EPOLLIN again because there's
       * nothing left to read.  If anything, libuv is to blame here.  The
       * current hack is just a quick bandaid; to properly fix it, libuv
       * needs to remember the error/hangup event.  We should get that for
       * free when we switch over to edge-triggered I/O. */
      if (pe->events == POLLERR || pe->events == POLLHUP)
        pe->events |= w->pevents & (POLLIN | POLLOUT | UV__POLLPRI);
      if (pe->events != 0) {
        /* Run signal watchers last.  This also affects child process watchers
         * because those are implemented in terms of signal watchers. */
        /* Invoke the callback here; signal watchers are only flagged and are
         * dispatched together after the loop. */
        if (w == &loop->signal_io_watcher)
          have_signals = 1;
        else
          w->cb(loop, w, pe->events);
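  /* Annotation: the do/while loop below is the body of the signal watcher's
   * callback (uv__signal_event() in libuv).  When have_signals was set above,
   * uv__io_poll() invokes loop->signal_io_watcher.cb once after the fd loop,
   * and that callback drains loop->signal_pipefd[0], one uv__signal_msg_t
   * per caught signal. */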
  do {
    r = read(loop->signal_pipefd[0], buf + bytes, sizeof(buf) - bytes);
    if (r == -1 && errno == EINTR)
      continue;
    if (r == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
      /* If there are bytes in the buffer already (which really is extremely
       * unlikely if possible at all) we can't exit the function here.  We'll
       * spin until more bytes are read instead. */
      if (bytes > 0)
        continue;

      /* Otherwise, there was nothing there. */
      return;
    }
    /* Other errors really should never happen. */
    if (r == -1)
      abort();

    bytes += r;

    /* `end` is rounded down to a multiple of sizeof(uv__signal_msg_t). */
    end = (bytes / sizeof(uv__signal_msg_t)) * sizeof(uv__signal_msg_t);
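    /* Annotation: a worked example with a hypothetical
     * sizeof(uv__signal_msg_t) of 16 bytes: bytes == 40 gives
     * end == (40 / 16) * 16 == 32, so two whole messages are dispatched and
     * 8 partial bytes are carried over by the memmove() at the bottom of the
     * loop. */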
    for (i = 0; i < end; i += sizeof(uv__signal_msg_t)) {
      msg = (uv__signal_msg_t*) (buf + i);
      handle = msg->handle;
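      /* Annotation: the excerpt omits the dispatch itself.  Assumed from
       * libuv's signal.c, the handle's signal_cb runs here when msg->signum
       * matches handle->signum, and handle->dispatched_signals is
       * incremented so the deferred-close check below can compare it against
       * handle->caught_signals. */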
      if (handle->flags & UV__SIGNAL_ONE_SHOT)
        uv__signal_stop(handle);
      /* If uv_close was called while there were caught signals that were not
       * yet dispatched, the uv__finish_close was deferred.  Make close
       * pending now if this has happened. */
      if ((handle->flags & UV_CLOSING) &&
          (handle->caught_signals == handle->dispatched_signals)) {
        uv__make_close_pending((uv_handle_t*) handle);
      }
    }

    bytes -= end;

    /* If there are any "partial" messages left, move them to the start of
     * the buffer, and spin.  This should not happen. */
    if (bytes) {
      memmove(buf, buf + end, bytes);
      continue;
    }
  } while (end == sizeof buf);
}