summaryrefslogtreecommitdiffstats
path: root/man/man2/select.2
diff options
context:
space:
mode:
Diffstat (limited to 'man/man2/select.2')
-rw-r--r--man/man2/select.2765
1 files changed, 765 insertions, 0 deletions
diff --git a/man/man2/select.2 b/man/man2/select.2
new file mode 100644
index 000000000..bbf8ea9e0
--- /dev/null
+++ b/man/man2/select.2
@@ -0,0 +1,765 @@
+.\" This manpage is copyright (C) 1992 Drew Eckhardt,
+.\" copyright (C) 1995 Michael Shields,
+.\" copyright (C) 2001 Paul Sheer,
+.\" copyright (C) 2006, 2019 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
+.\" Modified 1995-05-18 by Jim Van Zandt <jrv@vanzandt.mv.com>
+.\" Sun Feb 11 14:07:00 MET 1996 Martin Schulze <joey@linux.de>
+.\" * layout slightly modified
+.\"
+.\" Modified Mon Oct 21 23:05:29 EDT 1996 by Eric S. Raymond <esr@thyrsus.com>
+.\" Modified Thu Feb 24 01:41:09 CET 2000 by aeb
+.\" Modified Thu Feb 9 22:32:09 CET 2001 by bert hubert <ahu@ds9a.nl>, aeb
+.\" Modified Mon Nov 11 14:35:00 PST 2002 by Ben Woodard <ben@zork.net>
+.\" 2005-03-11, mtk, modified pselect() text (it is now a system
+.\" call in Linux 2.6.16.
+.\"
+.TH select 2 (date) "Linux man-pages (unreleased)"
+.SH NAME
+select, pselect, FD_CLR, FD_ISSET, FD_SET, FD_ZERO, fd_set \-
+synchronous I/O multiplexing
+.SH LIBRARY
+Standard C library
+.RI ( libc ", " \-lc )
+.SH SYNOPSIS
+.nf
+.B #include <sys/select.h>
+.P
+.BR typedef " /* ... */ " fd_set;
+.P
+.BI "int select(int " nfds ", fd_set *_Nullable restrict " readfds ,
+.BI " fd_set *_Nullable restrict " writefds ,
+.BI " fd_set *_Nullable restrict " exceptfds ,
+.BI " struct timeval *_Nullable restrict " timeout );
+.P
+.BI "void FD_CLR(int " fd ", fd_set *" set );
+.BI "int FD_ISSET(int " fd ", fd_set *" set );
+.BI "void FD_SET(int " fd ", fd_set *" set );
+.BI "void FD_ZERO(fd_set *" set );
+.P
+.BI "int pselect(int " nfds ", fd_set *_Nullable restrict " readfds ,
+.BI " fd_set *_Nullable restrict " writefds ,
+.BI " fd_set *_Nullable restrict " exceptfds ,
+.BI " const struct timespec *_Nullable restrict " timeout ,
+.BI " const sigset_t *_Nullable restrict " sigmask );
+.fi
+.P
+.RS -4
+Feature Test Macro Requirements for glibc (see
+.BR feature_test_macros (7)):
+.RE
+.P
+.BR pselect ():
+.nf
+ _POSIX_C_SOURCE >= 200112L
+.fi
+.SH DESCRIPTION
+.BR "WARNING" :
+.BR select ()
+can monitor only file descriptors numbers that are less than
+.B FD_SETSIZE
+(1024)\[em]an unreasonably low limit for many modern applications\[em]and
+this limitation will not change.
+All modern applications should instead use
+.BR poll (2)
+or
+.BR epoll (7),
+which do not suffer this limitation.
+.P
+.BR select ()
+allows a program to monitor multiple file descriptors,
+waiting until one or more of the file descriptors become "ready"
+for some class of I/O operation (e.g., input possible).
+A file descriptor is considered ready if it is possible to
+perform a corresponding I/O operation (e.g.,
+.BR read (2),
+or a sufficiently small
+.BR write (2))
+without blocking.
+.\"
+.SS fd_set
+A structure type that can represent a set of file descriptors.
+According to POSIX,
+the maximum number of file descriptors in an
+.I fd_set
+structure is the value of the macro
+.BR FD_SETSIZE .
+.\"
+.SS File descriptor sets
+The principal arguments of
+.BR select ()
+are three "sets" of file descriptors (declared with the type
+.IR fd_set ),
+which allow the caller to wait for three classes of events
+on the specified set of file descriptors.
+Each of the
+.I fd_set
+arguments may be specified as NULL if no file descriptors are
+to be watched for the corresponding class of events.
+.P
+.BR "Note well" :
+Upon return, each of the file descriptor sets is modified in place
+to indicate which file descriptors are currently "ready".
+Thus, if using
+.BR select ()
+within a loop, the sets \fImust be reinitialized\fP before each call.
+.P
+The contents of a file descriptor set can be manipulated
+using the following macros:
+.TP
+.BR FD_ZERO ()
+This macro clears (removes all file descriptors from)
+.IR set .
+It should be employed as the first step in initializing a file descriptor set.
+.TP
+.BR FD_SET ()
+This macro adds the file descriptor
+.I fd
+to
+.IR set .
+Adding a file descriptor that is already present in the set is a no-op,
+and does not produce an error.
+.TP
+.BR FD_CLR ()
+This macro removes the file descriptor
+.I fd
+from
+.IR set .
+Removing a file descriptor that is not present in the set is a no-op,
+and does not produce an error.
+.TP
+.BR FD_ISSET ()
+.BR select ()
+modifies the contents of the sets according to the rules
+described below.
+After calling
+.BR select (),
+the
+.BR FD_ISSET ()
+macro
+can be used to test if a file descriptor is still present in a set.
+.BR FD_ISSET ()
+returns nonzero if the file descriptor
+.I fd
+is present in
+.IR set ,
+and zero if it is not.
+.\"
+.SS Arguments
+The arguments of
+.BR select ()
+are as follows:
+.TP
+.I readfds
+The file descriptors in this set are watched to see if they are
+ready for reading.
+A file descriptor is ready for reading if a read operation will not
+block; in particular, a file descriptor is also ready on end-of-file.
+.IP
+After
+.BR select ()
+has returned, \fIreadfds\fP will be
+cleared of all file descriptors except for those that are ready for reading.
+.TP
+.I writefds
+The file descriptors in this set are watched to see if they are
+ready for writing.
+A file descriptor is ready for writing if a write operation will not block.
+However, even if a file descriptor indicates as writable,
+a large write may still block.
+.IP
+After
+.BR select ()
+has returned, \fIwritefds\fP will be
+cleared of all file descriptors except for those that are ready for writing.
+.TP
+.I exceptfds
+The file descriptors in this set are watched for "exceptional conditions".
+For examples of some exceptional conditions, see the discussion of
+.B POLLPRI
+in
+.BR poll (2).
+.IP
+After
+.BR select ()
+has returned,
+\fIexceptfds\fP will be cleared of all file descriptors except for those
+for which an exceptional condition has occurred.
+.TP
+.I nfds
+This argument should be set to the highest-numbered file descriptor in any
+of the three sets, plus 1.
+The indicated file descriptors in each set are checked, up to this limit
+(but see BUGS).
+.TP
+.I timeout
+The
+.I timeout
+argument is a
+.I timeval
+structure (shown below) that specifies the interval that
+.BR select ()
+should block waiting for a file descriptor to become ready.
+The call will block until either:
+.RS
+.IP \[bu] 3
+a file descriptor becomes ready;
+.IP \[bu]
+the call is interrupted by a signal handler; or
+.IP \[bu]
+the timeout expires.
+.RE
+.IP
+Note that the
+.I timeout
+interval will be rounded up to the system clock granularity,
+and kernel scheduling delays mean that the blocking interval
+may overrun by a small amount.
+.IP
+If both fields of the
+.I timeval
+structure are zero, then
+.BR select ()
+returns immediately.
+(This is useful for polling.)
+.IP
+If
+.I timeout
+is specified as NULL,
+.BR select ()
+blocks indefinitely waiting for a file descriptor to become ready.
+.\"
+.SS pselect()
+The
+.BR pselect ()
+system call allows an application to safely wait until either
+a file descriptor becomes ready or until a signal is caught.
+.P
+The operation of
+.BR select ()
+and
+.BR pselect ()
+is identical, other than these three differences:
+.IP \[bu] 3
+.BR select ()
+uses a timeout that is a
+.I struct timeval
+(with seconds and microseconds), while
+.BR pselect ()
+uses a
+.I struct timespec
+(with seconds and nanoseconds).
+.IP \[bu]
+.BR select ()
+may update the
+.I timeout
+argument to indicate how much time was left.
+.BR pselect ()
+does not change this argument.
+.IP \[bu]
+.BR select ()
+has no
+.I sigmask
+argument, and behaves as
+.BR pselect ()
+called with NULL
+.IR sigmask .
+.P
+.I sigmask
+is a pointer to a signal mask (see
+.BR sigprocmask (2));
+if it is not NULL, then
+.BR pselect ()
+first replaces the current signal mask by the one pointed to by
+.IR sigmask ,
+then does the "select" function, and then restores the original
+signal mask.
+(If
+.I sigmask
+is NULL,
+the signal mask is not modified during the
+.BR pselect ()
+call.)
+.P
+Other than the difference in the precision of the
+.I timeout
+argument, the following
+.BR pselect ()
+call:
+.P
+.in +4n
+.EX
+ready = pselect(nfds, &readfds, &writefds, &exceptfds,
+ timeout, &sigmask);
+.EE
+.in
+.P
+is equivalent to
+.I atomically
+executing the following calls:
+.P
+.in +4n
+.EX
+sigset_t origmask;
+\&
+pthread_sigmask(SIG_SETMASK, &sigmask, &origmask);
+ready = select(nfds, &readfds, &writefds, &exceptfds, timeout);
+pthread_sigmask(SIG_SETMASK, &origmask, NULL);
+.EE
+.in
+.P
+The reason that
+.BR pselect ()
+is needed is that if one wants to wait for either a signal
+or for a file descriptor to become ready, then
+an atomic test is needed to prevent race conditions.
+(Suppose the signal handler sets a global flag and
+returns.
+Then a test of this global flag followed by a call of
+.BR select ()
+could hang indefinitely if the signal arrived just after the test
+but just before the call.
+By contrast,
+.BR pselect ()
+allows one to first block signals, handle the signals that have come in,
+then call
+.BR pselect ()
+with the desired
+.IR sigmask ,
+avoiding the race.)
+.SS The timeout
+The
+.I timeout
+argument for
+.BR select ()
+is a structure of the following type:
+.P
+.in +4n
+.EX
+struct timeval {
+ time_t tv_sec; /* seconds */
+ suseconds_t tv_usec; /* microseconds */
+};
+.EE
+.in
+.P
+The corresponding argument for
+.BR pselect ()
+is a
+.BR timespec (3)
+structure.
+.P
+On Linux,
+.BR select ()
+modifies
+.I timeout
+to reflect the amount of time not slept; most other implementations
+do not do this.
+(POSIX.1 permits either behavior.)
+This causes problems both when Linux code which reads
+.I timeout
+is ported to other operating systems, and when code is ported to Linux
+that reuses a \fIstruct timeval\fP for multiple
+.BR select ()s
+in a loop without reinitializing it.
+Consider
+.I timeout
+to be undefined after
+.BR select ()
+returns.
+.\" .P - it is rumored that:
+.\" On BSD, when a timeout occurs, the file descriptor bits are not changed.
+.\" - it is certainly true that:
+.\" Linux follows SUSv2 and sets the bit masks to zero upon a timeout.
+.SH RETURN VALUE
+On success,
+.BR select ()
+and
+.BR pselect ()
+return the number of file descriptors contained in the three returned
+descriptor sets (that is, the total number of bits that are set in
+.IR readfds ,
+.IR writefds ,
+.IR exceptfds ).
+The return value may be zero if the timeout expired before any
+file descriptors became ready.
+.P
+On error, \-1 is returned, and
+.I errno
+is set to indicate the error;
+the file descriptor sets are unmodified,
+and
+.I timeout
+becomes undefined.
+.SH ERRORS
+.TP
+.B EBADF
+An invalid file descriptor was given in one of the sets.
+(Perhaps a file descriptor that was already closed,
+or one on which an error has occurred.)
+However, see BUGS.
+.TP
+.B EINTR
+A signal was caught; see
+.BR signal (7).
+.TP
+.B EINVAL
+.I nfds
+is negative or exceeds the
+.B RLIMIT_NOFILE
+resource limit (see
+.BR getrlimit (2)).
+.TP
+.B EINVAL
+The value contained within
+.I timeout
+is invalid.
+.TP
+.B ENOMEM
+Unable to allocate memory for internal tables.
+.SH VERSIONS
+On some other UNIX systems,
+.\" Darwin, according to a report by Jeremy Sequoia, relayed by Josh Triplett
+.BR select ()
+can fail with the error
+.B EAGAIN
+if the system fails to allocate kernel-internal resources, rather than
+.B ENOMEM
+as Linux does.
+POSIX specifies this error for
+.BR poll (2),
+but not for
+.BR select ().
+Portable programs may wish to check for
+.B EAGAIN
+and loop, just as with
+.BR EINTR .
+.SH STANDARDS
+POSIX.1-2008.
+.SH HISTORY
+.TP
+.BR select ()
+POSIX.1-2001, 4.4BSD (first appeared in 4.2BSD).
+.IP
+Generally portable to/from
+non-BSD systems supporting clones of the BSD socket layer (including
+System\ V variants).
+However, note that the System\ V variant typically
+sets the timeout variable before returning, but the BSD variant does not.
+.TP
+.BR pselect ()
+Linux 2.6.16.
+POSIX.1g, POSIX.1-2001.
+.IP
+Prior to this,
+it was emulated in glibc (but see BUGS).
+.TP
+.B fd_set
+POSIX.1-2001.
+.SH NOTES
+The following header also provides the
+.I fd_set
+type:
+.IR <sys/time.h> .
+.P
+An
+.I fd_set
+is a fixed size buffer.
+Executing
+.BR FD_CLR ()
+or
+.BR FD_SET ()
+with a value of
+.I fd
+that is negative or is equal to or larger than
+.B FD_SETSIZE
+will result
+in undefined behavior.
+Moreover, POSIX requires
+.I fd
+to be a valid file descriptor.
+.P
+The operation of
+.BR select ()
+and
+.BR pselect ()
+is not affected by the
+.B O_NONBLOCK
+flag.
+.\"
+.SS The self-pipe trick
+On systems that lack
+.BR pselect (),
+reliable (and more portable) signal trapping can be achieved
+using the self-pipe trick.
+In this technique,
+a signal handler writes a byte to a pipe whose other end
+is monitored by
+.BR select ()
+in the main program.
+(To avoid possibly blocking when writing to a pipe that may be full
+or reading from a pipe that may be empty,
+nonblocking I/O is used when reading from and writing to the pipe.)
+.\"
+.SS Emulating usleep(3)
+Before the advent of
+.BR usleep (3),
+some code employed a call to
+.BR select ()
+with all three sets empty,
+.I nfds
+zero, and a non-NULL
+.I timeout
+as a fairly portable way to sleep with subsecond precision.
+.\"
+.SS Correspondence between select() and poll() notifications
+Within the Linux kernel source,
+.\" fs/select.c
+we find the following definitions which show the correspondence
+between the readable, writable, and exceptional condition notifications of
+.BR select ()
+and the event notifications provided by
+.BR poll (2)
+and
+.BR epoll (7):
+.P
+.in +4n
+.EX
+#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN |
+ EPOLLHUP | EPOLLERR)
+ /* Ready for reading */
+#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT |
+ EPOLLERR)
+ /* Ready for writing */
+#define POLLEX_SET (EPOLLPRI)
+ /* Exceptional condition */
+.EE
+.in
+.\"
+.SS Multithreaded applications
+If a file descriptor being monitored by
+.BR select ()
+is closed in another thread, the result is unspecified.
+On some UNIX systems,
+.BR select ()
+unblocks and returns, with an indication that the file descriptor is ready
+(a subsequent I/O operation will likely fail with an error,
+unless another process reopens the file descriptor between the time
+.BR select ()
+returned and the I/O operation is performed).
+On Linux (and some other systems),
+closing the file descriptor in another thread has no effect on
+.BR select ().
+In summary, any application that relies on a particular behavior
+in this scenario must be considered buggy.
+.\"
+.SS C library/kernel differences
+The Linux kernel allows file descriptor sets of arbitrary size,
+determining the length of the sets to be checked from the value of
+.IR nfds .
+However, in the glibc implementation, the
+.I fd_set
+type is fixed in size.
+See also BUGS.
+.P
+The
+.BR pselect ()
+interface described in this page is implemented by glibc.
+The underlying Linux system call is named
+.BR pselect6 ().
+This system call has somewhat different behavior from the glibc
+wrapper function.
+.P
+The Linux
+.BR pselect6 ()
+system call modifies its
+.I timeout
+argument.
+However, the glibc wrapper function hides this behavior
+by using a local variable for the timeout argument that
+is passed to the system call.
+Thus, the glibc
+.BR pselect ()
+function does not modify its
+.I timeout
+argument;
+this is the behavior required by POSIX.1-2001.
+.P
+The final argument of the
+.BR pselect6 ()
+system call is not a
+.I "sigset_t\ *"
+pointer, but is instead a structure of the form:
+.P
+.in +4n
+.EX
+struct {
+ const kernel_sigset_t *ss; /* Pointer to signal set */
+ size_t ss_len; /* Size (in bytes) of object
+ pointed to by \[aq]ss\[aq] */
+};
+.EE
+.in
+.P
+This allows the system call to obtain both
+a pointer to the signal set and its size,
+while allowing for the fact that most architectures
+support a maximum of 6 arguments to a system call.
+See
+.BR sigprocmask (2)
+for a discussion of the difference between the kernel and libc
+notion of the signal set.
+.\"
+.SS Historical glibc details
+glibc 2.0 provided an incorrect version of
+.BR pselect ()
+that did not take a
+.I sigmask
+argument.
+.P
+From glibc 2.1 to glibc 2.2.1,
+one must define
+.B _GNU_SOURCE
+in order to obtain the declaration of
+.BR pselect ()
+from
+.IR <sys/select.h> .
+.SH BUGS
+POSIX allows an implementation to define an upper limit,
+advertised via the constant
+.BR FD_SETSIZE ,
+on the range of file descriptors that can be specified
+in a file descriptor set.
+The Linux kernel imposes no fixed limit, but the glibc implementation makes
+.I fd_set
+a fixed-size type, with
+.B FD_SETSIZE
+defined as 1024, and the
+.BR FD_* ()
+macros operating according to that limit.
+To monitor file descriptors greater than 1023, use
+.BR poll (2)
+or
+.BR epoll (7)
+instead.
+.P
+The implementation of the
+.I fd_set
+arguments as value-result arguments is a design error that is avoided in
+.BR poll (2)
+and
+.BR epoll (7).
+.P
+According to POSIX,
+.BR select ()
+should check all specified file descriptors in the three file descriptor sets,
+up to the limit
+.IR nfds\-1 .
+However, the current implementation ignores any file descriptor in
+these sets that is greater than the maximum file descriptor number
+that the process currently has open.
+According to POSIX, any such file descriptor that is specified in one
+of the sets should result in the error
+.BR EBADF .
+.P
+Starting with glibc 2.1, glibc provided an emulation of
+.BR pselect ()
+that was implemented using
+.BR sigprocmask (2)
+and
+.BR select ().
+This implementation remained vulnerable to the very race condition that
+.BR pselect ()
+was designed to prevent.
+Modern versions of glibc use the (race-free)
+.BR pselect ()
+system call on kernels where it is provided.
+.P
+On Linux,
+.BR select ()
+may report a socket file descriptor as "ready for reading", while
+nevertheless a subsequent read blocks.
+This could for example
+happen when data has arrived but upon examination has the wrong
+checksum and is discarded.
+There may be other circumstances
+in which a file descriptor is spuriously reported as ready.
+.\" Stevens discusses a case where accept can block after select
+.\" returns successfully because of an intervening RST from the client.
+Thus it may be safer to use
+.B O_NONBLOCK
+on sockets that should not block.
+.\" Maybe the kernel should have returned EIO in such a situation?
+.P
+On Linux,
+.BR select ()
+also modifies
+.I timeout
+if the call is interrupted by a signal handler (i.e., the
+.B EINTR
+error return).
+This is not permitted by POSIX.1.
+The Linux
+.BR pselect ()
+system call has the same behavior,
+but the glibc wrapper hides this behavior by internally copying the
+.I timeout
+to a local variable and passing that variable to the system call.
+.SH EXAMPLES
+.\" SRC BEGIN (select.c)
+.EX
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/select.h>
+\&
+int
+main(void)
+{
+ int retval;
+ fd_set rfds;
+ struct timeval tv;
+\&
+ /* Watch stdin (fd 0) to see when it has input. */
+\&
+ FD_ZERO(&rfds);
+ FD_SET(0, &rfds);
+\&
+ /* Wait up to five seconds. */
+\&
+ tv.tv_sec = 5;
+ tv.tv_usec = 0;
+\&
+ retval = select(1, &rfds, NULL, NULL, &tv);
+ /* Don\[aq]t rely on the value of tv now! */
+\&
+ if (retval == \-1)
+ perror("select()");
+ else if (retval)
+ printf("Data is available now.\en");
+ /* FD_ISSET(0, &rfds) will be true. */
+ else
+ printf("No data within five seconds.\en");
+\&
+ exit(EXIT_SUCCESS);
+}
+.EE
+.\" SRC END
+.SH SEE ALSO
+.BR accept (2),
+.BR connect (2),
+.BR poll (2),
+.BR read (2),
+.BR recv (2),
+.BR restart_syscall (2),
+.BR send (2),
+.BR sigprocmask (2),
+.BR write (2),
+.BR timespec (3),
+.BR epoll (7),
+.BR time (7)
+.P
+For a tutorial with discussion and examples, see
+.BR select_tut (2).