1 files changed, 427 insertions, 0 deletions
diff --git a/man/man2/sched_setaffinity.2 b/man/man2/sched_setaffinity.2
new file mode 100644
index 000000000..7f87fabb1
--- /dev/null
+++ b/man/man2/sched_setaffinity.2
@@ -0,0 +1,427 @@
+.\" Copyright (C) 2002 Robert Love
+.\" and Copyright (C) 2006, 2015 Michael Kerrisk
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" 2002-11-19 Robert Love <rml@tech9.net> - initial version
+.\" 2004-04-20 mtk - fixed description of return value
+.\" 2004-04-22 aeb - added glibc prototype history
+.\" 2005-05-03 mtk - noted that sched_setaffinity may cause thread
+.\"	migration and that CPU affinity is a per-thread attribute.
+.\" 2006-02-03 mtk -- Major rewrite
+.\" 2008-11-12, mtk, removed CPU_*() macro descriptions to a
+.\" separate CPU_SET(3) page.
+.\"
+.TH sched_setaffinity 2 (date) "Linux man-pages (unreleased)"
+.SH NAME
+sched_setaffinity, sched_getaffinity \- \
+set and get a thread's CPU affinity mask
+.SH LIBRARY
+Standard C library
+.RI ( libc ", " \-lc )
+.SH SYNOPSIS
+.nf
+.BR "#define _GNU_SOURCE" "             /* See feature_test_macros(7) */"
+.B #include <sched.h>
+.P
+.BI "int sched_setaffinity(pid_t " pid ", size_t " cpusetsize ,
+.BI "                      const cpu_set_t *" mask );
+.BI "int sched_getaffinity(pid_t " pid ", size_t " cpusetsize ,
+.BI "                      cpu_set_t *" mask );
+.fi
+.SH DESCRIPTION
+A thread's CPU affinity mask determines the set of CPUs on which
+it is eligible to run.
+On a multiprocessor system, setting the CPU affinity mask
+can be used to obtain performance benefits.
+For example,
+by dedicating one CPU to a particular thread
+(i.e., setting the affinity mask of that thread to specify a single CPU,
+and setting the affinity mask of all other threads to exclude that CPU),
+it is possible to ensure maximum execution speed for that thread.
+Restricting a thread to run on a single CPU also avoids
+the performance cost caused by the cache invalidation that occurs
+when a thread ceases to execute on one CPU and then
+recommences execution on a different CPU.
+.P
+A CPU affinity mask is represented by the
+.I cpu_set_t
+structure, a "CPU set", pointed to by
+.IR mask .
+A set of macros for manipulating CPU sets is described in
+.BR CPU_SET (3).
+.P
+.BR sched_setaffinity ()
+sets the CPU affinity mask of the thread whose ID is
+.I pid
+to the value specified by
+.IR mask .
+If
+.I pid
+is zero, then the calling thread is used.
+The argument
+.I cpusetsize
+is the length (in bytes) of the data pointed to by
+.IR mask .
+Normally this argument would be specified as
+.IR "sizeof(cpu_set_t)" .
+.P
+If the thread specified by
+.I pid
+is not currently running on one of the CPUs specified in
+.IR mask ,
+then that thread is migrated to one of the CPUs specified in
+.IR mask .
+.P
+.BR sched_getaffinity ()
+writes the affinity mask of the thread whose ID is
+.I pid
+into the
+.I cpu_set_t
+structure pointed to by
+.IR mask .
+The
+.I cpusetsize
+argument specifies the size (in bytes) of
+.IR mask .
+If
+.I pid
+is zero, then the mask of the calling thread is returned.
+.SH RETURN VALUE
+On success,
+.BR sched_setaffinity ()
+and
+.BR sched_getaffinity ()
+return 0 (but see "C library/kernel differences" below,
+which notes that the underlying
+.BR sched_getaffinity ()
+differs in its return value).
+On failure, \-1 is returned, and
+.I errno
+is set to indicate the error.
+.SH ERRORS
+.TP
+.B EFAULT
+A supplied memory address was invalid.
+.TP
+.B EINVAL
+The affinity bit mask
+.I mask
+contains no processors that are currently physically on the system
+and permitted to the thread according to any restrictions that
+may be imposed by
+.I cpuset
+cgroups or the "cpuset" mechanism described in
+.BR cpuset (7).
+.TP
+.B EINVAL
+.RB ( sched_getaffinity ()
+and, before Linux 2.6.9,
+.BR sched_setaffinity ())
+.I cpusetsize
+is smaller than the size of the affinity mask used by the kernel.
+.TP
+.B EPERM
+.RB ( sched_setaffinity ())
+The calling thread does not have appropriate privileges.
+The caller needs an effective user ID equal to the real user ID
+or effective user ID of the thread identified by
+.IR pid ,
+or it must possess the
+.B CAP_SYS_NICE
+capability in the user namespace of the thread
+.IR pid .
+.TP
+.B ESRCH
+The thread whose ID is \fIpid\fP could not be found.
+.SH STANDARDS
+Linux.
+.SH HISTORY
+Linux 2.5.8,
+glibc 2.3.
+.P
+Initially, the glibc interfaces included a
+.I cpusetsize
+argument, typed as
+.IR "unsigned int" .
+In glibc 2.3.3, the
+.I cpusetsize
+argument was removed, but was then restored in glibc 2.3.4, with type
+.IR size_t .
+.SH NOTES
+After a call to
+.BR sched_setaffinity (),
+the set of CPUs on which the thread will actually run is
+the intersection of the set specified in the
+.I mask
+argument and the set of CPUs actually present on the system.
+The system may further restrict the set of CPUs on which the thread
+runs if the "cpuset" mechanism described in
+.BR cpuset (7)
+is being used.
+These restrictions on the actual set of CPUs on which the thread
+will run are silently imposed by the kernel.
+.P
+There are various ways of determining the number of CPUs
+available on the system, including: inspecting the contents of
+.IR /proc/cpuinfo ;
+using
+.BR sysconf (3)
+to obtain the values of the
+.B _SC_NPROCESSORS_CONF
+and
+.B _SC_NPROCESSORS_ONLN
+parameters; and inspecting the list of CPU directories under
+.IR /sys/devices/system/cpu/ .
+.P
+.BR sched (7)
+has a description of the Linux scheduling scheme.
+.P
+The affinity mask is a per-thread attribute that can be
+adjusted independently for each of the threads in a thread group.
+The value returned from a call to
+.BR gettid (2)
+can be passed in the argument
+.IR pid .
+Specifying
+.I pid
+as 0 will set the attribute for the calling thread,
+and passing the value returned from a call to
+.BR getpid (2)
+will set the attribute for the main thread of the thread group.
+(If you are using the POSIX threads API, then use
+.BR pthread_setaffinity_np (3)
+instead of
+.BR sched_setaffinity ().)
+.P
+The
+.I isolcpus
+boot option can be used to isolate one or more CPUs at boot time,
+so that no processes are scheduled onto those CPUs.
+Following the use of this boot option,
+the only way to schedule processes onto the isolated CPUs is via
+.BR sched_setaffinity ()
+or the
+.BR cpuset (7)
+mechanism.
+For further information, see the kernel source file
+.IR Documentation/admin\-guide/kernel\-parameters.txt .
+As noted in that file,
+.I isolcpus
+is the preferred mechanism of isolating CPUs
+(versus the alternative of manually setting the CPU affinity
+of all processes on the system).
+.P
+A child created via
+.BR fork (2)
+inherits its parent's CPU affinity mask.
+The affinity mask is preserved across an
+.BR execve (2).
+.SS C library/kernel differences
+This manual page describes the glibc interface for the CPU affinity calls.
+The actual system call interface is slightly different, with the
+.I mask
+being typed as
+.IR "unsigned long\ *" ,
+reflecting the fact that the underlying implementation of CPU
+sets is a simple bit mask.
+.P
+On success, the raw
+.BR sched_getaffinity ()
+system call returns the number of bytes placed copied into the
+.I mask
+buffer;
+this will be the minimum of
+.I cpusetsize
+and the size (in bytes) of the
+.I cpumask_t
+data type that is used internally by the kernel to
+represent the CPU set bit mask.
+.SS Handling systems with large CPU affinity masks
+The underlying system calls (which represent CPU masks as bit masks of type
+.IR "unsigned long\ *" )
+impose no restriction on the size of the CPU mask.
+However, the
+.I cpu_set_t
+data type used by glibc has a fixed size of 128 bytes,
+meaning that the maximum CPU number that can be represented is 1023.
+.\" FIXME . See https://sourceware.org/bugzilla/show_bug.cgi?id=15630
+.\" and https://sourceware.org/ml/libc-alpha/2013-07/msg00288.html
+If the kernel CPU affinity mask is larger than 1024,
+then calls of the form:
+.P
+.in +4n
+.EX
+sched_getaffinity(pid, sizeof(cpu_set_t), &mask);
+.EE
+.in
+.P
+fail with the error
+.BR EINVAL ,
+the error produced by the underlying system call for the case where the
+.I mask
+size specified in
+.I cpusetsize
+is smaller than the size of the affinity mask used by the kernel.
+(Depending on the system CPU topology, the kernel affinity mask can
+be substantially larger than the number of active CPUs in the system.)
+.P
+When working on systems with large kernel CPU affinity masks,
+one must dynamically allocate the
+.I mask
+argument (see
+.BR CPU_ALLOC (3)).
+Currently, the only way to do this is by probing for the size
+of the required mask using
+.BR sched_getaffinity ()
+calls with increasing mask sizes (until the call does not fail with the error
+.BR EINVAL ).
+.P
+Be aware that
+.BR CPU_ALLOC (3)
+may allocate a slightly larger CPU set than requested
+(because CPU sets are implemented as bit masks allocated in units of
+.IR sizeof(long) ).
+Consequently,
+.BR sched_getaffinity ()
+can set bits beyond the requested allocation size, because the kernel
+sees a few additional bits.
+Therefore, the caller should iterate over the bits in the returned set,
+counting those which are set, and stop upon reaching the value returned by
+.BR CPU_COUNT (3)
+(rather than iterating over the number of bits
+requested to be allocated).
+.SH EXAMPLES
+The program below creates a child process.
+The parent and child then each assign themselves to a specified CPU
+and execute identical loops that consume some CPU time.
+Before terminating, the parent waits for the child to complete.
+The program takes three command-line arguments:
+the CPU number for the parent,
+the CPU number for the child,
+and the number of loop iterations that both processes should perform.
+.P
+As the sample runs below demonstrate, the amount of real and CPU time
+consumed when running the program will depend on intra-core caching effects
+and whether the processes are using the same CPU.
+.P
+We first employ
+.BR lscpu (1)
+to determine that this (x86)
+system has two cores, each with two CPUs:
+.P
+.in +4n
+.EX
+$ \fBlscpu | egrep \-i \[aq]core.*:|socket\[aq]\fP
+Thread(s) per core:    2
+Core(s) per socket:    2
+Socket(s):             1
+.EE
+.in
+.P
+We then time the operation of the example program for three cases:
+both processes running on the same CPU;
+both processes running on different CPUs on the same core;
+and both processes running on different CPUs on different cores.
+.P
+.in +4n
+.EX
+$ \fBtime \-p ./a.out 0 0 100000000\fP
+real 14.75
+user 3.02
+sys 11.73
+$ \fBtime \-p ./a.out 0 1 100000000\fP
+real 11.52
+user 3.98
+sys 19.06
+$ \fBtime \-p ./a.out 0 3 100000000\fP
+real 7.89
+user 3.29
+sys 12.07
+.EE
+.in
+.SS Program source
+\&
+.\" SRC BEGIN (sched_setaffinity.c)
+.EX
+#define _GNU_SOURCE
+#include <err.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <unistd.h>
+\&
+int
+main(int argc, char *argv[])
+{
+    int           parentCPU, childCPU;
+    cpu_set_t     set;
+    unsigned int  nloops;
+\&
+    if (argc != 4) {
+        fprintf(stderr, "Usage: %s parent\-cpu child\-cpu num\-loops\en",
+                argv[0]);
+        exit(EXIT_FAILURE);
+    }
+\&
+    parentCPU = atoi(argv[1]);
+    childCPU = atoi(argv[2]);
+    nloops = atoi(argv[3]);
+\&
+    CPU_ZERO(&set);
+\&
+    switch (fork()) {
+    case \-1:            /* Error */
+        err(EXIT_FAILURE, "fork");
+\&
+    case 0:             /* Child */
+        CPU_SET(childCPU, &set);
+\&
+        if (sched_setaffinity(getpid(), sizeof(set), &set) == \-1)
+            err(EXIT_FAILURE, "sched_setaffinity");
+\&
+        for (unsigned int j = 0; j < nloops; j++)
+            getppid();
+\&
+        exit(EXIT_SUCCESS);
+\&
+    default:            /* Parent */
+        CPU_SET(parentCPU, &set);
+\&
+        if (sched_setaffinity(getpid(), sizeof(set), &set) == \-1)
+            err(EXIT_FAILURE, "sched_setaffinity");
+\&
+        for (unsigned int j = 0; j < nloops; j++)
+            getppid();
+\&
+        wait(NULL);     /* Wait for child to terminate */
+        exit(EXIT_SUCCESS);
+    }
+}
+.EE
+.\" SRC END
+.SH SEE ALSO
+.ad l
+.nh
+.BR lscpu (1),
+.BR nproc (1),
+.BR taskset (1),
+.BR clone (2),
+.BR getcpu (2),
+.BR getpriority (2),
+.BR gettid (2),
+.BR nice (2),
+.BR sched_get_priority_max (2),
+.BR sched_get_priority_min (2),
+.BR sched_getscheduler (2),
+.BR sched_setscheduler (2),
+.BR setpriority (2),
+.BR CPU_SET (3),
+.BR get_nprocs (3),
+.BR pthread_setaffinity_np (3),
+.BR sched_getcpu (3),
+.BR capabilities (7),
+.BR cpuset (7),
+.BR sched (7),
+.BR numactl (8)