summaryrefslogtreecommitdiffstats
path: root/man/man2/setns.2
diff options
context:
space:
mode:
Diffstat (limited to 'man/man2/setns.2')
-rw-r--r--man/man2/setns.2419
1 files changed, 419 insertions, 0 deletions
diff --git a/man/man2/setns.2 b/man/man2/setns.2
new file mode 100644
index 000000000..860ee7e10
--- /dev/null
+++ b/man/man2/setns.2
@@ -0,0 +1,419 @@
+.\" Copyright (C) 2011, Eric Biederman <ebiederm@xmission.com>
+.\" and Copyright (C) 2011, 2012, Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-only
+.\"
+.TH setns 2 (date) "Linux man-pages (unreleased)"
+.SH NAME
+setns \- reassociate thread with a namespace
+.SH LIBRARY
+Standard C library
+.RI ( libc ", " \-lc )
+.SH SYNOPSIS
+.nf
+.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
+.B #include <sched.h>
+.P
+.BI "int setns(int " fd ", int " nstype );
+.fi
+.SH DESCRIPTION
+The
+.BR setns ()
+system call allows the calling thread to move into different namespaces.
+The
+.I fd
+argument is one of the following:
+.IP \[bu] 3
+a file descriptor referring to one of the magic links in a
+.IR /proc/ pid /ns/
+directory (or a bind mount to such a link);
+.IP \[bu]
+a PID file descriptor (see
+.BR pidfd_open (2)).
+.P
+The
+.I nstype
+argument is interpreted differently in each case.
+.\"
+.SS fd refers to a \fI/proc/\fPpid\fI/ns/\fP link
+If
+.I fd
+refers to a
+.IR /proc/ pid /ns/
+link, then
+.BR setns ()
+reassociates the calling thread with the namespace associated with that link,
+subject to any constraints imposed by the
+.I nstype
+argument.
+In this usage, each call to
+.BR setns ()
+changes just one of the caller's namespace memberships.
+.P
+The
+.I nstype
+argument specifies which type of namespace
+the calling thread may be reassociated with.
+This argument can have
+.I one
+of the following values:
+.TP
+.B 0
+Allow any type of namespace to be joined.
+.TP
+.BR CLONE_NEWCGROUP " (since Linux 4.6)"
+.I fd
+must refer to a cgroup namespace.
+.TP
+.BR CLONE_NEWIPC " (since Linux 3.0)"
+.I fd
+must refer to an IPC namespace.
+.TP
+.BR CLONE_NEWNET " (since Linux 3.0)"
+.I fd
+must refer to a network namespace.
+.TP
+.BR CLONE_NEWNS " (since Linux 3.8)"
+.I fd
+must refer to a mount namespace.
+.TP
+.BR CLONE_NEWPID " (since Linux 3.8)"
+.I fd
+must refer to a descendant PID namespace.
+.TP
+.BR CLONE_NEWTIME " (since Linux 5.8)"
+.\" commit 76c12881a38aaa83e1eb4ce2fada36c3a732bad4
+.I fd
+must refer to a time namespace.
+.TP
+.BR CLONE_NEWUSER " (since Linux 3.8)"
+.I fd
+must refer to a user namespace.
+.TP
+.BR CLONE_NEWUTS " (since Linux 3.0)"
+.I fd
+must refer to a UTS namespace.
+.P
+Specifying
+.I nstype
+as 0 suffices if the caller knows (or does not care)
+what type of namespace is referred to by
+.IR fd .
+Specifying a nonzero value for
+.I nstype
+is useful if the caller does not know what type of namespace is referred to by
+.I fd
+and wants to ensure that the namespace is of a particular type.
+(The caller might not know the type of the namespace referred to by
+.I fd
+if the file descriptor was opened by another process and, for example,
+passed to the caller via a UNIX domain socket.)
+.\"
+.SS fd is a PID file descriptor
+Since Linux 5.8,
+.I fd
+may refer to a PID file descriptor obtained from
+.BR pidfd_open (2)
+or
+.BR clone (2).
+In this usage,
+.BR setns ()
+atomically moves the calling thread into one or more of the same namespaces
+as the thread referred to by
+.IR fd .
+.P
+The
+.I nstype
+argument is a bit mask specified by ORing together
+.I "one or more"
+of the
+.B CLONE_NEW*
+namespace constants listed above.
+The caller is moved into each of the target thread's namespaces
+that is specified in
+.IR nstype ;
+the caller's memberships in the remaining namespaces are left unchanged.
+.P
+For example, the following code would move the caller into the
+same user, network, and UTS namespaces as PID 1234,
+but would leave the caller's other namespace memberships unchanged:
+.P
+.in +4n
+.EX
+int fd = pidfd_open(1234, 0);
+setns(fd, CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWUTS);
+.EE
+.in
+.\"
+.SS Details for specific namespace types
+Note the following details and restrictions when reassociating with
+specific namespace types:
+.TP
+User namespaces
+A process reassociating itself with a user namespace must have the
+.B CAP_SYS_ADMIN
+.\" See kernel/user_namespace.c:userns_install() [3.8 source]
+capability in the target user namespace.
+(This necessarily implies that it is only possible to join
+a descendant user namespace.)
+Upon successfully joining a user namespace,
+a process is granted all capabilities in that namespace,
+regardless of its user and group IDs.
+.IP
+A multithreaded process may not change user namespace with
+.BR setns ().
+.IP
+It is not permitted to use
+.BR setns ()
+to reenter the caller's current user namespace.
+This prevents a caller that has dropped capabilities from regaining
+those capabilities via a call to
+.BR setns ().
+.IP
+For security reasons,
+.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
+.\" https://lwn.net/Articles/543273/
+a process can't join a new user namespace if it is sharing
+filesystem-related attributes
+(the attributes whose sharing is controlled by the
+.BR clone (2)
+.B CLONE_FS
+flag) with another process.
+.IP
+For further details on user namespaces, see
+.BR user_namespaces (7).
+.TP
+Mount namespaces
+Changing the mount namespace requires that the caller possess both
+.B CAP_SYS_CHROOT
+and
+.B CAP_SYS_ADMIN
+capabilities in its own user namespace and
+.B CAP_SYS_ADMIN
+in the user namespace that owns the target mount namespace.
+.IP
+A process can't join a new mount namespace if it is sharing
+filesystem-related attributes
+(the attributes whose sharing is controlled by the
+.BR clone (2)
+.B CLONE_FS
+flag) with another process.
+.\" Above check is in fs/namespace.c:mntns_install() [3.8 source]
+.IP
+See
+.BR user_namespaces (7)
+for details on the interaction of user namespaces and mount namespaces.
+.TP
+PID namespaces
+In order to reassociate itself with a new PID namespace,
+the caller must have the
+.B CAP_SYS_ADMIN
+capability both in its own user namespace and in the user namespace
+that owns the target PID namespace.
+.IP
+Reassociating the PID namespace has somewhat different
+from other namespace types.
+Reassociating the calling thread with a PID namespace changes only
+the PID namespace that subsequently created child processes of
+the caller will be placed in;
+it does not change the PID namespace of the caller itself.
+.IP
+Reassociating with a PID namespace is allowed only if the target
+PID namespace is a descendant (child, grandchild, etc.)
+of, or is the same as, the current PID namespace of the caller.
+.IP
+For further details on PID namespaces, see
+.BR pid_namespaces (7).
+.TP
+Cgroup namespaces
+In order to reassociate itself with a new cgroup namespace,
+the caller must have the
+.B CAP_SYS_ADMIN
+capability both in its own user namespace and in the user namespace
+that owns the target cgroup namespace.
+.IP
+Using
+.BR setns ()
+to change the caller's cgroup namespace does not change
+the caller's cgroup memberships.
+.TP
+Network, IPC, time, and UTS namespaces
+In order to reassociate itself with a new network, IPC, time, or UTS namespace,
+the caller must have the
+.B CAP_SYS_ADMIN
+capability both in its own user namespace and in the user namespace
+that owns the target namespace.
+.SH RETURN VALUE
+On success,
+.BR setns ()
+returns 0.
+On failure, \-1 is returned and
+.I errno
+is set to indicate the error.
+.SH ERRORS
+.TP
+.B EBADF
+.I fd
+is not a valid file descriptor.
+.TP
+.B EINVAL
+.I fd
+refers to a namespace whose type does not match that specified in
+.IR nstype .
+.TP
+.B EINVAL
+There is problem with reassociating
+the thread with the specified namespace.
+.TP
+.\" See kernel/pid_namespace.c::pidns_install() [kernel 3.18 sources]
+.B EINVAL
+The caller tried to join an ancestor (parent, grandparent, and so on)
+PID namespace.
+.TP
+.B EINVAL
+The caller attempted to join the user namespace
+in which it is already a member.
+.TP
+.B EINVAL
+.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
+The caller shares filesystem
+.RB ( CLONE_FS )
+state (in particular, the root directory)
+with other processes and tried to join a new user namespace.
+.TP
+.B EINVAL
+.\" See kernel/user_namespace.c::userns_install() [kernel 3.15 sources]
+The caller is multithreaded and tried to join a new user namespace.
+.TP
+.B EINVAL
+.I fd
+is a PID file descriptor and
+.I nstype
+is invalid (e.g., it is 0).
+.TP
+.B ENOMEM
+Cannot allocate sufficient memory to change the specified namespace.
+.TP
+.B EPERM
+The calling thread did not have the required capability
+for this operation.
+.TP
+.B ESRCH
+.I fd
+is a PID file descriptor but the process it refers to no longer exists
+(i.e., it has terminated and been waited on).
+.SH STANDARDS
+Linux.
+.SH VERSIONS
+Linux 3.0,
+glibc 2.14.
+.SH NOTES
+For further information on the
+.IR /proc/ pid /ns/
+magic links, see
+.BR namespaces (7).
+.P
+Not all of the attributes that can be shared when
+a new thread is created using
+.BR clone (2)
+can be changed using
+.BR setns ().
+.SH EXAMPLES
+The program below takes two or more arguments.
+The first argument specifies the pathname of a namespace file in an existing
+.IR /proc/ pid /ns/
+directory.
+The remaining arguments specify a command and its arguments.
+The program opens the namespace file, joins that namespace using
+.BR setns (),
+and executes the specified command inside that namespace.
+.P
+The following shell session demonstrates the use of this program
+(compiled as a binary named
+.IR ns_exec )
+in conjunction with the
+.B CLONE_NEWUTS
+example program in the
+.BR clone (2)
+man page (complied as a binary named
+.IR newuts ).
+.P
+We begin by executing the example program in
+.BR clone (2)
+in the background.
+That program creates a child in a separate UTS namespace.
+The child changes the hostname in its namespace,
+and then both processes display the hostnames in their UTS namespaces,
+so that we can see that they are different.
+.P
+.in +4n
+.EX
+$ \fBsu\fP # Need privilege for namespace operations
+Password:
+# \fB./newuts bizarro &\fP
+[1] 3549
+clone() returned 3550
+uts.nodename in child: bizarro
+uts.nodename in parent: antero
+# \fBuname \-n\fP # Verify hostname in the shell
+antero
+.EE
+.in
+.P
+We then run the program shown below,
+using it to execute a shell.
+Inside that shell, we verify that the hostname is the one
+set by the child created by the first program:
+.P
+.in +4n
+.EX
+# \fB./ns_exec /proc/3550/ns/uts /bin/bash\fP
+# \fBuname \-n\fP # Executed in shell started by ns_exec
+bizarro
+.EE
+.in
+.SS Program source
+.\" SRC BEGIN (setns.c)
+.EX
+#define _GNU_SOURCE
+#include <err.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+\&
+int
+main(int argc, char *argv[])
+{
+ int fd;
+\&
+ if (argc < 3) {
+ fprintf(stderr, "%s /proc/PID/ns/FILE cmd args...\en", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* Get file descriptor for namespace; the file descriptor is opened
+ with O_CLOEXEC so as to ensure that it is not inherited by the
+ program that is later executed. */
+\&
+ fd = open(argv[1], O_RDONLY | O_CLOEXEC);
+ if (fd == \-1)
+ err(EXIT_FAILURE, "open");
+\&
+ if (setns(fd, 0) == \-1) /* Join that namespace */
+ err(EXIT_FAILURE, "setns");
+\&
+ execvp(argv[2], &argv[2]); /* Execute a command in namespace */
+ err(EXIT_FAILURE, "execvp");
+}
+.EE
+.\" SRC END
+.SH SEE ALSO
+.BR nsenter (1),
+.BR clone (2),
+.BR fork (2),
+.BR unshare (2),
+.BR vfork (2),
+.BR namespaces (7),
+.BR unix (7)