summaryrefslogtreecommitdiffstats
path: root/man2/setns.2
blob: b4ce783f4bd97c687e8935e0cb5ddbd78213e40b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
.\" Copyright (C) 2011, Eric Biederman <ebiederm@xmission.com>
.\" and Copyright (C) 2011, 2012, Michael Kerrisk <mtk.manpages@gmail.com>
.\"
.\" SPDX-License-Identifier: GPL-2.0-only
.\"
.TH setns 2 (date) "Linux man-pages (unreleased)"
.SH NAME
setns \- reassociate thread with a namespace
.SH LIBRARY
Standard C library
.RI ( libc ", " \-lc )
.SH SYNOPSIS
.nf
.BR "#define _GNU_SOURCE" "             /* See feature_test_macros(7) */"
.B #include <sched.h>
.PP
.BI "int setns(int " fd ", int " nstype );
.fi
.SH DESCRIPTION
The
.BR setns ()
system call allows the calling thread to move into different namespaces.
The
.I fd
argument is one of the following:
.IP \[bu] 3
a file descriptor referring to one of the magic links in a
.IR /proc/ pid /ns/
directory (or a bind mount to such a link);
.IP \[bu]
a PID file descriptor (see
.BR pidfd_open (2)).
.PP
The
.I nstype
argument is interpreted differently in each case.
.\"
.SS fd refers to a \fI/proc/\fPpid\fI/ns/\fP link
If
.I fd
refers to a
.IR /proc/ pid /ns/
link, then
.BR setns ()
reassociates the calling thread with the namespace associated with that link,
subject to any constraints imposed by the
.I nstype
argument.
In this usage, each call to
.BR setns ()
changes just one of the caller's namespace memberships.
.PP
The
.I nstype
argument specifies which type of namespace
the calling thread may be reassociated with.
This argument can have
.I one
of the following values:
.TP
.B 0
Allow any type of namespace to be joined.
.TP
.BR CLONE_NEWCGROUP " (since Linux 4.6)"
.I fd
must refer to a cgroup namespace.
.TP
.BR CLONE_NEWIPC " (since Linux 3.0)"
.I fd
must refer to an IPC namespace.
.TP
.BR CLONE_NEWNET " (since Linux 3.0)"
.I fd
must refer to a network namespace.
.TP
.BR CLONE_NEWNS " (since Linux 3.8)"
.I fd
must refer to a mount namespace.
.TP
.BR CLONE_NEWPID " (since Linux 3.8)"
.I fd
must refer to a descendant PID namespace.
.TP
.BR CLONE_NEWTIME " (since Linux 5.8)"
.\" commit 76c12881a38aaa83e1eb4ce2fada36c3a732bad4
.I fd
must refer to a time namespace.
.TP
.BR CLONE_NEWUSER " (since Linux 3.8)"
.I fd
must refer to a user namespace.
.TP
.BR CLONE_NEWUTS " (since Linux 3.0)"
.I fd
must refer to a UTS namespace.
.PP
Specifying
.I nstype
as 0 suffices if the caller knows (or does not care)
what type of namespace is referred to by
.IR fd .
Specifying a nonzero value for
.I nstype
is useful if the caller does not know what type of namespace is referred to by
.I fd
and wants to ensure that the namespace is of a particular type.
(The caller might not know the type of the namespace referred to by
.I fd
if the file descriptor was opened by another process and, for example,
passed to the caller via a UNIX domain socket.)
.\"
.SS fd is a PID file descriptor
Since Linux 5.8,
.I fd
may refer to a PID file descriptor obtained from
.BR pidfd_open (2)
or
.BR clone (2).
In this usage,
.BR setns ()
atomically moves the calling thread into one or more of the same namespaces
as the thread referred to by
.IR fd .
.PP
The
.I nstype
argument is a bit mask specified by ORing together
.I "one or more"
of the
.B CLONE_NEW*
namespace constants listed above.
The caller is moved into each of the target thread's namespaces
that is specified in
.IR nstype ;
the caller's memberships in the remaining namespaces are left unchanged.
.PP
For example, the following code would move the caller into the
same user, network, and UTS namespaces as PID 1234,
but would leave the caller's other namespace memberships unchanged:
.PP
.in +4n
.EX
int fd = pidfd_open(1234, 0);
setns(fd, CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWUTS);
.EE
.in
.\"
.SS Details for specific namespace types
Note the following details and restrictions when reassociating with
specific namespace types:
.TP
User namespaces
A process reassociating itself with a user namespace must have the
.B CAP_SYS_ADMIN
.\" See kernel/user_namespace.c:userns_install() [3.8 source]
capability in the target user namespace.
(This necessarily implies that it is only possible to join
a descendant user namespace.)
Upon successfully joining a user namespace,
a process is granted all capabilities in that namespace,
regardless of its user and group IDs.
.IP
A multithreaded process may not change user namespace with
.BR setns ().
.IP
It is not permitted to use
.BR setns ()
to reenter the caller's current user namespace.
This prevents a caller that has dropped capabilities from regaining
those capabilities via a call to
.BR setns ().
.IP
For security reasons,
.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
.\" https://lwn.net/Articles/543273/
a process can't join a new user namespace if it is sharing
filesystem-related attributes
(the attributes whose sharing is controlled by the
.BR clone (2)
.B CLONE_FS
flag) with another process.
.IP
For further details on user namespaces, see
.BR user_namespaces (7).
.TP
Mount namespaces
Changing the mount namespace requires that the caller possess both
.B CAP_SYS_CHROOT
and
.B CAP_SYS_ADMIN
capabilities in its own user namespace and
.B CAP_SYS_ADMIN
in the user namespace that owns the target mount namespace.
.IP
A process can't join a new mount namespace if it is sharing
filesystem-related attributes
(the attributes whose sharing is controlled by the
.BR clone (2)
.B CLONE_FS
flag) with another process.
.\" Above check is in fs/namespace.c:mntns_install() [3.8 source]
.IP
See
.BR user_namespaces (7)
for details on the interaction of user namespaces and mount namespaces.
.TP
PID namespaces
In order to reassociate itself with a new PID namespace,
the caller must have the
.B CAP_SYS_ADMIN
capability both in its own user namespace and in the user namespace
that owns the target PID namespace.
.IP
Reassociating the PID namespace has somewhat different semantics
from other namespace types.
Reassociating the calling thread with a PID namespace changes only
the PID namespace that subsequently created child processes of
the caller will be placed in;
it does not change the PID namespace of the caller itself.
.IP
Reassociating with a PID namespace is allowed only if the target
PID namespace is a descendant (child, grandchild, etc.)
of, or is the same as, the current PID namespace of the caller.
.IP
For further details on PID namespaces, see
.BR pid_namespaces (7).
.TP
Cgroup namespaces
In order to reassociate itself with a new cgroup namespace,
the caller must have the
.B CAP_SYS_ADMIN
capability both in its own user namespace and in the user namespace
that owns the target cgroup namespace.
.IP
Using
.BR setns ()
to change the caller's cgroup namespace does not change
the caller's cgroup memberships.
.TP
Network, IPC, time, and UTS namespaces
In order to reassociate itself with a new network, IPC, time, or UTS namespace,
the caller must have the
.B CAP_SYS_ADMIN
capability both in its own user namespace and in the user namespace
that owns the target namespace.
.SH RETURN VALUE
On success,
.BR setns ()
returns 0.
On failure, \-1 is returned and
.I errno
is set to indicate the error.
.SH ERRORS
.TP
.B EBADF
.I fd
is not a valid file descriptor.
.TP
.B EINVAL
.I fd
refers to a namespace whose type does not match that specified in
.IR nstype .
.TP
.B EINVAL
There is a problem with reassociating
the thread with the specified namespace.
.TP
.\" See kernel/pid_namespace.c::pidns_install() [kernel 3.18 sources]
.B EINVAL
The caller tried to join an ancestor (parent, grandparent, and so on)
PID namespace.
.TP
.B EINVAL
The caller attempted to join the user namespace
in which it is already a member.
.TP
.B EINVAL
.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
The caller shares filesystem
.RB ( CLONE_FS )
state (in particular, the root directory)
with other processes and tried to join a new user namespace.
.TP
.B EINVAL
.\" See kernel/user_namespace.c::userns_install() [kernel 3.15 sources]
The caller is multithreaded and tried to join a new user namespace.
.TP
.B EINVAL
.I fd
is a PID file descriptor and
.I nstype
is invalid (e.g., it is 0).
.TP
.B ENOMEM
Cannot allocate sufficient memory to change the specified namespace.
.TP
.B EPERM
The calling thread did not have the required capability
for this operation.
.TP
.B ESRCH
.I fd
is a PID file descriptor but the process it refers to no longer exists
(i.e., it has terminated and been waited on).
.SH STANDARDS
Linux.
.SH VERSIONS
Linux 3.0,
glibc 2.14.
.SH NOTES
For further information on the
.IR /proc/ pid /ns/
magic links, see
.BR namespaces (7).
.PP
Not all of the attributes that can be shared when
a new thread is created using
.BR clone (2)
can be changed using
.BR setns ().
.SH EXAMPLES
The program below takes two or more arguments.
The first argument specifies the pathname of a namespace file in an existing
.IR /proc/ pid /ns/
directory.
The remaining arguments specify a command and its arguments.
The program opens the namespace file, joins that namespace using
.BR setns (),
and executes the specified command inside that namespace.
.PP
The following shell session demonstrates the use of this program
(compiled as a binary named
.IR ns_exec )
in conjunction with the
.B CLONE_NEWUTS
example program in the
.BR clone (2)
man page (compiled as a binary named
.IR newuts ).
.PP
We begin by executing the example program in
.BR clone (2)
in the background.
That program creates a child in a separate UTS namespace.
The child changes the hostname in its namespace,
and then both processes display the hostnames in their UTS namespaces,
so that we can see that they are different.
.PP
.in +4n
.EX
$ \fBsu\fP                   # Need privilege for namespace operations
Password:
# \fB./newuts bizarro &\fP
[1] 3549
clone() returned 3550
uts.nodename in child:  bizarro
uts.nodename in parent: antero
# \fBuname \-n\fP             # Verify hostname in the shell
antero
.EE
.in
.PP
We then run the program shown below,
using it to execute a shell.
Inside that shell, we verify that the hostname is the one
set by the child created by the first program:
.PP
.in +4n
.EX
# \fB./ns_exec /proc/3550/ns/uts /bin/bash\fP
# \fBuname \-n\fP             # Executed in shell started by ns_exec
bizarro
.EE
.in
.SS Program source
.\" SRC BEGIN (setns.c)
.EX
#define _GNU_SOURCE
#include <err.h>
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Join the namespace referred to by the file named in argv[1], then
   execute the command given by argv[2] (with any following arguments)
   inside that namespace. Any failure is reported on stderr and the
   program exits with status EXIT_FAILURE. */
int
main(int argc, char *argv[])
{
    int fd;

    /* Require a namespace file and at least a command name. */

    if (argc < 3) {
        fprintf(stderr, "%s /proc/PID/ns/FILE cmd args...\en", argv[0]);
        exit(EXIT_FAILURE);
    }

    /* Get file descriptor for namespace; the file descriptor is opened
       with O_CLOEXEC so as to ensure that it is not inherited by the
       program that is later executed. */

    fd = open(argv[1], O_RDONLY | O_CLOEXEC);
    if (fd == \-1)
        err(EXIT_FAILURE, "open");

    /* An nstype of 0 places no constraint on the type of namespace
       that fd refers to. */

    if (setns(fd, 0) == \-1)       /* Join that namespace */
        err(EXIT_FAILURE, "setns");

    /* argv[2] onward already forms the argument vector (terminated by
       the null pointer at argv[argc]) for the command to execute. */

    execvp(argv[2], &argv[2]);    /* Execute a command in namespace */

    /* Control arrives here only if execvp() returned. */

    err(EXIT_FAILURE, "execvp");
}
.EE
.\" SRC END
.SH SEE ALSO
.BR nsenter (1),
.BR clone (2),
.BR fork (2),
.BR unshare (2),
.BR vfork (2),
.BR namespaces (7),
.BR unix (7)